From 239144659b29c0a5ecd83a34e0e57387a1060ed7 Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Tue, 7 Dec 2010 18:50:28 -0500
Subject: Initial checkin for VOLK - Vector-Optimized Library of Kernels. This
 is a new SIMD library.

It currently stands by itself under the GNU Radio tree and can be used separately. We will integrate the build process into GNU Radio and start building off of its functionality over time.
---
 volk/lib/Makefile.am                               | 361 +++++++++++++++++++++
 volk/lib/assembly.h                                |  67 ++++
 volk/lib/cpuid_x86.S                               |  60 ++++
 volk/lib/cpuid_x86_64.S                            |  54 +++
 volk/lib/qa_16s_add_quad_aligned16.cc              |  89 +++++
 volk/lib/qa_16s_add_quad_aligned16.h               |  18 +
 volk/lib/qa_16s_branch_4_state_8_aligned16.cc      | 106 ++++++
 volk/lib/qa_16s_branch_4_state_8_aligned16.h       |  18 +
 volk/lib/qa_16s_convert_32f_aligned16.cc           |  73 +++++
 volk/lib/qa_16s_convert_32f_aligned16.h            |  18 +
 volk/lib/qa_16s_convert_32f_unaligned16.cc         |  73 +++++
 volk/lib/qa_16s_convert_32f_unaligned16.h          |  18 +
 volk/lib/qa_16s_convert_8s_aligned16.cc            |  60 ++++
 volk/lib/qa_16s_convert_8s_aligned16.h             |  18 +
 volk/lib/qa_16s_convert_8s_unaligned16.cc          |  60 ++++
 volk/lib/qa_16s_convert_8s_unaligned16.h           |  18 +
 volk/lib/qa_16s_max_star_aligned16.cc              |  65 ++++
 volk/lib/qa_16s_max_star_aligned16.h               |  18 +
 volk/lib/qa_16s_max_star_horizontal_aligned16.cc   |  79 +++++
 volk/lib/qa_16s_max_star_horizontal_aligned16.h    |  18 +
 .../lib/qa_16s_permute_and_scalar_add_aligned16.cc |  78 +++++
 volk/lib/qa_16s_permute_and_scalar_add_aligned16.h |  18 +
 volk/lib/qa_16s_quad_max_star_aligned16.cc         |  59 ++++
 volk/lib/qa_16s_quad_max_star_aligned16.h          |  18 +
 volk/lib/qa_16sc_deinterleave_16s_aligned16.cc     |  76 +++++
 volk/lib/qa_16sc_deinterleave_16s_aligned16.h      |  18 +
 volk/lib/qa_16sc_deinterleave_32f_aligned16.cc     |  63 ++++
 volk/lib/qa_16sc_deinterleave_32f_aligned16.h      |  18 +
 .../lib/qa_16sc_deinterleave_real_16s_aligned16.cc |  71 ++++
 volk/lib/qa_16sc_deinterleave_real_16s_aligned16.h |  18 +
 .../lib/qa_16sc_deinterleave_real_32f_aligned16.cc | 123 +++++++
 volk/lib/qa_16sc_deinterleave_real_32f_aligned16.h |  18 +
 volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc |  60 ++++
 volk/lib/qa_16sc_deinterleave_real_8s_aligned16.h  |  18 +
 volk/lib/qa_16sc_magnitude_16s_aligned16.cc        |  70 ++++
 volk/lib/qa_16sc_magnitude_16s_aligned16.h         |  18 +
 volk/lib/qa_16sc_magnitude_32f_aligned16.cc        |  70 ++++
 volk/lib/qa_16sc_magnitude_32f_aligned16.h         |  18 +
 volk/lib/qa_16u_byteswap_aligned16.cc              |  60 ++++
 volk/lib/qa_16u_byteswap_aligned16.h               |  18 +
 volk/lib/qa_32f_accumulator_aligned16.cc           |  56 ++++
 volk/lib/qa_32f_accumulator_aligned16.h            |  18 +
 volk/lib/qa_32f_add_aligned16.cc                   |  60 ++++
 volk/lib/qa_32f_add_aligned16.h                    |  18 +
 .../qa_32f_calc_spectral_noise_floor_aligned16.cc  |  59 ++++
 .../qa_32f_calc_spectral_noise_floor_aligned16.h   |  18 +
 volk/lib/qa_32f_convert_16s_aligned16.cc           |  70 ++++
 volk/lib/qa_32f_convert_16s_aligned16.h            |  18 +
 volk/lib/qa_32f_convert_16s_unaligned16.cc         |  70 ++++
 volk/lib/qa_32f_convert_16s_unaligned16.h          |  18 +
 volk/lib/qa_32f_convert_32s_aligned16.cc           |  70 ++++
 volk/lib/qa_32f_convert_32s_aligned16.h            |  18 +
 volk/lib/qa_32f_convert_32s_unaligned16.cc         |  70 ++++
 volk/lib/qa_32f_convert_32s_unaligned16.h          |  18 +
 volk/lib/qa_32f_convert_64f_aligned16.cc           |  60 ++++
 volk/lib/qa_32f_convert_64f_aligned16.h            |  18 +
 volk/lib/qa_32f_convert_64f_unaligned16.cc         |  60 ++++
 volk/lib/qa_32f_convert_64f_unaligned16.h          |  18 +
 volk/lib/qa_32f_convert_8s_aligned16.cc            |  70 ++++
 volk/lib/qa_32f_convert_8s_aligned16.h             |  18 +
 volk/lib/qa_32f_convert_8s_unaligned16.cc          |  70 ++++
 volk/lib/qa_32f_convert_8s_unaligned16.h           |  18 +
 volk/lib/qa_32f_divide_aligned16.cc                |  60 ++++
 volk/lib/qa_32f_divide_aligned16.h                 |  18 +
 volk/lib/qa_32f_dot_prod_aligned16.cc              | 183 +++++++++++
 volk/lib/qa_32f_dot_prod_aligned16.h               |  18 +
 volk/lib/qa_32f_dot_prod_unaligned16.cc            | 190 +++++++++++
 volk/lib/qa_32f_dot_prod_unaligned16.h             |  18 +
 volk/lib/qa_32f_fm_detect_aligned16.cc             |  60 ++++
 volk/lib/qa_32f_fm_detect_aligned16.h              |  18 +
 volk/lib/qa_32f_index_max_aligned16.cc             | 103 ++++++
 volk/lib/qa_32f_index_max_aligned16.h              |  18 +
 volk/lib/qa_32f_interleave_16sc_aligned16.cc       |  75 +++++
 volk/lib/qa_32f_interleave_16sc_aligned16.h        |  18 +
 volk/lib/qa_32f_interleave_32fc_aligned16.cc       |  62 ++++
 volk/lib/qa_32f_interleave_32fc_aligned16.h        |  18 +
 volk/lib/qa_32f_max_aligned16.cc                   |  60 ++++
 volk/lib/qa_32f_max_aligned16.h                    |  18 +
 volk/lib/qa_32f_min_aligned16.cc                   |  60 ++++
 volk/lib/qa_32f_min_aligned16.h                    |  18 +
 volk/lib/qa_32f_multiply_aligned16.cc              |  60 ++++
 volk/lib/qa_32f_multiply_aligned16.h               |  18 +
 volk/lib/qa_32f_normalize_aligned16.cc             |  65 ++++
 volk/lib/qa_32f_normalize_aligned16.h              |  18 +
 volk/lib/qa_32f_power_aligned16.cc                 |  95 ++++++
 volk/lib/qa_32f_power_aligned16.h                  |  18 +
 volk/lib/qa_32f_sqrt_aligned16.cc                  |  59 ++++
 volk/lib/qa_32f_sqrt_aligned16.h                   |  18 +
 volk/lib/qa_32f_stddev_aligned16.cc                |  74 +++++
 volk/lib/qa_32f_stddev_aligned16.h                 |  18 +
 volk/lib/qa_32f_stddev_and_mean_aligned16.cc       |  75 +++++
 volk/lib/qa_32f_stddev_and_mean_aligned16.h        |  18 +
 volk/lib/qa_32f_subtract_aligned16.cc              |  60 ++++
 volk/lib/qa_32f_subtract_aligned16.h               |  18 +
 volk/lib/qa_32f_sum_of_poly_aligned16.cc           | 142 ++++++++
 volk/lib/qa_32f_sum_of_poly_aligned16.h            |  18 +
 volk/lib/qa_32fc_32f_multiply_aligned16.cc         |  85 +++++
 volk/lib/qa_32fc_32f_multiply_aligned16.h          |  18 +
 volk/lib/qa_32fc_32f_power_32fc_aligned16.cc       |  83 +++++
 volk/lib/qa_32fc_32f_power_32fc_aligned16.h        |  18 +
 volk/lib/qa_32fc_atan2_32f_aligned16.cc            |  75 +++++
 volk/lib/qa_32fc_atan2_32f_aligned16.h             |  18 +
 volk/lib/qa_32fc_conjugate_dot_prod_aligned16.cc   | 137 ++++++++
 volk/lib/qa_32fc_conjugate_dot_prod_aligned16.h    |  18 +
 volk/lib/qa_32fc_deinterleave_32f_aligned16.cc     |  63 ++++
 volk/lib/qa_32fc_deinterleave_32f_aligned16.h      |  18 +
 volk/lib/qa_32fc_deinterleave_64f_aligned16.cc     |  63 ++++
 volk/lib/qa_32fc_deinterleave_64f_aligned16.h      |  18 +
 .../lib/qa_32fc_deinterleave_real_16s_aligned16.cc |  60 ++++
 volk/lib/qa_32fc_deinterleave_real_16s_aligned16.h |  18 +
 .../lib/qa_32fc_deinterleave_real_32f_aligned16.cc |  60 ++++
 volk/lib/qa_32fc_deinterleave_real_32f_aligned16.h |  18 +
 .../lib/qa_32fc_deinterleave_real_64f_aligned16.cc |  60 ++++
 volk/lib/qa_32fc_deinterleave_real_64f_aligned16.h |  18 +
 volk/lib/qa_32fc_dot_prod_aligned16.cc             | 214 ++++++++++++
 volk/lib/qa_32fc_dot_prod_aligned16.h              |  20 ++
 volk/lib/qa_32fc_index_max_aligned16.cc            |  89 +++++
 volk/lib/qa_32fc_index_max_aligned16.h             |  18 +
 volk/lib/qa_32fc_magnitude_16s_aligned16.cc        |  70 ++++
 volk/lib/qa_32fc_magnitude_16s_aligned16.h         |  18 +
 volk/lib/qa_32fc_magnitude_32f_aligned16.cc        |  70 ++++
 volk/lib/qa_32fc_magnitude_32f_aligned16.h         |  18 +
 volk/lib/qa_32fc_multiply_aligned16.cc             |  86 +++++
 volk/lib/qa_32fc_multiply_aligned16.h              |  18 +
 ...qa_32fc_power_spectral_density_32f_aligned16.cc |  63 ++++
 .../qa_32fc_power_spectral_density_32f_aligned16.h |  18 +
 volk/lib/qa_32fc_power_spectrum_32f_aligned16.cc   |  63 ++++
 volk/lib/qa_32fc_power_spectrum_32f_aligned16.h    |  18 +
 volk/lib/qa_32fc_square_dist_aligned16.cc          |  91 ++++++
 volk/lib/qa_32fc_square_dist_aligned16.h           |  18 +
 .../qa_32fc_square_dist_scalar_mult_aligned16.cc   |  96 ++++++
 .../qa_32fc_square_dist_scalar_mult_aligned16.h    |  18 +
 volk/lib/qa_32s_and_aligned16.cc                   |  60 ++++
 volk/lib/qa_32s_and_aligned16.h                    |  18 +
 volk/lib/qa_32s_convert_32f_aligned16.cc           |  60 ++++
 volk/lib/qa_32s_convert_32f_aligned16.h            |  18 +
 volk/lib/qa_32s_convert_32f_unaligned16.cc         |  60 ++++
 volk/lib/qa_32s_convert_32f_unaligned16.h          |  18 +
 volk/lib/qa_32s_or_aligned16.cc                    |  60 ++++
 volk/lib/qa_32s_or_aligned16.h                     |  18 +
 volk/lib/qa_32u_byteswap_aligned16.cc              |  59 ++++
 volk/lib/qa_32u_byteswap_aligned16.h               |  18 +
 volk/lib/qa_32u_popcnt_aligned16.cc                |  61 ++++
 volk/lib/qa_32u_popcnt_aligned16.h                 |  18 +
 volk/lib/qa_64f_convert_32f_aligned16.cc           |  60 ++++
 volk/lib/qa_64f_convert_32f_aligned16.h            |  18 +
 volk/lib/qa_64f_convert_32f_unaligned16.cc         |  60 ++++
 volk/lib/qa_64f_convert_32f_unaligned16.h          |  18 +
 volk/lib/qa_64f_max_aligned16.cc                   |  60 ++++
 volk/lib/qa_64f_max_aligned16.h                    |  18 +
 volk/lib/qa_64f_min_aligned16.cc                   |  60 ++++
 volk/lib/qa_64f_min_aligned16.h                    |  18 +
 volk/lib/qa_64u_byteswap_aligned16.cc              |  59 ++++
 volk/lib/qa_64u_byteswap_aligned16.h               |  18 +
 volk/lib/qa_64u_popcnt_aligned16.cc                |  61 ++++
 volk/lib/qa_64u_popcnt_aligned16.h                 |  18 +
 volk/lib/qa_8s_convert_16s_aligned16.cc            |  63 ++++
 volk/lib/qa_8s_convert_16s_aligned16.h             |  18 +
 volk/lib/qa_8s_convert_16s_unaligned16.cc          |  63 ++++
 volk/lib/qa_8s_convert_16s_unaligned16.h           |  18 +
 volk/lib/qa_8s_convert_32f_aligned16.cc            |  63 ++++
 volk/lib/qa_8s_convert_32f_aligned16.h             |  18 +
 volk/lib/qa_8s_convert_32f_unaligned16.cc          |  63 ++++
 volk/lib/qa_8s_convert_32f_unaligned16.h           |  18 +
 volk/lib/qa_8sc_deinterleave_16s_aligned16.cc      |  67 ++++
 volk/lib/qa_8sc_deinterleave_16s_aligned16.h       |  18 +
 volk/lib/qa_8sc_deinterleave_32f_aligned16.cc      | 134 ++++++++
 volk/lib/qa_8sc_deinterleave_32f_aligned16.h       |  18 +
 volk/lib/qa_8sc_deinterleave_real_16s_aligned16.cc |  64 ++++
 volk/lib/qa_8sc_deinterleave_real_16s_aligned16.h  |  18 +
 volk/lib/qa_8sc_deinterleave_real_32f_aligned16.cc | 138 ++++++++
 volk/lib/qa_8sc_deinterleave_real_32f_aligned16.h  |  18 +
 volk/lib/qa_8sc_deinterleave_real_8s_aligned16.cc  |  60 ++++
 volk/lib/qa_8sc_deinterleave_real_8s_aligned16.h   |  18 +
 .../qa_8sc_multiply_conjugate_16sc_aligned16.cc    |  87 +++++
 .../lib/qa_8sc_multiply_conjugate_16sc_aligned16.h |  18 +
 .../qa_8sc_multiply_conjugate_32fc_aligned16.cc    |  87 +++++
 .../lib/qa_8sc_multiply_conjugate_32fc_aligned16.h |  18 +
 volk/lib/qa_volk.cc                                | 211 ++++++++++++
 volk/lib/qa_volk.h                                 |  36 ++
 volk/lib/test_all.cc                               |  82 +++++
 volk/lib/volk_rank_archs.c                         |  13 +
 volk/lib/volk_rank_archs.h                         |  14 +
 183 files changed, 9136 insertions(+)
 create mode 100644 volk/lib/Makefile.am
 create mode 100644 volk/lib/assembly.h
 create mode 100644 volk/lib/cpuid_x86.S
 create mode 100644 volk/lib/cpuid_x86_64.S
 create mode 100644 volk/lib/qa_16s_add_quad_aligned16.cc
 create mode 100644 volk/lib/qa_16s_add_quad_aligned16.h
 create mode 100644 volk/lib/qa_16s_branch_4_state_8_aligned16.cc
 create mode 100644 volk/lib/qa_16s_branch_4_state_8_aligned16.h
 create mode 100644 volk/lib/qa_16s_convert_32f_aligned16.cc
 create mode 100644 volk/lib/qa_16s_convert_32f_aligned16.h
 create mode 100644 volk/lib/qa_16s_convert_32f_unaligned16.cc
 create mode 100644 volk/lib/qa_16s_convert_32f_unaligned16.h
 create mode 100644 volk/lib/qa_16s_convert_8s_aligned16.cc
 create mode 100644 volk/lib/qa_16s_convert_8s_aligned16.h
 create mode 100644 volk/lib/qa_16s_convert_8s_unaligned16.cc
 create mode 100644 volk/lib/qa_16s_convert_8s_unaligned16.h
 create mode 100644 volk/lib/qa_16s_max_star_aligned16.cc
 create mode 100644 volk/lib/qa_16s_max_star_aligned16.h
 create mode 100644 volk/lib/qa_16s_max_star_horizontal_aligned16.cc
 create mode 100644 volk/lib/qa_16s_max_star_horizontal_aligned16.h
 create mode 100644 volk/lib/qa_16s_permute_and_scalar_add_aligned16.cc
 create mode 100644 volk/lib/qa_16s_permute_and_scalar_add_aligned16.h
 create mode 100644 volk/lib/qa_16s_quad_max_star_aligned16.cc
 create mode 100644 volk/lib/qa_16s_quad_max_star_aligned16.h
 create mode 100644 volk/lib/qa_16sc_deinterleave_16s_aligned16.cc
 create mode 100644 volk/lib/qa_16sc_deinterleave_16s_aligned16.h
 create mode 100644 volk/lib/qa_16sc_deinterleave_32f_aligned16.cc
 create mode 100644 volk/lib/qa_16sc_deinterleave_32f_aligned16.h
 create mode 100644 volk/lib/qa_16sc_deinterleave_real_16s_aligned16.cc
 create mode 100644 volk/lib/qa_16sc_deinterleave_real_16s_aligned16.h
 create mode 100644 volk/lib/qa_16sc_deinterleave_real_32f_aligned16.cc
 create mode 100644 volk/lib/qa_16sc_deinterleave_real_32f_aligned16.h
 create mode 100644 volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc
 create mode 100644 volk/lib/qa_16sc_deinterleave_real_8s_aligned16.h
 create mode 100644 volk/lib/qa_16sc_magnitude_16s_aligned16.cc
 create mode 100644 volk/lib/qa_16sc_magnitude_16s_aligned16.h
 create mode 100644 volk/lib/qa_16sc_magnitude_32f_aligned16.cc
 create mode 100644 volk/lib/qa_16sc_magnitude_32f_aligned16.h
 create mode 100644 volk/lib/qa_16u_byteswap_aligned16.cc
 create mode 100644 volk/lib/qa_16u_byteswap_aligned16.h
 create mode 100644 volk/lib/qa_32f_accumulator_aligned16.cc
 create mode 100644 volk/lib/qa_32f_accumulator_aligned16.h
 create mode 100644 volk/lib/qa_32f_add_aligned16.cc
 create mode 100644 volk/lib/qa_32f_add_aligned16.h
 create mode 100644 volk/lib/qa_32f_calc_spectral_noise_floor_aligned16.cc
 create mode 100644 volk/lib/qa_32f_calc_spectral_noise_floor_aligned16.h
 create mode 100644 volk/lib/qa_32f_convert_16s_aligned16.cc
 create mode 100644 volk/lib/qa_32f_convert_16s_aligned16.h
 create mode 100644 volk/lib/qa_32f_convert_16s_unaligned16.cc
 create mode 100644 volk/lib/qa_32f_convert_16s_unaligned16.h
 create mode 100644 volk/lib/qa_32f_convert_32s_aligned16.cc
 create mode 100644 volk/lib/qa_32f_convert_32s_aligned16.h
 create mode 100644 volk/lib/qa_32f_convert_32s_unaligned16.cc
 create mode 100644 volk/lib/qa_32f_convert_32s_unaligned16.h
 create mode 100644 volk/lib/qa_32f_convert_64f_aligned16.cc
 create mode 100644 volk/lib/qa_32f_convert_64f_aligned16.h
 create mode 100644 volk/lib/qa_32f_convert_64f_unaligned16.cc
 create mode 100644 volk/lib/qa_32f_convert_64f_unaligned16.h
 create mode 100644 volk/lib/qa_32f_convert_8s_aligned16.cc
 create mode 100644 volk/lib/qa_32f_convert_8s_aligned16.h
 create mode 100644 volk/lib/qa_32f_convert_8s_unaligned16.cc
 create mode 100644 volk/lib/qa_32f_convert_8s_unaligned16.h
 create mode 100644 volk/lib/qa_32f_divide_aligned16.cc
 create mode 100644 volk/lib/qa_32f_divide_aligned16.h
 create mode 100644 volk/lib/qa_32f_dot_prod_aligned16.cc
 create mode 100644 volk/lib/qa_32f_dot_prod_aligned16.h
 create mode 100644 volk/lib/qa_32f_dot_prod_unaligned16.cc
 create mode 100644 volk/lib/qa_32f_dot_prod_unaligned16.h
 create mode 100644 volk/lib/qa_32f_fm_detect_aligned16.cc
 create mode 100644 volk/lib/qa_32f_fm_detect_aligned16.h
 create mode 100644 volk/lib/qa_32f_index_max_aligned16.cc
 create mode 100644 volk/lib/qa_32f_index_max_aligned16.h
 create mode 100644 volk/lib/qa_32f_interleave_16sc_aligned16.cc
 create mode 100644 volk/lib/qa_32f_interleave_16sc_aligned16.h
 create mode 100644 volk/lib/qa_32f_interleave_32fc_aligned16.cc
 create mode 100644 volk/lib/qa_32f_interleave_32fc_aligned16.h
 create mode 100644 volk/lib/qa_32f_max_aligned16.cc
 create mode 100644 volk/lib/qa_32f_max_aligned16.h
 create mode 100644 volk/lib/qa_32f_min_aligned16.cc
 create mode 100644 volk/lib/qa_32f_min_aligned16.h
 create mode 100644 volk/lib/qa_32f_multiply_aligned16.cc
 create mode 100644 volk/lib/qa_32f_multiply_aligned16.h
 create mode 100644 volk/lib/qa_32f_normalize_aligned16.cc
 create mode 100644 volk/lib/qa_32f_normalize_aligned16.h
 create mode 100644 volk/lib/qa_32f_power_aligned16.cc
 create mode 100644 volk/lib/qa_32f_power_aligned16.h
 create mode 100644 volk/lib/qa_32f_sqrt_aligned16.cc
 create mode 100644 volk/lib/qa_32f_sqrt_aligned16.h
 create mode 100644 volk/lib/qa_32f_stddev_aligned16.cc
 create mode 100644 volk/lib/qa_32f_stddev_aligned16.h
 create mode 100644 volk/lib/qa_32f_stddev_and_mean_aligned16.cc
 create mode 100644 volk/lib/qa_32f_stddev_and_mean_aligned16.h
 create mode 100644 volk/lib/qa_32f_subtract_aligned16.cc
 create mode 100644 volk/lib/qa_32f_subtract_aligned16.h
 create mode 100644 volk/lib/qa_32f_sum_of_poly_aligned16.cc
 create mode 100644 volk/lib/qa_32f_sum_of_poly_aligned16.h
 create mode 100644 volk/lib/qa_32fc_32f_multiply_aligned16.cc
 create mode 100644 volk/lib/qa_32fc_32f_multiply_aligned16.h
 create mode 100644 volk/lib/qa_32fc_32f_power_32fc_aligned16.cc
 create mode 100644 volk/lib/qa_32fc_32f_power_32fc_aligned16.h
 create mode 100644 volk/lib/qa_32fc_atan2_32f_aligned16.cc
 create mode 100644 volk/lib/qa_32fc_atan2_32f_aligned16.h
 create mode 100644 volk/lib/qa_32fc_conjugate_dot_prod_aligned16.cc
 create mode 100644 volk/lib/qa_32fc_conjugate_dot_prod_aligned16.h
 create mode 100644 volk/lib/qa_32fc_deinterleave_32f_aligned16.cc
 create mode 100644 volk/lib/qa_32fc_deinterleave_32f_aligned16.h
 create mode 100644 volk/lib/qa_32fc_deinterleave_64f_aligned16.cc
 create mode 100644 volk/lib/qa_32fc_deinterleave_64f_aligned16.h
 create mode 100644 volk/lib/qa_32fc_deinterleave_real_16s_aligned16.cc
 create mode 100644 volk/lib/qa_32fc_deinterleave_real_16s_aligned16.h
 create mode 100644 volk/lib/qa_32fc_deinterleave_real_32f_aligned16.cc
 create mode 100644 volk/lib/qa_32fc_deinterleave_real_32f_aligned16.h
 create mode 100644 volk/lib/qa_32fc_deinterleave_real_64f_aligned16.cc
 create mode 100644 volk/lib/qa_32fc_deinterleave_real_64f_aligned16.h
 create mode 100644 volk/lib/qa_32fc_dot_prod_aligned16.cc
 create mode 100644 volk/lib/qa_32fc_dot_prod_aligned16.h
 create mode 100644 volk/lib/qa_32fc_index_max_aligned16.cc
 create mode 100644 volk/lib/qa_32fc_index_max_aligned16.h
 create mode 100644 volk/lib/qa_32fc_magnitude_16s_aligned16.cc
 create mode 100644 volk/lib/qa_32fc_magnitude_16s_aligned16.h
 create mode 100644 volk/lib/qa_32fc_magnitude_32f_aligned16.cc
 create mode 100644 volk/lib/qa_32fc_magnitude_32f_aligned16.h
 create mode 100644 volk/lib/qa_32fc_multiply_aligned16.cc
 create mode 100644 volk/lib/qa_32fc_multiply_aligned16.h
 create mode 100644 volk/lib/qa_32fc_power_spectral_density_32f_aligned16.cc
 create mode 100644 volk/lib/qa_32fc_power_spectral_density_32f_aligned16.h
 create mode 100644 volk/lib/qa_32fc_power_spectrum_32f_aligned16.cc
 create mode 100644 volk/lib/qa_32fc_power_spectrum_32f_aligned16.h
 create mode 100644 volk/lib/qa_32fc_square_dist_aligned16.cc
 create mode 100644 volk/lib/qa_32fc_square_dist_aligned16.h
 create mode 100644 volk/lib/qa_32fc_square_dist_scalar_mult_aligned16.cc
 create mode 100644 volk/lib/qa_32fc_square_dist_scalar_mult_aligned16.h
 create mode 100644 volk/lib/qa_32s_and_aligned16.cc
 create mode 100644 volk/lib/qa_32s_and_aligned16.h
 create mode 100644 volk/lib/qa_32s_convert_32f_aligned16.cc
 create mode 100644 volk/lib/qa_32s_convert_32f_aligned16.h
 create mode 100644 volk/lib/qa_32s_convert_32f_unaligned16.cc
 create mode 100644 volk/lib/qa_32s_convert_32f_unaligned16.h
 create mode 100644 volk/lib/qa_32s_or_aligned16.cc
 create mode 100644 volk/lib/qa_32s_or_aligned16.h
 create mode 100644 volk/lib/qa_32u_byteswap_aligned16.cc
 create mode 100644 volk/lib/qa_32u_byteswap_aligned16.h
 create mode 100644 volk/lib/qa_32u_popcnt_aligned16.cc
 create mode 100644 volk/lib/qa_32u_popcnt_aligned16.h
 create mode 100644 volk/lib/qa_64f_convert_32f_aligned16.cc
 create mode 100644 volk/lib/qa_64f_convert_32f_aligned16.h
 create mode 100644 volk/lib/qa_64f_convert_32f_unaligned16.cc
 create mode 100644 volk/lib/qa_64f_convert_32f_unaligned16.h
 create mode 100644 volk/lib/qa_64f_max_aligned16.cc
 create mode 100644 volk/lib/qa_64f_max_aligned16.h
 create mode 100644 volk/lib/qa_64f_min_aligned16.cc
 create mode 100644 volk/lib/qa_64f_min_aligned16.h
 create mode 100644 volk/lib/qa_64u_byteswap_aligned16.cc
 create mode 100644 volk/lib/qa_64u_byteswap_aligned16.h
 create mode 100644 volk/lib/qa_64u_popcnt_aligned16.cc
 create mode 100644 volk/lib/qa_64u_popcnt_aligned16.h
 create mode 100644 volk/lib/qa_8s_convert_16s_aligned16.cc
 create mode 100644 volk/lib/qa_8s_convert_16s_aligned16.h
 create mode 100644 volk/lib/qa_8s_convert_16s_unaligned16.cc
 create mode 100644 volk/lib/qa_8s_convert_16s_unaligned16.h
 create mode 100644 volk/lib/qa_8s_convert_32f_aligned16.cc
 create mode 100644 volk/lib/qa_8s_convert_32f_aligned16.h
 create mode 100644 volk/lib/qa_8s_convert_32f_unaligned16.cc
 create mode 100644 volk/lib/qa_8s_convert_32f_unaligned16.h
 create mode 100644 volk/lib/qa_8sc_deinterleave_16s_aligned16.cc
 create mode 100644 volk/lib/qa_8sc_deinterleave_16s_aligned16.h
 create mode 100644 volk/lib/qa_8sc_deinterleave_32f_aligned16.cc
 create mode 100644 volk/lib/qa_8sc_deinterleave_32f_aligned16.h
 create mode 100644 volk/lib/qa_8sc_deinterleave_real_16s_aligned16.cc
 create mode 100644 volk/lib/qa_8sc_deinterleave_real_16s_aligned16.h
 create mode 100644 volk/lib/qa_8sc_deinterleave_real_32f_aligned16.cc
 create mode 100644 volk/lib/qa_8sc_deinterleave_real_32f_aligned16.h
 create mode 100644 volk/lib/qa_8sc_deinterleave_real_8s_aligned16.cc
 create mode 100644 volk/lib/qa_8sc_deinterleave_real_8s_aligned16.h
 create mode 100644 volk/lib/qa_8sc_multiply_conjugate_16sc_aligned16.cc
 create mode 100644 volk/lib/qa_8sc_multiply_conjugate_16sc_aligned16.h
 create mode 100644 volk/lib/qa_8sc_multiply_conjugate_32fc_aligned16.cc
 create mode 100644 volk/lib/qa_8sc_multiply_conjugate_32fc_aligned16.h
 create mode 100644 volk/lib/qa_volk.cc
 create mode 100644 volk/lib/qa_volk.h
 create mode 100644 volk/lib/test_all.cc
 create mode 100644 volk/lib/volk_rank_archs.c
 create mode 100644 volk/lib/volk_rank_archs.h

(limited to 'volk/lib')

diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am
new file mode 100644
index 000000000..97eb75680
--- /dev/null
+++ b/volk/lib/Makefile.am
@@ -0,0 +1,361 @@
+#
+# Copyright 2008 Free Software Foundation, Inc.
+# 
+# This file is part of GNU Radio
+# 
+# GNU Radio is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+# 
+# GNU Radio is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+
+include $(top_srcdir)/Makefile.common
+
+AM_CPPFLAGS = $(STD_DEFINES_AND_INCLUDES) $(CPPUNIT_CPPFLAGS) $(LV_CXXFLAGS)
+
+
+# We build 3 libraries and 1 executable here.  libvolk and
+# libvolk_runtime contain everything except the libcppunit QA code, and
+# libvolk_qa contains only the libcppunit-based QA code.  The C++ QA code
+# is especially recommended when you have general purpose C or C++ code
+# that may not get thoroughly exercised by building and running a GR
+# block.  The executable runs the QA code at "make check" time.
+#
+# N.B., If there's a SWIG generated shared library and associated
+# python code, it will be contained in ../python, not here.  (That
+# code is conditionally built depending on the state of the
+# --without-python configure option.)  However, the .i should be here
+# next to the .h that it's based on.
+
+
+# list of programs run by "make check" and "make distcheck"
+TESTS = test_all
+
+
+lib_LTLIBRARIES = \
+	libvolk.la \
+	libvolk_runtime.la \
+	libvolk_qa.la
+
+
+# ----------------------------------------------------------------
+#                      The main library
+# ----------------------------------------------------------------
+
+universal_runtime_CODE = 	\
+	volk_runtime.c	\
+	volk_init.c \
+	volk_rank_archs.c	
+
+universal_CODE = 		\
+	volk.c 			\
+	volk_environment_init.c
+
+generic_CODE = 		\
+	volk_cpu_generic.cc
+
+x86_CODE = 		\
+	volk_cpu_x86.c
+
+x86_SUBCODE = 		\
+	cpuid_x86.S
+
+x86_64_SUBCODE = 	\
+	cpuid_x86_64.S
+
+powerpc_CODE = \
+	volk_cpu_powerpc.cc
+
+
+if MD_CPU_generic
+libvolk_la_SOURCES =	\
+	$(generic_CODE)		\
+	$(universal_CODE)
+libvolk_runtime_la_SOURCES =	\
+	$(generic_CODE)		\
+	$(universal_runtime_CODE)
+
+endif
+
+if MD_CPU_x86
+if MD_SUBCPU_x86_64
+libvolk_la_SOURCES =	\
+	$(x86_CODE)		\
+	$(x86_64_SUBCODE)		\
+	$(universal_CODE) 		
+
+libvolk_runtime_la_SOURCES =	\
+	$(x86_CODE)		\
+	$(x86_64_SUBCODE)		\
+	$(universal_runtime_CODE) 		
+else
+libvolk_la_SOURCES =	\
+	$(x86_CODE)		\
+	$(x86_SUBCODE)	\
+	$(universal_CODE)
+
+libvolk_runtime_la_SOURCES =	\
+	$(x86_CODE)		\
+	$(x86_SUBCODE)	\
+	$(universal_runtime_CODE)
+endif
+endif
+
+
+if MD_CPU_powerpc
+libvolk_la_SOURCES =	\
+	$(powerpc_CODE)		\
+	$(universal_CODE)
+
+libvolk_runtime_la_SOURCES =	\
+	$(powerpc_CODE)		\
+	$(universal_runtime_CODE)
+endif
+
+
+
+libvolk_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 
+libvolk_runtime_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0
+
+libvolk_la_LIBADD =
+
+
+
+# ----------------------------------------------------------------
+#   The QA library.  Note libvolk.la and libvolk_runtime.la in LIBADD
+# ----------------------------------------------------------------
+libvolk_qa_la_SOURCES = \
+	qa_volk.cc \
+	qa_16s_quad_max_star_aligned16.cc \
+	qa_32fc_dot_prod_aligned16.cc \
+	qa_32fc_square_dist_aligned16.cc \
+	qa_32fc_square_dist_scalar_mult_aligned16.cc \
+	qa_32f_sum_of_poly_aligned16.cc \
+	qa_32fc_index_max_aligned16.cc \
+	qa_32f_index_max_aligned16.cc \
+	qa_32fc_conjugate_dot_prod_aligned16.cc \
+	qa_16s_permute_and_scalar_add_aligned16.cc \
+	qa_16s_branch_4_state_8_aligned16.cc \
+	qa_16s_max_star_horizontal_aligned16.cc \
+	qa_16s_max_star_aligned16.cc \
+	qa_16s_add_quad_aligned16.cc \
+	qa_32f_add_aligned16.cc \
+	qa_32f_subtract_aligned16.cc \
+	qa_32f_max_aligned16.cc \
+	qa_32f_min_aligned16.cc \
+	qa_64f_max_aligned16.cc \
+	qa_64f_min_aligned16.cc \
+	qa_32s_and_aligned16.cc \
+	qa_32s_or_aligned16.cc \
+	qa_32f_dot_prod_aligned16.cc \
+	qa_32f_dot_prod_unaligned16.cc \
+	qa_32f_fm_detect_aligned16.cc \
+	qa_32fc_32f_multiply_aligned16.cc \
+	qa_32fc_multiply_aligned16.cc \
+	qa_32f_divide_aligned16.cc \
+	qa_32f_multiply_aligned16.cc \
+	qa_32f_sqrt_aligned16.cc \
+	qa_8sc_multiply_conjugate_16sc_aligned16.cc \
+	qa_8sc_multiply_conjugate_32fc_aligned16.cc \
+	qa_32u_popcnt_aligned16.cc \
+	qa_64u_popcnt_aligned16.cc \
+	qa_64u_byteswap_aligned16.cc \
+	qa_8sc_deinterleave_32f_aligned16.cc \
+	qa_16sc_deinterleave_32f_aligned16.cc \
+	qa_8sc_deinterleave_16s_aligned16.cc \
+	qa_32f_interleave_32fc_aligned16.cc \
+	qa_16u_byteswap_aligned16.cc \
+	qa_16sc_deinterleave_16s_aligned16.cc \
+	qa_32fc_deinterleave_real_32f_aligned16.cc \
+	qa_32fc_magnitude_32f_aligned16.cc \
+	qa_32fc_deinterleave_real_64f_aligned16.cc \
+	qa_32fc_deinterleave_real_16s_aligned16.cc \
+	qa_32fc_magnitude_16s_aligned16.cc \
+	qa_32fc_deinterleave_32f_aligned16.cc \
+	qa_8sc_deinterleave_real_8s_aligned16.cc \
+	qa_32fc_deinterleave_64f_aligned16.cc \
+	qa_32f_interleave_16sc_aligned16.cc \
+	qa_16sc_deinterleave_real_8s_aligned16.cc \
+	qa_16sc_deinterleave_real_32f_aligned16.cc \
+	qa_16sc_magnitude_32f_aligned16.cc \
+	qa_32u_byteswap_aligned16.cc \
+	qa_16sc_deinterleave_real_16s_aligned16.cc \
+	qa_8sc_deinterleave_real_32f_aligned16.cc \
+	qa_16sc_magnitude_16s_aligned16.cc \
+	qa_32f_normalize_aligned16.cc \
+	qa_8sc_deinterleave_real_16s_aligned16.cc \
+	qa_16s_convert_32f_aligned16.cc \
+	qa_16s_convert_32f_unaligned16.cc \
+	qa_16s_convert_8s_aligned16.cc \
+	qa_16s_convert_8s_unaligned16.cc \
+	qa_32f_convert_16s_aligned16.cc \
+	qa_32f_convert_16s_unaligned16.cc \
+	qa_32f_convert_32s_aligned16.cc \
+	qa_32f_convert_32s_unaligned16.cc \
+	qa_32f_convert_64f_aligned16.cc \
+	qa_32f_convert_64f_unaligned16.cc \
+	qa_32f_convert_8s_aligned16.cc \
+	qa_32f_convert_8s_unaligned16.cc \
+	qa_32s_convert_32f_aligned16.cc \
+	qa_32s_convert_32f_unaligned16.cc \
+	qa_64f_convert_32f_aligned16.cc \
+	qa_64f_convert_32f_unaligned16.cc \
+	qa_8s_convert_16s_aligned16.cc \
+	qa_8s_convert_16s_unaligned16.cc \
+	qa_8s_convert_32f_aligned16.cc \
+	qa_8s_convert_32f_unaligned16.cc \
+	qa_32fc_32f_power_32fc_aligned16.cc \
+	qa_32f_power_aligned16.cc \
+	qa_32fc_atan2_32f_aligned16.cc \
+	qa_32fc_power_spectral_density_32f_aligned16.cc \
+	qa_32fc_power_spectrum_32f_aligned16.cc \
+	qa_32f_calc_spectral_noise_floor_aligned16.cc \
+	qa_32f_accumulator_aligned16.cc \
+	qa_32f_stddev_aligned16.cc \
+	qa_32f_stddev_and_mean_aligned16.cc
+
+libvolk_qa_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 
+
+libvolk_qa_la_LIBADD = \
+	libvolk.la \
+	libvolk_runtime.la \
+	$(CPPUNIT_LIBS)
+
+# ----------------------------------------------------------------
+# headers that don't get installed
+# ----------------------------------------------------------------
+noinst_HEADERS = \
+	volk_init.h \
+	qa_volk.h \
+	qa_16s_quad_max_star_aligned16.h \
+	qa_32fc_dot_prod_aligned16.h \
+	qa_32fc_square_dist_aligned16.h \
+	qa_32fc_square_dist_scalar_mult_aligned16.h \
+	qa_32f_sum_of_poly_aligned16.h \
+	qa_32fc_index_max_aligned16.h \
+	qa_32f_index_max_aligned16.h \
+	qa_32fc_conjugate_dot_prod_aligned16.h \
+	qa_16s_permute_and_scalar_add_aligned16.h \
+	qa_16s_branch_4_state_8_aligned16.h \
+	qa_16s_max_star_horizontal_aligned16.h \
+	qa_16s_max_star_aligned16.h \
+	qa_16s_add_quad_aligned16.h \
+	qa_32f_add_aligned16.h \
+	qa_32f_subtract_aligned16.h \
+	qa_32f_max_aligned16.h \
+	qa_32f_min_aligned16.h \
+	qa_64f_max_aligned16.h \
+	qa_64f_min_aligned16.h \
+	qa_32s_and_aligned16.h \
+	qa_32s_or_aligned16.h \
+	qa_32f_dot_prod_aligned16.h \
+	qa_32f_dot_prod_unaligned16.h \
+	qa_32f_fm_detect_aligned16.h \
+	qa_32fc_32f_multiply_aligned16.h \
+	qa_32fc_multiply_aligned16.h \
+	qa_32f_divide_aligned16.h \
+	qa_32f_multiply_aligned16.h \
+	qa_32f_sqrt_aligned16.h \
+	qa_8sc_multiply_conjugate_16sc_aligned16.h \
+	qa_8sc_multiply_conjugate_32fc_aligned16.h \
+	qa_32u_popcnt_aligned16.h \
+	qa_64u_popcnt_aligned16.h \
+	qa_64u_byteswap_aligned16.h \
+	qa_8sc_deinterleave_32f_aligned16.h \
+	qa_16sc_deinterleave_32f_aligned16.h \
+	qa_8sc_deinterleave_16s_aligned16.h \
+	qa_32f_interleave_32fc_aligned16.h \
+	qa_16u_byteswap_aligned16.h \
+	qa_16sc_deinterleave_16s_aligned16.h \
+	qa_32fc_deinterleave_real_32f_aligned16.h \
+	qa_32fc_magnitude_32f_aligned16.h \
+	qa_32fc_deinterleave_real_64f_aligned16.h \
+	qa_32fc_deinterleave_real_16s_aligned16.h \
+	qa_32fc_magnitude_16s_aligned16.h \
+	qa_32fc_deinterleave_32f_aligned16.h \
+	qa_8sc_deinterleave_real_8s_aligned16.h \
+	qa_32fc_deinterleave_64f_aligned16.h \
+	qa_32f_interleave_16sc_aligned16.h \
+	qa_16sc_deinterleave_real_8s_aligned16.h \
+	qa_16sc_deinterleave_real_32f_aligned16.h \
+	qa_16sc_magnitude_32f_aligned16.h \
+	qa_32u_byteswap_aligned16.h \
+	qa_16sc_deinterleave_real_16s_aligned16.h \
+	qa_8sc_deinterleave_real_32f_aligned16.h \
+	qa_16sc_magnitude_16s_aligned16.h \
+	qa_32f_normalize_aligned16.h \
+	qa_8sc_deinterleave_real_16s_aligned16.h \
+	qa_16s_convert_32f_aligned16.h \
+	qa_16s_convert_32f_unaligned16.h \
+	qa_16s_convert_8s_aligned16.h \
+	qa_16s_convert_8s_unaligned16.h \
+	qa_32f_convert_16s_aligned16.h \
+	qa_32f_convert_16s_unaligned16.h \
+	qa_32f_convert_32s_aligned16.h \
+	qa_32f_convert_32s_unaligned16.h \
+	qa_32f_convert_64f_aligned16.h \
+	qa_32f_convert_64f_unaligned16.h \
+	qa_32f_convert_8s_aligned16.h \
+	qa_32f_convert_8s_unaligned16.h \
+	qa_32s_convert_32f_aligned16.h \
+	qa_32s_convert_32f_unaligned16.h \
+	qa_64f_convert_32f_aligned16.h \
+	qa_64f_convert_32f_unaligned16.h \
+	qa_8s_convert_16s_aligned16.h \
+	qa_8s_convert_16s_unaligned16.h \
+	qa_8s_convert_32f_aligned16.h \
+	qa_8s_convert_32f_unaligned16.h \
+	qa_32fc_32f_power_32fc_aligned16.h \
+	qa_32f_power_aligned16.h \
+	qa_32fc_atan2_32f_aligned16.h \
+	qa_32fc_power_spectral_density_32f_aligned16.h \
+	qa_32fc_power_spectrum_32f_aligned16.h \
+	qa_32f_calc_spectral_noise_floor_aligned16.h \
+	qa_32f_accumulator_aligned16.h \
+	qa_32f_stddev_aligned16.h \
+	qa_32f_stddev_and_mean_aligned16.h
+
+
+# ----------------------------------------------------------------
+# Our test program
+# ----------------------------------------------------------------
+noinst_PROGRAMS = \
+	test_all
+
+test_all_SOURCES = test_all.cc
+test_all_LDADD   = libvolk_qa.la
+
+
+# distclean hook: delete the sources, tables and the volk_mktables helper that
+# are listed here as build leftovers.  generic_CODE and powerpc_CODE name their
+# files with a .cc suffix, so both the .c and .cc spellings are removed.
+distclean-local: 
+	rm -f volk.c volk_environment_init.c
+	rm -f volk_cpu_generic.c volk_cpu_generic.cc
+	rm -f volk_cpu_powerpc.c volk_cpu_powerpc.cc
+	rm -f volk_cpu_x86.c
+	rm -f volk_init.c volk_init.h
+	rm -f volk_mktables volk_mktables.c
+	rm -f volk_proccpu_sim.c
+	rm -f volk_runtime.c
+	rm -f volk_tables.h
+#SUBDIRS = 
+
+#ifdef BUILD_SSE
+#SUBDIRS += sse
+#elif BUILD_SPU
+#SUBDIRS += spu
+#else
+#SUBDIRS += port
+#endif
+
+
diff --git a/volk/lib/assembly.h b/volk/lib/assembly.h
new file mode 100644
index 000000000..8a99aa07c
--- /dev/null
+++ b/volk/lib/assembly.h
@@ -0,0 +1,67 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2002 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifndef _ASSEMBLY_H_
+#define _ASSEMBLY_H_
+/* Macros for hand-written .S files; the variant is picked per target below. */
+#if defined (__APPLE__) && defined (__APPLE_CC__)
+
+// XCode ignores the .scl and .type directives in XCode 2.2.1 and 2.3,
+// but reports an error for them in XCode 2.4.  Just leave them out.
+/* GLOB_SYMB(f): name of the global symbol for f (leading underscore here). */
+#define GLOB_SYMB(f)    _ ## f
+/* DEF_FUNC_HEAD(f): directives placed before the label of f. */
+#define DEF_FUNC_HEAD(f)  /* none */
+/* FUNC_TAIL(f): directives placed after the body of f. */
+#define FUNC_TAIL(f)    /* none*/
+
+#elif !defined (__ELF__)
+/* Non-Apple, non-ELF targets: underscore prefix plus .def/.scl/.type/.endef. */
+/*
+ * Too bad, the following define does not work as expected --SF
+ * 	#define GLOB_SYMB(f)	__USER_LABEL_PREFIX__ ## f
+ */
+#define GLOB_SYMB(f)	_ ## f
+
+#define DEF_FUNC_HEAD(f)	\
+	.def	GLOB_SYMB(f); .scl 2; .type 32; .endef
+
+#define FUNC_TAIL(f)	/* none */
+
+
+#else	/* __ELF__ */
+
+/* ELF targets: no symbol prefix; emit .type before and .size after the body. */
+#define GLOB_SYMB(f)	f
+/* The trailing backslash on the .type line only continues onto an empty line. */
+#define DEF_FUNC_HEAD(f)	\
+	.type	GLOB_SYMB(f),@function	\
+
+#define FUNC_TAIL(f)	\
+  .Lfe1:		\
+	.size	GLOB_SYMB(f),.Lfe1-GLOB_SYMB(f)
+/* NOTE(review): FUNC_TAIL always defines the label .Lfe1, so it looks */
+/* usable only once per assembled file -- confirm before reusing it.   */
+#endif	/* object format selection */
+
+
+#endif /* _ASSEMBLY_H_ */
diff --git a/volk/lib/cpuid_x86.S b/volk/lib/cpuid_x86.S
new file mode 100644
index 000000000..4e1a9404f
--- /dev/null
+++ b/volk/lib/cpuid_x86.S
@@ -0,0 +1,60 @@
+#	
+# Copyright 2003 Free Software Foundation, Inc.
+# 
+# This file is part of GNU Radio
+# 
+# GNU Radio is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+# 
+# GNU Radio is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with GNU Radio; see the file COPYING.  If not, write to
+# the Free Software Foundation, Inc., 51 Franklin Street,
+# Boston, MA 02110-1301, USA.
+# 
+
+#
+# execute CPUID instruction, return EAX, EBX, ECX and EDX values in result
+#
+#  void cpuid_x86 (unsigned int op, unsigned int result[4]);
+#
+
+#include "assembly.h"
+
+.file "cpuid_x86.S"
+	.version	"01.01"
+.text
+.globl	GLOB_SYMB(cpuid_x86)
+	DEF_FUNC_HEAD(cpuid_x86)
+GLOB_SYMB(cpuid_x86):
+	pushl	%ebp
+	movl	%esp, %ebp
+	pushl	%ebx		# must save in PIC mode, holds GOT pointer
+	pushl	%esi		# used below to hold the result pointer
+	xorl	%ecx, %ecx	# sub-leaf 0: some CPUID leaves also read ECX
+	movl	8(%ebp), %eax	# op
+	movl	12(%ebp), %esi	# result
+	cpuid
+	movl	%eax, 0(%esi)	# result[0] = EAX
+	movl	%ebx, 4(%esi)	# result[1] = EBX
+	movl	%ecx, 8(%esi)	# result[2] = ECX
+	movl	%edx, 12(%esi)	# result[3] = EDX
+	
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+
+FUNC_TAIL(cpuid_x86)
+	.ident	"Hand coded cpuid assembly"
+	
+
+#if defined(__linux__) && defined(__ELF__)
+.section .note.GNU-stack,"",%progbits
+#endif
diff --git a/volk/lib/cpuid_x86_64.S b/volk/lib/cpuid_x86_64.S
new file mode 100644
index 000000000..32b1847cd
--- /dev/null
+++ b/volk/lib/cpuid_x86_64.S
@@ -0,0 +1,54 @@
+#	
+# Copyright 2003,2005 Free Software Foundation, Inc.
+# 
+# This file is part of GNU Radio
+# 
+# GNU Radio is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+# 
+# GNU Radio is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with GNU Radio; see the file COPYING.  If not, write to
+# the Free Software Foundation, Inc., 51 Franklin Street,
+# Boston, MA 02110-1301, USA.
+# 
+
+#
+# execute CPUID instruction, return EAX, EBX, ECX and EDX values in result
+#
+#  void cpuid_x86 (unsigned int op, unsigned int result[4]);
+#
+
+#include "assembly.h"
+
+.file "cpuid_x86_64.S"
+	.version	"01.01"
+.text
+.globl	GLOB_SYMB(cpuid_x86)
+	DEF_FUNC_HEAD(cpuid_x86)
+GLOB_SYMB(cpuid_x86):
+	mov	%rbx, %r11	# CPUID overwrites RBX, which the caller expects preserved
+	xorl	%ecx, %ecx	# sub-leaf 0: some CPUID leaves also read ECX
+	mov	%rdi, %rax	# op
+	cpuid
+	movl	%eax, 0(%rsi)	# result
+	movl	%ebx, 4(%rsi)
+	movl	%ecx, 8(%rsi)
+	movl	%edx, 12(%rsi)
+	
+	mov	%r11, %rbx
+	retq
+
+FUNC_TAIL(cpuid_x86)
+	.ident	"Hand coded cpuid64 assembly"
+	
+
+#if defined(__linux__) && defined(__ELF__)
+.section .note.GNU-stack,"",%progbits
+#endif
diff --git a/volk/lib/qa_16s_add_quad_aligned16.cc b/volk/lib/qa_16s_add_quad_aligned16.cc
new file mode 100644
index 000000000..c3005c1be
--- /dev/null
+++ b/volk/lib/qa_16s_add_quad_aligned16.cc
@@ -0,0 +1,89 @@
+#include <volk/volk.h>
+#include <qa_16s_add_quad_aligned16.h>
+#include <volk/volk_16s_add_quad_aligned16.h>
+#include <cstdlib>
+#include <time.h>
+//test for sse2
+
+#ifndef LV_HAVE_SSE2
+
+void qa_16s_add_quad_aligned16::t1() {  // stand-in compiled when LV_HAVE_SSE2 is not defined
+  fputs("sse2 not available... no test performed\n", stdout);
+}
+
+#else
+
+
+
+void qa_16s_add_quad_aligned16::t1() {
+  // Timing-and-agreement test: the "generic" and "sse2" variants of
+  // volk_16s_add_quad_aligned16_manual are run on the same random inputs,
+  // the CPU time of each run is printed, and the four output vectors of
+  // one variant must equal those of the other element by element.
+
+  volk_environment_init();
+
+  const int vlen = 3200;      // elements in every vector
+  const int ITERS = 100000;   // timed repetitions per variant
+
+  short input0[vlen] __attribute__ ((aligned (16)));
+  short input1[vlen] __attribute__ ((aligned (16)));
+  short input2[vlen] __attribute__ ((aligned (16)));
+  short input3[vlen] __attribute__ ((aligned (16)));
+  short input4[vlen] __attribute__ ((aligned (16)));
+
+  // Outputs written by the "generic" variant.
+  short output0[vlen] __attribute__ ((aligned (16)));
+  short output1[vlen] __attribute__ ((aligned (16)));
+  short output2[vlen] __attribute__ ((aligned (16)));
+  short output3[vlen] __attribute__ ((aligned (16)));
+
+  // Outputs written by the "sse2" variant.
+  short output01[vlen] __attribute__ ((aligned (16)));
+  short output11[vlen] __attribute__ ((aligned (16)));
+  short output21[vlen] __attribute__ ((aligned (16)));
+  short output31[vlen] __attribute__ ((aligned (16)));
+
+  // Every input element is the difference of two random shorts, each
+  // shifted right by two; rand() is drawn pairwise for input0 .. input4.
+  short* const sources[5] = {input0, input1, input2, input3, input4};
+  for(int idx = 0; idx < vlen; ++idx) {
+    for(int k = 0; k < 5; ++k) {
+      const short plus = ((short) (rand() - (RAND_MAX/2))) >> 2;
+      const short minus = ((short) (rand() - (RAND_MAX/2))) >> 2;
+      sources[k][idx] = plus - minus;
+    }
+  }
+
+  printf("16s_add_quad_aligned\n");
+
+  // The last numeric argument is vlen << 1 -- presumably the size of one
+  // vector in bytes (two per short); TODO confirm against the kernel header.
+
+  // Time the generic variant.
+  clock_t began = clock();
+  for(int rep = 0; rep != ITERS; ++rep) {
+    volk_16s_add_quad_aligned16_manual(output0, output1, output2, output3, input0, input1, input2, input3, input4, vlen << 1 , "generic");
+  }
+  double elapsed = (double)(clock() - began)/(double)CLOCKS_PER_SEC;
+  printf("generic_time: %f\n", elapsed);
+
+  // Time the SSE2 variant on the same inputs.
+  began = clock();
+  for(int rep = 0; rep != ITERS; ++rep) {
+    volk_16s_add_quad_aligned16_manual(output01, output11, output21, output31, input0, input1, input2, input3, input4, vlen << 1 , "sse2");
+  }
+  elapsed = (double)(clock() - began)/(double)CLOCKS_PER_SEC;
+  printf("sse2_time: %f\n", elapsed);
+
+  // The two variants have to agree on every element of every output.
+  short* const expected[4] = {output0, output1, output2, output3};
+  short* const actual[4] = {output01, output11, output21, output31};
+  for(int idx = 0; idx < vlen; ++idx) {
+    for(int k = 0; k < 4; ++k) {
+      CPPUNIT_ASSERT_EQUAL(expected[k][idx], actual[k][idx]);
+    }
+  }
+}
+
+#endif
diff --git a/volk/lib/qa_16s_add_quad_aligned16.h b/volk/lib/qa_16s_add_quad_aligned16.h
new file mode 100644
index 000000000..3c1ae978b
--- /dev/null
+++ b/volk/lib/qa_16s_add_quad_aligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_16S_ADD_QUAD_ALIGNED16_H
+#define INCLUDED_QA_16S_ADD_QUAD_ALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_16s_add_quad_aligned16 : public CppUnit::TestCase {
+  // CppUnit fixture whose suite registers a single test method, t1().
+  CPPUNIT_TEST_SUITE (qa_16s_add_quad_aligned16);
+  CPPUNIT_TEST (t1);
+  CPPUNIT_TEST_SUITE_END ();
+
+ private:
+  void t1 ();  // the registered test; defined in qa_16s_add_quad_aligned16.cc
+};
+
+
+#endif /* INCLUDED_QA_16S_ADD_QUAD_ALIGNED16_H */
diff --git a/volk/lib/qa_16s_branch_4_state_8_aligned16.cc b/volk/lib/qa_16s_branch_4_state_8_aligned16.cc
new file mode 100644
index 000000000..ba5e8ed93
--- /dev/null
+++ b/volk/lib/qa_16s_branch_4_state_8_aligned16.cc
@@ -0,0 +1,106 @@
+#include <volk/volk.h>
+#include <qa_16s_branch_4_state_8_aligned16.h>
+#include <cstdlib>
+#include <time.h>
+
+//test for ssse3
+
+#ifndef LV_HAVE_SSSE3
+
+void qa_16s_branch_4_state_8_aligned16::t1() {  // stand-in compiled when LV_HAVE_SSSE3 is not defined
+  fputs("ssse3 not available... no test performed\n", stdout);
+}
+
+#else
+
+void qa_16s_branch_4_state_8_aligned16::t1() {
+  // Times permute_and_scalar_add (sse2) and branch_4_state_8 (ssse3, generic) on one vector; all three results must match.
+  const int num_iters = 1000000;  // timed repetitions per kernel
+  const int vlen = 32;            // shorts per vector
+  static char permute0[16]__attribute__((aligned(16))) = {0x0e, 0x0f, 0x0a, 0x0b, 0x04, 0x05, 0x00, 0x01, 0x0c, 0x0d, 0x08, 0x09, 0x06, 0x07, 0x02, 0x03};
+  static char permute1[16]__attribute__((aligned(16))) = {0x0c, 0x0d, 0x08, 0x09, 0x06, 0x07, 0x02, 0x03, 0x0e, 0x0f, 0x0a, 0x0b, 0x04, 0x05, 0x00, 0x01};
+  static char permute2[16]__attribute__((aligned(16))) = {0x02, 0x03, 0x06, 0x07, 0x08, 0x09, 0x0c, 0x0d, 0x00, 0x01, 0x04, 0x05, 0x0a, 0x0b, 0x0e, 0x0f};
+  static char permute3[16]__attribute__((aligned(16))) = {0x00, 0x01, 0x04, 0x05, 0x0a, 0x0b, 0x0e, 0x0f, 0x02, 0x03, 0x06, 0x07, 0x08, 0x09, 0x0c, 0x0d};
+  static char* permuters[4] = {permute0, permute1, permute2, permute3};
+  
+  unsigned int num_bytes = vlen << 1;
+
+  volk_environment_init();
+  clock_t start, end;
+  double total;
+  // target: permute_and_scalar_add "sse2"; target2 / target3: branch_4_state_8 "ssse3" / "generic".
+  short target[vlen] __attribute__ ((aligned (16)));
+  short target2[vlen] __attribute__ ((aligned (16)));
+  short target3[vlen] __attribute__ ((aligned (16)));
+  const short ones = -1, zero = 0;  // all-bits-set / all-bits-clear masks; a literal 0xffff does not fit in a short (narrowing)
+  short src0[vlen] __attribute__ ((aligned (16)));
+  short permute_indexes[vlen] __attribute__ ((aligned (16))) =  {
+7, 5, 2, 0, 6, 4, 3, 1, 6, 4, 3, 1, 7, 5, 2, 0, 1, 3, 4, 6, 0, 2, 5, 7, 0, 2, 5, 7, 1, 3, 4, 6 };
+  short cntl0[vlen] __attribute__ ((aligned (16))) = {
+    ones, ones, ones, ones, ones, ones, ones, ones, zero, zero, zero, zero, zero, zero, zero, zero, ones, ones, ones, ones, ones, ones, ones, ones, zero, zero, zero, zero, zero, zero, zero, zero };
+  short cntl1[vlen] __attribute__ ((aligned (16))) = {
+    ones, ones, ones, ones, ones, ones, ones, ones, ones, ones, ones, ones, ones, ones, ones, ones, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero };
+  short cntl2[vlen] __attribute__ ((aligned (16))) = {
+    zero, ones, ones, zero, zero, ones, ones, zero, ones, zero, zero, ones, ones, zero, zero, ones, ones, zero, zero, ones, ones, zero, zero, ones, zero, ones, ones, zero, zero, ones, ones, zero };
+  short cntl3[vlen] __attribute__ ((aligned (16))) =  {
+    ones, ones, zero, zero, ones, ones, zero, zero, zero, zero, ones, ones, zero, zero, ones, ones, ones, ones, zero, zero, ones, ones, zero, zero, zero, zero, ones, ones, zero, zero, ones, ones };
+  short scalars[4] __attribute__ ((aligned (16))) = {1, 2, 3, 4};
+  
+  // cntl0 and cntl1 are only passed to permute_and_scalar_add; the
+  // branch_4_state_8 calls receive permuters, cntl2, cntl3 and scalars.
+  for(int i = 0; i < vlen; ++i) {
+    src0[i] = i;
+    
+  }
+  
+
+  printf("16s_branch_4_state_8_aligned\n");
+  
+  // Reference result: permute_and_scalar_add, "sse2" variant.
+  start = clock();
+  for(int i = 0; i < num_iters; ++i) {
+    volk_16s_permute_and_scalar_add_aligned16_manual(target, src0, permute_indexes, cntl0, cntl1, cntl2, cntl3, scalars, num_bytes, "sse2");
+  }
+  end = clock();
+  
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+
+  printf("permute_and_scalar_add_time: %f\n", total);
+  
+  
+  // branch_4_state_8, "ssse3" variant.
+  start = clock();
+  for(int i = 0; i < num_iters; ++i) {
+    volk_16s_branch_4_state_8_aligned16_manual(target2, src0, permuters, cntl2, cntl3, scalars, "ssse3");
+  }
+  end = clock();
+
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+
+  printf("branch_4_state_8_time, ssse3: %f\n", total);
+  // branch_4_state_8, "generic" variant.
+  start = clock();
+  for(int i = 0; i < num_iters; ++i) {
+    volk_16s_branch_4_state_8_aligned16_manual(target3, src0, permuters, cntl2, cntl3, scalars, "generic");
+  }
+  end = clock();
+  
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  // This loop timed branch_4_state_8, so the label names that kernel.
+  printf("branch_4_state_8_time, generic: %f\n", total);
+  
+  
+  // Dump the reference result next to the generic branch_4_state_8 result.
+  for(int i = 0; i < vlen; ++i) {
+    printf("psa... %d, b4s8... %d\n", target[i], target3[i]);
+  }
+  
+  for(int i = 0; i < vlen; ++i) {
+    
+    CPPUNIT_ASSERT(target[i] == target2[i]);
+    CPPUNIT_ASSERT(target[i] == target3[i]);
+  }
+}
+
+
+#endif
diff --git a/volk/lib/qa_16s_branch_4_state_8_aligned16.h b/volk/lib/qa_16s_branch_4_state_8_aligned16.h
new file mode 100644
index 000000000..41ab073e0
--- /dev/null
+++ b/volk/lib/qa_16s_branch_4_state_8_aligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_16S_BRANCH_4_STATE_8_ALIGNED16_H
+#define INCLUDED_QA_16S_BRANCH_4_STATE_8_ALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_16s_branch_4_state_8_aligned16 : public CppUnit::TestCase {
+  // CppUnit fixture whose suite registers a single test method, t1().
+  CPPUNIT_TEST_SUITE (qa_16s_branch_4_state_8_aligned16);
+  CPPUNIT_TEST (t1);
+  CPPUNIT_TEST_SUITE_END ();
+
+ private:
+  void t1 ();  // the registered test; defined in qa_16s_branch_4_state_8_aligned16.cc
+};
+
+
+#endif /* INCLUDED_QA_16S_BRANCH_4_STATE_8_ALIGNED16_H */
diff --git a/volk/lib/qa_16s_convert_32f_aligned16.cc b/volk/lib/qa_16s_convert_32f_aligned16.cc
new file mode 100644
index 000000000..7878d4737
--- /dev/null
+++ b/volk/lib/qa_16s_convert_32f_aligned16.cc
@@ -0,0 +1,73 @@
+#include <volk/volk_runtime.h>
+#include <volk/volk.h>
+#include <qa_16s_convert_32f_aligned16.h>
+#include <volk/volk_16s_convert_32f_aligned16.h>
+#include <cstdlib>
+
+//test for sse
+
+#ifndef LV_HAVE_SSE
+
+void qa_16s_convert_32f_aligned16::t1() {  // stand-in compiled when LV_HAVE_SSE is not defined
+  fputs("sse not available... no test performed\n", stdout);
+}
+
+#else
+
+void qa_16s_convert_32f_aligned16::t1() {
+  // Converts one random int16_t vector to float three ways -- the "generic"
+  // variant, the "sse" variant and the call made through get_volk_runtime()
+  // -- prints the CPU time of each and requires identical results.
+  volk_runtime_init();
+  volk_environment_init();
+
+  const int vlen = 3201;      // elements per vector
+  const int ITERS = 100000;   // timed repetitions per variant
+
+  int16_t input0[vlen] __attribute__ ((aligned (16)));
+  float output_generic[vlen] __attribute__ ((aligned (16)));
+  float output_sse[vlen] __attribute__ ((aligned (16)));
+  float output_sse4_1[vlen] __attribute__ ((aligned (16)));  // written via get_volk_runtime()
+
+  // Random samples: rand() recentred on zero, divided by RAND_MAX/2 and
+  // multiplied by 32768.0 before the cast to int16_t.
+  const float half_range = static_cast<float>((RAND_MAX/2));
+  for(int idx = 0; idx < vlen; ++idx) {
+    const float centred = (float) (rand() - (RAND_MAX/2));
+    input0[idx] = ((int16_t)(centred / half_range * 32768.0));
+  }
+
+  printf("16s_convert_32f_aligned\n");
+
+  // "generic" variant.
+  clock_t began = clock();
+  for(int rep = 0; rep != ITERS; ++rep) {
+    volk_16s_convert_32f_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic");
+  }
+  double elapsed = (double)(clock() - began)/(double)CLOCKS_PER_SEC;
+  printf("generic_time: %f\n", elapsed);
+
+  // "sse" variant.
+  began = clock();
+  for(int rep = 0; rep != ITERS; ++rep) {
+    volk_16s_convert_32f_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse");
+  }
+  elapsed = (double)(clock() - began)/(double)CLOCKS_PER_SEC;
+  printf("sse_time: %f\n", elapsed);
+
+  // Call through get_volk_runtime() (no variant name is passed here).
+  began = clock();
+  for(int rep = 0; rep != ITERS; ++rep) {
+    get_volk_runtime()->volk_16s_convert_32f_aligned16(output_sse4_1, input0, 32768.0, vlen);
+  }
+  elapsed = (double)(clock() - began)/(double)CLOCKS_PER_SEC;
+  printf("sse4_1_time: %f\n", elapsed);
+
+  // Exact equality is required against the generic result.
+  for(int idx = 0; idx < vlen; ++idx) {
+    CPPUNIT_ASSERT_EQUAL(output_generic[idx], output_sse[idx]);
+    CPPUNIT_ASSERT_EQUAL(output_generic[idx], output_sse4_1[idx]);
+  }
+}
+
+#endif
diff --git a/volk/lib/qa_16s_convert_32f_aligned16.h b/volk/lib/qa_16s_convert_32f_aligned16.h
new file mode 100644
index 000000000..ef813d96f
--- /dev/null
+++ b/volk/lib/qa_16s_convert_32f_aligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_16S_CONVERT_32F_ALIGNED16_H
+#define INCLUDED_QA_16S_CONVERT_32F_ALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_16s_convert_32f_aligned16 : public CppUnit::TestCase {
+  // CppUnit fixture whose suite registers a single test method, t1().
+  CPPUNIT_TEST_SUITE (qa_16s_convert_32f_aligned16);
+  CPPUNIT_TEST (t1);
+  CPPUNIT_TEST_SUITE_END ();
+
+ private:
+  void t1 ();  // the registered test; defined in qa_16s_convert_32f_aligned16.cc
+};
+
+
+#endif /* INCLUDED_QA_16S_CONVERT_32F_ALIGNED16_H */
diff --git a/volk/lib/qa_16s_convert_32f_unaligned16.cc b/volk/lib/qa_16s_convert_32f_unaligned16.cc
new file mode 100644
index 000000000..8c3121e5c
--- /dev/null
+++ b/volk/lib/qa_16s_convert_32f_unaligned16.cc
@@ -0,0 +1,73 @@
+#include <volk/volk_runtime.h>
+#include <volk/volk.h>
+#include <qa_16s_convert_32f_unaligned16.h>
+#include <volk/volk_16s_convert_32f_unaligned16.h>
+#include <cstdlib>
+
+//test for sse
+
+#ifndef LV_HAVE_SSE
+
+void qa_16s_convert_32f_unaligned16::t1() {  // stand-in compiled when LV_HAVE_SSE is not defined
+  fputs("sse not available... no test performed\n", stdout);
+}
+
+#else
+
+void qa_16s_convert_32f_unaligned16::t1() {
+  // Converts one random int16_t vector to float three ways -- the "generic"
+  // variant, the "sse" variant and the call made through get_volk_runtime()
+  // -- prints the CPU time of each and requires identical results.
+  volk_runtime_init();
+  volk_environment_init();
+
+  const int vlen = 3201;      // elements per vector
+  const int ITERS = 100000;   // timed repetitions per variant
+
+  int16_t input0[vlen] __attribute__ ((aligned (16)));
+  float output_generic[vlen] __attribute__ ((aligned (16)));
+  float output_sse[vlen] __attribute__ ((aligned (16)));
+  float output_sse4_1[vlen] __attribute__ ((aligned (16)));  // written via get_volk_runtime()
+
+  // Random samples: rand() recentred on zero, divided by RAND_MAX/2 and
+  // multiplied by 32768.0 before the cast to int16_t.
+  const float half_range = static_cast<float>((RAND_MAX/2));
+  for(int idx = 0; idx < vlen; ++idx) {
+    const float centred = (float) (rand() - (RAND_MAX/2));
+    input0[idx] = ((int16_t)(centred / half_range * 32768.0));
+  }
+
+  printf("16s_convert_32f_unaligned\n");
+
+  // "generic" variant.
+  clock_t began = clock();
+  for(int rep = 0; rep != ITERS; ++rep) {
+    volk_16s_convert_32f_unaligned16_manual(output_generic, input0, 32768.0, vlen, "generic");
+  }
+  double elapsed = (double)(clock() - began)/(double)CLOCKS_PER_SEC;
+  printf("generic_time: %f\n", elapsed);
+
+  // "sse" variant.
+  began = clock();
+  for(int rep = 0; rep != ITERS; ++rep) {
+    volk_16s_convert_32f_unaligned16_manual(output_sse, input0, 32768.0, vlen, "sse");
+  }
+  elapsed = (double)(clock() - began)/(double)CLOCKS_PER_SEC;
+  printf("sse_time: %f\n", elapsed);
+
+  // Call through get_volk_runtime() (no variant name is passed here).
+  began = clock();
+  for(int rep = 0; rep != ITERS; ++rep) {
+    get_volk_runtime()->volk_16s_convert_32f_unaligned16(output_sse4_1, input0, 32768.0, vlen);
+  }
+  elapsed = (double)(clock() - began)/(double)CLOCKS_PER_SEC;
+  printf("sse4_1_time: %f\n", elapsed);
+
+  // Exact equality is required against the generic result.
+  for(int idx = 0; idx < vlen; ++idx) {
+    CPPUNIT_ASSERT_EQUAL(output_generic[idx], output_sse[idx]);
+    CPPUNIT_ASSERT_EQUAL(output_generic[idx], output_sse4_1[idx]);
+  }
+}
+
+#endif
diff --git a/volk/lib/qa_16s_convert_32f_unaligned16.h b/volk/lib/qa_16s_convert_32f_unaligned16.h
new file mode 100644
index 000000000..aeb04f770
--- /dev/null
+++ b/volk/lib/qa_16s_convert_32f_unaligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_16S_CONVERT_32F_UNALIGNED16_H
+#define INCLUDED_QA_16S_CONVERT_32F_UNALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_16s_convert_32f_unaligned16 : public CppUnit::TestCase {
+  // CppUnit fixture whose suite registers a single test method, t1().
+  CPPUNIT_TEST_SUITE (qa_16s_convert_32f_unaligned16);
+  CPPUNIT_TEST (t1);
+  CPPUNIT_TEST_SUITE_END ();
+
+ private:
+  void t1 ();  // the registered test; defined in qa_16s_convert_32f_unaligned16.cc
+};
+
+
+#endif /* INCLUDED_QA_16S_CONVERT_32F_UNALIGNED16_H */
diff --git a/volk/lib/qa_16s_convert_8s_aligned16.cc b/volk/lib/qa_16s_convert_8s_aligned16.cc
new file mode 100644
index 000000000..734b7784e
--- /dev/null
+++ b/volk/lib/qa_16s_convert_8s_aligned16.cc
@@ -0,0 +1,60 @@
+#include <volk/volk.h>
+#include <qa_16s_convert_8s_aligned16.h>
+#include <volk/volk_16s_convert_8s_aligned16.h>
+#include <cstdlib>
+
+//test for sse2
+
+#ifndef LV_HAVE_SSE2
+
+void qa_16s_convert_8s_aligned16::t1() {  // stand-in compiled when LV_HAVE_SSE2 is not defined
+  fputs("sse2 not available... no test performed\n", stdout);
+}
+
+#else
+
+void qa_16s_convert_8s_aligned16::t1() {
+  // Converts one random int16_t vector to int8_t with the "generic" and
+  // the "sse2" variant of volk_16s_convert_8s_aligned16_manual, prints the
+  // CPU time of each, and requires the two outputs to be identical.
+  volk_environment_init();
+
+  const int vlen = 3201;      // elements per vector
+  const int ITERS = 100000;   // timed repetitions per variant
+
+  int16_t input0[vlen] __attribute__ ((aligned (16)));
+  int8_t output_generic[vlen] __attribute__ ((aligned (16)));
+  int8_t output_sse2[vlen] __attribute__ ((aligned (16)));
+
+  // Random samples: rand() recentred on zero, divided by RAND_MAX/2 and
+  // multiplied by 32768.0 before the cast to int16_t.
+  const float half_range = static_cast<float>((RAND_MAX/2));
+  for(int idx = 0; idx < vlen; ++idx) {
+    const float centred = (float) (rand() - (RAND_MAX/2));
+    input0[idx] = ((int16_t)(centred / half_range * 32768.0));
+  }
+  printf("16s_convert_8s_aligned\n");
+
+  // "generic" variant.
+  clock_t began = clock();
+  for(int rep = 0; rep != ITERS; ++rep) {
+    volk_16s_convert_8s_aligned16_manual(output_generic, input0, vlen, "generic");
+  }
+  double elapsed = (double)(clock() - began)/(double)CLOCKS_PER_SEC;
+  printf("generic_time: %f\n", elapsed);
+
+  // "sse2" variant.
+  began = clock();
+  for(int rep = 0; rep != ITERS; ++rep) {
+    volk_16s_convert_8s_aligned16_manual(output_sse2, input0, vlen, "sse2");
+  }
+  elapsed = (double)(clock() - began)/(double)CLOCKS_PER_SEC;
+  printf("sse2_time: %f\n", elapsed);
+
+  // Both variants must agree on every output element.
+  for(int idx = 0; idx < vlen; ++idx) {
+    CPPUNIT_ASSERT_EQUAL(output_generic[idx], output_sse2[idx]);
+  }
+}
+
+#endif
diff --git a/volk/lib/qa_16s_convert_8s_aligned16.h b/volk/lib/qa_16s_convert_8s_aligned16.h
new file mode 100644
index 000000000..2e409d0cc
--- /dev/null
+++ b/volk/lib/qa_16s_convert_8s_aligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_16S_CONVERT_8S_ALIGNED16_H
+#define INCLUDED_QA_16S_CONVERT_8S_ALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_16s_convert_8s_aligned16 : public CppUnit::TestCase {
+  // CppUnit fixture whose suite registers a single test method, t1().
+  CPPUNIT_TEST_SUITE (qa_16s_convert_8s_aligned16);
+  CPPUNIT_TEST (t1);
+  CPPUNIT_TEST_SUITE_END ();
+
+ private:
+  void t1 ();  // the registered test; defined in qa_16s_convert_8s_aligned16.cc
+};
+
+
+#endif /* INCLUDED_QA_16S_CONVERT_8S_ALIGNED16_H */
diff --git a/volk/lib/qa_16s_convert_8s_unaligned16.cc b/volk/lib/qa_16s_convert_8s_unaligned16.cc
new file mode 100644
index 000000000..275ab7668
--- /dev/null
+++ b/volk/lib/qa_16s_convert_8s_unaligned16.cc
@@ -0,0 +1,60 @@
+#include <volk/volk.h>
+#include <qa_16s_convert_8s_unaligned16.h>
+#include <volk/volk_16s_convert_8s_unaligned16.h>
+#include <cstdlib>
+
+//test for sse2
+
+#ifndef LV_HAVE_SSE2
+
+void qa_16s_convert_8s_unaligned16::t1() {  // stand-in compiled when LV_HAVE_SSE2 is not defined
+  fputs("sse2 not available... no test performed\n", stdout);
+}
+
+#else
+
+void qa_16s_convert_8s_unaligned16::t1() {
+  // Converts one random int16_t vector to int8_t with the "generic" and
+  // the "sse2" variant of volk_16s_convert_8s_unaligned16_manual, prints the
+  // CPU time of each, and requires the two outputs to be identical.
+  volk_environment_init();
+
+  const int vlen = 3201;      // elements per vector
+  const int ITERS = 100000;   // timed repetitions per variant
+
+  int16_t input0[vlen] __attribute__ ((aligned (16)));
+  int8_t output_generic[vlen] __attribute__ ((aligned (16)));
+  int8_t output_sse2[vlen] __attribute__ ((aligned (16)));
+
+  // Random samples: rand() recentred on zero, divided by RAND_MAX/2 and
+  // multiplied by 32768.0 before the cast to int16_t.
+  const float half_range = static_cast<float>((RAND_MAX/2));
+  for(int idx = 0; idx < vlen; ++idx) {
+    const float centred = (float) (rand() - (RAND_MAX/2));
+    input0[idx] = ((int16_t)(centred / half_range * 32768.0));
+  }
+  printf("16s_convert_8s_unaligned\n");
+
+  // "generic" variant.
+  clock_t began = clock();
+  for(int rep = 0; rep != ITERS; ++rep) {
+    volk_16s_convert_8s_unaligned16_manual(output_generic, input0, vlen, "generic");
+  }
+  double elapsed = (double)(clock() - began)/(double)CLOCKS_PER_SEC;
+  printf("generic_time: %f\n", elapsed);
+
+  // "sse2" variant.
+  began = clock();
+  for(int rep = 0; rep != ITERS; ++rep) {
+    volk_16s_convert_8s_unaligned16_manual(output_sse2, input0, vlen, "sse2");
+  }
+  elapsed = (double)(clock() - began)/(double)CLOCKS_PER_SEC;
+  printf("sse2_time: %f\n", elapsed);
+
+  // Both variants must agree on every output element.
+  for(int idx = 0; idx < vlen; ++idx) {
+    CPPUNIT_ASSERT_EQUAL(output_generic[idx], output_sse2[idx]);
+  }
+}
+
+#endif
diff --git a/volk/lib/qa_16s_convert_8s_unaligned16.h b/volk/lib/qa_16s_convert_8s_unaligned16.h
new file mode 100644
index 000000000..4b2fe9e42
--- /dev/null
+++ b/volk/lib/qa_16s_convert_8s_unaligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_16S_CONVERT_8S_UNALIGNED16_H
+#define INCLUDED_QA_16S_CONVERT_8S_UNALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_16s_convert_8s_unaligned16 : public CppUnit::TestCase {
+  // CppUnit fixture whose suite registers a single test method, t1().
+  CPPUNIT_TEST_SUITE (qa_16s_convert_8s_unaligned16);
+  CPPUNIT_TEST (t1);
+  CPPUNIT_TEST_SUITE_END ();
+
+ private:
+  void t1 ();  // the registered test; defined in qa_16s_convert_8s_unaligned16.cc
+};
+
+
+#endif /* INCLUDED_QA_16S_CONVERT_8S_UNALIGNED16_H */
diff --git a/volk/lib/qa_16s_max_star_aligned16.cc b/volk/lib/qa_16s_max_star_aligned16.cc
new file mode 100644
index 000000000..b46b9ae8e
--- /dev/null
+++ b/volk/lib/qa_16s_max_star_aligned16.cc
@@ -0,0 +1,65 @@
+#include <volk/volk.h>
+#include <qa_16s_max_star_aligned16.h>
+#include <volk/volk_16s_max_star_aligned16.h>
+#include <cstdlib>
+#include <time.h>
+//test for ssse3
+
+#ifndef LV_HAVE_SSSE3
+
+void qa_16s_max_star_aligned16::t1() {  // stand-in compiled when LV_HAVE_SSSE3 is not defined
+  fputs("ssse3 not available... no test performed\n", stdout);
+}
+
+#else
+
+
+
+void qa_16s_max_star_aligned16::t1() {
+  // Runs the "generic" and "ssse3" variants of
+  // volk_16s_max_star_aligned16_manual on one random vector, prints the CPU
+  // time of each, and requires both to produce the same single short.
+
+  volk_environment_init();
+
+  const int vlen = 6400;      // elements in the input vector
+  const int ITERS = 100000;   // timed repetitions per variant
+
+  short input0[vlen] __attribute__ ((aligned (16)));
+  short output0[1] __attribute__ ((aligned (16)));   // "generic" result
+  short output1[1] __attribute__ ((aligned (16)));   // "ssse3" result
+
+  // Every input element is the difference of two random shorts, each
+  // shifted right by two.
+  for(int idx = 0; idx < vlen; ++idx) {
+    const short plus = ((short) (rand() - (RAND_MAX/2))) >> 2;
+    const short minus = ((short) (rand() - (RAND_MAX/2))) >> 2;
+    input0[idx] = plus - minus;
+  }
+
+  printf("16s_max_star_aligned\n");
+
+  // The length argument is vlen << 1 -- presumably the input size in bytes
+  // (two per short); TODO confirm against the kernel header.
+
+  // Time the generic variant.
+  clock_t began = clock();
+  for(int rep = 0; rep != ITERS; ++rep) {
+    volk_16s_max_star_aligned16_manual(output0, input0, vlen << 1, "generic");
+  }
+  double elapsed = (double)(clock() - began)/(double)CLOCKS_PER_SEC;
+  printf("generic_time: %f\n", elapsed);
+
+  // Time the SSSE3 variant on the same input.
+  began = clock();
+  for(int rep = 0; rep != ITERS; ++rep) {
+    volk_16s_max_star_aligned16_manual(output1, input0, vlen << 1, "ssse3");
+  }
+  elapsed = (double)(clock() - began)/(double)CLOCKS_PER_SEC;
+  printf("ssse3_time: %f\n", elapsed);
+
+  // Only element 0 of each output is compared.
+  CPPUNIT_ASSERT_EQUAL(output0[0], output1[0]);
+}
+
+#endif
diff --git a/volk/lib/qa_16s_max_star_aligned16.h b/volk/lib/qa_16s_max_star_aligned16.h
new file mode 100644
index 000000000..119f87c4d
--- /dev/null
+++ b/volk/lib/qa_16s_max_star_aligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_16S_MAX_STAR_ALIGNED16_H
+#define INCLUDED_QA_16S_MAX_STAR_ALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_16s_max_star_aligned16 : public CppUnit::TestCase {
+  // CppUnit fixture whose suite registers a single test method, t1().
+  CPPUNIT_TEST_SUITE (qa_16s_max_star_aligned16);
+  CPPUNIT_TEST (t1);
+  CPPUNIT_TEST_SUITE_END ();
+
+ private:
+  void t1 ();  // the registered test; defined in qa_16s_max_star_aligned16.cc
+};
+
+
+#endif /* INCLUDED_QA_16S_MAX_STAR_ALIGNED16_H */
diff --git a/volk/lib/qa_16s_max_star_horizontal_aligned16.cc b/volk/lib/qa_16s_max_star_horizontal_aligned16.cc
new file mode 100644
index 000000000..4d44735df
--- /dev/null
+++ b/volk/lib/qa_16s_max_star_horizontal_aligned16.cc
@@ -0,0 +1,79 @@
+#include <volk/volk_runtime.h>
+#include <volk/volk.h>
+#include <qa_16s_max_star_horizontal_aligned16.h>
+#include <volk/volk_16s_max_star_horizontal_aligned16.h>
+#include <cstdlib>
+#include <time.h>
+//test for ssse3
+
+#ifndef LV_HAVE_SSSE3
+
+void qa_16s_max_star_horizontal_aligned16::t1() {  // stand-in compiled when LV_HAVE_SSSE3 is not defined
+  fputs("ssse3 not available... no test performed\n", stdout);
+}
+
+#else
+
+
+void qa_16s_max_star_horizontal_aligned16::t1() {
+  // Applies the max_star_horizontal kernel three times in a row, once with
+  // the "generic" variant and once through get_volk_runtime(), prints the
+  // CPU time of each chain, and requires the two output buffers to match.
+
+  volk_runtime_init();
+  volk_environment_init();
+
+  const int vlen = 32;    // elements in the input vector
+  const int ITERS = 1;    // timed repetitions per chain
+
+  short input0[vlen] __attribute__ ((aligned (16)));
+  short output0[vlen>>1] __attribute__ ((aligned (16)));   // "generic" chain
+  short output1[vlen>>1] __attribute__ ((aligned (16)));   // runtime chain
+
+  // Every input element is the difference of two random shorts.
+  for(int idx = 0; idx < vlen; ++idx) {
+    const short plus = ((short) (rand() - (RAND_MAX/2)));
+    const short minus = ((short) (rand() - (RAND_MAX/2)));
+    input0[idx] = plus - minus;
+  }
+
+  printf("16s_max_star_horizontal_aligned\n");
+
+  // The length arguments (2*vlen, vlen, ...) are presumably byte counts of
+  // the data read by each call; TODO confirm against the kernel header.
+
+  // Generic chain: the second and third calls work in place on output0.
+  clock_t began = clock();
+  for(int rep = 0; rep != ITERS; ++rep) {
+    volk_16s_max_star_horizontal_aligned16_manual(output0, input0, 2*vlen, "generic");
+    volk_16s_max_star_horizontal_aligned16_manual(output0, output0, vlen, "generic");
+    volk_16s_max_star_horizontal_aligned16_manual(output0, output0, vlen/2, "generic");
+  }
+  double elapsed = (double)(clock() - began)/(double)CLOCKS_PER_SEC;
+  printf("generic_time: %f\n", elapsed);
+
+  // Runtime chain: same shape, in place on output1.
+  // NOTE(review): the third call passes vlen here but vlen/2 in the generic
+  // chain above -- confirm which length is intended.
+  began = clock();
+  for(int rep = 0; rep != ITERS; ++rep) {
+    get_volk_runtime()->volk_16s_max_star_horizontal_aligned16(output1, input0, 2*vlen);
+    get_volk_runtime()->volk_16s_max_star_horizontal_aligned16(output1, output1, vlen);
+    get_volk_runtime()->volk_16s_max_star_horizontal_aligned16(output1, output1, vlen);
+    /*    volk_16s_max_star_horizontal_aligned16(output1, input0, 2*vlen, "ssse3");
+    volk_16s_max_star_horizontal_aligned16(output1, output1, vlen, "ssse3");
+    volk_16s_max_star_horizontal_aligned16(output1, output1, vlen, "ssse3");*/
+  }
+  elapsed = (double)(clock() - began)/(double)CLOCKS_PER_SEC;
+  printf("ssse3_time: %f\n", elapsed);
+
+  // Compare all vlen >> 1 output elements.
+  for(int idx = 0; idx < (vlen >> 1); ++idx) {
+    CPPUNIT_ASSERT_EQUAL(output0[idx], output1[idx]);
+  }
+
+}
+   
+  
+#endif
+	
diff --git a/volk/lib/qa_16s_max_star_horizontal_aligned16.h b/volk/lib/qa_16s_max_star_horizontal_aligned16.h
new file mode 100644
index 000000000..9f9757253
--- /dev/null
+++ b/volk/lib/qa_16s_max_star_horizontal_aligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_16S_MAX_STAR_HORIZONTAL_ALIGNED16_H
+#define INCLUDED_QA_16S_MAX_STAR_HORIZONTAL_ALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_16s_max_star_horizontal_aligned16 : public CppUnit::TestCase {
+  // CppUnit fixture whose suite registers a single test method, t1().
+  CPPUNIT_TEST_SUITE (qa_16s_max_star_horizontal_aligned16);
+  CPPUNIT_TEST (t1);
+  CPPUNIT_TEST_SUITE_END ();
+
+ private:
+  void t1 ();  // the registered test; defined in qa_16s_max_star_horizontal_aligned16.cc
+};
+
+
+#endif /* INCLUDED_QA_16S_MAX_STAR_HORIZONTAL_ALIGNED16_H */
diff --git a/volk/lib/qa_16s_permute_and_scalar_add_aligned16.cc b/volk/lib/qa_16s_permute_and_scalar_add_aligned16.cc
new file mode 100644
index 000000000..3c4f5c6cc
--- /dev/null
+++ b/volk/lib/qa_16s_permute_and_scalar_add_aligned16.cc
@@ -0,0 +1,78 @@
+#include <volk/volk.h>
+#include <qa_16s_permute_and_scalar_add_aligned16.h>
+#include <volk/volk_16s_permute_and_scalar_add_aligned16.h>
+#include <cstdlib>
+#include <time.h>
+
+//test for sse2
+
+#ifndef LV_HAVE_SSE2
+
+void qa_16s_permute_and_scalar_add_aligned16::t1() {  // variant built when LV_HAVE_SSE2 is undefined
+  printf("sse2 not available... no test performed\n");  // only prints; nothing is asserted
+}
+
+#else
+
+void qa_16s_permute_and_scalar_add_aligned16::t1() {
+  const int vlen = 64;
+  // Size argument handed to the kernel calls below: vlen << 1 == 2 * vlen.
+  unsigned int num_bytes = vlen << 1;
+
+  volk_environment_init();
+  clock_t start, end;
+  double total;
+  
+  short target[vlen] __attribute__ ((aligned (16)));  // filled by the "generic" run
+  short target2[vlen] __attribute__ ((aligned (16)));  // filled by the "sse2" run
+  short src0[vlen] __attribute__ ((aligned (16)));
+  short permute_indexes[vlen] __attribute__ ((aligned (16)));
+  short cntl0[vlen] __attribute__ ((aligned (16)));
+  short cntl1[vlen] __attribute__ ((aligned (16)));
+  short cntl2[vlen] __attribute__ ((aligned (16)));
+  short cntl3[vlen] __attribute__ ((aligned (16)));
+  short scalars[4] __attribute__ ((aligned (16))) = {1, 2, 3, 4};
+  // Deterministic inputs: a ramp, indexes (3*i) % vlen, and control words of 0 or 0xff.
+  for(int i = 0; i < vlen; ++i) {
+    src0[i] = i;
+    permute_indexes[i] = (3 * i)%vlen;
+    cntl0[i] = 0xff;
+    cntl1[i] = 0xff * (i%2);  // 0xff on odd i
+    cntl2[i] = 0xff * ((i>>1)%2);  // 0xff when bit 1 of i is set
+    cntl3[i] = 0xff * ((i%4) == 3);  // 0xff when i % 4 == 3
+  }
+
+  printf("16s_permute_and_scalar_add_aligned\n");
+  // Time 100000 calls of the "generic" implementation.
+  start = clock();
+  for(int i = 0; i < 100000; ++i) {
+    volk_16s_permute_and_scalar_add_aligned16_manual(target, src0, permute_indexes, cntl0, cntl1, cntl2, cntl3, scalars, num_bytes, "generic");
+  }
+  end = clock();
+
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+
+  printf("generic_time: %f\n", total);
+  // Time 100000 calls of the "sse2" implementation on the same inputs.
+  start = clock();
+  for(int i = 0; i < 100000; ++i) {
+    volk_16s_permute_and_scalar_add_aligned16_manual(target2, src0, permute_indexes, cntl0, cntl1, cntl2, cntl3, scalars, num_bytes, "sse2");
+  }
+  end = clock();
+  
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+
+  printf("sse2_time: %f\n", total);
+  
+  
+  for(int i = 0; i < vlen; ++i) {  // debugging aid; the body is commented out
+    //printf("generic... %d, sse2... %d\n", target[i], target2[i]);
+  }
+  
+  for(int i = 0; i < vlen; ++i) {
+    // Both implementations must produce identical values.
+    CPPUNIT_ASSERT(target[i] == target2[i]);
+  }
+}
+
+#endif
diff --git a/volk/lib/qa_16s_permute_and_scalar_add_aligned16.h b/volk/lib/qa_16s_permute_and_scalar_add_aligned16.h
new file mode 100644
index 000000000..3643aeef6
--- /dev/null
+++ b/volk/lib/qa_16s_permute_and_scalar_add_aligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_16S_PERMUTE_AND_SCALAR_ADD_ALIGNED16_H
+#define INCLUDED_QA_16S_PERMUTE_AND_SCALAR_ADD_ALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_16s_permute_and_scalar_add_aligned16 : public CppUnit::TestCase {
+  // CppUnit test case class; the suite declared below exposes one test, t1.
+  CPPUNIT_TEST_SUITE (qa_16s_permute_and_scalar_add_aligned16);
+  CPPUNIT_TEST (t1);
+  CPPUNIT_TEST_SUITE_END ();
+
+ private:
+  void t1 ();  // defined in qa_16s_permute_and_scalar_add_aligned16.cc
+};
+
+
+#endif /* INCLUDED_QA_16S_PERMUTE_AND_SCALAR_ADD_ALIGNED16_H */
diff --git a/volk/lib/qa_16s_quad_max_star_aligned16.cc b/volk/lib/qa_16s_quad_max_star_aligned16.cc
new file mode 100644
index 000000000..80a220c93
--- /dev/null
+++ b/volk/lib/qa_16s_quad_max_star_aligned16.cc
@@ -0,0 +1,59 @@
+#include <volk/volk.h>
+#include <qa_16s_quad_max_star_aligned16.h>
+#include <volk/volk_16s_quad_max_star_aligned16.h>
+#include <cstdlib>
+
+//test for sse2
+
+#ifndef LV_HAVE_SSE2
+
+void qa_16s_quad_max_star_aligned16::t1() {  // variant built when LV_HAVE_SSE2 is undefined
+  printf("sse2 not available... no test performed\n");  // only prints; nothing is asserted
+}
+
+#else
+
+void qa_16s_quad_max_star_aligned16::t1() {
+  const int vlen = 34;
+  // Four input vectors plus one output vector per implementation under test.
+  short input0[vlen] __attribute__ ((aligned (16)));
+  short input1[vlen] __attribute__ ((aligned (16)));
+  short input2[vlen] __attribute__ ((aligned (16)));
+  short input3[vlen] __attribute__ ((aligned (16)));
+
+  short output0[vlen] __attribute__ ((aligned (16)));  // "generic" result
+  short output1[vlen] __attribute__ ((aligned (16)));  // "sse2" result
+
+  for(int i = 0; i < vlen; ++i) {
+    short plus0 = (short) (rand() - (RAND_MAX/2));
+    short plus1 = (short) (rand() - (RAND_MAX/2));
+    short plus2 = (short) (rand() - (RAND_MAX/2));
+    short plus3 = (short) (rand() - (RAND_MAX/2));
+
+    short minus0 = (short) (rand() - (RAND_MAX/2));
+    short minus1 = (short) (rand() - (RAND_MAX/2));
+    short minus2 = (short) (rand() - (RAND_MAX/2));
+    short minus3 = (short) (rand() - (RAND_MAX/2));
+    // Each input element is the difference of two rand()-derived shorts.
+    input0[i] = plus0 - minus0;
+    input1[i] = plus1 - minus1;
+    input2[i] = plus2 - minus2;
+    input3[i] = plus3 - minus3;
+  }
+  // Run both implementations on identical inputs; the size argument is 2*vlen.
+  volk_16s_quad_max_star_aligned16_manual(output0, input0, input1, input2, input3, 2*vlen, "generic");
+
+  volk_16s_quad_max_star_aligned16_manual(output1, input0, input1, input2, input3, 2*vlen, "sse2");
+
+  printf("16s_quad_max_star_aligned\n");
+  for(int i = 0; i < vlen; ++i) {  // dump every result next to its inputs
+    printf("generic... %d, sse2... %d, inputs: %d, %d, %d, %d\n", output0[i], output1[i], input0[i], input1[i], input2[i], input3[i]);
+  }
+  
+  for(int i = 0; i < vlen; ++i) {
+    // Element-wise equality between the two implementations.
+    CPPUNIT_ASSERT_EQUAL(output0[i], output1[i]);
+  }
+}
+
+#endif
diff --git a/volk/lib/qa_16s_quad_max_star_aligned16.h b/volk/lib/qa_16s_quad_max_star_aligned16.h
new file mode 100644
index 000000000..51e77081a
--- /dev/null
+++ b/volk/lib/qa_16s_quad_max_star_aligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_16S_QUAD_MAX_STAR_ALIGNED16_H
+#define INCLUDED_QA_16S_QUAD_MAX_STAR_ALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_16s_quad_max_star_aligned16 : public CppUnit::TestCase {
+  // CppUnit test case class; the suite declared below exposes one test, t1.
+  CPPUNIT_TEST_SUITE (qa_16s_quad_max_star_aligned16);
+  CPPUNIT_TEST (t1);
+  CPPUNIT_TEST_SUITE_END ();
+
+ private:
+  void t1 ();  // defined in qa_16s_quad_max_star_aligned16.cc
+};
+
+
+#endif /* INCLUDED_QA_16S_QUAD_MAX_STAR_ALIGNED16_H */
diff --git a/volk/lib/qa_16sc_deinterleave_16s_aligned16.cc b/volk/lib/qa_16sc_deinterleave_16s_aligned16.cc
new file mode 100644
index 000000000..e700ac72c
--- /dev/null
+++ b/volk/lib/qa_16sc_deinterleave_16s_aligned16.cc
@@ -0,0 +1,76 @@
+#include <volk/volk.h>
+#include <qa_16sc_deinterleave_16s_aligned16.h>
+#include <volk/volk_16sc_deinterleave_16s_aligned16.h>
+#include <cstdlib>
+
+//test for sse2 and ssse3
+
+#ifndef LV_HAVE_SSSE3
+
+void qa_16sc_deinterleave_16s_aligned16::t1() {  // variant built when LV_HAVE_SSSE3 is undefined
+  printf("ssse3 not available... no test performed\n");  // only prints; nothing is asserted
+}
+
+#else
+
+void qa_16sc_deinterleave_16s_aligned16::t1() {
+  // Times the "generic", "sse2" and "ssse3" kernels, then checks that they agree.
+  volk_environment_init();
+  clock_t start, end;
+  double total;
+  const int vlen = 3201;
+  const int ITERS = 100000;
+  std::complex<int16_t> input0[vlen] __attribute__ ((aligned (16)));
+  
+  int16_t output_generic[vlen] __attribute__ ((aligned (16)));
+  int16_t output_generic1[vlen] __attribute__ ((aligned (16)));
+  int16_t output_sse2[vlen] __attribute__ ((aligned (16)));
+  int16_t output_sse21[vlen] __attribute__ ((aligned (16)));
+  int16_t output_ssse3[vlen] __attribute__ ((aligned (16)));
+  int16_t output_ssse31[vlen] __attribute__ ((aligned (16)));
+  // Fill the input, viewed as 2*vlen int16 values, with random data scaled by 32768.
+  int16_t* loadInput = (int16_t*)input0;
+  for(int i = 0; i < vlen*2; ++i) {   
+    loadInput[i] = ((int16_t)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 32768.0));
+  }
+  printf("16sc_deinterleave_16s_aligned\n");
+
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_16sc_deinterleave_16s_aligned16_manual(output_generic, output_generic1, input0, vlen, "generic");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("generic_time: %f\n", total);
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_16sc_deinterleave_16s_aligned16_manual(output_sse2, output_sse21, input0, vlen, "sse2");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("sse2_time: %f\n", total);
+
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_16sc_deinterleave_16s_aligned16_manual(output_ssse3, output_ssse31, input0, vlen, "ssse3");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("ssse3_time: %f\n", total);
+
+  // Every kernel call above wrote two vectors (its first two arguments).
+  // Compare both vectors from each SIMD run against the generic run; the
+  // values are integers, so exact equality is required.
+
+  
+  for(int i = 0; i < vlen; ++i) {
+    // sse2 against generic, then ssse3 against generic
+    CPPUNIT_ASSERT_EQUAL(output_generic[i],  output_sse2[i]);
+    CPPUNIT_ASSERT_EQUAL(output_generic1[i],  output_sse21[i]);
+
+    CPPUNIT_ASSERT_EQUAL(output_generic[i],  output_ssse3[i]);
+    CPPUNIT_ASSERT_EQUAL(output_generic1[i],  output_ssse31[i]);
+  }
+}
+
+#endif
diff --git a/volk/lib/qa_16sc_deinterleave_16s_aligned16.h b/volk/lib/qa_16sc_deinterleave_16s_aligned16.h
new file mode 100644
index 000000000..995ab5b34
--- /dev/null
+++ b/volk/lib/qa_16sc_deinterleave_16s_aligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_16SC_DEINTERLEAVE_16S_ALIGNED16_H
+#define INCLUDED_QA_16SC_DEINTERLEAVE_16S_ALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_16sc_deinterleave_16s_aligned16 : public CppUnit::TestCase {
+  // CppUnit test case class; the suite declared below exposes one test, t1.
+  CPPUNIT_TEST_SUITE (qa_16sc_deinterleave_16s_aligned16);
+  CPPUNIT_TEST (t1);
+  CPPUNIT_TEST_SUITE_END ();
+
+ private:
+  void t1 ();  // defined in qa_16sc_deinterleave_16s_aligned16.cc
+};
+
+
+#endif /* INCLUDED_QA_16SC_DEINTERLEAVE_16S_ALIGNED16_H */
diff --git a/volk/lib/qa_16sc_deinterleave_32f_aligned16.cc b/volk/lib/qa_16sc_deinterleave_32f_aligned16.cc
new file mode 100644
index 000000000..6ee076998
--- /dev/null
+++ b/volk/lib/qa_16sc_deinterleave_32f_aligned16.cc
@@ -0,0 +1,63 @@
+#include <volk/volk.h>
+#include <qa_16sc_deinterleave_32f_aligned16.h>
+#include <volk/volk_16sc_deinterleave_32f_aligned16.h>
+#include <cstdlib>
+
+//test for sse
+
+#ifndef LV_HAVE_SSE2
+
+void qa_16sc_deinterleave_32f_aligned16::t1() {  // variant built when LV_HAVE_SSE2 is undefined
+  printf("sse2 not available... no test performed\n");  // only prints; nothing is asserted
+}
+
+#else
+
+void qa_16sc_deinterleave_32f_aligned16::t1() {
+  // Times the "generic" and "sse" kernels, then checks that their float outputs agree.
+  volk_environment_init();
+  clock_t start, end;
+  double total;
+  const int vlen = 3201;
+  const int ITERS = 100000;
+  std::complex<int16_t> input0[vlen] __attribute__ ((aligned (16)));
+  
+  float output_generic[vlen] __attribute__ ((aligned (16)));
+  float output_generic1[vlen] __attribute__ ((aligned (16)));
+  float output_sse2[vlen] __attribute__ ((aligned (16)));  // NOTE(review): named sse2, but filled by the "sse" call below
+  float output_sse21[vlen] __attribute__ ((aligned (16)));
+  // Fill the input, viewed as 2*vlen int16 values, with random data scaled by 32768.
+  int16_t* loadInput = (int16_t*)input0;
+  for(int i = 0; i < vlen*2; ++i) {   
+    loadInput[i] =((int16_t)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 32768.0));
+  }
+  printf("16sc_deinterleave_32f_aligned\n");
+
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_16sc_deinterleave_32f_aligned16_manual(output_generic, output_generic1, input0, 32768.0, vlen, "generic");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("generic_time: %f\n", total);
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_16sc_deinterleave_32f_aligned16_manual(output_sse2, output_sse21, input0, 32768.0, vlen, "sse");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("sse_time: %f\n", total);
+
+  for(int i = 0; i < 1; ++i) {  // debugging aid; the body is commented out
+    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
+    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
+  }
+  
+  for(int i = 0; i < vlen; ++i) {
+    // Tolerance is relative: 1e-4 times the magnitude of the generic value.
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse2[i], fabs(output_generic[i])*1e-4);
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic1[i],  output_sse21[i], fabs(output_generic1[i])*1e-4);
+  }
+}
+
+#endif
diff --git a/volk/lib/qa_16sc_deinterleave_32f_aligned16.h b/volk/lib/qa_16sc_deinterleave_32f_aligned16.h
new file mode 100644
index 000000000..fea3b6c2d
--- /dev/null
+++ b/volk/lib/qa_16sc_deinterleave_32f_aligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_16SC_DEINTERLEAVE_32F_ALIGNED16_H
+#define INCLUDED_QA_16SC_DEINTERLEAVE_32F_ALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_16sc_deinterleave_32f_aligned16 : public CppUnit::TestCase {
+  // CppUnit test case class; the suite declared below exposes one test, t1.
+  CPPUNIT_TEST_SUITE (qa_16sc_deinterleave_32f_aligned16);
+  CPPUNIT_TEST (t1);
+  CPPUNIT_TEST_SUITE_END ();
+
+ private:
+  void t1 ();  // defined in qa_16sc_deinterleave_32f_aligned16.cc
+};
+
+
+#endif /* INCLUDED_QA_16SC_DEINTERLEAVE_32F_ALIGNED16_H */
diff --git a/volk/lib/qa_16sc_deinterleave_real_16s_aligned16.cc b/volk/lib/qa_16sc_deinterleave_real_16s_aligned16.cc
new file mode 100644
index 000000000..ca048ea67
--- /dev/null
+++ b/volk/lib/qa_16sc_deinterleave_real_16s_aligned16.cc
@@ -0,0 +1,71 @@
+#include <volk/volk.h>
+#include <qa_16sc_deinterleave_real_16s_aligned16.h>
+#include <volk/volk_16sc_deinterleave_real_16s_aligned16.h>
+#include <cstdlib>
+
+//test for sse2 and ssse3
+
+#ifndef LV_HAVE_SSSE3
+
+void qa_16sc_deinterleave_real_16s_aligned16::t1() {  // variant built when LV_HAVE_SSSE3 is undefined
+  printf("ssse3 not available... no test performed\n");  // only prints; nothing is asserted
+}
+
+#else
+
+void qa_16sc_deinterleave_real_16s_aligned16::t1() {
+  // Times the "generic", "sse2" and "ssse3" kernels, then checks that they agree.
+  volk_environment_init();
+  clock_t start, end;
+  double total;
+  const int vlen = 3201;
+  const int ITERS = 100000;
+  std::complex<int16_t> input0[vlen] __attribute__ ((aligned (16)));
+  
+  int16_t output_generic[vlen] __attribute__ ((aligned (16)));
+  int16_t output_sse2[vlen] __attribute__ ((aligned (16)));
+  int16_t output_ssse3[vlen] __attribute__ ((aligned (16)));
+  // Fill the input, viewed as 2*vlen int16 values, with random data scaled by 32768.
+  int16_t* loadInput = (int16_t*)input0;
+  for(int i = 0; i < vlen*2; ++i) {   
+    loadInput[i] = ((int16_t)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 32768.0));
+  }
+  printf("16sc_deinterleave_real_16s_aligned\n");
+
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_16sc_deinterleave_real_16s_aligned16_manual(output_generic, input0, vlen, "generic");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("generic_time: %f\n", total);
+
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_16sc_deinterleave_real_16s_aligned16_manual(output_sse2, input0, vlen, "sse2");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("sse2_time: %f\n", total);
+
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_16sc_deinterleave_real_16s_aligned16_manual(output_ssse3, input0, vlen, "ssse3");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("ssse3_time: %f\n", total);
+
+  // The outputs are int16 values, so the SIMD results are compared with the
+  // generic result for exact equality; a relative floating-point tolerance
+  // would let small differences on large values slip through.
+
+  
+  for(int i = 0; i < vlen; ++i) {
+    // sse2 against generic, then ssse3 against generic
+    CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse2[i]);
+    CPPUNIT_ASSERT_EQUAL(output_generic[i], output_ssse3[i]);
+  }
+}
+
+#endif
diff --git a/volk/lib/qa_16sc_deinterleave_real_16s_aligned16.h b/volk/lib/qa_16sc_deinterleave_real_16s_aligned16.h
new file mode 100644
index 000000000..ebb70b97a
--- /dev/null
+++ b/volk/lib/qa_16sc_deinterleave_real_16s_aligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_16SC_DEINTERLEAVE_REAL_16S_ALIGNED16_H
+#define INCLUDED_QA_16SC_DEINTERLEAVE_REAL_16S_ALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_16sc_deinterleave_real_16s_aligned16 : public CppUnit::TestCase {
+  // CppUnit test case class; the suite declared below exposes one test, t1.
+  CPPUNIT_TEST_SUITE (qa_16sc_deinterleave_real_16s_aligned16);
+  CPPUNIT_TEST (t1);
+  CPPUNIT_TEST_SUITE_END ();
+
+ private:
+  void t1 ();  // defined in qa_16sc_deinterleave_real_16s_aligned16.cc
+};
+
+
+#endif /* INCLUDED_QA_16SC_DEINTERLEAVE_REAL_16S_ALIGNED16_H */
diff --git a/volk/lib/qa_16sc_deinterleave_real_32f_aligned16.cc b/volk/lib/qa_16sc_deinterleave_real_32f_aligned16.cc
new file mode 100644
index 000000000..0f4ba6923
--- /dev/null
+++ b/volk/lib/qa_16sc_deinterleave_real_32f_aligned16.cc
@@ -0,0 +1,123 @@
+#include <volk/volk_runtime.h>
+#include <volk/volk.h>
+#include <qa_16sc_deinterleave_real_32f_aligned16.h>
+#include <volk/volk_16sc_deinterleave_real_32f_aligned16.h>
+#include <cstdlib>
+
+//test for sse
+
+#ifndef LV_HAVE_SSE4_1
+
+#ifndef LV_HAVE_SSE
+
+void qa_16sc_deinterleave_real_32f_aligned16::t1() {  // built when neither LV_HAVE_SSE4_1 nor LV_HAVE_SSE is defined
+  printf("sse not available... no test performed\n");  // only prints; nothing is asserted
+}
+
+#else
+
+void qa_16sc_deinterleave_real_32f_aligned16::t1() {
+  // Variant built with LV_HAVE_SSE but without LV_HAVE_SSE4_1: "generic" versus "sse".
+  volk_environment_init();
+  clock_t start, end;
+  double total;
+  const int vlen = 3201;
+  const int ITERS = 100000;
+  std::complex<int16_t> input0[vlen] __attribute__ ((aligned (16)));
+  
+  float output_generic[vlen] __attribute__ ((aligned (16)));
+  float output_sse[vlen] __attribute__ ((aligned (16)));
+  // Fill the input, viewed as 2*vlen int16 values, with random data scaled by 32768.
+  int16_t* loadInput = (int16_t*)input0;
+  for(int i = 0; i < vlen*2; ++i) {   
+    loadInput[i] =((int16_t)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 32768.0));
+  }
+  printf("16sc_deinterleave_real_32f_aligned\n");
+
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_16sc_deinterleave_real_32f_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("generic_time: %f\n", total);
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_16sc_deinterleave_real_32f_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("sse_time: %f\n", total);
+
+  for(int i = 0; i < 1; ++i) {  // debugging aid; the body is commented out
+    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
+    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
+  }
+  
+  for(int i = 0; i < vlen; ++i) {
+    // Tolerance is relative: 1e-4 times the magnitude of the generic value.
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4);
+  }
+}
+
+#endif /* SSE */
+
+#else
+
+void qa_16sc_deinterleave_real_32f_aligned16::t1() {
+  // Variant built with LV_HAVE_SSE4_1: "generic", "sse" and the runtime-dispatched kernel.
+  volk_runtime_init();
+
+  volk_environment_init();
+  clock_t start, end;
+  double total;
+  const int vlen = 3201;
+  const int ITERS = 100000;
+  std::complex<int16_t> input0[vlen] __attribute__ ((aligned (16)));
+  
+  float output_generic[vlen] __attribute__ ((aligned (16)));
+  float output_sse[vlen] __attribute__ ((aligned (16)));
+  float output_sse4_1[vlen] __attribute__ ((aligned (16)));
+  // Scale the [-1, 1] ratio by 32768 BEFORE the int16_t cast; casting first yields almost all zeros.
+  int16_t* loadInput = (int16_t*)input0;
+  for(int i = 0; i < vlen*2; ++i) {   
+    loadInput[i] =((int16_t)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 32768.0));
+  }
+  printf("16sc_deinterleave_real_32f_aligned\n");
+
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_16sc_deinterleave_real_32f_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("generic_time: %f\n", total);
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_16sc_deinterleave_real_32f_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("sse_time: %f\n", total);
+
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    get_volk_runtime()->volk_16sc_deinterleave_real_32f_aligned16(output_sse4_1, input0, 32768.0, vlen);
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("sse4_1_time: %f\n", total);
+
+  // NOTE(review): the third run goes through get_volk_runtime(), so which
+  // implementation it actually executes is decided by the runtime; it is
+  // labelled sse4_1 here -- confirm against the dispatcher.
+
+  
+  for(int i = 0; i < vlen; ++i) {
+    // Tolerance is relative: 1e-4 times the magnitude of the generic value.
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4);
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse4_1[i], fabs(output_generic[i])*1e-4);
+  }
+}
+
+#endif /* SSE4_1 */
diff --git a/volk/lib/qa_16sc_deinterleave_real_32f_aligned16.h b/volk/lib/qa_16sc_deinterleave_real_32f_aligned16.h
new file mode 100644
index 000000000..e83426473
--- /dev/null
+++ b/volk/lib/qa_16sc_deinterleave_real_32f_aligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_16SC_DEINTERLEAVE_REAL_32F_ALIGNED16_H
+#define INCLUDED_QA_16SC_DEINTERLEAVE_REAL_32F_ALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_16sc_deinterleave_real_32f_aligned16 : public CppUnit::TestCase {
+  // CppUnit test case class; the suite declared below exposes one test, t1.
+  CPPUNIT_TEST_SUITE (qa_16sc_deinterleave_real_32f_aligned16);
+  CPPUNIT_TEST (t1);
+  CPPUNIT_TEST_SUITE_END ();
+
+ private:
+  void t1 ();  // defined in qa_16sc_deinterleave_real_32f_aligned16.cc
+};
+
+
+#endif /* INCLUDED_QA_16SC_DEINTERLEAVE_REAL_32F_ALIGNED16_H */
diff --git a/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc b/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc
new file mode 100644
index 000000000..5ab458bc9
--- /dev/null
+++ b/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc
@@ -0,0 +1,60 @@
+#include <volk/volk.h>
+#include <qa_16sc_deinterleave_real_8s_aligned16.h>
+#include <volk/volk_16sc_deinterleave_real_8s_aligned16.h>
+#include <cstdlib>
+
+//test for ssse3
+
+#ifndef LV_HAVE_SSSE3
+
+void qa_16sc_deinterleave_real_8s_aligned16::t1() {  // variant built when LV_HAVE_SSSE3 is undefined
+  printf("ssse3 not available... no test performed\n");  // only prints; nothing is asserted
+}
+
+#else
+
+void qa_16sc_deinterleave_real_8s_aligned16::t1() {
+  // Times the "generic" and "ssse3" kernels, then checks that their int8 outputs agree.
+  volk_environment_init();
+  clock_t start, end;
+  double total;
+  const int vlen = 3201;
+  const int ITERS = 100000;
+  std::complex<int16_t> input0[vlen] __attribute__ ((aligned (16)));
+  
+  int8_t output_generic[vlen] __attribute__ ((aligned (16)));
+  int8_t output_ssse3[vlen] __attribute__ ((aligned (16)));
+  // Scale the [-1, 1] ratio by 32768 BEFORE the int16_t cast; casting first yields almost all zeros.
+  int16_t* loadInput = (int16_t*)input0;
+  for(int i = 0; i < vlen*2; ++i) {   
+    loadInput[i] =((int16_t)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 32768.0));
+  }
+  printf("16sc_deinterleave_real_8s_aligned\n");
+
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_16sc_deinterleave_real_8s_aligned16_manual(output_generic, input0, vlen, "generic");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("generic_time: %f\n", total);
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_16sc_deinterleave_real_8s_aligned16_manual(output_ssse3, input0, vlen, "ssse3");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("ssse3_time: %f\n", total);
+
+  // With inputs spread over the int16 range the comparison below exercises
+  // the 16-bit to 8-bit narrowing of both kernels, including negative values.
+
+
+  
+  for(int i = 0; i < vlen; ++i) {
+    // Integer outputs: the two implementations must match exactly.
+    CPPUNIT_ASSERT_EQUAL(output_generic[i], output_ssse3[i]);
+  }
+}
+
+#endif
diff --git a/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.h b/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.h
new file mode 100644
index 000000000..04e5511e5
--- /dev/null
+++ b/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_16SC_DEINTERLEAVE_REAL_8S_ALIGNED16_H
+#define INCLUDED_QA_16SC_DEINTERLEAVE_REAL_8S_ALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_16sc_deinterleave_real_8s_aligned16 : public CppUnit::TestCase {
+  // CppUnit test case class; the suite declared below exposes one test, t1.
+  CPPUNIT_TEST_SUITE (qa_16sc_deinterleave_real_8s_aligned16);
+  CPPUNIT_TEST (t1);
+  CPPUNIT_TEST_SUITE_END ();
+
+ private:
+  void t1 ();  // defined in qa_16sc_deinterleave_real_8s_aligned16.cc
+};
+
+
+#endif /* INCLUDED_QA_16SC_DEINTERLEAVE_REAL_8S_ALIGNED16_H */
diff --git a/volk/lib/qa_16sc_magnitude_16s_aligned16.cc b/volk/lib/qa_16sc_magnitude_16s_aligned16.cc
new file mode 100644
index 000000000..b14610757
--- /dev/null
+++ b/volk/lib/qa_16sc_magnitude_16s_aligned16.cc
@@ -0,0 +1,70 @@
+#include <volk/volk.h>
+#include <qa_16sc_magnitude_16s_aligned16.h>
+#include <volk/volk_16sc_magnitude_16s_aligned16.h>
+#include <cstdlib>
+
+//test for sse and sse3
+
+#ifndef LV_HAVE_SSE3
+
+void qa_16sc_magnitude_16s_aligned16::t1() {  // variant built when LV_HAVE_SSE3 is undefined
+  printf("sse3 not available... no test performed\n");  // only prints; nothing is asserted
+}
+
+#else
+
+void qa_16sc_magnitude_16s_aligned16::t1() {
+  // Times the "generic", "sse" and "sse3" kernels, then checks that they agree.
+  volk_environment_init();
+  clock_t start, end;
+  double total;
+  const int vlen = 3201;
+  const int ITERS = 100000;
+  std::complex<int16_t> input0[vlen] __attribute__ ((aligned (16)));
+  
+  int16_t output_generic[vlen] __attribute__ ((aligned (16)));
+  int16_t output_sse[vlen] __attribute__ ((aligned (16)));
+  int16_t output_sse3[vlen] __attribute__ ((aligned (16)));
+  // Fill the input, viewed as 2*vlen int16 values, with random data scaled by 32768.
+  int16_t* loadInput = (int16_t*)input0;
+  for(int i = 0; i < vlen*2; ++i) {   
+    loadInput[i] =((int16_t)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 32768.0));
+  }
+  printf("16sc_magnitude_16s_aligned\n");
+
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_16sc_magnitude_16s_aligned16_manual(output_generic, input0, vlen, "generic");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("generic_time: %f\n", total);
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_16sc_magnitude_16s_aligned16_manual(output_sse, input0, vlen, "sse");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("sse_time: %f\n", total);
+
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_16sc_magnitude_16s_aligned16_manual(output_sse3, input0, vlen, "sse3");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("sse3_time: %f\n", total);
+
+  for(int i = 0; i < 1; ++i) {  // debugging aid; the body is commented out
+    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
+    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
+  }
+  
+  for(int i = 0; i < vlen; ++i) {
+    // Absolute tolerance of 1.1 between each SIMD result and the generic one.
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], 1.1);
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], 1.1);
+  }
+}
+
+#endif
diff --git a/volk/lib/qa_16sc_magnitude_16s_aligned16.h b/volk/lib/qa_16sc_magnitude_16s_aligned16.h
new file mode 100644
index 000000000..4664b70f4
--- /dev/null
+++ b/volk/lib/qa_16sc_magnitude_16s_aligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_16SC_MAGNITUDE_16S_ALIGNED16_H
+#define INCLUDED_QA_16SC_MAGNITUDE_16S_ALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_16sc_magnitude_16s_aligned16 : public CppUnit::TestCase {
+  // CppUnit test case class; the suite declared below exposes one test, t1.
+  CPPUNIT_TEST_SUITE (qa_16sc_magnitude_16s_aligned16);
+  CPPUNIT_TEST (t1);
+  CPPUNIT_TEST_SUITE_END ();
+
+ private:
+  void t1 ();  // defined in qa_16sc_magnitude_16s_aligned16.cc
+};
+
+
+#endif /* INCLUDED_QA_16SC_MAGNITUDE_16S_ALIGNED16_H */
diff --git a/volk/lib/qa_16sc_magnitude_32f_aligned16.cc b/volk/lib/qa_16sc_magnitude_32f_aligned16.cc
new file mode 100644
index 000000000..06dff2fd5
--- /dev/null
+++ b/volk/lib/qa_16sc_magnitude_32f_aligned16.cc
@@ -0,0 +1,70 @@
+#include <volk/volk.h>
+#include <qa_16sc_magnitude_32f_aligned16.h>
+#include <volk/volk_16sc_magnitude_32f_aligned16.h>
+#include <cstdlib>
+
+//test for sse and sse3
+
+#ifndef LV_HAVE_SSE3
+
+void qa_16sc_magnitude_32f_aligned16::t1() {  // variant built when LV_HAVE_SSE3 is undefined
+  printf("sse3 not available... no test performed\n");  // only prints; nothing is asserted
+}
+
+#else
+
+void qa_16sc_magnitude_32f_aligned16::t1() {
+  // Times the "generic", "sse" and "sse3" kernels, then checks that they agree.
+  volk_environment_init();
+  clock_t start, end;
+  double total;
+  const int vlen = 3201;
+  const int ITERS = 100000;
+  std::complex<int16_t> input0[vlen] __attribute__ ((aligned (16)));
+  
+  float output_generic[vlen] __attribute__ ((aligned (16)));
+  float output_sse[vlen] __attribute__ ((aligned (16)));
+  float output_sse3[vlen] __attribute__ ((aligned (16)));
+  // Scale the [-1, 1] ratio by 32768 before the int16_t cast; unscaled it truncates to ~all zeros.
+  int16_t* inputLoad = (int16_t*)input0;
+  for(int i = 0; i < 2*vlen; ++i) {   
+    inputLoad[i] = (int16_t)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 32768.0);
+  }
+  printf("16sc_magnitude_32f_aligned\n");
+
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_16sc_magnitude_32f_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("generic_time: %f\n", total);
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_16sc_magnitude_32f_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("sse_time: %f\n", total);
+
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_16sc_magnitude_32f_aligned16_manual(output_sse3, input0, 32768.0, vlen, "sse3");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("sse3_time: %f\n", total);
+
+  // With non-zero inputs the relative tolerance below is meaningful; for
+  // all-zero data it would collapse to an exact comparison of zeros.
+
+
+  
+  for(int i = 0; i < vlen; ++i) {
+    // Tolerance is relative: 1e-4 times the magnitude of the generic value.
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4);
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], fabs(output_generic[i])*1e-4);
+  }
+}
+
+#endif
diff --git a/volk/lib/qa_16sc_magnitude_32f_aligned16.h b/volk/lib/qa_16sc_magnitude_32f_aligned16.h
new file mode 100644
index 000000000..0c25673ea
--- /dev/null
+++ b/volk/lib/qa_16sc_magnitude_32f_aligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_16SC_MAGNITUDE_32F_ALIGNED16_H
+#define INCLUDED_QA_16SC_MAGNITUDE_32F_ALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_16sc_magnitude_32f_aligned16 : public CppUnit::TestCase {
+  // CppUnit test case class; the suite declared below exposes one test, t1.
+  CPPUNIT_TEST_SUITE (qa_16sc_magnitude_32f_aligned16);
+  CPPUNIT_TEST (t1);
+  CPPUNIT_TEST_SUITE_END ();
+
+ private:
+  void t1 ();  // defined in qa_16sc_magnitude_32f_aligned16.cc
+};
+
+
+#endif /* INCLUDED_QA_16SC_MAGNITUDE_32F_ALIGNED16_H */
diff --git a/volk/lib/qa_16u_byteswap_aligned16.cc b/volk/lib/qa_16u_byteswap_aligned16.cc
new file mode 100644
index 000000000..6b19828a4
--- /dev/null
+++ b/volk/lib/qa_16u_byteswap_aligned16.cc
@@ -0,0 +1,60 @@
+#include <volk/volk.h>
+#include <qa_16u_byteswap_aligned16.h>
+#include <volk/volk_16u_byteswap_aligned16.h>
+#include <cstdlib>
+#include <cstring>
+
+//test for sse2
+
+#ifndef LV_HAVE_SSE2
+
+void qa_16u_byteswap_aligned16::t1() {  // variant built when LV_HAVE_SSE2 is undefined
+  printf("sse2 not available... no test performed\n");  // only prints; nothing is asserted
+}
+
+#else
+
+void qa_16u_byteswap_aligned16::t1() {
+  // Times the "generic" and "sse2" kernels on identical buffers, then compares the buffers.
+  volk_environment_init();
+  clock_t start, end;
+  double total;
+  const int vlen = 3201;
+  const int ITERS = 100001;  // odd; presumably so the repeated single-buffer calls do not cancel out
+  
+  uint16_t output0[vlen] __attribute__ ((aligned (16)));
+  uint16_t output01[vlen] __attribute__ ((aligned (16)));
+  // Random 16-bit words; integer-dividing by RAND_MAX/2 would leave only -1, 0 or 1.
+  for(int i = 0; i < vlen; ++i) {   
+    output0[i] = (uint16_t) rand();
+  }
+  memcpy(output01, output0, vlen*sizeof(uint16_t));
+
+  printf("16u_byteswap_aligned\n");
+
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_16u_byteswap_aligned16_manual(output0, vlen, "generic");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("generic_time: %f\n", total);
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_16u_byteswap_aligned16_manual(output01, vlen, "sse2");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("sse2_time: %f\n", total);
+  // Both buffers started as copies of the same data and went through the
+  // same number of calls, so they must end up identical.
+
+
+  
+  for(int i = 0; i < vlen; ++i) {
+    // "generic" buffer against "sse2" buffer
+    CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
+  }
+}
+
+#endif
diff --git a/volk/lib/qa_16u_byteswap_aligned16.h b/volk/lib/qa_16u_byteswap_aligned16.h
new file mode 100644
index 000000000..e11b23e3f
--- /dev/null
+++ b/volk/lib/qa_16u_byteswap_aligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_16U_BYTESWAP_ALIGNED16_H
+#define INCLUDED_QA_16U_BYTESWAP_ALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_16u_byteswap_aligned16 : public CppUnit::TestCase {
+  // CppUnit test case whose suite registers a single test method, t1.
+  CPPUNIT_TEST_SUITE (qa_16u_byteswap_aligned16);
+  CPPUNIT_TEST (t1);
+  CPPUNIT_TEST_SUITE_END ();
+
+ private:
+  void t1 ();  // defined in qa_16u_byteswap_aligned16.cc
+};
+
+
+#endif /* INCLUDED_QA_16U_BYTESWAP_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_accumulator_aligned16.cc b/volk/lib/qa_32f_accumulator_aligned16.cc
new file mode 100644
index 000000000..ea637d600
--- /dev/null
+++ b/volk/lib/qa_32f_accumulator_aligned16.cc
@@ -0,0 +1,56 @@
+#include <volk/volk.h>
+#include <qa_32f_accumulator_aligned16.h>
+#include <volk/volk_32f_accumulator_aligned16.h>
+#include <cstdlib>
+
+//test for sse
+
+#ifndef LV_HAVE_SSE
+// Built without LV_HAVE_SSE: just report that the comparison is skipped.
+void qa_32f_accumulator_aligned16::t1() {
+  printf("sse not available... no test performed\n");
+}
+
+#else
+// Times the "generic" and "sse" accumulator kernels on one random vector and compares their results.
+void qa_32f_accumulator_aligned16::t1() {
+  // Set-up: library init, problem size, and a 16-byte aligned input buffer.
+  volk_environment_init();
+  clock_t tic, toc;
+  double elapsed;
+  const int num_points = 3201;
+  const int num_iters = 100000;
+  float input0[num_points] __attribute__ ((aligned (16)));
+  const int half_range = RAND_MAX/2;
+  float accumulator_generic;
+  float accumulator_sse;
+  // Each sample is (rand() - RAND_MAX/2) divided by RAND_MAX/2, computed in float.
+  for(int n = 0; n != num_points; ++n) {
+    input0[n] = static_cast<float>(rand() - half_range) / static_cast<float>(half_range);
+  }
+  printf("32f_accumulator_aligned\n");
+
+  tic = clock();
+  for(int pass = 0; pass != num_iters; ++pass) {
+    volk_32f_accumulator_aligned16_manual(&accumulator_generic, input0, num_points, "generic");
+  }
+  toc = clock();
+  elapsed = static_cast<double>(toc - tic) / static_cast<double>(CLOCKS_PER_SEC);
+  printf("generic_time: %f\n", elapsed);
+  tic = clock();
+  for(int pass = 0; pass != num_iters; ++pass) {
+    volk_32f_accumulator_aligned16_manual(&accumulator_sse, input0, num_points, "sse");
+  }
+  toc = clock();
+  elapsed = static_cast<double>(toc - tic) / static_cast<double>(CLOCKS_PER_SEC);
+  printf("sse_time: %f\n", elapsed);
+  // The repeated calls above feed the printed timings. What gets verified
+  // below is the value left in each accumulator by the final call of the
+  // corresponding implementation.
+  //
+  // Tolerance is relative: 1e-4 times the magnitude of the generic result.
+  const double tolerance = fabs(accumulator_generic)*1e-4;
+  CPPUNIT_ASSERT_DOUBLES_EQUAL(accumulator_generic, accumulator_sse, tolerance);
+}
+
+#endif
diff --git a/volk/lib/qa_32f_accumulator_aligned16.h b/volk/lib/qa_32f_accumulator_aligned16.h
new file mode 100644
index 000000000..0004d3ff0
--- /dev/null
+++ b/volk/lib/qa_32f_accumulator_aligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_32F_ACCUMULATOR_ALIGNED16_H
+#define INCLUDED_QA_32F_ACCUMULATOR_ALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_32f_accumulator_aligned16 : public CppUnit::TestCase {
+  // CppUnit test case whose suite registers a single test method, t1.
+  CPPUNIT_TEST_SUITE (qa_32f_accumulator_aligned16);
+  CPPUNIT_TEST (t1);
+  CPPUNIT_TEST_SUITE_END ();
+
+ private:
+  void t1 ();  // defined in qa_32f_accumulator_aligned16.cc
+};
+
+
+#endif /* INCLUDED_QA_32F_ACCUMULATOR_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_add_aligned16.cc b/volk/lib/qa_32f_add_aligned16.cc
new file mode 100644
index 000000000..92f35c7ec
--- /dev/null
+++ b/volk/lib/qa_32f_add_aligned16.cc
@@ -0,0 +1,60 @@
+#include <volk/volk.h>
+#include <qa_32f_add_aligned16.h>
+#include <volk/volk_32f_add_aligned16.h>
+#include <cstdlib>
+
+//test for sse
+
+#ifndef LV_HAVE_SSE
+// Built without LV_HAVE_SSE: just report that the comparison is skipped.
+void qa_32f_add_aligned16::t1() {
+  printf("sse not available... no test performed\n");
+}
+
+#else
+// Runs the "generic" and "sse" add kernels on the same random inputs, prints timings, and compares outputs.
+void qa_32f_add_aligned16::t1() {
+  // Set-up: library init, problem size, and 16-byte aligned work buffers.
+  volk_environment_init();
+  clock_t tic, toc;
+  double elapsed;
+  const int num_points = 3201;
+  const int num_iters = 100000;
+  float input0[num_points] __attribute__ ((aligned (16)));
+  float input1[num_points] __attribute__ ((aligned (16)));
+  const int half_range = RAND_MAX/2;
+  float output0[num_points] __attribute__ ((aligned (16)));
+  float output01[num_points] __attribute__ ((aligned (16)));
+  // Each sample is (rand() - RAND_MAX/2) divided by RAND_MAX/2, computed in float.
+  for(int n = 0; n != num_points; ++n) {
+    input0[n] = static_cast<float>(rand() - half_range) / static_cast<float>(half_range);
+    input1[n] = static_cast<float>(rand() - half_range) / static_cast<float>(half_range);
+  }
+  printf("32f_add_aligned\n");
+
+  tic = clock();
+  for(int pass = 0; pass != num_iters; ++pass) {
+    volk_32f_add_aligned16_manual(output0, input0, input1, num_points, "generic");
+  }
+  toc = clock();
+  elapsed = static_cast<double>(toc - tic) / static_cast<double>(CLOCKS_PER_SEC);
+  printf("generic_time: %f\n", elapsed);
+  tic = clock();
+  for(int pass = 0; pass != num_iters; ++pass) {
+    volk_32f_add_aligned16_manual(output01, input0, input1, num_points, "sse");
+  }
+  toc = clock();
+  elapsed = static_cast<double>(toc - tic) / static_cast<double>(CLOCKS_PER_SEC);
+  printf("sse_time: %f\n", elapsed);
+  // The repeated calls above feed the printed timings. What gets verified
+  // below is the content left in the two output buffers by the final call
+  // of each implementation, compared element by element.
+  //
+
+  for(int n = 0; n != num_points; ++n) {
+    // The two kernels must produce exactly equal floats.
+    CPPUNIT_ASSERT_EQUAL(output0[n], output01[n]);
+  }
+}
+
+#endif
diff --git a/volk/lib/qa_32f_add_aligned16.h b/volk/lib/qa_32f_add_aligned16.h
new file mode 100644
index 000000000..58e2a151c
--- /dev/null
+++ b/volk/lib/qa_32f_add_aligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_32F_ADD_ALIGNED16_H
+#define INCLUDED_QA_32F_ADD_ALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_32f_add_aligned16 : public CppUnit::TestCase {
+  // CppUnit test case whose suite registers a single test method, t1.
+  CPPUNIT_TEST_SUITE (qa_32f_add_aligned16);
+  CPPUNIT_TEST (t1);
+  CPPUNIT_TEST_SUITE_END ();
+
+ private:
+  void t1 ();  // defined in qa_32f_add_aligned16.cc
+};
+
+
+#endif /* INCLUDED_QA_32F_ADD_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_calc_spectral_noise_floor_aligned16.cc b/volk/lib/qa_32f_calc_spectral_noise_floor_aligned16.cc
new file mode 100644
index 000000000..3c8137004
--- /dev/null
+++ b/volk/lib/qa_32f_calc_spectral_noise_floor_aligned16.cc
@@ -0,0 +1,59 @@
+#include <volk/volk.h>
+#include <qa_32f_calc_spectral_noise_floor_aligned16.h>
+#include <volk/volk_32f_calc_spectral_noise_floor_aligned16.h>
+#include <cstdlib>
+#include <math.h>
+
+//test for sse
+
+#ifndef LV_HAVE_SSE
+// Built without LV_HAVE_SSE: just report that the comparison is skipped.
+void qa_32f_calc_spectral_noise_floor_aligned16::t1() {
+  printf("sse not available... no test performed\n");
+}
+
+#else
+// Times the "generic" and "sse" noise-floor kernels on one random vector and compares their single outputs.
+void qa_32f_calc_spectral_noise_floor_aligned16::t1() {
+  // Set-up: library init, problem size, and 16-byte aligned buffers.
+  volk_environment_init();
+  clock_t tic, toc;
+  double elapsed;
+  const int num_points = 3201;
+  const int num_iters = 100000;
+  float input0[num_points] __attribute__ ((aligned (16)));
+  const int half_range = RAND_MAX/2;
+  float output0[1] __attribute__ ((aligned (16)));
+  float output01[1] __attribute__ ((aligned (16)));
+  // Each sample is (rand() - RAND_MAX/2) divided by RAND_MAX/2, computed in float.
+  for(int n = 0; n != num_points; ++n) {
+    input0[n] = static_cast<float>(rand() - half_range) / static_cast<float>(half_range);
+  }
+  printf("32f_calc_spectral_noise_floor_aligned\n");
+
+  tic = clock();
+  for(int pass = 0; pass != num_iters; ++pass) {
+    volk_32f_calc_spectral_noise_floor_aligned16_manual(output0, input0, 20, num_points, "generic");
+  }
+  toc = clock();
+  elapsed = static_cast<double>(toc - tic) / static_cast<double>(CLOCKS_PER_SEC);
+  printf("generic_time: %f\n", elapsed);
+  tic = clock();
+  for(int pass = 0; pass != num_iters; ++pass) {
+    volk_32f_calc_spectral_noise_floor_aligned16_manual(output01, input0, 20, num_points, "sse");
+  }
+  toc = clock();
+  elapsed = static_cast<double>(toc - tic) / static_cast<double>(CLOCKS_PER_SEC);
+  printf("sse_time: %f\n", elapsed);
+  // The repeated calls above feed the printed timings. What gets verified
+  // below is the value left in each one-element output buffer by the final
+  // call of the corresponding implementation.
+  // Tolerance is relative: 1e-4 times the magnitude of the generic result.
+
+  {
+    const double tolerance = fabs(output0[0])*1e-4;
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(output0[0], output01[0], tolerance);
+  }
+}
+
+#endif
diff --git a/volk/lib/qa_32f_calc_spectral_noise_floor_aligned16.h b/volk/lib/qa_32f_calc_spectral_noise_floor_aligned16.h
new file mode 100644
index 000000000..c5dce2c4b
--- /dev/null
+++ b/volk/lib/qa_32f_calc_spectral_noise_floor_aligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_32F_CALC_SPECTRAL_NOISE_FLOOR_ALIGNED16_H
+#define INCLUDED_QA_32F_CALC_SPECTRAL_NOISE_FLOOR_ALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_32f_calc_spectral_noise_floor_aligned16 : public CppUnit::TestCase {
+  // CppUnit test case whose suite registers a single test method, t1.
+  CPPUNIT_TEST_SUITE (qa_32f_calc_spectral_noise_floor_aligned16);
+  CPPUNIT_TEST (t1);
+  CPPUNIT_TEST_SUITE_END ();
+
+ private:
+  void t1 ();  // defined in qa_32f_calc_spectral_noise_floor_aligned16.cc
+};
+
+
+#endif /* INCLUDED_QA_32F_CALC_SPECTRAL_NOISE_FLOOR_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_convert_16s_aligned16.cc b/volk/lib/qa_32f_convert_16s_aligned16.cc
new file mode 100644
index 000000000..84a4c40c4
--- /dev/null
+++ b/volk/lib/qa_32f_convert_16s_aligned16.cc
@@ -0,0 +1,70 @@
+#include <volk/volk.h>
+#include <qa_32f_convert_16s_aligned16.h>
+#include <volk/volk_32f_convert_16s_aligned16.h>
+#include <cstdlib>
+
+//test for sse2
+
+#ifndef LV_HAVE_SSE2
+// Built without LV_HAVE_SSE2: just report that the comparison is skipped.
+void qa_32f_convert_16s_aligned16::t1() {
+  printf("sse2 not available... no test performed\n");
+}
+
+#else
+// Runs the "generic", "sse" and "sse2" variants on one random float vector, prints timings, and compares the int16_t outputs.
+void qa_32f_convert_16s_aligned16::t1() {
+  // Set-up: library init, problem size, and 16-byte aligned work buffers.
+  volk_environment_init();
+  clock_t tic, toc;
+  double elapsed;
+  const int num_points = 3201;
+  const int num_iters = 100000;
+  float input0[num_points] __attribute__ ((aligned (16)));
+  const int half_range = RAND_MAX/2;
+  int16_t output_generic[num_points] __attribute__ ((aligned (16)));
+  int16_t output_sse[num_points] __attribute__ ((aligned (16)));
+  int16_t output_sse2[num_points] __attribute__ ((aligned (16)));
+  // Each sample is (rand() - RAND_MAX/2) divided by RAND_MAX/2, computed in float.
+  for(int n = 0; n != num_points; ++n) {
+    input0[n] = static_cast<float>(rand() - half_range) / static_cast<float>(half_range);
+  }
+  printf("32f_convert_16s_aligned\n");
+
+  tic = clock();
+  for(int pass = 0; pass != num_iters; ++pass) {
+    volk_32f_convert_16s_aligned16_manual(output_generic, input0, 32768.0, num_points, "generic");
+  }
+  toc = clock();
+  elapsed = static_cast<double>(toc - tic) / static_cast<double>(CLOCKS_PER_SEC);
+  printf("generic_time: %f\n", elapsed);
+
+  tic = clock();
+  for(int pass = 0; pass != num_iters; ++pass) {
+    volk_32f_convert_16s_aligned16_manual(output_sse, input0, 32768.0, num_points, "sse");
+  }
+  toc = clock();
+  elapsed = static_cast<double>(toc - tic) / static_cast<double>(CLOCKS_PER_SEC);
+  printf("sse_time: %f\n", elapsed);
+
+  tic = clock();
+  for(int pass = 0; pass != num_iters; ++pass) {
+    volk_32f_convert_16s_aligned16_manual(output_sse2, input0, 32768.0, num_points, "sse2");
+  }
+  toc = clock();
+  elapsed = static_cast<double>(toc - tic) / static_cast<double>(CLOCKS_PER_SEC);
+  printf("sse2_time: %f\n", elapsed);
+
+  // The repeated calls above feed the printed timings. What gets verified
+  // below is the content left in the three output buffers by the final call
+  // of each implementation, with "generic" as the reference.
+  //
+
+  for(int i = 0; i != num_points; ++i) {
+    // Each SIMD result may differ from the generic result by at most 1.
+    CPPUNIT_ASSERT(abs(output_generic[i] - output_sse[i]) <= 1);
+    CPPUNIT_ASSERT(abs(output_generic[i] - output_sse2[i]) <= 1);
+  }
+}
+
+#endif
diff --git a/volk/lib/qa_32f_convert_16s_aligned16.h b/volk/lib/qa_32f_convert_16s_aligned16.h
new file mode 100644
index 000000000..fce1eb417
--- /dev/null
+++ b/volk/lib/qa_32f_convert_16s_aligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_32F_CONVERT_16S_ALIGNED16_H
+#define INCLUDED_QA_32F_CONVERT_16S_ALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_32f_convert_16s_aligned16 : public CppUnit::TestCase {
+  // CppUnit test case whose suite registers a single test method, t1.
+  CPPUNIT_TEST_SUITE (qa_32f_convert_16s_aligned16);
+  CPPUNIT_TEST (t1);
+  CPPUNIT_TEST_SUITE_END ();
+
+ private:
+  void t1 ();  // defined in qa_32f_convert_16s_aligned16.cc
+};
+
+
+#endif /* INCLUDED_QA_32F_CONVERT_16S_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_convert_16s_unaligned16.cc b/volk/lib/qa_32f_convert_16s_unaligned16.cc
new file mode 100644
index 000000000..9469daed2
--- /dev/null
+++ b/volk/lib/qa_32f_convert_16s_unaligned16.cc
@@ -0,0 +1,70 @@
+#include <volk/volk.h>
+#include <qa_32f_convert_16s_unaligned16.h>
+#include <volk/volk_32f_convert_16s_unaligned16.h>
+#include <cstdlib>
+
+//test for sse2
+
+#ifndef LV_HAVE_SSE2
+// Built without LV_HAVE_SSE2: just report that the comparison is skipped.
+void qa_32f_convert_16s_unaligned16::t1() {
+  printf("sse2 not available... no test performed\n");
+}
+
+#else
+// Runs the "generic", "sse" and "sse2" variants on one random float vector, prints timings, and compares the int16_t outputs.
+void qa_32f_convert_16s_unaligned16::t1() {
+  // Set-up: library init, problem size, and work buffers (declared 16-byte aligned here).
+  volk_environment_init();
+  clock_t tic, toc;
+  double elapsed;
+  const int num_points = 3201;
+  const int num_iters = 100000;
+  float input0[num_points] __attribute__ ((aligned (16)));
+  const int half_range = RAND_MAX/2;
+  int16_t output_generic[num_points] __attribute__ ((aligned (16)));
+  int16_t output_sse[num_points] __attribute__ ((aligned (16)));
+  int16_t output_sse2[num_points] __attribute__ ((aligned (16)));
+  // Each sample is (rand() - RAND_MAX/2) divided by RAND_MAX/2, computed in float.
+  for(int n = 0; n != num_points; ++n) {
+    input0[n] = static_cast<float>(rand() - half_range) / static_cast<float>(half_range);
+  }
+  printf("32f_convert_16s_unaligned\n");
+
+  tic = clock();
+  for(int pass = 0; pass != num_iters; ++pass) {
+    volk_32f_convert_16s_unaligned16_manual(output_generic, input0, 32768.0, num_points, "generic");
+  }
+  toc = clock();
+  elapsed = static_cast<double>(toc - tic) / static_cast<double>(CLOCKS_PER_SEC);
+  printf("generic_time: %f\n", elapsed);
+
+  tic = clock();
+  for(int pass = 0; pass != num_iters; ++pass) {
+    volk_32f_convert_16s_unaligned16_manual(output_sse, input0, 32768.0, num_points, "sse");
+  }
+  toc = clock();
+  elapsed = static_cast<double>(toc - tic) / static_cast<double>(CLOCKS_PER_SEC);
+  printf("sse_time: %f\n", elapsed);
+
+  tic = clock();
+  for(int pass = 0; pass != num_iters; ++pass) {
+    volk_32f_convert_16s_unaligned16_manual(output_sse2, input0, 32768.0, num_points, "sse2");
+  }
+  toc = clock();
+  elapsed = static_cast<double>(toc - tic) / static_cast<double>(CLOCKS_PER_SEC);
+  printf("sse2_time: %f\n", elapsed);
+
+  // The repeated calls above feed the printed timings. What gets verified
+  // below is the content left in the three output buffers by the final call
+  // of each implementation, with "generic" as the reference.
+  //
+
+  for(int i = 0; i != num_points; ++i) {
+    // Each SIMD result may differ from the generic result by at most 1.
+    CPPUNIT_ASSERT(abs(output_generic[i] - output_sse[i]) <= 1);
+    CPPUNIT_ASSERT(abs(output_generic[i] - output_sse2[i]) <= 1);
+  }
+}
+
+#endif
diff --git a/volk/lib/qa_32f_convert_16s_unaligned16.h b/volk/lib/qa_32f_convert_16s_unaligned16.h
new file mode 100644
index 000000000..492bc80e6
--- /dev/null
+++ b/volk/lib/qa_32f_convert_16s_unaligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_32F_CONVERT_16S_UNALIGNED16_H
+#define INCLUDED_QA_32F_CONVERT_16S_UNALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_32f_convert_16s_unaligned16 : public CppUnit::TestCase {
+  // CppUnit test case whose suite registers a single test method, t1.
+  CPPUNIT_TEST_SUITE (qa_32f_convert_16s_unaligned16);
+  CPPUNIT_TEST (t1);
+  CPPUNIT_TEST_SUITE_END ();
+
+ private:
+  void t1 ();  // defined in qa_32f_convert_16s_unaligned16.cc
+};
+
+
+#endif /* INCLUDED_QA_32F_CONVERT_16S_UNALIGNED16_H */
diff --git a/volk/lib/qa_32f_convert_32s_aligned16.cc b/volk/lib/qa_32f_convert_32s_aligned16.cc
new file mode 100644
index 000000000..ff24c7b0d
--- /dev/null
+++ b/volk/lib/qa_32f_convert_32s_aligned16.cc
@@ -0,0 +1,70 @@
+#include <volk/volk.h>
+#include <qa_32f_convert_32s_aligned16.h>
+#include <volk/volk_32f_convert_32s_aligned16.h>
+#include <cstdlib>
+
+//test for sse2
+
+#ifndef LV_HAVE_SSE2
+// Built without LV_HAVE_SSE2: just report that the comparison is skipped.
+void qa_32f_convert_32s_aligned16::t1() {
+  printf("sse2 not available... no test performed\n");
+}
+
+#else
+// Runs the "generic", "sse" and "sse2" variants on one random float vector, prints timings, and compares the int32_t outputs.
+void qa_32f_convert_32s_aligned16::t1() {
+  // Set-up: library init, problem size, and 16-byte aligned work buffers.
+  volk_environment_init();
+  clock_t tic, toc;
+  double elapsed;
+  const int num_points = 3201;
+  const int num_iters = 100000;
+  float input0[num_points] __attribute__ ((aligned (16)));
+  const int half_range = RAND_MAX/2;
+  int32_t output_generic[num_points] __attribute__ ((aligned (16)));
+  int32_t output_sse[num_points] __attribute__ ((aligned (16)));
+  int32_t output_sse2[num_points] __attribute__ ((aligned (16)));
+  // Each sample is (rand() - RAND_MAX/2) divided by RAND_MAX/2, computed in float.
+  for(int n = 0; n != num_points; ++n) {
+    input0[n] = static_cast<float>(rand() - half_range) / static_cast<float>(half_range);
+  }
+  printf("32f_convert_32s_aligned\n");
+
+  tic = clock();
+  for(int pass = 0; pass != num_iters; ++pass) {
+    volk_32f_convert_32s_aligned16_manual(output_generic, input0, 32768.0, num_points, "generic");
+  }
+  toc = clock();
+  elapsed = static_cast<double>(toc - tic) / static_cast<double>(CLOCKS_PER_SEC);
+  printf("generic_time: %f\n", elapsed);
+
+  tic = clock();
+  for(int pass = 0; pass != num_iters; ++pass) {
+    volk_32f_convert_32s_aligned16_manual(output_sse, input0, 32768.0, num_points, "sse");
+  }
+  toc = clock();
+  elapsed = static_cast<double>(toc - tic) / static_cast<double>(CLOCKS_PER_SEC);
+  printf("sse_time: %f\n", elapsed);
+
+  tic = clock();
+  for(int pass = 0; pass != num_iters; ++pass) {
+    volk_32f_convert_32s_aligned16_manual(output_sse2, input0, 32768.0, num_points, "sse2");
+  }
+  toc = clock();
+  elapsed = static_cast<double>(toc - tic) / static_cast<double>(CLOCKS_PER_SEC);
+  printf("sse2_time: %f\n", elapsed);
+
+  // The repeated calls above feed the printed timings. What gets verified
+  // below is the content left in the three output buffers by the final call
+  // of each implementation, with "generic" as the reference.
+  //
+
+  for(int i = 0; i != num_points; ++i) {
+    // Each SIMD result may differ from the generic result by at most 1.
+    CPPUNIT_ASSERT(abs(output_generic[i] - output_sse[i]) <= 1);
+    CPPUNIT_ASSERT(abs(output_generic[i] - output_sse2[i]) <= 1);
+  }
+}
+
+#endif
diff --git a/volk/lib/qa_32f_convert_32s_aligned16.h b/volk/lib/qa_32f_convert_32s_aligned16.h
new file mode 100644
index 000000000..97d854463
--- /dev/null
+++ b/volk/lib/qa_32f_convert_32s_aligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_32F_CONVERT_32S_ALIGNED16_H
+#define INCLUDED_QA_32F_CONVERT_32S_ALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_32f_convert_32s_aligned16 : public CppUnit::TestCase {
+  // CppUnit test case whose suite registers a single test method, t1.
+  CPPUNIT_TEST_SUITE (qa_32f_convert_32s_aligned16);
+  CPPUNIT_TEST (t1);
+  CPPUNIT_TEST_SUITE_END ();
+
+ private:
+  void t1 ();  // defined in qa_32f_convert_32s_aligned16.cc
+};
+
+
+#endif /* INCLUDED_QA_32F_CONVERT_32S_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_convert_32s_unaligned16.cc b/volk/lib/qa_32f_convert_32s_unaligned16.cc
new file mode 100644
index 000000000..e63b17994
--- /dev/null
+++ b/volk/lib/qa_32f_convert_32s_unaligned16.cc
@@ -0,0 +1,70 @@
+#include <volk/volk.h>
+#include <qa_32f_convert_32s_unaligned16.h>
+#include <volk/volk_32f_convert_32s_unaligned16.h>
+#include <cstdlib>
+
+//test for sse2
+
+#ifndef LV_HAVE_SSE2
+// Built without LV_HAVE_SSE2: just report that the comparison is skipped.
+void qa_32f_convert_32s_unaligned16::t1() {
+  printf("sse2 not available... no test performed\n");
+}
+
+#else
+// Runs the "generic", "sse" and "sse2" variants on one random float vector, prints timings, and compares the int32_t outputs.
+void qa_32f_convert_32s_unaligned16::t1() {
+  // Set-up: library init, problem size, and work buffers (declared 16-byte aligned here).
+  volk_environment_init();
+  clock_t tic, toc;
+  double elapsed;
+  const int num_points = 3201;
+  const int num_iters = 100000;
+  float input0[num_points] __attribute__ ((aligned (16)));
+  const int half_range = RAND_MAX/2;
+  int32_t output_generic[num_points] __attribute__ ((aligned (16)));
+  int32_t output_sse[num_points] __attribute__ ((aligned (16)));
+  int32_t output_sse2[num_points] __attribute__ ((aligned (16)));
+  // Each sample is (rand() - RAND_MAX/2) divided by RAND_MAX/2, computed in float.
+  for(int n = 0; n != num_points; ++n) {
+    input0[n] = static_cast<float>(rand() - half_range) / static_cast<float>(half_range);
+  }
+  printf("32f_convert_32s_unaligned\n");
+
+  tic = clock();
+  for(int pass = 0; pass != num_iters; ++pass) {
+    volk_32f_convert_32s_unaligned16_manual(output_generic, input0, 32768.0, num_points, "generic");
+  }
+  toc = clock();
+  elapsed = static_cast<double>(toc - tic) / static_cast<double>(CLOCKS_PER_SEC);
+  printf("generic_time: %f\n", elapsed);
+
+  tic = clock();
+  for(int pass = 0; pass != num_iters; ++pass) {
+    volk_32f_convert_32s_unaligned16_manual(output_sse, input0, 32768.0, num_points, "sse");
+  }
+  toc = clock();
+  elapsed = static_cast<double>(toc - tic) / static_cast<double>(CLOCKS_PER_SEC);
+  printf("sse_time: %f\n", elapsed);
+
+  tic = clock();
+  for(int pass = 0; pass != num_iters; ++pass) {
+    volk_32f_convert_32s_unaligned16_manual(output_sse2, input0, 32768.0, num_points, "sse2");
+  }
+  toc = clock();
+  elapsed = static_cast<double>(toc - tic) / static_cast<double>(CLOCKS_PER_SEC);
+  printf("sse2_time: %f\n", elapsed);
+
+  // The repeated calls above feed the printed timings. What gets verified
+  // below is the content left in the three output buffers by the final call
+  // of each implementation, with "generic" as the reference.
+  //
+
+  for(int i = 0; i != num_points; ++i) {
+    // Each SIMD result may differ from the generic result by at most 1.
+    CPPUNIT_ASSERT(abs(output_generic[i] - output_sse[i]) <= 1);
+    CPPUNIT_ASSERT(abs(output_generic[i] - output_sse2[i]) <= 1);
+  }
+}
+
+#endif
diff --git a/volk/lib/qa_32f_convert_32s_unaligned16.h b/volk/lib/qa_32f_convert_32s_unaligned16.h
new file mode 100644
index 000000000..5d662d86d
--- /dev/null
+++ b/volk/lib/qa_32f_convert_32s_unaligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_32F_CONVERT_32S_UNALIGNED16_H
+#define INCLUDED_QA_32F_CONVERT_32S_UNALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_32f_convert_32s_unaligned16 : public CppUnit::TestCase {
+  // CppUnit test case whose suite registers a single test method, t1.
+  CPPUNIT_TEST_SUITE (qa_32f_convert_32s_unaligned16);
+  CPPUNIT_TEST (t1);
+  CPPUNIT_TEST_SUITE_END ();
+
+ private:
+  void t1 ();  // defined in qa_32f_convert_32s_unaligned16.cc
+};
+
+
+#endif /* INCLUDED_QA_32F_CONVERT_32S_UNALIGNED16_H */
diff --git a/volk/lib/qa_32f_convert_64f_aligned16.cc b/volk/lib/qa_32f_convert_64f_aligned16.cc
new file mode 100644
index 000000000..c546e47de
--- /dev/null
+++ b/volk/lib/qa_32f_convert_64f_aligned16.cc
@@ -0,0 +1,60 @@
+#include <volk/volk.h>
+#include <qa_32f_convert_64f_aligned16.h>
+#include <volk/volk_32f_convert_64f_aligned16.h>
+#include <cstdlib>
+
+//test for sse2
+
+#ifndef LV_HAVE_SSE2
+// Built without LV_HAVE_SSE2: just report that the comparison is skipped.
+void qa_32f_convert_64f_aligned16::t1() {
+  printf("sse2 not available... no test performed\n");
+}
+
+#else
+// Runs the "generic" and "sse2" variants on one random float vector, prints timings, and compares the double outputs.
+void qa_32f_convert_64f_aligned16::t1() {
+  // Set-up: library init, problem size, and 16-byte aligned work buffers.
+  volk_environment_init();
+  clock_t tic, toc;
+  double elapsed;
+  const int num_points = 3201;
+  const int num_iters = 100000;
+  float input0[num_points] __attribute__ ((aligned (16)));
+  const int half_range = RAND_MAX/2;
+  double output_generic[num_points] __attribute__ ((aligned (16)));
+  double output_sse2[num_points] __attribute__ ((aligned (16)));
+  // Each sample is (rand() - RAND_MAX/2) divided by RAND_MAX/2, computed in float.
+  for(int n = 0; n != num_points; ++n) {
+    input0[n] = static_cast<float>(rand() - half_range) / static_cast<float>(half_range);
+  }
+  printf("32f_convert_64f_aligned\n");
+
+  tic = clock();
+  for(int pass = 0; pass != num_iters; ++pass) {
+    volk_32f_convert_64f_aligned16_manual(output_generic, input0, num_points, "generic");
+  }
+  toc = clock();
+  elapsed = static_cast<double>(toc - tic) / static_cast<double>(CLOCKS_PER_SEC);
+  printf("generic_time: %f\n", elapsed);
+
+  tic = clock();
+  for(int pass = 0; pass != num_iters; ++pass) {
+    volk_32f_convert_64f_aligned16_manual(output_sse2, input0, num_points, "sse2");
+  }
+  toc = clock();
+  elapsed = static_cast<double>(toc - tic) / static_cast<double>(CLOCKS_PER_SEC);
+  printf("sse2_time: %f\n", elapsed);
+
+  // The repeated calls above feed the printed timings. What gets verified
+  // below is the content left in the two output buffers by the final call
+  // of each implementation, compared element by element.
+  //
+
+  for(int n = 0; n != num_points; ++n) {
+    // Relative tolerance: 1e-6 times the magnitude of the generic value.
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[n], output_sse2[n], fabs(output_generic[n])*1e-6);
+  }
+}
+
+#endif
diff --git a/volk/lib/qa_32f_convert_64f_aligned16.h b/volk/lib/qa_32f_convert_64f_aligned16.h
new file mode 100644
index 000000000..41eb3e094
--- /dev/null
+++ b/volk/lib/qa_32f_convert_64f_aligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_32F_CONVERT_64F_ALIGNED16_H
+#define INCLUDED_QA_32F_CONVERT_64F_ALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_32f_convert_64f_aligned16 : public CppUnit::TestCase {
+  // CppUnit test case whose suite registers a single test method, t1.
+  CPPUNIT_TEST_SUITE (qa_32f_convert_64f_aligned16);
+  CPPUNIT_TEST (t1);
+  CPPUNIT_TEST_SUITE_END ();
+
+ private:
+  void t1 ();  // defined in qa_32f_convert_64f_aligned16.cc
+};
+
+
+#endif /* INCLUDED_QA_32F_CONVERT_64F_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_convert_64f_unaligned16.cc b/volk/lib/qa_32f_convert_64f_unaligned16.cc
new file mode 100644
index 000000000..24b51f9af
--- /dev/null
+++ b/volk/lib/qa_32f_convert_64f_unaligned16.cc
@@ -0,0 +1,60 @@
+#include <volk/volk.h>
+#include <qa_32f_convert_64f_unaligned16.h>
+#include <volk/volk_32f_convert_64f_unaligned16.h>
+#include <cstdlib>
+
+//test for sse2
+
+#ifndef LV_HAVE_SSE2
+// Built without LV_HAVE_SSE2: just report that the comparison is skipped.
+void qa_32f_convert_64f_unaligned16::t1() {
+  printf("sse2 not available... no test performed\n");
+}
+
+#else
+// Runs the "generic" and "sse2" variants on one random float vector, prints timings, and compares the double outputs.
+void qa_32f_convert_64f_unaligned16::t1() {
+  // Set-up: library init, problem size, and work buffers (declared 16-byte aligned here).
+  volk_environment_init();
+  clock_t tic, toc;
+  double elapsed;
+  const int num_points = 3201;
+  const int num_iters = 100000;
+  float input0[num_points] __attribute__ ((aligned (16)));
+  const int half_range = RAND_MAX/2;
+  double output_generic[num_points] __attribute__ ((aligned (16)));
+  double output_sse2[num_points] __attribute__ ((aligned (16)));
+  // Each sample is (rand() - RAND_MAX/2) divided by RAND_MAX/2, computed in float.
+  for(int n = 0; n != num_points; ++n) {
+    input0[n] = static_cast<float>(rand() - half_range) / static_cast<float>(half_range);
+  }
+  printf("32f_convert_64f_unaligned\n");
+
+  tic = clock();
+  for(int pass = 0; pass != num_iters; ++pass) {
+    volk_32f_convert_64f_unaligned16_manual(output_generic, input0, num_points, "generic");
+  }
+  toc = clock();
+  elapsed = static_cast<double>(toc - tic) / static_cast<double>(CLOCKS_PER_SEC);
+  printf("generic_time: %f\n", elapsed);
+
+  tic = clock();
+  for(int pass = 0; pass != num_iters; ++pass) {
+    volk_32f_convert_64f_unaligned16_manual(output_sse2, input0, num_points, "sse2");
+  }
+  toc = clock();
+  elapsed = static_cast<double>(toc - tic) / static_cast<double>(CLOCKS_PER_SEC);
+  printf("sse2_time: %f\n", elapsed);
+
+  // The repeated calls above feed the printed timings. What gets verified
+  // below is the content left in the two output buffers by the final call
+  // of each implementation, compared element by element.
+  //
+
+  for(int n = 0; n != num_points; ++n) {
+    // The two kernels must produce exactly equal doubles.
+    CPPUNIT_ASSERT_EQUAL(output_generic[n], output_sse2[n]);
+  }
+}
+
+#endif
diff --git a/volk/lib/qa_32f_convert_64f_unaligned16.h b/volk/lib/qa_32f_convert_64f_unaligned16.h
new file mode 100644
index 000000000..4b144f033
--- /dev/null
+++ b/volk/lib/qa_32f_convert_64f_unaligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_32F_CONVERT_64F_UNALIGNED16_H
+#define INCLUDED_QA_32F_CONVERT_64F_UNALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_32f_convert_64f_unaligned16 : public CppUnit::TestCase {
+  // CppUnit test case whose suite registers a single test method, t1.
+  CPPUNIT_TEST_SUITE (qa_32f_convert_64f_unaligned16);
+  CPPUNIT_TEST (t1);
+  CPPUNIT_TEST_SUITE_END ();
+
+ private:
+  void t1 ();  // defined in qa_32f_convert_64f_unaligned16.cc
+};
+
+
+#endif /* INCLUDED_QA_32F_CONVERT_64F_UNALIGNED16_H */
diff --git a/volk/lib/qa_32f_convert_8s_aligned16.cc b/volk/lib/qa_32f_convert_8s_aligned16.cc
new file mode 100644
index 000000000..a3d4d6567
--- /dev/null
+++ b/volk/lib/qa_32f_convert_8s_aligned16.cc
@@ -0,0 +1,70 @@
+#include <volk/volk.h>
+#include <qa_32f_convert_8s_aligned16.h>
+#include <volk/volk_32f_convert_8s_aligned16.h>
+#include <cstdlib>
+
+//test for sse2
+
+#ifndef LV_HAVE_SSE2
+// Built without LV_HAVE_SSE2: just report that the comparison is skipped.
+void qa_32f_convert_8s_aligned16::t1() {
+  printf("sse2 not available... no test performed\n");
+}
+
+#else
+// Runs the "generic", "sse" and "sse2" variants on one random float vector, prints timings, and compares the int8_t outputs.
+void qa_32f_convert_8s_aligned16::t1() {
+  // Set-up: library init, problem size, and 16-byte aligned work buffers.
+  volk_environment_init();
+  clock_t tic, toc;
+  double elapsed;
+  const int num_points = 3201;
+  const int num_iters = 100000;
+  float input0[num_points] __attribute__ ((aligned (16)));
+  const int half_range = RAND_MAX/2;
+  int8_t output_generic[num_points] __attribute__ ((aligned (16)));
+  int8_t output_sse[num_points] __attribute__ ((aligned (16)));
+  int8_t output_sse2[num_points] __attribute__ ((aligned (16)));
+  // Each sample is (rand() - RAND_MAX/2) divided by RAND_MAX/2, computed in float.
+  for(int n = 0; n != num_points; ++n) {
+    input0[n] = static_cast<float>(rand() - half_range) / static_cast<float>(half_range);
+  }
+  printf("32f_convert_8s_aligned\n");
+
+  tic = clock();
+  for(int pass = 0; pass != num_iters; ++pass) {
+    volk_32f_convert_8s_aligned16_manual(output_generic, input0, 128.0, num_points, "generic");
+  }
+  toc = clock();
+  elapsed = static_cast<double>(toc - tic) / static_cast<double>(CLOCKS_PER_SEC);
+  printf("generic_time: %f\n", elapsed);
+
+  tic = clock();
+  for(int pass = 0; pass != num_iters; ++pass) {
+    volk_32f_convert_8s_aligned16_manual(output_sse, input0, 128.0, num_points, "sse");
+  }
+  toc = clock();
+  elapsed = static_cast<double>(toc - tic) / static_cast<double>(CLOCKS_PER_SEC);
+  printf("sse_time: %f\n", elapsed);
+
+  tic = clock();
+  for(int pass = 0; pass != num_iters; ++pass) {
+    volk_32f_convert_8s_aligned16_manual(output_sse2, input0, 128.0, num_points, "sse2");
+  }
+  toc = clock();
+  elapsed = static_cast<double>(toc - tic) / static_cast<double>(CLOCKS_PER_SEC);
+  printf("sse2_time: %f\n", elapsed);
+
+  // The repeated calls above feed the printed timings. What gets verified
+  // below is the content left in the three output buffers by the final call
+  // of each implementation, with "generic" as the reference.
+  //
+
+  for(int i = 0; i != num_points; ++i) {
+    // Each SIMD result may differ from the generic result by at most 1.
+    CPPUNIT_ASSERT(abs(output_generic[i] - output_sse[i]) <= 1);
+    CPPUNIT_ASSERT(abs(output_generic[i] - output_sse2[i]) <= 1);
+  }
+}
+
+#endif
diff --git a/volk/lib/qa_32f_convert_8s_aligned16.h b/volk/lib/qa_32f_convert_8s_aligned16.h
new file mode 100644
index 000000000..68a523f34
--- /dev/null
+++ b/volk/lib/qa_32f_convert_8s_aligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_32F_CONVERT_8S_ALIGNED16_H
+#define INCLUDED_QA_32F_CONVERT_8S_ALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_32f_convert_8s_aligned16 : public CppUnit::TestCase {
+  // CppUnit test case whose suite registers a single test method, t1.
+  CPPUNIT_TEST_SUITE (qa_32f_convert_8s_aligned16);
+  CPPUNIT_TEST (t1);
+  CPPUNIT_TEST_SUITE_END ();
+
+ private:
+  void t1 ();  // defined in qa_32f_convert_8s_aligned16.cc
+};
+
+
+#endif /* INCLUDED_QA_32F_CONVERT_8S_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_convert_8s_unaligned16.cc b/volk/lib/qa_32f_convert_8s_unaligned16.cc
new file mode 100644
index 000000000..d885fd6bb
--- /dev/null
+++ b/volk/lib/qa_32f_convert_8s_unaligned16.cc
@@ -0,0 +1,70 @@
+#include <volk/volk.h>
+#include <qa_32f_convert_8s_unaligned16.h>
+#include <volk/volk_32f_convert_8s_unaligned16.h>
+#include <cstdlib>
+
+//test for sse2
+
+#ifndef LV_HAVE_SSE2
+// Built without LV_HAVE_SSE2: just report that the comparison is skipped.
+void qa_32f_convert_8s_unaligned16::t1() {
+  printf("sse2 not available... no test performed\n");
+}
+
+#else
+// Runs the "generic", "sse" and "sse2" variants on one random float vector, prints timings, and compares the int8_t outputs.
+void qa_32f_convert_8s_unaligned16::t1() {
+  // Set-up: library init, problem size, and work buffers (declared 16-byte aligned here).
+  volk_environment_init();
+  clock_t tic, toc;
+  double elapsed;
+  const int num_points = 3201;
+  const int num_iters = 100000;
+  float input0[num_points] __attribute__ ((aligned (16)));
+  const int half_range = RAND_MAX/2;
+  int8_t output_generic[num_points] __attribute__ ((aligned (16)));
+  int8_t output_sse[num_points] __attribute__ ((aligned (16)));
+  int8_t output_sse2[num_points] __attribute__ ((aligned (16)));
+  // Each sample is (rand() - RAND_MAX/2) divided by RAND_MAX/2, computed in float.
+  for(int n = 0; n != num_points; ++n) {
+    input0[n] = static_cast<float>(rand() - half_range) / static_cast<float>(half_range);
+  }
+  printf("32f_convert_8s_unaligned\n");
+
+  tic = clock();
+  for(int pass = 0; pass != num_iters; ++pass) {
+    volk_32f_convert_8s_unaligned16_manual(output_generic, input0, 128.0, num_points, "generic");
+  }
+  toc = clock();
+  elapsed = static_cast<double>(toc - tic) / static_cast<double>(CLOCKS_PER_SEC);
+  printf("generic_time: %f\n", elapsed);
+
+  tic = clock();
+  for(int pass = 0; pass != num_iters; ++pass) {
+    volk_32f_convert_8s_unaligned16_manual(output_sse, input0, 128.0, num_points, "sse");
+  }
+  toc = clock();
+  elapsed = static_cast<double>(toc - tic) / static_cast<double>(CLOCKS_PER_SEC);
+  printf("sse_time: %f\n", elapsed);
+
+  tic = clock();
+  for(int pass = 0; pass != num_iters; ++pass) {
+    volk_32f_convert_8s_unaligned16_manual(output_sse2, input0, 128.0, num_points, "sse2");
+  }
+  toc = clock();
+  elapsed = static_cast<double>(toc - tic) / static_cast<double>(CLOCKS_PER_SEC);
+  printf("sse2_time: %f\n", elapsed);
+
+  // The repeated calls above feed the printed timings. What gets verified
+  // below is the content left in the three output buffers by the final call
+  // of each implementation, with "generic" as the reference.
+  //
+
+  for(int i = 0; i != num_points; ++i) {
+    // Each SIMD result may differ from the generic result by at most 1.
+    CPPUNIT_ASSERT(abs(output_generic[i] - output_sse[i]) <= 1);
+    CPPUNIT_ASSERT(abs(output_generic[i] - output_sse2[i]) <= 1);
+  }
+}
+
+#endif
diff --git a/volk/lib/qa_32f_convert_8s_unaligned16.h b/volk/lib/qa_32f_convert_8s_unaligned16.h
new file mode 100644
index 000000000..88d4ff42a
--- /dev/null
+++ b/volk/lib/qa_32f_convert_8s_unaligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_32F_CONVERT_8S_UNALIGNED16_H
+#define INCLUDED_QA_32F_CONVERT_8S_UNALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_32f_convert_8s_unaligned16 : public CppUnit::TestCase {
+  // CppUnit test case whose suite registers a single test method, t1.
+  CPPUNIT_TEST_SUITE (qa_32f_convert_8s_unaligned16);
+  CPPUNIT_TEST (t1);
+  CPPUNIT_TEST_SUITE_END ();
+
+ private:
+  void t1 ();  // defined in qa_32f_convert_8s_unaligned16.cc
+};
+
+
+#endif /* INCLUDED_QA_32F_CONVERT_8S_UNALIGNED16_H */
diff --git a/volk/lib/qa_32f_divide_aligned16.cc b/volk/lib/qa_32f_divide_aligned16.cc
new file mode 100644
index 000000000..b20999beb
--- /dev/null
+++ b/volk/lib/qa_32f_divide_aligned16.cc
@@ -0,0 +1,60 @@
+#include <volk/volk.h>
+#include <qa_32f_divide_aligned16.h>
+#include <volk/volk_32f_divide_aligned16.h>
+#include <cstdlib>
+
+//test for sse
+
+#ifndef LV_HAVE_SSE
+
+void qa_32f_divide_aligned16::t1() {
+  // Compiled when LV_HAVE_SSE is undefined: report the skip, check nothing.
+  fputs("sse not available... no test performed\n", stdout);
+}
+#else
+
+void qa_32f_divide_aligned16::t1() {
+  // Runs volk_32f_divide_aligned16_manual with the "generic" and "sse"
+  // selectors on identical random inputs, prints the clock() time of each
+  // batch, and then requires the two output vectors to be exactly equal.
+  volk_environment_init();
+
+  const int vlen = 3201;
+  const int ITERS = 100000;
+  const float half_range = static_cast<float>((RAND_MAX/2));
+  float input0[vlen] __attribute__ ((aligned (16)));
+  float input1[vlen] __attribute__ ((aligned (16)));
+  float out_generic[vlen] __attribute__ ((aligned (16)));
+  float out_sse[vlen] __attribute__ ((aligned (16)));
+
+  // Operands are rand() shifted by RAND_MAX/2 and divided by RAND_MAX/2.
+  for(int n = 0; n != vlen; ++n) {
+    input0[n] = ((float) (rand() - (RAND_MAX/2))) / half_range;
+    input1[n] = ((float) (rand() - (RAND_MAX/2))) / half_range;
+  }
+  printf("32f_divide_aligned\n");
+
+  // Reference batch: ITERS back-to-back calls with the "generic" selector.
+  const clock_t generic_begin = clock();
+  for(int pass = ITERS; pass > 0; --pass) {
+    volk_32f_divide_aligned16_manual(out_generic, input0, input1, vlen, "generic");
+  }
+  const clock_t generic_end = clock();
+  printf("generic_time: %f\n", (double)(generic_end-generic_begin)/(double)CLOCKS_PER_SEC);
+
+  // Same workload with the "sse" selector.
+  const clock_t sse_begin = clock();
+  for(int pass = ITERS; pass > 0; --pass) {
+    volk_32f_divide_aligned16_manual(out_sse, input0, input1, vlen, "sse");
+  }
+  const clock_t sse_end = clock();
+  printf("sse_time: %f\n", (double)(sse_end-sse_begin)/(double)CLOCKS_PER_SEC);
+
+  // No tolerance here: every element must compare equal.
+  for(int n = 0; n != vlen; ++n) {
+    const float expected = out_generic[n];
+    CPPUNIT_ASSERT_EQUAL(expected, out_sse[n]);
+  }
+}
+
+#endif
diff --git a/volk/lib/qa_32f_divide_aligned16.h b/volk/lib/qa_32f_divide_aligned16.h
new file mode 100644
index 000000000..79d5ae4b8
--- /dev/null
+++ b/volk/lib/qa_32f_divide_aligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_32F_DIVIDE_ALIGNED16_H
+#define INCLUDED_QA_32F_DIVIDE_ALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_32f_divide_aligned16 : public CppUnit::TestCase {
+  // Registers t1 as the only test of this CppUnit suite.
+  CPPUNIT_TEST_SUITE (qa_32f_divide_aligned16);
+  CPPUNIT_TEST (t1);
+  CPPUNIT_TEST_SUITE_END ();
+
+ private:
+  void t1 ();  // implemented in qa_32f_divide_aligned16.cc
+};
+
+
+#endif /* INCLUDED_QA_32F_DIVIDE_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_dot_prod_aligned16.cc b/volk/lib/qa_32f_dot_prod_aligned16.cc
new file mode 100644
index 000000000..98c1f2d99
--- /dev/null
+++ b/volk/lib/qa_32f_dot_prod_aligned16.cc
@@ -0,0 +1,183 @@
+#include <volk/volk_runtime.h>
+#include <volk/volk.h>
+#include <qa_32f_dot_prod_aligned16.h>
+#include <stdlib.h>
+#include <math.h>
+#include <time.h>
+
+#define	ERR_DELTA	(1e-4)
+
+// Maps one rand() draw onto [-1, 1]: 2 * (rand()/RAND_MAX - 0.5).
+static float uniform() {
+  const int draw = rand();
+  return 2.0 * ((float) draw / RAND_MAX - 0.5);
+}
+// Writes n consecutive uniform() samples to buf[0] .. buf[n-1].
+static void random_floats (float *buf, unsigned n)
+{
+  for (float *slot = buf; slot != buf + n; ++slot)
+    *slot = uniform ();
+}
+
+#ifndef LV_HAVE_SSE4_1
+
+#ifdef LV_HAVE_SSE3
+void qa_32f_dot_prod_aligned16::t1() {
+  // Times volk_32f_dot_prod_aligned16_manual with the "generic", "sse" and
+  // "sse3" selectors on one random input/taps pair, then checks every stored
+  // result against the generic one with a relative tolerance of ERR_DELTA.
+  const int vlen = 2046;
+  const int ITER = 100000;
+  volk_environment_init();
+  clock_t start, end;
+  double total;
+  float * input;
+  float * taps;
+  float * result_generic;
+  float * result_sse;
+  float * result_sse3;
+
+  // posix_memalign() reports failure only through its return value, so each
+  // allocation is verified before the buffer is touched.
+  int ret = posix_memalign((void**)&input, 16, vlen* sizeof(float));
+  CPPUNIT_ASSERT_EQUAL(0, ret);
+  ret = posix_memalign((void**)&taps, 16, vlen *sizeof(float));
+  CPPUNIT_ASSERT_EQUAL(0, ret);
+  ret = posix_memalign((void**)&result_generic, 16, ITER*sizeof(float));
+  CPPUNIT_ASSERT_EQUAL(0, ret);
+  ret = posix_memalign((void**)&result_sse, 16, ITER*sizeof(float));
+  CPPUNIT_ASSERT_EQUAL(0, ret);
+  ret = posix_memalign((void**)&result_sse3, 16, ITER*sizeof(float));
+  CPPUNIT_ASSERT_EQUAL(0, ret);
+
+  random_floats((float*)input, vlen);
+  random_floats((float*)taps, vlen);
+  printf("32f_dot_prod_aligned16\n");
+
+  start = clock();
+  for(int i = 0; i < ITER; i++){
+    volk_32f_dot_prod_aligned16_manual(&result_generic[i], input, taps, vlen,  "generic");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("generic_time: %f\n", total);
+
+  start = clock();
+  for(int i = 0; i < ITER; i++){
+    volk_32f_dot_prod_aligned16_manual(&result_sse[i], input, taps, vlen, "sse");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("sse_time: %f\n", total);
+
+  start = clock();
+  for(int i = 0; i < ITER; i++){
+    volk_32f_dot_prod_aligned16_manual(&result_sse3[i], input, taps, vlen, "sse3");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("sse3_time: %f\n", total);
+  printf("generic: %f ... sse: %f  ... sse3 %f \n", result_generic[0], result_sse[0], result_sse3[0]);
+
+  for(int i = 0; i < ITER; i++){
+    CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse[i], fabs(result_generic[i])*ERR_DELTA);
+    CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse3[i], fabs(result_generic[i])*ERR_DELTA);
+  }
+  free(input);
+  free(taps);
+  free(result_generic);
+  free(result_sse);
+  free(result_sse3);
+}
+#else
+void qa_32f_dot_prod_aligned16::t1() {
+  // Compiled when neither LV_HAVE_SSE4_1 nor LV_HAVE_SSE3 is defined.
+  fputs("sse3 not available... no test performed\n", stdout);
+}
+#endif /* LV_HAVE_SSE3 */
+
+#else
+
+void qa_32f_dot_prod_aligned16::t1() {
+  // Times the "generic", "sse" and "sse3" selectors plus the entry point
+  // returned by get_volk_runtime() on one random input/taps pair, then checks
+  // every stored result against the generic one within ERR_DELTA (relative).
+  volk_runtime_init();
+  volk_environment_init();
+  const int vlen = 4095;
+  const int ITER = 100000;
+  clock_t start, end;
+  double total;
+  float * input;
+  float * taps;
+  float * result_generic;
+  float * result_sse;
+  float * result_sse3;
+  float * result_sse4_1;
+
+  // posix_memalign() reports failure only through its return value, so each
+  // allocation is verified before the buffer is touched.
+  int ret = posix_memalign((void**)&input, 16, vlen * sizeof(float));
+  CPPUNIT_ASSERT_EQUAL(0, ret);
+  ret = posix_memalign((void**)&taps, 16, vlen * sizeof(float));
+  CPPUNIT_ASSERT_EQUAL(0, ret);
+  ret = posix_memalign((void**)&result_generic, 16, ITER*sizeof(float));
+  CPPUNIT_ASSERT_EQUAL(0, ret);
+  ret = posix_memalign((void**)&result_sse, 16, ITER*sizeof(float));
+  CPPUNIT_ASSERT_EQUAL(0, ret);
+  ret = posix_memalign((void**)&result_sse3, 16, ITER*sizeof(float));
+  CPPUNIT_ASSERT_EQUAL(0, ret);
+  ret = posix_memalign((void**)&result_sse4_1, 16, ITER*sizeof(float));
+  CPPUNIT_ASSERT_EQUAL(0, ret);
+
+  random_floats((float*)input, vlen);
+  random_floats((float*)taps, vlen);
+  printf("32f_dot_prod_aligned16\n");
+
+  start = clock();
+  for(int i = 0; i < ITER; i++){
+    volk_32f_dot_prod_aligned16_manual(&result_generic[i], input, taps, vlen,  "generic");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("generic_time: %f\n", total);
+
+  start = clock();
+  for(int i = 0; i < ITER; i++){
+    volk_32f_dot_prod_aligned16_manual(&result_sse[i], input, taps, vlen, "sse");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("sse_time: %f\n", total);
+
+  start = clock();
+  for(int i = 0; i < ITER; i++){
+    volk_32f_dot_prod_aligned16_manual(&result_sse3[i], input, taps, vlen, "sse3");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("sse3_time: %f\n", total);
+
+  // This batch goes through get_volk_runtime(); it is reported as sse4_1.
+  start = clock();
+  for(int i = 0; i < ITER; i++){
+    get_volk_runtime()->volk_32f_dot_prod_aligned16(&result_sse4_1[i], input, taps, vlen);
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("sse4_1_time: %f\n", total);
+
+  for(int i = 0; i < ITER; i++){
+    CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse[i], fabs(result_generic[i])*ERR_DELTA);
+    CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse3[i], fabs(result_generic[i])*ERR_DELTA);
+    CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse4_1[i], fabs(result_generic[i])*ERR_DELTA);
+  }
+  free(input);
+  free(taps);
+  free(result_generic);
+  free(result_sse);
+  free(result_sse3);
+  free(result_sse4_1);
+}
+
+#endif /* LV_HAVE_SSE4_1 */
diff --git a/volk/lib/qa_32f_dot_prod_aligned16.h b/volk/lib/qa_32f_dot_prod_aligned16.h
new file mode 100644
index 000000000..6931a9e98
--- /dev/null
+++ b/volk/lib/qa_32f_dot_prod_aligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_32F_DOT_PROD_ALIGNED16_H
+#define INCLUDED_QA_32F_DOT_PROD_ALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_32f_dot_prod_aligned16 : public CppUnit::TestCase {
+  // Registers t1 as the only test of this CppUnit suite.
+  CPPUNIT_TEST_SUITE (qa_32f_dot_prod_aligned16);
+  CPPUNIT_TEST (t1);
+  CPPUNIT_TEST_SUITE_END ();
+
+ private:
+  void t1 ();  // implemented in qa_32f_dot_prod_aligned16.cc
+};
+
+
+#endif /* INCLUDED_QA_32F_DOT_PROD_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_dot_prod_unaligned16.cc b/volk/lib/qa_32f_dot_prod_unaligned16.cc
new file mode 100644
index 000000000..8e97d4249
--- /dev/null
+++ b/volk/lib/qa_32f_dot_prod_unaligned16.cc
@@ -0,0 +1,190 @@
+#include <volk/volk_runtime.h>
+#include <volk/volk.h>
+#include <qa_32f_dot_prod_unaligned16.h>
+#include <stdlib.h>
+#include <math.h>
+#include <time.h>
+
+#define	ERR_DELTA	(1e-4)
+
+// Maps one rand() draw onto [-1, 1]: 2 * (rand()/RAND_MAX - 0.5).
+static float uniform() {
+  const int draw = rand();
+  return 2.0 * ((float) draw / RAND_MAX - 0.5);
+}
+// Writes n consecutive uniform() samples to buf[0] .. buf[n-1].
+static void random_floats (float *buf, unsigned n)
+{
+  for (float *slot = buf; slot != buf + n; ++slot)
+    *slot = uniform ();
+}
+
+#ifndef LV_HAVE_SSE4_1
+
+#ifdef LV_HAVE_SSE3
+void qa_32f_dot_prod_unaligned16::t1() {
+  // Times volk_32f_dot_prod_unaligned16_manual with the "generic", "sse" and
+  // "sse3" selectors on one random input/taps pair, then checks every stored
+  // result against the generic one with a relative tolerance of ERR_DELTA.
+  volk_runtime_init();
+  volk_environment_init();
+  const int vlen = 2046;
+  const int ITER = 100000;
+  clock_t start, end;
+  double total;
+  float * input;
+  float * taps;
+  float * result_generic;
+  float * result_sse;
+  float * result_sse3;
+
+  // input and taps get one spare float so they can be shifted off the
+  // 16-byte boundary below; every allocation is checked before use.
+  int ret = posix_memalign((void**)&input, 16, (vlen+1) * sizeof(float));
+  CPPUNIT_ASSERT_EQUAL(0, ret);
+  ret = posix_memalign((void**)&taps, 16, (vlen+1) * sizeof(float));
+  CPPUNIT_ASSERT_EQUAL(0, ret);
+  ret = posix_memalign((void**)&result_generic, 16, ITER*sizeof(float));
+  CPPUNIT_ASSERT_EQUAL(0, ret);
+  ret = posix_memalign((void**)&result_sse, 16, ITER*sizeof(float));
+  CPPUNIT_ASSERT_EQUAL(0, ret);
+  ret = posix_memalign((void**)&result_sse3, 16, ITER*sizeof(float));
+  CPPUNIT_ASSERT_EQUAL(0, ret);
+
+  input = &input[1]; // Make sure the buffer is unaligned
+  taps = &taps[1]; // Make sure the buffer is unaligned
+
+  random_floats((float*)input, vlen);
+  random_floats((float*)taps, vlen);
+  printf("32f_dot_prod_unaligned16\n");
+
+  start = clock();
+  for(int i = 0; i < ITER; i++){
+    volk_32f_dot_prod_unaligned16_manual(&result_generic[i], input, taps, vlen,  "generic");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("generic_time: %f\n", total);
+
+  start = clock();
+  for(int i = 0; i < ITER; i++){
+    volk_32f_dot_prod_unaligned16_manual(&result_sse[i], input, taps, vlen, "sse");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("sse_time: %f\n", total);
+
+  start = clock();
+  for(int i = 0; i < ITER; i++){
+    volk_32f_dot_prod_unaligned16_manual(&result_sse3[i], input, taps, vlen, "sse3");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("sse3_time: %f\n", total);
+  printf("generic: %f ... sse: %f  ... sse3 %f \n", result_generic[0], result_sse[0], result_sse3[0]);
+
+  for(int i = 0; i < ITER; i++){
+    CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse[i], fabs(result_generic[i])*ERR_DELTA);
+    CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse3[i], fabs(result_generic[i])*ERR_DELTA);
+  }
+  free(&input[-1]);  // undo the one-float shift before releasing
+  free(&taps[-1]);
+  free(result_generic);
+  free(result_sse);
+  free(result_sse3);
+}
+#else
+void qa_32f_dot_prod_unaligned16::t1() {
+  // Compiled when neither LV_HAVE_SSE4_1 nor LV_HAVE_SSE3 is defined.
+  fputs("sse3 not available... no test performed\n", stdout);
+}
+#endif /* LV_HAVE_SSE3 */
+
+#else
+
+void qa_32f_dot_prod_unaligned16::t1() {
+  // Times the "generic", "sse" and "sse3" selectors plus the entry point
+  // returned by get_volk_runtime() on one random input/taps pair, then checks
+  // every stored result against the generic one within ERR_DELTA (relative).
+  volk_runtime_init();
+  volk_environment_init();
+  const int vlen = 4095;
+  const int ITER = 100000;
+  clock_t start, end;
+  double total;
+  float * input;
+  float * taps;
+  float * result_generic;
+  float * result_sse;
+  float * result_sse3;
+  float * result_sse4_1;
+
+  // posix_memalign() reports failure only through its return value, so each
+  // allocation is verified before the buffer is touched.
+  int ret = posix_memalign((void**)&input, 16, (vlen+1) * sizeof(float));
+  CPPUNIT_ASSERT_EQUAL(0, ret);
+  ret = posix_memalign((void**)&taps, 16, (vlen+1) * sizeof(float));
+  CPPUNIT_ASSERT_EQUAL(0, ret);
+  ret = posix_memalign((void**)&result_generic, 16, ITER*sizeof(float));
+  CPPUNIT_ASSERT_EQUAL(0, ret);
+  ret = posix_memalign((void**)&result_sse, 16, ITER*sizeof(float));
+  CPPUNIT_ASSERT_EQUAL(0, ret);
+  ret = posix_memalign((void**)&result_sse3, 16, ITER*sizeof(float));
+  CPPUNIT_ASSERT_EQUAL(0, ret);
+  ret = posix_memalign((void**)&result_sse4_1, 16, ITER*sizeof(float));
+  CPPUNIT_ASSERT_EQUAL(0, ret);
+
+  input = &input[1]; // Make sure the buffer is unaligned
+  taps = &taps[1]; // Make sure the buffer is unaligned
+
+  random_floats((float*)input, vlen);
+  random_floats((float*)taps, vlen);
+  printf("32f_dot_prod_unaligned16\n");
+
+  start = clock();
+  for(int i = 0; i < ITER; i++){
+    volk_32f_dot_prod_unaligned16_manual(&result_generic[i], input, taps, vlen,  "generic");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("generic_time: %f\n", total);
+
+  start = clock();
+  for(int i = 0; i < ITER; i++){
+    volk_32f_dot_prod_unaligned16_manual(&result_sse[i], input, taps, vlen, "sse");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("sse_time: %f\n", total);
+
+  start = clock();
+  for(int i = 0; i < ITER; i++){
+    volk_32f_dot_prod_unaligned16_manual(&result_sse3[i], input, taps, vlen, "sse3");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("sse3_time: %f\n", total);
+
+  // This batch goes through get_volk_runtime(); it is reported as sse4_1.
+  start = clock();
+  for(int i = 0; i < ITER; i++){
+    get_volk_runtime()->volk_32f_dot_prod_unaligned16(&result_sse4_1[i], input, taps, vlen);
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("sse4_1_time: %f\n", total);
+
+  for(int i = 0; i < ITER; i++){
+    CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse[i], fabs(result_generic[i])*ERR_DELTA);
+    CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse3[i], fabs(result_generic[i])*ERR_DELTA);
+    CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse4_1[i], fabs(result_generic[i])*ERR_DELTA);
+  }
+  free(&input[-1]);  // undo the one-float shift before releasing
+  free(&taps[-1]);
+  free(result_generic);
+  free(result_sse);
+  free(result_sse3);
+  free(result_sse4_1);
+}
+
+#endif /* LV_HAVE_SSE4_1 */
diff --git a/volk/lib/qa_32f_dot_prod_unaligned16.h b/volk/lib/qa_32f_dot_prod_unaligned16.h
new file mode 100644
index 000000000..e8bad07fe
--- /dev/null
+++ b/volk/lib/qa_32f_dot_prod_unaligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_32F_DOT_PROD_UNALIGNED16_H
+#define INCLUDED_QA_32F_DOT_PROD_UNALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_32f_dot_prod_unaligned16 : public CppUnit::TestCase {
+  // Registers t1 as the only test of this CppUnit suite.
+  CPPUNIT_TEST_SUITE (qa_32f_dot_prod_unaligned16);
+  CPPUNIT_TEST (t1);
+  CPPUNIT_TEST_SUITE_END ();
+
+ private:
+  void t1 ();  // implemented in qa_32f_dot_prod_unaligned16.cc
+};
+
+
+#endif /* INCLUDED_QA_32F_DOT_PROD_UNALIGNED16_H */
diff --git a/volk/lib/qa_32f_fm_detect_aligned16.cc b/volk/lib/qa_32f_fm_detect_aligned16.cc
new file mode 100644
index 000000000..ca65add28
--- /dev/null
+++ b/volk/lib/qa_32f_fm_detect_aligned16.cc
@@ -0,0 +1,60 @@
+#include <volk/volk.h>
+#include <qa_32f_fm_detect_aligned16.h>
+#include <volk/volk_32f_fm_detect_aligned16.h>
+#include <cstdlib>
+
+//test for sse
+
+#ifndef LV_HAVE_SSE
+
+void qa_32f_fm_detect_aligned16::t1() {
+  // Compiled when LV_HAVE_SSE is undefined: report the skip, check nothing.
+  fputs("sse not available... no test performed\n", stdout);
+}
+#else
+
+void qa_32f_fm_detect_aligned16::t1() {
+  // Runs volk_32f_fm_detect_aligned16_manual with the "generic" and "sse"
+  // selectors on the same random input, prints the clock() time of each
+  // batch, and compares the outputs with a relative tolerance of 1e-4.
+  volk_environment_init();
+
+  const int vlen = 3201;
+  const int ITERS = 10000;
+  const float half_range = static_cast<float>((RAND_MAX/2));
+  float input0[vlen] __attribute__ ((aligned (16)));
+  float out_generic[vlen] __attribute__ ((aligned (16)));
+  float out_sse[vlen] __attribute__ ((aligned (16)));
+
+  // Samples are rand() shifted by RAND_MAX/2 and divided by RAND_MAX/2.
+  for(int n = 0; n != vlen; ++n) {
+    input0[n] = ((float) (rand() - (RAND_MAX/2))) / half_range;
+  }
+  printf("32f_fm_detect_aligned\n");
+
+  // "save" is handed to every call by address and starts each batch at 0.1.
+  const clock_t generic_begin = clock();
+  float save = 0.1;
+  for(int pass = ITERS; pass > 0; --pass) {
+    volk_32f_fm_detect_aligned16_manual(out_generic, input0, 1.0, &save, vlen, "generic");
+  }
+  const clock_t generic_end = clock();
+  printf("generic_time: %f\n", (double)(generic_end-generic_begin)/(double)CLOCKS_PER_SEC);
+
+  // Same workload with the "sse" selector.
+  const clock_t sse_begin = clock();
+  save = 0.1;
+  for(int pass = ITERS; pass > 0; --pass) {
+    volk_32f_fm_detect_aligned16_manual(out_sse, input0, 1.0, &save, vlen, "sse");
+  }
+  const clock_t sse_end = clock();
+  printf("sse_time: %f\n", (double)(sse_end-sse_begin)/(double)CLOCKS_PER_SEC);
+
+  // The allowed difference scales with the magnitude of the generic value.
+  for(int n = 0; n != vlen; ++n) {
+    const float expected = out_generic[n];
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, out_sse[n], fabs(expected) * 1e-4);
+  }
+}
+
+#endif
diff --git a/volk/lib/qa_32f_fm_detect_aligned16.h b/volk/lib/qa_32f_fm_detect_aligned16.h
new file mode 100644
index 000000000..a2680c524
--- /dev/null
+++ b/volk/lib/qa_32f_fm_detect_aligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_32F_FM_DETECT_ALIGNED16_H
+#define INCLUDED_QA_32F_FM_DETECT_ALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_32f_fm_detect_aligned16 : public CppUnit::TestCase {
+  // Registers t1 as the only test of this CppUnit suite.
+  CPPUNIT_TEST_SUITE (qa_32f_fm_detect_aligned16);
+  CPPUNIT_TEST (t1);
+  CPPUNIT_TEST_SUITE_END ();
+
+ private:
+  void t1 ();  // implemented in qa_32f_fm_detect_aligned16.cc
+};
+
+
+#endif /* INCLUDED_QA_32F_FM_DETECT_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_index_max_aligned16.cc b/volk/lib/qa_32f_index_max_aligned16.cc
new file mode 100644
index 000000000..a1c3d4cd1
--- /dev/null
+++ b/volk/lib/qa_32f_index_max_aligned16.cc
@@ -0,0 +1,103 @@
+#include <volk/volk_runtime.h>
+#include <volk/volk.h>
+#include <qa_32f_index_max_aligned16.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+
+#define ERR_DELTA (1e-4)
+#define NUM_ITERS 1000000
+#define VEC_LEN 3097
+static float uniform() {	// one rand() draw mapped onto [-1, 1]
+  const int draw = rand();
+  return 2.0 * ((float) draw / RAND_MAX - 0.5);
+}
+// Fills buf[0] .. buf[n-1] with uniform() samples scaled by 32767.
+static void
+random_floats (float *buf, unsigned n)
+{
+  float *slot = buf;
+  float *const stop = buf + n;
+  while (slot != stop) {
+    *slot++ = uniform () * 32767;
+  }
+}
+
+
+#ifndef LV_HAVE_SSE
+
+void qa_32f_index_max_aligned16::t1(){
+  // Compiled when LV_HAVE_SSE is undefined: report the skip, check nothing.
+  fputs("sse not available... no test performed\n", stdout);
+}
+#else
+
+
+void qa_32f_index_max_aligned16::t1(){
+  // Times volk_32f_index_max_aligned16 through the "generic" selector, the
+  // "sse2" selector and the entry point returned by get_volk_runtime() on
+  // one random vector of VEC_LEN floats, then requires all three calls to
+  // have reported the same index.
+  const int vlen = VEC_LEN;
+
+  volk_runtime_init();
+  volk_environment_init();
+
+  // One result slot per implementation, zero-initialised so the final
+  // comparison never reads an indeterminate value.
+  unsigned int target_generic = 0;
+  unsigned int target_sse = 0;
+  unsigned int target_sse4_1 = 0;
+  float* src0;
+
+  // posix_memalign() reports failure only through its return value, so the
+  // allocation is verified before the buffer is touched.
+  const int ret = posix_memalign((void**)&src0, 16, vlen *sizeof(float));
+  CPPUNIT_ASSERT_EQUAL(0, ret);
+
+  random_floats((float*)src0, vlen);
+
+  printf("32f_index_max_aligned16\n");
+
+  clock_t start, end;
+  double total;
+
+  // Each batch repeats one call NUM_ITERS times and prints the elapsed
+  // clock() time in seconds.
+  start = clock();
+  for(int k = 0; k < NUM_ITERS; ++k) {
+    volk_32f_index_max_aligned16_manual(&target_generic, src0, vlen, "generic");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("generic time: %f\n", total);
+
+  // NOTE(review): the selector string is "sse2" while the output label says
+  // "sse"; confirm which implementation this is meant to exercise.
+  start = clock();
+  for(int k = 0; k < NUM_ITERS; ++k) {
+    volk_32f_index_max_aligned16_manual(&target_sse, src0, vlen, "sse2");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("sse time: %f\n", total);
+
+  // This batch goes through get_volk_runtime(); it is reported as sse4.1.
+  start = clock();
+  for(int k = 0; k < NUM_ITERS; ++k) {
+    get_volk_runtime()->volk_32f_index_max_aligned16(&target_sse4_1, src0, vlen);
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("sse4.1 time: %f\n", total);
+
+  printf("generic: %u, sse: %u, sse4.1: %u\n", target_generic, target_sse, target_sse4_1);
+
+  // Only the scalar results are compared, so the buffer is released first;
+  // a failing assertion then cannot leak it.
+  free(src0);
+  CPPUNIT_ASSERT_EQUAL(target_generic, target_sse);
+  CPPUNIT_ASSERT_EQUAL(target_generic, target_sse4_1);
+}
+
+#endif /* LV_HAVE_SSE */
diff --git a/volk/lib/qa_32f_index_max_aligned16.h b/volk/lib/qa_32f_index_max_aligned16.h
new file mode 100644
index 000000000..8cadffa47
--- /dev/null
+++ b/volk/lib/qa_32f_index_max_aligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_32F_INDEX_MAX_ALIGNED16_H
+#define INCLUDED_QA_32F_INDEX_MAX_ALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_32f_index_max_aligned16 : public CppUnit::TestCase {
+  // Registers t1 as the only test of this CppUnit suite.
+  CPPUNIT_TEST_SUITE (qa_32f_index_max_aligned16);
+  CPPUNIT_TEST (t1);
+  CPPUNIT_TEST_SUITE_END ();
+
+ private:
+  void t1 ();  // implemented in qa_32f_index_max_aligned16.cc
+};
+
+
+#endif /* INCLUDED_QA_32F_INDEX_MAX_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_interleave_16sc_aligned16.cc b/volk/lib/qa_32f_interleave_16sc_aligned16.cc
new file mode 100644
index 000000000..2a937637f
--- /dev/null
+++ b/volk/lib/qa_32f_interleave_16sc_aligned16.cc
@@ -0,0 +1,75 @@
+#include <volk/volk.h>
+#include <qa_32f_interleave_16sc_aligned16.h>
+#include <volk/volk_32f_interleave_16sc_aligned16.h>
+#include <cstdlib>
+
+//test for sse
+
+#ifndef LV_HAVE_SSE2
+
+void qa_32f_interleave_16sc_aligned16::t1() {
+  // Compiled when LV_HAVE_SSE2 is undefined: report the skip, check nothing.
+  fputs("sse2 not available... no test performed\n", stdout);
+}
+#else
+
+void qa_32f_interleave_16sc_aligned16::t1() {
+  // Runs volk_32f_interleave_16sc_aligned16_manual with the "generic", "sse"
+  // and "sse2" selectors on the same two random inputs (32768.0 is passed as
+  // the fourth argument), prints the clock() time of each batch, and accepts
+  // outputs whose parts differ from the generic ones by at most 1.01.
+  volk_environment_init();
+
+  const int vlen = 3201;
+  const int ITERS = 100000;
+  const float half_range = static_cast<float>((RAND_MAX/2));
+  float input0[vlen] __attribute__ ((aligned (16)));
+  float input1[vlen] __attribute__ ((aligned (16)));
+  std::complex<int16_t> out_generic[vlen] __attribute__ ((aligned (16)));
+  std::complex<int16_t> out_sse[vlen] __attribute__ ((aligned (16)));
+  std::complex<int16_t> out_sse2[vlen] __attribute__ ((aligned (16)));
+
+  // Operands are rand() shifted by RAND_MAX/2 and divided by RAND_MAX/2.
+  for(int n = 0; n != vlen; ++n) {
+    input0[n] = ((float) (rand() - (RAND_MAX/2))) / half_range;
+    input1[n] = ((float) (rand() - (RAND_MAX/2))) / half_range;
+  }
+  printf("32f_interleave_16sc_aligned\n");
+
+  // Reference batch: ITERS back-to-back calls with the "generic" selector.
+  const clock_t generic_begin = clock();
+  for(int pass = ITERS; pass > 0; --pass) {
+    volk_32f_interleave_16sc_aligned16_manual(out_generic, input0, input1, 32768.0, vlen, "generic");
+  }
+  const clock_t generic_end = clock();
+  printf("generic_time: %f\n", (double)(generic_end-generic_begin)/(double)CLOCKS_PER_SEC);
+
+  // Same workload with the "sse" selector.
+  const clock_t sse_begin = clock();
+  for(int pass = ITERS; pass > 0; --pass) {
+    volk_32f_interleave_16sc_aligned16_manual(out_sse, input0, input1, 32768.0, vlen, "sse");
+  }
+  const clock_t sse_end = clock();
+  printf("sse_time: %f\n", (double)(sse_end-sse_begin)/(double)CLOCKS_PER_SEC);
+
+  // Same workload with the "sse2" selector.
+  const clock_t sse2_begin = clock();
+  for(int pass = ITERS; pass > 0; --pass) {
+    volk_32f_interleave_16sc_aligned16_manual(out_sse2, input0, input1, 32768.0, vlen, "sse2");
+  }
+  const clock_t sse2_end = clock();
+  printf("sse2_time: %f\n", (double)(sse2_end-sse2_begin)/(double)CLOCKS_PER_SEC);
+
+  // Real and imaginary parts are checked separately, first for the "sse"
+  // output and then for the "sse2" output.
+  for(int n = 0; n != vlen; ++n) {
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(std::real(out_generic[n]), std::real(out_sse[n]), 1.01);
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(std::imag(out_generic[n]), std::imag(out_sse[n]), 1.01);
+
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(std::real(out_generic[n]), std::real(out_sse2[n]), 1.01);
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(std::imag(out_generic[n]), std::imag(out_sse2[n]), 1.01);
+  }
+}
+
+#endif
diff --git a/volk/lib/qa_32f_interleave_16sc_aligned16.h b/volk/lib/qa_32f_interleave_16sc_aligned16.h
new file mode 100644
index 000000000..8d2914817
--- /dev/null
+++ b/volk/lib/qa_32f_interleave_16sc_aligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_32F_INTERLEAVE_16SC_ALIGNED16_H
+#define INCLUDED_QA_32F_INTERLEAVE_16SC_ALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_32f_interleave_16sc_aligned16 : public CppUnit::TestCase {
+  // Registers t1 as the only test of this CppUnit suite.
+  CPPUNIT_TEST_SUITE (qa_32f_interleave_16sc_aligned16);
+  CPPUNIT_TEST (t1);
+  CPPUNIT_TEST_SUITE_END ();
+
+ private:
+  void t1 ();  // implemented in qa_32f_interleave_16sc_aligned16.cc
+};
+
+
+#endif /* INCLUDED_QA_32F_INTERLEAVE_16SC_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_interleave_32fc_aligned16.cc b/volk/lib/qa_32f_interleave_32fc_aligned16.cc
new file mode 100644
index 000000000..c22dd1046
--- /dev/null
+++ b/volk/lib/qa_32f_interleave_32fc_aligned16.cc
@@ -0,0 +1,62 @@
+#include <volk/volk.h>
+#include <qa_32f_interleave_32fc_aligned16.h>
+#include <volk/volk_32f_interleave_32fc_aligned16.h>
+#include <cstdlib>
+
+//test for sse
+
+#ifndef LV_HAVE_SSE
+
+void qa_32f_interleave_32fc_aligned16::t1() {
+  // Compiled when LV_HAVE_SSE is undefined: report the skip, check nothing.
+  fputs("sse not available... no test performed\n", stdout);
+}
+#else
+
+void qa_32f_interleave_32fc_aligned16::t1() {
+  // Runs volk_32f_interleave_32fc_aligned16_manual with the "generic" and
+  // "sse" selectors on the same two random inputs, prints the clock() time of
+  // each batch, and compares real and imaginary parts within 1e-4 relative.
+  volk_environment_init();
+
+  const int vlen = 3201;
+  const int ITERS = 100000;
+  const float half_range = static_cast<float>((RAND_MAX/2));
+  float input0[vlen] __attribute__ ((aligned (16)));
+  float input1[vlen] __attribute__ ((aligned (16)));
+  std::complex<float> out_generic[vlen] __attribute__ ((aligned (16)));
+  std::complex<float> out_sse[vlen] __attribute__ ((aligned (16)));
+
+  // Operands are rand() shifted by RAND_MAX/2 and divided by RAND_MAX/2.
+  for(int n = 0; n != vlen; ++n) {
+    input0[n] = ((float) (rand() - (RAND_MAX/2))) / half_range;
+    input1[n] = ((float) (rand() - (RAND_MAX/2))) / half_range;
+  }
+  printf("32f_interleave_32fc_aligned\n");
+
+  // Reference batch: ITERS back-to-back calls with the "generic" selector.
+  const clock_t generic_begin = clock();
+  for(int pass = ITERS; pass > 0; --pass) {
+    volk_32f_interleave_32fc_aligned16_manual(out_generic, input0, input1, vlen, "generic");
+  }
+  const clock_t generic_end = clock();
+  printf("generic_time: %f\n", (double)(generic_end-generic_begin)/(double)CLOCKS_PER_SEC);
+
+  // Same workload with the "sse" selector.
+  const clock_t sse_begin = clock();
+  for(int pass = ITERS; pass > 0; --pass) {
+    volk_32f_interleave_32fc_aligned16_manual(out_sse, input0, input1, vlen, "sse");
+  }
+  const clock_t sse_end = clock();
+  printf("sse_time: %f\n", (double)(sse_end-sse_begin)/(double)CLOCKS_PER_SEC);
+
+  // The allowed difference scales with the magnitude of the generic part.
+  for(int n = 0; n != vlen; ++n) {
+    const float want_re = std::real(out_generic[n]);
+    const float want_im = std::imag(out_generic[n]);
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(want_re, std::real(out_sse[n]), fabs(want_re)*1e-4);
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(want_im, std::imag(out_sse[n]), fabs(want_im)*1e-4);
+  }
+}
+
+#endif
diff --git a/volk/lib/qa_32f_interleave_32fc_aligned16.h b/volk/lib/qa_32f_interleave_32fc_aligned16.h
new file mode 100644
index 000000000..cba518d37
--- /dev/null
+++ b/volk/lib/qa_32f_interleave_32fc_aligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_32F_INTERLEAVE_32FC_ALIGNED16_H
+#define INCLUDED_QA_32F_INTERLEAVE_32FC_ALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_32f_interleave_32fc_aligned16 : public CppUnit::TestCase {
+  // Registers t1 as the only test of this CppUnit suite.
+  CPPUNIT_TEST_SUITE (qa_32f_interleave_32fc_aligned16);
+  CPPUNIT_TEST (t1);
+  CPPUNIT_TEST_SUITE_END ();
+
+ private:
+  void t1 ();  // implemented in qa_32f_interleave_32fc_aligned16.cc
+};
+
+
+#endif /* INCLUDED_QA_32F_INTERLEAVE_32FC_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_max_aligned16.cc b/volk/lib/qa_32f_max_aligned16.cc
new file mode 100644
index 000000000..3ef375176
--- /dev/null
+++ b/volk/lib/qa_32f_max_aligned16.cc
@@ -0,0 +1,60 @@
+#include <volk/volk.h>
+#include <qa_32f_max_aligned16.h>
+#include <volk/volk_32f_max_aligned16.h>
+#include <cstdlib>
+
+//test for sse
+
+#ifndef LV_HAVE_SSE
+
+void qa_32f_max_aligned16::t1() {
+  // Compiled when LV_HAVE_SSE is undefined: report the skip, check nothing.
+  fputs("sse not available... no test performed\n", stdout);
+}
+#else
+
+void qa_32f_max_aligned16::t1() {
+  // Runs volk_32f_max_aligned16_manual with the "generic" and "sse"
+  // selectors on identical random inputs, prints the clock() time of each
+  // batch, and then requires the two output vectors to be exactly equal.
+  volk_environment_init();
+
+  const int vlen = 3201;
+  const int ITERS = 100000;
+  const float half_range = static_cast<float>((RAND_MAX/2));
+  float input0[vlen] __attribute__ ((aligned (16)));
+  float input1[vlen] __attribute__ ((aligned (16)));
+  float out_generic[vlen] __attribute__ ((aligned (16)));
+  float out_sse[vlen] __attribute__ ((aligned (16)));
+
+  // Operands are rand() shifted by RAND_MAX/2 and divided by RAND_MAX/2.
+  for(int n = 0; n != vlen; ++n) {
+    input0[n] = ((float) (rand() - (RAND_MAX/2))) / half_range;
+    input1[n] = ((float) (rand() - (RAND_MAX/2))) / half_range;
+  }
+  printf("32f_max_aligned\n");
+
+  // Reference batch: ITERS back-to-back calls with the "generic" selector.
+  const clock_t generic_begin = clock();
+  for(int pass = ITERS; pass > 0; --pass) {
+    volk_32f_max_aligned16_manual(out_generic, input0, input1, vlen, "generic");
+  }
+  const clock_t generic_end = clock();
+  printf("generic_time: %f\n", (double)(generic_end-generic_begin)/(double)CLOCKS_PER_SEC);
+
+  // Same workload with the "sse" selector.
+  const clock_t sse_begin = clock();
+  for(int pass = ITERS; pass > 0; --pass) {
+    volk_32f_max_aligned16_manual(out_sse, input0, input1, vlen, "sse");
+  }
+  const clock_t sse_end = clock();
+  printf("sse_time: %f\n", (double)(sse_end-sse_begin)/(double)CLOCKS_PER_SEC);
+
+  // No tolerance here: every element must compare equal.
+  for(int n = 0; n != vlen; ++n) {
+    const float expected = out_generic[n];
+    CPPUNIT_ASSERT_EQUAL(expected, out_sse[n]);
+  }
+}
+
+#endif
diff --git a/volk/lib/qa_32f_max_aligned16.h b/volk/lib/qa_32f_max_aligned16.h
new file mode 100644
index 000000000..d535479f4
--- /dev/null
+++ b/volk/lib/qa_32f_max_aligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_32F_MAX_ALIGNED16_H
+#define INCLUDED_QA_32F_MAX_ALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_32f_max_aligned16 : public CppUnit::TestCase {
+  // Registers t1 as the only test of this CppUnit suite.
+  CPPUNIT_TEST_SUITE (qa_32f_max_aligned16);
+  CPPUNIT_TEST (t1);
+  CPPUNIT_TEST_SUITE_END ();
+
+ private:
+  void t1 ();  // implemented in qa_32f_max_aligned16.cc
+};
+
+
+#endif /* INCLUDED_QA_32F_MAX_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_min_aligned16.cc b/volk/lib/qa_32f_min_aligned16.cc
new file mode 100644
index 000000000..617e18b24
--- /dev/null
+++ b/volk/lib/qa_32f_min_aligned16.cc
@@ -0,0 +1,60 @@
+#include <volk/volk.h>
+#include <qa_32f_min_aligned16.h>
+#include <volk/volk_32f_min_aligned16.h>
+#include <cstdlib>
+
+//test for sse
+
+#ifndef LV_HAVE_SSE
+
+void qa_32f_min_aligned16::t1() {
+  // Compiled when LV_HAVE_SSE is undefined: report the skip, check nothing.
+  fputs("sse not available... no test performed\n", stdout);
+}
+#else
+
+void qa_32f_min_aligned16::t1() {
+  // Runs volk_32f_min_aligned16_manual with the "generic" and "sse"
+  // selectors on identical random inputs, prints the clock() time of each
+  // batch, and then requires the two output vectors to be exactly equal.
+  volk_environment_init();
+
+  const int vlen = 3201;
+  const int ITERS = 100000;
+  const float half_range = static_cast<float>((RAND_MAX/2));
+  float input0[vlen] __attribute__ ((aligned (16)));
+  float input1[vlen] __attribute__ ((aligned (16)));
+  float out_generic[vlen] __attribute__ ((aligned (16)));
+  float out_sse[vlen] __attribute__ ((aligned (16)));
+
+  // Operands are rand() shifted by RAND_MAX/2 and divided by RAND_MAX/2.
+  for(int n = 0; n != vlen; ++n) {
+    input0[n] = ((float) (rand() - (RAND_MAX/2))) / half_range;
+    input1[n] = ((float) (rand() - (RAND_MAX/2))) / half_range;
+  }
+  printf("32f_min_aligned\n");
+
+  // Reference batch: ITERS back-to-back calls with the "generic" selector.
+  const clock_t generic_begin = clock();
+  for(int pass = ITERS; pass > 0; --pass) {
+    volk_32f_min_aligned16_manual(out_generic, input0, input1, vlen, "generic");
+  }
+  const clock_t generic_end = clock();
+  printf("generic_time: %f\n", (double)(generic_end-generic_begin)/(double)CLOCKS_PER_SEC);
+
+  // Same workload with the "sse" selector.
+  const clock_t sse_begin = clock();
+  for(int pass = ITERS; pass > 0; --pass) {
+    volk_32f_min_aligned16_manual(out_sse, input0, input1, vlen, "sse");
+  }
+  const clock_t sse_end = clock();
+  printf("sse_time: %f\n", (double)(sse_end-sse_begin)/(double)CLOCKS_PER_SEC);
+
+  // No tolerance here: every element must compare equal.
+  for(int n = 0; n != vlen; ++n) {
+    const float expected = out_generic[n];
+    CPPUNIT_ASSERT_EQUAL(expected, out_sse[n]);
+  }
+}
+
+#endif
diff --git a/volk/lib/qa_32f_min_aligned16.h b/volk/lib/qa_32f_min_aligned16.h
new file mode 100644
index 000000000..90961ac92
--- /dev/null
+++ b/volk/lib/qa_32f_min_aligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_32F_MIN_ALIGNED16_H
+#define INCLUDED_QA_32F_MIN_ALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_32f_min_aligned16 : public CppUnit::TestCase {
+  // CppUnit suite declaration: t1 is the only registered test.
+  CPPUNIT_TEST_SUITE (qa_32f_min_aligned16);
+  CPPUNIT_TEST (t1);
+  CPPUNIT_TEST_SUITE_END ();
+  // t1 is implemented in qa_32f_min_aligned16.cc.
+ private:
+  void t1 ();
+};
+
+
+#endif /* INCLUDED_QA_32F_MIN_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_multiply_aligned16.cc b/volk/lib/qa_32f_multiply_aligned16.cc
new file mode 100644
index 000000000..c77fe97da
--- /dev/null
+++ b/volk/lib/qa_32f_multiply_aligned16.cc
@@ -0,0 +1,60 @@
+#include <volk/volk.h>
+#include <qa_32f_multiply_aligned16.h>
+#include <volk/volk_32f_multiply_aligned16.h>
+#include <cstdlib>
+
+//test for sse
+
+#ifndef LV_HAVE_SSE
+
+// Built without SSE: there is no second implementation to compare, so only say so.
+void qa_32f_multiply_aligned16::t1() {
+  printf("sse not available... no test performed\n");
+}
+
+#else
+
+// Times the "generic" and "sse" variants of volk_32f_multiply_aligned16 on the
+// same random inputs, then requires the two outputs to be equal element by element.
+void qa_32f_multiply_aligned16::t1() {
+  volk_environment_init();
+
+  const int vlen = 3201;
+  const int ITERS = 100000;
+  float input0[vlen] __attribute__ ((aligned (16)));
+  float input1[vlen] __attribute__ ((aligned (16)));
+  float output0[vlen] __attribute__ ((aligned (16)));
+  float output01[vlen] __attribute__ ((aligned (16)));
+
+  // Each sample is a rand() value re-centred on zero and divided by RAND_MAX/2.
+  const int half_range = RAND_MAX/2;
+  for(int idx = 0; idx != vlen; ++idx) {
+    input0[idx] = ((float) (rand() - half_range)) / static_cast<float>(half_range);
+    input1[idx] = ((float) (rand() - half_range)) / static_cast<float>(half_range);
+  }
+  printf("32f_multiply_aligned\n");
+
+  // "generic" run; its output is the expected value in the comparison below.
+  const clock_t generic_begin = clock();
+  for(int pass = ITERS; pass > 0; --pass) {
+    volk_32f_multiply_aligned16_manual(output0, input0, input1, vlen, "generic");
+  }
+  const clock_t generic_end = clock();
+  printf("generic_time: %f\n",
+         (double)(generic_end-generic_begin)/(double)CLOCKS_PER_SEC);
+
+  // "sse" run over the same inputs.
+  const clock_t sse_begin = clock();
+  for(int pass = ITERS; pass > 0; --pass) {
+    volk_32f_multiply_aligned16_manual(output01, input0, input1, vlen, "sse");
+  }
+  const clock_t sse_end = clock();
+  printf("sse_time: %f\n",
+         (double)(sse_end-sse_begin)/(double)CLOCKS_PER_SEC);
+
+  for(int idx = 0; idx != vlen; ++idx) {
+    CPPUNIT_ASSERT_EQUAL(output0[idx], output01[idx]);
+  }
+}
+
+#endif
diff --git a/volk/lib/qa_32f_multiply_aligned16.h b/volk/lib/qa_32f_multiply_aligned16.h
new file mode 100644
index 000000000..7032a2ad4
--- /dev/null
+++ b/volk/lib/qa_32f_multiply_aligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_32F_MULTIPLY_ALIGNED16_H
+#define INCLUDED_QA_32F_MULTIPLY_ALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_32f_multiply_aligned16 : public CppUnit::TestCase {
+  // CppUnit suite declaration: t1 is the only registered test.
+  CPPUNIT_TEST_SUITE (qa_32f_multiply_aligned16);
+  CPPUNIT_TEST (t1);
+  CPPUNIT_TEST_SUITE_END ();
+  // t1 is implemented in qa_32f_multiply_aligned16.cc.
+ private:
+  void t1 ();
+};
+
+
+#endif /* INCLUDED_QA_32F_MULTIPLY_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_normalize_aligned16.cc b/volk/lib/qa_32f_normalize_aligned16.cc
new file mode 100644
index 000000000..2954fc3ae
--- /dev/null
+++ b/volk/lib/qa_32f_normalize_aligned16.cc
@@ -0,0 +1,65 @@
+#include <volk/volk.h>
+#include <qa_32f_normalize_aligned16.h>
+#include <volk/volk_32f_normalize_aligned16.h>
+#include <cstdlib>
+#include <cstring>
+
+//test for sse
+
+#ifndef LV_HAVE_SSE
+
+void qa_32f_normalize_aligned16::t1() {
+  printf("sse not available... no test performed\n");
+}
+
+#else
+
+// Applies volk_32f_normalize_aligned16 (scalar 1.15) ITERS times to two copies
+// of the same random buffer, one copy per implementation ("generic", "sse"),
+// timing each, then requires the copies to agree within 1e-4 (relative).
+void qa_32f_normalize_aligned16::t1() {
+  volk_environment_init();
+  clock_t start, end;
+  double total;
+  const int vlen = 320001;
+  const int ITERS = 100;
+
+  // Start from NULL and verify both allocations: the buffers are written to
+  // immediately, so a failed posix_memalign must fail the test right here.
+  float* output0 = NULL;
+  float* output01 = NULL;
+  const int ret0 = posix_memalign((void**)&output0, 16, vlen*sizeof(float));
+  const int ret1 = posix_memalign((void**)&output01, 16, vlen*sizeof(float));
+  CPPUNIT_ASSERT_MESSAGE("posix_memalign failed", ret0 == 0 && ret1 == 0);
+
+  for(int i = 0; i < vlen; ++i) {
+    output0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
+  }
+  memcpy(output01, output0, vlen*sizeof(float));
+  printf("32f_normalize_aligned\n");
+
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_32f_normalize_aligned16_manual(output0, 1.15, vlen, "generic");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("generic_time: %f\n", total);
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_32f_normalize_aligned16_manual(output01, 1.15, vlen, "sse");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("sse_time: %f\n", total);
+
+  // The tolerance is relative to the generic result.
+  for(int i = 0; i < vlen; ++i) {
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(output0[i], output01[i], fabs(output0[i])*1e-4);
+  }
+
+  free(output0);
+  free(output01);
+}
+
+#endif
diff --git a/volk/lib/qa_32f_normalize_aligned16.h b/volk/lib/qa_32f_normalize_aligned16.h
new file mode 100644
index 000000000..7c421eb82
--- /dev/null
+++ b/volk/lib/qa_32f_normalize_aligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_32F_NORMALIZE_ALIGNED16_H
+#define INCLUDED_QA_32F_NORMALIZE_ALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_32f_normalize_aligned16 : public CppUnit::TestCase {
+  // CppUnit suite declaration: t1 is the only registered test.
+  CPPUNIT_TEST_SUITE (qa_32f_normalize_aligned16);
+  CPPUNIT_TEST (t1);
+  CPPUNIT_TEST_SUITE_END ();
+  // t1 is implemented in qa_32f_normalize_aligned16.cc.
+ private:
+  void t1 ();
+};
+
+
+#endif /* INCLUDED_QA_32F_NORMALIZE_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_power_aligned16.cc b/volk/lib/qa_32f_power_aligned16.cc
new file mode 100644
index 000000000..1b331daeb
--- /dev/null
+++ b/volk/lib/qa_32f_power_aligned16.cc
@@ -0,0 +1,95 @@
+#include <volk/volk_runtime.h>
+#include <volk/volk.h>
+#include <qa_32f_power_aligned16.h>
+#include <stdlib.h>
+#include <math.h>
+#include <time.h>
+
+#define	ERR_DELTA	(1e-4)
+
+// Test helper: returns rand() scaled by RAND_MAX and mapped linearly onto [-1, 1].
+static float uniform() {
+  return 2.0 * ((float) rand() / RAND_MAX - 0.5);	// 2 * (r - 0.5) with r in [0, 1]
+}
+
+// Fills buf[0] .. buf[n-1] with successive uniform() values.
+static void random_floats (float *buf, unsigned n)
+{
+  float *const stop = buf + n;
+  while (buf != stop) *buf++ = uniform ();
+}
+
+#ifdef LV_HAVE_SSE
+// Raises one random vector to the power 3 three ways -- "generic", "sse" and
+// the implementation reached through get_volk_runtime() -- timing each, then
+// requires the last two to match generic within ERR_DELTA (relative).
+void qa_32f_power_aligned16::t1() {
+  volk_runtime_init();
+
+  const int vlen = 2046;
+  const int ITERS = 10000;
+
+  volk_environment_init();
+  int ret;
+  clock_t start, end;
+  double total;
+  float* input = NULL;
+  int i;
+  float* result_generic = NULL;
+  float* result_sse = NULL;
+  float* result_sse4_1 = NULL;
+
+  // OR the return codes together so that any failed allocation is noticed.
+  ret  = posix_memalign((void**)&input, 16, vlen *  sizeof(float));
+  ret |= posix_memalign((void**)&result_generic, 16, vlen * sizeof(float));
+  ret |= posix_memalign((void**)&result_sse, 16, vlen * sizeof(float));
+  ret |= posix_memalign((void**)&result_sse4_1, 16, vlen * sizeof(float));
+  CPPUNIT_ASSERT_MESSAGE("posix_memalign failed", ret == 0);
+
+  random_floats((float*)input, vlen);
+
+  const float power = 3;
+
+  printf("32f_power_aligned16\n");
+
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_32f_power_aligned16_manual(result_generic, input, power, vlen,  "generic");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("generic_time: %f\n", total);
+
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_32f_power_aligned16_manual(result_sse, input, power, vlen,  "sse");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("sse_time: %f\n", total);
+
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    get_volk_runtime()->volk_32f_power_aligned16(result_sse4_1, input, power, vlen);
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("sse4.1_time: %f\n", total);
+
+  for(i = 0; i < vlen; i++){
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(result_generic[i], result_sse[i], fabs(result_generic[i])* ERR_DELTA);
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(result_generic[i], result_sse4_1[i], fabs(result_generic[i])* ERR_DELTA);
+  }
+
+  free(input);
+  free(result_generic);
+  free(result_sse);
+  free(result_sse4_1);  // this buffer was allocated above but never released
+}
+#else
+void qa_32f_power_aligned16::t1() {
+  printf("sse not available... no test performed\n");
+}
+
+#endif /* LV_HAVE_SSE */
+
diff --git a/volk/lib/qa_32f_power_aligned16.h b/volk/lib/qa_32f_power_aligned16.h
new file mode 100644
index 000000000..d45df4e56
--- /dev/null
+++ b/volk/lib/qa_32f_power_aligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_32F_POWER_ALIGNED16_H
+#define INCLUDED_QA_32F_POWER_ALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_32f_power_aligned16 : public CppUnit::TestCase {
+  // CppUnit suite declaration: t1 is the only registered test.
+  CPPUNIT_TEST_SUITE (qa_32f_power_aligned16);
+  CPPUNIT_TEST (t1);
+  CPPUNIT_TEST_SUITE_END ();
+  // t1 is implemented in qa_32f_power_aligned16.cc.
+ private:
+  void t1 ();
+};
+
+
+#endif /* INCLUDED_QA_32F_POWER_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_sqrt_aligned16.cc b/volk/lib/qa_32f_sqrt_aligned16.cc
new file mode 100644
index 000000000..a3e6abc18
--- /dev/null
+++ b/volk/lib/qa_32f_sqrt_aligned16.cc
@@ -0,0 +1,59 @@
+#include <volk/volk.h>
+#include <qa_32f_sqrt_aligned16.h>
+#include <volk/volk_32f_sqrt_aligned16.h>
+#include <cstdlib>
+
+//test for sse
+
+#ifndef LV_HAVE_SSE
+
+// Built without SSE: there is no second implementation to compare, so only say so.
+void qa_32f_sqrt_aligned16::t1() {
+  printf("sse not available... no test performed\n");
+}
+
+#else
+
+// Times the "generic" and "sse" variants of volk_32f_sqrt_aligned16 on the same
+// random input, then requires the two outputs to agree within a relative
+// tolerance of 1e-4.
+void qa_32f_sqrt_aligned16::t1() {
+  volk_environment_init();
+
+  const int vlen = 3201;
+  const int ITERS = 100000;
+  float input0[vlen] __attribute__ ((aligned (16)));
+  float output0[vlen] __attribute__ ((aligned (16)));
+  float output01[vlen] __attribute__ ((aligned (16)));
+
+  // No reason to test negative numbers because they result in NaN.
+  for(int idx = 0; idx != vlen; ++idx) {
+    input0[idx] = ((float) (rand()) / static_cast<float>(RAND_MAX));
+  }
+  printf("32f_sqrt_aligned\n");
+
+  // "generic" run; its output is the expected value in the comparison below.
+  const clock_t generic_begin = clock();
+  for(int pass = ITERS; pass > 0; --pass) {
+    volk_32f_sqrt_aligned16_manual(output0, input0, vlen, "generic");
+  }
+  const clock_t generic_end = clock();
+  printf("generic_time: %f\n",
+         (double)(generic_end-generic_begin)/(double)CLOCKS_PER_SEC);
+
+  // "sse" run over the same input.
+  const clock_t sse_begin = clock();
+  for(int pass = ITERS; pass > 0; --pass) {
+    volk_32f_sqrt_aligned16_manual(output01, input0, vlen, "sse");
+  }
+  const clock_t sse_end = clock();
+  printf("sse_time: %f\n",
+         (double)(sse_end-sse_begin)/(double)CLOCKS_PER_SEC);
+
+  // The tolerance is relative to the generic result.
+  for(int idx = 0; idx != vlen; ++idx) {
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(output0[idx], output01[idx], fabs(output0[idx])*1e-4);
+  }
+}
+
+#endif
diff --git a/volk/lib/qa_32f_sqrt_aligned16.h b/volk/lib/qa_32f_sqrt_aligned16.h
new file mode 100644
index 000000000..e4b99d981
--- /dev/null
+++ b/volk/lib/qa_32f_sqrt_aligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_32F_SQRT_ALIGNED16_H
+#define INCLUDED_QA_32F_SQRT_ALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_32f_sqrt_aligned16 : public CppUnit::TestCase {
+  // CppUnit suite declaration: t1 is the only registered test.
+  CPPUNIT_TEST_SUITE (qa_32f_sqrt_aligned16);
+  CPPUNIT_TEST (t1);
+  CPPUNIT_TEST_SUITE_END ();
+  // t1 is implemented in qa_32f_sqrt_aligned16.cc.
+ private:
+  void t1 ();
+};
+
+
+#endif /* INCLUDED_QA_32F_SQRT_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_stddev_aligned16.cc b/volk/lib/qa_32f_stddev_aligned16.cc
new file mode 100644
index 000000000..c0f22cdea
--- /dev/null
+++ b/volk/lib/qa_32f_stddev_aligned16.cc
@@ -0,0 +1,74 @@
+#include <volk/volk_runtime.h>
+#include <volk/volk.h>
+#include <qa_32f_stddev_aligned16.h>
+#include <volk/volk_32f_stddev_aligned16.h>
+#include <cstdlib>
+
+//test for sse
+
+#ifndef LV_HAVE_SSE
+
+// Built without SSE: there is no second implementation to compare, so only say so.
+void qa_32f_stddev_aligned16::t1() {
+  printf("sse not available... no test performed\n");
+}
+
+#else
+
+// Runs the 32f_stddev kernel on one random vector three ways -- "generic",
+// "sse" and the implementation reached through get_volk_runtime() -- timing
+// each, then requires the last two to match generic within 1e-4 (relative).
+void qa_32f_stddev_aligned16::t1() {
+  volk_runtime_init();
+
+  volk_environment_init();
+
+  const int vlen = 3201;
+  const int ITERS = 100000;
+  float input0[vlen] __attribute__ ((aligned (16)));
+
+  // The kernels take the mean as an argument, so accumulate it while filling.
+  const int half_range = RAND_MAX/2;
+  float mean = 0;
+  for(int idx = 0; idx != vlen; ++idx) {
+    input0[idx] = ((float) (rand() - half_range)) / static_cast<float>(half_range);
+    mean += input0[idx];
+  }
+  mean /= static_cast<float>(vlen);
+
+  printf("32f_stddev_aligned\n");
+
+  float stddev_generic;
+  const clock_t generic_begin = clock();
+  for(int pass = ITERS; pass > 0; --pass) {
+    volk_32f_stddev_aligned16_manual(&stddev_generic, input0, mean, vlen, "generic");
+  }
+  const clock_t generic_end = clock();
+  printf("generic_time: %f\n",
+         (double)(generic_end-generic_begin)/(double)CLOCKS_PER_SEC);
+
+  float stddev_sse;
+  const clock_t sse_begin = clock();
+  for(int pass = ITERS; pass > 0; --pass) {
+    volk_32f_stddev_aligned16_manual(&stddev_sse, input0, mean, vlen, "sse");
+  }
+  const clock_t sse_end = clock();
+  printf("sse_time: %f\n",
+         (double)(sse_end-sse_begin)/(double)CLOCKS_PER_SEC);
+
+  // This run goes through the runtime table instead of naming an implementation.
+  float stddev_sse4_1;
+  const clock_t runtime_begin = clock();
+  for(int pass = ITERS; pass > 0; --pass) {
+    get_volk_runtime()->volk_32f_stddev_aligned16(&stddev_sse4_1, input0, mean, vlen);
+  }
+  const clock_t runtime_end = clock();
+  printf("sse4_1_time: %f\n",
+         (double)(runtime_end-runtime_begin)/(double)CLOCKS_PER_SEC);
+
+  // The generic result is the expected value for both comparisons.
+  CPPUNIT_ASSERT_DOUBLES_EQUAL(stddev_generic, stddev_sse, fabs(stddev_generic)*1e-4);
+  CPPUNIT_ASSERT_DOUBLES_EQUAL(stddev_generic, stddev_sse4_1, fabs(stddev_generic)*1e-4);
+}
+
+#endif
diff --git a/volk/lib/qa_32f_stddev_aligned16.h b/volk/lib/qa_32f_stddev_aligned16.h
new file mode 100644
index 000000000..7f8d7a5fc
--- /dev/null
+++ b/volk/lib/qa_32f_stddev_aligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_32F_STDDEV_ALIGNED16_H
+#define INCLUDED_QA_32F_STDDEV_ALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_32f_stddev_aligned16 : public CppUnit::TestCase {
+  // CppUnit suite declaration: t1 is the only registered test.
+  CPPUNIT_TEST_SUITE (qa_32f_stddev_aligned16);
+  CPPUNIT_TEST (t1);
+  CPPUNIT_TEST_SUITE_END ();
+  // t1 is implemented in qa_32f_stddev_aligned16.cc.
+ private:
+  void t1 ();
+};
+
+
+#endif /* INCLUDED_QA_32F_STDDEV_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_stddev_and_mean_aligned16.cc b/volk/lib/qa_32f_stddev_and_mean_aligned16.cc
new file mode 100644
index 000000000..dcad8bcf3
--- /dev/null
+++ b/volk/lib/qa_32f_stddev_and_mean_aligned16.cc
@@ -0,0 +1,75 @@
+#include <volk/volk_runtime.h>
+#include <volk/volk.h>
+#include <qa_32f_stddev_and_mean_aligned16.h>
+#include <volk/volk_32f_stddev_and_mean_aligned16.h>
+#include <cstdlib>
+
+//test for sse
+
+#ifndef LV_HAVE_SSE
+
+// Built without SSE: there is no second implementation to compare, so only say so.
+void qa_32f_stddev_and_mean_aligned16::t1() {
+  printf("sse not available... no test performed\n");
+}
+
+#else
+
+// Runs the 32f_stddev_and_mean kernel on one random vector three ways --
+// "generic", "sse" and the implementation reached through get_volk_runtime() --
+// timing each, then requires both outputs of the last two to match generic.
+void qa_32f_stddev_and_mean_aligned16::t1() {
+  volk_runtime_init();
+
+  volk_environment_init();
+
+  const int vlen = 3201;
+  const int ITERS = 100000;
+  float input0[vlen] __attribute__ ((aligned (16)));
+
+  const int half_range = RAND_MAX/2;
+  for(int idx = 0; idx != vlen; ++idx) {
+    input0[idx] = ((float) (rand() - half_range)) / static_cast<float>(half_range);
+  }
+  printf("32f_stddev_and_mean_aligned\n");
+
+  float stddev_generic;
+  float mean_generic;
+  const clock_t generic_begin = clock();
+  for(int pass = ITERS; pass > 0; --pass) {
+    volk_32f_stddev_and_mean_aligned16_manual(&stddev_generic, &mean_generic, input0,vlen, "generic");
+  }
+  const clock_t generic_end = clock();
+  printf("generic_time: %f\n",
+         (double)(generic_end-generic_begin)/(double)CLOCKS_PER_SEC);
+
+  float stddev_sse;
+  float mean_sse;
+  const clock_t sse_begin = clock();
+  for(int pass = ITERS; pass > 0; --pass) {
+    volk_32f_stddev_and_mean_aligned16_manual(&stddev_sse, &mean_sse, input0,vlen, "sse");
+  }
+  const clock_t sse_end = clock();
+  printf("sse_time: %f\n",
+         (double)(sse_end-sse_begin)/(double)CLOCKS_PER_SEC);
+
+  // This run goes through the runtime table instead of naming an implementation.
+  float stddev_sse4_1;
+  float mean_sse4_1;
+  const clock_t runtime_begin = clock();
+  for(int pass = ITERS; pass > 0; --pass) {
+    get_volk_runtime()->volk_32f_stddev_and_mean_aligned16(&stddev_sse4_1, &mean_sse4_1, input0, vlen);
+  }
+  const clock_t runtime_end = clock();
+  printf("sse4_1_time: %f\n",
+         (double)(runtime_end-runtime_begin)/(double)CLOCKS_PER_SEC);
+
+  // The generic results are the expected values; tolerances are 1e-4 relative.
+  CPPUNIT_ASSERT_DOUBLES_EQUAL(stddev_generic, stddev_sse, fabs(stddev_generic)*1e-4);
+  CPPUNIT_ASSERT_DOUBLES_EQUAL(mean_generic, mean_sse, fabs(mean_generic)*1e-4);
+
+  CPPUNIT_ASSERT_DOUBLES_EQUAL(stddev_generic, stddev_sse4_1, fabs(stddev_generic)*1e-4);
+  CPPUNIT_ASSERT_DOUBLES_EQUAL(mean_generic, mean_sse4_1, fabs(mean_generic)*1e-4);
+}
+
+#endif
diff --git a/volk/lib/qa_32f_stddev_and_mean_aligned16.h b/volk/lib/qa_32f_stddev_and_mean_aligned16.h
new file mode 100644
index 000000000..e08bd249a
--- /dev/null
+++ b/volk/lib/qa_32f_stddev_and_mean_aligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_32F_STDDEV_AND_MEAN_ALIGNED16_H
+#define INCLUDED_QA_32F_STDDEV_AND_MEAN_ALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_32f_stddev_and_mean_aligned16 : public CppUnit::TestCase {
+  // CppUnit suite declaration: t1 is the only registered test.
+  CPPUNIT_TEST_SUITE (qa_32f_stddev_and_mean_aligned16);
+  CPPUNIT_TEST (t1);
+  CPPUNIT_TEST_SUITE_END ();
+  // t1 is implemented in qa_32f_stddev_and_mean_aligned16.cc.
+ private:
+  void t1 ();
+};
+
+
+#endif /* INCLUDED_QA_32F_STDDEV_AND_MEAN_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_subtract_aligned16.cc b/volk/lib/qa_32f_subtract_aligned16.cc
new file mode 100644
index 000000000..a7e1b5ae3
--- /dev/null
+++ b/volk/lib/qa_32f_subtract_aligned16.cc
@@ -0,0 +1,60 @@
+#include <volk/volk.h>
+#include <qa_32f_subtract_aligned16.h>
+#include <volk/volk_32f_subtract_aligned16.h>
+#include <cstdlib>
+
+//test for sse
+
+#ifndef LV_HAVE_SSE
+
+// Built without SSE: there is no second implementation to compare, so only say so.
+void qa_32f_subtract_aligned16::t1() {
+  printf("sse not available... no test performed\n");
+}
+
+#else
+
+// Times the "generic" and "sse" variants of volk_32f_subtract_aligned16 on the
+// same random inputs, then requires the two outputs to be equal element by element.
+void qa_32f_subtract_aligned16::t1() {
+  volk_environment_init();
+
+  const int vlen = 3201;
+  const int ITERS = 100000;
+  float input0[vlen] __attribute__ ((aligned (16)));
+  float input1[vlen] __attribute__ ((aligned (16)));
+  float output0[vlen] __attribute__ ((aligned (16)));
+  float output01[vlen] __attribute__ ((aligned (16)));
+
+  // Each sample is a rand() value re-centred on zero and divided by RAND_MAX/2.
+  const int half_range = RAND_MAX/2;
+  for(int idx = 0; idx != vlen; ++idx) {
+    input0[idx] = ((float) (rand() - half_range)) / static_cast<float>(half_range);
+    input1[idx] = ((float) (rand() - half_range)) / static_cast<float>(half_range);
+  }
+  printf("32f_subtract_aligned\n");
+
+  // "generic" run; its output is the expected value in the comparison below.
+  const clock_t generic_begin = clock();
+  for(int pass = ITERS; pass > 0; --pass) {
+    volk_32f_subtract_aligned16_manual(output0, input0, input1, vlen, "generic");
+  }
+  const clock_t generic_end = clock();
+  printf("generic_time: %f\n",
+         (double)(generic_end-generic_begin)/(double)CLOCKS_PER_SEC);
+
+  // "sse" run over the same inputs.
+  const clock_t sse_begin = clock();
+  for(int pass = ITERS; pass > 0; --pass) {
+    volk_32f_subtract_aligned16_manual(output01, input0, input1, vlen, "sse");
+  }
+  const clock_t sse_end = clock();
+  printf("sse_time: %f\n",
+         (double)(sse_end-sse_begin)/(double)CLOCKS_PER_SEC);
+
+  for(int idx = 0; idx != vlen; ++idx) {
+    CPPUNIT_ASSERT_EQUAL(output0[idx], output01[idx]);
+  }
+}
+
+#endif
diff --git a/volk/lib/qa_32f_subtract_aligned16.h b/volk/lib/qa_32f_subtract_aligned16.h
new file mode 100644
index 000000000..97c14f129
--- /dev/null
+++ b/volk/lib/qa_32f_subtract_aligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_32F_SUBTRACT_ALIGNED16_H
+#define INCLUDED_QA_32F_SUBTRACT_ALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_32f_subtract_aligned16 : public CppUnit::TestCase {
+  // CppUnit suite declaration: t1 is the only registered test.
+  CPPUNIT_TEST_SUITE (qa_32f_subtract_aligned16);
+  CPPUNIT_TEST (t1);
+  CPPUNIT_TEST_SUITE_END ();
+  // t1 is implemented in qa_32f_subtract_aligned16.cc.
+ private:
+  void t1 ();
+};
+
+
+#endif /* INCLUDED_QA_32F_SUBTRACT_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_sum_of_poly_aligned16.cc b/volk/lib/qa_32f_sum_of_poly_aligned16.cc
new file mode 100644
index 000000000..494776357
--- /dev/null
+++ b/volk/lib/qa_32f_sum_of_poly_aligned16.cc
@@ -0,0 +1,142 @@
+#include <volk/volk.h>
+#include <qa_32f_sum_of_poly_aligned16.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <math.h>
+
+#define SNR 30.0
+#define CENTER -4.0
+#define CUTOFF -5.595
+#define ERR_DELTA (1e-4)
+#define NUM_ITERS 100000
+#define VEC_LEN 64
+static float uniform() {
+  return ((float) rand() / RAND_MAX);	// rand() divided by RAND_MAX, so within [0, 1]
+}
+
+// Fills buf[0] .. buf[n-1] with uniform() draws, each multiplied by -SNR and
+// then divided by 2.0.
+static void
+random_floats (float *buf, unsigned n)
+{
+  float *const stop = buf + n;
+  for (float *cursor = buf; cursor != stop; ++cursor) {
+    *cursor = uniform () * -SNR/2.0;
+  }
+}
+
+
+#ifndef LV_HAVE_SSE3
+
+void qa_32f_sum_of_poly_aligned16::t1(){
+  printf("sse3 not available... no test performed\n");
+}
+
+#else
+
+
+// Times three ways of processing the same random input: a plain expf()
+// summation, and the "generic" and "sse3" variants of
+// volk_32f_sum_of_poly_aligned16.  The sse3 result must match the generic one
+// within ERR_DELTA (relative); the expf() sum is only printed, never asserted.
+void qa_32f_sum_of_poly_aligned16::t1(){
+  volk_environment_init();
+
+  const int vlen = VEC_LEN;
+  float cutoff = CUTOFF;
+
+  float* center_point_array = NULL;
+  float* target = NULL;
+  float* target_generic = NULL;
+  float* src0 = NULL;
+
+  // Sizes are in bytes.  The return codes are OR-ed together so that any
+  // failed allocation fails the test instead of being dereferenced below.
+  int ret = posix_memalign((void**)&center_point_array, 16, 24);
+  ret |= posix_memalign((void**)&target, 16, 4);
+  ret |= posix_memalign((void**)&target_generic, 16, 4);
+  ret |= posix_memalign((void**)&src0, 16, (vlen << 2));
+  CPPUNIT_ASSERT_MESSAGE("posix_memalign failed", ret == 0);
+
+  random_floats((float*)src0, vlen);
+
+  // Coefficients handed to both kernel variants, built around a = CENTER.
+  float a = (float)CENTER;
+  float etoa = expf(a);
+  center_point_array[0] = (//(5.0 * a * a * a * a)/120.0 +
+                           (-4.0 * a * a * a)/24.0 +
+                           (3.0 * a * a)/6.0 +
+                           (-2.0 * a)/2.0 +
+                           (1.0)) * etoa;
+  center_point_array[1] = (//(-10.0 * a * a * a)/120.0 +
+                           (6.0 * a * a)/24.0 +
+                           (-3.0 * a)/6.0 +
+                           (1.0/2.0)) * etoa;
+  center_point_array[2] = (//(10.0 * a * a)/120.0 +
+                           (-4.0 * a)/24.0 +
+                           (1.0/6.0)) * etoa;
+  center_point_array[3] = (//(-5.0 * a)/120.0 +
+                           (1.0/24.0)) * etoa;
+  //center_point_array[4] = ((1.0)/120.0) * etoa;
+  center_point_array[4] = (//(a * a * a * a * a)/120.0 +
+                           (a * a * a * a)/24.0 +
+                           (a * a * a)/-6.0 +
+                           (a * a)/2.0 +
+                           -a + 1.0) * etoa;
+
+  printf("32f_sum_of_poly_aligned16\n");
+
+  clock_t start, end;
+  double total;
+
+  // Baseline: sum expf() over the input directly, NUM_ITERS times.
+  float my_sum = 0.0;
+  start = clock();
+  for(int k = 0; k < NUM_ITERS; ++k) {
+    float sum = 0.0;
+    for(int l = 0; l < vlen; ++l) {
+
+      sum += expf(src0[l]);
+
+    }
+    my_sum = sum;
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("exp time: %f\n", total);
+
+
+  // The kernel's size argument is passed as vlen << 2 for both variants.
+  start = clock();
+  for(int k = 0; k < NUM_ITERS; ++k) {
+
+    volk_32f_sum_of_poly_aligned16_manual(target_generic, src0, center_point_array, &cutoff, vlen << 2, "generic");
+
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("generic time: %f\n", total);
+
+
+  start = clock();
+  for(int k = 0; k < NUM_ITERS; ++k) {
+    volk_32f_sum_of_poly_aligned16_manual(target, src0, center_point_array, &cutoff, vlen << 2, "sse3");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("sse3 approx time: %f\n", total);
+
+  // Show the expf() baseline next to the sse3 result; only generic vs. sse3
+  // is actually checked.
+  printf("exp: %f, sse3: %f\n", my_sum, target[0]);
+  CPPUNIT_ASSERT_DOUBLES_EQUAL(target_generic[0], target[0], fabs(target_generic[0]) * ERR_DELTA);
+
+  free(center_point_array);
+  free(target);
+  free(target_generic);
+  free(src0);
+
+}
+
+#endif /*LV_HAVE_SSE3*/
diff --git a/volk/lib/qa_32f_sum_of_poly_aligned16.h b/volk/lib/qa_32f_sum_of_poly_aligned16.h
new file mode 100644
index 000000000..67a347f9a
--- /dev/null
+++ b/volk/lib/qa_32f_sum_of_poly_aligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_32F_SUM_OF_POLY_ALIGNED16_H
+#define INCLUDED_QA_32F_SUM_OF_POLY_ALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_32f_sum_of_poly_aligned16 : public CppUnit::TestCase {
+  // CppUnit suite declaration: t1 is the only registered test.
+  CPPUNIT_TEST_SUITE (qa_32f_sum_of_poly_aligned16);
+  CPPUNIT_TEST (t1);
+  CPPUNIT_TEST_SUITE_END ();
+  // t1 is implemented in qa_32f_sum_of_poly_aligned16.cc.
+ private:
+  void t1 ();
+};
+
+
+#endif /* INCLUDED_QA_32F_SUM_OF_POLY_ALIGNED16_H */
diff --git a/volk/lib/qa_32fc_32f_multiply_aligned16.cc b/volk/lib/qa_32fc_32f_multiply_aligned16.cc
new file mode 100644
index 000000000..4eba0a3cd
--- /dev/null
+++ b/volk/lib/qa_32fc_32f_multiply_aligned16.cc
@@ -0,0 +1,85 @@
+#include <volk/volk_runtime.h>
+#include <volk/volk.h>
+#include <qa_32fc_32f_multiply_aligned16.h>
+#include <stdlib.h>
+#include <math.h>
+#include <time.h>
+
+// Asserts that real and imaginary parts of actual match expected within a relative delta; expands to one statement.
+#define assertcomplexEqual(expected, actual, delta)			\
+  do { CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * (delta)); \
+       CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected)) * (delta)); } while (0)
+#define	ERR_DELTA	(1e-4)
+
+// Test helper: returns rand() scaled by RAND_MAX and mapped linearly onto [-1, 1].
+static float uniform() {
+  return 2.0 * ((float) rand() / RAND_MAX - 0.5);	// 2 * (r - 0.5) with r in [0, 1]
+}
+
+// Fills buf[0] .. buf[n-1] with successive uniform() values.
+static void random_floats (float *buf, unsigned n)
+{
+  float *const stop = buf + n;
+  for (float *cursor = buf; cursor != stop; ++cursor) *cursor = uniform ();
+}
+
+#ifdef LV_HAVE_SSE3
+// Multiplies a random complex vector by a random real vector with the "generic"
+// and "sse3" implementations, timing each, then requires the two results to
+// agree within ERR_DELTA (relative) in both real and imaginary parts.
+void qa_32fc_32f_multiply_aligned16::t1() {
+  const int vlen = 2046;
+  const int ITERS = 100000;
+
+  volk_environment_init();
+  int ret;
+  clock_t start, end;
+  double total;
+  std::complex<float>* input = NULL;
+  float * taps = NULL;
+  int i;
+  std::complex<float>* result_generic = NULL;
+  std::complex<float>* result_sse3 = NULL;
+
+  ret  = posix_memalign((void**)&input, 16, vlen * 2 * sizeof(float));
+  ret |= posix_memalign((void**)&taps, 16, vlen * sizeof(float));
+  ret |= posix_memalign((void**)&result_generic, 16, vlen * 2 * sizeof(float));
+  ret |= posix_memalign((void**)&result_sse3, 16, vlen * 2 * sizeof(float));
+  CPPUNIT_ASSERT_MESSAGE("posix_memalign failed", ret == 0);  // any failed call leaves ret non-zero
+
+  random_floats((float*)input, vlen * 2);
+  random_floats(taps, vlen);
+  printf("32fc_32f_multiply_aligned16\n");
+
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_32fc_32f_multiply_aligned16_manual(result_generic, input, taps, vlen,  "generic");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("generic_time: %f\n", total);
+
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_32fc_32f_multiply_aligned16_manual(result_sse3, input, taps, vlen, "sse3");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("sse3_time: %f\n", total);
+
+  for(i = 0; i < vlen; i++){
+    assertcomplexEqual(result_generic[i], result_sse3[i], ERR_DELTA);
+  }
+
+  free(input);
+  free(taps);
+  free(result_generic);
+  free(result_sse3);
+}
+#else
+void qa_32fc_32f_multiply_aligned16::t1() {
+  printf("sse3 not available... no test performed\n");
+}
+
+#endif /* LV_HAVE_SSE3 */
+
diff --git a/volk/lib/qa_32fc_32f_multiply_aligned16.h b/volk/lib/qa_32fc_32f_multiply_aligned16.h
new file mode 100644
index 000000000..fc3b3eeb2
--- /dev/null
+++ b/volk/lib/qa_32fc_32f_multiply_aligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_32FC_32F_MULTIPLY_ALIGNED16_H
+#define INCLUDED_QA_32FC_32F_MULTIPLY_ALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_32fc_32f_multiply_aligned16 : public CppUnit::TestCase {
+  // CppUnit suite declaration: t1 is the only registered test.
+  CPPUNIT_TEST_SUITE (qa_32fc_32f_multiply_aligned16);
+  CPPUNIT_TEST (t1);
+  CPPUNIT_TEST_SUITE_END ();
+  // t1 is implemented in qa_32fc_32f_multiply_aligned16.cc.
+ private:
+  void t1 ();
+};
+
+
+#endif /* INCLUDED_QA_32FC_32F_MULTIPLY_ALIGNED16_H */
diff --git a/volk/lib/qa_32fc_32f_power_32fc_aligned16.cc b/volk/lib/qa_32fc_32f_power_32fc_aligned16.cc
new file mode 100644
index 000000000..64ea65da9
--- /dev/null
+++ b/volk/lib/qa_32fc_32f_power_32fc_aligned16.cc
@@ -0,0 +1,83 @@
+#include <volk/volk_runtime.h>
+#include <volk/volk.h>
+#include <qa_32fc_32f_power_32fc_aligned16.h>
+#include <stdlib.h>
+#include <math.h>
+#include <time.h>
+
+// Asserts that real and imaginary parts of actual match expected within a relative delta; expands to one statement.
+#define assertcomplexEqual(expected, actual, delta)			\
+  do { CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * (delta)); \
+       CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected)) * (delta)); } while (0)
+#define	ERR_DELTA	(1.5e-3)
+
+// Test helper: returns rand() scaled by RAND_MAX and mapped linearly onto [-1, 1].
+static float uniform() {
+  return 2.0 * ((float) rand() / RAND_MAX - 0.5);	// 2 * (r - 0.5) with r in [0, 1]
+}
+
+// Fills buf[0] .. buf[n-1] with successive uniform() values.
+static void random_floats (float *buf, unsigned n)
+{
+  unsigned remaining = n;
+  while (remaining-- > 0) *buf++ = uniform ();
+}
+
+#ifdef LV_HAVE_SSE
+// Raises a random complex vector to the power 3.2 with the "generic" and "sse"
+// implementations, timing each, then requires the two results to agree within
+// ERR_DELTA (relative) in both real and imaginary parts.
+void qa_32fc_32f_power_32fc_aligned16::t1() {
+  const int vlen = 2046;
+  const int ITERS = 10000;
+
+  volk_environment_init();
+  int ret;
+  clock_t start, end;
+  double total;
+  std::complex<float>* input = NULL;
+  int i;
+  std::complex<float>* result_generic = NULL;
+  std::complex<float>* result_sse = NULL;
+
+  ret  = posix_memalign((void**)&input, 16, vlen * 2 * sizeof(float));
+  ret |= posix_memalign((void**)&result_generic, 16, vlen * 2 * sizeof(float));
+  ret |= posix_memalign((void**)&result_sse, 16, vlen * 2 * sizeof(float));
+  CPPUNIT_ASSERT_MESSAGE("posix_memalign failed", ret == 0);  // any failed call leaves ret non-zero
+
+  random_floats((float*)input, vlen * 2);
+
+  const float power = 3.2;
+  printf("32fc_32f_power_32fc_aligned16\n");
+
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_32fc_32f_power_32fc_aligned16_manual(result_generic, input, power, vlen,  "generic");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("generic_time: %f\n", total);
+
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_32fc_32f_power_32fc_aligned16_manual(result_sse, input, power, vlen,  "sse");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("sse_time: %f\n", total);
+
+  for(i = 0; i < vlen; i++){
+    assertcomplexEqual(result_generic[i], result_sse[i], ERR_DELTA);
+  }
+
+  free(input);
+  free(result_generic);
+  free(result_sse);
+}
+#else
+void qa_32fc_32f_power_32fc_aligned16::t1() {
+  printf("sse not available... no test performed\n");
+}
+
+#endif /* LV_HAVE_SSE */
+
diff --git a/volk/lib/qa_32fc_32f_power_32fc_aligned16.h b/volk/lib/qa_32fc_32f_power_32fc_aligned16.h
new file mode 100644
index 000000000..464b7b7cc
--- /dev/null
+++ b/volk/lib/qa_32fc_32f_power_32fc_aligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_32FC_32F_POWER_32FC_ALIGNED16_H
+#define INCLUDED_QA_32FC_32F_POWER_32FC_ALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_32fc_32f_power_32fc_aligned16 : public CppUnit::TestCase {
+  // CppUnit suite declaration: t1 is the only registered test.
+  CPPUNIT_TEST_SUITE (qa_32fc_32f_power_32fc_aligned16);
+  CPPUNIT_TEST (t1);
+  CPPUNIT_TEST_SUITE_END ();
+  // t1 is implemented in qa_32fc_32f_power_32fc_aligned16.cc.
+ private:
+  void t1 ();
+};
+
+
+#endif /* INCLUDED_QA_32FC_32F_POWER_32FC_ALIGNED16_H */
diff --git a/volk/lib/qa_32fc_atan2_32f_aligned16.cc b/volk/lib/qa_32fc_atan2_32f_aligned16.cc
new file mode 100644
index 000000000..a24382d71
--- /dev/null
+++ b/volk/lib/qa_32fc_atan2_32f_aligned16.cc
@@ -0,0 +1,75 @@
+#include <volk/volk_runtime.h>
+#include <volk/volk.h>
+#include <qa_32fc_atan2_32f_aligned16.h>
+#include <volk/volk_32fc_atan2_32f_aligned16.h>
+#include <cstdlib>
+
+//test for sse
+
+#ifndef LV_HAVE_SSE
+
+void qa_32fc_atan2_32f_aligned16::t1() {  // stub used when LV_HAVE_SSE is not defined
+  fputs("sse not available... no test performed\n", stdout);
+}
+
+#else
+
+void qa_32fc_atan2_32f_aligned16::t1() {
+  // Times ITERS runs of the atan2 kernel through the "generic" implementation,
+  // the "sse" implementation and the runtime-dispatched entry point, all on
+  // the same random complex input, then requires the latter two outputs to
+  // match the generic output element by element (relative tolerance 1e-4).
+
+  volk_runtime_init();
+
+  volk_environment_init();
+
+  const int vlen = 3201;
+  const int ITERS = 10000;
+  std::complex<float> input0[vlen] __attribute__ ((aligned (16)));
+  float output_generic[vlen] __attribute__ ((aligned (16)));
+  float output_sse[vlen] __attribute__ ((aligned (16)));
+  float output_sse4_1[vlen] __attribute__ ((aligned (16)));
+
+  // View the complex array as 2*vlen floats and fill it from rand().
+  float* const samples = (float*)input0;
+  for(int k = 0; k < 2*vlen; ++k) {
+    samples[k] = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)));
+  }
+  printf("32fc_atan2_32f_aligned\n");
+
+  // Reference implementation.
+  clock_t tick = clock();
+  for(int pass = 0; pass < ITERS; ++pass) {
+    volk_32fc_atan2_32f_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic");
+  }
+  clock_t tock = clock();
+  printf("generic_time: %f\n", (double)(tock-tick)/(double)CLOCKS_PER_SEC);
+
+  // SSE implementation.
+  tick = clock();
+  for(int pass = 0; pass < ITERS; ++pass) {
+    volk_32fc_atan2_32f_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse");
+  }
+  tock = clock();
+  printf("sse_time: %f\n", (double)(tock-tick)/(double)CLOCKS_PER_SEC);
+
+  // Whatever get_volk_runtime() dispatches to; the output is labelled sse4_1.
+  // NOTE(review): presumably SSE4.1 is selected when available -- confirm.
+  tick = clock();
+  for(int pass = 0; pass < ITERS; ++pass) {
+    get_volk_runtime()->volk_32fc_atan2_32f_aligned16(output_sse4_1, input0, 32768.0, vlen);
+  }
+  tock = clock();
+  printf("sse4_1_time: %f\n", (double)(tock-tick)/(double)CLOCKS_PER_SEC);
+
+  // Element-wise comparison against the generic output.
+  // The tolerance scales with the generic value, so a generic output of
+  // exactly 0 demands an exact match.
+  for(int k = 0; k < vlen; ++k) {
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[k], output_sse[k], fabs(output_generic[k])*1e-4);
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[k], output_sse4_1[k], fabs(output_generic[k])*1e-4);
+  }
+}
+
+#endif
diff --git a/volk/lib/qa_32fc_atan2_32f_aligned16.h b/volk/lib/qa_32fc_atan2_32f_aligned16.h
new file mode 100644
index 000000000..9c4dc209a
--- /dev/null
+++ b/volk/lib/qa_32fc_atan2_32f_aligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_32FC_ATAN2_32F_ALIGNED16_H
+#define INCLUDED_QA_32FC_ATAN2_32F_ALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+// CppUnit test case for the 32fc_atan2_32f_aligned16 kernel; its suite registers the single test t1.
+class qa_32fc_atan2_32f_aligned16 : public CppUnit::TestCase {
+  CPPUNIT_TEST_SUITE (qa_32fc_atan2_32f_aligned16);
+  CPPUNIT_TEST (t1);
+  CPPUNIT_TEST_SUITE_END ();
+
+ private:
+  void t1 ();  // the test body registered with CPPUNIT_TEST above
+};
+
+
+#endif /* INCLUDED_QA_32FC_ATAN2_32F_ALIGNED16_H */
diff --git a/volk/lib/qa_32fc_conjugate_dot_prod_aligned16.cc b/volk/lib/qa_32fc_conjugate_dot_prod_aligned16.cc
new file mode 100644
index 000000000..497914e0a
--- /dev/null
+++ b/volk/lib/qa_32fc_conjugate_dot_prod_aligned16.cc
@@ -0,0 +1,137 @@
+#include <volk/volk.h>
+#include <qa_32fc_conjugate_dot_prod_aligned16.h>
+#include <stdlib.h>
+#include <math.h>
+
+
+// Asserts real and imaginary parts separately, each within |expected part| * delta; do/while(0) keeps both checks in one statement.
+#define assertcomplexEqual(expected, actual, delta) do { \
+  CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * (delta)); \
+  CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected)) * (delta)); } while (0)
+#define	ERR_DELTA	(1e-4)  // relative tolerance handed to assertcomplexEqual
+
+//test for sse
+
+#if LV_HAVE_SSE && LV_HAVE_64
+
+static float uniform() {  // maps one rand() draw from [0, RAND_MAX] onto [-1, 1]
+  return (static_cast<float>(rand()) / RAND_MAX - 0.5) * 2.0;
+}
+
+// Fills buf[0..n) with uniform() draws scaled by 32767.
+static void random_floats (float *buf, unsigned n)
+{
+  for (float *slot = buf, *const stop = buf + n; slot != stop; ++slot)
+    *slot = uniform () * 32767;
+}
+
+
+void qa_32fc_conjugate_dot_prod_aligned16::t1() {
+  // Runs the conjugate dot product once with the "generic" and once with the
+  // "sse" implementation on identical random data and requires both complex
+  // results to agree within the relative tolerance ERR_DELTA.
+  const int vlen = 789743;
+
+  volk_environment_init();
+
+  std::complex<float>* input = NULL;
+  std::complex<float>* taps = NULL;
+  std::complex<float>* result_generic = NULL;
+  std::complex<float>* result = NULL;
+
+  // 16-byte aligned buffers: vlen << 3 bytes per operand, 8 bytes per result.
+  // The return codes are checked so that a failed allocation stops the test
+  // instead of going on to write through an unset pointer.
+  int ret = posix_memalign((void**)&input, 16, vlen << 3);
+  ret |= posix_memalign((void**)&taps, 16, vlen << 3);
+  ret |= posix_memalign((void**)&result_generic, 16, 8);
+  ret |= posix_memalign((void**)&result, 16, 8);
+  CPPUNIT_ASSERT_MESSAGE("posix_memalign failed", ret == 0);
+
+  result_generic[0] = std::complex<float>(0,0);
+  result[0] = std::complex<float>(0,0);
+
+  random_floats((float*)input, vlen * 2);
+  random_floats((float*)taps, vlen * 2);
+
+  // vlen * 8 equals the operand size in bytes (presumably what the kernel expects -- confirm).
+  volk_32fc_conjugate_dot_prod_aligned16_manual(result_generic, input, taps, vlen * 8,  "generic");
+  volk_32fc_conjugate_dot_prod_aligned16_manual(result, input, taps, vlen * 8, "sse");
+
+  printf("32fc_conjugate_dot_prod_aligned16\n");
+  printf("generic: %f +i%f ... sse: %f +i%f\n", std::real(result_generic[0]), std::imag(result_generic[0]), std::real(result[0]), std::imag(result[0]));
+
+  assertcomplexEqual(result_generic[0], result[0], ERR_DELTA);
+
+  free(input);
+  free(taps);
+  free(result_generic);
+  free(result);
+}
+
+
+#elif LV_HAVE_SSE && LV_HAVE_32
+
+static float uniform() {  // maps one rand() draw from [0, RAND_MAX] onto [-1, 1]
+  return (static_cast<float>(rand()) / RAND_MAX - 0.5) * 2.0;
+}
+
+// Fills buf[0..n) with uniform() draws scaled by 32767.
+static void random_floats (float *buf, unsigned n)
+{
+  for (float *slot = buf, *const stop = buf + n; slot != stop; ++slot)
+    *slot = uniform () * 32767;
+}
+
+
+void qa_32fc_conjugate_dot_prod_aligned16::t1() {
+  // Runs the conjugate dot product once with the "generic" and once with the
+  // "sse_32" implementation on identical random data and requires both complex
+  // results to agree within the relative tolerance ERR_DELTA.
+  const int vlen = 789743;
+
+  volk_environment_init();
+
+  std::complex<float>* input = NULL;
+  std::complex<float>* taps = NULL;
+  std::complex<float>* result_generic = NULL;
+  std::complex<float>* result = NULL;
+
+  // 16-byte aligned buffers: vlen << 3 bytes per operand, 8 bytes per result.
+  // The return codes are checked so that a failed allocation stops the test
+  // instead of going on to write through an unset pointer.
+  int ret = posix_memalign((void**)&input, 16, vlen << 3);
+  ret |= posix_memalign((void**)&taps, 16, vlen << 3);
+  ret |= posix_memalign((void**)&result_generic, 16, 8);
+  ret |= posix_memalign((void**)&result, 16, 8);
+  CPPUNIT_ASSERT_MESSAGE("posix_memalign failed", ret == 0);
+
+  result_generic[0] = std::complex<float>(0,0);
+  result[0] = std::complex<float>(0,0);
+
+  random_floats((float*)input, vlen * 2);
+  random_floats((float*)taps, vlen * 2);
+
+  // vlen * 8 equals the operand size in bytes (presumably what the kernel expects -- confirm).
+  volk_32fc_conjugate_dot_prod_aligned16_manual(result_generic, input, taps, vlen * 8,  "generic");
+  volk_32fc_conjugate_dot_prod_aligned16_manual(result, input, taps, vlen * 8, "sse_32");
+
+  printf("32fc_conjugate_dot_prod_aligned16\n");
+  printf("generic: %f +i%f ... sse: %f +i%f\n", std::real(result_generic[0]), std::imag(result_generic[0]), std::real(result[0]), std::imag(result[0]));
+
+  assertcomplexEqual(result_generic[0], result[0], ERR_DELTA);
+
+  free(input);
+  free(taps);
+  free(result_generic);
+  free(result);
+}
+
+
+#else
+
+void qa_32fc_conjugate_dot_prod_aligned16::t1() {  // stub for builds matching neither SSE branch above
+  fputs("sse not available... no test performed\n", stdout);
+}
+
+#endif /*LV_HAVE_SSE*/
diff --git a/volk/lib/qa_32fc_conjugate_dot_prod_aligned16.h b/volk/lib/qa_32fc_conjugate_dot_prod_aligned16.h
new file mode 100644
index 000000000..507b1769b
--- /dev/null
+++ b/volk/lib/qa_32fc_conjugate_dot_prod_aligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_32FC_CONJUGATE_DOT_PROD_ALIGNED16_H
+#define INCLUDED_QA_32FC_CONJUGATE_DOT_PROD_ALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+// CppUnit test case for the 32fc_conjugate_dot_prod_aligned16 kernel; its suite registers the single test t1.
+class qa_32fc_conjugate_dot_prod_aligned16 : public CppUnit::TestCase {
+  CPPUNIT_TEST_SUITE (qa_32fc_conjugate_dot_prod_aligned16);
+  CPPUNIT_TEST (t1);
+  CPPUNIT_TEST_SUITE_END ();
+
+ private:
+  void t1 ();  // the test body registered with CPPUNIT_TEST above
+};
+
+
+#endif /* INCLUDED_QA_32FC_CONJUGATE_DOT_PROD_ALIGNED16_H */
diff --git a/volk/lib/qa_32fc_deinterleave_32f_aligned16.cc b/volk/lib/qa_32fc_deinterleave_32f_aligned16.cc
new file mode 100644
index 000000000..0f5a030f5
--- /dev/null
+++ b/volk/lib/qa_32fc_deinterleave_32f_aligned16.cc
@@ -0,0 +1,63 @@
+#include <volk/volk.h>
+#include <qa_32fc_deinterleave_32f_aligned16.h>
+#include <volk/volk_32fc_deinterleave_32f_aligned16.h>
+#include <cstdlib>
+
+//test for sse
+
+#ifndef LV_HAVE_SSE
+
+void qa_32fc_deinterleave_32f_aligned16::t1() {  // stub used when LV_HAVE_SSE is not defined
+  fputs("sse not available... no test performed\n", stdout);
+}
+
+#else
+
+void qa_32fc_deinterleave_32f_aligned16::t1() {
+  // Times ITERS runs of the "generic" and of the "sse" implementation on the
+  // same random complex input, then requires both pairs of output buffers to
+  // match element by element within a relative tolerance of 1e-4.
+  // Each implementation fills two float output buffers per run.
+
+  volk_environment_init();
+
+  const int vlen = 3201;
+  const int ITERS = 100000;
+  std::complex<float> input0[vlen] __attribute__ ((aligned (16)));
+  float output_generic[vlen] __attribute__ ((aligned (16)));
+  float output_generic1[vlen] __attribute__ ((aligned (16)));
+  float output_sse[vlen] __attribute__ ((aligned (16)));
+  float output_sse1[vlen] __attribute__ ((aligned (16)));
+
+  // View the complex array as 2*vlen floats and fill it from rand().
+  float* const samples = (float*)input0;
+  for(int k = 0; k < 2*vlen; ++k) {
+    samples[k] = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)));
+  }
+  printf("32fc_deinterleave_32f_aligned\n");
+
+  // Reference implementation.
+  clock_t tick = clock();
+  for(int pass = 0; pass < ITERS; ++pass) {
+    volk_32fc_deinterleave_32f_aligned16_manual(output_generic, output_generic1, input0, vlen, "generic");
+  }
+  clock_t tock = clock();
+  printf("generic_time: %f\n", (double)(tock-tick)/(double)CLOCKS_PER_SEC);
+
+  // SSE implementation.
+  tick = clock();
+  for(int pass = 0; pass < ITERS; ++pass) {
+    volk_32fc_deinterleave_32f_aligned16_manual(output_sse, output_sse1, input0, vlen, "sse");
+  }
+  tock = clock();
+  printf("sse_time: %f\n", (double)(tock-tick)/(double)CLOCKS_PER_SEC);
+
+  // The tolerance scales with the generic value, so a generic output of
+  // exactly 0 demands an exact match.
+  for(int k = 0; k < vlen; ++k) {
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[k], output_sse[k], fabs(output_generic[k])*1e-4);
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic1[k], output_sse1[k], fabs(output_generic1[k])*1e-4);
+  }
+}
+
+#endif
diff --git a/volk/lib/qa_32fc_deinterleave_32f_aligned16.h b/volk/lib/qa_32fc_deinterleave_32f_aligned16.h
new file mode 100644
index 000000000..78660e6ad
--- /dev/null
+++ b/volk/lib/qa_32fc_deinterleave_32f_aligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_32FC_DEINTERLEAVE_32F_ALIGNED16_H
+#define INCLUDED_QA_32FC_DEINTERLEAVE_32F_ALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+// CppUnit test case for the 32fc_deinterleave_32f_aligned16 kernel; its suite registers the single test t1.
+class qa_32fc_deinterleave_32f_aligned16 : public CppUnit::TestCase {
+  CPPUNIT_TEST_SUITE (qa_32fc_deinterleave_32f_aligned16);
+  CPPUNIT_TEST (t1);
+  CPPUNIT_TEST_SUITE_END ();
+
+ private:
+  void t1 ();  // the test body registered with CPPUNIT_TEST above
+};
+
+
+#endif /* INCLUDED_QA_32FC_DEINTERLEAVE_32F_ALIGNED16_H */
diff --git a/volk/lib/qa_32fc_deinterleave_64f_aligned16.cc b/volk/lib/qa_32fc_deinterleave_64f_aligned16.cc
new file mode 100644
index 000000000..6e051afbc
--- /dev/null
+++ b/volk/lib/qa_32fc_deinterleave_64f_aligned16.cc
@@ -0,0 +1,63 @@
+#include <volk/volk.h>
+#include <qa_32fc_deinterleave_64f_aligned16.h>
+#include <volk/volk_32fc_deinterleave_64f_aligned16.h>
+#include <cstdlib>
+
+//test for sse2
+
+#ifndef LV_HAVE_SSE2
+
+void qa_32fc_deinterleave_64f_aligned16::t1() {  // stub used when LV_HAVE_SSE2 is not defined
+  fputs("sse2 not available... no test performed\n", stdout);
+}
+
+#else
+
+void qa_32fc_deinterleave_64f_aligned16::t1() {
+  // Times ITERS runs of the "generic" and of the "sse2" implementation on the
+  // same random complex input, then requires both pairs of output buffers to
+  // match element by element within a relative tolerance of 1e-4.
+  // Each implementation fills two double output buffers per run.
+
+  volk_environment_init();
+
+  const int vlen = 3201;
+  const int ITERS = 100000;
+  std::complex<float> input0[vlen] __attribute__ ((aligned (16)));
+  double output_generic[vlen] __attribute__ ((aligned (16)));
+  double output_generic1[vlen] __attribute__ ((aligned (16)));
+  double output_sse2[vlen] __attribute__ ((aligned (16)));
+  double output_sse21[vlen] __attribute__ ((aligned (16)));
+
+  // View the complex array as 2*vlen floats and fill it from rand().
+  float* const samples = (float*)input0;
+  for(int k = 0; k < 2*vlen; ++k) {
+    samples[k] = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)));
+  }
+  printf("32fc_deinterleave_64f_aligned\n");
+
+  // Reference implementation.
+  clock_t tick = clock();
+  for(int pass = 0; pass < ITERS; ++pass) {
+    volk_32fc_deinterleave_64f_aligned16_manual(output_generic, output_generic1, input0, vlen, "generic");
+  }
+  clock_t tock = clock();
+  printf("generic_time: %f\n", (double)(tock-tick)/(double)CLOCKS_PER_SEC);
+
+  // SSE2 implementation.
+  tick = clock();
+  for(int pass = 0; pass < ITERS; ++pass) {
+    volk_32fc_deinterleave_64f_aligned16_manual(output_sse2, output_sse21, input0, vlen, "sse2");
+  }
+  tock = clock();
+  printf("sse2_time: %f\n", (double)(tock-tick)/(double)CLOCKS_PER_SEC);
+
+  // The tolerance scales with the generic value, so a generic output of
+  // exactly 0 demands an exact match.
+  for(int k = 0; k < vlen; ++k) {
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[k], output_sse2[k], fabs(output_generic[k])*1e-4);
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic1[k], output_sse21[k], fabs(output_generic1[k])*1e-4);
+  }
+}
+
+#endif
diff --git a/volk/lib/qa_32fc_deinterleave_64f_aligned16.h b/volk/lib/qa_32fc_deinterleave_64f_aligned16.h
new file mode 100644
index 000000000..f924b9752
--- /dev/null
+++ b/volk/lib/qa_32fc_deinterleave_64f_aligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_32FC_DEINTERLEAVE_64F_ALIGNED16_H
+#define INCLUDED_QA_32FC_DEINTERLEAVE_64F_ALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+// CppUnit test case for the 32fc_deinterleave_64f_aligned16 kernel; its suite registers the single test t1.
+class qa_32fc_deinterleave_64f_aligned16 : public CppUnit::TestCase {
+  CPPUNIT_TEST_SUITE (qa_32fc_deinterleave_64f_aligned16);
+  CPPUNIT_TEST (t1);
+  CPPUNIT_TEST_SUITE_END ();
+
+ private:
+  void t1 ();  // the test body registered with CPPUNIT_TEST above
+};
+
+
+#endif /* INCLUDED_QA_32FC_DEINTERLEAVE_64F_ALIGNED16_H */
diff --git a/volk/lib/qa_32fc_deinterleave_real_16s_aligned16.cc b/volk/lib/qa_32fc_deinterleave_real_16s_aligned16.cc
new file mode 100644
index 000000000..850518524
--- /dev/null
+++ b/volk/lib/qa_32fc_deinterleave_real_16s_aligned16.cc
@@ -0,0 +1,60 @@
+#include <volk/volk.h>
+#include <qa_32fc_deinterleave_real_16s_aligned16.h>
+#include <volk/volk_32fc_deinterleave_real_16s_aligned16.h>
+#include <cstdlib>
+
+//test for sse
+
+#ifndef LV_HAVE_SSE
+
+void qa_32fc_deinterleave_real_16s_aligned16::t1() {  // stub used when LV_HAVE_SSE is not defined
+  fputs("sse not available... no test performed\n", stdout);
+}
+
+#else
+
+void qa_32fc_deinterleave_real_16s_aligned16::t1() {
+  // Times ITERS runs of the "generic" and of the "sse" implementation on the
+  // same random complex input (both called with the scalar 32768.0), then
+  // requires the two int16_t output buffers to match element by element
+  // within a relative tolerance of 1e-4.
+
+  volk_environment_init();
+
+  const int vlen = 3201;
+  const int ITERS = 100000;
+  std::complex<float> input0[vlen] __attribute__ ((aligned (16)));
+  int16_t output_generic[vlen] __attribute__ ((aligned (16)));
+  int16_t output_sse[vlen] __attribute__ ((aligned (16)));
+
+  // View the complex array as 2*vlen floats and fill it from rand().
+  float* const samples = (float*)input0;
+  for(int k = 0; k < 2*vlen; ++k) {
+    samples[k] = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)));
+  }
+  printf("32fc_deinterleave_real_16s_aligned\n");
+
+  // Reference implementation.
+  clock_t tick = clock();
+  for(int pass = 0; pass < ITERS; ++pass) {
+    volk_32fc_deinterleave_real_16s_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic");
+  }
+  clock_t tock = clock();
+  printf("generic_time: %f\n", (double)(tock-tick)/(double)CLOCKS_PER_SEC);
+
+  // SSE implementation.
+  tick = clock();
+  for(int pass = 0; pass < ITERS; ++pass) {
+    volk_32fc_deinterleave_real_16s_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse");
+  }
+  tock = clock();
+  printf("sse_time: %f\n", (double)(tock-tick)/(double)CLOCKS_PER_SEC);
+
+  // The tolerance scales with the generic value, so a generic output of
+  // exactly 0 demands an exact match.
+  for(int k = 0; k < vlen; ++k) {
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[k], output_sse[k], fabs(output_generic[k])*1e-4);
+  }
+}
+
+#endif
diff --git a/volk/lib/qa_32fc_deinterleave_real_16s_aligned16.h b/volk/lib/qa_32fc_deinterleave_real_16s_aligned16.h
new file mode 100644
index 000000000..68b80f27d
--- /dev/null
+++ b/volk/lib/qa_32fc_deinterleave_real_16s_aligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_32FC_DEINTERLEAVE_REAL_16S_ALIGNED16_H
+#define INCLUDED_QA_32FC_DEINTERLEAVE_REAL_16S_ALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+// CppUnit test case for the 32fc_deinterleave_real_16s_aligned16 kernel; its suite registers the single test t1.
+class qa_32fc_deinterleave_real_16s_aligned16 : public CppUnit::TestCase {
+  CPPUNIT_TEST_SUITE (qa_32fc_deinterleave_real_16s_aligned16);
+  CPPUNIT_TEST (t1);
+  CPPUNIT_TEST_SUITE_END ();
+
+ private:
+  void t1 ();  // the test body registered with CPPUNIT_TEST above
+};
+
+
+#endif /* INCLUDED_QA_32FC_DEINTERLEAVE_REAL_16S_ALIGNED16_H */
diff --git a/volk/lib/qa_32fc_deinterleave_real_32f_aligned16.cc b/volk/lib/qa_32fc_deinterleave_real_32f_aligned16.cc
new file mode 100644
index 000000000..321deb184
--- /dev/null
+++ b/volk/lib/qa_32fc_deinterleave_real_32f_aligned16.cc
@@ -0,0 +1,60 @@
+#include <volk/volk.h>
+#include <qa_32fc_deinterleave_real_32f_aligned16.h>
+#include <volk/volk_32fc_deinterleave_real_32f_aligned16.h>
+#include <cstdlib>
+
+//test for sse
+
+#ifndef LV_HAVE_SSE
+
+void qa_32fc_deinterleave_real_32f_aligned16::t1() {  // stub used when LV_HAVE_SSE is not defined
+  fputs("sse not available... no test performed\n", stdout);
+}
+
+#else
+
+void qa_32fc_deinterleave_real_32f_aligned16::t1() {
+  // Times ITERS runs of the "generic" and of the "sse" implementation on the
+  // same random complex input, then requires the two float output buffers
+  // to match element by element within a relative tolerance of 1e-4.
+  // Timings are printed; only the output comparison can fail the test.
+
+  volk_environment_init();
+
+  const int vlen = 3201;
+  const int ITERS = 100000;
+  std::complex<float> input0[vlen] __attribute__ ((aligned (16)));
+  float output_generic[vlen] __attribute__ ((aligned (16)));
+  float output_sse[vlen] __attribute__ ((aligned (16)));
+
+  // View the complex array as 2*vlen floats and fill it from rand().
+  float* const samples = (float*)input0;
+  for(int k = 0; k < 2*vlen; ++k) {
+    samples[k] = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)));
+  }
+  printf("32fc_deinterleave_real_32f_aligned\n");
+
+  // Reference implementation.
+  clock_t tick = clock();
+  for(int pass = 0; pass < ITERS; ++pass) {
+    volk_32fc_deinterleave_real_32f_aligned16_manual(output_generic, input0, vlen, "generic");
+  }
+  clock_t tock = clock();
+  printf("generic_time: %f\n", (double)(tock-tick)/(double)CLOCKS_PER_SEC);
+
+  // SSE implementation.
+  tick = clock();
+  for(int pass = 0; pass < ITERS; ++pass) {
+    volk_32fc_deinterleave_real_32f_aligned16_manual(output_sse, input0, vlen, "sse");
+  }
+  tock = clock();
+  printf("sse_time: %f\n", (double)(tock-tick)/(double)CLOCKS_PER_SEC);
+
+  // The tolerance scales with the generic value, so a generic output of
+  // exactly 0 demands an exact match.
+  for(int k = 0; k < vlen; ++k) {
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[k], output_sse[k], fabs(output_generic[k])*1e-4);
+  }
+}
+
+#endif
diff --git a/volk/lib/qa_32fc_deinterleave_real_32f_aligned16.h b/volk/lib/qa_32fc_deinterleave_real_32f_aligned16.h
new file mode 100644
index 000000000..765450bb6
--- /dev/null
+++ b/volk/lib/qa_32fc_deinterleave_real_32f_aligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_32FC_DEINTERLEAVE_REAL_32F_ALIGNED16_H
+#define INCLUDED_QA_32FC_DEINTERLEAVE_REAL_32F_ALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+// CppUnit test case for the 32fc_deinterleave_real_32f_aligned16 kernel; its suite registers the single test t1.
+class qa_32fc_deinterleave_real_32f_aligned16 : public CppUnit::TestCase {
+  CPPUNIT_TEST_SUITE (qa_32fc_deinterleave_real_32f_aligned16);
+  CPPUNIT_TEST (t1);
+  CPPUNIT_TEST_SUITE_END ();
+
+ private:
+  void t1 ();  // the test body registered with CPPUNIT_TEST above
+};
+
+
+#endif /* INCLUDED_QA_32FC_DEINTERLEAVE_REAL_32F_ALIGNED16_H */
diff --git a/volk/lib/qa_32fc_deinterleave_real_64f_aligned16.cc b/volk/lib/qa_32fc_deinterleave_real_64f_aligned16.cc
new file mode 100644
index 000000000..aedb2e387
--- /dev/null
+++ b/volk/lib/qa_32fc_deinterleave_real_64f_aligned16.cc
@@ -0,0 +1,60 @@
+#include <volk/volk.h>
+#include <qa_32fc_deinterleave_real_64f_aligned16.h>
+#include <volk/volk_32fc_deinterleave_real_64f_aligned16.h>
+#include <cstdlib>
+
+//test for sse2
+
+#ifndef LV_HAVE_SSE2
+
+void qa_32fc_deinterleave_real_64f_aligned16::t1() {  // stub used when LV_HAVE_SSE2 is not defined
+  fputs("sse2 not available... no test performed\n", stdout);
+}
+
+#else
+
+void qa_32fc_deinterleave_real_64f_aligned16::t1() {
+  // Times ITERS runs of the "generic" and of the "sse2" implementation on the
+  // same random complex input, then requires the two double output buffers
+  // to match element by element within a relative tolerance of 1e-4.
+  // The second timing is labelled sse2_time to match the implementation run.
+
+  volk_environment_init();
+
+  const int vlen = 3201;
+  const int ITERS = 100000;
+  std::complex<float> input0[vlen] __attribute__ ((aligned (16)));
+  double output_generic[vlen] __attribute__ ((aligned (16)));
+  double output_sse2[vlen] __attribute__ ((aligned (16)));
+
+  // View the complex array as 2*vlen floats and fill it from rand().
+  float* const samples = (float*)input0;
+  for(int k = 0; k < 2*vlen; ++k) {
+    samples[k] = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)));
+  }
+  printf("32fc_deinterleave_real_64f_aligned\n");
+
+  // Reference implementation.
+  clock_t tick = clock();
+  for(int pass = 0; pass < ITERS; ++pass) {
+    volk_32fc_deinterleave_real_64f_aligned16_manual(output_generic, input0, vlen, "generic");
+  }
+  clock_t tock = clock();
+  printf("generic_time: %f\n", (double)(tock-tick)/(double)CLOCKS_PER_SEC);
+
+  // SSE2 implementation.
+  tick = clock();
+  for(int pass = 0; pass < ITERS; ++pass) {
+    volk_32fc_deinterleave_real_64f_aligned16_manual(output_sse2, input0, vlen, "sse2");
+  }
+  tock = clock();
+  printf("sse2_time: %f\n", (double)(tock-tick)/(double)CLOCKS_PER_SEC);
+
+  // The tolerance scales with the generic value, so a generic output of
+  // exactly 0 demands an exact match.
+  for(int k = 0; k < vlen; ++k) {
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[k], output_sse2[k], fabs(output_generic[k])*1e-4);
+  }
+}
+
+#endif
diff --git a/volk/lib/qa_32fc_deinterleave_real_64f_aligned16.h b/volk/lib/qa_32fc_deinterleave_real_64f_aligned16.h
new file mode 100644
index 000000000..3e55fb812
--- /dev/null
+++ b/volk/lib/qa_32fc_deinterleave_real_64f_aligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_32FC_DEINTERLEAVE_REAL_64F_ALIGNED16_H
+#define INCLUDED_QA_32FC_DEINTERLEAVE_REAL_64F_ALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+// CppUnit test case for the 32fc_deinterleave_real_64f_aligned16 kernel; its suite registers the single test t1.
+class qa_32fc_deinterleave_real_64f_aligned16 : public CppUnit::TestCase {
+  CPPUNIT_TEST_SUITE (qa_32fc_deinterleave_real_64f_aligned16);
+  CPPUNIT_TEST (t1);
+  CPPUNIT_TEST_SUITE_END ();
+
+ private:
+  void t1 ();  // the test body registered with CPPUNIT_TEST above
+};
+
+
+#endif /* INCLUDED_QA_32FC_DEINTERLEAVE_REAL_64F_ALIGNED16_H */
diff --git a/volk/lib/qa_32fc_dot_prod_aligned16.cc b/volk/lib/qa_32fc_dot_prod_aligned16.cc
new file mode 100644
index 000000000..bcf9ea954
--- /dev/null
+++ b/volk/lib/qa_32fc_dot_prod_aligned16.cc
@@ -0,0 +1,214 @@
+#include <volk/volk.h>
+#include <qa_32fc_dot_prod_aligned16.h>
+#include <stdlib.h>
+#include <math.h>
+#include <time.h>
+#include <stdio.h>
+
+
+
+// Asserts real and imaginary parts separately, each within |expected part| * delta; do/while(0) keeps both checks in one statement.
+#define assertcomplexEqual(expected, actual, delta) do { \
+  CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * (delta)); \
+  CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected)) * (delta)); } while (0)
+#define	ERR_DELTA	(1e-4)  // relative tolerance handed to assertcomplexEqual
+
+//random-input helpers shared by the tests below
+static float uniform() {  // maps one rand() draw from [0, RAND_MAX] onto [-1, 1]
+  return (static_cast<float>(rand()) / RAND_MAX - 0.5) * 2.0;
+}
+
+// Fills buf[0..n) with unscaled uniform() draws.
+static void random_floats (float *buf, unsigned n)
+{
+  for (float *slot = buf, *const stop = buf + n; slot != stop; ++slot)
+    *slot = uniform ();
+}
+
+
+
+#if LV_HAVE_SSE3
+void qa_32fc_dot_prod_aligned16::t1() {
+  // Computes one complex dot product with the "generic" and with the "sse3"
+  // implementation on the same random input/taps, prints the clock() time of
+  // each call, and requires both results to agree within ERR_DELTA (relative).
+  const int vlen = 2046;
+
+  volk_environment_init();
+  clock_t start, end;
+  double total;
+  std::complex<float>* input = NULL;
+  std::complex<float>* taps = NULL;
+  std::complex<float>* result_generic = NULL;
+  std::complex<float>* result_sse3 = NULL;
+
+  // 16-byte aligned storage: vlen << 3 bytes per operand, 8 bytes per result.
+  // A failed allocation stops the test instead of using an unset pointer.
+  int ret = posix_memalign((void**)&input, 16, vlen << 3);
+  ret |= posix_memalign((void**)&taps, 16, vlen << 3);
+  ret |= posix_memalign((void**)&result_generic, 16, 8);
+  ret |= posix_memalign((void**)&result_sse3, 16, 8);
+  CPPUNIT_ASSERT_MESSAGE("posix_memalign failed", ret == 0);
+
+  result_generic[0] = std::complex<float>(0,0);
+  result_sse3[0] = std::complex<float>(0,0);
+
+  random_floats((float*)input, vlen * 2);
+  random_floats((float*)taps, vlen * 2);
+
+  printf("32fc_dot_prod_aligned16\n");
+
+  // vlen * 8 equals the operand size in bytes (presumably what the kernel expects -- confirm).
+  start = clock();
+  volk_32fc_dot_prod_aligned16_manual(result_generic, input, taps, vlen * 8,  "generic");
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("generic_time: %f\n", total);
+
+  start = clock();
+  volk_32fc_dot_prod_aligned16_manual(result_sse3, input, taps, vlen * 8, "sse3");
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("sse3_time: %f\n", total);
+
+  printf("generic: %f +i%f ... sse3: %f +i%f\n", std::real(result_generic[0]), std::imag(result_generic[0]), std::real(result_sse3[0]), std::imag(result_sse3[0]));
+
+  assertcomplexEqual(result_generic[0], result_sse3[0], ERR_DELTA);
+
+  free(input);
+  free(taps);
+  free(result_generic);
+  free(result_sse3);
+}
+
+#else
+void qa_32fc_dot_prod_aligned16::t1() {  // stub used when LV_HAVE_SSE3 is false
+  fputs("sse3 not available... no test performed\n", stdout);
+}
+
+#endif
+
+#if LV_HAVE_SSE && LV_HAVE_32
+void qa_32fc_dot_prod_aligned16::t2() {
+  // Computes one complex dot product with the "generic" and with the "sse_32"
+  // implementation on the same random input/taps, prints the clock() time of
+  // each call, and requires both results to agree within ERR_DELTA (relative).
+  const int vlen = 2046;
+
+  volk_environment_init();
+  clock_t start, end;
+  double total;
+  std::complex<float>* input = NULL;
+  std::complex<float>* taps = NULL;
+  std::complex<float>* result_generic = NULL;
+  std::complex<float>* result_sse = NULL;
+
+  // 16-byte aligned storage: vlen << 3 bytes per operand, 8 bytes per result.
+  // A failed allocation stops the test instead of using an unset pointer.
+  int ret = posix_memalign((void**)&input, 16, vlen << 3);
+  ret |= posix_memalign((void**)&taps, 16, vlen << 3);
+  ret |= posix_memalign((void**)&result_generic, 16, 8);
+  ret |= posix_memalign((void**)&result_sse, 16, 8);
+  CPPUNIT_ASSERT_MESSAGE("posix_memalign failed", ret == 0);
+
+  result_generic[0] = std::complex<float>(0,0);
+  result_sse[0] = std::complex<float>(0,0);
+
+  random_floats((float*)input, vlen * 2);
+  random_floats((float*)taps, vlen * 2);
+
+  printf("32fc_dot_prod_aligned16\n");
+
+  // vlen * 8 equals the operand size in bytes (presumably what the kernel expects -- confirm).
+  start = clock();
+  volk_32fc_dot_prod_aligned16_manual(result_generic, input, taps, vlen * 8,  "generic");
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("generic_time: %f\n", total);
+
+  start = clock();
+  volk_32fc_dot_prod_aligned16_manual(result_sse, input, taps, vlen * 8, "sse_32");
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("sse_32_time: %f\n", total);
+
+  printf("generic: %f +i%f ... sse_32: %f +i%f\n", std::real(result_generic[0]), std::imag(result_generic[0]), std::real(result_sse[0]), std::imag(result_sse[0]));
+
+  assertcomplexEqual(result_generic[0], result_sse[0], ERR_DELTA);
+
+  free(input);
+  free(taps);
+  free(result_generic);
+  free(result_sse);
+}
+
+#else
+void qa_32fc_dot_prod_aligned16::t2() {  // stub used unless both LV_HAVE_SSE and LV_HAVE_32 are true
+  fputs("sse_32 not available... no test performed\n", stdout);
+}
+
+#endif
+
+#if LV_HAVE_SSE && LV_HAVE_64
+
+void qa_32fc_dot_prod_aligned16::t3() {
+  // Computes one complex dot product with the "generic" and with the "sse_64"
+  // implementation on the same random input/taps, prints the clock() time of
+  // each call, and requires both results to agree within ERR_DELTA (relative).
+  const int vlen = 2046;
+
+  volk_environment_init();
+  clock_t start, end;
+  double total;
+  std::complex<float>* input = NULL;
+  std::complex<float>* taps = NULL;
+  std::complex<float>* result_generic = NULL;
+  std::complex<float>* result_sse = NULL;
+
+  // 16-byte aligned storage: vlen << 3 bytes per operand, 8 bytes per result.
+  // A failed allocation stops the test instead of using an unset pointer.
+  int ret = posix_memalign((void**)&input, 16, vlen << 3);
+  ret |= posix_memalign((void**)&taps, 16, vlen << 3);
+  ret |= posix_memalign((void**)&result_generic, 16, 8);
+  ret |= posix_memalign((void**)&result_sse, 16, 8);
+  CPPUNIT_ASSERT_MESSAGE("posix_memalign failed", ret == 0);
+
+  result_generic[0] = std::complex<float>(0,0);
+  result_sse[0] = std::complex<float>(0,0);
+
+  random_floats((float*)input, vlen * 2);
+  random_floats((float*)taps, vlen * 2);
+
+  printf("32fc_dot_prod_aligned16\n");
+
+  // vlen * 8 equals the operand size in bytes (presumably what the kernel expects -- confirm).
+  start = clock();
+  volk_32fc_dot_prod_aligned16_manual(result_generic, input, taps, vlen * 8,  "generic");
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("generic_time: %f\n", total);
+
+  start = clock();
+  volk_32fc_dot_prod_aligned16_manual(result_sse, input, taps, vlen * 8, "sse_64");
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("sse_64_time: %f\n", total);
+
+  printf("generic: %f +i%f ... sse_64: %f +i%f\n", std::real(result_generic[0]), std::imag(result_generic[0]), std::real(result_sse[0]), std::imag(result_sse[0]));
+
+  assertcomplexEqual(result_generic[0], result_sse[0], ERR_DELTA);
+
+  free(input);
+  free(taps);
+  free(result_generic);
+  free(result_sse);
+}
+
+#else
+void qa_32fc_dot_prod_aligned16::t3() {  // stub used unless both LV_HAVE_SSE and LV_HAVE_64 are true
+  fputs("sse_64 not available... no test performed\n", stdout);
+}
+
+
+
+#endif 
diff --git a/volk/lib/qa_32fc_dot_prod_aligned16.h b/volk/lib/qa_32fc_dot_prod_aligned16.h
new file mode 100644
index 000000000..4b360db27
--- /dev/null
+++ b/volk/lib/qa_32fc_dot_prod_aligned16.h
@@ -0,0 +1,20 @@
+#ifndef INCLUDED_QA_32FC_DOT_PROD_ALIGNED16_H
+#define INCLUDED_QA_32FC_DOT_PROD_ALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_32fc_dot_prod_aligned16 : public CppUnit::TestCase {  // CppUnit test case for the 32fc_dot_prod_aligned16 kernel
+  CPPUNIT_TEST_SUITE (qa_32fc_dot_prod_aligned16);
+  CPPUNIT_TEST (t1);  // generic vs "sse3"
+  CPPUNIT_TEST (t2);  // generic vs "sse_32"; registered so the declared test actually runs
+  CPPUNIT_TEST (t3);  // generic vs "sse_64"; registered so the declared test actually runs
+  CPPUNIT_TEST_SUITE_END ();
+ private:
+  void t1 ();
+  void t2 ();
+  void t3 ();
+};
+
+
+#endif /* INCLUDED_QA_32FC_DOT_PROD_ALIGNED16_H */
diff --git a/volk/lib/qa_32fc_index_max_aligned16.cc b/volk/lib/qa_32fc_index_max_aligned16.cc
new file mode 100644
index 000000000..4d83f1639
--- /dev/null
+++ b/volk/lib/qa_32fc_index_max_aligned16.cc
@@ -0,0 +1,89 @@
+#include <volk/volk.h>
+#include <qa_32fc_index_max_aligned16.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+
+#define ERR_DELTA (1e-4)
+#define NUM_ITERS 1000000
+#define VEC_LEN 3096
+static float uniform() {  // maps one rand() draw from [0, RAND_MAX] onto [-1, 1]
+  return (static_cast<float>(rand()) / RAND_MAX - 0.5) * 2.0;
+}
+
+// Fills buf[0..n) with uniform() draws scaled by 32767.
+static void
+random_floats (float *buf, unsigned n)
+{
+  float *slot = buf;
+  float *const stop = buf + n;
+  while (slot != stop) {
+    *slot++ = uniform () * 32767;
+  }
+}
+
+
+#ifndef LV_HAVE_SSE3
+
+void qa_32fc_index_max_aligned16::t1(){  // stub used when LV_HAVE_SSE3 is not defined
+  fputs("sse3 not available... no test performed\n", stdout);
+}
+
+#else
+
+
+void qa_32fc_index_max_aligned16::t1(){
+  // Runs the index-max kernel NUM_ITERS times with the "generic" and with the
+  // "sse3" implementation on the same random complex input, prints the clock()
+  // time of each batch, and compares the two reported indices.
+  const int vlen = VEC_LEN;
+
+  volk_environment_init();
+
+  // Each implementation writes its result through one of these pointers.
+  unsigned int i_target;
+  unsigned int* target = &i_target;
+  unsigned int i_target_generic;
+  unsigned int* target_generic = &i_target_generic;
+
+  // vlen << 3 bytes of 16-byte aligned input.  The return code is checked so
+  // that a failed allocation stops the test instead of using an unset src0.
+  std::complex<float>* src0 = NULL;
+  const int ret = posix_memalign((void**)&src0, 16, vlen << 3);
+  CPPUNIT_ASSERT_MESSAGE("posix_memalign failed", ret == 0);
+
+  // The buffer is filled as 2 * vlen floats.
+  random_floats((float*)src0, vlen * 2);
+
+  printf("32fc_index_max_aligned16\n");
+
+  clock_t start, end;
+  double total;
+
+  // Reference implementation.
+  start = clock();
+  for(int k = 0; k < NUM_ITERS; ++k) {
+    volk_32fc_index_max_aligned16_manual(target_generic, src0, vlen << 3, "generic");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("generic time: %f\n", total);
+
+  // SSE3 implementation.
+  start = clock();
+  for(int k = 0; k < NUM_ITERS; ++k) {
+    volk_32fc_index_max_aligned16_manual(target, src0, vlen << 3, "sse3");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("sse3 time: %f\n", total);
+
+  // NOTE(review): a delta of 1.1 lets the two indices differ by one; confirm
+  // that this slack is intended rather than requiring identical indices.
+  printf("generic: %u, sse3: %u\n", target_generic[0], target[0]);
+  CPPUNIT_ASSERT_DOUBLES_EQUAL(target_generic[0], target[0], 1.1);
+
+  free(src0);
+}
+
+#endif /*LV_HAVE_SSE3*/
diff --git a/volk/lib/qa_32fc_index_max_aligned16.h b/volk/lib/qa_32fc_index_max_aligned16.h
new file mode 100644
index 000000000..0990bcb1f
--- /dev/null
+++ b/volk/lib/qa_32fc_index_max_aligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_32FC_INDEX_MAX_ALIGNED16_H
+#define INCLUDED_QA_32FC_INDEX_MAX_ALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+// CppUnit test case for the 32fc_index_max_aligned16 kernel; its suite registers the single test t1.
+class qa_32fc_index_max_aligned16 : public CppUnit::TestCase {
+  CPPUNIT_TEST_SUITE (qa_32fc_index_max_aligned16);
+  CPPUNIT_TEST (t1);
+  CPPUNIT_TEST_SUITE_END ();
+
+ private:
+  void t1 ();  // the test body registered with CPPUNIT_TEST above
+};
+
+
+#endif /* INCLUDED_QA_32FC_INDEX_MAX_ALIGNED16_H */
diff --git a/volk/lib/qa_32fc_magnitude_16s_aligned16.cc b/volk/lib/qa_32fc_magnitude_16s_aligned16.cc
new file mode 100644
index 000000000..a4be1616b
--- /dev/null
+++ b/volk/lib/qa_32fc_magnitude_16s_aligned16.cc
@@ -0,0 +1,70 @@
+#include <volk/volk.h>
+#include <qa_32fc_magnitude_16s_aligned16.h>
+#include <volk/volk_32fc_magnitude_16s_aligned16.h>
+#include <cstdlib>
+
+//test for sse and sse3 (the real test is built only when LV_HAVE_SSE3 is defined)
+
+#ifndef LV_HAVE_SSE3
+
+void qa_32fc_magnitude_16s_aligned16::t1() {  // Variant built when LV_HAVE_SSE3 is not defined.
+  printf("sse3 not available... no test performed\n");  // Only reports the skip; no assertions are made.
+}
+
+#else
+
+void qa_32fc_magnitude_16s_aligned16::t1() {
+  // Times the "generic", "sse" and "sse3" variants on one random input, then checks that they agree.
+  volk_environment_init();
+  clock_t start, end;
+  double total;
+  const int vlen = 3201;
+  const int ITERS = 100000;
+  std::complex<float> input0[vlen] __attribute__ ((aligned (16)));
+  // One 16-byte-aligned output buffer per implementation under test.
+  int16_t output_generic[vlen] __attribute__ ((aligned (16)));
+  int16_t output_sse[vlen] __attribute__ ((aligned (16)));
+  int16_t output_sse3[vlen] __attribute__ ((aligned (16)));
+  // Fill the complex input through a float view (2*vlen floats).
+  float* inputLoad = (float*)input0;
+  for(int i = 0; i < 2*vlen; ++i) {   
+    inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)));  // rand() recentred and scaled by RAND_MAX/2
+  }
+  printf("32fc_magnitude_16s_aligned\n");
+  // Timed run: ITERS calls of the "generic" variant.
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_32fc_magnitude_16s_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;  // clock() ticks converted to seconds
+  printf("generic_time: %f\n", total);
+  start = clock();  // Timed run: ITERS calls of the "sse" variant.
+  for(int count = 0; count < ITERS; ++count) {
+    volk_32fc_magnitude_16s_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("sse_time: %f\n", total);
+  // Timed run: ITERS calls of the "sse3" variant.
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_32fc_magnitude_16s_aligned16_manual(output_sse3, input0, 32768.0, vlen, "sse3");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("sse3_time: %f\n", total);
+  // Leftover debug loop: its body is commented out, so it does nothing.
+  for(int i = 0; i < 1; ++i) {
+    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
+    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
+  }
+  // Element-wise check against the generic output; an absolute difference of up to 1.1 is accepted.
+  for(int i = 0; i < vlen; ++i) {
+    //printf("%d...%d\n", output0[i], output01[i]);
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], 1.1);
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], 1.1);
+  }
+}
+
+#endif
diff --git a/volk/lib/qa_32fc_magnitude_16s_aligned16.h b/volk/lib/qa_32fc_magnitude_16s_aligned16.h
new file mode 100644
index 000000000..ffdf1dd9e
--- /dev/null
+++ b/volk/lib/qa_32fc_magnitude_16s_aligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_32FC_MAGNITUDE_16S_ALIGNED16_H
+#define INCLUDED_QA_32FC_MAGNITUDE_16S_ALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_32fc_magnitude_16s_aligned16 : public CppUnit::TestCase {
+  // CppUnit fixture; the macros below register t1 as the suite's only test.
+  CPPUNIT_TEST_SUITE (qa_32fc_magnitude_16s_aligned16);
+  CPPUNIT_TEST (t1);
+  CPPUNIT_TEST_SUITE_END ();
+
+ private:
+  void t1 ();  // Test body; only declared in this header.
+};
+
+
+#endif /* INCLUDED_QA_32FC_MAGNITUDE_16S_ALIGNED16_H */
diff --git a/volk/lib/qa_32fc_magnitude_32f_aligned16.cc b/volk/lib/qa_32fc_magnitude_32f_aligned16.cc
new file mode 100644
index 000000000..d69ada408
--- /dev/null
+++ b/volk/lib/qa_32fc_magnitude_32f_aligned16.cc
@@ -0,0 +1,70 @@
+#include <volk/volk.h>
+#include <qa_32fc_magnitude_32f_aligned16.h>
+#include <volk/volk_32fc_magnitude_32f_aligned16.h>
+#include <cstdlib>
+
+//test for sse and sse3 (the real test is built only when LV_HAVE_SSE3 is defined)
+
+#ifndef LV_HAVE_SSE3
+
+void qa_32fc_magnitude_32f_aligned16::t1() {  // Variant built when LV_HAVE_SSE3 is not defined.
+  printf("sse3 not available... no test performed\n");  // Only reports the skip; no assertions are made.
+}
+
+#else
+
+void qa_32fc_magnitude_32f_aligned16::t1() {
+  // Times the "generic", "sse" and "sse3" variants on one random input, then checks that they agree.
+  volk_environment_init();
+  clock_t start, end;
+  double total;
+  const int vlen = 3201;
+  const int ITERS = 100000;
+  std::complex<float> input0[vlen] __attribute__ ((aligned (16)));
+  // One 16-byte-aligned output buffer per implementation under test.
+  float output_generic[vlen] __attribute__ ((aligned (16)));
+  float output_sse[vlen] __attribute__ ((aligned (16)));
+  float output_sse3[vlen] __attribute__ ((aligned (16)));
+  // Fill the complex input through a float view (2*vlen floats).
+  float* inputLoad = (float*)input0;
+  for(int i = 0; i < 2*vlen; ++i) {   
+    inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)));  // rand() recentred and scaled by RAND_MAX/2
+  }
+  printf("32fc_magnitude_32f_aligned\n");
+  // Timed run: ITERS calls of the "generic" variant.
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_32fc_magnitude_32f_aligned16_manual(output_generic, input0, vlen, "generic");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;  // clock() ticks converted to seconds
+  printf("generic_time: %f\n", total);
+  start = clock();  // Timed run: ITERS calls of the "sse" variant.
+  for(int count = 0; count < ITERS; ++count) {
+    volk_32fc_magnitude_32f_aligned16_manual(output_sse, input0, vlen, "sse");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("sse_time: %f\n", total);
+  // Timed run: ITERS calls of the "sse3" variant.
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_32fc_magnitude_32f_aligned16_manual(output_sse3, input0, vlen, "sse3");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("sse3_time: %f\n", total);
+  // Leftover debug loop: its body is commented out, so it does nothing.
+  for(int i = 0; i < 1; ++i) {
+    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
+    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
+  }
+  // Element-wise check against the generic output with a relative tolerance of 1e-4.
+  for(int i = 0; i < vlen; ++i) {
+    //printf("%d...%d\n", output0[i], output01[i]);
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4);
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], fabs(output_generic[i])*1e-4);
+  }
+}
+
+#endif
diff --git a/volk/lib/qa_32fc_magnitude_32f_aligned16.h b/volk/lib/qa_32fc_magnitude_32f_aligned16.h
new file mode 100644
index 000000000..a2881308c
--- /dev/null
+++ b/volk/lib/qa_32fc_magnitude_32f_aligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_32FC_MAGNITUDE_32F_ALIGNED16_H
+#define INCLUDED_QA_32FC_MAGNITUDE_32F_ALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_32fc_magnitude_32f_aligned16 : public CppUnit::TestCase {
+  // CppUnit fixture; the macros below register t1 as the suite's only test.
+  CPPUNIT_TEST_SUITE (qa_32fc_magnitude_32f_aligned16);
+  CPPUNIT_TEST (t1);
+  CPPUNIT_TEST_SUITE_END ();
+
+ private:
+  void t1 ();  // Test body; only declared in this header.
+};
+
+
+#endif /* INCLUDED_QA_32FC_MAGNITUDE_32F_ALIGNED16_H */
diff --git a/volk/lib/qa_32fc_multiply_aligned16.cc b/volk/lib/qa_32fc_multiply_aligned16.cc
new file mode 100644
index 000000000..e1f7eab3d
--- /dev/null
+++ b/volk/lib/qa_32fc_multiply_aligned16.cc
@@ -0,0 +1,86 @@
+#include <volk/volk_runtime.h>
+#include <volk/volk.h>
+#include <qa_32fc_multiply_aligned16.h>
+#include <stdlib.h>
+#include <math.h>
+#include <time.h>
+
+
+
+#define assertcomplexEqual(expected, actual, delta)			\
+  CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * delta); \
+  CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected))* delta);	
+
+#define	ERR_DELTA	(1e-3)
+
+//test for sse3
+static float uniform() {  // File-local source of pseudo-random test values.
+  return 2.0 * ((float) rand() / RAND_MAX - 0.5);	// maps rand() from [0, RAND_MAX] onto [-1, 1]
+}
+
+static void  // Fills buf[0..n-1] with values drawn from uniform().
+random_floats (float *buf, unsigned n)
+{
+  for (unsigned i = 0; i < n; i++)
+    buf[i] = uniform ();  // buf must have room for at least n floats
+}
+
+#ifdef LV_HAVE_SSE3
+void qa_32fc_multiply_aligned16::t1() {
+  // Times the "generic" and "sse3" variants on the same random inputs, then checks that they agree.
+  const int vlen = 2046;
+  const int ITERS = 100000;
+
+  int i;
+  volk_environment_init();
+  int ret;
+  clock_t start, end;
+  double total;
+  std::complex<float>* input;
+  std::complex<float>* taps;
+  
+  std::complex<float>* result_generic;
+  std::complex<float>* result_sse3;
+  // 16-byte-aligned buffers of vlen complex values (2 floats each). NOTE(review): ret is never checked.
+  ret = posix_memalign((void**)&input, 16, vlen*2*sizeof(float));
+  ret = posix_memalign((void**)&taps, 16, vlen*2*sizeof(float));
+  ret = posix_memalign((void**)&result_generic, 16, vlen*2*sizeof(float));
+  ret = posix_memalign((void**)&result_sse3, 16, vlen*2*sizeof(float));
+  // Both operands are filled through a float view, 2*vlen floats each.
+  random_floats((float*)input, vlen * 2);
+  random_floats((float*)taps, vlen * 2);
+  
+  printf("32fc_multiply_aligned16\n");
+  // Timed run: ITERS calls of the "generic" variant.
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_32fc_multiply_aligned16_manual(result_generic, input, taps, vlen,  "generic");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;  // clock() ticks converted to seconds
+  printf("generic_time: %f\n", total);
+  // Timed run: ITERS calls of the "sse3" variant.
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_32fc_multiply_aligned16_manual(result_sse3, input, taps, vlen, "sse3");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("sse3_time: %f\n", total);
+  // Real and imaginary parts are compared separately with relative tolerance ERR_DELTA.
+  for(i = 0; i < vlen; i++){
+    assertcomplexEqual(result_generic[i], result_sse3[i], ERR_DELTA);
+  }
+  // Release the posix_memalign buffers (not reached if an assertion above fails).
+  free(input);
+  free(taps);
+  free(result_generic);
+  free(result_sse3);
+  
+}
+#else
+void qa_32fc_multiply_aligned16::t1() {  // Variant built when LV_HAVE_SSE3 is not defined.
+  printf("sse3 not available... no test performed\n");  // Only reports the skip; no assertions are made.
+}
+
+#endif /* LV_HAVE_SSE3 */
diff --git a/volk/lib/qa_32fc_multiply_aligned16.h b/volk/lib/qa_32fc_multiply_aligned16.h
new file mode 100644
index 000000000..c8abaa8fe
--- /dev/null
+++ b/volk/lib/qa_32fc_multiply_aligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_32FC_MULTIPLY_ALIGNED16_H
+#define INCLUDED_QA_32FC_MULTIPLY_ALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_32fc_multiply_aligned16 : public CppUnit::TestCase {
+  // CppUnit fixture; the macros below register t1 as the suite's only test.
+  CPPUNIT_TEST_SUITE (qa_32fc_multiply_aligned16);
+  CPPUNIT_TEST (t1);
+  CPPUNIT_TEST_SUITE_END ();
+
+ private:
+  void t1 ();  // Test body; only declared in this header.
+};
+
+
+#endif /* INCLUDED_QA_32FC_MULTIPLY_ALIGNED16_H */
diff --git a/volk/lib/qa_32fc_power_spectral_density_32f_aligned16.cc b/volk/lib/qa_32fc_power_spectral_density_32f_aligned16.cc
new file mode 100644
index 000000000..83cdf4b15
--- /dev/null
+++ b/volk/lib/qa_32fc_power_spectral_density_32f_aligned16.cc
@@ -0,0 +1,63 @@
+#include <volk/volk.h>
+#include <qa_32fc_power_spectral_density_32f_aligned16.h>
+#include <volk/volk_32fc_power_spectral_density_32f_aligned16.h>
+#include <cstdlib>
+
+//test for sse3
+
+#ifndef LV_HAVE_SSE3
+
+void qa_32fc_power_spectral_density_32f_aligned16::t1() {  // Variant built when LV_HAVE_SSE3 is not defined.
+  printf("sse3 not available... no test performed\n");  // Only reports the skip; no assertions are made.
+}
+
+#else
+
+void qa_32fc_power_spectral_density_32f_aligned16::t1() {
+  // Times the "generic" and "sse3" variants on one random input, then checks that they agree.
+  volk_environment_init();
+  clock_t start, end;
+  double total;
+  const int vlen = 3201;
+  const int ITERS = 10000;
+  std::complex<float> input0[vlen] __attribute__ ((aligned (16)));
+  // One 16-byte-aligned output buffer per implementation under test.
+  float output_generic[vlen] __attribute__ ((aligned (16)));
+  float output_sse3[vlen] __attribute__ ((aligned (16)));
+  // Extra kernel arguments, passed unchanged to both variants.
+  const float scalar = vlen;
+  const float rbw = 1.7;
+  // Fill the complex input through a float view (2*vlen floats).
+  float* inputLoad = (float*)input0;
+  for(int i = 0; i < 2*vlen; ++i) {   
+    inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)));  // rand() recentred and scaled by RAND_MAX/2
+  }
+  printf("32fc_power_spectral_density_32f_aligned\n");
+  // Timed run: ITERS calls of the "generic" variant.
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_32fc_power_spectral_density_32f_aligned16_manual(output_generic, input0, scalar, rbw, vlen, "generic");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;  // clock() ticks converted to seconds
+  printf("generic_time: %f\n", total);
+  start = clock();  // Timed run: ITERS calls of the "sse3" variant.
+  for(int count = 0; count < ITERS; ++count) {
+    volk_32fc_power_spectral_density_32f_aligned16_manual(output_sse3, input0, scalar, rbw, vlen, "sse3");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("sse3_time: %f\n", total);
+  // Leftover debug loop: its body is commented out, so it does nothing.
+  for(int i = 0; i < 1; ++i) {
+    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
+    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
+  }
+  // Element-wise check against the generic output with a relative tolerance of 1e-4.
+  for(int i = 0; i < vlen; ++i) {
+    //printf("%d...%d\n", output0[i], output01[i]);
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], fabs(output_generic[i]*1e-4));
+  }
+}
+
+#endif
diff --git a/volk/lib/qa_32fc_power_spectral_density_32f_aligned16.h b/volk/lib/qa_32fc_power_spectral_density_32f_aligned16.h
new file mode 100644
index 000000000..26f430bec
--- /dev/null
+++ b/volk/lib/qa_32fc_power_spectral_density_32f_aligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_32FC_POWER_SPECTRAL_DENSITY_32F_ALIGNED16_H
+#define INCLUDED_QA_32FC_POWER_SPECTRAL_DENSITY_32F_ALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_32fc_power_spectral_density_32f_aligned16 : public CppUnit::TestCase {
+  // CppUnit fixture; the macros below register t1 as the suite's only test.
+  CPPUNIT_TEST_SUITE (qa_32fc_power_spectral_density_32f_aligned16);
+  CPPUNIT_TEST (t1);
+  CPPUNIT_TEST_SUITE_END ();
+
+ private:
+  void t1 ();  // Test body; only declared in this header.
+};
+
+
+#endif /* INCLUDED_QA_32FC_POWER_SPECTRAL_DENSITY_32F_ALIGNED16_H */
diff --git a/volk/lib/qa_32fc_power_spectrum_32f_aligned16.cc b/volk/lib/qa_32fc_power_spectrum_32f_aligned16.cc
new file mode 100644
index 000000000..4d1359068
--- /dev/null
+++ b/volk/lib/qa_32fc_power_spectrum_32f_aligned16.cc
@@ -0,0 +1,63 @@
+#include <volk/volk.h>
+#include <qa_32fc_power_spectrum_32f_aligned16.h>
+#include <volk/volk_32fc_power_spectrum_32f_aligned16.h>
+#include <cstdlib>
+
+//test for sse3
+
+#ifndef LV_HAVE_SSE3
+
+void qa_32fc_power_spectrum_32f_aligned16::t1() {  // Variant built when LV_HAVE_SSE3 is not defined.
+  printf("sse3 not available... no test performed\n");  // Only reports the skip; no assertions are made.
+}
+
+#else
+
+void qa_32fc_power_spectrum_32f_aligned16::t1() {
+  // Times the "generic" and "sse3" variants on one random input, then checks that they agree.
+  volk_environment_init();
+  clock_t start, end;
+  double total;
+  const int vlen = 3201;
+  const int ITERS = 10000;
+  std::complex<float> input0[vlen] __attribute__ ((aligned (16)));
+  // One 16-byte-aligned output buffer per implementation under test.
+  float output_generic[vlen] __attribute__ ((aligned (16)));
+  float output_sse3[vlen] __attribute__ ((aligned (16)));
+  // Extra kernel argument, passed unchanged to both variants.
+  const float scalar = vlen;
+  // Fill the complex input through a float view (2*vlen floats).
+  float* inputLoad = (float*)input0;
+  for(int i = 0; i < 2*vlen; ++i) {   
+    inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)));  // rand() recentred and scaled by RAND_MAX/2
+  }
+
+  printf("32fc_power_spectrum_32f_aligned\n");
+  // Timed run: ITERS calls of the "generic" variant.
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_32fc_power_spectrum_32f_aligned16_manual(output_generic, input0, scalar, vlen, "generic");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;  // clock() ticks converted to seconds
+  printf("generic_time: %f\n", total);
+  start = clock();  // Timed run: ITERS calls of the "sse3" variant.
+  for(int count = 0; count < ITERS; ++count) {
+    volk_32fc_power_spectrum_32f_aligned16_manual(output_sse3, input0, scalar, vlen, "sse3");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("sse3_time: %f\n", total);
+  // Leftover debug loop: its body is commented out, so it does nothing.
+  for(int i = 0; i < 1; ++i) {
+    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
+    //printf("generic... %d, ssse33... %d\n", output0[i], output1[i]);
+  }
+  // Element-wise check against the generic output with a relative tolerance of 1e-4.
+  for(int i = 0; i < vlen; ++i) {
+    //printf("%d...%d\n", output0[i], output01[i]);
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], fabs(output_generic[i]*1e-4));
+  }
+}
+
+#endif
diff --git a/volk/lib/qa_32fc_power_spectrum_32f_aligned16.h b/volk/lib/qa_32fc_power_spectrum_32f_aligned16.h
new file mode 100644
index 000000000..d991223f3
--- /dev/null
+++ b/volk/lib/qa_32fc_power_spectrum_32f_aligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_32FC_POWER_SPECTRUM_32F_ALIGNED16_H
+#define INCLUDED_QA_32FC_POWER_SPECTRUM_32F_ALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_32fc_power_spectrum_32f_aligned16 : public CppUnit::TestCase {
+  // CppUnit fixture; the macros below register t1 as the suite's only test.
+  CPPUNIT_TEST_SUITE (qa_32fc_power_spectrum_32f_aligned16);
+  CPPUNIT_TEST (t1);
+  CPPUNIT_TEST_SUITE_END ();
+
+ private:
+  void t1 ();  // Test body; only declared in this header.
+};
+
+
+#endif /* INCLUDED_QA_32FC_POWER_SPECTRUM_32F_ALIGNED16_H */
diff --git a/volk/lib/qa_32fc_square_dist_aligned16.cc b/volk/lib/qa_32fc_square_dist_aligned16.cc
new file mode 100644
index 000000000..d9ead8495
--- /dev/null
+++ b/volk/lib/qa_32fc_square_dist_aligned16.cc
@@ -0,0 +1,91 @@
+#include <volk/volk.h>
+#include <qa_32fc_square_dist_aligned16.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+
+#define ERR_DELTA (1e-4)
+#define NUM_ITERS 10000000
+#define VEC_LEN 64
+static float uniform() {  // File-local source of pseudo-random test values.
+  return 2.0 * ((float) rand() / RAND_MAX - 0.5);	// maps rand() from [0, RAND_MAX] onto [-1, 1]
+}
+
+static void  // Fills buf[0..n-1] with uniform() values scaled by 32767.
+random_floats (float *buf, unsigned n)
+{
+  unsigned int i = 0;
+  for (; i < n; i++) {
+    // uniform() lies in [-1, 1], so each element lies in [-32767, 32767].
+    buf[i] = uniform () * 32767;
+
+  }
+}
+
+
+#ifndef LV_HAVE_SSE3
+
+void qa_32fc_square_dist_aligned16::t1(){  // Variant built when LV_HAVE_SSE3 is not defined.
+  printf("sse3 not available... no test performed\n");  // Only reports the skip; no assertions are made.
+}
+
+#else
+
+
+void qa_32fc_square_dist_aligned16::t1(){
+  int i = 0;
+  // Times the "generic" and "sse3" variants on the same random inputs, then checks that they agree.
+  const int vlen = VEC_LEN;
+  volk_environment_init();
+  int ret;
+  
+  float* target;
+  float* target_generic;
+  std::complex<float>* src0 ;
+  std::complex<float>* points;
+  // 16-byte-aligned buffers: vlen*8 bytes of points, vlen*4 bytes per result, 8 bytes for src0. NOTE(review): ret is never checked.
+  ret = posix_memalign((void**)&points, 16, vlen << 3);
+  ret = posix_memalign((void**)&target, 16, vlen << 2);
+  ret = posix_memalign((void**)&target_generic, 16, vlen << 2);
+  ret = posix_memalign((void**)&src0, 16, 8);
+  // Fill vlen complex points and the single complex src0 through float views.
+  random_floats((float*)points, vlen * 2);
+  random_floats((float*)src0, 2);
+  
+  printf("32fc_square_dist_aligned16\n");
+  
+  clock_t start, end;
+  double total;
+  
+  // Timed run of the "generic" variant; the size argument is vlen << 3 — presumably a byte count, TODO confirm.
+  start = clock();
+  for(int k = 0; k < NUM_ITERS; ++k) {
+    volk_32fc_square_dist_aligned16_manual(target_generic, src0, points, vlen << 3, "generic");
+  }
+  end = clock();  
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;  // clock() ticks converted to seconds
+  printf("generic time: %f\n", total);
+  // Timed run of the "sse3" variant with identical arguments.
+  start = clock();
+  for(int k = 0; k < NUM_ITERS; ++k) {
+  volk_32fc_square_dist_aligned16_manual(target, src0, points, vlen << 3, "sse3");
+  }
+  
+  end = clock();  
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("sse3 time: %f\n", total);
+
+  
+  // Element-wise check against the generic output with relative tolerance ERR_DELTA.
+  for(; i < vlen; ++i) {
+    //printf("generic: %f, sse3: %f\n", target_generic[i], target[i]);
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(target_generic[i], target[i], fabs(target_generic[i]) * ERR_DELTA);
+  }
+  // Release the posix_memalign buffers (not reached if an assertion above fails).
+  free(target);
+  free(target_generic);
+  free(points);
+  free(src0);
+}
+
+#endif /*LV_HAVE_SSE3*/
diff --git a/volk/lib/qa_32fc_square_dist_aligned16.h b/volk/lib/qa_32fc_square_dist_aligned16.h
new file mode 100644
index 000000000..9d365d8b0
--- /dev/null
+++ b/volk/lib/qa_32fc_square_dist_aligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_32FC_SQUARE_DIST_ALIGNED16_H
+#define INCLUDED_QA_32FC_SQUARE_DIST_ALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_32fc_square_dist_aligned16 : public CppUnit::TestCase {
+  // CppUnit fixture; the macros below register t1 as the suite's only test.
+  CPPUNIT_TEST_SUITE (qa_32fc_square_dist_aligned16);
+  CPPUNIT_TEST (t1);
+  CPPUNIT_TEST_SUITE_END ();
+
+ private:
+  void t1 ();  // Test body; only declared in this header.
+};
+
+
+#endif /* INCLUDED_QA_32FC_SQUARE_DIST_ALIGNED16_H */
diff --git a/volk/lib/qa_32fc_square_dist_scalar_mult_aligned16.cc b/volk/lib/qa_32fc_square_dist_scalar_mult_aligned16.cc
new file mode 100644
index 000000000..f923d1d5c
--- /dev/null
+++ b/volk/lib/qa_32fc_square_dist_scalar_mult_aligned16.cc
@@ -0,0 +1,96 @@
+#include <volk/volk.h>
+#include <qa_32fc_square_dist_scalar_mult_aligned16.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <time.h>
+
+#define ERR_DELTA .0001
+#define NUM_ITERS 10000000
+#define VEC_LEN 64
+
+static float uniform() {  // File-local source of pseudo-random test values.
+  return 2.0 * ((float) rand() / RAND_MAX - 0.5);	// maps rand() from [0, RAND_MAX] onto [-1, 1]
+}
+
+static void  // Fills buf[0..n-1] with uniform() values scaled by 32767.
+random_floats (float *buf, unsigned n)
+{
+  unsigned int i = 0;
+  for (; i < n; i++) {
+    // uniform() lies in [-1, 1], so each element lies in [-32767, 32767].
+    buf[i] = uniform () * 32767;
+
+  }
+}
+
+
+#ifndef LV_HAVE_SSE3
+
+void qa_32fc_square_dist_scalar_mult_aligned16::t1(){  // Variant built when LV_HAVE_SSE3 is not defined.
+  printf("sse3 not available... no test performed\n");  // Only reports the skip; no assertions are made.
+}
+
+#else
+
+
+void qa_32fc_square_dist_scalar_mult_aligned16::t1(){
+  int i = 0;
+  // Times the "generic" and "sse3" variants on the same random inputs, then checks that they agree.
+  const int vlen = VEC_LEN;
+  
+  volk_environment_init();
+  int ret;
+  
+  float* target;
+  float* target_generic;
+  std::complex<float>* src0 ;
+  std::complex<float>* points;
+  float scalar;
+  // 16-byte-aligned buffers: vlen*8 bytes of points, vlen*4 bytes per result, 8 bytes for src0. NOTE(review): ret is never checked.
+  ret = posix_memalign((void**)&points, 16, vlen << 3);
+  ret = posix_memalign((void**)&target, 16, vlen << 2);
+  ret = posix_memalign((void**)&target_generic, 16, vlen << 2);
+  ret = posix_memalign((void**)&src0, 16, 8);
+  // Fill vlen complex points, the single complex src0 and the scalar with random values.
+  random_floats((float*)points, vlen * 2);
+  random_floats((float*)src0, 2);
+  random_floats(&scalar, 1);
+  
+  printf("32fc_square_dist_scalar_mult_aligned16\n");
+  
+  clock_t start, end;
+  double total;
+  
+  // Timed run of the "generic" variant; the size argument is vlen << 3 — presumably a byte count, TODO confirm.
+  start = clock();
+  for(int k = 0; k < NUM_ITERS; ++k) {
+    volk_32fc_square_dist_scalar_mult_aligned16_manual(target_generic, src0, points, scalar, vlen << 3, "generic");
+  }
+  end = clock();  
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;  // clock() ticks converted to seconds
+  printf("generic time: %f\n", total);
+  // Timed run of the "sse3" variant with identical arguments.
+  start = clock();
+  for(int k = 0; k < NUM_ITERS; ++k) {
+    volk_32fc_square_dist_scalar_mult_aligned16_manual(target, src0, points, scalar, vlen << 3, "sse3");
+  }
+  
+  end = clock();  
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("sse3 time: %f\n", total);
+
+  // Element-wise check. The tolerance is relative to the element being compared;
+  // it was previously derived from element 1 for every index.
+  for(i = 0; i < vlen; ++i) {
+    printf("generic: %f, sse3: %f\n", target_generic[i], target[i]);
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(target[i], target_generic[i], fabs(target_generic[i]) * ERR_DELTA);
+  }
+  // Release the posix_memalign buffers (not reached if an assertion above fails).
+  free(target);
+  free(target_generic);
+  free(points);
+  free(src0);
+}
+
+#endif /*LV_HAVE_SSE3*/
diff --git a/volk/lib/qa_32fc_square_dist_scalar_mult_aligned16.h b/volk/lib/qa_32fc_square_dist_scalar_mult_aligned16.h
new file mode 100644
index 000000000..ac4e3c45b
--- /dev/null
+++ b/volk/lib/qa_32fc_square_dist_scalar_mult_aligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_32FC_SQUARE_DIST_SCALAR_MULT_ALIGNED16_H
+#define INCLUDED_QA_32FC_SQUARE_DIST_SCALAR_MULT_ALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_32fc_square_dist_scalar_mult_aligned16 : public CppUnit::TestCase {
+  // CppUnit fixture; the macros below register t1 as the suite's only test.
+  CPPUNIT_TEST_SUITE (qa_32fc_square_dist_scalar_mult_aligned16);
+  CPPUNIT_TEST (t1);
+  CPPUNIT_TEST_SUITE_END ();
+
+ private:
+  void t1 ();  // Test body; only declared in this header.
+};
+
+
+#endif /* INCLUDED_QA_32FC_SQUARE_DIST_SCALAR_MULT_ALIGNED16_H */
diff --git a/volk/lib/qa_32s_and_aligned16.cc b/volk/lib/qa_32s_and_aligned16.cc
new file mode 100644
index 000000000..72d05cf6f
--- /dev/null
+++ b/volk/lib/qa_32s_and_aligned16.cc
@@ -0,0 +1,60 @@
+#include <volk/volk.h>
+#include <qa_32s_and_aligned16.h>
+#include <volk/volk_32s_and_aligned16.h>
+#include <cstdlib>
+
+//test for sse
+
+#ifndef LV_HAVE_SSE
+
+void qa_32s_and_aligned16::t1() {  // Variant built when LV_HAVE_SSE is not defined.
+  printf("sse not available... no test performed\n");  // Only reports the skip; no assertions are made.
+}
+
+#else
+
+void qa_32s_and_aligned16::t1() {
+  // Times the "generic" and "sse" variants on the same random inputs, then requires identical outputs.
+  volk_environment_init();
+  clock_t start, end;
+  double total;
+  const int vlen = 3201;
+  const int ITERS = 100000;
+  int32_t input0[vlen] __attribute__ ((aligned (16)));
+  int32_t input1[vlen] __attribute__ ((aligned (16)));
+  // output0 receives the "generic" result, output01 the "sse" result.
+  int32_t output0[vlen] __attribute__ ((aligned (16)));
+  int32_t output01[vlen] __attribute__ ((aligned (16)));
+  // Both operands are rand() values recentred by RAND_MAX/2, so they take either sign.
+  for(int i = 0; i < vlen; ++i) {   
+    input0[i] = ((int32_t) (rand() - (RAND_MAX/2)));
+    input1[i] = ((int32_t) (rand() - (RAND_MAX/2)));
+  }
+  printf("32s_and_aligned\n");
+  // Timed run: ITERS calls of the "generic" variant.
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_32s_and_aligned16_manual(output0, input0, input1, vlen, "generic");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;  // clock() ticks converted to seconds
+  printf("generic_time: %f\n", total);
+  start = clock();  // Timed run: ITERS calls of the "sse" variant.
+  for(int count = 0; count < ITERS; ++count) {
+    volk_32s_and_aligned16_manual(output01, input0, input1, vlen, "sse");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("sse_time: %f\n", total);
+  for(int i = 0; i < 1; ++i) {  // Leftover debug loop: its body is commented out, so it does nothing.
+    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
+    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
+  }
+  // Integer results must match exactly, element by element.
+  for(int i = 0; i < vlen; ++i) {
+    //printf("%d...%d\n", output0[i], output01[i]);
+    CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
+  }
+}
+
+#endif
diff --git a/volk/lib/qa_32s_and_aligned16.h b/volk/lib/qa_32s_and_aligned16.h
new file mode 100644
index 000000000..dfcb47c63
--- /dev/null
+++ b/volk/lib/qa_32s_and_aligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_32S_AND_ALIGNED16_H
+#define INCLUDED_QA_32S_AND_ALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_32s_and_aligned16 : public CppUnit::TestCase {
+  // CppUnit fixture; the macros below register t1 as the suite's only test.
+  CPPUNIT_TEST_SUITE (qa_32s_and_aligned16);
+  CPPUNIT_TEST (t1);
+  CPPUNIT_TEST_SUITE_END ();
+
+ private:
+  void t1 ();  // Test body; only declared in this header.
+};
+
+
+#endif /* INCLUDED_QA_32S_AND_ALIGNED16_H */
diff --git a/volk/lib/qa_32s_convert_32f_aligned16.cc b/volk/lib/qa_32s_convert_32f_aligned16.cc
new file mode 100644
index 000000000..eab3fe016
--- /dev/null
+++ b/volk/lib/qa_32s_convert_32f_aligned16.cc
@@ -0,0 +1,60 @@
+#include <volk/volk.h>
+#include <qa_32s_convert_32f_aligned16.h>
+#include <volk/volk_32s_convert_32f_aligned16.h>
+#include <cstdlib>
+
+//test for sse2
+
+#ifndef LV_HAVE_SSE2
+
+void qa_32s_convert_32f_aligned16::t1() {  // Variant built when LV_HAVE_SSE2 is not defined.
+  printf("sse2 not available... no test performed\n");  // Only reports the skip; no assertions are made.
+}
+
+#else
+
+void qa_32s_convert_32f_aligned16::t1() {
+  volk_environment_init();
+  clock_t start, end;
+  double total;
+  const int vlen = 3201;
+  const int ITERS = 100000;
+  // Times the "generic" and "sse2" variants on one random input, then requires identical outputs.
+  int32_t input0[vlen] __attribute__ ((aligned (16)));
+  // One 16-byte-aligned output buffer per implementation under test.
+  float output_generic[vlen] __attribute__ ((aligned (16)));
+  float output_sse2[vlen] __attribute__ ((aligned (16)));
+  // Input: rand() recentred and scaled by RAND_MAX/2, multiplied by 32768.0 and truncated to int32_t.
+  for(int i = 0; i < vlen; ++i) {   
+    input0[i] = ((int32_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)) * 32768.0));
+  }
+  printf("32s_convert_32f_aligned\n");
+  // Timed run: ITERS calls of the "generic" variant.
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_32s_convert_32f_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;  // clock() ticks converted to seconds
+  printf("generic_time: %f\n", total);
+  // Timed run: ITERS calls of the "sse2" variant.
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_32s_convert_32f_aligned16_manual(output_sse2, input0, 32768.0, vlen, "sse2");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("sse2_time: %f\n", total);
+  // Leftover debug loop: its body is commented out, so it does nothing.
+  for(int i = 0; i < 1; ++i) {
+    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
+    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
+  }
+  // The float outputs are compared for exact equality (no tolerance).
+  for(int i = 0; i < vlen; ++i) {
+    //printf("%d...%d\n", output0[i], output01[i]);
+    CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse2[i]);
+  }
+}
+
+#endif
diff --git a/volk/lib/qa_32s_convert_32f_aligned16.h b/volk/lib/qa_32s_convert_32f_aligned16.h
new file mode 100644
index 000000000..efd2a2eea
--- /dev/null
+++ b/volk/lib/qa_32s_convert_32f_aligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_32S_CONVERT_32F_ALIGNED16_H
+#define INCLUDED_QA_32S_CONVERT_32F_ALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_32s_convert_32f_aligned16 : public CppUnit::TestCase {
+  // CppUnit fixture; the macros below register t1 as the suite's only test.
+  CPPUNIT_TEST_SUITE (qa_32s_convert_32f_aligned16);
+  CPPUNIT_TEST (t1);
+  CPPUNIT_TEST_SUITE_END ();
+
+ private:
+  void t1 ();  // Test body; only declared in this header.
+};
+
+
+#endif /* INCLUDED_QA_32S_CONVERT_32F_ALIGNED16_H */
diff --git a/volk/lib/qa_32s_convert_32f_unaligned16.cc b/volk/lib/qa_32s_convert_32f_unaligned16.cc
new file mode 100644
index 000000000..0e504cfa1
--- /dev/null
+++ b/volk/lib/qa_32s_convert_32f_unaligned16.cc
@@ -0,0 +1,60 @@
+#include <volk/volk.h>
+#include <qa_32s_convert_32f_unaligned16.h>
+#include <volk/volk_32s_convert_32f_unaligned16.h>
+#include <cstdlib>
+
+//test for sse2
+
+#ifndef LV_HAVE_SSE2
+
+void qa_32s_convert_32f_unaligned16::t1() {  // Variant built when LV_HAVE_SSE2 is not defined.
+  printf("sse2 not available... no test performed\n");  // Only reports the skip; no assertions are made.
+}
+
+#else
+
+void qa_32s_convert_32f_unaligned16::t1() {
+  volk_environment_init();
+  clock_t start, end;
+  double total;
+  const int vlen = 3201;
+  const int ITERS = 100000;
+  // Times the "generic" and "sse2" variants on one random input, then requires identical outputs.
+  int32_t input0[vlen] __attribute__ ((aligned (16)));
+  // NOTE(review): every buffer here is declared aligned(16), so misaligned addresses are not exercised.
+  float output_generic[vlen] __attribute__ ((aligned (16)));
+  float output_sse2[vlen] __attribute__ ((aligned (16)));
+  // Input: rand() recentred and scaled by RAND_MAX/2, multiplied by 32768.0 and truncated to int32_t.
+  for(int i = 0; i < vlen; ++i) {   
+    input0[i] = ((int32_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)) * 32768.0));
+  }
+  printf("32s_convert_32f_unaligned\n");
+  // Timed run: ITERS calls of the "generic" variant.
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_32s_convert_32f_unaligned16_manual(output_generic, input0, 32768.0, vlen, "generic");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;  // clock() ticks converted to seconds
+  printf("generic_time: %f\n", total);
+  // Timed run: ITERS calls of the "sse2" variant.
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_32s_convert_32f_unaligned16_manual(output_sse2, input0, 32768.0, vlen, "sse2");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("sse2_time: %f\n", total);
+  // Leftover debug loop: its body is commented out, so it does nothing.
+  for(int i = 0; i < 1; ++i) {
+    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
+    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
+  }
+  // The float outputs are compared for exact equality (no tolerance).
+  for(int i = 0; i < vlen; ++i) {
+    //printf("%d...%d\n", output0[i], output01[i]);
+    CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse2[i]);
+  }
+}
+
+#endif
diff --git a/volk/lib/qa_32s_convert_32f_unaligned16.h b/volk/lib/qa_32s_convert_32f_unaligned16.h
new file mode 100644
index 000000000..5006f5fd8
--- /dev/null
+++ b/volk/lib/qa_32s_convert_32f_unaligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_32S_CONVERT_32F_UNALIGNED16_H
+#define INCLUDED_QA_32S_CONVERT_32F_UNALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_32s_convert_32f_unaligned16 : public CppUnit::TestCase {
+  // CppUnit fixture; the macros below register t1 as the suite's only test.
+  CPPUNIT_TEST_SUITE (qa_32s_convert_32f_unaligned16);
+  CPPUNIT_TEST (t1);
+  CPPUNIT_TEST_SUITE_END ();
+
+ private:
+  void t1 ();  // Test body; only declared in this header.
+};
+
+
+#endif /* INCLUDED_QA_32S_CONVERT_32F_UNALIGNED16_H */
diff --git a/volk/lib/qa_32s_or_aligned16.cc b/volk/lib/qa_32s_or_aligned16.cc
new file mode 100644
index 000000000..e09dfb91c
--- /dev/null
+++ b/volk/lib/qa_32s_or_aligned16.cc
@@ -0,0 +1,60 @@
+#include <volk/volk.h>
+#include <qa_32s_or_aligned16.h>
+#include <volk/volk_32s_or_aligned16.h>
+#include <cstdlib>
+
+//test for sse
+
+#ifndef LV_HAVE_SSE
+
+void qa_32s_or_aligned16::t1() {  // Variant built when LV_HAVE_SSE is not defined.
+  printf("sse not available... no test performed\n");  // Only reports the skip; no assertions are made.
+}
+
+#else
+
+void qa_32s_or_aligned16::t1() {
+  // Times the "generic" and "sse" variants on the same random inputs, then requires identical outputs.
+  volk_environment_init();
+  clock_t start, end;
+  double total;
+  const int vlen = 3201;
+  const int ITERS = 100000;
+  int32_t input0[vlen] __attribute__ ((aligned (16)));
+  int32_t input1[vlen] __attribute__ ((aligned (16)));
+  // output0 receives the "generic" result, output01 the "sse" result.
+  int32_t output0[vlen] __attribute__ ((aligned (16)));
+  int32_t output01[vlen] __attribute__ ((aligned (16)));
+  // Both operands are rand() values recentred by RAND_MAX/2, so they take either sign.
+  for(int i = 0; i < vlen; ++i) {   
+    input0[i] = ((int32_t) (rand() - (RAND_MAX/2)));
+    input1[i] = ((int32_t) (rand() - (RAND_MAX/2)));
+  }
+  printf("32s_or_aligned\n");
+  // Timed run: ITERS calls of the "generic" variant.
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_32s_or_aligned16_manual(output0, input0, input1, vlen, "generic");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;  // clock() ticks converted to seconds
+  printf("generic_time: %f\n", total);
+  start = clock();  // Timed run: ITERS calls of the "sse" variant.
+  for(int count = 0; count < ITERS; ++count) {
+    volk_32s_or_aligned16_manual(output01, input0, input1, vlen, "sse");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("sse_time: %f\n", total);
+  for(int i = 0; i < 1; ++i) {  // Leftover debug loop: its body is commented out, so it does nothing.
+    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
+    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
+  }
+  // Integer results must match exactly, element by element.
+  for(int i = 0; i < vlen; ++i) {
+    //printf("%d...%d\n", output0[i], output01[i]);
+    CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
+  }
+}
+
+#endif
diff --git a/volk/lib/qa_32s_or_aligned16.h b/volk/lib/qa_32s_or_aligned16.h
new file mode 100644
index 000000000..9e949eb52
--- /dev/null
+++ b/volk/lib/qa_32s_or_aligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_32S_OR_ALIGNED16_H
+#define INCLUDED_QA_32S_OR_ALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_32s_or_aligned16 : public CppUnit::TestCase {
+  // CppUnit fixture; the macros below register t1 as the suite's only test.
+  CPPUNIT_TEST_SUITE (qa_32s_or_aligned16);
+  CPPUNIT_TEST (t1);
+  CPPUNIT_TEST_SUITE_END ();
+
+ private:
+  void t1 ();  // Test body; only declared in this header.
+};
+
+
+#endif /* INCLUDED_QA_32S_OR_ALIGNED16_H */
diff --git a/volk/lib/qa_32u_byteswap_aligned16.cc b/volk/lib/qa_32u_byteswap_aligned16.cc
new file mode 100644
index 000000000..8b1023876
--- /dev/null
+++ b/volk/lib/qa_32u_byteswap_aligned16.cc
@@ -0,0 +1,59 @@
+#include <volk/volk.h>
+#include <qa_32u_byteswap_aligned16.h>
+#include <volk/volk_32u_byteswap_aligned16.h>
+#include <cstdlib>
+#include <cstring>
+
+//test for sse2
+
+#ifndef LV_HAVE_SSE2
+
+void qa_32u_byteswap_aligned16::t1() {  // built only when LV_HAVE_SSE2 is undefined
+  fputs("sse2 not available... no test performed\n", stdout);  // nothing is asserted
+}
+
+#else
+
+// Cross-checks the "generic" and "sse2" 32-bit byteswap kernels on the same
+// random words and prints the clock() time spent in each of them.
+void qa_32u_byteswap_aligned16::t1() {
+  volk_environment_init();
+  clock_t start, end;
+  double total;
+  const int vlen = 3201;
+  // NOTE(review): the kernel works in place, so ITERS is presumably odd to
+  // leave both buffers byteswapped rather than restored -- confirm.
+  const int ITERS = 100001;
+
+  uint32_t output0[vlen] __attribute__ ((aligned (16)));
+  uint32_t output01[vlen] __attribute__ ((aligned (16)));
+
+  // Use full-width random words so that every byte lane carries data; integer
+  // division of rand() by RAND_MAX/2 would yield almost nothing but zeros.
+  for(int i = 0; i < vlen; ++i) {
+    output0[i] = (((uint32_t) rand()) << 16) ^ ((uint32_t) rand());
+  }
+  memcpy(output01, output0, vlen*sizeof(uint32_t));
+  printf("32u_byteswap_aligned\n");
+
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_32u_byteswap_aligned16_manual(output0, vlen, "generic");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("generic_time: %f\n", total);
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_32u_byteswap_aligned16_manual(output01, vlen, "sse2");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("sse2_time: %f\n", total);
+  // Both implementations must agree word for word.
+  for(int i = 0; i < vlen; ++i) {
+    CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
+  }
+}
+
+#endif
diff --git a/volk/lib/qa_32u_byteswap_aligned16.h b/volk/lib/qa_32u_byteswap_aligned16.h
new file mode 100644
index 000000000..47bad4c3d
--- /dev/null
+++ b/volk/lib/qa_32u_byteswap_aligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_32U_BYTESWAP_ALIGNED16_H
+#define INCLUDED_QA_32U_BYTESWAP_ALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_32u_byteswap_aligned16 : public CppUnit::TestCase {
+  // The fixture's only test case; private because it precedes any access label.
+  void t1();
+
+  // Declare the CppUnit suite for this fixture and add t1 to it.
+  CPPUNIT_TEST_SUITE(qa_32u_byteswap_aligned16);
+  CPPUNIT_TEST(t1);
+  CPPUNIT_TEST_SUITE_END();
+};
+
+
+#endif /* INCLUDED_QA_32U_BYTESWAP_ALIGNED16_H */
diff --git a/volk/lib/qa_32u_popcnt_aligned16.cc b/volk/lib/qa_32u_popcnt_aligned16.cc
new file mode 100644
index 000000000..49fcddeb2
--- /dev/null
+++ b/volk/lib/qa_32u_popcnt_aligned16.cc
@@ -0,0 +1,61 @@
+#include <volk/volk_runtime.h>
+#include <volk/volk.h>
+#include <qa_32u_popcnt_aligned16.h>
+#include <volk/volk_32u_popcnt_aligned16.h>
+#include <cstdlib>
+
+//test for sse4.2
+
+#ifndef LV_HAVE_SSE4_2
+
+void qa_32u_popcnt_aligned16::t1() {  // built only when LV_HAVE_SSE4_2 is undefined
+  fputs("sse4.2 not available... no test performed\n", stdout);  // nothing is asserted
+}
+
+#else
+
+// Accumulates the 32u_popcnt result for one random word over ITERS calls, first
+// via the "generic" kernel and then via the get_volk_runtime() table, prints
+// both clock() timings and requires the two accumulated sums to be equal.
+void qa_32u_popcnt_aligned16::t1() {
+  volk_runtime_init();
+  volk_environment_init();
+
+  const int ITERS = 10000000;
+  uint32_t word __attribute__ ((aligned (16)));
+  uint32_t generic_sum __attribute__ ((aligned (16)));
+  uint32_t runtime_sum __attribute__ ((aligned (16)));
+
+  word = static_cast<uint32_t>(rand() - (RAND_MAX/2));
+  generic_sum = 0;
+  runtime_sum = 0;
+
+  printf("32u_popcnt_aligned\n");
+
+  // Reference pass: the kernel is selected by name.
+  clock_t tic = clock();
+  uint32_t bits = 0;
+  int iter = 0;
+  while(iter < ITERS) {
+    volk_32u_popcnt_aligned16_manual(&bits, word, "generic");
+    generic_sum += bits;
+    ++iter;
+  }
+  clock_t toc = clock();
+  double elapsed = static_cast<double>(toc-tic)/static_cast<double>(CLOCKS_PER_SEC);
+  printf("generic_time: %f\n", elapsed);
+
+  // Second pass: whichever kernel the runtime table provides.
+  tic = clock();
+  bits = 0;
+  for(iter = 0; iter < ITERS; ++iter) {
+    get_volk_runtime()->volk_32u_popcnt_aligned16(&bits, word);
+    runtime_sum += bits;
+  }
+  toc = clock();
+  elapsed = static_cast<double>(toc-tic)/static_cast<double>(CLOCKS_PER_SEC);
+  printf("sse4.2_time: %f\n", elapsed);
+  CPPUNIT_ASSERT_EQUAL(generic_sum, runtime_sum);
+}
+
+#endif
diff --git a/volk/lib/qa_32u_popcnt_aligned16.h b/volk/lib/qa_32u_popcnt_aligned16.h
new file mode 100644
index 000000000..fa1dc1041
--- /dev/null
+++ b/volk/lib/qa_32u_popcnt_aligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_32U_POPCNT_ALIGNED16_H
+#define INCLUDED_QA_32U_POPCNT_ALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_32u_popcnt_aligned16 : public CppUnit::TestCase {
+  // The fixture's only test case; private because it precedes any access label.
+  void t1();
+
+  // Declare the CppUnit suite for this fixture and add t1 to it.
+  CPPUNIT_TEST_SUITE(qa_32u_popcnt_aligned16);
+  CPPUNIT_TEST(t1);
+  CPPUNIT_TEST_SUITE_END();
+};
+
+
+#endif /* INCLUDED_QA_32U_POPCNT_ALIGNED16_H */
diff --git a/volk/lib/qa_64f_convert_32f_aligned16.cc b/volk/lib/qa_64f_convert_32f_aligned16.cc
new file mode 100644
index 000000000..0eaebf00a
--- /dev/null
+++ b/volk/lib/qa_64f_convert_32f_aligned16.cc
@@ -0,0 +1,60 @@
+#include <volk/volk.h>
+#include <qa_64f_convert_32f_aligned16.h>
+#include <volk/volk_64f_convert_32f_aligned16.h>
+#include <cstdlib>
+
+//test for sse2
+
+#ifndef LV_HAVE_SSE2
+
+void qa_64f_convert_32f_aligned16::t1() {  // built only when LV_HAVE_SSE2 is undefined
+  fputs("sse2 not available... no test performed\n", stdout);  // nothing is asserted
+}
+
+#else
+
+// Converts a vector of random doubles in roughly [-1, 1] to float with the
+// "generic" and "sse2" kernels, prints the clock() time of each run and
+// requires the two float outputs to be identical element by element.
+void qa_64f_convert_32f_aligned16::t1() {
+  volk_environment_init();
+
+  const int num_points = 3201;
+  const int num_runs = 100000;
+  const double half_range = static_cast<double>(RAND_MAX/2);
+
+  double source[num_points] __attribute__ ((aligned (16)));
+
+  float from_generic[num_points] __attribute__ ((aligned (16)));
+  float from_sse2[num_points] __attribute__ ((aligned (16)));
+
+  for(int n = 0; n < num_points; ++n) {
+    source[n] = static_cast<double>(rand() - (RAND_MAX/2)) / half_range;
+  }
+  printf("64f_convert_32f_aligned\n");
+
+  // Timed pass over the reference implementation.
+  clock_t tic = clock();
+  for(int run = 0; run < num_runs; ++run) {
+    volk_64f_convert_32f_aligned16_manual(from_generic, source, num_points, "generic");
+  }
+  clock_t toc = clock();
+  double elapsed = static_cast<double>(toc-tic)/static_cast<double>(CLOCKS_PER_SEC);
+  printf("generic_time: %f\n", elapsed);
+
+  // Timed pass over the SSE2 implementation.
+  tic = clock();
+  for(int run = 0; run < num_runs; ++run) {
+    volk_64f_convert_32f_aligned16_manual(from_sse2, source, num_points, "sse2");
+  }
+  toc = clock();
+  elapsed = static_cast<double>(toc-tic)/static_cast<double>(CLOCKS_PER_SEC);
+  printf("sse2_time: %f\n", elapsed);
+
+  // Exact comparison of the two result vectors.
+  for(int n = 0; n < num_points; ++n) {
+    CPPUNIT_ASSERT_EQUAL(from_generic[n], from_sse2[n]);
+  }
+}
+
+#endif
diff --git a/volk/lib/qa_64f_convert_32f_aligned16.h b/volk/lib/qa_64f_convert_32f_aligned16.h
new file mode 100644
index 000000000..95d79f73d
--- /dev/null
+++ b/volk/lib/qa_64f_convert_32f_aligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_64F_CONVERT_32F_ALIGNED16_H
+#define INCLUDED_QA_64F_CONVERT_32F_ALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_64f_convert_32f_aligned16 : public CppUnit::TestCase {
+  // The fixture's only test case; private because it precedes any access label.
+  void t1();
+
+  // Declare the CppUnit suite for this fixture and add t1 to it.
+  CPPUNIT_TEST_SUITE(qa_64f_convert_32f_aligned16);
+  CPPUNIT_TEST(t1);
+  CPPUNIT_TEST_SUITE_END();
+};
+
+
+#endif /* INCLUDED_QA_64F_CONVERT_32F_ALIGNED16_H */
diff --git a/volk/lib/qa_64f_convert_32f_unaligned16.cc b/volk/lib/qa_64f_convert_32f_unaligned16.cc
new file mode 100644
index 000000000..dcf94bd27
--- /dev/null
+++ b/volk/lib/qa_64f_convert_32f_unaligned16.cc
@@ -0,0 +1,60 @@
+#include <volk/volk.h>
+#include <qa_64f_convert_32f_unaligned16.h>
+#include <volk/volk_64f_convert_32f_unaligned16.h>
+#include <cstdlib>
+
+//test for sse2
+
+#ifndef LV_HAVE_SSE2
+
+void qa_64f_convert_32f_unaligned16::t1() {  // built only when LV_HAVE_SSE2 is undefined
+  fputs("sse2 not available... no test performed\n", stdout);  // nothing is asserted
+}
+
+#else
+
+// Converts a vector of random doubles in roughly [-1, 1] to float with the
+// "generic" and "sse2" kernels, prints the clock() time of each run and
+// requires the two float outputs to be identical element by element.
+// NOTE(review): every buffer is passed at its 16-byte aligned start, so no
+// misaligned address is actually exercised here.
+void qa_64f_convert_32f_unaligned16::t1() {
+  volk_environment_init();
+  const int num_points = 3201;
+  const int num_runs = 100000;
+  const double half_range = static_cast<double>(RAND_MAX/2);
+
+  double source[num_points] __attribute__ ((aligned (16)));
+  float from_generic[num_points] __attribute__ ((aligned (16)));
+  float from_sse2[num_points] __attribute__ ((aligned (16)));
+
+  for(int n = 0; n < num_points; ++n) {
+    source[n] = static_cast<double>(rand() - (RAND_MAX/2)) / half_range;
+  }
+  printf("64f_convert_32f_unaligned\n");
+
+  // Timed pass over the reference implementation.
+  clock_t tic = clock();
+  for(int run = 0; run < num_runs; ++run) {
+    volk_64f_convert_32f_unaligned16_manual(from_generic, source, num_points, "generic");
+  }
+  clock_t toc = clock();
+  double elapsed = static_cast<double>(toc-tic)/static_cast<double>(CLOCKS_PER_SEC);
+  printf("generic_time: %f\n", elapsed);
+
+  // Timed pass over the SSE2 implementation.
+  tic = clock();
+  for(int run = 0; run < num_runs; ++run) {
+    volk_64f_convert_32f_unaligned16_manual(from_sse2, source, num_points, "sse2");
+  }
+  toc = clock();
+  elapsed = static_cast<double>(toc-tic)/static_cast<double>(CLOCKS_PER_SEC);
+  printf("sse2_time: %f\n", elapsed);
+
+  // Exact comparison of the two result vectors.
+  for(int n = 0; n < num_points; ++n) {
+    CPPUNIT_ASSERT_EQUAL(from_generic[n], from_sse2[n]);
+  }
+}
+
+#endif
diff --git a/volk/lib/qa_64f_convert_32f_unaligned16.h b/volk/lib/qa_64f_convert_32f_unaligned16.h
new file mode 100644
index 000000000..430327e81
--- /dev/null
+++ b/volk/lib/qa_64f_convert_32f_unaligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_64F_CONVERT_32F_UNALIGNED16_H
+#define INCLUDED_QA_64F_CONVERT_32F_UNALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_64f_convert_32f_unaligned16 : public CppUnit::TestCase {
+  // The fixture's only test case; private because it precedes any access label.
+  void t1();
+
+  // Declare the CppUnit suite for this fixture and add t1 to it.
+  CPPUNIT_TEST_SUITE(qa_64f_convert_32f_unaligned16);
+  CPPUNIT_TEST(t1);
+  CPPUNIT_TEST_SUITE_END();
+};
+
+
+#endif /* INCLUDED_QA_64F_CONVERT_32F_UNALIGNED16_H */
diff --git a/volk/lib/qa_64f_max_aligned16.cc b/volk/lib/qa_64f_max_aligned16.cc
new file mode 100644
index 000000000..41ab078b0
--- /dev/null
+++ b/volk/lib/qa_64f_max_aligned16.cc
@@ -0,0 +1,60 @@
+#include <volk/volk.h>
+#include <qa_64f_max_aligned16.h>
+#include <volk/volk_64f_max_aligned16.h>
+#include <cstdlib>
+
+//test for sse2
+
+#ifndef LV_HAVE_SSE2
+
+void qa_64f_max_aligned16::t1() {  // built only when LV_HAVE_SSE2 is undefined
+  fputs("sse2 not available... no test performed\n", stdout);  // nothing is asserted
+}
+
+#else
+
+// Runs the 64f_max kernel on two vectors of random doubles with the "generic"
+// and "sse2" implementations, prints the clock() time of each run and
+// requires the two outputs to be identical element by element.
+void qa_64f_max_aligned16::t1() {
+  volk_environment_init();
+  clock_t start, end;
+  double total;
+  const int vlen = 3201;
+  const int ITERS = 100000;
+  double input0[vlen] __attribute__ ((aligned (16)));
+  double input1[vlen] __attribute__ ((aligned (16)));
+
+  double output0[vlen] __attribute__ ((aligned (16)));
+  double output01[vlen] __attribute__ ((aligned (16)));
+
+  // Inputs lie in roughly [-1, 1].
+  for(int i = 0; i < vlen; ++i) {
+    input0[i] = ((double) (rand() - (RAND_MAX/2))) / static_cast<double>((RAND_MAX/2));
+    input1[i] = ((double) (rand() - (RAND_MAX/2))) / static_cast<double>((RAND_MAX/2));
+  }
+  printf("64f_max_aligned\n");
+
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_64f_max_aligned16_manual(output0, input0, input1, vlen, "generic");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("generic_time: %f\n", total);
+
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_64f_max_aligned16_manual(output01, input0, input1, vlen, "sse2");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  // Label names the implementation that was actually timed ("sse2").
+  printf("sse2_time: %f\n", total);
+
+  for(int i = 0; i < vlen; ++i) {
+    CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
+  }
+}
+
+#endif
diff --git a/volk/lib/qa_64f_max_aligned16.h b/volk/lib/qa_64f_max_aligned16.h
new file mode 100644
index 000000000..7cbd4d4c1
--- /dev/null
+++ b/volk/lib/qa_64f_max_aligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_64F_MAX_ALIGNED16_H
+#define INCLUDED_QA_64F_MAX_ALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_64f_max_aligned16 : public CppUnit::TestCase {
+  // The fixture's only test case; private because it precedes any access label.
+  void t1();
+
+  // Declare the CppUnit suite for this fixture and add t1 to it.
+  CPPUNIT_TEST_SUITE(qa_64f_max_aligned16);
+  CPPUNIT_TEST(t1);
+  CPPUNIT_TEST_SUITE_END();
+};
+
+
+#endif /* INCLUDED_QA_64F_MAX_ALIGNED16_H */
diff --git a/volk/lib/qa_64f_min_aligned16.cc b/volk/lib/qa_64f_min_aligned16.cc
new file mode 100644
index 000000000..b4664d065
--- /dev/null
+++ b/volk/lib/qa_64f_min_aligned16.cc
@@ -0,0 +1,60 @@
+#include <volk/volk.h>
+#include <qa_64f_min_aligned16.h>
+#include <volk/volk_64f_min_aligned16.h>
+#include <cstdlib>
+
+//test for sse2
+
+#ifndef LV_HAVE_SSE2
+
+void qa_64f_min_aligned16::t1() {  // built only when LV_HAVE_SSE2 is undefined
+  fputs("sse2 not available... no test performed\n", stdout);  // nothing is asserted
+}
+
+#else
+
+// Runs the 64f_min kernel on two vectors of random doubles with the "generic"
+// and "sse2" implementations, prints the clock() time of each run and
+// requires the two outputs to be identical element by element.
+void qa_64f_min_aligned16::t1() {
+  volk_environment_init();
+  clock_t start, end;
+  double total;
+  const int vlen = 3201;
+  const int ITERS = 100000;
+  double input0[vlen] __attribute__ ((aligned (16)));
+  double input1[vlen] __attribute__ ((aligned (16)));
+
+  double output0[vlen] __attribute__ ((aligned (16)));
+  double output01[vlen] __attribute__ ((aligned (16)));
+
+  // Inputs lie in roughly [-1, 1].
+  for(int i = 0; i < vlen; ++i) {
+    input0[i] = ((double) (rand() - (RAND_MAX/2))) / static_cast<double>((RAND_MAX/2));
+    input1[i] = ((double) (rand() - (RAND_MAX/2))) / static_cast<double>((RAND_MAX/2));
+  }
+  printf("64f_min_aligned\n");
+
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_64f_min_aligned16_manual(output0, input0, input1, vlen, "generic");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("generic_time: %f\n", total);
+
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_64f_min_aligned16_manual(output01, input0, input1, vlen, "sse2");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  // Label names the implementation that was actually timed ("sse2").
+  printf("sse2_time: %f\n", total);
+
+  for(int i = 0; i < vlen; ++i) {
+    CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
+  }
+}
+
+#endif
diff --git a/volk/lib/qa_64f_min_aligned16.h b/volk/lib/qa_64f_min_aligned16.h
new file mode 100644
index 000000000..a0e95395f
--- /dev/null
+++ b/volk/lib/qa_64f_min_aligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_64F_MIN_ALIGNED16_H
+#define INCLUDED_QA_64F_MIN_ALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_64f_min_aligned16 : public CppUnit::TestCase {
+  // The fixture's only test case; private because it precedes any access label.
+  void t1();
+
+  // Declare the CppUnit suite for this fixture and add t1 to it.
+  CPPUNIT_TEST_SUITE(qa_64f_min_aligned16);
+  CPPUNIT_TEST(t1);
+  CPPUNIT_TEST_SUITE_END();
+};
+
+
+#endif /* INCLUDED_QA_64F_MIN_ALIGNED16_H */
diff --git a/volk/lib/qa_64u_byteswap_aligned16.cc b/volk/lib/qa_64u_byteswap_aligned16.cc
new file mode 100644
index 000000000..4f5d4d02b
--- /dev/null
+++ b/volk/lib/qa_64u_byteswap_aligned16.cc
@@ -0,0 +1,59 @@
+#include <volk/volk.h>
+#include <qa_64u_byteswap_aligned16.h>
+#include <volk/volk_64u_byteswap_aligned16.h>
+#include <cstdlib>
+#include <cstring>
+
+//test for sse2
+
+#ifndef LV_HAVE_SSE2
+
+void qa_64u_byteswap_aligned16::t1() {  // built only when LV_HAVE_SSE2 is undefined
+  fputs("sse2 not available... no test performed\n", stdout);  // nothing is asserted
+}
+
+#else
+
+// Cross-checks the "generic" and "sse2" 64-bit byteswap kernels on the same
+// random words and prints the clock() time spent in each of them.
+void qa_64u_byteswap_aligned16::t1() {
+  volk_environment_init();
+  clock_t start, end;
+  double total;
+  const int vlen = 3201;
+  // NOTE(review): the kernel works in place, so ITERS is presumably odd to
+  // leave both buffers byteswapped rather than restored -- confirm.
+  const int ITERS = 100001;
+  uint64_t output0[vlen] __attribute__ ((aligned (16)));
+  uint64_t output01[vlen] __attribute__ ((aligned (16)));
+
+  // Use full-width random words so that every byte lane carries data; integer
+  // division of rand() by RAND_MAX/2 would yield almost nothing but zeros.
+  for(int i = 0; i < vlen; ++i) {
+    output0[i] = (((uint64_t) rand()) << 48) ^ (((uint64_t) rand()) << 32)
+               ^ (((uint64_t) rand()) << 16) ^ ((uint64_t) rand());
+  }
+  memcpy(output01, output0, vlen*sizeof(uint64_t));
+  printf("64u_byteswap_aligned\n");
+
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_64u_byteswap_aligned16_manual(output0, vlen, "generic");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("generic_time: %f\n", total);
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_64u_byteswap_aligned16_manual(output01, vlen, "sse2");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("sse2_time: %f\n", total);
+  // Both implementations must agree word for word.
+  for(int i = 0; i < vlen; ++i) {
+    CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
+  }
+}
+
+#endif
diff --git a/volk/lib/qa_64u_byteswap_aligned16.h b/volk/lib/qa_64u_byteswap_aligned16.h
new file mode 100644
index 000000000..a4fa0c983
--- /dev/null
+++ b/volk/lib/qa_64u_byteswap_aligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_64U_BYTESWAP_ALIGNED16_H
+#define INCLUDED_QA_64U_BYTESWAP_ALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_64u_byteswap_aligned16 : public CppUnit::TestCase {
+  // The fixture's only test case; private because it precedes any access label.
+  void t1();
+
+  // Declare the CppUnit suite for this fixture and add t1 to it.
+  CPPUNIT_TEST_SUITE(qa_64u_byteswap_aligned16);
+  CPPUNIT_TEST(t1);
+  CPPUNIT_TEST_SUITE_END();
+};
+
+
+#endif /* INCLUDED_QA_64U_BYTESWAP_ALIGNED16_H */
diff --git a/volk/lib/qa_64u_popcnt_aligned16.cc b/volk/lib/qa_64u_popcnt_aligned16.cc
new file mode 100644
index 000000000..bce9ff6c2
--- /dev/null
+++ b/volk/lib/qa_64u_popcnt_aligned16.cc
@@ -0,0 +1,61 @@
+#include <volk/volk_runtime.h>
+#include <volk/volk.h>
+#include <qa_64u_popcnt_aligned16.h>
+#include <volk/volk_64u_popcnt_aligned16.h>
+#include <cstdlib>
+
+//test for sse4.2
+
+#ifndef LV_HAVE_SSE4_2
+
+void qa_64u_popcnt_aligned16::t1() {  // built only when LV_HAVE_SSE4_2 is undefined
+  fputs("sse4.2 not available... no test performed\n", stdout);  // nothing is asserted
+}
+
+#else
+
+// Accumulates the 64u_popcnt result for one random word over ITERS calls, first
+// via the "generic" kernel and then via the get_volk_runtime() table, prints
+// both clock() timings and requires the two accumulated sums to be equal.
+void qa_64u_popcnt_aligned16::t1() {
+  volk_runtime_init();
+  volk_environment_init();
+
+  const int ITERS = 10000000;
+  uint64_t word __attribute__ ((aligned (16)));
+  uint64_t generic_sum __attribute__ ((aligned (16)));
+  uint64_t runtime_sum __attribute__ ((aligned (16)));
+
+  word = static_cast<uint64_t>(rand() - (RAND_MAX/2));
+  generic_sum = 0;
+  runtime_sum = 0;
+
+  printf("64u_popcnt_aligned\n");
+
+  // Reference pass: the kernel is selected by name.
+  clock_t tic = clock();
+  uint64_t bits = 0;
+  int iter = 0;
+  while(iter < ITERS) {
+    volk_64u_popcnt_aligned16_manual(&bits, word, "generic");
+    generic_sum += bits;
+    ++iter;
+  }
+  clock_t toc = clock();
+  double elapsed = static_cast<double>(toc-tic)/static_cast<double>(CLOCKS_PER_SEC);
+  printf("generic_time: %f\n", elapsed);
+
+  // Second pass: whichever kernel the runtime table provides.
+  tic = clock();
+  bits = 0;
+  for(iter = 0; iter < ITERS; ++iter) {
+    get_volk_runtime()->volk_64u_popcnt_aligned16(&bits, word);
+    runtime_sum += bits;
+  }
+  toc = clock();
+  elapsed = static_cast<double>(toc-tic)/static_cast<double>(CLOCKS_PER_SEC);
+  printf("sse4.2_time: %f\n", elapsed);
+  CPPUNIT_ASSERT_EQUAL(generic_sum, runtime_sum);
+}
+
+#endif
diff --git a/volk/lib/qa_64u_popcnt_aligned16.h b/volk/lib/qa_64u_popcnt_aligned16.h
new file mode 100644
index 000000000..217822d6e
--- /dev/null
+++ b/volk/lib/qa_64u_popcnt_aligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_64U_POPCNT_ALIGNED16_H
+#define INCLUDED_QA_64U_POPCNT_ALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_64u_popcnt_aligned16 : public CppUnit::TestCase {
+  // The fixture's only test case; private because it precedes any access label.
+  void t1();
+
+  // Declare the CppUnit suite for this fixture and add t1 to it.
+  CPPUNIT_TEST_SUITE(qa_64u_popcnt_aligned16);
+  CPPUNIT_TEST(t1);
+  CPPUNIT_TEST_SUITE_END();
+};
+
+
+#endif /* INCLUDED_QA_64U_POPCNT_ALIGNED16_H */
diff --git a/volk/lib/qa_8s_convert_16s_aligned16.cc b/volk/lib/qa_8s_convert_16s_aligned16.cc
new file mode 100644
index 000000000..35f08fb81
--- /dev/null
+++ b/volk/lib/qa_8s_convert_16s_aligned16.cc
@@ -0,0 +1,63 @@
+#include <volk/volk_runtime.h>
+#include <volk/volk.h>
+#include <qa_8s_convert_16s_aligned16.h>
+#include <volk/volk_8s_convert_16s_aligned16.h>
+#include <cstdlib>
+
+//test for sse4_1
+
+#ifndef LV_HAVE_SSE4_1
+
+void qa_8s_convert_16s_aligned16::t1() {  // built only when LV_HAVE_SSE4_1 is undefined
+  fputs("sse4.1 not available... no test performed\n", stdout);  // nothing is asserted
+}
+
+#else
+
+// Converts random int8_t samples to int16_t with the "generic" kernel and with
+// the kernel supplied by get_volk_runtime(), prints the clock() time of each
+// run and requires the two outputs to match element by element.
+void qa_8s_convert_16s_aligned16::t1() {
+  volk_runtime_init();
+  volk_environment_init();
+  clock_t start, end;
+  double total;
+  const int vlen = 3201;
+  const int ITERS = 100000;
+  int8_t input0[vlen] __attribute__ ((aligned (16)));
+
+  int16_t output_generic[vlen] __attribute__ ((aligned (16)));
+  int16_t output_sse4_1[vlen] __attribute__ ((aligned (16)));
+
+  // Draw samples from the whole int8_t range.  Scaling a float in [-1, 1] by
+  // 128.0 could produce 128, whose conversion to int8_t is undefined.
+  for(int i = 0; i < vlen; ++i) {
+    input0[i] = (int8_t) ((rand() % 256) - 128);
+  }
+  printf("8s_convert_16s_aligned\n");
+
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_8s_convert_16s_aligned16_manual(output_generic, input0, vlen, "generic");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("generic_time: %f\n", total);
+
+  // The runtime table picks the kernel; the "sse4_1" label below assumes
+  // that this is the SSE4.1 one.
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    get_volk_runtime()->volk_8s_convert_16s_aligned16(output_sse4_1, input0, vlen);
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("sse4_1_time: %f\n", total);
+
+  // The two result vectors must match exactly.
+  for(int i = 0; i < vlen; ++i) {
+    CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse4_1[i]);
+  }
+}
+
+#endif
diff --git a/volk/lib/qa_8s_convert_16s_aligned16.h b/volk/lib/qa_8s_convert_16s_aligned16.h
new file mode 100644
index 000000000..38739fc96
--- /dev/null
+++ b/volk/lib/qa_8s_convert_16s_aligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_8S_CONVERT_16S_ALIGNED16_H
+#define INCLUDED_QA_8S_CONVERT_16S_ALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_8s_convert_16s_aligned16 : public CppUnit::TestCase {
+  // The fixture's only test case; private because it precedes any access label.
+  void t1();
+
+  // Declare the CppUnit suite for this fixture and add t1 to it.
+  CPPUNIT_TEST_SUITE(qa_8s_convert_16s_aligned16);
+  CPPUNIT_TEST(t1);
+  CPPUNIT_TEST_SUITE_END();
+};
+
+
+#endif /* INCLUDED_QA_8S_CONVERT_16S_ALIGNED16_H */
diff --git a/volk/lib/qa_8s_convert_16s_unaligned16.cc b/volk/lib/qa_8s_convert_16s_unaligned16.cc
new file mode 100644
index 000000000..bb326f818
--- /dev/null
+++ b/volk/lib/qa_8s_convert_16s_unaligned16.cc
@@ -0,0 +1,63 @@
+#include <volk/volk_runtime.h>
+#include <volk/volk.h>
+#include <qa_8s_convert_16s_unaligned16.h>
+#include <volk/volk_8s_convert_16s_unaligned16.h>
+#include <cstdlib>
+
+//test for sse4_1
+
+#ifndef LV_HAVE_SSE4_1
+
+void qa_8s_convert_16s_unaligned16::t1() {  // built only when LV_HAVE_SSE4_1 is undefined
+  fputs("sse4.1 not available... no test performed\n", stdout);  // nothing is asserted
+}
+
+#else
+
+// Converts random int8_t samples to int16_t with the "generic" kernel and with
+// the kernel supplied by get_volk_runtime(), timing both with clock().
+// NOTE(review): every buffer is passed at its 16-byte aligned start, so no
+// misaligned address is actually exercised here.
+void qa_8s_convert_16s_unaligned16::t1() {
+  volk_runtime_init();
+  volk_environment_init();
+  clock_t start, end;
+  double total;
+  const int vlen = 3201;
+  const int ITERS = 100000;
+  int8_t input0[vlen] __attribute__ ((aligned (16)));
+  int16_t output_generic[vlen] __attribute__ ((aligned (16)));
+  int16_t output_sse4_1[vlen] __attribute__ ((aligned (16)));
+
+  // Draw samples from the whole int8_t range.  Scaling a float in [-1, 1] by
+  // 128.0 could produce 128, whose conversion to int8_t is undefined.
+  for(int i = 0; i < vlen; ++i) {
+    input0[i] = (int8_t) ((rand() % 256) - 128);
+  }
+  printf("8s_convert_16s_unaligned\n");
+
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_8s_convert_16s_unaligned16_manual(output_generic, input0, vlen, "generic");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("generic_time: %f\n", total);
+
+  // The runtime table picks the kernel; the "sse4_1" label below assumes
+  // that this is the SSE4.1 one.
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    get_volk_runtime()->volk_8s_convert_16s_unaligned16(output_sse4_1, input0, vlen);
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("sse4_1_time: %f\n", total);
+
+  // The two result vectors must match exactly.
+  for(int i = 0; i < vlen; ++i) {
+    CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse4_1[i]);
+  }
+}
+
+#endif
diff --git a/volk/lib/qa_8s_convert_16s_unaligned16.h b/volk/lib/qa_8s_convert_16s_unaligned16.h
new file mode 100644
index 000000000..d39fffc35
--- /dev/null
+++ b/volk/lib/qa_8s_convert_16s_unaligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_8S_CONVERT_16S_UNALIGNED16_H
+#define INCLUDED_QA_8S_CONVERT_16S_UNALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_8s_convert_16s_unaligned16 : public CppUnit::TestCase {
+  // The fixture's only test case; private because it precedes any access label.
+  void t1();
+
+  // Declare the CppUnit suite for this fixture and add t1 to it.
+  CPPUNIT_TEST_SUITE(qa_8s_convert_16s_unaligned16);
+  CPPUNIT_TEST(t1);
+  CPPUNIT_TEST_SUITE_END();
+};
+
+
+#endif /* INCLUDED_QA_8S_CONVERT_16S_UNALIGNED16_H */
diff --git a/volk/lib/qa_8s_convert_32f_aligned16.cc b/volk/lib/qa_8s_convert_32f_aligned16.cc
new file mode 100644
index 000000000..522da0b9d
--- /dev/null
+++ b/volk/lib/qa_8s_convert_32f_aligned16.cc
@@ -0,0 +1,63 @@
+#include <volk/volk_runtime.h>
+#include <volk/volk.h>
+#include <qa_8s_convert_32f_aligned16.h>
+#include <volk/volk_8s_convert_32f_aligned16.h>
+#include <cstdlib>
+
+//test for sse4.1
+
+#ifndef LV_HAVE_SSE4_1
+
+void qa_8s_convert_32f_aligned16::t1() {  // built only when LV_HAVE_SSE4_1 is undefined
+  fputs("sse4_1 not available... no test performed\n", stdout);  // nothing is asserted
+}
+
+#else
+
+// Converts random int8_t samples to float (scalar argument 128.0) with the
+// "generic" kernel and with the kernel supplied by get_volk_runtime(), prints
+// the clock() time of each run and requires the two outputs to match exactly.
+void qa_8s_convert_32f_aligned16::t1() {
+  volk_runtime_init();
+  volk_environment_init();
+  clock_t start, end;
+  double total;
+  const int vlen = 3201;
+  const int ITERS = 100000;
+  int8_t input0[vlen] __attribute__ ((aligned (16)));
+
+  float output_generic[vlen] __attribute__ ((aligned (16)));
+  float output_sse4_1[vlen] __attribute__ ((aligned (16)));
+
+  // Draw samples from the whole int8_t range.  Scaling a float in [-1, 1] by
+  // 128.0 could produce 128, whose conversion to int8_t is undefined.
+  for(int i = 0; i < vlen; ++i) {
+    input0[i] = (int8_t) ((rand() % 256) - 128);
+  }
+  printf("8s_convert_32f_aligned\n");
+
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_8s_convert_32f_aligned16_manual(output_generic, input0, 128.0, vlen, "generic");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("generic_time: %f\n", total);
+
+  // The runtime table picks the kernel; the "sse4_1" label below assumes
+  // that this is the SSE4.1 one.
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    get_volk_runtime()->volk_8s_convert_32f_aligned16(output_sse4_1, input0, 128.0, vlen);
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("sse4_1_time: %f\n", total);
+
+  // The two result vectors must match exactly.
+  for(int i = 0; i < vlen; ++i) {
+    CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse4_1[i]);
+  }
+}
+
+#endif
diff --git a/volk/lib/qa_8s_convert_32f_aligned16.h b/volk/lib/qa_8s_convert_32f_aligned16.h
new file mode 100644
index 000000000..7f8401d42
--- /dev/null
+++ b/volk/lib/qa_8s_convert_32f_aligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_8S_CONVERT_32F_ALIGNED16_H
+#define INCLUDED_QA_8S_CONVERT_32F_ALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_8s_convert_32f_aligned16 : public CppUnit::TestCase {
+  // The fixture's only test case; private because it precedes any access label.
+  void t1();
+
+  // Declare the CppUnit suite for this fixture and add t1 to it.
+  CPPUNIT_TEST_SUITE(qa_8s_convert_32f_aligned16);
+  CPPUNIT_TEST(t1);
+  CPPUNIT_TEST_SUITE_END();
+};
+
+
+#endif /* INCLUDED_QA_8S_CONVERT_32F_ALIGNED16_H */
diff --git a/volk/lib/qa_8s_convert_32f_unaligned16.cc b/volk/lib/qa_8s_convert_32f_unaligned16.cc
new file mode 100644
index 000000000..ea1fb7c74
--- /dev/null
+++ b/volk/lib/qa_8s_convert_32f_unaligned16.cc
@@ -0,0 +1,63 @@
+#include <volk/volk_runtime.h>
+#include <volk/volk.h>
+#include <qa_8s_convert_32f_unaligned16.h>
+#include <volk/volk_8s_convert_32f_unaligned16.h>
+#include <cstdlib>
+
+//test for sse4.1
+
+#ifndef LV_HAVE_SSE4_1
+
+void qa_8s_convert_32f_unaligned16::t1() {  // built only when LV_HAVE_SSE4_1 is undefined
+  fputs("sse4_1 not available... no test performed\n", stdout);  // nothing is asserted
+}
+
+#else
+
+// Runs the 8-bit to float conversion on an input pointer offset by one byte
+// from a 16-byte aligned buffer, comparing the "generic" kernel against the
+// kernel supplied by get_volk_runtime() and timing both with clock().
+void qa_8s_convert_32f_unaligned16::t1() {
+  volk_runtime_init();
+  volk_environment_init();
+  clock_t start, end;
+  double total;
+  const int vlen = 3201;
+  const int ITERS = 100000;
+  // One spare element so that &input0[1] still spans vlen samples.
+  int8_t input0[vlen+1] __attribute__ ((aligned (16)));
+
+  float output_generic[vlen+1] __attribute__ ((aligned (16)));
+  float output_sse4_1[vlen+1] __attribute__ ((aligned (16)));
+
+  // Fill all vlen+1 slots: the kernels read input0[1..vlen], so the last slot
+  // must not stay uninitialized.  rand() % 256 - 128 stays within int8_t.
+  for(int i = 0; i < vlen+1; ++i) {
+    input0[i] = (int8_t) ((rand() % 256) - 128);
+  }
+  printf("8s_convert_32f_unaligned\n");
+
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_8s_convert_32f_unaligned16_manual(output_generic, &input0[1], 128.0, vlen, "generic");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("generic_time: %f\n", total);
+
+  // Runtime-dispatched kernel; the label assumes it is the SSE4.1 one.
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    get_volk_runtime()->volk_8s_convert_32f_unaligned16(output_sse4_1, &input0[1], 128.0, vlen);
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("sse4_1_time: %f\n", total);
+
+  // The two result vectors must match exactly.
+  for(int i = 0; i < vlen; ++i) {
+    CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse4_1[i]);
+  }
+}
+
+#endif
diff --git a/volk/lib/qa_8s_convert_32f_unaligned16.h b/volk/lib/qa_8s_convert_32f_unaligned16.h
new file mode 100644
index 000000000..aad2f8c22
--- /dev/null
+++ b/volk/lib/qa_8s_convert_32f_unaligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_8S_CONVERT_32F_UNALIGNED16_H
+#define INCLUDED_QA_8S_CONVERT_32F_UNALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_8s_convert_32f_unaligned16 : public CppUnit::TestCase {
+  // The fixture's only test case; private because it precedes any access label.
+  void t1();
+
+  // Declare the CppUnit suite for this fixture and add t1 to it.
+  CPPUNIT_TEST_SUITE(qa_8s_convert_32f_unaligned16);
+  CPPUNIT_TEST(t1);
+  CPPUNIT_TEST_SUITE_END();
+};
+
+
+#endif /* INCLUDED_QA_8S_CONVERT_32F_UNALIGNED16_H */
diff --git a/volk/lib/qa_8sc_deinterleave_16s_aligned16.cc b/volk/lib/qa_8sc_deinterleave_16s_aligned16.cc
new file mode 100644
index 000000000..823e7fe2e
--- /dev/null
+++ b/volk/lib/qa_8sc_deinterleave_16s_aligned16.cc
@@ -0,0 +1,67 @@
+#include <volk/volk_runtime.h>
+#include <volk/volk.h>
+#include <qa_8sc_deinterleave_16s_aligned16.h>
+#include <volk/volk_8sc_deinterleave_16s_aligned16.h>
+#include <cstdlib>
+
+//test for sse4.1
+
+#ifndef LV_HAVE_SSE4_1
+
+void qa_8sc_deinterleave_16s_aligned16::t1() {  // built only when LV_HAVE_SSE4_1 is undefined
+  fputs("sse4_1 not available... no test performed\n", stdout);  // nothing is asserted
+}
+
+#else
+
+// Deinterleaves random complex int8_t samples into two int16_t vectors with
+// the "generic" kernel and with the kernel supplied by get_volk_runtime(),
+// prints the clock() time of each run and requires both pairs to match.
+void qa_8sc_deinterleave_16s_aligned16::t1() {
+  volk_runtime_init();
+  volk_environment_init();
+  clock_t start, end;
+  double total;
+  const int vlen = 3201;
+  const int ITERS = 100000;
+  std::complex<int8_t> input0[vlen] __attribute__ ((aligned (16)));
+
+  int16_t output_generic[vlen] __attribute__ ((aligned (16)));
+  int16_t output_generic1[vlen] __attribute__ ((aligned (16)));
+  int16_t output_sse4_1[vlen] __attribute__ ((aligned (16)));
+  int16_t output_sse4_11[vlen] __attribute__ ((aligned (16)));
+
+  // Fill the complex array through a byte view, two int8_t values per sample.
+  // rand() % 256 - 128 covers exactly the int8_t range; scaling a float in
+  // [-1, 1] by 128.0 could produce 128, whose conversion is undefined.
+  int8_t* loadInput = (int8_t*)input0;
+  for(int i = 0; i < vlen*2; ++i) {
+    loadInput[i] = (int8_t) ((rand() % 256) - 128);
+  }
+  printf("8sc_deinterleave_16s_aligned\n");
+
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_8sc_deinterleave_16s_aligned16_manual(output_generic, output_generic1, input0, vlen, "generic");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("generic_time: %f\n", total);
+
+  // Runtime-dispatched kernel; the label assumes it is the SSE4.1 one.
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    get_volk_runtime()->volk_8sc_deinterleave_16s_aligned16(output_sse4_1, output_sse4_11, input0, vlen);
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("sse4.1_time: %f\n", total);
+
+  // Both output vectors of the two runs must match exactly.
+  for(int i = 0; i < vlen; ++i) {
+    CPPUNIT_ASSERT_EQUAL(output_generic[i],  output_sse4_1[i]);
+    CPPUNIT_ASSERT_EQUAL(output_generic1[i],  output_sse4_11[i]);
+  }
+}
+
+#endif
diff --git a/volk/lib/qa_8sc_deinterleave_16s_aligned16.h b/volk/lib/qa_8sc_deinterleave_16s_aligned16.h
new file mode 100644
index 000000000..9c99fed70
--- /dev/null
+++ b/volk/lib/qa_8sc_deinterleave_16s_aligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_8SC_DEINTERLEAVE_16S_ALIGNED16_H
+#define INCLUDED_QA_8SC_DEINTERLEAVE_16S_ALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_8sc_deinterleave_16s_aligned16 : public CppUnit::TestCase {
+  // CppUnit fixture whose suite registers the single test t1.
+  CPPUNIT_TEST_SUITE(qa_8sc_deinterleave_16s_aligned16);
+  CPPUNIT_TEST(t1);
+  CPPUNIT_TEST_SUITE_END();
+
+private:
+  void t1();  // defined in qa_8sc_deinterleave_16s_aligned16.cc
+};
+
+
+#endif /* INCLUDED_QA_8SC_DEINTERLEAVE_16S_ALIGNED16_H */
diff --git a/volk/lib/qa_8sc_deinterleave_32f_aligned16.cc b/volk/lib/qa_8sc_deinterleave_32f_aligned16.cc
new file mode 100644
index 000000000..fb580516c
--- /dev/null
+++ b/volk/lib/qa_8sc_deinterleave_32f_aligned16.cc
@@ -0,0 +1,134 @@
+#include <volk/volk_runtime.h>
+#include <volk/volk.h>
+#include <qa_8sc_deinterleave_32f_aligned16.h>
+#include <volk/volk_8sc_deinterleave_32f_aligned16.h>
+#include <cstdlib>
+
+//test for sse
+
+#ifndef LV_HAVE_SSE4_1
+
+#ifndef LV_HAVE_SSE
+
+void qa_8sc_deinterleave_32f_aligned16::t1() {  // compiled when neither LV_HAVE_SSE4_1 nor LV_HAVE_SSE is defined
+  fputs("sse not available... no test performed\n", stdout);
+}
+
+#else
+
+void qa_8sc_deinterleave_32f_aligned16::t1() {
+  // Benchmarks the "generic" and "sse" implementations on identical random
+  // input and checks that both float output streams agree within a relative
+  // tolerance of 1e-4.
+  volk_environment_init();
+  clock_t start, end;
+  double total;
+  const int vlen = 3201;
+  const int ITERS = 100000;
+  std::complex<int8_t> input0[vlen] __attribute__ ((aligned (16)));
+
+  float output_generic[vlen] __attribute__ ((aligned (16)));
+  float output_generic1[vlen] __attribute__ ((aligned (16)));
+  float output_sse[vlen] __attribute__ ((aligned (16)));
+  float output_sse1[vlen] __attribute__ ((aligned (16)));
+
+  // Fill the complex buffer one int8_t at a time with pseudo-random values.
+  int8_t* loadInput = (int8_t*)input0;
+  for(int i = 0; i < vlen*2; ++i) {
+    loadInput[i] =((char)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 128.0));
+  }
+  printf("8sc_deinterleave_32f_aligned\n");
+
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_8sc_deinterleave_32f_aligned16_manual(output_generic, output_generic1, input0, 128.0, vlen, "generic");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("generic_time: %f\n", total);
+
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_8sc_deinterleave_32f_aligned16_manual(output_sse, output_sse1, input0, 128.0, vlen, "sse");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("sse_time: %f\n", total);
+
+  // Each stream's tolerance is scaled by that stream's own reference value,
+  // so the second output is not judged against the first output's magnitude.
+  for(int i = 0; i < vlen; ++i) {
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4);
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic1[i], output_sse1[i], fabs(output_generic1[i])*1e-4);
+  }
+}
+
+#endif /* LV_HAVE_SSE */
+
+#else
+
+void qa_8sc_deinterleave_32f_aligned16::t1() {
+  // Benchmarks the "generic" and "sse" implementations plus the one reached
+  // through get_volk_runtime() on identical random input, then checks that
+  // the two SIMD result pairs agree with the generic pair within a tolerance.
+  volk_runtime_init();
+  volk_environment_init();
+  clock_t start, end;
+  double total;
+  const int vlen = 3201;
+  const int ITERS = 100000;
+  std::complex<int8_t> input0[vlen] __attribute__ ((aligned (16)));
+
+  float output_generic[vlen] __attribute__ ((aligned (16)));
+  float output_generic1[vlen] __attribute__ ((aligned (16)));
+  float output_sse[vlen] __attribute__ ((aligned (16)));
+  float output_sse1[vlen] __attribute__ ((aligned (16)));
+  float output_sse4_1[vlen] __attribute__ ((aligned (16)));
+  float output_sse14_1[vlen] __attribute__ ((aligned (16)));
+
+  // Fill the complex buffer one int8_t at a time with pseudo-random values.
+  int8_t* loadInput = (int8_t*)input0;
+  for(int i = 0; i < vlen*2; ++i) {
+    loadInput[i] =((char)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 128.0));
+  }
+  printf("8sc_deinterleave_32f_aligned\n");
+
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_8sc_deinterleave_32f_aligned16_manual(output_generic, output_generic1, input0, 128.0, vlen, "generic");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("generic_time: %f\n", total);
+
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_8sc_deinterleave_32f_aligned16_manual(output_sse, output_sse1, input0, 128.0, vlen, "sse");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("sse_time: %f\n", total);
+
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    get_volk_runtime()->volk_8sc_deinterleave_32f_aligned16(output_sse4_1, output_sse14_1, input0, 128.0, vlen);
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("sse4.1_time: %f\n", total);
+
+  // Tolerance per stream: 1e-4 relative to the magnitude of that stream's own
+  // reference value, with an absolute floor of 1e-4. fabs keeps the relative
+  // term meaningful for negative reference values.
+  for(int i = 0; i < vlen; ++i) {
+    const double tol0 = std::max<double>(fabs(output_generic[i])*1e-4, 1e-4);
+    const double tol1 = std::max<double>(fabs(output_generic1[i])*1e-4, 1e-4);
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], tol0);
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic1[i], output_sse1[i], tol1);
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse4_1[i], tol0);
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic1[i], output_sse14_1[i], tol1);
+  }
+}
+
+
+#endif /* LV_HAVE_SSE4_1 */
diff --git a/volk/lib/qa_8sc_deinterleave_32f_aligned16.h b/volk/lib/qa_8sc_deinterleave_32f_aligned16.h
new file mode 100644
index 000000000..63b5fdadb
--- /dev/null
+++ b/volk/lib/qa_8sc_deinterleave_32f_aligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_8SC_DEINTERLEAVE_32F_ALIGNED16_H
+#define INCLUDED_QA_8SC_DEINTERLEAVE_32F_ALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_8sc_deinterleave_32f_aligned16 : public CppUnit::TestCase {
+  // CppUnit fixture whose suite registers the single test t1.
+  CPPUNIT_TEST_SUITE(qa_8sc_deinterleave_32f_aligned16);
+  CPPUNIT_TEST(t1);
+  CPPUNIT_TEST_SUITE_END();
+
+private:
+  void t1();  // defined in qa_8sc_deinterleave_32f_aligned16.cc
+};
+
+
+#endif /* INCLUDED_QA_8SC_DEINTERLEAVE_32F_ALIGNED16_H */
diff --git a/volk/lib/qa_8sc_deinterleave_real_16s_aligned16.cc b/volk/lib/qa_8sc_deinterleave_real_16s_aligned16.cc
new file mode 100644
index 000000000..1cc844b52
--- /dev/null
+++ b/volk/lib/qa_8sc_deinterleave_real_16s_aligned16.cc
@@ -0,0 +1,64 @@
+#include <volk/volk_runtime.h>
+#include <volk/volk.h>
+#include <qa_8sc_deinterleave_real_16s_aligned16.h>
+#include <volk/volk_8sc_deinterleave_real_16s_aligned16.h>
+#include <cstdlib>
+
+//test for sse4.1
+
+#ifndef LV_HAVE_SSE4_1
+
+void qa_8sc_deinterleave_real_16s_aligned16::t1() {  // compiled when LV_HAVE_SSE4_1 is not defined
+  fputs("sse4_1 not available... no test performed\n", stdout);
+}
+
+#else
+
+void qa_8sc_deinterleave_real_16s_aligned16::t1() {
+  // Times the "generic" kernel and the kernel reached through
+  // get_volk_runtime() on the same random input, then requires the two
+  // int16_t output vectors to be identical element by element.
+
+  volk_runtime_init();
+  volk_environment_init();
+
+  // Vector length and number of timed repetitions per implementation.
+  const int vlen = 3201;
+  const int ITERS = 100000;
+  std::complex<int8_t> input0[vlen] __attribute__ ((aligned (16)));
+
+  // ref_out receives the "generic" result, rt_out the runtime call's result.
+  int16_t ref_out[vlen] __attribute__ ((aligned (16)));
+  int16_t rt_out[vlen] __attribute__ ((aligned (16)));
+
+  // Fill the complex buffer one int8_t at a time with pseudo-random values.
+  int8_t* raw = reinterpret_cast<int8_t*>(input0);
+  for(int k = 0; k < vlen*2; ++k) {
+    raw[k] = static_cast<char>((static_cast<float>(rand() - (RAND_MAX/2)) / static_cast<float>(RAND_MAX/2)) * 128.0);
+  }
+
+  printf("8sc_deinterleave_real_16s_aligned\n");
+
+  // Timed run of the implementation selected by name ("generic").
+  clock_t tick = clock();
+  for(int iter = 0; iter < ITERS; ++iter) {
+    volk_8sc_deinterleave_real_16s_aligned16_manual(ref_out, input0, vlen, "generic");
+  }
+  double elapsed = static_cast<double>(clock() - tick) / static_cast<double>(CLOCKS_PER_SEC);
+  printf("generic_time: %f\n", elapsed);
+
+  // Timed run of the implementation obtained from the runtime object.
+  tick = clock();
+  for(int iter = 0; iter < ITERS; ++iter) {
+    get_volk_runtime()->volk_8sc_deinterleave_real_16s_aligned16(rt_out, input0, vlen);
+  }
+  elapsed = static_cast<double>(clock() - tick) / static_cast<double>(CLOCKS_PER_SEC);
+  printf("sse4.1_time: %f\n", elapsed);
+
+  // Integer results: exact equality is required.
+  for(int k = 0; k < vlen; ++k) {
+    CPPUNIT_ASSERT_EQUAL(ref_out[k], rt_out[k]);
+  }
+}
+
+#endif
diff --git a/volk/lib/qa_8sc_deinterleave_real_16s_aligned16.h b/volk/lib/qa_8sc_deinterleave_real_16s_aligned16.h
new file mode 100644
index 000000000..02050926f
--- /dev/null
+++ b/volk/lib/qa_8sc_deinterleave_real_16s_aligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_8SC_DEINTERLEAVE_REAL_16S_ALIGNED16_H
+#define INCLUDED_QA_8SC_DEINTERLEAVE_REAL_16S_ALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_8sc_deinterleave_real_16s_aligned16 : public CppUnit::TestCase {
+  // CppUnit fixture whose suite registers the single test t1.
+  CPPUNIT_TEST_SUITE(qa_8sc_deinterleave_real_16s_aligned16);
+  CPPUNIT_TEST(t1);
+  CPPUNIT_TEST_SUITE_END();
+
+private:
+  void t1();  // defined in qa_8sc_deinterleave_real_16s_aligned16.cc
+};
+
+
+#endif /* INCLUDED_QA_8SC_DEINTERLEAVE_REAL_16S_ALIGNED16_H */
diff --git a/volk/lib/qa_8sc_deinterleave_real_32f_aligned16.cc b/volk/lib/qa_8sc_deinterleave_real_32f_aligned16.cc
new file mode 100644
index 000000000..10e537cde
--- /dev/null
+++ b/volk/lib/qa_8sc_deinterleave_real_32f_aligned16.cc
@@ -0,0 +1,138 @@
+#include <volk/volk_runtime.h>
+#include <volk/volk.h>
+#include <qa_8sc_deinterleave_real_32f_aligned16.h>
+#include <volk/volk_8sc_deinterleave_real_32f_aligned16.h>
+#include <cstdlib>
+
+//test for sse
+
+#ifndef LV_HAVE_SSE4_1
+
+#ifndef LV_HAVE_SSE
+
+void qa_8sc_deinterleave_real_32f_aligned16::t1() {  // compiled when neither LV_HAVE_SSE4_1 nor LV_HAVE_SSE is defined
+  fputs("sse not available... no test performed\n", stdout);
+}
+
+#else
+
+void qa_8sc_deinterleave_real_32f_aligned16::t1() {
+  // Benchmarks the "generic" and "sse" implementations on identical random
+  // input and checks that the two float output vectors agree within a
+  // relative tolerance of 1e-4.
+  volk_environment_init();
+  clock_t start, end;
+  double total;
+  const int vlen = 3201;
+  const int ITERS = 100000;
+  std::complex<int8_t> input0[vlen] __attribute__ ((aligned (16)));
+
+  float output_generic[vlen] __attribute__ ((aligned (16)));
+  float output_sse[vlen] __attribute__ ((aligned (16)));
+
+  // Fill the complex buffer one int8_t at a time with pseudo-random values.
+  int8_t* loadInput = (int8_t*)input0;
+  for(int i = 0; i < vlen*2; ++i) {
+    loadInput[i] =((char)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 128.0));
+  }
+  printf("8sc_deinterleave_real_32f_aligned\n");
+
+  // Reference run; both runs are given the same scalar argument (32768.0).
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_8sc_deinterleave_real_32f_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("generic_time: %f\n", total);
+
+  // Run of the implementation selected by name ("sse").
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_8sc_deinterleave_real_32f_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("sse_time: %f\n", total);
+
+  // Tolerance is relative to the magnitude of the generic result.
+  for(int i = 0; i < vlen; ++i) {
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4);
+  }
+}
+
+#endif /* LV_HAVE_SSE */
+
+#else
+
+void qa_8sc_deinterleave_real_32f_aligned16::t1() {
+  // Benchmarks the "generic" and "sse" implementations plus the one reached
+  // through get_volk_runtime() on identical random input held in 16-byte
+  // aligned heap buffers, then checks that both SIMD results agree with the
+  // generic result within a relative tolerance of 1e-4.
+  volk_runtime_init();
+  volk_environment_init();
+  clock_t start, end;
+  double total;
+  const int vlen = 3201;
+  const int ITERS = 100000;
+  std::complex<int8_t> *input0;
+
+  float* output_generic;
+  float* output_sse;
+  float* output_sse4_1;
+
+  // posix_memalign returns 0 on success; OR the codes together so one check
+  // stops the test before any buffer is used if an allocation failed.
+  int ret = posix_memalign((void**)&input0, 16, 2*vlen * sizeof(int8_t));
+  ret |= posix_memalign((void**)&output_generic, 16, vlen * sizeof(float));
+  ret |= posix_memalign((void**)&output_sse, 16, vlen * sizeof(float));
+  ret |= posix_memalign((void**)&output_sse4_1, 16, vlen * sizeof(float));
+  CPPUNIT_ASSERT_EQUAL(0, ret);
+
+  // The char cast has to wrap the whole scaled expression: casting the
+  // [-1, 1] ratio before multiplying by 128.0 truncates nearly every sample to 0.
+  int8_t* loadInput = (int8_t*)input0;
+  for(int i = 0; i < vlen*2; ++i) {
+    loadInput[i] =((char)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 128.0));
+  }
+
+  printf("8sc_deinterleave_real_32f_aligned\n");
+
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_8sc_deinterleave_real_32f_aligned16_manual(output_generic, input0, 128.0, vlen, "generic");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("generic_time: %f\n", total);
+
+  // Every implementation is given the same scalar (128.0) so results are comparable.
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_8sc_deinterleave_real_32f_aligned16_manual(output_sse, input0, 128.0, vlen, "sse");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("sse_time: %f\n", total);
+
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    get_volk_runtime()->volk_8sc_deinterleave_real_32f_aligned16(output_sse4_1, input0, 128.0, vlen);
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("sse4_1_time: %f\n", total);
+
+  for(int i = 0; i < vlen; ++i) {
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4);
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse4_1[i], fabs(output_generic[i])*1e-4);
+  }
+
+  free(input0);
+  free(output_generic);
+  free(output_sse);
+  free(output_sse4_1);
+}
+
+#endif /* LV_HAVE_SSE4_1 */
diff --git a/volk/lib/qa_8sc_deinterleave_real_32f_aligned16.h b/volk/lib/qa_8sc_deinterleave_real_32f_aligned16.h
new file mode 100644
index 000000000..93338e488
--- /dev/null
+++ b/volk/lib/qa_8sc_deinterleave_real_32f_aligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_8SC_DEINTERLEAVE_REAL_32F_ALIGNED16_H
+#define INCLUDED_QA_8SC_DEINTERLEAVE_REAL_32F_ALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_8sc_deinterleave_real_32f_aligned16 : public CppUnit::TestCase {
+  // CppUnit fixture whose suite registers the single test t1.
+  CPPUNIT_TEST_SUITE(qa_8sc_deinterleave_real_32f_aligned16);
+  CPPUNIT_TEST(t1);
+  CPPUNIT_TEST_SUITE_END();
+
+private:
+  void t1();  // defined in qa_8sc_deinterleave_real_32f_aligned16.cc
+};
+
+
+#endif /* INCLUDED_QA_8SC_DEINTERLEAVE_REAL_32F_ALIGNED16_H */
diff --git a/volk/lib/qa_8sc_deinterleave_real_8s_aligned16.cc b/volk/lib/qa_8sc_deinterleave_real_8s_aligned16.cc
new file mode 100644
index 000000000..d84df8119
--- /dev/null
+++ b/volk/lib/qa_8sc_deinterleave_real_8s_aligned16.cc
@@ -0,0 +1,60 @@
+#include <volk/volk.h>
+#include <qa_8sc_deinterleave_real_8s_aligned16.h>
+#include <volk/volk_8sc_deinterleave_real_8s_aligned16.h>
+#include <cstdlib>
+
+//test for ssse3
+
+#ifndef LV_HAVE_SSSE3
+
+void qa_8sc_deinterleave_real_8s_aligned16::t1() {  // compiled when LV_HAVE_SSSE3 is not defined
+  fputs("ssse3 not available... no test performed\n", stdout);
+}
+
+#else
+
+void qa_8sc_deinterleave_real_8s_aligned16::t1() {
+  // Times the "generic" and "ssse3" implementations on the same random
+  // input, then requires the two int8_t output vectors to be identical
+  // element by element.
+  volk_environment_init();
+
+  // Vector length and number of timed repetitions per implementation.
+  const int vlen = 3201;
+  const int ITERS = 100000;
+  std::complex<int8_t> input0[vlen] __attribute__ ((aligned (16)));
+
+  int8_t ref_out[vlen] __attribute__ ((aligned (16)));
+  int8_t simd_out[vlen] __attribute__ ((aligned (16)));
+
+  // Fill the complex buffer one int8_t at a time with pseudo-random values.
+  int8_t* raw = reinterpret_cast<int8_t*>(input0);
+  for(int k = 0; k < vlen*2; ++k) {
+    raw[k] = static_cast<char>((static_cast<float>(rand() - (RAND_MAX/2)) / static_cast<float>(RAND_MAX/2)) * 128.0);
+  }
+
+  printf("8sc_deinterleave_real_8s_aligned\n");
+
+  // Timed run of the implementation selected by name ("generic").
+  clock_t tick = clock();
+  for(int iter = 0; iter < ITERS; ++iter) {
+    volk_8sc_deinterleave_real_8s_aligned16_manual(ref_out, input0, vlen, "generic");
+  }
+  double elapsed = static_cast<double>(clock() - tick) / static_cast<double>(CLOCKS_PER_SEC);
+  printf("generic_time: %f\n", elapsed);
+
+  // Timed run of the implementation selected by name ("ssse3").
+  tick = clock();
+  for(int iter = 0; iter < ITERS; ++iter) {
+    volk_8sc_deinterleave_real_8s_aligned16_manual(simd_out, input0, vlen, "ssse3");
+  }
+  elapsed = static_cast<double>(clock() - tick) / static_cast<double>(CLOCKS_PER_SEC);
+  printf("ssse3_time: %f\n", elapsed);
+
+  // Integer results: exact equality is required.
+  for(int k = 0; k < vlen; ++k) {
+    CPPUNIT_ASSERT_EQUAL(ref_out[k], simd_out[k]);
+  }
+}
+
+#endif
diff --git a/volk/lib/qa_8sc_deinterleave_real_8s_aligned16.h b/volk/lib/qa_8sc_deinterleave_real_8s_aligned16.h
new file mode 100644
index 000000000..92fc0dd4a
--- /dev/null
+++ b/volk/lib/qa_8sc_deinterleave_real_8s_aligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_8SC_DEINTERLEAVE_REAL_8S_ALIGNED16_H
+#define INCLUDED_QA_8SC_DEINTERLEAVE_REAL_8S_ALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_8sc_deinterleave_real_8s_aligned16 : public CppUnit::TestCase {
+  // CppUnit fixture whose suite registers the single test t1.
+  CPPUNIT_TEST_SUITE(qa_8sc_deinterleave_real_8s_aligned16);
+  CPPUNIT_TEST(t1);
+  CPPUNIT_TEST_SUITE_END();
+
+private:
+  void t1();  // defined in qa_8sc_deinterleave_real_8s_aligned16.cc
+};
+
+
+#endif /* INCLUDED_QA_8SC_DEINTERLEAVE_REAL_8S_ALIGNED16_H */
diff --git a/volk/lib/qa_8sc_multiply_conjugate_16sc_aligned16.cc b/volk/lib/qa_8sc_multiply_conjugate_16sc_aligned16.cc
new file mode 100644
index 000000000..d64eac8ce
--- /dev/null
+++ b/volk/lib/qa_8sc_multiply_conjugate_16sc_aligned16.cc
@@ -0,0 +1,87 @@
+#include <volk/volk_runtime.h>
+#include <volk/volk.h>
+#include <qa_8sc_multiply_conjugate_16sc_aligned16.h>
+#include <stdlib.h>
+#include <math.h>
+#include <time.h>
+
+#define assertcomplexEqual(expected, actual, delta) /* real and imaginary parts must each match within |expected part| * delta */ \
+  do { CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * (delta)); \
+       CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected)) * (delta)); } while (0)
+// Relative tolerance handed to assertcomplexEqual by the test in this file.
+#define	ERR_DELTA	(1e-4)
+
+#ifndef LV_HAVE_SSE4_1
+
+void qa_8sc_multiply_conjugate_16sc_aligned16::t1() {  // compiled when LV_HAVE_SSE4_1 is not defined
+  fputs("sse4.1 not available... no test performed\n", stdout);
+}
+
+#else
+
+void qa_8sc_multiply_conjugate_16sc_aligned16::t1() {
+  // Benchmarks the "generic" kernel and the kernel reached through
+  // get_volk_runtime() on identical random input/taps held in 16-byte aligned
+  // heap buffers, then compares the complex<int16_t> results with
+  // assertcomplexEqual and ERR_DELTA.
+  volk_runtime_init();
+
+  const int vlen = 2046;
+  const int ITERS = 100000;
+
+  volk_environment_init();
+  clock_t start, end;
+  double total;
+  std::complex<int8_t>* input;
+  std::complex<int8_t>* taps;
+  std::complex<int16_t>* result_generic;
+  std::complex<int16_t>* result_sse4_1;
+
+  // posix_memalign returns 0 on success; OR the codes together so one check
+  // stops the test before any buffer is used if an allocation failed.
+  int ret = posix_memalign((void**)&input, 16, vlen*2*sizeof(int8_t));
+  ret |= posix_memalign((void**)&taps, 16, vlen*2*sizeof(int8_t));
+  ret |= posix_memalign((void**)&result_generic, 16, vlen*2*sizeof(int16_t));
+  ret |= posix_memalign((void**)&result_sse4_1, 16, vlen*2*sizeof(int16_t));
+  CPPUNIT_ASSERT_EQUAL(0, ret);
+
+  // Fill both complex buffers one int8_t at a time with pseudo-random values.
+  int8_t* inputInt8_T = (int8_t*)input;
+  int8_t* tapsInt8_T = (int8_t*)taps;
+  for(int i = 0; i < vlen*2; ++i) {
+    inputInt8_T[i] =((int8_t)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 128.0));
+    tapsInt8_T[i] =((int8_t)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 128.0));
+  }
+
+  printf("8sc_multiply_conjugate_16sc_aligned16\n");
+
+  // Timed run of the implementation selected by name ("generic").
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_8sc_multiply_conjugate_16sc_aligned16_manual(result_generic, input, taps, vlen,  "generic");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("generic_time: %f\n", total);
+
+  // Timed run of the implementation obtained from the runtime object.
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    get_volk_runtime()->volk_8sc_multiply_conjugate_16sc_aligned16(result_sse4_1, input, taps, vlen);
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("sse4_1_time: %f\n", total);
+
+  // Compare real and imaginary parts of every output sample.
+  for(int i = 0; i < vlen; i++){
+    assertcomplexEqual(result_generic[i], result_sse4_1[i], ERR_DELTA);
+  }
+
+  free(input);
+  free(taps);
+  free(result_generic);
+  free(result_sse4_1);
+}
+
+#endif /*LV_HAVE_SSE4_1*/
diff --git a/volk/lib/qa_8sc_multiply_conjugate_16sc_aligned16.h b/volk/lib/qa_8sc_multiply_conjugate_16sc_aligned16.h
new file mode 100644
index 000000000..0e78a5eca
--- /dev/null
+++ b/volk/lib/qa_8sc_multiply_conjugate_16sc_aligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_8SC_MULTIPLY_CONJUGATE_16SC_ALIGNED16_H
+#define INCLUDED_QA_8SC_MULTIPLY_CONJUGATE_16SC_ALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_8sc_multiply_conjugate_16sc_aligned16 : public CppUnit::TestCase {
+  // CppUnit fixture whose suite registers the single test t1.
+  CPPUNIT_TEST_SUITE(qa_8sc_multiply_conjugate_16sc_aligned16);
+  CPPUNIT_TEST(t1);
+  CPPUNIT_TEST_SUITE_END();
+
+private:
+  void t1();  // defined in qa_8sc_multiply_conjugate_16sc_aligned16.cc
+};
+
+
+#endif /* INCLUDED_QA_8SC_MULTIPLY_CONJUGATE_16SC_ALIGNED16_H */
diff --git a/volk/lib/qa_8sc_multiply_conjugate_32fc_aligned16.cc b/volk/lib/qa_8sc_multiply_conjugate_32fc_aligned16.cc
new file mode 100644
index 000000000..c27f0e0ca
--- /dev/null
+++ b/volk/lib/qa_8sc_multiply_conjugate_32fc_aligned16.cc
@@ -0,0 +1,87 @@
+#include <volk/volk_runtime.h>
+#include <volk/volk.h>
+#include <qa_8sc_multiply_conjugate_32fc_aligned16.h>
+#include <stdlib.h>
+#include <math.h>
+#include <time.h>
+
+#define assertcomplexEqual(expected, actual, delta) /* real and imaginary parts must each match within |expected part| * delta */ \
+  do { CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * (delta)); \
+       CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected)) * (delta)); } while (0)
+// Relative tolerance handed to assertcomplexEqual by the test in this file.
+#define	ERR_DELTA	(1e-4)
+
+#ifndef LV_HAVE_SSE4_1
+
+void qa_8sc_multiply_conjugate_32fc_aligned16::t1() {  // compiled when LV_HAVE_SSE4_1 is not defined
+  fputs("sse4.1 not available... no test performed\n", stdout);
+}
+
+#else
+
+void qa_8sc_multiply_conjugate_32fc_aligned16::t1() {
+  // Benchmarks the "generic" kernel and the kernel reached through
+  // get_volk_runtime() on identical random input/taps held in 16-byte aligned
+  // heap buffers, then compares the complex<float> results with
+  // assertcomplexEqual and ERR_DELTA.
+  volk_runtime_init();
+
+  const int vlen = 2046;
+  const int ITERS = 100000;
+
+  volk_environment_init();
+  clock_t start, end;
+  double total;
+  std::complex<int8_t>* input;
+  std::complex<int8_t>* taps;
+  std::complex<float>* result_generic;
+  std::complex<float>* result_sse4_1;
+
+  // posix_memalign returns 0 on success; OR the codes together so one check
+  // stops the test before any buffer is used if an allocation failed.
+  int ret = posix_memalign((void**)&input, 16, vlen*2*sizeof(int8_t));
+  ret |= posix_memalign((void**)&taps, 16, vlen*2*sizeof(int8_t));
+  ret |= posix_memalign((void**)&result_generic, 16, vlen*2*sizeof(float));
+  ret |= posix_memalign((void**)&result_sse4_1, 16, vlen*2*sizeof(float));
+  CPPUNIT_ASSERT_EQUAL(0, ret);
+
+  // Fill both complex buffers one int8_t at a time with pseudo-random values.
+  int8_t* inputInt8_T = (int8_t*)input;
+  int8_t* tapsInt8_T = (int8_t*)taps;
+  for(int i = 0; i < vlen*2; ++i) {
+    inputInt8_T[i] =((int8_t)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 128.0));
+    tapsInt8_T[i] =((int8_t)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 128.0));
+  }
+
+  printf("8sc_multiply_conjugate_32fc_aligned16\n");
+
+  // Timed run of the "generic" implementation; both runs get scalar 32768.0.
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_8sc_multiply_conjugate_32fc_aligned16_manual(result_generic, input, taps, 32768.0, vlen,  "generic");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("generic_time: %f\n", total);
+
+  // Timed run of the implementation obtained from the runtime object.
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    get_volk_runtime()->volk_8sc_multiply_conjugate_32fc_aligned16(result_sse4_1, input, taps, 32768.0, vlen);
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("sse4_1_time: %f\n", total);
+
+  // Compare real and imaginary parts of every output sample.
+  for(int i = 0; i < vlen; i++){
+    assertcomplexEqual(result_generic[i], result_sse4_1[i], ERR_DELTA);
+  }
+
+  free(input);
+  free(taps);
+  free(result_generic);
+  free(result_sse4_1);
+}
+
+#endif /*LV_HAVE_SSE4_1*/
diff --git a/volk/lib/qa_8sc_multiply_conjugate_32fc_aligned16.h b/volk/lib/qa_8sc_multiply_conjugate_32fc_aligned16.h
new file mode 100644
index 000000000..eb9ae309c
--- /dev/null
+++ b/volk/lib/qa_8sc_multiply_conjugate_32fc_aligned16.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_8SC_MULTIPLY_CONJUGATE_32FC_ALIGNED16_H
+#define INCLUDED_QA_8SC_MULTIPLY_CONJUGATE_32FC_ALIGNED16_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_8sc_multiply_conjugate_32fc_aligned16 : public CppUnit::TestCase {
+  // CppUnit fixture whose suite registers the single test t1.
+  CPPUNIT_TEST_SUITE(qa_8sc_multiply_conjugate_32fc_aligned16);
+  CPPUNIT_TEST(t1);
+  CPPUNIT_TEST_SUITE_END();
+
+private:
+  void t1();  // defined in qa_8sc_multiply_conjugate_32fc_aligned16.cc
+};
+
+
+#endif /* INCLUDED_QA_8SC_MULTIPLY_CONJUGATE_32FC_ALIGNED16_H */
diff --git a/volk/lib/qa_volk.cc b/volk/lib/qa_volk.cc
new file mode 100644
index 000000000..c3c27b69b
--- /dev/null
+++ b/volk/lib/qa_volk.cc
@@ -0,0 +1,211 @@
+/*
+ * Copyright 2008 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+/*
+ * This class gathers together all the test cases for the VOLK
+ * library into a single test suite.  As you create new test cases,
+ * add them here.
+ */
+
+#include <qa_volk.h>
+#include <qa_16s_quad_max_star_aligned16.h>
+#include <qa_32fc_dot_prod_aligned16.h>
+#include <qa_32fc_square_dist_aligned16.h>
+#include <qa_32fc_square_dist_scalar_mult_aligned16.h>
+#include <qa_32f_sum_of_poly_aligned16.h>
+#include <qa_32fc_index_max_aligned16.h>
+#include <qa_32f_index_max_aligned16.h>
+#include <qa_32fc_conjugate_dot_prod_aligned16.h>
+#include <qa_16s_permute_and_scalar_add_aligned16.h>
+#include <qa_16s_branch_4_state_8_aligned16.h>
+#include <qa_16s_max_star_horizontal_aligned16.h>
+#include <qa_16s_max_star_aligned16.h>
+#include <qa_16s_add_quad_aligned16.h>
+#include <qa_32f_add_aligned16.h>
+#include <qa_32f_subtract_aligned16.h>
+#include <qa_32f_max_aligned16.h>
+#include <qa_32f_min_aligned16.h>
+#include <qa_64f_max_aligned16.h>
+#include <qa_64f_min_aligned16.h>
+#include <qa_32s_and_aligned16.h>
+#include <qa_32s_or_aligned16.h>
+#include <qa_32f_dot_prod_aligned16.h>
+#include <qa_32f_dot_prod_unaligned16.h>
+#include <qa_32f_fm_detect_aligned16.h>
+#include <qa_32fc_32f_multiply_aligned16.h>
+#include <qa_32fc_multiply_aligned16.h>
+#include <qa_32f_divide_aligned16.h>
+#include <qa_32f_multiply_aligned16.h>
+#include <qa_32f_sqrt_aligned16.h>
+#include <qa_8sc_multiply_conjugate_16sc_aligned16.h>
+#include <qa_8sc_multiply_conjugate_32fc_aligned16.h>
+#include <qa_32u_popcnt_aligned16.h>
+#include <qa_64u_popcnt_aligned16.h>
+#include <qa_16u_byteswap_aligned16.h>
+#include <qa_32u_byteswap_aligned16.h>
+#include <qa_64u_byteswap_aligned16.h>
+#include <qa_32f_normalize_aligned16.h>
+#include <qa_16sc_deinterleave_16s_aligned16.h>
+#include <qa_16sc_deinterleave_32f_aligned16.h>
+#include <qa_16sc_deinterleave_real_16s_aligned16.h>
+#include <qa_16sc_deinterleave_real_32f_aligned16.h>
+#include <qa_16sc_deinterleave_real_8s_aligned16.h>
+#include <qa_16sc_magnitude_16s_aligned16.h>
+#include <qa_16sc_magnitude_32f_aligned16.h>
+#include <qa_32fc_deinterleave_32f_aligned16.h>
+#include <qa_32fc_deinterleave_64f_aligned16.h>
+#include <qa_32fc_deinterleave_real_16s_aligned16.h>
+#include <qa_32fc_deinterleave_real_32f_aligned16.h>
+#include <qa_32fc_deinterleave_real_64f_aligned16.h>
+#include <qa_32fc_magnitude_16s_aligned16.h>
+#include <qa_32fc_magnitude_32f_aligned16.h>
+#include <qa_32f_interleave_16sc_aligned16.h>
+#include <qa_32f_interleave_32fc_aligned16.h>
+#include <qa_8sc_deinterleave_16s_aligned16.h>
+#include <qa_8sc_deinterleave_32f_aligned16.h>
+#include <qa_8sc_deinterleave_real_16s_aligned16.h>
+#include <qa_8sc_deinterleave_real_32f_aligned16.h>
+#include <qa_8sc_deinterleave_real_8s_aligned16.h>
+#include <qa_16s_convert_32f_aligned16.h>
+#include <qa_16s_convert_32f_unaligned16.h>
+#include <qa_16s_convert_8s_aligned16.h>
+#include <qa_16s_convert_8s_unaligned16.h>
+#include <qa_32f_convert_16s_aligned16.h>
+#include <qa_32f_convert_16s_unaligned16.h>
+#include <qa_32f_convert_32s_aligned16.h>
+#include <qa_32f_convert_32s_unaligned16.h>
+#include <qa_32f_convert_64f_aligned16.h>
+#include <qa_32f_convert_64f_unaligned16.h>
+#include <qa_32f_convert_8s_aligned16.h>
+#include <qa_32f_convert_8s_unaligned16.h>
+#include <qa_32s_convert_32f_aligned16.h>
+#include <qa_32s_convert_32f_unaligned16.h>
+#include <qa_64f_convert_32f_aligned16.h>
+#include <qa_64f_convert_32f_unaligned16.h>
+#include <qa_8s_convert_16s_aligned16.h>
+#include <qa_8s_convert_16s_unaligned16.h>
+#include <qa_8s_convert_32f_aligned16.h>
+#include <qa_8s_convert_32f_unaligned16.h>
+#include <qa_32fc_32f_power_32fc_aligned16.h>
+#include <qa_32f_power_aligned16.h>
+#include <qa_32fc_atan2_32f_aligned16.h>
+#include <qa_32fc_power_spectral_density_32f_aligned16.h> 
+#include <qa_32fc_power_spectrum_32f_aligned16.h>
+#include <qa_32f_calc_spectral_noise_floor_aligned16.h>
+#include <qa_32f_accumulator_aligned16.h>
+#include <qa_32f_stddev_aligned16.h>
+#include <qa_32f_stddev_and_mean_aligned16.h>
+
+CppUnit::TestSuite *
+qa_volk::suite()
+{
+  CppUnit::TestSuite *volk_suite = new CppUnit::TestSuite("volk");
+  // Add each per-kernel suite to the "volk" suite; the caller receives the raw pointer.
+  volk_suite->addTest(qa_16s_quad_max_star_aligned16::suite());
+  volk_suite->addTest(qa_32fc_dot_prod_aligned16::suite());
+  volk_suite->addTest(qa_32fc_square_dist_scalar_mult_aligned16::suite());
+  volk_suite->addTest(qa_32fc_square_dist_aligned16::suite());
+  volk_suite->addTest(qa_32f_sum_of_poly_aligned16::suite());
+  volk_suite->addTest(qa_32fc_index_max_aligned16::suite());
+  volk_suite->addTest(qa_32f_index_max_aligned16::suite());
+  volk_suite->addTest(qa_32fc_conjugate_dot_prod_aligned16::suite());
+  volk_suite->addTest(qa_16s_permute_and_scalar_add_aligned16::suite());
+  volk_suite->addTest(qa_16s_branch_4_state_8_aligned16::suite());
+  volk_suite->addTest(qa_16s_max_star_horizontal_aligned16::suite());
+  volk_suite->addTest(qa_16s_max_star_aligned16::suite());
+  volk_suite->addTest(qa_16s_add_quad_aligned16::suite());
+  volk_suite->addTest(qa_32f_add_aligned16::suite());
+  volk_suite->addTest(qa_32f_subtract_aligned16::suite());
+  volk_suite->addTest(qa_32f_max_aligned16::suite());
+  volk_suite->addTest(qa_32f_min_aligned16::suite());
+  volk_suite->addTest(qa_64f_max_aligned16::suite());
+  volk_suite->addTest(qa_64f_min_aligned16::suite());
+  volk_suite->addTest(qa_32s_and_aligned16::suite());
+  volk_suite->addTest(qa_32s_or_aligned16::suite());
+  volk_suite->addTest(qa_32f_dot_prod_aligned16::suite());
+  volk_suite->addTest(qa_32f_dot_prod_unaligned16::suite());
+  volk_suite->addTest(qa_32f_fm_detect_aligned16::suite());
+  volk_suite->addTest(qa_32fc_32f_multiply_aligned16::suite());
+  volk_suite->addTest(qa_32fc_multiply_aligned16::suite());
+  volk_suite->addTest(qa_32f_divide_aligned16::suite());
+  volk_suite->addTest(qa_32f_multiply_aligned16::suite());
+  volk_suite->addTest(qa_32f_sqrt_aligned16::suite());
+  volk_suite->addTest(qa_8sc_multiply_conjugate_16sc_aligned16::suite());
+  volk_suite->addTest(qa_8sc_multiply_conjugate_32fc_aligned16::suite());
+  volk_suite->addTest(qa_32u_popcnt_aligned16::suite());
+  volk_suite->addTest(qa_64u_popcnt_aligned16::suite());
+  volk_suite->addTest(qa_16u_byteswap_aligned16::suite());
+  volk_suite->addTest(qa_32u_byteswap_aligned16::suite());
+  volk_suite->addTest(qa_64u_byteswap_aligned16::suite());
+  volk_suite->addTest(qa_32f_normalize_aligned16::suite());
+  volk_suite->addTest(qa_16sc_deinterleave_16s_aligned16::suite());
+  volk_suite->addTest(qa_16sc_deinterleave_32f_aligned16::suite());
+  volk_suite->addTest(qa_16sc_deinterleave_real_16s_aligned16::suite());
+  volk_suite->addTest(qa_16sc_deinterleave_real_32f_aligned16::suite());
+  volk_suite->addTest(qa_16sc_deinterleave_real_8s_aligned16::suite());
+  volk_suite->addTest(qa_16sc_magnitude_16s_aligned16::suite());
+  volk_suite->addTest(qa_16sc_magnitude_32f_aligned16::suite());
+  volk_suite->addTest(qa_32fc_deinterleave_32f_aligned16::suite());
+  volk_suite->addTest(qa_32fc_deinterleave_64f_aligned16::suite());
+  volk_suite->addTest(qa_32fc_deinterleave_real_16s_aligned16::suite());
+  volk_suite->addTest(qa_32fc_deinterleave_real_32f_aligned16::suite());
+  volk_suite->addTest(qa_32fc_deinterleave_real_64f_aligned16::suite());
+  volk_suite->addTest(qa_32fc_magnitude_16s_aligned16::suite());
+  volk_suite->addTest(qa_32fc_magnitude_32f_aligned16::suite());
+  volk_suite->addTest(qa_32f_interleave_16sc_aligned16::suite());
+  volk_suite->addTest(qa_32f_interleave_32fc_aligned16::suite());
+  volk_suite->addTest(qa_8sc_deinterleave_16s_aligned16::suite());
+  volk_suite->addTest(qa_8sc_deinterleave_32f_aligned16::suite());
+  volk_suite->addTest(qa_8sc_deinterleave_real_16s_aligned16::suite());
+  volk_suite->addTest(qa_8sc_deinterleave_real_32f_aligned16::suite());
+  volk_suite->addTest(qa_8sc_deinterleave_real_8s_aligned16::suite());
+  volk_suite->addTest(qa_16s_convert_32f_aligned16::suite());
+  volk_suite->addTest(qa_16s_convert_32f_unaligned16::suite());
+  volk_suite->addTest(qa_16s_convert_8s_aligned16::suite());
+  volk_suite->addTest(qa_16s_convert_8s_unaligned16::suite());
+  volk_suite->addTest(qa_32f_convert_16s_aligned16::suite());
+  volk_suite->addTest(qa_32f_convert_16s_unaligned16::suite());
+  volk_suite->addTest(qa_32f_convert_32s_aligned16::suite());
+  volk_suite->addTest(qa_32f_convert_32s_unaligned16::suite());
+  volk_suite->addTest(qa_32f_convert_64f_aligned16::suite());
+  volk_suite->addTest(qa_32f_convert_64f_unaligned16::suite());
+  volk_suite->addTest(qa_32f_convert_8s_aligned16::suite());
+  volk_suite->addTest(qa_32f_convert_8s_unaligned16::suite());
+  volk_suite->addTest(qa_32s_convert_32f_aligned16::suite());
+  volk_suite->addTest(qa_32s_convert_32f_unaligned16::suite());
+  volk_suite->addTest(qa_64f_convert_32f_aligned16::suite());
+  volk_suite->addTest(qa_64f_convert_32f_unaligned16::suite());
+  volk_suite->addTest(qa_8s_convert_16s_aligned16::suite());
+  volk_suite->addTest(qa_8s_convert_16s_unaligned16::suite());
+  volk_suite->addTest(qa_8s_convert_32f_aligned16::suite());
+  volk_suite->addTest(qa_8s_convert_32f_unaligned16::suite());
+  volk_suite->addTest(qa_32fc_32f_power_32fc_aligned16::suite());
+  volk_suite->addTest(qa_32f_power_aligned16::suite());
+  volk_suite->addTest(qa_32fc_atan2_32f_aligned16::suite());
+  volk_suite->addTest(qa_32fc_power_spectral_density_32f_aligned16::suite());
+  volk_suite->addTest(qa_32fc_power_spectrum_32f_aligned16::suite());
+  volk_suite->addTest(qa_32f_calc_spectral_noise_floor_aligned16::suite());
+  volk_suite->addTest(qa_32f_accumulator_aligned16::suite());
+  volk_suite->addTest(qa_32f_stddev_aligned16::suite());
+  volk_suite->addTest(qa_32f_stddev_and_mean_aligned16::suite());
+
+  return volk_suite;
+}
diff --git a/volk/lib/qa_volk.h b/volk/lib/qa_volk.h
new file mode 100644
index 000000000..43fa7faba
--- /dev/null
+++ b/volk/lib/qa_volk.h
@@ -0,0 +1,36 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2008 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifndef INCLUDED_QA_VOLK_H
+#define INCLUDED_QA_VOLK_H
+
+#include <cppunit/TestSuite.h>
+
+//! Collects all of the VOLK QA tests into a single CppUnit suite.
+//  test_all.cc hands the returned suite to its CppUnit test runner.
+class qa_volk {
+ public:
+  //! Returns the suite containing every registered VOLK QA test.
+  static CppUnit::TestSuite *suite ();
+};
+
+#endif /* INCLUDED_QA_VOLK_H */
diff --git a/volk/lib/test_all.cc b/volk/lib/test_all.cc
new file mode 100644
index 000000000..50ac08eab
--- /dev/null
+++ b/volk/lib/test_all.cc
@@ -0,0 +1,82 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2002,2008 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#include <cppunit/ui/text/TestRunner.h>
+#include <cppunit/TextTestRunner.h>
+
+#include <qa_volk.h>
+
+#include <cppunit/XmlOutputter.h>
+#include <iostream>
+#include <getopt.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string>
+#include <fstream>
+
+int
+main (int argc, char **argv)
+{
+  // Runs every test in qa_volk::suite(). Results go to the console, or to
+  // the XML file named with -o. Exit status is 0 only if all tests passed.
+  int opt = 0;
+  std::string xmlOutputFile("");
+
+  while( (opt = getopt(argc, argv, "o:")) != -1){
+    switch(opt){
+    case 'o':
+      if(optarg){
+	xmlOutputFile.assign(optarg);
+      }
+      else{
+	std::cerr << "No xml file output specified for -o" << std::endl;
+	exit(EXIT_FAILURE);
+      }
+      break;
+
+    default: /* '?' */
+      fprintf(stderr, "Usage: %s [-o] \"xml output file\"\n",
+	      argv[0]);
+      exit(EXIT_FAILURE);
+    }
+  }
+
+  CppUnit::TextUi::TestRunner runner;
+  runner.addTest (qa_volk::suite ());
+
+  bool was_successful = false;
+  if(!xmlOutputFile.empty()){
+    std::ofstream xmlOutput(xmlOutputFile.c_str());
+    if(!xmlOutput.is_open()){
+      // Say why we fail; a bare exit status of 1 looks like a test failure.
+      std::cerr << "Could not open xml output file: " << xmlOutputFile << std::endl;
+      return EXIT_FAILURE;
+    }
+    runner.setOutputter(new CppUnit::XmlOutputter(&runner.result(), xmlOutput));
+    was_successful = runner.run("", false, true, false);
+    xmlOutput.close();
+  }
+  else{
+    was_successful = runner.run ("", false);
+  }
+  return was_successful ? 0 : 1;
+}
diff --git a/volk/lib/volk_rank_archs.c b/volk/lib/volk_rank_archs.c
new file mode 100644
index 000000000..b1a93db26
--- /dev/null
+++ b/volk/lib/volk_rank_archs.c
@@ -0,0 +1,13 @@
+#include<volk_rank_archs.h>
+#include<stdio.h>
+/* Rank arch_defs ([0] = entry count, [1..count] = entries) against `arch`; return the 0-based index of the pick. */
+unsigned int volk_rank_archs(const int* arch_defs, unsigned int arch) {
+  int i = 2;                  /* entry 0 (arch_defs[1]) is the fallback, so scanning starts at arch_defs[2] */
+  unsigned int best_val = 0;
+  for(; i < arch_defs[0] + 1; ++i) {
+    if((arch_defs[i] & ~arch) == 0) {  /* bitwise ~ (was logical !): skip entries with bits absent from arch */
+      best_val = (arch_defs[i] > arch_defs[best_val + 1]) ? i-1 : best_val;
+    }
+  }
+  return best_val;
+}
diff --git a/volk/lib/volk_rank_archs.h b/volk/lib/volk_rank_archs.h
new file mode 100644
index 000000000..26b9f7503
--- /dev/null
+++ b/volk/lib/volk_rank_archs.h
@@ -0,0 +1,14 @@
+#ifndef INCLUDED_VOLK_RANK_ARCHS_H
+#define INCLUDED_VOLK_RANK_ARCHS_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+/* Returns the 0-based index of the arch_defs entry chosen for `arch` (arch_defs[0] holds the entry count). */
+unsigned int volk_rank_archs(const int* arch_defs, unsigned int arch);
+ 
+
+#ifdef __cplusplus
+}
+#endif
+#endif /*INCLUDED_VOLK_RANK_ARCHS_H*/
-- 
cgit 


From f8b0c86d8a9eb347cb7187e3b01ed46c66de6a64 Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Wed, 8 Dec 2010 01:09:35 -0500
Subject: volk: Adding gitignore files.

---
 volk/lib/.gitignore | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)
 create mode 100644 volk/lib/.gitignore

(limited to 'volk/lib')

diff --git a/volk/lib/.gitignore b/volk/lib/.gitignore
new file mode 100644
index 000000000..573fb1618
--- /dev/null
+++ b/volk/lib/.gitignore
@@ -0,0 +1,21 @@
+/*.cache
+/*.la
+/*.lo
+/*.pc
+/.deps
+/.la
+/.libs
+/.lo
+/Makefile
+/Makefile.in
+/volk.c
+/volk_cpu_generic.c
+/volk_cpu_powerpc.c
+/volk_cpu_x86.c
+/volk_environment_init.c
+/volk_init.c
+/volk_init.h
+/volk_mktables
+/volk_mktables.c
+/volk_proccpu_sim.c
+/volk_runtime.c
-- 
cgit 


From 74f206edb2c7bfbe010b5a5cbc5fe2f07965c3a6 Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Wed, 8 Dec 2010 01:29:58 -0500
Subject: volk: Fixing makefiles for dist. Distcheck still failing on other
 issues now.

---
 volk/lib/Makefile.am | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'volk/lib')

diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am
index 97eb75680..54df42d54 100644
--- a/volk/lib/Makefile.am
+++ b/volk/lib/Makefile.am
@@ -61,7 +61,7 @@ universal_CODE = 		\
 	volk_environment_init.c
 
 generic_CODE = 		\
-	volk_cpu_generic.cc
+	volk_cpu_generic.c
 
 x86_CODE = 		\
 	volk_cpu_x86.c
@@ -73,7 +73,7 @@ x86_64_SUBCODE = 	\
 	cpuid_x86_64.S
 
 powerpc_CODE = \
-	volk_cpu_powerpc.cc
+	volk_cpu_powerpc.c
 
 
 if MD_CPU_generic
@@ -236,6 +236,7 @@ libvolk_qa_la_LIBADD = \
 noinst_HEADERS = \
 	volk_init.h \
 	qa_volk.h \
+	assembly.h \
 	qa_16s_quad_max_star_aligned16.h \
 	qa_32fc_dot_prod_aligned16.h \
 	qa_32fc_square_dist_aligned16.h \
-- 
cgit 


From 46d55649012e4fb2838a6f8e9f3c9226ea8b2d50 Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Wed, 8 Dec 2010 12:19:28 -0500
Subject: volk: Working on VPATH build issues. Makes it through configure,
 fails on make.

---
 volk/lib/Makefile.am | 1 +
 1 file changed, 1 insertion(+)

(limited to 'volk/lib')

diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am
index 54df42d54..4ee934e8b 100644
--- a/volk/lib/Makefile.am
+++ b/volk/lib/Makefile.am
@@ -46,6 +46,7 @@ lib_LTLIBRARIES = \
 	libvolk_runtime.la \
 	libvolk_qa.la
 
+EXTRA_DIST = volk_mktables.c
 
 # ----------------------------------------------------------------
 #                      The main library
-- 
cgit 


From 1cc88091470dd4654b6936cda92d81841e135209 Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Wed, 8 Dec 2010 17:00:38 -0500
Subject: volk: more changes to build system so that VPATH builds properly and
 project makes distcheck.

---
 volk/lib/Makefile.am | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

(limited to 'volk/lib')

diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am
index 4ee934e8b..7e808695f 100644
--- a/volk/lib/Makefile.am
+++ b/volk/lib/Makefile.am
@@ -1,5 +1,5 @@
 #
-# Copyright 2008 Free Software Foundation, Inc.
+# Copyright 2010 Free Software Foundation, Inc.
 # 
 # This file is part of GNU Radio
 # 
@@ -20,7 +20,9 @@
 
 include $(top_srcdir)/Makefile.common
 
-AM_CPPFLAGS = $(STD_DEFINES_AND_INCLUDES) $(CPPUNIT_CPPFLAGS) $(LV_CXXFLAGS)
+AM_CPPFLAGS = $(STD_DEFINES_AND_INCLUDES) $(CPPUNIT_CPPFLAGS) \
+	-I$(top_builddir)/include \
+	$(LV_CXXFLAGS) $(WITH_INCLUDES)
 
 
 # We build 2 libraries and 1 executable here.  One library contains
@@ -46,7 +48,10 @@ lib_LTLIBRARIES = \
 	libvolk_runtime.la \
 	libvolk_qa.la
 
-EXTRA_DIST = volk_mktables.c
+EXTRA_DIST = \
+	volk_mktables.c		\
+	volk_rank_archs.h 	\
+	volk_proccpu_sim.c
 
 # ----------------------------------------------------------------
 #                      The main library
-- 
cgit 


From a8f33e1b577342fd8149d9308d474871c44c7d52 Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Wed, 8 Dec 2010 17:26:40 -0500
Subject: Removing autotests of volk during make check and distchecks since
 they take a long time to run.

These can be run by hand by executing volk/lib/test_all

Also made a comment about needing a possible fix for this makefile.
---
 volk/lib/Makefile.am | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'volk/lib')

diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am
index 7e808695f..a95860d11 100644
--- a/volk/lib/Makefile.am
+++ b/volk/lib/Makefile.am
@@ -20,6 +20,10 @@
 
 include $(top_srcdir)/Makefile.common
 
+#FIXME: forcing the top_builddir for distcheck seems like a bit
+# of a hack. Figure out the right way to do this to find built
+# volk_config.h and volk_tables.h
+
 AM_CPPFLAGS = $(STD_DEFINES_AND_INCLUDES) $(CPPUNIT_CPPFLAGS) \
 	-I$(top_builddir)/include \
 	$(LV_CXXFLAGS) $(WITH_INCLUDES)
@@ -40,7 +44,7 @@ AM_CPPFLAGS = $(STD_DEFINES_AND_INCLUDES) $(CPPUNIT_CPPFLAGS) \
 
 
 # list of programs run by "make check" and "make distcheck"
-TESTS = test_all
+#TESTS = test_all
 
 
 lib_LTLIBRARIES = \
-- 
cgit 


From f3c684751dc3da3a06d5960d8b961739bdf0fd12 Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Thu, 9 Dec 2010 17:34:29 -0500
Subject: volk: adding generic QA test for 16sc_magnitude_32f.

---
 volk/lib/qa_16sc_magnitude_32f_aligned16.cc | 42 ++++++++++++++++++++++++++++-
 1 file changed, 41 insertions(+), 1 deletion(-)

(limited to 'volk/lib')

diff --git a/volk/lib/qa_16sc_magnitude_32f_aligned16.cc b/volk/lib/qa_16sc_magnitude_32f_aligned16.cc
index 06dff2fd5..2c9e48f6e 100644
--- a/volk/lib/qa_16sc_magnitude_32f_aligned16.cc
+++ b/volk/lib/qa_16sc_magnitude_32f_aligned16.cc
@@ -8,7 +8,47 @@
 #ifndef LV_HAVE_SSE3
 
 void qa_16sc_magnitude_32f_aligned16::t1() {
-  printf("sse3 not available... no test performed\n");
+  clock_t start, end;  // variant compiled when LV_HAVE_SSE3 is undefined: only "generic" is exercised
+  double total;
+  const int vlen = 3201;
+  const int ITERS = 10000;
+  std::complex<int16_t> input0[vlen] __attribute__ ((aligned (16)));
+  // Kernel output and the scalar reference it is compared against.
+  float output_generic[vlen] __attribute__ ((aligned (16)));
+  float output_known[vlen] __attribute__ ((aligned (16)));
+  // Fill input0 with random values by writing it as 2*vlen consecutive int16_t.
+  int16_t* inputLoad = (int16_t*)input0;
+  for(int i = 0; i < 2*vlen; ++i) {   
+    inputLoad[i] = (int16_t)(rand() - (RAND_MAX/2));
+  }
+  printf("16sc_magnitude_32f_aligned\n");
+  // Reference result: magnitude of each sample after dividing both parts by scale.
+  float scale = 32768.0;
+  for(int i = 0; i < vlen; ++i) {   
+    float re = (float)(input0[i].real())/scale;
+    float im = (float)(input0[i].imag())/scale;
+    output_known[i] = sqrt(re*re + im*im);
+  }
+  // Time ITERS calls; the "generic" argument presumably selects the plain C implementation.
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_16sc_magnitude_32f_aligned16_manual(output_generic, input0, scale, vlen, "generic");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("generic_time: %f\n", total);
+  // Disabled debug dump of the first 100 inputs and results.
+  /*
+  for(int i = 0; i < 100; ++i) {
+    printf("inputs: %d + j%d\n", input0[i].real(), input0[i].imag());
+    printf("generic... %f == %f\n", output_generic[i], output_known[i]);
+  }
+  */
+  // Every output must match the reference within a relative tolerance of 1e-4.
+  for(int i = 0; i < vlen; ++i) {
+    //printf("%d...%d\n", output0[i], output01[i]);
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_known[i], fabs(output_generic[i])*1e-4);
+  }
 }
 
 #else
-- 
cgit 


From 31c85c66f38ed304db06e0696b3df1d2407378c8 Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Thu, 9 Dec 2010 17:53:05 -0500
Subject: volk: Adding a few more generic-only test cases.

---
 volk/lib/qa_32f_add_aligned16.cc      | 55 ++++++++++++++++++++++++++++++++++-
 volk/lib/qa_32f_divide_aligned16.cc   | 55 ++++++++++++++++++++++++++++++++++-
 volk/lib/qa_32f_multiply_aligned16.cc | 55 ++++++++++++++++++++++++++++++++++-
 volk/lib/qa_32f_sqrt_aligned16.cc     | 53 +++++++++++++++++++++++++++++++++
 4 files changed, 215 insertions(+), 3 deletions(-)

(limited to 'volk/lib')

diff --git a/volk/lib/qa_32f_add_aligned16.cc b/volk/lib/qa_32f_add_aligned16.cc
index 92f35c7ec..002aebfc9 100644
--- a/volk/lib/qa_32f_add_aligned16.cc
+++ b/volk/lib/qa_32f_add_aligned16.cc
@@ -1,3 +1,22 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2010 Free Software Foundation, Inc.
+ * 
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING.  If not, see 
+ * <http://www.gnu.org/licenses/>.
+ */
+
 #include <volk/volk.h>
 #include <qa_32f_add_aligned16.h>
 #include <volk/volk_32f_add_aligned16.h>
@@ -8,7 +27,41 @@
 #ifndef LV_HAVE_SSE
 
 void qa_32f_add_aligned16::t1() {
-  printf("sse not available... no test performed\n");
+  clock_t start, end;  // variant compiled when LV_HAVE_SSE is undefined: only "generic" is exercised
+  double total;
+  const int vlen = 3201;
+  const int ITERS = 10000;
+  float input0[vlen] __attribute__ ((aligned (16)));
+  float input1[vlen] __attribute__ ((aligned (16)));
+  // Kernel output and the scalar reference it is compared against.
+  float output0[vlen] __attribute__ ((aligned (16)));
+  float output_known[vlen] __attribute__ ((aligned (16)));
+  // Random operands of roughly [-1, 1]; the reference sum is computed inline.
+  for(int i = 0; i < vlen; ++i) {   
+    input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
+    input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
+    output_known[i] = input0[i] + input1[i];
+  }
+  printf("32f_add_aligned\n");
+  // Time ITERS calls; the "generic" argument presumably selects the plain C implementation.
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_32f_add_aligned16_manual(output0, input0, input1, vlen, "generic");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("generic_time: %f\n", total);
+  // Disabled debug dump of the first 10 inputs and results.
+  /*
+  for(int i = 0; i < 10; ++i) {
+    printf("inputs: %f, %f\n", input0[i], input1[i]);
+    printf("generic... %f == %f\n", output0[i], output_known[i]);
+  }
+  */
+  // Exact equality is required between the kernel output and the reference.
+  for(int i = 0; i < vlen; ++i) {
+    CPPUNIT_ASSERT_EQUAL(output0[i], output_known[i]);
+  }
 }
 
 #else
diff --git a/volk/lib/qa_32f_divide_aligned16.cc b/volk/lib/qa_32f_divide_aligned16.cc
index b20999beb..8826bf94f 100644
--- a/volk/lib/qa_32f_divide_aligned16.cc
+++ b/volk/lib/qa_32f_divide_aligned16.cc
@@ -1,3 +1,22 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2010 Free Software Foundation, Inc.
+ * 
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING.  If not, see 
+ * <http://www.gnu.org/licenses/>.
+ */
+
 #include <volk/volk.h>
 #include <qa_32f_divide_aligned16.h>
 #include <volk/volk_32f_divide_aligned16.h>
@@ -8,7 +27,41 @@
 #ifndef LV_HAVE_SSE
 
 void qa_32f_divide_aligned16::t1() {
-  printf("sse not available... no test performed\n");
+  clock_t start, end;  // variant compiled when LV_HAVE_SSE is undefined: only "generic" is exercised
+  double total;
+  const int vlen = 3201;
+  const int ITERS = 10000;
+  float input0[vlen] __attribute__ ((aligned (16)));
+  float input1[vlen] __attribute__ ((aligned (16)));
+  // Kernel output and the scalar reference it is compared against.
+  float output0[vlen] __attribute__ ((aligned (16)));
+  float output_known[vlen] __attribute__ ((aligned (16)));
+  // Random operands of roughly [-1, 1]. NOTE(review): input1[i] may be 0, giving inf/NaN - confirm this is acceptable.
+  for(int i = 0; i < vlen; ++i) {   
+    input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
+    input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
+    output_known[i] = input0[i] / input1[i];
+  }
+  printf("32f_divide_aligned\n");
+  // Time ITERS calls; the "generic" argument presumably selects the plain C implementation.
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_32f_divide_aligned16_manual(output0, input0, input1, vlen, "generic");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("generic_time: %f\n", total);
+  // Disabled debug dump of the first 10 inputs and results.
+  /*
+  for(int i = 0; i < 10; ++i) {
+    printf("inputs: %f, %f\n", input0[i], input1[i]);
+    printf("generic... %f == %f\n", output0[i], output_known[i]);
+  }
+  */
+  // Exact equality is required between the kernel output and the reference.
+  for(int i = 0; i < vlen; ++i) {
+    CPPUNIT_ASSERT_EQUAL(output0[i], output_known[i]);
+  }
 }
 
 #else
diff --git a/volk/lib/qa_32f_multiply_aligned16.cc b/volk/lib/qa_32f_multiply_aligned16.cc
index c77fe97da..e52748466 100644
--- a/volk/lib/qa_32f_multiply_aligned16.cc
+++ b/volk/lib/qa_32f_multiply_aligned16.cc
@@ -1,3 +1,22 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2010 Free Software Foundation, Inc.
+ * 
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING.  If not, see 
+ * <http://www.gnu.org/licenses/>.
+ */
+
 #include <volk/volk.h>
 #include <qa_32f_multiply_aligned16.h>
 #include <volk/volk_32f_multiply_aligned16.h>
@@ -8,7 +27,41 @@
 #ifndef LV_HAVE_SSE
 
 void qa_32f_multiply_aligned16::t1() {
-  printf("sse not available... no test performed\n");
+  clock_t start, end;  // variant compiled when LV_HAVE_SSE is undefined: only "generic" is exercised
+  double total;
+  const int vlen = 3201;
+  const int ITERS = 10000;
+  float input0[vlen] __attribute__ ((aligned (16)));
+  float input1[vlen] __attribute__ ((aligned (16)));
+  // Kernel output and the scalar reference it is compared against.
+  float output0[vlen] __attribute__ ((aligned (16)));
+  float output_known[vlen] __attribute__ ((aligned (16)));
+  // Random operands of roughly [-1, 1]; the reference product is computed inline.
+  for(int i = 0; i < vlen; ++i) {   
+    input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
+    input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
+    output_known[i] = input0[i] * input1[i];
+  }
+  printf("32f_multiply_aligned\n");
+  // Time ITERS calls; the "generic" argument presumably selects the plain C implementation.
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_32f_multiply_aligned16_manual(output0, input0, input1, vlen, "generic");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("generic_time: %f\n", total);
+  // Disabled debug dump of the first 10 inputs and results.
+  /*
+  for(int i = 0; i < 10; ++i) {
+    printf("inputs: %f, %f\n", input0[i], input1[i]);
+    printf("generic... %f == %f\n", output0[i], output_known[i]);
+  }
+  */
+  // Exact equality is required between the kernel output and the reference.
+  for(int i = 0; i < vlen; ++i) {
+    CPPUNIT_ASSERT_EQUAL(output0[i], output_known[i]);
+  }
 }
 
 #else
diff --git a/volk/lib/qa_32f_sqrt_aligned16.cc b/volk/lib/qa_32f_sqrt_aligned16.cc
index a3e6abc18..9a5f71de0 100644
--- a/volk/lib/qa_32f_sqrt_aligned16.cc
+++ b/volk/lib/qa_32f_sqrt_aligned16.cc
@@ -1,3 +1,22 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2010 Free Software Foundation, Inc.
+ * 
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING.  If not, see 
+ * <http://www.gnu.org/licenses/>.
+ */
+
 #include <volk/volk.h>
 #include <qa_32f_sqrt_aligned16.h>
 #include <volk/volk_32f_sqrt_aligned16.h>
@@ -9,6 +28,40 @@
 
 void qa_32f_sqrt_aligned16::t1() {
   printf("sse not available... no test performed\n");
+  clock_t start, end;  // NOTE(review): the message printed above is stale; a generic test now runs below
+  double total;
+  const int vlen = 3201;
+  const int ITERS = 10000;
+  float input0[vlen] __attribute__ ((aligned (16)));
+  // Kernel output and the scalar reference it is compared against.
+  float output0[vlen] __attribute__ ((aligned (16)));
+  float output_known[vlen] __attribute__ ((aligned (16)));
+  // Inputs are drawn from [0, 1]; the reference is sqrt() of each input.
+  // No reason to test negative numbers because they result in NaN.
+  for(int i = 0; i < vlen; ++i) {   
+    input0[i] = ((float) (rand()) / static_cast<float>(RAND_MAX));
+    output_known[i] = sqrt(input0[i]);
+  }
+  printf("32f_sqrt_aligned\n");
+  // Time ITERS calls; the "generic" argument presumably selects the plain C implementation.
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_32f_sqrt_aligned16_manual(output0, input0, vlen, "generic");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("generic_time: %f\n", total);
+  
+  /*
+  for(int i = 0; i < 10; ++i) {
+    printf("inputs: %f\n", input0[i]);
+    printf("generic... %f == %f\n", output0[i], output_known[i]);
+  }
+  */
+  // Every output must match the reference within a relative tolerance of 1e-4.
+  for(int i = 0; i < vlen; ++i) {
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(output0[i], output_known[i], fabs(output0[i])*1e-4);
+  }
 }
 
 #else
-- 
cgit 


From 8375fd6ca2f6e5edb923abe0d6341b6d4d2d1aae Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Fri, 10 Dec 2010 01:48:17 -0500
Subject: volk: Fixing build system to handle making volk_mktables,
 volk_tables.h, and volk_config.h instead of a standalone shell script.

---
 volk/lib/Makefile.am | 1 -
 1 file changed, 1 deletion(-)

(limited to 'volk/lib')

diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am
index a95860d11..814d438fd 100644
--- a/volk/lib/Makefile.am
+++ b/volk/lib/Makefile.am
@@ -353,7 +353,6 @@ distclean-local:
 	rm -f volk_cpu_x86.c
 	rm -f volk_init.c
 	rm -f volk_init.h
-	rm -f volk_mktables
 	rm -f volk_mktables.c
 	rm -f volk_proccpu_sim.c
 	rm -f volk_runtime.c
-- 
cgit 


From bef3db60e73953f2d2ecdc6a86a81e11df3b103d Mon Sep 17 00:00:00 2001
From: Nick Foster
Date: Mon, 13 Dec 2010 19:18:45 -0800
Subject: volk: committed some stuff i neglected

---
 volk/lib/Makefile.am              | 17 +++++++++++------
 volk/lib/qa_32f_sqrt_aligned16.cc | 15 +++++++++++++++
 2 files changed, 26 insertions(+), 6 deletions(-)

(limited to 'volk/lib')

diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am
index 814d438fd..1291b01cd 100644
--- a/volk/lib/Makefile.am
+++ b/volk/lib/Makefile.am
@@ -45,7 +45,9 @@ AM_CPPFLAGS = $(STD_DEFINES_AND_INCLUDES) $(CPPUNIT_CPPFLAGS) \
 
 # list of programs run by "make check" and "make distcheck"
 #TESTS = test_all
-
+#orc stuff gets built in the ORC directory conditional to ORC being enabled.
+#it gets linked in during the build of libvolk as an added library.
+#there might be a better way to do this.
 
 lib_LTLIBRARIES = \
 	libvolk.la \
@@ -72,6 +74,9 @@ universal_CODE = 		\
 
 generic_CODE = 		\
 	volk_cpu_generic.c
+	
+orc_CODE =      \
+	volk_cpu_orc.c
 
 x86_CODE = 		\
 	volk_cpu_x86.c
@@ -133,10 +138,9 @@ endif
 
 
-libvolk_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 
-libvolk_runtime_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0
-
-libvolk_la_LIBADD =
+libvolk_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 -lorc-0.4
+libvolk_runtime_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 -lorc-0.4
+libvolk_la_LIBADD = ../orc/libvolk_orc.a
 
 
@@ -233,11 +237,12 @@ libvolk_qa_la_SOURCES = \
 	qa_32f_stddev_aligned16.cc \
 	qa_32f_stddev_and_mean_aligned16.cc
 
-libvolk_qa_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 
+libvolk_qa_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 -lorc-0.4
 
 libvolk_qa_la_LIBADD = \
 	libvolk.la \
 	libvolk_runtime.la \
+	../orc/libvolk_orc.a \
 	$(CPPUNIT_LIBS)
 
 # ----------------------------------------------------------------
diff --git a/volk/lib/qa_32f_sqrt_aligned16.cc b/volk/lib/qa_32f_sqrt_aligned16.cc
index 9a5f71de0..81d66dad7 100644
--- a/volk/lib/qa_32f_sqrt_aligned16.cc
+++ b/volk/lib/qa_32f_sqrt_aligned16.cc
@@ -52,6 +52,14 @@ void qa_32f_sqrt_aligned16::t1() {
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
   printf("generic_time: %f\n", total);
   
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_32f_sqrt_aligned16_manual(output0, input0, vlen, "orc");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("orc_time: %f\n", total);
+  
   /*
   for(int i = 0; i < 10; ++i) {
     printf("inputs: %f\n", input0[i]);
@@ -92,6 +100,13 @@ void qa_32f_sqrt_aligned16::t1() {
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
   printf("generic_time: %f\n", total);
   start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_32f_sqrt_aligned16_manual(output0, input0, vlen, "orc");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("orc_time: %f\n", total);
+  start = clock();
   for(int count = 0; count < ITERS; ++count) {
     volk_32f_sqrt_aligned16_manual(output01, input0, vlen, "sse");
   }
-- 
cgit 


From 611526f9dfba0df4a1a49d47916706438ac194b3 Mon Sep 17 00:00:00 2001
From: Nick Foster
Date: Tue, 14 Dec 2010 01:00:29 -0800
Subject: Volk: Automated more automake for orc. Brought orcc generation in.
 Shared library libvolk_orc.la. Linking is hackery right now with specified
 -lorc-0.4 flags; this should change. Otherwise pretty much OK.

---
 volk/lib/Makefile.am | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'volk/lib')

diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am
index 1291b01cd..649d461e0 100644
--- a/volk/lib/Makefile.am
+++ b/volk/lib/Makefile.am
@@ -138,9 +138,9 @@ endif
 
 
-libvolk_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 -lorc-0.4
-libvolk_runtime_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 -lorc-0.4
-libvolk_la_LIBADD = ../orc/libvolk_orc.a
+libvolk_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 $(ORC_LDFLAGS) -lorc-0.4
+libvolk_runtime_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 $(ORC_LDFLAGS) -lorc-0.4
+libvolk_la_LIBADD = ../orc/libvolk_orc.la
 
 
@@ -237,12 +237,12 @@ libvolk_qa_la_SOURCES = \
 	qa_32f_stddev_aligned16.cc \
 	qa_32f_stddev_and_mean_aligned16.cc
 
-libvolk_qa_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 -lorc-0.4
+libvolk_qa_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 $(ORC_LDFLAGS) -lorc-0.4
 
 libvolk_qa_la_LIBADD = \
 	libvolk.la \
 	libvolk_runtime.la \
-	../orc/libvolk_orc.a \
+	../orc/libvolk_orc.la \
 	$(CPPUNIT_LIBS)
 
 # ----------------------------------------------------------------
-- 
cgit 


From 05f4bced29987a0a573d1fc5b214f3fa01dc84bd Mon Sep 17 00:00:00 2001
From: Nick Foster
Date: Tue, 14 Dec 2010 13:36:55 -0800
Subject: Volk: More autotools stuff for Orc. Should build OK with or without
 Orc now.

---
 volk/lib/Makefile.am | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

(limited to 'volk/lib')

diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am
index 649d461e0..385401ae1 100644
--- a/volk/lib/Makefile.am
+++ b/volk/lib/Makefile.am
@@ -138,10 +138,13 @@ endif
 
 
-libvolk_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 $(ORC_LDFLAGS) -lorc-0.4
-libvolk_runtime_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 $(ORC_LDFLAGS) -lorc-0.4
+libvolk_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0
+libvolk_runtime_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0
+if HAVE_ORC
 libvolk_la_LIBADD = ../orc/libvolk_orc.la
-
+libvolk_la_LDFLAGS += -lorc-0.4
+libvolk_runtime_la_LDFLAGS += -lorc-0.4
+endif
 
 
 # ----------------------------------------------------------------
@@ -237,13 +240,18 @@ libvolk_qa_la_SOURCES = \
 	qa_32f_stddev_aligned16.cc \
 	qa_32f_stddev_and_mean_aligned16.cc
 
-libvolk_qa_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 $(ORC_LDFLAGS) -lorc-0.4
+libvolk_qa_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0
 
 libvolk_qa_la_LIBADD = \
 	libvolk.la \
 	libvolk_runtime.la \
-	../orc/libvolk_orc.la \
 	$(CPPUNIT_LIBS)
+	
+if HAVE_ORC
+libvolk_qa_la_LIBADD += \
+    ../orc/libvolk_orc.la
+    libvolk_qa_la_LDFLAGS += -lorc-0.4
+endif
 
 # ----------------------------------------------------------------
 # headers that don't get installed
-- 
cgit 


From d8031649fa3186d7e6b000dcfaa349deacf51262 Mon Sep 17 00:00:00 2001
From: Nick Foster
Date: Tue, 14 Dec 2010 16:41:14 -0800
Subject: Volk: patch via Nick M.

---
 volk/lib/Makefile.am | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'volk/lib')

diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am
index 385401ae1..d38004f2a 100644
--- a/volk/lib/Makefile.am
+++ b/volk/lib/Makefile.am
@@ -74,9 +74,6 @@ universal_CODE = 		\
 
 generic_CODE = 		\
 	volk_cpu_generic.c
-	
-orc_CODE =      \
-	volk_cpu_orc.c
 
 x86_CODE = 		\
 	volk_cpu_x86.c
@@ -356,7 +353,7 @@ noinst_PROGRAMS = \
 	test_all
 
 test_all_SOURCES = test_all.cc
-test_all_LDADD   = libvolk_qa.la
+test_all_LDADD   = libvolk_qa.la ../orc/libvolk_orc.la
 
 
 distclean-local: 
-- 
cgit 


From 2e9a7d350713b4e1b21458db8f3fce8a557858ae Mon Sep 17 00:00:00 2001
From: Nick Foster
Date: Tue, 14 Dec 2010 17:13:40 -0800
Subject: Volk: Added QA tests for all the Orc stuff. Added a 16u_byteswap but
 it's broken right now.

---
 volk/lib/qa_16u_byteswap_aligned16.cc   | 9 +++++++++
 volk/lib/qa_32f_add_aligned16.cc        | 9 +++++++++
 volk/lib/qa_32s_and_aligned16.cc        | 9 +++++++++
 volk/lib/qa_8s_convert_32f_aligned16.cc | 8 ++++++++
 4 files changed, 35 insertions(+)

(limited to 'volk/lib')

diff --git a/volk/lib/qa_16u_byteswap_aligned16.cc b/volk/lib/qa_16u_byteswap_aligned16.cc
index 6b19828a4..c30b6ba41 100644
--- a/volk/lib/qa_16u_byteswap_aligned16.cc
+++ b/volk/lib/qa_16u_byteswap_aligned16.cc
@@ -24,6 +24,7 @@ void qa_16u_byteswap_aligned16::t1() {
   
   uint16_t output0[vlen] __attribute__ ((aligned (16)));
   uint16_t output01[vlen] __attribute__ ((aligned (16)));
+  uint16_t output02[vlen] __attribute__ ((aligned (16)));
 
   for(int i = 0; i < vlen; ++i) {   
     output0[i] = (uint16_t) ((rand() - (RAND_MAX/2)) / (RAND_MAX/2));
@@ -40,6 +41,13 @@ void qa_16u_byteswap_aligned16::t1() {
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
   printf("generic_time: %f\n", total);
   start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_16u_byteswap_aligned16_manual(output02, vlen, "orc");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("orc_time: %f\n", total);
+  start = clock();
   for(int count = 0; count < ITERS; ++count) {
     volk_16u_byteswap_aligned16_manual(output01, vlen, "sse2");
   }
@@ -54,6 +62,7 @@ void qa_16u_byteswap_aligned16::t1() {
   for(int i = 0; i < vlen; ++i) {
     //printf("%d...%d\n", output0[i], output01[i]);
     CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
+    CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]);    
   }
 }
 
diff --git a/volk/lib/qa_32f_add_aligned16.cc b/volk/lib/qa_32f_add_aligned16.cc
index 002aebfc9..d9214e8a2 100644
--- a/volk/lib/qa_32f_add_aligned16.cc
+++ b/volk/lib/qa_32f_add_aligned16.cc
@@ -78,6 +78,7 @@ void qa_32f_add_aligned16::t1() {
   
   float output0[vlen] __attribute__ ((aligned (16)));
   float output01[vlen] __attribute__ ((aligned (16)));
+  float output02[vlen] __attribute__ ((aligned (16)));
 
   for(int i = 0; i < vlen; ++i) {   
     input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
@@ -93,6 +94,13 @@ void qa_32f_add_aligned16::t1() {
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
   printf("generic_time: %f\n", total);
   start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_32f_add_aligned16_manual(output02, input0, input1, vlen, "orc");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("orc_time: %f\n", total);
+  start = clock();
   for(int count = 0; count < ITERS; ++count) {
     volk_32f_add_aligned16_manual(output01, input0, input1, vlen, "sse");
   }
@@ -107,6 +115,7 @@ void qa_32f_add_aligned16::t1() {
   for(int i = 0; i < vlen; ++i) {
     //printf("%d...%d\n", output0[i], output01[i]);
     CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
+    CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]);
   }
 }
 
diff --git a/volk/lib/qa_32s_and_aligned16.cc b/volk/lib/qa_32s_and_aligned16.cc
index 72d05cf6f..5720ee869 100644
--- a/volk/lib/qa_32s_and_aligned16.cc
+++ b/volk/lib/qa_32s_and_aligned16.cc
@@ -25,6 +25,7 @@ void qa_32s_and_aligned16::t1() {
   
   int32_t output0[vlen] __attribute__ ((aligned (16)));
   int32_t output01[vlen] __attribute__ ((aligned (16)));
+  int32_t output02[vlen] __attribute__ ((aligned (16)));
 
   for(int i = 0; i < vlen; ++i) {   
     input0[i] = ((int32_t) (rand() - (RAND_MAX/2)));
@@ -40,6 +41,13 @@ void qa_32s_and_aligned16::t1() {
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
   printf("generic_time: %f\n", total);
   start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_32s_and_aligned16_manual(output02, input0, input1, vlen, "orc");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("orc_time: %f\n", total);
+  start = clock();
   for(int count = 0; count < ITERS; ++count) {
     volk_32s_and_aligned16_manual(output01, input0, input1, vlen, "sse");
   }
@@ -54,6 +62,7 @@ void qa_32s_and_aligned16::t1() {
   for(int i = 0; i < vlen; ++i) {
     //printf("%d...%d\n", output0[i], output01[i]);
     CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
+    CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]);
   }
 }
 
diff --git a/volk/lib/qa_8s_convert_32f_aligned16.cc b/volk/lib/qa_8s_convert_32f_aligned16.cc
index 522da0b9d..3b3aa6919 100644
--- a/volk/lib/qa_8s_convert_32f_aligned16.cc
+++ b/volk/lib/qa_8s_convert_32f_aligned16.cc
@@ -40,6 +40,14 @@ void qa_8s_convert_32f_aligned16::t1() {
   end = clock();
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
   printf("generic_time: %f\n", total);
+  
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_8s_convert_32f_aligned16_manual(output_generic, input0, 128.0, vlen, "orc");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("orc_time: %f\n", total);
 
   start = clock();
   for(int count = 0; count < ITERS; ++count) {
-- 
cgit 


From 87a9b14e0b0e2c2d0dcd75d42f2a15211265f102 Mon Sep 17 00:00:00 2001
From: Nick Foster
Date: Tue, 14 Dec 2010 17:44:34 -0800
Subject: Volk: added references to libs instead of specifying them directly

---
 volk/lib/Makefile.am | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

(limited to 'volk/lib')

diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am
index d38004f2a..faab4a010 100644
--- a/volk/lib/Makefile.am
+++ b/volk/lib/Makefile.am
@@ -133,14 +133,21 @@ libvolk_runtime_la_SOURCES =	\
 	$(universal_runtime_CODE)
 endif
 
+volk_orc_LDFLAGS = \
+	$(ORC_LDFLAGS) \
+	-lorc-0.4
+	
+volk_orc_LIBADD = \
+	../orc/libvolk_orc.la
 
-
+if HAVE_ORC
+libvolk_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 $(volk_orc_LDFLAGS)
+libvolk_runtime_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 $(volk_orc_LDFLAGS)
+libvolk_la_LIBADD = $(volk_orc_LIBADD)
+else
 libvolk_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0
 libvolk_runtime_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0
-if HAVE_ORC
-libvolk_la_LIBADD = ../orc/libvolk_orc.la
-libvolk_la_LDFLAGS += -lorc-0.4
-libvolk_runtime_la_LDFLAGS += -lorc-0.4
+libvolk_la_LIBADD =
 endif
 
 
@@ -243,12 +250,6 @@ libvolk_qa_la_LIBADD = \
 	libvolk.la \
 	libvolk_runtime.la \
 	$(CPPUNIT_LIBS)
-	
-if HAVE_ORC
-libvolk_qa_la_LIBADD += \
-    ../orc/libvolk_orc.la
-    libvolk_qa_la_LDFLAGS += -lorc-0.4
-endif
 
 # ----------------------------------------------------------------
 # headers that don't get installed
@@ -353,7 +354,7 @@ noinst_PROGRAMS = \
 	test_all
 
 test_all_SOURCES = test_all.cc
-test_all_LDADD   = libvolk_qa.la ../orc/libvolk_orc.la
+test_all_LDADD   = libvolk_qa.la
 
 
 distclean-local: 
-- 
cgit 


From 21426265324c883c91eeaaf75a81f2ccdc6e249d Mon Sep 17 00:00:00 2001
From: Nick Foster
Date: Tue, 14 Dec 2010 21:12:49 -0800
Subject: Volk: Build fixes to work with/without Orc.

---
 volk/lib/Makefile.am | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'volk/lib')

diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am
index faab4a010..253033461 100644
--- a/volk/lib/Makefile.am
+++ b/volk/lib/Makefile.am
@@ -140,7 +140,7 @@ volk_orc_LDFLAGS = \
 volk_orc_LIBADD = \
 	../orc/libvolk_orc.la
 
-if HAVE_ORC
+if LV_HAVE_ORC
 libvolk_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 $(volk_orc_LDFLAGS)
 libvolk_runtime_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 $(volk_orc_LDFLAGS)
 libvolk_la_LIBADD = $(volk_orc_LIBADD)
-- 
cgit 


From f9ee6a55cb397f9302769a25a8c959fa162354f0 Mon Sep 17 00:00:00 2001
From: Nick Foster
Date: Tue, 14 Dec 2010 22:58:33 -0800
Subject: Volk: Some new basic Orc implementations with QA code

---
 volk/lib/qa_16u_byteswap_aligned16.cc |  1 +
 volk/lib/qa_32f_divide_aligned16.cc   | 10 ++++++++++
 volk/lib/qa_32f_multiply_aligned16.cc |  9 +++++++++
 volk/lib/qa_32f_subtract_aligned16.cc |  9 +++++++++
 4 files changed, 29 insertions(+)

(limited to 'volk/lib')

diff --git a/volk/lib/qa_16u_byteswap_aligned16.cc b/volk/lib/qa_16u_byteswap_aligned16.cc
index c30b6ba41..b740f91df 100644
--- a/volk/lib/qa_16u_byteswap_aligned16.cc
+++ b/volk/lib/qa_16u_byteswap_aligned16.cc
@@ -30,6 +30,7 @@ void qa_16u_byteswap_aligned16::t1() {
     output0[i] = (uint16_t) ((rand() - (RAND_MAX/2)) / (RAND_MAX/2));
   }
   memcpy(output01, output0, vlen*sizeof(uint16_t));
+  memcpy(output02, output0, vlen*sizeof(uint16_t));
 
   printf("16u_byteswap_aligned\n");
 
diff --git a/volk/lib/qa_32f_divide_aligned16.cc b/volk/lib/qa_32f_divide_aligned16.cc
index 8826bf94f..f104e0443 100644
--- a/volk/lib/qa_32f_divide_aligned16.cc
+++ b/volk/lib/qa_32f_divide_aligned16.cc
@@ -35,6 +35,7 @@ void qa_32f_divide_aligned16::t1() {
   float input1[vlen] __attribute__ ((aligned (16)));
   
   float output0[vlen] __attribute__ ((aligned (16)));
+  float output1[vlen] __attribute__ ((aligned (16)));
   float output_known[vlen] __attribute__ ((aligned (16)));
 
   for(int i = 0; i < vlen; ++i) {   
@@ -51,6 +52,14 @@ void qa_32f_divide_aligned16::t1() {
   end = clock();
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
   printf("generic_time: %f\n", total);
+  
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_32f_divide_aligned16_manual(output1, input0, input1, vlen, "orc");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("orc_time: %f\n", total);
 
   /*
   for(int i = 0; i < 10; ++i) {
@@ -61,6 +70,7 @@ void qa_32f_divide_aligned16::t1() {
   
   for(int i = 0; i < vlen; ++i) {
     CPPUNIT_ASSERT_EQUAL(output0[i], output_known[i]);
+    CPPUNIT_ASSERT_EQUAL(output1[i], output_known[i]);
   }
 }
 
diff --git a/volk/lib/qa_32f_multiply_aligned16.cc b/volk/lib/qa_32f_multiply_aligned16.cc
index e52748466..f9c034d70 100644
--- a/volk/lib/qa_32f_multiply_aligned16.cc
+++ b/volk/lib/qa_32f_multiply_aligned16.cc
@@ -78,6 +78,7 @@ void qa_32f_multiply_aligned16::t1() {
   
   float output0[vlen] __attribute__ ((aligned (16)));
   float output01[vlen] __attribute__ ((aligned (16)));
+  float output02[vlen] __attribute__ ((aligned (16)));
 
   for(int i = 0; i < vlen; ++i) {   
     input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
@@ -93,6 +94,13 @@ void qa_32f_multiply_aligned16::t1() {
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
   printf("generic_time: %f\n", total);
   start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_32f_multiply_aligned16_manual(output02, input0, input1, vlen, "orc");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("orc_time: %f\n", total);
+  start = clock();
   for(int count = 0; count < ITERS; ++count) {
     volk_32f_multiply_aligned16_manual(output01, input0, input1, vlen, "sse");
   }
@@ -107,6 +115,7 @@ void qa_32f_multiply_aligned16::t1() {
   for(int i = 0; i < vlen; ++i) {
     //printf("%d...%d\n", output0[i], output01[i]);
     CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
+    CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]);
   }
 }
 
diff --git a/volk/lib/qa_32f_subtract_aligned16.cc b/volk/lib/qa_32f_subtract_aligned16.cc
index a7e1b5ae3..5a5a7c9b6 100644
--- a/volk/lib/qa_32f_subtract_aligned16.cc
+++ b/volk/lib/qa_32f_subtract_aligned16.cc
@@ -25,6 +25,7 @@ void qa_32f_subtract_aligned16::t1() {
   
   float output0[vlen] __attribute__ ((aligned (16)));
   float output01[vlen] __attribute__ ((aligned (16)));
+  float output02[vlen] __attribute__ ((aligned (16)));
 
   for(int i = 0; i < vlen; ++i) {   
     input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
@@ -40,6 +41,13 @@ void qa_32f_subtract_aligned16::t1() {
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
   printf("generic_time: %f\n", total);
   start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_32f_subtract_aligned16_manual(output02, input0, input1, vlen, "orc");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("orc_time: %f\n", total);
+  start = clock();
   for(int count = 0; count < ITERS; ++count) {
     volk_32f_subtract_aligned16_manual(output01, input0, input1, vlen, "sse");
   }
@@ -54,6 +62,7 @@ void qa_32f_subtract_aligned16::t1() {
   for(int i = 0; i < vlen; ++i) {
     //printf("%d...%d\n", output0[i], output01[i]);
     CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
+    CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]);
   }
 }
 
-- 
cgit 


From 15ad4b5398e474bfb52fdb7e826b69f3e398c0b0 Mon Sep 17 00:00:00 2001
From: Nick Foster
Date: Wed, 15 Dec 2010 16:27:42 -0800
Subject: Volk: A bunch of new ORC routines plus tests. Also fixed a typo in
 the generic version of 16sc_magnitude_16s_a16.

---
 volk/lib/qa_16sc_magnitude_16s_aligned16.cc |  9 +++++++++
 volk/lib/qa_16sc_magnitude_32f_aligned16.cc | 20 ++++++++++++++++++++
 volk/lib/qa_32f_divide_aligned16.cc         |  9 +++++++++
 volk/lib/qa_32fc_magnitude_16s_aligned16.cc |  9 +++++++++
 volk/lib/qa_32fc_magnitude_32f_aligned16.cc |  9 +++++++++
 volk/lib/qa_32s_or_aligned16.cc             |  9 +++++++++
 6 files changed, 65 insertions(+)

(limited to 'volk/lib')

diff --git a/volk/lib/qa_16sc_magnitude_16s_aligned16.cc b/volk/lib/qa_16sc_magnitude_16s_aligned16.cc
index b14610757..c8f13ff84 100644
--- a/volk/lib/qa_16sc_magnitude_16s_aligned16.cc
+++ b/volk/lib/qa_16sc_magnitude_16s_aligned16.cc
@@ -23,6 +23,7 @@ void qa_16sc_magnitude_16s_aligned16::t1() {
   std::complex<int16_t> input0[vlen] __attribute__ ((aligned (16)));
   
   int16_t output_generic[vlen] __attribute__ ((aligned (16)));
+  int16_t output_orc[vlen] __attribute__ ((aligned (16)));
   int16_t output_sse[vlen] __attribute__ ((aligned (16)));
   int16_t output_sse3[vlen] __attribute__ ((aligned (16)));
 
@@ -40,6 +41,13 @@ void qa_16sc_magnitude_16s_aligned16::t1() {
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
   printf("generic_time: %f\n", total);
   start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_16sc_magnitude_16s_aligned16_manual(output_orc, input0, vlen, "orc");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("orc_time: %f\n", total);
+  start = clock();
   for(int count = 0; count < ITERS; ++count) {
     volk_16sc_magnitude_16s_aligned16_manual(output_sse, input0, vlen, "sse");
   }
@@ -64,6 +72,7 @@ void qa_16sc_magnitude_16s_aligned16::t1() {
     //printf("%d...%d\n", output0[i], output01[i]);
     CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], 1.1);
     CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], 1.1);
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], 1.1);
   }
 }
 
diff --git a/volk/lib/qa_16sc_magnitude_32f_aligned16.cc b/volk/lib/qa_16sc_magnitude_32f_aligned16.cc
index 2c9e48f6e..e7178863c 100644
--- a/volk/lib/qa_16sc_magnitude_32f_aligned16.cc
+++ b/volk/lib/qa_16sc_magnitude_32f_aligned16.cc
@@ -15,6 +15,7 @@ void qa_16sc_magnitude_32f_aligned16::t1() {
   std::complex<int16_t> input0[vlen] __attribute__ ((aligned (16)));
   
   float output_generic[vlen] __attribute__ ((aligned (16)));
+  float output_orc[vlen] __attribute__ ((aligned (16)));
   float output_known[vlen] __attribute__ ((aligned (16)));
 
   int16_t* inputLoad = (int16_t*)input0;
@@ -37,6 +38,14 @@ void qa_16sc_magnitude_32f_aligned16::t1() {
   end = clock();
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
   printf("generic_time: %f\n", total);
+  
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_16sc_magnitude_32f_aligned16_manual(output_orc, input0, scale, vlen, "orc");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("orc_time: %f\n", total);
 
   /*
   for(int i = 0; i < 100; ++i) {
@@ -48,6 +57,7 @@ void qa_16sc_magnitude_32f_aligned16::t1() {
   for(int i = 0; i < vlen; ++i) {
     //printf("%d...%d\n", output0[i], output01[i]);
     CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_known[i], fabs(output_generic[i])*1e-4);
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_orc[i], output_known[i], fabs(output_generic[i])*1e-4);
   }
 }
 
@@ -63,6 +73,7 @@ void qa_16sc_magnitude_32f_aligned16::t1() {
   std::complex<int16_t> input0[vlen] __attribute__ ((aligned (16)));
   
   float output_generic[vlen] __attribute__ ((aligned (16)));
+  float output_orc[vlen] __attribute__ ((aligned (16)));
   float output_sse[vlen] __attribute__ ((aligned (16)));
   float output_sse3[vlen] __attribute__ ((aligned (16)));
 
@@ -79,6 +90,14 @@ void qa_16sc_magnitude_32f_aligned16::t1() {
   end = clock();
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
   printf("generic_time: %f\n", total);
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_16sc_magnitude_32f_aligned16_manual(output_orc, input0, 32768.0, vlen, "orc");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("orc_time: %f\n", total);
+
   start = clock();
   for(int count = 0; count < ITERS; ++count) {
     volk_16sc_magnitude_32f_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse");
@@ -104,6 +123,7 @@ void qa_16sc_magnitude_32f_aligned16::t1() {
     //printf("%d...%d\n", output0[i], output01[i]);
     CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4);
     CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], fabs(output_generic[i])*1e-4);
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], fabs(output_generic[i])*1e-4);
   }
 }
 
diff --git a/volk/lib/qa_32f_divide_aligned16.cc b/volk/lib/qa_32f_divide_aligned16.cc
index f104e0443..b2c2ecf9a 100644
--- a/volk/lib/qa_32f_divide_aligned16.cc
+++ b/volk/lib/qa_32f_divide_aligned16.cc
@@ -88,6 +88,7 @@ void qa_32f_divide_aligned16::t1() {
   
   float output0[vlen] __attribute__ ((aligned (16)));
   float output01[vlen] __attribute__ ((aligned (16)));
+  float output02[vlen] __attribute__ ((aligned (16)));
 
   for(int i = 0; i < vlen; ++i) {   
     input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
@@ -103,6 +104,13 @@ void qa_32f_divide_aligned16::t1() {
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
   printf("generic_time: %f\n", total);
   start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_32f_divide_aligned16_manual(output02, input0, input1, vlen, "orc");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("orc_time: %f\n", total);
+  start = clock();
   for(int count = 0; count < ITERS; ++count) {
     volk_32f_divide_aligned16_manual(output01, input0, input1, vlen, "sse");
   }
@@ -117,6 +125,7 @@ void qa_32f_divide_aligned16::t1() {
   for(int i = 0; i < vlen; ++i) {
     //printf("%d...%d\n", output0[i], output01[i]);
     CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
+    CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]);
   }
 }
 
diff --git a/volk/lib/qa_32fc_magnitude_16s_aligned16.cc b/volk/lib/qa_32fc_magnitude_16s_aligned16.cc
index a4be1616b..c3e65866b 100644
--- a/volk/lib/qa_32fc_magnitude_16s_aligned16.cc
+++ b/volk/lib/qa_32fc_magnitude_16s_aligned16.cc
@@ -23,6 +23,7 @@ void qa_32fc_magnitude_16s_aligned16::t1() {
   std::complex<float> input0[vlen] __attribute__ ((aligned (16)));
   
   int16_t output_generic[vlen] __attribute__ ((aligned (16)));
+  int16_t output_orc[vlen] __attribute__ ((aligned (16)));
   int16_t output_sse[vlen] __attribute__ ((aligned (16)));
   int16_t output_sse3[vlen] __attribute__ ((aligned (16)));
 
@@ -40,6 +41,13 @@ void qa_32fc_magnitude_16s_aligned16::t1() {
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
   printf("generic_time: %f\n", total);
   start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_32fc_magnitude_16s_aligned16_manual(output_orc, input0, 32768.0, vlen, "orc");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("orc_time: %f\n", total);
+  start = clock();
   for(int count = 0; count < ITERS; ++count) {
     volk_32fc_magnitude_16s_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse");
   }
@@ -64,6 +72,7 @@ void qa_32fc_magnitude_16s_aligned16::t1() {
     //printf("%d...%d\n", output0[i], output01[i]);
     CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], 1.1);
     CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], 1.1);
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], 1.1);
   }
 }
 
diff --git a/volk/lib/qa_32fc_magnitude_32f_aligned16.cc b/volk/lib/qa_32fc_magnitude_32f_aligned16.cc
index d69ada408..6a1d46c7a 100644
--- a/volk/lib/qa_32fc_magnitude_32f_aligned16.cc
+++ b/volk/lib/qa_32fc_magnitude_32f_aligned16.cc
@@ -23,6 +23,7 @@ void qa_32fc_magnitude_32f_aligned16::t1() {
   std::complex<float> input0[vlen] __attribute__ ((aligned (16)));
   
   float output_generic[vlen] __attribute__ ((aligned (16)));
+  float output_orc[vlen] __attribute__ ((aligned (16)));
   float output_sse[vlen] __attribute__ ((aligned (16)));
   float output_sse3[vlen] __attribute__ ((aligned (16)));
 
@@ -40,6 +41,13 @@ void qa_32fc_magnitude_32f_aligned16::t1() {
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
   printf("generic_time: %f\n", total);
   start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_32fc_magnitude_32f_aligned16_manual(output_orc, input0, vlen, "orc");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("orc_time: %f\n", total);
+  start = clock();
   for(int count = 0; count < ITERS; ++count) {
     volk_32fc_magnitude_32f_aligned16_manual(output_sse, input0, vlen, "sse");
   }
@@ -64,6 +72,7 @@ void qa_32fc_magnitude_32f_aligned16::t1() {
     //printf("%d...%d\n", output0[i], output01[i]);
     CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4);
     CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], fabs(output_generic[i])*1e-4);
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], fabs(output_generic[i])*1e-4);
   }
 }
 
diff --git a/volk/lib/qa_32s_or_aligned16.cc b/volk/lib/qa_32s_or_aligned16.cc
index e09dfb91c..9ea5283a6 100644
--- a/volk/lib/qa_32s_or_aligned16.cc
+++ b/volk/lib/qa_32s_or_aligned16.cc
@@ -25,6 +25,7 @@ void qa_32s_or_aligned16::t1() {
   
   int32_t output0[vlen] __attribute__ ((aligned (16)));
   int32_t output01[vlen] __attribute__ ((aligned (16)));
+  int32_t output02[vlen] __attribute__ ((aligned (16)));
 
   for(int i = 0; i < vlen; ++i) {   
     input0[i] = ((int32_t) (rand() - (RAND_MAX/2)));
@@ -40,6 +41,13 @@ void qa_32s_or_aligned16::t1() {
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
   printf("generic_time: %f\n", total);
   start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_32s_or_aligned16_manual(output02, input0, input1, vlen, "orc");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("orc_time: %f\n", total);
+  start = clock();
   for(int count = 0; count < ITERS; ++count) {
     volk_32s_or_aligned16_manual(output01, input0, input1, vlen, "sse");
   }
@@ -54,6 +62,7 @@ void qa_32s_or_aligned16::t1() {
   for(int i = 0; i < vlen; ++i) {
     //printf("%d...%d\n", output0[i], output01[i]);
     CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
+    CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]);
   }
 }
 
-- 
cgit 


From ce3e4c33d170b65cf288faec7d8da6a496eb6101 Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Thu, 16 Dec 2010 21:33:54 -0500
Subject: Including time header to qa files.

---
 volk/lib/qa_16s_add_quad_aligned16.cc                    | 2 +-
 volk/lib/qa_16s_branch_4_state_8_aligned16.cc            | 2 +-
 volk/lib/qa_16s_convert_32f_aligned16.cc                 | 1 +
 volk/lib/qa_16s_convert_32f_unaligned16.cc               | 1 +
 volk/lib/qa_16s_convert_8s_aligned16.cc                  | 1 +
 volk/lib/qa_16s_convert_8s_unaligned16.cc                | 1 +
 volk/lib/qa_16s_max_star_aligned16.cc                    | 2 +-
 volk/lib/qa_16s_max_star_horizontal_aligned16.cc         | 2 +-
 volk/lib/qa_16s_permute_and_scalar_add_aligned16.cc      | 2 +-
 volk/lib/qa_16s_quad_max_star_aligned16.cc               | 1 +
 volk/lib/qa_16sc_deinterleave_16s_aligned16.cc           | 1 +
 volk/lib/qa_16sc_deinterleave_32f_aligned16.cc           | 1 +
 volk/lib/qa_16sc_deinterleave_real_16s_aligned16.cc      | 1 +
 volk/lib/qa_16sc_deinterleave_real_32f_aligned16.cc      | 1 +
 volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc       | 1 +
 volk/lib/qa_16sc_magnitude_16s_aligned16.cc              | 1 +
 volk/lib/qa_16sc_magnitude_32f_aligned16.cc              | 1 +
 volk/lib/qa_16u_byteswap_aligned16.cc                    | 1 +
 volk/lib/qa_32f_accumulator_aligned16.cc                 | 1 +
 volk/lib/qa_32f_add_aligned16.cc                         | 1 +
 volk/lib/qa_32f_calc_spectral_noise_floor_aligned16.cc   | 1 +
 volk/lib/qa_32f_convert_16s_aligned16.cc                 | 1 +
 volk/lib/qa_32f_convert_16s_unaligned16.cc               | 1 +
 volk/lib/qa_32f_convert_32s_aligned16.cc                 | 1 +
 volk/lib/qa_32f_convert_32s_unaligned16.cc               | 1 +
 volk/lib/qa_32f_convert_64f_aligned16.cc                 | 1 +
 volk/lib/qa_32f_convert_64f_unaligned16.cc               | 1 +
 volk/lib/qa_32f_convert_8s_aligned16.cc                  | 1 +
 volk/lib/qa_32f_convert_8s_unaligned16.cc                | 1 +
 volk/lib/qa_32f_divide_aligned16.cc                      | 1 +
 volk/lib/qa_32f_fm_detect_aligned16.cc                   | 1 +
 volk/lib/qa_32f_interleave_16sc_aligned16.cc             | 1 +
 volk/lib/qa_32f_interleave_32fc_aligned16.cc             | 1 +
 volk/lib/qa_32f_max_aligned16.cc                         | 1 +
 volk/lib/qa_32f_min_aligned16.cc                         | 1 +
 volk/lib/qa_32f_multiply_aligned16.cc                    | 1 +
 volk/lib/qa_32f_normalize_aligned16.cc                   | 1 +
 volk/lib/qa_32f_sqrt_aligned16.cc                        | 1 +
 volk/lib/qa_32f_stddev_aligned16.cc                      | 1 +
 volk/lib/qa_32f_stddev_and_mean_aligned16.cc             | 1 +
 volk/lib/qa_32f_subtract_aligned16.cc                    | 1 +
 volk/lib/qa_32fc_atan2_32f_aligned16.cc                  | 1 +
 volk/lib/qa_32fc_conjugate_dot_prod_aligned16.cc         | 1 +
 volk/lib/qa_32fc_deinterleave_32f_aligned16.cc           | 1 +
 volk/lib/qa_32fc_deinterleave_64f_aligned16.cc           | 1 +
 volk/lib/qa_32fc_deinterleave_real_16s_aligned16.cc      | 1 +
 volk/lib/qa_32fc_deinterleave_real_32f_aligned16.cc      | 1 +
 volk/lib/qa_32fc_deinterleave_real_64f_aligned16.cc      | 1 +
 volk/lib/qa_32fc_magnitude_16s_aligned16.cc              | 1 +
 volk/lib/qa_32fc_magnitude_32f_aligned16.cc              | 1 +
 volk/lib/qa_32fc_power_spectral_density_32f_aligned16.cc | 1 +
 volk/lib/qa_32fc_power_spectrum_32f_aligned16.cc         | 1 +
 volk/lib/qa_32s_and_aligned16.cc                         | 1 +
 volk/lib/qa_32s_convert_32f_aligned16.cc                 | 1 +
 volk/lib/qa_32s_convert_32f_unaligned16.cc               | 1 +
 volk/lib/qa_32s_or_aligned16.cc                          | 1 +
 volk/lib/qa_32u_byteswap_aligned16.cc                    | 1 +
 volk/lib/qa_32u_popcnt_aligned16.cc                      | 1 +
 volk/lib/qa_64f_convert_32f_aligned16.cc                 | 1 +
 volk/lib/qa_64f_convert_32f_unaligned16.cc               | 1 +
 volk/lib/qa_64f_max_aligned16.cc                         | 1 +
 volk/lib/qa_64f_min_aligned16.cc                         | 1 +
 volk/lib/qa_64u_byteswap_aligned16.cc                    | 1 +
 volk/lib/qa_64u_popcnt_aligned16.cc                      | 1 +
 volk/lib/qa_8s_convert_16s_aligned16.cc                  | 1 +
 volk/lib/qa_8s_convert_16s_unaligned16.cc                | 1 +
 volk/lib/qa_8s_convert_32f_aligned16.cc                  | 1 +
 volk/lib/qa_8s_convert_32f_unaligned16.cc                | 1 +
 volk/lib/qa_8sc_deinterleave_16s_aligned16.cc            | 1 +
 volk/lib/qa_8sc_deinterleave_32f_aligned16.cc            | 1 +
 volk/lib/qa_8sc_deinterleave_real_16s_aligned16.cc       | 1 +
 volk/lib/qa_8sc_deinterleave_real_32f_aligned16.cc       | 1 +
 volk/lib/qa_8sc_deinterleave_real_8s_aligned16.cc        | 1 +
 volk/lib/qa_8sc_multiply_conjugate_16sc_aligned16.cc     | 2 +-
 volk/lib/qa_8sc_multiply_conjugate_32fc_aligned16.cc     | 2 +-
 75 files changed, 75 insertions(+), 7 deletions(-)

(limited to 'volk/lib')

diff --git a/volk/lib/qa_16s_add_quad_aligned16.cc b/volk/lib/qa_16s_add_quad_aligned16.cc
index c3005c1be..154aa0f17 100644
--- a/volk/lib/qa_16s_add_quad_aligned16.cc
+++ b/volk/lib/qa_16s_add_quad_aligned16.cc
@@ -2,7 +2,7 @@
 #include <qa_16s_add_quad_aligned16.h>
 #include <volk/volk_16s_add_quad_aligned16.h>
 #include <cstdlib>
-#include <time.h>
+#include <ctime>
 //test for sse2
 
 #ifndef LV_HAVE_SSE2
diff --git a/volk/lib/qa_16s_branch_4_state_8_aligned16.cc b/volk/lib/qa_16s_branch_4_state_8_aligned16.cc
index ba5e8ed93..62deffaeb 100644
--- a/volk/lib/qa_16s_branch_4_state_8_aligned16.cc
+++ b/volk/lib/qa_16s_branch_4_state_8_aligned16.cc
@@ -1,7 +1,7 @@
 #include <volk/volk.h>
 #include <qa_16s_branch_4_state_8_aligned16.h>
 #include <cstdlib>
-#include <time.h>
+#include <ctime>
 
 //test for ssse3
 
diff --git a/volk/lib/qa_16s_convert_32f_aligned16.cc b/volk/lib/qa_16s_convert_32f_aligned16.cc
index 7878d4737..6215f4a64 100644
--- a/volk/lib/qa_16s_convert_32f_aligned16.cc
+++ b/volk/lib/qa_16s_convert_32f_aligned16.cc
@@ -3,6 +3,7 @@
 #include <qa_16s_convert_32f_aligned16.h>
 #include <volk/volk_16s_convert_32f_aligned16.h>
 #include <cstdlib>
+#include <ctime>
 
 //test for sse2
 
diff --git a/volk/lib/qa_16s_convert_32f_unaligned16.cc b/volk/lib/qa_16s_convert_32f_unaligned16.cc
index 8c3121e5c..46c2e48ac 100644
--- a/volk/lib/qa_16s_convert_32f_unaligned16.cc
+++ b/volk/lib/qa_16s_convert_32f_unaligned16.cc
@@ -3,6 +3,7 @@
 #include <qa_16s_convert_32f_unaligned16.h>
 #include <volk/volk_16s_convert_32f_unaligned16.h>
 #include <cstdlib>
+#include <ctime>
 
 //test for sse2
 
diff --git a/volk/lib/qa_16s_convert_8s_aligned16.cc b/volk/lib/qa_16s_convert_8s_aligned16.cc
index 734b7784e..8225aa0cf 100644
--- a/volk/lib/qa_16s_convert_8s_aligned16.cc
+++ b/volk/lib/qa_16s_convert_8s_aligned16.cc
@@ -2,6 +2,7 @@
 #include <qa_16s_convert_8s_aligned16.h>
 #include <volk/volk_16s_convert_8s_aligned16.h>
 #include <cstdlib>
+#include <ctime>
 
 //test for sse2
 
diff --git a/volk/lib/qa_16s_convert_8s_unaligned16.cc b/volk/lib/qa_16s_convert_8s_unaligned16.cc
index 275ab7668..e6ce5030e 100644
--- a/volk/lib/qa_16s_convert_8s_unaligned16.cc
+++ b/volk/lib/qa_16s_convert_8s_unaligned16.cc
@@ -2,6 +2,7 @@
 #include <qa_16s_convert_8s_unaligned16.h>
 #include <volk/volk_16s_convert_8s_unaligned16.h>
 #include <cstdlib>
+#include <ctime>
 
 //test for sse2
 
diff --git a/volk/lib/qa_16s_max_star_aligned16.cc b/volk/lib/qa_16s_max_star_aligned16.cc
index b46b9ae8e..c6f828ba6 100644
--- a/volk/lib/qa_16s_max_star_aligned16.cc
+++ b/volk/lib/qa_16s_max_star_aligned16.cc
@@ -2,7 +2,7 @@
 #include <qa_16s_max_star_aligned16.h>
 #include <volk/volk_16s_max_star_aligned16.h>
 #include <cstdlib>
-#include <time.h>
+#include <ctime>
 //test for ssse3
 
 #ifndef LV_HAVE_SSSE3
diff --git a/volk/lib/qa_16s_max_star_horizontal_aligned16.cc b/volk/lib/qa_16s_max_star_horizontal_aligned16.cc
index 4d44735df..0a58570e2 100644
--- a/volk/lib/qa_16s_max_star_horizontal_aligned16.cc
+++ b/volk/lib/qa_16s_max_star_horizontal_aligned16.cc
@@ -3,7 +3,7 @@
 #include <qa_16s_max_star_horizontal_aligned16.h>
 #include <volk/volk_16s_max_star_horizontal_aligned16.h>
 #include <cstdlib>
-#include <time.h>
+#include <ctime>
 //test for ssse3
 
 #ifndef LV_HAVE_SSSE3
diff --git a/volk/lib/qa_16s_permute_and_scalar_add_aligned16.cc b/volk/lib/qa_16s_permute_and_scalar_add_aligned16.cc
index 3c4f5c6cc..819b2256b 100644
--- a/volk/lib/qa_16s_permute_and_scalar_add_aligned16.cc
+++ b/volk/lib/qa_16s_permute_and_scalar_add_aligned16.cc
@@ -2,7 +2,7 @@
 #include <qa_16s_permute_and_scalar_add_aligned16.h>
 #include <volk/volk_16s_permute_and_scalar_add_aligned16.h>
 #include <cstdlib>
-#include <time.h>
+#include <ctime>
 
 //test for sse2
 
diff --git a/volk/lib/qa_16s_quad_max_star_aligned16.cc b/volk/lib/qa_16s_quad_max_star_aligned16.cc
index 80a220c93..66f8c9afa 100644
--- a/volk/lib/qa_16s_quad_max_star_aligned16.cc
+++ b/volk/lib/qa_16s_quad_max_star_aligned16.cc
@@ -2,6 +2,7 @@
 #include <qa_16s_quad_max_star_aligned16.h>
 #include <volk/volk_16s_quad_max_star_aligned16.h>
 #include <cstdlib>
+#include <ctime>
 
 //test for sse2
 
diff --git a/volk/lib/qa_16sc_deinterleave_16s_aligned16.cc b/volk/lib/qa_16sc_deinterleave_16s_aligned16.cc
index e700ac72c..c775e8596 100644
--- a/volk/lib/qa_16sc_deinterleave_16s_aligned16.cc
+++ b/volk/lib/qa_16sc_deinterleave_16s_aligned16.cc
@@ -2,6 +2,7 @@
 #include <qa_16sc_deinterleave_16s_aligned16.h>
 #include <volk/volk_16sc_deinterleave_16s_aligned16.h>
 #include <cstdlib>
+#include <ctime>
 
 //test for sse
 
diff --git a/volk/lib/qa_16sc_deinterleave_32f_aligned16.cc b/volk/lib/qa_16sc_deinterleave_32f_aligned16.cc
index 6ee076998..b25094e89 100644
--- a/volk/lib/qa_16sc_deinterleave_32f_aligned16.cc
+++ b/volk/lib/qa_16sc_deinterleave_32f_aligned16.cc
@@ -2,6 +2,7 @@
 #include <qa_16sc_deinterleave_32f_aligned16.h>
 #include <volk/volk_16sc_deinterleave_32f_aligned16.h>
 #include <cstdlib>
+#include <ctime>
 
 //test for sse
 
diff --git a/volk/lib/qa_16sc_deinterleave_real_16s_aligned16.cc b/volk/lib/qa_16sc_deinterleave_real_16s_aligned16.cc
index ca048ea67..c67064ea6 100644
--- a/volk/lib/qa_16sc_deinterleave_real_16s_aligned16.cc
+++ b/volk/lib/qa_16sc_deinterleave_real_16s_aligned16.cc
@@ -2,6 +2,7 @@
 #include <qa_16sc_deinterleave_real_16s_aligned16.h>
 #include <volk/volk_16sc_deinterleave_real_16s_aligned16.h>
 #include <cstdlib>
+#include <ctime>
 
 //test for sse
 
diff --git a/volk/lib/qa_16sc_deinterleave_real_32f_aligned16.cc b/volk/lib/qa_16sc_deinterleave_real_32f_aligned16.cc
index 0f4ba6923..f86f03b88 100644
--- a/volk/lib/qa_16sc_deinterleave_real_32f_aligned16.cc
+++ b/volk/lib/qa_16sc_deinterleave_real_32f_aligned16.cc
@@ -3,6 +3,7 @@
 #include <qa_16sc_deinterleave_real_32f_aligned16.h>
 #include <volk/volk_16sc_deinterleave_real_32f_aligned16.h>
 #include <cstdlib>
+#include <ctime>
 
 //test for sse
 
diff --git a/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc b/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc
index 5ab458bc9..dd446567e 100644
--- a/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc
+++ b/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc
@@ -2,6 +2,7 @@
 #include <qa_16sc_deinterleave_real_8s_aligned16.h>
 #include <volk/volk_16sc_deinterleave_real_8s_aligned16.h>
 #include <cstdlib>
+#include <ctime>
 
 //test for sse
 
diff --git a/volk/lib/qa_16sc_magnitude_16s_aligned16.cc b/volk/lib/qa_16sc_magnitude_16s_aligned16.cc
index b14610757..9799ef43b 100644
--- a/volk/lib/qa_16sc_magnitude_16s_aligned16.cc
+++ b/volk/lib/qa_16sc_magnitude_16s_aligned16.cc
@@ -2,6 +2,7 @@
 #include <qa_16sc_magnitude_16s_aligned16.h>
 #include <volk/volk_16sc_magnitude_16s_aligned16.h>
 #include <cstdlib>
+#include <ctime>
 
 //test for sse
 
diff --git a/volk/lib/qa_16sc_magnitude_32f_aligned16.cc b/volk/lib/qa_16sc_magnitude_32f_aligned16.cc
index 2c9e48f6e..1ebe644c5 100644
--- a/volk/lib/qa_16sc_magnitude_32f_aligned16.cc
+++ b/volk/lib/qa_16sc_magnitude_32f_aligned16.cc
@@ -2,6 +2,7 @@
 #include <qa_16sc_magnitude_32f_aligned16.h>
 #include <volk/volk_16sc_magnitude_32f_aligned16.h>
 #include <cstdlib>
+#include <ctime>
 
 //test for sse
 
diff --git a/volk/lib/qa_16u_byteswap_aligned16.cc b/volk/lib/qa_16u_byteswap_aligned16.cc
index 6b19828a4..ea117a820 100644
--- a/volk/lib/qa_16u_byteswap_aligned16.cc
+++ b/volk/lib/qa_16u_byteswap_aligned16.cc
@@ -3,6 +3,7 @@
 #include <volk/volk_16u_byteswap_aligned16.h>
 #include <cstdlib>
 #include <cstring>
+#include <ctime>
 
 //test for sse
 
diff --git a/volk/lib/qa_32f_accumulator_aligned16.cc b/volk/lib/qa_32f_accumulator_aligned16.cc
index ea637d600..0defef283 100644
--- a/volk/lib/qa_32f_accumulator_aligned16.cc
+++ b/volk/lib/qa_32f_accumulator_aligned16.cc
@@ -2,6 +2,7 @@
 #include <qa_32f_accumulator_aligned16.h>
 #include <volk/volk_32f_accumulator_aligned16.h>
 #include <cstdlib>
+#include <ctime>
 
 //test for sse
 
diff --git a/volk/lib/qa_32f_add_aligned16.cc b/volk/lib/qa_32f_add_aligned16.cc
index 002aebfc9..f80d562d4 100644
--- a/volk/lib/qa_32f_add_aligned16.cc
+++ b/volk/lib/qa_32f_add_aligned16.cc
@@ -21,6 +21,7 @@
 #include <qa_32f_add_aligned16.h>
 #include <volk/volk_32f_add_aligned16.h>
 #include <cstdlib>
+#include <ctime>
 
 //test for sse
 
diff --git a/volk/lib/qa_32f_calc_spectral_noise_floor_aligned16.cc b/volk/lib/qa_32f_calc_spectral_noise_floor_aligned16.cc
index 3c8137004..5d6987333 100644
--- a/volk/lib/qa_32f_calc_spectral_noise_floor_aligned16.cc
+++ b/volk/lib/qa_32f_calc_spectral_noise_floor_aligned16.cc
@@ -3,6 +3,7 @@
 #include <volk/volk_32f_calc_spectral_noise_floor_aligned16.h>
 #include <cstdlib>
 #include <math.h>
+#include <ctime>
 
 //test for sse
 
diff --git a/volk/lib/qa_32f_convert_16s_aligned16.cc b/volk/lib/qa_32f_convert_16s_aligned16.cc
index 84a4c40c4..3e2452e68 100644
--- a/volk/lib/qa_32f_convert_16s_aligned16.cc
+++ b/volk/lib/qa_32f_convert_16s_aligned16.cc
@@ -2,6 +2,7 @@
 #include <qa_32f_convert_16s_aligned16.h>
 #include <volk/volk_32f_convert_16s_aligned16.h>
 #include <cstdlib>
+#include <ctime>
 
 //test for sse2
 
diff --git a/volk/lib/qa_32f_convert_16s_unaligned16.cc b/volk/lib/qa_32f_convert_16s_unaligned16.cc
index 9469daed2..e016b7ff7 100644
--- a/volk/lib/qa_32f_convert_16s_unaligned16.cc
+++ b/volk/lib/qa_32f_convert_16s_unaligned16.cc
@@ -2,6 +2,7 @@
 #include <qa_32f_convert_16s_unaligned16.h>
 #include <volk/volk_32f_convert_16s_unaligned16.h>
 #include <cstdlib>
+#include <ctime>
 
 //test for sse2
 
diff --git a/volk/lib/qa_32f_convert_32s_aligned16.cc b/volk/lib/qa_32f_convert_32s_aligned16.cc
index ff24c7b0d..abceb52fb 100644
--- a/volk/lib/qa_32f_convert_32s_aligned16.cc
+++ b/volk/lib/qa_32f_convert_32s_aligned16.cc
@@ -2,6 +2,7 @@
 #include <qa_32f_convert_32s_aligned16.h>
 #include <volk/volk_32f_convert_32s_aligned16.h>
 #include <cstdlib>
+#include <ctime>
 
 //test for sse2
 
diff --git a/volk/lib/qa_32f_convert_32s_unaligned16.cc b/volk/lib/qa_32f_convert_32s_unaligned16.cc
index e63b17994..90f84b56f 100644
--- a/volk/lib/qa_32f_convert_32s_unaligned16.cc
+++ b/volk/lib/qa_32f_convert_32s_unaligned16.cc
@@ -2,6 +2,7 @@
 #include <qa_32f_convert_32s_unaligned16.h>
 #include <volk/volk_32f_convert_32s_unaligned16.h>
 #include <cstdlib>
+#include <ctime>
 
 //test for sse2
 
diff --git a/volk/lib/qa_32f_convert_64f_aligned16.cc b/volk/lib/qa_32f_convert_64f_aligned16.cc
index c546e47de..1d0754ac9 100644
--- a/volk/lib/qa_32f_convert_64f_aligned16.cc
+++ b/volk/lib/qa_32f_convert_64f_aligned16.cc
@@ -2,6 +2,7 @@
 #include <qa_32f_convert_64f_aligned16.h>
 #include <volk/volk_32f_convert_64f_aligned16.h>
 #include <cstdlib>
+#include <ctime>
 
 //test for sse2
 
diff --git a/volk/lib/qa_32f_convert_64f_unaligned16.cc b/volk/lib/qa_32f_convert_64f_unaligned16.cc
index 24b51f9af..6f7d5066d 100644
--- a/volk/lib/qa_32f_convert_64f_unaligned16.cc
+++ b/volk/lib/qa_32f_convert_64f_unaligned16.cc
@@ -2,6 +2,7 @@
 #include <qa_32f_convert_64f_unaligned16.h>
 #include <volk/volk_32f_convert_64f_unaligned16.h>
 #include <cstdlib>
+#include <ctime>
 
 //test for sse2
 
diff --git a/volk/lib/qa_32f_convert_8s_aligned16.cc b/volk/lib/qa_32f_convert_8s_aligned16.cc
index a3d4d6567..6a53629b5 100644
--- a/volk/lib/qa_32f_convert_8s_aligned16.cc
+++ b/volk/lib/qa_32f_convert_8s_aligned16.cc
@@ -2,6 +2,7 @@
 #include <qa_32f_convert_8s_aligned16.h>
 #include <volk/volk_32f_convert_8s_aligned16.h>
 #include <cstdlib>
+#include <ctime>
 
 //test for sse2
 
diff --git a/volk/lib/qa_32f_convert_8s_unaligned16.cc b/volk/lib/qa_32f_convert_8s_unaligned16.cc
index d885fd6bb..fbc5c20e6 100644
--- a/volk/lib/qa_32f_convert_8s_unaligned16.cc
+++ b/volk/lib/qa_32f_convert_8s_unaligned16.cc
@@ -2,6 +2,7 @@
 #include <qa_32f_convert_8s_unaligned16.h>
 #include <volk/volk_32f_convert_8s_unaligned16.h>
 #include <cstdlib>
+#include <ctime>
 
 //test for sse2
 
diff --git a/volk/lib/qa_32f_divide_aligned16.cc b/volk/lib/qa_32f_divide_aligned16.cc
index 8826bf94f..3257a3751 100644
--- a/volk/lib/qa_32f_divide_aligned16.cc
+++ b/volk/lib/qa_32f_divide_aligned16.cc
@@ -21,6 +21,7 @@
 #include <qa_32f_divide_aligned16.h>
 #include <volk/volk_32f_divide_aligned16.h>
 #include <cstdlib>
+#include <ctime>
 
 //test for sse
 
diff --git a/volk/lib/qa_32f_fm_detect_aligned16.cc b/volk/lib/qa_32f_fm_detect_aligned16.cc
index ca65add28..592304f83 100644
--- a/volk/lib/qa_32f_fm_detect_aligned16.cc
+++ b/volk/lib/qa_32f_fm_detect_aligned16.cc
@@ -2,6 +2,7 @@
 #include <qa_32f_fm_detect_aligned16.h>
 #include <volk/volk_32f_fm_detect_aligned16.h>
 #include <cstdlib>
+#include <ctime>
 
 //test for sse
 
diff --git a/volk/lib/qa_32f_interleave_16sc_aligned16.cc b/volk/lib/qa_32f_interleave_16sc_aligned16.cc
index 2a937637f..a7ae60780 100644
--- a/volk/lib/qa_32f_interleave_16sc_aligned16.cc
+++ b/volk/lib/qa_32f_interleave_16sc_aligned16.cc
@@ -2,6 +2,7 @@
 #include <qa_32f_interleave_16sc_aligned16.h>
 #include <volk/volk_32f_interleave_16sc_aligned16.h>
 #include <cstdlib>
+#include <ctime>
 
 //test for sse
 
diff --git a/volk/lib/qa_32f_interleave_32fc_aligned16.cc b/volk/lib/qa_32f_interleave_32fc_aligned16.cc
index c22dd1046..333b6fce8 100644
--- a/volk/lib/qa_32f_interleave_32fc_aligned16.cc
+++ b/volk/lib/qa_32f_interleave_32fc_aligned16.cc
@@ -2,6 +2,7 @@
 #include <qa_32f_interleave_32fc_aligned16.h>
 #include <volk/volk_32f_interleave_32fc_aligned16.h>
 #include <cstdlib>
+#include <ctime>
 
 //test for sse
 
diff --git a/volk/lib/qa_32f_max_aligned16.cc b/volk/lib/qa_32f_max_aligned16.cc
index 3ef375176..ceb913cb4 100644
--- a/volk/lib/qa_32f_max_aligned16.cc
+++ b/volk/lib/qa_32f_max_aligned16.cc
@@ -2,6 +2,7 @@
 #include <qa_32f_max_aligned16.h>
 #include <volk/volk_32f_max_aligned16.h>
 #include <cstdlib>
+#include <ctime>
 
 //test for sse
 
diff --git a/volk/lib/qa_32f_min_aligned16.cc b/volk/lib/qa_32f_min_aligned16.cc
index 617e18b24..580a60e7d 100644
--- a/volk/lib/qa_32f_min_aligned16.cc
+++ b/volk/lib/qa_32f_min_aligned16.cc
@@ -2,6 +2,7 @@
 #include <qa_32f_min_aligned16.h>
 #include <volk/volk_32f_min_aligned16.h>
 #include <cstdlib>
+#include <ctime>
 
 //test for sse
 
diff --git a/volk/lib/qa_32f_multiply_aligned16.cc b/volk/lib/qa_32f_multiply_aligned16.cc
index e52748466..0c242b649 100644
--- a/volk/lib/qa_32f_multiply_aligned16.cc
+++ b/volk/lib/qa_32f_multiply_aligned16.cc
@@ -21,6 +21,7 @@
 #include <qa_32f_multiply_aligned16.h>
 #include <volk/volk_32f_multiply_aligned16.h>
 #include <cstdlib>
+#include <ctime>
 
 //test for sse
 
diff --git a/volk/lib/qa_32f_normalize_aligned16.cc b/volk/lib/qa_32f_normalize_aligned16.cc
index 2954fc3ae..1c7b485a6 100644
--- a/volk/lib/qa_32f_normalize_aligned16.cc
+++ b/volk/lib/qa_32f_normalize_aligned16.cc
@@ -3,6 +3,7 @@
 #include <volk/volk_32f_normalize_aligned16.h>
 #include <cstdlib>
 #include <cstring>
+#include <ctime>
 
 //test for sse
 
diff --git a/volk/lib/qa_32f_sqrt_aligned16.cc b/volk/lib/qa_32f_sqrt_aligned16.cc
index 9a5f71de0..62d55767a 100644
--- a/volk/lib/qa_32f_sqrt_aligned16.cc
+++ b/volk/lib/qa_32f_sqrt_aligned16.cc
@@ -21,6 +21,7 @@
 #include <qa_32f_sqrt_aligned16.h>
 #include <volk/volk_32f_sqrt_aligned16.h>
 #include <cstdlib>
+#include <ctime>
 
 //test for sse
 
diff --git a/volk/lib/qa_32f_stddev_aligned16.cc b/volk/lib/qa_32f_stddev_aligned16.cc
index c0f22cdea..5934d70df 100644
--- a/volk/lib/qa_32f_stddev_aligned16.cc
+++ b/volk/lib/qa_32f_stddev_aligned16.cc
@@ -3,6 +3,7 @@
 #include <qa_32f_stddev_aligned16.h>
 #include <volk/volk_32f_stddev_aligned16.h>
 #include <cstdlib>
+#include <ctime>
 
 //test for sse
 
diff --git a/volk/lib/qa_32f_stddev_and_mean_aligned16.cc b/volk/lib/qa_32f_stddev_and_mean_aligned16.cc
index dcad8bcf3..78c701d78 100644
--- a/volk/lib/qa_32f_stddev_and_mean_aligned16.cc
+++ b/volk/lib/qa_32f_stddev_and_mean_aligned16.cc
@@ -3,6 +3,7 @@
 #include <qa_32f_stddev_and_mean_aligned16.h>
 #include <volk/volk_32f_stddev_and_mean_aligned16.h>
 #include <cstdlib>
+#include <ctime>
 
 //test for sse
 
diff --git a/volk/lib/qa_32f_subtract_aligned16.cc b/volk/lib/qa_32f_subtract_aligned16.cc
index a7e1b5ae3..ffe4b504c 100644
--- a/volk/lib/qa_32f_subtract_aligned16.cc
+++ b/volk/lib/qa_32f_subtract_aligned16.cc
@@ -2,6 +2,7 @@
 #include <qa_32f_subtract_aligned16.h>
 #include <volk/volk_32f_subtract_aligned16.h>
 #include <cstdlib>
+#include <ctime>
 
 //test for sse
 
diff --git a/volk/lib/qa_32fc_atan2_32f_aligned16.cc b/volk/lib/qa_32fc_atan2_32f_aligned16.cc
index a24382d71..c55ab5aa0 100644
--- a/volk/lib/qa_32fc_atan2_32f_aligned16.cc
+++ b/volk/lib/qa_32fc_atan2_32f_aligned16.cc
@@ -3,6 +3,7 @@
 #include <qa_32fc_atan2_32f_aligned16.h>
 #include <volk/volk_32fc_atan2_32f_aligned16.h>
 #include <cstdlib>
+#include <ctime>
 
 //test for sse
 
diff --git a/volk/lib/qa_32fc_conjugate_dot_prod_aligned16.cc b/volk/lib/qa_32fc_conjugate_dot_prod_aligned16.cc
index 497914e0a..2f9a30395 100644
--- a/volk/lib/qa_32fc_conjugate_dot_prod_aligned16.cc
+++ b/volk/lib/qa_32fc_conjugate_dot_prod_aligned16.cc
@@ -2,6 +2,7 @@
 #include <qa_32fc_conjugate_dot_prod_aligned16.h>
 #include <stdlib.h>
 #include <math.h>
+#include <time.h>
 
 
 #define assertcomplexEqual(expected, actual, delta)			\
diff --git a/volk/lib/qa_32fc_deinterleave_32f_aligned16.cc b/volk/lib/qa_32fc_deinterleave_32f_aligned16.cc
index 0f5a030f5..72e084c05 100644
--- a/volk/lib/qa_32fc_deinterleave_32f_aligned16.cc
+++ b/volk/lib/qa_32fc_deinterleave_32f_aligned16.cc
@@ -2,6 +2,7 @@
 #include <qa_32fc_deinterleave_32f_aligned16.h>
 #include <volk/volk_32fc_deinterleave_32f_aligned16.h>
 #include <cstdlib>
+#include <ctime>
 
 //test for sse
 
diff --git a/volk/lib/qa_32fc_deinterleave_64f_aligned16.cc b/volk/lib/qa_32fc_deinterleave_64f_aligned16.cc
index 6e051afbc..89770c236 100644
--- a/volk/lib/qa_32fc_deinterleave_64f_aligned16.cc
+++ b/volk/lib/qa_32fc_deinterleave_64f_aligned16.cc
@@ -2,6 +2,7 @@
 #include <qa_32fc_deinterleave_64f_aligned16.h>
 #include <volk/volk_32fc_deinterleave_64f_aligned16.h>
 #include <cstdlib>
+#include <ctime>
 
 //test for sse2
 
diff --git a/volk/lib/qa_32fc_deinterleave_real_16s_aligned16.cc b/volk/lib/qa_32fc_deinterleave_real_16s_aligned16.cc
index 850518524..7472476f7 100644
--- a/volk/lib/qa_32fc_deinterleave_real_16s_aligned16.cc
+++ b/volk/lib/qa_32fc_deinterleave_real_16s_aligned16.cc
@@ -2,6 +2,7 @@
 #include <qa_32fc_deinterleave_real_16s_aligned16.h>
 #include <volk/volk_32fc_deinterleave_real_16s_aligned16.h>
 #include <cstdlib>
+#include <ctime>
 
 //test for sse
 
diff --git a/volk/lib/qa_32fc_deinterleave_real_32f_aligned16.cc b/volk/lib/qa_32fc_deinterleave_real_32f_aligned16.cc
index 321deb184..5cbdc49b3 100644
--- a/volk/lib/qa_32fc_deinterleave_real_32f_aligned16.cc
+++ b/volk/lib/qa_32fc_deinterleave_real_32f_aligned16.cc
@@ -2,6 +2,7 @@
 #include <qa_32fc_deinterleave_real_32f_aligned16.h>
 #include <volk/volk_32fc_deinterleave_real_32f_aligned16.h>
 #include <cstdlib>
+#include <ctime>
 
 //test for sse
 
diff --git a/volk/lib/qa_32fc_deinterleave_real_64f_aligned16.cc b/volk/lib/qa_32fc_deinterleave_real_64f_aligned16.cc
index aedb2e387..4147e30ae 100644
--- a/volk/lib/qa_32fc_deinterleave_real_64f_aligned16.cc
+++ b/volk/lib/qa_32fc_deinterleave_real_64f_aligned16.cc
@@ -2,6 +2,7 @@
 #include <qa_32fc_deinterleave_real_64f_aligned16.h>
 #include <volk/volk_32fc_deinterleave_real_64f_aligned16.h>
 #include <cstdlib>
+#include <ctime>
 
 //test for sse
 
diff --git a/volk/lib/qa_32fc_magnitude_16s_aligned16.cc b/volk/lib/qa_32fc_magnitude_16s_aligned16.cc
index a4be1616b..16984e30d 100644
--- a/volk/lib/qa_32fc_magnitude_16s_aligned16.cc
+++ b/volk/lib/qa_32fc_magnitude_16s_aligned16.cc
@@ -2,6 +2,7 @@
 #include <qa_32fc_magnitude_16s_aligned16.h>
 #include <volk/volk_32fc_magnitude_16s_aligned16.h>
 #include <cstdlib>
+#include <ctime>
 
 //test for sse
 
diff --git a/volk/lib/qa_32fc_magnitude_32f_aligned16.cc b/volk/lib/qa_32fc_magnitude_32f_aligned16.cc
index d69ada408..b99f1ddcf 100644
--- a/volk/lib/qa_32fc_magnitude_32f_aligned16.cc
+++ b/volk/lib/qa_32fc_magnitude_32f_aligned16.cc
@@ -2,6 +2,7 @@
 #include <qa_32fc_magnitude_32f_aligned16.h>
 #include <volk/volk_32fc_magnitude_32f_aligned16.h>
 #include <cstdlib>
+#include <ctime>
 
 //test for sse
 
diff --git a/volk/lib/qa_32fc_power_spectral_density_32f_aligned16.cc b/volk/lib/qa_32fc_power_spectral_density_32f_aligned16.cc
index 83cdf4b15..a3d0955bd 100644
--- a/volk/lib/qa_32fc_power_spectral_density_32f_aligned16.cc
+++ b/volk/lib/qa_32fc_power_spectral_density_32f_aligned16.cc
@@ -2,6 +2,7 @@
 #include <qa_32fc_power_spectral_density_32f_aligned16.h>
 #include <volk/volk_32fc_power_spectral_density_32f_aligned16.h>
 #include <cstdlib>
+#include <ctime>
 
 //test for sse3
 
diff --git a/volk/lib/qa_32fc_power_spectrum_32f_aligned16.cc b/volk/lib/qa_32fc_power_spectrum_32f_aligned16.cc
index 4d1359068..1444c78a9 100644
--- a/volk/lib/qa_32fc_power_spectrum_32f_aligned16.cc
+++ b/volk/lib/qa_32fc_power_spectrum_32f_aligned16.cc
@@ -2,6 +2,7 @@
 #include <qa_32fc_power_spectrum_32f_aligned16.h>
 #include <volk/volk_32fc_power_spectrum_32f_aligned16.h>
 #include <cstdlib>
+#include <ctime>
 
 //test for sse3
 
diff --git a/volk/lib/qa_32s_and_aligned16.cc b/volk/lib/qa_32s_and_aligned16.cc
index 72d05cf6f..661801709 100644
--- a/volk/lib/qa_32s_and_aligned16.cc
+++ b/volk/lib/qa_32s_and_aligned16.cc
@@ -2,6 +2,7 @@
 #include <qa_32s_and_aligned16.h>
 #include <volk/volk_32s_and_aligned16.h>
 #include <cstdlib>
+#include <ctime>
 
 //test for sse
 
diff --git a/volk/lib/qa_32s_convert_32f_aligned16.cc b/volk/lib/qa_32s_convert_32f_aligned16.cc
index eab3fe016..07d799809 100644
--- a/volk/lib/qa_32s_convert_32f_aligned16.cc
+++ b/volk/lib/qa_32s_convert_32f_aligned16.cc
@@ -2,6 +2,7 @@
 #include <qa_32s_convert_32f_aligned16.h>
 #include <volk/volk_32s_convert_32f_aligned16.h>
 #include <cstdlib>
+#include <ctime>
 
 //test for sse2
 
diff --git a/volk/lib/qa_32s_convert_32f_unaligned16.cc b/volk/lib/qa_32s_convert_32f_unaligned16.cc
index 0e504cfa1..2ec610ffb 100644
--- a/volk/lib/qa_32s_convert_32f_unaligned16.cc
+++ b/volk/lib/qa_32s_convert_32f_unaligned16.cc
@@ -2,6 +2,7 @@
 #include <qa_32s_convert_32f_unaligned16.h>
 #include <volk/volk_32s_convert_32f_unaligned16.h>
 #include <cstdlib>
+#include <ctime>
 
 //test for sse2
 
diff --git a/volk/lib/qa_32s_or_aligned16.cc b/volk/lib/qa_32s_or_aligned16.cc
index e09dfb91c..9da2ae344 100644
--- a/volk/lib/qa_32s_or_aligned16.cc
+++ b/volk/lib/qa_32s_or_aligned16.cc
@@ -2,6 +2,7 @@
 #include <qa_32s_or_aligned16.h>
 #include <volk/volk_32s_or_aligned16.h>
 #include <cstdlib>
+#include <ctime>
 
 //test for sse
 
diff --git a/volk/lib/qa_32u_byteswap_aligned16.cc b/volk/lib/qa_32u_byteswap_aligned16.cc
index 8b1023876..313c786b6 100644
--- a/volk/lib/qa_32u_byteswap_aligned16.cc
+++ b/volk/lib/qa_32u_byteswap_aligned16.cc
@@ -3,6 +3,7 @@
 #include <volk/volk_32u_byteswap_aligned16.h>
 #include <cstdlib>
 #include <cstring>
+#include <ctime>
 
 //test for sse
 
diff --git a/volk/lib/qa_32u_popcnt_aligned16.cc b/volk/lib/qa_32u_popcnt_aligned16.cc
index 49fcddeb2..618a82a02 100644
--- a/volk/lib/qa_32u_popcnt_aligned16.cc
+++ b/volk/lib/qa_32u_popcnt_aligned16.cc
@@ -3,6 +3,7 @@
 #include <qa_32u_popcnt_aligned16.h>
 #include <volk/volk_32u_popcnt_aligned16.h>
 #include <cstdlib>
+#include <ctime>
 
 //test for sse
 
diff --git a/volk/lib/qa_64f_convert_32f_aligned16.cc b/volk/lib/qa_64f_convert_32f_aligned16.cc
index 0eaebf00a..7f9c4584a 100644
--- a/volk/lib/qa_64f_convert_32f_aligned16.cc
+++ b/volk/lib/qa_64f_convert_32f_aligned16.cc
@@ -2,6 +2,7 @@
 #include <qa_64f_convert_32f_aligned16.h>
 #include <volk/volk_64f_convert_32f_aligned16.h>
 #include <cstdlib>
+#include <ctime>
 
 //test for sse2
 
diff --git a/volk/lib/qa_64f_convert_32f_unaligned16.cc b/volk/lib/qa_64f_convert_32f_unaligned16.cc
index dcf94bd27..98aadbf4d 100644
--- a/volk/lib/qa_64f_convert_32f_unaligned16.cc
+++ b/volk/lib/qa_64f_convert_32f_unaligned16.cc
@@ -2,6 +2,7 @@
 #include <qa_64f_convert_32f_unaligned16.h>
 #include <volk/volk_64f_convert_32f_unaligned16.h>
 #include <cstdlib>
+#include <ctime>
 
 //test for sse2
 
diff --git a/volk/lib/qa_64f_max_aligned16.cc b/volk/lib/qa_64f_max_aligned16.cc
index 41ab078b0..76e755514 100644
--- a/volk/lib/qa_64f_max_aligned16.cc
+++ b/volk/lib/qa_64f_max_aligned16.cc
@@ -2,6 +2,7 @@
 #include <qa_64f_max_aligned16.h>
 #include <volk/volk_64f_max_aligned16.h>
 #include <cstdlib>
+#include <ctime>
 
 //test for sse
 
diff --git a/volk/lib/qa_64f_min_aligned16.cc b/volk/lib/qa_64f_min_aligned16.cc
index b4664d065..4b70d2881 100644
--- a/volk/lib/qa_64f_min_aligned16.cc
+++ b/volk/lib/qa_64f_min_aligned16.cc
@@ -2,6 +2,7 @@
 #include <qa_64f_min_aligned16.h>
 #include <volk/volk_64f_min_aligned16.h>
 #include <cstdlib>
+#include <ctime>
 
 //test for sse
 
diff --git a/volk/lib/qa_64u_byteswap_aligned16.cc b/volk/lib/qa_64u_byteswap_aligned16.cc
index 4f5d4d02b..20d012c9e 100644
--- a/volk/lib/qa_64u_byteswap_aligned16.cc
+++ b/volk/lib/qa_64u_byteswap_aligned16.cc
@@ -3,6 +3,7 @@
 #include <volk/volk_64u_byteswap_aligned16.h>
 #include <cstdlib>
 #include <cstring>
+#include <ctime>
 
 //test for sse
 
diff --git a/volk/lib/qa_64u_popcnt_aligned16.cc b/volk/lib/qa_64u_popcnt_aligned16.cc
index bce9ff6c2..85ef58795 100644
--- a/volk/lib/qa_64u_popcnt_aligned16.cc
+++ b/volk/lib/qa_64u_popcnt_aligned16.cc
@@ -3,6 +3,7 @@
 #include <qa_64u_popcnt_aligned16.h>
 #include <volk/volk_64u_popcnt_aligned16.h>
 #include <cstdlib>
+#include <ctime>
 
 //test for sse
 
diff --git a/volk/lib/qa_8s_convert_16s_aligned16.cc b/volk/lib/qa_8s_convert_16s_aligned16.cc
index 35f08fb81..8dd5f76ca 100644
--- a/volk/lib/qa_8s_convert_16s_aligned16.cc
+++ b/volk/lib/qa_8s_convert_16s_aligned16.cc
@@ -3,6 +3,7 @@
 #include <qa_8s_convert_16s_aligned16.h>
 #include <volk/volk_8s_convert_16s_aligned16.h>
 #include <cstdlib>
+#include <ctime>
 
 //test for sse4_1
 
diff --git a/volk/lib/qa_8s_convert_16s_unaligned16.cc b/volk/lib/qa_8s_convert_16s_unaligned16.cc
index bb326f818..12c502d4b 100644
--- a/volk/lib/qa_8s_convert_16s_unaligned16.cc
+++ b/volk/lib/qa_8s_convert_16s_unaligned16.cc
@@ -3,6 +3,7 @@
 #include <qa_8s_convert_16s_unaligned16.h>
 #include <volk/volk_8s_convert_16s_unaligned16.h>
 #include <cstdlib>
+#include <ctime>
 
 //test for sse4_1
 
diff --git a/volk/lib/qa_8s_convert_32f_aligned16.cc b/volk/lib/qa_8s_convert_32f_aligned16.cc
index 522da0b9d..672f5662f 100644
--- a/volk/lib/qa_8s_convert_32f_aligned16.cc
+++ b/volk/lib/qa_8s_convert_32f_aligned16.cc
@@ -3,6 +3,7 @@
 #include <qa_8s_convert_32f_aligned16.h>
 #include <volk/volk_8s_convert_32f_aligned16.h>
 #include <cstdlib>
+#include <ctime>
 
 //test for sse4.1
 
diff --git a/volk/lib/qa_8s_convert_32f_unaligned16.cc b/volk/lib/qa_8s_convert_32f_unaligned16.cc
index ea1fb7c74..43468b1b1 100644
--- a/volk/lib/qa_8s_convert_32f_unaligned16.cc
+++ b/volk/lib/qa_8s_convert_32f_unaligned16.cc
@@ -3,6 +3,7 @@
 #include <qa_8s_convert_32f_unaligned16.h>
 #include <volk/volk_8s_convert_32f_unaligned16.h>
 #include <cstdlib>
+#include <ctime>
 
 //test for sse4.1
 
diff --git a/volk/lib/qa_8sc_deinterleave_16s_aligned16.cc b/volk/lib/qa_8sc_deinterleave_16s_aligned16.cc
index 823e7fe2e..94e63e37d 100644
--- a/volk/lib/qa_8sc_deinterleave_16s_aligned16.cc
+++ b/volk/lib/qa_8sc_deinterleave_16s_aligned16.cc
@@ -3,6 +3,7 @@
 #include <qa_8sc_deinterleave_16s_aligned16.h>
 #include <volk/volk_8sc_deinterleave_16s_aligned16.h>
 #include <cstdlib>
+#include <ctime>
 
 //test for sse
 
diff --git a/volk/lib/qa_8sc_deinterleave_32f_aligned16.cc b/volk/lib/qa_8sc_deinterleave_32f_aligned16.cc
index fb580516c..29073eed7 100644
--- a/volk/lib/qa_8sc_deinterleave_32f_aligned16.cc
+++ b/volk/lib/qa_8sc_deinterleave_32f_aligned16.cc
@@ -3,6 +3,7 @@
 #include <qa_8sc_deinterleave_32f_aligned16.h>
 #include <volk/volk_8sc_deinterleave_32f_aligned16.h>
 #include <cstdlib>
+#include <ctime>
 
 //test for sse
 
diff --git a/volk/lib/qa_8sc_deinterleave_real_16s_aligned16.cc b/volk/lib/qa_8sc_deinterleave_real_16s_aligned16.cc
index 1cc844b52..4980c982a 100644
--- a/volk/lib/qa_8sc_deinterleave_real_16s_aligned16.cc
+++ b/volk/lib/qa_8sc_deinterleave_real_16s_aligned16.cc
@@ -3,6 +3,7 @@
 #include <qa_8sc_deinterleave_real_16s_aligned16.h>
 #include <volk/volk_8sc_deinterleave_real_16s_aligned16.h>
 #include <cstdlib>
+#include <ctime>
 
 //test for sse
 
diff --git a/volk/lib/qa_8sc_deinterleave_real_32f_aligned16.cc b/volk/lib/qa_8sc_deinterleave_real_32f_aligned16.cc
index 10e537cde..3c3f737a1 100644
--- a/volk/lib/qa_8sc_deinterleave_real_32f_aligned16.cc
+++ b/volk/lib/qa_8sc_deinterleave_real_32f_aligned16.cc
@@ -3,6 +3,7 @@
 #include <qa_8sc_deinterleave_real_32f_aligned16.h>
 #include <volk/volk_8sc_deinterleave_real_32f_aligned16.h>
 #include <cstdlib>
+#include <ctime>
 
 //test for sse
 
diff --git a/volk/lib/qa_8sc_deinterleave_real_8s_aligned16.cc b/volk/lib/qa_8sc_deinterleave_real_8s_aligned16.cc
index d84df8119..a33d1bf30 100644
--- a/volk/lib/qa_8sc_deinterleave_real_8s_aligned16.cc
+++ b/volk/lib/qa_8sc_deinterleave_real_8s_aligned16.cc
@@ -2,6 +2,7 @@
 #include <qa_8sc_deinterleave_real_8s_aligned16.h>
 #include <volk/volk_8sc_deinterleave_real_8s_aligned16.h>
 #include <cstdlib>
+#include <ctime>
 
 //test for sse
 
diff --git a/volk/lib/qa_8sc_multiply_conjugate_16sc_aligned16.cc b/volk/lib/qa_8sc_multiply_conjugate_16sc_aligned16.cc
index d64eac8ce..216bf1cef 100644
--- a/volk/lib/qa_8sc_multiply_conjugate_16sc_aligned16.cc
+++ b/volk/lib/qa_8sc_multiply_conjugate_16sc_aligned16.cc
@@ -3,7 +3,7 @@
 #include <qa_8sc_multiply_conjugate_16sc_aligned16.h>
 #include <stdlib.h>
 #include <math.h>
-#include <time.h>
+#include <ctime>
 
 #define assertcomplexEqual(expected, actual, delta)			\
   CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * delta); \
diff --git a/volk/lib/qa_8sc_multiply_conjugate_32fc_aligned16.cc b/volk/lib/qa_8sc_multiply_conjugate_32fc_aligned16.cc
index c27f0e0ca..4c707446e 100644
--- a/volk/lib/qa_8sc_multiply_conjugate_32fc_aligned16.cc
+++ b/volk/lib/qa_8sc_multiply_conjugate_32fc_aligned16.cc
@@ -3,7 +3,7 @@
 #include <qa_8sc_multiply_conjugate_32fc_aligned16.h>
 #include <stdlib.h>
 #include <math.h>
-#include <time.h>
+#include <ctime>
 
 #define assertcomplexEqual(expected, actual, delta)			\
   CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * delta); \
-- 
cgit 


From c6fff77de9b686761f93f0e1de237f8543f5e919 Mon Sep 17 00:00:00 2001
From: Nick Foster
Date: Fri, 17 Dec 2010 11:14:41 -0800
Subject: Volk: A bunch of new Orc routines plus a couple of build changes.
 32fc_magnitude_16s fails test_all right now.

---
 volk/lib/qa_16sc_deinterleave_16s_aligned16.cc     | 12 ++++++++++++
 volk/lib/qa_16sc_deinterleave_32f_aligned16.cc     | 11 +++++++++++
 volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc |  9 +++++++++
 volk/lib/qa_16sc_magnitude_16s_aligned16.cc        |  5 +++--
 volk/lib/qa_16sc_magnitude_32f_aligned16.cc        |  6 +++---
 volk/lib/qa_32f_max_aligned16.cc                   |  9 +++++++++
 volk/lib/qa_32f_min_aligned16.cc                   |  9 +++++++++
 volk/lib/qa_32fc_magnitude_16s_aligned16.cc        |  8 ++++----
 volk/lib/qa_volk.cc                                |  1 -
 9 files changed, 60 insertions(+), 10 deletions(-)

(limited to 'volk/lib')

diff --git a/volk/lib/qa_16sc_deinterleave_16s_aligned16.cc b/volk/lib/qa_16sc_deinterleave_16s_aligned16.cc
index e700ac72c..7e9e31df5 100644
--- a/volk/lib/qa_16sc_deinterleave_16s_aligned16.cc
+++ b/volk/lib/qa_16sc_deinterleave_16s_aligned16.cc
@@ -26,6 +26,8 @@ void qa_16sc_deinterleave_16s_aligned16::t1() {
   int16_t output_generic1[vlen] __attribute__ ((aligned (16)));
   int16_t output_sse2[vlen] __attribute__ ((aligned (16)));
   int16_t output_sse21[vlen] __attribute__ ((aligned (16)));
+  int16_t output_orc[vlen] __attribute__ ((aligned (16)));
+  int16_t output_orc1[vlen] __attribute__ ((aligned (16)));
   int16_t output_ssse3[vlen] __attribute__ ((aligned (16)));
   int16_t output_ssse31[vlen] __attribute__ ((aligned (16)));
 
@@ -43,6 +45,13 @@ void qa_16sc_deinterleave_16s_aligned16::t1() {
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
   printf("generic_time: %f\n", total);
   start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_16sc_deinterleave_16s_aligned16_manual(output_orc, output_orc1, input0, vlen, "orc");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("orc_time: %f\n", total);
+  start = clock();
   for(int count = 0; count < ITERS; ++count) {
     volk_16sc_deinterleave_16s_aligned16_manual(output_sse2, output_sse21, input0, vlen, "sse2");
   }
@@ -70,6 +79,9 @@ void qa_16sc_deinterleave_16s_aligned16::t1() {
 
     CPPUNIT_ASSERT_EQUAL(output_generic[i],  output_ssse3[i]);
     CPPUNIT_ASSERT_EQUAL(output_generic1[i],  output_ssse31[i]);
+    
+    CPPUNIT_ASSERT_EQUAL(output_generic[i],  output_orc[i]);
+    CPPUNIT_ASSERT_EQUAL(output_generic1[i],  output_orc1[i]);
   }
 }
 
diff --git a/volk/lib/qa_16sc_deinterleave_32f_aligned16.cc b/volk/lib/qa_16sc_deinterleave_32f_aligned16.cc
index 6ee076998..45100206d 100644
--- a/volk/lib/qa_16sc_deinterleave_32f_aligned16.cc
+++ b/volk/lib/qa_16sc_deinterleave_32f_aligned16.cc
@@ -26,6 +26,8 @@ void qa_16sc_deinterleave_32f_aligned16::t1() {
   float output_generic1[vlen] __attribute__ ((aligned (16)));
   float output_sse2[vlen] __attribute__ ((aligned (16)));
   float output_sse21[vlen] __attribute__ ((aligned (16)));
+  float output_orc[vlen] __attribute__ ((aligned (16)));
+  float output_orc1[vlen] __attribute__ ((aligned (16)));
 
   int16_t* loadInput = (int16_t*)input0;
   for(int i = 0; i < vlen*2; ++i) {   
@@ -41,6 +43,13 @@ void qa_16sc_deinterleave_32f_aligned16::t1() {
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
   printf("generic_time: %f\n", total);
   start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_16sc_deinterleave_32f_aligned16_manual(output_orc, output_orc1, input0, 32768.0, vlen, "orc");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("orc_time: %f\n", total);
+  start = clock();
   for(int count = 0; count < ITERS; ++count) {
     volk_16sc_deinterleave_32f_aligned16_manual(output_sse2, output_sse21, input0, 32768.0, vlen, "sse");
   }
@@ -57,6 +66,8 @@ void qa_16sc_deinterleave_32f_aligned16::t1() {
     //printf("%d...%d\n", output0[i], output01[i]);
     CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse2[i], fabs(output_generic[i])*1e-4);
     CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic1[i],  output_sse21[i], fabs(output_generic1[i])*1e-4);
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], fabs(output_generic[i])*1e-4);
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic1[i],  output_orc1[i], fabs(output_generic1[i])*1e-4);
   }
 }
 
diff --git a/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc b/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc
index 5ab458bc9..d187d20c3 100644
--- a/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc
+++ b/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc
@@ -24,6 +24,7 @@ void qa_16sc_deinterleave_real_8s_aligned16::t1() {
   
   int8_t output_generic[vlen] __attribute__ ((aligned (16)));
   int8_t output_ssse3[vlen] __attribute__ ((aligned (16)));
+  int8_t output_orc[vlen] __attribute__ ((aligned (16)));
 
   int16_t* loadInput = (int16_t*)input0;
   for(int i = 0; i < vlen*2; ++i) {   
@@ -39,6 +40,13 @@ void qa_16sc_deinterleave_real_8s_aligned16::t1() {
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
   printf("generic_time: %f\n", total);
   start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_16sc_deinterleave_real_8s_aligned16_manual(output_orc, input0, vlen, "orc");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("orc_time: %f\n", total);
+  start = clock();
   for(int count = 0; count < ITERS; ++count) {
     volk_16sc_deinterleave_real_8s_aligned16_manual(output_ssse3, input0, vlen, "ssse3");
   }
@@ -54,6 +62,7 @@ void qa_16sc_deinterleave_real_8s_aligned16::t1() {
   for(int i = 0; i < vlen; ++i) {
     //printf("%d...%d\n", output0[i], output01[i]);
     CPPUNIT_ASSERT_EQUAL(output_generic[i], output_ssse3[i]);
+    CPPUNIT_ASSERT_EQUAL(output_generic[i], output_orc[i]);
   }
 }
 
diff --git a/volk/lib/qa_16sc_magnitude_16s_aligned16.cc b/volk/lib/qa_16sc_magnitude_16s_aligned16.cc
index c8f13ff84..dd4ae75ff 100644
--- a/volk/lib/qa_16sc_magnitude_16s_aligned16.cc
+++ b/volk/lib/qa_16sc_magnitude_16s_aligned16.cc
@@ -40,13 +40,14 @@ void qa_16sc_magnitude_16s_aligned16::t1() {
   end = clock();
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
   printf("generic_time: %f\n", total);
-  start = clock();
+/*  start = clock();
   for(int count = 0; count < ITERS; ++count) {
     volk_16sc_magnitude_16s_aligned16_manual(output_orc, input0, vlen, "orc");
   }
   end = clock();
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
   printf("orc_time: %f\n", total);
+*/
   start = clock();
   for(int count = 0; count < ITERS; ++count) {
     volk_16sc_magnitude_16s_aligned16_manual(output_sse, input0, vlen, "sse");
@@ -72,7 +73,7 @@ void qa_16sc_magnitude_16s_aligned16::t1() {
     //printf("%d...%d\n", output0[i], output01[i]);
     CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], 1.1);
     CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], 1.1);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], 1.1);
+    //CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], 1.1);
   }
 }
 
diff --git a/volk/lib/qa_16sc_magnitude_32f_aligned16.cc b/volk/lib/qa_16sc_magnitude_32f_aligned16.cc
index e7178863c..53d42e28c 100644
--- a/volk/lib/qa_16sc_magnitude_32f_aligned16.cc
+++ b/volk/lib/qa_16sc_magnitude_32f_aligned16.cc
@@ -90,14 +90,14 @@ void qa_16sc_magnitude_32f_aligned16::t1() {
   end = clock();
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
   printf("generic_time: %f\n", total);
-  start = clock();
+/*  start = clock();
   for(int count = 0; count < ITERS; ++count) {
     volk_16sc_magnitude_32f_aligned16_manual(output_orc, input0, 32768.0, vlen, "orc");
   }
   end = clock();
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
   printf("orc_time: %f\n", total);
-
+*/
   start = clock();
   for(int count = 0; count < ITERS; ++count) {
     volk_16sc_magnitude_32f_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse");
@@ -123,7 +123,7 @@ void qa_16sc_magnitude_32f_aligned16::t1() {
     //printf("%d...%d\n", output0[i], output01[i]);
     CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4);
     CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], fabs(output_generic[i])*1e-4);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], fabs(output_generic[i])*1e-4);
+//    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], fabs(output_generic[i])*1e-4);
   }
 }
 
diff --git a/volk/lib/qa_32f_max_aligned16.cc b/volk/lib/qa_32f_max_aligned16.cc
index 3ef375176..cb1fd3627 100644
--- a/volk/lib/qa_32f_max_aligned16.cc
+++ b/volk/lib/qa_32f_max_aligned16.cc
@@ -25,6 +25,7 @@ void qa_32f_max_aligned16::t1() {
   
   float output0[vlen] __attribute__ ((aligned (16)));
   float output01[vlen] __attribute__ ((aligned (16)));
+  float output02[vlen] __attribute__ ((aligned (16)));
 
   for(int i = 0; i < vlen; ++i) {   
     input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
@@ -40,6 +41,13 @@ void qa_32f_max_aligned16::t1() {
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
   printf("generic_time: %f\n", total);
   start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_32f_max_aligned16_manual(output02, input0, input1, vlen, "orc");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("orc_time: %f\n", total);
+  start = clock();
   for(int count = 0; count < ITERS; ++count) {
     volk_32f_max_aligned16_manual(output01, input0, input1, vlen, "sse");
   }
@@ -54,6 +62,7 @@ void qa_32f_max_aligned16::t1() {
   for(int i = 0; i < vlen; ++i) {
     //printf("%d...%d\n", output0[i], output01[i]);
     CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
+    CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]);
   }
 }
 
diff --git a/volk/lib/qa_32f_min_aligned16.cc b/volk/lib/qa_32f_min_aligned16.cc
index 617e18b24..bf453f360 100644
--- a/volk/lib/qa_32f_min_aligned16.cc
+++ b/volk/lib/qa_32f_min_aligned16.cc
@@ -25,6 +25,7 @@ void qa_32f_min_aligned16::t1() {
   
   float output0[vlen] __attribute__ ((aligned (16)));
   float output01[vlen] __attribute__ ((aligned (16)));
+  float output02[vlen] __attribute__ ((aligned (16)));
 
   for(int i = 0; i < vlen; ++i) {   
     input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
@@ -40,6 +41,13 @@ void qa_32f_min_aligned16::t1() {
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
   printf("generic_time: %f\n", total);
   start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_32f_min_aligned16_manual(output02, input0, input1, vlen, "orc");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("orc_time: %f\n", total);
+  start = clock();
   for(int count = 0; count < ITERS; ++count) {
     volk_32f_min_aligned16_manual(output01, input0, input1, vlen, "sse");
   }
@@ -54,6 +62,7 @@ void qa_32f_min_aligned16::t1() {
   for(int i = 0; i < vlen; ++i) {
     //printf("%d...%d\n", output0[i], output01[i]);
     CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
+    CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]);
   }
 }
 
diff --git a/volk/lib/qa_32fc_magnitude_16s_aligned16.cc b/volk/lib/qa_32fc_magnitude_16s_aligned16.cc
index c3e65866b..105d32d0c 100644
--- a/volk/lib/qa_32fc_magnitude_16s_aligned16.cc
+++ b/volk/lib/qa_32fc_magnitude_16s_aligned16.cc
@@ -63,10 +63,10 @@ void qa_32fc_magnitude_16s_aligned16::t1() {
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
   printf("sse3_time: %f\n", total);
 
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
+  //for(int i = 0; i < 10; ++i) {
+  //  printf("inputs: %f, %f\n", input0[i].real(), input0[i].imag());
+  //  printf("generic... %i, sse3... %i, orc... %i\n", output_generic[i], output_sse3[i], output_orc[i]);
+  //}
   
   for(int i = 0; i < vlen; ++i) {
     //printf("%d...%d\n", output0[i], output01[i]);
diff --git a/volk/lib/qa_volk.cc b/volk/lib/qa_volk.cc
index c3c27b69b..f6a334da7 100644
--- a/volk/lib/qa_volk.cc
+++ b/volk/lib/qa_volk.cc
@@ -118,7 +118,6 @@ CppUnit::TestSuite *
 qa_volk::suite()
 {
   CppUnit::TestSuite *s = new CppUnit::TestSuite("volk");
-
   s->addTest(qa_16s_quad_max_star_aligned16::suite());
   s->addTest(qa_32fc_dot_prod_aligned16::suite());
   s->addTest(qa_32fc_square_dist_scalar_mult_aligned16::suite());
-- 
cgit 


From 200720da362e30f74083aad4dc106e4a057638bf Mon Sep 17 00:00:00 2001
From: Nick Foster
Date: Fri, 17 Dec 2010 12:20:16 -0800
Subject: Volk: Magnitude functions. 32fc_magnitude_16s currently clips to +MAX
 instead of -MAX.

---
 volk/lib/qa_16sc_magnitude_16s_aligned16.cc | 6 +++---
 volk/lib/qa_32fc_magnitude_16s_aligned16.cc | 8 ++++----
 2 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'volk/lib')

diff --git a/volk/lib/qa_16sc_magnitude_16s_aligned16.cc b/volk/lib/qa_16sc_magnitude_16s_aligned16.cc
index dd4ae75ff..d00315b57 100644
--- a/volk/lib/qa_16sc_magnitude_16s_aligned16.cc
+++ b/volk/lib/qa_16sc_magnitude_16s_aligned16.cc
@@ -40,14 +40,14 @@ void qa_16sc_magnitude_16s_aligned16::t1() {
   end = clock();
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
   printf("generic_time: %f\n", total);
-/*  start = clock();
+  start = clock();
   for(int count = 0; count < ITERS; ++count) {
     volk_16sc_magnitude_16s_aligned16_manual(output_orc, input0, vlen, "orc");
   }
   end = clock();
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
   printf("orc_time: %f\n", total);
-*/
+
   start = clock();
   for(int count = 0; count < ITERS; ++count) {
     volk_16sc_magnitude_16s_aligned16_manual(output_sse, input0, vlen, "sse");
@@ -73,7 +73,7 @@ void qa_16sc_magnitude_16s_aligned16::t1() {
     //printf("%d...%d\n", output0[i], output01[i]);
     CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], 1.1);
     CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], 1.1);
-    //CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], 1.1);
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], 1.1);
   }
 }
 
diff --git a/volk/lib/qa_32fc_magnitude_16s_aligned16.cc b/volk/lib/qa_32fc_magnitude_16s_aligned16.cc
index 105d32d0c..53b3bf790 100644
--- a/volk/lib/qa_32fc_magnitude_16s_aligned16.cc
+++ b/volk/lib/qa_32fc_magnitude_16s_aligned16.cc
@@ -63,10 +63,10 @@ void qa_32fc_magnitude_16s_aligned16::t1() {
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
   printf("sse3_time: %f\n", total);
 
-  //for(int i = 0; i < 10; ++i) {
-  //  printf("inputs: %f, %f\n", input0[i].real(), input0[i].imag());
-  //  printf("generic... %i, sse3... %i, orc... %i\n", output_generic[i], output_sse3[i], output_orc[i]);
-  //}
+  for(int i = 0; i < 10; ++i) {
+    printf("inputs: %f, %f\n", input0[i].real(), input0[i].imag());
+    printf("generic... %i, sse3... %i, orc... %i\n", output_generic[i], output_sse3[i], output_orc[i]);
+  }
   
   for(int i = 0; i < vlen; ++i) {
     //printf("%d...%d\n", output0[i], output01[i]);
-- 
cgit 


From 0e92b93f21fc9c324c379bc318120d414e7422cc Mon Sep 17 00:00:00 2001
From: Nick Foster
Date: Fri, 17 Dec 2010 13:35:40 -0800
Subject: Volk: Orc impl for 32fc_magnitude_16s saturates at -max instead of
 +max.

---
 volk/lib/qa_32fc_magnitude_16s_aligned16.cc | 6 +++---
 volk/lib/qa_volk.cc                         | 1 +
 2 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'volk/lib')

diff --git a/volk/lib/qa_32fc_magnitude_16s_aligned16.cc b/volk/lib/qa_32fc_magnitude_16s_aligned16.cc
index 53b3bf790..93d4ec150 100644
--- a/volk/lib/qa_32fc_magnitude_16s_aligned16.cc
+++ b/volk/lib/qa_32fc_magnitude_16s_aligned16.cc
@@ -63,9 +63,9 @@ void qa_32fc_magnitude_16s_aligned16::t1() {
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
   printf("sse3_time: %f\n", total);
 
-  for(int i = 0; i < 10; ++i) {
-    printf("inputs: %f, %f\n", input0[i].real(), input0[i].imag());
-    printf("generic... %i, sse3... %i, orc... %i\n", output_generic[i], output_sse3[i], output_orc[i]);
+  for(int i = 0; i < 1; ++i) {
+  //  printf("inputs: %f, %f\n", input0[i].real(), input0[i].imag());
+  //  printf("generic... %i, sse3... %i, orc... %i\n", output_generic[i], output_sse3[i], output_orc[i]);
   }
   
   for(int i = 0; i < vlen; ++i) {
diff --git a/volk/lib/qa_volk.cc b/volk/lib/qa_volk.cc
index f6a334da7..c3c27b69b 100644
--- a/volk/lib/qa_volk.cc
+++ b/volk/lib/qa_volk.cc
@@ -118,6 +118,7 @@ CppUnit::TestSuite *
 qa_volk::suite()
 {
   CppUnit::TestSuite *s = new CppUnit::TestSuite("volk");
+
   s->addTest(qa_16s_quad_max_star_aligned16::suite());
   s->addTest(qa_32fc_dot_prod_aligned16::suite());
   s->addTest(qa_32fc_square_dist_scalar_mult_aligned16::suite());
-- 
cgit 


From 79c514b542d25e709903b41cfdc1673aae35ac1d Mon Sep 17 00:00:00 2001
From: Eric Blossom
Date: Thu, 23 Dec 2010 14:29:56 -0800
Subject: Update volk .gitignores

---
 volk/lib/.gitignore | 1 +
 1 file changed, 1 insertion(+)

(limited to 'volk/lib')

diff --git a/volk/lib/.gitignore b/volk/lib/.gitignore
index 573fb1618..0f17543ab 100644
--- a/volk/lib/.gitignore
+++ b/volk/lib/.gitignore
@@ -19,3 +19,4 @@
 /volk_mktables.c
 /volk_proccpu_sim.c
 /volk_runtime.c
+/test_all
-- 
cgit 


From 5b45b875ed58fd66234764a05da42c6eaff22c4d Mon Sep 17 00:00:00 2001
From: Nick Foster
Date: Tue, 11 Jan 2011 15:17:55 -0800
Subject: Volk: Added more Orc routines (including complex multiply). Started
 redoing the testing framework so it's easier to add new archs to tests.

---
 volk/lib/Makefile.am                       |  2 +
 volk/lib/qa_32f_normalize_aligned16.cc     | 13 +++++
 volk/lib/qa_32fc_32f_multiply_aligned16.cc | 84 +++++++++++++-----------------
 volk/lib/qa_32fc_multiply_aligned16.cc     | 12 +++++
 4 files changed, 64 insertions(+), 47 deletions(-)

(limited to 'volk/lib')

diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am
index 253033461..0aeafe4aa 100644
--- a/volk/lib/Makefile.am
+++ b/volk/lib/Makefile.am
@@ -156,6 +156,7 @@ endif
 # ----------------------------------------------------------------
 libvolk_qa_la_SOURCES = \
 	qa_volk.cc \
+	qa_utils.cc \
 	qa_16s_quad_max_star_aligned16.cc \
 	qa_32fc_dot_prod_aligned16.cc \
 	qa_32fc_square_dist_aligned16.cc \
@@ -257,6 +258,7 @@ libvolk_qa_la_LIBADD = \
 noinst_HEADERS = \
 	volk_init.h \
 	qa_volk.h \
+	qa_utils.h \
 	assembly.h \
 	qa_16s_quad_max_star_aligned16.h \
 	qa_32fc_dot_prod_aligned16.h \
diff --git a/volk/lib/qa_32f_normalize_aligned16.cc b/volk/lib/qa_32f_normalize_aligned16.cc
index 1c7b485a6..0da43ecff 100644
--- a/volk/lib/qa_32f_normalize_aligned16.cc
+++ b/volk/lib/qa_32f_normalize_aligned16.cc
@@ -26,13 +26,16 @@ void qa_32f_normalize_aligned16::t1() {
 
   float* output0;
   float* output01;
+  float* output02;
   ret = posix_memalign((void**)&output0, 16, vlen*sizeof(float));
   ret = posix_memalign((void**)&output01, 16, vlen*sizeof(float));
+  ret = posix_memalign((void**)&output02, 16, vlen*sizeof(float));
 
   for(int i = 0; i < vlen; ++i) {   
     output0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
   }
   memcpy(output01, output0, vlen*sizeof(float));
+  memcpy(output02, output0, vlen*sizeof(float));
   printf("32f_normalize_aligned\n");
 
   start = clock();
@@ -49,6 +52,14 @@ void qa_32f_normalize_aligned16::t1() {
   end = clock();
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
   printf("sse_time: %f\n", total);
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_32f_normalize_aligned16_manual(output02, 1.15, vlen, "orc");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("orc_time: %f\n", total);
+  
   for(int i = 0; i < 1; ++i) {
     //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
     //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
@@ -57,10 +68,12 @@ void qa_32f_normalize_aligned16::t1() {
   for(int i = 0; i < vlen; ++i) {
     // printf("%e...%e\n", output0[i], output01[i]);
     CPPUNIT_ASSERT_DOUBLES_EQUAL(output0[i], output01[i], fabs(output0[i])*1e-4);
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(output0[i], output02[i], fabs(output0[i])*1e-4);
   }
 
   free(output0);
   free(output01);
+  free(output02);
 }
 
 #endif
diff --git a/volk/lib/qa_32fc_32f_multiply_aligned16.cc b/volk/lib/qa_32fc_32f_multiply_aligned16.cc
index 4eba0a3cd..7bb8d21c1 100644
--- a/volk/lib/qa_32fc_32f_multiply_aligned16.cc
+++ b/volk/lib/qa_32fc_32f_multiply_aligned16.cc
@@ -2,28 +2,12 @@
 #include <volk/volk.h>
 #include <qa_32fc_32f_multiply_aligned16.h>
 #include <stdlib.h>
-#include <math.h>
 #include <time.h>
-
-#define assertcomplexEqual(expected, actual, delta)			\
-  CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * delta); \
-  CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected))* delta);	
+#include <string.h>
+#include <qa_utils.h>
 
 #define	ERR_DELTA	(1e-4)
 
-//test for sse
-static float uniform() {
-  return 2.0 * ((float) rand() / RAND_MAX - 0.5);	// uniformly (-1, 1)
-}
-
-static void
-random_floats (float *buf, unsigned n)
-{
-  for (unsigned i = 0; i < n; i++)
-    buf[i] = uniform ();
-}
-
-#ifdef LV_HAVE_SSE3
 void qa_32fc_32f_multiply_aligned16::t1() {
 
   const int vlen = 2046;
@@ -36,50 +20,56 @@ void qa_32fc_32f_multiply_aligned16::t1() {
   std::complex<float>* input;
   float * taps;
   int i;
+  std::vector<std::string> archs;
+  archs.push_back("generic");
+#ifdef LV_HAVE_SSE3
+  archs.push_back("sse3");
+#endif
+#ifdef LV_HAVE_ORC
+  archs.push_back("orc");
+#endif
   
-  std::complex<float>* result_generic;
-  std::complex<float>* result_sse3;
+  std::vector<std::complex<float>* > results;
 
   ret = posix_memalign((void**)&input, 16, vlen * 2 * sizeof(float));
   ret = posix_memalign((void**)&taps, 16, vlen * sizeof(float));
-  ret = posix_memalign((void**)&result_generic, 16, vlen * 2 * sizeof(float));
-  ret = posix_memalign((void**)&result_sse3, 16, vlen * 2 * sizeof(float));
+  
+  for(i=0; i < archs.size(); i++) {
+      std::complex<float> *ptr;
+      ret = posix_memalign((void**)&ptr, 16, vlen * 2 * sizeof(float));
+      if(ret) {
+          printf("Couldn't allocate memory\n");
+          exit(1);
+      }
+      results.push_back(ptr);
+  }
 
   random_floats((float*)input, vlen * 2);
   random_floats(taps, vlen);
   
   printf("32fc_32f_multiply_aligned16\n");
 
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_32f_multiply_aligned16_manual(result_generic, input, taps, vlen,  "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_32f_multiply_aligned16_manual(result_sse3, input, taps, vlen, "sse3");
+  for(i=0; i < archs.size(); i++) {
+    start = clock();
+    for(int count = 0; count < ITERS; ++count) {
+      volk_32fc_32f_multiply_aligned16_manual(results[i], input, taps, vlen, archs[i].c_str());
+    }
+    end = clock();
+    total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+    printf("%s_time: %f\n", archs[i].c_str(), total);
   }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse3_time: %f\n", total);
 
-  for(i = 0; i < vlen; i++){
-    assertcomplexEqual(result_generic[i], result_sse3[i], ERR_DELTA);
+  for(i=0; i < vlen; i++) {
+      int j = 1;
+      for(j; j < archs.size(); j++) {
+          assertcomplexEqual(results[0][i], results[j][i], ERR_DELTA);
+      }
   }
 
   free(input);
   free(taps);
-  free(result_generic);
-  free(result_sse3);
-  
-}
-#else
-void qa_32fc_32f_multiply_aligned16::t1() {
-  printf("sse3 not available... no test performed\n");
+  for(i=0; i < archs.size(); i++) {      
+    free(results[i]);
+  }
 }
 
-#endif /* LV_HAVE_SSE3 */
-
diff --git a/volk/lib/qa_32fc_multiply_aligned16.cc b/volk/lib/qa_32fc_multiply_aligned16.cc
index e1f7eab3d..022b58ad6 100644
--- a/volk/lib/qa_32fc_multiply_aligned16.cc
+++ b/volk/lib/qa_32fc_multiply_aligned16.cc
@@ -41,11 +41,13 @@ void qa_32fc_multiply_aligned16::t1() {
   
   std::complex<float>* result_generic;
   std::complex<float>* result_sse3;
+  std::complex<float>* result_orc;
 
   ret = posix_memalign((void**)&input, 16, vlen*2*sizeof(float));
   ret = posix_memalign((void**)&taps, 16, vlen*2*sizeof(float));
   ret = posix_memalign((void**)&result_generic, 16, vlen*2*sizeof(float));
   ret = posix_memalign((void**)&result_sse3, 16, vlen*2*sizeof(float));
+  ret = posix_memalign((void**)&result_orc, 16, vlen*2*sizeof(float));
   
   random_floats((float*)input, vlen * 2);
   random_floats((float*)taps, vlen * 2);
@@ -67,15 +69,25 @@ void qa_32fc_multiply_aligned16::t1() {
   end = clock();
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
   printf("sse3_time: %f\n", total);
+  
+  start = clock();
+  for(int count = 0; count < ITERS; ++count) {
+    volk_32fc_multiply_aligned16_manual(result_orc, input, taps, vlen, "orc");
+  }
+  end = clock();
+  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
+  printf("orc_time: %f\n", total);
 
   for(i = 0; i < vlen; i++){
     assertcomplexEqual(result_generic[i], result_sse3[i], ERR_DELTA);
+    assertcomplexEqual(result_generic[i], result_orc[i], ERR_DELTA);
   }
 
   free(input);
   free(taps);
   free(result_generic);
   free(result_sse3);
+  free(result_orc);
   
 }
 #else
-- 
cgit 


From c77bb3e71562daa68e9a195a0131b7cc04324784 Mon Sep 17 00:00:00 2001
From: Nick Foster
Date: Wed, 12 Jan 2011 19:20:35 -0800
Subject: Volk: Working on a new QA architecture that doesn't require
 individual test programs.

---
 volk/lib/Makefile.am                          |   2 -
 volk/lib/qa_32fc_32f_multiply_aligned16.cc    |   6 +-
 volk/lib/qa_8sc_deinterleave_16s_aligned16.cc |   2 +-
 volk/lib/qa_utils.cc                          | 223 ++++++++++++++++++++++++++
 volk/lib/qa_utils.h                           |  19 +++
 volk/lib/qa_volk.cc                           |   2 +-
 6 files changed, 247 insertions(+), 7 deletions(-)
 create mode 100644 volk/lib/qa_utils.cc
 create mode 100644 volk/lib/qa_utils.h

(limited to 'volk/lib')

diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am
index 0aeafe4aa..a10b0a362 100644
--- a/volk/lib/Makefile.am
+++ b/volk/lib/Makefile.am
@@ -156,7 +156,6 @@ endif
 # ----------------------------------------------------------------
 libvolk_qa_la_SOURCES = \
 	qa_volk.cc \
-	qa_utils.cc \
 	qa_16s_quad_max_star_aligned16.cc \
 	qa_32fc_dot_prod_aligned16.cc \
 	qa_32fc_square_dist_aligned16.cc \
@@ -181,7 +180,6 @@ libvolk_qa_la_SOURCES = \
 	qa_32f_dot_prod_aligned16.cc \
 	qa_32f_dot_prod_unaligned16.cc \
 	qa_32f_fm_detect_aligned16.cc \
-	qa_32fc_32f_multiply_aligned16.cc \
 	qa_32fc_multiply_aligned16.cc \
 	qa_32f_divide_aligned16.cc \
 	qa_32f_multiply_aligned16.cc \
diff --git a/volk/lib/qa_32fc_32f_multiply_aligned16.cc b/volk/lib/qa_32fc_32f_multiply_aligned16.cc
index 7bb8d21c1..b80e0e008 100644
--- a/volk/lib/qa_32fc_32f_multiply_aligned16.cc
+++ b/volk/lib/qa_32fc_32f_multiply_aligned16.cc
@@ -5,10 +5,11 @@
 #include <time.h>
 #include <string.h>
 #include <qa_utils.h>
+#include <boost/test/unit_test.hpp>
 
-#define	ERR_DELTA	(1e-4)
+#define	TOLERANCE	(1e-4)
 
-void qa_32fc_32f_multiply_aligned16::t1() {
+void qa_32fc_32f_multiply_aligned16(void) {
 
   const int vlen = 2046;
   const int ITERS = 100000;
@@ -72,4 +73,3 @@ void qa_32fc_32f_multiply_aligned16::t1() {
     free(results[i]);
   }
 }
-
diff --git a/volk/lib/qa_8sc_deinterleave_16s_aligned16.cc b/volk/lib/qa_8sc_deinterleave_16s_aligned16.cc
index 94e63e37d..f753e1107 100644
--- a/volk/lib/qa_8sc_deinterleave_16s_aligned16.cc
+++ b/volk/lib/qa_8sc_deinterleave_16s_aligned16.cc
@@ -40,7 +40,7 @@ void qa_8sc_deinterleave_16s_aligned16::t1() {
 
   start = clock();
   for(int count = 0; count < ITERS; ++count) {
-    volk_8sc_deinterleave_16s_aligned16_manual(output_generic, output_generic1, input0, vlen, "generic");
+    volk_8sc_deinterleave_16s_aligned16_manual(output_generic, output_generic1, input0, vlen, "monkeys");
   }
   end = clock();
   total = (double)(end-start)/(double)CLOCKS_PER_SEC;
diff --git a/volk/lib/qa_utils.cc b/volk/lib/qa_utils.cc
new file mode 100644
index 000000000..4d93ca62a
--- /dev/null
+++ b/volk/lib/qa_utils.cc
@@ -0,0 +1,223 @@
+#include "qa_utils.h"
+#include <stdlib.h>
+#include <boost/foreach.hpp>
+#include <boost/assign/list_of.hpp>
+#include <boost/tokenizer.hpp>
+#include <boost/test/unit_test.hpp>
+#include <iostream>
+#include <vector>
+#include <time.h>
+//#include <math.h>
+//#include <volk/volk_runtime.h>
+#include <volk/volk_registry.h>
+#include <volk/volk.h>
+#include <boost/typeof/typeof.hpp>
+#include <boost/type_traits.hpp>
+//#include <boost/test/unit_test.hpp>
+
+float uniform() {
+  return 2.0 * ((float) rand() / RAND_MAX - 0.5);	// uniformly (-1, 1)
+}
+
+void
+random_floats (float *buf, unsigned n)
+{
+  for (unsigned i = 0; i < n; i++)
+    buf[i] = uniform ();
+}
+
+template <class t>
+t *make_aligned_buffer(unsigned int len) { // allocate room for len elements of type t on a 16-byte boundary
+  t *buf;
+  int ret;
+  ret = posix_memalign((void**)&buf, 16, len * sizeof(t));
+  assert(ret == 0); // allocation failure is only detected while assert() is active
+  return buf; // handed back raw; run_volk_tests() releases these buffers with free()
+}
+
+void make_buffer_for_signature(std::vector<void *> &buffs, std::vector<std::string> inputsig, unsigned int vlen) { // append one aligned vlen-element buffer to buffs per type token in inputsig
+    BOOST_FOREACH(std::string sig, inputsig) {
+        if     (sig=="32fc" || sig=="64f") buffs.push_back((void *) make_aligned_buffer<lv_32fc_t>(vlen)); // lv_32fc_t-sized elements
+        else if(sig=="32f" || sig=="32u" || sig=="32s" || sig=="16sc") buffs.push_back((void *) make_aligned_buffer<float>(vlen)); // float-sized elements
+        else if(sig=="16s" || sig=="16u") buffs.push_back((void *) make_aligned_buffer<int16_t>(vlen)); // int16_t-sized elements
+        else if(sig=="8s" || sig=="8u") buffs.push_back((void *) make_aligned_buffer<int8_t>(vlen)); // int8_t-sized elements
+        else std::cout << "Invalid type!" << std::endl; // unknown token: nothing is appended, so buffs ends up shorter than inputsig
+    }
+}
+
+static std::vector<std::string> get_arch_list(const int archs[]) { // translate an arch-flag array into the matching arch name strings
+    std::vector<std::string> archlist;
+    int num_archs = archs[0]; // element 0 is the entry count; the flags themselves are archs[1..num_archs]
+    
+    //TODO: find a way to query these arch names instead of hard-coding the mapping below
+    for(int i = 0; i < num_archs; i++) {
+        switch(archs[i+1]) { // each entry is matched against a single-bit mask (1<<LV_*)
+        case (1<<LV_GENERIC):
+            archlist.push_back("generic");
+            break;
+        case (1<<LV_ORC):
+            archlist.push_back("orc");
+            break;
+        case (1<<LV_SSE):
+            archlist.push_back("sse");
+            break;
+        case (1<<LV_SSE2):
+            archlist.push_back("sse2");
+            break;
+        case (1<<LV_SSSE3):
+            archlist.push_back("ssse3");
+            break;
+        case (1<<LV_SSE4_1):
+            archlist.push_back("sse4_1");
+            break;
+        case (1<<LV_SSE4_2):
+            archlist.push_back("sse4_2");
+            break;
+        case (1<<LV_SSE4_A):
+            archlist.push_back("sse4_a");
+            break;
+        case (1<<LV_MMX):
+            archlist.push_back("mmx");
+            break;
+        case (1<<LV_AVX):
+            archlist.push_back("avx");
+            break;
+        default: // any other value is dropped silently, so archlist may be shorter than num_archs
+            break;
+        }
+    }
+    return archlist; // names appear in the same order as the flags in archs
+}
+
+static bool is_valid_type(std::string type) { // true when type is one of the recognised type tokens listed below
+    std::vector<std::string> valid_types = boost::assign::list_of("32fc")("32f")("32s")("32u")("16sc")("16s")("16u")("8s")("8u"); // "64f" is absent here although make_buffer_for_signature() accepts it
+    
+    BOOST_FOREACH(std::string this_type, valid_types) {
+        if(type == this_type) return true; // exact string match only
+    }
+    return false;
+}
+    
+
+static void get_function_signature(std::vector<std::string> &inputsig, 
+                                   std::vector<std::string> &outputsig, 
+                                   std::string name) { // fill inputsig/outputsig from the '_'-separated tokens of name
+    boost::char_separator<char> sep("_");
+    boost::tokenizer<boost::char_separator<char> > tok(name, sep);
+    std::vector<std::string> toked;
+    tok.assign(name);
+    toked.assign(tok.begin(), tok.end()); // toked[0] and toked[1] are read below without a size check
+    
+    assert(toked[0] == "volk"); // every kernel name is expected to begin with "volk"
+    
+    inputsig.push_back(toked[1]); //mandatory: the token after "volk" is taken as an input type without validation
+    int pos = 2;
+    bool valid_type = true;
+    while(valid_type && pos < toked.size()) { // phase 1: further consecutive type tokens are inputs
+        if(is_valid_type(toked[pos])) inputsig.push_back(toked[pos]);
+        else valid_type = false; // the first non-type token ends the input list and is consumed by pos++
+        pos++;
+    }
+    while(!valid_type && pos < toked.size()) { // phase 2: skip tokens up to and including the next type token
+        if(is_valid_type(toked[pos])) valid_type = true; // NOTE(review): this token is consumed here and never stored in outputsig -- confirm that is intended
+        pos++;
+    }
+    while(valid_type && pos < toked.size()) { // phase 3: type tokens found after that point are outputs
+        if(is_valid_type(toked[pos])) outputsig.push_back(toked[pos]);
+        else valid_type = false;
+        pos++;
+    }
+        
+    //if there's no explicit output sig then assume the output is the same as the first input
+    if(outputsig.size() == 0) outputsig.push_back(inputsig[0]);
+    assert(inputsig.size() != 0); // cannot fire: inputsig already received toked[1] above
+    assert(outputsig.size() != 0); // cannot fire either: the fallback above guarantees one entry
+}
+
+inline void run_cast_test2(volk_fn_2arg func, void *outbuff, std::vector<void *> &inbuffs, unsigned int vlen, unsigned int iter, std::string arch) { // call func iter times with one output and one input buffer
+    while(iter--) func(outbuff, inbuffs[0], vlen, arch.c_str()); // arch is forwarded as the trailing string argument; presumably it selects the implementation -- confirm against the *_manual functions
+}
+
+inline void run_cast_test3(volk_fn_3arg func, void *outbuff, std::vector<void *> &inbuffs, unsigned int vlen, unsigned int iter, std::string arch) { // call func iter times with one output and two input buffers
+    while(iter--) func(outbuff, inbuffs[0], inbuffs[1], vlen, arch.c_str()); // inbuffs must hold at least two entries; this is not checked here
+}
+
+inline void run_cast_test4(volk_fn_4arg func, void *outbuff, std::vector<void *> &inbuffs, unsigned int vlen, unsigned int iter, std::string arch) { // call func iter times with one output and three input buffers
+    while(iter--) func(outbuff, inbuffs[0], inbuffs[1], inbuffs[2], vlen, arch.c_str()); // inbuffs must hold at least three entries; this is not checked here
+}
+
+// Run one VOLK kernel on every architecture in archs and compare the results.
+// archs[0] is the arch count and archs[1..] the arch flags; manual_func is the
+// kernel's *_manual entry point, cast back to a 2/3/4-buffer signature chosen
+// from the types parsed out of name. Every non-generic output is checked
+// against the "generic" output with BOOST_CHECK_CLOSE(tol), all read as float.
+// Always returns false; failures are reported through the Boost check macro.
+bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name, float tol, int vlen, int iter) {
+    std::cout << "RUN_VOLK_TESTS: " << name << std::endl;
+    
+    //first let's get a list of available architectures for the test
+    std::vector<std::string> arch_list = get_arch_list(archs);
+    
+    BOOST_FOREACH(std::string arch, arch_list) std::cout << "Found an arch: " << arch << std::endl;
+    
+    //now we have to get a function signature by parsing the name
+    std::vector<std::string> inputsig, outputsig;
+    get_function_signature(inputsig, outputsig, name);
+
+    for(int i=0; i<inputsig.size(); i++) std::cout << "Input: " << inputsig[i] << std::endl;
+    for(int i=0; i<outputsig.size(); i++) std::cout << "Output: " << outputsig[i] << std::endl;
+    
+    //now that we have that, we'll set up input and output buffers based on the function signature
+    std::vector<void *> inbuffs;
+    make_buffer_for_signature(inbuffs, inputsig, vlen);
+    
+    //and set the input buffers to something random
+    //TODO (until then the kernels run on whatever the fresh allocations contain)
+    
+    //allocate output buffers -- one for each output for each arch
+    std::vector<void *> outbuffs;
+    BOOST_FOREACH(std::string arch, arch_list) {
+        make_buffer_for_signature(outbuffs, outputsig, vlen);
+    }
+    
+    //now run the test; outbuffs[i] is arch i's buffer only while outputsig has exactly one entry
+    clock_t start, end;
+    for(int i = 0; i < arch_list.size(); i++) {
+        start = clock();
+        switch(outputsig.size()+inputsig.size()) { //total buffer count picks the call shape
+            case 2:
+                run_cast_test2((volk_fn_2arg)(manual_func), outbuffs[i], inbuffs, vlen, iter, arch_list[i]);
+                break;
+            case 3:
+                run_cast_test3((volk_fn_3arg)(manual_func), outbuffs[i], inbuffs, vlen, iter, arch_list[i]);
+                break;
+            case 4:
+                run_cast_test4((volk_fn_4arg)(manual_func), outbuffs[i], inbuffs, vlen, iter, arch_list[i]);
+                break;
+            default: //unsupported buffer count: the kernel is not run at all
+                break;
+        }
+        end = clock();
+        std::cout << arch_list[i] << " completed in " << (double)(end-start)/(double)CLOCKS_PER_SEC << "s" << std::endl;
+    }
+
+    //and now compare each output to the generic output
+    //first we have to know which output is the generic one, they aren't in order...
+    int generic_offset = -1; //stays -1 when no "generic" arch is listed
+    for(int i=0; i<arch_list.size(); i++) 
+        if(arch_list[i] == "generic") generic_offset=i;
+    
+    for(int i=0; i<arch_list.size(); i++) {
+        if(generic_offset >= 0 && arch_list[i] != "generic") { //without a generic reference there is nothing to compare
+            for(int j=0; j<vlen; j++) { //bound is on j: the old i<vlen test never ended the loop
+                BOOST_CHECK_CLOSE(((float *)(outbuffs[generic_offset]))[j], ((float *)(outbuffs[i]))[j], tol);
+            }
+        }
+    }
+
+    BOOST_FOREACH(void *buf, inbuffs) free(buf);
+    BOOST_FOREACH(void *buf, outbuffs) free(buf);
+    return false; //same value as before (0); callers get no pass/fail signal from the return
+}
+
+
diff --git a/volk/lib/qa_utils.h b/volk/lib/qa_utils.h
new file mode 100644
index 000000000..80323c445
--- /dev/null
+++ b/volk/lib/qa_utils.h
@@ -0,0 +1,19 @@
+#ifndef VOLK_QA_UTILS_H
+#define VOLK_QA_UTILS_H
+
+#include <stdlib.h>
+#include <string>
+#include <volk/volk.h>
+
+float uniform(void); // random float built from rand(); see qa_utils.cc
+void random_floats(float *buf, unsigned n); // fill buf[0..n) with uniform() values
+
+bool run_volk_tests(const int[], void(*)(), std::string, float, int, int); // (archs, manual_func, name, tol, vlen, iter)
+
+#define VOLK_RUN_TESTS(func, tol, len, iter) run_volk_tests(func##_arch_defs, (void (*)())func##_manual, std::string(#func), tol, len, iter) // derives the arch table, the _manual entry point and the name string from func
+
+typedef void (*volk_fn_2arg)(void *, void *, unsigned int, const char*); // output, 1 input, element count, arch name (as called by run_cast_test2)
+typedef void (*volk_fn_3arg)(void *, void *, void *, unsigned int, const char*); // output, 2 inputs, element count, arch name (as called by run_cast_test3)
+typedef void (*volk_fn_4arg)(void *, void *, void *, void *, unsigned int, const char*); // output, 3 inputs, element count, arch name (as called by run_cast_test4)
+
+#endif //VOLK_QA_UTILS_H
diff --git a/volk/lib/qa_volk.cc b/volk/lib/qa_volk.cc
index c3c27b69b..8e7e59768 100644
--- a/volk/lib/qa_volk.cc
+++ b/volk/lib/qa_volk.cc
@@ -143,7 +143,7 @@ qa_volk::suite()
   s->addTest(qa_32f_dot_prod_aligned16::suite());
   s->addTest(qa_32f_dot_prod_unaligned16::suite());
   s->addTest(qa_32f_fm_detect_aligned16::suite());
-  s->addTest(qa_32fc_32f_multiply_aligned16::suite());
+  //s->addTest(qa_32fc_32f_multiply_aligned16::suite());
   s->addTest(qa_32fc_multiply_aligned16::suite());
   s->addTest(qa_32f_divide_aligned16::suite());
   s->addTest(qa_32f_multiply_aligned16::suite());
-- 
cgit 


From 9a527257014878cac993ffe854bf8fdacc412be6 Mon Sep 17 00:00:00 2001
From: Nick Foster
Date: Fri, 14 Jan 2011 13:07:06 -0800
Subject: Volk: QA code fixes, more Orc routines. Broke the 32fc_multiply Orc
 impl because I'm lame and lost some work. Fixed volk_8s_convert_16s Orc impl.
 Still need to rename functions and modify the QA sig parser to match. Then
 rewrite makefiles.

---
 volk/lib/qa_utils.cc | 94 ++++++++++++++++++++++++++++++++++++++++++----------
 volk/lib/qa_utils.h  |  2 +-
 2 files changed, 77 insertions(+), 19 deletions(-)

(limited to 'volk/lib')

diff --git a/volk/lib/qa_utils.cc b/volk/lib/qa_utils.cc
index 4d93ca62a..fa21db487 100644
--- a/volk/lib/qa_utils.cc
+++ b/volk/lib/qa_utils.cc
@@ -3,7 +3,7 @@
 #include <boost/foreach.hpp>
 #include <boost/assign/list_of.hpp>
 #include <boost/tokenizer.hpp>
-#include <boost/test/unit_test.hpp>
+//#include <boost/test/unit_test.hpp>
 #include <iostream>
 #include <vector>
 #include <time.h>
@@ -13,19 +13,39 @@
 #include <volk/volk.h>
 #include <boost/typeof/typeof.hpp>
 #include <boost/type_traits.hpp>
-//#include <boost/test/unit_test.hpp>
 
 float uniform() {
   return 2.0 * ((float) rand() / RAND_MAX - 0.5);	// uniformly (-1, 1)
 }
 
-void
-random_floats (float *buf, unsigned n)
+void random_floats (float *buf, unsigned n)
 {
   for (unsigned i = 0; i < n; i++)
     buf[i] = uniform ();
 }
 
+void load_random_data(void *data, std::string sig, unsigned int n) {
+    if(sig == "32fc") {
+        random_floats((float *)data, n*2);
+    } else if(sig == "32f") {
+        random_floats((float *)data, n);
+    } else if(sig == "32u") {
+        for(int i=0; i<n; i++) ((uint32_t *)data)[i] = (uint32_t) ((rand() - (RAND_MAX/2)) / (RAND_MAX/2));
+    } else if(sig == "32s") {
+        for(int i=0; i<n; i++) ((int32_t *)data)[i] = ((int32_t) (rand() - (RAND_MAX/2)));
+    } else if(sig == "16u") {
+        for(int i=0; i<n; i++) ((uint16_t *)data)[i] = (uint16_t) ((rand() - (RAND_MAX/2)) / (RAND_MAX/2));
+    } else if(sig == "16s") {
+        for(int i=0; i<n; i++) ((int16_t *)data)[i] = ((int16_t)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 32768.0));
+    } else if(sig == "16sc") {
+        for(int i=0; i<n*2; i++) ((int16_t *)data)[i] = ((int16_t)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 32768.0));
+    } else if(sig == "8u") {
+        for(int i=0; i<n; i++) ((uint8_t *)data)[i] = ((uint8_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)) * 256.0));
+    } else if(sig == "8s") {
+        for(int i=0; i<n; i++) ((int8_t *)data)[i] = ((int8_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)) * 128.0));
+    } else std::cout << "load_random_data(): Invalid sig: " << sig << std::endl;
+}
+
 template <class t>
 t *make_aligned_buffer(unsigned int len) {
   t *buf;
@@ -37,11 +57,11 @@ t *make_aligned_buffer(unsigned int len) {
 
 void make_buffer_for_signature(std::vector<void *> &buffs, std::vector<std::string> inputsig, unsigned int vlen) {
     BOOST_FOREACH(std::string sig, inputsig) {
-        if     (sig=="32fc" || sig=="64f") buffs.push_back((void *) make_aligned_buffer<lv_32fc_t>(vlen));
-        else if(sig=="32f" || sig=="32u" || sig=="32s" || sig=="16sc") buffs.push_back((void *) make_aligned_buffer<float>(vlen));
-        else if(sig=="16s" || sig=="16u") buffs.push_back((void *) make_aligned_buffer<int16_t>(vlen));
-        else if(sig=="8s" || sig=="8u") buffs.push_back((void *) make_aligned_buffer<int8_t>(vlen));
-        else std::cout << "Invalid type!" << std::endl;
+        if     (sig=="32fc" || sig=="64f" || sig=="64u") buffs.push_back((void *) make_aligned_buffer<uint64_t>(vlen));
+        else if(sig=="32f" || sig=="32u" || sig=="32s" || sig=="16sc") buffs.push_back((void *) make_aligned_buffer<uint32_t>(vlen));
+        else if(sig=="16s" || sig=="16u" || sig=="8sc") buffs.push_back((void *) make_aligned_buffer<uint16_t>(vlen));
+        else if(sig=="8s" || sig=="8u") buffs.push_back((void *) make_aligned_buffer<uint8_t>(vlen));
+        else std::cout << "Invalid type: " << sig << std::endl;
     }
 }
 
@@ -90,7 +110,7 @@ static std::vector<std::string> get_arch_list(const int archs[]) {
 }
 
 static bool is_valid_type(std::string type) {
-    std::vector<std::string> valid_types = boost::assign::list_of("32fc")("32f")("32s")("32u")("16sc")("16s")("16u")("8s")("8u");
+    std::vector<std::string> valid_types = boost::assign::list_of("64f")("64u")("32fc")("32f")("32s")("32u")("16sc")("16s")("16u")("8s")("8sc")("8u");
     
     BOOST_FOREACH(std::string this_type, valid_types) {
         if(type == this_type) return true;
@@ -120,16 +140,23 @@ static void get_function_signature(std::vector<std::string> &inputsig,
     }
     while(!valid_type && pos < toked.size()) {
         if(is_valid_type(toked[pos])) valid_type = true;
-        pos++;
+        else pos++;
     }
     while(valid_type && pos < toked.size()) {
         if(is_valid_type(toked[pos])) outputsig.push_back(toked[pos]);
         else valid_type = false;
         pos++;
     }
-        
-    //if there's no explicit output sig then assume the output is the same as the first input
-    if(outputsig.size() == 0) outputsig.push_back(inputsig[0]);
+    
+    //if there's no output sig and only one input sig, assume there are 2 inputs
+    //this handles conversion fn's (which have a specified output sig) and most of the rest
+    if(outputsig.size() == 0 && inputsig.size() == 1) {
+        outputsig.push_back(inputsig[0]);
+        inputsig.push_back(inputsig[0]);
+    }//if there's no explicit output sig then assume the output is the same as the first input
+    else if(outputsig.size() == 0) outputsig.push_back(inputsig[0]);
+    
+    
     assert(inputsig.size() != 0);
     assert(outputsig.size() != 0);
 }
@@ -168,7 +195,9 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name,
     make_buffer_for_signature(inbuffs, inputsig, vlen);
     
     //and set the input buffers to something random
-    //TODO
+    for(int i=0; i<inputsig.size(); i++) {
+        load_random_data(inbuffs[i], inputsig[i], vlen);        
+    }
     
     //allocate output buffers -- one for each output for each arch
     std::vector<void *> outbuffs;
@@ -204,9 +233,38 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name,
         if(arch_list[i] == "generic") generic_offset=i;
     
     for(int i=0; i<arch_list.size(); i++) {
-        if(arch_list[i] != "generic") {
-            for(int j=0; i<vlen; j++) {
-                BOOST_CHECK_CLOSE(((float *)(outbuffs[generic_offset]))[j], ((float *)(outbuffs[i]))[j], tol);
+        if(i != generic_offset) {
+            if(outputsig[0] == "32fc") {
+                for(int j=0; j<vlen*2; j++) {
+                    if(fabs(((float *)(outbuffs[generic_offset]))[j] - ((float *)(outbuffs[i]))[j]) > tol) {
+                        std::cout << "Generic: " << ((float *)(outbuffs[generic_offset]))[j] << " " << arch_list[i] << ": " << ((float *)(outbuffs[i]))[j] << std::endl;
+                        return 1;
+                    }
+                }
+            } else if(outputsig[0] == "32f") {
+                for(int j=0; j<vlen; j++) {
+                    if(fabs(((float *)(outbuffs[generic_offset]))[j] - ((float *)(outbuffs[i]))[j]) > tol) {
+                        std::cout << "Generic: " << ((float *)(outbuffs[generic_offset]))[j] << " " << arch_list[i] << ": " << ((float *)(outbuffs[i]))[j] << std::endl;
+                        return 1;
+                    }
+                }
+            } else if(outputsig[0] == "32u" || outputsig[0] == "32s" || outputsig[0] == "16sc") {
+                for(int j=0; j<vlen; j++) {
+                    if(((uint32_t *)(outbuffs[generic_offset]))[j] != ((uint32_t *)(outbuffs[i]))[j]) {
+                        std::cout << "Generic: " << ((uint32_t *)(outbuffs[generic_offset]))[j] << " " << arch_list[i] << ": " << ((uint32_t *)(outbuffs[i]))[j] << std::endl;
+                        return 1;
+                    }
+                }
+            } else if(outputsig[0] == "16u" || outputsig[0] == "16s" || outputsig[0] == "8sc") {
+                for(int j=0; j<vlen; j++) {
+                    if(((uint16_t *)(outbuffs[generic_offset]))[j] != ((uint16_t *)(outbuffs[i]))[j]) {
+                        std::cout << "Generic: " << ((uint16_t *)(outbuffs[generic_offset]))[j] << " " << arch_list[i] << ": " << ((uint16_t *)(outbuffs[i]))[j] << std::endl;
+                        return 1;
+                    }
+                }
+            } else { 
+                std::cout << "Error: invalid type " << outputsig[0] << std::endl;
+                return 1;
             }
         }
     }
diff --git a/volk/lib/qa_utils.h b/volk/lib/qa_utils.h
index 80323c445..f81d652fb 100644
--- a/volk/lib/qa_utils.h
+++ b/volk/lib/qa_utils.h
@@ -10,7 +10,7 @@ void random_floats(float *buf, unsigned n);
 
 bool run_volk_tests(const int[], void(*)(), std::string, float, int, int);
 
-#define VOLK_RUN_TESTS(func, tol, len, iter) run_volk_tests(func##_arch_defs, (void (*)())func##_manual, std::string(#func), tol, len, iter)
+#define VOLK_RUN_TESTS(func, tol, len, iter) BOOST_CHECK_EQUAL(run_volk_tests(func##_arch_defs, (void (*)())func##_manual, std::string(#func), tol, len, iter), 0)
 
 typedef void (*volk_fn_2arg)(void *, void *, unsigned int, const char*);
 typedef void (*volk_fn_3arg)(void *, void *, void *, unsigned int, const char*);
-- 
cgit 


From 5c4aab18e4e5e34ce1f8e286bc534a02c1318932 Mon Sep 17 00:00:00 2001
From: Nick Foster
Date: Fri, 14 Jan 2011 13:21:08 -0800
Subject: Volk: Makefile changes to use new test framework. Doesn't currently
 build libvolk_qa, as I haven't really determined an appropriate place for
 "leftover" tests that the standard framework won't handle.

---
 volk/lib/Makefile.am | 218 ++++++---------------------------------------------
 1 file changed, 24 insertions(+), 194 deletions(-)

(limited to 'volk/lib')

diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am
index a10b0a362..5c995148a 100644
--- a/volk/lib/Makefile.am
+++ b/volk/lib/Makefile.am
@@ -24,18 +24,19 @@ include $(top_srcdir)/Makefile.common
 # of a hack. Figure out the right way to do this to find built
 # volk_config.h and volk_tables.h
 
-AM_CPPFLAGS = $(STD_DEFINES_AND_INCLUDES) $(CPPUNIT_CPPFLAGS) \
+AM_CPPFLAGS = $(STD_DEFINES_AND_INCLUDES) \
 	-I$(top_builddir)/include \
 	$(LV_CXXFLAGS) $(WITH_INCLUDES)
 
 
-# We build 2 libraries and 1 executable here.  One library contains
-# everything except the libcppunit QA code, and one contains only the
-# libcppunit-based QA code.  The C++ QA code is especially recommended
+# We build 1 library and 1 executable here.  The library contains
+# everything except the QA code. The C++ QA code is especially recommended
 # when you have general purpose C or C++ code that may not get
 # thoroughly exercised by building and running a GR block.  The
 # executable runs the QA code at "make check" time.
 #
+#
+#
 # N.B., If there's a SWIG generated shared library and associated
 # python code, it will be contained in ../python, not here.  (That
 # code is conditionally built depending on the state of the
@@ -44,15 +45,14 @@ AM_CPPFLAGS = $(STD_DEFINES_AND_INCLUDES) $(CPPUNIT_CPPFLAGS) \
 
 
 # list of programs run by "make check" and "make distcheck"
-#TESTS = test_all
+TESTS = testqa
 #orc stuff gets built in the ORC directory conditional to ORC being enabled.
 #it gets linked in during the build of libvolk as an added library.
 #there might be a better way to do this.
 
 lib_LTLIBRARIES = \
 	libvolk.la \
-	libvolk_runtime.la \
-	libvolk_qa.la
+	libvolk_runtime.la
 
 EXTRA_DIST = \
 	volk_mktables.c		\
@@ -154,101 +154,15 @@ endif
 # ----------------------------------------------------------------
 #        The QA library.  Note libvolk.la in LIBADD
 # ----------------------------------------------------------------
-libvolk_qa_la_SOURCES = \
-	qa_volk.cc \
-	qa_16s_quad_max_star_aligned16.cc \
-	qa_32fc_dot_prod_aligned16.cc \
-	qa_32fc_square_dist_aligned16.cc \
-	qa_32fc_square_dist_scalar_mult_aligned16.cc \
-	qa_32f_sum_of_poly_aligned16.cc \
-	qa_32fc_index_max_aligned16.cc \
-	qa_32f_index_max_aligned16.cc \
-	qa_32fc_conjugate_dot_prod_aligned16.cc \
-	qa_16s_permute_and_scalar_add_aligned16.cc \
-	qa_16s_branch_4_state_8_aligned16.cc \
-	qa_16s_max_star_horizontal_aligned16.cc \
-	qa_16s_max_star_aligned16.cc \
-	qa_16s_add_quad_aligned16.cc \
-	qa_32f_add_aligned16.cc \
-	qa_32f_subtract_aligned16.cc \
-	qa_32f_max_aligned16.cc \
-	qa_32f_min_aligned16.cc \
-	qa_64f_max_aligned16.cc \
-	qa_64f_min_aligned16.cc \
-	qa_32s_and_aligned16.cc \
-	qa_32s_or_aligned16.cc \
-	qa_32f_dot_prod_aligned16.cc \
-	qa_32f_dot_prod_unaligned16.cc \
-	qa_32f_fm_detect_aligned16.cc \
-	qa_32fc_multiply_aligned16.cc \
-	qa_32f_divide_aligned16.cc \
-	qa_32f_multiply_aligned16.cc \
-	qa_32f_sqrt_aligned16.cc \
-	qa_8sc_multiply_conjugate_16sc_aligned16.cc \
-	qa_8sc_multiply_conjugate_32fc_aligned16.cc \
-	qa_32u_popcnt_aligned16.cc \
-	qa_64u_popcnt_aligned16.cc \
-	qa_64u_byteswap_aligned16.cc \
-	qa_8sc_deinterleave_32f_aligned16.cc \
-	qa_16sc_deinterleave_32f_aligned16.cc \
-	qa_8sc_deinterleave_16s_aligned16.cc \
-	qa_32f_interleave_32fc_aligned16.cc \
-	qa_16u_byteswap_aligned16.cc \
-	qa_16sc_deinterleave_16s_aligned16.cc \
-	qa_32fc_deinterleave_real_32f_aligned16.cc \
-	qa_32fc_magnitude_32f_aligned16.cc \
-	qa_32fc_deinterleave_real_64f_aligned16.cc \
-	qa_32fc_deinterleave_real_16s_aligned16.cc \
-	qa_32fc_magnitude_16s_aligned16.cc \
-	qa_32fc_deinterleave_32f_aligned16.cc \
-	qa_8sc_deinterleave_real_8s_aligned16.cc \
-	qa_32fc_deinterleave_64f_aligned16.cc \
-	qa_32f_interleave_16sc_aligned16.cc \
-	qa_16sc_deinterleave_real_8s_aligned16.cc \
-	qa_16sc_deinterleave_real_32f_aligned16.cc \
-	qa_16sc_magnitude_32f_aligned16.cc \
-	qa_32u_byteswap_aligned16.cc \
-	qa_16sc_deinterleave_real_16s_aligned16.cc \
-	qa_8sc_deinterleave_real_32f_aligned16.cc \
-	qa_16sc_magnitude_16s_aligned16.cc \
-	qa_32f_normalize_aligned16.cc \
-	qa_8sc_deinterleave_real_16s_aligned16.cc \
-	qa_16s_convert_32f_aligned16.cc \
-	qa_16s_convert_32f_unaligned16.cc \
-	qa_16s_convert_8s_aligned16.cc \
-	qa_16s_convert_8s_unaligned16.cc \
-	qa_32f_convert_16s_aligned16.cc \
-	qa_32f_convert_16s_unaligned16.cc \
-	qa_32f_convert_32s_aligned16.cc \
-	qa_32f_convert_32s_unaligned16.cc \
-	qa_32f_convert_64f_aligned16.cc \
-	qa_32f_convert_64f_unaligned16.cc \
-	qa_32f_convert_8s_aligned16.cc \
-	qa_32f_convert_8s_unaligned16.cc \
-	qa_32s_convert_32f_aligned16.cc \
-	qa_32s_convert_32f_unaligned16.cc \
-	qa_64f_convert_32f_aligned16.cc \
-	qa_64f_convert_32f_unaligned16.cc \
-	qa_8s_convert_16s_aligned16.cc \
-	qa_8s_convert_16s_unaligned16.cc \
-	qa_8s_convert_32f_aligned16.cc \
-	qa_8s_convert_32f_unaligned16.cc \
-	qa_32fc_32f_power_32fc_aligned16.cc \
-	qa_32f_power_aligned16.cc \
-	qa_32fc_atan2_32f_aligned16.cc \
-	qa_32fc_power_spectral_density_32f_aligned16.cc \
-	qa_32fc_power_spectrum_32f_aligned16.cc \
-	qa_32f_calc_spectral_noise_floor_aligned16.cc \
-	qa_32f_accumulator_aligned16.cc \
-	qa_32f_stddev_aligned16.cc \
-	qa_32f_stddev_and_mean_aligned16.cc
-
-libvolk_qa_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0
-
-libvolk_qa_la_LIBADD = \
-	libvolk.la \
-	libvolk_runtime.la \
-	$(CPPUNIT_LIBS)
+#libvolk_qa_la_SOURCES = \
+#	qa_utils.cc
+
+#libvolk_qa_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 -lboost
+
+#libvolk_qa_la_LIBADD = \
+#	libvolk.la \
+#	libvolk_runtime.la
+	
 
 # ----------------------------------------------------------------
 # headers that don't get installed
@@ -257,104 +171,20 @@ noinst_HEADERS = \
 	volk_init.h \
 	qa_volk.h \
 	qa_utils.h \
-	assembly.h \
-	qa_16s_quad_max_star_aligned16.h \
-	qa_32fc_dot_prod_aligned16.h \
-	qa_32fc_square_dist_aligned16.h \
-	qa_32fc_square_dist_scalar_mult_aligned16.h \
-	qa_32f_sum_of_poly_aligned16.h \
-	qa_32fc_index_max_aligned16.h \
-	qa_32f_index_max_aligned16.h \
-	qa_32fc_conjugate_dot_prod_aligned16.h \
-	qa_16s_permute_and_scalar_add_aligned16.h \
-	qa_16s_branch_4_state_8_aligned16.h \
-	qa_16s_max_star_horizontal_aligned16.h \
-	qa_16s_max_star_aligned16.h \
-	qa_16s_add_quad_aligned16.h \
-	qa_32f_add_aligned16.h \
-	qa_32f_subtract_aligned16.h \
-	qa_32f_max_aligned16.h \
-	qa_32f_min_aligned16.h \
-	qa_64f_max_aligned16.h \
-	qa_64f_min_aligned16.h \
-	qa_32s_and_aligned16.h \
-	qa_32s_or_aligned16.h \
-	qa_32f_dot_prod_aligned16.h \
-	qa_32f_dot_prod_unaligned16.h \
-	qa_32f_fm_detect_aligned16.h \
-	qa_32fc_32f_multiply_aligned16.h \
-	qa_32fc_multiply_aligned16.h \
-	qa_32f_divide_aligned16.h \
-	qa_32f_multiply_aligned16.h \
-	qa_32f_sqrt_aligned16.h \
-	qa_8sc_multiply_conjugate_16sc_aligned16.h \
-	qa_8sc_multiply_conjugate_32fc_aligned16.h \
-	qa_32u_popcnt_aligned16.h \
-	qa_64u_popcnt_aligned16.h \
-	qa_64u_byteswap_aligned16.h \
-	qa_8sc_deinterleave_32f_aligned16.h \
-	qa_16sc_deinterleave_32f_aligned16.h \
-	qa_8sc_deinterleave_16s_aligned16.h \
-	qa_32f_interleave_32fc_aligned16.h \
-	qa_16u_byteswap_aligned16.h \
-	qa_16sc_deinterleave_16s_aligned16.h \
-	qa_32fc_deinterleave_real_32f_aligned16.h \
-	qa_32fc_magnitude_32f_aligned16.h \
-	qa_32fc_deinterleave_real_64f_aligned16.h \
-	qa_32fc_deinterleave_real_16s_aligned16.h \
-	qa_32fc_magnitude_16s_aligned16.h \
-	qa_32fc_deinterleave_32f_aligned16.h \
-	qa_8sc_deinterleave_real_8s_aligned16.h \
-	qa_32fc_deinterleave_64f_aligned16.h \
-	qa_32f_interleave_16sc_aligned16.h \
-	qa_16sc_deinterleave_real_8s_aligned16.h \
-	qa_16sc_deinterleave_real_32f_aligned16.h \
-	qa_16sc_magnitude_32f_aligned16.h \
-	qa_32u_byteswap_aligned16.h \
-	qa_16sc_deinterleave_real_16s_aligned16.h \
-	qa_8sc_deinterleave_real_32f_aligned16.h \
-	qa_16sc_magnitude_16s_aligned16.h \
-	qa_32f_normalize_aligned16.h \
-	qa_8sc_deinterleave_real_16s_aligned16.h \
-	qa_16s_convert_32f_aligned16.h \
-	qa_16s_convert_32f_unaligned16.h \
-	qa_16s_convert_8s_aligned16.h \
-	qa_16s_convert_8s_unaligned16.h \
-	qa_32f_convert_16s_aligned16.h \
-	qa_32f_convert_16s_unaligned16.h \
-	qa_32f_convert_32s_aligned16.h \
-	qa_32f_convert_32s_unaligned16.h \
-	qa_32f_convert_64f_aligned16.h \
-	qa_32f_convert_64f_unaligned16.h \
-	qa_32f_convert_8s_aligned16.h \
-	qa_32f_convert_8s_unaligned16.h \
-	qa_32s_convert_32f_aligned16.h \
-	qa_32s_convert_32f_unaligned16.h \
-	qa_64f_convert_32f_aligned16.h \
-	qa_64f_convert_32f_unaligned16.h \
-	qa_8s_convert_16s_aligned16.h \
-	qa_8s_convert_16s_unaligned16.h \
-	qa_8s_convert_32f_aligned16.h \
-	qa_8s_convert_32f_unaligned16.h \
-	qa_32fc_32f_power_32fc_aligned16.h \
-	qa_32f_power_aligned16.h \
-	qa_32fc_atan2_32f_aligned16.h \
-	qa_32fc_power_spectral_density_32f_aligned16.h \
-	qa_32fc_power_spectrum_32f_aligned16.h \
-	qa_32f_calc_spectral_noise_floor_aligned16.h \
-	qa_32f_accumulator_aligned16.h \
-	qa_32f_stddev_aligned16.h \
-	qa_32f_stddev_and_mean_aligned16.h
-
+	assembly.h
 
 # ----------------------------------------------------------------
 # Our test program
 # ----------------------------------------------------------------
 noinst_PROGRAMS = \
-	test_all
+	testqa
 
-test_all_SOURCES = test_all.cc
-test_all_LDADD   = libvolk_qa.la
+testqa_SOURCES = testqa.cc qa_utils.cc
+testqa_CPPFLAGS = -DBOOST_TEST_DYN_LINK -DBOOST_TEST_MAIN
+testqa_LDFLAGS = -lboost_unit_test_framework
+testqa_LDADD  = \
+	libvolk.la \
+	libvolk_runtime.la
 
 
 distclean-local: 
-- 
cgit 


From d486ff4b4c039c8b3b06b6519839d522cf69be69 Mon Sep 17 00:00:00 2001
From: Nick Foster
Date: Sun, 16 Jan 2011 14:03:16 -0800
Subject: volk_rename: renamed basically everything in the volk lib to have
 logically consistent function names

---
 volk/lib/Makefile.am |  3 ++-
 volk/lib/qa_utils.cc | 53 ++++++++++++++++++++++++++++++++++++----------------
 volk/lib/qa_utils.h  |  2 +-
 3 files changed, 40 insertions(+), 18 deletions(-)

(limited to 'volk/lib')

diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am
index 5c995148a..f609f5bf9 100644
--- a/volk/lib/Makefile.am
+++ b/volk/lib/Makefile.am
@@ -184,7 +184,8 @@ testqa_CPPFLAGS = -DBOOST_TEST_DYN_LINK -DBOOST_TEST_MAIN
 testqa_LDFLAGS = -lboost_unit_test_framework
 testqa_LDADD  = \
 	libvolk.la \
-	libvolk_runtime.la
+	libvolk_runtime.la \
+	../orc/libvolk_orc.la
 
 
 distclean-local: 
diff --git a/volk/lib/qa_utils.cc b/volk/lib/qa_utils.cc
index fa21db487..a8c00c143 100644
--- a/volk/lib/qa_utils.cc
+++ b/volk/lib/qa_utils.cc
@@ -110,7 +110,11 @@ static std::vector<std::string> get_arch_list(const int archs[]) {
 }
 
 static bool is_valid_type(std::string type) {
-    std::vector<std::string> valid_types = boost::assign::list_of("64f")("64u")("32fc")("32f")("32s")("32u")("16sc")("16s")("16u")("8s")("8sc")("8u");
+    std::vector<std::string> valid_types = boost::assign::list_of("64f")("64u")("32fc")("32f")
+                                                                 ("32s")("32u")("16sc")("16s")
+                                                                 ("16u")("8s")("8sc")("8u")
+                                                                 ("s32f")("s16u")("s16s")("s8u")
+                                                                 ("s8s");
     
     BOOST_FOREACH(std::string this_type, valid_types) {
         if(type == this_type) return true;
@@ -148,17 +152,11 @@ static void get_function_signature(std::vector<std::string> &inputsig,
         pos++;
     }
     
-    //if there's no output sig and only one input sig, assume there are 2 inputs
-    //this handles conversion fn's (which have a specified output sig) and most of the rest
-    if(outputsig.size() == 0 && inputsig.size() == 1) {
-        outputsig.push_back(inputsig[0]);
-        inputsig.push_back(inputsig[0]);
-    }//if there's no explicit output sig then assume the output is the same as the first input
-    else if(outputsig.size() == 0) outputsig.push_back(inputsig[0]);
-    
-    
     assert(inputsig.size() != 0);
-    assert(outputsig.size() != 0);
+}
+
+inline void run_cast_test1(volk_fn_1arg func, void *buff, unsigned int vlen, unsigned int iter, std::string arch) {
+    while(iter--) func(buff, vlen, arch.c_str());
 }
 
 inline void run_cast_test2(volk_fn_2arg func, void *outbuff, std::vector<void *> &inbuffs, unsigned int vlen, unsigned int iter, std::string arch) {
@@ -190,26 +188,42 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name,
     for(int i=0; i<inputsig.size(); i++) std::cout << "Input: " << inputsig[i] << std::endl;
     for(int i=0; i<outputsig.size(); i++) std::cout << "Output: " << outputsig[i] << std::endl;
     
-    //now that we have that, we'll set up input and output buffers based on the function signature
+    //now that we have that, we'll set up input buffers based on the function signature
     std::vector<void *> inbuffs;
     make_buffer_for_signature(inbuffs, inputsig, vlen);
     
+    //allocate output buffers -- one for each output for each arch
+    std::vector<void *> outbuffs;
+    BOOST_FOREACH(std::string arch, arch_list) {
+        make_buffer_for_signature(outbuffs, outputsig, vlen);
+    }
+
     //and set the input buffers to something random
     for(int i=0; i<inputsig.size(); i++) {
         load_random_data(inbuffs[i], inputsig[i], vlen);        
     }
     
-    //allocate output buffers -- one for each output for each arch
-    std::vector<void *> outbuffs;
-    BOOST_FOREACH(std::string arch, arch_list) {
-        make_buffer_for_signature(outbuffs, outputsig, vlen);
+    //so let's see here. if the operation has no output sig, it operates in place,
+    //and we want the output buffers to be the input buffers; we want to copy the input buffer to allllll the output buffers.
+    if(outputsig.size() == 0) {
+        //make a set of output buffers according to the input signature
+        BOOST_FOREACH(std::string arch, arch_list) {
+            make_buffer_for_signature(outbuffs, inputsig, vlen);
+        }
+        //copy input buffer[0] to all the output buffers so it has something to operate on
+        //output buffer element size is the same as input buffer[0]
+        if(
     }
+        
     
     //now run the test
     clock_t start, end;
     for(int i = 0; i < arch_list.size(); i++) {
         start = clock();
         switch(outputsig.size()+inputsig.size()) {
+            case 1:
+                run_cast_test1((volk_fn_1arg)(manual_func), outbuffs[i], vlen, iter, arch_list[i]); 
+                break;
             case 2:
                 run_cast_test2((volk_fn_2arg)(manual_func), outbuffs[i], inbuffs, vlen, iter, arch_list[i]);
                 break;
@@ -262,6 +276,13 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name,
                         return 1;
                     }
                 }
+            } else if(outputsig[0] == "8s" || outputsig[0] == "8u") {
+                for(int j=0; j<vlen; j++) {
+                    if(((uint8_t *)(outbuffs[generic_offset]))[j] != ((uint8_t *)(outbuffs[i]))[j]) {
+                        std::cout << "Generic: " << ((uint8_t *)(outbuffs[generic_offset]))[j] << " " << arch_list[i] << ": " << ((uint8_t *)(outbuffs[i]))[j] << std::endl;
+                        return 1;
+                    }
+                }
             } else { 
                 std::cout << "Error: invalid type " << outputsig[0] << std::endl;
                 return 1;
diff --git a/volk/lib/qa_utils.h b/volk/lib/qa_utils.h
index f81d652fb..00883bf8e 100644
--- a/volk/lib/qa_utils.h
+++ b/volk/lib/qa_utils.h
@@ -3,7 +3,6 @@
 
 #include <stdlib.h>
 #include <string>
-#include <volk/volk.h>
 
 float uniform(void);
 void random_floats(float *buf, unsigned n);
@@ -12,6 +11,7 @@ bool run_volk_tests(const int[], void(*)(), std::string, float, int, int);
 
 #define VOLK_RUN_TESTS(func, tol, len, iter) BOOST_CHECK_EQUAL(run_volk_tests(func##_arch_defs, (void (*)())func##_manual, std::string(#func), tol, len, iter), 0)
 
+typedef void (*volk_fn_1arg)(void *, unsigned int, const char*);
 typedef void (*volk_fn_2arg)(void *, void *, unsigned int, const char*);
 typedef void (*volk_fn_3arg)(void *, void *, void *, unsigned int, const char*);
 typedef void (*volk_fn_4arg)(void *, void *, void *, void *, unsigned int, const char*);
-- 
cgit 


From be1b7d9ffb90aa9c750e6c6793f00dbc8bec486d Mon Sep 17 00:00:00 2001
From: Nick Foster
Date: Wed, 19 Jan 2011 16:39:28 -0800
Subject: Volk: test suite supports scalar arguments and in-place operations

---
 volk/lib/qa_utils.cc | 357 +++++++++++++++++++++++++++++++--------------------
 volk/lib/qa_utils.h  |  15 ++-
 2 files changed, 231 insertions(+), 141 deletions(-)

(limited to 'volk/lib')

diff --git a/volk/lib/qa_utils.cc b/volk/lib/qa_utils.cc
index a8c00c143..e73b70985 100644
--- a/volk/lib/qa_utils.cc
+++ b/volk/lib/qa_utils.cc
@@ -7,7 +7,8 @@
 #include <iostream>
 #include <vector>
 #include <time.h>
-//#include <math.h>
+#include <math.h>
+#include <boost/lexical_cast.hpp>
 //#include <volk/volk_runtime.h>
 #include <volk/volk_registry.h>
 #include <volk/volk.h>
@@ -24,44 +25,53 @@ void random_floats (float *buf, unsigned n)
     buf[i] = uniform ();
 }
 
-void load_random_data(void *data, std::string sig, unsigned int n) {
-    if(sig == "32fc") {
-        random_floats((float *)data, n*2);
-    } else if(sig == "32f") {
+void load_random_data(void *data, volk_type_t type, unsigned int n) {
+    if(type.is_complex) n *= 2;
+    if(type.is_float) {
+        assert(type.size == 4); //TODO: double support
         random_floats((float *)data, n);
-    } else if(sig == "32u") {
-        for(int i=0; i<n; i++) ((uint32_t *)data)[i] = (uint32_t) ((rand() - (RAND_MAX/2)) / (RAND_MAX/2));
-    } else if(sig == "32s") {
-        for(int i=0; i<n; i++) ((int32_t *)data)[i] = ((int32_t) (rand() - (RAND_MAX/2)));
-    } else if(sig == "16u") {
-        for(int i=0; i<n; i++) ((uint16_t *)data)[i] = (uint16_t) ((rand() - (RAND_MAX/2)) / (RAND_MAX/2));
-    } else if(sig == "16s") {
-        for(int i=0; i<n; i++) ((int16_t *)data)[i] = ((int16_t)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 32768.0));
-    } else if(sig == "16sc") {
-        for(int i=0; i<n*2; i++) ((int16_t *)data)[i] = ((int16_t)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 32768.0));
-    } else if(sig == "8u") {
-        for(int i=0; i<n; i++) ((uint8_t *)data)[i] = ((uint8_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)) * 256.0));
-    } else if(sig == "8s") {
-        for(int i=0; i<n; i++) ((int8_t *)data)[i] = ((int8_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)) * 128.0));
-    } else std::cout << "load_random_data(): Invalid sig: " << sig << std::endl;
+    } else {
+        float int_max = pow(2, type.size*8);
+        if(type.is_signed) int_max /= 2.0;
+        for(int i=0; i<n; i++) {
+            float scaled_rand = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * int_max;
+            //man i really don't know how to do this in a more clever way, you have to cast down at some point
+            switch(type.size) {
+            case 8:
+                if(type.is_signed) ((int64_t *)data)[i] = (int64_t) scaled_rand;
+                else ((uint64_t *)data)[i] = (uint64_t) scaled_rand;
+            break;
+            case 4:
+                if(type.is_signed) ((int32_t *)data)[i] = (int32_t) scaled_rand;
+                else ((uint32_t *)data)[i] = (uint32_t) scaled_rand;
+            break;           
+            case 2:
+                if(type.is_signed) ((int16_t *)data)[i] = (int16_t) scaled_rand;
+                else ((uint16_t *)data)[i] = (uint16_t) scaled_rand;
+            break;
+            case 1:
+                if(type.is_signed) ((int8_t *)data)[i] = (int8_t) scaled_rand;
+                else ((uint8_t *)data)[i] = (uint8_t) scaled_rand;
+            break;
+            default:
+                throw; //no shenanigans here
+            }
+        }
+    }
 }
 
-template <class t>
-t *make_aligned_buffer(unsigned int len) {
-  t *buf;
+void *make_aligned_buffer(unsigned int len, unsigned int size) {
+  void *buf;
   int ret;
-  ret = posix_memalign((void**)&buf, 16, len * sizeof(t));
+  ret = posix_memalign((void**)&buf, 16, len * size);
   assert(ret == 0);
   return buf;
 }
 
-void make_buffer_for_signature(std::vector<void *> &buffs, std::vector<std::string> inputsig, unsigned int vlen) {
-    BOOST_FOREACH(std::string sig, inputsig) {
-        if     (sig=="32fc" || sig=="64f" || sig=="64u") buffs.push_back((void *) make_aligned_buffer<uint64_t>(vlen));
-        else if(sig=="32f" || sig=="32u" || sig=="32s" || sig=="16sc") buffs.push_back((void *) make_aligned_buffer<uint32_t>(vlen));
-        else if(sig=="16s" || sig=="16u" || sig=="8sc") buffs.push_back((void *) make_aligned_buffer<uint16_t>(vlen));
-        else if(sig=="8s" || sig=="8u") buffs.push_back((void *) make_aligned_buffer<uint8_t>(vlen));
-        else std::cout << "Invalid type: " << sig << std::endl;
+void make_buffer_for_signature(std::vector<void *> &buffs, std::vector<volk_type_t> inputsig, unsigned int vlen) {
+    BOOST_FOREACH(volk_type_t sig, inputsig) {
+        if(!sig.is_scalar) //we don't make buffers for scalars
+          buffs.push_back(make_aligned_buffer(vlen, sig.size*(sig.is_complex ? 2 : 1)));
     }
 }
 
@@ -109,22 +119,56 @@ static std::vector<std::string> get_arch_list(const int archs[]) {
     return archlist;
 }
 
-static bool is_valid_type(std::string type) {
-    std::vector<std::string> valid_types = boost::assign::list_of("64f")("64u")("32fc")("32f")
-                                                                 ("32s")("32u")("16sc")("16s")
-                                                                 ("16u")("8s")("8sc")("8u")
-                                                                 ("s32f")("s16u")("s16s")("s8u")
-                                                                 ("s8s");
+volk_type_t volk_type_from_string(std::string name) {
+    volk_type_t type;
+    type.is_float = false;
+    type.is_scalar = false;
+    type.is_complex = false;
+    type.is_signed = false;
+    type.size = 0;
+    type.str = name;
+    
+    assert(name.size() > 1);
     
-    BOOST_FOREACH(std::string this_type, valid_types) {
-        if(type == this_type) return true;
+    //is it a scalar?
+    if(name[0] == 's') { 
+        type.is_scalar = true;
+        name = name.substr(1, name.size()-1);
+    }
+    
+    //get the data size
+    int last_size_pos = name.find_last_of("0123456789");
+    if(last_size_pos < 0) throw 0;
+    //will throw if malformed
+    int size = boost::lexical_cast<int>(name.substr(0, last_size_pos+1));
+
+    assert(((size % 8) == 0) && (size <= 64) && (size != 0));
+    type.size = size/8; //in bytes
+    
+    for(int i=last_size_pos+1; i < name.size(); i++) {
+        switch (name[i]) {
+        case 'f':
+            type.is_float = true;
+            break;
+        case 'i':
+            type.is_signed = true;
+            break;
+        case 'c':
+            type.is_complex = true;
+            break;
+        case 'u':
+            type.is_signed = false;
+            break;
+        default:
+            throw;
+        }
     }
-    return false;
-}
     
+    return type;
+}
 
-static void get_function_signature(std::vector<std::string> &inputsig, 
-                                   std::vector<std::string> &outputsig, 
+static void get_signatures_from_name(std::vector<volk_type_t> &inputsig, 
+                                   std::vector<volk_type_t> &outputsig, 
                                    std::string name) {
     boost::char_separator<char> sep("_");
     boost::tokenizer<boost::char_separator<char> > tok(name, sep);
@@ -133,25 +177,38 @@ static void get_function_signature(std::vector<std::string> &inputsig,
     toked.assign(tok.begin(), tok.end());
     
     assert(toked[0] == "volk");
-    
-    inputsig.push_back(toked[1]); //mandatory
-    int pos = 2;
-    bool valid_type = true;
-    while(valid_type && pos < toked.size()) {
-        if(is_valid_type(toked[pos])) inputsig.push_back(toked[pos]);
-        else valid_type = false;
-        pos++;
-    }
-    while(!valid_type && pos < toked.size()) {
-        if(is_valid_type(toked[pos])) valid_type = true;
-        else pos++;
-    }
-    while(valid_type && pos < toked.size()) {
-        if(is_valid_type(toked[pos])) outputsig.push_back(toked[pos]);
-        else valid_type = false;
-        pos++;
+    toked.erase(toked.begin());
+
+    //ok. we're assuming a string in the form
+    //(sig)_(multiplier-opt)_..._(name)_(sig)_(multiplier-opt)_..._(alignment)
+
+    enum { SIDE_INPUT, SIDE_OUTPUT } side = SIDE_INPUT;
+    std::string fn_name;
+    volk_type_t type;
+    BOOST_FOREACH(std::string token, toked) {
+        try {
+            type = volk_type_from_string(token);
+            if(side == SIDE_INPUT) inputsig.push_back(type);
+            else outputsig.push_back(type);
+        } catch (...){
+            if(token[0] == 'x') { //it's a multiplier
+                if(side == SIDE_INPUT) assert(inputsig.size() > 0);
+                else assert(outputsig.size() > 0);
+                int multiplier = boost::lexical_cast<int>(token.substr(1, token.size()-1)); //will throw if invalid
+                for(int i=1; i<multiplier; i++) {
+                    if(side == SIDE_INPUT) inputsig.push_back(inputsig.back());
+                    else outputsig.push_back(outputsig.back());
+                }
+            }
+            else if(side == SIDE_INPUT) { //it's the function name, at least it better be
+                side = SIDE_OUTPUT;
+                fn_name = token;
+            } else {
+                if(token != toked.back()) throw; //the last token in the name is the alignment
+            }
+        }
     }
-    
+    //we don't need an output signature (some fn's operate on the input data, "in place"), but we do need at least one input!
     assert(inputsig.size() != 0);
 }
 
@@ -171,61 +228,98 @@ inline void run_cast_test4(volk_fn_4arg func, void *outbuff, std::vector<void *>
     while(iter--) func(outbuff, inbuffs[0], inbuffs[1], inbuffs[2], vlen, arch.c_str());
 }
 
+inline void run_cast_test1_s32f(volk_fn_1arg_s32f func, void *buff, float scalar, unsigned int vlen, unsigned int iter, std::string arch) {
+    while(iter--) func(buff, scalar, vlen, arch.c_str());
+}
+
+inline void run_cast_test2_s32f(volk_fn_2arg_s32f func, void *outbuff, std::vector<void *> &inbuffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch) {
+    while(iter--) func(outbuff, inbuffs[0], scalar, vlen, arch.c_str());
+}
+
+template <class t>
+bool fcompare(t *in1, t *in2, unsigned int vlen, float tol) {
+    for(int i=0; i<vlen; i++) {
+        if(fabs(((t *)(in1))[i] - ((t *)(in2))[i]) > tol) return 1;
+    }
+    return 0;
+}
+
+template <class t>
+bool icompare(t *in1, t *in2, unsigned int vlen) {
+    for(int i=0; i<vlen; i++) {
+        if(((t *)(in1))[i] != ((t *)(in2))[i]) return 1;
+    }
+    return 0;
+}
+
 bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name, float tol, int vlen, int iter) {
     std::cout << "RUN_VOLK_TESTS: " << name << std::endl;
     
     //first let's get a list of available architectures for the test
     std::vector<std::string> arch_list = get_arch_list(archs);
     
-    BOOST_FOREACH(std::string arch, arch_list) {
-        std::cout << "Found an arch: " << arch << std::endl;
-    }
-    
     //now we have to get a function signature by parsing the name
-    std::vector<std::string> inputsig, outputsig;
-    get_function_signature(inputsig, outputsig, name);
-
-    for(int i=0; i<inputsig.size(); i++) std::cout << "Input: " << inputsig[i] << std::endl;
-    for(int i=0; i<outputsig.size(); i++) std::cout << "Output: " << outputsig[i] << std::endl;
-    
-    //now that we have that, we'll set up input buffers based on the function signature
-    std::vector<void *> inbuffs;
-    make_buffer_for_signature(inbuffs, inputsig, vlen);
+    std::vector<volk_type_t> inputsig, outputsig;
+    get_signatures_from_name(inputsig, outputsig, name);
     
-    //allocate output buffers -- one for each output for each arch
-    std::vector<void *> outbuffs;
-    BOOST_FOREACH(std::string arch, arch_list) {
-        make_buffer_for_signature(outbuffs, outputsig, vlen);
-    }
-
-    //and set the input buffers to something random
+    std::vector<volk_type_t> inputsc, outputsc;
     for(int i=0; i<inputsig.size(); i++) {
-        load_random_data(inbuffs[i], inputsig[i], vlen);        
+        if(inputsig[i].is_scalar) {
+            inputsc.push_back(inputsig[i]);
+            inputsig.erase(inputsig.begin() + i);
+        }
     }
+    for(int i=0; i<outputsig.size(); i++) {
+        if(outputsig[i].is_scalar) {
+            outputsc.push_back(outputsig[i]);
+            outputsig.erase(outputsig.begin() + i);
+        }
+    }
+    assert(outputsc.size() == 0); //we don't do output scalars yet
+
+    //for(int i=0; i<inputsig.size(); i++) std::cout << "Input: " << inputsig[i].str << std::endl;
+    //for(int i=0; i<outputsig.size(); i++) std::cout << "Output: " << outputsig[i].str << std::endl;
+    std::vector<void *> inbuffs, outbuffs;
     
-    //so let's see here. if the operation has no output sig, it operates in place,
-    //and we want the output buffers to be the input buffers; we want to copy the input buffer to allllll the output buffers.
-    if(outputsig.size() == 0) {
-        //make a set of output buffers according to the input signature
-        BOOST_FOREACH(std::string arch, arch_list) {
+    if(outputsig.size() == 0) { //we're operating in place...
+        //assert(inputsig.size() == 1); //we only support 0 output 1 input right now...
+        make_buffer_for_signature(inbuffs, inputsig, vlen); //let's make an input buffer
+        load_random_data(inbuffs[0], inputsig[0], vlen); //and load it with random data
+        BOOST_FOREACH(std::string arch, arch_list) { //then copy the same random data to each output buffer
             make_buffer_for_signature(outbuffs, inputsig, vlen);
+            memcpy(outbuffs.back(), inbuffs[0], vlen*inputsig[0].size*(inputsig[0].is_complex?2:1));
+        }
+    } else {
+        make_buffer_for_signature(inbuffs, inputsig, vlen);
+        BOOST_FOREACH(std::string arch, arch_list) {
+            make_buffer_for_signature(outbuffs, outputsig, vlen);
+        }
+    
+        //and set the input buffers to something random
+        for(int i=0; i<inbuffs.size(); i++) {
+            load_random_data(inbuffs[i], inputsig[i], vlen);        
         }
-        //copy input buffer[0] to all the output buffers so it has something to operate on
-        //output buffer element size is the same as input buffer[0]
-        if(
     }
-        
     
     //now run the test
     clock_t start, end;
     for(int i = 0; i < arch_list.size(); i++) {
         start = clock();
-        switch(outputsig.size()+inputsig.size()) {
+
+        switch(inputsig.size() + outputsig.size()) {
             case 1:
-                run_cast_test1((volk_fn_1arg)(manual_func), outbuffs[i], vlen, iter, arch_list[i]); 
+                if(inputsc.size() == 0) {
+                    run_cast_test1((volk_fn_1arg)(manual_func), outbuffs[i], vlen, iter, arch_list[i]); 
+                } else if(inputsc.size() == 1 && inputsc[0].is_float) {
+                    run_cast_test1_s32f((volk_fn_1arg_s32f)(manual_func), outbuffs[i], 1000.0, vlen, iter, arch_list[i]);
+                } else throw "unsupported 1 arg function >1 scalars";
                 break;
             case 2:
-                run_cast_test2((volk_fn_2arg)(manual_func), outbuffs[i], inbuffs, vlen, iter, arch_list[i]);
+                if(inputsc.size() == 0) {
+                    run_cast_test2((volk_fn_2arg)(manual_func), outbuffs[i], inbuffs, vlen, iter, arch_list[i]);
+                } else if(inputsc.size() == 1 && inputsc[0].is_float) {
+                    run_cast_test2_s32f((volk_fn_2arg_s32f)(manual_func), outbuffs[i], inbuffs, 1000.0, vlen, iter, arch_list[i]);
+                } else throw "unsupported 2 arg function >1 scalars";
                 break;
             case 3:
                 run_cast_test3((volk_fn_3arg)(manual_func), outbuffs[i], inbuffs, vlen, iter, arch_list[i]);
@@ -234,69 +328,52 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name,
                 run_cast_test4((volk_fn_4arg)(manual_func), outbuffs[i], inbuffs, vlen, iter, arch_list[i]);
                 break;
             default:
+                throw "no function handler for this signature";
                 break;
         }
+        
         end = clock();
         std::cout << arch_list[i] << " completed in " << (double)(end-start)/(double)CLOCKS_PER_SEC << "s" << std::endl;
     }
-
     //and now compare each output to the generic output
     //first we have to know which output is the generic one, they aren't in order...
     int generic_offset;
     for(int i=0; i<arch_list.size(); i++) 
         if(arch_list[i] == "generic") generic_offset=i;
-    
+        
+    //now compare
+    if(outputsig.size() == 0) outputsig = inputsig; //a hack, i know
+
+    bool fail = false;
     for(int i=0; i<arch_list.size(); i++) {
         if(i != generic_offset) {
-            if(outputsig[0] == "32fc") {
-                for(int j=0; j<vlen*2; j++) {
-                    if(fabs(((float *)(outbuffs[generic_offset]))[j] - ((float *)(outbuffs[i]))[j]) > tol) {
-                        std::cout << "Generic: " << ((float *)(outbuffs[generic_offset]))[j] << " " << arch_list[i] << ": " << ((float *)(outbuffs[i]))[j] << std::endl;
-                        return 1;
-                    }
-                }
-            } else if(outputsig[0] == "32f") {
-                for(int j=0; j<vlen; j++) {
-                    if(fabs(((float *)(outbuffs[generic_offset]))[j] - ((float *)(outbuffs[i]))[j]) > tol) {
-                        std::cout << "Generic: " << ((float *)(outbuffs[generic_offset]))[j] << " " << arch_list[i] << ": " << ((float *)(outbuffs[i]))[j] << std::endl;
-                        return 1;
-                    }
-                }
-            } else if(outputsig[0] == "32u" || outputsig[0] == "32s" || outputsig[0] == "16sc") {
-                for(int j=0; j<vlen; j++) {
-                    if(((uint32_t *)(outbuffs[generic_offset]))[j] != ((uint32_t *)(outbuffs[i]))[j]) {
-                        std::cout << "Generic: " << ((uint32_t *)(outbuffs[generic_offset]))[j] << " " << arch_list[i] << ": " << ((uint32_t *)(outbuffs[i]))[j] << std::endl;
-                        return 1;
-                    }
-                }
-            } else if(outputsig[0] == "16u" || outputsig[0] == "16s" || outputsig[0] == "8sc") {
-                for(int j=0; j<vlen; j++) {
-                    if(((uint16_t *)(outbuffs[generic_offset]))[j] != ((uint16_t *)(outbuffs[i]))[j]) {
-                        std::cout << "Generic: " << ((uint16_t *)(outbuffs[generic_offset]))[j] << " " << arch_list[i] << ": " << ((uint16_t *)(outbuffs[i]))[j] << std::endl;
-                        return 1;
-                    }
-                }
-            } else if(outputsig[0] == "8s" || outputsig[0] == "8u") {
-                for(int j=0; j<vlen; j++) {
-                    if(((uint8_t *)(outbuffs[generic_offset]))[j] != ((uint8_t *)(outbuffs[i]))[j]) {
-                        std::cout << "Generic: " << ((uint8_t *)(outbuffs[generic_offset]))[j] << " " << arch_list[i] << ": " << ((uint8_t *)(outbuffs[i]))[j] << std::endl;
-                        return 1;
-                    }
-                }
+            if(outputsig[0].str == "32fc") {
+                fail = fcompare((float *) outbuffs[generic_offset], (float *) outbuffs[i], vlen*2, tol);
+            } else if(outputsig[0].str == "32f") {
+                fail = fcompare((float *) outbuffs[generic_offset], (float *) outbuffs[i], vlen, tol);
+            } else if(outputsig[0].str == "32u" || outputsig[0].str == "32s" || outputsig[0].str == "16sc") {
+                fail = icompare((uint32_t *) outbuffs[generic_offset], (uint32_t *) outbuffs[i], vlen);
+            } else if(outputsig[0].size == 2) {
+                fail = icompare((uint16_t *) outbuffs[generic_offset], (uint16_t *) outbuffs[i], vlen);
+            } else if(outputsig[0].size == 1) {
+                fail = icompare((uint8_t *) outbuffs[generic_offset], (uint8_t *) outbuffs[i], vlen);
             } else { 
-                std::cout << "Error: invalid type " << outputsig[0] << std::endl;
-                return 1;
+                std::cout << "Error: invalid type " << outputsig[0].str << std::endl;
+                fail = true;
+            }
+            if(fail) {
+                std::cout << name << ": fail on arch " << arch_list[i] << std::endl;
             }
         }
     }
 
-    BOOST_FOREACH(void *buf, inbuffs) {
-        free(buf);
-    }
-    BOOST_FOREACH(void *buf, outbuffs) {
-        free(buf);
-    }
-    return 0;
+//    BOOST_FOREACH(void *buf, inbuffs) {
+//        free(buf);
+//    }
+//    BOOST_FOREACH(void *buf, outbuffs) {
+//        free(buf);
+//    }
+    return fail;
 }
 
 
diff --git a/volk/lib/qa_utils.h b/volk/lib/qa_utils.h
index 00883bf8e..79c5d7778 100644
--- a/volk/lib/qa_utils.h
+++ b/volk/lib/qa_utils.h
@@ -4,6 +4,17 @@
 #include <stdlib.h>
 #include <string>
 
+struct volk_type_t {
+    bool is_float;
+    bool is_scalar;
+    bool is_signed;
+    bool is_complex;
+    int size;
+    std::string str;
+};
+
+volk_type_t volk_type_from_string(std::string);
+
 float uniform(void);
 void random_floats(float *buf, unsigned n);
 
@@ -11,9 +22,11 @@ bool run_volk_tests(const int[], void(*)(), std::string, float, int, int);
 
 #define VOLK_RUN_TESTS(func, tol, len, iter) BOOST_CHECK_EQUAL(run_volk_tests(func##_arch_defs, (void (*)())func##_manual, std::string(#func), tol, len, iter), 0)
 
-typedef void (*volk_fn_1arg)(void *, unsigned int, const char*);
+typedef void (*volk_fn_1arg)(void *, unsigned int, const char*); //one input, operate in place
 typedef void (*volk_fn_2arg)(void *, void *, unsigned int, const char*);
 typedef void (*volk_fn_3arg)(void *, void *, void *, unsigned int, const char*);
 typedef void (*volk_fn_4arg)(void *, void *, void *, void *, unsigned int, const char*);
+typedef void (*volk_fn_1arg_s32f)(void *, float, unsigned int, const char*); //one input vector, one scalar float input
+typedef void (*volk_fn_2arg_s32f)(void *, void *, float, unsigned int, const char*);
 
 #endif //VOLK_QA_UTILS_H
-- 
cgit 


From e3600f59e76c3dc08aedfd77629b7c5c48df86af Mon Sep 17 00:00:00 2001
From: Nick Foster
Date: Thu, 20 Jan 2011 16:30:09 -0800
Subject: volk: renamed all files and added all tests; some parts of the
 tests are still broken.

---
 volk/lib/qa_utils.cc | 101 +++++++++++++++++++++++++++++++--------------------
 volk/lib/qa_utils.h  |   1 +
 2 files changed, 62 insertions(+), 40 deletions(-)

(limited to 'volk/lib')

diff --git a/volk/lib/qa_utils.cc b/volk/lib/qa_utils.cc
index e73b70985..4c151bd6f 100644
--- a/volk/lib/qa_utils.cc
+++ b/volk/lib/qa_utils.cc
@@ -19,7 +19,8 @@ float uniform() {
   return 2.0 * ((float) rand() / RAND_MAX - 0.5);	// uniformly (-1, 1)
 }
 
-void random_floats (float *buf, unsigned n)
+template <class t>
+void random_floats (t *buf, unsigned n)
 {
   for (unsigned i = 0; i < n; i++)
     buf[i] = uniform ();
@@ -28,8 +29,8 @@ void random_floats (float *buf, unsigned n)
 void load_random_data(void *data, volk_type_t type, unsigned int n) {
     if(type.is_complex) n *= 2;
     if(type.is_float) {
-        assert(type.size == 4); //TODO: double support
-        random_floats((float *)data, n);
+        if(type.size == 8) random_floats<double>((double *)data, n);
+        else random_floats<float>((float *)data, n);
     } else {
         float int_max = pow(2, type.size*8);
         if(type.is_signed) int_max /= 2.0;
@@ -54,7 +55,7 @@ void load_random_data(void *data, volk_type_t type, unsigned int n) {
                 else ((uint8_t *)data)[i] = (uint8_t) scaled_rand;
             break;
             default:
-                throw; //no shenanigans here
+                throw "load_random_data: no support for data size > 8 or < 1"; //no shenanigans here
             }
         }
     }
@@ -94,6 +95,9 @@ static std::vector<std::string> get_arch_list(const int archs[]) {
         case (1<<LV_SSE2):
             archlist.push_back("sse2");
             break;
+        case (1<<LV_SSE3):
+            archlist.push_back("sse3");
+            break;
         case (1<<LV_SSSE3):
             archlist.push_back("ssse3");
             break;
@@ -128,7 +132,7 @@ volk_type_t volk_type_from_string(std::string name) {
     type.size = 0;
     type.str = name;
     
-    assert(name.size() > 1);
+    if(name.size() < 2) throw std::string("name too short to be a datatype");
     
     //is it a scalar?
     if(name[0] == 's') { 
@@ -138,7 +142,7 @@ volk_type_t volk_type_from_string(std::string name) {
     
     //get the data size
     int last_size_pos = name.find_last_of("0123456789");
-    if(last_size_pos < 0) throw 0;
+    if(last_size_pos < 0) throw std::string("no size spec in type ").append(name);
     //will throw if malformed
     int size = boost::lexical_cast<int>(name.substr(0, last_size_pos+1));
 
@@ -182,12 +186,14 @@ static void get_signatures_from_name(std::vector<volk_type_t> &inputsig,
     //ok. we're assuming a string in the form
     //(sig)_(multiplier-opt)_..._(name)_(sig)_(multiplier-opt)_..._(alignment)
 
-    enum { SIDE_INPUT, SIDE_OUTPUT } side = SIDE_INPUT;
+    enum { SIDE_INPUT, SIDE_NAME, SIDE_OUTPUT } side = SIDE_INPUT;
     std::string fn_name;
     volk_type_t type;
     BOOST_FOREACH(std::string token, toked) {
         try {
             type = volk_type_from_string(token);
+            if(side == SIDE_NAME) side = SIDE_OUTPUT; //if this is the first one after the name...
+            
             if(side == SIDE_INPUT) inputsig.push_back(type);
             else outputsig.push_back(type);
         } catch (...){
@@ -201,9 +207,11 @@ static void get_signatures_from_name(std::vector<volk_type_t> &inputsig,
                 }
             }
             else if(side == SIDE_INPUT) { //it's the function name, at least it better be
-                side = SIDE_OUTPUT;
-                fn_name = token;
-            } else {
+                side = SIDE_NAME;
+                fn_name.append("_");
+                fn_name.append(token);
+            } 
+            else if(side == SIDE_OUTPUT) {
                 if(token != toked.back()) throw; //the last token in the name is the alignment
             }
         }
@@ -236,20 +244,40 @@ inline void run_cast_test2_s32f(volk_fn_2arg_s32f func, void *outbuff, std::vect
     while(iter--) func(outbuff, inbuffs[0], scalar, vlen, arch.c_str());
 }
 
+inline void run_cast_test3_s32f(volk_fn_3arg_s32f func, void *outbuff, std::vector<void *> &inbuffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch) {
+    while(iter--) func(outbuff, inbuffs[0], inbuffs[1], scalar, vlen, arch.c_str());
+}
+
 template <class t>
 bool fcompare(t *in1, t *in2, unsigned int vlen, float tol) {
+    bool fail = false;
+    int print_max_errs = 10;
     for(int i=0; i<vlen; i++) {
-        if(fabs(((t *)(in1))[i] - ((t *)(in2))[i]) > tol) return 1;
+        if(fabs(((t *)(in1))[i] - ((t *)(in2))[i])/(((t *)in1)[i]) > tol) {
+            fail=true;
+            if(print_max_errs-- > 0) {
+                std::cout << "offset " << i << " in1: " << t(((t *)(in1))[i]) << " in2: " << t(((t *)(in2))[i]) << std::endl;
+            }
+        }
     }
-    return 0;
+    
+    return fail;
 }
 
 template <class t>
-bool icompare(t *in1, t *in2, unsigned int vlen) {
+bool icompare(t *in1, t *in2, unsigned int vlen, float tol) {
+    bool fail = false;
+    int print_max_errs = 10;
     for(int i=0; i<vlen; i++) {
-        if(((t *)(in1))[i] != ((t *)(in2))[i]) return 1;
+        if(((t *)(in1))[i] != ((t *)(in2))[i]) {
+            fail=true;
+            if(print_max_errs-- > 0) {
+                std::cout << "offset " << i << " in1: " << int(((t *)(in1))[i]) << " in2: " << int(((t *)(in2))[i]) << std::endl;
+            }
+        }
     }
-    return 0;
+    
+    return fail;
 }
 
 bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name, float tol, int vlen, int iter) {
@@ -300,7 +328,7 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name,
             load_random_data(inbuffs[i], inputsig[i], vlen);        
         }
     }
-    
+
     //now run the test
     clock_t start, end;
     for(int i = 0; i < arch_list.size(); i++) {
@@ -311,18 +339,22 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name,
                 if(inputsc.size() == 0) {
                     run_cast_test1((volk_fn_1arg)(manual_func), outbuffs[i], vlen, iter, arch_list[i]); 
                 } else if(inputsc.size() == 1 && inputsc[0].is_float) {
-                    run_cast_test1_s32f((volk_fn_1arg_s32f)(manual_func), outbuffs[i], 1000.0, vlen, iter, arch_list[i]);
+                    run_cast_test1_s32f((volk_fn_1arg_s32f)(manual_func), outbuffs[i], 255.0, vlen, iter, arch_list[i]);
                 } else throw "unsupported 1 arg function >1 scalars";
                 break;
             case 2:
                 if(inputsc.size() == 0) {
                     run_cast_test2((volk_fn_2arg)(manual_func), outbuffs[i], inbuffs, vlen, iter, arch_list[i]);
                 } else if(inputsc.size() == 1 && inputsc[0].is_float) {
-                    run_cast_test2_s32f((volk_fn_2arg_s32f)(manual_func), outbuffs[i], inbuffs, 1000.0, vlen, iter, arch_list[i]);
+                    run_cast_test2_s32f((volk_fn_2arg_s32f)(manual_func), outbuffs[i], inbuffs, 255.0, vlen, iter, arch_list[i]);
                 } else throw "unsupported 2 arg function >1 scalars";
                 break;
             case 3:
-                run_cast_test3((volk_fn_3arg)(manual_func), outbuffs[i], inbuffs, vlen, iter, arch_list[i]);
+                if(inputsc.size() == 0) {
+                    run_cast_test3((volk_fn_3arg)(manual_func), outbuffs[i], inbuffs, vlen, iter, arch_list[i]);
+                } else if(inputsc.size() == 1 && inputsc[0].is_float) {
+                    run_cast_test3_s32f((volk_fn_3arg_s32f)(manual_func), outbuffs[i], inbuffs, 255.0, vlen, iter, arch_list[i]);
+                } else throw "unsupported 3 arg function >1 scalars";
                 break;
             case 4:
                 run_cast_test4((volk_fn_4arg)(manual_func), outbuffs[i], inbuffs, vlen, iter, arch_list[i]);
@@ -337,29 +369,24 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name,
     }
     //and now compare each output to the generic output
     //first we have to know which output is the generic one, they aren't in order...
-    int generic_offset;
+    int generic_offset=0;
     for(int i=0; i<arch_list.size(); i++) 
         if(arch_list[i] == "generic") generic_offset=i;
-        
+
     //now compare
     if(outputsig.size() == 0) outputsig = inputsig; //a hack, i know
 
     bool fail = false;
     for(int i=0; i<arch_list.size(); i++) {
         if(i != generic_offset) {
-            if(outputsig[0].str == "32fc") {
-                fail = fcompare((float *) outbuffs[generic_offset], (float *) outbuffs[i], vlen*2, tol);
-            } else if(outputsig[0].str == "32f") {
-                fail = fcompare((float *) outbuffs[generic_offset], (float *) outbuffs[i], vlen, tol);
-            } else if(outputsig[0].str == "32u" || outputsig[0].str == "32s" || outputsig[0].str == "16sc") {
-                fail = icompare((uint32_t *) outbuffs[generic_offset], (uint32_t *) outbuffs[i], vlen);
-            } else if(outputsig[0].size == 2) {
-                fail = icompare((uint16_t *) outbuffs[generic_offset], (uint16_t *) outbuffs[i], vlen);
-            } else if(outputsig[0].size == 1) {
-                fail = icompare((uint8_t *) outbuffs[generic_offset], (uint8_t *) outbuffs[i], vlen);
-            } else { 
-                std::cout << "Error: invalid type " << outputsig[0].str << std::endl;
-                fail = true;
+            if(outputsig[0].is_float) {
+                if(outputsig[0].size == 8) {
+                    fail = fcompare((double *) outbuffs[generic_offset], (double *) outbuffs[i], vlen*(outputsig[0].is_complex ? 2 : 1), tol);
+                } else {
+                    fail = fcompare((float *) outbuffs[generic_offset], (float *) outbuffs[i], vlen*(outputsig[0].is_complex ? 2 : 1), tol);
+                }
+            } else {
+                fail = memcmp(outbuffs[generic_offset], outbuffs[i], outputsig[0].size * vlen * (outputsig[0].is_complex ? 2:1));
             }
             if(fail) {
                 std::cout << name << ": fail on arch " << arch_list[i] << std::endl;
@@ -367,12 +394,6 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name,
         }
     }
 
-//    BOOST_FOREACH(void *buf, inbuffs) {
-//        free(buf);
-//    }
-//    BOOST_FOREACH(void *buf, outbuffs) {
-//        free(buf);
-//    }
     return fail;
 }
 
diff --git a/volk/lib/qa_utils.h b/volk/lib/qa_utils.h
index 79c5d7778..79fc8f006 100644
--- a/volk/lib/qa_utils.h
+++ b/volk/lib/qa_utils.h
@@ -28,5 +28,6 @@ typedef void (*volk_fn_3arg)(void *, void *, void *, unsigned int, const char*);
 typedef void (*volk_fn_4arg)(void *, void *, void *, void *, unsigned int, const char*);
 typedef void (*volk_fn_1arg_s32f)(void *, float, unsigned int, const char*); //one input vector, one scalar float input
 typedef void (*volk_fn_2arg_s32f)(void *, void *, float, unsigned int, const char*);
+typedef void (*volk_fn_3arg_s32f)(void *, void *, void *, float, unsigned int, const char*);
 
 #endif //VOLK_QA_UTILS_H
-- 
cgit 


From 82cafc4381e48ccc9423d2dc88720e5c1347d940 Mon Sep 17 00:00:00 2001
From: Nick Foster
Date: Fri, 21 Jan 2011 12:26:52 -0800
Subject: Volk: fixed naming error. Test coverage is at 75%; support for
 multiple outputs still needs to be added to the checker. The new test suite
 exposed some errors in the library and a couple of bad Orc functions, which
 need to be investigated.

---
 volk/lib/qa_utils.cc | 51 +++++++++++++++++++++++++++++++++++++++++++--------
 volk/lib/testqa.cc   | 34 ++++++++++++++++------------------
 2 files changed, 59 insertions(+), 26 deletions(-)

(limited to 'volk/lib')

diff --git a/volk/lib/qa_utils.cc b/volk/lib/qa_utils.cc
index 4c151bd6f..8f57a9b90 100644
--- a/volk/lib/qa_utils.cc
+++ b/volk/lib/qa_utils.cc
@@ -253,6 +253,7 @@ bool fcompare(t *in1, t *in2, unsigned int vlen, float tol) {
     bool fail = false;
     int print_max_errs = 10;
     for(int i=0; i<vlen; i++) {
+        if(((t *)(in1))[i] < 1e-30) continue; //below around here we'll start to get roundoff errors due to float precision
         if(fabs(((t *)(in1))[i] - ((t *)(in2))[i])/(((t *)in1)[i]) > tol) {
             fail=true;
             if(print_max_errs-- > 0) {
@@ -265,14 +266,14 @@ bool fcompare(t *in1, t *in2, unsigned int vlen, float tol) {
 }
 
 template <class t>
-bool icompare(t *in1, t *in2, unsigned int vlen, float tol) {
+bool icompare(t *in1, t *in2, unsigned int vlen, unsigned int tol) {
     bool fail = false;
     int print_max_errs = 10;
     for(int i=0; i<vlen; i++) {
-        if(((t *)(in1))[i] != ((t *)(in2))[i]) {
+        if(abs(((t *)(in1))[i] - ((t *)(in2))[i]) > tol) {
             fail=true;
             if(print_max_errs-- > 0) {
-                std::cout << "offset " << i << " in1: " << int(((t *)(in1))[i]) << " in2: " << int(((t *)(in2))[i]) << std::endl;
+                std::cout << "offset " << i << " in1: " << static_cast<int>(t(((t *)(in1))[i])) << " in2: " << static_cast<int>(t(((t *)(in2))[i])) << std::endl;
             }
         }
     }
@@ -339,21 +340,21 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name,
                 if(inputsc.size() == 0) {
                     run_cast_test1((volk_fn_1arg)(manual_func), outbuffs[i], vlen, iter, arch_list[i]); 
                 } else if(inputsc.size() == 1 && inputsc[0].is_float) {
-                    run_cast_test1_s32f((volk_fn_1arg_s32f)(manual_func), outbuffs[i], 255.0, vlen, iter, arch_list[i]);
+                    run_cast_test1_s32f((volk_fn_1arg_s32f)(manual_func), outbuffs[i], 127.0, vlen, iter, arch_list[i]);
                 } else throw "unsupported 1 arg function >1 scalars";
                 break;
             case 2:
                 if(inputsc.size() == 0) {
                     run_cast_test2((volk_fn_2arg)(manual_func), outbuffs[i], inbuffs, vlen, iter, arch_list[i]);
                 } else if(inputsc.size() == 1 && inputsc[0].is_float) {
-                    run_cast_test2_s32f((volk_fn_2arg_s32f)(manual_func), outbuffs[i], inbuffs, 255.0, vlen, iter, arch_list[i]);
+                    run_cast_test2_s32f((volk_fn_2arg_s32f)(manual_func), outbuffs[i], inbuffs, 127.0, vlen, iter, arch_list[i]);
                 } else throw "unsupported 2 arg function >1 scalars";
                 break;
             case 3:
                 if(inputsc.size() == 0) {
                     run_cast_test3((volk_fn_3arg)(manual_func), outbuffs[i], inbuffs, vlen, iter, arch_list[i]);
                 } else if(inputsc.size() == 1 && inputsc[0].is_float) {
-                    run_cast_test3_s32f((volk_fn_3arg_s32f)(manual_func), outbuffs[i], inbuffs, 255.0, vlen, iter, arch_list[i]);
+                    run_cast_test3_s32f((volk_fn_3arg_s32f)(manual_func), outbuffs[i], inbuffs, 127.0, vlen, iter, arch_list[i]);
                 } else throw "unsupported 3 arg function >1 scalars";
                 break;
             case 4:
@@ -375,7 +376,7 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name,
 
     //now compare
     if(outputsig.size() == 0) outputsig = inputsig; //a hack, i know
-
+    //TODO: loop over the output signature as well
     bool fail = false;
     for(int i=0; i<arch_list.size(); i++) {
         if(i != generic_offset) {
@@ -386,7 +387,41 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name,
                     fail = fcompare((float *) outbuffs[generic_offset], (float *) outbuffs[i], vlen*(outputsig[0].is_complex ? 2 : 1), tol);
                 }
             } else {
-                fail = memcmp(outbuffs[generic_offset], outbuffs[i], outputsig[0].size * vlen * (outputsig[0].is_complex ? 2:1));
+                //i could replace this whole switch statement with a memcmp if i wasn't interested in printing the outputs where they differ
+                switch(outputsig[0].size) {
+                case 8:
+                    if(outputsig[0].is_signed) {
+                        fail = icompare((int64_t *) outbuffs[generic_offset], (int64_t *) outbuffs[i], vlen*(outputsig[0].is_complex ? 2 : 1), tol);
+                    } else {
+                        fail = icompare((uint64_t *) outbuffs[generic_offset], (uint64_t *) outbuffs[i], vlen*(outputsig[0].is_complex ? 2 : 1), tol);
+                    }
+                    break;
+                case 4:
+                    if(outputsig[0].is_signed) {
+                        fail = icompare((int32_t *) outbuffs[generic_offset], (int32_t *) outbuffs[i], vlen*(outputsig[0].is_complex ? 2 : 1), tol);
+                    } else {
+                        fail = icompare((uint32_t *) outbuffs[generic_offset], (uint32_t *) outbuffs[i], vlen*(outputsig[0].is_complex ? 2 : 1), tol);
+                    }
+                    break;
+                case 2:
+                    if(outputsig[0].is_signed) {
+                        fail = icompare((int16_t *) outbuffs[generic_offset], (int16_t *) outbuffs[i], vlen*(outputsig[0].is_complex ? 2 : 1), tol);
+                    } else {
+                        fail = icompare((uint16_t *) outbuffs[generic_offset], (uint16_t *) outbuffs[i], vlen*(outputsig[0].is_complex ? 2 : 1), tol);
+                    }
+                    break;
+                case 1:
+                    if(outputsig[0].is_signed) {
+                        fail = icompare((int8_t *) outbuffs[generic_offset], (int8_t *) outbuffs[i], vlen*(outputsig[0].is_complex ? 2 : 1), tol);
+                    } else {
+                        fail = icompare((uint8_t *) outbuffs[generic_offset], (uint8_t *) outbuffs[i], vlen*(outputsig[0].is_complex ? 2 : 1), tol);
+                    }
+                    break;
+                default:
+                    fail=1;
+                }
+                    
+                //fail = memcmp(outbuffs[generic_offset], outbuffs[i], outputsig[0].size * vlen * (outputsig[0].is_complex ? 2:1));
             }
             if(fail) {
                 std::cout << name << ": fail on arch " << arch_list[i] << std::endl;
diff --git a/volk/lib/testqa.cc b/volk/lib/testqa.cc
index 1ee264fb4..f813e843f 100644
--- a/volk/lib/testqa.cc
+++ b/volk/lib/testqa.cc
@@ -11,18 +11,16 @@ BOOST_AUTO_TEST_CASE(volk_test_all) {
 //    VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_32f_x2_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_16ic_deinterleave_real_16i_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_real_32f_a16, 1e-5, 2046, 10000);
-    VOLK_RUN_TESTS(volk_16ic_deinterleave_real_8i_a16, 1e-4, 2046, 10000);
+    VOLK_RUN_TESTS(volk_16ic_deinterleave_real_8i_a16, 0, 2046, 10000);
 //    VOLK_RUN_TESTS(volk_16ic_deinterleave_16i_x2_a16, 1e-4, 2046, 10000);
 //    VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_32f_x2_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_16ic_deinterleave_real_16i_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_real_32f_a16, 1e-5, 2046, 10000);
-    VOLK_RUN_TESTS(volk_16ic_deinterleave_real_8i_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_16ic_magnitude_16i_a16, 1e-4, 2046, 10000);
+    VOLK_RUN_TESTS(volk_16ic_magnitude_16i_a16, 1, 2046, 10000);
     VOLK_RUN_TESTS(volk_16ic_s32f_magnitude_32f_a16, 1e-5, 2046, 10000);
     VOLK_RUN_TESTS(volk_16i_s32f_convert_32f_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_16i_s32f_convert_32f_u, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_16i_convert_8i_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_16i_convert_8i_u, 1e-4, 2046, 10000);
+    VOLK_RUN_TESTS(volk_16i_convert_8i_a16, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_16i_convert_8i_u, 0, 2046, 10000);
 //    VOLK_RUN_TESTS(volk_16i_max_star_16i_a16, 1e-4, 2046, 10000);
 //    VOLK_RUN_TESTS(volk_16i_max_star_horizontal_16i_a16, 1e-4, 2046, 10000);
 //    VOLK_RUN_TESTS(volk_16i_permute_and_scalar_add_a16, 1e-4, 2046, 10000);
@@ -37,7 +35,7 @@ BOOST_AUTO_TEST_CASE(volk_test_all) {
     VOLK_RUN_TESTS(volk_32fc_x2_conjugate_dot_prod_32fc_a16, 1e-4, 2046, 10000);
 //    VOLK_RUN_TESTS(volk_32fc_deinterleave_32f_x2_a16, 1e-4, 2046, 10000);
 //    VOLK_RUN_TESTS(volk_32fc_deinterleave_64f_x2_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32fc_deinterleave_real_16i_a16, 1e-4, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32fc_s32f_deinterleave_real_16i_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_32fc_deinterleave_real_32f_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_32fc_deinterleave_real_64f_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_32fc_x2_dot_prod_32fc_a16, 1e-4, 2046, 10000);
@@ -45,25 +43,25 @@ BOOST_AUTO_TEST_CASE(volk_test_all) {
     VOLK_RUN_TESTS(volk_32fc_s32f_magnitude_16i_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_32fc_magnitude_32f_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_32fc_x2_multiply_32fc_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_s32f_convert_16i_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_s32f_convert_16i_u, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_s32f_convert_32i_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_s32f_convert_32i_u, 1e-4, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_s32f_convert_16i_a16, 1, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_s32f_convert_16i_u, 1, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_s32f_convert_32i_a16, 1, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_s32f_convert_32i_u, 1, 2046, 10000);
     VOLK_RUN_TESTS(volk_32f_convert_64f_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_32f_convert_64f_u, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_u, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32fc_s32f_x2_power_spectral_density_32f_a16, 1e-4, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_a16, 1, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_u, 1, 2046, 10000);
+//    VOLK_RUN_TESTS(volk_32fc_s32f_x2_power_spectral_density_32f_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_32fc_s32f_power_spectrum_32f_a16, 1e-4, 2046, 10000);
 //    VOLK_RUN_TESTS(volk_32fc_x2_square_dist_32f_a16, 1e-4, 2046, 10000);
 //    VOLK_RUN_TESTS(volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_32f_x2_divide_32f_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_32f_x2_dot_prod_32f_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_32f_x2_dot_prod_32f_u, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_s32f_32f_fm_detect_32f_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_index_max_16u_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_x2_s32f_interleave_16ic_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_x2_interleave_32fc_a16, 1e-4, 2046, 10000);
+    //VOLK_RUN_TESTS(volk_32f_s32f_32f_fm_detect_32f_a16, 1e-4, 2046, 10000);
+    //VOLK_RUN_TESTS(volk_32f_index_max_16u_a16, 1e-4, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_x2_s32f_interleave_16ic_a16, 1, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_x2_interleave_32fc_a16, 1, 2046, 10000);
     VOLK_RUN_TESTS(volk_32f_x2_max_32f_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_32f_x2_min_32f_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_32f_x2_multiply_32f_a16, 1e-4, 2046, 10000);
-- 
cgit 


From 7a5a751073cc1583533b84c90ecc985b3669a696 Mon Sep 17 00:00:00 2001
From: Nick Foster
Date: Fri, 21 Jan 2011 15:14:26 -0800
Subject: Volk: added QA support for multiple outputs, scalar outputs. 92% test
 coverage within the framework.

---
 volk/lib/qa_utils.cc | 183 +++++++++++++++++++++++++--------------------------
 volk/lib/testqa.cc   |  42 ++++++------
 2 files changed, 112 insertions(+), 113 deletions(-)

(limited to 'volk/lib')

diff --git a/volk/lib/qa_utils.cc b/volk/lib/qa_utils.cc
index 8f57a9b90..b1c55fc05 100644
--- a/volk/lib/qa_utils.cc
+++ b/volk/lib/qa_utils.cc
@@ -66,6 +66,7 @@ void *make_aligned_buffer(unsigned int len, unsigned int size) {
   int ret;
   ret = posix_memalign((void**)&buf, 16, len * size);
   assert(ret == 0);
+  memset(buf, 0x00, len*size);
   return buf;
 }
 
@@ -220,32 +221,32 @@ static void get_signatures_from_name(std::vector<volk_type_t> &inputsig,
     assert(inputsig.size() != 0);
 }
 
-inline void run_cast_test1(volk_fn_1arg func, void *buff, unsigned int vlen, unsigned int iter, std::string arch) {
-    while(iter--) func(buff, vlen, arch.c_str());
+inline void run_cast_test1(volk_fn_1arg func, std::vector<void *> &buffs, unsigned int vlen, unsigned int iter, std::string arch) {
+    while(iter--) func(buffs[0], vlen, arch.c_str());
 }
 
-inline void run_cast_test2(volk_fn_2arg func, void *outbuff, std::vector<void *> &inbuffs, unsigned int vlen, unsigned int iter, std::string arch) {
-    while(iter--) func(outbuff, inbuffs[0], vlen, arch.c_str());
+inline void run_cast_test2(volk_fn_2arg func, std::vector<void *> &buffs, unsigned int vlen, unsigned int iter, std::string arch) {
+    while(iter--) func(buffs[0], buffs[1], vlen, arch.c_str());
 }
 
-inline void run_cast_test3(volk_fn_3arg func, void *outbuff, std::vector<void *> &inbuffs, unsigned int vlen, unsigned int iter, std::string arch) {
-    while(iter--) func(outbuff, inbuffs[0], inbuffs[1], vlen, arch.c_str());
+inline void run_cast_test3(volk_fn_3arg func, std::vector<void *> &buffs, unsigned int vlen, unsigned int iter, std::string arch) {
+    while(iter--) func(buffs[0], buffs[1], buffs[2], vlen, arch.c_str());
 }
 
-inline void run_cast_test4(volk_fn_4arg func, void *outbuff, std::vector<void *> &inbuffs, unsigned int vlen, unsigned int iter, std::string arch) {
-    while(iter--) func(outbuff, inbuffs[0], inbuffs[1], inbuffs[2], vlen, arch.c_str());
+inline void run_cast_test4(volk_fn_4arg func, std::vector<void *> &buffs, unsigned int vlen, unsigned int iter, std::string arch) {
+    while(iter--) func(buffs[0], buffs[1], buffs[2], buffs[3], vlen, arch.c_str());
 }
 
-inline void run_cast_test1_s32f(volk_fn_1arg_s32f func, void *buff, float scalar, unsigned int vlen, unsigned int iter, std::string arch) {
-    while(iter--) func(buff, scalar, vlen, arch.c_str());
+inline void run_cast_test1_s32f(volk_fn_1arg_s32f func, std::vector<void *> &buffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch) {
+    while(iter--) func(buffs[0], scalar, vlen, arch.c_str());
 }
 
-inline void run_cast_test2_s32f(volk_fn_2arg_s32f func, void *outbuff, std::vector<void *> &inbuffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch) {
-    while(iter--) func(outbuff, inbuffs[0], scalar, vlen, arch.c_str());
+inline void run_cast_test2_s32f(volk_fn_2arg_s32f func, std::vector<void *> &buffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch) {
+    while(iter--) func(buffs[0], buffs[1], scalar, vlen, arch.c_str());
 }
 
-inline void run_cast_test3_s32f(volk_fn_3arg_s32f func, void *outbuff, std::vector<void *> &inbuffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch) {
-    while(iter--) func(outbuff, inbuffs[0], inbuffs[1], scalar, vlen, arch.c_str());
+inline void run_cast_test3_s32f(volk_fn_3arg_s32f func, std::vector<void *> &buffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch) {
+    while(iter--) func(buffs[0], buffs[1], buffs[2], scalar, vlen, arch.c_str());
 }
 
 template <class t>
@@ -253,7 +254,7 @@ bool fcompare(t *in1, t *in2, unsigned int vlen, float tol) {
     bool fail = false;
     int print_max_errs = 10;
     for(int i=0; i<vlen; i++) {
-        if(((t *)(in1))[i] < 1e-30) continue; //below around here we'll start to get roundoff errors due to float precision
+        if(((t *)(in1))[i] < 1e-30) continue; //this is a hack: below around here we'll start to get roundoff errors due to limited precision
         if(fabs(((t *)(in1))[i] - ((t *)(in2))[i])/(((t *)in1)[i]) > tol) {
             fail=true;
             if(print_max_errs-- > 0) {
@@ -291,74 +292,70 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name,
     std::vector<volk_type_t> inputsig, outputsig;
     get_signatures_from_name(inputsig, outputsig, name);
     
-    std::vector<volk_type_t> inputsc, outputsc;
+    //pull the input scalars into their own vector
+    std::vector<volk_type_t> inputsc;
     for(int i=0; i<inputsig.size(); i++) {
         if(inputsig[i].is_scalar) {
             inputsc.push_back(inputsig[i]);
             inputsig.erase(inputsig.begin() + i);
         }
     }
-    for(int i=0; i<outputsig.size(); i++) {
-        if(outputsig[i].is_scalar) {
-            outputsc.push_back(outputsig[i]);
-            outputsig.erase(outputsig.begin() + i);
-        }
-    }
-    assert(outputsc.size() == 0); //we don't do output scalars yet
 
     //for(int i=0; i<inputsig.size(); i++) std::cout << "Input: " << inputsig[i].str << std::endl;
     //for(int i=0; i<outputsig.size(); i++) std::cout << "Output: " << outputsig[i].str << std::endl;
-    std::vector<void *> inbuffs, outbuffs;
+    std::vector<void *> inbuffs;
+
+    make_buffer_for_signature(inbuffs, inputsig, vlen);
+    for(int i=0; i<inbuffs.size(); i++) {
+        load_random_data(inbuffs[i], inputsig[i], vlen);        
+    }
     
-    if(outputsig.size() == 0) { //we're operating in place...
-        //assert(inputsig.size() == 1); //we only support 0 output 1 input right now...
-        make_buffer_for_signature(inbuffs, inputsig, vlen); //let's make an input buffer
-        load_random_data(inbuffs[0], inputsig[0], vlen); //and load it with random data
-        BOOST_FOREACH(std::string arch, arch_list) { //then copy the same random data to each output buffer
-            make_buffer_for_signature(outbuffs, inputsig, vlen);
-            memcpy(outbuffs.back(), inbuffs[0], vlen*inputsig[0].size*(inputsig[0].is_complex?2:1));
-        }
-    } else {
-        make_buffer_for_signature(inbuffs, inputsig, vlen);
-        BOOST_FOREACH(std::string arch, arch_list) {
-            make_buffer_for_signature(outbuffs, outputsig, vlen);
+    //ok let's make a vector of vector of void buffers, which holds the input/output vectors for each arch
+    std::vector<std::vector<void *> > test_data;
+    for(int i=0; i<arch_list.size(); i++) {
+        std::vector<void *> arch_buffs;
+        for(int j=0; j<outputsig.size(); j++) {
+            arch_buffs.push_back(make_aligned_buffer(vlen, outputsig[j].size*(outputsig[j].is_complex ? 2 : 1)));
         }
-    
-        //and set the input buffers to something random
-        for(int i=0; i<inbuffs.size(); i++) {
-            load_random_data(inbuffs[i], inputsig[i], vlen);        
+        for(int j=0; j<inputsig.size(); j++) {
+            arch_buffs.push_back(inbuffs[j]);
         }
+        test_data.push_back(arch_buffs);
     }
+    
+    std::vector<volk_type_t> both_sigs;
+    both_sigs.insert(both_sigs.end(), outputsig.begin(), outputsig.end());
+    both_sigs.insert(both_sigs.end(), inputsig.begin(), inputsig.end());
 
     //now run the test
     clock_t start, end;
     for(int i = 0; i < arch_list.size(); i++) {
         start = clock();
 
-        switch(inputsig.size() + outputsig.size()) {
+        switch(both_sigs.size()) {
             case 1:
                 if(inputsc.size() == 0) {
-                    run_cast_test1((volk_fn_1arg)(manual_func), outbuffs[i], vlen, iter, arch_list[i]); 
+                    run_cast_test1((volk_fn_1arg)(manual_func), test_data[i], vlen, iter, arch_list[i]); 
                 } else if(inputsc.size() == 1 && inputsc[0].is_float) {
-                    run_cast_test1_s32f((volk_fn_1arg_s32f)(manual_func), outbuffs[i], 127.0, vlen, iter, arch_list[i]);
+                    run_cast_test1_s32f((volk_fn_1arg_s32f)(manual_func), test_data[i], 127.0, vlen, iter, arch_list[i]);
                 } else throw "unsupported 1 arg function >1 scalars";
                 break;
             case 2:
                 if(inputsc.size() == 0) {
-                    run_cast_test2((volk_fn_2arg)(manual_func), outbuffs[i], inbuffs, vlen, iter, arch_list[i]);
+                    run_cast_test2((volk_fn_2arg)(manual_func), test_data[i], vlen, iter, arch_list[i]);
                 } else if(inputsc.size() == 1 && inputsc[0].is_float) {
-                    run_cast_test2_s32f((volk_fn_2arg_s32f)(manual_func), outbuffs[i], inbuffs, 127.0, vlen, iter, arch_list[i]);
+                    run_cast_test2_s32f((volk_fn_2arg_s32f)(manual_func), test_data[i], 127.0, vlen, iter, arch_list[i]);
                 } else throw "unsupported 2 arg function >1 scalars";
                 break;
             case 3:
                 if(inputsc.size() == 0) {
-                    run_cast_test3((volk_fn_3arg)(manual_func), outbuffs[i], inbuffs, vlen, iter, arch_list[i]);
+                    run_cast_test3((volk_fn_3arg)(manual_func), test_data[i], vlen, iter, arch_list[i]);
                 } else if(inputsc.size() == 1 && inputsc[0].is_float) {
-                    run_cast_test3_s32f((volk_fn_3arg_s32f)(manual_func), outbuffs[i], inbuffs, 127.0, vlen, iter, arch_list[i]);
+                    run_cast_test3_s32f((volk_fn_3arg_s32f)(manual_func), test_data[i], 127.0, vlen, iter, arch_list[i]);
                 } else throw "unsupported 3 arg function >1 scalars";
                 break;
             case 4:
-                run_cast_test4((volk_fn_4arg)(manual_func), outbuffs[i], inbuffs, vlen, iter, arch_list[i]);
+                run_cast_test4((volk_fn_4arg)(manual_func), test_data[i], vlen, iter, arch_list[i]);
                 break;
             default:
                 throw "no function handler for this signature";
@@ -375,61 +372,63 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name,
         if(arch_list[i] == "generic") generic_offset=i;
 
     //now compare
-    if(outputsig.size() == 0) outputsig = inputsig; //a hack, i know
-    //TODO: loop over the output signature as well
+    //if(outputsig.size() == 0) outputsig = inputsig; //a hack, i know
+    
     bool fail = false;
+    bool fail_global = false;
     for(int i=0; i<arch_list.size(); i++) {
         if(i != generic_offset) {
-            if(outputsig[0].is_float) {
-                if(outputsig[0].size == 8) {
-                    fail = fcompare((double *) outbuffs[generic_offset], (double *) outbuffs[i], vlen*(outputsig[0].is_complex ? 2 : 1), tol);
-                } else {
-                    fail = fcompare((float *) outbuffs[generic_offset], (float *) outbuffs[i], vlen*(outputsig[0].is_complex ? 2 : 1), tol);
-                }
-            } else {
-                //i could replace this whole switch statement with a memcmp if i wasn't interested in printing the outputs where they differ
-                switch(outputsig[0].size) {
-                case 8:
-                    if(outputsig[0].is_signed) {
-                        fail = icompare((int64_t *) outbuffs[generic_offset], (int64_t *) outbuffs[i], vlen*(outputsig[0].is_complex ? 2 : 1), tol);
-                    } else {
-                        fail = icompare((uint64_t *) outbuffs[generic_offset], (uint64_t *) outbuffs[i], vlen*(outputsig[0].is_complex ? 2 : 1), tol);
-                    }
-                    break;
-                case 4:
-                    if(outputsig[0].is_signed) {
-                        fail = icompare((int32_t *) outbuffs[generic_offset], (int32_t *) outbuffs[i], vlen*(outputsig[0].is_complex ? 2 : 1), tol);
+            for(int j=0; j<both_sigs.size(); j++) {
+                if(both_sigs[j].is_float) {
+                    if(both_sigs[j].size == 8) {
+                        fail = fcompare((double *) test_data[generic_offset][j], (double *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol);
                     } else {
-                        fail = icompare((uint32_t *) outbuffs[generic_offset], (uint32_t *) outbuffs[i], vlen*(outputsig[0].is_complex ? 2 : 1), tol);
+                        fail = fcompare((float *) test_data[generic_offset][j], (float *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol);
                     }
-                    break;
-                case 2:
-                    if(outputsig[0].is_signed) {
-                        fail = icompare((int16_t *) outbuffs[generic_offset], (int16_t *) outbuffs[i], vlen*(outputsig[0].is_complex ? 2 : 1), tol);
-                    } else {
-                        fail = icompare((uint16_t *) outbuffs[generic_offset], (uint16_t *) outbuffs[i], vlen*(outputsig[0].is_complex ? 2 : 1), tol);
-                    }
-                    break;
-                case 1:
-                    if(outputsig[0].is_signed) {
-                        fail = icompare((int8_t *) outbuffs[generic_offset], (int8_t *) outbuffs[i], vlen*(outputsig[0].is_complex ? 2 : 1), tol);
-                    } else {
-                        fail = icompare((uint8_t *) outbuffs[generic_offset], (uint8_t *) outbuffs[i], vlen*(outputsig[0].is_complex ? 2 : 1), tol);
+                } else {
+                    //i could replace this whole switch statement with a memcmp if i wasn't interested in printing the outputs where they differ
+                    switch(both_sigs[j].size) {
+                    case 8:
+                        if(both_sigs[j].is_signed) {
+                            fail = icompare((int64_t *) test_data[generic_offset][j], (int64_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol);
+                        } else {
+                            fail = icompare((uint64_t *) test_data[generic_offset][j], (uint64_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol);
+                        }
+                        break;
+                    case 4:
+                        if(both_sigs[j].is_signed) {
+                            fail = icompare((int32_t *) test_data[generic_offset][j], (int32_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol);
+                        } else {
+                            fail = icompare((uint32_t *) test_data[generic_offset][j], (uint32_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol);
+                        }
+                        break;
+                    case 2:
+                        if(both_sigs[j].is_signed) {
+                            fail = icompare((int16_t *) test_data[generic_offset][j], (int16_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol);
+                        } else {
+                            fail = icompare((uint16_t *) test_data[generic_offset][j], (uint16_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol);
+                        }
+                        break;
+                    case 1:
+                        if(both_sigs[j].is_signed) {
+                            fail = icompare((int8_t *) test_data[generic_offset][j], (int8_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol);
+                        } else {
+                            fail = icompare((uint8_t *) test_data[generic_offset][j], (uint8_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol);
+                        }
+                        break;
+                    default:
+                        fail=1;
                     }
-                    break;
-                default:
-                    fail=1;
                 }
-                    
+                if(fail) {
+                    fail_global = true;
+                    std::cout << name << ": fail on arch " << arch_list[i] << std::endl;
+                }
                 //fail = memcmp(outbuffs[generic_offset], outbuffs[i], outputsig[0].size * vlen * (outputsig[0].is_complex ? 2:1));
             }
-            if(fail) {
-                std::cout << name << ": fail on arch " << arch_list[i] << std::endl;
-            }
         }
     }
-
-    return fail;
+    return fail_global;
 }
 
 
diff --git a/volk/lib/testqa.cc b/volk/lib/testqa.cc
index f813e843f..4dd7f7599 100644
--- a/volk/lib/testqa.cc
+++ b/volk/lib/testqa.cc
@@ -7,13 +7,13 @@ BOOST_AUTO_TEST_CASE(volk_test_all) {
     //in order...
 //    VOLK_RUN_TESTS(volk_16i_x5_add_quad_16i_x4_a16, 1e-4, 2046, 10000);
 //    VOLK_RUN_TESTS(volk_16i_branch_4_state_8_a16, 1e-4, 2046, 10000);
-//    VOLK_RUN_TESTS(volk_16ic_deinterleave_16i_x2_a16, 1e-4, 2046, 10000);
-//    VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_32f_x2_a16, 1e-4, 2046, 10000);
+    VOLK_RUN_TESTS(volk_16ic_deinterleave_16i_x2_a16, 1e-4, 2046, 10000);
+    VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_32f_x2_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_16ic_deinterleave_real_16i_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_real_32f_a16, 1e-5, 2046, 10000);
     VOLK_RUN_TESTS(volk_16ic_deinterleave_real_8i_a16, 0, 2046, 10000);
-//    VOLK_RUN_TESTS(volk_16ic_deinterleave_16i_x2_a16, 1e-4, 2046, 10000);
-//    VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_32f_x2_a16, 1e-4, 2046, 10000);
+    VOLK_RUN_TESTS(volk_16ic_deinterleave_16i_x2_a16, 1e-4, 2046, 10000);
+    VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_32f_x2_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_16ic_deinterleave_real_16i_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_16ic_magnitude_16i_a16, 1, 2046, 10000);
     VOLK_RUN_TESTS(volk_16ic_s32f_magnitude_32f_a16, 1e-5, 2046, 10000);
@@ -21,25 +21,25 @@ BOOST_AUTO_TEST_CASE(volk_test_all) {
     VOLK_RUN_TESTS(volk_16i_s32f_convert_32f_u, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_16i_convert_8i_a16, 0, 2046, 10000);
     VOLK_RUN_TESTS(volk_16i_convert_8i_u, 0, 2046, 10000);
-//    VOLK_RUN_TESTS(volk_16i_max_star_16i_a16, 1e-4, 2046, 10000);
-//    VOLK_RUN_TESTS(volk_16i_max_star_horizontal_16i_a16, 1e-4, 2046, 10000);
+    VOLK_RUN_TESTS(volk_16i_max_star_16i_a16, 1e-4, 2046, 10000);
+    VOLK_RUN_TESTS(volk_16i_max_star_horizontal_16i_a16, 1e-4, 2046, 10000);
 //    VOLK_RUN_TESTS(volk_16i_permute_and_scalar_add_a16, 1e-4, 2046, 10000);
 //    VOLK_RUN_TESTS(volk_16i_x4_quad_max_star_16i_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_16u_byteswap_a16, 1e-4, 2046, 10000);
-//    VOLK_RUN_TESTS(volk_32f_accumulator_s32f_a16, 1e-4, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_accumulator_s32f_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_32f_x2_add_32f_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_32fc_32f_multiply_32fc_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_32fc_32f_power_32fc_a16, 1e-4, 2046, 1000);
-//    VOLK_RUN_TESTS(volk_32f_calc_spectral_noise_floor_a16, 1e-4, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_s32f_calc_spectral_noise_floor_32f_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_32fc_s32f_atan2_32f_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_32fc_x2_conjugate_dot_prod_32fc_a16, 1e-4, 2046, 10000);
-//    VOLK_RUN_TESTS(volk_32fc_deinterleave_32f_x2_a16, 1e-4, 2046, 10000);
-//    VOLK_RUN_TESTS(volk_32fc_deinterleave_64f_x2_a16, 1e-4, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32fc_deinterleave_32f_x2_a16, 1e-4, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32fc_deinterleave_64f_x2_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_32fc_s32f_deinterleave_real_16i_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_32fc_deinterleave_real_32f_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_32fc_deinterleave_real_64f_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_32fc_x2_dot_prod_32fc_a16, 1e-4, 2046, 10000);
-//    VOLK_RUN_TESTS(volk_32fc_index_max_16u_a16, 1e-4, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32fc_index_max_16u_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_32fc_s32f_magnitude_16i_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_32fc_magnitude_32f_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_32fc_x2_multiply_32fc_a16, 1e-4, 2046, 10000);
@@ -49,19 +49,19 @@ BOOST_AUTO_TEST_CASE(volk_test_all) {
     VOLK_RUN_TESTS(volk_32f_s32f_convert_32i_u, 1, 2046, 10000);
     VOLK_RUN_TESTS(volk_32f_convert_64f_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_32f_convert_64f_u, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_a16, 1, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_u, 1, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_a16, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_u, 0, 2046, 10000);
 //    VOLK_RUN_TESTS(volk_32fc_s32f_x2_power_spectral_density_32f_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_32fc_s32f_power_spectrum_32f_a16, 1e-4, 2046, 10000);
-//    VOLK_RUN_TESTS(volk_32fc_x2_square_dist_32f_a16, 1e-4, 2046, 10000);
-//    VOLK_RUN_TESTS(volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16, 1e-4, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32fc_x2_square_dist_32f_a16, 1e-4, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_32f_x2_divide_32f_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_32f_x2_dot_prod_32f_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_32f_x2_dot_prod_32f_u, 1e-4, 2046, 10000);
-    //VOLK_RUN_TESTS(volk_32f_s32f_32f_fm_detect_32f_a16, 1e-4, 2046, 10000);
-    //VOLK_RUN_TESTS(volk_32f_index_max_16u_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_x2_s32f_interleave_16ic_a16, 1, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_x2_interleave_32fc_a16, 1, 2046, 10000);
+//    VOLK_RUN_TESTS(volk_32f_s32f_32f_fm_detect_32f_a16, 1e-4, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_index_max_16u_a16, 1e-4, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_x2_s32f_interleave_16ic_a16, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_x2_interleave_32fc_a16, 0, 2046, 10000);
     VOLK_RUN_TESTS(volk_32f_x2_max_32f_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_32f_x2_min_32f_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_32f_x2_multiply_32f_a16, 1e-4, 2046, 10000);
@@ -84,8 +84,8 @@ BOOST_AUTO_TEST_CASE(volk_test_all) {
     VOLK_RUN_TESTS(volk_64f_x2_min_64f_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_64u_byteswap_a16, 1e-4, 2046, 10000);
 //    VOLK_RUN_TESTS(volk_64u_popcnt_a16, 1e-4, 2046, 10000);
-//    VOLK_RUN_TESTS(volk_8ic_deinterleave_16i_x2_a16, 1e-4, 2046, 10000);
-//    VOLK_RUN_TESTS(volk_8ic_s32f_deinterleave_32f_x2_a16, 1e-4, 2046, 10000);
+    VOLK_RUN_TESTS(volk_8ic_deinterleave_16i_x2_a16, 1e-4, 2046, 10000);
+    VOLK_RUN_TESTS(volk_8ic_s32f_deinterleave_32f_x2_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_8ic_deinterleave_real_16i_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_8ic_s32f_deinterleave_real_32f_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_8ic_deinterleave_real_8i_a16, 1e-4, 2046, 10000);
-- 
cgit 


From fa8c8c8e9fcd74eda5edb58edc89be97bc4bfa0a Mon Sep 17 00:00:00 2001
From: Nick Foster
Date: Fri, 21 Jan 2011 15:29:08 -0800
Subject: Volk: added ability to spec scalar in test invocation

---
 volk/lib/qa_utils.cc |   8 +--
 volk/lib/qa_utils.h  |   4 +-
 volk/lib/testqa.cc   | 172 +++++++++++++++++++++++++--------------------------
 3 files changed, 92 insertions(+), 92 deletions(-)

(limited to 'volk/lib')

diff --git a/volk/lib/qa_utils.cc b/volk/lib/qa_utils.cc
index b1c55fc05..67ce5ddef 100644
--- a/volk/lib/qa_utils.cc
+++ b/volk/lib/qa_utils.cc
@@ -282,7 +282,7 @@ bool icompare(t *in1, t *in2, unsigned int vlen, unsigned int tol) {
     return fail;
 }
 
-bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name, float tol, int vlen, int iter) {
+bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name, float tol, float scalar, int vlen, int iter) {
     std::cout << "RUN_VOLK_TESTS: " << name << std::endl;
     
     //first let's get a list of available architectures for the test
@@ -337,21 +337,21 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name,
                 if(inputsc.size() == 0) {
                     run_cast_test1((volk_fn_1arg)(manual_func), test_data[i], vlen, iter, arch_list[i]); 
                 } else if(inputsc.size() == 1 && inputsc[0].is_float) {
-                    run_cast_test1_s32f((volk_fn_1arg_s32f)(manual_func), test_data[i], 127.0, vlen, iter, arch_list[i]);
+                    run_cast_test1_s32f((volk_fn_1arg_s32f)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]);
                 } else throw "unsupported 1 arg function >1 scalars";
                 break;
             case 2:
                 if(inputsc.size() == 0) {
                     run_cast_test2((volk_fn_2arg)(manual_func), test_data[i], vlen, iter, arch_list[i]);
                 } else if(inputsc.size() == 1 && inputsc[0].is_float) {
-                    run_cast_test2_s32f((volk_fn_2arg_s32f)(manual_func), test_data[i], 127.0, vlen, iter, arch_list[i]);
+                    run_cast_test2_s32f((volk_fn_2arg_s32f)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]);
                 } else throw "unsupported 2 arg function >1 scalars";
                 break;
             case 3:
                 if(inputsc.size() == 0) {
                     run_cast_test3((volk_fn_3arg)(manual_func), test_data[i], vlen, iter, arch_list[i]);
                 } else if(inputsc.size() == 1 && inputsc[0].is_float) {
-                    run_cast_test3_s32f((volk_fn_3arg_s32f)(manual_func), test_data[i], 127.0, vlen, iter, arch_list[i]);
+                    run_cast_test3_s32f((volk_fn_3arg_s32f)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]);
                 } else throw "unsupported 3 arg function >1 scalars";
                 break;
             case 4:
diff --git a/volk/lib/qa_utils.h b/volk/lib/qa_utils.h
index 79fc8f006..e2539060a 100644
--- a/volk/lib/qa_utils.h
+++ b/volk/lib/qa_utils.h
@@ -18,9 +18,9 @@ volk_type_t volk_type_from_string(std::string);
 float uniform(void);
 void random_floats(float *buf, unsigned n);
 
-bool run_volk_tests(const int[], void(*)(), std::string, float, int, int);
+bool run_volk_tests(const int[], void(*)(), std::string, float, float, int, int);
 
-#define VOLK_RUN_TESTS(func, tol, len, iter) BOOST_CHECK_EQUAL(run_volk_tests(func##_arch_defs, (void (*)())func##_manual, std::string(#func), tol, len, iter), 0)
+#define VOLK_RUN_TESTS(func, tol, scalar, len, iter) BOOST_CHECK_EQUAL(run_volk_tests(func##_arch_defs, (void (*)())func##_manual, std::string(#func), tol, scalar, len, iter), 0)
 
 typedef void (*volk_fn_1arg)(void *, unsigned int, const char*); //one input, operate in place
 typedef void (*volk_fn_2arg)(void *, void *, unsigned int, const char*);
diff --git a/volk/lib/testqa.cc b/volk/lib/testqa.cc
index 4dd7f7599..9f4934dc0 100644
--- a/volk/lib/testqa.cc
+++ b/volk/lib/testqa.cc
@@ -7,93 +7,93 @@ BOOST_AUTO_TEST_CASE(volk_test_all) {
     //in order...
 //    VOLK_RUN_TESTS(volk_16i_x5_add_quad_16i_x4_a16, 1e-4, 2046, 10000);
 //    VOLK_RUN_TESTS(volk_16i_branch_4_state_8_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_16ic_deinterleave_16i_x2_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_32f_x2_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_16ic_deinterleave_real_16i_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_real_32f_a16, 1e-5, 2046, 10000);
-    VOLK_RUN_TESTS(volk_16ic_deinterleave_real_8i_a16, 0, 2046, 10000);
-    VOLK_RUN_TESTS(volk_16ic_deinterleave_16i_x2_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_32f_x2_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_16ic_deinterleave_real_16i_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_16ic_magnitude_16i_a16, 1, 2046, 10000);
-    VOLK_RUN_TESTS(volk_16ic_s32f_magnitude_32f_a16, 1e-5, 2046, 10000);
-    VOLK_RUN_TESTS(volk_16i_s32f_convert_32f_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_16i_s32f_convert_32f_u, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_16i_convert_8i_a16, 0, 2046, 10000);
-    VOLK_RUN_TESTS(volk_16i_convert_8i_u, 0, 2046, 10000);
-    VOLK_RUN_TESTS(volk_16i_max_star_16i_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_16i_max_star_horizontal_16i_a16, 1e-4, 2046, 10000);
-//    VOLK_RUN_TESTS(volk_16i_permute_and_scalar_add_a16, 1e-4, 2046, 10000);
-//    VOLK_RUN_TESTS(volk_16i_x4_quad_max_star_16i_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_16u_byteswap_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_accumulator_s32f_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_x2_add_32f_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32fc_32f_multiply_32fc_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32fc_32f_power_32fc_a16, 1e-4, 2046, 1000);
-    VOLK_RUN_TESTS(volk_32f_s32f_calc_spectral_noise_floor_32f_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32fc_s32f_atan2_32f_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32fc_x2_conjugate_dot_prod_32fc_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32fc_deinterleave_32f_x2_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32fc_deinterleave_64f_x2_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32fc_s32f_deinterleave_real_16i_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32fc_deinterleave_real_32f_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32fc_deinterleave_real_64f_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32fc_x2_dot_prod_32fc_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32fc_index_max_16u_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32fc_s32f_magnitude_16i_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32fc_magnitude_32f_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32fc_x2_multiply_32fc_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_s32f_convert_16i_a16, 1, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_s32f_convert_16i_u, 1, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_s32f_convert_32i_a16, 1, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_s32f_convert_32i_u, 1, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_convert_64f_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_convert_64f_u, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_a16, 0, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_u, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_16ic_deinterleave_16i_x2_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_32f_x2_a16, 1e-4, 32768.0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_16ic_deinterleave_real_16i_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_real_32f_a16, 1e-5, 32768.0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_16ic_deinterleave_real_8i_a16, 0, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_16ic_deinterleave_16i_x2_a16, 0, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_32f_x2_a16, 1e-4, 32768.0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_16ic_deinterleave_real_16i_a16, 0, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_16ic_magnitude_16i_a16, 1, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_16ic_s32f_magnitude_32f_a16, 1e-5, 32768.0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_16i_s32f_convert_32f_a16, 1e-4, 32768.0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_16i_s32f_convert_32f_u, 1e-4, 32768.0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_16i_convert_8i_a16, 0, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_16i_convert_8i_u, 0, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_16i_max_star_16i_a16, 0, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_16i_max_star_horizontal_16i_a16, 0, 0, 2046, 10000);
+//    VOLK_RUN_TESTS(volk_16i_permute_and_scalar_add_a16, 1e-4, 0, 2046, 10000);
+//    VOLK_RUN_TESTS(volk_16i_x4_quad_max_star_16i_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_16u_byteswap_a16, 0, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_accumulator_s32f_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_x2_add_32f_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32fc_32f_multiply_32fc_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32fc_32f_power_32fc_a16, 1e-4, 0, 2046, 1000);
+    VOLK_RUN_TESTS(volk_32f_s32f_calc_spectral_noise_floor_32f_a16, 1e-4, 20.0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32fc_s32f_atan2_32f_a16, 1e-4, 10.0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32fc_x2_conjugate_dot_prod_32fc_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32fc_deinterleave_32f_x2_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32fc_deinterleave_64f_x2_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32fc_s32f_deinterleave_real_16i_a16, 0, 32768, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32fc_deinterleave_real_32f_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32fc_deinterleave_real_64f_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32fc_x2_dot_prod_32fc_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32fc_index_max_16u_a16, 0, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32fc_s32f_magnitude_16i_a16, 0, 32768, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32fc_magnitude_32f_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32fc_x2_multiply_32fc_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_s32f_convert_16i_a16, 1, 32768, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_s32f_convert_16i_u, 1, 32768, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_s32f_convert_32i_a16, 1, 2<<31, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_s32f_convert_32i_u, 1, 2<<31, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_convert_64f_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_convert_64f_u, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_a16, 0, 128, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_u, 0, 128, 2046, 10000);
 //    VOLK_RUN_TESTS(volk_32fc_s32f_x2_power_spectral_density_32f_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32fc_s32f_power_spectrum_32f_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32fc_x2_square_dist_32f_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_x2_divide_32f_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_x2_dot_prod_32f_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_x2_dot_prod_32f_u, 1e-4, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32fc_s32f_power_spectrum_32f_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32fc_x2_square_dist_32f_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a16, 1e-4, 10, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_x2_divide_32f_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_x2_dot_prod_32f_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_x2_dot_prod_32f_u, 1e-4, 0, 2046, 10000);
 //    VOLK_RUN_TESTS(volk_32f_s32f_32f_fm_detect_32f_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_index_max_16u_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_x2_s32f_interleave_16ic_a16, 0, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_x2_interleave_32fc_a16, 0, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_x2_max_32f_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_x2_min_32f_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_x2_multiply_32f_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_s32f_normalize_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_s32f_power_32f_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_sqrt_32f_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_s32f_stddev_32f_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_stddev_and_mean_32f_x2_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_x2_subtract_32f_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_x3_sum_of_poly_32f_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32i_x2_and_32i_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32i_s32f_convert_32f_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32i_s32f_convert_32f_u, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32i_x2_or_32i_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32u_byteswap_a16, 1e-4, 2046, 10000);
-//    VOLK_RUN_TESTS(volk_32u_popcnt_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_64f_convert_32f_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_64f_convert_32f_u, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_64f_x2_max_64f_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_64f_x2_min_64f_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_64u_byteswap_a16, 1e-4, 2046, 10000);
-//    VOLK_RUN_TESTS(volk_64u_popcnt_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_8ic_deinterleave_16i_x2_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_8ic_s32f_deinterleave_32f_x2_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_8ic_deinterleave_real_16i_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_8ic_s32f_deinterleave_real_32f_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_8ic_deinterleave_real_8i_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_8ic_x2_multiply_conjugate_16ic_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_8ic_x2_s32f_multiply_conjugate_32fc_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_8i_convert_16i_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_8i_convert_16i_u, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_8i_s32f_convert_32f_a16, 1e-4, 2046, 10000);
-    VOLK_RUN_TESTS(volk_8i_s32f_convert_32f_u, 1e-4, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_index_max_16u_a16, 0, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_x2_s32f_interleave_16ic_a16, 0, 32768, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_x2_interleave_32fc_a16, 0, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_x2_max_32f_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_x2_min_32f_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_x2_multiply_32f_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_s32f_normalize_a16, 1e-4, 100, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_s32f_power_32f_a16, 1e-4, 4, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_sqrt_32f_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_s32f_stddev_32f_a16, 1e-4, 100, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_stddev_and_mean_32f_x2_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_x2_subtract_32f_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_x3_sum_of_poly_32f_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32i_x2_and_32i_a16, 0, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32i_s32f_convert_32f_a16, 1e-4, 100, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32i_s32f_convert_32f_u, 1e-4, 100, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32i_x2_or_32i_a16, 0, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32u_byteswap_a16, 0, 0, 2046, 10000);
+//    VOLK_RUN_TESTS(volk_32u_popcnt_a16, 0, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_64f_convert_32f_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_64f_convert_32f_u, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_64f_x2_max_64f_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_64f_x2_min_64f_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_64u_byteswap_a16, 0, 0, 2046, 10000);
+//    VOLK_RUN_TESTS(volk_64u_popcnt_a16, 0, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_8ic_deinterleave_16i_x2_a16, 0, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_8ic_s32f_deinterleave_32f_x2_a16, 1e-4, 100, 2046, 10000);
+    VOLK_RUN_TESTS(volk_8ic_deinterleave_real_16i_a16, 0, 256, 2046, 10000);
+    VOLK_RUN_TESTS(volk_8ic_s32f_deinterleave_real_32f_a16, 1e-4, 100, 2046, 10000);
+    VOLK_RUN_TESTS(volk_8ic_deinterleave_real_8i_a16, 0, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_8ic_x2_multiply_conjugate_16ic_a16, 0, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_8ic_x2_s32f_multiply_conjugate_32fc_a16, 1e-4, 100, 2046, 10000);
+    VOLK_RUN_TESTS(volk_8i_convert_16i_a16, 0, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_8i_convert_16i_u, 0, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_8i_s32f_convert_32f_a16, 1e-4, 100, 2046, 10000);
+    VOLK_RUN_TESTS(volk_8i_s32f_convert_32f_u, 1e-4, 100, 2046, 10000);
 
 }
-- 
cgit 


From 6091bad60cdfdf21624da452c7a8b74405345070 Mon Sep 17 00:00:00 2001
From: Nick Foster
Date: Fri, 21 Jan 2011 15:41:30 -0800
Subject: Volk: removed all the old QA code that is covered by the test
 framework.

---
 volk/lib/Makefile.am                               |   1 -
 volk/lib/qa_16s_convert_32f_aligned16.cc           |  74 -------
 volk/lib/qa_16s_convert_32f_aligned16.h            |  18 --
 volk/lib/qa_16s_convert_32f_unaligned16.cc         |  74 -------
 volk/lib/qa_16s_convert_32f_unaligned16.h          |  18 --
 volk/lib/qa_16s_convert_8s_aligned16.cc            |  61 ------
 volk/lib/qa_16s_convert_8s_aligned16.h             |  18 --
 volk/lib/qa_16s_convert_8s_unaligned16.cc          |  61 ------
 volk/lib/qa_16s_convert_8s_unaligned16.h           |  18 --
 volk/lib/qa_16s_max_star_aligned16.cc              |  65 -------
 volk/lib/qa_16s_max_star_aligned16.h               |  18 --
 volk/lib/qa_16s_max_star_horizontal_aligned16.cc   |  79 --------
 volk/lib/qa_16s_max_star_horizontal_aligned16.h    |  18 --
 volk/lib/qa_16sc_deinterleave_16s_aligned16.cc     |  89 ---------
 volk/lib/qa_16sc_deinterleave_16s_aligned16.h      |  18 --
 volk/lib/qa_16sc_deinterleave_32f_aligned16.cc     |  75 --------
 volk/lib/qa_16sc_deinterleave_32f_aligned16.h      |  18 --
 .../lib/qa_16sc_deinterleave_real_16s_aligned16.cc |  72 -------
 volk/lib/qa_16sc_deinterleave_real_16s_aligned16.h |  18 --
 .../lib/qa_16sc_deinterleave_real_32f_aligned16.cc | 124 ------------
 volk/lib/qa_16sc_deinterleave_real_32f_aligned16.h |  18 --
 volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc |  70 -------
 volk/lib/qa_16sc_deinterleave_real_8s_aligned16.h  |  18 --
 volk/lib/qa_16sc_magnitude_16s_aligned16.cc        |  81 --------
 volk/lib/qa_16sc_magnitude_16s_aligned16.h         |  18 --
 volk/lib/qa_16sc_magnitude_32f_aligned16.cc        | 131 -------------
 volk/lib/qa_16sc_magnitude_32f_aligned16.h         |  18 --
 volk/lib/qa_16u_byteswap_aligned16.cc              |  71 -------
 volk/lib/qa_16u_byteswap_aligned16.h               |  18 --
 volk/lib/qa_32f_accumulator_aligned16.cc           |  57 ------
 volk/lib/qa_32f_accumulator_aligned16.h            |  18 --
 volk/lib/qa_32f_add_aligned16.cc                   | 123 ------------
 volk/lib/qa_32f_add_aligned16.h                    |  18 --
 .../qa_32f_calc_spectral_noise_floor_aligned16.cc  |  60 ------
 .../qa_32f_calc_spectral_noise_floor_aligned16.h   |  18 --
 volk/lib/qa_32f_convert_16s_aligned16.cc           |  71 -------
 volk/lib/qa_32f_convert_16s_aligned16.h            |  18 --
 volk/lib/qa_32f_convert_16s_unaligned16.cc         |  71 -------
 volk/lib/qa_32f_convert_16s_unaligned16.h          |  18 --
 volk/lib/qa_32f_convert_32s_aligned16.cc           |  71 -------
 volk/lib/qa_32f_convert_32s_aligned16.h            |  18 --
 volk/lib/qa_32f_convert_32s_unaligned16.cc         |  71 -------
 volk/lib/qa_32f_convert_32s_unaligned16.h          |  18 --
 volk/lib/qa_32f_convert_64f_aligned16.cc           |  61 ------
 volk/lib/qa_32f_convert_64f_aligned16.h            |  18 --
 volk/lib/qa_32f_convert_64f_unaligned16.cc         |  61 ------
 volk/lib/qa_32f_convert_64f_unaligned16.h          |  18 --
 volk/lib/qa_32f_convert_8s_aligned16.cc            |  71 -------
 volk/lib/qa_32f_convert_8s_aligned16.h             |  18 --
 volk/lib/qa_32f_convert_8s_unaligned16.cc          |  71 -------
 volk/lib/qa_32f_convert_8s_unaligned16.h           |  18 --
 volk/lib/qa_32f_divide_aligned16.cc                | 133 -------------
 volk/lib/qa_32f_divide_aligned16.h                 |  18 --
 volk/lib/qa_32f_dot_prod_aligned16.cc              | 183 ------------------
 volk/lib/qa_32f_dot_prod_aligned16.h               |  18 --
 volk/lib/qa_32f_dot_prod_unaligned16.cc            | 190 ------------------
 volk/lib/qa_32f_dot_prod_unaligned16.h             |  18 --
 volk/lib/qa_32f_interleave_16sc_aligned16.cc       |  76 --------
 volk/lib/qa_32f_interleave_16sc_aligned16.h        |  18 --
 volk/lib/qa_32f_interleave_32fc_aligned16.cc       |  63 ------
 volk/lib/qa_32f_interleave_32fc_aligned16.h        |  18 --
 volk/lib/qa_32f_max_aligned16.cc                   |  70 -------
 volk/lib/qa_32f_max_aligned16.h                    |  18 --
 volk/lib/qa_32f_min_aligned16.cc                   |  70 -------
 volk/lib/qa_32f_min_aligned16.h                    |  18 --
 volk/lib/qa_32f_multiply_aligned16.cc              | 123 ------------
 volk/lib/qa_32f_multiply_aligned16.h               |  18 --
 volk/lib/qa_32f_normalize_aligned16.cc             |  79 --------
 volk/lib/qa_32f_normalize_aligned16.h              |  18 --
 volk/lib/qa_32f_power_aligned16.cc                 |  95 ---------
 volk/lib/qa_32f_power_aligned16.h                  |  18 --
 volk/lib/qa_32f_sqrt_aligned16.cc                  | 128 ------------
 volk/lib/qa_32f_sqrt_aligned16.h                   |  18 --
 volk/lib/qa_32f_stddev_aligned16.cc                |  75 --------
 volk/lib/qa_32f_stddev_aligned16.h                 |  18 --
 volk/lib/qa_32f_stddev_and_mean_aligned16.cc       |  76 --------
 volk/lib/qa_32f_stddev_and_mean_aligned16.h        |  18 --
 volk/lib/qa_32f_subtract_aligned16.cc              |  70 -------
 volk/lib/qa_32f_subtract_aligned16.h               |  18 --
 volk/lib/qa_32f_sum_of_poly_aligned16.cc           | 142 --------------
 volk/lib/qa_32f_sum_of_poly_aligned16.h            |  18 --
 volk/lib/qa_32fc_32f_multiply_aligned16.cc         |  75 --------
 volk/lib/qa_32fc_32f_multiply_aligned16.h          |  18 --
 volk/lib/qa_32fc_32f_power_32fc_aligned16.cc       |  83 --------
 volk/lib/qa_32fc_32f_power_32fc_aligned16.h        |  18 --
 volk/lib/qa_32fc_atan2_32f_aligned16.cc            |  76 --------
 volk/lib/qa_32fc_atan2_32f_aligned16.h             |  18 --
 volk/lib/qa_32fc_conjugate_dot_prod_aligned16.cc   | 138 -------------
 volk/lib/qa_32fc_conjugate_dot_prod_aligned16.h    |  18 --
 volk/lib/qa_32fc_deinterleave_32f_aligned16.cc     |  64 ------
 volk/lib/qa_32fc_deinterleave_32f_aligned16.h      |  18 --
 volk/lib/qa_32fc_deinterleave_64f_aligned16.cc     |  64 ------
 volk/lib/qa_32fc_deinterleave_64f_aligned16.h      |  18 --
 .../lib/qa_32fc_deinterleave_real_16s_aligned16.cc |  61 ------
 volk/lib/qa_32fc_deinterleave_real_16s_aligned16.h |  18 --
 .../lib/qa_32fc_deinterleave_real_32f_aligned16.cc |  61 ------
 volk/lib/qa_32fc_deinterleave_real_32f_aligned16.h |  18 --
 .../lib/qa_32fc_deinterleave_real_64f_aligned16.cc |  61 ------
 volk/lib/qa_32fc_deinterleave_real_64f_aligned16.h |  18 --
 volk/lib/qa_32fc_dot_prod_aligned16.cc             | 214 ---------------------
 volk/lib/qa_32fc_dot_prod_aligned16.h              |  20 --
 volk/lib/qa_32fc_magnitude_16s_aligned16.cc        |  80 --------
 volk/lib/qa_32fc_magnitude_16s_aligned16.h         |  18 --
 volk/lib/qa_32fc_magnitude_32f_aligned16.cc        |  80 --------
 volk/lib/qa_32fc_magnitude_32f_aligned16.h         |  18 --
 volk/lib/qa_32fc_multiply_aligned16.cc             |  98 ----------
 volk/lib/qa_32fc_multiply_aligned16.h              |  18 --
 volk/lib/qa_32fc_power_spectrum_32f_aligned16.cc   |  64 ------
 volk/lib/qa_32fc_power_spectrum_32f_aligned16.h    |  18 --
 volk/lib/qa_32fc_square_dist_aligned16.cc          |  91 ---------
 volk/lib/qa_32fc_square_dist_aligned16.h           |  18 --
 .../qa_32fc_square_dist_scalar_mult_aligned16.cc   |  96 ---------
 .../qa_32fc_square_dist_scalar_mult_aligned16.h    |  18 --
 volk/lib/qa_32s_and_aligned16.cc                   |  70 -------
 volk/lib/qa_32s_and_aligned16.h                    |  18 --
 volk/lib/qa_32s_convert_32f_aligned16.cc           |  61 ------
 volk/lib/qa_32s_convert_32f_aligned16.h            |  18 --
 volk/lib/qa_32s_convert_32f_unaligned16.cc         |  61 ------
 volk/lib/qa_32s_convert_32f_unaligned16.h          |  18 --
 volk/lib/qa_32s_or_aligned16.cc                    |  70 -------
 volk/lib/qa_32s_or_aligned16.h                     |  18 --
 volk/lib/qa_32u_byteswap_aligned16.cc              |  60 ------
 volk/lib/qa_32u_byteswap_aligned16.h               |  18 --
 volk/lib/qa_64f_convert_32f_aligned16.cc           |  61 ------
 volk/lib/qa_64f_convert_32f_aligned16.h            |  18 --
 volk/lib/qa_64f_convert_32f_unaligned16.cc         |  61 ------
 volk/lib/qa_64f_convert_32f_unaligned16.h          |  18 --
 volk/lib/qa_64f_max_aligned16.cc                   |  61 ------
 volk/lib/qa_64f_max_aligned16.h                    |  18 --
 volk/lib/qa_64f_min_aligned16.cc                   |  61 ------
 volk/lib/qa_64f_min_aligned16.h                    |  18 --
 volk/lib/qa_64u_byteswap_aligned16.cc              |  60 ------
 volk/lib/qa_64u_byteswap_aligned16.h               |  18 --
 volk/lib/qa_8s_convert_16s_aligned16.cc            |  64 ------
 volk/lib/qa_8s_convert_16s_aligned16.h             |  18 --
 volk/lib/qa_8s_convert_16s_unaligned16.cc          |  64 ------
 volk/lib/qa_8s_convert_16s_unaligned16.h           |  18 --
 volk/lib/qa_8s_convert_32f_aligned16.cc            |  72 -------
 volk/lib/qa_8s_convert_32f_aligned16.h             |  18 --
 volk/lib/qa_8s_convert_32f_unaligned16.cc          |  64 ------
 volk/lib/qa_8s_convert_32f_unaligned16.h           |  18 --
 volk/lib/qa_8sc_deinterleave_16s_aligned16.cc      |  68 -------
 volk/lib/qa_8sc_deinterleave_16s_aligned16.h       |  18 --
 volk/lib/qa_8sc_deinterleave_32f_aligned16.cc      | 135 -------------
 volk/lib/qa_8sc_deinterleave_32f_aligned16.h       |  18 --
 volk/lib/qa_8sc_deinterleave_real_16s_aligned16.cc |  65 -------
 volk/lib/qa_8sc_deinterleave_real_16s_aligned16.h  |  18 --
 volk/lib/qa_8sc_deinterleave_real_32f_aligned16.cc | 139 -------------
 volk/lib/qa_8sc_deinterleave_real_32f_aligned16.h  |  18 --
 volk/lib/qa_8sc_deinterleave_real_8s_aligned16.cc  |  61 ------
 volk/lib/qa_8sc_deinterleave_real_8s_aligned16.h   |  18 --
 .../qa_8sc_multiply_conjugate_16sc_aligned16.cc    |  87 ---------
 .../lib/qa_8sc_multiply_conjugate_16sc_aligned16.h |  18 --
 .../qa_8sc_multiply_conjugate_32fc_aligned16.cc    |  87 ---------
 .../lib/qa_8sc_multiply_conjugate_32fc_aligned16.h |  18 --
 volk/lib/qa_volk.cc                                | 211 --------------------
 volk/lib/qa_volk.h                                 |  36 ----
 volk/lib/test_all.cc                               |  82 --------
 158 files changed, 8144 deletions(-)
 delete mode 100644 volk/lib/qa_16s_convert_32f_aligned16.cc
 delete mode 100644 volk/lib/qa_16s_convert_32f_aligned16.h
 delete mode 100644 volk/lib/qa_16s_convert_32f_unaligned16.cc
 delete mode 100644 volk/lib/qa_16s_convert_32f_unaligned16.h
 delete mode 100644 volk/lib/qa_16s_convert_8s_aligned16.cc
 delete mode 100644 volk/lib/qa_16s_convert_8s_aligned16.h
 delete mode 100644 volk/lib/qa_16s_convert_8s_unaligned16.cc
 delete mode 100644 volk/lib/qa_16s_convert_8s_unaligned16.h
 delete mode 100644 volk/lib/qa_16s_max_star_aligned16.cc
 delete mode 100644 volk/lib/qa_16s_max_star_aligned16.h
 delete mode 100644 volk/lib/qa_16s_max_star_horizontal_aligned16.cc
 delete mode 100644 volk/lib/qa_16s_max_star_horizontal_aligned16.h
 delete mode 100644 volk/lib/qa_16sc_deinterleave_16s_aligned16.cc
 delete mode 100644 volk/lib/qa_16sc_deinterleave_16s_aligned16.h
 delete mode 100644 volk/lib/qa_16sc_deinterleave_32f_aligned16.cc
 delete mode 100644 volk/lib/qa_16sc_deinterleave_32f_aligned16.h
 delete mode 100644 volk/lib/qa_16sc_deinterleave_real_16s_aligned16.cc
 delete mode 100644 volk/lib/qa_16sc_deinterleave_real_16s_aligned16.h
 delete mode 100644 volk/lib/qa_16sc_deinterleave_real_32f_aligned16.cc
 delete mode 100644 volk/lib/qa_16sc_deinterleave_real_32f_aligned16.h
 delete mode 100644 volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc
 delete mode 100644 volk/lib/qa_16sc_deinterleave_real_8s_aligned16.h
 delete mode 100644 volk/lib/qa_16sc_magnitude_16s_aligned16.cc
 delete mode 100644 volk/lib/qa_16sc_magnitude_16s_aligned16.h
 delete mode 100644 volk/lib/qa_16sc_magnitude_32f_aligned16.cc
 delete mode 100644 volk/lib/qa_16sc_magnitude_32f_aligned16.h
 delete mode 100644 volk/lib/qa_16u_byteswap_aligned16.cc
 delete mode 100644 volk/lib/qa_16u_byteswap_aligned16.h
 delete mode 100644 volk/lib/qa_32f_accumulator_aligned16.cc
 delete mode 100644 volk/lib/qa_32f_accumulator_aligned16.h
 delete mode 100644 volk/lib/qa_32f_add_aligned16.cc
 delete mode 100644 volk/lib/qa_32f_add_aligned16.h
 delete mode 100644 volk/lib/qa_32f_calc_spectral_noise_floor_aligned16.cc
 delete mode 100644 volk/lib/qa_32f_calc_spectral_noise_floor_aligned16.h
 delete mode 100644 volk/lib/qa_32f_convert_16s_aligned16.cc
 delete mode 100644 volk/lib/qa_32f_convert_16s_aligned16.h
 delete mode 100644 volk/lib/qa_32f_convert_16s_unaligned16.cc
 delete mode 100644 volk/lib/qa_32f_convert_16s_unaligned16.h
 delete mode 100644 volk/lib/qa_32f_convert_32s_aligned16.cc
 delete mode 100644 volk/lib/qa_32f_convert_32s_aligned16.h
 delete mode 100644 volk/lib/qa_32f_convert_32s_unaligned16.cc
 delete mode 100644 volk/lib/qa_32f_convert_32s_unaligned16.h
 delete mode 100644 volk/lib/qa_32f_convert_64f_aligned16.cc
 delete mode 100644 volk/lib/qa_32f_convert_64f_aligned16.h
 delete mode 100644 volk/lib/qa_32f_convert_64f_unaligned16.cc
 delete mode 100644 volk/lib/qa_32f_convert_64f_unaligned16.h
 delete mode 100644 volk/lib/qa_32f_convert_8s_aligned16.cc
 delete mode 100644 volk/lib/qa_32f_convert_8s_aligned16.h
 delete mode 100644 volk/lib/qa_32f_convert_8s_unaligned16.cc
 delete mode 100644 volk/lib/qa_32f_convert_8s_unaligned16.h
 delete mode 100644 volk/lib/qa_32f_divide_aligned16.cc
 delete mode 100644 volk/lib/qa_32f_divide_aligned16.h
 delete mode 100644 volk/lib/qa_32f_dot_prod_aligned16.cc
 delete mode 100644 volk/lib/qa_32f_dot_prod_aligned16.h
 delete mode 100644 volk/lib/qa_32f_dot_prod_unaligned16.cc
 delete mode 100644 volk/lib/qa_32f_dot_prod_unaligned16.h
 delete mode 100644 volk/lib/qa_32f_interleave_16sc_aligned16.cc
 delete mode 100644 volk/lib/qa_32f_interleave_16sc_aligned16.h
 delete mode 100644 volk/lib/qa_32f_interleave_32fc_aligned16.cc
 delete mode 100644 volk/lib/qa_32f_interleave_32fc_aligned16.h
 delete mode 100644 volk/lib/qa_32f_max_aligned16.cc
 delete mode 100644 volk/lib/qa_32f_max_aligned16.h
 delete mode 100644 volk/lib/qa_32f_min_aligned16.cc
 delete mode 100644 volk/lib/qa_32f_min_aligned16.h
 delete mode 100644 volk/lib/qa_32f_multiply_aligned16.cc
 delete mode 100644 volk/lib/qa_32f_multiply_aligned16.h
 delete mode 100644 volk/lib/qa_32f_normalize_aligned16.cc
 delete mode 100644 volk/lib/qa_32f_normalize_aligned16.h
 delete mode 100644 volk/lib/qa_32f_power_aligned16.cc
 delete mode 100644 volk/lib/qa_32f_power_aligned16.h
 delete mode 100644 volk/lib/qa_32f_sqrt_aligned16.cc
 delete mode 100644 volk/lib/qa_32f_sqrt_aligned16.h
 delete mode 100644 volk/lib/qa_32f_stddev_aligned16.cc
 delete mode 100644 volk/lib/qa_32f_stddev_aligned16.h
 delete mode 100644 volk/lib/qa_32f_stddev_and_mean_aligned16.cc
 delete mode 100644 volk/lib/qa_32f_stddev_and_mean_aligned16.h
 delete mode 100644 volk/lib/qa_32f_subtract_aligned16.cc
 delete mode 100644 volk/lib/qa_32f_subtract_aligned16.h
 delete mode 100644 volk/lib/qa_32f_sum_of_poly_aligned16.cc
 delete mode 100644 volk/lib/qa_32f_sum_of_poly_aligned16.h
 delete mode 100644 volk/lib/qa_32fc_32f_multiply_aligned16.cc
 delete mode 100644 volk/lib/qa_32fc_32f_multiply_aligned16.h
 delete mode 100644 volk/lib/qa_32fc_32f_power_32fc_aligned16.cc
 delete mode 100644 volk/lib/qa_32fc_32f_power_32fc_aligned16.h
 delete mode 100644 volk/lib/qa_32fc_atan2_32f_aligned16.cc
 delete mode 100644 volk/lib/qa_32fc_atan2_32f_aligned16.h
 delete mode 100644 volk/lib/qa_32fc_conjugate_dot_prod_aligned16.cc
 delete mode 100644 volk/lib/qa_32fc_conjugate_dot_prod_aligned16.h
 delete mode 100644 volk/lib/qa_32fc_deinterleave_32f_aligned16.cc
 delete mode 100644 volk/lib/qa_32fc_deinterleave_32f_aligned16.h
 delete mode 100644 volk/lib/qa_32fc_deinterleave_64f_aligned16.cc
 delete mode 100644 volk/lib/qa_32fc_deinterleave_64f_aligned16.h
 delete mode 100644 volk/lib/qa_32fc_deinterleave_real_16s_aligned16.cc
 delete mode 100644 volk/lib/qa_32fc_deinterleave_real_16s_aligned16.h
 delete mode 100644 volk/lib/qa_32fc_deinterleave_real_32f_aligned16.cc
 delete mode 100644 volk/lib/qa_32fc_deinterleave_real_32f_aligned16.h
 delete mode 100644 volk/lib/qa_32fc_deinterleave_real_64f_aligned16.cc
 delete mode 100644 volk/lib/qa_32fc_deinterleave_real_64f_aligned16.h
 delete mode 100644 volk/lib/qa_32fc_dot_prod_aligned16.cc
 delete mode 100644 volk/lib/qa_32fc_dot_prod_aligned16.h
 delete mode 100644 volk/lib/qa_32fc_magnitude_16s_aligned16.cc
 delete mode 100644 volk/lib/qa_32fc_magnitude_16s_aligned16.h
 delete mode 100644 volk/lib/qa_32fc_magnitude_32f_aligned16.cc
 delete mode 100644 volk/lib/qa_32fc_magnitude_32f_aligned16.h
 delete mode 100644 volk/lib/qa_32fc_multiply_aligned16.cc
 delete mode 100644 volk/lib/qa_32fc_multiply_aligned16.h
 delete mode 100644 volk/lib/qa_32fc_power_spectrum_32f_aligned16.cc
 delete mode 100644 volk/lib/qa_32fc_power_spectrum_32f_aligned16.h
 delete mode 100644 volk/lib/qa_32fc_square_dist_aligned16.cc
 delete mode 100644 volk/lib/qa_32fc_square_dist_aligned16.h
 delete mode 100644 volk/lib/qa_32fc_square_dist_scalar_mult_aligned16.cc
 delete mode 100644 volk/lib/qa_32fc_square_dist_scalar_mult_aligned16.h
 delete mode 100644 volk/lib/qa_32s_and_aligned16.cc
 delete mode 100644 volk/lib/qa_32s_and_aligned16.h
 delete mode 100644 volk/lib/qa_32s_convert_32f_aligned16.cc
 delete mode 100644 volk/lib/qa_32s_convert_32f_aligned16.h
 delete mode 100644 volk/lib/qa_32s_convert_32f_unaligned16.cc
 delete mode 100644 volk/lib/qa_32s_convert_32f_unaligned16.h
 delete mode 100644 volk/lib/qa_32s_or_aligned16.cc
 delete mode 100644 volk/lib/qa_32s_or_aligned16.h
 delete mode 100644 volk/lib/qa_32u_byteswap_aligned16.cc
 delete mode 100644 volk/lib/qa_32u_byteswap_aligned16.h
 delete mode 100644 volk/lib/qa_64f_convert_32f_aligned16.cc
 delete mode 100644 volk/lib/qa_64f_convert_32f_aligned16.h
 delete mode 100644 volk/lib/qa_64f_convert_32f_unaligned16.cc
 delete mode 100644 volk/lib/qa_64f_convert_32f_unaligned16.h
 delete mode 100644 volk/lib/qa_64f_max_aligned16.cc
 delete mode 100644 volk/lib/qa_64f_max_aligned16.h
 delete mode 100644 volk/lib/qa_64f_min_aligned16.cc
 delete mode 100644 volk/lib/qa_64f_min_aligned16.h
 delete mode 100644 volk/lib/qa_64u_byteswap_aligned16.cc
 delete mode 100644 volk/lib/qa_64u_byteswap_aligned16.h
 delete mode 100644 volk/lib/qa_8s_convert_16s_aligned16.cc
 delete mode 100644 volk/lib/qa_8s_convert_16s_aligned16.h
 delete mode 100644 volk/lib/qa_8s_convert_16s_unaligned16.cc
 delete mode 100644 volk/lib/qa_8s_convert_16s_unaligned16.h
 delete mode 100644 volk/lib/qa_8s_convert_32f_aligned16.cc
 delete mode 100644 volk/lib/qa_8s_convert_32f_aligned16.h
 delete mode 100644 volk/lib/qa_8s_convert_32f_unaligned16.cc
 delete mode 100644 volk/lib/qa_8s_convert_32f_unaligned16.h
 delete mode 100644 volk/lib/qa_8sc_deinterleave_16s_aligned16.cc
 delete mode 100644 volk/lib/qa_8sc_deinterleave_16s_aligned16.h
 delete mode 100644 volk/lib/qa_8sc_deinterleave_32f_aligned16.cc
 delete mode 100644 volk/lib/qa_8sc_deinterleave_32f_aligned16.h
 delete mode 100644 volk/lib/qa_8sc_deinterleave_real_16s_aligned16.cc
 delete mode 100644 volk/lib/qa_8sc_deinterleave_real_16s_aligned16.h
 delete mode 100644 volk/lib/qa_8sc_deinterleave_real_32f_aligned16.cc
 delete mode 100644 volk/lib/qa_8sc_deinterleave_real_32f_aligned16.h
 delete mode 100644 volk/lib/qa_8sc_deinterleave_real_8s_aligned16.cc
 delete mode 100644 volk/lib/qa_8sc_deinterleave_real_8s_aligned16.h
 delete mode 100644 volk/lib/qa_8sc_multiply_conjugate_16sc_aligned16.cc
 delete mode 100644 volk/lib/qa_8sc_multiply_conjugate_16sc_aligned16.h
 delete mode 100644 volk/lib/qa_8sc_multiply_conjugate_32fc_aligned16.cc
 delete mode 100644 volk/lib/qa_8sc_multiply_conjugate_32fc_aligned16.h
 delete mode 100644 volk/lib/qa_volk.cc
 delete mode 100644 volk/lib/qa_volk.h
 delete mode 100644 volk/lib/test_all.cc

(limited to 'volk/lib')

diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am
index 63df85244..bbc993fa2 100644
--- a/volk/lib/Makefile.am
+++ b/volk/lib/Makefile.am
@@ -110,7 +110,6 @@ endif
 # ----------------------------------------------------------------
 noinst_HEADERS = \
 	volk_init.h \
-	qa_volk.h \
 	qa_utils.h \
 	assembly.h
 
diff --git a/volk/lib/qa_16s_convert_32f_aligned16.cc b/volk/lib/qa_16s_convert_32f_aligned16.cc
deleted file mode 100644
index 6215f4a64..000000000
--- a/volk/lib/qa_16s_convert_32f_aligned16.cc
+++ /dev/null
@@ -1,74 +0,0 @@
-#include <volk/volk_runtime.h>
-#include <volk/volk.h>
-#include <qa_16s_convert_32f_aligned16.h>
-#include <volk/volk_16s_convert_32f_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse2
-
-#ifndef LV_HAVE_SSE
-
-void qa_16s_convert_32f_aligned16::t1() {
-  printf("sse not available... no test performed\n");
-}
-
-#else
-
-void qa_16s_convert_32f_aligned16::t1() {
-  
-  volk_runtime_init();
-
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  int16_t input0[vlen] __attribute__ ((aligned (16)));
-  
-  float output_generic[vlen] __attribute__ ((aligned (16)));
-  float output_sse[vlen] __attribute__ ((aligned (16)));
-  float output_sse4_1[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((int16_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)) * 32768.0));
-  }
-  printf("16s_convert_32f_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16s_convert_32f_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16s_convert_32f_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    get_volk_runtime()->volk_16s_convert_32f_aligned16(output_sse4_1, input0, 32768.0, vlen);
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse4_1_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse[i]);
-    CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse4_1[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_16s_convert_32f_aligned16.h b/volk/lib/qa_16s_convert_32f_aligned16.h
deleted file mode 100644
index ef813d96f..000000000
--- a/volk/lib/qa_16s_convert_32f_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_16S_CONVERT_32F_ALIGNED16_H
-#define INCLUDED_QA_16S_CONVERT_32F_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_16s_convert_32f_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_16s_convert_32f_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_16S_CONVERT_32F_ALIGNED16_H */
diff --git a/volk/lib/qa_16s_convert_32f_unaligned16.cc b/volk/lib/qa_16s_convert_32f_unaligned16.cc
deleted file mode 100644
index 46c2e48ac..000000000
--- a/volk/lib/qa_16s_convert_32f_unaligned16.cc
+++ /dev/null
@@ -1,74 +0,0 @@
-#include <volk/volk_runtime.h>
-#include <volk/volk.h>
-#include <qa_16s_convert_32f_unaligned16.h>
-#include <volk/volk_16s_convert_32f_unaligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse2
-
-#ifndef LV_HAVE_SSE
-
-void qa_16s_convert_32f_unaligned16::t1() {
-  printf("sse not available... no test performed\n");
-}
-
-#else
-
-void qa_16s_convert_32f_unaligned16::t1() {
-  
-  volk_runtime_init();
-
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  int16_t input0[vlen] __attribute__ ((aligned (16)));
-  
-  float output_generic[vlen] __attribute__ ((aligned (16)));
-  float output_sse[vlen] __attribute__ ((aligned (16)));
-  float output_sse4_1[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((int16_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)) * 32768.0));
-  }
-  printf("16s_convert_32f_unaligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16s_convert_32f_unaligned16_manual(output_generic, input0, 32768.0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16s_convert_32f_unaligned16_manual(output_sse, input0, 32768.0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    get_volk_runtime()->volk_16s_convert_32f_unaligned16(output_sse4_1, input0, 32768.0, vlen);
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse4_1_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse[i]);
-    CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse4_1[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_16s_convert_32f_unaligned16.h b/volk/lib/qa_16s_convert_32f_unaligned16.h
deleted file mode 100644
index aeb04f770..000000000
--- a/volk/lib/qa_16s_convert_32f_unaligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_16S_CONVERT_32F_UNALIGNED16_H
-#define INCLUDED_QA_16S_CONVERT_32F_UNALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_16s_convert_32f_unaligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_16s_convert_32f_unaligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_16S_CONVERT_32F_UNALIGNED16_H */
diff --git a/volk/lib/qa_16s_convert_8s_aligned16.cc b/volk/lib/qa_16s_convert_8s_aligned16.cc
deleted file mode 100644
index 8225aa0cf..000000000
--- a/volk/lib/qa_16s_convert_8s_aligned16.cc
+++ /dev/null
@@ -1,61 +0,0 @@
-#include <volk/volk.h>
-#include <qa_16s_convert_8s_aligned16.h>
-#include <volk/volk_16s_convert_8s_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse2
-
-#ifndef LV_HAVE_SSE2
-
-void qa_16s_convert_8s_aligned16::t1() {
-  printf("sse2 not available... no test performed\n");
-}
-
-#else
-
-void qa_16s_convert_8s_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  int16_t input0[vlen] __attribute__ ((aligned (16)));
-  
-  int8_t output_generic[vlen] __attribute__ ((aligned (16)));
-  int8_t output_sse2[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((int16_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)) * 32768.0));
-  }
-  printf("16s_convert_8s_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16s_convert_8s_aligned16_manual(output_generic, input0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16s_convert_8s_aligned16_manual(output_sse2, input0, vlen, "sse2");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse2_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d -> %d...%d\n", input0[i], output_generic[i], output_sse2[i]);
-    CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse2[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_16s_convert_8s_aligned16.h b/volk/lib/qa_16s_convert_8s_aligned16.h
deleted file mode 100644
index 2e409d0cc..000000000
--- a/volk/lib/qa_16s_convert_8s_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_16S_CONVERT_8S_ALIGNED16_H
-#define INCLUDED_QA_16S_CONVERT_8S_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_16s_convert_8s_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_16s_convert_8s_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_16S_CONVERT_8S_ALIGNED16_H */
diff --git a/volk/lib/qa_16s_convert_8s_unaligned16.cc b/volk/lib/qa_16s_convert_8s_unaligned16.cc
deleted file mode 100644
index e6ce5030e..000000000
--- a/volk/lib/qa_16s_convert_8s_unaligned16.cc
+++ /dev/null
@@ -1,61 +0,0 @@
-#include <volk/volk.h>
-#include <qa_16s_convert_8s_unaligned16.h>
-#include <volk/volk_16s_convert_8s_unaligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse2
-
-#ifndef LV_HAVE_SSE2
-
-void qa_16s_convert_8s_unaligned16::t1() {
-  printf("sse2 not available... no test performed\n");
-}
-
-#else
-
-void qa_16s_convert_8s_unaligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  int16_t input0[vlen] __attribute__ ((aligned (16)));
-  
-  int8_t output_generic[vlen] __attribute__ ((aligned (16)));
-  int8_t output_sse2[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((int16_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)) * 32768.0));
-  }
-  printf("16s_convert_8s_unaligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16s_convert_8s_unaligned16_manual(output_generic, input0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16s_convert_8s_unaligned16_manual(output_sse2, input0, vlen, "sse2");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse2_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse2[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_16s_convert_8s_unaligned16.h b/volk/lib/qa_16s_convert_8s_unaligned16.h
deleted file mode 100644
index 4b2fe9e42..000000000
--- a/volk/lib/qa_16s_convert_8s_unaligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_16S_CONVERT_8S_UNALIGNED16_H
-#define INCLUDED_QA_16S_CONVERT_8S_UNALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_16s_convert_8s_unaligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_16s_convert_8s_unaligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_16S_CONVERT_8S_UNALIGNED16_H */
diff --git a/volk/lib/qa_16s_max_star_aligned16.cc b/volk/lib/qa_16s_max_star_aligned16.cc
deleted file mode 100644
index c6f828ba6..000000000
--- a/volk/lib/qa_16s_max_star_aligned16.cc
+++ /dev/null
@@ -1,65 +0,0 @@
-#include <volk/volk.h>
-#include <qa_16s_max_star_aligned16.h>
-#include <volk/volk_16s_max_star_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-//test for ssse3
-
-#ifndef LV_HAVE_SSSE3
-
-void qa_16s_max_star_aligned16::t1() {
-  printf("ssse3 not available... no test performed\n");
-}
-
-#else
-
-
-
-void qa_16s_max_star_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 6400;
-  const int ITERS = 100000;
-  short input0[vlen] __attribute__ ((aligned (16)));
-  short output0[1] __attribute__ ((aligned (16)));
-
-  short output1[1] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {
-    short plus0 = ((short) (rand() - (RAND_MAX/2))) >> 2;
-
-    short minus0 = ((short) (rand() - (RAND_MAX/2))) >> 2;
-    
-    input0[i] = plus0 - minus0;
-    
-  }
-  printf("16s_max_star_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16s_max_star_aligned16_manual(output0, input0, vlen << 1, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16s_max_star_aligned16_manual(output1, input0, vlen << 1, "ssse3");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("ssse3_time: %f\n", total);
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < 1; ++i) {
-    
-    CPPUNIT_ASSERT_EQUAL(output0[i], output1[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_16s_max_star_aligned16.h b/volk/lib/qa_16s_max_star_aligned16.h
deleted file mode 100644
index 119f87c4d..000000000
--- a/volk/lib/qa_16s_max_star_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_16S_MAX_STAR_ALIGNED16_H
-#define INCLUDED_QA_16S_MAX_STAR_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_16s_max_star_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_16s_max_star_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_16S_MAX_STAR_ALIGNED16_H */
diff --git a/volk/lib/qa_16s_max_star_horizontal_aligned16.cc b/volk/lib/qa_16s_max_star_horizontal_aligned16.cc
deleted file mode 100644
index 0a58570e2..000000000
--- a/volk/lib/qa_16s_max_star_horizontal_aligned16.cc
+++ /dev/null
@@ -1,79 +0,0 @@
-#include <volk/volk_runtime.h>
-#include <volk/volk.h>
-#include <qa_16s_max_star_horizontal_aligned16.h>
-#include <volk/volk_16s_max_star_horizontal_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-//test for ssse3
-
-#ifndef LV_HAVE_SSSE3
-
-void qa_16s_max_star_horizontal_aligned16::t1() {
-  printf("ssse3 not available... no test performed\n");
-}
-
-#else
-
-
-void qa_16s_max_star_horizontal_aligned16::t1() {
-
-  
-  volk_runtime_init();
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 32;
-  const int ITERS = 1;
-  short input0[vlen] __attribute__ ((aligned (16)));
-  short output0[vlen>>1] __attribute__ ((aligned (16)));
-
-  short output1[vlen>>1] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {
-    short plus0 = ((short) (rand() - (RAND_MAX/2)));
-    
-    short minus0 = ((short) (rand() - (RAND_MAX/2)));
-    
-    input0[i] = plus0 - minus0;
-    
-  }
-  printf("16s_max_star_horizontal_aligned\n");
-  
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16s_max_star_horizontal_aligned16_manual(output0, input0, 2*vlen, "generic");
-    volk_16s_max_star_horizontal_aligned16_manual(output0, output0, vlen, "generic");
-    volk_16s_max_star_horizontal_aligned16_manual(output0, output0, vlen/2, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-
-    get_volk_runtime()->volk_16s_max_star_horizontal_aligned16(output1, input0, 2*vlen);
-    get_volk_runtime()->volk_16s_max_star_horizontal_aligned16(output1, output1, vlen);
-    get_volk_runtime()->volk_16s_max_star_horizontal_aligned16(output1, output1, vlen);
-    /*    volk_16s_max_star_horizontal_aligned16(output1, input0, 2*vlen, "ssse3");
-    volk_16s_max_star_horizontal_aligned16(output1, output1, vlen, "ssse3");
-    volk_16s_max_star_horizontal_aligned16(output1, output1, vlen, "ssse3");*/
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("ssse3_time: %f\n", total);
-  
-  for(int i = 0; i < (vlen >> 1); ++i) {
-    //    printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-    
-  }
-  for(int i = 0; i < (vlen >> 1); ++i) {
-      
-      CPPUNIT_ASSERT_EQUAL(output0[i], output1[i]);
-    }
-	}
-   
-  
-#endif
-	
diff --git a/volk/lib/qa_16s_max_star_horizontal_aligned16.h b/volk/lib/qa_16s_max_star_horizontal_aligned16.h
deleted file mode 100644
index 9f9757253..000000000
--- a/volk/lib/qa_16s_max_star_horizontal_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_16S_MAX_STAR_HORIZONTAL_ALIGNED16_H
-#define INCLUDED_QA_16S_MAX_STAR_HORIZONTAL_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_16s_max_star_horizontal_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_16s_max_star_horizontal_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_16S_MAX_STAR_HORIZONTAL_ALIGNED16_H */
diff --git a/volk/lib/qa_16sc_deinterleave_16s_aligned16.cc b/volk/lib/qa_16sc_deinterleave_16s_aligned16.cc
deleted file mode 100644
index aadc39067..000000000
--- a/volk/lib/qa_16sc_deinterleave_16s_aligned16.cc
+++ /dev/null
@@ -1,89 +0,0 @@
-#include <volk/volk.h>
-#include <qa_16sc_deinterleave_16s_aligned16.h>
-#include <volk/volk_16sc_deinterleave_16s_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSSE3
-
-void qa_16sc_deinterleave_16s_aligned16::t1() {
-  printf("ssse3 not available... no test performed\n");
-}
-
-#else
-
-void qa_16sc_deinterleave_16s_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  std::complex<int16_t> input0[vlen] __attribute__ ((aligned (16)));
-  
-  int16_t output_generic[vlen] __attribute__ ((aligned (16)));
-  int16_t output_generic1[vlen] __attribute__ ((aligned (16)));
-  int16_t output_sse2[vlen] __attribute__ ((aligned (16)));
-  int16_t output_sse21[vlen] __attribute__ ((aligned (16)));
-  int16_t output_orc[vlen] __attribute__ ((aligned (16)));
-  int16_t output_orc1[vlen] __attribute__ ((aligned (16)));
-  int16_t output_ssse3[vlen] __attribute__ ((aligned (16)));
-  int16_t output_ssse31[vlen] __attribute__ ((aligned (16)));
-
-  int16_t* loadInput = (int16_t*)input0;
-  for(int i = 0; i < vlen*2; ++i) {   
-    loadInput[i] = ((int16_t)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 32678.0));
-  }
-  printf("16sc_deinterleave_16s_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16sc_deinterleave_16s_aligned16_manual(output_generic, output_generic1, input0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16sc_deinterleave_16s_aligned16_manual(output_orc, output_orc1, input0, vlen, "orc");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("orc_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16sc_deinterleave_16s_aligned16_manual(output_sse2, output_sse21, input0, vlen, "sse2");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse2_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16sc_deinterleave_16s_aligned16_manual(output_ssse3, output_ssse31, input0, vlen, "ssse3");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("ssse3_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output_generic[i],  output_sse2[i]);
-    CPPUNIT_ASSERT_EQUAL(output_generic1[i],  output_sse21[i]);
-
-    CPPUNIT_ASSERT_EQUAL(output_generic[i],  output_ssse3[i]);
-    CPPUNIT_ASSERT_EQUAL(output_generic1[i],  output_ssse31[i]);
-    
-    CPPUNIT_ASSERT_EQUAL(output_generic[i],  output_orc[i]);
-    CPPUNIT_ASSERT_EQUAL(output_generic1[i],  output_orc1[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_16sc_deinterleave_16s_aligned16.h b/volk/lib/qa_16sc_deinterleave_16s_aligned16.h
deleted file mode 100644
index 995ab5b34..000000000
--- a/volk/lib/qa_16sc_deinterleave_16s_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_16SC_DEINTERLEAVE_16S_ALIGNED16_H
-#define INCLUDED_QA_16SC_DEINTERLEAVE_16S_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_16sc_deinterleave_16s_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_16sc_deinterleave_16s_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_16SC_DEINTERLEAVE_16S_ALIGNED16_H */
diff --git a/volk/lib/qa_16sc_deinterleave_32f_aligned16.cc b/volk/lib/qa_16sc_deinterleave_32f_aligned16.cc
deleted file mode 100644
index 13151be13..000000000
--- a/volk/lib/qa_16sc_deinterleave_32f_aligned16.cc
+++ /dev/null
@@ -1,75 +0,0 @@
-#include <volk/volk.h>
-#include <qa_16sc_deinterleave_32f_aligned16.h>
-#include <volk/volk_16sc_deinterleave_32f_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE2
-
-void qa_16sc_deinterleave_32f_aligned16::t1() {
-  printf("sse2 not available... no test performed\n");
-}
-
-#else
-
-void qa_16sc_deinterleave_32f_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  std::complex<int16_t> input0[vlen] __attribute__ ((aligned (16)));
-  
-  float output_generic[vlen] __attribute__ ((aligned (16)));
-  float output_generic1[vlen] __attribute__ ((aligned (16)));
-  float output_sse2[vlen] __attribute__ ((aligned (16)));
-  float output_sse21[vlen] __attribute__ ((aligned (16)));
-  float output_orc[vlen] __attribute__ ((aligned (16)));
-  float output_orc1[vlen] __attribute__ ((aligned (16)));
-
-  int16_t* loadInput = (int16_t*)input0;
-  for(int i = 0; i < vlen*2; ++i) {   
-    loadInput[i] =((int16_t)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 32768.0));
-  }
-  printf("16sc_deinterleave_32f_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16sc_deinterleave_32f_aligned16_manual(output_generic, output_generic1, input0, 32768.0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16sc_deinterleave_32f_aligned16_manual(output_orc, output_orc1, input0, 32768.0, vlen, "orc");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("orc_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16sc_deinterleave_32f_aligned16_manual(output_sse2, output_sse21, input0, 32768.0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse2[i], fabs(output_generic[i])*1e-4);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic1[i],  output_sse21[i], fabs(output_generic1[i])*1e-4);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], fabs(output_generic[i])*1e-4);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic1[i],  output_orc1[i], fabs(output_generic1[i])*1e-4);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_16sc_deinterleave_32f_aligned16.h b/volk/lib/qa_16sc_deinterleave_32f_aligned16.h
deleted file mode 100644
index fea3b6c2d..000000000
--- a/volk/lib/qa_16sc_deinterleave_32f_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_16SC_DEINTERLEAVE_32F_ALIGNED16_H
-#define INCLUDED_QA_16SC_DEINTERLEAVE_32F_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_16sc_deinterleave_32f_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_16sc_deinterleave_32f_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_16SC_DEINTERLEAVE_32F_ALIGNED16_H */
diff --git a/volk/lib/qa_16sc_deinterleave_real_16s_aligned16.cc b/volk/lib/qa_16sc_deinterleave_real_16s_aligned16.cc
deleted file mode 100644
index c67064ea6..000000000
--- a/volk/lib/qa_16sc_deinterleave_real_16s_aligned16.cc
+++ /dev/null
@@ -1,72 +0,0 @@
-#include <volk/volk.h>
-#include <qa_16sc_deinterleave_real_16s_aligned16.h>
-#include <volk/volk_16sc_deinterleave_real_16s_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSSE3
-
-void qa_16sc_deinterleave_real_16s_aligned16::t1() {
-  printf("ssse3 not available... no test performed\n");
-}
-
-#else
-
-void qa_16sc_deinterleave_real_16s_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  std::complex<int16_t> input0[vlen] __attribute__ ((aligned (16)));
-  
-  int16_t output_generic[vlen] __attribute__ ((aligned (16)));
-  int16_t output_sse2[vlen] __attribute__ ((aligned (16)));
-  int16_t output_ssse3[vlen] __attribute__ ((aligned (16)));
-
-  int16_t* loadInput = (int16_t*)input0;
-  for(int i = 0; i < vlen*2; ++i) {   
-    loadInput[i] = ((int16_t)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 32678.0));
-  }
-  printf("16sc_deinterleave_real_16s_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16sc_deinterleave_real_16s_aligned16_manual(output_generic, input0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16sc_deinterleave_real_16s_aligned16_manual(output_sse2, input0, vlen, "sse2");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse2_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16sc_deinterleave_real_16s_aligned16_manual(output_ssse3, input0, vlen, "ssse3");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("ssse3_time: %f\n", total);
-
-  for(int i = 0; i < vlen; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    // printf("%d = generic... %d, sse2... %d, ssse3... %d\n", i, output_generic[i], output_sse2[i], output_ssse3[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse2[i], fabs(output_generic[i])*1e-4);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_ssse3[i], fabs(output_generic[i])*1e-4);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_16sc_deinterleave_real_16s_aligned16.h b/volk/lib/qa_16sc_deinterleave_real_16s_aligned16.h
deleted file mode 100644
index ebb70b97a..000000000
--- a/volk/lib/qa_16sc_deinterleave_real_16s_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_16SC_DEINTERLEAVE_REAL_16S_ALIGNED16_H
-#define INCLUDED_QA_16SC_DEINTERLEAVE_REAL_16S_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_16sc_deinterleave_real_16s_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_16sc_deinterleave_real_16s_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_16SC_DEINTERLEAVE_REAL_16S_ALIGNED16_H */
diff --git a/volk/lib/qa_16sc_deinterleave_real_32f_aligned16.cc b/volk/lib/qa_16sc_deinterleave_real_32f_aligned16.cc
deleted file mode 100644
index f86f03b88..000000000
--- a/volk/lib/qa_16sc_deinterleave_real_32f_aligned16.cc
+++ /dev/null
@@ -1,124 +0,0 @@
-#include <volk/volk_runtime.h>
-#include <volk/volk.h>
-#include <qa_16sc_deinterleave_real_32f_aligned16.h>
-#include <volk/volk_16sc_deinterleave_real_32f_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE4_1
-
-#ifndef LV_HAVE_SSE
-
-void qa_16sc_deinterleave_real_32f_aligned16::t1() {
-  printf("sse not available... no test performed\n");
-}
-
-#else
-
-void qa_16sc_deinterleave_real_32f_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  std::complex<int16_t> input0[vlen] __attribute__ ((aligned (16)));
-  
-  float output_generic[vlen] __attribute__ ((aligned (16)));
-  float output_sse[vlen] __attribute__ ((aligned (16)));
-
-  int16_t* loadInput = (int16_t*)input0;
-  for(int i = 0; i < vlen*2; ++i) {   
-    loadInput[i] =((int16_t)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 32768.0));
-  }
-  printf("16sc_deinterleave_real_32f_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16sc_deinterleave_real_32f_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16sc_deinterleave_real_32f_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4);
-  }
-}
-
-#endif /* SSE */
-
-#else
-
-void qa_16sc_deinterleave_real_32f_aligned16::t1() {
-  
-  volk_runtime_init();
-
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  std::complex<int16_t> input0[vlen] __attribute__ ((aligned (16)));
-  
-  float output_generic[vlen] __attribute__ ((aligned (16)));
-  float output_sse[vlen] __attribute__ ((aligned (16)));
-  float output_sse4_1[vlen] __attribute__ ((aligned (16)));
-
-  int16_t* loadInput = (int16_t*)input0;
-  for(int i = 0; i < vlen*2; ++i) {   
-    loadInput[i] =((int16_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 32768.0);
-  }
-  printf("16sc_deinterleave_real_32f_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16sc_deinterleave_real_32f_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16sc_deinterleave_real_32f_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    get_volk_runtime()->volk_16sc_deinterleave_real_32f_aligned16(output_sse4_1, input0, 32768.0, vlen);
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse4_1_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse4_1[i], fabs(output_generic[i])*1e-4);
-  }
-}
-
-#endif /* SSE4_1 */
diff --git a/volk/lib/qa_16sc_deinterleave_real_32f_aligned16.h b/volk/lib/qa_16sc_deinterleave_real_32f_aligned16.h
deleted file mode 100644
index e83426473..000000000
--- a/volk/lib/qa_16sc_deinterleave_real_32f_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_16SC_DEINTERLEAVE_REAL_32F_ALIGNED16_H
-#define INCLUDED_QA_16SC_DEINTERLEAVE_REAL_32F_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_16sc_deinterleave_real_32f_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_16sc_deinterleave_real_32f_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_16SC_DEINTERLEAVE_REAL_32F_ALIGNED16_H */
diff --git a/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc b/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc
deleted file mode 100644
index 803caaa2d..000000000
--- a/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.cc
+++ /dev/null
@@ -1,70 +0,0 @@
-#include <volk/volk.h>
-#include <qa_16sc_deinterleave_real_8s_aligned16.h>
-#include <volk/volk_16sc_deinterleave_real_8s_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSSE3
-
-void qa_16sc_deinterleave_real_8s_aligned16::t1() {
-  printf("ssse3 not available... no test performed\n");
-}
-
-#else
-
-void qa_16sc_deinterleave_real_8s_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  std::complex<int16_t> input0[vlen] __attribute__ ((aligned (16)));
-  
-  int8_t output_generic[vlen] __attribute__ ((aligned (16)));
-  int8_t output_ssse3[vlen] __attribute__ ((aligned (16)));
-  int8_t output_orc[vlen] __attribute__ ((aligned (16)));
-
-  int16_t* loadInput = (int16_t*)input0;
-  for(int i = 0; i < vlen*2; ++i) {   
-    loadInput[i] =((int16_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 32768.0);
-  }
-  printf("16sc_deinterleave_real_8s_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16sc_deinterleave_real_8s_aligned16_manual(output_generic, input0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16sc_deinterleave_real_8s_aligned16_manual(output_orc, input0, vlen, "orc");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("orc_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16sc_deinterleave_real_8s_aligned16_manual(output_ssse3, input0, vlen, "ssse3");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("ssse3_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output_generic[i], output_ssse3[i]);
-    CPPUNIT_ASSERT_EQUAL(output_generic[i], output_orc[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.h b/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.h
deleted file mode 100644
index 04e5511e5..000000000
--- a/volk/lib/qa_16sc_deinterleave_real_8s_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_16SC_DEINTERLEAVE_REAL_8S_ALIGNED16_H
-#define INCLUDED_QA_16SC_DEINTERLEAVE_REAL_8S_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_16sc_deinterleave_real_8s_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_16sc_deinterleave_real_8s_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_16SC_DEINTERLEAVE_REAL_8S_ALIGNED16_H */
diff --git a/volk/lib/qa_16sc_magnitude_16s_aligned16.cc b/volk/lib/qa_16sc_magnitude_16s_aligned16.cc
deleted file mode 100644
index 7fbdd8620..000000000
--- a/volk/lib/qa_16sc_magnitude_16s_aligned16.cc
+++ /dev/null
@@ -1,81 +0,0 @@
-#include <volk/volk.h>
-#include <qa_16sc_magnitude_16s_aligned16.h>
-#include <volk/volk_16sc_magnitude_16s_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE3
-
-void qa_16sc_magnitude_16s_aligned16::t1() {
-  printf("sse3 not available... no test performed\n");
-}
-
-#else
-
-void qa_16sc_magnitude_16s_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  std::complex<int16_t> input0[vlen] __attribute__ ((aligned (16)));
-  
-  int16_t output_generic[vlen] __attribute__ ((aligned (16)));
-  int16_t output_orc[vlen] __attribute__ ((aligned (16)));
-  int16_t output_sse[vlen] __attribute__ ((aligned (16)));
-  int16_t output_sse3[vlen] __attribute__ ((aligned (16)));
-
-  int16_t* loadInput = (int16_t*)input0;
-  for(int i = 0; i < vlen*2; ++i) {   
-    loadInput[i] =((int16_t)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 32768.0));
-  }
-  printf("16sc_magnitude_16s_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16sc_magnitude_16s_aligned16_manual(output_generic, input0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16sc_magnitude_16s_aligned16_manual(output_orc, input0, vlen, "orc");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("orc_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16sc_magnitude_16s_aligned16_manual(output_sse, input0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16sc_magnitude_16s_aligned16_manual(output_sse3, input0, vlen, "sse3");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse3_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], 1.1);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], 1.1);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], 1.1);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_16sc_magnitude_16s_aligned16.h b/volk/lib/qa_16sc_magnitude_16s_aligned16.h
deleted file mode 100644
index 4664b70f4..000000000
--- a/volk/lib/qa_16sc_magnitude_16s_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_16SC_MAGNITUDE_16S_ALIGNED16_H
-#define INCLUDED_QA_16SC_MAGNITUDE_16S_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_16sc_magnitude_16s_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_16sc_magnitude_16s_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_16SC_MAGNITUDE_16S_ALIGNED16_H */
diff --git a/volk/lib/qa_16sc_magnitude_32f_aligned16.cc b/volk/lib/qa_16sc_magnitude_32f_aligned16.cc
deleted file mode 100644
index 54cc2ba6e..000000000
--- a/volk/lib/qa_16sc_magnitude_32f_aligned16.cc
+++ /dev/null
@@ -1,131 +0,0 @@
-#include <volk/volk.h>
-#include <qa_16sc_magnitude_32f_aligned16.h>
-#include <volk/volk_16sc_magnitude_32f_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE3
-
-void qa_16sc_magnitude_32f_aligned16::t1() {
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 10000;
-  std::complex<int16_t> input0[vlen] __attribute__ ((aligned (16)));
-  
-  float output_generic[vlen] __attribute__ ((aligned (16)));
-  float output_orc[vlen] __attribute__ ((aligned (16)));
-  float output_known[vlen] __attribute__ ((aligned (16)));
-
-  int16_t* inputLoad = (int16_t*)input0;
-  for(int i = 0; i < 2*vlen; ++i) {   
-    inputLoad[i] = (int16_t)(rand() - (RAND_MAX/2));
-  }
-  printf("16sc_magnitude_32f_aligned\n");
-
-  float scale = 32768.0;
-  for(int i = 0; i < vlen; ++i) {   
-    float re = (float)(input0[i].real())/scale;
-    float im = (float)(input0[i].imag())/scale;
-    output_known[i] = sqrt(re*re + im*im);
-  }
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16sc_magnitude_32f_aligned16_manual(output_generic, input0, scale, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16sc_magnitude_32f_aligned16_manual(output_orc, input0, scale, vlen, "orc");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("orc_time: %f\n", total);
-
-  /*
-  for(int i = 0; i < 100; ++i) {
-    printf("inputs: %d + j%d\n", input0[i].real(), input0[i].imag());
-    printf("generic... %f == %f\n", output_generic[i], output_known[i]);
-  }
-  */
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_known[i], fabs(output_generic[i])*1e-4);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_orc[i], output_known[i], fabs(output_generic[i])*1e-4);
-  }
-}
-
-#else
-
-void qa_16sc_magnitude_32f_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  std::complex<int16_t> input0[vlen] __attribute__ ((aligned (16)));
-  
-  float output_generic[vlen] __attribute__ ((aligned (16)));
-  float output_orc[vlen] __attribute__ ((aligned (16)));
-  float output_sse[vlen] __attribute__ ((aligned (16)));
-  float output_sse3[vlen] __attribute__ ((aligned (16)));
-
-  int16_t* inputLoad = (int16_t*)input0;
-  for(int i = 0; i < 2*vlen; ++i) {   
-    inputLoad[i] = (int16_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)));
-  }
-  printf("16sc_magnitude_32f_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16sc_magnitude_32f_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-/*  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16sc_magnitude_32f_aligned16_manual(output_orc, input0, 32768.0, vlen, "orc");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("orc_time: %f\n", total);
-*/
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16sc_magnitude_32f_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16sc_magnitude_32f_aligned16_manual(output_sse3, input0, 32768.0, vlen, "sse3");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse3_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], fabs(output_generic[i])*1e-4);
-//    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], fabs(output_generic[i])*1e-4);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_16sc_magnitude_32f_aligned16.h b/volk/lib/qa_16sc_magnitude_32f_aligned16.h
deleted file mode 100644
index 0c25673ea..000000000
--- a/volk/lib/qa_16sc_magnitude_32f_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_16SC_MAGNITUDE_32F_ALIGNED16_H
-#define INCLUDED_QA_16SC_MAGNITUDE_32F_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_16sc_magnitude_32f_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_16sc_magnitude_32f_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_16SC_MAGNITUDE_32F_ALIGNED16_H */
diff --git a/volk/lib/qa_16u_byteswap_aligned16.cc b/volk/lib/qa_16u_byteswap_aligned16.cc
deleted file mode 100644
index c2295968b..000000000
--- a/volk/lib/qa_16u_byteswap_aligned16.cc
+++ /dev/null
@@ -1,71 +0,0 @@
-#include <volk/volk.h>
-#include <qa_16u_byteswap_aligned16.h>
-#include <volk/volk_16u_byteswap_aligned16.h>
-#include <cstdlib>
-#include <cstring>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE2
-
-void qa_16u_byteswap_aligned16::t1() {
-  printf("sse2 not available... no test performed\n");
-}
-
-#else
-
-void qa_16u_byteswap_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100001;
-  
-  uint16_t output0[vlen] __attribute__ ((aligned (16)));
-  uint16_t output01[vlen] __attribute__ ((aligned (16)));
-  uint16_t output02[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    output0[i] = (uint16_t) ((rand() - (RAND_MAX/2)) / (RAND_MAX/2));
-  }
-  memcpy(output01, output0, vlen*sizeof(uint16_t));
-  memcpy(output02, output0, vlen*sizeof(uint16_t));
-
-  printf("16u_byteswap_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16u_byteswap_aligned16_manual(output0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16u_byteswap_aligned16_manual(output02, vlen, "orc");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("orc_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16u_byteswap_aligned16_manual(output01, vlen, "sse2");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse2_time: %f\n", total);
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]);    
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_16u_byteswap_aligned16.h b/volk/lib/qa_16u_byteswap_aligned16.h
deleted file mode 100644
index e11b23e3f..000000000
--- a/volk/lib/qa_16u_byteswap_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_16U_BYTESWAP_ALIGNED16_H
-#define INCLUDED_QA_16U_BYTESWAP_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_16u_byteswap_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_16u_byteswap_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_16U_BYTESWAP_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_accumulator_aligned16.cc b/volk/lib/qa_32f_accumulator_aligned16.cc
deleted file mode 100644
index 0defef283..000000000
--- a/volk/lib/qa_32f_accumulator_aligned16.cc
+++ /dev/null
@@ -1,57 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32f_accumulator_aligned16.h>
-#include <volk/volk_32f_accumulator_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE
-
-void qa_32f_accumulator_aligned16::t1() {
-  printf("sse not available... no test performed\n");
-}
-
-#else
-
-void qa_32f_accumulator_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  float input0[vlen] __attribute__ ((aligned (16)));
-  
-  float accumulator_generic;
-  float accumulator_sse;
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-  }
-  printf("32f_accumulator_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_accumulator_aligned16_manual(&accumulator_generic, input0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_accumulator_aligned16_manual(&accumulator_sse, input0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  //printf("%d...%d\n", output0[i], output01[i]);
-  CPPUNIT_ASSERT_DOUBLES_EQUAL(accumulator_generic, accumulator_sse, fabs(accumulator_generic)*1e-4);
-}
-
-#endif
diff --git a/volk/lib/qa_32f_accumulator_aligned16.h b/volk/lib/qa_32f_accumulator_aligned16.h
deleted file mode 100644
index 0004d3ff0..000000000
--- a/volk/lib/qa_32f_accumulator_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_ACCUMULATOR_ALIGNED16_H
-#define INCLUDED_QA_32F_ACCUMULATOR_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_accumulator_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_accumulator_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_ACCUMULATOR_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_add_aligned16.cc b/volk/lib/qa_32f_add_aligned16.cc
deleted file mode 100644
index a183d4d85..000000000
--- a/volk/lib/qa_32f_add_aligned16.cc
+++ /dev/null
@@ -1,123 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2010 Free Software Foundation, Inc.
- * 
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- * 
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, see 
- * <http://www.gnu.org/licenses/>.
- */
-
-#include <volk/volk.h>
-#include <qa_32f_add_aligned16.h>
-#include <volk/volk_32f_add_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE
-
-void qa_32f_add_aligned16::t1() {
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 10000;
-  float input0[vlen] __attribute__ ((aligned (16)));
-  float input1[vlen] __attribute__ ((aligned (16)));
-  
-  float output0[vlen] __attribute__ ((aligned (16)));
-  float output_known[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-    input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-    output_known[i] = input0[i] + input1[i];
-  }
-  printf("32f_add_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_add_aligned16_manual(output0, input0, input1, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  /*
-  for(int i = 0; i < 10; ++i) {
-    printf("inputs: %f, %f\n", input0[i], input1[i]);
-    printf("generic... %f == %f\n", output0[i], output_known[i]);
-  }
-  */
-  
-  for(int i = 0; i < vlen; ++i) {
-    CPPUNIT_ASSERT_EQUAL(output0[i], output_known[i]);
-  }
-}
-
-#else
-
-void qa_32f_add_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  float input0[vlen] __attribute__ ((aligned (16)));
-  float input1[vlen] __attribute__ ((aligned (16)));
-  
-  float output0[vlen] __attribute__ ((aligned (16)));
-  float output01[vlen] __attribute__ ((aligned (16)));
-  float output02[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-    input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-  }
-  printf("32f_add_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_add_aligned16_manual(output0, input0, input1, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_add_aligned16_manual(output02, input0, input1, vlen, "orc");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("orc_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_add_aligned16_manual(output01, input0, input1, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32f_add_aligned16.h b/volk/lib/qa_32f_add_aligned16.h
deleted file mode 100644
index 58e2a151c..000000000
--- a/volk/lib/qa_32f_add_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_ADD_ALIGNED16_H
-#define INCLUDED_QA_32F_ADD_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_add_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_add_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_ADD_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_calc_spectral_noise_floor_aligned16.cc b/volk/lib/qa_32f_calc_spectral_noise_floor_aligned16.cc
deleted file mode 100644
index 5d6987333..000000000
--- a/volk/lib/qa_32f_calc_spectral_noise_floor_aligned16.cc
+++ /dev/null
@@ -1,60 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32f_calc_spectral_noise_floor_aligned16.h>
-#include <volk/volk_32f_calc_spectral_noise_floor_aligned16.h>
-#include <cstdlib>
-#include <math.h>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE
-
-void qa_32f_calc_spectral_noise_floor_aligned16::t1() {
-  printf("sse not available... no test performed\n");
-}
-
-#else
-
-void qa_32f_calc_spectral_noise_floor_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  float input0[vlen] __attribute__ ((aligned (16)));
-  
-  float output0[1] __attribute__ ((aligned (16)));
-  float output01[1] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-  }
-  printf("32f_calc_spectral_noise_floor_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_calc_spectral_noise_floor_aligned16_manual(output0, input0, 20, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_calc_spectral_noise_floor_aligned16_manual(output01, input0, 20, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < 1; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output0[i], output01[i], fabs(output0[i])*1e-4);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32f_calc_spectral_noise_floor_aligned16.h b/volk/lib/qa_32f_calc_spectral_noise_floor_aligned16.h
deleted file mode 100644
index c5dce2c4b..000000000
--- a/volk/lib/qa_32f_calc_spectral_noise_floor_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_CALC_SPECTRAL_NOISE_FLOOR_ALIGNED16_H
-#define INCLUDED_QA_32F_CALC_SPECTRAL_NOISE_FLOOR_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_calc_spectral_noise_floor_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_calc_spectral_noise_floor_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_CALC_SPECTRAL_NOISE_FLOOR_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_convert_16s_aligned16.cc b/volk/lib/qa_32f_convert_16s_aligned16.cc
deleted file mode 100644
index 3e2452e68..000000000
--- a/volk/lib/qa_32f_convert_16s_aligned16.cc
+++ /dev/null
@@ -1,71 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32f_convert_16s_aligned16.h>
-#include <volk/volk_32f_convert_16s_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse2
-
-#ifndef LV_HAVE_SSE2
-
-void qa_32f_convert_16s_aligned16::t1() {
-  printf("sse2 not available... no test performed\n");
-}
-
-#else
-
-void qa_32f_convert_16s_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  float input0[vlen] __attribute__ ((aligned (16)));
-  
-  int16_t output_generic[vlen] __attribute__ ((aligned (16)));
-  int16_t output_sse[vlen] __attribute__ ((aligned (16)));
-  int16_t output_sse2[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-  }
-  printf("32f_convert_16s_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_convert_16s_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_convert_16s_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_convert_16s_aligned16_manual(output_sse2, input0, 32768.0, vlen, "sse2");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse2_time: %f\n", total);
-
-  for(int i = 0; i < vlen; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("%d generic... %d, sse... %d sse2... %d\n", i, output_generic[i], output_sse[i], output_sse2[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT(abs(output_generic[i] - output_sse[i]) <= 1);
-    CPPUNIT_ASSERT(abs(output_generic[i] - output_sse2[i]) <= 1);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32f_convert_16s_aligned16.h b/volk/lib/qa_32f_convert_16s_aligned16.h
deleted file mode 100644
index fce1eb417..000000000
--- a/volk/lib/qa_32f_convert_16s_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_CONVERT_16S_ALIGNED16_H
-#define INCLUDED_QA_32F_CONVERT_16S_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_convert_16s_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_convert_16s_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_CONVERT_16S_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_convert_16s_unaligned16.cc b/volk/lib/qa_32f_convert_16s_unaligned16.cc
deleted file mode 100644
index e016b7ff7..000000000
--- a/volk/lib/qa_32f_convert_16s_unaligned16.cc
+++ /dev/null
@@ -1,71 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32f_convert_16s_unaligned16.h>
-#include <volk/volk_32f_convert_16s_unaligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse2
-
-#ifndef LV_HAVE_SSE2
-
-void qa_32f_convert_16s_unaligned16::t1() {
-  printf("sse2 not available... no test performed\n");
-}
-
-#else
-
-void qa_32f_convert_16s_unaligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  float input0[vlen] __attribute__ ((aligned (16)));
-  
-  int16_t output_generic[vlen] __attribute__ ((aligned (16)));
-  int16_t output_sse[vlen] __attribute__ ((aligned (16)));
-  int16_t output_sse2[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-  }
-  printf("32f_convert_16s_unaligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_convert_16s_unaligned16_manual(output_generic, input0, 32768.0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_convert_16s_unaligned16_manual(output_sse, input0, 32768.0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_convert_16s_unaligned16_manual(output_sse2, input0, 32768.0, vlen, "sse2");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse2_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT(abs(output_generic[i] - output_sse[i]) <= 1);
-    CPPUNIT_ASSERT(abs(output_generic[i] - output_sse2[i]) <= 1);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32f_convert_16s_unaligned16.h b/volk/lib/qa_32f_convert_16s_unaligned16.h
deleted file mode 100644
index 492bc80e6..000000000
--- a/volk/lib/qa_32f_convert_16s_unaligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_CONVERT_16S_UNALIGNED16_H
-#define INCLUDED_QA_32F_CONVERT_16S_UNALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_convert_16s_unaligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_convert_16s_unaligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_CONVERT_16S_UNALIGNED16_H */
diff --git a/volk/lib/qa_32f_convert_32s_aligned16.cc b/volk/lib/qa_32f_convert_32s_aligned16.cc
deleted file mode 100644
index abceb52fb..000000000
--- a/volk/lib/qa_32f_convert_32s_aligned16.cc
+++ /dev/null
@@ -1,71 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32f_convert_32s_aligned16.h>
-#include <volk/volk_32f_convert_32s_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse2
-
-#ifndef LV_HAVE_SSE2
-
-void qa_32f_convert_32s_aligned16::t1() {
-  printf("sse2 not available... no test performed\n");
-}
-
-#else
-
-void qa_32f_convert_32s_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  float input0[vlen] __attribute__ ((aligned (16)));
-  
-  int32_t output_generic[vlen] __attribute__ ((aligned (16)));
-  int32_t output_sse[vlen] __attribute__ ((aligned (16)));
-  int32_t output_sse2[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-  }
-  printf("32f_convert_32s_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_convert_32s_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_convert_32s_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_convert_32s_aligned16_manual(output_sse2, input0, 32768.0, vlen, "sse2");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse2_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT(abs(output_generic[i] - output_sse[i]) <= 1);
-    CPPUNIT_ASSERT(abs(output_generic[i] - output_sse2[i]) <= 1);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32f_convert_32s_aligned16.h b/volk/lib/qa_32f_convert_32s_aligned16.h
deleted file mode 100644
index 97d854463..000000000
--- a/volk/lib/qa_32f_convert_32s_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_CONVERT_32S_ALIGNED16_H
-#define INCLUDED_QA_32F_CONVERT_32S_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_convert_32s_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_convert_32s_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_CONVERT_32S_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_convert_32s_unaligned16.cc b/volk/lib/qa_32f_convert_32s_unaligned16.cc
deleted file mode 100644
index 90f84b56f..000000000
--- a/volk/lib/qa_32f_convert_32s_unaligned16.cc
+++ /dev/null
@@ -1,71 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32f_convert_32s_unaligned16.h>
-#include <volk/volk_32f_convert_32s_unaligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse2
-
-#ifndef LV_HAVE_SSE2
-
-void qa_32f_convert_32s_unaligned16::t1() {
-  printf("sse2 not available... no test performed\n");
-}
-
-#else
-
-void qa_32f_convert_32s_unaligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  float input0[vlen] __attribute__ ((aligned (16)));
-  
-  int32_t output_generic[vlen] __attribute__ ((aligned (16)));
-  int32_t output_sse[vlen] __attribute__ ((aligned (16)));
-  int32_t output_sse2[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-  }
-  printf("32f_convert_32s_unaligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_convert_32s_unaligned16_manual(output_generic, input0, 32768.0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_convert_32s_unaligned16_manual(output_sse, input0, 32768.0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_convert_32s_unaligned16_manual(output_sse2, input0, 32768.0, vlen, "sse2");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse2_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT(abs(output_generic[i] - output_sse[i]) <= 1);
-    CPPUNIT_ASSERT(abs(output_generic[i] - output_sse2[i]) <= 1);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32f_convert_32s_unaligned16.h b/volk/lib/qa_32f_convert_32s_unaligned16.h
deleted file mode 100644
index 5d662d86d..000000000
--- a/volk/lib/qa_32f_convert_32s_unaligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_CONVERT_32S_UNALIGNED16_H
-#define INCLUDED_QA_32F_CONVERT_32S_UNALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_convert_32s_unaligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_convert_32s_unaligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_CONVERT_32S_UNALIGNED16_H */
diff --git a/volk/lib/qa_32f_convert_64f_aligned16.cc b/volk/lib/qa_32f_convert_64f_aligned16.cc
deleted file mode 100644
index 1d0754ac9..000000000
--- a/volk/lib/qa_32f_convert_64f_aligned16.cc
+++ /dev/null
@@ -1,61 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32f_convert_64f_aligned16.h>
-#include <volk/volk_32f_convert_64f_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse2
-
-#ifndef LV_HAVE_SSE2
-
-void qa_32f_convert_64f_aligned16::t1() {
-  printf("sse2 not available... no test performed\n");
-}
-
-#else
-
-void qa_32f_convert_64f_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  float input0[vlen] __attribute__ ((aligned (16)));
-  
-  double output_generic[vlen] __attribute__ ((aligned (16)));
-  double output_sse2[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-  }
-  printf("32f_convert_64f_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_convert_64f_aligned16_manual(output_generic, input0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_convert_64f_aligned16_manual(output_sse2, input0, vlen, "sse2");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse2_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i] ,output_sse2[i], fabs(output_generic[i])*1e-6);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32f_convert_64f_aligned16.h b/volk/lib/qa_32f_convert_64f_aligned16.h
deleted file mode 100644
index 41eb3e094..000000000
--- a/volk/lib/qa_32f_convert_64f_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_CONVERT_64F_ALIGNED16_H
-#define INCLUDED_QA_32F_CONVERT_64F_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_convert_64f_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_convert_64f_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_CONVERT_64F_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_convert_64f_unaligned16.cc b/volk/lib/qa_32f_convert_64f_unaligned16.cc
deleted file mode 100644
index 6f7d5066d..000000000
--- a/volk/lib/qa_32f_convert_64f_unaligned16.cc
+++ /dev/null
@@ -1,61 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32f_convert_64f_unaligned16.h>
-#include <volk/volk_32f_convert_64f_unaligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse2
-
-#ifndef LV_HAVE_SSE2
-
-void qa_32f_convert_64f_unaligned16::t1() {
-  printf("sse2 not available... no test performed\n");
-}
-
-#else
-
-void qa_32f_convert_64f_unaligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  float input0[vlen] __attribute__ ((aligned (16)));
-  
-  double output_generic[vlen] __attribute__ ((aligned (16)));
-  double output_sse2[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-  }
-  printf("32f_convert_64f_unaligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_convert_64f_unaligned16_manual(output_generic, input0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_convert_64f_unaligned16_manual(output_sse2, input0, vlen, "sse2");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse2_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse2[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32f_convert_64f_unaligned16.h b/volk/lib/qa_32f_convert_64f_unaligned16.h
deleted file mode 100644
index 4b144f033..000000000
--- a/volk/lib/qa_32f_convert_64f_unaligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_CONVERT_64F_UNALIGNED16_H
-#define INCLUDED_QA_32F_CONVERT_64F_UNALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_convert_64f_unaligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_convert_64f_unaligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_CONVERT_64F_UNALIGNED16_H */
diff --git a/volk/lib/qa_32f_convert_8s_aligned16.cc b/volk/lib/qa_32f_convert_8s_aligned16.cc
deleted file mode 100644
index 6a53629b5..000000000
--- a/volk/lib/qa_32f_convert_8s_aligned16.cc
+++ /dev/null
@@ -1,71 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32f_convert_8s_aligned16.h>
-#include <volk/volk_32f_convert_8s_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse2
-
-#ifndef LV_HAVE_SSE2
-
-void qa_32f_convert_8s_aligned16::t1() {
-  printf("sse2 not available... no test performed\n");
-}
-
-#else
-
-void qa_32f_convert_8s_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  float input0[vlen] __attribute__ ((aligned (16)));
-  
-  int8_t output_generic[vlen] __attribute__ ((aligned (16)));
-  int8_t output_sse[vlen] __attribute__ ((aligned (16)));
-  int8_t output_sse2[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-  }
-  printf("32f_convert_8s_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_convert_8s_aligned16_manual(output_generic, input0, 128.0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_convert_8s_aligned16_manual(output_sse, input0, 128.0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_convert_8s_aligned16_manual(output_sse2, input0, 128.0, vlen, "sse2");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse2_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT(abs(output_generic[i] - output_sse[i]) <= 1);
-    CPPUNIT_ASSERT(abs(output_generic[i] - output_sse2[i]) <= 1);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32f_convert_8s_aligned16.h b/volk/lib/qa_32f_convert_8s_aligned16.h
deleted file mode 100644
index 68a523f34..000000000
--- a/volk/lib/qa_32f_convert_8s_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_CONVERT_8S_ALIGNED16_H
-#define INCLUDED_QA_32F_CONVERT_8S_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_convert_8s_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_convert_8s_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_CONVERT_8S_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_convert_8s_unaligned16.cc b/volk/lib/qa_32f_convert_8s_unaligned16.cc
deleted file mode 100644
index fbc5c20e6..000000000
--- a/volk/lib/qa_32f_convert_8s_unaligned16.cc
+++ /dev/null
@@ -1,71 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32f_convert_8s_unaligned16.h>
-#include <volk/volk_32f_convert_8s_unaligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse2
-
-#ifndef LV_HAVE_SSE2
-
-void qa_32f_convert_8s_unaligned16::t1() {
-  printf("sse2 not available... no test performed\n");
-}
-
-#else
-
-void qa_32f_convert_8s_unaligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  float input0[vlen] __attribute__ ((aligned (16)));
-  
-  int8_t output_generic[vlen] __attribute__ ((aligned (16)));
-  int8_t output_sse[vlen] __attribute__ ((aligned (16)));
-  int8_t output_sse2[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-  }
-  printf("32f_convert_8s_unaligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_convert_8s_unaligned16_manual(output_generic, input0, 128.0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_convert_8s_unaligned16_manual(output_sse, input0, 128.0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_convert_8s_unaligned16_manual(output_sse2, input0, 128.0, vlen, "sse2");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse2_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT(abs(output_generic[i] - output_sse[i]) <= 1);
-    CPPUNIT_ASSERT(abs(output_generic[i] - output_sse2[i]) <= 1);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32f_convert_8s_unaligned16.h b/volk/lib/qa_32f_convert_8s_unaligned16.h
deleted file mode 100644
index 88d4ff42a..000000000
--- a/volk/lib/qa_32f_convert_8s_unaligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_CONVERT_8S_UNALIGNED16_H
-#define INCLUDED_QA_32F_CONVERT_8S_UNALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_convert_8s_unaligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_convert_8s_unaligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_CONVERT_8S_UNALIGNED16_H */
diff --git a/volk/lib/qa_32f_divide_aligned16.cc b/volk/lib/qa_32f_divide_aligned16.cc
deleted file mode 100644
index f2a1b9e7f..000000000
--- a/volk/lib/qa_32f_divide_aligned16.cc
+++ /dev/null
@@ -1,133 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2010 Free Software Foundation, Inc.
- * 
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- * 
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, see 
- * <http://www.gnu.org/licenses/>.
- */
-
-#include <volk/volk.h>
-#include <qa_32f_divide_aligned16.h>
-#include <volk/volk_32f_divide_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE
-
-void qa_32f_divide_aligned16::t1() {
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 10000;
-  float input0[vlen] __attribute__ ((aligned (16)));
-  float input1[vlen] __attribute__ ((aligned (16)));
-  
-  float output0[vlen] __attribute__ ((aligned (16)));
-  float output1[vlen] __attribute__ ((aligned (16)));
-  float output_known[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-    input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-    output_known[i] = input0[i] / input1[i];
-  }
-  printf("32f_divide_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_divide_aligned16_manual(output0, input0, input1, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_divide_aligned16_manual(output1, input0, input1, vlen, "orc");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("orc_time: %f\n", total);
-
-  /*
-  for(int i = 0; i < 10; ++i) {
-    printf("inputs: %f, %f\n", input0[i], input1[i]);
-    printf("generic... %f == %f\n", output0[i], output_known[i]);
-  }
-  */
-  
-  for(int i = 0; i < vlen; ++i) {
-    CPPUNIT_ASSERT_EQUAL(output0[i], output_known[i]);
-    CPPUNIT_ASSERT_EQUAL(output1[i], output_known[i]);
-  }
-}
-
-#else
-
-void qa_32f_divide_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  float input0[vlen] __attribute__ ((aligned (16)));
-  float input1[vlen] __attribute__ ((aligned (16)));
-  
-  float output0[vlen] __attribute__ ((aligned (16)));
-  float output01[vlen] __attribute__ ((aligned (16)));
-  float output02[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-    input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-  }
-  printf("32f_divide_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_divide_aligned16_manual(output0, input0, input1, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_divide_aligned16_manual(output02, input0, input1, vlen, "orc");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("orc_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_divide_aligned16_manual(output01, input0, input1, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32f_divide_aligned16.h b/volk/lib/qa_32f_divide_aligned16.h
deleted file mode 100644
index 79d5ae4b8..000000000
--- a/volk/lib/qa_32f_divide_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_DIVIDE_ALIGNED16_H
-#define INCLUDED_QA_32F_DIVIDE_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_divide_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_divide_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_DIVIDE_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_dot_prod_aligned16.cc b/volk/lib/qa_32f_dot_prod_aligned16.cc
deleted file mode 100644
index 98c1f2d99..000000000
--- a/volk/lib/qa_32f_dot_prod_aligned16.cc
+++ /dev/null
@@ -1,183 +0,0 @@
-#include <volk/volk_runtime.h>
-#include <volk/volk.h>
-#include <qa_32f_dot_prod_aligned16.h>
-#include <stdlib.h>
-#include <math.h>
-#include <time.h>
-
-#define	ERR_DELTA	(1e-4)
-
-//test for sse
-static float uniform() {
-  return 2.0 * ((float) rand() / RAND_MAX - 0.5);	// uniformly (-1, 1)
-}
-
-static void
-random_floats (float *buf, unsigned n)
-{
-  for (unsigned i = 0; i < n; i++)
-    buf[i] = uniform ();
-}
-
-#ifndef LV_HAVE_SSE4_1
-
-#ifdef LV_HAVE_SSE3
-void qa_32f_dot_prod_aligned16::t1() {
-  const int vlen = 2046;
-  const int ITER = 100000;
-
-  int i;
-
-  volk_environment_init();
-  int ret;
-  clock_t start, end;
-  double total;
-  float * input;
-  float * taps;
-  
-  float * result_generic;
-  float * result_sse;
-  float * result_sse3;
-
-  ret = posix_memalign((void**)&input, 16, vlen* sizeof(float));
-  ret = posix_memalign((void**)&taps, 16, vlen *sizeof(float));
-  ret = posix_memalign((void**)&result_generic, 16, ITER*sizeof(float));
-  ret = posix_memalign((void**)&result_sse, 16, ITER*sizeof(float));
-  ret = posix_memalign((void**)&result_sse3, 16, ITER*sizeof(float));
-
-  random_floats((float*)input, vlen);
-  random_floats((float*)taps, vlen);
-  
-  
-  printf("32f_dot_prod_aligned16\n");
-
-  start = clock();
-  for(i = 0; i < ITER; i++){
-    volk_32f_dot_prod_aligned16_manual(&result_generic[i], input, taps, vlen,  "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(i = 0; i < ITER; i++){
-    volk_32f_dot_prod_aligned16_manual(&result_sse[i], input, taps, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  start = clock();
-  for(i = 0; i < ITER; i++){
-    volk_32f_dot_prod_aligned16_manual(&result_sse3[i], input, taps, vlen, "sse3");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse3_time: %f\n", total);
-
-  printf("generic: %f ... sse: %f  ... sse3 %f \n", result_generic[0], result_sse[0], result_sse3[0]);
-
-  for(i = 0; i < ITER; i++){
-    CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse[i], fabs(result_generic[i])*ERR_DELTA);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse3[i], fabs(result_generic[i])*ERR_DELTA);
-  }
-
-  free(input);
-  free(taps);
-  free(result_generic);
-  free(result_sse);
-  free(result_sse3);
-  
-}
-#else
-void qa_32f_dot_prod_aligned16::t1() {
-  printf("sse3 not available... no test performed\n");
-}
-
-#endif /* LV_HAVE_SSE3 */
-
-#else
-
-void qa_32f_dot_prod_aligned16::t1() {
-  
-  
-  volk_runtime_init();
-
-  const int vlen = 4095;
-  const int ITER = 100000;
-
-  int i;
-
-  volk_environment_init();
-  int ret;
-  clock_t start, end;
-  double total;
-  float * input;
-  float * taps;
-  
-  float * result_generic;
-  float * result_sse;
-  float * result_sse3;
-  float * result_sse4_1;
-
-  ret = posix_memalign((void**)&input, 16, vlen * sizeof(float));
-  ret = posix_memalign((void**)&taps, 16, vlen * sizeof(float));
-  ret = posix_memalign((void**)&result_generic, 16, ITER*sizeof(float));
-  ret = posix_memalign((void**)&result_sse, 16, ITER*sizeof(float));
-  ret = posix_memalign((void**)&result_sse3, 16, ITER*sizeof(float));
-  ret = posix_memalign((void**)&result_sse4_1, 16, ITER*sizeof(float));
-
-  random_floats((float*)input, vlen);
-  random_floats((float*)taps, vlen);
-  
-  printf("32f_dot_prod_aligned16\n");
-
-  start = clock();
-  for(i = 0; i < ITER; i++){
-    volk_32f_dot_prod_aligned16_manual(&result_generic[i], input, taps, vlen,  "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(i = 0; i < ITER; i++){
-    volk_32f_dot_prod_aligned16_manual(&result_sse[i], input, taps, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  start = clock();
-  for(i = 0; i < ITER; i++){
-    volk_32f_dot_prod_aligned16_manual(&result_sse3[i], input, taps, vlen, "sse3");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse3_time: %f\n", total);
-
-  start = clock();
-  for(i = 0; i < ITER; i++){
-    get_volk_runtime()->volk_32f_dot_prod_aligned16(&result_sse4_1[i], input, taps, vlen);
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse4_1_time: %f\n", total);
-
-  //printf("generic: %f ... sse: %f  ... sse3 %f  ... sse4_1 %f \n", result_generic[0], result_sse[0], result_sse3[0], result_sse4_1[0]);
-  for(i =0; i < ITER; i++){
-    CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse[i], fabs(result_generic[i])*ERR_DELTA);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse3[i], fabs(result_generic[i])*ERR_DELTA);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse4_1[i], fabs(result_generic[i])*ERR_DELTA);
-  }
-
-  free(input);
-  free(taps);
-  free(result_generic);
-  free(result_sse);
-  free(result_sse3);
-  free(result_sse4_1);
-  
-}
-
-#endif /*LV_HAVE_SSE*/
diff --git a/volk/lib/qa_32f_dot_prod_aligned16.h b/volk/lib/qa_32f_dot_prod_aligned16.h
deleted file mode 100644
index 6931a9e98..000000000
--- a/volk/lib/qa_32f_dot_prod_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_DOT_PROD_ALIGNED16_H
-#define INCLUDED_QA_32F_DOT_PROD_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_dot_prod_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_dot_prod_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_DOT_PROD_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_dot_prod_unaligned16.cc b/volk/lib/qa_32f_dot_prod_unaligned16.cc
deleted file mode 100644
index 8e97d4249..000000000
--- a/volk/lib/qa_32f_dot_prod_unaligned16.cc
+++ /dev/null
@@ -1,190 +0,0 @@
-#include <volk/volk_runtime.h>
-#include <volk/volk.h>
-#include <qa_32f_dot_prod_unaligned16.h>
-#include <stdlib.h>
-#include <math.h>
-#include <time.h>
-
-#define	ERR_DELTA	(1e-4)
-
-//test for sse
-static float uniform() {
-  return 2.0 * ((float) rand() / RAND_MAX - 0.5);	// uniformly (-1, 1)
-}
-
-static void
-random_floats (float *buf, unsigned n)
-{
-  for (unsigned i = 0; i < n; i++)
-    buf[i] = uniform ();
-}
-
-#ifndef LV_HAVE_SSE4_1
-
-#ifdef LV_HAVE_SSE3
-void qa_32f_dot_prod_unaligned16::t1() {
-  
-  
-  volk_runtime_init();
-
-  const int vlen = 2046;
-  const int ITER = 100000;
-
-  int i;
-
-  volk_environment_init();
-  int ret;
-  clock_t start, end;
-  double total;
-  float * input;
-  float * taps;
-  
-  float * result_generic;
-  float * result_sse;
-  float * result_sse3;
-
-  ret = posix_memalign((void**)&input, 16, vlen* sizeof(float));
-  ret = posix_memalign((void**)&taps, 16, vlen *sizeof(float));
-  ret = posix_memalign((void**)&result_generic, 16, ITER*sizeof(float));
-  ret = posix_memalign((void**)&result_sse, 16, ITER*sizeof(float));
-  ret = posix_memalign((void**)&result_sse3, 16, ITER*sizeof(float));
-
-  random_floats((float*)input, vlen);
-  random_floats((float*)taps, vlen);
-  
-  
-  printf("32f_dot_prod_unaligned16\n");
-
-  start = clock();
-  for(i = 0; i < ITER; i++){
-    volk_32f_dot_prod_unaligned16_manual(&result_generic[i], input, taps, vlen,  "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(i = 0; i < ITER; i++){
-    volk_32f_dot_prod_unaligned16_manual(&result_sse[i], input, taps, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  start = clock();
-  for(i = 0; i < ITER; i++){
-    volk_32f_dot_prod_unaligned16_manual(&result_sse3[i], input, taps, vlen, "sse3");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse3_time: %f\n", total);
-
-  printf("generic: %f ... sse: %f  ... sse3 %f \n", result_generic[0], result_sse[0], result_sse3[0]);
-
-  for(i = 0; i < ITER; i++){
-    CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse[i], fabs(result_generic[i])*ERR_DELTA);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse3[i], fabs(result_generic[i])*ERR_DELTA);
-  }
-
-  free(input);
-  free(taps);
-  free(result_generic);
-  free(result_sse);
-  free(result_sse3);
-  
-}
-#else
-void qa_32f_dot_prod_unaligned16::t1() {
-  printf("sse3 not available... no test performed\n");
-}
-
-#endif /* LV_HAVE_SSE3 */
-
-#else
-
-void qa_32f_dot_prod_unaligned16::t1() {
-  
-  
-  volk_runtime_init();
-
-  const int vlen = 4095;
-  const int ITER = 100000;
-
-  int i;
-
-  volk_environment_init();
-  int ret;
-  clock_t start, end;
-  double total;
-  float * input;
-  float * taps;
-  
-  float * result_generic;
-  float * result_sse;
-  float * result_sse3;
-  float * result_sse4_1;
-
-  ret = posix_memalign((void**)&input, 16, (vlen+1) * sizeof(float));
-  ret = posix_memalign((void**)&taps, 16, (vlen+1) * sizeof(float));
-  ret = posix_memalign((void**)&result_generic, 16, ITER*sizeof(float));
-  ret = posix_memalign((void**)&result_sse, 16, ITER*sizeof(float));
-  ret = posix_memalign((void**)&result_sse3, 16, ITER*sizeof(float));
-  ret = posix_memalign((void**)&result_sse4_1, 16, ITER*sizeof(float));
-
-  input = &input[1]; // Make sure the buffer is unaligned
-  taps = &taps[1]; // Make sure the buffer is unaligned
-
-  random_floats((float*)input, vlen);
-  random_floats((float*)taps, vlen);
-  
-  printf("32f_dot_prod_unaligned16\n");
-  
-  start = clock();
-  for(i = 0; i < ITER; i++){
-    volk_32f_dot_prod_unaligned16_manual(&result_generic[i], input, taps, vlen,  "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(i = 0; i < ITER; i++){
-    volk_32f_dot_prod_unaligned16_manual(&result_sse[i], input, taps, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  start = clock();
-  for(i = 0; i < ITER; i++){
-    volk_32f_dot_prod_unaligned16_manual(&result_sse3[i], input, taps, vlen, "sse3");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse3_time: %f\n", total);
-
-  start = clock();
-  for(i = 0; i < ITER; i++){
-    get_volk_runtime()->volk_32f_dot_prod_unaligned16(&result_sse4_1[i], input, taps, vlen);
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse4_1_time: %f\n", total);
-
-  //printf("generic: %f ... sse: %f  ... sse3 %f  ... sse4_1 %f \n", result_generic[0], result_sse[0], result_sse3[0], result_sse4_1[0]);
-  for(i =0; i < ITER; i++){
-    CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse[i], fabs(result_generic[i])*ERR_DELTA);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse3[i], fabs(result_generic[i])*ERR_DELTA);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL (result_generic[i], result_sse4_1[i], fabs(result_generic[i])*ERR_DELTA);
-  }
-
-  free(&input[-1]);
-  free(&taps[-1]);
-  free(result_generic);
-  free(result_sse);
-  free(result_sse3);
-  free(result_sse4_1);
-  
-}
-
-#endif /*LV_HAVE_SSE*/
diff --git a/volk/lib/qa_32f_dot_prod_unaligned16.h b/volk/lib/qa_32f_dot_prod_unaligned16.h
deleted file mode 100644
index e8bad07fe..000000000
--- a/volk/lib/qa_32f_dot_prod_unaligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_DOT_PROD_UNALIGNED16_H
-#define INCLUDED_QA_32F_DOT_PROD_UNALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_dot_prod_unaligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_dot_prod_unaligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_DOT_PROD_UNALIGNED16_H */
diff --git a/volk/lib/qa_32f_interleave_16sc_aligned16.cc b/volk/lib/qa_32f_interleave_16sc_aligned16.cc
deleted file mode 100644
index a7ae60780..000000000
--- a/volk/lib/qa_32f_interleave_16sc_aligned16.cc
+++ /dev/null
@@ -1,76 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32f_interleave_16sc_aligned16.h>
-#include <volk/volk_32f_interleave_16sc_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE2
-
-void qa_32f_interleave_16sc_aligned16::t1() {
-  printf("sse2 not available... no test performed\n");
-}
-
-#else
-
-void qa_32f_interleave_16sc_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  float input0[vlen] __attribute__ ((aligned (16)));
-  float input1[vlen] __attribute__ ((aligned (16)));
-  
-  std::complex<int16_t> output_generic[vlen] __attribute__ ((aligned (16)));
-  std::complex<int16_t> output_sse[vlen] __attribute__ ((aligned (16)));
-  std::complex<int16_t> output_sse2[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); 
-    input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); 
-  }
-  printf("32f_interleave_16sc_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_interleave_16sc_aligned16_manual(output_generic, input0, input1, 32768.0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_interleave_16sc_aligned16_manual(output_sse, input0, input1, 32768.0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_interleave_16sc_aligned16_manual(output_sse2, input0, input1, 32768.0, vlen, "sse2");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse2_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(std::real(output_generic[i]), std::real(output_sse[i]), 1.01);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(std::imag(output_generic[i]), std::imag(output_sse[i]), 1.01);
-
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(std::real(output_generic[i]), std::real(output_sse2[i]), 1.01);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(std::imag(output_generic[i]), std::imag(output_sse2[i]), 1.01);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32f_interleave_16sc_aligned16.h b/volk/lib/qa_32f_interleave_16sc_aligned16.h
deleted file mode 100644
index 8d2914817..000000000
--- a/volk/lib/qa_32f_interleave_16sc_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_INTERLEAVE_16SC_ALIGNED16_H
-#define INCLUDED_QA_32F_INTERLEAVE_16SC_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_interleave_16sc_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_interleave_16sc_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_INTERLEAVE_16SC_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_interleave_32fc_aligned16.cc b/volk/lib/qa_32f_interleave_32fc_aligned16.cc
deleted file mode 100644
index 333b6fce8..000000000
--- a/volk/lib/qa_32f_interleave_32fc_aligned16.cc
+++ /dev/null
@@ -1,63 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32f_interleave_32fc_aligned16.h>
-#include <volk/volk_32f_interleave_32fc_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE
-
-void qa_32f_interleave_32fc_aligned16::t1() {
-  printf("sse not available... no test performed\n");
-}
-
-#else
-
-void qa_32f_interleave_32fc_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  float input0[vlen] __attribute__ ((aligned (16)));
-  float input1[vlen] __attribute__ ((aligned (16)));
-  
-  std::complex<float> output_generic[vlen] __attribute__ ((aligned (16)));
-  std::complex<float> output_sse[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); 
-    input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); 
-  }
-  printf("32f_interleave_32fc_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_interleave_32fc_aligned16_manual(output_generic, input0, input1, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_interleave_32fc_aligned16_manual(output_sse, input0, input1, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(std::real(output_generic[i]), std::real(output_sse[i]), fabs(std::real(output_generic[i]))*1e-4);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(std::imag(output_generic[i]), std::imag(output_sse[i]), fabs(std::imag(output_generic[i]))*1e-4);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32f_interleave_32fc_aligned16.h b/volk/lib/qa_32f_interleave_32fc_aligned16.h
deleted file mode 100644
index cba518d37..000000000
--- a/volk/lib/qa_32f_interleave_32fc_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_INTERLEAVE_32FC_ALIGNED16_H
-#define INCLUDED_QA_32F_INTERLEAVE_32FC_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_interleave_32fc_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_interleave_32fc_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_INTERLEAVE_32FC_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_max_aligned16.cc b/volk/lib/qa_32f_max_aligned16.cc
deleted file mode 100644
index 98f8ce9bc..000000000
--- a/volk/lib/qa_32f_max_aligned16.cc
+++ /dev/null
@@ -1,70 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32f_max_aligned16.h>
-#include <volk/volk_32f_max_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE
-
-void qa_32f_max_aligned16::t1() {
-  printf("sse not available... no test performed\n");
-}
-
-#else
-
-void qa_32f_max_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  float input0[vlen] __attribute__ ((aligned (16)));
-  float input1[vlen] __attribute__ ((aligned (16)));
-  
-  float output0[vlen] __attribute__ ((aligned (16)));
-  float output01[vlen] __attribute__ ((aligned (16)));
-  float output02[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-    input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-  }
-  printf("32f_max_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_max_aligned16_manual(output0, input0, input1, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_max_aligned16_manual(output02, input0, input1, vlen, "orc");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("orc_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_max_aligned16_manual(output01, input0, input1, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32f_max_aligned16.h b/volk/lib/qa_32f_max_aligned16.h
deleted file mode 100644
index d535479f4..000000000
--- a/volk/lib/qa_32f_max_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_MAX_ALIGNED16_H
-#define INCLUDED_QA_32F_MAX_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_max_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_max_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_MAX_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_min_aligned16.cc b/volk/lib/qa_32f_min_aligned16.cc
deleted file mode 100644
index 798b47c53..000000000
--- a/volk/lib/qa_32f_min_aligned16.cc
+++ /dev/null
@@ -1,70 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32f_min_aligned16.h>
-#include <volk/volk_32f_min_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE
-
-void qa_32f_min_aligned16::t1() {
-  printf("sse not available... no test performed\n");
-}
-
-#else
-
-void qa_32f_min_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  float input0[vlen] __attribute__ ((aligned (16)));
-  float input1[vlen] __attribute__ ((aligned (16)));
-  
-  float output0[vlen] __attribute__ ((aligned (16)));
-  float output01[vlen] __attribute__ ((aligned (16)));
-  float output02[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-    input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-  }
-  printf("32f_min_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_min_aligned16_manual(output0, input0, input1, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_min_aligned16_manual(output02, input0, input1, vlen, "orc");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("orc_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_min_aligned16_manual(output01, input0, input1, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32f_min_aligned16.h b/volk/lib/qa_32f_min_aligned16.h
deleted file mode 100644
index 90961ac92..000000000
--- a/volk/lib/qa_32f_min_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_MIN_ALIGNED16_H
-#define INCLUDED_QA_32F_MIN_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_min_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_min_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_MIN_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_multiply_aligned16.cc b/volk/lib/qa_32f_multiply_aligned16.cc
deleted file mode 100644
index aa17cd62e..000000000
--- a/volk/lib/qa_32f_multiply_aligned16.cc
+++ /dev/null
@@ -1,123 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2010 Free Software Foundation, Inc.
- * 
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- * 
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, see 
- * <http://www.gnu.org/licenses/>.
- */
-
-#include <volk/volk.h>
-#include <qa_32f_multiply_aligned16.h>
-#include <volk/volk_32f_multiply_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE
-
-void qa_32f_multiply_aligned16::t1() {
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 10000;
-  float input0[vlen] __attribute__ ((aligned (16)));
-  float input1[vlen] __attribute__ ((aligned (16)));
-  
-  float output0[vlen] __attribute__ ((aligned (16)));
-  float output_known[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-    input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-    output_known[i] = input0[i] * input1[i];
-  }
-  printf("32f_multiply_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_multiply_aligned16_manual(output0, input0, input1, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  /*
-  for(int i = 0; i < 10; ++i) {
-    printf("inputs: %f, %f\n", input0[i], input1[i]);
-    printf("generic... %f == %f\n", output0[i], output_known[i]);
-  }
-  */
-  
-  for(int i = 0; i < vlen; ++i) {
-    CPPUNIT_ASSERT_EQUAL(output0[i], output_known[i]);
-  }
-}
-
-#else
-
-void qa_32f_multiply_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  float input0[vlen] __attribute__ ((aligned (16)));
-  float input1[vlen] __attribute__ ((aligned (16)));
-  
-  float output0[vlen] __attribute__ ((aligned (16)));
-  float output01[vlen] __attribute__ ((aligned (16)));
-  float output02[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-    input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-  }
-  printf("32f_multiply_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_multiply_aligned16_manual(output0, input0, input1, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_multiply_aligned16_manual(output02, input0, input1, vlen, "orc");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("orc_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_multiply_aligned16_manual(output01, input0, input1, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32f_multiply_aligned16.h b/volk/lib/qa_32f_multiply_aligned16.h
deleted file mode 100644
index 7032a2ad4..000000000
--- a/volk/lib/qa_32f_multiply_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_MULTIPLY_ALIGNED16_H
-#define INCLUDED_QA_32F_MULTIPLY_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_multiply_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_multiply_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_MULTIPLY_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_normalize_aligned16.cc b/volk/lib/qa_32f_normalize_aligned16.cc
deleted file mode 100644
index 0da43ecff..000000000
--- a/volk/lib/qa_32f_normalize_aligned16.cc
+++ /dev/null
@@ -1,79 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32f_normalize_aligned16.h>
-#include <volk/volk_32f_normalize_aligned16.h>
-#include <cstdlib>
-#include <cstring>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE
-
-void qa_32f_normalize_aligned16::t1() {
-  printf("sse not available... no test performed\n");
-}
-
-#else
-
-void qa_32f_normalize_aligned16::t1() {
-  
-  volk_environment_init();
-  int ret;
-  clock_t start, end;
-  double total;
-  const int vlen = 320001;
-  const int ITERS = 100;
-
-  float* output0;
-  float* output01;
-  float* output02;
-  ret = posix_memalign((void**)&output0, 16, vlen*sizeof(float));
-  ret = posix_memalign((void**)&output01, 16, vlen*sizeof(float));
-  ret = posix_memalign((void**)&output02, 16, vlen*sizeof(float));
-
-  for(int i = 0; i < vlen; ++i) {   
-    output0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-  }
-  memcpy(output01, output0, vlen*sizeof(float));
-  memcpy(output02, output0, vlen*sizeof(float));
-  printf("32f_normalize_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_normalize_aligned16_manual(output0, 1.15, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_normalize_aligned16_manual(output01, 1.15, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_normalize_aligned16_manual(output02, 1.15, vlen, "orc");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("orc_time: %f\n", total);
-  
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    // printf("%e...%e\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output0[i], output01[i], fabs(output0[i])*1e-4);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output0[i], output02[i], fabs(output0[i])*1e-4);
-  }
-
-  free(output0);
-  free(output01);
-  free(output02);
-}
-
-#endif
diff --git a/volk/lib/qa_32f_normalize_aligned16.h b/volk/lib/qa_32f_normalize_aligned16.h
deleted file mode 100644
index 7c421eb82..000000000
--- a/volk/lib/qa_32f_normalize_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_NORMALIZE_ALIGNED16_H
-#define INCLUDED_QA_32F_NORMALIZE_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_normalize_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_normalize_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_NORMALIZE_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_power_aligned16.cc b/volk/lib/qa_32f_power_aligned16.cc
deleted file mode 100644
index 1b331daeb..000000000
--- a/volk/lib/qa_32f_power_aligned16.cc
+++ /dev/null
@@ -1,95 +0,0 @@
-#include <volk/volk_runtime.h>
-#include <volk/volk.h>
-#include <qa_32f_power_aligned16.h>
-#include <stdlib.h>
-#include <math.h>
-#include <time.h>
-
-#define	ERR_DELTA	(1e-4)
-
-//test for sse
-static float uniform() {
-  return 2.0 * ((float) rand() / RAND_MAX - 0.5);	// uniformly (-1, 1)
-}
-
-static void
-random_floats (float *buf, unsigned n)
-{
-  for (unsigned i = 0; i < n; i++)
-    buf[i] = uniform ();
-}
-
-#ifdef LV_HAVE_SSE
-void qa_32f_power_aligned16::t1() {
-
-  
-  volk_runtime_init();
-
-  const int vlen = 2046;
-  const int ITERS = 10000;
-
-  volk_environment_init();
-  int ret;
-  clock_t start, end;
-  double total;
-  float* input;
-  int i;
-  
-  float* result_generic;
-  float* result_sse;
-  float* result_sse4_1;
-
-  ret = posix_memalign((void**)&input, 16, vlen *  sizeof(float));
-  ret = posix_memalign((void**)&result_generic, 16, vlen * sizeof(float));
-  ret = posix_memalign((void**)&result_sse, 16, vlen * sizeof(float));
-  ret = posix_memalign((void**)&result_sse4_1, 16, vlen * sizeof(float));
-
-  random_floats((float*)input, vlen);
-
-  const float power = 3;
-  
-  printf("32f_power_aligned16\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_power_aligned16_manual(result_generic, input, power, vlen,  "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_power_aligned16_manual(result_sse, input, power, vlen,  "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    get_volk_runtime()->volk_32f_power_aligned16(result_sse4_1, input, power, vlen);
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse4.1_time: %f\n", total);
-
-
-  for(i = 0; i < vlen; i++){
-    //printf("%d %e -> %e %e %e\n", i, input[i], result_generic[i], result_sse[i], result_sse4_1[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(result_generic[i], result_sse[i], fabs(result_generic[i])* ERR_DELTA);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(result_generic[i], result_sse4_1[i], fabs(result_generic[i])* ERR_DELTA);
-  }
-
-  free(input);
-  free(result_generic);
-  free(result_sse);
-  
-}
-#else
-void qa_32f_power_aligned16::t1() {
-  printf("sse not available... no test performed\n");
-}
-
-#endif /* LV_HAVE_SSE */
-
diff --git a/volk/lib/qa_32f_power_aligned16.h b/volk/lib/qa_32f_power_aligned16.h
deleted file mode 100644
index d45df4e56..000000000
--- a/volk/lib/qa_32f_power_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_POWER_ALIGNED16_H
-#define INCLUDED_QA_32F_POWER_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_power_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_power_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_POWER_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_sqrt_aligned16.cc b/volk/lib/qa_32f_sqrt_aligned16.cc
deleted file mode 100644
index c216ce5d5..000000000
--- a/volk/lib/qa_32f_sqrt_aligned16.cc
+++ /dev/null
@@ -1,128 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2010 Free Software Foundation, Inc.
- * 
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- * 
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, see 
- * <http://www.gnu.org/licenses/>.
- */
-
-#include <volk/volk.h>
-#include <qa_32f_sqrt_aligned16.h>
-#include <volk/volk_32f_sqrt_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE
-
-void qa_32f_sqrt_aligned16::t1() {
-  printf("sse not available... no test performed\n");
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 10000;
-  float input0[vlen] __attribute__ ((aligned (16)));
-  
-  float output0[vlen] __attribute__ ((aligned (16)));
-  float output_known[vlen] __attribute__ ((aligned (16)));
-
-  // No reason to test negative numbers because they result in NaN.
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((float) (rand()) / static_cast<float>(RAND_MAX));
-    output_known[i] = sqrt(input0[i]);
-  }
-  printf("32f_sqrt_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_sqrt_aligned16_manual(output0, input0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_sqrt_aligned16_manual(output0, input0, vlen, "orc");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("orc_time: %f\n", total);
-  
-  /*
-  for(int i = 0; i < 10; ++i) {
-    printf("inputs: %f\n", input0[i]);
-    printf("generic... %f == %f\n", output0[i], output_known[i]);
-  }
-  */
-  
-  for(int i = 0; i < vlen; ++i) {
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output0[i], output_known[i], fabs(output0[i])*1e-4);
-  }
-}
-
-#else
-
-void qa_32f_sqrt_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  float input0[vlen] __attribute__ ((aligned (16)));
-  
-  float output0[vlen] __attribute__ ((aligned (16)));
-  float output01[vlen] __attribute__ ((aligned (16)));
-
-  // No reason to test negative numbers because they result in NaN.
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((float) (rand()) / static_cast<float>(RAND_MAX));
-  }
-  printf("32f_sqrt_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_sqrt_aligned16_manual(output0, input0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_sqrt_aligned16_manual(output0, input0, vlen, "orc");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("orc_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_sqrt_aligned16_manual(output01, input0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output0[i], output01[i], fabs(output0[i])*1e-4);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32f_sqrt_aligned16.h b/volk/lib/qa_32f_sqrt_aligned16.h
deleted file mode 100644
index e4b99d981..000000000
--- a/volk/lib/qa_32f_sqrt_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_SQRT_ALIGNED16_H
-#define INCLUDED_QA_32F_SQRT_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_sqrt_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_sqrt_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_SQRT_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_stddev_aligned16.cc b/volk/lib/qa_32f_stddev_aligned16.cc
deleted file mode 100644
index 5934d70df..000000000
--- a/volk/lib/qa_32f_stddev_aligned16.cc
+++ /dev/null
@@ -1,75 +0,0 @@
-#include <volk/volk_runtime.h>
-#include <volk/volk.h>
-#include <qa_32f_stddev_aligned16.h>
-#include <volk/volk_32f_stddev_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE
-
-void qa_32f_stddev_aligned16::t1() {
-  printf("sse not available... no test performed\n");
-}
-
-#else
-
-void qa_32f_stddev_aligned16::t1() {
-  volk_runtime_init();  
-
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  float input0[vlen] __attribute__ ((aligned (16)));
-
-  float stddev_generic;
-  float stddev_sse;
-  float stddev_sse4_1;
-  float mean = 0;
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-    mean += input0[i];
-  }
-  mean /= static_cast<float>(vlen);
-
-  printf("32f_stddev_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_stddev_aligned16_manual(&stddev_generic, input0, mean, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_stddev_aligned16_manual(&stddev_sse, input0, mean, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    get_volk_runtime()->volk_32f_stddev_aligned16(&stddev_sse4_1, input0, mean, vlen);
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse4_1_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  //printf("%d...%d\n", output0[i], output01[i]);
-  CPPUNIT_ASSERT_DOUBLES_EQUAL(stddev_generic, stddev_sse, fabs(stddev_generic)*1e-4);
-  CPPUNIT_ASSERT_DOUBLES_EQUAL(stddev_generic, stddev_sse4_1, fabs(stddev_generic)*1e-4);
-
-}
-
-#endif
diff --git a/volk/lib/qa_32f_stddev_aligned16.h b/volk/lib/qa_32f_stddev_aligned16.h
deleted file mode 100644
index 7f8d7a5fc..000000000
--- a/volk/lib/qa_32f_stddev_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_STDDEV_ALIGNED16_H
-#define INCLUDED_QA_32F_STDDEV_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_stddev_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_stddev_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_STDDEV_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_stddev_and_mean_aligned16.cc b/volk/lib/qa_32f_stddev_and_mean_aligned16.cc
deleted file mode 100644
index 78c701d78..000000000
--- a/volk/lib/qa_32f_stddev_and_mean_aligned16.cc
+++ /dev/null
@@ -1,76 +0,0 @@
-#include <volk/volk_runtime.h>
-#include <volk/volk.h>
-#include <qa_32f_stddev_and_mean_aligned16.h>
-#include <volk/volk_32f_stddev_and_mean_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE
-
-void qa_32f_stddev_and_mean_aligned16::t1() {
-  printf("sse not available... no test performed\n");
-}
-
-#else
-
-void qa_32f_stddev_and_mean_aligned16::t1() {
-  volk_runtime_init();  
-
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  float input0[vlen] __attribute__ ((aligned (16)));
-  
-  float stddev_generic;
-  float stddev_sse;
-  float stddev_sse4_1;
-  float mean_generic;
-  float mean_sse;
-  float mean_sse4_1;
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-  }
-  printf("32f_stddev_and_mean_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_stddev_and_mean_aligned16_manual(&stddev_generic, &mean_generic, input0,vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_stddev_and_mean_aligned16_manual(&stddev_sse, &mean_sse, input0,vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    get_volk_runtime()->volk_32f_stddev_and_mean_aligned16(&stddev_sse4_1, &mean_sse4_1, input0, vlen);
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse4_1_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  CPPUNIT_ASSERT_DOUBLES_EQUAL(stddev_generic, stddev_sse, fabs(stddev_generic)*1e-4);
-  CPPUNIT_ASSERT_DOUBLES_EQUAL(mean_generic, mean_sse, fabs(mean_generic)*1e-4);
-
-  CPPUNIT_ASSERT_DOUBLES_EQUAL(stddev_generic, stddev_sse4_1, fabs(stddev_generic)*1e-4);
-  CPPUNIT_ASSERT_DOUBLES_EQUAL(mean_generic, mean_sse4_1, fabs(mean_generic)*1e-4);
-
-}
-
-#endif
diff --git a/volk/lib/qa_32f_stddev_and_mean_aligned16.h b/volk/lib/qa_32f_stddev_and_mean_aligned16.h
deleted file mode 100644
index e08bd249a..000000000
--- a/volk/lib/qa_32f_stddev_and_mean_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_STDDEV_AND_MEAN_ALIGNED16_H
-#define INCLUDED_QA_32F_STDDEV_AND_MEAN_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_stddev_and_mean_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_stddev_and_mean_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_STDDEV_AND_MEAN_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_subtract_aligned16.cc b/volk/lib/qa_32f_subtract_aligned16.cc
deleted file mode 100644
index 1e2210203..000000000
--- a/volk/lib/qa_32f_subtract_aligned16.cc
+++ /dev/null
@@ -1,70 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32f_subtract_aligned16.h>
-#include <volk/volk_32f_subtract_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE
-
-void qa_32f_subtract_aligned16::t1() {
-  printf("sse not available... no test performed\n");
-}
-
-#else
-
-void qa_32f_subtract_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  float input0[vlen] __attribute__ ((aligned (16)));
-  float input1[vlen] __attribute__ ((aligned (16)));
-  
-  float output0[vlen] __attribute__ ((aligned (16)));
-  float output01[vlen] __attribute__ ((aligned (16)));
-  float output02[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-    input1[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
-  }
-  printf("32f_subtract_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_subtract_aligned16_manual(output0, input0, input1, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_subtract_aligned16_manual(output02, input0, input1, vlen, "orc");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("orc_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_subtract_aligned16_manual(output01, input0, input1, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32f_subtract_aligned16.h b/volk/lib/qa_32f_subtract_aligned16.h
deleted file mode 100644
index 97c14f129..000000000
--- a/volk/lib/qa_32f_subtract_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_SUBTRACT_ALIGNED16_H
-#define INCLUDED_QA_32F_SUBTRACT_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_subtract_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_subtract_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_SUBTRACT_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_sum_of_poly_aligned16.cc b/volk/lib/qa_32f_sum_of_poly_aligned16.cc
deleted file mode 100644
index 494776357..000000000
--- a/volk/lib/qa_32f_sum_of_poly_aligned16.cc
+++ /dev/null
@@ -1,142 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32f_sum_of_poly_aligned16.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <time.h>
-#include <math.h>
-
-#define SNR 30.0
-#define CENTER -4.0
-#define CUTOFF -5.595
-#define ERR_DELTA (1e-4)
-#define NUM_ITERS 100000
-#define VEC_LEN 64
-static float uniform() {
-  return ((float) rand() / RAND_MAX);	// uniformly (0, 1)
-}
-
-static void
-random_floats (float *buf, unsigned n)
-{
-  unsigned int i = 0;
-  for (; i < n; i++) {
-
-    buf[i] =  uniform () * -SNR/2.0;
-
-  }
-}
-
-
-#ifndef LV_HAVE_SSE3
-
-void qa_32f_sum_of_poly_aligned16::t1(){
-  printf("sse3 not available... no test performed\n");
-}
-
-#else
-
-
-void qa_32f_sum_of_poly_aligned16::t1(){
-  int i = 0;
-  
-  volk_environment_init();
-  int ret;
-
-  const int vlen = VEC_LEN;
-  float cutoff = CUTOFF;
-  
-  float* center_point_array;
-  float* target;
-  float* target_generic;
-  float* src0 ;
-
-
-  ret = posix_memalign((void**)&center_point_array, 16, 24);
-  ret = posix_memalign((void**)&target, 16, 4);
-  ret = posix_memalign((void**)&target_generic, 16, 4);
-  ret = posix_memalign((void**)&src0, 16, (vlen << 2));
-  
- 
-  random_floats((float*)src0, vlen);
- 
-  float a = (float)CENTER;
-  float etoa = expf(a);
-  center_point_array[0] = (//(5.0 * a * a * a * a)/120.0 +
-			   (-4.0 * a * a * a)/24.0 + 
-			   (3.0 * a * a)/6.0 +
-			   (-2.0 * a)/2.0 +
-			   (1.0)) * etoa;
-  center_point_array[1] = (//(-10.0 * a * a * a)/120.0 +
-			   (6.0 * a * a)/24.0 + 
-			   (-3.0 * a)/6.0 +
-			   (1.0/2.0)) * etoa;
-  center_point_array[2] = (//(10.0 * a * a)/120.0 +
-			   (-4.0 * a)/24.0 +
-			   (1.0/6.0)) * etoa;
-  center_point_array[3] = (//(-5.0 * a)/120.0 +
-			   (1.0/24.0)) * etoa;
-  //center_point_array[4] = ((1.0)/120.0) * etoa;
-  center_point_array[4] = (//(a * a * a * a * a)/120.0 +
-			   (a * a * a * a)/24.0 +
-			   (a * a * a)/-6.0 +
-			   (a * a)/2.0 +
-			   -a + 1.0) * etoa;
-  
-  printf("32f_sum_of_poly_aligned16\n");
-
-  clock_t start, end;
-  double total;
-  
-  float my_sum = 0.0;
-  start = clock();
-  for(int k = 0; k < NUM_ITERS; ++k) {
-    float sum = 0.0;
-    for(int l = 0; l < vlen; ++l) {
-      
-      sum += expf(src0[l]);
-      
-    }
-    my_sum = sum;
-  }
-  
-  
-  end = clock();  
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("exp time: %f\n", total);
-  
-  start = clock();
-  for(int k = 0; k < NUM_ITERS; ++k) {
-    
-    volk_32f_sum_of_poly_aligned16_manual(target_generic, src0, center_point_array, &cutoff, vlen << 2, "generic");
-  
-  }
-  
-  
-  end = clock();  
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic time: %f\n", total);
-  
-  start = clock();
-  for(int k = 0; k < NUM_ITERS; ++k) {
-    volk_32f_sum_of_poly_aligned16_manual(target, src0, center_point_array, &cutoff, vlen << 2, "sse3");
-  }
-  
-  end = clock();  
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse3 approx time: %f\n", total);
-
-
-  
-  printf("exp: %f, sse3: %f\n", my_sum, target[i]);
-  CPPUNIT_ASSERT_DOUBLES_EQUAL(target_generic[0], target[0], fabs(target_generic[0]) * ERR_DELTA);
-  
-
-  free(center_point_array);
-  free(target);
-  free(target_generic);
-  free(src0);
-
-  
-}
-
-#endif /*LV_HAVE_SSE3*/
diff --git a/volk/lib/qa_32f_sum_of_poly_aligned16.h b/volk/lib/qa_32f_sum_of_poly_aligned16.h
deleted file mode 100644
index 67a347f9a..000000000
--- a/volk/lib/qa_32f_sum_of_poly_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_SUM_OF_POLY_ALIGNED16_H
-#define INCLUDED_QA_32F_SUM_OF_POLY_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_sum_of_poly_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_sum_of_poly_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_SUM_OF_POLY_ALIGNED16_H */
diff --git a/volk/lib/qa_32fc_32f_multiply_aligned16.cc b/volk/lib/qa_32fc_32f_multiply_aligned16.cc
deleted file mode 100644
index b80e0e008..000000000
--- a/volk/lib/qa_32fc_32f_multiply_aligned16.cc
+++ /dev/null
@@ -1,75 +0,0 @@
-#include <volk/volk_runtime.h>
-#include <volk/volk.h>
-#include <qa_32fc_32f_multiply_aligned16.h>
-#include <stdlib.h>
-#include <time.h>
-#include <string.h>
-#include <qa_utils.h>
-#include <boost/test/unit_test.hpp>
-
-#define	TOLERANCE	(1e-4)
-
-void qa_32fc_32f_multiply_aligned16(void) {
-
-  const int vlen = 2046;
-  const int ITERS = 100000;
-
-  volk_environment_init();
-  int ret;
-  clock_t start, end;
-  double total;
-  std::complex<float>* input;
-  float * taps;
-  int i;
-  std::vector<std::string> archs;
-  archs.push_back("generic");
-#ifdef LV_HAVE_SSE3
-  archs.push_back("sse3");
-#endif
-#ifdef LV_HAVE_ORC
-  archs.push_back("orc");
-#endif
-  
-  std::vector<std::complex<float>* > results;
-
-  ret = posix_memalign((void**)&input, 16, vlen * 2 * sizeof(float));
-  ret = posix_memalign((void**)&taps, 16, vlen * sizeof(float));
-  
-  for(i=0; i < archs.size(); i++) {
-      std::complex<float> *ptr;
-      ret = posix_memalign((void**)&ptr, 16, vlen * 2 * sizeof(float));
-      if(ret) {
-          printf("Couldn't allocate memory\n");
-          exit(1);
-      }
-      results.push_back(ptr);
-  }
-
-  random_floats((float*)input, vlen * 2);
-  random_floats(taps, vlen);
-  
-  printf("32fc_32f_multiply_aligned16\n");
-
-  for(i=0; i < archs.size(); i++) {
-    start = clock();
-    for(int count = 0; count < ITERS; ++count) {
-      volk_32fc_32f_multiply_aligned16_manual(results[i], input, taps, vlen, archs[i].c_str());
-    }
-    end = clock();
-    total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-    printf("%s_time: %f\n", archs[i].c_str(), total);
-  }
-
-  for(i=0; i < vlen; i++) {
-      int j = 1;
-      for(j; j < archs.size(); j++) {
-          assertcomplexEqual(results[0][i], results[j][i], ERR_DELTA);
-      }
-  }
-
-  free(input);
-  free(taps);
-  for(i=0; i < archs.size(); i++) {      
-    free(results[i]);
-  }
-}
diff --git a/volk/lib/qa_32fc_32f_multiply_aligned16.h b/volk/lib/qa_32fc_32f_multiply_aligned16.h
deleted file mode 100644
index fc3b3eeb2..000000000
--- a/volk/lib/qa_32fc_32f_multiply_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32FC_32F_MULTIPLY_ALIGNED16_H
-#define INCLUDED_QA_32FC_32F_MULTIPLY_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32fc_32f_multiply_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32fc_32f_multiply_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32FC_32F_MULTIPLY_ALIGNED16_H */
diff --git a/volk/lib/qa_32fc_32f_power_32fc_aligned16.cc b/volk/lib/qa_32fc_32f_power_32fc_aligned16.cc
deleted file mode 100644
index 64ea65da9..000000000
--- a/volk/lib/qa_32fc_32f_power_32fc_aligned16.cc
+++ /dev/null
@@ -1,83 +0,0 @@
-#include <volk/volk_runtime.h>
-#include <volk/volk.h>
-#include <qa_32fc_32f_power_32fc_aligned16.h>
-#include <stdlib.h>
-#include <math.h>
-#include <time.h>
-
-#define assertcomplexEqual(expected, actual, delta)			\
-  CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * delta); \
-  CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected))* delta);	
-
-#define	ERR_DELTA	(1.5e-3)
-
-//test for sse
-static float uniform() {
-  return 2.0 * ((float) rand() / RAND_MAX - 0.5);	// uniformly (-1, 1)
-}
-
-static void
-random_floats (float *buf, unsigned n)
-{
-  for (unsigned i = 0; i < n; i++)
-    buf[i] = uniform ();
-}
-
-#ifdef LV_HAVE_SSE
-void qa_32fc_32f_power_32fc_aligned16::t1() {
-
-  const int vlen = 2046;
-  const int ITERS = 10000;
-
-  volk_environment_init();
-  int ret;
-  clock_t start, end;
-  double total;
-  std::complex<float>* input;
-  int i;
-  
-  std::complex<float>* result_generic;
-  std::complex<float>* result_sse;
-
-  ret = posix_memalign((void**)&input, 16, vlen * 2 * sizeof(float));
-  ret = posix_memalign((void**)&result_generic, 16, vlen * 2 * sizeof(float));
-  ret = posix_memalign((void**)&result_sse, 16, vlen * 2 * sizeof(float));
-
-  random_floats((float*)input, vlen * 2);
-
-  const float power = 3.2;
-  
-  printf("32fc_32f_power_32fc_aligned16\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_32f_power_32fc_aligned16_manual(result_generic, input, power, vlen,  "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_32f_power_32fc_aligned16_manual(result_sse, input, power, vlen,  "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  for(i = 0; i < vlen; i++){
-    assertcomplexEqual(result_generic[i], result_sse[i], ERR_DELTA);
-  }
-
-  free(input);
-  free(result_generic);
-  free(result_sse);
-  
-}
-#else
-void qa_32fc_32f_power_32fc_aligned16::t1() {
-  printf("sse not available... no test performed\n");
-}
-
-#endif /* LV_HAVE_SSE */
-
diff --git a/volk/lib/qa_32fc_32f_power_32fc_aligned16.h b/volk/lib/qa_32fc_32f_power_32fc_aligned16.h
deleted file mode 100644
index 464b7b7cc..000000000
--- a/volk/lib/qa_32fc_32f_power_32fc_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32FC_32F_POWER_32FC_ALIGNED16_H
-#define INCLUDED_QA_32FC_32F_POWER_32FC_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32fc_32f_power_32fc_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32fc_32f_power_32fc_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32FC_32F_POWER_32FC_ALIGNED16_H */
diff --git a/volk/lib/qa_32fc_atan2_32f_aligned16.cc b/volk/lib/qa_32fc_atan2_32f_aligned16.cc
deleted file mode 100644
index c55ab5aa0..000000000
--- a/volk/lib/qa_32fc_atan2_32f_aligned16.cc
+++ /dev/null
@@ -1,76 +0,0 @@
-#include <volk/volk_runtime.h>
-#include <volk/volk.h>
-#include <qa_32fc_atan2_32f_aligned16.h>
-#include <volk/volk_32fc_atan2_32f_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE
-
-void qa_32fc_atan2_32f_aligned16::t1() {
-  printf("sse not available... no test performed\n");
-}
-
-#else
-
-void qa_32fc_atan2_32f_aligned16::t1() {
-  
-  
-  volk_runtime_init();
-
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 10000;
-  std::complex<float> input0[vlen] __attribute__ ((aligned (16)));
-  
-  float output_generic[vlen] __attribute__ ((aligned (16)));
-  float output_sse[vlen] __attribute__ ((aligned (16)));
-  float output_sse4_1[vlen] __attribute__ ((aligned (16)));
-
-  float* inputLoad = (float*)input0;
-  for(int i = 0; i < 2*vlen; ++i) {   
-    inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)));
-  }
-  printf("32fc_atan2_32f_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_atan2_32f_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_atan2_32f_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    get_volk_runtime()->volk_32fc_atan2_32f_aligned16(output_sse4_1, input0, 32768.0, vlen);
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse4_1_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse4_1[i], fabs(output_generic[i])*1e-4);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32fc_atan2_32f_aligned16.h b/volk/lib/qa_32fc_atan2_32f_aligned16.h
deleted file mode 100644
index 9c4dc209a..000000000
--- a/volk/lib/qa_32fc_atan2_32f_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32FC_ATAN2_32F_ALIGNED16_H
-#define INCLUDED_QA_32FC_ATAN2_32F_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32fc_atan2_32f_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32fc_atan2_32f_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32FC_ATAN2_32F_ALIGNED16_H */
diff --git a/volk/lib/qa_32fc_conjugate_dot_prod_aligned16.cc b/volk/lib/qa_32fc_conjugate_dot_prod_aligned16.cc
deleted file mode 100644
index 2f9a30395..000000000
--- a/volk/lib/qa_32fc_conjugate_dot_prod_aligned16.cc
+++ /dev/null
@@ -1,138 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32fc_conjugate_dot_prod_aligned16.h>
-#include <stdlib.h>
-#include <math.h>
-#include <time.h>
-
-
-#define assertcomplexEqual(expected, actual, delta)			\
-  CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * delta); \
-  CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected))* delta);	
-
-#define	ERR_DELTA	(1e-4)
-
-//test for sse
-
-#if LV_HAVE_SSE && LV_HAVE_64
-
-static float uniform() {
-  return 2.0 * ((float) rand() / RAND_MAX - 0.5);	// uniformly (-1, 1)
-}
-
-static void
-random_floats (float *buf, unsigned n)
-{
-  for (unsigned i = 0; i < n; i++)
-    buf[i] = uniform () * 32767;
-}
-
-
-void qa_32fc_conjugate_dot_prod_aligned16::t1() {
-  const int vlen = 789743;
-  
-  volk_environment_init();
-  int ret;
-
-  std::complex<float>* input;
-  std::complex<float>* taps;
-  
-  std::complex<float>* result_generic;
-  std::complex<float>* result;
-
-  ret = posix_memalign((void**)&input, 16, vlen << 3);
-  ret = posix_memalign((void**)&taps, 16, vlen << 3);
-  ret = posix_memalign((void**)&result_generic, 16, 8);
-  ret = posix_memalign((void**)&result, 16, 8);
-  
-
-  result_generic[0] = std::complex<float>(0,0);
-  result[0] = std::complex<float>(0,0);
-
-  random_floats((float*)input, vlen * 2);
-  random_floats((float*)taps, vlen * 2);
-  
-  
-
-  volk_32fc_conjugate_dot_prod_aligned16_manual(result_generic, input, taps, vlen * 8,  "generic");
-
-  
-  volk_32fc_conjugate_dot_prod_aligned16_manual(result, input, taps, vlen * 8, "sse");
-
-  printf("32fc_conjugate_dot_prod_aligned16\n");
-  printf("generic: %f +i%f ... sse: %f +i%f\n", std::real(result_generic[0]), std::imag(result_generic[0]), std::real(result[0]), std::imag(result[0]));
-
-  assertcomplexEqual(result_generic[0], result[0], ERR_DELTA);
-
-  free(input);
-  free(taps);
-  free(result_generic);
-  free(result);
-  
-}
-
-
-#elif LV_HAVE_SSE && LV_HAVE_32
-
-static float uniform() {
-  return 2.0 * ((float) rand() / RAND_MAX - 0.5);	// uniformly (-1, 1)
-}
-
-static void
-random_floats (float *buf, unsigned n)
-{
-  for (unsigned i = 0; i < n; i++)
-    buf[i] = uniform () * 32767;
-}
-
-
-void qa_32fc_conjugate_dot_prod_aligned16::t1() {
-  const int vlen = 789743;
-  
-  volk_environment_init();
-  int ret;
-
-  std::complex<float>* input;
-  std::complex<float>* taps;
-  
-  std::complex<float>* result_generic;
-  std::complex<float>* result;
-
-  ret = posix_memalign((void**)&input, 16, vlen << 3);
-  ret = posix_memalign((void**)&taps, 16, vlen << 3);
-  ret = posix_memalign((void**)&result_generic, 16, 8);
-  ret = posix_memalign((void**)&result, 16, 8);
-  
-
-  result_generic[0] = std::complex<float>(0,0);
-  result[0] = std::complex<float>(0,0);
-
-  random_floats((float*)input, vlen * 2);
-  random_floats((float*)taps, vlen * 2);
-  
-  
-
-  volk_32fc_conjugate_dot_prod_aligned16_manual(result_generic, input, taps, vlen * 8,  "generic");
-
-  
-  volk_32fc_conjugate_dot_prod_aligned16_manual(result, input, taps, vlen * 8, "sse_32");
-
-  printf("32fc_conjugate_dot_prod_aligned16\n");
-  printf("generic: %f +i%f ... sse: %f +i%f\n", std::real(result_generic[0]), std::imag(result_generic[0]), std::real(result[0]), std::imag(result[0]));
-
-  assertcomplexEqual(result_generic[0], result[0], ERR_DELTA);
-
-  free(input);
-  free(taps);
-  free(result_generic);
-  free(result);
-  
-}
-
-
-#else
-
-void qa_32fc_conjugate_dot_prod_aligned16::t1() {
-  printf("sse not available... no test performed\n");
-}
-
-#endif /*LV_HAVE_SSE*/
diff --git a/volk/lib/qa_32fc_conjugate_dot_prod_aligned16.h b/volk/lib/qa_32fc_conjugate_dot_prod_aligned16.h
deleted file mode 100644
index 507b1769b..000000000
--- a/volk/lib/qa_32fc_conjugate_dot_prod_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32FC_CONJUGATE_DOT_PROD_ALIGNED16_H
-#define INCLUDED_QA_32FC_CONJUGATE_DOT_PROD_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32fc_conjugate_dot_prod_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32fc_conjugate_dot_prod_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32FC_CONJUGATE_DOT_PROD_ALIGNED16_H */
diff --git a/volk/lib/qa_32fc_deinterleave_32f_aligned16.cc b/volk/lib/qa_32fc_deinterleave_32f_aligned16.cc
deleted file mode 100644
index 72e084c05..000000000
--- a/volk/lib/qa_32fc_deinterleave_32f_aligned16.cc
+++ /dev/null
@@ -1,64 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32fc_deinterleave_32f_aligned16.h>
-#include <volk/volk_32fc_deinterleave_32f_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE
-
-void qa_32fc_deinterleave_32f_aligned16::t1() {
-  printf("sse not available... no test performed\n");
-}
-
-#else
-
-void qa_32fc_deinterleave_32f_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  std::complex<float> input0[vlen] __attribute__ ((aligned (16)));
-  
-  float output_generic[vlen] __attribute__ ((aligned (16)));
-  float output_generic1[vlen] __attribute__ ((aligned (16)));
-  float output_sse[vlen] __attribute__ ((aligned (16)));
-  float output_sse1[vlen] __attribute__ ((aligned (16)));
-
-  float* inputLoad = (float*)input0;
-  for(int i = 0; i < 2*vlen; ++i) {   
-    inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)));
-  }
-  printf("32fc_deinterleave_32f_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_deinterleave_32f_aligned16_manual(output_generic, output_generic1, input0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_deinterleave_32f_aligned16_manual(output_sse, output_sse1, input0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic1[i], output_sse1[i], fabs(output_generic1[i])*1e-4);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32fc_deinterleave_32f_aligned16.h b/volk/lib/qa_32fc_deinterleave_32f_aligned16.h
deleted file mode 100644
index 78660e6ad..000000000
--- a/volk/lib/qa_32fc_deinterleave_32f_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32FC_DEINTERLEAVE_32F_ALIGNED16_H
-#define INCLUDED_QA_32FC_DEINTERLEAVE_32F_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32fc_deinterleave_32f_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32fc_deinterleave_32f_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32FC_DEINTERLEAVE_32F_ALIGNED16_H */
diff --git a/volk/lib/qa_32fc_deinterleave_64f_aligned16.cc b/volk/lib/qa_32fc_deinterleave_64f_aligned16.cc
deleted file mode 100644
index 89770c236..000000000
--- a/volk/lib/qa_32fc_deinterleave_64f_aligned16.cc
+++ /dev/null
@@ -1,64 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32fc_deinterleave_64f_aligned16.h>
-#include <volk/volk_32fc_deinterleave_64f_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse2
-
-#ifndef LV_HAVE_SSE2
-
-void qa_32fc_deinterleave_64f_aligned16::t1() {
-  printf("sse2 not available... no test performed\n");
-}
-
-#else
-
-void qa_32fc_deinterleave_64f_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  std::complex<float> input0[vlen] __attribute__ ((aligned (16)));
-  
-  double output_generic[vlen] __attribute__ ((aligned (16)));
-  double output_generic1[vlen] __attribute__ ((aligned (16)));
-  double output_sse2[vlen] __attribute__ ((aligned (16)));
-  double output_sse21[vlen] __attribute__ ((aligned (16)));
-
-  float* inputLoad = (float*)input0;
-  for(int i = 0; i < 2*vlen; ++i) {   
-    inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)));
-  }
-  printf("32fc_deinterleave_64f_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_deinterleave_64f_aligned16_manual(output_generic, output_generic1, input0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_deinterleave_64f_aligned16_manual(output_sse2, output_sse21, input0, vlen, "sse2");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse2_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse2[i], fabs(output_generic[i])*1e-4);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic1[i], output_sse21[i], fabs(output_generic1[i])*1e-4);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32fc_deinterleave_64f_aligned16.h b/volk/lib/qa_32fc_deinterleave_64f_aligned16.h
deleted file mode 100644
index f924b9752..000000000
--- a/volk/lib/qa_32fc_deinterleave_64f_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32FC_DEINTERLEAVE_64F_ALIGNED16_H
-#define INCLUDED_QA_32FC_DEINTERLEAVE_64F_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32fc_deinterleave_64f_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32fc_deinterleave_64f_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32FC_DEINTERLEAVE_64F_ALIGNED16_H */
diff --git a/volk/lib/qa_32fc_deinterleave_real_16s_aligned16.cc b/volk/lib/qa_32fc_deinterleave_real_16s_aligned16.cc
deleted file mode 100644
index 7472476f7..000000000
--- a/volk/lib/qa_32fc_deinterleave_real_16s_aligned16.cc
+++ /dev/null
@@ -1,61 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32fc_deinterleave_real_16s_aligned16.h>
-#include <volk/volk_32fc_deinterleave_real_16s_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE
-
-void qa_32fc_deinterleave_real_16s_aligned16::t1() {
-  printf("sse not available... no test performed\n");
-}
-
-#else
-
-void qa_32fc_deinterleave_real_16s_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  std::complex<float> input0[vlen] __attribute__ ((aligned (16)));
-  
-  int16_t output_generic[vlen] __attribute__ ((aligned (16)));
-  int16_t output_sse[vlen] __attribute__ ((aligned (16)));
-
-  float* inputLoad = (float*)input0;
-  for(int i = 0; i < 2*vlen; ++i) {   
-    inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)));
-  }
-  printf("32fc_deinterleave_real_16s_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_deinterleave_real_16s_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_deinterleave_real_16s_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32fc_deinterleave_real_16s_aligned16.h b/volk/lib/qa_32fc_deinterleave_real_16s_aligned16.h
deleted file mode 100644
index 68b80f27d..000000000
--- a/volk/lib/qa_32fc_deinterleave_real_16s_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32FC_DEINTERLEAVE_REAL_16S_ALIGNED16_H
-#define INCLUDED_QA_32FC_DEINTERLEAVE_REAL_16S_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32fc_deinterleave_real_16s_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32fc_deinterleave_real_16s_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32FC_DEINTERLEAVE_REAL_16S_ALIGNED16_H */
diff --git a/volk/lib/qa_32fc_deinterleave_real_32f_aligned16.cc b/volk/lib/qa_32fc_deinterleave_real_32f_aligned16.cc
deleted file mode 100644
index 5cbdc49b3..000000000
--- a/volk/lib/qa_32fc_deinterleave_real_32f_aligned16.cc
+++ /dev/null
@@ -1,61 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32fc_deinterleave_real_32f_aligned16.h>
-#include <volk/volk_32fc_deinterleave_real_32f_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE
-
-void qa_32fc_deinterleave_real_32f_aligned16::t1() {
-  printf("sse not available... no test performed\n");
-}
-
-#else
-
-void qa_32fc_deinterleave_real_32f_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  std::complex<float> input0[vlen] __attribute__ ((aligned (16)));
-  
-  float output_generic[vlen] __attribute__ ((aligned (16)));
-  float output_sse[vlen] __attribute__ ((aligned (16)));
-
-  float* inputLoad = (float*)input0;
-  for(int i = 0; i < 2*vlen; ++i) {   
-    inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)));
-  }
-  printf("32fc_deinterleave_real_32f_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_deinterleave_real_32f_aligned16_manual(output_generic, input0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_deinterleave_real_32f_aligned16_manual(output_sse, input0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32fc_deinterleave_real_32f_aligned16.h b/volk/lib/qa_32fc_deinterleave_real_32f_aligned16.h
deleted file mode 100644
index 765450bb6..000000000
--- a/volk/lib/qa_32fc_deinterleave_real_32f_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32FC_DEINTERLEAVE_REAL_32F_ALIGNED16_H
-#define INCLUDED_QA_32FC_DEINTERLEAVE_REAL_32F_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32fc_deinterleave_real_32f_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32fc_deinterleave_real_32f_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32FC_DEINTERLEAVE_REAL_32F_ALIGNED16_H */
diff --git a/volk/lib/qa_32fc_deinterleave_real_64f_aligned16.cc b/volk/lib/qa_32fc_deinterleave_real_64f_aligned16.cc
deleted file mode 100644
index 4147e30ae..000000000
--- a/volk/lib/qa_32fc_deinterleave_real_64f_aligned16.cc
+++ /dev/null
@@ -1,61 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32fc_deinterleave_real_64f_aligned16.h>
-#include <volk/volk_32fc_deinterleave_real_64f_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE2
-
-void qa_32fc_deinterleave_real_64f_aligned16::t1() {
-  printf("sse2 not available... no test performed\n");
-}
-
-#else
-
-void qa_32fc_deinterleave_real_64f_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  std::complex<float> input0[vlen] __attribute__ ((aligned (16)));
-  
-  double output_generic[vlen] __attribute__ ((aligned (16)));
-  double output_sse2[vlen] __attribute__ ((aligned (16)));
-
-  float* inputLoad = (float*)input0;
-  for(int i = 0; i < 2*vlen; ++i) {   
-    inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)));
-  }
-  printf("32fc_deinterleave_real_64f_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_deinterleave_real_64f_aligned16_manual(output_generic, input0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_deinterleave_real_64f_aligned16_manual(output_sse2, input0, vlen, "sse2");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse2[i], fabs(output_generic[i])*1e-4);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32fc_deinterleave_real_64f_aligned16.h b/volk/lib/qa_32fc_deinterleave_real_64f_aligned16.h
deleted file mode 100644
index 3e55fb812..000000000
--- a/volk/lib/qa_32fc_deinterleave_real_64f_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32FC_DEINTERLEAVE_REAL_64F_ALIGNED16_H
-#define INCLUDED_QA_32FC_DEINTERLEAVE_REAL_64F_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32fc_deinterleave_real_64f_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32fc_deinterleave_real_64f_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32FC_DEINTERLEAVE_REAL_64F_ALIGNED16_H */
diff --git a/volk/lib/qa_32fc_dot_prod_aligned16.cc b/volk/lib/qa_32fc_dot_prod_aligned16.cc
deleted file mode 100644
index bcf9ea954..000000000
--- a/volk/lib/qa_32fc_dot_prod_aligned16.cc
+++ /dev/null
@@ -1,214 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32fc_dot_prod_aligned16.h>
-#include <stdlib.h>
-#include <math.h>
-#include <time.h>
-#include <stdio.h>
-
-
-
-#define assertcomplexEqual(expected, actual, delta)			\
-  CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * delta); \
-  CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected))* delta);	
-
-#define	ERR_DELTA	(1e-4)
-
-//test for sse
-static float uniform() {
-  return 2.0 * ((float) rand() / RAND_MAX - 0.5);	// uniformly (-1, 1)
-}
-
-static void
-random_floats (float *buf, unsigned n)
-{
-  for (unsigned i = 0; i < n; i++)
-    buf[i] = uniform ();
-}
-
-
-
-#if LV_HAVE_SSE3
-void qa_32fc_dot_prod_aligned16::t1() {
-
-  const int vlen = 2046;
-
-  volk_environment_init();
-  int ret;
-  clock_t start, end;
-  double total;
-  std::complex<float>* input;
-  std::complex<float>* taps;
-  
-  std::complex<float>* result_generic;
-  std::complex<float>* result_sse3;
-
-  ret = posix_memalign((void**)&input, 16, vlen << 3);
-  ret = posix_memalign((void**)&taps, 16, vlen << 3);
-  ret = posix_memalign((void**)&result_generic, 16, 8);
-  ret = posix_memalign((void**)&result_sse3, 16, 8);
-  
-
-  result_generic[0] = std::complex<float>(0,0);
-  result_sse3[0] = std::complex<float>(0,0);
-
-  random_floats((float*)input, vlen * 2);
-  random_floats((float*)taps, vlen * 2);
-  
-  printf("32fc_dot_prod_aligned16\n");
-  
-  start = clock();
-  volk_32fc_dot_prod_aligned16_manual(result_generic, input, taps, vlen * 8,  "generic");
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  
-  start = clock();
-  volk_32fc_dot_prod_aligned16_manual(result_sse3, input, taps, vlen * 8, "sse3");
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse3_time: %f\n", total);
-
-  printf("generic: %f +i%f ... sse3: %f +i%f\n", std::real(result_generic[0]), std::imag(result_generic[0]), std::real(result_sse3[0]), std::imag(result_sse3[0]));
-
-  
-  assertcomplexEqual(result_generic[0], result_sse3[0], ERR_DELTA);
-
-  free(input);
-  free(taps);
-  free(result_generic);
-  free(result_sse3);
-  
-}
-
-#else
-void qa_32fc_dot_prod_aligned16::t1() {
-  printf("sse3 not available... no test performed\n");
-}
-
-#endif
-
-#if LV_HAVE_SSE && LV_HAVE_32
-void qa_32fc_dot_prod_aligned16::t2() {
-
-  const int vlen = 2046;
-
-  volk_environment_init();
-  int ret;
-  clock_t start, end;
-  double total;
-  std::complex<float>* input;
-  std::complex<float>* taps;
-  
-  std::complex<float>* result_generic;
-  std::complex<float>* result_sse3;
-
-  ret = posix_memalign((void**)&input, 16, vlen << 3);
-  ret = posix_memalign((void**)&taps, 16, vlen << 3);
-  ret = posix_memalign((void**)&result_generic, 16, 8);
-  ret = posix_memalign((void**)&result_sse3, 16, 8);
-  
-
-  result_generic[0] = std::complex<float>(0,0);
-  result_sse3[0] = std::complex<float>(0,0);
-
-  random_floats((float*)input, vlen * 2);
-  random_floats((float*)taps, vlen * 2);
-  
-  printf("32fc_dot_prod_aligned16\n");
-  
-  start = clock();
-  volk_32fc_dot_prod_aligned16_manual(result_generic, input, taps, vlen * 8,  "generic");
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  
-  start = clock();
-  volk_32fc_dot_prod_aligned16_manual(result_sse3, input, taps, vlen * 8, "sse_32");
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_32_time: %f\n", total);
-
-  printf("generic: %f +i%f ... sse_32: %f +i%f\n", std::real(result_generic[0]), std::imag(result_generic[0]), std::real(result_sse3[0]), std::imag(result_sse3[0]));
-
-  
-  assertcomplexEqual(result_generic[0], result_sse3[0], ERR_DELTA);
-
-  free(input);
-  free(taps);
-  free(result_generic);
-  free(result_sse3);
-  
-}
-
-#else
-void qa_32fc_dot_prod_aligned16::t2() {
-  printf("sse_32 not available... no test performed\n");
-}
-
-#endif
-
-#if LV_HAVE_SSE && LV_HAVE_64
-
-void qa_32fc_dot_prod_aligned16::t3() {
-
-  const int vlen = 2046;
-
-  volk_environment_init();
-  int ret;
-  clock_t start, end;
-  double total;
-  std::complex<float>* input;
-  std::complex<float>* taps;
-  
-  std::complex<float>* result_generic;
-  std::complex<float>* result_sse3;
-
-  ret = posix_memalign((void**)&input, 16, vlen << 3);
-  ret = posix_memalign((void**)&taps, 16, vlen << 3);
-  ret = posix_memalign((void**)&result_generic, 16, 8);
-  ret = posix_memalign((void**)&result_sse3, 16, 8);
-  
-
-  result_generic[0] = std::complex<float>(0,0);
-  result_sse3[0] = std::complex<float>(0,0);
-
-  random_floats((float*)input, vlen * 2);
-  random_floats((float*)taps, vlen * 2);
-  
-  printf("32fc_dot_prod_aligned16\n");
-  
-  start = clock();
-  volk_32fc_dot_prod_aligned16_manual(result_generic, input, taps, vlen * 8,  "generic");
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  
-  start = clock();
-  volk_32fc_dot_prod_aligned16_manual(result_sse3, input, taps, vlen * 8, "sse_64");
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_64_time: %f\n", total);
-
-  printf("generic: %f +i%f ... sse_64: %f +i%f\n", std::real(result_generic[0]), std::imag(result_generic[0]), std::real(result_sse3[0]), std::imag(result_sse3[0]));
-
-  
-  assertcomplexEqual(result_generic[0], result_sse3[0], ERR_DELTA);
-
-  free(input);
-  free(taps);
-  free(result_generic);
-  free(result_sse3);
-  
-}
-
-#else
-void qa_32fc_dot_prod_aligned16::t3() {
-  printf("sse_64 not available... no test performed\n");
-}
-
-
-
-#endif 
diff --git a/volk/lib/qa_32fc_dot_prod_aligned16.h b/volk/lib/qa_32fc_dot_prod_aligned16.h
deleted file mode 100644
index 4b360db27..000000000
--- a/volk/lib/qa_32fc_dot_prod_aligned16.h
+++ /dev/null
@@ -1,20 +0,0 @@
-#ifndef INCLUDED_QA_32FC_DOT_PROD_ALIGNED16_H
-#define INCLUDED_QA_32FC_DOT_PROD_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32fc_dot_prod_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32fc_dot_prod_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-  void t2 ();
-  void t3 ();
-};
-
-
-#endif /* INCLUDED_QA_32FC_DOT_PROD_ALIGNED16_H */
diff --git a/volk/lib/qa_32fc_magnitude_16s_aligned16.cc b/volk/lib/qa_32fc_magnitude_16s_aligned16.cc
deleted file mode 100644
index c718b6b71..000000000
--- a/volk/lib/qa_32fc_magnitude_16s_aligned16.cc
+++ /dev/null
@@ -1,80 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32fc_magnitude_16s_aligned16.h>
-#include <volk/volk_32fc_magnitude_16s_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE3
-
-void qa_32fc_magnitude_16s_aligned16::t1() {
-  printf("sse3 not available... no test performed\n");
-}
-
-#else
-
-void qa_32fc_magnitude_16s_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  std::complex<float> input0[vlen] __attribute__ ((aligned (16)));
-  
-  int16_t output_generic[vlen] __attribute__ ((aligned (16)));
-  int16_t output_orc[vlen] __attribute__ ((aligned (16)));
-  int16_t output_sse[vlen] __attribute__ ((aligned (16)));
-  int16_t output_sse3[vlen] __attribute__ ((aligned (16)));
-
-  float* inputLoad = (float*)input0;
-  for(int i = 0; i < 2*vlen; ++i) {   
-    inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)));
-  }
-  printf("32fc_magnitude_16s_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_magnitude_16s_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_magnitude_16s_aligned16_manual(output_orc, input0, 32768.0, vlen, "orc");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("orc_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_magnitude_16s_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_magnitude_16s_aligned16_manual(output_sse3, input0, 32768.0, vlen, "sse3");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse3_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-  //  printf("inputs: %f, %f\n", input0[i].real(), input0[i].imag());
-  //  printf("generic... %i, sse3... %i, orc... %i\n", output_generic[i], output_sse3[i], output_orc[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], 1.1);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], 1.1);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], 1.1);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32fc_magnitude_16s_aligned16.h b/volk/lib/qa_32fc_magnitude_16s_aligned16.h
deleted file mode 100644
index ffdf1dd9e..000000000
--- a/volk/lib/qa_32fc_magnitude_16s_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32FC_MAGNITUDE_16S_ALIGNED16_H
-#define INCLUDED_QA_32FC_MAGNITUDE_16S_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32fc_magnitude_16s_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32fc_magnitude_16s_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32FC_MAGNITUDE_16S_ALIGNED16_H */
diff --git a/volk/lib/qa_32fc_magnitude_32f_aligned16.cc b/volk/lib/qa_32fc_magnitude_32f_aligned16.cc
deleted file mode 100644
index 1d475fb86..000000000
--- a/volk/lib/qa_32fc_magnitude_32f_aligned16.cc
+++ /dev/null
@@ -1,80 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32fc_magnitude_32f_aligned16.h>
-#include <volk/volk_32fc_magnitude_32f_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE3
-
-void qa_32fc_magnitude_32f_aligned16::t1() {
-  printf("sse3 not available... no test performed\n");
-}
-
-#else
-
-void qa_32fc_magnitude_32f_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  std::complex<float> input0[vlen] __attribute__ ((aligned (16)));
-  
-  float output_generic[vlen] __attribute__ ((aligned (16)));
-  float output_orc[vlen] __attribute__ ((aligned (16)));
-  float output_sse[vlen] __attribute__ ((aligned (16)));
-  float output_sse3[vlen] __attribute__ ((aligned (16)));
-
-  float* inputLoad = (float*)input0;
-  for(int i = 0; i < 2*vlen; ++i) {   
-    inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)));
-  }
-  printf("32fc_magnitude_32f_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_magnitude_32f_aligned16_manual(output_generic, input0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_magnitude_32f_aligned16_manual(output_orc, input0, vlen, "orc");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("orc_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_magnitude_32f_aligned16_manual(output_sse, input0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_magnitude_32f_aligned16_manual(output_sse3, input0, vlen, "sse3");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse3_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], fabs(output_generic[i])*1e-4);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], fabs(output_generic[i])*1e-4);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32fc_magnitude_32f_aligned16.h b/volk/lib/qa_32fc_magnitude_32f_aligned16.h
deleted file mode 100644
index a2881308c..000000000
--- a/volk/lib/qa_32fc_magnitude_32f_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32FC_MAGNITUDE_32F_ALIGNED16_H
-#define INCLUDED_QA_32FC_MAGNITUDE_32F_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32fc_magnitude_32f_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32fc_magnitude_32f_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32FC_MAGNITUDE_32F_ALIGNED16_H */
diff --git a/volk/lib/qa_32fc_multiply_aligned16.cc b/volk/lib/qa_32fc_multiply_aligned16.cc
deleted file mode 100644
index 022b58ad6..000000000
--- a/volk/lib/qa_32fc_multiply_aligned16.cc
+++ /dev/null
@@ -1,98 +0,0 @@
-#include <volk/volk_runtime.h>
-#include <volk/volk.h>
-#include <qa_32fc_multiply_aligned16.h>
-#include <stdlib.h>
-#include <math.h>
-#include <time.h>
-
-
-
-#define assertcomplexEqual(expected, actual, delta)			\
-  CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * delta); \
-  CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected))* delta);	
-
-#define	ERR_DELTA	(1e-3)
-
-//test for sse
-static float uniform() {
-  return 2.0 * ((float) rand() / RAND_MAX - 0.5);	// uniformly (-1, 1)
-}
-
-static void
-random_floats (float *buf, unsigned n)
-{
-  for (unsigned i = 0; i < n; i++)
-    buf[i] = uniform ();
-}
-
-#ifdef LV_HAVE_SSE3
-void qa_32fc_multiply_aligned16::t1() {
-
-  const int vlen = 2046;
-  const int ITERS = 100000;
-
-  int i;
-  volk_environment_init();
-  int ret;
-  clock_t start, end;
-  double total;
-  std::complex<float>* input;
-  std::complex<float>* taps;
-  
-  std::complex<float>* result_generic;
-  std::complex<float>* result_sse3;
-  std::complex<float>* result_orc;
-
-  ret = posix_memalign((void**)&input, 16, vlen*2*sizeof(float));
-  ret = posix_memalign((void**)&taps, 16, vlen*2*sizeof(float));
-  ret = posix_memalign((void**)&result_generic, 16, vlen*2*sizeof(float));
-  ret = posix_memalign((void**)&result_sse3, 16, vlen*2*sizeof(float));
-  ret = posix_memalign((void**)&result_orc, 16, vlen*2*sizeof(float));
-  
-  random_floats((float*)input, vlen * 2);
-  random_floats((float*)taps, vlen * 2);
-  
-  printf("32fc_multiply_aligned16\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_multiply_aligned16_manual(result_generic, input, taps, vlen,  "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_multiply_aligned16_manual(result_sse3, input, taps, vlen, "sse3");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse3_time: %f\n", total);
-  
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_multiply_aligned16_manual(result_orc, input, taps, vlen, "orc");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("orc_time: %f\n", total);
-
-  for(i = 0; i < vlen; i++){
-    assertcomplexEqual(result_generic[i], result_sse3[i], ERR_DELTA);
-    assertcomplexEqual(result_generic[i], result_orc[i], ERR_DELTA);
-  }
-
-  free(input);
-  free(taps);
-  free(result_generic);
-  free(result_sse3);
-  free(result_orc);
-  
-}
-#else
-void qa_32fc_multiply_aligned16::t1() {
-  printf("sse3 not available... no test performed\n");
-}
-
-#endif /* LV_HAVE_SSE3 */
diff --git a/volk/lib/qa_32fc_multiply_aligned16.h b/volk/lib/qa_32fc_multiply_aligned16.h
deleted file mode 100644
index c8abaa8fe..000000000
--- a/volk/lib/qa_32fc_multiply_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32FC_MULTIPLY_ALIGNED16_H
-#define INCLUDED_QA_32FC_MULTIPLY_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32fc_multiply_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32fc_multiply_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32FC_MULTIPLY_ALIGNED16_H */
diff --git a/volk/lib/qa_32fc_power_spectrum_32f_aligned16.cc b/volk/lib/qa_32fc_power_spectrum_32f_aligned16.cc
deleted file mode 100644
index 1444c78a9..000000000
--- a/volk/lib/qa_32fc_power_spectrum_32f_aligned16.cc
+++ /dev/null
@@ -1,64 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32fc_power_spectrum_32f_aligned16.h>
-#include <volk/volk_32fc_power_spectrum_32f_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse3
-
-#ifndef LV_HAVE_SSE3
-
-void qa_32fc_power_spectrum_32f_aligned16::t1() {
-  printf("sse3 not available... no test performed\n");
-}
-
-#else
-
-void qa_32fc_power_spectrum_32f_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 10000;
-  std::complex<float> input0[vlen] __attribute__ ((aligned (16)));
-  
-  float output_generic[vlen] __attribute__ ((aligned (16)));
-  float output_sse3[vlen] __attribute__ ((aligned (16)));
-
-  const float scalar = vlen;
-
-  float* inputLoad = (float*)input0;
-  for(int i = 0; i < 2*vlen; ++i) {   
-    inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)));
-  }
-
-  printf("32fc_power_spectrum_32f_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_power_spectrum_32f_aligned16_manual(output_generic, input0, scalar, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_power_spectrum_32f_aligned16_manual(output_sse3, input0, scalar, vlen, "sse3");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse3_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse33... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], fabs(output_generic[i]*1e-4));
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32fc_power_spectrum_32f_aligned16.h b/volk/lib/qa_32fc_power_spectrum_32f_aligned16.h
deleted file mode 100644
index d991223f3..000000000
--- a/volk/lib/qa_32fc_power_spectrum_32f_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32FC_POWER_SPECTRUM_32F_ALIGNED16_H
-#define INCLUDED_QA_32FC_POWER_SPECTRUM_32F_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32fc_power_spectrum_32f_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32fc_power_spectrum_32f_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32FC_POWER_SPECTRUM_32F_ALIGNED16_H */
diff --git a/volk/lib/qa_32fc_square_dist_aligned16.cc b/volk/lib/qa_32fc_square_dist_aligned16.cc
deleted file mode 100644
index d9ead8495..000000000
--- a/volk/lib/qa_32fc_square_dist_aligned16.cc
+++ /dev/null
@@ -1,91 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32fc_square_dist_aligned16.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <time.h>
-
-#define ERR_DELTA (1e-4)
-#define NUM_ITERS 10000000
-#define VEC_LEN 64
-static float uniform() {
-  return 2.0 * ((float) rand() / RAND_MAX - 0.5);	// uniformly (-1, 1)
-}
-
-static void
-random_floats (float *buf, unsigned n)
-{
-  unsigned int i = 0;
-  for (; i < n; i++) {
-
-    buf[i] = uniform () * 32767;
-
-  }
-}
-
-
-#ifndef LV_HAVE_SSE3
-
-void qa_32fc_square_dist_aligned16::t1(){
-  printf("sse3 not available... no test performed\n");
-}
-
-#else
-
-
-void qa_32fc_square_dist_aligned16::t1(){
-  int i = 0;
-  
-  const int vlen = VEC_LEN;
-  volk_environment_init();
-  int ret;
-  
-  float* target;
-  float* target_generic;
-  std::complex<float>* src0 ;
-  std::complex<float>* points;
-
-  ret = posix_memalign((void**)&points, 16, vlen << 3);
-  ret = posix_memalign((void**)&target, 16, vlen << 2);
-  ret = posix_memalign((void**)&target_generic, 16, vlen << 2);
-  ret = posix_memalign((void**)&src0, 16, 8);
-  
-  random_floats((float*)points, vlen * 2);
-  random_floats((float*)src0, 2);
-  
-  printf("32fc_square_dist_aligned16\n");
-  
-  clock_t start, end;
-  double total;
-  
-  
-  start = clock();
-  for(int k = 0; k < NUM_ITERS; ++k) {
-    volk_32fc_square_dist_aligned16_manual(target_generic, src0, points, vlen << 3, "generic");
-  }
-  end = clock();  
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic time: %f\n", total);
-
-  start = clock();
-  for(int k = 0; k < NUM_ITERS; ++k) {
-  volk_32fc_square_dist_aligned16_manual(target, src0, points, vlen << 3, "sse3");
-  }
-  
-  end = clock();  
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse3 time: %f\n", total);
-
-  
-  
-  for(; i < vlen; ++i) {
-    //printf("generic: %f, sse3: %f\n", target_generic[i], target[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(target_generic[i], target[i], fabs(target_generic[i]) * ERR_DELTA);
-  }
-
-  free(target);
-  free(target_generic);
-  free(points);
-  free(src0);
-}
-
-#endif /*LV_HAVE_SSE3*/
diff --git a/volk/lib/qa_32fc_square_dist_aligned16.h b/volk/lib/qa_32fc_square_dist_aligned16.h
deleted file mode 100644
index 9d365d8b0..000000000
--- a/volk/lib/qa_32fc_square_dist_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32FC_SQUARE_DIST_ALIGNED16_H
-#define INCLUDED_QA_32FC_SQUARE_DIST_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32fc_square_dist_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32fc_square_dist_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32FC_SQUARE_DIST_ALIGNED16_H */
diff --git a/volk/lib/qa_32fc_square_dist_scalar_mult_aligned16.cc b/volk/lib/qa_32fc_square_dist_scalar_mult_aligned16.cc
deleted file mode 100644
index f923d1d5c..000000000
--- a/volk/lib/qa_32fc_square_dist_scalar_mult_aligned16.cc
+++ /dev/null
@@ -1,96 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32fc_square_dist_scalar_mult_aligned16.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
-#include <time.h>
-
-#define ERR_DELTA .0001
-#define NUM_ITERS 10000000
-#define VEC_LEN 64
-
-static float uniform() {
-  return 2.0 * ((float) rand() / RAND_MAX - 0.5);	// uniformly (-1, 1)
-}
-
-static void
-random_floats (float *buf, unsigned n)
-{
-  unsigned int i = 0;
-  for (; i < n; i++) {
-
-    buf[i] = uniform () * 32767;
-
-  }
-}
-
-
-#ifndef LV_HAVE_SSE3
-
-void qa_32fc_square_dist_scalar_mult_aligned16::t1(){
-  printf("sse3 not available... no test performed\n");
-}
-
-#else
-
-
-void qa_32fc_square_dist_scalar_mult_aligned16::t1(){
-  int i = 0;
-  
-  const int vlen = VEC_LEN;
-  
-  volk_environment_init();
-  int ret;
-  
-  float* target;
-  float* target_generic;
-  std::complex<float>* src0 ;
-  std::complex<float>* points;
-  float scalar;
-
-  ret = posix_memalign((void**)&points, 16, vlen << 3);
-  ret = posix_memalign((void**)&target, 16, vlen << 2);
-  ret = posix_memalign((void**)&target_generic, 16, vlen << 2);
-  ret = posix_memalign((void**)&src0, 16, 8);
-  
-  random_floats((float*)points, vlen * 2);
-  random_floats((float*)src0, 2);
-  random_floats(&scalar, 1);
-  
-  printf("32fc_square_dist_scalar_mult_aligned16\n");
-  
-  clock_t start, end;
-  double total;
-  
-  
-  start = clock();
-  for(int k = 0; k < NUM_ITERS; ++k) {
-    volk_32fc_square_dist_scalar_mult_aligned16_manual(target_generic, src0, points, scalar, vlen << 3, "generic");
-  }
-  end = clock();  
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic time: %f\n", total);
-  
-  start = clock();
-  for(int k = 0; k < NUM_ITERS; ++k) {
-    volk_32fc_square_dist_scalar_mult_aligned16_manual(target, src0, points, scalar, vlen << 3, "sse3");
-  }
-  
-  end = clock();  
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse3 time: %f\n", total);
-
-  
-  
-  for(i = 0; i < vlen; ++i) {
-    printf("generic: %f, sse3: %f\n", target_generic[i], target[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(target[i], target_generic[i], fabs(target_generic[1]) * ERR_DELTA);//, target_generic[1] * ERR_DELTA);
-  }
-
-  free(target);
-  free(target_generic);
-  free(points);
-  free(src0);
-}
-
-#endif /*LV_HAVE_SSE3*/
diff --git a/volk/lib/qa_32fc_square_dist_scalar_mult_aligned16.h b/volk/lib/qa_32fc_square_dist_scalar_mult_aligned16.h
deleted file mode 100644
index ac4e3c45b..000000000
--- a/volk/lib/qa_32fc_square_dist_scalar_mult_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32FC_SQUARE_DIST_SCALAR_MULT_ALIGNED16_H
-#define INCLUDED_QA_32FC_SQUARE_DIST_SCALAR_MULT_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32fc_square_dist_scalar_mult_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32fc_square_dist_scalar_mult_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32FC_SQUARE_DIST_SCALAR_MULT_ALIGNED16_H */
diff --git a/volk/lib/qa_32s_and_aligned16.cc b/volk/lib/qa_32s_and_aligned16.cc
deleted file mode 100644
index d20682147..000000000
--- a/volk/lib/qa_32s_and_aligned16.cc
+++ /dev/null
@@ -1,70 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32s_and_aligned16.h>
-#include <volk/volk_32s_and_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE
-
-void qa_32s_and_aligned16::t1() {
-  printf("sse not available... no test performed\n");
-}
-
-#else
-
-void qa_32s_and_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  int32_t input0[vlen] __attribute__ ((aligned (16)));
-  int32_t input1[vlen] __attribute__ ((aligned (16)));
-  
-  int32_t output0[vlen] __attribute__ ((aligned (16)));
-  int32_t output01[vlen] __attribute__ ((aligned (16)));
-  int32_t output02[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((int32_t) (rand() - (RAND_MAX/2)));
-    input1[i] = ((int32_t) (rand() - (RAND_MAX/2)));
-  }
-  printf("32s_and_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32s_and_aligned16_manual(output0, input0, input1, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32s_and_aligned16_manual(output02, input0, input1, vlen, "orc");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("orc_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32s_and_aligned16_manual(output01, input0, input1, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32s_and_aligned16.h b/volk/lib/qa_32s_and_aligned16.h
deleted file mode 100644
index dfcb47c63..000000000
--- a/volk/lib/qa_32s_and_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32S_AND_ALIGNED16_H
-#define INCLUDED_QA_32S_AND_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32s_and_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32s_and_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32S_AND_ALIGNED16_H */
diff --git a/volk/lib/qa_32s_convert_32f_aligned16.cc b/volk/lib/qa_32s_convert_32f_aligned16.cc
deleted file mode 100644
index 07d799809..000000000
--- a/volk/lib/qa_32s_convert_32f_aligned16.cc
+++ /dev/null
@@ -1,61 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32s_convert_32f_aligned16.h>
-#include <volk/volk_32s_convert_32f_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse2
-
-#ifndef LV_HAVE_SSE2
-
-void qa_32s_convert_32f_aligned16::t1() {
-  printf("sse2 not available... no test performed\n");
-}
-
-#else
-
-void qa_32s_convert_32f_aligned16::t1() {
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-
-  int32_t input0[vlen] __attribute__ ((aligned (16)));
-  
-  float output_generic[vlen] __attribute__ ((aligned (16)));
-  float output_sse2[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((int32_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)) * 32768.0));
-  }
-  printf("32s_convert_32f_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32s_convert_32f_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32s_convert_32f_aligned16_manual(output_sse2, input0, 32768.0, vlen, "sse2");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse2_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse2[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32s_convert_32f_aligned16.h b/volk/lib/qa_32s_convert_32f_aligned16.h
deleted file mode 100644
index efd2a2eea..000000000
--- a/volk/lib/qa_32s_convert_32f_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32S_CONVERT_32F_ALIGNED16_H
-#define INCLUDED_QA_32S_CONVERT_32F_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32s_convert_32f_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32s_convert_32f_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32S_CONVERT_32F_ALIGNED16_H */
diff --git a/volk/lib/qa_32s_convert_32f_unaligned16.cc b/volk/lib/qa_32s_convert_32f_unaligned16.cc
deleted file mode 100644
index 2ec610ffb..000000000
--- a/volk/lib/qa_32s_convert_32f_unaligned16.cc
+++ /dev/null
@@ -1,61 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32s_convert_32f_unaligned16.h>
-#include <volk/volk_32s_convert_32f_unaligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse2
-
-#ifndef LV_HAVE_SSE2
-
-void qa_32s_convert_32f_unaligned16::t1() {
-  printf("sse2 not available... no test performed\n");
-}
-
-#else
-
-void qa_32s_convert_32f_unaligned16::t1() {
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-
-  int32_t input0[vlen] __attribute__ ((aligned (16)));
-  
-  float output_generic[vlen] __attribute__ ((aligned (16)));
-  float output_sse2[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((int32_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)) * 32768.0));
-  }
-  printf("32s_convert_32f_unaligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32s_convert_32f_unaligned16_manual(output_generic, input0, 32768.0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32s_convert_32f_unaligned16_manual(output_sse2, input0, 32768.0, vlen, "sse2");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse2_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse2[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32s_convert_32f_unaligned16.h b/volk/lib/qa_32s_convert_32f_unaligned16.h
deleted file mode 100644
index 5006f5fd8..000000000
--- a/volk/lib/qa_32s_convert_32f_unaligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32S_CONVERT_32F_UNALIGNED16_H
-#define INCLUDED_QA_32S_CONVERT_32F_UNALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32s_convert_32f_unaligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32s_convert_32f_unaligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32S_CONVERT_32F_UNALIGNED16_H */
diff --git a/volk/lib/qa_32s_or_aligned16.cc b/volk/lib/qa_32s_or_aligned16.cc
deleted file mode 100644
index bebf779b0..000000000
--- a/volk/lib/qa_32s_or_aligned16.cc
+++ /dev/null
@@ -1,70 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32s_or_aligned16.h>
-#include <volk/volk_32s_or_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE
-
-void qa_32s_or_aligned16::t1() {
-  printf("sse not available... no test performed\n");
-}
-
-#else
-
-void qa_32s_or_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  int32_t input0[vlen] __attribute__ ((aligned (16)));
-  int32_t input1[vlen] __attribute__ ((aligned (16)));
-  
-  int32_t output0[vlen] __attribute__ ((aligned (16)));
-  int32_t output01[vlen] __attribute__ ((aligned (16)));
-  int32_t output02[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((int32_t) (rand() - (RAND_MAX/2)));
-    input1[i] = ((int32_t) (rand() - (RAND_MAX/2)));
-  }
-  printf("32s_or_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32s_or_aligned16_manual(output0, input0, input1, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32s_or_aligned16_manual(output02, input0, input1, vlen, "orc");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("orc_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32s_or_aligned16_manual(output01, input0, input1, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32s_or_aligned16.h b/volk/lib/qa_32s_or_aligned16.h
deleted file mode 100644
index 9e949eb52..000000000
--- a/volk/lib/qa_32s_or_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32S_OR_ALIGNED16_H
-#define INCLUDED_QA_32S_OR_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32s_or_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32s_or_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32S_OR_ALIGNED16_H */
diff --git a/volk/lib/qa_32u_byteswap_aligned16.cc b/volk/lib/qa_32u_byteswap_aligned16.cc
deleted file mode 100644
index 313c786b6..000000000
--- a/volk/lib/qa_32u_byteswap_aligned16.cc
+++ /dev/null
@@ -1,60 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32u_byteswap_aligned16.h>
-#include <volk/volk_32u_byteswap_aligned16.h>
-#include <cstdlib>
-#include <cstring>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE2
-
-void qa_32u_byteswap_aligned16::t1() {
-  printf("sse2 not available... no test performed\n");
-}
-
-#else
-
-void qa_32u_byteswap_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100001;
-  
-  uint32_t output0[vlen] __attribute__ ((aligned (16)));
-  uint32_t output01[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    output0[i] = (uint32_t) ((rand() - (RAND_MAX/2)) / (RAND_MAX/2));
-  }
-  memcpy(output01, output0, vlen*sizeof(uint32_t));
-  printf("32u_byteswap_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32u_byteswap_aligned16_manual(output0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32u_byteswap_aligned16_manual(output01, vlen, "sse2");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse2_time: %f\n", total);
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32u_byteswap_aligned16.h b/volk/lib/qa_32u_byteswap_aligned16.h
deleted file mode 100644
index 47bad4c3d..000000000
--- a/volk/lib/qa_32u_byteswap_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32U_BYTESWAP_ALIGNED16_H
-#define INCLUDED_QA_32U_BYTESWAP_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32u_byteswap_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32u_byteswap_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32U_BYTESWAP_ALIGNED16_H */
diff --git a/volk/lib/qa_64f_convert_32f_aligned16.cc b/volk/lib/qa_64f_convert_32f_aligned16.cc
deleted file mode 100644
index 7f9c4584a..000000000
--- a/volk/lib/qa_64f_convert_32f_aligned16.cc
+++ /dev/null
@@ -1,61 +0,0 @@
-#include <volk/volk.h>
-#include <qa_64f_convert_32f_aligned16.h>
-#include <volk/volk_64f_convert_32f_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse2
-
-#ifndef LV_HAVE_SSE2
-
-void qa_64f_convert_32f_aligned16::t1() {
-  printf("sse2 not available... no test performed\n");
-}
-
-#else
-
-void qa_64f_convert_32f_aligned16::t1() {
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-
-  double input0[vlen] __attribute__ ((aligned (16)));
-  
-  float output_generic[vlen] __attribute__ ((aligned (16)));
-  float output_sse2[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((double) (rand() - (RAND_MAX/2))) / static_cast<double>((RAND_MAX/2));
-  }
-  printf("64f_convert_32f_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_64f_convert_32f_aligned16_manual(output_generic, input0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_64f_convert_32f_aligned16_manual(output_sse2, input0, vlen, "sse2");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse2_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse2[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_64f_convert_32f_aligned16.h b/volk/lib/qa_64f_convert_32f_aligned16.h
deleted file mode 100644
index 95d79f73d..000000000
--- a/volk/lib/qa_64f_convert_32f_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_64F_CONVERT_32F_ALIGNED16_H
-#define INCLUDED_QA_64F_CONVERT_32F_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_64f_convert_32f_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_64f_convert_32f_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_64F_CONVERT_32F_ALIGNED16_H */
diff --git a/volk/lib/qa_64f_convert_32f_unaligned16.cc b/volk/lib/qa_64f_convert_32f_unaligned16.cc
deleted file mode 100644
index 98aadbf4d..000000000
--- a/volk/lib/qa_64f_convert_32f_unaligned16.cc
+++ /dev/null
@@ -1,61 +0,0 @@
-#include <volk/volk.h>
-#include <qa_64f_convert_32f_unaligned16.h>
-#include <volk/volk_64f_convert_32f_unaligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse2
-
-#ifndef LV_HAVE_SSE2
-
-void qa_64f_convert_32f_unaligned16::t1() {
-  printf("sse2 not available... no test performed\n");
-}
-
-#else
-
-void qa_64f_convert_32f_unaligned16::t1() {
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-
-  double input0[vlen] __attribute__ ((aligned (16)));
-  
-  float output_generic[vlen] __attribute__ ((aligned (16)));
-  float output_sse2[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((double) (rand() - (RAND_MAX/2))) / static_cast<double>((RAND_MAX/2));
-  }
-  printf("64f_convert_32f_unaligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_64f_convert_32f_unaligned16_manual(output_generic, input0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_64f_convert_32f_unaligned16_manual(output_sse2, input0, vlen, "sse2");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse2_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse2[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_64f_convert_32f_unaligned16.h b/volk/lib/qa_64f_convert_32f_unaligned16.h
deleted file mode 100644
index 430327e81..000000000
--- a/volk/lib/qa_64f_convert_32f_unaligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_64F_CONVERT_32F_UNALIGNED16_H
-#define INCLUDED_QA_64F_CONVERT_32F_UNALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_64f_convert_32f_unaligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_64f_convert_32f_unaligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_64F_CONVERT_32F_UNALIGNED16_H */
diff --git a/volk/lib/qa_64f_max_aligned16.cc b/volk/lib/qa_64f_max_aligned16.cc
deleted file mode 100644
index 76e755514..000000000
--- a/volk/lib/qa_64f_max_aligned16.cc
+++ /dev/null
@@ -1,61 +0,0 @@
-#include <volk/volk.h>
-#include <qa_64f_max_aligned16.h>
-#include <volk/volk_64f_max_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE2
-
-void qa_64f_max_aligned16::t1() {
-  printf("sse2 not available... no test performed\n");
-}
-
-#else
-
-void qa_64f_max_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  double input0[vlen] __attribute__ ((aligned (16)));
-  double input1[vlen] __attribute__ ((aligned (16)));
-  
-  double output0[vlen] __attribute__ ((aligned (16)));
-  double output01[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((double) (rand() - (RAND_MAX/2))) / static_cast<double>((RAND_MAX/2));
-    input1[i] = ((double) (rand() - (RAND_MAX/2))) / static_cast<double>((RAND_MAX/2));
-  }
-  printf("64f_max_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_64f_max_aligned16_manual(output0, input0, input1, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_64f_max_aligned16_manual(output01, input0, input1, vlen, "sse2");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_64f_max_aligned16.h b/volk/lib/qa_64f_max_aligned16.h
deleted file mode 100644
index 7cbd4d4c1..000000000
--- a/volk/lib/qa_64f_max_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_64F_MAX_ALIGNED16_H
-#define INCLUDED_QA_64F_MAX_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_64f_max_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_64f_max_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_64F_MAX_ALIGNED16_H */
diff --git a/volk/lib/qa_64f_min_aligned16.cc b/volk/lib/qa_64f_min_aligned16.cc
deleted file mode 100644
index 4b70d2881..000000000
--- a/volk/lib/qa_64f_min_aligned16.cc
+++ /dev/null
@@ -1,61 +0,0 @@
-#include <volk/volk.h>
-#include <qa_64f_min_aligned16.h>
-#include <volk/volk_64f_min_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE2
-
-void qa_64f_min_aligned16::t1() {
-  printf("sse2 not available... no test performed\n");
-}
-
-#else
-
-void qa_64f_min_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  double input0[vlen] __attribute__ ((aligned (16)));
-  double input1[vlen] __attribute__ ((aligned (16)));
-  
-  double output0[vlen] __attribute__ ((aligned (16)));
-  double output01[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((double) (rand() - (RAND_MAX/2))) / static_cast<double>((RAND_MAX/2));
-    input1[i] = ((double) (rand() - (RAND_MAX/2))) / static_cast<double>((RAND_MAX/2));
-  }
-  printf("64f_min_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_64f_min_aligned16_manual(output0, input0, input1, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_64f_min_aligned16_manual(output01, input0, input1, vlen, "sse2");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_64f_min_aligned16.h b/volk/lib/qa_64f_min_aligned16.h
deleted file mode 100644
index a0e95395f..000000000
--- a/volk/lib/qa_64f_min_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_64F_MIN_ALIGNED16_H
-#define INCLUDED_QA_64F_MIN_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_64f_min_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_64f_min_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_64F_MIN_ALIGNED16_H */
diff --git a/volk/lib/qa_64u_byteswap_aligned16.cc b/volk/lib/qa_64u_byteswap_aligned16.cc
deleted file mode 100644
index 20d012c9e..000000000
--- a/volk/lib/qa_64u_byteswap_aligned16.cc
+++ /dev/null
@@ -1,60 +0,0 @@
-#include <volk/volk.h>
-#include <qa_64u_byteswap_aligned16.h>
-#include <volk/volk_64u_byteswap_aligned16.h>
-#include <cstdlib>
-#include <cstring>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE2
-
-void qa_64u_byteswap_aligned16::t1() {
-  printf("sse2 not available... no test performed\n");
-}
-
-#else
-
-void qa_64u_byteswap_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100001;
-  
-  uint64_t output0[vlen] __attribute__ ((aligned (16)));
-  uint64_t output01[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    output0[i] = (uint64_t) ((rand() - (RAND_MAX/2)) / (RAND_MAX/2));
-  }
-  memcpy(output01, output0, vlen*sizeof(uint64_t));
-  printf("64u_byteswap_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_64u_byteswap_aligned16_manual(output0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_64u_byteswap_aligned16_manual(output01, vlen, "sse2");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse2_time: %f\n", total);
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_64u_byteswap_aligned16.h b/volk/lib/qa_64u_byteswap_aligned16.h
deleted file mode 100644
index a4fa0c983..000000000
--- a/volk/lib/qa_64u_byteswap_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_64U_BYTESWAP_ALIGNED16_H
-#define INCLUDED_QA_64U_BYTESWAP_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_64u_byteswap_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_64u_byteswap_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_64U_BYTESWAP_ALIGNED16_H */
diff --git a/volk/lib/qa_8s_convert_16s_aligned16.cc b/volk/lib/qa_8s_convert_16s_aligned16.cc
deleted file mode 100644
index 8dd5f76ca..000000000
--- a/volk/lib/qa_8s_convert_16s_aligned16.cc
+++ /dev/null
@@ -1,64 +0,0 @@
-#include <volk/volk_runtime.h>
-#include <volk/volk.h>
-#include <qa_8s_convert_16s_aligned16.h>
-#include <volk/volk_8s_convert_16s_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse4_1
-
-#ifndef LV_HAVE_SSE4_1
-
-void qa_8s_convert_16s_aligned16::t1() {
-  printf("sse4.1 not available... no test performed\n");
-}
-
-#else
-
-void qa_8s_convert_16s_aligned16::t1() {
-  
-  volk_runtime_init();
-
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  int8_t input0[vlen] __attribute__ ((aligned (16)));
-  
-  int16_t output_generic[vlen] __attribute__ ((aligned (16)));
-  int16_t output_sse4_1[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((int8_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)) * 128.0));
-  }
-  printf("8s_convert_16s_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_8s_convert_16s_aligned16_manual(output_generic, input0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    get_volk_runtime()->volk_8s_convert_16s_aligned16(output_sse4_1, input0, vlen);
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse4_1_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse4_1[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_8s_convert_16s_aligned16.h b/volk/lib/qa_8s_convert_16s_aligned16.h
deleted file mode 100644
index 38739fc96..000000000
--- a/volk/lib/qa_8s_convert_16s_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_8S_CONVERT_16S_ALIGNED16_H
-#define INCLUDED_QA_8S_CONVERT_16S_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_8s_convert_16s_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_8s_convert_16s_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_8S_CONVERT_16S_ALIGNED16_H */
diff --git a/volk/lib/qa_8s_convert_16s_unaligned16.cc b/volk/lib/qa_8s_convert_16s_unaligned16.cc
deleted file mode 100644
index 12c502d4b..000000000
--- a/volk/lib/qa_8s_convert_16s_unaligned16.cc
+++ /dev/null
@@ -1,64 +0,0 @@
-#include <volk/volk_runtime.h>
-#include <volk/volk.h>
-#include <qa_8s_convert_16s_unaligned16.h>
-#include <volk/volk_8s_convert_16s_unaligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse4_1
-
-#ifndef LV_HAVE_SSE4_1
-
-void qa_8s_convert_16s_unaligned16::t1() {
-  printf("sse4.1 not available... no test performed\n");
-}
-
-#else
-
-void qa_8s_convert_16s_unaligned16::t1() {
-  
-  volk_runtime_init();
-
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  int8_t input0[vlen] __attribute__ ((aligned (16)));
-  
-  int16_t output_generic[vlen] __attribute__ ((aligned (16)));
-  int16_t output_sse4_1[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((int8_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)) * 128.0));
-  }
-  printf("8s_convert_16s_unaligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_8s_convert_16s_unaligned16_manual(output_generic, input0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    get_volk_runtime()->volk_8s_convert_16s_unaligned16(output_sse4_1, input0, vlen);
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse4_1_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse4_1[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_8s_convert_16s_unaligned16.h b/volk/lib/qa_8s_convert_16s_unaligned16.h
deleted file mode 100644
index d39fffc35..000000000
--- a/volk/lib/qa_8s_convert_16s_unaligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_8S_CONVERT_16S_UNALIGNED16_H
-#define INCLUDED_QA_8S_CONVERT_16S_UNALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_8s_convert_16s_unaligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_8s_convert_16s_unaligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_8S_CONVERT_16S_UNALIGNED16_H */
diff --git a/volk/lib/qa_8s_convert_32f_aligned16.cc b/volk/lib/qa_8s_convert_32f_aligned16.cc
deleted file mode 100644
index f27e60552..000000000
--- a/volk/lib/qa_8s_convert_32f_aligned16.cc
+++ /dev/null
@@ -1,72 +0,0 @@
-#include <volk/volk_runtime.h>
-#include <volk/volk.h>
-#include <qa_8s_convert_32f_aligned16.h>
-#include <volk/volk_8s_convert_32f_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse4.1
-
-#ifndef LV_HAVE_SSE4_1
-
-void qa_8s_convert_32f_aligned16::t1() {
-  printf("sse4_1 not available... no test performed\n");
-}
-
-#else
-
-void qa_8s_convert_32f_aligned16::t1() {
-  
-  volk_runtime_init();
-
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  int8_t input0[vlen] __attribute__ ((aligned (16)));
-  
-  float output_generic[vlen] __attribute__ ((aligned (16)));
-  float output_sse4_1[vlen] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((int8_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)) * 128.0));
-  }
-  printf("8s_convert_32f_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_8s_convert_32f_aligned16_manual(output_generic, input0, 128.0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_8s_convert_32f_aligned16_manual(output_generic, input0, 128.0, vlen, "orc");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("orc_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    get_volk_runtime()->volk_8s_convert_32f_aligned16(output_sse4_1, input0, 128.0, vlen);
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse4_1_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse4_1[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_8s_convert_32f_aligned16.h b/volk/lib/qa_8s_convert_32f_aligned16.h
deleted file mode 100644
index 7f8401d42..000000000
--- a/volk/lib/qa_8s_convert_32f_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_8S_CONVERT_32F_ALIGNED16_H
-#define INCLUDED_QA_8S_CONVERT_32F_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_8s_convert_32f_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_8s_convert_32f_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_8S_CONVERT_32F_ALIGNED16_H */
diff --git a/volk/lib/qa_8s_convert_32f_unaligned16.cc b/volk/lib/qa_8s_convert_32f_unaligned16.cc
deleted file mode 100644
index 43468b1b1..000000000
--- a/volk/lib/qa_8s_convert_32f_unaligned16.cc
+++ /dev/null
@@ -1,64 +0,0 @@
-#include <volk/volk_runtime.h>
-#include <volk/volk.h>
-#include <qa_8s_convert_32f_unaligned16.h>
-#include <volk/volk_8s_convert_32f_unaligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse4.1
-
-#ifndef LV_HAVE_SSE4_1
-
-void qa_8s_convert_32f_unaligned16::t1() {
-  printf("sse4_1 not available... no test performed\n");
-}
-
-#else
-
-void qa_8s_convert_32f_unaligned16::t1() {
-  
-  volk_runtime_init();
-
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  int8_t input0[vlen+1] __attribute__ ((aligned (16)));
-  
-  float output_generic[vlen+1] __attribute__ ((aligned (16)));
-  float output_sse4_1[vlen+1] __attribute__ ((aligned (16)));
-
-  for(int i = 0; i < vlen; ++i) {   
-    input0[i] = ((int8_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)) * 128.0));
-  }
-  printf("8s_convert_32f_unaligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_8s_convert_32f_unaligned16_manual(output_generic, &input0[1], 128.0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    get_volk_runtime()->volk_8s_convert_32f_unaligned16(output_sse4_1, &input0[1], 128.0, vlen);
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse4_1_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%e...%e\n", output_generic[i], output_sse4_1[i]);
-    CPPUNIT_ASSERT_EQUAL(output_generic[i], output_sse4_1[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_8s_convert_32f_unaligned16.h b/volk/lib/qa_8s_convert_32f_unaligned16.h
deleted file mode 100644
index aad2f8c22..000000000
--- a/volk/lib/qa_8s_convert_32f_unaligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_8S_CONVERT_32F_UNALIGNED16_H
-#define INCLUDED_QA_8S_CONVERT_32F_UNALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_8s_convert_32f_unaligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_8s_convert_32f_unaligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_8S_CONVERT_32F_UNALIGNED16_H */
diff --git a/volk/lib/qa_8sc_deinterleave_16s_aligned16.cc b/volk/lib/qa_8sc_deinterleave_16s_aligned16.cc
deleted file mode 100644
index f753e1107..000000000
--- a/volk/lib/qa_8sc_deinterleave_16s_aligned16.cc
+++ /dev/null
@@ -1,68 +0,0 @@
-#include <volk/volk_runtime.h>
-#include <volk/volk.h>
-#include <qa_8sc_deinterleave_16s_aligned16.h>
-#include <volk/volk_8sc_deinterleave_16s_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE4_1
-
-void qa_8sc_deinterleave_16s_aligned16::t1() {
-  printf("sse4_1 not available... no test performed\n");
-}
-
-#else
-
-void qa_8sc_deinterleave_16s_aligned16::t1() {
-
-  
-  volk_runtime_init();  
-
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  std::complex<int8_t> input0[vlen] __attribute__ ((aligned (16)));
-  
-  int16_t output_generic[vlen] __attribute__ ((aligned (16)));
-  int16_t output_generic1[vlen] __attribute__ ((aligned (16)));
-  int16_t output_sse4_1[vlen] __attribute__ ((aligned (16)));
-  int16_t output_sse4_11[vlen] __attribute__ ((aligned (16)));
-
-  int8_t* loadInput = (int8_t*)input0;
-  for(int i = 0; i < vlen*2; ++i) {   
-    loadInput[i] =((char)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 128.0));
-  }
-  printf("8sc_deinterleave_16s_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_8sc_deinterleave_16s_aligned16_manual(output_generic, output_generic1, input0, vlen, "monkeys");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    get_volk_runtime()->volk_8sc_deinterleave_16s_aligned16(output_sse4_1, output_sse4_11, input0, vlen);
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse4.1_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output_generic[i],  output_sse4_1[i]);
-    CPPUNIT_ASSERT_EQUAL(output_generic1[i],  output_sse4_11[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_8sc_deinterleave_16s_aligned16.h b/volk/lib/qa_8sc_deinterleave_16s_aligned16.h
deleted file mode 100644
index 9c99fed70..000000000
--- a/volk/lib/qa_8sc_deinterleave_16s_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_8SC_DEINTERLEAVE_16S_ALIGNED16_H
-#define INCLUDED_QA_8SC_DEINTERLEAVE_16S_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_8sc_deinterleave_16s_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_8sc_deinterleave_16s_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_8SC_DEINTERLEAVE_16S_ALIGNED16_H */
diff --git a/volk/lib/qa_8sc_deinterleave_32f_aligned16.cc b/volk/lib/qa_8sc_deinterleave_32f_aligned16.cc
deleted file mode 100644
index 29073eed7..000000000
--- a/volk/lib/qa_8sc_deinterleave_32f_aligned16.cc
+++ /dev/null
@@ -1,135 +0,0 @@
-#include <volk/volk_runtime.h>
-#include <volk/volk.h>
-#include <qa_8sc_deinterleave_32f_aligned16.h>
-#include <volk/volk_8sc_deinterleave_32f_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE4_1
-
-#ifndef LV_HAVE_SSE
-
-void qa_8sc_deinterleave_32f_aligned16::t1() {
-  printf("sse not available... no test performed\n");
-}
-
-#else
-
-void qa_8sc_deinterleave_32f_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  std::complex<int8_t> input0[vlen] __attribute__ ((aligned (16)));
-  
-  float output_generic[vlen] __attribute__ ((aligned (16)));
-  float output_generic1[vlen] __attribute__ ((aligned (16)));
-  float output_sse[vlen] __attribute__ ((aligned (16)));
-  float output_sse1[vlen] __attribute__ ((aligned (16)));
-
-  int8_t* loadInput = (int8_t*)input0;
-  for(int i = 0; i < vlen*2; ++i) {   
-    loadInput[i] =((char)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 128.0));
-  }
-  printf("8sc_deinterleave_32f_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_8sc_deinterleave_32f_aligned16_manual(output_generic, output_generic1, input0, 128.0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_8sc_deinterleave_32f_aligned16_manual(output_sse, output_sse1, input0, 128.0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic1[i], output_sse1[i], fabs(output_generic[i])*1e-4);
-  }
-}
-
-#endif /* LV_HAVE_SSE */
-
-#else
-
-void qa_8sc_deinterleave_32f_aligned16::t1() {
-  
-  
-  volk_runtime_init();
-
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  std::complex<int8_t> input0[vlen] __attribute__ ((aligned (16)));
-  
-  float output_generic[vlen] __attribute__ ((aligned (16)));
-  float output_generic1[vlen] __attribute__ ((aligned (16)));
-  float output_sse[vlen] __attribute__ ((aligned (16)));
-  float output_sse1[vlen] __attribute__ ((aligned (16)));
-  float output_sse4_1[vlen] __attribute__ ((aligned (16)));
-  float output_sse14_1[vlen] __attribute__ ((aligned (16)));
-
-  int8_t* loadInput = (int8_t*)input0;
-  for(int i = 0; i < vlen*2; ++i) {   
-    loadInput[i] =((char)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 128.0));
-  }
-  printf("8sc_deinterleave_32f_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_8sc_deinterleave_32f_aligned16_manual(output_generic, output_generic1, input0, 128.0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_8sc_deinterleave_32f_aligned16_manual(output_sse, output_sse1, input0, 128.0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    get_volk_runtime()->volk_8sc_deinterleave_32f_aligned16(output_sse4_1, output_sse14_1, input0, 128.0, vlen);
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse4.1_time: %f\n", total);
-
-  for(int i = 0; i < vlen; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("%d generic... %e %e, sse... %e %e sse4.1... %e %e\n", i, output_generic[i], output_generic1[i], output_sse[i], output_sse1[i], output_sse4_1[i], output_sse14_1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i],std::max<double>((output_generic[i])*1e-4, 1e-4));
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic1[i], output_sse1[i], std::max<double>((output_generic[i])*1e-4, 1e-4));
-
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse4_1[i], std::max<double>((output_generic[i])*1e-4, 1e-4));
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic1[i], output_sse14_1[i], std::max<double>((output_generic[i])*1e-4, 1e-4));
-  }
-}
-
-
-#endif /* LV_HAVE_SSE4_1 */
diff --git a/volk/lib/qa_8sc_deinterleave_32f_aligned16.h b/volk/lib/qa_8sc_deinterleave_32f_aligned16.h
deleted file mode 100644
index 63b5fdadb..000000000
--- a/volk/lib/qa_8sc_deinterleave_32f_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_8SC_DEINTERLEAVE_32F_ALIGNED16_H
-#define INCLUDED_QA_8SC_DEINTERLEAVE_32F_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_8sc_deinterleave_32f_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_8sc_deinterleave_32f_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_8SC_DEINTERLEAVE_32F_ALIGNED16_H */
diff --git a/volk/lib/qa_8sc_deinterleave_real_16s_aligned16.cc b/volk/lib/qa_8sc_deinterleave_real_16s_aligned16.cc
deleted file mode 100644
index 4980c982a..000000000
--- a/volk/lib/qa_8sc_deinterleave_real_16s_aligned16.cc
+++ /dev/null
@@ -1,65 +0,0 @@
-#include <volk/volk_runtime.h>
-#include <volk/volk.h>
-#include <qa_8sc_deinterleave_real_16s_aligned16.h>
-#include <volk/volk_8sc_deinterleave_real_16s_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE4_1
-
-void qa_8sc_deinterleave_real_16s_aligned16::t1() {
-  printf("sse4_1 not available... no test performed\n");
-}
-
-#else
-
-void qa_8sc_deinterleave_real_16s_aligned16::t1() {
-
-  
-  volk_runtime_init();  
-
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  std::complex<int8_t> input0[vlen] __attribute__ ((aligned (16)));
-  
-  int16_t output_generic[vlen] __attribute__ ((aligned (16)));
-  int16_t output_sse4_1[vlen] __attribute__ ((aligned (16)));
-
-  int8_t* loadInput = (int8_t*)input0;
-  for(int i = 0; i < vlen*2; ++i) {   
-    loadInput[i] =((char)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 128.0));
-  }
-  printf("8sc_deinterleave_real_16s_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_8sc_deinterleave_real_16s_aligned16_manual(output_generic, input0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    get_volk_runtime()->volk_8sc_deinterleave_real_16s_aligned16(output_sse4_1, input0, vlen);
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse4.1_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output_generic[i],  output_sse4_1[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_8sc_deinterleave_real_16s_aligned16.h b/volk/lib/qa_8sc_deinterleave_real_16s_aligned16.h
deleted file mode 100644
index 02050926f..000000000
--- a/volk/lib/qa_8sc_deinterleave_real_16s_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_8SC_DEINTERLEAVE_REAL_16S_ALIGNED16_H
-#define INCLUDED_QA_8SC_DEINTERLEAVE_REAL_16S_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_8sc_deinterleave_real_16s_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_8sc_deinterleave_real_16s_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_8SC_DEINTERLEAVE_REAL_16S_ALIGNED16_H */
diff --git a/volk/lib/qa_8sc_deinterleave_real_32f_aligned16.cc b/volk/lib/qa_8sc_deinterleave_real_32f_aligned16.cc
deleted file mode 100644
index 3c3f737a1..000000000
--- a/volk/lib/qa_8sc_deinterleave_real_32f_aligned16.cc
+++ /dev/null
@@ -1,139 +0,0 @@
-#include <volk/volk_runtime.h>
-#include <volk/volk.h>
-#include <qa_8sc_deinterleave_real_32f_aligned16.h>
-#include <volk/volk_8sc_deinterleave_real_32f_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE4_1
-
-#ifndef LV_HAVE_SSE
-
-void qa_8sc_deinterleave_real_32f_aligned16::t1() {
-  printf("sse not available... no test performed\n");
-}
-
-#else
-
-void qa_8sc_deinterleave_real_32f_aligned16::t1() {
-  
-  volk_environment_init();
-  int ret;
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  std::complex<int8_t> input0[vlen] __attribute__ ((aligned (16)));
-  
-  float output_generic[vlen] __attribute__ ((aligned (16)));
-  float output_sse[vlen] __attribute__ ((aligned (16)));
-
-  int8_t* loadInput = (int8_t*)input0;
-  for(int i = 0; i < vlen*2; ++i) {   
-    loadInput[i] =((char)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 128.0));
-  }
-  printf("8sc_deinterleave_real_32f_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_8sc_deinterleave_real_32f_aligned16_manual(output_generic, input0, 32768.0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_8sc_deinterleave_real_32f_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4);
-  }
-}
-
-#endif /* LV_HAVE_SSE */
-
-#else
-
-void qa_8sc_deinterleave_real_32f_aligned16::t1() {
-  
-  
-  volk_runtime_init();
-
-  volk_environment_init();
-  int ret;
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  std::complex<int8_t> *input0;
-  
-  float* output_generic;
-  float* output_sse;
-  float* output_sse4_1;
-
-  ret = posix_memalign((void**)&input0, 16, 2*vlen * sizeof(int8_t));
-  ret = posix_memalign((void**)&output_generic, 16, vlen * sizeof(float));
-  ret = posix_memalign((void**)&output_sse, 16, vlen * sizeof(float));
-  ret = posix_memalign((void**)&output_sse4_1, 16, vlen * sizeof(float));
-
-  int8_t* loadInput = (int8_t*)input0;
-  for(int i = 0; i < vlen*2; ++i) {   
-    loadInput[i] =((char)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 128.0);
-  }
-
-  printf("8sc_deinterleave_real_32f_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_8sc_deinterleave_real_32f_aligned16_manual(output_generic, input0, 128.0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_8sc_deinterleave_real_32f_aligned16_manual(output_sse, input0, 1288.0, vlen, "sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    get_volk_runtime()->volk_8sc_deinterleave_real_32f_aligned16(output_sse4_1, input0, 128.0, vlen);
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse4_1_time: %f\n", total);
-
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse4_1[i], fabs(output_generic[i])*1e-4);
-  }
-
-  free(input0);
-  free(output_generic);
-  free(output_sse);
-  free(output_sse4_1);
-}
-
-#endif /* LV_HAVE_SSE4_1 */
diff --git a/volk/lib/qa_8sc_deinterleave_real_32f_aligned16.h b/volk/lib/qa_8sc_deinterleave_real_32f_aligned16.h
deleted file mode 100644
index 93338e488..000000000
--- a/volk/lib/qa_8sc_deinterleave_real_32f_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_8SC_DEINTERLEAVE_REAL_32F_ALIGNED16_H
-#define INCLUDED_QA_8SC_DEINTERLEAVE_REAL_32F_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_8sc_deinterleave_real_32f_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_8sc_deinterleave_real_32f_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_8SC_DEINTERLEAVE_REAL_32F_ALIGNED16_H */
diff --git a/volk/lib/qa_8sc_deinterleave_real_8s_aligned16.cc b/volk/lib/qa_8sc_deinterleave_real_8s_aligned16.cc
deleted file mode 100644
index a33d1bf30..000000000
--- a/volk/lib/qa_8sc_deinterleave_real_8s_aligned16.cc
+++ /dev/null
@@ -1,61 +0,0 @@
-#include <volk/volk.h>
-#include <qa_8sc_deinterleave_real_8s_aligned16.h>
-#include <volk/volk_8sc_deinterleave_real_8s_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSSE3
-
-void qa_8sc_deinterleave_real_8s_aligned16::t1() {
-  printf("ssse3 not available... no test performed\n");
-}
-
-#else
-
-void qa_8sc_deinterleave_real_8s_aligned16::t1() {
-  
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 100000;
-  std::complex<int8_t> input0[vlen] __attribute__ ((aligned (16)));
-  
-  int8_t output_generic[vlen] __attribute__ ((aligned (16)));
-  int8_t output_ssse3[vlen] __attribute__ ((aligned (16)));
-
-  int8_t* loadInput = (int8_t*)input0;
-  for(int i = 0; i < vlen*2; ++i) {   
-    loadInput[i] =((char)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 128.0));
-  }
-  printf("8sc_deinterleave_real_8s_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_8sc_deinterleave_real_8s_aligned16_manual(output_generic, input0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_8sc_deinterleave_real_8s_aligned16_manual(output_ssse3, input0, vlen, "ssse3");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("ssse3_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-  
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output_generic[i], output_ssse3[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_8sc_deinterleave_real_8s_aligned16.h b/volk/lib/qa_8sc_deinterleave_real_8s_aligned16.h
deleted file mode 100644
index 92fc0dd4a..000000000
--- a/volk/lib/qa_8sc_deinterleave_real_8s_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_8SC_DEINTERLEAVE_REAL_8S_ALIGNED16_H
-#define INCLUDED_QA_8SC_DEINTERLEAVE_REAL_8S_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_8sc_deinterleave_real_8s_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_8sc_deinterleave_real_8s_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_8SC_DEINTERLEAVE_REAL_8S_ALIGNED16_H */
diff --git a/volk/lib/qa_8sc_multiply_conjugate_16sc_aligned16.cc b/volk/lib/qa_8sc_multiply_conjugate_16sc_aligned16.cc
deleted file mode 100644
index 216bf1cef..000000000
--- a/volk/lib/qa_8sc_multiply_conjugate_16sc_aligned16.cc
+++ /dev/null
@@ -1,87 +0,0 @@
-#include <volk/volk_runtime.h>
-#include <volk/volk.h>
-#include <qa_8sc_multiply_conjugate_16sc_aligned16.h>
-#include <stdlib.h>
-#include <math.h>
-#include <ctime>
-
-#define assertcomplexEqual(expected, actual, delta)			\
-  CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * delta); \
-  CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected))* delta);	
-
-#define	ERR_DELTA	(1e-4)
-
-#ifndef LV_HAVE_SSE4_1
-
-void qa_8sc_multiply_conjugate_16sc_aligned16::t1() {
-  printf("sse4.1 not available... no test performed\n");
-}
-
-#else
-
-void qa_8sc_multiply_conjugate_16sc_aligned16::t1() {
-  
-  
-  volk_runtime_init();
-
-  const int vlen = 2046;
-  const int ITERS = 100000;
-
-  volk_environment_init();
-  int ret;
-  clock_t start, end;
-  double total;
-  std::complex<int8_t>* input;
-  std::complex<int8_t>* taps;
-  
-  std::complex<int16_t>* result_generic;
-  std::complex<int16_t>* result_sse4_1;
-  int i;
-  int8_t* inputInt8_T;
-  int8_t* tapsInt8_T;
-
-  ret = posix_memalign((void**)&input, 16, vlen*2*sizeof(int8_t));
-  ret = posix_memalign((void**)&taps, 16, vlen*2*sizeof(int8_t));
-  ret = posix_memalign((void**)&result_generic, 16, vlen*2*sizeof(int16_t));
-  ret = posix_memalign((void**)&result_sse4_1, 16, vlen*2*sizeof(int16_t));
-  
-  inputInt8_T = (int8_t*)input;
-  tapsInt8_T = (int8_t*)taps;
-  for(int i = 0; i < vlen*2; ++i) {   
-    inputInt8_T[i] =((int8_t)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 128.0));
-    tapsInt8_T[i] =((int8_t)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 128.0));
-  }
-  
-  printf("8sc_multiply_conjugate_16sc_aligned16\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_8sc_multiply_conjugate_16sc_aligned16_manual((std::complex<int16_t>*)result_generic, (std::complex<int8_t>*)input, (std::complex<int8_t>*)taps, vlen,  "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    get_volk_runtime()->volk_8sc_multiply_conjugate_16sc_aligned16((std::complex<int16_t>*)result_sse4_1, (std::complex<int8_t>*)input, (std::complex<int8_t>*)taps, vlen);
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse4_1_time: %f\n", total);
-
-  for(i = 0; i < vlen; i++){
-    //printf("%d %d+%di %d+%di -> %d+%di %d+%di\n", i, std::real(input[i]), std::imag(input[i]), std::real(taps[i]), std::imag(taps[i]), std::real(result_generic[i]), std::imag(result_generic[i]), std::real(result_sse4_1[i]), std::imag(result_sse4_1[i]));
-
-    assertcomplexEqual(result_generic[i], result_sse4_1[i], ERR_DELTA);
-  }
-
-  free(input);
-  free(taps);
-  free(result_generic);
-  free(result_sse4_1);
-  
-}
-
-#endif /*LV_HAVE_SSE4_1*/
diff --git a/volk/lib/qa_8sc_multiply_conjugate_16sc_aligned16.h b/volk/lib/qa_8sc_multiply_conjugate_16sc_aligned16.h
deleted file mode 100644
index 0e78a5eca..000000000
--- a/volk/lib/qa_8sc_multiply_conjugate_16sc_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_8SC_MULTIPLY_CONJUGATE_16SC_ALIGNED16_H
-#define INCLUDED_QA_8SC_MULTIPLY_CONJUGATE_16SC_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_8sc_multiply_conjugate_16sc_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_8sc_multiply_conjugate_16sc_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_8SC_MULTIPLY_CONJUGATE_16SC_ALIGNED16_H */
diff --git a/volk/lib/qa_8sc_multiply_conjugate_32fc_aligned16.cc b/volk/lib/qa_8sc_multiply_conjugate_32fc_aligned16.cc
deleted file mode 100644
index 4c707446e..000000000
--- a/volk/lib/qa_8sc_multiply_conjugate_32fc_aligned16.cc
+++ /dev/null
@@ -1,87 +0,0 @@
-#include <volk/volk_runtime.h>
-#include <volk/volk.h>
-#include <qa_8sc_multiply_conjugate_32fc_aligned16.h>
-#include <stdlib.h>
-#include <math.h>
-#include <ctime>
-
-#define assertcomplexEqual(expected, actual, delta)			\
-  CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * delta); \
-  CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected))* delta);	
-
-#define	ERR_DELTA	(1e-4)
-
-#ifndef LV_HAVE_SSE4_1
-
-void qa_8sc_multiply_conjugate_32fc_aligned16::t1() {
-  printf("sse4.1 not available... no test performed\n");
-}
-
-#else
-
-void qa_8sc_multiply_conjugate_32fc_aligned16::t1() {
-  
-  
-  volk_runtime_init();
-
-  const int vlen = 2046;
-  const int ITERS = 100000;
-
-  volk_environment_init();
-  int ret;
-  clock_t start, end;
-  double total;
-  std::complex<int8_t>* input;
-  std::complex<int8_t>* taps;
-  
-  std::complex<float>* result_generic;
-  std::complex<float>* result_sse4_1;
-  int i;
-  int8_t* inputInt8_T;
-  int8_t* tapsInt8_T;
-
-  ret = posix_memalign((void**)&input, 16, vlen*2*sizeof(int8_t));
-  ret = posix_memalign((void**)&taps, 16, vlen*2*sizeof(int8_t));
-  ret = posix_memalign((void**)&result_generic, 16, vlen*2*sizeof(float));
-  ret = posix_memalign((void**)&result_sse4_1, 16, vlen*2*sizeof(float));
-  
-
-  inputInt8_T = (int8_t*)input;
-  tapsInt8_T = (int8_t*)taps;
-  for(int i = 0; i < vlen*2; ++i) {   
-    inputInt8_T[i] =((int8_t)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 128.0));
-    tapsInt8_T[i] =((int8_t)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 128.0));
-  }
-  
-  printf("8sc_multiply_conjugate_32fc_aligned16\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_8sc_multiply_conjugate_32fc_aligned16_manual(result_generic, (const std::complex<int8_t>*)input, (const std::complex<int8_t>*)taps, 32768.0, vlen,  "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    get_volk_runtime()->volk_8sc_multiply_conjugate_32fc_aligned16(result_sse4_1, (const std::complex<int8_t>*)input, (const std::complex<int8_t>*)taps, 32768.0, vlen);
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse4_1_time: %f\n", total);
-
-  for(i = 0; i < vlen; i++){
-    //printf("%d %d+%di %d+%di -> %e+%ei %e+%ei\n", i, std::real(input[i]), std::imag(input[i]), std::real(taps[i]), std::imag(taps[i]), std::real(result_generic[i]), std::imag(result_generic[i]), std::real(result_sse4_1[i]), std::imag(result_sse4_1[i]));
-    assertcomplexEqual(result_generic[i], result_sse4_1[i], ERR_DELTA);
-  }
-
-  free(input);
-  free(taps);
-  free(result_generic);
-  free(result_sse4_1);
-  
-}
-
-#endif /*LV_HAVE_SSE4_1*/
diff --git a/volk/lib/qa_8sc_multiply_conjugate_32fc_aligned16.h b/volk/lib/qa_8sc_multiply_conjugate_32fc_aligned16.h
deleted file mode 100644
index eb9ae309c..000000000
--- a/volk/lib/qa_8sc_multiply_conjugate_32fc_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_8SC_MULTIPLY_CONJUGATE_32FC_ALIGNED16_H
-#define INCLUDED_QA_8SC_MULTIPLY_CONJUGATE_32FC_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_8sc_multiply_conjugate_32fc_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_8sc_multiply_conjugate_32fc_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_8SC_MULTIPLY_CONJUGATE_32FC_ALIGNED16_H */
diff --git a/volk/lib/qa_volk.cc b/volk/lib/qa_volk.cc
deleted file mode 100644
index 8e7e59768..000000000
--- a/volk/lib/qa_volk.cc
+++ /dev/null
@@ -1,211 +0,0 @@
-/*
- * Copyright 2008 Free Software Foundation, Inc.
- * 
- * This file is part of GNU Radio
- * 
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- * 
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-/*
- * This class gathers together all the test cases for the example
- * directory into a single test suite.  As you create new test cases,
- * add them here.
- */
-
-#include <qa_volk.h>
-#include <qa_16s_quad_max_star_aligned16.h>
-#include <qa_32fc_dot_prod_aligned16.h>
-#include <qa_32fc_square_dist_aligned16.h>
-#include <qa_32fc_square_dist_scalar_mult_aligned16.h>
-#include <qa_32f_sum_of_poly_aligned16.h>
-#include <qa_32fc_index_max_aligned16.h>
-#include <qa_32f_index_max_aligned16.h>
-#include <qa_32fc_conjugate_dot_prod_aligned16.h>
-#include <qa_16s_permute_and_scalar_add_aligned16.h>
-#include <qa_16s_branch_4_state_8_aligned16.h>
-#include <qa_16s_max_star_horizontal_aligned16.h>
-#include <qa_16s_max_star_aligned16.h>
-#include <qa_16s_add_quad_aligned16.h>
-#include <qa_32f_add_aligned16.h>
-#include <qa_32f_subtract_aligned16.h>
-#include <qa_32f_max_aligned16.h>
-#include <qa_32f_min_aligned16.h>
-#include <qa_64f_max_aligned16.h>
-#include <qa_64f_min_aligned16.h>
-#include <qa_32s_and_aligned16.h>
-#include <qa_32s_or_aligned16.h>
-#include <qa_32f_dot_prod_aligned16.h>
-#include <qa_32f_dot_prod_unaligned16.h>
-#include <qa_32f_fm_detect_aligned16.h>
-#include <qa_32fc_32f_multiply_aligned16.h>
-#include <qa_32fc_multiply_aligned16.h>
-#include <qa_32f_divide_aligned16.h>
-#include <qa_32f_multiply_aligned16.h>
-#include <qa_32f_sqrt_aligned16.h>
-#include <qa_8sc_multiply_conjugate_16sc_aligned16.h>
-#include <qa_8sc_multiply_conjugate_32fc_aligned16.h>
-#include <qa_32u_popcnt_aligned16.h>
-#include <qa_64u_popcnt_aligned16.h>
-#include <qa_16u_byteswap_aligned16.h>
-#include <qa_32u_byteswap_aligned16.h>
-#include <qa_64u_byteswap_aligned16.h>
-#include <qa_32f_normalize_aligned16.h>
-#include <qa_16sc_deinterleave_16s_aligned16.h>
-#include <qa_16sc_deinterleave_32f_aligned16.h>
-#include <qa_16sc_deinterleave_real_16s_aligned16.h>
-#include <qa_16sc_deinterleave_real_32f_aligned16.h>
-#include <qa_16sc_deinterleave_real_8s_aligned16.h>
-#include <qa_16sc_magnitude_16s_aligned16.h>
-#include <qa_16sc_magnitude_32f_aligned16.h>
-#include <qa_32fc_deinterleave_32f_aligned16.h>
-#include <qa_32fc_deinterleave_64f_aligned16.h>
-#include <qa_32fc_deinterleave_real_16s_aligned16.h>
-#include <qa_32fc_deinterleave_real_32f_aligned16.h>
-#include <qa_32fc_deinterleave_real_64f_aligned16.h>
-#include <qa_32fc_magnitude_16s_aligned16.h>
-#include <qa_32fc_magnitude_32f_aligned16.h>
-#include <qa_32f_interleave_16sc_aligned16.h>
-#include <qa_32f_interleave_32fc_aligned16.h>
-#include <qa_8sc_deinterleave_16s_aligned16.h>
-#include <qa_8sc_deinterleave_32f_aligned16.h>
-#include <qa_8sc_deinterleave_real_16s_aligned16.h>
-#include <qa_8sc_deinterleave_real_32f_aligned16.h>
-#include <qa_8sc_deinterleave_real_8s_aligned16.h>
-#include <qa_16s_convert_32f_aligned16.h>
-#include <qa_16s_convert_32f_unaligned16.h>
-#include <qa_16s_convert_8s_aligned16.h>
-#include <qa_16s_convert_8s_unaligned16.h>
-#include <qa_32f_convert_16s_aligned16.h>
-#include <qa_32f_convert_16s_unaligned16.h>
-#include <qa_32f_convert_32s_aligned16.h>
-#include <qa_32f_convert_32s_unaligned16.h>
-#include <qa_32f_convert_64f_aligned16.h>
-#include <qa_32f_convert_64f_unaligned16.h>
-#include <qa_32f_convert_8s_aligned16.h>
-#include <qa_32f_convert_8s_unaligned16.h>
-#include <qa_32s_convert_32f_aligned16.h>
-#include <qa_32s_convert_32f_unaligned16.h>
-#include <qa_64f_convert_32f_aligned16.h>
-#include <qa_64f_convert_32f_unaligned16.h>
-#include <qa_8s_convert_16s_aligned16.h>
-#include <qa_8s_convert_16s_unaligned16.h>
-#include <qa_8s_convert_32f_aligned16.h>
-#include <qa_8s_convert_32f_unaligned16.h>
-#include <qa_32fc_32f_power_32fc_aligned16.h>
-#include <qa_32f_power_aligned16.h>
-#include <qa_32fc_atan2_32f_aligned16.h>
-#include <qa_32fc_power_spectral_density_32f_aligned16.h> 
-#include <qa_32fc_power_spectrum_32f_aligned16.h>
-#include <qa_32f_calc_spectral_noise_floor_aligned16.h>
-#include <qa_32f_accumulator_aligned16.h>
-#include <qa_32f_stddev_aligned16.h>
-#include <qa_32f_stddev_and_mean_aligned16.h>
-
-CppUnit::TestSuite *
-qa_volk::suite()
-{
-  CppUnit::TestSuite *s = new CppUnit::TestSuite("volk");
-
-  s->addTest(qa_16s_quad_max_star_aligned16::suite());
-  s->addTest(qa_32fc_dot_prod_aligned16::suite());
-  s->addTest(qa_32fc_square_dist_scalar_mult_aligned16::suite());
-  s->addTest(qa_32fc_square_dist_aligned16::suite());
-  s->addTest(qa_32f_sum_of_poly_aligned16::suite());
-  s->addTest(qa_32fc_index_max_aligned16::suite());
-  s->addTest(qa_32f_index_max_aligned16::suite());
-  s->addTest(qa_32fc_conjugate_dot_prod_aligned16::suite());
-  s->addTest(qa_16s_permute_and_scalar_add_aligned16::suite());
-  s->addTest(qa_16s_branch_4_state_8_aligned16::suite());
-  s->addTest(qa_16s_max_star_horizontal_aligned16::suite());
-  s->addTest(qa_16s_max_star_aligned16::suite());
-  s->addTest(qa_16s_add_quad_aligned16::suite());
-  s->addTest(qa_32f_add_aligned16::suite());
-  s->addTest(qa_32f_subtract_aligned16::suite());
-  s->addTest(qa_32f_max_aligned16::suite());
-  s->addTest(qa_32f_min_aligned16::suite());
-  s->addTest(qa_64f_max_aligned16::suite());
-  s->addTest(qa_64f_min_aligned16::suite());
-  s->addTest(qa_32s_and_aligned16::suite());
-  s->addTest(qa_32s_or_aligned16::suite());
-  s->addTest(qa_32f_dot_prod_aligned16::suite());
-  s->addTest(qa_32f_dot_prod_unaligned16::suite());
-  s->addTest(qa_32f_fm_detect_aligned16::suite());
-  //s->addTest(qa_32fc_32f_multiply_aligned16::suite());
-  s->addTest(qa_32fc_multiply_aligned16::suite());
-  s->addTest(qa_32f_divide_aligned16::suite());
-  s->addTest(qa_32f_multiply_aligned16::suite());
-  s->addTest(qa_32f_sqrt_aligned16::suite());
-  s->addTest(qa_8sc_multiply_conjugate_16sc_aligned16::suite());
-  s->addTest(qa_8sc_multiply_conjugate_32fc_aligned16::suite());
-  s->addTest(qa_32u_popcnt_aligned16::suite());
-  s->addTest(qa_64u_popcnt_aligned16::suite());
-  s->addTest(qa_16u_byteswap_aligned16::suite());
-  s->addTest(qa_32u_byteswap_aligned16::suite());
-  s->addTest(qa_64u_byteswap_aligned16::suite());
-  s->addTest(qa_32f_normalize_aligned16::suite());
-  s->addTest(qa_16sc_deinterleave_16s_aligned16::suite());
-  s->addTest(qa_16sc_deinterleave_32f_aligned16::suite());
-  s->addTest(qa_16sc_deinterleave_real_16s_aligned16::suite());
-  s->addTest(qa_16sc_deinterleave_real_32f_aligned16::suite());
-  s->addTest(qa_16sc_deinterleave_real_8s_aligned16::suite());
-  s->addTest(qa_16sc_magnitude_16s_aligned16::suite());
-  s->addTest(qa_16sc_magnitude_32f_aligned16::suite());
-  s->addTest(qa_32fc_deinterleave_32f_aligned16::suite());
-  s->addTest(qa_32fc_deinterleave_64f_aligned16::suite());
-  s->addTest(qa_32fc_deinterleave_real_16s_aligned16::suite());
-  s->addTest(qa_32fc_deinterleave_real_32f_aligned16::suite());
-  s->addTest(qa_32fc_deinterleave_real_64f_aligned16::suite());
-  s->addTest(qa_32fc_magnitude_16s_aligned16::suite());
-  s->addTest(qa_32fc_magnitude_32f_aligned16::suite());
-  s->addTest(qa_32f_interleave_16sc_aligned16::suite());
-  s->addTest(qa_32f_interleave_32fc_aligned16::suite());
-  s->addTest(qa_8sc_deinterleave_16s_aligned16::suite());
-  s->addTest(qa_8sc_deinterleave_32f_aligned16::suite());
-  s->addTest(qa_8sc_deinterleave_real_16s_aligned16::suite());
-  s->addTest(qa_8sc_deinterleave_real_32f_aligned16::suite());
-  s->addTest(qa_8sc_deinterleave_real_8s_aligned16::suite());
-  s->addTest(qa_16s_convert_32f_aligned16::suite());
-  s->addTest(qa_16s_convert_32f_unaligned16::suite());
-  s->addTest(qa_16s_convert_8s_aligned16::suite());
-  s->addTest(qa_16s_convert_8s_unaligned16::suite());
-  s->addTest(qa_32f_convert_16s_aligned16::suite());
-  s->addTest(qa_32f_convert_16s_unaligned16::suite());
-  s->addTest(qa_32f_convert_32s_aligned16::suite());
-  s->addTest(qa_32f_convert_32s_unaligned16::suite());
-  s->addTest(qa_32f_convert_64f_aligned16::suite());
-  s->addTest(qa_32f_convert_64f_unaligned16::suite());
-  s->addTest(qa_32f_convert_8s_aligned16::suite());
-  s->addTest(qa_32f_convert_8s_unaligned16::suite());
-  s->addTest(qa_32s_convert_32f_aligned16::suite());
-  s->addTest(qa_32s_convert_32f_unaligned16::suite());
-  s->addTest(qa_64f_convert_32f_aligned16::suite());
-  s->addTest(qa_64f_convert_32f_unaligned16::suite());
-  s->addTest(qa_8s_convert_16s_aligned16::suite());
-  s->addTest(qa_8s_convert_16s_unaligned16::suite());
-  s->addTest(qa_8s_convert_32f_aligned16::suite());
-  s->addTest(qa_8s_convert_32f_unaligned16::suite());
-  s->addTest(qa_32fc_32f_power_32fc_aligned16::suite());
-  s->addTest(qa_32f_power_aligned16::suite());
-  s->addTest(qa_32fc_atan2_32f_aligned16::suite());
-  s->addTest(qa_32fc_power_spectral_density_32f_aligned16::suite());
-  s->addTest(qa_32fc_power_spectrum_32f_aligned16::suite());
-  s->addTest(qa_32f_calc_spectral_noise_floor_aligned16::suite());
-  s->addTest(qa_32f_accumulator_aligned16::suite());
-  s->addTest(qa_32f_stddev_aligned16::suite());
-  s->addTest(qa_32f_stddev_and_mean_aligned16::suite());
-
-  return s;
-}
diff --git a/volk/lib/qa_volk.h b/volk/lib/qa_volk.h
deleted file mode 100644
index 43fa7faba..000000000
--- a/volk/lib/qa_volk.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2008 Free Software Foundation, Inc.
- * 
- * This file is part of GNU Radio
- * 
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Example Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- * 
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Example Public License for more details.
- * 
- * You should have received a copy of the GNU Example Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_QA_VOLK_H
-#define INCLUDED_QA_VOLK_H
-
-#include <cppunit/TestSuite.h>
-
-//! collect all the tests for the example directory
-
-class qa_volk {
- public:
-  //! return suite of tests for all of example directory
-  static CppUnit::TestSuite *suite ();
-};
-
-#endif /* INCLUDED_QA_VOLK_H */
diff --git a/volk/lib/test_all.cc b/volk/lib/test_all.cc
deleted file mode 100644
index 50ac08eab..000000000
--- a/volk/lib/test_all.cc
+++ /dev/null
@@ -1,82 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2002,2008 Free Software Foundation, Inc.
- * 
- * This file is part of GNU Radio
- * 
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- * 
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#include <cppunit/ui/text/TestRunner.h>
-#include <cppunit/TextTestRunner.h>
-
-#include <qa_volk.h>
-
-#include <cppunit/XmlOutputter.h>
-#include <iostream>
-#include <getopt.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <string>
-#include <fstream>
-
-int 
-main (int argc, char **argv)
-{
-  
-  int opt = 0;
-  std::string xmlOutputFile("");
-
-  while( (opt = getopt(argc, argv, "o:")) != -1){
-    switch(opt){
-    case 'o':
-      if(optarg){
-	xmlOutputFile.assign(optarg);
-      }
-      else{
-	std::cerr << "No xml file output specified for -o" << std::endl;
-	exit(EXIT_FAILURE);
-      }
-      break;
-
-    default: /* '?' */
-      fprintf(stderr, "Usage: %s [-o] \"xml output file\"\n",
-	      argv[0]);
-      exit(EXIT_FAILURE);
-    }
-
-  }
-
-  CppUnit::TextUi::TestRunner runner;
-
-  runner.addTest (qa_volk::suite ());
-
-  bool was_successful = false;
-  if(!xmlOutputFile.empty()){
-    std::ofstream xmlOutput(xmlOutputFile.c_str());
-    if(xmlOutput.is_open()){
-      runner.setOutputter(new CppUnit::XmlOutputter(&runner.result(), xmlOutput));
-
-      was_successful = runner.run("", false, true, false);
-    }
-    xmlOutput.close();
-  }
-  else{
-    was_successful = runner.run ("", false);
-  }
-
-  return was_successful ? 0 : 1;
-}
-- 
cgit 


From f832c9789be9fec46e211be4fb2355013d19c000 Mon Sep 17 00:00:00 2001
From: Nick Foster
Date: Fri, 21 Jan 2011 18:24:02 -0800
Subject: Volk: Small changes to speed things up.

---
 volk/lib/qa_utils.cc | 2 +-
 volk/lib/testqa.cc   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'volk/lib')

diff --git a/volk/lib/qa_utils.cc b/volk/lib/qa_utils.cc
index 67ce5ddef..9cafd459f 100644
--- a/volk/lib/qa_utils.cc
+++ b/volk/lib/qa_utils.cc
@@ -32,7 +32,7 @@ void load_random_data(void *data, volk_type_t type, unsigned int n) {
         if(type.size == 8) random_floats<double>((double *)data, n);
         else random_floats<float>((float *)data, n);
     } else {
-        float int_max = pow(2, type.size*8);
+        float int_max = float(uint64_t(2) << (type.size*8));
         if(type.is_signed) int_max /= 2.0;
         for(int i=0; i<n; i++) {
             float scaled_rand = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * int_max;
diff --git a/volk/lib/testqa.cc b/volk/lib/testqa.cc
index 9f4934dc0..4cef7b443 100644
--- a/volk/lib/testqa.cc
+++ b/volk/lib/testqa.cc
@@ -40,7 +40,7 @@ BOOST_AUTO_TEST_CASE(volk_test_all) {
     VOLK_RUN_TESTS(volk_32fc_deinterleave_real_64f_a16, 1e-4, 0, 2046, 10000);
     VOLK_RUN_TESTS(volk_32fc_x2_dot_prod_32fc_a16, 1e-4, 0, 2046, 10000);
     VOLK_RUN_TESTS(volk_32fc_index_max_16u_a16, 0, 0, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32fc_s32f_magnitude_16i_a16, 0, 32768, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32fc_s32f_magnitude_16i_a16, 1, 32768, 2046, 10000);
     VOLK_RUN_TESTS(volk_32fc_magnitude_32f_a16, 1e-4, 0, 2046, 10000);
     VOLK_RUN_TESTS(volk_32fc_x2_multiply_32fc_a16, 1e-4, 0, 2046, 10000);
     VOLK_RUN_TESTS(volk_32f_s32f_convert_16i_a16, 1, 32768, 2046, 10000);
-- 
cgit 


From b0a23e876fe0f92afb2c55fd4fbce6427e9598d8 Mon Sep 17 00:00:00 2001
From: Nick Foster
Date: Tue, 25 Jan 2011 15:06:23 -0800
Subject: Volk: doesn't test a routine if no valid architectures other than
 generic are found

---
 volk/lib/qa_utils.cc | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'volk/lib')

diff --git a/volk/lib/qa_utils.cc b/volk/lib/qa_utils.cc
index 9cafd459f..6a6f87d85 100644
--- a/volk/lib/qa_utils.cc
+++ b/volk/lib/qa_utils.cc
@@ -288,6 +288,11 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name,
     //first let's get a list of available architectures for the test
     std::vector<std::string> arch_list = get_arch_list(archs);
     
+    if(arch_list.size() < 2) {
+        std::cout << "no architectures to test" << std::endl;
+        return false;
+    }
+    
     //now we have to get a function signature by parsing the name
     std::vector<volk_type_t> inputsig, outputsig;
     get_signatures_from_name(inputsig, outputsig, name);
-- 
cgit 


From e979880d446949b2d2a93087011579c383369819 Mon Sep 17 00:00:00 2001
From: Nick Foster
Date: Thu, 13 Jan 2011 18:57:48 +0000
Subject: Volk: QA util has proper free().

---
 volk/lib/qa_utils.cc | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'volk/lib')

diff --git a/volk/lib/qa_utils.cc b/volk/lib/qa_utils.cc
index 6a6f87d85..e85e2c1bc 100644
--- a/volk/lib/qa_utils.cc
+++ b/volk/lib/qa_utils.cc
@@ -309,10 +309,12 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name,
     //for(int i=0; i<inputsig.size(); i++) std::cout << "Input: " << inputsig[i].str << std::endl;
     //for(int i=0; i<outputsig.size(); i++) std::cout << "Output: " << outputsig[i].str << std::endl;
     std::vector<void *> inbuffs;
-
+    std::vector<void *> free_buffs; //this is just a list of void*'s that i'll have to free later.
+                                    //we need it because we dupe void*s in test_data below.
     make_buffer_for_signature(inbuffs, inputsig, vlen);
     for(int i=0; i<inbuffs.size(); i++) {
-        load_random_data(inbuffs[i], inputsig[i], vlen);        
+        load_random_data(inbuffs[i], inputsig[i], vlen);   
+        free_buffs.push_back(inbuffs[i]);
     }
     
     //ok let's make a vector of vector of void buffers, which holds the input/output vectors for each arch
@@ -321,6 +323,7 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name,
         std::vector<void *> arch_buffs;
         for(int j=0; j<outputsig.size(); j++) {
             arch_buffs.push_back(make_aligned_buffer(vlen, outputsig[j].size*(outputsig[j].is_complex ? 2 : 1)));
+            free_buffs.push_back(arch_buffs.back());
         }
         for(int j=0; j<inputsig.size(); j++) {
             arch_buffs.push_back(inbuffs[j]);
@@ -433,6 +436,11 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name,
             }
         }
     }
+
+    BOOST_FOREACH(void *buf, free_buffs) {
+        free(buf);
+    }
+
     return fail_global;
 }
 
-- 
cgit 


From 060df0d1fe23c07a0ba2f0242f22073dc62626c1 Mon Sep 17 00:00:00 2001
From: Nick Foster
Date: Tue, 25 Jan 2011 16:28:21 -0800
Subject: Volk: uses m4 magic to find boost_unit_test_framework

---
 volk/lib/Makefile.am | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'volk/lib')

diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am
index bbc993fa2..afd29a352 100644
--- a/volk/lib/Makefile.am
+++ b/volk/lib/Makefile.am
@@ -121,7 +121,7 @@ noinst_PROGRAMS = \
 
 testqa_SOURCES = testqa.cc qa_utils.cc
 testqa_CPPFLAGS = -DBOOST_TEST_DYN_LINK -DBOOST_TEST_MAIN
-testqa_LDFLAGS = -lboost_unit_test_framework
+testqa_LDFLAGS = $(BOOST_UNIT_TEST_FRAMEWORK_LIB)
 if LV_HAVE_ORC
 testqa_LDADD  = \
 	libvolk.la \
-- 
cgit 


From 81c3086bee1752c94a89ab2d20b7de048fdd1be7 Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Fri, 14 Jan 2011 19:58:11 -0500
Subject: Cleans up the Makefiles for the various platforms. This should also
 make it easier to add new architectures. Thanks to Josh for the inspiration.

---
 volk/lib/Makefile.am | 74 +++++-----------------------------------------------
 1 file changed, 7 insertions(+), 67 deletions(-)

(limited to 'volk/lib')

diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am
index 814d438fd..896d568e6 100644
--- a/volk/lib/Makefile.am
+++ b/volk/lib/Makefile.am
@@ -1,5 +1,5 @@
 #
-# Copyright 2010 Free Software Foundation, Inc.
+# Copyright 2010,2011 Free Software Foundation, Inc.
 # 
 # This file is part of GNU Radio
 # 
@@ -61,77 +61,17 @@ EXTRA_DIST = \
 #                      The main library
 # ----------------------------------------------------------------
 
-universal_runtime_CODE = 	\
-	volk_runtime.c	\
-	volk_init.c \
+libvolk_runtime_la_SOURCES = 	\
+	$(platform_CODE) 	\
+	volk_runtime.c		\
+	volk_init.c 		\
 	volk_rank_archs.c	
 
-universal_CODE = 		\
+libvolk_la_SOURCES = 		\
+	$(platform_CODE) 	\
 	volk.c 			\
 	volk_environment_init.c
 
-generic_CODE = 		\
-	volk_cpu_generic.c
-
-x86_CODE = 		\
-	volk_cpu_x86.c
-
-x86_SUBCODE = 		\
-	cpuid_x86.S
-
-x86_64_SUBCODE = 	\
-	cpuid_x86_64.S
-
-powerpc_CODE = \
-	volk_cpu_powerpc.c
-
-
-if MD_CPU_generic
-libvolk_la_SOURCES =	\
-	$(generic_CODE)		\
-	$(universal_CODE)
-libvolk_runtime_la_SOURCES =	\
-	$(generic_CODE)		\
-	$(universal_runtime_CODE)
-
-endif
-
-if MD_CPU_x86
-if MD_SUBCPU_x86_64
-libvolk_la_SOURCES =	\
-	$(x86_CODE)		\
-	$(x86_64_SUBCODE)		\
-	$(universal_CODE) 		
-
-libvolk_runtime_la_SOURCES =	\
-	$(x86_CODE)		\
-	$(x86_64_SUBCODE)		\
-	$(universal_runtime_CODE) 		
-else
-libvolk_la_SOURCES =	\
-	$(x86_CODE)		\
-	$(x86_SUBCODE)	\
-	$(universal_CODE)
-
-libvolk_runtime_la_SOURCES =	\
-	$(x86_CODE)		\
-	$(x86_SUBCODE)	\
-	$(universal_runtime_CODE)
-endif
-endif
-
-
-if MD_CPU_powerpc
-libvolk_la_SOURCES =	\
-	$(powerpc_CODE)		\
-	$(universal_CODE)
-
-libvolk_runtime_la_SOURCES =	\
-	$(powerpc_CODE)		\
-	$(universal_runtime_CODE)
-endif
-
-
 
 libvolk_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0 
 libvolk_runtime_la_LDFLAGS = $(NO_UNDEFINED) -version-info 0:0:0
-- 
cgit 


From 9edf280fb25bee071c166123ac1aee41d4a4949e Mon Sep 17 00:00:00 2001
From: Josh Blum
Date: Sat, 15 Jan 2011 17:29:15 -0800
Subject: volk: replace assembly and separate cases with gcc cpuid for all x86

---
 volk/lib/Makefile.am     |   2 +-
 volk/lib/assembly.h      |  67 ------------------
 volk/lib/cpuid_x86.S     |  60 ----------------
 volk/lib/cpuid_x86_64.S  |  54 --------------
 volk/lib/gcc_x86_cpuid.h | 178 +++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 179 insertions(+), 182 deletions(-)
 delete mode 100644 volk/lib/assembly.h
 delete mode 100644 volk/lib/cpuid_x86.S
 delete mode 100644 volk/lib/cpuid_x86_64.S
 create mode 100644 volk/lib/gcc_x86_cpuid.h

(limited to 'volk/lib')

diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am
index 896d568e6..7a355e86a 100644
--- a/volk/lib/Makefile.am
+++ b/volk/lib/Makefile.am
@@ -186,7 +186,7 @@ libvolk_qa_la_LIBADD = \
 noinst_HEADERS = \
 	volk_init.h \
 	qa_volk.h \
-	assembly.h \
+	gcc_x86_cpuid.h \
 	qa_16s_quad_max_star_aligned16.h \
 	qa_32fc_dot_prod_aligned16.h \
 	qa_32fc_square_dist_aligned16.h \
diff --git a/volk/lib/assembly.h b/volk/lib/assembly.h
deleted file mode 100644
index 8a99aa07c..000000000
--- a/volk/lib/assembly.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2002 Free Software Foundation, Inc.
- * 
- * This file is part of GNU Radio
- * 
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- * 
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef _ASSEMBLY_H_
-#define _ASSEMBLY_H_
-
-#if defined (__APPLE__) && defined (__APPLE_CC__)
-
-// XCode ignores the .scl and .type functions in XCode 2.2.1 and 2.3,
-// but creates an error in XCode 2.4.  Just ignore them.
-
-#define GLOB_SYMB(f)    _ ## f
-
-#define DEF_FUNC_HEAD(f)  /* none */
-
-#define FUNC_TAIL(f)    /* none*/
-
-#elif !defined (__ELF__)
-
-/*
- * Too bad, the following define does not work as expected --SF
- * 	#define GLOB_SYMB(f)	__USER_LABEL_PREFIX__ ## f
- */
-#define GLOB_SYMB(f)	_ ## f
-
-#define DEF_FUNC_HEAD(f)	\
-	.def	GLOB_SYMB(f); .scl 2; .type 32; .endef
-
-#define FUNC_TAIL(f)	/* none */
-
-
-#else	/* !__ELF__ */
-
-
-#define GLOB_SYMB(f)	f
-
-#define DEF_FUNC_HEAD(f)	\
-	.type	GLOB_SYMB(f),@function	\
-
-#define FUNC_TAIL(f)	\
-  .Lfe1:		\
-	.size	GLOB_SYMB(f),.Lfe1-GLOB_SYMB(f)
-
-
-#endif	/* !__ELF__ */
-
-
-#endif /* _ASSEMBLY_H_ */
diff --git a/volk/lib/cpuid_x86.S b/volk/lib/cpuid_x86.S
deleted file mode 100644
index 4e1a9404f..000000000
--- a/volk/lib/cpuid_x86.S
+++ /dev/null
@@ -1,60 +0,0 @@
-#	
-# Copyright 2003 Free Software Foundation, Inc.
-# 
-# This file is part of GNU Radio
-# 
-# GNU Radio is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 3, or (at your option)
-# any later version.
-# 
-# GNU Radio is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-# 
-# You should have received a copy of the GNU General Public License
-# along with GNU Radio; see the file COPYING.  If not, write to
-# the Free Software Foundation, Inc., 51 Franklin Street,
-# Boston, MA 02110-1301, USA.
-# 
-
-#
-# execute CPUID instruction, return EAX, EBX, ECX and EDX values in result
-#
-#  void cpuid_x86 (unsigned int op, unsigned int result[4]);
-#
-
-#include "assembly.h"
-
-.file "cpuid_x86.S"
-	.version	"01.01"
-.text
-.globl	GLOB_SYMB(cpuid_x86)
-	DEF_FUNC_HEAD(cpuid_x86)
-GLOB_SYMB(cpuid_x86):
-	pushl	%ebp
-	movl	%esp, %ebp
-	pushl	%ebx		# must save in PIC mode, holds GOT pointer
-	pushl	%esi
-	
-	movl	8(%ebp), %eax	# op
-	movl	12(%ebp), %esi	# result
-	cpuid
-	movl	%eax, 0(%esi)
-	movl	%ebx, 4(%esi)
-	movl	%ecx, 8(%esi)
-	movl	%edx, 12(%esi)
-	
-	popl	%esi
-	popl	%ebx
-	popl	%ebp
-	ret
-
-FUNC_TAIL(cpuid_x86)
-	.ident	"Hand coded cpuid assembly"
-	
-
-#if defined(__linux__) && defined(__ELF__)
-.section .note.GNU-stack,"",%progbits
-#endif
diff --git a/volk/lib/cpuid_x86_64.S b/volk/lib/cpuid_x86_64.S
deleted file mode 100644
index 32b1847cd..000000000
--- a/volk/lib/cpuid_x86_64.S
+++ /dev/null
@@ -1,54 +0,0 @@
-#	
-# Copyright 2003,2005 Free Software Foundation, Inc.
-# 
-# This file is part of GNU Radio
-# 
-# GNU Radio is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 3, or (at your option)
-# any later version.
-# 
-# GNU Radio is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-# 
-# You should have received a copy of the GNU General Public License
-# along with GNU Radio; see the file COPYING.  If not, write to
-# the Free Software Foundation, Inc., 51 Franklin Street,
-# Boston, MA 02110-1301, USA.
-# 
-
-#
-# execute CPUID instruction, return EAX, EBX, ECX and EDX values in result
-#
-#  void cpuid_x86 (unsigned int op, unsigned int result[4]);
-#
-
-#include "assembly.h"
-
-.file "cpuid_x86_64.S"
-	.version	"01.01"
-.text
-.globl	GLOB_SYMB(cpuid_x86)
-	DEF_FUNC_HEAD(cpuid_x86)
-GLOB_SYMB(cpuid_x86):
-	mov	%rbx, %r11	# must save in PIC mode, holds GOT pointer
-	
-	mov	%rdi, %rax	# op
-	cpuid
-	movl	%eax, 0(%rsi)	# result
-	movl	%ebx, 4(%rsi)
-	movl	%ecx, 8(%rsi)
-	movl	%edx, 12(%rsi)
-	
-	mov	%r11, %rbx
-	retq
-
-FUNC_TAIL(cpuid_x86)
-	.ident	"Hand coded cpuid64 assembly"
-	
-
-#if defined(__linux__) && defined(__ELF__)
-.section .note.GNU-stack,"",%progbits
-#endif
diff --git a/volk/lib/gcc_x86_cpuid.h b/volk/lib/gcc_x86_cpuid.h
new file mode 100644
index 000000000..2d0916fb3
--- /dev/null
+++ b/volk/lib/gcc_x86_cpuid.h
@@ -0,0 +1,178 @@
+/*
+ * Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option) any
+ * later version.
+ * 
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ * 
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ * 
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+/* %ecx */
+#define bit_SSE3	(1 << 0)
+#define bit_PCLMUL	(1 << 1)
+#define bit_SSSE3	(1 << 9)
+#define bit_FMA		(1 << 12)
+#define bit_CMPXCHG16B	(1 << 13)
+#define bit_SSE4_1	(1 << 19)
+#define bit_SSE4_2	(1 << 20)
+#define bit_MOVBE	(1 << 22)
+#define bit_POPCNT	(1 << 23)
+#define bit_AES		(1 << 25)
+#define bit_XSAVE	(1 << 26)
+#define bit_OSXSAVE	(1 << 27)
+#define bit_AVX		(1 << 28)
+
+/* %edx */
+#define bit_CMPXCHG8B	(1 << 8)
+#define bit_CMOV	(1 << 15)
+#define bit_MMX		(1 << 23)
+#define bit_FXSAVE	(1 << 24)
+#define bit_SSE		(1 << 25)
+#define bit_SSE2	(1 << 26)
+
+/* Extended Features */
+/* %ecx */
+#define bit_LAHF_LM	(1 << 0)
+#define bit_SSE4a	(1 << 6)
+#define bit_SSE5	(1 << 11)
+
+/* %edx */
+#define bit_LM		(1 << 29)
+#define bit_3DNOWP	(1 << 30)
+#define bit_3DNOW	(1 << 31)
+
+
+#if defined(__i386__) && defined(__PIC__)
+/* %ebx may be the PIC register.  */
+#if __GNUC__ >= 3
+#define __cpuid(level, a, b, c, d)			\
+  __asm__ ("xchg{l}\t{%%}ebx, %1\n\t"			\
+	   "cpuid\n\t"					\
+	   "xchg{l}\t{%%}ebx, %1\n\t"			\
+	   : "=a" (a), "=r" (b), "=c" (c), "=d" (d)	\
+	   : "0" (level))
+
+#define __cpuid_count(level, count, a, b, c, d)		\
+  __asm__ ("xchg{l}\t{%%}ebx, %1\n\t"			\
+	   "cpuid\n\t"					\
+	   "xchg{l}\t{%%}ebx, %1\n\t"			\
+	   : "=a" (a), "=r" (b), "=c" (c), "=d" (d)	\
+	   : "0" (level), "2" (count))
+#else
+/* Host GCCs older than 3.0 weren't supporting Intel asm syntax
+   nor alternatives in i386 code.  */
+#define __cpuid(level, a, b, c, d)			\
+  __asm__ ("xchgl\t%%ebx, %1\n\t"			\
+	   "cpuid\n\t"					\
+	   "xchgl\t%%ebx, %1\n\t"			\
+	   : "=a" (a), "=r" (b), "=c" (c), "=d" (d)	\
+	   : "0" (level))
+
+#define __cpuid_count(level, count, a, b, c, d)		\
+  __asm__ ("xchgl\t%%ebx, %1\n\t"			\
+	   "cpuid\n\t"					\
+	   "xchgl\t%%ebx, %1\n\t"			\
+	   : "=a" (a), "=r" (b), "=c" (c), "=d" (d)	\
+	   : "0" (level), "2" (count))
+#endif
+#else
+#define __cpuid(level, a, b, c, d)			\
+  __asm__ ("cpuid\n\t"					\
+	   : "=a" (a), "=b" (b), "=c" (c), "=d" (d)	\
+	   : "0" (level))
+
+#define __cpuid_count(level, count, a, b, c, d)		\
+  __asm__ ("cpuid\n\t"					\
+	   : "=a" (a), "=b" (b), "=c" (c), "=d" (d)	\
+	   : "0" (level), "2" (count))
+#endif
+
+/* Return highest supported input value for cpuid instruction.  ext can
+   be either 0x0 or 0x80000000 to return highest supported value for
+   basic or extended cpuid information.  Function returns 0 if cpuid
+   is not supported or whatever cpuid returns in eax register.  If sig
+   pointer is non-null, then first four bytes of the signature
+   (as found in ebx register) are returned in location pointed by sig.  */
+
+static __inline unsigned int
+__get_cpuid_max (unsigned int __ext, unsigned int *__sig)
+{
+  unsigned int __eax, __ebx, __ecx, __edx;
+
+#ifndef __x86_64__
+#if __GNUC__ >= 3
+  /* See if we can use cpuid.  On AMD64 we always can.  */
+  __asm__ ("pushf{l|d}\n\t"
+	   "pushf{l|d}\n\t"
+	   "pop{l}\t%0\n\t"
+	   "mov{l}\t{%0, %1|%1, %0}\n\t"
+	   "xor{l}\t{%2, %0|%0, %2}\n\t"
+	   "push{l}\t%0\n\t"
+	   "popf{l|d}\n\t"
+	   "pushf{l|d}\n\t"
+	   "pop{l}\t%0\n\t"
+	   "popf{l|d}\n\t"
+	   : "=&r" (__eax), "=&r" (__ebx)
+	   : "i" (0x00200000));
+#else
+/* Host GCCs older than 3.0 weren't supporting Intel asm syntax
+   nor alternatives in i386 code.  */
+  __asm__ ("pushfl\n\t"
+	   "pushfl\n\t"
+	   "popl\t%0\n\t"
+	   "movl\t%0, %1\n\t"
+	   "xorl\t%2, %0\n\t"
+	   "pushl\t%0\n\t"
+	   "popfl\n\t"
+	   "pushfl\n\t"
+	   "popl\t%0\n\t"
+	   "popfl\n\t"
+	   : "=&r" (__eax), "=&r" (__ebx)
+	   : "i" (0x00200000));
+#endif
+
+  if (!((__eax ^ __ebx) & 0x00200000))
+    return 0;
+#endif
+
+  /* Host supports cpuid.  Return highest supported cpuid input value.  */
+  __cpuid (__ext, __eax, __ebx, __ecx, __edx);
+
+  if (__sig)
+    *__sig = __ebx;
+
+  return __eax;
+}
+
+/* Return cpuid data for requested cpuid level, as found in returned
+   eax, ebx, ecx and edx registers.  The function checks if cpuid is
+   supported and returns 1 for valid cpuid information or 0 for
+   unsupported cpuid level.  All pointers are required to be non-null.  */
+
+static __inline int
+__get_cpuid (unsigned int __level,
+	     unsigned int *__eax, unsigned int *__ebx,
+	     unsigned int *__ecx, unsigned int *__edx)
+{
+  unsigned int __ext = __level & 0x80000000;
+
+  if (__get_cpuid_max (__ext, 0) < __level)
+    return 0;
+
+  __cpuid (__level, *__eax, *__ebx, *__ecx, *__edx);
+  return 1;
+}
-- 
cgit 


From 108a594c0838ad21f93cba6597d1f66af097b157 Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Tue, 25 Jan 2011 10:37:49 -0500
Subject: volk: New volk kernel for conjugate dot products with unaligned
 buffers.

Note: need to convert this to new naming standard.
---
 volk/lib/Makefile.am                             |   2 +
 volk/lib/qa_32fc_conjugate_dot_prod_unaligned.cc | 138 +++++++++++++++++++++++
 volk/lib/qa_32fc_conjugate_dot_prod_unaligned.h  |  18 +++
 volk/lib/qa_volk.cc                              |   2 +
 4 files changed, 160 insertions(+)
 create mode 100644 volk/lib/qa_32fc_conjugate_dot_prod_unaligned.cc
 create mode 100644 volk/lib/qa_32fc_conjugate_dot_prod_unaligned.h

(limited to 'volk/lib')

diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am
index 7a355e86a..beb815e63 100644
--- a/volk/lib/Makefile.am
+++ b/volk/lib/Makefile.am
@@ -93,6 +93,7 @@ libvolk_qa_la_SOURCES = \
 	qa_32fc_index_max_aligned16.cc \
 	qa_32f_index_max_aligned16.cc \
 	qa_32fc_conjugate_dot_prod_aligned16.cc \
+	qa_32fc_conjugate_dot_prod_unaligned.cc \
 	qa_16s_permute_and_scalar_add_aligned16.cc \
 	qa_16s_branch_4_state_8_aligned16.cc \
 	qa_16s_max_star_horizontal_aligned16.cc \
@@ -195,6 +196,7 @@ noinst_HEADERS = \
 	qa_32fc_index_max_aligned16.h \
 	qa_32f_index_max_aligned16.h \
 	qa_32fc_conjugate_dot_prod_aligned16.h \
+	qa_32fc_conjugate_dot_prod_unaligned.h \
 	qa_16s_permute_and_scalar_add_aligned16.h \
 	qa_16s_branch_4_state_8_aligned16.h \
 	qa_16s_max_star_horizontal_aligned16.h \
diff --git a/volk/lib/qa_32fc_conjugate_dot_prod_unaligned.cc b/volk/lib/qa_32fc_conjugate_dot_prod_unaligned.cc
new file mode 100644
index 000000000..a0680bab6
--- /dev/null
+++ b/volk/lib/qa_32fc_conjugate_dot_prod_unaligned.cc
@@ -0,0 +1,138 @@
+#include <volk/volk.h>
+#include <qa_32fc_conjugate_dot_prod_unaligned.h>
+#include <stdlib.h>
+#include <math.h>
+#include <time.h>
+
+
+#define assertcomplexEqual(expected, actual, delta)			\
+  CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * delta); \
+  CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected))* delta);	
+
+#define	ERR_DELTA	(1e-4)
+
+//test for sse
+
+#if LV_HAVE_SSE && LV_HAVE_64
+
+static float uniform() {
+  return 2.0 * ((float) rand() / RAND_MAX - 0.5);	// uniformly (-1, 1)
+}
+
+static void
+random_floats (float *buf, unsigned n)
+{
+  for (unsigned i = 0; i < n; i++)
+    buf[i] = uniform () * 32767;
+}
+
+
+void qa_32fc_conjugate_dot_prod_unaligned::t1() {
+  const int vlen = 789743;
+  
+  volk_environment_init();
+  int ret;
+
+  std::complex<float>* input;
+  std::complex<float>* taps;
+  
+  std::complex<float>* result_generic;
+  std::complex<float>* result;
+
+  ret = posix_memalign((void**)&input, 16, vlen << 3);
+  ret = posix_memalign((void**)&taps, 16, vlen << 3);
+  ret = posix_memalign((void**)&result_generic, 16, 8);
+  ret = posix_memalign((void**)&result, 16, 8);
+  
+
+  result_generic[0] = std::complex<float>(0,0);
+  result[0] = std::complex<float>(0,0);
+
+  random_floats((float*)input, vlen * 2);
+  random_floats((float*)taps, vlen * 2);
+  
+  
+
+  volk_32fc_conjugate_dot_prod_unaligned_manual(result_generic, input, taps, vlen * 8,  "generic");
+
+  
+  volk_32fc_conjugate_dot_prod_unaligned_manual(result, input, taps, vlen * 8, "sse");
+
+  printf("32fc_conjugate_dot_prod_unaligned\n");
+  printf("generic: %f +i%f ... sse: %f +i%f\n", std::real(result_generic[0]), std::imag(result_generic[0]), std::real(result[0]), std::imag(result[0]));
+
+  assertcomplexEqual(result_generic[0], result[0], ERR_DELTA);
+
+  free(input);
+  free(taps);
+  free(result_generic);
+  free(result);
+  
+}
+
+
+#elif LV_HAVE_SSE && LV_HAVE_32
+
+static float uniform() {
+  return 2.0 * ((float) rand() / RAND_MAX - 0.5);	// uniformly (-1, 1)
+}
+
+static void
+random_floats (float *buf, unsigned n)
+{
+  for (unsigned i = 0; i < n; i++)
+    buf[i] = uniform () * 32767;
+}
+
+
+void qa_32fc_conjugate_dot_prod_unaligned::t1() {
+  const int vlen = 789743;
+  
+  volk_environment_init();
+  int ret;
+
+  std::complex<float>* input;
+  std::complex<float>* taps;
+  
+  std::complex<float>* result_generic;
+  std::complex<float>* result;
+
+  ret = posix_memalign((void**)&input, 16, vlen << 3);
+  ret = posix_memalign((void**)&taps, 16, vlen << 3);
+  ret = posix_memalign((void**)&result_generic, 16, 8);
+  ret = posix_memalign((void**)&result, 16, 8);
+  
+
+  result_generic[0] = std::complex<float>(0,0);
+  result[0] = std::complex<float>(0,0);
+
+  random_floats((float*)input, vlen * 2);
+  random_floats((float*)taps, vlen * 2);
+  
+  
+
+  volk_32fc_conjugate_dot_prod_unaligned_manual(result_generic, input, taps, vlen * 8,  "generic");
+
+  
+  volk_32fc_conjugate_dot_prod_unaligned_manual(result, input, taps, vlen * 8, "sse_32");
+
+  printf("32fc_conjugate_dot_prod_unaligned\n");
+  printf("generic: %f +i%f ... sse: %f +i%f\n", std::real(result_generic[0]), std::imag(result_generic[0]), std::real(result[0]), std::imag(result[0]));
+
+  assertcomplexEqual(result_generic[0], result[0], ERR_DELTA);
+
+  free(input);
+  free(taps);
+  free(result_generic);
+  free(result);
+  
+}
+
+
+#else
+
+void qa_32fc_conjugate_dot_prod_unaligned::t1() {
+  printf("sse not available... no test performed\n");
+}
+
+#endif /*LV_HAVE_SSE*/
diff --git a/volk/lib/qa_32fc_conjugate_dot_prod_unaligned.h b/volk/lib/qa_32fc_conjugate_dot_prod_unaligned.h
new file mode 100644
index 000000000..7aead53a1
--- /dev/null
+++ b/volk/lib/qa_32fc_conjugate_dot_prod_unaligned.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_32FC_CONJUGATE_DOT_PROD_UNALIGNED_H
+#define INCLUDED_QA_32FC_CONJUGATE_DOT_PROD_UNALIGNED_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_32fc_conjugate_dot_prod_unaligned : public CppUnit::TestCase {
+
+  CPPUNIT_TEST_SUITE (qa_32fc_conjugate_dot_prod_unaligned);
+  CPPUNIT_TEST (t1);
+  CPPUNIT_TEST_SUITE_END ();
+
+ private:
+  void t1 ();
+};
+
+
+#endif /* INCLUDED_QA_32FC_CONJUGATE_DOT_PROD_UNALIGNED_H */
diff --git a/volk/lib/qa_volk.cc b/volk/lib/qa_volk.cc
index c3c27b69b..98d3e9728 100644
--- a/volk/lib/qa_volk.cc
+++ b/volk/lib/qa_volk.cc
@@ -34,6 +34,7 @@
 #include <qa_32fc_index_max_aligned16.h>
 #include <qa_32f_index_max_aligned16.h>
 #include <qa_32fc_conjugate_dot_prod_aligned16.h>
+#include <qa_32fc_conjugate_dot_prod_unaligned.h>
 #include <qa_16s_permute_and_scalar_add_aligned16.h>
 #include <qa_16s_branch_4_state_8_aligned16.h>
 #include <qa_16s_max_star_horizontal_aligned16.h>
@@ -127,6 +128,7 @@ qa_volk::suite()
   s->addTest(qa_32fc_index_max_aligned16::suite());
   s->addTest(qa_32f_index_max_aligned16::suite());
   s->addTest(qa_32fc_conjugate_dot_prod_aligned16::suite());
+  s->addTest(qa_32fc_conjugate_dot_prod_unaligned::suite());
   s->addTest(qa_16s_permute_and_scalar_add_aligned16::suite());
   s->addTest(qa_16s_branch_4_state_8_aligned16::suite());
   s->addTest(qa_16s_max_star_horizontal_aligned16::suite());
-- 
cgit 


From 023167ca8a85ab597f9e59302733f71809a8afbd Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Tue, 25 Jan 2011 21:36:01 -0500
Subject: volk: Adding explicit links to local volk libraries. Required to
 prevent breakage when adding new volk kernels.

---
 volk/lib/Makefile.am | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'volk/lib')

diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am
index beb815e63..446ff574f 100644
--- a/volk/lib/Makefile.am
+++ b/volk/lib/Makefile.am
@@ -285,7 +285,7 @@ noinst_PROGRAMS = \
 	test_all
 
 test_all_SOURCES = test_all.cc
-test_all_LDADD   = libvolk_qa.la
+test_all_LDADD   = libvolk.la libvolk_runtime.la libvolk_qa.la
 
 
 distclean-local: 
-- 
cgit 


From 2a4c4f89187bf75caa34c7bc52fc32310a75c9f2 Mon Sep 17 00:00:00 2001
From: Nick Foster
Date: Wed, 26 Jan 2011 15:28:35 -0800
Subject: Volk: fixed volk_8i_s32f_convert_32f_a16_orc_impl.

---
 volk/lib/testqa.cc | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'volk/lib')

diff --git a/volk/lib/testqa.cc b/volk/lib/testqa.cc
index 4cef7b443..d6b9e347d 100644
--- a/volk/lib/testqa.cc
+++ b/volk/lib/testqa.cc
@@ -49,8 +49,8 @@ BOOST_AUTO_TEST_CASE(volk_test_all) {
     VOLK_RUN_TESTS(volk_32f_s32f_convert_32i_u, 1, 2<<31, 2046, 10000);
     VOLK_RUN_TESTS(volk_32f_convert_64f_a16, 1e-4, 0, 2046, 10000);
     VOLK_RUN_TESTS(volk_32f_convert_64f_u, 1e-4, 0, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_a16, 0, 128, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_u, 0, 128, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_a16, 1, 128, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_u, 1, 128, 2046, 10000);
 //    VOLK_RUN_TESTS(volk_32fc_s32f_x2_power_spectral_density_32f_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_32fc_s32f_power_spectrum_32f_a16, 1e-4, 0, 2046, 10000);
     VOLK_RUN_TESTS(volk_32fc_x2_square_dist_32f_a16, 1e-4, 0, 2046, 10000);
@@ -60,7 +60,7 @@ BOOST_AUTO_TEST_CASE(volk_test_all) {
     VOLK_RUN_TESTS(volk_32f_x2_dot_prod_32f_u, 1e-4, 0, 2046, 10000);
 //    VOLK_RUN_TESTS(volk_32f_s32f_32f_fm_detect_32f_a16, 1e-4, 2046, 10000);
     VOLK_RUN_TESTS(volk_32f_index_max_16u_a16, 0, 0, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32f_x2_s32f_interleave_16ic_a16, 0, 32768, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32f_x2_s32f_interleave_16ic_a16, 1, 32768, 2046, 10000);
     VOLK_RUN_TESTS(volk_32f_x2_interleave_32fc_a16, 0, 0, 2046, 10000);
     VOLK_RUN_TESTS(volk_32f_x2_max_32f_a16, 1e-4, 0, 2046, 10000);
     VOLK_RUN_TESTS(volk_32f_x2_min_32f_a16, 1e-4, 0, 2046, 10000);
-- 
cgit 


From 5ebd9ef2580aa36cd3a636c6257bd4b80b2380f8 Mon Sep 17 00:00:00 2001
From: Nick Foster
Date: Wed, 26 Jan 2011 15:44:40 -0800
Subject: Volk: find built headers instead of installed ones

---
 volk/lib/Makefile.am | 2 +-
 volk/lib/testqa.cc   | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'volk/lib')

diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am
index afd29a352..6f3d7fd86 100644
--- a/volk/lib/Makefile.am
+++ b/volk/lib/Makefile.am
@@ -120,7 +120,7 @@ noinst_PROGRAMS = \
 	testqa
 
 testqa_SOURCES = testqa.cc qa_utils.cc
-testqa_CPPFLAGS = -DBOOST_TEST_DYN_LINK -DBOOST_TEST_MAIN
+testqa_CPPFLAGS = -DBOOST_TEST_DYN_LINK -DBOOST_TEST_MAIN $(AM_CPPFLAGS)
 testqa_LDFLAGS = $(BOOST_UNIT_TEST_FRAMEWORK_LIB)
 if LV_HAVE_ORC
 testqa_LDADD  = \
diff --git a/volk/lib/testqa.cc b/volk/lib/testqa.cc
index d6b9e347d..e9734411b 100644
--- a/volk/lib/testqa.cc
+++ b/volk/lib/testqa.cc
@@ -1,6 +1,6 @@
 #include "qa_utils.h"
-#include "../include/volk/volk.h"
-#include "../include/volk/volk_registry.h"
+#include <volk/volk.h>
+#include <volk/volk_registry.h>
 #include <boost/test/unit_test.hpp>
 
 BOOST_AUTO_TEST_CASE(volk_test_all) {    
-- 
cgit 


From e34a484084a5224ec3412bd7d6c6f285301f5d43 Mon Sep 17 00:00:00 2001
From: Nick Foster
Date: Wed, 26 Jan 2011 15:47:56 -0800
Subject: Volk: renamed volk_32fc_32f_power_32fc_a16 to
 volk_32fc_s32f_power_32fc_a16

---
 volk/lib/testqa.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'volk/lib')

diff --git a/volk/lib/testqa.cc b/volk/lib/testqa.cc
index e9734411b..f33670856 100644
--- a/volk/lib/testqa.cc
+++ b/volk/lib/testqa.cc
@@ -29,7 +29,7 @@ BOOST_AUTO_TEST_CASE(volk_test_all) {
     VOLK_RUN_TESTS(volk_32f_accumulator_s32f_a16, 1e-4, 0, 2046, 10000);
     VOLK_RUN_TESTS(volk_32f_x2_add_32f_a16, 1e-4, 0, 2046, 10000);
     VOLK_RUN_TESTS(volk_32fc_32f_multiply_32fc_a16, 1e-4, 0, 2046, 10000);
-    VOLK_RUN_TESTS(volk_32fc_32f_power_32fc_a16, 1e-4, 0, 2046, 1000);
+    VOLK_RUN_TESTS(volk_32fc_s32f_power_32fc_a16, 1e-4, 0, 2046, 1000);
     VOLK_RUN_TESTS(volk_32f_s32f_calc_spectral_noise_floor_32f_a16, 1e-4, 20.0, 2046, 10000);
     VOLK_RUN_TESTS(volk_32fc_s32f_atan2_32f_a16, 1e-4, 10.0, 2046, 10000);
     VOLK_RUN_TESTS(volk_32fc_x2_conjugate_dot_prod_32fc_a16, 1e-4, 0, 2046, 10000);
-- 
cgit 


From 6503e3b21978b71908400c994148836bec4a97b9 Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Sun, 30 Jan 2011 12:35:07 -0500
Subject: volk: Updating build structure to work when orc is not installed.

Distcheck passes for me whether or not liborc is installed.
---
 volk/lib/Makefile.am | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'volk/lib')

diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am
index 6f3d7fd86..af7c7f335 100644
--- a/volk/lib/Makefile.am
+++ b/volk/lib/Makefile.am
@@ -45,7 +45,7 @@ AM_CPPFLAGS = $(STD_DEFINES_AND_INCLUDES) \
 
 
 # list of programs run by "make check" and "make distcheck"
-TESTS = testqa
+#TESTS = testqa
 #orc stuff gets built in the ORC directory conditional to ORC being enabled.
 #it gets linked in during the build of libvolk as an added library.
 #there might be a better way to do this.
@@ -77,7 +77,7 @@ libvolk_la_SOURCES = 		\
 volk_orc_LDFLAGS = \
 	$(ORC_LDFLAGS) \
 	-lorc-0.4
-	
+
 volk_orc_LIBADD = \
 	../orc/libvolk_orc.la
 
@@ -103,7 +103,6 @@ endif
 #libvolk_qa_la_LIBADD = \
 #	libvolk.la \
 #	libvolk_runtime.la
-	
 
 # ----------------------------------------------------------------
 # headers that don't get installed
-- 
cgit 


From 736874202f15222fa3ec10ceeb1815e8a595ed3a Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Wed, 2 Feb 2011 13:55:15 -0500
Subject: volk: cleaning up makefile issues after merge.

---
 volk/lib/Makefile.am | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'volk/lib')

diff --git a/volk/lib/Makefile.am b/volk/lib/Makefile.am
index af7c7f335..3e5502369 100644
--- a/volk/lib/Makefile.am
+++ b/volk/lib/Makefile.am
@@ -57,7 +57,8 @@ lib_LTLIBRARIES = \
 EXTRA_DIST = \
 	volk_mktables.c		\
 	volk_rank_archs.h 	\
-	volk_proccpu_sim.c
+	volk_proccpu_sim.c	\
+	gcc_x86_cpuid.h
 
 # ----------------------------------------------------------------
 #                      The main library
@@ -109,8 +110,7 @@ endif
 # ----------------------------------------------------------------
 noinst_HEADERS = \
 	volk_init.h \
-	qa_utils.h \
-	assembly.h
+	qa_utils.h
 
 # ----------------------------------------------------------------
 # Our test program
-- 
cgit 


From b806f6e95cd917e54884841c8e7928204ecd78f8 Mon Sep 17 00:00:00 2001
From: Tom Rondeau
Date: Wed, 2 Feb 2011 14:21:46 -0500
Subject: volk: updating to re-add unaligned dot product under new naming scheme.

---
 volk/lib/qa_32fc_conjugate_dot_prod_unaligned.cc | 138 ---------------
 volk/lib/qa_32fc_conjugate_dot_prod_unaligned.h  |  18 --
 volk/lib/qa_32fc_x2_conjugate_dot_prod_32fc_u.cc | 138 +++++++++++++++
 volk/lib/qa_32fc_x2_conjugate_dot_prod_32fc_u.h  |  18 ++
 volk/lib/qa_volk.cc                              | 213 -----------------------
 volk/lib/testqa.cc                               |   1 +
 6 files changed, 157 insertions(+), 369 deletions(-)
 delete mode 100644 volk/lib/qa_32fc_conjugate_dot_prod_unaligned.cc
 delete mode 100644 volk/lib/qa_32fc_conjugate_dot_prod_unaligned.h
 create mode 100644 volk/lib/qa_32fc_x2_conjugate_dot_prod_32fc_u.cc
 create mode 100644 volk/lib/qa_32fc_x2_conjugate_dot_prod_32fc_u.h
 delete mode 100644 volk/lib/qa_volk.cc

(limited to 'volk/lib')

diff --git a/volk/lib/qa_32fc_conjugate_dot_prod_unaligned.cc b/volk/lib/qa_32fc_conjugate_dot_prod_unaligned.cc
deleted file mode 100644
index a0680bab6..000000000
--- a/volk/lib/qa_32fc_conjugate_dot_prod_unaligned.cc
+++ /dev/null
@@ -1,138 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32fc_conjugate_dot_prod_unaligned.h>
-#include <stdlib.h>
-#include <math.h>
-#include <time.h>
-
-
-#define assertcomplexEqual(expected, actual, delta)			\
-  CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * delta); \
-  CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected))* delta);	
-
-#define	ERR_DELTA	(1e-4)
-
-//test for sse
-
-#if LV_HAVE_SSE && LV_HAVE_64
-
-static float uniform() {
-  return 2.0 * ((float) rand() / RAND_MAX - 0.5);	// uniformly (-1, 1)
-}
-
-static void
-random_floats (float *buf, unsigned n)
-{
-  for (unsigned i = 0; i < n; i++)
-    buf[i] = uniform () * 32767;
-}
-
-
-void qa_32fc_conjugate_dot_prod_unaligned::t1() {
-  const int vlen = 789743;
-  
-  volk_environment_init();
-  int ret;
-
-  std::complex<float>* input;
-  std::complex<float>* taps;
-  
-  std::complex<float>* result_generic;
-  std::complex<float>* result;
-
-  ret = posix_memalign((void**)&input, 16, vlen << 3);
-  ret = posix_memalign((void**)&taps, 16, vlen << 3);
-  ret = posix_memalign((void**)&result_generic, 16, 8);
-  ret = posix_memalign((void**)&result, 16, 8);
-  
-
-  result_generic[0] = std::complex<float>(0,0);
-  result[0] = std::complex<float>(0,0);
-
-  random_floats((float*)input, vlen * 2);
-  random_floats((float*)taps, vlen * 2);
-  
-  
-
-  volk_32fc_conjugate_dot_prod_unaligned_manual(result_generic, input, taps, vlen * 8,  "generic");
-
-  
-  volk_32fc_conjugate_dot_prod_unaligned_manual(result, input, taps, vlen * 8, "sse");
-
-  printf("32fc_conjugate_dot_prod_unaligned\n");
-  printf("generic: %f +i%f ... sse: %f +i%f\n", std::real(result_generic[0]), std::imag(result_generic[0]), std::real(result[0]), std::imag(result[0]));
-
-  assertcomplexEqual(result_generic[0], result[0], ERR_DELTA);
-
-  free(input);
-  free(taps);
-  free(result_generic);
-  free(result);
-  
-}
-
-
-#elif LV_HAVE_SSE && LV_HAVE_32
-
-static float uniform() {
-  return 2.0 * ((float) rand() / RAND_MAX - 0.5);	// uniformly (-1, 1)
-}
-
-static void
-random_floats (float *buf, unsigned n)
-{
-  for (unsigned i = 0; i < n; i++)
-    buf[i] = uniform () * 32767;
-}
-
-
-void qa_32fc_conjugate_dot_prod_unaligned::t1() {
-  const int vlen = 789743;
-  
-  volk_environment_init();
-  int ret;
-
-  std::complex<float>* input;
-  std::complex<float>* taps;
-  
-  std::complex<float>* result_generic;
-  std::complex<float>* result;
-
-  ret = posix_memalign((void**)&input, 16, vlen << 3);
-  ret = posix_memalign((void**)&taps, 16, vlen << 3);
-  ret = posix_memalign((void**)&result_generic, 16, 8);
-  ret = posix_memalign((void**)&result, 16, 8);
-  
-
-  result_generic[0] = std::complex<float>(0,0);
-  result[0] = std::complex<float>(0,0);
-
-  random_floats((float*)input, vlen * 2);
-  random_floats((float*)taps, vlen * 2);
-  
-  
-
-  volk_32fc_conjugate_dot_prod_unaligned_manual(result_generic, input, taps, vlen * 8,  "generic");
-
-  
-  volk_32fc_conjugate_dot_prod_unaligned_manual(result, input, taps, vlen * 8, "sse_32");
-
-  printf("32fc_conjugate_dot_prod_unaligned\n");
-  printf("generic: %f +i%f ... sse: %f +i%f\n", std::real(result_generic[0]), std::imag(result_generic[0]), std::real(result[0]), std::imag(result[0]));
-
-  assertcomplexEqual(result_generic[0], result[0], ERR_DELTA);
-
-  free(input);
-  free(taps);
-  free(result_generic);
-  free(result);
-  
-}
-
-
-#else
-
-void qa_32fc_conjugate_dot_prod_unaligned::t1() {
-  printf("sse not available... no test performed\n");
-}
-
-#endif /*LV_HAVE_SSE*/
diff --git a/volk/lib/qa_32fc_conjugate_dot_prod_unaligned.h b/volk/lib/qa_32fc_conjugate_dot_prod_unaligned.h
deleted file mode 100644
index 7aead53a1..000000000
--- a/volk/lib/qa_32fc_conjugate_dot_prod_unaligned.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32FC_CONJUGATE_DOT_PROD_UNALIGNED_H
-#define INCLUDED_QA_32FC_CONJUGATE_DOT_PROD_UNALIGNED_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32fc_conjugate_dot_prod_unaligned : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32fc_conjugate_dot_prod_unaligned);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32FC_CONJUGATE_DOT_PROD_UNALIGNED_H */
diff --git a/volk/lib/qa_32fc_x2_conjugate_dot_prod_32fc_u.cc b/volk/lib/qa_32fc_x2_conjugate_dot_prod_32fc_u.cc
new file mode 100644
index 000000000..fefdf06ee
--- /dev/null
+++ b/volk/lib/qa_32fc_x2_conjugate_dot_prod_32fc_u.cc
@@ -0,0 +1,138 @@
+#include <volk/volk.h>
+#include <qa_32fc_x2_conjugate_dot_prod_32fc_u.h>
+#include <stdlib.h>
+#include <math.h>
+#include <time.h>
+
+
+#define assertcomplexEqual(expected, actual, delta)			\
+  CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * delta); \
+  CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected))* delta);	
+
+#define	ERR_DELTA	(1e-4)
+
+//test for sse
+
+#if LV_HAVE_SSE && LV_HAVE_64
+
+static float uniform() {
+  return 2.0 * ((float) rand() / RAND_MAX - 0.5);	// uniformly (-1, 1)
+}
+
+static void
+random_floats (float *buf, unsigned n)
+{
+  for (unsigned i = 0; i < n; i++)
+    buf[i] = uniform () * 32767;
+}
+
+
+void qa_32fc_x2_conjugate_dot_prod_32fc_u::t1() {
+  const int vlen = 789743;
+
+  volk_environment_init();
+  int ret;
+
+  std::complex<float>* input;
+  std::complex<float>* taps;
+  
+  std::complex<float>* result_generic;
+  std::complex<float>* result;
+
+  ret = posix_memalign((void**)&input, 16, vlen << 3);
+  ret = posix_memalign((void**)&taps, 16, vlen << 3);
+  ret = posix_memalign((void**)&result_generic, 16, 8);
+  ret = posix_memalign((void**)&result, 16, 8);
+  
+
+  result_generic[0] = std::complex<float>(0,0);
+  result[0] = std::complex<float>(0,0);
+
+  random_floats((float*)input, vlen * 2);
+  random_floats((float*)taps, vlen * 2);
+  
+  
+
+  volk_32fc_x2_conjugate_dot_prod_32fc_u_manual(result_generic, input, taps, vlen * 8,  "generic");
+
+  
+  volk_32fc_x2_conjugate_dot_prod_32fc_u_manual(result, input, taps, vlen * 8, "sse");
+
+  printf("32fc_x2_conjugate_dot_prod_32fc_u\n");
+  printf("generic: %f +i%f ... sse: %f +i%f\n", std::real(result_generic[0]), std::imag(result_generic[0]), std::real(result[0]), std::imag(result[0]));
+
+  assertcomplexEqual(result_generic[0], result[0], ERR_DELTA);
+
+  free(input);
+  free(taps);
+  free(result_generic);
+  free(result);
+  
+}
+
+
+#elif LV_HAVE_SSE && LV_HAVE_32
+
+static float uniform() {
+  return 2.0 * ((float) rand() / RAND_MAX - 0.5);	// uniformly (-1, 1)
+}
+
+static void
+random_floats (float *buf, unsigned n)
+{
+  for (unsigned i = 0; i < n; i++)
+    buf[i] = uniform () * 32767;
+}
+
+
+void qa_32fc_x2_conjugate_dot_prod_32fc_u::t1() {
+  const int vlen = 789743;
+  
+  volk_environment_init();
+  int ret;
+
+  std::complex<float>* input;
+  std::complex<float>* taps;
+  
+  std::complex<float>* result_generic;
+  std::complex<float>* result;
+
+  ret = posix_memalign((void**)&input, 16, vlen << 3);
+  ret = posix_memalign((void**)&taps, 16, vlen << 3);
+  ret = posix_memalign((void**)&result_generic, 16, 8);
+  ret = posix_memalign((void**)&result, 16, 8);
+  
+
+  result_generic[0] = std::complex<float>(0,0);
+  result[0] = std::complex<float>(0,0);
+
+  random_floats((float*)input, vlen * 2);
+  random_floats((float*)taps, vlen * 2);
+  
+  
+
+  volk_32fc_x2_conjugate_dot_prod_32fc_u_manual(result_generic, input, taps, vlen * 8,  "generic");
+
+  
+  volk_32fc_x2_conjugate_dot_prod_32fc_u_manual(result, input, taps, vlen * 8, "sse_32");
+
+  printf("32fc_x2_conjugate_dot_prod_32fc_u\n");
+  printf("generic: %f +i%f ... sse: %f +i%f\n", std::real(result_generic[0]), std::imag(result_generic[0]), std::real(result[0]), std::imag(result[0]));
+
+  assertcomplexEqual(result_generic[0], result[0], ERR_DELTA);
+
+  free(input);
+  free(taps);
+  free(result_generic);
+  free(result);
+  
+}
+
+
+#else
+
+void qa_32fc_x2_conjugate_dot_prod_32fc_u::t1() {
+  printf("sse not available... no test performed\n");
+}
+
+#endif /*LV_HAVE_SSE*/
diff --git a/volk/lib/qa_32fc_x2_conjugate_dot_prod_32fc_u.h b/volk/lib/qa_32fc_x2_conjugate_dot_prod_32fc_u.h
new file mode 100644
index 000000000..f07402403
--- /dev/null
+++ b/volk/lib/qa_32fc_x2_conjugate_dot_prod_32fc_u.h
@@ -0,0 +1,18 @@
+#ifndef INCLUDED_QA_32FC_X2_CONJUGATE_DOT_PROD_32FC_U_H
+#define INCLUDED_QA_32FC_X2_CONJUGATE_DOT_PROD_32FC_U_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestCase.h>
+
+class qa_32fc_x2_conjugate_dot_prod_32fc_u : public CppUnit::TestCase {
+
+  CPPUNIT_TEST_SUITE (qa_32fc_x2_conjugate_dot_prod_32fc_u);
+  CPPUNIT_TEST (t1);
+  CPPUNIT_TEST_SUITE_END ();
+
+ private:
+  void t1 ();
+};
+
+
+#endif /* INCLUDED_QA_32FC_X2_CONJUGATE_DOT_PROD_32FC_U_H */
diff --git a/volk/lib/qa_volk.cc b/volk/lib/qa_volk.cc
deleted file mode 100644
index 98d3e9728..000000000
--- a/volk/lib/qa_volk.cc
+++ /dev/null
@@ -1,213 +0,0 @@
-/*
- * Copyright 2008 Free Software Foundation, Inc.
- * 
- * This file is part of GNU Radio
- * 
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- * 
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-/*
- * This class gathers together all the test cases for the example
- * directory into a single test suite.  As you create new test cases,
- * add them here.
- */
-
-#include <qa_volk.h>
-#include <qa_16s_quad_max_star_aligned16.h>
-#include <qa_32fc_dot_prod_aligned16.h>
-#include <qa_32fc_square_dist_aligned16.h>
-#include <qa_32fc_square_dist_scalar_mult_aligned16.h>
-#include <qa_32f_sum_of_poly_aligned16.h>
-#include <qa_32fc_index_max_aligned16.h>
-#include <qa_32f_index_max_aligned16.h>
-#include <qa_32fc_conjugate_dot_prod_aligned16.h>
-#include <qa_32fc_conjugate_dot_prod_unaligned.h>
-#include <qa_16s_permute_and_scalar_add_aligned16.h>
-#include <qa_16s_branch_4_state_8_aligned16.h>
-#include <qa_16s_max_star_horizontal_aligned16.h>
-#include <qa_16s_max_star_aligned16.h>
-#include <qa_16s_add_quad_aligned16.h>
-#include <qa_32f_add_aligned16.h>
-#include <qa_32f_subtract_aligned16.h>
-#include <qa_32f_max_aligned16.h>
-#include <qa_32f_min_aligned16.h>
-#include <qa_64f_max_aligned16.h>
-#include <qa_64f_min_aligned16.h>
-#include <qa_32s_and_aligned16.h>
-#include <qa_32s_or_aligned16.h>
-#include <qa_32f_dot_prod_aligned16.h>
-#include <qa_32f_dot_prod_unaligned16.h>
-#include <qa_32f_fm_detect_aligned16.h>
-#include <qa_32fc_32f_multiply_aligned16.h>
-#include <qa_32fc_multiply_aligned16.h>
-#include <qa_32f_divide_aligned16.h>
-#include <qa_32f_multiply_aligned16.h>
-#include <qa_32f_sqrt_aligned16.h>
-#include <qa_8sc_multiply_conjugate_16sc_aligned16.h>
-#include <qa_8sc_multiply_conjugate_32fc_aligned16.h>
-#include <qa_32u_popcnt_aligned16.h>
-#include <qa_64u_popcnt_aligned16.h>
-#include <qa_16u_byteswap_aligned16.h>
-#include <qa_32u_byteswap_aligned16.h>
-#include <qa_64u_byteswap_aligned16.h>
-#include <qa_32f_normalize_aligned16.h>
-#include <qa_16sc_deinterleave_16s_aligned16.h>
-#include <qa_16sc_deinterleave_32f_aligned16.h>
-#include <qa_16sc_deinterleave_real_16s_aligned16.h>
-#include <qa_16sc_deinterleave_real_32f_aligned16.h>
-#include <qa_16sc_deinterleave_real_8s_aligned16.h>
-#include <qa_16sc_magnitude_16s_aligned16.h>
-#include <qa_16sc_magnitude_32f_aligned16.h>
-#include <qa_32fc_deinterleave_32f_aligned16.h>
-#include <qa_32fc_deinterleave_64f_aligned16.h>
-#include <qa_32fc_deinterleave_real_16s_aligned16.h>
-#include <qa_32fc_deinterleave_real_32f_aligned16.h>
-#include <qa_32fc_deinterleave_real_64f_aligned16.h>
-#include <qa_32fc_magnitude_16s_aligned16.h>
-#include <qa_32fc_magnitude_32f_aligned16.h>
-#include <qa_32f_interleave_16sc_aligned16.h>
-#include <qa_32f_interleave_32fc_aligned16.h>
-#include <qa_8sc_deinterleave_16s_aligned16.h>
-#include <qa_8sc_deinterleave_32f_aligned16.h>
-#include <qa_8sc_deinterleave_real_16s_aligned16.h>
-#include <qa_8sc_deinterleave_real_32f_aligned16.h>
-#include <qa_8sc_deinterleave_real_8s_aligned16.h>
-#include <qa_16s_convert_32f_aligned16.h>
-#include <qa_16s_convert_32f_unaligned16.h>
-#include <qa_16s_convert_8s_aligned16.h>
-#include <qa_16s_convert_8s_unaligned16.h>
-#include <qa_32f_convert_16s_aligned16.h>
-#include <qa_32f_convert_16s_unaligned16.h>
-#include <qa_32f_convert_32s_aligned16.h>
-#include <qa_32f_convert_32s_unaligned16.h>
-#include <qa_32f_convert_64f_aligned16.h>
-#include <qa_32f_convert_64f_unaligned16.h>
-#include <qa_32f_convert_8s_aligned16.h>
-#include <qa_32f_convert_8s_unaligned16.h>
-#include <qa_32s_convert_32f_aligned16.h>
-#include <qa_32s_convert_32f_unaligned16.h>
-#include <qa_64f_convert_32f_aligned16.h>
-#include <qa_64f_convert_32f_unaligned16.h>
-#include <qa_8s_convert_16s_aligned16.h>
-#include <qa_8s_convert_16s_unaligned16.h>
-#include <qa_8s_convert_32f_aligned16.h>
-#include <qa_8s_convert_32f_unaligned16.h>
-#include <qa_32fc_32f_power_32fc_aligned16.h>
-#include <qa_32f_power_aligned16.h>
-#include <qa_32fc_atan2_32f_aligned16.h>
-#include <qa_32fc_power_spectral_density_32f_aligned16.h> 
-#include <qa_32fc_power_spectrum_32f_aligned16.h>
-#include <qa_32f_calc_spectral_noise_floor_aligned16.h>
-#include <qa_32f_accumulator_aligned16.h>
-#include <qa_32f_stddev_aligned16.h>
-#include <qa_32f_stddev_and_mean_aligned16.h>
-
-CppUnit::TestSuite *
-qa_volk::suite()
-{
-  CppUnit::TestSuite *s = new CppUnit::TestSuite("volk");
-
-  s->addTest(qa_16s_quad_max_star_aligned16::suite());
-  s->addTest(qa_32fc_dot_prod_aligned16::suite());
-  s->addTest(qa_32fc_square_dist_scalar_mult_aligned16::suite());
-  s->addTest(qa_32fc_square_dist_aligned16::suite());
-  s->addTest(qa_32f_sum_of_poly_aligned16::suite());
-  s->addTest(qa_32fc_index_max_aligned16::suite());
-  s->addTest(qa_32f_index_max_aligned16::suite());
-  s->addTest(qa_32fc_conjugate_dot_prod_aligned16::suite());
-  s->addTest(qa_32fc_conjugate_dot_prod_unaligned::suite());
-  s->addTest(qa_16s_permute_and_scalar_add_aligned16::suite());
-  s->addTest(qa_16s_branch_4_state_8_aligned16::suite());
-  s->addTest(qa_16s_max_star_horizontal_aligned16::suite());
-  s->addTest(qa_16s_max_star_aligned16::suite());
-  s->addTest(qa_16s_add_quad_aligned16::suite());
-  s->addTest(qa_32f_add_aligned16::suite());
-  s->addTest(qa_32f_subtract_aligned16::suite());
-  s->addTest(qa_32f_max_aligned16::suite());
-  s->addTest(qa_32f_min_aligned16::suite());
-  s->addTest(qa_64f_max_aligned16::suite());
-  s->addTest(qa_64f_min_aligned16::suite());
-  s->addTest(qa_32s_and_aligned16::suite());
-  s->addTest(qa_32s_or_aligned16::suite());
-  s->addTest(qa_32f_dot_prod_aligned16::suite());
-  s->addTest(qa_32f_dot_prod_unaligned16::suite());
-  s->addTest(qa_32f_fm_detect_aligned16::suite());
-  s->addTest(qa_32fc_32f_multiply_aligned16::suite());
-  s->addTest(qa_32fc_multiply_aligned16::suite());
-  s->addTest(qa_32f_divide_aligned16::suite());
-  s->addTest(qa_32f_multiply_aligned16::suite());
-  s->addTest(qa_32f_sqrt_aligned16::suite());
-  s->addTest(qa_8sc_multiply_conjugate_16sc_aligned16::suite());
-  s->addTest(qa_8sc_multiply_conjugate_32fc_aligned16::suite());
-  s->addTest(qa_32u_popcnt_aligned16::suite());
-  s->addTest(qa_64u_popcnt_aligned16::suite());
-  s->addTest(qa_16u_byteswap_aligned16::suite());
-  s->addTest(qa_32u_byteswap_aligned16::suite());
-  s->addTest(qa_64u_byteswap_aligned16::suite());
-  s->addTest(qa_32f_normalize_aligned16::suite());
-  s->addTest(qa_16sc_deinterleave_16s_aligned16::suite());
-  s->addTest(qa_16sc_deinterleave_32f_aligned16::suite());
-  s->addTest(qa_16sc_deinterleave_real_16s_aligned16::suite());
-  s->addTest(qa_16sc_deinterleave_real_32f_aligned16::suite());
-  s->addTest(qa_16sc_deinterleave_real_8s_aligned16::suite());
-  s->addTest(qa_16sc_magnitude_16s_aligned16::suite());
-  s->addTest(qa_16sc_magnitude_32f_aligned16::suite());
-  s->addTest(qa_32fc_deinterleave_32f_aligned16::suite());
-  s->addTest(qa_32fc_deinterleave_64f_aligned16::suite());
-  s->addTest(qa_32fc_deinterleave_real_16s_aligned16::suite());
-  s->addTest(qa_32fc_deinterleave_real_32f_aligned16::suite());
-  s->addTest(qa_32fc_deinterleave_real_64f_aligned16::suite());
-  s->addTest(qa_32fc_magnitude_16s_aligned16::suite());
-  s->addTest(qa_32fc_magnitude_32f_aligned16::suite());
-  s->addTest(qa_32f_interleave_16sc_aligned16::suite());
-  s->addTest(qa_32f_interleave_32fc_aligned16::suite());
-  s->addTest(qa_8sc_deinterleave_16s_aligned16::suite());
-  s->addTest(qa_8sc_deinterleave_32f_aligned16::suite());
-  s->addTest(qa_8sc_deinterleave_real_16s_aligned16::suite());
-  s->addTest(qa_8sc_deinterleave_real_32f_aligned16::suite());
-  s->addTest(qa_8sc_deinterleave_real_8s_aligned16::suite());
-  s->addTest(qa_16s_convert_32f_aligned16::suite());
-  s->addTest(qa_16s_convert_32f_unaligned16::suite());
-  s->addTest(qa_16s_convert_8s_aligned16::suite());
-  s->addTest(qa_16s_convert_8s_unaligned16::suite());
-  s->addTest(qa_32f_convert_16s_aligned16::suite());
-  s->addTest(qa_32f_convert_16s_unaligned16::suite());
-  s->addTest(qa_32f_convert_32s_aligned16::suite());
-  s->addTest(qa_32f_convert_32s_unaligned16::suite());
-  s->addTest(qa_32f_convert_64f_aligned16::suite());
-  s->addTest(qa_32f_convert_64f_unaligned16::suite());
-  s->addTest(qa_32f_convert_8s_aligned16::suite());
-  s->addTest(qa_32f_convert_8s_unaligned16::suite());
-  s->addTest(qa_32s_convert_32f_aligned16::suite());
-  s->addTest(qa_32s_convert_32f_unaligned16::suite());
-  s->addTest(qa_64f_convert_32f_aligned16::suite());
-  s->addTest(qa_64f_convert_32f_unaligned16::suite());
-  s->addTest(qa_8s_convert_16s_aligned16::suite());
-  s->addTest(qa_8s_convert_16s_unaligned16::suite());
-  s->addTest(qa_8s_convert_32f_aligned16::suite());
-  s->addTest(qa_8s_convert_32f_unaligned16::suite());
-  s->addTest(qa_32fc_32f_power_32fc_aligned16::suite());
-  s->addTest(qa_32f_power_aligned16::suite());
-  s->addTest(qa_32fc_atan2_32f_aligned16::suite());
-  s->addTest(qa_32fc_power_spectral_density_32f_aligned16::suite());
-  s->addTest(qa_32fc_power_spectrum_32f_aligned16::suite());
-  s->addTest(qa_32f_calc_spectral_noise_floor_aligned16::suite());
-  s->addTest(qa_32f_accumulator_aligned16::suite());
-  s->addTest(qa_32f_stddev_aligned16::suite());
-  s->addTest(qa_32f_stddev_and_mean_aligned16::suite());
-
-  return s;
-}
diff --git a/volk/lib/testqa.cc b/volk/lib/testqa.cc
index f33670856..779bc61eb 100644
--- a/volk/lib/testqa.cc
+++ b/volk/lib/testqa.cc
@@ -33,6 +33,7 @@ BOOST_AUTO_TEST_CASE(volk_test_all) {
     VOLK_RUN_TESTS(volk_32f_s32f_calc_spectral_noise_floor_32f_a16, 1e-4, 20.0, 2046, 10000);
     VOLK_RUN_TESTS(volk_32fc_s32f_atan2_32f_a16, 1e-4, 10.0, 2046, 10000);
     VOLK_RUN_TESTS(volk_32fc_x2_conjugate_dot_prod_32fc_a16, 1e-4, 0, 2046, 10000);
+    VOLK_RUN_TESTS(volk_32fc_x2_conjugate_dot_prod_32fc_u, 1e-4, 0, 2046, 10000);
     VOLK_RUN_TESTS(volk_32fc_deinterleave_32f_x2_a16, 1e-4, 0, 2046, 10000);
     VOLK_RUN_TESTS(volk_32fc_deinterleave_64f_x2_a16, 1e-4, 0, 2046, 10000);
     VOLK_RUN_TESTS(volk_32fc_s32f_deinterleave_real_16i_a16, 0, 32768, 2046, 10000);
-- 
cgit 


From b013372e7e02461bf5e67845b333030eee164bea Mon Sep 17 00:00:00 2001
From: Josh Blum
Date: Tue, 8 Mar 2011 16:33:17 -0800
Subject: volk: replace posix_memalign with something cross platform

---
 volk/lib/qa_utils.cc | 57 ++++++++++++++++++++++++----------------------------
 volk/lib/qa_utils.h  |  2 +-
 2 files changed, 27 insertions(+), 32 deletions(-)

(limited to 'volk/lib')

diff --git a/volk/lib/qa_utils.cc b/volk/lib/qa_utils.cc
index e85e2c1bc..710d56fb8 100644
--- a/volk/lib/qa_utils.cc
+++ b/volk/lib/qa_utils.cc
@@ -1,19 +1,20 @@
 #include "qa_utils.h"
-#include <stdlib.h>
+#include <cstring>
 #include <boost/foreach.hpp>
 #include <boost/assign/list_of.hpp>
 #include <boost/tokenizer.hpp>
 //#include <boost/test/unit_test.hpp>
 #include <iostream>
 #include <vector>
-#include <time.h>
-#include <math.h>
+#include <ctime>
+#include <cmath>
 #include <boost/lexical_cast.hpp>
 //#include <volk/volk_runtime.h>
 #include <volk/volk_registry.h>
 #include <volk/volk.h>
 #include <boost/typeof/typeof.hpp>
 #include <boost/type_traits.hpp>
+#include <boost/shared_array.hpp>
 
 float uniform() {
   return 2.0 * ((float) rand() / RAND_MAX - 0.5);	// uniformly (-1, 1)
@@ -61,22 +62,6 @@ void load_random_data(void *data, volk_type_t type, unsigned int n) {
     }
 }
 
-void *make_aligned_buffer(unsigned int len, unsigned int size) {
-  void *buf;
-  int ret;
-  ret = posix_memalign((void**)&buf, 16, len * size);
-  assert(ret == 0);
-  memset(buf, 0x00, len*size);
-  return buf;
-}
-
-void make_buffer_for_signature(std::vector<void *> &buffs, std::vector<volk_type_t> inputsig, unsigned int vlen) {
-    BOOST_FOREACH(volk_type_t sig, inputsig) {
-        if(!sig.is_scalar) //we don't make buffers for scalars
-          buffs.push_back(make_aligned_buffer(vlen, sig.size*(sig.is_complex ? 2 : 1)));
-    }
-}
-
 static std::vector<std::string> get_arch_list(const int archs[]) {
     std::vector<std::string> archlist;
     int num_archs = archs[0];
@@ -282,6 +267,18 @@ bool icompare(t *in1, t *in2, unsigned int vlen, unsigned int tol) {
     return fail;
 }
 
+class volk_qa_aligned_mem_pool{
+public:
+    void *get_new(size_t size, size_t alignment = 16){
+        boost::shared_array<char> mem(new char[size + alignment-1]);
+        size_t ptr = size_t(mem.get() + alignment-1) & ~(alignment-1);
+        std::memset((void *)ptr, 0x00, size);
+        _mems.push_back(mem);
+        return (void *)ptr;
+    }
+private: std::vector<boost::shared_array<char> > _mems;
+};
+
 bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name, float tol, float scalar, int vlen, int iter) {
     std::cout << "RUN_VOLK_TESTS: " << name << std::endl;
     
@@ -292,7 +289,10 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name,
         std::cout << "no architectures to test" << std::endl;
         return false;
     }
-    
+
+    //something that can hang onto memory and cleanup when this function exits
+    volk_qa_aligned_mem_pool mem_pool;
+
     //now we have to get a function signature by parsing the name
     std::vector<volk_type_t> inputsig, outputsig;
     get_signatures_from_name(inputsig, outputsig, name);
@@ -309,12 +309,12 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name,
     //for(int i=0; i<inputsig.size(); i++) std::cout << "Input: " << inputsig[i].str << std::endl;
     //for(int i=0; i<outputsig.size(); i++) std::cout << "Output: " << outputsig[i].str << std::endl;
     std::vector<void *> inbuffs;
-    std::vector<void *> free_buffs; //this is just a list of void*'s that i'll have to free later.
-                                    //we need it because we dupe void*s in test_data below.
-    make_buffer_for_signature(inbuffs, inputsig, vlen);
+    BOOST_FOREACH(volk_type_t sig, inputsig) {
+        if(!sig.is_scalar) //we don't make buffers for scalars
+          inbuffs.push_back(mem_pool.get_new(vlen*sig.size*(sig.is_complex ? 2 : 1)));
+    }
     for(int i=0; i<inbuffs.size(); i++) {
-        load_random_data(inbuffs[i], inputsig[i], vlen);   
-        free_buffs.push_back(inbuffs[i]);
+        load_random_data(inbuffs[i], inputsig[i], vlen);
     }
     
     //ok let's make a vector of vector of void buffers, which holds the input/output vectors for each arch
@@ -322,8 +322,7 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name,
     for(int i=0; i<arch_list.size(); i++) {
         std::vector<void *> arch_buffs;
         for(int j=0; j<outputsig.size(); j++) {
-            arch_buffs.push_back(make_aligned_buffer(vlen, outputsig[j].size*(outputsig[j].is_complex ? 2 : 1)));
-            free_buffs.push_back(arch_buffs.back());
+            arch_buffs.push_back(mem_pool.get_new(vlen*outputsig[j].size*(outputsig[j].is_complex ? 2 : 1)));
         }
         for(int j=0; j<inputsig.size(); j++) {
             arch_buffs.push_back(inbuffs[j]);
@@ -437,10 +436,6 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name,
         }
     }
 
-    BOOST_FOREACH(void *buf, free_buffs) {
-        free(buf);
-    }
-
     return fail_global;
 }
 
diff --git a/volk/lib/qa_utils.h b/volk/lib/qa_utils.h
index e2539060a..1b64bacaa 100644
--- a/volk/lib/qa_utils.h
+++ b/volk/lib/qa_utils.h
@@ -1,7 +1,7 @@
 #ifndef VOLK_QA_UTILS_H
 #define VOLK_QA_UTILS_H
 
-#include <stdlib.h>
+#include <cstdlib>
 #include <string>
 
 struct volk_type_t {
-- 
cgit 


From 6673be777cd5395ae867e67db8c95aa09066617a Mon Sep 17 00:00:00 2001
From: Johnathan Corgan
Date: Sat, 12 Mar 2011 15:47:40 -0800
Subject: Added/updated ignore files.

---
 volk/lib/.gitignore | 1 +
 1 file changed, 1 insertion(+)

(limited to 'volk/lib')

diff --git a/volk/lib/.gitignore b/volk/lib/.gitignore
index 0f17543ab..6a5fde28f 100644
--- a/volk/lib/.gitignore
+++ b/volk/lib/.gitignore
@@ -20,3 +20,4 @@
 /volk_proccpu_sim.c
 /volk_runtime.c
 /test_all
+/testqa
-- 
cgit 


From 888beebf6015d9a88dbd1c3c842cf2490899a99b Mon Sep 17 00:00:00 2001
From: Josh Blum
Date: Mon, 14 Mar 2011 09:33:00 -0700
Subject: volk: simplify the get new method for the aligned pool

---
 volk/lib/qa_utils.cc | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

(limited to 'volk/lib')

diff --git a/volk/lib/qa_utils.cc b/volk/lib/qa_utils.cc
index 710d56fb8..b0f63d2b5 100644
--- a/volk/lib/qa_utils.cc
+++ b/volk/lib/qa_utils.cc
@@ -6,6 +6,7 @@
 //#include <boost/test/unit_test.hpp>
 #include <iostream>
 #include <vector>
+#include <list>
 #include <ctime>
 #include <cmath>
 #include <boost/lexical_cast.hpp>
@@ -14,7 +15,6 @@
 #include <volk/volk.h>
 #include <boost/typeof/typeof.hpp>
 #include <boost/type_traits.hpp>
-#include <boost/shared_array.hpp>
 
 float uniform() {
   return 2.0 * ((float) rand() / RAND_MAX - 0.5);	// uniformly (-1, 1)
@@ -270,13 +270,11 @@ bool icompare(t *in1, t *in2, unsigned int vlen, unsigned int tol) {
 class volk_qa_aligned_mem_pool{
 public:
     void *get_new(size_t size, size_t alignment = 16){
-        boost::shared_array<char> mem(new char[size + alignment-1]);
-        size_t ptr = size_t(mem.get() + alignment-1) & ~(alignment-1);
-        std::memset((void *)ptr, 0x00, size);
-        _mems.push_back(mem);
-        return (void *)ptr;
+        _mems.push_back(std::vector<char>(size + alignment-1, 0));
+        size_t ptr = size_t(&_mems.back().front());
+        return (void *)((ptr + alignment-1) & ~(alignment-1));
     }
-private: std::vector<boost::shared_array<char> > _mems;
+private: std::list<std::vector<char> > _mems;
 };
 
 bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name, float tol, float scalar, int vlen, int iter) {
-- 
cgit