summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- volk/gen/archs.xml 2
-rw-r--r-- volk/gen/make_cpuid_c.py 30
-rw-r--r-- volk/lib/testqa.cc 160
3 files changed, 109 insertions, 83 deletions
diff --git a/volk/gen/archs.xml b/volk/gen/archs.xml
index 661b3f890..f6822871f 100644
--- a/volk/gen/archs.xml
+++ b/volk/gen/archs.xml
@@ -11,7 +11,7 @@
</arch>
<arch name="neon" type="arm">
- <flag>mfpu=neon -funsafe-math-optimizations</flag>
+ <flag>mfpu=neon -mfloat-abi=softfp -funsafe-math-optimizations</flag>
<alignment>16</alignment>
</arch>
diff --git a/volk/gen/make_cpuid_c.py b/volk/gen/make_cpuid_c.py
index 3b2f12d5c..eb88dcd7f 100644
--- a/volk/gen/make_cpuid_c.py
+++ b/volk/gen/make_cpuid_c.py
@@ -157,9 +157,35 @@ int i_can_has_%s () {
elif str(domarch.attributes["type"].value) == "arm":
arch = str(domarch.attributes["name"].value);
tempstring = tempstring + """\
+#if defined(__arm__) && defined(__linux__)
+#include <asm/hwcap.h>
+#include <linux/auxvec.h>
+#include <stdio.h>
+#define LOOK_FOR_NEON
+#endif
+
int i_can_has_%s () {
-#ifdef __NEON__
- return 1;
+//it's linux-specific, but if you're compiling libvolk for NEON
+//on Windows you have other problems
+
+#ifdef LOOK_FOR_NEON
+ FILE *auxvec_f;
+ unsigned long auxvec[2];
+ unsigned int found_neon = 0;
+ auxvec_f = fopen("/proc/self/auxv", "rb");
+ if(!auxvec_f) return 0;
+
+ //so auxv is basically 32b of ID and 32b of value
+ //so it goes like this
+ //stop at EOF or on a short read; otherwise a CPU without NEON loops forever
+ while(!found_neon && fread(auxvec, sizeof(unsigned long), 2, auxvec_f) == 2) {
+ if((auxvec[0] == AT_HWCAP) && (auxvec[1] & HWCAP_NEON))
+ found_neon = 1;
+ }
+
+ fclose(auxvec_f);
+ return found_neon;
+
#else
return 0;
#endif
diff --git a/volk/lib/testqa.cc b/volk/lib/testqa.cc
index 98b09c50f..62e62c2f4 100644
--- a/volk/lib/testqa.cc
+++ b/volk/lib/testqa.cc
@@ -5,89 +5,89 @@
//VOLK_RUN_TESTS(volk_16i_x5_add_quad_16i_x4_a, 1e-4, 2046, 10000);
//VOLK_RUN_TESTS(volk_16i_branch_4_state_8_a, 1e-4, 2046, 10000);
VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_real_32f_a, 1e-5, 32768.0, 204600, 10000);
-VOLK_RUN_TESTS(volk_16ic_deinterleave_real_8i_a, 0, 0, 204600, 10000);
-VOLK_RUN_TESTS(volk_16ic_deinterleave_16i_x2_a, 0, 0, 204600, 10000);
-VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_32f_x2_a, 1e-4, 32768.0, 204600, 1000);
-VOLK_RUN_TESTS(volk_16ic_deinterleave_real_16i_a, 0, 0, 204600, 10000);
-VOLK_RUN_TESTS(volk_16ic_magnitude_16i_a, 1, 0, 204600, 100);
-VOLK_RUN_TESTS(volk_16ic_s32f_magnitude_32f_a, 1e-5, 32768.0, 204600, 1000);
-VOLK_RUN_TESTS(volk_16i_s32f_convert_32f_a, 1e-4, 32768.0, 204600, 10000);
-VOLK_RUN_TESTS(volk_16i_s32f_convert_32f_u, 1e-4, 32768.0, 204600, 10000);
-VOLK_RUN_TESTS(volk_16i_convert_8i_a, 0, 0, 204600, 10000);
-VOLK_RUN_TESTS(volk_16i_convert_8i_u, 0, 0, 204600, 10000);
-VOLK_RUN_TESTS(volk_16i_max_star_16i_a, 0, 0, 204600, 10000);
-VOLK_RUN_TESTS(volk_16i_max_star_horizontal_16i_a, 0, 0, 204600, 10000);
-//VOLK_RUN_TESTS(volk_16i_permute_and_scalar_add_a, 1e-4, 0, 2046, 10000);
-//VOLK_RUN_TESTS(volk_16i_x4_quad_max_star_16i_a, 1e-4, 0, 2046, 10000);
-VOLK_RUN_TESTS(volk_16u_byteswap_a, 0, 0, 204600, 10000);
-VOLK_RUN_TESTS(volk_32f_accumulator_s32f_a, 1e-4, 0, 204600, 10000);
-VOLK_RUN_TESTS(volk_32f_x2_add_32f_a, 1e-4, 0, 204600, 10000);
-VOLK_RUN_TESTS(volk_32fc_32f_multiply_32fc_a, 1e-4, 0, 204600, 1000);
-VOLK_RUN_TESTS(volk_32fc_s32f_power_32fc_a, 1e-4, 0, 204600, 50);
-VOLK_RUN_TESTS(volk_32f_s32f_calc_spectral_noise_floor_32f_a, 1e-4, 20.0, 204600, 1000);
-VOLK_RUN_TESTS(volk_32fc_s32f_atan2_32f_a, 1e-4, 10.0, 204600, 100);
+VOLK_RUN_TESTS(volk_16ic_deinterleave_real_8i_a, 0, 0, 20460, 10000);
+VOLK_RUN_TESTS(volk_16ic_deinterleave_16i_x2_a, 0, 0, 20460, 10000);
+VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_32f_x2_a, 1e-4, 32768.0, 20460, 1000);
+VOLK_RUN_TESTS(volk_16ic_deinterleave_real_16i_a, 0, 0, 20460, 10000);
+VOLK_RUN_TESTS(volk_16ic_magnitude_16i_a, 1, 0, 20460, 100);
+VOLK_RUN_TESTS(volk_16ic_s32f_magnitude_32f_a, 1e-5, 32768.0, 20460, 1000);
+VOLK_RUN_TESTS(volk_16i_s32f_convert_32f_a, 1e-4, 32768.0, 20460, 10000);
+VOLK_RUN_TESTS(volk_16i_s32f_convert_32f_u, 1e-4, 32768.0, 20460, 10000);
+VOLK_RUN_TESTS(volk_16i_convert_8i_a, 0, 0, 20460, 10000);
+VOLK_RUN_TESTS(volk_16i_convert_8i_u, 0, 0, 20460, 10000);
+VOLK_RUN_TESTS(volk_16i_max_star_16i_a, 0, 0, 20460, 10000);
+VOLK_RUN_TESTS(volk_16i_max_star_horizontal_16i_a, 0, 0, 20460, 10000);
+//VOLK_RUN_TESTS(volk_16i_permute_and_scalar_add_a, 1e-4, 0, 2046, 1000);
+//VOLK_RUN_TESTS(volk_16i_x4_quad_max_star_16i_a, 1e-4, 0, 2046, 1000);
+VOLK_RUN_TESTS(volk_16u_byteswap_a, 0, 0, 20460, 10000);
+VOLK_RUN_TESTS(volk_32f_accumulator_s32f_a, 1e-4, 0, 20460, 10000);
+VOLK_RUN_TESTS(volk_32f_x2_add_32f_a, 1e-4, 0, 20460, 10000);
+VOLK_RUN_TESTS(volk_32fc_32f_multiply_32fc_a, 1e-4, 0, 20460, 1000);
+VOLK_RUN_TESTS(volk_32fc_s32f_power_32fc_a, 1e-4, 0, 20460, 50);
+VOLK_RUN_TESTS(volk_32f_s32f_calc_spectral_noise_floor_32f_a, 1e-4, 20.0, 20460, 1000);
+VOLK_RUN_TESTS(volk_32fc_s32f_atan2_32f_a, 1e-4, 10.0, 20460, 100);
//VOLK_RUN_TESTS(volk_32fc_x2_conjugate_dot_prod_32fc_a, 1e-4, 0, 2046, 10000);
-VOLK_RUN_TESTS(volk_32fc_x2_conjugate_dot_prod_32fc_u, 1e-4, 0, 204600, 10000);
-VOLK_RUN_TESTS(volk_32fc_deinterleave_32f_x2_a, 1e-4, 0, 204600, 1000);
-VOLK_RUN_TESTS(volk_32fc_deinterleave_64f_x2_a, 1e-4, 0, 204600, 1000);
-VOLK_RUN_TESTS(volk_32fc_s32f_deinterleave_real_16i_a, 0, 32768, 204600, 10000);
-VOLK_RUN_TESTS(volk_32fc_deinterleave_real_32f_a, 1e-4, 0, 204600, 5000);
-VOLK_RUN_TESTS(volk_32fc_deinterleave_real_64f_a, 1e-4, 0, 204600, 1000);
-VOLK_RUN_TESTS(volk_32fc_x2_dot_prod_32fc_a, 1e-4, 0, 204600, 10000);
-VOLK_RUN_TESTS(volk_32fc_index_max_16u_a, 0, 0, 204600, 10000);
-VOLK_RUN_TESTS(volk_32fc_s32f_magnitude_16i_a, 1, 32768, 204600, 100);
-VOLK_RUN_TESTS(volk_32fc_magnitude_32f_a, 1e-4, 0, 204600, 1000);
-VOLK_RUN_TESTS(volk_32fc_x2_multiply_32fc_a, 1e-4, 0, 204600, 1000);
-VOLK_RUN_TESTS(volk_32f_s32f_convert_16i_a, 1, 32768, 204600, 10000);
-VOLK_RUN_TESTS(volk_32f_s32f_convert_16i_u, 1, 32768, 204600, 10000);
-VOLK_RUN_TESTS(volk_32f_s32f_convert_32i_a, 1, 2<<31, 204600, 10000);
-VOLK_RUN_TESTS(volk_32f_s32f_convert_32i_u, 1, 2<<31, 204600, 10000);
-VOLK_RUN_TESTS(volk_32f_convert_64f_a, 1e-4, 0, 204600, 10000);
-VOLK_RUN_TESTS(volk_32f_convert_64f_u, 1e-4, 0, 204600, 10000);
-VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_a, 1, 128, 204600, 10000);
-VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_u, 1, 128, 204600, 10000);
+VOLK_RUN_TESTS(volk_32fc_x2_conjugate_dot_prod_32fc_u, 1e-4, 0, 20460, 10000);
+VOLK_RUN_TESTS(volk_32fc_deinterleave_32f_x2_a, 1e-4, 0, 20460, 1000);
+VOLK_RUN_TESTS(volk_32fc_deinterleave_64f_x2_a, 1e-4, 0, 20460, 1000);
+VOLK_RUN_TESTS(volk_32fc_s32f_deinterleave_real_16i_a, 0, 32768, 20460, 10000);
+VOLK_RUN_TESTS(volk_32fc_deinterleave_real_32f_a, 1e-4, 0, 20460, 5000);
+VOLK_RUN_TESTS(volk_32fc_deinterleave_real_64f_a, 1e-4, 0, 20460, 1000);
+VOLK_RUN_TESTS(volk_32fc_x2_dot_prod_32fc_a, 1e-4, 0, 20460, 10000);
+VOLK_RUN_TESTS(volk_32fc_index_max_16u_a, 0, 0, 20460, 10000);
+VOLK_RUN_TESTS(volk_32fc_s32f_magnitude_16i_a, 1, 32768, 20460, 100);
+VOLK_RUN_TESTS(volk_32fc_magnitude_32f_a, 1e-4, 0, 20460, 1000);
+VOLK_RUN_TESTS(volk_32fc_x2_multiply_32fc_a, 1e-4, 0, 20460, 1000);
+VOLK_RUN_TESTS(volk_32f_s32f_convert_16i_a, 1, 32768, 20460, 10000);
+VOLK_RUN_TESTS(volk_32f_s32f_convert_16i_u, 1, 32768, 20460, 10000);
+VOLK_RUN_TESTS(volk_32f_s32f_convert_32i_a, 1, 2<<31, 20460, 10000);
+VOLK_RUN_TESTS(volk_32f_s32f_convert_32i_u, 1, 2<<31, 20460, 10000);
+VOLK_RUN_TESTS(volk_32f_convert_64f_a, 1e-4, 0, 20460, 10000);
+VOLK_RUN_TESTS(volk_32f_convert_64f_u, 1e-4, 0, 20460, 10000);
+VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_a, 1, 128, 20460, 10000);
+VOLK_RUN_TESTS(volk_32f_s32f_convert_8i_u, 1, 128, 20460, 10000);
//VOLK_RUN_TESTS(volk_32fc_s32f_x2_power_spectral_density_32f_a, 1e-4, 2046, 10000);
-VOLK_RUN_TESTS(volk_32fc_s32f_power_spectrum_32f_a, 1e-4, 0, 20460, 100);
-VOLK_RUN_TESTS(volk_32fc_x2_square_dist_32f_a, 1e-4, 0, 204600, 10000);
-VOLK_RUN_TESTS(volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a, 1e-4, 10, 204600, 10000);
-VOLK_RUN_TESTS(volk_32f_x2_divide_32f_a, 1e-4, 0, 204600, 2000);
-VOLK_RUN_TESTS(volk_32f_x2_dot_prod_32f_a, 1e-4, 0, 204600, 5000);
-VOLK_RUN_TESTS(volk_32f_x2_dot_prod_32f_u, 1e-4, 0, 204600, 5000);
+VOLK_RUN_TESTS(volk_32fc_s32f_power_spectrum_32f_a, 1e-4, 0, 2046, 100);
+VOLK_RUN_TESTS(volk_32fc_x2_square_dist_32f_a, 1e-4, 0, 20460, 10000);
+VOLK_RUN_TESTS(volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a, 1e-4, 10, 20460, 10000);
+VOLK_RUN_TESTS(volk_32f_x2_divide_32f_a, 1e-4, 0, 20460, 2000);
+VOLK_RUN_TESTS(volk_32f_x2_dot_prod_32f_a, 1e-4, 0, 20460, 5000);
+VOLK_RUN_TESTS(volk_32f_x2_dot_prod_32f_u, 1e-4, 0, 20460, 5000);
//VOLK_RUN_TESTS(volk_32f_s32f_32f_fm_detect_32f_a, 1e-4, 2046, 10000);
-VOLK_RUN_TESTS(volk_32f_index_max_16u_a, 0, 0, 204600, 5000);
-VOLK_RUN_TESTS(volk_32f_x2_s32f_interleave_16ic_a, 1, 32768, 204600, 3000);
-VOLK_RUN_TESTS(volk_32f_x2_interleave_32fc_a, 0, 0, 204600, 5000);
-VOLK_RUN_TESTS(volk_32f_x2_max_32f_a, 1e-4, 0, 204600, 2000);
-VOLK_RUN_TESTS(volk_32f_x2_min_32f_a, 1e-4, 0, 204600, 2000);
-VOLK_RUN_TESTS(volk_32f_x2_multiply_32f_a, 1e-4, 0, 204600, 10000);
-VOLK_RUN_TESTS(volk_32f_s32f_normalize_a, 1e-4, 100, 204600, 10000);
-VOLK_RUN_TESTS(volk_32f_s32f_power_32f_a, 1e-4, 4, 204600, 100);
-VOLK_RUN_TESTS(volk_32f_sqrt_32f_a, 1e-4, 0, 204600, 100);
-VOLK_RUN_TESTS(volk_32f_s32f_stddev_32f_a, 1e-4, 100, 204600, 3000);
-VOLK_RUN_TESTS(volk_32f_stddev_and_mean_32f_x2_a, 1e-4, 0, 204600, 3000);
-VOLK_RUN_TESTS(volk_32f_x2_subtract_32f_a, 1e-4, 0, 204600, 5000);
-VOLK_RUN_TESTS(volk_32f_x3_sum_of_poly_32f_a, 1e-4, 0, 204600, 5000);
-VOLK_RUN_TESTS(volk_32i_x2_and_32i_a, 0, 0, 204600, 10000);
-VOLK_RUN_TESTS(volk_32i_s32f_convert_32f_a, 1e-4, 100, 204600, 10000);
-VOLK_RUN_TESTS(volk_32i_s32f_convert_32f_u, 1e-4, 100, 204600, 10000);
-VOLK_RUN_TESTS(volk_32i_x2_or_32i_a, 0, 0, 204600, 10000);
-VOLK_RUN_TESTS(volk_32u_byteswap_a, 0, 0, 204600, 2000);
+VOLK_RUN_TESTS(volk_32f_index_max_16u_a, 0, 0, 20460, 5000);
+VOLK_RUN_TESTS(volk_32f_x2_s32f_interleave_16ic_a, 1, 32768, 20460, 3000);
+VOLK_RUN_TESTS(volk_32f_x2_interleave_32fc_a, 0, 0, 20460, 5000);
+VOLK_RUN_TESTS(volk_32f_x2_max_32f_a, 1e-4, 0, 20460, 2000);
+VOLK_RUN_TESTS(volk_32f_x2_min_32f_a, 1e-4, 0, 20460, 2000);
+VOLK_RUN_TESTS(volk_32f_x2_multiply_32f_a, 1e-4, 0, 20460, 10000);
+VOLK_RUN_TESTS(volk_32f_s32f_normalize_a, 1e-4, 100, 20460, 10000);
+VOLK_RUN_TESTS(volk_32f_s32f_power_32f_a, 1e-4, 4, 20460, 100);
+VOLK_RUN_TESTS(volk_32f_sqrt_32f_a, 1e-4, 0, 20460, 100);
+VOLK_RUN_TESTS(volk_32f_s32f_stddev_32f_a, 1e-4, 100, 20460, 3000);
+VOLK_RUN_TESTS(volk_32f_stddev_and_mean_32f_x2_a, 1e-4, 0, 20460, 3000);
+VOLK_RUN_TESTS(volk_32f_x2_subtract_32f_a, 1e-4, 0, 20460, 5000);
+VOLK_RUN_TESTS(volk_32f_x3_sum_of_poly_32f_a, 1e-4, 0, 20460, 5000);
+VOLK_RUN_TESTS(volk_32i_x2_and_32i_a, 0, 0, 20460, 10000);
+VOLK_RUN_TESTS(volk_32i_s32f_convert_32f_a, 1e-4, 100, 20460, 10000);
+VOLK_RUN_TESTS(volk_32i_s32f_convert_32f_u, 1e-4, 100, 20460, 10000);
+VOLK_RUN_TESTS(volk_32i_x2_or_32i_a, 0, 0, 20460, 10000);
+VOLK_RUN_TESTS(volk_32u_byteswap_a, 0, 0, 20460, 2000);
//VOLK_RUN_TESTS(volk_32u_popcnt_a, 0, 0, 2046, 10000);
-VOLK_RUN_TESTS(volk_64f_convert_32f_a, 1e-4, 0, 204600, 10000);
-VOLK_RUN_TESTS(volk_64f_convert_32f_u, 1e-4, 0, 204600, 10000);
-VOLK_RUN_TESTS(volk_64f_x2_max_64f_a, 1e-4, 0, 204600, 1000);
-VOLK_RUN_TESTS(volk_64f_x2_min_64f_a, 1e-4, 0, 204600, 1000);
-VOLK_RUN_TESTS(volk_64u_byteswap_a, 0, 0, 204600, 1000);
+VOLK_RUN_TESTS(volk_64f_convert_32f_a, 1e-4, 0, 20460, 10000);
+VOLK_RUN_TESTS(volk_64f_convert_32f_u, 1e-4, 0, 20460, 10000);
+VOLK_RUN_TESTS(volk_64f_x2_max_64f_a, 1e-4, 0, 20460, 1000);
+VOLK_RUN_TESTS(volk_64f_x2_min_64f_a, 1e-4, 0, 20460, 1000);
+VOLK_RUN_TESTS(volk_64u_byteswap_a, 0, 0, 20460, 1000);
//VOLK_RUN_TESTS(volk_64u_popcnt_a, 0, 0, 2046, 10000);
-VOLK_RUN_TESTS(volk_8ic_deinterleave_16i_x2_a, 0, 0, 204600, 3000);
-VOLK_RUN_TESTS(volk_8ic_s32f_deinterleave_32f_x2_a, 1e-4, 100, 204600, 3000);
-VOLK_RUN_TESTS(volk_8ic_deinterleave_real_16i_a, 0, 256, 204600, 3000);
-VOLK_RUN_TESTS(volk_8ic_s32f_deinterleave_real_32f_a, 1e-4, 100, 204600, 3000);
-VOLK_RUN_TESTS(volk_8ic_deinterleave_real_8i_a, 0, 0, 204600, 10000);
-VOLK_RUN_TESTS(volk_8ic_x2_multiply_conjugate_16ic_a, 0, 0, 204600, 400);
-VOLK_RUN_TESTS(volk_8ic_x2_s32f_multiply_conjugate_32fc_a, 1e-4, 100, 204600, 400);
-VOLK_RUN_TESTS(volk_8i_convert_16i_a, 0, 0, 204600, 20000);
-VOLK_RUN_TESTS(volk_8i_convert_16i_u, 0, 0, 204600, 2000);
-VOLK_RUN_TESTS(volk_8i_s32f_convert_32f_a, 1e-4, 100, 204600, 2000);
-VOLK_RUN_TESTS(volk_8i_s32f_convert_32f_u, 1e-4, 100, 204600, 2000);
+VOLK_RUN_TESTS(volk_8ic_deinterleave_16i_x2_a, 0, 0, 20460, 3000);
+VOLK_RUN_TESTS(volk_8ic_s32f_deinterleave_32f_x2_a, 1e-4, 100, 20460, 3000);
+VOLK_RUN_TESTS(volk_8ic_deinterleave_real_16i_a, 0, 256, 20460, 3000);
+VOLK_RUN_TESTS(volk_8ic_s32f_deinterleave_real_32f_a, 1e-4, 100, 20460, 3000);
+VOLK_RUN_TESTS(volk_8ic_deinterleave_real_8i_a, 0, 0, 20460, 10000);
+VOLK_RUN_TESTS(volk_8ic_x2_multiply_conjugate_16ic_a, 0, 0, 20460, 400);
+VOLK_RUN_TESTS(volk_8ic_x2_s32f_multiply_conjugate_32fc_a, 1e-4, 100, 20460, 400);
+VOLK_RUN_TESTS(volk_8i_convert_16i_a, 0, 0, 20460, 20000);
+VOLK_RUN_TESTS(volk_8i_convert_16i_u, 0, 0, 20460, 2000);
+VOLK_RUN_TESTS(volk_8i_s32f_convert_32f_a, 1e-4, 100, 20460, 2000);
+VOLK_RUN_TESTS(volk_8i_s32f_convert_32f_u, 1e-4, 100, 20460, 2000);