summaryrefslogtreecommitdiff
path: root/volk/lib
diff options
context:
space:
mode:
Diffstat (limited to 'volk/lib')
-rw-r--r--volk/lib/.gitignore4
-rw-r--r--volk/lib/gcc_x86_cpuid.h6
-rw-r--r--volk/lib/qa_16s_add_quad_aligned16.cc10
-rw-r--r--volk/lib/qa_16s_branch_4_state_8_aligned16.cc38
-rw-r--r--volk/lib/qa_16s_permute_and_scalar_add_aligned16.cc18
-rw-r--r--volk/lib/qa_16s_quad_max_star_aligned16.cc6
-rw-r--r--volk/lib/qa_32f_fm_detect_aligned16.cc8
-rw-r--r--volk/lib/qa_32f_index_max_aligned16.cc36
-rw-r--r--volk/lib/qa_32fc_index_max_aligned16.cc34
-rw-r--r--volk/lib/qa_32fc_power_spectral_density_32f_aligned16.cc8
-rw-r--r--volk/lib/qa_32fc_x2_conjugate_dot_prod_32fc_u.cc28
-rw-r--r--volk/lib/qa_32u_popcnt_aligned16.cc8
-rw-r--r--volk/lib/qa_64u_popcnt_aligned16.cc8
-rw-r--r--volk/lib/qa_utils.cc54
-rw-r--r--volk/lib/volk_prefs.c2
-rw-r--r--volk/lib/volk_rank_archs.c4
16 files changed, 134 insertions, 138 deletions
diff --git a/volk/lib/.gitignore b/volk/lib/.gitignore
deleted file mode 100644
index 28ec6ddaa..000000000
--- a/volk/lib/.gitignore
+++ /dev/null
@@ -1,4 +0,0 @@
-/Makefile
-/Makefile.in
-/Makefile.am
-/testqa
diff --git a/volk/lib/gcc_x86_cpuid.h b/volk/lib/gcc_x86_cpuid.h
index 2d0916fb3..98eeb33a3 100644
--- a/volk/lib/gcc_x86_cpuid.h
+++ b/volk/lib/gcc_x86_cpuid.h
@@ -5,16 +5,16 @@
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 3, or (at your option) any
* later version.
- *
+ *
* This file is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
- *
+ *
* Under Section 7 of GPL version 3, you are granted additional
* permissions described in the GCC Runtime Library Exception, version
* 3.1, as published by the Free Software Foundation.
- *
+ *
* You should have received a copy of the GNU General Public License and
* a copy of the GCC Runtime Library Exception along with this program;
* see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
diff --git a/volk/lib/qa_16s_add_quad_aligned16.cc b/volk/lib/qa_16s_add_quad_aligned16.cc
index 5d5eb7e18..8da43b972 100644
--- a/volk/lib/qa_16s_add_quad_aligned16.cc
+++ b/volk/lib/qa_16s_add_quad_aligned16.cc
@@ -16,7 +16,7 @@ void qa_16s_add_quad_aligned16::t1() {
void qa_16s_add_quad_aligned16::t1() {
-
+
volk_environment_init();
clock_t start, end;
double total;
@@ -27,7 +27,7 @@ void qa_16s_add_quad_aligned16::t1() {
__VOLK_ATTR_ALIGNED(16) short input2[vlen];
__VOLK_ATTR_ALIGNED(16) short input3[vlen];
__VOLK_ATTR_ALIGNED(16) short input4[vlen];
-
+
__VOLK_ATTR_ALIGNED(16) short output0[vlen];
__VOLK_ATTR_ALIGNED(16) short output1[vlen];
__VOLK_ATTR_ALIGNED(16) short output2[vlen];
@@ -48,13 +48,13 @@ void qa_16s_add_quad_aligned16::t1() {
short minus3 = ((short) (rand() - (RAND_MAX/2))) >> 2;
short plus4 = ((short) (rand() - (RAND_MAX/2))) >> 2;
short minus4 = ((short) (rand() - (RAND_MAX/2))) >> 2;
-
+
input0[i] = plus0 - minus0;
input1[i] = plus1 - minus1;
input2[i] = plus2 - minus2;
input3[i] = plus3 - minus3;
input4[i] = plus4 - minus4;
-
+
}
printf("16s_add_quad_aligned\n");
@@ -76,7 +76,7 @@ void qa_16s_add_quad_aligned16::t1() {
//printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
//printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
}
-
+
for(int i = 0; i < vlen; ++i) {
//printf("%d...%d\n", output0[i], output01[i]);
CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
diff --git a/volk/lib/qa_16s_branch_4_state_8_aligned16.cc b/volk/lib/qa_16s_branch_4_state_8_aligned16.cc
index 2e6e6a1a0..5a58569a1 100644
--- a/volk/lib/qa_16s_branch_4_state_8_aligned16.cc
+++ b/volk/lib/qa_16s_branch_4_state_8_aligned16.cc
@@ -22,17 +22,17 @@ void qa_16s_branch_4_state_8_aligned16::t1() {
static char permute2[16]__attribute__((aligned(16))) = {0x02, 0x03, 0x06, 0x07, 0x08, 0x09, 0x0c, 0x0d, 0x00, 0x01, 0x04, 0x05, 0x0a, 0x0b, 0x0e, 0x0f};
static char permute3[16]__attribute__((aligned(16))) = {0x00, 0x01, 0x04, 0x05, 0x0a, 0x0b, 0x0e, 0x0f, 0x02, 0x03, 0x06, 0x07, 0x08, 0x09, 0x0c, 0x0d};
static char* permuters[4] = {permute0, permute1, permute2, permute3};
-
+
unsigned int num_bytes = vlen << 1;
volk_environment_init();
clock_t start, end;
double total;
-
+
__VOLK_ATTR_ALIGNED(16) short target[vlen];
__VOLK_ATTR_ALIGNED(16) short target2[vlen];
__VOLK_ATTR_ALIGNED(16) short target3[vlen];
-
+
__VOLK_ATTR_ALIGNED(16) short src0[vlen];
__VOLK_ATTR_ALIGNED(16) short permute_indexes[vlen] = {
7, 5, 2, 0, 6, 4, 3, 1, 6, 4, 3, 1, 7, 5, 2, 0, 1, 3, 4, 6, 0, 2, 5, 7, 0, 2, 5, 7, 1, 3, 4, 6 };
@@ -45,29 +45,29 @@ void qa_16s_branch_4_state_8_aligned16::t1() {
__VOLK_ATTR_ALIGNED(16) short cntl3[vlen] = {
0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff };
__VOLK_ATTR_ALIGNED(16) short scalars[4] = {1, 2, 3, 4};
-
-
+
+
for(int i = 0; i < vlen; ++i) {
src0[i] = i;
-
+
}
-
+
printf("16s_branch_4_state_8_aligned\n");
-
-
+
+
start = clock();
for(int i = 0; i < num_iters; ++i) {
volk_16s_permute_and_scalar_add_aligned16_manual(target, src0, permute_indexes, cntl0, cntl1, cntl2, cntl3, scalars, num_bytes, "sse2");
}
end = clock();
-
+
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("permute_and_scalar_add_time: %f\n", total);
-
-
+
+
start = clock();
for(int i = 0; i < num_iters; ++i) {
@@ -78,25 +78,25 @@ void qa_16s_branch_4_state_8_aligned16::t1() {
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("branch_4_state_8_time, ssse3: %f\n", total);
-
+
start = clock();
for(int i = 0; i < num_iters; ++i) {
volk_16s_branch_4_state_8_aligned16_manual(target3, src0, permuters, cntl2, cntl3, scalars, "generic");
}
end = clock();
-
+
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("permute_and_scalar_add_time, generic: %f\n", total);
-
-
-
+
+
+
for(int i = 0; i < vlen; ++i) {
printf("psa... %d, b4s8... %d\n", target[i], target3[i]);
}
-
+
for(int i = 0; i < vlen; ++i) {
-
+
CPPUNIT_ASSERT(target[i] == target2[i]);
CPPUNIT_ASSERT(target[i] == target3[i]);
}
diff --git a/volk/lib/qa_16s_permute_and_scalar_add_aligned16.cc b/volk/lib/qa_16s_permute_and_scalar_add_aligned16.cc
index 3cd4e906d..dadd2c580 100644
--- a/volk/lib/qa_16s_permute_and_scalar_add_aligned16.cc
+++ b/volk/lib/qa_16s_permute_and_scalar_add_aligned16.cc
@@ -16,13 +16,13 @@ void qa_16s_permute_and_scalar_add_aligned16::t1() {
void qa_16s_permute_and_scalar_add_aligned16::t1() {
const int vlen = 64;
-
+
unsigned int num_bytes = vlen << 1;
volk_environment_init();
clock_t start, end;
double total;
-
+
__VOLK_ATTR_ALIGNED(16) short target[vlen];
__VOLK_ATTR_ALIGNED(16) short target2[vlen];
__VOLK_ATTR_ALIGNED(16) short src0[vlen];
@@ -43,7 +43,7 @@ void qa_16s_permute_and_scalar_add_aligned16::t1() {
}
printf("16s_permute_and_scalar_add_aligned\n");
-
+
start = clock();
for(int i = 0; i < 100000; ++i) {
volk_16s_permute_and_scalar_add_aligned16_manual(target, src0, permute_indexes, cntl0, cntl1, cntl2, cntl3, scalars, num_bytes, "generic");
@@ -53,24 +53,24 @@ void qa_16s_permute_and_scalar_add_aligned16::t1() {
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("generic_time: %f\n", total);
-
+
start = clock();
for(int i = 0; i < 100000; ++i) {
volk_16s_permute_and_scalar_add_aligned16_manual(target2, src0, permute_indexes, cntl0, cntl1, cntl2, cntl3, scalars, num_bytes, "sse2");
}
end = clock();
-
+
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("sse2_time: %f\n", total);
-
-
+
+
for(int i = 0; i < vlen; ++i) {
//printf("generic... %d, sse2... %d\n", target[i], target2[i]);
}
-
+
for(int i = 0; i < vlen; ++i) {
-
+
CPPUNIT_ASSERT(target[i] == target2[i]);
}
}
diff --git a/volk/lib/qa_16s_quad_max_star_aligned16.cc b/volk/lib/qa_16s_quad_max_star_aligned16.cc
index 192a69e35..2a5dec44a 100644
--- a/volk/lib/qa_16s_quad_max_star_aligned16.cc
+++ b/volk/lib/qa_16s_quad_max_star_aligned16.cc
@@ -16,7 +16,7 @@ void qa_16s_quad_max_star_aligned16::t1() {
void qa_16s_quad_max_star_aligned16::t1() {
const int vlen = 34;
-
+
__VOLK_ATTR_ALIGNED(16) short input0[vlen];
__VOLK_ATTR_ALIGNED(16) short input1[vlen];
__VOLK_ATTR_ALIGNED(16) short input2[vlen];
@@ -50,9 +50,9 @@ void qa_16s_quad_max_star_aligned16::t1() {
for(int i = 0; i < vlen; ++i) {
printf("generic... %d, sse2... %d, inputs: %d, %d, %d, %d\n", output0[i], output1[i], input0[i], input1[i], input2[i], input3[i]);
}
-
+
for(int i = 0; i < vlen; ++i) {
-
+
CPPUNIT_ASSERT_EQUAL(output0[i], output1[i]);
}
}
diff --git a/volk/lib/qa_32f_fm_detect_aligned16.cc b/volk/lib/qa_32f_fm_detect_aligned16.cc
index a2e7a85be..4e792ec6c 100644
--- a/volk/lib/qa_32f_fm_detect_aligned16.cc
+++ b/volk/lib/qa_32f_fm_detect_aligned16.cc
@@ -15,18 +15,18 @@ void qa_32f_fm_detect_aligned16::t1() {
#else
void qa_32f_fm_detect_aligned16::t1() {
-
+
volk_environment_init();
clock_t start, end;
double total;
const int vlen = 3201;
const int ITERS = 10000;
__VOLK_ATTR_ALIGNED(16) float input0[vlen];
-
+
__VOLK_ATTR_ALIGNED(16) float output0[vlen];
__VOLK_ATTR_ALIGNED(16) float output01[vlen];
- for(int i = 0; i < vlen; ++i) {
+ for(int i = 0; i < vlen; ++i) {
input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
}
printf("32f_fm_detect_aligned\n");
@@ -51,7 +51,7 @@ void qa_32f_fm_detect_aligned16::t1() {
//printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
//printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
}
-
+
for(int i = 0; i < vlen; ++i) {
//printf("%d...%d\n", output0[i], output01[i]);
CPPUNIT_ASSERT_DOUBLES_EQUAL(output0[i], output01[i], fabs(output0[i]) * 1e-4);
diff --git a/volk/lib/qa_32f_index_max_aligned16.cc b/volk/lib/qa_32f_index_max_aligned16.cc
index a1c3d4cd1..2df206726 100644
--- a/volk/lib/qa_32f_index_max_aligned16.cc
+++ b/volk/lib/qa_32f_index_max_aligned16.cc
@@ -34,12 +34,12 @@ void qa_32f_index_max_aligned16::t1(){
void qa_32f_index_max_aligned16::t1(){
-
+
const int vlen = VEC_LEN;
-
+
volk_runtime_init();
-
+
volk_environment_init();
int ret;
@@ -47,8 +47,8 @@ void qa_32f_index_max_aligned16::t1(){
unsigned int* target_sse;
unsigned int* target_generic;
float* src0 ;
-
-
+
+
unsigned int i_target_sse4_1;
target_sse4_1 = &i_target_sse4_1;
unsigned int i_target_sse;
@@ -57,20 +57,20 @@ void qa_32f_index_max_aligned16::t1(){
target_generic = &i_target_generic;
ret = posix_memalign((void**)&src0, 16, vlen *sizeof(float));
-
+
random_floats((float*)src0, vlen);
-
+
printf("32f_index_max_aligned16\n");
clock_t start, end;
double total;
-
-
+
+
start = clock();
for(int k = 0; k < NUM_ITERS; ++k) {
volk_32f_index_max_aligned16_manual(target_generic, src0, vlen, "generic");
}
- end = clock();
+ end = clock();
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("generic time: %f\n", total);
@@ -78,25 +78,25 @@ void qa_32f_index_max_aligned16::t1(){
for(int k = 0; k < NUM_ITERS; ++k) {
volk_32f_index_max_aligned16_manual(target_sse, src0, vlen, "sse2");
}
-
- end = clock();
+
+ end = clock();
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("sse time: %f\n", total);
-
+
start = clock();
for(int k = 0; k < NUM_ITERS; ++k) {
get_volk_runtime()->volk_32f_index_max_aligned16(target_sse4_1, src0, vlen);
}
-
- end = clock();
+
+ end = clock();
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("sse4.1 time: %f\n", total);
-
-
+
+
printf("generic: %u, sse: %u, sse4.1: %u\n", target_generic[0], target_sse[0], target_sse4_1[0]);
CPPUNIT_ASSERT_EQUAL(target_generic[0], target_sse[0]);
CPPUNIT_ASSERT_EQUAL(target_generic[0], target_sse4_1[0]);
-
+
free(src0);
}
diff --git a/volk/lib/qa_32fc_index_max_aligned16.cc b/volk/lib/qa_32fc_index_max_aligned16.cc
index 4d83f1639..3859bcb52 100644
--- a/volk/lib/qa_32fc_index_max_aligned16.cc
+++ b/volk/lib/qa_32fc_index_max_aligned16.cc
@@ -33,36 +33,36 @@ void qa_32fc_index_max_aligned16::t1(){
void qa_32fc_index_max_aligned16::t1(){
-
+
const int vlen = VEC_LEN;
-
+
volk_environment_init();
int ret;
-
+
unsigned int* target;
unsigned int* target_generic;
std::complex<float>* src0 ;
-
-
+
+
unsigned int i_target;
target = &i_target;
unsigned int i_target_generic;
target_generic = &i_target_generic;
ret = posix_memalign((void**)&src0, 16, vlen << 3);
-
+
random_floats((float*)src0, vlen * 2);
-
+
printf("32fc_index_max_aligned16\n");
clock_t start, end;
double total;
-
-
+
+
start = clock();
for(int k = 0; k < NUM_ITERS; ++k) {
volk_32fc_index_max_aligned16_manual(target_generic, src0, vlen << 3, "generic");
}
- end = clock();
+ end = clock();
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("generic time: %f\n", total);
@@ -70,19 +70,19 @@ void qa_32fc_index_max_aligned16::t1(){
for(int k = 0; k < NUM_ITERS; ++k) {
volk_32fc_index_max_aligned16_manual(target, src0, vlen << 3, "sse3");
}
-
- end = clock();
+
+ end = clock();
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("sse3 time: %f\n", total);
-
-
-
+
+
+
printf("generic: %u, sse3: %u\n", target_generic[0], target[0]);
CPPUNIT_ASSERT_DOUBLES_EQUAL(target_generic[0], target[0], 1.1);
-
-
+
+
free(src0);
}
diff --git a/volk/lib/qa_32fc_power_spectral_density_32f_aligned16.cc b/volk/lib/qa_32fc_power_spectral_density_32f_aligned16.cc
index 981bb19e6..daca31d9c 100644
--- a/volk/lib/qa_32fc_power_spectral_density_32f_aligned16.cc
+++ b/volk/lib/qa_32fc_power_spectral_density_32f_aligned16.cc
@@ -15,14 +15,14 @@ void qa_32fc_power_spectral_density_32f_aligned16::t1() {
#else
void qa_32fc_power_spectral_density_32f_aligned16::t1() {
-
+
volk_environment_init();
clock_t start, end;
double total;
const int vlen = 3201;
const int ITERS = 10000;
__VOLK_ATTR_ALIGNED(16) std::complex<float> input0[vlen];
-
+
__VOLK_ATTR_ALIGNED(16) float output_generic[vlen];
__VOLK_ATTR_ALIGNED(16) float output_sse3[vlen];
@@ -30,7 +30,7 @@ void qa_32fc_power_spectral_density_32f_aligned16::t1() {
const float rbw = 1.7;
float* inputLoad = (float*)input0;
- for(int i = 0; i < 2*vlen; ++i) {
+ for(int i = 0; i < 2*vlen; ++i) {
inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)));
}
printf("32fc_power_spectral_density_32f_aligned\n");
@@ -54,7 +54,7 @@ void qa_32fc_power_spectral_density_32f_aligned16::t1() {
//printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
//printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
}
-
+
for(int i = 0; i < vlen; ++i) {
//printf("%d...%d\n", output0[i], output01[i]);
CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], fabs(output_generic[i]*1e-4));
diff --git a/volk/lib/qa_32fc_x2_conjugate_dot_prod_32fc_u.cc b/volk/lib/qa_32fc_x2_conjugate_dot_prod_32fc_u.cc
index fefdf06ee..b825c20e4 100644
--- a/volk/lib/qa_32fc_x2_conjugate_dot_prod_32fc_u.cc
+++ b/volk/lib/qa_32fc_x2_conjugate_dot_prod_32fc_u.cc
@@ -7,7 +7,7 @@
#define assertcomplexEqual(expected, actual, delta) \
CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), fabs(std::real(expected)) * delta); \
- CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected))* delta);
+ CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), fabs(std::imag(expected))* delta);
#define ERR_DELTA (1e-4)
@@ -35,7 +35,7 @@ void qa_32fc_x2_conjugate_dot_prod_32fc_u::t1() {
std::complex<float>* input;
std::complex<float>* taps;
-
+
std::complex<float>* result_generic;
std::complex<float>* result;
@@ -43,19 +43,19 @@ void qa_32fc_x2_conjugate_dot_prod_32fc_u::t1() {
ret = posix_memalign((void**)&taps, 16, vlen << 3);
ret = posix_memalign((void**)&result_generic, 16, 8);
ret = posix_memalign((void**)&result, 16, 8);
-
+
result_generic[0] = std::complex<float>(0,0);
result[0] = std::complex<float>(0,0);
random_floats((float*)input, vlen * 2);
random_floats((float*)taps, vlen * 2);
-
-
+
+
volk_32fc_x2_conjugate_dot_prod_32fc_u_manual(result_generic, input, taps, vlen * 8, "generic");
-
+
volk_32fc_x2_conjugate_dot_prod_32fc_u_manual(result, input, taps, vlen * 8, "sse");
printf("32fc_x2_conjugate_dot_prod_32fc_u\n");
@@ -67,7 +67,7 @@ void qa_32fc_x2_conjugate_dot_prod_32fc_u::t1() {
free(taps);
free(result_generic);
free(result);
-
+
}
@@ -87,13 +87,13 @@ random_floats (float *buf, unsigned n)
void qa_32fc_x2_conjugate_dot_prod_32fc_u::t1() {
const int vlen = 789743;
-
+
volk_environment_init();
int ret;
std::complex<float>* input;
std::complex<float>* taps;
-
+
std::complex<float>* result_generic;
std::complex<float>* result;
@@ -101,19 +101,19 @@ void qa_32fc_x2_conjugate_dot_prod_32fc_u::t1() {
ret = posix_memalign((void**)&taps, 16, vlen << 3);
ret = posix_memalign((void**)&result_generic, 16, 8);
ret = posix_memalign((void**)&result, 16, 8);
-
+
result_generic[0] = std::complex<float>(0,0);
result[0] = std::complex<float>(0,0);
random_floats((float*)input, vlen * 2);
random_floats((float*)taps, vlen * 2);
-
-
+
+
volk_32fc_x2_conjugate_dot_prod_32fc_u_manual(result_generic, input, taps, vlen * 8, "generic");
-
+
volk_32fc_x2_conjugate_dot_prod_32fc_u_manual(result, input, taps, vlen * 8, "sse_32");
printf("32fc_x2_conjugate_dot_prod_32fc_u\n");
@@ -125,7 +125,7 @@ void qa_32fc_x2_conjugate_dot_prod_32fc_u::t1() {
free(taps);
free(result_generic);
free(result);
-
+
}
diff --git a/volk/lib/qa_32u_popcnt_aligned16.cc b/volk/lib/qa_32u_popcnt_aligned16.cc
index c880260f2..5559d933d 100644
--- a/volk/lib/qa_32u_popcnt_aligned16.cc
+++ b/volk/lib/qa_32u_popcnt_aligned16.cc
@@ -16,8 +16,8 @@ void qa_32u_popcnt_aligned16::t1() {
#else
void qa_32u_popcnt_aligned16::t1() {
-
-
+
+
volk_runtime_init();
volk_environment_init();
@@ -26,7 +26,7 @@ void qa_32u_popcnt_aligned16::t1() {
const int ITERS = 10000000;
__VOLK_ATTR_ALIGNED(16) uint32_t input0;
-
+
__VOLK_ATTR_ALIGNED(16) uint32_t output0;
__VOLK_ATTR_ALIGNED(16) uint32_t output01;
@@ -55,7 +55,7 @@ void qa_32u_popcnt_aligned16::t1() {
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("sse4.2_time: %f\n", total);
-
+
CPPUNIT_ASSERT_EQUAL(output0, output01);
}
diff --git a/volk/lib/qa_64u_popcnt_aligned16.cc b/volk/lib/qa_64u_popcnt_aligned16.cc
index 6be4e50ea..391601f22 100644
--- a/volk/lib/qa_64u_popcnt_aligned16.cc
+++ b/volk/lib/qa_64u_popcnt_aligned16.cc
@@ -16,8 +16,8 @@ void qa_64u_popcnt_aligned16::t1() {
#else
void qa_64u_popcnt_aligned16::t1() {
-
-
+
+
volk_runtime_init();
volk_environment_init();
@@ -26,7 +26,7 @@ void qa_64u_popcnt_aligned16::t1() {
const int ITERS = 10000000;
__VOLK_ATTR_ALIGNED(16) uint64_t input0;
-
+
__VOLK_ATTR_ALIGNED(16) uint64_t output0;
__VOLK_ATTR_ALIGNED(16) uint64_t output01;
@@ -55,7 +55,7 @@ void qa_64u_popcnt_aligned16::t1() {
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("sse4.2_time: %f\n", total);
-
+
CPPUNIT_ASSERT_EQUAL(output0, output01);
}
diff --git a/volk/lib/qa_utils.cc b/volk/lib/qa_utils.cc
index bb37801c9..c15979b3f 100644
--- a/volk/lib/qa_utils.cc
+++ b/volk/lib/qa_utils.cc
@@ -46,7 +46,7 @@ void load_random_data(void *data, volk_type_t type, unsigned int n) {
case 4:
if(type.is_signed) ((int32_t *)data)[i] = (int32_t) scaled_rand;
else ((uint32_t *)data)[i] = (uint32_t) scaled_rand;
- break;
+ break;
case 2:
if(type.is_signed) ((int16_t *)data)[i] = (int16_t) scaled_rand;
else ((uint16_t *)data)[i] = (uint16_t) scaled_rand;
@@ -69,7 +69,7 @@ static std::vector<std::string> get_arch_list(struct volk_func_desc desc) {
//if(!(archs[i+1] & volk_get_lvarch())) continue; //this arch isn't available on this pc
archlist.push_back(std::string(desc.indices[i]));
}
-
+
return archlist;
}
@@ -81,15 +81,15 @@ volk_type_t volk_type_from_string(std::string name) {
type.is_signed = false;
type.size = 0;
type.str = name;
-
+
if(name.size() < 2) throw std::string("name too short to be a datatype");
-
+
//is it a scalar?
- if(name[0] == 's') {
+ if(name[0] == 's') {
type.is_scalar = true;
name = name.substr(1, name.size()-1);
}
-
+
//get the data size
size_t last_size_pos = name.find_last_of("0123456789");
if(last_size_pos < 0) throw std::string("no size spec in type ").append(name);
@@ -98,7 +98,7 @@ volk_type_t volk_type_from_string(std::string name) {
assert(((size % 8) == 0) && (size <= 64) && (size != 0));
type.size = size/8; //in bytes
-
+
for(size_t i=last_size_pos+1; i < name.size(); i++) {
switch (name[i]) {
case 'f':
@@ -117,19 +117,19 @@ volk_type_t volk_type_from_string(std::string name) {
throw;
}
}
-
+
return type;
}
-static void get_signatures_from_name(std::vector<volk_type_t> &inputsig,
- std::vector<volk_type_t> &outputsig,
+static void get_signatures_from_name(std::vector<volk_type_t> &inputsig,
+ std::vector<volk_type_t> &outputsig,
std::string name) {
boost::char_separator<char> sep("_");
boost::tokenizer<boost::char_separator<char> > tok(name, sep);
std::vector<std::string> toked;
tok.assign(name);
toked.assign(tok.begin(), tok.end());
-
+
assert(toked[0] == "volk");
toked.erase(toked.begin());
@@ -143,7 +143,7 @@ static void get_signatures_from_name(std::vector<volk_type_t> &inputsig,
try {
type = volk_type_from_string(token);
if(side == SIDE_NAME) side = SIDE_OUTPUT; //if this is the first one after the name...
-
+
if(side == SIDE_INPUT) inputsig.push_back(type);
else outputsig.push_back(type);
} catch (...){
@@ -160,7 +160,7 @@ static void get_signatures_from_name(std::vector<volk_type_t> &inputsig,
side = SIDE_NAME;
fn_name.append("_");
fn_name.append(token);
- }
+ }
else if(side == SIDE_OUTPUT) {
if(token != toked.back()) throw; //the last token in the name is the alignment
}
@@ -223,7 +223,7 @@ bool fcompare(t *in1, t *in2, unsigned int vlen, float tol) {
}
}
}
-
+
return fail;
}
@@ -239,7 +239,7 @@ bool icompare(t *in1, t *in2, unsigned int vlen, unsigned int tol) {
}
}
}
-
+
return fail;
}
@@ -264,10 +264,10 @@ bool run_volk_tests(struct volk_func_desc desc,
std::vector<std::string> *best_arch_vector = 0
) {
std::cout << "RUN_VOLK_TESTS: " << name << std::endl;
-
+
//first let's get a list of available architectures for the test
std::vector<std::string> arch_list = get_arch_list(desc);
-
+
if(arch_list.size() < 2) {
std::cout << "no architectures to test" << std::endl;
return false;
@@ -279,7 +279,7 @@ bool run_volk_tests(struct volk_func_desc desc,
//now we have to get a function signature by parsing the name
std::vector<volk_type_t> inputsig, outputsig;
get_signatures_from_name(inputsig, outputsig, name);
-
+
//pull the input scalars into their own vector
std::vector<volk_type_t> inputsc;
for(size_t i=0; i<inputsig.size(); i++) {
@@ -299,7 +299,7 @@ bool run_volk_tests(struct volk_func_desc desc,
for(size_t i=0; i<inbuffs.size(); i++) {
load_random_data(inbuffs[i], inputsig[i], vlen);
}
-
+
//ok let's make a vector of vector of void buffers, which holds the input/output vectors for each arch
std::vector<std::vector<void *> > test_data;
for(size_t i=0; i<arch_list.size(); i++) {
@@ -312,7 +312,7 @@ bool run_volk_tests(struct volk_func_desc desc,
}
test_data.push_back(arch_buffs);
}
-
+
std::vector<volk_type_t> both_sigs;
both_sigs.insert(both_sigs.end(), outputsig.begin(), outputsig.end());
both_sigs.insert(both_sigs.end(), inputsig.begin(), inputsig.end());
@@ -326,7 +326,7 @@ bool run_volk_tests(struct volk_func_desc desc,
switch(both_sigs.size()) {
case 1:
if(inputsc.size() == 0) {
- run_cast_test1((volk_fn_1arg)(manual_func), test_data[i], vlen, iter, arch_list[i]);
+ run_cast_test1((volk_fn_1arg)(manual_func), test_data[i], vlen, iter, arch_list[i]);
} else if(inputsc.size() == 1 && inputsc[0].is_float) {
if(inputsc[0].is_complex) {
run_cast_test1_s32fc((volk_fn_1arg_s32fc)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]);
@@ -364,23 +364,23 @@ bool run_volk_tests(struct volk_func_desc desc,
throw "no function handler for this signature";
break;
}
-
+
end = clock();
double arch_time = (double)(end-start)/(double)CLOCKS_PER_SEC;
std::cout << arch_list[i] << " completed in " << arch_time << "s" << std::endl;
profile_times.push_back(arch_time);
}
-
+
//and now compare each output to the generic output
//first we have to know which output is the generic one, they aren't in order...
size_t generic_offset=0;
- for(size_t i=0; i<arch_list.size(); i++)
+ for(size_t i=0; i<arch_list.size(); i++)
if(arch_list[i] == "generic") generic_offset=i;
//now compare
//if(outputsig.size() == 0) outputsig = inputsig; //a hack, i know
-
+
bool fail = false;
bool fail_global = false;
std::vector<bool> arch_results;
@@ -438,7 +438,7 @@ bool run_volk_tests(struct volk_func_desc desc,
}
arch_results.push_back(!fail);
}
-
+
double best_time = std::numeric_limits<double>::max();
std::string best_arch = "generic";
for(size_t i=0; i < arch_list.size(); i++) {
@@ -447,7 +447,7 @@ bool run_volk_tests(struct volk_func_desc desc,
best_arch = arch_list[i];
}
}
-
+
std::cout << "Best arch: " << best_arch << std::endl;
if(best_arch_vector) {
best_arch_vector->push_back(name + std::string(" ") + best_arch);
diff --git a/volk/lib/volk_prefs.c b/volk/lib/volk_prefs.c
index 7e705bed4..5e5c9dfff 100644
--- a/volk/lib/volk_prefs.c
+++ b/volk/lib/volk_prefs.c
@@ -26,7 +26,7 @@ int load_preferences(struct volk_arch_pref **prefs) {
char path[512], line[512], function[128], arch[32];
int n_arch_prefs = 0;
struct volk_arch_pref *t_pref;
-
+
//get the config path
get_config_path(path);
if (path == NULL) return n_arch_prefs; //no prefs found
diff --git a/volk/lib/volk_rank_archs.c b/volk/lib/volk_rank_archs.c
index 4baa078bc..865d60955 100644
--- a/volk/lib/volk_rank_archs.c
+++ b/volk/lib/volk_rank_archs.c
@@ -26,14 +26,14 @@ unsigned int volk_rank_archs(const char *indices[], const int* arch_defs, unsign
n_arch_prefs = load_preferences(&volk_arch_prefs);
prefs_loaded = 1;
}
-
+
//now look for the function name in the prefs list
for(i=0; i < n_arch_prefs; i++) {
if(!strncmp(name, volk_arch_prefs[i].name, 128)) { //found it
return get_index(indices, n_archs, volk_arch_prefs[i].arch);
}
}
-
+
for(i=1; i < n_archs; ++i) {
if((arch_defs[i]&(!arch)) == 0) {
best_val = (arch_defs[i] > arch_defs[best_val + 1]) ? i-1 : best_val;