#include "qa_utils.h" #include #include #include #include //#include #include #include #include #include #include //#include #include #include #include #include float uniform() { return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) } template void random_floats (t *buf, unsigned n) { for (unsigned i = 0; i < n; i++) buf[i] = uniform (); } void load_random_data(void *data, volk_type_t type, unsigned int n) { if(type.is_complex) n *= 2; if(type.is_float) { if(type.size == 8) random_floats((double *)data, n); else random_floats((float *)data, n); } else { float int_max = float(uint64_t(2) << (type.size*8)); if(type.is_signed) int_max /= 2.0; for(int i=0; i((RAND_MAX/2))) * int_max; //man i really don't know how to do this in a more clever way, you have to cast down at some point switch(type.size) { case 8: if(type.is_signed) ((int64_t *)data)[i] = (int64_t) scaled_rand; else ((uint64_t *)data)[i] = (uint64_t) scaled_rand; break; case 4: if(type.is_signed) ((int32_t *)data)[i] = (int32_t) scaled_rand; else ((uint32_t *)data)[i] = (uint32_t) scaled_rand; break; case 2: if(type.is_signed) ((int16_t *)data)[i] = (int16_t) scaled_rand; else ((uint16_t *)data)[i] = (uint16_t) scaled_rand; break; case 1: if(type.is_signed) ((int8_t *)data)[i] = (int8_t) scaled_rand; else ((uint8_t *)data)[i] = (uint8_t) scaled_rand; break; default: throw "load_random_data: no support for data size > 8 or < 1"; //no shenanigans here } } } } void *make_aligned_buffer(unsigned int len, unsigned int size) { void *buf; int ret; ret = posix_memalign((void**)&buf, 16, len * size); assert(ret == 0); memset(buf, 0x00, len*size); return buf; } void make_buffer_for_signature(std::vector &buffs, std::vector inputsig, unsigned int vlen) { BOOST_FOREACH(volk_type_t sig, inputsig) { if(!sig.is_scalar) //we don't make buffers for scalars buffs.push_back(make_aligned_buffer(vlen, sig.size*(sig.is_complex ? 2 : 1))); } } static std::vector get_arch_list(const int archs[]) { std::vector archlist; int num_archs = archs[0]; //there has got to be a way to query these arches for(int i = 0; i < num_archs; i++) { switch(archs[i+1]) { case (1<(name.substr(0, last_size_pos+1)); assert(((size % 8) == 0) && (size <= 64) && (size != 0)); type.size = size/8; //in bytes for(int i=last_size_pos+1; i < name.size(); i++) { switch (name[i]) { case 'f': type.is_float = true; break; case 'i': type.is_signed = true; break; case 'c': type.is_complex = true; break; case 'u': type.is_signed = false; break; default: throw; } } return type; } static void get_signatures_from_name(std::vector &inputsig, std::vector &outputsig, std::string name) { boost::char_separator sep("_"); boost::tokenizer > tok(name, sep); std::vector toked; tok.assign(name); toked.assign(tok.begin(), tok.end()); assert(toked[0] == "volk"); toked.erase(toked.begin()); //ok. we're assuming a string in the form //(sig)_(multiplier-opt)_..._(name)_(sig)_(multiplier-opt)_..._(alignment) enum { SIDE_INPUT, SIDE_NAME, SIDE_OUTPUT } side = SIDE_INPUT; std::string fn_name; volk_type_t type; BOOST_FOREACH(std::string token, toked) { try { type = volk_type_from_string(token); if(side == SIDE_NAME) side = SIDE_OUTPUT; //if this is the first one after the name... if(side == SIDE_INPUT) inputsig.push_back(type); else outputsig.push_back(type); } catch (...){ if(token[0] == 'x') { //it's a multiplier if(side == SIDE_INPUT) assert(inputsig.size() > 0); else assert(outputsig.size() > 0); int multiplier = boost::lexical_cast(token.substr(1, token.size()-1)); //will throw if invalid for(int i=1; i &buffs, unsigned int vlen, unsigned int iter, std::string arch) { while(iter--) func(buffs[0], vlen, arch.c_str()); } inline void run_cast_test2(volk_fn_2arg func, std::vector &buffs, unsigned int vlen, unsigned int iter, std::string arch) { while(iter--) func(buffs[0], buffs[1], vlen, arch.c_str()); } inline void run_cast_test3(volk_fn_3arg func, std::vector &buffs, unsigned int vlen, unsigned int iter, std::string arch) { while(iter--) func(buffs[0], buffs[1], buffs[2], vlen, arch.c_str()); } inline void run_cast_test4(volk_fn_4arg func, std::vector &buffs, unsigned int vlen, unsigned int iter, std::string arch) { while(iter--) func(buffs[0], buffs[1], buffs[2], buffs[3], vlen, arch.c_str()); } inline void run_cast_test1_s32f(volk_fn_1arg_s32f func, std::vector &buffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch) { while(iter--) func(buffs[0], scalar, vlen, arch.c_str()); } inline void run_cast_test2_s32f(volk_fn_2arg_s32f func, std::vector &buffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch) { while(iter--) func(buffs[0], buffs[1], scalar, vlen, arch.c_str()); } inline void run_cast_test3_s32f(volk_fn_3arg_s32f func, std::vector &buffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch) { while(iter--) func(buffs[0], buffs[1], buffs[2], scalar, vlen, arch.c_str()); } template bool fcompare(t *in1, t *in2, unsigned int vlen, float tol) { bool fail = false; int print_max_errs = 10; for(int i=0; i tol) { fail=true; if(print_max_errs-- > 0) { std::cout << "offset " << i << " in1: " << t(((t *)(in1))[i]) << " in2: " << t(((t *)(in2))[i]) << std::endl; } } } return fail; } template bool icompare(t *in1, t *in2, unsigned int vlen, unsigned int tol) { bool fail = false; int print_max_errs = 10; for(int i=0; i tol) { fail=true; if(print_max_errs-- > 0) { std::cout << "offset " << i << " in1: " << static_cast(t(((t *)(in1))[i])) << " in2: " << static_cast(t(((t *)(in2))[i])) << std::endl; } } } return fail; } bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name, float tol, float scalar, int vlen, int iter) { std::cout << "RUN_VOLK_TESTS: " << name << std::endl; //first let's get a list of available architectures for the test std::vector arch_list = get_arch_list(archs); if(arch_list.size() < 2) { std::cout << "no architectures to test" << std::endl; return false; } //now we have to get a function signature by parsing the name std::vector inputsig, outputsig; get_signatures_from_name(inputsig, outputsig, name); //pull the input scalars into their own vector std::vector inputsc; for(int i=0; i inbuffs; std::vector free_buffs; //this is just a list of void*'s that i'll have to free later. //we need it because we dupe void*s in test_data below. make_buffer_for_signature(inbuffs, inputsig, vlen); for(int i=0; i > test_data; for(int i=0; i arch_buffs; for(int j=0; j both_sigs; both_sigs.insert(both_sigs.end(), outputsig.begin(), outputsig.end()); both_sigs.insert(both_sigs.end(), inputsig.begin(), inputsig.end()); //now run the test clock_t start, end; for(int i = 0; i < arch_list.size(); i++) { start = clock(); switch(both_sigs.size()) { case 1: if(inputsc.size() == 0) { run_cast_test1((volk_fn_1arg)(manual_func), test_data[i], vlen, iter, arch_list[i]); } else if(inputsc.size() == 1 && inputsc[0].is_float) { run_cast_test1_s32f((volk_fn_1arg_s32f)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]); } else throw "unsupported 1 arg function >1 scalars"; break; case 2: if(inputsc.size() == 0) { run_cast_test2((volk_fn_2arg)(manual_func), test_data[i], vlen, iter, arch_list[i]); } else if(inputsc.size() == 1 && inputsc[0].is_float) { run_cast_test2_s32f((volk_fn_2arg_s32f)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]); } else throw "unsupported 2 arg function >1 scalars"; break; case 3: if(inputsc.size() == 0) { run_cast_test3((volk_fn_3arg)(manual_func), test_data[i], vlen, iter, arch_list[i]); } else if(inputsc.size() == 1 && inputsc[0].is_float) { run_cast_test3_s32f((volk_fn_3arg_s32f)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]); } else throw "unsupported 3 arg function >1 scalars"; break; case 4: run_cast_test4((volk_fn_4arg)(manual_func), test_data[i], vlen, iter, arch_list[i]); break; default: throw "no function handler for this signature"; break; } end = clock(); std::cout << arch_list[i] << " completed in " << (double)(end-start)/(double)CLOCKS_PER_SEC << "s" << std::endl; } //and now compare each output to the generic output //first we have to know which output is the generic one, they aren't in order... int generic_offset=0; for(int i=0; i