diff options
Diffstat (limited to 'volk')
-rw-r--r-- | volk/lib/qa_utils.cc | 357 | ||||
-rw-r--r-- | volk/lib/qa_utils.h | 15 |
2 files changed, 231 insertions, 141 deletions
diff --git a/volk/lib/qa_utils.cc b/volk/lib/qa_utils.cc index a8c00c143..e73b70985 100644 --- a/volk/lib/qa_utils.cc +++ b/volk/lib/qa_utils.cc @@ -7,7 +7,8 @@ #include <iostream> #include <vector> #include <time.h> -//#include <math.h> +#include <math.h> +#include <boost/lexical_cast.hpp> //#include <volk/volk_runtime.h> #include <volk/volk_registry.h> #include <volk/volk.h> @@ -24,44 +25,53 @@ void random_floats (float *buf, unsigned n) buf[i] = uniform (); } -void load_random_data(void *data, std::string sig, unsigned int n) { - if(sig == "32fc") { - random_floats((float *)data, n*2); - } else if(sig == "32f") { +void load_random_data(void *data, volk_type_t type, unsigned int n) { + if(type.is_complex) n *= 2; + if(type.is_float) { + assert(type.size == 4); //TODO: double support random_floats((float *)data, n); - } else if(sig == "32u") { - for(int i=0; i<n; i++) ((uint32_t *)data)[i] = (uint32_t) ((rand() - (RAND_MAX/2)) / (RAND_MAX/2)); - } else if(sig == "32s") { - for(int i=0; i<n; i++) ((int32_t *)data)[i] = ((int32_t) (rand() - (RAND_MAX/2))); - } else if(sig == "16u") { - for(int i=0; i<n; i++) ((uint16_t *)data)[i] = (uint16_t) ((rand() - (RAND_MAX/2)) / (RAND_MAX/2)); - } else if(sig == "16s") { - for(int i=0; i<n; i++) ((int16_t *)data)[i] = ((int16_t)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 32768.0)); - } else if(sig == "16sc") { - for(int i=0; i<n*2; i++) ((int16_t *)data)[i] = ((int16_t)((((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * 32768.0)); - } else if(sig == "8u") { - for(int i=0; i<n; i++) ((uint8_t *)data)[i] = ((uint8_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)) * 256.0)); - } else if(sig == "8s") { - for(int i=0; i<n; i++) ((int8_t *)data)[i] = ((int8_t)(((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)) * 128.0)); - } else std::cout << "load_random_data(): Invalid sig: " << sig << std::endl; + } else { + float int_max = pow(2, type.size*8); + if(type.is_signed) int_max /= 2.0; + for(int i=0; i<n; i++) { + float scaled_rand = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * int_max; + //man i really don't know how to do this in a more clever way, you have to cast down at some point + switch(type.size) { + case 8: + if(type.is_signed) ((int64_t *)data)[i] = (int64_t) scaled_rand; + else ((uint64_t *)data)[i] = (uint64_t) scaled_rand; + break; + case 4: + if(type.is_signed) ((int32_t *)data)[i] = (int32_t) scaled_rand; + else ((uint32_t *)data)[i] = (uint32_t) scaled_rand; + break; + case 2: + if(type.is_signed) ((int16_t *)data)[i] = (int16_t) scaled_rand; + else ((uint16_t *)data)[i] = (uint16_t) scaled_rand; + break; + case 1: + if(type.is_signed) ((int8_t *)data)[i] = (int8_t) scaled_rand; + else ((uint8_t *)data)[i] = (uint8_t) scaled_rand; + break; + default: + throw; //no shenanigans here + } + } + } } -template <class t> -t *make_aligned_buffer(unsigned int len) { - t *buf; +void *make_aligned_buffer(unsigned int len, unsigned int size) { + void *buf; int ret; - ret = posix_memalign((void**)&buf, 16, len * sizeof(t)); + ret = posix_memalign((void**)&buf, 16, len * size); assert(ret == 0); return buf; } -void make_buffer_for_signature(std::vector<void *> &buffs, std::vector<std::string> inputsig, unsigned int vlen) { - BOOST_FOREACH(std::string sig, inputsig) { - if (sig=="32fc" || sig=="64f" || sig=="64u") buffs.push_back((void *) make_aligned_buffer<uint64_t>(vlen)); - else if(sig=="32f" || sig=="32u" || sig=="32s" || sig=="16sc") buffs.push_back((void *) make_aligned_buffer<uint32_t>(vlen)); - else if(sig=="16s" || sig=="16u" || sig=="8sc") buffs.push_back((void *) make_aligned_buffer<uint16_t>(vlen)); - else if(sig=="8s" || sig=="8u") buffs.push_back((void *) make_aligned_buffer<uint8_t>(vlen)); - else std::cout << "Invalid type: " << sig << std::endl; +void make_buffer_for_signature(std::vector<void *> &buffs, std::vector<volk_type_t> inputsig, unsigned int vlen) { + BOOST_FOREACH(volk_type_t sig, inputsig) { + if(!sig.is_scalar) //we don't make buffers for scalars + buffs.push_back(make_aligned_buffer(vlen, sig.size*(sig.is_complex ? 2 : 1))); } } @@ -109,22 +119,56 @@ static std::vector<std::string> get_arch_list(const int archs[]) { return archlist; } -static bool is_valid_type(std::string type) { - std::vector<std::string> valid_types = boost::assign::list_of("64f")("64u")("32fc")("32f") - ("32s")("32u")("16sc")("16s") - ("16u")("8s")("8sc")("8u") - ("s32f")("s16u")("s16s")("s8u") - ("s8s"); +volk_type_t volk_type_from_string(std::string name) { + volk_type_t type; + type.is_float = false; + type.is_scalar = false; + type.is_complex = false; + type.is_signed = false; + type.size = 0; + type.str = name; + + assert(name.size() > 1); - BOOST_FOREACH(std::string this_type, valid_types) { - if(type == this_type) return true; + //is it a scalar? + if(name[0] == 's') { + type.is_scalar = true; + name = name.substr(1, name.size()-1); + } + + //get the data size + int last_size_pos = name.find_last_of("0123456789"); + if(last_size_pos < 0) throw 0; + //will throw if malformed + int size = boost::lexical_cast<int>(name.substr(0, last_size_pos+1)); + + assert(((size % 8) == 0) && (size <= 64) && (size != 0)); + type.size = size/8; //in bytes + + for(int i=last_size_pos+1; i < name.size(); i++) { + switch (name[i]) { + case 'f': + type.is_float = true; + break; + case 'i': + type.is_signed = true; + break; + case 'c': + type.is_complex = true; + break; + case 'u': + type.is_signed = false; + break; + default: + throw; + } } - return false; -} + return type; +} -static void get_function_signature(std::vector<std::string> &inputsig, - std::vector<std::string> &outputsig, +static void get_signatures_from_name(std::vector<volk_type_t> &inputsig, + std::vector<volk_type_t> &outputsig, std::string name) { boost::char_separator<char> sep("_"); boost::tokenizer<boost::char_separator<char> > tok(name, sep); @@ -133,25 +177,38 @@ static void get_function_signature(std::vector<std::string> &inputsig, toked.assign(tok.begin(), tok.end()); assert(toked[0] == "volk"); - - inputsig.push_back(toked[1]); //mandatory - int pos = 2; - bool valid_type = true; - while(valid_type && pos < toked.size()) { - if(is_valid_type(toked[pos])) inputsig.push_back(toked[pos]); - else valid_type = false; - pos++; - } - while(!valid_type && pos < toked.size()) { - if(is_valid_type(toked[pos])) valid_type = true; - else pos++; - } - while(valid_type && pos < toked.size()) { - if(is_valid_type(toked[pos])) outputsig.push_back(toked[pos]); - else valid_type = false; - pos++; + toked.erase(toked.begin()); + + //ok. we're assuming a string in the form + //(sig)_(multiplier-opt)_..._(name)_(sig)_(multiplier-opt)_..._(alignment) + + enum { SIDE_INPUT, SIDE_OUTPUT } side = SIDE_INPUT; + std::string fn_name; + volk_type_t type; + BOOST_FOREACH(std::string token, toked) { + try { + type = volk_type_from_string(token); + if(side == SIDE_INPUT) inputsig.push_back(type); + else outputsig.push_back(type); + } catch (...){ + if(token[0] == 'x') { //it's a multiplier + if(side == SIDE_INPUT) assert(inputsig.size() > 0); + else assert(outputsig.size() > 0); + int multiplier = boost::lexical_cast<int>(token.substr(1, token.size()-1)); //will throw if invalid + for(int i=1; i<multiplier; i++) { + if(side == SIDE_INPUT) inputsig.push_back(inputsig.back()); + else outputsig.push_back(outputsig.back()); + } + } + else if(side == SIDE_INPUT) { //it's the function name, at least it better be + side = SIDE_OUTPUT; + fn_name = token; + } else { + if(token != toked.back()) throw; //the last token in the name is the alignment + } + } } - + //we don't need an output signature (some fn's operate on the input data, "in place"), but we do need at least one input! assert(inputsig.size() != 0); } @@ -171,61 +228,98 @@ inline void run_cast_test4(volk_fn_4arg func, void *outbuff, std::vector<void *> while(iter--) func(outbuff, inbuffs[0], inbuffs[1], inbuffs[2], vlen, arch.c_str()); } +inline void run_cast_test1_s32f(volk_fn_1arg_s32f func, void *buff, float scalar, unsigned int vlen, unsigned int iter, std::string arch) { + while(iter--) func(buff, scalar, vlen, arch.c_str()); +} + +inline void run_cast_test2_s32f(volk_fn_2arg_s32f func, void *outbuff, std::vector<void *> &inbuffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch) { + while(iter--) func(outbuff, inbuffs[0], scalar, vlen, arch.c_str()); +} + +template <class t> +bool fcompare(t *in1, t *in2, unsigned int vlen, float tol) { + for(int i=0; i<vlen; i++) { + if(fabs(((t *)(in1))[i] - ((t *)(in2))[i]) > tol) return 1; + } + return 0; +} + +template <class t> +bool icompare(t *in1, t *in2, unsigned int vlen) { + for(int i=0; i<vlen; i++) { + if(((t *)(in1))[i] != ((t *)(in2))[i]) return 1; + } + return 0; +} + bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name, float tol, int vlen, int iter) { std::cout << "RUN_VOLK_TESTS: " << name << std::endl; //first let's get a list of available architectures for the test std::vector<std::string> arch_list = get_arch_list(archs); - BOOST_FOREACH(std::string arch, arch_list) { - std::cout << "Found an arch: " << arch << std::endl; - } - //now we have to get a function signature by parsing the name - std::vector<std::string> inputsig, outputsig; - get_function_signature(inputsig, outputsig, name); - - for(int i=0; i<inputsig.size(); i++) std::cout << "Input: " << inputsig[i] << std::endl; - for(int i=0; i<outputsig.size(); i++) std::cout << "Output: " << outputsig[i] << std::endl; - - //now that we have that, we'll set up input buffers based on the function signature - std::vector<void *> inbuffs; - make_buffer_for_signature(inbuffs, inputsig, vlen); + std::vector<volk_type_t> inputsig, outputsig; + get_signatures_from_name(inputsig, outputsig, name); - //allocate output buffers -- one for each output for each arch - std::vector<void *> outbuffs; - BOOST_FOREACH(std::string arch, arch_list) { - make_buffer_for_signature(outbuffs, outputsig, vlen); - } - - //and set the input buffers to something random + std::vector<volk_type_t> inputsc, outputsc; for(int i=0; i<inputsig.size(); i++) { - load_random_data(inbuffs[i], inputsig[i], vlen); + if(inputsig[i].is_scalar) { + inputsc.push_back(inputsig[i]); + inputsig.erase(inputsig.begin() + i); + } } + for(int i=0; i<outputsig.size(); i++) { + if(outputsig[i].is_scalar) { + outputsc.push_back(outputsig[i]); + outputsig.erase(outputsig.begin() + i); + } + } + assert(outputsc.size() == 0); //we don't do output scalars yet + + //for(int i=0; i<inputsig.size(); i++) std::cout << "Input: " << inputsig[i].str << std::endl; + //for(int i=0; i<outputsig.size(); i++) std::cout << "Output: " << outputsig[i].str << std::endl; + std::vector<void *> inbuffs, outbuffs; - //so let's see here. if the operation has no output sig, it operates in place, - //and we want the output buffers to be the input buffers; we want to copy the input buffer to allllll the output buffers. - if(outputsig.size() == 0) { - //make a set of output buffers according to the input signature - BOOST_FOREACH(std::string arch, arch_list) { + if(outputsig.size() == 0) { //we're operating in place... + //assert(inputsig.size() == 1); //we only support 0 output 1 input right now... + make_buffer_for_signature(inbuffs, inputsig, vlen); //let's make an input buffer + load_random_data(inbuffs[0], inputsig[0], vlen); //and load it with random data + BOOST_FOREACH(std::string arch, arch_list) { //then copy the same random data to each output buffer make_buffer_for_signature(outbuffs, inputsig, vlen); + memcpy(outbuffs.back(), inbuffs[0], vlen*inputsig[0].size*(inputsig[0].is_complex?2:1)); + } + } else { + make_buffer_for_signature(inbuffs, inputsig, vlen); + BOOST_FOREACH(std::string arch, arch_list) { + make_buffer_for_signature(outbuffs, outputsig, vlen); + } + + //and set the input buffers to something random + for(int i=0; i<inbuffs.size(); i++) { + load_random_data(inbuffs[i], inputsig[i], vlen); } - //copy input buffer[0] to all the output buffers so it has something to operate on - //output buffer element size is the same as input buffer[0] - if( } - //now run the test clock_t start, end; for(int i = 0; i < arch_list.size(); i++) { start = clock(); - switch(outputsig.size()+inputsig.size()) { + + switch(inputsig.size() + outputsig.size()) { case 1: - run_cast_test1((volk_fn_1arg)(manual_func), outbuffs[i], vlen, iter, arch_list[i]); + if(inputsc.size() == 0) { + run_cast_test1((volk_fn_1arg)(manual_func), outbuffs[i], vlen, iter, arch_list[i]); + } else if(inputsc.size() == 1 && inputsc[0].is_float) { + run_cast_test1_s32f((volk_fn_1arg_s32f)(manual_func), outbuffs[i], 1000.0, vlen, iter, arch_list[i]); + } else throw "unsupported 1 arg function >1 scalars"; break; case 2: - run_cast_test2((volk_fn_2arg)(manual_func), outbuffs[i], inbuffs, vlen, iter, arch_list[i]); + if(inputsc.size() == 0) { + run_cast_test2((volk_fn_2arg)(manual_func), outbuffs[i], inbuffs, vlen, iter, arch_list[i]); + } else if(inputsc.size() == 1 && inputsc[0].is_float) { + run_cast_test2_s32f((volk_fn_2arg_s32f)(manual_func), outbuffs[i], inbuffs, 1000.0, vlen, iter, arch_list[i]); + } else throw "unsupported 2 arg function >1 scalars"; break; case 3: run_cast_test3((volk_fn_3arg)(manual_func), outbuffs[i], inbuffs, vlen, iter, arch_list[i]); @@ -234,69 +328,52 @@ bool run_volk_tests(const int archs[], void (*manual_func)(), std::string name, run_cast_test4((volk_fn_4arg)(manual_func), outbuffs[i], inbuffs, vlen, iter, arch_list[i]); break; default: + throw "no function handler for this signature"; break; } + end = clock(); std::cout << arch_list[i] << " completed in " << (double)(end-start)/(double)CLOCKS_PER_SEC << "s" << std::endl; } - //and now compare each output to the generic output //first we have to know which output is the generic one, they aren't in order... int generic_offset; for(int i=0; i<arch_list.size(); i++) if(arch_list[i] == "generic") generic_offset=i; - + + //now compare + if(outputsig.size() == 0) outputsig = inputsig; //a hack, i know + + bool fail = false; for(int i=0; i<arch_list.size(); i++) { if(i != generic_offset) { - if(outputsig[0] == "32fc") { - for(int j=0; j<vlen*2; j++) { - if(fabs(((float *)(outbuffs[generic_offset]))[j] - ((float *)(outbuffs[i]))[j]) > tol) { - std::cout << "Generic: " << ((float *)(outbuffs[generic_offset]))[j] << " " << arch_list[i] << ": " << ((float *)(outbuffs[i]))[j] << std::endl; - return 1; - } - } - } else if(outputsig[0] == "32f") { - for(int j=0; j<vlen; j++) { - if(fabs(((float *)(outbuffs[generic_offset]))[j] - ((float *)(outbuffs[i]))[j]) > tol) { - std::cout << "Generic: " << ((float *)(outbuffs[generic_offset]))[j] << " " << arch_list[i] << ": " << ((float *)(outbuffs[i]))[j] << std::endl; - return 1; - } - } - } else if(outputsig[0] == "32u" || outputsig[0] == "32s" || outputsig[0] == "16sc") { - for(int j=0; j<vlen; j++) { - if(((uint32_t *)(outbuffs[generic_offset]))[j] != ((uint32_t *)(outbuffs[i]))[j]) { - std::cout << "Generic: " << ((uint32_t *)(outbuffs[generic_offset]))[j] << " " << arch_list[i] << ": " << ((uint32_t *)(outbuffs[i]))[j] << std::endl; - return 1; - } - } - } else if(outputsig[0] == "16u" || outputsig[0] == "16s" || outputsig[0] == "8sc") { - for(int j=0; j<vlen; j++) { - if(((uint16_t *)(outbuffs[generic_offset]))[j] != ((uint16_t *)(outbuffs[i]))[j]) { - std::cout << "Generic: " << ((uint16_t *)(outbuffs[generic_offset]))[j] << " " << arch_list[i] << ": " << ((uint16_t *)(outbuffs[i]))[j] << std::endl; - return 1; - } - } - } else if(outputsig[0] == "8s" || outputsig[0] == "8u") { - for(int j=0; j<vlen; j++) { - if(((uint8_t *)(outbuffs[generic_offset]))[j] != ((uint8_t *)(outbuffs[i]))[j]) { - std::cout << "Generic: " << ((uint8_t *)(outbuffs[generic_offset]))[j] << " " << arch_list[i] << ": " << ((uint8_t *)(outbuffs[i]))[j] << std::endl; - return 1; - } - } + if(outputsig[0].str == "32fc") { + fail = fcompare((float *) outbuffs[generic_offset], (float *) outbuffs[i], vlen*2, tol); + } else if(outputsig[0].str == "32f") { + fail = fcompare((float *) outbuffs[generic_offset], (float *) outbuffs[i], vlen, tol); + } else if(outputsig[0].str == "32u" || outputsig[0].str == "32s" || outputsig[0].str == "16sc") { + fail = icompare((uint32_t *) outbuffs[generic_offset], (uint32_t *) outbuffs[i], vlen); + } else if(outputsig[0].size == 2) { + fail = icompare((uint16_t *) outbuffs[generic_offset], (uint16_t *) outbuffs[i], vlen); + } else if(outputsig[0].size == 1) { + fail = icompare((uint8_t *) outbuffs[generic_offset], (uint8_t *) outbuffs[i], vlen); } else { - std::cout << "Error: invalid type " << outputsig[0] << std::endl; - return 1; + std::cout << "Error: invalid type " << outputsig[0].str << std::endl; + fail = true; + } + if(fail) { + std::cout << name << ": fail on arch " << arch_list[i] << std::endl; } } } - BOOST_FOREACH(void *buf, inbuffs) { - free(buf); - } - BOOST_FOREACH(void *buf, outbuffs) { - free(buf); - } - return 0; +// BOOST_FOREACH(void *buf, inbuffs) { +// free(buf); +// } +// BOOST_FOREACH(void *buf, outbuffs) { +// free(buf); +// } + return fail; } diff --git a/volk/lib/qa_utils.h b/volk/lib/qa_utils.h index 00883bf8e..79c5d7778 100644 --- a/volk/lib/qa_utils.h +++ b/volk/lib/qa_utils.h @@ -4,6 +4,17 @@ #include <stdlib.h> #include <string> +struct volk_type_t { + bool is_float; + bool is_scalar; + bool is_signed; + bool is_complex; + int size; + std::string str; +}; + +volk_type_t volk_type_from_string(std::string); + float uniform(void); void random_floats(float *buf, unsigned n); @@ -11,9 +22,11 @@ bool run_volk_tests(const int[], void(*)(), std::string, float, int, int); #define VOLK_RUN_TESTS(func, tol, len, iter) BOOST_CHECK_EQUAL(run_volk_tests(func##_arch_defs, (void (*)())func##_manual, std::string(#func), tol, len, iter), 0) -typedef void (*volk_fn_1arg)(void *, unsigned int, const char*); +typedef void (*volk_fn_1arg)(void *, unsigned int, const char*); //one input, operate in place typedef void (*volk_fn_2arg)(void *, void *, unsigned int, const char*); typedef void (*volk_fn_3arg)(void *, void *, void *, unsigned int, const char*); typedef void (*volk_fn_4arg)(void *, void *, void *, void *, unsigned int, const char*); +typedef void (*volk_fn_1arg_s32f)(void *, float, unsigned int, const char*); //one input vector, one scalar float input +typedef void (*volk_fn_2arg_s32f)(void *, void *, float, unsigned int, const char*); #endif //VOLK_QA_UTILS_H |