diff options
Diffstat (limited to 'volk')
-rw-r--r-- | volk/include/volk/volk_16sc_magnitude_16s_aligned16.h | 15 | ||||
-rw-r--r-- | volk/include/volk/volk_16sc_magnitude_32f_aligned16.h | 14 | ||||
-rw-r--r-- | volk/include/volk/volk_32fc_magnitude_16s_aligned16.h | 14 | ||||
-rw-r--r-- | volk/include/volk/volk_32fc_magnitude_32f_aligned16.h | 13 | ||||
-rw-r--r-- | volk/include/volk/volk_32s_or_aligned16.h | 14 | ||||
-rw-r--r-- | volk/lib/qa_16sc_magnitude_16s_aligned16.cc | 9 | ||||
-rw-r--r-- | volk/lib/qa_16sc_magnitude_32f_aligned16.cc | 20 | ||||
-rw-r--r-- | volk/lib/qa_32f_divide_aligned16.cc | 9 | ||||
-rw-r--r-- | volk/lib/qa_32fc_magnitude_16s_aligned16.cc | 9 | ||||
-rw-r--r-- | volk/lib/qa_32fc_magnitude_32f_aligned16.cc | 9 | ||||
-rw-r--r-- | volk/lib/qa_32s_or_aligned16.cc | 9 | ||||
-rw-r--r-- | volk/orc/Makefile.am | 6 | ||||
-rw-r--r-- | volk/orc/volk_16sc_magnitude_16s_aligned16_orc_impl.orc | 24 | ||||
-rw-r--r-- | volk/orc/volk_16sc_magnitude_32f_aligned16_orc_impl.orc | 25 | ||||
-rw-r--r-- | volk/orc/volk_32fc_magnitude_16s_aligned16_orc_impl.orc | 25 | ||||
-rw-r--r-- | volk/orc/volk_32fc_magnitude_32f_aligned16_orc_impl.orc | 21 | ||||
-rw-r--r-- | volk/orc/volk_32s_or_aligned16_orc_impl.orc | 5 |
17 files changed, 234 insertions, 7 deletions
diff --git a/volk/include/volk/volk_16sc_magnitude_16s_aligned16.h b/volk/include/volk/volk_16sc_magnitude_16s_aligned16.h index 1482ab82e..9f3222aa6 100644 --- a/volk/include/volk/volk_16sc_magnitude_16s_aligned16.h +++ b/volk/include/volk/volk_16sc_magnitude_16s_aligned16.h @@ -164,7 +164,7 @@ static inline void volk_16sc_magnitude_16s_aligned16_generic(int16_t* magnitudeV const int16_t* complexVectorPtr = (const int16_t*)complexVector; int16_t* magnitudeVectorPtr = magnitudeVector; unsigned int number = 0; - const float scalar = 32786.0; + const float scalar = 32768.0; for(number = 0; number < num_points; number++){ float real = ((float)(*complexVectorPtr++)) / scalar; float imag = ((float)(*complexVectorPtr++)) / scalar; @@ -173,7 +173,18 @@ static inline void volk_16sc_magnitude_16s_aligned16_generic(int16_t* magnitudeV } #endif /* LV_HAVE_GENERIC */ - +#if LV_HAVE_ORC +/*! + \brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector; this variant forwards to the Orc-generated kernel + \param complexVector The vector containing the complex input values + \param magnitudeVector The vector containing the real output values + \param num_points The number of complex values in complexVector to be calculated and stored into magnitudeVector +*/ +extern void volk_16sc_magnitude_16s_aligned16_orc_impl(int16_t* magnitudeVector, const lv_16sc_t* complexVector, unsigned int num_points); +static inline void volk_16sc_magnitude_16s_aligned16_orc(int16_t* magnitudeVector, const lv_16sc_t* complexVector, unsigned int num_points){ + volk_16sc_magnitude_16s_aligned16_orc_impl(magnitudeVector, complexVector, num_points); +} +#endif /* LV_HAVE_ORC */ #endif /* INCLUDED_VOLK_16sc_MAGNITUDE_16s_ALIGNED16_H */ diff --git a/volk/include/volk/volk_16sc_magnitude_32f_aligned16.h b/volk/include/volk/volk_16sc_magnitude_32f_aligned16.h index 9c2a48835..e063ae432 100644 --- a/volk/include/volk/volk_16sc_magnitude_32f_aligned16.h +++ b/volk/include/volk/volk_16sc_magnitude_32f_aligned16.h @@ -161,7 +161,19 
@@ static inline void volk_16sc_magnitude_32f_aligned16_generic(float* magnitudeVec } #endif /* LV_HAVE_GENERIC */ - +#if LV_HAVE_ORC +/*! + \brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector; this variant forwards to the Orc-generated kernel + \param complexVector The vector containing the complex input values + \param magnitudeVector The vector containing the real output values + \param scalar The data value to be divided against each input data value of the input complex vector + \param num_points The number of complex values in complexVector to be calculated and stored into magnitudeVector +*/ +extern void volk_16sc_magnitude_32f_aligned16_orc_impl(float* magnitudeVector, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points); +static inline void volk_16sc_magnitude_32f_aligned16_orc(float* magnitudeVector, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){ + volk_16sc_magnitude_32f_aligned16_orc_impl(magnitudeVector, complexVector, scalar, num_points); +} +#endif /* LV_HAVE_ORC */ #endif /* INCLUDED_VOLK_16sc_MAGNITUDE_32f_ALIGNED16_H */ diff --git a/volk/include/volk/volk_32fc_magnitude_16s_aligned16.h b/volk/include/volk/volk_32fc_magnitude_16s_aligned16.h index 4e64d8c22..4e590e120 100644 --- a/volk/include/volk/volk_32fc_magnitude_16s_aligned16.h +++ b/volk/include/volk/volk_32fc_magnitude_16s_aligned16.h @@ -140,7 +140,19 @@ static inline void volk_32fc_magnitude_16s_aligned16_generic(int16_t* magnitudeV } #endif /* LV_HAVE_GENERIC */ - +#if LV_HAVE_ORC +/*! 
+ \brief Calculates the magnitude of the complexVector, scales the resulting value and stores the results in the magnitudeVector + \param complexVector The vector containing the complex input values + \param scalar The scale factor by which the magnitude of each complex value is multiplied + \param magnitudeVector The vector containing the real output values + \param num_points The number of complex values in complexVector to be calculated and stored into magnitudeVector +*/ +extern void volk_32fc_magnitude_16s_aligned16_orc_impl(int16_t* magnitudeVector, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points); +static inline void volk_32fc_magnitude_16s_aligned16_orc(int16_t* magnitudeVector, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points){ + volk_32fc_magnitude_16s_aligned16_orc_impl(magnitudeVector, complexVector, scalar, num_points); +} +#endif /* LV_HAVE_ORC */ #endif /* INCLUDED_VOLK_32fc_MAGNITUDE_16s_ALIGNED16_H */ diff --git a/volk/include/volk/volk_32fc_magnitude_32f_aligned16.h b/volk/include/volk/volk_32fc_magnitude_32f_aligned16.h index 7a8fd1ef9..3ea62da6a 100644 --- a/volk/include/volk/volk_32fc_magnitude_32f_aligned16.h +++ b/volk/include/volk/volk_32fc_magnitude_32f_aligned16.h @@ -115,7 +115,18 @@ static inline void volk_32fc_magnitude_32f_aligned16_generic(float* magnitudeVec } #endif /* LV_HAVE_GENERIC */ - +#if LV_HAVE_ORC + /*! 
+ \brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector; this variant forwards to the Orc-generated kernel + \param complexVector The vector containing the complex input values + \param magnitudeVector The vector containing the real output values + \param num_points The number of complex values in complexVector to be calculated and stored into magnitudeVector + */ +extern void volk_32fc_magnitude_32f_aligned16_orc_impl(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points); +static inline void volk_32fc_magnitude_32f_aligned16_orc(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){ + volk_32fc_magnitude_32f_aligned16_orc_impl(magnitudeVector, complexVector, num_points); +} +#endif /* LV_HAVE_ORC */ #endif /* INCLUDED_VOLK_32fc_MAGNITUDE_32f_ALIGNED16_H */ diff --git a/volk/include/volk/volk_32s_or_aligned16.h b/volk/include/volk/volk_32s_or_aligned16.h index f4c427c4d..64748d535 100644 --- a/volk/include/volk/volk_32s_or_aligned16.h +++ b/volk/include/volk/volk_32s_or_aligned16.h @@ -63,7 +63,19 @@ static inline void volk_32s_or_aligned16_generic(int32_t* cVector, const int32_t } #endif /* LV_HAVE_GENERIC */ - +#if LV_HAVE_ORC +/*! 
+ \brief Ors the two input vectors and stores their results in the third vector + \param cVector The vector where the results will be stored + \param aVector One of the vectors to be ored + \param bVector One of the vectors to be ored + \param num_points The number of values in aVector and bVector to be ored together and stored into cVector +*/ +extern void volk_32s_or_aligned16_orc_impl(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points); +static inline void volk_32s_or_aligned16_orc(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points){ + volk_32s_or_aligned16_orc_impl(cVector, aVector, bVector, num_points); +} +#endif /* LV_HAVE_ORC */ #endif /* INCLUDED_VOLK_32s_OR_ALIGNED16_H */ diff --git a/volk/lib/qa_16sc_magnitude_16s_aligned16.cc b/volk/lib/qa_16sc_magnitude_16s_aligned16.cc index b14610757..c8f13ff84 100644 --- a/volk/lib/qa_16sc_magnitude_16s_aligned16.cc +++ b/volk/lib/qa_16sc_magnitude_16s_aligned16.cc @@ -23,6 +23,7 @@ void qa_16sc_magnitude_16s_aligned16::t1() { std::complex<int16_t> input0[vlen] __attribute__ ((aligned (16))); int16_t output_generic[vlen] __attribute__ ((aligned (16))); + int16_t output_orc[vlen] __attribute__ ((aligned (16))); int16_t output_sse[vlen] __attribute__ ((aligned (16))); int16_t output_sse3[vlen] __attribute__ ((aligned (16))); @@ -41,6 +42,13 @@ void qa_16sc_magnitude_16s_aligned16::t1() { printf("generic_time: %f\n", total); start = clock(); for(int count = 0; count < ITERS; ++count) { + volk_16sc_magnitude_16s_aligned16_manual(output_orc, input0, vlen, "orc"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("orc_time: %f\n", total); + start = clock(); + for(int count = 0; count < ITERS; ++count) { volk_16sc_magnitude_16s_aligned16_manual(output_sse, input0, vlen, "sse"); } end = clock(); @@ -64,6 +72,7 @@ void qa_16sc_magnitude_16s_aligned16::t1() { //printf("%d...%d\n", output0[i], output01[i]); 
CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], 1.1); CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], 1.1); + CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], 1.1); } } diff --git a/volk/lib/qa_16sc_magnitude_32f_aligned16.cc b/volk/lib/qa_16sc_magnitude_32f_aligned16.cc index 2c9e48f6e..e7178863c 100644 --- a/volk/lib/qa_16sc_magnitude_32f_aligned16.cc +++ b/volk/lib/qa_16sc_magnitude_32f_aligned16.cc @@ -15,6 +15,7 @@ void qa_16sc_magnitude_32f_aligned16::t1() { std::complex<int16_t> input0[vlen] __attribute__ ((aligned (16))); float output_generic[vlen] __attribute__ ((aligned (16))); + float output_orc[vlen] __attribute__ ((aligned (16))); float output_known[vlen] __attribute__ ((aligned (16))); int16_t* inputLoad = (int16_t*)input0; @@ -37,6 +38,14 @@ void qa_16sc_magnitude_32f_aligned16::t1() { end = clock(); total = (double)(end-start)/(double)CLOCKS_PER_SEC; printf("generic_time: %f\n", total); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_16sc_magnitude_32f_aligned16_manual(output_orc, input0, scale, vlen, "orc"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("orc_time: %f\n", total); /* for(int i = 0; i < 100; ++i) { @@ -48,6 +57,7 @@ void qa_16sc_magnitude_32f_aligned16::t1() { for(int i = 0; i < vlen; ++i) { //printf("%d...%d\n", output0[i], output01[i]); CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_known[i], fabs(output_generic[i])*1e-4); + CPPUNIT_ASSERT_DOUBLES_EQUAL(output_orc[i], output_known[i], fabs(output_generic[i])*1e-4); } } @@ -63,6 +73,7 @@ void qa_16sc_magnitude_32f_aligned16::t1() { std::complex<int16_t> input0[vlen] __attribute__ ((aligned (16))); float output_generic[vlen] __attribute__ ((aligned (16))); + float output_orc[vlen] __attribute__ ((aligned (16))); float output_sse[vlen] __attribute__ ((aligned (16))); float output_sse3[vlen] __attribute__ ((aligned (16))); @@ -81,6 +92,14 @@ void 
qa_16sc_magnitude_32f_aligned16::t1() { printf("generic_time: %f\n", total); start = clock(); for(int count = 0; count < ITERS; ++count) { + volk_16sc_magnitude_32f_aligned16_manual(output_orc, input0, 32768.0, vlen, "orc"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("orc_time: %f\n", total); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { volk_16sc_magnitude_32f_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse"); } end = clock(); @@ -104,6 +123,7 @@ void qa_16sc_magnitude_32f_aligned16::t1() { //printf("%d...%d\n", output0[i], output01[i]); CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4); CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], fabs(output_generic[i])*1e-4); + CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], fabs(output_generic[i])*1e-4); } } diff --git a/volk/lib/qa_32f_divide_aligned16.cc b/volk/lib/qa_32f_divide_aligned16.cc index f104e0443..b2c2ecf9a 100644 --- a/volk/lib/qa_32f_divide_aligned16.cc +++ b/volk/lib/qa_32f_divide_aligned16.cc @@ -88,6 +88,7 @@ void qa_32f_divide_aligned16::t1() { float output0[vlen] __attribute__ ((aligned (16))); float output01[vlen] __attribute__ ((aligned (16))); + float output02[vlen] __attribute__ ((aligned (16))); for(int i = 0; i < vlen; ++i) { input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); @@ -104,6 +105,13 @@ void qa_32f_divide_aligned16::t1() { printf("generic_time: %f\n", total); start = clock(); for(int count = 0; count < ITERS; ++count) { + volk_32f_divide_aligned16_manual(output02, input0, input1, vlen, "orc"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("orc_time: %f\n", total); + start = clock(); + for(int count = 0; count < ITERS; ++count) { volk_32f_divide_aligned16_manual(output01, input0, input1, vlen, "sse"); } end = clock(); @@ -117,6 +125,7 @@ void qa_32f_divide_aligned16::t1() { 
for(int i = 0; i < vlen; ++i) { //printf("%d...%d\n", output0[i], output01[i]); CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); + CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]); } } diff --git a/volk/lib/qa_32fc_magnitude_16s_aligned16.cc b/volk/lib/qa_32fc_magnitude_16s_aligned16.cc index a4be1616b..c3e65866b 100644 --- a/volk/lib/qa_32fc_magnitude_16s_aligned16.cc +++ b/volk/lib/qa_32fc_magnitude_16s_aligned16.cc @@ -23,6 +23,7 @@ void qa_32fc_magnitude_16s_aligned16::t1() { std::complex<float> input0[vlen] __attribute__ ((aligned (16))); int16_t output_generic[vlen] __attribute__ ((aligned (16))); + int16_t output_orc[vlen] __attribute__ ((aligned (16))); int16_t output_sse[vlen] __attribute__ ((aligned (16))); int16_t output_sse3[vlen] __attribute__ ((aligned (16))); @@ -41,6 +42,13 @@ void qa_32fc_magnitude_16s_aligned16::t1() { printf("generic_time: %f\n", total); start = clock(); for(int count = 0; count < ITERS; ++count) { + volk_32fc_magnitude_16s_aligned16_manual(output_orc, input0, 32768.0, vlen, "orc"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("orc_time: %f\n", total); + start = clock(); + for(int count = 0; count < ITERS; ++count) { volk_32fc_magnitude_16s_aligned16_manual(output_sse, input0, 32768.0, vlen, "sse"); } end = clock(); @@ -64,6 +72,7 @@ void qa_32fc_magnitude_16s_aligned16::t1() { //printf("%d...%d\n", output0[i], output01[i]); CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], 1.1); CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], 1.1); + CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], 1.1); } } diff --git a/volk/lib/qa_32fc_magnitude_32f_aligned16.cc b/volk/lib/qa_32fc_magnitude_32f_aligned16.cc index d69ada408..6a1d46c7a 100644 --- a/volk/lib/qa_32fc_magnitude_32f_aligned16.cc +++ b/volk/lib/qa_32fc_magnitude_32f_aligned16.cc @@ -23,6 +23,7 @@ void qa_32fc_magnitude_32f_aligned16::t1() { std::complex<float> input0[vlen] __attribute__ ((aligned (16))); 
float output_generic[vlen] __attribute__ ((aligned (16))); + float output_orc[vlen] __attribute__ ((aligned (16))); float output_sse[vlen] __attribute__ ((aligned (16))); float output_sse3[vlen] __attribute__ ((aligned (16))); @@ -41,6 +42,13 @@ void qa_32fc_magnitude_32f_aligned16::t1() { printf("generic_time: %f\n", total); start = clock(); for(int count = 0; count < ITERS; ++count) { + volk_32fc_magnitude_32f_aligned16_manual(output_orc, input0, vlen, "orc"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("orc_time: %f\n", total); + start = clock(); + for(int count = 0; count < ITERS; ++count) { volk_32fc_magnitude_32f_aligned16_manual(output_sse, input0, vlen, "sse"); } end = clock(); @@ -64,6 +72,7 @@ void qa_32fc_magnitude_32f_aligned16::t1() { //printf("%d...%d\n", output0[i], output01[i]); CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse[i], fabs(output_generic[i])*1e-4); CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], fabs(output_generic[i])*1e-4); + CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_orc[i], fabs(output_generic[i])*1e-4); } } diff --git a/volk/lib/qa_32s_or_aligned16.cc b/volk/lib/qa_32s_or_aligned16.cc index e09dfb91c..9ea5283a6 100644 --- a/volk/lib/qa_32s_or_aligned16.cc +++ b/volk/lib/qa_32s_or_aligned16.cc @@ -25,6 +25,7 @@ void qa_32s_or_aligned16::t1() { int32_t output0[vlen] __attribute__ ((aligned (16))); int32_t output01[vlen] __attribute__ ((aligned (16))); + int32_t output02[vlen] __attribute__ ((aligned (16))); for(int i = 0; i < vlen; ++i) { input0[i] = ((int32_t) (rand() - (RAND_MAX/2))); @@ -41,6 +42,13 @@ void qa_32s_or_aligned16::t1() { printf("generic_time: %f\n", total); start = clock(); for(int count = 0; count < ITERS; ++count) { + volk_32s_or_aligned16_manual(output02, input0, input1, vlen, "orc"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("orc_time: %f\n", total); + start = clock(); + for(int count = 0; count 
< ITERS; ++count) { volk_32s_or_aligned16_manual(output01, input0, input1, vlen, "sse"); } end = clock(); @@ -54,6 +62,7 @@ void qa_32s_or_aligned16::t1() { for(int i = 0; i < vlen; ++i) { //printf("%d...%d\n", output0[i], output01[i]); CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); + CPPUNIT_ASSERT_EQUAL(output0[i], output02[i]); } } diff --git a/volk/orc/Makefile.am b/volk/orc/Makefile.am index c71625d87..3f105fd80 100644 --- a/volk/orc/Makefile.am +++ b/volk/orc/Makefile.am @@ -29,11 +29,16 @@ volk_8s_convert_16s_aligned16_orc_impl.orc \ volk_8s_convert_32f_aligned16_orc_impl.orc \ volk_16u_byteswap_aligned16_orc_impl.orc \ volk_32s_and_aligned16_orc_impl.orc \ +volk_32s_or_aligned16_orc_impl.orc \ volk_32f_add_aligned16_orc_impl.orc \ volk_32f_subtract_aligned16_orc_impl.orc \ volk_32f_divide_aligned16_orc_impl.orc \ volk_32f_multiply_aligned16_orc_impl.orc \ -volk_32f_sqrt_aligned16_orc_impl.orc +volk_32f_sqrt_aligned16_orc_impl.orc \ +volk_16sc_magnitude_16s_aligned16_orc_impl.orc \ +volk_16sc_magnitude_32f_aligned16_orc_impl.orc \ +volk_32fc_magnitude_32f_aligned16_orc_impl.orc \ +volk_32fc_magnitude_16s_aligned16_orc_impl.orc diff --git a/volk/orc/volk_16sc_magnitude_16s_aligned16_orc_impl.orc b/volk/orc/volk_16sc_magnitude_16s_aligned16_orc_impl.orc new file mode 100644 index 000000000..f6c959c00 --- /dev/null +++ b/volk/orc/volk_16sc_magnitude_16s_aligned16_orc_impl.orc @@ -0,0 +1,24 @@ +.function volk_16sc_magnitude_16s_aligned16_orc_impl +.source 4 src +.dest 2 dst +.temp 2 reals +.temp 2 imags +.temp 4 reall +.temp 4 imagl +.temp 4 realf +.temp 4 imagf +.temp 4 sumf +.temp 4 rootf +.temp 4 rootl +# magnitude = sqrt(re^2 + im^2): widen the int16 halves to float, square, sum, take the root, narrow back to int16 +splitlw reals, imags, src +convswl reall, reals +convswl imagl, imags +convlf realf, reall +convlf imagf, imagl +mulf realf, realf, realf +mulf imagf, imagf, imagf +addf sumf, realf, imagf +sqrtf rootf, sumf +convfl rootl, rootf +convlw dst, rootl diff --git a/volk/orc/volk_16sc_magnitude_32f_aligned16_orc_impl.orc b/volk/orc/volk_16sc_magnitude_32f_aligned16_orc_impl.orc new file mode 
100644 index 000000000..44654ad8e --- /dev/null +++ b/volk/orc/volk_16sc_magnitude_32f_aligned16_orc_impl.orc @@ -0,0 +1,25 @@ +.function volk_16sc_magnitude_32f_aligned16_orc_impl +.source 4 src +.dest 4 dst +.floatparam 4 scalar +.temp 4 invscalar +.temp 4 reall +.temp 4 imagl +.temp 2 reals +.temp 2 imags +.temp 4 realf +.temp 4 imagf +.temp 4 sumf + +divf invscalar, 1.0, scalar +splitlw reals, imags, src +convswl reall, reals +convswl imagl, imags +convlf realf, reall +convlf imagf, imagl +mulf realf, realf, invscalar +mulf imagf, imagf, invscalar +mulf realf, realf, realf +mulf imagf, imagf, imagf +addf sumf, realf, imagf +sqrtf dst, sumf diff --git a/volk/orc/volk_32fc_magnitude_16s_aligned16_orc_impl.orc b/volk/orc/volk_32fc_magnitude_16s_aligned16_orc_impl.orc new file mode 100644 index 000000000..db8405e59 --- /dev/null +++ b/volk/orc/volk_32fc_magnitude_16s_aligned16_orc_impl.orc @@ -0,0 +1,18 @@ +.function volk_32fc_magnitude_16s_aligned16_orc_impl +.source 8 src +.dest 2 dst +.floatparam 4 scalar +.temp 4 realf +.temp 4 imagf +.temp 4 sumf +.temp 4 rootf +.temp 4 rootl +# input halves are already 32-bit floats (no int->float conversion); result is sqrt(re^2 + im^2) * scalar narrowed to int16 +splitql realf, imagf, src +mulf realf, realf, realf +mulf imagf, imagf, imagf +addf sumf, realf, imagf +sqrtf rootf, sumf +mulf rootf, rootf, scalar +convfl rootl, rootf +convlw dst, rootl diff --git a/volk/orc/volk_32fc_magnitude_32f_aligned16_orc_impl.orc b/volk/orc/volk_32fc_magnitude_32f_aligned16_orc_impl.orc new file mode 100644 index 000000000..cc5c85b45 --- /dev/null +++ b/volk/orc/volk_32fc_magnitude_32f_aligned16_orc_impl.orc @@ -0,0 +1,12 @@ +.function volk_32fc_magnitude_32f_aligned16_orc_impl +.source 8 src +.dest 4 dst +.temp 4 realf +.temp 4 imagf +.temp 4 sumf
+# no scalar parameter: the C prototype passes only (dst, src, num_points); input halves are already floats +splitql realf, imagf, src +mulf realf, realf, realf +mulf imagf, imagf, imagf +addf sumf, realf, imagf +sqrtf dst, sumf diff --git a/volk/orc/volk_32s_or_aligned16_orc_impl.orc b/volk/orc/volk_32s_or_aligned16_orc_impl.orc new file mode 100644 index 000000000..6d2a3859a --- /dev/null +++ b/volk/orc/volk_32s_or_aligned16_orc_impl.orc @@ -0,0 +1,5 @@ +.function volk_32s_or_aligned16_orc_impl +.dest 4 dst +.source 4 src1 +.source 4 src2 +orl dst, src1, src2 |