summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--volk/apps/volk_profile.cc2
-rw-r--r--volk/include/volk/Makefile.am5
-rw-r--r--volk/include/volk/volk_32fc_conjugate_32fc_a.h64
-rw-r--r--volk/include/volk/volk_32fc_conjugate_32fc_u.h64
-rw-r--r--volk/lib/testqa.cc2
5 files changed, 136 insertions, 1 deletions
diff --git a/volk/apps/volk_profile.cc b/volk/apps/volk_profile.cc
index 6ba7f17bb..0da21ffa5 100644
--- a/volk/apps/volk_profile.cc
+++ b/volk/apps/volk_profile.cc
@@ -58,6 +58,8 @@ int main(int argc, char *argv[]) {
VOLK_PROFILE(volk_32fc_x2_multiply_32fc_u, 1e-4, 0, 204600, 1000, &results);
VOLK_PROFILE(volk_32fc_x2_multiply_conjugate_32fc_a, 1e-4, 0, 204600, 1000, &results);
VOLK_PROFILE(volk_32fc_x2_multiply_conjugate_32fc_u, 1e-4, 0, 204600, 1000, &results);
+ VOLK_PROFILE(volk_32fc_conjugate_32fc_a, 1e-4, 0, 204600, 1000, &results);
+ VOLK_PROFILE(volk_32fc_conjugate_32fc_u, 1e-4, 0, 204600, 1000, &results);
VOLK_PROFILE(volk_32f_s32f_convert_16i_a, 1, 32768, 204600, 10000, &results);
VOLK_PROFILE(volk_32f_s32f_convert_16i_u, 1, 32768, 204600, 10000, &results);
VOLK_PROFILE(volk_32f_s32f_convert_32i_a, 1, 2<<31, 204600, 10000, &results);
diff --git a/volk/include/volk/Makefile.am b/volk/include/volk/Makefile.am
index d071f18f2..f6b5835b1 100644
--- a/volk/include/volk/Makefile.am
+++ b/volk/include/volk/Makefile.am
@@ -134,4 +134,7 @@ volkinclude_HEADERS = \
volk_8i_convert_16i_a.h \
volk_8i_convert_16i_u.h \
volk_8i_s32f_convert_32f_a.h \
- volk_8i_s32f_convert_32f_u.h
+ volk_8i_s32f_convert_32f_u.h \
+ volk_32fc_conjugate_32fc_a.h \
+ volk_32fc_conjugate_32fc_u.h
+
diff --git a/volk/include/volk/volk_32fc_conjugate_32fc_a.h b/volk/include/volk/volk_32fc_conjugate_32fc_a.h
new file mode 100644
index 000000000..1518af9be
--- /dev/null
+++ b/volk/include/volk/volk_32fc_conjugate_32fc_a.h
@@ -0,0 +1,64 @@
+#ifndef INCLUDED_volk_32fc_conjugate_32fc_a_H
+#define INCLUDED_volk_32fc_conjugate_32fc_a_H
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <volk/volk_complex.h>
+#include <float.h>
+
+#ifdef LV_HAVE_SSE3
+#include <pmmintrin.h>
+ /*!
+ \brief Takes the conjugate of a complex vector.
+ \param cVector The vector where the results will be stored
+ \param aVector Vector to be conjugated
+ \param num_points The number of complex values in aVector to be conjugated and stored into cVector
+ */
+static inline void volk_32fc_conjugate_32fc_a_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, unsigned int num_points){
+ unsigned int number = 0;
+ const unsigned int halfPoints = num_points / 2;
+
+ __m128 x;
+ lv_32fc_t* c = cVector;
+ const lv_32fc_t* a = aVector;
+
+ __m128 conjugator = _mm_setr_ps(0, -0.f, 0, -0.f);
+
+ for(;number < halfPoints; number++){
+
+ x = _mm_load_ps((float*)a); // Load the complex data as ar,ai,br,bi
+
+ x = _mm_xor_ps(x, conjugator); // conjugate register
+
+ _mm_store_ps((float*)c,x); // Store the results back into the C container
+
+ a += 2;
+ c += 2;
+ }
+
+ if((num_points % 2) != 0) {
+ *c = lv_conj(*a);
+ }
+}
+#endif /* LV_HAVE_SSE3 */
+
+#ifdef LV_HAVE_GENERIC
+ /*!
+ \brief Takes the conjugate of a complex vector.
+ \param cVector The vector where the results will be stored
+ \param aVector Vector to be conjugated
+ \param num_points The number of complex values in aVector to be conjugated and stored into cVector
+ */
+static inline void volk_32fc_conjugate_32fc_a_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, unsigned int num_points){
+ lv_32fc_t* cPtr = cVector;
+ const lv_32fc_t* aPtr = aVector;
+ unsigned int number = 0;
+
+ for(number = 0; number < num_points; number++){
+ *cPtr++ = lv_conj(*aPtr++);
+ }
+}
+#endif /* LV_HAVE_GENERIC */
+
+
+#endif /* INCLUDED_volk_32fc_conjugate_32fc_a_H */
diff --git a/volk/include/volk/volk_32fc_conjugate_32fc_u.h b/volk/include/volk/volk_32fc_conjugate_32fc_u.h
new file mode 100644
index 000000000..b26fe0789
--- /dev/null
+++ b/volk/include/volk/volk_32fc_conjugate_32fc_u.h
@@ -0,0 +1,64 @@
+#ifndef INCLUDED_volk_32fc_conjugate_32fc_u_H
+#define INCLUDED_volk_32fc_conjugate_32fc_u_H
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <volk/volk_complex.h>
+#include <float.h>
+
+#ifdef LV_HAVE_SSE3
+#include <pmmintrin.h>
+ /*!
+ \brief Takes the conjugate of a complex vector.
+ \param cVector The vector where the results will be stored
+ \param aVector Vector to be conjugated
+ \param num_points The number of complex values in aVector to be conjugated and stored into cVector
+ */
+static inline void volk_32fc_conjugate_32fc_u_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, unsigned int num_points){
+ unsigned int number = 0;
+ const unsigned int halfPoints = num_points / 2;
+
+ __m128 x;
+ lv_32fc_t* c = cVector;
+ const lv_32fc_t* a = aVector;
+
+ __m128 conjugator = _mm_setr_ps(0, -0.f, 0, -0.f);
+
+ for(;number < halfPoints; number++){
+
+ x = _mm_loadu_ps((float*)a); // Load the complex data as ar,ai,br,bi
+
+ x = _mm_xor_ps(x, conjugator); // conjugate register
+
+ _mm_storeu_ps((float*)c,x); // Store the results back into the C container
+
+ a += 2;
+ c += 2;
+ }
+
+ if((num_points % 2) != 0) {
+ *c = lv_conj(*a);
+ }
+}
+#endif /* LV_HAVE_SSE3 */
+
+#ifdef LV_HAVE_GENERIC
+ /*!
+ \brief Takes the conjugate of a complex vector.
+ \param cVector The vector where the results will be stored
+ \param aVector Vector to be conjugated
+ \param num_points The number of complex values in aVector to be conjugated and stored into cVector
+ */
+static inline void volk_32fc_conjugate_32fc_u_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, unsigned int num_points){
+ lv_32fc_t* cPtr = cVector;
+ const lv_32fc_t* aPtr = aVector;
+ unsigned int number = 0;
+
+ for(number = 0; number < num_points; number++){
+ *cPtr++ = lv_conj(*aPtr++);
+ }
+}
+#endif /* LV_HAVE_GENERIC */
+
+
+#endif /* INCLUDED_volk_32fc_conjugate_32fc_u_H */
diff --git a/volk/lib/testqa.cc b/volk/lib/testqa.cc
index fdd3d4853..593087f85 100644
--- a/volk/lib/testqa.cc
+++ b/volk/lib/testqa.cc
@@ -93,6 +93,8 @@ VOLK_RUN_TESTS(volk_32fc_x2_multiply_32fc_a, 1e-4, 0, 20460, 1);
VOLK_RUN_TESTS(volk_32fc_x2_multiply_32fc_u, 1e-4, 0, 20460, 1);
VOLK_RUN_TESTS(volk_32fc_x2_multiply_conjugate_32fc_a, 1e-4, 0, 20460, 1);
VOLK_RUN_TESTS(volk_32fc_x2_multiply_conjugate_32fc_u, 1e-4, 0, 20460, 1);
+VOLK_RUN_TESTS(volk_32fc_conjugate_32fc_a, 1e-4, 0, 20460, 1);
+VOLK_RUN_TESTS(volk_32fc_conjugate_32fc_u, 1e-4, 0, 20460, 1);
VOLK_RUN_TESTS(volk_32f_x2_multiply_32f_a, 1e-4, 0, 20460, 1);
VOLK_RUN_TESTS(volk_32f_x2_multiply_32f_u, 1e-4, 0, 20460, 1);
VOLK_RUN_TESTS(volk_32fc_s32fc_multiply_32fc_a, 1e-4, 0, 20460, 1);