summaryrefslogtreecommitdiff
path: root/volk/kernels/volk/volk_32fc_s32f_deinterleave_real_16i.h
blob: 9e10217a0fcfeef58fb1ccae7f29a33788385483 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
#ifndef INCLUDED_volk_32fc_s32f_deinterleave_real_16i_a_H
#define INCLUDED_volk_32fc_s32f_deinterleave_real_16i_a_H

#include <volk/volk_common.h>
#include <inttypes.h>
#include <stdio.h>

#ifdef LV_HAVE_SSE
#include <xmmintrin.h>
/*!
  \brief Extracts the real (I) component of each complex sample, multiplies it by the scalar, and stores the result as a 16-bit integer
  \param iBuffer The I buffer output data; one int16_t is written per complex input point
  \param complexVector The complex input vector; it is read with aligned SSE loads (_mm_load_ps), so it must be 16-byte aligned
  \param scalar The value each real component is multiplied by before conversion
  \param num_points The number of complex data values to be deinterleaved

  The float to int16_t conversion is a plain C cast: the fractional part is
  discarded and no clamping is applied, so the scaled values must fit in int16_t.
*/
static inline void volk_32fc_s32f_deinterleave_real_16i_a_sse(int16_t* iBuffer, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points){
  unsigned int number = 0;
  const unsigned int quarterPoints = num_points / 4;

  // The input is walked as interleaved floats (real, imaginary, real, ...);
  // the cast keeps the const qualifier of the caller's buffer.
  const float* complexVectorPtr = (const float*)complexVector;
  int16_t* iBufferPtr = iBuffer;

  const __m128 vScalar = _mm_set_ps1(scalar);

  // Aligned scratch space used to move the four scaled floats out of the SSE register
  __VOLK_ATTR_ALIGNED(16) float floatBuffer[4];

  // Vectorized part: four complex points (eight floats) per iteration
  for(; number < quarterPoints; number++){
    __m128 cplxValue1, cplxValue2, iValue;

    cplxValue1 = _mm_load_ps(complexVectorPtr);
    complexVectorPtr += 4;

    cplxValue2 = _mm_load_ps(complexVectorPtr);
    complexVectorPtr += 4;

    // Pick elements 0 and 2 of each register: arrange in i1i2i3i4 format
    iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));

    iValue = _mm_mul_ps(iValue, vScalar);

    _mm_store_ps(floatBuffer, iValue);
    *iBufferPtr++ = (int16_t)(floatBuffer[0]);
    *iBufferPtr++ = (int16_t)(floatBuffer[1]);
    *iBufferPtr++ = (int16_t)(floatBuffer[2]);
    *iBufferPtr++ = (int16_t)(floatBuffer[3]);
  }

  // Scalar tail for the remaining (num_points % 4) points. Both pointers
  // already sit just past the vectorized part, so they are not reset here.
  for(number = quarterPoints * 4; number < num_points; number++){
    *iBufferPtr++ = (int16_t)(*complexVectorPtr * scalar);
    complexVectorPtr += 2; // step over the imaginary component
  }
}
#endif /* LV_HAVE_SSE */

#ifdef LV_HAVE_GENERIC
/*!
  \brief Extracts the real (I) component of each complex sample, multiplies it by the scalar, and stores the result as a 16-bit integer
  \param iBuffer The I buffer output data; one int16_t is written per complex input point
  \param complexVector The complex input vector
  \param scalar The value each real component is multiplied by before conversion
  \param num_points The number of complex data values to be deinterleaved

  The float to int16_t conversion is a plain C cast: the fractional part is
  discarded and no clamping is applied, so the scaled values must fit in int16_t.
*/
static inline void volk_32fc_s32f_deinterleave_real_16i_generic(int16_t* iBuffer, const lv_32fc_t* complexVector, const float scalar, unsigned int num_points){
  // The input is walked as interleaved floats (real, imaginary, real, ...);
  // the cast keeps the const qualifier of the caller's buffer.
  const float* complexVectorPtr = (const float*)complexVector;
  int16_t* iBufferPtr = iBuffer;
  unsigned int number;

  for(number = 0; number < num_points; number++){
    *iBufferPtr++ = (int16_t)(*complexVectorPtr * scalar);
    complexVectorPtr += 2; // step over the imaginary component
  }
}
#endif /* LV_HAVE_GENERIC */




#endif /* INCLUDED_volk_32fc_s32f_deinterleave_real_16i_a_H */