/*
 * path: root/volk/include/volk/volk_16sc_deinterleave_real_8s_aligned16.h
 * blob: c0d1e941a9079dc22478672890a648f4c98a131a
 */
#ifndef INCLUDED_VOLK_16sc_DEINTERLEAVE_REAL_8s_ALIGNED16_H
#define INCLUDED_VOLK_16sc_DEINTERLEAVE_REAL_8s_ALIGNED16_H

#include <inttypes.h>
#include <stdio.h>

#if LV_HAVE_SSSE3
#include <tmmintrin.h>
/*!
  \brief Deinterleaves the complex 16 bit vector into 8 bit I vector data

  For every complex sample the first 16 bit word (the I component) is shifted
  right arithmetically by 8 bits and stored as one signed byte; the second
  word (the Q component) is skipped.  The vector loop and the scalar tail use
  the same rounding (toward negative infinity), so the result for a sample
  does not depend on its position in the buffer.

  The vector loop uses aligned 128 bit loads and stores, so both buffers must
  be 16 byte aligned.

  \param iBuffer The I buffer output data
  \param complexVector The complex input vector
  \param num_points The number of complex data values to be deinterleaved
*/
static inline void volk_16sc_deinterleave_real_8s_aligned16_ssse3(int8_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points){
  unsigned int number = 0;
  const int8_t* complexVectorPtr = (const int8_t*)complexVector;
  int8_t* iBufferPtr = iBuffer;
  /* Each 128 bit load holds four complex samples (4 bytes each).  The masks
     gather bytes 0,1 / 4,5 / 8,9 / 12,13 (the four I words) into the low half
     (mask 1) or the high half (mask 2) of a register; 0x80 zeroes a lane. */
  __m128i iMoveMask1 = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 13, 12, 9, 8, 5, 4, 1, 0);
  __m128i iMoveMask2 = _mm_set_epi8(13, 12, 9, 8, 5, 4, 1, 0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
  __m128i complexVal1, complexVal2, complexVal3, complexVal4, iOutputVal;

  /* One iteration consumes 4 loads * 4 samples = 16 complex points. */
  unsigned int sixteenthPoints = num_points / 16;

  for(number = 0; number < sixteenthPoints; number++){
    complexVal1 = _mm_load_si128((const __m128i*)complexVectorPtr);  complexVectorPtr += 16;
    complexVal2 = _mm_load_si128((const __m128i*)complexVectorPtr);  complexVectorPtr += 16;

    complexVal3 = _mm_load_si128((const __m128i*)complexVectorPtr);  complexVectorPtr += 16;
    complexVal4 = _mm_load_si128((const __m128i*)complexVectorPtr);  complexVectorPtr += 16;

    /* Merge the I words of two loads into one register of eight int16. */
    complexVal1 = _mm_shuffle_epi8(complexVal1, iMoveMask1);
    complexVal2 = _mm_shuffle_epi8(complexVal2, iMoveMask2);
    complexVal1 = _mm_or_si128(complexVal1, complexVal2);

    complexVal3 = _mm_shuffle_epi8(complexVal3, iMoveMask1);
    complexVal4 = _mm_shuffle_epi8(complexVal4, iMoveMask2);
    complexVal3 = _mm_or_si128(complexVal3, complexVal4);

    /* Arithmetic shift keeps the sign; the results fit in [-128, 127], so the
       saturating pack below never actually clips. */
    complexVal1 = _mm_srai_epi16(complexVal1, 8);
    complexVal3 = _mm_srai_epi16(complexVal3, 8);

    iOutputVal = _mm_packs_epi16(complexVal1, complexVal3);

    _mm_store_si128((__m128i*)iBufferPtr, iOutputVal);

    iBufferPtr += 16;
  }

  /* Scalar tail for the remaining (num_points % 16) samples.  Use the same
     arithmetic shift as the vector loop: a truncating "/ 256" would round
     negative values toward zero and disagree with _mm_srai_epi16 (e.g. an
     input of -1 gives -1 above but would give 0 here). */
  const int16_t* int16ComplexVectorPtr = (const int16_t*)complexVectorPtr;
  for(number = sixteenthPoints * 16; number < num_points; number++){
    *iBufferPtr++ = (int8_t)(*int16ComplexVectorPtr >> 8);
    int16ComplexVectorPtr += 2; /* step over I and the unused Q word */
  }
}
#endif /* LV_HAVE_SSSE3 */

#if LV_HAVE_GENERIC
/*!
  \brief Deinterleaves the complex 16 bit vector into 8 bit I vector data

  Portable scalar version.  For every complex sample the first 16 bit word
  (the I component) is shifted right arithmetically by 8 bits and stored as
  one signed byte; the second word (the Q component) is skipped.  The shift
  rounds toward negative infinity, which is the same result the SSSE3 vector
  path produces with _mm_srai_epi16.

  \param iBuffer The I buffer output data
  \param complexVector The complex input vector
  \param num_points The number of complex data values to be deinterleaved
*/
static inline void volk_16sc_deinterleave_real_8s_aligned16_generic(int8_t* iBuffer, const lv_16sc_t* complexVector, unsigned int num_points){
  unsigned int number = 0;
  const int16_t* complexVectorPtr = (const int16_t*)complexVector;
  int8_t* iBufferPtr = iBuffer;
  for(number = 0; number < num_points; number++){
    /* ">> 8" rather than "/ 256": division truncates toward zero and would
       differ from the SIMD kernel for negative inputs (e.g. -1 -> 0 vs -1).
       Relies on the compiler sign-extending right shifts of negative values. */
    *iBufferPtr++ = (int8_t)(*complexVectorPtr >> 8);
    complexVectorPtr += 2; /* step over I and the unused Q word */
  }
}
#endif /* LV_HAVE_GENERIC */




#endif /* INCLUDED_VOLK_16sc_DEINTERLEAVE_REAL_8s_ALIGNED16_H */