1 files changed, 638 insertions, 0 deletions
diff --git a/gr-vocoder/lib/codec2/sine.c b/gr-vocoder/lib/codec2/sine.c
new file mode 100644
index 000000000..45cc9de71
--- /dev/null
+++ b/gr-vocoder/lib/codec2/sine.c
@@ -0,0 +1,638 @@
+/*---------------------------------------------------------------------------*\
+                                                                             
+  FILE........: sine.c
+  AUTHOR......: David Rowe                                           
+  DATE CREATED: 19/8/2010
+                                                                             
+  Sinusoidal analysis and synthesis functions.
+                                                                             
+\*---------------------------------------------------------------------------*/
+
+/*
+  Copyright (C) 1990-2010 David Rowe
+
+  All rights reserved.
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU Lesser General Public License version 2.1, as
+  published by the Free Software Foundation.  This program is
+  distributed in the hope that it will be useful, but WITHOUT ANY
+  WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+  License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*---------------------------------------------------------------------------*\
+                                                                             
+				INCLUDES                                      
+                                                                             
+\*---------------------------------------------------------------------------*/
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+
+#include "defines.h"
+#include "sine.h"
+#include "fft.h"
+
+#define HPF_BETA 0.125
+
+/*---------------------------------------------------------------------------*\
+                                                                             
+				HEADERS                                     
+                                                                             
+\*---------------------------------------------------------------------------*/
+
+void hs_pitch_refinement(MODEL *model, COMP Sw[], float pmin, float pmax, 
+			 float pstep);
+
+/*---------------------------------------------------------------------------*\
+                                                                             
+				FUNCTIONS                                     
+                                                                             
+\*---------------------------------------------------------------------------*/
+
+/*---------------------------------------------------------------------------*\
+                                                       
+  FUNCTION....: make_analysis_window	     
+  AUTHOR......: David Rowe			      
+  DATE CREATED: 11/5/94 
+
+  Init function that generates the time domain analysis window and it's DFT.
+
+\*---------------------------------------------------------------------------*/
+
+void make_analysis_window(float w[],COMP W[])
+{
+  float m;
+  COMP  temp;
+  int   i,j;
+
+  /* 
+     Generate Hamming window centered on M-sample pitch analysis window
+  
+  0            M/2           M-1
+  |-------------|-------------|
+        |-------|-------|
+            NW samples
+
+     All our analysis/synthsis is centred on the M/2 sample.               
+  */
+
+  m = 0.0;
+  for(i=0; i<M/2-NW/2; i++)
+    w[i] = 0.0;
+  for(i=M/2-NW/2,j=0; i<M/2+NW/2; i++,j++) {
+    w[i] = 0.5 - 0.5*cos(TWO_PI*j/(NW-1));
+    m += w[i]*w[i];
+  }
+  for(i=M/2+NW/2; i<M; i++)
+    w[i] = 0.0;
+ 
+  /* Normalise - makes freq domain amplitude estimation straight
+     forward */
+
+  m = 1.0/sqrt(m*FFT_ENC);
+  for(i=0; i<M; i++) {
+    w[i] *= m;
+  }
+
+  /* 
+     Generate DFT of analysis window, used for later processing.  Note
+     we modulo FFT_ENC shift the time domain window w[], this makes the
+     imaginary part of the DFT W[] equal to zero as the shifted w[] is
+     even about the n=0 time axis if NW is odd.  Having the imag part
+     of the DFT W[] makes computation easier.
+
+     0                      FFT_ENC-1
+     |-------------------------|
+
+      ----\               /----
+           \             / 
+            \           /          <- shifted version of window w[n]
+             \         /
+              \       /
+               -------
+
+     |---------|     |---------|      
+       NW/2              NW/2
+  */
+
+  for(i=0; i<FFT_ENC; i++) {
+    W[i].real = 0.0;
+    W[i].imag = 0.0;
+  }
+  for(i=0; i<NW/2; i++)
+    W[i].real = w[i+M/2];
+  for(i=FFT_ENC-NW/2,j=M/2-NW/2; i<FFT_ENC; i++,j++)
+    W[i].real = w[j];
+
+  fft(&W[0].real,FFT_ENC,-1);         /* "Numerical Recipes in C" FFT */
+
+  /* 
+      Re-arrange W[] to be symmetrical about FFT_ENC/2.  Makes later 
+      analysis convenient.
+
+   Before:
+
+
+     0                 FFT_ENC-1
+     |----------|---------|
+     __                   _       
+       \                 /          
+        \_______________/      
+
+   After:
+
+     0                 FFT_ENC-1
+     |----------|---------|
+               ___                        
+              /   \                
+     ________/     \_______     
+
+  */
+       
+      
+  for(i=0; i<FFT_ENC/2; i++) {
+    temp.real = W[i].real;
+    temp.imag = W[i].imag;
+    W[i].real = W[i+FFT_ENC/2].real;
+    W[i].imag = W[i+FFT_ENC/2].imag;
+    W[i+FFT_ENC/2].real = temp.real;
+    W[i+FFT_ENC/2].imag = temp.imag;
+  }
+
+}
+
+/*---------------------------------------------------------------------------*\
+                                                       
+  FUNCTION....: hpf	     
+  AUTHOR......: David Rowe			      
+  DATE CREATED: 16 Nov 2010
+
+  High pass filter with a -3dB point of about 160Hz.
+
+    y(n) = -HPF_BETA*y(n-1) + x(n) - x(n-1)
+ 
+\*---------------------------------------------------------------------------*/
+
+float hpf(float x, float states[])
+{
+    states[0] += -HPF_BETA*states[0] + x - states[1];
+    states[1] = x;
+
+    return states[0];
+}
+
+/*---------------------------------------------------------------------------*\
+                                                       
+  FUNCTION....: dft_speech	     
+  AUTHOR......: David Rowe			      
+  DATE CREATED: 27/5/94 
+
+  Finds the DFT of the current speech input speech frame.
+
+\*---------------------------------------------------------------------------*/
+
+void dft_speech(COMP Sw[], float Sn[], float w[])
+{
+  int i;
+  
+  for(i=0; i<FFT_ENC; i++) {
+    Sw[i].real = 0.0;
+    Sw[i].imag = 0.0;
+  }
+
+  /* Centre analysis window on time axis, we need to arrange input
+     to FFT this way to make FFT phases correct */
+  
+  /* move 2nd half to start of FFT input vector */
+
+  for(i=0; i<NW/2; i++)
+    Sw[i].real = Sn[i+M/2]*w[i+M/2];
+
+  /* move 1st half to end of FFT input vector */
+
+  for(i=0; i<NW/2; i++)
+    Sw[FFT_ENC-NW/2+i].real = Sn[i+M/2-NW/2]*w[i+M/2-NW/2];
+
+  fft(&Sw[0].real,FFT_ENC,-1);
+}
+
+/*---------------------------------------------------------------------------*\
+                                                                     
+  FUNCTION....: two_stage_pitch_refinement			
+  AUTHOR......: David Rowe
+  DATE CREATED: 27/5/94				
+
+  Refines the current pitch estimate using the harmonic sum pitch
+  estimation technique.
+
+\*---------------------------------------------------------------------------*/
+
+void two_stage_pitch_refinement(MODEL *model, COMP Sw[])
+{
+  float pmin,pmax,pstep;	/* pitch refinment minimum, maximum and step */ 
+
+  /* Coarse refinement */
+
+  pmax = TWO_PI/model->Wo + 5;
+  pmin = TWO_PI/model->Wo - 5;
+  pstep = 1.0;
+  hs_pitch_refinement(model,Sw,pmin,pmax,pstep);
+  
+  /* Fine refinement */
+  
+  pmax = TWO_PI/model->Wo + 1;
+  pmin = TWO_PI/model->Wo - 1;
+  pstep = 0.25;
+  hs_pitch_refinement(model,Sw,pmin,pmax,pstep);
+  
+  /* Limit range */
+  
+  if (model->Wo < TWO_PI/P_MAX)
+    model->Wo = TWO_PI/P_MAX;
+  if (model->Wo > TWO_PI/P_MIN)
+    model->Wo = TWO_PI/P_MIN;
+
+  model->L = floor(PI/model->Wo);
+}
+
+/*---------------------------------------------------------------------------*\
+                                                                
+ FUNCTION....: hs_pitch_refinement				
+ AUTHOR......: David Rowe			
+ DATE CREATED: 27/5/94							     
+									  
+ Harmonic sum pitch refinement function.			   
+									    
+ pmin   pitch search range minimum	    
+ pmax	pitch search range maximum	    
+ step   pitch search step size		    
+ model	current pitch estimate in model.Wo  
+									    
+ model 	refined pitch estimate in model.Wo  
+									     
+\*---------------------------------------------------------------------------*/
+
+void hs_pitch_refinement(MODEL *model, COMP Sw[], float pmin, float pmax, float pstep)
+{
+  int m;		/* loop variable */
+  int b;		/* bin for current harmonic centre */
+  float E;		/* energy for current pitch*/
+  float Wo;		/* current "test" fundamental freq. */
+  float Wom;		/* Wo that maximises E */
+  float Em;		/* mamimum energy */
+  float r;		/* number of rads/bin */
+  float p;		/* current pitch */
+  
+  /* Initialisation */
+  
+  model->L = PI/model->Wo;	/* use initial pitch est. for L */
+  Wom = model->Wo;
+  Em = 0.0;
+  r = TWO_PI/FFT_ENC;
+  
+  /* Determine harmonic sum for a range of Wo values */
+
+  for(p=pmin; p<=pmax; p+=pstep) {
+    E = 0.0;
+    Wo = TWO_PI/p;
+
+    /* Sum harmonic magnitudes */
+
+    for(m=1; m<=model->L; m++) {
+      b = floor(m*Wo/r + 0.5);
+      E += Sw[b].real*Sw[b].real + Sw[b].imag*Sw[b].imag;
+    }  
+
+    /* Compare to see if this is a maximum */
+    
+    if (E > Em) {
+      Em = E;
+      Wom = Wo;
+    }
+  }
+
+  model->Wo = Wom;
+}
+
+/*---------------------------------------------------------------------------*\
+                                                                             
+  FUNCTION....: estimate_amplitudes 			      
+  AUTHOR......: David Rowe		
+  DATE CREATED: 27/5/94			       
+									      
+  Estimates the complex amplitudes of the harmonics.    
+									      
+\*---------------------------------------------------------------------------*/
+
+void estimate_amplitudes(MODEL *model, COMP Sw[], COMP W[])
+{
+  int   i,m;		/* loop variables */
+  int   am,bm;		/* bounds of current harmonic */
+  int   b;		/* DFT bin of centre of current harmonic */
+  float den;		/* denominator of amplitude expression */
+  float r;		/* number of rads/bin */
+  int   offset;
+  COMP  Am;
+
+  r = TWO_PI/FFT_ENC;
+
+  for(m=1; m<=model->L; m++) {
+    den = 0.0;
+    am = floor((m - 0.5)*model->Wo/r + 0.5);
+    bm = floor((m + 0.5)*model->Wo/r + 0.5);
+    b = floor(m*model->Wo/r + 0.5);
+
+    /* Estimate ampltude of harmonic */
+
+    den = 0.0;
+    Am.real = Am.imag = 0.0;
+    for(i=am; i<bm; i++) {
+      den += Sw[i].real*Sw[i].real + Sw[i].imag*Sw[i].imag;
+      offset = i + FFT_ENC/2 - floor(m*model->Wo/r + 0.5);
+      Am.real += Sw[i].real*W[offset].real;
+      Am.imag += Sw[i].imag*W[offset].real;
+    }
+
+    model->A[m] = sqrt(den);
+
+    /* Estimate phase of harmonic */
+
+    model->phi[m] = atan2(Sw[b].imag,Sw[b].real);
+  }
+}
+
+/*---------------------------------------------------------------------------*\
+                                                                             
+  est_voicing_mbe()          
+                                                                             
+  Returns the error of the MBE cost function for a fiven F0.
+
+  Note: I think a lot of the operations below can be simplified as
+  W[].imag = 0 and has been normalised such that den always equals 1.
+                                                                             
+\*---------------------------------------------------------------------------*/
+
+float est_voicing_mbe(
+    MODEL *model,
+    COMP   Sw[],
+    COMP   W[],
+    COMP   Sw_[],         /* DFT of all voiced synthesised signal  */
+                          /* useful for debugging/dump file        */
+    COMP   Ew[],          /* DFT of error                          */
+    float prev_Wo)
+{
+    int   i,l,al,bl,m;    /* loop variables */
+    COMP  Am;             /* amplitude sample for this band */
+    int   offset;         /* centers Hw[] about current harmonic */
+    float den;            /* denominator of Am expression */
+    float error;          /* accumulated error between original and synthesised */
+    float Wo;            
+    float sig, snr;
+    float elow, ehigh, eratio;
+    float dF0, sixty;
+
+    sig = 0.0;
+    for(l=1; l<=model->L/4; l++) {
+	sig += model->A[l]*model->A[l];
+    }
+    for(i=0; i<FFT_ENC; i++) {
+	Sw_[i].real = 0.0;
+	Sw_[i].imag = 0.0;
+	Ew[i].real = 0.0;
+	Ew[i].imag = 0.0;
+    }
+
+    Wo = model->Wo;
+    error = 0.0;
+
+    /* Just test across the harmonics in the first 1000 Hz (L/4) */
+
+    for(l=1; l<=model->L/4; l++) {
+	Am.real = 0.0;
+	Am.imag = 0.0;
+	den = 0.0;
+	al = ceil((l - 0.5)*Wo*FFT_ENC/TWO_PI);
+	bl = ceil((l + 0.5)*Wo*FFT_ENC/TWO_PI);
+
+	/* Estimate amplitude of harmonic assuming harmonic is totally voiced */
+
+	for(m=al; m<bl; m++) {
+	    offset = FFT_ENC/2 + m - l*Wo*FFT_ENC/TWO_PI + 0.5;
+	    Am.real += Sw[m].real*W[offset].real + Sw[m].imag*W[offset].imag;
+	    Am.imag += Sw[m].imag*W[offset].real - Sw[m].real*W[offset].imag;
+	    den += W[offset].real*W[offset].real + W[offset].imag*W[offset].imag;
+        }
+
+        Am.real = Am.real/den;
+        Am.imag = Am.imag/den;
+
+        /* Determine error between estimated harmonic and original */
+
+        for(m=al; m<bl; m++) {
+	    offset = FFT_ENC/2 + m - l*Wo*FFT_ENC/TWO_PI + 0.5;
+	    Sw_[m].real = Am.real*W[offset].real - Am.imag*W[offset].imag;
+	    Sw_[m].imag = Am.real*W[offset].imag + Am.imag*W[offset].real;
+	    Ew[m].real = Sw[m].real - Sw_[m].real;
+	    Ew[m].imag = Sw[m].imag - Sw_[m].imag;
+	    error += Ew[m].real*Ew[m].real;
+	    error += Ew[m].imag*Ew[m].imag;
+	}
+    }
+    
+    snr = 10.0*log10(sig/error);
+    if (snr > V_THRESH)
+	model->voiced = 1;
+    else
+	model->voiced = 0;
+ 
+    /* post processing, helps clean up some voicing errors ------------------*/
+
+    /* 
+       Determine the ratio of low freancy to high frequency energy,
+       voiced speech tends to be dominated by low frequency energy,
+       unvoiced by high frequency. This measure can be used to
+       determine if we have made any gross errors.
+    */
+
+    elow = ehigh = 0.0;
+    for(l=1; l<=model->L/2; l++) {
+	elow += model->A[l]*model->A[l];
+    }
+    for(l=model->L/2; l<=model->L; l++) {
+	ehigh += model->A[l]*model->A[l];
+    }
+    eratio = 10.0*log10(elow/ehigh);
+    dF0 = 0.0;
+
+    /* Look for Type 1 errors, strongly V speech that has been
+       accidentally declared UV */
+
+    if (model->voiced == 0)
+	if (eratio > 10.0)
+	    model->voiced = 1;
+
+    /* Look for Type 2 errors, strongly UV speech that has been
+       accidentally declared V */
+
+    if (model->voiced == 1) {
+	if (eratio < -10.0)
+	    model->voiced = 0;
+
+	/* If pitch is jumping about it's likely this is UV */
+
+	dF0 = (model->Wo - prev_Wo)*FS/TWO_PI;
+	if (fabs(dF0) > 15.0) 
+	    model->voiced = 0;
+
+	/* A common source of Type 2 errors is the pitch estimator
+	   gives a low (50Hz) estimate for UV speech, which gives a
+	   good match with noise due to the close harmoonic spacing.
+	   These errors are much more common than people with 50Hz
+	   pitch, so we have just a small eratio threshold. */
+
+	sixty = 60.0*TWO_PI/FS;
+	if ((eratio < -4.0) && (model->Wo <= sixty))
+	    model->voiced = 0;
+    }
+    //printf(" v: %d snr: %f eratio: %3.2f %f\n",model->voiced,snr,eratio,dF0);
+
+    return snr;
+}
+
+/*---------------------------------------------------------------------------*\
+                                                       
+  FUNCTION....: make_synthesis_window	     
+  AUTHOR......: David Rowe			      
+  DATE CREATED: 11/5/94 
+
+  Init function that generates the trapezoidal (Parzen) sythesis window.
+
+\*---------------------------------------------------------------------------*/
+
+void make_synthesis_window(float Pn[])
+{
+  int   i;
+  float win;
+
+  /* Generate Parzen window in time domain */
+
+  win = 0.0;
+  for(i=0; i<N/2-TW; i++)
+    Pn[i] = 0.0;
+  win = 0.0;
+  for(i=N/2-TW; i<N/2+TW; win+=1.0/(2*TW), i++ )
+    Pn[i] = win;
+  for(i=N/2+TW; i<3*N/2-TW; i++)
+    Pn[i] = 1.0;
+  win = 1.0;
+  for(i=3*N/2-TW; i<3*N/2+TW; win-=1.0/(2*TW), i++)
+    Pn[i] = win;
+  for(i=3*N/2+TW; i<2*N; i++)
+    Pn[i] = 0.0;
+}
+
+/*---------------------------------------------------------------------------*\
+                                                                             
+  FUNCTION....: synthesise 			      
+  AUTHOR......: David Rowe		
+  DATE CREATED: 20/2/95		       
+									      
+  Synthesise a speech signal in the frequency domain from the
+  sinusodal model parameters.  Uses overlap-add with a trapezoidal
+  window to smoothly interpolate betwen frames.
+									      
+\*---------------------------------------------------------------------------*/
+
+void synthesise(
+  float  Sn_[],		/* time domain synthesised signal              */
+  MODEL *model,		/* ptr to model parameters for this frame      */
+  float  Pn[],		/* time domain Parzen window                   */
+  int    shift          /* flag used to handle transition frames       */
+)
+{
+    int   i,l,j,b;	/* loop variables */
+    COMP  Sw_[FFT_DEC];	/* DFT of synthesised signal */
+
+    if (shift) {
+	/* Update memories */
+
+	for(i=0; i<N-1; i++) {
+	    Sn_[i] = Sn_[i+N];
+	}
+	Sn_[N-1] = 0.0;
+    }
+
+    for(i=0; i<FFT_DEC; i++) {
+	Sw_[i].real = 0.0;
+	Sw_[i].imag = 0.0;
+    }
+
+    /*
+      Nov 2010 - found that synthesis using time domain cos() functions
+      gives better results for synthesis frames greater than 10ms.  Inverse
+      FFT synthesis using a 512 pt FFT works well for 10ms window.  I think
+      (but am not sure) that the problem is realted to the quantisation of
+      the harmonic frequencies to the FFT bin size, e.g. there is a 
+      8000/512 Hz step between FFT bins.  For some reason this makes
+      the speech from longer frame > 10ms sound poor.  The effect can also
+      be seen when synthesising test signals like single sine waves, some
+      sort of amplitude modulation at the frame rate.
+
+      Another possibility is using a larger FFT size (1024 or 2048).
+    */
+
+#define FFT_SYNTHESIS
+#ifdef FFT_SYNTHESIS
+    /* Now set up frequency domain synthesised speech */
+    for(l=1; l<=model->L; l++) {
+	b = floor(l*model->Wo*FFT_DEC/TWO_PI + 0.5);
+	if (b > ((FFT_DEC/2)-1)) {
+		b = (FFT_DEC/2)-1;
+	}
+	Sw_[b].real = model->A[l]*cos(model->phi[l]);
+	Sw_[b].imag = model->A[l]*sin(model->phi[l]);
+	Sw_[FFT_DEC-b].real = Sw_[b].real;
+	Sw_[FFT_DEC-b].imag = -Sw_[b].imag;
+    }
+
+    /* Perform inverse DFT */
+
+    fft(&Sw_[0].real,FFT_DEC,1);
+#else
+    /*
+       Direct time domain synthesis using the cos() function.  Works
+       well at 10ms and 20ms frames rates.  Note synthesis window is
+       still used to handle overlap-add between adjacent frames.  This
+       could be simplified as we don't need to synthesise where Pn[]
+       is zero.
+    */
+    for(l=1; l<=model->L; l++) {
+	for(i=0,j=-N+1; i<N-1; i++,j++) {
+	    Sw_[FFT_DEC-N+1+i].real += 2.0*model->A[l]*cos(j*model->Wo*l + model->phi[l]);
+	}
+ 	for(i=N-1,j=0; i<2*N; i++,j++)
+	    Sw_[j].real += 2.0*model->A[l]*cos(j*model->Wo*l + model->phi[l]);
+    }	
+#endif
+
+    /* Overlap add to previous samples */
+
+    for(i=0; i<N-1; i++) {
+	Sn_[i] += Sw_[FFT_DEC-N+1+i].real*Pn[i];
+    }
+
+    if (shift)
+	for(i=N-1,j=0; i<2*N; i++,j++)
+	    Sn_[i] = Sw_[j].real*Pn[i];
+    else
+	for(i=N-1,j=0; i<2*N; i++,j++)
+	    Sn_[i] += Sw_[j].real*Pn[i];
+}
+