1 files changed, 364 insertions, 0 deletions
diff --git a/gr-vocoder/lib/codec2/nlp.c b/gr-vocoder/lib/codec2/nlp.c
new file mode 100644
index 000000000..42ae90919
--- /dev/null
+++ b/gr-vocoder/lib/codec2/nlp.c
@@ -0,0 +1,364 @@
+/*---------------------------------------------------------------------------*\
+                                                 
+  FILE........: nlp.c                                                   
+  AUTHOR......: David Rowe                                      
+  DATE CREATED: 23/3/93                                    
+                                                         
+  Non Linear Pitch (NLP) estimation functions.			  
+                                                               
+\*---------------------------------------------------------------------------*/
+
+/*
+  Copyright (C) 2009 David Rowe
+
+  All rights reserved.
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU Lesser General Public License version 2.1, as
+  published by the Free Software Foundation.  This program is
+  distributed in the hope that it will be useful, but WITHOUT ANY
+  WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+  License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "defines.h"
+#include "nlp.h"
+#include "dump.h"
+#include "fft.h"
+
+#include <assert.h>
+#include <math.h>
+#include <stdlib.h>
+
+/*---------------------------------------------------------------------------*\
+                                                                             
+ 				DEFINES                                       
+                                                                             
+\*---------------------------------------------------------------------------*/
+
+#define PMAX_M      600		/* maximum NLP analysis window size     */
+#define COEFF       0.95	/* notch filter parameter               */
+#define PE_FFT_SIZE 512		/* DFT size for pitch estimation        */
+#define DEC         5		/* decimation factor                    */
+#define SAMPLE_RATE 8000
+#define PI          3.141592654	/* mathematical constant                */
+#define T           0.1         /* threshold for local minima candidate */
+#define F0_MAX      500
+#define CNLP        0.3	        /* post processor constant              */
+#define NLP_NTAP 48	        /* Decimation LPF order */
+
+/*---------------------------------------------------------------------------*\
+                                                                            
+ 				GLOBALS                                       
+                                                                             
+\*---------------------------------------------------------------------------*/
+
+/* 48 tap 600Hz low pass FIR filter coefficients */
+
+const float nlp_fir[] = {
+  -1.0818124e-03,
+  -1.1008344e-03,
+  -9.2768838e-04,
+  -4.2289438e-04,
+   5.5034190e-04,
+   2.0029849e-03,
+   3.7058509e-03,
+   5.1449415e-03,
+   5.5924666e-03,
+   4.3036754e-03,
+   8.0284511e-04,
+  -4.8204610e-03,
+  -1.1705810e-02,
+  -1.8199275e-02,
+  -2.2065282e-02,
+  -2.0920610e-02,
+  -1.2808831e-02,
+   3.2204775e-03,
+   2.6683811e-02,
+   5.5520624e-02,
+   8.6305944e-02,
+   1.1480192e-01,
+   1.3674206e-01,
+   1.4867556e-01,
+   1.4867556e-01,
+   1.3674206e-01,
+   1.1480192e-01,
+   8.6305944e-02,
+   5.5520624e-02,
+   2.6683811e-02,
+   3.2204775e-03,
+  -1.2808831e-02,
+  -2.0920610e-02,
+  -2.2065282e-02,
+  -1.8199275e-02,
+  -1.1705810e-02,
+  -4.8204610e-03,
+   8.0284511e-04,
+   4.3036754e-03,
+   5.5924666e-03,
+   5.1449415e-03,
+   3.7058509e-03,
+   2.0029849e-03,
+   5.5034190e-04,
+  -4.2289438e-04,
+  -9.2768838e-04,
+  -1.1008344e-03,
+  -1.0818124e-03
+};
+
+typedef struct {
+    float sq[PMAX_M];	     /* squared speech samples */
+    float mem_x,mem_y;       /* memory for notch filter */
+    float mem_fir[NLP_NTAP]; /* decimation FIR filter memory */
+} NLP;
+
+float post_process_mbe(COMP Fw[], int pmin, int pmax, float gmax);
+float post_process_sub_multiples(COMP Fw[], 
+				 int pmin, int pmax, float gmax, int gmax_bin,
+				 float *prev_Wo);
+
+/*---------------------------------------------------------------------------*\
+                                                                             
+  nlp_create()                                                                  
+                                                                             
+  Initialisation function for NLP pitch estimator.
+
+\*---------------------------------------------------------------------------*/
+
+void *nlp_create()
+{
+    NLP *nlp;
+    int  i;
+
+    nlp = (NLP*)malloc(sizeof(NLP));
+    if (nlp == NULL)
+	return NULL;
+
+    for(i=0; i<PMAX_M; i++)
+	nlp->sq[i] = 0.0;
+    nlp->mem_x = 0.0;
+    nlp->mem_y = 0.0;
+    for(i=0; i<NLP_NTAP; i++)
+	nlp->mem_fir[i] = 0.0;
+
+    return (void*)nlp;
+}
+
+/*---------------------------------------------------------------------------*\
+                                                                             
+  nlp_destory()
+                                                                             
+  Initialisation function for NLP pitch estimator.
+
+\*---------------------------------------------------------------------------*/
+
+void nlp_destroy(void *nlp_state)
+{
+    assert(nlp_state != NULL);
+    free(nlp_state);
+}
+
+/*---------------------------------------------------------------------------*\
+                                                                             
+  nlp()                                                                  
+                                                                             
+  Determines the pitch in samples using the Non Linear Pitch (NLP)
+  algorithm [1]. Returns the fundamental in Hz.  Note that the actual
+  pitch estimate is for the centre of the M sample Sn[] vector, not
+  the current N sample input vector.  This is (I think) a delay of 2.5
+  frames with N=80 samples.  You should align further analysis using
+  this pitch estimate to be centred on the middle of Sn[].
+
+  Two post processors have been tried, the MBE version (as discussed
+  in [1]), and a post processor that checks sub-multiples.  Both
+  suffer occasional gross pitch errors (i.e. neither are perfect).  In
+  the presence of background noise the sub-multiple algorithm tends
+  towards low F0 which leads to better sounding background noise than
+  the MBE post processor.
+
+  A good way to test and develop the NLP pitch estimator is using the
+  tnlp (codec2/unittest) and the codec2/octave/plnlp.m Octave script.
+
+  A pitch tracker searching a few frames forward and backward in time
+  would be a useful addition.
+
+  References:
+
+    [1] http://www.itr.unisa.edu.au/~steven/thesis/dgr.pdf Chapter 4
+                                                              
+\*---------------------------------------------------------------------------*/
+
+float nlp(
+  void *nlp_state, 
+  float  Sn[],			/* input speech vector */
+  int    n,			/* frames shift (no. new samples in Sn[]) */
+  int    m,			/* analysis window size */
+  int    pmin,			/* minimum pitch value */
+  int    pmax,			/* maximum pitch value */
+  float *pitch,			/* estimated pitch period in samples */
+  COMP   Sw[],                  /* Freq domain version of Sn[] */
+  float *prev_Wo
+)
+{
+    NLP   *nlp;
+    float  notch;		    /* current notch filter output */
+    COMP   Fw[PE_FFT_SIZE];	    /* DFT of squared signal */
+    float  gmax;
+    int    gmax_bin;
+    int   i,j;
+    float best_f0;
+
+    assert(nlp_state != NULL);
+    nlp = (NLP*)nlp_state;
+
+    /* Square, notch filter at DC, and LP filter vector */
+
+    for(i=m-n; i<M; i++) 	    /* square latest speech samples */
+	nlp->sq[i] = Sn[i]*Sn[i];
+
+    for(i=m-n; i<m; i++) {	/* notch filter at DC */
+	notch = nlp->sq[i] - nlp->mem_x;
+	notch += COEFF*nlp->mem_y;
+	nlp->mem_x = nlp->sq[i];
+	nlp->mem_y = notch;
+	nlp->sq[i] = notch;
+    }
+
+    for(i=m-n; i<m; i++) {	/* FIR filter vector */
+
+	for(j=0; j<NLP_NTAP-1; j++)
+	    nlp->mem_fir[j] = nlp->mem_fir[j+1];
+	nlp->mem_fir[NLP_NTAP-1] = nlp->sq[i];
+
+	nlp->sq[i] = 0.0;
+	for(j=0; j<NLP_NTAP; j++)
+	    nlp->sq[i] += nlp->mem_fir[j]*nlp_fir[j];
+    }
+
+    /* Decimate and DFT */
+
+    for(i=0; i<PE_FFT_SIZE; i++) {
+	Fw[i].real = 0.0;
+	Fw[i].imag = 0.0;
+    }
+    for(i=0; i<m/DEC; i++) {
+	Fw[i].real = nlp->sq[i*DEC]*(0.5 - 0.5*cos(2*PI*i/(m/DEC-1)));
+    }
+#ifdef DUMP
+    dump_dec(Fw);
+#endif
+    fft(&Fw[0].real,PE_FFT_SIZE,1);
+    for(i=0; i<PE_FFT_SIZE; i++)
+	Fw[i].real = Fw[i].real*Fw[i].real + Fw[i].imag*Fw[i].imag;
+
+#ifdef DUMP
+    dump_sq(nlp->sq);
+    dump_Fw(Fw);
+#endif
+
+    /* find global peak */
+
+    gmax = 0.0;
+    gmax_bin = PE_FFT_SIZE*DEC/pmax;
+    for(i=PE_FFT_SIZE*DEC/pmax; i<=PE_FFT_SIZE*DEC/pmin; i++) {
+	if (Fw[i].real > gmax) {
+	    gmax = Fw[i].real;
+	    gmax_bin = i;
+	}
+    }
+
+    best_f0 = post_process_sub_multiples(Fw, pmin, pmax, gmax, gmax_bin, 
+					 prev_Wo);
+
+    /* Shift samples in buffer to make room for new samples */
+
+    for(i=0; i<m-n; i++)
+	nlp->sq[i] = nlp->sq[i+n];
+
+    /* return pitch and F0 estimate */
+
+    *pitch = (float)SAMPLE_RATE/best_f0;
+    return(best_f0);  
+}
+
+/*---------------------------------------------------------------------------*\
+                                                                             
+  post_process_sub_multiples() 
+                                                                           
+  Given the global maximma of Fw[] we search interger submultiples for
+  local maxima.  If local maxima exist and they are above an
+  experimentally derived threshold (OK a magic number I pulled out of
+  the air) we choose the submultiple as the F0 estimate.
+
+  The rational for this is that the lowest frequency peak of Fw[]
+  should be F0, as Fw[] can be considered the autocorrelation function
+  of Sw[] (the speech spectrum).  However sometimes due to phase
+  effects the lowest frequency maxima may not be the global maxima.
+
+  This works OK in practice and favours low F0 values in the presence
+  of background noise which means the sinusoidal codec does an OK job
+  of synthesising the background noise.  High F0 in background noise
+  tends to sound more periodic introducing annoying artifacts.
+
+\*---------------------------------------------------------------------------*/
+
+float post_process_sub_multiples(COMP Fw[], 
+				 int pmin, int pmax, float gmax, int gmax_bin,
+				 float *prev_Wo)
+{
+    int   min_bin, cmax_bin;
+    int   mult;
+    float thresh, best_f0;
+    int   b, bmin, bmax, lmax_bin;
+    float lmax, cmax;
+    int   prev_f0_bin;
+
+    /* post process estimate by searching submultiples */
+
+    mult = 2;
+    min_bin = PE_FFT_SIZE*DEC/pmax;
+    cmax_bin = gmax_bin;
+    prev_f0_bin = *prev_Wo*(4000.0/PI)*(PE_FFT_SIZE*DEC)/SAMPLE_RATE;
+
+    while(gmax_bin/mult >= min_bin) {
+
+	b = gmax_bin/mult;			/* determine search interval */
+	bmin = 0.8*b;
+	bmax = 1.2*b;
+	if (bmin < min_bin)
+	    bmin = min_bin;
+
+	/* lower threshold to favour previous frames pitch estimate,
+	    this is a form of pitch tracking */
+
+	if ((prev_f0_bin > bmin) && (prev_f0_bin < bmax))
+	    thresh = CNLP*0.5*gmax;
+	else
+	    thresh = CNLP*gmax;
+
+	lmax = 0;
+	lmax_bin = bmin;
+	for (b=bmin; b<=bmax; b++) 		/* look for maximum in interval */
+	    if (Fw[b].real > lmax) {
+		lmax = Fw[b].real;
+		lmax_bin = b;
+	    }
+
+	if (lmax > thresh)
+	    if ((lmax > Fw[lmax_bin-1].real) && (lmax > Fw[lmax_bin+1].real)) {
+		cmax = lmax;
+		cmax_bin = lmax_bin;
+	    }
+
+	mult++;
+    }
+
+    best_f0 = (float)cmax_bin*SAMPLE_RATE/(PE_FFT_SIZE*DEC);
+
+    return best_f0;
+}
+