1 files changed, 469 insertions, 0 deletions
diff --git a/gr-vocoder/lib/codec2/c2sim.c b/gr-vocoder/lib/codec2/c2sim.c
new file mode 100644
index 000000000..bb49c7899
--- /dev/null
+++ b/gr-vocoder/lib/codec2/c2sim.c
@@ -0,0 +1,469 @@
+/*---------------------------------------------------------------------------*\
+
+  FILE........: c2sim.c
+  AUTHOR......: David Rowe
+  DATE CREATED: 20/8/2010
+
+  Codec2 simulation.  Combines encoder and decoder and allows switching in 
+  out various algorithms and quantisation steps. 
+
+\*---------------------------------------------------------------------------*/
+
+/*
+  Copyright (C) 2009 David Rowe
+
+  All rights reserved.
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU Lesser General Public License version 2.1, as
+  published by the Free Software Foundation.  This program is
+  distributed in the hope that it will be useful, but WITHOUT ANY
+  WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+  License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <math.h>
+
+#include "defines.h"
+#include "sine.h"
+#include "nlp.h"
+#include "dump.h"
+#include "lpc.h"
+#include "lsp.h"
+#include "quantise.h"
+#include "phase.h"
+#include "postfilter.h"
+#include "interp.h"
+
+/*---------------------------------------------------------------------------*\
+                                                                             
+ switch_present()                                                            
+                                                                             
+ Searches the command line arguments for a "switch".  If the switch is       
+ found, returns the command line argument where it ws found, else returns    
+ NULL.                                                                       
+                                                                             
+\*---------------------------------------------------------------------------*/
+
+int switch_present(sw,argc,argv)
+register char sw[];     /* switch in string form */
+register int argc;      /* number of command line arguments */
+register char *argv[];  /* array of command line arguments in string form */
+{
+  register int i;       /* loop variable */
+
+  for(i=1; i<argc; i++)
+    if (!strcmp(sw,argv[i]))
+      return(i);
+
+  return 0;
+}
+
+void synth_one_frame(short buf[], MODEL *model, float Sn_[], float Pn[]);
+
+/*---------------------------------------------------------------------------*\
+                                                                          
+				MAIN                                        
+                                                                         
+\*---------------------------------------------------------------------------*/
+
+int main(int argc, char *argv[])
+{
+  FILE *fout;		/* output speech file                    */
+  FILE *fin;		/* input speech file                     */
+  short buf[N];		/* input/output buffer                   */
+  float Sn[M];	        /* float input speech samples            */
+  COMP  Sw[FFT_ENC];	/* DFT of Sn[]                           */
+  float w[M];	        /* time domain hamming window            */
+  COMP  W[FFT_ENC];	/* DFT of w[]                            */
+  MODEL model;
+  float Pn[2*N];	/* trapezoidal synthesis window          */
+  float Sn_[2*N];	/* synthesised speech */
+  int   i;		/* loop variable                         */
+  int   frames;
+  float prev_Wo;
+  float pitch;
+  int   voiced1 = 0;
+
+  char  out_file[MAX_STR];
+  int   arg;
+  float snr;
+  float sum_snr;
+
+  int lpc_model, order = LPC_ORD;
+  int lsp, lspd, lspdvq, lsp_quantiser;
+  float ak[LPC_MAX];
+  COMP  Sw_[FFT_ENC];
+  COMP  Ew[FFT_ENC]; 
+ 
+  int dump;
+  
+  int phase0;
+  float ex_phase[MAX_AMP+1];
+
+  int   postfilt;
+  float bg_est;
+
+  int   hand_voicing;
+  FILE *fvoicing = 0;
+
+  MODEL prev_model, interp_model;
+  int decimate;
+  float lsps[LPC_ORD];
+  float prev_lsps[LPC_ORD];
+  float e, prev_e;
+  float ak_interp[LPC_MAX];
+
+  void *nlp_states;
+  float hpf_states[2];
+  int   resample;
+  float AresdB_prev[MAX_AMP];
+
+  for(i=0; i<MAX_AMP; i++)
+      AresdB_prev[i] = 0.0; 
+
+  for(i=0; i<M; i++)
+      Sn[i] = 1.0;
+  for(i=0; i<2*N; i++)
+      Sn_[i] = 0;
+
+  prev_Wo = TWO_PI/P_MAX;
+
+  prev_model.Wo = TWO_PI/P_MIN;
+  prev_model.L = floor(PI/prev_model.Wo);
+  for(i=1; i<=prev_model.L; i++) {
+      prev_model.A[i] = 0.0;
+      prev_model.phi[i] = 0.0;
+  }
+  for(i=1; i<=MAX_AMP; i++) {
+      ex_phase[i] = 0.0;
+  }
+  for(i=0; i<LPC_ORD; i++) {
+      prev_lsps[i] = i*PI/(LPC_ORD+1);
+  }
+  e = prev_e = 1;
+  hpf_states[0] = hpf_states[1] = 0.0;
+
+  nlp_states = nlp_create();
+
+  if (argc < 2) {
+    fprintf(stderr, "\nCodec2 - 2400 bit/s speech codec - Simulation Program\n"
+     "\thttp://rowetel.com/codec2.html\n\n"
+     "usage: %s InputFile [-o OutputFile]\n"
+     "\t[--lpc Order]\n"
+     "\t[--lsp]\n"
+     "\t[--lspd]\n"
+     "\t[--lspdvq]\n"
+     "\t[--phase0]\n"
+     "\t[--postfilter]\n"
+     "\t[--hand_voicing]\n"
+     "\t[--dec]\n"
+     "\t[--dump DumpFilePrefix]\n", argv[0]);
+    exit(1);
+  }
+
+  /* Interpret command line arguments -------------------------------------*/
+
+  /* Input file */
+
+  if ((fin = fopen(argv[1],"rb")) == NULL) {
+    fprintf(stderr, "Error opening input speech file: %s: %s.\n",
+     argv[1], strerror(errno));
+    exit(1);
+  }
+
+  /* Output file */
+
+  if ((arg = switch_present("-o",argc,argv))) {
+    if ((fout = fopen(argv[arg+1],"wb")) == NULL) {
+      fprintf(stderr, "Error opening output speech file: %s: %s.\n",
+       argv[arg+1], strerror(errno));
+      exit(1);
+    }
+    strcpy(out_file,argv[arg+1]);
+  }
+  else
+    fout = NULL;
+
+  lpc_model = 0;
+  if ((arg = switch_present("--lpc",argc,argv))) {
+      lpc_model = 1;
+      order = atoi(argv[arg+1]);
+      if ((order < 4) || (order > 20)) {
+        fprintf(stderr, "Error in lpc order: %d\n", order);
+        exit(1);
+      }	  
+  }
+
+  dump = switch_present("--dump",argc,argv);
+#ifdef DUMP
+  if (dump) 
+      dump_on(argv[dump+1]);
+#endif
+
+  lsp = switch_present("--lsp",argc,argv);
+  lsp_quantiser = 0;
+  if (lsp)
+      assert(order == LPC_ORD);
+
+  lspd = switch_present("--lspd",argc,argv);
+  if (lspd)
+      assert(order == LPC_ORD);
+
+  lspdvq = switch_present("--lspdvq",argc,argv);
+  if (lspdvq)
+      assert(order == LPC_ORD);
+
+  phase0 = switch_present("--phase0",argc,argv);
+  if (phase0) {
+      ex_phase[0] = 0;
+  }
+
+  hand_voicing = switch_present("--hand_voicing",argc,argv);
+  if (hand_voicing) {
+      fvoicing = fopen(argv[hand_voicing+1],"rt");
+      assert(fvoicing != NULL);
+  }
+
+  bg_est = 0.0;
+  postfilt = switch_present("--postfilter",argc,argv);
+
+  decimate = switch_present("--dec",argc,argv);
+
+  arg = switch_present("--resample",argc,argv);
+  resample = atoi(argv[arg+1]);
+
+  /* Initialise ------------------------------------------------------------*/
+
+  make_analysis_window(w,W);
+  make_synthesis_window(Pn);
+  quantise_init();
+
+  /* Main loop ------------------------------------------------------------*/
+
+  frames = 0;
+  sum_snr = 0;
+  while(fread(buf,sizeof(short),N,fin)) {
+    frames++;
+    //printf("frame: %d", frames);
+
+    /* Read input speech */
+
+    for(i=0; i<M-N; i++)
+      Sn[i] = Sn[i+N];
+    for(i=0; i<N; i++) {
+	//Sn[i+M-N] = hpf((float)buf[i], hpf_states);
+	Sn[i+M-N] = (float)buf[i];
+    }
+ 
+    /* Estimate pitch */
+
+    nlp(nlp_states,Sn,N,M,P_MIN,P_MAX,&pitch,Sw,&prev_Wo);
+    model.Wo = TWO_PI/pitch;
+
+    /* estimate model parameters */
+
+    dft_speech(Sw, Sn, w); 
+    two_stage_pitch_refinement(&model, Sw);
+    estimate_amplitudes(&model, Sw, W);
+#ifdef DUMP 
+    dump_Sn(Sn); dump_Sw(Sw); dump_model(&model);
+#endif
+
+    /* optional zero-phase modelling */
+
+    if (phase0) {
+	float Wn[M];		        /* windowed speech samples */
+	float Rk[LPC_MAX+1];	        /* autocorrelation coeffs  */
+  	
+#ifdef DUMP
+	dump_phase(&model.phi[0], model.L);
+#endif
+
+	/* find aks here, these are overwritten if LPC modelling is enabled */
+
+	for(i=0; i<M; i++)
+	    Wn[i] = Sn[i]*w[i];
+	autocorrelate(Wn,Rk,M,order);
+	levinson_durbin(Rk,ak,order);
+
+#ifdef DUMP
+	dump_ak(ak, LPC_ORD);
+#endif
+	
+	/* determine voicing */
+
+	snr = est_voicing_mbe(&model, Sw, W, Sw_, Ew, prev_Wo);
+#ifdef DUMP
+	dump_Sw_(Sw_);
+	dump_Ew(Ew);
+	dump_snr(snr);
+#endif
+
+	/* just to make sure we are not cheating - kill all phases */
+
+	for(i=0; i<MAX_AMP; i++)
+	    model.phi[i] = 0;
+	
+	if (hand_voicing) {
+	    fscanf(fvoicing,"%d\n",&model.voiced);
+	}
+    }
+
+    /* optional LPC model amplitudes */
+
+    if (lpc_model) {
+	int   lsp_indexes[LPC_MAX];
+
+	e = speech_to_uq_lsps(lsps, ak, Sn, w, order);
+
+	if (lsp) {
+	    encode_lsps(lsp_indexes, lsps, LPC_ORD);
+	    decode_lsps(lsps, lsp_indexes, LPC_ORD);
+	    bw_expand_lsps(lsps, LPC_ORD);
+	    lsp_to_lpc(lsps, ak, LPC_ORD);
+	}
+
+	if (lspd) {
+	    float lsps_[LPC_ORD];
+
+	    lspd_quantise(lsps, lsps_, LPC_ORD);
+	    lsp_to_lpc(lsps_, ak, LPC_ORD);
+ 	}
+
+	if (lspdvq) {
+	    float lsps_[LPC_ORD];
+
+	    lspdvq_quantise(lsps, lsps_, LPC_ORD);
+	    lsp_to_lpc(lsps_, ak, LPC_ORD);
+ 	}
+
+	e = decode_energy(encode_energy(e));
+	model.Wo = decode_Wo(encode_Wo(model.Wo));
+
+	aks_to_M2(ak, order, &model, e, &snr, 1); 
+	apply_lpc_correction(&model);
+	sum_snr += snr;
+#ifdef DUMP
+        dump_quantised_model(&model);
+#endif
+    }
+
+    /* optional resampling of model amplitudes */
+
+    printf("frames=%d\n", frames);
+    if (resample) {
+	snr = resample_amp_nl(&model, resample, AresdB_prev);
+	sum_snr += snr;
+#ifdef DUMP
+        dump_quantised_model(&model);
+#endif
+    }
+
+    /* option decimation to 20ms rate, which enables interpolation
+       routine to synthesise in between frame */
+  
+    if (decimate) {
+	if (!phase0) {
+	    printf("needs --phase0 to resample phase for interpolated Wo\n");
+	    exit(0);
+	}
+	if (!lpc_model) {
+	    printf("needs --lpc 10 to resample amplitudes\n");
+	    exit(0);
+	}
+
+	/* odd frame - interpolate */
+
+	if (frames%2) {
+
+	    interp_model.voiced = voiced1;
+
+	    #ifdef LOG_LIN_INTERP
+	    interpolate(&interp_model, &prev_model, &model);
+	    #else
+	    interpolate_lsp(&interp_model, &prev_model, &model,
+			    prev_lsps, prev_e, lsps, e, ak_interp);
+	    apply_lpc_correction(&interp_model);
+	    #endif
+	    
+	    if (phase0)
+		phase_synth_zero_order(&interp_model, ak_interp, ex_phase,
+				       order);	
+	    if (postfilt)
+		postfilter(&interp_model, &bg_est);
+	    synth_one_frame(buf, &interp_model, Sn_, Pn);
+	    if (fout != NULL) fwrite(buf,sizeof(short),N,fout);
+
+	    if (phase0)
+		phase_synth_zero_order(&model, ak, ex_phase, order);	
+	    if (postfilt)
+		postfilter(&model, &bg_est);
+	    synth_one_frame(buf, &model, Sn_, Pn);
+	    if (fout != NULL) fwrite(buf,sizeof(short),N,fout);
+
+	    prev_model = model;
+	    for(i=0; i<LPC_ORD; i++)
+		prev_lsps[i] = lsps[i];
+	    prev_e = e;
+	}
+	else {
+	    voiced1 = model.voiced;
+	}
+    }
+    else {
+	if (phase0)
+	    phase_synth_zero_order(&model, ak, ex_phase, order);	
+	if (postfilt)
+	    postfilter(&model, &bg_est);
+	synth_one_frame(buf, &model, Sn_, Pn);
+	if (fout != NULL) fwrite(buf,sizeof(short),N,fout);
+    }
+    prev_Wo = TWO_PI/pitch;
+  }
+  fclose(fin);
+
+  if (fout != NULL)
+    fclose(fout);
+
+  if (lpc_model || resample)
+      printf("SNR av = %5.2f dB\n", sum_snr/frames);
+
+#ifdef DUMP
+  if (dump)
+      dump_off();
+#endif
+
+  if (hand_voicing)
+    fclose(fvoicing);
+
+  nlp_destroy(nlp_states);
+
+  return 0;
+}
+
+void synth_one_frame(short buf[], MODEL *model, float Sn_[], float Pn[])
+{
+    int     i;
+
+    synthesise(Sn_, model, Pn, 1);
+
+    for(i=0; i<N; i++) {
+	if (Sn_[i] > 32767.0)
+	    buf[i] = 32767;
+	else if (Sn_[i] < -32767.0)
+	    buf[i] = -32767;
+	else
+	    buf[i] = Sn_[i];
+    }
+
+}