/***************************************************************************/
/* Part of speech segregation program for the 1999 IEEE Trans. Neural Net. */
/* paper by D.L. Wang and G.J. Brown.                                      */
/* This part performs resynthesis based on segregated streams (binary      */
/* masks). It takes a segregated stream (a binary mask) and the original   */
/* mixture and produces the corresponding segregated signal.               */
/***************************************************************************/

/* NOTE(review): this copy of the file appears to be damaged. Wherever the  */
/* original contained a '<' followed later by a '>', the text in between    */
/* seems to have been stripped. The header names of the three includes      */
/* below are missing, and several function bodies further down are fused    */
/* together. Each affected spot is marked with NOTE(review); restore the    */
/* file from a pristine copy before building.                               */
#include
#include
#include

/* booleans */
#ifndef TRUE
#define TRUE 1
#endif
#ifndef FALSE
#define FALSE 0
#endif

/* auditory filterbank constants */
#define MAX_CHANNEL 150 /* maximum number of filters */
#define BW_CORRECTION 1.019 /* ERB bandwidth correction 4th order */
#define SAMPLING_FREQUENCY 16000 /* Hz */
#define MAX_WINDOW 320 /* use a window of 20 ms */
#define OFFSET 160 /* offset 10 ms */
#define MAX_SIGNAL 100000
#define MAX_FRAME 700
/* half the window length; the body is not parenthesised, so the uses below */
/* either wrap it as (HWIN) or appear as (i-HWIN), which also expands safely */
#define HWIN MAX_WINDOW/2.0

/* frequency scale definitions from Moore and Glasberg 1990 */
#define erb(f) (24.7*(4.37e-3*(f)+1.0))
#define hzToERBrate(f) (21.4*log10(4.37e-3*(f)+1.0))
#define ERBrateToHz(f) ((pow(10.0,((f)/21.4))-1.0)/4.37e-3)

#define sqr(x) ((x)*(x))

/* State and parameters of one filterbank channel. */
typedef struct {
  /* gain scales the output of updateCochlea; expCoeff is multiplied by the  */
  /* sample index to form the cos/sin argument there. The code that assigns  */
  /* cf, bw, criticalRate and z is not visible in this copy -- presumably    */
  /* centre frequency, bandwidth, ERB-rate and a filter coefficient; confirm */
  /* against the original initChannels.                                      */
  double cf, bw, criticalRate, z, gain, expCoeff;
  /* p0/q0 are written on every call to updateCochlea; p1..p4 and q1..q4     */
  /* hold the values from the four preceding calls (shifted at the end of    */
  /* updateCochlea).                                                         */
  double p0, p1, p2, p3, p4;
  double q0, q1, q2, q3, q4;
  /* u0/v0 are the current filtered values; u1/v1 keep the previous ones.    */
  double u0, u1;
  double v0, v1;
} channel;

/* function prototypes */

void help(void);
/* UNIX help */
void blip(void);
/* write dots on stderr */
void initChannels(int lowerCF, int upperCF, int numChannels);
/* initialise filterbank channels */
double updateCochlea(channel *c, float sigval, int tim);
/* process one sample of the input through the cochlea */
int msToSamples(float ms);
/* converts time in ms to samples at srate */

/* global variables */

/* one filter state per channel */
channel cochlea[MAX_CHANNEL];
/* dt, twoPi and twoPiT are assigned in initChannels */
float t, dt, twoPi, twoPiT;
/* mask values, indexed as mask[frame][chan] */
float mask[MAX_FRAME][MAX_CHANNEL];

/* function declarations */

/* Print the list of command-line options to stderr. */
/* NOTE(review): the option parsing is not visible in this chunk; confirm   */
/* that this list matches the options the program actually accepts.         */
void help(void)
{
  fprintf(stderr,"-l int lowest filter centre frequency (Hz) (500)\n");
  fprintf(stderr,"-u int highest filter centre frequency (Hz) (2000)\n");
  fprintf(stderr,"-n int number of channels (32)\n");
  fprintf(stderr,"-a string name of left input file\n");
  fprintf(stderr,"-b string name of right input file\n");
  fprintf(stderr,"-d float buffer decay time in ms (20.0)\n");
  fprintf(stderr,"-v bool verbose output (FALSE)\n");
}

/* NOTE(review): damaged region. The body of readSignal is cut off inside   */
/* the loop header, and the trailing "count=0; }" presumably belongs to     */
/* blip() (prototyped above), whose own header is lost. Nothing about the   */
/* behaviour of either function can be stated from this copy.               */
int readSignal(float s[])
{
  int sample=0;
  int i;
  for (i=0; i32) count=0;
}

/* Convert a level in decibels to a linear amplitude ratio: 10^(dB/20). */
float DBtoAmplitude(float dB)
{
  return pow(10.0,(dB/20.0));
}

/* Initialise the filterbank. The visible part sets the globals dt, twoPi   */
/* and twoPiT and computes the ERB-rate spacing between adjacent channels   */
/* from lowerCF and upperCF (Hz) and numChannels.                           */
void initChannels(int lowerCF, int upperCF, int numChannels)
{
  float lowerERB, upperERB, spaceERB;
  channel c;
  int chan;

  dt = 1.0/(float)SAMPLING_FREQUENCY; /* sample period in seconds */
  twoPi = 2.0*M_PI;
  twoPiT = 2.0*M_PI*dt;
  lowerERB = hzToERBrate(lowerCF);
  upperERB = hzToERBrate(upperCF);

  /* a single channel has no neighbour, so avoid dividing by zero */
  if (numChannels > 1)
    spaceERB = (upperERB-lowerERB)/(numChannels-1);
  else
    spaceERB = 0.0;

  /* NOTE(review): damaged region. The per-channel loop body, the end of    */
  /* initChannels and the start of updateCochlea (its header and local      */
  /* declarations, including zz and bm) are lost; the text resumes inside   */
  /* updateCochlea.                                                         */
  for (chan=0; chanz;

  /* new p0/q0: the input multiplied by cos / -sin of expCoeff*tim, plus a  */
  /* polynomial in zz of the four previous p / q values                     */
  c->p0 = sigval*cos(c->expCoeff*tim)+zz*(4*c->p1-zz*(6*c->p2-zz*(4*c->p3-zz*c->p4)));
  c->q0 =-sigval*sin(c->expCoeff*tim)+zz*(4*c->q1-zz*(6*c->q2-zz*(4*c->q3-zz*c->q4)));

  /* filtered values formed from the previous three p / q values */
  c->u0 = zz*(c->p1+zz*(4*c->p2+zz*c->p3));
  c->v0 = zz*(c->q1+zz*(4*c->q2+zz*c->q3));

  /* combine u0/v0 with cos/sin of the same argument and apply the gain */
  bm = (c->u0*cos(c->expCoeff*tim)-c->v0*sin(c->expCoeff*tim))*c->gain;

  /* shift the stored filter state by one sample for the next call */
  c->p4 = c->p3;
  c->p3 = c->p2;
  c->p2 = c->p1;
  c->p1 = c->p0;
  c->q4 = c->q3;
  c->q3 = c->q2;
  c->q2 = c->q1;
  c->q1 = c->q0;
  c->u1 = c->u0;
  c->v1 = c->v0;

  return(bm);
}

/* Convert a duration in milliseconds to a sample count at                  */
/* SAMPLING_FREQUENCY; the cast to int truncates any fractional sample.     */
int msToSamples(float ms)
{
  return (int)((float)SAMPLING_FREQUENCY*ms/1000.0);
}

/* Open the mask file fname for reading; on failure print a message to      */
/* stderr and terminate the program.                                        */
/* NOTE(review): the failure path calls exit(0), i.e. it reports success to */
/* the caller's shell -- confirm whether that is intended.                  */
void readMask(char fname[], int maxFrame, int maxChan)
{
  int frame, chan;
  FILE *ifp;

  fprintf(stderr,"reading mask from file %s...",fname);
  ifp=fopen(fname,"r");
  if (ifp==NULL) {
    fprintf(stderr,"Cannot open file %s\n",fname);
    exit(0);
  }

  /* NOTE(review): damaged region. The loop that reads the mask, the end of */
  /* readMask and the start of the following routine are lost. The text     */
  /* resumes inside a loop over frames that overlap-adds mask-weighted      */
  /* raised-cosine windows into w[], placing successive frames OFFSET       */
  /* samples apart. The loop bounds and the condition guarding each         */
  /* accumulation are truncated.                                            */
  for (frame=0; frame=0)
      /* first half of the window: weight rises from 0 (i=0) to 1 (i=HWIN) */
      w[frame*OFFSET+i]=w[frame*OFFSET+i]+mask[frame][chan]*0.5*(1.0+cos(i*M_PI/(HWIN)+M_PI));}
    for (i=MAX_WINDOW/2; i=0)
      /* second half: weight falls from 1 (i=HWIN) towards 0 (i=MAX_WINDOW) */
      w[frame*OFFSET+i]=w[frame*OFFSET+i]+mask[frame][chan]*0.5*(1.0+cos((i-HWIN)*M_PI/(HWIN)));}
  }

  /* NOTE(review): the visible text ends in the middle of this loop header */
  for (i=0; i