Hello,
Sorry to ask a similar question to the one i asked before (FFT Problem (Returns random results)), but i've looked up pitch detection and autocorrelation and have found some code for pitch detection using autocorrelation.
Im trying to do pitch detection of a users singing. Problem is, it keeps returning random results. I've got some code from http://code.google.com/p/yaalp/ which i've converted to C++ and modified (below). My sample rate is 2048, and data size is 1024. I'm detecting pitch of both a sine wave and mic input. The frequency of the sine wave is 726.0, and its detecting it to be 722.950820 (which im ok with), but its detecting the pitch of the mic as a random number from around 100 to around 1050.
I'm now using a High pass filter to remove the DC offset, but it's not working. Am i doing it right, and if so, what else can i do to fix it? Any help would be greatly appreciated!
double* doHighPassFilter(short *buffer)
{
// Do FFT:
int bufferLength = 1024;
float *real = malloc(bufferLength*sizeof(float));
float *real2 = malloc(bufferLength*sizeof(float));
for(int x=0;x<bufferLength;x++)
{
real[x] = buffer[x];
}
fft(real, bufferLength);
for(int x=0;x<bufferLength;x+=2)
{
real2[x] = real[x];
}
for (int i=0; i < 30; i++) //Set freqs lower than 30hz to zero to attenuate the low frequencies
real2[i] = 0;
// Do inverse FFT:
inversefft(real2,bufferLength);
double* real3 = (double*)real2;
return real3;
}
double DetectPitch(short* data)
{
int sampleRate = 2048;
//Create sine wave
double *buffer = malloc(1024*sizeof(short));
double amplitude = 0.25 * 32768; //0.25 * max length of short
double frequency = 726.0;
for (int n = 0; n < 1024; n++)
{
buffer[n] = (short)(amplitude * sin((2 * 3.14159265 * n * frequency) / sampleRate));
}
doHighPassFilter(data);
printf("Pitch from sine wave: %f\n",detectPitchCalculation(buffer, 50.0, 1000.0, 1, 1));
printf("Pitch from mic: %f\n",detectPitchCalculation(data, 50.0, 1000.0, 1, 1));
return 0;
}
// These work by shifting the signal until it seems to correlate with itself.
// In other words if the signal looks very similar to (signal shifted 200 data) than the fundamental period is probably 200 data
// Note that the algorithm only works well when there's only one prominent fundamental.
// This could be optimized by looking at the rate of change to determine a maximum without testing all periods.
double detectPitchCalculation(double* data, double minHz, double maxHz, int nCandidates, int nResolution)
{
//-------------------------1-------------------------//
// note that higher frequency means lower period
int nLowPeriodInSamples = hzToPeriodInSamples(maxHz, 2048);
int nHiPeriodInSamples = hzToPeriodInSamples(minHz, 2048);
if (nHiPeriodInSamples <= nLowPeriodInSamples) printf("Bad range for pitch detection.");
if (1024 < nHiPeriodInSamples) printf("Not enough data.");
double *results = new double[nHiPeriodInSamples - nLowPeriodInSamples];
//-------------------------2-------------------------//
for (int period = nLowPeriodInSamples; period < nHiPeriodInSamples; period += nResolution)
{
double sum = 0;
// for each sample, find correlation. (If they are far apart, small)
for (int i = 0; i < 1024 - period; i++)
sum += data[i] * data[i + period];
double mean = sum / 1024.0;
results[period - nLowPeriodInSamples] = mean;
}
//-------------------------3-------------------------//
// find the best indices
int *bestIndices = findBestCandidates(nCandidates, results, nHiPeriodInSamples - nLowPeriodInSamples - 1); //note findBestCandidates modifies parameter
// convert back to Hz
double *res = new double[nCandidates];
for (int i=0; i < nCandidates;i++) res[i] = periodInSamplesToHz(bestIndices[i]+nLowPeriodInSamples, 2048);
double pitch2 = res[0];
free(res);
free(results);
return pitch2;
}
/// Finds n "best" values from an array. Returns the indices of the best parts.
/// (One way to do this would be to sort the array, but that could take too long.
/// Warning: Changes the contents of the array!!! Do not use result array afterwards.
int* findBestCandidates(int n, double* inputs,int length)
{
//int length = inputs.Length;
if (length < n) printf("Length of inputs is not long enough.");
int *res = new int[n];
double minValue = 0;
for (int c = 0; c < n; c++)
{
// find the highest.
double fBestValue = minValue;
int nBestIndex = -1;
for (int i = 0; i < length; i++)
{
if (inputs[i] > fBestValue)
{
nBestIndex = i;
fBestValue = inputs[i];
}
}
// record this highest value
res[c] = nBestIndex;
// now blank out that index.
if(nBestIndex!=-1) inputs[nBestIndex] = minValue;
}
return res;
}
int hzToPeriodInSamples(double hz, int sampleRate)
{
return (int)(1 / (hz / (double)sampleRate));
}
double periodInSamplesToHz(int period, int sampleRate)
{
return 1 / (period / (double)sampleRate);
}
Thanks,
Niall.
Edit: Changed the code to implement a high pass filter with a cutoff of 30hz (from What Are High-Pass and Low-Pass Filters?, can anyone tell me how to convert the low-pass filter using convolution to a high-pass one?) but it's still returning random results. Plugging it into a VST host and using VST plugins to compare spectrums isn't an option to me unfortunately.