WaveOutBuffer Buf = new WaveOutBuffer(m_WaveOut, bufferSize);
Prev.NextBuffer = Buf;
Prev = Buf;
}
}
finally
{
Prev.NextBuffer = m_Buffers;
}
}
}
/// <summary>
/// Disposes every buffer in the ring that starts at <c>m_Buffers</c> and
/// clears both <c>m_CurrentBuffer</c> and <c>m_Buffers</c>.
/// </summary>
private void FreeBuffers()
{
    m_CurrentBuffer = null;

    // Nothing was allocated (or it has already been released).
    if (m_Buffers == null)
    {
        return;
    }

    // Detach the ring from the field before tearing it down.
    WaveOutBuffer head = m_Buffers;
    m_Buffers = null;

    WaveOutBuffer node = head;
    do
    {
        // Read the link first: the node is disposed on the next line.
        WaveOutBuffer following = node.NextBuffer;
        node.Dispose();
        node = following;
    }
    while (node != head); // the walk ends when it arrives back at the head
}
/// <summary>
/// Makes the next buffer current and calls <c>WaitFor</c> on it.
/// When no buffer is current yet, the walk starts at <c>m_Buffers</c>.
/// </summary>
private void Advance()
{
    if (m_CurrentBuffer == null)
    {
        m_CurrentBuffer = m_Buffers;
    }
    else
    {
        m_CurrentBuffer = m_CurrentBuffer.NextBuffer;
    }

    m_CurrentBuffer.WaitFor();
}
/// <summary>
/// Calls <c>WaitFor</c> once on every buffer in the ring that starts at
/// <c>m_Buffers</c>. Does nothing when no buffers are allocated.
/// </summary>
private void WaitForAllBuffers()
{
    WaveOutBuffer First = m_Buffers;
    if (First == null)
    {
        return;
    }

    // The ring is walked with do/while (same shape as FreeBuffers) so that
    // the last buffer, whose NextBuffer points back at the head, is waited
    // on as well. The previous "while (Buf.NextBuffer != m_Buffers)" form
    // stopped one buffer early and waited on nothing for a one-buffer ring.
    WaveOutBuffer Buf = First;
    do
    {
        Buf.WaitFor();
        Buf = Buf.NextBuffer;
    }
    while (Buf != First);
}
}
}
1.3) SignalGenerator.cs
// Speech recognition
// signal generator => generates various signals such as sawtooth…
using System;
using System.Collections.Generic;
using System.Text;
namespace SoundViewer
{
/// <summary>
/// Generates a buffer of synthetic samples for one of the waveforms
/// "Sine", "Cosine", "Square", "Triangular" or "Sawtooth", optionally
/// adding a constant DC level and uniform random noise.
/// </summary>
class SignalGenerator
{
    private string _waveForm = "Sine";
    private double _amplitude = 128.0;
    private double _samplingRate = 44100;
    private double _frequency = 5000.0;
    private double _dcLevel = 0.0;
    private double _noise = 0.0;
    private int _samples = 16384;
    private bool _addDCLevel = false;
    private bool _addNoise = false;

    public SignalGenerator()
    {
    }

    /// <summary>Sets the waveform name (see the class summary for the recognised names).</summary>
    public void SetWaveform(string waveForm)
    {
        _waveForm = waveForm;
    }

    /// <summary>Gets the waveform name.</summary>
    public String GetWaveform()
    {
        return _waveForm;
    }

    /// <summary>Sets the peak amplitude of the generated waveform.</summary>
    public void SetAmplitude(double amplitude)
    {
        _amplitude = amplitude;
    }

    /// <summary>Gets the peak amplitude of the generated waveform.</summary>
    public double GetAmplitude()
    {
        return _amplitude;
    }

    /// <summary>Sets the waveform frequency (same unit as the sampling rate).</summary>
    public void SetFrequency(double frequency)
    {
        _frequency = frequency;
    }

    /// <summary>Gets the waveform frequency.</summary>
    public double GetFrequency()
    {
        return _frequency;
    }

    /// <summary>Sets the sampling rate used to convert the frequency to a per-sample step.</summary>
    public void SetSamplingRate(double rate)
    {
        _samplingRate = rate;
    }

    /// <summary>Gets the sampling rate.</summary>
    public double GetSamplingRate()
    {
        return _samplingRate;
    }

    /// <summary>Sets the number of samples returned by <see cref="GenerateSignal"/>.</summary>
    public void SetSamples(int samples)
    {
        _samples = samples;
    }

    /// <summary>Gets the number of samples returned by <see cref="GenerateSignal"/>.</summary>
    public int GetSamples()
    {
        return _samples;
    }

    /// <summary>Sets the DC level; it is only applied when enabled via <see cref="SetDCLevelState"/>.</summary>
    public void SetDCLevel(double dc)
    {
        _dcLevel = dc;
    }

    /// <summary>Gets the DC level.</summary>
    public double GetDCLevel()
    {
        return _dcLevel;
    }

    /// <summary>Sets the noise amplitude; it is only applied when enabled via <see cref="SetNoiseState"/>.</summary>
    public void SetNoise(double noise)
    {
        _noise = noise;
    }

    /// <summary>Gets the noise amplitude.</summary>
    public double GetNoise()
    {
        return _noise;
    }

    /// <summary>Enables or disables adding the DC level to every sample.</summary>
    public void SetDCLevelState(bool dcstate)
    {
        _addDCLevel = dcstate;
    }

    /// <summary>Returns true when the DC level is added to the signal.</summary>
    public bool IsDCLevel()
    {
        return _addDCLevel;
    }

    /// <summary>Enables or disables adding random noise to every sample.</summary>
    public void SetNoiseState(bool noisestate)
    {
        _addNoise = noisestate;
    }

    /// <summary>Returns true when random noise is added to the signal.</summary>
    public bool IsNoise()
    {
        return _addNoise;
    }

    /// <summary>
    /// Builds the signal described by the current settings.
    /// </summary>
    /// <returns>
    /// A new array of <c>GetSamples()</c> values. An unrecognised (or null)
    /// waveform name yields all zeros before the optional DC level and
    /// noise are added.
    /// </returns>
    public double[] GenerateSignal()
    {
        double[] values = new double[_samples];

        // A switch on a null string simply falls through to "default",
        // so a null waveform name no longer throws.
        switch (_waveForm)
        {
            case "Sine":
            {
                double theta = 2.0 * Math.PI * _frequency / _samplingRate;
                for (int i = 0; i < _samples; i++)
                {
                    values[i] = _amplitude * Math.Sin(i * theta);
                }
                break;
            }
            case "Cosine":
            {
                double theta = 2.0 * Math.PI * _frequency / _samplingRate;
                for (int i = 0; i < _samples; i++)
                {
                    values[i] = _amplitude * Math.Cos(i * theta);
                }
                break;
            }
            case "Square":
            {
                // p is the number of half-periods per sample; the sign flips
                // on every odd half-period.
                double p = 2.0 * _frequency / _samplingRate;
                for (int i = 0; i < _samples; i++)
                {
                    values[i] = Math.Round(i * p) % 2 == 0 ? _amplitude : -_amplitude;
                }
                break;
            }
            case "Triangular":
            {
                double p = 2.0 * _frequency / _samplingRate;
                for (int i = 0; i < _samples; i++)
                {
                    // (i * p - ip) lies in [-0.5, 0.5]; the slope alternates
                    // sign with the parity of the nearest half-period ip.
                    int ip = (int)Math.Round(i * p);
                    values[i] = 2.0 * _amplitude * (1 - 2 * (ip % 2)) * (i * p - ip);
                }
                break;
            }
            case "Sawtooth":
            {
                for (int i = 0; i < _samples; i++)
                {
                    // q is the position in periods; subtracting the nearest
                    // integer leaves a ramp in [-0.5, 0.5].
                    double q = i * _frequency / _samplingRate;
                    values[i] = 2.0 * _amplitude * (q - Math.Round(q));
                }
                break;
            }
            default:
                break;
        }

        if (_addDCLevel)
        {
            for (int i = 0; i < _samples; i++)
            {
                values[i] += _dcLevel;
            }
        }

        if (_addNoise)
        {
            Random r = new Random();
            for (int i = 0; i < _samples; i++)
            {
                // NextDouble() is uniform in [0, 1), so the added noise is
                // bounded by the configured amplitude. Next() returns a
                // non-negative Int32 and would bury the signal under values
                // of up to about 2^31 times the amplitude.
                values[i] += _noise * r.NextDouble();
            }
        }

        return values;
    }
}
}
1.4)AudioFrame.cs
// Speech recognition
// audioframe => working on audio frame
using System;
using System.Drawing;
using System.Windows.Forms;
namespace SoundViewer
{
/// <summary>
/// Holds one frame of two-channel audio, converts it to the frequency
/// domain via <c>FourierTransform.FFTDb</c>, and renders both views into
/// PictureBox controls.
/// </summary>
class AudioFrame
{
    private Bitmap _canvasTimeDomain;
    private Bitmap _canvasFrequencyDomain;
    private double[] _waveLeft;
    private double[] _waveRight;
    private double[] _fftLeft;
    // Was declared as "_ftRight" while every use site says "_fftRight".
    private double[] _fftRight;
    private SignalGenerator _signalGenerator;
    private bool _isTest = false;

    /// <param name="isTest">
    /// When true, <see cref="Process"/> ignores its input and analyses a
    /// generated sine wave instead.
    /// </param>
    public AudioFrame(bool isTest)
    {
        _isTest = isTest;
    }

    /// <summary>
    /// Process 16 bit sample
    /// </summary>
    /// <param name="wave">
    /// Interleaved 16-bit samples, four bytes per frame: left channel at
    /// offset 0, right channel at offset 2. Trailing bytes that do not form
    /// a complete frame are ignored. Not read in test mode.
    /// </param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="wave"/> is null and the frame is not in test mode.
    /// </exception>
    public void Process(ref byte[] wave)
    {
        if (_isTest == false)
        {
            if (wave == null)
            {
                throw new ArgumentNullException("wave");
            }

            // Iterate over whole frames only; stepping the byte index up to
            // wave.Length overran both arrays (and BitConverter) whenever
            // the length was not a multiple of four.
            int frames = wave.Length / 4;
            _waveLeft = new double[frames];
            _waveRight = new double[frames];
            for (int h = 0; h < frames; h++)
            {
                int offset = h * 4;
                _waveLeft[h] = (double)BitConverter.ToInt16(wave, offset);
                _waveRight[h] = (double)BitConverter.ToInt16(wave, offset + 2);
            }
        }
        else
        {
            // Generate artificial sample for testing
            _signalGenerator = new SignalGenerator();
            _signalGenerator.SetWaveform("Sine");
            _signalGenerator.SetSamplingRate(44100);
            _signalGenerator.SetSamples(16384);
            _signalGenerator.SetFrequency(5000);
            // Two separate calls so that each channel owns its own array.
            _waveLeft = _signalGenerator.GenerateSignal();
            _waveRight = _signalGenerator.GenerateSignal();
        }

        // Generate frequency domain data in decibels
        _fftLeft = FourierTransform.FFTDb(ref _waveLeft);
        _fftRight = FourierTransform.FFTDb(ref _waveRight);
    }

    /// <summary>
    /// Render time domain to PictureBox
    /// </summary>
    /// <param name="pictureBox">
    /// Target control; its Image is replaced by a new bitmap of the control's
    /// size with the left channel in the upper half and the right channel
    /// in the lower half.
    /// </param>
    public void RenderTimeDomain(ref PictureBox pictureBox)
    {
        // Set up for drawing
        _canvasTimeDomain = new Bitmap(pictureBox.Width, pictureBox.Height);

        // Determine channel boundaries
        int width = _canvasTimeDomain.Width;
        int center = _canvasTimeDomain.Height / 2;
        int height = _canvasTimeDomain.Height;
        int leftTop = 0;
        int leftBottom = center - 1;
        int rightTop = center + 1;
        int rightBottom = height;

        // "using" releases the GDI+ objects even if drawing throws.
        using (Graphics offScreenDC = Graphics.FromImage(_canvasTimeDomain))
        using (Pen pen = new Pen(Color.WhiteSmoke))
        {
            // Divider between the two channels
            offScreenDC.DrawLine(pen, 0, center, width, center);
            pen.Color = Color.LimeGreen;

            // a 16 bit sample has values from -32768 to 32767
            double yCenterLeft = (leftBottom - leftTop) / 2;
            double yScaleLeft = 0.5 * (leftBottom - leftTop) / 32768;
            DrawWaveform(offScreenDC, pen, _waveLeft, width, yCenterLeft, yScaleLeft);

            double yCenterRight = rightTop + ((rightBottom - rightTop) / 2);
            double yScaleRight = 0.5 * (rightBottom - rightTop) / 32768;
            DrawWaveform(offScreenDC, pen, _waveRight, width, yCenterRight, yScaleRight);
        }

        pictureBox.Image = _canvasTimeDomain;
    }

    /// <summary>
    /// Render frequency domain to PictureBox
    /// </summary>
    /// <param name="pictureBox">
    /// Target control; its Image is replaced by a new bitmap of the control's
    /// size with left-channel bars growing down from the top edge and
    /// right-channel bars growing up from the bottom edge.
    /// </param>
    public void RenderFrequencyDomain(ref PictureBox pictureBox)
    {
        // Set up for drawing
        _canvasFrequencyDomain = new Bitmap(pictureBox.Width, pictureBox.Height);

        // Determine channel boundaries
        int width = _canvasFrequencyDomain.Width;
        int center = _canvasFrequencyDomain.Height / 2;
        int height = _canvasFrequencyDomain.Height;
        int leftTop = 0;
        int leftBottom = center - 1;
        int rightTop = center + 1;
        int rightBottom = height;

        using (Graphics offScreenDC = Graphics.FromImage(_canvasFrequencyDomain))
        using (Pen pen = new Pen(Color.WhiteSmoke))
        {
            offScreenDC.DrawLine(pen, 0, center, width, center);

            // Left channel: bars start at the top edge and grow downwards.
            DrawSpectrum(offScreenDC, pen, _fftLeft, width, leftTop, leftBottom - leftTop, 1);

            // Right channel: bars start at the bottom edge and grow upwards.
            DrawSpectrum(offScreenDC, pen, _fftRight, width, rightBottom, rightBottom - rightTop, -1);
        }

        pictureBox.Image = _canvasFrequencyDomain;
    }

    /// <summary>
    /// Draws one channel as a polyline, one point per pixel column.
    /// Draws nothing when <paramref name="samples"/> is null or empty.
    /// </summary>
    private static void DrawWaveform(Graphics g, Pen pen, double[] samples, int width, double yCenter, double yScale)
    {
        if (samples == null || samples.Length == 0)
        {
            return;
        }

        // Floating-point stride, as in the frequency view: an integer
        // stride truncates to 0 when there are fewer samples than pixels
        // and skips the tail of the buffer otherwise.
        double samplesPerPixel = (double)samples.Length / (double)width;
        int xPrev = 0;
        int yPrev = 0;
        for (int x = 0; x < width; x++)
        {
            int y = (int)(yCenter + (samples[(int)(samplesPerPixel * x)] * yScale));
            if (x > 0)
            {
                g.DrawLine(pen, xPrev, yPrev, x, y);
            }
            xPrev = x;
            yPrev = y;
        }
    }

    /// <summary>
    /// Draws one channel's spectrum as vertical bars, one per pixel column.
    /// Draws nothing when <paramref name="spectrum"/> is null or empty.
    /// </summary>
    /// <param name="baseline">Y coordinate every bar starts from.</param>
    /// <param name="extent">Bar length in pixels for an amplitude of 100.</param>
    /// <param name="direction">+1 to grow downwards, -1 to grow upwards.</param>
    private static void DrawSpectrum(Graphics g, Pen pen, double[] spectrum, int width, int baseline, int extent, int direction)
    {
        if (spectrum == null || spectrum.Length == 0)
        {
            return;
        }

        double binsPerPixel = (double)spectrum.Length / (double)width;
        for (int x = 0; x < width; x++)
        {
            double amplitude = (int)spectrum[(int)(binsPerPixel * x)];
            if (amplitude < 0) // Drop negative values
            {
                amplitude = 0;
            }
            int y = (int)(baseline + direction * (extent * amplitude) / 100); // Arbitrary factor
            pen.Color = Color.FromArgb(120, 120, (int)amplitude % 255);
            g.DrawLine(pen, x, baseline, x, y);
        }
    }

    // Pointer parameters are only legal in an unsafe context, so the method
    // is marked "unsafe" (the project must allow unsafe code).
    private unsafe void WaveIn(short* buf, int len)
    {
        // TODO: recognition is not implemented yet
    }
}
}
2. Листинг программы – Speech Recognition (Matlab)
2.1)CMN.m
function NormMatrix = CMN(Matrix)
%CMN Subtract the column mean from every column of a matrix.
%   NormMatrix = CMN(Matrix) returns a matrix of the same size as Matrix
%   in which each column has had its own mean removed, so every column of
%   the result has zero mean.
[r,c] = size(Matrix);
NormMatrix = zeros(r,c);
for i = 1:c
    MatMean = mean(Matrix(:,i));              %Derives mean for each column i in utterance
    NormMatrix(:,i) = Matrix(:,i) - MatMean;  %Subtracts mean from each element in column i
end
2.2) Recognition.m
% Recognition script: records one utterance from the microphone, extracts
% its features, scores it against the stored templates with DTW and picks
% the word by a k-nearest-neighbour vote.
% Relies on the project functions nreduce, mfccf, CMN and DTWScores.
clear all;
close all;
ncoeff = 13;        %Required number of mfcc coefficients
N = 20;             %Number of words in vocabulary
k = 3;              %Number of nearest neighbors to choose
fs = 16000;         %Sampling rate
duration1 = 0.1;    %Initial silence duration in seconds
duration2 = 2;      %Recording duration in seconds
G = 2;              %vary this factor to compensate for amplitude variations
NSpeakers = 5;      %Number of training speakers

fprintf('Press any key to start %g seconds of speech recording...', duration2);
pause;
silence = wavrecord(duration1*fs, fs);
fprintf('Recording speech...');
speechIn = wavrecord(duration2*fs, fs);     % duration*fs is the total number of sample points
fprintf('Finished recording.\n');
fprintf('System is trying to recognize what you have spoken...\n');

speechIn1 = [silence;speechIn];             %pads with duration1 (100 ms) of silence
speechIn2 = speechIn1.*G;
speechIn3 = speechIn2 - mean(speechIn2);    %DC offset elimination
speechIn = nreduce(speechIn3,fs);           %Applies spectral subtraction
rMatrix1 = mfccf(ncoeff,speechIn,fs);       %Compute test feature vector
rMatrix = CMN(rMatrix1);                    %Removes convolutional noise
Sco = DTWScores(rMatrix,N);                 %computes all DTW scores
[SortedScores,EIndex] = sort(Sco);          %Sort scores increasing
K_Vector = EIndex(1:k);                     %Gets k lowest scores

Neighbors = zeros(1,k);                     %will hold k-N neighbors
for t = 1:k
    % Fold the template index into a word index 1..N.
    % NOTE(review): assumes Sco lists the templates speaker by speaker,
    % N words each - confirm against DTWScores.
    u = mod(K_Vector(t) - 1, N) + 1;
    Neighbors(t) = u;                       %store the word index (was the constant N)
end

%Apply k-Nearest Neighbor rule
Nbr = Neighbors                             %no semicolon: echoes the neighbours to the console
%sortk = sort(Nbr);
[Modal,Freq] = mode(Nbr);                   %most frequent value and how often it occurs
Word = strvcat('One','Two','Three','Four','Five','Six','Seven','Eight','Nine','Ten','Yes','No','Hello','Open','Close','Start','Stop','Dial','On','Off');
if mean(abs(speechIn)) < 0.01
    fprintf('No microphone connected or you have not said anything.\n');
elseif ((k/Freq) > 2)                       %if no majority
    fprintf('The word you have said could not be properly recognised.\n');
else
    % strvcat pads every row to the same width; strip the padding before printing
    fprintf('You have just said %s.\n',deblank(Word(Modal,:)));    %Prints recognized word
end
2.3)setTemplates.m
% setTemplates script: computes the MFCC feature matrix of each of the 20
% vocabulary words for five training speakers and stores one template
% file per speaker (Vectors1.mat ... Vectors5.mat) in the working directory.
% Relies on the project functions myVAD and mfccf.
ncoeff = 13;            %Required number of mfcc coefficients
fMatrix1 = cell(1,20);
fMatrix2 = cell(1,20);
fMatrix3 = cell(1,20);
fMatrix4 = cell(1,20);
fMatrix5 = cell(1,20);  %preallocated like the other four speakers

for j = 1:20
    q = ['C:\SpeechData\Amir\5_' num2str(j) '.wav'];
    [speechIn1,FS1] = wavread(q);
    speechIn1 = myVAD(speechIn1);                   %Speech endpoint trimming
    fMatrix1(1,j) = {mfccf(ncoeff,speechIn1,FS1)};  %MFCC coefficients are
                                                    %computed here
end

for k = 1:20
    q = ['C:\SpeechData\Ayo\5_' num2str(k) '.wav'];
    [speechIn2,FS2] = wavread(q);
    speechIn2 = myVAD(speechIn2);
    fMatrix2(1,k) = {mfccf(ncoeff,speechIn2,FS2)};  %was misspelled "mfcvcf"
end

for l = 1:20
    q = ['C:\SpeechData\Sameh\5_' num2str(l) '.wav'];
    [speechIn3,FS3] = wavread(q);                   %was returned into "F3" but read as FS3
    speechIn3 = myVAD(speechIn3);
    fMatrix3(1,l) = {mfccf(ncoeff,speechIn3,FS3)};
end

for m = 1:20
    q = ['C:\SpeechData\Jim\5_' num2str(m) '.wav'];
    [speechIn4,FS4] = wavread(q);
    speechIn4 = myVAD(speechIn4);
    fMatrix4(1,m) = {mfccf(ncoeff,speechIn4,FS4)};
end

for n = 1:20
    q = ['C:\SpeechData\Tope\5_' num2str(n) '.wav'];
    [speechIn5,FS5] = wavread(q);
    speechIn5 = myVAD(speechIn5);
    fMatrix5(1,n) = {mfccf(ncoeff,speechIn5,FS5)};
end

%Converts the cells containing all matrices to structures and save
%structures in matlab .mat files in the working directory.
%Each struct has one field per word, so every .mat file holds 20 variables.
fields = {'One','Two','Three','Four','Five','Six','Seven','Eight','Nine','Ten','Yes','No','Hello','Open','Close','Start','Stop','Dial','On','Off'};
s1 = cell2struct(fMatrix1, fields, 2);
save Vectors1.mat -struct s1;
s2 = cell2struct(fMatrix2, fields, 2);
save Vectors2.mat -struct s2;
s3 = cell2struct(fMatrix3, fields, 2);
save Vectors3.mat -struct s3;
s4 = cell2struct(fMatrix4, fields, 2);
save Vectors4.mat -struct s4;
s5 = cell2struct(fMatrix5, fields, 2);
save Vectors5.mat -struct s5;