钢琴识谱练习 VB.NET PYTHON

news/2024/6/15 6:49:59 标签: tensorflow, vb.net, 钢琴识谱
Imports NAudio.Wave
Imports MathNet.Numerics.IntegralTransforms
Imports System.Numerics
Imports TensorFlow
Imports System.IO

''' <summary>
''' Main form of the piano note-reading trainer.
''' Records 16 kHz / 16-bit / mono audio, turns every 2048-sample block into a
''' 182-value feature vector (7 frames x 26 coefficients) and asks the
''' TensorFlow model which note class the block belongs to. It also contains the
''' tooling used to export training features and to test recorded files.
''' </summary>
Public Class Form1

    ' Samples per classified block: 128 ms at 16 kHz.
    Private Const BlockSamples As Integer = 2048
    ' Frame length and frame shift (in samples) used to slice a block.
    Private Const FrameLength As Integer = 512
    Private Const FrameShift As Integer = 256
    ' Values produced per frame by the MFCC helper with its default settings.
    Private Const CoeffsPerFrame As Integer = 26
    ' Width of the model input: 7 frames * 26 values.
    Private Const FeatureCount As Integer = 182
    ' Number of classes in the one-hot training labels.
    Private Const ClassCount As Integer = 8
    ' Minimum score required before a live detection is shown.
    Private Const ConfidenceThreshold As Double = 0.9

    ' Recording device
    Dim wav As New WaveInEvent
    ' True once recording has been started, so a second click is ignored.
    Private recording As Boolean = False

    ' Buffer that receives the samples of the current recording block
    Dim WavData16(BlockSamples - 1) As Int16
    Dim mfcc As New MFCC
    Dim piano As New TensorflowPiano

    ''' <summary>Starts live recording and recognition (idempotent).</summary>
    Private Sub Button1_Click(sender As Object, e As EventArgs) Handles Button1.Click
        ' Starting twice would attach the callback a second time and make
        ' StartRecording fail on the already running device.
        If recording Then Return

        ' Buffer size in bytes = sampleRate * milliseconds * bytesPerSample / 1000
        wav.BufferMilliseconds = 128
        ' Was 12; fewer buffers keep the recording from being interrupted
        wav.NumberOfBuffers = 6
        wav.WaveFormat = New WaveFormat(16000, 16, 1)

        ' Remove first so a failed earlier attempt cannot leave a duplicate handler.
        RemoveHandler wav.DataAvailable, AddressOf waveIn_DataAvailable
        AddHandler wav.DataAvailable, AddressOf waveIn_DataAvailable

        wav.StartRecording()
        recording = True
    End Sub

    ''' <summary>
    ''' Recording callback: classifies the received block and shows the class
    ''' index when the model is confident enough.
    ''' </summary>
    Private Sub waveIn_DataAvailable(sender As Object, e As WaveInEventArgs)
        ' Never copy more than the sample buffer can hold, and clear stale
        ' samples when the device delivered a short buffer.
        Dim capacity As Integer = WavData16.Length * 2
        Dim byteCount As Integer = Math.Min(e.BytesRecorded, capacity)
        If byteCount < capacity Then
            Array.Clear(WavData16, 0, WavData16.Length)
        End If
        Buffer.BlockCopy(e.Buffer, 0, WavData16, 0, byteCount)

        Dim num As Integer
        Dim max As Single = Classify(WavData16, num)

        If max > ConfidenceThreshold AndAlso Me.IsHandleCreated AndAlso Not Me.IsDisposed Then
            ' The callback does not run on the UI thread. BeginInvoke posts the
            ' update without blocking the audio callback.
            Me.BeginInvoke(New ShowText(AddressOf ShowTxt), num.ToString)
        End If
    End Sub

    Public Delegate Sub ShowText(txt As String)

    ''' <summary>Displays the recognised class in Label1 (UI thread only).</summary>
    Public Sub ShowTxt(txt As String)
        Label1.Text = txt
    End Sub

    ''' <summary>
    ''' Runs feature extraction and the model on one block of samples.
    ''' </summary>
    ''' <param name="samples">Block of 16-bit samples (BlockSamples long).</param>
    ''' <param name="bestIndex">Receives the index of the highest score.</param>
    ''' <returns>The highest score returned by the model.</returns>
    Private Function Classify(samples() As Int16, ByRef bestIndex As Integer) As Single
        Dim mfccs = WavTMfcc(samples)

        ' The model expects a (1, 182) matrix.
        Dim features(0, FeatureCount - 1) As Single
        For i As Integer = 0 To FeatureCount - 1
            features(0, i) = mfccs(i)
        Next

        Dim scores() As Single = piano.Detect(features)

        ' Scan every score the model returned; a fixed bound of 7 skipped the
        ' last of the 8 classes.
        bestIndex = 0
        Dim best As Single = scores(0)
        For i As Integer = 1 To scores.Length - 1
            If scores(i) > best Then
                best = scores(i)
                bestIndex = i
            End If
        Next
        Return best
    End Function

    ''' <summary>
    ''' Splits the samples into frames of 512 samples with a shift of 256 and
    ''' concatenates the 26 values computed for every frame.
    ''' </summary>
    ''' <param name="data">16-bit samples.</param>
    ''' <returns>
    ''' frameCount * 26 values (182 for 2048 samples); an empty array when the
    ''' input is shorter than one frame.
    ''' </returns>
    Public Function WavTMfcc(data() As Int16) As Single()
        If data Is Nothing Then Throw New ArgumentNullException("data")

        ' Integer division: "/" yields a Double in VB, which over-ran the input
        ' for lengths that are not a multiple of the frame shift.
        Dim frameCount As Integer = data.Length \ FrameShift - 1
        If frameCount <= 0 Then Return New Single() {}

        Dim mfccs(frameCount * CoeffsPerFrame - 1) As Single
        Dim Frame(FrameLength - 1) As Single
        For i As Integer = 0 To frameCount - 1
            ' Array.Copy performs the widening Int16 -> Single conversion.
            Array.Copy(data, i * FrameShift, Frame, 0, FrameLength)
            mfcc.Hamming_window(Frame)
            Dim fft As Complex() = mfcc.FFT(Frame)
            Dim rs As Single() = mfcc.MFCC(fft)
            Array.Copy(rs, 0, mfccs, i * CoeffsPerFrame, CoeffsPerFrame)
        Next

        Return mfccs
    End Function

    ''' <summary>
    ''' Reads 16-bit samples from a wave file and releases the file afterwards.
    ''' </summary>
    ''' <param name="fileName">Path of the wave file.</param>
    ''' <param name="maxSamples">
    ''' Number of samples to return; a negative value returns the whole file.
    ''' When the file is shorter, the remainder of the array stays zero.
    ''' </param>
    Private Shared Function ReadSamples(fileName As String, Optional maxSamples As Integer = -1) As Int16()
        Using reader As New WaveFileReader(fileName)
            Dim count As Integer = maxSamples
            If count < 0 Then
                count = CInt(Math.Min(reader.Length \ 2, CLng(Integer.MaxValue \ 2)))
            End If

            Dim bytes(count * 2 - 1) As Byte
            Dim total As Integer = 0
            ' Read may return fewer bytes than requested, so loop until done.
            While total < bytes.Length
                Dim got As Integer = reader.Read(bytes, total, bytes.Length - total)
                If got <= 0 Then Exit While
                total += got
            End While

            Dim samples(count - 1) As Int16
            Buffer.BlockCopy(bytes, 0, samples, 0, bytes.Length)
            Return samples
        End Using
    End Function

    ''' <summary>
    ''' Exports the training set: for every *.wav below d:\piano one line with
    ''' the 182 features and one line with the one-hot label is written to
    ''' d:\mfcc.txt. The label is the first character of the file name (0-7).
    ''' </summary>
    Private Sub Button7_Click(sender As Object, e As EventArgs) Handles Button7.Click
        ' The Python side expects "." as decimal separator regardless of the
        ' Windows regional settings.
        Dim invariant = System.Globalization.CultureInfo.InvariantCulture

        Using txtwrite As New IO.StreamWriter("d:\mfcc.txt", False)
            Dim fs As New DirectoryInfo("d:\piano")
            For Each i As FileInfo In fs.GetFiles("*.wav", SearchOption.AllDirectories)
                Dim fn = i.Name

                ' A name that does not start with a class digit would either be
                ' labelled as class 0 by accident or index past the label array.
                Dim labelChar As Char = fn(0)
                If labelChar < "0"c OrElse labelChar > "7"c Then
                    Debug.Print("skipped (file name does not start with 0-7): " & i.FullName)
                    Continue For
                End If
                Dim label As Integer = AscW(labelChar) - AscW("0"c)

                ' Only the first block of the file is used as training sample.
                Dim it16 = ReadSamples(i.FullName, BlockSamples)

                ' Compute the 182 features
                Dim mfccs = WavTMfcc(it16)
                Dim StrMfcc = String.Join(",", Array.ConvertAll(Of Single, String)(
                                          mfccs, Function(v) v.ToString("R", invariant)))

                Dim Ans(ClassCount - 1) As Integer
                Ans(label) = 1
                Dim StrAns = String.Join(",", Array.ConvertAll(Of Integer, String)(
                                         Ans, Function(v) v.ToString(invariant)))

                txtwrite.WriteLine(StrMfcc)
                txtwrite.WriteLine(StrAns)
            Next
        End Using
    End Sub

    ''' <summary>
    ''' Voice activity detection with an exponential moving average of the
    ''' sample energy: v = 0.1 * x^2 + 0.9 * v.
    ''' A region starts when v exceeds 1024^2 and ends when v drops below 100^2.
    ''' </summary>
    ''' <param name="data">16-bit samples.</param>
    ''' <returns>
    ''' One Point per region with X = first sample index and Y = last sample
    ''' index. A region still open at the end of the data is closed at the last
    ''' sample instead of being dropped.
    ''' </returns>
    Public Function VAD(data As Int16()) As List(Of Point)
        Const Belta As Double = 0.1
        Const StartEnergy As Double = 1024.0 * 1024.0
        Const EndEnergy As Double = 100.0 * 100.0

        Dim WaveArea As New List(Of Point)
        If data Is Nothing Then Return WaveArea

        ' Kept as Double so the average is not rounded to an integer each step.
        Dim energy As Double = 0
        Dim StartP As Integer = 0
        Dim active As Boolean = False

        For i As Integer = 0 To data.Length - 1
            Dim sample As Double = data(i)
            energy = Belta * sample * sample + (1 - Belta) * energy

            If Not active Then
                ' Start of a sound
                If energy > StartEnergy Then
                    active = True
                    StartP = i
                End If
            ElseIf energy < EndEnergy Then
                ' End of a sound
                WaveArea.Add(New Point(StartP, i))
                active = False
            End If
        Next

        If active Then
            WaveArea.Add(New Point(StartP, data.Length - 1))
        End If

        Return WaveArea
    End Function

    ''' <summary>Runs the voice activity detection on d:\d00.wav.</summary>
    Private Sub Button8_Click(sender As Object, e As EventArgs) Handles Button8.Click
        Dim it16 = ReadSamples("d:\d00.wav")
        VAD(it16)
    End Sub

    ''' <summary>
    ''' Classifies d:\testpiano.wav block by block and prints
    ''' "class:score" for every block to the debug output.
    ''' </summary>
    Private Sub Button9_Click(sender As Object, e As EventArgs) Handles Button9.Click
        Dim it16 = ReadSamples("d:\testpiano.wav")
        Dim frame(BlockSamples - 1) As Int16

        For k As Integer = 0 To it16.Length \ BlockSamples - 1
            Array.Copy(it16, k * BlockSamples, frame, 0, BlockSamples)

            Dim num As Integer
            Dim max As Single = Classify(frame, num)
            Debug.Print(num & ":" & max)
        Next
    End Sub

    Private Sub Form1_Load(sender As Object, e As EventArgs) Handles MyBase.Load

    End Sub

    ''' <summary>Detaches the callback and releases the recording device.</summary>
    Private Sub Form1_FormClosed(sender As Object, e As FormClosedEventArgs) Handles MyBase.FormClosed
        RemoveHandler wav.DataAvailable, AddressOf waveIn_DataAvailable
        wav.Dispose()
        recording = False
    End Sub
End Class

在 VB 中提取训练数据的 MFCC 特征并保存为 TXT 文件，然后在 Python 脚本中读取该文件进行训练

.py

import tensorflow as tf
import librosa
import numpy as np
from tensorflow.python.framework.graph_util import convert_variables_to_constants


# Training script: reads the feature/label pairs exported by the VB tool,
# trains a single-layer softmax classifier (182 inputs -> 8 classes) and
# freezes it to d:/piano.pb with the tensor names 'input' and 'out'.


def _parse_row(line):
    """Parse one comma-separated line of numbers into a float32 vector.

    The lines come from a data file, so they are parsed as numbers instead of
    being passed to eval(), which would execute whatever the file contains.
    """
    return np.array([float(tok) for tok in line.split(",")], dtype=np.float32)


# The file alternates between a feature line and a one-hot label line.
with open("d:/mfcc.txt", "r") as f:
    rows = [ln.strip() for ln in f if ln.strip()]

if not rows:
    raise ValueError("d:/mfcc.txt contains no training samples")
if len(rows) % 2 != 0:
    raise ValueError("d:/mfcc.txt must contain feature/label line pairs")

xdata = [_parse_row(r) for r in rows[0::2]]
ydata = [_parse_row(r) for r in rows[1::2]]


x = tf.placeholder("float32", [None, 182], name='input')
w = tf.Variable(tf.truncated_normal([182, 8], stddev=0.1))
b = tf.Variable(tf.truncated_normal(shape=[8], stddev=0.1, dtype=tf.float32))

# 'out' is the node the VB application fetches from the frozen graph.
y = tf.nn.softmax(tf.matmul(x, w) + b, name='out')

y_ = tf.placeholder("float32", [None, 8])

# Cross entropy. The probabilities are clipped because a saturated softmax
# returns exact zeros and log(0) would turn the loss into NaN.
loss = -tf.reduce_sum(y_ * tf.log(tf.clip_by_value(y, 1e-10, 1.0)))

train = tf.train.GradientDescentOptimizer(1e-5).minimize(loss)

feed = {x: xdata, y_: ydata}

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # The labels never change, so they are shown once instead of every report.
    print(np.array(ydata))

    for i in range(20000):
        sess.run(train, feed_dict=feed)
        if i % 100 == 0:
            # Report the current loss next to the step number.
            loss_val = sess.run(loss, feed_dict=feed)
            print("step %d,%g" % (i, loss_val))
            print(sess.run(y, feed_dict={x: xdata}))

    # Save as a frozen .pb graph (variables converted to constants)
    graph = convert_variables_to_constants(sess, sess.graph_def, ['out'])
    tf.train.write_graph(graph, 'd:/', 'piano.pb', as_text=False)

在VB中使用训练好的模型

Imports System.IO
Imports System.Numerics
Imports TensorFlow

'Install-Package TensorFlowSharp

''' <summary>
''' Wraps the frozen TensorFlow graph d:\piano.pb and evaluates it for one
''' feature matrix at a time. Dispose the instance when it is no longer needed
''' to release the native session and graph.
''' </summary>
Public Class TensorflowPiano
    Implements IDisposable

    Private ReadOnly graph As TFGraph
    Private ReadOnly session As TFSession
    Private disposed As Boolean = False

    ''' <summary>Loads the model from d:\piano.pb and opens a session on it.</summary>
    Public Sub New()
        Dim model As Byte() = File.ReadAllBytes("d:\piano.pb")
        graph = New TFGraph()
        graph.Import(model, "")

        session = New TFSession(graph)
    End Sub

    ''' <summary>
    ''' Runs the model: feeds <paramref name="Data"/> into the node "input" and
    ''' fetches the node "out".
    ''' </summary>
    ''' <param name="Data">Feature matrix; one row per sample.</param>
    ''' <returns>The output row computed for the first input row.</returns>
    Public Function Detect(Data(,) As Single) As Single()
        If Data Is Nothing Then Throw New ArgumentNullException("Data")
        If disposed Then Throw New ObjectDisposedException("TensorflowPiano")

        ' Everything below is local: Detect is called from the recording thread
        ' and from the UI thread, so per-call state must not live in fields.
        Dim runner As TFSession.Runner = session.GetRunner()
        runner.AddInput(graph("input")(0), Data).Fetch(graph("out")(0))

        Dim output As TFTensor() = runner.Run()
        Try
            ' Jagged result: an array with one Single() per input row.
            Dim rows As Object = output(0).GetValue(True)
            Return DirectCast(DirectCast(rows, Array).GetValue(0), Single())
        Finally
            ' Output tensors own native memory; release them right away
            ' instead of waiting for the garbage collector.
            For Each tensor As TFTensor In output
                If tensor IsNot Nothing Then tensor.Dispose()
            Next
        End Try
    End Function

    ''' <summary>Releases the session and the graph.</summary>
    Public Sub Dispose() Implements IDisposable.Dispose
        If disposed Then Return
        disposed = True

        If session IsNot Nothing Then session.Dispose()
        If graph IsNot Nothing Then graph.Dispose()
    End Sub

End Class

MFCC特征提取:

Imports System.Numerics
Imports MathNet.Numerics.IntegralTransforms

''' <summary>
''' Feature extraction for 16 kHz audio frames: Hamming window, FFT and a bank
''' of triangular mel filters (40 filters are laid out between 0 Hz and 8 kHz,
''' of which the first MFCCNum are used).
''' </summary>
Public Class MFCC

    ''' <summary>
    ''' Mel filter weights: H(i, j) is the weight of FFT bin j in filter i.
    ''' Allocated with MFCCNum + 1 rows and FrameSize \ 2 + 1 columns.
    ''' </summary>
    Public H As Double(,)

    Private MFCCNum As Integer
    Private FrameSize As Integer 'Frame length, 512 by default

    ''' <summary>Builds the mel filter bank.</summary>
    ''' <param name="framesize">Samples per frame (at least 2).</param>
    ''' <param name="MFCCNum">Number of filters used per frame (1 to 39).</param>
    Public Sub New(Optional framesize As Integer = 512, Optional MFCCNum As Integer = 26)
        Const sampleRate As Integer = 16000  'Sampling rate
        Const filterNum As Integer = 40      'Filters laid out on the mel scale

        If framesize < 2 Then
            Throw New ArgumentOutOfRangeException("framesize", "The frame must contain at least 2 samples.")
        End If
        ' Filter i reads the points i, i + 1 and i + 2, and filters 0..MFCCNum
        ' are generated, so MFCCNum + 2 must stay inside the point table.
        If MFCCNum < 1 OrElse MFCCNum > filterNum - 1 Then
            Throw New ArgumentOutOfRangeException("MFCCNum", "MFCCNum must be between 1 and 39.")
        End If

        Me.MFCCNum = MFCCNum
        Me.FrameSize = framesize

        ' Integer division: "/" would produce a Double that is rounded later.
        Dim halfSize As Integer = framesize \ 2
        H = New Double(MFCCNum, halfSize) {}

        ' 40 filters need 42 boundary points.
        Dim filter_points(filterNum + 1) As Integer

        ' Note: the lowest frequency is a design choice (0 here, 300 is common).
        Dim freMax As Double = sampleRate / 2
        Dim freMin As Double = 0
        ' Convert the frequency range to the mel scale
        Dim melFremax As Double = 1125 * Math.Log(1 + freMax / 700)
        Dim melFremin As Double = 1125 * Math.Log(1 + freMin / 700)

        ' Distance between two neighbouring points on the mel scale
        Dim k As Double = (melFremax - melFremin) / (filterNum + 1)

        For i As Integer = 0 To filterNum + 1
            Dim mel As Double = melFremin + k * i
            ' Convert the mel value back to Hz, then to an FFT bin index
            Dim hz As Double = 700 * (Math.Exp(mel / 1125) - 1)
            filter_points(i) = CInt(Math.Floor((framesize + 1) * hz / sampleRate))
        Next

        ' Generate the triangular filters
        For i As Integer = 0 To MFCCNum
            Dim left As Integer = filter_points(i)
            Dim center As Integer = filter_points(i + 1)
            Dim right As Integer = filter_points(i + 2)

            For j As Integer = 0 To halfSize - 1
                Dim weight As Double = 0

                ' Rising edge. Skipped when both points fall on the same bin,
                ' which would otherwise divide by zero.
                If j >= left AndAlso j <= center AndAlso center > left Then
                    weight = CDbl(j - left) / (center - left)
                End If
                ' Falling edge, with the same guard.
                If j >= center AndAlso j <= right AndAlso right > center Then
                    weight = CDbl(right - j) / (right - center)
                End If

                H(i, j) = weight
            Next
        Next
    End Sub

    ''' <summary>Applies a Hamming window to the frame in place.</summary>
    ''' <param name="WaveData">Samples of one frame; overwritten.</param>
    Public Sub Hamming_window(WaveData() As Single)
        If WaveData Is Nothing Then Throw New ArgumentNullException("WaveData")

        Dim len As Integer = WaveData.Length
        Dim omega As Single = 2.0 * Math.PI / len
        For j As Integer = 0 To len - 1
            WaveData(j) = (0.54 - 0.46 * Math.Cos(omega * (j))) * WaveData(j)
        Next
    End Sub

    ''' <summary>Computes the forward Fourier transform of a frame.</summary>
    ''' <param name="WaveData">Samples of one frame.</param>
    ''' <returns>Spectrum with as many bins as input samples.</returns>
    Public Function FFT(WaveData() As Single) As Complex()
        If WaveData Is Nothing Then Throw New ArgumentNullException("WaveData")

        Dim FFT_Complex(WaveData.Length - 1) As Complex
        For i As Integer = 0 To WaveData.Length - 1
            FFT_Complex(i) = WaveData(i)
        Next
        MathNet.Numerics.IntegralTransforms.Fourier.Forward(FFT_Complex, FourierOptions.Matlab)
        Return FFT_Complex
    End Function

    ''' <summary>
    ''' Computes the features of one frame from its spectrum.
    ''' </summary>
    ''' <param name="fft">Spectrum of the frame (at least FrameSize \ 2 bins).</param>
    ''' <param name="applyDct">
    ''' False (default): returns the natural log of the mel filter energies,
    ''' which is what this function has always returned and what existing
    ''' training data was generated with.
    ''' True: additionally applies a DCT-II and returns cepstral coefficients.
    ''' </param>
    ''' <returns>MFCCNum values.</returns>
    Public Function MFCC(fft() As Complex, Optional applyDct As Boolean = False) As Single()
        If fft Is Nothing Then Throw New ArgumentNullException("fft")

        Dim halfSize As Integer = Me.FrameSize \ 2
        If fft.Length < halfSize Then
            Throw New ArgumentException("The spectrum is shorter than half the frame size.", "fft")
        End If

        ' Log of the energy collected by every mel filter
        Dim S As Single() = New Single(MFCCNum - 1) {}
        For i As Integer = 0 To MFCCNum - 1
            For j As Integer = 0 To halfSize - 1
                S(i) = S(i) + Math.Pow(fft(j).Magnitude, 2) * H(i, j)
            Next
            ' An energy of exactly zero stays zero instead of -Infinity.
            If S(i) <> 0 Then
                S(i) = Math.Log(S(i), Math.E)
            End If
        Next

        If Not applyDct Then
            Return S
        End If

        ' DCT-II: c(l) = sum over i of S(i) * cos(PI * l * (i + 0.5) / N)
        Dim cepstrum As Single() = New Single(MFCCNum - 1) {}
        For l As Integer = 0 To MFCCNum - 1
            Dim acc As Double = 0
            For i As Integer = 0 To MFCCNum - 1
                acc += S(i) * Math.Cos(Math.PI * l * (i + 0.5) / MFCCNum)
            Next
            cepstrum(l) = CSng(acc)
        Next

        Return cepstrum
    End Function

End Class

 

 


http://www.niftyadmin.cn/n/1650166.html

相关文章

录音 waveApi与Naudio

Naudio 录音 Imports NAudio.WaveDim wav As New WaveInwav.BufferMilliseconds = 200 缓冲区大小= ; 200 = 6400;100 = 3200wav.NumberOfBuffers = 12 缓冲区数量wav.WaveFormat = New WaveFormat(16000, 16, 1) 格式 16000AddHandler wav.DataAvailable, AddressOf wav…

播放音乐 NAUDIO

Imports System.Media Imports NAudio.Wave Public Class PlayWavFile‘播放文件Public Function Play(filename As String)Dim player As SoundPlayer New SoundPlayer(filename)player.Play()End FunctionNaudio 播放数据Public Function play3(data() As Int16)Dim sampleR…

MySQL 中添加列、修改列以及删除列

1 ALTER TABLE：添加，修改，删除表的列，约束等表的定义。 2 查看列：desc 表名; 3 修改表名：alter table t_book rename to bbb; 4 添加列：alter table 表名 add column 列名 varchar(30); …

.Net转Java.05.为啥MySQL没有nolock

今天忽然想到一个问题，原来为了提高SQL Server性能，公司规定查询语句一般都要加 WITH (NOLOCK)的 现在转Java了，用了MySQL为啥不提这个事情了？ 先在MySQL里写了一个查询语句，比样子加了nolock，提示语法不正…

图灵机器人-语音对话的实现

Imports System.Collections.Generic Imports System.ComponentModel Imports System.Data Imports System.Drawing Imports System.Linq Imports System.Text Imports System.Threading.Tasks Imports System.Windows.FormsImports Newtonsoft.Json json解析用 Imports Syst…

Linux - Nginx安装

1.复制解压。 [rootlocalhost ~]# mv /opt/nginx-1.13.8.tar.gz /usr/local/src [rootlocalhost ~]# cd /usr/local/src [rootlocalhost src]# tar -zxvf nginx-1.13.8.tar.gz 2.编译安装 [rootlocalhost src]# cd nginx-1.13.8 [rootlocalhost nginx-1.13.8]# ./configure --…

【Python】学习笔记3 集合-监控日志脚本

1、集合 1 # 新的数据类型集合：天生去重2 3 # ----------定义----------4 s set() #空的集合5 s2 { #有内容的集合，输出时自动去重6 1,7 2,8 2,9 3, 10 } 11 12 13 # ----------天生去重---------- 14 print(s2) #{2, 3, 1} 15 # …

wxpython grid使用

Wxpython https://www.cnblogs.com/ankier/archive/2012/10/14/2723364.html 资料收集于网络，如有冒犯，敬请告知 wxFormBuilder 并列结构—拖拉 界面卡顿 创建线线（注意将函数另打包进另一函数，否则卡顿） #线程执行…