# 聲音處理
## 大家都可以在下方留下code互相分享!!
## 波形圖
#### librosa
```python=
from pyhht.visualization import plot_imfs
import matplotlib.pyplot as plt
import librosa
import librosa.display
import IPython.display as ipd
path = 'C:/Users/User/sabrina/sabrina_T01-1_0625.wav'
data , sr = librosa.load(path,sr=16000)
data = data/abs(data).max()
plt.plot(data)
ipd.Audio(data,rate=16000) #播放
```
#### wave
```python=
import wave
import matplotlib.pyplot as plt
import numpy as np
# 讀檔
f = wave.open(r"C:/Users/User/sabrina/sabrina_T01-1_0625.wav", "rb")
# 格式資訊
# (nchannels, sampwidth, framerate, nframes, comptype, compname)
params = f.getparams()
nchannels, sampwidth, framerate, nframes = params[:4]
# 波形數據
str_data = f.readframes(nframes)
f.close()
# bytes to np.short
wave_data = np.frombuffer(str_data, dtype=np.short)
# 轉換為雙聲道
# wave_data.shape = -1, 2
# wave_data = wave_data.T
# 總時間長度
time = np.arange(0, nframes) * (1.0 / framerate)
# 音訊資訊
print(nchannels, sampwidth, framerate, nframes) #nchannels:聲道數 ;sampwidth:量化位數(byte) ;framerate(sample rate):採樣頻率 ; nframes:採樣點數
# 畫波形
plt.subplot(211)
plt.plot(time, wave_data)
# plt.plot(time, wave_data[0]) #雙聲道音檔的畫法
plt.subplot(212)
plt.plot(time, wave_data, c="g")
# plt.plot(time, wave_data[1], c="g")#雙聲道音檔的畫法
plt.xlabel("time (seconds)")
plt.show()
```
<br>
## Spectrogram
```python=
import librosa
import librosa.display # 如果沒有 import 的話會 module 'librosa' has no attribute 'display'
import numpy as np
import matplotlib as plt
import matplotlib.pyplot as plt # 如果沒有 import 的話會 module 'matplotlib' has no attribute 'title'
path='C:/Users/User/sabrina/sabrina_T01-1_0625.wav'
y, sr = librosa.load(path, sr=16000, mono=True)
D = np.abs(librosa.stft(y))
librosa.display.specshow(librosa.amplitude_to_db(D,ref=np.max),x_axis='time',y_axis='hz',sr=sr) #sr=sr不然預設會是22050
plt.tight_layout()
plt.savefig('C:/Users/User/pic/1.png') #儲存圖片
plt.show()
```
## MFCC
```python=
import librosa
import librosa.display
import matplotlib.pyplot as plt
path='C:/Users/User/sabrina/sabrina_T01-1_0625.wav'
y, sr = librosa.load(path,sr=16000,mono=True)
mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40)
librosa.display.specshow(mfccs,y_axis='hz', x_axis='time',sr=sr)
plt.colorbar()
plt.title('MFCC')
plt.tight_layout()
```
<br>
## mix_sound
```python=
import librosa as lr
import numpy as np
import os
#--------整理noise data------------------
noise_list = []
for file in os.listdir('/Users/rick/Desktop/noise_16000'):
if '.wav' in file:
noise_list.append(os.path.join('/Users/rick/Desktop/noise_16000',file))
#----------------------------------------
#---------整理clean data------------------
filename_list = []
for filename in os.listdir('/Users/rick/python_test/noise'):
if '.' not in filename:
filename_list.append(filename)
file_list = []
for filename in filename_list:
for file in os.listdir(os.path.join('/Users/rick/python_test/noise',filename)):
if '.wav' in file:
file_list.append(os.path.join('/Users/rick/python_test/noise',filename,file))
file_list.sort()
#------------------------------------------
#-----------隨機混音開始---------------------
if not os.path.isdir('mixed_pos_snr'):
os.mkdir('mixed_pos_snr')
for file in file_list:
for noise_index in range(len(noise_list)):
snr = np.random.randint(0,16) #隨機取 snr的值
# noise_index = np.random.randint(0,len(noise_list))
cleanwav = file
noisewav = noise_list[noise_index]
noise,sr = lr.load(noisewav,sr=16000) # sr設置為None可取原檔案中的sample rate
clean,Sr = lr.load(cleanwav,sr=16000)
cleanenergy = np.sum(np.power(clean,2)) #np.power(a,2) = np.square(a)
# 隨機索引與clean長度相同的noise信號
ind = np.random.randint(1, len(noise) - len(clean) + 1)
noiselen=noise[ind:len(clean) + ind]
# 噪声语音能量 噪音的功率
noiseenergy = np.sum(np.power(noiselen,2))
# 噪声等级系数
noiseratio = np.sqrt((cleanenergy / noiseenergy) / (np.power(10, snr * 0.1)))
# 隨機索引與clean長度相同的noise信號
noisyAudio = clean + noise[ind:len(clean)+ind] * noiseratio
# write wav
noisywavname='./mixed_pos_snr/'+cleanwav.split('/')[-1].rstrip('.wav')+'_'+noisewav.split('/')[-1].rstrip('.wav')+"_snr"+str(snr)+".wav"
# sf.write(noisywavname, noisyAudio, 16000)
lr.output.write_wav(noisywavname,noisyAudio,16000)
```
## wav 與 頻譜圖 相互轉換
#### wav to spectrogram
```python=
import scipy.io.wavfile
rate,audData=scipy.io.wavfile.read('sabrina_T01-1_0622.wav')
#----------------------------------------
#------------以下為可調整參數---------------
# Create FFT
import numpy as np
import math
from PIL import Image
import time
FFT_LENGTH = 512 # 影響輸出圖片的高度
WINDOW_LENGTH = 256 # 影響輸出圖片的寬度
WINDOW_STEP = int(WINDOW_LENGTH / 2)
magnitudeMin = float("inf")
magnitudeMax = float("-inf")
phaseMin = float("inf")
phaseMax = float("-inf")
#-----------------------------------------
#-----------定義許多 function--------------
def amplifyMagnitudeByLog(d):
return 188.301 * math.log10(d + 1)
def weakenAmplifiedMagnitude(d):
return math.pow(10, d/188.301)-1
def generateLinearScale(magnitudePixels, phasePixels,
magnitudeMin, magnitudeMax, phaseMin, phaseMax):
height = magnitudePixels.shape[0]
width = magnitudePixels.shape[1]
magnitudeRange = magnitudeMax - magnitudeMin
phaseRange = phaseMax - phaseMin
rgbArray = np.zeros((height, width, 3), 'uint8')
for w in range(width):
for h in range(height):
magnitudePixels[h,w] = (magnitudePixels[h,w] - magnitudeMin) / (magnitudeRange) * 255 * 2
magnitudePixels[h,w] = amplifyMagnitudeByLog(magnitudePixels[h,w])
phasePixels[h,w] = (phasePixels[h,w] - phaseMin) / (phaseRange) * 255
red = 255 if magnitudePixels[h,w] > 255 else magnitudePixels[h,w]
green = (magnitudePixels[h,w] - 255) if magnitudePixels[h,w] > 255 else 0
blue = phasePixels[h,w]
rgbArray[h,w,0] = int(red)
rgbArray[h,w,1] = int(green)
rgbArray[h,w,2] = int(blue)
return rgbArray
def recoverLinearScale(rgbArray, magnitudeMin, magnitudeMax,
phaseMin, phaseMax):
width = rgbArray.shape[1]
height = rgbArray.shape[0]
magnitudeVals = rgbArray[:,:,0].astype(float) + rgbArray[:,:,1].astype(float)
phaseVals = rgbArray[:,:,2].astype(float)
phaseRange = phaseMax - phaseMin
magnitudeRange = magnitudeMax - magnitudeMin
for w in range(width):
for h in range(height):
phaseVals[h,w] = (phaseVals[h,w] / 255 * phaseRange) + phaseMin
magnitudeVals[h,w] = weakenAmplifiedMagnitude(magnitudeVals[h,w])
magnitudeVals[h,w] = (magnitudeVals[h,w] / (255*2) * magnitudeRange) + magnitudeMin
return magnitudeVals, phaseVals
def generateSpectrogramForWave(signal):
start_time = time.time()
global magnitudeMin
global magnitudeMax
global phaseMin
global phaseMax
buffer = np.zeros(int(signal.size + WINDOW_STEP - (signal.size % WINDOW_STEP)))
buffer[0:len(signal)] = signal
height = int(FFT_LENGTH / 2 + 1)
width = int(len(buffer) / (WINDOW_STEP) - 1)
magnitudePixels = np.zeros((height, width))
phasePixels = np.zeros((height, width))
for w in range(width):
buff = np.zeros(FFT_LENGTH)
stepBuff = buffer[w*WINDOW_STEP:w*WINDOW_STEP + WINDOW_LENGTH]
# apply hanning window
stepBuff = stepBuff * np.hanning(WINDOW_LENGTH)
buff[0:len(stepBuff)] = stepBuff
#buff now contains windowed signal with step length and padded with zeroes to the end
fft = np.fft.rfft(buff)
for h in range(len(fft)):
#print(buff.shape)
#return
magnitude = math.sqrt(fft[h].real**2 + fft[h].imag**2)
if magnitude > magnitudeMax:
magnitudeMax = magnitude
if magnitude < magnitudeMin:
magnitudeMin = magnitude
phase = math.atan2(fft[h].imag, fft[h].real)
if phase > phaseMax:
phaseMax = phase
if phase < phaseMin:
phaseMin = phase
magnitudePixels[height-h-1,w] = magnitude
phasePixels[height-h-1,w] = phase
rgbArray = generateLinearScale(magnitudePixels, phasePixels,
magnitudeMin, magnitudeMax, phaseMin, phaseMax)
elapsed_time = time.time() - start_time
print('%.2f' % elapsed_time, 's', sep='')
img = Image.fromarray(rgbArray, 'RGB')
return img
#-----------------------------------------
#-----------wav轉為頻譜圖並存檔--------------
img = generateSpectrogramForWave(audData)
img.save("../../denoised/spectrogram2.png","PNG")
```
#### spectrogram return to wav
```python=
from PIL import Image
import scipy.io.wavfile
def recoverSignalFromSpectrogram(filePath):
img = Image.open(filePath)
data = np.array( img, dtype='uint8' )
width = data.shape[1]
height = data.shape[0]
magnitudeVals, phaseVals \
= recoverLinearScale(data, magnitudeMin, magnitudeMax, phaseMin, phaseMax)
recovered = np.zeros(WINDOW_LENGTH * width // 2 + WINDOW_STEP, dtype=np.int16)
for w in range(width):
toInverse = np.zeros(height, dtype=np.complex_)
for h in range(height):
magnitude = magnitudeVals[height-h-1,w]
phase = phaseVals[height-h-1,w]
toInverse[h] = magnitude * math.cos(phase) + (1j * magnitude * math.sin(phase))
signal = np.fft.irfft(toInverse)
recovered[w*WINDOW_STEP:w*WINDOW_STEP + WINDOW_LENGTH] += signal[:WINDOW_LENGTH].astype(np.int16)
scipy.io.wavfile.write("../../denoised/recovered2.wav", 16000, recovered)
recoverSignalFromSpectrogram("../../denoised/spectrogram2.png")
```
##### 分批畫圖
```python=
for ind , i in enumerate(mixed_file[:5]):
plt.clf()
data , sr = librosa.load(i,sr=16000)
data = data/abs(data).max()
plt.plot(data)
plt.savefig(f'./pic/{ind}.png')
```