from pydub import AudioSegment
import numpy as np
import pylab as plt
import IPython
import glob
# Convert every .m4a file under ./video/ into a .wav file stored next to it.
# NOTE: the list keeps its historical name `mp3s` although it holds .m4a paths.
mp3s = glob.glob('./video/*.m4a')
for src in mp3s:
    # Swap only the trailing extension. Splitting on '/' breaks when glob
    # returns OS-native separators, and str.replace would also rewrite any
    # '.m4a' that occurs earlier in the file name.
    dst = src[:-len('.m4a')] + '.wav'
    sound = AudioSegment.from_file(src, 'm4a')
    sound.export(dst, format='wav')
# Load the converted clip and print its basic properties.
sound = AudioSegment.from_wav("./video/incredible.wav")

# (printed label, attribute name) pairs; the labels are emitted verbatim.
properties = [
    ('dBFS:', 'dBFS'),
    ('声道数:', 'channels'),          # number of channels
    ('采样字节数:', 'sample_width'),   # bytes per sample
    ('采样频率', 'frame_rate'),        # sample rate
    ('每帧字节数:', 'frame_width'),    # bytes per frame
    ('响度:', 'rms'),                 # loudness (RMS)
    ('最高振幅:', 'max'),              # peak amplitude
    ('max_dBFS:', 'max_dBFS'),
    ('持续时间:', 'duration_seconds'), # duration in seconds
]
for label, attribute in properties:
    print(label, getattr(sound, attribute))
# frame_count is a method, so it is called instead of being read as an attribute.
print('采样点数:', sound.frame_count())

# Copy the raw samples into a NumPy array and show its shape.
samples = sound.get_array_of_samples()
wave_data = np.array(samples)
print(wave_data.shape)
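# The audio has two channels, and the samples of the two channels are interleaved:
# channel-1 sample, channel-2 sample, channel-1 sample, channel-2 sample, ...
# We want to separate the channels so that wave_data[0] holds channel 1 and wave_data[1] holds channel 2: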
# De-interleave the flat sample array into one row per channel:
# wave_data[0] is channel 1, wave_data[1] is channel 2, and so on.
# The column count is taken from the clip instead of being hard-coded to 2,
# so a non-stereo clip is not silently split into wrong rows, and reshape()
# is used because assigning to ndarray.shape is discouraged by NumPy.
wave_data = wave_data.reshape(-1, sound.channels).T
print(wave_data.shape)
# Print each channel's samples (two lines for a stereo clip, as before).
for channel_samples in wave_data:
    print(channel_samples)
# Time axis in seconds: one entry per frame, frame index divided by sample rate.
frame_total = sound.frame_count()
time = np.arange(0, frame_total) / sound.frame_rate

# Draw both channels over the same axis; the second channel is drawn in red.
first_channel_wave = wave_data[0]
second_channel_wave = wave_data[1]
plt.plot(time, first_channel_wave)
plt.plot(time, second_channel_wave, c="r")
# Amplitude spectrum of the first channel.
# Change N (number of samples) and start (first sample) to analyse a
# different position and length of the clip.
N = int(sound.frame_count())
start = 0  # index of the first analysed sample
wave_data2 = wave_data[0][start:start + N]
# The slice is shorter than requested when start + N runs past the end of the
# clip; resync N so the scaling and the frequency axis match the data that is
# actually transformed.
N = len(wave_data2)
df = sound.frame_rate / N  # bin spacing of an N-point DFT is rate / N, not rate / (N - 1)
freq = [df * n for n in range(N)]  # frequency of each of the N bins
# Scaled by 2 / N (the usual single-sided amplitude normalisation).
c = np.fft.fft(wave_data2) * 2 / N
# Conventionally only the part of the spectrum below half the sample rate is shown.
d = len(c) // 2
plt.plot(freq[:d - 1], abs(c[:d - 1]), 'r')
plt.show()
# Split the data into segments of 200 samples each (NFFT=200) and compute the spectrum of every segment
# Spectrogram of the first channel, using 200-sample FFT segments at the
# clip's sample rate; the shape of the returned spectrum matrix is printed.
first_channel = wave_data[0]
pxx, freqs, bins, im = plt.specgram(first_channel, NFFT=200, Fs=sound.frame_rate)
print(pxx.shape)
# Cut each 10 s background clip into two 5 s clips
# Split every background recording at the 5000 mark (5 s, per the 10 s -> 2 x 5 s
# intent) and store both parts, numbered by their position in the glob result.
backgrounds = glob.glob('./Pronunciation/backgrounds_10s/*.wav')
output_prefix = './Pronunciation/backgrounds_5s/'
for index, wav_path in enumerate(backgrounds):
    clip = AudioSegment.from_wav(wav_path)
    # File-name suffix -> part of the clip; both parts are cut before any export.
    parts = {
        "_frist_s5.wav": clip[:5000],
        "_last_s5.wav": clip[5000:],
    }
    for suffix, part in parts.items():
        part.export(output_prefix + str(index) + suffix, format="wav")
# Apply a gain of -20 and then 0 to the loaded clip (presumably in dB — confirm).
# The return values are not bound to any name, so nothing later in this file
# can use them.
# NOTE(review): apply_gain presumably returns a new AudioSegment instead of
# modifying `sound` in place — confirm against pydub; if so, these two lines
# have no lasting effect outside an interactive notebook cell.
sound.apply_gain(-20)
sound.apply_gain(0)
# Build an IPython audio player for the converted clip. This is a bare
# expression: presumably it is only rendered when it is the last statement of
# a notebook cell — TODO confirm how this file is meant to be run.
IPython.display.Audio("./video/incredible.wav")