快速查看音頻通道數和每個通道能力判斷具體哪個通道說話;一般能量大的那個算是說話
import wave
from pydub import AudioSegment
import numpy as npdef read_wav_file(file_path):with wave.open(file_path, 'rb') as wav_file:params = wav_file.getparams()num_channels = params.nchannelssample_width = params.sampwidthframe_rate = params.frameratenum_frames = params.nframesprint(f"Number of channels: {num_channels}")print(f"Sample width: {sample_width}")print(f"Frame rate: {frame_rate}")print(f"Number of frames: {num_frames}")frames = wav_file.readframes(num_frames)audio_data = np.frombuffer(frames, dtype=np.int16)if num_channels > 1:audio_data = audio_data.reshape(-1, num_channels)return audio_data, frame_rate, num_channelsdef analyze_channels(audio_data, frame_rate, num_channels):for channel in range(num_channels):channel_data = audio_data[:, channel] if num_channels > 1 else audio_data# 計算通道的能量energy = np.sum(np.abs(channel_data))print(f"Channel {channel} energy: {energy}")# 你可以在這里添加更多的分析邏輯,比如使用語音活動檢測(VAD)來判斷說話聲if __name__ == "__main__":file_path = r"E:\allchat\output.wav"audio_data, frame_rate, num_channels = read_wav_file(file_path)analyze_channels(audio_data, frame_rate, num_channels)
這里 channel0 的聲音算說話的
1、轉換mono單聲道,選擇人聲的那個通道
mp3格式
from pydub import AudioSegmentdef extract_and_save_channel(input_file, output_file, channel_index):# 讀取 MP3 文件audio = AudioSegment.from_mp3(input_file)# 提取特定通道if audio.channels > 1:channel_data = audio.split_to_mono()[channel_index]else:channel_data = audio# 保存提取的通道為新的 MP3 文件channel_data.export(output_file, format="mp3")if __name__ == "__main__":input_file = "your_audio_file.mp3"output_file = "channel_0.mp3"channel_index = 0 # 選擇 Channel 0extract_and_save_channel(input_file, output_file, channel_index)
wav格式
from pydub import AudioSegment# 加載WAV文件
wav_file_path = r"E:\allchat\output_16000.wav"
audio_segment = AudioSegment.from_wav(wav_file_path)# 提取Channel 0
if audio_segment.channels > 1:channel_0 = audio_segment.split_to_mono()[0]
else:channel_0 = audio_segment# 導出為單聲道WAV文件
mono_wav_file_path = r"E:\allchat\output_16000_channel_0.wav"
channel_0.export(mono_wav_file_path, format="wav")
2、采樣率更改為16000
from pydub import AudioSegmentdef resample_wav_with_pydub(input_file, output_file, new_rate):# 讀取原始WAV文件audio = AudioSegment.from_wav(input_file)# 設置新的采樣率audio = audio.set_frame_rate(new_rate)# 導出重采樣后的WAV文件audio.export(output_file, format='wav')# 使用示例
resample_wav_with_pydub('input.wav', 'output_16000.wav', 16000)