PaddleSpeech的GitHub項目地址:https://github.com/PaddlePaddle/PaddleSpeech
環境要求:
gcc >= 4.8.5
paddlepaddle <= 2.5.1
python >= 3.8
OS support: Linux(recommend), Windows, Mac OSX
pip下載指令:
python -m pip install paddlepaddle-gpu==2.5.1 -i https://pypi.tuna.tsinghua.edu.cn/simple
pip install paddlespeech==1.4.1
小模型配置代碼:
from paddlespeech.cli.asr.infer import ASRExecutor

# Pretrained model tags kept for reference. Every entry ends in a
# "-<language>-<sample rate>" suffix such as "-zh-16k" or "-en-16k".
ASR_MODELS = [
    'conformer_wenetspeech-zh-16k',
    'conformer_online_wenetspeech-zh-16k',
    'conformer_u2pp_online_wenetspeech-zh-16k',
    'conformer_online_multicn-zh-16k',
    'conformer_aishell-zh-16k',
    'conformer_online_aishell-zh-16k',
    'transformer_librispeech-en-16k',
    'deepspeech2online_wenetspeech-zh-16k',
    'deepspeech2offline_aishell-zh-16k',
    'deepspeech2online_aishell-zh-16k',
    'deepspeech2offline_librispeech-en-16k',
    'conformer_talcs-codeswitch_zh_en-16k',
]

# Model name passed to the executor. Note that it carries no "-zh-16k"
# suffix; presumably the executor appends language and sample rate itself --
# TODO confirm against the PaddleSpeech ASR CLI documentation.
ASR_MODEL = 'conformer_wenetspeech'

# One module-level executor instance, shared by every recognition call.
ASR_EXECUTOR = ASRExecutor()
音頻文件保存代碼:
import io
import os
import uuid
import soundfile as sf
# 將音頻數據轉換并保存為16kHz采樣率、16位量化深度、單聲道的WAV文件
def save_audio_file(file_path, file_content):
    """Save an uploaded audio stream as a 16 kHz, 16-bit, mono WAV file.

    Args:
        file_path: Directory to write into; it is created when missing.
        file_content: Binary file-like object. Whatever its ``read()``
            returns is decoded with ``soundfile``.

    Returns:
        ``(saved_file_path, 16000)`` on success, or ``None`` when decoding,
        resampling or writing fails (the error is printed, not raised).
    """
    target_rate = 16000

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(file_path, exist_ok=True)

    # A random UUID name keeps saved files from colliding with each other.
    unique_filename = str(uuid.uuid4()) + ".wav"
    file_path_with_file_name = os.path.join(file_path, unique_filename)

    try:
        # Decode the whole upload from memory.
        audio_data, sample_rate = sf.read(io.BytesIO(file_content.read()))

        # Multi-channel input decodes to a 2-D (frames, channels) array;
        # average the channels so the written file really is mono.
        if audio_data.ndim > 1:
            audio_data = audio_data.mean(axis=1)

        if sample_rate != target_rate:
            # Imported lazily so scipy is only needed when resampling happens.
            from scipy.signal import resample

            num_samples = int(len(audio_data) * (target_rate / sample_rate))
            audio_data = resample(audio_data, num_samples)

        sf.write(file_path_with_file_name, audio_data, target_rate,
                 subtype='PCM_16')
        return file_path_with_file_name, target_rate
    except Exception as e:
        # Best-effort: report the problem and signal failure with None.
        print(f"Error saving file: {e}")
        return None
獲取語音識別結果代碼:
import os
# 獲取指定文件的語音識別結果
def get_text_with_asr(file_path_with_file_name, sample_rate):
    """Run speech recognition on a saved audio file.

    Args:
        file_path_with_file_name: Path of the audio file to transcribe.
            ``None`` or an empty string is treated like a missing file.
        sample_rate: Sample rate of the file. Currently not forwarded to
            the executor, which is called with its own defaults for
            sample rate and language.

    Returns:
        Whatever ``ASR_EXECUTOR`` returns for the file, or ``None`` when no
        usable path was given or the file does not exist.
    """
    # Guard against a falsy path first: os.path.exists(None) raises TypeError.
    if not file_path_with_file_name:
        return None
    if not os.path.exists(file_path_with_file_name):
        return None

    return ASR_EXECUTOR(
        audio_file=file_path_with_file_name,
        model=ASR_MODEL,
    )
音頻轉文字代碼:
import os
# 音頻轉文字(上傳音頻文件)
def audio_to_text(file_content, file_name):
    """Transcribe an uploaded audio file to text.

    The upload is first stored under ``'.' + STATIC_FILE_PATH + "/"`` via
    ``save_audio_file`` and the stored file is then passed to
    ``get_text_with_asr``.

    Args:
        file_content: Binary file-like object holding the uploaded audio.
        file_name: Original name of the upload. Not used by this function.

    Returns:
        The recognition result from ``get_text_with_asr``, or ``None`` when
        the audio could not be saved.
    """
    file_path_without_file_name = '.' + STATIC_FILE_PATH + "/"
    os.makedirs(file_path_without_file_name, exist_ok=True)

    saved = save_audio_file(file_path_without_file_name, file_content)
    # save_audio_file signals failure with None; unpacking it would raise
    # TypeError, so stop here and report "no result" instead.
    if saved is None:
        return None

    file_path_with_file_name, sample_rate = saved
    return get_text_with_asr(file_path_with_file_name, sample_rate)