測試功能描述:
程序會先測量約 2 秒環境音作為基準,然后開始實時顯示語音頻段特征值的柱狀圖并持續運行,直到按 Ctrl+C 中斷;當檢測到特征值超過閾值時會顯示提示,并在語音結束后切換 LED 燈(LED 點亮后超時會自動熄滅)
一,硬件準備:
1.ESP32 CH3 USB開發板1塊
2.INMP441 麥克風1個
3.LED燈一個
二,連線
1.麥克風與esp32連接
- SCK -> GPIO14
- WS -> GPIO15
- SD -> GPIO2
- VDD -> 3.3V
- GND -> GND
2.LED燈與esp32連接
- 正極 -> GPIO25
- 負極 -> GND
三,測試代碼
import machine
import ustruct
import utime
import math# 硬件配置 - INMP441麥克風連接到I2S0
# Every keyword argument sits on its own line so that no inline comment can
# swallow the arguments that follow it.
i2s = machine.I2S(
    0,
    sck=machine.Pin(14),      # serial clock pin
    ws=machine.Pin(15),       # word-select pin
    sd=machine.Pin(2),        # serial data pin
    mode=machine.I2S.RX,      # receive mode
    bits=16,                  # 16-bit sample depth
    format=machine.I2S.MONO,  # single channel
    rate=16000,               # 16 kHz sampling rate
    ibuf=40000,               # internal buffer size in bytes
)

# LED configuration
led = machine.Pin(25, machine.Pin.OUT)  # LED connected to GPIO25

# Sampling parameters
SAMPLE_RATE = 16000 # sampling rate in Hz; used for frequency-to-bin conversion and frame counting
BUFFER_SIZE = 1024 # number of 16-bit samples read from I2S per frame
FFT_SIZE = 256 # FFT length (must be a power of two; kept small for performance)
DETECTION_THRESHOLD = 20000 # default threshold compared against the voice-band feature value
COMMAND_TIMEOUT = 1 # seconds without a detection after which a command is treated as finished
LED_ON_DURATION = 3 # seconds after which a lit LED is switched off automatically
VOLUME_BAR_LENGTH = 40 # width of the text bar graph, in characters

# Hann window function
def hanning_window(size):
    """Return a Hann window with ``size`` coefficients as a list of floats.

    Coefficient ``i`` is ``0.5 * (1 - cos(2 * pi * i / (size - 1)))``, so the
    window is 0 at both ends and 1 in the middle.

    Args:
        size: Number of coefficients to generate.

    Returns:
        A list of ``size`` floats. ``size <= 0`` yields an empty list and
        ``size == 1`` yields ``[1.0]``.
    """
    if size <= 0:
        return []
    if size == 1:
        # The general formula divides by (size - 1); a one-point window is
        # defined as a single unit coefficient instead.
        return [1.0]
    return [
        0.5 * (1 - math.cos(2 * math.pi * i / (size - 1)))
        for i in range(size)
    ]


# Simple FFT implementation (Cooley-Tukey algorithm)
def fft(samples):
    """Compute the discrete Fourier transform with recursive radix-2 FFT.

    Args:
        samples: Sequence of real or complex values whose length is 0, 1 or
            a power of two.

    Returns:
        A list of ``len(samples)`` complex values. Inputs of length 0 or 1
        are returned unchanged.

    Raises:
        ValueError: If the length is greater than 1 and not a power of two.
            The even/odd split would otherwise leave output bins unfilled
            and silently return a wrong spectrum.
    """
    n = len(samples)
    if n <= 1:
        return samples
    if n & (n - 1):
        raise ValueError("fft length must be a power of two, got %d" % n)

    half = n // 2
    even = fft(samples[0::2])
    odd = fft(samples[1::2])

    result = [0j] * n
    for k in range(half):
        angle = -2 * math.pi * k / n
        # Twiddle factor e^(-2*pi*i*k/n) applied to the odd-indexed half.
        t = odd[k] * (math.cos(angle) + 1j * math.sin(angle))
        result[k] = even[k] + t
        result[k + half] = even[k] - t
    return result


# Spectrum energy computation
def calculate_spectrum(samples):
    """Return the magnitude spectrum of ``samples``.

    The samples are multiplied by a Hann window, transformed with ``fft``,
    and the magnitude of the first half of the bins is returned.

    Args:
        samples: Sequence of numeric samples; its length must be accepted
            by ``fft``.

    Returns:
        A list of ``len(samples) // 2`` non-negative floats, one per bin.
    """
    window = hanning_window(len(samples))
    windowed_samples = [s * w for s, w in zip(samples, window)]

    fft_data = fft(windowed_samples)

    # Only the first half of the bins is kept.
    half = len(fft_data) // 2
    return [
        math.sqrt(fft_data[i].real ** 2 + fft_data[i].imag ** 2)
        for i in range(half)
    ]


# Voice feature extraction
def extract_features(spectrum):
    """Return the summed spectrum magnitude in the 300-3400 Hz voice band.

    Frequencies are converted to bin indices with ``FFT_SIZE / SAMPLE_RATE``,
    so ``spectrum`` is expected to come from an ``FFT_SIZE``-point transform
    of audio sampled at ``SAMPLE_RATE``.

    Args:
        spectrum: List of per-bin magnitudes.

    Returns:
        The sum of the bins from the 300 Hz bin up to, but not including,
        the 3400 Hz bin. Both indices are clamped to the valid range, so an
        empty spectrum yields 0.
    """
    last_bin = len(spectrum) - 1
    low_freq = max(0, min(int(300 * FFT_SIZE / SAMPLE_RATE), last_bin))
    high_freq = max(0, min(int(3400 * FFT_SIZE / SAMPLE_RATE), last_bin))
    return sum(spectrum[low_freq:high_freq])


# Environment calibration
def calibrate_environment(duration=2):
    """Measure the ambient voice-band feature and derive a detection threshold.

    Reads ``int(SAMPLE_RATE / BUFFER_SIZE * duration)`` frames from the I2S
    microphone, which is ``duration`` seconds of audio. Wall-clock time is
    longer, because every iteration also computes an FFT and sleeps 50 ms.
    A live bar graph of the feature value is printed for each frame.

    Args:
        duration: Amount of audio to analyse, in seconds.

    Returns:
        The suggested threshold: 1.5 times the largest feature observed,
        but never below ``DETECTION_THRESHOLD``.
    """
    print(f"正在校準環境特征 ({duration}秒)...")

    mic_samples = bytearray(BUFFER_SIZE * 2)
    # Only the first FFT_SIZE samples of each frame are analysed, so only
    # those are unpacked.
    frame_len = min(FFT_SIZE, BUFFER_SIZE)
    samples_array = [0] * frame_len

    max_feature = 0
    avg_feature = 0
    samples_count = 0

    for _ in range(int(SAMPLE_RATE / BUFFER_SIZE * duration)):
        # Read one frame from I2S.
        i2s.readinto(mic_samples)

        # Convert little-endian signed 16-bit samples to integers.
        for i in range(frame_len):
            samples_array[i] = ustruct.unpack_from('<h', mic_samples, i * 2)[0]

        spectrum = calculate_spectrum(samples_array)
        feature = extract_features(spectrum)

        avg_feature += feature
        samples_count += 1
        if feature > max_feature:
            max_feature = feature

        # Live display; a feature of 100000 fills the whole bar.
        normalized = min(1.0, feature / 100000)
        bar_length = int(normalized * VOLUME_BAR_LENGTH)
        feature_bar = '█' * bar_length + '-' * (VOLUME_BAR_LENGTH - bar_length)
        print(f"\r環境特征: [{feature_bar}] {feature:.1f}", end='')

        utime.sleep_ms(50)

    if samples_count > 0:
        avg_feature /= samples_count

    # Suggested threshold: 1.5x the ambient maximum, floored at the default.
    suggested_threshold = max(DETECTION_THRESHOLD, max_feature * 1.5)

    print(f"\n環境校準完成: 平均={avg_feature:.1f}, 最大={max_feature:.1f}")
    print(f"建議閾值: {suggested_threshold:.1f}")

    return suggested_threshold


# Voice command detection
def detect_command(buffer, threshold=DETECTION_THRESHOLD):
    """Decide whether one audio frame contains voice-band energy above a threshold.

    Args:
        buffer: Bytes-like object holding at least ``BUFFER_SIZE``
            little-endian signed 16-bit samples.
        threshold: Feature value that must be exceeded to count as a
            detection.

    Returns:
        A tuple ``(detected, feature, volume)``. ``detected`` is True when
        ``feature > threshold``. ``feature`` is the voice-band energy of the
        first ``FFT_SIZE`` samples. ``volume`` is the integer mean absolute
        amplitude over all ``BUFFER_SIZE`` samples, used for display only.
    """
    samples_array = [
        ustruct.unpack_from('<h', buffer, i * 2)[0]
        for i in range(BUFFER_SIZE)
    ]

    # The spectrum is computed from the first FFT_SIZE samples only.
    spectrum = calculate_spectrum(samples_array[:FFT_SIZE])
    feature = extract_features(spectrum)

    volume = sum(abs(sample) for sample in samples_array) // BUFFER_SIZE

    return feature > threshold, feature, volume


# Main control loop
def voice_control_loop(threshold=DETECTION_THRESHOLD):
    """Run the voice-controlled LED loop until interrupted.

    Each iteration reads one frame from I2S, runs ``detect_command`` on it
    and prints a one-line status. A command starts at the first detection
    and counts as finished once no detection has occurred for
    ``COMMAND_TIMEOUT`` seconds; a finished command toggles the LED. A lit
    LED is switched off automatically ``LED_ON_DURATION`` seconds after it
    was turned on.

    The loop ends on ``KeyboardInterrupt``. The I2S peripheral is released
    and the LED is switched off on every exit path.

    Args:
        threshold: Feature value above which a frame counts as a detection.
    """
    print("語音控制LED系統已啟動")
    print(f"語音檢測閾值: {threshold}")

    mic_samples = bytearray(BUFFER_SIZE * 2)

    led_state = False
    led_on_time = 0
    last_command_time = 0
    command_active = False

    try:
        while True:
            i2s.readinto(mic_samples)

            command_detected, feature_value, volume = detect_command(mic_samples, threshold)
            current_time = utime.ticks_ms()

            # Both status fields are fixed-width so that redrawing the line
            # with "\r" never leaves stale characters behind.
            status = "ON " if led_state else "OFF"
            cmd_status = "CMD" if command_detected else "   "

            # Bar graph of the feature value; 100000 fills the whole bar.
            normalized = min(1.0, feature_value / 100000)
            bar_length = int(normalized * VOLUME_BAR_LENGTH)
            feature_bar = '█' * bar_length + '-' * (VOLUME_BAR_LENGTH - bar_length)

            print(f"\r特征: [{feature_bar}] {feature_value:.1f} | 音量: {volume:4d} | 狀態: {status} | {cmd_status}", end='')

            if command_detected:
                if not command_active:
                    command_active = True
                    print("\n命令開始檢測")
                last_command_time = current_time

            # The command is over once nothing has been detected for
            # COMMAND_TIMEOUT seconds; toggle the LED at that point.
            if command_active and utime.ticks_diff(current_time, last_command_time) > COMMAND_TIMEOUT * 1000:
                command_active = False
                if not led_state:
                    led.on()
                    led_state = True
                    led_on_time = current_time
                    print("\nLED已點亮")
                else:
                    led.off()
                    led_state = False
                    print("\nLED已關閉")

            # Auto-off is timed from the moment the LED was switched on, so
            # it stays lit for the full LED_ON_DURATION.
            if led_state and utime.ticks_diff(current_time, led_on_time) > LED_ON_DURATION * 1000:
                led.off()
                led_state = False
                print("\nLED已關閉(超時)")

            utime.sleep_ms(50)

    except KeyboardInterrupt:
        print("\n程序已停止")
    finally:
        # Release the hardware on every exit path.
        i2s.deinit()
        led.off()


# Run the voice control program
def main():
    """Calibrate against ambient sound, then run the voice control loop.

    The threshold returned by ``calibrate_environment`` is passed straight
    to ``voice_control_loop``.
    """
    threshold = calibrate_environment()
    voice_control_loop(threshold)


# Start the program
main()
四,運行效果截圖
?
?
- 代碼中的引腳定義可以根據實際接線情況調整
- 內部緩沖區大小 (ibuf) 可能需要根據 ESP32 的內存情況調整
- 采樣率和位深度可以根據需要修改,但會影響音頻質量
- 如果出現內存不足錯誤,嘗試減小 BUFFER_SIZE 值
- 麥克風的靈敏度可以通過修改 INMP441 的增益設置來調整