Signed-off-by: sairate <sairate@sina.cn>
This commit is contained in:
parent
154e52ff0f
commit
b1b6a4b5bb
98
小智智能助手/README.md
Normal file
98
小智智能助手/README.md
Normal file
@ -0,0 +1,98 @@
|
|||||||
|
|
||||||
|
# 🎙️ 智能语音助手 “小智”
|
||||||
|
|
||||||
|
一个基于 **百度智能云语音识别 + 合成** 和 **DeepSeek 对话模型(`deepseek-chat`)** 的智能语音助手,支持以下功能:
|
||||||
|
|
||||||
|
* 🎤 **语音唤醒与自动录音**
|
||||||
|
* 🧠 **自然语言理解(LLM)问答**
|
||||||
|
* 🔊 **语音合成回应**
|
||||||
|
* 🕹️ **Python 本地运行,无需前端或网页**
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## ✅ 功能概览
|
||||||
|
|
||||||
|
| 功能模块 | 描述 |
|
||||||
|
| ----------- | ---------------------------------- |
|
||||||
|
| 语音唤醒 | 检测是否说出“小智”关键词,触发问答流程 |
|
||||||
|
| 自动录音 | 使用 WebRTC VAD 进行语音活动检测,自动判断语音开始和结束 |
|
||||||
|
| 百度语音识别 | 调用百度智能云 API,将语音识别成文本 |
|
||||||
|
| DeepSeek 问答 | 将用户问题发送到 DeepSeek `deepseek-chat` 模型,获取智能回复 |
|
||||||
|
| 百度语音合成 | 将 AI 回复转换为音频并使用 `pygame` 播放 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 📦 环境依赖(requirements.txt)
|
||||||
|
|
||||||
|
```txt
|
||||||
|
requests
|
||||||
|
pyaudio
|
||||||
|
pygame
|
||||||
|
webrtcvad
|
||||||
|
openai
|
||||||
|
```
|
||||||
|
|
||||||
|
安装依赖:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple/
|
||||||
|
```
|
||||||
|
|
||||||
|
如遇 `pyaudio` 安装失败,推荐使用:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pip install pipwin
|
||||||
|
pipwin install pyaudio
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 🔐 API 配置
|
||||||
|
|
||||||
|
打开 `app.py`,将以下参数替换为你自己的:
|
||||||
|
|
||||||
|
```python
|
||||||
|
# 百度智能云 API
|
||||||
|
BAIDU_API_KEY = "你的API Key"
|
||||||
|
BAIDU_SECRET_KEY = "你的Secret Key"
|
||||||
|
|
||||||
|
# DeepSeek-R1 API
|
||||||
|
DEEPSEEK_API_KEY = "你的DeepSeek API Key"
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 🚀 启动方式
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python app.py
|
||||||
|
```
|
||||||
|
|
||||||
|
启动后助手进入循环监听状态,等待用户说出 **唤醒词“小智”**,进入问答流程。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 🎧 示例流程
|
||||||
|
|
||||||
|
1. **你说:“小智”**(唤醒)
|
||||||
|
2. 程序回应:“好的,小智在。请说出你的问题。”
|
||||||
|
3. **你说:“今天北京天气怎么样?”**
|
||||||
|
4. 小智回答:“今天北京的天气是晴转多云,最高气温27度。”
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 🔊 技术细节
|
||||||
|
|
||||||
|
* 📍 **自动录音**:基于 `webrtcvad` 自动结束语音(检测静音)
|
||||||
|
* 📍 **语音识别**:百度 `vop.baidu.com/server_api`
|
||||||
|
* 📍 **语音合成**:百度 `tsn.baidu.com/text2audio`
|
||||||
|
* 📍 **AI对话**:调用 DeepSeek `chat.completions` 接口(模型 `deepseek-chat`,通过 OpenAI SDK)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 🛠️ 注意事项
|
||||||
|
|
||||||
|
* 程序使用 `pyaudio` 进行录音,需麦克风硬件支持
|
||||||
|
* 百度语音识别需中文 `16kHz 单声道 PCM`
|
||||||
|
* 请保持联网状态,所有服务需调用在线 API
|
||||||
|
|
||||||
207
小智智能助手/app.py
Normal file
207
小智智能助手/app.py
Normal file
@ -0,0 +1,207 @@
|
|||||||
|
import base64
import os
import time
import wave

import pyaudio
import pygame
import requests
import webrtcvad
from openai import OpenAI
|
||||||
|
|
||||||
|
# --------------------- 配置参数 ---------------------
|
||||||
|
# Baidu AI Cloud credentials. They are read from the environment so that real
# keys are never committed; the fallbacks are the README placeholder strings.
# NOTE(review): the keys that used to be hard-coded here remain in the
# repository history and should be revoked/rotated.
BAIDU_API_KEY = os.environ.get("BAIDU_API_KEY", "你的API Key")
BAIDU_SECRET_KEY = os.environ.get("BAIDU_SECRET_KEY", "你的Secret Key")

# DeepSeek API credential (same environment-variable convention).
DEEPSEEK_API_KEY = os.environ.get("DEEPSEEK_API_KEY", "你的DeepSeek API Key")
|
||||||
|
|
||||||
|
# Recording parameters.
RATE = 16000              # sample rate in Hz
CHANNELS = 1              # mono
FORMAT = pyaudio.paInt16  # PyAudio sample-format constant
CHUNK = 1024              # buffer size; not referenced by the code visible here

# Device identifier, sent as "cuid" in the Baidu API requests
# (the value can be chosen freely).
DEVICE_ID = "raspberry_pi"
|
||||||
|
|
||||||
|
# --------------------- 工具函数 ---------------------
|
||||||
|
def get_baidu_token():
    """Fetch an OAuth access token for the Baidu AI Cloud APIs.

    Uses the client-credentials grant with ``BAIDU_API_KEY`` and
    ``BAIDU_SECRET_KEY``.

    Returns:
        The ``access_token`` value from the JSON reply, or ``None`` when the
        request fails, the status is not 200, or the body is not valid JSON.
    """
    url = "https://aip.baidubce.com/oauth/2.0/token"
    params = {
        "grant_type": "client_credentials",
        "client_id": BAIDU_API_KEY,
        "client_secret": BAIDU_SECRET_KEY,
    }
    try:
        # Without a timeout a dead network would hang start-up forever.
        response = requests.post(url, data=params, timeout=10)
        if response.status_code == 200:
            return response.json().get("access_token")
    except (requests.RequestException, ValueError) as exc:
        # ValueError covers a 200 reply whose body is not JSON.
        print("获取百度 Token 失败", exc)
        return None
    print("获取百度 Token 失败")
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def record_audio_vad(filename, max_duration=10):
    """Record from the microphone until silence is detected, then save a WAV.

    Audio is captured as 16 kHz, 16-bit mono and passed frame by frame to
    WebRTC VAD. Recording stops when more than ``max_silence`` consecutive
    frames contain no speech, or when more than ``max_duration`` seconds of
    frames have been captured, whichever comes first.

    Args:
        filename: Path of the WAV file to write.
        max_duration: Upper bound on the recording length, in seconds.
    """
    sample_rate = 16000
    # 320 samples at 16 kHz is a 20 ms frame; WebRTC VAD only accepts
    # 10, 20 or 30 ms frames, so the frame size is tied to the sample rate.
    frame_samples = 320
    # 150 consecutive non-speech frames * 20 ms = 3 seconds of silence.
    max_silence = 150
    max_frames = int((sample_rate / frame_samples) * max_duration)

    # Aggressiveness 0-3: higher values filter out non-speech more strictly.
    vad = webrtcvad.Vad(1)

    frames = []
    audio = pyaudio.PyAudio()
    try:
        stream = audio.open(format=pyaudio.paInt16, channels=1, rate=sample_rate,
                            input=True, frames_per_buffer=frame_samples)
        try:
            print("开始录音(自动检测静音停止)...")
            silence_count = 0
            while True:
                data = stream.read(frame_samples, exception_on_overflow=False)
                frames.append(data)

                # Any speech frame resets the run of silent frames.
                if vad.is_speech(data, sample_rate):
                    silence_count = 0
                else:
                    silence_count += 1

                if silence_count > max_silence:
                    print("检测到静音,录音结束。")
                    break

                if len(frames) > max_frames:
                    print("达到最大录音时长,录音结束。")
                    break
        finally:
            # Release the input stream even if reading or the VAD raised.
            stream.stop_stream()
            stream.close()
    finally:
        audio.terminate()

    # The module-level helper avoids calling a method on the terminated
    # PyAudio instance; the context manager closes the file on errors too.
    with wave.open(filename, 'wb') as wf:
        wf.setnchannels(1)
        wf.setsampwidth(pyaudio.get_sample_size(pyaudio.paInt16))
        wf.setframerate(sample_rate)
        wf.writeframes(b''.join(frames))
|
||||||
|
|
||||||
|
|
||||||
|
def speech_recognition(audio_file, token):
    """Transcribe an audio file with the Baidu speech-recognition REST API.

    Args:
        audio_file: Path of the recorded WAV file.
        token: Baidu access token.

    Returns:
        The first recognition candidate as a string, or ``None`` when the
        request fails, the reply is not valid JSON, or the API reports an
        error (non-zero ``err_no``).
    """
    with open(audio_file, "rb") as f:
        speech_data = f.read()

    payload = {
        "format": "wav",
        "rate": RATE,
        "channel": 1,
        "token": token,
        "cuid": DEVICE_ID,
        "len": len(speech_data),  # byte length before base64 encoding
        "speech": base64.b64encode(speech_data).decode('utf-8'),
        # Hot words intended to bias recognition towards the wake word.
        "word_list": ["小智", "小志", "小至"],
    }
    # NOTE(review): plain HTTP sends the token and the audio unencrypted;
    # check whether this endpoint can be called over HTTPS instead.
    url = "http://vop.baidu.com/server_api"
    headers = {'Content-Type': 'application/json'}
    try:
        response = requests.post(url, json=payload, headers=headers, timeout=15)
        result = response.json()
    except (requests.RequestException, ValueError) as exc:
        # Network failure or a non-JSON body: report it like an API error.
        print("语音识别错误:", exc)
        return None

    if result.get("err_no") == 0:
        # Guard against a missing or empty candidate list.
        candidates = result.get("result") or [""]
        return candidates[0]
    print("语音识别错误:", result.get("err_msg"))
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def wake_word_detected(text):
    """Return True when *text* contains the wake word or a listed variant.

    The variants are homophones / near-homophones of the wake word that the
    recogniser may produce. ``None`` or an empty string (for example after a
    failed recognition) yields ``False`` instead of raising.
    """
    if not text:
        return False
    wake_words = ("小智", "小志", "小知", "晓智")
    return any(word in text for word in wake_words)
|
||||||
|
|
||||||
|
|
||||||
|
def deepseek_conversation(user_text):
    """Ask the DeepSeek chat API for a plain-text answer to *user_text*.

    The request is made through the OpenAI SDK pointed at the DeepSeek base
    URL, using the ``deepseek-chat`` model and a system prompt that asks for
    replies without markdown.

    Args:
        user_text: The recognised user question.

    Returns:
        The model's reply, or a fixed apology string when the call raises or
        the reply contains no text.
    """
    fallback = "抱歉,我无法获取答案。"
    try:
        client = OpenAI(api_key=DEEPSEEK_API_KEY, base_url="https://api.deepseek.com")
        response = client.chat.completions.create(
            model="deepseek-chat",
            messages=[
                {"role": "system", "content": "你是一名叫小智的助手,回复不需要使用markdown格式,请直接以文本形式回复。"},
                {"role": "user", "content": user_text},
            ],
            stream=False,
        )
        choices = response.choices
        content = choices[0].message.content if choices else None
    except Exception as e:
        # Boundary with a remote service: degrade to the spoken fallback
        # rather than crashing the listening loop.
        print("DeepSeek API 调用异常:", e)
        return fallback
    # The SDK may return None/empty content; callers pass the result
    # straight to speech synthesis, which needs a non-empty string.
    return content or fallback
|
||||||
|
|
||||||
|
|
||||||
|
def text_to_speech(text, token, output_file="answer.mp3"):
    """Synthesise *text* with the Baidu text-to-speech API and save the audio.

    Args:
        text: Text to speak; only the first 1024 characters are sent.
        token: Baidu access token.
        output_file: Path the returned audio is written to.

    Returns:
        ``output_file`` on success, or ``None`` when the text is empty, the
        request fails, or the service answers with a non-audio body.
    """
    if not text:
        # Nothing to synthesise; also avoids a TypeError on None.
        print("语音合成错误:", "文本为空")
        return None

    # NOTE(review): the service limit may be expressed in bytes rather than
    # characters - confirm against the Baidu TTS documentation.
    MAX_CHAR = 1024

    params = {
        "tex": text[:MAX_CHAR],
        "tok": token,
        "cuid": DEVICE_ID,
        "ctp": 1,
        "lan": "zh",
    }
    url = "http://tsn.baidu.com/text2audio"
    try:
        response = requests.post(url, data=params, timeout=15)
    except requests.RequestException as exc:
        print("语音合成错误:", exc)
        return None

    # Audio replies carry an "audio/..." content type; anything else is
    # treated as an error body. A prefix check tolerates extra parameters
    # after the media type.
    content_type = response.headers.get('Content-Type') or ""
    if content_type.startswith("audio/"):
        with open(output_file, "wb") as f:
            f.write(response.content)
        return output_file
    print("语音合成错误:", response.text)
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def play_audio(file_path):
    """Play an audio file with pygame and block until playback finishes.

    The mixer is shut down afterwards so the loaded file is released.
    Leaving it loaded can prevent the next synthesis from overwriting the
    same file name (notably on Windows).

    Args:
        file_path: Path of the audio file to play.
    """
    pygame.mixer.init()
    try:
        pygame.mixer.music.load(file_path)
        pygame.mixer.music.play()
        # Poll instead of busy-waiting while the music stream is active.
        while pygame.mixer.music.get_busy():
            time.sleep(0.1)
    finally:
        pygame.mixer.quit()
|
||||||
|
|
||||||
|
|
||||||
|
# --------------------- 主程序 ---------------------
|
||||||
|
def _answer_one_question(token):
    """Record one spoken question, fetch the DeepSeek answer and speak it."""
    print("请说出您的问题:")
    record_audio_vad(filename="query.wav")
    user_query = speech_recognition("query.wav", token)
    if not user_query:
        print("未能识别您的问题,请重试。")
        return

    print("用户说:", user_query)
    answer = deepseek_conversation(user_query)
    print("小智回答:", answer)

    # Convert the answer to speech and play it back.
    audio_file = text_to_speech(answer, token, output_file="answer.mp3")
    if audio_file:
        play_audio(audio_file)


def main():
    """Run the assistant loop.

    Obtains a Baidu token once, then repeatedly records a short clip, checks
    it for the wake word and, when woken, handles a single question. Returns
    immediately when no token could be obtained.
    """
    print("启动智能助手小智...")
    token = get_baidu_token()
    if not token:
        return

    while True:
        print("等待唤醒词 '小智' ...")
        record_audio_vad("wake.wav", max_duration=3)
        wake_text = speech_recognition("wake.wav", token)

        # Use the shared detector so the recognised variants of the wake
        # word are accepted too, not only the exact spelling.
        if wake_text and wake_word_detected(wake_text):
            print("唤醒成功,小智回应:好的,小智在。")

            # Spoken acknowledgement before listening for the question.
            response_audio = text_to_speech("好的,小智在。请说出你的问题。", token, output_file="wakeup_response.mp3")
            if response_audio:
                play_audio(response_audio)

            _answer_one_question(token)

        time.sleep(1)


if __name__ == '__main__':
    main()
|
||||||
BIN
小智智能助手/requirements.txt
Normal file
BIN
小智智能助手/requirements.txt
Normal file
Binary file not shown.
23
小智智能助手/test.py
Normal file
23
小智智能助手/test.py
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
|
||||||
|
import os

# Baidu AI Cloud credentials, read from the environment so that no real key
# is stored in the repository.
# NOTE(review): the keys that used to be hard-coded here remain in the
# repository history and should be revoked/rotated.
BAIDU_API_KEY = os.environ.get("BAIDU_API_KEY", "")
BAIDU_SECRET_KEY = os.environ.get("BAIDU_SECRET_KEY", "")
# Unique device identifier (can be set to the UNIHIKER board's device ID or
# its MAC address).
DEVICE_ID = "your_device_id"

# DeepSeek API settings (replace with the actual endpoint and parameters).
DEEPSEEK_API_URL = "https://api.deepseek.com"
DEEPSEEK_API_KEY = os.environ.get("DEEPSEEK_API_KEY", "")

# Reference request, kept for manual testing from a shell:
#
#   curl https://api.deepseek.com/chat/completions \
#     -H "Content-Type: application/json" \
#     -H "Authorization: Bearer $DEEPSEEK_API_KEY" \
#     -d '{
#           "model": "deepseek-chat",
#           "messages": [
#             {"role": "system", "content": "You are a helpful assistant."},
#             {"role": "user", "content": "Hello!"}
#           ],
#           "stream": false
#         }'
|
||||||
12
小智智能助手/流程图.md
Normal file
12
小智智能助手/流程图.md
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
```mermaid
|
||||||
|
graph TD
|
||||||
|
A[用户说话] --> B["语音录音模块 (pyaudio + webrtcvad)"]
|
||||||
|
B --> C[语音识别模块百度语音识别API]
|
||||||
|
C --> D[语义理解模块DeepSeek大模型]
|
||||||
|
D --> E[语音合成模块百度语音合成API]
|
||||||
|
E --> F[音频播放模块pygame]
|
||||||
|
F --> G[返回语音回答]
|
||||||
|
G --> H{是否继续交互}
|
||||||
|
H -- 是 --> B
|
||||||
|
H -- 否 --> I[结束程序]
|
||||||
|
```
|
||||||
BIN
小智智能助手/流程图.png
Normal file
BIN
小智智能助手/流程图.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 103 KiB |
BIN
小智智能助手/附件2:作品说明文档.doc
Normal file
BIN
小智智能助手/附件2:作品说明文档.doc
Normal file
Binary file not shown.
BIN
小智智能助手/附件4:参赛选手承诺书.docx
Normal file
BIN
小智智能助手/附件4:参赛选手承诺书.docx
Normal file
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user