Signed-off-by: sairate <sairate@sina.cn>

This commit is contained in:
sairate 2025-05-15 18:53:35 +08:00
parent 154e52ff0f
commit b1b6a4b5bb
9 changed files with 341 additions and 0 deletions

1
README.md Normal file
View File

@ -0,0 +1 @@
# 2025年中新天津生态城青少年创意编程与智能设计大赛

View File

@ -0,0 +1,98 @@
# 🎙️ 智能语音助手 “小智”
一个基于 **百度智能云语音识别 + 合成** 与 **DeepSeek-R1 对话模型** 的智能语音助手,支持以下功能:
* 🎤 **语音唤醒与自动录音**
* 🧠 **自然语言理解(LLM 问答)**
* 🔊 **语音合成回应**
* 🕹️ **Python 本地运行,无需前端或网页**
---
## ✅ 功能概览
| 功能模块 | 描述 |
| ----------- | ---------------------------------- |
| 语音唤醒 | 检测是否说出“小智”关键词,触发问答流程 |
| 自动录音 | 使用 WebRTC VAD 进行语音活动检测,自动判断语音开始和结束 |
| 百度语音识别 | 调用百度智能云 API将语音识别成文本 |
| DeepSeek 问答 | 将用户问题发送到 DeepSeek-R1 模型,获取智能回复 |
| 百度语音合成 | 将 AI 回复转换为音频并使用 `pygame` 播放 |
---
## 📦 环境依赖requirements.txt
```txt
requests
pyaudio
pygame
webrtcvad
openai
```
安装依赖:
```bash
pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple/
```
如遇 `pyaudio` 安装失败,推荐使用:
```bash
pip install pipwin
pipwin install pyaudio
```
---
## 🔐 API 配置
打开 `main.py` 或脚本文件,将以下参数替换为你自己的:
```python
# 百度智能云 API
BAIDU_API_KEY = "你的API Key"
BAIDU_SECRET_KEY = "你的Secret Key"
# DeepSeek-R1 API
DEEPSEEK_API_KEY = "你的DeepSeek API Key"
```
---
## 🚀 启动方式
```bash
python main.py
```
启动后助手进入循环监听状态,等待用户说出 **唤醒词“小智”**,进入问答流程。
---
## 🎧 示例流程
1. **你说:“小智”**(唤醒)
2. 程序回应:“好的,小智在。请说出你的问题。”
3. **你说:“今天北京天气怎么样?”**
4. 小智回答:“今天北京的天气是晴转多云,最高气温27度。”
---
## 🔊 技术细节
* 📍 **自动录音**:基于 `webrtcvad` 自动结束语音(检测静音)
* 📍 **语音识别**:百度 `vop.baidu.com/server_api`
* 📍 **语音合成**:百度 `tsn.baidu.com/text2audio`
* 📍 **AI对话**:调用 DeepSeek-R1 `chat.completions` 接口(通过 OpenAI SDK
---
## 🛠️ 注意事项
* 程序使用 `pyaudio` 进行录音,需麦克风硬件支持
* 百度语音识别需中文 `16kHz 单声道 PCM`
* 请保持联网状态,所有服务需调用在线 API

207
小智智能助手/app.py Normal file
View File

@ -0,0 +1,207 @@
import requests
import base64
import time
import wave
import pyaudio
import pygame
import webrtcvad
from openai import OpenAI
# --------------------- Configuration ---------------------
# Baidu Cloud API credentials (replace with your own).
# NOTE(review): hard-coding secrets in source is unsafe — these keys are now
# public in version control and should be rotated and loaded from environment
# variables or a secrets store instead.
BAIDU_API_KEY = "4icZSO1OlMCU2ZiRMhgGCXFu"
BAIDU_SECRET_KEY = "6wJldJ08m1jIX9hb0ULcJrIJ9D1OJW3c"
# DeepSeek API key (replace with your own; same rotation caveat as above).
DEEPSEEK_API_KEY = "sk-f15b44b6b3344cdd820e59acebce9d2c"
# Recording parameters: 16-bit mono PCM at 16 kHz — the format Baidu ASR requires.
CHUNK = 1024
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 16000
# Device identifier sent as "cuid" to the Baidu APIs (any stable string works).
DEVICE_ID = "raspberry_pi"
# --------------------- Helper functions ---------------------
def get_baidu_token():
    """Fetch a Baidu Cloud OAuth2 access token.

    Exchanges BAIDU_API_KEY / BAIDU_SECRET_KEY for a short-lived token used
    by both the ASR and TTS endpoints.

    Returns:
        The access-token string, or None if the request fails.
    """
    url = "https://aip.baidubce.com/oauth/2.0/token"
    params = {
        "grant_type": "client_credentials",
        "client_id": BAIDU_API_KEY,
        "client_secret": BAIDU_SECRET_KEY,
    }
    try:
        # A timeout keeps the assistant from hanging forever on a dead network;
        # a connection error must not crash the whole program.
        response = requests.post(url, data=params, timeout=10)
    except requests.RequestException as e:
        print("获取百度 Token 失败", e)
        return None
    if response.status_code == 200:
        return response.json().get("access_token")
    print("获取百度 Token 失败")
    return None
def record_audio_vad(filename, max_duration=10):
    """Record microphone audio to *filename* until silence is detected.

    Uses WebRTC VAD for voice-activity detection: recording stops after a
    run of consecutive non-speech frames, or once *max_duration* seconds
    have elapsed, whichever comes first.

    Args:
        filename: Path of the WAV file to write (16 kHz mono 16-bit PCM,
            the format Baidu ASR expects).
        max_duration: Hard cap on the recording length, in seconds.
    """
    vad = webrtcvad.Vad(1)  # aggressiveness 0-3; higher = stricter speech detection
    p = pyaudio.PyAudio()
    # WebRTC VAD only accepts 10/20/30 ms frames: 320 samples @ 16 kHz = 20 ms.
    frame_samples = 320
    stream = p.open(format=pyaudio.paInt16, channels=1, rate=16000,
                    input=True, frames_per_buffer=frame_samples)
    print("开始录音(自动检测静音停止)...")
    frames = []
    silence_count = 0
    max_silence = 150  # 150 frames * 20 ms = 3 seconds of continuous silence
    try:
        while True:
            data = stream.read(frame_samples, exception_on_overflow=False)
            frames.append(data)
            # Reset the silence counter whenever speech is detected.
            is_speech = vad.is_speech(data, 16000)
            silence_count = 0 if is_speech else silence_count + 1
            if silence_count > max_silence:
                print("检测到静音,录音结束。")
                break
            # Enforce the maximum recording length.
            if len(frames) > int((16000 / frame_samples) * max_duration):
                print("达到最大录音时长,录音结束。")
                break
    finally:
        # Always release the audio device, even if the VAD or stream raises.
        stream.stop_stream()
        stream.close()
        sample_width = p.get_sample_size(pyaudio.paInt16)
        p.terminate()
    # Persist the captured frames as a WAV file.
    with wave.open(filename, 'wb') as wf:
        wf.setnchannels(1)
        wf.setsampwidth(sample_width)
        wf.setframerate(16000)
        wf.writeframes(b''.join(frames))
def speech_recognition(audio_file, token):
    """Transcribe a WAV file via the Baidu short-speech ASR REST API.

    Args:
        audio_file: Path to a 16 kHz mono 16-bit PCM WAV file.
        token: Baidu OAuth access token from get_baidu_token().

    Returns:
        The top-ranked transcript string, or None on any failure.
    """
    with open(audio_file, "rb") as f:
        speech_data = f.read()
    speech_base64 = base64.b64encode(speech_data).decode('utf-8')
    payload = {
        "format": "wav",
        "rate": RATE,
        "channel": 1,
        "token": token,
        "cuid": DEVICE_ID,
        "len": len(speech_data),  # length of the raw (pre-base64) audio bytes
        "speech": speech_base64,
        "word_list": ["小智","小志","小至"]  # hotwords biasing recognition toward the wake word
    }
    url = "http://vop.baidu.com/server_api"
    headers = {'Content-Type': 'application/json'}
    try:
        # Timeout so a stalled connection cannot freeze the main loop;
        # ValueError covers a non-JSON error body.
        response = requests.post(url, json=payload, headers=headers, timeout=30)
        result = response.json()
    except (requests.RequestException, ValueError) as e:
        print("语音识别错误:", e)
        return None
    if result.get("err_no") == 0:
        # "result" holds candidate transcripts ranked by confidence.
        return result.get("result", [""])[0]
    print("语音识别错误:", result.get("err_msg"))
    return None
def wake_word_detected(text):
    """Return True if *text* contains any accepted wake-word variant.

    The variant list covers common ASR mis-hearings of "小智".
    """
    for candidate in ("小智", "小志", "小知", "晓智"):
        if candidate in text:
            return True
    return False
def deepseek_conversation(user_text):
    """Send *user_text* to the DeepSeek chat API and return the reply text.

    Falls back to a fixed apology string if the API call raises.
    """
    system_prompt = "你是一名叫小智的助手回复不需要使用markdown格式请直接以文本形式回复。"
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_text},
    ]
    try:
        client = OpenAI(api_key=DEEPSEEK_API_KEY, base_url="https://api.deepseek.com")
        completion = client.chat.completions.create(
            model="deepseek-chat",
            messages=conversation,
            stream=False,
        )
        return completion.choices[0].message.content
    except Exception as e:
        print("DeepSeek API 调用异常:", e)
        return "抱歉,我无法获取答案。"
def text_to_speech(text, token, output_file="answer.mp3"):
    """Synthesize *text* to an MP3 file via the Baidu TTS REST API.

    Args:
        text: Text to speak; truncated to the API's length limit.
        token: Baidu OAuth access token.
        output_file: Path of the MP3 file to write.

    Returns:
        *output_file* on success, or None on failure.
    """
    MAX_CHAR = 1024
    text = text[:MAX_CHAR]  # slicing is a no-op for short strings; no length check needed
    params = {
        "tex": text,
        "tok": token,
        "cuid": DEVICE_ID,
        "ctp": 1,     # client type: 1 = REST/web client
        "lan": "zh"   # Mandarin
    }
    url = "http://tsn.baidu.com/text2audio"
    try:
        response = requests.post(url, data=params, timeout=30)
    except requests.RequestException as e:
        print("语音合成错误:", e)
        return None
    # On success the body is audio; API errors come back as JSON. Checking the
    # "audio" prefix also tolerates a charset suffix in the Content-Type header.
    content_type = response.headers.get('Content-Type', '')
    if content_type.startswith("audio"):
        with open(output_file, "wb") as f:
            f.write(response.content)
        return output_file
    print("语音合成错误:", response.text)
    return None
def play_audio(file_path):
    """Play *file_path* through pygame and block until playback finishes."""
    mixer = pygame.mixer
    mixer.init()
    music = mixer.music
    music.load(file_path)
    music.play()
    # Poll in 100 ms steps until the mixer reports the track has stopped.
    while music.get_busy():
        time.sleep(0.1)
# --------------------- Main program ---------------------
def main():
    """Run the assistant loop: wait for the wake word, then answer one question.

    Flow per iteration: record a short clip, transcribe it, and if it contains
    the wake word, prompt the user, transcribe their question, get an answer
    from DeepSeek, and speak it back via Baidu TTS.
    """
    print("启动智能助手小智...")
    token = get_baidu_token()
    if not token:
        # Without a Baidu token neither ASR nor TTS can work; bail out.
        return
    while True:
        print("等待唤醒词 '小智' ...")
        record_audio_vad("wake.wav", max_duration=3)
        wake_text = speech_recognition("wake.wav", token)
        # Use the shared detector so ASR near-misses ("小志", "晓智", ...) —
        # which the recognizer's hotword list deliberately encourages — also wake us.
        if wake_text and wake_word_detected(wake_text):
            print("唤醒成功,小智回应:好的,小智在。")
            # Speak the acknowledgement before listening for the question.
            response_audio = text_to_speech("好的,小智在。请说出你的问题。", token,
                                            output_file="wakeup_response.mp3")
            if response_audio:
                play_audio(response_audio)
            print("请说出您的问题:")
            record_audio_vad(filename="query.wav")
            user_query = speech_recognition("query.wav", token)
            if user_query:
                print("用户说:", user_query)
                # Ask the DeepSeek model for an answer.
                answer = deepseek_conversation(user_query)
                print("小智回答:", answer)
                # Speak the answer back via Baidu TTS.
                audio_file = text_to_speech(answer, token, output_file="answer.mp3")
                if audio_file:
                    play_audio(audio_file)
            else:
                print("未能识别您的问题,请重试。")
        # Brief pause between listen cycles.
        time.sleep(1)
if __name__ == '__main__':
    main()

Binary file not shown.

View File

@ -0,0 +1,23 @@
BAIDU_API_KEY = "gQyEX2mdkEa3gHvaYxcXMSv3"
BAIDU_SECRET_KEY = "M5s4mMH3B5yeX5LDP4RME3rdlxATb3lO"
# 设备唯一标识(可设置为行空板的设备 ID 或 MAC 地址)
DEVICE_ID = "your_device_id"
# DeepSeek API 配置(请替换为实际接口地址及参数)
DEEPSEEK_API_URL = "https://api.deepseek.com"
DEEPSEEK_API_KEY = "sk-f15b44b6b3344cdd820e59acebce9d2c"
"""
curl https://api.deepseek.com/chat/completions \
-H "Content-Type: application/json" \
-H "Authorization: Bearer <sk-f15b44b6b3344cdd820e59acebce9d2c>" \
-d '{
"model": "deepseek-chat",
"messages": [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "Hello!"}
],
"stream": false
}'
"""

View File

@ -0,0 +1,12 @@
```mermaid
graph TD
    A[用户说话] --> B[语音录音模块pyaudio、webrtcvad]
B --> C[语音识别模块百度语音识别API]
C --> D[语义理解模块DeepSeek大模型]
D --> E[语音合成模块百度语音合成API]
E --> F[音频播放模块pygame]
F --> G[返回语音回答]
G --> H{是否继续交互}
H -- 是 --> B
H -- 否 --> I[结束程序]
```

Binary file not shown.

After

Width:  |  Height:  |  Size: 103 KiB

Binary file not shown.