diff --git a/README.md b/README.md new file mode 100644 index 0000000..7f7653a --- /dev/null +++ b/README.md @@ -0,0 +1 @@ +# 2025年中新天津生态城青少年创意编程与智能设计大赛 \ No newline at end of file diff --git a/小智智能助手/README.md b/小智智能助手/README.md new file mode 100644 index 0000000..6d3c5ae --- /dev/null +++ b/小智智能助手/README.md @@ -0,0 +1,98 @@ + +# 🎙️ 智能语音助手 “小智” + +一个基于 **百度智能云语音识别 + 合成** 和 **DeepSeek-R1 对话模型** 的智能语音助手,支持以下功能: + +* 🎤 **语音唤醒与自动录音** +* 🧠 **自然语言理解(LLM)问答** +* 🔊 **语音合成回应** +* 🕹️ **Python 本地运行,无需前端或网页** + +--- + +## ✅ 功能概览 + +| 功能模块 | 描述 | +| ----------- | ---------------------------------- | +| 语音唤醒 | 检测是否说出“小智”关键词,触发问答流程 | +| 自动录音 | 使用 WebRTC VAD 进行语音活动检测,自动判断语音开始和结束 | +| 百度语音识别 | 调用百度智能云 API,将语音识别成文本 | +| DeepSeek 问答 | 将用户问题发送到 DeepSeek-R1 模型,获取智能回复 | +| 百度语音合成 | 将 AI 回复转换为音频并使用 `pygame` 播放 | + +--- + +## 📦 环境依赖(requirements.txt) + +```txt +requests +pyaudio +pygame +webrtcvad +openai +``` + +安装依赖: + +```bash +pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple/ +``` + +如遇 `pyaudio` 安装失败,推荐使用: + +```bash +pip install pipwin +pipwin install pyaudio +``` + +--- + +## 🔐 API 配置 + +打开 `app.py`,将以下参数替换为你自己的: + +```python +# 百度智能云 API +BAIDU_API_KEY = "你的API Key" +BAIDU_SECRET_KEY = "你的Secret Key" + +# DeepSeek-R1 API +DEEPSEEK_API_KEY = "你的DeepSeek API Key" +``` + +--- + +## 🚀 启动方式 + +```bash +python app.py +``` + +启动后助手进入循环监听状态,等待用户说出 **唤醒词“小智”**,进入问答流程。 + +--- + +## 🎧 示例流程 + +1. **你说:“小智”**(唤醒) +2. 程序回应:“好的,小智在。请说出你的问题。” +3. **你说:“今天北京天气怎么样?”** +4. 
"""Voice assistant "Xiaozhi" (小智).

Pipeline, one turn per loop iteration:

1. Record from the microphone until WebRTC VAD reports sustained silence.
2. Send the recording to the Baidu speech-recognition REST API.
3. If the transcript contains a wake word, prompt the user, record the
   question, and send its transcript to the DeepSeek chat-completions API
   (through the OpenAI SDK).
4. Convert the answer to speech with the Baidu text2audio REST API and play
   it with pygame.

Notes:

* Recording uses ``pyaudio`` and therefore needs a working microphone.
* Audio is captured as 16 kHz, mono, 16-bit PCM, which is what both
  WebRTC VAD and the Baidu recognizer are fed here.
* Every service is an online API, so a network connection is required.
* Credentials are read from the environment variables ``BAIDU_API_KEY``,
  ``BAIDU_SECRET_KEY`` and ``DEEPSEEK_API_KEY``. Never commit real keys.
"""
import base64
import os
import time
import wave
from typing import Optional

import pyaudio
import pygame
import requests
import webrtcvad
from openai import OpenAI

# --------------------- Configuration ---------------------
# Baidu AI Cloud credentials. Read from the environment so that secrets are
# not stored in version control; for a quick local experiment the empty
# default may be replaced with a literal, but do not commit it.
BAIDU_API_KEY = os.environ.get("BAIDU_API_KEY", "")
BAIDU_SECRET_KEY = os.environ.get("BAIDU_SECRET_KEY", "")

# DeepSeek API credential (same rules as above).
DEEPSEEK_API_KEY = os.environ.get("DEEPSEEK_API_KEY", "")

# Recording parameters.
CHUNK = 1024  # kept for backward compatibility; the VAD loop reads VAD_FRAME_SAMPLES
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 16000

# WebRTC VAD only accepts frames of 10, 20 or 30 ms.
# 20 ms at 16 kHz is 320 samples (640 bytes of 16-bit mono audio).
VAD_FRAME_MS = 20
VAD_FRAME_SAMPLES = RATE * VAD_FRAME_MS // 1000

# Device identifier sent to Baidu as "cuid" (any stable string works).
DEVICE_ID = "raspberry_pi"

# Seconds to wait for any single HTTP request before giving up.
REQUEST_TIMEOUT = 15

# Transcripts that count as the wake word, including common misrecognitions.
WAKE_WORDS = ("小智", "小志", "小知", "晓智")


# --------------------- Helpers ---------------------
def get_baidu_token() -> Optional[str]:
    """Fetch an OAuth access token for the Baidu AI Cloud APIs.

    Returns:
        The access token string, or ``None`` if the request fails, the
        server answers with a non-200 status, or the response carries no
        ``access_token`` field.
    """
    url = "https://aip.baidubce.com/oauth/2.0/token"
    params = {
        "grant_type": "client_credentials",
        "client_id": BAIDU_API_KEY,
        "client_secret": BAIDU_SECRET_KEY,
    }
    try:
        response = requests.post(url, data=params, timeout=REQUEST_TIMEOUT)
        if response.status_code == 200:
            token = response.json().get("access_token")
            if token:
                return token
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a body that is not valid JSON.
        print("获取百度 Token 失败:", e)
        return None
    print("获取百度 Token 失败")
    return None


def record_audio_vad(filename, max_duration=10, silence_timeout=3.0):
    """Record microphone audio to a WAV file, stopping on silence.

    Audio is read in ``VAD_FRAME_MS`` frames and each frame is classified by
    WebRTC VAD. Recording stops when more than ``silence_timeout`` seconds
    of consecutive non-speech frames have been seen, or when
    ``max_duration`` seconds have been captured, whichever comes first.
    Silence is counted from the first frame, so the timeout also bounds how
    long the function waits for the speaker to start.

    Args:
        filename: Path of the WAV file to write (16 kHz, mono, 16-bit).
        max_duration: Upper bound on the recording length, in seconds.
        silence_timeout: Consecutive silence, in seconds, that ends the
            recording. The default of 3.0 s equals the original hard-coded
            150 frames of 20 ms.
    """
    # Aggressiveness 0-3: higher values filter out non-speech more strictly.
    vad = webrtcvad.Vad(1)

    max_silence_frames = int(silence_timeout * 1000 / VAD_FRAME_MS)
    max_frames = int((RATE / VAD_FRAME_SAMPLES) * max_duration)

    frames = []
    p = pyaudio.PyAudio()
    try:
        # Query the sample width while PortAudio is still initialized.
        sample_width = p.get_sample_size(FORMAT)
        stream = p.open(
            format=FORMAT,
            channels=CHANNELS,
            rate=RATE,
            input=True,
            frames_per_buffer=VAD_FRAME_SAMPLES,
        )
        try:
            print("开始录音(自动检测静音停止)...")
            silence_count = 0
            while True:
                # Read exactly one VAD frame; overflow is tolerated so a
                # slow iteration does not abort the recording.
                data = stream.read(VAD_FRAME_SAMPLES, exception_on_overflow=False)
                frames.append(data)

                if vad.is_speech(data, RATE):
                    silence_count = 0
                else:
                    silence_count += 1

                if silence_count > max_silence_frames:
                    print("检测到静音,录音结束。")
                    break

                if len(frames) > max_frames:
                    print("达到最大录音时长,录音结束。")
                    break
        finally:
            # Always release the input device, even if reading failed.
            stream.stop_stream()
            stream.close()
    finally:
        p.terminate()

    with wave.open(filename, 'wb') as wf:
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(sample_width)
        wf.setframerate(RATE)
        wf.writeframes(b''.join(frames))


def speech_recognition(audio_file, token) -> Optional[str]:
    """Transcribe an audio file with the Baidu speech-recognition API.

    Args:
        audio_file: Path to the WAV file to transcribe.
        token: Baidu access token from :func:`get_baidu_token`.

    Returns:
        The first recognition candidate (possibly an empty string), or
        ``None`` if the request fails or the API reports an error.
    """
    with open(audio_file, "rb") as f:
        speech_data = f.read()
    speech_base64 = base64.b64encode(speech_data).decode('utf-8')

    payload = {
        "format": "wav",
        "rate": RATE,
        "channel": 1,
        "token": token,
        "cuid": DEVICE_ID,
        "len": len(speech_data),  # size of the raw bytes, not of the base64 text
        "speech": speech_base64,
        # NOTE(review): hot-word list kept from the original request; confirm
        # that the REST endpoint actually honors this field.
        "word_list": ["小智","小志","小至"]
    }
    # HTTPS so the access token and audio are not sent in clear text.
    url = "https://vop.baidu.com/server_api"
    headers = {'Content-Type': 'application/json'}
    try:
        response = requests.post(url, json=payload, headers=headers, timeout=REQUEST_TIMEOUT)
        result = response.json()
    except (requests.RequestException, ValueError) as e:
        print("语音识别错误:", e)
        return None

    if result.get("err_no") == 0:
        # Guard against a missing or empty candidate list.
        candidates = result.get("result") or [""]
        return candidates[0]
    print("语音识别错误:", result.get("err_msg"))
    return None


def wake_word_detected(text) -> bool:
    """Return True if *text* contains any of the configured wake words."""
    return any(word in text for word in WAKE_WORDS)


def deepseek_conversation(user_text) -> str:
    """Ask the DeepSeek chat model to answer *user_text*.

    Uses the OpenAI SDK pointed at the DeepSeek endpoint with the
    ``deepseek-chat`` model.

    Returns:
        The model's reply, or a fixed apology string if the call fails or
        the reply is empty.
    """
    fallback = "抱歉,我无法获取答案。"
    try:
        client = OpenAI(api_key=DEEPSEEK_API_KEY, base_url="https://api.deepseek.com")
        response = client.chat.completions.create(
            model="deepseek-chat",
            messages=[{"role": "system", "content": "你是一名叫小智的助手,回复不需要使用markdown格式,请直接以文本形式回复。"},
                      {"role": "user", "content": user_text}],
            stream=False
        )
        # The message content may be None; never hand None to the TTS step.
        return response.choices[0].message.content or fallback
    except Exception as e:
        # Deliberate best-effort boundary: any SDK/network failure becomes a
        # spoken apology instead of ending the assistant loop.
        print("DeepSeek API 调用异常:", e)
        return fallback


def text_to_speech(text, token, output_file="answer.mp3") -> Optional[str]:
    """Synthesize *text* to an audio file with the Baidu text2audio API.

    Args:
        text: Text to speak. Only the first 1024 characters are sent.
        token: Baidu access token from :func:`get_baidu_token`.
        output_file: Path of the audio file to write.

    Returns:
        ``output_file`` on success, or ``None`` if *text* is empty, the
        request fails, or the API answers with something other than audio.
    """
    if not text:
        return None

    MAX_CHAR = 1024
    text = text[:MAX_CHAR]

    params = {
        "tex": text,
        "tok": token,
        "cuid": DEVICE_ID,
        "ctp": 1,
        "lan": "zh"
    }
    # HTTPS so the access token is not sent in clear text.
    url = "https://tsn.baidu.com/text2audio"
    try:
        response = requests.post(url, data=params, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as e:
        print("语音合成错误:", e)
        return None

    # Success is signalled by an audio content type; anything else is treated
    # as an error and its body is printed. A prefix test tolerates parameters
    # appended to the header value.
    content_type = response.headers.get('Content-Type') or ""
    if content_type.startswith("audio/"):
        with open(output_file, "wb") as f:
            f.write(response.content)
        return output_file
    print("语音合成错误:", response.text)
    return None


def play_audio(file_path):
    """Play an audio file with pygame and block until playback finishes."""
    pygame.mixer.init()
    pygame.mixer.music.load(file_path)
    pygame.mixer.music.play()
    while pygame.mixer.music.get_busy():
        time.sleep(0.1)
    # Release the file so the next synthesis can overwrite the same path.
    # music.unload() exists only in pygame 2.0+, hence the lookup.
    unload = getattr(pygame.mixer.music, "unload", None)
    if unload is not None:
        unload()


def _answer_one_question(token):
    """Acknowledge the wake word, then record, answer and speak one question."""
    print("唤醒成功,小智回应:好的,小智在。")

    response_audio = text_to_speech("好的,小智在。请说出你的问题。", token, output_file="wakeup_response.mp3")
    if response_audio:
        play_audio(response_audio)

    print("请说出您的问题:")
    record_audio_vad(filename="query.wav")
    user_query = speech_recognition("query.wav", token)
    if not user_query:
        print("未能识别您的问题,请重试。")
        return

    print("用户说:", user_query)
    answer = deepseek_conversation(user_query)
    print("小智回答:", answer)
    audio_file = text_to_speech(answer, token, output_file="answer.mp3")
    if audio_file:
        play_audio(audio_file)


# --------------------- Entry point ---------------------
def main():
    """Run the assistant: wait for the wake word, then answer one question.

    Loops until interrupted with Ctrl+C. Returns immediately if credentials
    are missing or the Baidu access token cannot be obtained.
    """
    print("启动智能助手小智...")
    if not (BAIDU_API_KEY and BAIDU_SECRET_KEY and DEEPSEEK_API_KEY):
        print("缺少 API 密钥:请设置环境变量 BAIDU_API_KEY、BAIDU_SECRET_KEY 和 DEEPSEEK_API_KEY。")
        return

    token = get_baidu_token()
    if not token:
        return

    try:
        while True:
            print("等待唤醒词 '小智' ...")
            record_audio_vad("wake.wav", max_duration=3)
            wake_text = speech_recognition("wake.wav", token)

            # Use the shared wake-word list so common misrecognitions
            # (e.g. "小志") also wake the assistant.
            if wake_text and wake_word_detected(wake_text):
                _answer_one_question(token)

            time.sleep(1)
    except KeyboardInterrupt:
        # Ctrl+C is the intended way to stop; exit without a traceback.
        print("\n小智已退出。")


if __name__ == '__main__':
    main()
b/小智智能助手/test.py @@ -0,0 +1,23 @@ + +BAIDU_API_KEY = "gQyEX2mdkEa3gHvaYxcXMSv3" +BAIDU_SECRET_KEY = "M5s4mMH3B5yeX5LDP4RME3rdlxATb3lO" +# 设备唯一标识(可设置为行空板的设备 ID 或 MAC 地址) +DEVICE_ID = "your_device_id" + +# DeepSeek API 配置(请替换为实际接口地址及参数) +DEEPSEEK_API_URL = "https://api.deepseek.com" +DEEPSEEK_API_KEY = "sk-f15b44b6b3344cdd820e59acebce9d2c" + +""" +curl https://api.deepseek.com/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer " \ + -d '{ + "model": "deepseek-chat", + "messages": [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "Hello!"} + ], + "stream": false + }' +""" \ No newline at end of file diff --git a/小智智能助手/流程图.md b/小智智能助手/流程图.md new file mode 100644 index 0000000..72f6246 --- /dev/null +++ b/小智智能助手/流程图.md @@ -0,0 +1,12 @@ +```mermaid +graph TD + A[用户说话] --> B[语音录音模块pyaudiowebrtcva] + B --> C[语音识别模块百度语音识别API] + C --> D[语义理解模块DeepSeek大模型] + D --> E[语音合成模块百度语音合成API] + E --> F[音频播放模块pygame] + F --> G[返回语音回答] + G --> H{是否继续交互} + H -- 是 --> B + H -- 否 --> I[结束程序] +``` \ No newline at end of file diff --git a/小智智能助手/流程图.png b/小智智能助手/流程图.png new file mode 100644 index 0000000..fc4af58 Binary files /dev/null and b/小智智能助手/流程图.png differ diff --git a/小智智能助手/附件2:作品说明文档.doc b/小智智能助手/附件2:作品说明文档.doc new file mode 100644 index 0000000..eb084dc Binary files /dev/null and b/小智智能助手/附件2:作品说明文档.doc differ diff --git a/小智智能助手/附件4:参赛选手承诺书.docx b/小智智能助手/附件4:参赛选手承诺书.docx new file mode 100644 index 0000000..60469f1 Binary files /dev/null and b/小智智能助手/附件4:参赛选手承诺书.docx differ