整体流程很简单:先把录音交给百度语音 API 转换成文字,
再把文字提交给图灵机器人,得到文字回答,
最后把返回的文字交给百度语音合成,转换成语音输出。
sox 不太好安装,可以参考 http://www.plane.run/306.py
其他的库直接用 pip3 install 都可以正常安装。
import json
import os
import subprocess
import time
import wave

import pyaudio
import requests
import speech_recognition as sr
from aip import AipSpeech

# Baidu speech API credentials (fill in your own values).
APP_ID = ' '
API_KEY = ' '
SECRET_KEY = ' '
client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)

# Turing robot (chatbot) API settings.
TURING_KEY = " "
URL = "http://openapi.turingapi.com/openapi/api/v2"
HEADERS = {'Content-Type': 'application/json;charset=UTF-8'}

# Spoken when the chatbot cannot be reached or returns no usable text.
ROBOT_FALLBACK = "我暂时答不上来"


def rec(rate=16000):
    """Record one utterance from the microphone into ``recording.wav``.

    Args:
        rate: Sample rate in Hz requested from the microphone. ``listen``
            declares the recording as 16000 Hz, so keep the two in sync.
    """
    r = sr.Recognizer()
    with sr.Microphone(sample_rate=rate) as source:
        print("[话痨版小飞] 正在等你说话...")
        audio = r.listen(source)
    with open("recording.wav", "wb") as f:
        f.write(audio.get_wav_data())


def listen():
    """Send ``recording.wav`` to Baidu speech recognition and return text.

    Returns:
        The first recognition candidate, or "我沉默了" when the response
        carries no (or an empty) ``result`` list.
    """
    with open('recording.wav', 'rb') as f:
        audio_data = f.read()

    # 16000 must match the sample rate used by rec(); dev_pid selects the
    # recognition model (see the Baidu ASR documentation for the id table).
    result = client.asr(audio_data, 'wav', 16000, {
        'dev_pid': 1536,
    })

    # An error response has no "result" key; also guard against an empty
    # candidate list so indexing cannot raise.
    candidates = result.get("result") if isinstance(result, dict) else None
    if candidates:
        result_text = candidates[0]
    else:
        result_text = "我沉默了"

    print("你说: " + result_text)
    return result_text


def robot(text=""):
    """Ask the Turing robot for a reply to *text* and return the reply.

    Network failures, non-JSON replies and responses without any text
    value no longer crash the caller; ``ROBOT_FALLBACK`` is returned
    instead so the conversation loop keeps running.

    Args:
        text: The user's utterance.

    Returns:
        The reply text (the first result that carries a ``text`` value).
    """
    data = {
        "reqType": 0,
        "perception": {
            "inputText": {
                "text": text
            },
            "selfInfo": {
                "location": {
                    "city": "威海",
                    "street": "古寨东路"
                }
            }
        },
        "userInfo": {
            "apiKey": TURING_KEY,
            "userId": "starky"
        }
    }

    results = []
    try:
        # A timeout keeps a stalled connection from hanging the loop forever.
        response = requests.request(
            "post", URL, json=data, headers=HEADERS, timeout=10)
        response_dict = json.loads(response.text)
        if isinstance(response_dict, dict):
            results = response_dict.get("results") or []
    except (requests.RequestException, ValueError) as err:
        print("[话痨版小飞] 图灵机器人请求失败: " + str(err))

    # Use the first result that actually contains text; the first entry is
    # not guaranteed to be a text result.
    result = ROBOT_FALLBACK
    for item in results:
        values = item.get("values") if isinstance(item, dict) else None
        if isinstance(values, dict) and "text" in values:
            result = values["text"]
            break

    print("小飞: " + result)
    return result


def speak(text=""):
    """Synthesize *text* with Baidu TTS and write it to ``audio.mp3``.

    Args:
        text: The text to speak.

    Returns:
        True when audio was written; False when the service returned an
        error dict (in which case ``audio.mp3`` is left untouched and must
        not be played as if it were the new reply).
    """
    result = client.synthesis(text, 'zh', 1, {
        'spd': 5,
        'vol': 5,
        'per': 4,
    })
    # The SDK returns raw audio bytes on success and a dict on failure.
    if isinstance(result, dict):
        print("[话痨版小飞] 语音合成失败: " + str(result))
        return False

    with open('audio.mp3', 'wb') as f:
        f.write(result)
    return True


def play():
    """Convert ``audio.mp3`` to WAV with sox and play it through PyAudio.

    Baidu only returns MP3, hence the extra conversion step. If sox is
    missing or fails, a message is printed and nothing is played. Audio
    resources are released even when playback raises.
    """
    try:
        # Argument list instead of a shell string: no shell involved.
        subprocess.run(['sox', 'audio.mp3', 'audio.wav'], check=True)
    except (OSError, subprocess.CalledProcessError) as err:
        print("[话痨版小飞] sox 转换失败: " + str(err))
        return

    with wave.open('audio.wav', 'rb') as wf:
        p = pyaudio.PyAudio()
        try:
            def callback(in_data, frame_count, time_info, status):
                # Feed the next chunk of frames to the output stream.
                data = wf.readframes(frame_count)
                return (data, pyaudio.paContinue)

            stream = p.open(
                format=p.get_format_from_width(wf.getsampwidth()),
                channels=wf.getnchannels(),
                rate=wf.getframerate(),
                output=True,
                stream_callback=callback)
            try:
                stream.start_stream()
                # Playback runs in the callback; poll until it finishes.
                while stream.is_active():
                    time.sleep(0.1)
                stream.stop_stream()
            finally:
                stream.close()
        finally:
            p.terminate()


def main():
    """Run the conversation loop: record, recognize, reply, speak, play."""
    while True:
        rec()
        request = listen()
        response = robot(request)
        # Only play when fresh audio was actually produced.
        if speak(response):
            play()


if __name__ == "__main__":
    main()