12 Star 61 Fork 56

金诺 / TDXPystock

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
AudioToText.py 4.67 KB
一键复制 编辑 原始数据 按行查看 历史
金诺 提交于 2021-11-21 11:04 . Update AudioToText.py
# 使用百度api 语音转文字功能 [对源语音要求太高了] 本文件代码运行有问题不要联系我,告诉你环境问题,整这个环境很花时间,一时半会根本说不清,自行google搜索解决
import requests as req
import json,base64,os
import speech_recognition as sr
from pocketsphinx import AudioFile
from Speech_and_Text import speech_to_text_cmu
import ffmpeg
baiduaudioappapiid='xxx' #文字转语音的apiid
baiduaudioappAPI_Key='xxx'
baiduaudioappSecret_Key='xxx'
#获取百度对应的appid的token
def getbaiduApiToken(grant_type,API_Key,Secret_Key):
try:
host = f'https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials&client_id={API_Key}&client_secret={Secret_Key}'
response = req.get(host)
if response:
return response.json()['access_token']
except BaseException as b:
print(b)
return None #wg
#使用百度的语音识别
def audioTotextForBaidu(audiofile):
try:
if os.path.exists(audiofile):
pass
else:
return None
token=getbaiduApiToken('client_credentials',baiduaudioappAPI_Key,baiduaudioappSecret_Key)
if token is None:
return None
print(token)
filetext=''
with open(audiofile,'rb') as fr:
filetext = fr.read() #将读取的二进制内容转成base64
filelen = len(filetext)
base64text=base64.b64encode(filetext)
if str(base64text).strip() =='' or base64text is None:
return None
url='http://vop.baidu.com/server_api'
header={'Content-Type':'application/json'}
postdata={
"format":"pcm",
"rate":16000,
#"dev_pid":1537,
"channel":1,
"token":token,
"cuid":'baidu_workshop', #用户唯一标识
"len":filelen,
"speech":base64text,
}
response=req.post(url=url,json=postdata,headers=header,timeout=60)
result=response.json()['result']
print(result)
return result
except BaseException as b :
print(b)
return b
#使用本地安装的speech_recognition 语音包[PocketSphinx]
def audioTotextForlocal_zh_CN(audiofile):
try:
r = sr.Recognizer()
# print(r)
wavFile = sr.AudioFile(audiofile)
# print(wavFile)
with wavFile as source:
audio = r.record(source)
# recognize speech using Sphinx
try:
return r.recognize_sphinx(audio, language="zh-CN")
except sr.UnknownValueError:
print("无法理解")
except sr.RequestError as e:
print("error; {0}".format(e))
except BaseException as b:
print(b)
# print("听起来像英文 " + r.recognize_sphinx(audio))
#使用本地安装的speech_recognition 语音包[PocketSphinx]
def audioTotextForlocal_en_US(audiofile): #使用本地安装的speech_recognition 语音包[PocketSphinx]
try:
r = sr.Recognizer()
# print(r)
wavFile = sr.AudioFile(audiofile)
# print(wavFile)
with wavFile as source:
audio = r.record(source)
# recognize speech using Sphinx
try:
return r.recognize_sphinx(audio, language="en-US")
except sr.UnknownValueError:
print("无法理解")
except sr.RequestError as e:
print("error; {0}".format(e))
except BaseException as b:
print(b)
#调用科大讯飞
def audioTotext_by_ifly():
from Speech_and_Text import speech_to_text_ifly
# 从文件读入
# speech_to_text_ifly(audio_path="path_of_audio", if_microphone=False)
# 从麦克风读入
speech_to_text_ifly(if_microphone=True)
#从麦克风读取声音转文字
def frommkaudiotoText_by_cmu(filename):
# 从文件读入
speech_to_text_cmu(audio_path=filename, if_microphone=False)
# 从麦克风读入
# speech_to_text_cmu(if_microphone=True)
#采样率转换 也达到压缩的效果
def cylchange(srcfile,targetfile,cyl):
try:
ffmpeg.input(srcfile).output(targetfile, ar=cyl).run()
except BaseException as b:
print(b)
return None
#本地语音文件float数据 转int
def local_audiofile_OnFloatToInt(srcfile):
pass
if __name__ == '__main__':
# audiofile_cn=u'./data/textToaudioTest.mp3'
audiofile_cn=u'./data/test.wav'
audiofile_en=u'./data/testfile.mp3'
# audioTotextForBaidu(audiofile_cn)
sample_rate=25050
print(audioTotextForlocal_zh_CN(audiofile_cn))
targetfile=u'./data/testfile'+str(sample_rate)+'.mp3'
cylchange(audiofile_en,targetfile,sample_rate)
print(audioTotextForlocal_en_US(audiofile_cn))
# frommkaudiotoText_by_cmu(audiofile_en)
Python
1
https://gitee.com/top3/TDXPystock.git
git@gitee.com:top3/TDXPystock.git
top3
TDXPystock
TDXPystock
newmastor

搜索帮助