CQ_Intelligent-Technology-T.../Assets/Scripts/语音控制脚本/SpeechToText.cs

335 lines
9.2 KiB
C#
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

using Newtonsoft.Json.Linq;
using System;
using System.Collections;
using System.Collections.Generic;
using System.Threading.Tasks;
using UnityEngine;
public class SpeechToText : MonoBehaviour
{
public static SpeechToText Instance;
/// <summary>当前物体的AudioSource组件<summary>
[HideInInspector]
public AudioSource audioSource;
/// <summary>用户录入的音频</summary>
private AudioClip recordedAudioClip;
/// <summary>
/// 转换后的文本
/// </summary>
public string GetSpeechText;
private void Awake()
{
Instance = this;
}
private void Start()
{
//获取AudioSource组件
audioSource = GetComponent<AudioSource>();
//测试用[!重要代码],此操作为异步完成,需要一段时间后才能执行=>{}里的代码。
SendTextToSpeechMsg("你好啊,我是讯飞语音助手!", auidoClip =>
{
audioSource.clip = auidoClip;
audioSource.Play();
});
}
#region
private bool recording = false;
/// <summary>
/// 当 录音按钮 按下时调用
/// </summary>
public void StartSpeechTotext()
{
if (recording == false)
{
recording = true;
//开始录音[!重要代码]
StartRecord();
}
else
{
recording = false;
//结束录音[!重要代码]
EndRecord((text, _) =>
{
Debug.Log($"讯飞语音转文本成功!文本为:{text}");
//GetSpeechText = text;
Control_Light.Instance.AudiosContorl(text);
});
}
}
#endregion
#region
/// <summary>
/// 向XunFei发送消息并等待其返回
/// </summary>
/// <param name="text">文本</param>
/// <param name="chatGLMCallback">回调函数</param>
public void SendTextToSpeechMsg(string text, Action<AudioClip> callback)
{
//构建Json字符串
JObject jObject = new JObject();
jObject["text"] = text;
//可以更改成你想要的声音,具体内容在讯飞控制台中查看
jObject["voice"] = "xiaoyan";
//发送消息
StartCoroutine(SendTextToSpeechMsgCoroutine(jObject, callback));
}
/// <summary>
/// 向XunFei发送消息的协程
/// </summary>
/// <param name="message"></param>
/// <param name="callback">收到消息后的回调函数</param>
/// <returns></returns>
private IEnumerator SendTextToSpeechMsgCoroutine(JObject message, Action<AudioClip> callback)
{
//请求数据
Task<string> resultJson = XunFeiManager.Instance.TextToSpeech(message);
//等待返回消息
yield return new WaitUntil(() => resultJson.IsCompleted);
//成功接收到消息
if (resultJson.IsCompletedSuccessfully == true)
{
//解析Json字符串
JObject obj = JObject.Parse(resultJson.Result);
//获取音频数据base64字符串
string text = obj["data"].ToString();
//解析音频数据
float[] audioData = BytesToFloat(Convert.FromBase64String(text));
if (audioData.Length == 0)//讯飞文本转语音失败
{
Debug.Log($"讯飞文本转语音失败可能由于输入文本为空或不正确导致语音长度为0错误信息{resultJson.Result}");
//失败回调
callback.Invoke(null);
}
//构建AudioClip
AudioClip audioClip = AudioClip.Create("SynthesizedAudio", audioData.Length, 1, 16000, false);
audioClip.SetData(audioData, 0);
//Debug.Log("讯飞文本转语音成功");
//回调
callback.Invoke(audioClip);
}
else
{
Debug.Log($"讯飞文本转语音消息发送失败,错误信息:{resultJson.Result}");
//失败回调
callback.Invoke(null);
}
}
/// <summary>
/// byte[]数组转化为AudioClip可读取的float[]类型
/// </summary>
/// <param name="byteArray"></param>
/// <returns></returns>
private static float[] BytesToFloat(byte[] byteArray)
{
float[] sounddata = new float[byteArray.Length / 2];
for (int i = 0; i < sounddata.Length; i++)
{
sounddata[i] = bytesToFloat(byteArray[i * 2], byteArray[i * 2 + 1]);
}
return sounddata;
}
private static float bytesToFloat(byte firstByte, byte secondByte)
{
// convert two bytes to one short (little endian)
//小端和大端顺序要调整
short s;
if (BitConverter.IsLittleEndian)
s = (short)((secondByte << 8) | firstByte);
else
s = (short)((firstByte << 8) | secondByte);
// convert to range from -1 to (just below) 1
return s / 32768.0F;
}
#endregion
#region
/// <summary>
/// 开始录音
/// </summary>
public void StartRecord()
{
//开始录音频最长40秒
recordedAudioClip = Microphone.Start(null, true, 40, 16000);
}
/// <summary>
/// 结束录音
/// </summary>
/// <param name="speechToTextCallback">语音转文本成功后的回调函数</param>
public void EndRecord(Action<string, AudioClip> speechToTextCallback)
{
//取消了录音
if (speechToTextCallback == null) return;
//录音结束
Microphone.End(null);
//去除掉没有声音的片段
recordedAudioClip = TrimSilence(recordedAudioClip, 0.01f);
//发送消息
SendSpeechToTextMsg(recordedAudioClip, text =>
{
//回调
speechToTextCallback.Invoke(text, recordedAudioClip);
});
}
/// <summary>
/// 向XunFei发送消息并等待其返回
/// </summary>
/// <param name="strContent">音频数据</param>
/// <param name="chatGLMCallback">回调函数</param>
public void SendSpeechToTextMsg(AudioClip audioClip, Action<string> callback)
{
byte[] bytes = AudioClipToBytes(audioClip);
//构建Json字符串
JObject jObject = new JObject();
jObject["data"] = Convert.ToBase64String(bytes);
//发送消息
StartCoroutine(SendSpeechToTextMsgCoroutine(jObject, callback));
}
/// <summary>
/// 向XunFei发送消息的协程
/// </summary>
/// <param name="message"></param>
/// <param name="callback">收到消息后的回调函数</param>
/// <returns></returns>
private IEnumerator SendSpeechToTextMsgCoroutine(JObject message, Action<string> callback)
{
//请求数据
Task<string> resultJson = XunFeiManager.Instance.SpeechToText(message);
//等待返回消息
yield return new WaitUntil(() => resultJson.IsCompleted);
//成功接收到消息
if (resultJson.IsCompletedSuccessfully == true)
{
//解析Json字符串
JObject obj = JObject.Parse(resultJson.Result);
//获取相似度
string text = obj["text"].ToString();
//Debug.Log("讯飞语音转文本:" + text);
//回调
callback.Invoke(text);
}
else
{
Debug.Log("讯飞语音转文本消息发送失败");
//失败回调
callback.Invoke(string.Empty);
}
}
/// <summary>
/// 将AudioClip转换成byte[]数据
/// </summary>
/// <param name="audioClip">Unity中的音频数据</param>
/// <returns>byte[]数据</returns>
private static byte[] AudioClipToBytes(AudioClip audioClip)
{
float[] data = new float[audioClip.samples];
audioClip.GetData(data, 0);
int rescaleFactor = 32767; //to convert float to Int16
byte[] outData = new byte[data.Length * 2];
for (int i = 0; i < data.Length; i++)
{
short temshort = (short)(data[i] * rescaleFactor);
byte[] temdata = BitConverter.GetBytes(temshort);
outData[i * 2] = temdata[0];
outData[i * 2 + 1] = temdata[1];
}
return outData;
}
/// <summary>
/// 剔除沉默音域
/// </summary>
/// <param name="clip"></param>
/// <param name="min"></param>
/// <returns></returns>
private static AudioClip TrimSilence(AudioClip clip, float min)
{
var samples = new float[clip.samples];
clip.GetData(samples, 0);
return TrimSilence(new List<float>(samples), min, clip.channels, clip.frequency);
}
private static AudioClip TrimSilence(List<float> samples, float min, int channels, int hz, bool _3D = false)
{
int origSamples = samples.Count;
int i;
for (i = 0; i < samples.Count; i++)
{
if (Mathf.Abs(samples[i]) > min)
{
break;
}
}
i -= (int)(hz * .1f);
i = Mathf.Max(i, 0);
// Remove start silence
samples.RemoveRange(0, i);
for (i = samples.Count - 1; i > 0; i--)
{
if (Mathf.Abs(samples[i]) > min)
{
break;
}
}
// Add some tail onto it
i += (int)(hz * .1f);
i = Mathf.Min(i, samples.Count - 1);
samples.RemoveRange(i, samples.Count - i);
if (samples.Count == 0)
{
Debug.Log("剔除后的AudioClip长度为0");
return null;
}
var clip = AudioClip.Create("TempClip", samples.Count, channels, hz, _3D);
clip.SetData(samples.ToArray(), 0);
return clip;
}
#endregion
}