using Newtonsoft.Json.Linq;
using System;
using System.Collections;
using System.Collections.Generic;
using System.Threading.Tasks;
using UnityEngine;

/// <summary>
/// Unity component that bridges the XunFei (iFlytek) speech service:
/// it synthesizes speech from text and transcribes microphone recordings to text.
/// All service calls go through <c>XunFeiManager.Instance</c> and are awaited inside coroutines.
/// </summary>
public class SpeechToText : MonoBehaviour
{
    /// <summary>Sample rate (Hz) used for microphone capture and for synthesized clips.</summary>
    private const int SampleRate = 16000;

    /// <summary>Length, in seconds, of the microphone recording buffer.</summary>
    private const int MaxRecordSeconds = 40;

    /// <summary>Absolute amplitude at or below which a sample is treated as silence.</summary>
    private const float SilenceThreshold = 0.01f;

    /// <summary>Seconds of audio kept around the detected speech when trimming silence.</summary>
    private const float TrimPaddingSeconds = .1f;

    /// <summary>Most recently awakened instance of this component.</summary>
    public static SpeechToText Instance;

    /// <summary>AudioSource on the same GameObject; used to play synthesized speech.</summary>
    [HideInInspector]
    public AudioSource audioSource;

    /// <summary>Audio recorded from the microphone (replaced by its silence-trimmed version in <see cref="EndRecord"/>).</summary>
    private AudioClip recordedAudioClip;

    /// <summary>
    /// Recognized text. This class never assigns the field; it is kept for external users.
    /// </summary>
    public string GetSpeechText;

    private void Awake()
    {
        Instance = this;
    }

    private void Start()
    {
        // Fetch the AudioSource component of this GameObject.
        audioSource = GetComponent<AudioSource>();

        // Test call: the request is asynchronous, so the lambda runs only after the service answers.
        SendTextToSpeechMsg("你好啊,我是讯飞语音助手!", auidoClip =>
        {
            // A null clip signals that synthesis failed; there is nothing to play.
            if (auidoClip == null)
            {
                return;
            }

            audioSource.clip = auidoClip;
            audioSource.Play();
        });
    }

    #region Speech-to-text (test entry point)

    /// <summary>True while a recording started by <see cref="StartSpeechTotext"/> is in progress.</summary>
    private bool recording = false;

    /// <summary>
    /// Called when the record button is pressed. The first press starts recording; the next press
    /// stops it, transcribes the audio and forwards the text to <c>Control_Light</c>.
    /// </summary>
    public void StartSpeechTotext()
    {
        if (!recording)
        {
            recording = true;
            StartRecord();
            return;
        }

        recording = false;
        EndRecord((text, _) =>
        {
            Debug.Log($"讯飞语音转文本成功!文本为:{text}");
            Control_Light.Instance.AudiosContorl(text);
        });
    }

    #endregion

    #region XunFei text-to-speech

    /// <summary>
    /// Sends text to XunFei for synthesis and reports the result asynchronously.
    /// </summary>
    /// <param name="text">Text to synthesize.</param>
    /// <param name="callback">Receives the synthesized clip, or null when synthesis failed.</param>
    public void SendTextToSpeechMsg(string text, Action<AudioClip> callback)
    {
        // Build the JSON request. The voice name can be changed; available voices are listed
        // in the XunFei console.
        JObject jObject = new JObject();
        jObject["text"] = text;
        jObject["voice"] = "xiaoyan";

        StartCoroutine(SendTextToSpeechMsgCoroutine(jObject, callback));
    }

    /// <summary>
    /// Coroutine that sends the synthesis request, waits for the task to finish and converts the
    /// response into an <see cref="AudioClip"/>.
    /// </summary>
    /// <param name="message">JSON request body.</param>
    /// <param name="callback">Invoked exactly once: with the clip on success, with null on failure.</param>
    private IEnumerator SendTextToSpeechMsgCoroutine(JObject message, Action<AudioClip> callback)
    {
        Task<string> resultJson = XunFeiManager.Instance.TextToSpeech(message);

        // Poll the task from the coroutine until it finishes.
        yield return new WaitUntil(() => resultJson.IsCompleted);

        if (!resultJson.IsCompletedSuccessfully)
        {
            // Task.Result would rethrow on a faulted/cancelled task, so log the exception instead.
            Debug.Log($"讯飞文本转语音消息发送失败,错误信息:{resultJson.Exception}");
            callback?.Invoke(null);
            yield break;
        }

        float[] audioData = DecodeSynthesizedAudio(resultJson.Result);
        if (audioData.Length == 0)
        {
            Debug.Log($"讯飞文本转语音失败,可能由于输入文本为空或不正确,导致语音长度为0,错误信息:{resultJson.Result}");
            callback?.Invoke(null);
            // Stop here: creating a zero-length clip is invalid and the callback must fire only once.
            yield break;
        }

        AudioClip audioClip = AudioClip.Create("SynthesizedAudio", audioData.Length, 1, SampleRate, false);
        audioClip.SetData(audioData, 0);
        callback?.Invoke(audioClip);
    }

    /// <summary>
    /// Extracts the base64 audio in the response's "data" field and converts it to float samples.
    /// </summary>
    /// <param name="responseJson">Raw JSON text returned by the service.</param>
    /// <returns>
    /// The decoded samples, or an empty array when the response is empty, is not valid JSON,
    /// has no "data" field, or the field is not valid base64.
    /// </returns>
    private static float[] DecodeSynthesizedAudio(string responseJson)
    {
        if (string.IsNullOrEmpty(responseJson))
        {
            return Array.Empty<float>();
        }

        try
        {
            JObject obj = JObject.Parse(responseJson);
            string base64Audio = obj["data"]?.ToString();
            if (string.IsNullOrEmpty(base64Audio))
            {
                return Array.Empty<float>();
            }

            return BytesToFloat(Convert.FromBase64String(base64Audio));
        }
        catch (Exception e) when (e is Newtonsoft.Json.JsonException || e is FormatException)
        {
            // The caller logs the raw response for the empty-audio case, so no extra log here.
            return Array.Empty<float>();
        }
    }

    /// <summary>
    /// Converts 16-bit PCM bytes into the float samples that <see cref="AudioClip.SetData"/> accepts.
    /// A trailing odd byte is ignored.
    /// </summary>
    /// <param name="byteArray">PCM data, two bytes per sample.</param>
    /// <returns>One float per 16-bit sample.</returns>
    private static float[] BytesToFloat(byte[] byteArray)
    {
        float[] sounddata = new float[byteArray.Length / 2];
        for (int i = 0; i < sounddata.Length; i++)
        {
            sounddata[i] = bytesToFloat(byteArray[i * 2], byteArray[i * 2 + 1]);
        }

        return sounddata;
    }

    /// <summary>
    /// Combines two bytes into one signed 16-bit sample and scales it to [-1, 1).
    /// </summary>
    /// <param name="firstByte">Low-order byte of the sample.</param>
    /// <param name="secondByte">High-order byte of the sample.</param>
    /// <returns>The sample divided by 32768.</returns>
    private static float bytesToFloat(byte firstByte, byte secondByte)
    {
        // The byte order belongs to the data, not to the host CPU, and shifts are host-independent,
        // so the sample is always assembled as little-endian.
        // NOTE(review): assumes the service returns little-endian PCM, as the original comment stated - confirm.
        short s = (short)((secondByte << 8) | firstByte);
        return s / 32768.0F;
    }

    #endregion

    #region XunFei speech-to-text

    /// <summary>
    /// Starts recording from the default microphone into a looping buffer of
    /// <see cref="MaxRecordSeconds"/> seconds.
    /// </summary>
    public void StartRecord()
    {
        // NOTE(review): loop is true, so recordings longer than the buffer presumably wrap around
        // and overwrite the beginning - confirm this is intended.
        recordedAudioClip = Microphone.Start(null, true, MaxRecordSeconds, SampleRate);
    }

    /// <summary>
    /// Stops recording, trims silence and sends the audio for transcription.
    /// </summary>
    /// <param name="speechToTextCallback">
    /// Receives the recognized text and the trimmed clip. On failure the text is empty, and the clip
    /// is null when nothing usable was recorded. Pass null to cancel the recording without
    /// transcribing it.
    /// </param>
    public void EndRecord(Action<string, AudioClip> speechToTextCallback)
    {
        // Always release the microphone, including on cancellation; otherwise it keeps recording.
        Microphone.End(null);

        // Recording was cancelled.
        if (speechToTextCallback == null)
        {
            return;
        }

        // Drop the silent parts; the clip is null when recording never started.
        if (recordedAudioClip != null)
        {
            recordedAudioClip = TrimSilence(recordedAudioClip, SilenceThreshold);
        }

        AudioClip trimmedClip = recordedAudioClip;
        if (trimmedClip == null)
        {
            // Nothing usable was recorded: report failure the same way a failed request does.
            speechToTextCallback.Invoke(string.Empty, null);
            return;
        }

        // Capture the local so a later recording cannot change which clip is handed back.
        SendSpeechToTextMsg(trimmedClip, text => speechToTextCallback.Invoke(text, trimmedClip));
    }

    /// <summary>
    /// Sends recorded audio to XunFei for transcription and reports the result asynchronously.
    /// </summary>
    /// <param name="audioClip">Audio to transcribe.</param>
    /// <param name="callback">Receives the recognized text, or an empty string on failure.</param>
    public void SendSpeechToTextMsg(AudioClip audioClip, Action<string> callback)
    {
        if (audioClip == null)
        {
            callback?.Invoke(string.Empty);
            return;
        }

        byte[] bytes = AudioClipToBytes(audioClip);

        // Build the JSON request carrying the audio as base64.
        JObject jObject = new JObject();
        jObject["data"] = Convert.ToBase64String(bytes);

        StartCoroutine(SendSpeechToTextMsgCoroutine(jObject, callback));
    }

    /// <summary>
    /// Coroutine that sends the transcription request, waits for the task to finish and extracts
    /// the "text" field of the response.
    /// </summary>
    /// <param name="message">JSON request body.</param>
    /// <param name="callback">Invoked exactly once: with the text on success, with an empty string on failure.</param>
    private IEnumerator SendSpeechToTextMsgCoroutine(JObject message, Action<string> callback)
    {
        Task<string> resultJson = XunFeiManager.Instance.SpeechToText(message);

        // Poll the task from the coroutine until it finishes.
        yield return new WaitUntil(() => resultJson.IsCompleted);

        string text = null;
        if (resultJson.IsCompletedSuccessfully && !string.IsNullOrEmpty(resultJson.Result))
        {
            try
            {
                JObject obj = JObject.Parse(resultJson.Result);
                text = obj["text"]?.ToString();
            }
            catch (Newtonsoft.Json.JsonException)
            {
                // Malformed response: handled by the failure path below.
                text = null;
            }
        }

        if (text == null)
        {
            Debug.Log("讯飞语音转文本消息发送失败");
            callback?.Invoke(string.Empty);
            yield break;
        }

        callback?.Invoke(text);
    }

    /// <summary>
    /// Converts a clip's float samples into 16-bit little-endian PCM bytes.
    /// </summary>
    /// <param name="audioClip">Unity audio data.</param>
    /// <returns>Two bytes per sample, low byte first.</returns>
    private static byte[] AudioClipToBytes(AudioClip audioClip)
    {
        // NOTE(review): the buffer holds audioClip.samples values, which presumably covers the
        // whole clip only for mono audio - confirm if multi-channel clips can reach this point.
        float[] data = new float[audioClip.samples];
        audioClip.GetData(data, 0);

        const int rescaleFactor = 32767; // scales a float sample to the Int16 range
        byte[] outData = new byte[data.Length * 2];
        for (int i = 0; i < data.Length; i++)
        {
            // Clamp first so an out-of-range sample cannot overflow the 16-bit conversion.
            short temshort = (short)(Mathf.Clamp(data[i], -1f, 1f) * rescaleFactor);
            outData[i * 2] = (byte)(temshort & 0xFF);
            outData[i * 2 + 1] = (byte)((temshort >> 8) & 0xFF);
        }

        return outData;
    }

    /// <summary>
    /// Returns a copy of the clip with leading and trailing silence removed.
    /// </summary>
    /// <param name="clip">Clip to trim.</param>
    /// <param name="min">Absolute amplitude at or below which a sample counts as silence.</param>
    /// <returns>The trimmed clip, or null when no samples remain.</returns>
    private static AudioClip TrimSilence(AudioClip clip, float min)
    {
        var samples = new float[clip.samples];
        clip.GetData(samples, 0);
        return TrimSilence(new List<float>(samples), min, clip.channels, clip.frequency);
    }

    /// <summary>
    /// Trims silence from both ends of a sample list, keeping <see cref="TrimPaddingSeconds"/>
    /// of padding on each side, and builds a new clip from what remains.
    /// </summary>
    /// <param name="samples">Samples to trim; the list is modified in place.</param>
    /// <param name="min">Absolute amplitude at or below which a sample counts as silence.</param>
    /// <param name="channels">Channel count of the new clip.</param>
    /// <param name="hz">Sample rate of the new clip.</param>
    /// <param name="_3D">Passed as the last argument of <c>AudioClip.Create</c>.</param>
    /// <returns>The trimmed clip, or null when no samples remain.</returns>
    private static AudioClip TrimSilence(List<float> samples, float min, int channels, int hz, bool _3D = false)
    {
        int padding = (int)(hz * TrimPaddingSeconds);

        // Find the first audible sample, then step back to keep a little lead-in.
        int i;
        for (i = 0; i < samples.Count; i++)
        {
            if (Mathf.Abs(samples[i]) > min)
            {
                break;
            }
        }

        i = Mathf.Max(i - padding, 0);
        samples.RemoveRange(0, i);

        // Guard the empty case: the tail computation below would produce a negative index.
        if (samples.Count == 0)
        {
            Debug.Log("剔除后的AudioClip长度为0");
            return null;
        }

        // Find the last audible sample, then step forward to keep a little tail.
        for (i = samples.Count - 1; i > 0; i--)
        {
            if (Mathf.Abs(samples[i]) > min)
            {
                break;
            }
        }

        i = Mathf.Min(i + padding, samples.Count - 1);
        samples.RemoveRange(i, samples.Count - i);

        if (samples.Count == 0)
        {
            Debug.Log("剔除后的AudioClip长度为0");
            return null;
        }

        var clip = AudioClip.Create("TempClip", samples.Count, channels, hz, _3D);
        clip.SetData(samples.ToArray(), 0);
        return clip;
    }

    #endregion
}