// Source path: ict.shenzhi/Assets/Scripts/kdl/IFlytekManagerHuman.cs
//
// Hosting-page metadata: 810 lines, 28 KiB, C# (page controls: Raw / Blame / History).
//
// Hosting-page warning: this file contains ambiguous Unicode characters.
//
// This file contains Unicode characters that might be confused with other characters.
// If you think that this is intentional, you can safely ignore this warning.
// Use the Escape button (on the hosting page) to reveal them.

using System;
using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using System.Text;
using Newtonsoft.Json.Linq;
using System.Net.WebSockets;
using System.Threading;
using System.Threading.Tasks;
using System.Linq;
using Newtonsoft.Json;
public class IFlytekManagerHuman : MonoBehaviour
{
/// <summary>
/// Globally reachable reference to this component; assigned in Awake.
/// </summary>
public static IFlytekManagerHuman instance;
// Socket of the most recently started request. All three request coroutines assign this
// same field, and OnDestroy uses it to close a connection that is still open.
ClientWebSocket ws;
// Token passed to every connect/send/close call. It is never assigned in the visible code,
// so it keeps its default value.
CancellationToken cancellation;
/// <summary>
/// Speech-to-text (dictation) endpoint.
/// </summary>
const string wsAudioToTextUrl = "wss://iat-api.xfyun.cn/v2/iat";
/// <summary>
/// Text-to-speech endpoint.
/// </summary>
const string wsTextToAudioUrl = "wss://tts-api.xfyun.cn/v2/tts";
// Credentials used for both speech endpoints.
// NOTE(review): secrets are hard-coded in source; consider loading them from configuration.
const string appId = "423f4764";
const string apiKey = "6d1972981844bc6be96b5eaf97be70bb";
const string apiSecret = "ZDc1MjdhYzE0ZDkzZjdlOWQ1NjRiMDBj";
// Values of the "status" field in dictation frames: first, intermediate and terminating frame.
const int StatusFirstFrame = 0;
const int StatusContinueFrame = 1;
const int StatusLastFrame = 2;
/// <summary>
/// Chat endpoint.
/// NOTE(review): this URL uses the unencrypted ws:// scheme, unlike the two wss:// endpoints above; confirm this is intended.
/// </summary>
const string wsChatUrl = "ws://spark-api.xf-yun.com/v1.1/chat";
// Credentials used for the chat endpoint (hard-coded; same concern as above).
const string chat_appid = "a83ddb30";
const string chat_key = "df2f1b36593607691d184c2282977959";
const string chat_secret = "ZDUwMjczZjM2YTI2YTVhZDU2MjljZDI4";
/// <summary>
/// Unique user id: a GUID generated once per component instance and sent as "uid" in chat requests.
/// </summary>
string clientId = System.Guid.NewGuid().ToString("N");
/// <summary>
/// Unity lifecycle hook: publishes this component through the static <see cref="instance"/> field.
/// </summary>
private void Awake() => instance = this;
/// <summary>
/// Unity lifecycle hook: stops every coroutine started on this component and, when the
/// shared socket is still open, starts a normal close and drops the reference.
/// </summary>
private void OnDestroy()
{
    StopAllCoroutines();

    bool socketIsOpen = ws != null && ws.State == WebSocketState.Open;
    if (!socketIsOpen)
    {
        return;
    }

    // The close is started but its task is not observed.
    ws.CloseAsync(WebSocketCloseStatus.NormalClosure, "", cancellation);
    ws = null;
}
/// <summary>
/// Speech to text: starts the dictation coroutine for the given audio.
/// </summary>
/// <param name="AudioData">Audio bytes handed unchanged to the dictation coroutine.</param>
/// <param name="callback">Handed unchanged to the dictation coroutine, which reports its result through it.</param>
public void AudioToText(byte[] AudioData, Action<string> callback)
{
    IEnumerator dictation = callApiAudioToText(AudioData, callback);
    StartCoroutine(dictation);
}
/// <summary>
/// Short text to speech: starts the synthesis coroutine for the given text.
/// </summary>
/// <param name="text">Text to synthesise.</param>
/// <param name="callback">Handed unchanged to the synthesis coroutine, which reports its result through it.</param>
/// <param name="speed">Forwarded to the synthesis coroutine.</param>
/// <param name="tone">Forwarded to the synthesis coroutine, which sends it as the "vcn" request parameter.</param>
/// <param name="au">Forwarded to the synthesis coroutine.</param>
public void TextToAudio(string text, Action<AudioClip> callback, int speed, string tone, AudiosLibrary au)
{
    IEnumerator synthesis = callApiTextToAudio(text, callback, speed, tone, au);
    StartCoroutine(synthesis);
}
/// <summary>
/// Spark large-model chat without conversation history: sends a single user turn.
/// </summary>
/// <param name="question">The question, sent as the only "user" message.</param>
/// <param name="callback">Handed unchanged to the chat coroutine, which reports its result through it.</param>
public void Chat(string question, Action<string> callback)
{
    JObject userTurn = new JObject
    {
        { "role", "user" },
        { "content", question },
    };
    JArray text = new JArray { userTurn };

    StartCoroutine(CallapiForChat(text, callback));
}
/// <summary>
/// Spark large-model chat with conversation history: replays earlier turns, then asks the new question.
/// </summary>
/// <param name="historyChat">Earlier question/answer pairs in chronological order; keep the list short.</param>
/// <param name="question">The new question, appended as the final "user" message.</param>
/// <param name="callback">Handed unchanged to the chat coroutine, which reports its result through it.</param>
public void Chat(List<ChatData> historyChat, string question, Action<string> callback)
{
    JArray text = new JArray();

    // Each history entry becomes a user message followed by the assistant's reply.
    foreach (ChatData turn in historyChat)
    {
        text.Add(new JObject { { "role", "user" }, { "content", turn.ask } });
        text.Add(new JObject { { "role", "assistant" }, { "content", turn.answer } });
    }

    // The current question goes last.
    text.Add(new JObject { { "role", "user" }, { "content", question } });

    StartCoroutine(CallapiForChat(text, callback));
}
/// <summary>
/// Coroutine that streams audio to the dictation endpoint and reports the recognised text.
/// </summary>
/// <param name="AudioData">Audio bytes; sent base64-encoded in 8000-byte frames declared as "audio/L16;rate=16000" with "raw" encoding.</param>
/// <param name="callback">
/// Invoked once: with the text returned by the receive loop on success, or with null when there is
/// no audio, the connection cannot be opened, or the receive task fails.
/// </param>
IEnumerator callApiAudioToText(byte[] AudioData, Action<string> callback)
{
    Debug.Log("开始调用API");
    MyDebugger.Log("开始转文字");

    // Without audio only the terminating frame would be sent, and that frame carries neither
    // the app id nor the business parameters.
    if (AudioData == null || AudioData.Length == 0)
    {
        Debug.LogError("音频数据为空");
        callback(null);
        yield break;
    }

    string url = GetAuthUrl(wsAudioToTextUrl, apiKey, apiSecret);
    using (ClientWebSocket socket = new ClientWebSocket())
    {
        // The field is still updated so OnDestroy can close a live connection, but this coroutine
        // only touches its own local so an overlapping request cannot swap the socket under it.
        ws = socket;

        try
        {
            // Blocks the calling thread for at most two seconds.
            socket.ConnectAsync(new Uri(url), cancellation).Wait(2000);
        }
        catch (Exception e)
        {
            // A failed connect surfaces here as an exception; report it like any other failure.
            Debug.LogError("连接异常:" + e);
        }

        if (socket.State != WebSocketState.Open)
        {
            Debug.LogError("连接失败");
            callback(null);
            yield break;
        }

        const int frameSize = 8000; // bytes of audio per frame
        const int intervalMs = 40;  // pause between frames, in milliseconds

        // Responses are collected on a worker thread while frames are being sent.
        Task<string> receive = Task.Run<string>(() => ReciveText(socket));

        bool isFirstFrame = true;
        for (int offset = 0; offset < AudioData.Length; offset += frameSize)
        {
            // The receive loop closes the socket when it finishes; stop sending once that happens.
            if (receive.IsCompleted)
            {
                break;
            }

            byte[] chunk = SubArray(AudioData, offset, frameSize);
            if (!TrySendDictationFrame(socket, BuildDictationFrame(chunk, isFirstFrame)))
            {
                break;
            }

            isFirstFrame = false;
            yield return new WaitForSeconds(intervalMs * 0.001f); // paces the upload like live capture
        }

        // Terminating frame: status only, no audio.
        JObject lastData = new JObject();
        lastData.Add("status", StatusLastFrame);
        JObject lastFrame = new JObject();
        lastFrame.Add("data", lastData);
        TrySendDictationFrame(socket, lastFrame);

        Debug.Log("进入等待");
        yield return new WaitUntil(() => receive.IsCompleted);

        // Reading Result of a faulted task would throw and skip the callback entirely.
        if (receive.IsFaulted || receive.IsCanceled)
        {
            Debug.LogError("接收失败:" + receive.Exception);
            callback(null);
            yield break;
        }

        Debug.Log("完成:" + receive.Result);
        MyDebugger.Log("文字转换完成");
        callback(receive.Result);
        Debug.Log("结束调用API");
    }
}

/// <summary>
/// Builds one dictation frame. The first frame additionally carries the "common" and "business"
/// sections; every frame carries a "data" section with the audio chunk.
/// </summary>
private JObject BuildDictationFrame(byte[] chunk, bool isFirstFrame)
{
    JObject data = new JObject();
    data.Add("status", isFirstFrame ? StatusFirstFrame : StatusContinueFrame);
    data.Add("format", "audio/L16;rate=16000");
    data.Add("audio", Convert.ToBase64String(chunk));
    data.Add("encoding", "raw");

    JObject frame = new JObject();
    if (isFirstFrame)
    {
        JObject common = new JObject();
        common.Add("app_id", appId);

        JObject business = new JObject();
        business.Add("language", "zh_cn");
        business.Add("domain", "iat");
        business.Add("accent", "mandarin");
        business.Add("vad_eos", 9000);
        business.Add("ptt", 1); // punctuation

        frame.Add("common", common);
        frame.Add("business", business);
    }

    frame.Add("data", data);
    return frame;
}

/// <summary>
/// Starts sending one JSON frame as a text message. Returns false instead of throwing when the
/// socket rejects the send; the send task itself is not observed.
/// </summary>
private bool TrySendDictationFrame(ClientWebSocket socket, JObject frame)
{
    try
    {
        byte[] payload = Encoding.UTF8.GetBytes(frame.ToString());
        socket.SendAsync(new ArraySegment<byte>(payload), WebSocketMessageType.Text, true, cancellation);
        return true;
    }
    catch (Exception e)
    {
        Debug.LogWarning("发送失败:" + e.Message);
        return false;
    }
}
/// <summary>
/// Coroutine that sends text to the speech-synthesis endpoint and reports the resulting clip.
/// </summary>
/// <param name="msg">Text to synthesise; it is passed through DigitalHumanManager.Instance.current.showStr before being sent.</param>
/// <param name="callback">
/// Invoked once: with the clip produced from the received audio, or with null when the connection
/// cannot be opened, the receive task fails, or no audio was received.
/// </param>
/// <param name="speed">Not read by this method.</param>
/// <param name="tone">Sent as the "vcn" request parameter.</param>
/// <param name="audios">Not read by this method.</param>
/// <returns>The coroutine enumerator.</returns>
IEnumerator callApiTextToAudio(string msg, Action<AudioClip> callback, int speed, string tone, AudiosLibrary audios)
{
    Debug.Log("开始调用API");
    MyDebugger.Log("开始转语音");
    string url = GetAuthUrl(wsTextToAudioUrl, apiKey, apiSecret);
    using (ClientWebSocket socket = new ClientWebSocket())
    {
        // The field is still updated so OnDestroy can close a live connection, but this coroutine
        // only touches its own local so an overlapping request cannot swap the socket under it.
        ws = socket;

        try
        {
            // Blocks the calling thread for at most two seconds.
            socket.ConnectAsync(new Uri(url), cancellation).Wait(2000);
        }
        catch (Exception e)
        {
            // A failed connect surfaces here as an exception; report it like any other failure.
            Debug.LogError("连接异常:" + e);
        }

        if (socket.State != WebSocketState.Open)
        {
            Debug.LogError("连接失败");
            callback(null);
            yield break;
        }

        // Audio chunks are collected on a worker thread.
        Task<byte[]> receive = Task.Run<byte[]>(() => ReciveAudio(socket));

        // The whole request is a single JSON text message.
        JObject common = new JObject();
        common.Add("app_id", appId);

        JObject business = new JObject();
        business.Add("aue", "lame");
        business.Add("sfl", 1);
        business.Add("auf", "audio/L16;rate=16000");
        business.Add("vcn", tone);
        business.Add("tte", "utf8");

        JObject data = new JObject();
        data.Add("status", 2);
        string str = Convert.ToBase64String(Encoding.UTF8.GetBytes(DigitalHumanManager.Instance.current.showStr(msg)));
        Debug.Log(str);
        data.Add("text", str);

        JObject frame = new JObject();
        frame.Add("common", common);
        frame.Add("business", business);
        frame.Add("data", data);

        byte[] frameData = Encoding.UTF8.GetBytes(frame.ToString());
        socket.SendAsync(new ArraySegment<byte>(frameData), WebSocketMessageType.Text, true, cancellation);

        yield return new WaitUntil(() => receive.IsCompleted);

        // Reading Result of a faulted task would throw and skip the callback; an empty result
        // means the receive loop stopped before any audio arrived.
        if (receive.IsFaulted || receive.IsCanceled || receive.Result == null || receive.Result.Length == 0)
        {
            Debug.LogError("语音合成失败:" + receive.Exception);
            callback(null);
            yield break;
        }

        Debug.Log("完成");
        MyDebugger.Log("语音转换完成");

        // Convert the received bytes into a clip.
        AudioClip clip = Mp3Tansform.LoadMp3Audio(receive.Result, "demo.mp3");
        callback(clip);

        // The caller has been notified; without a clip there is no length to report below.
        if (clip == null)
        {
            Debug.LogError("音频解码失败");
            yield break;
        }

        Debug.Log(clip.length);
        Debug.Log(msg.Length);
        DigitalHumanManager.Instance.current.showClips(clip.length);
    }
}
/// <summary>
/// Coroutine that sends a conversation to the chat endpoint and reports the reply text.
/// </summary>
/// <param name="text">Array of {"role", "content"} messages, sent as payload.message.text.</param>
/// <param name="callback">
/// Invoked once: with the text returned by the receive loop, or with null when the connection
/// cannot be opened or the receive task fails.
/// </param>
/// <returns>The coroutine enumerator.</returns>
IEnumerator CallapiForChat(JArray text, Action<string> callback)
{
    Debug.Log("开始调用API");
    MyDebugger.Log("开始聊天");
    string url = GetAuthUrl(wsChatUrl, chat_key, chat_secret);
    using (ClientWebSocket socket = new ClientWebSocket())
    {
        // The field is still updated so OnDestroy can close a live connection, but this coroutine
        // only touches its own local so an overlapping request cannot swap the socket under it.
        ws = socket;

        try
        {
            // Blocks the calling thread for at most two seconds.
            socket.ConnectAsync(new Uri(url), cancellation).Wait(2000);
        }
        catch (Exception e)
        {
            // A failed connect surfaces here as an exception; report it like any other failure.
            Debug.LogError("连接异常:" + e);
        }

        if (socket.State != WebSocketState.Open)
        {
            Debug.LogError("连接失败");
            callback(null);
            yield break;
        }

        // Reply fragments are collected on a worker thread.
        Task<string> receive = Task.Run<string>(() => ReciceChat(socket));

        // The whole request is a single JSON text message: header / parameter / payload.
        JObject header = new JObject();
        JObject parameter = new JObject();
        JObject payload = new JObject();
        JObject data = new JObject();
        data.Add("header", header);
        data.Add("parameter", parameter);
        data.Add("payload", payload);

        header.Add("app_id", chat_appid);
        header.Add("uid", clientId);

        JObject chat = new JObject();
        parameter.Add("chat", chat);
        chat.Add("domain", "general");   // domain
        chat.Add("max_tokens", 2048);    // maximum token count
        chat.Add("auditing", "default"); // content auditing

        JObject message = new JObject();
        payload.Add("message", message);
        message.Add("text", text);

        byte[] frameData = Encoding.UTF8.GetBytes(data.ToString());
        socket.SendAsync(new ArraySegment<byte>(frameData), WebSocketMessageType.Text, true, cancellation);

        yield return new WaitUntil(() => receive.IsCompleted);

        // Reading Result of a faulted task would throw and skip the callback entirely.
        if (receive.IsFaulted || receive.IsCanceled)
        {
            Debug.LogError("接收失败:" + receive.Exception);
            callback(null);
            yield break;
        }

        Debug.Log("完成");
        callback(receive.Result);
    }
}
/// <summary>
/// Blocking receive loop for dictation results: reads text messages until the final result
/// arrives, the server closes, or an error frame is received, then starts closing the socket.
/// </summary>
/// <param name="ws">An open socket on which the dictation request is being sent.</param>
/// <returns>The text of all results received so far, concatenated in arrival order.</returns>
string ReciveText(ClientWebSocket ws)
{
    StringBuilder transcript = new StringBuilder();
    // Bytes of the message currently being assembled (a message may span several reads).
    List<byte> pending = new List<byte>();
    byte[] buffer = new byte[1024 * 4];

    while (ws.State == WebSocketState.Open)
    {
        Debug.Log("等待消息接收");
        Task<WebSocketReceiveResult> task = ws.ReceiveAsync(new ArraySegment<byte>(buffer), CancellationToken.None);
        task.Wait();
        WebSocketReceiveResult result = task.Result;

        if (result.CloseStatus.HasValue)
        {
            Debug.Log("接收关闭");
            break;
        }

        if (result.MessageType != WebSocketMessageType.Text)
        {
            continue;
        }

        pending.AddRange(buffer.Take(result.Count));
        if (!result.EndOfMessage)
        {
            continue;
        }

        string userMsg = Encoding.UTF8.GetString(pending.ToArray(), 0, pending.Count);
        pending.Clear();

        // A frame that cannot be parsed (for example an error response without a result section)
        // ends the loop but keeps the text collected so far.
        ResponseData<TextInfo> parsed;
        try
        {
            parsed = GetTextResultData(userMsg);
        }
        catch (Exception e)
        {
            Debug.LogError("听写结果解析失败:" + e.Message + " " + userMsg);
            break;
        }

        if (parsed.code != 0 || parsed.data == null || parsed.data.result == null)
        {
            Debug.LogError("听写错误 code:" + parsed.code + " message:" + parsed.message);
            break;
        }

        string msg = parsed.data.result.GetResultText();
        Debug.Log(msg);
        transcript.Append(msg);

        if (parsed.data.result.ls)
        {
            // "ls" marks the last result block.
            Debug.Log("最后一块数据");
            break;
        }
    }

    try
    {
        // Started but not awaited; a socket that is no longer in a closable state must not
        // discard the text that was already received.
        ws.CloseAsync(WebSocketCloseStatus.NormalClosure, "", cancellation);
    }
    catch (Exception e)
    {
        Debug.LogWarning("关闭连接失败:" + e.Message);
    }

    Debug.Log("退出接收");
    return transcript.ToString();
}
/// <summary>
/// Blocking receive loop for synthesis results: reads text messages, decodes the base64 audio in
/// each one, and stops on the final chunk (status 2), a server close, or an error code.
/// </summary>
/// <param name="ws">An open socket on which the synthesis request is being sent.</param>
/// <returns>All decoded audio bytes received so far, in arrival order (empty if none arrived).</returns>
byte[] ReciveAudio(ClientWebSocket ws)
{
    List<byte> audio = new List<byte>();
    // Bytes of the message currently being assembled (a message may span several reads).
    List<byte> pending = new List<byte>();
    byte[] buffer = new byte[1024 * 4];

    while (ws.State == WebSocketState.Open)
    {
        Task<WebSocketReceiveResult> task = ws.ReceiveAsync(new ArraySegment<byte>(buffer), CancellationToken.None);
        task.Wait();
        WebSocketReceiveResult result = task.Result;

        if (result.CloseStatus.HasValue)
        {
            Debug.Log("接收关闭");
            break;
        }

        if (result.MessageType != WebSocketMessageType.Text)
        {
            continue;
        }

        pending.AddRange(buffer.Take(result.Count));
        if (!result.EndOfMessage)
        {
            continue;
        }

        string userMsg = Encoding.UTF8.GetString(pending.ToArray(), 0, pending.Count);
        pending.Clear();

        var resultObj = GetAudioResultData(userMsg);
        if (resultObj.code != 0)
        {
            Debug.Log("音频错误结束");
            break;
        }

        // Decode each chunk exactly once.
        string msg = resultObj.data.result.GetResultAudio();
        Debug.Log(msg.Length);
        audio.AddRange(Convert.FromBase64String(msg));

        if (resultObj.data.status == 2)
        {
            Debug.Log("音频结束");
            break;
        }
    }

    try
    {
        // Started but not awaited; a socket that is no longer in a closable state must not
        // discard the audio that was already received.
        ws.CloseAsync(WebSocketCloseStatus.NormalClosure, "", cancellation);
    }
    catch (Exception e)
    {
        Debug.LogWarning("关闭连接失败:" + e.Message);
    }

    Debug.Log("退出接收");
    return audio.ToArray();
}
/// <summary>
/// Blocking receive loop for chat replies: reads text messages, appends the "content" of every
/// entry in payload.choices.text, and stops on the final message (header.status 2), a server
/// close, or a non-zero header.code.
/// </summary>
/// <param name="ws">An open socket on which the chat request is being sent.</param>
/// <returns>The reply text received so far, concatenated in arrival order.</returns>
string ReciceChat(ClientWebSocket ws)
{
    StringBuilder reply = new StringBuilder();
    // Bytes of the message currently being assembled (a message may span several reads).
    List<byte> pending = new List<byte>();
    byte[] buffer = new byte[1024 * 4];

    while (ws.State == WebSocketState.Open)
    {
        Task<WebSocketReceiveResult> task = ws.ReceiveAsync(new ArraySegment<byte>(buffer), CancellationToken.None);
        task.Wait();
        WebSocketReceiveResult result = task.Result;

        if (result.CloseStatus.HasValue)
        {
            Debug.Log("接收关闭");
            break;
        }

        if (result.MessageType != WebSocketMessageType.Text)
        {
            continue;
        }

        pending.AddRange(buffer.Take(result.Count));
        if (!result.EndOfMessage)
        {
            continue;
        }

        string userMsg = Encoding.UTF8.GetString(pending.ToArray(), 0, pending.Count);
        pending.Clear();

        JObject jb = JObject.Parse(userMsg);
        JToken header = jb["header"];
        int code = header["code"].ToObject<int>();
        int status = header["status"].ToObject<int>();
        string message = header["message"].ToString();

        if (code != 0)
        {
            // Error response: report it and stop.
            Debug.LogError(message);
            break;
        }

        foreach (JToken item in jb["payload"]["choices"]["text"])
        {
            reply.Append(item["content"].ToString());
        }

        if (status == 2)
        {
            // Final message of the reply.
            Debug.Log("最后一条");
            break;
        }
    }

    try
    {
        // Same hand-off as the other receive loops: start a normal close, do not wait for it.
        ws.CloseAsync(WebSocketCloseStatus.NormalClosure, "", cancellation);
    }
    catch (Exception e)
    {
        Debug.LogWarning("关闭连接失败:" + e.Message);
    }

    Debug.Log("退出接收");
    return reply.ToString();
}
/// <summary>
/// Builds the signed connection URL for one of the WebSocket endpoints (used for dictation,
/// synthesis and chat alike).
/// </summary>
/// <param name="whichUrl">Endpoint URL of the form scheme://host/path.</param>
/// <param name="key">API key placed in the authorization string.</param>
/// <param name="secret">API secret used as the HMAC-SHA256 key for the signature.</param>
/// <returns>
/// <paramref name="whichUrl"/> followed by the query parameters "authorization", "date" and "host".
/// </returns>
private string GetAuthUrl(string whichUrl, string key, string secret)
{
    // Current UTC time in RFC 1123 format.
    string date = DateTime.UtcNow.ToString("r");
    string host = whichUrl.Split("://")[1].Split('/')[0];
    string localPath = whichUrl.Split(host)[1];

    // The signed text is: host line, date line, request line.
    string signatureOrigin = "host: " + host + "\n" + "date: " + date + "\n" + "GET " + localPath + " HTTP/1.1";
    string signature = HMACsha256(secret, signatureOrigin);

    // Format: api_key="$api_key", algorithm="hmac-sha256", headers="host date request-line", signature="$signature"
    string authorization = string.Format("api_key=\"{0}\", algorithm=\"{1}\", headers=\"{2}\", signature=\"{3}\"", key, "hmac-sha256", "host date request-line", signature);
    string authorizationBase64 = Convert.ToBase64String(Encoding.UTF8.GetBytes(authorization));

    // Base64 output can contain '+', '/' and '=', which are not safe to place raw in a query
    // string ('+' in particular is commonly decoded as a space), so the value is percent-encoded.
    // For the date this yields the same %20 / %3A / %2C escapes as before.
    string query = "authorization=" + Uri.EscapeDataString(authorizationBase64)
        + "&date=" + Uri.EscapeDataString(date)
        + "&host=" + host;

    return whichUrl + "?" + query;
}
/// <summary>
/// Computes the HMAC-SHA256 of a UTF-8 string and returns it base64-encoded.
/// </summary>
/// <param name="apiSecretIsKey">Secret used as the HMAC key (UTF-8 encoded).</param>
/// <param name="buider">Text to sign (UTF-8 encoded).</param>
/// <returns>Base64 encoding of the 32-byte digest.</returns>
private string HMACsha256(string apiSecretIsKey, string buider)
{
    byte[] keyBytes = Encoding.UTF8.GetBytes(apiSecretIsKey);
    byte[] payload = Encoding.UTF8.GetBytes(buider);

    // "using" releases the key material even when hashing throws.
    using (System.Security.Cryptography.HMACSHA256 hmac = new System.Security.Cryptography.HMACSHA256(keyBytes))
    {
        return Convert.ToBase64String(hmac.ComputeHash(payload));
    }
}
/// <summary>
/// Copies a slice out of a byte array.
/// </summary>
/// <param name="source">Array to read from.</param>
/// <param name="startIndex">Index of the first byte to copy.</param>
/// <param name="length">Maximum number of bytes to copy.</param>
/// <returns>
/// A new array holding the bytes from <paramref name="startIndex"/> onward, at most
/// <paramref name="length"/> of them (fewer when the source ends first, empty when
/// <paramref name="startIndex"/> equals the source length); null when <paramref name="source"/>
/// is null, <paramref name="startIndex"/> is negative or past the end, or
/// <paramref name="length"/> is negative.
/// </returns>
public static byte[] SubArray(byte[] source, int startIndex, int length)
{
    if (source == null || startIndex < 0 || startIndex > source.Length || length < 0)
    {
        return null;
    }

    // Clamp against what is left instead of computing startIndex + length, which can overflow.
    int count = Math.Min(length, source.Length - startIndex);
    byte[] destination = new byte[count];
    Array.Copy(source, startIndex, destination, 0, count);
    return destination;
}
/// <summary>
/// Parses one dictation response message.
/// </summary>
/// <param name="ReceviceStr">JSON text of the response.</param>
/// <returns>
/// The parsed response. When the response reports a non-zero code or has no "data" section,
/// only code/message/sid are filled and <c>data</c> is left null; when "data" has no "result"
/// section, <c>data.result</c> is left null.
/// </returns>
private ResponseData<TextInfo> GetTextResultData(string ReceviceStr)
{
    ResponseData<TextInfo> response = new ResponseData<TextInfo>();
    var jsonObj = (JObject)JsonConvert.DeserializeObject(ReceviceStr);
    response.code = jsonObj["code"].ToObject<int>();
    response.message = jsonObj["message"].ToObject<string>();
    response.sid = jsonObj["sid"].ToObject<string>();

    // Same convention as the audio parser: report a server-side error and return the envelope only.
    JToken data = jsonObj["data"];
    if (response.code != 0 || data == null || data.Type == JTokenType.Null)
    {
        Debug.LogError("code:" + response.code + "message:" + response.message);
        return response;
    }

    ReaponseDataInfo<TextInfo> dataInfo = new ReaponseDataInfo<TextInfo>();
    dataInfo.status = data["status"].ToObject<int>();
    response.data = dataInfo;

    JToken result = data["result"];
    if (result == null || result.Type == JTokenType.Null)
    {
        return response;
    }

    TextInfo resultInfo = new TextInfo();
    resultInfo.bg = result["bg"].ToObject<int>();
    resultInfo.ed = result["ed"].ToObject<int>();
    resultInfo.sn = result["sn"].ToObject<int>();
    resultInfo.ls = result["ls"].ToObject<bool>();

    // "ws" is an array of entries, each holding a "cw" array of candidates.
    List<Ws> words = new List<Ws>();
    JArray wsArray = result["ws"].ToObject<JArray>();
    for (int i = 0; i < wsArray.Count; i++)
    {
        Ws word = new Ws();
        word.bg = wsArray[i]["bg"].ToObject<int>();

        List<Cw> candidates = new List<Cw>();
        JArray cwArray = wsArray[i]["cw"].ToObject<JArray>();
        for (int j = 0; j < cwArray.Count; j++)
        {
            Cw candidate = new Cw();
            candidate.sc = cwArray[j]["sc"].ToObject<int>();
            candidate.w = cwArray[j]["w"].ToObject<string>();
            candidates.Add(candidate);
        }

        word.cw = candidates.ToArray();
        words.Add(word);
    }

    resultInfo.ws = words.ToArray();
    dataInfo.result = resultInfo;
    return response;
}
/// <summary>
/// Parses one synthesis response message.
/// </summary>
/// <param name="ReceviceStr">JSON text of the response.</param>
/// <returns>
/// The parsed response. When the code is non-zero the error is logged and only
/// code/message/sid are filled; otherwise <c>data</c> carries the status, audio and ced values.
/// </returns>
private ResponseData<AudioInfo> GetAudioResultData(string ReceviceStr)
{
    JObject json = (JObject)JsonConvert.DeserializeObject(ReceviceStr);

    ResponseData<AudioInfo> response = new ResponseData<AudioInfo>
    {
        code = json["code"].ToObject<int>(),
        message = json["message"].ToObject<string>(),
        sid = json["sid"].ToObject<string>(),
    };

    bool failed = response.code != 0;
    if (failed)
    {
        Debug.LogError("code:" + response.code + "message:" + response.message);
        return response;
    }

    JToken payload = json["data"];
    response.data = new ReaponseDataInfo<AudioInfo>
    {
        status = payload["status"].ToObject<int>(),
        result = new AudioInfo
        {
            audio = payload["audio"].ToString(),
            ced = payload["ced"].ToString(),
        },
    };
    return response;
}
}
/// <summary>
/// Response envelope ("returned data"): a code, a message, a session id and a typed data section.
/// NOTE(review): the parsing code in IFlytekManagerHuman uses a type named ResponseData, not this one — confirm which is intended.
/// </summary>
/// <typeparam name="T">Type of the result carried inside <see cref="data"/>.</typeparam>
public class ResponseDataHuman<T>
{
// Presumably the service's result code (the manager's parsers treat 0 as success) — confirm.
public int code;
// Presumably the human-readable message accompanying the code — confirm.
public string message;
// Presumably the session id of the request — confirm.
public string sid;
// Status plus typed result.
public ReaponseDataInfoHuman<T> data;
}
/// <summary>
/// Data section of a response: a status value and the typed result.
/// </summary>
/// <typeparam name="T">Type of the result.</typeparam>
public class ReaponseDataInfoHuman<T>
{
// Presumably the stream status (the manager's audio receive loop treats 2 as "finished") — confirm.
public int status;
// The typed result payload.
public T result;
}
/// <summary>
/// Dictation (speech-to-text) result data.
/// </summary>
public class TextInfoHuman
{
    // NOTE(review): bg / ed / pgs / rg / sn mirror keys of the service's result object; their
    // exact meaning is not established by this file — confirm against the API documentation.
    public int bg;
    public int ed;
    public string pgs;
    public int[] rg;
    public int sn;
    // Presumably true on the last result block (the manager's text receive loop stops on it) — confirm.
    public bool ls;
    // Recognised entries; each one holds its candidates in "cw".
    public WsHuman[] ws;

    /// <summary>
    /// Joins the text of the first candidate of every entry in <see cref="ws"/>.
    /// </summary>
    /// <returns>
    /// The concatenated text; an empty string when <see cref="ws"/> is null. Entries that are
    /// null or have no candidates are skipped instead of throwing.
    /// </returns>
    public string GetResultText()
    {
        StringBuilder text = new StringBuilder();
        if (ws == null)
        {
            return text.ToString();
        }

        foreach (WsHuman entry in ws)
        {
            if (entry == null || entry.cw == null || entry.cw.Length == 0 || entry.cw[0] == null)
            {
                continue;
            }

            text.Append(entry.cw[0].w);
        }

        return text.ToString();
    }
}
/// <summary>
/// One entry of a dictation result's "ws" array.
/// </summary>
public class WsHuman
{
// Candidates for this entry; TextInfoHuman.GetResultText reads the first one.
// NOTE(review): the element type is Cw, which is declared outside this file, not CwHuman — confirm this is intended.
public Cw[] cw;
// NOTE(review): bg / ed mirror keys of the service's result; meaning not established by this file.
public int bg;
public int ed;
}
/// <summary>
/// One candidate of a dictation entry.
/// </summary>
public class CwHuman
{
// NOTE(review): mirrors the "sc" key of the service's result; meaning not established by this file.
public int sc;
// Presumably the recognised text of this candidate — confirm.
public string w;
}
/// <summary>
/// Speech-synthesis result data.
/// </summary>
public class AudioInfoHuman
{
    // Audio payload as text; the manager's audio receive loop base64-decodes the equivalent value.
    public string audio;
    // NOTE(review): mirrors the "ced" key of the service's result; meaning not established by this file.
    public string ced;

    /// <summary>
    /// Returns the stored <see cref="audio"/> value unchanged.
    /// </summary>
    public string GetResultAudio() => audio;
}
/// <summary>
/// One question/answer pair of a conversation.
/// </summary>
public class ChatInfoHuman
{
// The question that was asked.
public string ask;
// The answer that was given.
public string answer;
}