using UnityEngine;
using System.Collections;
using UnityEngine.Networking;
using System.Linq;

namespace Crosstales.RTVoice.MaryTTS
{
    /// <summary>MaryTTS voice provider.</summary>
    [HelpURL("https://www.crosstales.com/media/data/assets/rtvoice/api/class_crosstales_1_1_r_t_voice_1_1_mary_t_t_s_1_1_voice_provider_mary_t_t_s.html")]
    //[ExecuteInEditMode]
    public class VoiceProviderMaryTTS : Crosstales.RTVoice.Provider.BaseCustomVoiceProvider
    {
        #region Variables

        // NOTE(review): the XML literals below were reconstructed from the public MaryXML, EmotionML and
        // SSML namespaces because the markup was missing from the reviewed text - confirm the exact
        // attributes against a pristine copy of this file before shipping.
        private const string xmlDeclaration = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
        private const string maryXmlOpenPrefix = "<maryxml version=\"0.5\" xmlns=\"http://mary.dfki.de/2002/MaryXML\" xml:lang=\"";
        private const string maryXmlClose = "</maryxml>";
        private const string emotionMlOpen = "<emotionml version=\"1.0\" xmlns=\"http://www.w3.org/2009/10/emotionml\" category-set=\"http://www.w3.org/TR/emotion-voc/xml#everyday-categories\">";
        private const string emotionMlClose = "</emotionml>";
        private const string ssmlOpenPrefix = "<speak version=\"1.0\" xmlns=\"http://www.w3.org/2001/10/synthesis\" xml:lang=\"";
        private const string ssmlClose = "</speak>";
        private const string authorizationHeader = "Authorization";

        [Tooltip("Server URL for MaryTTS."), SerializeField]
        private string url = "http://mary.dfki.de";

        [Tooltip("Server port for MaryTTS (default: 59125)."), Range(0, 65535), SerializeField]
        private int port = 59125;

        [Tooltip("User name for MaryTTS (default: empty)."), SerializeField]
        private string username = string.Empty;

        [Tooltip("User password for MaryTTS (default: empty)."), SerializeField]
        private string password = string.Empty;

        [Tooltip("Input type for MaryTTS (default: MaryTTSType.SSML)."), SerializeField]
        private Crosstales.RTVoice.Model.Enum.MaryTTSType type = Crosstales.RTVoice.Model.Enum.MaryTTSType.SSML;

        // Base address ("<cleaned url>:<port>") of the server; null until Load() has seen a non-empty URL.
        private string uri;

        // HTTP headers sent with every request (currently only the optional Basic authorization).
        private System.Collections.Generic.Dictionary<string, string> headers = new System.Collections.Generic.Dictionary<string, string>();

        // Connection settings seen by the previous Load() call; used to detect changes that force a reload.
        private string lastUrl;
        private int lastPort;
        private string lastUser;
        private string lastPassword;

        // True while the getVoices() coroutine is running, so that Load() does not start a second one.
        private bool isLoading;

        #endregion


        #region Properties

        /// <summary>Server URL for MaryTTS.</summary>
        public string URL
        {
            get => url;
            set
            {
                if (url == value)
                    return;

                url = value;
                Speaker.Instance.ReloadProvider();
            }
        }

        /// <summary>Server port for MaryTTS (clamped to 0-65535).</summary>
        public int Port
        {
            get => port;
            set
            {
                // Compare the clamped value so an out-of-range input that maps to the current port is a no-op.
                int clamped = Mathf.Clamp(value, 0, 65535);

                if (port == clamped)
                    return;

                port = clamped;
                Speaker.Instance.ReloadProvider();
            }
        }

        /// <summary>User name for MaryTTS.</summary>
        public string Username
        {
            get => username;
            set
            {
                if (username == value)
                    return;

                username = value;
                Speaker.Instance.ReloadProvider();
            }
        }

        /// <summary>User password for MaryTTS.</summary>
        public string Password
        {
            get => password;
            set
            {
                if (password == value)
                    return;

                password = value;
                Speaker.Instance.ReloadProvider();
            }
        }

        /// <summary>Input type for MaryTTS.</summary>
        public Crosstales.RTVoice.Model.Enum.MaryTTSType Type
        {
            get => type;
            set => type = value;
        }

        public override string AudioFileExtension => ".wav";

        public override AudioType AudioFileType => AudioType.WAV;

        public override string DefaultVoiceName => "cmu-rms-hsmm";

        public override bool isWorkingInEditor => false;

        public override bool isWorkingInPlaymode => true;

        public override int MaxTextLength => 256000;

        public override bool isSpeakNativeSupported => false;

        public override bool isSpeakSupported => true;

        public override bool isPlatformSupported => true;

        public override bool isSSMLSupported => true;

        public override bool isOnlineService => true;

        public override bool hasCoRoutines => true;

        public override bool isIL2CPPSupported => true;

        public override bool hasVoicesInEditor => true;

        public override int MaxSimultaneousSpeeches => 0;

        #endregion


        #region MonoBehaviour methods

#if CT_DEVELOP
        private void Awake()
        {
            url = "https://marytts.crosstales.com/";
            port = 443;
            username = "rtvdemo";
            password = APIKeys.MaryTTS;
            type = Crosstales.RTVoice.Model.Enum.MaryTTSType.SSML;
        }
#endif

        #endregion


        #region Implemented methods

        /// <summary>
        /// Loads the voices from the configured server. A reload is forced when the URL, port, user name or
        /// password differ from the values seen by the previous call.
        /// </summary>
        /// <param name="forceReload">Reload the voices even if some are already cached (default: false).</param>
        public override void Load(bool forceReload = false)
        {
            bool reload = forceReload;

            if (URL != lastUrl || Port != lastPort || Username != lastUser || Password != lastPassword)
            {
                lastUrl = URL;
                lastPort = Port;
                lastUser = Username;
                lastPassword = Password;

                reload = true;
            }

            // A null cache counts as "no voices"; "cachedVoices?.Count == 0" would evaluate to false for it.
            bool noVoices = cachedVoices == null || cachedVoices.Count == 0;

            if (!Crosstales.Common.Util.NetworkHelper.isInternetAvailable || !(noVoices || reload))
            {
                onVoicesReady();
                return;
            }

            // Reset the endpoint when the URL was cleared, so requests never go to a previous server.
            uri = string.IsNullOrEmpty(URL)
                ? null
                : Crosstales.Common.Util.NetworkHelper.CleanUrl(URL, false, false) + ":" + Port;

            if (string.IsNullOrEmpty(Username))
            {
                // Drop credentials left over from an earlier configuration.
                headers.Remove(authorizationHeader);
            }
            else
            {
                // ASCII encoding replaces characters outside of ASCII with '?'.
                headers[authorizationHeader] = "Basic " + System.Convert.ToBase64String(System.Text.Encoding.ASCII.GetBytes(Username + ":" + Password));
            }

            if (Crosstales.RTVoice.Util.Helper.isEditorMode)
            {
#if UNITY_EDITOR
                getVoicesInEditor();
#endif
            }
            else if (!isLoading)
            {
                isLoading = true;
                StartCoroutine(getVoices());
            }
        }

        /// <summary>Speaks the text of the wrapper in "native" mode (delegates to the shared speak routine).</summary>
        /// <param name="wrapper">Wrapper containing the data to speak.</param>
        public override IEnumerator SpeakNative(Crosstales.RTVoice.Model.Wrapper wrapper)
        {
            yield return speak(wrapper, true);
        }

        /// <summary>Speaks the text of the wrapper (delegates to the shared speak routine).</summary>
        /// <param name="wrapper">Wrapper containing the data to speak.</param>
        public override IEnumerator Speak(Crosstales.RTVoice.Model.Wrapper wrapper)
        {
            yield return speak(wrapper, false);
        }

        /// <summary>
        /// Requests the audio for the wrapper from the server and hands the downloaded bytes to
        /// processAudioFile. Not supported under WebGL.
        /// </summary>
        /// <param name="wrapper">Wrapper containing the data to generate.</param>
        public override IEnumerator Generate(Crosstales.RTVoice.Model.Wrapper wrapper)
        {
#if !UNITY_WEBGL
            if (wrapper == null)
            {
                Debug.LogWarning("'wrapper' is null!");
                yield break;
            }

            if (string.IsNullOrEmpty(wrapper.Text))
            {
                Debug.LogWarning("'wrapper.Text' is null or empty: " + wrapper);
                yield break;
            }

            if (!Crosstales.Common.Util.NetworkHelper.isInternetAvailable)
            {
                const string offlineMessage = "Internet is not available - can't use MaryTTS right now!";
                Debug.LogError(offlineMessage);
                onErrorInfo(wrapper, offlineMessage);
                yield break;
            }

            if (uri == null)
                yield break;

            yield return null; //return to the main process (uid)

            string voiceCulture = getVoiceCulture(wrapper);
            string voiceName = getVoiceName(wrapper);

            silence = false;

            onSpeakAudioGenerationStart(wrapper);

            string request = buildRequest(wrapper, voiceCulture, voiceName);

            using (UnityWebRequest www = UnityWebRequest.Get(request.Trim()))
            {
                applyHeaders(www);

                www.downloadHandler = new DownloadHandlerBuffer();

                yield return www.SendWebRequest();

                if (isSuccessful(www))
                {
                    processAudioFile(wrapper, wrapper.OutputFile, false, www.downloadHandler.data);
                }
                else
                {
                    string failureMessage = "Could not generate the speech: " + wrapper + System.Environment.NewLine + "WWW error: " + www.error;
                    Debug.LogError(failureMessage);
                    onErrorInfo(wrapper, failureMessage);
                }
            }
#else
            Debug.LogError("'Generate' is not supported under WebGL!");
            yield return null;
#endif
        }

        #endregion


        #region Private methods

        /// <summary>Validates the wrapper, builds the request and plays the resulting audio via playAudioFile.</summary>
        /// <param name="wrapper">Wrapper containing the data to speak.</param>
        /// <param name="isNative">Forwarded to playAudioFile; when true, onSpeakAudioGenerationStart is not raised.</param>
        private IEnumerator speak(Crosstales.RTVoice.Model.Wrapper wrapper, bool isNative)
        {
            if (wrapper == null)
            {
                Debug.LogWarning("'wrapper' is null!");
                yield break;
            }

            if (string.IsNullOrEmpty(wrapper.Text))
            {
                Debug.LogWarning("'wrapper.Text' is null or empty: " + wrapper);
                yield break;
            }

            if (wrapper.Source == null)
            {
                Debug.LogWarning("'wrapper.Source' is null: " + wrapper);
                yield break;
            }

            if (!Crosstales.Common.Util.NetworkHelper.isInternetAvailable)
            {
                const string offlineMessage = "Internet is not available - can't use MaryTTS right now!";
                Debug.LogError(offlineMessage);
                onErrorInfo(wrapper, offlineMessage);
                yield break;
            }

            if (uri == null)
                yield break;

            yield return null; //return to the main process (uid)

            string voiceCulture = getVoiceCulture(wrapper);
            string voiceName = getVoiceName(wrapper);

            silence = false;

            if (!isNative)
                onSpeakAudioGenerationStart(wrapper);

            string request = buildRequest(wrapper, voiceCulture, voiceName);

            yield return playAudioFile(wrapper, request, wrapper.OutputFile, AudioFileType, isNative, false, headers);
        }

        /// <summary>Builds the "/process" request URL for the wrapper according to the configured input type.</summary>
        /// <param name="wrapper">Wrapper providing text, rate, pitch and volume.</param>
        /// <param name="voiceCulture">Locale sent as LOCALE and embedded as xml:lang.</param>
        /// <param name="voiceName">Voice sent as VOICE.</param>
        /// <returns>Complete request URL.</returns>
        private string buildRequest(Crosstales.RTVoice.Model.Wrapper wrapper, string voiceCulture, string voiceName)
        {
            System.Text.StringBuilder sbXML = new System.Text.StringBuilder();
            string inputText;
            string inputType;

            switch (Type)
            {
                case Crosstales.RTVoice.Model.Enum.MaryTTSType.RAWMARYXML:
                    sbXML.Append(xmlDeclaration);
                    sbXML.Append(maryXmlOpenPrefix).Append(voiceCulture).Append("\">");
                    sbXML.Append(prepareProsody(wrapper));
                    sbXML.Append(maryXmlClose);
                    inputText = sbXML.ToString();
                    inputType = "RAWMARYXML";
                    break;

                case Crosstales.RTVoice.Model.Enum.MaryTTSType.EMOTIONML:
                    sbXML.Append(xmlDeclaration);
                    sbXML.Append(emotionMlOpen);
                    sbXML.Append(getValidXML(wrapper.Text));
                    sbXML.Append(emotionMlClose);
                    inputText = sbXML.ToString();
                    inputType = "EMOTIONML";
                    break;

                case Crosstales.RTVoice.Model.Enum.MaryTTSType.SSML:
                    sbXML.Append(xmlDeclaration);
                    sbXML.Append(ssmlOpenPrefix).Append(voiceCulture).Append("\">");
                    sbXML.Append(prepareProsody(wrapper));
                    sbXML.Append(ssmlClose);
                    inputText = sbXML.ToString();
                    inputType = "SSML";
                    break;

                default: //TEXT
                    inputText = wrapper.Text;
                    inputType = "TEXT";
                    break;
            }

            if (Crosstales.RTVoice.Util.Constants.DEV_DEBUG)
                Debug.Log(sbXML);

            System.Text.StringBuilder request = new System.Text.StringBuilder(uri);
            request.Append("/process?INPUT_TEXT=").Append(System.Uri.EscapeDataString(inputText));
            request.Append("&INPUT_TYPE=").Append(inputType);
            request.Append("&OUTPUT_TYPE=AUDIO&AUDIO=WAVE_FILE&LOCALE=").Append(System.Uri.EscapeDataString(voiceCulture ?? string.Empty));
            request.Append("&VOICE=").Append(System.Uri.EscapeDataString(voiceName ?? string.Empty));

            if (Mathf.Abs(wrapper.Volume - 1f) > Crosstales.Common.Util.BaseConstants.FLOAT_TOLERANCE)
            {
                // Format with the base culture: the current culture may use ',' as decimal separator.
                request.Append("&effect_Volume_selected=on&effect_Volume_parameters=amount:");
                request.Append(wrapper.Volume.ToString(Crosstales.RTVoice.Util.Helper.BaseCulture));
            }

            return request.ToString();
        }

        /// <summary>Copies the configured headers (e.g. the authorization) onto the request.</summary>
        private void applyHeaders(UnityWebRequest www)
        {
            if (headers == null)
                return;

            foreach (System.Collections.Generic.KeyValuePair<string, string> kvp in headers)
            {
                www.SetRequestHeader(kvp.Key, kvp.Value);
            }
        }

        /// <summary>Returns true if the finished request completed without an error.</summary>
        private static bool isSuccessful(UnityWebRequest www)
        {
#if UNITY_2020_1_OR_NEWER
            // Checking for Success also rejects DataProcessingError, not only protocol/connection errors.
            return www.result == UnityWebRequest.Result.Success;
#else
            return !www.isHttpError && !www.isNetworkError;
#endif
        }

        /// <summary>Coroutine that downloads the voice list from "/voices" and updates the cached voices.</summary>
        private IEnumerator getVoices()
        {
            if (uri != null)
            {
                using (UnityWebRequest www = UnityWebRequest.Get(uri + "/voices"))
                {
                    applyHeaders(www);

                    www.downloadHandler = new DownloadHandlerBuffer();

                    yield return www.SendWebRequest();

                    if (isSuccessful(www))
                    {
                        cachedVoices = parseVoices(www.downloadHandler.text);

                        if (Common.Util.BaseConstants.DEV_DEBUG)
                            Debug.Log("Voices read: " + cachedVoices.CTDump());
                    }
                    else
                    {
                        string errorMessage = "Could not get the voices: " + www.error;
                        Debug.LogError(errorMessage);
                        onErrorInfo(null, errorMessage);
                    }
                }
            }

            onVoicesReady();

            // Cleared only after the listeners ran, so a Load() triggered by them cannot start a second coroutine.
            isLoading = false;
        }

        /// <summary>
        /// Parses the "/voices" response: one voice per line, space-separated fields of which the first three
        /// are used as name, culture and gender.
        /// </summary>
        /// <param name="content">Raw response text.</param>
        /// <returns>Voices ordered by name; lines with fewer than three fields are skipped with a warning.</returns>
        private static System.Collections.Generic.List<Crosstales.RTVoice.Model.Voice> parseVoices(string content)
        {
            System.Collections.Generic.List<Crosstales.RTVoice.Model.Voice> voices = new System.Collections.Generic.List<Crosstales.RTVoice.Model.Voice>(40);

            if (string.IsNullOrEmpty(content))
                return voices;

            foreach (string rawVoice in content.Split('\n'))
            {
                // Trim to drop the '\r' of CRLF line endings before the fields are split.
                string line = rawVoice.Trim();

                if (line.Length == 0)
                    continue;

                string[] fields = line.Split(' ');

                if (fields.Length < 3)
                {
                    Debug.LogWarning("Problem preparing voice: " + rawVoice + " - expected at least 3 space-separated fields");
                    continue;
                }

                try
                {
                    voices.Add(new Crosstales.RTVoice.Model.Voice(fields[0], "MaryTTS voice: " + fields[0], Crosstales.RTVoice.Util.Helper.StringToGender(fields[2]), "unknown", fields[1]));
                }
                catch (System.Exception ex)
                {
                    Debug.LogWarning("Problem preparing voice: " + rawVoice + " - " + ex);
                }
            }

            return voices.OrderBy(voice => voice.Name).ToList();
        }

        /// <summary>
        /// Wraps the text of the wrapper in a prosody element when rate, pitch or volume differ from 1 and
        /// passes the result through getValidXML.
        /// </summary>
        /// <param name="wrapper">Wrapper providing text, rate, pitch and volume.</param>
        /// <returns>Text, optionally enclosed in a prosody element.</returns>
        private static string prepareProsody(Crosstales.RTVoice.Model.Wrapper wrapper)
        {
            float tolerance = Crosstales.Common.Util.BaseConstants.FLOAT_TOLERANCE;

            bool hasRate = Mathf.Abs(wrapper.Rate - 1f) > tolerance;
            bool hasPitch = Mathf.Abs(wrapper.Pitch - 1f) > tolerance;
            bool hasVolume = Mathf.Abs(wrapper.Volume - 1f) > tolerance;

            if (!hasRate && !hasPitch && !hasVolume)
                return getValidXML(wrapper.Text);

            System.Text.StringBuilder sbXML = new System.Text.StringBuilder();

            sbXML.Append("<prosody");

            if (hasRate)
            {
                // Rates above 1 are damped by half; the value is emitted as a signed percentage.
                float _rate = wrapper.Rate > 1 ? (wrapper.Rate - 1f) * 0.5f : wrapper.Rate - 1f;

                sbXML.Append(" rate=\"");
                sbXML.Append(_rate.ToString(_rate >= 0f ? "+#0%" : "#0%", Crosstales.RTVoice.Util.Helper.BaseCulture));
                sbXML.Append("\"");
            }

            if (hasPitch)
            {
                float _pitch = wrapper.Pitch - 1f;

                sbXML.Append(" pitch=\"");
                sbXML.Append(_pitch.ToString(_pitch >= 0f ? "+#0%" : "#0%", Crosstales.RTVoice.Util.Helper.BaseCulture));
                sbXML.Append("\"");
            }

            if (hasVolume)
            {
                sbXML.Append(" volume=\"");
                sbXML.Append((100 * wrapper.Volume).ToString("#0", Crosstales.RTVoice.Util.Helper.BaseCulture));
                sbXML.Append("\"");
            }

            sbXML.Append(">");
            sbXML.Append(wrapper.Text);
            sbXML.Append("</prosody>");

            return getValidXML(sbXML.ToString());
        }

        /// <summary>Returns the culture of the wrapper's voice or "en-US" if none is set.</summary>
        /// <param name="wrapper">Wrapper whose voice is inspected.</param>
        private static string getVoiceCulture(Crosstales.RTVoice.Model.Wrapper wrapper)
        {
            string culture = wrapper.Voice?.Culture;

            if (!string.IsNullOrEmpty(culture))
                return culture;

            if (Crosstales.RTVoice.Util.Config.DEBUG)
                Debug.LogWarning("'wrapper.Voice' or 'wrapper.Voice.Culture' is null! Using the 'default' English voice.");

            //always use English as fallback
            return "en-US";
        }

        #endregion


        #region Editor-only methods

#if UNITY_EDITOR
        /// <summary>Not supported for MaryTTS; logs an error.</summary>
        public override void GenerateInEditor(Crosstales.RTVoice.Model.Wrapper wrapper)
        {
            Debug.LogError("'GenerateInEditor' is not supported for MaryTTS!");
        }

        /// <summary>Not supported for MaryTTS; logs an error.</summary>
        public override void SpeakNativeInEditor(Crosstales.RTVoice.Model.Wrapper wrapper)
        {
            Debug.LogError("'SpeakNativeInEditor' is not supported for MaryTTS!");
        }

        /// <summary>Synchronously downloads the voice list from "/voices" (no coroutines in edit mode).</summary>
        private void getVoicesInEditor()
        {
            if (!Crosstales.Common.Util.NetworkHelper.isInternetAvailable)
            {
                const string errorMessage = "Internet is not available - can't use MaryTTS right now!";
                Debug.LogError(errorMessage);
                return;
            }

            if (uri != null)
            {
                try
                {
                    // NOTE(review): this assigns a static callback on ServicePointManager, so it is not limited
                    // to this request - confirm that this is intended.
                    System.Net.ServicePointManager.ServerCertificateValidationCallback = Crosstales.Common.Util.NetworkHelper.RemoteCertificateValidationCallback;

                    using (System.Net.WebClient client = new Crosstales.Common.Util.CTWebClient())
                    {
                        if (headers != null)
                        {
                            foreach (System.Collections.Generic.KeyValuePair<string, string> kvp in headers)
                            {
                                client.Headers.Add(kvp.Key, kvp.Value);
                            }
                        }

                        using (System.IO.Stream stream = client.OpenRead(uri + "/voices"))
                        {
                            if (stream == null)
                            {
                                Debug.LogError("Stream to voices URI was null: " + uri + "/voices");
                            }
                            else
                            {
                                using (System.IO.StreamReader reader = new System.IO.StreamReader(stream))
                                {
                                    string content = reader.ReadToEnd();

                                    if (Crosstales.RTVoice.Util.Config.DEBUG)
                                        Debug.Log(content);

                                    cachedVoices = parseVoices(content);

                                    if (Common.Util.BaseConstants.DEV_DEBUG)
                                        Debug.Log("Voices read: " + cachedVoices.CTDump());
                                }
                            }
                        }
                    }
                }
                catch (System.Exception ex)
                {
                    Debug.LogError(ex);
                }
            }

            onVoicesReady();
        }
#endif

        #endregion
    }
}
// © 2016-2023 crosstales LLC (https://www.crosstales.com)