using UnityEngine;
using System.Collections;
using System.Linq;
namespace Crosstales.RTVoice.SAPI
{
/// <summary>
/// Custom voice provider (TTS-system) with all callbacks that speaks through the native "SAPI_UNITY_DLL" bridge on Windows.
/// NOTE: please make sure you understand the Wrapper and its variables
/// </summary>
[ExecuteInEditMode]
public class VoiceProviderSAPI : Crosstales.RTVoice.Provider.BaseCustomVoiceProvider
{
#region Variables
// Speak flags handed to Uni_Voice_SpeakEX as 'voiceFlag'; the callers in this class combine them with bitwise OR.
// The names and values appear to mirror the SAPI SPEAKFLAGS enumeration - TODO confirm against the native DLL.
// The commented-out entries are not used by this provider.
//private const int SPF_DEFAULT = 0;
private const int SPF_ASYNC = 1;
private const int SPF_PURGEBEFORESPEAK = 2;
//private const int SPF_IS_FILENAME = 4;
private const int SPF_IS_XML = 8;
//private const int SPF_IS_NOT_XML = 16;
//private const int SPF_PERSIST_XML = 32;
//private const int SPF_NLP_SPEAK_PUNC = 64;
//private const int SPF_PARSE_SAPI = 128;
private const int SPF_PARSE_SSML = 256;
// Set to true when OnApplicationQuit has called Uni_Voice_Close(); Silence() skips its native call while this is true.
// Static, i.e. shared by all instances of this provider.
private static bool isDestroyed;
// True between the Uni_Voice_Init() call in Start and the Uni_Voice_Close() call in OnApplicationQuit.
// Static, i.e. shared by all instances of this provider.
private static bool isInitalized;
#endregion
#region Bridge declaration and methods
#if UNITY_STANDALONE_WIN || UNITY_EDITOR_WIN
/// <summary>Name of the native plugin every bridge function below is imported from.</summary>
private const string sapiBridgeLibrary = "SAPI_UNITY_DLL";

/// <summary>Initializes the native voice; called once from Start. The meaning of the returned code is not visible here.</summary>
[System.Runtime.InteropServices.DllImport(sapiBridgeLibrary)]
private static extern int Uni_Voice_Init();

/// <summary>Shuts the native voice down; called from OnApplicationQuit.</summary>
[System.Runtime.InteropServices.DllImport(sapiBridgeLibrary)]
private static extern void Uni_Voice_Close();

/// <summary>Queries the native voice state. This class always passes 0 and treats a result of 2 as "still speaking".</summary>
[System.Runtime.InteropServices.DllImport(sapiBridgeLibrary)]
private static extern int Uni_Voice_Status(int voiceStat);

/// <summary>Speaks the given text (marshalled as a wide string) with the DLL's default flags.</summary>
[System.Runtime.InteropServices.DllImport(sapiBridgeLibrary)]
private static extern int Uni_Voice_Speak(
    [System.Runtime.InteropServices.MarshalAs(System.Runtime.InteropServices.UnmanagedType.LPWStr)]
    string TextToSpeech); // SPF_ASYNC & SPF_IS_XML

/// <summary>Speaks the given text (marshalled as a wide string) with a caller-supplied combination of SPF_* flags.</summary>
[System.Runtime.InteropServices.DllImport(sapiBridgeLibrary)]
private static extern int Uni_Voice_SpeakEX(
    [System.Runtime.InteropServices.MarshalAs(System.Runtime.InteropServices.UnmanagedType.LPWStr)]
    string TextToSpeech,
    int voiceFlag); // CUSTOM FLAG

/// <summary>Sets the output volume.</summary>
[System.Runtime.InteropServices.DllImport(sapiBridgeLibrary)]
private static extern int Uni_Voice_Volume(int volume); // zero to 100

/// <summary>Sets the speaking rate.</summary>
[System.Runtime.InteropServices.DllImport(sapiBridgeLibrary)]
private static extern int Uni_Voice_Rate(int rate); // -10 to 10

/// <summary>Pauses the native voice.</summary>
[System.Runtime.InteropServices.DllImport(sapiBridgeLibrary)]
private static extern void Uni_Voice_Pause();

/// <summary>Resumes the native voice.</summary>
[System.Runtime.InteropServices.DllImport(sapiBridgeLibrary)]
private static extern void Uni_Voice_Resume();
#endif
#endregion
#region Properties
// Capability description of this provider. Every property is a constant, except for the
// platform checks which delegate to Crosstales.RTVoice.Util.Helper.

/// <summary>Always "none"; Generate is not supported by this provider.</summary>
public override string AudioFileExtension => "none";
/// <summary>Always AudioType.UNKNOWN.</summary>
public override AudioType AudioFileType => AudioType.UNKNOWN;
/// <summary>Name reported as the default voice.</summary>
public override string DefaultVoiceName => "David";
/// <summary>True when Helper.isWindowsEditor is true.</summary>
public override bool isWorkingInEditor => Crosstales.RTVoice.Util.Helper.isWindowsEditor;
/// <summary>Always true.</summary>
public override bool isWorkingInPlaymode => true;
/// <summary>True on a Windows platform or in the Windows editor (as reported by Helper).</summary>
public override bool isPlatformSupported => Crosstales.RTVoice.Util.Helper.isWindowsPlatform || Crosstales.RTVoice.Util.Helper.isWindowsEditor;
/// <summary>Maximum text length reported by this provider (presumably in characters - TODO confirm).</summary>
public override int MaxTextLength => 256000;
/// <summary>Always true; SpeakNative is implemented through the native bridge.</summary>
public override bool isSpeakNativeSupported => true;
/// <summary>Always false, although Speak is overridden and forwards to the same native path as SpeakNative.</summary>
public override bool isSpeakSupported => false;
/// <summary>Always true; the speak coroutine has a dedicated path that passes SPF_PARSE_SSML.</summary>
public override bool isSSMLSupported => true;
/// <summary>Always false.</summary>
public override bool isOnlineService => false;
/// <summary>Always true; SpeakNative, Speak and Generate return IEnumerator.</summary>
public override bool hasCoRoutines => true;
/// <summary>Always true.</summary>
public override bool isIL2CPPSupported => true;
/// <summary>Always true.</summary>
public override bool hasVoicesInEditor => true;
/// <summary>Always 1.</summary>
public override int MaxSimultaneousSpeeches => 1;
#endregion
#region MonoBehaviour methods
/// <summary>
/// Runs the base start-up and initializes the native SAPI bridge once, provided the platform is
/// supported and this instance is the custom provider registered at the Speaker.
/// </summary>
protected override void Start()
{
    base.Start();

#if UNITY_STANDALONE_WIN || UNITY_EDITOR_WIN
    if (isPlatformSupported && !isInitalized && Crosstales.RTVoice.Speaker.Instance.CustomProvider == this)
    {
        Uni_Voice_Init();
        isInitalized = true;

        // Both flags are static and therefore survive when the editor enters play mode without a
        // domain reload. Without this reset a previous OnApplicationQuit would leave isDestroyed
        // stuck at true, so Silence() would skip the native call and the bridge would never be
        // closed again.
        isDestroyed = false;
    }
#endif
}
/// <summary>
/// Closes the native bridge when the application quits, but only if it was initialized and has
/// not been closed already.
/// </summary>
private void OnApplicationQuit()
{
#if UNITY_STANDALONE_WIN || UNITY_EDITOR_WIN
    bool bridgeIsOpen = isInitalized && !isDestroyed;

    if (!bridgeIsOpen)
    {
        return;
    }

    Uni_Voice_Close();

    // Remember the shutdown so that no further native calls are attempted by Silence().
    isInitalized = false;
    isDestroyed = true;
#endif
}
#endregion
#region Implemented methods
/// <summary>
/// Collects the installed voices from the registry (HKLM\Software\Microsoft\Speech\Voices\Tokens),
/// stores them sorted by name in the voice cache and finally schedules 'onVoicesReady'.
/// The registry is only read when the cache is missing/empty or a reload is forced.
/// </summary>
/// <param name="forceReload">Re-reads the voices even if the cache is already filled (optional, default: false)</param>
public override void Load(bool forceReload = false)
{
#if UNITY_STANDALONE_WIN && NET_4_6
    // A cache that was never created counts as empty as well.
    if (forceReload || cachedVoices == null || cachedVoices.Count == 0)
    {
        try
        {
            const string speechTokens = "Software\\Microsoft\\Speech\\Voices\\Tokens";
            const string defaultLanguage = "409"; //en-US

            System.Collections.Generic.List<Crosstales.RTVoice.Model.Voice> voices = new System.Collections.Generic.List<Crosstales.RTVoice.Model.Voice>();

            using (Microsoft.Win32.RegistryKey registryKey = Microsoft.Win32.Registry.LocalMachine.OpenSubKey(speechTokens))
            {
                if (registryKey != null)
                {
                    foreach (string regKeyFound in registryKey.GetSubKeyNames())
                    {
                        string finalKey = $"HKEY_LOCAL_MACHINE\\{speechTokens}\\{regKeyFound}\\Attributes";
                        string voiceName = readVoiceAttribute(finalKey, "name");

                        if (string.IsNullOrEmpty(voiceName))
                        {
                            Debug.LogWarning("Voice ignored because it has no name: " + regKeyFound, this);
                            continue;
                        }

                        //desc = voice.GetDescription();
                        string desc = voiceName;
                        string gender = readVoiceAttribute(finalKey, "gender");
                        string age = readVoiceAttribute(finalKey, "age");
                        string lang = readVoiceAttribute(finalKey, "language");
                        string vendor = readVoiceAttribute(finalKey, "vendor");
                        string version = readVoiceAttribute(finalKey, "version");

                        // The attribute may hold a list of codes (e.g. "409;9"); only the first one is used.
                        string langToken;
                        if (string.IsNullOrEmpty(lang))
                        {
                            langToken = defaultLanguage;
                        }
                        else
                        {
                            string[] codes = lang.Split(',', ';');

                            if (codes.Length > 1)
                            {
                                langToken = codes[0];
                            }
                            else
                            {
                                // A single value longer than four hex digits is not a language id.
                                langToken = lang.Length > 4 ? defaultLanguage : lang;
                            }
                        }

                        // A malformed value must not abort the enumeration of all remaining voices.
                        if (!int.TryParse(langToken.Trim(), System.Globalization.NumberStyles.HexNumber, System.Globalization.CultureInfo.InvariantCulture, out int langCode))
                        {
                            Debug.LogWarning("Voice with name '" + voiceName + "' has a malformed language attribute: '" + lang + "' - using en-US instead.", this);
                            langCode = 0x409;
                        }

                        if (!Crosstales.RTVoice.Util.Helper.LocaleCodes.TryGetValue(langCode, out string culture))
                        {
                            Debug.LogWarning("Voice with name '" + voiceName + "' has an unknown language code: " +
                                             langCode + "(" + lang + ")!", this);
                            culture = "en-us";
                        }

                        voices.Add(new Crosstales.RTVoice.Model.Voice(voiceName, desc, Crosstales.RTVoice.Util.Helper.StringToGender(gender), age,
                            culture, regKeyFound, vendor, version));
                    }
                }

                cachedVoices = voices.OrderBy(s => s.Name).ToList();
            }
        }
        catch (System.Exception ex)
        {
            string errorMessage = "Could not get any voices: " + ex;
            Debug.LogError(errorMessage, this);
            onErrorInfo(null, errorMessage);
        }
    }
#else
    Debug.LogError("SAPI Unity is not supported under the current platform!", this);
#endif

    Invoke(nameof(onVoicesReady), 0.1f);
}

#if UNITY_STANDALONE_WIN && NET_4_6
/// <summary>
/// Reads one string attribute of a voice token from the registry.
/// </summary>
/// <param name="attributesKey">Full path of the token's "Attributes" key</param>
/// <param name="attributeName">Name of the value to read</param>
/// <returns>The value, or an empty string if the key or value is missing or is not a string</returns>
private static string readVoiceAttribute(string attributesKey, string attributeName)
{
    // 'as' instead of a hard cast: a value of another registry type must not throw.
    return Microsoft.Win32.Registry.GetValue(attributesKey, attributeName, "") as string ?? string.Empty;
}
#endif
/// <summary>
/// Not supported by this provider: logs an error and yields once (null) before the coroutine ends.
/// </summary>
/// <param name="wrapper">Ignored</param>
/// <returns>Coroutine enumerator</returns>
public override IEnumerator Generate(Crosstales.RTVoice.Model.Wrapper wrapper)
{
Debug.LogError("'Generate' is not supported for SAPI Unity!", this);
yield return null;
}
/// <summary>
/// Speaks the text of the wrapper through the native bridge by running the private 'speak'
/// coroutine with isNative = true.
/// </summary>
/// <param name="wrapper">Wrapper containing the data to speak</param>
/// <returns>Coroutine enumerator</returns>
public override IEnumerator SpeakNative(Crosstales.RTVoice.Model.Wrapper wrapper)
{
yield return speak(wrapper, true);
}
/// <summary>
/// Speaks the text of the wrapper by running the private 'speak' coroutine with isNative = false,
/// which additionally raises the audio-generation start/complete callbacks before speaking.
/// </summary>
/// <param name="wrapper">Wrapper containing the data to speak</param>
/// <returns>Coroutine enumerator</returns>
public override IEnumerator Speak(Crosstales.RTVoice.Model.Wrapper wrapper)
{
yield return speak(wrapper, false);
}
/// <summary>
/// Stops the current speech: asks the native voice to purge its queue by speaking a single blank
/// and then runs the base implementation.
/// </summary>
public override void Silence()
{
    // Same symbols as the bridge declarations and the speak coroutine; with UNITY_STANDALONE_WIN
    // alone, a Windows editor with a different build target could start speeches but never stop them.
#if UNITY_STANDALONE_WIN || UNITY_EDITOR_WIN
    if (!isDestroyed)
    {
        // No native calls after Uni_Voice_Close().
        Uni_Voice_SpeakEX(" ", SPF_ASYNC | SPF_PURGEBEFORESPEAK);
    }
#endif

    base.Silence();
}
/// <summary>
/// Silences the speech with the given uid. This first calls the parameterless Silence()
/// and then the base implementation for the uid.
/// </summary>
/// <param name="uid">UID of the speech, passed on to the base implementation</param>
public override void Silence(string uid)
{
Silence();
base.Silence(uid);
}
#endregion
#region Private methods
/// <summary>
/// Coroutine behind Speak and SpeakNative: validates the wrapper, applies volume and rate,
/// starts the asynchronous native speech and waits until the native status leaves the value 2
/// or 'silence' is set. Raises onSpeakStart and onSpeakComplete around the speech.
/// </summary>
/// <param name="wrapper">Wrapper containing the data to speak</param>
/// <param name="isNative">If false, the audio-generation start/complete callbacks are raised as well</param>
/// <returns>Coroutine enumerator</returns>
private IEnumerator speak(Crosstales.RTVoice.Model.Wrapper wrapper, bool isNative)
{
#if UNITY_STANDALONE_WIN || UNITY_EDITOR_WIN
    if (wrapper == null)
    {
        Debug.LogWarning("'wrapper' is null!", this);
        yield break;
    }

    if (string.IsNullOrEmpty(wrapper.Text))
    {
        Debug.LogWarning("'wrapper.Text' is null or empty!", this);
        yield break;
    }

    yield return null; //return to the main process (uid)

    string voice = getVoiceName(wrapper);
    int rate = calculateRate(wrapper.Rate);
    int volume = calculateVolume(wrapper.Volume);

    silence = false;

    if (!isNative)
    {
        //just fake events if some code needs the feedback...
        onSpeakAudioGenerationStart(wrapper);
        yield return null;
        onSpeakAudioGenerationComplete(wrapper);
    }

    onSpeakStart(wrapper);

    Uni_Voice_Volume(volume);
    Uni_Voice_Rate(rate);

    // Decide on the text and the parser flag first, then issue a single native call.
    string textToSpeak;
    int speakFlags = SPF_ASYNC | SPF_PURGEBEFORESPEAK;

    if (wrapper.ForceSSML && !Crosstales.RTVoice.Speaker.Instance.AutoClearTags)
    {
        textToSpeak = prepareText(wrapper, voice);
        speakFlags |= SPF_PARSE_SSML;
    }
    else
    {
        // NOTE(review): the empty prefix means 'voice' is not used on this path - confirm that no
        // voice-selection markup is missing here.
        textToSpeak = "" + getValidXML(wrapper.Text);
        speakFlags |= SPF_IS_XML;
    }

    Uni_Voice_SpeakEX(textToSpeak, speakFlags);

    yield return new WaitForSeconds(0.1f);

    // Poll once per frame; 2 is presumably the native "is speaking" state - TODO confirm.
    do
    {
        yield return null;
    } while (!silence && Uni_Voice_Status(0) == 2);

    if (Crosstales.RTVoice.Util.Config.DEBUG)
    {
        Debug.Log("Text spoken: " + wrapper.Text, this);
    }

    onSpeakComplete(wrapper);
#else
    yield return null;
#endif
}
/// <summary>
/// Builds the text that is handed to the native voice on the SSML path.
/// If SSML is forced and the Speaker does not auto-clear tags, the text is assembled in a
/// StringBuilder and passed through getValidXML; otherwise wrapper.Text is returned unchanged.
/// </summary>
/// <param name="wrapper">Wrapper containing the text, pitch and the ForceSSML switch</param>
/// <param name="voiceName">Name of the voice; not referenced by the code as written</param>
/// <returns>The prepared text</returns>
private static string prepareText(Crosstales.RTVoice.Model.Wrapper wrapper, string voiceName)
{
if (wrapper.ForceSSML && !Crosstales.RTVoice.Speaker.Instance.AutoClearTags)
{
System.Text.StringBuilder sbXML = new System.Text.StringBuilder();
// NOTE(review): every Append("") in this method adds an empty string, so as written the builder
// ends up containing only wrapper.Text. The literals look like markup (document header, voice and
// pitch elements) that got lost from this copy - confirm against the upstream file before editing.
sbXML.Append("");
sbXML.Append("");
sbXML.Append("");
// Pitch offset relative to the neutral value 1.
float _pitch = wrapper.Pitch - 1f;
// Only a pitch that differs from neutral by more than the tolerance gets an opening part...
if (Mathf.Abs(_pitch) > Crosstales.Common.Util.BaseConstants.FLOAT_TOLERANCE)
{
sbXML.Append("");
}
sbXML.Append(wrapper.Text);
// ...and the matching closing part after the text.
if (Mathf.Abs(_pitch) > Crosstales.Common.Util.BaseConstants.FLOAT_TOLERANCE)
sbXML.Append("");
sbXML.Append("");
sbXML.Append("");
return getValidXML(sbXML.ToString());
}
return wrapper.Text;
}
/// <summary>
/// Converts a volume factor into the 0-100 scale expected by the native voice.
/// Values outside 0-1 are cropped; fractions of a percent are truncated.
/// </summary>
/// <param name="volume">Volume factor (0 = mute, 1 = full volume)</param>
/// <returns>Volume between 0 and 100</returns>
private static int calculateVolume(float volume)
{
    // Crop in the float domain: casting NaN or a value beyond the int range to int first would
    // yield an unspecified result before the clamp could correct it.
    if (float.IsNaN(volume) || volume <= 0f)
    {
        return 0;
    }

    if (volume >= 1f)
    {
        return 100;
    }

    return (int)(100 * volume);
}
// Inclusive lower bounds of the speed-up steps 10, 9, ... 2 (in that order); anything above 1 that
// stays below the last bound is step 1.
private static readonly float[] fasterLowerBounds = {2.75f, 2.6f, 2.35f, 2.2f, 2f, 1.8f, 1.6f, 1.4f, 1.2f};

// Inclusive upper bounds of the slow-down steps -10, -9, ... -2 (in that order); anything below 1
// that exceeds the last bound is step -1.
private static readonly float[] slowerUpperBounds = {0.3f, 0.4f, 0.45f, 0.5f, 0.55f, 0.6f, 0.7f, 0.8f, 0.9f};

/// <summary>
/// Maps a rate factor (1 = normal speed) to the integer step passed to Uni_Voice_Rate
/// (noted there as -10 to 10).
/// A rate within the float tolerance of 1 results in 0.
/// </summary>
/// <param name="rate">Rate factor; allowed range: 0 - 3f, all other values are cropped</param>
/// <returns>Rate step between -10 and 10</returns>
private static int calculateRate(float rate)
{
    int step = 0;

    bool differsFromNormal = Mathf.Abs(rate - 1f) > Crosstales.Common.Util.BaseConstants.FLOAT_TOLERANCE;

    if (differsFromNormal)
    {
        if (rate > 1f)
        {
            //larger than 1
            step = 1;

            for (int ii = 0; ii < fasterLowerBounds.Length; ii++)
            {
                if (rate >= fasterLowerBounds[ii])
                {
                    step = 10 - ii;
                    break;
                }
            }
        }
        else
        {
            //smaller than 1
            if (rate < 1f)
            {
                step = -1;
            }

            for (int ii = 0; ii < slowerUpperBounds.Length; ii++)
            {
                if (rate <= slowerUpperBounds[ii])
                {
                    step = ii - 10;
                    break;
                }
            }
        }
    }

    if (Crosstales.RTVoice.Util.Constants.DEV_DEBUG)
    {
        Debug.Log("calculateRate: " + step + " - " + rate);
    }

    return step;
}
#endregion
#region Editor-only methods
#if UNITY_EDITOR
/// <summary>
/// Not supported by this provider: only logs an error.
/// </summary>
/// <param name="wrapper">Ignored</param>
public override void GenerateInEditor(Crosstales.RTVoice.Model.Wrapper wrapper)
{
Debug.LogError("'GenerateInEditor' is not supported for SAPI Unity!", this);
}
/// <summary>
/// Editor variant of SpeakNative (Windows editor only): starts the native speech and blocks the
/// calling thread in 50ms steps until the native status leaves the value 2 or 'silence' is set.
/// Raises onSpeakStart and onSpeakComplete around the speech.
/// </summary>
/// <param name="wrapper">Wrapper containing the data to speak</param>
public override void SpeakNativeInEditor(Crosstales.RTVoice.Model.Wrapper wrapper)
{
#if UNITY_EDITOR_WIN
    if (wrapper == null)
    {
        Debug.LogWarning("'wrapper' is null!", this);
        return;
    }

    if (string.IsNullOrEmpty(wrapper.Text))
    {
        Debug.LogWarning("'wrapper.Text' is null or empty!", this);
        return;
    }

    const int speakFlags = SPF_ASYNC | SPF_IS_XML | SPF_PURGEBEFORESPEAK;

    // NOTE(review): the voice name is resolved but not used below - confirm that no
    // voice-selection markup is missing in front of the text.
    string voice = getVoiceName(wrapper);
    int rate = calculateRate(wrapper.Rate);
    int volume = calculateVolume(wrapper.Volume);

    silence = false;

    onSpeakStart(wrapper);

    Uni_Voice_Volume(volume);
    Uni_Voice_Rate(rate);
    Uni_Voice_Resume();
    Uni_Voice_SpeakEX("" + getValidXML(wrapper.Text), speakFlags);

    // There is no coroutine here, so the calling thread itself waits for the end of the speech.
    // 2 is presumably the native "is speaking" state - TODO confirm.
    do
    {
        System.Threading.Thread.Sleep(50);
    } while (!silence && Uni_Voice_Status(0) == 2);

    if (Crosstales.RTVoice.Util.Config.DEBUG)
    {
        Debug.Log("Text spoken: " + wrapper.Text, this);
    }

    onSpeakComplete(wrapper);
#endif
}
#endif
#endregion
}
}
// © 2019-2023 crosstales LLC (https://www.crosstales.com)