ai-speech-build/src/view/components/speechControl.vue

757 lines
22 KiB
Vue
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<!--
* @Author: 季万俊
* @Date: 2025-08-22 17:03:18
* @Description:
-->
<template>
<div class="container">
<!-- Voice control widget pinned to the bottom-right corner of the viewport -->
<div class="voice-control-container">
<div class="status-text">点击开始语音识别</div>
<button class="voice-btn" id="voice-btn" @click="toggleListening">
<svg
style="
position: absolute;
left: 50%;
top: 50%;
transform: translate3d(-50%, -50%, 0);
"
t="1756171270753"
class="icon"
viewBox="0 0 1024 1024"
version="1.1"
xmlns="http://www.w3.org/2000/svg"
p-id="14442"
width="42"
height="70"
>
<path
d="M704 192v368c0 52.8-21.6 100.8-56.4 135.6S564.8 752 512 752c-105.6 0-192-86.4-192-192V192C320 86.4 406.4 0 512 0s192 86.4 192 192z"
p-id="14443"
data-spm-anchor-id="a313x.search_index.0.i2.72cc3a81bxN4ca"
class="selected"
fill="#ffffff"
></path>
<path
d="M816 496v144c0 2.8-0.4 5.6-1.1 8.4-18.5 68.2-58.9 126.1-112.3 166.9-43.5 33.2-95.6 55.2-151.6 62.2-4 0.5-7 3.9-7 7.9V944c0 8.8 7.2 16 16 16h80c35.3 0 64 28.7 64 64H320c0-35.3 28.7-64 64-64h80c8.8 0 16-7.2 16-16v-58.5c0-4-3-7.4-7-7.9-124.8-15.7-230.3-105.5-263.9-229.2-0.7-2.7-1.1-5.6-1.1-8.4V496.7c0-17.4 13.7-32.2 31.1-32.7 18.1-0.5 32.9 14 32.9 32v129.8c0 6.9 1.1 13.8 3.3 20.3C309.3 746.9 404.6 816 512 816s202.7-69.1 236.7-169.9c2.2-6.5 3.3-13.4 3.3-20.3V496.7c0-17.4 13.7-32.2 31.1-32.7 18.1-0.5 32.9 14 32.9 32z"
p-id="14444"
data-spm-anchor-id="a313x.search_index.0.i3.72cc3a81bxN4ca"
class="selected"
fill="#ffffff"
></path>
</svg>
<div class="pulse-ring"></div>
</button>
<div class="command-display">
<div class="command-text"></div>
<div class="command-action" v-for="item in action">
{{
"执行操作:" +
`${item.command} ${
item.params && item.params.keyword
? "-" + item.params.keyword
: ""
}`
}}
</div>
</div>
</div>
</div>
</template>
<script setup>
import { nextTick, onBeforeUnmount, onMounted, ref, watch } from "vue";
import config from "./../../config/index";
const listenStatus = ref(false);
const props = defineProps({
config: {
type: Object,
default: () => {},
},
});
watch(config, (newVal, oldVal) => {
// if (voiceControl.value && voiceControl.value.isListening) {
// voiceControl.value.stopListening();
// }
// initVoiceControl();
});
class VoiceControl {
constructor(callback) {
this.config = config;
this.apiKey = "sk-020189889aac40f3b050f7c60ca597f8";
this.setupSpeechRecognition();
this.updateUI();
this.callback = callback;
}
setupSpeechRecognition() {
const SpeechRecognition =
window.SpeechRecognition || window.webkitSpeechRecognition;
console.log("检测是否支持语音识别", SpeechRecognition);
if (!SpeechRecognition) {
this.showError("您的浏览器不支持语音识别API");
return;
}
this.recognition = new SpeechRecognition();
// 配置识别参数以提高精度
this.recognition.continuous = false; // 长时间识别
this.recognition.interimResults = true; // 返回临时结果
this.recognition.maxAlternatives = 1; // 只返回最可能的结果
this.recognition.lang = "zh-CN"; // 设置为中文识别
// 音频处理参数(重要)
this.recognition.energy_threshold = 500; // 能量阈值
this.recognition.pause_threshold = 0.5; // 停顿时间阈值(秒)
this.recognition.phrase_threshold = 0.2; // 短语识别阈值
this.lastFinalTranscript = ""; // 用于跟踪最终结果
this.isProcessing = false; // 防止并发处理
this.recognition.onstart = () => {
this.updateUI();
};
this.recognition.onresult = async (event) => {
// 避免同时处理多个结果
if (this.isProcessing) return;
// 获取最新结果索引
const lastResultIndex = event.results.length - 1;
const result = event.results[lastResultIndex];
let transcript = result[0].transcript;
this.showTranscript(transcript);
// 显示中间结果(灰色)和最终结果(黑色)
const commandDisplay = document.querySelector(".command-display");
const commandText = commandDisplay.querySelector(".command-text");
console.log(result.isFinal, "=====> isFinal");
if (!result.isFinal) {
// 中间结果 - 可以快速显示但不处理
commandText.textContent = `正在识别:${transcript}`;
commandText.style.color = "#00000085";
commandDisplay.classList.add("show");
return; // 不处理中间结果,只显示
}
// 处理最终结果
this.isProcessing = true;
if (transcript) {
commandText.textContent = `正在识别:${transcript}`;
commandText.style.color = "#00000085";
}
console.log(transcript, "=====> 识别文字");
try {
// 使用setTimeout将处理放入下一个事件循环避免阻塞UI
setTimeout(async () => {
if (transcript) {
const sequence = await this.queryDeepSeek(transcript);
this.executeSequence(sequence);
if (sequence && sequence.sequence && sequence.sequence.length > 0) {
this.callback(sequence.sequence);
} else {
this.callback([]);
}
}
this.isProcessing = false;
}, 0);
} catch (error) {
console.error("处理过程中出错:", error);
this.isProcessing = false;
}
};
// this.recognition.onerror = (event) => {
// console.log(event, "=====> error event");
// setTimeout(() => {
// this.recognition.start();
// }, 100); // 短暂延迟后重启
// };
this.recognition.onend = () => {
// 快速重启识别,减少等待时间
console.log("====> 走到这 ", listenStatus.value);
// TODO 模拟指令
// setTimeout(async () => {
// const sequence = await this.queryDeepSeek("搜索项目风冷热泵");
// this.executeSequence(sequence);
// if (sequence && sequence.sequence && sequence.sequence.length > 0) {
// this.callback(sequence.sequence);
// } else {
// this.callback([]);
// }
// }, 100);
// END TODO
if (listenStatus.value) {
// 只有在用户希望继续聆听时才重启
setTimeout(() => {
this.recognition.start();
}, 100); // 短暂延迟后重启
} else {
this.updateUI();
setTimeout(() => {
document.querySelector(".status-text").classList.remove("show");
}, 3000);
}
this.isProcessing = false;
// this.updateUI();
// document.querySelector(".status-text").textContent = "点击开始语音识别";
// // 3秒后隐藏状态文本
// setTimeout(() => {
// document.querySelector(".status-text").classList.remove("show");
// }, 3000);
};
}
async queryDeepSeek(userQuery) {
console.log("连接AI ", userQuery);
if (!this.apiKey) {
throw new Error("请先设置DeepSeek API密钥");
}
const prompt = `
你是一个网页控制助手,负责将用户的自然语言指令转换为可执行的指令序列。请基于提供的配置信息,分析用户请求的意图,从可用指令集中精准匹配相应指令。
# 配置信息
${JSON.stringify(this.config, null, 2)}
# 用户指令
"${userQuery}"
# 输出要求
请严格按照以下JSON格式输出只包含名为"sequence"的数组:
{ "sequence": [ { "command": "command_name", "params": { "param_name": "value" } } ] }
# 处理规则
1. **指令匹配**:仅使用配置文件中明确定义的指令,禁止添加任何额外指令
2. **执行顺序**:按照逻辑顺序排列指令(如:"打开页面→输入信息→提交操作"
3. **层级处理**:若子指令被匹配,必须包含其父指令以形成完整路径(如:智能运维→人员管理)
4. **参数提取**:从用户指令中准确提取参数值并填充到对应字段
5. **多操作处理**:复合指令需拆分为多个完整操作序列并按逻辑排序
6. **语音容错**:对可能的语音识别错误(特别是专有名词)进行智能矫正
7. **场景判断**:根据关键词自动判断场景(设备相关→搜索设备,人员相关→搜索人员等)
8. **默认处理**:无匹配指令时返回空数组
# 特别注意
- 针对语音识别可能出现的同音词错误,使用上下文智能矫正(如:"疯了"→"风冷"
- 参数值提取时保持原始语义准确性
- 确保输出为纯JSON格式无额外解释内容
请生成针对"${userQuery}"的指令序列:
`;
// const prompt = `
// 你是一个网页控制助手。请根据以下配置文件,分析用户自然语言请求,从可用指令集中筛选出与请求意图匹配的指令,将用户的自然语言指令解析成一个可执行的指令序列。
// 配置文件:
// ${JSON.stringify(this.config, null, 2)}
// 用户指令:"${userQuery}"
// 请严格按照以下JSON格式输出只包含一个名为"sequence"的数组:
// { "sequence": [ { "command": "command_name", "params": { "param_name": "value" } }, ... ] }
// 要求:
// 1. 只使用配置文件中定义的command
// 2.按照符合逻辑的执行顺序对筛选出的指令进行排序(例如:登录需遵循 "打开登录→输入用户名→输入密码→提交登录" 的顺序)
// 3.仅保留指令的 command 字段,形成有序数组
// 4.若是command的children符合指令则父指令也输出。比如: 输入人员管理人员管理处于智能运维的children子菜单则输出两条指令为智能运维→人员管理。
// 5. 如果用户指令中包含参数值如用户名、密码、关键词请正确提取并填充到params中
// 6.若请求涉及多个独立操作,需按操作逻辑拆分排序(如 "先登录再搜索商品" 需包含两部分完整指令链)
// 7.严格禁止添加指令集中不存在的指令,无关指令需排除
// 8.若无可匹配指令,返回空数组
// 9.因为语音识别的问题,有的关键词可能是专有名词的原因没有识别正确,比如语音输入风冷,结果被识别成疯了,封了等。可能是设备或者专有名词,根据同音尽量矫正。
// 10.对于带有关键词的根据关键词智能判断场景,比如关键词是设备相关则是搜索设备,关键词是人物相关则是搜索人员等。
// 现在请生成针对"${userQuery}"的JSON指令序列
// `;
const response = await fetch(
"https://api.deepseek.com/v1/chat/completions",
{
method: "POST",
headers: {
"Content-Type": "application/json",
Authorization: `Bearer ${this.apiKey}`,
},
body: JSON.stringify({
model: "deepseek-chat",
messages: [
{
role: "system",
content: `"你是一个专业的网页语音控制助手。请严格根据提供的配置文件和用户指令,生成准确的操作序列。
重要规则:
1. 只返回纯JSON格式不要包含任何其他文本
2. JSON结构必须包含sequence数组\n3. 每个指令必须存在于配置文件中
4. 参数必须匹配指令定义
5. 按逻辑顺序排列指令
6. 若无可匹配指令,必须返回空数组
7. 特别注意:诗句、诗词、问候语、闲聊内容等与网页操作无关的指令都应返回空数组"`,
},
{
role: "user",
content: prompt,
},
],
temperature: 0.1,
stream: false,
max_tokens: 500,
}),
}
);
if (!response.ok) {
const errorData = await response.json().catch(() => ({}));
throw new Error(
`DeepSeek API错误: ${response.status} ${errorData.message || ""}`
);
}
const data = await response.json();
const content = data.choices[0].message.content;
// 解析JSON响应
let result = JSON.parse(content);
console.log(result, "=====> deepseek 返回", data);
// 验证响应结构
if (
!result.sequence ||
!Array.isArray(result.sequence) ||
result.sequence.length === 0
) {
console.log(result, "DeepSeek返回了无效的指令序列格式");
throw new Error("DeepSeek返回了无效的指令序列格式");
}
return result;
}
startListening() {
if (!this.recognition) {
throw new Error("语音识别未初始化");
}
document.querySelector(".status-text").classList.add("show");
this.recognition.start();
}
stopListening() {
this.recognition.stop();
this.updateUI();
}
async executeSequence(sequence) {
for (const [index, instruction] of sequence.sequence.entries()) {
try {
await this.executeInstruction(instruction);
// 在指令之间添加延迟
if (index < sequence.sequence.length - 1) {
await this.delay(800);
}
} catch (error) {
throw error;
}
}
}
delay(ms) {
return new Promise((resolve) => setTimeout(resolve, ms));
}
// 向iframe发送命令
sendCommand(command) {
const targetFrame = document.getElementById("suzhoudaping");
nextTick(() => {
const message = {
type: "CONTROL_COMMAND",
action: command.action,
selector: command.selector,
value: command.value,
timestamp: Date.now(),
};
try {
targetFrame.contentWindow.postMessage(message, "*");
console.log(message, "=====> 发送到iframe");
} catch (error) {
console.log(error, "=====> 发送到iframe");
}
});
}
async executeInstruction(instruction) {
// 递归查找指令支持搜索children中的嵌套指令
function findCommandRecursively(commands, targetCommand) {
// 遍历当前层级的指令
for (const cmd of commands) {
// 1. 先检查当前指令是否匹配
if (cmd.command === targetCommand) {
return cmd;
}
// 2. 如果当前指令有children递归查找子指令
if (
cmd.children &&
Array.isArray(cmd.children) &&
cmd.children.length > 0
) {
const foundInChildren = findCommandRecursively(
cmd.children,
targetCommand
);
// 子层级找到匹配指令,直接返回
if (foundInChildren) {
return foundInChildren;
}
}
}
// 所有层级都未找到
return null;
}
// 使用示例(替换原代码)
const commandConfig = findCommandRecursively(
this.config.commands,
instruction.command
);
console.log(commandConfig, "===> commandConfig");
if (!commandConfig) {
throw new Error(`未知指令: ${instruction.command}`);
}
if (commandConfig && commandConfig.isIframe) {
if (commandConfig.action == "input") {
const inputParam = commandConfig.params[0];
commandConfig.value = instruction.params[inputParam.name];
}
this.sendCommand(commandConfig);
return;
}
const element = document.querySelector(commandConfig.selector);
if (!element) {
throw new Error(`找不到元素: ${commandConfig.selector}`);
}
// 滚动到元素可见
element.scrollIntoView({ behavior: "smooth", block: "center" });
switch (commandConfig.action) {
case "click":
element.click();
break;
case "input":
const inputParam = commandConfig.params[0];
if (instruction.params && instruction.params[inputParam.name]) {
element.value = instruction.params[inputParam.name];
element.dispatchEvent(new Event("input", { bubbles: true }));
}
break;
case "navigate":
if (instruction.params && instruction.params.product_url) {
window.location.href = instruction.params.product_url;
}
break;
case "input_and_submit":
if (instruction.params && instruction.params.keyword) {
element.value = instruction.params.keyword;
element.dispatchEvent(new Event("input", { bubbles: true }));
// 尝试提交表单或点击相关按钮
if (element.form) {
element.form.submit();
} else {
// 查找提交按钮
const submitBtn = document.querySelector(
'#search-submit, [type="submit"]'
);
if (submitBtn) submitBtn.click();
}
}
break;
default:
throw new Error(`未知动作类型: ${commandConfig.action}`);
}
// 添加视觉反馈
this.highlightElement(element);
}
highlightElement(element) {
const originalStyle = element.style.boxShadow;
// 模糊半径15px扩散半径4px透明度0.4(淡色微光)
element.style.boxShadow = "0 0 12px 5px rgba(173, 216, 230, 0.6)";
setTimeout(() => {
element.style.boxShadow = originalStyle;
}, 1000);
}
// UI 更新方法
updateUI() {
const voiceBtn = document.getElementById("voice-btn");
if (voiceBtn) {
if (listenStatus.value) {
voiceBtn.classList.add("listening");
document.querySelector(".status-text").textContent = "正在聆听...";
} else {
voiceBtn.classList.remove("listening");
document.querySelector(".status-text").textContent =
"点击开始语音识别2-1";
}
}
}
showTranscript(text) {
const transcriptEl = document.getElementById("transcript");
if (transcriptEl) {
transcriptEl.textContent = `识别结果: ${text}`;
}
}
}
const voiceControl = ref(null);
const action = ref([]);
const initVoiceControl = () => {
voiceControl.value = new VoiceControl(callBackFun);
};
const callBackFun = (data) => {
action.value = data;
};
const toggleListening = () => {
console.log("开始语音", listenStatus);
if (!voiceControl.value) {
alert("请先初始化语音控制系统");
return;
}
try {
if (listenStatus.value) {
listenStatus.value = false;
voiceControl.value.stopListening();
const voiceBtn = document.getElementById("voice-btn");
if (voiceBtn) {
voiceBtn.classList.remove("listening");
}
} else {
listenStatus.value = true;
voiceControl.value.startListening();
}
} catch (error) {
console.error("语音识别操作失败:", error);
// 重置状态
listenStatus.value = false;
const voiceBtn = document.getElementById("voice-btn");
if (voiceBtn) {
voiceBtn.classList.remove("listening");
}
alert("语音识别操作失败,请稍后再试");
}
};
onMounted(() => {
initVoiceControl();
});
</script>
<style scoped>
* {
margin: 0;
padding: 0;
box-sizing: border-box;
font-family: "Segoe UI", Tahoma, Geneva, Verdana, sans-serif;
}
body {
background: linear-gradient(135deg, #1a2a6c, #b21f1f, #fdbb2d);
min-height: 100vh;
display: flex;
justify-content: center;
align-items: center;
padding: 20px;
color: #fff;
}
/* Voice control widget pinned to the bottom-right corner of the viewport */
.voice-control-container {
position: fixed;
width: 144px;
right: 30px;
bottom: 30px;
z-index: 1000;
display: flex;
flex-direction: column;
align-items: center;
gap: 15px;
}
.voice-btn {
width: 70px;
height: 70px;
border-radius: 50%;
background: linear-gradient(135deg, #6e45e2, #88d3ce);
border: none;
color: white;
cursor: pointer;
display: flex;
justify-content: center;
align-items: center;
box-shadow: 0 4px 20px rgba(0, 0, 0, 0.25);
transition: transform 0.2s, box-shadow 0.2s;
position: relative;
overflow: hidden;
font-size: 0;
}
.voice-btn:hover {
transform: scale(1.05);
box-shadow: 0 6px 25px rgba(0, 0, 0, 0.35);
}
.voice-btn:active {
transform: scale(0.95);
}
.voice-btn.listening {
background: linear-gradient(135deg, #ff5e62, #ff9966);
animation: pulse 1.5s infinite;
font-size: 0;
}
.voice-btn::before {
content: "\f130";
font-family: "Font Awesome 6 Free";
font-weight: 900;
font-size: 28px;
transition: transform 0.3s;
}
.voice-btn.listening::before {
content: "\f131";
animation: bounce 0.5s infinite alternate;
}
.status-text {
background-color: rgba(255, 255, 255, 0.15);
backdrop-filter: blur(10px);
padding: 8px 16px;
border-radius: 20px;
font-size: 14px;
color: #ffffff85;
opacity: 0;
transition: opacity 0.3s;
box-shadow: 0 4px 12px rgba(0, 0, 0, 0.1);
}
.status-text.show {
opacity: 1;
}
.pulse-ring {
position: absolute;
top: 0;
left: 0;
width: 100%;
height: 100%;
border-radius: 50%;
background: rgba(255, 255, 255, 0.5);
opacity: 0;
transform: scale(1);
}
.voice-btn.listening .pulse-ring {
animation: sonar 1.5s infinite;
}
.command-display {
position: fixed;
right: 160px;
bottom: 30px;
background-color: rgba(255, 255, 255, 0.15);
backdrop-filter: blur(10px);
padding: 12px 20px;
border-radius: 12px;
color: #fff;
max-width: 300px;
opacity: 0;
transform: translateX(-20px);
transition: opacity 0.3s, transform 0.3s;
text-align: left;
box-shadow: 0 4px 20px rgba(0, 0, 0, 0.2);
}
.command-display.show {
opacity: 1;
transform: translateX(0);
}
.command-text {
font-size: 16px;
margin-bottom: 5px;
color: #fff !important;
}
.command-action {
font-size: 14px;
color: #88d3ce;
font-weight: 500;
}
@keyframes pulse {
0% {
box-shadow: 0 0 0 0 rgba(255, 94, 98, 0.7);
}
70% {
box-shadow: 0 0 0 15px rgba(255, 94, 98, 0);
}
100% {
box-shadow: 0 0 0 0 rgba(255, 94, 98, 0);
}
}
@keyframes sonar {
0% {
transform: scale(1);
opacity: 0.8;
}
100% {
transform: scale(2.5);
opacity: 0;
}
}
@keyframes bounce {
from {
transform: scale(1);
}
to {
transform: scale(1.3);
}
}
</style>