Files
leonpan-pc/app/core/services/text_speech.py
2025-10-29 22:20:21 +08:00

287 lines
10 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# coding: utf-8
import os
import subprocess
import tempfile
from loguru import logger
from PyQt6.QtCore import QEventLoop, QObject, QThread, pyqtSignal
from PyQt6.QtCore import QUrl
from PyQt6.QtMultimedia import QAudioOutput, QMediaPlayer
class LocalTextToSpeechThread(QThread):
    """Worker thread that turns text into a WAV file with a local TTS engine and plays it.

    Synthesis tries pyttsx3 first and falls back to the Windows
    ``System.Speech`` synthesizer driven through PowerShell. Playback uses a
    ``QMediaPlayer`` created inside the worker thread and serviced by the
    thread's own event loop (``QThread.exec()``), so ``stop_playback()`` can
    end it from any thread through ``QThread.quit()``.

    The temporary audio file is always removed by the worker thread itself,
    whichever way ``run()`` exits.
    """

    # Signals
    playback_started = pyqtSignal()  # emitted when the player reports a positive duration
    playback_finished = pyqtSignal()  # emitted when the player enters the stopped state
    playback_error = pyqtSignal(str)  # carries an error (or truncation notice) message
    progress_updated = pyqtSignal(int)  # playback position as a percentage of the duration
    synthesis_completed = pyqtSignal(str)  # carries the path of the synthesized audio file

    # Maximum number of characters handed to the TTS engine; longer text is truncated.
    MAX_TEXT_LENGTH = 1000
    # Seconds to wait for the PowerShell synthesizer before killing it.
    POWERSHELL_TIMEOUT_SECONDS = 30

    def __init__(self, text, parent=None):
        """Store the text to speak; nothing is synthesized until the thread starts.

        Args:
            text: The text to synthesize and play.
            parent: Optional parent QObject.
        """
        super().__init__(parent)
        self.text = text
        self.audio_file_path = None
        self.media_player = None
        self.audio_output = None
        self._stop_requested = False

    def run(self):
        """Thread entry point: synthesize the text, then play the resulting file.

        Any exception is reported through ``playback_error`` instead of
        propagating out of the thread.
        """
        try:
            # 1. Convert the text into an audio file.
            self.audio_file_path = self._text_to_speech(self.text)
            if not self.audio_file_path or self._stop_requested:
                return
            self.synthesis_completed.emit(self.audio_file_path)
            # 2. Play the audio file.
            self._play_audio(self.audio_file_path)
        except Exception as e:
            self.playback_error.emit(f"语音播放错误: {str(e)}")
        finally:
            # Covers the "stopped right after synthesis" path, where
            # _play_audio never runs and would otherwise leak the file.
            self._remove_temp_file(self.audio_file_path)

    @staticmethod
    def _new_temp_wav():
        """Create an empty temporary ``.wav`` file and return its path."""
        with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as temp_file:
            return temp_file.name

    @staticmethod
    def _remove_temp_file(path):
        """Best-effort removal of a temporary file; a missing file is not an error."""
        if not path:
            return
        try:
            os.unlink(path)
        except FileNotFoundError:
            pass
        except OSError as e:
            logger.warning(f"临时文件删除失败: {e}")

    def _text_to_speech(self, text):
        """Synthesize ``text`` into a WAV file using a local TTS engine.

        Empty text is rejected. Text longer than ``MAX_TEXT_LENGTH`` is
        truncated and a notice is emitted on ``playback_error``.

        Args:
            text: The text to synthesize.

        Returns:
            Path of the generated audio file, or ``None`` on failure.
        """
        try:
            if not text or not text.strip():
                self.playback_error.emit("文本内容为空")
                return None
            # Cap the length so synthesis time stays bounded.
            max_length = self.MAX_TEXT_LENGTH
            if len(text) > max_length:
                text = text[:max_length] + "。文本过长,已截断。"
                self.playback_error.emit(f"文本过长,已截断前{max_length}个字符")
            # pyttsx3 is preferred; _pyttsx3_tts itself falls back to the
            # Windows synthesizer when pyttsx3 is missing or fails.
            return self._pyttsx3_tts(text)
        except Exception as e:
            self.playback_error.emit(f"语音合成失败: {str(e)}")
            return None

    def _pyttsx3_tts(self, text):
        """Synthesize with pyttsx3, falling back to Windows TTS on any failure.

        Args:
            text: The text to synthesize.

        Returns:
            Path of the generated audio file, or ``None`` if every engine failed.
        """
        temp_path = None
        try:
            import pyttsx3

            engine = pyttsx3.init()
            engine.setProperty('rate', 200)  # speaking rate
            engine.setProperty('volume', 0.9)  # output volume
            temp_path = self._new_temp_wav()
            engine.save_to_file(text, temp_path)
            engine.runAndWait()
            if os.path.exists(temp_path) and os.path.getsize(temp_path) > 0:
                return temp_path
            logger.error("语音文件生成失败")
        except ImportError:
            logger.debug("pyttsx3 未安装,使用Windows TTS")
        except Exception as e:
            logger.warning(f"pyttsx3 合成失败,改用Windows TTS: {e}")
        # pyttsx3 produced nothing usable: drop its file and try the fallback.
        self._remove_temp_file(temp_path)
        return self._windows_tts(text)

    def _windows_tts(self, text):
        """Synthesize with the Windows built-in TTS (currently via PowerShell).

        Args:
            text: The text to synthesize.

        Returns:
            Path of the generated audio file, or ``None`` on failure.
        """
        try:
            return self._powershell_tts(text)
        except Exception as e:
            logger.error(f"Windows TTS失败: {str(e)}")
            return None

    def _powershell_tts(self, text):
        """Synthesize with ``System.Speech.Synthesis.SpeechSynthesizer`` through PowerShell.

        The text and the output path are handed to PowerShell through
        environment variables rather than being interpolated into the script,
        so characters such as ``$``, backticks or quotes in the text cannot
        alter the script that is executed.

        Args:
            text: The text to synthesize.

        Returns:
            Path of the generated audio file, or ``None`` on failure or timeout.
        """
        temp_path = None
        try:
            temp_path = self._new_temp_wav()
            # Constant script: all variable data is read from the environment.
            ps_script = (
                "Add-Type -AssemblyName System.Speech; "
                "$speak = New-Object System.Speech.Synthesis.SpeechSynthesizer; "
                "try { "
                "$speak.SetOutputToWaveFile($env:LEONPAN_TTS_OUTPUT); "
                "$speak.Speak($env:LEONPAN_TTS_TEXT) "
                "} finally { $speak.Dispose() }"
            )
            env = dict(
                os.environ,
                LEONPAN_TTS_OUTPUT=temp_path,
                LEONPAN_TTS_TEXT=text,
            )
            # Argument list without a shell: no cmd.exe parsing of the script.
            process = subprocess.Popen(
                ["powershell", "-NoProfile", "-NonInteractive", "-Command", ps_script],
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                env=env,
            )
            try:
                _, stderr = process.communicate(timeout=self.POWERSHELL_TIMEOUT_SECONDS)
            except subprocess.TimeoutExpired:
                process.kill()
                process.communicate()  # reap the process and close its pipes
                logger.error("PowerShell超时")
            else:
                if process.returncode != 0:
                    logger.error(f"PowerShell执行失败: {stderr.decode('gbk', errors='ignore')}")
                if os.path.exists(temp_path) and os.path.getsize(temp_path) > 0:
                    return temp_path
                logger.error("语音文件生成失败")
        except Exception as e:
            logger.error(f"PowerShell TTS失败{e}")
        self._remove_temp_file(temp_path)
        return None

    def _play_audio(self, file_path):
        """Play ``file_path`` and block until playback stops, fails, or is cancelled.

        The temporary file is removed when this method returns.

        Args:
            file_path: Path of the audio file to play.

        Raises:
            Exception: If setting up or running playback fails.
        """
        try:
            if self._stop_requested:
                return
            # The player and its audio output are created in the worker thread.
            self.media_player = QMediaPlayer()
            self.audio_output = QAudioOutput()
            self.media_player.setAudioOutput(self.audio_output)
            self.audio_output.setVolume(1.0)

            self.media_player.playbackStateChanged.connect(self._on_playback_state_changed)
            self.media_player.positionChanged.connect(self._on_position_changed)
            self.media_player.durationChanged.connect(self._on_duration_changed)
            self.media_player.errorOccurred.connect(self._on_player_error)

            def quit_when_stopped(state):
                if state == QMediaPlayer.PlaybackState.StoppedState:
                    self.quit()

            # Both loop-ending connections are made before play() so no
            # notification can be missed. An error also ends the loop because
            # a player that fails to load never reaches StoppedState.
            self.media_player.playbackStateChanged.connect(quit_when_stopped)
            self.media_player.errorOccurred.connect(lambda *_: self.quit())

            self.media_player.setSource(QUrl.fromLocalFile(file_path))
            self.media_player.play()

            # Run this thread's event loop until quit() is called, either by
            # the handlers above or by stop_playback().
            self.exec()

            if self.media_player.playbackState() != QMediaPlayer.PlaybackState.StoppedState:
                self.media_player.stop()
        except Exception as e:
            raise Exception(f"音频播放失败: {str(e)}") from e
        finally:
            if self.media_player is not None:
                try:
                    # Detach the source so the file can be deleted.
                    self.media_player.setSource(QUrl())
                except Exception as e:
                    logger.warning(f"释放媒体源失败: {e}")
            self._remove_temp_file(file_path)

    def _on_playback_state_changed(self, state):
        """Emit ``playback_finished`` once the player reaches the stopped state."""
        if state == QMediaPlayer.PlaybackState.StoppedState:
            self.playback_finished.emit()

    def _on_position_changed(self, position):
        """Convert the playback position to a percentage and emit it.

        Args:
            position: Current playback position reported by the player.
        """
        player = self.media_player
        if player is None:
            return
        duration = player.duration()
        if duration > 0:
            self.progress_updated.emit(int((position / duration) * 100))

    def _on_duration_changed(self, duration):
        """Emit ``playback_started`` once the player reports a positive duration."""
        if duration > 0:
            self.playback_started.emit()

    def _on_player_error(self, error, error_string):
        """Forward a player error to ``playback_error``."""
        self.playback_error.emit(f"播放器错误: {error_string}")

    def stop_playback(self):
        """Request that synthesis/playback stop; callable from any thread.

        Only a flag is set and the thread's event loop is asked to quit. The
        worker thread itself then stops the player and deletes the temporary
        file, so the player is never touched from the caller's thread.
        """
        self._stop_requested = True
        self.quit()
class LocalSpeechController(QObject):
    """Controller that owns the current speech thread and logs its events."""

    def __init__(self, parent=None):
        """Create a controller with no active speech thread.

        Args:
            parent: Optional parent QObject.
        """
        super().__init__(parent)
        self.speech_thread = None
        # Threads that did not finish within the stop timeout. They are kept
        # referenced here so a still-running QThread is never garbage collected.
        self._lingering_threads = []

    def play_text(self, text):
        """Stop any current playback and start speaking ``text`` in a new thread.

        Args:
            text: The text to synthesize and play.
        """
        self.stop_playback()

        thread = LocalTextToSpeechThread(text)
        thread.playback_started.connect(self._on_playback_started)
        thread.playback_finished.connect(self._on_playback_finished)
        thread.playback_error.connect(self._on_playback_error)
        thread.progress_updated.connect(self._on_progress_updated)
        thread.synthesis_completed.connect(self._on_synthesis_completed)

        self.speech_thread = thread
        thread.start()

    def stop_playback(self):
        """Ask the current speech thread to stop and wait up to one second for it."""
        # Forget earlier stragglers that have finished in the meantime.
        self._lingering_threads = [t for t in self._lingering_threads if t.isRunning()]

        thread = self.speech_thread
        if thread is None or not thread.isRunning():
            return
        thread.stop_playback()
        if not thread.wait(1000):
            # Synthesis cannot be interrupted mid-call, so the thread may
            # outlive the timeout; keep it alive until it exits on its own.
            logger.warning("语音线程未能在1秒内结束,等待其自行退出")
            self._lingering_threads.append(thread)

    def is_playing(self):
        """Return ``True`` while the current speech thread is running."""
        thread = self.speech_thread
        return thread is not None and thread.isRunning()

    def _on_playback_started(self):
        """Log that playback has started."""
        logger.info("语音播放开始")

    def _on_playback_finished(self):
        """Log that playback has finished."""
        logger.success("语音播放完成")

    def _on_playback_error(self, error_msg):
        """Log an error message reported by the speech thread."""
        logger.warning(f"语音播放错误: {error_msg}")

    def _on_progress_updated(self, progress):
        """Receive playback progress updates; intentionally a no-op."""
        ...

    def _on_synthesis_completed(self, file_path):
        """Log the path of the synthesized audio file."""
        logger.info(f"语音合成完成,文件路径: {file_path}")