Files
leonpan-pc/app/core/services/text_speech.py

287 lines
10 KiB
Python
Raw Normal View History

2025-10-29 22:20:21 +08:00
# coding: utf-8
import os
import subprocess
import tempfile
from loguru import logger
from PyQt6.QtCore import QEventLoop, QObject, QThread, pyqtSignal
from PyQt6.QtCore import QUrl
from PyQt6.QtMultimedia import QAudioOutput, QMediaPlayer
class LocalTextToSpeechThread(QThread):
    """Worker thread that synthesizes text to a WAV file locally and plays it.

    Synthesis is attempted with ``pyttsx3`` first and falls back to the
    Windows ``System.Speech`` synthesizer driven through PowerShell. The
    resulting temporary file is played with ``QMediaPlayer`` inside a local
    event loop and is always removed before the thread finishes.
    """

    # Public signals
    playback_started = pyqtSignal()  # emitted when a positive media duration is reported
    playback_finished = pyqtSignal()  # emitted when the player reaches StoppedState
    playback_error = pyqtSignal(str)  # error / notice message for the listener
    progress_updated = pyqtSignal(int)  # playback progress in percent
    synthesis_completed = pyqtSignal(str)  # path of the synthesized audio file

    # Internal signal used to forward a stop request to the objects that live
    # in the worker thread (delivered as a queued call into its event loop).
    _stop_signal = pyqtSignal()

    # Longest text (in characters) that is synthesized; the rest is cut off.
    MAX_TEXT_LENGTH = 1000
    # Upper bound, in seconds, for one PowerShell synthesis run.
    POWERSHELL_TIMEOUT_SECONDS = 30

    def __init__(self, text, parent=None):
        """Store the text to speak; no work is done until the thread starts.

        Args:
            text: The text to synthesize and play.
            parent: Optional QObject parent forwarded to ``QThread``.
        """
        super().__init__(parent)
        self.text = text
        self.audio_file_path = None
        self.media_player = None
        self.audio_output = None
        self._stop_requested = False
        # Last duration reported by the player; cached so that progress can be
        # computed without touching the player from another thread.
        self._duration = 0

    def run(self):
        """Thread entry point: synthesize the text, then play the result."""
        try:
            # 1. Convert the text into an audio file.
            self.audio_file_path = self._text_to_speech(self.text)
            if not self.audio_file_path or self._stop_requested:
                return
            self.synthesis_completed.emit(self.audio_file_path)
            # 2. Play the audio file (blocks until playback ends).
            self._play_audio(self.audio_file_path)
        except Exception as e:
            self.playback_error.emit(f"语音播放错误: {str(e)}")
        finally:
            # Covers the early-return path (stop requested during synthesis),
            # where _play_audio never gets the chance to clean up.
            self._remove_file(self.audio_file_path)

    @staticmethod
    def _remove_file(path):
        """Delete ``path`` if it exists; log (but do not raise) on failure."""
        if not path:
            return
        try:
            os.unlink(path)
        except FileNotFoundError:
            pass
        except OSError as e:
            logger.warning(f"临时文件清理失败: {path} ({e})")

    def _text_to_speech(self, text):
        """Synthesize ``text`` with a local TTS engine.

        Args:
            text: The text to synthesize. Text longer than
                ``MAX_TEXT_LENGTH`` characters is truncated.

        Returns:
            Path of the generated WAV file, or ``None`` when the text is
            empty or every engine failed. Failures are reported through
            ``playback_error``.
        """
        try:
            if not text or not text.strip():
                self.playback_error.emit("文本内容为空")
                return None
            # Limit the text length so synthesis does not take too long.
            max_length = self.MAX_TEXT_LENGTH
            if len(text) > max_length:
                text = text[:max_length] + "。文本过长,已截断。"
                # The truncation notice is delivered through the error signal
                # because the class has no dedicated warning signal.
                self.playback_error.emit(f"文本过长,已截断前{max_length}个字符")
            # Prefer pyttsx3; use the Windows built-in TTS when it is missing.
            try:
                import pyttsx3  # noqa: F401 - availability probe only
            except ImportError:
                audio_path = self._windows_tts(text)
            else:
                audio_path = self._pyttsx3_tts(text)
            if not audio_path:
                # Without this the listener would never learn that nothing
                # is going to be played.
                self.playback_error.emit("语音合成失败: 未能生成语音文件")
            return audio_path
        except Exception as e:
            self.playback_error.emit(f"语音合成失败: {str(e)}")
            return None

    def _pyttsx3_tts(self, text):
        """Synthesize ``text`` with pyttsx3, falling back to Windows TTS.

        Returns:
            Path of the generated WAV file, or whatever ``_windows_tts``
            returns when pyttsx3 raised or produced an empty file.
        """
        temp_path = None
        try:
            import pyttsx3

            engine = pyttsx3.init()
            engine.setProperty('rate', 200)
            engine.setProperty('volume', 0.9)
            # Reserve a temp file name; the engine writes to it afterwards.
            with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as temp_file:
                temp_path = temp_file.name
            engine.save_to_file(text, temp_path)
            engine.runAndWait()
            if os.path.exists(temp_path) and os.path.getsize(temp_path) > 0:
                return temp_path
            logger.error("语音文件生成失败")
        except Exception as e:
            logger.warning(f"pyttsx3 语音合成失败: {e}")
        # pyttsx3 failed either way: drop its (empty) file and try Windows TTS.
        self._remove_file(temp_path)
        return self._windows_tts(text)

    def _windows_tts(self, text):
        """Synthesize ``text`` with the Windows built-in TTS.

        Returns:
            Path of the generated WAV file, or ``None`` on failure.
        """
        try:
            return self._powershell_tts(text)
        except Exception as e:
            logger.error(f"Windows TTS失败: {str(e)}")
            return None

    def _powershell_tts(self, text):
        """Synthesize ``text`` via PowerShell's ``SpeechSynthesizer``.

        The text and output path are handed to PowerShell through environment
        variables rather than being interpolated into the script, so no
        character in the text can alter the command that is executed.

        Returns:
            Path of the generated WAV file, or ``None`` on failure.
        """
        temp_path = None
        try:
            with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as temp_file:
                temp_path = temp_file.name
            # Constant script: contains no user data and no double quotes.
            ps_script = (
                "Add-Type -AssemblyName System.Speech; "
                "$speak = New-Object System.Speech.Synthesis.SpeechSynthesizer; "
                "try { "
                "$speak.SetOutputToWaveFile($env:TTS_OUTPUT_PATH); "
                "$speak.Speak($env:TTS_INPUT_TEXT) "
                "} finally { $speak.Dispose() }"
            )
            env = dict(os.environ, TTS_OUTPUT_PATH=temp_path, TTS_INPUT_TEXT=text)
            # Argument list without a shell: nothing is re-parsed by cmd.exe.
            process = subprocess.Popen(
                ["powershell", "-NoProfile", "-Command", ps_script],
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                env=env,
            )
            try:
                _, stderr = process.communicate(timeout=self.POWERSHELL_TIMEOUT_SECONDS)
                if process.returncode != 0:
                    logger.error(f"PowerShell执行失败: {stderr.decode('gbk', errors='ignore')}")
            except subprocess.TimeoutExpired:
                process.kill()
                # Reap the killed process and close its pipes.
                process.communicate()
                logger.error("PowerShell超时")
            if os.path.exists(temp_path) and os.path.getsize(temp_path) > 0:
                return temp_path
            logger.error("语音文件生成失败")
        except Exception as e:
            logger.error(f"PowerShell TTS失败{e}")
        # Reached only on failure: do not leave the unused temp file behind.
        self._remove_file(temp_path)
        return None

    def _play_audio(self, file_path):
        """Play ``file_path`` and block until playback ends, fails or is stopped.

        The temporary file is removed afterwards in every case.

        Raises:
            Exception: If setting up or running the player raised.
        """
        if self._stop_requested:
            return
        try:
            self._duration = 0
            # Player and output are created here so that they belong to the
            # thread executing this method.
            self.media_player = QMediaPlayer()
            self.audio_output = QAudioOutput()
            self.media_player.setAudioOutput(self.audio_output)
            self.audio_output.setVolume(1.0)

            self.media_player.playbackStateChanged.connect(self._on_playback_state_changed)
            self.media_player.positionChanged.connect(self._on_position_changed)
            self.media_player.durationChanged.connect(self._on_duration_changed)
            self.media_player.errorOccurred.connect(self._on_player_error)

            # Everything that must end the wait is wired up before play(), so
            # no terminating event can slip past the loop.
            loop = QEventLoop()
            self.media_player.playbackStateChanged.connect(
                lambda state: loop.quit()
                if state == QMediaPlayer.PlaybackState.StoppedState
                else None
            )
            # A media error may never produce StoppedState; quit on it too.
            self.media_player.errorOccurred.connect(lambda *_: loop.quit())
            # Stop requests from other threads reach the player and the loop
            # through the signal instead of a direct cross-thread call.
            self._stop_signal.connect(self.media_player.stop)
            self._stop_signal.connect(loop.quit)

            # A stop requested before the connections existed was not seen.
            if self._stop_requested:
                return

            self.media_player.setSource(QUrl.fromLocalFile(file_path))
            self.media_player.play()
            loop.exec()
        except Exception as e:
            raise Exception(f"音频播放失败: {str(e)}") from e
        finally:
            # Drop the player first so it no longer holds the file open;
            # otherwise removing the file can fail on Windows.
            self.media_player = None
            self.audio_output = None
            self._remove_file(file_path)

    def _on_playback_state_changed(self, state):
        """Emit ``playback_finished`` once the player has stopped."""
        if state == QMediaPlayer.PlaybackState.StoppedState:
            self.playback_finished.emit()

    def _on_position_changed(self, position):
        """Translate the playback position into a percentage and emit it."""
        duration = self._duration
        if duration > 0:
            progress = int((position / duration) * 100)
            self.progress_updated.emit(progress)

    def _on_duration_changed(self, duration):
        """Cache the media duration and emit ``playback_started`` if positive."""
        self._duration = duration
        if duration > 0:
            self.playback_started.emit()

    def _on_player_error(self, error, error_string):
        """Forward a player error as a ``playback_error`` message."""
        self.playback_error.emit(f"播放器错误: {error_string}")

    def stop_playback(self):
        """Request that synthesis/playback stop as soon as possible.

        Safe to call from another thread: the player is never touched
        directly, the request is forwarded through ``_stop_signal``.
        """
        self._stop_requested = True
        if self.isRunning():
            # The worker removes the temp file itself once it has stopped.
            self._stop_signal.emit()
        else:
            self._remove_file(self.audio_file_path)
class LocalSpeechController(QObject):
    """Controller that owns the speech thread and logs its progress signals."""

    def __init__(self, parent=None):
        """Create a controller with no active speech thread.

        Args:
            parent: Optional QObject parent.
        """
        super().__init__(parent)
        self.speech_thread = None
        # Threads that were asked to stop but had not finished within the
        # wait timeout. They are referenced here until they end, because a
        # QThread object must not be destroyed while it is still running.
        self._retired_threads = []

    def play_text(self, text):
        """Stop any current playback and speak ``text`` in a new thread."""
        self.stop_playback()
        self.speech_thread = LocalTextToSpeechThread(text)
        # Wire the thread's signals to the logging handlers below.
        self.speech_thread.playback_started.connect(self._on_playback_started)
        self.speech_thread.playback_finished.connect(self._on_playback_finished)
        self.speech_thread.playback_error.connect(self._on_playback_error)
        self.speech_thread.progress_updated.connect(self._on_progress_updated)
        self.speech_thread.synthesis_completed.connect(self._on_synthesis_completed)
        self.speech_thread.start()

    def stop_playback(self):
        """Ask the running speech thread to stop and wait up to one second."""
        # Forget retired threads that have finished in the meantime.
        self._retired_threads = [t for t in self._retired_threads if t.isRunning()]
        thread = self.speech_thread
        if thread is None or not thread.isRunning():
            return
        thread.stop_playback()
        if not thread.wait(1000):
            # Still busy (e.g. inside a synthesis call): keep a reference so
            # the object outlives its thread even if play_text replaces
            # self.speech_thread right after this call.
            self._retired_threads.append(thread)

    def is_playing(self):
        """Return True while the current speech thread is running."""
        return bool(self.speech_thread and self.speech_thread.isRunning())

    def _on_playback_started(self):
        """Log that playback has started."""
        logger.info("语音播放开始")

    def _on_playback_finished(self):
        """Log that playback has finished."""
        logger.success("语音播放完成")

    def _on_playback_error(self, error_msg):
        """Log a playback error message."""
        logger.warning(f"语音播放错误: {error_msg}")

    def _on_progress_updated(self, progress):
        """Receive progress updates; intentionally a no-op."""
        ...

    def _on_synthesis_completed(self, file_path):
        """Log the path of the synthesized audio file."""
        logger.info(f"语音合成完成,文件路径: {file_path}")