feat: 接入 Edge TTS 免费中文语音合成
设备不支持内置中文 TTS,改用微软 Edge TTS(WebSocket 协议)。 - 新增 EdgeTtsManager:WebSocket 调用、MP3 缓存、MediaPlayer 播放 - 调试模式(设置页点头像6次)触发 TTS 测试 - 缓存机制:相同文本不重复请求,上限50条自动清理 Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,343 @@
|
||||
package com.xiaoqu.watch.service.manager
|
||||
|
||||
import android.content.Context
|
||||
import android.media.MediaPlayer
|
||||
import dagger.hilt.android.qualifiers.ApplicationContext
|
||||
import kotlinx.coroutines.*
|
||||
import okhttp3.*
|
||||
import okio.ByteString
|
||||
import timber.log.Timber
|
||||
import java.io.ByteArrayOutputStream
|
||||
import java.io.File
|
||||
import java.security.MessageDigest
|
||||
import java.text.SimpleDateFormat
|
||||
import java.util.*
|
||||
import javax.inject.Inject
|
||||
import javax.inject.Singleton
|
||||
|
||||
/**
 * Edge TTS speech-synthesis manager.
 *
 * Connects to the Microsoft Edge "read aloud" endpoint over a WebSocket, sends the
 * text as SSML, collects the MP3 audio streamed back, caches it on disk keyed by
 * voice + text, and plays it with [MediaPlayer].
 *
 * Threading: network and file work runs on [Dispatchers.IO]; the player is always
 * created and started on the main thread. Player state is not synchronized, so
 * [speak] and [stop] should be called from the main thread as well.
 */
@Singleton
class EdgeTtsManager @Inject constructor(
    @ApplicationContext private val context: Context,
    private val okHttpClient: OkHttpClient
) {

    /**
     * Scope that owns every synthesis request. A [SupervisorJob] is used so that one
     * failed request does not cancel the scope for later requests.
     */
    private val scope = CoroutineScope(SupervisorJob() + Dispatchers.IO)

    /** The in-flight "synthesize then play" request, if any. Cancelled by [stop]. */
    private var synthesisJob: Job? = null

    /** The player for the audio currently loaded, or null when nothing is loaded. */
    private var mediaPlayer: MediaPlayer? = null

    /** True from the moment playback starts until it completes, errors, or is stopped. */
    var isPlaying: Boolean = false
        private set

    /**
     * Clock skew in seconds (server time minus local time) applied when generating the
     * GEC token. Written from an OkHttp callback thread and read from the IO
     * dispatcher, hence volatile.
     */
    @Volatile
    private var clockSkewSeconds: Long = 0

    /** Invoked when playback reaches the end of the audio. */
    var onComplete: (() -> Unit)? = null

    /**
     * Synthesizes [text] and plays it. Any request still in flight and any audio
     * currently playing are stopped first. Blank text is ignored.
     *
     * @param text the text to read aloud
     * @param voice the voice name; defaults to [VOICE_XIAOXIAO]
     * @param onError called on the main thread with a message when synthesis or
     *   playback fails
     */
    fun speak(text: String, voice: String = VOICE_XIAOXIAO, onError: ((String) -> Unit)? = null) {
        if (text.isBlank()) return

        // Cancels the previous request (if any) and releases the current player, so
        // an older request can never start playing after this one was issued.
        stop()

        synthesisJob = scope.launch {
            try {
                val cacheFile = getCacheFile(text, voice)
                if (cacheFile.exists()) {
                    Timber.d("$TAG: 命中缓存 ${cacheFile.name}")
                } else {
                    val audioData = synthesize(text, voice)
                    if (audioData == null || audioData.isEmpty()) {
                        Timber.w("$TAG: 合成返回空数据")
                        withContext(Dispatchers.Main) { onError?.invoke("语音合成失败") }
                        return@launch
                    }
                    saveToCacheDir(cacheFile, audioData)
                }

                if (!playAudio(cacheFile)) {
                    // An unplayable file would otherwise be served from the cache forever.
                    cacheFile.delete()
                    withContext(Dispatchers.Main) { onError?.invoke("语音播放失败") }
                }
            } catch (e: CancellationException) {
                // Cancellation is not an error; let it propagate so the job ends quietly.
                throw e
            } catch (e: Exception) {
                Timber.e(e, "$TAG: 语音合成异常")
                withContext(Dispatchers.Main) { onError?.invoke("语音合成异常: ${e.message}") }
            }
        }
    }

    /** Cancels any in-flight synthesis request and stops/releases the current player. */
    fun stop() {
        synthesisJob?.cancel()
        synthesisJob = null
        releasePlayer()
    }

    /**
     * Stops and releases the current player without touching [synthesisJob], so it is
     * safe to call from inside the running request itself.
     */
    private fun releasePlayer() {
        val player = mediaPlayer
        mediaPlayer = null
        isPlaying = false
        if (player == null) return

        try {
            if (player.isPlaying) player.stop()
        } catch (e: Exception) {
            // Best effort: the player may already be in an invalid state.
            Timber.w(e, "$TAG: 停止播放异常")
        }
        try {
            // Always release, even when stop() failed, so the native player is not leaked.
            player.release()
        } catch (e: Exception) {
            Timber.w(e, "$TAG: 释放播放器异常")
        }
    }

    /**
     * Calls Edge TTS over a WebSocket and collects the returned audio.
     *
     * @return the MP3 bytes received (possibly empty if the socket closed before any
     *   audio arrived), or null when the connection failed or the request timed out
     */
    private suspend fun synthesize(text: String, voice: String): ByteArray? {
        val connectionId = newId()
        val requestId = newId()

        val url = "$WSS_BASE?TrustedClientToken=$TRUSTED_TOKEN" +
            "&ConnectionId=$connectionId" +
            "&Sec-MS-GEC=${generateGecToken()}" +
            "&Sec-MS-GEC-Version=$GEC_VERSION"

        val request = Request.Builder()
            .url(url)
            .header("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36 Edg/143.0.0.0")
            .header("Origin", "chrome-extension://jdiccldimpdaibmpdkjnbmckianbfold")
            .build()

        val audioBuffer = ByteArrayOutputStream()
        // complete() is idempotent, so whichever callback fires first decides the
        // outcome and later callbacks are harmless.
        val outcome = CompletableDeferred<ByteArray?>()

        val ws = okHttpClient.newWebSocket(request, object : WebSocketListener() {
            override fun onOpen(webSocket: WebSocket, response: Response) {
                Timber.d("$TAG: WebSocket 已连接")
                // 1. speech.config, 2. the SSML synthesis request.
                webSocket.send(buildConfigMessage())
                webSocket.send(buildSsmlMessage(requestId, text, voice))
            }

            override fun onMessage(webSocket: WebSocket, text: String) {
                // Text frames carry control messages; only the end-of-turn marker matters here.
                if (!text.contains("Path:turn.end")) return
                Timber.d("$TAG: 合成完成,音频大小 ${audioBuffer.size()} 字节")
                webSocket.close(1000, "done")
                outcome.complete(audioBuffer.toByteArray())
            }

            override fun onMessage(webSocket: WebSocket, bytes: ByteString) {
                // Binary frames carry the audio payload behind a small header.
                parseBinaryFrame(bytes.toByteArray())?.let { audioBuffer.write(it) }
            }

            override fun onFailure(webSocket: WebSocket, t: Throwable, response: Response?) {
                Timber.e(t, "$TAG: WebSocket 连接失败")
                // Use the server's Date header (when present) to correct the clock
                // skew used by the next GEC token.
                response?.header("Date")?.let { adjustClockSkew(it) }
                outcome.complete(null)
            }

            override fun onClosed(webSocket: WebSocket, code: Int, reason: String) {
                outcome.complete(audioBuffer.toByteArray())
            }
        })

        return try {
            withTimeoutOrNull(SYNTHESIS_TIMEOUT_MS) { outcome.await() }
        } finally {
            // Still pending here means timeout or cancellation: tear the socket down.
            // On the normal paths the socket has already closed or failed.
            if (!outcome.isCompleted) {
                Timber.w("$TAG: 合成超时或已取消,关闭 WebSocket")
                ws.cancel()
            }
        }
    }

    /** Returns a random 32-character hex id (a UUID without dashes). */
    private fun newId(): String = UUID.randomUUID().toString().replace("-", "")

    /** Builds the `speech.config` message that selects the audio output format. */
    private fun buildConfigMessage(): String {
        val timestamp = formatTimestamp()
        return "X-Timestamp:$timestamp\r\n" +
            "Content-Type:application/json; charset=utf-8\r\n" +
            "Path:speech.config\r\n" +
            "\r\n" +
            """{"context":{"synthesis":{"audio":{"metadataoptions":{"sentenceBoundaryEnabled":"false","wordBoundaryEnabled":"false"},"outputFormat":"$OUTPUT_FORMAT"}}}}"""
    }

    /**
     * Builds the SSML synthesis request message.
     *
     * Both [text] and [voice] are XML-escaped because they are interpolated into the
     * SSML document; unescaped `&`, `<` or quotes would make the document malformed.
     */
    private fun buildSsmlMessage(requestId: String, text: String, voice: String): String {
        val timestamp = formatTimestamp()
        val escapedText = escapeXml(text)
        val escapedVoice = escapeXml(voice)

        return "X-RequestId:$requestId\r\n" +
            "Content-Type:application/ssml+xml\r\n" +
            "X-Timestamp:${timestamp}Z\r\n" +
            "Path:ssml\r\n" +
            "\r\n" +
            "<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xml:lang='zh-CN'>" +
            "<voice name='$escapedVoice'>" +
            "<prosody pitch='+0Hz' rate='+0%' volume='+0%'>$escapedText</prosody>" +
            "</voice></speak>"
    }

    /**
     * Escapes the five XML special characters and replaces control characters that
     * are not allowed in XML 1.0 (everything below U+0020 except tab, LF and CR) with
     * a space.
     */
    private fun escapeXml(raw: String): String = buildString(raw.length) {
        for (ch in raw) {
            when {
                ch == '&' -> append("&amp;")
                ch == '<' -> append("&lt;")
                ch == '>' -> append("&gt;")
                ch == '"' -> append("&quot;")
                ch == '\'' -> append("&apos;")
                ch < ' ' && ch != '\t' && ch != '\n' && ch != '\r' -> append(' ')
                else -> append(ch)
            }
        }
    }

    /**
     * Extracts the audio payload from a binary frame.
     *
     * Frame layout: `[2-byte big-endian header length][header][audio bytes]`.
     *
     * @return the audio bytes, or null when the frame is too short, has no payload,
     *   or its header does not contain `Path:audio`
     */
    private fun parseBinaryFrame(data: ByteArray): ByteArray? {
        if (data.size < 2) return null

        val headerLength = ((data[0].toInt() and 0xFF) shl 8) or (data[1].toInt() and 0xFF)
        val audioStart = 2 + headerLength
        if (audioStart >= data.size) return null

        val header = String(data, 2, headerLength, Charsets.US_ASCII)
        return if (header.contains("Path:audio")) data.copyOfRange(audioStart, data.size) else null
    }

    /**
     * Generates the `Sec-MS-GEC` token: the upper-case hex SHA-256 of the current time
     * (skew-corrected, as Windows file-time ticks rounded down to 5 minutes) followed
     * by [TRUSTED_TOKEN].
     */
    private fun generateGecToken(): String {
        // Integer arithmetic throughout: all quantities are whole seconds/ticks.
        val unixSeconds = System.currentTimeMillis() / 1000 + clockSkewSeconds
        val windowsSeconds = unixSeconds + WINDOWS_EPOCH_OFFSET
        val alignedSeconds = windowsSeconds - windowsSeconds % GEC_WINDOW_SECONDS
        val ticks = alignedSeconds * TICKS_PER_SECOND // 100-nanosecond intervals

        val digest = MessageDigest.getInstance("SHA-256")
            .digest("$ticks$TRUSTED_TOKEN".toByteArray(Charsets.US_ASCII))
        return digest.joinToString("") { "%02X".format(it) }
    }

    /** Formats the current UTC time in the JavaScript `Date.toString()`-like form the protocol uses. */
    private fun formatTimestamp(): String {
        val formatter = SimpleDateFormat(
            "EEE MMM dd yyyy HH:mm:ss 'GMT+0000 (Coordinated Universal Time)'",
            Locale.US
        )
        formatter.timeZone = TimeZone.getTimeZone("UTC")
        return formatter.format(Date())
    }

    /**
     * Updates [clockSkewSeconds] from an HTTP `Date` response header. An unparseable
     * header leaves the current skew untouched.
     */
    private fun adjustClockSkew(serverDateHeader: String) {
        try {
            val parser = SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss z", Locale.US)
            val serverTime = parser.parse(serverDateHeader)?.time ?: return
            clockSkewSeconds = (serverTime - System.currentTimeMillis()) / 1000
            Timber.d("$TAG: 时钟偏移修正为 ${clockSkewSeconds}s")
        } catch (e: Exception) {
            Timber.w(e, "$TAG: 无法解析服务器时间 $serverDateHeader")
        }
    }

    /**
     * Plays [file] on the main thread, replacing any current player.
     *
     * @return true when playback started, false when the player could not be
     *   prepared or started (the failed player is released)
     */
    private suspend fun playAudio(file: File): Boolean = withContext(Dispatchers.Main) {
        releasePlayer()

        // The player is configured through an explicit variable rather than `apply { }`
        // so that `isPlaying` below unambiguously means this manager's flag and not
        // MediaPlayer.isPlaying.
        val player = MediaPlayer()
        try {
            player.setDataSource(file.absolutePath)
            player.setOnCompletionListener {
                isPlaying = false
                onComplete?.invoke()
                Timber.d("$TAG: 播放完成")
            }
            player.setOnErrorListener { _, what, extra ->
                Timber.e("$TAG: 播放错误 what=$what extra=$extra")
                isPlaying = false
                true
            }
            player.prepare()
            player.start()

            mediaPlayer = player
            isPlaying = true
            Timber.d("$TAG: 开始播放")
            true
        } catch (e: Exception) {
            Timber.e(e, "$TAG: 播放异常")
            // The player never became the current one; release it here or it leaks.
            player.release()
            isPlaying = false
            false
        }
    }

    // ===== Cache management =====

    /** Returns the cache file for this voice + text pair (file name is the MD5 of `voice:text`). */
    private fun getCacheFile(text: String, voice: String): File {
        val cacheDir = File(context.cacheDir, CACHE_DIR).also { it.mkdirs() }
        val key = MessageDigest.getInstance("MD5")
            .digest("$voice:$text".toByteArray())
            .joinToString("") { "%02x".format(it) }
        return File(cacheDir, "$key$CACHE_FILE_SUFFIX")
    }

    /**
     * Writes [data] to [file] and then evicts the oldest cached files beyond
     * [MAX_CACHE_SIZE].
     *
     * The data is written to a temporary sibling file and renamed into place so that
     * an interrupted write never leaves a truncated file that later looks like a
     * valid cache hit.
     *
     * @throws java.io.IOException if the file cannot be written or moved into place
     */
    private fun saveToCacheDir(file: File, data: ByteArray) {
        val cacheDir = file.parentFile
        // The directory may have been removed by clearCache() since getCacheFile().
        cacheDir?.mkdirs()

        val tempFile = File(cacheDir, "${file.name}.tmp")
        tempFile.writeBytes(data)
        if (!tempFile.renameTo(file)) {
            tempFile.delete()
            throw java.io.IOException("Failed to move ${tempFile.name} to ${file.name}")
        }

        // Evict the least recently modified audio files beyond the limit.
        val cached = cacheDir?.listFiles()
            ?.filter { it.isFile && it.name.endsWith(CACHE_FILE_SUFFIX) }
            ?: return
        val excess = cached.size - MAX_CACHE_SIZE
        if (excess > 0) {
            cached.sortedBy { it.lastModified() }.take(excess).forEach { it.delete() }
        }
    }

    /** Deletes the whole cache directory. */
    fun clearCache() {
        File(context.cacheDir, CACHE_DIR).deleteRecursively()
        Timber.d("$TAG: 缓存已清除")
    }

    companion object {
        private const val TAG = "EdgeTTS"

        /** WebSocket base URL. */
        private const val WSS_BASE = "wss://speech.platform.bing.com/consumer/speech/synthesize/readaloud/edge/v1"

        /** Fixed trusted-client token; sent as a query parameter and mixed into the GEC hash. */
        private const val TRUSTED_TOKEN = "6A5AA1D4EAFF4E9FB37E23D68491D6F4"

        /** Value sent as `Sec-MS-GEC-Version`. */
        private const val GEC_VERSION = "1-143.0.3650.75"

        /** Seconds between the Windows file-time epoch and the Unix epoch. */
        private const val WINDOWS_EPOCH_OFFSET = 11644473600L

        /** The GEC token time is rounded down to this many seconds (5 minutes). */
        private const val GEC_WINDOW_SECONDS = 300L

        /** Number of 100-nanosecond ticks in one second. */
        private const val TICKS_PER_SECOND = 10_000_000L

        /** Default Chinese female voice (Xiaoxiao, neural). */
        const val VOICE_XIAOXIAO = "zh-CN-XiaoxiaoNeural"

        /** Chinese male voice (Yunxi, neural). */
        const val VOICE_YUNXI = "zh-CN-YunxiNeural"

        /** Audio output format: 24 kHz, 48 kbit/s, mono MP3. */
        private const val OUTPUT_FORMAT = "audio-24khz-48kbitrate-mono-mp3"

        /** Name of the cache sub-directory inside the app cache dir. */
        private const val CACHE_DIR = "tts_cache"

        /** File extension of cached audio files. */
        private const val CACHE_FILE_SUFFIX = ".mp3"

        /** Maximum number of cached audio files kept on disk. */
        private const val MAX_CACHE_SIZE = 50

        /** Upper bound for one synthesis round trip before it is treated as failed. */
        private const val SYNTHESIS_TIMEOUT_MS = 30_000L
    }
}
|
||||
@@ -28,7 +28,7 @@ import com.xiaoqu.watch.ui.punch.PunchResult
|
||||
import com.xiaoqu.watch.ui.punch.PunchViewModel
|
||||
import com.xiaoqu.watch.ui.widget.StatusBarView
|
||||
import com.xiaoqu.watch.util.DateUtil
|
||||
import android.speech.tts.TextToSpeech
|
||||
import com.xiaoqu.watch.service.manager.EdgeTtsManager
|
||||
import dagger.hilt.android.AndroidEntryPoint
|
||||
import kotlinx.coroutines.delay
|
||||
import kotlinx.coroutines.isActive
|
||||
@@ -52,6 +52,7 @@ class HomeFragment : BaseFragment<FragmentHomeBinding>() {
|
||||
@Inject lateinit var bluetoothScanManager: com.xiaoqu.watch.service.manager.BluetoothScanManager
|
||||
@Inject lateinit var notificationManager: com.xiaoqu.watch.service.manager.NotificationManager
|
||||
@Inject lateinit var vibrationConfigManager: com.xiaoqu.watch.device.sensor.VibrationConfigManager
|
||||
@Inject lateinit var edgeTtsManager: EdgeTtsManager
|
||||
|
||||
/** 考勤打卡 ViewModel */
|
||||
private val punchViewModel: PunchViewModel by viewModels()
|
||||
@@ -85,7 +86,6 @@ class HomeFragment : BaseFragment<FragmentHomeBinding>() {
|
||||
private var lastTapTime = 0L
|
||||
|
||||
// ===== TTS 语音测试 =====
|
||||
private var tts: TextToSpeech? = null
|
||||
|
||||
override fun createBinding(inflater: LayoutInflater, container: ViewGroup?): FragmentHomeBinding {
|
||||
return FragmentHomeBinding.inflate(inflater, container, false)
|
||||
@@ -183,9 +183,8 @@ class HomeFragment : BaseFragment<FragmentHomeBinding>() {
|
||||
it.onBackKeyPressed = null
|
||||
it.notificationBanner.onClick = null
|
||||
}
|
||||
// 释放 TTS 资源
|
||||
tts?.shutdown()
|
||||
tts = null
|
||||
// 停止 TTS 播放
|
||||
edgeTtsManager.stop()
|
||||
}
|
||||
|
||||
// ===== 打卡面板 =====
|
||||
@@ -561,43 +560,18 @@ class HomeFragment : BaseFragment<FragmentHomeBinding>() {
|
||||
}
|
||||
|
||||
/**
|
||||
* TTS 语音测试:验证设备是否支持中文语音合成
|
||||
* 测试内容:初始化 TTS → 设置中文 → 播放测试语音
|
||||
* Edge TTS 语音测试:通过微软 Edge TTS 合成中文语音
|
||||
* 测试内容:调用 Edge TTS API → 接收 MP3 → 播放
|
||||
* 结果通过 Logcat 和 Toast 反馈
|
||||
*/
|
||||
private fun testTts() {
|
||||
tts?.shutdown()
|
||||
tts = TextToSpeech(requireContext()) { status ->
|
||||
if (status == TextToSpeech.SUCCESS) {
|
||||
val result = tts?.setLanguage(java.util.Locale.CHINESE)
|
||||
when {
|
||||
result == TextToSpeech.LANG_MISSING_DATA -> {
|
||||
Timber.w("TTS: 中文语音包缺失")
|
||||
activity?.runOnUiThread {
|
||||
Toast.makeText(requireContext(), "TTS: 中文语音包缺失", Toast.LENGTH_LONG).show()
|
||||
}
|
||||
}
|
||||
result == TextToSpeech.LANG_NOT_SUPPORTED -> {
|
||||
Timber.w("TTS: 不支持中文")
|
||||
activity?.runOnUiThread {
|
||||
Toast.makeText(requireContext(), "TTS: 不支持中文", Toast.LENGTH_LONG).show()
|
||||
}
|
||||
}
|
||||
else -> {
|
||||
Timber.d("TTS: 中文语音可用,开始播放测试")
|
||||
activity?.runOnUiThread {
|
||||
Toast.makeText(requireContext(), "TTS 测试播放中...", Toast.LENGTH_SHORT).show()
|
||||
}
|
||||
tts?.speak("您有3条新任务待处理", TextToSpeech.QUEUE_FLUSH, null, "tts_test")
|
||||
}
|
||||
}
|
||||
} else {
|
||||
Timber.e("TTS: 初始化失败, status=$status")
|
||||
activity?.runOnUiThread {
|
||||
Toast.makeText(requireContext(), "TTS: 初始化失败", Toast.LENGTH_LONG).show()
|
||||
}
|
||||
Toast.makeText(requireContext(), "Edge TTS 测试中...", Toast.LENGTH_SHORT).show()
|
||||
edgeTtsManager.speak(
|
||||
text = "您有3条新任务待处理,请及时查看",
|
||||
onError = { msg ->
|
||||
Toast.makeText(requireContext(), "TTS 失败: $msg", Toast.LENGTH_LONG).show()
|
||||
}
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
// ===== 事件监听 =====
|
||||
|
||||
Reference in New Issue
Block a user