From 5a386a6060b3b1096b71e8930f6050ad67106d4f Mon Sep 17 00:00:00 2001
From: dongliang
Date: Fri, 8 May 2026 12:33:59 +0930
Subject: [PATCH] =?UTF-8?q?feat:=20=E6=8E=A5=E5=85=A5=20Edge=20TTS=20?=
 =?UTF-8?q?=E5=85=8D=E8=B4=B9=E4=B8=AD=E6=96=87=E8=AF=AD=E9=9F=B3=E5=90=88?=
 =?UTF-8?q?=E6=88=90?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

设备不支持内置中文 TTS,改用微软 Edge TTS(WebSocket 协议)。

- 新增 EdgeTtsManager:WebSocket 调用、MP3 缓存、MediaPlayer 播放
- 调试模式(设置页点头像6次)触发 TTS 测试
- 缓存机制:相同文本不重复请求,上限50条自动清理

Co-Authored-By: Claude Opus 4.6 (1M context)
---
 .../watch/service/manager/EdgeTtsManager.kt   | 374 ++++++++++++++++++
 .../com/xiaoqu/watch/ui/home/HomeFragment.kt  |  54 +--
 2 files changed, 390 insertions(+), 38 deletions(-)
 create mode 100644 app/src/main/java/com/xiaoqu/watch/service/manager/EdgeTtsManager.kt

diff --git a/app/src/main/java/com/xiaoqu/watch/service/manager/EdgeTtsManager.kt b/app/src/main/java/com/xiaoqu/watch/service/manager/EdgeTtsManager.kt
new file mode 100644
index 0000000..32abec6
--- /dev/null
+++ b/app/src/main/java/com/xiaoqu/watch/service/manager/EdgeTtsManager.kt
@@ -0,0 +1,374 @@
+package com.xiaoqu.watch.service.manager
+
+import android.content.Context
+import android.media.MediaPlayer
+import dagger.hilt.android.qualifiers.ApplicationContext
+import kotlinx.coroutines.*
+import okhttp3.*
+import okio.ByteString
+import timber.log.Timber
+import java.io.ByteArrayOutputStream
+import java.io.File
+import java.security.MessageDigest
+import java.text.SimpleDateFormat
+import java.util.*
+import java.util.concurrent.atomic.AtomicBoolean
+import javax.inject.Inject
+import javax.inject.Singleton
+
+/**
+ * Edge TTS speech-synthesis manager.
+ *
+ * Sends text to Microsoft Edge's read-aloud endpoint over a WebSocket, caches the returned MP3
+ * in the app cache directory and plays it with [MediaPlayer].
+ *
+ * NOTE: [speak] and [stop] touch the player without synchronization, so call them from the
+ * main thread.
+ */
+@Singleton
+class EdgeTtsManager @Inject constructor(
+    @ApplicationContext private val context: Context,
+    private val okHttpClient: OkHttpClient
+) {
+
+    companion object {
+        private const val TAG = "EdgeTTS"
+
+        /** Base WebSocket endpoint. */
+        private const val WSS_BASE = "wss://speech.platform.bing.com/consumer/speech/synthesize/readaloud/edge/v1"
+
+        /** Fixed trusted-client token sent with every request. */
+        private const val TRUSTED_TOKEN = "6A5AA1D4EAFF4E9FB37E23D68491D6F4"
+
+        /** Value of the Sec-MS-GEC-Version query parameter. */
+        private const val GEC_VERSION = "1-143.0.3650.75"
+
+        /** Seconds between the Windows file-time epoch and the Unix epoch. */
+        private const val WINDOWS_EPOCH_OFFSET = 11644473600L
+
+        /** The GEC token is derived from the time rounded down to this many seconds. */
+        private const val GEC_WINDOW_SECONDS = 300L
+
+        /** Number of 100 ns ticks per second. */
+        private const val TICKS_PER_SECOND = 10_000_000L
+
+        /** Default Chinese female voice (Xiaoxiao). */
+        const val VOICE_XIAOXIAO = "zh-CN-XiaoxiaoNeural"
+
+        /** Chinese male voice (Yunxi). */
+        const val VOICE_YUNXI = "zh-CN-YunxiNeural"
+
+        /** Audio output format: 24 kHz, 48 kbps, mono MP3. */
+        private const val OUTPUT_FORMAT = "audio-24khz-48kbitrate-mono-mp3"
+
+        /** Name of the cache sub-directory. */
+        private const val CACHE_DIR = "tts_cache"
+
+        /** Maximum number of cached clips. */
+        private const val MAX_CACHE_SIZE = 50
+    }
+
+    /** Scope that owns synthesis jobs, so [stop] can cancel a request that is still running. */
+    private val scope = CoroutineScope(SupervisorJob() + Dispatchers.IO)
+
+    /** The speak request currently in flight, if any. */
+    private var speakJob: Job? = null
+
+    /** Current MediaPlayer, or null when nothing is loaded. */
+    private var mediaPlayer: MediaPlayer? = null
+
+    /** Whether audio is currently playing. */
+    var isPlaying: Boolean = false
+        private set
+
+    /** Clock skew in seconds used for the GEC token; updated from the WebSocket failure callback. */
+    @Volatile
+    private var clockSkewSeconds: Long = 0
+
+    /** Invoked when playback completes. */
+    var onComplete: (() -> Unit)? = null
+
+    /**
+     * Synthesizes [text] and plays it, replacing any request that is still running.
+     *
+     * @param text text to read aloud; blank text is ignored
+     * @param voice voice name, defaults to [VOICE_XIAOXIAO]
+     * @param onError invoked on the main dispatcher with a message when synthesis fails
+     */
+    fun speak(text: String, voice: String = VOICE_XIAOXIAO, onError: ((String) -> Unit)? = null) {
+        if (text.isBlank()) return
+
+        // Also cancels the previous request, so an older synthesis cannot start playing later.
+        stop()
+
+        speakJob = scope.launch {
+            try {
+                val cacheFile = getCacheFile(text, voice)
+                if (cacheFile.exists()) {
+                    Timber.d("$TAG: 命中缓存 ${cacheFile.name}")
+                    playAudio(cacheFile)
+                    return@launch
+                }
+
+                val audioData = synthesize(text, voice)
+                if (audioData != null && audioData.isNotEmpty()) {
+                    saveToCacheDir(cacheFile, audioData)
+                    playAudio(cacheFile)
+                } else {
+                    Timber.w("$TAG: 合成返回空数据")
+                    withContext(Dispatchers.Main) { onError?.invoke("语音合成失败") }
+                }
+            } catch (e: CancellationException) {
+                // Cancellation is not a failure: propagate it instead of reporting an error.
+                throw e
+            } catch (e: Exception) {
+                Timber.e(e, "$TAG: 语音合成异常")
+                withContext(Dispatchers.Main) { onError?.invoke("语音合成异常: ${e.message}") }
+            }
+        }
+    }
+
+    /** Cancels any in-flight synthesis and stops playback. */
+    fun stop() {
+        speakJob?.cancel()
+        speakJob = null
+        releasePlayer()
+    }
+
+    /** Stops and releases the current player without touching the synthesis job. */
+    private fun releasePlayer() {
+        val player = mediaPlayer
+        mediaPlayer = null
+        isPlaying = false
+        if (player == null) return
+        try {
+            if (player.isPlaying) player.stop()
+        } catch (e: IllegalStateException) {
+            Timber.w(e, "$TAG: 停止播放异常")
+        } finally {
+            player.release()
+        }
+    }
+
+    /**
+     * Requests synthesis of [text] from Edge TTS over a WebSocket.
+     *
+     * @return the MP3 bytes received (empty if the socket closed before any audio arrived), or
+     * null when the connection failed
+     */
+    private suspend fun synthesize(text: String, voice: String): ByteArray? {
+        return suspendCancellableCoroutine<ByteArray?> { continuation ->
+            val connectionId = UUID.randomUUID().toString().replace("-", "")
+            val requestId = UUID.randomUUID().toString().replace("-", "")
+            val gecToken = generateGecToken()
+
+            val url = "$WSS_BASE?TrustedClientToken=$TRUSTED_TOKEN" +
+                "&ConnectionId=$connectionId" +
+                "&Sec-MS-GEC=$gecToken" +
+                "&Sec-MS-GEC-Version=$GEC_VERSION"
+
+            val request = Request.Builder()
+                .url(url)
+                .header("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36 Edg/143.0.0.0")
+                .header("Origin", "chrome-extension://jdiccldimpdaibmpdkjnbmckianbfold")
+                .build()
+
+            val audioBuffer = ByteArrayOutputStream()
+            // Several listener callbacks can try to finish the request; resume exactly once.
+            val resumed = AtomicBoolean(false)
+            fun finish(result: ByteArray?) {
+                if (resumed.compareAndSet(false, true)) {
+                    continuation.resumeWith(Result.success(result))
+                }
+            }
+
+            val ws = okHttpClient.newWebSocket(request, object : WebSocketListener() {
+                override fun onOpen(webSocket: WebSocket, response: Response) {
+                    Timber.d("$TAG: WebSocket 已连接")
+                    // Send speech.config first, then the SSML synthesis request.
+                    webSocket.send(buildConfigMessage())
+                    webSocket.send(buildSsmlMessage(requestId, text, voice))
+                }
+
+                override fun onMessage(webSocket: WebSocket, text: String) {
+                    // Text frame: "Path:turn.end" marks the end of the synthesis.
+                    if (text.contains("Path:turn.end")) {
+                        Timber.d("$TAG: 合成完成,音频大小 ${audioBuffer.size()} 字节")
+                        webSocket.close(1000, "done")
+                        finish(audioBuffer.toByteArray())
+                    }
+                }
+
+                override fun onMessage(webSocket: WebSocket, bytes: ByteString) {
+                    // Binary frame: keep the payload when it is an audio frame.
+                    parseBinaryFrame(bytes.toByteArray())?.let { audioBuffer.write(it) }
+                }
+
+                override fun onFailure(webSocket: WebSocket, t: Throwable, response: Response?) {
+                    Timber.e(t, "$TAG: WebSocket 连接失败")
+                    // Use the Date header of the error response, if any, to correct clock skew.
+                    response?.header("Date")?.let { adjustClockSkew(it) }
+                    finish(null)
+                }
+
+                override fun onClosed(webSocket: WebSocket, code: Int, reason: String) {
+                    finish(audioBuffer.toByteArray())
+                }
+            })
+
+            continuation.invokeOnCancellation { ws.cancel() }
+        }
+    }
+
+    /** Builds the speech.config message. */
+    private fun buildConfigMessage(): String {
+        val timestamp = formatTimestamp()
+        return "X-Timestamp:$timestamp\r\n" +
+            "Content-Type:application/json; charset=utf-8\r\n" +
+            "Path:speech.config\r\n" +
+            "\r\n" +
+            """{"context":{"synthesis":{"audio":{"metadataoptions":{"sentenceBoundaryEnabled":"false","wordBoundaryEnabled":"false"},"outputFormat":"$OUTPUT_FORMAT"}}}}"""
+    }
+
+    /** Builds the SSML synthesis request message for [text] spoken by [voice]. */
+    private fun buildSsmlMessage(requestId: String, text: String, voice: String): String {
+        val timestamp = formatTimestamp()
+        // Escape '&' first so the entities produced by the later replacements are not re-escaped.
+        val escapedText = text
+            .replace("&", "&amp;")
+            .replace("<", "&lt;")
+            .replace(">", "&gt;")
+            .replace("\"", "&quot;")
+            .replace("'", "&apos;")
+
+        return "X-RequestId:$requestId\r\n" +
+            "Content-Type:application/ssml+xml\r\n" +
+            "X-Timestamp:${timestamp}Z\r\n" +
+            "Path:ssml\r\n" +
+            "\r\n" +
+            "<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xml:lang='zh-CN'>" +
+            "<voice name='$voice'>" +
+            "<prosody pitch='+0Hz' rate='+0%' volume='+0%'>$escapedText</prosody>" +
+            "</voice></speak>"
+    }
+
+    /**
+     * Extracts the audio payload from a binary frame.
+     * Layout: [2-byte big-endian header length][header][audio data].
+     *
+     * @return the audio bytes, or null when the frame is too short or is not an audio frame
+     */
+    private fun parseBinaryFrame(data: ByteArray): ByteArray? {
+        if (data.size < 2) return null
+        val headerLength = ((data[0].toInt() and 0xFF) shl 8) or (data[1].toInt() and 0xFF)
+        val audioStart = 2 + headerLength
+        if (audioStart >= data.size) return null
+
+        // Only frames whose header names the audio path carry MP3 data.
+        val headerStr = String(data, 2, headerLength, Charsets.US_ASCII)
+        if (!headerStr.contains("Path:audio")) return null
+
+        return data.copyOfRange(audioStart, data.size)
+    }
+
+    /** Generates the Sec-MS-GEC token: an upper-case hex SHA-256 of time ticks plus the token. */
+    private fun generateGecToken(): String {
+        val windowsSeconds = System.currentTimeMillis() / 1000 + clockSkewSeconds + WINDOWS_EPOCH_OFFSET
+        // Round down to the 5-minute window, then convert to 100 ns ticks.
+        val ticks = (windowsSeconds - windowsSeconds % GEC_WINDOW_SECONDS) * TICKS_PER_SECOND
+
+        val digest = MessageDigest.getInstance("SHA-256")
+            .digest("$ticks$TRUSTED_TOKEN".toByteArray(Charsets.US_ASCII))
+        return digest.joinToString("") { "%02X".format(it) }
+    }
+
+    /** Formats the current UTC time for the X-Timestamp header. */
+    private fun formatTimestamp(): String {
+        val sdf = SimpleDateFormat("EEE MMM dd yyyy HH:mm:ss 'GMT+0000 (Coordinated Universal Time)'", Locale.US)
+        sdf.timeZone = TimeZone.getTimeZone("UTC")
+        return sdf.format(Date())
+    }
+
+    /** Updates [clockSkewSeconds] from an HTTP Date header; unparsable values are ignored. */
+    private fun adjustClockSkew(serverDateHeader: String) {
+        try {
+            val sdf = SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss z", Locale.US)
+            val serverTime = sdf.parse(serverDateHeader)?.time ?: return
+            clockSkewSeconds = (serverTime - System.currentTimeMillis()) / 1000
+            Timber.d("$TAG: 时钟偏移修正为 ${clockSkewSeconds}s")
+        } catch (e: Exception) {
+            Timber.w(e, "$TAG: 服务器时间解析失败")
+        }
+    }
+
+    /** Plays [file] on the main dispatcher, replacing whatever is currently loaded. */
+    private suspend fun playAudio(file: File) {
+        withContext(Dispatchers.Main) {
+            // Release only the player: calling stop() here would cancel the calling coroutine.
+            releasePlayer()
+            val player = MediaPlayer()
+            try {
+                player.setDataSource(file.absolutePath)
+                player.setOnCompletionListener {
+                    isPlaying = false
+                    onComplete?.invoke()
+                    Timber.d("$TAG: 播放完成")
+                }
+                player.setOnErrorListener { _, what, extra ->
+                    Timber.e("$TAG: 播放错误 what=$what extra=$extra")
+                    isPlaying = false
+                    true
+                }
+                player.prepare()
+                player.start()
+                mediaPlayer = player
+                isPlaying = true
+                Timber.d("$TAG: 开始播放")
+            } catch (e: Exception) {
+                Timber.e(e, "$TAG: 播放异常")
+                // The player never became the current one, so release it here to avoid a leak.
+                player.release()
+                isPlaying = false
+            }
+        }
+    }
+
+    // ===== Cache management =====
+
+    /** Returns the cache file for a clip, named after the MD5 of "voice:text". */
+    private fun getCacheFile(text: String, voice: String): File {
+        val cacheDir = File(context.cacheDir, CACHE_DIR).also { it.mkdirs() }
+        val key = MessageDigest.getInstance("MD5")
+            .digest("$voice:$text".toByteArray())
+            .joinToString("") { "%02x".format(it) }
+        return File(cacheDir, "$key.mp3")
+    }
+
+    /** Saves [data] to [file], then deletes the oldest clips beyond [MAX_CACHE_SIZE]. */
+    private fun saveToCacheDir(file: File, data: ByteArray) {
+        // Write to a temp file and rename it, so an interrupted write cannot leave a truncated
+        // clip that later looks like a cache hit.
+        val tempFile = File(file.parentFile, "${file.name}.tmp")
+        tempFile.writeBytes(data)
+        if (!tempFile.renameTo(file)) {
+            // Fall back to a direct write when the rename is refused.
+            tempFile.delete()
+            file.writeBytes(data)
+        }
+
+        val cacheDir = file.parentFile ?: return
+        val clips = cacheDir.listFiles()
+            ?.filter { it.extension == "mp3" }
+            ?.sortedBy { it.lastModified() }
+            ?: return
+        if (clips.size > MAX_CACHE_SIZE) {
+            clips.take(clips.size - MAX_CACHE_SIZE).forEach { it.delete() }
+        }
+    }
+
+    /** Deletes every cached clip. */
+    fun clearCache() {
+        File(context.cacheDir, CACHE_DIR).deleteRecursively()
+        Timber.d("$TAG: 缓存已清除")
+    }
+}
diff --git a/app/src/main/java/com/xiaoqu/watch/ui/home/HomeFragment.kt b/app/src/main/java/com/xiaoqu/watch/ui/home/HomeFragment.kt
index e01a805..2fb2e4d 100644
--- a/app/src/main/java/com/xiaoqu/watch/ui/home/HomeFragment.kt
+++ b/app/src/main/java/com/xiaoqu/watch/ui/home/HomeFragment.kt
@@ -28,7 +28,7 @@ import com.xiaoqu.watch.ui.punch.PunchResult
 import com.xiaoqu.watch.ui.punch.PunchViewModel
 import com.xiaoqu.watch.ui.widget.StatusBarView
 import com.xiaoqu.watch.util.DateUtil
-import android.speech.tts.TextToSpeech
+import com.xiaoqu.watch.service.manager.EdgeTtsManager
 import dagger.hilt.android.AndroidEntryPoint
 import kotlinx.coroutines.delay
 import kotlinx.coroutines.isActive
@@ -52,6 +52,7 @@ class HomeFragment : BaseFragment() {
     @Inject lateinit var bluetoothScanManager: com.xiaoqu.watch.service.manager.BluetoothScanManager
     @Inject lateinit var notificationManager: com.xiaoqu.watch.service.manager.NotificationManager
     @Inject lateinit var vibrationConfigManager: com.xiaoqu.watch.device.sensor.VibrationConfigManager
+    @Inject lateinit var edgeTtsManager: EdgeTtsManager
 
     /** 考勤打卡 ViewModel */
     private val punchViewModel: PunchViewModel by viewModels()
@@ -85,7 +86,6 @@ class HomeFragment : BaseFragment() {
     private var lastTapTime = 0L
 
     // ===== TTS 语音测试 =====
-    private var tts: TextToSpeech? = null
 
     override fun createBinding(inflater: LayoutInflater, container: ViewGroup?): FragmentHomeBinding {
         return FragmentHomeBinding.inflate(inflater, container, false)
@@ -183,9 +183,8 @@ class HomeFragment : BaseFragment() {
             it.onBackKeyPressed = null
             it.notificationBanner.onClick = null
         }
-        // 释放 TTS 资源
-        tts?.shutdown()
-        tts = null
+        // Stop any in-flight TTS synthesis and playback.
+        edgeTtsManager.stop()
     }
 
     // ===== 打卡面板 =====
@@ -561,43 +560,22 @@ class HomeFragment : BaseFragment() {
     }
 
     /**
-     * TTS 语音测试:验证设备是否支持中文语音合成
-     * 测试内容:初始化 TTS → 设置中文 → 播放测试语音
+     * Edge TTS voice test: synthesizes Chinese speech through Microsoft Edge TTS.
+     * Flow: call the Edge TTS API -> receive MP3 -> play it.
      * 结果通过 Logcat 和 Toast 反馈
      */
     private fun testTts() {
-        tts?.shutdown()
-        tts = TextToSpeech(requireContext()) { status ->
-            if (status == TextToSpeech.SUCCESS) {
-                val result = tts?.setLanguage(java.util.Locale.CHINESE)
-                when {
-                    result == TextToSpeech.LANG_MISSING_DATA -> {
-                        Timber.w("TTS: 中文语音包缺失")
-                        activity?.runOnUiThread {
-                            Toast.makeText(requireContext(), "TTS: 中文语音包缺失", Toast.LENGTH_LONG).show()
-                        }
-                    }
-                    result == TextToSpeech.LANG_NOT_SUPPORTED -> {
-                        Timber.w("TTS: 不支持中文")
-                        activity?.runOnUiThread {
-                            Toast.makeText(requireContext(), "TTS: 不支持中文", Toast.LENGTH_LONG).show()
-                        }
-                    }
-                    else -> {
-                        Timber.d("TTS: 中文语音可用,开始播放测试")
-                        activity?.runOnUiThread {
-                            Toast.makeText(requireContext(), "TTS 测试播放中...", Toast.LENGTH_SHORT).show()
-                        }
-                        tts?.speak("您有3条新任务待处理", TextToSpeech.QUEUE_FLUSH, null, "tts_test")
-                    }
-                }
-            } else {
-                Timber.e("TTS: 初始化失败, status=$status")
-                activity?.runOnUiThread {
-                    Toast.makeText(requireContext(), "TTS: 初始化失败", Toast.LENGTH_LONG).show()
-                }
+        Toast.makeText(requireContext(), "Edge TTS 测试中...", Toast.LENGTH_SHORT).show()
+        edgeTtsManager.speak(
+            text = "您有3条新任务待处理,请及时查看",
+            onError = { msg ->
+                // The callback is asynchronous: the fragment may be detached by now, so
+                // requireContext() could throw here.
+                context?.let { ctx ->
+                    Toast.makeText(ctx, "TTS 失败: $msg", Toast.LENGTH_LONG).show()
+                }
             }
-        }
+        )
     }
 
     // ===== 事件监听 =====