diff --git a/app/src/main/java/com/xiaoqu/watch/service/manager/EdgeTtsManager.kt b/app/src/main/java/com/xiaoqu/watch/service/manager/EdgeTtsManager.kt
new file mode 100644
index 0000000..32abec6
--- /dev/null
+++ b/app/src/main/java/com/xiaoqu/watch/service/manager/EdgeTtsManager.kt
@@ -0,0 +1,343 @@
+package com.xiaoqu.watch.service.manager
+
+import android.content.Context
+import android.media.MediaPlayer
+import dagger.hilt.android.qualifiers.ApplicationContext
+import kotlinx.coroutines.*
+import okhttp3.*
+import okio.ByteString
+import timber.log.Timber
+import java.io.ByteArrayOutputStream
+import java.io.File
+import java.security.MessageDigest
+import java.text.SimpleDateFormat
+import java.util.*
+import javax.inject.Inject
+import javax.inject.Singleton
+
+/**
+ * Edge TTS speech-synthesis manager.
+ *
+ * Talks to the Microsoft Edge "read aloud" endpoint over a WebSocket: sends a
+ * `speech.config` message followed by an SSML request, collects the MP3 audio
+ * frames that come back, stores them in a small on-disk cache and plays them
+ * with [MediaPlayer].
+ *
+ * Only one utterance is active at a time: [speak] and [stop] cancel whatever
+ * synthesis is still in flight and release the current player.
+ */
+@Singleton
+class EdgeTtsManager @Inject constructor(
+    @ApplicationContext private val context: Context,
+    private val okHttpClient: OkHttpClient
+) {
+
+    companion object {
+        private const val TAG = "EdgeTTS"
+
+        /** WebSocket base URL. */
+        private const val WSS_BASE = "wss://speech.platform.bing.com/consumer/speech/synthesize/readaloud/edge/v1"
+
+        /** Fixed trusted-client token sent with every request. */
+        private const val TRUSTED_TOKEN = "6A5AA1D4EAFF4E9FB37E23D68491D6F4"
+
+        /** Value of the `Sec-MS-GEC-Version` query parameter. */
+        private const val GEC_VERSION = "1-143.0.3650.75"
+
+        /** Seconds between the Windows file-time epoch and the Unix epoch. */
+        private const val WINDOWS_EPOCH_OFFSET = 11644473600L
+
+        /** The GEC token is computed over the time rounded down to this many seconds. */
+        private const val GEC_WINDOW_SECONDS = 300L
+
+        /** Number of 100 ns file-time ticks in one second. */
+        private const val TICKS_PER_SECOND = 10_000_000L
+
+        /** Default Chinese female voice (Xiaoxiao). */
+        const val VOICE_XIAOXIAO = "zh-CN-XiaoxiaoNeural"
+
+        /** Chinese male voice (Yunxi). */
+        const val VOICE_YUNXI = "zh-CN-YunxiNeural"
+
+        /** Requested audio format: 24 kHz, 48 kbit/s, mono MP3. */
+        private const val OUTPUT_FORMAT = "audio-24khz-48kbitrate-mono-mp3"
+
+        /** Name of the cache sub-directory inside the app cache dir. */
+        private const val CACHE_DIR = "tts_cache"
+
+        /** Maximum number of cached clips kept on disk. */
+        private const val MAX_CACHE_SIZE = 50
+
+        /** File extension of finished cache entries. */
+        private const val CACHE_EXTENSION = ".mp3"
+
+        /** Suffix of a cache entry that is still being written. */
+        private const val TEMP_SUFFIX = ".tmp"
+    }
+
+    /** Scope that owns every synthesis job started by [speak]. */
+    private val scope = CoroutineScope(SupervisorJob() + Dispatchers.IO)
+
+    /** The synthesis/playback job started by the most recent [speak] call. */
+    @Volatile
+    private var speakJob: Job? = null
+
+    /** The player of the clip currently loaded, if any. */
+    @Volatile
+    private var mediaPlayer: MediaPlayer? = null
+
+    /** Whether a clip is currently playing. */
+    @Volatile
+    var isPlaying: Boolean = false
+        private set
+
+    /** Clock skew in seconds (server minus local), applied when generating the GEC token. */
+    @Volatile
+    private var clockSkewSeconds: Long = 0
+
+    /** Invoked when a clip has played to the end. */
+    var onComplete: (() -> Unit)? = null
+
+    /**
+     * Synthesizes [text] (or reuses a cached clip) and plays it.
+     *
+     * Any previous utterance is stopped first. The work runs asynchronously;
+     * failures are reported through [onError] on the main dispatcher.
+     *
+     * @param text text to read aloud; blank text is ignored
+     * @param voice voice name, defaults to [VOICE_XIAOXIAO]
+     * @param onError optional callback receiving a human-readable error message
+     */
+    fun speak(text: String, voice: String = VOICE_XIAOXIAO, onError: ((String) -> Unit)? = null) {
+        if (text.isBlank()) return
+
+        // Cancels the previous job as well, so a slow synthesis cannot start
+        // playing on top of (or after) the utterance requested here.
+        stop()
+
+        speakJob = scope.launch {
+            try {
+                val cacheFile = getCacheFile(text, voice)
+                // A zero-length file is not a usable clip; treat it as a miss.
+                if (cacheFile.isFile && cacheFile.length() > 0L) {
+                    Timber.d("$TAG: 命中缓存 ${cacheFile.name}")
+                } else {
+                    val audioData = synthesize(text, voice)
+                    if (audioData == null || audioData.isEmpty()) {
+                        Timber.w("$TAG: 合成返回空数据")
+                        withContext(Dispatchers.Main) { onError?.invoke("语音合成失败") }
+                        return@launch
+                    }
+                    saveToCacheDir(cacheFile, audioData)
+                }
+                playAudio(cacheFile)
+            } catch (e: CancellationException) {
+                // Cancellation is not an error; let it propagate untouched.
+                throw e
+            } catch (e: Exception) {
+                Timber.e(e, "$TAG: 语音合成异常")
+                withContext(Dispatchers.Main) { onError?.invoke("语音合成异常: ${e.message}") }
+            }
+        }
+    }
+
+    /** Stops playback and cancels any synthesis that is still in flight. */
+    fun stop() {
+        speakJob?.cancel()
+        speakJob = null
+        releasePlayer()
+    }
+
+    /** Releases the current player (if any) without touching the running job. */
+    private fun releasePlayer() {
+        val player = mediaPlayer
+        mediaPlayer = null
+        isPlaying = false
+        if (player == null) return
+        try {
+            try {
+                if (player.isPlaying) player.stop()
+            } finally {
+                player.release()
+            }
+        } catch (e: Exception) {
+            // Best effort: the player may already be in an invalid state.
+            Timber.w(e, "$TAG: 释放播放器异常")
+        }
+    }
+
+    /**
+     * Requests synthesis of [text] over a WebSocket and suspends until the
+     * service reports the end of the turn.
+     *
+     * @return the concatenated MP3 payload, or `null` when the connection
+     * failed or was closed before the end-of-turn message arrived
+     */
+    private suspend fun synthesize(text: String, voice: String): ByteArray? {
+        val connectionId = newHexId()
+        val requestId = newHexId()
+        val gecToken = generateGecToken()
+
+        val url = "$WSS_BASE?TrustedClientToken=$TRUSTED_TOKEN" +
+            "&ConnectionId=$connectionId" +
+            "&Sec-MS-GEC=$gecToken" +
+            "&Sec-MS-GEC-Version=$GEC_VERSION"
+
+        val request = Request.Builder()
+            .url(url)
+            .header("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36 Edg/143.0.0.0")
+            .header("Origin", "chrome-extension://jdiccldimpdaibmpdkjnbmckianbfold")
+            .build()
+
+        val audioBuffer = ByteArrayOutputStream()
+        // complete() is thread-safe and only the first call wins, so the
+        // listener callbacks below cannot resume the caller twice.
+        val outcome = CompletableDeferred<ByteArray?>()
+
+        val ws = okHttpClient.newWebSocket(request, object : WebSocketListener() {
+            override fun onOpen(webSocket: WebSocket, response: Response) {
+                Timber.d("$TAG: WebSocket 已连接")
+                // 1. speech.config, 2. the SSML synthesis request.
+                webSocket.send(buildConfigMessage())
+                webSocket.send(buildSsmlMessage(requestId, text, voice))
+            }
+
+            override fun onMessage(webSocket: WebSocket, text: String) {
+                // Note: `text` is the incoming frame here, not the text to speak.
+                if (text.contains("Path:turn.end")) {
+                    Timber.d("$TAG: 合成完成,音频大小 ${audioBuffer.size()} 字节")
+                    webSocket.close(1000, "done")
+                    outcome.complete(audioBuffer.toByteArray())
+                }
+            }
+
+            override fun onMessage(webSocket: WebSocket, bytes: ByteString) {
+                parseBinaryFrame(bytes.toByteArray())?.let { audioBuffer.write(it) }
+            }
+
+            override fun onClosing(webSocket: WebSocket, code: Int, reason: String) {
+                // Acknowledge a server-initiated close so onClosed is delivered.
+                // The peer's code is not echoed because reserved codes cannot be sent.
+                webSocket.close(1000, null)
+            }
+
+            override fun onFailure(webSocket: WebSocket, t: Throwable, response: Response?) {
+                Timber.e(t, "$TAG: WebSocket 连接失败")
+                // Use the server's Date header to correct the clock for the next attempt.
+                response?.header("Date")?.let { adjustClockSkew(it) }
+                outcome.complete(null)
+            }
+
+            override fun onClosed(webSocket: WebSocket, code: Int, reason: String) {
+                // No-op after turn.end. Otherwise the audio is incomplete and
+                // must not be played or cached as if it were the whole clip.
+                outcome.complete(null)
+            }
+        })
+
+        return try {
+            outcome.await()
+        } catch (e: CancellationException) {
+            ws.cancel()
+            throw e
+        }
+    }
+
+    /** Returns a random 32-character hex identifier (a UUID without dashes). */
+    private fun newHexId(): String = UUID.randomUUID().toString().replace("-", "")
+
+    /** Builds the `speech.config` message that selects the output format. */
+    private fun buildConfigMessage(): String {
+        val timestamp = formatTimestamp()
+        return "X-Timestamp:$timestamp\r\n" +
+            "Content-Type:application/json; charset=utf-8\r\n" +
+            "Path:speech.config\r\n" +
+            "\r\n" +
+            """{"context":{"synthesis":{"audio":{"metadataoptions":{"sentenceBoundaryEnabled":"false","wordBoundaryEnabled":"false"},"outputFormat":"$OUTPUT_FORMAT"}}}}"""
+    }
+
+    /**
+     * Builds the SSML synthesis request for [text] spoken by [voice].
+     *
+     * NOTE(review): the SSML element markup was reconstructed (speak / voice
+     * wrapper, `xml:lang` fixed to zh-CN) — confirm against the service contract.
+     */
+    private fun buildSsmlMessage(requestId: String, text: String, voice: String): String {
+        val timestamp = formatTimestamp()
+        val escapedText = escapeXml(text)
+        val escapedVoice = escapeXml(voice)
+
+        return "X-RequestId:$requestId\r\n" +
+            "Content-Type:application/ssml+xml\r\n" +
+            "X-Timestamp:${timestamp}Z\r\n" +
+            "Path:ssml\r\n" +
+            "\r\n" +
+            "<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xml:lang='zh-CN'>" +
+            "<voice name='$escapedVoice'>" +
+            escapedText +
+            "</voice></speak>"
+    }
+
+    /** Escapes the five XML special characters; `&` is replaced first to avoid double escaping. */
+    private fun escapeXml(raw: String): String = raw
+        .replace("&", "&amp;")
+        .replace("<", "&lt;")
+        .replace(">", "&gt;")
+        .replace("\"", "&quot;")
+        .replace("'", "&apos;")
+
+    /**
+     * Extracts the audio payload from a binary frame.
+     *
+     * Layout: a 2-byte big-endian header length, the header text, then the audio bytes.
+     *
+     * @return the audio bytes, or `null` when the frame is too short, has no
+     * payload, or its header does not contain `Path:audio`
+     */
+    private fun parseBinaryFrame(data: ByteArray): ByteArray? {
+        if (data.size < 2) return null
+        val headerLength = ((data[0].toInt() and 0xFF) shl 8) or (data[1].toInt() and 0xFF)
+        val audioStart = 2 + headerLength
+        if (audioStart >= data.size) return null
+
+        val header = String(data, 2, headerLength, Charsets.US_ASCII)
+        if (!header.contains("Path:audio")) return null
+
+        return data.copyOfRange(audioStart, data.size)
+    }
+
+    /**
+     * Generates the `Sec-MS-GEC` token: the upper-case hex SHA-256 of the
+     * skew-corrected time (as Windows file-time ticks, rounded down to
+     * [GEC_WINDOW_SECONDS]) followed by [TRUSTED_TOKEN].
+     */
+    private fun generateGecToken(): String {
+        val unixSeconds = System.currentTimeMillis() / 1000 + clockSkewSeconds
+        val windowsSeconds = unixSeconds + WINDOWS_EPOCH_OFFSET
+        val alignedSeconds = windowsSeconds - windowsSeconds % GEC_WINDOW_SECONDS
+        val ticks = alignedSeconds * TICKS_PER_SECOND
+
+        val digest = MessageDigest.getInstance("SHA-256")
+            .digest("$ticks$TRUSTED_TOKEN".toByteArray(Charsets.US_ASCII))
+        return digest.joinToString("") { "%02X".format(it) }
+    }
+
+    /** Formats the current UTC time the way the message headers expect it. */
+    private fun formatTimestamp(): String {
+        val sdf = SimpleDateFormat("EEE MMM dd yyyy HH:mm:ss 'GMT+0000 (Coordinated Universal Time)'", Locale.US)
+        sdf.timeZone = TimeZone.getTimeZone("UTC")
+        return sdf.format(Date())
+    }
+
+    /** Updates [clockSkewSeconds] from an HTTP `Date` header; unparseable values are ignored. */
+    private fun adjustClockSkew(serverDateHeader: String) {
+        try {
+            val sdf = SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss z", Locale.US)
+            val serverTime = sdf.parse(serverDateHeader)?.time ?: return
+            clockSkewSeconds = (serverTime - System.currentTimeMillis()) / 1000
+            Timber.d("$TAG: 时钟偏移修正为 ${clockSkewSeconds}s")
+        } catch (e: Exception) {
+            Timber.w(e, "$TAG: 解析服务器时间失败")
+        }
+    }
+
+    /** Plays [file] on the main dispatcher, replacing whatever was playing before. */
+    private suspend fun playAudio(file: File) {
+        withContext(Dispatchers.Main) {
+            // Not stop(): that would cancel the very job this call runs in.
+            releasePlayer()
+            val player = MediaPlayer()
+            try {
+                player.setDataSource(file.absolutePath)
+                player.setOnCompletionListener { finished ->
+                    retirePlayer(finished)
+                    onComplete?.invoke()
+                    Timber.d("$TAG: 播放完成")
+                }
+                player.setOnErrorListener { failed, what, extra ->
+                    Timber.e("$TAG: 播放错误 what=$what extra=$extra")
+                    retirePlayer(failed)
+                    true
+                }
+                player.prepare()
+                player.start()
+                mediaPlayer = player
+                isPlaying = true
+                Timber.d("$TAG: 开始播放")
+            } catch (e: Exception) {
+                Timber.e(e, "$TAG: 播放异常")
+                // The player was never published, so it must be released here.
+                player.release()
+                isPlaying = false
+            }
+        }
+    }
+
+    /** Releases [player] once it has finished or failed, if it is still the current one. */
+    private fun retirePlayer(player: MediaPlayer) {
+        if (mediaPlayer !== player) return
+        mediaPlayer = null
+        isPlaying = false
+        player.release()
+    }
+
+    // ===== Cache management =====
+
+    /** Returns the cache file for a voice/text pair, named by the MD5 of `"voice:text"`. */
+    private fun getCacheFile(text: String, voice: String): File {
+        val cacheDir = File(context.cacheDir, CACHE_DIR).also { it.mkdirs() }
+        val key = MessageDigest.getInstance("MD5")
+            .digest("$voice:$text".toByteArray())
+            .joinToString("") { "%02x".format(it) }
+        return File(cacheDir, "$key$CACHE_EXTENSION")
+    }
+
+    /**
+     * Writes [data] to [file] and evicts the oldest clips beyond [MAX_CACHE_SIZE].
+     *
+     * The bytes go to a temporary file that is then renamed, so an interrupted
+     * write does not leave a truncated clip under the final name.
+     */
+    private fun saveToCacheDir(file: File, data: ByteArray) {
+        val cacheDir = file.parentFile
+        // The directory may have been removed by clearCache() in the meantime.
+        cacheDir?.mkdirs()
+
+        val tempFile = File(file.path + TEMP_SUFFIX)
+        tempFile.writeBytes(data)
+        if (!tempFile.renameTo(file)) {
+            tempFile.delete()
+            file.writeBytes(data)
+        }
+
+        val clips = cacheDir?.listFiles()
+            ?.filter { it.isFile && it.name.endsWith(CACHE_EXTENSION) }
+            ?.sortedBy { it.lastModified() }
+            ?: return
+        // Sorted oldest first: everything except the newest MAX_CACHE_SIZE goes.
+        clips.dropLast(MAX_CACHE_SIZE).forEach { it.delete() }
+    }
+
+    /** Deletes the whole cache directory. */
+    fun clearCache() {
+        File(context.cacheDir, CACHE_DIR).deleteRecursively()
+        Timber.d("$TAG: 缓存已清除")
+    }
+}
diff --git a/app/src/main/java/com/xiaoqu/watch/ui/home/HomeFragment.kt b/app/src/main/java/com/xiaoqu/watch/ui/home/HomeFragment.kt
index e01a805..2fb2e4d 100644
--- a/app/src/main/java/com/xiaoqu/watch/ui/home/HomeFragment.kt
+++ b/app/src/main/java/com/xiaoqu/watch/ui/home/HomeFragment.kt
@@ -28,7 +28,7 @@ import com.xiaoqu.watch.ui.punch.PunchResult
import com.xiaoqu.watch.ui.punch.PunchViewModel
import com.xiaoqu.watch.ui.widget.StatusBarView
import com.xiaoqu.watch.util.DateUtil
-import android.speech.tts.TextToSpeech
+import com.xiaoqu.watch.service.manager.EdgeTtsManager
import dagger.hilt.android.AndroidEntryPoint
import kotlinx.coroutines.delay
import kotlinx.coroutines.isActive
@@ -52,6 +52,7 @@ class HomeFragment : BaseFragment() {
@Inject lateinit var bluetoothScanManager: com.xiaoqu.watch.service.manager.BluetoothScanManager
@Inject lateinit var notificationManager: com.xiaoqu.watch.service.manager.NotificationManager
@Inject lateinit var vibrationConfigManager: com.xiaoqu.watch.device.sensor.VibrationConfigManager
+ @Inject lateinit var edgeTtsManager: EdgeTtsManager
/** 考勤打卡 ViewModel */
private val punchViewModel: PunchViewModel by viewModels()
@@ -85,7 +86,6 @@ class HomeFragment : BaseFragment() {
private var lastTapTime = 0L
// ===== TTS 语音测试 =====
- private var tts: TextToSpeech? = null
override fun createBinding(inflater: LayoutInflater, container: ViewGroup?): FragmentHomeBinding {
return FragmentHomeBinding.inflate(inflater, container, false)
@@ -183,9 +183,8 @@ class HomeFragment : BaseFragment() {
it.onBackKeyPressed = null
it.notificationBanner.onClick = null
}
- // 释放 TTS 资源
- tts?.shutdown()
- tts = null
+ // 停止 TTS 播放
+ edgeTtsManager.stop()
}
// ===== 打卡面板 =====
@@ -561,43 +560,18 @@ class HomeFragment : BaseFragment() {
}
/**
- * TTS 语音测试:验证设备是否支持中文语音合成
- * 测试内容:初始化 TTS → 设置中文 → 播放测试语音
+ * Edge TTS 语音测试:通过微软 Edge TTS 合成中文语音
+ * 测试内容:调用 Edge TTS API → 接收 MP3 → 播放
* 结果通过 Logcat 和 Toast 反馈
*/
private fun testTts() {
- tts?.shutdown()
- tts = TextToSpeech(requireContext()) { status ->
- if (status == TextToSpeech.SUCCESS) {
- val result = tts?.setLanguage(java.util.Locale.CHINESE)
- when {
- result == TextToSpeech.LANG_MISSING_DATA -> {
- Timber.w("TTS: 中文语音包缺失")
- activity?.runOnUiThread {
- Toast.makeText(requireContext(), "TTS: 中文语音包缺失", Toast.LENGTH_LONG).show()
- }
- }
- result == TextToSpeech.LANG_NOT_SUPPORTED -> {
- Timber.w("TTS: 不支持中文")
- activity?.runOnUiThread {
- Toast.makeText(requireContext(), "TTS: 不支持中文", Toast.LENGTH_LONG).show()
- }
- }
- else -> {
- Timber.d("TTS: 中文语音可用,开始播放测试")
- activity?.runOnUiThread {
- Toast.makeText(requireContext(), "TTS 测试播放中...", Toast.LENGTH_SHORT).show()
- }
- tts?.speak("您有3条新任务待处理", TextToSpeech.QUEUE_FLUSH, null, "tts_test")
- }
- }
- } else {
- Timber.e("TTS: 初始化失败, status=$status")
- activity?.runOnUiThread {
- Toast.makeText(requireContext(), "TTS: 初始化失败", Toast.LENGTH_LONG).show()
- }
+ Toast.makeText(requireContext(), "Edge TTS 测试中...", Toast.LENGTH_SHORT).show()
+ edgeTtsManager.speak(
+ text = "您有3条新任务待处理,请及时查看",
+ onError = { msg -> // asynchronous callback: the fragment may already be detached, so requireContext() could throw
+ context?.let { Toast.makeText(it, "TTS 失败: $msg", Toast.LENGTH_LONG).show() }
}
- }
+ )
}
// ===== 事件监听 =====