Add first-chunk timeout

This commit is contained in:
2026-05-22 14:03:42 +08:00
parent e629d37fa8
commit 72892336bc
3 changed files with 42 additions and 15 deletions

View File

@@ -44,6 +44,7 @@ object LargeLanguageModels {
fun reload() { fun reload() {
val timeout = PluginConfig.timeout.milliseconds val timeout = PluginConfig.timeout.milliseconds
val firstChunkTimeout = PluginConfig.firstChunkTimeout.milliseconds
// 初始化聊天模型 // 初始化聊天模型
if (PluginConfig.openAiApi.isNotBlank() && PluginConfig.openAiToken.isNotBlank()) { if (PluginConfig.openAiApi.isNotBlank() && PluginConfig.openAiToken.isNotBlank()) {
@@ -51,6 +52,7 @@ object LargeLanguageModels {
baseUrl = PluginConfig.openAiApi, baseUrl = PluginConfig.openAiApi,
token = PluginConfig.openAiToken, token = PluginConfig.openAiToken,
timeout = timeout, timeout = timeout,
firstChunkTimeout = firstChunkTimeout,
extraBody = parseExtraBody(PluginConfig.chatModelExtraBody) extraBody = parseExtraBody(PluginConfig.chatModelExtraBody)
) )
} }
@@ -61,6 +63,7 @@ object LargeLanguageModels {
baseUrl = PluginConfig.reasoningModelApi, baseUrl = PluginConfig.reasoningModelApi,
token = PluginConfig.reasoningModelToken, token = PluginConfig.reasoningModelToken,
timeout = timeout, timeout = timeout,
firstChunkTimeout = firstChunkTimeout,
extraBody = parseExtraBody(PluginConfig.reasoningModelExtraBody) extraBody = parseExtraBody(PluginConfig.reasoningModelExtraBody)
) )
} }
@@ -71,6 +74,7 @@ object LargeLanguageModels {
baseUrl = PluginConfig.visualModelApi, baseUrl = PluginConfig.visualModelApi,
token = PluginConfig.visualModelToken, token = PluginConfig.visualModelToken,
timeout = timeout, timeout = timeout,
firstChunkTimeout = firstChunkTimeout,
extraBody = parseExtraBody(PluginConfig.visualModelExtraBody) extraBody = parseExtraBody(PluginConfig.visualModelExtraBody)
) )
} }

View File

@@ -14,6 +14,7 @@ import kotlinx.coroutines.currentCoroutineContext
import kotlinx.coroutines.flow.Flow import kotlinx.coroutines.flow.Flow
import kotlinx.coroutines.flow.flow import kotlinx.coroutines.flow.flow
import kotlinx.coroutines.isActive import kotlinx.coroutines.isActive
import kotlinx.coroutines.withTimeout
import kotlinx.serialization.json.* import kotlinx.serialization.json.*
import kotlin.time.Duration import kotlin.time.Duration
@@ -21,15 +22,16 @@ class ModelService(
val baseUrl: String, val baseUrl: String,
val token: String, val token: String,
val timeout: Duration, val timeout: Duration,
val firstChunkTimeout: Duration,
val extraBody: JsonObject? = null val extraBody: JsonObject? = null
) { ) {
val httpClient: HttpClient by lazy { val httpClient: HttpClient by lazy {
HttpClient(OkHttp) { HttpClient(OkHttp) {
install(HttpTimeout) { install(HttpTimeout) {
val millis = timeout.inWholeMilliseconds // 总请求/socket 超时保持长值,允许慢速流式输出;连接握手则用短超时。
requestTimeoutMillis = millis requestTimeoutMillis = timeout.inWholeMilliseconds
connectTimeoutMillis = millis socketTimeoutMillis = timeout.inWholeMilliseconds
socketTimeoutMillis = millis connectTimeoutMillis = firstChunkTimeout.inWholeMilliseconds
} }
defaultRequest { defaultRequest {
url(baseUrl) url(baseUrl)
@@ -66,17 +68,35 @@ class ModelService(
}.let { response -> }.let { response ->
val channel: ByteReadChannel = response.body() val channel: ByteReadChannel = response.body()
try { try {
while (currentCoroutineContext().isActive && !channel.isClosedForRead) { // 首块 data: 必须在 firstChunkTimeout 内到达,否则抛 TimeoutCancellationException
val line = channel.readUTF8Line() ?: continue // 走 JChatGPT 的重试流程;之后的流式读取不再有应用层超时,由 socketTimeoutMillis 兜底。
when { val firstDataLine: String? = withTimeout(firstChunkTimeout) {
line.startsWith("data: [DONE]") -> break var found: String? = null
line.startsWith("data: ") -> { while (currentCoroutineContext().isActive && !channel.isClosedForRead) {
val chunk = json.decodeFromString<ChatCompletionChunk>( val line = channel.readUTF8Line() ?: continue
line.removePrefix("data: ") if (line.startsWith("data: ")) {
) found = line
emit(chunk) break
}
// 心跳/空行/注释行,不计为首块,继续等
}
found
}
if (firstDataLine != null) {
if (!firstDataLine.startsWith("data: [DONE]")) {
emit(json.decodeFromString(firstDataLine.removePrefix("data: ")))
while (currentCoroutineContext().isActive && !channel.isClosedForRead) {
val line = channel.readUTF8Line() ?: continue
when {
line.startsWith("data: [DONE]") -> break
line.startsWith("data: ") -> {
emit(json.decodeFromString(line.removePrefix("data: ")))
}
else -> continue
}
} }
else -> continue
} }
} }
} finally { } finally {

View File

@@ -77,9 +77,12 @@ object PluginConfig : AutoSavePluginConfig("Config") {
@ValueDescription("群荣誉等级权限门槛,达到这个等级相当于自动拥有对话权限。") @ValueDescription("群荣誉等级权限门槛,达到这个等级相当于自动拥有对话权限。")
val temperaturePermission: Int by value(50) val temperaturePermission: Int by value(50)
@ValueDescription("等待响应超时时间单位毫秒默认60秒") @ValueDescription("等待响应超时时间整个请求的总超时与socket读超时单位毫秒默认60秒")
val timeout: Long by value(60000L) val timeout: Long by value(60000L)
@ValueDescription("首块响应超时时间单位毫秒默认10秒。若连接建立后在此时间内没收到首块data:则中断走重试")
val firstChunkTimeout: Long by value(10000L)
@Deprecated("使用外部文件而不是在配置文件内保存提示词") @Deprecated("使用外部文件而不是在配置文件内保存提示词")
@ValueDescription("系统提示词,该字段已弃用,使用提示词文件而不是在这里修改") @ValueDescription("系统提示词,该字段已弃用,使用提示词文件而不是在这里修改")
var prompt: String by value("你是一个乐于助人的助手") var prompt: String by value("你是一个乐于助人的助手")