JChatGPT/src/main/kotlin/tools/VisualAgent.kt
2025-07-11 16:50:59 +08:00

76 lines
2.7 KiB
Kotlin
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package top.jie65535.mirai.tools
import com.aallam.openai.api.chat.ChatCompletionRequest
import com.aallam.openai.api.chat.ChatMessage
import com.aallam.openai.api.chat.ImagePart
import com.aallam.openai.api.chat.TextPart
import com.aallam.openai.api.chat.Tool
import com.aallam.openai.api.core.Parameters
import com.aallam.openai.api.model.ModelId
import kotlinx.serialization.json.JsonObject
import kotlinx.serialization.json.add
import kotlinx.serialization.json.jsonPrimitive
import kotlinx.serialization.json.put
import kotlinx.serialization.json.putJsonArray
import kotlinx.serialization.json.putJsonObject
import top.jie65535.mirai.LargeLanguageModels
import top.jie65535.mirai.PluginConfig
/**
 * Agent tool that hands an image URL and a prompt to the configured visual model
 * and returns the model's textual answer.
 *
 * The tool is registered under the function name `visualAgent` and declares two
 * required string parameters: `image_url` and `prompt`.
 */
class VisualAgent : BaseAgent(
    tool = Tool.function(
        name = "visualAgent",
        description = "可通过调用视觉模型识别图片。",
        // JSON-schema description of the arguments the calling model must supply.
        parameters = Parameters.buildJsonObject {
            put("type", "object")
            putJsonObject("properties") {
                putJsonObject("image_url") {
                    put("type", "string")
                    put("description", "图片地址")
                }
                putJsonObject("prompt") {
                    put("type", "string")
                    put("description", "用于调用视觉模型的提示词")
                }
            }
            putJsonArray("required") {
                add("image_url")
                add("prompt")
            }
        },
    ),
) {
    /** Status text associated with this tool while a recognition request is running. */
    override val loadingMessage: String get() = "图片识别中..."

    /** Evaluated on every access: true only while a visual model client is configured. */
    override val isEnabled: Boolean get() = LargeLanguageModels.visual != null

    /**
     * Sends the image and prompt to the visual model and gathers the streamed reply.
     *
     * @param args tool-call arguments; must be non-null and contain the string
     *   entries `image_url` and `prompt`.
     * @return the concatenated streamed answer, a notice string when no visual model
     *   is configured, or a fallback message when the model produced no text.
     * @throws IllegalArgumentException if [args] is null.
     * @throws NoSuchElementException if `image_url` or `prompt` is absent from [args].
     */
    override suspend fun execute(args: JsonObject?): String {
        val arguments = requireNotNull(args)
        val visualModel = LargeLanguageModels.visual ?: return "未配置llm无法进行识别。"

        // Read both arguments up front so a missing key fails before any request is built.
        val imageAddress = arguments.getValue("image_url").jsonPrimitive.content
        val question = arguments.getValue("prompt").jsonPrimitive.content

        val request = ChatCompletionRequest(
            model = ModelId(PluginConfig.visualModel),
            messages = listOf(
                ChatMessage.System("You are a helpful assistant."),
                ChatMessage.User(
                    content = listOf(
                        ImagePart(imageAddress),
                        TextPart(question),
                    ),
                ),
            ),
        )

        // The answer arrives as a stream of chunks; only the first choice of each chunk
        // is used, and chunks without text are skipped.
        val reply = buildString {
            visualModel.chatCompletions(request).collect { chunk ->
                val piece = chunk.choices.firstOrNull()?.delta?.content
                if (!piece.isNullOrEmpty()) {
                    append(piece)
                }
            }
        }

        return reply.ifEmpty { "识图异常,结果为空" }
    }
}