Add Visual Agent

This commit is contained in:
2025-07-11 16:50:59 +08:00
parent 89794b587e
commit 3c4373e1ff
6 changed files with 202 additions and 72 deletions

View File

@@ -5,8 +5,8 @@ import com.aallam.openai.api.chat.ChatMessage
import com.aallam.openai.api.chat.Tool
import com.aallam.openai.api.core.Parameters
import com.aallam.openai.api.model.ModelId
import com.aallam.openai.client.Chat
import kotlinx.serialization.json.*
import top.jie65535.mirai.LargeLanguageModels
import top.jie65535.mirai.PluginConfig
class ReasoningAgent : BaseAgent(
@@ -18,26 +18,24 @@ class ReasoningAgent : BaseAgent(
putJsonObject("properties") {
putJsonObject("prompt") {
put("type", "string")
put("description", "用于调用推理模型的提示")
put("description", "用于调用推理模型的提示")
}
}
putJsonArray("required") {
add("question")
add("prompt")
}
},
)
) {
var llm: Chat? = null
override val loadingMessage: String
get() = "深度思考中..."
override val isEnabled: Boolean
get() = llm != null
get() = LargeLanguageModels.reasoning != null
override suspend fun execute(args: JsonObject?): String {
requireNotNull(args)
val llm = llm ?: return "未配置llm无法进行推理。"
val llm = LargeLanguageModels.reasoning ?: return "未配置llm无法进行推理。"
val prompt = args.getValue("prompt").jsonPrimitive.content
val answerContent = StringBuilder()

View File

@@ -0,0 +1,76 @@
package top.jie65535.mirai.tools
import com.aallam.openai.api.chat.ChatCompletionRequest
import com.aallam.openai.api.chat.ChatMessage
import com.aallam.openai.api.chat.ImagePart
import com.aallam.openai.api.chat.TextPart
import com.aallam.openai.api.chat.Tool
import com.aallam.openai.api.core.Parameters
import com.aallam.openai.api.model.ModelId
import kotlinx.serialization.json.JsonObject
import kotlinx.serialization.json.add
import kotlinx.serialization.json.jsonPrimitive
import kotlinx.serialization.json.put
import kotlinx.serialization.json.putJsonArray
import kotlinx.serialization.json.putJsonObject
import top.jie65535.mirai.LargeLanguageModels
import top.jie65535.mirai.PluginConfig
/**
 * Agent tool that sends an image URL together with a text prompt to the
 * configured visual model and returns the text collected from its response.
 *
 * The tool is registered under the function name `visualAgent` and declares two
 * required string arguments, `image_url` and `prompt`.
 */
class VisualAgent : BaseAgent(
    tool = Tool.function(
        name = "visualAgent",
        description = "可通过调用视觉模型识别图片。",
        parameters = Parameters.buildJsonObject {
            put("type", "object")
            putJsonObject("properties") {
                putJsonObject(ARG_IMAGE_URL) {
                    put("type", "string")
                    put("description", "图片地址")
                }
                putJsonObject(ARG_PROMPT) {
                    put("type", "string")
                    put("description", "用于调用视觉模型的提示词")
                }
            }
            putJsonArray("required") {
                add(ARG_IMAGE_URL)
                add(ARG_PROMPT)
            }
        },
    ),
) {
    /** Status text reported while the tool call is in progress. */
    override val loadingMessage: String
        get() = "图片识别中..."

    /** The tool is enabled only while a visual model is available. */
    override val isEnabled: Boolean
        get() = LargeLanguageModels.visual != null

    /**
     * Runs one image-recognition request.
     *
     * @param args tool-call arguments; must be non-null and contain `image_url` and `prompt`.
     * @return the concatenated response text, or a fixed fallback message when no
     *   visual model is configured or the response contained no text.
     * @throws IllegalArgumentException if [args] is null.
     * @throws NoSuchElementException if a required argument key is missing.
     */
    override suspend fun execute(args: JsonObject?): String {
        requireNotNull(args)
        val visualModel = LargeLanguageModels.visual ?: return "未配置llm无法进行识别。"
        val imageUrl = args.getValue(ARG_IMAGE_URL).jsonPrimitive.content
        val prompt = args.getValue(ARG_PROMPT).jsonPrimitive.content

        val request = ChatCompletionRequest(
            model = ModelId(PluginConfig.visualModel),
            messages = listOf(
                ChatMessage.System("You are a helpful assistant."),
                ChatMessage.User(
                    content = listOf(
                        ImagePart(imageUrl),
                        TextPart(prompt),
                    ),
                ),
            ),
        )

        // Accumulate the non-empty text deltas of the first choice of every chunk.
        val reply = StringBuilder()
        visualModel.chatCompletions(request).collect { chunk ->
            val piece = chunk.choices.firstOrNull()?.delta?.content
            if (!piece.isNullOrEmpty()) {
                reply.append(piece)
            }
        }

        return if (reply.isEmpty()) "识图异常,结果为空" else reply.toString()
    }

    private companion object {
        /** JSON key of the image address argument. */
        const val ARG_IMAGE_URL = "image_url"

        /** JSON key of the prompt argument. */
        const val ARG_PROMPT = "prompt"
    }
}