Add Visual Agent

This commit is contained in:
2025-07-11 16:50:59 +08:00
parent 89794b587e
commit 3c4373e1ff
6 changed files with 202 additions and 72 deletions

View File

@ -4,14 +4,11 @@ import com.aallam.openai.api.chat.ChatCompletionRequest
import com.aallam.openai.api.chat.ChatMessage import com.aallam.openai.api.chat.ChatMessage
import com.aallam.openai.api.chat.ChatRole import com.aallam.openai.api.chat.ChatRole
import com.aallam.openai.api.chat.ToolCall import com.aallam.openai.api.chat.ToolCall
import com.aallam.openai.api.http.Timeout
import com.aallam.openai.api.model.ModelId import com.aallam.openai.api.model.ModelId
import com.aallam.openai.client.Chat
import com.aallam.openai.client.OpenAI
import com.aallam.openai.client.OpenAIHost
import io.ktor.util.collections.* import io.ktor.util.collections.*
import kotlinx.coroutines.delay import kotlinx.coroutines.delay
import kotlinx.coroutines.launch import kotlinx.coroutines.launch
import kotlinx.coroutines.runBlocking
import net.mamoe.mirai.console.command.CommandManager.INSTANCE.register import net.mamoe.mirai.console.command.CommandManager.INSTANCE.register
import net.mamoe.mirai.console.command.CommandSender.Companion.toCommandSender import net.mamoe.mirai.console.command.CommandSender.Companion.toCommandSender
import net.mamoe.mirai.console.permission.PermissionId import net.mamoe.mirai.console.permission.PermissionId
@ -26,6 +23,7 @@ import net.mamoe.mirai.event.events.FriendMessageEvent
import net.mamoe.mirai.event.events.GroupMessageEvent import net.mamoe.mirai.event.events.GroupMessageEvent
import net.mamoe.mirai.event.events.MessageEvent import net.mamoe.mirai.event.events.MessageEvent
import net.mamoe.mirai.message.data.* import net.mamoe.mirai.message.data.*
import net.mamoe.mirai.message.data.Image.Key.queryUrl
import net.mamoe.mirai.message.data.MessageSource.Key.quote import net.mamoe.mirai.message.data.MessageSource.Key.quote
import net.mamoe.mirai.utils.ExternalResource.Companion.toExternalResource import net.mamoe.mirai.utils.ExternalResource.Companion.toExternalResource
import net.mamoe.mirai.utils.info import net.mamoe.mirai.utils.info
@ -38,7 +36,6 @@ import java.time.ZoneOffset
import java.time.format.DateTimeFormatter import java.time.format.DateTimeFormatter
import kotlin.collections.* import kotlin.collections.*
import kotlin.math.max import kotlin.math.max
import kotlin.time.Duration.Companion.milliseconds
import kotlin.time.Duration.Companion.seconds import kotlin.time.Duration.Companion.seconds
object JChatGPT : KotlinPlugin( object JChatGPT : KotlinPlugin(
@ -51,8 +48,6 @@ object JChatGPT : KotlinPlugin(
// dependsOn("xyz.cssxsh.mirai.plugin.mirai-hibernate-plugin", true) // dependsOn("xyz.cssxsh.mirai.plugin.mirai-hibernate-plugin", true)
} }
) { ) {
private var llm: Chat? = null
/** /**
* 是否包含历史对话 * 是否包含历史对话
*/ */
@ -68,9 +63,7 @@ object JChatGPT : KotlinPlugin(
PluginConfig.reload() PluginConfig.reload()
// 设置Token // 设置Token
if (PluginConfig.openAiToken.isNotEmpty()) { LargeLanguageModels.reload()
updateOpenAiToken(PluginConfig.openAiToken)
}
// 注册插件命令 // 注册插件命令
PluginCommands.register() PluginCommands.register()
@ -93,17 +86,6 @@ object JChatGPT : KotlinPlugin(
logger.info { "Plugin loaded" } logger.info { "Plugin loaded" }
} }
fun updateOpenAiToken(token: String) {
val timeout = PluginConfig.timeout.milliseconds
llm = OpenAI(
token,
host = OpenAIHost(baseUrl = PluginConfig.openAiApi),
timeout = Timeout(request = timeout, connect = timeout, socket = timeout),
// logging = LoggingConfig(LogLevel.All)
)
reasoningAgent.llm = llm
}
private val timeFormatter = DateTimeFormatter.ofPattern("HH:mm:ss") private val timeFormatter = DateTimeFormatter.ofPattern("HH:mm:ss")
.withZone(ZoneOffset.systemDefault()) .withZone(ZoneOffset.systemDefault())
private val dateTimeFormatter = DateTimeFormatter.ofPattern("yyyy年MM月dd E HH:mm:ss") private val dateTimeFormatter = DateTimeFormatter.ofPattern("yyyy年MM月dd E HH:mm:ss")
@ -112,7 +94,7 @@ object JChatGPT : KotlinPlugin(
private suspend fun onMessage(event: MessageEvent) { private suspend fun onMessage(event: MessageEvent) {
// 检查Token是否设置 // 检查Token是否设置
if (llm == null) return if (LargeLanguageModels.chat == null) return
// 发送者是否有权限 // 发送者是否有权限
if (!event.toCommandSender().hasPermission(chatPermission)) { if (!event.toCommandSender().hasPermission(chatPermission)) {
if (event is GroupMessageEvent) { if (event is GroupMessageEvent) {
@ -244,11 +226,7 @@ object JChatGPT : KotlinPlugin(
it.getDisplay(event.subject) it.getDisplay(event.subject)
} }
is ForwardMessage -> { else -> singleMessageToText(it)
it.title + "\n " + it.preview
}
else -> it.content
} }
}) })
} }
@ -285,18 +263,36 @@ object JChatGPT : KotlinPlugin(
val recordMessage = record.toMessageChain() val recordMessage = record.toMessageChain()
recordMessage[QuoteReply.Key]?.let { recordMessage[QuoteReply.Key]?.let {
historyText.append(" 引用\n > ") historyText.append(" 引用\n > ")
.appendLine(it.source.originalMessage.content.replace("\n", "\n > ")) .appendLine(it.source.originalMessage
.joinToString("", transform = ::singleMessageToText)
.replace("\n", "\n > "))
} }
// 消息内容 // 消息内容
historyText.append(" 说:").appendLine(record.toMessageChain().joinToString("") { historyText.append(" 说:").appendLine(
when (it) { record.toMessageChain().joinToString("", transform = ::singleMessageToText))
is ForwardMessage -> { }
it.title + "\n " + it.preview
}
else -> it.content private fun singleMessageToText(it: SingleMessage): String {
return when (it) {
is ForwardMessage -> {
it.title + "\n " + it.preview
}
// 图片格式化
is Image -> {
try {
val imageUrl = runBlocking {
it.queryUrl()
}
"![图片]($imageUrl)"
} catch (e: Throwable) {
logger.warning("图片地址获取失败", e)
it.content
} }
}) }
else -> it.content
}
} }
// endregion - 历史消息相关 - // endregion - 历史消息相关 -
@ -657,8 +653,6 @@ object JChatGPT : KotlinPlugin(
} }
} }
private val reasoningAgent = ReasoningAgent()
/** /**
* 工具列表 * 工具列表
*/ */
@ -682,7 +676,10 @@ object JChatGPT : KotlinPlugin(
RunCode(), RunCode(),
// 推理代理 // 推理代理
reasoningAgent, ReasoningAgent(),
// 视觉代理
VisualAgent(),
// 天气服务 // 天气服务
WeatherService(), WeatherService(),
@ -713,7 +710,7 @@ object JChatGPT : KotlinPlugin(
chatMessages: List<ChatMessage>, chatMessages: List<ChatMessage>,
hasTools: Boolean = true hasTools: Boolean = true
): ChatMessage { ): ChatMessage {
val llm = this.llm ?: throw NullPointerException("OpenAI Token 未设置,无法开始") val llm = LargeLanguageModels.chat ?: throw NullPointerException("OpenAI Token 未设置,无法开始")
val availableTools = if (hasTools) { val availableTools = if (hasTools) {
myTools.filter { it.isEnabled }.map { it.tool } myTools.filter { it.isEnabled }.map { it.tool }
} else null } else null
@ -733,17 +730,21 @@ object JChatGPT : KotlinPlugin(
val nameCard = StringBuilder() val nameCard = StringBuilder()
// 群活跃等级 // 群活跃等级
nameCard.append("【lv").append(member.active.temperature).append(" ") nameCard.append("【lv").append(member.active.temperature).append(" ")
// 群头衔 try {
if (member.specialTitle.isNotEmpty()) { // 群头衔
nameCard.append(member.specialTitle) if (member.specialTitle.isNotEmpty()) {
} else { nameCard.append(member.specialTitle)
nameCard.append( } else {
when (member.permission) { nameCard.append(
OWNER -> "群主" when (member.permission) {
ADMINISTRATOR -> "管理员" OWNER -> "群主"
MEMBER -> member.temperatureTitle ADMINISTRATOR -> "管理员"
} MEMBER -> member.temperatureTitle
) }
)
}
} catch (e: Throwable) {
logger.warning("获取群头衔失败", e)
} }
// 群名片 // 群名片
nameCard.append("").append(member.nameCardOrNick).append("(").append(member.id).append(")") nameCard.append("").append(member.nameCardOrNick).append("(").append(member.id).append(")")

View File

@ -0,0 +1,40 @@
package top.jie65535.mirai
import com.aallam.openai.api.http.Timeout
import com.aallam.openai.client.Chat
import com.aallam.openai.client.OpenAI
import com.aallam.openai.client.OpenAIHost
import kotlin.time.Duration.Companion.milliseconds
/**
 * Central registry of the OpenAI-compatible clients used by the plugin.
 *
 * Each property is `null` until [reload] has been called, and stays `null`
 * whenever the matching API base URL or token in [PluginConfig] is blank.
 * Callers treat `null` as "this model is not configured".
 */
object LargeLanguageModels {
    /** Client for the main chat model (`openAiApi` / `openAiToken`). */
    var chat: Chat? = null

    /** Client for the reasoning model (`reasoningModelApi` / `reasoningModelToken`). */
    var reasoning: Chat? = null

    /** Client for the visual model (`visualModelApi` / `visualModelToken`). */
    var visual: Chat? = null

    /**
     * Rebuilds every client from the current [PluginConfig] values.
     *
     * A client whose configuration is blank is reset to `null`, so that a
     * client created from an earlier configuration is not kept alive after
     * its URL or token has been removed.
     *
     * NOTE(review): the replaced client instances are not closed here —
     * confirm whether explicit cleanup is required.
     */
    fun reload() {
        chat = createClient(PluginConfig.openAiApi, PluginConfig.openAiToken)
        reasoning = createClient(PluginConfig.reasoningModelApi, PluginConfig.reasoningModelToken)
        visual = createClient(PluginConfig.visualModelApi, PluginConfig.visualModelToken)
    }

    /**
     * Creates a client for the given endpoint.
     *
     * @param baseUrl API base URL of the endpoint.
     * @param token API token used to authenticate against the endpoint.
     * @return a new client, or `null` when [baseUrl] or [token] is blank.
     */
    private fun createClient(baseUrl: String, token: String): Chat? {
        if (baseUrl.isBlank() || token.isBlank()) return null
        // The same configured timeout (milliseconds) applies to request, connect and socket.
        val timeout = PluginConfig.timeout.milliseconds
        return OpenAI(
            token = token,
            host = OpenAIHost(baseUrl = baseUrl),
            timeout = Timeout(request = timeout, connect = timeout, socket = timeout)
        )
    }
}

View File

@ -18,7 +18,7 @@ object PluginCommands : CompositeCommand(
suspend fun CommandSender.setToken(token: String) { suspend fun CommandSender.setToken(token: String) {
PluginConfig.openAiToken = token PluginConfig.openAiToken = token
PluginConfig.save() PluginConfig.save()
JChatGPT.updateOpenAiToken(token) LargeLanguageModels.reload()
sendMessage("OK") sendMessage("OK")
} }
@ -45,7 +45,7 @@ object PluginCommands : CompositeCommand(
@SubCommand @SubCommand
suspend fun CommandSender.reload() { suspend fun CommandSender.reload() {
PluginConfig.reload() PluginConfig.reload()
JChatGPT.updateOpenAiToken(PluginConfig.openAiToken) LargeLanguageModels.reload()
sendMessage("OK") sendMessage("OK")
} }
} }

View File

@ -6,7 +6,7 @@ import net.mamoe.mirai.console.data.value
object PluginConfig : AutoSavePluginConfig("Config") { object PluginConfig : AutoSavePluginConfig("Config") {
@ValueDescription("OpenAI API base url") @ValueDescription("OpenAI API base url")
val openAiApi: String by value("https://api.openai.com/v1/") val openAiApi: String by value("https://dashscope.aliyuncs.com/compatible-mode/v1/")
@ValueDescription("OpenAI API Token") @ValueDescription("OpenAI API Token")
var openAiToken: String by value("") var openAiToken: String by value("")
@ -14,11 +14,32 @@ object PluginConfig : AutoSavePluginConfig("Config") {
@ValueDescription("Chat模型") @ValueDescription("Chat模型")
var chatModel: String by value("qwen-max") var chatModel: String by value("qwen-max")
@ValueDescription("推理模型API")
var reasoningModelApi: String by value("https://dashscope.aliyuncs.com/compatible-mode/v1/")
@ValueDescription("推理模型Token")
var reasoningModelToken: String by value("")
@ValueDescription("推理模型") @ValueDescription("推理模型")
var reasoningModel: String by value("qwq-plus") var reasoningModel: String by value("qwq-plus")
@ValueDescription("Chat默认提示") @ValueDescription("视觉模型API")
var prompt: String by value("") var visualModelApi: String by value("https://dashscope.aliyuncs.com/compatible-mode/v1/")
@ValueDescription("视觉模型Token")
var visualModelToken: String by value("")
@ValueDescription("视觉模型")
var visualModel: String by value("qwen-vl-plus")
@ValueDescription("Jina API Key")
val jinaApiKey by value("")
@ValueDescription("SearXNG 搜索引擎地址,如 http://127.0.0.1:8080/search 必须启用允许json格式返回")
val searXngUrl: String by value("")
@ValueDescription("在线运行代码 glot.io 的 api token在官网注册账号即可获取。")
val glotToken: String by value("")
@ValueDescription("群管理是否自动拥有对话权限,默认是") @ValueDescription("群管理是否自动拥有对话权限,默认是")
val groupOpHasChatPermission: Boolean by value(true) val groupOpHasChatPermission: Boolean by value(true)
@ -26,17 +47,14 @@ object PluginConfig : AutoSavePluginConfig("Config") {
@ValueDescription("好友是否自动拥有对话权限,默认是") @ValueDescription("好友是否自动拥有对话权限,默认是")
val friendHasChatPermission: Boolean by value(true) val friendHasChatPermission: Boolean by value(true)
@ValueDescription("群荣誉等级权限门槛,达到这个等级相当于自动拥有权限。") @ValueDescription("群荣誉等级权限门槛,达到这个等级相当于自动拥有对话权限。")
val temperaturePermission: Int by value(60) val temperaturePermission: Int by value(50)
@ValueDescription("等待响应超时时间单位毫秒默认60秒") @ValueDescription("等待响应超时时间单位毫秒默认60秒")
val timeout: Long by value(60000L) val timeout: Long by value(60000L)
@ValueDescription("SearXNG 搜索引擎地址,如 http://127.0.0.1:8080/search 必须启用允许json格式返回") @ValueDescription("系统提示词")
val searXngUrl: String by value("") var prompt: String by value("你是一个乐于助人的助手")
@ValueDescription("在线运行代码 glot.io 的 api token在官网注册账号即可获取。")
val glotToken: String by value("")
@ValueDescription("创建Prompt时取最近多少分钟内的消息") @ValueDescription("创建Prompt时取最近多少分钟内的消息")
val historyWindowMin: Int by value(10) val historyWindowMin: Int by value(10)
@ -55,7 +73,4 @@ object PluginConfig : AutoSavePluginConfig("Config") {
@ValueDescription("关键字呼叫,支持正则表达式") @ValueDescription("关键字呼叫,支持正则表达式")
val callKeyword by value("[小筱][林淋月玥]") val callKeyword by value("[小筱][林淋月玥]")
@ValueDescription("Jina API Key")
val jinaApiKey by value("")
} }

View File

@ -5,8 +5,8 @@ import com.aallam.openai.api.chat.ChatMessage
import com.aallam.openai.api.chat.Tool import com.aallam.openai.api.chat.Tool
import com.aallam.openai.api.core.Parameters import com.aallam.openai.api.core.Parameters
import com.aallam.openai.api.model.ModelId import com.aallam.openai.api.model.ModelId
import com.aallam.openai.client.Chat
import kotlinx.serialization.json.* import kotlinx.serialization.json.*
import top.jie65535.mirai.LargeLanguageModels
import top.jie65535.mirai.PluginConfig import top.jie65535.mirai.PluginConfig
class ReasoningAgent : BaseAgent( class ReasoningAgent : BaseAgent(
@ -18,26 +18,24 @@ class ReasoningAgent : BaseAgent(
putJsonObject("properties") { putJsonObject("properties") {
putJsonObject("prompt") { putJsonObject("prompt") {
put("type", "string") put("type", "string")
put("description", "用于调用推理模型的提示") put("description", "用于调用推理模型的提示")
} }
} }
putJsonArray("required") { putJsonArray("required") {
add("question") add("prompt")
} }
}, },
) )
) { ) {
var llm: Chat? = null
override val loadingMessage: String override val loadingMessage: String
get() = "深度思考中..." get() = "深度思考中..."
override val isEnabled: Boolean override val isEnabled: Boolean
get() = llm != null get() = LargeLanguageModels.reasoning != null
override suspend fun execute(args: JsonObject?): String { override suspend fun execute(args: JsonObject?): String {
requireNotNull(args) requireNotNull(args)
val llm = llm ?: return "未配置llm无法进行推理。" val llm = LargeLanguageModels.reasoning ?: return "未配置llm无法进行推理。"
val prompt = args.getValue("prompt").jsonPrimitive.content val prompt = args.getValue("prompt").jsonPrimitive.content
val answerContent = StringBuilder() val answerContent = StringBuilder()

View File

@ -0,0 +1,76 @@
package top.jie65535.mirai.tools
import com.aallam.openai.api.chat.ChatCompletionRequest
import com.aallam.openai.api.chat.ChatMessage
import com.aallam.openai.api.chat.ImagePart
import com.aallam.openai.api.chat.TextPart
import com.aallam.openai.api.chat.Tool
import com.aallam.openai.api.core.Parameters
import com.aallam.openai.api.model.ModelId
import kotlinx.serialization.json.JsonObject
import kotlinx.serialization.json.add
import kotlinx.serialization.json.jsonPrimitive
import kotlinx.serialization.json.put
import kotlinx.serialization.json.putJsonArray
import kotlinx.serialization.json.putJsonObject
import top.jie65535.mirai.LargeLanguageModels
import top.jie65535.mirai.PluginConfig
/**
 * Tool that forwards an image URL and a prompt to the configured visual model
 * and returns the text the model streams back.
 *
 * The tool is enabled only while [LargeLanguageModels.visual] is non-null.
 */
class VisualAgent : BaseAgent(
    tool = Tool.function(
        name = "visualAgent",
        description = "可通过调用视觉模型识别图片。",
        // JSON schema of the tool arguments: two required string fields.
        parameters = Parameters.buildJsonObject {
            put("type", "object")
            putJsonObject("properties") {
                putJsonObject("image_url") {
                    put("type", "string")
                    put("description", "图片地址")
                }
                putJsonObject("prompt") {
                    put("type", "string")
                    put("description", "用于调用视觉模型的提示词")
                }
            }
            putJsonArray("required") {
                add("image_url")
                add("prompt")
            }
        }
    )
) {
    /** Progress text for this tool. */
    override val loadingMessage: String
        get() = "图片识别中..."

    /** True while a visual model client is available. */
    override val isEnabled: Boolean
        get() = LargeLanguageModels.visual != null

    /**
     * Sends the image and prompt to the visual model and collects the streamed answer.
     *
     * @param args tool arguments; must be non-null and contain `image_url` and `prompt`.
     * @return the concatenated answer text, or a fallback message when no visual
     * client is configured or the model produced no text.
     * @throws IllegalArgumentException when [args] is null.
     * @throws NoSuchElementException when a required argument is missing.
     */
    override suspend fun execute(args: JsonObject?): String {
        val arguments = requireNotNull(args)
        val visualClient = LargeLanguageModels.visual
            ?: return "未配置llm无法进行识别。"

        val targetImage = arguments.getValue("image_url").jsonPrimitive.content
        val instruction = arguments.getValue("prompt").jsonPrimitive.content

        // One user message carrying both the image and the textual instruction.
        val userMessage = ChatMessage.User(
            content = listOf(ImagePart(targetImage), TextPart(instruction))
        )
        val request = ChatCompletionRequest(
            model = ModelId(PluginConfig.visualModel),
            messages = listOf(
                ChatMessage.System("You are a helpful assistant."),
                userMessage
            )
        )

        val reply = StringBuilder()
        visualClient.chatCompletions(request).collect { chunk ->
            // Only the first choice is read; chunks without text are ignored.
            val piece = chunk.choices.firstOrNull()?.delta?.content
            if (!piece.isNullOrEmpty()) {
                reply.append(piece)
            }
        }

        val answer = reply.toString()
        return answer.ifEmpty { "识图异常,结果为空" }
    }
}