Add Visual Agent

This commit is contained in:
2025-07-11 16:50:59 +08:00
parent 89794b587e
commit 3c4373e1ff
6 changed files with 202 additions and 72 deletions

View File

@ -4,14 +4,11 @@ import com.aallam.openai.api.chat.ChatCompletionRequest
import com.aallam.openai.api.chat.ChatMessage
import com.aallam.openai.api.chat.ChatRole
import com.aallam.openai.api.chat.ToolCall
import com.aallam.openai.api.http.Timeout
import com.aallam.openai.api.model.ModelId
import com.aallam.openai.client.Chat
import com.aallam.openai.client.OpenAI
import com.aallam.openai.client.OpenAIHost
import io.ktor.util.collections.*
import kotlinx.coroutines.delay
import kotlinx.coroutines.launch
import kotlinx.coroutines.runBlocking
import net.mamoe.mirai.console.command.CommandManager.INSTANCE.register
import net.mamoe.mirai.console.command.CommandSender.Companion.toCommandSender
import net.mamoe.mirai.console.permission.PermissionId
@ -26,6 +23,7 @@ import net.mamoe.mirai.event.events.FriendMessageEvent
import net.mamoe.mirai.event.events.GroupMessageEvent
import net.mamoe.mirai.event.events.MessageEvent
import net.mamoe.mirai.message.data.*
import net.mamoe.mirai.message.data.Image.Key.queryUrl
import net.mamoe.mirai.message.data.MessageSource.Key.quote
import net.mamoe.mirai.utils.ExternalResource.Companion.toExternalResource
import net.mamoe.mirai.utils.info
@ -38,7 +36,6 @@ import java.time.ZoneOffset
import java.time.format.DateTimeFormatter
import kotlin.collections.*
import kotlin.math.max
import kotlin.time.Duration.Companion.milliseconds
import kotlin.time.Duration.Companion.seconds
object JChatGPT : KotlinPlugin(
@ -51,8 +48,6 @@ object JChatGPT : KotlinPlugin(
// dependsOn("xyz.cssxsh.mirai.plugin.mirai-hibernate-plugin", true)
}
) {
private var llm: Chat? = null
/**
* 是否包含历史对话
*/
@ -68,9 +63,7 @@ object JChatGPT : KotlinPlugin(
PluginConfig.reload()
// 设置Token
if (PluginConfig.openAiToken.isNotEmpty()) {
updateOpenAiToken(PluginConfig.openAiToken)
}
LargeLanguageModels.reload()
// 注册插件命令
PluginCommands.register()
@ -93,17 +86,6 @@ object JChatGPT : KotlinPlugin(
logger.info { "Plugin loaded" }
}
fun updateOpenAiToken(token: String) {
val timeout = PluginConfig.timeout.milliseconds
llm = OpenAI(
token,
host = OpenAIHost(baseUrl = PluginConfig.openAiApi),
timeout = Timeout(request = timeout, connect = timeout, socket = timeout),
// logging = LoggingConfig(LogLevel.All)
)
reasoningAgent.llm = llm
}
private val timeFormatter = DateTimeFormatter.ofPattern("HH:mm:ss")
.withZone(ZoneOffset.systemDefault())
private val dateTimeFormatter = DateTimeFormatter.ofPattern("yyyy年MM月dd E HH:mm:ss")
@ -112,7 +94,7 @@ object JChatGPT : KotlinPlugin(
private suspend fun onMessage(event: MessageEvent) {
// 检查Token是否设置
if (llm == null) return
if (LargeLanguageModels.chat == null) return
// 发送者是否有权限
if (!event.toCommandSender().hasPermission(chatPermission)) {
if (event is GroupMessageEvent) {
@ -244,11 +226,7 @@ object JChatGPT : KotlinPlugin(
it.getDisplay(event.subject)
}
is ForwardMessage -> {
it.title + "\n " + it.preview
}
else -> it.content
else -> singleMessageToText(it)
}
})
}
@ -285,18 +263,36 @@ object JChatGPT : KotlinPlugin(
val recordMessage = record.toMessageChain()
recordMessage[QuoteReply.Key]?.let {
historyText.append(" 引用\n > ")
.appendLine(it.source.originalMessage.content.replace("\n", "\n > "))
.appendLine(it.source.originalMessage
.joinToString("", transform = ::singleMessageToText)
.replace("\n", "\n > "))
}
// 消息内容
historyText.append(" 说:").appendLine(record.toMessageChain().joinToString("") {
when (it) {
is ForwardMessage -> {
it.title + "\n " + it.preview
}
historyText.append(" 说:").appendLine(
record.toMessageChain().joinToString("", transform = ::singleMessageToText))
}
else -> it.content
/**
 * Renders one [SingleMessage] element as plain text.
 *
 * - [ForwardMessage]: its title followed by its preview.
 * - [Image]: a Markdown image link built from the URL returned by `queryUrl()`;
 *   if that lookup throws, a warning is logged and the element's `content` is used instead.
 * - Any other element: its `content`.
 *
 * NOTE(review): `runBlocking` blocks the calling thread until the URL lookup finishes.
 * If this helper is reached from coroutine code (TODO confirm against callers), a
 * suspending variant would avoid tying up the thread.
 */
private fun singleMessageToText(it: SingleMessage): String {
return when (it) {
is ForwardMessage -> {
it.title + "\n " + it.preview
}
// Format images as Markdown image links
is Image -> {
try {
// queryUrl() is called inside runBlocking because this function is not suspending.
val imageUrl = runBlocking {
it.queryUrl()
}
"![图片]($imageUrl)"
} catch (e: Throwable) {
// Best effort: any failure falls back to the element's own content text.
logger.warning("图片地址获取失败", e)
it.content
}
})
}
else -> it.content
}
}
// endregion - 历史消息相关 -
@ -657,8 +653,6 @@ object JChatGPT : KotlinPlugin(
}
}
private val reasoningAgent = ReasoningAgent()
/**
* 工具列表
*/
@ -682,7 +676,10 @@ object JChatGPT : KotlinPlugin(
RunCode(),
// 推理代理
reasoningAgent,
ReasoningAgent(),
// 视觉代理
VisualAgent(),
// 天气服务
WeatherService(),
@ -713,7 +710,7 @@ object JChatGPT : KotlinPlugin(
chatMessages: List<ChatMessage>,
hasTools: Boolean = true
): ChatMessage {
val llm = this.llm ?: throw NullPointerException("OpenAI Token 未设置,无法开始")
val llm = LargeLanguageModels.chat ?: throw NullPointerException("OpenAI Token 未设置,无法开始")
val availableTools = if (hasTools) {
myTools.filter { it.isEnabled }.map { it.tool }
} else null
@ -733,17 +730,21 @@ object JChatGPT : KotlinPlugin(
val nameCard = StringBuilder()
// 群活跃等级
nameCard.append("【lv").append(member.active.temperature).append(" ")
// 群头衔
if (member.specialTitle.isNotEmpty()) {
nameCard.append(member.specialTitle)
} else {
nameCard.append(
when (member.permission) {
OWNER -> "群主"
ADMINISTRATOR -> "管理员"
MEMBER -> member.temperatureTitle
}
)
try {
// 群头衔
if (member.specialTitle.isNotEmpty()) {
nameCard.append(member.specialTitle)
} else {
nameCard.append(
when (member.permission) {
OWNER -> "群主"
ADMINISTRATOR -> "管理员"
MEMBER -> member.temperatureTitle
}
)
}
} catch (e: Throwable) {
logger.warning("获取群头衔失败", e)
}
// 群名片
nameCard.append("").append(member.nameCardOrNick).append("(").append(member.id).append(")")

View File

@ -0,0 +1,40 @@
package top.jie65535.mirai
import com.aallam.openai.api.http.Timeout
import com.aallam.openai.client.Chat
import com.aallam.openai.client.OpenAI
import com.aallam.openai.client.OpenAIHost
import kotlin.time.Duration.Companion.milliseconds
/**
 * Holds the OpenAI-compatible clients used by the plugin, one per model role.
 *
 * Each property is `null` while the corresponding API base URL or token in
 * [PluginConfig] is blank. Call [reload] after the configuration changes.
 */
object LargeLanguageModels {
    /** Client for the main chat model, or `null` when it is not configured. */
    var chat: Chat? = null

    /** Client for the reasoning model, or `null` when it is not configured. */
    var reasoning: Chat? = null

    /** Client for the visual model, or `null` when it is not configured. */
    var visual: Chat? = null

    /**
     * Rebuilds all three clients from the current [PluginConfig] values.
     *
     * A slot whose API URL or token is blank is reset to `null`, so a model that
     * has been un-configured stops being reported as available instead of keeping
     * a client built from stale credentials.
     *
     * NOTE(review): the previously assigned clients are simply dropped here;
     * confirm whether the client library expects them to be closed explicitly.
     */
    fun reload() {
        // The same timeout applies to the request, connect and socket phases of every client.
        val timeout = PluginConfig.timeout.milliseconds.let {
            Timeout(request = it, connect = it, socket = it)
        }
        chat = createClient(PluginConfig.openAiApi, PluginConfig.openAiToken, timeout)
        reasoning = createClient(PluginConfig.reasoningModelApi, PluginConfig.reasoningModelToken, timeout)
        visual = createClient(PluginConfig.visualModelApi, PluginConfig.visualModelToken, timeout)
    }

    /** Builds a client for [baseUrl] and [token], or returns `null` when either is blank. */
    private fun createClient(baseUrl: String, token: String, timeout: Timeout): Chat? {
        if (baseUrl.isBlank() || token.isBlank()) return null
        return OpenAI(
            token = token,
            host = OpenAIHost(baseUrl = baseUrl),
            timeout = timeout
        )
    }
}

View File

@ -18,7 +18,7 @@ object PluginCommands : CompositeCommand(
suspend fun CommandSender.setToken(token: String) {
PluginConfig.openAiToken = token
PluginConfig.save()
JChatGPT.updateOpenAiToken(token)
LargeLanguageModels.reload()
sendMessage("OK")
}
@ -45,7 +45,7 @@ object PluginCommands : CompositeCommand(
@SubCommand
suspend fun CommandSender.reload() {
PluginConfig.reload()
JChatGPT.updateOpenAiToken(PluginConfig.openAiToken)
LargeLanguageModels.reload()
sendMessage("OK")
}
}

View File

@ -6,7 +6,7 @@ import net.mamoe.mirai.console.data.value
object PluginConfig : AutoSavePluginConfig("Config") {
@ValueDescription("OpenAI API base url")
val openAiApi: String by value("https://api.openai.com/v1/")
val openAiApi: String by value("https://dashscope.aliyuncs.com/compatible-mode/v1/")
@ValueDescription("OpenAI API Token")
var openAiToken: String by value("")
@ -14,11 +14,32 @@ object PluginConfig : AutoSavePluginConfig("Config") {
@ValueDescription("Chat模型")
var chatModel: String by value("qwen-max")
@ValueDescription("推理模型API")
var reasoningModelApi: String by value("https://dashscope.aliyuncs.com/compatible-mode/v1/")
@ValueDescription("推理模型Token")
var reasoningModelToken: String by value("")
@ValueDescription("推理模型")
var reasoningModel: String by value("qwq-plus")
@ValueDescription("Chat默认提示")
var prompt: String by value("")
@ValueDescription("视觉模型API")
var visualModelApi: String by value("https://dashscope.aliyuncs.com/compatible-mode/v1/")
@ValueDescription("视觉模型Token")
var visualModelToken: String by value("")
@ValueDescription("视觉模型")
var visualModel: String by value("qwen-vl-plus")
@ValueDescription("Jina API Key")
val jinaApiKey by value("")
@ValueDescription("SearXNG 搜索引擎地址,如 http://127.0.0.1:8080/search 必须启用允许json格式返回")
val searXngUrl: String by value("")
@ValueDescription("在线运行代码 glot.io 的 api token在官网注册账号即可获取。")
val glotToken: String by value("")
@ValueDescription("群管理是否自动拥有对话权限,默认是")
val groupOpHasChatPermission: Boolean by value(true)
@ -26,17 +47,14 @@ object PluginConfig : AutoSavePluginConfig("Config") {
@ValueDescription("好友是否自动拥有对话权限,默认是")
val friendHasChatPermission: Boolean by value(true)
@ValueDescription("群荣誉等级权限门槛,达到这个等级相当于自动拥有权限。")
val temperaturePermission: Int by value(60)
@ValueDescription("群荣誉等级权限门槛,达到这个等级相当于自动拥有对话权限。")
val temperaturePermission: Int by value(50)
@ValueDescription("等待响应超时时间单位毫秒默认60秒")
val timeout: Long by value(60000L)
@ValueDescription("SearXNG 搜索引擎地址,如 http://127.0.0.1:8080/search 必须启用允许json格式返回")
val searXngUrl: String by value("")
@ValueDescription("在线运行代码 glot.io 的 api token在官网注册账号即可获取。")
val glotToken: String by value("")
@ValueDescription("系统提示词")
var prompt: String by value("你是一个乐于助人的助手")
@ValueDescription("创建Prompt时取最近多少分钟内的消息")
val historyWindowMin: Int by value(10)
@ -55,7 +73,4 @@ object PluginConfig : AutoSavePluginConfig("Config") {
@ValueDescription("关键字呼叫,支持正则表达式")
val callKeyword by value("[小筱][林淋月玥]")
@ValueDescription("Jina API Key")
val jinaApiKey by value("")
}

View File

@ -5,8 +5,8 @@ import com.aallam.openai.api.chat.ChatMessage
import com.aallam.openai.api.chat.Tool
import com.aallam.openai.api.core.Parameters
import com.aallam.openai.api.model.ModelId
import com.aallam.openai.client.Chat
import kotlinx.serialization.json.*
import top.jie65535.mirai.LargeLanguageModels
import top.jie65535.mirai.PluginConfig
class ReasoningAgent : BaseAgent(
@ -18,26 +18,24 @@ class ReasoningAgent : BaseAgent(
putJsonObject("properties") {
putJsonObject("prompt") {
put("type", "string")
put("description", "用于调用推理模型的提示")
put("description", "用于调用推理模型的提示")
}
}
putJsonArray("required") {
add("question")
add("prompt")
}
},
)
) {
var llm: Chat? = null
override val loadingMessage: String
get() = "深度思考中..."
override val isEnabled: Boolean
get() = llm != null
get() = LargeLanguageModels.reasoning != null
override suspend fun execute(args: JsonObject?): String {
requireNotNull(args)
val llm = llm ?: return "未配置llm无法进行推理。"
val llm = LargeLanguageModels.reasoning ?: return "未配置llm无法进行推理。"
val prompt = args.getValue("prompt").jsonPrimitive.content
val answerContent = StringBuilder()

View File

@ -0,0 +1,76 @@
package top.jie65535.mirai.tools
import com.aallam.openai.api.chat.ChatCompletionRequest
import com.aallam.openai.api.chat.ChatMessage
import com.aallam.openai.api.chat.ImagePart
import com.aallam.openai.api.chat.TextPart
import com.aallam.openai.api.chat.Tool
import com.aallam.openai.api.core.Parameters
import com.aallam.openai.api.model.ModelId
import kotlinx.serialization.json.JsonObject
import kotlinx.serialization.json.add
import kotlinx.serialization.json.jsonPrimitive
import kotlinx.serialization.json.put
import kotlinx.serialization.json.putJsonArray
import kotlinx.serialization.json.putJsonObject
import top.jie65535.mirai.LargeLanguageModels
import top.jie65535.mirai.PluginConfig
/**
 * Tool agent that sends an image URL together with a prompt to the configured
 * visual model and returns the text the model streams back.
 *
 * The tool is exposed under the name `visualAgent` and requires two string
 * arguments: `image_url` and `prompt`.
 */
class VisualAgent : BaseAgent(
    tool = Tool.function(
        name = "visualAgent",
        description = "可通过调用视觉模型识别图片。",
        parameters = Parameters.buildJsonObject {
            put("type", "object")
            putJsonObject("properties") {
                putJsonObject("image_url") {
                    put("type", "string")
                    put("description", "图片地址")
                }
                putJsonObject("prompt") {
                    put("type", "string")
                    put("description", "用于调用视觉模型的提示词")
                }
            }
            putJsonArray("required") {
                add("image_url")
                add("prompt")
            }
        }
    )
) {
    /** Status text shown while the visual model is working. */
    override val loadingMessage: String
        get() = "图片识别中..."

    /** The tool is only offered while a visual model client is configured. */
    override val isEnabled: Boolean
        get() = LargeLanguageModels.visual != null

    /**
     * Runs the visual model on the given image and prompt.
     *
     * @param args tool-call arguments; must be non-null and contain `image_url` and `prompt`.
     * @return the concatenated streamed answer, or a fixed fallback message when no
     *         visual model is configured or the answer is empty.
     */
    override suspend fun execute(args: JsonObject?): String {
        val arguments = requireNotNull(args)
        val visualModel = LargeLanguageModels.visual
        if (visualModel == null) {
            return "未配置llm无法进行识别。"
        }

        val imageUrl = arguments.getValue("image_url").jsonPrimitive.content
        val prompt = arguments.getValue("prompt").jsonPrimitive.content

        val userParts = listOf(
            ImagePart(imageUrl),
            TextPart(prompt)
        )
        val request = ChatCompletionRequest(
            model = ModelId(PluginConfig.visualModel),
            messages = listOf(
                ChatMessage.System("You are a helpful assistant."),
                ChatMessage.User(content = userParts)
            )
        )

        // Accumulate the streamed deltas; chunks without choices or content are skipped.
        val answer = StringBuilder()
        visualModel.chatCompletions(request).collect { chunk ->
            val piece = chunk.choices.firstOrNull()?.delta?.content
            if (!piece.isNullOrEmpty()) {
                answer.append(piece)
            }
        }

        val text = answer.toString()
        return text.ifEmpty { "识图异常,结果为空" }
    }
}