From 4dde523dfceb75d5c716f7250462b07a8aa104d2 Mon Sep 17 00:00:00 2001 From: jie65535 Date: Fri, 22 May 2026 14:06:50 +0800 Subject: [PATCH] Move token usage to JSON store --- .gitignore | 1 + scripts/recover_data_yml.py | 72 +++++++++++++++++ src/main/kotlin/JChatGPT.kt | 9 ++- src/main/kotlin/PluginCommands.kt | 119 ++++++++++------------------- src/main/kotlin/PluginData.kt | 33 ++++---- src/main/kotlin/TokenUsageStore.kt | 113 +++++++++++++++++++++++++++ 6 files changed, 244 insertions(+), 103 deletions(-) create mode 100644 scripts/recover_data_yml.py create mode 100644 src/main/kotlin/TokenUsageStore.kt diff --git a/.gitignore b/.gitignore index f05259d..3683c7d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ # User-specific stuff .idea/ .run/ +build-with-jdk17.bat *.iml *.ipr diff --git a/scripts/recover_data_yml.py b/scripts/recover_data_yml.py new file mode 100644 index 0000000..e995b02 --- /dev/null +++ b/scripts/recover_data_yml.py @@ -0,0 +1,72 @@ +#!/usr/bin/env python3 +""" +恢复 data.yml:把 tokenUsageDailyRecords 抽出成 token_usage.json, +顺手清理 tokenUsageRecords,把 data.yml 重写成合法的、yamlkt 能读回的 JSON。 + +用法(在 data.yml 所在目录运行): + python3 recover_data_yml.py /path/to/top.jie65535.mirai.JChatGPT/ + +会做: + 1. 备份原 data.yml -> data.yml.bak- + 2. 读 data.yml(按 JSON 解析,目前文件就是 JSON-flow YAML) + 3. 把 tokenUsageDailyRecords 写到 token_usage.json + 4. 删除 tokenUsageRecords 和 tokenUsageDailyRecords 字段 + 5. 重写 data.yml(保留 contactMemory / userFavorability 等) +""" +import json +import os +import sys +import time + +def main(target_dir: str) -> int: + data_path = os.path.join(target_dir, "data.yml") + if not os.path.exists(data_path): + print(f"NOT FOUND: {data_path}", file=sys.stderr) + return 1 + + with open(data_path, "r", encoding="utf-8") as f: + text = f.read() + + try: + data = json.loads(text) + except json.JSONDecodeError as e: + print(f"data.yml 不是合法 JSON:{e}", file=sys.stderr) + print("如果文件其实是 block-style YAML,请先用 yq/python yaml 转换", file=sys.stderr) + return 2 + + if not isinstance(data, dict): + print(f"顶层不是 map,是 {type(data).__name__}", file=sys.stderr) + return 3 + + ts = int(time.time()) + backup_path = os.path.join(target_dir, f"data.yml.bak-{ts}") + with open(backup_path, "w", encoding="utf-8") as f: + f.write(text) + print(f"已备份 -> {backup_path}") + + daily_records = data.pop("tokenUsageDailyRecords", []) + raw_records = data.pop("tokenUsageRecords", []) + print(f"提取 tokenUsageDailyRecords: {len(daily_records)} 条") + print(f"丢弃 tokenUsageRecords (legacy): {len(raw_records)} 条") + + token_path = os.path.join(target_dir, "token_usage.json") + if os.path.exists(token_path): + token_backup = os.path.join(target_dir, f"token_usage.json.bak-{ts}") + os.rename(token_path, token_backup) + print(f"已备份现有 token_usage.json -> {token_backup}") + + with open(token_path, "w", encoding="utf-8") as f: + json.dump(daily_records, f, ensure_ascii=False, indent=2) + print(f"写入 -> {token_path} ({len(daily_records)} 条)") + + with open(data_path, "w", encoding="utf-8") as f: + json.dump(data, f, ensure_ascii=False, indent=4) + print(f"重写 -> {data_path}(剩余字段: {list(data.keys())})") + return 0 + + +if __name__ == "__main__": + if len(sys.argv) != 2: + print(__doc__, file=sys.stderr) + sys.exit(1) + sys.exit(main(sys.argv[1])) diff --git a/src/main/kotlin/JChatGPT.kt b/src/main/kotlin/JChatGPT.kt index 3f2f20a..b3562c5 100644 --- a/src/main/kotlin/JChatGPT.kt +++ b/src/main/kotlin/JChatGPT.kt @@ -79,6 +79,9 @@ object JChatGPT : KotlinPlugin( PluginConfig.reload() PluginData.reload() + // 初始化 token 使用日聚合存储(独立 JSON 文件,绕开 yamlkt 大数据 bug) + TokenUsageStore.init(dataFolder) + // 设置Token LargeLanguageModels.reload() @@ -657,21 +660,19 @@ object JChatGPT : KotlinPlugin( ) ) - // 记录token使用量 + // 记录token使用量(按日聚合,独立JSON文件) lastTokenUsage?.let { usage -> val now = OffsetDateTime.now().toEpochSecond() val groupId = if (event is GroupMessageEvent) event.subject.id else null - val record = TokenUsageRecord( + TokenUsageStore.record( timestamp = now, userId = event.sender.id, userNickname = event.senderName, groupId = groupId, - model = PluginConfig.chatModel, promptTokens = usage.promptTokens ?: 0, completionTokens = usage.completionTokens ?: 0, totalTokens = usage.totalTokens ?: 0 ) - PluginData.tokenUsageRecords.add(record) } // 处理最后一个工具调用 diff --git a/src/main/kotlin/PluginCommands.kt b/src/main/kotlin/PluginCommands.kt index ec7d170..ebc9c1c 100644 --- a/src/main/kotlin/PluginCommands.kt +++ b/src/main/kotlin/PluginCommands.kt @@ -10,10 +10,7 @@ import net.mamoe.mirai.contact.Group import net.mamoe.mirai.contact.Member import net.mamoe.mirai.contact.User import top.jie65535.mirai.JChatGPT.reload -import java.time.Instant -import java.time.ZoneId import java.time.LocalDate -import java.time.format.DateTimeFormatter object PluginCommands : CompositeCommand( JChatGPT, "jgpt", description = "J OpenAI ChatGPT" @@ -81,29 +78,27 @@ object PluginCommands : CompositeCommand( suspend fun CommandSender.tokens(days: Int = 7) { validateDays(days) - if (PluginData.tokenUsageRecords.isEmpty()) { + if (TokenUsageStore.all.isEmpty()) { sendMessage("暂无 Token 使用记录") return } - val cutoff = calculateCutoffTimestamp(days) - val todayStart = calculateTodayStartTimestamp() + val cutoff = calculateCutoffDate(days) + val today = LocalDate.now().toString() - // 一次遍历计算所有统计数据 data class Statistics( - var totalTokens: Int = 0, - var todayTokens: Int = 0, - val userTotals: MutableMap> = mutableMapOf(), - val groupTotals: MutableMap = mutableMapOf(), + var totalTokens: Long = 0, + var todayTokens: Long = 0, + val userTotals: MutableMap> = mutableMapOf(), + val groupTotals: MutableMap = mutableMapOf(), val users: MutableSet = mutableSetOf() ) - val stats = PluginData.tokenUsageRecords.fold(Statistics()) { acc, record -> - if (record.timestamp >= cutoff) { + val stats = TokenUsageStore.all.fold(Statistics()) { acc, record -> + if (record.date >= cutoff) { acc.totalTokens += record.totalTokens acc.users.add(record.userId) - // 累计用户Token val existing = acc.userTotals[record.userId] if (existing == null) { acc.userTotals[record.userId] = record.userNickname to record.totalTokens @@ -111,13 +106,12 @@ object PluginCommands : CompositeCommand( acc.userTotals[record.userId] = existing.first to (existing.second + record.totalTokens) } - // 累计群组Token record.groupId?.let { groupId -> - acc.groupTotals[groupId] = acc.groupTotals.getOrDefault(groupId, 0) + record.totalTokens + acc.groupTotals[groupId] = acc.groupTotals.getOrDefault(groupId, 0L) + record.totalTokens } } - if (record.timestamp >= todayStart) { + if (record.date == today) { acc.todayTokens += record.totalTokens } @@ -151,7 +145,7 @@ object PluginCommands : CompositeCommand( appendLine(" /jgpt tokensDaily [days] - 每日统计") appendLine(" /jgpt tokensUsers [limit] - 用户排名") appendLine(" /jgpt tokensGroups [limit] - 群组排名") - appendLine(" /jgpt tokensQuery [userId] [days] - 详细记录") + appendLine(" /jgpt tokensQuery [userId] [days] - 每日逐人记录") appendLine(" /jgpt tokensUserDaily [days] - 用户日统计") } @@ -162,19 +156,12 @@ object PluginCommands : CompositeCommand( suspend fun CommandSender.tokensDaily(days: Int = 7) { validateDays(days) - val cutoff = calculateCutoffTimestamp(days) + val cutoff = calculateCutoffDate(days) - val dailyStats = PluginData.tokenUsageRecords - .filter { it.timestamp >= cutoff } - .groupBy { - LocalDate.ofInstant( - Instant.ofEpochSecond(it.timestamp), - ZoneId.systemDefault() - ) - } - .mapValues { (_, records) -> - records.sumOf { it.totalTokens } - } + val dailyStats = TokenUsageStore.all + .filter { it.date >= cutoff } + .groupBy { it.date } + .mapValues { (_, records) -> records.sumOf { it.totalTokens } } .toSortedMap() if (dailyStats.isEmpty()) { @@ -196,13 +183,11 @@ object PluginCommands : CompositeCommand( suspend fun CommandSender.tokensUsers(limit: Int = 10) { require(limit > 0) { "limit must be positive: $limit" } - val userStats = PluginData.tokenUsageRecords + val userStats = TokenUsageStore.all .groupBy { it.userId } .mapValues { (_, records) -> - Pair( - records.first().userNickname, - records.sumOf { it.totalTokens } - ) + val latest = records.maxByOrNull { it.date }!! + Pair(latest.userNickname, records.sumOf { it.totalTokens }) } .toList() .sortedByDescending { it.second.second } @@ -227,12 +212,10 @@ object PluginCommands : CompositeCommand( suspend fun CommandSender.tokensGroups(limit: Int = 10) { require(limit > 0) { "limit must be positive: $limit" } - val groupStats = PluginData.tokenUsageRecords + val groupStats = TokenUsageStore.all .filter { it.groupId != null } .groupBy { it.groupId!! } - .mapValues { (_, records) -> - records.sumOf { it.totalTokens } - } + .mapValues { (_, records) -> records.sumOf { it.totalTokens } } .toList() .sortedByDescending { it.second } .take(limit) @@ -256,12 +239,12 @@ object PluginCommands : CompositeCommand( suspend fun CommandSender.tokensQuery(userId: Long?, days: Int = 7) { validateDays(days) - val cutoff = calculateCutoffTimestamp(days) + val cutoff = calculateCutoffDate(days) - val filtered = PluginData.tokenUsageRecords - .filter { it.timestamp >= cutoff } + val filtered = TokenUsageStore.all + .filter { it.date >= cutoff } .filter { userId == null || it.userId == userId } - .sortedByDescending { it.timestamp } + .sortedWith(compareByDescending { it.date }.thenByDescending { it.totalTokens }) .take(DEFAULT_QUERY_LIMIT) if (filtered.isEmpty()) { @@ -270,15 +253,12 @@ object PluginCommands : CompositeCommand( } val response = buildString { - appendLine("最近 $days 天使用记录(最多显示${DEFAULT_QUERY_LIMIT}条):") + appendLine("最近 $days 天使用记录(最多显示${DEFAULT_QUERY_LIMIT}条,按日聚合):") appendLine() filtered.forEach { record -> - val time = Instant.ofEpochSecond(record.timestamp) - .atZone(ZoneId.systemDefault()) - .format(DateTimeFormatter.ofPattern("MM-dd HH:mm")) val location = if (record.groupId != null) "群${record.groupId}" else "私聊" - appendLine("[$time] $location - ${record.userNickname}") - appendLine(" 模型: ${record.model}, Tokens: ${formatNumber(record.totalTokens)} " + + appendLine("[${record.date}] $location - ${record.userNickname}") + appendLine(" 调用 ${record.callCount} 次, Tokens: ${formatNumber(record.totalTokens)} " + "(输入: ${formatNumber(record.promptTokens)}, 输出: ${formatNumber(record.completionTokens)})") appendLine() } @@ -290,36 +270,28 @@ object PluginCommands : CompositeCommand( suspend fun CommandSender.tokensUserDaily(userId: Long, days: Int = 7) { validateDays(days) - val cutoff = calculateCutoffTimestamp(days) + val cutoff = calculateCutoffDate(days) - // 先过滤用户记录,同时获取昵称 - val userRecords = PluginData.tokenUsageRecords - .filter { it.timestamp >= cutoff && it.userId == userId } + val userRecords = TokenUsageStore.all + .filter { it.date >= cutoff && it.userId == userId } if (userRecords.isEmpty()) { sendMessage("用户 $userId 在指定时间范围内无使用记录") return } - val userNickname = userRecords.first().userNickname + val userNickname = userRecords.maxByOrNull { it.date }!!.userNickname val userDailyStats = userRecords - .groupBy { - LocalDate.ofInstant( - Instant.ofEpochSecond(it.timestamp), - ZoneId.systemDefault() - ) - } - .mapValues { (_, records) -> - records.sumOf { it.totalTokens } - } + .groupBy { it.date } + .mapValues { (_, records) -> records.sumOf { it.totalTokens } } .toSortedMap() val response = buildString { appendLine("用户 $userNickname 最近 $days 天 Token 使用统计:") appendLine() userDailyStats.forEach { (date, total) -> - appendLine("$date: $total tokens") + appendLine("$date: ${formatNumber(total)} tokens") } appendLine() appendLine("总计: ${formatNumber(userDailyStats.values.sum())} tokens") @@ -330,23 +302,10 @@ object PluginCommands : CompositeCommand( // ==================== 辅助函数 ==================== /** - * 计算截止时间戳(指定天数前的起始时间 00:00:00) - * 最近N天包含今天,所以要从 (N-1) 天前开始算 + * 计算截止日期字符串(指定天数前的日期,含今天共 days 天) */ - private fun calculateCutoffTimestamp(days: Int): Long { - return LocalDate.now() - .minusDays((days - 1).toLong()) - .atStartOfDay(ZoneId.systemDefault()) - .toEpochSecond() - } - - /** - * 计算今天的起始时间戳(00:00:00) - */ - private fun calculateTodayStartTimestamp(): Long { - return LocalDate.now() - .atStartOfDay(ZoneId.systemDefault()) - .toEpochSecond() + private fun calculateCutoffDate(days: Int): String { + return LocalDate.now().minusDays((days - 1).toLong()).toString() } /** diff --git a/src/main/kotlin/PluginData.kt b/src/main/kotlin/PluginData.kt index 7583c6b..cc2f2a0 100644 --- a/src/main/kotlin/PluginData.kt +++ b/src/main/kotlin/PluginData.kt @@ -40,26 +40,26 @@ data class FavorabilityInfo( } /** - * Token使用记录数据类 - * @param timestamp Unix时间戳 - * @param userId 用户QQ号 - * @param userNickname 用户昵称 + * Token使用日聚合记录。按 (date, userId, groupId) 维度合并。由 [TokenUsageStore] 持久化到独立 JSON 文件。 + * @param date 本地时区下的日期,格式 yyyy-MM-dd + * @param userId QQ + * @param userNickname 最近一次记录到的昵称 * @param groupId 群号(私聊时为null) - * @param model 模型名称 - * @param promptTokens 输入token数 - * @param completionTokens 输出token数 - * @param totalTokens 总token数 + * @param promptTokens 当天累计输入token + * @param completionTokens 当天累计输出token + * @param totalTokens 当天累计总token + * @param callCount 当天调用次数 */ @Serializable -data class TokenUsageRecord( - val timestamp: Long, +data class TokenUsageDailyRecord( + val date: String, val userId: Long, val userNickname: String, val groupId: Long?, - val model: String, - val promptTokens: Int, - val completionTokens: Int, - val totalTokens: Int + val promptTokens: Long = 0, + val completionTokens: Long = 0, + val totalTokens: Long = 0, + val callCount: Int = 0 ) object PluginData : AutoSavePluginData("data") { @@ -75,11 +75,6 @@ object PluginData : AutoSavePluginData("data") { */ val userFavorability by value(mutableMapOf()) - /** - * Token使用记录 - */ - val tokenUsageRecords by value(mutableListOf()) - /** * 添加对话记忆 */ diff --git a/src/main/kotlin/TokenUsageStore.kt b/src/main/kotlin/TokenUsageStore.kt new file mode 100644 index 0000000..7b740bf --- /dev/null +++ b/src/main/kotlin/TokenUsageStore.kt @@ -0,0 +1,113 @@ +package top.jie65535.mirai + +import kotlinx.serialization.builtins.ListSerializer +import kotlinx.serialization.json.Json +import java.io.File +import java.time.Instant +import java.time.LocalDate +import java.time.ZoneId +import java.time.format.DateTimeFormatter + +/** + * Token使用日聚合存储。独立于 mamoe 的 plugin data 系统,直接管 JSON 文件, + * 避免 yamlkt 在大数据量下编/解码不互通的 bug。 + */ +object TokenUsageStore { + private val json = Json { + prettyPrint = true + ignoreUnknownKeys = true + encodeDefaults = true + } + private val dateFmt = DateTimeFormatter.ISO_LOCAL_DATE + private val listSerializer = ListSerializer(TokenUsageDailyRecord.serializer()) + + private lateinit var file: File + private val records = mutableListOf() + + /** + * 在 onEnable 中调用一次,传入插件数据目录。 + */ + fun init(dataFolder: File) { + file = File(dataFolder, "token_usage.json") + records.clear() + if (file.exists() && file.length() > 0) { + try { + records.addAll(json.decodeFromString(listSerializer, file.readText())) + } catch (_: Exception) { + // 加载失败不阻塞插件启动,备份原文件后从空开始 + val backup = File(file.parentFile, "token_usage.json.broken-${System.currentTimeMillis()}") + file.copyTo(backup, overwrite = true) + } + } + } + + val all: List get() = records + + /** + * 将一次调用的 token 用量累加到当日聚合行;若不存在则创建。写盘失败不抛。 + */ + @Synchronized + fun record( + timestamp: Long, + userId: Long, + userNickname: String, + groupId: Long?, + promptTokens: Int, + completionTokens: Int, + totalTokens: Int + ) { + val date = LocalDate.ofInstant(Instant.ofEpochSecond(timestamp), ZoneId.systemDefault()) + .format(dateFmt) + val nickname = sanitizeNickname(userNickname) + val idx = records.indexOfFirst { + it.date == date && it.userId == userId && it.groupId == groupId + } + if (idx >= 0) { + val r = records[idx] + records[idx] = r.copy( + userNickname = nickname.ifEmpty { r.userNickname }, + promptTokens = r.promptTokens + promptTokens, + completionTokens = r.completionTokens + completionTokens, + totalTokens = r.totalTokens + totalTokens, + callCount = r.callCount + 1 + ) + } else { + records.add( + TokenUsageDailyRecord( + date = date, + userId = userId, + userNickname = nickname, + groupId = groupId, + promptTokens = promptTokens.toLong(), + completionTokens = completionTokens.toLong(), + totalTokens = totalTokens.toLong(), + callCount = 1 + ) + ) + } + save() + } + + /** 把控制字符压成空格,避免昵称里的换行/零宽字符把 JSON/展示弄乱。 */ + private fun sanitizeNickname(s: String): String { + if (s.isEmpty()) return s + val cleaned = buildString(s.length) { + for (c in s) { + if (c == ' ' || (!c.isISOControl() && c.category != CharCategory.FORMAT)) append(c) + else append(' ') + } + } + return cleaned.trim().replace(Regex(" {2,}"), " ") + } + + private fun save() { + try { + val tmp = File(file.parentFile, "${file.name}.tmp") + tmp.writeText(json.encodeToString(listSerializer, records)) + tmp.copyTo(file, overwrite = true) + tmp.delete() + } catch (_: Exception) { + // 写盘失败由日志/上层关心,这里不抛断对话流程 + } + } +}