Move token usage to JSON store

This commit is contained in:
2026-05-22 14:06:50 +08:00
parent f17adee4ba
commit 4dde523dfc
6 changed files with 244 additions and 103 deletions

1
.gitignore vendored
View File

@@ -1,6 +1,7 @@
# User-specific stuff # User-specific stuff
.idea/ .idea/
.run/ .run/
build-with-jdk17.bat
*.iml *.iml
*.ipr *.ipr

View File

@@ -0,0 +1,72 @@
#!/usr/bin/env python3
"""
恢复 data.yml把 tokenUsageDailyRecords 抽出成 token_usage.json
顺手清理 tokenUsageRecords把 data.yml 重写成合法的、yamlkt 能读回的 JSON。
用法(在 data.yml 所在目录运行):
python3 recover_data_yml.py /path/to/top.jie65535.mirai.JChatGPT/
会做:
1. 备份原 data.yml -> data.yml.bak-<timestamp>
2. 读 data.yml按 JSON 解析,目前文件就是 JSON-flow YAML
3. 把 tokenUsageDailyRecords 写到 token_usage.json
4. 删除 tokenUsageRecords 和 tokenUsageDailyRecords 字段
5. 重写 data.yml保留 contactMemory / userFavorability 等)
"""
import json
import os
import sys
import time
def _dump_json_atomic(path: str, payload, indent: int) -> None:
    """Serialize ``payload`` as JSON to ``path`` via a temp file and ``os.replace``.

    Writing to a sibling ``.tmp`` file first means an interrupted write never
    leaves ``path`` itself half-written.
    """
    tmp_path = path + ".tmp"
    with open(tmp_path, "w", encoding="utf-8") as f:
        json.dump(payload, f, ensure_ascii=False, indent=indent)
    os.replace(tmp_path, path)


def main(target_dir: str) -> int:
    """Move the daily token-usage records out of ``data.yml`` into ``token_usage.json``.

    Steps:
      1. Read ``<target_dir>/data.yml`` and parse it as JSON.
      2. Write a timestamped backup of the original text.
      3. Remove ``tokenUsageDailyRecords`` and ``tokenUsageRecords`` from the mapping.
      4. Write the daily records to ``token_usage.json`` (backing up any existing
         file first), unless there is nothing to extract and a
         ``token_usage.json`` already exists.
      5. Rewrite ``data.yml`` with the remaining fields.

    Args:
        target_dir: Directory that contains ``data.yml``.

    Returns:
        0 on success, 1 if ``data.yml`` is missing, 2 if it is not valid JSON,
        3 if the top-level JSON value is not an object.
    """
    data_path = os.path.join(target_dir, "data.yml")
    if not os.path.exists(data_path):
        print(f"NOT FOUND: {data_path}", file=sys.stderr)
        return 1

    with open(data_path, "r", encoding="utf-8") as f:
        text = f.read()

    try:
        data = json.loads(text)
    except json.JSONDecodeError as e:
        print(f"data.yml 不是合法 JSON{e}", file=sys.stderr)
        print("如果文件其实是 block-style YAML请先用 yq/python yaml 转换", file=sys.stderr)
        return 2

    if not isinstance(data, dict):
        print(f"顶层不是 map{type(data).__name__}", file=sys.stderr)
        return 3

    # Back up the untouched original text before changing anything on disk.
    ts = int(time.time())
    backup_path = os.path.join(target_dir, f"data.yml.bak-{ts}")
    with open(backup_path, "w", encoding="utf-8") as f:
        f.write(text)
    print(f"已备份 -> {backup_path}")

    # A JSON null for either field is treated like an absent field so that
    # len() below cannot fail.
    daily_records = data.pop("tokenUsageDailyRecords", None) or []
    raw_records = data.pop("tokenUsageRecords", None) or []
    print(f"提取 tokenUsageDailyRecords: {len(daily_records)}")
    print(f"丢弃 tokenUsageRecords (legacy): {len(raw_records)}")

    token_path = os.path.join(target_dir, "token_usage.json")
    token_exists = os.path.exists(token_path)
    if not daily_records and token_exists:
        # Nothing to extract (e.g. the script already ran once): keep the
        # existing token_usage.json instead of replacing it with an empty list.
        print(f"未提取到 tokenUsageDailyRecords, 保留现有 {token_path}")
    else:
        if token_exists:
            token_backup = os.path.join(target_dir, f"token_usage.json.bak-{ts}")
            os.rename(token_path, token_backup)
            print(f"已备份现有 token_usage.json -> {token_backup}")
        _dump_json_atomic(token_path, daily_records, indent=2)
        print(f"写入 -> {token_path} ({len(daily_records)} 条)")

    _dump_json_atomic(data_path, data, indent=4)
    print(f"重写 -> {data_path}(剩余字段: {list(data.keys())}")
    return 0
if __name__ == "__main__":
    # Exactly one positional argument is accepted: the directory holding data.yml.
    cli_args = sys.argv[1:]
    if len(cli_args) == 1:
        sys.exit(main(cli_args[0]))
    # Wrong argument count: show the module docstring as usage text and fail.
    print(__doc__, file=sys.stderr)
    sys.exit(1)

View File

@@ -79,6 +79,9 @@ object JChatGPT : KotlinPlugin(
PluginConfig.reload() PluginConfig.reload()
PluginData.reload() PluginData.reload()
// 初始化 token 使用日聚合存储(独立 JSON 文件,绕开 yamlkt 大数据 bug
TokenUsageStore.init(dataFolder)
// 设置Token // 设置Token
LargeLanguageModels.reload() LargeLanguageModels.reload()
@@ -657,21 +660,19 @@ object JChatGPT : KotlinPlugin(
) )
) )
// 记录token使用量 // 记录token使用量按日聚合独立JSON文件
lastTokenUsage?.let { usage -> lastTokenUsage?.let { usage ->
val now = OffsetDateTime.now().toEpochSecond() val now = OffsetDateTime.now().toEpochSecond()
val groupId = if (event is GroupMessageEvent) event.subject.id else null val groupId = if (event is GroupMessageEvent) event.subject.id else null
val record = TokenUsageRecord( TokenUsageStore.record(
timestamp = now, timestamp = now,
userId = event.sender.id, userId = event.sender.id,
userNickname = event.senderName, userNickname = event.senderName,
groupId = groupId, groupId = groupId,
model = PluginConfig.chatModel,
promptTokens = usage.promptTokens ?: 0, promptTokens = usage.promptTokens ?: 0,
completionTokens = usage.completionTokens ?: 0, completionTokens = usage.completionTokens ?: 0,
totalTokens = usage.totalTokens ?: 0 totalTokens = usage.totalTokens ?: 0
) )
PluginData.tokenUsageRecords.add(record)
} }
// 处理最后一个工具调用 // 处理最后一个工具调用

View File

@@ -10,10 +10,7 @@ import net.mamoe.mirai.contact.Group
import net.mamoe.mirai.contact.Member import net.mamoe.mirai.contact.Member
import net.mamoe.mirai.contact.User import net.mamoe.mirai.contact.User
import top.jie65535.mirai.JChatGPT.reload import top.jie65535.mirai.JChatGPT.reload
import java.time.Instant
import java.time.ZoneId
import java.time.LocalDate import java.time.LocalDate
import java.time.format.DateTimeFormatter
object PluginCommands : CompositeCommand( object PluginCommands : CompositeCommand(
JChatGPT, "jgpt", description = "J OpenAI ChatGPT" JChatGPT, "jgpt", description = "J OpenAI ChatGPT"
@@ -81,29 +78,27 @@ object PluginCommands : CompositeCommand(
suspend fun CommandSender.tokens(days: Int = 7) { suspend fun CommandSender.tokens(days: Int = 7) {
validateDays(days) validateDays(days)
if (PluginData.tokenUsageRecords.isEmpty()) { if (TokenUsageStore.all.isEmpty()) {
sendMessage("暂无 Token 使用记录") sendMessage("暂无 Token 使用记录")
return return
} }
val cutoff = calculateCutoffTimestamp(days) val cutoff = calculateCutoffDate(days)
val todayStart = calculateTodayStartTimestamp() val today = LocalDate.now().toString()
// 一次遍历计算所有统计数据
data class Statistics( data class Statistics(
var totalTokens: Int = 0, var totalTokens: Long = 0,
var todayTokens: Int = 0, var todayTokens: Long = 0,
val userTotals: MutableMap<Long, Pair<String, Int>> = mutableMapOf(), val userTotals: MutableMap<Long, Pair<String, Long>> = mutableMapOf(),
val groupTotals: MutableMap<Long, Int> = mutableMapOf(), val groupTotals: MutableMap<Long, Long> = mutableMapOf(),
val users: MutableSet<Long> = mutableSetOf() val users: MutableSet<Long> = mutableSetOf()
) )
val stats = PluginData.tokenUsageRecords.fold(Statistics()) { acc, record -> val stats = TokenUsageStore.all.fold(Statistics()) { acc, record ->
if (record.timestamp >= cutoff) { if (record.date >= cutoff) {
acc.totalTokens += record.totalTokens acc.totalTokens += record.totalTokens
acc.users.add(record.userId) acc.users.add(record.userId)
// 累计用户Token
val existing = acc.userTotals[record.userId] val existing = acc.userTotals[record.userId]
if (existing == null) { if (existing == null) {
acc.userTotals[record.userId] = record.userNickname to record.totalTokens acc.userTotals[record.userId] = record.userNickname to record.totalTokens
@@ -111,13 +106,12 @@ object PluginCommands : CompositeCommand(
acc.userTotals[record.userId] = existing.first to (existing.second + record.totalTokens) acc.userTotals[record.userId] = existing.first to (existing.second + record.totalTokens)
} }
// 累计群组Token
record.groupId?.let { groupId -> record.groupId?.let { groupId ->
acc.groupTotals[groupId] = acc.groupTotals.getOrDefault(groupId, 0) + record.totalTokens acc.groupTotals[groupId] = acc.groupTotals.getOrDefault(groupId, 0L) + record.totalTokens
} }
} }
if (record.timestamp >= todayStart) { if (record.date == today) {
acc.todayTokens += record.totalTokens acc.todayTokens += record.totalTokens
} }
@@ -151,7 +145,7 @@ object PluginCommands : CompositeCommand(
appendLine(" /jgpt tokensDaily [days] - 每日统计") appendLine(" /jgpt tokensDaily [days] - 每日统计")
appendLine(" /jgpt tokensUsers [limit] - 用户排名") appendLine(" /jgpt tokensUsers [limit] - 用户排名")
appendLine(" /jgpt tokensGroups [limit] - 群组排名") appendLine(" /jgpt tokensGroups [limit] - 群组排名")
appendLine(" /jgpt tokensQuery [userId] [days] - 详细记录") appendLine(" /jgpt tokensQuery [userId] [days] - 每日逐人记录")
appendLine(" /jgpt tokensUserDaily <userId> [days] - 用户日统计") appendLine(" /jgpt tokensUserDaily <userId> [days] - 用户日统计")
} }
@@ -162,19 +156,12 @@ object PluginCommands : CompositeCommand(
suspend fun CommandSender.tokensDaily(days: Int = 7) { suspend fun CommandSender.tokensDaily(days: Int = 7) {
validateDays(days) validateDays(days)
val cutoff = calculateCutoffTimestamp(days) val cutoff = calculateCutoffDate(days)
val dailyStats = PluginData.tokenUsageRecords val dailyStats = TokenUsageStore.all
.filter { it.timestamp >= cutoff } .filter { it.date >= cutoff }
.groupBy { .groupBy { it.date }
LocalDate.ofInstant( .mapValues { (_, records) -> records.sumOf { it.totalTokens } }
Instant.ofEpochSecond(it.timestamp),
ZoneId.systemDefault()
)
}
.mapValues { (_, records) ->
records.sumOf { it.totalTokens }
}
.toSortedMap() .toSortedMap()
if (dailyStats.isEmpty()) { if (dailyStats.isEmpty()) {
@@ -196,13 +183,11 @@ object PluginCommands : CompositeCommand(
suspend fun CommandSender.tokensUsers(limit: Int = 10) { suspend fun CommandSender.tokensUsers(limit: Int = 10) {
require(limit > 0) { "limit must be positive: $limit" } require(limit > 0) { "limit must be positive: $limit" }
val userStats = PluginData.tokenUsageRecords val userStats = TokenUsageStore.all
.groupBy { it.userId } .groupBy { it.userId }
.mapValues { (_, records) -> .mapValues { (_, records) ->
Pair( val latest = records.maxByOrNull { it.date }!!
records.first().userNickname, Pair(latest.userNickname, records.sumOf { it.totalTokens })
records.sumOf { it.totalTokens }
)
} }
.toList() .toList()
.sortedByDescending { it.second.second } .sortedByDescending { it.second.second }
@@ -227,12 +212,10 @@ object PluginCommands : CompositeCommand(
suspend fun CommandSender.tokensGroups(limit: Int = 10) { suspend fun CommandSender.tokensGroups(limit: Int = 10) {
require(limit > 0) { "limit must be positive: $limit" } require(limit > 0) { "limit must be positive: $limit" }
val groupStats = PluginData.tokenUsageRecords val groupStats = TokenUsageStore.all
.filter { it.groupId != null } .filter { it.groupId != null }
.groupBy { it.groupId!! } .groupBy { it.groupId!! }
.mapValues { (_, records) -> .mapValues { (_, records) -> records.sumOf { it.totalTokens } }
records.sumOf { it.totalTokens }
}
.toList() .toList()
.sortedByDescending { it.second } .sortedByDescending { it.second }
.take(limit) .take(limit)
@@ -256,12 +239,12 @@ object PluginCommands : CompositeCommand(
suspend fun CommandSender.tokensQuery(userId: Long?, days: Int = 7) { suspend fun CommandSender.tokensQuery(userId: Long?, days: Int = 7) {
validateDays(days) validateDays(days)
val cutoff = calculateCutoffTimestamp(days) val cutoff = calculateCutoffDate(days)
val filtered = PluginData.tokenUsageRecords val filtered = TokenUsageStore.all
.filter { it.timestamp >= cutoff } .filter { it.date >= cutoff }
.filter { userId == null || it.userId == userId } .filter { userId == null || it.userId == userId }
.sortedByDescending { it.timestamp } .sortedWith(compareByDescending<TokenUsageDailyRecord> { it.date }.thenByDescending { it.totalTokens })
.take(DEFAULT_QUERY_LIMIT) .take(DEFAULT_QUERY_LIMIT)
if (filtered.isEmpty()) { if (filtered.isEmpty()) {
@@ -270,15 +253,12 @@ object PluginCommands : CompositeCommand(
} }
val response = buildString { val response = buildString {
appendLine("最近 $days 天使用记录(最多显示${DEFAULT_QUERY_LIMIT}条):") appendLine("最近 $days 天使用记录(最多显示${DEFAULT_QUERY_LIMIT},按日聚合")
appendLine() appendLine()
filtered.forEach { record -> filtered.forEach { record ->
val time = Instant.ofEpochSecond(record.timestamp)
.atZone(ZoneId.systemDefault())
.format(DateTimeFormatter.ofPattern("MM-dd HH:mm"))
val location = if (record.groupId != null) "${record.groupId}" else "私聊" val location = if (record.groupId != null) "${record.groupId}" else "私聊"
appendLine("[$time] $location - ${record.userNickname}") appendLine("[${record.date}] $location - ${record.userNickname}")
appendLine(" 模型: ${record.model}, Tokens: ${formatNumber(record.totalTokens)} " + appendLine(" 调用 ${record.callCount}, Tokens: ${formatNumber(record.totalTokens)} " +
"(输入: ${formatNumber(record.promptTokens)}, 输出: ${formatNumber(record.completionTokens)})") "(输入: ${formatNumber(record.promptTokens)}, 输出: ${formatNumber(record.completionTokens)})")
appendLine() appendLine()
} }
@@ -290,36 +270,28 @@ object PluginCommands : CompositeCommand(
suspend fun CommandSender.tokensUserDaily(userId: Long, days: Int = 7) { suspend fun CommandSender.tokensUserDaily(userId: Long, days: Int = 7) {
validateDays(days) validateDays(days)
val cutoff = calculateCutoffTimestamp(days) val cutoff = calculateCutoffDate(days)
// 先过滤用户记录,同时获取昵称 val userRecords = TokenUsageStore.all
val userRecords = PluginData.tokenUsageRecords .filter { it.date >= cutoff && it.userId == userId }
.filter { it.timestamp >= cutoff && it.userId == userId }
if (userRecords.isEmpty()) { if (userRecords.isEmpty()) {
sendMessage("用户 $userId 在指定时间范围内无使用记录") sendMessage("用户 $userId 在指定时间范围内无使用记录")
return return
} }
val userNickname = userRecords.first().userNickname val userNickname = userRecords.maxByOrNull { it.date }!!.userNickname
val userDailyStats = userRecords val userDailyStats = userRecords
.groupBy { .groupBy { it.date }
LocalDate.ofInstant( .mapValues { (_, records) -> records.sumOf { it.totalTokens } }
Instant.ofEpochSecond(it.timestamp),
ZoneId.systemDefault()
)
}
.mapValues { (_, records) ->
records.sumOf { it.totalTokens }
}
.toSortedMap() .toSortedMap()
val response = buildString { val response = buildString {
appendLine("用户 $userNickname 最近 $days 天 Token 使用统计:") appendLine("用户 $userNickname 最近 $days 天 Token 使用统计:")
appendLine() appendLine()
userDailyStats.forEach { (date, total) -> userDailyStats.forEach { (date, total) ->
appendLine("$date: $total tokens") appendLine("$date: ${formatNumber(total)} tokens")
} }
appendLine() appendLine()
appendLine("总计: ${formatNumber(userDailyStats.values.sum())} tokens") appendLine("总计: ${formatNumber(userDailyStats.values.sum())} tokens")
@@ -330,23 +302,10 @@ object PluginCommands : CompositeCommand(
// ==================== 辅助函数 ==================== // ==================== 辅助函数 ====================
/** /**
* 计算截止时间戳(指定天数前的起始时间 00:00:00 * 计算截止日期字符串(指定天数前的日期,含今天共 days 天
* 最近N天包含今天所以要从 (N-1) 天前开始算
*/ */
private fun calculateCutoffTimestamp(days: Int): Long { private fun calculateCutoffDate(days: Int): String {
return LocalDate.now() return LocalDate.now().minusDays((days - 1).toLong()).toString()
.minusDays((days - 1).toLong())
.atStartOfDay(ZoneId.systemDefault())
.toEpochSecond()
}
/**
* 计算今天的起始时间戳00:00:00
*/
private fun calculateTodayStartTimestamp(): Long {
return LocalDate.now()
.atStartOfDay(ZoneId.systemDefault())
.toEpochSecond()
} }
/** /**

View File

@@ -40,26 +40,26 @@ data class FavorabilityInfo(
} }
/** /**
* Token使用记录数据类 * Token使用日聚合记录。按 (date, userId, groupId) 维度合并。由 [TokenUsageStore] 持久化到独立 JSON 文件。
* @param timestamp Unix时间戳 * @param date 本地时区下的日期,格式 yyyy-MM-dd
* @param userId 用户QQ * @param userId QQ
* @param userNickname 用户昵称 * @param userNickname 最近一次记录到的昵称
* @param groupId 群号私聊时为null * @param groupId 群号私聊时为null
* @param model 模型名称 * @param promptTokens 当天累计输入token
* @param promptTokens 输入token * @param completionTokens 当天累计输出token
* @param completionTokens 输出token * @param totalTokens 当天累计总token
* @param totalTokens 总token * @param callCount 当天调用次
*/ */
@Serializable @Serializable
data class TokenUsageRecord( data class TokenUsageDailyRecord(
val timestamp: Long, val date: String,
val userId: Long, val userId: Long,
val userNickname: String, val userNickname: String,
val groupId: Long?, val groupId: Long?,
val model: String, val promptTokens: Long = 0,
val promptTokens: Int, val completionTokens: Long = 0,
val completionTokens: Int, val totalTokens: Long = 0,
val totalTokens: Int val callCount: Int = 0
) )
object PluginData : AutoSavePluginData("data") { object PluginData : AutoSavePluginData("data") {
@@ -75,11 +75,6 @@ object PluginData : AutoSavePluginData("data") {
*/ */
val userFavorability by value(mutableMapOf<Long, FavorabilityInfo>()) val userFavorability by value(mutableMapOf<Long, FavorabilityInfo>())
/**
* Token使用记录
*/
val tokenUsageRecords by value(mutableListOf<TokenUsageRecord>())
/** /**
* 添加对话记忆 * 添加对话记忆
*/ */

View File

@@ -0,0 +1,113 @@
package top.jie65535.mirai
import kotlinx.serialization.builtins.ListSerializer
import kotlinx.serialization.json.Json
import java.io.File
import java.time.Instant
import java.time.LocalDate
import java.time.ZoneId
import java.time.format.DateTimeFormatter
/**
 * Daily-aggregated token usage store.
 *
 * Persists to its own JSON file (`token_usage.json`) instead of going through the
 * plugin-data system; per the original author's note this sidesteps a yamlkt
 * encode/decode incompatibility on large data sets.
 *
 * Every access to the in-memory list ([init], [all], [record]) is guarded by this
 * object's monitor, so readers never observe the list while it is being modified.
 */
object TokenUsageStore {
    private val json = Json {
        prettyPrint = true
        ignoreUnknownKeys = true
        encodeDefaults = true
    }
    private val dateFmt = DateTimeFormatter.ISO_LOCAL_DATE
    private val listSerializer = ListSerializer(TokenUsageDailyRecord.serializer())

    /** Matches runs of two or more spaces; hoisted so it is compiled only once. */
    private val multiSpace = Regex(" {2,}")

    private lateinit var file: File
    private val records = mutableListOf<TokenUsageDailyRecord>()

    /**
     * Call once from `onEnable`, passing the plugin data directory.
     *
     * Loads `token_usage.json` if it exists and is non-empty. If decoding fails the
     * store starts empty and a best-effort copy of the unreadable file is written
     * next to it as `token_usage.json.broken-<millis>`; neither failure propagates.
     *
     * @param dataFolder directory that holds (or will hold) `token_usage.json`
     */
    @Synchronized
    fun init(dataFolder: File) {
        file = File(dataFolder, "token_usage.json")
        records.clear()
        if (!file.exists() || file.length() == 0L) return
        try {
            records.addAll(json.decodeFromString(listSerializer, file.readText()))
        } catch (_: Exception) {
            // Loading must not block plugin start-up: keep a copy of the bad file
            // and continue with an empty store.
            try {
                val backup = File(file.parentFile, "token_usage.json.broken-${System.currentTimeMillis()}")
                file.copyTo(backup, overwrite = true)
            } catch (_: Exception) {
                // The backup itself is best-effort; a failure here must not abort start-up either.
            }
        }
    }

    /**
     * Snapshot of all aggregated rows.
     *
     * A copy is returned (taken under the same lock [record] holds) so callers can
     * iterate it while other threads keep recording usage.
     */
    val all: List<TokenUsageDailyRecord>
        get() = synchronized(this) { records.toList() }

    /**
     * Adds one call's token usage to the row for (local date, [userId], [groupId]),
     * creating the row if it does not exist, then writes the store to disk.
     * Disk-write failures are swallowed and never thrown to the caller.
     *
     * @param timestamp epoch seconds of the call; converted to a date in the system default zone
     * @param userId id of the user who triggered the call
     * @param userNickname nickname to store; sanitized, and ignored on update if it sanitizes to empty
     * @param groupId group id, or `null` for a private chat
     * @param promptTokens prompt tokens used by this call
     * @param completionTokens completion tokens used by this call
     * @param totalTokens total tokens used by this call
     */
    @Synchronized
    fun record(
        timestamp: Long,
        userId: Long,
        userNickname: String,
        groupId: Long?,
        promptTokens: Int,
        completionTokens: Int,
        totalTokens: Int
    ) {
        val date = LocalDate.ofInstant(Instant.ofEpochSecond(timestamp), ZoneId.systemDefault())
            .format(dateFmt)
        val nickname = sanitizeNickname(userNickname)
        val idx = records.indexOfFirst {
            it.date == date && it.userId == userId && it.groupId == groupId
        }
        if (idx >= 0) {
            val r = records[idx]
            records[idx] = r.copy(
                // Keep the previously stored nickname when the new one is empty after sanitizing.
                userNickname = nickname.ifEmpty { r.userNickname },
                promptTokens = r.promptTokens + promptTokens,
                completionTokens = r.completionTokens + completionTokens,
                totalTokens = r.totalTokens + totalTokens,
                callCount = r.callCount + 1
            )
        } else {
            records.add(
                TokenUsageDailyRecord(
                    date = date,
                    userId = userId,
                    userNickname = nickname,
                    groupId = groupId,
                    promptTokens = promptTokens.toLong(),
                    completionTokens = completionTokens.toLong(),
                    totalTokens = totalTokens.toLong(),
                    callCount = 1
                )
            )
        }
        save()
    }

    /**
     * Replaces ISO control characters and Unicode FORMAT-category characters with a
     * space, trims the result and collapses runs of spaces into one, so line breaks
     * or zero-width characters in a nickname cannot disturb the JSON or the output.
     */
    private fun sanitizeNickname(s: String): String {
        if (s.isEmpty()) return s
        val cleaned = buildString(s.length) {
            for (c in s) {
                if (c == ' ' || (!c.isISOControl() && c.category != CharCategory.FORMAT)) append(c)
                else append(' ')
            }
        }
        return cleaned.trim().replace(multiSpace, " ")
    }

    /**
     * Writes all rows to a sibling `.tmp` file and then moves it over the real file.
     * Callers must hold this object's monitor. Any failure is swallowed so that a
     * disk problem never interrupts the chat flow.
     */
    private fun save() {
        try {
            val tmp = File(file.parentFile, "${file.name}.tmp")
            tmp.writeText(json.encodeToString(listSerializer, records))
            try {
                // Prefer an atomic rename so a crash cannot leave a half-written file.
                java.nio.file.Files.move(
                    tmp.toPath(),
                    file.toPath(),
                    java.nio.file.StandardCopyOption.REPLACE_EXISTING,
                    java.nio.file.StandardCopyOption.ATOMIC_MOVE
                )
            } catch (_: java.io.IOException) {
                // Atomic move unsupported or refused on this file system: fall back to a plain replace.
                java.nio.file.Files.move(
                    tmp.toPath(),
                    file.toPath(),
                    java.nio.file.StandardCopyOption.REPLACE_EXISTING
                )
            }
        } catch (_: Exception) {
            // Deliberately best-effort: a failed write must not break the conversation flow.
        }
    }
}