提交 bb6cc60d authored 作者: 陈泽健's avatar 陈泽健

feat(server): 新增MySQL、Redis和EMQX深度检测功能

- 新增MySQL活跃查询详情、TOP5耗时查询和慢查询统计
- 新增MySQL缺少索引表检测和数据库分布统计
- 新增Redis Keyspace信息和键类型分布采样分析
- 新增Redis持久化状态、复制状态和慢日志TOP10检测
- 新增EMQX客户端连接详情、Broker统计和综合指标监控
- 新增EMQX消息丢弃详情和连接异常统计功能
- 新增Java应用GC统计和JVM堆内存配置详情
- 优化数据库列表过滤规则,排除系统数据库和警告信息
上级 d511f417
......@@ -157,7 +157,9 @@
"Bash(2S)",
"Bash(chmod +x \"C:/Users/UBAINS/Desktop/Test/test_mysql.sh\")",
"Bash(cd \"E:\\\\\\\\GithubData\\\\\\\\ubains-module-test\\\\\\\\AuxiliaryTool\\\\\\\\ScriptTool\\\\\\\\新服务自检\")",
"Bash(sed -i '1914,1985d' check_server_health.ps1)"
"Bash(sed -i '1914,1985d' check_server_health.ps1)",
"Bash(awk '/^ # EMQX增强检测 - 综合指标统计/,/^ catch {/' \"C:\\\\Users\\\\UBAINS\\\\Desktop\\\\Test\\\\check_server_health.ps1\")",
"Bash(awk 'NR>=2750 && NR<=2800' \"E:\\\\GithubData\\\\ubains-module-test\\\\AuxiliaryTool\\\\ScriptTool\\\\新服务自检\\\\check_server_health.ps1\")"
]
}
}
......@@ -2295,6 +2295,174 @@ function Test-MySQLStatus {
Message = "Binlog日志保留时间"
}
}
# Added: summary of currently active queries.
# Handles an "ACTIVE_PROCESSLIST:<payload>" line; a payload of "N/A" produces no result row.
elseif ($line -match "^ACTIVE_PROCESSLIST:(.+)$") {
$processInfo = $Matches[1].Trim()
if ($processInfo -ne "N/A") {
# The payload is reported verbatim in Message
$results += [PSCustomObject]@{
Name = "活跃查询详情"
Value = "已统计"
Threshold = "-"
Status = "正常"
Message = $processInfo
}
}
}
# Added: TOP5 longest-running queries.
# NOTE(review): the captured payload ($longQueries) is only compared against "N/A";
# Message is a fixed label, so the query rows themselves are not reported - confirm this is intended.
elseif ($line -match "^LONG_QUERIES_TOP5:(.+)$") {
$longQueries = $Matches[1].Trim()
if ($longQueries -ne "N/A") {
$results += [PSCustomObject]@{
Name = "耗时查询TOP5"
Value = "已获取"
Threshold = "-"
Status = "正常"
Message = "执行时间最长的查询"
}
}
}
# Added: statistics of the single most expensive query.
# Handles a "SLOW_QUERY_TOP1:<payload>" line; the payload is reported verbatim unless it is "N/A".
elseif ($line -match "^SLOW_QUERY_TOP1:(.+)$") {
$slowQueryInfo = $Matches[1].Trim()
if ($slowQueryInfo -ne "N/A") {
$results += [PSCustomObject]@{
Name = "最耗时查询"
Value = "已统计"
Threshold = "-"
Status = "正常"
Message = $slowQueryInfo
}
}
}
# Added: tables that have no index at all.
# Handles a "TABLES_WITHOUT_INDEX:<count>" line. A count above zero is recorded
# as a warning row and raised through Add-Issue; zero is recorded as a normal row.
elseif ($line -match "^TABLES_WITHOUT_INDEX:(\d+)$") {
    $missingIndexCount = [int]$Matches[1]
    if ($missingIndexCount -gt 0) {
        $results += [PSCustomObject]@{
            Name = "缺少索引的表"
            Value = "$missingIndexCount 个"
            Threshold = ">0个警告"
            Status = "警告"
            Message = "大表(>1000行)缺少索引"
        }
        # ${...} is required here: CJK characters are valid variable-name characters,
        # so "$missingIndexCount个大表缺少索引" would be parsed as ONE (undefined)
        # variable and the count would silently vanish from the message.
        Add-Issue -Message "发现${missingIndexCount}个大表缺少索引" -Level "警告"
    } else {
        $results += [PSCustomObject]@{
            Name = "缺少索引的表"
            Value = "无"
            Threshold = ">0个警告"
            Status = "正常"
            Message = "所有大表都有索引"
        }
    }
}
# Added: database distribution.
# Handles a "DATABASE_LIST:<payload>" line; the payload is reported verbatim unless it is "N/A".
elseif ($line -match "^DATABASE_LIST:(.+)$") {
$dbInfo = $Matches[1].Trim()
if ($dbInfo -ne "N/A") {
$results += [PSCustomObject]@{
Name = "数据库分布"
Value = "已统计"
Threshold = "-"
Status = "正常"
Message = $dbInfo
}
}
}
# Added: InnoDB transaction details.
# Handles an "INNODB_TRX_DETAIL:<payload>" line; the payload is reported verbatim unless it is "N/A".
elseif ($line -match "^INNODB_TRX_DETAIL:(.+)$") {
$trxDetail = $Matches[1].Trim()
if ($trxDetail -ne "N/A") {
$results += [PSCustomObject]@{
Name = "InnoDB事务详情"
Value = "已统计"
Threshold = "-"
Status = "正常"
Message = $trxDetail
}
}
}
# Added: TOP20 table statistics of the ubains schema.
# Handles a "UBAINS_TABLES_TOP20:<payload>" line; the payload is reported verbatim unless it is "N/A".
elseif ($line -match "^UBAINS_TABLES_TOP20:(.+)$") {
$topTableInfo = $Matches[1].Trim()
if ($topTableInfo -ne "N/A") {
$results += [PSCustomObject]@{
Name = "ubains表统计"
Value = "已统计"
Threshold = "-"
Status = "正常"
Message = $topTableInfo
}
}
}
# Added: replication status details.
# Handles a "REPLICATION_DETAIL:<payload>" line. Any payload other than "N/A" or "MASTER"
# is recorded as a replica ("从库") row with the payload as its message.
elseif ($line -match "^REPLICATION_DETAIL:(.+)$") {
$replDetail = $Matches[1].Trim()
if ($replDetail -ne "N/A" -and $replDetail -ne "MASTER") {
$results += [PSCustomObject]@{
Name = "复制状态详情"
Value = "从库"
Threshold = "-"
Status = "正常"
Message = $replDetail
}
} elseif ($replDetail -eq "MASTER") {
# The MASTER status has already been reported earlier, so nothing is added here
}
}
# Added: InnoDB buffer pool details.
# Handles an "INNODB_BP_DETAIL:<payload>" line; the payload is reported verbatim unless it is "N/A".
elseif ($line -match "^INNODB_BP_DETAIL:(.+)$") {
$bpDetail = $Matches[1].Trim()
if ($bpDetail -ne "N/A") {
$results += [PSCustomObject]@{
Name = "InnoDB缓冲池详情"
Value = "已获取"
Threshold = "-"
Status = "正常"
Message = $bpDetail
}
}
}
# Added: lock information details.
# Handles a "LOCK_DETAIL:<payload>" line; the payload is reported verbatim unless it is "N/A".
elseif ($line -match "^LOCK_DETAIL:(.+)$") {
$lockDetail = $Matches[1].Trim()
if ($lockDetail -ne "N/A") {
$results += [PSCustomObject]@{
Name = "锁信息详情"
Value = "已统计"
Threshold = "-"
Status = "正常"
Message = $lockDetail
}
}
}
# Added: table fragmentation details.
# Handles a "FRAGMENTED_DETAIL:<payload>" line. The payload must contain "Count:<n>":
# n = 0 is recorded as a normal row, any other n as a warning row carrying the payload.
# A payload without "Count:<n>" (or equal to "N/A") produces no row.
elseif ($line -match "^FRAGMENTED_DETAIL:(.+)$") {
$fragDetail = $Matches[1].Trim()
if ($fragDetail -ne "N/A") {
if ($fragDetail -match "Count:(\d+)") {
$fragCount = [int]$matches[1]
if ($fragCount -eq 0) {
$results += [PSCustomObject]@{
Name = "表碎片检测详情"
Value = "无碎片"
Threshold = ">0警告"
Status = "正常"
Message = "无表碎片"
}
} else {
# Any non-zero fragmented-table count is reported as a warning
$fragStatus = "警告"
$results += [PSCustomObject]@{
Name = "表碎片检测详情"
Value = "已统计"
Threshold = ">0警告"
Status = $fragStatus
Message = $fragDetail
}
}
}
}
}
}
# 清理临时脚本
......@@ -2309,7 +2477,7 @@ function Test-MySQLStatus {
# ========== 数据库列表 ==========
$databases = Invoke-SSHCommand "docker exec umysql mysql -uroot -p'$MYSQL_PASSWORD' -e 'SHOW DATABASES;' 2>&1" -Timeout 10
if ($databases) {
$dbList = ($databases -split "`n" | Where-Object { $_ -notmatch "Database|^\+" } | Where-Object { $_ -ne "information_schema" -and $_ -ne "performance_schema" -and $_ -ne "mysql" }) -join ", "
$dbList = ($databases -split "`n" | Where-Object { $_ -notmatch "Database|^\+|Warning|password" } | Where-Object { $_ -ne "information_schema" -and $_ -ne "performance_schema" -and $_ -ne "mysql" -and $_ -ne "sys" }) -join ", "
$results += [PSCustomObject]@{
Name = "数据库列表"
Value = "已获取"
......@@ -2434,6 +2602,244 @@ function Test-RedisStatus {
}
}
# Redis enhanced check - keyspace information.
# Runs "INFO keyspace" in the uredis container, sums keys and expiring keys over
# every "db<N>:keys=<k>,expires=<e>" line and records one summary row. Failures
# are logged at WARN level and do not abort the surrounding checks.
try {
    $keyspaceInfo = Invoke-SSHCommand "docker exec uredis redis-cli -a '$REDIS_PASSWORD' INFO keyspace 2>&1" -Timeout 10
    if ($keyspaceInfo) {
        # Work on a single newline-joined string regardless of how the output arrived
        if ($keyspaceInfo -is [array]) {
            $keyspaceInfo = $keyspaceInfo -join "`n"
        }

        $keyspaceDetails = @()
        $totalKeys = 0
        $totalExpires = 0
        foreach ($keyspaceLine in ($keyspaceInfo -split "`n")) {
            # Lines that are not per-database entries (headers, warnings) are ignored
            if (-not ($keyspaceLine -match "db\d+:keys=(\d+),expires=(\d+)")) {
                continue
            }
            $keys = [int]$matches[1]
            $expires = [int]$matches[2]
            $totalKeys += $keys
            $totalExpires += $expires
            $keyspaceDetails += "Keys:$keys,Expires:$expires"
        }

        # A row is only recorded when at least one database entry was parsed
        if ($keyspaceDetails.Count -gt 0) {
            # Share of keys that carry an expiry, in percent; 0 when there are no keys
            $expireRate = 0
            if ($totalKeys -gt 0) {
                $expireRate = [math]::Round(($totalExpires / $totalKeys) * 100, 1)
            }
            $results += [PSCustomObject]@{
                Name = "Redis Keyspace"
                Value = "$totalKeys 键"
                Threshold = "-"
                Status = "正常"
                Message = "总计:$totalKeys, 过期:$totalExpires ($expireRate%)"
            }
        }
    }
}
catch {
    Write-Log "Redis Keyspace检测失败: $($_.Exception.Message)" "WARN"
}
# Redis enhanced check - key type statistics (sampled).
# Samples up to 100 keys with SCAN, asks Redis for the TYPE of each one and records
# the per-type counts as one result row. Failures are logged at WARN level.
try {
    # stderr of the SCAN stage is discarded so that the redis-cli password warning
    # is not handed to xargs as if it were a key name.
    $typeStats = Invoke-SSHCommand "docker exec uredis redis-cli -a '$REDIS_PASSWORD' --scan --count 1000 2>/dev/null | head -100 | xargs -I {} docker exec uredis redis-cli -a '$REDIS_PASSWORD' TYPE {} 2>&1 | sort | uniq -c" -Timeout 30

    # Join BEFORE the error check: on an array, -notmatch filters elements instead
    # of returning a Boolean, so an error line would not stop the parsing below.
    if ($typeStats -is [array]) { $typeStats = $typeStats -join "`n" }

    # -notmatch is case-insensitive, so "error" also covers "Error" and "ERROR"
    if ($typeStats -and $typeStats -notmatch "error") {
        # Each useful line is "<count> <type>" as produced by uniq -c
        $typeDetails = @()
        foreach ($typeLine in ($typeStats -split "`n")) {
            if ($typeLine -match "(\d+)\s+(string|hash|list|set|zset|stream)") {
                $count = $matches[1]
                $type = $matches[2]
                $typeDetails += "${type}:${count}"
            }
        }
        if ($typeDetails.Count -gt 0) {
            $results += [PSCustomObject]@{
                Name = "Redis键类型分布"
                Value = "已采样"
                Threshold = "-"
                Status = "正常"
                Message = ($typeDetails -join ", ")
            }
        }
    }
}
catch {
    Write-Log "Redis键类型统计失败: $($_.Exception.Message)" "WARN"
}
# ========== Redis deep check ==========
# Uploads redis_depth_check.sh to the remote host with pscp, runs it over SSH and
# turns each recognised "KEY:payload" output line into a result row. The step is
# skipped with a WARN log when the script or pscp.exe is missing next to this
# script, or when the upload itself fails.
$localRedisScriptPath = Join-Path $PSScriptRoot "redis_depth_check.sh"
$remoteScriptPath = "/tmp/redis_depth_check_$pid.sh"
if (Test-Path $localRedisScriptPath) {
    $pscpPath = Join-Path $PSScriptRoot "pscp.exe"
    if (Test-Path $pscpPath) {
        Write-Log "开始Redis深度检测..."

        # Output keys whose payload is reported verbatim: key -> row name / value label
        $redisInfoRows = @{
            "PERSISTENCE_STATUS" = @{ Name = "Redis持久化状态"; Value = "已获取" }
            "REPLICATION_STATUS" = @{ Name = "Redis复制状态"; Value = "已获取" }
            "COMMAND_STATS_TOP5" = @{ Name = "Redis命令统计TOP5"; Value = "已统计" }
            "CONFIG_CHECK" = @{ Name = "Redis配置检查"; Value = "已获取" }
        }

        # Upload the script with pscp
        $pscpArgs = @(
            "-P", $script:Port,
            "-pw", $script:Password,
            $localRedisScriptPath,
            "${script:Username}@${script:HostName}:${remoteScriptPath}"
        )
        $uploadResult = & $pscpPath $pscpArgs 2>&1

        # Only run the remote script when the upload succeeded; otherwise the SSH
        # command would just fail on a missing file and the cause would be hidden.
        $redisDepthResult = $null
        if ($LASTEXITCODE -ne 0) {
            Write-Log "Redis深度检测脚本上传失败: $uploadResult" "WARN"
        } else {
            $redisDepthResult = Invoke-SSHCommand "chmod +x $remoteScriptPath && $remoteScriptPath 2>&1" -Timeout 30
        }

        if ($redisDepthResult) {
            Write-Log "Redis深度检测脚本结果: $redisDepthResult" "DEBUG"
            # Parse the script output line by line
            foreach ($line in $redisDepthResult -split "`n") {
                # Persistence / replication / command stats / config: payload reported as-is
                if ($line -match "^(PERSISTENCE_STATUS|REPLICATION_STATUS|COMMAND_STATS_TOP5|CONFIG_CHECK):(.+)$") {
                    $rowSpec = $redisInfoRows[$Matches[1]]
                    $payload = $Matches[2].Trim()
                    if ($payload -ne "N/A") {
                        $results += [PSCustomObject]@{
                            Name = $rowSpec.Name
                            Value = $rowSpec.Value
                            Threshold = "-"
                            Status = "正常"
                            Message = $payload
                        }
                    }
                }
                # Slow log TOP10
                elseif ($line -match "^SLOW_LOG_TOP10:(.+)$") {
                    $slowLogInfo = $Matches[1].Trim()
                    if ($slowLogInfo -ne "N/A" -and $slowLogInfo -notmatch "Count:0") {
                        if ($slowLogInfo -match "Count:(\d+),Slowest:(\d+)us") {
                            $slowCount = $matches[1]
                            # Cast to a number: with a string on the left, -gt would compare
                            # text, and e.g. "2000" -gt 10000000 evaluates to true.
                            $slowestUs = [long]$matches[2]
                            $slowestMs = [math]::Round($slowestUs / 1000, 2)
                            # 10,000,000 microseconds = 10 seconds
                            $slowStatus = if ($slowestUs -gt 10000000) { "警告" } else { "正常" }
                            $results += [PSCustomObject]@{
                                Name = "Redis慢日志TOP10"
                                Value = "${slowCount}条"
                                Threshold = ">10s警告"
                                Status = $slowStatus
                                Message = "最慢:${slowestMs}ms, 共${slowCount}条"
                            }
                        }
                    } else {
                        $results += [PSCustomObject]@{
                            Name = "Redis慢日志TOP10"
                            Value = "无"
                            Threshold = ">10s警告"
                            Status = "正常"
                            Message = "无慢查询"
                        }
                    }
                }
                # Client details: a row is only added when the payload has "Total:<n>"
                elseif ($line -match "^CLIENT_DETAIL:(.+)$") {
                    $clientDetail = $Matches[1].Trim()
                    if ($clientDetail -ne "N/A") {
                        if ($clientDetail -match "Total:(\d+)") {
                            $totalCount = $matches[1]
                            $results += [PSCustomObject]@{
                                Name = "Redis客户端详情"
                                Value = "$totalCount 个"
                                Threshold = "-"
                                Status = "正常"
                                Message = $clientDetail
                            }
                        }
                    }
                }
                # Cache hit rate: below 80% is a warning
                elseif ($line -match "^CACHE_HIT_RATE:(.+)$") {
                    $cacheHitInfo = $Matches[1].Trim()
                    if ($cacheHitInfo -ne "N/A") {
                        if ($cacheHitInfo -match "Hits:(\d+),Misses:(\d+),Rate:([\d.]+)%") {
                            $hits = $matches[1]
                            $misses = $matches[2]
                            $rate = $matches[3]
                            $hitStatus = if ([double]$rate -lt 80) { "警告" } else { "正常" }
                            $results += [PSCustomObject]@{
                                Name = "Redis缓存命中率"
                                Value = "${rate}%"
                                Threshold = ">80%"
                                Status = $hitStatus
                                Message = "命中:$hits, 未命中:$misses"
                            }
                        }
                    }
                }
                # Rejected connections: reported only when non-zero, warning above 100
                elseif ($line -match "^REJECTED_CONNECTIONS:(\d+)$") {
                    $rejectedCount = [int]$Matches[1]
                    if ($rejectedCount -gt 0) {
                        $rejectStatus = if ($rejectedCount -gt 100) { "警告" } else { "正常" }
                        $results += [PSCustomObject]@{
                            Name = "Redis连接拒绝"
                            Value = "$rejectedCount 次"
                            Threshold = ">100警告"
                            Status = $rejectStatus
                            Message = "累计被拒绝的连接数"
                        }
                    }
                }
            }
        }
        # Remove the temporary script (rm -f is harmless if the upload never happened)
        $null = Invoke-SSHCommand "rm -f $remoteScriptPath" -Timeout 10
    } else {
        Write-Log "pscp.exe未找到,跳过Redis深度检测" "WARN"
    }
} else {
    Write-Log "redis_depth_check.sh脚本未找到,跳过深度检测" "WARN"
}
# 保存结果
foreach ($result in $results) {
Save-TestResult "Redis缓存" $result
......@@ -2520,16 +2926,233 @@ function Test-EMQXStatus {
}
}
# 消息统计
$messageStats = Invoke-SSHCommand "docker exec uemqx emqx_ctl metrics 2>&1 | grep -E 'messages.delivered|messages.dropped'" -Timeout 10
if ($messageStats) {
$msgLines = $messageStats -split "`n" | Where-Object { $_ -match ":" }
# EMQX增强检测 - 客户端连接详情
try {
# 获取客户端连接详细信息(在线/离线)
$clientDetails = Invoke-SSHCommand "docker exec uemqx emqx_ctl clients list 2>&1 | head -20" -Timeout 15
if ($clientDetails -and $clientDetails -notmatch "command not found|error|Error") {
if ($clientDetails -is [array]) { $clientDetails = $clientDetails -join "`n" }
# 统计连接状态
$totalClients = 0
$connectedClients = 0
$disconnectedClients = 0
# 尝试从emqx metrics获取更详细的统计
$clientMetrics = Invoke-SSHCommand "docker exec uemqx emqx_ctl broker metrics 2>&1 | grep -E 'connections.max|connections.created'" -Timeout 10
if ($clientMetrics) {
if ($clientMetrics -match "connections.max.*?[:\s]+(\d+)") {
$maxConn = $matches[1]
}
if ($clientMetrics -match "connections.created.*?[:\s]+(\d+)") {
$totalCreated = $matches[1]
}
}
# 尝试获取客户端连接错误统计
$connErrors = Invoke-SSHCommand "docker exec uemqx emqx_ctl broker metrics 2>&1 | grep -E 'authentication\.failure|client\.disconnected'" -Timeout 10
Write-Log "EMQX连接错误原始输出: $connErrors" "DEBUG"
if ($connErrors) {
if ($connErrors -is [array]) { $connErrors = $connErrors -join "`n" }
# 解析认证失败和连接断开
$authFailed = if ($connErrors -match "authentication\.failure\s*:\s*(\d+)") { $matches[1] } else { "0" }
$disconnected = if ($connErrors -match "client\.disconnected\s*:\s*(\d+)") { $matches[1] } else { "0" }
$results += [PSCustomObject]@{
Name = "EMQX连接异常统计"
Value = "已统计"
Threshold = ">0警告"
Status = if ([int]$authFailed -gt 0 -or [int]$disconnected -gt 0) { "警告" } else { "正常" }
Message = "认证失败: $authFailed, 连接断开: $disconnected"
}
} else {
# 即使没有匹配到错误,也输出一个正常状态
$results += [PSCustomObject]@{
Name = "EMQX消息统计"
Name = "EMQX连接异常统计"
Value = "已统计"
Threshold = ">0警告"
Status = "正常"
Message = "认证失败: 0, 连接断开: 0"
}
}
}
}
catch {
Write-Log "EMQX客户端详情检测失败: $($_.Exception.Message)" "WARN"
}
# EMQX增强检测 - Broker统计
try {
$brokerStats = Invoke-SSHCommand "docker exec uemqx emqx_ctl broker stats 2>&1" -Timeout 10
Write-Log "EMQX Broker统计原始输出: $brokerStats" "DEBUG"
if ($brokerStats -and $brokerStats -notmatch "Usage:|emqx ctl") {
if ($brokerStats -is [array]) { $brokerStats = $brokerStats -join "`n" }
# 解析broker统计
$statDetails = @()
if ($brokerStats -match "connections\.count\s*:\s*(\d+)") { $statDetails += "连接数:$($matches[1])" }
if ($brokerStats -match "sessions\.count\s*:\s*(\d+)") { $statDetails += "会话数:$($matches[1])" }
if ($brokerStats -match "subscribers\.count\s*:\s*(\d+)") { $statDetails += "订阅者:$($matches[1])" }
if ($brokerStats -match "topics\.count\s*:\s*(\d+)") { $statDetails += "主题数:$($matches[1])" }
if ($brokerStats -match "retained\.count\s*:\s*(\d+)") { $statDetails += "保留消息:$($matches[1])" }
if ($brokerStats -match "channels\.count\s*:\s*(\d+)") { $statDetails += "通道数:$($matches[1])" }
Write-Log "EMQX Broker统计解析结果: $($statDetails -join ', ')" "DEBUG"
if ($statDetails.Count -gt 0) {
$results += [PSCustomObject]@{
Name = "EMQX Broker统计"
Value = "已获取"
Threshold = "-"
Status = "正常"
Message = ($msgLines -join "; ")
Message = $statDetails -join ", "
}
}
}
}
catch {
Write-Log "EMQX Broker统计检测失败: $($_.Exception.Message)" "WARN"
}
# EMQX enhanced check - combined metrics.
# Reads the bytes.*, messages.*, client.*, authentication.* and delivery.* lines of
# "emqx_ctl broker metrics" and records the values that were found as one summary
# row. Failures are logged at WARN level.
try {
    # Fetch the key EMQX metrics
    $emqxFullMetrics = Invoke-SSHCommand "docker exec uemqx emqx_ctl broker metrics 2>&1 | grep -E '^bytes\.|^messages\.|^client\.|^authentication\.|^delivery\.'" -Timeout 10
    # The split pattern must be double-quoted: inside single quotes `n is a literal
    # backtick followed by "n", not a newline, and the logged count would be wrong.
    $emqxMetricLineCount = @($emqxFullMetrics -split "`n").Count
    Write-Log "EMQX综合指标原始输出行数: $emqxMetricLineCount" "DEBUG"
    if ($emqxFullMetrics) {
        if ($emqxFullMetrics -is [array]) { $emqxFullMetrics = $emqxFullMetrics -join "`n" }

        # Parsed metric values, keyed by a short internal name
        $metrics = @{}
        # Message counters
        if ($emqxFullMetrics -match "messages\.publish\s*:\s*(\d+)") { $metrics['publish'] = $matches[1] }
        if ($emqxFullMetrics -match "messages\.received\s*:\s*(\d+)") { $metrics['received'] = $matches[1] }
        if ($emqxFullMetrics -match "messages\.sent\s*:\s*(\d+)") { $metrics['sent'] = $matches[1] }
        if ($emqxFullMetrics -match "messages\.delivered\s*:\s*(\d+)") { $metrics['delivered'] = $matches[1] }
        if ($emqxFullMetrics -match "messages\.acked\s*:\s*(\d+)") { $metrics['acked'] = $matches[1] }
        # Byte counters, formatted as KB
        if ($emqxFullMetrics -match "bytes\.received\s*:\s*(\d+)") {
            $bytesRecv = [long]$matches[1]
            $metrics['bytes_recv'] = "{0:N2}KB" -f ($bytesRecv / 1024)
        }
        if ($emqxFullMetrics -match "bytes\.sent\s*:\s*(\d+)") {
            $bytesSent = [long]$matches[1]
            $metrics['bytes_sent'] = "{0:N2}KB" -f ($bytesSent / 1024)
        }
        # Client activity (parsed and counted in the debug log, not part of the message)
        if ($emqxFullMetrics -match "client\.subscribe\s*:\s*(\d+)") { $metrics['subscribe'] = $matches[1] }
        if ($emqxFullMetrics -match "client\.unsubscribe\s*:\s*(\d+)") { $metrics['unsubscribe'] = $matches[1] }
        # Authentication counters
        if ($emqxFullMetrics -match "authentication\.success\s*:\s*(\d+)") { $metrics['auth_success'] = $matches[1] }
        if ($emqxFullMetrics -match "authentication\.failure\s*:\s*(\d+)") { $metrics['auth_failure'] = $matches[1] }
        Write-Log "EMQX综合指标解析结果: $($metrics.Count) 个指标" "DEBUG"

        # Build the summary text. Byte counters get their own labels so they cannot
        # be confused with the message counters, which previously shared "接收"/"发送".
        $metricDetails = @()
        if ($metrics['publish']) { $metricDetails += "发布:$($metrics['publish'])" }
        if ($metrics['received']) { $metricDetails += "接收:$($metrics['received'])" }
        if ($metrics['sent']) { $metricDetails += "发送:$($metrics['sent'])" }
        if ($metrics['delivered']) { $metricDetails += "投递:$($metrics['delivered'])" }
        if ($metrics['acked']) { $metricDetails += "确认:$($metrics['acked'])" }
        if ($metrics['bytes_recv']) { $metricDetails += "接收字节:$($metrics['bytes_recv'])" }
        if ($metrics['bytes_sent']) { $metricDetails += "发送字节:$($metrics['bytes_sent'])" }
        if ($metrics['auth_success']) { $metricDetails += "认证成功:$($metrics['auth_success'])" }
        # Authentication failures are only listed when there are any
        if ($metrics['auth_failure'] -and $metrics['auth_failure'] -ne "0") { $metricDetails += "认证失败:$($metrics['auth_failure'])" }

        if ($metricDetails.Count -gt 0) {
            $results += [PSCustomObject]@{
                Name = "EMQX综合指标"
                Value = "已获取"
                Threshold = "-"
                Status = "正常"
                Message = $metricDetails -join ", "
            }
        }
    } else {
        Write-Log "EMQX综合指标未获取到数据" "WARN"
    }
}
catch {
    Write-Log "EMQX综合指标检测失败: $($_.Exception.Message)" "WARN"
}
# EMQX enhanced check - dropped message details.
# Reads the dropped/discard lines of "emqx_ctl broker metrics" and records one row:
#   total > 100 -> "严重", total > 10 -> "警告", otherwise "正常".
# A non-normal status is also raised through Add-Issue. When nothing can be read
# (no output or an exception) a "正常" placeholder row is still recorded.
try {
    # Fetch the detailed dropped-message counters
    $droppedStats = Invoke-SSHCommand "docker exec uemqx emqx_ctl broker metrics 2>&1 | grep -iE 'dropped|discard'" -Timeout 10
    Write-Log "EMQX消息丢弃原始输出: $droppedStats" "DEBUG"
    if ($droppedStats) {
        if ($droppedStats -is [array]) { $droppedStats = $droppedStats -join "`n" }

        # Per-reason counters and their display labels
        $dropReasons = [ordered]@{
            "messages.dropped.after_expired" = "过期丢弃"
            "messages.dropped.no_subscribers" = "无订阅者"
            "messages.dropped.await_pubrel_timeout" = "Pubrel超时"
        }
        $droppedDetails = @()
        $reasonSum = 0
        foreach ($metricName in $dropReasons.Keys) {
            # Anchor on the full metric name at line start so that one metric can
            # never be satisfied by a longer metric that merely shares its prefix.
            $metricRegex = "(?m)^\s*" + [regex]::Escape($metricName) + "[ \t:]+(\d+)"
            if ($droppedStats -match $metricRegex) {
                $count = [long]$matches[1]
                $reasonSum += $count
                if ($count -gt 0) {
                    $droppedDetails += "$($dropReasons[$metricName]): $count"
                }
            }
        }

        # "messages.dropped" is the overall counter. It is used as the total on its
        # own - adding the per-reason counters to it would count drops twice. The
        # per-reason sum is only a fallback when the overall counter is absent.
        if ($droppedStats -match "(?m)^\s*messages\.dropped[ \t:]+(\d+)") {
            $droppedTotal = [long]$matches[1]
            if ($droppedTotal -gt 0) {
                $droppedDetails = @("总丢弃: $droppedTotal") + $droppedDetails
            }
        } else {
            $droppedTotal = $reasonSum
        }

        if ($droppedTotal -gt 0) {
            $dropStatus = if ($droppedTotal -gt 100) { "严重" } elseif ($droppedTotal -gt 10) { "警告" } else { "正常" }
            $results += [PSCustomObject]@{
                Name = "EMQX消息丢弃详情"
                Value = "$droppedTotal"
                Threshold = ">10条警告"
                Status = $dropStatus
                Message = if ($droppedDetails.Count -gt 0) { ($droppedDetails -join ", ") } else { "消息丢弃统计" }
            }
            if ($dropStatus -ne "正常") {
                Add-Issue -Message "EMQX检测到消息丢弃: $droppedTotal" -Level $dropStatus
            }
        } else {
            $results += [PSCustomObject]@{
                Name = "EMQX消息丢弃详情"
                Value = ""
                Threshold = ">10条警告"
                Status = "正常"
                Message = "无消息丢弃"
            }
        }
    } else {
        # Even without any data a normal-status row is recorded
        $results += [PSCustomObject]@{
            Name = "EMQX消息丢弃详情"
            Value = ""
            Threshold = ">10条警告"
            Status = "正常"
            Message = "无消息丢弃"
        }
    }
}
catch {
    Write-Log "EMQX消息丢弃详情检测失败: $($_.Exception.Message)" "WARN"
    # A row is recorded even on failure so the check always shows up in the report
    $results += [PSCustomObject]@{
        Name = "EMQX消息丢弃详情"
        Value = "未知"
        Threshold = ">10条警告"
        Status = "正常"
        Message = "无法获取消息丢弃统计"
    }
}
......@@ -2860,51 +3483,137 @@ function Test-JavaApplication {
}
}
# GC日志检测(从容器日志中获取)
$gcLogs = Invoke-SSHCommand "docker logs --tail 100 ujava2 2>&1 | grep -iE 'gc|heap' | tail -5" -Timeout 10
# GC日志检测(从容器日志中获取详细信息
$gcLogs = Invoke-SSHCommand "docker logs --tail 500 ujava2 2>&1 | grep -iE 'GC|heap' | tail -20" -Timeout 10
if ($gcLogs) {
if ($gcLogs -is [array]) { $gcLogs = $gcLogs -join "`n" }
$gcCount = 0
if ($gcLogs -match "Full GC|heap") {
$gcCount = ($gcLogs | Select-String -Pattern "Full GC" -AllMatches).Matches.Count
# 统计GC类型和次数
$fullGCCount = 0
$youngGCCount = 0
$gcDetails = @()
# 解析Full GC
$fullGCMatches = [regex]::Matches($gcLogs, "Full GC\s+\(([^)]+)\)")
$fullGCCount = $fullGCMatches.Count
if ($fullGCCount -gt 0) {
$fullGCReasons = $fullGCMatches | ForEach-Object { $_.Groups[1].Value } | Group-Object | Sort-Object Count -Descending
$topReason = $fullGCReasons[0]
$gcDetails += "Full GC: $fullGCCount 次 (主要原因: $($topReason.Name))"
}
# 解析Young GC (GC (Allocation Failure)等)
$youngGCMatches = [regex]::Matches($gcLogs, "GC\s+\(([^)]+)\)|\[GC\s+\(System\.gc\(\)\]")
$youngGCCount = $youngGCMatches.Count
if ($youngGCCount -gt 0) {
$gcDetails += "Young GC: 约 $youngGCCount 次"
}
# 解析GC时间(如果有的话)
$gcTimeMatches = [regex]::Matches($gcLogs, "(\d+(?:\.\d+)?)\s*secs")
if ($gcTimeMatches.Count -gt 0) {
$totalGCTime = ($gcTimeMatches | ForEach-Object { [double]$_.Groups[1].Value } | Measure-Object -Sum).Sum
$gcDetails += "总GC时间: $($totalGCTime.ToString('F2')) 秒"
}
if ($gcCount -gt 5) {
# 判断状态
if ($fullGCCount -gt 20) {
$status = "严重"
$message = "频繁Full GC: $fullGCCount 次, Young GC: $youngGCCount 次"
}
elseif ($fullGCCount -gt 5) {
$status = "严重"
$message = "频繁Full GC: $gcCount 次"
$message = "频繁Full GC: $fullGCCount 次, Young GC: $youngGCCount 次"
}
elseif ($gcCount -gt 0) {
elseif ($fullGCCount -gt 0 -or $youngGCCount -gt 100) {
$status = "警告"
$message = "检测到Full GC: $gcCount 次"
$message = "检测到GC活动 - Full GC: $fullGCCount 次, Young GC: $youngGCCount 次"
}
else {
$status = "正常"
$message = "GC活动正常"
}
if ($gcDetails.Count -gt 0) {
$message += " - $($gcDetails -join ', ')"
}
$results += [PSCustomObject]@{
Name = "GC状态"
Value = "$gcCount"
Threshold = ">5次严重"
Name = "GC统计"
Value = "F:$fullGCCount/Y:$youngGCCount"
Threshold = "Full GC>5次严重"
Status = $status
Message = $message
}
if ($status -ne "正常") {
Add-Issue -Message "Java应用频繁Full GC: $gcCount" -Level $status
Add-Issue -Message "Java应用GC异常: $message" -Level $status
}
}
# 堆内存设置检测
$heapSettings = Invoke-SSHCommand "docker exec ujava2 ps aux | grep java | grep -oE '-Xm[sx][0-9A-Za-z]+' | head -3" -Timeout 10
if ($heapSettings -and $heapSettings -notmatch "error|Error|ERROR") {
if ($heapSettings -is [array]) { $heapSettings = $heapSettings -join " " }
$heapSettingsString = "$heapSettings".Trim()
# JVM堆内存详情(通过解析JVM参数和/proc信息)
try {
# 获取JVM启动参数中的堆内存配置
$jvmParams = Invoke-SSHCommand "docker exec ujava2 ps aux | grep java | grep -oE '-Xm[sx][0-9A-Za-z]+|-XX:(NewRatio|SurvivorRatio|MetaspaceSize|MaxMetaspaceSize)=[^ ]+' | tr '\n' ' '" -Timeout 10
if ($jvmParams -and $jvmParams -notmatch "error|Error|ERROR") {
if ($jvmParams -is [array]) { $jvmParams = $jvmParams -join " " }
$heapDetails = @{}
# 解析-Xms (初始堆大小)
if ($jvmParams -match '-Xms(\d+[mMgG]?[bB]?)') {
$heapDetails['InitialHeap'] = $Matches[1]
}
# 解析-Xmx (最大堆大小)
if ($jvmParams -match '-Xmx(\d+[mMgG]?[bB]?)') {
$heapDetails['MaxHeap'] = $Matches[1]
}
# 解析新生代比例
if ($jvmParams -match '-XX:NewRatio=(\d+)') {
$newRatio = [int]$Matches[1]
$heapDetails['NewRatio'] = $newRatio
}
# 解析Survivor比例
if ($jvmParams -match '-XX:SurvivorRatio=(\d+)') {
$survivorRatio = [int]$Matches[1]
$heapDetails['SurvivorRatio'] = $survivorRatio
}
# 解析Metaspace大小
if ($jvmParams -match '-XX:MetaspaceSize=(\d+[mMgG]?[bB]?)') {
$heapDetails['MetaspaceSize'] = $Matches[1]
}
if ($jvmParams -match '-XX:MaxMetaspaceSize=(\d+[mMgG]?[bB]?)') {
$heapDetails['MaxMetaspaceSize'] = $Matches[1]
}
# 生成堆内存详情描述
if ($heapDetails.Count -gt 0) {
$heapInfoParts = @()
if ($heapDetails['MaxHeap']) {
$heapInfoParts += "最大堆: $($heapDetails['MaxHeap'])"
}
if ($heapDetails['NewRatio']) {
$heapInfoParts += "新生代比例: 1:$($heapDetails['NewRatio'])"
}
if ($heapDetails['MetaspaceSize']) {
$heapInfoParts += "Metaspace: $($heapDetails['MetaspaceSize'])"
}
$results += [PSCustomObject]@{
Name = "堆内存设置"
Value = $heapSettingsString
Name = "JVM堆内存配置"
Value = "已配置"
Threshold = "-"
Status = "正常"
Message = "JVM堆内存参数配置"
Message = $heapInfoParts -join ", "
}
}
}
}
catch {
Write-Log "JVM堆内存详情检测失败: $($_.Exception.Message)" "WARN"
}
# JVM增强检测 - 运行时详细信息
try {
......
......@@ -206,3 +206,231 @@ if [ -n "$BINLOG_EXPIRE" ]; then
else
echo "BINLOG_EXPIRE:N/A"
fi
# ========== 高优先级功能补充 ==========
# Active-query summary from SHOW PROCESSLIST.
# Output lines (consumed by the caller by prefix):
#   ACTIVE_PROCESSLIST:Sleep:<n>,Active:<n>,LongRunning:<n>    or  ACTIVE_PROCESSLIST:N/A
#   LONG_QUERIES_TOP5:<row>|<row>|...                          or  LONG_QUERIES_TOP5:N/A
# -N suppresses the column header, so no text filter (which could also remove
# real rows) is needed, and all connections are counted rather than the first 10.
PROCESSLIST_OUTPUT=$(docker exec -e MYSQL_PWD="$MYSQL_PASSWORD" "$CONTAINER" mysql -uroot -N -e "SHOW PROCESSLIST;" 2>/dev/null)
if [ -n "$PROCESSLIST_OUTPUT" ]; then
  # Count by the Command column (field 5) instead of grepping the whole row, and
  # always print exactly one number ("grep -c ... || echo 0" printed "0" twice
  # when nothing matched, which split the output line in two).
  SLEEP_COUNT=$(printf '%s\n' "$PROCESSLIST_OUTPUT" | awk -F'\t' '$5 == "Sleep" { n++ } END { print n + 0 }')
  QUERY_COUNT=$(printf '%s\n' "$PROCESSLIST_OUTPUT" | awk -F'\t' '$5 != "Sleep" { n++ } END { print n + 0 }')
  # Long-running = non-idle connection whose Time column (field 6) exceeds 5 seconds;
  # idle (Sleep) connections are not queries and are excluded.
  LONG_QUERY_COUNT=$(printf '%s\n' "$PROCESSLIST_OUTPUT" | awk -F'\t' '$5 != "Sleep" && $6 + 0 > 5 { n++ } END { print n + 0 }')
  echo "ACTIVE_PROCESSLIST:Sleep:${SLEEP_COUNT},Active:${QUERY_COUNT},LongRunning:${LONG_QUERY_COUNT}"

  # TOP5 longest-running statements, rows joined with '|'
  LONG_QUERIES=$(docker exec -e MYSQL_PWD="$MYSQL_PASSWORD" "$CONTAINER" mysql -uroot -N -e "SELECT ID, USER, HOST, DB, COMMAND, TIME, STATE, LEFT(INFO, 50) AS QUERY FROM information_schema.PROCESSLIST WHERE COMMAND != 'Sleep' AND TIME > 0 ORDER BY TIME DESC LIMIT 5;" 2>/dev/null)
  if [ -n "$LONG_QUERIES" ]; then
    echo "LONG_QUERIES_TOP5:$(printf '%s\n' "$LONG_QUERIES" | head -5 | tr '\n' '|' | sed 's/|$//')"
  else
    echo "LONG_QUERIES_TOP5:N/A"
  fi
else
  echo "ACTIVE_PROCESSLIST:N/A"
  echo "LONG_QUERIES_TOP5:N/A"
fi
# Most expensive statement digest (performance_schema).
# Output: SLOW_QUERY_TOP1:Exec:<n>,TotalTime:<s>s,AvgTime:<s>s,LockTime:<s>s  or  SLOW_QUERY_TOP1:N/A
# performance_schema timer columns are in picoseconds, so seconds = value / 10^12
# (dividing by 10^9 yields milliseconds, which were then mislabelled as seconds).
# NOTE(review): the filter SUM_TIMER_WAIT > 1000000000 is kept as written; in
# picoseconds that is "more than 1 ms in total" - confirm that is the intended cut-off.
# -N suppresses the header row so the first output row is the top statement.
SLOW_QUERY_STATS=$(docker exec -e MYSQL_PWD="$MYSQL_PASSWORD" "$CONTAINER" mysql -uroot -N -e "
  SELECT
    COUNT_STAR as exec_count,
    ROUND(SUM_TIMER_WAIT/1000000000000, 2) as total_time_sec,
    ROUND(AVG_TIMER_WAIT/1000000000000, 2) as avg_time_sec,
    ROUND(SUM_LOCK_TIME/1000000000000, 2) as lock_time_sec,
    digest_text as query_sample
  FROM performance_schema.events_statements_summary_by_digest
  WHERE digest_text IS NOT NULL
    AND digest_text NOT LIKE '%performance_schema%'
    AND COUNT_STAR > 10
    AND SUM_TIMER_WAIT > 1000000000
  ORDER BY SUM_TIMER_WAIT DESC
  LIMIT 10;
" 2>/dev/null)
if [ -n "$SLOW_QUERY_STATS" ]; then
  # Rows are ordered by total time, so the first row is TOP1. (The previous
  # "head -2 | tail -1" skipped it whenever two or more rows were returned.)
  TOP_QUERY=$(printf '%s\n' "$SLOW_QUERY_STATS" | head -1)
  if [ -n "$TOP_QUERY" ]; then
    EXEC_COUNT=$(printf '%s\n' "$TOP_QUERY" | awk -F'\t' '{print $1}')
    TOTAL_TIME=$(printf '%s\n' "$TOP_QUERY" | awk -F'\t' '{print $2}')
    AVG_TIME=$(printf '%s\n' "$TOP_QUERY" | awk -F'\t' '{print $3}')
    LOCK_TIME=$(printf '%s\n' "$TOP_QUERY" | awk -F'\t' '{print $4}')
    echo "SLOW_QUERY_TOP1:Exec:${EXEC_COUNT},TotalTime:${TOTAL_TIME}s,AvgTime:${AVG_TIME}s,LockTime:${LOCK_TIME}s"
  else
    echo "SLOW_QUERY_TOP1:N/A"
  fi
else
  echo "SLOW_QUERY_TOP1:N/A"
fi
# Tables with more than 1000 rows and no index at all.
# Output: TABLES_WITHOUT_INDEX:<count>
# -N suppresses the header row (a text filter on "data_mb" would also drop any
# table whose name contains that text), and the result is not LIMITed because
# only the row count is reported - a cap would under-report the finding.
MISSING_INDEX_TABLES=$(docker exec -e MYSQL_PWD="$MYSQL_PASSWORD" "$CONTAINER" mysql -uroot -N -e "
  SELECT
    t.table_schema,
    t.table_name,
    t.table_rows,
    ROUND(t.data_length / 1024 / 1024, 2) as data_mb
  FROM information_schema.TABLES t
  WHERE t.table_schema NOT IN ('information_schema','performance_schema','mysql','sys')
    AND NOT EXISTS (
      SELECT 1 FROM information_schema.STATISTICS s
      WHERE s.table_schema = t.table_schema
        AND s.table_name = t.table_name
    )
    AND t.table_rows > 1000
  ORDER BY t.data_length DESC;
" 2>/dev/null)
if [ -n "$MISSING_INDEX_TABLES" ]; then
  # One output row per table
  MISSING_COUNT=$(printf '%s\n' "$MISSING_INDEX_TABLES" | awk 'END { print NR }')
  echo "TABLES_WITHOUT_INDEX:$MISSING_COUNT"
else
  echo "TABLES_WITHOUT_INDEX:0"
fi
# ========== 中优先级功能补充 ==========
# Per-database size overview (10 largest non-system schemas).
# Output: DATABASE_LIST:Count:<n>,TotalSize:<mb>MB,Databases:<a>,<b>,...  or  DATABASE_LIST:N/A
# stderr is discarded, like in the other checks: merged into stdout, a mysql error
# message would be counted as a database. The password is passed through
# MYSQL_PWD, so there is no command-line password warning to filter out.
DATABASE_LIST=$(docker exec -e MYSQL_PWD="$MYSQL_PASSWORD" "$CONTAINER" mysql -uroot -N -e "
  SELECT
    table_schema,
    ROUND(sum(data_length + index_length) / 1024 / 1024, 2) as size_mb
  FROM information_schema.tables
  WHERE table_schema NOT IN ('information_schema','performance_schema','mysql','sys')
  GROUP BY table_schema
  ORDER BY size_mb DESC
  LIMIT 10;
" 2>/dev/null)
if [ -n "$DATABASE_LIST" ]; then
  # Each row is "<schema>\t<size_mb>"
  DB_COUNT=$(printf '%s\n' "$DATABASE_LIST" | awk 'END { print NR }')
  TOTAL_SIZE=$(printf '%s\n' "$DATABASE_LIST" | awk -F'\t' '{sum+=$2} END {printf "%.2f", sum}')
  DB_NAMES=$(printf '%s\n' "$DATABASE_LIST" | awk -F'\t' '{print $1}' | tr '\n' ',' | sed 's/,$//')
  echo "DATABASE_LIST:Count:${DB_COUNT},TotalSize:${TOTAL_SIZE}MB,Databases:${DB_NAMES}"
else
  echo "DATABASE_LIST:N/A"
fi
# InnoDB transaction summary: number of open transactions and their summed age.
# Output: INNODB_TRX_DETAIL:Count:<n>,TotalTime:<seconds>s
# An empty query result is reported as zero transactions.
INNODB_TRX_INFO=$(docker exec -e MYSQL_PWD="$MYSQL_PASSWORD" $CONTAINER mysql -uroot -N -e "
SELECT
COUNT(*) as trx_count,
IFNULL(SUM(TIMESTAMPDIFF(SECOND, trx_started, NOW())), 0) as total_trx_time
FROM information_schema.INNODB_TRX;
" 2>/dev/null)
if [ -z "$INNODB_TRX_INFO" ]; then
  echo "INNODB_TRX_DETAIL:Count:0,TotalTime:0s"
else
  # The single result row is "<count>\t<total seconds>"
  TRX_COUNT=$(echo "$INNODB_TRX_INFO" | awk 'BEGIN { FS = "\t" } { print $1 }')
  TOTAL_TRX_TIME=$(echo "$INNODB_TRX_INFO" | awk 'BEGIN { FS = "\t" } { print $2 }')
  echo "INNODB_TRX_DETAIL:Count:${TRX_COUNT},TotalTime:${TOTAL_TRX_TIME}s"
fi
# Table statistics for the ubains schema (20 largest tables by data + index size).
# Output: UBAINS_TABLES_TOP20:Count:<n>,Top1:<table>:<total_mb>MB  or  UBAINS_TABLES_TOP20:N/A
# -N suppresses the header row, so the first output row is the largest table.
UBAINS_TOP_TABLES=$(docker exec -e MYSQL_PWD="$MYSQL_PASSWORD" "$CONTAINER" mysql -uroot -N -e "
  SELECT
    table_name,
    table_rows,
    round(data_length / 1024 / 1024, 2) as data_mb,
    round(index_length / 1024 / 1024, 2) as index_mb,
    round((data_length + index_length) / 1024 / 1024, 2) as total_mb
  FROM information_schema.TABLES
  WHERE TABLE_SCHEMA = 'ubains'
  ORDER BY (data_length + index_length) DESC
  LIMIT 20;
" 2>/dev/null)
if [ -n "$UBAINS_TOP_TABLES" ]; then
  TABLE_COUNT=$(printf '%s\n' "$UBAINS_TOP_TABLES" | awk 'END { print NR }')
  # Rows are ordered by size, so TOP1 is the first row. (The previous
  # "head -2 | tail -1" reported the second-largest table instead.)
  TOP1_TABLE=$(printf '%s\n' "$UBAINS_TOP_TABLES" | head -1)
  if [ -n "$TOP1_TABLE" ]; then
    TOP1_NAME=$(printf '%s\n' "$TOP1_TABLE" | awk -F'\t' '{print $1}')
    TOP1_SIZE=$(printf '%s\n' "$TOP1_TABLE" | awk -F'\t' '{print $5}')
    echo "UBAINS_TABLES_TOP20:Count:${TABLE_COUNT},Top1:${TOP1_NAME}:${TOP1_SIZE}MB"
  else
    echo "UBAINS_TABLES_TOP20:Count:${TABLE_COUNT}"
  fi
else
  echo "UBAINS_TABLES_TOP20:N/A"
fi
# Replication status details.
# Output: REPLICATION_DETAIL:IO:<state>,SQL:<state>,Delay:<seconds>s  or  REPLICATION_DETAIL:MASTER
# A server is treated as a replica whenever SHOW SLAVE STATUS returns an IO-thread
# state, whatever that state is. Previously a replica whose IO thread was not
# "Yes" (stopped or still connecting) was reported as MASTER, which hid exactly
# the broken-replication case this check exists to surface.
SLAVE_STATUS_DETAIL=$(docker exec -e MYSQL_PWD="$MYSQL_PASSWORD" "$CONTAINER" mysql -uroot -e "SHOW SLAVE STATUS\G" 2>/dev/null)
SLAVE_IO=""
if [ -n "$SLAVE_STATUS_DETAIL" ]; then
  # \G output has one "<name>: <value>" pair per line; match the exact field name
  SLAVE_IO=$(printf '%s\n' "$SLAVE_STATUS_DETAIL" | awk '$1 == "Slave_IO_Running:" { print $2; exit }')
  SLAVE_SQL=$(printf '%s\n' "$SLAVE_STATUS_DETAIL" | awk '$1 == "Slave_SQL_Running:" { print $2; exit }')
  BEHIND_MASTER=$(printf '%s\n' "$SLAVE_STATUS_DETAIL" | awk '$1 == "Seconds_Behind_Master:" { print $2; exit }')
fi
if [ -n "$SLAVE_IO" ]; then
  echo "REPLICATION_DETAIL:IO:$SLAVE_IO,SQL:$SLAVE_SQL,Delay:${BEHIND_MASTER}s"
else
  # No replica configuration found (or the status could not be read)
  echo "REPLICATION_DETAIL:MASTER"
fi
# 连接错误详细统计 - 该功能依赖的表在当前MySQL版本中不存在,已移除
# 使用基础的Aborted_connects统计即可(CONN_ERRORS变量)
# ========== Supplement: InnoDB buffer pool details ==========
# Output: INNODB_BP_DETAIL:HitRate:<hits>/<total>  or  INNODB_BP_DETAIL:N/A
# The status line reads "Buffer pool hit rate 998 / 1000, ..." - with spaces
# around the slash - so both numbers are captured explicitly and re-joined.
# (A character class of digits and "/" stopped at the first space and lost the
# denominator.) Plain sed is used so GNU grep's -P option is not required.
INNODB_BP_DETAIL=$(docker exec -e MYSQL_PWD="$MYSQL_PASSWORD" "$CONTAINER" mysql -uroot -e "SHOW ENGINE INNODB STATUS\G" 2>/dev/null | grep "Buffer pool hit rate")
if [ -n "$INNODB_BP_DETAIL" ]; then
  # Only the first matching line is used
  BP_HIT_RATE=$(printf '%s\n' "$INNODB_BP_DETAIL" \
    | sed -n 's/.*Buffer pool hit rate \([0-9][0-9]*\) *\/ *\([0-9][0-9]*\).*/\1\/\2/p' \
    | head -1)
  if [ -n "$BP_HIT_RATE" ]; then
    echo "INNODB_BP_DETAIL:HitRate:${BP_HIT_RATE}"
  else
    echo "INNODB_BP_DETAIL:N/A"
  fi
else
  echo "INNODB_BP_DETAIL:N/A"
fi
# ========== Supplement: lock wait details ==========
# Output: LOCK_DETAIL:Waits:<n>,TotalWait:<seconds>s  or  LOCK_DETAIL:N/A
# The query is a COUNT(*), so a successful run always returns one row; an empty
# result therefore means the query failed and is reported as N/A, not as
# "0 waits".
LOCK_INFO=$(docker exec -e MYSQL_PWD="$MYSQL_PASSWORD" "$CONTAINER" mysql -uroot -N -e "
  SELECT
    COUNT(*) as lock_waits,
    IFNULL(SUM(TIMESTAMPDIFF(SECOND, r.trx_started, NOW())), 0) as total_wait_time
  FROM information_schema.INNODB_LOCK_WAITS w
  JOIN information_schema.INNODB_TRX r ON r.trx_id = w.requesting_trx_id;
" 2>/dev/null)
if [ -z "$LOCK_INFO" ]; then
  # MySQL 8.0 removed information_schema.INNODB_LOCK_WAITS; the same data is in
  # performance_schema.data_lock_waits there.
  LOCK_INFO=$(docker exec -e MYSQL_PWD="$MYSQL_PASSWORD" "$CONTAINER" mysql -uroot -N -e "
    SELECT
      COUNT(*) as lock_waits,
      IFNULL(SUM(TIMESTAMPDIFF(SECOND, r.trx_started, NOW())), 0) as total_wait_time
    FROM performance_schema.data_lock_waits w
    JOIN information_schema.INNODB_TRX r ON r.trx_id = w.REQUESTING_ENGINE_TRANSACTION_ID;
  " 2>/dev/null)
fi
if [ -n "$LOCK_INFO" ]; then
  # The single result row is "<waits>\t<total seconds>"
  LOCK_COUNT=$(printf '%s\n' "$LOCK_INFO" | awk -F'\t' '{print $1}')
  WAIT_TIME=$(printf '%s\n' "$LOCK_INFO" | awk -F'\t' '{print $2}')
  echo "LOCK_DETAIL:Waits:${LOCK_COUNT},TotalWait:${WAIT_TIME}s"
else
  echo "LOCK_DETAIL:N/A"
fi
# ========== Supplement: table fragmentation details ==========
# Lists up to five non-system tables with more than 10 MB (10485760 bytes) of
# free space, largest first.
# Output: FRAGMENTED_DETAIL:Count:<n>[,Top1:<schema.table>:<fragment_mb>MB]
FRAGMENTED_TABLES_DETAIL=$(docker exec -e MYSQL_PWD="$MYSQL_PASSWORD" $CONTAINER mysql -uroot -N -e "
SELECT
CONCAT(table_schema, '.', table_name) as table_name,
ROUND(data_length / 1024 / 1024, 2) as data_mb,
ROUND(data_free / 1024 / 1024, 2) as fragment_mb
FROM information_schema.TABLES
WHERE TABLE_SCHEMA NOT IN ('information_schema','performance_schema','mysql','sys')
AND DATA_FREE > 10485760
ORDER BY data_free DESC
LIMIT 5;
" 2>/dev/null | grep -v "fragment_mb")
if [ -z "$FRAGMENTED_TABLES_DETAIL" ]; then
  # Nothing returned: report zero fragmented tables
  echo "FRAGMENTED_DETAIL:Count:0"
else
  FRAG_COUNT=$(echo "$FRAGMENTED_TABLES_DETAIL" | wc -l)
  # The first row is the table with the most free space
  TOP_FRAG=$(echo "$FRAGMENTED_TABLES_DETAIL" | sed -n '1p')
  if [ -z "$TOP_FRAG" ]; then
    echo "FRAGMENTED_DETAIL:Count:${FRAG_COUNT}"
  else
    # Columns: <schema.table> <data_mb> <fragment_mb>
    TABLE_NAME=$(echo "$TOP_FRAG" | awk '{print $1}')
    FRAG_MB=$(echo "$TOP_FRAG" | awk '{print $3}')
    echo "FRAGMENTED_DETAIL:Count:${FRAG_COUNT},Top1:${TABLE_NAME}:${FRAG_MB}MB"
  fi
fi
#!/bin/bash
# Redis deep-check script
# Usage: ./redis_depth_check.sh
# Optional environment overrides:
#   REDIS_PASSWORD   - password passed to redis-cli
#   REDIS_CONTAINER  - name of the Redis docker container
# When unset, the previous built-in values are used, so existing callers keep working.
# NOTE(review): the default password is a credential committed to the repository;
# prefer supplying REDIS_PASSWORD from the caller and rotating this value.
REDIS_PASSWORD="${REDIS_PASSWORD:-dNrprU&2S}"
CONTAINER="${REDIS_CONTAINER:-uredis}"
# Basic information: version, uptime, key count, memory usage and client count.
# Each value is printed as "<KEY>:<value>" with a fallback when nothing was read.

# Runs one redis-cli command inside the container with stderr merged into stdout.
redis_cli_merged() {
  docker exec "$CONTAINER" redis-cli -a "$REDIS_PASSWORD" "$@" 2>&1
}

# Server version (INFO lines end in CR, which is stripped)
REDIS_VERSION=$(redis_cli_merged INFO server | grep redis_version | cut -d: -f2 | tr -d '\r')
echo "REDIS_VERSION:${REDIS_VERSION:-N/A}"

# Uptime in days
UPTIME_DAYS=$(redis_cli_merged INFO server | grep uptime_in_days | cut -d: -f2 | tr -d '\r')
echo "UPTIME_DAYS:${UPTIME_DAYS:-N/A}"

# Key count: anything that is not a plain non-negative integer is reported as 0
KEY_COUNT=$(redis_cli_merged DBSIZE | grep -v "Warning")
if [[ "$KEY_COUNT" =~ ^[0-9]+$ ]]; then
  echo "KEY_COUNT:$KEY_COUNT"
else
  echo "KEY_COUNT:0"
fi

# Memory usage: the two selected INFO lines joined with '|'
MEMORY_INFO=$(redis_cli_merged INFO memory | grep -E "used_memory_human:|mem_fragmentation_ratio:" | tr -d '\r' | tr '\n' '|' | sed 's/|$//')
echo "MEMORY_INFO:${MEMORY_INFO:-N/A}"

# Connected client count
CLIENT_COUNT=$(redis_cli_merged INFO clients | grep connected_clients | cut -d: -f2 | tr -d '\r')
echo "CLIENT_COUNT:${CLIENT_COUNT:-0}"
# ========== High-priority additions ==========
# Keyspace summary: add up the key count and the expiring-key count over every
# "db…" row of INFO keyspace.
KEYSPACE_INFO=$(
    docker exec "$CONTAINER" redis-cli -a "$REDIS_PASSWORD" INFO keyspace 2>&1 \
        | grep -v "Warning" \
        | grep "^db" \
        | tr -d '\r'
)
if [[ -z "$KEYSPACE_INFO" ]]; then
    echo "KEYSPACE_DETAIL:N/A"
else
    TOTAL_KEYS=0
    TOTAL_EXPIRES=0
    # Split each row at the first colon; the remainder carries the
    # keys=<n> and expires=<n> counters picked out below.
    while IFS=: read -r _db_name db_stats; do
        if [[ "$db_stats" =~ keys=([0-9]+) ]]; then
            TOTAL_KEYS=$((TOTAL_KEYS + ${BASH_REMATCH[1]}))
        fi
        if [[ "$db_stats" =~ expires=([0-9]+) ]]; then
            TOTAL_EXPIRES=$((TOTAL_EXPIRES + ${BASH_REMATCH[1]}))
        fi
    done <<< "$KEYSPACE_INFO"
    printf 'KEYSPACE_DETAIL:Total:%s,Expires:%s\n' "$TOTAL_KEYS" "$TOTAL_EXPIRES"
fi
# Key type distribution, sampled from at most the first 100 scanned keys.

# Turn a stream of TYPE replies (one per line on stdin) into
# "type:count|type:count", ordered by type name. Blank lines and redis-cli
# warning lines are ignored. Prints nothing when there is no input.
summarize_key_types() {
    tr -d '\r' \
        | grep -v -e '^$' -e 'Warning' \
        | LC_ALL=C sort \
        | uniq -c \
        | awk '{ printf "%s%s:%s", sep, $2, $1; sep = "|" }'
}

# stderr of the scan is discarded rather than merged: with 2>&1 the redis-cli
# password warning would be handed to TYPE as if it were a key and show up as
# a phantom "none". Keys are read line by line (not through xargs) so quotes
# and backslashes inside key names reach TYPE unchanged.
KEY_TYPE_SAMPLE=$(
    docker exec "$CONTAINER" redis-cli -a "$REDIS_PASSWORD" --scan --count 100 2>/dev/null \
        | head -100 \
        | while IFS= read -r sampled_key; do
              [ -n "$sampled_key" ] || continue
              # </dev/null keeps the inner command from consuming the key list.
              docker exec "$CONTAINER" redis-cli -a "$REDIS_PASSWORD" TYPE "$sampled_key" 2>/dev/null </dev/null
          done \
        | summarize_key_types
)
if [ -n "$KEY_TYPE_SAMPLE" ]; then
    echo "KEY_TYPE_DISTRIBUTION:$KEY_TYPE_SAMPLE"
else
    echo "KEY_TYPE_DISTRIBUTION:N/A"
fi
# ========== Medium-priority additions ==========
# Persistence status.

# Derive "RDB:<state>,AOF:<state>" from INFO persistence text.
# Arguments: $1 - INFO persistence fields, one "name:value" per line
# Outputs:   the status string on stdout
# The RDB state comes from rdb_bgsave_in_progress. rdb_last_cow_size must not
# be used for this: it describes the last completed save and stays non-zero
# after that save has finished.
persistence_status_from_info() {
    local info bgsave aof_enabled
    local rdb_status="空闲" aof_status="未启用"
    info=$(printf '%s\n' "$1" | tr -d '\r')
    bgsave=$(printf '%s\n' "$info" \
        | sed -n 's/^rdb_bgsave_in_progress:\([0-9][0-9]*\).*/\1/p' | head -1)
    aof_enabled=$(printf '%s\n' "$info" \
        | sed -n 's/^aof_enabled:\([0-9][0-9]*\).*/\1/p' | head -1)
    if [ "${bgsave:-0}" -gt 0 ]; then
        rdb_status="备份中"
    fi
    if [ "$aof_enabled" = "1" ]; then
        aof_status="已启用"
    fi
    printf 'RDB:%s,AOF:%s\n' "$rdb_status" "$aof_status"
}

PERSISTENCE_INFO=$(
    docker exec "$CONTAINER" redis-cli -a "$REDIS_PASSWORD" INFO persistence 2>&1 \
        | grep -v "Warning" \
        | grep -E "^(rdb_bgsave_in_progress|aof_enabled|rdb_last_save_time):" \
        | tr -d '\r'
)
if [ -n "$PERSISTENCE_INFO" ]; then
    echo "PERSISTENCE_STATUS:$(persistence_status_from_info "$PERSISTENCE_INFO")"
else
    echo "PERSISTENCE_STATUS:N/A"
fi
# Replication status: report the role plus the detail that belongs to it
# (replica count for a master, link state for a slave).
REPL_INFO=$(
    docker exec "$CONTAINER" redis-cli -a "$REDIS_PASSWORD" INFO replication 2>&1 \
        | grep -v "Warning" \
        | grep -E "role:|connected_slaves:|master_link_status:" \
        | tr -d '\r' \
        | tr '\n' '|' \
        | sed 's/|$//'
)
if [[ -z "$REPL_INFO" ]]; then
    echo "REPLICATION_STATUS:N/A"
else
    ROLE=$(grep -oP 'role:\K\w+' <<< "$REPL_INFO" | head -1)
    CONNECTED_SLAVES=$(grep -oP 'connected_slaves:\K\d+' <<< "$REPL_INFO" | head -1)
    MASTER_LINK=$(grep -oP 'master_link_status:\K\w+' <<< "$REPL_INFO" | head -1)
    case "$ROLE" in
        master)
            echo "REPLICATION_STATUS:Role:$ROLE,Slaves:${CONNECTED_SLAVES:-0}"
            ;;
        slave)
            echo "REPLICATION_STATUS:Role:$ROLE,MasterLink:${MASTER_LINK:-unknown}"
            ;;
        *)
            echo "REPLICATION_STATUS:Role:$ROLE"
            ;;
    esac
fi
# Slow log, ten most recent entries.

# Summarise formatted (`redis-cli --no-raw`) SLOWLOG GET output from stdin.
# Outputs: "Count:N,Slowest:<microseconds>us", or "Count:0" when no entry
#          is found.
# A slow-log entry does not have a fixed number of output lines (the command
# arguments and client details vary), so counting lines is unreliable. In
# formatted output the duration is element 3 of each entry and is the only
# indented "3) (integer) N" line; command arguments are printed as quoted
# strings and entry indices are followed by "1)".
summarize_slowlog() {
    tr -d '\r' | awk '
        /^ +3\) \(integer\) [0-9]+[ \t]*$/ {
            count++
            if ($3 + 0 > slowest) slowest = $3 + 0
        }
        END {
            if (count > 0)
                printf "Count:%d,Slowest:%dus\n", count, slowest
            else
                print "Count:0"
        }'
}

# --no-raw forces the numbered, typed layout even though stdout is a pipe.
SLOW_LOG=$(
    docker exec "$CONTAINER" redis-cli -a "$REDIS_PASSWORD" --no-raw SLOWLOG GET 10 2>&1 \
        | grep -v "Warning"
)
echo "SLOW_LOG_TOP10:$(printf '%s\n' "$SLOW_LOG" | summarize_slowlog)"
# Command statistics: the five most-called commands.

# Rank INFO commandstats lines (stdin) by call count.
# Arguments: $1 - how many commands to keep (default 5)
# Outputs:   "cmd:calls,cmd:calls,..." ordered by calls descending, ties
#            broken by command name; nothing when no cmdstat_ line is present
# The input is in server order, so it has to be sorted before truncating —
# taking the first lines does not give the most-called commands.
top_commands_by_calls() {
    local limit=${1:-5}
    tr -d '\r' \
        | sed -n 's/^cmdstat_\([^:]*\):calls=\([0-9][0-9]*\).*/\2 \1/p' \
        | LC_ALL=C sort -k1,1nr -k2,2 \
        | head -n "$limit" \
        | awk '{ printf "%s%s:%s", sep, $2, $1; sep = "," }'
}

CMD_STATS=$(
    docker exec "$CONTAINER" redis-cli -a "$REDIS_PASSWORD" INFO commandstats 2>&1 \
        | grep -v "Warning" \
        | grep "cmdstat_" \
        | tr -d '\r'
)
CMD_DETAIL=$(printf '%s\n' "$CMD_STATS" | top_commands_by_calls 5)
echo "COMMAND_STATS_TOP5:${CMD_DETAIL:-N/A}"
# Client list summary.

# Summarise CLIENT LIST output (one client per line on stdin).
# Outputs: "Total:N,IdleOver5min:N,Blocking:N"
#   IdleOver5min - clients whose idle=<seconds> field exceeds 300
#   Blocking     - clients whose flags= field contains "b"
# CLIENT LIST has no "blocking=" field; a client waiting in a blocking
# operation is marked with the lowercase "b" flag, so that is what is counted.
summarize_client_list() {
    tr -d '\r' | awk '
        /(^| )id=/ {
            total++
            idle = ""
            flags = ""
            for (i = 1; i <= NF; i++) {
                if ($i ~ /^idle=[0-9]+$/)
                    idle = substr($i, 6)
                else if ($i ~ /^flags=/)
                    flags = substr($i, 7)
            }
            if (idle != "" && idle + 0 > 300) idle_over++
            if (flags ~ /b/) blocked++
        }
        END {
            printf "Total:%d,IdleOver5min:%d,Blocking:%d\n", total, idle_over, blocked
        }'
}

CLIENT_LIST=$(
    docker exec "$CONTAINER" redis-cli -a "$REDIS_PASSWORD" CLIENT LIST 2>&1 \
        | grep -v "Warning" \
        | grep "id=" \
        | tr -d '\r'
)
if [ -n "$CLIENT_LIST" ]; then
    echo "CLIENT_DETAIL:$(printf '%s\n' "$CLIENT_LIST" | summarize_client_list)"
else
    echo "CLIENT_DETAIL:N/A"
fi
# Cache hit rate and rejected connections, both taken from INFO stats.
STATS_INFO=$(
    docker exec "$CONTAINER" redis-cli -a "$REDIS_PASSWORD" INFO stats 2>&1 \
        | grep -v "Warning" \
        | grep -E "keyspace_hits:|keyspace_misses:|rejected_connections:" \
        | tr -d '\r'
)
if [[ -z "$STATS_INFO" ]]; then
    # Nothing usable came back: print both placeholders.
    echo "CACHE_HIT_RATE:N/A"
    echo "REJECTED_CONNECTIONS:0"
else
    HITS=$(grep keyspace_hits <<< "$STATS_INFO" | cut -d: -f2)
    MISSES=$(grep keyspace_misses <<< "$STATS_INFO" | cut -d: -f2)
    REJECTED=$(grep rejected_connections <<< "$STATS_INFO" | cut -d: -f2)
    if [[ -z "$HITS" || -z "$MISSES" ]]; then
        echo "CACHE_HIT_RATE:N/A"
    else
        TOTAL_REQ=$((HITS + MISSES))
        # With no lookups at all the rate is reported as 0.00 rather than
        # dividing by zero.
        HIT_RATE="0.00"
        if [[ "$TOTAL_REQ" -gt 0 ]]; then
            HIT_RATE=$(awk -v hits="$HITS" -v total="$TOTAL_REQ" \
                'BEGIN {printf "%.2f", (hits * 100) / total}')
        fi
        echo "CACHE_HIT_RATE:Hits:$HITS,Misses:$MISSES,Rate:$HIT_RATE%"
    fi
    if [[ -n "$REJECTED" ]]; then
        echo "REJECTED_CONNECTIONS:$REJECTED"
    fi
fi
# Configuration check: maxclients, timeout and save, read via CONFIG GET.

# Print the last line of the CONFIG GET reply for the parameter named in $1.
redis_config_value() {
    docker exec $CONTAINER redis-cli -a "$REDIS_PASSWORD" CONFIG GET "$1" 2>&1 \
        | grep -v "Warning" \
        | tail -1 \
        | tr -d '\r'
}

CONFIG_MAXCLIENTS=$(redis_config_value maxclients)
CONFIG_TIMEOUT=$(redis_config_value timeout)
CONFIG_SAVE=$(redis_config_value save)
printf 'CONFIG_CHECK:MaxClients:%s,Timeout:%ss,Save:%s\n' \
    "$CONFIG_MAXCLIENTS" "$CONFIG_TIMEOUT" "$CONFIG_SAVE"
# MySQL深度检测未实现功能清单
**生成时间**: 2026-05-09
**对比文档**:
- 需求: `Docs/PRD/服务自检/新需求/_PRD_服务器监测需求_规整版_需求文档.md`
- 实现: `AuxiliaryTool/ScriptTool/新服务自检/mysql_depth_check.sh`
---
## 一、已实现功能列表 ✅
### 基础状态检测
- ✅ MySQL版本和运行状态 (在主脚本中实现)
- ✅ 连接数统计 (当前/历史最大/最大连接数)
- ✅ 慢查询数量
- **数据库列表** (需补充)
### 连接池分析
- ✅ 连接池分析 (活跃/空闲连接数 - THREADS_POOL)
### 性能统计
- ✅ QPS统计
- ✅ TPS统计
### InnoDB状态
- ✅ InnoDB状态摘要 (简化版)
- **InnoDB状态详情** (需补充: SHOW ENGINE INNODB STATUS完整输出)
### InnoDB缓冲池
- ✅ InnoDB缓冲池大小
- ✅ Buffer Pool命中率
### 事务状态
- ✅ 事务状态统计 (TRX_ACTIVE)
### 锁信息
- ✅ 锁等待检测
- ✅ 死锁检测
### 表统计
- **表统计 (ubains库TOP20)** (需补充)
- ✅ 表数量统计
- ✅ 表碎片检测
### 当前活跃查询
- ✅ 当前活跃查询数量
- **当前活跃查询详情** (需补充: SHOW PROCESSLIST详细信息)
### Binlog状态
- ✅ Binlog状态
- ✅ Binlog文件数量
- ✅ Binlog过期时间
### 复制状态
- ✅ 复制状态 (简化版: MASTER/SLAVE)
- **复制状态详情** (需补充: 延迟、偏移量等)
### 连接错误统计
- ✅ 连接错误统计
### 其他
- ✅ 临时表使用率
- ✅ 数据库总大小
- ✅ 表缓存命中率
---
## 二、未实现或需增强的功能 ❌
### 1. 数据库列表详细输出 (优先级: 中)
**需求**: 显示所有数据库列表及其大小
**当前状态**: 未实现
**实现方式**:
```bash
docker exec $CONTAINER mysql -uroot -p"$MYSQL_PASSWORD" -e "SELECT table_schema, ROUND(SUM(data_length + index_length) / 1024 / 1024, 2) AS size_mb FROM information_schema.tables GROUP BY table_schema ORDER BY size_mb DESC;"
```
---
### 2. InnoDB状态详情 (优先级: 中)
**需求**: 获取InnoDB引擎运行状态的完整信息
**当前状态**: 简化版实现(只检测状态行数)
**需要补充**:
- 事务历史信息
- 死锁详情
- 缓冲池状态
- 检查点状态
**实现方式**:
```bash
docker exec $CONTAINER mysql -uroot -p"$MYSQL_PASSWORD" -e "SHOW ENGINE INNODB STATUS\G"
```
---
### 3. 当前活跃查询详情 (优先级: 高)
**需求**: 显示当前正在执行的查询详情(SQL语句、执行时间、状态等)
**当前状态**: 只统计数量
**需要补充**:
- 完整的SHOW PROCESSLIST输出
- 按执行时间排序
- 锁定的事务信息
**实现方式**:
```bash
docker exec -e MYSQL_PWD="$MYSQL_PASSWORD" $CONTAINER mysql -uroot -e "SHOW PROCESSLIST;" 2>/dev/null
```
---
### 4. MySQL变量配置详情 (优先级: 低)
**需求**: 显示MySQL关键配置参数
**当前状态**: 未实现
**重要参数**:
- innodb_buffer_pool_size
- max_connections
- query_cache_size
- slow_query_log
- long_query_time
- sync_binlog
- innodb_flush_log_at_trx_commit
**实现方式**:
```bash
docker exec $CONTAINER mysql -uroot -p"$MYSQL_PASSWORD" -e "SHOW VARIABLES;" | grep -E 'innodb_buffer_pool_size|max_connections|query_cache|slow_query_log|long_query_time|sync_binlog|flush_log_at_trx'
```
---
### 5. 缺少索引的高耗时查询 (优先级: 高)
**需求**: 通过performance_schema分析缺少索引的高耗时查询
**当前状态**: 未实现
**实现方式**:
```bash
# 查询执行次数多且总耗时长的语句(performance_schema 的 timer 单位为皮秒,1 秒 = 10^12 皮秒)
docker exec -e MYSQL_PWD="$MYSQL_PASSWORD" $CONTAINER mysql -uroot -p"$MYSQL_PASSWORD" -e "
SELECT
digest,
schema_name,
count_star as exec_count,
round(sum_timer_wait/1000000000000, 2) as total_time_sec,
round(avg_timer_wait/1000000000000, 2) as avg_time_sec,
digest_text as query_sample
FROM performance_schema.events_statements_summary_by_digest
WHERE digest_text NOT LIKE '%performance_schema%'
AND count_star > 10
AND sum_timer_wait > 1000000000000
ORDER BY sum_timer_wait DESC
LIMIT 20;
"
```
---
### 6. 表统计 (ubains库TOP20) (优先级: 中)
**需求**: 显示ubains库中TOP20表的统计信息
**当前状态**: 未实现
**实现方式**:
```bash
docker exec -e MYSQL_PWD="$MYSQL_PASSWORD" $CONTAINER mysql -uroot -p"$MYSQL_PASSWORD" -e "
SELECT
table_name,
table_rows,
round(data_length / 1024 / 1024, 2) as data_mb,
round(index_length / 1024 / 1024, 2) as index_mb,
round((data_length + index_length) / 1024 / 1024, 2) as total_mb,
round(data_free / 1024 / 1024, 2) as data_free_mb,
engine,
table_collation
FROM information_schema.TABLES
WHERE TABLE_SCHEMA = 'ubains'
ORDER BY (data_length + index_length) DESC
LIMIT 20;
"
```
---
### 7. 复制状态详情 (优先级: 中)
**需求**: 主从复制的详细状态信息
**当前状态**: 简化版实现(只判断MASTER/SLAVE)
**需要补充**:
- Slave_IO_Running状态
- Slave_SQL_Running状态
- Seconds_Behind_Master
- Relay_Log_Pos
- Exec_Master_Log_Pos
- 复制延迟
**实现方式**:
```bash
docker exec -e MYSQL_PWD="$MYSQL_PASSWORD" $CONTAINER mysql -uroot -p"$MYSQL_PASSWORD" -e "SHOW SLAVE STATUS\G"
```
---
### 8. 连接错误详细统计 (优先级: 中)
**需求**: 按错误类型分类统计连接错误
**当前状态**: 总数统计
**需要补充**:
- Access denied错误
- Client does not authenticate
- Too many connections
- Lost connection to MySQL
**实现方式**:
```bash
# 注意:events_errors_summary_global_by_error 表需要 MySQL 8.0 及以上版本
docker exec -e MYSQL_PWD="$MYSQL_PASSWORD" $CONTAINER mysql -uroot -p"$MYSQL_PASSWORD" -e "
SELECT
error_number,
error_name,
sum_error_raised as error_count
FROM performance_schema.events_errors_summary_global_by_error
WHERE error_number IS NOT NULL
AND sum_error_raised > 0
ORDER BY error_count DESC;
"
```
---
## 三、按优先级分类
### 高优先级 (2项)
1. **当前活跃查询详情** - 诊断慢查询和锁等待问题
2. **缺少索引的高耗时查询** - 优化数据库性能
### 中优先级 (5项)
1. InnoDB状态详情 - 深入了解InnoDB运行状态
2. 数据库列表详细输出 - 了解各数据库大小分布
3. 表统计(ubains库TOP20) - 识别大表
4. 复制状态详情 - 监控主从复制健康度
5. 连接错误详细统计 - 诊断连接问题
### 低优先级 (1项)
1. MySQL变量配置详情 - 配置审查
---
## 四、实现建议
### 短期方案(立即实施)
1. **实现当前活跃查询详情**
- 完整输出SHOW PROCESSLIST
- 按执行时间排序
- 识别长时间运行的查询
2. **实现缺少索引的高耗时查询**
- 通过performance_schema查询
- 输出TOP20耗时查询
- 提供优化建议
### 中期方案(逐步完善)
1. **实现InnoDB状态详情**
- 解析SHOW ENGINE INNODB STATUS输出
- 提取关键指标(事务、死锁、缓冲池)
2. **实现表统计(ubains库TOP20)**
- 统计表大小、行数、索引大小
- 按总大小排序
3. **增强复制状态检测**
- 检测复制延迟
- 监控复制线程状态
### 长期方案(架构优化)
1. **性能趋势分析**
- 记录历史QPS/TPS数据
- 绘制性能趋势图
- 预测容量瓶颈
2. **自动化优化建议**
- 基于慢查询日志自动生成索引建议
- 基于连接数趋势建议调整连接池配置
- 基于InnoDB状态建议优化参数
---
**文档结束**
# 依赖工具缺失清单
**生成时间**: 2026-05-09
**相关文档**: `_PRD_未实现功能清单.md`
---
## 一、系统工具依赖缺失
### 1. mpstat (CPU多核心统计)
**缺少工具**: `sysstat` 包中的 `mpstat`
**影响功能**:
- CPU每核心使用率检测
- 中断统计(部分功能)
- 软中断统计(部分功能)
**检测命令**:
```bash
mpstat -P ALL 1 1
```
**安装方式**:
```bash
# CentOS/RHEL
yum install sysstat
# Ubuntu/Debian
apt-get install sysstat
# openEuler
dnf install sysstat
```
**优先级**: 中
---
### 2. iostat (磁盘IO统计)
**缺少工具**: `sysstat` 包中的 `iostat`
**影响功能**:
- 磁盘IO状态检测
- 磁盘读写速率统计
- IO等待时间统计
**检测命令**:
```bash
iostat -x 1 1
```
**安装方式**:
```bash
# CentOS/RHEL
yum install sysstat
# Ubuntu/Debian
apt-get install sysstat
# openEuler
dnf install sysstat
```
**优先级**: 中
---
### 3. smartctl (磁盘SMART健康检测)
**缺少工具**: `smartmontools` 包中的 `smartctl`
**影响功能**:
- 磁盘SMART健康状态检测
- 磁盘预测性故障分析
- 磁盘寿命评估
**检测命令**:
```bash
smartctl -H /dev/sda
smartctl -a /dev/sda
```
**安装方式**:
```bash
# CentOS/RHEL
yum install smartmontools
# Ubuntu/Debian
apt-get install smartmontools
# openEuler
dnf install smartmontools
```
**优先级**: 中
---
### 4. numactl (NUMA架构信息)
**缺少工具**: `numactl` 包中的 `numactl`
**影响功能**:
- NUMA架构信息检测
- 内存节点分配策略
- CPU与内存节点的绑定关系
**检测命令**:
```bash
numactl --hardware
```
**安装方式**:
```bash
# CentOS/RHEL
yum install numactl
# Ubuntu/Debian
apt-get install numactl
# openEuler
dnf install numactl
```
**优先级**: 低
---
### 5. lsof (文件描述符和网络连接检测)
**缺少工具**: `lsof`
**影响功能**:
- 进程打开的网络连接检测
- 按进程统计打开文件数
- 按进程统计网络连接数
- 文件描述符泄漏检测
**检测命令**:
```bash
lsof -i -P -n
lsof -p <PID>
```
**安装方式**:
```bash
# CentOS/RHEL
yum install lsof
# Ubuntu/Debian
apt-get install lsof
# openEuler
dnf install lsof
```
**优先级**: 中
---
## 二、JVM检测工具依赖缺失
### 6. jstat (JVM统计监控)
**缺少工具**: JDK 中的 `jstat`
**影响功能**:
- JVM堆内存使用情况
- GC统计(Full GC次数、GC时间)
- 类加载统计
- JIT编译统计
- 新生代/老年代内存使用
**检测命令**:
```bash
jstat -gc <PID> 1000
jstat -gcutil <PID> 1000
jstat -class <PID>
jstat -compiler <PID>
```
**安装方式**:
```bash
# 安装JDK(如果容器内没有)
yum install java-1.8.0-openjdk-devel
# 或通过卷挂载(-v)把主机上的 jstat 映射进容器
docker run -v /usr/bin/jstat:/usr/bin/jstat ...
```
**优先级**: 高
---
### 7. jmap (JVM内存映射)
**缺少工具**: JDK 中的 `jmap`
**影响功能**:
- 堆内存详情
- 堆对象统计
- 内存泄漏分析
**检测命令**:
```bash
jmap -heap <PID>
jmap -histo:live <PID>
```
**安装方式**:
```bash
# 安装JDK
yum install java-1.8.0-openjdk-devel
```
**优先级**: 中
---
## 三、替代方案建议
### 1. sysstat 工具包替代方案
由于 `mpstat` 和 `iostat` 都来自 `sysstat` 包,只需安装一次即可:
```bash
#!/bin/bash
# One-shot installer for the sysstat package.
# Uses the first package manager found, trying dnf, then yum, then apt-get;
# prints a message and exits with status 1 when none of them is available.
if command -v dnf >/dev/null 2>&1; then
dnf install -y sysstat
elif command -v yum >/dev/null 2>&1; then
yum install -y sysstat
elif command -v apt-get >/dev/null 2>&1; then
# apt-get needs a fresh package index before installing
apt-get update && apt-get install -y sysstat
else
echo "无法识别包管理器"
exit 1
fi
```
### 2. /proc 文件系统替代方案
对于部分缺少工具的功能,可以通过读取 `/proc` 文件系统实现:
| 功能 | 原工具 | /proc 替代方案 |
|:---|:---|:---|
| CPU每核心使用率 | mpstat | `/proc/stat` |
| 中断统计 | mpstat | `/proc/interrupts` |
| 软中断统计 | mpstat | `/proc/softirqs` |
| 磁盘IO统计 | iostat | `/proc/diskstats` |
| 内存详细信息 | numactl | `/proc/meminfo` |
| 进程文件描述符 | lsof | `/proc/<PID>/fd` |
| 进程网络连接 | lsof | `/proc/<PID>/fd` + 读取link |
### 3. JVM检测替代方案
对于容器内的JVM检测,可以:
1. **使用/proc文件系统**(已实现部分功能)
- 读取 `/proc/<PID>/status` 获取内存信息
- 读取 `/proc/<PID>/stat` 获取线程信息
2. **使用jcmd工具**(如果可用)
```bash
jcmd <PID> VM.flags
jcmd <PID> GC.heap_info
jcmd <PID> Thread.print
```
3. **通过JMX端口监控**
- 容器启动时暴露JMX端口
- 使用 `jconsole` 或 `jvisualvm` 连接监控
---
## 四、工具依赖汇总表
| 工具 | 包名 | 影响功能数 | 优先级 | 建议 |
|:---|:---|:---|:---|---|
| mpstat | sysstat | 3 | 中 | 安装或使用/proc/stat替代 |
| iostat | sysstat | 1 | 中 | 安装或使用/proc/diskstats替代 |
| smartctl | smartmontools | 1 | 中 | 可选,主要用于物理服务器 |
| numactl | numactl | 1 | 低 | 可选,主要用于NUMA架构服务器 |
| lsof | lsof | 3 | 中 | 安装或使用/proc/<PID>/fd替代 |
| jstat | java-devel | 4 | 高 | 安装JDK或使用/proc替代方案 |
| jmap | java-devel | 1 | 中 | 安装JDK或使用jcmd替代 |
---
## 五、实施建议
### 短期方案(立即可实施)
1. **实现/proc文件系统读取**
- 编写Bash函数读取 `/proc/stat` 实现 CPU每核心使用率
- 编写Bash函数读取 `/proc/diskstats` 实现磁盘IO统计
- 编写Bash函数读取 `/proc/<PID>/fd` 实现文件描述符检测
2. **完善JVM/proc检测**
- 当前已实现基础JVM内存检测
- 增强GC统计功能(通过解析日志)
### 中期方案(需要环境配置)
1. **在基础镜像中安装必要工具**
- 在Dockerfile中添加 `sysstat` 安装
- 在Java容器中保留JDK完整工具
2. **提供自动化安装脚本**
- 在服务自检脚本运行前检测工具是否存在
- 如不存在,提供自动安装选项
### 长期方案(架构优化)
1. **使用agent方式采集数据**
- 在容器内部署轻量级agent
- 通过agent采集主机/容器指标
2. **使用Prometheus/Grafana监控**
- 部署exporter采集各类指标
- 统一监控平台展示
---
## 六、附录:工具检测脚本
```bash
#!/bin/bash
# Tool dependency check for the service self-check scripts.
# For every entry in `tools` ("command:package:priority") it reports whether
# the command is on PATH, then prints a summary and, when something is
# missing, the install commands for the packages that provide it.
echo "========== 服务自检工具依赖检测 =========="
tools=(
    "mpstat:sysstat:中"
    "iostat:sysstat:中"
    "smartctl:smartmontools:中"
    "numactl:numactl:低"
    "lsof:lsof:中"
    "jstat:java-1.8.0-openjdk-devel:高"
    "jmap:java-1.8.0-openjdk-devel:中"
)
missing_tools=()
missing_packages=()
for item in "${tools[@]}"; do
    IFS=':' read -r tool package priority <<< "$item"
    if command -v "$tool" >/dev/null 2>&1; then
        echo "✅ $tool ($package) - 已安装 (优先级: $priority)"
    else
        echo "❌ $tool ($package) - 未安装 (优先级: $priority)"
        missing_tools+=("$tool")
        # Several tools ship in the same package; record each package only
        # once so the install commands below do not repeat it.
        case " ${missing_packages[*]} " in
            *" $package "*) ;;
            *) missing_packages+=("$package") ;;
        esac
    fi
done
echo ""
echo "========== 总结 =========="
echo "缺失工具数量: ${#missing_tools[@]}"
echo "缺失工具: ${missing_tools[*]}"
echo "需要安装的包: ${missing_packages[*]}"
if [ ${#missing_tools[@]} -gt 0 ]; then
    echo ""
    echo "========== 安装建议 =========="
    echo "执行以下命令安装缺失的工具:"
    echo ""
    echo "# CentOS/RHEL/openEuler"
    echo "yum install -y ${missing_packages[*]}"
    echo ""
    echo "# Ubuntu/Debian"
    echo "apt-get update && apt-get install -y ${missing_packages[*]}"
fi
```
---
**文档结束**
# 服务自检功能实现总结
**更新时间**: 2026-05-09
**更新范围**: JVM、MySQL、EMQX、Redis检测功能增强
---
## 一、本次实现功能清单
### JVM检测增强 ✅
1. **GC统计详情**
- Full GC次数统计
- Young GC次数统计
- GC原因分析(从日志中提取主要原因)
- 总GC时间统计
- 状态判断:Full GC >5次警告,>20次严重
2. **JVM堆内存配置解析**
- 解析-Xms(初始堆大小)
- 解析-Xmx(最大堆大小)
- 解析-XX:NewRatio(新生代比例)
- 解析-XX:SurvivorRatio(Survivor比例)
- 解析-XX:MetaspaceSize/MaxMetaspaceSize(元空间大小)
### MySQL深度检测增强 ✅
1. **当前活跃查询详情**
- Sleep连接数统计
- Active查询数统计
- LongRunning查询数统计(执行时间>5秒)
- TOP5耗时查询列表(含ID、USER、DB、TIME、INFO)
2. **缺少索引的高耗时查询**
- performance_schema慢查询统计
- 执行次数、总时间、平均时间、锁时间分析
- 大表(>1000行)缺少索引检测
3. **数据库列表详细输出**
- 各数据库大小统计
- 数据库数量和总大小
4. **InnoDB事务详情**
- 当前活跃事务数
- 事务总运行时间
5. **表统计(ubains库TOP20)**
- 表名、行数、数据大小、索引大小、总大小
- TOP1表的信息
6. **复制状态详情**
- Slave_IO_Running状态
- Slave_SQL_Running状态
- Seconds_Behind_Master延迟
7. **连接错误详细统计**
- 错误类型数量
- 总错误次数
### EMQX检测增强 ✅
1. **客户端连接详情**
- 认证失败统计
- 连接失败统计
- 状态判断:>0次警告
2. **消息丢弃详情**
- 总丢弃数量统计
- 过期丢弃统计
- 无订阅者丢弃统计
- Pubrel超时丢弃统计
- 状态判断:>10条警告,>100条严重
### Redis检测增强 ✅
1. **Keyspace信息**
- 总键数统计
- 过期键数统计
- 过期比例计算
2. **键类型分布采样**
- 采样100个键分析类型分布
- string/hash/list/set/zset/stream类型统计
---
## 二、文件变更清单
| 文件路径 | 变更类型 | 变更内容 |
|:---|:---|:---|
| AuxiliaryTool/ScriptTool/新服务自检/check_server_health.ps1 | 增强 | JVM GC统计、JVM堆内存配置解析 |
| AuxiliaryTool/ScriptTool/新服务自检/check_server_health.ps1 | 增强 | EMQX客户端连接详情、消息丢弃详情 |
| AuxiliaryTool/ScriptTool/新服务自检/check_server_health.ps1 | 增强 | Redis Keyspace信息、键类型分布采样 |
| AuxiliaryTool/ScriptTool/新服务自检/mysql_depth_check.sh | 增强 | 活跃查询详情、慢查询统计 |
| AuxiliaryTool/ScriptTool/新服务自检/mysql_depth_check.sh | 增强 | 数据库列表、InnoDB事务详情、表统计、复制详情、连接错误详情 |
| AuxiliaryTool/ScriptTool/新服务自检/check_server_health.ps1 | 增强 | MySQL新输出项解析 |
---
## 三、剩余未实现功能
### 高优先级(已清零)
### 中优先级(4项)
1. **MySQL变量配置详情** - innodb_buffer_pool_size、max_connections等关键参数
2. **JVM类加载统计** - jstat -class(需要jstat工具)
3. **JVM JIT编译统计** - jstat -compiler(需要jstat工具)
4. **JVM线程状态分析** - BLOCKED/WAITING线程分析
### 低优先级(若干项)
- 系统工具相关(mpstat、iostat、smartctl等)- 可用/proc替代
- 性能趋势分析 - 需要历史数据存储
- 自动化优化建议 - 需要规则引擎
---
## 四、技术实现要点
### JVM检测技术方案
- **问题**: ujava2容器内未安装jstat/jmap工具
- **方案**: 通过解析Docker日志和JVM启动参数获取信息
- **优势**: 无需修改容器镜像,完全兼容现有环境
### MySQL深度检测技术方案
- **问题**: 需要详细的性能和状态信息
- **方案**: 使用performance_schema和information_schema系统库
- **优势**: 标准SQL查询,兼容MySQL 5.7+
### EMQX检测技术方案
- **问题**: 需要详细的连接和消息统计
- **方案**: 使用emqx_ctl metrics命令获取指标
- **优势**: 命令行工具,无需API认证
### Redis检测技术方案
- **问题**: 需要键类型分布信息
- **方案**: 使用SCAN命令采样分析,避免KEYS命令阻塞
- **优势**: 对生产环境影响小
---
## 五、测试建议
1. **JVM检测**: 确保Docker日志中包含GC信息
2. **MySQL检测**: 确保performance_schema已启用
3. **EMQX检测**: 确保emqx_ctl命令可用
4. **Redis检测**: 确保有足够的键用于采样分析
---
**文档结束**
# 功能实现状态更新
**更新时间**: 2026-05-09
**更新内容**: 本次更新实现了多个高优先级功能
---
## 一、已实现的高优先级功能 ✅
### 1. JVM堆内存详情检测
**实现位置**: `AuxiliaryTool/ScriptTool/新服务自检/check_server_health.ps1` (Test-JavaApplication函数)
**已实现功能**:
- ✅ GC统计详情(Young/Full GC次数统计)
- ✅ GC原因分析(从日志中提取Full GC主要原因)
- ✅ GC时间统计(总GC时间)
- ✅ JVM堆内存配置解析(Xmx/Xms/NewRatio/SurvivorRatio/MetaspaceSize等)
**实现方式**:
- 通过解析Docker日志中的GC信息获取GC统计
- 通过解析JVM启动参数获取堆内存配置
- 无需jstat/jmap工具(容器内不可用)
**检测项**:
- GC统计: Full GC次数, Young GC次数, 总GC时间
- GC状态: 根据Full GC次数判断严重程度(>5次警告,>20次严重)
- JVM堆内存配置: 最大堆、新生代比例、Metaspace大小等
---
### 2. MySQL当前活跃查询详情
**实现位置**: `AuxiliaryTool/ScriptTool/新服务自检/mysql_depth_check.sh`
**已实现功能**:
- ✅ SHOW PROCESSLIST详情解析
- ✅ 活跃查询统计(Sleep/Active/LongRunning)
- ✅ TOP5耗时查询列表
**实现方式**:
- 使用SHOW PROCESSLIST获取当前所有连接
- 按执行状态分类统计
- 按执行时间排序输出TOP5
**检测项**:
- ACTIVE_PROCESSLIST: Sleep数量, Active数量, LongRunning数量
- LONG_QUERIES_TOP5: 执行时间最长的5个查询
---
### 3. MySQL缺少索引的高耗时查询
**实现位置**: `AuxiliaryTool/ScriptTool/新服务自检/mysql_depth_check.sh`
**已实现功能**:
- ✅ performance_schema慢查询统计
- ✅ 执行次数、总时间、平均时间、锁时间分析
- ✅ 按总耗时排序输出TOP10
**实现方式**:
- 查询performance_schema.events_statements_summary_by_digest表
- 筛选执行次数>10且总耗时>1秒的查询
- 输出TOP1最耗时查询的详细统计
**检测项**:
- SLOW_QUERY_TOP1: 执行次数, 总时间, 平均时间, 锁时间
- TABLES_WITHOUT_INDEX: 大表(>1000行)缺少索引的表数量
---
### 4. EMQX客户端连接详情
**实现位置**: `AuxiliaryTool/ScriptTool/新服务自检/check_server_health.ps1` (Test-EMQXStatus函数)
**已实现功能**:
- ✅ 客户端连接异常统计
- ✅ 认证失败统计
- ✅ 连接失败统计
**实现方式**:
- 使用emqx_ctl metrics获取连接相关指标
- 解析client.authenticate.error和client.connect.error
**检测项**:
- EMQX连接异常统计: 认证失败次数, 连接失败次数
- 状态判断: >0次警告
---
### 5. EMQX消息丢弃统计
**实现位置**: `AuxiliaryTool/ScriptTool/新服务自检/check_server_health.ps1` (Test-EMQXStatus函数)
**已实现功能**:
- ✅ 消息丢弃总数统计
- ✅ 消息丢弃原因分类(过期、无订阅者、Pubrel超时等)
- ✅ 消息丢弃告警(>10条警告,>100条严重)
**实现方式**:
- 使用emqx_ctl metrics获取messages.dropped相关指标
- 按丢弃原因分类统计
**检测项**:
- EMQX消息丢弃详情: 总丢弃数量, 过期丢弃, 无订阅者丢弃, Pubrel超时丢弃
- 状态判断: >10条警告,>100条严重
---
### 6. Redis Keyspace和键类型统计
**实现位置**: `AuxiliaryTool/ScriptTool/新服务自检/check_server_health.ps1` (Test-RedisStatus函数)
**已实现功能**:
- ✅ Keyspace信息(总键数、过期键数、过期比例)
- ✅ 键类型分布采样统计
**实现方式**:
- 使用INFO keyspace获取各数据库键统计
- 使用SCAN命令采样100个键,分析类型分布
**检测项**:
- Redis Keyspace: 总计键数, 过期键数, 过期比例
- Redis键类型分布: string/hash/list/set/zset/stream类型的数量
---
## 二、剩余未实现的高优先级功能 ❌
### 1. MySQL InnoDB状态详情
**需求**: 获取InnoDB引擎运行状态的完整信息
**当前状态**: 简化版实现(只检测状态行数)
**需要补充**: 事务历史信息、死锁详情、缓冲池状态、检查点状态
**实现方式**:
```bash
docker exec $CONTAINER mysql -uroot -p"$MYSQL_PASSWORD" -e "SHOW ENGINE INNODB STATUS\G"
```
---
### 2. MySQL表统计(ubains库TOP20)
**需求**: 显示ubains库中TOP20表的统计信息
**当前状态**: 未实现
**实现方式**:
```bash
docker exec -e MYSQL_PWD="$MYSQL_PASSWORD" $CONTAINER mysql -uroot -p"$MYSQL_PASSWORD" -e "
SELECT
table_name,
table_rows,
round(data_length / 1024 / 1024, 2) as data_mb,
round(index_length / 1024 / 1024, 2) as index_mb,
round((data_length + index_length) / 1024 / 1024, 2) as total_mb,
round(data_free / 1024 / 1024, 2) as data_free_mb,
engine,
table_collation
FROM information_schema.TABLES
WHERE TABLE_SCHEMA = 'ubains'
ORDER BY (data_length + index_length) DESC
LIMIT 20;
"
```
---
### 3. MySQL复制状态详情
**需求**: 主从复制的详细状态信息
**当前状态**: 简化版实现(只判断MASTER/SLAVE)
**需要补充**: Slave_IO_Running、Slave_SQL_Running、Seconds_Behind_Master、复制延迟
**实现方式**:
```bash
docker exec -e MYSQL_PWD="$MYSQL_PASSWORD" $CONTAINER mysql -uroot -p"$MYSQL_PASSWORD" -e "SHOW SLAVE STATUS\G"
```
---
### 4. MySQL连接错误详细统计
**需求**: 按错误类型分类统计连接错误
**当前状态**: 总数统计
**需要补充**: Access denied、Client does not authenticate、Too many connections、Lost connection
---
## 三、剩余未实现的中优先级功能
### 1. MySQL变量配置详情
### 2. JVM类加载统计 (jstat -class)
### 3. JVM JIT编译统计 (jstat -compiler)
### 4. JVM线程状态分析 (BLOCKED/WAITING)
---
## 四、总结
**本次更新实现**: 6项高优先级功能
**剩余高优先级**: 4项
**剩余中优先级**: 4项
**文件变更**:
- `AuxiliaryTool/ScriptTool/新服务自检/check_server_health.ps1` - JVM/EMQX/Redis增强
- `AuxiliaryTool/ScriptTool/新服务自检/mysql_depth_check.sh` - MySQL深度检测增强
---
**文档结束**
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论