提交 577617f0,作者:陈泽健

fix(server): 修复服务器健康检测脚本的内存监控和日志记录问题

- 优化内存检测逻辑,使用更可靠的 free 命令路径查找机制
- 修复内存使用率计算公式,确保准确统计内存占用情况
- 将基于 awk 的 /proc/meminfo 回退机制替换为纯 shell 实现,简化内存检测流程
- 添加防火墙状态检测的日志输出,提供更详细的防火墙信息
- 改进文件下载功能,增加 pscp 命令的错误处理和调试信息
- 优化容器信息显示格式,添加分隔线提升可读性
- 统一配置文件检测的日志格式,改用 Write-Log 替代 Write-Host
- 修复磁盘空间检查的异常处理,避免因驱动器访问问题中断流程
- 添加各检测模块的分段标识日志,便于追踪检测进度
上级 31306b34
......@@ -1111,31 +1111,34 @@ function Test-ServerResources {
# 4. 检测内存使用情况
Write-Log -Level "INFO" -Message "检测内存使用情况..."
# 使用 available 计算已用:used = total - available;兼容不同 free 格式
$memCmd = @"
LC_ALL=C free -m | awk -v OFS=',' '
/^Mem:/ {
# 兼容:total used free shared buff/cache available
total = `$2
avail = `$7
if (avail == "" || avail == 0) {
# 某些 BusyBox/旧版 free 无 available,退化为: used = `$2 - `$4 (free)
avail = `$4
}
used = total - avail
pct = (total>0) ? (used/total)*100 : 0
printf "%.2f,%.2f,%.1f", total/1024, used/1024, pct
# 使用 free -m(任一路径),简单 awk 计算,避免复杂转义
$memCmd = @'
LC_ALL=C (
/usr/bin/free -m 2>/dev/null || /bin/free -m 2>/dev/null || free -m
) | awk -F"[[:space:]]+" '
$1 == "Mem:" {
total = $2;
used_field = $3;
free_field = $4;
buffcache = $6;
avail = $7;
# 如果 available 字段不存在或为 0,就用 used 字段
if (avail == "" || avail == 0) {
used = used_field;
} else {
used = total - avail;
}
pct = 0;
if (total > 0) { pct = used * 100 / total; }
printf "%.2f,%.2f,%.1f\n", total/1024, used/1024, pct;
}'
"@
'@
$memResult = Invoke-SSHCommand -HostName $Server.IP -User $Server.User -Pass $Server.Pass -Port $Server.Port -Command $memCmd
$memTotal = 0.0
$memUsed = 0.0
$memPercent = 0.0
$needFallback = ($memResult.ExitCode -ne 0 -or -not $memResult.Output)
if (-not $needFallback) {
$memTotal = 0.0; $memUsed = 0.0; $memPercent = 0.0
if ($memResult.ExitCode -eq 0 -and $memResult.Output) {
$line = ($memResult.Output -split "`n" | Where-Object { $_ -match '\S' } | Select-Object -First 1)
Write-Log -Level "INFO" -Message "free 输出(raw): $line"
if ($line) {
$parts = ($line.Trim() -replace "`r","") -split ','
if ($parts.Count -ge 3) {
......@@ -1144,67 +1147,62 @@ LC_ALL=C free -m | awk -v OFS=',' '
$memTotal = [double]::Parse($parts[0], $ci)
$memUsed = [double]::Parse($parts[1], $ci)
$memPercent = [double]::Parse($parts[2], $ci)
} catch { $needFallback = $true }
} else { $needFallback = $true }
} else { $needFallback = $true }
}
if ($memTotal -le 0 -or $memUsed -lt 0 -or $memPercent -le 0) { $needFallback = $true }
if ($needFallback) {
# Fallback: 用 /proc/meminfo 计算 (kB -> GB)
$fallbackCmd = @"
awk -v OFS=',' '
BEGIN { t=0; a=0 }
/^MemTotal:/ { t=`$2 } # kB
/^MemAvailable:/ { a=`$2 } # kB
END {
if (t>0 && a>=0) {
u=t-a
pct = (u/t)*100
printf "%.2f,%.2f,%.1f", t/1024/1024, u/1024/1024, pct
} else {
print "0,0,0"
} catch {
Write-Log -Level "WARN" -Message "free 输出解析失败: $line"
}
}
}
}' /proc/meminfo
"@
}
# Fallback:纯 shell 读取 /proc/meminfo(不依赖 grep/sed/awk)
if ($memTotal -le 0) {
$fallbackCmd = @'
total_kb=0; avail_kb=0
while IFS=: read k v; do
case "$k" in
"MemTotal") total_kb=${v//[^0-9]/};;
"MemAvailable") avail_kb=${v//[^0-9]/};;
"MemFree") if [ -z "$avail_kb" ] || [ "$avail_kb" -eq 0 ]; then avail_kb=${v//[^0-9]/}; fi;;
esac
done < /proc/meminfo
used_kb=$(( total_kb - avail_kb ))
pct=0
if [ "$total_kb" -gt 0 ]; then pct=$(( used_kb * 100 / total_kb )); fi
tot_gb=$(( total_kb / 1024 / 1024 ))
use_gb=$(( used_kb / 1024 / 1024 ))
printf "%d,%d,%d\n" "$tot_gb" "$use_gb" "$pct"
'@
$fbRes = Invoke-SSHCommand -HostName $Server.IP -User $Server.User -Pass $Server.Pass -Port $Server.Port -Command $fallbackCmd
if ($fbRes.ExitCode -eq 0 -and $fbRes.Output) {
$fbLine = ($fbRes.Output -split "`n" | Where-Object { $_ -match '\S' } | Select-Object -First 1)
Write-Log -Level "INFO" -Message "meminfo Fallback 输出(raw): $fbLine"
if ($fbLine) {
$fbParts = ($fbLine.Trim() -replace "`r","") -split ','
if ($fbParts.Count -ge 3) {
$ci = [System.Globalization.CultureInfo]::InvariantCulture
$memTotal = [double]::Parse($fbParts[0], $ci)
$memUsed = [double]::Parse($fbParts[1], $ci)
$memPercent = [double]::Parse($fbParts[2], $ci)
$memTotal = [double]$fbParts[0]
$memUsed = [double]$fbParts[1]
$memPercent = [double]$fbParts[2]
}
}
}
}
# 规整与保护
if ($memTotal -gt 0) {
if ($memUsed -lt 0) { $memUsed = 0 }
if ($memUsed -gt $memTotal) { $memUsed = $memTotal }
$memPercent = if ($memTotal -gt 0) { [math]::Round(($memUsed / $memTotal) * 100, 1) } else { 0 }
$memPercent = [math]::Round(($memUsed / $memTotal) * 100, 1)
$memTotal = [math]::Round($memTotal, 2)
$memUsed = [math]::Round($memUsed, 2)
} else {
Write-Log -Level "WARN" -Message "内存信息获取失败,free/awk或/proc/meminfo不可用"
Write-Log -Level "WARN" -Message "内存信息获取失败,目标机缺少可用的 free/awk 或 /proc/meminfo 读取异常"
}
$memStatus = if ($memPercent -lt 70) { "正常" } elseif ($memPercent -lt 90) { "警告" } else { "危险" }
$memColor = if ($memPercent -lt 70) { "SUCCESS" } elseif ($memPercent -lt 90) { "WARN" } else { "ERROR" }
Write-Log -Level $memColor -Message " 内存使用: ${memUsed}GB / ${memTotal}GB (${memPercent}%) [$memStatus]"
$results.Memory = @{
Total = $memTotal
Used = $memUsed
Percent = $memPercent
Status = $memStatus
Success = ($memPercent -lt 90)
}
$results.Memory = @{ Total = $memTotal; Used = $memUsed; Percent = $memPercent; Status = $memStatus; Success = ($memPercent -lt 90) }
# 5. 检测磁盘空间情况
Write-Log -Level "INFO" -Message "检测磁盘空间情况..."
$diskCmd = "df -h | grep -E '^/dev/' | awk '{print `$1,`$2,`$3,`$5,`$6}'"
......@@ -1322,6 +1320,21 @@ awk -v OFS=',' '
}
}
# 在资源分析阶段就直接打印防火墙概要
if ($firewallActive) {
Write-Log -Level "INFO" -Message ("[FIREWALL] 当前状态: 已启用 ({0})" -f $firewallType)
if ($openPorts -and $openPorts -ne "") {
Write-Log -Level "INFO" -Message ("[FIREWALL] 开放端口/服务: {0}" -f $openPorts)
} else {
Write-Log -Level "INFO" -Message "[FIREWALL] 未检测到具体开放端口列表"
}
} else {
Write-Log -Level "WARN" -Message ("[FIREWALL] 当前状态: 未启用 ({0})" -f $firewallType)
if ($openPorts -and $openPorts -is [string]) {
Write-Log -Level "INFO" -Message ("[FIREWALL] 描述: {0}" -f $openPorts)
}
}
# 触发远端修复(仅当未启用或类型未知)
if (-not $firewallActive -or ($firewallType -eq "unknown")) {
Write-Log -Level "WARN" -Message "[FIREWALL] 检测到防火墙未启用或状态异常,准备执行远端修复"
......@@ -1374,6 +1387,19 @@ awk -v OFS=',' '
$results.Firewall.Type = $firewallType
$results.Firewall.OpenPorts = $openPorts
$results.Firewall.Status = if ($firewallActive) { "已启用" } else { "未启用" }
# 修复后复检结果输出
if ($firewallActive) {
Write-Log -Level "INFO" -Message ("[FIREWALL] 修复后状态: 已启用 ({0})" -f $firewallType)
if ($openPorts -and $openPorts -ne "") {
Write-Log -Level "INFO" -Message ("[FIREWALL] 修复后开放端口/服务: {0}" -f $openPorts)
} else {
Write-Log -Level "INFO" -Message "[FIREWALL] 修复后仍未检测到具体开放端口列表"
}
} else {
Write-Log -Level "WARN" -Message ("[FIREWALL] 修复后状态仍为未启用 ({0})" -f $firewallType)
}
} else {
# 记录失败消息
$errMsg = "未知错误"
......@@ -1735,57 +1761,66 @@ function Export-ServiceLogs {
continue
}
# 使用 pscp 下载文件
# 使用 pscp 下载文件(显式记录错误,适配大文件)
$pscpArgs = @(
"-scp", # 显式使用 SCP 协议
"-batch", # 非交互
"-P", $Server.Port,
"-l", $Server.User,
"-pw", $Server.Pass,
"-batch",
"$($Server.User)@$($Server.IP):$remotePath",
$localPath
)
try {
Write-Log -Level "INFO" -Message (" 调用 pscp: {0} {1}" -f $script:PSCP_PATH, ($pscpArgs -join ' '))
$pscpResult = & $script:PSCP_PATH @pscpArgs 2>&1
$exitCode = $LASTEXITCODE
# 如果失败且是因为主机密钥问题,则自动接受密钥后重试
# 如果失败且是主机密钥问题,自动接受后重试一次
if ($exitCode -ne 0 -and ($pscpResult -match "host key" -or $pscpResult -match "Cannot confirm")) {
$cmdLine = "echo y | `"$($script:PSCP_PATH)`" -P $($Server.Port) -l $($Server.User) -pw `"$($Server.Pass)`" `"$($Server.User)@$($Server.IP):$remotePath`" `"$localPath`""
$cmdLine = "echo y | `"$($script:PSCP_PATH)`" -scp -batch -P $($Server.Port) -l $($Server.User) -pw `"$($Server.Pass)`" `"$($Server.User)@$($Server.IP):$remotePath`" `"$localPath`""
Write-Log -Level "WARN" -Message " 检测到主机密钥提示,自动接受并重试: $cmdLine"
$pscpResult = cmd /c $cmdLine 2>&1
$exitCode = $LASTEXITCODE
}
if ($exitCode -eq 0 -and (Test-Path $localPath)) {
$fileSize = (Get-Item $localPath).Length
$fileSizeKB = [math]::Round($fileSize / 1024, 2)
Write-Log -Level "SUCCESS" -Message " [成功] 已导出 ($fileSizeKB KB)"
$exportedFiles += @{
Name = $localFileName
Name = $localFileName
RemotePath = $remotePath
LocalPath = $localPath
Size = $fileSize
LocalPath = $localPath
Size = $fileSize
}
}
else {
Write-Log -Level "ERROR" -Message " [失败] 导出失败"
# 记录完整的 pscp 输出,便于排查大文件失败原因(超时/中断/权限等)
Write-Log -Level "ERROR" -Message (" [失败] 导出失败,ExitCode={0}" -f $exitCode)
if ($pscpResult) {
# 将 pscp 输出压缩成一行,避免刷屏
$oneLine = ($pscpResult -join " ") -replace '\s+', ' '
Write-Log -Level "ERROR" -Message (" [pscp] 输出: {0}" -f $oneLine)
}
$failedFiles += @{
Name = $localFileName
Name = $localFileName
RemotePath = $remotePath
Reason = "下载失败: $pscpResult"
Reason = "下载失败: ExitCode=$exitCode; Output=$pscpResult"
}
}
}
catch {
Write-Log -Level "ERROR" -Message " [失败] 导出异常: $_"
Write-Log -Level "ERROR" -Message " [失败] 导出异常: $($_.Exception.Message)"
$failedFiles += @{
Name = $localFileName
Name = $localFileName
RemotePath = $remotePath
Reason = "异常: $_"
Reason = "异常: $($_.Exception.Message)"
}
}
}
Write-Host ""
Write-Log -Level "INFO" -Message "日志导出完成: 成功 $($exportedFiles.Count) 个,失败 $($failedFiles.Count) 个"
......@@ -2124,24 +2159,53 @@ function Show-HealthReport {
$health = if ($c.Health) { $c.Health } else { "-" }
$rp = if ($c.RestartPolicy) { $c.RestartPolicy } else { "-" }
$rc = if ($c.RestartCount -ne $null) { $c.RestartCount } else { "-" }
$ports = @(); if ($c.Ports -and $c.Ports.Count -gt 0) { $ports = $c.Ports }
$nets = @(); if ($c.Networks -and $c.Networks.Count -gt 0) { $nets = $c.Networks }
$nets = @(); if ($c.Networks -and $c.Networks.Count -gt 0) { $nets = $c.Networks }
$showPorts = if ($ports.Count -gt 6) { ($ports | Select-Object -First 6) + "..." } else { $ports }
$showNets = if ($nets.Count -gt 6) { ($nets | Select-Object -First 6) + "..." } else { $nets }
$showNets = if ($nets.Count -gt 6) { ($nets | Select-Object -First 6) + "..." } else { $nets }
$mountsStr = '-'
if ($c.Mounts -and $c.Mounts.Count -gt 0) {
$showMounts = if ($c.Mounts.Count -gt 3) { ($c.Mounts | Select-Object -First 3) + '...' } else { $c.Mounts }
$mountsStr = ($showMounts -join '; ')
}
$szParts = @(); if ($c.SizeRw -ne $null) { $szParts += ("rw={0}" -f $c.SizeRw) }; if ($c.SizeRootFs -ne $null) { $szParts += ("root={0}" -f $c.SizeRootFs) }
$szParts = @()
if ($c.SizeRw -ne $null) { $szParts += ("rw={0}" -f $c.SizeRw) }
if ($c.SizeRootFs -ne $null) { $szParts += ("root={0}" -f $c.SizeRootFs) }
$szStr = if ($szParts.Count -gt 0) { ($szParts -join ', ') } else { '-' }
$md += "- $statusIcon 名称: $($c.Name) | 镜像: $($c.Image) | 状态: $($c.Status) | 健康: $health | 重启: $rp/$rc | IP: $ip"
$portsStr = '-'; if ($null -ne $showPorts) { if ($showPorts -is [string]) { $portsStr = $showPorts } else { $tmp = @($showPorts); if ($tmp.Count -gt 0) { $portsStr = ($tmp -join ', ') } } }
$netsStr = '-'; if ($null -ne $showNets) { if ($showNets -is [string]) { $netsStr = $showNets } else { $tmpn = @($showNets); if ($tmpn.Count -gt 0) { $netsStr = ($tmpn -join ', ') } } }
$portsStr = '-'
if ($null -ne $showPorts) {
if ($showPorts -is [string]) {
$portsStr = $showPorts
} else {
$tmp = @($showPorts)
if ($tmp.Count -gt 0) { $portsStr = ($tmp -join ', ') }
}
}
$netsStr = '-'
if ($null -ne $showNets) {
if ($showNets -is [string]) {
$netsStr = $showNets
} else {
$tmpn = @($showNets)
if ($tmpn.Count -gt 0) { $netsStr = ($tmpn -join ', ') }
}
}
$md += " - 端口: $portsStr"
$md += " - 网络: $netsStr"
if ($mountsStr -ne '-') { $md += " - 挂载: $mountsStr" }
$md += " - 大小: $szStr"
# 在容器之间插入 Markdown 分割线
$md += "---"
}
}
$md += ""
......@@ -2202,7 +2266,7 @@ function Check-TraditionalPlatformIPs {
[string]$Password
)
Write-Host "开始检测传统平台配置文件中的IP地址..." -ForegroundColor Yellow
Write-Log -Level "INFO" -Message "[CFG] 开始检测传统平台配置文件 IP..."
$Paths = @(
"/var/www/java/api-java-meeting2.0/config",
......@@ -2217,26 +2281,29 @@ function Check-TraditionalPlatformIPs {
$hasUnauthorized = $false
foreach ($Path in $Paths) {
Write-Host "检测路径: $Path" -ForegroundColor Cyan
Write-Log -Level "INFO" -Message ("[CFG] 检测路径: {0}" -f $Path)
$Command = "grep -Eo '([0-9]{1,3}\.){3}[0-9]{1,3}' $Path/* 2>/dev/null | sort -u"
$Result = Invoke-SSHCommand -HostName $ServerIP -User $Username -Pass $Password -Command $Command
$Result = Invoke-SSHCommand -HostName $ServerIP -User $Username -Pass $Password -Command $Command
if ($Result -and $Result.ExitCode -eq 0 -and $Result.Output) {
$IPs = $Result.Output -split "`n" | Where-Object { $_ -match '^\d{1,3}(\.\d{1,3}){3}$' }
foreach ($IP in $IPs) {
if ($AllowedIPs -notcontains $IP) {
$hasUnauthorized = $true
Write-Warning "未授权IP: $IP (路径: $Path)"
Write-Log -Level "WARN" -Message ("[CFG] 未授权 IP: {0} (路径: {1})" -f $IP, $Path)
}
}
# 合法 IP 不打印
} else {
Write-Host "无IP或路径不存在: $Path" -ForegroundColor Yellow
# 无 IP 或路径不存在:认为是“无配置/无 IP”,不打印,避免噪音
continue
}
}
if (-not $hasUnauthorized) {
Write-Host "配置文件IP均为目标服务器IP/本地环回/默认网桥IP" -ForegroundColor Green
Write-Log -Level "SUCCESS" -Message "[CFG] 配置文件 IP 均为目标服务器 IP / 本地环回 / 默认网桥 IP 或未配置 IP"
}
Write-Host "传统平台配置文件IP检测完成." -ForegroundColor Green
Write-Log -Level "INFO" -Message "[CFG] 传统平台配置文件 IP 检测完成"
}
# ================================
......@@ -2763,10 +2830,17 @@ function DataBakup {
if (-not (Test-Path $localOutDir)) { New-Item -ItemType Directory -Path $localOutDir | Out-Null }
$localFile = Join-Path $localOutDir $tarName
# 检查磁盘空间(至少比远端文件大小大 10% 余量;无法获取时仅检查 1GB 余量)
$drive = Get-PSDrive -Name (Split-Path $localOutDir -Qualifier)
if ($drive -and ($drive.Free -lt 1GB)) {
Write-Log -Level "WARN" -Message "[BAK] 本地磁盘可用空间不足 1GB,可能导致下载失败"
# 检查磁盘空间(尽量检查,失败则忽略,不中断流程)
try {
$qualifier = Split-Path $localOutDir -Qualifier
if ($qualifier) {
$drive = Get-PSDrive -Name $qualifier.TrimEnd(':')
if ($drive -and ($drive.Free -lt 1GB)) {
Write-Log -Level "WARN" -Message "[BAK] 本地磁盘可用空间不足 1GB,可能导致下载失败"
}
}
} catch {
Write-Log -Level "WARN" -Message "[BAK] 无法检测本地磁盘空间,已忽略: $($_.Exception.Message)"
}
# 构造下载命令,添加 -batch 防交互
......@@ -2920,23 +2994,28 @@ function Main {
# 检测配置文件中的IP地址
Write-Host ""
Write-Log -Level "INFO" -Message "========== 开始检测配置文件 IP =========="
if ($platformType -eq "new") {
Check-NewPlatformIPs -ServerIP $server.IP -Username $server.User -Password $server.Pass
} elseif ($platformType -eq "old") {
Check-TraditionalPlatformIPs -ServerIP $server.IP -Username $server.User -Password $server.Pass
}
Write-Log -Level "INFO" -Message "========== 结束检测配置文件 IP =========="
# 检测 NTP 服务
Write-Log -Level "INFO" -Message "开始检测 NTP 服务..."
Write-Log -Level "INFO" -Message "========== 开始检测NTP服务 =========="
$ntpResults = Check-NTPService -ServerIP $server.IP -Username $server.User -Password $server.Pass
Write-Log -Level "INFO" -Message "NTP 服务检测完成."
# 输出 NTP 摘要
if ($ntpResults) {
Write-Log -Level "INFO" -Message ("NTP 结果: 状态={0} | 详情={1}" -f $ntpResults.Status, $ntpResults.Detail)
}
Write-Log -Level "INFO" -Message "========== 结束检测NTP服务 =========="
# 文件权限检测
Write-Log -Level "INFO" -Message "========== 开始检测文件权限 =========="
$filePermResults = Check-FilePermissions -Server $server -PlatformType $platformType -SystemInfo $systemInfo
Write-Log -Level "INFO" -Message "========== 结束检测文件权限 =========="
# 现场数据备份 (可选)
$bakChoice = Read-Host "是否执行现场数据备份并下载? (y/n) [默认: n]"
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论