提交 d140c5e1 authored 作者: 陈泽健's avatar 陈泽健

feat(common): 添加Shell脚本上传功能并新增服务自检优化文档

- 实现Upload-ShellScript函数用于上传Shell脚本到远程服务器
- 创建服务自检检测优化需求文档_PRD_服务自检检测优化_需求文档.md
- 创建服务自检检测优化计划执行文档_PLAN_计划执行文档_服务自检检测优化.md
- 需求文档涵盖安全合规检测、MySQL/Redis深度检测、系统资源增强等功能
- 计划文档详细描述双版本同步策略和实施时间线
- 上传功能支持common.sh基础函数库和指定检测脚本的远程传输
上级 271ab62c
......@@ -60,7 +60,7 @@ $PSDefaultParameterValues['*:Encoding'] = 'utf8'
# ================================
# 全局配置
# ================================
$SCRIPT_DIR = Split-Path -Parent $MyInvocation.MyCommand.Path
$global:SCRIPT_DIR = Split-Path -Parent $MyInvocation.MyCommand.Path
$SSH_TIMEOUT = 30
# 脚本版本号(用于日志与报告)
......@@ -77,7 +77,7 @@ $global:PreferredSSHTool = $null
# ================================
# 日志配置
# ================================
$LOG_DIR = Join-Path $SCRIPT_DIR "logs"
$LOG_DIR = Join-Path $global:SCRIPT_DIR "logs"
$LOG_TIMESTAMP = Get-Date -Format "yyyyMMdd_HHmmss"
$global:LOG_FILE = Join-Path $LOG_DIR "health_check_$LOG_TIMESTAMP.log"
......@@ -275,10 +275,10 @@ $Global:OldPlatformUpythonLogs = @(
# 模块化检测模块导入
# ================================
# 获取模块目录路径
$ModulePath = Join-Path $SCRIPT_DIR "modules"
$ModulePath = Join-Path $global:SCRIPT_DIR "modules"
# 强制卸载已加载的模块(确保使用最新版本)
$ModulesToUnload = @("ServiceCheck", "Common", "DNSCheck", "ServerResourceAnalysis", "NTPCheck", "ContainerCheck", "ConfigIPCheck", "MiddlewareCheck", "AndroidCheck", "LogExport", "Report")
$ModulesToUnload = @("ServiceCheck", "Common", "DNSCheck", "ServerResourceAnalysis", "NTPCheck", "ContainerCheck", "ConfigIPCheck", "MiddlewareCheck", "AndroidCheck", "LogExport", "Report", "ServerProfile", "DataBackup", "FilePermission", "ShellAdapter")
foreach ($ModuleName in $ModulesToUnload) {
if (Get-Module -Name $ModuleName -ErrorAction SilentlyContinue) {
Remove-Module -Name $ModuleName -Force -ErrorAction SilentlyContinue
......@@ -302,6 +302,10 @@ catch {
# 导入各功能模块
$ModulesToImport = @(
"ServerProfile.psm1",
"DataBackup.psm1",
"FilePermission.psm1",
"ShellAdapter.psm1",
"ServiceCheck.psm1",
"DNSCheck.psm1",
"ServerResourceAnalysis.psm1",
......@@ -338,27 +342,57 @@ foreach ($Module in $ModulesToImport) {
# ================================
# Names of the functions that must be available once all modules are imported.
# The list is grouped by the module each function is expected to come from.
$ExpectedFunctions = @(
# ServerProfile module
"Test-Dependencies",
"Select-Server",
"Test-SSHConnection",
"Get-PlatformType",
"Get-SystemType",
"Get-UjavaSystemVariant",
# DataBackup module
"Download-RemoteFile",
"DataBakup",
# FilePermission module
"Check-FilePermissions",
# ShellAdapter module
"Test-ServerResources-Shell",
"Test-MQTTConnection-Shell",
"Test-DNSResolution-Shell",
"Test-NTPService-Shell",
"Test-Firewall-Shell",
"Test-ConfigIPs-Shell",
"Test-UjavaServices-Shell",
# ServiceCheck module
"Test-UjavaServices",
"Test-UjavaHostServices",
"Test-UjavaOldPlatformContainerServices",
"Test-UjavaOldPlatformHostServices",
"Test-ContainerPorts",
"Repair-ExternalMeetingService",
# DNSCheck module
"Test-DNSResolution",
# ServerResourceAnalysis module
"Test-ServerResources",
# ContainerCheck module
"Get-ContainerDetails",
"Test-ContainerInformation",
# MiddlewareCheck module
"Test-MQTTConnection",
"Test-RedisConnection",
"Test-MySQLConnection",
"Test-FastDFSConnection",
# ConfigIPCheck module
"Test-NewPlatformIPs",
"Test-TraditionalPlatformIPs",
"Test-NewPlatformConsole",
"Test-TraditionalPlatformConsole",
# NTPCheck module
"Test-NTPService",
# AndroidCheck module
"Test-AndroidDeviceHealth",
# LogExport module
"Export-ConfigAndLogs",
# Report module
"Show-HealthReport"
)
......@@ -379,1897 +413,15 @@ if ($missingFunctions.Count -gt 0) {
Write-Host "[模块加载] 所有模块函数已成功导入并可用" -ForegroundColor Green
}
# ================================
# 公共基础函数(保留在主脚本中)
# 注意:Write-Log、Invoke-SSHCommand、Copy-File-To-Remote、Upload_the_repair_script 已移至 Common.psm1
# ================================
# ujava system variant: meeting-booking system vs. unified platform (PRD addendum).
# Rule, evaluated on the remote host over SSH:
#   - /var/www/java/unifiedPlatform exists => "unified"
#   - otherwise                            => "meeting"
# The platform type is deliberately not checked; only the directory decides.
#
# Parameters:
#   Server - hashtable with IP, User, Pass and Port entries, forwarded to Invoke-SSHCommand.
# Returns: the string "unified" or "meeting". When the SSH call yields no usable
#          output the function falls back to "meeting".
# ================================
function Get-UjavaSystemVariant {
    param(
        [Parameter(Mandatory=$true)] [hashtable]$Server
    )
    $cmd = "[ -d /var/www/java/unifiedPlatform ] && echo 'UNIFIED' || echo 'MEETING'"
    $res = Invoke-SSHCommand -HostName $Server.IP -User $Server.User -Pass $Server.Pass -Port $Server.Port -Command $cmd

    # Take the last non-blank output line. It can be missing entirely (e.g. the
    # connection failed), so guard before calling .Trim() to avoid a null-method error.
    $last = $null
    if ($res -and $res.Output) {
        $last = ($res.Output -split "`n") | Where-Object { $_ -match '\S' } | Select-Object -Last 1
    }
    if ($last -and $last.ToString().Trim() -eq "UNIFIED") { return "unified" }
    return "meeting"
}
# ================================
# Shell脚本调用函数(混合架构支持)
# ================================
# Upload a shell check script, together with the shared common.sh library,
# to a directory on the remote server.
#
# Parameters:
#   Server     - hashtable with IP, User, Pass and Port entries
#                (forwarded to Invoke-SSHCommand and Copy-File-To-Remote).
#   ScriptName - file name of the check script under <SCRIPT_DIR>\lib\shell.
#   RemotePath - remote directory that receives both files (default /tmp/health_check).
# Returns: $true when both files were uploaded; $false on any failure
#          (an ERROR entry is written through Write-Log).
function Upload-ShellScript {
    param(
        [Parameter(Mandatory=$true)] [hashtable]$Server,
        [Parameter(Mandatory=$true)] [string]$ScriptName,
        [Parameter(Mandatory=$false)] [string]$RemotePath = "/tmp/health_check"
    )

    # Validate the local files first: a missing script should fail fast without
    # creating (and leaving behind) a directory on the remote host.
    $localCommonPath = Join-Path $SCRIPT_DIR "lib\shell\common.sh"
    $localScriptPath = Join-Path $SCRIPT_DIR "lib\shell\$ScriptName"
    foreach ($localPath in @($localCommonPath, $localScriptPath)) {
        if (-not (Test-Path $localPath)) {
            Write-Log -Level "ERROR" -Message "[SHELL] 本地脚本不存在: $localPath"
            return $false
        }
    }

    # Create the remote working directory.
    $cmd = "mkdir -p $RemotePath 2>/dev/null"
    $mkdirResult = Invoke-SSHCommand -HostName $Server.IP -User $Server.User -Pass $Server.Pass -Port $Server.Port -Command $cmd
    if ($mkdirResult.ExitCode -ne 0) {
        Write-Log -Level "ERROR" -Message "[SHELL] 创建远程目录失败: $RemotePath"
        return $false
    }

    # Upload the shared function library.
    $commonUploadResult = Copy-File-To-Remote -LocalPath $localCommonPath -Server $Server -RemoteDir $RemotePath
    if (-not $commonUploadResult) {
        Write-Log -Level "ERROR" -Message "[SHELL] 上传基础函数库失败: common.sh"
        return $false
    }

    # Upload the check script itself.
    $scriptUploadResult = Copy-File-To-Remote -LocalPath $localScriptPath -Server $Server -RemoteDir $RemotePath
    if (-not $scriptUploadResult) {
        Write-Log -Level "ERROR" -Message "[SHELL] 上传脚本失败: $ScriptName"
        return $false
    }
    return $true
}
# Upload and run a shell check script on the remote server and return its output.
#
# Strategy:
#   1. Preferred: run the script with output redirected to a remote file,
#      base64-encode that file, download it with pscp and decode it locally.
#   2. Fallback: run the script directly over SSH and extract the first
#      brace-balanced JSON object from the captured output.
#
# Parameters:
#   Server     - hashtable with IP, User, Pass and Port entries.
#   ScriptName - script file name (must exist under lib\shell, see Upload-ShellScript).
#   Arguments  - argument string appended verbatim to the remote command line.
#   RemotePath - remote working directory (default /tmp/health_check).
# Returns: the decoded script output (base64 path), the extracted JSON text
#          (fallback path), or $null when nothing usable was produced.
function Invoke-RemoteShellCheck {
    param(
        [Parameter(Mandatory=$true)] [hashtable]$Server,
        [Parameter(Mandatory=$true)] [string]$ScriptName,
        [Parameter(Mandatory=$false)] [string]$Arguments = "",
        [Parameter(Mandatory=$false)] [string]$RemotePath = "/tmp/health_check"
    )

    # Upload the script and common.sh.
    $uploadSuccess = Upload-ShellScript -Server $Server -ScriptName $ScriptName -RemotePath $RemotePath
    if (-not $uploadSuccess) {
        Write-Log -Level "ERROR" -Message "[SHELL] 脚本上传失败: $ScriptName"
        return $null
    }

    # Remote file that receives the script output.
    $outputFile = "/tmp/health_check_output.txt"

    # Probe for the base64 tool on the remote host.
    $checkCmd = "which base64 || echo 'not_found'"
    $checkResult = Invoke-SSHCommand -HostName $Server.IP -User $Server.User -Pass $Server.Pass -Port $Server.Port -Command $checkCmd
    if ($checkResult.Output -match "base64") {
        # Run the script and leave a base64-encoded copy of its output on the remote host.
        $cmd = "cd $RemotePath && chmod +x common.sh $ScriptName && ./$ScriptName $Arguments > $outputFile 2>&1 && base64 $outputFile > ${outputFile}.b64 && rm -f $outputFile"
        Invoke-SSHCommand -HostName $Server.IP -User $Server.User -Pass $Server.Pass -Port $Server.Port -Command $cmd | Out-Null

        # Local staging location for the download.
        $localTempDir = Join-Path $env:TEMP "health_check"
        if (-not (Test-Path $localTempDir)) {
            New-Item -ItemType Directory -Path $localTempDir -Force | Out-Null
        }
        $localBase64File = Join-Path $localTempDir "output.b64"
        # A file left over from an earlier run must not be mistaken for this run's result.
        Remove-Item $localBase64File -Force -ErrorAction SilentlyContinue

        if ($global:PSCP_PATH -and (Test-Path $global:PSCP_PATH)) {
            $pscpArgs = @(
                "-P", $Server.Port,
                "-l", $Server.User,
                "-pw", $Server.Pass,
                "-batch",
                "$($Server.User)@$($Server.IP):${outputFile}.b64",
                $localBase64File
            )
            & $global:PSCP_PATH @pscpArgs 2>&1 | Out-Null

            # Only accept downloads longer than 100 bytes (same acceptance threshold as before).
            if ((Test-Path $localBase64File) -and ((Get-Item $localBase64File).Length -gt 100)) {
                try {
                    $base64Content = Get-Content $localBase64File -Raw
                    # Strip line breaks inserted by the base64 tool.
                    $base64Content = $base64Content -replace "`r", "" -replace "`n", ""
                    $decodedContent = [System.Text.Encoding]::UTF8.GetString([System.Convert]::FromBase64String($base64Content))
                    Remove-Item $localBase64File -Force -ErrorAction SilentlyContinue
                    Remove-Item $localTempDir -Force -ErrorAction SilentlyContinue
                    Invoke-SSHCommand -HostName $Server.IP -User $Server.User -Pass $Server.Pass -Port $Server.Port -Command "rm -f ${outputFile}.b64; rm -rf $RemotePath" | Out-Null
                    return $decodedContent
                } catch {
                    Write-Log -Level "ERROR" -Message "[SHELL] Base64解码失败: $($_.Exception.Message)"
                }
            }
        }
    }

    # Fallback: run the script directly and filter its output.
    # This must happen BEFORE the remote directory is removed, otherwise the
    # `cd $RemotePath` below can never succeed.
    Write-Log -Level "WARN" -Message "[SHELL] Base64方法失败,使用原始方法: $ScriptName"
    $cmd = "cd $RemotePath && chmod +x common.sh $ScriptName && ./$ScriptName $Arguments 2>&1"
    $result = Invoke-SSHCommand -HostName $Server.IP -User $Server.User -Pass $Server.Pass -Port $Server.Port -Command $cmd

    # Remove all remote temporary files now that the script has run.
    Invoke-SSHCommand -HostName $Server.IP -User $Server.User -Pass $Server.Pass -Port $Server.Port -Command "rm -f $outputFile ${outputFile}.b64; rm -rf $RemotePath" | Out-Null

    if ($result -and $result.Output) {
        if ($result.Output -is [array]) {
            # Keep line boundaries; joining with "" would glue a banner line to the JSON start.
            $output = $result.Output -join "`n"
        } else {
            $output = $result.Output.ToString()
        }

        # Drop everything around the first brace-balanced JSON object (plink noise etc.).
        $lines = $output -split "`n"
        $jsonStarted = $false
        $jsonLines = @()
        $braceCount = 0
        foreach ($line in $lines) {
            $trimmed = $line.Trim()
            if (-not $jsonStarted -and $trimmed.StartsWith("{")) {
                $jsonStarted = $true
            }
            if ($jsonStarted) {
                $jsonLines += $line
                $braceCount += ($line.ToCharArray() | Where-Object { $_ -eq "{" } | Measure-Object).Count
                $braceCount -= ($line.ToCharArray() | Where-Object { $_ -eq "}" } | Measure-Object).Count
                if ($braceCount -le 0 -and $trimmed.EndsWith("}")) {
                    break
                }
            }
        }
        if ($jsonLines.Count -gt 0) {
            return $jsonLines -join "`n"
        }
    }
    return $null
}
# Extract the first brace-balanced JSON object from raw shell output and
# convert it with ConvertFrom-Json.
#
# Parameters:
#   JsonString - raw text returned by a remote check script; may contain
#                non-JSON noise before or after the object.
# Returns: the parsed object, or $null when the input is empty, no JSON object
#          is found, or parsing fails (an ERROR entry is logged in each case).
function ConvertFrom-ShellJson {
    param(
        [Parameter(Mandatory=$false)] [string]$JsonString = ""
    )

    # Nothing to parse.
    if ([string]::IsNullOrEmpty($JsonString)) {
        Write-Log -Level "ERROR" -Message "[SHELL] Shell脚本未返回任何输出"
        return $null
    }

    try {
        $collected = @()
        $depth = 0
        $started = $false

        foreach ($rawLine in ($JsonString -split "`n")) {
            $text = $rawLine.Trim()

            # Ignore blank lines.
            if ($text.Length -eq 0) { continue }

            # Ignore well-known command echoes and error/status lines (including localized ones).
            if ($text -match "^(chmod|mkdir|rm|cd|\$|pscp:|plink:|Fatal|Network|Connection|已用|总用量|Mem:|Swap:|Cpu)") { continue }

            # Everything before the first line that opens with "{" is discarded.
            if (-not $started) {
                if (-not $text.StartsWith("{")) { continue }
                $started = $true
            }

            $collected += $rawLine

            # Track nesting depth by counting braces on the untrimmed line.
            $opened = $rawLine.Split([char]'{').Length - 1
            $closed = $rawLine.Split([char]'}').Length - 1
            $depth = $depth + $opened - $closed

            # Stop as soon as the braces balance on a line that ends with "}".
            if ($depth -le 0 -and $text.EndsWith("}")) {
                break
            }
        }

        if ($collected.Count -eq 0) {
            Write-Log -Level "ERROR" -Message "[SHELL] 未找到JSON输出"
            return $null
        }

        $cleanJson = $collected -join "`n"
        return $cleanJson | ConvertFrom-Json
    }
    catch {
        Write-Log -Level "ERROR" -Message "[SHELL] JSON解析失败: $($_.Exception.Message)"
        Write-Log -Level "INFO" -Message "[SHELL] 原始输出前200字符: $($JsonString.Substring(0, [Math]::Min(200, $JsonString.Length)))"
        return $null
    }
}
# ================================
# Shell模式包装函数
# ================================
# Resource check (shell mode). The remote script writes its report to a file
# which is then downloaded, so long output is not truncated by the SSH channel.
#
# Parameters:
#   Server - hashtable with IP, User, Pass and Port entries.
# Returns: the hashtable produced by Convert-ResourceCheckToStandard on success;
#          the result of Test-ServerResources when the upload fails;
#          $null when the download fails or an exception occurs.
function Test-ServerResources-Shell {
    param(
        [Parameter(Mandatory=$true)] [hashtable]$Server
    )
    Write-Host ""
    Write-Log -Level "INFO" -Message "========== 开始资源检测 (Shell模式, 文件方式) =========="

    # Upload the check script; fall back to the PowerShell implementation if that fails.
    $remotePath = "/tmp/health_check_resources"
    $uploadSuccess = Upload-ShellScript -Server $Server -ScriptName "resource_check.sh" -RemotePath $remotePath
    if (-not $uploadSuccess) {
        Write-Log -Level "ERROR" -Message "[资源] 脚本上传失败,回退到PowerShell模式"
        return Test-ServerResources -Server $Server
    }

    # Remote report file and a timestamped local copy.
    $remoteOutputFile = "/tmp/resource_check_output.txt"
    $timestamp = Get-Date -Format "yyyyMMdd_HHmmss"
    $localOutputFile = Join-Path $env:TEMP "resource_check_${timestamp}.txt"

    try {
        # Run the remote script with its output redirected to the report file.
        $cmd = "cd $remotePath && chmod +x common.sh resource_check.sh && ./resource_check.sh --format text --check all > $remoteOutputFile 2>&1"
        Write-Log -Level "INFO" -Message "[资源] 执行远程检测命令..."
        Invoke-SSHCommand -HostName $Server.IP -User $Server.User -Pass $Server.Pass -Port $Server.Port -Command $cmd | Out-Null

        # Download the report.
        Write-Log -Level "INFO" -Message "[资源] 下载检测结果文件..."
        $downloadResult = Download-RemoteFile -Server $Server -RemotePath $remoteOutputFile -LocalPath $localOutputFile -TimeoutSeconds 60
        if ($downloadResult.Success -and (Test-Path $localOutputFile)) {
            Write-Log -Level "SUCCESS" -Message "[资源] 检测结果下载成功,开始解析..."
            $content = Get-Content $localOutputFile -Raw -Encoding UTF8

            # Parse the text report and convert it to the standard result shape.
            $parsedResults = Parse-ResourceCheckText -Content $content
            $results = Convert-ResourceCheckToStandard -ParsedResults $parsedResults
            Write-Log -Level "INFO" -Message "========== 结束资源检测 (Shell模式) =========="
            return $results
        }

        Write-Log -Level "ERROR" -Message "[资源] 检测结果下载失败"
        return $null
    }
    catch {
        Write-Log -Level "ERROR" -Message "[资源] 执行过程中发生异常: $($_.Exception.Message)"
        return $null
    }
    finally {
        # Always remove the temporary files, including on the failure paths,
        # so nothing is left behind on either machine.
        Remove-Item $localOutputFile -Force -ErrorAction SilentlyContinue
        try {
            Invoke-SSHCommand -HostName $Server.IP -User $Server.User -Pass $Server.Pass -Port $Server.Port -Command "rm -f $remoteOutputFile; rm -rf $remotePath" | Out-Null
        } catch {
            Write-Log -Level "WARN" -Message "[资源] 清理远程临时文件失败: $($_.Exception.Message)"
        }
    }
}
# Convert the flat item list produced by the resource text parser into the
# standard result hashtable.
#
# Parameters:
#   ParsedResults - objects with Category, Item, Value and Status properties.
#                   An empty list (or $null) is accepted and yields defaults only.
# Returns: hashtable with keys OS, Architecture, CPU, Memory, Disk (array) and
#          Firewall. CPU and Memory stay $null when no data was seen; Firewall
#          is never filled in here.
function Convert-ResourceCheckToStandard {
    param(
        [Parameter(Mandatory=$true)] [AllowNull()] [AllowEmptyCollection()] [array]$ParsedResults
    )
    $results = @{
        OS = $null
        Architecture = $null
        CPU = $null
        Memory = $null
        Disk = @()
        Firewall = $null
    }

    $cpuUsage = 0
    $cpuCores = 0
    $memTotal = 0      # GB
    $memUsed = 0       # GB
    $memPercent = 0
    $memTotalMB = 0
    $memUsedMB = 0

    # System information
    $osName = ""
    $osVersion = ""
    $systemArch = ""
    $kernelVersion = ""

    foreach ($item in $ParsedResults) {
        switch ($item.Category) {
            "系统信息" {
                switch ($item.Item) {
                    "操作系统" { $osName = $item.Value }
                    "系统版本" { $osVersion = $item.Value }
                    "系统架构" { $systemArch = $item.Value }
                    "内核版本" { $kernelVersion = $item.Value }
                }
            }
            "CPU" {
                switch ($item.Item) {
                    "核心数" { $cpuCores = [int]$item.Value }
                    "使用率" {
                        if ($item.Value -match '([\d.]+)%') {
                            $cpuUsage = [double]$matches[1]
                        }
                    }
                }
            }
            "内存" {
                if ($item.Item -eq "使用率" -and $item.Value -match '([\d.]+)%') {
                    $memPercent = [double]$matches[1]
                }
                # Total / used, reported in GB ...
                if ($item.Item -eq "总计" -and $item.Value -match '([\d.]+)GB') {
                    $memTotal = [double]$matches[1]
                }
                if ($item.Item -eq "已用" -and $item.Value -match '([\d.]+)GB') {
                    $memUsed = [double]$matches[1]
                }
                # ... or in MB
                if ($item.Item -eq "总计" -and $item.Value -match '([\d.]+)MB') {
                    $memTotalMB = [double]$matches[1]
                }
                if ($item.Item -eq "已用" -and $item.Value -match '([\d.]+)MB') {
                    $memUsedMB = [double]$matches[1]
                }
            }
            "磁盘" {
                # Value looks like "10G/50G (20%)"; both sizes may carry a multi-letter unit (e.g. "GB").
                if ($item.Value -match '([\d.]+[A-Z]+)/([\d.]+[A-Z]+)\s+\((\d+)%\)') {
                    $used = $matches[1]
                    $size = $matches[2]
                    $percent = [int]$matches[3]
                    $status = $item.Status
                    $results.Disk += @{
                        Device = "unknown"
                        Size = $size
                        Used = $used
                        Percent = $percent
                        MountPoint = $item.Item
                        Status = $status
                    }
                }
            }
        }
    }

    # CPU result
    if ($cpuCores -gt 0 -or $cpuUsage -gt 0) {
        $cpuStatus = if ($cpuUsage -lt 70) { "正常" } elseif ($cpuUsage -lt 90) { "警告" } else { "严重" }
        $results.CPU = @{
            Usage = $cpuUsage
            Cores = $cpuCores
            Status = $cpuStatus
            Success = ($cpuUsage -lt 90)
        }
    }

    # Memory result
    if ($memTotal -gt 0 -or $memTotalMB -gt 0 -or $memPercent -gt 0) {
        # Convert MB figures to GB independently of each other, so a report
        # that mixes units (total in GB, used in MB) is still handled.
        if ($memTotal -eq 0 -and $memTotalMB -gt 0) {
            $memTotal = [math]::Round($memTotalMB / 1024, 2)
        }
        if ($memUsed -eq 0 -and $memUsedMB -gt 0) {
            $memUsed = [math]::Round($memUsedMB / 1024, 2)
        }
        # Only a percentage is known: estimate against an assumed 16 GB total.
        # NOTE(review): the 16 GB figure is a placeholder, not a measurement.
        if ($memTotal -eq 0 -and $memPercent -gt 0) {
            $memTotal = 16
        }
        # Used amount missing: derive it from total and percentage.
        if ($memUsed -eq 0 -and $memTotal -gt 0 -and $memPercent -gt 0) {
            $memUsed = [math]::Round($memTotal * $memPercent / 100, 2)
        }
        # Percentage missing: derive it from total and used.
        if ($memPercent -eq 0 -and $memTotal -gt 0 -and $memUsed -gt 0) {
            $memPercent = [math]::Round(($memUsed / $memTotal) * 100, 1)
        }
        $memStatus = if ($memPercent -lt 70) { "正常" } elseif ($memPercent -lt 90) { "警告" } else { "严重" }
        $results.Memory = @{
            Total = $memTotal
            Used = $memUsed
            Percent = $memPercent
            Status = $memStatus
            Success = ($memPercent -lt 90)
        }
    }

    # OS and architecture always get an entry; defaults are used when nothing was reported.
    $osInfo = if ($osName -and $osVersion) { "$osName $osVersion" } elseif ($osName) { $osName } else { "Linux" }
    $results.OS = @{
        Info = $osInfo
        Status = "正常"
        Success = $true
    }
    $results.Architecture = @{
        Arch = if ($systemArch) { $systemArch } else { "x86_64" }
        Kernel = if ($kernelVersion) { $kernelVersion } else { "unknown" }
        Status = "正常"
        Success = $true
    }
    return $results
}
# Parse the plain-text report of the resource check script into a flat list.
#
# The report is read line by line. "--- <name> ---" lines switch the current
# section; that section decides whether a "使用率:" line belongs to memory or
# to CPU (CPU is the default when no memory section is active).
#
# Parameters:
#   Content - full text of the report.
# Returns: objects with Category, Item, Value and Status properties. Memory
#          total/used are appended last and are always expressed in GB.
function Parse-ResourceCheckText {
    param(
        [Parameter(Mandatory=$true)] [string]$Content
    )
    $results = @()
    $lines = $Content -split "`n"
    $currentSection = $null
    $memTotal = 0
    $memUsed = 0

    foreach ($line in $lines) {
        $trimmed = $line.Trim()

        # Skip blank lines and pure separator lines.
        if ([string]::IsNullOrEmpty($trimmed) -or $trimmed -match '^=+$|^---+$') {
            continue
        }

        # Section header, e.g. "--- 内存 ---": remember which section follows.
        if ($trimmed -match '^---\s*(.+?)\s*---') {
            $currentSection = $matches[1]
            continue
        }

        # System information
        if ($trimmed -match '^操作系统:\s*(.+)$') {
            $results += [PSCustomObject]@{
                Category = "系统信息"
                Item = "操作系统"
                Value = $matches[1].Trim()
                Status = "正常"
            }
        }
        elseif ($trimmed -match '^系统版本:\s*(.+)$') {
            $results += [PSCustomObject]@{
                Category = "系统信息"
                Item = "系统版本"
                Value = $matches[1].Trim()
                Status = "正常"
            }
        }
        elseif ($trimmed -match '^系统架构:\s*(.+)$') {
            $results += [PSCustomObject]@{
                Category = "系统信息"
                Item = "系统架构"
                Value = $matches[1].Trim()
                Status = "正常"
            }
        }
        elseif ($trimmed -match '^内核版本:\s*(.+)$') {
            $results += [PSCustomObject]@{
                Category = "系统信息"
                Item = "内核版本"
                Value = $matches[1].Trim()
                Status = "正常"
            }
        }
        # CPU
        elseif ($trimmed -match '^核心数:\s*(\d+)') {
            $results += [PSCustomObject]@{
                Category = "CPU"
                Item = "核心数"
                Value = $matches[1]
                Status = "正常"
            }
        }
        elseif ($trimmed -match '^使用率:\s*([\d.]+)%') {
            $usage = [double]$matches[1]
            if ($currentSection -like '*内存*') {
                # Memory usage: thresholds 70 / 90.
                $status = if ($usage -lt 70) { "正常" } elseif ($usage -lt 90) { "警告" } else { "严重" }
                $results += [PSCustomObject]@{
                    Category = "内存"
                    Item = "使用率"
                    Value = "$usage%"
                    Status = $status
                }
            }
            else {
                # CPU usage: thresholds 80 / 90.
                $status = if ($usage -lt 80) { "正常" } elseif ($usage -lt 90) { "警告" } else { "严重" }
                $results += [PSCustomObject]@{
                    Category = "CPU"
                    Item = "使用率"
                    Value = "$usage%"
                    Status = $status
                }
            }
        }
        elseif ($trimmed -match '^负载平均:\s*(.+)') {
            $results += [PSCustomObject]@{
                Category = "CPU"
                Item = "负载平均"
                Value = $matches[1].Trim()
                Status = "正常"
            }
        }
        # Memory totals (normalised to GB, emitted after the loop)
        elseif ($trimmed -match '^总计:\s*([\d.]+)(GB|MB)') {
            $memTotalValue = [double]$matches[1]
            if ($matches[2] -eq "MB") {
                $memTotal = $memTotalValue / 1024
            } else {
                $memTotal = $memTotalValue
            }
        }
        elseif ($trimmed -match '^已用:\s*([\d.]+)(GB|MB)') {
            $memUsedValue = [double]$matches[1]
            if ($matches[2] -eq "MB") {
                $memUsed = $memUsedValue / 1024
            } else {
                $memUsed = $memUsedValue
            }
        }
        # Disk line: "<mount> <size> <used> [<percent>%] <status>".
        # "/\S*" also accepts the root mount "/" itself.
        elseif ($trimmed -match '^(/\S*)\s+([\d.]+[A-Z])\s+([\d.]+[A-Z])\s+\[([\d.]+)%\]\s+(\S+)') {
            $mountpoint = $matches[1]
            $size = $matches[2]
            $used = $matches[3]
            $percent = [int]$matches[4]
            $status = $matches[5]
            $results += [PSCustomObject]@{
                Category = "磁盘"
                Item = $mountpoint
                Value = "$used/$size ($percent%)"
                Status = $status
            }
        }
        # Network
        elseif ($trimmed -match '^TCP连接数:\s*(\d+)') {
            $results += [PSCustomObject]@{
                Category = "网络"
                Item = "TCP连接数"
                Value = $matches[1]
                Status = "正常"
            }
        }
        elseif ($trimmed -match '^TIME_WAIT数:\s*(\d+)') {
            $timeWait = $matches[1]
            $results += [PSCustomObject]@{
                Category = "网络"
                Item = "TIME_WAIT数"
                Value = $timeWait
                Status = if ([int]$timeWait -gt 1000) { "警告" } else { "正常" }
            }
        }
    }

    # Append memory total / used when they were reported.
    if ($memTotal -gt 0) {
        $results += [PSCustomObject]@{
            Category = "内存"
            Item = "总计"
            Value = "$memTotal"+"GB"
            Status = "正常"
        }
    }
    if ($memUsed -gt 0) {
        $results += [PSCustomObject]@{
            Category = "内存"
            Item = "已用"
            Value = "$memUsed"+"GB"
            Status = "正常"
        }
    }
    return $results
}
# Docker container check (shell mode).
# Runs docker_check.sh remotely and maps its JSON report to result objects.
#
# Parameters:
#   Server       - hashtable with IP, User, Pass and Port entries.
#   PrintDetails - accepted for signature compatibility; not read by this function.
# Returns: a list of objects with Name, Image, State and Status, or $null when
#          the script produced no parsable JSON.
function Test-ContainerInformation-Shell {
    param(
        [Parameter(Mandatory=$true)] [hashtable]$Server,
        [Parameter(Mandatory=$false)] [bool]$PrintDetails = $false
    )
    Write-Host ""
    Write-Log -Level "INFO" -Message "========== 开始Docker检测 (Shell模式) =========="

    $rawOutput = Invoke-RemoteShellCheck -Server $Server -ScriptName "docker_check.sh" -Arguments "--format json --detail basic"
    $report = ConvertFrom-ShellJson -JsonString $rawOutput
    if (-not $report) {
        Write-Log -Level "ERROR" -Message "[容器] Shell脚本执行失败"
        return $null
    }

    $containerList = @()
    if ($report.containers) {
        foreach ($entry in $report.containers) {
            # Tag running containers differently from everything else.
            if ($entry.state -eq "running") {
                $tag = "[运行]"
            }
            else {
                $tag = "[停止]"
            }
            Write-Log -Level "INFO" -Message " $tag $($entry.name) - $($entry.image)"
            $containerList += [PSCustomObject]@{
                Name = $entry.name
                Image = $entry.image
                State = $entry.state
                Status = $entry.status
            }
        }
    }

    # Summary line, only when the script provided one.
    $summary = $report.summary
    if ($summary) {
        Write-Log -Level "INFO" -Message "汇总: 总计 $($summary.total) 个容器,运行中 $($summary.running) 个,已停止 $($summary.stopped) 个"
    }

    Write-Log -Level "INFO" -Message "========== 结束Docker检测 (Shell模式) =========="
    return $containerList
}
# Middleware check (shell mode).
# Runs middleware_check.sh remotely and turns the redis / mysql / emqx / fastdfs
# sections of its JSON report into one result object each.
#
# Parameters:
#   Server      - hashtable with IP, User, Pass and Port entries.
#   EmqxLogPath - accepted for signature compatibility; not read by this function.
# Returns: a list of objects with Check, Status, Details and Success; an empty
#          list when the script produced no parsable JSON.
function Test-MQTTConnection-Shell {
    param(
        [Parameter(Mandatory=$true)] [hashtable]$Server,
        [Parameter(Mandatory=$false)] [string]$EmqxLogPath = ""
    )
    Write-Host ""
    Write-Log -Level "INFO" -Message "========== 开始中间件检测 (Shell模式) =========="

    $rawOutput = Invoke-RemoteShellCheck -Server $Server -ScriptName "middleware_check.sh" -Arguments "--format json --check all"
    $report = ConvertFrom-ShellJson -JsonString $rawOutput
    if (-not $report) {
        Write-Log -Level "ERROR" -Message "[中间件] Shell脚本执行失败"
        return @()
    }

    $checks = @()

    # ---- Redis ----
    $redis = $report.redis
    if ($redis) {
        $state = $redis.status
        $isUp = ($state -eq "running")
        if ($isUp) { $tag = "[运行]" } else { $tag = "[停止]" }

        if ($isUp) {
            # Optional fields fall back to placeholders.
            if ($redis.version) { $version = $redis.version } else { $version = "unknown" }
            if ($redis.memory) { $memory = $redis.memory } else { $memory = "N/A" }
            if ($redis.clients) { $clients = $redis.clients } else { $clients = "0" }
            $detail = "Redis服务完全正常(连接+读写+删除+信息采集) | 容器: $($redis.container) | 端口: $($redis.port) | 版本: $version | 内存: $memory | 连接数: $clients"
            Write-Log -Level "INFO" -Message " $tag Redis ($($redis.container):$($redis.port)): $state | 版本: $version | 内存: $memory | 连接: $clients"
        }
        else {
            if ($state -eq "stopped") { $detail = "容器未运行" }
            elseif ($state -eq "not_found") { $detail = "容器不存在" }
            else { $detail = "连接失败" }
            Write-Log -Level "INFO" -Message " $tag Redis ($($redis.container):$($redis.port)): $state"
        }

        if ($isUp) { $label = "正常" } else { $label = "异常" }
        $checks += [PSCustomObject]@{
            Check = "Redis连接检测"
            Status = $label
            Details = $detail
            Success = $isUp
        }
    }

    # ---- MySQL ----
    $mysql = $report.mysql
    if ($mysql) {
        $state = $mysql.status
        $isUp = ($state -eq "running")
        if ($isUp) { $tag = "[运行]" } else { $tag = "[停止]" }

        if ($isUp) {
            if ($mysql.version) { $version = $mysql.version } else { $version = "unknown" }
            $detail = "MySQL连接正常 | 容器: $($mysql.container) | 端口: $($mysql.port) | 版本: $version"
            Write-Log -Level "INFO" -Message " $tag MySQL ($($mysql.container):$($mysql.port)): $state | 版本: $version"
        }
        else {
            if ($state -eq "stopped") { $detail = "容器未运行" }
            elseif ($state -eq "not_found") { $detail = "容器不存在" }
            else { $detail = "连接失败" }
            Write-Log -Level "INFO" -Message " $tag MySQL ($($mysql.container):$($mysql.port)): $state"
        }

        if ($isUp) { $label = "正常" } else { $label = "异常" }
        $checks += [PSCustomObject]@{
            Check = "MySQL连接检测"
            Status = $label
            Details = $detail
            Success = $isUp
        }
    }

    # ---- EMQX ----
    $emqx = $report.emqx
    if ($emqx) {
        $state = $emqx.status
        $isUp = ($state -eq "running")
        if ($isUp) { $tag = "[运行]" } else { $tag = "[停止]" }

        if ($isUp) {
            if ($emqx.dashboard_port) { $dashboardPort = $emqx.dashboard_port } else { $dashboardPort = "N/A" }
            $detail = "MQTT服务连接正常 | 容器: $($emqx.container) | 端口: $($emqx.port) | Dashboard: $dashboardPort"
            Write-Log -Level "INFO" -Message " $tag EMQX ($($emqx.container):$($emqx.port)): $state | Dashboard: $dashboardPort"
        }
        else {
            if ($state -eq "stopped") { $detail = "容器未运行" }
            elseif ($state -eq "not_found") { $detail = "容器不存在" }
            else { $detail = "连接失败" }
            Write-Log -Level "INFO" -Message " $tag EMQX ($($emqx.container):$($emqx.port)): $state"
        }

        if ($isUp) { $label = "正常" } else { $label = "异常" }
        $checks += [PSCustomObject]@{
            Check = "MQTT/EMQX连接检测"
            Status = $label
            Details = $detail
            Success = $isUp
        }
    }

    # ---- FastDFS ----
    $fastdfs = $report.fastdfs
    if ($fastdfs) {
        $state = $fastdfs.status
        $isUp = ($state -eq "running")
        if ($isUp) { $tag = "[运行]" } else { $tag = "[停止]" }

        if ($isUp) {
            if ($fastdfs.network) { $network = $fastdfs.network } else { $network = "unknown" }
            $detail = "FastDFS存储服务正常 | 容器: $($fastdfs.container) | 网络模式: $network"
            Write-Log -Level "INFO" -Message " $tag FastDFS ($($fastdfs.container)): $state | 网络: $network"
        }
        else {
            if ($state -eq "stopped") { $detail = "服务未运行" }
            elseif ($state -eq "not_found") { $detail = "容器不存在" }
            else { $detail = "检测失败" }
            Write-Log -Level "INFO" -Message " $tag FastDFS ($($fastdfs.container)): $state"
        }

        if ($isUp) { $label = "正常" } else { $label = "异常" }
        $checks += [PSCustomObject]@{
            Check = "FastDFS存储检测"
            Status = $label
            Details = $detail
            Success = $isUp
        }
    }

    Write-Log -Level "INFO" -Message "========== 结束中间件检测 (Shell模式) =========="
    return $checks
}
# DNS check (shell mode).
# Runs dns_check.sh remotely; emits one result per resolved domain plus a final
# "DNS配置" entry carrying the DNS server reported by the script.
#
# Parameters:
#   Server - hashtable with IP, User, Pass and Port entries.
# Returns: a list of objects with Check, Status, Details and Success (the last
#          entry additionally has Type = "DNSConfig"); an empty list when the
#          script produced no parsable JSON.
function Test-DNSResolution-Shell {
    param(
        [Parameter(Mandatory=$true)] [hashtable]$Server
    )
    Write-Host ""
    Write-Log -Level "INFO" -Message "========== 开始DNS检测 (Shell模式) =========="

    $rawOutput = Invoke-RemoteShellCheck -Server $Server -ScriptName "dns_check.sh" -Arguments "--format json"
    $report = ConvertFrom-ShellJson -JsonString $rawOutput
    if (-not $report) {
        Write-Log -Level "ERROR" -Message "[DNS] Shell脚本执行失败"
        return @()
    }

    $checks = @()
    if ($report.results) {
        foreach ($entry in $report.results) {
            $resolved = ($entry.status -eq "success")
            if ($resolved) {
                $tag = "[OK]"
                $label = "正常"
            }
            else {
                $tag = "[FAIL]"
                $label = "异常"
            }
            Write-Log -Level "INFO" -Message " $tag $($entry.domain) -> $($entry.ip)"

            # Show the resolved address, or a failure note when none was returned.
            if ($entry.ip) { $detail = $entry.ip } else { $detail = "解析失败" }
            $checks += [PSCustomObject]@{
                Check = $entry.domain
                Status = $label
                Details = $detail
                Success = $resolved
            }
        }
    }

    # Record which DNS server the remote host uses.
    if ($report.dns_server) { $dnsServer = $report.dns_server } else { $dnsServer = "unknown" }
    $checks += [PSCustomObject]@{
        Check = "DNS配置"
        Status = "正常"
        Details = "DNS服务器: $dnsServer"
        Success = $true
        Type = "DNSConfig"
    }
    Write-Log -Level "INFO" -Message "DNS服务器: $dnsServer"

    Write-Log -Level "INFO" -Message "========== 结束DNS检测 (Shell模式) =========="
    return $checks
}
# NTP check (shell mode).
# Queries `timedatectl status` directly over SSH (no uploaded script, which
# avoids output truncation) and reads the first server from /etc/chrony.conf.
#
# Both timedatectl field spellings are understood:
#   newer systemd:  "System clock synchronized: yes" / "NTP service: active"
#   older systemd:  "NTP synchronized: yes"          / "NTP enabled: yes"
#
# Parameters:
#   Server - hashtable with IP, User, Pass and Port entries.
# Returns: a one-element list with Service, Daemon, Status and TimeSync, or
#          $null when timedatectl produced no usable output.
function Test-NTPService-Shell {
    param(
        [Parameter(Mandatory=$true)] [hashtable]$Server
    )
    Write-Host ""
    Write-Log -Level "INFO" -Message "========== 开始NTP检测 (Shell模式) =========="

    # Keep only the status lines of interest; LANG=C forces English field names.
    $cmd = "export LANG=C && timedatectl status 2>/dev/null | grep -E 'System clock synchronized|NTP service|NTP synchronized|NTP enabled'"
    $result = Invoke-SSHCommand -HostName $Server.IP -User $Server.User -Pass $Server.Pass -Port $Server.Port -Command $cmd
    if (-not $result -or -not $result.Output) {
        Write-Log -Level "ERROR" -Message "[NTP] 无法获取NTP状态"
        return $null
    }

    $output = if ($result.Output -is [array]) { $result.Output -join "`n" } else { $result.Output.ToString() }
    $ntpService = "unknown"
    $serviceStatus = "stopped"
    $timeSync = "unknown"

    # Service state.
    # NOTE(review): the daemon is reported as "chronyd" whenever NTP is active;
    # timedatectl does not name the daemon, so this is an assumption — confirm.
    if ($output -match "NTP service:\s+active" -or $output -match "NTP enabled:\s+yes") {
        $ntpService = "chronyd"
        $serviceStatus = "running"
    }

    # Clock synchronisation state.
    if ($output -match "(System clock|NTP) synchronized:\s+yes") {
        $timeSync = "synchronized"
    } elseif ($output -match "(System clock|NTP) synchronized:\s+no") {
        $timeSync = "not_synchronized"
    }

    # First configured NTP server (single-quoted so PowerShell leaves $2 for awk).
    $cmd2 = 'export LANG=C && cat /etc/chrony.conf 2>/dev/null | grep ''^server '' | awk ''{print $2}'' | head -1'
    $result2 = Invoke-SSHCommand -HostName $Server.IP -User $Server.User -Pass $Server.Pass -Port $Server.Port -Command $cmd2
    $ntpServers = if ($result2.Output) { $result2.Output -join "" } else { "unknown" }

    $icon = if ($serviceStatus -eq "running") { "[运行]" } else { "[停止]" }
    Write-Log -Level "INFO" -Message " $icon NTP服务: $ntpService ($serviceStatus)"
    if ($timeSync -eq "synchronized") {
        Write-Log -Level "SUCCESS" -Message " [OK] 时间同步: 已同步"
    } else {
        Write-Log -Level "WARN" -Message " [WARN] 时间同步: 未同步"
    }
    Write-Log -Level "INFO" -Message " NTP服务器: $ntpServers"

    $results = @()
    $results += [PSCustomObject]@{
        Service = "NTP"
        Daemon = $ntpService
        Status = $serviceStatus
        TimeSync = $timeSync
    }
    Write-Log -Level "INFO" -Message "========== 结束NTP检测 (Shell模式) =========="
    return $results
}
# Firewall check (shell mode).
# Asks systemd whether firewalld is active and, if so, lists the open ports
# and services through firewall-cmd.
#
# Parameters:
#   Server - hashtable with IP, User, Pass and Port entries.
# Returns: hashtable with Status ("active", "inactive" or "unknown"),
#          Description and OpenPorts (space-separated, empty when not active).
function Test-Firewall-Shell {
    param(
        [Parameter(Mandatory=$true)] [hashtable]$Server
    )
    Write-Host ""
    Write-Log -Level "INFO" -Message "========== 开始防火墙检测 (Shell模式) =========="

    # Query the firewalld state.
    $cmd = "export LANG=C && systemctl is-active firewalld 2>/dev/null || echo 'inactive'"
    $result = Invoke-SSHCommand -HostName $Server.IP -User $Server.User -Pass $Server.Pass -Port $Server.Port -Command $cmd
    $firewallStatus = "unknown"
    $firewallDescription = "未知"

    if ($result -and $result.Output) {
        # When the unit is not active, systemctl prints its own state line AND
        # the `|| echo` adds a second "inactive" line. Compare line by line
        # instead of against the whole output, which would never match.
        $stateLines = @(@($result.Output) -split "\r?\n" | ForEach-Object { $_.Trim() } | Where-Object { $_ -ne "" })
        if ($stateLines -contains "active") {
            $firewallStatus = "active"
            $firewallDescription = "已启用 (firewalld)"
            Write-Log -Level "INFO" -Message " 防火墙状态: $firewallDescription"
        } elseif ($stateLines -contains "inactive") {
            $firewallStatus = "inactive"
            $firewallDescription = "未启用"
            Write-Log -Level "WARN" -Message " 防火墙状态: $firewallDescription"
        }
    }

    # Open ports and services are only meaningful while the firewall is running.
    $openPorts = ""
    if ($firewallStatus -eq "active") {
        $cmd2 = "export LANG=C && firewall-cmd --list-ports 2>/dev/null && firewall-cmd --list-services 2>/dev/null"
        $result2 = Invoke-SSHCommand -HostName $Server.IP -User $Server.User -Pass $Server.Pass -Port $Server.Port -Command $cmd2
        if ($result2.Output) {
            $portsOutput = if ($result2.Output -is [array]) { $result2.Output -join " " } else { $result2.Output.ToString() }
            # Collapse line breaks and runs of whitespace into single spaces.
            $openPorts = $portsOutput -replace "`n", " " -replace "\s+", " "
            Write-Log -Level "INFO" -Message " 开放端口/服务: $openPorts"
        }
    }

    $results = @{
        Status = $firewallStatus
        Description = $firewallDescription
        OpenPorts = $openPorts
    }
    Write-Log -Level "INFO" -Message "========== 结束防火墙检测 (Shell模式) =========="
    return $results
}
# Config-file IP check (shell mode).
# Lists the platform's configuration files on the remote host and collects up
# to five distinct IPv4-looking addresses from each file.
#
# Parameters:
#   Server       - hashtable with IP, User, Pass and Port entries.
#   PlatformType - "new" selects the /data/middleware paths; any other value
#                  selects the /var/www paths.
# Returns: a list of objects with File and IP.
function Test-ConfigIPs-Shell {
    param(
        [Parameter(Mandatory=$true)] [hashtable]$Server,
        [Parameter(Mandatory=$false)] [string]$PlatformType
    )
    Write-Host ""
    Write-Log -Level "INFO" -Message "========== 开始配置IP检测 (Shell模式) =========="
    $results = @()

    # Configuration file globs per platform type.
    $configFiles = @()
    if ($PlatformType -eq "new") {
        $configFiles = @(
            "/data/middleware/nginx/config/*.conf",
            "/data/middleware/emqx/config/*.conf",
            "/data/middleware/mysql/conf/my.cnf"
        )
    } else {
        $configFiles = @(
            "/var/www/java/nginx-conf.d/*.conf",
            "/var/www/emqx/config/*.conf"
        )
    }

    # Dotted quad with 1-3 digits in EVERY octet (the last octet included).
    $ipPattern = '\b([0-9]{1,3}\.){3}[0-9]{1,3}\b'

    foreach ($configPattern in $configFiles) {
        # Let the remote shell expand the glob.
        $cmd = "export LANG=C && ls $configPattern 2>/dev/null"
        $result = Invoke-SSHCommand -HostName $Server.IP -User $Server.User -Pass $Server.Pass -Port $Server.Port -Command $cmd
        if (-not $result.Output) { continue }

        # One file per line; trim stray CR / whitespace so the path can be quoted safely below.
        $files = @(@($result.Output) -split "\r?\n" | ForEach-Object { $_.Trim() } | Where-Object { $_ -ne "" })
        foreach ($file in $files) {
            # Extract the addresses found in this file.
            $cmd2 = "export LANG=C && grep -oE '$ipPattern' '$file' 2>/dev/null | sort -u | head -5"
            $result2 = Invoke-SSHCommand -HostName $Server.IP -User $Server.User -Pass $Server.Pass -Port $Server.Port -Command $cmd2
            if ($result2.Output) {
                # Split per line whether the output arrives as an array or as a
                # single string; joining with "" would glue addresses together.
                $ips = @(@($result2.Output) -split "\r?\n" | ForEach-Object { $_.Trim() } | Where-Object { $_ -match $ipPattern })
                if ($ips.Count -gt 0) {
                    Write-Log -Level "INFO" -Message " [包含IP] $file"
                    foreach ($ip in $ips) {
                        $results += [PSCustomObject]@{
                            File = $file
                            IP = $ip
                        }
                    }
                }
            }
        }
    }

    Write-Log -Level "INFO" -Message "汇总: 检测 $($results.Count) 个IP地址"
    Write-Log -Level "INFO" -Message "========== 结束配置IP检测 (Shell模式) =========="
    return $results
}
# Console配置检测(Shell模式)- 暂未实现,使用PowerShell模式
function Test-ConsoleConfig-Shell {
    <#
    .SYNOPSIS
        Placeholder for the Console configuration check in Shell mode.
    .DESCRIPTION
        Shell mode has no implementation for this check yet. The function only
        logs a notice and returns an empty array; none of its parameters are read.
    .PARAMETER Server
        Accepted for signature parity; unused.
    .PARAMETER PlatformType
        Accepted for signature parity; unused.
    .PARAMETER SystemInfo
        Accepted for signature parity; unused.
    .OUTPUTS
        An empty array.
    #>
    param(
        [Parameter(Mandatory=$true)]
        [hashtable]$Server,

        [Parameter(Mandatory=$false)]
        [string]$PlatformType,

        [Parameter(Mandatory=$false)]
        [hashtable]$SystemInfo
    )

    Write-Host ""

    $notice = "========== Console配置检测暂不支持Shell模式,使用PowerShell模式 =========="
    Write-Log -Level "INFO" -Message $notice

    # Not supported in Shell mode: hand back an empty result set.
    return @()
}
# Java服务检测(Shell模式)
function Test-UjavaServices-Shell {
    <#
    .SYNOPSIS
        Checks the ujava services by running java_check.sh remotely (Shell mode).
    .DESCRIPTION
        Invokes java_check.sh through Invoke-RemoteShellCheck, parses its JSON
        output with ConvertFrom-ShellJson and converts every entry of the
        "services" collection into a result object.
    .PARAMETER Server
        Connection hashtable forwarded to Invoke-RemoteShellCheck.
    .PARAMETER ContainerName
        Container to inspect. When empty the check is logged as a host-level check
        and no --container option is sent to the script.
    .PARAMETER PlatformType
        Value passed to the script's --platform option (default "new").
    .OUTPUTS
        PSCustomObject items with Service, Pattern, Status and Running properties;
        an empty array when the script output could not be parsed.
    #>
    param(
        [Parameter(Mandatory=$true)] [hashtable]$Server,
        [Parameter(Mandatory=$false)] [string]$ContainerName,
        [Parameter(Mandatory=$false)] [string]$PlatformType = "new"
    )

    Write-Host ""
    if ($ContainerName) {
        Write-Log -Level "INFO" -Message "========== 检测 ujava 服务 (容器: $ContainerName) (Shell模式) =========="
    }
    else {
        Write-Log -Level "INFO" -Message "========== 检测 ujava 服务 (宿主机) (Shell模式) =========="
    }

    # Only emit --container when a name was supplied. With an empty name the old
    # argument string was "--container  --platform ...", which after shell word
    # splitting leaves --container without a value of its own.
    # NOTE(review): assumes java_check.sh treats a missing --container as the
    # host-level check - confirm against the script.
    $argumentParts = @()
    if ($ContainerName) { $argumentParts += "--container $ContainerName" }
    $argumentParts += "--platform $PlatformType"
    $argumentParts += "--format json"
    $arguments = $argumentParts -join ' '

    $result = Invoke-RemoteShellCheck -Server $Server -ScriptName "java_check.sh" -Arguments $arguments
    $data = ConvertFrom-ShellJson -JsonString $result
    if (-not $data) {
        Write-Log -Level "ERROR" -Message "[UJAVA] Shell脚本执行失败"
        return @()
    }

    $results = @()
    if ($data.services) {
        foreach ($service in $data.services) {
            $isRunning = ($service.status -eq "运行中")
            $statusIcon = if ($isRunning) { "[OK]" } else { "[FAIL]" }
            Write-Log -Level "INFO" -Message " $statusIcon $($service.name) ($($service.jar)) [$($service.location)]: $($service.status)"
            $results += [PSCustomObject]@{
                Service = $service.name
                Pattern = $service.jar
                Status  = $service.status
                Running = $isRunning
            }
        }
    }

    Write-Log -Level "INFO" -Message "========== 结束检测 (Shell模式) =========="
    return $results
}
# ================================
# 检查依赖
# ================================
function Test-Dependencies {
    <#
    .SYNOPSIS
        Locates the SSH helper executables needed for password authentication.
    .DESCRIPTION
        Searches for plink.exe in the script directory, then in the sibling
        "远程容器更新" directory, then on PATH; when none is found it looks for
        sshpass. If plink was selected, pscp.exe is searched for in the same three
        places (a missing pscp only produces a warning).
        Writes $global:PLINK_PATH, $global:PSCP_PATH and $global:PreferredSSHTool.
    .OUTPUTS
        System.Boolean - $true when plink or sshpass is available, otherwise $false.
    #>
    Write-Log -Level "INFO" -Message "检查系统依赖..."

    $winVersion = [System.Environment]::OSVersion.Version
    Write-Log -Level "INFO" -Message " Windows 版本: $($winVersion.Major).$($winVersion.Minor).$($winVersion.Build)"

    $passwordToolFound = $false

    # --- plink: script directory -> sibling update directory -> PATH ---
    $besidePlink = Join-Path $SCRIPT_DIR "plink.exe"
    if (Test-Path $besidePlink) {
        $global:PLINK_PATH = $besidePlink
        Write-Log -Level "INFO" -Message " plink 已找到 (本地): $besidePlink"
        $passwordToolFound = $true
        $global:PreferredSSHTool = "plink"
    }
    elseif (Test-Path ($siblingPlink = Join-Path (Split-Path $SCRIPT_DIR -Parent) "远程容器更新\plink.exe")) {
        $global:PLINK_PATH = $siblingPlink
        Write-Log -Level "INFO" -Message " plink 已找到 (远程容器更新目录): $siblingPlink"
        $passwordToolFound = $true
        $global:PreferredSSHTool = "plink"
    }
    else {
        try {
            $pathPlink = Get-Command plink -ErrorAction Stop
            $global:PLINK_PATH = $pathPlink.Source
            Write-Log -Level "INFO" -Message " plink 已找到 (系统): $($global:PLINK_PATH)"
            $passwordToolFound = $true
            $global:PreferredSSHTool = "plink"
        }
        catch {
            Write-Log -Level "WARN" -Message " plink.exe 未找到"
        }
    }

    # --- sshpass is only consulted when no plink was found ---
    if (-not $passwordToolFound) {
        try {
            $sshpassCommand = Get-Command sshpass -ErrorAction Stop
            Write-Log -Level "INFO" -Message " sshpass 已找到: $($sshpassCommand.Source)"
            $passwordToolFound = $true
            $global:PreferredSSHTool = "sshpass"
        }
        catch {
            Write-Log -Level "WARN" -Message " sshpass 未找到"
        }
    }

    # --- nothing usable: print the remediation text and give up ---
    if (-not $passwordToolFound) {
        # $null entries stand for a blank console line (Write-Host ""); every
        # other entry is logged at ERROR level, in this exact order.
        $remediation = @(
            $null,
            " 未检测到密码认证工具",
            $null,
            " 请按以下方式解决:",
            $null,
            " 方式1 (推荐,离线可用):",
            " 将 plink.exe 放在脚本同目录下",
            " 当前脚本目录: $SCRIPT_DIR",
            $null,
            " 下载地址:",
            " plink.exe: https://the.earth.li/~sgtatham/putty/latest/w64/plink.exe",
            $null
        )
        foreach ($entry in $remediation) {
            if ($null -eq $entry) {
                Write-Host ""
            }
            else {
                Write-Log -Level "ERROR" -Message $entry
            }
        }
        return $false
    }

    # --- pscp (file transfer / log export) is only looked up alongside plink ---
    if ($global:PreferredSSHTool -eq "plink") {
        $besidePscp = Join-Path $SCRIPT_DIR "pscp.exe"
        if (Test-Path $besidePscp) {
            $global:PSCP_PATH = $besidePscp
            Write-Log -Level "INFO" -Message " pscp 已找到 (本地): $besidePscp"
        }
        elseif (Test-Path ($siblingPscp = Join-Path (Split-Path $SCRIPT_DIR -Parent) "远程容器更新\pscp.exe")) {
            $global:PSCP_PATH = $siblingPscp
            Write-Log -Level "INFO" -Message " pscp 已找到 (远程容器更新目录): $siblingPscp"
        }
        else {
            try {
                $pathPscp = Get-Command pscp -ErrorAction Stop
                $global:PSCP_PATH = $pathPscp.Source
                Write-Log -Level "INFO" -Message " pscp 已找到 (系统): $($global:PSCP_PATH)"
            }
            catch {
                Write-Log -Level "WARN" -Message " pscp.exe 未找到,日志导出功能将不可用"
            }
        }
    }

    Write-Log -Level "INFO" -Message "系统依赖检查通过 (使用 $global:PreferredSSHTool 进行密码认证)"
    return $true
}
# ================================
# 选择服务器
# ================================
function Select-Server {
    <#
    .SYNOPSIS
        Lets the operator pick a target server from $ServerList or enter one manually.
    .DESCRIPTION
        Prints every entry of $ServerList plus option 0 (manual entry) and reads the
        choice from the console. Manual entry prompts for IP, SSH port (default 22),
        user name (default root) and password.
    .OUTPUTS
        Hashtable with IP, User, Pass, Port and Desc entries for manual entry, or a
        clone of the chosen $ServerList entry with Port defaulted to 22.
        $null when the input is invalid (empty IP, invalid port, empty password,
        unknown server number).
    #>
    Write-Log -Level "INFO" -Message "可选择的目标服务器:"
    Write-Host ""
    foreach ($key in ($ServerList.Keys | Sort-Object)) {
        $server = $ServerList[$key]
        Write-Host " [$key] $($server.Desc) ($($server.IP) $($server.User))"
    }
    Write-Host " [0] 手动输入服务器信息"
    Write-Host ""

    $serverKey = Read-Host "请输入服务器编号"
    # Tolerate accidental leading/trailing blanks around the typed number.
    if ($null -ne $serverKey) { $serverKey = $serverKey.Trim() }

    if ($serverKey -eq "0") {
        Write-Log -Level "INFO" -Message "进入手动输入模式"
        Write-Host ""

        $remoteHost = Read-Host "请输入目标服务器 IP 地址"
        if ([string]::IsNullOrEmpty($remoteHost)) {
            Write-Log -Level "ERROR" -Message "服务器 IP 地址不能为空"
            return $null
        }

        # Validate the port instead of casting blindly: a plain [int] cast throws
        # on non-numeric input and accepts values outside the TCP port range.
        $sshPort = 22
        $sshPortInput = Read-Host "请输入 SSH 端口号 [默认 22]"
        if (-not [string]::IsNullOrEmpty($sshPortInput)) {
            $parsedPort = 0
            $portIsNumeric = [int]::TryParse($sshPortInput.Trim(), [ref]$parsedPort)
            if ((-not $portIsNumeric) -or ($parsedPort -lt 1) -or ($parsedPort -gt 65535)) {
                Write-Log -Level "ERROR" -Message "SSH 端口号无效: $sshPortInput (应为 1-65535 的整数)"
                return $null
            }
            $sshPort = $parsedPort
        }

        $remoteUserInput = Read-Host "请输入登录用户名 [默认 root]"
        $remoteUser = if ([string]::IsNullOrEmpty($remoteUserInput)) { "root" } else { $remoteUserInput }

        # Convert the SecureString to plain text. The unmanaged BSTR copy is zeroed
        # and freed in `finally` so it is released even if the conversion throws;
        # PtrToStringBSTR is the call that matches SecureStringToBSTR.
        $remotePassSecure = Read-Host "请输入登录密码" -AsSecureString
        $remotePass = $null
        $bstr = [IntPtr]::Zero
        try {
            $bstr = [System.Runtime.InteropServices.Marshal]::SecureStringToBSTR($remotePassSecure)
            $remotePass = [System.Runtime.InteropServices.Marshal]::PtrToStringBSTR($bstr)
        }
        finally {
            if ($bstr -ne [IntPtr]::Zero) {
                [System.Runtime.InteropServices.Marshal]::ZeroFreeBSTR($bstr)
            }
        }
        if ([string]::IsNullOrEmpty($remotePass)) {
            Write-Log -Level "ERROR" -Message "登录密码不能为空"
            return $null
        }

        Write-Log -Level "INFO" -Message "已配置目标服务器: ${remoteUser}@${remoteHost}:${sshPort}"
        return @{
            IP   = $remoteHost
            User = $remoteUser
            Pass = $remotePass
            Port = $sshPort
            Desc = "手动输入服务器"
        }
    }
    elseif ($ServerList.ContainsKey($serverKey)) {
        # Clone so that the default port below never mutates the preset list.
        $server = $ServerList[$serverKey].Clone()
        # Missing, null, empty or zero port -> default SSH port.
        if (-not $server.Port) {
            $server.Port = 22
        }
        Write-Log -Level "INFO" -Message "已选择 $($server.Desc) ($($server.IP)):$($server.Port)"
        return $server
    }
    else {
        Write-Log -Level "ERROR" -Message "编号 $serverKey 不存在,请重新运行脚本"
        return $null
    }
}
# ================================
# 测试 SSH 连接
# ================================
function Test-SSHConnection {
    <#
    .SYNOPSIS
        Verifies that a command can be executed on the target server over SSH.
    .DESCRIPTION
        Runs "echo CONNECTION_OK" through Invoke-SSHCommand and requires both a
        zero exit code and the marker text somewhere in the output.
    .PARAMETER Server
        Connection hashtable; the IP, User, Pass and Port entries are read.
    .OUTPUTS
        System.Boolean - $true when the connection works, otherwise $false.
    #>
    param(
        [hashtable]$Server
    )

    Write-Log -Level "INFO" -Message "测试 SSH 连接..."
    $result = Invoke-SSHCommand -HostName $Server.IP -User $Server.User -Pass $Server.Pass -Port $Server.Port -Command "echo CONNECTION_OK"

    # Collapse the output to ONE string before matching. When Output is an array,
    # "-notmatch" acts as a filter and returns every non-matching line, which is
    # truthy as soon as the server prints anything besides the marker (banner,
    # MOTD, warnings) - a working connection would then be reported as failed.
    $outputText = @($result.Output) -join "`n"

    if (($result.ExitCode -ne 0) -or ($outputText -notmatch "CONNECTION_OK")) {
        Write-Log -Level "ERROR" -Message "SSH 连接失败!"
        Write-Log -Level "ERROR" -Message "输出信息: $($result.Output)"
        Write-Log -Level "ERROR" -Message "请检查: 1) IP地址是否正确 2) 端口是否正确 3) 密码是否正确 4) 网络是否可达"
        return $false
    }

    Write-Log -Level "SUCCESS" -Message "SSH 连接测试通过"
    return $true
}
# ================================
# 检测平台类型
# ================================
function Get-PlatformType {
    <#
    .SYNOPSIS
        Detects whether the target server is the new unified platform or the traditional one.
    .DESCRIPTION
        Tests for the /data/services directory on the remote host. The last
        non-blank output line decides the result.
    .PARAMETER Server
        Connection hashtable; the IP, User, Pass and Port entries are read.
    .OUTPUTS
        System.String - "new" when /data/services exists, otherwise "old"
        (also "old" when the remote command produced no output).
    #>
    param(
        [hashtable]$Server
    )

    Write-Log -Level "INFO" -Message "自动检测目标服务器平台类型..."
    $result = Invoke-SSHCommand -HostName $Server.IP -User $Server.User -Pass $Server.Pass -Port $Server.Port -Command "[ -d /data/services ] && echo 'NEW_PLATFORM' || echo 'OLD_PLATFORM'"

    $outputLines = @($result.Output -split "`n" | Where-Object { $_ -match '\S' })

    # No usable output (e.g. the SSH call failed): previously this called .Trim()
    # on $null and raised an error before falling through to "old".
    if ($outputLines.Count -eq 0) {
        Write-Log -Level "WARN" -Message "平台类型检测未返回有效输出,默认按【传统平台】处理"
        return "old"
    }

    $platformCheck = ([string]$outputLines[-1]).Trim()
    if ($platformCheck -eq "NEW_PLATFORM") {
        Write-Log -Level "SUCCESS" -Message "检测到 /data/services 目录存在,识别为【新统一平台】"
        return "new"
    }
    else {
        Write-Log -Level "SUCCESS" -Message "未检测到 /data/services 目录,识别为【传统平台】"
        return "old"
    }
}
# ================================
# 检测系统类型(容器类型)
# ================================
function Get-SystemType {
    <#
    .SYNOPSIS
        Determines which known application containers run on the target server.
    .DESCRIPTION
        Lists running container names with `docker ps` and classifies them by name:
        ujava<digits>, upython<digits> and upython_voice<digits>.
    .PARAMETER Server
        Connection hashtable; the IP, User, Pass and Port entries are read.
    .OUTPUTS
        Hashtable with HasUjava, HasUpython, HasUpythonVoice (booleans), Containers
        (array of names) and UjavaContainer, UpythonContainer, UpythonVoiceContainer
        (matched name or $null). The same keys are returned when docker fails, with
        all flags $false and an empty Containers array.
    #>
    param(
        [hashtable]$Server
    )

    Write-Log -Level "INFO" -Message "自动检测系统类型(容器)..."

    # One result shape for every exit path, so callers can rely on the keys.
    $systemInfo = @{
        HasUjava              = $false
        HasUpython            = $false
        HasUpythonVoice       = $false
        Containers            = @()
        UjavaContainer        = $null
        UpythonContainer      = $null
        UpythonVoiceContainer = $null
    }

    # List the running containers.
    $result = Invoke-SSHCommand -HostName $Server.IP -User $Server.User -Pass $Server.Pass -Port $Server.Port -Command "docker ps --format '{{.Names}}' 2>/dev/null || echo 'DOCKER_ERROR'"
    if ($result.Output -match "DOCKER_ERROR" -or $result.ExitCode -ne 0) {
        Write-Log -Level "WARN" -Message "Docker 命令执行失败,可能未安装 Docker"
        return $systemInfo
    }

    # @() keeps Containers an array even when exactly one container is running.
    $containers = @($result.Output -split "`n" | Where-Object { $_ -match '\S' } | ForEach-Object { $_.Trim() })
    $systemInfo.Containers = $containers

    foreach ($container in $containers) {
        if ($container -match '^ujava\d*$') {
            $systemInfo.HasUjava = $true
            $systemInfo.UjavaContainer = $container
            Write-Log -Level "INFO" -Message " 检测到 ujava 容器: $container"
        }
        elseif ($container -match '^upython\d*$') {
            # The anchored pattern already excludes upython_voice*, so no extra
            # "voice" exclusion is required here.
            $systemInfo.HasUpython = $true
            $systemInfo.UpythonContainer = $container
            Write-Log -Level "INFO" -Message " 检测到 upython 容器: $container -> 运维集控系统"
        }
        elseif ($container -match '^upython_voice\d*$') {
            $systemInfo.HasUpythonVoice = $true
            $systemInfo.UpythonVoiceContainer = $container
            Write-Log -Level "INFO" -Message " 检测到 upython_voice 容器: $container -> 转录系统"
        }
    }

    if (-not $systemInfo.HasUjava -and -not $systemInfo.HasUpython -and -not $systemInfo.HasUpythonVoice) {
        Write-Log -Level "WARN" -Message " 未检测到任何已知容器类型"
    }

    return $systemInfo
}
# ================================
# 修复文件权限
# ================================
function Invoke-RemoteFilePermissionFix {
    <#
    .SYNOPSIS
        Uploads issue_handler.sh to the target server and runs its permission fix.
    .DESCRIPTION
        Creates a timestamped directory under /tmp on the remote host, uploads the
        script with Upload-ShellScript, runs it with
        "--action fix_permissions --non-interactive --yes" and removes the remote
        directory afterwards on every path (success, failure, upload error).
    .PARAMETER Server
        Connection hashtable; the IP, User, Pass and Port entries are read.
    .PARAMETER PlatformType
        'new' is passed to the script as "new"; any other value as "standard".
    .OUTPUTS
        Hashtable: @{ Success = $true } on success, otherwise
        @{ Success = $false; Error = <message> }.
    #>
    param(
        [Parameter(Mandatory=$true)] [hashtable]$Server,
        [Parameter(Mandatory=$true)] [string]$PlatformType
    )

    Write-Log -Level "INFO" -Message "[PERM] 开始远程文件权限修复"

    # Map the platform type onto the value expected by the script.
    $platformParam = if ($PlatformType -eq 'new') { 'new' } else { 'standard' }

    # NOTE(review): this existence check looks in "问题处理", while
    # Upload-ShellScript as defined in this file reads from "lib\shell" -
    # confirm that issue_handler.sh is present in both locations.
    $fixScriptPath = Join-Path $SCRIPT_DIR "问题处理\issue_handler.sh"
    if (-not (Test-Path $fixScriptPath)) {
        Write-Log -Level "ERROR" -Message "[PERM] 修复脚本不存在: $fixScriptPath"
        return @{ Success = $false; Error = "修复脚本不存在" }
    }

    Write-Log -Level "INFO" -Message "[PERM] 上传修复脚本到远程服务器..."

    # Create the remote working directory.
    $remoteDir = "/tmp/permission_fix_$(Get-Date -Format 'yyyyMMddHHmmss')"
    $cmd = "mkdir -p $remoteDir"
    Invoke-SSHCommand -HostName $Server.IP -User $Server.User -Pass $Server.Pass -Port $Server.Port -Command $cmd | Out-Null

    try {
        # Upload issue_handler.sh.
        $uploadResult = Upload-ShellScript -Server $Server -ScriptName "issue_handler.sh" -RemotePath $remoteDir
        if (-not $uploadResult) {
            Write-Log -Level "ERROR" -Message "[PERM] 上传修复脚本失败"
            return @{ Success = $false; Error = "上传修复脚本失败" }
        }

        Write-Log -Level "INFO" -Message "[PERM] 执行权限修复命令..."
        # Run the fix without any interactive prompt.
        $cmd = "cd $remoteDir && bash issue_handler.sh --action fix_permissions --platform $platformParam --non-interactive --yes"
        $result = Invoke-SSHCommand -HostName $Server.IP -User $Server.User -Pass $Server.Pass -Port $Server.Port -Command $cmd

        if ($result.ExitCode -eq 0) {
            Write-Log -Level "SUCCESS" -Message "[PERM] 权限修复执行成功"
            Write-Log -Level "INFO" -Message "[PERM] 修复输出: $($result.Output -join "`n")"
            return @{ Success = $true }
        }

        Write-Log -Level "ERROR" -Message "[PERM] 权限修复执行失败 (退出码: $($result.ExitCode))"
        Write-Log -Level "ERROR" -Message "[PERM] 错误输出: $($result.Output -join "`n")"
        return @{ Success = $false; Error = "执行失败 (退出码: $($result.ExitCode))" }
    }
    finally {
        # Single cleanup point: also runs when the upload failed, which used to
        # leave the temporary directory behind on the server.
        Invoke-SSHCommand -HostName $Server.IP -User $Server.User -Pass $Server.Pass -Port $Server.Port -Command "rm -rf $remoteDir" | Out-Null
    }
}
# ================================
# 检测 Check-FilePermissions 服务
# ================================
function Check-FilePermissions {
    <#
    .SYNOPSIS
        Lists the permissions of a fixed set of files on the target server.
    .DESCRIPTION
        Builds a platform-specific list of paths/globs, runs `ls -l` on each one
        remotely and records either the listing lines or a "MISS <path>" entry.
        When entries are missing and 问题处理\issue_handler.sh exists locally, the
        operator is asked whether to run Invoke-RemoteFilePermissionFix; after a
        successful fix the scan is repeated.
    .PARAMETER Server
        Connection hashtable; the IP, User, Pass and Port entries are read.
    .PARAMETER PlatformType
        'new' or 'old'; selects the path list.
    .PARAMETER SystemInfo
        Optional. On the 'old' platform its UjavaSystemVariant entry ('unified' or
        anything else) selects the start.sh / nginx-conf.d locations.
    .OUTPUTS
        Hashtable with Summary (text) and Lines (listing lines and "MISS ..." entries).
    #>
    param (
        [Parameter(Mandatory=$true)] [hashtable]$Server,
        [Parameter(Mandatory=$true)] [ValidateSet('new','old')] [string]$PlatformType,
        [Parameter(Mandatory=$false)] [hashtable]$SystemInfo
    )

    # Private helper: runs `ls -l` for every path and emits one string per listing
    # line, or "MISS <path>" when nothing matched. -FoundLevel is the log level
    # for listing lines; -Trace additionally logs the command and its raw output.
    function Get-PermissionScanLines {
        param(
            [hashtable]$Target,
            [string[]]$Paths,
            [string]$FoundLevel,
            [switch]$Trace
        )
        foreach ($path in $Paths) {
            $cmd = "if ls -l $path 2>/dev/null; then echo '__PERM_OK__'; else echo '__PERM_MISS__ $path'; fi"
            if ($Trace) { Write-Log -Level "INFO" -Message "[PERM] 执行: $cmd" }
            $res = Invoke-SSHCommand -HostName $Target.IP -User $Target.User -Pass $Target.Pass -Port $Target.Port -Command $cmd
            $out = if ($res.Output) { ($res.Output -join " ") } else { "" }
            if ($Trace) { Write-Log -Level "INFO" -Message "[PERM] 输出: $out" }

            if ($out -match "__PERM_OK__") {
                # Keep only `ls -l` entry lines (regular file, directory, symlink).
                ($res.Output) | Where-Object { $_ -match "^[-dl]" } | ForEach-Object {
                    Write-Log -Level $FoundLevel -Message "[PERM] 权限: $_"
                    $_
                }
            }
            else {
                Write-Log -Level 'WARN' -Message "[PERM] 未找到: $path"
                "MISS $path"
            }
        }
    }

    Write-Log -Level "INFO" -Message "开始文件权限检测 (平台: $PlatformType) ..."

    $targets = @()
    if ($PlatformType -eq 'new') {
        $targets += @(
            "/data/services/api/auth/auth-sso-auth/run.sh",
            "/data/services/api/auth/auth-sso-gatway/run.sh",
            "/data/services/api/auth/auth-sso-system/run.sh",
            "/data/services/api/java-meeting/java-meeting2.0/run.sh",
            "/data/services/api/java-meeting/java-meeting-extapi/run.sh",
            "/data/services/api/java-meeting/java-message-scheduling/run.sh",
            "/data/services/api/java-meeting/java-mqtt/run.sh",
            "/data/services/api/java-meeting/java-quartz/run.sh",
            "/data/services/api/start.sh",
            "/data/services/scripts/*.sh",
            "/data/third_party/paperless/run.sh",
            "/data/third_party/paperless/start.sh",
            "/data/third_party/wifi-local/config.ini",
            "/data/third_party/wifi-local/startDB.sh",
            "/data/third_party/wifi-local/wifi*",
            "/etc/rc.d/rc.local",
            "/data/middleware/nginx/config/*.conf",
            "/data/middleware/emqx/config/*.conf",
            "/data/services/api/python-cmdb/*.sh",
            "/data/services/api/python-voice/*.sh",
            "/data/middleware/mysql/conf/my.cnf"
        )
    } else {
        # Traditional platform: some paths depend on the ujava system variant.
        $ujavaVariant = $null
        if ($SystemInfo -and $SystemInfo.ContainsKey('UjavaSystemVariant')) { $ujavaVariant = $SystemInfo.UjavaSystemVariant }
        $targets += @(
            "/var/www/java/api-java-meeting2.0/run.sh",
            "/var/www/java/external-meeting-api/run.sh",
            "/var/www/html/start.sh",
            "/var/www/wifi-local/config.ini",
            "/var/www/wifi-local/startDB.sh",
            "/var/www/wifi-local/wifi*",
            "/var/www/paperless/run.sh",
            "/var/www/paperless/start.sh",
            "/var/www/redis/redis-*.conf",
            "/var/www/emqx/config/*.conf",
            "/etc/rc.d/rc.local",
            "/usr/local/docker/mysql/my.cnf"
        )
        # The 'unified' variant keeps start.sh and the nginx config under unifiedPlatform.
        if ($ujavaVariant -eq 'unified') {
            $targets += "/var/www/java/unifiedPlatform/start.sh"
            $targets += "/var/www/java/unifiedPlatform/nginx-conf.d/*.conf"
        } else {
            $targets += "/var/www/java/start.sh"
            $targets += "/var/www/java/nginx-conf.d/*.conf"
        }
    }

    Write-Log -Level "INFO" -Message "[PERM] 目标列表生成 (平台: $PlatformType)"
    foreach ($path in $targets) { Write-Log -Level "INFO" -Message "[PERM] 待检查: $path" }

    # First scan: traced, listing lines logged at SUCCESS level.
    $lines = @(Get-PermissionScanLines -Target $Server -Paths $targets -FoundLevel 'SUCCESS' -Trace)
    Write-Log -Level "INFO" -Message "[PERM] 检测结束: 共 $($lines.Count) 项"

    # Separate counters for found and missing entries.
    $foundCount = @($lines | Where-Object { $_ -notlike "MISS*" }).Count
    $missCount = @($lines | Where-Object { $_ -like "MISS*" }).Count
    $summaryText = "已检查: 找到 $foundCount 项, 缺失 $missCount 项"

    # Offer the automatic fix only when something is missing.
    if ($missCount -gt 0) {
        Write-Log -Level "WARN" -Message "[PERM] 发现 $missCount 个缺失文件,询问是否自动修复..."

        $fixScriptPath = Join-Path $SCRIPT_DIR "问题处理\issue_handler.sh"
        if (-not (Test-Path $fixScriptPath)) {
            Write-Log -Level "WARN" -Message "[PERM] 未找到修复脚本,跳过自动修复"
        }
        else {
            Write-Log -Level "INFO" -Message "[PERM] 检测到修复脚本: $fixScriptPath"
            $fixChoice = Read-Host "[PERM] 是否自动修复文件权限? (y/N)"

            if ($fixChoice -eq 'y' -or $fixChoice -eq 'Y') {
                Write-Log -Level "INFO" -Message "[PERM] 用户确认自动修复,开始执行..."
                $fixResult = Invoke-RemoteFilePermissionFix -Server $Server -PlatformType $PlatformType

                if ($fixResult.Success) {
                    Write-Log -Level "SUCCESS" -Message "[PERM] 文件权限修复成功"

                    # Second scan: untraced, listing lines logged at INFO level.
                    Write-Log -Level "INFO" -Message "[PERM] 重新检测文件权限..."
                    $lines = @(Get-PermissionScanLines -Target $Server -Paths $targets -FoundLevel 'INFO')

                    $foundCount = @($lines | Where-Object { $_ -notlike "MISS*" }).Count
                    $missCount = @($lines | Where-Object { $_ -like "MISS*" }).Count
                    $summaryText = "已检查: 找到 $foundCount 项, 缺失 $missCount 项 (已自动修复)"
                } else {
                    Write-Log -Level "ERROR" -Message "[PERM] 文件权限修复失败: $($fixResult.Error)"
                }
            } else {
                Write-Log -Level "INFO" -Message "[PERM] 用户取消自动修复"
            }
        }
    }

    return @{ Summary = $summaryText; Lines = $lines }
}
# ================================
# 从远端下载文件到本地(基于 pscp)
# ================================
function Download-RemoteFile {
    <#
    .SYNOPSIS
        Downloads one file from the target server with pscp.
    .DESCRIPTION
        Runs pscp in batch mode with stdout/stderr redirected to temporary files and
        enforces a timeout. Success is decided by the local file existing with a
        size greater than zero. If the first attempt fails because the host key is
        not cached yet, the transfer is retried once with the prompt answered
        automatically.
    .PARAMETER Server
        Connection hashtable; the IP, User, Pass and Port entries are read.
    .PARAMETER RemotePath
        Path of the file on the server.
    .PARAMETER LocalPath
        Destination file path; the parent directory is created when missing and an
        existing file at this path is replaced.
    .PARAMETER TimeoutSeconds
        Maximum run time of the first pscp attempt (default 600).
    .OUTPUTS
        Hashtable with Success plus, depending on the path taken, Size, ExitCode,
        Output and Reason ("pscp not found" / "timeout").
    #>
    param(
        [Parameter(Mandatory=$true)] [hashtable]$Server,
        [Parameter(Mandatory=$true)] [string]$RemotePath,
        [Parameter(Mandatory=$true)] [string]$LocalPath,
        [int]$TimeoutSeconds = 600
    )

    if (-not $global:PSCP_PATH -or -not (Test-Path $global:PSCP_PATH)) {
        Write-Log -Level "ERROR" -Message "[DL] pscp.exe 未找到,无法下载文件: $RemotePath"
        return @{ Success = $false; Reason = "pscp not found" }
    }

    $localDir = Split-Path $LocalPath -Parent
    if ($localDir -and -not (Test-Path $localDir)) { New-Item -ItemType Directory -Path $localDir -Force | Out-Null }

    # Success is judged by the local file, so a leftover file from an earlier run
    # must not be mistaken for a fresh download.
    if (Test-Path -LiteralPath $LocalPath -PathType Leaf) {
        Remove-Item -LiteralPath $LocalPath -Force -ErrorAction SilentlyContinue
    }

    $remoteSpec = "$($Server.User)@$($Server.IP):$RemotePath"
    # Named $pscpArgs on purpose: $args is a PowerShell automatic variable.
    $pscpArgs = @(
        "-scp",
        "-batch",
        "-P", $Server.Port,
        "-l", $Server.User,
        "-pw", $Server.Pass,
        $remoteSpec,
        $LocalPath
    )
    # Start-Process joins the list with blanks without quoting, so quote every
    # element that contains whitespace (paths, passwords).
    $argumentList = @($pscpArgs | ForEach-Object {
        $token = [string]$_
        if ($token -match '\s') { '"' + $token + '"' } else { $token }
    })

    # Never write the password to the log: show a masked copy of the arguments.
    $loggedArgs = @("-scp", "-batch", "-P", $Server.Port, "-l", $Server.User, "-pw", "******", $remoteSpec, $LocalPath)
    Write-Log -Level "INFO" -Message ("[DL] pscp 下载(超时 {0}s): {1} {2}" -f $TimeoutSeconds, $global:PSCP_PATH, ($loggedArgs -join ' '))

    # Collapses captured output to one line, capped at $MaxLength characters.
    $flatten = {
        param([string]$Text, [int]$MaxLength)
        $one = ($Text -replace '\s+', ' ').Trim()
        $one.Substring(0, [math]::Min($MaxLength, $one.Length))
    }

    # stdout/stderr go to temporary files so a stalled transfer cannot block on a pipe.
    $tmpOut = Join-Path $env:TEMP ("pscp_out_{0}.log" -f ([guid]::NewGuid().ToString("N")))
    $tmpErr = Join-Path $env:TEMP ("pscp_err_{0}.log" -f ([guid]::NewGuid().ToString("N")))

    try {
        $p = Start-Process -FilePath $global:PSCP_PATH `
            -ArgumentList $argumentList `
            -NoNewWindow `
            -PassThru `
            -RedirectStandardOutput $tmpOut `
            -RedirectStandardError $tmpErr

        $finished = $p.WaitForExit($TimeoutSeconds * 1000)
        if (-not $finished) {
            try { $p.Kill() } catch {}
            Write-Log -Level "ERROR" -Message ("[DL] 下载超时,已终止 pscp (>{0}s): {1}" -f $TimeoutSeconds, $RemotePath)
            $outTxt = (Get-Content -Path $tmpOut -ErrorAction SilentlyContinue | Out-String).Trim()
            $errTxt = (Get-Content -Path $tmpErr -ErrorAction SilentlyContinue | Out-String).Trim()
            if ($outTxt) {
                Write-Log -Level "ERROR" -Message ("[DL] pscp stdout: {0}" -f (& $flatten $outTxt 500))
            }
            if ($errTxt) {
                Write-Log -Level "ERROR" -Message ("[DL] pscp stderr: {0}" -f (& $flatten $errTxt 500))
            }
            return @{ Success = $false; ExitCode = -1; Reason = "timeout" }
        }

        $code = $null
        try { $code = $p.ExitCode } catch { $code = $null }
        $outTxt = (Get-Content -Path $tmpOut -ErrorAction SilentlyContinue | Out-String)
        $errTxt = (Get-Content -Path $tmpErr -ErrorAction SilentlyContinue | Out-String)
        $all = (($outTxt + "`n" + $errTxt) -replace "`r","").Trim()

        # Success criterion: the file exists and is not empty (pscp writes its
        # progress to stderr, so stderr content alone does not mean failure).
        $fileOk = $false
        $size = 0
        if (Test-Path -LiteralPath $LocalPath) {
            try {
                $size = (Get-Item -LiteralPath $LocalPath).Length
                if ($size -gt 0) { $fileOk = $true }
            } catch { $fileOk = $false }
        }

        # Unknown host key: retry once and answer the confirmation prompt with "y".
        # The retry must NOT use -batch - in batch mode pscp never prompts and just
        # abandons the connection, so the piped "y" would never be read.
        if (-not $fileOk -and $all -match "host key|Cannot confirm") {
            $cmdLine = "echo y | `"$($global:PSCP_PATH)`" -scp -P $($Server.Port) -l $($Server.User) -pw `"$($Server.Pass)`" `"$remoteSpec`" `"$LocalPath`""
            Write-Log -Level "WARN" -Message "[DL] 首次连接主机密钥提示,自动接受并重试一次"
            $all2 = cmd /c $cmdLine 2>&1
            $code2 = $LASTEXITCODE
            if ((Test-Path -LiteralPath $LocalPath) -and ((Get-Item -LiteralPath $LocalPath).Length -gt 0)) {
                $size2 = (Get-Item -LiteralPath $LocalPath).Length
                Write-Log -Level "SUCCESS" -Message "[DL] 下载成功 ($([math]::Round($size2/1024,2)) KB): $LocalPath"
                return @{ Success = $true; Size = $size2; Output = $all2 }
            } else {
                $oneLine = (($all2 -join " ") -replace '\s+',' ').Trim()
                Write-Log -Level "ERROR" -Message "[DL] 下载失败(重试) ExitCode=$code2, 输出: $oneLine"
                return @{ Success = $false; ExitCode = $code2; Output = $all2 }
            }
        }

        if ($fileOk) {
            Write-Log -Level "SUCCESS" -Message "[DL] 下载成功 ($([math]::Round($size/1024,2)) KB): $LocalPath"
            return @{ Success = $true; Size = $size; ExitCode = $code; Output = $all }
        } else {
            $oneLine = ($all -replace '\s+',' ').Trim()
            if ($oneLine.Length -gt 800) { $oneLine = $oneLine.Substring(0,800) + "..." }
            Write-Log -Level "ERROR" -Message ("[DL] 下载失败 ExitCode={0}, 输出: {1}" -f $code, $oneLine)
            return @{ Success = $false; ExitCode = $code; Output = $all }
        }
    }
    finally {
        # The capture files are removed on every exit path.
        Remove-Item -Path $tmpOut,$tmpErr -Force -ErrorAction SilentlyContinue | Out-Null
    }
}
# ================================
# 检测 DataBakup 服务
# 所有检测函数已按职责拆分到 modules/ 目录
# - ServerProfile.psm1: 服务器探测(Test-Dependencies, Select-Server, Get-PlatformType 等)
# - DataBackup.psm1: 文件下载与备份(Download-RemoteFile, DataBakup)
# - FilePermission.psm1: 文件权限检查(Check-FilePermissions)
# - ShellAdapter.psm1: Shell 兼容层(*-Shell 函数)
# - Common.psm1: 公共函数(含 Upload-ShellScript)
# ================================
function DataBakup {
    <#
    .SYNOPSIS
        Backs up on-site data on the target server and downloads the archive.
    .DESCRIPTION
        Traditional ('old') platform only: copies the application directories that
        match the detected containers plus emqx/redis into /home/bakup, dumps the
        "ubains" and "devops" databases from the umysql container when it is
        running, packs everything into /home/bakup_<timestamp>.tar.gz and downloads
        it with pscp into <script dir>\Downloads (falling back to
        %TEMP%\ubains_downloads). Remote staging data is removed afterwards; the
        remote archive is removed only once it has been downloaded.
        For the 'new' platform a warning is logged and no data is copied.
    .PARAMETER Server
        Connection hashtable; the IP, User, Pass and Port entries are read.
    .PARAMETER PlatformType
        'new' or 'old'.
    .PARAMETER SystemInfo
        Hashtable as produced by Get-SystemType; HasUjava, HasUpython and the
        optional UjavaSystemVariant, cardtable, paperless and umysql entries are read.
    .OUTPUTS
        Hashtable with a Summary entry: "失败" or "完成 (<download state>)".
    #>
    param (
        [Parameter(Mandatory=$true)] [hashtable]$Server,
        [Parameter(Mandatory=$true)] [ValidateSet('new','old')] [string]$PlatformType,
        [Parameter(Mandatory=$true)] [hashtable]$SystemInfo
    )

    # Private helper: downloads the remote archive to $TargetFile by calling pscp
    # directly with an argument array (no nested powershell, no string parsing).
    # Reads $Server and $tarPath from the enclosing function. Returns $true on exit code 0.
    function Invoke-BackupDownload {
        param([string]$TargetFile, [string]$LogLabel)
        $remoteSpec = "$($Server.User)@$($Server.IP):$tarPath"
        # The logged command shows a masked password.
        Write-Log -Level "INFO" -Message "[BAK] ${LogLabel}: `"$($global:PSCP_PATH)`" -batch -scp -P $($Server.Port) -pw `"******`" $remoteSpec `"$TargetFile`""
        $pscpArgs = @('-batch', '-scp', '-P', $Server.Port, '-pw', $Server.Pass, $remoteSpec, $TargetFile)
        $null = & $global:PSCP_PATH @pscpArgs
        return ($LASTEXITCODE -eq 0)
    }

    Write-Log -Level "INFO" -Message "开始现场数据备份 (平台: $PlatformType) ..."

    $bakDir = "/home/bakup"
    $cmds = @(
        "set -e",
        "mkdir -p $bakDir"
    )

    # Keys follow the structure returned by Get-SystemType.
    $hasUjava = [bool]($SystemInfo.HasUjava)
    $hasUpython = [bool]($SystemInfo.HasUpython)
    $hasCardtable = ($SystemInfo.ContainsKey('cardtable') -and $SystemInfo['cardtable'])
    $hasPaperless = ($SystemInfo.ContainsKey('paperless') -and $SystemInfo['paperless'])
    $hasUmysql = ($SystemInfo.ContainsKey('umysql') -and $SystemInfo['umysql'])
    $ujavaVariant = "meeting"
    if ($SystemInfo.ContainsKey('UjavaSystemVariant') -and $SystemInfo.UjavaSystemVariant) {
        $ujavaVariant = [string]$SystemInfo.UjavaSystemVariant
    }

    # Only the traditional platform is implemented.
    if ($PlatformType -eq 'old') {
        # ujava: the 'unified' variant lives under unifiedPlatform.
        if ($hasUjava) {
            if ($ujavaVariant -eq 'unified') {
                $cmds += "[ -d /var/www/java/unifiedPlatform ] && cp -a /var/www/java/unifiedPlatform $bakDir/"
            } else {
                $cmds += "[ -d /var/www/java ] && cp -a /var/www/java $bakDir/"
            }
        }
        if ($hasUpython) {
            $cmds += "[ -d /var/www/html ] && cp -a /var/www/html $bakDir/"
        }
        $cmds += "[ -d /var/www/emqx ] && cp -a /var/www/emqx $bakDir/"
        $cmds += "[ -d /var/www/redis ] && cp -a /var/www/redis $bakDir/"
    } else {
        Write-Log -Level "WARN" -Message "新统一平台备份暂未实现,已跳过"
    }

    # Database dump: the umysql container is detected on the remote side.
    if ($PlatformType -eq 'old') {
        # NOTE(review): database credentials are hard-coded here and end up in the
        # "计划执行" log lines below - consider supplying them from configuration.
        $dbUser = "root"; $dbPass = "dNrprU&2S"; $dbs = @("ubains","devops")
        $cmds += "mkdir -p /tmp/bak_sql"
        foreach ($db in $dbs) {
            $dumpCmd = "mysqldump -u${dbUser} -p'${dbPass}' --single-transaction --quick --lock-tables=false ${db}"
            # The backtick keeps `$(date ...) literal so it is evaluated by the
            # REMOTE shell. Unescaped, PowerShell expands the subexpression locally
            # while building the string and the timestamp is lost.
            $cmds += "if docker ps --format '{{.Names}}' | grep -q '^umysql`$'; then docker exec umysql sh -c `"$dumpCmd`" > /tmp/bak_sql/${db}_`$(date +%Y%m%d%H%M%S).sql; fi || true"
        }
        $cmds += "if ls /tmp/bak_sql/*.sql >/dev/null 2>&1; then cp -a /tmp/bak_sql/*.sql $bakDir/; fi || true"
    }

    Write-Log -Level "INFO" -Message "[BAK] 创建备份目录: $bakDir"
    # Show what is about to be executed.
    Write-Log -Level "INFO" -Message "[BAK] 平台: $PlatformType, 容器: ujava=$hasUjava upython=$hasUpython cardtable=$hasCardtable paperless=$hasPaperless umysql=$hasUmysql"
    foreach ($c in $cmds) { Write-Log -Level "INFO" -Message "[BAK] 计划执行: $c" }

    $joined = ($cmds -join '; ')
    Write-Log -Level "INFO" -Message "[BAK] 执行远程备份命令"
    $res = Invoke-SSHCommand -HostName $Server.IP -User $Server.User -Pass $Server.Pass -Port $Server.Port -Command $joined
    $bakOutput = if ($res.Output) { [string]::Join(' ', $res.Output) } else { '' }
    Write-Log -Level "INFO" -Message "[BAK] 备份输出: $bakOutput"
    if ($res.ExitCode -ne 0) { Write-Log -Level "ERROR" -Message "[BAK] 远程备份步骤失败"; return @{ Summary = "失败" } }

    # Pack the staging directory into a timestamped archive under /home.
    $timestamp = Get-Date -Format "yyyyMMdd_HHmmss"
    $tarName = "bakup_${timestamp}.tar.gz"
    $tarPath = "/home/$tarName"
    $packCmd = "set -e; tar -czf $tarPath -C /home bakup"
    Write-Log -Level "INFO" -Message "[BAK] 压缩命令: $packCmd"
    $res2 = Invoke-SSHCommand -HostName $Server.IP -User $Server.User -Pass $Server.Pass -Port $Server.Port -Command $packCmd
    $packOutput = if ($res2.Output) { [string]::Join(' ', $res2.Output) } else { '' }
    Write-Log -Level "INFO" -Message "[BAK] 压缩输出: $packOutput"
    if ($res2.ExitCode -ne 0 -or $packOutput -match "error") { Write-Log -Level "ERROR" -Message "[BAK] 压缩备份失败"; return @{ Summary = "失败" } }

    $downloaded = $false
    if (-not $global:PSCP_PATH) {
        Write-Log -Level "ERROR" -Message "[BAK] 未找到 pscp.exe,无法下载备份文件"
        $downloadSummary = "未下载"
    } else {
        $localOutDir = Join-Path $SCRIPT_DIR "Downloads"
        if (-not (Test-Path $localOutDir)) { New-Item -ItemType Directory -Path $localOutDir | Out-Null }
        $localFile = Join-Path $localOutDir $tarName

        # Best-effort free-space check; a failure here never aborts the backup.
        try {
            $qualifier = Split-Path $localOutDir -Qualifier
            if ($qualifier) {
                $drive = Get-PSDrive -Name $qualifier.TrimEnd(':')
                if ($drive -and ($drive.Free -lt 1GB)) {
                    Write-Log -Level "WARN" -Message "[BAK] 本地磁盘可用空间不足 1GB,可能导致下载失败"
                }
            }
        } catch {
            Write-Log -Level "WARN" -Message "[BAK] 无法检测本地磁盘空间,已忽略: $($_.Exception.Message)"
        }

        if (Invoke-BackupDownload -TargetFile $localFile -LogLabel "下载命令") {
            Write-Log -Level "SUCCESS" -Message "[BAK] 备份已下载: $localFile"
            $downloadSummary = "已下载"
            $downloaded = $true
        } else {
            Write-Log -Level "ERROR" -Message "[BAK] 下载备份失败,尝试使用 TEMP 目录重试"
            $fallbackDir = Join-Path $env:TEMP "ubains_downloads"
            if (-not (Test-Path $fallbackDir)) { New-Item -ItemType Directory -Path $fallbackDir -Force | Out-Null }
            $fallbackFile = Join-Path $fallbackDir $tarName

            if (Invoke-BackupDownload -TargetFile $fallbackFile -LogLabel "重试下载命令") {
                Write-Log -Level "SUCCESS" -Message "[BAK] 备份已下载: $fallbackFile"
                $downloadSummary = "已下载(TEMP)"
                $downloaded = $true
            } else {
                Write-Log -Level "ERROR" -Message "[BAK] 重试下载失败"
                $downloadSummary = "下载失败"
            }
        }
    }

    # Staging data is always removed. The archive itself is removed only after a
    # successful download; otherwise deleting it would discard the only copy.
    $cleanParts = @("rm -rf $bakDir")
    if ($downloaded) {
        $cleanParts += "rm -f $tarPath"
    } else {
        Write-Log -Level "WARN" -Message "[BAK] 备份未下载到本地,保留远端压缩包: $tarPath"
    }
    $cleanParts += "rm -rf /tmp/bak_sql"
    $cleanCmd = $cleanParts -join '; '
    Write-Log -Level "INFO" -Message "[BAK] 清理命令: $cleanCmd"
    [void] (Invoke-SSHCommand -HostName $Server.IP -User $Server.User -Pass $Server.Pass -Port $Server.Port -Command $cleanCmd)
    Write-Log -Level "INFO" -Message "[BAK] 清理完成"
    Write-Log -Level "INFO" -Message "[BAK] 备份流程完成"

    return @{ Summary = "完成 ($downloadSummary)" }
}
# ================================
# 主函数
......@@ -2461,7 +613,7 @@ function Main {
# 确保 ContainerCheck 模块已加载
if (-not (Get-Module ContainerCheck -ErrorAction SilentlyContinue)) {
Write-Log -Level "WARN" -Message "[容器] ContainerCheck 模块未加载,尝试重新加载..."
$modulePath = Join-Path $SCRIPT_DIR "modules\ContainerCheck.psm1"
$modulePath = Join-Path $global:SCRIPT_DIR "modules\ContainerCheck.psm1"
if (Test-Path $modulePath) {
Import-Module $modulePath -Force -Global -ErrorAction Stop
Write-Log -Level "INFO" -Message "[容器] ContainerCheck 模块重新加载成功"
......@@ -2617,7 +769,7 @@ function Main {
}
# 安卓设备自检(按 PRD 15:手动输入设备IP,连接+拉取日志)
$androidResults = Test-AndroidDeviceHealth -ScriptDir $SCRIPT_DIR
$androidResults = Test-AndroidDeviceHealth -ScriptDir $global:SCRIPT_DIR
# 生成检测报告
# 转换NTPResults为hashtable格式(兼容两种检测模式的返回格式)
......
......@@ -295,6 +295,78 @@ rm -f /tmp/.upload_${fileName}.b64
}
}
# ==============================================================================
# 上传Shell脚本到远程服务器
# ==============================================================================
function Upload-ShellScript {
    <#
    .SYNOPSIS
        Upload a shell script, together with common.sh, to a remote server.
    .DESCRIPTION
        Copies lib\shell\common.sh (the shared function library) and
        lib\shell\<ScriptName>, both resolved relative to $global:SCRIPT_DIR,
        into a directory on the remote host. Used for shell-mode checks and
        file-permission repair.

        The local files are validated before any remote command is issued, so
        a missing local script neither costs an SSH round-trip nor leaves an
        empty directory behind on the server.
    .PARAMETER Server
        Hashtable with the connection details: IP, User, Pass, Port.
    .PARAMETER ScriptName
        File name of the script to upload (for example resource_check.sh).
    .PARAMETER RemotePath
        Remote target directory. Defaults to /tmp/health_check.
        The value is interpolated unquoted into the remote mkdir command, so it
        must not contain whitespace or shell metacharacters.
    .EXAMPLE
        Upload-ShellScript -Server $server -ScriptName "resource_check.sh"
    .OUTPUTS
        System.Boolean
        $true when both files were uploaded; $false on any failure (the reason
        is written through Write-Log).
    #>
    param(
        [Parameter(Mandatory=$true)] [hashtable]$Server,
        [Parameter(Mandatory=$true)] [string]$ScriptName,
        [Parameter(Mandatory=$false)] [string]$RemotePath = "/tmp/health_check"
    )

    # Resolve and validate the local files first: this is cheap and has no
    # side effects, unlike the remote calls below.
    $localCommonPath = Join-Path $global:SCRIPT_DIR "lib\shell\common.sh"
    $localScriptPath = Join-Path $global:SCRIPT_DIR "lib\shell\$ScriptName"
    foreach ($localPath in @($localCommonPath, $localScriptPath)) {
        # -LiteralPath avoids wildcard interpretation of the name;
        # -PathType Leaf rejects directories.
        if (-not (Test-Path -LiteralPath $localPath -PathType Leaf)) {
            Write-Log -Level "ERROR" -Message "[SHELL] 本地脚本不存在: $localPath"
            return $false
        }
    }

    # Create the remote target directory (mkdir -p succeeds if it already exists).
    $cmd = "mkdir -p $RemotePath 2>/dev/null"
    $mkdirResult = Invoke-SSHCommand -HostName $Server.IP -User $Server.User -Pass $Server.Pass -Port $Server.Port -Command $cmd
    if ($mkdirResult.ExitCode -ne 0) {
        Write-Log -Level "ERROR" -Message "[SHELL] 创建远程目录失败: $RemotePath"
        return $false
    }

    # Upload the shared function library.
    $commonUploadResult = Copy-File-To-Remote -LocalPath $localCommonPath -Server $Server -RemoteDir $RemotePath
    if (-not $commonUploadResult) {
        Write-Log -Level "ERROR" -Message "[SHELL] 上传基础函数库失败: common.sh"
        return $false
    }

    # Upload the requested check script.
    $scriptUploadResult = Copy-File-To-Remote -LocalPath $localScriptPath -Server $Server -RemoteDir $RemotePath
    if (-not $scriptUploadResult) {
        Write-Log -Level "ERROR" -Message "[SHELL] 上传脚本失败: $ScriptName"
        return $false
    }

    return $true
}
# ==============================================================================
# 上传修复脚本函数
# ==============================================================================
......@@ -459,5 +531,6 @@ Export-ModuleMember -Function @(
'Write-Log',
'Invoke-SSHCommand',
'Copy-File-To-Remote',
'Upload-ShellScript',
'Upload_the_repair_script'
)
# _PLAN_计划执行文档_服务自检检测优化
> 版本:V1.0
> 创建日期:2026-06-06
> 关联PRD:`_PRD_服务自检检测优化_需求文档.md`
---
## 1. 执行概述
### 1.1 项目背景
根据《服务器监测需求规整版需求文档》183项检测项分析,当前完成率 73%(134项),本次优化聚焦安全合规、中间件深度检测和系统资源增强,共新增/增强 9 大功能模块。
### 1.2 双版本同步说明
本次所有需求需同时覆盖两个版本:
| 版本 | 主脚本 | 架构特点 |
|------|--------|----------|
| **PowerShell** | `check_server_health.ps1` (830行) | Windows 本地运行,通过 plink SSH 远程执行,已模块化(15个 .psm1) |
| **Shell** | `check_server_health.sh` (4112行) | Linux 本地运行,单文件整体式架构 |
**同步策略:**
- PowerShell 版本:新增 .psm1 模块或在已有模块中增强函数,Main 函数中增加调用
- Shell 版本:在 check_server_health.sh 中新增对应函数,在主流程中增加调用
- Shell 的 lib/shell/ 库仅被 PS 版本远程调用使用,Shell 本地版本不依赖该目录
### 1.3 涉及脚本
| 脚本文件 | 路径 | 变更类型 |
|----------|------|----------|
| `check_server_health.ps1` | `AuxiliaryTool/ScriptTool/ServiceSelfInspection/` | 增强(导入新模块) |
| `check_server_health.sh` | `AuxiliaryTool/ScriptTool/ServiceSelfInspection/` | 增强(新增函数和流程) |
| `modules/SecurityCheck.psm1` | `modules/` | 🆕 新建 |
| `modules/MiddlewareCheck.psm1` | `modules/` | 增强 |
| `modules/ServerResourceAnalysis.psm1` | `modules/` | 增强 |
| `modules/DataBackup.psm1` | `modules/` | 增强 |
| `modules/Report.psm1` | `modules/` | 增强 |
---
## 2. 任务分解
### 2.1 安全合规检测(PRD 2.1)— 新建 SecurityCheck.psm1
#### 2.1.1 弱密码检测
**实现方案:**
| 检测项 | 实现方式 | PS 版本 | Shell 版本 |
|--------|----------|---------|------------|
| MySQL Root 访问范围 | 查询 `mysql.user` 表 root 用户的 host 字段 | `SecurityCheck.psm1` 中通过 `Invoke-SSHCommand` 执行 docker exec mysql 查询 | `check_server_health.sh` 中新增 `check_mysql_weak_password()` 函数 |
| MySQL 空密码用户 | 查询 `authentication_string` 为空的用户 | 同上 | 同上 |
| Redis 无密码/弱密码 | 读取容器内 `redis.conf` 中的 `requirepass` 配置 | `SecurityCheck.psm1` 通过 SSH 读取配置文件 | `check_server_health.sh` 中新增 `check_redis_weak_password()` 函数 |
| EMQX 默认密码 | 读取容器内 `emqx.conf` 中 Dashboard 用户名密码 | 同上 | `check_server_health.sh` 中新增 `check_emqx_weak_password()` 函数 |
| Linux 空密码账户 | 解析 `/etc/shadow` 中密码字段 | 同上 | `check_server_health.sh` 中新增 `check_linux_weak_password()` 函数 |
**弱密码字典:**
```
常见弱密码列表:空字符串、123456、password、root、admin、redis、mysql、test、guest
```
**判断逻辑:**
| 检测项 | 正常条件 | 风险条件 |
|--------|----------|----------|
| MySQL Root 访问 | root 仅允许 localhost/127.0.0.1 访问 | root@% 允许任意主机 |
| MySQL 空密码 | 无空密码用户 | 存在空密码用户 |
| Redis 密码 | requirepass 已设置且非弱密码 | requirepass 为空或在弱密码字典中 |
| EMQX 密码 | 非 admin/public 默认组合 | 使用默认 admin/public |
| Linux 空密码 | 无空密码账户 | /etc/shadow 中存在空密码字段 |
#### 2.1.2 安全基线扫描
**实现方案:**
| 检测项 | 实现方式 | PS 版本 | Shell 版本 |
|--------|----------|---------|------------|
| 可疑 SUID 文件 | `find / -perm -4000 -type f` 与白名单对比 | `SecurityCheck.psm1` 通过 SSH 执行 | `check_server_health.sh` 中新增 `check_suid_files()` 函数 |
| 异常 crontab | 扫描 `/var/spool/cron/` 和 `/etc/cron.d/` | 同上 | 新增 `check_crontab()` 函数 |
| SSH 暴力破解 | 分析 `/var/log/secure` 失败登录次数 | 同上 | 新增 `check_ssh_bruteforce()` 函数 |
| 异常端口 | `netstat -tlnp` 与白名单对比 | 同上 | 新增 `check_open_ports()` 函数 |
**SUID 白名单:**
```
/usr/bin/sudo, /usr/bin/passwd, /usr/bin/su, /usr/bin/ping, /usr/bin/mount,
/usr/bin/umount, /usr/bin/newgrp, /usr/bin/chsh, /usr/bin/chfn, /usr/bin/gpasswd,
/usr/lib/openssh/ssh-keysign
```
**端口白名单:**
```
新平台预期端口:22, 80, 443, 1883, 3306, 6379, 8083, 8084, 8888, 18083
传统平台预期端口:22, 80, 443, 1883, 3306, 6379, 8081, 8082, 8443, 9001
```
**判断逻辑:**
| 检测项 | 正常条件 | 风险条件 |
|--------|----------|----------|
| SUID 文件 | 所有 SUID 文件都在白名单中 | 发现白名单外的 SUID 文件 |
| crontab | 所有定时任务为已知任务 | 发现可疑或未知的定时任务 |
| SSH 暴力破解 | 同一 IP 失败登录 < 50 次 | 同一 IP 失败登录 ≥ 50 次 |
| 异常端口 | 所有监听端口都在白名单中 | 发现非预期的监听端口 |
#### 实施步骤
| 步骤 | 描述 | PS 版本 | Shell 版本 | 状态 |
|------|------|---------|------------|------|
| 1 | 新建 SecurityCheck.psm1 模块 | 新建文件 | — | [ ] |
| 2 | 实现弱密码检测函数(5个检测项) | SecurityCheck.psm1 | check_server_health.sh 新增 5 个函数 | [ ] |
| 3 | 实现安全基线函数(4个检测项) | SecurityCheck.psm1 | check_server_health.sh 新增 4 个函数 | [ ] |
| 4 | PS 版 Main 函数增加安全检测调用 | check_server_health.ps1 | — | [ ] |
| 5 | Shell 版主流程增加安全检测调用 | — | check_server_health.sh | [ ] |
| 6 | 安全检测结果集成到报告 | Report.psm1 增强 | check_server_health.sh 报告章节增强 | [ ] |
---
### 2.2 MySQL 深度检测(PRD 2.2)— 增强 MiddlewareCheck.psm1
**实现方案:**
在现有 `Test-MySQLConnection` 函数之后,新增 `Test-MySQLDeepCheck` 函数。利用已有的 `check_mysql.sh` 上传执行机制,扩展检测范围。
| 检测项 | 数据来源 | 阈值 |
|--------|----------|------|
| 缓冲池命中率 | `SHOW STATUS LIKE 'Innodb_buffer_pool_read%'` | ≥ 95% 正常,80~95% 警告,< 80% 异常 |
| 慢查询状态 | `SHOW VARIABLES LIKE 'slow_query_log'` + `SHOW STATUS LIKE 'Slow_queries'` | 未开启为警告 |
| 连接使用率 | `SHOW STATUS LIKE 'Threads_connected'` / `SHOW VARIABLES LIKE 'max_connections'` | < 80% 正常,≥ 80% 警告 |
| 活跃连接详情 | `SHOW PROCESSLIST` | 记录当前活跃连接信息 |
| QPS/TPS | `SHOW STATUS LIKE 'Queries'` / `SHOW STATUS LIKE 'Uptime'` | 记录为性能基线 |
| 主从复制状态 | `SHOW SLAVE STATUS` | IO/SQL 线程必须为 Yes,延迟 < 60s |
| TOP20 大表 | `information_schema.TABLES ORDER BY data_length DESC LIMIT 20` | 记录为参考信息 |
**PS 版本实现方式:**
- 在 `MiddlewareCheck.psm1` 中新增 `Test-MySQLDeepCheck` 函数
- 通过 `Invoke-SSHCommand` 执行 `docker exec umysql mysql -e "SQL"` 获取各项指标
- 结果以 `@{ Check; Status; Details; Value; Success }` 格式加入 `$middlewareResults`
**Shell 版本实现方式:**
- 在 `check_server_health.sh` 中新增 `test_mysql_deep()` 函数
- 直接执行 `docker exec umysql mysql -e "SQL"` 获取各项指标
- 结果写入 `REPORT_KV` 和 `REPORT_LINES`
**降级策略:**
- 如果 `docker exec umysql mysql` 执行失败,跳过深度检测,仅保留现有连通性检测
- 如果某个 SQL 查询超时(10秒),标记该项为"检测超时"
#### 实施步骤
| 步骤 | 描述 | PS 版本 | Shell 版本 | 状态 |
|------|------|---------|------------|------|
| 1 | 新增 `Test-MySQLDeepCheck` 函数 | MiddlewareCheck.psm1 | check_server_health.sh 新增 `test_mysql_deep()` | [ ] |
| 2 | 实现缓冲池命中率检测 | 同上 | 同上 | [ ] |
| 3 | 实现慢查询检测 | 同上 | 同上 | [ ] |
| 4 | 实现连接使用率检测 | 同上 | 同上 | [ ] |
| 5 | 实现主从复制状态检测 | 同上 | 同上 | [ ] |
| 6 | 实现 TOP20 大表检测 | 同上 | 同上 | [ ] |
| 7 | Main/主流程中增加深度检测调用 | check_server_health.ps1 | check_server_health.sh | [ ] |
| 8 | 深度检测结果集成到报告 | Report.psm1 | check_server_health.sh 报告章节 | [ ] |
---
### 2.3 Redis 深度检测(PRD 2.3)— 增强 MiddlewareCheck.psm1
**实现方案:**
在现有 `Test-RedisConnection` 函数之后,新增 `Test-RedisDeepCheck` 函数。
| 检测项 | 数据来源 | 阈值 |
|--------|----------|------|
| RDB 持久化状态 | `INFO Persistence` 中的 `rdb_last_bgsave_status` | ok 正常,其他异常 |
| RDB 最后保存时间 | `INFO Persistence` 中的 `rdb_last_save_time` | 距今 > 1小时 警告 |
| AOF 持久化状态 | `INFO Persistence` 中的 `aof_enabled` + `aof_last_write_status` | 未开启为建议,写入失败为异常 |
| 内存碎片率 | `INFO Memory` 中的 `mem_fragmentation_ratio` | < 1.5 正常,1.5~2.0 警告,> 2.0 异常 |
| 缓存命中率 | `INFO Stats` 中的 `keyspace_hits` / (`keyspace_hits` + `keyspace_misses`) | ≥ 80% 正常,50~80% 警告,< 50% 异常 |
| 键空间统计 | `INFO Keyspace` | 记录各 DB 键数量 |
| 主从复制状态 | `INFO Replication` | slave 的 `master_link_status` 必须为 up |
**PS 版本实现方式:**
- 在 `MiddlewareCheck.psm1` 中新增 `Test-RedisDeepCheck` 函数
- 通过 SSH 执行 `docker exec uredis redis-cli -a 'xxx' INFO <section>` 获取数据
- 解析输出中的键值对,按阈值判断状态
**Shell 版本实现方式:**
- 在 `check_server_health.sh` 中新增 `test_redis_deep()` 函数
- 直接执行 `docker exec uredis redis-cli -a 'xxx' INFO <section>`
- 结果写入 `REPORT_KV` 和 `REPORT_LINES`
**降级策略:**
- 如果 redis-cli 不可用,跳过深度检测
- 如果密码认证失败,仅记录错误,不影响已有连通性检测
#### 实施步骤
| 步骤 | 描述 | PS 版本 | Shell 版本 | 状态 |
|------|------|---------|------------|------|
| 1 | 新增 `Test-RedisDeepCheck` 函数 | MiddlewareCheck.psm1 | check_server_health.sh 新增 `test_redis_deep()` | [ ] |
| 2 | 实现持久化状态检测 | 同上 | 同上 | [ ] |
| 3 | 实现内存碎片率检测 | 同上 | 同上 | [ ] |
| 4 | 实现缓存命中率检测 | 同上 | 同上 | [ ] |
| 5 | 实现主从复制检测 | 同上 | 同上 | [ ] |
| 6 | Main/主流程中增加深度检测调用 | check_server_health.ps1 | check_server_health.sh | [ ] |
---
### 2.4 EMQX 深度检测(PRD 2.4)— 增强 MiddlewareCheck.psm1(第二期)
**实现方案:**
| 检测项 | 数据来源 | 说明 |
|--------|----------|------|
| 客户端连接数 | `curl http://localhost:18083/api/v5/stats` | 记录当前连接数 |
| 消息流入/流出 | 同上 | 记录吞吐量 |
| 消息丢弃数 | 同上 | > 0 需排查 |
| 主题和订阅数量 | 同上 | 记录数量 |
| 集群状态 | `curl http://localhost:18083/api/v5/cluster` | 节点需全部在线 |
| 活跃告警 | `curl http://localhost:18083/api/v5/alarms?activated=true` | 非空需关注 |
**PS 版本:** `MiddlewareCheck.psm1` 新增 `Test-EMQXDeepCheck`,通过 SSH 执行 `docker exec uemqx curl`
**Shell 版本:** `check_server_health.sh` 新增 `test_emqx_deep()`,直接执行 `docker exec uemqx curl`
---
### 2.5 系统资源增强检测(PRD 2.5)— 增强 ServerResourceAnalysis.psm1
**实现方案:**
| 检测项 | 数据来源 | 阈值 | 说明 |
|--------|----------|------|------|
| inode 使用率 | `df -i` | < 70% 正常,70~90% 警告,> 90% 异常 | 与磁盘空间并列展示 |
| 只读挂载 | `mount \| grep ' ro'` | 无只读挂载为正常 | 过滤 proc/sys 等虚拟文件系统 |
| 关键端口连通性 | `nc -zv` 或 `</dev/tcp` | MySQL/Redis/EMQX 端口可达 | 与服务检测互补 |
| 网关可达性 | `ping 默认网关` | 网关可 ping 通 | 基础网络检测 |
| TCP 状态分布 | `netstat -ant` 统计各状态 | CLOSE_WAIT > 100 或 TIME_WAIT > 1000 警告 | 网络健康指标 |
| DNS 解析延迟 | `time nslookup` | < 500ms 正常,≥ 500ms 警告 | 与现有 DNS 检测互补 |
| 僵尸进程 | `ps aux \| awk '$8~/Z/'` | 0 正常,> 0 需排查 | 系统健康指标 |
| TOP5 进程 | `ps aux --sort=-%mem/cpu` | 记录为参考信息 | 辅助排查 |
**PS 版本:** 在 `ServerResourceAnalysis.psm1` 的 `Test-ServerResources` 函数末尾追加这些检测项
**Shell 版本:** 在 `check_server_health.sh` 的 `test_resources()` 函数末尾追加这些检测项
**inode 检测实现要点:**
- 解析 `df -i` 输出,提取每个挂载点的 inode 总量、已用量、使用率
- 过滤 tmpfs/overlay 等虚拟文件系统
- 与磁盘空间检测并列展示,统一格式
**只读挂载检测实现要点:**
- 执行 `mount | grep ' ro[, ]'` 查找只读挂载
- 排除 proc/sys/dev 等预期的只读挂载
- 发现意外只读挂载时标记为异常
#### 实施步骤
| 步骤 | 描述 | PS 版本 | Shell 版本 | 状态 |
|------|------|---------|------------|------|
| 1 | 实现 inode 使用率检测 | ServerResourceAnalysis.psm1 | check_server_health.sh | [ ] |
| 2 | 实现只读挂载检测 | 同上 | 同上 | [ ] |
| 3 | 实现关键端口连通性检测 | 同上 | 同上 | [ ] |
| 4 | 实现 TCP 状态分布检测 | 同上 | 同上 | [ ] |
| 5 | 实现僵尸进程和 TOP5 检测 | 同上 | 同上 | [ ] |
| 6 | 新增检测项集成到报告 | Report.psm1 | check_server_health.sh 报告章节 | [ ] |
---
### 2.6 新平台数据备份(PRD 2.6)— 增强 DataBackup.psm1
**实现方案:**
当前 `DataBakup` 函数仅支持传统平台(`$PlatformType -eq 'old'`),需新增新平台备份逻辑。
**新平台备份目录:**
| 备份对象 | 路径 | 必备 |
|----------|------|------|
| 服务目录 | `/data/services/` | ✅ |
| 中间件配置 | `/data/middleware/` | ✅ |
| 第三方应用 | `/data/third_party/` | ✅ |
| MySQL 数据 | `docker exec umysql mysqldump` | ✅ |
**备份流程(与传统平台一致):**
```
创建备份目录 → 复制关键目录 → MySQL 数据导出 → 打包压缩 → 下载到本地 → 清理临时文件
```
**PS 版本:** 在 `DataBackup.psm1` 的 `DataBakup` 函数中,于 `$PlatformType -eq 'old'` 判断的 else 分支补充实现
**Shell 版本:** 在 `check_server_health.sh` 的 `data_backup()` 函数的 else 分支中补充实现
#### 实施步骤
| 步骤 | 描述 | PS 版本 | Shell 版本 | 状态 |
|------|------|---------|------------|------|
| 1 | 实现新平台目录备份逻辑 | DataBackup.psm1 | check_server_health.sh | [ ] |
| 2 | 实现新平台 MySQL 导出 | 同上 | 同上 | [ ] |
| 3 | 测试备份下载流程 | 同上 | 同上 | [ ] |
---
### 2.7 JVM 健康检测(PRD 2.7)— 新建 JvmHealthCheck.psm1(第二期)
**实现方案:**
| 检测项 | 优先方案 | 降级方案 |
|--------|----------|----------|
| 堆内存使用率 | `jcmd <pid> GC.heap_info` | `/proc/<pid>/status` 中的 VmRSS/VmSize |
| GC 统计 | `jcmd <pid> GC.stats` | 解析应用日志中的 GC 信息 |
| 线程数 | `jcmd <pid> Thread.print` 统计 | `/proc/<pid>/status` 中的 Threads |
| 死锁检测 | `jcmd <pid> Thread.print` 解析 | 无降级,跳过 |
| JVM 版本 | `docker exec ujava java -version` | 同左 |
**PS 版本:** 新建 `modules/JvmHealthCheck.psm1`,通过 SSH 执行 `docker exec ujava jcmd` 命令
**Shell 版本:** `check_server_health.sh` 新增 `test_jvm_health()` 函数
**降级策略:**
- 如果容器内无 `jcmd`,通过 `/proc/<pid>/status` 获取基础内存和线程信息
- 如果 `jcmd` 执行超时(15秒),标记为检测超时
---
---
## 3. 报告集成方案
### 3.1 报告新增章节
现有报告结构需新增以下章节:
| 新增章节 | 位置 | 内容 |
|----------|------|------|
| 安全合规检测 | 中间件检测之前 | 弱密码检测结果(5项)+ 安全基线扫描结果(4项) |
| 中间件深度检测 | 现有中间件检测之后 | MySQL/Redis/EMQX 深度指标 |
| JVM 健康检测(第二期) | 服务检测之后 | 堆内存/GC/线程 |
### 3.2 PS 版本集成
- `SecurityCheck.psm1` 导出的检测结果通过 Main 函数传递给 `Show-HealthReport`
- `Report.psm1` 的 `Show-HealthReport` 增加 `-SecurityResults`、`-MySQLDeepResults` 等参数
- 报告模板中增加对应的章节输出
### 3.3 Shell 版本集成
- 各新增函数将结果写入 `REPORT_LINES[]` 和 `REPORT_KV`
- 报告生成章节中增加对应的 Markdown 输出块
---
## 4. 实施时间线
### 第一期
| 阶段 | 内容 | 前置依赖 |
|------|------|----------|
| 阶段1 | 弱密码检测 + 安全基线扫描(PRD 2.1) | 无 |
| 阶段2 | MySQL 深度检测(PRD 2.2) | 无 |
| 阶段3 | Redis 深度检测(PRD 2.3) | 无 |
| 阶段4 | 系统资源增强 — inode + 只读挂载(PRD 2.5) | 无 |
| 阶段5 | 新平台数据备份(PRD 2.6) | 无 |
| 阶段6 | 报告集成 + 端到端测试 | 阶段1~5 |
### 第二期
| 阶段 | 内容 | 前置依赖 |
|------|------|----------|
| 阶段7 | EMQX 深度检测(PRD 2.4) | 无 |
| 阶段8 | 网络连通性 + 进程检测增强(PRD 2.5) | 无 |
| 阶段9 | JVM 健康检测(PRD 2.7) | 无 |
---
## 5. 需求规范
- 代码规范: `Docs/PRD/01规范文档/_PRD_规范文档_代码规范.md`
- 问题总结: `Docs/PRD/01规范文档/_PRD_问题总结_记录文档.md`
- 方法总结: `Docs/PRD/01规范文档/_PRD_方法总结_记录文档.md`
- 文档规范: `Docs/PRD/01规范文档/_PRD_规范文档_文档规范.md`
- 测试规范: `Docs/PRD/01规范文档/_PRD_规范文档_测试规范.md`
---
## 6. 版本历史
| 版本 | 日期 | 变更内容 | 作者 |
|------|------|----------|------|
| V1.0 | 2026-06-06 | 初始版本 | Claude |
---
*文档结束*
# _PRD_服务自检检测优化_需求文档
> 版本:V1.0
> 更新日期:2026-06-06
> 适用范围:服务自检脚本 - 检测功能优化与新增
> 来源:基于《服务器监测需求规整版需求文档》183项检测项完成率分析(当前73%,134项已完成,49项未实现)
---
## 1. 背景与目标
### 1.1 背景
当前服务自检脚本已完成模块化拆分(15个模块),覆盖平台检测、服务检测、中间件连通性检测等基础能力。但根据《未实现功能清单》和实际运维场景,存在以下不足:
- **安全检测空白**:无弱密码检测、无安全合规扫描,现场部署存在安全隐患
- **中间件深度不足**:仅检测连通性,缺少健康指标(Redis 缓存命中率、MySQL 慢查询、EMQX 消息丢弃等)
- **磁盘检测盲区**:仅检测空间占用率,缺少 inode 使用率和只读挂载检测
- **JVM 黑盒**:Java 服务仅检测进程存活,无 JVM 堆内存、GC、线程等健康指标
- **新平台备份缺失**:DataBakup 仅支持传统平台,新统一平台无备份能力
- **缺乏趋势对比**:每次检测结果独立,无法与上次结果对比发现新增/修复的异常
### 1.2 目标
- 补齐安全合规检测能力,覆盖弱密码和安全基线扫描
- 增强中间件深度检测,提供有价值的健康指标而非仅连通性
- 补齐系统资源检测盲区(inode、只读挂载、网络状态)
- 新增 JVM 健康检测,为 Java 服务排障提供依据
- 补全新平台数据备份能力
---
## 2. 功能需求
### 2.1 安全合规检测
**目标:** 发现中间件和系统中的安全风险,包括弱密码配置和系统安全隐患
#### 2.1.1 弱密码检测
| 检测项 | 说明 |
|--------|------|
| MySQL Root 访问范围 | 检查 root 用户是否允许从任意主机(%)连接 |
| MySQL 空密码用户 | 检查是否存在无密码的数据库用户 |
| Redis 无密码/弱密码 | 检查 Redis 配置文件中 `requirepass` 是否为空或使用常见弱密码 |
| EMQX 默认密码 | 检查 EMQX Dashboard 是否使用默认用户名密码 |
| Linux 空密码账户 | 检查系统中是否存在无密码登录的用户账户 |
#### 2.1.2 安全基线扫描
| 检测项 | 说明 |
|--------|------|
| 可疑 SUID 文件 | 扫描系统中所有 SUID 文件,与白名单对比发现异常 |
| 异常 crontab 条目 | 扫描用户和系统定时任务,发现可疑条目 |
| SSH 暴力破解痕迹 | 分析安全日志中失败登录次数,识别暴力破解行为 |
| 异常开放端口 | 扫描所有监听端口,与预期端口白名单对比 |
---
### 2.2 MySQL 深度检测
**目标:** 在现有连通性检测基础上,新增 MySQL 引擎状态和性能指标检测
| 检测项 | 说明 |
|--------|------|
| InnoDB 缓冲池命中率 | 评估缓冲池效率,命中率过低表示需扩容 |
| 慢查询状态 | 检查慢查询日志是否开启,统计慢查询数量 |
| 连接使用率 | 当前连接数与最大连接数的比值,过高需关注 |
| 活跃连接详情 | 当前正在执行的 SQL 连接列表 |
| QPS/TPS | 每秒查询数和每秒事务数,作为性能基线 |
| 主从复制状态 | 检查 IO/SQL 线程状态和复制延迟 |
| TOP20 大表 | 按数据大小排序的前20张表,识别需优化的表 |
---
### 2.3 Redis 深度检测
**目标:** 在现有连通性检测基础上,新增 Redis 持久化和性能指标检测
| 检测项 | 说明 |
|--------|------|
| RDB 持久化状态 | 检查最后一次 RDB 快照是否成功及时间 |
| AOF 持久化状态 | 检查 AOF 是否开启及最后一次写入状态 |
| 内存碎片率 | 评估 Redis 内存碎片情况,过高需重启整理 |
| 缓存命中率 | 命中率 = keyspace_hits / (keyspace_hits + keyspace_misses) |
| 键空间统计 | 各数据库的键数量、过期键和驱逐键数量 |
| 主从复制状态 | 复制角色、从库连接数、复制偏移量 |
---
### 2.4 EMQX 深度检测
**目标:** 通过 EMQX Dashboard API 获取详细运行指标
| 检测项 | 说明 |
|--------|------|
| 客户端连接数统计 | 当前连接数和连接数趋势 |
| 消息流入/流出速率 | 消息吞吐量指标 |
| 消息丢弃数 | 被丢弃的消息数量,大于0需排查 |
| 主题和订阅数量 | 当前主题数和订阅数 |
| 集群状态 | 如启用集群,检查所有节点是否在线 |
| 活跃告警 | 当前未处理的告警列表 |
---
### 2.5 系统资源增强检测
**目标:** 补齐现有 ServerResourceAnalysis 模块的检测盲区
#### 2.5.1 磁盘检测增强
| 检测项 | 说明 |
|--------|------|
| inode 使用率 | 检测各挂载点的 inode 使用率,inode 耗尽比空间耗尽更隐蔽 |
| 只读挂载检测 | 检测是否有文件系统被意外挂载为只读 |
#### 2.5.2 网络连通性增强
| 检测项 | 说明 |
|--------|------|
| 关键端口连通性 | 检测 MySQL(3306)/Redis(6379)/EMQX(1883) 等关键端口是否可达 |
| 网关可达性 | 检查默认网关是否可 ping 通 |
| TCP 连接状态分布 | 统计各 TCP 状态(ESTABLISHED/CLOSE_WAIT/TIME_WAIT 等)数量 |
| DNS 解析延迟 | 测量 DNS 解析响应时间 |
#### 2.5.3 进程与负载增强
| 检测项 | 说明 |
|--------|------|
| 僵尸进程检测 | 检查系统中是否存在僵尸进程(Z 状态) |
| 内存/CPU TOP5 进程 | 列出资源消耗最大的前5个进程 |
| 系统运行时间 | 记录系统上次重启时间 |
---
### 2.6 新平台数据备份
**目标:** 补全新统一平台的数据备份能力(当前仅支持传统平台)
| 备份对象 | 新平台路径 | 说明 |
|----------|-----------|------|
| Java 服务 | `/data/services/api/` | 所有 Java 后端服务 |
| 前端应用 | `/data/services/web/` | 所有前端应用 |
| 中间件配置 | `/data/middleware/` | nginx/emqx/redis/mysql/nacos 配置 |
| 启动脚本 | `/data/services/scripts/` | 启动脚本 |
| 第三方应用 | `/data/third_party/` | paperless/wifi-local |
| MySQL 数据 | 通过 `docker exec umysql mysqldump` | 导出所有用户数据库 |
备份流程与传统平台一致:复制 → 导出 → 打包 → 下载 → 清理。
---
### 2.7 JVM 健康检测
**目标:** 为 Java 服务提供 JVM 运行时健康指标,辅助问题排查
| 检测项 | 说明 |
|--------|------|
| 堆内存使用率 | 当前堆内存使用量与最大堆内存的比率 |
| GC 统计 | Young GC 和 Full GC 次数及耗时 |
| 线程数 | 当前 JVM 活跃线程数 |
| 死锁检测 | 检查是否存在线程死锁 |
| JVM 版本 | 记录 JDK 版本信息 |
**说明:** 优先通过 JDK 自带工具(`jcmd`)获取指标,无 `jcmd` 时通过 `/proc` 文件系统降级获取基础信息。
---
## 3. 实施优先级
### 第一期
| 优先级 | 需求 | 对应章节 |
|--------|------|----------|
| P0 | 弱密码检测 + 安全基线扫描 | 2.1 |
| P0 | MySQL 深度检测 | 2.2 |
| P1 | Redis 深度检测 | 2.3 |
| P1 | inode + 只读挂载检测 | 2.5 |
| P1 | 新平台数据备份 | 2.6 |
### 第二期
| 优先级 | 需求 | 对应章节 |
|--------|------|----------|
| P2 | EMQX 深度检测 | 2.4 |
| P2 | JVM 健康检测 | 2.7 |
| P2 | 网络连通性 + 进程检测增强 | 2.5 |
---
## 4. 验收标准
### 4.1 安全合规检测
- [ ] 能正确检测 MySQL 中 root@% 和空密码用户
- [ ] 能正确检测 Redis 无密码或弱密码配置
- [ ] 能正确检测 EMQX 默认 Dashboard 密码
- [ ] 能正确检测 Linux 系统空密码账户
- [ ] 能扫描 SUID 文件并与白名单对比
- [ ] 能扫描异常 crontab 条目
- [ ] 能检测 SSH 暴力破解痕迹
- [ ] 能扫描开放端口并与白名单对比
### 4.2 中间件深度检测
- [ ] MySQL 缓冲池命中率计算正确,阈值判断合理
- [ ] MySQL 慢查询检测包含开关状态和数量统计
- [ ] MySQL 主从复制状态和延迟检测正确
- [ ] Redis 持久化状态(RDB/AOF)检测正确
- [ ] Redis 缓存命中率计算正确
- [ ] EMQX 通过 API 获取连接数、消息丢弃数等指标
### 4.3 系统资源增强
- [ ] inode 使用率检测正确
- [ ] 只读挂载检测正确
- [ ] TCP 连接状态分布统计正确
- [ ] 僵尸进程检测正确
### 4.4 新平台备份
- [ ] 新平台所有关键目录完整备份
- [ ] MySQL 数据正确导出
- [ ] 压缩包完整下载到本地
### 4.5 JVM 健康检测
- [ ] 堆内存使用率获取正确
- [ ] GC 次数统计正确
- [ ] 死锁检测正确
- [ ] 无 jcmd 时能降级获取基础信息
---
## 5. 需求规范
- 代码规范: `Docs/PRD/01规范文档/_PRD_规范文档_代码规范.md`
- 问题总结: `Docs/PRD/01规范文档/_PRD_问题总结_记录文档.md`
- 方法总结: `Docs/PRD/01规范文档/_PRD_方法总结_记录文档.md`
- 文档规范: `Docs/PRD/01规范文档/_PRD_规范文档_文档规范.md`
- 测试规范: `Docs/PRD/01规范文档/_PRD_规范文档_测试规范.md`
---
## 6. 版本历史
| 版本 | 日期 | 变更内容 | 作者 |
|------|------|----------|------|
| V1.0 | 2026-06-06 | 初始版本 | Claude |
---
*文档结束*
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论