Skip to content
项目
群组
代码片段
帮助
正在加载...
帮助
为 GitLab 提交贡献
登录
切换导航
U
ubains-module-test
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
分枝图
比较
统计图
议题
1
议题
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
计划
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
分枝图
统计图
创建新议题
作业
提交
议题看板
打开侧边栏
郑晓兵
ubains-module-test
Commits
35387cfc
提交
35387cfc
authored
5月 09, 2026
作者:
陈泽健
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Revert "feat(server): 增强服务器自检脚本功能"
This reverts commit
f4c2be9d
.
上级
f4c2be9d
显示空白字符变更
内嵌
并排
正在显示
1 个修改的文件
包含
1 行增加
和
749 行删除
+1
-749
check_server_health.ps1
AuxiliaryTool/ScriptTool/新服务自检/check_server_health.ps1
+1
-749
没有找到文件。
AuxiliaryTool/ScriptTool/新服务自检/check_server_health.ps1
浏览文件 @
35387cfc
...
...
@@ -384,42 +384,6 @@ function Test-SystemBasicInfo {
# 系统负载
$info
.LoadAverage
=
Invoke-SSHCommand
"cat /proc/loadavg | awk '{print
`$
1,
`$
2,
`$
3}'"
-Timeout 10
# 内核启动参数
$cmdline
=
Invoke-SSHCommand
"cat /proc/cmdline 2>/dev/null"
-Timeout 10
if
(
$cmdline
)
{
$info
.KernelCmdline
=
$cmdline
}
# 系统资源限制
$ulimitInfo
=
Invoke-SSHCommand
"ulimit -a 2>/dev/null | grep -E 'open files|max user processes'"
-Timeout 10
if
(
$ulimitInfo
)
{
$info
.UlimitInfo
=
$ulimitInfo
}
# 内核关键参数
$kernelParams
=
Invoke-SSHCommand
"sysctl fs.file-max fs.inotify.max_user_watches net.core.somaxconn 2>/dev/null"
-Timeout 10
if
(
$kernelParams
)
{
$info
.KernelParams
=
$kernelParams
}
# 内存压力检测
$memPressure
=
Invoke-SSHCommand
"cat /proc/pressure/memory 2>/dev/null | head -3"
-Timeout 10
if
(
$memPressure
)
{
$info
.MemoryPressure
=
$memPressure
}
# 虚拟内存统计
$vmstat
=
Invoke-SSHCommand
"cat /proc/vmstat 2>/dev/null | grep -E 'nr_dirty|nr_writeback|nr_slab' | head -10"
-Timeout 10
if
(
$vmstat
)
{
$info
.VMStat
=
$vmstat
}
# 大页内存使用
$hugePages
=
Invoke-SSHCommand
"cat /proc/meminfo 2>/dev/null | grep -E 'HugePages|Hugepagesize'"
-Timeout 10
if
(
$hugePages
)
{
$info
.HugePages
=
$hugePages
}
$script
:systemInfo
=
$info
$script
:检测结果[
"系统基础信息"
]
=
$info
...
...
@@ -494,58 +458,6 @@ function Test-CPUResource {
}
}
# ========== 新增:中断统计检测 ==========
try
{
$interruptInfo
=
Invoke-SSHCommand
"cat /proc/interrupts | grep -E '^[0-9]+:' | head -1"
-Timeout 10
if
(
$interruptInfo
)
{
# 解析中断总数(格式:CPU0: 123456 789 0 ...)
$totalInterrupts
=
0
$fields
=
$interruptInfo
-split
"\s+"
foreach
(
$field
in
$fields
[
1..
(
$fields
.Length-1
)])
{
if
(
$field
-match
"^\d+$"
)
{
$totalInterrupts
+
=
[
int
]
$field
}
}
$results
+
=
[
PSCustomObject]@
{
Name
=
"CPU中断统计"
Value
=
"{0:N0}"
-f
$totalInterrupts
Threshold
=
"-"
Status
=
"正常"
Message
=
"CPU中断总次数"
}
}
}
catch
{
Write
-Log
"中断统计检测失败:
$(
$_
.Exception.Message
)
"
"WARN"
}
# ========== 新增:软中断统计检测 ==========
try
{
$softirqInfo
=
Invoke-SSHCommand
"cat /proc/softirqs | grep -E '^[0-9]+:' | head -1"
-Timeout 10
if
(
$softirqInfo
)
{
# 解析软中断总数
$totalSoftirqs
=
0
$fields
=
$softirqInfo
-split
"\s+"
foreach
(
$field
in
$fields
[
1..
(
$fields
.Length-1
)])
{
if
(
$field
-match
"^\d+$"
)
{
$totalSoftirqs
+
=
[
int
]
$field
}
}
$results
+
=
[
PSCustomObject]@
{
Name
=
"CPU软中断统计"
Value
=
"{0:N0}"
-f
$totalSoftirqs
Threshold
=
"-"
Status
=
"正常"
Message
=
"CPU软中断总次数"
}
}
}
catch
{
Write
-Log
"软中断统计检测失败:
$(
$_
.Exception.Message
)
"
"WARN"
}
# 保存结果
foreach
(
$result
in
$results
)
{
Save-TestResult
"CPU资源"
$result
...
...
@@ -838,79 +750,6 @@ function Test-DiskResource {
}
}
# 磁盘挂载情况检测
Write
-Log
"检测磁盘挂载情况..."
$mountOutput
=
Invoke-SSHCommand
"mount 2>/dev/null | grep -E 'nfs|cifs|fuse.sshfs'"
-Timeout 10
if
(
$mountOutput
)
{
if
(
$mountOutput
-is
[
array]
)
{
$mountOutput
=
$mountOutput
-join
"
`n
"
}
$remoteMounts
=
@
()
foreach
(
$mountLine
in
$mountOutput
-split
"
`n
"
)
{
if
(
$mountLine
-match
"on\s+(\S+)\s+type"
)
{
$remoteMounts
+
=
$matches
[
1]
}
}
if
(
$remoteMounts
.Count -gt 0
)
{
$results
+
=
[
PSCustomObject]@
{
Name
=
"远程磁盘挂载"
Value
=
"
$(
$remoteMounts
.Count
)
个"
Threshold
=
"-"
Status
=
"正常"
Message
=
"远程挂载:
$(
$remoteMounts
-join
', '
)
"
}
}
}
# RAID状态检测
Write
-Log
"检测RAID状态..."
$mdstat
=
Invoke-SSHCommand
"cat /proc/mdstat 2>/dev/null"
-Timeout 10
if
(
$mdstat
-and
$mdstat
-match
"md\d+ : active"
)
{
$raidDevices
=
@
()
foreach
(
$line
in
$mdstat
-split
"
`n
"
)
{
if
(
$line
-match
"(md\d+)\s*:\s*active\s+(\S+)"
)
{
$raidDevice
=
$matches
[
1]
$raidLevel
=
$matches
[
2]
$raidDevices
+
=
"
${
raidDevice
}
(
${
raidLevel
}
)"
}
}
if
(
$raidDevices
.Count -gt 0
)
{
# 检查RAID状态是否正常
$raidStatus
=
if
(
$mdstat
-match
"degraded|fail"
)
{
"警告"
}
else
{
"正常"
}
$results
+
=
[
PSCustomObject]@
{
Name
=
"RAID状态"
Value
=
(
$raidDevices
-join
", "
)
Threshold
=
"-"
Status
=
$raidStatus
Message
=
"软件RAID设备状态"
}
if
(
$raidStatus
-ne
"正常"
)
{
Add-Issue -Message
"RAID状态异常:
$(
$raidDevices
-join
', '
)
"
-Level
$raidStatus
}
}
}
# 磁盘调度算法检测
Write
-Log
"检测磁盘调度算法..."
$schedulerOutput
=
Invoke-SSHCommand
"ls /sys/block/*/queue/scheduler 2>/dev/null | head -5 | xargs -I {} sh -c 'echo {}: \
`c
at {}\
`'
"
-Timeout 10
if
(
$schedulerOutput
)
{
if
(
$schedulerOutput
-is
[
array]
)
{
$schedulerOutput
=
$schedulerOutput
-join
"
`n
"
}
$schedulerInfo
=
@
()
foreach
(
$schedLine
in
$schedulerOutput
-split
"
`n
"
)
{
if
(
$schedLine
-match
"\[([^\]]+)\]"
)
{
$schedulerInfo
+
=
$schedLine
}
}
if
(
$schedulerInfo
.Count -gt 0
)
{
$results
+
=
[
PSCustomObject]@
{
Name
=
"磁盘调度器"
Value
=
"已配置"
Threshold
=
"-"
Status
=
"正常"
Message
=
(
$schedulerInfo
-join
"; "
)
}
}
}
# 保存结果
foreach
(
$result
in
$results
)
{
Save-TestResult
"磁盘资源"
$result
...
...
@@ -1091,64 +930,6 @@ function Test-ProcessStatus {
}
}
# ========== 新增:按进程统计打开文件数 ==========
try
{
$fdByProcess
=
Invoke-SSHCommand
"for pid in \
$(
ps
-eo pid |
head
-20
)
; do echo -n \"
\
$pid
\
"; ls /proc/\
$pid
/fd 2>/dev/null | wc -l; done | sort -t: -k2 -rn | head -10"
-Timeout 15
if
(
$fdByProcess
)
{
if
(
$fdByProcess
-is
[
array]
)
{
$fdByProcess
=
$fdByProcess
-join
"
`n
"
}
$processFds
=
@
()
$totalFds
=
0
foreach
(
$line
in
$fdByProcess
-split
"
`n
"
)
{
if
(
$line
-match
"^(\d+)\s+(\d+)"
)
{
$processFds
+
=
"PID:
$(
$matches
[
1]
)
:
$(
$matches
[
2]
)
个"
$totalFds
+
=
[
int
]
$matches
[
2]
}
}
if
(
$processFds
.Count -gt 0
)
{
$results
+
=
[
PSCustomObject]@
{
Name
=
"打开文件数TOP10"
Value
=
"已统计"
Threshold
=
"-"
Status
=
"正常"
Message
=
"打开文件最多的10个进程: "
+
(
$processFds
-join
", "
)
}
}
}
}
catch
{
Write
-Log
"按进程统计打开文件数失败:
$(
$_
.Exception.Message
)
"
"WARN"
}
# 进程可执行文件路径检测
try
{
$exePaths
=
Invoke-SSHCommand
"ps -eo pid,comm --no-headers | head -10 | while read pid comm; do echo -n \"
PID:\
$pid
\
"; readlink /proc/\
$pid
/exe 2>/dev/null || echo 'N/A'; done"
-Timeout 15
if
(
$exePaths
)
{
if
(
$exePaths
-is
[
array]
)
{
$exePaths
=
$exePaths
-join
"
`n
"
}
$exeDetails
=
@
()
foreach
(
$exeLine
in
$exePaths
-split
"
`n
"
)
{
if
(
$exeLine
-match
"PID:\d+\s+(.+)"
)
{
$exeDetails
+
=
$matches
[
1].Trim
()
}
}
if
(
$exeDetails
.Count -gt 0
)
{
$results
+
=
[
PSCustomObject]@
{
Name
=
"进程可执行路径"
Value
=
"已获取"
Threshold
=
"-"
Status
=
"正常"
Message
=
"TOP10进程可执行文件路径"
}
}
}
}
catch
{
Write
-Log
"进程可执行路径检测失败:
$(
$_
.Exception.Message
)
"
"WARN"
}
# 保存结果
foreach
(
$result
in
$results
)
{
Save-TestResult
"进程状态"
$result
...
...
@@ -1275,106 +1056,6 @@ function Test-NetworkStatus {
}
}
# ========== 新增:TCP参数配置检测 ==========
try
{
$tcpParams
=
Invoke-SSHCommand
"sysctl net.ipv4.tcp_fin_timeout net.ipv4.tcp_keepalive_time net.ipv4.tcp_max_syn_backlog net.ipv4.tcp_tw_reuse 2>&1"
-Timeout 10
if
(
$tcpParams
-and
$tcpParams
-notmatch
"error|Error"
)
{
if
(
$tcpParams
-is
[
array]
)
{
$tcpParams
=
$tcpParams
-join
"
`n
"
}
$paramDetails
=
@
()
foreach
(
$line
in
$tcpParams
-split
"
`n
"
)
{
if
(
$line
-match
"net\.ipv4\.tcp_\w+\s*=\s*(.+)"
)
{
$paramName
=
$line
-replace
"net\.ipv4\.tcp_"
,
""
-replace
"\s*="
,
""
$paramValue
=
$matches
[
1].Trim
()
$paramDetails
+
=
"
${
paramName
}
:
${
paramValue
}
"
}
}
if
(
$paramDetails
.Count -gt 0
)
{
$results
+
=
[
PSCustomObject]@
{
Name
=
"TCP参数配置"
Value
=
"已获取"
Threshold
=
"-"
Status
=
"正常"
Message
=
(
$paramDetails
-join
", "
)
}
}
}
}
catch
{
Write
-Log
"TCP参数配置检测失败:
$(
$_
.Exception.Message
)
"
"WARN"
}
# ========== 新增:TCP扩展统计检测 ==========
try
{
$tcpStats
=
Invoke-SSHCommand
"cat /proc/net/snmp | grep Tcp: | head -1"
-Timeout 10
if
(
$tcpStats
)
{
# 解析TCP统计:Tcp: RtoAlgorithm RtoMin RtoMax MaxConn ActiveOpens PassiveOpens AttemptFails ...
$tcpFields
=
$tcpStats
-split
"\s+"
if
(
$tcpFields
.Count -gt 10
)
{
$activeOpens
=
if
(
$tcpFields
[
5] -match
"\d+"
)
{
$tcpFields
[
5]
}
else
{
"0"
}
$passiveOpens
=
if
(
$tcpFields
[
6] -match
"\d+"
)
{
$tcpFields
[
6]
}
else
{
"0"
}
$attemptFails
=
if
(
$tcpFields
[
7] -match
"\d+"
)
{
$tcpFields
[
7]
}
else
{
"0"
}
$estabResets
=
if
(
$tcpFields
[
8] -match
"\d+"
)
{
$tcpFields
[
8]
}
else
{
"0"
}
$results
+
=
[
PSCustomObject]@
{
Name
=
"TCP扩展统计"
Value
=
"已统计"
Threshold
=
"-"
Status
=
"正常"
Message
=
"主动打开:
$activeOpens
, 被动打开:
$passiveOpens
, 尝试失败:
$attemptFails
, 重置:
$estabResets
"
}
}
}
}
catch
{
Write
-Log
"TCP扩展统计检测失败:
$(
$_
.Exception.Message
)
"
"WARN"
}
# ========== 新增:ARP表检测 ==========
try
{
$arpTable
=
Invoke-SSHCommand
"ip neigh show | wc -l"
-Timeout 10
if
(
$arpTable
-match
"\d+"
)
{
$arpCount
=
[
int
]
$matches
[
0]
$arpStale
=
Invoke-SSHCommand
"ip neigh show | grep -c STALE"
-Timeout 10
$staleCount
=
if
(
$arpStale
-match
"\d+"
)
{
[
int
]
$matches
[
0]
}
else
{
0
}
$arpStatus
=
if
(
$staleCount
-gt
$arpCount
*
0.1
)
{
"警告"
}
else
{
"正常"
}
$results
+
=
[
PSCustomObject]@
{
Name
=
"ARP表"
Value
=
"
$arpCount
项"
Threshold
=
"-"
Status
=
$arpStatus
Message
=
"ARP邻居表条目数,过期:
$staleCount
"
}
}
}
catch
{
Write
-Log
"ARP表检测失败:
$(
$_
.Exception.Message
)
"
"WARN"
}
# ========== 新增:路由表检测 ==========
try
{
$routeCount
=
Invoke-SSHCommand
"ip route show | wc -l"
-Timeout 10
$defaultRoute
=
Invoke-SSHCommand
"ip route show | grep default"
-Timeout 10
if
(
$routeCount
-match
"\d+"
)
{
$routeCount
=
[
int
]
$matches
[
0]
$defaultInfo
=
if
(
$defaultRoute
)
{
"有默认网关"
}
else
{
"无默认网关"
}
$results
+
=
[
PSCustomObject]@
{
Name
=
"路由表"
Value
=
"
$routeCount
条路由"
Threshold
=
"-"
Status
=
"正常"
Message
=
$defaultInfo
}
}
}
catch
{
Write
-Log
"路由表检测失败:
$(
$_
.Exception.Message
)
"
"WARN"
}
# 保存结果
foreach
(
$result
in
$results
)
{
Save-TestResult
"网络连接"
$result
...
...
@@ -1553,128 +1234,6 @@ function Test-SecurityStatus {
}
}
# ========== 新增:iptables规则检测 ==========
try
{
$iptablesRules
=
Invoke-SSHCommand
"iptables -L -n --line-numbers 2>/dev/null | head -20"
-Timeout 10
if
(
$iptablesRules
-and
$iptablesRules
-notmatch
"error"
)
{
if
(
$iptablesRules
-is
[
array]
)
{
$iptablesRules
=
$iptablesRules
-join
"
`n
"
}
# 统计规则数量
$ruleCount
=
(
$iptablesRules
-split
"
`n
"
|
Where
-Object
{
$_
-match
"^\d+"
}
|
Measure-Object
)
.Count
# 检查是否有INPUT策略
$inputPolicy
=
Invoke-SSHCommand
"iptables -L INPUT -n | grep '^Chain' | awk '{print
`$
3}'"
-Timeout 10
$policyInfo
=
if
(
$inputPolicy
)
{
$inputPolicy
.Trim
()
}
else
{
"未知"
}
$results
+
=
[
PSCustomObject]@
{
Name
=
"iptables规则"
Value
=
"
$ruleCount
条"
Threshold
=
"-"
Status
=
"正常"
Message
=
"防火墙规则数量,INPUT策略:
$policyInfo
"
}
}
}
catch
{
Write
-Log
"iptables规则检测失败:
$(
$_
.Exception.Message
)
"
"WARN"
}
# ========== 新增:异常cron任务检测 ==========
try
{
# 检查最近修改的cron任务
$cronFiles
=
Invoke-SSHCommand
"find /etc/cron.* /var/spool/cron -type f -mtime -7 2>/dev/null | head -10"
-Timeout 10
if
(
$cronFiles
)
{
if
(
$cronFiles
-is
[
array]
)
{
$cronFiles
=
$cronFiles
-join
"
`n
"
}
$cronList
=
$cronFiles
-split
"
`n
"
|
Where
-Object
{
$_
-match
"\S+"
}
if
(
$cronList
.Count -gt 0
)
{
$results
+
=
[
PSCustomObject]@
{
Name
=
"最近修改的cron任务"
Value
=
"
$(
$cronList
.Count
)
个"
Threshold
=
"-"
Status
=
"正常"
Message
=
"7天内修改的定时任务文件"
}
}
}
# 检查可疑的cron任务(包含wget、curl等远程命令)
$suspiciousCron
=
Invoke-SSHCommand
"grep -r 'wget\|curl\|bash.*http' /etc/cron.* /var/spool/cron 2>/dev/null | head -5"
-Timeout 10
if
(
$suspiciousCron
)
{
if
(
$suspiciousCron
-is
[
array]
)
{
$suspiciousCron
=
$suspiciousCron
-join
"
`n
"
}
$suspiciousCount
=
(
$suspiciousCron
-split
"
`n
"
|
Where
-Object
{
$_
-match
"\S+"
}
|
Measure-Object
)
.Count
if
(
$suspiciousCount
-gt 0
)
{
$results
+
=
[
PSCustomObject]@
{
Name
=
"可疑cron任务"
Value
=
"
$suspiciousCount
个"
Threshold
=
">0"
Status
=
"警告"
Message
=
"发现包含远程命令的定时任务"
}
Add-Issue -Message
"发现可疑cron任务:
$suspiciousCount
个"
-Level
"警告"
}
}
}
catch
{
Write
-Log
"异常cron任务检测失败:
$(
$_
.Exception.Message
)
"
"WARN"
}
# ========== 新增:空密码账户检测 ==========
try
{
# 检测空密码账户,排除系统服务账户
$emptyPasswordAccounts
=
Invoke-SSHCommand
'awk -F: '
"'"
'($2 == "" || $2 == "!") {print $1}'
"'"
' /etc/shadow 2>/dev/null | grep -v -E "^((systemd|dbus|polkitd|saslauth|dhcpd|sshd|tss|unbound|chrony|nginx|apache|www|mysql|redis|emqx|nobody|mail|ftp|games|operator|sync|shutdown|halt|news|uucp|proxy|list|irc)|$)'
|
head
-10
' -Timeout 10
if ($emptyPasswordAccounts) {
if ($emptyPasswordAccounts -is [array]) { $emptyPasswordAccounts = $emptyPasswordAccounts -join "`n" }
$emptyAccounts = $emptyPasswordAccounts -split "`n" | Where-Object { $_ -match "\S+" }
if ($emptyAccounts.Count -gt 0) {
$results += [PSCustomObject]@{
Name = "空密码账户"
Value = "$($emptyAccounts.Count) 个"
Threshold = ">0"
Status = "严重"
Message = "发现空密码账户: $($emptyAccounts -join '
,
')"
}
Add-Issue -Message "发现空密码账户: $($emptyAccounts -join '
,
')" -Level "严重"
}
}
else {
$results += [PSCustomObject]@{
Name = "空密码账户"
Value = "0 个"
Threshold = ">0"
Status = "正常"
Message = "无空密码账户(已排除系统服务)"
}
}
}
catch {
Write-Log "空密码账户检测失败: $($_.Exception.Message)" "WARN"
}
# ========== 新增:最近修改的关键系统文件 ==========
try {
$recentSystemFiles = Invoke-SSHCommand "find /etc -name '
*
.conf
' -mtime -1 -type f 2>/dev/null | head -10" -Timeout 15
if ($recentSystemFiles) {
if ($recentSystemFiles -is [array]) { $recentSystemFiles = $recentSystemFiles -join "`n" }
$modifiedFiles = $recentSystemFiles -split "`n" | Where-Object { $_ -match "\S+" }
if ($modifiedFiles.Count -gt 0) {
$results += [PSCustomObject]@{
Name = "最近修改的系统配置"
Value = "$($modifiedFiles.Count) 个"
Threshold = "-"
Status = "正常"
Message = "24小时内修改的配置文件: " + (($modifiedFiles | ForEach-Object { $_ -replace '
/etc/
', '' }) -join '
,
')
}
}
}
}
catch {
Write-Log "最近修改的系统文件检测失败: $($_.Exception.Message)" "WARN"
}
# 保存结果
foreach
(
$result
in
$results
)
{
Save-TestResult
"安全合规"
$result
...
...
@@ -1774,69 +1333,6 @@ function Test-SystemLogs {
}
}
# ========== 新增:内核panic/oops检测 ==========
try {
$panicLog = Invoke-SSHCommand "dmesg | grep -iE '
panic|oops|bug
' | tail -20" -Timeout 10
if ($panicLog) {
$panicCount = ($panicLog -split "`n" | Where-Object { $_ -match "panic|oops|bug" } | Measure-Object).Count
if ($panicCount -gt 0) {
$results += [PSCustomObject]@{
Name = "内核异常日志"
Value = "$panicCount 条"
Threshold = ">0"
Status = "严重"
Message = "检测到内核panic/oops/bug记录"
}
Add-Issue -Message "检测到内核异常: $panicCount 条" -Level "严重"
}
}
}
catch {
Write-Log "内核panic/oops检测失败: $($_.Exception.Message)" "WARN"
}
# ========== 新增:服务崩溃重启记录 ==========
try {
$crashLog = Invoke-SSHCommand "journalctl --since '
24 hours ago
' --no-pager 2>/dev/null | grep -iE '
crashed|segfault|core dumped
' | tail -20" -Timeout 30
if ($crashLog) {
$crashCount = ($crashLog -split "`n" | Where-Object { $_ -match "crashed|segfault|core dumped" } | Measure-Object).Count
if ($crashCount -gt 0) {
$results += [PSCustomObject]@{
Name = "服务崩溃记录"
Value = "$crashCount 条"
Threshold = ">0"
Status = "严重"
Message = "24小时内检测到服务崩溃"
}
Add-Issue -Message "检测到服务崩溃: $crashCount 条" -Level "严重"
}
}
}
catch {
Write-Log "服务崩溃重启记录检测失败: $($_.Exception.Message)" "WARN"
}
# ========== 新增:系统资源耗尽事件 ==========
try {
$resourceEvents = Invoke-SSHCommand "journalctl --since '
24 hours ago
' --no-pager 2>/dev/null | grep -iE '
resource temporarily unavailable|cannot allocate memory|too many open files
' | tail -20" -Timeout 30
if ($resourceEvents) {
$eventCount = ($resourceEvents -split "`n" | Where-Object { $_ -match "resource|allocate|open files" } | Measure-Object).Count
if ($eventCount -gt 0) {
$results += [PSCustomObject]@{
Name = "资源耗尽事件"
Value = "$eventCount 条"
Threshold = ">0"
Status = "严重"
Message = "24小时内检测到资源耗尽事件"
}
Add-Issue -Message "检测到资源耗尽事件: $eventCount 条" -Level "严重"
}
}
}
catch {
Write-Log "系统资源耗尽事件检测失败: $($_.Exception.Message)" "WARN"
}
# 保存结果
foreach
(
$result
in
$results
)
{
Save-TestResult
"系统日志"
$result
...
...
@@ -2371,95 +1867,6 @@ function Test-DockerStatus {
}
}
# 容器资源限制检测
Write-Log "
检测容器资源限制...
"
$resourceLimits
= @("
umysql
", "
uredis
", "
uemqx
", "
ujava2
")
foreach (
$container
in
$resourceLimits
) {
$limitInfo
= Invoke-SSHCommand "
docker inspect
$container
2>/dev/null --format
'{{.HostConfig.Memory}}:{{.HostConfig.NanoCpus}}'
" -Timeout 10
if (
$limitInfo
-and
$limitInfo
-match "
^
(
\d+
)
:
(
\d+
)
$") {
$memLimit
= [long]
$matches
[1]
$cpuLimit
= [long]
$matches
[2]
$memLimitMB
= if (
$memLimit
-gt 0) { "
{
0:N0
}
MB
" -f (
$memLimit
/ 1MB) } else { "
无限制
" }
$cpuQuota
= if (
$cpuLimit
-gt 0) { [math]::Round(
$cpuLimit
/ 1e9, 2) } else { "
无限制
" }
$results
+= [PSCustomObject]@{
Name = "
容器资源限制
$container
"
Value = "
CPU:
$cpuQuota
核, 内存:
$memLimitMB
"
Threshold = "
-
"
Status = "
正常
"
Message = "
容器资源配置限制
"
}
}
}
# 容器日志错误扫描
Write-Log "
扫描容器日志中的错误...
"
foreach (
$container
in
$resourceLimits
) {
# 获取最近100行日志中的错误
$errorLogs
= Invoke-SSHCommand "
docker logs --tail 100
$container
2>&1 |
grep
-iE
'error|exception|fail|fatal|critical'
" -Timeout 15
if (
$errorLogs
) {
if (
$errorLogs
-is [array]) {
$errorLogs
=
$errorLogs
-join "
`n
" }
$errorCount
= (
$errorLogs
-split "
`n
" | Where-Object {
$_
-match "
\S+
" }).Count
if (
$errorCount
-gt 0) {
$status
= if (
$errorCount
-gt 10) { "
警告
" } else { "
注意
" }
$results
+= [PSCustomObject]@{
Name = "
容器日志错误
$container
"
Value = "
${
errorCount
}
条
"
Threshold = "
>
10条警告
"
Status =
$status
Message = "
最近100行日志中发现
${
errorCount
}
个错误
"
}
if (
$status
-eq "
警告
") {
Add-Issue -Message "
容器
${
container
}
日志中发现
${
errorCount
}
个错误
" -Level
$status
}
}
}
}
# Docker事件检测
Write-Log "
获取最近的Docker事件...
"
$dockerEvents
= Invoke-SSHCommand "
docker events --since
'1h'
--until
'0s'
--format
'{{.Type}}:{{.Action}}:{{.Actor.Attributes.name}}'
2>&1 |
head
-20
" -Timeout 15
if (
$dockerEvents
) {
if (
$dockerEvents
-is [array]) {
$dockerEvents
=
$dockerEvents
-join "
`n
" }
# 统计事件类型
$eventCount
= 0
$importantEvents
= @()
foreach (
$eventLine
in
$dockerEvents
-split "
`n
") {
if (
$eventLine
-match "
\S+
") {
$eventCount
++
# 关注重要事件
if (
$eventLine
-match "
die|oom|kill|destroy
") {
$importantEvents
+=
$eventLine
}
}
}
if (
$eventCount
-gt 0) {
$eventStatus
= if (
$importantEvents
.Count -gt 0) { "
警告
" } else { "
正常
" }
$eventMessage
= if (
$importantEvents
.Count -gt 0) {
"
最近1小时
${
eventCount
}
个事件, 重要:
$(
$importantEvents
.Count
)
"
} else {
"
最近1小时
${
eventCount
}
个事件
"
}
$results
+= [PSCustomObject]@{
Name = "
Docker事件
"
Value = "
${
eventCount
}
个
"
Threshold = "
-
"
Status =
$eventStatus
Message =
$eventMessage
}
if (
$importantEvents
.Count -gt 0) {
Add-Issue -Message "
Docker重要事件:
$(
$importantEvents
-join
', '
)
" -Level
$eventStatus
}
}
}
# 保存结果
foreach
(
$result
in
$results
)
{
Save-TestResult
"Docker容器"
$result
...
...
@@ -4254,73 +3661,6 @@ function Test-JavaApplication {
Write-Log "JVM运行时信息检测失败: $($_.Exception.Message)" "WARN"
}
# Spring Boot Actuator端点检测
try {
Write-Log "检测Spring Boot Actuator端点..."
$actuatorCheck = Invoke-SSHCommand '
curl
-
s
--
max-time
5
http://127.0.0.1:8080/actuator/health
2
>&
1
|
grep
-
oE
'"'
"'{"
status
":"
[^
"]+"
}
'"'
"'' -Timeout 10
if (
$actuatorCheck
-match '{"
status
":"
([^
"]+)"
}
') {
$healthStatus = $matches[1]
$actuatorStatus = if ($healthStatus -eq "UP") { "正常" } else { "警告" }
$results += [PSCustomObject]@{
Name = "Spring Boot健康"
Value = $healthStatus
Threshold = "-"
Status = $actuatorStatus
Message = "Actuator端点状态"
}
}
}
catch {
Write-Log "Spring Boot Actuator检测失败: $($_.Exception.Message)" "WARN"
}
# Java线程dump分析
try {
Write-Log "分析Java线程状态..."
# 获取Java进程PID并分析线程状态
$threadDump = Invoke-SSHCommand '
java_pid
=
$(
docker
exec
ujava2 jps 2>/dev/null |
grep
-v Jps |
awk
'"'
"'NR==1{print
$1
}'"
'"'
)
;
if
[
-
n
"
$java_pid
"
]
;
then
docker
exec
ujava2 jstack
$java_pid
2
>&
1
|
grep
-
oE
'"'
"'java.lang.Thread.State: [^:]+'"
'"'
|
sort
|
uniq
-
c
|
sort
-
rn;
fi
' -Timeout 15
if ($threadDump) {
if ($threadDump -is [array]) { $threadDump = $threadDump -join "`n" }
$threadStates = @{}
foreach ($line in $threadDump -split "`n") {
if ($line -match "(\d+)\s+java\.lang\.Thread\.State:\s+(\S+)") {
$count = [int]$matches[1]
$state = $matches[2]
$threadStates[$state] = $count
}
}
if ($threadStates.Count -gt 0) {
# 检查阻塞和等待的线程
$blockedCount = if ($threadStates["BLOCKED"]) { $threadStates["BLOCKED"] } else { 0 }
$waitingCount = if ($threadStates["WAITING"]) { $threadStates["WAITING"] } else { 0 }
$timedWaitingCount = if ($threadStates["TIMED_WAITING"]) { $threadStates["TIMED_WAITING"] } else { 0 }
$problematicThreads = $blockedCount + $waitingCount
$threadStatus = if ($problematicThreads -gt 50) { "警告" } else { "正常" }
$stateSummary = ($threadStates.GetEnumerator() | ForEach-Object { "$($_.Key):$($_.Value)" }) -join ", "
$results += [PSCustomObject]@{
Name = "Java线程状态"
Value = "BLOCKED:$blockedCount, WAITING:$waitingCount"
Threshold = "BLOCKED+WAITING>50警告"
Status = $threadStatus
Message = $stateSummary
}
if ($threadStatus -ne "正常") {
Add-Issue -Message "Java线程异常: 阻塞${blockedCount}, 等待${waitingCount}" -Level $threadStatus
}
}
}
}
catch {
Write-Log "Java线程dump分析失败: $($_.Exception.Message)" "WARN"
}
# 保存结果
foreach ($result in $results) {
Save-TestResult "Java应用" $result
...
...
@@ -4383,41 +3723,6 @@ function Test-ApplicationLogs {
Message = "最近200行日志中无错误"
}
}
# 错误频率统计(按小时分组)
$errorFreq = Invoke-SSHCommand "grep -iE '
error|exception|fail
' '
$javaLogPath
'/*.log 2>/dev/null | grep -oE '
[
0
-
9
]{
4
}-[
0
-
9
]{
2
}-[
0
-
9
]{
2
}
[
0
-
9
]{
2
}
' | sort | uniq -c | tail -8" -Timeout 30
if ($errorFreq) {
if ($errorFreq -is [array]) { $errorFreq = $errorFreq -join "`n" }
$freqDetails = @()
$maxHourlyErrors = 0
foreach ($freqLine in $errorFreq -split "`n") {
if ($freqLine -match "(\d+)\s+(\d{4}-\d{2}-\d{2} \d{2})") {
$count = [int]$matches[1]
$hour = $matches[2]
$freqDetails += "${hour}:${count}个"
if ($count -gt $maxHourlyErrors) {
$maxHourlyErrors = $count
}
}
}
if ($freqDetails.Count -gt 0) {
$freqStatus = if ($maxHourlyErrors -gt 100) { "警告" } else { "正常" }
$results += [PSCustomObject]@{
Name = "Java错误频率"
Value = "最高${maxHourlyErrors}个/小时"
Threshold = ">100个/小时警告"
Status = $freqStatus
Message = ($freqDetails -join ", ")
}
if ($freqStatus -ne "正常") {
Add-Issue -Message "Java错误频率异常: 最高${maxHourlyErrors}个/小时" -Level $freqStatus
}
}
}
}
# 日志文件大小检测
...
...
@@ -4539,39 +3844,6 @@ function Test-ApplicationLogs {
Message = "Nginx应用日志错误数"
}
}
# Nginx慢请求分析(访问日志)
$slowRequests = Invoke-SSHCommand "tail -1000 '
$nginxLogPath
'/access*.log 2>/dev/null | awk '
{
print \
$NF
}
' | grep -oE '
[
0
-
9
]+
\.[0-9]+
' | sort -rn | head -10" -Timeout 30
if ($slowRequests) {
if ($slowRequests -is [array]) { $slowRequests = $slowRequests -join "`n" }
$slowTimes = @()
$maxTime = 0
foreach ($slowLine in $slowRequests -split "`n") {
if ($slowLine -match "(\d+\.\d+)") {
$time = [double]$matches[1]
$slowTimes += $time
if ($time -gt $maxTime) { $maxTime = $time }
}
}
if ($slowTimes.Count -gt 0) {
$avgTime = [math]::Round(($slowTimes | Measure-Object -Average).Average, 2)
$slowStatus = if ($maxTime -gt 5) { "警告" } else { "正常" }
$results += [PSCustomObject]@{
Name = "Nginx慢请求"
Value = "最慢${maxTime}s, 平均${avgTime}s"
Threshold = ">5s警告"
Status = $slowStatus
Message = "最近1000条请求中最慢的10个"
}
if ($slowStatus -ne "正常") {
Add-Issue -Message "Nginx存在慢请求: 最慢${maxTime}s" -Level $slowStatus
}
}
}
}
# 保存结果
...
...
@@ -4688,26 +3960,6 @@ $(
| CPU核心数 | $($systemInfo.CPUCores) |
| 总内存 | $($systemInfo.MemoryTotal) |
| 系统负载 | $($systemInfo.LoadAverage) |
$(
if ($systemInfo.KernelParams) {
"| 内核参数 | $($systemInfo.KernelParams -replace '
\n
', '
<br>
') |`n"
}
)
$(
if ($systemInfo.UlimitInfo) {
"| 资源限制 | $($systemInfo.UlimitInfo -replace '
\n
', '
<br>
') |`n"
}
)
$(
if ($systemInfo.MemoryPressure) {
"| 内存压力 | $($systemInfo.MemoryPressure -replace '
\n
', '
<br>
') |`n"
}
)
$(
if ($systemInfo.HugePages) {
"| 大页内存 | $($systemInfo.HugePages -replace '
\n
', '
<br>
') |`n"
}
)
---
...
...
@@ -5004,7 +4256,7 @@ $(
*报告生成时间: $(Get-Date -Format "yyyy-MM-dd HH:mm:ss")*
*服务器健康监测脚本 v2.0*
*检测点数量: 1
5
0+*
*检测点数量: 1
2
0+*
"@
return $reportContent
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论