提交 37be9a81 authored 作者: 陈泽健's avatar 陈泽健

refactor(security): 重构安全检测模块结构

- 将安全合规检测模块重命名为 43_security_compliance.sh
- 将系统日志检测模块重命名为 44_system_logs.sh
- 将时间同步检测模块重命名为 45_time_sync.sh
- 更新PowerShell脚本中的模块映射关系
- 添加安全合规检测相关的显示名称和阈值配置
- 添加系统日志检测相关的显示名称和阈值配置
- 添加时间同步检测相关的显示名称和阈值配置
- 移除原有的 08_security_check.sh、09_system_logs.sh 和 10_time_sync.sh 文件
上级 18da83b0
......@@ -354,6 +354,9 @@ function Get-ModuleCategory {
"^3[26]_" { return "FastDFS应用" }
"^33_" { return "应用日志" }
"^40_" { return "综合诊断" }
"^43_" { return "安全合规检测" }
"^44_" { return "系统日志检测" }
"^45_" { return "时间同步检测" }
default { return "其他" }
}
}
......@@ -551,6 +554,108 @@ function Get-DisplayName {
"FASTDFS_HTTP_STATUS" = "FastDFS HTTP状态"
"FASTDFS_STORAGE_UPTIME_DAYS" = "FastDFS Storage运行天数"
"FASTDFS_GROUP_NAME" = "FastDFS组名"
# 安全合规检测
"AUTH_FAILURES_24H" = "24小时认证失败次数"
"AUTH_FAILURES_LEVEL" = "认证失败等级"
"RECENT_LOGINS" = "最近登录记录"
"CURRENT_USERS_COUNT" = "当前登录用户数"
"CURRENT_USERS" = "当前登录用户"
"SELINUX_STATUS" = "SELinux状态"
"FIREWALL_STATUS" = "防火墙状态"
"FIREWALL_RULES" = "防火墙规则"
"IPTABLES_STATUS" = "iptables状态"
"IPTABLES_RULES_COUNT" = "iptables规则数量"
"OPEN_PORTS_COUNT" = "开放端口数量"
"ABNORMAL_ACCOUNTS" = "异常账户"
"ABNORMAL_ACCOUNTS_LEVEL" = "异常账户等级"
"SUSPICIOUS_SUID_COUNT" = "可疑SUID文件数量"
"SUSPICIOUS_SUID_FILES" = "可疑SUID文件"
"MODIFIED_CONF_COUNT" = "修改的配置文件数量"
"MODIFIED_CONF_FILES" = "修改的配置文件"
"ABNORMAL_CRON" = "异常cron任务"
"ABNORMAL_CRON_LEVEL" = "异常cron任务等级"
"MAX_LOGIN_FAILURES" = "最大登录失败次数"
"BRUTE_FORCE_IPS" = "暴力破解IP"
"BRUTE_FORCE_LEVEL" = "暴力破解等级"
"EMPTY_PASSWORD_ACCOUNTS" = "空密码账户"
"EMPTY_PASSWORD_LEVEL" = "空密码账户等级"
"SSH_PERMIT_ROOT" = "SSH允许root登录"
"SSH_PASSWORD_AUTH" = "SSH密码认证"
"SSH_PORT" = "SSH端口"
"SSH_DEFAULT_PORT" = "SSH使用默认端口"
"SSH_DEFAULT_PORT_LEVEL" = "SSH默认端口等级"
"SSH_ROOT_LOGIN_LEVEL" = "SSH root登录等级"
"SSH_PASSWORD_AUTH_LEVEL" = "SSH密码认证等级"
"OPEN_TCP_PORTS" = "开放TCP端口"
"OPEN_UDP_PORTS" = "开放UDP端口"
"HIGH_RISK_PORTS" = "高风险端口"
"HIGH_RISK_PORTS_LEVEL" = "高风险端口等级"
# 系统日志检测
"KERNEL_ERRORS_24H" = "24小时内核错误数"
"KERNEL_ERRORS_LEVEL" = "内核错误等级"
"DISK_ERRORS_24H" = "24小时磁盘错误数"
"DISK_ERRORS_DMESG" = "dmesg磁盘错误数"
"DISK_ERRORS_LEVEL" = "磁盘错误等级"
"DMESG_ERRORS_COUNT" = "dmesg错误数量"
"DMESG_ERROR_TYPES" = "dmesg错误类型"
"MESSAGES_LOG_STATUS" = "messages日志状态"
"MESSAGES_ERRORS_COUNT" = "messages错误数量"
"MESSAGES_WARNS_COUNT" = "messages警告数量"
"MESSAGES_LOG_SIZE" = "messages日志大小"
"KERNEL_PANIC_COUNT" = "内核panic数量"
"KERNEL_OOPS_COUNT" = "内核oops数量"
"KERNEL_CRASH_FILES" = "内核崩溃文件数量"
"KERNEL_STABILITY_LEVEL" = "内核稳定性等级"
"SERVICE_CRASH_COUNT" = "服务崩溃数量"
"CRASHED_SERVICES" = "崩溃的服务"
"SERVICE_STABILITY_LEVEL" = "服务稳定性等级"
"SYSTEMD_STATUS" = "systemd状态"
"SYSTEMD_FAILED_COUNT" = "systemd失败服务数量"
"SYSTEMD_FAILED_SERVICES" = "systemd失败服务"
"SYSTEMD_FAILED_LEVEL" = "systemd失败等级"
"OOM_KILLER_COUNT" = "OOM Killer数量"
"CORE_DUMP_FILES" = "Core dump文件数量"
"OOM_VICTIMS" = "OOM受害者"
"OOM_LEVEL" = "OOM等级"
"RESOURCE_EXHAUSTION_EVENTS" = "资源耗尽事件"
"RESOURCE_EXHAUSTION_LEVEL" = "资源耗尽等级"
"HARDWARE_ERRORS" = "硬件错误"
"HARDWARE_ERRORS_LEVEL" = "硬件错误等级"
"LARGE_LOG_FILES" = "大日志文件"
"LARGE_LOG_FILES_LEVEL" = "大日志文件等级"
"NETWORK_ERRORS" = "网络错误"
"NETWORK_ERRORS_LEVEL" = "网络错误等级"
# 时间同步检测
"NTP_SERVICE_STATUS" = "NTP服务状态"
"SYSTEM_CLOCK_SYNC" = "系统时钟同步"
"SYSTEM_CLOCK_SYNC_LEVEL" = "系统时钟同步等级"
"NTP_SERVICE_NAME" = "NTP服务名称"
"NTP_DAEMON" = "NTP守护进程"
"NTP_SOURCES" = "NTP同步源"
"NTP_SOURCES_COUNT" = "NTP同步源数量"
"NTP_CURRENT_SOURCE" = "NTP当前同步源"
"NTP_CONFIG_SOURCES" = "NTP配置同步源"
"NTP_OFFSET_MS" = "NTP时钟偏差(毫秒)"
"NTP_OFFSET_SEC" = "NTP时钟偏差(秒)"
"NTP_OFFSET_LEVEL" = "NTP时钟偏差等级"
"SYSTEM_DATETIME" = "系统时间"
"SYSTEM_TIMESTAMP" = "系统时间戳"
"SYSTEM_TIMEZONE" = "系统时区"
"HTTPS_CERT_INFO" = "HTTPS证书信息"
"HTTPS_CERT_MIN_DAYS" = "HTTPS证书最小剩余天数"
"HTTPS_CERT_LEVEL" = "HTTPS证书等级"
"HTTPS_CERT_STATUS" = "HTTPS证书状态"
"EMQX_CERT_INFO" = "EMQX证书信息"
"EMQX_CERT_MIN_DAYS" = "EMQX证书最小剩余天数"
"EMQX_CERT_LEVEL" = "EMQX证书等级"
"EMQX_CERT_STATUS" = "EMQX证书状态"
"MYSQL_CERT_MIN_DAYS" = "MySQL证书最小剩余天数"
"MYSQL_CERT_LEVEL" = "MySQL证书等级"
"SYSTEM_UPTIME_DAYS" = "系统运行天数"
"HWCLOCK_STATUS" = "硬件时钟状态"
}
if ($displayNames.ContainsKey($Key)) {
......@@ -805,6 +910,56 @@ function Get-Threshold {
"FASTDFS_RECENT_ERRORS" = ">10"
"FASTDFS_FILE_COUNT" = ">100000"
"FASTDFS_HTTP_STATUS" = "0"
# ==================== 安全合规检测阈值 ====================
"AUTH_FAILURES_LEVEL" = ">100"
"ABNORMAL_ACCOUNTS" = ">0"
"ABNORMAL_ACCOUNTS_LEVEL" = ">0"
"MODIFIED_CONF_COUNT" = ">10"
"ABNORMAL_CRON" = "未发现异常"
"ABNORMAL_CRON_LEVEL" = "未发现异常"
"MAX_LOGIN_FAILURES" = ">20"
"BRUTE_FORCE_LEVEL" = ">20"
"EMPTY_PASSWORD_ACCOUNTS" = ">0"
"EMPTY_PASSWORD_LEVEL" = ">0"
"SSH_DEFAULT_PORT" = "是"
"SSH_DEFAULT_PORT_LEVEL" = "是"
"SSH_ROOT_LOGIN_LEVEL" = "是"
"SSH_PASSWORD_AUTH_LEVEL" = "是"
"HIGH_RISK_PORTS" = ">0"
"HIGH_RISK_PORTS_LEVEL" = ">0"
# ==================== 系统日志检测阈值 ====================
"KERNEL_ERRORS_LEVEL" = ">10"
"DISK_ERRORS_LEVEL" = ">5"
"KERNEL_PANIC_COUNT" = ">0"
"KERNEL_OOPS_COUNT" = ">5"
"KERNEL_STABILITY_LEVEL" = ">0"
"SERVICE_CRASH_COUNT" = ">5"
"SERVICE_STABILITY_LEVEL" = ">5"
"SYSTEMD_FAILED_COUNT" = ">0"
"SYSTEMD_FAILED_LEVEL" = ">0"
"OOM_KILLER_COUNT" = ">1"
"CORE_DUMP_FILES" = ">0"
"OOM_LEVEL" = ">0"
"RESOURCE_EXHAUSTION_EVENTS" = ">0"
"RESOURCE_EXHAUSTION_LEVEL" = ">0"
"HARDWARE_ERRORS" = ">0"
"HARDWARE_ERRORS_LEVEL" = ">0"
"LARGE_LOG_FILES_LEVEL" = ">0"
"NETWORK_ERRORS" = ">0"
"NETWORK_ERRORS_LEVEL" = ">0"
# ==================== 时间同步检测阈值 ====================
"SYSTEM_CLOCK_SYNC" = "未同步"
"SYSTEM_CLOCK_SYNC_LEVEL" = "未同步"
"NTP_OFFSET_LEVEL" = ">1秒"
"HTTPS_CERT_MIN_DAYS" = "<30天"
"HTTPS_CERT_LEVEL" = "<30天"
"EMQX_CERT_MIN_DAYS" = "<30天"
"EMQX_CERT_LEVEL" = "<30天"
"MYSQL_CERT_MIN_DAYS" = "<30天"
"MYSQL_CERT_LEVEL" = "<30天"
}
if ($thresholds.ContainsKey($Key)) {
......@@ -910,8 +1065,8 @@ function Invoke-AllChecks {
$systemModules = @(
"01_system_basic.sh", "02_cpu_check.sh", "03_memory_check.sh",
"04_disk_check.sh", "05_oom_check.sh", "06_process_check.sh",
"07_network_check.sh", "08_security_check.sh", "09_system_logs.sh",
"10_time_sync.sh", "11_scheduled_tasks.sh", "12_port_check.sh"
"07_network_check.sh", "43_security_compliance.sh", "44_system_logs.sh",
"45_time_sync.sh", "11_scheduled_tasks.sh", "12_port_check.sh"
)
# 综合诊断模块(在所有模块之后执行)
......
#!/bin/bash
################################################################################
# 安全合规检测模块
# 功能: 检测SELinux、防火墙、开放端口、SSH配置、异常账户等安全相关项
# 作者: Claude Code
# 日期: 2026-05-09
################################################################################
# Resolve this script's own directory and load the shared configuration and
# helper library from the staging directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
LIB_DIR="/tmp/check_modules"

# Both files are mandatory: print an error and exit with status 1 as soon as
# one of them is missing.
if [ ! -f "$LIB_DIR/config.sh" ]; then
  echo "ERROR: 配置文件不存在: $LIB_DIR/config.sh"
  exit 1
fi
source "$LIB_DIR/config.sh"

if [ ! -f "$LIB_DIR/common.sh" ]; then
  echo "ERROR: 通用函数库不存在: $LIB_DIR/common.sh"
  exit 1
fi
source "$LIB_DIR/common.sh"
# ==================== 检测函数 ====================
# Report the SELinux enforcement mode together with a severity level.
# Outputs (via output_result): SELINUX_STATUS, SELINUX_STATUS_LEVEL.
check_selinux() {
  local mode level

  # No getenforce binary: report SELinux as not installed, which is not flagged.
  if ! command -v getenforce &> /dev/null; then
    output_result "SELINUX_STATUS" "未安装"
    output_result "SELINUX_STATUS_LEVEL" "正常"
    return
  fi

  mode=$(getenforce 2>/dev/null)
  output_result "SELINUX_STATUS" "$mode"

  # Only Enforcing is considered healthy; anything unrecognised is "unknown".
  case "$mode" in
    Enforcing) level="正常" ;;
    Permissive | Disabled) level="警告" ;;
    *) level="未知" ;;
  esac
  output_result "SELINUX_STATUS_LEVEL" "$level"
}
# Detect whether any host firewall is active.
# Probes, in order: firewalld (systemctl), ufw, then raw iptables rules.
# Outputs (via output_result): FIREWALL_STATUS, FIREWALL_LEVEL.
check_firewall() {
  local fw_status="未运行"
  local fw_level="警告"
  local rule_count=0

  # firewalld
  if command -v systemctl &> /dev/null && systemctl is-active firewalld &> /dev/null; then
    fw_status="运行中"
    fw_level="正常"
  fi

  # ufw (Ubuntu)
  if [ "$fw_status" = "未运行" ] && command -v ufw &> /dev/null; then
    if ufw status 2>/dev/null | grep -q "Status: active"; then
      fw_status="运行中"
      fw_level="正常"
    fi
  fi

  # iptables: `iptables -L` always prints the built-in "Chain ..." headers, so
  # counting those reports a firewall even for an empty rule set. Count real
  # rules (-A ...) and DROP default policies instead.
  if [ "$fw_status" = "未运行" ] && command -v iptables &> /dev/null; then
    rule_count=$(iptables -S 2>/dev/null | grep -cE '^-A |^-P [^ ]+ DROP$')
    if [ "${rule_count:-0}" -gt 0 ]; then
      fw_status="iptables运行"
      fw_level="正常"
    fi
  fi

  output_result "FIREWALL_STATUS" "$fw_status"
  output_result "FIREWALL_LEVEL" "$fw_level"
}
# Count the interactive sessions currently logged in.
# Uses `who`, falling back to `w` when `who` is unavailable.
# Outputs (via output_result): LOGGED_USERS, LOGGED_USERS_LEVEL
# (warning when more than 10 sessions are present).
check_logged_users() {
  local user_count=0

  # Look the tools up on PATH instead of assuming they live in /usr/bin.
  if command -v who &> /dev/null; then
    user_count=$(who 2>/dev/null | wc -l)
  elif command -v w &> /dev/null; then
    # `w` prints two header lines (uptime summary + column titles); skip both.
    user_count=$(w 2>/dev/null | tail -n +3 | wc -l)
  fi

  # Some wc implementations pad the number with blanks.
  user_count=${user_count//[[:space:]]/}
  user_count=${user_count:-0}

  output_result "LOGGED_USERS" "$user_count"
  if [ "$user_count" -gt 10 ]; then
    output_result "LOGGED_USERS_LEVEL" "警告"
  else
    output_result "LOGGED_USERS_LEVEL" "正常"
  fi
}
# Count listening TCP sockets using ss, or netstat when ss is unavailable.
# Outputs (via output_result): OPEN_PORTS, OPEN_PORTS_LEVEL
# (warning when more than 50 listeners are found).
check_open_ports() {
  local open_ports=0

  # grep -c already prints "0" when nothing matches (while exiting 1), so no
  # `|| echo 0` fallback is used: that would produce the two-line value "0\n0".
  if command -v ss &> /dev/null; then
    open_ports=$(ss -tlnp 2>/dev/null | grep -c LISTEN)
  elif command -v netstat &> /dev/null; then
    open_ports=$(netstat -tln 2>/dev/null | grep -c LISTEN)
  fi
  open_ports=${open_ports:-0}

  output_result "OPEN_PORTS" "$open_ports"
  if [ "$open_ports" -gt 50 ]; then
    output_result "OPEN_PORTS_LEVEL" "警告"
  else
    output_result "OPEN_PORTS_LEVEL" "正常"
  fi
}
# Count failed password authentications.
# Primary source is the journal (last 24 hours); when that yields nothing the
# classic log files are consulted (/var/log/auth.log as a whole, or today's
# entries of /var/log/secure).
# Outputs (via output_result): AUTH_FAIL_COUNT, AUTH_FAIL_STATUS, plus an
# "ERROR:" line on stdout when the status is not normal
# (>= 100 warning, >= 1000 critical).
check_auth_failures() {
  local fail_count=0
  local status
  local today

  if command -v journalctl &> /dev/null; then
    # NOTE(review): -t filters on the syslog *identifier*; "authpriv" is a
    # facility name, so this query may never match — confirm the intent.
    # grep -c prints 0 itself on no match; no `|| echo 0` (that yields "0\n0").
    fail_count=$(journalctl --since '24 hours ago' -t authpriv 2>/dev/null | grep -c "Failed password")
  fi
  fail_count=${fail_count:-0}

  # Fallback: plain log files.
  if [ "$fail_count" -eq 0 ]; then
    if [ -f /var/log/auth.log ]; then
      fail_count=$(grep -c "Failed password" /var/log/auth.log 2>/dev/null)
    elif [ -f /var/log/secure ]; then
      # The format must be one quoted argument (an unquoted "+%b %d" makes
      # date fail). Traditional syslog stamps use English month names and a
      # space-padded day, hence LC_ALL=C and %e.
      today=$(LC_ALL=C date '+%b %e')
      fail_count=$(grep "Failed password" /var/log/secure 2>/dev/null | grep -cF -- "$today")
    fi
    fail_count=${fail_count:-0}
  fi

  output_result "AUTH_FAIL_COUNT" "$fail_count"

  if [ "$fail_count" -ge 1000 ]; then
    status="严重"
  elif [ "$fail_count" -ge 100 ]; then
    status="警告"
  else
    status="正常"
  fi
  output_result "AUTH_FAIL_STATUS" "$status"

  if [ "$status" != "正常" ]; then
    echo "ERROR:认证失败次数过多(24h): ${fail_count}"
  fi
}
# Look for accounts other than root that carry UID 0 in /etc/passwd.
# Outputs (via output_result): ANOMALOUS_ACCOUNTS (only when /etc/passwd
# exists) and ANOMALOUS_ACCOUNTS_STATUS; prints an "ERROR:" line when such
# accounts are found.
check_anomalous_accounts() {
  local level="正常"
  local rogue=""

  if [ -f /etc/passwd ]; then
    # Single awk pass: UID field equals 0 and the name is not exactly "root".
    rogue=$(awk -F: '$3==0 && $1 != "root" {print $1}' /etc/passwd 2>/dev/null)
    if [ -z "$rogue" ]; then
      output_result "ANOMALOUS_ACCOUNTS" "无"
    else
      level="严重"
      output_result "ANOMALOUS_ACCOUNTS" "$rogue"
      echo "ERROR:发现异常UID=0账户: ${rogue}"
    fi
  fi

  output_result "ANOMALOUS_ACCOUNTS_STATUS" "$level"
}
# Search the whole filesystem for SUID files that live outside the usual
# system directories (/usr, /bin, /sbin, /lib, /lib64); at most the first ten
# hits are considered.
# Outputs (via output_result): SUSPICIOUS_SUID_COUNT (when find is available)
# and SUSPICIOUS_SUID_STATUS; prints an "ERROR:" line when hits exist.
check_suspicious_suid() {
  local level="正常"
  local hits=""
  local -a hit_list=()

  if command -v find &> /dev/null; then
    hits=$(find / -perm -4000 -type f 2>/dev/null | grep -v -E '^/(usr|bin|sbin|lib|lib64)/' | head -10)
    if [ -z "$hits" ]; then
      output_result "SUSPICIOUS_SUID_COUNT" "0"
    else
      # One array element per reported path gives the hit count directly.
      mapfile -t hit_list <<< "$hits"
      level="警告"
      output_result "SUSPICIOUS_SUID_COUNT" "${#hit_list[@]}"
      echo "ERROR:发现可疑SUID文件: ${#hit_list[@]}个"
    fi
  fi

  output_result "SUSPICIOUS_SUID_STATUS" "$level"
}
# Audit a few security-relevant sshd settings.
# Arguments: $1 - optional path of the sshd configuration file
#                 (default: /etc/ssh/sshd_config)
# Outputs (via output_result): SSH_CONFIG_STATUS, SSH_CONFIG_LEVEL and, when
# something is flagged, SSH_CONFIG_ISSUES (a ';'-terminated list).
check_ssh_config() {
  local config_file="${1:-/etc/ssh/sshd_config}"
  local ssh_status="安全"
  local ssh_level="正常"
  local issues=""
  local max_tries

  if [ ! -f "$config_file" ]; then
    output_result "SSH_CONFIG_STATUS" "配置文件不存在"
    output_result "SSH_CONFIG_LEVEL" "未知"
    return
  fi

  # sshd keywords are case-insensitive and may be separated from their value
  # by any run of blanks or tabs, so a literal "Keyword yes" match is too strict.
  if grep -qiE '^[[:space:]]*PermitRootLogin[[:space:]]+yes([[:space:]]|$)' "$config_file" 2>/dev/null; then
    issues="${issues}允许root登录;"
    ssh_status="存在风险"
    ssh_level="警告"
  fi

  if grep -qiE '^[[:space:]]*PasswordAuthentication[[:space:]]+yes([[:space:]]|$)' "$config_file" 2>/dev/null; then
    issues="${issues}允许密码认证;"
    ssh_status="存在风险"
    ssh_level="警告"
  fi

  # Use the first MaxAuthTries occurrence only, and compare it only when it is
  # a plain number so that `[ ... -gt ... ]` can never fail on odd input.
  max_tries=$(awk 'tolower($1) == "maxauthtries" { print $2; exit }' "$config_file" 2>/dev/null)
  if [[ "$max_tries" =~ ^[0-9]+$ ]] && [ "$max_tries" -gt 3 ]; then
    issues="${issues}最大认证次数过高(${max_tries});"
    ssh_status="存在风险"
    ssh_level="警告"
  fi

  output_result "SSH_CONFIG_STATUS" "$ssh_status"
  output_result "SSH_CONFIG_LEVEL" "$ssh_level"
  if [ -n "$issues" ]; then
    output_result "SSH_CONFIG_ISSUES" "$issues"
  fi
}
# Report whether recent login records can be retrieved with `last`.
# Outputs (via output_result): RECENT_LOGINS.
check_recent_logins() {
  local recent_logins=""

  if ! command -v last &> /dev/null; then
    output_result "RECENT_LOGINS" "命令不可用"
    return
  fi

  # -R suppresses the hostname column. The former "-nohostname" was parsed as
  # "-n ohostname", which makes `last` fail, so the result was always empty.
  recent_logins=$(last -n 5 -R 2>/dev/null | head -5)
  if [ -n "$recent_logins" ]; then
    output_result "RECENT_LOGINS" "已获取"
  else
    output_result "RECENT_LOGINS" "无记录"
  fi
}
# Report the password ageing settings found in /etc/login.defs.
# Outputs (via output_result): PASS_MAX_DAYS and PASS_MIN_DAYS when the
# corresponding entries exist, and PASSWORD_POLICY ("已配置" when the file
# exists, "未知" otherwise).
check_password_policy() {
  local state="未知"
  local key value

  if [ -f /etc/login.defs ]; then
    for key in PASS_MAX_DAYS PASS_MIN_DAYS; do
      # Lines that begin with the key; the second field is its value.
      value=$(awk -v prefix="$key" 'index($0, prefix) == 1 { print $2 }' /etc/login.defs 2>/dev/null)
      if [ -n "$value" ]; then
        output_result "$key" "$value"
      fi
    done
    state="已配置"
  fi

  output_result "PASSWORD_POLICY" "$state"
}
# ==================== 主检测流程 ====================
# Entry point: run every security check of this module in a fixed order,
# bracketed by start/finish log messages.
main() {
  local step

  log_info "开始安全合规检测..."

  for step in \
    check_selinux \
    check_firewall \
    check_logged_users \
    check_open_ports \
    check_auth_failures \
    check_anomalous_accounts \
    check_suspicious_suid \
    check_ssh_config \
    check_recent_logins \
    check_password_policy; do
    "$step"
  done

  log_info "安全合规检测完成"
}
# 执行主函数
main
#!/bin/bash
################################################################################
# 系统日志检测模块
# 功能: 检测系统日志中的错误、警告、认证失败、服务失败等
# 作者: Claude Code
# 日期: 2026-05-09
################################################################################
# Resolve this script's own directory and load the shared configuration and
# helper library from the staging directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
LIB_DIR="/tmp/check_modules"

# Both files are mandatory: print an error and exit with status 1 as soon as
# one of them is missing.
if [ ! -f "$LIB_DIR/config.sh" ]; then
  echo "ERROR: 配置文件不存在: $LIB_DIR/config.sh"
  exit 1
fi
source "$LIB_DIR/config.sh"

if [ ! -f "$LIB_DIR/common.sh" ]; then
  echo "ERROR: 通用函数库不存在: $LIB_DIR/common.sh"
  exit 1
fi
source "$LIB_DIR/common.sh"
# ==================== 检测函数 ====================
# Count kernel journal lines from the last 24 hours that mention "error"
# (case-insensitive).
# Outputs (via output_result): KERNEL_ERRORS_24H, KERNEL_ERRORS_STATUS
# (warning above 50).
check_kernel_errors() {
  local error_count=0

  if command -v journalctl &> /dev/null; then
    # grep -c prints "0" by itself when nothing matches; adding `|| echo 0`
    # would turn the value into "0\n0" and break the numeric test below.
    error_count=$(journalctl -k --since '24 hours ago' --no-pager 2>/dev/null | grep -ci "error")
  fi
  error_count=${error_count:-0}

  output_result "KERNEL_ERRORS_24H" "$error_count"
  if [ "$error_count" -gt 50 ]; then
    output_result "KERNEL_ERRORS_STATUS" "警告"
  else
    output_result "KERNEL_ERRORS_STATUS" "正常"
  fi
}
# Count journal lines from the last 24 hours that contain
# "authentication failure" (case-insensitive).
# Outputs (via output_result): AUTH_FAILURES_24H, AUTH_FAILURES_STATUS
# (>= 100 warning, >= 1000 critical); prints an "ERROR:" line when the status
# is not normal.
check_auth_failures() {
  local fail_count=0
  local status

  if command -v journalctl &> /dev/null; then
    # No `|| echo 0`: grep -c already prints 0 on no match, and the fallback
    # would make the value the two-line string "0\n0".
    fail_count=$(journalctl --since '24 hours ago' --no-pager 2>/dev/null | grep -ci "authentication failure")
  fi
  fail_count=${fail_count:-0}

  if [ "$fail_count" -ge 1000 ]; then
    status="严重"
  elif [ "$fail_count" -ge 100 ]; then
    status="警告"
  else
    status="正常"
  fi

  output_result "AUTH_FAILURES_24H" "$fail_count"
  output_result "AUTH_FAILURES_STATUS" "$status"
  if [ "$status" != "正常" ]; then
    echo "ERROR:认证失败次数过多(24h): ${fail_count}"
  fi
}
# Count journal lines from the last 24 hours that mention "I/O error" or
# "disk error" (case-insensitive).
# Outputs (via output_result): DISK_ERRORS_24H, DISK_ERRORS_STATUS; prints an
# "ERROR:" line when at least one such line exists.
check_disk_errors() {
  local error_count=0

  if command -v journalctl &> /dev/null; then
    # grep -c prints 0 itself on no match; `|| echo 0` would yield "0\n0".
    error_count=$(journalctl --since '24 hours ago' --no-pager 2>/dev/null | grep -ciE "I/O error|disk error")
  fi
  error_count=${error_count:-0}

  output_result "DISK_ERRORS_24H" "$error_count"
  if [ "$error_count" -gt 0 ]; then
    output_result "DISK_ERRORS_STATUS" "警告"
    echo "ERROR:检测到磁盘错误: ${error_count} 个"
  else
    output_result "DISK_ERRORS_STATUS" "正常"
  fi
}
# Count kernel journal lines from the last 7 days that look like OOM-killer
# activity.
# Outputs (via output_result): OOM_LOGS_7D, OOM_LOGS_STATUS; prints an
# "ERROR:" line when any such line exists.
check_oom_logs() {
  local oom_count=0

  if command -v journalctl &> /dev/null; then
    # "oom" must start a word, otherwise unrelated text such as "room" or
    # "zoom" is counted. grep -c prints 0 itself on no match, so there is no
    # `|| echo 0` (which would produce "0\n0").
    oom_count=$(journalctl -k --since '7 days ago' --no-pager 2>/dev/null \
      | grep -ciE "(^|[^[:alnum:]])oom|out of memory|killed process")
  fi
  oom_count=${oom_count:-0}

  output_result "OOM_LOGS_7D" "$oom_count"
  if [ "$oom_count" -gt 0 ]; then
    output_result "OOM_LOGS_STATUS" "严重"
    echo "ERROR:7天内有OOM记录"
  else
    output_result "OOM_LOGS_STATUS" "正常"
  fi
}
# Count systemd service units that are currently in the "failed" state.
# Outputs (via output_result): FAILED_SERVICES, FAILED_SERVICES_STATUS; prints
# an "ERROR:" line when at least one unit has failed.
check_failed_services() {
  local failed_count=0
  local failed_list=""

  if command -v systemctl &> /dev/null; then
    failed_list=$(systemctl list-units --type=service --state=failed --no-pager --no-legend 2>/dev/null)
    # Count only lines with content: feeding an empty list through
    # `echo | grep -c "^"` counts the lone newline as one failed unit.
    failed_count=$(grep -c '[^[:space:]]' <<< "$failed_list")
  fi
  failed_count=${failed_count:-0}

  output_result "FAILED_SERVICES" "$failed_count"
  if [ "$failed_count" -gt 0 ]; then
    output_result "FAILED_SERVICES_STATUS" "警告"
    echo "ERROR:检测到${failed_count}个失败服务"
  else
    output_result "FAILED_SERVICES_STATUS" "正常"
  fi
}
# Count *.log files larger than 1 GiB.
# Arguments: $@ - optional directories to scan
#                 (default: /var/log and /data/services)
# Outputs (via output_result): LARGE_LOG_FILES, LOG_SIZE_STATUS (warning when
# more than 3 oversized files are found).
check_log_file_sizes() {
  local -a log_dirs=("$@")
  local -r size_limit_bytes=1073741824
  local large_logs=0
  local dir found

  if [ "${#log_dirs[@]}" -eq 0 ]; then
    log_dirs=("/var/log" "/data/services")
  fi

  for dir in "${log_dirs[@]}"; do
    [ -d "$dir" ] || continue
    # Let find apply the size filter: this looks at every log file (not just
    # the first ten that happen to be listed) and needs no per-file stat call
    # whose empty output could break a numeric comparison.
    found=$(find "$dir" -name "*.log" -type f -size "+${size_limit_bytes}c" 2>/dev/null | wc -l)
    found=${found//[[:space:]]/}
    large_logs=$((large_logs + ${found:-0}))
  done

  output_result "LARGE_LOG_FILES" "$large_logs"
  if [ "$large_logs" -gt 3 ]; then
    output_result "LOG_SIZE_STATUS" "警告"
  else
    output_result "LOG_SIZE_STATUS" "正常"
  fi
}
# Count systemd service units in the "failed" state and report them under the
# BOOT_FAILED_* keys.
# NOTE(review): this runs the same systemctl query as check_failed_services;
# confirm whether a boot-specific query was intended.
# Outputs (via output_result): BOOT_FAILED_SERVICES, BOOT_FAILED_STATUS.
check_boot_failed_services() {
  local boot_failed=0

  if command -v systemctl &> /dev/null; then
    # Count non-blank lines. grep -c prints 0 itself when nothing matches, so
    # no `|| echo 0` is appended (that would give the two-line value "0\n0").
    boot_failed=$(systemctl list-units --type=service --state=failed --no-pager --no-legend 2>/dev/null \
      | grep -c '[^[:space:]]')
  fi
  boot_failed=${boot_failed:-0}

  output_result "BOOT_FAILED_SERVICES" "$boot_failed"
  if [ "$boot_failed" -gt 0 ]; then
    output_result "BOOT_FAILED_STATUS" "警告"
  else
    output_result "BOOT_FAILED_STATUS" "正常"
  fi
}
# Report journald disk usage and whether systemd-journald is active.
# Outputs (via output_result): JOURNAL_DISK_USAGE (when journalctl reports
# one) and JOURNAL_STATUS.
check_journal_status() {
  local state usage

  # Without journalctl the journal is reported as not installed.
  if ! command -v journalctl &> /dev/null; then
    output_result "JOURNAL_STATUS" "未安装"
    return
  fi

  # The last line of `journalctl --disk-usage` carries the usage summary.
  usage=$(journalctl --disk-usage 2>/dev/null | tail -1)
  if [ -n "$usage" ]; then
    output_result "JOURNAL_DISK_USAGE" "$usage"
  fi

  state="未运行"
  if systemctl is-active systemd-journald &> /dev/null; then
    state="运行中"
  fi
  output_result "JOURNAL_STATUS" "$state"
}
# Count journal lines from the last 24 hours that contain alarming keywords
# (panic, segfault, corruption, malware, intrusion; case-insensitive). A line
# that contains several keywords is counted once per keyword.
# Outputs (via output_result): ERROR_KEYWORDS_24H, ERROR_KEYWORDS_STATUS
# (critical when the total is above zero).
check_error_keywords() {
  local keywords="panic segfault corruption malware intrusion"
  local total_errors=0

  if command -v journalctl &> /dev/null; then
    # Read the journal once and count every keyword in a single awk pass,
    # instead of re-running journalctl per keyword. awk always prints one
    # number, so the arithmetic-breaking "0\n0" value cannot occur.
    total_errors=$(journalctl --since '24 hours ago' --no-pager 2>/dev/null \
      | awk -v kw="$keywords" '
          BEGIN { n = split(kw, words, " ") }
          {
            line = tolower($0)
            for (i = 1; i <= n; i++) {
              if (index(line, words[i]) > 0) total++
            }
          }
          END { print total + 0 }')
  fi
  total_errors=${total_errors:-0}

  output_result "ERROR_KEYWORDS_24H" "$total_errors"
  if [ "$total_errors" -gt 0 ]; then
    output_result "ERROR_KEYWORDS_STATUS" "严重"
  else
    output_result "ERROR_KEYWORDS_STATUS" "正常"
  fi
}
# Report which syslog daemon (rsyslog, syslog-ng or syslog) is active.
# Outputs (via output_result): SYSLOG_STATUS. When systemctl is unavailable
# the default value "正常" is reported.
check_syslog_status() {
  local state="正常"
  local svc

  if command -v systemctl &> /dev/null; then
    state="syslog服务未运行"
    # The first active unit wins, probed in this order.
    for svc in rsyslog syslog-ng syslog; do
      if systemctl is-active "$svc" &> /dev/null; then
        state="${svc}运行中"
        break
      fi
    done
  fi

  output_result "SYSLOG_STATUS" "$state"
}
# ==================== 主检测流程 ====================
# Entry point: run every system-log check of this module in a fixed order,
# bracketed by start/finish log messages.
main() {
  local step

  log_info "开始系统日志检测..."

  for step in \
    check_kernel_errors \
    check_auth_failures \
    check_disk_errors \
    check_oom_logs \
    check_failed_services \
    check_log_file_sizes \
    check_boot_failed_services \
    check_journal_status \
    check_error_keywords \
    check_syslog_status; do
    "$step"
  done

  log_info "系统日志检测完成"
}
# 执行主函数
main
#!/bin/bash
################################################################################
# 时间同步检测模块
# 功能: 检测NTP同步状态、时钟偏差、系统时间、SSL证书有效期等
# 作者: Claude Code
# 日期: 2026-05-09
################################################################################
# Resolve this script's own directory and load the shared configuration and
# helper library from the staging directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
LIB_DIR="/tmp/check_modules"

# Both files are mandatory: print an error and exit with status 1 as soon as
# one of them is missing.
if [ ! -f "$LIB_DIR/config.sh" ]; then
  echo "ERROR: 配置文件不存在: $LIB_DIR/config.sh"
  exit 1
fi
source "$LIB_DIR/config.sh"

if [ ! -f "$LIB_DIR/common.sh" ]; then
  echo "ERROR: 通用函数库不存在: $LIB_DIR/common.sh"
  exit 1
fi
source "$LIB_DIR/common.sh"
# ==================== 检测函数 ====================
# Report clock synchronisation state as shown by `timedatectl status`.
# Outputs (via output_result): NTP_SYNC_STATUS, NTP_SERVICE, NTP_STATUS_LEVEL
# (warning only when the clock is explicitly reported as not synchronised).
# When timedatectl is unavailable the sync state is "未知" and NTP_SERVICE is
# empty.
check_ntp_status() {
  local sync_status="未知"
  local ntp_service=""
  local timedate_output

  if command -v timedatectl &> /dev/null; then
    timedate_output=$(timedatectl status 2>/dev/null)

    # The label changed between systemd releases: older versions print
    # "NTP synchronized:", newer ones "System clock synchronized:".
    if grep -qE '(System clock|NTP) synchronized: yes' <<< "$timedate_output"; then
      sync_status="已同步"
    elif grep -qE '(System clock|NTP) synchronized: no' <<< "$timedate_output"; then
      sync_status="未同步"
    fi

    # Same for the service line ("NTP enabled:", "systemd-timesyncd.service
    # active:" or "NTP service:" depending on the version).
    if grep -qE 'NTP service: active|NTP enabled: yes|systemd-timesyncd\.service active: yes' <<< "$timedate_output"; then
      ntp_service="运行中"
    else
      ntp_service="未运行"
    fi
  fi

  output_result "NTP_SYNC_STATUS" "$sync_status"
  output_result "NTP_SERVICE" "$ntp_service"
  if [ "$sync_status" = "未同步" ]; then
    output_result "NTP_STATUS_LEVEL" "警告"
  else
    output_result "NTP_STATUS_LEVEL" "正常"
  fi
}
# Measure the absolute clock offset in seconds.
# Uses the "Last offset" line of `chronyc tracking`; when chronyc yields
# nothing, falls back to the offset column of the peer that `ntpq -pn` marks
# with "*".
# Outputs (via output_result): CLOCK_OFFSET, CLOCK_OFFSET_STATUS (>= 1 s
# warning, >= 5 s critical) and an "ERROR:" line when not normal. Nothing is
# emitted when no offset can be determined.
check_clock_offset() {
  local offset=""
  local status="正常"
  local tracking_output ntpq_line severity

  if command -v chronyc &> /dev/null; then
    tracking_output=$(chronyc tracking 2>/dev/null)
    # Value sits between the ':' and the unit; print its absolute value.
    offset=$(awk -F: '/^Last offset/ {
      split($2, parts, " ")
      value = parts[1] + 0
      if (value < 0) value = -value
      printf "%.9f", value
      exit
    }' <<< "$tracking_output")
  fi

  # Fall back to ntpq only when chronyc produced no value. An integer test on
  # the float offset (`[ "$offset" -eq 0 ]`) is not a valid way to decide this.
  if [ -z "$offset" ] && command -v ntpq &> /dev/null; then
    ntpq_line=$(ntpq -pn 2>/dev/null | grep '^\*' | head -1)
    if [ -n "$ntpq_line" ]; then
      # Peer columns: remote refid st t when poll reach delay offset jitter.
      # The offset is field 9 (field 6 is the poll interval) and is given in
      # milliseconds.
      offset=$(awk '{
        value = $9 + 0
        if (value < 0) value = -value
        printf "%.3f", value / 1000
      }' <<< "$ntpq_line")
    fi
  fi

  [ -n "$offset" ] || return 0

  # Pass the value with -v instead of splicing it into the awk program text.
  severity=$(awk -v off="$offset" 'BEGIN {
    if (off + 0 >= 5) print 2
    else if (off + 0 >= 1) print 1
    else print 0
  }')
  case "$severity" in
    2) status="严重" ;;
    1) status="警告" ;;
  esac

  output_result "CLOCK_OFFSET" "${offset}秒"
  output_result "CLOCK_OFFSET_STATUS" "$status"
  if [ "$status" != "正常" ]; then
    echo "ERROR:时钟偏差过大: ${offset}秒"
  fi
}
# Report the current local time and UTC time as printed by `date`.
# Outputs (via output_result): SYSTEM_TIME and UTC_TIME, each only when the
# corresponding value could be obtained.
check_system_time() {
  local local_now=""
  local utc_now=""

  if command -v date &> /dev/null; then
    local_now=$(date 2>/dev/null)
    utc_now=$(date -u 2>/dev/null)
  fi

  [ -z "$local_now" ] || output_result "SYSTEM_TIME" "$local_now"
  [ -z "$utc_now" ] || output_result "UTC_TIME" "$utc_now"
}
# Report the configured time zone.
# Reads /etc/timezone when it exists; otherwise derives the zone name from the
# target of the /etc/localtime symlink (the part after ".../zoneinfo/").
# Outputs (via output_result): TIMEZONE ("未知" when nothing can be found).
check_timezone() {
  local tz_name=""

  if [ -f /etc/timezone ]; then
    tz_name=$(cat /etc/timezone 2>/dev/null)
  elif [ -f /etc/localtime ] && [ -L /etc/localtime ]; then
    tz_name=$(readlink /etc/localtime 2>/dev/null | sed 's|.*/zoneinfo/||')
  fi

  output_result "TIMEZONE" "${tz_name:-未知}"
}
# Check the expiry of the TLS certificate served on localhost:443.
# Outputs (via output_result): SSL_CERT_STATUS, SSL_CERT_DAYS_LEFT,
# SSL_CERT_EXPIRY, SSL_CERT_LEVEL; prints an "ERROR:" line when the
# certificate has expired or expires within 7 days. When no certificate or no
# parsable expiry date is obtained, nothing is emitted; when openssl is
# missing only SSL_CERT_STATUS is emitted.
check_ssl_cert() {
  local cert_status="正常"
  local days_left=0
  local expiry_date=""
  local cert_info cert_ts current_ts

  if ! command -v openssl &> /dev/null; then
    output_result "SSL_CERT_STATUS" "openssl不可用"
    return
  fi

  cert_info=$(echo | openssl s_client -connect localhost:443 2>/dev/null | openssl x509 -noout -dates 2>/dev/null)
  expiry_date=$(sed -n 's/^notAfter=//p' <<< "$cert_info")
  [ -n "$expiry_date" ] || return 0

  # Convert the certificate date to an epoch timestamp.
  cert_ts=$(date -d "$expiry_date" +%s 2>/dev/null)
  [ -n "$cert_ts" ] || return 0
  current_ts=$(date +%s)

  if [ "$cert_ts" -le "$current_ts" ]; then
    # Decide "expired" on the timestamps themselves: integer division
    # truncates toward zero, so a certificate that expired less than a day ago
    # would otherwise show 0 days left. Round the day count away from zero so
    # an expired certificate always reports a negative value.
    days_left=$(( -((current_ts - cert_ts + 86399) / 86400) ))
    cert_status="严重"
    output_result "SSL_CERT_STATUS" "已过期"
    echo "ERROR:SSL证书已过期"
  else
    days_left=$(( (cert_ts - current_ts) / 86400 ))
    if [ "$days_left" -lt 7 ]; then
      cert_status="严重"
      output_result "SSL_CERT_STATUS" "即将过期"
      echo "ERROR:SSL证书将在${days_left}天后过期"
    elif [ "$days_left" -lt 30 ]; then
      cert_status="警告"
      output_result "SSL_CERT_STATUS" "即将过期"
    else
      output_result "SSL_CERT_STATUS" "正常"
    fi
  fi

  output_result "SSL_CERT_DAYS_LEFT" "$days_left"
  output_result "SSL_CERT_EXPIRY" "$expiry_date"
  output_result "SSL_CERT_LEVEL" "$cert_status"
}
# Check the expiry of the certificate served on port 8883 inside the EMQX
# container (name taken from CONTAINERS[emqx]).
# Outputs (via output_result): EMQX_CERT_DAYS_LEFT and EMQX_CERT_LEVEL
# (< 30 days warning, < 7 days critical) when an expiry date could be read;
# EMQX_CERT_STATUS when the container is not running.
check_emqx_cert() {
  local level="正常"
  local remaining=0
  local dates not_after expiry_epoch now_epoch

  if ! is_container_running "${CONTAINERS[emqx]}"; then
    output_result "EMQX_CERT_STATUS" "容器未运行"
    return
  fi

  # Every later step silently gives up when its input is unavailable.
  command -v docker &> /dev/null || return 0

  dates=$(docker exec "${CONTAINERS[emqx]}" sh -c "echo | openssl s_client -connect localhost:8883 2>/dev/null | openssl x509 -noout -dates 2>/dev/null" 2>/dev/null)
  [ -n "$dates" ] || return 0

  not_after=$(echo "$dates" | grep "notAfter=" | sed 's/notAfter=//')
  [ -n "$not_after" ] || return 0

  expiry_epoch=$(date -d "$not_after" +%s 2>/dev/null)
  [ -n "$expiry_epoch" ] || return 0

  now_epoch=$(date +%s)
  remaining=$(( (expiry_epoch - now_epoch) / 86400 ))

  if [ "$remaining" -lt 7 ]; then
    level="严重"
  elif [ "$remaining" -lt 30 ]; then
    level="警告"
  fi

  output_result "EMQX_CERT_DAYS_LEFT" "$remaining"
  output_result "EMQX_CERT_LEVEL" "$level"
}
# Report which time synchronisation daemon is active.
# Probes systemd-timesyncd, chronyd and ntpd in that order.
# Outputs (via output_result): TIMESYNC_SERVICE (empty when none is active),
# TIMESYNC_STATUS and TIMESYNC_LEVEL (warning only when systemctl exists and
# none of the daemons is active).
check_timesync_service() {
  local state="未知"
  local active_unit=""
  local unit

  if command -v systemctl &> /dev/null; then
    state="未运行"
    for unit in systemd-timesyncd chronyd ntpd; do
      if systemctl is-active "$unit" &> /dev/null; then
        active_unit="$unit"
        state="运行中"
        break
      fi
    done
  fi

  output_result "TIMESYNC_SERVICE" "$active_unit"
  output_result "TIMESYNC_STATUS" "$state"

  case "$state" in
    "未运行") output_result "TIMESYNC_LEVEL" "警告" ;;
    *) output_result "TIMESYNC_LEVEL" "正常" ;;
  esac
}
# ==================== 主检测流程 ====================
# Entry point: run every time-synchronisation check of this module in a fixed
# order, bracketed by start/finish log messages.
main() {
  local step

  log_info "开始时间同步检测..."

  for step in \
    check_ntp_status \
    check_clock_offset \
    check_system_time \
    check_timezone \
    check_timesync_service \
    check_ssl_cert \
    check_emqx_cert; do
    "$step"
  done

  log_info "时间同步检测完成"
}
# 执行主函数
main
#!/bin/bash
################################################################################
# 安全合规检测模块
# 功能: 检测系统安全状态、认证失败、异常账户、防火墙规则等
# 作者: Claude Code
# 日期: 2026-05-10
################################################################################
# Resolve this script's own directory and load the shared configuration and
# helper library from the staging directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
LIB_DIR="/tmp/check_modules"

# Both files are mandatory: print an error and exit with status 1 as soon as
# one of them is missing.
if [ ! -f "$LIB_DIR/config.sh" ]; then
  echo "ERROR: 配置文件不存在: $LIB_DIR/config.sh"
  exit 1
fi
source "$LIB_DIR/config.sh"

if [ ! -f "$LIB_DIR/common.sh" ]; then
  echo "ERROR: 通用函数库不存在: $LIB_DIR/common.sh"
  exit 1
fi
source "$LIB_DIR/common.sh"
# ==================== 辅助函数 ====================
# Succeed (status 0) when the given command name can be resolved by the shell;
# print nothing either way.
# Arguments: $1 - command name to look up
check_command() {
  command -v "$1" &> /dev/null
}
# ==================== 检测函数 ====================
# Count "Failed password" entries in the authentication log.
# Only the log file is read (no journalctl query, to keep the check fast).
# NOTE(review): the whole file is counted, not just the last 24 hours as the
# AUTH_FAILURES_24H key suggests — confirm whether that is acceptable.
# Arguments: $1 - optional log file to analyse (default: /var/log/secure,
#                 else /var/log/auth.log)
# Outputs (via output_result): AUTH_FAILURES_24H, AUTH_FAILURES_LEVEL
# (> 100 warning, > 1000 critical).
check_auth_failures() {
  local log_file="${1:-}"
  local auth_fail_count=0

  if [ -z "$log_file" ]; then
    if [ -f "/var/log/secure" ]; then
      log_file="/var/log/secure"
    elif [ -f "/var/log/auth.log" ]; then
      log_file="/var/log/auth.log"
    fi
  fi

  if [ -n "$log_file" ] && [ -f "$log_file" ]; then
    # grep -c prints "0" itself when nothing matches (and exits 1); adding
    # `|| echo 0` would turn the value into the two-line string "0\n0".
    auth_fail_count=$(grep -c "Failed password" "$log_file" 2>/dev/null)
  fi
  auth_fail_count=${auth_fail_count:-0}

  output_result "AUTH_FAILURES_24H" "$auth_fail_count"

  if [ "$auth_fail_count" -gt 1000 ]; then
    output_result "AUTH_FAILURES_LEVEL" "严重"
  elif [ "$auth_fail_count" -gt 100 ]; then
    output_result "AUTH_FAILURES_LEVEL" "警告"
  else
    output_result "AUTH_FAILURES_LEVEL" "正常"
  fi
}
# Report whether the `last` command is available for listing recent logins.
# Outputs (via output_result): RECENT_LOGINS.
check_recent_logins() {
  # The login records themselves are never reported, so `last` is not run:
  # the previous implementation captured its output in a variable that was
  # never read.
  if check_command last; then
    output_result "RECENT_LOGINS" "最近20次登录记录已获取"
  else
    output_result "RECENT_LOGINS" "last命令不可用"
  fi
}
# Report the sessions currently listed by `who`.
# Outputs (via output_result): CURRENT_USERS_COUNT (always), CURRENT_USERS (a
# comma-separated list of the first column of `who`, when sessions exist) and
# CURRENT_USERS_DETAIL when the `w` command is available.
check_current_users() {
  local sessions=""
  local session_total="0"
  local names

  if check_command who; then
    sessions=$(who 2>/dev/null)
    if [ -n "$sessions" ]; then
      session_total=$(echo "$sessions" | wc -l)
    fi
  fi

  # The count is always emitted, defaulting to 0.
  output_result "CURRENT_USERS_COUNT" "${session_total:-0}"

  if [ -n "$sessions" ] && [ "$session_total" -gt 0 ]; then
    # First space-separated column of each line, joined with commas.
    names=$(echo "$sessions" | cut -d' ' -f1 | paste -sd, - 2>/dev/null)
    if [ -n "$names" ]; then
      output_result "CURRENT_USERS" "$names"
    fi
  fi

  if check_command w; then
    output_result "CURRENT_USERS_DETAIL" "已获取"
  fi
}
# Report the SELinux mode.
# Prefers the live mode from getenforce; without that command, falls back to
# the SELINUX= setting of /etc/selinux/config when the file exists.
# Outputs (via output_result): SELINUX_STATUS or SELINUX_CONFIG.
check_selinux_status() {
  local mode configured

  if check_command getenforce; then
    mode=$(getenforce 2>/dev/null)
    output_result "SELINUX_STATUS" "$mode"
    return
  fi

  [ -f "/etc/selinux/config" ] || return 0

  # Second '='-separated field of every line that starts with "SELINUX=".
  configured=$(awk -F= '/^SELINUX=/ { print $2 }' /etc/selinux/config 2>/dev/null)
  output_result "SELINUX_CONFIG" "$configured"
}
# Report firewall state and the number of distinct listening TCP ports.
# Outputs (via output_result):
#   FIREWALL_STATUS / FIREWALL_RULES   - only when firewall-cmd is installed
#   IPTABLES_STATUS / IPTABLES_RULES_COUNT - only when `iptables -L -n`
#                                            produces output
#   OPEN_PORTS_COUNT                   - always
check_firewall_rules() {
  local firewall_rules iptables_rules chain_count open_ports

  # firewalld
  if check_command firewall-cmd; then
    if systemctl is-active --quiet firewalld 2>/dev/null; then
      output_result "FIREWALL_STATUS" "firewalld运行中"
      firewall_rules=$(firewall-cmd --list-all 2>/dev/null)
      if [ -n "$firewall_rules" ]; then
        output_result "FIREWALL_RULES" "已获取"
      fi
    else
      output_result "FIREWALL_STATUS" "firewalld未运行"
    fi
  fi

  # iptables
  if check_command iptables; then
    iptables_rules=$(iptables -L -n 2>/dev/null)
    if [ -n "$iptables_rules" ]; then
      output_result "IPTABLES_STATUS" "iptables已配置"
      # grep -c prints "0" itself when no line matches; a `|| echo 0` fallback
      # would emit the two-line value "0\n0".
      chain_count=$(grep -c "^Chain" <<< "$iptables_rules")
      output_result "IPTABLES_RULES_COUNT" "${chain_count:-0}"
    fi
  fi

  # Distinct port numbers taken from the local-address column of ss.
  open_ports=$(ss -tlnp 2>/dev/null | grep LISTEN | awk '{print $4}' | grep -oE '[0-9]+$' | sort -u | wc -l)
  output_result "OPEN_PORTS_COUNT" "$open_ports"
}
# Look for accounts other than root whose UID field in /etc/passwd is "0".
# Outputs (via output_result): ABNORMAL_ACCOUNTS (comma-separated names, or
# "无") and ABNORMAL_ACCOUNTS_LEVEL.
check_abnormal_accounts() {
  local -a rogue=()
  local name _pw id _rest joined

  if [ -r "/etc/passwd" ]; then
    while IFS=: read -r name _pw id _rest; do
      [ "$id" = "0" ] || continue
      [ "$name" != "root" ] || continue
      rogue+=("$name")
    done < /etc/passwd 2>/dev/null || true
  fi

  if [ "${#rogue[@]}" -eq 0 ]; then
    output_result "ABNORMAL_ACCOUNTS" "无"
    output_result "ABNORMAL_ACCOUNTS_LEVEL" "正常"
    return
  fi

  # Join the collected names with commas.
  joined=$(IFS=,; printf '%s' "${rogue[*]}")
  output_result "ABNORMAL_ACCOUNTS" "$joined"
  output_result "ABNORMAL_ACCOUNTS_LEVEL" "严重"
}
# Look for unexpected SUID files in user-writable or application directories.
# Only selected directories are searched (at most ten hits each), to avoid
# walking the whole filesystem.
# Arguments: $@ - optional directories to search (default: /opt /home /root
#                 /data /app /usr/local /var/www)
# Outputs (via output_result): SUSPICIOUS_SUID_COUNT and, when hits exist,
# SUSPICIOUS_SUID_FILES (comma-separated paths).
check_suid_files() {
  local -a search_dirs=("$@")
  local -a suspicious=()
  local search_dir path joined

  if [ "${#search_dirs[@]}" -eq 0 ]; then
    search_dirs=(/opt /home /root /data /app /usr/local /var/www)
  fi

  for search_dir in "${search_dirs[@]}"; do
    [ -d "$search_dir" ] || continue
    while IFS= read -r path; do
      [ -n "$path" ] || continue

      # Skip files that belong to container image layers.
      case "$path" in
        *overlayfs* | */rootfs/*) continue ;;
      esac

      # Well-known SUID programs are matched on the exact file name. An
      # unanchored substring match ("su", "mount", "passwd", ...) would hide
      # every path that merely contains those letters, e.g. /opt/results/x.
      # sudo/sudoedit/fusermount are listed explicitly because the previous
      # substring match covered them implicitly.
      case "${path##*/}" in
        chsh | passwd | chfn | gpasswd | newgrp | mount | umount | su | sudo | sudoedit | \
          fusermount | fusermount3 | dbus-daemon-launch-helper | unix_chkpwd | pkexec)
          continue
          ;;
      esac

      suspicious+=("$path")
    done < <(find "$search_dir" -perm -4000 -type f 2>/dev/null | head -10)
  done

  if [ "${#suspicious[@]}" -gt 0 ]; then
    joined=$(IFS=,; printf '%s' "${suspicious[*]}")
    output_result "SUSPICIOUS_SUID_COUNT" "${#suspicious[@]}"
    output_result "SUSPICIOUS_SUID_FILES" "$joined"
  else
    output_result "SUSPICIOUS_SUID_COUNT" "0"
  fi
}
# List *.conf files under /etc that were modified within the last day (at most
# the first 20 reported by find).
# Outputs (via output_result): MODIFIED_CONF_COUNT and, when files were found,
# MODIFIED_CONF_FILES (comma-separated paths).
check_modified_system_files() {
  local listing joined
  local -a recent=()

  listing=$(find /etc -name "*.conf" -mtime -1 2>/dev/null | head -20)

  if [ -z "$listing" ]; then
    output_result "MODIFIED_CONF_COUNT" "0"
    return
  fi

  # One array element per path: the element count is the file count, and
  # joining the elements with ',' builds the list.
  mapfile -t recent <<< "$listing"
  joined=$(IFS=,; printf '%s' "${recent[*]}")
  output_result "MODIFIED_CONF_COUNT" "${#recent[@]}"
  output_result "MODIFIED_CONF_FILES" "$joined"
}
# Look for cron entries that run commands typical of download-and-execute or
# reverse-shell payloads (wget, curl, nc, ncat, /dev/tcp, "bash -i",
# "perl -e"). Comment lines are ignored.
# Checks the invoking user's crontab and the files inside the system cron
# directories.
# Arguments: $@ - optional cron directories to scan (default: /etc/cron.d and
#                 /etc/cron.{hourly,daily,weekly,monthly})
# Outputs (via output_result): ABNORMAL_CRON, ABNORMAL_CRON_LEVEL.
check_abnormal_cron() {
  local -a cron_dirs=("$@")
  # Tool names must stand alone as a word, so that e.g. "rsync" or "sync" is
  # not mistaken for "nc".
  local -r suspicious_re='(^|[^[:alnum:]_.-])(wget|curl|nc|ncat)([^[:alnum:]_.-]|$)|/dev/tcp|bash[[:space:]]+-i|perl[[:space:]]+-e'
  local abnormal_cron=""
  local root_cron cron_dir

  if [ "${#cron_dirs[@]}" -eq 0 ]; then
    cron_dirs=(/etc/cron.d /etc/cron.hourly /etc/cron.daily /etc/cron.weekly /etc/cron.monthly)
  fi

  # Crontab of the invoking user (root when the module runs as root).
  if check_command crontab; then
    root_cron=$(crontab -l 2>/dev/null)
    if grep -v '^[[:space:]]*#' <<< "$root_cron" | grep -qE "$suspicious_re"; then
      abnormal_cron="root crontab包含可疑命令"
    fi
  fi

  # System cron directories. Executable scripts are the normal content of
  # cron.hourly/daily/weekly/monthly, so the mere presence of executable files
  # is not a finding; inspect the file contents with the same pattern instead.
  for cron_dir in "${cron_dirs[@]}"; do
    [ -d "$cron_dir" ] || continue
    if grep -rhsIv '^[[:space:]]*#' "$cron_dir" 2>/dev/null | grep -qE "$suspicious_re"; then
      if [ -z "$abnormal_cron" ]; then
        abnormal_cron="${cron_dir}包含可疑命令"
      else
        abnormal_cron="$abnormal_cron, ${cron_dir}包含可疑命令"
      fi
    fi
  done

  if [ -n "$abnormal_cron" ]; then
    output_result "ABNORMAL_CRON" "$abnormal_cron"
    output_result "ABNORMAL_CRON_LEVEL" "警告"
  else
    output_result "ABNORMAL_CRON" "未发现异常"
    output_result "ABNORMAL_CRON_LEVEL" "正常"
  fi
}
# Identify the source addresses with the most failed password logins.
# Only the log file is analysed (no journalctl query, to keep the check fast).
# Arguments: $1 - optional log file to analyse (default: /var/log/secure,
#                 else /var/log/auth.log)
# Outputs (via output_result): MAX_LOGIN_FAILURES, BRUTE_FORCE_LEVEL (> 20
# warning, > 100 critical) and, when failures exist, BRUTE_FORCE_IPS as a
# comma-separated "count:address" list of the top five sources.
check_brute_force_ips() {
  local log_file="${1:-}"
  local brute_force_ips=""
  local max_attempts="0"

  if [ -z "$log_file" ]; then
    if [ -f "/var/log/secure" ]; then
      log_file="/var/log/secure"
    elif [ -f "/var/log/auth.log" ]; then
      log_file="/var/log/auth.log"
    fi
  fi

  if [ -n "$log_file" ] && [ -f "$log_file" ]; then
    # uniq -c left-pads the count with blanks. Let awk split the columns and
    # build "count:address"; replacing every blank with ':' would yield
    # values such as "::::::3:10.0.0.1".
    brute_force_ips=$(grep "Failed password" "$log_file" 2>/dev/null \
      | grep -oE 'from [0-9.]+' \
      | cut -d' ' -f2 \
      | sort | uniq -c | sort -rn | head -5 \
      | awk '{ print $1 ":" $2 }')
  fi

  if [ -z "$brute_force_ips" ]; then
    output_result "MAX_LOGIN_FAILURES" "0"
    output_result "BRUTE_FORCE_LEVEL" "正常"
    return
  fi

  # The list is sorted by count, so the first entry holds the maximum.
  max_attempts=${brute_force_ips%%:*}
  output_result "MAX_LOGIN_FAILURES" "$max_attempts"
  output_result "BRUTE_FORCE_IPS" "$(paste -sd, - <<< "$brute_force_ips")"

  if [ "$max_attempts" -gt 100 ]; then
    output_result "BRUTE_FORCE_LEVEL" "严重"
  elif [ "$max_attempts" -gt 20 ]; then
    output_result "BRUTE_FORCE_LEVEL" "警告"
  else
    output_result "BRUTE_FORCE_LEVEL" "正常"
  fi
}
# Detect regular user accounts (UID >= 1000) whose password field is empty.
#
# The shadow file has no UID column (its third field is the last password
# change day), so the UID is looked up by user name in the passwd file.
# Password fields such as "!!" or "*" mean locked, not empty.
#
# Arguments: $1 - shadow file (default /etc/shadow)
#            $2 - passwd file (default /etc/passwd)
# Emits: EMPTY_PASSWORD_ACCOUNTS, EMPTY_PASSWORD_LEVEL
check_empty_password_accounts() {
  local shadow_file=${1:-/etc/shadow}
  local passwd_file=${2:-/etc/passwd}

  if [ ! -r "$shadow_file" ]; then
    output_result "EMPTY_PASSWORD_ACCOUNTS" "无法读取shadow文件"
    output_result "EMPTY_PASSWORD_LEVEL" "未知"
    return
  fi

  local empty_password_accounts=""
  local username pass _rest uid
  while IFS=: read -r username pass _rest || [ -n "$username" ]; do
    # Only a completely empty password field counts.
    if [ -z "$username" ] || [ -n "$pass" ]; then
      continue
    fi
    uid=$(awk -F: -v u="$username" '$1 == u { print $3; exit }' "$passwd_file" 2>/dev/null)
    # Skip accounts without a numeric UID in the passwd file.
    case $uid in
      '' | *[!0-9]*) continue ;;
    esac
    if [ "$uid" -ge 1000 ]; then
      if [ -z "$empty_password_accounts" ]; then
        empty_password_accounts=$username
      else
        empty_password_accounts="$empty_password_accounts,$username"
      fi
    fi
  done < "$shadow_file" 2>/dev/null || true

  if [ -n "$empty_password_accounts" ]; then
    output_result "EMPTY_PASSWORD_ACCOUNTS" "$empty_password_accounts"
    output_result "EMPTY_PASSWORD_LEVEL" "严重"
  else
    output_result "EMPTY_PASSWORD_ACCOUNTS" "无"
    output_result "EMPTY_PASSWORD_LEVEL" "正常"
  fi
}
# Print the value of the first active occurrence of an sshd_config keyword.
# $1 - config file, $2 - keyword in lower case.
# Keywords are matched case-insensitively and may be indented; commented
# lines do not match because their first field starts with "#".
_sshd_config_first_value() {
  awk -v key="$2" 'tolower($1) == key { print $2; exit }' "$1" 2>/dev/null
}

# Check security-relevant sshd settings.
#
# Arguments: $1 - sshd config file (default /etc/ssh/sshd_config)
# Emits: SSH_PERMIT_ROOT, SSH_PASSWORD_AUTH, SSH_PORT, SSH_DEFAULT_PORT,
#        SSH_DEFAULT_PORT_LEVEL, SSH_ROOT_LOGIN_LEVEL, SSH_PASSWORD_AUTH_LEVEL,
#        or SSH_CONFIG_STATUS when the file does not exist.
check_ssh_security() {
  local ssh_config=${1:-/etc/ssh/sshd_config}

  if [ ! -f "$ssh_config" ]; then
    output_result "SSH_CONFIG_STATUS" "配置文件不存在"
    return
  fi

  # Only the first occurrence is taken, so repeated directives can no
  # longer produce a multi-line value that breaks the comparisons below.
  local permit_root password_auth ssh_port
  permit_root=$(_sshd_config_first_value "$ssh_config" "permitrootlogin")
  password_auth=$(_sshd_config_first_value "$ssh_config" "passwordauthentication")
  ssh_port=$(_sshd_config_first_value "$ssh_config" "port")

  output_result "SSH_PERMIT_ROOT" "${permit_root:-未设置}"
  output_result "SSH_PASSWORD_AUTH" "${password_auth:-未设置}"
  output_result "SSH_PORT" "${ssh_port:-22}"

  # An unset Port is reported as the default port 22.
  if [ -z "$ssh_port" ] || [ "$ssh_port" = "22" ]; then
    output_result "SSH_DEFAULT_PORT" "是"
    output_result "SSH_DEFAULT_PORT_LEVEL" "警告"
  else
    output_result "SSH_DEFAULT_PORT" "否"
    output_result "SSH_DEFAULT_PORT_LEVEL" "正常"
  fi

  if [ "$permit_root" = "yes" ]; then
    output_result "SSH_ROOT_LOGIN_LEVEL" "严重"
  else
    output_result "SSH_ROOT_LOGIN_LEVEL" "正常"
  fi

  if [ "$password_auth" = "yes" ]; then
    output_result "SSH_PASSWORD_AUTH_LEVEL" "警告"
  else
    output_result "SSH_PASSWORD_AUTH_LEVEL" "正常"
  fi
}
# Report listening TCP/UDP ports and flag well-known high-risk TCP ports.
# Emits: OPEN_TCP_PORTS, OPEN_UDP_PORTS, HIGH_RISK_PORTS, HIGH_RISK_PORTS_LEVEL
check_open_ports_detail() {
  # Listening TCP ports: numeric suffix of the local address column.
  local tcp_ports
  tcp_ports=$(ss -tlnp 2>/dev/null | awk '/LISTEN/ { print $4 }' \
    | grep -oE '[0-9]+$' | sort -nu | tr '\n' ',' | sed 's/,$//')
  output_result "OPEN_TCP_PORTS" "$tcp_ports"

  # Bound UDP sockets.
  local udp_ports
  udp_ports=$(ss -ulnp 2>/dev/null | awk '/^UNCONN/ { print $4 }' \
    | grep -oE '[0-9]+$' | sort -nu | tr '\n' ',' | sed 's/,$//')
  output_result "OPEN_UDP_PORTS" "$udp_ports"

  # High-risk ports are looked up in the already parsed TCP list instead of
  # re-running ss for each port and matching against its raw text.
  local high_risk_ports="23,135,139,445,1433,3389,5900,5901,6379,27017"
  local found_risk_ports=""
  local port
  for port in ${high_risk_ports//,/ }; do
    case ",${tcp_ports}," in
      *",${port},"*)
        if [ -z "$found_risk_ports" ]; then
          found_risk_ports=$port
        else
          found_risk_ports="$found_risk_ports,$port"
        fi
        ;;
    esac
  done

  if [ -n "$found_risk_ports" ]; then
    output_result "HIGH_RISK_PORTS" "$found_risk_ports"
    output_result "HIGH_RISK_PORTS_LEVEL" "警告"
  else
    output_result "HIGH_RISK_PORTS" "无"
    output_result "HIGH_RISK_PORTS_LEVEL" "正常"
  fi
}
# ==================== 主检测流程 ====================
main() {
  # Emit a marker item so the module is recognised by the collector.
  output_result "SECURITY_CHECK_MODULE" "active"
  log_info "开始安全合规检测..."

  # A failing check must never abort the module.
  set +e

  # Run every check in order; stderr is discarded and failures are ignored.
  local check_fn
  for check_fn in \
    check_auth_failures \
    check_recent_logins \
    check_current_users \
    check_selinux_status \
    check_firewall_rules \
    check_abnormal_accounts \
    check_suid_files \
    check_modified_system_files \
    check_abnormal_cron \
    check_brute_force_ips \
    check_empty_password_accounts \
    check_ssh_security \
    check_open_ports_detail; do
    "$check_fn" 2>/dev/null || true
  done

  log_info "安全合规检测完成"
}
# Run the module entry point.
main
#!/bin/bash
################################################################################
# System log inspection module
# Purpose: detect kernel errors, disk errors, service crashes, resource
#          exhaustion and similar problems from logs
# Author: Claude Code
# Date: 2026-05-10
################################################################################
# Resolve the directory containing this script and load dependencies.
# NOTE(review): SCRIPT_DIR is not referenced in the visible part of this
# module; the libraries are loaded from the fixed LIB_DIR below.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
LIB_DIR="/tmp/check_modules"
# Load the configuration file and the common function library; the module
# exits with status 1 if either file is missing.
if [ -f "$LIB_DIR/config.sh" ]; then
source "$LIB_DIR/config.sh"
else
echo "ERROR: 配置文件不存在: $LIB_DIR/config.sh"
exit 1
fi
if [ -f "$LIB_DIR/common.sh" ]; then
source "$LIB_DIR/common.sh"
else
echo "ERROR: 通用函数库不存在: $LIB_DIR/common.sh"
exit 1
fi
# ==================== 辅助函数 ====================
# Emit a result item, substituting "0" when the value is missing or empty.
# $1 - result key, $2 - value (optional).
safe_output() {
  local key=$1
  local value=$2

  if [ -z "$value" ]; then
    value="0"
  fi
  output_result "$key" "$value"
}
# Succeed (status 0) when the command named by $1 can be resolved.
check_command() {
  command -v "$1" &> /dev/null
}
# ==================== 检测函数 ====================
# Count error-like kernel messages in dmesg (ACPI lines excluded).
# Only dmesg is consulted, avoiding slow journalctl queries.
# Emits: KERNEL_ERRORS_24H, KERNEL_ERRORS_LEVEL
check_kernel_errors() {
  local kernel_errors=0

  if check_command dmesg; then
    kernel_errors=$(dmesg 2>/dev/null | grep -iE "error|fail|timeout" | grep -vc "ACPI")
  fi
  output_result "KERNEL_ERRORS_24H" "${kernel_errors:-0}"

  # More than 100 is critical, more than 10 is a warning.
  local severity="正常"
  if (( kernel_errors > 100 )); then
    severity="严重"
  elif (( kernel_errors > 10 )); then
    severity="警告"
  fi
  output_result "KERNEL_ERRORS_LEVEL" "$severity"
}
# Count disk-related error messages in dmesg.
# Only dmesg is consulted, avoiding slow journalctl queries.
# Emits: DISK_ERRORS_24H, DISK_ERRORS_DMESG, DISK_ERRORS_LEVEL
check_disk_errors() {
  local dmesg_disk_errors=0

  if check_command dmesg; then
    # grep -c already prints "0" (and exits 1) when nothing matches, so no
    # "|| echo 0" fallback is used: it would append a second "0" line.
    dmesg_disk_errors=$(dmesg 2>/dev/null \
      | grep -ciE "I/O error|disk error|ata error|sd.*error|nvme.*error|filesystem error")
    dmesg_disk_errors=${dmesg_disk_errors:-0}
  fi

  local disk_errors=$dmesg_disk_errors
  output_result "DISK_ERRORS_24H" "$disk_errors"
  output_result "DISK_ERRORS_DMESG" "$dmesg_disk_errors"

  if [ "$disk_errors" -gt 10 ]; then
    output_result "DISK_ERRORS_LEVEL" "严重"
  elif [ "$disk_errors" -gt 0 ]; then
    output_result "DISK_ERRORS_LEVEL" "警告"
  else
    output_result "DISK_ERRORS_LEVEL" "正常"
  fi
}
# Summarise the most recent error-like dmesg lines (last 50 matches).
# Emits: DMESG_ERRORS_COUNT, and DMESG_ERROR_TYPES ("<count> <type>,...")
#        when at least one line matched.
check_dmesg_errors() {
  local dmesg_output
  dmesg_output=$(dmesg 2>/dev/null | grep -iE "error|fail|timeout|warning" | tail -50)

  if [ -z "$dmesg_output" ]; then
    output_result "DMESG_ERRORS_COUNT" "0"
    return
  fi

  local error_count
  error_count=$(wc -l <<< "$dmesg_output")
  output_result "DMESG_ERRORS_COUNT" "$error_count"

  # Keywords are extracted case-insensitively (like the filter above) and
  # folded to lower case so "Error" and "error" land in the same bucket.
  # awk strips the padding uniq -c puts in front of each count.
  local error_types
  error_types=$(grep -oiE "error|fail|timeout|warning" <<< "$dmesg_output" \
    | tr '[:upper:]' '[:lower:]' \
    | sort | uniq -c | sort -rn \
    | awk '{ print $1 " " $2 }' \
    | tr '\n' ',' | sed 's/,$//')
  output_result "DMESG_ERROR_TYPES" "$error_types"
}
# Collect ERROR/WARN statistics from the system message log.
#
# Arguments: $1 - log file (default /var/log/messages; /var/log/syslog is
#                 used when that file does not exist)
# Emits: MESSAGES_ERRORS_COUNT, MESSAGES_WARNS_COUNT, MESSAGES_LOG_SIZE when
#        the file was modified within the last day; MESSAGES_LOG_STATUS when
#        no log file exists. The counts cover the whole file.
check_messages_log() {
  local messages_file=${1:-/var/log/messages}

  if [ ! -f "$messages_file" ]; then
    if [ -f "/var/log/syslog" ]; then
      messages_file="/var/log/syslog"
    else
      output_result "MESSAGES_LOG_STATUS" "日志文件不存在"
      return
    fi
  fi

  # Skip logs that have not been written to in the last 24 hours.
  if [ -z "$(find "$messages_file" -mtime -1 -type f 2>/dev/null)" ]; then
    return 0
  fi

  # grep -c prints "0" itself when nothing matches; adding "|| echo 0"
  # would append a second line, so only an empty result is defaulted.
  local error_count
  error_count=$(grep -c "ERROR" "$messages_file" 2>/dev/null)
  output_result "MESSAGES_ERRORS_COUNT" "${error_count:-0}"

  local warn_count
  warn_count=$(grep -c "WARN" "$messages_file" 2>/dev/null)
  output_result "MESSAGES_WARNS_COUNT" "${warn_count:-0}"

  local log_size
  log_size=$(du -h "$messages_file" 2>/dev/null | awk '{ print $1 }')
  output_result "MESSAGES_LOG_SIZE" "$log_size"
}
# Detect kernel panic / oops traces in dmesg and crash dumps in /var/crash.
# Only dmesg is consulted, avoiding slow journalctl queries.
# Emits: KERNEL_PANIC_COUNT, KERNEL_OOPS_COUNT, KERNEL_CRASH_FILES,
#        KERNEL_STABILITY_LEVEL
check_kernel_panic() {
  local panic_count=0
  local oops_count=0

  if check_command dmesg; then
    # grep -c prints "0" on no match; a "|| echo 0" fallback would turn the
    # value into two lines, so only an empty result is defaulted.
    panic_count=$(dmesg 2>/dev/null | grep -ci "kernel panic")
    oops_count=$(dmesg 2>/dev/null | grep -ci "kernel oops\|BUG:")
    panic_count=${panic_count:-0}
    oops_count=${oops_count:-0}
  fi

  # Count *.dump files left under /var/crash.
  local crash_files="0"
  if [ -d "/var/crash" ]; then
    crash_files=$(find /var/crash -name "*.dump" 2>/dev/null | wc -l)
  fi

  output_result "KERNEL_PANIC_COUNT" "$panic_count"
  output_result "KERNEL_OOPS_COUNT" "$oops_count"
  output_result "KERNEL_CRASH_FILES" "$crash_files"

  if [ "$panic_count" -gt 0 ] || [ "$crash_files" -gt 0 ]; then
    output_result "KERNEL_STABILITY_LEVEL" "严重"
  elif [ "$oops_count" -gt 5 ]; then
    output_result "KERNEL_STABILITY_LEVEL" "警告"
  else
    output_result "KERNEL_STABILITY_LEVEL" "正常"
  fi
}
# Detect containers that Docker has restarted.
# Only Docker is consulted, avoiding slow journalctl queries.
#
# The restart counter is read through `docker inspect`; it is not one of
# the fields `docker ps --format` offers, so the ps-based template failed
# and the check always reported zero.
# Emits: SERVICE_CRASH_COUNT, CRASHED_SERVICES (when any),
#        SERVICE_STABILITY_LEVEL
check_service_crashes() {
  local crash_count=0
  local crashed_services=""

  if check_command docker; then
    local -a container_ids=()
    mapfile -t container_ids < <(docker ps -aq 2>/dev/null)

    if [ "${#container_ids[@]}" -gt 0 ]; then
      local restarted_containers
      # inspect prints names with a leading "/", which is stripped; rows
      # with a zero or empty counter are dropped.
      restarted_containers=$(docker inspect \
        --format '{{.Name}}: {{.RestartCount}}' "${container_ids[@]}" 2>/dev/null \
        | sed 's|^/||' | grep -v ": 0$" | grep -v ": $")
      if [ -n "$restarted_containers" ]; then
        crash_count=$(wc -l <<< "$restarted_containers")
        crashed_services=${restarted_containers//$'\n'/,}
      fi
    fi
  fi

  output_result "SERVICE_CRASH_COUNT" "$crash_count"
  if [ -n "$crashed_services" ]; then
    output_result "CRASHED_SERVICES" "$crashed_services"
  fi

  # The level depends on how many containers restarted, not on the counters.
  if [ "$crash_count" -gt 5 ]; then
    output_result "SERVICE_STABILITY_LEVEL" "严重"
  elif [ "$crash_count" -gt 0 ]; then
    output_result "SERVICE_STABILITY_LEVEL" "警告"
  else
    output_result "SERVICE_STABILITY_LEVEL" "正常"
  fi
}
# List systemd units in the failed state.
# Units that are merely "dead" (not running) are deliberately not checked.
# Emits: SYSTEMD_FAILED_COUNT, SYSTEMD_FAILED_SERVICES (when any),
#        SYSTEMD_FAILED_LEVEL, or SYSTEMD_STATUS when systemctl is missing.
check_systemd_failures() {
  if ! check_command systemctl; then
    output_result "SYSTEMD_STATUS" "systemctl不可用"
    return
  fi

  # Newer systemd prefixes failed units with a status bullet, which would
  # otherwise be taken as the unit name. Unit names always contain a dot
  # (name.type), so a first field without one is treated as the bullet.
  local failed_units
  failed_units=$(systemctl list-units --state=failed --no-legend 2>/dev/null \
    | awk 'NF { if ($1 ~ /\./) print $1; else print $2 }')

  if [ -z "$failed_units" ]; then
    output_result "SYSTEMD_FAILED_COUNT" "0"
    output_result "SYSTEMD_FAILED_LEVEL" "正常"
    return
  fi

  local failed_count
  failed_count=$(wc -l <<< "$failed_units")
  output_result "SYSTEMD_FAILED_COUNT" "$failed_count"
  output_result "SYSTEMD_FAILED_SERVICES" "${failed_units//$'\n'/,}"
  output_result "SYSTEMD_FAILED_LEVEL" "严重"
}
# Look for OOM killer activity in dmesg and for core dump files.
# Only dmesg is consulted, avoiding slow journalctl queries.
# Emits: OOM_KILLER_COUNT, CORE_DUMP_FILES, OOM_VICTIMS (when any), OOM_LEVEL
check_oom_killer() {
  local oom_count=0
  local oom_victims=""

  if check_command dmesg; then
    local oom_lines
    oom_lines=$(dmesg 2>/dev/null | grep -i "out of memory")
    if [ -n "$oom_lines" ]; then
      oom_count=$(wc -l <<< "$oom_lines")
      # Up to five "Killed process <pid> (<name>)" fragments.
      oom_victims=$(grep -oE "Killed process [0-9]+ \(.*\)" <<< "$oom_lines" \
        | head -5 | tr '\n' ',' | sed 's/,$//')
    fi
  fi

  # Count core.* files at most two levels deep in directories that exist.
  local core_files="0"
  for core_dir in /data /tmp /root; do
    [ -d "$core_dir" ] || continue
    local dir_cores
    dir_cores=$(find "$core_dir" -maxdepth 2 -name "core.*" 2>/dev/null | wc -l)
    core_files=$((core_files + dir_cores))
  done

  output_result "OOM_KILLER_COUNT" "$oom_count"
  output_result "CORE_DUMP_FILES" "$core_files"
  if [ -n "$oom_victims" ]; then
    output_result "OOM_VICTIMS" "$oom_victims"
  fi

  local severity="正常"
  if [ "$oom_count" -gt 0 ] || [ "$core_files" -gt 0 ]; then
    severity="严重"
  fi
  output_result "OOM_LEVEL" "$severity"
}
# Detect resource exhaustion events (file descriptors, memory, process
# limits) recorded in dmesg.
# Only dmesg is consulted, avoiding slow journalctl queries.
# Emits: RESOURCE_EXHAUSTION_EVENTS, RESOURCE_EXHAUSTION_LEVEL
check_resource_exhaustion() {
  local resource_events=""

  if check_command dmesg; then
    # Read the kernel ring buffer once and reuse it for all three scans.
    local dmesg_log
    dmesg_log=$(dmesg 2>/dev/null)

    # grep -c prints "0" itself when nothing matches; a "|| echo 0"
    # fallback would produce "0<newline>0" and break the -gt tests.
    local fd_exhaust
    fd_exhaust=$(grep -ci "VFS: file-max limit\|too many open files" <<< "$dmesg_log")
    if [ "${fd_exhaust:-0}" -gt 0 ]; then
      resource_events="${resource_events}文件描述符耗尽(${fd_exhaust}次),"
    fi

    local mem_exhaust
    mem_exhaust=$(grep -ci "out of memory\|oom-killer" <<< "$dmesg_log")
    if [ "${mem_exhaust:-0}" -gt 0 ]; then
      resource_events="${resource_events}内存耗尽(${mem_exhaust}次),"
    fi

    local process_limit
    process_limit=$(grep -ci "resource temporarily unavailable\|cannot allocate memory" <<< "$dmesg_log")
    if [ "${process_limit:-0}" -gt 0 ]; then
      resource_events="${resource_events}进程资源限制(${process_limit}次),"
    fi
  fi

  # Drop the trailing comma.
  resource_events=${resource_events%,}

  if [ -n "$resource_events" ]; then
    output_result "RESOURCE_EXHAUSTION_EVENTS" "$resource_events"
    output_result "RESOURCE_EXHAUSTION_LEVEL" "严重"
  else
    output_result "RESOURCE_EXHAUSTION_EVENTS" "无"
    output_result "RESOURCE_EXHAUSTION_LEVEL" "正常"
  fi
}
# Detect hardware problems: machine-check / hardware errors in dmesg and
# temperature alarms reported by `sensors`.
# Emits: HARDWARE_ERRORS, HARDWARE_ERRORS_LEVEL
check_hardware_errors() {
  local hw_errors=""

  if check_command dmesg; then
    # grep -c prints "0" itself when nothing matches; a "|| echo 0"
    # fallback would produce "0<newline>0" and break the -gt test.
    local mce_errors
    mce_errors=$(dmesg 2>/dev/null | grep -ci "mce: machine check\|hardware error")
    mce_errors=${mce_errors:-0}
    if [ "$mce_errors" -gt 0 ]; then
      hw_errors="${hw_errors}内存错误(${mce_errors}次),"
    fi
  fi

  # Any ALARM / CRITICAL line in the sensors output counts as an alert.
  if check_command sensors; then
    if sensors 2>/dev/null | grep -qi "ALARM\|CRITICAL"; then
      hw_errors="${hw_errors}温度告警,"
    fi
  fi

  # Drop the trailing comma.
  hw_errors=${hw_errors%,}

  if [ -n "$hw_errors" ]; then
    output_result "HARDWARE_ERRORS" "$hw_errors"
    output_result "HARDWARE_ERRORS_LEVEL" "严重"
  else
    output_result "HARDWARE_ERRORS" "无"
    output_result "HARDWARE_ERRORS_LEVEL" "正常"
  fi
}
# Flag common system log files larger than 500 MB.
# Emits: LARGE_LOG_FILES ("name:<size>MB,..." or "无"), LARGE_LOG_FILES_LEVEL
check_log_file_sizes() {
  local large_logs=""

  for log_file in /var/log/messages /var/log/syslog /var/log/kern.log /var/log/auth.log /var/log/secure; do
    # Paths that are not regular files are ignored.
    [ -f "$log_file" ] || continue

    local log_size
    log_size=$(du -m "$log_file" 2>/dev/null | awk '{print $1}')
    if [ "$log_size" -gt 500 ]; then
      local log_name
      log_name=$(basename "$log_file")
      large_logs+="${log_name}:${log_size}MB,"
    fi
  done

  # Strip the trailing comma left by the loop.
  large_logs=${large_logs%,}

  if [ -z "$large_logs" ]; then
    output_result "LARGE_LOG_FILES" "无"
    output_result "LARGE_LOG_FILES_LEVEL" "正常"
  else
    output_result "LARGE_LOG_FILES" "$large_logs"
    output_result "LARGE_LOG_FILES_LEVEL" "警告"
  fi
}
# Detect network error messages in dmesg; more than 10 matches are reported.
# Emits: NETWORK_ERRORS, NETWORK_ERRORS_LEVEL
check_network_errors() {
  local net_errors=""

  if check_command dmesg; then
    # grep -c prints "0" itself when nothing matches; a "|| echo 0"
    # fallback would produce "0<newline>0" and break the -gt test.
    local dmesg_net_errors
    dmesg_net_errors=$(dmesg 2>/dev/null \
      | grep -ciE "network.*unreachable|connection.*timeout|link.*down|network.*dead")
    dmesg_net_errors=${dmesg_net_errors:-0}
    if [ "$dmesg_net_errors" -gt 10 ]; then
      net_errors="${net_errors}网络错误(${dmesg_net_errors}次),"
    fi
  fi

  # Drop the trailing comma.
  net_errors=${net_errors%,}

  if [ -n "$net_errors" ]; then
    output_result "NETWORK_ERRORS" "$net_errors"
    output_result "NETWORK_ERRORS_LEVEL" "警告"
  else
    output_result "NETWORK_ERRORS" "无"
    output_result "NETWORK_ERRORS_LEVEL" "正常"
  fi
}
# ==================== 主检测流程 ====================
main() {
  # Emit a marker item so the module is recognised by the collector.
  output_result "SYSTEM_LOGS_CHECK_MODULE" "active"
  log_info "开始系统日志检测..."

  # A failing check must never abort the module.
  set +e

  # Run every check in order; stderr is discarded and failures are ignored.
  local check_fn
  for check_fn in \
    check_kernel_errors \
    check_disk_errors \
    check_dmesg_errors \
    check_messages_log \
    check_kernel_panic \
    check_service_crashes \
    check_systemd_failures \
    check_oom_killer \
    check_resource_exhaustion \
    check_hardware_errors \
    check_log_file_sizes \
    check_network_errors; do
    "$check_fn" 2>/dev/null || true
  done

  log_info "系统日志检测完成"
}
# Run the module entry point.
main
#!/bin/bash
################################################################################
# Time synchronisation inspection module
# Purpose: check NTP sync state, clock offset, SSL certificate validity, etc.
# Author: Claude Code
# Date: 2026-05-10
################################################################################
# Resolve the directory containing this script and load dependencies.
# NOTE(review): SCRIPT_DIR is not referenced in the visible part of this
# module; the libraries are loaded from the fixed LIB_DIR below.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
LIB_DIR="/tmp/check_modules"
# Load the configuration file and the common function library; the module
# exits with status 1 if either file is missing.
if [ -f "$LIB_DIR/config.sh" ]; then
source "$LIB_DIR/config.sh"
else
echo "ERROR: 配置文件不存在: $LIB_DIR/config.sh"
exit 1
fi
if [ -f "$LIB_DIR/common.sh" ]; then
source "$LIB_DIR/common.sh"
else
echo "ERROR: 通用函数库不存在: $LIB_DIR/common.sh"
exit 1
fi
# ==================== 辅助函数 ====================
# Succeed (status 0) when the command named by $1 can be resolved.
check_command() {
  command -v "$1" &> /dev/null
}
# Print the number of whole days until the given expiry date.
# $1 - expiry date in any format `date -d` understands.
# Prints "未知" when the argument is empty or cannot be parsed.
calc_cert_expiry_days() {
  local expiry_date=$1

  if [ -z "$expiry_date" ]; then
    echo "未知"
    return
  fi

  # Parse the full string first; if that fails, retry with only its first
  # ten characters.
  local expiry_epoch
  expiry_epoch=$(date -d "$expiry_date" +%s 2>/dev/null) \
    || expiry_epoch=$(date -d "${expiry_date:0:10}" +%s 2>/dev/null)

  if [ -z "$expiry_epoch" ]; then
    echo "未知"
    return
  fi

  local now_epoch
  now_epoch=$(date +%s)
  echo "$(( (expiry_epoch - now_epoch) / 86400 ))"
}
# ==================== 检测函数 ====================
# Report NTP service and clock synchronisation state.
#
# timedatectl output is matched in both its newer wording
# ("NTP service: active", "System clock synchronized: yes") and its older
# wording ("NTP enabled: yes", "NTP synchronized: yes"), so hosts printing
# the older form are no longer reported as unsynchronised.
# Emits: NTP_SERVICE_STATUS, NTP_SERVICE_LEVEL, SYSTEM_CLOCK_SYNC,
#        SYSTEM_CLOCK_SYNC_LEVEL, NTP_SERVICE_NAME, NTP_DAEMON (as applicable)
check_ntp_status() {
  # Method 1: timedatectl.
  if check_command timedatectl; then
    local timedate_output
    timedate_output=$(timedatectl status 2>/dev/null)

    if grep -q "NTP service: active" <<< "$timedate_output"; then
      output_result "NTP_SERVICE_STATUS" "运行中"
    elif grep -q "NTP enabled: yes" <<< "$timedate_output"; then
      output_result "NTP_SERVICE_STATUS" "已启用"
    else
      output_result "NTP_SERVICE_STATUS" "未启用"
      output_result "NTP_SERVICE_LEVEL" "警告"
    fi

    if grep -qE "(System clock|NTP) synchronized: yes" <<< "$timedate_output"; then
      output_result "SYSTEM_CLOCK_SYNC" "已同步"
    else
      output_result "SYSTEM_CLOCK_SYNC" "未同步"
      output_result "SYSTEM_CLOCK_SYNC_LEVEL" "警告"
    fi

    # Text following "NTP service: " (sed is used so grep -P is not needed).
    local ntp_service
    ntp_service=$(sed -n 's/^.*NTP service: //p' <<< "$timedate_output" | head -1)
    if [ -n "$ntp_service" ]; then
      output_result "NTP_SERVICE_NAME" "$ntp_service"
    fi
  fi

  # Method 2: report the first active time daemon.
  local daemon
  for daemon in chronyd ntpd systemd-timesyncd; do
    if systemctl is-active --quiet "$daemon" 2>/dev/null; then
      output_result "NTP_DAEMON" "$daemon"
      break
    fi
  done
}
# Report the configured / selected time sources.
#
# Tries `chronyc sources`, then `ntpq -p`, then the daemon config files.
# Emits: NTP_SOURCES, NTP_SOURCES_COUNT, NTP_CURRENT_SOURCE (when one is
#        selected), or NTP_CONFIG_SOURCES from the config fallback.
check_ntp_sources() {
  local sources=""
  local source_count=0
  local current_source=""

  # Method 1: chronyc. Each source row starts with a mode character
  # (^ server, = peer, # local clock) followed by a state character
  # (* current, + combined, - not combined, ? unusable, x, ~).
  if check_command chronyc; then
    local chronyc_sources
    chronyc_sources=$(chronyc sources 2>/dev/null)
    if [ -n "$chronyc_sources" ]; then
      sources=$(grep -E '^[=#^][*+]' <<< "$chronyc_sources" \
        | awk '{ print $2 }' | tr '\n' ',' | sed 's/,$//')
      source_count=$(grep -cE '^[=#^][-*+?x~]' <<< "$chronyc_sources")
      current_source=$(grep -E '^[=#^][*]' <<< "$chronyc_sources" \
        | awk '{ print $2 }' | head -1)
      output_result "NTP_SOURCES" "$sources"
      output_result "NTP_SOURCES_COUNT" "${source_count:-0}"
      if [ -n "$current_source" ]; then
        output_result "NTP_CURRENT_SOURCE" "$current_source"
      fi
    fi
  fi

  # Method 2: ntpq. The first character of a peer row is its tally code,
  # which is stripped from the reported name; the two header rows are
  # skipped when counting peers.
  if [ -z "$sources" ] && check_command ntpq; then
    local ntpq_output
    ntpq_output=$(ntpq -p 2>/dev/null)
    if [ -n "$ntpq_output" ]; then
      sources=$(grep -E '^[*+#o]' <<< "$ntpq_output" \
        | awk '{ print substr($1, 2) }' | tr '\n' ',' | sed 's/,$//')
      source_count=$(awk 'NR > 2 && NF { n++ } END { print n + 0 }' <<< "$ntpq_output")
      current_source=$(grep -E '^[*]' <<< "$ntpq_output" \
        | awk '{ print substr($1, 2) }' | head -1)
      output_result "NTP_SOURCES" "$sources"
      output_result "NTP_SOURCES_COUNT" "$source_count"
      if [ -n "$current_source" ]; then
        output_result "NTP_CURRENT_SOURCE" "$current_source"
      fi
    fi
  fi

  # Method 3: fall back to the server/pool lines of the config files.
  if [ -z "$sources" ]; then
    local conf_file
    for conf_file in /etc/chrony.conf /etc/ntp.conf; do
      if [ -f "$conf_file" ]; then
        sources=$(grep "^server\|^pool" "$conf_file" 2>/dev/null \
          | awk '{ print $2 }' | tr '\n' ',' | sed 's/,$//')
        output_result "NTP_CONFIG_SOURCES" "$sources"
        break
      fi
    done
  fi
}
# Report the clock offset and classify it.
#
# chronyc tracking prints "RMS offset" in seconds; it is converted to
# milliseconds before being reported and compared. `ntpq -p` already
# reports the offset column in milliseconds. Offsets are fractional, so
# the thresholds (> 1 s warning, > 5 s critical) are evaluated in awk.
# Emits: NTP_OFFSET_MS, NTP_OFFSET_SEC (chrony only), NTP_OFFSET_LEVEL
check_clock_offset() {
  local offset_ms=""
  local offset_level="正常"
  local number_re='^[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?$'

  # Method 1: chronyc tracking.
  if check_command chronyc; then
    local rms_sec
    rms_sec=$(chronyc tracking 2>/dev/null | awk '/^RMS offset/ { print $4; exit }')
    if [[ "$rms_sec" =~ $number_re ]]; then
      offset_ms=$(awk -v s="$rms_sec" 'BEGIN { printf "%.3f", s * 1000 }')
      output_result "NTP_OFFSET_MS" "$offset_ms"
      output_result "NTP_OFFSET_SEC" "$(awk -v s="$rms_sec" 'BEGIN { printf "%.6f", s }')"
    fi
  fi

  # Method 2: ntpq, used whenever chronyc did not yield a usable value.
  if [ -z "$offset_ms" ] && check_command ntpq; then
    local ntpq_offset
    ntpq_offset=$(ntpq -p 2>/dev/null | awk '/^\*/ { print $9; exit }')
    if [[ "$ntpq_offset" =~ $number_re ]]; then
      offset_ms=$ntpq_offset
      output_result "NTP_OFFSET_MS" "$offset_ms"
    fi
  fi

  if [ -n "$offset_ms" ]; then
    offset_level=$(awk -v ms="$offset_ms" 'BEGIN {
      ms += 0
      if (ms < 0) ms = -ms
      if (ms > 5000) print "严重"
      else if (ms > 1000) print "警告"
      else print "正常"
    }')
  fi
  output_result "NTP_OFFSET_LEVEL" "$offset_level"
}
# Report the current system date, epoch timestamp and time zone.
# Emits: SYSTEM_DATETIME, SYSTEM_TIMESTAMP, SYSTEM_TIMEZONE (when known)
check_system_time() {
  local now_text now_epoch tz_name

  now_text=$(date 2>/dev/null)
  output_result "SYSTEM_DATETIME" "$now_text"

  now_epoch=$(date +%s 2>/dev/null)
  output_result "SYSTEM_TIMESTAMP" "$now_epoch"

  # Third field of the "Time zone" line printed by timedatectl.
  tz_name=$(timedatectl status 2>/dev/null | awk '/Time zone/ { print $3 }')
  [ -n "$tz_name" ] && output_result "SYSTEM_TIMEZONE" "$tz_name"
}
# Check the remaining validity of HTTPS (Nginx / system) certificates.
#
# Arguments: optional list of certificate files; defaults to the *.crt
#            files in /etc/nginx/ssl, /etc/nginx/certs and /etc/ssl/certs.
# Paths are kept in an array, so names containing spaces or commas are
# handled correctly.
# Emits: HTTPS_CERT_INFO, HTTPS_CERT_MIN_DAYS, HTTPS_CERT_LEVEL when at
#        least one certificate could be parsed; HTTPS_CERT_STATUS when no
#        certificate file exists.
check_https_cert_expiry() {
  local -a candidates=("$@")
  if [ "${#candidates[@]}" -eq 0 ]; then
    # Unmatched globs stay literal and are filtered by the -f test below.
    candidates=(/etc/nginx/ssl/*.crt /etc/nginx/certs/*.crt /etc/ssl/certs/*.crt)
  fi

  local -a cert_files=()
  local cert_path
  for cert_path in "${candidates[@]}"; do
    if [ -f "$cert_path" ]; then
      cert_files+=("$cert_path")
    fi
  done

  if [ "${#cert_files[@]}" -eq 0 ]; then
    output_result "HTTPS_CERT_STATUS" "未找到证书文件"
    return
  fi

  local min_days=9999
  local cert_info=""
  local cert expiry_date days_left
  for cert in "${cert_files[@]}"; do
    expiry_date=$(openssl x509 -in "$cert" -noout -enddate 2>/dev/null | cut -d= -f2)
    [ -n "$expiry_date" ] || continue
    days_left=$(calc_cert_expiry_days "$expiry_date")
    [ "$days_left" != "未知" ] || continue
    if [ "$days_left" -lt "$min_days" ]; then
      min_days=$days_left
    fi
    cert_info="${cert_info}$(basename "$cert"):${days_left}天(${expiry_date}),"
  done

  # Drop the trailing comma.
  cert_info=${cert_info%,}

  # Files exist but none could be parsed: nothing is reported.
  if [ -z "$cert_info" ]; then
    return 0
  fi

  output_result "HTTPS_CERT_INFO" "$cert_info"
  output_result "HTTPS_CERT_MIN_DAYS" "$min_days"
  # 7 days or less is critical, 30 days or less is a warning.
  if [ "$min_days" -le 7 ]; then
    output_result "HTTPS_CERT_LEVEL" "严重"
  elif [ "$min_days" -le 30 ]; then
    output_result "HTTPS_CERT_LEVEL" "警告"
  else
    output_result "HTTPS_CERT_LEVEL" "正常"
  fi
}
# Check the remaining validity of EMQX SSL certificates.
#
# Arguments: optional list of certificate directories; defaults to
#            /etc/emqx/certs, /emqx/certs and /var/lib/emqx/certs.
# Every *.crt / *.pem file is tried; files openssl cannot parse as a
# certificate (for example private keys) are ignored. If no certificate
# could be parsed, the module reports that none was found instead of the
# 9999-day placeholder.
# Emits: EMQX_CERT_INFO, EMQX_CERT_MIN_DAYS, EMQX_CERT_LEVEL, or
#        EMQX_CERT_STATUS when no certificate was found.
check_emqx_cert_expiry() {
  local -a cert_dirs=("$@")
  if [ "${#cert_dirs[@]}" -eq 0 ]; then
    cert_dirs=(/etc/emqx/certs /emqx/certs /var/lib/emqx/certs)
  fi

  local min_days=9999
  local cert_info=""
  local cert_dir cert expiry_date days_left
  for cert_dir in "${cert_dirs[@]}"; do
    [ -d "$cert_dir" ] || continue
    for cert in "$cert_dir"/*.crt "$cert_dir"/*.pem; do
      [ -f "$cert" ] || continue
      expiry_date=$(openssl x509 -in "$cert" -noout -enddate 2>/dev/null | cut -d= -f2)
      [ -n "$expiry_date" ] || continue
      days_left=$(calc_cert_expiry_days "$expiry_date")
      [ "$days_left" != "未知" ] || continue
      if [ "$days_left" -lt "$min_days" ]; then
        min_days=$days_left
      fi
      cert_info="${cert_info}$(basename "$cert"):${days_left}天, "
    done
  done

  if [ -z "$cert_info" ]; then
    output_result "EMQX_CERT_STATUS" "未找到EMQX证书"
    return
  fi

  # Drop the trailing ", " separator.
  cert_info=${cert_info%, }
  output_result "EMQX_CERT_INFO" "$cert_info"
  output_result "EMQX_CERT_MIN_DAYS" "$min_days"
  # 7 days or less is critical, 30 days or less is a warning.
  if [ "$min_days" -le 7 ]; then
    output_result "EMQX_CERT_LEVEL" "严重"
  elif [ "$min_days" -le 30 ]; then
    output_result "EMQX_CERT_LEVEL" "警告"
  else
    output_result "EMQX_CERT_LEVEL" "正常"
  fi
}
# Check the remaining validity of MySQL SSL certificates, if any exist.
# Emits: MYSQL_CERT_MIN_DAYS, MYSQL_CERT_LEVEL - only when at least one
#        certificate file exists and an expiry date could be computed.
check_mysql_cert_expiry() {
  local cert_found="false"
  local min_days=9999

  # Candidate locations; the glob is expanded by the for statement.
  for cert in /var/lib/mysql/private_key.pem /var/lib/mysql/server-cert.pem /etc/mysql/certs/*.pem; do
    [ -f "$cert" ] || continue
    cert_found="true"

    local expiry_date
    expiry_date=$(openssl x509 -in "$cert" -noout -enddate 2>/dev/null | cut -d= -f2)
    [ -n "$expiry_date" ] || continue

    local days_left
    days_left=$(calc_cert_expiry_days "$expiry_date")
    if [ "$days_left" != "未知" ] && [ "$days_left" -lt "$min_days" ]; then
      min_days=$days_left
    fi
  done

  if [ "$cert_found" = "true" ] && [ "$min_days" -lt 9999 ]; then
    output_result "MYSQL_CERT_MIN_DAYS" "$min_days"
    # 7 days or less is critical, 30 days or less is a warning.
    local severity="正常"
    if [ "$min_days" -le 7 ]; then
      severity="严重"
    elif [ "$min_days" -le 30 ]; then
      severity="警告"
    fi
    output_result "MYSQL_CERT_LEVEL" "$severity"
  fi
}
# Report system uptime in days and whether the hardware clock is readable.
# Emits: SYSTEM_UPTIME_DAYS, HWCLOCK_STATUS (when hwclock returns output)
check_time_drift_impact() {
  # Integer part of the first field of /proc/uptime (seconds since boot).
  local uptime_seconds
  uptime_seconds=$(awk '{ split($1, parts, "."); print parts[1] }' /proc/uptime 2>/dev/null)
  if [ -n "$uptime_seconds" ]; then
    output_result "SYSTEM_UPTIME_DAYS" "$((uptime_seconds / 86400))"
  fi

  # Only the readability of the hardware clock is recorded; its value is
  # not compared with the system clock.
  if check_command hwclock; then
    local hwclock_time
    hwclock_time=$(hwclock --show 2>/dev/null)
    if [ -n "$hwclock_time" ]; then
      output_result "HWCLOCK_STATUS" "已读取"
    fi
  fi
}
# ==================== 主检测流程 ====================
main() {
  log_info "开始时间同步检测..."

  # Run every check in order; stderr is discarded and failures are ignored.
  local check_fn
  for check_fn in \
    check_ntp_status \
    check_ntp_sources \
    check_clock_offset \
    check_system_time \
    check_https_cert_expiry \
    check_emqx_cert_expiry \
    check_mysql_cert_expiry \
    check_time_drift_impact; do
    "$check_fn" 2>/dev/null || true
  done

  log_info "时间同步检测完成"
}
# Run the module entry point.
main
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论