提交 18da83b0 authored 作者: 陈泽健's avatar 陈泽健

feat(service): 添加Nginx/Nacos/FastDFS深度检测功能并优化EMQX监测

- 新增Nginx深度检测脚本,实现连接统计、进程监控、配置检查等功能
- 新增Nacos深度检测脚本,实现服务实例统计、健康状态、Raft集群检测
- 新增FastDFS深度检测脚本,实现存储状态、同步状态、文件统计检测
- 优化EMQX最大连接数检测逻辑,修正集群模式识别算法
- 更新PowerShell主脚本,添加新的检测模块映射和结果显示配置
- 扩展系统阈值配置,为新增服务添加相应的告警规则定义
上级 658711d3
...@@ -349,9 +349,9 @@ function Get-ModuleCategory { ...@@ -349,9 +349,9 @@ function Get-ModuleCategory {
"^2[67]_" { return "EMQX消息队列" } "^2[67]_" { return "EMQX消息队列" }
"^28_" { return "Java应用" } "^28_" { return "Java应用" }
"^29_" { return "Python应用" } "^29_" { return "Python应用" }
"^30_" { return "Nginx应用" } "^3[04]_" { return "Nginx应用" }
"^31_" { return "Nacos应用" } "^3[15]_" { return "Nacos应用" }
"^32_" { return "FastDFS应用" } "^3[26]_" { return "FastDFS应用" }
"^33_" { return "应用日志" } "^33_" { return "应用日志" }
"^40_" { return "综合诊断" } "^40_" { return "综合诊断" }
default { return "其他" } default { return "其他" }
...@@ -488,6 +488,69 @@ function Get-DisplayName { ...@@ -488,6 +488,69 @@ function Get-DisplayName {
"PYTHON_VERSION" = "Python版本" "PYTHON_VERSION" = "Python版本"
"NGINX_VERSION" = "Nginx版本" "NGINX_VERSION" = "Nginx版本"
"NACOS_VERSION" = "Nacos版本" "NACOS_VERSION" = "Nacos版本"
# Nginx深度检测
"NGINX_ACTIVE_CONNECTIONS" = "Nginx活跃连接数"
"NGINX_ACCEPTED_CONNECTIONS" = "Nginx已接受连接数"
"NGINX_HANDLED_CONNECTIONS" = "Nginx已处理连接数"
"NGINX_REQUESTS_TOTAL" = "Nginx总请求数"
"NGINX_PROCESSES" = "Nginx进程数"
"NGINX_LISTENING_PORTS" = "Nginx监听端口"
"NGINX_CONFIG_STATUS" = "Nginx配置状态"
"NGINX_CONFIG_LEVEL" = "Nginx配置等级"
"NGINX_WORKER_PROCESSES" = "Nginx工作进程数"
"NGINX_WORKER_CONNECTIONS" = "Nginx工作连接数"
"NGINX_ACCESS_LOG_SIZE" = "Nginx访问日志大小"
"NGINX_ERROR_LOG_SIZE" = "Nginx错误日志大小"
"NGINX_RECENT_ERRORS" = "Nginx最近错误数"
"NGINX_SSL_ENABLED" = "Nginx SSL启用"
"NGINX_CACHE_ENABLED" = "Nginx缓存启用"
"NGINX_UPSTREAM_COUNT" = "Nginx上游数量"
"NGINX_SERVER_BLOCKS" = "Nginx服务器块数量"
"NGINX_UPTIME_DAYS" = "Nginx运行天数"
# Nacos深度检测
"NACOS_NAMESPACES" = "Nacos命名空间数"
"NACOS_SERVICES_COUNT" = "Nacos服务数量"
"NACOS_INSTANCES_COUNT" = "Nacos实例数量"
"NACOS_HEALTHY_INSTANCES" = "Nacos健康实例数"
"NACOS_HEALTH_RATE" = "Nacos健康率"
"NACOS_CONFIGS_COUNT" = "Nacos配置数量"
"NACOS_GRPC_CONNECTIONS" = "Nacos gRPC连接数"
"NACOS_RAFT_MODE" = "Nacos Raft模式"
"NACOS_RAFT_ROLE" = "Nacos Raft角色"
"NACOS_RAFT_NODES" = "Nacos Raft节点数"
"NACOS_HEALTH_STATUS" = "Nacos健康状态"
"NACOS_HEALTH_LEVEL" = "Nacos健康等级"
"NACOS_HEAP_USAGE" = "Nacos堆内存使用率"
"NACOS_THREAD_COUNT" = "Nacos线程数"
"NACOS_MAIN_PORT" = "Nacos主端口状态"
"NACOS_RAFT_PORT" = "Nacos Raft端口状态"
"NACOS_LOG_SIZE" = "Nacos日志大小"
"NACOS_RECENT_ERRORS" = "Nacos最近错误数"
"NACOS_UPTIME_DAYS" = "Nacos运行天数"
# FastDFS深度检测
"FASTDFS_VERSION" = "FastDFS版本"
"FASTDFS_TRACKER_STATUS" = "FastDFS Tracker状态"
"FASTDFS_TRACKER_PROCESSES" = "FastDFS Tracker进程数"
"FASTDFS_STORAGE_STATUS" = "FastDFS Storage状态"
"FASTDFS_STORAGE_PROCESSES" = "FastDFS Storage进程数"
"FASTDFS_STORE_PATH_COUNT" = "FastDFS存储路径数"
"FASTDFS_DISK_USAGE" = "FastDFS磁盘使用率"
"FASTDFS_SYNC_ENABLED" = "FastDFS同步启用"
"FASTDFS_FILE_COUNT" = "FastDFS文件数量"
"FASTDFS_DIR_COUNT" = "FastDFS目录数量"
"FASTDFS_TOTAL_SIZE" = "FastFS总大小"
"FASTDFS_TRUNK_ENABLED" = "FastDFS Trunk启用"
"FASTDFS_TRACKER_CONNECTION" = "FastDFS Tracker连接状态"
"FASTDFS_LOG_SIZE" = "FastDFS日志大小"
"FASTDFS_RECENT_ERRORS" = "FastDFS最近错误数"
"FASTDFS_HTTP_ENABLED" = "FastDFS HTTP启用"
"FASTDFS_HTTP_PORT" = "FastDFS HTTP端口"
"FASTDFS_HTTP_STATUS" = "FastDFS HTTP状态"
"FASTDFS_STORAGE_UPTIME_DAYS" = "FastDFS Storage运行天数"
"FASTDFS_GROUP_NAME" = "FastDFS组名"
} }
if ($displayNames.ContainsKey($Key)) { if ($displayNames.ContainsKey($Key)) {
...@@ -722,6 +785,26 @@ function Get-Threshold { ...@@ -722,6 +785,26 @@ function Get-Threshold {
# ==================== 证书检测阈值 ==================== # ==================== 证书检测阈值 ====================
"SSL_CERT_DAYS_LEFT" = "<30天" "SSL_CERT_DAYS_LEFT" = "<30天"
"EMQX_CERT_DAYS_LEFT" = "<30天" "EMQX_CERT_DAYS_LEFT" = "<30天"
# ==================== Nginx应用阈值 ====================
"NGINX_ACTIVE_CONNECTIONS" = ">1000"
"NGINX_CONFIG_LEVEL" = "严重"
"NGINX_RECENT_ERRORS" = ">10"
"NGINX_HEAP_USAGE" = ">80%"
# ==================== Nacos应用阈值 ====================
"NACOS_HEALTH_RATE" = "<90%"
"NACOS_HEALTH_LEVEL" = "严重"
"NACOS_HEAP_USAGE" = ">80%"
"NACOS_RECENT_ERRORS" = ">20"
"NACOS_THREAD_COUNT" = ">500"
# ==================== FastDFS应用阈值 ====================
"FASTDFS_DISK_USAGE" = ">80%"
"FASTDFS_TRACKER_CONNECTION" = "INACTIVE"
"FASTDFS_RECENT_ERRORS" = ">10"
"FASTDFS_FILE_COUNT" = ">100000"
"FASTDFS_HTTP_STATUS" = "0"
} }
if ($thresholds.ContainsKey($Key)) { if ($thresholds.ContainsKey($Key)) {
...@@ -841,8 +924,9 @@ function Invoke-AllChecks { ...@@ -841,8 +924,9 @@ function Invoke-AllChecks {
"20_docker_basic.sh", "21_docker_deep.sh", "22_mysql_basic.sh", "20_docker_basic.sh", "21_docker_deep.sh", "22_mysql_basic.sh",
"23_mysql_depth.sh", "24_redis_basic.sh", "25_redis_depth.sh", "23_mysql_depth.sh", "24_redis_basic.sh", "25_redis_depth.sh",
"26_emqx_basic.sh", "27_emqx_deep.sh", "28_java_check.sh", "26_emqx_basic.sh", "27_emqx_deep.sh", "28_java_check.sh",
"29_python_check.sh", "30_nginx_check.sh", "31_nacos_check.sh", "29_python_check.sh", "30_nginx_check.sh", "34_nginx_deep.sh",
"32_fastdfs_check.sh", "33_app_logs.sh" "31_nacos_check.sh", "35_nacos_deep.sh", "32_fastdfs_check.sh",
"36_fastdfs_deep.sh", "33_app_logs.sh"
) )
$totalModules = $systemModules.Count + $serviceModules.Count + $comprehensiveModules.Count $totalModules = $systemModules.Count + $serviceModules.Count + $comprehensiveModules.Count
......
...@@ -82,12 +82,17 @@ check_emqx_clients_detail() { ...@@ -82,12 +82,17 @@ check_emqx_clients_detail() {
output_result "EMQX_CLIENTS_DISCONNECTED" "$clients_disconnected" output_result "EMQX_CLIENTS_DISCONNECTED" "$clients_disconnected"
fi fi
# 检查最大连接数 # 获取最大客户端连接数配置
local max_clients local max_clients_config
max_clients=$(emqx_exec emqx_ctl listeners list 2>/dev/null | grep -c "running" || echo "0") max_clients_config=$(emqx_exec emqx_ctl listeners list 2>/dev/null | grep -E "max_connections" | awk '{print $NF}' | head -1)
if [ -n "$max_clients" ]; then if [ -z "$max_clients_config" ] || [ "$max_clients_config" = "0" ]; then
output_result "EMQX_MAX_CLIENTS" "$max_clients" # 如果无法获取配置,使用默认值infinity(或1024)
max_clients_config="infinity"
fi
if [ -n "$max_clients_config" ]; then
output_result "EMQX_MAX_CLIENTS" "$max_clients_config"
fi fi
} }
...@@ -202,24 +207,25 @@ check_emqx_cluster_detail() { ...@@ -202,24 +207,25 @@ check_emqx_cluster_detail() {
cluster_status=$(emqx_exec emqx_ctl cluster status 2>/dev/null) cluster_status=$(emqx_exec emqx_ctl cluster status 2>/dev/null)
if [ -n "$cluster_status" ]; then if [ -n "$cluster_status" ]; then
# 检查是否为集群模式 # 检查是否为集群模式(查找停止的节点或运行中的节点)
local is_cluster local is_running_cluster
is_cluster=$(echo "$cluster_status" | grep -c "Cluster" || echo "0") is_running_cluster=$(echo "$cluster_status" | grep -c "is running" || echo "0")
local is_stopped_cluster
if [ "$is_cluster" -gt 0 ]; then is_stopped_cluster=$(echo "$cluster_status" | grep -c "is stopped" || echo "0")
local total_nodes=$((is_running_cluster + is_stopped_cluster))
if [ "$total_nodes" -gt 1 ]; then
output_result "EMQX_CLUSTER_MODE" "是" output_result "EMQX_CLUSTER_MODE" "是"
output_result "EMQX_CLUSTER_NODES" "$total_nodes"
# 获取集群节点数量
local node_count
node_count=$(echo "$cluster_status" | grep -c "Node" || echo "0")
if [ -n "$node_count" ]; then
output_result "EMQX_CLUSTER_NODES" "$node_count"
fi
else else
output_result "EMQX_CLUSTER_MODE" "否" # 单节点模式
output_result "EMQX_CLUSTER_MODE" "否(单节点)"
output_result "EMQX_CLUSTER_NODES" "1" output_result "EMQX_CLUSTER_NODES" "1"
fi fi
else
# 无法获取集群状态,默认为单节点
output_result "EMQX_CLUSTER_MODE" "否(单节点)"
output_result "EMQX_CLUSTER_NODES" "1"
fi fi
} }
......
#!/bin/bash
################################################################################
# Nginx深度检测模块
# 功能: 深度检测Nginx连接数、请求统计、性能指标、配置详情等
# 作者: Claude Code
# 日期: 2026-05-09
################################################################################
# 获取脚本所在目录并加载依赖
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
LIB_DIR="/tmp/check_modules"
# 加载配置文件和通用函数库
if [ -f "$LIB_DIR/config.sh" ]; then
source "$LIB_DIR/config.sh"
else
echo "ERROR: 配置文件不存在: $LIB_DIR/config.sh"
exit 1
fi
if [ -f "$LIB_DIR/common.sh" ]; then
source "$LIB_DIR/common.sh"
else
echo "ERROR: 通用函数库不存在: $LIB_DIR/common.sh"
exit 1
fi
# Nginx容器名称
NGINX_CONTAINER="${CONTAINERS[nginx]}"
# ==================== 辅助函数 ====================
#######################################
# Run a command inside the Nginx container.
# Globals:   NGINX_CONTAINER (read)
# Arguments: the command followed by its arguments, passed through verbatim
# Outputs:   the command's stdout; everything written to stderr is discarded
# Returns:   exit status of `docker exec`
#######################################
nginx_exec() {
  # "$@" keeps each argument as one word. The previous unquoted $@ re-split
  # arguments on whitespace, so `nginx_exec sh -c "nginx -v 2>&1"` ran
  # `sh -c nginx` with stray positional parameters.
  docker exec "$NGINX_CONTAINER" "$@" 2>/dev/null
}
# ==================== 深度检测函数 ====================
#######################################
# Report the Nginx version as NGINX_VERSION.
# Three sources are tried in order:
#   1. `nginx -v` inside the container
#   2. `<path from which nginx> -v` inside the container
#   3. the numeric tag of the container image, suffixed with "(镜像)"
# Falls back to "未知" when none of them yields a value.
# Globals:   NGINX_CONTAINER (read)
# Outputs:   one NGINX_VERSION record via output_result
#######################################
check_nginx_version_detail() {
  local nginx_version nginx_path image_version

  # `nginx -v` writes to stderr. nginx_exec discards stderr, so docker is
  # called directly here and stderr is merged into the captured stream.
  nginx_version=$(docker exec "$NGINX_CONTAINER" nginx -v 2>&1 \
    | grep "nginx version" | sed 's/.*nginx\///' | sed 's/ $//' | head -1)

  if [ -z "$nginx_version" ]; then
    nginx_path=$(nginx_exec which nginx 2>/dev/null)
    if [ -n "$nginx_path" ]; then
      nginx_version=$(docker exec "$NGINX_CONTAINER" "$nginx_path" -v 2>&1 \
        | grep "nginx version" | sed 's/.*nginx\///' | sed 's/ $//' | head -1)
    fi
  fi

  if [ -z "$nginx_version" ]; then
    # Drop everything up to the last "/" first so a registry port such as
    # "registry:5000/nginx:1.25" is not mistaken for the tag.
    image_version=$(docker inspect --format='{{.Config.Image}}' "$NGINX_CONTAINER" 2>/dev/null \
      | sed 's/.*\///' | grep -oE ':[0-9.]+' | head -1 | sed 's/://')
    if [ -n "$image_version" ]; then
      nginx_version="$image_version(镜像)"
    fi
  fi

  if [ -n "$nginx_version" ]; then
    output_result "NGINX_VERSION" "$nginx_version"
  else
    output_result "NGINX_VERSION" "未知"
  fi
}
#######################################
# Report connection counters read from the stub_status page at
# localhost/nginx_status inside the container.
# Emits NGINX_ACTIVE_CONNECTIONS, NGINX_ACCEPTED_CONNECTIONS,
# NGINX_HANDLED_CONNECTIONS and NGINX_REQUESTS_TOTAL.
# Nothing is emitted when the response is not a stub_status page (for
# example an HTML 404 body), so unrelated text is never parsed as counters.
# Outputs:   up to four records via output_result
# Returns:   0
#######################################
check_nginx_connections() {
  local status_data counters
  local active_conn accepted_conn handled_conn requests

  status_data=$(nginx_exec curl -s localhost/nginx_status 2>/dev/null)

  # Require the stub_status signature before trusting the payload.
  case "$status_data" in
    *"Active connections"*) ;;
    *) return 0 ;;
  esac

  # The three totals are on the line after the "server accepts handled
  # requests" header; missing values default to 0.
  counters=$(printf '%s\n' "$status_data" | awk '
    /Active connections/ { active = $3 }
    /accepts handled requests/ { grab = 1; next }
    grab { accepted = $1; handled = $2; total = $3; grab = 0 }
    END {
      printf "%s %s %s %s\n", (active == "" ? 0 : active),
        (accepted == "" ? 0 : accepted), (handled == "" ? 0 : handled),
        (total == "" ? 0 : total)
    }')
  read -r active_conn accepted_conn handled_conn requests <<<"$counters"

  output_result "NGINX_ACTIVE_CONNECTIONS" "$active_conn"
  output_result "NGINX_ACCEPTED_CONNECTIONS" "$accepted_conn"
  output_result "NGINX_HANDLED_CONNECTIONS" "$handled_conn"
  output_result "NGINX_REQUESTS_TOTAL" "$requests"
}
#######################################
# Report the Nginx master PID and the number of Nginx processes.
# The PID is read from /var/run/nginx.pid inside the container. The process
# count is taken from the first method that returns a non-zero value:
#   1. pgrep -f nginx
#   2. ps aux filtered for "nginx"
#   3. ps -p <pid>
# Outputs:   NGINX_PID and NGINX_PROCESSES via output_result
# Returns:   0
#######################################
check_nginx_process() {
  local nginx_pid process_count

  nginx_pid=$(nginx_exec cat /var/run/nginx.pid 2>/dev/null)
  if [ -z "$nginx_pid" ]; then
    output_result "NGINX_PID" "未找到"
    output_result "NGINX_PROCESSES" "未知"
    return 0
  fi
  output_result "NGINX_PID" "$nginx_pid"

  process_count=$(nginx_exec pgrep -f nginx 2>/dev/null | wc -l)
  process_count=${process_count//[[:space:]]/}

  if [ "${process_count:-0}" -eq 0 ]; then
    # grep -c already prints "0" when nothing matches (and exits 1). The old
    # `|| echo "0"` appended a second "0", producing "0\n0", which made the
    # numeric tests below fail so method 3 was never reached.
    process_count=$(nginx_exec ps aux 2>/dev/null | grep -c "[n]ginx" || true)
  fi

  if [ "${process_count:-0}" -eq 0 ]; then
    process_count=$(nginx_exec ps -p "$nginx_pid" -o pid= 2>/dev/null | wc -l)
    process_count=${process_count//[[:space:]]/}
  fi

  if [ "${process_count:-0}" -gt 0 ]; then
    output_result "NGINX_PROCESSES" "$process_count"
  else
    output_result "NGINX_PROCESSES" "未知"
  fi
}
#######################################
# Report the TCP ports Nginx is listening on inside the container.
# Ports come from the local-address column of `netstat -tlnp` rows that
# mention nginx; they are de-duplicated, sorted and joined with commas.
# Outputs:   NGINX_LISTENING_PORTS via output_result; nothing when no port
#            is found
#######################################
check_nginx_listening() {
  local ports

  ports=$(
    nginx_exec netstat -tlnp 2>/dev/null \
      | awk '/nginx/ {print $4}' \
      | grep -oE '[0-9]+$' \
      | sort -u \
      | paste -sd, -
  )

  [ -n "$ports" ] || return 0
  output_result "NGINX_LISTENING_PORTS" "$ports"
}
#######################################
# Validate the Nginx configuration with `nginx -t` inside the container.
# Emits NGINX_CONFIG_STATUS / NGINX_CONFIG_LEVEL, plus NGINX_CONFIG_ERROR
# with the first diagnostic line when the test fails.
# Globals:   NGINX_CONTAINER (read)
# Outputs:   two or three records via output_result
# Returns:   0
#######################################
check_nginx_config_detail() {
  local config_test error_msg

  # `nginx -t` reports on stderr. nginx_exec discards stderr, so going
  # through it always produced an empty result and a false "严重" alarm.
  # docker is called directly so the report can be captured.
  config_test=$(docker exec "$NGINX_CONTAINER" nginx -t 2>&1)

  if printf '%s\n' "$config_test" | grep -q "successful"; then
    output_result "NGINX_CONFIG_STATUS" "正常"
    output_result "NGINX_CONFIG_LEVEL" "正常"
    return 0
  fi

  output_result "NGINX_CONFIG_STATUS" "异常"
  output_result "NGINX_CONFIG_LEVEL" "严重"

  # Nginx tags fatal problems as [emerg]/[alert]/[crit]; the plain word
  # "error" is kept as a match for compatibility.
  error_msg=$(printf '%s\n' "$config_test" \
    | grep -E '\[(emerg|alert|crit)\]|error' | head -1)
  if [ -n "$error_msg" ]; then
    output_result "NGINX_CONFIG_ERROR" "$error_msg"
  fi
  return 0
}
#######################################
# Report the configured worker_processes and worker_connections values,
# taken from the effective configuration dumped by `nginx -T`.
# Outputs:   NGINX_WORKER_PROCESSES / NGINX_WORKER_CONNECTIONS via
#            output_result, each only when the directive is found
# Returns:   0
#######################################
check_nginx_worker_config() {
  local config_dump directive value
  # Skips comment lines, finds the directive as a whole word anywhere on the
  # line and strips the trailing ";" ("auto;" -> "auto").
  local -r extract='
    /^[[:space:]]*#/ { next }
    {
      for (i = 1; i < NF; i++) {
        if ($i == d) { v = $(i + 1); sub(/;.*$/, "", v); print v; exit }
      }
    }'

  # Dump the configuration once and reuse it for both directives.
  config_dump=$(nginx_exec nginx -T 2>/dev/null)

  for directive in worker_processes worker_connections; do
    value=$(printf '%s\n' "$config_dump" | awk -v d="$directive" "$extract")
    [ -n "$value" ] || continue
    case "$directive" in
      worker_processes) output_result "NGINX_WORKER_PROCESSES" "$value" ;;
      worker_connections) output_result "NGINX_WORKER_CONNECTIONS" "$value" ;;
    esac
  done
  return 0
}
#######################################
# Report Nginx log sizes and a recent-error count.
# Sizes are the human-readable size column of `ls -lh` for
# /var/log/nginx/access.log and /var/log/nginx/error.log. The error count is
# the number of lines containing "error" among the last 100 lines of
# error.log (a line window, not a time window).
# Outputs:   NGINX_ACCESS_LOG_SIZE, NGINX_ERROR_LOG_SIZE (when available)
#            and NGINX_RECENT_ERRORS via output_result
#######################################
check_nginx_logs() {
  local access_log_size error_log_size recent_errors

  access_log_size=$(nginx_exec ls -lh /var/log/nginx/access.log 2>/dev/null | awk '{print $5}')
  if [ -n "$access_log_size" ]; then
    output_result "NGINX_ACCESS_LOG_SIZE" "$access_log_size"
  fi

  error_log_size=$(nginx_exec ls -lh /var/log/nginx/error.log 2>/dev/null | awk '{print $5}')
  if [ -n "$error_log_size" ]; then
    output_result "NGINX_ERROR_LOG_SIZE" "$error_log_size"
  fi

  # grep -c prints "0" itself when nothing matches (exit status 1); the old
  # `|| echo "0"` turned the value into the two-line string "0\n0".
  recent_errors=$(nginx_exec tail -n 100 /var/log/nginx/error.log 2>/dev/null \
    | grep -c "error" || true)
  output_result "NGINX_RECENT_ERRORS" "${recent_errors:-0}"
}
#######################################
# Report whether any `listen ... ssl` directive exists in the effective
# configuration and, if so, the expiry date of /etc/nginx/ssl/nginx.crt.
# Outputs:   NGINX_SSL_ENABLED ("是"/"否") and, when the certificate can be
#            read, NGINX_SSL_CERT_EXPIRY via output_result
# Returns:   0
#######################################
check_nginx_ssl() {
  local ssl_enabled cert_expiry

  # `|| true` only neutralises grep's exit status 1 on zero matches; grep -c
  # still prints the single "0". The old `|| echo "0"` yielded "0\n0" and a
  # shell error in the numeric test below.
  ssl_enabled=$(nginx_exec nginx -T 2>/dev/null | grep -c "listen.*ssl" || true)

  if [ "${ssl_enabled:-0}" -gt 0 ]; then
    output_result "NGINX_SSL_ENABLED" "是"

    # NOTE(review): the certificate path is fixed; deployments that keep
    # certificates elsewhere will not get an expiry record.
    cert_expiry=$(nginx_exec openssl x509 -in /etc/nginx/ssl/nginx.crt -noout -enddate 2>/dev/null \
      | cut -d= -f2)
    if [ -n "$cert_expiry" ]; then
      output_result "NGINX_SSL_CERT_EXPIRY" "$cert_expiry"
    fi
  else
    output_result "NGINX_SSL_ENABLED" "否"
  fi
  return 0
}
#######################################
# Report whether a proxy cache is configured.
# The cache counts as enabled when an active (non-commented)
# `proxy_cache_path` directive exists in the output of `nginx -T`; its first
# argument is reported as the cache path.
# Outputs:   NGINX_CACHE_ENABLED ("是"/"否") and NGINX_CACHE_PATH via
#            output_result
# Returns:   0
#######################################
check_nginx_cache() {
  local cache_path

  # One pass decides both "enabled" and the path, so a commented-out
  # directive can no longer report the cache as enabled without a path.
  # This also removes the old `grep -c … || echo "0"` double-zero value.
  cache_path=$(nginx_exec nginx -T 2>/dev/null | awk '
    /^[[:space:]]*#/ { next }
    {
      for (i = 1; i < NF; i++) {
        if ($i == "proxy_cache_path") { print $(i + 1); exit }
      }
    }')

  if [ -n "$cache_path" ]; then
    output_result "NGINX_CACHE_ENABLED" "是"
    output_result "NGINX_CACHE_PATH" "$cache_path"
  else
    output_result "NGINX_CACHE_ENABLED" "否"
  fi
  return 0
}
#######################################
# Report the upstream groups declared in the effective configuration.
# Only active `upstream <name> {` declarations are counted. Comment lines
# and directives that merely contain the word (proxy_next_upstream,
# proxy_pass http://upstream_x, ...) are ignored.
# Outputs:   NGINX_UPSTREAM_COUNT and, when at least one group exists,
#            NGINX_UPSTREAMS (comma-separated names) via output_result
# Returns:   0
#######################################
check_nginx_upstream() {
  local names upstream_count upstreams

  names=$(nginx_exec nginx -T 2>/dev/null | awk '
    /^[[:space:]]*#/ { next }
    $1 == "upstream" && NF >= 2 {
      name = $2
      sub(/[{].*$/, "", name)   # tolerate "upstream api{"
      if (name != "") print name
    }')

  if [ -z "$names" ]; then
    output_result "NGINX_UPSTREAM_COUNT" "0"
    return 0
  fi

  upstream_count=$(printf '%s\n' "$names" | grep -c '')
  upstreams=$(printf '%s\n' "$names" | paste -sd, -)

  output_result "NGINX_UPSTREAM_COUNT" "$upstream_count"
  output_result "NGINX_UPSTREAMS" "$upstreams"
}
#######################################
# Report the number of server blocks, approximated by counting active
# `server_name` directives in the output of `nginx -T`.
# Outputs:   NGINX_SERVER_BLOCKS via output_result (a single integer)
#######################################
check_nginx_server_blocks() {
  local server_count

  # Match the directive name exactly. A substring match also counted
  # server_names_hash_bucket_size / server_name_in_redirect and commented
  # lines. awk always prints exactly one number, so the "0\n0" value that
  # `grep -c … || echo "0"` produced cannot occur.
  server_count=$(nginx_exec nginx -T 2>/dev/null \
    | awk '$1 == "server_name" { n++ } END { print n + 0 }')

  output_result "NGINX_SERVER_BLOCKS" "${server_count:-0}"
}
#######################################
# Report how many whole days the Nginx master process has been running.
# The PID is read from /var/run/nginx.pid and the elapsed seconds from
# `ps -o etimes=`, both inside the container.
# Outputs:   NGINX_UPTIME_DAYS via output_result; nothing when the PID or
#            the elapsed time cannot be determined
#######################################
check_nginx_uptime() {
  local master_pid elapsed

  master_pid=$(nginx_exec cat /var/run/nginx.pid 2>/dev/null)
  [ -n "$master_pid" ] || return 0

  elapsed=$(nginx_exec ps -p "$master_pid" -o etimes= 2>/dev/null | tr -d ' ')
  if [ -z "$elapsed" ] || ! [ "$elapsed" -gt 0 ]; then
    return 0
  fi

  # Integer division: anything under 24 hours is reported as 0 days.
  output_result "NGINX_UPTIME_DAYS" "$((elapsed / 86400))"
}
# ==================== Main check flow ====================
#######################################
# Entry point of the Nginx deep check.
# Verifies that the container named by NGINX_CONTAINER appears in
# `docker ps`, records its status, then runs every check in order. Each
# check runs with stderr silenced and its failure ignored, so one broken
# check does not stop the rest.
# Globals:   NGINX_CONTAINER (read)
# Returns:   1 when the container is not running, otherwise the status of
#            the final log_info call
#######################################
main() {
  local check

  log_info "开始Nginx深度检测..."

  docker ps --format "{{.Names}}" | grep -q "^${NGINX_CONTAINER}$" || {
    log_warn "Nginx容器未运行"
    output_result "NGINX_CONTAINER_STATUS" "未运行"
    return 1
  }
  output_result "NGINX_CONTAINER_STATUS" "运行中"

  for check in \
    check_nginx_version_detail \
    check_nginx_connections \
    check_nginx_process \
    check_nginx_listening \
    check_nginx_config_detail \
    check_nginx_worker_config \
    check_nginx_logs \
    check_nginx_ssl \
    check_nginx_cache \
    check_nginx_upstream \
    check_nginx_server_blocks \
    check_nginx_uptime; do
    "$check" 2>/dev/null || true
  done

  log_info "Nginx深度检测完成"
}
# Run the main entry point (no arguments are forwarded).
main
#!/bin/bash
################################################################################
# Nacos deep-check module
# Purpose: in-depth inspection of Nacos service instances, configuration
#          counts, health status, Raft cluster state, etc.
# Author: Claude Code
# Date: 2026-05-09
################################################################################
# Resolve the directory this script lives in and load dependencies.
# NOTE(review): SCRIPT_DIR is not referenced anywhere else in this script; the
# sourced libraries may read it — confirm before removing.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# Libraries are loaded from a fixed location, not from SCRIPT_DIR.
LIB_DIR="/tmp/check_modules"
# Load the configuration file and the shared function library; abort with
# exit status 1 if either file is missing.
if [ -f "$LIB_DIR/config.sh" ]; then
source "$LIB_DIR/config.sh"
else
echo "ERROR: 配置文件不存在: $LIB_DIR/config.sh"
exit 1
fi
if [ -f "$LIB_DIR/common.sh" ]; then
source "$LIB_DIR/common.sh"
else
echo "ERROR: 通用函数库不存在: $LIB_DIR/common.sh"
exit 1
fi
# Name of the Nacos container.
# Presumably CONTAINERS is an associative array defined by config.sh — verify.
NACOS_CONTAINER="${CONTAINERS[nacos]}"
# ==================== 辅助函数 ====================
#######################################
# Run a command inside the Nacos container.
# Globals:   NACOS_CONTAINER (read)
# Arguments: the command followed by its arguments, passed through verbatim
# Outputs:   the command's stdout; everything written to stderr is discarded
# Returns:   exit status of `docker exec`
#######################################
nacos_exec() {
  # "$@" keeps each argument as one word and stops URLs containing "?" or
  # "*" from undergoing pathname expansion, which unquoted $@ allowed.
  docker exec "$NACOS_CONTAINER" "$@" 2>/dev/null
}
#######################################
# Call a Nacos HTTP endpoint from inside the container.
# Arguments: $1 - path relative to http://localhost:8848/nacos/
# Outputs:   the response body as returned by `curl -s`
#######################################
nacos_api() {
  local url="http://localhost:8848/nacos/$1"

  nacos_exec curl -s "$url" 2>/dev/null
}
# ==================== 深度检测函数 ====================
#######################################
# Report the Nacos version as NACOS_VERSION.
# Uses the first line of /home/nacos/version.txt; when that is empty, falls
# back to the first line of /home/nacos/logs/start.out that contains
# "Nacos" (the whole line is reported).
# Outputs:   NACOS_VERSION via output_result, or nothing when neither source
#            yields a value
#######################################
check_nacos_version() {
  local detected

  detected=$(nacos_exec cat /home/nacos/version.txt 2>/dev/null | head -1)
  if [ -z "$detected" ]; then
    # Fallback: look in the startup log.
    detected=$(nacos_exec cat /home/nacos/logs/start.out 2>/dev/null | grep "Nacos" | head -1)
    [ -n "$detected" ] && output_result "NACOS_VERSION" "$detected"
    return
  fi

  output_result "NACOS_VERSION" "$detected"
}
#######################################
# Report the number of namespaces and the number of services registered in
# the default namespace.
# Outputs:   NACOS_NAMESPACES and NACOS_SERVICES_COUNT via output_result,
#            each only when the corresponding API call returns data
# Returns:   0
#######################################
check_nacos_services() {
  local namespaces namespace_count services service_count

  namespaces=$(nacos_api "v1/console/namespaces" 2>/dev/null)
  if [ -n "$namespaces" ]; then
    # Entries of the namespace list carry "namespace"/"namespaceShowName"
    # fields, not "namespaceId", so counting "namespaceId" always gave 0.
    # The old key stays as a fallback in case a server version emits it.
    namespace_count=$(printf '%s\n' "$namespaces" | grep -o '"namespaceShowName"' | wc -l)
    namespace_count=${namespace_count//[[:space:]]/}
    if [ "${namespace_count:-0}" -eq 0 ]; then
      namespace_count=$(printf '%s\n' "$namespaces" | grep -o "namespaceId" | wc -l)
      namespace_count=${namespace_count//[[:space:]]/}
    fi
    output_result "NACOS_NAMESPACES" "${namespace_count:-0}"
  fi

  # Service list of the default namespace; "count" is the total.
  services=$(nacos_api "v1/ns/service/list?pageNo=1&pageSize=100&namespaceId=" 2>/dev/null)
  if [ -n "$services" ]; then
    service_count=$(printf '%s\n' "$services" | grep -o '"count":[0-9]*' | cut -d: -f2 | head -1)
    if [ -n "$service_count" ]; then
      output_result "NACOS_SERVICES_COUNT" "$service_count"
    fi
  fi
  return 0
}
#######################################
# Report instance totals and the derived health rate.
# Takes the first "count" and the first "healthyInstanceCount" number found
# in the API response; the rate is healthy * 100 / total using integer
# division.
# NOTE(review): the instance list endpoint is called without a serviceName
# parameter; the Nacos Open API may reject such a request, in which case
# nothing is emitted — confirm against the deployed server version.
# Outputs:   NACOS_INSTANCES_COUNT, NACOS_HEALTHY_INSTANCES and
#            NACOS_HEALTH_RATE via output_result, each only when available
#######################################
check_nacos_instances() {
  local payload total healthy

  payload=$(nacos_api "v1/ns/instance/list?pageNo=1&pageSize=100&namespaceId=" 2>/dev/null)
  [ -n "$payload" ] || return 0

  total=$(echo "$payload" | grep -o '"count":[0-9]*' | cut -d: -f2 | head -1)
  if [ -n "$total" ]; then
    output_result "NACOS_INSTANCES_COUNT" "$total"
  fi

  healthy=$(echo "$payload" | grep -o '"healthyInstanceCount":[0-9]*' | cut -d: -f2 | head -1)
  if [ -n "$healthy" ]; then
    output_result "NACOS_HEALTHY_INSTANCES" "$healthy"
  fi

  # The rate needs both numbers and a non-zero denominator.
  if [ -n "$total" ] && [ -n "$healthy" ] && [ "$total" -gt 0 ]; then
    output_result "NACOS_HEALTH_RATE" "$((healthy * 100 / total))%"
  fi
}
#######################################
# Report the total number of configuration entries in the default tenant.
# Outputs:   NACOS_CONFIGS_COUNT via output_result when the total can be
#            read from the response
# Returns:   0
#######################################
check_nacos_configs() {
  local configs config_count

  # Listing configurations needs the `search` parameter; without it the
  # endpoint is a single-config read that requires dataId/group.
  # NOTE(review): verify against the deployed Nacos version.
  configs=$(nacos_api "v1/cs/configs?dataId=&group=&pageNo=1&pageSize=10&tenant=&search=accurate" 2>/dev/null)
  [ -n "$configs" ] || return 0

  # The paged response reports the total as "totalCount"; the previously
  # expected "total" key is still accepted.
  config_count=$(printf '%s\n' "$configs" \
    | grep -oE '"total(Count)?":[0-9]+' | cut -d: -f2 | head -1)
  if [ -n "$config_count" ]; then
    output_result "NACOS_CONFIGS_COUNT" "$config_count"
  fi
  return 0
}
#######################################
# Report the raw response of the v1/ns/raft/conn endpoint as the gRPC
# connection figure.
# NOTE(review): the response body is forwarded unparsed; confirm that this
# endpoint exists on the deployed version and returns a plain number.
# Outputs:   NACOS_GRPC_CONNECTIONS via output_result when the response is
#            non-empty
#######################################
check_nacos_connections() {
  local response

  response=$(nacos_api "v1/ns/raft/conn" 2>/dev/null)
  [ -n "$response" ] || return 0

  output_result "NACOS_GRPC_CONNECTIONS" "$response"
}
#######################################
# Report Raft cluster information from the v1/ns/raft/state endpoint.
# Cluster mode is assumed when the response mentions "leader". The role is
# the first "role" string value; the node figure is the number of response
# lines containing "peer".
# NOTE(review): a single-line JSON body makes that figure at most 1 —
# confirm the response format before relying on NACOS_RAFT_NODES.
# Outputs:   NACOS_RAFT_MODE and, in cluster mode, NACOS_RAFT_ROLE /
#            NACOS_RAFT_NODES via output_result
# Returns:   0
#######################################
check_nacos_raft() {
  local raft_info role node_count

  raft_info=$(nacos_api "v1/ns/raft/state" 2>/dev/null)
  [ -n "$raft_info" ] || return 0

  if ! printf '%s\n' "$raft_info" | grep -q "leader"; then
    output_result "NACOS_RAFT_MODE" "否(单机模式)"
    return 0
  fi
  output_result "NACOS_RAFT_MODE" "是"

  role=$(printf '%s\n' "$raft_info" | grep -o '"role":"[^"]*"' | cut -d'"' -f4 | head -1)
  if [ -n "$role" ]; then
    output_result "NACOS_RAFT_ROLE" "$role"
  fi

  # grep -c prints "0" itself on no match; `|| echo "0"` used to append a
  # second "0" and break the numeric comparison.
  node_count=$(printf '%s\n' "$raft_info" | grep -c "peer" || true)
  if [ "${node_count:-0}" -gt 0 ]; then
    output_result "NACOS_RAFT_NODES" "$node_count"
  fi
  return 0
}
#######################################
# Report Nacos health as NACOS_HEALTH_STATUS (UP/DOWN) and
# NACOS_HEALTH_LEVEL (正常/严重).
# Probes, in order, until one returns a body:
#   1. v1/console/health/readiness
#   2. actuator/health
#   3. the HTTP status code of the console root (200/302 count as UP)
# Outputs:   two records via output_result
# Returns:   0
#######################################
check_nacos_health_detail() {
  local health_status
  # Healthy markers: the whole words OK / UP / healthy (case-insensitive),
  # a JSON `"code":200`, or a bare 200/302 status code. Whole-word matching
  # keeps "unhealthy" or "startup" from being read as healthy, and a plain
  # "OK" readiness body is now recognised.
  local -r healthy_re='(^|[^[:alnum:]])(ok|up|healthy)([^[:alnum:]]|$)|"code":[[:space:]]*200|^(200|302)$'

  health_status=$(nacos_api "v1/console/health/readiness" 2>/dev/null)

  if [ -z "$health_status" ]; then
    health_status=$(nacos_api "actuator/health" 2>/dev/null)
  fi

  if [ -z "$health_status" ]; then
    health_status=$(nacos_exec curl -s -o /dev/null -w "%{http_code}" "http://localhost:8848/nacos/" 2>/dev/null)
  fi

  if [ -n "$health_status" ] \
    && printf '%s\n' "$health_status" | grep -Eqi "$healthy_re"; then
    output_result "NACOS_HEALTH_STATUS" "UP"
    output_result "NACOS_HEALTH_LEVEL" "正常"
  else
    output_result "NACOS_HEALTH_STATUS" "DOWN"
    output_result "NACOS_HEALTH_LEVEL" "严重"
  fi
  return 0
}
#######################################
# Report JVM heap usage of the Nacos process as a percentage.
# The PID is the first `jps -l` entry mentioning "nacos"; usage is computed
# from `jstat -gc` as
#   (S0U + S1U + EU + OU) * 100 / (S0C + S1C + EC + OC)
# i.e. used vs. committed capacity of survivor, eden and old spaces.
# Outputs:   NACOS_HEAP_USAGE (e.g. "48%") via output_result when the
#            figures are available
# Returns:   0
#######################################
check_nacos_memory_detail() {
  local jvm_pid heap_usage

  jvm_pid=$(nacos_exec jps -l 2>/dev/null | grep nacos | awk '{print $1}' | head -1)
  [ -n "$jvm_pid" ] || return 0

  # jstat -gc columns: S0C S1C S0U S1U EC EU OC OU ...
  # The old code divided column 4 (S1U) by column 6 (EU) and fed decimal
  # strings such as "1024.0" to `[ -gt ]`, so it never produced a value.
  # awk handles the decimals and truncates to an integer percentage.
  heap_usage=$(nacos_exec jstat -gc "$jvm_pid" 2>/dev/null | awk '
    NR > 1 && NF >= 8 {
      used = $3 + $4 + $6 + $8
      capacity = $1 + $2 + $5 + $7
    }
    END { if (capacity > 0) printf "%d", used * 100 / capacity }')

  if [ -n "$heap_usage" ]; then
    output_result "NACOS_HEAP_USAGE" "${heap_usage}%"
  fi
  return 0
}
#######################################
# Report the number of JVM threads of the Nacos process, counted as
# "java.lang.Thread.State" lines in a `jstack` dump.
# Outputs:   NACOS_THREAD_COUNT via output_result when the Nacos JVM is
#            found by `jps -l`
# Returns:   0
#######################################
check_nacos_threads() {
  local jvm_pid thread_count

  jvm_pid=$(nacos_exec jps -l 2>/dev/null | grep nacos | awk '{print $1}' | head -1)
  [ -n "$jvm_pid" ] || return 0

  # grep -c prints "0" by itself on no match; `|| true` only absorbs its
  # exit status, where the old `|| echo "0"` emitted the value "0\n0".
  thread_count=$(nacos_exec jstack "$jvm_pid" 2>/dev/null \
    | grep -c "java.lang.Thread.State" || true)
  output_result "NACOS_THREAD_COUNT" "${thread_count:-0}"
  return 0
}
#######################################
# Report whether the Nacos ports are listening inside the container.
# Port 8848 is always reported (监听中/未监听); port 9848 is reported only
# when it is listening.
# NOTE(review): 9848 is emitted under the NACOS_RAFT_PORT key; confirm that
# this is the intended port for the deployed Nacos version.
# Outputs:   NACOS_MAIN_PORT and optionally NACOS_RAFT_PORT via
#            output_result
# Returns:   0
#######################################
check_nacos_ports() {
  local listeners

  # Query netstat once. Testing with grep -q avoids the "0\n0" value that
  # `grep -c … || echo "0"` produced, which made `[ -gt ]` print an error.
  listeners=$(nacos_exec netstat -tlnp 2>/dev/null)

  if printf '%s\n' "$listeners" | grep -q ":8848 "; then
    output_result "NACOS_MAIN_PORT" "监听中"
  else
    output_result "NACOS_MAIN_PORT" "未监听"
  fi

  if printf '%s\n' "$listeners" | grep -q ":9848 "; then
    output_result "NACOS_RAFT_PORT" "监听中"
  fi
  return 0
}
#######################################
# Report the size of the Nacos log directory and a recent-error count.
# The size is the `du -sh` total of /home/nacos/logs; the error count is the
# number of lines containing "ERROR" among the last 100 lines of nacos.log.
# Outputs:   NACOS_LOG_SIZE (when available) and NACOS_RECENT_ERRORS via
#            output_result
#######################################
check_nacos_logs() {
  local log_size error_count

  log_size=$(nacos_exec du -sh /home/nacos/logs 2>/dev/null | awk '{print $1}')
  if [ -n "$log_size" ]; then
    output_result "NACOS_LOG_SIZE" "$log_size"
  fi

  # grep -c already prints "0" when nothing matches; the old `|| echo "0"`
  # made the reported value "0\n0".
  error_count=$(nacos_exec tail -n 100 /home/nacos/logs/nacos.log 2>/dev/null \
    | grep -c "ERROR" || true)
  output_result "NACOS_RECENT_ERRORS" "${error_count:-0}"
}
#######################################
# Report how many whole days the Nacos JVM has been running.
# Uses the Timestamp column that `jstat -t` prepends to its output: the
# number of seconds since the target JVM started.
# Outputs:   NACOS_UPTIME_DAYS via output_result when the timestamp can be
#            read
# Returns:   0
#######################################
check_nacos_uptime() {
  local jvm_pid uptime_days

  jvm_pid=$(nacos_exec jps -l 2>/dev/null | grep nacos | awk '{print $1}' | head -1)
  [ -n "$jvm_pid" ] || return 0

  # The old code read column 14 of plain `jstat -gc`, which is YGCT
  # (young-GC time), not uptime, and treated it as milliseconds. `-t` gives
  # the real JVM uptime in seconds (decimal) in column 1 of the data row.
  uptime_days=$(nacos_exec jstat -gc -t "$jvm_pid" 2>/dev/null | awk '
    NR > 1 { ts = $1 }
    END { if (ts + 0 > 0) printf "%d", ts / 86400 }')

  if [ -n "$uptime_days" ]; then
    output_result "NACOS_UPTIME_DAYS" "$uptime_days"
  fi
  return 0
}
# ==================== Main check flow ====================
#######################################
# Entry point of the Nacos deep check.
# Verifies that the container named by NACOS_CONTAINER appears in
# `docker ps`, records its status, then runs every check in order. Each
# check runs with stderr silenced and its failure ignored, so one broken
# check does not stop the rest.
# Globals:   NACOS_CONTAINER (read)
# Returns:   1 when the container is not running, otherwise the status of
#            the final log_info call
#######################################
main() {
  local check

  log_info "开始Nacos深度检测..."

  docker ps --format "{{.Names}}" | grep -q "^${NACOS_CONTAINER}$" || {
    log_warn "Nacos容器未运行"
    output_result "NACOS_CONTAINER_STATUS" "未运行"
    return 1
  }
  output_result "NACOS_CONTAINER_STATUS" "运行中"

  for check in \
    check_nacos_version \
    check_nacos_services \
    check_nacos_instances \
    check_nacos_configs \
    check_nacos_connections \
    check_nacos_raft \
    check_nacos_health_detail \
    check_nacos_memory_detail \
    check_nacos_threads \
    check_nacos_ports \
    check_nacos_logs \
    check_nacos_uptime; do
    "$check" 2>/dev/null || true
  done

  log_info "Nacos深度检测完成"
}
# Run the main entry point (no arguments are forwarded).
main
#!/bin/bash
################################################################################
# FastDFS deep-check module
# Purpose: in-depth inspection of FastDFS storage status, sync status, file
#          statistics and Tracker/Storage details.
# Author: Claude Code
# Date: 2026-05-09
################################################################################
# Resolve the directory this script lives in and load dependencies.
# NOTE(review): SCRIPT_DIR is not referenced anywhere else in this script; the
# sourced libraries may read it — confirm before removing.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# Libraries are loaded from a fixed location, not from SCRIPT_DIR.
LIB_DIR="/tmp/check_modules"
# Load the configuration file and the shared function library; abort with
# exit status 1 if either file is missing.
if [ -f "$LIB_DIR/config.sh" ]; then
source "$LIB_DIR/config.sh"
else
echo "ERROR: 配置文件不存在: $LIB_DIR/config.sh"
exit 1
fi
if [ -f "$LIB_DIR/common.sh" ]; then
source "$LIB_DIR/common.sh"
else
echo "ERROR: 通用函数库不存在: $LIB_DIR/common.sh"
exit 1
fi
# FastDFS installation root and the configuration files derived from it.
# Unlike the Nginx/Nacos modules, these paths are read directly from the
# filesystem this script runs on (no container indirection is used here).
FASTDFS_BASE_DIR="/fastdfs"
STORAGE_CONF="${FASTDFS_BASE_DIR}/storage/conf/storage.conf"
TRACKER_CONF="${FASTDFS_BASE_DIR}/tracker/conf/tracker.conf"
# ==================== 辅助函数 ====================
#######################################
# Tell whether the fdfs_monitor tool is present.
# Globals:   FASTDFS_BASE_DIR (read)
# Returns:   0 when ${FASTDFS_BASE_DIR}/bin/fdfs_monitor is a regular file,
#            1 otherwise
#######################################
check_fdfs_command() {
  [ -f "${FASTDFS_BASE_DIR}/bin/fdfs_monitor" ]
}
#######################################
# Read one value from the Storage configuration file.
# Accepts both `key=value` and `key = value` lines. The key is compared
# literally, so the dots in keys such as "http.disabled" are not regex
# wildcards. Commented lines never match.
# Globals:   STORAGE_CONF (read)
# Arguments: $1 - configuration key
# Outputs:   the value of the first matching line, with surrounding
#            whitespace removed; nothing when the file or key is missing
# Returns:   0
#######################################
get_storage_config() {
  local key=$1

  [ -f "$STORAGE_CONF" ] || return 0

  awk -v key="$key" '
    {
      line = $0
      sub(/\r$/, "", line)                 # tolerate CRLF files
      eq = index(line, "=")
      if (eq == 0) next
      name = substr(line, 1, eq - 1)
      gsub(/^[ \t]+|[ \t]+$/, "", name)
      if (name != key) next
      value = substr(line, eq + 1)
      gsub(/^[ \t]+|[ \t]+$/, "", value)
      print value
      exit
    }' "$STORAGE_CONF"
}
#######################################
# Read one value from the Tracker configuration file.
# Accepts both `key=value` and `key = value` lines. The key is compared
# literally (no regex interpretation). Commented lines never match.
# Globals:   TRACKER_CONF (read)
# Arguments: $1 - configuration key
# Outputs:   the value of the first matching line, with surrounding
#            whitespace removed; nothing when the file or key is missing
# Returns:   0
#######################################
get_tracker_config() {
  local key=$1

  [ -f "$TRACKER_CONF" ] || return 0

  awk -v key="$key" '
    {
      line = $0
      sub(/\r$/, "", line)                 # tolerate CRLF files
      eq = index(line, "=")
      if (eq == 0) next
      name = substr(line, 1, eq - 1)
      gsub(/^[ \t]+|[ \t]+$/, "", name)
      if (name != key) next
      value = substr(line, eq + 1)
      gsub(/^[ \t]+|[ \t]+$/, "", value)
      print value
      exit
    }' "$TRACKER_CONF"
}
# ==================== 深度检测函数 ====================
#######################################
# Report the FastDFS version as FASTDFS_VERSION.
# Sources, in order:
#   1. the content of ${FASTDFS_BASE_DIR}/VERSION
#   2. when an fdfs_storaged process is running and the binary exists under
#      ${FASTDFS_BASE_DIR}/storage/bin, the first x.y.z number printed by
#      `fdfs_storaged --version`
#   3. the first line of the Storage configuration containing "version"
# Falls back to a fixed "unknown" marker.
# Globals:   FASTDFS_BASE_DIR, STORAGE_CONF (read)
# Outputs:   one FASTDFS_VERSION record via output_result
#######################################
check_fastdfs_version() {
  local detected storaged_bin

  detected=$(cat "${FASTDFS_BASE_DIR}/VERSION" 2>/dev/null)

  storaged_bin="${FASTDFS_BASE_DIR}/storage/bin/fdfs_storaged"
  if [ -z "$detected" ] \
    && [ -n "$(ps aux | grep -v grep | grep "fdfs_storaged" | head -1)" ] \
    && [ -f "$storaged_bin" ]; then
    detected=$("$storaged_bin" --version 2>/dev/null | grep -oE '[0-9]+\.[0-9]+\.[0-9]+' | head -1)
  fi

  if [ -z "$detected" ] && [ -f "$STORAGE_CONF" ]; then
    detected=$(grep "version" "$STORAGE_CONF" 2>/dev/null | head -1)
  fi

  if [ -z "$detected" ]; then
    detected="未知(需要手动查看)"
  fi
  output_result "FASTDFS_VERSION" "$detected"
}
#######################################
# Report Tracker process and port status.
# Emits FASTDFS_TRACKER_STATUS (plus FASTDFS_TRACKER_PROCESSES when
# running), FASTDFS_TRACKER_PORT (number of listening sockets on the
# Tracker port, only when `ss` is available) and
# FASTDFS_TRACKER_PORT_CONFIG (the configured port, when set).
# The port checked is the configured one, or 22122 when the configuration
# has no usable numeric value.
# Outputs:   records via output_result
# Returns:   0
#######################################
check_tracker_status() {
  local tracker_count tracker_port tracker_port_config listen_port

  # grep -c prints "0" itself on no match; the old `|| echo "0"` produced
  # "0\n0", which broke the numeric test and the emitted port value.
  tracker_count=$(ps aux | grep -v grep | grep -c "fdfs_trackerd" || true)
  if [ "${tracker_count:-0}" -gt 0 ]; then
    output_result "FASTDFS_TRACKER_STATUS" "运行中"
    output_result "FASTDFS_TRACKER_PROCESSES" "$tracker_count"
  else
    output_result "FASTDFS_TRACKER_STATUS" "未运行"
  fi

  tracker_port_config=$(get_tracker_config "port")
  case "$tracker_port_config" in
    '' | *[!0-9]*) listen_port=22122 ;;
    *) listen_port=$tracker_port_config ;;
  esac

  if command -v ss >/dev/null 2>&1; then
    tracker_port=$(ss -tlnp 2>/dev/null | grep -c ":${listen_port} " || true)
    output_result "FASTDFS_TRACKER_PORT" "${tracker_port:-0}"
  fi

  if [ -n "$tracker_port_config" ]; then
    output_result "FASTDFS_TRACKER_PORT_CONFIG" "$tracker_port_config"
  fi
  return 0
}
#######################################
# Report Storage process and port status.
# Emits FASTDFS_STORAGE_STATUS (plus FASTDFS_STORAGE_PROCESSES when
# running), FASTDFS_STORAGE_PORT (number of listening sockets on the
# Storage port, only when `ss` is available) and
# FASTDFS_STORAGE_PORT_CONFIG (the configured port, when set).
# The port checked is the configured one, or 23000 when the configuration
# has no usable numeric value.
# Outputs:   records via output_result
# Returns:   0
#######################################
check_storage_status() {
  local storage_count storage_port storage_port_config listen_port

  # grep -c prints "0" itself on no match; the old `|| echo "0"` produced
  # "0\n0", which broke the numeric test and the emitted port value.
  storage_count=$(ps aux | grep -v grep | grep -c "fdfs_storaged" || true)
  if [ "${storage_count:-0}" -gt 0 ]; then
    output_result "FASTDFS_STORAGE_STATUS" "运行中"
    output_result "FASTDFS_STORAGE_PROCESSES" "$storage_count"
  else
    output_result "FASTDFS_STORAGE_STATUS" "未运行"
  fi

  storage_port_config=$(get_storage_config "port")
  case "$storage_port_config" in
    '' | *[!0-9]*) listen_port=23000 ;;
    *) listen_port=$storage_port_config ;;
  esac

  if command -v ss >/dev/null 2>&1; then
    storage_port=$(ss -tlnp 2>/dev/null | grep -c ":${listen_port} " || true)
    output_result "FASTDFS_STORAGE_PORT" "${storage_port:-0}"
  fi

  if [ -n "$storage_port_config" ]; then
    output_result "FASTDFS_STORAGE_PORT_CONFIG" "$storage_port_config"
  fi
  return 0
}
#######################################
# Report the Storage store-path count, the first store path and the disk
# usage of the filesystem holding it.
# Outputs:   FASTDFS_STORE_PATH_COUNT, FASTDFS_BASE_PATH and
#            FASTDFS_DISK_USAGE via output_result, each only when available
# Returns:   0
#######################################
check_storage_storage() {
  local store_path_count base_path key disk_usage

  store_path_count=$(get_storage_config "store_path_count")
  [ -n "$store_path_count" ] || return 0
  output_result "FASTDFS_STORE_PATH_COUNT" "$store_path_count"

  # storage.conf names the first data path "store_path0" and falls back to
  # "base_path" when it is absent. The previously used "base_path0" is not
  # a FastDFS key; it is still tried for backward compatibility.
  base_path=""
  for key in store_path0 base_path0 base_path; do
    base_path=$(get_storage_config "$key")
    [ -n "$base_path" ] && break
  done
  [ -n "$base_path" ] || return 0
  output_result "FASTDFS_BASE_PATH" "$base_path"

  if [ -d "$base_path" ]; then
    # -P keeps every filesystem on one line (plain -h may wrap long device
    # names), so the capacity is reliably field 5 of the last row.
    disk_usage=$(df -P "$base_path" 2>/dev/null | awk 'NR > 1 { usage = $5 } END { print usage }')
    if [ -n "$disk_usage" ]; then
      output_result "FASTDFS_DISK_USAGE" "$disk_usage"
    fi
  fi
  return 0
}
#######################################
# Report whether Storage synchronisation is configured, judged by the
# presence of a "sync_src_dir" value in the Storage configuration.
# NOTE(review): confirm that "sync_src_dir" is a key the deployed FastDFS
# version actually uses; otherwise this always reports "否".
# Outputs:   FASTDFS_SYNC_ENABLED ("是"/"否") and, when set,
#            FASTDFS_SYNC_SOURCE via output_result
#######################################
check_storage_sync() {
  local sync_source

  sync_source=$(get_storage_config "sync_src_dir")
  if [ -z "$sync_source" ]; then
    output_result "FASTDFS_SYNC_ENABLED" "否"
    return
  fi

  output_result "FASTDFS_SYNC_ENABLED" "是"
  output_result "FASTDFS_SYNC_SOURCE" "$sync_source"
}
#######################################
# Report file statistics for the first store path's data directory.
# Counts regular files and directories below <store path>/data and reports
# the `du -sh` total.
# NOTE: this walks the whole data tree and can be slow on large stores.
# Outputs:   FASTDFS_FILE_COUNT, FASTDFS_DIR_COUNT and FASTDFS_TOTAL_SIZE
#            via output_result; nothing when the data directory is missing
# Returns:   0
#######################################
check_storage_files() {
  local base_path key file_count dir_count total_size

  # storage.conf names the first data path "store_path0" and falls back to
  # "base_path"; the previously used "base_path0" is not a FastDFS key and
  # is only tried for backward compatibility.
  base_path=""
  for key in store_path0 base_path0 base_path; do
    base_path=$(get_storage_config "$key")
    [ -n "$base_path" ] && break
  done

  if [ -z "$base_path" ] || [ ! -d "$base_path/data" ]; then
    return 0
  fi

  file_count=$(find "$base_path/data" -type f 2>/dev/null | wc -l)
  output_result "FASTDFS_FILE_COUNT" "${file_count//[[:space:]]/}"

  dir_count=$(find "$base_path/data" -type d 2>/dev/null | wc -l)
  output_result "FASTDFS_DIR_COUNT" "${dir_count//[[:space:]]/}"

  total_size=$(du -sh "$base_path/data" 2>/dev/null | awk '{print $1}')
  if [ -n "$total_size" ]; then
    output_result "FASTDFS_TOTAL_SIZE" "$total_size"
  fi
  return 0
}
#######################################
# Report whether trunk files are enabled in the Storage configuration.
# "use_trunk_file" values "true" and "1" count as enabled; in that case the
# configured "trunk_file_size" is reported as well when present.
# Outputs:   FASTDFS_TRUNK_ENABLED ("是"/"否") and optionally
#            FASTDFS_TRUNK_SIZE via output_result
#######################################
check_trunk_status() {
  local trunk_flag trunk_file_size

  trunk_flag=$(get_storage_config "use_trunk_file")
  case "$trunk_flag" in
    true | 1)
      output_result "FASTDFS_TRUNK_ENABLED" "是"
      trunk_file_size=$(get_storage_config "trunk_file_size")
      [ -n "$trunk_file_size" ] && output_result "FASTDFS_TRUNK_SIZE" "$trunk_file_size"
      ;;
    *)
      output_result "FASTDFS_TRUNK_ENABLED" "否"
      ;;
  esac
}
#######################################
# Report the Tracker connection state seen by fdfs_monitor.
# Runs ${FASTDFS_BASE_DIR}/bin/fdfs_monitor with the tracker client.conf,
# keeps each line containing "tracker" plus the 5 lines after it, and
# reports ACTIVE when that excerpt contains "ACTIVE", INACTIVE otherwise.
# NOTE(review): only the lines close to a "tracker" match are inspected;
# confirm that the state keyword appears within that window in the
# fdfs_monitor output of the deployed version.
# Globals:   FASTDFS_BASE_DIR (read)
# Outputs:   FASTDFS_TRACKER_CONNECTION via output_result; nothing when
#            fdfs_monitor is missing or prints no matching lines
#######################################
check_storage_connections() {
  local monitor_excerpt connection_state

  check_fdfs_command || return 0

  monitor_excerpt=$("${FASTDFS_BASE_DIR}/bin/fdfs_monitor" "${FASTDFS_BASE_DIR}/tracker/conf/client.conf" 2>/dev/null | grep -A 5 "tracker")
  [ -n "$monitor_excerpt" ] || return 0

  connection_state="INACTIVE"
  if echo "$monitor_excerpt" | grep -q "ACTIVE"; then
    connection_state="ACTIVE"
  fi
  output_result "FASTDFS_TRACKER_CONNECTION" "$connection_state"
}
#######################################
# Report the size of the Storage log directory and a recent-error count.
# The error count is the number of lines containing "ERROR" among the last
# 100 lines of storaged.log.
# Outputs:   FASTDFS_LOG_SIZE (when available) and FASTDFS_RECENT_ERRORS
#            via output_result; nothing when no log directory is found
# Returns:   0
#######################################
check_fastdfs_logs() {
  local log_base base_path log_size error_count

  # FastDFS writes its logs to <base_path>/logs; storage.conf has no
  # "log_base" key, so relying on it alone never produced any output. The
  # old key is honoured first in case a deployment defines it.
  log_base=$(get_storage_config "log_base")
  if [ -z "$log_base" ]; then
    base_path=$(get_storage_config "base_path")
    if [ -n "$base_path" ]; then
      log_base="${base_path}/logs"
    fi
  fi

  if [ -z "$log_base" ] || [ ! -d "$log_base" ]; then
    return 0
  fi

  log_size=$(du -sh "$log_base" 2>/dev/null | awk '{print $1}')
  if [ -n "$log_size" ]; then
    output_result "FASTDFS_LOG_SIZE" "$log_size"
  fi

  # grep -c prints "0" itself on no match; the old `|| echo "0"` made the
  # value "0\n0".
  error_count=$(tail -n 100 "${log_base}/storaged.log" 2>/dev/null | grep -c "ERROR" || true)
  output_result "FASTDFS_RECENT_ERRORS" "${error_count:-0}"
  return 0
}
#######################################
# Report the Storage HTTP service status.
# HTTP counts as enabled unless "http.disabled" is "true" or "1". When
# enabled, the configured "http.server_port" is reported and, if `ss` is
# available, the number of listening sockets on that port.
# Outputs:   FASTDFS_HTTP_ENABLED ("是"/"否"), FASTDFS_HTTP_PORT and
#            FASTDFS_HTTP_STATUS via output_result
# Returns:   0
#######################################
check_http_status() {
  local http_disabled http_port http_listening

  http_disabled=$(get_storage_config "http.disabled")
  if [ "$http_disabled" = "true" ] || [ "$http_disabled" = "1" ]; then
    output_result "FASTDFS_HTTP_ENABLED" "否"
    return 0
  fi
  output_result "FASTDFS_HTTP_ENABLED" "是"

  http_port=$(get_storage_config "http.server_port")
  [ -n "$http_port" ] || return 0
  output_result "FASTDFS_HTTP_PORT" "$http_port"

  if command -v ss >/dev/null 2>&1; then
    # grep -c prints "0" itself on no match. The old `|| echo "0"` emitted
    # "0\n0", so a consumer comparing the value with "0" never matched.
    http_listening=$(ss -tlnp 2>/dev/null | grep -c ":${http_port} " || true)
    output_result "FASTDFS_HTTP_STATUS" "${http_listening:-0}"
  fi
  return 0
}
#######################################
# Report how many whole days the oldest fdfs_storaged process has been
# running, based on `pgrep -o` and `ps -o etimes=`.
# Outputs:   FASTDFS_STORAGE_UPTIME_DAYS via output_result; nothing when
#            the process or its elapsed time cannot be determined
#######################################
check_storage_uptime() {
  local oldest_pid elapsed

  oldest_pid=$(pgrep -o "fdfs_storaged" 2>/dev/null)
  [ -n "$oldest_pid" ] || return 0

  elapsed=$(ps -p "$oldest_pid" -o etimes= 2>/dev/null | tr -d ' ')
  if [ -z "$elapsed" ] || ! [ "$elapsed" -gt 0 ]; then
    return 0
  fi

  # Integer division: anything under 24 hours is reported as 0 days.
  output_result "FASTDFS_STORAGE_UPTIME_DAYS" "$((elapsed / 86400))"
}
#######################################
# Report the Storage group name from the "group_name" configuration key.
# Outputs:   FASTDFS_GROUP_NAME via output_result when the key has a value
#######################################
check_group_config() {
  local configured_group

  configured_group=$(get_storage_config "group_name")
  [ -z "$configured_group" ] || output_result "FASTDFS_GROUP_NAME" "$configured_group"
}
# ==================== Main check flow ====================
#######################################
# Entry point of the FastDFS deep check.
# Runs every check in order. Each check runs with stderr silenced and its
# failure ignored, so one broken check does not stop the rest.
# Returns:   the status of the final log_info call
#######################################
main() {
  local check

  log_info "开始FastDFS深度检测..."

  for check in \
    check_fastdfs_version \
    check_tracker_status \
    check_storage_status \
    check_storage_storage \
    check_storage_sync \
    check_storage_files \
    check_trunk_status \
    check_storage_connections \
    check_fastdfs_logs \
    check_http_status \
    check_storage_uptime \
    check_group_config; do
    "$check" 2>/dev/null || true
  done

  log_info "FastDFS深度检测完成"
}
# Run the main entry point (no arguments are forwarded).
main
...@@ -251,6 +251,7 @@ calculate_change() { ...@@ -251,6 +251,7 @@ calculate_change() {
trend = "持平" trend = "持平"
if (type == "percent") { if (type == "percent") {
# 百分比类型:直接显示变化值
abs_change = (change < 0 ? -change : change) abs_change = (change < 0 ? -change : change)
if (abs_change > 5) { if (abs_change > 5) {
if (change > 0) { if (change > 0) {
...@@ -261,12 +262,22 @@ calculate_change() { ...@@ -261,12 +262,22 @@ calculate_change() {
} }
printf "%.1f%%|%s", change, trend printf "%.1f%%|%s", change, trend
} else { } else {
# 计数类型:计算相对变化的百分比
if (last != 0) {
percent_change = (change / last) * 100
} else {
percent_change = 0
}
abs_percent = (percent_change < 0 ? -percent_change : percent_change)
if (abs_percent > 5) {
if (change > 0) { if (change > 0) {
trend = "🔴 增加" trend = "🔴 增加"
} else if (change < 0) { } else if (change < 0) {
trend = "🟢 减少" trend = "🟢 减少"
} }
printf "%.0f%%|%s", change, trend }
printf "%.1f%%|%s", percent_change, trend
} }
}') }')
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论