提交 d95380cf authored 作者: 陈泽健's avatar 陈泽健

fix(edit):调整AI服务器监测需求文档,完善工作流程。

上级 6d5f8905
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
自动处理部署脚本的交互式提示
"""
import sys
import os
import time
import subprocess
from datetime import datetime
def run_deployment_auto():
    """Run the remote deployment through plink, answering y/n prompts with ``yes``.

    Steps: kill any leftover ``new_auto.sh`` on the server, start
    ``yes y | ./new_auto.sh`` in the deployment directory, poll the process for
    up to 40 minutes (pressing ``q`` on a Windows console cancels), then list
    the running containers.

    Returns:
        int: 0 when at least 5 containers are running at the end, otherwise 1.
    """
    print("=" * 60)
    print("自动处理交互式提示的部署脚本")
    print("=" * 60)
    print(f"开始时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print()

    plink_path = r"E:\GithubData\ubains-module-test\AuxiliaryTool\ScriptTool\RemoteDeploy\plink.exe"
    # NOTE(review): host and credentials are hard-coded; consider moving them to configuration.
    plink_base = [plink_path, "-pw", "Ubains@123", "-P", "22", "root@192.168.5.52"]

    def run_remote(command, **kwargs):
        """Run one short remote command and capture its text output."""
        return subprocess.run(plink_base + [command], capture_output=True, text=True,
                              creationflags=subprocess.CREATE_NO_WINDOW, **kwargs)

    # Kill any deployment that is still running before starting a new one.
    print("1. 检查并清理现有进程...")
    run_remote("pkill -9 -f new_auto.sh; sleep 2")
    print(" 已清理现有进程")

    print("2. 准备部署命令...")
    # `yes y` feeds "y" to every y/n prompt of the deployment script.
    deploy_commands = """
cd /data/offline_auto_unifiedPlatform
# 使用yes命令自动响应所有的y/n提示
yes y | ./new_auto.sh
"""

    print("3. 开始部署(使用yes命令自动响应所有提示)...")
    print(" 这将需要大约40分钟时间...")
    print()

    # The output is never consumed here, so it must not go to a pipe: an unread
    # pipe fills up and blocks the child process.
    process = subprocess.Popen(plink_base + [deploy_commands],
                               stdout=subprocess.DEVNULL,
                               stderr=subprocess.DEVNULL,
                               creationflags=subprocess.CREATE_NO_WINDOW)

    try:
        import msvcrt  # Windows-only; used for the optional "press q to cancel"
    except ImportError:
        msvcrt = None

    start_time = time.time()
    max_time = 2400  # 40 minutes
    last_reported_minute = 0
    print("开始监控部署过程...")
    print("-" * 60)
    while time.time() - start_time < max_time:
        try:
            if process.poll() is not None:
                print("\n[完成] 部署进程已结束")
                break

            if msvcrt is not None and msvcrt.kbhit():
                if msvcrt.getch() == b'q':
                    print("\n用户取消部署")
                    process.terminate()
                    break

            # Report once per elapsed minute. Comparing minute counters is
            # reliable with a 10 s poll interval, unlike `elapsed % 60 == 0`.
            minutes = int(time.time() - start_time) // 60
            if minutes > last_reported_minute:
                last_reported_minute = minutes
                print(f"[进度] 部署进行中... 已用时: {minutes}分钟")
                try:
                    check_result = run_remote("docker ps | wc -l", timeout=10)
                    container_count = check_result.stdout.strip()
                    if container_count and int(container_count) > 1:
                        print(f"[容器] 已启动 {int(container_count)-1} 个容器")
                except (subprocess.SubprocessError, OSError, ValueError):
                    # The container count is best-effort progress information only.
                    pass
            time.sleep(10)
        except KeyboardInterrupt:
            print("\n用户中断部署")
            process.terminate()
            break
        except Exception as e:
            print(f"[WARN] 监控出错: {str(e)}")
            time.sleep(10)
    else:
        print("\n[WARN] 监控超时,部署进程仍在运行")

    print()
    print("=" * 60)
    print("部署完成检查")
    print("=" * 60)
    try:
        final_result = run_remote("docker ps --format 'table {{.Names}}\t{{.Status}}'",
                                  timeout=30)
        print("\n最终容器状态:")
        print(final_result.stdout)
        # One header row plus one row per container; count non-empty rows so the
        # result does not depend on a trailing newline.
        rows = [row for row in final_result.stdout.splitlines() if row.strip()]
        container_count = max(len(rows) - 1, 0)
        print(f"\n运行中的容器数量: {container_count}")
        if container_count >= 5:
            print("\n[SUCCESS] 部署成功完成!")
            print("\n后续步骤:")
            print("1. 系统授权: https://192.168.5.52/#/LoginConfig")
            print(" 账号: superadmin / Ubains@1357")
            print(" 验证码: csba")
            print("\n2. 创建管理员: 为'自动化'公司创建admin用户")
            return 0
        print("\n[WARN] 部署可能未完全完成")
        return 1
    except Exception as e:
        print(f"\n[ERROR] 最终检查失败: {str(e)}")
        return 1
if __name__ == '__main__':
    # Script entry point: any unexpected exception becomes exit code 1.
    try:
        exit_code = run_deployment_auto()
    except Exception as e:
        print(f"[ERROR] 部署失败: {str(e)}")
        import traceback
        traceback.print_exc()
        exit_code = 1
    sys.exit(exit_code)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
自动处理whiptail菜单的部署脚本
使用paramiko的invoke_shell和自动响应
"""
import sys
import os
import time
import threading
import queue
# 添加当前目录到路径
sys.path.insert(0, os.path.dirname(__file__))
import paramiko
class DeploymentAutomation:
    """Drive ``new_auto.sh`` through an interactive SSH shell and answer its prompts."""

    def __init__(self, host, username, password):
        """Store the connection settings; nothing is connected yet."""
        self.host = host
        self.username = username
        self.password = password
        self.client = None
        self.shell = None
        self.output_queue = queue.Queue()

    def log(self, message, level="INFO"):
        """Print *message* with a tag chosen by *level* (unknown levels get ``[INFO]``)."""
        prefix = {"INFO": "[OK]", "ERROR": "[ERROR]", "WARN": "[WARN]"}
        print(f"{prefix.get(level, '[INFO]')} {message}")

    def connect(self):
        """Open the SSH connection. Returns True on success, False on failure."""
        try:
            self.client = paramiko.SSHClient()
            self.client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
            self.client.connect(self.host, username=self.username,
                                password=self.password, timeout=30)
            self.log("SSH连接成功")
            return True
        except Exception as e:
            self.log(f"SSH连接失败: {str(e)}", "ERROR")
            return False

    def create_shell(self):
        """Open an interactive shell channel. Returns True on success, False on failure."""
        try:
            self.shell = self.client.invoke_shell()
            time.sleep(1)
            self.log("交互式shell创建成功")
            return True
        except Exception as e:
            self.log(f"Shell创建失败: {str(e)}", "ERROR")
            return False

    def read_output(self, timeout=5):
        """Collect shell output until *timeout* seconds pass without new data.

        Returns the decoded text read so far; a read error is logged and the
        text gathered before the error is still returned.
        """
        import codecs  # local import: only this method needs it

        # Decode incrementally so a multi-byte UTF-8 character split across two
        # recv() calls is not dropped.
        decoder = codecs.getincrementaldecoder('utf-8')(errors='ignore')
        output = ""
        deadline = time.time() + timeout
        try:
            while time.time() < deadline:
                if self.shell.recv_ready():
                    raw = self.shell.recv(4096)
                    output += decoder.decode(raw)
                    if raw:
                        deadline = time.time() + timeout  # new data: restart the idle timer
                else:
                    time.sleep(0.1)
        except Exception as e:
            self.log(f"读取输出失败: {str(e)}", "WARN")
        return output

    def send_command(self, command, wait_time=1):
        """Send *command* followed by a newline, then wait *wait_time* seconds."""
        try:
            self.shell.send(command + "\n")
            time.sleep(wait_time)
        except Exception as e:
            self.log(f"发送命令失败: {str(e)}", "ERROR")

    def run_deployment(self):
        """Start ``new_auto.sh``, react to its prompts and check the resulting containers.

        Returns True when the final container count is at least 5, otherwise
        False (also on any exception).
        """
        try:
            self.log("开始部署流程")
            # Drop the login banner and prompt.
            self.read_output(timeout=1)

            self.log("切换到部署目录")
            self.send_command("cd /data/offline_auto_unifiedPlatform", wait_time=2)

            self.log("启动部署脚本 (new_auto.sh)")
            self.send_command("./new_auto.sh", wait_time=3)

            self.log("监控部署过程,等待菜单出现...")
            deployment_start = time.time()
            max_time = 2400  # 40 minutes
            menu_handled = False
            last_logged_minute = 0
            while time.time() - deployment_start < max_time:
                output = self.read_output(timeout=10)
                if output:
                    if 'whiptail' in output or '选择系统' in output:
                        if not menu_handled:
                            self.log("检测到whiptail菜单,自动选择'全部系统'")
                            # Space toggles the entry, Enter confirms.
                            self.shell.send(" ")
                            time.sleep(1)
                            self.shell.send("\n")
                            time.sleep(2)
                            menu_handled = True
                    elif 'Press any key' in output or '按任意键' in output:
                        self.log("检测到按键提示,发送回车")
                        self.send_command("", wait_time=1)
                    elif '部署完成' in output or '部署成功' in output:
                        self.log("部署完成!")
                        break
                    elif 'error' in output.lower() and 'fatal' in output.lower():
                        self.log(f"检测到错误: {output[-200:]}", "WARN")

                # Log once per elapsed minute. read_output() blocks for seconds
                # at a time, so testing `elapsed % 60 == 0` would rarely fire.
                minutes = int(time.time() - deployment_start) // 60
                if minutes > last_logged_minute:
                    last_logged_minute = minutes
                    self.log(f"部署进行中... 已用时: {minutes}分钟")
            else:
                self.log("等待部署完成超时", "WARN")

            self.log("检查部署结果")
            # Send a literal backslash-t: a real TAB typed into the interactive
            # shell would be taken as a completion key instead of reaching docker.
            self.send_command("docker ps --format 'table {{.Names}}\\t{{.Status}}'", wait_time=3)
            docker_output = self.read_output(timeout=5)
            print("\n" + "=" * 50)
            print("部署完成 - 容器状态:")
            print("=" * 50)
            print(docker_output)
            # NOTE(review): the shell output presumably also contains the echoed
            # command and the prompt, so this count is approximate - confirm.
            container_count = docker_output.count('\n') - 1
            print(f"\n运行中的容器数量: {container_count}")
            if container_count >= 5:
                self.log("部署成功完成!")
                return True
            self.log("部署可能未完全完成", "WARN")
            return False
        except Exception as e:
            self.log(f"部署过程出错: {str(e)}", "ERROR")
            import traceback
            traceback.print_exc()
            return False

    def close(self):
        """Close the shell channel and the SSH client if they were opened."""
        if self.shell:
            self.shell.close()
        if self.client:
            self.client.close()
def main():
    """Connect, run the whiptail-aware deployment and print the follow-up steps.

    Returns:
        int: 0 when the deployment reports success, otherwise 1. The
        connection is always closed before returning.
    """
    banner = "=" * 60
    print(banner)
    print("自动化部署脚本 (带whiptail菜单自动处理)")
    print(banner)

    settings = {
        'host': '192.168.5.52',
        'username': 'root',
        'password': 'Ubains@123',
    }
    deployment = DeploymentAutomation(**settings)
    try:
        # Both steps must succeed; the shell is only created after a successful connect.
        if not (deployment.connect() and deployment.create_shell()):
            return 1

        success = deployment.run_deployment()
        print("\n" + banner)
        if not success:
            print("部署未完全完成,请检查日志")
        else:
            for line in (
                "部署执行完成",
                "\n后续步骤:",
                "1. 系统授权: https://192.168.5.52/#/LoginConfig",
                " 账号: superadmin / Ubains@1357",
                " 验证码: csba",
                "\n2. 创建管理员: 为'自动化'公司创建admin用户",
            ):
                print(line)
        print(banner)
        return 1 if not success else 0
    finally:
        deployment.close()
if __name__ == '__main__':
    # Propagate main()'s return value as the process exit status.
    raise SystemExit(main())
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
受控部署脚本 - 分步骤执行部署
"""
import sys
import os
import time
# 添加当前目录到路径
sys.path.insert(0, os.path.dirname(__file__))
from auto_deployment_python import RemoteDeploymentAutomation
def main():
    """Run the deployment step by step over an interactive SSH shell.

    Step 1 connects and prepares the environment, step 2 starts
    ``new_auto.sh`` and watches its output for up to 40 minutes, step 3 lists
    the running containers.

    Returns:
        int: 0 when all steps ran, 1 when a connection or preparation step
        failed or an exception occurred. Connections are closed on every path.
    """
    print("=" * 50)
    print("受控自动化部署")
    print("=" * 50)
    config = {
        'host': '192.168.5.52',
        'username': 'root',
        'password': 'Ubains@123',
        'license_path': r'E:\自动化部署\X86-5.52\license.zip'
    }
    deployment = RemoteDeploymentAutomation(**config)

    def close_connections():
        """Close the shell channel first, then the client that owns it."""
        for attr in ("ssh_shell", "ssh_client"):
            conn = getattr(deployment, attr, None)
            if conn is None:
                continue
            try:
                conn.close()
            except Exception as close_error:
                print(f"[WARN] 关闭连接失败: {str(close_error)}")

    try:
        # Step 1: connect and prepare the environment.
        print("\n步骤1: 连接SSH并准备环境")
        print("-" * 50)
        if not deployment.connect_ssh():
            print("[ERROR] SSH连接失败")
            return 1
        print("[OK] SSH连接成功")
        if not deployment.prepare_deployment_environment():
            print("[ERROR] 环境准备失败")
            return 1
        print("[OK] 环境准备完成")

        # Step 2: start the deployment script.
        print("\n步骤2: 启动部署脚本")
        print("-" * 50)
        print("注意: 此步骤需要手动选择'全部系统'选项")
        print("部署预计需要40分钟时间...")
        if not deployment.connect_ssh_interactive():
            print("[ERROR] 交互式连接失败")
            return 1

        print("切换到部署目录...")
        deployment.ssh_shell.send("cd /data/offline_auto_unifiedPlatform\n")
        time.sleep(2)
        # Drop the banner/prompt output so it is not mistaken for script output.
        try:
            while deployment.ssh_shell.recv_ready():
                deployment.ssh_shell.recv(1024)
        except Exception:
            pass

        print("启动部署脚本 (new_auto.sh)...")
        deployment.ssh_shell.send("./new_auto.sh\n")
        time.sleep(3)

        print("开始监控部署过程...")
        print("如果出现菜单,请手动选择'全部系统'选项")
        deployment_start = time.time()
        max_deployment_time = 2400  # 40 minutes
        menu_detected = False
        output_buffer = ""
        last_reported_minute = 0
        while time.time() - deployment_start < max_deployment_time:
            try:
                if deployment.ssh_shell.recv_ready():
                    chunk = deployment.ssh_shell.recv(4096).decode('utf-8', errors='ignore')
                    output_buffer += chunk
                    # Menu shown: confirm the default entry once.
                    if 'whiptail' in chunk or '选择系统' in chunk or '全部系统' in chunk:
                        if not menu_detected:
                            print("\n[检测到菜单] 尝试自动选择'全部系统'...")
                            deployment.ssh_shell.send("\n")
                            menu_detected = True
                            time.sleep(2)
                    if '开始部署' in output_buffer or '正在部署' in output_buffer:
                        if not menu_detected:
                            print("\n[OK] 部署已开始")
                            menu_detected = True
                    if '部署完成' in output_buffer or '部署成功' in output_buffer:
                        print("\n[OK] 部署完成!")
                        break
                    if 'error' in chunk.lower() and 'fatal' in chunk.lower():
                        print(f"\n[ERROR] 检测到严重错误")
                        print(f"错误信息: {chunk[-200:]}")

                # Report once per elapsed minute. With a 5 s poll interval,
                # `elapsed % 60 == 0` would only fire by coincidence.
                minutes = int(time.time() - deployment_start) // 60
                if minutes > last_reported_minute:
                    last_reported_minute = minutes
                    print(f"[进度] 部署进行中... 已用时: {minutes}分钟")
                time.sleep(5)
            except Exception as e:
                print(f"[WARN] 监控时出错: {str(e)}")
                time.sleep(5)

        # Step 3: check the result.
        print("\n步骤3: 检查部署结果")
        print("-" * 50)
        # Send a literal backslash-t: a real TAB typed into the interactive
        # shell would be taken as a completion key instead of reaching docker.
        deployment.ssh_shell.send("docker ps --format 'table {{.Names}}\\t{{.Status}}'\n")
        time.sleep(3)
        docker_output = ""
        try:
            while deployment.ssh_shell.recv_ready():
                docker_output += deployment.ssh_shell.recv(4096).decode('utf-8', errors='ignore')
        except Exception:
            pass
        print("容器状态:")
        print(docker_output)

        # Subtract the header row.
        # NOTE(review): the echoed command and prompt are presumably part of
        # this output too, so the count is approximate - confirm.
        container_count = docker_output.count('\n') - 1
        print(f"\n运行中的容器数量: {container_count}")
        if container_count >= 5:
            print("[OK] 部署成功,已启动多个容器")
        elif container_count > 0:
            print("[WARN] 部署可能未完全完成,容器数量较少")
        else:
            print("[ERROR] 部署可能失败,没有容器运行")

        print("\n清理连接...")
        close_connections()

        print("\n" + "=" * 50)
        print("部署脚本执行完成")
        print("=" * 50)
        print("\n后续步骤:")
        print("1. 系统授权: https://192.168.5.52/#/LoginConfig")
        print(" 账号: superadmin / Ubains@1357")
        print(" 验证码: csba")
        print(" 授权文件: E:\\自动化部署\\X86-5.52\\license.zip")
        print("\n2. 创建管理员: 为'自动化'公司创建admin用户")
        print("\n3. 验收测试: 检查服务接口状态")
        return 0
    except Exception as e:
        print(f"\n[ERROR] 部署过程出错: {str(e)}")
        import traceback
        traceback.print_exc()
        return 1
    finally:
        # Also covers the early-return and exception paths.
        # NOTE(review): assumes calling close() twice is harmless - confirm.
        close_connections()
if __name__ == '__main__':
    # Propagate main()'s return value as the process exit status.
    raise SystemExit(main())
#!/usr/bin/expect -f
# Answers the interactive prompts of the deployment script automatically.
#
# NOTE: keep comments OUTSIDE the braces of a multi-pattern `expect { ... }`
# command. Inside them "#" does not start a comment; it and the words after it
# are read as patterns and actions, which misaligns every following pair.
set timeout 300

# Server connection settings.
# NOTE(review): the password is stored in clear text in this script.
set host "192.168.5.52"
set username "root"
set password "Ubains@123"

# Shell prompt: "]#" at the end of the buffer. The directory shown before "]#"
# changes after `cd`, so the prompt must not be matched as the literal "~]#".
set prompt {\]# ?$}

# Log in over SSH.
spawn ssh $username@$host
expect {
    "yes/no" {
        send "yes\r"
        exp_continue
    }
    "password:" {
        send "$password\r"
    }
    timeout {
        puts "连接超时"
        exit 1
    }
}

# Wait for the shell prompt, then enter the deployment directory.
expect -re $prompt
send "cd /data/offline_auto_unifiedPlatform\r"
expect -re $prompt

# Start the deployment script.
send "./new_auto.sh\r"

# Prompt handling, in order:
#   - continue / confirm-machine-info questions  -> answer "y"
#   - "press Enter if correct"                   -> Enter
#   - current date / current time                -> local date and time
#   - custom NTP question                        -> answer "n"
#   - whiptail system menu                       -> Space to select, Enter to confirm
#   - "select system" prompt                     -> Enter
#   - "deployment finished/succeeded"            -> stop waiting
#   - timeout (300 s without a match)            -> keep waiting
#   - eof (session closed)                       -> abort, nothing left to talk to
expect {
    "是否继续执行脚本" {
        send "y\r"
        exp_continue
    }
    "确认当前机器信息" {
        send "y\r"
        exp_continue
    }
    "确认无误请按" {
        send "\r"
        exp_continue
    }
    "请输入当前日期" {
        set current_date [clock format [clock seconds] -format "%Y/%m/%d"]
        send "$current_date\r"
        exp_continue
    }
    "请输入当前时间" {
        set current_time [clock format [clock seconds] -format "%H:%M:%S"]
        send "$current_time\r"
        exp_continue
    }
    "是否使用自定义NTP" {
        send "n\r"
        exp_continue
    }
    "whiptail" {
        send " "
        sleep 1
        send "\r"
        exp_continue
    }
    "选择系统" {
        send "\r"
        exp_continue
    }
    "部署完成" {
        puts "部署完成!"
        send "\r"
    }
    "部署成功" {
        puts "部署成功!"
        send "\r"
    }
    timeout {
        puts "等待部署中..."
        exp_continue
    }
    eof {
        puts "脚本执行结束"
        exit 1
    }
}

# Wait until the shell prompt is back.
expect -re $prompt {
    puts "返回到shell提示符"
}

# List the containers. "\\t" sends a literal backslash-t; a real TAB typed into
# the interactive shell would be taken as a completion key.
send "docker ps --format 'table {{.Names}}\\t{{.Status}}'\r"
expect -re $prompt

# Hand the session over to the user so the output can be inspected.
interact
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
部署监控脚本 - 实时监控部署进度
"""
import sys
import time
import subprocess
from datetime import datetime, timedelta
def run_ssh_command(command):
    """Run *command* on the deployment server through plink.

    Args:
        command: Shell command line to execute remotely.

    Returns:
        tuple[str, str]: Decoded ``(stdout, stderr)`` of the plink process.

    Raises:
        subprocess.TimeoutExpired: If plink does not finish within 30 seconds.
    """
    plink_path = r"E:\GithubData\ubains-module-test\AuxiliaryTool\ScriptTool\RemoteDeploy\plink.exe"
    # subprocess.run() kills the child when the timeout expires.
    # Popen.communicate(timeout=...) alone would leave plink running.
    completed = subprocess.run(
        [plink_path, "-pw", "Ubains@123", "-P", "22", "root@192.168.5.52", command],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        timeout=30,
        creationflags=subprocess.CREATE_NO_WINDOW
    )
    return (completed.stdout.decode('utf-8', errors='ignore'),
            completed.stderr.decode('utf-8', errors='ignore'))
def main():
    """Poll the server every 30 seconds until the deployment ends or 45 minutes pass.

    Each round prints the ``new_auto.sh`` process, the running containers and
    the matching image count. Afterwards a final container check decides the
    result.

    Returns:
        int: 0 when the final check finds at least 5 containers, otherwise 1.
    """
    rule = "=" * 50
    print(rule)
    print("部署进度监控")
    print(rule)
    print(f"开始时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print()

    started_at = time.time()
    max_monitor_time = 2700  # 45 minutes
    check_count = 0
    while time.time() - started_at < max_monitor_time:
        check_count += 1
        current_time = datetime.now().strftime('%H:%M:%S')
        elapsed = int(time.time() - started_at)
        print(f"\n[{current_time}] 检查 #{check_count} (已监控: {elapsed}秒/{int(max_monitor_time/60)}分钟)")
        print("-" * 50)
        try:
            # Deployment process: stop monitoring as soon as it is gone.
            ps_out, _ = run_ssh_command('ps aux | grep new_auto | grep -v grep')
            if 'new_auto.sh' not in ps_out:
                print("[完成] 部署进程已结束")
                break
            for row in ps_out.strip().split('\n'):
                if 'new_auto.sh' not in row:
                    continue
                fields = row.split()
                if len(fields) >= 9:
                    print(f"[运行中] PID: {fields[1]}, CPU: {fields[2]}%, MEM: {fields[3]}%")

            # Running containers (at most 10 names are listed).
            names_out, _ = run_ssh_command('docker ps --format "{{.Names}}"')
            containers = list(filter(None, names_out.strip().split('\n')))
            container_count = len(containers)
            if not containers:
                print("[容器] 尚未启动")
            else:
                print(f"[容器] 运行中: {container_count}个")
                for container in containers[:10]:
                    print(f" - {container}")

            # Images whose repository name matches the project keywords.
            images_out, _ = run_ssh_command('docker images --format "{{.Repository}}" | grep -E "ubains|meeting|monitor" | wc -l')
            print(f"[镜像] 已下载: {images_out.strip()}个")

            if container_count >= 5:
                print("\n[SUCCESS] 部署成功!已启动所有容器")
                break
            # The estimate is simply the monitoring time that is left.
            estimated_minutes = int((max_monitor_time - elapsed) / 60)
            phase = "容器启动中" if container_count > 0 else "正在部署"
            print(f"[进度] {phase}... 预计还需{estimated_minutes}分钟")
        except Exception as e:
            print(f"[ERROR] 检查失败: {str(e)}")

        print("\n等待30秒后继续监控...")
        time.sleep(30)

    print("\n" + rule)
    print("部署完成 - 最终状态")
    print(rule)
    try:
        table_out, _ = run_ssh_command('docker ps --format "table {{.Names}}\t{{.Status}}\t{{.Ports}}"')
        print("\n容器详细状态:")
        print(table_out)

        # `docker ps | wc -l` includes the header line.
        count_out, _ = run_ssh_command('docker ps | wc -l')
        final_count = int(count_out.strip()) - 1
        print(f"\n运行中的容器总数: {final_count}")
        if final_count < 5:
            print("\n[WARN] 部署未完全完成,请检查日志")
            return 1
        for line in (
            "\n[SUCCESS] 部署成功完成",
            "\n后续步骤:",
            "1. 系统授权: 访问 https://192.168.5.52/#/LoginConfig",
            " 账号: superadmin / Ubains@1357",
            " 验证码: csba",
            "\n2. 创建管理员: 为'自动化'公司创建admin用户",
            "\n3. 验收测试: 检查服务接口",
        ):
            print(line)
        return 0
    except Exception as e:
        print(f"\n[ERROR] 最终状态检查失败: {str(e)}")
        return 1
if __name__ == '__main__':
    # Propagate main()'s return value as the process exit status.
    raise SystemExit(main())
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
简单部署测试 - 仅测试部署准备
"""
import sys
import os
# 添加当前目录到路径
sys.path.insert(0, os.path.dirname(__file__))
from auto_deployment_python import RemoteDeploymentAutomation
def main():
    """Smoke-test the deployment prerequisites without deploying anything.

    Checks the SSH connection, the environment preparation and an interactive
    shell round trip (``echo``).

    Returns:
        int: 1 when the SSH connection fails, otherwise 0. Failures of the
        later checks are only reported on the console.
    """
    print("部署准备测试")
    print("=" * 50)
    config = {
        'host': '192.168.5.52',
        'username': 'root',
        'password': 'Ubains@123',
        'license_path': r'E:\自动化部署\X86-5.52\license.zip'
    }
    deployment = RemoteDeploymentAutomation(**config)

    print("测试SSH连接...")
    if not deployment.connect_ssh():
        print("[ERROR] SSH连接失败")
        return 1
    print("[OK] SSH连接成功")

    try:
        print("测试环境准备...")
        if not deployment.prepare_deployment_environment():
            print("[ERROR] 环境准备失败")
            return 0
        print("[OK] 环境准备完成")

        print("测试交互式连接...")
        if not deployment.connect_ssh_interactive():
            print("[ERROR] 交互式连接失败")
            return 0
        print("[OK] 交互式连接成功")

        print("发送测试命令...")
        deployment.ssh_shell.send("echo 'Test successful'\n")
        import time
        time.sleep(2)

        # Read whatever the shell produced; a read error just ends the read.
        output = ""
        try:
            while deployment.ssh_shell.recv_ready():
                output += deployment.ssh_shell.recv(4096).decode('utf-8', errors='ignore')
        except Exception:
            pass

        if 'Test successful' in output:
            print("[OK] 测试命令执行成功")
            print("响应:", output.strip())
        else:
            print("[ERROR] 测试命令执行失败")
            print("输出:", output)
        return 0
    finally:
        # Close the shell channel (if one was opened) before its client, on
        # every path including exceptions.
        shell = getattr(deployment, "ssh_shell", None)
        if shell is not None:
            shell.close()
        deployment.ssh_client.close()
if __name__ == '__main__':
    # Propagate main()'s return value as the process exit status.
    raise SystemExit(main())
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
部署脚本测试 - 验证基本功能
"""
import sys
import os
# 添加当前目录到路径
sys.path.insert(0, os.path.dirname(__file__))
from auto_deployment_python import RemoteDeploymentAutomation
def test_ssh_connection():
    """Check that SSH works and list the deployment scripts on the server.

    Returns:
        bool: True when the SSH connection succeeded, otherwise False.
    """
    rule = "=" * 50
    print(rule)
    print("测试1: SSH连接")
    print(rule)

    deployment = RemoteDeploymentAutomation(
        host='192.168.5.52',
        username='root',
        password='Ubains@123',
        license_path=r'E:\自动化部署\X86-5.52\license.zip',
    )

    if not deployment.connect_ssh():
        print("[ERROR] SSH连接失败")
        return False
    print("[OK] SSH连接成功 (paramiko)")

    # A failed environment check is not reported and does not fail the test.
    if deployment.prepare_deployment_environment():
        print("[OK] 部署环境检查完成")

    # List the shell scripts in the deployment directory.
    _, listing_stream, _ = deployment.ssh_client.exec_command(
        'ls -la /data/offline_auto_unifiedPlatform/*.sh')
    scripts = listing_stream.read().decode('utf-8', errors='ignore')
    print("\n部署脚本:")
    print(scripts)

    deployment.ssh_client.close()
    return True
def test_deployment_script_status():
    """Show the deployment script's file info and any running deployment process.

    Returns:
        bool: True when the SSH connection succeeded, otherwise False.
    """
    print("\n" + "=" * 50)
    print("测试2: 部署脚本状态")
    print("=" * 50)

    deployment = RemoteDeploymentAutomation(
        host='192.168.5.52',
        username='root',
        password='Ubains@123',
        license_path=r'E:\自动化部署\X86-5.52\license.zip',
    )
    if not deployment.connect_ssh():
        return False

    def remote_text(command):
        """Run *command* over the open client and return its decoded stdout."""
        _, out_stream, _ = deployment.ssh_client.exec_command(command)
        return out_stream.read().decode('utf-8', errors='ignore')

    # File info (permissions, size) of the deployment script.
    script_info = remote_text('ls -l /data/offline_auto_unifiedPlatform/new_auto.sh')
    print(f"部署脚本信息: {script_info.strip()}")

    # A deployment that is already running is reported as a warning.
    running_processes = remote_text('ps aux | grep new_auto | grep -v grep')
    if not running_processes.strip():
        print("[OK] 无运行中的部署进程")
    else:
        print("[WARN] 检测到运行中的部署进程:")
        print(running_processes)

    deployment.ssh_client.close()
    return True
def test_web_connectivity():
    """Request the three web entry pages and print whether each one answers.

    Certificate verification is disabled for these requests. The outcome is
    only printed.

    Returns:
        bool: Always True.
    """
    print("\n" + "=" * 50)
    print("测试3: Web连接")
    print("=" * 50)

    import requests
    requests.packages.urllib3.disable_warnings()

    # The automation object is constructed as in the other tests, although
    # nothing below uses it.
    deployment = RemoteDeploymentAutomation(
        host='192.168.5.52',
        username='root',
        password='Ubains@123',
        license_path=r'E:\自动化部署\X86-5.52\license.zip',
    )

    pages = (
        ('维护平台', 'https://192.168.5.52/#/LoginConfig'),
        ('后台管理', 'https://192.168.5.52/#/LoginAdmin'),
        ('前台页面', 'https://192.168.5.52/'),
    )
    for name, url in pages:
        try:
            response = requests.get(url, verify=False, timeout=10)
            reachable = response.status_code == 200
            print(f"[OK] {name}: 可访问 ({url})" if reachable
                  else f"[WARN] {name}: 状态码 {response.status_code}")
        except Exception as e:
            print(f"[ERROR] {name}: {str(e)}")
    return True
def main():
    """Run the three functional tests in order and print a pass/fail summary.

    Returns:
        int: 0 when every test returned a truthy result, otherwise 1.
    """
    print("Python自动化部署脚本 - 功能测试")
    print("目标服务器: 192.168.5.52")
    print()

    checks = (
        ("SSH连接", test_ssh_connection),
        ("部署脚本状态", test_deployment_script_status),
        ("Web连接", test_web_connectivity),
    )
    # Every test runs, even when an earlier one failed.
    results = [(label, check()) for label, check in checks]

    print("\n" + "=" * 50)
    print("测试结果汇总")
    print("=" * 50)
    for label, passed in results:
        status = "[OK] PASS" if passed else "[ERROR] FAIL"
        print(f"{label}: {status}")

    all_passed = all(passed for _, passed in results)
    if not all_passed:
        print("\n[WARN] 部分测试失败,请检查环境配置。")
        return 1
    print("\n[OK] 所有测试通过!可以执行完整部署。")
    return 0
if __name__ == '__main__':
    # Propagate main()'s return value as the process exit status.
    raise SystemExit(main())
# AI分析_需求文档
## 参考资料
1. [服务器监测指标报告](Docs/PRD/AI服务器监测/服务器监测指标参考报告.md)
2. [JSON格式说明](Docs/PRD/AI服务器监测/通用模块/钉钉通知/JSON格式说明.md)
## AI调用
1. 接口协议工具:https://ark.cn-beijing.volces.com/api/coding
2. 密钥:`ark-xxx`(真实密钥只保存在 config.json 中,不要以明文写入文档或提交到仓库)。
## 分析指标
1. 服务可用性
2. 数据库健康性
3. 系统资源
4. 异常问题分析
- 严重问题分析
- 警告问题分析
5. 与历史报告对比分析
## 提取JSON
1. 根据分析结果提取成JSON文件,参考[Docs/PRD/AI服务器监测/通用模块/钉钉通知/JSON格式说明.md]格式。
# 判断标准参考
## 阈值配置
- CPU使用率 > 85% = 警告, > 95% = 严重
- 内存使用率 > 85% = 警告, > 95% = 严重
- 磁盘使用率 > 90% = 警告, > 95% = 严重
- Inode使用率 > 90% = 警告
- Swap任何使用 = 警告, > 20% = 严重
- 系统负载 > 8 = 警告, > 16 = 严重
- 磁盘IO %util > 80% = 警告
- MySQL慢查询 > 100 = 警告, > 1000000 = 严重
- MySQL连接使用率 > 80% = 警告
- MySQL缓冲池命中率 < 95% = 警告
- 线程总数 > 1000 = 警告, > 3000 = 严重
- 文件描述符使用率 > 80% = 警告
- Redis键数量 > 1000000 = 警告
- Redis内存碎片率 > 5 = 警告, > 10 = 严重
- Redis缓存命中率 < 90% = 警告
- TIME_WAIT连接 > 500 = 警告, > 5000 = 严重
- CLOSE_WAIT连接 > 100 = 警告
- 僵尸进程 > 0 = 严重
- D状态进程 > 5 = 警告
- Full GC > 5次 = 警告, > 20次 = 严重
- Docker容器重启次数 > 0 = 警告
- 容器日志文件 > 500MB = 警告, > 2GB = 严重
- 认证失败(24h) > 100 = 警告, > 1000 = 严重
- SSL证书剩余 < 30天 = 警告, < 7天 = 严重
- NTP时钟偏差 > 1秒 = 警告
- 应用错误(单次巡检) > 50 = 警告, > 200 = 严重
- OOM事件 > 0 = 严重
## 进程资源图标
- CPU > 80%: 严重
- CPU 50-80%: 警告
- CPU < 50%: 正常
- 内存 > 80%: 严重
- 内存 50-80%: 警告
- 内存 < 50%: 正常
- 线程数 > 500: 严重
- 线程数 50-500: 警告
- 线程数 < 50: 正常
## 整体状态判断
- 严重问题 >= 3: 严重
- 严重问题 >= 1: 严重
- 警告 >= 3: 警告
- 警告 >= 1: 警告
- 无问题: 正常
---
# 注意事项
1. **安全性**: 只读操作优先,不要执行任何修改命令
2. **深度**: 不仅使用top,必须结合ps、ss、lsof、/proc等进行深层诊断
3. **完整性**: 120+检测点必须全部执行,不能遗漏
4. **客观性**: 所有数值必须来自实际命令输出,不要臆测
5. **可操作性**: AI建议必须具体可执行,不能泛泛而谈
6. **容器化适配**: 注意MySQL端口映射是8306→3306,不是直接3306
7. **JVM兼容**: ujava2容器内未安装jstat/jmap工具,需要通过/proc替代方案获取JVM信息
8. **历史对比**: 每次巡检保存JSON数据,下次自动进行对比分析
9. **错误分类**: 应用日志错误需要细分为Dubbo/钉钉/MQTT/MySQL/MyBatis等具体类型
10. **趋势预警**: 关注资源使用趋势,提前预警即将触达阈值的指标
```
\ No newline at end of file
# AI分析需求文档 - 计划执行
**文档版本**: 1.0
**创建日期**: 2026-05-15
**需求文档**: `_PRD_AI分析_需求文档.md`
**负责人**: AI巡检系统团队
---
## 1. 需求分析
### 1.1 核心需求
在服务器巡检报告生成后,通过AI接口对报告进行智能分析,提取结构化JSON数据,然后发送钉钉通知。
### 1.2 当前流程
```
检测脚本 → MD报告 → 正则提取 → JSON → 钉钉通知
```
**问题**:
- 正则提取不够准确,容易遗漏信息
- 无法进行深度分析和趋势判断
- 缺乏智能建议生成
### 1.3 目标流程
```
检测脚本 → MD报告 → AI分析 → JSON → 钉钉通知
```
**优势**:
- AI理解能力强,提取更准确
- 可进行趋势分析和历史对比
- 自动生成智能建议
---
## 2. 技术方案
### 2.1 AI接口信息
| 项目 | 值 |
|:---|:---|
| 接口地址 | https://ark.cn-beijing.volces.com/api/coding |
| API密钥 | ark-xxx (配置到config.json) |
| 接口类型 | 豆包代码补全/对话API |
| 费用情况 | 套餐内免费 |
### 2.2 实现架构
```
┌─────────────────────────────────────────────────────┐
│ 检测脚本 │
│ (执行巡检,生成MD报告) │
└──────────────────┬──────────────────────────────────┘
┌─────────────────────────────────────────────────────┐
│ AI分析模块 (新增) │
│ ┌──────────────────────────────────────────────┐ │
│ │ 1. 读取MD报告 │ │
│ │ 2. 构建AI Prompt (基于JSON格式说明) │ │
│ │ 3. 调用豆包API │ │
│ │ 4. 解析AI返回的JSON │ │
│ │ 5. 验证和修正JSON格式 │ │
│ └──────────────────────────────────────────────┘ │
└──────────────────┬──────────────────────────────────┘
┌─────────────────────────────────────────────────────┐
│ JSON摘要文件 │
│ (符合JSON格式说明的结构化数据) │
└──────────────────┬──────────────────────────────────┘
┌─────────────────────────────────────────────────────┐
│ 钉钉通知模块 │
│ (读取JSON,生成Markdown消息,发送通知) │
└─────────────────────────────────────────────────────┘
```
### 2.3 技术选型
| 组件 | 技术方案 | 说明 |
|:---|:---|:---|
| 开发语言 | Python 3.10+ | 与现有钉钉模块一致 |
| HTTP库 | requests | 调用豆包API |
| JSON处理 | json + pydantic | 生成和验证JSON格式 |
| 配置管理 | JSON | 复用现有config.json |
| 错误处理 | 多级降级机制 | AI失败时降级到正则提取 |
---
## 3. 实现计划
### 3.1 开发阶段
#### 阶段一:AI调用模块 (2天)
- [ ] 创建 `ai_analyzer.py` 模块
- [ ] 实现豆包API调用功能
- [ ] 设计AI Prompt模板
- [ ] 实现JSON解析和验证
- [ ] 单元测试
#### 阶段二:集成优化 (1天)
- [ ] 修改 `send_report.py` 集成AI分析
- [ ] 实现降级机制 (AI失败→正则提取)
- [ ] 优化配置文件管理
- [ ] 添加日志和错误处理
#### 阶段三:测试验证 (1天)
- [ ] 使用真实MD报告测试
- [ ] 验证JSON格式正确性
- [ ] 测试钉钉通知显示效果
- [ ] 性能测试 (API响应时间)
#### 阶段四:文档和部署 (0.5天)
- [ ] 编写使用文档
- [ ] 更新配置说明
- [ ] 部署到生产环境
### 3.2 文件清单
| 文件路径 | 说明 | 状态 |
|:---|:---|:---|
| `ai_analyzer.py` | AI分析核心模块 | 待创建 |
| `send_report.py` | 修改集成AI分析 | 待修改 |
| `config.json` | 添加AI配置项 | 待修改 |
| `requirements.txt` | 添加依赖 | 待修改 |
| `AI使用说明.md` | 使用文档 | 待创建 |
---
## 4. 详细设计
### 4.1 AI Prompt设计
```
你是一个服务器巡检报告分析专家。请分析以下服务器巡检报告,提取关键信息并输出JSON格式数据。
## 输出要求
1. 严格按照指定的JSON格式输出
2. 数值必须准确,不要臆测
3. 状态判断要根据阈值标准
4. 问题分析要具体准确
5. AI建议要可操作
## 阈值标准
- CPU > 85% 警告, > 95% 严重
- 内存 > 85% 警告, > 95% 严重
- 磁盘 > 90% 警告
- Swap任何使用 警告
- 线程 > 1000 警告
- Redis命中率 < 90% 警告
- ... (完整阈值见JSON格式说明)
## JSON格式
[输出符合JSON格式说明的完整示例]
## 待分析报告
[MD报告内容]
请直接输出JSON,不要其他说明文字。
```
### 4.2 降级机制
```
AI分析尝试 (最多3次)
├─ 成功 → 使用AI生成的JSON
└─ 失败 → 降级到正则提取 (md_to_json.py)
└─ 仍然失败 → 返回错误,不发送通知
```
### 4.3 配置文件扩展
```json
{
"Webhook": "...",
"Secret": "...",
"AI": {
"enabled": true,
"api_url": "https://ark.cn-beijing.volces.com/api/coding",
"api_key": "ark-xxx",
"model": "ep-xxx",
"max_retries": 3,
"timeout": 30,
"fallback_to_regex": true
}
}
```
---
## 5. 测试计划
### 5.1 测试用例
| 测试场景 | 输入 | 预期输出 |
|:---|:---|:---|
| 正常报告 | 健康MD报告 | 正确JSON,status=normal |
| 警告报告 | 有警告项MD报告 | 正确JSON,status=warning |
| 严重报告 | 有严重问题MD报告 | 正确JSON,status=critical |
| AI失败 | 模拟API失败 | 降级到正则提取 |
| 格式错误 | AI返回错误JSON | 重新尝试或降级 |
| 空报告 | 空白MD报告 | 返回错误 |
### 5.2 验收标准
- [ ] AI分析成功率 > 90%
- [ ] JSON格式100%符合规范
- [ ] 降级机制工作正常
- [ ] 钉钉通知显示正确
- [ ] 响应时间 < 30秒
- [ ] 无内存泄漏
---
## 6. 部署计划
### 6.1 部署步骤
1. **备份现有文件**
```bash
cp send_report.py send_report.py.bak
cp config.json config.json.bak
```
2. **安装新文件**
```bash
# 复制ai_analyzer.py到钉钉通知目录
# 更新send_report.py
# 更新config.json
```
3. **安装依赖**
```bash
pip install -r requirements.txt
```
4. **测试验证**
```bash
python test_ai_analyzer.py
```
5. **切换到生产**
- 修改检测脚本调用新流程
- 监控日志确认运行正常
### 6.2 回滚方案
如果AI分析出现问题:
1. 修改配置 `AI.enabled = false`
2. 自动降级到正则提取
3. 无需代码回滚
---
## 7. 风险评估
| 风险 | 概率 | 影响 | 应对措施 |
|:---|:---|:---|:---|
| API调用失败 | 中 | 高 | 降级到正则提取 |
| JSON格式错误 | 中 | 中 | 验证+重试机制 |
| 响应时间过长 | 低 | 中 | 设置超时+异步处理 |
| API费用超限 | 低 | 低 | 监控使用量 |
| AI理解偏差 | 中 | 中 | 优化Prompt+人工审核 |
---
## 8. 监控指标
### 8.1 关键指标
- AI分析成功率
- 平均响应时间
- 降级触发次数
- JSON格式错误率
- 钉钉发送成功率
### 8.2 日志记录
```
[2026-05-15 10:30:00] INFO: 开始AI分析 - 报告: xxx.md
[2026-05-15 10:30:15] INFO: AI分析成功 - 耗时: 15秒
[2026-05-15 10:30:16] INFO: 钉钉通知发送成功
```
---
## 9. 后续优化
### 9.1 短期优化 (1周内)
- [ ] 历史数据存储和趋势分析
- [ ] 优化AI Prompt提升准确率
- [ ] 添加更多检测点
### 9.2 长期优化 (1月内)
- [ ] 支持多服务器批量分析
- [ ] 建立历史数据库
- [ ] 智能预警和预测
---
## 10. 附录
### 10.1 参考文档
- [服务器监测指标参考报告](../服务器监测指标参考报告.md)
- [JSON格式说明](通用模块/钉钉通知/JSON格式说明.md)
- [豆包API文档](https://www.volcengine.com/docs/82379/1928261)
### 10.2 联系方式
- **技术支持**: AI巡检系统团队
- **文档维护**: 开发团队
---
**文档状态**: 待审核
**最后更新**: 2026-05-15
**下次审查**: 实施完成后
{
"server_name": "202服务器_展厅环境",
"project_type": "unified",
"check_time": "2026-05-15 09:35:23",
"host_info": {
"ip": "192.168.5.202",
"hostname": "localhost"
},
"status": "critical",
"critical_issues": 2,
"warnings": 9,
"metrics": {
"cpu": {
"current_value": "4.4%",
"status": "normal"
},
"memory": {
"current_value": "71.0%",
"status": "normal"
},
"swap": {
"current_value": "32.1%",
"status": "warning"
},
"disk": {
"current_value": "23%",
"status": "normal"
},
"threads": {
"current_value": "5412",
"status": "critical"
},
"load": {
"current_value": "0.54",
"status": "normal"
}
},
"middleware": {},
"issues": [
{
"level": "critical",
"component": "unknown",
"description": "🔴 ujava2容器已停止(Exited 4天前),已由ujava3替代运行"
},
{
"level": "critical",
"component": "emqx",
"description": "🔴 EMQX SSL证书(8883)已过期960天 (到期: 2023-09-28)"
},
{
"level": "warning",
"component": "unknown",
"description": "🟡 Swap已使用: 32.1% (1.7GiB/7.8GiB)"
},
{
"level": "warning",
"component": "unknown",
"description": "🟡 Java应用MQTT连接反复断开重连 (consumer_client_androidConfiguration)"
},
{
"level": "warning",
"component": "unknown",
"description": "🟡 unginx容器不存在(Nginx可能以其他方式部署)"
},
{
"level": "warning",
"component": "cpu",
"description": "🟡 所有Docker容器均未配置资源限制(CPU/Memory limits=0)"
},
{
"level": "warning",
"component": "mysql",
"description": "🟡 核心容器(umysql/uredis/uemqx/ujava2)未配置Docker健康检查"
},
{
"level": "warning",
"component": "unknown",
"description": "🟡 SSH允许root直接登录(建议改为PermitRootLogin no或prohibit-password)"
},
{
"level": "warning",
"component": "unknown",
"description": "🟡 SSH密码认证已开启(建议使用密钥认证)"
},
{
"level": "warning",
"component": "unknown",
"description": "🟡 SELinux已关闭(建议在生产环境启用)"
},
{
"level": "warning",
"component": "mysql",
"description": "🟡 MySQL binlog未设置过期天数(expire_logs_days=0),依赖binlog_expire_logs_seconds=2592000(30天)"
}
],
"recommendations": [
"🟡 MySQL binlog未设置过期天数(expire_logs_days=0),依赖binlog_expire_logs_seconds=2592000(30天)"
]
}
\ No newline at end of file
{
"server_name": "v3",
"project_type": "unified",
"check_time": "2026-05-15 09:01:05",
"host_info": {
"ip": "192.168.5.44",
"hostname": "loacalhost"
},
"status": "normal",
"critical_issues": 0,
"warnings": 0,
"metrics": {
"cpu": {
"current_value": "11.8%",
"status": "normal"
},
"memory": {
"current_value": "69.38%",
"status": "warning"
},
"swap": {
"current_value": "24.57%",
"status": "unknown"
},
"disk": {
"current_value": "86%",
"status": "warning"
},
"load": {
"current_value": "0.04",
"status": "normal"
},
"threads": {
"current_value": "3189",
"status": "unknown"
}
},
"middleware": {},
"issues": [],
"recommendations": []
}
\ No newline at end of file
{
"server_name": "139.159.163.86",
"project_type": "reservation",
"check_time": "2026-05-15 06:23:04",
"host_info": {
"ip": "139.159.163.86",
"hostname": "kunpeng"
},
"status": "normal",
"critical_issues": 0,
"warnings": 0,
"metrics": {
"cpu": {
"current_value": "1.1%",
"status": "unknown"
},
"memory": {
"current_value": "3.4",
"status": "unknown"
},
"swap": {
"current_value": "1.4%",
"status": "unknown"
},
"disk": {
"current_value": "66.0%",
"status": "unknown"
},
"threads": {
"current_value": "1419.0",
"status": "unknown"
},
"load": {
"current_value": "0.1",
"status": "unknown"
}
},
"middleware": {
"redis": {
"hit_rate": "58.0%"
}
},
"issues": [
{
"level": "critical",
"component": "unknown",
"description": "无"
},
{
"level": "warning",
"component": "unknown",
"description": "Swap已使用: 1.4%"
},
{
"level": "warning",
"component": "threads",
"description": "总线程数: 1443"
},
{
"level": "warning",
"component": "redis",
"description": "Redis缓存命中率: 58.0%"
}
],
"recommendations": []
}
\ No newline at end of file
# DingTalk Notification Module
# UTF-8 BOM encoding required
$ScriptRoot = if ($PSScriptRoot) { $PSScriptRoot } else { Split-Path -Parent $MyInvocation.MyCommand.Path }
function Get-DingTalkConfig {
    <#
    .SYNOPSIS
    Loads the DingTalk notification settings from a JSON file.
    .PARAMETER ConfigPath
    Path of the JSON file. Defaults to config.json next to this script.
    .OUTPUTS
    The object produced by ConvertFrom-Json. Throws "Config not found" when the file is missing.
    #>
    param([string]$ConfigPath = (Join-Path $ScriptRoot "config.json"))
    if (-not (Test-Path $ConfigPath)) {
        Write-Host "ERROR: Config not found: $ConfigPath" -ForegroundColor Red
        throw "Config not found"
    }
    # Read explicitly as UTF-8 so non-ASCII values are decoded correctly even
    # when the file has no BOM.
    $config = Get-Content $ConfigPath -Raw -Encoding UTF8 | ConvertFrom-Json
    Write-Host "INFO: Config loaded" -ForegroundColor Green
    return $config
}
function Start-ReportHttpServer {
    <#
    .SYNOPSIS
    Serves the report directory over HTTP with Python's http.server.
    .PARAMETER ReportDir
    Directory to serve; it is created when missing.
    .PARAMETER Port
    TCP port to listen on.
    .OUTPUTS
    $true when a listener already exists on the port or the server started, otherwise $false.
    #>
    param([string]$ReportDir, [int]$Port)
    try {
        if (-not (Test-Path $ReportDir)) {
            New-Item -ItemType Directory -Path $ReportDir -Force | Out-Null
            Write-Host "INFO: Created report dir: $ReportDir" -ForegroundColor Green
        }
        # Reuse whatever is already listening on the port.
        $existingProcess = Get-NetTCPConnection -LocalPort $Port -ErrorAction SilentlyContinue | Where-Object State -eq "Listen"
        if ($existingProcess) {
            Write-Host "INFO: HTTP server already running on port $Port" -ForegroundColor Yellow
            return $true
        }
        # Quote the directory so a path containing spaces stays one argument.
        $serverArgs = "-m http.server $Port --directory `"$ReportDir`""
        $process = Start-Process -FilePath "python3" -ArgumentList $serverArgs -WindowStyle Hidden -PassThru
        # Give the server a moment; an immediate exit means it failed to start.
        Start-Sleep -Seconds 2
        if ($process.HasExited) {
            Write-Host "ERROR: HTTP server failed to start" -ForegroundColor Red
            return $false
        }
        Write-Host "INFO: HTTP server started: http://localhost:$Port" -ForegroundColor Green
        return $true
    }
    catch {
        Write-Host "ERROR: Start HTTP server failed: $_" -ForegroundColor Red
        return $false
    }
}
function Get-NgrokPublicUrl {
    <#
    .SYNOPSIS
    Returns the public URL of an ngrok tunnel forwarding to the given local port.
    .DESCRIPTION
    Queries the local ngrok API first and reuses a tunnel that already targets
    http://localhost:<Port>. Otherwise starts ngrok and polls the API until a
    tunnel is reported.
    .PARAMETER NgrokPath
    Path of the ngrok executable.
    .PARAMETER Port
    Local port the tunnel should forward to.
    .OUTPUTS
    A single public URL string, or $null on failure.
    #>
    param([string]$NgrokPath, [int]$Port)
    try {
        $ngrokApiUrl = "http://localhost:4040/api/tunnels"
        $localAddr = "http://localhost:$Port"
        try {
            $tunnels = Invoke-RestMethod -Uri $ngrokApiUrl -ErrorAction Stop
            # More than one tunnel can target the same port; always return one URL.
            $existingTunnel = $tunnels.tunnels | Where-Object { $_.config.addr -eq $localAddr } | Select-Object -First 1
            if ($existingTunnel) {
                Write-Host "INFO: ngrok tunnel exists: $($existingTunnel.public_url)" -ForegroundColor Yellow
                return $existingTunnel.public_url
            }
        }
        catch {
            Write-Host "INFO: ngrok not running, starting new tunnel" -ForegroundColor Yellow
        }
        if (-not (Test-Path $NgrokPath)) {
            Write-Host "ERROR: ngrok not found: $NgrokPath" -ForegroundColor Red
            return $null
        }
        Start-Process -FilePath $NgrokPath -ArgumentList "http $Port --log=stdout" -WindowStyle Hidden
        # Poll instead of a single fixed wait: the tunnel may need more than a
        # few seconds to come up, and the API may not answer immediately.
        $publicUrl = $null
        for ($attempt = 0; $attempt -lt 10 -and -not $publicUrl; $attempt++) {
            Start-Sleep -Seconds 1
            try {
                $tunnels = Invoke-RestMethod -Uri $ngrokApiUrl -ErrorAction Stop
            }
            catch {
                continue
            }
            # Prefer the tunnel for our port; fall back to the first one reported.
            $tunnel = $tunnels.tunnels | Where-Object { $_.config.addr -eq $localAddr } | Select-Object -First 1
            if (-not $tunnel) {
                $tunnel = $tunnels.tunnels | Select-Object -First 1
            }
            if ($tunnel) {
                $publicUrl = $tunnel.public_url
            }
        }
        if (-not $publicUrl) {
            Write-Host "ERROR: Get ngrok URL failed: no tunnel reported by ngrok API" -ForegroundColor Red
            return $null
        }
        Write-Host "INFO: ngrok tunnel created: $publicUrl" -ForegroundColor Green
        return $publicUrl
    }
    catch {
        Write-Host "ERROR: Get ngrok URL failed: $_" -ForegroundColor Red
        return $null
    }
}
function New-DingTalkSignature {
    <#
    .SYNOPSIS
    Builds the URL-encoded signature for a signed DingTalk webhook request.
    .DESCRIPTION
    Signs "<Timestamp>`n<Secret>" with HMAC-SHA256 keyed by the secret,
    Base64-encodes the digest and URL-encodes the result.
    .PARAMETER Secret
    The robot's signing secret.
    .PARAMETER Timestamp
    Timestamp value that is also sent in the request URL.
    .OUTPUTS
    The URL-encoded signature string, or $null when signing fails.
    #>
    param([string]$Secret, [long]$Timestamp)
    $hmacsha = $null
    try {
        # Step 1: timestamp + \n + secret
        $stringToSign = "$Timestamp`n$Secret"
        # Step 2: HMAC-SHA256
        $hmacsha = New-Object System.Security.Cryptography.HMACSHA256
        $hmacsha.Key = [System.Text.Encoding]::UTF8.GetBytes($Secret)
        $signatureBytes = $hmacsha.ComputeHash([System.Text.Encoding]::UTF8.GetBytes($stringToSign))
        # Step 3: Base64 encode
        $base64Signature = [Convert]::ToBase64String($signatureBytes)
        # Step 4: URL encode
        $urlEncodedSign = [System.Uri]::EscapeDataString($base64Signature)
        # The string-to-sign embeds the secret, so it is deliberately not logged.
        Write-Host "DEBUG: timestamp=$Timestamp" -ForegroundColor DarkGray
        Write-Host "DEBUG: sign=$urlEncodedSign" -ForegroundColor DarkGray
        return $urlEncodedSign
    }
    catch {
        Write-Host "ERROR: Generate signature failed: $_" -ForegroundColor Red
        return $null
    }
    finally {
        # Release the key material held by the HMAC instance.
        if ($hmacsha) {
            $hmacsha.Dispose()
        }
    }
}
function Send-DingTalkMessage {
    <#
    .SYNOPSIS
    Posts a markdown message to a DingTalk robot webhook.
    .PARAMETER Webhook
    Webhook URL. Signing parameters are appended with "&", so the URL is
    expected to already contain a query string.
    .PARAMETER Secret
    Signing secret. When empty, the request is sent unsigned.
    .PARAMETER Title
    Markdown message title.
    .PARAMETER Content
    Markdown message text.
    .OUTPUTS
    $true when the response errcode is 0; $false on any failure.
    #>
    param([string]$Webhook, [string]$Secret, [string]$Title, [string]$Content)
    try {
        $url = $Webhook
        if ($Secret) {
            $timestamp = [DateTimeOffset]::UtcNow.ToUnixTimeMilliseconds()
            $sign = New-DingTalkSignature -Secret $Secret -Timestamp $timestamp
            # Do not send a request that is known to carry an invalid signature.
            if (-not $sign) {
                Write-Host "ERROR: DingTalk message not sent: signature generation failed" -ForegroundColor Red
                return $false
            }
            $url = "$Webhook&timestamp=$timestamp&sign=$sign"
        }
        $body = @{
            msgtype  = "markdown"
            markdown = @{
                title = $Title
                text  = $Content
            }
        } | ConvertTo-Json -Depth 10
        # Send explicit UTF-8 bytes: a string body is not guaranteed to be
        # encoded as UTF-8, which garbles non-ASCII titles and content.
        $bodyBytes = [System.Text.Encoding]::UTF8.GetBytes($body)
        $response = Invoke-RestMethod -Uri $url -Method Post -Body $bodyBytes -ContentType "application/json; charset=utf-8" -TimeoutSec 10
        if ($response.errcode -eq 0) {
            Write-Host "INFO: DingTalk message sent successfully" -ForegroundColor Green
            return $true
        }
        else {
            Write-Host "ERROR: DingTalk message failed: $($response.errmsg)" -ForegroundColor Red
            return $false
        }
    }
    catch {
        Write-Host "ERROR: Send DingTalk message exception: $_" -ForegroundColor Red
        return $false
    }
}
function Clear-ExpiredReports {
    <#
    .SYNOPSIS
    Deletes *.md reports in a directory that are older than the retention period.
    .PARAMETER ReportDir
    Directory holding the report files. Nothing happens when it does not exist.
    .PARAMETER Days
    Retention in days, compared against each file's LastWriteTime. A value of
    0 or less disables cleanup instead of deleting every report.
    .NOTES
    Errors are logged with Write-Host and never propagated to the caller.
    #>
    param([string]$ReportDir, [int]$Days)
    try {
        if (-not $ReportDir -or -not (Test-Path $ReportDir)) {
            return
        }
        # An unset retention value arrives here as 0, which would make the cutoff
        # "now" and wipe all reports. Treat it as "cleanup disabled".
        if ($Days -le 0) {
            Write-Host "WARN: Report retention days not set (value: $Days), skipping cleanup" -ForegroundColor Yellow
            return
        }
        $expiredDate = (Get-Date).AddDays(-$Days)
        $expiredFiles = @(Get-ChildItem -Path $ReportDir -Filter "*.md" -File | Where-Object { $_.LastWriteTime -lt $expiredDate })
        $deletedCount = 0
        foreach ($file in $expiredFiles) {
            try {
                # -LiteralPath: file names may contain wildcard characters such as [ ].
                Remove-Item -LiteralPath $file.FullName -Force -ErrorAction Stop
                $deletedCount++
                Write-Host "INFO: Deleted expired report: $($file.Name)" -ForegroundColor Yellow
            }
            catch {
                # One undeletable file must not stop the remaining cleanup.
                Write-Host "ERROR: Clear expired reports failed: $_" -ForegroundColor Red
            }
        }
        if ($deletedCount -gt 0) {
            Write-Host "INFO: Deleted $deletedCount expired reports" -ForegroundColor Green
        }
    }
    catch {
        Write-Host "ERROR: Clear expired reports failed: $_" -ForegroundColor Red
    }
}
function Send-DingTalkReport {
    <#
    .SYNOPSIS
    Sends a report summary to DingTalk, optionally with a link to the full report.
    .PARAMETER Summary
    Markdown text used as the message content.
    .PARAMETER FullReportPath
    Optional path of the full report. When it exists, a link is appended to the message.
    .PARAMETER ServerName
    Display name for the title; falls back to ServerName from the config.
    .PARAMETER ConfigPath
    Path of the JSON config file. Defaults to config.json inside $ScriptRoot.
    .OUTPUTS
    The result of Send-DingTalkMessage, or $false when an exception occurs.
    .NOTES
    Expired reports are cleaned up after the send attempt.
    #>
    param(
        [string]$Summary,
        [string]$FullReportPath,
        [string]$ServerName,
        [string]$ConfigPath = (Join-Path $ScriptRoot "config.json")
    )
    try {
        $config = Get-DingTalkConfig -ConfigPath $ConfigPath
        $displayName = if ($ServerName) { $ServerName } else { $config.ServerName }
        $finalContent = $Summary
        if ($FullReportPath -and (Test-Path $FullReportPath)) {
            $reportName = Split-Path $FullReportPath -Leaf
            $reportUrl = $null
            if ($config.EnableHttpServer) {
                Start-ReportHttpServer -ReportDir $config.ReportDir -Port $config.HttpPort | Out-Null
            }
            if ($config.EnableNgrok) {
                $publicUrl = Get-NgrokPublicUrl -NgrokPath $config.NgrokPath -Port $config.HttpPort
                if ($publicUrl) {
                    # Escape the file name so spaces and non-ASCII characters form a valid URL.
                    $reportUrl = "$publicUrl/$([System.Uri]::EscapeDataString($reportName))"
                }
            }
            if (-not $reportUrl) {
                # Build a well-formed file URI (file:///C:/...) from the absolute path.
                $resolvedPath = (Resolve-Path $FullReportPath | Select-Object -First 1).ProviderPath
                $reportUrl = ([System.Uri]$resolvedPath).AbsoluteUri
            }
            $finalContent = "$Summary`n`n---`n`n#### Report: [$reportName]($reportUrl)"
        }
        $title = "Server Health Report - $displayName"
        $result = Send-DingTalkMessage -Webhook $config.Webhook -Secret $config.Secret -Title $title -Content $finalContent
        Clear-ExpiredReports -ReportDir $config.ReportDir -Days $config.ReportRetentionDays
        return $result
    }
    catch {
        Write-Host "ERROR: Send DingTalk report failed: $_" -ForegroundColor Red
        return $false
    }
}
# Functions exported from this module. New-DingTalkSignature is not in the list.
Export-ModuleMember -Function @('Get-DingTalkConfig', 'Start-ReportHttpServer', 'Get-NgrokPublicUrl', 'Send-DingTalkMessage', 'Send-DingTalkReport', 'Clear-ExpiredReports')
# Report Summary Extractor Module
# Extract key information from health check report for DingTalk notification
function Get-ReportSummary {
    <#
    .SYNOPSIS
    Extracts key fields from a health check report with regular expressions.
    .PARAMETER ReportPath
    Path of the report file.
    .OUTPUTS
    Hashtable. A key is present only when its pattern matched: Time, Host,
    Hostname, UptimeDays, Status, CriticalIssues, Warnings, MySQL
    (Connections, Usage), SlowQueries, RedisHitRate, AuthFailures.
    Metrics (CPU, Memory, Swap, Disk, Threads, Load) and Trends are always
    present; Trends is always empty.
    .NOTES
    Throws "Report not found: <path>" when the file does not exist.
    #>
    param([string]$ReportPath)
    if (-not (Test-Path $ReportPath)) {
        throw "Report not found: $ReportPath"
    }
    # Read file with Default encoding (for Chinese)
    $reader = [System.IO.StreamReader]::new($ReportPath, [System.Text.Encoding]::Default)
    try {
        $content = $reader.ReadToEnd()
    }
    finally {
        # Release the file handle even when reading fails.
        $reader.Dispose()
    }
    $info = @{}
    # Extract header info
    if ($content -match '\*\*时间:\*\*\s*(\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2})') {
        $info.Time = $matches[1]
    }
    # Hostnames may contain '-' and '.'; whitespace before '(' is optional.
    if ($content -match '\*\*主机:\*\*\s*([\d.]+)\s*\(([\w.-]+)\)') {
        $info.Host = $matches[1]
        $info.Hostname = $matches[2]
    }
    if ($content -match 'up\s+(\d+)\s+days?') {
        $info.UptimeDays = [int]$matches[1]
    }
    if ($content -match '\*\*状态:\*\*\s*(正常|警告|严重)') {
        $info.Status = $matches[1]
    }
    # Extract issues (more flexible pattern)
    if ($content -match '关键问题.*?(\d+).*?警告.*?(\d+)') {
        $info.CriticalIssues = [int]$matches[1]
        $info.Warnings = [int]$matches[2]
    }
    # Extract metrics from table - search for value patterns in table context.
    # (?s) lets '.' cross line breaks, so each pattern takes the first value
    # that follows the metric name anywhere later in the document.
    $metrics = @{}
    # CPU: find the percentage value near "CPU"
    if ($content -match '(?s)\|.*?CPU.*?\|.*?([\d.]+%).*?\|') {
        $metrics.CPU = $matches[1]
    }
    # Memory: find the percentage value near "Memory"
    if ($content -match '(?s)\|.*?Memory.*?\|.*?([\d.]+%).*?\|') {
        $metrics.Memory = $matches[1]
    }
    # Swap: find the percentage value near "Swap"
    if ($content -match '(?s)\|.*?Swap.*?\|.*?([\d.]+%).*?\|') {
        $metrics.Swap = $matches[1]
    }
    # Disk: find the percentage value near "Disk"
    if ($content -match '(?s)\|.*?Disk.*?\|.*?([\d.]+%).*?\|') {
        $metrics.Disk = $matches[1]
    }
    # Threads: use Chinese column name
    if ($content -match '(?s)\|.*?线程总数.*?\|.*?(\d+).*?\|') {
        $metrics.Threads = $matches[1]
    }
    # Load: find the decimal value
    if ($content -match '(?s)\|.*?Load.*?\|.*?([\d.]+).*?\|') {
        $metrics.Load = $matches[1]
    }
    $info.Metrics = $metrics
    # Extract service status (more flexible pattern)
    if ($content -match '(\d+)\s*/\s*(\d+).*?(?i)current.*connections?') {
        $info.MySQL = @{
            Connections = "$($matches[1])/$($matches[2])"
            Usage       = ""
        }
        # Try to get usage
        if ($content -match '(?i)current.*connections?.*?\(([\d.]+%)\)') {
            $info.MySQL.Usage = $matches[1]
        }
    }
    if ($content -match '(?i)slow.*quer.*?:\s*(\d+)') {
        $info.SlowQueries = [int]$matches[1]
    }
    if ($content -match '(?i)cache.*hit.*ratio.*?:\s*([\d.]+%)') {
        $info.RedisHitRate = $matches[1]
    }
    # Extract security info
    if ($content -match '\*\*认证失败:\*\*\s*(\d+)') {
        $info.AuthFailures = [int]$matches[1]
    }
    # Extract trends (simplified)
    $trends = @{}
    $info.Trends = $trends
    return $info
}
function New-DingTalkSummary {
    <#
    .SYNOPSIS
    Renders extracted report information as a DingTalk markdown summary.
    .PARAMETER Info
    Hashtable holding the extracted report fields read below (Time, Host,
    Hostname, UptimeDays, Status, CriticalIssues, Warnings, Metrics, Trends,
    MySQL, SlowQueries, RedisHitRate, AuthFailures).
    .PARAMETER ServerName
    Display name for the heading; falls back to $Info.Hostname.
    .OUTPUTS
    The markdown summary string. Only metrics beyond their thresholds are listed.
    #>
    param([hashtable]$Info, [string]$ServerName = "")
    $displayName = if ($ServerName) { $ServerName } else { $Info.Hostname }
    # Status icon. The extractors capture the Chinese status words from the
    # report, so those are accepted alongside the English names.
    $statusIcon = switch ($Info.Status) {
        { $_ -in @("Normal", "正常") } { "OK" }
        { $_ -in @("Warning", "警告") } { "WARN" }
        { $_ -in @("Critical", "严重") } { "CRITICAL" }
        default { "?" }
    }
    # Build summary
    $summary = @"
### Server Health Report - $displayName
**Time**: $($Info.Time)
**Host**: $($Info.Host)
**Uptime**: $($Info.UptimeDays) days
**Status**: $statusIcon ($($Info.CriticalIssues) critical, $($Info.Warnings) warnings)
#### Metrics
"@
    # Only show abnormal metrics
    $abnormalMetrics = @()
    # CPU check
    if ($Info.Metrics.CPU) {
        $cpuVal = [double]($Info.Metrics.CPU -replace '%')
        if ($cpuVal -gt 85) {
            $trendIcon = Get-TrendIcon -Trend $Info.Trends.CPU
            $abnormalMetrics += "| CPU | $($Info.Metrics.CPU) | 85% | CRITICAL $trendIcon |"
        }
    }
    # Memory check
    if ($Info.Metrics.Memory) {
        $memVal = [double]($Info.Metrics.Memory -replace '%')
        if ($memVal -gt 85) {
            $trendIcon = Get-TrendIcon -Trend $Info.Trends.Memory
            $abnormalMetrics += "| Memory | $($Info.Metrics.Memory) | 85% | CRITICAL $trendIcon |"
        }
        elseif ($memVal -gt 70) {
            $trendIcon = Get-TrendIcon -Trend $Info.Trends.Memory
            # Show the threshold that was actually crossed (70), not the critical one.
            $abnormalMetrics += "| Memory | $($Info.Metrics.Memory) | 70% | WARN $trendIcon |"
        }
    }
    # Swap check: any swap usage at all is reported
    if ($Info.Metrics.Swap) {
        $swapVal = [double]($Info.Metrics.Swap -replace '%')
        if ($swapVal -gt 0) {
            $abnormalMetrics += "| Swap | $($Info.Metrics.Swap) | - | WARN |"
        }
    }
    # Threads check
    if ($Info.Metrics.Threads) {
        $threadsVal = [int]$Info.Metrics.Threads
        if ($threadsVal -gt 1000) {
            $trendIcon = Get-TrendIcon -Trend $Info.Trends.Threads
            $abnormalMetrics += "| Threads | $($Info.Metrics.Threads) | 1000 | WARN $trendIcon |"
        }
    }
    if ($abnormalMetrics.Count -eq 0) {
        $summary += "`nAll metrics OK"
    }
    else {
        $summary += "`n| Metric | Current | Threshold | Status |`n"
        $summary += "| :--- | :--- | :--- | :--- |`n"
        $summary += ($abnormalMetrics -join "`n")
    }
    # Service status
    $summary += "`n`n#### Services`n"
    $summary += "- MySQL: $($Info.MySQL.Connections) (usage $($Info.MySQL.Usage))`n"
    $summary += "- Slow queries: $($Info.SlowQueries)`n"
    $summary += "- Redis hit rate: $($Info.RedisHitRate)"
    # Security alerts
    if ($Info.AuthFailures -gt 100) {
        $summary += "`n`n#### Security Alert`n"
        $summary += "- Auth failures(24h): $($Info.AuthFailures)"
    }
    return $summary
}
function Get-TrendIcon {
    <#
    .SYNOPSIS
    Maps a trend marker to its text label.
    .PARAMETER Trend
    "+" or "-"; any other value counts as unchanged.
    .OUTPUTS
    "UP" for "+", "DOWN" for "-", otherwise "SAME".
    #>
    param([string]$Trend)
    switch ($Trend) {
        "+" { return "UP" }
        "-" { return "DOWN" }
        default { return "SAME" }
    }
}
# Functions exported from the report summary extractor module.
Export-ModuleMember -Function @('Get-ReportSummary', 'New-DingTalkSummary', 'Get-TrendIcon')
# Universal Report Summary Extractor
# Supports multiple report formats from different projects
function Get-UniversalReportSummary {
    <#
    .SYNOPSIS
    Extracts key fields from a health check report, accepting several report layouts.
    .PARAMETER ReportPath
    Path of the report file.
    .OUTPUTS
    Hashtable. A key is present only when its pattern matched: Time, Host,
    Hostname, UptimeDays, Status, StatusIcon, CriticalIssues, Warnings, MySQL
    (Connections, Usage), SlowQueries, RedisHitRate, AuthFailures.
    Metrics (CPU, Memory, Swap, Threads) is always present.
    .NOTES
    Throws "Report not found: <path>" when the file does not exist.
    #>
    param([string]$ReportPath)
    if (-not (Test-Path $ReportPath)) {
        throw "Report not found: $ReportPath"
    }
    # Read with Default encoding for Chinese support
    $reader = [System.IO.StreamReader]::new($ReportPath, [System.Text.Encoding]::Default)
    try {
        $content = $reader.ReadToEnd()
    }
    finally {
        # Release the file handle even when reading fails.
        $reader.Dispose()
    }
    $info = @{}
    # Extract header info (support multiple formats)
    if ($content -match '\*\*时间:\*\*\s*(\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2})') {
        $info.Time = $matches[1]
    }
    # Hostnames may contain '-' and '.'
    if ($content -match '\*\*主机:\*\*\s*([\d.]+)\s*\(([\w.-]+)\)') {
        $info.Host = $matches[1]
        $info.Hostname = $matches[2]
    }
    # Extract uptime (support both "up X days" and "已运行X天")
    if ($content -match 'up\s+(\d+)\s+days?') {
        $info.UptimeDays = [int]$matches[1]
    }
    elseif ($content -match '已运行(\d+)天') {
        $info.UptimeDays = [int]$matches[1]
    }
    # Extract status. The emoji group is optional, so this single pattern covers
    # reports with and without an icon; StatusIcon is $null when there is none.
    if ($content -match '\*\*状态:\*\*\s*(🟢|🟡|🔴)?\s*(正常|警告|严重)') {
        $info.Status = $matches[2]
        $info.StatusIcon = $matches[1]
    }
    # Extract issues (support multiple formats)
    # Format 1: "关键问题: 0, 警告: 3"
    if ($content -match '关键问题[::]\s*(\d+).*?警告[::]\s*(\d+)') {
        $info.CriticalIssues = [int]$matches[1]
        $info.Warnings = [int]$matches[2]
    }
    # Format 2: "严重问题: 2, 警告: 13"
    elseif ($content -match '严重问题[::]\s*(\d+).*?警告[::]\s*(\d+)') {
        $info.CriticalIssues = [int]$matches[1]
        $info.Warnings = [int]$matches[2]
    }
    # Format 3: "严重: 0, 警告: 0"
    elseif ($content -match '严重[::]\s*(\d+).*?警告[::]\s*(\d+)') {
        $info.CriticalIssues = [int]$matches[1]
        $info.Warnings = [int]$matches[2]
    }
    # Format 4: "严重问题 (2):" - carries no warning count
    elseif ($content -match '严重问题\s*\((\d+)\):') {
        $info.CriticalIssues = [int]$matches[1]
    }
    # Extract metrics from table rows (Chinese column names)
    $metrics = @{}
    # CPU
    if ($content -match '(?s)\|.*?CPU使用率.*?\|.*?([\d.]+%).*?\|') {
        $metrics.CPU = $matches[1]
    }
    # Memory
    if ($content -match '(?s)\|.*?内存使用率.*?\|.*?([\d.]+%).*?\|') {
        $metrics.Memory = $matches[1]
    }
    # Swap
    if ($content -match '(?s)\|.*?Swap使用率.*?\|.*?([\d.]+%).*?\|') {
        $metrics.Swap = $matches[1]
    }
    # Threads
    if ($content -match '(?s)\|.*?线程总数.*?\|.*?(\d+).*?\|') {
        $metrics.Threads = $matches[1]
    }
    $info.Metrics = $metrics
    # Extract service status
    if ($content -match '(?i)(\d+)\s*/\s*(\d+).*?(当前|Current).*?(连接|connections?)') {
        $info.MySQL = @{
            Connections = "$($matches[1])/$($matches[2])"
            Usage       = ""
        }
        if ($content -match '(?i)(当前|Current).*?(连接|connections?).*?\(([\d.]+%)\)') {
            $info.MySQL.Usage = $matches[3]
        }
    }
    # The three "label: value" patterns below accept an ASCII or full-width
    # colon and optional markdown bold markers between the colon and the value.
    # Slow queries
    if ($content -match '(?i)(慢查询数|Slow.*quer).*?[::]\**\s*(\d+)') {
        $info.SlowQueries = [int]$matches[2]
    }
    # Redis cache hit rate
    if ($content -match '(?i)(缓存命中率|Cache.*hit).*?[::]\**\s*([\d.]+%)') {
        $info.RedisHitRate = $matches[2]
    }
    # Security info
    if ($content -match '(?i)(认证失败|Auth.*fail).*?[::]\**\s*(\d+)') {
        $info.AuthFailures = [int]$matches[2]
    }
    return $info
}
# Functions exported from the universal report summary extractor module.
Export-ModuleMember -Function @('Get-UniversalReportSummary')
......@@ -7,5 +7,14 @@
"ReportRetentionDays": 30,
"ServerName": "服务器巡检",
"EnableHttpServer": true,
"EnableNgrok": true
"EnableNgrok": true,
"AI": {
"enabled": true,
"api_url": "https://ark.cn-beijing.volces.com/api/coding",
"api_key": "ark-61a594a7-181f-4b38-824a-93ab6a00ae11-4eff6",
"model": "doubao-seed-2.0-pro",
"max_retries": 3,
"timeout": 60,
"fallback_to_regex": true
}
}
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论