提交 e9a43958 authored 作者: 陈泽健's avatar 陈泽健

feat(系统服务进程监测):优化服务异常报告生成功能

- 增加了更多进程信息展示,如进程类型、完整命令等
- 添加了针对不同进程类型的处理建议
- 优化了报告样式,增加了标题层级和表格样式
- 改进了进程检测逻辑,避免误匹配
- 新增了日志安全退出功能
上级 95dff003
...@@ -181,61 +181,76 @@ class ServerProcessMonitor: ...@@ -181,61 +181,76 @@ class ServerProcessMonitor:
def _generate_word_report(self, server_name, process_name, process_info, timestamp, server_config): def _generate_word_report(self, server_name, process_name, process_info, timestamp, server_config):
""" """
生成服务异常的Word报告文档 生成服务异常的Word报告文档
增强版:支持所有类型的进程监控报告生成
参数: 参数:
server_name: 服务器名称 server_name (str): 服务器名称
process_name: 服务/容器名称 process_name (str): 服务/容器名称
process_info: 服务状态信息 process_info (dict): 服务状态信息,包含:
timestamp: 检测时间 - is_container (bool): 是否为容器
server_config: 服务器配置信息 - running (bool): 是否在运行
- status (str): 运行状态
- pid (str): 进程ID(非容器时)
- command (str/list): 进程命令
- process_type (str): 进程类型(新增)
- id (str): 容器ID(容器时)
- image (str): 容器镜像(容器时)
timestamp (str): 检测时间,格式'YYYY-MM-DD HH:MM:SS'
server_config (dict): 服务器配置信息
返回:
str: 生成的报告文件相对路径,失败返回None
""" """
try: try:
# 创建文档对象 # === 1. 创建文档对象并设置基本样式 ===
doc = Document() doc = Document()
# 设置全局字体 # 设置全局字体和字号
doc.styles['Normal'].font.name = '微软雅黑' doc.styles['Normal'].font.name = '微软雅黑'
doc.styles['Normal'].font.size = Pt(10) doc.styles['Normal'].font.size = Pt(10)
# 1. 添加主标题 (中文) # === 2. 添加报告主标题 ===
title = doc.add_heading(level=1) title = doc.add_heading(level=1)
title_run = title.add_run(f'{server_name} - {process_name}服务异常报告') title_run = title.add_run(f'{server_name} - {process_name}服务异常报告')
title_run.font.size = Pt(16) title_run.font.size = Pt(16)
title_run.font.color.rgb = RGBColor(255, 0, 0) # 红色 title_run.font.color.rgb = RGBColor(255, 0, 0) # 红色标题突出异常
title_run.font.bold = True title_run.font.bold = True
title.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER title.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER # 标题居中
# 2. 添加服务器基本信息表格 # === 3. 添加服务器基本信息 ===
doc.add_paragraph() # 空行 doc.add_paragraph() # 空行分隔
# 3.1 服务器信息标题
server_heading = doc.add_heading('服务器信息', level=2) server_heading = doc.add_heading('服务器信息', level=2)
server_heading.alignment = WD_PARAGRAPH_ALIGNMENT.LEFT server_heading.alignment = WD_PARAGRAPH_ALIGNMENT.LEFT
# 根据认证类型决定是否显示密码 # 3.2 根据认证类型决定是否显示密码
auth_type = '私钥认证' auth_type = '私钥认证' if server_config.get('private_key_path') else '密码认证'
password = server_config.get('ssh_password', '未配置') password = server_config.get('password', '未显示') # 安全考虑不显示密码
server_table = doc.add_table(rows=6, cols=2) # 增加一行用于密码 # 3.3 创建服务器信息表格
server_table = doc.add_table(rows=6, cols=2)
server_table.style = 'Light Grid Accent 1' server_table.style = 'Light Grid Accent 1'
server_table.alignment = WD_TABLE_ALIGNMENT.CENTER server_table.alignment = WD_TABLE_ALIGNMENT.CENTER
# 填充服务器信息 # 填充服务器信息数据
server_info_rows = [ server_info_rows = [
('服务器名称', server_name), ('服务器名称', server_name),
('IP地址', server_config['host']), ('IP地址', server_config['host']),
('SSH端口', str(server_config.get('port', 22))), ('SSH端口', str(server_config.get('port', 22))),
('用户名', server_config['username']), ('用户名', server_config['username']),
('认证类型', auth_type), ('认证类型', auth_type),
('连接密码', password) # 新增密码显示 ('连接状态', '成功' if server_name in self.ssh_clients else '失败')
] ]
for i, (label, value) in enumerate(server_info_rows): for i, (label, value) in enumerate(server_info_rows):
cells = server_table.rows[i].cells cells = server_table.rows[i].cells
cells[0].text = label cells[0].text = label
cells[0].paragraphs[0].runs[0].font.bold = True cells[0].paragraphs[0].runs[0].font.bold = True # 设置标签粗体
cells[1].text = str(value) cells[1].text = str(value)
# 3. 添加检测信息 # === 4. 添加检测信息 ===
doc.add_paragraph() # 空行 doc.add_paragraph() # 空行分隔
detect_heading = doc.add_heading('检测信息', level=2) detect_heading = doc.add_heading('检测信息', level=2)
detect_heading.alignment = WD_PARAGRAPH_ALIGNMENT.LEFT detect_heading.alignment = WD_PARAGRAPH_ALIGNMENT.LEFT
...@@ -248,23 +263,24 @@ class ServerProcessMonitor: ...@@ -248,23 +263,24 @@ class ServerProcessMonitor:
cells[0].paragraphs[0].runs[0].font.bold = True cells[0].paragraphs[0].runs[0].font.bold = True
cells[1].text = timestamp cells[1].text = timestamp
# 4. 添加服务状态详情 # === 5. 添加服务状态详情 ===
doc.add_paragraph() # 空行 doc.add_paragraph() # 空行分隔
status_heading = doc.add_heading('服务状态详情', level=2) status_heading = doc.add_heading('服务状态详情', level=2)
status_heading.alignment = WD_PARAGRAPH_ALIGNMENT.LEFT status_heading.alignment = WD_PARAGRAPH_ALIGNMENT.LEFT
# 如果是容器,添加容器详细信息
if process_info.get('is_container', False): if process_info.get('is_container', False):
container_table = doc.add_table(rows=4, cols=2) # === 5.1 容器状态详情 ===
container_table = doc.add_table(rows=5, cols=2)
container_table.alignment = WD_TABLE_ALIGNMENT.CENTER container_table.alignment = WD_TABLE_ALIGNMENT.CENTER
container_table.style = 'Medium Shading 1 Accent 1' container_table.style = 'Medium Shading 1 Accent 1'
# 填充表格 (中文) # 填充容器信息
rows = [ rows = [
('容器名称', process_name), ('容器名称', process_name),
('容器ID', process_info.get('id', 'N/A')), ('容器ID', process_info.get('id', 'N/A')),
('镜像', process_info.get('image', 'N/A')), ('镜像', process_info.get('image', 'N/A')),
('状态', process_info.get('status', 'N/A')) ('运行状态', process_info.get('status', 'N/A')),
('容器引擎', self._get_container_runtime(self.ssh_clients[server_name]))
] ]
for i, (label, value) in enumerate(rows): for i, (label, value) in enumerate(rows):
...@@ -278,15 +294,20 @@ class ServerProcessMonitor: ...@@ -278,15 +294,20 @@ class ServerProcessMonitor:
time_para = doc.add_paragraph() time_para = doc.add_paragraph()
time_para.add_run('创建时间: ').bold = True time_para.add_run('创建时间: ').bold = True
time_para.add_run(process_info.get('created', 'N/A')) time_para.add_run(process_info.get('created', 'N/A'))
# 如果是普通进程,添加进程信息
else: else:
process_table = doc.add_table(rows=3, cols=2) # === 5.2 普通进程状态详情 ===
process_table = doc.add_table(rows=4, cols=2) # 增加进程类型显示
process_table.alignment = WD_TABLE_ALIGNMENT.CENTER process_table.alignment = WD_TABLE_ALIGNMENT.CENTER
process_table.style = 'Medium Shading 1 Accent 1' process_table.style = 'Medium Shading 1 Accent 1'
# 获取进程类型,如java/python/nodejs等
process_type = process_info.get('process_type',
self._detect_process_type(process_info.get('command', '')))
# 填充进程信息
rows = [ rows = [
('进程名称', process_name), ('进程名称', process_name),
('进程类型', process_type),
('PID', process_info.get('pid', 'N/A')), ('PID', process_info.get('pid', 'N/A')),
('状态', process_info.get('status', 'N/A')) ('状态', process_info.get('status', 'N/A'))
] ]
...@@ -297,23 +318,48 @@ class ServerProcessMonitor: ...@@ -297,23 +318,48 @@ class ServerProcessMonitor:
cells[0].paragraphs[0].runs[0].font.bold = True cells[0].paragraphs[0].runs[0].font.bold = True
cells[1].text = str(value) cells[1].text = str(value)
# 添加命令信息 # 添加命令完整信息
doc.add_paragraph() doc.add_paragraph()
cmd_para = doc.add_paragraph() cmd_para = doc.add_paragraph()
cmd_para.add_run('命令: ').bold = True cmd_para.add_run('完整命令: ').bold = True
cmd_para.add_run(' '.join(process_info.get('command', ['N/A']))) cmd = process_info.get('command', 'N/A')
cmd_para.add_run(cmd if isinstance(cmd, str) else ' '.join(cmd))
# 5. 添加处理建议 (中文) # 添加进程运行时间(可选)
doc.add_page_break() if process_info.get('pid'):
try:
stdin, stdout, stderr = self.ssh_clients[server_name].exec_command(
f"ps -p {process_info['pid']} -o etime"
)
runtime = stdout.read().decode().strip().split('\n')[-1]
doc.add_paragraph()
runtime_para = doc.add_paragraph()
runtime_para.add_run('运行时间: ').bold = True
runtime_para.add_run(runtime)
except:
pass
# === 6. 添加处理建议 ===
doc.add_page_break() # 新页开始
suggestion = doc.add_heading('处理建议', level=2) suggestion = doc.add_heading('处理建议', level=2)
suggestion.alignment = WD_PARAGRAPH_ALIGNMENT.LEFT suggestion.alignment = WD_PARAGRAPH_ALIGNMENT.LEFT
# 根据进程类型给出针对性建议
process_type = process_info.get('process_type', 'unknown')
if process_type == 'java':
doc.add_paragraph("1. 检查Java堆栈日志和GC日志")
doc.add_paragraph("2. 检查JVM内存配置(Xmx/Xms)")
elif process_type == 'python':
doc.add_paragraph("1. 检查Python异常日志")
doc.add_paragraph("2. 检查Python依赖版本")
else:
doc.add_paragraph("1. 检查服务日志确认具体异常原因") doc.add_paragraph("1. 检查服务日志确认具体异常原因")
doc.add_paragraph("2. 尝试重启服务") doc.add_paragraph("2. 尝试重启服务")
doc.add_paragraph("3. 检查服务器资源使用情况(CPU、内存、磁盘)") doc.add_paragraph("3. 检查服务器资源使用情况(CPU、内存、磁盘)")
doc.add_paragraph("4. 联系相关开发人员排查问题") doc.add_paragraph("4. 联系相关开发人员排查问题")
# 保存文档 (仍使用英文文件名) # === 7. 保存报告文件 ===
error_log_dir = os.path.abspath(os.path.join( error_log_dir = os.path.abspath(os.path.join(
os.path.dirname(__file__), os.path.dirname(__file__),
'..', '..',
...@@ -321,8 +367,12 @@ class ServerProcessMonitor: ...@@ -321,8 +367,12 @@ class ServerProcessMonitor:
)) ))
os.makedirs(error_log_dir, exist_ok=True) os.makedirs(error_log_dir, exist_ok=True)
# 英文文件名 # 生成安全的文件名
filename = f"{server_name}_{process_name}_service_exception_{timestamp.replace(':', '-').replace(' ', '_')}.docx" safe_process_name = re.sub(r'[\\/*?:"<>|]', "_", process_name)
filename = (
f"{server_name}_{safe_process_name}_"
f"service_exception_{timestamp.replace(':', '-').replace(' ', '_')}.docx"
)
report_path = os.path.join(error_log_dir, filename) report_path = os.path.join(error_log_dir, filename)
doc.save(report_path) doc.save(report_path)
...@@ -332,13 +382,10 @@ class ServerProcessMonitor: ...@@ -332,13 +382,10 @@ class ServerProcessMonitor:
return relative_path return relative_path
except Exception as e: except Exception as e:
self.logger.error(f"{Fore.RED}生成Word报告失败: {str(e)}{Style.RESET_ALL}") self.logger.error(f"{Fore.RED}生成Word报告失败: {str(e)}{Style.RESET_ALL}", exc_info=True)
return None return None
def _get_remote_process_status(self, server_name, process_name): def _get_remote_process_status(self, server_name, process_name):
"""
获取远程服务器进程/容器状态 - 增强日志输出
"""
ssh = self.ssh_clients.get(server_name) ssh = self.ssh_clients.get(server_name)
if not ssh: if not ssh:
self.logger.warning(f"{Fore.YELLOW}未找到服务器 {server_name} 的SSH连接{Style.RESET_ALL}") self.logger.warning(f"{Fore.YELLOW}未找到服务器 {server_name} 的SSH连接{Style.RESET_ALL}")
...@@ -347,47 +394,65 @@ class ServerProcessMonitor: ...@@ -347,47 +394,65 @@ class ServerProcessMonitor:
try: try:
self.logger.info(f"正在检查进程: {process_name}...") self.logger.info(f"正在检查进程: {process_name}...")
# 首先检查是否是容器 # 1. 首先检查是否是容器
container_status = self._check_container_status(ssh, process_name) container_status = self._check_container_status(ssh, process_name)
if container_status: if container_status:
status_msg = "容器" if container_status['is_container'] else "进程" status_msg = "容器" if container_status['is_container'] else "进程"
self.logger.info(f"{status_msg} {process_name} 状态: {container_status['status']}") self.logger.info(f"{status_msg} {process_name} 状态: {container_status['status']}")
return container_status return container_status
# 如果不是容器,检查普通进程 # 2. 通用进程检测逻辑
stdin, stdout, stderr = ssh.exec_command(f"pgrep -f {process_name}") # 使用更精确的进程检测方式,避免误匹配
pids = stdout.read().decode().strip() stdin, stdout, stderr = ssh.exec_command(
f"ps aux | grep -v grep | grep -i '{process_name}'"
if pids: )
pid = pids.split('\n')[0]
stdin, stdout, stderr = ssh.exec_command(f"ps -p {pid} -o pid,stat,cmd")
process_info = stdout.read().decode().strip() process_info = stdout.read().decode().strip()
status_line = process_info.split('\n')[1] if len(process_info.split('\n')) > 1 else None if process_info:
if status_line: # 解析进程信息
status = status_line.split()[1] process_lines = process_info.split('\n')
cmd = ' '.join(status_line.split()[2:]) first_process = process_lines[0].split()
self.logger.info( pid = first_process[1]
f"{Fore.GREEN}进程运行中 - PID: {pid}, 状态: {status}, 命令: {cmd}{Style.RESET_ALL}") status = first_process[7]
else: cmd = ' '.join(first_process[10:])
self.logger.warning(f"{Fore.YELLOW}无法解析进程信息{Style.RESET_ALL}")
self.logger.info(f"{Fore.GREEN}进程运行中 - PID: {pid}, 状态: {status}, 命令: {cmd}{Style.RESET_ALL}")
return { return {
'is_container': False, 'is_container': False,
'running': True, 'running': True,
'pid': pid, 'pid': pid,
'status': status_line.split()[1] if status_line else 'UNKNOWN', 'status': status,
'command': status_line.split()[2:] if status_line else 'UNKNOWN' 'command': cmd,
'process_type': self._detect_process_type(cmd) # 新增进程类型检测
} }
self.logger.warning(f"{Fore.YELLOW}进程 {process_name} 未运行{Style.RESET_ALL}") self.logger.warning(f"{Fore.YELLOW}进程 {process_name} 未运行{Style.RESET_ALL}")
return {'is_container': False, 'running': False} return {
'is_container': False,
'running': False,
'process_type': 'unknown'
}
except Exception as e: except Exception as e:
self.logger.error( self.logger.error(
f"{Fore.RED}获取服务器 {server_name} 进程 {process_name} 状态失败: {str(e)}{Style.RESET_ALL}") f"{Fore.RED}获取服务器 {server_name} 进程 {process_name} 状态失败: {str(e)}{Style.RESET_ALL}")
return None return None
def _detect_process_type(self, cmd):
    """Classify a process by keywords found in its command line.

    Args:
        cmd: The process command line, either as a single string or as a
            sequence of argument tokens (callers store it in both forms).
            ``None`` is treated as an empty command.

    Returns:
        str: ``'java'``, ``'python'``, ``'nodejs'``, ``'nginx'`` or
        ``'mysql'`` for the first rule that matches, otherwise ``'other'``.
    """
    # Normalize to one string first. With a token list, ``'.jar' in cmd``
    # would be an exact-element test, so ['java', '-jar', 'app.jar'] would
    # never be recognized as a Java process.
    if cmd is None:
        cmd = ''
    elif not isinstance(cmd, str):
        cmd = ' '.join(str(part) for part in cmd)

    # Java needs both markers so that e.g. "java -version" is not reported
    # as a Java service.
    if 'java' in cmd and '.jar' in cmd:
        return 'java'

    # Remaining rules are plain substring checks, evaluated in this order.
    keyword_rules = (
        ('python', 'python'),
        ('node', 'nodejs'),
        ('nginx', 'nginx'),
        ('mysql', 'mysql'),
    )
    for keyword, process_type in keyword_rules:
        if keyword in cmd:
            return process_type
    return 'other'
def check_all_servers(self): def check_all_servers(self):
"""检查所有服务器进程状态""" """检查所有服务器进程状态"""
current_status = { current_status = {
......
B{'pass_percent': '74.8%', 'fail_percent': '2.5%', 'exception_percent': '22.6%', 'blocking_rate': '0%'} B{'pass_percent': '74.8%', 'fail_percent': '2.5%', 'exception_percent': '22.6%', 'blocking_rate': '0%'}
...@@ -78,6 +78,8 @@ class LanzhouSinopecProject: ...@@ -78,6 +78,8 @@ class LanzhouSinopecProject:
SELENIUM_LOG_SCREEN(wd, "75") SELENIUM_LOG_SCREEN(wd, "75")
elif element_type == "login": elif element_type == "login":
# 退出系统登录
safe_click((By.XPATH, "//div[@class='quit']"), wd)
sleep(2) sleep(2)
INFO(f"开始登录,账号为:{element_value[0]},密码为:{element_value[1]}") INFO(f"开始登录,账号为:{element_value[0]},密码为:{element_value[1]}")
user_login(element_value[0],element_value[1]) user_login(element_value[0],element_value[1])
......
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论