提交 6bdd0ea7 authored 作者: 陈泽健's avatar 陈泽健

修改_process_line方法中的去重逻辑,确保检测到冷却期内的错误时立即返回。

上级 a68d97a9
......@@ -18,4 +18,5 @@
- 修复因文件目录名称被修改而引起的日志文件存放路径问题,并优化存放路径。
- 补充监测服务的前置ngrok映射以及端口开放的使用指令注释,处理注释错误。
- 补充_process_line函数对日志去重的IP过滤,parse_log_line函数增加IP过滤,处理相同错误日志因IP不同识别为不同错误问题。
- 移除控制台的详细错误日志输出,保留关键状态日志(连接状态、监控启停等),加强异常处理和日志记录,在JSON文件名中加入服务器IP标识,添加了主程序入口的详细日志。
\ No newline at end of file
- 移除控制台的详细错误日志输出,保留关键状态日志(连接状态、监控启停等),加强异常处理和日志记录,在JSON文件名中加入服务器IP标识,添加了主程序入口的详细日志。
- 修改_process_line方法中的去重逻辑,确保检测到冷却期内的错误时立即返回。
\ No newline at end of file
......@@ -233,12 +233,15 @@ class LogMonitor:
def _process_line(self, line):
"""
处理单行日志
1. 解析日志
2. 如果是ERROR则收集上下文
3. 触发告警
1. 维护行缓冲区
2. 解析日志行
3. 检查是否ERROR日志
4. 生成去重key并检查冷却期
5. 获取上下文并保存到文件
6. 发送钉钉通知(非冷却期内)
"""
with self.lock:
# 维护行缓冲区
# 维护行缓冲区(保留最近500行)
self.line_buffer.append(line)
if len(self.line_buffer) > self.buffer_size:
self.line_buffer.pop(0)
......@@ -249,9 +252,33 @@ class LogMonitor:
if not parsed or parsed['level'] != 'ERROR':
return
logging.info(f"发现ERROR日志: {parsed['module']}/{parsed['action']}")
# Build the de-duplication key from the message with dynamic content masked.
# Mask IPv4 addresses first. Inside a raw string the dot is escaped as `\.`;
# the earlier `\\.` matched a literal backslash followed by any character,
# so real addresses were never replaced with [IP].
clean_message = re.sub(
    r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}',
    '[IP]',
    parsed['message']
)
# Mask every remaining run of digits so that differing numbers alone do not
# produce a different key.
clean_message = re.sub(r'\d+', '[NUM]', clean_message)
key = f"{parsed['module']}|{parsed['action']}|{clean_message}"
error_hash = hash(key)
current_time = time.time()
# 检查冷却期(必须先于获取上下文)
if error_hash in self.sent_errors:
time_diff = current_time - self.sent_errors[error_hash]
if time_diff < self.resend_interval:
logging.info(
f"冷却期内相同错误 [{parsed['module']}/{parsed['action']}] "
f"上次发送: {time_diff:.1f}/{self.resend_interval}秒前"
)
return # 关键点:立即返回不再处理
logging.info(
f"发现新ERROR日志 [{parsed['module']}/{parsed['action']}] "
f"消息: {clean_message[:100]}..."
)
# 获取完整日志上下文
# 获取日志上下文(只对新错误执行)
full_log = self.get_remote_log_with_paramiko(
host=self.host,
username=self.username,
......@@ -269,54 +296,38 @@ class LogMonitor:
lines = full_log.split('\n')
for i, l in enumerate(lines):
if line.strip() in l.strip():
start = max(0, i - 100)
end = min(len(lines), i + 101)
context = lines[start:end]
context = lines[max(0, i - 100):min(len(lines), i + 101)]
# 保存错误上下文
with self.lock:
self.error_contexts.append({
'timestamp': time.strftime('%Y-%m-%d %H:%M:%S'),
'timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
'host': self.host,
'log_path': self.log_path,
'error_line': line,
'context': context,
'fingerprint': key, # 记录错误指纹
'structured': parsed
})
# 保存到文件并生成URL
# 保存到文件
file_path = self.save_error_contexts_to_json()
error_log_url = self.generate_error_log_url(file_path)
if not error_log_url:
if not file_path:
return
# 生成去重key (过滤IP地址)
clean_message = re.sub(
r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}',
'[IP]',
parsed['message']
)
key = f"{parsed['module']}_{parsed['action']}_{clean_message}"
error_hash = hash(key)
current_time = time.time()
# 检查冷却期
if error_hash in self.sent_errors:
if current_time - self.sent_errors[error_hash] < self.resend_interval:
logging.info(f"相同错误在冷却期内,跳过: {key[:100]}...")
return
# 发送钉钉通知
try:
dingding_send_message(error_log_url, ding_type=self.ding_type)
self.sent_errors[error_hash] = current_time
logging.info(f"已发送钉钉通知: {self.ding_type}")
except Exception as e:
logging.error(f"发送钉钉消息失败: {e}")
error_log_url = self.generate_error_log_url(file_path)
if error_log_url:
try:
dingding_send_message(error_log_url, ding_type=self.ding_type)
self.sent_errors[error_hash] = current_time
logging.info(f"钉钉通知发送成功 [{self.ding_type}]")
except Exception as e:
logging.error(f"钉钉发送失败: {str(e)}")
break
break # 找到错误行后退出循环
except Exception as e:
logging.error(f"处理日志行异常: {e}")
logging.error(f"处理日志行异常: {str(e)}", exc_info=True)
def restart_monitoring(self):
"""重启监控"""
......@@ -430,10 +441,84 @@ if __name__ == "__main__":
"log_path": "/var/www/java/api-java-meeting2.0/logs/ubains-INFO-AND-ERROR.log",
"ding_type": "235标准预定对内服务"
},
# 其他日志配置...
{
"log_path": "/var/www/java/external-meeting-api/logs/ubains-INFO-AND-ERROR.log",
"ding_type": "235标准预定对外服务"
}
]
},
# 其他服务器配置...
{
"host": "192.168.5.200",
"username": "root",
"private_key_path": "C:/Users/Administrator/.ssh/id_rsa",
"passphrase": "Ubains@123",
"logs": [
{
"log_path": "/var/www/java/api-java-meeting2.0/logs/ubains-INFO-AND-ERROR.log",
"ding_type": "展厅预定对内服务"
},
{
"log_path": "/var/www/java/external-meeting-api/logs/ubains-INFO-AND-ERROR.log",
"ding_type": "展厅预定对外服务"
},
{
"log_path": "/var/www/html/log/uinfo.log",
"ding_type": "展厅运维服务"
},
{
"log_path": "/var/www/java/unifiedPlatform/api-java-meeting2.0/logs/ubains-INFO-AND-ERROR.log",
"ding_type": "展厅统一平台对内服务"
},
{
"log_path": "/var/www/java/unifiedPlatform/external-meeting-api/logs/ubains-INFO-AND-ERROR.log",
"ding_type": "展厅统一平台对外服务"
},
{
"log_path": "/var/www/java/unifiedPlatform/api-dubbo-smc-three/log.out",
"ding_type": "展厅统一平台SMC服务"
},
{
"log_path": "/var/www/java/unifiedPlatform/api-dubbo-tencent-meeting/log.out",
"ding_type": "展厅统一平台腾讯服务"
},
{
"log_path": "/var/www/java/unifiedPlatform/api-dubbo-tencent-meeting/log.out",
"ding_type": "展厅统一平台腾讯服务"
},
{
"log_path": "/var/www/java/unifiedPlatform/auth-sso-gatway/log.out",
"ding_type": "展厅统一平台gatway服务"
},
{
"log_path": "/var/www/java/unifiedPlatform/auth-sso-auth/log.out",
"ding_type": "展厅统一平台auth服务"
},
{
"log_path": "/var/www/java/unifiedPlatform/auth-sso-system/log.out",
"ding_type": "展厅统一平台system服务"
}
]
},
{
"host": "139.9.60.86",
"username": "root",
"private_key_path": "C:/Users/Administrator/.ssh/id_rsa",
"passphrase": "Ubains@123",
"logs": [
{
"log_path": "/var/www/html/log/uinfo.log",
"ding_type": "对外云端运维集控服务"
},
{
"log_path": "/var/www/java/api-java-meeting2.0/logs/ubains-INFO-AND-ERROR.log",
"ding_type": "对外云端预定对内服务"
},
{
"log_path": "/var/www/java/external-meeting-api/logs/ubains-INFO-AND-ERROR.log",
"ding_type": "对外云端预定对外服务"
}
]
}
]
monitors = []
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论