Skip to content
项目
群组
代码片段
帮助
正在加载...
帮助
为 GitLab 提交贡献
登录
切换导航
U
ubains-module-test
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
分枝图
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
计划
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
分枝图
统计图
创建新议题
作业
提交
议题看板
打开侧边栏
郑晓兵
ubains-module-test
Commits
a68d97a9
提交
a68d97a9
authored
6月 13, 2025
作者:
陈泽健
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
移除控制台的详细错误日志输出,保留关键状态日志(连接状态、监控启停等),强异常处理和日志记录,在JSON文件名中加入服务器IP标识,添加了主程序入口的详细日志
上级
f8f05df8
隐藏空白字符变更
内嵌
并排
正在显示
2 个修改的文件
包含
245 行增加
和
221 行删除
+245
-221
README.md
系统监测/README.md
+3
-2
服务日志监测.py
系统监测/系统日志监测/服务日志监测.py
+242
-219
没有找到文件。
系统监测/README.md
浏览文件 @
a68d97a9
...
@@ -17,4 +17,5 @@
...
@@ -17,4 +17,5 @@
-
日志监测服务补充对外云端服务以及展厅统一平台微服务。服务监测补充对外云端以及展厅环境。
-
日志监测服务补充对外云端服务以及展厅统一平台微服务。服务监测补充对外云端以及展厅环境。
-
处理日志文件存放路径问题,文件目录名称被修改引起。处理日志文件存放问题,优化路径。
-
处理日志文件存放路径问题,文件目录名称被修改引起。处理日志文件存放问题,优化路径。
-
补充监测服务的前置ngrok映射以及端口开放的使用指令注释,处理注释错误。
-
补充监测服务的前置ngrok映射以及端口开放的使用指令注释,处理注释错误。
-
补充_process_line函数对日志去重的IP过滤,parse_log_line函数增加IP过滤,处理相同错误日志因IP不同识别为不同错误问题。
-
补充_process_line函数对日志去重的IP过滤,parse_log_line函数增加IP过滤,处理相同错误日志因IP不同识别为不同错误问题。
\ No newline at end of file
-
移除控制台的详细错误日志输出,保留关键状态日志(连接状态、监控启停等),强异常处理和日志记录,在JSON文件名中加入服务器IP标识,添加了主程序入口的详细日志
\ No newline at end of file
系统监测/系统日志监测/服务日志监测.py
浏览文件 @
a68d97a9
import
re
"""
服务日志监测系统
功能:通过SSH实时监控远程服务器日志,发现错误时收集上下文并保存到文件,通过钉钉发送通知
特点:
1. 多服务器多日志文件同时监控
2. 错误日志去重和冷却机制
3. 自动保存错误上下文到JSON文件
4. 每日自动清理告警缓存
"""
import
re
import
paramiko
import
paramiko
import
threading
import
threading
import
time
import
time
...
@@ -10,7 +19,8 @@ import json
...
@@ -10,7 +19,8 @@ import json
import
socket
import
socket
from
datetime
import
datetime
,
timedelta
from
datetime
import
datetime
,
timedelta
# 配置日志输出到控制台
# ==================== 初始化配置 ====================
# 配置日志输出到控制台,只显示INFO级别以上的信息
console_handler
=
logging
.
StreamHandler
()
console_handler
=
logging
.
StreamHandler
()
console_handler
.
setLevel
(
logging
.
INFO
)
console_handler
.
setLevel
(
logging
.
INFO
)
formatter
=
logging
.
Formatter
(
'
%(asctime)
s -
%(levelname)
s -
%(message)
s'
)
formatter
=
logging
.
Formatter
(
'
%(asctime)
s -
%(levelname)
s -
%(message)
s'
)
...
@@ -20,29 +30,44 @@ logging.getLogger().setLevel(logging.INFO)
...
@@ -20,29 +30,44 @@ logging.getLogger().setLevel(logging.INFO)
# 获取当前脚本所在目录
# 获取当前脚本所在目录
current_dir
=
os
.
path
.
dirname
(
os
.
path
.
abspath
(
__file__
))
current_dir
=
os
.
path
.
dirname
(
os
.
path
.
abspath
(
__file__
))
print
(
"当前脚本目录:"
,
current_dir
)
logging
.
info
(
f
"当前脚本目录: {current_dir}"
)
# 构建
正确的 Base 目录
路径
# 构建
Base目录路径并添加到系统
路径
base_dir
=
os
.
path
.
normpath
(
os
.
path
.
join
(
current_dir
,
".."
,
"Base"
))
base_dir
=
os
.
path
.
normpath
(
os
.
path
.
join
(
current_dir
,
".."
,
"Base"
))
print
(
"✅ 正确的 Base 目录路径:"
,
base_dir
)
# 添加进系统路径
sys
.
path
.
append
(
base_dir
)
sys
.
path
.
append
(
base_dir
)
logging
.
info
(
f
"Base目录已添加到系统路径: {base_dir}"
)
#
尝试导入
#
==================== 模块导入 ====================
try
:
try
:
from
base
import
dingding_send_message
from
base
import
dingding_send_message
print
(
"✅ 成功导入 base 模块!"
)
logging
.
info
(
"✅ 成功导入 base 模块"
)
except
ImportError
as
e
:
except
ImportError
as
e
:
print
(
"❌ 导入失败:"
,
e
)
logging
.
error
(
f
"❌ 导入失败: {e}"
)
print
(
"🔍 sys.path 中包含的路径如下:
"
)
logging
.
info
(
"sys.path 包含路径:
"
)
for
p
in
sys
.
path
:
for
p
in
sys
.
path
:
print
(
" -"
,
p
)
logging
.
info
(
f
" - {p}"
)
sys
.
exit
(
1
)
class
LogMonitor
:
class
LogMonitor
:
"""
日志监控类,负责单个日志文件的监控处理
"""
def
__init__
(
self
,
host
,
username
,
private_key_path
,
passphrase
,
log_path
,
def
__init__
(
self
,
host
,
username
,
private_key_path
,
passphrase
,
log_path
,
check_interval
=
1
,
ding_type
=
"标准版服务监测"
,
resend_interval
=
10800
):
check_interval
=
1
,
ding_type
=
"标准版服务监测"
,
resend_interval
=
10800
):
"""
初始化监控实例
:param host: 服务器IP
:param username: SSH用户名
:param private_key_path: SSH私钥路径
:param passphrase: SSH密钥密码
:param log_path: 要监控的日志路径
:param check_interval: 检查间隔(秒)
:param ding_type: 钉钉消息类型标识
:param resend_interval: 重复告警冷却时间(秒)
"""
self
.
host
=
host
self
.
host
=
host
self
.
username
=
username
self
.
username
=
username
self
.
private_key_path
=
private_key_path
self
.
private_key_path
=
private_key_path
...
@@ -50,45 +75,70 @@ class LogMonitor:
...
@@ -50,45 +75,70 @@ class LogMonitor:
self
.
log_path
=
log_path
self
.
log_path
=
log_path
self
.
check_interval
=
check_interval
self
.
check_interval
=
check_interval
self
.
ding_type
=
ding_type
self
.
ding_type
=
ding_type
# SSH连接相关
self
.
client
=
None
self
.
client
=
None
self
.
channel
=
None
self
.
channel
=
None
self
.
collecting
=
False
self
.
collecting
=
False
# 线程安全锁
self
.
lock
=
threading
.
Lock
()
self
.
lock
=
threading
.
Lock
()
# 日志行缓冲区
self
.
line_buffer
=
[]
self
.
line_buffer
=
[]
self
.
buffer_size
=
500
self
.
buffer_size
=
500
# 错误上下文收集
self
.
error_contexts
=
[]
self
.
error_contexts
=
[]
self
.
sent_errors
=
{}
# 已发送的错误日志 {hash: last_send_time}
self
.
sent_errors
=
{}
# 格式: {error_hash: last_send_timestamp}
self
.
resend_interval
=
resend_interval
# 钉钉重发冷却时间(秒)
self
.
resend_interval
=
resend_interval
self
.
schedule_daily_clear
()
# 启动每日清理任务
# 启动每日清理任务
self
.
schedule_daily_clear
()
def
connect
(
self
):
def
connect
(
self
):
"""建立SSH连接并开始tail -f日志"""
try
:
try
:
private_key
=
paramiko
.
RSAKey
.
from_private_key_file
(
self
.
private_key_path
,
password
=
self
.
passphrase
)
# 加载私钥
private_key
=
paramiko
.
RSAKey
.
from_private_key_file
(
self
.
private_key_path
,
password
=
self
.
passphrase
)
# 创建SSH客户端
self
.
client
=
paramiko
.
SSHClient
()
self
.
client
=
paramiko
.
SSHClient
()
self
.
client
.
set_missing_host_key_policy
(
paramiko
.
AutoAddPolicy
())
self
.
client
.
set_missing_host_key_policy
(
paramiko
.
AutoAddPolicy
())
self
.
client
.
connect
(
self
.
host
,
username
=
self
.
username
,
pkey
=
private_key
,
timeout
=
30
,
banner_timeout
=
200
,
auth_timeout
=
200
)
# 连接参数设置
connect_params
=
{
'hostname'
:
self
.
host
,
'username'
:
self
.
username
,
'pkey'
:
private_key
,
'timeout'
:
30
,
'banner_timeout'
:
200
,
'auth_timeout'
:
200
}
self
.
client
.
connect
(
**
connect_params
)
# 创建交互式shell
self
.
channel
=
self
.
client
.
invoke_shell
()
self
.
channel
=
self
.
client
.
invoke_shell
()
self
.
channel
.
setblocking
(
0
)
self
.
channel
.
setblocking
(
0
)
# 非阻塞模式
self
.
channel
.
transport
.
set_keepalive
(
30
)
self
.
channel
.
transport
.
set_keepalive
(
30
)
# 保持连接
# 发送tail命令
self
.
channel
.
send
(
f
"tail -f {self.log_path}
\n
"
)
self
.
channel
.
send
(
f
"tail -f {self.log_path}
\n
"
)
logging
.
info
(
f
"
Connected to {self.host}, monitoring
{self.log_path}"
)
logging
.
info
(
f
"
已连接到 {self.host},开始监控
{self.log_path}"
)
return
True
return
True
except
Exception
as
e
:
except
Exception
as
e
:
logging
.
info
(
f
"连接失败: {e},目标主机: {self.host}"
)
logging
.
error
(
f
"连接失败: {e},目标主机: {self.host}"
)
return
False
return
False
def
start_monitoring
(
self
):
def
start_monitoring
(
self
):
"""开始监控日志"""
if
self
.
collecting
:
if
self
.
collecting
:
logging
.
info
(
"Already monitoring logs.
"
)
logging
.
warning
(
"监控已在进行中
"
)
return
return
if
not
self
.
connect
():
if
not
self
.
connect
():
...
@@ -96,34 +146,43 @@ class LogMonitor:
...
@@ -96,34 +146,43 @@ class LogMonitor:
self
.
collecting
=
True
self
.
collecting
=
True
self
.
monitor_thread
=
threading
.
Thread
(
target
=
self
.
_monitor_loop
)
self
.
monitor_thread
=
threading
.
Thread
(
target
=
self
.
_monitor_loop
)
self
.
monitor_thread
.
daemon
=
True
# 设置为守护线程
self
.
monitor_thread
.
start
()
self
.
monitor_thread
.
start
()
logging
.
info
(
f
"
开始监控日志
: {self.log_path}"
)
logging
.
info
(
f
"
监控线程已启动
: {self.log_path}"
)
def
stop_monitoring
(
self
):
def
stop_monitoring
(
self
):
"""停止监控"""
self
.
collecting
=
False
self
.
collecting
=
False
if
self
.
channel
:
if
self
.
channel
:
self
.
channel
.
close
()
self
.
channel
.
close
()
if
self
.
client
:
if
self
.
client
:
self
.
client
.
close
()
self
.
client
.
close
()
logging
.
info
(
f
"
停止对日志 {self.log_path} 的监控.
"
)
logging
.
info
(
f
"
已停止监控: {self.log_path}
"
)
def
_monitor_loop
(
self
):
def
_monitor_loop
(
self
):
"""
监控主循环
持续读取日志数据并处理
"""
retry_count
=
0
retry_count
=
0
MAX_RETRY
=
5
MAX_RETRY
=
5
while
self
.
collecting
:
while
self
.
collecting
:
try
:
try
:
if
self
.
channel
.
recv_ready
():
if
self
.
channel
.
recv_ready
():
# 读取日志数据
data
=
self
.
channel
.
recv
(
1024
)
.
decode
(
'utf-8'
,
errors
=
'ignore'
)
data
=
self
.
channel
.
recv
(
1024
)
.
decode
(
'utf-8'
,
errors
=
'ignore'
)
logging
.
debug
(
"Received raw data:
%
s"
,
data
)
# 处理每行日志
for
line
in
data
.
splitlines
():
for
line
in
data
.
splitlines
():
self
.
_process_line
(
line
.
strip
())
self
.
_process_line
(
line
.
strip
())
retry_count
=
0
retry_count
=
0
# 重置重试计数
else
:
else
:
time
.
sleep
(
self
.
check_interval
)
time
.
sleep
(
self
.
check_interval
)
retry_count
=
0
except
(
paramiko
.
SSHException
,
socket
.
error
,
OSError
)
as
e
:
except
(
paramiko
.
SSHException
,
socket
.
error
,
OSError
)
as
e
:
logging
.
warning
(
f
"SSH
断开,准备重连... 错误: {e}
"
)
logging
.
warning
(
f
"SSH
连接异常: {e},尝试重连...
"
)
self
.
restart_monitoring
()
self
.
restart_monitoring
()
retry_count
+=
1
retry_count
+=
1
...
@@ -131,60 +190,68 @@ class LogMonitor:
...
@@ -131,60 +190,68 @@ class LogMonitor:
logging
.
error
(
"达到最大重试次数,停止监控"
)
logging
.
error
(
"达到最大重试次数,停止监控"
)
self
.
stop_monitoring
()
self
.
stop_monitoring
()
return
return
time
.
sleep
(
min
(
5
*
retry_count
,
60
))
time
.
sleep
(
min
(
5
*
retry_count
,
60
))
# 退避算法
def
save_error_contexts_to_json
(
self
):
def
save_error_contexts_to_json
(
self
):
"""
保存错误上下文到JSON文件
返回: 文件路径或None
"""
try
:
try
:
# 获取
当前脚本所在目录(系统日志监测)
# 获取
error_log目录路径
current_dir
=
os
.
path
.
dirname
(
os
.
path
.
abspath
(
__file__
))
current_dir
=
os
.
path
.
dirname
(
os
.
path
.
abspath
(
__file__
))
# 上溯两级到「系统监测」目录
base_dir
=
os
.
path
.
dirname
(
os
.
path
.
dirname
(
current_dir
))
base_dir
=
os
.
path
.
dirname
(
os
.
path
.
dirname
(
current_dir
))
error_log_dir
=
os
.
path
.
join
(
base_dir
,
"error_log"
)
error_log_dir
=
os
.
path
.
join
(
base_dir
,
"error_log"
)
except
NameError
:
# 备用方案:使用 inspect 模块(适用于线程环境)
import
inspect
current_file
=
inspect
.
getframeinfo
(
inspect
.
currentframe
())
.
filename
base_dir
=
os
.
path
.
dirname
(
os
.
path
.
dirname
(
os
.
path
.
abspath
(
current_file
)))
error_log_dir
=
os
.
path
.
join
(
base_dir
,
"error_log"
)
os
.
makedirs
(
error_log_dir
,
exist_ok
=
True
)
os
.
makedirs
(
error_log_dir
,
exist_ok
=
True
)
logging
.
info
(
f
"错误日志将保存到: {error_log_dir}"
)
# 增加路径日志
timestamp
=
time
.
strftime
(
"
%
Y-
%
m-
%
d-
%
H
%
M
%
S"
)
# 生成带时间戳的文件名
filename
=
f
"error_log_{timestamp}.json"
timestamp
=
time
.
strftime
(
"
%
Y-
%
m-
%
d-
%
H
%
M
%
S"
)
file_path
=
os
.
path
.
join
(
error_log_dir
,
filename
)
filename
=
f
"error_log_{timestamp}_{self.host.replace('.', '_')}.json"
file_path
=
os
.
path
.
join
(
error_log_dir
,
filename
)
try
:
# 写入JSON文件
with
open
(
file_path
,
'w'
,
encoding
=
'utf-8'
)
as
f
:
with
open
(
file_path
,
'w'
,
encoding
=
'utf-8'
)
as
f
:
json
.
dump
(
self
.
error_contexts
,
f
,
ensure_ascii
=
False
,
indent
=
4
)
json
.
dump
(
self
.
error_contexts
,
f
,
ensure_ascii
=
False
,
indent
=
4
)
logging
.
info
(
f
"✅ 文件保存成功: {file_path}"
)
logging
.
info
(
f
"错误日志已保存: {file_path}"
)
return
file_path
return
file_path
except
Exception
as
e
:
except
Exception
as
e
:
logging
.
error
(
f
"
❌ 文件保存
失败: {e}"
)
logging
.
error
(
f
"
保存错误日志
失败: {e}"
)
return
None
return
None
def
generate_error_log_url
(
self
,
file_path
):
def
generate_error_log_url
(
self
,
file_path
):
"""生成错误日志的公网访问URL"""
if
not
file_path
:
if
not
file_path
:
return
None
return
None
filename
=
os
.
path
.
basename
(
file_path
)
filename
=
os
.
path
.
basename
(
file_path
)
error_log_url
=
f
"http://nat.ubainsyun.com:32233/{filename}"
return
f
"http://nat.ubainsyun.com:32233/{filename}"
logging
.
info
(
f
"生成公网访问链接: {error_log_url}"
)
return
error_log_url
def
_process_line
(
self
,
line
):
def
_process_line
(
self
,
line
):
"""
处理单行日志
1. 解析日志
2. 如果是ERROR则收集上下文
3. 触发告警
"""
with
self
.
lock
:
with
self
.
lock
:
# 维护行缓冲区
self
.
line_buffer
.
append
(
line
)
self
.
line_buffer
.
append
(
line
)
if
len
(
self
.
line_buffer
)
>
self
.
buffer_size
:
if
len
(
self
.
line_buffer
)
>
self
.
buffer_size
:
self
.
line_buffer
.
pop
(
0
)
self
.
line_buffer
.
pop
(
0
)
try
:
try
:
# 解析日志行
parsed
=
self
.
parse_log_line
(
line
)
parsed
=
self
.
parse_log_line
(
line
)
if
not
parsed
or
parsed
[
'level'
]
!=
'ERROR'
:
if
not
parsed
or
parsed
[
'level'
]
!=
'ERROR'
:
return
return
logging
.
info
(
f
"发现
{parsed['level']} 日志!正在通过 SSH 获取上下文日志...
"
)
logging
.
info
(
f
"发现
ERROR日志: {parsed['module']}/{parsed['action']}
"
)
# 获取完整日志上下文
full_log
=
self
.
get_remote_log_with_paramiko
(
full_log
=
self
.
get_remote_log_with_paramiko
(
host
=
self
.
host
,
host
=
self
.
host
,
username
=
self
.
username
,
username
=
self
.
username
,
...
@@ -194,69 +261,83 @@ class LogMonitor:
...
@@ -194,69 +261,83 @@ class LogMonitor:
num_lines
=
500
num_lines
=
500
)
)
if
full_log
:
if
not
full_log
:
lines
=
full_log
.
split
(
'
\n
'
)
logging
.
error
(
"获取日志上下文失败"
)
for
i
,
l
in
enumerate
(
lines
):
return
if
line
.
strip
()
in
l
.
strip
():
start
=
max
(
0
,
i
-
100
)
# 定位错误行并截取上下文
end
=
min
(
len
(
lines
),
i
+
101
)
lines
=
full_log
.
split
(
'
\n
'
)
context
=
lines
[
start
:
end
]
for
i
,
l
in
enumerate
(
lines
):
if
line
.
strip
()
in
l
.
strip
():
with
self
.
lock
:
start
=
max
(
0
,
i
-
100
)
self
.
error_contexts
.
append
({
end
=
min
(
len
(
lines
),
i
+
101
)
'timestamp'
:
time
.
strftime
(
'
%
Y-
%
m-
%
d
%
H:
%
M:
%
S'
),
context
=
lines
[
start
:
end
]
'error_line'
:
line
,
'context'
:
context
,
# 保存错误上下文
'structured'
:
parsed
# 同时保存结构化数据
with
self
.
lock
:
})
self
.
error_contexts
.
append
({
'timestamp'
:
time
.
strftime
(
'
%
Y-
%
m-
%
d
%
H:
%
M:
%
S'
),
file_path
=
self
.
save_error_contexts_to_json
()
'error_line'
:
line
,
error_log_url
=
self
.
generate_error_log_url
(
file_path
)
'context'
:
context
,
'structured'
:
parsed
# 使用结构化字段做 key
})
# 修改后的去重key生成
clean_message
=
re
.
sub
(
r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}'
,
'[IP]'
,
parsed
[
'message'
])
# 保存到文件并生成URL
key
=
f
"{parsed['module']}_{parsed['action']}_{clean_message}"
file_path
=
self
.
save_error_contexts_to_json
()
error_log_url
=
self
.
generate_error_log_url
(
file_path
)
# 打印构造去重key值
logging
.
info
(
f
"构造去重 key: {key}"
)
if
not
error_log_url
:
error_hash
=
hash
(
key
)
return
current_time
=
time
.
time
()
# 生成去重key (过滤IP地址)
if
error_hash
in
self
.
sent_errors
:
clean_message
=
re
.
sub
(
if
current_time
-
self
.
sent_errors
[
error_hash
]
<
self
.
resend_interval
:
r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}'
,
logging
.
info
(
f
"该错误已在冷却期内,跳过重复发送:{line[:100]}..."
)
'[IP]'
,
break
parsed
[
'message'
]
)
try
:
key
=
f
"{parsed['module']}_{parsed['action']}_{clean_message}"
dingding_send_message
(
error_log_url
,
ding_type
=
self
.
ding_type
)
error_hash
=
hash
(
key
)
self
.
sent_errors
[
error_hash
]
=
current_time
current_time
=
time
.
time
()
except
Exception
as
e
:
logging
.
info
(
f
"发送钉钉消息失败: {e}"
)
# 检查冷却期
if
error_hash
in
self
.
sent_errors
:
logging
.
info
(
"上下文日志如下:
\n
"
+
"
\n
"
.
join
(
context
))
if
current_time
-
self
.
sent_errors
[
error_hash
]
<
self
.
resend_interval
:
break
logging
.
info
(
f
"相同错误在冷却期内,跳过: {key[:100]}..."
)
else
:
return
logging
.
error
(
"获取日志失败,无法获取上下文"
)
# 发送钉钉通知
logging
.
debug
(
"Received line:
%
s"
,
line
)
try
:
dingding_send_message
(
error_log_url
,
ding_type
=
self
.
ding_type
)
except
IndexError
:
self
.
sent_errors
[
error_hash
]
=
current_time
pass
logging
.
info
(
f
"已发送钉钉通知: {self.ding_type}"
)
except
Exception
as
e
:
logging
.
error
(
f
"发送钉钉消息失败: {e}"
)
break
except
Exception
as
e
:
except
Exception
as
e
:
logging
.
e
xception
(
f
"获取上下文日志失败
: {e}"
)
logging
.
e
rror
(
f
"处理日志行异常
: {e}"
)
def
restart_monitoring
(
self
):
def
restart_monitoring
(
self
):
logging
.
info
(
"尝试重新启动日志监控..."
)
"""重启监控"""
logging
.
info
(
"尝试重启监控..."
)
self
.
stop_monitoring
()
self
.
stop_monitoring
()
time
.
sleep
(
5
)
time
.
sleep
(
5
)
self
.
start_monitoring
()
self
.
start_monitoring
()
@
staticmethod
@
staticmethod
def
get_remote_log_with_paramiko
(
host
,
username
,
private_key_path
,
passphrase
,
def
get_remote_log_with_paramiko
(
host
,
username
,
private_key_path
,
passphrase
,
log_path
,
num_lines
=
1000
,
timeout
=
30
):
log_path
,
num_lines
=
1000
,
timeout
=
30
):
"""
通过SSH获取远程日志内容
返回: 日志内容或None
"""
try
:
try
:
private_key
=
paramiko
.
RSAKey
.
from_private_key_file
(
private_key_path
,
password
=
passphrase
)
private_key
=
paramiko
.
RSAKey
.
from_private_key_file
(
private_key_path
,
password
=
passphrase
)
client
=
paramiko
.
SSHClient
()
client
=
paramiko
.
SSHClient
()
client
.
set_missing_host_key_policy
(
paramiko
.
AutoAddPolicy
())
client
.
set_missing_host_key_policy
(
paramiko
.
AutoAddPolicy
())
client
.
connect
(
host
,
username
=
username
,
pkey
=
private_key
,
timeout
=
timeout
)
client
.
connect
(
host
,
username
=
username
,
pkey
=
private_key
,
timeout
=
timeout
)
...
@@ -264,56 +345,62 @@ class LogMonitor:
...
@@ -264,56 +345,62 @@ class LogMonitor:
command
=
f
"tail -n {num_lines} {log_path}"
command
=
f
"tail -n {num_lines} {log_path}"
stdin
,
stdout
,
stderr
=
client
.
exec_command
(
command
,
timeout
=
timeout
)
stdin
,
stdout
,
stderr
=
client
.
exec_command
(
command
,
timeout
=
timeout
)
error
=
stderr
.
read
()
.
decode
(
'utf-8'
)
if
stderr
.
read
()
.
decode
(
'utf-8'
):
if
error
:
raise
Exception
(
"远程命令执行失败"
)
logging
.
error
(
f
"执行命令失败: {error}"
)
return
None
output
=
stdout
.
read
()
.
decode
(
'utf-8'
)
return
stdout
.
read
()
.
decode
(
'utf-8'
)
return
output
except
paramiko
.
ssh_exception
.
PasswordRequiredException
:
logging
.
error
(
"私钥加密但未提供密码。"
)
return
None
except
paramiko
.
ssh_exception
.
SSHException
as
e
:
logging
.
error
(
f
"SSH 错误: {e}"
)
return
None
except
Exception
as
e
:
except
Exception
as
e
:
logging
.
e
xception
(
f
"获取
日志失败: {e}"
)
logging
.
e
rror
(
f
"获取远程
日志失败: {e}"
)
return
None
return
None
finally
:
finally
:
if
'client'
in
locals
():
if
'client'
in
locals
():
client
.
close
()
client
.
close
()
def
schedule_daily_clear
(
self
):
def
schedule_daily_clear
(
self
):
"""
每天凌晨自动清空已发送日志缓存
"""
"""
定时每天凌晨清空已发送记录
"""
now
=
datetime
.
now
()
now
=
datetime
.
now
()
next_run
=
(
now
+
timedelta
(
days
=
1
))
.
replace
(
hour
=
0
,
minute
=
0
,
second
=
0
,
microsecond
=
0
)
next_run
=
(
now
+
timedelta
(
days
=
1
))
.
replace
(
hour
=
0
,
minute
=
0
,
second
=
0
,
microsecond
=
0
)
delay
=
(
next_run
-
now
)
.
total_seconds
()
delay
=
(
next_run
-
now
)
.
total_seconds
()
threading
.
Timer
(
delay
,
self
.
daily_clear_sent_errors
)
.
start
()
timer
=
threading
.
Timer
(
delay
,
self
.
daily_clear_sent_errors
)
timer
.
daemon
=
True
timer
.
start
()
def
daily_clear_sent_errors
(
self
):
def
daily_clear_sent_errors
(
self
):
"""每日凌晨执行,清空已发送日志缓存"""
"""清空已发送记录缓存"""
logging
.
info
(
"🔄 开始每日凌晨清理已发送错误日志缓存..."
)
with
self
.
lock
:
with
self
.
lock
:
self
.
sent_errors
.
clear
()
self
.
sent_errors
.
clear
()
logging
.
info
(
"✅ 已发送错误
日志缓存已清空!
"
)
logging
.
info
(
"✅ 已发送错误
记录已清空
"
)
self
.
schedule_daily_clear
()
#
递归调用,
设置下一天任务
self
.
schedule_daily_clear
()
# 设置下一天任务
def
parse_log_line
(
self
,
line
):
def
parse_log_line
(
self
,
line
):
logging
.
info
(
f
"正在处理的日志行: {line}"
)
"""
解析日志行
# 原有匹配逻辑
返回: {
'level': 日志级别,
'module': 模块名,
'action': 操作名,
'message': 日志消息,
'raw': 原始日志
} 或 None
"""
# 匹配时间戳
timestamp_match
=
re
.
match
(
r'^(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d+)'
,
line
)
timestamp_match
=
re
.
match
(
r'^(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d+)'
,
line
)
# 匹配日志级别
level_match
=
re
.
search
(
r'\s(ERROR|INFO|WARNING)\b'
,
line
)
level_match
=
re
.
search
(
r'\s(ERROR|INFO|WARNING)\b'
,
line
)
# 匹配中括号内容
bracket_content
=
re
.
findall
(
r'\[(.*?)\]|\【(.*?)\】'
,
line
)
bracket_content
=
re
.
findall
(
r'\[(.*?)\]|\【(.*?)\】'
,
line
)
bracket_content
=
[
x
[
0
]
or
x
[
1
]
for
x
in
bracket_content
if
x
[
0
]
or
x
[
1
]]
bracket_content
=
[
x
[
0
]
or
x
[
1
]
for
x
in
bracket_content
if
x
[
0
]
or
x
[
1
]]
if
not
timestamp_match
or
not
level_match
or
len
(
bracket_content
)
<
3
:
if
not
timestamp_match
or
not
level_match
or
len
(
bracket_content
)
<
3
:
return
None
return
None
# IP过滤函数
def
remove_ip
(
text
):
def
remove_ip
(
text
):
"""过滤IP地址"""
return
re
.
sub
(
r'\d{1,3}\\.\d{1,3}\\.\d{1,3}\\.\d{1,3}'
,
'[IP]'
,
text
)
return
re
.
sub
(
r'\d{1,3}\\.\d{1,3}\\.\d{1,3}\\.\d{1,3}'
,
'[IP]'
,
text
)
return
{
return
{
...
@@ -321,22 +408,17 @@ class LogMonitor:
...
@@ -321,22 +408,17 @@ class LogMonitor:
'module'
:
remove_ip
(
bracket_content
[
0
]
.
strip
()),
'module'
:
remove_ip
(
bracket_content
[
0
]
.
strip
()),
'action'
:
remove_ip
(
bracket_content
[
1
]
.
strip
()),
'action'
:
remove_ip
(
bracket_content
[
1
]
.
strip
()),
'message'
:
remove_ip
(
bracket_content
[
2
]
.
strip
()),
'message'
:
remove_ip
(
bracket_content
[
2
]
.
strip
()),
'raw'
:
line
.
strip
()
# 保留原始信息
'raw'
:
line
.
strip
()
}
}
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
"""
"""
调试主机-执行指令:
主程序入口
1.打开一个终端输入:
配置服务器列表并启动监控
- cd .
\
系统监测
\
- python -m http.server 80 --directory error_log
2.打开新终端输入:
- cd .
\
系统监测
\n
grok
\n
grok-虚拟机
\
- .
\
start.bat
"""
"""
# 服务器配置列表
SERVERS
=
[
SERVERS
=
[
{
{
"host"
:
"192.168.5.235"
,
"host"
:
"192.168.5.235"
,
...
@@ -348,89 +430,18 @@ if __name__ == "__main__":
...
@@ -348,89 +430,18 @@ if __name__ == "__main__":
"log_path"
:
"/var/www/java/api-java-meeting2.0/logs/ubains-INFO-AND-ERROR.log"
,
"log_path"
:
"/var/www/java/api-java-meeting2.0/logs/ubains-INFO-AND-ERROR.log"
,
"ding_type"
:
"235标准预定对内服务"
"ding_type"
:
"235标准预定对内服务"
},
},
{
# 其他日志配置...
"log_path"
:
"/var/www/java/external-meeting-api/logs/ubains-INFO-AND-ERROR.log"
,
"ding_type"
:
"235标准预定对外服务"
}
]
]
},
},
{
# 其他服务器配置...
"host"
:
"192.168.5.200"
,
"username"
:
"root"
,
"private_key_path"
:
"C:/Users/Administrator/.ssh/id_rsa"
,
"passphrase"
:
"Ubains@123"
,
"logs"
:
[
{
"log_path"
:
"/var/www/java/api-java-meeting2.0/logs/ubains-INFO-AND-ERROR.log"
,
"ding_type"
:
"展厅预定对内服务"
},
{
"log_path"
:
"/var/www/java/external-meeting-api/logs/ubains-INFO-AND-ERROR.log"
,
"ding_type"
:
"展厅预定对外服务"
},
{
"log_path"
:
"/var/www/html/log/uinfo.log"
,
"ding_type"
:
"展厅运维服务"
},
{
"log_path"
:
"/var/www/java/unifiedPlatform/api-java-meeting2.0/logs/ubains-INFO-AND-ERROR.log"
,
"ding_type"
:
"展厅统一平台对内服务"
},
{
"log_path"
:
"/var/www/java/unifiedPlatform/external-meeting-api/logs/ubains-INFO-AND-ERROR.log"
,
"ding_type"
:
"展厅统一平台对外服务"
},
{
"log_path"
:
"/var/www/java/unifiedPlatform/api-dubbo-smc-three/log.out"
,
"ding_type"
:
"展厅统一平台SMC服务"
},
{
"log_path"
:
"/var/www/java/unifiedPlatform/api-dubbo-tencent-meeting/log.out"
,
"ding_type"
:
"展厅统一平台腾讯服务"
},
{
"log_path"
:
"/var/www/java/unifiedPlatform/api-dubbo-tencent-meeting/log.out"
,
"ding_type"
:
"展厅统一平台腾讯服务"
},
{
"log_path"
:
"/var/www/java/unifiedPlatform/auth-sso-gatway/log.out"
,
"ding_type"
:
"展厅统一平台gatway服务"
},
{
"log_path"
:
"/var/www/java/unifiedPlatform/auth-sso-auth/log.out"
,
"ding_type"
:
"展厅统一平台auth服务"
},
{
"log_path"
:
"/var/www/java/unifiedPlatform/auth-sso-system/log.out"
,
"ding_type"
:
"展厅统一平台system服务"
}
]
},
{
"host"
:
"139.9.60.86"
,
"username"
:
"root"
,
"private_key_path"
:
"C:/Users/Administrator/.ssh/id_rsa"
,
"passphrase"
:
"Ubains@123"
,
"logs"
:
[
{
"log_path"
:
"/var/www/html/log/uinfo.log"
,
"ding_type"
:
"对外云端运维集控服务"
},
{
"log_path"
:
"/var/www/java/api-java-meeting2.0/logs/ubains-INFO-AND-ERROR.log"
,
"ding_type"
:
"对外云端预定对内服务"
},
{
"log_path"
:
"/var/www/java/external-meeting-api/logs/ubains-INFO-AND-ERROR.log"
,
"ding_type"
:
"对外云端预定对外服务"
}
]
}
]
]
monitors
=
[]
monitors
=
[]
threads
=
[]
threads
=
[]
logging
.
info
(
"========== 开始启动监控服务 =========="
)
# 为每个服务器的每个日志创建监控实例
for
server
in
SERVERS
:
for
server
in
SERVERS
:
for
log_config
in
server
[
"logs"
]:
for
log_config
in
server
[
"logs"
]:
monitor
=
LogMonitor
(
monitor
=
LogMonitor
(
...
@@ -444,6 +455,18 @@ if __name__ == "__main__":
...
@@ -444,6 +455,18 @@ if __name__ == "__main__":
monitors
.
append
(
monitor
)
monitors
.
append
(
monitor
)
thread
=
threading
.
Thread
(
target
=
monitor
.
start_monitoring
)
thread
=
threading
.
Thread
(
target
=
monitor
.
start_monitoring
)
thread
.
daemon
=
True
threads
.
append
(
thread
)
threads
.
append
(
thread
)
thread
.
start
()
thread
.
start
()
logging
.
info
(
f
"已启动对 {log_config['log_path']} 的监控"
)
logging
.
info
(
f
"已启动监控: {server['host']} - {log_config['log_path']}"
)
# 保持主线程运行
try
:
while
True
:
time
.
sleep
(
1
)
except
KeyboardInterrupt
:
logging
.
info
(
"接收到中断信号,停止所有监控..."
)
for
monitor
in
monitors
:
monitor
.
stop_monitoring
()
logging
.
info
(
"所有监控已停止"
)
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论