import psutil
import time
import os
import sys
import paramiko
from datetime import datetime
import logging
import json
import urllib
from urllib.parse import urlencode
import hmac
import hashlib
import base64
import requests
import colorama
from colorama import Fore, Back, Style
from docx import Document
from docx.shared import Pt, Inches, RGBColor
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
from docx.enum.table import WD_TABLE_ALIGNMENT

# 初始化颜色输出
colorama.init()


class ServerProcessMonitor:
    def __init__(self, servers_config, check_interval=60):
        """
        Create a monitor for processes/containers running on remote servers.

        Args:
            servers_config: List of dicts, one per server. Keys described by
                the original author:
                - name: display name of the server
                - host: IP address or domain name
                - port: SSH port (default 22)
                - username: SSH user name
                - password: SSH password (key authentication is recommended)
                - private_key_path: path to the SSH private key
                - passphrase: passphrase protecting the private key
                - process_names: names to watch (container names as well as
                  plain process names)
                - container_runtime: 'docker' or 'podman', default 'docker'
                  (NOTE(review): listed by the author; confirm where it is
                  actually consumed)
            check_interval: Number of seconds between two check rounds.
        """
        # One snapshot dict is stored here per completed check round.
        self.status_log = list()
        # Maps server name -> SSH client object.
        self.ssh_clients = dict()

        self.check_interval = check_interval
        self.servers_config = servers_config

        # The logger is created last, after all plain attributes exist.
        self.logger = self._setup_logger()

    def _setup_logger(self):
        """
        Return the shared 'ServerProcessMonitor' logger, configuring it once.

        The logger writes INFO and above to the console and to
        ``server_monitor.log`` located next to this source file.

        ``logging.getLogger`` returns the same object for the same name, so
        handlers are attached only when the logger has none yet. Without that
        guard every additional call (for example a second monitor instance)
        would add another pair of handlers and each message would be emitted
        multiple times.

        Returns:
            logging.Logger: the configured logger.
        """
        logger = logging.getLogger('ServerProcessMonitor')
        logger.setLevel(logging.INFO)

        # Already configured by an earlier call: reuse it as is.
        if logger.handlers:
            return logger

        formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')

        # Console handler
        console_handler = logging.StreamHandler()
        console_handler.setFormatter(formatter)
        logger.addHandler(console_handler)

        # File handler. The messages contain Chinese text, so the encoding is
        # fixed to UTF-8 instead of depending on the platform default.
        log_file = os.path.join(os.path.dirname(__file__), 'server_monitor.log')
        file_handler = logging.FileHandler(log_file, encoding='utf-8')
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)

        return logger

    def _connect_server(self, server_config):
        """
        Open an SSH connection to one server and register it in ``self.ssh_clients``.

        Key authentication is used when ``private_key_path`` is configured and
        the file exists; otherwise password authentication with the
        ``password`` entry is used.

        Compared with the previous version, a client whose connection attempt
        failed is now closed instead of being abandoned, and a stale client
        registered under the same server name is closed before it is replaced.

        Args:
            server_config: dict with at least ``name``, ``host`` and
                ``username``; optional ``port``, ``password``,
                ``private_key_path`` and ``passphrase``.

        Returns:
            bool: True when the connection was established, False otherwise.
        """
        server_name = server_config['name']
        ssh = None
        connected = False
        try:
            self.logger.info(f"{Fore.CYAN}正在尝试连接服务器: {server_name}...{Style.RESET_ALL}")
            ssh = paramiko.SSHClient()
            # SECURITY NOTE(review): AutoAddPolicy accepts any unknown host key,
            # which leaves the connection open to man-in-the-middle attacks.
            # Kept for backward compatibility; consider loading known hosts.
            ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())

            # Arguments shared by both authentication modes.
            connect_kwargs = {
                'hostname': server_config['host'],
                'port': server_config.get('port', 22),
                'username': server_config['username'],
                'timeout': 30,
                'banner_timeout': 200,
                'auth_timeout': 200,
            }

            private_key_path = server_config.get('private_key_path')
            if private_key_path and os.path.exists(private_key_path):
                try:
                    self.logger.info(f"使用私钥认证: {private_key_path}")
                    private_key = paramiko.RSAKey.from_private_key_file(
                        private_key_path,
                        password=server_config.get('passphrase')
                    )
                    ssh.connect(pkey=private_key, **connect_kwargs)
                    self.logger.info(f"{Fore.GREEN}私钥认证成功{Style.RESET_ALL}")
                except paramiko.ssh_exception.PasswordRequiredException:
                    self.logger.error(f"{Fore.RED}私钥文件需要密码,但未提供或密码不正确{Style.RESET_ALL}")
                    return False
                except paramiko.ssh_exception.AuthenticationException:
                    self.logger.error(f"{Fore.RED}SSH认证失败,请检查密码或密钥{Style.RESET_ALL}")
                    return False
            else:
                self.logger.info("使用密码认证")
                ssh.connect(password=server_config.get('password'), **connect_kwargs)
                self.logger.info(f"{Fore.GREEN}密码认证成功{Style.RESET_ALL}")

            # Send keepalive packets every 30 seconds.
            ssh.get_transport().set_keepalive(30)

            # Release a previous (presumably dead) connection for this server
            # before the new one takes its place.
            previous = self.ssh_clients.get(server_name)
            if previous is not None and previous is not ssh:
                try:
                    previous.close()
                except Exception as close_error:
                    self.logger.debug(f"closing stale connection for {server_name} failed: {close_error}")

            self.ssh_clients[server_name] = ssh
            connected = True
            self.logger.info(f"{Fore.GREEN}成功连接到服务器: {server_name}{Style.RESET_ALL}")
            return True
        except Exception as e:
            self.logger.error(f"{Fore.RED}连接服务器 {server_name} 失败: {str(e)}{Style.RESET_ALL}")
            return False
        finally:
            # Covers every failure path, including the early returns above.
            if ssh is not None and not connected:
                try:
                    ssh.close()
                except Exception as close_error:
                    self.logger.debug(f"closing failed connection for {server_name} failed: {close_error}")

    def _check_container_status(self, ssh, container_name):
        """
        Look up *container_name* with the container runtime on the remote host.

        Args:
            ssh: connected SSH client offering ``exec_command``.
            container_name: name (or id) to inspect.

        Returns:
            dict | None:
                * A dict with ``is_container``, ``running``, ``status``, ``id``
                  (first 12 characters), ``image`` and ``created`` when the
                  runtime reports a state for the name. Every reported state
                  is returned, not only running/exited/created, so callers
                  can always rely on the ``status`` key.
                * ``None`` when the runtime prints no state (the name is not
                  a container, or the runtime is unavailable) or when the
                  check itself fails. ``None`` lets the caller fall back to a
                  plain process lookup; the earlier partial dict without a
                  ``status`` key made the caller fail with ``KeyError``.
        """
        import shlex  # local import: not part of the module-level imports

        try:
            runtime = self._get_container_runtime(ssh)
            # Quote the name so unusual characters cannot alter the command.
            target = shlex.quote(container_name)

            _, stdout, _ = ssh.exec_command(
                f"{runtime} inspect --format='{{{{.State.Status}}}}' {target}"
            )
            status = stdout.read().decode().strip().lower()

            # Errors go to stderr, so an empty stdout means "no such container".
            if not status:
                return None

            _, stdout, _ = ssh.exec_command(f"{runtime} inspect {target}")
            details = json.loads(stdout.read().decode())[0]

            return {
                'is_container': True,
                'running': status == 'running',
                'status': status,
                'id': details['Id'][:12],
                'image': details['Config']['Image'],
                'created': details['Created']
            }
        except Exception as e:
            self.logger.error(f"检查容器状态失败: {str(e)}")
            return None

    def _get_container_runtime(self, ssh):
        """
        Detect which container runtime is usable on the remote host.

        ``exec_command`` only raises when the SSH channel itself fails; a
        missing binary is reported through the command's exit status. The
        exit status is therefore checked explicitly - the previous version
        returned 'docker' whenever the channel worked, even if docker was not
        installed.

        Args:
            ssh: connected SSH client offering ``exec_command``.

        Returns:
            str: 'docker' or 'podman' for the first runtime whose
            ``--version`` command exits with status 0; 'docker' as the
            default when neither could be confirmed.
        """
        for runtime in ('docker', 'podman'):
            try:
                _, stdout, _ = ssh.exec_command(f"{runtime} --version")
                # Blocks until the remote command finished.
                if stdout.channel.recv_exit_status() == 0:
                    return runtime
            except Exception as e:
                self.logger.debug(f"probing container runtime {runtime} failed: {e}")

        # Keep the historical default when detection is inconclusive.
        return 'docker'

    def _generate_word_report(self, server_name, process_name, process_info, timestamp, server_config):
        """
        Write a Word (.docx) incident report for a service that is not running.

        The document contains the server details, the detection time, the
        container or process details and a fixed list of suggested actions.
        It is saved into the ``error_log`` directory one level above this
        source file.

        Changes compared with the previous version:
            * The SSH password is no longer written into the report. The
              report is published through a download link, so only whether a
              password is configured is shown.
            * The authentication type follows the configuration (private key
              when the key file exists, password otherwise) instead of always
              stating key authentication.
            * A ``command`` given as a plain string is printed as is instead
              of being joined character by character.

        Args:
            server_name: display name of the server.
            process_name: name of the service / container.
            process_info: status dict of the service (``is_container``,
                ``id``, ``image``, ``status``, ``created``, ``pid``,
                ``command`` are read when present).
            timestamp: detection time as a string.
            server_config: configuration dict of the server.

        Returns:
            str | None: path of the report relative to this file's directory,
            or None when the report could not be generated.
        """

        def fill_two_column_table(table, rows):
            """Write (label, value) pairs into *table*, one per row, label in bold."""
            for table_row, (label, value) in zip(table.rows, rows):
                cells = table_row.cells
                cells[0].text = label
                cells[0].paragraphs[0].runs[0].font.bold = True
                cells[1].text = str(value)

        try:
            doc = Document()

            # Document-wide default font
            doc.styles['Normal'].font.name = '微软雅黑'
            doc.styles['Normal'].font.size = Pt(10)

            # 1. Main title
            title = doc.add_heading(level=1)
            title_run = title.add_run(f'{server_name} - {process_name}服务异常报告')
            title_run.font.size = Pt(16)
            title_run.font.color.rgb = RGBColor(255, 0, 0)  # red
            title_run.font.bold = True
            title.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER

            # 2. Server information table
            doc.add_paragraph()  # spacer
            server_heading = doc.add_heading('服务器信息', level=2)
            server_heading.alignment = WD_PARAGRAPH_ALIGNMENT.LEFT

            # Same rule as the connection code: key auth when the key file exists.
            key_path = server_config.get('private_key_path')
            uses_private_key = bool(key_path and os.path.exists(key_path))
            auth_type = '私钥认证' if uses_private_key else '密码认证'

            # SECURITY: never put the plaintext password into a shared document.
            has_password = bool(server_config.get('password') or server_config.get('ssh_password'))
            password = '已配置(出于安全考虑不在报告中显示)' if has_password else '未配置'

            server_info_rows = [
                ('服务器名称', server_name),
                ('IP地址', server_config['host']),
                ('SSH端口', str(server_config.get('port', 22))),
                ('用户名', server_config['username']),
                ('认证类型', auth_type),
                ('连接密码', password)
            ]

            server_table = doc.add_table(rows=len(server_info_rows), cols=2)
            server_table.style = 'Light Grid Accent 1'
            server_table.alignment = WD_TABLE_ALIGNMENT.CENTER
            fill_two_column_table(server_table, server_info_rows)

            # 3. Detection information
            doc.add_paragraph()  # spacer
            detect_heading = doc.add_heading('检测信息', level=2)
            detect_heading.alignment = WD_PARAGRAPH_ALIGNMENT.LEFT

            detect_table = doc.add_table(rows=1, cols=2)
            detect_table.style = 'Light Grid Accent 1'
            detect_table.alignment = WD_TABLE_ALIGNMENT.CENTER
            fill_two_column_table(detect_table, [('检测时间', timestamp)])

            # 4. Service status details
            doc.add_paragraph()  # spacer
            status_heading = doc.add_heading('服务状态详情', level=2)
            status_heading.alignment = WD_PARAGRAPH_ALIGNMENT.LEFT

            if process_info.get('is_container', False):
                # Container: name, id, image, state plus creation time
                container_rows = [
                    ('容器名称', process_name),
                    ('容器ID', process_info.get('id', 'N/A')),
                    ('镜像', process_info.get('image', 'N/A')),
                    ('状态', process_info.get('status', 'N/A'))
                ]
                container_table = doc.add_table(rows=len(container_rows), cols=2)
                container_table.alignment = WD_TABLE_ALIGNMENT.CENTER
                container_table.style = 'Medium Shading 1 Accent 1'
                fill_two_column_table(container_table, container_rows)

                doc.add_paragraph()
                time_para = doc.add_paragraph()
                time_para.add_run('创建时间: ').bold = True
                time_para.add_run(process_info.get('created', 'N/A'))
            else:
                # Plain process: name, pid, state plus command line
                process_rows = [
                    ('进程名称', process_name),
                    ('PID', process_info.get('pid', 'N/A')),
                    ('状态', process_info.get('status', 'N/A'))
                ]
                process_table = doc.add_table(rows=len(process_rows), cols=2)
                process_table.alignment = WD_TABLE_ALIGNMENT.CENTER
                process_table.style = 'Medium Shading 1 Accent 1'
                fill_two_column_table(process_table, process_rows)

                # The command may arrive as a list of words or as one string.
                command = process_info.get('command', ['N/A'])
                command_text = command if isinstance(command, str) else ' '.join(command)

                doc.add_paragraph()
                cmd_para = doc.add_paragraph()
                cmd_para.add_run('命令: ').bold = True
                cmd_para.add_run(command_text)

            # 5. Suggested actions
            doc.add_page_break()
            suggestion = doc.add_heading('处理建议', level=2)
            suggestion.alignment = WD_PARAGRAPH_ALIGNMENT.LEFT

            doc.add_paragraph("1. 检查服务日志确认具体异常原因")
            doc.add_paragraph("2. 尝试重启服务")
            doc.add_paragraph("3. 检查服务器资源使用情况(CPU、内存、磁盘)")
            doc.add_paragraph("4. 联系相关开发人员排查问题")

            # Save into ../error_log relative to this file
            error_log_dir = os.path.abspath(os.path.join(
                os.path.dirname(__file__),
                '..',
                'error_log'
            ))
            os.makedirs(error_log_dir, exist_ok=True)

            # ':' and ' ' from the timestamp are replaced to get a portable file name
            filename = f"{server_name}_{process_name}_service_exception_{timestamp.replace(':', '-').replace(' ', '_')}.docx"
            report_path = os.path.join(error_log_dir, filename)
            doc.save(report_path)

            relative_path = os.path.relpath(report_path, os.path.dirname(__file__))
            self.logger.info(f"{Fore.GREEN}已生成Word报告: {relative_path}{Style.RESET_ALL}")
            return relative_path

        except Exception as e:
            self.logger.error(f"{Fore.RED}生成Word报告失败: {str(e)}{Style.RESET_ALL}")
            return None

    def _get_remote_process_status(self, server_name, process_name):
        """
        Determine the state of a container or plain process on a remote server.

        The name is first looked up as a container. Only a container result
        that carries a ``status`` entry is accepted; anything else (``None``
        or a partial dict) means "not a known container" and the name is then
        searched with ``pgrep -f``. Previously a partial container result
        caused a ``KeyError`` and the process lookup was never reached.

        Args:
            server_name: key of the SSH connection in ``self.ssh_clients``.
            process_name: container name or process pattern.

        Returns:
            dict | None:
                * the container status dict, when the name is a container;
                * ``{'is_container': False, 'running': True, 'pid', 'status',
                  'command'}`` for a running process, where ``command`` is
                  always a list of words (``['UNKNOWN']`` when the ``ps``
                  output could not be parsed);
                * ``{'is_container': False, 'running': False}`` when nothing
                  matches;
                * ``None`` when there is no SSH connection or the check failed.
        """
        import shlex  # local import: not part of the module-level imports

        ssh = self.ssh_clients.get(server_name)
        if not ssh:
            self.logger.warning(f"{Fore.YELLOW}未找到服务器 {server_name} 的SSH连接{Style.RESET_ALL}")
            return None

        try:
            self.logger.info(f"正在检查进程: {process_name}...")

            # Container lookup first
            container_status = self._check_container_status(ssh, process_name)
            if container_status and 'status' in container_status:
                status_msg = "容器" if container_status['is_container'] else "进程"
                self.logger.info(f"{status_msg} {process_name} 状态: {container_status['status']}")
                return container_status

            # Not a container: look for a plain process.
            # NOTE(review): depending on the remote login shell, `pgrep -f` may
            # also match the shell that runs this very command - verify.
            _, stdout, _ = ssh.exec_command(f"pgrep -f {shlex.quote(process_name)}")
            pids = stdout.read().decode().split()

            if pids:
                pid = pids[0]
                _, stdout, _ = ssh.exec_command(f"ps -p {pid} -o pid,stat,cmd")
                ps_lines = stdout.read().decode().strip().split('\n')

                # Line 0 is the header; line 1 holds "PID STAT CMD...".
                fields = ps_lines[1].split() if len(ps_lines) > 1 else []
                if len(fields) >= 2:
                    status = fields[1]
                    command = fields[2:]
                    cmd = ' '.join(command)
                    self.logger.info(
                        f"{Fore.GREEN}进程运行中 - PID: {pid}, 状态: {status}, 命令: {cmd}{Style.RESET_ALL}")
                else:
                    status = 'UNKNOWN'
                    command = ['UNKNOWN']
                    self.logger.warning(f"{Fore.YELLOW}无法解析进程信息{Style.RESET_ALL}")

                return {
                    'is_container': False,
                    'running': True,
                    'pid': pid,
                    'status': status,
                    'command': command
                }

            self.logger.warning(f"{Fore.YELLOW}进程 {process_name} 未运行{Style.RESET_ALL}")
            return {'is_container': False, 'running': False}

        except Exception as e:
            self.logger.error(
                f"{Fore.RED}获取服务器 {server_name} 进程 {process_name} 状态失败: {str(e)}{Style.RESET_ALL}")
            return None

    def check_all_servers(self):
        """
        Run one check round over every configured server.

        For each server the SSH connection is (re)established when it is
        missing, has no transport, or the transport is no longer active.
        ``get_transport()`` may return ``None`` (for example after the client
        was closed); that case is now treated as "not connected" instead of
        raising ``AttributeError`` and aborting the whole round.

        Returns:
            dict: ``{'timestamp': 'YYYY-MM-DD HH:MM:SS', 'servers': {...}}``
            where every server entry has ``status`` ('CONNECTED' or
            'CONNECTION_FAILED') and ``processes``. Every process entry has
            the keys ``status``, ``pid``, ``container_id`` and ``details``;
            ``status`` is 'CHECK_FAILED', 'RUNNING', 'NOT_RUNNING',
            'CONTAINER_RUNNING' or 'CONTAINER_NOT_RUNNING'. The snapshot is
            also appended to ``self.status_log``.
        """
        current_status = {
            'timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
            'servers': {}
        }

        for server in self.servers_config:
            server_name = server['name']

            # Make sure there is a usable connection.
            client = self.ssh_clients.get(server_name)
            transport = client.get_transport() if client is not None else None
            if transport is None or not transport.is_active():
                if not self._connect_server(server):
                    current_status['servers'][server_name] = {
                        'status': 'CONNECTION_FAILED',
                        'processes': {}
                    }
                    continue

            server_status = {
                'status': 'CONNECTED',
                'processes': {}
            }

            for process_name in server['process_names']:
                process_status = self._get_remote_process_status(server_name, process_name)
                if process_status is None:
                    # Same keys as a successful entry, so consumers can treat
                    # all entries uniformly.
                    server_status['processes'][process_name] = {
                        'status': 'CHECK_FAILED',
                        'pid': None,
                        'container_id': None,
                        'details': None
                    }
                    continue

                status_key = 'RUNNING' if process_status['running'] else 'NOT_RUNNING'
                if process_status.get('is_container'):
                    status_key = f"CONTAINER_{status_key}"

                server_status['processes'][process_name] = {
                    'status': status_key,
                    'pid': process_status.get('pid'),
                    'container_id': process_status.get('id'),
                    'details': process_status
                }

            current_status['servers'][server_name] = server_status

        # NOTE(review): the history grows by one entry per round and is never
        # trimmed; consider bounding it for very long-running monitors.
        self.status_log.append(current_status)
        return current_status

    def dingding_send_message(self, title, report_url, server_name=None):
        """
        Send a "link" alert message through the DingTalk robot webhook.

        Fixes compared with the previous version:
            * The signature was percent-encoded twice (``quote_plus`` followed
              by ``urlencode``). It is now passed as plain base64 text and
              encoded exactly once by ``urlencode``.
            * ``atMobiles`` is sent as a list of numbers rather than a string.
            * The HTTP request has a timeout so a stalled connection cannot
              block the monitoring loop forever.
            * HTTP 200 with a non-zero ``errcode`` in the JSON body is
              reported as a failure instead of a success.
            * The access token and signature are no longer written to the log.
            * The return value is a bool on every path.

        Args:
            title: message title.
            report_url: URL opened when the message is clicked.
            server_name: optional server name, prefixed to the title.

        Returns:
            bool: True when DingTalk accepted the message, False otherwise.
        """
        try:
            self.logger.info(f"{Fore.CYAN}=== 开始构建钉钉告警消息 ==={Style.RESET_ALL}")

            # Prefix the title with the server name when one is given.
            if server_name:
                title = f"[{server_name}] {title}"
                self.logger.info(f"{Fore.BLUE}服务器名称已添加到标题: {title}{Style.RESET_ALL}")
            else:
                self.logger.info(f"{Fore.BLUE}使用原始标题: {title}{Style.RESET_ALL}")

            # SECURITY NOTE(review): the webhook token and signing secret are
            # hard-coded; they should be moved to configuration and rotated.
            webhook_url = 'https://oapi.dingtalk.com/robot/send?access_token=7fbf40798cad98b1b5db55ff844ba376b1816e80c5777e6f47ae1d9165dacbb4'
            secret = 'SEC610498ed6261ae2df1d071d0880aaa70abf5e67efe47f75a809c1f2314e0dbd6'

            # Millisecond timestamp, part of the signed string
            timestamp = str(round(time.time() * 1000))
            self.logger.info(f"生成时间戳: {timestamp}")

            # HMAC-SHA256 over "<timestamp>\n<secret>", base64 encoded
            string_to_sign = f'{timestamp}\n{secret}'
            hmac_code = hmac.new(
                secret.encode('utf-8'),
                string_to_sign.encode('utf-8'),
                digestmod=hashlib.sha256
            ).digest()
            sign = base64.b64encode(hmac_code).decode('utf-8')
            self.logger.info("签名生成完成")

            # urlencode percent-encodes each value once.
            params = {
                'access_token': webhook_url.split('=')[1],
                'timestamp': timestamp,
                'sign': sign
            }
            final_webhook_url = f'https://oapi.dingtalk.com/robot/send?{urllib.parse.urlencode(params)}'
            # Credentials are deliberately masked in the log.
            self.logger.info("最终请求URL: https://oapi.dingtalk.com/robot/send?access_token=***&timestamp=***&sign=***")

            headers = {'Content-Type': 'application/json'}
            message = {
                'msgtype': 'link',
                'link': {
                    'title': title,
                    'messageUrl': report_url,
                    'text': "请点击链接,尽快排查处理!!!"
                },
                "at": {
                    "atMobiles": ["13724387318"],
                    "isAtAll": True
                }
            }
            self.logger.info(
                f"{Fore.BLUE}消息体构建完成:{Style.RESET_ALL}\n{json.dumps(message, indent=2, ensure_ascii=False)}")

            try:
                response = requests.post(
                    final_webhook_url,
                    data=json.dumps(message),
                    headers=headers,
                    timeout=10
                )
            except requests.exceptions.RequestException as e:
                self.logger.error(f'请求异常: {e}')
                return False

            # DingTalk reports application errors through "errcode" in the body.
            try:
                errcode = response.json().get('errcode')
            except (ValueError, AttributeError):
                errcode = None

            if response.status_code == 200 and errcode == 0:
                self.logger.info('消息发送成功!')
                self.logger.info(f'响应内容: {response.text}')
                return True

            self.logger.error(f'消息发送失败,状态码: {response.status_code}')
            self.logger.error(f'响应内容: {response.text}')
            return False

        except Exception as e:
            self.logger.error(f"{Fore.RED}发送钉钉消息时发生异常:{Style.RESET_ALL}")
            self.logger.error(f"异常类型: {type(e).__name__}")
            self.logger.error(f"异常详情: {str(e)}")
            if hasattr(e, 'request'):
                self.logger.error(f"请求信息: {e.request.method} {e.request.url}")
            return False

    def monitor(self):
        """
        Run the monitoring loop until interrupted or an error occurs.

        Each round calls ``check_all_servers``, logs the state of every server
        and watched service, and for every service whose status contains
        'NOT_RUNNING' generates a Word report and sends a DingTalk alert that
        links to it. The loop then sleeps for ``check_interval`` seconds.
        Connections are released through ``__del__`` when the loop ends.
        """
        self.logger.info(f"{Fore.CYAN}=== 开始监测 {len(self.servers_config)} 台服务器 ==={Style.RESET_ALL}")
        self.logger.info(f"{Fore.CYAN}检查间隔: {self.check_interval}秒{Style.RESET_ALL}")

        try:
            while True:
                snapshot = self.check_all_servers()
                self.logger.info(f"\n{Fore.CYAN}=== 检查时间: {snapshot['timestamp']} ==={Style.RESET_ALL}")

                for server_name, server_info in snapshot['servers'].items():
                    if server_info['status'] == 'CONNECTED':
                        banner_color = Fore.GREEN
                    else:
                        banner_color = Fore.RED
                    self.logger.info(
                        f"\n{banner_color}服务器: {server_name} - 状态: {server_info['status']}{Style.RESET_ALL}")

                    # Configuration entry belonging to this server (None if absent)
                    server_config = None
                    for candidate in self.servers_config:
                        if candidate['name'] == server_name:
                            server_config = candidate
                            break

                    for process_name, process_info in server_info['processes'].items():
                        state = process_info['status']

                        # 'NOT_RUNNING' must be tested before 'RUNNING' (substring).
                        if 'NOT_RUNNING' in state:
                            line_color = Fore.RED

                            # Word report for the failed service
                            report_path = self._generate_word_report(
                                server_name,
                                process_name,
                                process_info['details'],
                                snapshot['timestamp'],
                                server_config
                            )

                            # Download link is only built when a report exists
                            report_url = ""
                            if report_path:
                                report_file = os.path.basename(report_path)
                                report_url = f"http://nat.ubainsyun.com:32233/{report_file}"
                                self.logger.info(f"{Fore.BLUE}生成报告下载链接: {report_url}{Style.RESET_ALL}")

                            # DingTalk notification
                            self.dingding_send_message(f"{process_name}服务异常", report_url, server_name)
                        elif 'RUNNING' in state:
                            line_color = Fore.GREEN
                        else:
                            line_color = Fore.YELLOW

                        # One summary line per service, with container id or pid
                        summary = f"  {process_name}: {line_color}{process_info['status']}{Style.RESET_ALL}"
                        container_id = process_info.get('container_id')
                        if container_id:
                            summary += f" (容器ID: {container_id})"
                        else:
                            pid = process_info.get('pid')
                            if pid:
                                summary += f" (PID: {pid})"
                        self.logger.info(summary)

                time.sleep(self.check_interval)

        except KeyboardInterrupt:
            self.logger.info(f"{Fore.CYAN}监测已手动停止{Style.RESET_ALL}")
        except Exception as e:
            self.logger.error(f"{Fore.RED}监测异常终止: {str(e)}{Style.RESET_ALL}")
        finally:
            self.__del__()

    def __del__(self):
        """
        Close every open SSH connection.

        The method is safe to call more than once (it is invoked explicitly
        at the end of ``monitor`` and again by the garbage collector) and on
        a partially constructed instance: missing attributes are tolerated
        and the connection registry is emptied after closing, so no
        connection is closed twice.
        """
        clients = getattr(self, 'ssh_clients', None)
        if not clients:
            return
        logger = getattr(self, 'logger', None)

        # Iterate over a copy; the registry is cleared afterwards.
        for name, ssh in list(clients.items()):
            try:
                ssh.close()
                if logger is not None:
                    logger.info(f"已关闭服务器 {name} 的连接")
            except Exception as e:
                if logger is not None:
                    logger.error(f"关闭服务器 {name} 连接时出错: {str(e)}")

        clients.clear()


if __name__ == "__main__":
    # Commands to run on the debugging host (kept as comments: the former
    # bare string literal contained invalid escape sequences such as "\s",
    # which trigger a SyntaxWarning on recent Python versions):
    #   1. In a first terminal:
    #        - cd .\系统监测\
    #        - python -m http.server 80 --directory error_log
    #   2. In a second terminal:
    #        - cd .\系统监测\ngrok\ngrok-虚拟机\
    #        - .\start.bat

    # SECURITY NOTE(review): hosts, passwords and key passphrases below are
    # hard-coded in source. They should be loaded from a protected
    # configuration source and the exposed credentials rotated.

    # Example server configuration - kept in line with the log monitoring script
    servers_config = [
        {  # Server 1
            'name': 'Server235',
            'host': '192.168.5.235',
            'port': 22,
            'username': 'root',
            # NOTE(review): password authentication in _connect_server reads
            # the 'password' key; 'ssh_password' is not used for logging in.
            'ssh_password': "Ubains@123",
            'private_key_path': "C:/Users/Administrator/.ssh/id_rsa",
            'passphrase': "Ubains@123",
            'process_names': [
                'ungrok', 'umysql', 'uredis',
                'upython', 'ujava2', 'paperless',
                'cardtable', 'ustorage', 'utracker'
            ]
        },
        {  # Server 2 (public cloud)
            'name': 'CloudServer',
            'host': '139.9.60.86',
            'port': 22,
            'username': 'root',
            'password': "hzpassw0RD@0324",  # intended for password authentication
            'private_key_path': "C:/Users/Administrator/.ssh/id_rsa",
            'passphrase': "Ubains@123",
            'process_names': [
                'zlm', 'udboard2', 'ujavahw', 'uredis',
                'ungrok2', 'uemqx2', 'upython2', 'uvod',
                'uauth', 'uerpsql', 'uerp', 'ujava2', 'ustorage',
                'utracker', 'showdoc', 'mysql', 'registry2'
            ]
        },
        {  # Server 3 (showroom server)
            'name': 'ExhitServer',
            'host': '192.168.5.200',
            'port': 22,
            'username': 'root',
            'password': "aF3wEPio",  # intended for password authentication
            'private_key_path': "C:/Users/Administrator/.ssh/id_rsa",
            'passphrase': "Ubains@123",
            'process_names': [
                'aimodel3', 'paperless', 'uemqx', 'ujava230705', 'mysql8',
                'ujava5', 'upython2', 'uredis', 'unginx', 'ustorage', 'utracker',
                'umysql'
            ]
        }
    ]

    # Create and start the monitor
    monitor = ServerProcessMonitor(
        servers_config=servers_config,
        check_interval=3000  # 3000 seconds = one check every 50 minutes
    )

    monitor.monitor()