当前位置: 首页 > news >正文

Python 实现服务器监控:CPU、内存、磁盘空间、网络等

Python 实现服务器监控:CPU、内存、磁盘空间、网络等

Posted on 2026-03-25 17:52  打杂滴  阅读(0)  评论(0)    收藏  举报


import psutil
import requests
import smtplib
import logging
import time
from datetime import datetime
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from typing import Dict, List, Tuple
import json
import argparse
import sys

# 配置日志
# Configure logging: every record goes both to a log file and to stdout.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        # The script logs Chinese text; pin the file encoding so the log is
        # written as UTF-8 regardless of the platform's locale encoding.
        logging.FileHandler('server_monitor.log', encoding='utf-8'),
        logging.StreamHandler(sys.stdout),
    ],
)

class ServerMonitor:
    """Check local resource usage and the reachability of configured servers.

    The configuration is a JSON document with the keys ``servers``,
    ``thresholds``, ``check_interval`` and ``alert_email`` (see the default
    written by :meth:`load_config`).
    """

    # Minimum number of seconds between two alert e-mails for the same server.
    ALERT_COOLDOWN_SECONDS = 300
    # Port on which SMTP servers expect TLS from the first byte (no STARTTLS).
    SMTP_IMPLICIT_TLS_PORT = 465
    # Upper bound, in seconds, for any single SMTP network operation.
    SMTP_TIMEOUT_SECONDS = 30

    def __init__(self, config_file: str = 'config.json'):
        """Load the configuration and start with an empty alert history."""
        self.config = self.load_config(config_file)
        # Maps "<server name>_down" to the time.time() of the last alert sent.
        self.alert_history = {}

    def load_config(self, config_file: str) -> dict:
        """Return the parsed JSON configuration stored at ``config_file``.

        If the file does not exist, a default configuration is written to
        that path and returned instead.
        """
        try:
            with open(config_file, 'r', encoding='utf-8') as f:
                return json.load(f)
        except FileNotFoundError:
            # Create a default configuration file.
            default_config = {
                "servers": [
                    {"name": "本地服务器", "host": "127.0.0.1", "port": 80},
                    {"name": "百度", "host": "www.baidu.com", "port": 443}
                ],
                "thresholds": {
                    "cpu_usage": 80,
                    "memory_usage": 85,
                    "disk_usage": 90
                },
                "check_interval": 60,
                "alert_email": {
                    "enabled": True,
                    "smtp_server": "smtp.qq.com",
                    "smtp_port": 465,
                    "sender_email": "111@qq.com",
                    "sender_password": "password",
                    "recipient_emails": ["111@qq.com"]
                }
            }
            with open(config_file, 'w', encoding='utf-8') as f:
                json.dump(default_config, f, indent=4, ensure_ascii=False)
            logging.info(f"已创建默认配置文件: {config_file}")
            return default_config

    def check_cpu_usage(self) -> Tuple[bool, float]:
        """Return ``(alert, percent)`` for CPU usage.

        ``alert`` is True when the measured percentage exceeds the
        ``cpu_usage`` threshold. On any error ``(False, 0.0)`` is returned.
        """
        try:
            cpu_percent = psutil.cpu_percent(interval=1)
            threshold = self.config['thresholds']['cpu_usage']
            return cpu_percent > threshold, cpu_percent
        except Exception as e:
            logging.error(f"检查CPU使用率时出错: {e}")
            return False, 0.0

    def check_memory_usage(self) -> Tuple[bool, float]:
        """Return ``(alert, percent)`` for virtual memory usage.

        ``alert`` is True when the percentage exceeds the ``memory_usage``
        threshold. On any error ``(False, 0.0)`` is returned.
        """
        try:
            memory_percent = psutil.virtual_memory().percent
            threshold = self.config['thresholds']['memory_usage']
            return memory_percent > threshold, memory_percent
        except Exception as e:
            logging.error(f"检查内存使用率时出错: {e}")
            return False, 0.0

    def check_disk_usage(self) -> Tuple[bool, float]:
        """Return ``(alert, percent)`` for disk usage of the ``/`` path.

        The percentage is computed as ``used / total * 100``. ``alert`` is
        True when it exceeds the ``disk_usage`` threshold. On any error
        ``(False, 0.0)`` is returned.
        """
        try:
            disk = psutil.disk_usage('/')
            disk_percent = (disk.used / disk.total) * 100
            threshold = self.config['thresholds']['disk_usage']
            return disk_percent > threshold, disk_percent
        except Exception as e:
            logging.error(f"检查磁盘使用率时出错: {e}")
            return False, 0.0

    def check_network_connectivity(self, host: str, port: int) -> bool:
        """Return True if a TCP connection to ``host:port`` succeeds within 5 s.

        Any error (including name-resolution failures) is logged and
        reported as False.
        """
        import socket

        try:
            # The context manager closes the socket even when connect_ex
            # raises, e.g. on a DNS lookup failure.
            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
                sock.settimeout(5)
                return sock.connect_ex((host, port)) == 0
        except Exception as e:
            logging.error(f"检查网络连通性时出错 ({host}:{port}): {e}")
            return False

    def check_http_service(self, url: str) -> bool:
        """Return True if a GET request to ``url`` answers with status 200.

        The request uses a 10 second timeout; any error is logged and
        reported as False.
        """
        try:
            response = requests.get(url, timeout=10)
            return response.status_code == 200
        except Exception as e:
            logging.error(f"检查HTTP服务时出错 ({url}): {e}")
            return False

    def send_alert_email(self, subject: str, message: str):
        """Send a plain-text alert e-mail to the configured recipients.

        Does nothing when ``alert_email.enabled`` is false. Failures are
        logged and never raised, so a broken mail setup cannot stop the
        monitoring run.
        """
        import ssl

        email_config = self.config['alert_email']
        if not email_config['enabled']:
            return

        try:
            msg = MIMEMultipart()
            msg['From'] = email_config['sender_email']
            msg['To'] = ', '.join(email_config['recipient_emails'])
            msg['Subject'] = subject
            msg.attach(MIMEText(message, 'plain', 'utf-8'))

            host = email_config['smtp_server']
            port = int(email_config['smtp_port'])
            tls_context = ssl.create_default_context()

            # Port 465 speaks TLS from the first byte; issuing STARTTLS on a
            # plain SMTP connection to it fails with "Connection unexpectedly
            # closed". Other ports (e.g. 587) are upgraded with STARTTLS.
            implicit_tls = port == self.SMTP_IMPLICIT_TLS_PORT
            if implicit_tls:
                server = smtplib.SMTP_SSL(
                    host, port,
                    timeout=self.SMTP_TIMEOUT_SECONDS,
                    context=tls_context,
                )
            else:
                server = smtplib.SMTP(
                    host, port, timeout=self.SMTP_TIMEOUT_SECONDS
                )

            # Leaving the with-block sends QUIT and closes the connection,
            # including when login or sendmail raises.
            with server:
                if not implicit_tls:
                    server.starttls(context=tls_context)
                server.login(
                    email_config['sender_email'],
                    email_config['sender_password'],
                )
                server.sendmail(
                    email_config['sender_email'],
                    email_config['recipient_emails'],
                    msg.as_string(),
                )

            logging.info(f"告警邮件已发送: {subject}")
        except Exception as e:
            logging.error(f"发送告警邮件时出错: {e}")

    def _alert_due(self, alert_key: str) -> bool:
        """Return True if no alert for ``alert_key`` was sent within the cooldown."""
        last_sent = self.alert_history.get(alert_key)
        if last_sent is None:
            return True
        return time.time() - last_sent > self.ALERT_COOLDOWN_SECONDS

    def check_all_servers(self) -> List[Dict]:
        """Run every check and return one result dict per checked target.

        The first entry describes the local machine (CPU, memory, disk);
        the following entries describe each item in ``config['servers']``.
        Every result has the keys ``name``, ``type``, ``status`` (True when
        healthy), ``details`` and ``alerts``. An alert e-mail is sent for
        each unreachable server, at most once per cooldown period.
        """
        results = []

        # Local system resources.
        cpu_alert, cpu_usage = self.check_cpu_usage()
        memory_alert, memory_usage = self.check_memory_usage()
        disk_alert, disk_usage = self.check_disk_usage()

        local_alerts = []
        if cpu_alert:
            local_alerts.append(f"CPU使用率过高: {cpu_usage:.1f}%")
        if memory_alert:
            local_alerts.append(f"内存使用率过高: {memory_usage:.1f}%")
        if disk_alert:
            local_alerts.append(f"磁盘使用率过高: {disk_usage:.1f}%")

        results.append({
            'name': '本地服务器',
            'type': 'system',
            'status': not (cpu_alert or memory_alert or disk_alert),
            'details': {
                'cpu_usage': f"{cpu_usage:.1f}%",
                'memory_usage': f"{memory_usage:.1f}%",
                'disk_usage': f"{disk_usage:.1f}%"
            },
            'alerts': local_alerts
        })

        # Remote servers.
        for server in self.config['servers']:
            # The reported type is decided in the same branch as the check,
            # so an entry carrying both 'url' and 'host' is labelled by the
            # check that was actually run.
            if 'url' in server:
                status = self.check_http_service(server['url'])
                details = {'service': 'HTTP'}
                check_type = 'http'
            else:
                status = self.check_network_connectivity(
                    server['host'], server['port']
                )
                details = {'host': server['host'], 'port': server['port']}
                check_type = 'network'

            results.append({
                'name': server['name'],
                'type': check_type,
                'status': status,
                'details': details,
                'alerts': [] if status else ["服务不可达"]
            })

            if status:
                continue

            # Send an alert unless one went out for this server recently.
            alert_key = f"{server['name']}_down"
            if self._alert_due(alert_key):
                self.alert_history[alert_key] = time.time()
                subject = f"服务器告警: {server['name']} 服务不可达"
                message = (
                    "\n"
                    f"时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n"
                    f"服务器: {server['name']}\n"
                    "问题: 服务不可达\n"
                    f"详情: {details}\n"
                )
                self.send_alert_email(subject, message)

        return results

    def print_status_report(self, results: List[Dict]):
        """Print a human-readable report of ``results`` to stdout."""
        print("\n" + "="*60)
        print(f"服务器状态检查报告 - {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
        print("="*60)

        for result in results:
            status_icon = "✓" if result['status'] else "✗"
            status_text = "正常" if result['status'] else "异常"
            print(f"\n{status_icon} {result['name']} [{status_text}]")

            # Detail lines.
            if isinstance(result['details'], dict):
                for key, value in result['details'].items():
                    print(f" {key}: {value}")

            # Alert lines.
            for alert in result['alerts']:
                print(f" ⚠️ {alert}")

        print("\n" + "="*60)

    def run_continuous_monitoring(self):
        """Check, report and sleep ``check_interval`` seconds, repeatedly.

        The loop ends on Ctrl+C, or on the first unexpected exception,
        which is logged.
        """
        interval = self.config['check_interval']
        logging.info(f"开始持续监控,检查间隔: {interval} 秒")

        try:
            while True:
                results = self.check_all_servers()
                self.print_status_report(results)

                # Log a one-line health summary.
                healthy_count = sum(1 for r in results if r['status'])
                total_count = len(results)
                logging.info(f"健康服务器: {healthy_count}/{total_count}")

                time.sleep(interval)
        except KeyboardInterrupt:
            logging.info("监控已停止")
        except Exception as e:
            logging.error(f"监控过程中出错: {e}")

def main():
    """Parse the command line and run one check, or monitor continuously."""
    cli = argparse.ArgumentParser(description='服务器状态检查工具')
    cli.add_argument(
        '--continuous', '-c',
        action='store_true',
        help='持续监控模式',
    )
    cli.add_argument(
        '--config', '-f',
        default='config.json',
        help='配置文件路径',
    )
    options = cli.parse_args()

    monitor = ServerMonitor(options.config)

    # Continuous mode loops until interrupted; nothing else to do afterwards.
    if options.continuous:
        monitor.run_continuous_monitoring()
        return

    # One-shot mode: a single round of checks followed by the report.
    monitor.print_status_report(monitor.check_all_servers())


if __name__ == "__main__":
    main()

 

 

--------------------

 

PS F:\python> & C:/Users/Administrator/AppData/Local/Microsoft/WindowsApps/python3.13.exe f:/python/python_new/自动化/ServerMonitor.py
2026-03-25 17:52:02,637 - ERROR - 发送告警邮件时出错: Connection unexpectedly closed

============================================================
服务器状态检查报告 - 2026-03-25 17:52:02
============================================================

✓ 本地服务器 [正常]
cpu_usage: 8.5%
memory_usage: 83.7%
disk_usage: 17.7%

✗ 本地服务器 [异常]
host: 127.0.0.1
port: 80
⚠️ 服务不可达

✓ 百度 [正常]
host: www.baidu.com
port: 443

============================================================