当前位置：首页 > news >正文

Python安全文件上传

news 2026/7/24 6:52:19

"""
Python 安全文件上传 —— 全方位的文件上传安全防护
涵盖扩展名校验、MIME 检测、内容扫描、大小控制和文件名消毒
"""

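# Install dependencies: pip install werkzeug, plus python-magic-bin (Windows) or python-magic (Linux/macOS; python-magic also needs the system libmagic library)
# File upload is one of the most common security risk points in web applications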

import os
import re
import imghdr
import magic # 用于检测文件真实 MIME 类型
from pathlib import Path
from typing import Tuple, Optional, Set
from werkzeug.utils import secure_filename
import hashlib
import time

# ========== 第一部分：配置 ==========

class UploadConfig:
    """Security limits applied to uploads, gathered in one place.

    Keeping the allow-lists and limits together makes them easy to audit
    and to override by subclassing.
    """

    # Extension allow-list (lower-case, leading dot included).
    ALLOWED_EXTENSIONS: Set[str] = {
        ".txt", ".pdf", ".doc", ".docx",
        ".xls", ".xlsx", ".csv",
        ".jpg", ".jpeg", ".png", ".gif", ".bmp", ".webp",
        ".mp3", ".mp4", ".zip",
    }

    # MIME-type allow-list. Every allowed extension needs a matching entry
    # here, otherwise files of that type can never pass the MIME check; the
    # two Excel types were missing although .xls/.xlsx are allowed above.
    ALLOWED_MIME_TYPES: Set[str] = {
        "text/plain",
        "text/csv",
        "application/pdf",
        "application/msword",
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        "application/vnd.ms-excel",
        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        "image/jpeg", "image/png", "image/gif", "image/bmp", "image/webp",
        "audio/mpeg", "video/mp4",
        "application/zip",
    }

    # Upload size limit.
    MAX_FILE_SIZE_MB: int = 10
    MAX_FILE_SIZE_BYTES: int = MAX_FILE_SIZE_MB * 1024 * 1024

    # Directory that receives accepted uploads.
    UPLOAD_DIR: str = "./secure_uploads"

    # Leading byte signatures that are rejected outright by the content scan.
    # b"%PDF" is intentionally NOT listed: PDF is an allow-listed type, and a
    # signature entry here would reject every PDF upload.
    DANGEROUS_SIGNATURES: Set[bytes] = {
        b"MZ",        # Windows executable
        b"\x7fELF",   # ELF (Linux) executable
    }

# ========== 第二部分：核心安全校验器 ==========

class SecureFileUploader:
    """Layered validator for uploaded files.

    Each ``validate_*`` / ``scan_*`` method implements one independent check
    and returns ``True`` when the file passes. Progress and rejection reasons
    are reported with ``print``.
    """

    # Extensions accepted for each MIME type that is cross-checked against
    # the file name. MIME types not listed here are not cross-checked.
    _MIME_EXTENSIONS = {
        "text/plain": {".txt", ".csv"},   # CSV is often sniffed as plain text
        "application/pdf": {".pdf"},
        "image/jpeg": {".jpg", ".jpeg"},
        "image/png": {".png"},
        "image/gif": {".gif"},
    }

    # Image format expected for each image extension.
    _IMAGE_EXTENSION_FORMATS = {
        ".jpg": "jpeg",
        ".jpeg": "jpeg",
        ".png": "png",
        ".gif": "gif",
        ".bmp": "bmp",
        ".webp": "webp",
    }

    def __init__(self, config: Optional["UploadConfig"] = None):
        """Store *config* (a default ``UploadConfig`` when omitted) and make
        sure the upload directory exists."""
        self.config = config or UploadConfig()
        os.makedirs(self.config.UPLOAD_DIR, exist_ok=True)

    def validate_extension(self, filename: str) -> bool:
        """Layer 1: check the final extension against the allow-list.

        Only the last suffix is considered, so ``image.jpg.exe`` is judged
        as ``.exe``.

        Returns:
            True when the lower-cased suffix is in ``ALLOWED_EXTENSIONS``.
        """
        ext = Path(filename).suffix.lower()
        if ext not in self.config.ALLOWED_EXTENSIONS:
            print(f"拒绝：不允许的扩展名 '{ext}'")
            return False
        print(f"扩展名校验通过：{ext}")
        return True

    def validate_mime_type(self, filepath: str) -> bool:
        """Layer 2: detect the MIME type from file content and validate it.

        The detected type must be in ``ALLOWED_MIME_TYPES`` and must agree
        with the file's extension (see ``_mime_matches_extension``).

        Returns:
            True when both conditions hold; False otherwise, including when
            detection itself fails.
        """
        try:
            # Detection is based on content, not on the file name.
            mime_type = magic.from_file(filepath, mime=True)
            print(f"检测到 MIME 类型：{mime_type}")

            if mime_type not in self.config.ALLOWED_MIME_TYPES:
                print(f"拒绝：不允许的 MIME 类型 '{mime_type}'")
                return False

            ext = Path(filepath).suffix.lower()
            if not self._mime_matches_extension(mime_type, ext):
                print(f"拒绝：MIME 类型 '{mime_type}' 与扩展名 '{ext}' 不匹配")
                return False

            print("MIME 类型校验通过")
            return True
        except Exception as e:
            # Fail closed: any detection error is treated as a rejection.
            print(f"MIME 检测失败：{e}")
            return False

    def _mime_matches_extension(self, mime: str, ext: str) -> bool:
        """Return True when *ext* is an accepted extension for *mime*.

        MIME types without an entry in ``_MIME_EXTENSIONS`` are not
        cross-checked and always match.
        """
        accepted = self._MIME_EXTENSIONS.get(mime)
        return accepted is None or ext in accepted

    def scan_content(self, filepath: str) -> bool:
        """Layer 3: inspect the leading bytes of the file.

        Rejects files that start with any configured dangerous signature
        and, for ``.jpg``/``.jpeg``/``.png``/``.gif`` files, additionally
        runs ``_validate_image_safety``.

        Raises:
            OSError: if the file cannot be opened or read.
        """
        with open(filepath, "rb") as f:
            header = f.read(20)

        for sig in self.config.DANGEROUS_SIGNATURES:
            if header.startswith(sig):
                # !r yields the same text as the implicit bytes->str
                # conversion without triggering BytesWarning under ``-b``.
                print(f"拒绝：检测到危险文件签名 '{sig!r}'")
                return False

        ext = Path(filepath).suffix.lower()
        if ext in {".jpg", ".jpeg", ".png", ".gif"}:
            if not self._validate_image_safety(filepath):
                return False

        print("内容安全检查通过")
        return True

    @staticmethod
    def _sniff_image_format(header: bytes) -> Optional[str]:
        """Identify an image format from its leading bytes.

        Returns:
            One of ``"jpeg"``, ``"png"``, ``"gif"``, ``"bmp"``, ``"webp"``,
            or None when no known signature matches.
        """
        if header.startswith(b"\xff\xd8\xff"):
            return "jpeg"
        if header.startswith(b"\x89PNG\r\n\x1a\n"):
            return "png"
        if header.startswith((b"GIF87a", b"GIF89a")):
            return "gif"
        if header.startswith(b"BM"):
            return "bmp"
        if header[:4] == b"RIFF" and header[8:12] == b"WEBP":
            return "webp"
        return None

    def _validate_image_safety(self, filepath: str) -> bool:
        """Check that an image file really starts with an image signature.

        The signature is checked directly instead of through ``imghdr``,
        which is no longer part of the standard library as of Python 3.13.
        When the extension is a known image extension, the detected format
        must also agree with it.

        Returns:
            True when the header is a valid image header consistent with the
            extension and the file is at most 50 MB; False otherwise,
            including when the file cannot be read.
        """
        try:
            with open(filepath, "rb") as f:
                header = f.read(16)

            image_format = self._sniff_image_format(header)
            if not image_format:
                print("拒绝：无效的图片文件")
                return False

            expected = self._IMAGE_EXTENSION_FORMATS.get(
                Path(filepath).suffix.lower()
            )
            if expected and expected != image_format:
                print("拒绝：无效的图片文件")
                return False

            # Coarse cap on the on-disk size only; pixel dimensions are not
            # inspected here.
            file_size = os.path.getsize(filepath)
            if file_size > 50 * 1024 * 1024:
                print("拒绝：图片文件过大，可能为解压炸弹")
                return False
            return True
        except (OSError, ValueError) as e:
            print(f"图片安全检查失败：{e}")
            return False

    def validate_file_size(self, filepath: str) -> bool:
        """Layer 4: reject files larger than ``MAX_FILE_SIZE_BYTES``.

        Raises:
            OSError: if the file size cannot be read.
        """
        size = os.path.getsize(filepath)
        if size > self.config.MAX_FILE_SIZE_BYTES:
            print(f"拒绝：文件大小 {size / 1024 / 1024:.1f}MB 超过限制 "
                  f"{self.config.MAX_FILE_SIZE_MB}MB")
            return False
        print(f"文件大小校验通过：{size / 1024:.1f}KB")
        return True

    def sanitize_filename(self, filename: str) -> str:
        """Layer 5: turn a client-supplied name into a safe file name.

        ``secure_filename`` does the basic sanitizing. A generated name is
        used instead when the result is empty or when sanitizing dropped the
        original extension, which happens for names made entirely of
        non-ASCII characters (``"报告.pdf"`` becomes ``"pdf"``).

        Returns:
            The sanitized (or generated) file name.
        """
        safe_name = secure_filename(filename)
        original_ext = Path(filename).suffix.lower()
        lost_extension = (
            bool(original_ext)
            and Path(safe_name).suffix.lower() != original_ext
        )
        if not safe_name or lost_extension:
            safe_name = self._generate_safe_filename(filename)
        print(f"文件名已消毒：{filename} -> {safe_name}")
        return safe_name

    def _generate_safe_filename(self, original: str) -> str:
        """Build an unpredictable file name, keeping a well-formed extension.

        The name is ``<unix timestamp>_<16 hex chars><ext>``. The extension
        of *original* is kept only when it consists of a dot followed by
        1-16 ASCII letters or digits; anything else is dropped.
        """
        ext = Path(original).suffix.lower()
        if not re.fullmatch(r"\.[a-z0-9]{1,16}", ext):
            ext = ""
        timestamp = int(time.time())
        # OS-provided randomness: unlike a hash of name + timestamp it is
        # neither guessable nor repeated within the same second.
        token = os.urandom(8).hex()
        return f"{timestamp}_{token}{ext}"

# ========== 第三部分：完整上传流程 ==========

def secure_upload(uploader: "SecureFileUploader", file_data: bytes,
                  original_filename: str) -> Optional[str]:
    """Run every validation layer on an upload and store it if all pass.

    Order of checks: extension allow-list, in-memory size, file-name
    sanitizing, then on-disk size, MIME type and content scan against a
    temporary file inside the upload directory.

    Args:
        uploader: Validator providing the ``validate_*`` / ``scan_content`` /
            ``sanitize_filename`` methods and a ``config`` with ``UPLOAD_DIR``.
        file_data: Raw bytes of the uploaded file.
        original_filename: File name as supplied by the client (untrusted).

    Returns:
        Path of the stored file, or None when any check rejects the upload
        or an error occurs while processing it.
    """
    print(f"\n开始安全检查：{original_filename}")
    print("=" * 40)

    # Step 1: extension allow-list.
    if not uploader.validate_extension(original_filename):
        return None

    # Reject oversized payloads before they are written anywhere; checking
    # only after the write would defeat the purpose of the size limit.
    max_bytes = getattr(uploader.config, "MAX_FILE_SIZE_BYTES", None)
    if max_bytes is not None and len(file_data) > max_bytes:
        print(f"拒绝：文件大小 {len(file_data) / 1024 / 1024:.1f}MB 超过限制 "
              f"{max_bytes / 1024 / 1024:.1f}MB")
        return None

    # Step 2: file-name sanitizing.
    safe_filename = uploader.sanitize_filename(original_filename)
    upload_dir = uploader.config.UPLOAD_DIR
    # A random token keeps concurrent uploads of the same name from sharing a
    # temp file. The original name stays at the end so the extension-based
    # checks still see the real suffix.
    temp_path = os.path.join(
        upload_dir, f"temp_{os.urandom(8).hex()}_{safe_filename}"
    )

    try:
        # "x" refuses to reuse an existing file instead of truncating it.
        with open(temp_path, "xb") as f:
            f.write(file_data)

        # Steps 3-5: size, MIME type, content scan; stop at first failure.
        checks = (
            uploader.validate_file_size,
            uploader.validate_mime_type,
            uploader.scan_content,
        )
        if not all(check(temp_path) for check in checks):
            return None

        # Never overwrite an earlier upload that has the same name.
        final_path = os.path.join(upload_dir, safe_filename)
        if os.path.exists(final_path):
            stem, ext = os.path.splitext(safe_filename)
            final_path = os.path.join(
                upload_dir, f"{stem}_{os.urandom(4).hex()}{ext}"
            )

        os.rename(temp_path, final_path)
        print(f"\n上传成功！文件保存至：{final_path}")
        return final_path

    except Exception as e:
        # Boundary of the upload flow: report and signal failure to caller.
        print(f"上传过程中出错：{e}")
        return None
    finally:
        # Single cleanup point for every rejection and error path. After a
        # successful rename the temp file no longer exists.
        if os.path.exists(temp_path):
            os.remove(temp_path)

# ========== 第四部分：演示 ==========

def demo_secure_upload():
    """Run two sample uploads through ``secure_upload``.

    The first is a plain-text file; the second is a payload starting with the
    ``MZ`` executable signature but named like a JPEG.
    """
    uploader = SecureFileUploader()

    # (banner, payload, client-supplied file name)
    scenarios = (
        (
            "场景一：合法文件上传",
            b"Hello, this is a safe text file!",
            "report.txt",
        ),
        (
            "\n场景二：伪装扩展名的可执行文件",
            b"MZ\x90\x00\x03\x00\x00\x00\x04\x00\x00\x00\xff\xff\x00\x00",
            "document.jpg",
        ),
    )
    for banner, payload, client_name in scenarios:
        print(banner)
        secure_upload(uploader, payload, client_name)


if __name__ == "__main__":
    demo_secure_upload()