python爬虫4K高清美女壁纸
简介:
- 一次爬取20张图片,可以更改这段代码的数值,改变下载图片数量:if success_count >= 20:
- 图片存放到D:\pachong1,可以更改这段代码的值修改存放地址:SAVE_DIR = r"D:\pachong1"
- 需要安装对应的库,使用:pip install 库名来安装需要的库
代码:
r"""AURA wallpaper crawler for the "摄影美女" topic.

Opens https://gallery.wallaura.cn/?t=摄影美女 in headless Chrome, reads the
image list the page keeps in its ``DATA_CACHE`` variable, and downloads the
images into ``SAVE_DIR`` (default ``D:\pachong1``) until ``TARGET_COUNT`` real
images have been saved. Responses smaller than ``MIN_IMAGE_KB`` are treated as
placeholder images and skipped.

This docstring is a raw string because it contains a Windows path; in a normal
string literal ``\p`` would be an invalid escape sequence.
"""

import base64
import hashlib
import json
import os
import re
import time

import requests
from Crypto.Cipher import AES
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

# Directory the downloaded images are written to.
SAVE_DIR = r"D:\pachong1"

# Number of real (non-placeholder) images to download per run.
TARGET_COUNT = 20

# Responses smaller than this many KB are treated as placeholder images.
MIN_IMAGE_KB = 100

# Gallery page for the topic (the query string is the URL-encoded topic name).
PAGE_URL = "https://gallery.wallaura.cn/?t=%E6%91%84%E5%BD%B1%E7%BE%8E%E5%A5%B3"

# Fixed delay that gives the page's scripts time to fill DATA_CACHE.
PAGE_LOAD_WAIT_SECONDS = 6

# Header sets tried in order for each image: first with a Referer pointing at
# the gallery, then with a bare User-Agent only.
_DOWNLOAD_HEADERS = (
    {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
        "Referer": "https://gallery.wallaura.cn/",
    },
    {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
    },
)

# Characters that are path separators or not allowed in Windows file names.
_UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]')

# Runs inside the page: collects provider / id / URL for every DATA_CACHE
# entry, letting the page's own decryptUrl() decode the URL when it can.
_EXTRACT_ITEMS_JS = """
if (typeof DATA_CACHE !== 'undefined' && DATA_CACHE.length > 0) {
    var items = [];
    for (var i = 0; i < DATA_CACHE.length; i++) {
        var item = DATA_CACHE[i];
        if (!item.rawprovider || !item.rawid) continue;
        var imgurl = item.imgurl || item.thumburl || '';
        if (!imgurl) continue;
        var decrypted = imgurl;
        try { decrypted = decryptUrl(imgurl, item.rawprovider); } catch(e) {}
        items.push({ provider: item.rawprovider, rawid: item.rawid, url: decrypted });
    }
    return JSON.stringify(items);
}
return '[]';
"""


def encrypt_md5(text):
    """Return the hexadecimal MD5 digest of *text* (encoded as UTF-8)."""
    return hashlib.md5(text.encode()).hexdigest()


def decrypt_aes(cipher_text, key):
    """Decrypt a hex-encoded AES-CBC string.

    The 16-character AES key is the tail of *key* repeated 16 times, and the
    IV is characters 8..23 of that key's MD5 hex digest. Trailing NUL bytes
    are stripped from the plaintext.

    Args:
        cipher_text: Hex string holding the ciphertext.
        key: Key material (a string).

    Returns:
        The decrypted text; ``""`` when either argument is empty; or
        *cipher_text* unchanged when it cannot be decoded or decrypted.
    """
    if not cipher_text or not key:
        return ""

    key = (key * 16)[-16:]
    iv = encrypt_md5(key)[8:24]

    try:
        raw = bytes.fromhex(cipher_text)
        cipher = AES.new(key.encode("utf-8"), AES.MODE_CBC, iv.encode("utf-8"))
        return cipher.decrypt(raw).rstrip(b"\x00").decode("utf-8")
    except (ValueError, TypeError):
        # Bad hex, wrong key/IV/data length, or undecodable plaintext
        # (UnicodeDecodeError is a ValueError): keep the best-effort contract
        # and hand the input back untouched.
        return cipher_text


def decrypt_url(img_url, provider):
    """Decrypt the obfuscated file name inside an image URL.

    Only the last path segment is touched: the first 32 characters of its
    stem (the text before the first ``.``) are decrypted with
    :func:`decrypt_aes` using *provider* as the key. The rest of the stem,
    the extension, the directory part and any query string are preserved.

    Returns:
        ``""`` when either argument is empty, *img_url* unchanged when the
        stem is shorter than 32 characters, otherwise the rewritten URL.
    """
    if not img_url or not provider:
        return ""

    path, query_sep, query = img_url.partition("?")
    directory, slash, filename = path.rpartition("/")
    stem, dot, suffix = filename.partition(".")

    if len(stem) < 32:
        return img_url

    new_stem = decrypt_aes(stem[:32], provider) + stem[32:]
    return directory + slash + new_stem + dot + suffix + query_sep + query


def _safe_filename(name):
    """Replace path separators and characters invalid in file names with ``_``."""
    return _UNSAFE_FILENAME_CHARS.sub("_", name)


def try_download(img_url, save_path, index, min_kb=MIN_IMAGE_KB):
    """Download one image, rejecting placeholder-sized responses.

    Each header set in ``_DOWNLOAD_HEADERS`` is tried in turn until one
    returns HTTP 200.

    Args:
        img_url: URL of the image.
        save_path: Destination file path; its parent directory is created
            when missing.
        index: 1-based ordinal used only in the success message.
        min_kb: Responses smaller than this many KB are treated as
            placeholders and are not saved.

    Returns:
        True when the image was written to *save_path*; False when the
        response was a placeholder or every attempt failed.
    """
    last_attempt = len(_DOWNLOAD_HEADERS) - 1
    for attempt, headers in enumerate(_DOWNLOAD_HEADERS):
        try:
            resp = requests.get(img_url, headers=headers, timeout=30)
            if resp.status_code == 200:
                size_kb = len(resp.content) / 1024
                if size_kb < min_kb:
                    print(f" [占位图 {size_kb:.1f}KB] 跳过")
                    return False
                parent = os.path.dirname(save_path)
                if parent:
                    os.makedirs(parent, exist_ok=True)
                with open(save_path, "wb") as f:
                    f.write(resp.content)
                print(f" [OK] 第{index}张 ({size_kb:.1f} KB)")
                return True
            # Report the failure instead of silently moving to the next attempt.
            print(f" 尝试{attempt+1}失败: HTTP {resp.status_code}")
        except (requests.RequestException, OSError) as e:
            print(f" 尝试{attempt+1}异常: {e}")
        if attempt < last_attempt:
            time.sleep(0.5)
    return False


def _build_driver():
    """Create the headless Chrome driver used to load the gallery page."""
    chrome_options = Options()
    chrome_options.add_argument('--headless=new')
    chrome_options.add_argument('--disable-gpu')
    chrome_options.add_argument('--no-sandbox')
    chrome_options.add_argument('--window-size=1920,1080')
    chrome_options.add_argument('user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36')
    return webdriver.Chrome(options=chrome_options)


def _fetch_items(driver):
    """Load the gallery page and return its image entries.

    Returns:
        A list of dicts with the keys ``provider``, ``rawid`` and ``url``;
        empty when the page exposes no data.
    """
    driver.get(PAGE_URL)
    time.sleep(PAGE_LOAD_WAIT_SECONDS)
    data_json = driver.execute_script(_EXTRACT_ITEMS_JS)
    return json.loads(data_json or "[]")


def _download_items(items, target_count):
    """Download from *items* until *target_count* images are saved; return the count."""
    success_count = 0
    for i, item in enumerate(items):
        if success_count >= target_count:
            break

        print(f"\n--- [{success_count+1}/{target_count}] 第{i+1}/{len(items)}张 ---")
        print(f" 来源: {item['provider']}, ID: {item['rawid']}")
        print(f" URL: {item['url'][:100]}...")

        url_path = item['url'].split("?")[0]
        ext = os.path.splitext(url_path)[1] or ".jpg"
        if len(ext) > 5:
            # Not a plausible image extension; fall back to .jpg.
            ext = ".jpg"

        # provider / rawid come from the website, so strip anything that
        # could act as a path separator before using them in a file name.
        file_name = _safe_filename(f"aura_{item['provider']}_{item['rawid']}{ext}")
        save_path = os.path.join(SAVE_DIR, file_name)

        if try_download(item['url'], save_path, success_count + 1):
            success_count += 1
        time.sleep(0.3)
    return success_count


def main(target_count=TARGET_COUNT):
    """Run the crawler: collect the image list, then download the images.

    Args:
        target_count: Number of real images to save before stopping.
    """
    print("=" * 60)
    print("AURA 壁纸爬虫 - 摄影美女专题 (只下真图)")
    print("=" * 60)

    os.makedirs(SAVE_DIR, exist_ok=True)

    print("\n[1/2] 正在获取图片数据...")
    driver = _build_driver()
    try:
        items = _fetch_items(driver)
    finally:
        # The browser is only needed to read the item list; release it
        # before the (potentially long) download phase.
        driver.quit()

    print(f"获取到 {len(items)} 张图片")
    if not items:
        print("没有获取到图片!")
        return

    print(f"\n[2/2] 开始下载,跳过占位图(<{MIN_IMAGE_KB}KB)直到凑满{target_count}张...")
    success_count = _download_items(items, target_count)

    print("\n" + "=" * 60)
    print(f"完成!真图: {success_count}/{target_count} 张")
    print(f"保存路径: {SAVE_DIR}")
    print("=" * 60)


if __name__ == "__main__":
    main()