当前位置: 首页 > news > 正文

Python爬虫:获取某平台数据的下载链接 - 指南

searchid的生成参考代码如下:

/**
 * Multiply two non-negative decimal integers of arbitrary length.
 *
 * Both operands are converted to strings and processed digit by digit
 * (schoolbook multiplication on reversed digit arrays), so values beyond
 * Number.MAX_SAFE_INTEGER are handled exactly.
 *
 * @param {number|string} e First factor (decimal digits only).
 * @param {number|string} t Second factor (decimal digits only).
 * @returns {string} The product as a decimal string without leading zeros
 *     ("0" when the product is zero).
 */
function a(e, t) {
    var left = "".concat(e).split("").reverse(),
        right = "".concat(t).split("").reverse(),
        digits = [],
        i, j, cell;

    // The product of an m-digit and an n-digit number has at most m + n digits.
    for (i = 0; i < left.length + right.length; i++) {
        digits[i] = 0;
    }

    for (j = 0; j < right.length; j++) {
        for (i = 0; i < left.length; i++) {
            cell = i + j;
            digits[cell] += parseInt(left[i], 10) * parseInt(right[j], 10);
            // Push the carry to the next position right away so every
            // already-visited cell stays a single decimal digit.
            digits[cell + 1] += Math.floor(digits[cell] / 10);
            digits[cell] %= 10;
        }
    }

    digits.reverse();
    // Drop every leading zero, not just the first one, but always keep one
    // digit so a zero product is rendered as "0" rather than "000…" or "".
    while (digits.length > 1 && digits[0] === 0) {
        digits.shift();
    }
    return digits.join("");
}
/**
 * Add two non-negative decimal integers of arbitrary length.
 *
 * Operands are converted to strings and summed digit by digit from the
 * least significant end, carrying as needed.
 *
 * @param {number|string} e First addend (decimal digits only).
 * @param {number|string} t Second addend (decimal digits only).
 * @returns {string} The sum as a decimal string. Leading zeros present in
 *     the longer operand are preserved.
 */
function r(e, t) {
    var first = "".concat(e).split("").reverse();
    var second = "".concat(t).split("").reverse();
    var width = Math.max(first.length, second.length);
    var sumDigits = [];
    var carry = 0;

    for (var pos = 0; pos < width; pos++) {
        // Positions past the end of the shorter operand count as zero.
        var x = pos < first.length ? parseInt(first[pos], 10) : 0;
        var y = pos < second.length ? parseInt(second[pos], 10) : 0;
        var total = x + y + carry;
        sumDigits.push("".concat(total % 10));
        carry = total >= 10 ? 1 : 0;
    }

    if (carry === 1) {
        sumDigits.push("1");
    }
    return sumDigits.reverse().join("");
}
/**
 * Build a search id as a decimal string.
 *
 * The value is the exact big-integer sum of three parts:
 *   - 3 * 18014398509481984            (the constant equals 2^54)
 *   - round(random * 4194304) * 4294967296   (2^22 and 2^32 respectively)
 *   - the number of milliseconds elapsed since local midnight
 *
 * @returns {string} The generated id.
 */
function getSearchId() {
    var fixedPart = a(3, "18014398509481984");

    var randomFactor = Math.round(Math.random() * parseInt("4194304", 10));
    var randomPart = a(randomFactor, "4294967296");

    var now = new Date();
    var secondsToday = 3600 * now.getHours() + 60 * now.getMinutes() + now.getSeconds();
    var millisToday = 1e3 * secondsToday + now.getMilliseconds();

    var partial = r(fixedPart, randomPart);
    return r(partial, millisToday);
}
console.log(getSearchId())

Python爬虫代码如下:

import requests
import numpy as np
import subprocess
import json
import execjs
import time
cookies = ''  # put your own cookie string for the site here

API_URL = 'https://u6.y.qq.com/cgi-bin/musics.fcg'
STREAM_BASE_URL = 'https://ws6.stream.qqmusic.qq.com/'
USER_AGENT = (
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
    '(KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36'
)
# requests waits indefinitely unless a timeout is given explicitly.
REQUEST_TIMEOUT = 30

# JavaScript used to generate the "guid" request field.
GUID_JS = '''
function main(){
let a = (new Date).getUTCMilliseconds();
return String(Math.round(2147483647 * Math.random()) * a % 1e10);
}
'''


def read_text(path):
    """Return the full contents of the UTF-8 text file at *path*."""
    with open(file=path, mode='r', encoding='utf-8') as f:
        return f.read()


def write_text(path, text):
    """Write *text* to *path* as UTF-8, replacing any existing file."""
    with open(file=path, mode='w', encoding='utf-8') as f:
        f.write(text)


def js_call_source(library, function, argument, *, log=False):
    """Return *library* followed by a statement calling *function*.

    *argument* is embedded as a JSON literal.  JSON is valid JavaScript
    expression syntax, whereas Python's ``repr`` is not in general
    (``True``/``None``, ``\\U`` escapes), so ``json.dumps`` is used
    instead of string-formatting the Python object.

    When *log* is true the call is wrapped in ``console.log(...)`` so the
    return value is printed to stdout.
    """
    call = f'{function}({json.dumps(argument)})'
    if log:
        call = f'console.log({call})'
    return f'{library}\n{call}'


def run_node(script_path, source):
    """Write *source* to *script_path*, run it with node, return line one.

    Only the first line of stdout is returned, with a trailing carriage
    return removed so CRLF output does not leak into the result.

    Raises:
        RuntimeError: if node printed nothing usable; the message carries
            the exit code and stderr so the failure is diagnosable.
    """
    write_text(script_path, source)
    proc = subprocess.run(['node', script_path], capture_output=True)
    first_line = proc.stdout.decode('utf-8').split('\n', 1)[0].rstrip('\r')
    if not first_line:
        stderr = proc.stderr.decode('utf-8', errors='replace').strip()
        raise RuntimeError(
            f'node {script_path} printed nothing usable '
            f'(exit code {proc.returncode}): {stderr}'
        )
    return first_line


def build_comm(uin, token):
    """Return the ``comm`` section shared by every request payload."""
    return {
        "cv": 4747474,
        "ct": 24,
        "format": "json",
        "inCharset": "utf-8",
        "outCharset": "utf-8",
        "notice": 0,
        "platform": "yqq.json",
        "needNewCode": 1,
        "uin": uin,
        "g_tk_new_20200303": token,
        "g_tk": token,
    }


def parse_selection(raw, count):
    """Convert the 1-based list number typed by the user to a 0-based index.

    Raises:
        ValueError: if *raw* is not an integer or lies outside 1..count.
            Without this check ``0`` or a negative number would silently
            select a song from the end of the list.
    """
    number = int(raw)
    if not 1 <= number <= count:
        raise ValueError(f'selection must be between 1 and {count}, got {number}')
    return number - 1


def call_api(payload, sign_ctx, encode_js, decode_js, headers, *,
             encode_script, decode_script, dump_path):
    """Sign, encode and POST *payload*, then decode and parse the reply.

    Args:
        payload: request object passed to the signing and encoding scripts.
        sign_ctx: compiled execjs context whose ``main`` returns the sign.
        encode_js: JS source whose ``main1`` is invoked on the payload; the
            first line it prints is used as the POST body.
        decode_js: JS source whose ``main`` is invoked on the response
            bytes (as a list of ints) and whose result is printed.
        headers: HTTP headers for the request.
        encode_script, decode_script: paths of the temporary node scripts.
        dump_path: file that receives the decoded response text.

    Returns:
        The decoded response parsed from JSON.
    """
    sign = sign_ctx.call('main', payload)
    params = {
        '_': int(time.time() * 1000),
        'encoding': 'ag-1',
        'sign': sign
    }
    # The encoding script does asynchronous work, so it cannot be run
    # through execjs directly; it is executed with node instead.
    body = run_node(encode_script, js_call_source(encode_js, 'main1', payload))
    rsp = requests.post(url=API_URL, headers=headers, data=body, params=params,
                        timeout=REQUEST_TIMEOUT)
    rsp.raise_for_status()
    # The response bytes are handed to JS as a plain list of ints and the
    # decoding is likewise run through node via subprocess.
    decoded = run_node(
        decode_script,
        js_call_source(decode_js, 'main', list(rsp.content), log=True),
    )
    write_text(dump_path, decoded)
    return json.loads(decoded)


def main():
    """Search for a keyword, let the user pick a song, print its link."""
    keyword = input("输入关键词:")
    random_js = read_text('encode_random.js')
    sign_js = read_text('encode_sign.js')
    encode_js = read_text('encode_data.js')
    # Strip so a trailing newline in person.txt does not end up in "uin".
    uin = read_text('person.txt').strip()
    decode_js = read_text('decode.js')

    random_ctx = execjs.compile(random_js)
    searchid = random_ctx.call('getSearchId')
    tk2 = random_ctx.call('get_tk', cookies)
    sign_ctx = execjs.compile(sign_js)
    headers = {
        'user-agent': USER_AGENT,
        'cookie': cookies
    }

    search_payload = {
        "comm": build_comm(uin, tk2),
        "req_1": {
            "method": "DoSearchForQQMusicDesktop",
            "module": "music.search.SearchCgiService",
            "param": {
                "remoteplace": "txt.yqq.song",
                "searchid": searchid,
                "search_type": 0,
                "query": keyword,
                "page_num": 1,
                "num_per_page": 10
            }
        }
    }
    search_result = call_api(
        search_payload, sign_ctx, encode_js, decode_js, headers,
        encode_script='2.js', decode_script='1.js', dump_path='res.json',
    )
    songs = search_result['req_1']['data']['body']['song']['list']
    if not songs:
        raise SystemExit('No songs found for this keyword.')
    for number, song in enumerate(songs, start=1):
        singer = ','.join(s['name'] for s in song['singer'])
        print(number, song['title'], singer, song['id'], song['mid'])

    choice = parse_selection(input('输入想下载的歌曲id:'), len(songs))
    song_info = songs[choice]
    song_id = song_info['id']
    song_mid = song_info['mid']
    album_mid = song_info['album']['mid']
    pay_play = song_info['pay']['pay_play']
    guid = execjs.compile(GUID_JS).call('main')

    track_payload = {
        "comm": build_comm(uin, tk2),
        "req_1": {"module": "music.musicasset.SongFavRead", "method": "IsSongFanByMid",
                  "param": {"v_songMid": [song_mid]}},
        "req_2": {"module": "music.musichallSong.PlayLyricInfo", "method": "GetPlayLyricInfo",
                  "param": {"songMID": song_mid, "songID": song_id}},
        "req_3": {"method": "GetCommentCount", "module": "music.globalComment.GlobalCommentRead",
                  "param": {"request_list": [{"biz_type": 1, "biz_id": str(song_id), "biz_sub_type": 0}]}},
        "req_4": {"module": "music.musichallAlbum.AlbumInfoServer", "method": "GetAlbumDetail",
                  "param": {"albumMid": album_mid}},
        "req_5": {"module": "music.vkey.GetEVkey", "method": "GetUrl",
                  "param": {"guid": guid, "songmid": [song_mid], "songtype": [0], "uin": uin,
                            "loginflag": 1, "platform": "20", "xcdn": 1}},
    }
    if pay_play == 1:
        # Prefer the first "vs" entry and fall back to the media mid when it
        # is empty or the list itself is missing/empty.
        versions = song_info.get('vs') or ['']
        media_id = versions[0] or song_info['file']['media_mid']
        track_payload['req_5']['param']['filename'] = [f"RS02{media_id}.mp3"]

    track_result = call_api(
        track_payload, sign_ctx, encode_js, decode_js, headers,
        encode_script='3.js', decode_script='4.js', dump_path='6.json',
    )
    url_info = track_result['req_5']['data']['midurlinfo'][0]
    if pay_play == 1:
        print("当前歌曲需要vip,试听下载链接为:", STREAM_BASE_URL + url_info['purl'])
    else:
        print('下载链接:', url_info['xcdnurl'])


if __name__ == "__main__":
    main()
http://www.jsqmd.com/news/20459/

相关文章:

  • 厨房电子秤芯片方案:SIC8632
  • 《借力JBoltAI框架,Java技术开发公司开启高效开发新范式》
  • Wireshark抓包教程:JSON和HTTPS抓取
  • 贡献法算法思维
  • 白鲸开源数据基建平台荣登“创客北京 2025”企业组TOP150,共同助 Agentic AI 行业变革
  • ORACLE检查并创建表空间和表分区
  • 让cherry studio访问使用Docker Desktop中的Docker Model Runner运行的模型
  • 禁用内核模块,是否需要执行脚本 $ sudo update-initramfs -u $ sudo update-grub ? - 详解
  • 2025 年东莞钢结构厂房施工公司最新推荐榜:聚焦企业专利技术、品质管控及知名客户合作案例的权威解析
  • 高性能AI股票预测分析报告 - 2025年10月23日
  • 2025 年电子万能试验机生产厂家最新推荐榜:聚焦企业专利技术、品质管控及知名客户合作案例的权威解析
  • Spring AI Alibaba Admin 正式开源!!
  • snack4-jsonpath v4.0.2 发布
  • SMARTFORMS去掉数值后面的小数点
  • 089_尚硅谷_switch的课堂练习
  • SQL SERVER死锁查询,死锁分析,解锁,查询占用
  • 自定义组件中监听properties里面的属性的方法里面去取另外的properties里面的属性拿不到值的问题
  • 数字征程,逐级而上;生态协同,智绘新篇—— 艾拓琪威灏 与 哲讯顺维 数字化战略合作签署暨CRMMES项目启动会隆重举行
  • 嵌入式主板全景解析:从选型到趋势,读懂工业智能的核心载体
  • 2025 年唐山油漆生产厂家最新推荐榜单:精选优质企业,解析专业品牌选购指南唐山油漆批发/唐山油漆生产公司推荐
  • 做题笔记21
  • 权威调研榜单:重庆旧房加层改造公司TOP3榜单好评深度解析
  • 2025 年小饭桌加盟品牌最新推荐排行榜:多元服务与深厚底蕴品牌小饭桌加盟连锁/小饭桌加盟连锁/小饭桌少儿托管加盟机构推荐
  • 2025 年 MVR 蒸发器源头厂家最新推荐排行榜:聚焦食品医药化工环保领域优质设备,助力企业精准选型
  • DolphinScheduler依赖机制、Open-Falcon告警推送与监控的优化实践
  • Tailwind CSS 使用入门
  • 2025 年托管班加盟品牌最新推荐排行榜:聚焦国内优质机构,为投资者精选靠谱加盟项目托管班机构加盟/儿童托管班中心加盟/课后托管班加盟/小学托管班加盟连锁推荐
  • vscode创建快捷代码块,同时配置vue2和vue3的快捷代码块
  • 实用指南:Calico 网络插件在 K8s 集群的作用
  • 终于能打出生僻字了!麒麟系统搜狗输入法完整安装指南 - 实践