某宝sign,_m_h5_tk,_m_h5_tk_enc逆向
作者声明:本文仅供参考学习,请勿用于其他用途,违者后果自负!!!!
网址:某宝
一. 目标分析
1. 确认需要逆向的参数:用 Python 复现任意一页的请求,发现 cookie 里只用到了 _m_h5_tk 和 _m_h5_tk_enc,其他 cookie 都可以删掉。这一步一定要先确认哪些参数真正需要逆向,不要做到最后才发现有些值根本不需要逆向,那就很搞笑了!初步分析如下:
2.直接搜索sign, 可以发现有值,可以打上断点,发现可以断住
3.经过分析
- r.token是 _m_h5_tk值的前半部分
- u 是u = (new Date).getTime()确定是时间戳
- s 是 appKey(本例中为 12574478),可以写死
- n.data 就是载荷里面的data的内容
- 最后经过 c 方法得到一个长度为 32 个字符的十六进制字符串,经过测试发现就是 MD5 摘要(可以根据输出长度猜测所用的算法)
4.js代码复现一下
const CryptoJs = require("crypto-js") function get_sign(token,tamp,appkey,data){ l = token + "&" + tamp + "&" + appkey + "&" + data sign = CryptoJs.MD5(l).toString() return sign // console.log(sign) }5.接下来处理n.data
6. 到这里已经可以爬取了,但又出现另一个问题:令牌过期。这就需要处理 cookie 里的 _m_h5_tk 和 _m_h5_tk_enc。经过搜索发现它们是服务器返回的 cookie,这下就好办了:找到对应的接口发送请求,把 _m_h5_tk、_m_h5_tk_enc 提取出来就可以了
二. 代码复现
import json
import time
from urllib.parse import quote

import execjs
import requests

# Seconds to wait for a response; without a timeout requests can block forever.
REQUEST_TIMEOUT = 10

headers = {
    "accept": "*/*",
    "accept-language": "zh-CN,zh;q=0.9",
    "cache-control": "no-cache",
    "pragma": "no-cache",
    "referer": "https://uland.taobao.com/sem/tbsearch?_input_charset=utf-8&bc_fl_src=tbsite_NOX36458&bd_vid=7309723691170949243&channelSrp=baiduSomama&clk1=9eba0b3103da9d29a844f28acaa8311c&commend=all&ie=utf8&initiative_id=tbindexz_20170306&keyword=%E6%89%8B%E6%9C%BA&localImgKey=&page=1&preLoadOrigin=https%3A%2F%2Fwww.taobao.com&q=%E6%89%8B%E6%9C%BA&refpid=mm_26632258_3504122_32538762&search_type=item&source=suggest&sourceId=tb.index&spm=tbpc.pc_sem_alimama%2Fa.search_history.d1&ssid=s5-e&suggest_query=&tab=all&wq=",
    "sec-ch-ua": "\"Google Chrome\";v=\"143\", \"Chromium\";v=\"143\", \"Not A(Brand\";v=\"24\"",
    "sec-ch-ua-mobile": "?0",
    "sec-ch-ua-platform": "\"Windows\"",
    "sec-fetch-dest": "script",
    "sec-fetch-mode": "no-cors",
    "sec-fetch-site": "same-site",
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36",
}


def get_params():
    """Fetch the ``_m_h5_tk`` and ``_m_h5_tk_enc`` cookies from the server.

    Sends one GET request with fixed (captured) query parameters and reads the
    two token cookies from the response's cookie jar.

    Returns:
        A ``(tk, tkenc)`` tuple. Either element is ``None`` when the response
        did not carry that cookie.

    Raises:
        requests.RequestException: if the request fails or times out.
    """
    cookies = {
        "tfstk": "hoqx6GA2MOONfcBlJpqg9cng1TfHMlDsCRywCjijizH_s7WN3m2s65Foan4cuVc9Bcm1tkYa34dqnA-fjqTfVsvSXgfhxM2VlxogFRvbXhkSQbMXccM1e4MtInis5xw5eAMMGFtb186-QbtjhcGXF3MmdIT_1c68FbksfxNslTeSaAis1a6Zltj3v7okXi02uMlJfHpEhwGFYb-wbuU8IXsmwsbUvX07QVEwQhUYhyPZdSOfMcw-pWHLMNvZMzw3mPsURlXW5F7jUx9M_BVRQSyseXXkrF8ZrYk-tTBJ7FujUYhhEgYw74DP.",
        "isg": "BA8PACsHuTvlPb3Bt0N_BXiSnqMZNGNW-Fw5TyEdJn4h8C3yKAdWpsqq8iDOiDvO",
        "cna": "avbDIv+ogDsCARu91EbXIuGX",
    }
    url = "https://h5api.m.taobao.com/h5/mtop.tmall.kangaroo.core.service.route.aldlampservicefixedresv2/1.0/"
    # "t" and "sign" are fixed captured values; this request is only used to
    # read the token cookies off the response.
    params = {
        "jsv": "2.7.4",
        "appKey": "12574478",
        "t": "1782384773298",
        "sign": "ffb3fd366c2adc288ed65e8f7d504b61",
        "jsonpIncPrefix": "tbnav",
        "api": "mtop.tmall.kangaroo.core.service.route.AldLampServiceFixedResV2",
        "v": "1.0",
        "type": "originaljsonp",
        "dataType": "jsonp",
        "valueType": "original",
        "timeout": "5000",
        "callback": "mtopjsonptbnav43",
        "data": "{\"params\":\"{\\\"resId\\\":34667024,\\\"bizId\\\":\\\"443\\\"}\"}",
    }
    response = requests.get(
        url,
        headers=headers,
        cookies=cookies,
        params=params,
        timeout=REQUEST_TIMEOUT,
    )
    tk = response.cookies.get("_m_h5_tk")
    tkenc = response.cookies.get("_m_h5_tk_enc")
    return tk, tkenc


def build_params(page, q, pageSize=48):
    """Build the JSON string sent as the ``data`` query parameter.

    Args:
        page: Page number placed in the inner ``page`` field.
        q: Search keyword placed in the inner ``q`` field (the caller in this
            script passes it already URL-quoted).
        pageSize: Results per page; written to both ``n`` and ``pageSize``.

    Returns:
        A compact JSON string (no spaces after separators) of the form
        ``{"appId": ..., "params": "<inner JSON string>"}``.
    """
    # Inner search parameters; serialized to a string and nested below.
    inner_params = {
        "device": "HMA-AL00",
        "isBeta": "false",
        "grayHair": "false",
        "from": "nt_history",
        "brand": "HUAWEI",
        "info": "wifi",
        "index": "4",
        "rainbow": "",
        "schemaType": "auction",
        "elderHome": "false",
        "isEnterSrpSearch": "true",
        "newSearch": "false",
        "network": "wifi",
        "subtype": "",
        "hasPreposeFilter": "false",
        "prepositionVersion": "v2",
        "client_os": "Android",
        "gpsEnabled": "false",
        "searchDoorFrom": "srp",
        "debug_rerankNewOpenCard": "false",
        "homePageVersion": "v7",
        "searchElderHomeOpen": "false",
        "search_action": "initiative",
        "sugg": "_4_1",
        "sversion": "13.6",
        "style": "list",
        "ttid": "600000@taobao_pc_10.7.0",
        "needTabs": "true",
        "areaCode": "CN",
        "vm": "nw",
        "countryNum": "156",
        "m": "pc_sem",
        "page": page,  # supplied by the caller
        "n": pageSize,
        "q": q,
        "qSource": "url",
        "pageSource": "tbpc.pc_sem_alimama/a.search_history.d1",
        "tab": "all",
        "pageSize": pageSize,
        "totalPage": 100,
        "totalResults": 4800,
        "sourceS": "0",
        "sort": "_coefp",
        "bcoffset": "",
        "ntoffset": "",
        "filterTag": "",
        "service": "",
        "prop": "",
        "loc": "",
        "start_price": None,
        "end_price": None,
        "startPrice": None,
        "endPrice": None,
        "itemIds": None,
        "p4pIds": None,
        "categoryp": "",
        "myCNA": "EJTCIkkW5TACARuAJWamgPdD",
        "clk1": "9eba0b3103da9d29a844f28acaa8311c",
        "refpid": "mm_26632258_3504122_32538762",
    }
    # Outer payload: the inner parameters are embedded as a JSON *string*.
    data_dict = {
        "appId": "43356",
        "params": json.dumps(inner_params, separators=(',', ':')),
    }
    return json.dumps(data_dict, separators=(',', ':'))


tk, tkenc = get_params()
base_cookies = {
    "_m_h5_tk": tk,
    "_m_h5_tk_enc": tkenc,
}
base_url = "https://h5api.m.taobao.com/h5/mtop.relationrecommend.wirelessrecommend.recommend/2.0/"

if __name__ == '__main__':
    content = "手机"
    q_value = quote(content)  # search keyword
    page_size = 48

    # The signing token is the part of _m_h5_tk before the first "_".
    # It does not change between pages, so extract it once.
    token = base_cookies.get("_m_h5_tk")
    if token:
        token = token.split("_")[0]

    # Load and compile the JS signer once instead of on every page, and
    # close the file handle deterministically.
    with open("01taobao.js", encoding="utf-8") as js_file:
        js_compiled = execjs.compile(js_file.read())

    for i in range(1, 4):
        print(f"第{i}页")
        params = {
            "jsv": "2.7.2",
            "appKey": "12574478",
            "t": int(time.time() * 1000),
            # Placeholder; overwritten with the computed signature below.
            "sign": "b280df2002d4635bbff91e609c5ca04f",
            "api": "mtop.relationrecommend.wirelessrecommend.recommend",
            "v": "2.0",
            "type": "jsonp",
            "dataType": "jsonp",
            "callback": "mtopjsonp3",
            "data": build_params(i, q_value, pageSize=page_size),
        }
        # Sign exactly the values that are sent: token, t, appKey, data.
        params["sign"] = js_compiled.call(
            "get_sign", token, params["t"], params["appKey"], params["data"]
        )
        response = requests.get(
            base_url,
            headers=headers,
            cookies=base_cookies,
            params=params,
            timeout=REQUEST_TIMEOUT,
        )
        print(response.text)
        print(response)