使用python下载二进制文件
图片下载
# Download an image: request the URL and write the raw response body to disk.
import requests

# Request headers -- left empty here; fill in as the target site requires.
headers = {
}
cookies = {}
url = "https://"
# Query-string parameters sent along with the request.
params = {
    "w": "640",
    "h": "427",
}
response = requests.get(url, headers=headers, cookies=cookies, params=params)
# response.content is the undecoded body, so the file is opened in binary
# mode; the with-block makes sure the handle is flushed and closed.
with open('1.jpg', 'wb') as image_file:
    image_file.write(response.content)
print(response)
base64模块
base64 同样是 Python 的一个内置模块,无需额外安装。
import base64
# Text to encode.
original_string = "Hello, World!"

# Base64 operates on bytes, so the text is first turned into UTF-8 bytes;
# the encoded result is bytes as well and is converted back to str.
raw_bytes = bytes(original_string, 'utf-8')
encoded_bytes = base64.b64encode(raw_bytes)
encoded_string = str(encoded_bytes, 'utf-8')
print("Base64编码后的字符串:", encoded_string)

# Decoding reverses the steps: Base64 text -> bytes -> UTF-8 text.
decoded_bytes = base64.b64decode(encoded_string)
decoded_string = str(decoded_bytes, 'utf-8')
print("Base64解码后的字符串:", decoded_string)
不指定字符集效果也是一样的(encode() 和 decode() 默认使用 UTF-8):
import base64
# Text to encode.
original_string = "Hello, World!"

# encode() and decode() are called without an argument here, which makes
# them fall back to their default charset (UTF-8).
encoded_string = base64.b64encode(original_string.encode()).decode()
print("Base64编码后的字符串:", encoded_string)

# Decode the Base64 text back to bytes, then to str, again with the default
# charset.
decoded_string = base64.b64decode(encoded_string).decode()
print("Base64解码后的字符串:", decoded_string)
b64图片下载
使用base64模块把b64编码转成图片
import base64
# Placeholder data URI (the "..." stands for the elided payload). The "\/"
# sequences are presumably JSON-escaped slashes -- confirm against the real
# source of the string. A raw string is used because "\/" is not a valid
# Python escape sequence and a normal literal triggers an invalid-escape
# warning.
a = r"data:image\/jpeg;base64,aUkfslGRshF...YAA=="
# Remove the backslashes, then keep only what follows the first comma. This
# drops the "data:<mime>;base64," header for any MIME type (not only JPEG)
# and leaves a header-less string untouched.
b = a.replace("\\", "").split(",", 1)[-1]
imgdata = base64.b64decode(b)
print(imgdata)
# Binary mode: imgdata is raw bytes, not text.
with open("decoded_image.jpg", "wb") as file:
    file.write(imgdata)
音频下载
音频也是同样原理
# Audio is fetched the same way as the image: issue the request and write the
# raw response body to a file opened in binary mode.
response = requests.get(
    url,
    params=params,
    cookies=cookies,
    headers=headers,
)
with open('3.mp3', 'wb') as audio_file:
    audio_file.write(response.content)
print(response)
视频下载(m3u8)
一个视频看做一个整体,一大段二进制数据
# Toy stand-in for a video's binary content: one long sequence that is read
# piece by piece -- 1, 2, 3, 1, 3, 2, ...
data = "12313213213213213214325432543654786597659"
1. 请求获取m3u8文件。
2. 查看 EXT-X-KEY 这一行,判断是否有加密,例如:
   #EXT-X-KEY:METHOD=AES-128,URI="https://.../static.key"
3. 下载ts文件:如果有加密就先解密再保存,否则直接保存,并且放到一起。
4. 下载完所有ts文件之后,需要打开一个新文件,使用二进制模式打开。
5. 循环读取所有ts文件,写入第四步打开的这个新文件中。
import json
import re

import requests
from tqdm import tqdm  # progress bar; install it if your IDE reports it missing

# Request headers -- fill these in yourself.
headers = {
}
# Cookies -- fill these in when the site needs them.
cookies = {
}


def get_m3u8_file_url():
    """Fetch the video page and extract the playlist URL and the title.

    The URL and the regular expression below are placeholders that must be
    replaced for the actual site.

    Returns:
        A ``(play, filename)`` tuple: the URL read from the nested JSON and
        the value of the ``title`` key.

    Raises:
        IndexError: If the regular expression matches nothing.
    """
    url = "https://.."
    response = requests.get(url, headers=headers, cookies=cookies)
    document = response.text
    # Placeholder pattern -- replace it with the expression that captures the
    # JSON blob. [:-1] drops the last character of the match (presumably a
    # trailing delimiter -- confirm against the real page).
    info = re.findall(' 自己补上要提取的正则表达式', document, re.S)[0].strip()[:-1]
    info_json = json.loads(info)
    # The value under ['a']['a1'] is itself a JSON string, hence the second
    # json.loads.
    play = json.loads(info_json['a']['a1'])['b'][0]['b1'][0]['url']
    # Reuse the already-parsed object instead of parsing `info` again.
    filename = info_json['title']
    return play, filename


def get_ts_files_url(url):
    """Download the m3u8 playlist at *url* and return its ts entries.

    Args:
        url: Address of the m3u8 playlist.

    Returns:
        A list of the whitespace-separated entries left after every ``#...``
        line content has been removed from the playlist text.
    """
    response = requests.get(url, headers=headers, cookies=cookies)
    document = response.text
    # Strip the "#..." parts so that only the ts file URLs remain.
    ts_files = re.sub('#.*', '', document).split()
    print(ts_files)
    return ts_files


def get_ts_files(ts_files, path, filename):
    """Download every ts segment and concatenate them into one .mp4 file.

    Args:
        ts_files: Segment entries as returned by ``get_ts_files_url``.
        path: Directory the merged file is written to.
        filename: Name of the output file, without the ``.mp4`` extension.
    """
    # 'wb' instead of 'ab': every segment is written through this one handle,
    # so truncating first gives the same result on a fresh run and prevents a
    # re-run from appending a second copy of the video to an existing file.
    with open(f'{path}/{filename}.mp4', 'wb') as f:
        for ts in tqdm(ts_files):
            ts_url = 'https://..' + ts
            ts_content = requests.get(ts_url, headers=headers, cookies=cookies).content
            # If the stream is encrypted, decrypt the binary data here before
            # it is written.
            f.write(ts_content)
    print(f'{filename}:合成完成!')


if __name__ == '__main__':
    # Raw string: in a normal literal the "\t" of 'D:\test' is a TAB
    # character, which silently corrupts the path. Change to your own folder.
    path = r'D:\test'
    m3u8_file_url, title = get_m3u8_file_url()
    print(m3u8_file_url)
    ts_files_url = get_ts_files_url(m3u8_file_url)
    get_ts_files(ts_files_url, path, title)
