爆米花视频"/>
python HTTP协议爬取爆米花视频
给大家分享一段爬取小视频的 Python 代码。
代码如下:
# Helpers for locating and repeatedly downloading a video from the
# "爆米花视频" site over plain HTTP.
#
# NOTE: this snippet uses `os`, `re`, `time` and the third-party `requests`
# package; they must be imported by the surrounding module.
# NOTE(review): every function takes a `self` argument, yet they call each
# other as plain functions (`get_part(self, url)`), so they are kept as
# module-level functions here. `self` is never read - confirm whether these
# were meant to be methods of a class.

# Browser-like User-Agent sent with every request; the value depends on which
# browser you want the server to think you are.
_HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36"
}

# Connect/read timeout (seconds) for each HTTP request, so a stalled socket
# cannot block forever.
_REQUEST_TIMEOUT_S = 10


def _search(pattern, html):
    """Return the first match of *pattern* in *html* (DOTALL), or None."""
    try:
        return re.search(pattern, html, re.S)
    except TypeError:
        # html was not a string (for example None from a failed fetch).
        return None


def get_part(self, url):
    """Fetch *url* and return the response body as text.

    Returns None when the request fails or the status code is not 200.
    Request errors are printed, not raised.
    """
    try:
        response = requests.get(url, headers=_HEADERS, timeout=_REQUEST_TIMEOUT_S)
    except requests.RequestException as e:
        print(e)
        return None
    if response.status_code == 200:
        return response.text
    return None


def parse_part_1(self, html):
    """Extract the value assigned to ``var flvid`` in *html*.

    Returns the captured text, or False when it cannot be found (callers only
    test the result for truthiness).
    """
    match = _search('.*?var flvid = (.*?);.*?', html)
    if match is None:
        print('flvid解析失败')
        return False
    flvid = match.group(1)
    print('flvid为:' + flvid)
    return flvid


def parse_part_2(self, html):
    """Extract ``host_480`` and ``dir`` from *html* and join them as host/dir.

    Returns the joined string, or None when the fields cannot be found.
    """
    # One search yields both groups; there is no need to scan the text twice.
    match = _search('.*?&host_480=(.*?)&.*?&dir=(.*?)&.*?', html)
    if match is None:
        print('解析第一部分URL失败')
        return None
    result = match.group(1) + '/' + match.group(2)
    print('第一部分URL:' + result)
    return result


def parse_part_3(self, html):
    """Extract the ``stream_name`` field from *html*.

    Returns the captured name (without the ``.mp4`` suffix), or None when it
    cannot be found.
    """
    match = _search('.*?&stream_name=(.*?)&.*?', html)
    if match is None:
        print('解析第二部分URL失败')
        return None
    stream_name = match.group(1)
    print('第二部分URL:' + '/' + stream_name + '.mp4')
    return stream_name


def download_video(self, url, timeout):
    """Download *url* to a fixed local file, repeating for *timeout* seconds.

    The file is re-downloaded in a loop until the monitoring window has
    elapsed; at least one attempt is always made. Failed attempts are
    reported and retried. Returns None.
    """
    file_path = 'D:/workspace/Base_Project_DT/test.mp4'
    print('开始下载视频:' + url)
    # The original author downloaded asynchronously while monitoring the
    # transfer rate; here `timeout` is only the length of the download window.
    print('设置速率监测时间为%ds' % timeout)
    if os.path.exists(file_path):
        print('删除已经存在的视频')
        os.remove(file_path)

    succeeded = False
    # A monotonic clock is immune to wall-clock adjustments during the run.
    deadline = time.monotonic() + timeout
    # Download the video over and over along the test route.
    while True:
        try:
            response = requests.get(url, headers=_HEADERS, timeout=_REQUEST_TIMEOUT_S)
            # Do not save an HTTP error page as if it were the video.
            response.raise_for_status()
            with open(file_path, 'wb') as f:
                f.write(response.content)
        except (requests.RequestException, OSError):
            print('视频下载失败')
        else:
            succeeded = True
        if time.monotonic() > deadline:
            break

    # Report success only if at least one attempt actually wrote the file.
    if succeeded:
        print('文件为:' + file_path)
        print('视频下载成功:' + url)


def main(self, url, timeout):
    """Resolve the real video address for page *url* and download it.

    Steps: fetch the page, read its flvid, query two endpoints for the host/
    directory and the stream name, build the ``.mp4`` URL, then call
    ``download_video``. Stops at the first step that fails. Returns None.
    """
    try:
        print('视频初始URL:' + url)
        html = get_part(self, url)
        if not html:
            return
        flvid = parse_part_1(self, html)
        if not flvid:
            return

        # NOTE(review): both endpoint URLs below have no scheme/host - it looks
        # like they were stripped when this snippet was published. Restore the
        # real host before use; as written, the requests will fail.
        url = '.aspx?qudaoid=42&devicetype=pc%5Fplayer&flvid={}&Resolution=1'.format(flvid)
        html = get_part(self, url)
        if not html:
            return
        base_url_1 = parse_part_2(self, html)
        if not base_url_1:
            print('解析视频地址失败')
            return

        url = '.aspx?flvid={}&devicetype=phone_app_Android'.format(flvid)
        html = get_part(self, url)
        if not html:
            return
        base_url_2 = parse_part_3(self, html)
        if not base_url_2:
            print('解析视频地址失败')
            return

        url = 'http://' + base_url_1 + '/' + base_url_2 + '.mp4'
        print('视频下载URL:' + url)
        download_video(self, url, timeout)
    except Exception as e:
        # Top-level boundary: report instead of crashing, but show the cause.
        print('解析视频地址失败')
        print(e)


def downMedia(self, timeout):
    """Entry point: download the configured video page for *timeout* seconds."""
    # NOTE(review): the example links in this hint appear to have been stripped
    # from the published snippet; the pieces are kept as placeholders.
    print('温馨提醒:e.g. ' + '、' + '' + '...')
    # Set this to the address of the video page you want to crawl.
    url = ''
    main(self, url, timeout)
    return None
当然,你可以在此基础上改造:加入循环以下载多个视频、监控下载速率,从而测试产品的网络性能。此外,还有一些爬取腾讯 VIP 视频的方法,有时间再给大家分享!
备注:爆米花视频网站,直接在百度上搜索即可
更多推荐
python HTTP协议爬取爆米花视频
发布评论