admin管理员组

文章数量:1659320

免费视频网站:https://mixkit.co/free-stock-video/nature
应用版本:PyCharm 2023.2.4、Python 3.11.3
结果展示:
涉及依赖:
import requests
from bs4 import BeautifulSoup
import json
from tqdm import tqdm
from multiprocessing import Pool, freeze_support, RLock
代码:
import requests
from bs4 import BeautifulSoup
import json
from tqdm import tqdm
from multiprocessing import Pool, freeze_support, RLock

# HTTP headers passed to getHTMLText() for page requests.
# Set "User-Agent" to the value your own browser sends.
headers = {"User-Agent": ""}

# Absolute path of the directory the downloaded videos are saved to.
file_path = "C:\\Users\\Desktop\\video\\"


def download_videos_thread(videoWebUrls, max_workers=None):
    """Download every video page in ``videoWebUrls`` using a process pool.

    Despite the name, the work is distributed over worker *processes*
    (``multiprocessing.Pool``); each worker calls ``download_video`` with one
    URL at a time.

    Args:
        videoWebUrls: Sized collection of video page URLs.
        max_workers: Optional upper bound on the number of worker processes.
            ``None`` (the default) starts one process per URL.

    Returns:
        None.
    """
    freeze_support()

    worker_count = len(videoWebUrls)
    # Pool() raises ValueError for a process count of 0, so an empty work
    # list is treated as "nothing to do".
    if worker_count == 0:
        return

    if max_workers is not None:
        worker_count = max(1, min(worker_count, max_workers))

    # Every worker installs the same lock via tqdm.set_lock, which tqdm uses
    # to serialise progress-bar output. The ``with`` block shuts the worker
    # processes down once the blocking ``map`` call has returned.
    with Pool(worker_count, initializer=tqdm.set_lock, initargs=(RLock(),)) as pool:
        pool.map(download_video, videoWebUrls)


def download_video(videoWebUrl):
    """Download the video embedded in one page and save it as an .mp4 file.

    The page is fetched with ``getHTMLText``. The text of its ``<title>``
    element becomes the file name (inside ``file_path``) and the ``src`` of
    the first ``<video>`` element is the download URL. A tqdm progress bar
    tracks the number of bytes written.

    Pages that cannot be fetched, or that lack a title or a video source, are
    reported and skipped instead of raising.

    Args:
        videoWebUrl: URL of the page that embeds the video.

    Raises:
        requests.RequestException: If the video request itself fails
            (connection error, timeout or HTTP error status).
        OSError: If the target file cannot be opened or written.
    """
    soup = getHTMLText(videoWebUrl, headers)
    # getHTMLText returns "" instead of a parsed document when the request fails.
    if not soup:
        print(f"Skipping {videoWebUrl}: page could not be fetched")
        return

    title_tag = soup.find('title')
    video_tag = soup.find('video')
    videoUrl = video_tag.get('src') if video_tag is not None else None
    if title_tag is None or not videoUrl:
        print(f"Skipping {videoWebUrl}: no title or video source found")
        return

    title = title_tag.text
    # Characters such as ':' or '|' are reserved in Windows file names;
    # replace them so the title can be used as a file name.
    safe_name = title.strip().translate(str.maketrans(dict.fromkeys('\\/:*?"<>|', '_')))

    # A timeout keeps a stalled connection from blocking this worker forever.
    with requests.get(videoUrl, stream=True, timeout=20) as r:
        r.raise_for_status()
        # Total size for the progress bar; 0 when the server sends no Content-Length.
        total_size = int(r.headers.get('Content-Length', 0))
        block_size = 1024

        # Using the bar as a context manager closes it even if the download fails.
        with tqdm(total=total_size, unit='B', unit_scale=True,
                  desc=title + '--下载进度:', colour='blue') as progress_bar:
            with open(file_path + safe_name + '.mp4', 'wb') as f:
                for chunk in r.iter_content(block_size):
                    if chunk:
                        f.write(chunk)
                        progress_bar.update(len(chunk))


def get_videos_url(rootUrl):
    """Collect the video page URLs listed on a category page.

    The first ``<script type="application/ld+json">`` element of the page is
    parsed as JSON. Within its ``@graph`` array, the entry whose ``@type`` is
    ``ItemList`` supplies ``itemListElement``; the ``@id`` of each element is
    taken as a video page URL. If several ``ItemList`` entries exist, the last
    one is used.

    Args:
        rootUrl: URL of the listing page.

    Returns:
        list: The ``@id`` values found, in the order they appear in the JSON
        data. Empty when the page could not be fetched or does not contain
        the expected JSON-LD structure.
    """
    soup = getHTMLText(rootUrl, headers)
    # getHTMLText returns "" instead of a parsed document when the request fails.
    if not soup:
        return []

    script = soup.find('script', {'type': 'application/ld+json'})
    if script is None:
        return []

    try:
        json_loads = json.loads(script.get_text())
    except json.JSONDecodeError:
        return []

    graphs = json_loads.get('@graph') if isinstance(json_loads, dict) else None

    # Kept local (not a module-level global) and pre-initialised, so a page
    # without an ItemList yields an empty result rather than a NameError or
    # stale data from an earlier call.
    items = []
    for graph in graphs or []:
        if graph.get('@type') == 'ItemList':
            items = graph.get('itemListElement') or []

    # Entries without an '@id' are dropped: they cannot be downloaded.
    return [item.get('@id') for item in items if item.get('@id')]


def getHTMLText(url, headers):
    """Fetch a web page and return it as a parsed HTML document.

    Args:
        url: Address of the page to request.
        headers: Dict of HTTP headers sent with the request.

    Returns:
        A ``BeautifulSoup`` object built with the ``html.parser`` backend, or
        the empty string ``""`` when the request fails (connection error,
        timeout after 20 seconds, or HTTP error status).
    """
    try:
        r = requests.get(url=url, headers=headers, timeout=20)
        # Treat 4xx/5xx answers as failures instead of parsing an error page.
        r.raise_for_status()
    except requests.RequestException as exc:
        # Only request errors are handled here, so KeyboardInterrupt and
        # programming errors are no longer silently swallowed.
        print(f"Request for {url} failed: {exc}")
        return ""

    # Decode the body with the encoding detected from its content.
    r.encoding = r.apparent_encoding
    return BeautifulSoup(r.text, "html.parser")


if __name__ == '__main__':
    # Must run straight after the __main__ guard: in a frozen Windows
    # executable the child processes re-enter this module, and without this
    # call they would fetch the listing page again before starting their work.
    # It has no effect when the script is run normally.
    freeze_support()

    # Listing page whose videos are downloaded.
    rootUrl = 'https://mixkit.co/free-stock-video/nature/'
    # Collect the URL of every video page on the listing.
    videoWebUrls = get_videos_url(rootUrl)
    # Download the videos in parallel worker processes.
    download_videos_thread(videoWebUrls)
仅供参考学习
完~

本文标签: 多线程 进度条 视频 Python