Python 协程的力量
Python 协程的力量
爬取135页图片 同步访问
# Synchronous image crawler: walks listing pages 3..134, follows each
# detail link found under <p id="l">, and saves every image to disk.
# NOTE(review): the f-string URLs below were scrubbed from the original
# post (they render as just '{i}.html' / the raw href) — restore the real
# site prefix before running.
import re
import requests
import csv
from bs4 import BeautifulSoup

# Loop-invariant request headers, hoisted out of the page loop.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:101.0) Gecko/20100101 Firefox/101.0"
}

for i in range(3, 135):
    print(f'当前页码:{i}')
    url = f'{i}.html'
    res = requests.get(url=url, headers=headers)
    res.encoding = 'utf-8'
    page = BeautifulSoup(res.text, "html.parser")
    graph = page.find('p', attrs={'id': 'l'})
    alist = graph.find_all('a')
    for a in alist:
        sub_url = f'{a.get("href")}'
        # Bug fix: original did `sub_res=sub_page=requests.get(...)` — a
        # pointless double assignment immediately overwritten below.
        sub_res = requests.get(url=sub_url, headers=headers)
        sub_res.encoding = 'utf-8'
        sub_page = BeautifulSoup(sub_res.text, "html.parser")
        pic = sub_page.find('img').get('src')
        filename = pic.split('/')[-1]
        # Bug fix: use the computed filename (the original wrote every
        # image to one mangled placeholder path, overwriting itself).
        with open(f'D:\\images\\{filename}', mode='wb') as f:
            image = requests.get(pic)
            f.write(image.content)
            image.close()  # bug fix: image response was never closed
        # Bug fix: original said `sub_res.close` (attribute access, no
        # call) — the connection was never actually released.
        sub_res.close()
    res.close()
同步方式下载了 2 小时还没有完成
# Async rewrite of the crawler: one task per listing page, images
# downloaded and written with aiohttp + aiofiles.
# NOTE(review): the f-string URLs were scrubbed from the original post
# ('{i}.html' / raw href) — restore the real site prefix before running.
import asyncio
import aiohttp
import aiofiles
from bs4 import BeautifulSoup

headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:101.0) Gecko/20100101 Firefox/101.0"
}


async def get_images(url, session):
    """Fetch one listing page and download every image it links to.

    url: listing-page URL; session: a shared aiohttp.ClientSession.
    """
    async with session.get(url=url, headers=headers) as res:
        # NOTE(review): assigning .encoding on an aiohttp response is
        # presumably a no-op carried over from the requests version —
        # pass encoding to res.text() instead if decoding misbehaves.
        res.encoding = 'utf-8'
        page = BeautifulSoup(await res.text(), "html.parser")
        graph = page.find('p', attrs={'id': 'l'})
        alist = graph.find_all('a')
        for a in alist:
            sub_url = f'{a.get("href")}'
            async with session.get(url=sub_url, headers=headers) as sub_res:
                sub_res.encoding = 'utf-8'
                sub_page = BeautifulSoup(await sub_res.text(), "html.parser")
                pic = sub_page.find('img').get('src')
                filename = pic.split('/')[-1]
                # Bug fixes: (1) write to the computed filename instead of
                # the mangled placeholder path that every image clobbered;
                # (2) the image response is now released via async with —
                # the original `await session.get(pic)` leaked it.
                async with session.get(pic) as image:
                    async with aiofiles.open(f'D:\\images\\{filename}', mode='wb') as f:
                        await f.write(await image.content.read())


async def job():
    """Spawn one download task per listing page (3..134) and await all."""
    tasks = []
    async with aiohttp.ClientSession() as session:
        for i in range(3, 135):
            url = f'{i}.html'
            tasks.append(asyncio.create_task(get_images(url, session)))
        # Wait inside the session context so connections stay open
        # until every task finishes.
        done, pending = await asyncio.wait(tasks)


if __name__ == '__main__':
    asyncio.run(job())
协程方式 5 分钟就下载完成了
更多推荐
Python 协程的力量
发布评论