景点评论信息:基于requests和BeautifulSoup库
【python】爬取驴妈妈景点评论信息:基于requests和BeautifulSoup库
效果如图:
修改 data 中的 placeId 即可爬取其他景点的评论信息
from gevent import monkey
monkey.patch_all()
import gevent
import requests
import openpyxl
from bs4 import BeautifulSoup
import json
import re
import time

# Shared accumulator: every greenlet running comment() appends its rows here,
# and the __main__ driver hands the list to storage() once all pages are done.
allList = []

# NOTE(review): the endpoint URL is an empty string in the source as given, so
# requests.post() cannot succeed until the real comment-pagination URL is
# filled in here.
COMMENT_URL = ""

# Control characters removed from comment text before it is stored.
# NOTE(review): presumably mirrors the characters openpyxl refuses to write
# into a cell -- confirm against the openpyxl version in use.
ILLEGAL_CHARACTERS_RE = re.compile(r'[\000-\010]|[\013-\014]|[\016-\037]')


def _parse_comment(result):
    """Turn one ``div.comment-li`` node into a flat row for the spreadsheet.

    Returns:
        ``[userId, date, score, service, experience, book, costPerformance,
        imgNum, text]`` in the same column order as the header in storage().
    """
    user_info = result.find("div", class_="com-userinfo")
    # Reviewer ID
    userId = user_info.find("p").find("a").text
    # Review date: the last <em> inside the user-info block
    date = user_info.find_all("em")[-1].text
    # Overall score
    score = result.find(class_="ufeed-level").find("i")["data-level"]

    # Sub-scores, in page order: service, experience, booking, value for money.
    # Only the first character of each <i> text is kept.
    scores = result.find_all("span", class_="ufeed-item")
    if not scores:
        service = experience = book = costPerformance = ""
    else:
        service = scores[0].find("i").text[0]
        experience = scores[1].find("i").text[0]
        book = scores[2].find("i").text[0]
        costPerformance = scores[3].find("i").text[0]

    # Number of attached pictures
    pictures = result.find("div", class_="compic-small")
    imgNum = 0 if pictures is None else len(pictures.find_all("li"))

    # Comment text: when the content block contains a <span>, that span's text
    # is removed from the content text before stripping.
    content = result.find("div", class_="ufeed-content")
    surplus_node = content.find("span")
    if surplus_node is None:
        text = content.text.strip()
    else:
        text = content.text.replace(surplus_node.text, "").strip()
    text = ILLEGAL_CHARACTERS_RE.sub(r'', text)

    return [userId, date, score, service, experience, book,
            costPerformance, imgNum, text]


def comment(page):
    """Fetch one page of comments and append one row per comment to ``allList``.

    Args:
        page: Page number, sent as ``currentPage`` in the POST form data.

    Prints ``"false"`` when the response contains no ``div.comment-li`` nodes.
    """
    data = {
        "type": "all",
        "currentPage": page,
        "totalCount": "5757",
        "placeId": "105140",
        "productId": "",
        "placeIdType": "PLACE",
        "isPicture": "",
        "isBest": "",
        "isPOI": "Y",
        "isELong": "N",
    }
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36",
    }
    res = requests.post(COMMENT_URL, data=data, headers=headers, timeout=30)
    bs = BeautifulSoup(res.text, "html.parser")
    results = bs.find_all("div", class_="comment-li")
    # find_all() returns a (possibly empty) list, never None, so the miss
    # case has to be detected by emptiness.
    if not results:
        print("false")
    for result in results:
        allList.append(_parse_comment(result))
    # NOTE(review): the collapsed source does not show whether this pause sat
    # inside or after the loop; it is applied once per page here -- confirm.
    time.sleep(5)


def storage(name, reviewsList):
    """Write the collected rows to an .xlsx workbook under ``存储/``.

    Args:
        name: Label inserted into the output file name.
        reviewsList: Iterable of rows; each row is appended below the header.
    """
    import os

    header = ['评论者ID', '评论日期', '总评分', '景区服务', '游玩体验', '预订便捷', '性价比', '图片数量', '文本评论']
    wb = openpyxl.Workbook()
    sheet = wb.active
    sheet.title = "commentInfo"
    sheet.append(header)
    for reviewList in reviewsList:
        sheet.append(reviewList)
    # Create the output directory up front so the save below does not fail
    # when it is missing.
    os.makedirs("存储", exist_ok=True)
    wb.save("存储/驴妈妈 " + name + "'s " + 'comment.xlsx')


if __name__ == "__main__":
    page = 576
    # One greenlet per page; all of them are spawned before any is joined.
    taskList = [gevent.spawn(comment, i) for i in range(1, page + 1)]
    gevent.joinall(taskList)
    print(len(allList))
    storage("windows of the world", allList)
更多推荐
【python】爬取驴妈妈景点评论信息:基于requests和BeautifulSoup库
发布评论