Python爬取安居客新房房源

编程入门 行业动态 更新时间:2024-10-17 07:34:07

Python爬取安居客新房<a href="https://www.elefans.com/category/jswz/34/1767729.html">房源</a>

Python爬取安居客新房房源

1.房源列表页的网址由“城市名的拼音 + 统一的后缀地址”拼接而成,因此需要用xpinyin库把输入的城市名转换成拼音。

2.用了2种解析网页数据的方式:bs4(BeautifulSoup)和lxml的xpath(先学习的bs4,学了xpath后部分代码改成了xpath)。

import urllib.request
import urllib.parse
from bs4 import BeautifulSoup
from xpinyin import Pinyin
import time
from lxml import etree
import json
'''
更多Python学习资料以及源码教程资料,可以在群1136201545免费获取
'''
class anjuk_spider(object):
    """Scrape new-home listings for one city from Anjuke and save them as JSON.

    The listing URL is built as ``https://<city pinyin><url>``; result pages
    are addressed by appending ``p<N>/`` (1-based).  Parsed listings are
    accumulated in ``self.items`` as dicts and written to a text file by
    :meth:`run`.
    """

    # Host suffix and path appended to the city's pinyin.
    # NOTE: the literal previously read '.fang.anjuke/loupan/all/' (no TLD),
    # which is not a resolvable host; '.com' is restored here.
    url = '.fang.anjuke.com/loupan/all/'

    # Browser-like User-Agent sent with every request (shared by all requests
    # instead of being duplicated in handle_request() and run()).
    _HEADERS = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36"}

    # Seconds to pause after each parsed listing / after each fetched page.
    _ITEM_DELAY = 1
    _PAGE_DELAY = 2

    # Characters removed from every layout text fragment: newline, tab,
    # non-breaking space, slash and plain space.
    _HUXING_STRIP = str.maketrans('', '', '\n\t\xa0/ ')

    def __init__(self, city):
        """Remember the city name and start with an empty result list."""
        self.city = city
        # Holds one dict per successfully parsed listing.
        self.items = []

    def citypinyin(self, city):
        """Return the pinyin of ``self.city`` with no separator.

        NOTE: the ``city`` argument is accepted for interface compatibility
        but is not used; the instance's own ``self.city`` is converted.
        """
        p = Pinyin()
        return p.get_pinyin(self.city, '')

    def handle_request(self, city_url, page_num):
        """Build the request for a result page.

        Args:
            city_url: Base listing URL of the city (ends with ``/``).
            page_num: Zero-based page index; the URL uses ``page_num + 1``.

        Returns:
            A ``urllib.request.Request`` for ``<city_url>p<page_num + 1>/``.
        """
        page_url = f"{city_url}p{page_num + 1}/"
        return urllib.request.Request(page_url, headers=self._HEADERS)

    def price(self, item):
        """Return the price text of one listing element.

        When the first price paragraph reads '售价待定', the span value is
        reported as the surrounding average price; otherwise the label and
        the span value are simply concatenated.

        Raises:
            IndexError: if either XPath query returns no result.
        """
        label = item.xpath('.//a[@class="favor-pos"]/p[1]/text()')[0]
        amount = item.xpath('.//a[@class="favor-pos"]/p/span/text()')[0]
        if label == '售价待定':
            return label + " 周边均价:" + amount + "元/㎡"
        return label + amount + "元/㎡"

    def huxing(self, item):
        """Return the layout/area text of one listing, joined with '/'.

        Every text node under the ``huxing`` link is stripped of whitespace
        and slashes; fragments that end up empty are dropped.
        """
        fragments = item.xpath('.//a[@class="huxing"]//text()')
        cleaned = (text.translate(self._HUXING_STRIP) for text in fragments)
        return "/".join(part for part in cleaned if part)

    def address(self, item):
        """Return the address of one listing without its bracketed prefix.

        The text after the first ']' is returned, skipping the ']' itself and
        the single character that follows it.  If the text contains no ']'
        it is returned unchanged (previously its first character was lost,
        because ``find`` returned -1 and the slice became ``[1:]``).

        Raises:
            IndexError: if the XPath query returns no result.
        """
        raw = item.xpath('.//a[@class="address"]//span[@class="list-map"]/text()')[0]
        bracket = raw.find("]")
        if bracket == -1:
            return raw
        return raw[bracket + 2:]

    def parse_content(self, content):
        """Parse one result page and append every readable listing to ``self.items``.

        A listing whose fields cannot be extracted is reported on stdout and
        skipped; the remaining listings are still processed.
        """
        tree = etree.HTML(content)
        listings = tree.xpath('//div[@class="key-list imglazyload"]//div[@class="item-mod "]')
        for listing in listings:
            try:
                name = listing.xpath('.//h3//span[@class="items-name"]/text()')[0]
                location = self.address(listing)
                layout = self.huxing(listing)
                cost = self.price(listing)
            except Exception:
                # Best-effort per listing: skip malformed entries, but unlike
                # a bare ``except`` let KeyboardInterrupt/SystemExit through.
                print("读取出现问题!")
                continue

            self.items.append({
                "小区名称": name,
                "小区位置": location,
                "户型面积": layout,
                "价格": cost,
            })
            print("小区名称:%s | 位置:%s" % (name, location))
            print("%s|价格:%s" % (layout, cost))
            print("******************************************************************************")
            time.sleep(self._ITEM_DELAY)

    @staticmethod
    def _page_count(total, per_page=60):
        """Return how many result pages cover ``total`` listings (at least 1).

        Uses integer ceiling division so the result is always an ``int``
        usable with ``range``; the previous ``total / 60`` produced a float
        whenever ``total`` was a multiple of 60.
        """
        return max(1, -(-total // per_page))

    @staticmethod
    def _fetch(request):
        """Send ``request`` and return the decoded body, closing the response."""
        with urllib.request.urlopen(request) as response:
            return response.read().decode()

    def run(self):
        """Fetch every result page for the city and dump the listings to disk.

        The first request reads the total number of listings from the landing
        page; that total determines how many pages (60 listings each) are
        requested.  The collected items are written as JSON text to
        '安居客房价.txt'.
        """
        # Request URL = 'https://' + city pinyin + shared suffix.
        city_url = 'https://' + self.citypinyin(self.city) + self.url
        landing_request = urllib.request.Request(city_url, headers=self._HEADERS)
        landing_page = self._fetch(landing_request)

        # The listing total is read with bs4 from the sort bar of the page.
        soup = BeautifulSoup(landing_page, 'lxml')
        total = int(soup.select('.list-results > .key-sort > .sort-condi > span > em')[0].text)

        for page_num in range(self._page_count(total)):
            request = self.handle_request(city_url, page_num)
            self.parse_content(self._fetch(request))
            time.sleep(self._PAGE_DELAY)

        serialized = json.dumps(self.items, ensure_ascii=False)
        with open('安居客房价.txt', 'w', encoding='utf-8') as f:
            f.write(serialized)
        print("存入文本!")


def main():
    """Ask for a city name on stdin and scrape its new-home listings."""
    city = input("请输入城市名字:")
    spider = anjuk_spider(city)
    spider.run()


if __name__ == '__main__':
    main()

更多推荐

Python爬取安居客新房房源

本文发布于:2024-03-06 04:44:31,感谢您对本站的认可!
本文链接:https://www.elefans.com/category/jswz/34/1714390.html
版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系,我们将在24小时内删除。
本文标签:房源   新房   Python

发布评论

评论列表 (有 0 条评论)
草根站长

>www.elefans.com

编程频道|电子爱好者 - 技术资讯及电子产品介绍!