天气预报数据
Python 爬取天气预报数据
安装依赖
更新pip
pip install --upgrade pip
安装lxml
参考:
1. 到这个链接下, 搜索lxml, 下载对应的 lxml.whl 文件(预编译过的)
如:我本机python的版本是3.9, 64位win10系统, 那么就选择
注意: 如果没有安装wheel, 要先安装wheel
pip install wheel
2. 下载完成后, 把下载好的.whl文件放在python的根目录下, 比如我的是: D:\Python\Python39
在该目录下执行:
python -m pip install lxml-<版本号>-cp39-cp39-win_amd64.whl
安装beautifulsoup4
pip install beautifulsoup4
编写代码 weather.py
参考:
from bs4 import BeautifulSoup
from bs4 import UnicodeDammit # BS 内置库,猜测文档编码
import urllib.request

# Target page: the 7-day forecast for one city on www.weather.com.cn.
# NOTE(review): the original URL was truncated to just '.shtml' by the
# page scrape; a real city page (e.g. Shenzhen, code 101280601) is
# restored below — confirm the city code you actually want.
url = 'http://www.weather.com.cn/weather/101280601.shtml'

try:
    # Browser-like User-Agent so the server returns the normal page.
    headers = {
        'User-Agent': 'Mozilla/5.0(Windows;U;Windows NT 6.0 x64;en-US;'
                      'rv:1.9pre)Gecko/20191008 Minefield/3.0.2pre'}
    req = urllib.request.Request(url, headers=headers)
    resp = urllib.request.urlopen(req)
    raw = resp.read()

    # UnicodeDammit (from bs4) guesses the page encoding; both candidate
    # encodings go in one list.  (Original had the typo `dammint`.)
    dammit = UnicodeDammit(raw, ['utf-8', 'gbk'])
    markup = dammit.unicode_markup
    soup = BeautifulSoup(markup, 'lxml')

    # Each <li> under <ul class="t clearfix"> is one day's forecast.
    lis = soup.select("ul[class='t clearfix'] li")
    for li in lis:
        try:
            # Renamed from `data` (original reused the variable that
            # held the raw page bytes for the forecast date).
            date = li.select('h1')[0].text
            weather = li.select("p[class='wea']")[0].text
            # High / low temperature, joined as "high/low".
            temp = li.findAll('span')[0].text + '/' + li.findAll('i')[0].text
            print(date, weather, temp)
        except Exception as err:
            # One malformed <li> must not abort the whole page.
            print(err)
except Exception as err:
    print(err)
测试
python weather.py
成功爬取中国天气网某城市七天的天气情况。
升级版
爬取的升级版,是爬取几个城市的天气情况:
from bs4 import BeautifulSoup
from bs4 import UnicodeDammit
import urllib.request
import sqlite3


class WeatherDB:
    """Thin wrapper around the SQLite database that stores forecasts.

    Table ``weathers``: (wCity, wDate, wWeather, wTemp) with a composite
    primary key on (wCity, wDate) so each city/day pair is stored once.
    """

    def openDB(self):
        """Open (or create) weathers.db and ensure the table exists."""
        self.con = sqlite3.connect('weathers.db')
        self.cursor = self.con.cursor()
        try:
            self.cursor.execute(
                'create table weathers ('
                'wCity varchar(16),'
                'wDate varchar(16),'
                'wWeather varchar(64),'
                'wTemp varchar(32),'
                'constraint pk_weather primary key(wCity,wDate))')
        except sqlite3.OperationalError:
            # Table already exists from a previous run: empty it so each
            # run starts with fresh data.  (Narrowed from a bare except.)
            self.cursor.execute('delete from weathers')

    def closeDB(self):
        """Commit pending rows and close the connection."""
        # BUG fixed: original called self.conmit(), a nonexistent method,
        # so inserted rows were never committed.
        self.con.commit()
        self.con.close()

    def insert(self, city, date, weather, temp):
        """Insert one forecast row; print (but swallow) duplicates."""
        try:
            # BUG fixed: original inserted into table `weather` (wrong
            # name) and listed only three columns for four placeholders,
            # omitting wWeather.
            self.cursor.execute(
                'insert into weathers (wCity,wDate,wWeather,wTemp) '
                'values (?,?,?,?)',
                (city, date, weather, temp))
        except Exception as err:
            print(err)

    def show(self):
        """Print every stored row in aligned columns."""
        self.cursor.execute('select * from weathers')
        rows = self.cursor.fetchall()
        print('%-16s%-16s%-32s%-16s' % ('city', 'date', 'weather', 'temp'))
        for row in rows:
            print('%-16s%-16s%-32s%-16s' % (row[0], row[1], row[2], row[3]))


class WeatherForecast:
    """Fetch 7-day forecasts from www.weather.com.cn for known cities."""

    def __init__(self):
        # Browser-like User-Agent so the server serves the normal page.
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.0 x64; '
                          'en-US;rv:1.9pre)Gecko/2019100821 '
                          'Minefield/3.0.2pre'}
        # City name -> site city code used in the page URL.
        self.cityCode = {'北京': '101010100', '上海': '101020100',
                         '广州': '101280101', '深圳': '101280601'}

    def forecastCity(self, city):
        """Scrape one city's 7-day forecast and store each day in the DB."""
        if city not in self.cityCode.keys():
            print(city + 'code cannot be found')
            return
        # NOTE(review): the URL host was stripped by the page scrape; the
        # site's city pages live under /weather/ — confirm before use.
        url = ('http://www.weather.com.cn/weather/'
               + self.cityCode[city] + '.shtml')
        try:
            req = urllib.request.Request(url, headers=self.headers)
            data = urllib.request.urlopen(req).read()
            # BUG fixed: both candidate encodings belong in one list; the
            # original passed 'gbk' as an unrelated third positional arg.
            dammit = UnicodeDammit(data, ['utf-8', 'gbk'])
            soup = BeautifulSoup(dammit.unicode_markup, 'lxml')
            # One <li> per forecast day.
            lis = soup.select("ul[class='t clearfix'] li")
            for li in lis:
                try:
                    date = li.select('h1')[0].text
                    weather = li.select('p[class="wea"]')[0].text
                    temp = (li.select('p[class="tem"] span')[0].text + '/'
                            + li.select('p[class="tem"] i')[0].text)
                    print(city, date, weather, temp)
                    self.db.insert(city, date, weather, temp)
                except Exception as err:
                    # Skip a malformed day, keep the rest of the page.
                    print(err)
        except Exception as err:
            print(err)

    def process(self, cities):
        """Open the DB, scrape every city in `cities`, then close the DB."""
        self.db = WeatherDB()
        self.db.openDB()
        for city in cities:
            self.forecastCity(city)
        self.db.closeDB()


if __name__ == '__main__':
    # Guarded so importing this module does not trigger network access.
    ws = WeatherForecast()
    ws.process(['北京', '上海', '广州', '深圳'])
    print('completed')
更多推荐
Python 爬取天气预报数据
发布评论