贴吧HTML页面内容
python爬取贴吧HTML页面内容
代码如下:
# -*- coding: utf-8 -*-
import urllib2
import urllib
def loadPage(url, filename):
    """Send a request to ``url`` and return the server's response body.

    Args:
        url: Address of the page to fetch.
        filename: Label used only in the progress message printed before
            the download starts; nothing is written to disk here.

    Returns:
        The response body exactly as returned by ``read()``.

    Any error raised by ``urllib2.urlopen`` propagates to the caller.
    """
    print("正在下载" + filename)
    # Send a browser-like User-Agent instead of urllib2's default one.
    headers = {"User-Agent":"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv2.0.1) Gecko/20100101 Firefox/4.0.1"}
    request = urllib2.Request(url, headers=headers)
    response = urllib2.urlopen(request)
    try:
        return response.read()
    finally:
        # Release the underlying connection promptly, even if read() fails,
        # rather than waiting for garbage collection.
        response.close()
def writePage(html, filename):
    """Write the downloaded page content to a local file.

    Args:
        html: Content of the server response (the raw byte string
            produced by ``read()``).
        filename: Path of the file to create or overwrite.

    Returns:
        None. Progress messages are printed before and after the write.
    """
    print("正在保存" + filename)
    # Binary mode stores the bytes exactly as received; text mode would
    # rewrite line endings on Windows.
    with open(filename, "wb") as f:
        f.write(html)
    print("-" * 30)
def tiebaSpider(url,beginPage,endPage):
"""
更多推荐
python爬取贴吧HTML页面内容
发布评论