import requests
from lxml import etree
# Fetch one exercise detail page
def getDetailPage(page):
    """Download one exercise page, then print and save the text extracted from it.

    Every ``div`` with class ``article-intro`` on the page yields one record
    made of its title, problem statement, code listing and program output.
    Each record is printed and then handed to ``savedate``.

    Args:
        page: Exercise number substituted into the page URL.
    """
    # The site is served from www.runoob.com; the literal previously lacked
    # the ".com" suffix, so the request could never reach it.
    url = 'https://www.runoob.com/python/python-exercise-example{}.html'.format(page)
    # A timeout keeps a stalled connection from blocking the crawl forever.
    response = requests.get(url, timeout=10)
    html = etree.HTML(response.text)
    for block in html.xpath('//div[@class="article-intro"]'):
        titles = block.xpath('./h1/text()')
        names = block.xpath('./p[2]/text()')
        # Fall back to an empty string rather than raising IndexError when
        # the heading or the statement paragraph is missing.
        title = titles[0] if titles else ''
        name = names[0] if names else ''
        # The highlighted code is split across many <span> nodes; glue the
        # text fragments back together.
        code = ''.join(block.xpath('.//div[@class="hl-main"]//span/text()'))
        result = ''.join(block.xpath('./pre/text()'))
        data = ''.join([title + '\n', '题目:' + name + '\n', '代码:' + code + '\n', '输出:' + result])
        print(data)
        savedate(data)
# Save data
def savedate(data):
    """Append ``data`` to the script's fixed UTF-8 output text file.

    Args:
        data: Text to append; it is written as-is, with nothing added.
    """
    out_file = open('菜鸟python编程100例.txt', mode='a', encoding='utf-8')
    try:
        out_file.write(data)
    finally:
        # Always release the handle, even if the write fails.
        out_file.close()
def main():
    """Fetch and save exercise pages 1 and 2, in ascending order."""
    page_no = 1
    while page_no < 3:
        getDetailPage(page_no)
        page_no += 1


# Run the crawl only when this file is executed as a script.
if __name__ == '__main__':
    main()
有问题请联系博主:
微信:hrvrap
qq:2580419087
更多推荐
Python爬取实战-爬取菜鸟教程python100例
发布评论