python爬app ios
python爬app ios
该楼层疑似违规已被系统折叠 隐藏此楼查看此楼
RT,随便找了个网站练习,爬取APP信息然后写入数据库。新手写的比较乱,请见谅。源码如下:
#encoding:utf-8
import requests,io,sys,re,pymysql
from bs4 import BeautifulSoup
# Re-wrap stdout so everything printed is encoded as GB18030
# (presumably for a Chinese-locale Windows console -- TODO confirm).
sys.stdout = io.TextIOWrapper(sys.stdout.buffer,encoding='gb18030')
# Address of the ranking page that spider_bang() downloads. It is empty in
# this copy of the script and must be filled in before running.
url=''
# HTTP headers sent with the request; only a browser-like User-Agent is set.
headers={
'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 UBrowser/6.2.4094.1 Safari/537.36'
}
# Module-level dict that spider_bang() fills with one app's fields
# (column name -> value) before inserting it as a database row.
wants={}
# The price paragraph looks like "价格:¥12.00"; capture the amount before the
# literal ".00". The dot is escaped so that e.g. "¥100.00" yields "100"
# rather than an empty string.
_PRICE_RE = re.compile(r'价格:¥(.*?)\.00')


def _parse_app_cell(cell, classs):
    """Build the row dict for one app from a ranking table cell.

    Args:
        cell: A BeautifulSoup ``<td>`` tag expected to contain an ``<h4>``
            title and at least two ``<p>`` tags (category, then price).
        classs: The CSS class / ranking name the cell was selected by; it is
            stored in the ``是否收费`` column.

    Returns:
        A dict mapping column names to values, or ``None`` when the cell does
        not have the expected title/paragraph structure. The ``价格`` key is
        only present when a price of the form ``¥<amount>.00`` was found.
    """
    titles = cell.find_all('h4')
    paragraphs = cell.find_all('p')
    if not titles or len(paragraphs) < 2:
        # Not an app entry (or the markup changed): skip instead of crashing.
        return None

    record = {
        '是否收费': classs,
        # When several <h4> tags exist, the last one wins.
        'APP名称': titles[-1].text,
        # Text after the first ':'; falls back to the whole text if no ':'.
        'APP类别': paragraphs[0].text.split(':', 1)[-1],
    }
    prices = _PRICE_RE.findall(paragraphs[1].text)
    if prices:
        record['价格'] = prices[-1]
    return record


def spider_bang(classs):
    """Scrape one ranking list and insert every app into the ``topapp`` table.

    Downloads the module-level ``url`` with the module-level ``headers``,
    selects every ``<td>`` whose CSS class equals ``classs``, extracts the
    app name, category and (if present) price from each cell, prints the
    resulting row and inserts it into MySQL.

    NOTE(review): the original source had lost its indentation; this version
    assumes the print/insert happened once per table cell -- confirm.

    Args:
        classs: CSS class of the table cells to scrape (the script uses
            ``'pay'``, ``'free'`` and ``'all'``). It is also stored in the
            ``是否收费`` column of every inserted row.

    Returns:
        None.
    """
    # A timeout keeps the script from hanging forever on a stalled server.
    response = requests.get(url, headers=headers, timeout=30)
    soup = BeautifulSoup(response.text, 'lxml')

    # A fresh dict per app, so a value missing on one app (e.g. no price on a
    # free app) is never inherited from the previously processed app.
    parsed = (_parse_app_cell(cell, classs)
              for cell in soup.find_all('td', class_=classs))
    records = [record for record in parsed if record is not None]
    if not records:
        return

    table = 'topapp'
    # NOTE(review): credentials are hard-coded and the target schema is the
    # MySQL system database 'mysql' -- consider moving these to configuration.
    # One connection for the whole batch, always closed afterwards.
    db = pymysql.connect(host='localhost', user='root', password='xxxxxxx', port=3306, db='mysql')
    try:
        with db.cursor() as cursor:
            for record in records:
                print(record)
                keys = ', '.join(record)
                values = ', '.join(['%s'] * len(record))
                sql = 'INSERT INTO {table}({keys}) VALUES ({values})'.format(table=table, keys=keys, values=values)
                # execute() returns the affected row count; commit only when
                # a row was actually inserted.
                if cursor.execute(sql, tuple(record.values())):
                    print("Succesful")
                    db.commit()
    finally:
        db.close()
if __name__ == '__main__':
    # Scrape the paid, free and overall rankings, in that order.
    for ranking_class in ('pay', 'free', 'all'):
        spider_bang(ranking_class)
更多推荐
python爬app ios
发布评论