python3爬取数据导入mysql

编程入门 行业动态 更新时间:2024-10-28 08:19:53

python3爬取<a href="https://www.elefans.com/category/jswz/34/1771445.html">数据</a>导入mysql

python3爬取数据导入mysql

class Douban:
    """Crawler that walks a paginated movie list and stores each movie in MySQL.

    Flow (see ``run``): recreate table ``test3``, fetch the start page, visit
    every detail link found on it, insert one row per movie, then follow the
    "next" link until a page has none.

    Third-party modules (``requests``, ``lxml``, ``pymysql``) and the stdlib
    helpers are imported inside the methods that use them because this file
    has no import section of its own.
    """

    # NOTE(review): the URL literals in the original snippet were empty strings
    # (they look like they were stripped when the snippet was published).
    # Restored on the assumption that the target is the Douban Top 250 list,
    # which matches the XPath selectors used below -- confirm before relying on it.
    START_URL = "https://movie.douban.com/top250"

    # Seconds to wait after each detail-page request.
    REQUEST_DELAY = 2

    # Connection shared by all database methods; opened on first use by ``lian``.
    _db = None

    def __init__(self):
        # Request headers imitating a desktop browser.
        self.header = {
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
            "Accept-Language": "zh-CN,zh;q=0.9",
            "Cache-Control": "max-age=0",
            "Connection": "keep-alive",
            # Was "movie.douban", which is not a valid host name for the
            # site being requested (see the NOTE on START_URL).
            "Host": "movie.douban.com",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.92 Safari/537.36",
        }

    @staticmethod
    def _first(values, default=""):
        """Return the first item of *values*, or *default* when it is empty."""
        return values[0] if values else default

    def get_html(self, url):
        """Fetch *url* and return the response body decoded as UTF-8.

        Returns an empty string when the request fails or the server answers
        with an HTTP error status; the failure is printed, not raised.
        """
        import requests

        try:
            response = requests.get(url, headers=self.header, timeout=10)
            # The original only read ``status_code`` and discarded it, so
            # error pages were parsed as if they were real content.
            response.raise_for_status()
        except requests.RequestException as e:
            # ``"..." + e`` raised TypeError; the exception must be stringified.
            print("页面获取失败" + str(e))
            return ""
        response.encoding = "utf-8"
        return response.text

    def detail_url(self, html):
        """Process one list page given as HTML text.

        Visits every detail link on the page (pausing ``REQUEST_DELAY``
        seconds after each) and then hands the parsed page to ``next_html``
        to continue with the following page.
        """
        if not html:
            # get_html returns "" on failure; there is nothing to parse.
            return

        import time

        from lxml import etree

        tree = etree.HTML(html)
        if tree is None:
            # Defensive: the parser may produce no document for junk input.
            return

        detail_links = tree.xpath(
            '//ol[@class="grid_view"]/li//div[@class="pic"]/a/@href'
        )
        for url in detail_links:
            self.detail_html(url)
            time.sleep(self.REQUEST_DELAY)
        self.next_html(tree)

    def next_html(self, html):
        """Follow the "next page" link of a parsed list page, if it has one.

        *html* is the parsed tree (anything with an ``xpath`` method), not
        the raw text. Returns without doing anything when the page has no
        next link, which is how the crawl ends.
        """
        from urllib.parse import urljoin

        links = html.xpath('//span[@class="next"]/a/@href')
        if not links:
            # Last page: the original indexed [0] here and raised IndexError.
            return
        # urljoin resolves the href against the list URL whether it is a bare
        # query string, a relative path or already absolute.
        next_url = urljoin(self.START_URL, links[0])
        print("=" * 1000, next_url)
        self.detail_url(self.get_html(next_url))

    def detail_html(self, url):
        """Fetch one detail page, extract the movie fields and insert a row.

        Single-valued fields that are missing from the page are stored as
        empty strings instead of aborting the whole crawl. Nothing is
        inserted when the page cannot be fetched.
        """
        page = self.get_html(url)
        if not page:
            return

        from lxml import etree

        tree = etree.HTML(page)
        if tree is None:
            return

        name = "".join(tree.xpath('//div[@id="content"]/h1//span/text()'))  # title
        img_url = self._first(tree.xpath('//div[@id="mainpic"]/a/img/@src'))  # poster URL
        daoyan = self._first(
            tree.xpath('//div[@id="info"]/span[1]/span[2]/a/text()')
        )  # director
        bianju = "".join(
            tree.xpath('//div[@id="info"]/span[2]/span[2]//a/text()')
        )  # screenwriters
        zhuyan = "".join(
            tree.xpath('//div[@id="info"]/span[3]/span[2]//text()')
        ).replace("/", ",")  # cast, "/" separators turned into ","
        # Named ``genre`` so the builtin ``type`` is not shadowed.
        genre = "".join(tree.xpath('//span[@property="v:genre"]/text()'))
        score = self._first(
            tree.xpath('//strong[contains(@class,"rating_num")]/text()')
        )  # rating

        zu = (name, img_url, daoyan, bianju, zhuyan, genre, score)
        print(zu)
        self.insert_table(zu)

    def lian(self):
        """Return the shared MySQL connection, opening it when necessary.

        The original opened a brand-new connection on every call, so an
        INSERT was executed on one connection and "committed" on another,
        and the row was never persisted. All callers now share one connection.
        """
        if self._db is None or not self._db.open:
            import pymysql

            # Keyword arguments: PyMySQL 1.0+ no longer accepts these positionally.
            self._db = pymysql.connect(
                host="localhost",
                user="root",
                password="root",
                database="python_test",
                charset="utf8mb4",
            )
        return self._db

    def create_table(self):
        """Drop table ``test3`` if it exists and create it again, empty."""
        sql = """
            create table test3(
                id int primary key auto_increment,
                name varchar(255),
                img_url varchar(255),
                daoyan varchar(255),
                bianju varchar(255),
                zhuyan text,
                type varchar(255),
                score varchar(255)
            )character set utf8mb4
        """
        with self.lian().cursor() as cursor:
            cursor.execute("drop table if exists test3")
            cursor.execute(sql)

    def insert_table(self, zu):
        """Insert one movie row into ``test3`` and commit it.

        *zu* is the 7-tuple ``(name, img_url, daoyan, bianju, zhuyan, type,
        score)``. On a database error the transaction is rolled back and the
        error is printed; it is not re-raised, so the crawl continues.
        """
        import pymysql

        db = self.lian()
        sql = "insert into test3(name,img_url,daoyan,bianju,zhuyan,type,score) value(%s,%s,%s,%s,%s,%s,%s) "
        try:
            with db.cursor() as cursor:
                # Values are passed as parameters so the driver escapes them.
                cursor.execute(sql, zu)
            # Commit on the same connection that executed the INSERT.
            db.commit()
        except pymysql.MySQLError as e:
            db.rollback()
            print("添加失败", e)

    def run(self):
        """Recreate the table, crawl from ``START_URL`` and close the database."""
        try:
            self.create_table()
            self.detail_url(self.get_html(self.START_URL))
        finally:
            # Close the connection even if the crawl is interrupted.
            if self._db is not None:
                self._db.close()
                self._db = None

if __name__ == '__main__':
    # Start the crawl only when the file is executed as a script, not when
    # it is imported.
    d = Douban()
    d.run()

更多推荐

python3爬取数据导入mysql

本文发布于:2024-03-11 18:53:28,感谢您对本站的认可!
本文链接:https://www.elefans.com/category/jswz/34/1729653.html
版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系,我们将在24小时内删除。
本文标签:数据   mysql

发布评论

评论列表 (有 0 条评论)
草根站长

www.elefans.com

编程频道|电子爱好者 - 技术资讯及电子产品介绍!