自己使用

编程入门 行业动态 更新时间:2024-10-26 23:39:50

自己使用

自己使用

import requests
from lxml import etree
import re
import time
import csv
import pandas as pd
import os

# Per-column accumulators. The scraping loop further down appends the values
# extracted from each result page to these lists, and they finally become the
# columns of the exported CSV file.
lis_firm = []       # company names
lis_name = []       # job titles
lis_workplace = []  # work locations
lis_pay = []        # salary descriptions
lis_time = []       # posting dates

# Search keyword and number of result pages to fetch, both read interactively.
keyword = input("请输入你想找到的工作:")
# int() raises ValueError if the answer is not a whole number.
page = int(input("请输入你想爬取的页数:"))
# HTTP request headers sent with every search request; the User-Agent and
# sec-ch-ua values identify the client as desktop Chrome 102 on Windows.
#
# No raw 'Cookie' header is set here: cookies travel through the separate
# `cookies` mapping that is handed to requests.get() alongside these headers.
headers = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Cache-Control': 'max-age=0',
    'Connection': 'keep-alive',
    # Requests sorts cookies= alphabetically
    'Sec-Fetch-Dest': 'document',
    'Sec-Fetch-Mode': 'navigate',
    'Sec-Fetch-Site': 'same-origin',
    'Sec-Fetch-User': '?1',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.5005.115 Safari/537.36',
    'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="102", "Google Chrome";v="102"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
}
# Cookies sent with every search request (passed to requests.get as `cookies=`).
# NOTE(review): these look like values captured from a live browser session;
# presumably they expire and need to be refreshed from time to time — confirm.
cookies = {
    '_uab_collina': '165494029760362106180467',
    'guid': '038a32b83973a819c180179ba511742c',
    'nsearch': 'jobarea%3D%26%7C%26ord_field%3D%26%7C%26recentSearch0%3D%26%7C%26recentSearch1%3D%26%7C%26recentSearch2%3D%26%7C%26recentSearch3%3D%26%7C%26recentSearch4%3D%26%7C%26collapse_expansion%3D',
    'acw_tc': '2f624a4816549990797972577e0f84a5a8fe2c1095ecfd612196c594d90db2',
    'search': 'jobarea%7E%60000000%7C%21ord_field%7E%600%7C%21recentSearch0%7E%60000000%A1%FB%A1%FA000000%A1%FB%A1%FA0000%A1%FB%A1%FA00%A1%FB%A1%FA99%A1%FB%A1%FA%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA9%A1%FB%A1%FA99%A1%FB%A1%FA%A1%FB%A1%FA0%A1%FB%A1%FA%CA%FD%BE%DD%B7%D6%CE%F6%A1%FB%A1%FA2%A1%FB%A1%FA1%7C%21recentSearch1%7E%60120500%A1%FB%A1%FA000000%A1%FB%A1%FA0000%A1%FB%A1%FA00%A1%FB%A1%FA99%A1%FB%A1%FA%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA9%A1%FB%A1%FA99%A1%FB%A1%FA%A1%FB%A1%FA0%A1%FB%A1%FA%CA%FD%BE%DD%B7%D6%CE%F6%A1%FB%A1%FA2%A1%FB%A1%FA1%7C%21recentSearch2%7E%60120500%A1%FB%A1%FA000000%A1%FB%A1%FA0000%A1%FB%A1%FA00%A1%FB%A1%FA99%A1%FB%A1%FA%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA9%A1%FB%A1%FA99%A1%FB%A1%FA%A1%FB%A1%FA0%A1%FB%A1%FA%C5%C0%B3%E6%A1%FB%A1%FA2%A1%FB%A1%FA1%7C%21',
    'ssxmod_itna': 'QqAx9D2DRQi=f4Cq0d48+Q=4Y5NDKUNDC9IKr2DBqEO4iNDnD8x7YDvIIoKVIW/AAxEYfKtDTwxKW=RDhI+WPOwfNFV=x0aDbqGkqWC84GGUxBYDQxAYDGDDPDogPD1D3qDkD7EZlMBsqDEDYp9DA3Di4D+8MQDmqG0DDU7B4G2D7U9Q7GN8TrUCntdEkDPrDh9D0tQxBLK8cTo1P9NBTrTr1iatqGySPGu0uU/lRbDCxtVRk0sGbx4I05PKO+K7ODeKhq4/7EAaDxt3AxqD4EPYAqckhqQ/ESd/DDAiBwd+HD==',
    'ssxmod_itna2': 'QqAx9D2DRQi=f4Cq0d48+Q=4Y5NDKUNDC9IKrx8dPEwqGNLKGaWB+Ikqw/+zx8r2QCeKxC00CKDbYvie/4ILoWGYRhLSXLYBAlcvCnf8A9Tsphl1W=mareFxHs6fPtudewZ+07IE7p5swgw8YB9bf2Kz3WKs/QiOOgqx4=9bPpWa1AopYaKzqYWF/gPa=l4kvpHtxza7KjnaVipNhZqhDonFyPaTx1ybBtuNqBIXeT02SIlmQTMRkrj2x3ZFN8P2G3QH3h82umLnL3=HotT7r3Lfx9BQdTiCspO620FZNl/H=D8GeQIV0r0+xb35m/cCzhqiyHePqLRDzaG+Y2Qyd7D2Fa1mba7TgFbxTAiSp4sAjz7WBiOK05B+4/0DDTPurdjR69Ia/c++bHfAPq4=9+u3Fxa0tObiLnH0cX9ic8G5h8cbD280i17iR+0b8BD+H/hED+xGgnQSMQie+gN3wn/9KP4xekKsvHvOCxMQ1Mji+kXPCx+5P+8qSaUMHVMNlBaWL+v212rk6bxgL=vM1huQP9HXRyiY1VhD1C3D07S7Dwix2Pur3tw130ecfz2UUrhOqgo1KG3wGdZBqgO9MU0R3QrYTo7QsGDng3Kv=YsKAFhygDEd4BAxqBS3mdaV1HsKKixD7=DY95eD',
}
# Query-string parameters appended to every search request.
params = {
    'lang': 'c',
    'postchannel': '0000',
    'workyear': '99',
    'cotype': '99',
    'degreefrom': '99',
    'jobterm': '99',
    'companysize': '99',
    'ord_field': '0',
    'dibiaoid': '0',
    'line': '',
    'welfare': '',
}

# URL template; the two placeholders are the search keyword and the page number.
# NOTE(review): in the listing this script was recovered from, the template
# started at ",000000,0000,..." — its scheme and host had been stripped, which
# makes requests raise MissingSchema. The prefix below is presumed from the
# query parameters and the JSON field names being scraped; confirm it before
# relying on it.
SEARCH_URL_TEMPLATE = (
    "https://search.51job.com/list/000000"
    ",000000,0000,00,9,99,{},2,{}.html?"
)

# Seconds to wait for the server before giving up, so a stalled connection
# cannot hang the script forever.
REQUEST_TIMEOUT = 10

# (console heading, JSON field to extract, drop the last match?)
# The headings are printed verbatim before each list of extracted values.
_FIELD_SPECS = (
    ("=========公司名==li_firm", "company_name", False),
    ("=========职位名==li_name", "job_name", False),
    ("=========工作地点==li_workplace", "workarea_text", False),
    ("=========薪资==li_pay", "providesalary_text", False),
    # NOTE(review): the last "issuedate" match is discarded; presumably the
    # page carries one extra occurrence that belongs to no job entry — confirm.
    ("=========发布时间==li_time", "issuedate", True),
)


def build_search_url(keyword, page_number):
    """Return the search URL for *keyword* on result page *page_number*."""
    return SEARCH_URL_TEMPLATE.format(keyword, page_number)


def extract_field(page_text, field):
    """Return every value of the string field *field* found in *page_text*.

    A value is whatever sits between ``"<field>":"`` and the next ``","``,
    matched non-greedily with DOTALL semantics. Returns an empty list when
    the field does not occur.
    """
    pattern = r'"' + re.escape(field) + r'":"(.*?)","'
    return re.findall(pattern, page_text, re.S)


def scrape_page(page_number):
    """Download one result page and append its fields to the lis_* lists.

    Uses the module-level ``keyword``, ``headers``, ``cookies`` and ``params``
    and extends ``lis_firm``, ``lis_name``, ``lis_workplace``, ``lis_pay`` and
    ``lis_time`` in place. Progress and the raw page text are printed.
    """
    page_label = str(page_number)
    print('===============正在爬取第{' + page_label + '}页数据内容===============')
    # Pause between requests to keep the request rate low.
    time.sleep(2)

    url = build_search_url(keyword, page_label)
    response = requests.get(
        url=url,
        headers=headers,
        cookies=cookies,
        params=params,
        timeout=REQUEST_TIMEOUT,
    )
    # Decode the body with the encoding detected from its content.
    response.encoding = response.apparent_encoding
    responds = response.text
    print(url)
    print(responds)

    sinks = {
        "company_name": lis_firm,
        "job_name": lis_name,
        "workarea_text": lis_workplace,
        "providesalary_text": lis_pay,
        "issuedate": lis_time,
    }
    for heading, field, drop_last in _FIELD_SPECS:
        values = extract_field(responds, field)
        print(heading)
        print(values)
        # Guarded so a page without any match does not raise IndexError.
        if drop_last and values:
            values.pop()
        print(len(values))
        sinks[field].extend(values)


if __name__ == "__main__":
    for pageNum in range(1, page + 1):
        scrape_page(pageNum)

    # All columns must have the same length, otherwise pandas raises ValueError.
    dataframe = pd.DataFrame({
        '公司名': lis_firm,
        '职位名': lis_name,
        '工作地点': lis_workplace,
        '薪资': lis_pay,
        '发布时间': lis_time,
    })
    dataframe.to_csv("爬取数据.csv", index=False, sep=',')

更多推荐

自己使用

本文发布于:2024-02-16 18:52:57,感谢您对本站的认可!
本文链接:https://www.elefans.com/category/jswz/34/1691104.html
版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系,我们将在24小时内删除。
本文标签:

发布评论

评论列表 (有 0 条评论)
草根站长

>www.elefans.com

编程频道|电子爱好者 - 技术资讯及电子产品介绍!