# 自己使用 (for personal use)
"""Collect job postings for a keyword and save them to a CSV file.

The script prompts for a search keyword and a page count, downloads each
result page, pulls five fields (company name, job title, work location,
salary, publish date) out of the page text with regular expressions, and
finally writes everything to ``爬取数据.csv`` through pandas.
"""
import csv
import os
import re
import time

import pandas as pd
import requests
from lxml import etree

# Accumulators for every page; one list per output column.
lis_firm = []
lis_name = []
lis_workplace = []
lis_pay = []
lis_time = []

keyword = input("请输入你想找到的工作:")
page = input("请输入你想爬取的页数:")
page = int(page)

# NOTE(review): this template has no scheme or host, so requests will refuse
# it (MissingSchema). The prefix looks like it was lost when the script was
# pasted; presumably it is the site's search-list endpoint
# (something like "https://search.51job.com/list/000000") -- TODO confirm and
# restore before running.
URL_TEMPLATE = ",000000,0000,00,9,99,{},2,{}.html?"

# Seconds to wait for the server before giving up; without a timeout
# requests.get can block forever on a stalled connection.
REQUEST_TIMEOUT = 10

headers = {
    # The original value read 'textml,...', which is not a valid media type;
    # restored to the standard browser value.
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Cache-Control': 'max-age=0',
    'Connection': 'keep-alive',
    # The Cookie header is not set here; the same values are sent through
    # the `cookies` argument of requests.get below.
    'Sec-Fetch-Dest': 'document',
    'Sec-Fetch-Mode': 'navigate',
    'Sec-Fetch-Site': 'same-origin',
    'Sec-Fetch-User': '?1',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.5005.115 Safari/537.36',
    'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="102", "Google Chrome";v="102"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
}

# NOTE(review): these look like cookies captured from one browser session;
# presumably they expire and must be refreshed -- verify before relying on them.
cookies = {
    '_uab_collina': '165494029760362106180467',
    'guid': '038a32b83973a819c180179ba511742c',
    'nsearch': 'jobarea%3D%26%7C%26ord_field%3D%26%7C%26recentSearch0%3D%26%7C%26recentSearch1%3D%26%7C%26recentSearch2%3D%26%7C%26recentSearch3%3D%26%7C%26recentSearch4%3D%26%7C%26collapse_expansion%3D',
    'acw_tc': '2f624a4816549990797972577e0f84a5a8fe2c1095ecfd612196c594d90db2',
    'search': 'jobarea%7E%60000000%7C%21ord_field%7E%600%7C%21recentSearch0%7E%60000000%A1%FB%A1%FA000000%A1%FB%A1%FA0000%A1%FB%A1%FA00%A1%FB%A1%FA99%A1%FB%A1%FA%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA9%A1%FB%A1%FA99%A1%FB%A1%FA%A1%FB%A1%FA0%A1%FB%A1%FA%CA%FD%BE%DD%B7%D6%CE%F6%A1%FB%A1%FA2%A1%FB%A1%FA1%7C%21recentSearch1%7E%60120500%A1%FB%A1%FA000000%A1%FB%A1%FA0000%A1%FB%A1%FA00%A1%FB%A1%FA99%A1%FB%A1%FA%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA9%A1%FB%A1%FA99%A1%FB%A1%FA%A1%FB%A1%FA0%A1%FB%A1%FA%CA%FD%BE%DD%B7%D6%CE%F6%A1%FB%A1%FA2%A1%FB%A1%FA1%7C%21recentSearch2%7E%60120500%A1%FB%A1%FA000000%A1%FB%A1%FA0000%A1%FB%A1%FA00%A1%FB%A1%FA99%A1%FB%A1%FA%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA9%A1%FB%A1%FA99%A1%FB%A1%FA%A1%FB%A1%FA0%A1%FB%A1%FA%C5%C0%B3%E6%A1%FB%A1%FA2%A1%FB%A1%FA1%7C%21',
    'ssxmod_itna': 'QqAx9D2DRQi=f4Cq0d48+Q=4Y5NDKUNDC9IKr2DBqEO4iNDnD8x7YDvIIoKVIW/AAxEYfKtDTwxKW=RDhI+WPOwfNFV=x0aDbqGkqWC84GGUxBYDQxAYDGDDPDogPD1D3qDkD7EZlMBsqDEDYp9DA3Di4D+8MQDmqG0DDU7B4G2D7U9Q7GN8TrUCntdEkDPrDh9D0tQxBLK8cTo1P9NBTrTr1iatqGySPGu0uU/lRbDCxtVRk0sGbx4I05PKO+K7ODeKhq4/7EAaDxt3AxqD4EPYAqckhqQ/ESd/DDAiBwd+HD==',
    'ssxmod_itna2': 'QqAx9D2DRQi=f4Cq0d48+Q=4Y5NDKUNDC9IKrx8dPEwqGNLKGaWB+Ikqw/+zx8r2QCeKxC00CKDbYvie/4ILoWGYRhLSXLYBAlcvCnf8A9Tsphl1W=mareFxHs6fPtudewZ+07IE7p5swgw8YB9bf2Kz3WKs/QiOOgqx4=9bPpWa1AopYaKzqYWF/gPa=l4kvpHtxza7KjnaVipNhZqhDonFyPaTx1ybBtuNqBIXeT02SIlmQTMRkrj2x3ZFN8P2G3QH3h82umLnL3=HotT7r3Lfx9BQdTiCspO620FZNl/H=D8GeQIV0r0+xb35m/cCzhqiyHePqLRDzaG+Y2Qyd7D2Fa1mba7TgFbxTAiSp4sAjz7WBiOK05B+4/0DDTPurdjR69Ia/c++bHfAPq4=9+u3Fxa0tObiLnH0cX9ic8G5h8cbD280i17iR+0b8BD+H/hED+xGgnQSMQie+gN3wn/9KP4xekKsvHvOCxMQ1Mji+kXPCx+5P+8qSaUMHVMNlBaWL+v212rk6bxgL=vM1huQP9HXRyiY1VhD1C3D07S7Dwix2Pur3tw130ecfz2UUrhOqgo1KG3wGdZBqgO9MU0R3QrYTo7QsGDng3Kv=YsKAFhygDEd4BAxqBS3mdaV1HsKKixD7=DY95eD',
}

params = {
    'lang': 'c',
    'postchannel': '0000',
    'workyear': '99',
    'cotype': '99',
    'degreefrom': '99',
    'jobterm': '99',
    'companysize': '99',
    'ord_field': '0',
    'dibiaoid': '0',
    'line': '',
    'welfare': '',
}

# One entry per extracted column:
#   (banner printed before the matches, compiled pattern, accumulator list,
#    whether the last match on each page is discarded).
# Patterns are compiled once here instead of on every page.
FIELD_SPECS = (
    ("=========公司名==li_firm",
     re.compile(r'"company_name":"(.*?)","', re.S), lis_firm, False),
    ("=========职位名==li_name",
     re.compile(r'"job_name":"(.*?)","', re.S), lis_name, False),
    ("=========工作地点==li_workplace",
     re.compile(r'"workarea_text":"(.*?)","', re.S), lis_workplace, False),
    ("=========薪资==li_pay",
     re.compile(r'"providesalary_text":"(.*?)","', re.S), lis_pay, False),
    # The original script drops the final "issuedate" match of every page;
    # presumably the page carries one extra non-job date -- TODO confirm.
    ("=========发布时间==li_time",
     re.compile(r'"issuedate":"(.*?)","', re.S), lis_time, True),
)

for pageNum in range(1, page + 1):
    pageNum = str(pageNum)
    print('===============正在爬取第{' + pageNum + '}页数据内容===============')
    time.sleep(2)  # pause between pages to avoid hammering the server

    url = URL_TEMPLATE.format(keyword, pageNum)
    response = requests.get(
        url=url,
        headers=headers,
        cookies=cookies,
        params=params,
        timeout=REQUEST_TIMEOUT,
    )
    # Let requests guess the charset from the body rather than the headers.
    response.encoding = response.apparent_encoding
    responds = response.text
    print(url)
    print(responds)

    for banner, pattern, collected, drop_last in FIELD_SPECS:
        found = pattern.findall(responds)
        print(banner)
        print(found)
        # Guarded: an empty page (no matches) used to crash here with
        # IndexError from pop() on an empty list.
        if drop_last and found:
            found.pop()
        print(len(found))
        collected.extend(found)

# All five lists must have the same length; pandas raises ValueError
# otherwise, which signals that the per-field matches got out of step.
dataframe = pd.DataFrame({
    '公司名': lis_firm,
    '职位名': lis_name,
    '工作地点': lis_workplace,
    '薪资': lis_pay,
    '发布时间': lis_time,
})
dataframe.to_csv("爬取数据.csv", index=False, sep=',')
# 更多推荐
# 自己使用
# 发布评论