全国排污许可证数据

编程入门 行业动态 更新时间:2024-10-27 08:34:52

全国排污<a href="https://www.elefans.com/category/jswz/34/1762491.html">许可证</a>数据

全国排污许可证数据

from tkinter import *  # 导入窗口控件
import tkinter.filedialog
import requests
from lxml import etree
from tkinter import ttk
from bs4 import BeautifulSoup
import webbrowser  # 调用浏览器打开网页
from tkinter import messagebox  # 弹出提示框
from openpyxl import Workbook
import openpyxl
import time  # 延时
import random   #随机
import datetime  # date/time helpers

# Version: V3.0
# Environment: Python 3.8, PyCharm 2020.2

# Module-level state shared between the crawler and the GUI callbacks.
treedata1 = []  # detailed company records found by a query
treedata_jianhuaguanli = []  # company rows collected for display and Excel export
lerror2 = []  # information about pages that failed the consistency check
jishuleijia = 0  # running count of collected companies; feeds the progress label
# Address of the permit listing endpoint.
# NOTE(review): this URL and the Host/Origin/Referer header values below look
# truncated (no scheme or host name) -- restore the full address before running,
# otherwise the HTTP request cannot be sent.
_LIST_URL = "!licenseInformation.action"

# Browser-like headers sent with every listing request.
_REQUEST_HEADERS = {
    "Accept": "text/html,application/xhtml+xml,application/xml;",
    "Accept-Encoding": "gzip",
    "Accept-Language": "zh-CN,zh;q=0.8",
    "Cache-Control": "no-cache",
    "Connection": "keep-alive",
    "Content-Length": "141",
    "Content-Type": "application/x-www-form-urlencoded",
    "DNT": "1",
    "Host": "permit.mee.gov",
    "Origin": "",
    "Pragma": "no-cache",
    "Referer": "!licenseInformation.action",
    "Upgrade-Insecure-Requests": "1",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36",
}

# Candidate pauses (seconds) between page requests; one is picked at random
# per page to reduce the chance of the IP being blocked. Adjust as needed.
_DELAY_CHOICES = (6, 10, 5, 8, 7, 13, 9)

# Number of rows assumed on every listing page except the last one.
_ROWS_PER_PAGE = 10


def _fetch_page(page_no):
    """POST the listing query for ``page_no`` and return the parsed HTML tree."""
    form = {
        "page.pageNo": page_no,
        "page.orderBy": "",
        "page.order": "",
        "province": "",
        "city": "",
        "registerentername": "",
        "xkznum": "",
        "treadname": "",
        "treadcode": "",
        "publishtime": "",
    }
    response = requests.post(_LIST_URL, headers=_REQUEST_HEADERS, data=form)
    return etree.HTML(response.text)


def _parse_total_pages(onclick):
    """Return the page count from an onclick value such as ``javascript:jumpPage2(60)``.

    All non-digit characters are removed, then the first remaining digit is
    dropped because it belongs to the function name (``jumpPage2``).
    """
    # Imported here so the function does not rely on ``from tkinter import *``
    # happening to expose the ``re`` module.
    import re

    return int(re.sub(r"\D", "", onclick)[1:])


def _format_elapsed(start, end):
    """Format the time between two datetimes as '<minutes>分钟<seconds>秒'."""
    # total_seconds() keeps whole days; timedelta.seconds would wrap at 24h.
    minutes, second = divmod(int((end - start).total_seconds()), 60)
    return str(minutes) + '分钟' + str(second) + "秒"


def kaishipaqu_begin():
    """Crawl every listing page and show the rows in the tree view.

    Steps: read the total page count from the first page, count the rows on
    the last page to estimate the number of companies, then fetch each page
    after a random delay. Every row is inserted into ``tree2`` and appended
    to ``treedata_jianhuaguanli``; the labels ``l``, ``l2`` and ``lerror``
    are updated with progress, delay and error information.

    If data has already been collected the user is told the program is busy
    and no second crawl is started.
    """
    global jishuleijia

    if treedata1 or treedata_jianhuaguanli:
        messagebox.showinfo("提示", '程序正在运行中,请稍候........')
        return

    starttime = datetime.datetime.now()  # start of the run, for the timing report

    # --- total number of pages ---
    first_page = _fetch_page("1")
    # The sixth pager link is expected to carry the jump to the last page.
    urlpage = first_page.xpath('//div[@class="fr margin-t-33 margin-b-20"]/a/@onclick')[5]
    zonyeshu = _parse_total_pages(urlpage)
    print(zonyeshu)

    # --- total number of companies ---
    last_page = _fetch_page(zonyeshu)
    href_url = last_page.xpath('//table[@class="tabtd"]/tr/td/a/@href')
    # Full pages before the last one plus the rows on the last page.
    qiyezongshu = (zonyeshu - 1) * _ROWS_PER_PAGE + len(href_url)
    if not qiyezongshu:  # nothing found: the IP may be blocked, so pause
        input("IP错误,请更换!!!!!")
    print(qiyezongshu)

    zonyeshu2 = zonyeshu + 1  # range() excludes its end, so add one to reach the last page
    print(zonyeshu2)
    for page_no in range(1, zonyeshu2):
        yanshi = random.choice(_DELAY_CHOICES)
        print('延迟时间', yanshi)
        # Sleep one second at a time so the countdown label keeps refreshing.
        for aa in range(yanshi):
            time.sleep(1)
            l2["text"] = '延迟时间:' + str(yanshi) + " / " + str(aa) + " 开始时间:" + starttime.strftime('%H:%M')
            l2.update()

        html = _fetch_page(page_no)
        # The [1:] slices skip the first text node of columns that also match the header row.
        paiwuxuke_sheng = html.xpath('//table[@class="tabtd"]/tr/td[1]/text()')[1:]  # province
        paiwuxuke_dishi = html.xpath('//table[@class="tabtd"]/tr/td[2]/text()')[1:]  # city
        paiwuxuke_url = html.xpath('//table[@class="tabtd"]/tr/td/a/@href')  # detail page link
        paiwuxuke_id = html.xpath('//table[@class="tabtd"]/tr/td[@class="font-green"]/text()')  # permit number
        paiwuxuke_name = html.xpath('//table[@class="tabtd"]/tr/td[@style="text-align: left;padding-left: 5px;"]/text()')  # company name
        paiwuxuke_shenpishijian = html.xpath('//table[@class="tabtd"]/tr/td[7]/text()')[1:]  # approval time
        paiwuxuke_hangye = html.xpath('//table[@class="tabtd"]/tr/td[5]/text()')[1:]  # industry
        paiwuxuke_youxiaoqi = html.xpath('//table[@class="tabtd"]/tr/td[6]/text()')[1:]  # validity period

        if not paiwuxuke_url:  # empty page: the IP may be blocked, so pause
            input("IP错误,请更换!!!!!")

        columns = (
            paiwuxuke_sheng,
            paiwuxuke_dishi,
            paiwuxuke_id,
            paiwuxuke_name,
            paiwuxuke_hangye,
            paiwuxuke_youxiaoqi,
            paiwuxuke_shenpishijian,
            paiwuxuke_url,
        )
        # Every column must have the same number of entries, otherwise rows
        # cannot be assembled reliably; record the page and skip it.
        if len({len(column) for column in columns}) != 1:
            print("发现有数据不一致的地方")
            print('发现错误页面:' + str(page_no))
            lerror2.append([page_no])
            lerror["text"] = '程序发现错误页面:' + str(page_no)
            lerror.update()
            print(lerror2)
            continue

        for fields in zip(*columns):
            row = [jishuleijia, *fields]
            # Show the row in the tree view (inserted at the top, keyed by the counter).
            tree2.insert("", '0', jishuleijia, text="", values=tuple(row))
            treedata_jianhuaguanli.append(row)
            jishuleijia = jishuleijia + 1
            print(jishuleijia, "====", qiyezongshu)
            l["text"] = '' + str(jishuleijia) + '/' + str(qiyezongshu)
            l.update()
            tree2.update()

    messagebox.showinfo("提示", "恭喜,所有数据都已准备完毕!请保存excel文件")
    endtime = datetime.datetime.now()
    start = starttime.strftime('%Y-%m-%d %H:%M')
    print((endtime - starttime))
    timeStr = _format_elapsed(starttime, endtime)
    print("程序从 " + start + ' 开始运行,运行时间为:' + timeStr)
    l2["text"] = '程序共运行时间:' + timeStr
    l2.update()


def tree_click(event):
    """Show the second column of the clicked tree row in a message box.

    ``event`` is the Tk event object supplied by the binding; it is not used.
    """
    selected = tree2.selection()
    # Look at the first selected row only; item() expects a single item id.
    values = tree2.item(selected[0], 'values') if selected else ()
    if not values:  # nothing selected or the tree is still empty
        messagebox.showinfo("提示", '现在还没有数据!')
    else:
        messagebox.showinfo("提示", "你所选择的数据是:" + values[1])


def jiayiguanli_save():
    """Write the collected rows to an .xlsx file chosen by the user.

    Does nothing (apart from a notice) when no rows were collected, and
    returns silently when the save dialog is cancelled. Any failure while
    building or saving the workbook is reported in a message box.
    """
    if not treedata_jianhuaguanli:  # nothing crawled, nothing to save
        messagebox.showinfo("提示", '没有数据,不必保存')
        return

    try:
        filename = tkinter.filedialog.asksaveasfilename(filetypes=[('xlsx', '*.xlsx')], initialdir='D:\\')
        if not filename:  # dialog cancelled
            return
        if not filename.lower().endswith('.xlsx'):
            filename = filename + '.xlsx'

        biaoti = ['序号', '省/直辖市', '地市', '许可证编号', '企业名称', '行业类别', '有效期限', '登记时间', '详细链接']
        wb = Workbook()
        wb1 = wb.create_sheet('index', 0)
        wb1.title = '管理数据'
        wb1.append(biaoti)  # header row
        for row in treedata_jianhuaguanli:
            wb1.append(row)
        wb.save(filename)
    except Exception:
        messagebox.showinfo("提示", '保存文件错误,请重试~!!')
    else:
        messagebox.showinfo("提示", filename + "保存完毕~!!!")


root = Tk()  # create the main window
# ---- Main window layout ----
root.title("排污许可证数据信息")
root.geometry("900x750+500+50")  # WIDTHxHEIGHT+X+Y: window size, then position on screen

# Status bar: progress counter, delay/elapsed time, and error notices.
lbxianshixinxi = LabelFrame(root, width=800, text='', padx=80, pady=10)
lbxianshixinxi.grid(row=0, column=0)
l = Label(lbxianshixinxi, text='0/0', width=20)  # collected / total counter
l.grid(row=0, column=0, padx=10, pady=10, sticky=W)
l2 = Label(lbxianshixinxi, text='程序运行时间:', justify=RIGHT)  # delay countdown and run time
l2.grid(row=0, column=1, padx=10, pady=10, sticky=W)
lerror = Label(lbxianshixinxi, text='', width=25, fg='red', justify=RIGHT)  # failed-page notice
lerror.grid(row=0, column=2, padx=10, pady=10, sticky=W)

# Frame holding the result table.
lbtree = LabelFrame(root, width=500, height=10, text='数据显示区域', padx=8, pady=10)
lbtree.grid(row=1, column=0)

# Frame holding the action buttons.
lb4 = LabelFrame(root, width=800, height=500, text='重点管理排污许可证数据', padx=8, pady=8, foreground='red')
lb4.grid(row=8, column=0)
button = Button(lb4, text=" 开始爬取(重点管理) ", command=kaishipaqu_begin)  # start crawling
button.grid(row=5, column=2, padx=20, pady=0, sticky=W)
button2 = Button(lb4, text=" 保存列表信息数据 ", command=jiayiguanli_save)  # save rows to Excel
button2.grid(row=5, column=4, sticky=W)
button1 = Button(lb4, text=" 退   出 ", command=root.quit)  # quit
button1.grid(row=5, column=5, padx=20, pady=0, sticky=N)

# Result table; show="headings" hides the implicit first (tree) column.
tree2 = ttk.Treeview(lbtree, height=20, show="headings")
scroll2_ty = Scrollbar(root, orient=VERTICAL, command=tree2.yview)  # vertical scrollbar
scroll2_ty.grid(row=1, column=2, sticky=N + S)
tree2['yscrollcommand'] = scroll2_ty.set
scroll2_tx = Scrollbar(root, orient=HORIZONTAL, command=tree2.xview)  # horizontal scrollbar
scroll2_tx.grid(row=3, column=0, sticky=E + W)
tree2['xscrollcommand'] = scroll2_tx.set
tree2.grid(row=1, columnspan=1)

tree2["columns"] = ('序号', '省/直辖市', '地市', '许可证编号', '企业名称', '行业类别', '有效期限', '登记时间', '详细链接')
# Set each column's width and heading; the index column is narrower than the rest.
for column_name in tree2["columns"]:
    tree2.column(column_name, width=50 if column_name == "序号" else 100)
    tree2.heading(column_name, text=column_name)

tree2.bind('<ButtonRelease>', tree_click)  # report the clicked row on mouse release
root.mainloop()  # enter the Tk event loop (shows the window)

更多推荐

全国排污许可证数据

本文发布于:2024-02-27 14:40:13,感谢您对本站的认可!
本文链接:https://www.elefans.com/category/jswz/34/1706939.html
版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系,我们将在24小时内删除。
本文标签:许可证   数据   全国

发布评论

评论列表 (有 0 条评论)
草根站长

>www.elefans.com

编程频道|电子爱好者 - 技术资讯及电子产品介绍!