A case study: calling Python from Ruby to export large data volumes


First, a typical example of exporting CSV directly from Ruby:

respond_to do |format|
  format.html # index.html.erb
  format.xml { render :xml => @mobile_units }
  format.csv {
    columns = Ultra::MobileUnit.export_columns_name
    datas = UI::Grid.new(Ultra::MobileUnit, @mobile_units).to_csv(columns) do |col, data|
      data[col]
    end
    send_data(datas, :type => 'text/csv; header=present', :filename => 'wifi_mobile_units.csv')
  }
end


Because the data volume is huge, the export has to be moved into a background job. After some searching, Celery, a very handy asynchronous task queue for Python, turned out to be a good fit.

def export_mobile
  # params[:param_values] holds the SQL script to execute
  param_values = params[:param_values]
  # Create a Celery task record in the database table
  @celery_task = CeleryTask.new({:name => "report_#{Time.now.to_i}.csv", :task_type => "mobile_export", :result => "30", :user_id => current_user.id, :params => param_values})
  @celery_task.save!
  # Shell out to the Python script that dispatches the export request
  task_file = File.join(RAILS_ROOT, "python_tasks", "ultra_export_main.py")
  system "python #{task_file} #{@celery_task.id}"
  @celery_id = @celery_task.id
  respond_to do |format|
    format.html { render :layout => 'simple' }
  end
end




# Python entry point (ultra_export_main.py): receives the celery_tasks id and dispatches the matching Celery task
# encoding: utf-8
import sys
from models import CeleryTask, session
from export_oracle import export_excel_oracle
from export import export_excel
from import_site import import_xls
from amend import amend_xls
from export_xls import export_amend_data
from export_alarm import export_history_alarms

if __name__ == "__main__":
    id = sys.argv[1]
    task = session.query(CeleryTask).get(id)
    export_type = task.task_type
    if export_type in ["export_perf_report", "export_alarm_report", "export_config_report"]:
        print(">>> export report")
        res = export_excel_oracle.delay(id)
    elif export_type in ["ap_query_export", "site_query_export", "alarms_export"]:
        print(">> export")
        res = export_excel.delay(id)
    # Update the task_id on the celery_tasks record (links it to celery_taskmeta)
    task.task_id = res.task_id
    session.commit()



Definition of the models module:

# encoding: utf-8
# Builds the SQLAlchemy session and declares the celery_tasks model that stores the Celery task_id
from celeryconfig import CELERY_RESULT_DBURI
from sqlalchemy import Column
from sqlalchemy import Integer
from sqlalchemy import String
from sqlalchemy import Text
from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker

engine = create_engine(CELERY_RESULT_DBURI+"?charset=utf8", pool_recycle=600)
Session = sessionmaker()
Session.configure(bind=engine)
session = Session()

Base = declarative_base()
class CeleryTask(Base):
    __tablename__ = 'celery_tasks'
    id = Column(Integer, primary_key=True)
    name = Column(String(255))        # task name
    task_id = Column(String(255))     # task_id linking to the celery_taskmeta table
    task_type = Column(String(50))    # task type
    user_id = Column(Integer)         # operating user
    excel_file = Column(String(255))  # name of the imported or exported file
    result = Column(String(255))      # task result (the tasks store a rough progress percentage here)
    error_file = Column(String(255))  # name of the error-data file generated when an import fails
    params = Column(Text)             # parameter string for the stored procedure / SQL call
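
The models module above and the export tasks below all import their settings from a celeryconfig module that the original post never shows. A minimal sketch might look like the following; every host, credential, the BROKER_URL and the CELERY_RESULT_BACKEND line are assumptions with placeholder values, and only the constant names are taken from the imports used elsewhere in this post:

# encoding: utf-8
# celeryconfig.py -- illustrative placeholder values only; adjust to your environment.

# Celery broker and database result backend (assumed; the code reads results from a DB table).
BROKER_URL = "amqp://guest:guest@localhost:5672//"
CELERY_RESULT_BACKEND = "database"
CELERY_RESULT_DBURI = "mysql://user:password@localhost:3306/wifi_db"

# MySQL connection used by the MySQL export task.
MYSQL_HOST = "localhost"
MYSQL_PORT = 3306
MYSQL_USER = "user"
MYSQL_PASSWD = "password"
MYSQL_DB = "wifi_db"
MYSQL_CHARSET = "utf8"

# Oracle connection and row limit used by the Oracle export task.
ORACLE_USER = "report_user"
ORACLE_PASSWD = "password"
ORACLE_DSN = "localhost:1521/orcl"
REPORT_EXPORT_NUM = 65535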


MySQL export example:


import sys, os
from celery.task import task
from models import session, CeleryTask
import MySQLdb
import MySQLdb.cursors
from celeryconfig import MYSQL_CHARSET, MYSQL_DB, MYSQL_HOST, MYSQL_PASSWD, MYSQL_PORT, MYSQL_USER
from export_help import get_columns
from export_config import ap_columns, ap_columns_dict, ap_state, fit_ap, indoor, managed, site_columns, site_columns_dict, is_checked, pro_status, alarm_columns, alarm_columns_dict
import csv
import codecs

@task(time_limit=1800, soft_time_limit=1200)
def export_excel(task_id):
    if sys.getdefaultencoding() != 'utf-8':
        reload(sys)
        sys.setdefaultencoding('utf-8')
    task = session.query(CeleryTask).get(task_id)
    task.result = "35"  # progress: 35%
    session.commit()

    conn = MySQLdb.connect(host=MYSQL_HOST, user=MYSQL_USER, passwd=MYSQL_PASSWD, db=MYSQL_DB, charset=MYSQL_CHARSET, port=MYSQL_PORT)
    cursor = conn.cursor(cursorclass=MySQLdb.cursors.DictCursor)
    export_type = task.task_type
    if export_type == "site_query_export":
        cursor.execute("drop table if exists port_ap_temp")
        cursor.execute('''create temporary table if not exists port_ap_temp
            (port int not null, ap_num int, fit_ap_num int, fat_ap_num int, on_line int, off_line int,
             index port_ap_temp_port (port))''')
        cursor.execute('''insert into port_ap_temp
            (select port, count(id) ap_num, count(case ap_fit when 1 then id else null end) fit_ap_num,
             count(case ap_fit when 1 then null else id end) fat_ap_num,
             count(case when ap_fit = 1 and managed_state = 1 then id else null end) on_line,
             count(case when ap_fit = 1 and managed_state = 1 then null else id end) off_line
             from mit_aps group by port)''')
        cursor.execute("drop table if exists port_sw_temp")
        # Create a second temporary table to pre-aggregate the switch data
        cursor.execute('''create temporary table if not exists port_sw_temp
            (port int not null, sw_num int, index port_sw_temp_port (port))''')
        cursor.execute("insert into port_sw_temp (select port, count(*) sw_num from mit_switchs group by port)")
    cursor.execute(task.params)  # task.params holds the export SQL
    results = cursor.fetchall()
    task.result = "50"  # progress: 50%
    session.commit()

    export_excel = export_type + u'_' + str(task.id) + ".csv"
    root_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), '..')
    export_path = os.path.join(root_path, 'public', 'export')
    if not os.path.exists(export_path):
        os.mkdir(export_path)
    csv_file_dest = os.path.join(export_path, export_excel)

    if export_type == "ap_query_export":
        write_aps(results, csv_file_dest)
    elif export_type == "site_query_export":
        code_data = get_dict_data(cursor)
        write_sites(results, csv_file_dest, code_data)
    task.error_file = export_excel
    task.result = "100"  # progress: 100%
    session.commit()
    cursor.close()
    conn.close()

def write_aps(results, csv_file_dest):
    outputFile = codecs.open(csv_file_dest, 'w', 'gbk')  # 'wb'
    output = csv.writer(outputFile, dialect='excel')
    ap_cols, ap_cols_dict = get_columns("ap")
    export_ap_columns = ap_columns + ap_cols
    ap_columns_dict.update(ap_cols_dict)
    headers = []
    for col in export_ap_columns:
        headers.append(ap_columns_dict[col])
    output.writerow(headers)
    for result in results:
        r = []
        try:
            for col, col_name in enumerate(export_ap_columns):
                if col_name == "transfer_type":
                    r.append(result.get("transfer_type_name", " "))
                elif col_name == "power_type":
                    r.append(result.get("power_type_name", " "))
                elif col_name == "ap_state":
                    r.append(ap_state.get(result.get(col_name, " ")))
                elif col_name == "managed_state":
                    r.append(managed.get(result.get(col_name, " ")))
                elif col_name == "is_indoor":
                    r.append(indoor.get(result.get(col_name, " ")))
                else:
                    r.append(result.get(col_name, " "))
            output.writerow(r)
        except:
            print "export ap except"
    outputFile.close()


def write_sites(results, csv_file_dest, code_data):
    outputFile = codecs.open(csv_file_dest, 'w', 'gbk')  # 'wb'
    output = csv.writer(outputFile, dialect='excel')
    site_cols, site_cols_dict = get_columns("site")
    export_site_columns = site_columns + site_cols
    site_columns_dict.update(site_cols_dict)
    headers = []
    for col in export_site_columns:
        headers.append(site_columns_dict[col])
    output.writerow(headers)
    for result in results:
        r = []
        try:
            for col, col_name in enumerate(export_site_columns):
                if col_name in ["ap_type", "sw_type", "phase", "document", "net_state", "site_level", "transfer_type"]:
                    r.append(code_data.get(result.get(col_name, " ")))
                elif col_name == "is_checked":
                    r.append(is_checked.get(result.get(col_name, " ")))
                elif col_name == "project_status":
                    r.append(pro_status.get(result.get(col_name, " ")))
                else:
                    r.append(result.get(col_name, " "))
            output.writerow(r)
        except:
            print "export site except"

    outputFile.close()


if __name__ == "__main__":
    id = sys.argv[1]
    export_excel(id)
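
In normal operation this task is enqueued by ultra_export_main.py via export_excel.delay(id); for local debugging it can also be called synchronously, which is what the __main__ block above does. For illustration only (the id 42 below is hypothetical and must exist in celery_tasks):

# Enqueue through Celery, as ultra_export_main.py does (42 is a hypothetical celery_tasks.id):
res = export_excel.delay(42)
print(res.task_id)

# Or run synchronously in the current process, bypassing the worker, for debugging:
export_excel(42)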


The Excel/CSV header names are defined in the imported configuration file (export_config):

# -*- coding: utf-8 -*-
ap_columns = ["province",
              "city",
              "town",
              "hp_id",
              "port_name",
              "site_type_name",
              "ap_cn",
              "ap_level_name",
              "mac",
              "longitude",
              "address",
              "managed_state",
              "created_at"]

ap_columns_dict = {
    "province": u'省',
    "city": u'地市',
    "town": u'区县',
    "ac_ip": u"所属AC的IP地址",
    "sw_cn": u"所属Switch",
    "uplink_bandwidth": u"上联带宽(Mbps)",
    "port_z": u"上联设备端口",
    "odf_z": u"对端ODF",
    "port_a": u"本端端口",
    "odf_a": u"本端ODF",
    "integration_unit": u"集成商",
    "supply_name": u"代维厂家",
    "remark": u"备注",
    "address": u"位置",
    "managed_state": u"在线状态",
    "created_at": u"创建日期",
}



Oracle export example:


# encoding: utf-8
import sys, os
from celery.task import task
from models import session, CeleryTask
from xlwt import Workbook, XFStyle
import cx_Oracle
from celeryconfig import ORACLE_DSN, ORACLE_USER, ORACLE_PASSWD, REPORT_EXPORT_NUM
from export_help import oracle_encode, format_data, get_format_dict
from ultra_export_config_apoffline import apoffline_columns_dict
import csv
import codecs

@task(time_limit=1800, soft_time_limit=1200)
def export_excel_apoffline(task_id):
    if sys.getdefaultencoding() != 'utf-8':
        reload(sys)
        sys.setdefaultencoding('utf-8')
    os.environ['NLS_LANG'] = 'SIMPLIFIED CHINESE_CHINA.UTF8'
    task = session.query(CeleryTask).get(task_id)
    task.result = "35"  # progress: 35%
    session.commit()
    # Connect to the Oracle database
    conn = cx_Oracle.connect(ORACLE_USER, ORACLE_PASSWD, ORACLE_DSN)
    encoding = conn.encoding
    cursor = conn.cursor()
    # Run the SQL stored in task.params and fetch the result set
    cursor.execute(task.params)
    results = cursor.fetchall()
    task.result = "50"  # progress: 50%
    session.commit()
    export_type = task.task_type
    export_name = task.name
    export_excel = export_name + u'_' + str(task.id) + ".csv"
    root_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), '..')
    export_path = os.path.join(root_path, 'public', 'export')
    if not os.path.exists(export_path):
        os.mkdir(export_path)
    # Destination of the generated CSV file
    csv_file_dest = os.path.join(export_path, export_excel)
    # Column names of the query result, taken from the cursor description
    colname = [desc[0] for desc in cursor.description]
    outputFile = codecs.open(csv_file_dest, 'w', 'gbk')
    output = csv.writer(outputFile, dialect='excel')
    # Write the CSV header row
    headers = []
    for col in colname:
        headers.append(apoffline_columns_dict[col])
    output.writerow(headers)
    # Write the query rows
    for result in results:
        r = []
        try:
            for value in result:
                r.append(value)
            output.writerow(r)
        except:
            print "export apoffline except"
    outputFile.close()
    task.error_file = export_excel
    task.result = "100"  # progress: 100%
    session.commit()
    session.close()
    cursor.close()
    conn.close()
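
Note how every task writes a rough progress percentage into celery_tasks.result ("35", "50", "100"). The page rendered by export_mobile presumably polls this value to show progress and to decide when the file under public/export is ready; that Rails code is not shown in the post. A minimal sketch of such a check, reusing the models module above (the function name and the id 42 are hypothetical):

# encoding: utf-8
from models import session, CeleryTask

def export_progress(celery_task_id):
    # Read back the progress percentage and output file name the export tasks write.
    task = session.query(CeleryTask).get(celery_task_id)
    if task is None:
        return None
    return int(task.result or 0), task.error_file  # e.g. (100, 'ap_query_export_42.csv') when finished

if __name__ == "__main__":
    print(export_progress(42))  # 42 is a hypothetical celery_tasks.id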
