数据库搭载、导入、清洗、查询"/>
PGSQL个人数据库搭载、导入、清洗、查询
PS:如果各位有疑问的话,可以留下微信,我看到会第一时间加的,以后可以多多交流
CREATE TABLE data1(姓 varchar(10),户籍地城市编号 text CHECK (length(户籍地城市编号)=6));
\COPY data1 FROM ‘C:\Users\Administrator\Desktop\数据可视化课程\考核\考核项目2_数据清洗及筛选综合实践\data01.csv’ WITH CSV HEADER ENCODING ‘utf8’
–导入data1数据,设置约束
CREATE TABLE data2(LIKE data1);
\COPY data2 FROM 'C:\Users\Administrator\Desktop\数据可视化课程\考核\考核项目2_数据清洗及筛选综合实践\data02.csv' WITH CSV HEADER ENCODING 'utf8'
--导入data2数据CREATE TABLE cities(行政编码 int,省 varchar(10),市 varchar(10),区县 text,lng numeric,lat numeric);
\COPY cities FROM 'C:\Users\Administrator\Desktop\数据可视化课程\考核\考核项目2_数据清洗及筛选综合实践\中国行政代码对照表.csv' WITH CSV HEADER ENCODING 'utf8'
--导入cities数据SELECT * FROM data1 WHERESUBSTRING(户籍地城市编号,1,1) NOT IN ('1','2','3','4','5','6','7','8','9','0') ORSUBSTRING(户籍地城市编号,2,1) NOT IN ('1','2','3','4','5','6','7','8','9','0') ORSUBSTRING(户籍地城市编号,3,1) NOT IN ('1','2','3','4','5','6','7','8','9','0') ORSUBSTRING(户籍地城市编号,4,1) NOT IN ('1','2','3','4','5','6','7','8','9','0') ORSUBSTRING(户籍地城市编号,5,1) NOT IN ('1','2','3','4','5','6','7','8','9','0') ORSUBSTRING(户籍地城市编号,6,1) NOT IN ('1','2','3','4','5','6','7','8','9','0');
--查看data1问题数据SELECT * FROM data2 WHERESUBSTRING(户籍地城市编号,1,1) NOT IN ('1','2','3','4','5','6','7','8','9','0') ORSUBSTRING(户籍地城市编号,2,1) NOT IN ('1','2','3','4','5','6','7','8','9','0') ORSUBSTRING(户籍地城市编号,3,1) NOT IN ('1','2','3','4','5','6','7','8','9','0') ORSUBSTRING(户籍地城市编号,4,1) NOT IN ('1','2','3','4','5','6','7','8','9','0') ORSUBSTRING(户籍地城市编号,5,1) NOT IN ('1','2','3','4','5','6','7','8','9','0') ORSUBSTRING(户籍地城市编号,6,1) NOT IN ('1','2','3','4','5','6','7','8','9','0');
--查看data2问题数据DELETE FROM data1 WHERESUBSTRING(户籍地城市编号,1,1) NOT IN ('1','2','3','4','5','6','7','8','9','0') ORSUBSTRING(户籍地城市编号,2,1) NOT IN ('1','2','3','4','5','6','7','8','9','0') ORSUBSTRING(户籍地城市编号,3,1) NOT IN ('1','2','3','4','5','6','7','8','9','0') ORSUBSTRING(户籍地城市编号,4,1) NOT IN ('1','2','3','4','5','6','7','8','9','0') ORSUBSTRING(户籍地城市编号,5,1) NOT IN ('1','2','3','4','5','6','7','8','9','0') ORSUBSTRING(户籍地城市编号,6,1) NOT IN ('1','2','3','4','5','6','7','8','9','0');
--删除data1问题数据 DELETE FROM data2 WHERESUBSTRING(户籍地城市编号,1,1) NOT IN ('1','2','3','4','5','6','7','8','9','0') ORSUBSTRING(户籍地城市编号,2,1) NOT IN ('1','2','3','4','5','6','7','8','9','0') ORSUBSTRING(户籍地城市编号,3,1) NOT IN ('1','2','3','4','5','6','7','8','9','0') ORSUBSTRING(户籍地城市编号,4,1) NOT IN ('1','2','3','4','5','6','7','8','9','0') ORSUBSTRING(户籍地城市编号,5,1) NOT IN ('1','2','3','4','5','6','7','8','9','0') ORSUBSTRING(户籍地城市编号,6,1) NOT IN ('1','2','3','4','5','6','7','8','9','0');
--删除data2问题数据 ALTER TABLE data1 DROP CONSTRAINT data1_户籍地城市编号_check;
--删除约束
ALTER TABLE data1 ALTER COLUMN 户籍地城市编号 TYPE int USING(户籍地城市编号::int);
ALTER TABLE data2 ALTER COLUMN 户籍地城市编号 TYPE int USING(户籍地城市编号::int);
--修改两个表内户籍地城市编号的字符类型CREATE TABLE data ASSELECT * FROM data1 UNION ALL SELECT * FROM data2;
--合并data1和data2为dataCREATE TABLE result_data ASSELECT a.姓,a.户籍地城市编号,b.省,b.市,b.区县,b.lng,b.lat FROM data a INNER JOIN cities bON a.户籍地城市编号 = b.行政编码;
--连接data和cities为result_dataSELECT 姓,COUNT(姓),CAST(COUNT(姓) AS numeric)/ (SELECT COUNT(姓) FROM result_data) AS 姓氏占比FROM result_data GROUP BY 姓 ORDER BY COUNT(姓) DESC LIMIT 20;
--转换COUNT姓的格式后求出占比
更多推荐
PGSQL个人数据库搭载、导入、清洗、查询
发布评论