实战整理

编程入门 行业动态 更新时间:2024-10-26 06:29:30

<a href=https://www.elefans.com/category/jswz/34/1769775.html style=实战整理"/>

实战整理

数据集链接:淘宝用户购物行为数据集_数据集-阿里云天池

因原数据集过大,电脑和MySQL跑不动,故截取前一百万条数据进行实战演练、

1、导入数据

利用navicat软件直接导入下载好的Excel文件(较大数据集可用kettle导入)

 2、数据预处理

use aori;
desc userbehavior;
select * from userbehavior limit 5;--更改字段名alter table userbehavior change f1 user_id int;
alter table userbehavior change f2 item_id int;
alter table userbehavior change f3 category_id int;
alter table userbehavior change f4 behavior_type varchar(5);
alter table userbehavior change f5 timestamps int;--检查空值select * from userbehavior where user_id is null;
select * from userbehavior where item_id is null;
select * from userbehavior where category_id is null;
select * from userbehavior where behavior_type is null;
select * from userbehavior where timestamps is null;--检查重复值
select user_id,item_id,timestamps from userbehavior
group by user_id,item_id,timestamps
having count(*) > 1;--去重-设置主键alter table userbehavior add id int first;
alter table userbehavior modify id int primary key auto_increment;
select * from userbehavior limit 5;-去除重复值delete userbehavior from 
userbehavior,
(
select user_id,item_id,timestamps,min(id) id from userbehavior
group by user_id,item_id,timestamps
having count(*) > 1
) t2
where userbehavior.user_id = t2.user_id
and userbehavior.item_id = t2.item_id
and userbehavior.timestamps  = t2.timestamps
and userbehavior.id > t2.id--新增日期
- datetimealter table userbehavior add datetimes TIMESTAMP(0);
update userbehavior set datetimes=FROM_UNIXTIME(timestamps);
select * from userbehavior limit 5;-time
alter table userbehavior add dates char(10);
alter table userbehavior add times char(8);
alter table userbehavior add hours char(2);update userbehavior set dates=substring(datetimes,1,10);
update userbehavior set times=substring(datetimes,12,8);
update userbehavior set hours=substring(datetimes,12,2);
select * from userbehavior limit 5;-- 去异常
select max(datetimes),min(datetimes) from userbehavior;delete from userbehavior
where datetimes < '2017-11-25 00:00:00'
or datetimes > '2017-12-03 23:59:59';--数据概览
desc userbehavior;
select * from userbehavior limit 5;
select count(1) from userbehavior; # 999530条记录

3、时间序列分析

-- 统计日期-小时行为
select dates,hours
,count(if(behavior_type='pv',behavior_type,null)) 'pv'
,count(if(behavior_type='cart',behavior_type,null)) 'cart'
,count(if(behavior_type='fav',behavior_type,null)) 'fav'
,count(if(behavior_type='buy',behavior_type,null)) 'buy'
from userbehavior
group by dates,hours
order by dates,hourscreate table date_hour_behavior(
dates char(10),
hours char(2),
pv int,
cart int,
fav int,
buy int
);insert into date_hour_behavior
select dates,hours
,count(if(behavior_type='pv',behavior_type,null)) 'pv'
,count(if(behavior_type='cart',behavior_type,null)) 'cart'
,count(if(behavior_type='fav',behavior_type,null)) 'fav'
,count(if(behavior_type='buy',behavior_type,null)) 'buy'
from userbehavior
group by dates,hours
order by dates,hoursselect * from date_hour_behavior 

 4、获客情况

--创建临时表
create table temp_behaviors like userbehavior--截取insert into temp_behaviors
select * from userbehavior limit 100000;select * from temp_behaviors;-- pv(Page View页面浏览量)select dates
,count(behavior_type) 'pv'
from temp_behaviors
where behavior_type = 'pv'
group by dates;-- UV(Uniqque Visitor独立访客数)select dates
,count(distinct user_id) 'uv'
from temp_behaviors
where behavior_type = 'pv'
group by dates;-- 一条语句select dates
,count(behavior_type) 'pv'
,count(distinct user_id) 'uv'
,round(count(behavior_type)/count(distinct user_id),1) 'pv/uv' 
from temp_behaviors
where behavior_type = 'pv'
group by dates;-- 处理真实数据
create table pv_uv_puv(
dates char(10),
pv int(9),
uv int(9),
puv decimal(10,1)
);insert into pv_uv_puv
select dates
,count(behavior_type) 'pv'
,count(distinct user_id) 'uv'
,round(count(behavior_type)/count(distinct user_id),1) 'pv/uv' 
from userbehavior
where behavior_type = 'pv'
group by dates;select * from pv_uv_puv;

 5、留存情况

  ①留存率计算

select user_id,dates
from userbehavior 
group by user_id,dates;-- 自关联select * from 
(
select user_id,dates
from userbehavior 
group by user_id,dates
) a
,(
select user_id,dates
from userbehavior 
group by user_id,dates
) b
where a.user_id = b.user_id
and a.dates <= b.dates-- 留存数
select a.dates
,count(if(datediff(b.dates,a.dates)=0,b.user_id,null)) retention_0
,count(if(datediff(b.dates,a.dates)=1,b.user_id,null)) retention_1
,count(if(datediff(b.dates,a.dates)=3,b.user_id,null)) retention_3
,count(if(datediff(b.dates,a.dates)=1,b.user_id,null))/count(if(datediff(b.dates,a.dates)=0,b.user_id,null)) retention_rate_1
from
(
select user_id,dates
from userbehavior 
group by user_id,dates
) a
,(
select user_id,dates
from userbehavior 
group by user_id,dates
) b
where a.user_id = b.user_id
and a.dates <= b.dates
group by a.dates-- 保存结果
create table retention_rate(
dates char(10),
retention_1 float
);insert into retention_rate
select a.dates
,count(if(datediff(b.dates,a.dates)=1,b.user_id,null))/count(if(datediff(b.dates,a.dates)=0,b.user_id,null)) retention_rate_1
from
(
select user_id,dates
from userbehavior 
group by user_id,dates
) a
,(
select user_id,dates
from userbehavior 
group by user_id,dates
) b
where a.user_id = b.user_id
and a.dates <= b.dates
group by a.datesselect * from retention_rate;

②跳失用户计算

-- 跳失率-- 跳失用户select count(*)
from
(
select user_id from userbehavior
group by user_id
having count(behavior_type)=1
) a;select sum(pv) from pv_uv_puv;

 6、行为路径分析


create view user_behavior_view as 
select user_id,item_id
,count(if(behavior_type='pv',behavior_type,null)) 'pv'
,count(if(behavior_type='fav',behavior_type,null)) 'fav'
,count(if(behavior_type='cart',behavior_type,null)) 'cart'
,count(if(behavior_type='buy',behavior_type,null)) 'buy'
from userbehavior
group by user_id,item_id-- 用户行为标准化create view user_behavior_standard as 
select user_id,item_id
,(case when pv>0 then 1 else 0 end) '浏览'
,(case when fav>0 then 1 else 0 end) '收藏'
,(case when cart>0 then 1 else 0 end) '加购'
,(case when buy>0 then 1 else 0 end) '购买'
from user_behavior_view-- 路径类型create view user_behavior_path as 
select *,
concat(浏览,收藏,加购,购买) as path_type
from user_behavior_standard as a 
where a.购买 > 0;-- 统计各类型购买数量create view path_count as 
select path_type
,count(*) path_type_num
from user_behavior_path
group by path_type
order by path_type_num desc;select * from path_count;

-- 改名表create table change_name(
path_type char(4),
description varchar(40));insert into change_name
values('0001','购买'),
('1001','浏览购买'),
('0011','加购购买'),
('1011','浏览加购购买'),
('0101','收藏购买'),
('1101','浏览收藏购买'),
('0111','收藏加购购买'),
('1111','浏览收藏加购购买');select * from change_name;create table path_result
(description varchar(40)
,path_type_num int);insert into path_result
select description,
path_type_num
from path_count
join change_name
on path_count.path_type = change_name.path_type;select * from path_result;

 7、用户转化率分析


-- 统计各类行为用户数select behavior_type
,count(DISTINCT user_id) user_num
from userbehavior
group by behavior_type
order by behavior_type desccreate table behavior_user_num(
behavior_type varchar(5),
user_num int
);insert into behavior_user_num
select behavior_type
,count(DISTINCT user_id) user_num
from userbehavior
group by behavior_type
order by behavior_type desc;select * from behavior_user_num;

-- 统计各类行为数量select behavior_type
,count(*) behavior_count_num
from userbehavior
group by behavior_type
order by behavior_type desccreate table behavior_num(
behavior_type varchar(5),
behavior_count_num int
);insert into behavior_num
select behavior_type
,count(*) behavior_count_num
from userbehavior
group by behavior_type
order by behavior_type desc;select * from behavior_num;

 8、TOP商品

-- 品类浏览量TOP10create table popular_categories(
category_id int,
pv int
);insert into popular_categories
select category_id
,count(if(behavior_type='pv',behavior_type,null))  '品类浏览量'
from userbehavior
group by category_id
order by 2 desc
limit 10;select * from popular_categories;

-- 商品浏览量TOP10create table popular_items(
item_id int,
pv int
);insert into popular_items
select item_id
,count(if(behavior_type='pv',behavior_type,null))  '商品浏览量'
from userbehavior
group by item_id
order by 2 desc
limit 10;select * from popular_items;

-- 各类别商品浏览量TOP10create table popular_cateitems(
category_id int,
item_id int,
pv int
);insert into popular_cateitems
select category_id,item_id,品类商品浏览量 from
(
select category_id
,item_id
,count(if(behavior_type='pv',behavior_type,null))  '品类商品浏览量'
,rank() over(partition by category_id order by count(if(behavior_type='pv',behavior_type,null)) desc) r
from userbehavior
group by category_id,item_id
order by 3 desc
) a 
where a.r = 1
order by a.品类商品浏览量 desc
limit 10;select * from popular_cateitems;

9、RFM模型


-- 最近购买时间
select user_id
,max(dates) '最近购买时间'
from userbehavior
where behavior_type = 'buy'
group by user_id
order by 2 desc;-- 购买次数
select user_id
,count(user_id) '购买次数'
from userbehavior
where behavior_type = 'buy'
group by user_id
order by 2 desc;-- 统一
select user_id
,max(dates) '最近购买时间'
,count(user_id) '购买次数'
from userbehavior
where behavior_type = 'buy'
group by user_id
order by 2 desc,3 desc;-- 存储drop table if exists rfm_model;
create table rfm_model(
user_id int,
recently char(10),
frequency int
);insert into rfm_model
select user_id
,max(dates) '最近购买时间'
,count(user_id) '购买次数'
from userbehavior
where behavior_type = 'buy'
group by user_id
order by 2 desc,3 desc;-- 根据购买次数对用户进行分层alter table rfm_model add column fscore int;update rfm_model
set fscore = 
case when frequency >= 20 then 5
when frequency between 15 and 19 then 4
when frequency between 10 and 15 then 3
when frequency between 5 and 10 then 2
else 1
end;-- 根据最近购买时间对用户进行分层alter table rfm_model add column rscore int;update rfm_model
set rscore = 
case when recently = '2017-12-03' then 5
when recently in ('2017-12-01','2017-12-02') then 4
when recently in ('2017-11-30','2017-11-29') then 3
when recently in ('2017-11-28','2017-11-27') then 2
else 1
end;-- 分层- 设置变量
set @f_avg = null;
set @r_avg = null;
select avg(fscore) into @f_avg from rfm_model;
select avg(rscore) into @r_avg from rfm_model;select *
,(case
when fscore > @f_avg and rscore > @r_avg then '价值用户'
when fscore > @f_avg and rscore < @r_avg then '保持用户'
when fscore < @f_avg and rscore > @r_avg then '发展用户'
when fscore < @f_avg and rscore < @r_avg then '挽留用户'
end) class
from rfm_model;-- 插入alter table rfm_model add column class varchar(40);update rfm_model
set class=
case
when fscore > @f_avg and rscore > @r_avg then '价值用户'
when fscore > @f_avg and rscore < @r_avg then '保持用户'
when fscore < @f_avg and rscore > @r_avg then '发展用户'
when fscore < @f_avg and rscore < @r_avg then '挽留用户'
end;select * from rfm_model;

-- 统计各分区用户数
select class
,count(user_id) class_num
from rfm_model
group by class;

 

更多推荐

实战整理

本文发布于:2023-06-20 04:23:26,感谢您对本站的认可!
本文链接:https://www.elefans.com/category/jswz/34/796030.html
版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系,我们将在24小时内删除。
本文标签:实战

发布评论

评论列表 (有 0 条评论)
草根站长

>www.elefans.com

编程频道|电子爱好者 - 技术资讯及电子产品介绍!