聊天记录数据可视化
pyecharts qq聊天记录数据可视化
数据说明
1.本次数据来源于qq群聊天记录
2.一共3123条数据,可以直接用qq导出聊天记录
3.使用到的库有pyecharts jieba
发言排行
index_id | name | count |
---|---|---|
1 | 闰土 | 739 |
2 | 天天 | 495 |
3 | 温柔少女豆瓣酱β | 308 |
4 | 7酱 | 296 |
5 | 逢考必过 | 271 |
6 | 土猹 | 265 |
7 | 辞 | 187 |
8 | 不吃香菜 | 125 |
9 | 予 | 89 |
10 | 靓仔飞机 | 66 |
时间段统计
词频分析
index_id | Word | count |
---|---|---|
1 | ? | 174 |
2 | 懂 | 74 |
3 | kpdd | 73 |
4 | 天天 | 60 |
5 | 土狗 | 51 |
6 | 逼 | 51 |
7 | 哥哥 | 43 |
8 | 傻 | 41 |
9 | 闰土 | 38 |
10 | 排位 | 38 |
end
# QQ 聊天记录制图: parse an exported QQ group chat log, then render
# a speaker-ranking bar chart, an hour-of-day line chart, and a word cloud.
from pyecharts.charts import Bar, Line, WordCloud
from pyecharts import options as opts
import jieba
import collections
import re

# Messages per hour of day; keys are '0'..'23' as strings (same mapping as
# the original hand-written literal). Filled in by parse().
hour_list = {str(h): 0 for h in range(24)}
# qq id -> {'name': display name, 'count': message count}. Filled by parse().
content_count = {}
# All message bodies concatenated, tab-separated, for word-cloud segmentation.
content_all = ''


def parse(path="C:/Users/Administrator/Desktop/土狗大队.txt"):
    """Read the exported chat log at *path* and fill the module-level
    accumulators hour_list, content_count and content_all.

    The default path preserves the original hard-coded location; pass a
    different *path* to parse another export.
    """
    global content_all
    with open(path, "r", encoding='utf-8') as f:
        text = f.read()
    # One record per message: "YYYY-MM-DD H:MM:SS name(qq)" (or name<qq>)
    # followed by the message body on the next line. The character classes
    # deliberately keep the original delimiters ( , < and ) , > unchanged.
    record_re = re.compile(
        r'(\d{4}-\d{2}-\d{2} \d{1,2}:\d{2}:\d{2}) (.*)[(,<](.*)[),>][\n](.*)[\n]'
    )
    # Hour-of-day extractor, compiled once instead of per message.
    hour_re = re.compile(r'[\s](\d{1,2}):')
    for time_str, name, qq, content in record_re.findall(text):
        print('日期:', time_str)
        print('昵称:', name)
        print('qq:', qq)
        print('消息:', content)
        print()
        # 统计时间段 — tally the message into its hour bucket.
        time_hour = hour_re.findall(time_str)[0]
        hour_list[time_hour] += 1
        # 消息文本 — accumulate the body for later word segmentation.
        content_all += content + "\t"
        # 发言统计 — per-sender count; fall back to the qq id when the
        # display name is empty. Narrowed from a bare except: only a
        # missing key means "first message from this sender".
        try:
            content_count[qq]['count'] += 1
        except KeyError:
            content_count[qq] = {'name': name or qq, 'count': 1}
    print(hour_list)
    print(content_all)
    print(content_count)


def top():
    """Render a bar chart of the 10 most active speakers to top.html."""
    ranking = [
        (qq, info['name'], info['count'])
        for qq, info in content_count.items()
    ]
    # Sort by message count (third tuple element), descending.
    ranking.sort(key=lambda item: item[2], reverse=True)
    print(ranking)
    top10 = ranking[:10]
    xaxis = [name for _, name, _ in top10]
    yaxis = [count for _, _, count in top10]
    bar = Bar()
    bar.add_xaxis(xaxis)
    bar.add_yaxis('发言top10', yaxis, gap="80%")
    bar.set_global_opts(
        title_opts=opts.TitleOpts(title="土狗大队", subtitle="8月29号-9月4号"),
        xaxis_opts=opts.AxisOpts(axislabel_opts={"rotate": 30}),
    )
    bar.render('top.html')


def time():
    """Render a line chart of message counts per hour of day to time.html."""
    xaxis = list(hour_list)
    yaxis = [hour_list[hour] for hour in xaxis]
    (
        Line(init_opts=opts.InitOpts(width="600px", height="400px"))
        .set_global_opts(
            title_opts=opts.TitleOpts(title="土狗大队", subtitle="8月29号-9月4号")
        )
        .add_xaxis(xaxis_data=xaxis)
        .add_yaxis(series_name="发言时间段统计", y_axis=yaxis)
        .render("time.html")
    )


def wordCloud():
    """Segment all message text with jieba and render the 40 most frequent
    words as a word cloud in wordcloud.html."""
    seg_list_exact = jieba.lcut(content_all, cut_all=False)  # 精确模式分词
    # 自定义去除词库 — stop words / noise tokens; a set for O(1) membership.
    remove_words = {
        u'\t', u'图片', u'[', u']', u' ', u'我', u'了', u'你', u'的', u'是',
        u'就', u'都', u',', u'不', u'吗', u'@', u'还', u'没', u'这', u'好',
        u'有', u'在', u'也', u'吧', u'。', u'月', u'说', u'打', u'她', u'表情',
    }
    object_list = [word for word in seg_list_exact if word not in remove_words]
    word_counts = collections.Counter(object_list)
    print(word_counts.most_common(40))
    (
        WordCloud()
        .add(
            series_name="词频",
            data_pair=word_counts.most_common(40),
            word_size_range=[18, 198],
        )
        .set_global_opts(
            title_opts=opts.TitleOpts(
                title="土狗大队",
                subtitle="8月29号-9月4号",
                title_textstyle_opts=opts.TextStyleOpts(font_size=23),
            ),
            tooltip_opts=opts.TooltipOpts(is_show=True),
        )
        .render("wordcloud.html")
    )


if __name__ == '__main__':
    parse()
    top()
    time()
    wordCloud()
更多推荐
pyecharts qq聊天记录数据可视化
发布评论