聊天记录数据可视化
pyecharts qq聊天记录数据可视化
数据说明
1.本次数据来源于qq群聊天记录
2.一共3123条数据,可以直接用qq导出聊天记录
3.使用到的库有pyecharts jieba
发言排行
index_id | name | count |
---|---|---|
1 | 闰土 | 739 |
2 | 天天 | 495 |
3 | 温柔少女豆瓣酱β | 308 |
4 | 7酱 | 296 |
5 | 逢考必过 | 271 |
6 | 土猹 | 265 |
7 | 辞 | 187 |
8 | 不吃香菜 | 125 |
9 | 予 | 89 |
10 | 靓仔飞机 | 66 |
时间段统计
词频分析
index_id | Word | count |
---|---|---|
1 | ? | 174 |
2 | 懂 | 74 |
3 | kpdd | 73 |
4 | 天天 | 60 |
5 | 土狗 | 51 |
6 | 逼 | 51 |
7 | 哥哥 | 43 |
8 | 傻 | 41 |
9 | 闰土 | 38 |
10 | 排位 | 38 |
end
# QQ 聊天记录制图: parse an exported QQ group chat log, then render
# a speaker-ranking bar chart, an hour-of-day line chart, and a word cloud.
from pyecharts.charts import Bar, Line, WordCloud
from pyecharts import options as opts
import jieba
import collections
import re

# Messages per hour of day; keys are '0'..'23' as strings (same mapping as
# the original hand-written literal). Filled in by parse().
hour_list = {str(h): 0 for h in range(24)}
# qq id -> {'name': display name, 'count': message count}. Filled by parse().
content_count = {}
# All message bodies concatenated, tab-separated, for word-cloud segmentation.
content_all = ''


def parse(path="C:/Users/Administrator/Desktop/土狗大队.txt"):
    """Read the exported chat log at *path* and fill the module-level
    accumulators hour_list, content_count and content_all.

    The default path preserves the original hard-coded location; pass a
    different *path* to parse another export.
    """
    global content_all
    with open(path, "r", encoding='utf-8') as f:
        text = f.read()
    # One record per message: "YYYY-MM-DD H:MM:SS name(qq)" (or name<qq>)
    # followed by the message body on the next line. The character classes
    # deliberately keep the original delimiters ( , < and ) , > unchanged.
    record_re = re.compile(
        r'(\d{4}-\d{2}-\d{2} \d{1,2}:\d{2}:\d{2}) (.*)[(,<](.*)[),>][\n](.*)[\n]'
    )
    # Hour-of-day extractor, compiled once instead of per message.
    hour_re = re.compile(r'[\s](\d{1,2}):')
    for time_str, name, qq, content in record_re.findall(text):
        print('日期:', time_str)
        print('昵称:', name)
        print('qq:', qq)
        print('消息:', content)
        print()
        # 统计时间段 — tally the message into its hour bucket.
        time_hour = hour_re.findall(time_str)[0]
        hour_list[time_hour] += 1
        # 消息文本 — accumulate the body for later word segmentation.
        content_all += content + "\t"
        # 发言统计 — per-sender count; fall back to the qq id when the
        # display name is empty. Narrowed from a bare except: only a
        # missing key means "first message from this sender".
        try:
            content_count[qq]['count'] += 1
        except KeyError:
            content_count[qq] = {'name': name or qq, 'count': 1}
    print(hour_list)
    print(content_all)
    print(content_count)


def top():
    """Render a bar chart of the 10 most active speakers to top.html."""
    ranking = [
        (qq, info['name'], info['count'])
        for qq, info in content_count.items()
    ]
    # Sort by message count (third tuple element), descending.
    ranking.sort(key=lambda item: item[2], reverse=True)
    print(ranking)
    top10 = ranking[:10]
    xaxis = [name for _, name, _ in top10]
    yaxis = [count for _, _, count in top10]
    bar = Bar()
    bar.add_xaxis(xaxis)
    bar.add_yaxis('发言top10', yaxis, gap="80%")
    bar.set_global_opts(
        title_opts=opts.TitleOpts(title="土狗大队", subtitle="8月29号-9月4号"),
        xaxis_opts=opts.AxisOpts(axislabel_opts={"rotate": 30}),
    )
    bar.render('top.html')


def time():
    """Render a line chart of message counts per hour of day to time.html."""
    xaxis = list(hour_list)
    yaxis = [hour_list[hour] for hour in xaxis]
    (
        Line(init_opts=opts.InitOpts(width="600px", height="400px"))
        .set_global_opts(
            title_opts=opts.TitleOpts(title="土狗大队", subtitle="8月29号-9月4号")
        )
        .add_xaxis(xaxis_data=xaxis)
        .add_yaxis(series_name="发言时间段统计", y_axis=yaxis)
        .render("time.html")
    )


def wordCloud():
    """Segment all message text with jieba and render the 40 most frequent
    words as a word cloud in wordcloud.html."""
    seg_list_exact = jieba.lcut(content_all, cut_all=False)  # 精确模式分词
    # 自定义去除词库 — stop words / noise tokens; a set for O(1) membership.
    remove_words = {
        u'\t', u'图片', u'[', u']', u' ', u'我', u'了', u'你', u'的', u'是',
        u'就', u'都', u',', u'不', u'吗', u'@', u'还', u'没', u'这', u'好',
        u'有', u'在', u'也', u'吧', u'。', u'月', u'说', u'打', u'她', u'表情',
    }
    object_list = [word for word in seg_list_exact if word not in remove_words]
    word_counts = collections.Counter(object_list)
    print(word_counts.most_common(40))
    (
        WordCloud()
        .add(
            series_name="词频",
            data_pair=word_counts.most_common(40),
            word_size_range=[18, 198],
        )
        .set_global_opts(
            title_opts=opts.TitleOpts(
                title="土狗大队",
                subtitle="8月29号-9月4号",
                title_textstyle_opts=opts.TextStyleOpts(font_size=23),
            ),
            tooltip_opts=opts.TooltipOpts(is_show=True),
        )
        .render("wordcloud.html")
    )


if __name__ == '__main__':
    parse()
    top()
    time()
    wordCloud()
更多推荐
pyecharts qq聊天记录数据可视化
发布评论