【Python】Python编程入门项目2:数据可视化

编程入门 行业动态 更新时间:2024-10-26 08:30:20

【Python】Python编程入门项目2:数据可视化

【Python】Python编程入门项目2:数据可视化

Python编程入门:数据可视化

数据可视化是指通过可视化表示来探索数据,它与数据挖掘紧密相关,而数据挖掘指的是使用代码来探索数据集的规律和关联。数据集可以是一行代码就能表示的小型数字列表,也可以是数以吉字节的数据。

安装matplotlib

在Windows操作系统的命令行中执行:

pip install matplotlib

绘制折线图

简单折线图

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Plot the squares of 1..5 as a simple line chart.

@author: Xinzhe
@file: mpl_squares.py
@time: 2020/01/30
"""
import matplotlib.pyplot as plt

input_values = [1, 2, 3, 4, 5]
squares = [1, 4, 9, 16, 25]

# Pass the x values explicitly; otherwise plot() would number the points from 0.
plt.plot(input_values, squares, linewidth=5)

# Set the chart title and label both axes.
plt.title("Square Numbers", fontsize=24)
plt.xlabel("Value", fontsize=14)
plt.ylabel("Square of Value", fontsize=14)

# Set the size of the tick labels.
plt.tick_params(axis="both", labelsize=14)

plt.show()

绘制散点图

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Scatter-plot the squares of 1..1000, coloured by value, and save the chart.

@author: Xinzhe
@file: scatter_squares.py
@time: 2020/01/31
"""
import matplotlib.pyplot as plt

# x_values = [1, 2, 3, 4, 5]
# y_values = [1, 4, 9, 16, 25]
x_values = list(range(1, 1001))
y_values = [x ** 2 for x in x_values]

# Colour each point by its y value using the Blues colormap.
plt.scatter(x_values, y_values, c=y_values, cmap=plt.cm.Blues,
            edgecolors='none', s=40)

# Set the chart title and label both axes.
plt.title("Square Numbers", fontsize=24)
plt.xlabel("Value", fontsize=14)
plt.ylabel("Square of Value", fontsize=14)

# Set the size of the tick labels.
plt.tick_params(axis='both', which='major', labelsize=14)

# Set the value range of each axis.
plt.axis([0, 1100, 0, 1100000])

# Save before showing: once the window opened by show() is closed the figure
# is gone, and a later savefig() would write an empty image.
plt.savefig("square_plot.png", bbox_inches='tight')
plt.show()

随机漫步图

随机漫步是这样行走得到的路径:每次行走都完全是随机的,没有明确的方向,结果是由一系列随机决策决定的。

创建RandomWalk()类

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Random-walk data generator.

@author: Xinzhe
@file: random_walk.py
@time: 2020/01/31
"""
from random import choice


class RandomWalk():
    """Generate the points of a two-dimensional random walk."""

    def __init__(self, num_points=5000):
        """Store the target number of points and start the walk at (0, 0).

        Args:
            num_points: total number of points the walk should contain,
                including the starting point.
        """
        self.num_points = num_points

        # Every walk starts at the origin.
        self.x_values = [0]
        self.y_values = [0]

    @staticmethod
    def _get_step():
        """Return one signed step: a random direction times a random distance."""
        direction = choice([1, -1])
        distance = choice([0, 1, 2, 3, 4])
        return direction * distance

    def fill_walk(self):
        """Append random steps until the walk holds ``num_points`` points."""
        while len(self.x_values) < self.num_points:
            # Draw the x step first, then the y step.
            x_step = self._get_step()
            y_step = self._get_step()

            # Reject moves that go nowhere.
            if x_step == 0 and y_step == 0:
                continue

            # The next point is the last point shifted by the step.
            self.x_values.append(self.x_values[-1] + x_step)
            self.y_values.append(self.y_values[-1] + y_step)

绘制随机漫步图

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Repeatedly generate a random walk and display it as a scatter plot.

@author: Xinzhe
@file: rw_visual.py
@time: 2020/01/31
"""
import matplotlib.pyplot as plt
from PythonPrograming.dataplot.random_walk import RandomWalk

# Keep simulating walks for as long as the user wants another one.
while True:
    # Build a walk and compute all of its points.
    rw = RandomWalk(50000)
    rw.fill_walk()

    # Set the size of the plotting window.
    plt.figure(dpi=128, figsize=(10, 6))

    # Colour the points by their position in the walk (light = early, dark = late).
    point_numbers = list(range(rw.num_points))
    plt.scatter(rw.x_values, rw.y_values, c=point_numbers, cmap=plt.cm.Blues,
                edgecolors='none', s=1)

    # Emphasise the start point (green) and the end point (red).
    plt.scatter(0, 0, c='green', edgecolors='none', s=100)
    plt.scatter(rw.x_values[-1], rw.y_values[-1], c='red', edgecolors='none',
                s=100)

    # Hide both axes. Use the Axes that already holds the plot: calling
    # plt.axes() with no arguments would add a new, empty Axes instead.
    current_axes = plt.gca()
    current_axes.get_xaxis().set_visible(False)
    current_axes.get_yaxis().set_visible(False)

    plt.show()

    keep_running = input("Make another walk?(y/n): ")
    if keep_running == 'n':
        break

使用Pygal模拟掷骰子

安装Pygal

使用pip安装Pygal:

pip install pygal

创建Die类

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
A single die with a configurable number of sides.

@author: Xinzhe
@file: die.py
@time: 2020/01/31
"""
from random import randint


class Die():
    """Represent one die."""

    def __init__(self, num_sides=6):
        """Create a die; it has six sides unless ``num_sides`` says otherwise."""
        self.num_sides = num_sides

    def roll(self):
        """Return a random integer between 1 and the number of sides, inclusive."""
        return randint(1, self.num_sides)

绘制掷两个骰子50000次的结果

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Roll a D6 and a D10 together many times and chart how often each sum occurs.

@author: Xinzhe
@file: different_dice.py
@time: 2020/01/31
"""
import pygal
from PythonPrograming.dataplot.die import Die

# Number of rolls; kept in step with the count stated in the chart title.
NUM_ROLLS = 50000

# Create a D6 and a D10.
die_1 = Die()
die_2 = Die(10)

# Roll both dice NUM_ROLLS times and record each sum.
results = []
for roll_num in range(NUM_ROLLS):
    result = die_1.roll() + die_2.roll()
    results.append(result)
print(results)

# Count how often each possible sum came up. The smallest sum of two dice
# is 2, so the range starts at 2 and lines up with the x-axis labels below.
min_result = 2
max_result = die_1.num_sides + die_2.num_sides
frequencies = []
for value in range(min_result, max_result + 1):
    frequency = results.count(value)
    frequencies.append(frequency)
print(frequencies)

# Visualise the result as a bar chart.
hist = pygal.Bar()

# Use the public pygal options so the title and axis titles are rendered.
hist.title = "Result of rolling a D6 and a D10 50000 times."
hist.x_labels = ['2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12',
                 '13', '14', '15', '16']
hist.x_title = "Result"
hist.y_title = "Frequency of Result"

hist.add("D6 + D10", frequencies)
hist.render_to_file('dice_visual2.svg')

基于CSV格式的数据可视化

将数据作为一系列以逗号分隔的值(CSV)写入文件,这样的文件称为CSV文件。

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Read daily high/low temperatures from a CSV file and plot them over time.

@author: Xinzhe
@file: highs_lows.py
@time: 2020/01/31
"""
import csv
from matplotlib import pyplot as plt
from datetime import datetime

# filename = "sitka_weather_07-2014.csv"
# filename = "sitka_weather_2014.csv"
filename = "death_valley_2014.csv"

dates, highs, lows = [], [], []
with open(filename) as f:
    reader = csv.reader(f)

    # Read the header row of the CSV file.
    header_row = next(reader)
    print(header_row)

    # Print each header together with its column index.
    for index, column_header in enumerate(header_row):
        print(index, column_header)

    # Collect the date, high and low temperature from every data row.
    # Column 0 is parsed as the date, columns 1 and 3 as integers.
    for row in reader:
        try:
            current_date = datetime.strptime(row[0], "%Y-%m-%d")
            high = int(row[1])
            low = int(row[3])
        except ValueError:
            # Report the raw date field: the parsed date may not exist yet
            # (or may belong to a previous row) when parsing itself failed.
            print(row[0], 'missing data')
        else:
            dates.append(current_date)
            highs.append(high)
            lows.append(low)
# print(highs)

# Plot the temperature chart; the file is no longer needed at this point.
fig = plt.figure(dpi=128, figsize=(10, 6))
plt.plot(dates, highs, c='red', alpha=0.5)
plt.plot(dates, lows, c='blue', alpha=0.5)
plt.fill_between(dates, highs, lows, facecolor='blue', alpha=0.1)

# Format the figure.
plt.title("Daily high and low temperatures - 2014\nDeath Valley, CA",
          fontsize=20)
plt.xlabel('', fontsize=16)
# Slant the date labels so they do not overlap.
fig.autofmt_xdate()
plt.ylabel("Temperature (F)", fontsize=16)
plt.tick_params(axis='both', which='major', labelsize=16)

plt.show()

基于JSON格式的数据可视化

#!usr/bin/env python
# -*- coding:utf-8 _*-
"""
@author: Xinzhe
@file: btc_close_2017.py
@time: 2020/01/31
"""
from __future__ import (absolute_import, division, print_function, unicode_literals)try:# Python 2.x 版本from urllib2 import urlopen
except ImportError:# Python 3.x 版本from urllib.request import urlopen
import json
import pygal
import math
from itertools import groupby# json_url = '.json'
# response = urlopen(json_url)
# # 读取数据
# req = response.read()
# # 将数据写入文件
# with open('btc_close_2017_urllib.json', 'wb') as f:
#     f.write(req)
# # 加载json格式
# file_urllib = json.loads(req)
# print(file_urllib)# 将数据加载到一个列表中
filename = 'btc_close_2017.json'
with open(filename) as f:btc_data = json.load(f)# 绘制收盘价折线图
# 创建5个列表,分别存储日期和收盘价
dates, months, weeks, weekdays, closes = [], [], [], [], []
# 打印每一天的信息
for btc_dict in btc_data:date = btc_dict['date']month = int(btc_dict['month'])week = int(btc_dict['week'])weekday = btc_dict['weekday']close = int(float(btc_dict['close']))print("{} is month {} week {}, {}, the close price is {} RMB".format(date, month, week, weekday, close))dates.append(date)months.append(month)weeks.append(week)weekdays.append(weekday)closes.append(close)line_chart = pygal.Line(x_label_rotation=20, show_minor_x_labels=False)
line_chart._title = '收盘价对数变换(¥)'
line_chart.x_labels = dates
N = 20  # 坐标轴每隔20天显示一次
line_chart._x_labels_major = dates[::N]
closes_log = [math.log10(_) for _ in closes]
line_chart.add('log收盘价', closes_log)
line_chart.render_to_file('收盘价对数变换折线图(¥).svg')def draw_line(x_data, y_data, title, y_legend):xy_map = []for x, y in groupby(sorted(zip(x_data, y_data)), key=lambda _: _[0]):y_list = [v for _, v in y]xy_map.append([x, sum(y_list) / len(y_list)])x_unique, y_mean = [*zip(*xy_map)]line_chart = pygal.Line()line_chart._title = titleline_chart.x_labels = x_uniqueline_chart.add(y_legend, y_mean)line_chart.render_to_file(title + '.svg')return line_chartidx_month = dates.index('2017-12-01')
line_chart_month = draw_line(months[:idx_month], closes[:idx_month], '收盘价月日均值(¥)', '月日均值')
line_chart_monthidx_week = dates.index('2017-12-11')
line_chart_week = draw_line(weeks[1:idx_week], closes[1:idx_week], '收盘价周日均值(¥)', '周日均值')
line_chart_weekwd = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
weekdays_int = [wd.index(w) + 1 for w in weekdays[1:idx_week]]
line_chart_weekday = draw_line(weekdays_int, closes[1:idx_week], '收盘价星期均值(¥)', '星期均值')
line_chart_weekday.x_labels = ['周一', '周二', '周三', '周四', '周五', '周六', '周日']
line_chart_weekday.render_to_file('收盘价星期均值(¥).svg')# 收盘价数据仪表盘
with open('收盘价Dashboard.html', 'w', encoding='utf-8') as html_file:html_file.write('<html><head><title>收盘价Dashboard</title><meta charset = "utf-8"></head><body>\n')for svg in ['收盘价折线图(¥).svg', '收盘价对数变换折线图(¥).svg', '收盘价月日均值(¥).svg', '收盘价周日均值(¥).svg', '收盘价星期均值(¥).svg']:html_file.write('   <object type = "image/svg+xml" data = "{0}" height = 500></object>\n'.format(svg))html_file.write('</body></html>')

基于API对网络数据的可视化

获取GitHub上stars最多的项目

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Query the GitHub search API for Python repositories sorted by stars and
chart their star counts.

@author: Xinzhe
@file: python_repos.py
@time: 2020/01/31
"""
import requests
import pygal
from pygal.style import LightColorizedStyle as LCS, LightenStyle as LS

# Make the API call and keep the response.
url = 'https://api.github.com/search/repositories?q=language:python&sort=stars'
# A timeout keeps the script from hanging forever on a stalled connection.
r = requests.get(url, timeout=10)
print('Status code: ', r.status_code)
# Stop with a clear HTTP error instead of a KeyError on the payload below.
r.raise_for_status()

# Store the decoded API response in a variable.
response_dict = r.json()
print("Total repositories: ", response_dict['total_count'])

# Data processing
# print(response_dict.keys())
# Explore the information about the repositories.
repo_dicts = response_dict['items']
print("Repositories returned: ", len(repo_dicts))

# Examine the first repository
# repo_dict = repo_dicts[0]
# print("\nKeys: ", len(repo_dict))
# for key in sorted(repo_dict.keys()):
#     print(key)
# print("\nSelected information about each repository:")
# for repo_dict in repo_dicts:
#     print('Name: ',repo_dict['name'])
#     print('Owner:', repo_dict['owner']['login'])
#     print('Stars:', repo_dict['stargazers_count'])
#     print('Repository: ',repo_dict['html_url'])
#     print('Created: ',repo_dict['created_at'])
#     print('Updated: ', repo_dict['updated_at'])
#     print('Description: ',repo_dict['description'])

names, stars, plot_dicts = [], [], []
for repo_dict in repo_dicts:
    names.append(repo_dict['name'])
    stars.append(repo_dict['stargazers_count'])
    plot_dict = {
        'value': repo_dict['stargazers_count'],
        # A missing or empty description falls back to the text 'None'.
        'label': repo_dict['description'] or 'None',
        'Repository': repo_dict['html_url'],
    }
    plot_dicts.append(plot_dict)

# Visualisation
my_style = LS('#333366', base_style=LCS)

my_config = pygal.Config()
my_config.x_label_rotation = 45
my_config.show_legend = False
my_config.title_font_size = 24
my_config.label_font_size = 14
my_config.major_label_font_size = 18
my_config.truncate_label = 15
my_config.show_y_guides = False
my_config.width = 1000

chart = pygal.Bar(my_config, style=my_style)
chart.title = 'Most-Starred Python Projects on Github'
chart.x_labels = names

# chart.add('', stars)
chart.add('', plot_dicts)
chart.render_to_file('python_repos.svg')

参考资料

《Python编程从入门到实践》

更多推荐

【Python】Python编程入门项目2:数据可视化

本文发布于:2024-02-24 21:56:17,感谢您对本站的认可!
本文链接:https://www.elefans.com/category/jswz/34/1696796.html
版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系,我们将在24小时内删除。
本文标签:入门   项目   数据   Python

发布评论

评论列表 (有 0 条评论)
草根站长

>www.elefans.com

编程频道|电子爱好者 - 技术资讯及电子产品介绍!