画图\读操作csv文件\zip groupby)"/>
Python编程从入门到实践三(画图\读操作csv文件\zip groupby)
文章目录
- chap15画图
- 折线图plot
- 散点图scatter
- 柱状图 bar
- 随机漫步云图scatter
- 画图习题小结
- csv文件处理
- csv文件读取函数
- date模块处理,
- fillbetween填充两者中间
- 处理缺失值ValueError加try-except-else
- zip和groupby
- zip
- groupby
- ==收盘价月均值==
- ==收盘价周均值==
- zip&groupby 小结
chap15画图
折线图plot
# Line chart: plot the squares of 1..5 with matplotlib.
import matplotlib.pyplot as plt

# Explicit x and y coordinates. If only `squares` were passed, plot() would
# use 0, 1, 2, 3, 4 as the x values.
input_values = [1, 2, 3, 4, 5]
squares = [1, 4, 9, 16, 25]
plt.plot(input_values, squares, linewidth=5)

# Title and axis labels.
plt.title("Square Numbers", fontsize=24)
plt.xlabel("Value", fontsize=14)
plt.ylabel("Square of Value", fontsize=14)

# Tick label styling on both axes.
plt.tick_params(axis='both', which='major', labelsize=15)

# Display the figure.
plt.show()
散点图scatter
# Scatter plot: computed data, custom colors and a color map.
import matplotlib.pyplot as plt

# range() alone also works for plotting, but printing it only shows
# "range(1, 1001)", so it is converted to a list.
x_values = list(range(1, 1001))
y_values = [x**2 for x in x_values]

# With a cmap, `c` must be a sequence of numbers (here the y values) that is
# mapped onto the color map. A single RGB/RGBA color should not be passed as a
# bare 3- or 4-element sequence: matplotlib warns that value-mapping takes
# precedence when its length matches x and y, and recommends a 2-D array with
# a single row for one color shared by all points.
plt.scatter(x_values, y_values, c=y_values, edgecolors='none', s=40,
            cmap=plt.get_cmap('RdYlBu'))

plt.title("Square Numbers", fontsize=24)
plt.xlabel("Value", fontsize=14)
plt.ylabel("Square of Value", fontsize=14)
plt.tick_params(axis='both', which='major', labelsize=15)  # tick label style

# Value range of the x and y axes.
plt.axis([0, 1100, 0, 1100000])

# Save before showing: once the window opened by show() is closed the figure
# may be gone, and a later savefig() would then write an empty image.
# Available color maps are listed in the matplotlib documentation.
plt.savefig('squares_plot.png', bbox_inches='tight')
plt.show()
柱状图 bar
#pygal 矢量图
import pygal
from random import randint


class Die():
    """A die with a configurable number of sides."""

    def __init__(self, num_sides=6):
        """Create a die; it has six sides by default."""
        self.numsides = num_sides

    def roll(self, times):
        """Roll the die `times` times and return the list of outcomes.

        Each outcome is an integer between 1 and the number of sides
        (randint includes both end points).
        """
        return [randint(1, self.numsides) for _ in range(times)]

    def cal(self, results):
        """Count how often each face occurs in `results`.

        Returns a list whose entry at index i is the number of occurrences of
        face i + 1, for every face from 1 up to the number of sides.
        """
        # range() excludes its upper bound, hence numsides + 1.
        return [results.count(value) for value in range(1, self.numsides + 1)]


num_size = 6
times = 10000
die = Die(num_size)
results = die.roll(times)
frequency = die.cal(results)

# A single die gives a roughly uniform distribution; the sum of two dice is
# bell-shaped. Two-dice variant, kept for reference:
# die2 = Die(num_size)
# results2 = die2.roll(times)
# results3 = []
# for i, j in zip(results, results2):
#     results3.append(i + j)
#
# frequency = []
# for value in range(2, 13):
#     frequency.append(results3.count(value))

# Visualize the result as a pygal bar chart (vector graphics).
hist = pygal.Bar()

# Build the labels from the actual parameters so the title cannot drift away
# from the number of rolls that were simulated.
hist.title = "Results of rolling one D{} {} times".format(num_size, times)
hist.x_labels = list(range(1, num_size + 1))
hist.x_title = "Result"
hist.y_title = "Frequency of Result"

hist.add('D{}'.format(num_size), frequency)
hist.render_to_file('die_visual.svg')

# Element-wise sum of two lists.
a = [1, 2, 3]
b = [2, 3, 4]
print(a + b)  # "+" concatenates the lists, it does not add the elements

c = []
for i, j in zip(a, b):
    summ = i + j
    c.append(summ)
print(c)
随机漫步云图scatter
#随机漫步,模拟现实中很多问题
import matplotlib.pyplot as plt
from random import choice


class RandomWalk():
    """Generate the points of a two-dimensional random walk."""

    def __init__(self, num_points=5000):
        """Store the target number of points; every walk starts at (0, 0)."""
        self.num_points = num_points
        self.x_values = [0]
        self.y_values = [0]

    def fill_walk(self):
        """Append random steps until the walk holds `num_points` points."""
        while len(self.x_values) < self.num_points:
            # Pick a direction and a distance for each axis.
            x_direction = choice([1, -1])
            x_distance = choice([0, 1, 2, 3, 4])
            x_step = x_direction * x_distance

            y_direction = choice([1, -1])
            y_distance = choice([0, 1, 2, 3, 4])
            y_step = y_direction * y_distance

            # Reject steps that do not move at all.
            if x_step == 0 and y_step == 0:
                continue

            self.x_values.append(self.x_values[-1] + x_step)
            self.y_values.append(self.y_values[-1] + y_step)


def main():
    """Draw random walks and save them until the user answers 'n'."""
    while True:
        rw = RandomWalk(50000)
        rw.fill_walk()

        # Size of the drawing window.
        plt.figure(figsize=(10, 6))

        # Color the points by their position in the walk.
        point_numbers = list(range(rw.num_points))
        plt.scatter(rw.x_values, rw.y_values, c=point_numbers,
                    cmap=plt.get_cmap("RdYlBu"), edgecolors='none', s=1)

        # Hide both axes of the current plot. gca() returns the axes that was
        # just drawn on instead of adding a new one.
        axes = plt.gca()
        axes.get_xaxis().set_visible(False)
        axes.get_yaxis().set_visible(False)

        # Emphasize the start and end points.
        plt.scatter(0, 0, c='green', edgecolors='none', s=150)
        plt.scatter(rw.x_values[-1], rw.y_values[-1], c='red',
                    edgecolors='none', s=150)

        # plt.show()
        plt.savefig('random walk.png', bbox_inches='tight')
        # Release the figure so repeated walks do not accumulate open figures.
        plt.close()

        keep_running = input("Make Another Walk?(y/n): ")
        if keep_running == 'n':
            break


if __name__ == '__main__':
    main()
画图习题小结
1.pygal随机漫步——pygal.XY(stroke=False)
2.matplotlib柱状图——plt.bar(x,y)
图 | plt | pygal |
---|---|---|
柱状图 | plt.bar | pygal.Bar() |
散点图 | plt.scatter | pygal.XY() |
折线图 | plt.plot | pygal.Line() |
csv文件处理
参考资料:3.3 Files and the Operating System(文件与操作系统),可以用来系统地总结文件操作。
csv文件读取函数
csv | json | txt |
---|---|---|
reader = csv.reader(file) | dump(x,file) | read() |
row = next(reader) | load(file) | readlines() |
相当于有一个指针,一行一行往下读 | | readline() |
# csv文件读取
import csv
# Template for reading a csv file. newline='' is what the csv module
# documentation recommends when opening a file for csv.reader.
with open('zichuang.csv', newline='') as file:
    reader = csv.reader(file)

    # Each next() call reads one row and returns it as a list; the first row
    # is the header.
    header_row = next(reader)
    for index, column_header in enumerate(header_row):
        print('{} {}'.format(index, column_header))

    # The reader now continues at the second row: collect the second column
    # of every remaining row and print it.
    column_2 = [row[1] for row in reader]
    print(column_2)
date模块处理,
fillbetween填充两者中间
处理缺失值ValueError加try-except-else
#date
import matplotlib.pyplot as plt
import csv
from datetime import datetime

# Read the date (column 0) and the value (column 1) of every data row.
dates, highs = [], []
with open("zichuang_date.csv") as file:
    reader = csv.reader(file)
    header_row = next(reader)  # skip the header line

    for row in reader:
        try:
            # %Y (upper case) is the four-digit year.
            date = datetime.strptime(row[0], "%m/%d/%Y")
            # Convert to a number; left as text, matplotlib would treat the
            # values as categories instead of plotting them on a numeric axis.
            high = float(row[1])
        except (ValueError, IndexError):
            # Missing or malformed value: report the row and skip it.
            print(row, 'missing data')
        else:
            dates.append(date)
            highs.append(high)

fig = plt.figure(dpi=128, figsize=(10, 6))
plt.plot(dates, highs, c='red')
plt.title("xiadabi")
plt.xlabel("", fontsize=20)
plt.ylabel("Temperature(F)", fontsize=20)
plt.tick_params(axis='both', which='both', labelsize=20)
fig.autofmt_xdate()  # draw the date labels on the x axis slanted
plt.show()
- log10表示数量级上的增长,例如 10、100、1000 取log10后分别是 1、2、3
zip和groupby
zip
#zip把多个列表按位置合并,元素为对应位置元素组成的元组,以最短的列表为准,多余的元素被丢弃;zip(*zipped)可以反向"解压"。注意Python 3中zip返回的是一次性的迭代器:用[*zipped_2]或list(zipped_2)取出内容后它就被耗尽,不能再遍历或解压第二次
groupby
#关于groupby的说明
from itertools import groupby#groupby分组,按指定位置的元素
print("groupby的结果")
test = [('a', 5), ('a', 4), ('b', 1), ('a', 3), ('a', 2), ('b', 4), ('b', 3), ('c', 5)]

# sorted() orders the tuples (first element, then second). groupby() then
# groups *consecutive* items sharing the same first element, which is why the
# data has to be sorted first.
# The result is a one-shot iterator of (key, group) pairs, not a list.
temp = groupby(sorted(test), lambda x: x[0])

print("1.list处理之前打印temp")
print(temp)
# Steps 2 and 3 are disabled: list(temp) would consume the iterator and
# leave nothing for the loop in step 4.
# print("2.list处理的temp")
# print(list(temp))
# print("3.list处理过的temp")
# print(temp)
print("4.list未处理过的temp分组打印")
for key, group in temp:
    # `group` is itself a lazy iterator (itertools._grouper); wrap it in
    # list() to see its contents.
    print(list(group))

# Sample output (with steps 2 and 3 enabled in a separate run):
#
# groupby的结果
# 1.list处理之前打印temp
# <itertools.groupby object at 0x106add138>
# 2.list处理的temp
# [('a', <itertools._grouper object at 0x106ad5588>), ('b', <itertools._grouper object at 0x106ad58d0>), ('c', <itertools._grouper object at 0x106ad5908>)]
# 3.list处理过的temp
# <itertools.groupby object at 0x106add138>
# 4.list未处理过的temp分组打印
# [('a', 2), ('a', 3), ('a', 4), ('a', 5)]
# [('b', 1), ('b', 3), ('b', 4)]
# [('c', 5)]
收盘价月均值
#收盘价均值
# zip把多个列表按位置打包,每个元素是由各列表对应位置的值组成的元组(Python 3中返回一次性的迭代器,不是列表)
# groupby根据key函数对相邻元素分组,得到一个迭代器(不是字典),每次产出(分组的key, 该组元素的迭代器);分组前要先按同一个key排序
import pygal
import json
import math
from itertools import groupby#导入分组,月份,周数,周几再计算每组的平均值#求某段为单位的平均数
def _group_means(x_data, y_data):
    """Return (unique x values, mean of y per x value) as two tuples."""
    # zip pairs the values by position:
    #   x_data = [1, 1, 2, 2], y_data = [3, 2, 4, 6]
    #   -> (1, 3), (1, 2), (2, 4), (2, 6)
    # sorted() puts equal x values next to each other:
    #   -> (1, 2), (1, 3), (2, 4), (2, 6)
    # groupby() then yields one (key, group iterator) pair per distinct x:
    #   1 -> (1, 2), (1, 3)      2 -> (2, 4), (2, 6)
    keys, means = [], []
    for key, group in groupby(sorted(zip(x_data, y_data)), key=lambda pair: pair[0]):
        # Drop the x part of every pair and average what is left.
        values = [value for _, value in group]
        keys.append(key)
        means.append(sum(values) / len(values))
    # For the example above: keys = (1, 2), means = (2.5, 5.0).
    # Empty input simply gives two empty tuples.
    return tuple(keys), tuple(means)


def draw_line(x_data, y_data, title, y_legend):
    """Draw the mean of `y_data` per distinct `x_data` value as a line chart.

    Args:
        x_data: grouping value of every sample (e.g. its month number).
        y_data: value of every sample, aligned by position with `x_data`.
        title: chart title; the chart is also written to title + '.svg'.
        y_legend: legend name of the plotted line, e.g. a monthly mean.

    Returns:
        The pygal.Line chart that was rendered.
    """
    x_unique, y_mean = _group_means(x_data, y_data)

    line_chart = pygal.Line()
    line_chart.title = title
    line_chart.x_labels = x_unique
    line_chart.add(y_legend, y_mean)
    line_chart.render_to_file(title + '.svg')
    return line_chart


filename = 'btc_close_2017_request.json'
with open(filename) as f:
    btc_data = json.load(f)

# Print a one-line summary of every record.
for btc_dict in btc_data:
    date = btc_dict['date']
    month = int(btc_dict['month'])
    week = int(btc_dict['week'])
    weekday = btc_dict['weekday']
    close = int(float(btc_dict['close']))
    print("{} is month {} week {},{}.The close price is {} RMB".format(date, month, week, weekday, close))

# Collect every field into its own list.
dates, months, weeks, weekdays, closes = [], [], [], [], []
for btc_dict in btc_data:
    # dates must be filled: the index lookup below searches this list.
    dates.append(btc_dict['date'])
    months.append(int(btc_dict['month']))
    weeks.append(int(btc_dict['week']))
    weekdays.append(btc_dict['weekday'])
    closes.append(int(float(btc_dict['close'])))

# list.index returns the position of the first matching element and raises
# ValueError if the date is not present.
idx_month = dates.index('2017-12-01')

# The December data is incomplete, so only months 1-11 are plotted: the slice
# stops right before the first December entry.
line_chart_month = draw_line(months[:idx_month], closes[:idx_month], '收盘价月日均值', '月日均值')
收盘价周均值
import pygal
import json
import math
from itertools import groupby#导入分组,月份,周数,周几再计算每组的平均值#求某段为单位的平均数
def _group_means(x_data, y_data):
    """Return (unique x values, mean of y per x value) as two tuples."""
    # zip pairs the values by position:
    #   x_data = [1, 1, 2, 2], y_data = [3, 2, 4, 6]
    #   -> (1, 3), (1, 2), (2, 4), (2, 6)
    # sorted() puts equal x values next to each other:
    #   -> (1, 2), (1, 3), (2, 4), (2, 6)
    # groupby() then yields one (key, group iterator) pair per distinct x:
    #   1 -> (1, 2), (1, 3)      2 -> (2, 4), (2, 6)
    keys, means = [], []
    for key, group in groupby(sorted(zip(x_data, y_data)), key=lambda pair: pair[0]):
        # Drop the x part of every pair and average what is left.
        values = [value for _, value in group]
        keys.append(key)
        means.append(sum(values) / len(values))
    # For the example above: keys = (1, 2), means = (2.5, 5.0).
    # Empty input simply gives two empty tuples.
    return tuple(keys), tuple(means)


def draw_line(x_data, y_data, title, y_legend):
    """Draw the mean of `y_data` per distinct `x_data` value as a line chart.

    Args:
        x_data: grouping value of every sample (e.g. its week number).
        y_data: value of every sample, aligned by position with `x_data`.
        title: chart title; the chart is also written to title + '.svg'.
        y_legend: legend name of the plotted line, e.g. a weekly mean.

    Returns:
        The pygal.Line chart that was rendered.
    """
    x_unique, y_mean = _group_means(x_data, y_data)

    line_chart = pygal.Line()
    line_chart.title = title
    line_chart.x_labels = x_unique
    line_chart.add(y_legend, y_mean)
    line_chart.render_to_file(title + '.svg')
    return line_chart


filename = 'btc_close_2017_request.json'
with open(filename) as f:
    btc_data = json.load(f)

# Collect every field into its own list. (The per-record loop that only fed a
# commented-out print was dropped; it had no effect on the chart.)
dates, months, weeks, weekdays, closes = [], [], [], [], []
for btc_dict in btc_data:
    # dates must be filled: the index lookup below searches this list.
    dates.append(btc_dict['date'])
    months.append(int(btc_dict['month']))
    # Convert to int while appending: the values are sorted later, and as
    # strings '11' would sort before '2'.
    weeks.append(int(btc_dict['week']))
    weekdays.append(btc_dict['weekday'])
    closes.append(int(float(btc_dict['close'])))  # to float first, then to int

# Index + slice: list.index returns the position of the first match, and a
# slice excludes its end position, e.g.
#   s = '123456'; a = s.index('3'); s[:a] == '12'
# The lookup uses the last day to keep, so the slice end below is idx + 1.
idx_month = dates.index('2017-12-10')

# Plot the weekly means. The slice skips the first record and ends with
# 2017-12-10. NOTE(review): presumably this keeps only complete weeks; confirm
# against the data file.
line_chart_month = draw_line(weeks[1:idx_month+1], closes[1:idx_month+1], '收盘价周日均值', '周日均值')
zip&groupby 小结
- 排序注意字符串和数,即从文件中取数的时候就判断一下该用什么类型
- 索引+切片 开闭区间
- zip+groupby 两个列表对应纵列操作
# Combine the rendered SVG charts into a single HTML dashboard page.
with open('收盘价Dashboard.html', 'w', encoding='utf-8') as html_file:
    html_file.write('<html><head><title>收盘价Dashboard</title><meta charset="utf-8"></head><body>\n')
    # NOTE(review): these names must match the files written by draw_line
    # (title + '.svg'); confirm they agree with the chart titles actually used.
    for svg in ['收盘价周均值.svg', '收盘价星期均值.svg', '收盘价月均值.svg']:
        html_file.write(' <object type="image/svg+xml" data="{0}" height=500></object>\n'.format(svg))
    html_file.write('</body></html>')
更多推荐
Python编程从入门到实践三(画图\读操作csv文件\zip groupby)
发布评论