PySpark实战:Matplotlib数据可视化

来自CloudWiki
跳转至: 导航搜索

介绍

Matplotlib 是 Python 的绘图库。它可与 NumPy 一起使用,提供了一种有效的 MATLAB 开源替代方案。

代码

折线图

输出:

Python21072816.png


import matplotlib
import matplotlib.pyplot as plt
import numpy as np

# Use a CJK-capable sans-serif font so the Chinese labels are not garbled.
plt.rcParams['font.family'] = ['sans-serif']
plt.rcParams['font.sans-serif'] = ['SimHei']

# Sample data: 1 + sin(2*pi*t), sampled every 0.01 over [0, 2).
t = np.arange(0.0, 2.0, 0.01)  # x-axis data
s = 1 + np.sin(2 * np.pi * t)  # y-axis data

# Create the figure and a single axes.
fig, ax = plt.subplots()
# Set the window title through the figure manager:
# FigureCanvas.set_window_title was deprecated in Matplotlib 3.4 and
# removed in 3.6, while the manager method works on old and new versions.
fig.canvas.manager.set_window_title('折线图示例')

# Draw the line chart.
ax.plot(t, s)
# Axis labels and title.
ax.set(xlabel='时间 (s)', ylabel='电压 (mV)',
       title='折线图')
# Show grid lines.
ax.grid()
# Save the image relative to the current working directory.
fig.savefig("line.png")
# Display the figure.
plt.show()

直方图

Python21072817.png

import matplotlib
import matplotlib.pyplot as plt
import numpy as np

# Use a CJK-capable sans-serif font so the Chinese labels are not garbled.
plt.rcParams['font.family'] = ['sans-serif']
plt.rcParams['font.sans-serif'] = ['SimHei']

# Sample data: 1024 draws from a normal distribution N(mu, sigma^2).
np.random.seed(20170907)  # fix the seed so the figure is reproducible
mu = 100  # mean
sigma = 15  # standard deviation
x = mu + sigma * np.random.randn(1024)
num_bins = 50
fig, ax = plt.subplots()
# Set the window title through the figure manager:
# FigureCanvas.set_window_title was deprecated in Matplotlib 3.4 and
# removed in 3.6, while the manager method works on old and new versions.
fig.canvas.manager.set_window_title('直方图示例')
# Histogram normalised to a probability density (density is a boolean flag).
n, bins, patches = ax.hist(x, num_bins, density=True)
# 'Best fit' curve: the normal probability density function for mu and
# sigma, evaluated at the bin edges returned by hist().
y = ((1 / (np.sqrt(2 * np.pi) * sigma)) *
     np.exp(-0.5 * (1 / sigma * (bins - mu))**2))
# Overlay the density curve as a dashed line.
ax.plot(bins, y, '--')
# Axis labels and title.
ax.set_xlabel('人数')
ax.set_ylabel('概率密度')
ax.set_title(r'$\mu=100$, $\sigma=15$')

# Adjust the spacing so the y-axis label is not clipped.
fig.tight_layout()
# Save the image relative to the current working directory.
fig.savefig("histogram.png")
# Display the figure.
plt.show()

同时绘制多个图形

Matplotlib不但可以绘制单个图形,还可以将画布划分为多个区域,同时绘制多个图形。

600px

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.gridspec as gridspec
# Use a CJK-capable sans-serif font so the Chinese labels are not garbled.
plt.rcParams['font.family'] = ['sans-serif']
plt.rcParams['font.sans-serif'] = ['SimHei']
# Draw the minus sign as an ASCII '-' so it does not show up as a box
# when the figure is saved with the CJK font.
plt.rcParams['axes.unicode_minus'] = False
# Optional: uncomment to raise the resolution of saved / displayed figures.
# plt.rcParams['savefig.dpi'] = 300
# plt.rcParams['figure.dpi'] = 300
# Split the figure into a 2x2 grid of axes.
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2)
# Set the window title through the figure manager:
# FigureCanvas.set_window_title was deprecated in Matplotlib 3.4 and
# removed in 3.6, while the manager method works on old and new versions.
fig.canvas.manager.set_window_title('多图示例')
#############################################
# Subplot 1: labelled contour lines of the difference between two
# exp(-r^2) bumps, one centred at the origin and one at (1, 1).
step = 0.025
grid_x, grid_y = np.meshgrid(np.arange(-3.0, 3.0, step),
                             np.arange(-2.0, 2.0, step))
bump_origin = np.exp(-grid_x**2 - grid_y**2)
bump_shifted = np.exp(-(grid_x - 1)**2 - (grid_y - 1)**2)
surface = (bump_origin - bump_shifted) * 2
contour_set = ax1.contour(grid_x, grid_y, surface)
# Write the level values inline on the contour lines.
ax1.clabel(contour_set, inline=1, fontsize=10)
ax1.set_title('等高线')
# Subplot 2: three bar series drawn over the same categories.
categories = list('abcdefghij')
bar_series = (
    ("label1", 'red', [6, 5, 8, 5, 6, 6, 8, 9, 8, 10]),
    ("label2", 'orange', [5, 3, 6, 4, 3, 4, 7, 4, 4, 6]),
    ("label3", 'green', [4, 1, 2, 1, 2, 1, 6, 2, 3, 2]),
)
# Every series uses the same x positions, so later bars are drawn on top
# of earlier ones.
for series_label, series_color, bar_heights in bar_series:
    ax2.bar(categories, bar_heights, label=series_label, color=series_color)
ax2.set_title('柱状图')
ax2.set_ylabel('数量')
ax2.set_xlabel('类型')
# Subplot 3: streamlines of the 2-D vector field (U, V).
w = 3
# 100 x 100 grid over [-w, w] in each direction; with np.mgrid an
# imaginary step such as 100j means "100 points, end point included".
Y, X = np.mgrid[-w:w:100j, -w:w:100j]
U = -1 - X**2 + Y
V = 1 + X - Y**2
# The unused `speed` array and the stray GridSpec object were dropped:
# neither was ever passed to a plotting call or attached to the figure.
# density=[0.5, 1] sets the streamline density separately for x and y.
ax3.streamplot(X, Y, U, V, density=[0.5, 1])
ax3.set_title('密度')

# Subplot 4: random points split into two marker styles by their distance
# from the origin, with the dividing quarter circle drawn on top.
np.random.seed(20170907)
N = 100
r0 = 0.6  # radius of the dividing arc
# Draw order matters for reproducibility: x, then y, then the marker areas.
px = 0.9 * np.random.rand(N)
py = 0.9 * np.random.rand(N)
sizes = (20 * np.random.rand(N))**2
colors = np.sqrt(sizes)
radius = np.sqrt(px ** 2 + py ** 2)
# Each scatter call masks out the points that belong to the other group:
# triangles keep the points with radius >= r0, circles those with radius < r0.
for hidden, marker_style in ((radius < r0, '^'), (radius >= r0, 'o')):
    ax4.scatter(px, py, s=np.ma.masked_where(hidden, sizes),
                marker=marker_style, c=colors)
# Quarter circle of radius r0 in the first quadrant.
angles = np.arange(0, np.pi / 2, 0.01)
ax4.plot(r0 * np.cos(angles), r0 * np.sin(angles))
ax4.set_title('分类示例')
###################################################
# Tidy the spacing between the subplots, save the figure to the current
# working directory, then display it.
fig.tight_layout()
fig.savefig("muliPlot.png")
plt.show()

绘制3D图

绘制3D图形:

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.gridspec as gridspec
# Font and minus-sign settings: a CJK-capable sans-serif font so the Chinese
# labels are not garbled, and an ASCII '-' for negative tick labels.
plt.rcParams.update({
    'font.family': ['sans-serif'],
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})
# Lorenz attractor: used to illustrate chaotic behaviour (the "butterfly effect").
def lorenz(x, y, z, s=10, r=28, b=2.667):
    """Return the derivatives of the Lorenz system at the point (x, y, z).

    Args:
        x, y, z: Coordinates of the current state.
        s, r, b: Coefficients of the Lorenz equations.

    Returns:
        A tuple ``(x_dot, y_dot, z_dot)`` with the rate of change of each
        coordinate at the given state.
    """
    return (
        s*(y - x),
        r*x - y - x*z,
        x*y - b*z,
    )
# Integrate the Lorenz system with fixed-step forward Euler updates.
dt = 0.01          # time step
num_steps = 20000  # number of Euler steps
# One slot per step plus one for the initial state.
xs, ys, zs = (np.empty(num_steps + 1) for _ in range(3))
# Initial state.
xs[0], ys[0], zs[0] = (0., 1., 1.05)
for step in range(num_steps):
    dx, dy, dz = lorenz(xs[step], ys[step], zs[step])
    nxt = step + 1
    # Next state = current state + derivative * time step.
    xs[nxt] = xs[step] + (dx * dt)
    ys[nxt] = ys[step] + (dy * dt)
    zs[nxt] = zs[step] + (dz * dt)
# Plot the trajectory.
fig = plt.figure()
# Set the window title through the figure manager:
# FigureCanvas.set_window_title was deprecated in Matplotlib 3.4 and
# removed in 3.6, while the manager method works on old and new versions.
fig.canvas.manager.set_window_title('3D图')
# Create the 3-D axes with add_subplot: passing keyword arguments such as
# projection= to Figure.gca() was deprecated in Matplotlib 3.4 and removed
# in 3.6.
ax = fig.add_subplot(111, projection='3d')
# lw is short for linewidth.
ax.plot(xs, ys, zs, lw=1, color='red')
ax.set_xlabel("X轴")
ax.set_ylabel("Y轴")
ax.set_zlabel("Z轴")
ax.set_title("3D图形")
fig.tight_layout()
# Save the image relative to the current working directory.
fig.savefig("3d.png")
plt.show()