python_plotting - thawk/wiki GitHub Wiki

Python图形相关

matplotlib

import matplotlib
import matplotlib.pyplot as plt

matplotlib安装位置

import matplotlib
matplotlib.matplotlib_fname()

惯用法

# Plot Latency against OrderTime as individual points (no connecting line).
# pyplot.plot() takes x and y positionally and has no figsize argument,
# so the size is set on the figure before plotting.
plt.figure(figsize=(16, 10))
plt.plot(df['OrderTime'], df['Latency'], linestyle='', marker='.')

设置中文字体

plt.rcParams['font.family'] = 'sans-serif'
plt.rcParams['font.sans-serif'] = ['WenQuanYi Micro Hei', 'WenQuanYi Zen Hei', 'SimHei'] + matplotlib.rcParams['font.sans-serif']

Linux下启用中文字体

matplotlib不支持ttc格式的字体文件,因此需要拷贝一个ttf到字体目录:

  • 拷贝msyh.ttf到/usr/share/fonts
  • 运行sudo fc-cache -f刷新字体缓冲区
  • 运行rm ~/.cache/matplotlib/fontList.cache
  • matplotlib.rcParams['font.sans-serif'] = ['Microsoft YaHei'] + matplotlib.rcParams['font.sans-serif']

fc-list :lang=zh 可以列出支持中文的字体。

import matplotlib
# The font_manager submodule must be imported explicitly; `import matplotlib`
# on its own does not guarantee the attribute exists.
import matplotlib.font_manager

# Sorted names of every font in the font manager's ttflist.
sorted([f.name for f in matplotlib.font_manager.fontManager.ttflist])

设置缺省图形大小

plt.rcParams['figure.figsize'] = (12,8)
plt.rcParams['figure.dpi'] = 200

修正负号显示为方框的问题

matplotlib.rcParams['axes.unicode_minus'] = False

两列数据画在一个histogram图中

%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import pandas
#import seaborn
#seaborn.set(style='ticks')

# Reproducible demo data: 37 rows, two normally distributed columns A and B.
np.random.seed(0)
samples = np.random.normal(size=(37, 2))
df = pandas.DataFrame(samples, columns=['A', 'B'])
fig, ax = plt.subplots()

# Bin column A with numpy's default binning, then reuse those edges for B
# so both series share identical bins.
a_heights, a_bins = np.histogram(df['A'])
b_heights, b_bins = np.histogram(df['B'], bins=a_bins)

# Each bar is a third of a bin wide; B's bars are shifted by one bar width
# so the two series sit side by side.
width = (a_bins[1] - a_bins[0]) / 3

a_positions = a_bins[:-1]
b_positions = b_bins[:-1] + width
ax.bar(a_positions, a_heights, width=width, facecolor='cornflowerblue')
ax.bar(b_positions, b_heights, width=width, facecolor='seagreen')

样例

import numpy as np
import matplotlib.pyplot as plt

# Create a new figure
fig = plt.figure()
# Adjust the left/right/top/bottom margins and the spacing between subplots
fig.subplots_adjust(right=0.6)
# Add a subplot: 111 means the 1st cell of a 1-row, 1-column grid;
# 211 would be the 1st (top) cell of a 2-row, 1-column grid
ax = fig.add_subplot(111)
# Set the axis labels
ax.set_ylabel('Latency (Milliseconds) --->')
ax.set_xlabel('Order Count --->')
# Draw the line; linestyle selects the line style
ax.plot(listofdata, linestyle, markersize=2, label="zqdm")
# Set the title
fig.suptitle(title)
# Draw the legend
h, l = ax.get_legend_handles_labels()
# This draws the legend inside ax
ax.legend(h, l, loc="right")
# This draws the legend outside ax: a figure-level legend is positioned
# relative to the whole figure rather than the axes
fig.legend(h, l, loc="right")
# NOTE(review): fig_stddev is not created in this sample - presumably a second
# figure defined elsewhere; confirm before reusing these two lines.
fig_stddev.subplots_adjust(top=0.95, bottom=0.15, right=0.95)
ax = fig_stddev.add_subplot(111)

调整边框颜色

# Paint the four spines of ax1 green.
for side in ('bottom', 'top', 'left', 'right'):
    ax1.spines[side].set_color('green')
# Paint the tick lines of both axes of ax1, then of ax2, green.
for axes in (ax1, ax2):
    for axis in (axes.xaxis, axes.yaxis):
        for t in axis.get_ticklines():
            t.set_color('green')

或更简洁的:

import matplotlib.pyplot as plt

# Colour the spines and the x/y tick lines of each axes with its paired colour.
for ax, color in zip([ax1, ax2, ax3, ax4], ['green', 'green', 'blue', 'blue']):
    plt.setp(ax.spines.values(), color=color)
    # Both tick-line lists come from the loop variable; using ax1 here would
    # recolour ax1's y ticks on every iteration and skip the other axes.
    plt.setp([ax.get_xticklines(), ax.get_yticklines()], color=color)

隐藏一条坐标轴上的标记

plt.gca().xaxis.set_major_locator(plt.NullLocator())

设置坐标轴的最小、最大值

plt.xlim(-1, plt.xlim()[1])
plt.ylim(0, 100)

高亮一部分区域

plt.plot(range(10))
plt.axvspan(3, 6, color='red', alpha=0.5)
plt.show()

格式化坐标轴的标记

# Three alternative formatters; each call replaces the axis' previous major formatter.
# 1) Append a percent sign to the tick value ('{0}' picks the first argument, the value).
plt.gca().yaxis.set_major_formatter(plt.FuncFormatter('{0}%'.format))
# 2) Thousands separators. StrMethodFormatter fills the named fields `x` and
#    `pos`, so the replacement field must be written as {x}.
ax.yaxis.set_major_formatter(matplotlib.ticker.StrMethodFormatter('{x:,}'))
# 3) Time-of-day labels with microseconds, for date-valued ticks.
ax.yaxis.set_major_formatter(matplotlib.dates.DateFormatter("%H:%M:%S.%f"))

Y轴设置为指数标记,标记为2的幂

通过数值的极值和设置前Y轴的范围,确定数值所在范围。这样可以处理极值很大,但绘制为均值时,实际范围变小的情况,以及范围比较小,但Y轴自动设置得偏大的情况。

全局禁用了小刻度,防止自动标记的小刻度出现指数刻度的情况。

import math

# Clamp the data extremes to the current y limits, then round outwards to
# whole exponents of two. int() keeps range() working where ceil/floor
# return floats.
max_exp = int(math.ceil(math.log(min(df[y].max(), ax.get_ylim()[1]), 2)))
min_exp = int(math.floor(math.log(max(df[y].min(), ax.get_ylim()[0]), 2)))
# One tick per power of two in [2**min_exp, 2**max_exp], labelled with its plain value.
yticks = [2**i for i in range(min_exp, max_exp+1)]
ax.set_yticks(yticks)
ax.set_yticklabels(yticks)
# Turn minor ticks off for this axes.
ax.minorticks_off()

为X轴指定为日期格式,解决seaborn等没有正确显示X轴的问题

import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

# build the figure
fig, ax = plt.subplots()
# NOTE(review): tsplot is only available in older seaborn releases - confirm
# the installed version still provides it.
sns.tsplot(df, time='Date', value='Value', unit='Unit', ax=ax)

# assign locator and formatter for the xaxis ticks.
ax.xaxis.set_major_locator(mdates.AutoDateLocator())
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y.%m.%d'))

# put the labels at 45deg since they tend to be too long
fig.autofmt_xdate()
plt.show()

为bar图增加数值标记

# now to plot the figure...
plt.figure(figsize=(12, 8))
ax = freq_series.plot(kind='bar')

# One patch (rectangle) per bar drawn on the axes.
rects = ax.patches

# Now make some labels (range() instead of the Python-2-only xrange())
labels = ["label%d" % i for i in range(len(rects))]

# Centre each label horizontally over its bar, 5 data units above the bar top.
for rect, label in zip(rects, labels):
    height = rect.get_height()
    ax.text(rect.get_x() + rect.get_width()/2, height + 5, label, ha='center', va='bottom')

使用style中的颜色

list(plt.rcParams['axes.prop_cycle'])[idx]['color']

配色包

Palettable

Palettable (formerly brewer2mpl) is a library of color palettes for Python. It's written in pure Python with no dependencies, but it can supply color maps for matplotlib. You can use Palettable to customize matplotlib plots or supply colors for a web application.

pip install palettable
from palettable.colorbrewer.qualitative import Dark2_7
# Use the imported palette as the axes' colour cycle. set_prop_cycle replaces
# the removed Axes.set_color_cycle, and the palette is referenced by the name
# that was actually imported.
ax.set_prop_cycle(color=Dark2_7.mpl_colors)
from palettable.colorbrewer.sequential import Blues_8
# Sequential palettes also expose a matplotlib colormap for image-like data.
ax.imshow(data, cmap=Blues_8.mpl_colormap)

prettyplotlib

https://github.com/olgabot/prettyplotlib

可以关注的指令

plt.tight_layout()

使用两条Y轴

fig, ax1 = plt.subplots()
ax2 = ax1.twinx()

如果需要把两条轴的图例画在一起:

fig, ax1 = plt.subplots()
ax2 = ax1.twinx()

# Collect the line artists from both y axes so one legend can list them all.
lines = []

lines += ax1.plot(...)
lines += ax2.plot(...)

# Draw the combined legend on ax1 (there is no plain `ax` in this snippet).
ax1.legend(lines, [l.get_label() for l in lines], loc=0)

gizeh -- Simple Vector Graphics for Python

https://github.com/Zulko/gizeh

seaborn

Statistical data visualization using matplotlib

调用 pandas、matplotlib 的统计绘图库

import seaborn as sns

惯用法

sns.relplot(data=df, x='OrderTime', y='Latency', s=5, hue='PlatformID', palette='Accent', height=6, aspect=2, linewidth=0, legend='full')
  • s=5: 指定数据点的大小
  • hue='PlatformID': 按指定列变化不同颜色。size/style与此类似
  • palette='Accent': 指定hue使用的调色板
  • height=6: 指定高度(单位为英寸)
  • aspect=2: 指定宽高比
  • linewidth=0: 去掉点外面的线框
  • legend='full': 如果hue/size/style等对应的列是数字,seaborn会假设其值为从0开始连续递增的值,会出现不存在的项目。加上这个就不会了

低版本JointGrid(标记颜色,两边有柱状图)

def plot_joint(data, x, y, hue, xlim, ylim):
  """Scatter ``y`` against ``x`` coloured by ``hue``, with a histogram on each margin.

  Creates an empty JointGrid restricted to ``xlim``/``ylim``, draws the
  scatter plot on its joint axes and a 200-bin distplot of ``data[x]`` and
  ``data[y]`` on the matching marginal axes.

  Returns the JointGrid.
  """
  grid = sns.JointGrid(x=None, y=None, xlim=xlim, ylim=ylim)
  joint_ax = grid.ax_joint
  top_ax = grid.ax_marg_x
  side_ax = grid.ax_marg_y

  # Centre panel: markers without an outline, coloured by the hue column.
  sns.scatterplot(x=x, y=y, hue=hue, data=data, linewidth=0, ax=joint_ax)

  # Margin for the x column.
  sns.distplot(data[x], bins=200, ax=top_ax)

  # Margin for the y column, drawn vertically.
  sns.distplot(data[y], bins=200, vertical=True, ax=side_ax)

  return grid

def plot(data, x, y, hue, xlim=None, ylim=(0, 15000)):
  """Draw the joint plot, give the figure a title and resize it to 16 x 8.

  Arguments are forwarded unchanged to plot_joint; ``ylim`` defaults to
  (0, 15000) and ``xlim`` to None.

  Returns the grid object produced by plot_joint.
  """
  g = plot_joint(data=data, x=x, y=y, hue=hue, xlim=xlim, ylim=ylim)
  # Figure exposes suptitle(); there is no Figure.subtitle(), so the old call
  # raised AttributeError.
  g.fig.suptitle("Title")
  g.fig.set_figwidth(16)
  g.fig.set_figheight(8)

  return g

plotly

Plotly