Python.matplotlib - k821209/pipelines GitHub Wiki
http://www.futurile.net/2016/03/31/colormaps-in-matplotlib/
레이블 (axis labels)
# Label the x and y axes of `ax`.
ax.set_xlabel('common xlabel')
ax.set_ylabel('common ylabel')
영역표시 (highlighting a region)
# Shade the vertical band between x=left and x=right on the n-th subplot.
ax[n].axvspan(xmin=left, xmax=right, color='blue', alpha=0.05)
fig title
# Put one title above all subplots of the figure.
fig.suptitle(
    t='This is a somewhat long figure title',
    fontsize=16,
)
matplotlib style
%matplotlib inline
import pandas as pd
from hurry.filesize import size
import matplotlib.pyplot as plt
import matplotlib.style as style
# Styles in the list are applied first to last:
# 'seaborn-poster' sets the size of the charts, 'ggplot' is applied on top of it.
style.use(['seaborn-poster', 'ggplot'])
matplotlib 한글 나오게 (making Korean/Hangul text render in matplotlib)
import matplotlib.font_manager as fm
import matplotlib
# Uncomment to list the font files known to fontconfig:
#fm.get_fontconfig_fonts()
font_location = '/usr/share/fonts/truetype/nanum/NanumGothic_Coding_Bold.ttf'
# Read the family name from the font file and make it the default font family.
font_props = fm.FontProperties(fname=font_location)
font_name = font_props.get_name()
matplotlib.rcParams['font.family'] = font_name
plot with percent
# Draw a bar chart and write each bar's share of the total (in percent)
# just below the top edge of the bar.
rects = ax[0].bar(x, y, width=0.5)
total = sum(y)  # loop-invariant, so computed once instead of once per bar
for rect in rects:
    height = rect.get_height()
    ax[0].text(
        rect.get_x() + rect.get_width() / 2.,  # horizontal centre of the bar
        0.99 * height,
        '%d' % int(height * 100 / total) + "%",
        ha='center', va='bottom',
    )
font config
# Use the 'small' font size for legend, axis labels, titles and tick labels,
# and make 15x5 the default figure size.
params = {
    key: 'small'
    for key in (
        'legend.fontsize',
        'axes.labelsize',
        'axes.titlesize',
        'xtick.labelsize',
        'ytick.labelsize',
    )
}
params['figure.figsize'] = (15, 5)
plt.rcParams.update(params)
heatmap clustering visualization
import scipy.cluster.hierarchy as sch
# Cluster the rows selected for one gene (Ward linkage) and draw the
# dendrogram next to a heatmap whose rows follow the dendrogram leaf order.
genename = 'Cre14.g612633'
vmin, vmax = 0, 2  # colour scale limits, shared by the heatmap and the colorbar

fig = plt.figure(figsize=(8, 8))

# Left panel: the dendrogram only, without any axis decoration.
ax1 = fig.add_axes([0.09, 0.1, 0.2, 0.6])
df = df_v11_ix.loc[genename]
D = df[df.columns[1:]].values  # every column after the first is clustered
Y = sch.linkage(D, method='ward')
# Draw explicitly on ax1 instead of relying on it being the current axes.
Z = sch.dendrogram(Y, orientation='left', ax=ax1)
ax1.set_xticks([])
ax1.set_yticks([])
ax1.set_axis_off()
zix = Z['leaves']  # row order produced by the clustering

# Right panel: heatmap with rows reordered to match the dendrogram.
ax2 = fig.add_axes([0.53, 0.1, 0.5, 0.6])
# Drop vmin/vmax to let the colour range follow the data instead.
cax = ax2.pcolor(D[zix], cmap=plt.cm.seismic, vmin=vmin, vmax=vmax)
ax2.axis('tight')  # remove blank column
ax2.set_yticks(np.arange(0.5, len(df.index), 1))
# zix holds row positions, so index the underlying array rather than
# relying on label-vs-position fallback of Series.__getitem__.
ax2.set_yticklabels(df['target_id'].values[zix], ha='right')
ax2.set_xticks(np.arange(0.5, len(df.columns[1:]), 1))
ax2.set_xticklabels(df.columns[1:], rotation='vertical')
# Ticks are derived from the colour limits so they always fall on the bar.
fig.colorbar(cax, ticks=np.linspace(vmin, vmax, 5), orientation='vertical')
plt.show()
Venn diagram
https://github.com/ksahlin/pyinfor/blob/master/venn.py
from matplotlib_venn import venn3

plt.rcParams['figure.figsize'] = (10, 10)

# One set of reference ids per class_code value.
mask = (df_tmap['class_code'] == '=')
ref_t_cs = set(df_tmap.loc[mask, 'ref_id'].values)  # completely supported
mask = (df_tmap['class_code'] == 'j')
ref_t_is = set(df_tmap.loc[mask, 'ref_id'].values)  # isoform support
mask = (df_tmap['class_code'] == 'c')
ref_t_ps = set(df_tmap.loc[mask, 'ref_id'].values)  # part support

# Each circle's label carries the size of its set.
venn_sets = [ref_t_cs, ref_t_is, ref_t_ps]
venn_labels = (
    'Complete match (%d)' % len(ref_t_cs),
    'Isoform (%d)' % len(ref_t_is),
    'Part (%d)' % len(ref_t_ps),
)
venn3(venn_sets, venn_labels)
plt.show()
trend line
def window_mean(x, y, window, overlap):
    """Average y inside sliding windows along x (e.g. to draw a trend line).

    Windows are the half-open intervals [start, start + window); the starts
    advance from 0 in steps of ``window - overlap`` until the window that
    contains ``max(x)`` has been visited. Windows without any point are
    skipped.

    Args:
        x: Positions, one per sample (array-like of numbers).
        y: Values to average, same length as x (array-like of numbers).
        window: Width of each window.
        overlap: Amount by which consecutive windows overlap; must be
            smaller than ``window``.

    Returns:
        Tuple ``(newx, newy)`` of lists: the centre of every non-empty
        window and the mean of the y values that fall inside it.

    Raises:
        ValueError: If ``window - overlap`` is not positive.
    """
    step = window - overlap
    if step <= 0:
        raise ValueError("overlap must be smaller than window")

    x = np.asarray(x)
    y = np.asarray(y)  # boolean-mask indexing below needs an array, not a list
    newx = []
    newy = []
    if x.size == 0:
        return newx, newy

    # +1 so the window starting at or just below max(x) is not dropped.
    n_windows = int(x.max() // step) + 1
    for i in range(n_windows):
        start = i * step
        end = start + window
        mask = (start <= x) & (x < end)
        if mask.any():
            newy.append(np.mean(y[mask]))
            newx.append((start + end) / 2)  # midpoint of the window
    return newx, newy
example
# Scatter three columns of the table against column 4, one subplot each,
# and overlay the windowed mean returned by window_mean() as a gray line.
df = pd.read_csv('Cre/mayra.ngs.txt', sep='\t', header=None)
swin = 100000  # window width passed to window_mean
sovp = 50000   # window overlap passed to window_mean
fig, ax = plt.subplots(3, figsize=(10, 10), sharex=True, sharey=True)

panels = [(0, 'r', 'Cov1x'), (1, 'b', 'Cov10x'), (2, 'g', 'Cov30x')]
for column, colour, name in panels:
    x = df[4].values
    y = df[column].values
    ax[column].scatter(x, y, c=colour, label=name)
    xnew, ynew = window_mean(x, y, swin, sovp)
    ax[column].plot(xnew, ynew, c='gray', linewidth=5, alpha=0.5)

# The axes are shared, so limits set on the last subplot apply to all three.
ax[2].set_xlim(0, 2000000)
ax[2].set_ylim(0, 1.2)
plt.tight_layout()
markers
http://matplotlib.org/api/markers_api.html
import
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Alternative palette: 20 colours from seaborn's "deep" palette.
#colors = sns.color_palette("deep", 20)
colors = sns.choose_colorbrewer_palette('qualitative') # one of {'sequential', 'diverging', 'qualitative'}
sns.set(context="poster")
sns.set_style("dark")
## Below: how to apply a style (through matplotlib)
import matplotlib
matplotlib.style.use('ggplot')
colors
import matplotlib.cm as cm
# Sample n evenly spaced colours from the rainbow colormap.
n = len(set(labels)) - 2  # number of colors
colors = cm.rainbow(np.linspace(0, 1, num=n))
plot
# Five side-by-side subplots sharing the y axis; the first one shows the
# first five rows of df_WE_Aly as a line with rotated category tick labels.
fig, axes = plt.subplots(nrows=1, ncols=5, sharey=True, figsize=(15, 6))
ax1, ax2, ax3, ax4, ax5 = axes
x = df_WE_Aly[0][:5]
y = df_WE_Aly[1][:5]
positions = np.arange(len(y))
ax1.plot(positions, y)
ax1.set_xticks(positions)
ax1.set_xticklabels(x, rotation=60, ha='right')
## histogram
# Histogram of the 'Height' column using bins of width 1.
x = df_phenotype_mask['Height'].values.astype(float)
# np.arange excludes its stop value, so extend it by one bin width;
# otherwise the last edge lies below max(x) and the largest values are
# left out of the histogram.
plt.hist(x, bins=np.arange(x.min(), x.max() + 1, 1))
figure size
- sns.set_context(rc={"figure.figsize": (16, 5)})
- figure(num=None, figsize=(8, 6), dpi=80, facecolor='w', edgecolor='k')
subplots legend
# Collect the labelled artists of `ax` and anchor the legend at
# (1.5, 0.98) in axes coordinates.
handles, labels = ax.get_legend_handles_labels()
ax.legend(handles=handles, labels=labels, bbox_to_anchor=(1.5, 0.98))
์ฉ๋ฒ : plt.figlegend([1.],[2.],3.)
- ๋ถ๋ฅ๋ฅผ ์ํด ์ค๋นํด๋ dicCat2num ๋์ ๋๋ฆฌ์ colors mapping (colors๋ seaborn) ๋ฅผ mpathes.Patch ์ธ์คํด์ค ๋ฆฌ์คํธ
- ๊ฐ ์ธ์คํด์ค๋ค์ ํด๋นํ๋ ๋ผ๋ฒจ ๋ฆฌ์คํธ
- ๋ ์ ๋ ์์น
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
colors = sns.color_palette("deep", 20)

# Figure-level legend with one coloured patch per category of dicCat2num.
# `loc` is passed by keyword, matching the other figlegend call in these
# notes; a third positional argument is not accepted by recent Matplotlib.
plt.figlegend(
    [mpatches.Patch(color=colors[x]) for x in dicCat2num.values()],
    list(dicCat2num.keys()),
    loc='upper right',
)

# Same idea with explicit labels; the legend is anchored at (1.5, 0.98).
plt.figlegend(
    [mpatches.Patch(color=colors[x]) for x in [0, 1, 2, 3]],
    ['gene length < 2000', '2000 <= gene length < 4000',
     '4000 <= gene length < 6000', '6000 < gene length'],
    loc='upper right',
    bbox_to_anchor=(1.5, 0.98),
)
## or
# Bar charts of the two p-value series on separate subplots, with shared
# axis captions and one figure-level legend built from the first bar of each.
anova_x = np.arange(len(anova))
ttest_x = np.arange(len(ttest))
line1 = ax[0].bar(anova_x, anova, label='ANOVA', color='red')
line2 = ax[1].bar(ttest_x, ttest, label='TTEST', color='blue')

fig.text(0.5, -0.01, 'Observation point', fontsize=15, ha='center')
fig.text(-0.01, 0.5, 'P value', fontsize=15, va='center', rotation='vertical')

legend_handles = [line1[0], line2[0]]
plt.figlegend(
    legend_handles,
    ['ANOVA', 'TTEST'],
    loc='upper right',
    bbox_to_anchor=[0.96, 0.96],
)
plt.tight_layout()
서브플랏에 공통 라벨! (common labels shared by all subplots)
# Figure-level captions placed in figure coordinates: one centred below
# the subplots, one rotated and centred to their left.
fig.text(x=0.5, y=-0.02, s='Coverage', fontsize=20, ha='center')
fig.text(x=-0.02, y=0.5, s='Counts', fontsize=20, va='center', rotation='vertical')
savefig์์ ์งค๋ฆด๋ ์ถ๊ฐ
# Save the current figure as SVG; bbox_inches='tight' is the option to add
# when parts of the figure are cut off in the saved file.
plt.savefig('pie.chart.tmap.svg',bbox_inches='tight' )
# Alternative kept for reference: adjust the bottom subplot margin instead.
#plt.gcf().subplots_adjust(bottom=0.15)
tick change
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
# Replace the automatic x ticks by ticks every 0.712123 units across the
# current x range and print them with one decimal place.
x = [0, 5, 9, 10, 15]
y = [0, 1, 2, 3, 4]
fig, ax = plt.subplots()
ax.plot(x, y)

start, end = ax.get_xlim()
tick_positions = np.arange(start, end, 0.712123)
ax.set_xticks(tick_positions)
ax.xaxis.set_major_formatter(ticker.FormatStrFormatter('%0.1f'))
plt.show()
pie chart
# Pie chart of how many entries fall into each coverage interval.
plt.rcParams.update({'font.size': 24})
labels = ('0<=cov<25', '25<=cov<50', '50<=cov<75', '75<=cov<100')
sizes = [len(group) for group in (cf_0_25, cf_25_50, cf_50_75, cf_75_100)]
colors = ['yellowgreen', 'gold', 'lightskyblue', 'lightcoral']
# Offset the first three slices from the centre; the last one stays in place.
explode = (0.1, 0.1, 0.1, 0)
plt.pie(
    sizes,
    explode=explode,
    labels=labels,
    colors=colors,
    autopct='%1.1f%%',
    shadow=True,
    startangle=90,
)
# Equal aspect ratio so that the pie is drawn as a circle.
plt.axis('equal')
plt.show()
line histogram
# Bar histograms of each length distribution on the first figure, and the
# same bin counts redrawn as lines on a second figure.
fig, ax = plt.subplots(1, sharex=True, sharey=True, figsize=(10, 7))
fig1, ax2 = plt.subplots(1, sharex=True, sharey=True, figsize=(10, 7))
inlist = [len_dist_ath, len_dist_cre, len_dist_osa, len_dist_yst]
labels = ['ATH', 'CRE', 'OSA', 'SCE']
x = np.arange(0, 10000, 100)  # bin edges, for CDS
for i, j in enumerate(inlist):
    # hist returns (counts, bin edges, patches); there is one more edge
    # than counts, so the line is plotted against the left edges only.
    a, b, c = ax.hist(j, bins=x, color=colors[i], label=labels[i])
    left_edges = b[:len(a)]
    ax2.plot(left_edges, a, color=colors[i], label=labels[i], alpha=0.9)
plt.legend()
Histogram
# Bar histogram of GDarray with 50 bins, drawn in the first palette colour.
ax.hist(GDarray, bins=50, histtype='bar', color=colors[0])
boxplot
http://blog.bharatbhole.com/creating-boxplots-with-matplotlib/
# One box per entry of dicT2GC: the values are the samples, the keys
# become the rotated x tick labels.
fig, ax = plt.subplots(1)
box_names = list(dicT2GC.keys())
box_samples = list(dicT2GC.values())
ax.boxplot(box_samples)
ax.set_xticklabels(box_names, rotation=60, ha='right')
ax.set_ylabel('GC contents')
local max/min
# Mark the local minima and maxima of a 1-D series on top of its line plot.
data = np.asarray(coins_flatten)  # index-array lookups below need an ndarray
x = np.linspace(0, 1, len(data))

# The sign of the first difference is the slope direction; where that sign
# changes the series turns. The +1 shifts the index of the twice-differenced
# array back onto the turning sample.
turning = np.diff(np.sign(np.diff(data)))
a = turning.nonzero()[0] + 1        # local min+max
b = (turning > 0).nonzero()[0] + 1  # local min
c = (turning < 0).nonzero()[0] + 1  # local max

plt.plot(x, data)
plt.plot(x[b], data[b], "o", label="min")
plt.plot(x[c], data[c], "o", label="max")
plt.legend()  # show the "min"/"max" labels given above
peak detection
https://blog.ytotech.com/2015/11/01/findpeaks-in-python/
https://gist.github.com/endolith/250860
heat map
http://stackoverflow.com/questions/14391959/heatmap-in-matplotlib-with-pcolor
# Table-like heatmap: ticks centred on the cells, first row at the top,
# x tick labels above the plot.
# Note: as written, `row_labels` is used for the x axis and `column_labels`
# for the y axis.
column_labels = list('ABCDEFGH')
row_labels = list('0123456789')
data = deduced_density

fig, ax = plt.subplots()
heatmap = ax.pcolor(data, cmap=plt.cm.Blues)

# put the major ticks at the middle of each cell
n_rows, n_cols = data.shape[0], data.shape[1]
ax.set_xticks(np.arange(n_cols) + 0.5)
ax.set_yticks(np.arange(n_rows) + 0.5)

# want a more natural, table-like display
ax.invert_yaxis()
ax.xaxis.tick_top()

ax.set_xticklabels(row_labels)
ax.set_yticklabels(column_labels)
plt.show()
density plot (seaborn)
import numpy as np
import seaborn as sns
# Kernel density estimate of a small sample given as value -> repeat count.
repeats = [(1.5, 7), (2.5, 2), (3.5, 8), (4.5, 3), (5.5, 1), (6.5, 8)]
data = [value for value, count in repeats for _ in range(count)]
sns.set_style('whitegrid')
# NOTE(review): `bw` is the bandwidth argument of older seaborn releases;
# confirm it is still accepted by the installed version.
sns.kdeplot(np.array(data), bw=0.5)
savefig
from matplotlib.backends.backend_pdf import PdfPages
# Write the current figure as a page of a PDF file. The context manager
# closes the file even if saving raises.
with PdfPages('cre.lengthcomp.exonvsgene.pdf') as pp:
    plt.savefig(pp, format='pdf')