python.scipy - k821209/pipelines GitHub Wiki

clustering

# Cluster the rows of `df` and draw the resulting tree sideways on a tall
# figure, labelling every leaf with its row index.
# NOTE(review): `plt`, `df`, `complete` and `dendrogram` are not defined in
# this snippet; presumably matplotlib.pyplot and scipy.cluster.hierarchy.
fig, ax = plt.subplots(figsize=(5, 50))
X = df.values
y = df.index
linkage_array = complete(X)
Z = dendrogram(linkage_array, labels=y, orientation='left', ax=ax)
# Use a larger font on the x axis than on the y axis.
for axis_name, font_size in (('x', 15), ('y', 8)):
    ax.tick_params(axis=axis_name, which='major', labelsize=font_size)

clustering version with heatmap, colorbar, and dendrograms

#!/usr/bin/env python3
"""
Example code for plotting a heatmap, colorbar, dendrograms, and class labels with matplotlib.
Author: Ryan Z. Friedman (rfriedman22)
Email: [email protected]
License: BSD 3 clause
Package versions:
    Python 3.6.5
    Matplotlib 3.0.2
    Numpy 1.15.4
    Pandas 0.23.4
    Scipy 1.1.0
    Scikit-learn 0.19.1
"""

import matplotlib as mpl
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable
import numpy as np
import pandas as pd
from scipy import stats
from scipy.cluster import hierarchy
from sklearn import datasets
from sklearn.preprocessing import MinMaxScaler

# Set display parameters (font sizes, default figure size, colormap, lines)
mpl.rcParams.update(
    {
        "axes.titlesize": 25,
        "axes.labelsize": 20,
        "xtick.labelsize": 15,
        "ytick.labelsize": 15,
        "legend.fontsize": 15,
        "figure.figsize": (8, 8),
        "image.cmap": "viridis",
        "lines.markersize": 5,
        "lines.linewidth": 3,
    }
)

# Load the wine dataset
wine_data = datasets.load_wine()
feature_names = wine_data["feature_names"]

# Min-max normalize the feature matrix and wrap the results in pandas objects
data = pd.DataFrame(
    MinMaxScaler().fit_transform(wine_data["data"]),
    columns=feature_names,
)
labels = pd.Series(wine_data["target"])

# Cluster the rows and the columns (transposed data) with Ward linkage
row_link = hierarchy.linkage(data, method="ward")
col_link = hierarchy.linkage(data.T, method="ward")

# Build both dendrograms without plotting; only the leaf orders are needed here
row_dendro = hierarchy.dendrogram(row_link, no_plot=True)
col_dendro = hierarchy.dendrogram(col_link, no_plot=True)

# Row order needs to be inverted to display correctly with the dendrogram,
# whereas the column order is used as returned
row_order = list(reversed(row_dendro["leaves"]))
col_order = col_dendro["leaves"]

# Make the heatmap
fig, ax = plt.subplots()
# Reorder rows/columns by position so they line up with the dendrogram leaves.
# `.iloc` is used for the labels too, so the reordering is positional for both
# objects rather than relying on the Series index labels.
data = data.iloc[row_order, col_order]
labels = labels.iloc[row_order]
heatmap = ax.imshow(data, aspect="auto")
ax.set_yticks([])
# Show the feature names on the x ticks
ax.set_xticks(np.arange(data.columns.size))
ax.set_xticklabels(data.columns, rotation=90)

# Add heatmap colorbar to the right
divider = make_axes_locatable(ax)
cbar_ax = divider.append_axes("right", size="5%", pad="2%")
fig.colorbar(heatmap, cax=cbar_ax)

# Add bar indicating classes to the left
class_ax = divider.append_axes("left", size="5%")
# imshow needs a 2-D array, so turn the labels into a column vector.
# Index the underlying NumPy array: multi-dimensional indexing directly on a
# pandas Series (`labels[:, np.newaxis]`) is no longer supported by pandas.
# Use a different color scheme for this axis
class_ax.imshow(labels.values[:, np.newaxis], aspect="auto", cmap="Set1")
class_ax.set_xticks([])
class_ax.set_yticks([])

# Add dendrogram for row clustering
row_ax = divider.append_axes("left", size="50%")
hierarchy.dendrogram(
    row_link,
    no_labels=True,
    orientation="left",
    ax=row_ax,
    color_threshold=0,
    above_threshold_color="black",
)
row_ax.axis("off")

# Add dendrogram for column clustering
col_ax = divider.append_axes("top", size="25%")
hierarchy.dendrogram(
    col_link,
    no_labels=True,
    ax=col_ax,
    color_threshold=0,
    above_threshold_color="black",
)
col_ax.axis("off")

fig.tight_layout()
plt.show()

from scipy.cluster.hierarchy import dendrogram, linkage
import  pandas as pd 
import matplotlib.pyplot as plt
import numpy as np
from sklearn import preprocessing
from matplotlib.pyplot import cm
from scipy.cluster import hierarchy
import matplotlib as mpl

# Work on an explicit copy so that adding columns to `df_samplewise` later
# can never affect (or warn about) the source frame `df_count_ix`.
df_samplewise = df_count_ix.loc[cand_ix].copy()
fig = plt.figure(figsize=(8, 8))

# Build the dendrogram link palette from the 20 colors of tab20.
# The colormap is sampled over [0, 1]; values above 1 all map to the last
# color, so the previous range of [0, 2] produced 10 identical palette entries.
cmap = cm.tab20(np.linspace(0, 1, 20))
hierarchy.set_link_color_palette([mpl.colors.rgb2hex(rgb[:3]) for rgb in cmap])
# Clustering
X = df_samplewise.values  # matrix made of the values only
mm = preprocessing.MinMaxScaler()
# MinMaxScaler scales column-wise; transpose so that each row is scaled instead
X = mm.fit_transform(X.T).T
y = df_samplewise.index
df_samplewise_scaled = pd.DataFrame(
    X, index=df_samplewise.index, columns=df_samplewise.columns
)

# Left panel: dendrogram of the scaled rows (average linkage)
ax1 = fig.add_axes([0.09, 0.1, 0.2, 0.6])
Z = linkage(X, method='average')
# Change color_threshold to adjust how finely the tree is split into
# colored clusters.
dn = dendrogram(Z, ax=ax1, orientation='left', color_threshold=0.8)
ax1.set(xticks=[], yticks=[])
zix = dn['leaves']

# Right panel: heatmap with rows reordered to follow the dendrogram leaves
axmatrix = fig.add_axes([0.3, 0.1, 0.6, 0.6])
im = axmatrix.matshow(X[zix], cmap="YlGnBu", origin='lower', aspect='auto')
axmatrix.set(xticks=[], yticks=[])
plt.show()

# Add the cluster color of every row as a 'color' column (for saving to Excel)

# Map each row label to the color of its leaf in the dendrogram
leaf_labels = df_samplewise.index[dn['leaves']]
dicIndex2color = dict(zip(leaf_labels, dn['leaves_color_list']))

# The same per-row colors go into both the raw and the scaled frame
row_colors = [dicIndex2color[label] for label in df_samplewise.index]
df_samplewise['color'] = row_colors
df_samplewise_scaled['color'] = row_colors

from scipy.cluster.hierarchy import dendrogram, linkage
import  pandas as pd 
import matplotlib.pyplot as plt
import numpy as np
from sklearn import preprocessing
from matplotlib.pyplot import cm
from scipy.cluster import hierarchy
import matplotlib as mpl

# Build the dendrogram link palette from the 20 colors of tab20.
# The colormap is sampled over [0, 1]; values above 1 all map to the last
# color, so the previous range of [0, 2] produced 10 identical palette entries.
cmap = cm.tab20(np.linspace(0, 1, 20))
hierarchy.set_link_color_palette([mpl.colors.rgb2hex(rgb[:3]) for rgb in cmap])
# Clustering
# Matrix made of the values only. A 'color' column (added further down, or by
# an earlier run) holds strings and must not be fed to linkage.
X = df_samplewise.drop(columns='color', errors='ignore').values
Z = linkage(X, 'ward')

fig = plt.figure(figsize=(25, 10))
dn = dendrogram(Z)
# To change how finely the tree is split into colored clusters, adjust the
# threshold, e.g. dendrogram(Z, orientation='left', color_threshold=10).
zix = dn['leaves']


# After hierarchical clustering, fetch the rows drawn in a given color.
# `q_color` must be one of the values in dn['leaves_color_list']; when no leaf
# has that color the result is simply an empty Index.
q_color = 'c'
# dn['leaves_color_list'] is aligned with dn['leaves'] (zix), so a boolean
# mask over it selects exactly the leaves of that color. This avoids decoding
# leaf positions from the plot coordinates in dn['icoord'].
m = np.array(dn['leaves_color_list']) == q_color
target_samples = df_samplewise.index[zix][m]


# Save to Excel with the color information included.
# The colors come from the dendrogram dict `dn` (not the linkage matrix `Z`),
# and the labels from `df_samplewise`, the frame whose rows were clustered.
# dn['leaves_color_list'] is aligned with dn['leaves'] (zix), so every row
# gets exactly the color of its leaf; rows that are not part of a colored
# cluster receive the dendrogram's above-threshold color.
dicIndex2color = dict(zip(df_samplewise.index[zix], dn['leaves_color_list']))
df_samplewise['color'] = [dicIndex2color[x] for x in df_samplewise.index]
df_samplewise.to_excel('selected_genes_color.xlsx')

# color html creation 
# http://nbviewer.jupyter.org/gist/vals/150ec97a5b7db9c82ee9