KEGG pathway display with DiagrammeR - lmmx/devnotes GitHub Wiki
# install.packages('DiagrammeR')
# requires V8 package (in turn requires `apt-get install libv8-dev`)
# source("http://bioconductor.org/biocLite.R")
# biocLite("KEGGgraph")
# biocLite('Rgraphviz')
# library('DiagrammeR')
# library('KEGGgraph')
# library('Rgraphviz')
Download the KGML (`.xml`) file for your organism/pathway from KEGG, e.g. the purine metabolism pathway (`sce00230`, which covers adenine biosynthesis) in S. cerevisiae:
KEGGgraph parses KEGG XML ("KGML") into graph models in R
# Path to the KGML file downloaded from KEGG; it is looked up relative to the
# current working directory.
# system.file("sce00230.xml", package = "KEGGgraph") did not work:
# system.file() returns "" when the package does not contain the file.
purine.kgml <- "sce00230.xml"
# Fail early with a clear message instead of an obscure XML parsing error.
if (!file.exists(purine.kgml)) {
  stop("KGML file not found: ", purine.kgml, call. = FALSE)
}
# genesOnly = FALSE: presumably keeps non-gene nodes (e.g. compounds) in
# addition to gene nodes -- confirm against the KEGGgraph documentation.
purine.graph <- parseKGML2DataFrame(purine.kgml, genesOnly = FALSE)
# purine.graph is a data.frame of edges (with `from` and `to` columns), not a
# graphNEL object; parseKGML2Graph() is the variant that returns a graphNEL.
DiagrammeR works with DOT-language graph specifications. `graphviz_single_df` (a DiagrammeR function, not part of Graphviz itself) can create these from this data frame:
# Turn the edge data frame into DOT node and edge statements.
# Nodes from the `from` column are drawn as large light-blue circles, nodes
# from the `to` column as smaller sea-green circles with a font size of 0;
# edges are translucent red with dot arrowheads.
# NOTE: the attribute strings deliberately span several lines; their
# continuation lines are part of the string contents and are kept exactly
# as written.
nodes_edges <- graphviz_single_df(
  df = purine.graph,
  edge_between = "from -> to",
  node_attr = c(
    "from:
shape = circle,
style = filled,
height = 2,
layer = 'all',
fontname = Helvetica,
fontsize = 42,
fillcolor = lightblue",
    "to:
shape = circle,
style = filled,
height = 1,
layer = 'all',
fontname = Helvetica,
fontsize = 0,
fillcolor = seagreen3"
  ),
  edge_attr = "1:
color = #ff000040,
arrowhead = dot
"
)
# Render the radial plot (taken from the flights example).
# `layout = twopi` selects Graphviz's radial layout engine.
# `@@1` inside the DOT string is a DiagrammeR substitution marker: it is
# replaced by the result of the R expression given in the `[1]:` footnote at
# the end of the string, i.e. the node/edge statements held in `nodes_edges`.
grViz("
digraph adenine {
# Graph statements
graph [layout = twopi,
overlap = false,
fixedsize = true,
ranksep = 11,
outputorder = edgesfirst]
# Nodes and edges
@@1
}
[1]: nodes_edges
")
Not working: KEGG IDs are displayed rather than names, and there are potentially multiple names per KEGG ID, so just write the graph out explicitly :disappointed:
...but note that the following will crash all your open Chrome windows...
# WARNING: known-bad example, kept to show what NOT to do.
# The node names in this DOT string contain hyphens, spaces and parentheses
# without being quoted, which is not valid DOT identifier syntax; rendering it
# with grViz() is reported to crash open Chrome windows.
# Boxes are the enzymes (ADE genes); circles are the pathway intermediates.
purine_graph <- "
digraph purine_graph {
# enzyme 'node' statements
node [shape = box,
fontname = Helvetica]
ADE4; ADE5; ADE8; ADE6; ADE7; ADE2; ADE1; etc
# gene 'node' statements
node [shape = circle,
fixedsize = true,
width = 0.9] // sets as circles
P-ribosyl-PP; P-ribosylamine; P-ribosylglycinamide; P-ribosylformyl glycinamide; P-ribosylformyl glycinamidine; P-ribosylamino imidazole (AIR); P-ribosylamino imidazolecarboxylate (CAIR); P-ribosylsuccino carboxamide aminoimidazole (SAICAIR); adenine
# several 'edge' statements
P-ribosyl-PP->ADE4
ADE4->P-ribosylamine
P-ribosylamine->ADE5
ADE5->P-ribosylglycinamide
P-ribosylglycinamide->ADE8
ADE8->P-ribosylformyl glycinamide
P-ribosylformyl glycinamide->ADE6
ADE6->P-ribosylformyl glycinamidine
P-ribosylformyl glycinamide->ADE7
ADE7->P-ribosylamino imidazole (AIR)
P-ribosylamino imidazole (AIR)->ADE2
ADE2->P-ribosylamino imidazolecarboxylate (CAIR)
P-ribosylamino imidazolecarboxylate (CAIR)->ADE1
ADE1->P-ribosylsuccino carboxamide aminoimidazole (SAICAIR)
P-ribosylsuccino carboxamide aminoimidazole (SAICAIR)->etc
etc->adenine
# a 'graph' statement
graph [overlap = true, fontsize = 10]
}
"
grViz(purine_graph)
Don't use node names containing non-alphanumeric characters such as hyphens, spaces or parentheses (underscores are fine); yes, this hugely limits the usefulness:
# Hand-written DOT specification of the adenine (purine) biosynthesis pathway,
# using only alphanumeric/underscore node names so that the DOT parser accepts
# them unquoted. Boxes are the enzymes (ADE genes); circles are the pathway
# intermediates; `etc` is a placeholder for the remaining steps to adenine.
#
# Fixes relative to the previous version of this string:
#   * the first edge used `P_ribosyl-PP`, which does not match the declared
#     node `P_ribosyl_PP` and reintroduces a hyphen into a node name;
#   * ADE7 was fed from P_ribosylformyl_glycinamide, leaving
#     P_ribosylformyl_glycinamidine as a dead end; ADE7 acts on the
#     glycinamidine (the product of ADE6) to give AIR.
purine_graph <- "
digraph purine_graph {
# enzyme 'node' statements
node [shape = box,
fontname = Helvetica]
ADE4; ADE5; ADE8; ADE6; ADE7; ADE2; ADE1; etc
# gene 'node' statements
node [shape = circle,
fixedsize = true,
width = 0.9] // sets as circles
P_ribosyl_PP; P_ribosylamine; P_ribosylglycinamide; P_ribosylformyl_glycinamide; P_ribosylformyl_glycinamidine; AIR; CAIR; SAICAIR; adenine
# several 'edge' statements
P_ribosyl_PP->ADE4
ADE4->P_ribosylamine
P_ribosylamine->ADE5
ADE5->P_ribosylglycinamide
P_ribosylglycinamide->ADE8
ADE8->P_ribosylformyl_glycinamide
P_ribosylformyl_glycinamide->ADE6
ADE6->P_ribosylformyl_glycinamidine
P_ribosylformyl_glycinamidine->ADE7
ADE7->AIR
AIR->ADE2
ADE2->CAIR
CAIR->ADE1
ADE1->SAICAIR
SAICAIR->etc
etc->adenine
# a 'graph' statement
graph [overlap = true, fontsize = 10]
}
"
grViz(purine_graph)