KEGG pathway display with DiagrammeR - lmmx/devnotes GitHub Wiki

# install.packages('DiagrammeR')
# requires V8 package (in turn requires `apt-get install libv8-dev`)

# source("http://bioconductor.org/bioclite.r")
# biocLite("KEGGgraph")
# biocLite('Rgraphviz')

# library('DiagrammeR')
# library('KEGGgraph')
# library('Rgraphviz')

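Download the KGML (.xml) file for your organism/pathway from KEGG, e.g. the purine metabolism pathway (sce00230, which includes adenine biosynthesis) in S. cerevisiae, and save it in your R working directory.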

KEGGgraph parses KEGG XML ("KGML") into graph models in R

# purine.kgml <- system.file("sce00230.xml", package="KEGGgraph")
# ^ did not work

# Path to the KGML file downloaded from KEGG. It is a bare file name, so it is
# resolved relative to the current working directory.
purine.kgml <- "sce00230.xml"
# Parse the KGML file into R. genesOnly = FALSE presumably keeps non-gene
# entries (e.g. compounds) as well -- TODO confirm against the KEGGgraph docs.
purine.graph <- parseKGML2DataFrame(purine.kgml, genesOnly = FALSE)
# NOTE(review): an earlier note here said purine.graph is a graphNEL, but
# parseKGML2DataFrame() is expected to return a data.frame of edges (its
# `from` and `to` columns are what the graphviz_single_df() call below uses).
# Confirm with class(purine.graph).

DiagrammeR works with graph specifications written in the DOT language. `graphviz_single_df()` (which I believe is a DiagrammeR function rather than part of Graphviz itself) can generate the DOT node and edge statements from this data frame:

# Build the DOT node and edge statements from the parsed data frame; the result
# is substituted into the graph specification passed to grViz() further down.
# NOTE(review): graphviz_single_df() is not defined in this file; it appears to
# come from an early DiagrammeR release -- confirm it exists in your version.
nodes_edges <-
    graphviz_single_df(
        df = purine.graph,
        # Edge specification: presumably one edge per row, running from the
        # value in column `from` to the value in column `to` -- TODO confirm.
        edge_between = c("from -> to"),
        # One attribute string per column. The text before the colon looks like
        # the column whose values become nodes, followed by the DOT attributes
        # applied to those nodes. `to` nodes get fontsize = 0, which presumably
        # hides their labels -- verify.
        node_attr = c("from:
                   shape = circle,
                   style = filled,
                   height = 2,
                   layer = 'all',
                   fontname = Helvetica,
                   fontsize = 42,
                   fillcolor = lightblue",
                      "to: 
                   shape = circle,
                   style = filled,
                   height = 1,
                   layer = 'all',
                   fontname = Helvetica,
                   fontsize = 0,
                   fillcolor = seagreen3"),
        # Attributes for the edges; the leading `1:` presumably refers to the
        # first (and only) entry of edge_between -- TODO confirm. #ff000040
        # looks like an RRGGBBAA hex colour (red, about 25% opaque).
        edge_attr = "1:
                   color = #ff000040,
                   arrowhead = dot
                  "
    )

# Radial plot of the pathway (layout = twopi), adapted from the DiagrammeR
# "flights" example. `@@1` inside the DOT text is a placeholder: the
# `[1]: nodes_edges` footnote at the end of the string makes grViz() substitute
# the R object nodes_edges at that position.
# NOTE(review): fixedsize is normally a node attribute in Graphviz; inside
# `graph [...]` it probably has no effect -- confirm before relying on it.

grViz("
digraph adenine {

  # Graph statements
  graph [layout = twopi,
         overlap = false,
         fixedsize = true,
         ranksep = 11,
         outputorder = edgesfirst]

  # Nodes and edges
  @@1

}
[1]: nodes_edges
")

Not working: the KEGG IDs are displayed instead of readable names, and there are potentially multiple names per KEGG ID, so just write the graph out explicitly by hand :disappointed:

...the following will crash all your open Chrome windows...

# Hand-written DOT specification of the pathway, kept here as the FAILING
# example. The node names below contain hyphens, spaces and parentheses and are
# not quoted; unquoted DOT identifiers may only contain letters, digits and
# underscores, so this text is not valid DOT. Do not run it as-is.
purine_graph <- "
digraph purine_graph {
    # enzyme 'node' statements
    node [shape = box,
          fontname = Helvetica]
    ADE4; ADE5; ADE8; ADE6; ADE7; ADE2; ADE1; etc
    # gene 'node' statements
    node [shape = circle,
          fixedsize = true,
          width = 0.9] // sets as circles
    P-ribosyl-PP; P-ribosylamine; P-ribosylglycinamide; P-ribosylformyl glycinamide; P-ribosylformyl glycinamidine; P-ribosylamino imidazole (AIR); P-ribosylamino imidazolecarboxylate (CAIR); P-ribosylsuccino carboxamide aminoimidazole (SAICAIR); adenine

    # several 'edge' statements
    P-ribosyl-PP->ADE4
    ADE4->P-ribosylamine
    P-ribosylamine->ADE5
    ADE5->P-ribosylglycinamide
    P-ribosylglycinamide->ADE8
    ADE8->P-ribosylformyl glycinamide
    P-ribosylformyl glycinamide->ADE6
    ADE6->P-ribosylformyl glycinamidine
    P-ribosylformyl glycinamide->ADE7
    ADE7->P-ribosylamino imidazole (AIR)
    P-ribosylamino imidazole (AIR)->ADE2
    ADE2->P-ribosylamino imidazolecarboxylate (CAIR)
    P-ribosylamino imidazolecarboxylate (CAIR)->ADE1
    ADE1->P-ribosylsuccino carboxamide aminoimidazole (SAICAIR)
    P-ribosylsuccino carboxamide aminoimidazole (SAICAIR)->etc
    etc->adenine
    
    # a 'graph' statement
    graph [overlap = true, fontsize = 10]
}
"
# Attempt to render the specification above (this is the call that misbehaves).
grViz(purine_graph)

Don't use node names containing non-alphanumeric characters other than underscores (yes, this hugely limits the usefulness):

# DOT specification of the adenine branch of purine biosynthesis, using only
# identifier-safe node names (letters, digits, underscores) so that it parses.
# Boxes are enzymes (ADE genes); circles are metabolites. `etc` stands in for
# the remaining, unlisted steps between SAICAIR and adenine.
#
# Two fixes relative to the earlier draft of this string:
#   * the first edge referenced `P_ribosyl-PP` (with a hyphen), which is not a
#     valid unquoted DOT identifier and did not match the declared node
#     `P_ribosyl_PP`;
#   * ADE7 was fed from P_ribosylformyl_glycinamide, leaving
#     P_ribosylformyl_glycinamidine as a dead end. The pathway is linear, so
#     ADE7 now takes the glycinamidine produced by ADE6.
purine_graph <- "
digraph purine_graph {
  # enzyme 'node' statements
  node [shape = box,
        fontname = Helvetica]
  ADE4; ADE5; ADE8; ADE6; ADE7; ADE2; ADE1; etc

  # metabolite 'node' statements
  node [shape = circle,
        fixedsize = true,
        width = 0.9] // sets as circles
  P_ribosyl_PP; P_ribosylamine; P_ribosylglycinamide; P_ribosylformyl_glycinamide; P_ribosylformyl_glycinamidine; AIR; CAIR; SAICAIR; adenine

  # several 'edge' statements
  P_ribosyl_PP->ADE4
  ADE4->P_ribosylamine
  P_ribosylamine->ADE5
  ADE5->P_ribosylglycinamide
  P_ribosylglycinamide->ADE8
  ADE8->P_ribosylformyl_glycinamide
  P_ribosylformyl_glycinamide->ADE6
  ADE6->P_ribosylformyl_glycinamidine
  P_ribosylformyl_glycinamidine->ADE7
  ADE7->AIR
  AIR->ADE2
  ADE2->CAIR
  CAIR->ADE1
  ADE1->SAICAIR
  SAICAIR->etc
  etc->adenine

  # a 'graph' statement
  graph [overlap = true, fontsize = 10]
}
"
# Render the DOT specification stored in purine_graph with DiagrammeR's grViz().
grViz(purine_graph)