ggplot2 - littleclouds/R-for-everyone GitHub Wiki

Plots in ggplot2 consist of 3 main components:

data

geometry : type of plot

Aesthetic mapping: variables mapped to visual cues

Further customization: scale, labels, theme

library(tidyverse)

library(dslabs)

data(murders)

to associate the ggplot to the dataset

# Create a plot object bound to the murders data; no layers are added here.
ggplot(data = murders)

this displays a blank grey panel, because no geometry layer has been added yet

# Same call, with the data frame piped in as ggplot()'s first argument.
murders %>% ggplot()

to save ggplot into a variable

# Keep the plot object in `p` so that layers can be added to it later.
p <- ggplot(data = murders)

# Inspect the class of the stored object, then render it explicitly.
class(p)
print(p)

## Layers: ggplot graphs are built up by adding one layer at a time

data %>% ggplot() + layer1 + ... + layerN

# Scatter plot in one expression: population (in millions) against total.
murders %>%
  ggplot() +
  geom_point(aes(x = population / 10^6, y = total))

add layer to predefined ggplot object

arguments written inside aes() map data variables to visual properties of the plot

# Same scatter plot, built by adding the point layer to a stored plot object.
p <- ggplot(data = murders)
p + geom_point(aes(x = population / 10^6, y = total))

add text layer to scatter plot

# Points plus a text layer that labels every point with its `abb` value.
p +
  geom_point(aes(x = population / 10^6, y = total)) +
  geom_text(aes(x = population / 10^6, y = total, label = abb))

tinkering

to change the size of the points

# `size` is a fixed value, not a data variable, so it is passed to
# geom_point() directly. Inside aes() it would be treated as a mapping:
# ggplot2 would rescale it through a size scale and add a legend for the
# constant instead of simply drawing larger points.
p +
  geom_point(aes(x = population / 10^6, y = total), size = 3) +
  geom_text(aes(x = population / 10^6, y = total, label = abb))

to nudge the labels

# Point size is a constant, so it is set outside aes() (inside aes() it
# would be mapped through a size scale and produce a spurious legend).
# nudge_x shifts every label one x-axis unit to the right of its point.
p +
  geom_point(aes(x = population / 10^6, y = total), size = 2) +
  geom_text(aes(x = population / 10^6, y = total, label = abb), nudge_x = 1)

simplify the code by adding global aesthetic

# Global mapping: x, y and label are declared once in ggplot() and are
# inherited by every layer added afterwards.
p <- murders %>%
  ggplot(aes(x = population / 10^6, y = total, label = abb))

p +
  geom_point(size = 3) +
  geom_text(nudge_x = 1.5)

local aesthetic can overwrite global

# The aes() given to geom_text() replaces the global x, y and label for
# the text layer only; the point layer still uses the global mapping.
p +
  geom_point(size = 3) +
  geom_text(aes(x = 10, y = 800, label = "hello"), nudge_x = 1.5)

define p

library(tidyverse)

library(dslabs)

data("murders")

# Base plot object with the global x / y / label mapping.
p <- murders %>%
  ggplot(aes(x = population / 10^6, y = total, label = abb))

log base 10 scale the x axis and y axis

# Both axes are transformed to log base 10 through the `trans` argument.
# The label nudge is small because it is applied on the log-scaled axis.
p +
  geom_point(size = 3) +
  geom_text(nudge_x = 0.05) +
  scale_x_continuous(trans = "log10") +
  scale_y_continuous(trans = "log10")

a more concise way to log-scale the axes

# scale_x_log10() / scale_y_log10() are the shorthand form of the log10
# axis transformation.
p +
  geom_point(size = 3) +
  geom_text(nudge_x = 0.05) +
  scale_x_log10() +
  scale_y_log10()

to add labels and title

# Log-scaled scatter plot with axis labels and a title.
p +
  geom_point(size = 3) +
  geom_text(nudge_x = 0.05) +
  scale_x_log10() +
  scale_y_log10() +
  xlab("population in million(log scale)") +
  ylab("total number of murder(log scale)") +
  ggtitle("US murder in 2010")

change color of the points

1. to make all points blue

# All points blue: the colour is a constant, so it is set outside aes().
# A single point layer is enough; a second, default-coloured point layer
# underneath would be completely covered by the blue one.
p +
  geom_text(nudge_x = 0.05) +
  scale_x_log10() +
  scale_y_log10() +
  xlab("population in million(log scale)") +
  ylab("total number of murder(log scale)") +
  ggtitle("US murder in 2010") +
  geom_point(size = 3, colour = "blue")

  2. to colour the points by region

# Colour by region: `region` is a data variable, so the colour goes inside
# aes() and ggplot2 adds a legend for it. Only one point layer is drawn; a
# second, default-coloured layer underneath would be completely covered.
p +
  geom_text(nudge_x = 0.05) +
  scale_x_log10() +
  scale_y_log10() +
  xlab("population in million(log scale)") +
  ylab("total number of murder(log scale)") +
  ggtitle("US murder in 2010") +
  geom_point(aes(col = region), size = 3)

to do this more efficiently, redefine p

# Store everything except the point layer in `p`, so that later examples
# only have to add the layer they want to demonstrate.
p <- murders %>%
  ggplot(aes(x = population / 10^6, y = total, label = abb)) +
  geom_text(nudge_x = 0.05) +
  scale_x_log10() +
  scale_y_log10() +
  xlab("population in million(log scale)") +
  ylab("total number of murder(log scale)") +
  ggtitle("US murder in 2010")

p + geom_point(aes(col = region), size = 2)

add the avg line in the scatter plot

define the average

# Overall murder rate per million people, pooled over all rows of `murders`.
r <- murders %>%
  summarize(rate = sum(total) / sum(population) * 10^6) %>%
  pull(rate)

# With x in millions and both axes on a log10 scale, total = r * x becomes
# a straight line with slope 1 and intercept log10(r).
p +
  geom_point(aes(col = region), size = 3) +
  geom_abline(intercept = log10(r))

slope has default value of 1

customizing the line: dashed (lty = 2) and black

# lty = 2 selects a dashed line; the line colour is set explicitly.
p +
  geom_point(aes(col = region), size = 3) +
  geom_abline(intercept = log10(r), lty = 2, color = "black")

change the legend

# scale_color_discrete(name = ...) sets the title of the colour legend.
p +
  geom_point(aes(col = region), size = 3) +
  geom_abline(intercept = log10(r), lty = 2, color = "black") +
  scale_color_discrete(name = "Regions")

add ons

themes

library(dslabs)

ds_theme_set()  # default theme in dslabs

themes from ggthemes

# Install ggthemes only when it is missing, so that re-running the script
# does not download and reinstall the package every time.
if (!requireNamespace("ggthemes", quietly = TRUE)) {
  install.packages("ggthemes")
}

library(ggthemes)

# The same plot rendered with two different ggthemes themes.
p +
  geom_point(aes(col = region), size = 3) +
  geom_abline(intercept = log10(r), lty = 2, color = "black") +
  scale_color_discrete(name = "Regions") +
  theme_economist()

p +
  geom_point(aes(col = region), size = 3) +
  geom_abline(intercept = log10(r), lty = 2, color = "black") +
  scale_color_discrete(name = "Regions") +
  theme_fivethirtyeight()

ggrepel to stop labels from overlapping

# Install ggrepel only when it is missing, so that re-running the script
# does not download and reinstall the package every time.
if (!requireNamespace("ggrepel", quietly = TRUE)) {
  install.packages("ggrepel")
}

use geom_text_repel() in place of geom_text(nudge_x = ...)

making a complete plot

library(tidyverse)

library(ggrepel)

library(ggthemes)

library(dslabs)

data(murders)

# Overall murder rate per million people, pooled over all rows of `murders`.
r <- murders %>%
  summarize(rate = sum(total) / sum(population) * 10^6) %>%
  pull(rate)

# Complete plot. Labels are placed with ggrepel's geom_text_repel(), which
# moves them apart so they do not overlap, instead of a fixed nudge.
# With x in millions and both axes on a log10 scale, the average-rate line
# has slope 1 and intercept log10(r).
murders %>%
  ggplot(aes(x = population / 10^6, y = total, label = abb)) +
  geom_text_repel() +
  scale_x_log10() +
  scale_y_log10() +
  xlab("population in million(log scale)") +
  ylab("total number of murder(log scale)") +
  ggtitle("US murder in 2010") +
  geom_point(aes(col = region), size = 3) +
  geom_abline(intercept = log10(r), lty = 2, color = "black") +
  theme_fivethirtyeight()

geom_histogram

library(tidyverse)

library(dslabs)

data(heights)

# Male rows only; `sex` is mapped to the outline colour.
p <- heights %>%
  filter(sex == "Male") %>%
  ggplot(aes(x = height, col = sex))

# First with the default binning, then with bins of width 1.
p + geom_histogram()
p + geom_histogram(binwidth = 1)

colour

# `fill` colours the bars and `col` their outline; both are constants,
# so they are set outside aes().
p +
  geom_histogram(binwidth = 1, fill = "blue", col = "black") +
  xlab("Male heights in inches") +
  ggtitle("histogram")

smooth density plots

# Smooth density: outline only, then with the area filled in blue.
p + geom_density()
p + geom_density(fill = "blue")

for both sexes

# No filter this time: every row of `heights` is used, and `sex` is mapped
# to the outline colour so each group is drawn in its own colour.
p <- heights %>%
  ggplot(aes(x = height, col = sex))

p + geom_histogram()
p + geom_histogram(binwidth = 1)

p + geom_density()
# Here `heights` is also passed explicitly as the layer's data; `p` was
# already built from `heights`.
p + geom_density(data = heights)

basic QQ-plot

# QQ plots use the `sample` aesthetic instead of x / y.
p <- heights %>%
  filter(sex == "Male") %>%
  ggplot(aes(sample = height))

p + geom_qq()

by default geom_qq() compares against a normal distribution with mean = 0 and sd = 1

qq-plot against a normal distribution with same mean/sd

# Mean and standard deviation of the male heights, passed to geom_qq() as
# the parameters of the theoretical distribution.
params <- heights %>%
  filter(sex == "Male") %>%
  summarize(mean = mean(height), sd = sd(height))

p + geom_qq(dparams = params)

qq-plot against standard normal distribution

# Heights are standardized with scale() before plotting, and geom_abline()
# with its default arguments adds a reference line. No sex filter is
# applied here, so every row of `heights` is used.
heights %>%
  ggplot(aes(sample = scale(height))) +
  geom_qq() +
  geom_abline()

to put several graphs on one page

ds_theme_set()
library(ggthemes)

# Base plot objects: one for the male rows, one for the female rows.
p0 <- heights %>%
  filter(sex == "Male") %>%
  ggplot(aes(x = height))

p1 <- heights %>%
  filter(sex == "Female") %>%
  ggplot(aes(x = height))

# Histograms (g1, g2) and densities (g3, g4), stored so that they can be
# arranged on a single page afterwards.
g1 <- p0 +
  geom_histogram(binwidth = 1, fill = "Blue") +
  xlab("Male heights in inches") +
  ggtitle("histogram") +
  theme_foundation()

g2 <- p1 +
  geom_histogram(binwidth = 2, fill = "Red") +
  xlab("Female heights in inches") +
  ggtitle("histogram") +
  theme_foundation()

g3 <- p0 +
  geom_density(fill = "Blue") +
  xlab("Male heights in inches") +
  ggtitle("densityplot") +
  theme_foundation()

g4 <- p1 +
  geom_density(fill = "Red") +
  xlab("Female heights in inches") +
  ggtitle("density") +
  theme_foundation()

# Install gridExtra only when it is missing, so that re-running the script
# does not download and reinstall the package every time.
if (!requireNamespace("gridExtra", quietly = TRUE)) {
  install.packages("gridExtra")
}

library(gridExtra)

# All four plots in a single row of four columns.
grid.arrange(g1, g2, g3, g4, ncol = 4)

# The same four plots on a 2 x 2 grid.
grid.arrange(g1, g2, g3, g4, ncol = 2, nrow = 2)