Basic Of R - littleclouds/R-for-everyone GitHub Wiki

first script in R

install.packages('dslabs')

install new packages

library(dslabs)

call new packages

library(tidyverse)

# Access the murders data set shipped with dslabs
data("murders")

# List every data set available in the loaded packages
data()

general example of plotting in R

# Labelled scatter plot: population against total murders, one label per
# state abbreviation, coloured by region
murders %>%
  ggplot(aes(population, total, label = abb, color = region)) +
  geom_label()

#Assigning the variables

# Coefficients of the quadratic a*x^2 + b*x + c = 0
a <- 2
b <- -1
c <- -4
print(a)

# Solve with the quadratic formula; the square root of the discriminant is
# shared by both roots, so it is computed once
root_disc <- sqrt(b^2 - 4 * a * c)
solution1 <- (-b + root_disc) / (2 * a)
solution2 <- (-b - root_disc) / (2 * a)

# Print both roots
print(solution1)
print(solution2)

if else are basic conditional checks

# Print the reciprocal of `a` unless `a` is zero
a <- 1
if (a != 0) {
  print(1 / a)
} else {
  print("no reciprocal for 0.")
}

library(dslabs)
data("murders")

# Murders per 100,000 inhabitants for each state
murder_rate <- murders$total / murders$population * 100000

# Position of the state with the lowest rate
ind <- which.min(murder_rate)

# Report that state only if its rate is below 0.5
if (murder_rate[ind] < 0.5) {
  print(murders$state[ind])
} else {
  print("no state")
}

# Same check with a stricter threshold of 0.25
if (murder_rate[ind] < 0.25) {
  print(murders$state[ind])
} else {
  print("no state")
}

ifelse() takes (condition, value if TRUE, value if FALSE) and works element-wise on vectors

# Element-wise reciprocal of the positive entries; NA everywhere else
a <- c(0, 1, 2, -4, 5)
result <- ifelse(a > 0, 1 / a, NA)
print(result)

is.na() tells about NA in that column

data("na_example")

# Count the missing values
sum(is.na(na_example))

# Replace every NA with 0, then confirm none remain
no_nas <- ifelse(is.na(na_example), 0, na_example)
sum(is.na(no_nas))

any() checks whether at least one entry of a logical vector is TRUE

z <- c(TRUE, FALSE, TRUE)
any(z)

# all() checks whether every entry is TRUE
all(z)

creating your own function

my_function <- function(VARIABLE_NAME) {
  # perform operations on VARIABLE_NAME and calculate VALUE
  VALUE
}

# Arithmetic mean of a numeric vector: the sum of its entries divided by
# how many entries there are
avg <- function(x) {
  total <- sum(x)
  n <- length(x)
  total / n
}

x <- 1:100
avg(x)

# avg() gives the same result as the built-in mean()
identical(mean(x), avg(x))

# Average of a numeric vector.
#   x           numeric vector
#   arithmetic  TRUE (default) returns the arithmetic mean sum(x) / n;
#               FALSE returns the geometric mean prod(x)^(1 / n)
avg <- function(x, arithmetic = TRUE) {
  n <- length(x)
  # `arithmetic` is a single flag, so branch with if/else rather than the
  # vectorised ifelse()
  if (arithmetic) {
    sum(x) / n
  } else {
    prod(x)^(1 / n)
  }
}

creates vectors

code <- c(380, 124, 818)
cod <- c("italy", "spain", "germany")
codes <- c(italy = 380, spain = 124, germany = 818)

# Attach the country names in `cod` to the entries of `code`
names(code) <- cod
print(code)

to get the desired value by its position

codes[2]

to get selected data

code[c(1,3)]

to get the range of data

code[1:3]

to get the data under the vector

# Look entries up by name: a single name, then several names at once
code["germany"]
codes[c("spain", "germany")]

coercion is R's automatic conversion of the elements of an object to a common data type

x <- c(1, "canada", 3)

# Mixing numbers and text coerces every element to character
print(x)
x[1]
x[1:3]

# as.character() turns values into strings; as.numeric() turns values into
# numbers
as.character(code)
as.numeric(code)

vector arithmetic

# Arithmetic with a single number is applied to every element
a <- c(1, 2, 3, 4, 5, 6)
print(a * 2)
print(a + 2)
print(a - 2)

# Arithmetic between two vectors of equal length works element by element
b <- c(7, 8, 9, 6, 4, 5)
print(a + b)
print(a * b)

# States with the largest population and the most murders
murders$state[which.max(murders$population)]
murders$state[which.max(murders$total)]
max(murders$population)

# Murders per 100,000 inhabitants
murder_rate <- murders$total / murders$population * 100000
print(murder_rate)
max(murder_rate)

# States listed from the lowest to the highest rate
murders$state[order(murder_rate)]
max(murder_rate)

# Sorted values, the permutation that sorts them, and each value's rank
sort(murder_rate)
order(murder_rate)
rank(murder_rate)

data wrangling

x <- c(2, 43, 27, 96, 18)

sort(x)       # values in increasing order
order(x)      # positions that would sort x
rank(x)       # rank of each value within x
min(x)
which.min(x)  # position of the smallest value
max(x)
which.max(x)  # position of the largest value

name <- c("m", "a", "n", "o")
distance <- c(0.8, 3.1, 2.8, 4.0)
time <- c(10, 30, 40, 50)

# Divide by 60 before using the times as the denominator of the speed
ti <- time / 60

speed <- distance / ti
print(speed)

library(dplyr)

mutate

# Add a `rate` column: murders per 100,000 inhabitants
murder <- mutate(murders, rate = total / population * 100000)
print(murder)

filter

filter(murder,rate <= 0.71)

select

# Keep only the state, region and rate columns
new <- select(murder, state, region, rate)
print(new)

pipeline operation

%>%

# Chain the steps with the pipe: pick three columns, then keep the rows
# whose rate is at most 0.71
murder %>%
  select(state, region, rate) %>%
  filter(rate <= 0.71)

library(dslabs) data("murders")

function

to see the data type of the variable

class(murders)

to see the structure of data frame

str(murders)

to show the first 6 lines in the dataframe

head(murders)

to see a desired number of rows

head(murders,n=50)

$ is an accessor operator used to access the data under the column in the dataframe

the output is called as vectors

murders$population

length shows length of the column of the dataframe

pop <- murders$population
length(pop)

# Data type of two columns
class(pop)
class(murders$state)

# A comparison produces a logical value
z <- 3 == 2
class(z)
class(murders$region)

a <- murders$population

to call various column in a data frame

b <- murders[['population']]

to check if two variables are exactly identical

identical(a,b)

no. of levels in a factor data type column

nlevels(movielens$genres)

counts the repetitions of each element in a vector or variable

# Tabulate how often each distinct value occurs
x <- c("a", "a", "b", "b", "c")
table(x)

ls()

shows variables assigned

log(8)
exp(1)
log(2.718282)

# A function call can be used as the argument of another function
log(exp(1))

nested function

args(log)

to find no. of arguments a function takes

# Arguments can be matched partly by position or entirely by name
log(8, base = 2)
log(x = 8, base = 2)

data()

to see all the data set

help('seq')

to get help related to the function

log2(8)
log(1024, base = 4)

#plotting library(dplyr) library(dslabs)

mutate

# Add the murder rate per 100,000 and its rank (rank 1 = highest rate),
# then keep only the columns of interest
murders %>%
  mutate(rate = total / population * 100000, rank = rank(-rate)) %>%
  select(state, rate, rank)

data(murders)

# States in the Northeast or West whose murder rate exceeds 1 per 100,000.
# The region names live in the `region` column.
mystate <- murders %>%
  mutate(rate = total / population * 100000, rank = rank(-rate)) %>%
  filter(region %in% c("Northeast", "West") & rate > 1) %>%
  select(state, rate, rank)
print(mystate)

# Scatter plot of total murders against population
x <- murders$population
y <- murders$total
plot(x, y)

# Store the rate and its rank in the data frame itself
murders <- mutate(murders, rate = total / population * 100000, rank = rank(-rate))
print(murders)

# Distribution of totals and rates, then rates broken down by region
hist(murders$total)
hist(murders$rate)
boxplot(rate ~ region, data = murders)

to arrange the data in increasing order

library(dslabs)
data("murders")

death <- murders$total
x <- c(21, 5, 4, 6, 89, 12, 21)

sort(murders$total)
sort(x)

to get the index of the sorted result

# order() returns the positions that sort a vector; indexing with them
# yields the sorted values
inex <- order(death)
death[inex]
order(death)

index <- order(x)
x[index]
order(x)

shows which state has maximum killing

murders$state
murders$abb

# State abbreviations arranged from the fewest to the most murders
index <- order(murders$total)
murders$abb[index]

max shows the maximum value in that column

max(murders$total)

which.max shows the index at which the maximum is present

i_max <- which.max(murders$total)

shows state at the max index

murders$state[i_max]

to get the index in which data will be stored

rank(x) x <- c(88,100,83,92,94)

high to low

rank(x)

low to high

rank(-x)

# For loop

# Sum of the integers 1, 2, ..., n.
#   n  a non-negative whole number
# seq_len() yields an empty sequence for n = 0, so the sum is 0 there,
# whereas 1:0 would count down and give 1.
compute_sum <- function(n) {
  x <- seq_len(n)
  sum(x)
}

simple for loop

for (i in 1:5) {
  print(i)
}

# i will store only the final value after the loop
print(i)

m <- 100

# Preallocate a numeric vector to hold all m results
s_n <- vector("numeric", length = m)

# Fill position n with the sum 1 + 2 + ... + n
for (n in seq_len(m)) {
  s_n[n] <- compute_sum(n)
}
print(s_n)

# Plot the computed sums and overlay the closed form n * (n + 1) / 2
n <- 1:m
plot(n, s_n)
lines(n, n * (n + 1) / 2)

# Print "hello" for the first ten iterations, then the counter itself
for (i in 1:15) {
  if (i > 10) {
    print(i)
  } else {
    print("hello")
  }
}

⚠️ **GitHub.com Fallback** ⚠️