first script in R

# Install dslabs only when it is missing, so re-running the script does not
# download and reinstall the package every time.
if (!requireNamespace("dslabs", quietly = TRUE)) {
  install.packages("dslabs")
}

install new packages

library(dslabs)

call new packages

library(tidyverse)

## access the data set
data("murders")
# calling data() with no arguments lists the available data sets
data()

general example of plotting in R

# Label plot of `total` against `population`: each point is drawn as its `abb`
# label and coloured by `region`.
ggplot(
  data = murders,
  mapping = aes(x = population, y = total, label = abb, color = region)
) +
  geom_label()

#Assigning the variables

# Coefficients a, b, c used by the quadratic-equation example.
a <- 2
b <- -1
c <- -4
print(a)

solving quadratic equation

# Both roots of a*x^2 + b*x + c = 0 from the quadratic formula.
solution1 <- (-b + sqrt(b^2 - 4 * a * c)) / (2 * a)
solution2 <- (-b - sqrt(b^2 - 4 * a * c)) / (2 * a)

print value

# Show both roots.
print(solution1)
print(solution2)

if else are basic conditional checks

# Print the reciprocal of `a`, or a message when `a` is zero.
a <- 1
if (a != 0) {
  print(1 / a)
} else {
  print("no reciprocal for 0.")
}

# Murder rate per 100,000 people. Print the state with the lowest rate, but
# only when that rate is below 0.5.
library(dslabs)
data("murders")
murder_rate <- murders$total / murders$population * 100000
ind <- which.min(murder_rate) # index of the smallest rate
if (murder_rate[ind] < 0.5) {
  print(murders$state[ind])
} else {
  print("no state")
}

# Same check with a stricter threshold of 0.25: print the state at `ind` when
# its rate is below the threshold, otherwise the fallback message.
print(if (murder_rate[ind] < 0.25) murders$state[ind] else "no state")

ifelse takes (condition, expression if true, expression if false) and works element by element on vectors

# Vectorised conditional: reciprocal for positive entries, NA for the rest.
a <- c(0, 1, 2, -4, 5)
result <- ifelse(a > 0, 1 / a, NA)
print(result)

is.na() tells about NA in that column

# Count the NAs in na_example, replace them with 0, then confirm none remain.
data("na_example")
sum(is.na(na_example))
no_nas <- ifelse(is.na(na_example), 0, na_example)
sum(is.na(no_nas))

any() checks whether at least one entry of a logical vector is TRUE

# Logical vector with at least one TRUE entry, so any(z) is TRUE.
z <- c(TRUE, FALSE, TRUE)
any(z)

all() checks whether all the entries are TRUE

all(z)

creating your own function

my_function <- function(VARIABLE_NAME){ PERFORM OPERATION IN VARIABLE_NAME VALUE }

# Arithmetic mean of a numeric vector: the sum divided by the length.
#
# x: numeric vector.
# Returns a single number.
avg <- function(x) {
  s <- sum(x)
  l <- length(x)
  s / l
}

# Try it on the integers 1 to 100.
x <- 1:100
avg(x)

identical(mean(x),avg(x))

# Mean of a numeric vector.
#
# x:          numeric vector.
# arithmetic: TRUE (default) for the arithmetic mean, sum(x) / n;
#             FALSE for the geometric mean, prod(x)^(1 / n).
# Returns a single number.
avg <- function(x, arithmetic = TRUE) {
  n <- length(x)
  # `arithmetic` is a single flag, so a plain if/else is used instead of the
  # vectorised ifelse().
  if (arithmetic) {
    sum(x) / n
  } else {
    prod(x)^(1 / n)
  }
}

creates vectors

# Numeric codes, the matching country names, and a vector that carries both
# (names attached to values) in a single definition.
code <- c(380, 124, 818)
cod <- c("italy", "spain", "germany")
codes <- c(italy = 380, spain = 124, germany = 818)

to merge two vectors (attach the names in one to the values in the other)

# Attach the country names in `cod` to the values in `code`.
names(code) <- cod
print(code)

to get the desired value

codes[2]

to get selected data

code[c(1,3)]

to get the range of data

code[1:3]

to get the data under the vector

# Subset by name: one element, then several at once.
code["germany"]
codes[c("spain", "germany")]

coercion is R's automatic conversion of an object's data type so that all elements of a vector share one type

x <-c(1,'canada',3)

all are converted into string

# Inspect the whole vector, its first element, and elements 1 to 3.
print(x)
x[1]
x[1:3]

# Explicit conversion: as.character() turns the values into strings, and
# as.numeric() returns them as plain numbers (any names are dropped).
as.character(code)
as.numeric(code)

vector arithmetic

# Arithmetic with a scalar is applied to every element.
a <- c(1, 2, 3, 4, 5, 6)
print(a * 2)
print(a + 2)
print(a - 2)

# Arithmetic between two equal-length vectors is element by element.
b <- c(7, 8, 9, 6, 4, 5)
print(a + b)
print(a * b)

# State with the largest population, state with the largest total, and the
# largest population value itself.
murders$state[which.max(murders$population)]
murders$state[which.max(murders$total)]
max(murders$population)

# Murder rate per 100,000 people, then several ways of ordering it.
murder_rate <- murders$total / murders$population * 100000
print(murder_rate)
max(murder_rate)
murders$state[order(murder_rate)] # states from lowest to highest rate
sort(murder_rate) # the rates themselves in increasing order
order(murder_rate) # indices that would sort the rates
rank(murder_rate) # rank of each rate (1 = smallest)

data wrangling

# sort / order / rank and min / max on a small example vector.
x <- c(2, 43, 27, 96, 18)
sort(x) # values in increasing order
order(x) # indices that put x in increasing order
rank(x) # rank of each element (1 = smallest)
min(x)
which.min(x) # index of the smallest value
max(x)
which.max(x) # index of the largest value

# Four names with a distance and a time each; `ti` is `time` divided by 60.
name <- c("m", "a", "n", "o")
distance <- c(0.8, 3.1, 2.8, 4.0)
time <- c(10, 30, 40, 50)
ti <- time / 60

# Speed is distance divided by the converted time, element by element.
speed <- distance / ti
print(speed)

library(dplyr)

mutate

# Add a `rate` column (total per 100,000 of population) to a copy of murders.
murder <- mutate(murders, rate = total / population * 100000)
print(murder)

filter

filter(murder,rate <= 0.71)

select

# Keep only the state, region and rate columns.
new <- select(murder, state, region, rate)
print(new)

pipe line operation

%>%

# Chain the two steps: keep three columns, then keep rows with rate <= 0.71.
murder %>%
  select(state, region, rate) %>%
  filter(rate <= 0.71)

# Load the package and the murders data set.
library(dslabs)
data("murders")

function

to see the data type of the variable

class(murders)

to see the structure of data frame

str(murders)

to show the first 6 lines in the dataframe

head(murders)

to see the desired no. of rows

head(murders,n=50)

$ is an accessor operator used to access the data under the column in the dataframe

the output is a vector

murders$population

length shows length of the column of the dataframe

# Extract the population column and count its entries.
pop <- murders$population
length(pop)

# Data type of the population vector and of the state column.
class(pop)
class(murders$state)

# A comparison yields a logical value; then check the type of the region column.
z <- 3 == 2
class(z)
class(murders$region)

a <- murders$population

[[ ]] is another way to access a column of a data frame

b <- murders[['population']]

to check whether two variables are exactly identical

identical(a,b)

no. of levels in a factor data type column

nlevels(movielens$genres)

counts how many times each element occurs in a vector or variable

# Frequency of each distinct value in a character vector.
x <- c("a", "a", "b", "b", "c")
table(x)

ls()

shows variables assigned

# Natural log and exponential; the last call nests one function in another.
log(8)
exp(1)
log(2.718282)
log(exp(1))

nested function

args(log)

to find no. of arguments a function takes

# Arguments can be passed by position or by name.
log(8, base = 2)
log(x = 8, base = 2)
# calling data() with no arguments lists the available data sets
data()

to see all the data set

help('seq')

to get help related to the function

# Base-2 logarithm, and a logarithm with an explicit base.
log2(8)
log(1024, base = 4)

# plotting
library(dplyr)
library(dslabs)

mutate

# Add rate (per 100,000) and rank (1 = highest rate), then keep three columns.
mutate(murders, rate = total / population * 100000, rank = rank(-rate)) %>%
  select(state, rate, rank)

# States in the Northeast or West regions whose murder rate exceeds 1 per
# 100,000, with their rate and rank (1 = highest rate).
data(murders)
mystate <- murders %>%
  mutate(rate = total / population * 100000, rank = rank(-rate)) %>%
  # "Northeast" and "West" are values of the `region` column
  filter(region %in% c("Northeast", "West") & rate > 1) %>%
  select(state, rate, rank)
print(mystate)

# Scatter plot of total against population.
x <- murders$population
y <- murders$total
plot(x, y)

# Add rate and rank columns to murders itself, then draw histograms of total
# and rate and a box plot of rate by region.
murders <- mutate(
  murders,
  rate = total / population * 100000,
  rank = rank(-rate)
)
print(murders)
hist(murders$total)
hist(murders$rate)
boxplot(rate ~ region, data = murders)

to arrange the data in increasing order

# Reload the data, then sort the totals and a small example vector.
library(dslabs)
data("murders")
death <- murders$total
x <- c(21, 5, 4, 6, 89, 12, 21)
sort(murders$total)
sort(x)

to get the index of the sorted result

# order() returns the indices that sort a vector; indexing with them gives the
# sorted values.
inex <- order(death)
death[inex]
order(death)
index <- order(x)
x[index]
order(x)

shows which state has maximum killing

# State names and abbreviations, then the abbreviations ordered by total
# (increasing, so the largest total comes last).
murders$state
murders$abb
index <- order(murders$total)
murders$abb[index]

max shows the max value in that column

max(murders$total)

which.max shows the index at which the max is present

i_max <- which.max(murders$total)

shows state at the max index

murders$state[i_max]

rank gives, for each value, the position it would take in the sorted vector

# Rank the current x, then replace x with a new example vector.
rank(x)
x <- c(88, 100, 83, 92, 94)

rank from low to high (smallest value gets rank 1)

rank(x)

rank from high to low (largest value gets rank 1)

rank(-x)

# For loop
# Sum of the integers 1, 2, ..., n.
#
# n: a single non-negative whole number.
# Returns the sum; 0 when n is 0, because seq_len(0) is empty whereas 1:0
# would expand to c(1, 0).
compute_sum <- function(n) {
  x <- seq_len(n)
  sum(x)
}

simple for loop

# Print each value of i in turn.
for (i in 1:5) {
  print(i)
}
print(i) # i keeps only the final value after the loop

# Store compute_sum(n) for every n from 1 to m.
m <- 100
s_n <- numeric(m) # pre-allocate a numeric result vector of length m
for (n in seq_len(m)) {
  s_n[n] <- compute_sum(n)
}
print(s_n) # holds all the values, one per n

# Plot the computed sums as points and overlay the closed form n * (n + 1) / 2
# as a line.
n <- seq_len(m)
plot(n, s_n)
lines(n, n * (n + 1) / 2)

# if/else inside a loop: print i once it exceeds 10, "hello" otherwise.
for (i in 1:15) {
  if (i > 10) {
    print(i)
  } else {
    print("hello")
  }
}

Basic Of R - littleclouds/R-for-everyone GitHub Wiki

first script in R

install new packages

call new packages

general example of plotting in R

solving quadratic equation

print value

if else are basic conditional checks

ifelse take(condition,expression for truth and expression for false)

is.na() tells about NA in that column

any is use to check if there is any true in variable

all() checks whether all the entries are TRUE

creating your own function

creates vectors

to merge to vectors

to get the desried value

to get selected data

to get the range of data

to get the data under the vector

coercion is property to self convert or change the datatype of object

all are converted into string

vector arithmetic

data wrangling

mutate

filter

select

pipe line operation

%>%

function

to see the data type of the variable

to see the structure of data frame

to show the first 6 lines in the dataframe

to see deisred no. of values

$ is an accessor operator used to access the data under the column in the dataframe

the output is called as vectors

length shows length of the column of the dataframe

to call various column in a data frame

to check if to varible are of same class

no. of levels in a factor data type column

counts the repeatation of element in a vector or variable

shows variables assigned

nested function

to find no. of arguments a function takes

to see all the data set

to get help related to the function

mutate

to arrange the data in increasing order

to get the index oof sorted result

shows which state has maximum killing

max shows the max value in that coulmn

which max shows the cindex at max is present

shows state at the max index

to get the index in which data will be stored

rank from low to high (smallest value gets rank 1)

rank from high to low (largest value gets rank 1)

simple for loop

⚠️ **GitHub.com Fallback** ⚠️

⚠️ GitHub.com Fallback ⚠️