R Reading Data - gizotso/R GitHub Wiki

Read()

read.table(file                       # filename "data.csv" or string variable
         , header = FALSE             # TRUE | FALSE
         , sep = ""                   # field delimiter : ",", ";", "\t", ...
         , quote = "\"'"              # set of quoting chars
         , dec = "."                  # decimal point symbol
         , row.names                  # vector with row names or column's  number or name which contains the row name
         , col.names                  # vector with col names (default: V1, V2, ...)
         , as.is = FALSE              # FALSE > character variables are converted to factors; TRUE > kept as is (TRUE | FALSE | vector with col numbers or names)
         , na.strings = "NA"          # to be treated as missing values (NA)
         , colClasses = NA            # vector of classes to be assumed for the columns
         , nrows = -1                 # max nb rows  to read
         , skip = 0                   # nb rows to skip
         , check.names = TRUE         # check variable names are valid
         , fill = TRUE                # TRUE|FALSE. If TRUE when rows have unequal length, blank fields are implicitly added. (read.table itself defaults to FALSE; read.csv / read.delim default to TRUE)
         , strip.white = FALSE        # used if sep is specified. TRUE > stripping of leading and trailing white space from unquoted character fields.
         , blank.lines.skip = TRUE   # ignore blank lines
         , comment.char = "#"
         )

Note: the read.fwf function is similar and is used to read fixed-width files, e.g. mydata <- read.fwf("data.txt", widths=c(1, 4, 3))

read.csv(    file, header = TRUE, sep = ",",  quote="\"", dec=".", fill = TRUE, ...)
read.csv2(   file, header = TRUE, sep = ";",  quote="\"", dec=",", fill = TRUE, ...)
read.delim(  file, header = TRUE, sep = "\t", quote="\"", dec=".", fill = TRUE, ...)
read.delim2( file, header = TRUE, sep = "\t", quote="\"", dec=",", fill = TRUE, ...)

Options

  • sep: field delimiter (",", ";", "\t")

Read.table()

Example with the fitness data (Fitness_data.csv):

setwd("T:\\SW Dev\\R project")
fitness = read.table('T:\\SW Dev\\R project\\Fitness_data.csv', header = TRUE, sep = ",", dec = ".")
# or
fitness = read.table('Fitness_data.csv', header = TRUE, sep = ",", dec = ".")
# or
fitness = read.csv('Fitness_data.csv')
> fitness$Age
> fitness[,1]
 [1] 44 40 44 42 38 47 40 43 44 38 44 45 45 47 54 49 51 51 48 49 57 54 52 50 51 54
[27] 51 57 49 48 52

> fitness["Age"]
   Age
1   44
2   40
3   44
4   42
5   38

Scan, Cat

scan(file = "", what = double(0), nmax = -1, n = -1, sep = "",
     quote = if (sep=="\n") "" else "'\"", dec = ".",
     skip = 0, nlines = 0, na.strings = "NA",
     flush = FALSE, fill = FALSE, strip.white = FALSE, quiet = FALSE,
     blank.lines.skip = TRUE, multi.line = TRUE, comment.char = "",
     allowEscapes = TRUE
    )
f = scan('Fitness_data.csv', skip=1, sep = ",")
# generate dummy file
cat("TITLE extra line", "2 3 5 7", "11 13 17", file = "ex.data", sep = "\n")

pp <- scan("ex.data", skip = 1, quiet = TRUE, what = 0)
> pp
[1]  2  3  5  7 11 13 17
unlink("ex.data") # tidy up (remove file)

Inline usage: x = scan(text = "1 2 3")

Read 3 items
> x
[1] 1 2 3

Keyboard input (Esc to finish): x = scan()

Readlines

## get the first names of the members of R-core
a <- readLines(file.path(R.home("doc"), "AUTHORS"))[-(1:8)]
a <- a[(0:2)-length(a)]
(a <- sub(" .*","", a))

## keyboard input
size <- readline("How many digits do you want to display? ")

Database

  • DBI package (R Database Interface): a database interface (DBI) definition for communication between R and relational database management systems. All classes in this package are virtual and need to be extended by the various R/DBMS implementations.
  • RMySQL: querying databases and filling a data frame with the results of your query
  • R Oracle: OCI based Oracle database interface for R; querying databases and filling a data frame with the results of your query

R Oracle