R Reading Data - gizotso/R GitHub Wiki
Read()
read.table(file # filename "data.csv" or string variable
, header = FALSE # TRUE | FALSE
, sep = "" # field delimiter : ",", ";", "\t", ...
, quote = "\"'" # set of quoting chars
, dec = "." # decimal point symbol
, row.names # vector of row names, or the number or name of the column that contains the row names
, col.names # vector with col names (default: V1, V2, ...)
, as.is = FALSE # FALSE: convert character variables to factors; TRUE: keep them as character (TRUE | FALSE | vector with col numbers or names)
, na.strings = "NA" # to be treated as missing values (NA)
, colClasses = NA # vector of classes to be assumed for the columns
, nrows = -1 # max nb rows to read
, skip = 0 # nb rows to skip
, check.names = TRUE # check variable names are valid
, fill = TRUE # TRUE|FALSE. If TRUE when rows have unequal length, blank fields are implicitly added.
, strip.white = FALSE # used if sep is specified. TRUE > stripping of leading and trailing white space from unquoted character fields.
, blank.lines.skip = TRUE # ignore blank lines
, comment.char = "#"
)
Note: the read.fwf function is similar and is used to read fixed-width files
mydata <- read.fwf("data.txt", widths=c(1, 4, 3))
read.csv( file, header = TRUE, sep = ",", quote="\"", dec=".", fill = TRUE, ...)
read.csv2( file, header = TRUE, sep = ";", quote="\"", dec=",", fill = TRUE, ...)
read.delim( file, header = TRUE, sep = "\t", quote="\"", dec=".", fill = TRUE, ...)
read.delim2( file, header = TRUE, sep = "\t", quote="\"", dec=",", fill = TRUE, ...)
Options
- sep: field delimiter (",", ";", "\t")
read.table()
example with fitness data
setwd("T:\\SW Dev\\R project")
fitness = read.table('T:\\SW Dev\\R project\\Fitness_data.csv', header = TRUE, sep = ",", dec = ".")
# or
fitness = read.table('Fitness_data.csv', header = TRUE, sep = ",", dec = ".")
# or
fitness = read.csv('Fitness_data.csv')
> fitness$Age
> fitness[,1]
[1] 44 40 44 42 38 47 40 43 44 38 44 45 45 47 54 49 51 51 48 49 57 54 52 50 51 54
[27] 51 57 49 48 52
> fitness["Age"]
Age
1 44
2 40
3 44
4 42
5 38
Scan, Cat
scan(file = "", what = double(0), nmax = -1, n = -1, sep = "",
quote = if (sep=="\n") "" else "'\"", dec = ".",
skip = 0, nlines = 0, na.strings = "NA",
flush = FALSE, fill = FALSE, strip.white = FALSE, quiet = FALSE,
blank.lines.skip = TRUE, multi.line = TRUE, comment.char = "",
allowEscapes = TRUE
)
f = scan('Fitness_data.csv', skip=1, sep = ",")
# generate dummy file
cat("TITLE extra line", "2 3 5 7", "11 13 17", file = "ex.data", sep = "\n")
pp <- scan("ex.data", skip = 1, quiet = TRUE, what = 0)
> pp
[1] 2 3 5 7 11 13 17
unlink("ex.data") # tidy up (remove file)
Inline usage: x = scan(text = "1 2 3")
Read 3 items
> x
[1] 1 2 3
keyboard input (Esc to finish): x = scan()
Readlines
## get the first names of the members of R-core
a <- readLines(file.path(R.home("doc"), "AUTHORS"))[-(1:8)]
a <- a[(0:2)-length(a)]
(a <- sub(" .*","", a))
## keyboard input
size <- readline("How many digits do you want to display? ")
Database
- DBI package: R Database Interface. A database interface (DBI) definition for communication between R and relational database management systems. All classes in this package are virtual and need to be extended by the various R/DBMS implementations.
- RMySQL: querying databases and filling a data frame with the results of your query
- ROracle: OCI-based Oracle database interface for R; querying databases and filling a data frame with the results of your query
