Save and Load R Data

Download Report

Transcript Save and Load R Data

R Data Import/Export
Dr. Jieh-Shan George YEH
[email protected]
Save and Load R Data
• Data in R can be saved as .Rdata files with
function save().
getwd()
setwd("c:\\temp")
a <- 1:10
save(a, file="dumData.Rdata")
rm(a)
load("dumData.Rdata")
print(a)
scan() - Read data into a vector or list
from the console or file
cat("2 3 5 7", "11 13 17 19", file="ex1.data", sep="\n")
scan(file="ex1.data", what=list(x=0, y="", z=0), flush=TRUE)
cat("TITLE extra line", "2 3 5 7", "11 13 17", file = "ex2.data", sep
= "\n")
pp <- scan("ex2.data", skip = 1, quiet = TRUE)
scan("ex2.data", skip = 1)
scan("ex2.data", skip = 1, nlines = 1) # only 1 line after the skipped one
pp2<-scan("ex2.data", what = list("","","")) # flush is F -> read "7"
pp3<-scan("ex2.data", what = list("","",""), flush = TRUE)
unlink("ex2.data") # unlink deletes the file
Import from and Export to .CSV Files
• Create a dataframe df1 and save it as a .CSV file with
write.csv().
• The dataframe is loaded from file to df2 with read.csv()
var1 <- 1:5
var2 <- (1:5) / 10
var3 <- c("R", "and", "Data Mining", "Examples", "Case Studies")
df1 <- data.frame(var1, var2, var3)
names(df1) <- c("VariableInt", "VariableReal", "VariableChar")
write.csv(df1, "dummmyData.csv", row.names = FALSE)
df2 <- read.csv("dummmyData.csv")
print(df2)
read.table() - Reads a file in table format
and creates a data frame from it
Usage:
read.table(file, header = FALSE, sep = "",
row.names, col.names, nrows = -1, skip = 0)
Example:
• HousePrice <- read.table("houses.data")
• HousePrice <- read.table("houses.data",
header=TRUE)
PACKAGE ‘XLSX’
Package ‘xlsx’
• http://cran.r-project.org/web/packages/xlsx/xlsx.pdf
install.packages("xlsx")
require(xlsx)
# example of reading xlsx sheets
file <- system.file("tests", "test_import.xlsx", package = "xlsx")
res <- read.xlsx(file, 2) # read the second sheet
# example of writing xlsx sheets
file <- paste(tempfile(), "xlsx", sep=".")
write.xlsx(USArrests, file=file) # This data set contains statistics, in arrests per 100,000 residents
# for assault, murder, and rape in each of the 50 US states in 1973. Also given is the percent of the population
# living in urban areas.
res <- read.xlsx("mydata.xlsx", 1, encoding="utf-8") # read the sheet1
Output to connections
zz <- file("ex.data", "w") # open an output file connection
cat("TITLE extra line", "2 3 5 7", "", "11 13 17",
file = zz, sep = "\n")
cat("One more line\n", file = zz)
close(zz)
Output to connections
## capture R output: use examples from help(lm)
zz <- textConnection("ex.lm.out", "w")
sink(zz)
example(lm, prompt.prefix = "> ")
sink()
close(zz)
## now ‘ex.lm.out’ contains the output for further processing.
## Look at it by, e.g.,
cat(ex.lm.out, sep = "\n")
Input from connections
## read in file created in last examples
readLines("ex.data")
unlink("ex.data")
## read listing of current directory (Unix)
readLines(pipe("ls -1"))
## read listing of current directory (windows)
readLines(pipe("dir"))
PACKAGE ‘XML’
Parsing XML
• library(XML)
• u<- "http://www.w3schools.com/xml/cd_catalog.xml"
• xml_data <- xmlToList(u) # Convert an XML node/document to a more R-like list
• xml_data
• class(xml_data)
• xml_data[["CD"]][["TITLE"]]
• library(plyr)
• df<-ldply(xml_data, data.frame) # Split list to data frame
Parsing HTML table
• install.packages("XML")
• library(XML)
• theURL <- "http://www.jaredlander.com/2012/02/another-kind-of-super-bowl-pool/"
• bowPool<- readHTMLTable(theURL, which=1,
header=FALSE, stringsAsFactors=FALSE)
• bowPool
PACKAGE ‘JSONLITE’
Parsing JSON
• library(jsonlite)
• jsoncars <- toJSON(mtcars) #Convert R objects to JSON
• mtcars2 <- fromJSON(jsoncars) # Convert JSON back to R objects
• all.equal(mtcars, mtcars2)
• Reference: https://cran.r-project.org/web/packages/jsonlite/vignettes/json-aaquickstart.html
BUILT IN DATASETS
Accessing built in datasets
• Around 100 datasets are supplied with R (in
package datasets)
data()
data(infert)
• To access data from a particular package, use
the package argument
data(package="rpart")
data(Puromycin, package="datasets")
Editing data
• This is useful for making small changes once a
data set has been read. The command
data(car90, package="rpart")
xnew <- edit(car90)
• If you want to alter the original dataset xold, the
simplest way is to use fix(xold),
• which is equivalent to xold <- edit(xold).
• To enter new data via the spreadsheet interface, use:
xnew <- edit(data.frame())
OPEN DATA ONLINE
• UNdata, http://data.un.org/
• Data.gov, https://www.data.gov/
• European Union Open Data Portal,
https://open-data.europa.eu/