This is an R Markdown document for the course ‘Computing in R’. Using this document, you can easily run selected pieces of R code shown during the lectures from within RStudio.
2+7
## [1] 9
sqrt(2)
## [1] 1.414214
cos(pi)
## [1] -1
log10(10^3)
## [1] 3
x <- 2
print(x)
## [1] 2
x
## [1] 2
x^2
## [1] 4
x
## [1] 2
#y < - x
help(sqrt)
## starting httpd help server ... done
help.start()
## If nothing happens, you should open
## 'http://127.0.0.1:14020/doc/html/index.html' yourself
help(mean)
# ? is a shorthand for help()
?mean
x <- c(10, 9, 8, 7, 6, 5, 4, 3, 2, 1)
x
## [1] 10 9 8 7 6 5 4 3 2 1
x <- seq(from = 10, to = 1, by = -1)
x
## [1] 10 9 8 7 6 5 4 3 2 1
x <- seq(10, 1)
x
## [1] 10 9 8 7 6 5 4 3 2 1
x <- 10:1
x
## [1] 10 9 8 7 6 5 4 3 2 1
x[5]
## [1] 6
x[10]
## [1] 1
x[5] + x[10]
## [1] 7
indx <- c(5,10)
indx
## [1] 5 10
x[indx]
## [1] 6 1
x[c(5,10)]
## [1] 6 1
c(-5, -10)
## [1] -5 -10
x[c(-5, -10)]
## [1] 10 9 8 7 5 4 3 2
x[4] <- 12
x
## [1] 10 9 8 12 6 5 4 3 2 1
mean(x)
## [1] 6
x + 1
## [1] 11 10 9 13 7 6 5 4 3 2
2*x
## [1] 20 18 16 24 12 10 8 6 4 2
help(matrix)
A <- matrix(data = 1:10, nrow = 2, ncol = 5)
A <- matrix(1:10, 2, 5)
A
## [,1] [,2] [,3] [,4] [,5]
## [1,] 1 3 5 7 9
## [2,] 2 4 6 8 10
A[2, 3]
## [1] 6
A[2,3] <- 12
A[1, ]
## [1] 1 3 5 7 9
A[1,1:5]
## [1] 1 3 5 7 9
A[, c(1, 5)]
## [,1] [,2]
## [1,] 1 9
## [2,] 2 10
A[1:2, c(1,5)]
## [,1] [,2]
## [1,] 1 9
## [2,] 2 10
dim(A[, c(1, 5)])
## [1] 2 2
#x[1,3]
x[c(1,3)]
## [1] 10 8
ls()
## [1] "A" "indx" "r" "x"
summary(x)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.00 3.25 5.50 6.00 8.75 12.00
summary(A)
## V1 V2 V3 V4 V5
## Min. :1.00 Min. :3.00 Min. : 5.00 Min. :7.00 Min. : 9.00
## 1st Qu.:1.25 1st Qu.:3.25 1st Qu.: 6.75 1st Qu.:7.25 1st Qu.: 9.25
## Median :1.50 Median :3.50 Median : 8.50 Median :7.50 Median : 9.50
## Mean :1.50 Mean :3.50 Mean : 8.50 Mean :7.50 Mean : 9.50
## 3rd Qu.:1.75 3rd Qu.:3.75 3rd Qu.:10.25 3rd Qu.:7.75 3rd Qu.: 9.75
## Max. :2.00 Max. :4.00 Max. :12.00 Max. :8.00 Max. :10.00
a2 <- 1
#2a <- 1
c <- 1
c
## [1] 1
c(1,2)
## [1] 1 2
#else <- 1
c(1, 2, 3, 4)
## [1] 1 2 3 4
-2 < -2
## [1] FALSE
letters[1:3]
## [1] "a" "b" "c"
mode(x)
## [1] "numeric"
as.character(x)
## [1] "10" "9" "8" "12" "6" "5" "4" "3" "2" "1"
x
## [1] 10 9 8 12 6 5 4 3 2 1
x[c(TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE,FALSE, TRUE, FALSE)]
## [1] 10 8 6 4 2
x>5
## [1] TRUE TRUE TRUE TRUE TRUE FALSE FALSE FALSE FALSE FALSE
x[x>5]
## [1] 10 9 8 12 6
length(x>5)
## [1] 10
length(x)
## [1] 10
x[x==5] # == : test for equality
## [1] 5
x[x=5] # = : assignment operator
## [1] 6
sum(x>5)
## [1] 5
TRUE & FALSE
## [1] FALSE
TRUE | FALSE
## [1] TRUE
x
## [1] 10 9 8 12 6 5 4 3 2 1
x>5
## [1] TRUE TRUE TRUE TRUE TRUE FALSE FALSE FALSE FALSE FALSE
x<8
## [1] FALSE FALSE FALSE FALSE TRUE TRUE TRUE TRUE TRUE TRUE
x>5 & x<8
## [1] FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE
sum(x>5 & x<8)
## [1] 1
x[x>5 & x<8]
## [1] 6
m <- c(1,2,3,4)
names(m) <- c("gene 1","gene 2","gene 3","gene 4")
m
## gene 1 gene 2 gene 3 gene 4
## 1 2 3 4
rownames(A)
## NULL
rownames(A) <- c("gene 1", "gene 2")
colnames(A) <- c("array 1", "array 2", "array 3","array 4", "array 5")
A
## array 1 array 2 array 3 array 4 array 5
## gene 1 1 3 5 7 9
## gene 2 2 4 12 8 10
A["gene 1", ]
## array 1 array 2 array 3 array 4 array 5
## 1 3 5 7 9
c("gene 1", 5)
## [1] "gene 1" "5"
list(gene = "gene 1", number = 5)
## $gene
## [1] "gene 1"
##
## $number
## [1] 5
x <- list(gene = "gene 1", number = 5)
x[1]
## $gene
## [1] "gene 1"
x[[1]]
## [1] "gene 1"
x$gene
## [1] "gene 1"
pclass <- c("1st","2nd","1st")
survived <- c(1,1,0)
name <- c("Elisabeth Walton","Hudson Trevor","Helen Loraine")
age <- c(29.0,0.9167,2.0)
titanic <- data.frame(pclass,survived,name,age)
titanic
## pclass survived name age
## 1 1st 1 Elisabeth Walton 29.0000
## 2 2nd 1 Hudson Trevor 0.9167
## 3 1st 0 Helen Loraine 2.0000
titanic[c(2,3),]
## pclass survived name age
## 2 2nd 1 Hudson Trevor 0.9167
## 3 1st 0 Helen Loraine 2.0000
titanic[,c("name","age")]
## name age
## 1 Elisabeth Walton 29.0000
## 2 Hudson Trevor 0.9167
## 3 Helen Loraine 2.0000
titanic[c(2,3),c("name","age")]
## name age
## 2 Hudson Trevor 0.9167
## 3 Helen Loraine 2.0000
titanic$age
## [1] 29.0000 0.9167 2.0000
titanic[["age"]]
## [1] 29.0000 0.9167 2.0000
titanic$status <- c("yes","yes","no")
titanic
## pclass survived name age status
## 1 1st 1 Elisabeth Walton 29.0000 yes
## 2 2nd 1 Hudson Trevor 0.9167 yes
## 3 1st 0 Helen Loraine 2.0000 no
In the exercises, we use the Titanic data set. See here for a description of the data.
library(foreign)
titanic3 <- read.dta("Exercises/titanic3.dta", convert.underscore=TRUE) # convert.underscore: Convert "_" in Stata variable names to "." in R names?
dim(titanic3)
## [1] 1309 17
head(titanic3[,1:4])
## pclass survived name sex
## 1 1st 1 Allen, Miss. Elisabeth Walton female
## 2 1st 1 Allison, Master. Hudson Trevor male
## 3 1st 0 Allison, Miss. Helen Loraine female
## 4 1st 0 Allison, Mr. Hudson Joshua Crei male
## 5 1st 0 Allison, Mrs. Hudson J C (Bessi female
## 6 1st 1 Anderson, Mr. Harry male
tail(titanic3[,1:4])
## pclass survived name sex
## 1304 3rd 0 Yousseff, Mr. Gerious male
## 1305 3rd 0 Zabour, Miss. Hileni female
## 1306 3rd 0 Zabour, Miss. Thamine female
## 1307 3rd 0 Zakarian, Mr. Mapriededer male
## 1308 3rd 0 Zakarian, Mr. Ortin male
## 1309 3rd 0 Zimmerman, Mr. Leo male
str(titanic3[,1:4])
## 'data.frame': 1309 obs. of 4 variables:
## $ pclass : Factor w/ 3 levels "1st","2nd","3rd": 1 1 1 1 1 1 1 1 1 1 ...
## $ survived: int 1 1 0 0 0 1 1 0 1 0 ...
## $ name : chr "Allen, Miss. Elisabeth Walton" "Allison, Master. Hudson Trevor" "Allison, Miss. Helen Loraine" "Allison, Mr. Hudson Joshua Crei" ...
## $ sex : Factor w/ 2 levels "female","male": 1 2 1 2 1 2 1 2 1 2 ...
In R, we can also import directly from a web site:
load(url("https://biostat.app.vumc.org/wiki/pub/Main/DataSets/titanic3.sav"))
ls()
## [1] "A" "a2" "age" "c" "indx" "m"
## [7] "name" "pclass" "r" "survived" "titanic" "titanic3"
## [13] "x"
An example using the readxl package for MS Excel files. The data set can be obtained from here. If the data set has been stored in a subfolder Exercises, it can be imported via
install.packages("readxl")
## package 'readxl' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\pdmoerland\AppData\Local\Temp\RtmpkfCiIZ\downloaded_packages
library(readxl)
## Warning: package 'readxl' was built under R version 4.0.4
titanic3 <- read_excel("Exercises/titanic3.xls")
## Warning in read_fun(path = enc2native(normalizePath(path)), sheet_i = sheet, :
## Coercing text to numeric in M1306 / R1306C13: '328'
## Explain warning message and repair by converting to number
## tibble
dim(titanic3)
## [1] 1309 14
head(titanic3[,1:4])
## # A tibble: 6 x 4
## pclass survived name sex
## <dbl> <dbl> <chr> <chr>
## 1 1 1 Allen, Miss. Elisabeth Walton female
## 2 1 1 Allison, Master. Hudson Trevor male
## 3 1 0 Allison, Miss. Helen Loraine female
## 4 1 0 Allison, Mr. Hudson Joshua Creighton male
## 5 1 0 Allison, Mrs. Hudson J C (Bessie Waldo Daniels) female
## 6 1 1 Anderson, Mr. Harry male
tail(titanic3[,1:4])
## # A tibble: 6 x 4
## pclass survived name sex
## <dbl> <dbl> <chr> <chr>
## 1 3 0 Yousseff, Mr. Gerious male
## 2 3 0 Zabour, Miss. Hileni female
## 3 3 0 Zabour, Miss. Thamine female
## 4 3 0 Zakarian, Mr. Mapriededer male
## 5 3 0 Zakarian, Mr. Ortin male
## 6 3 0 Zimmerman, Mr. Leo male
## Last row: (not) empty?
We run the code to import the STATA file, which is what we will do in the exercises as well.
library(foreign)
titanic3 <- read.dta("Exercises/titanic3.dta", convert.underscore=TRUE)
Using the logarithm as an example, we show the different ways to obtain the 10-log of 100 (which should give 2 as outcome).
## Basic structure
log(100) # does not give 2
## [1] 4.60517
help(log)
log(100, 10)
## [1] 2
log(10, 100) # does not give 2
## [1] 0.5
log(base=10, x=100)
## [1] 2
log(b=10,x=100)
## [1] 2
## Some functions have an ... argument
c
## [1] 1
c(3,6,8)
## [1] 3 6 8
AMC <- c("Academic","Medical","Center")
help(paste)
paste(AMC, collapse=" ")
## [1] "Academic Medical Center"
## Some functions have an alias that is closer to common use
## E.g. + is an alias for the "+" function
"+"(1,7)
## [1] 8
## To cite R in publications:
citation()
##
## To cite R in publications use:
##
## R Core Team (2020). R: A language and environment for statistical
## computing. R Foundation for Statistical Computing, Vienna, Austria.
## URL https://www.R-project.org/.
##
## A BibTeX entry for LaTeX users is
##
## @Manual{,
## title = {R: A Language and Environment for Statistical Computing},
## author = {{R Core Team}},
## organization = {R Foundation for Statistical Computing},
## address = {Vienna, Austria},
## year = {2020},
## url = {https://www.R-project.org/},
## }
##
## We have invested a lot of time and effort in creating R, please cite it
## when using it for data analysis. See also 'citation("pkgname")' for
## citing R packages.
We can see the contents of a function by leaving out the parentheses. And we can write our own functions.
help
## function (topic, package = NULL, lib.loc = NULL, verbose = getOption("verbose"),
## try.all.packages = getOption("help.try.all.packages"), help_type = getOption("help_type"))
## {
## types <- c("text", "html", "pdf")
## help_type <- if (!length(help_type))
## "text"
## else match.arg(tolower(help_type), types)
## if (!missing(package))
## if (is.name(y <- substitute(package)))
## package <- as.character(y)
## if (missing(topic)) {
## if (!is.null(package)) {
## if (interactive() && help_type == "html") {
## port <- tools::startDynamicHelp(NA)
## if (port <= 0L)
## return(library(help = package, lib.loc = lib.loc,
## character.only = TRUE))
## browser <- if (.Platform$GUI == "AQUA") {
## get("aqua.browser", envir = as.environment("tools:RGUI"))
## }
## else getOption("browser")
## browseURL(paste0("http://127.0.0.1:", port, "/library/",
## package, "/html/00Index.html"), browser)
## return(invisible())
## }
## else return(library(help = package, lib.loc = lib.loc,
## character.only = TRUE))
## }
## if (!is.null(lib.loc))
## return(library(lib.loc = lib.loc))
## topic <- "help"
## package <- "utils"
## lib.loc <- .Library
## }
## ischar <- tryCatch(is.character(topic) && length(topic) ==
## 1L, error = function(e) FALSE)
## if (!ischar) {
## reserved <- c("TRUE", "FALSE", "NULL", "Inf", "NaN",
## "NA", "NA_integer_", "NA_real_", "NA_complex_", "NA_character_")
## stopic <- deparse1(substitute(topic))
## if (!is.name(substitute(topic)) && !stopic %in% reserved)
## stop("'topic' should be a name, length-one character vector or reserved word")
## topic <- stopic
## }
## paths <- index.search(topic, find.package(if (is.null(package))
## loadedNamespaces()
## else package, lib.loc, verbose = verbose))
## try.all.packages <- !length(paths) && is.logical(try.all.packages) &&
## !is.na(try.all.packages) && try.all.packages && is.null(package) &&
## is.null(lib.loc)
## if (try.all.packages) {
## for (lib in .libPaths()) {
## packages <- .packages(TRUE, lib)
## packages <- packages[is.na(match(packages, .packages()))]
## paths <- c(paths, index.search(topic, file.path(lib,
## packages)))
## }
## paths <- paths[nzchar(paths)]
## }
## structure(unique(paths), call = match.call(), topic = topic,
## tried_all_packages = try.all.packages, type = help_type,
## class = "help_files_with_topic")
## }
## <bytecode: 0x00000000086a2828>
## <environment: namespace:utils>
good.morning <- function(work){
if(work==TRUE) cat("wake up") else
cat("you can stay in bed")
}
good.morning
## function(work){
## if(work==TRUE) cat("wake up") else
## cat("you can stay in bed")
## }
#good.morning()
good.morning(work=FALSE)
## you can stay in bed
install.packages("ISwR")
## package 'ISwR' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\pdmoerland\AppData\Local\Temp\RtmpkfCiIZ\downloaded_packages
library(ISwR)
## Warning: package 'ISwR' was built under R version 4.0.4
help(juul)
summary(juul)
## age menarche sex igf1
## Min. : 0.170 Min. :1.000 Min. :1.000 Min. : 25.0
## 1st Qu.: 9.053 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:202.2
## Median :12.560 Median :1.000 Median :2.000 Median :313.5
## Mean :15.095 Mean :1.476 Mean :1.534 Mean :340.2
## 3rd Qu.:16.855 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:462.8
## Max. :83.000 Max. :2.000 Max. :2.000 Max. :915.0
## NA's :5 NA's :635 NA's :5 NA's :321
## tanner testvol
## Min. :1.00 Min. : 1.000
## 1st Qu.:1.00 1st Qu.: 1.000
## Median :2.00 Median : 3.000
## Mean :2.64 Mean : 7.896
## 3rd Qu.:5.00 3rd Qu.:15.000
## Max. :5.00 Max. :30.000
## NA's :240 NA's :859
juul$age[1:10]
## [1] NA NA NA NA NA 0.17 0.17 0.17 0.17 0.17
juul[1:10,"age"]
## [1] NA NA NA NA NA 0.17 0.17 0.17 0.17 0.17
## head and tail functions
tail(juul[, c("menarche","sex")])
## menarche sex
## 1334 2 2
## 1335 2 2
## 1336 2 2
## 1337 2 2
## 1338 2 2
## 1339 2 2
## in baby steps
tmp <- juul[, c("menarche","sex")] # all rows, two columns
tail(tmp) # last six rows
## menarche sex
## 1334 2 2
## 1335 2 2
## 1336 2 2
## 1337 2 2
## 1338 2 2
## 1339 2 2
## or
tail(juul)[, c("menarche","sex")]
## menarche sex
## 1334 2 2
## 1335 2 2
## 1336 2 2
## 1337 2 2
## 1338 2 2
## 1339 2 2
## in baby steps
tmp <- tail(juul) # last six rows, all columns
tmp[, c("menarche","sex")] # two columns
## menarche sex
## 1334 2 2
## 1335 2 2
## 1336 2 2
## 1337 2 2
## 1338 2 2
## 1339 2 2
## some selections using subset function
subset(juul, tanner==2 & age<10)
## age menarche sex igf1 tanner testvol
## 192 9.50 NA 1 NA 2 2
## 831 9.82 1 2 NA 2 NA
## 835 9.89 1 2 229 2 NA
subset(juul, tanner>=4 & age>45)
## age menarche sex igf1 tanner testvol
## 1325 47.37 2 2 144 5 NA
## 1326 48.01 2 2 154 5 NA
## 1329 51.07 2 2 187 5 NA
## 1334 58.95 2 2 218 5 NA
## 1335 60.99 2 2 226 5 NA
subset(juul, tanner %in% c(1,5) & (age==0.25|age>50))
## age menarche sex igf1 tanner testvol
## 13 0.25 NA 1 90 1 NA
## 14 0.25 NA 1 141 1 NA
## 628 0.25 NA 2 51 1 NA
## 1329 51.07 2 2 187 5 NA
## 1334 58.95 2 2 218 5 NA
## 1335 60.99 2 2 226 5 NA
with(juul, table(sex, tanner))
## tanner
## sex 1 2 3 4 5
## 1 291 55 34 41 124
## 2 224 48 38 40 204
xtabs(~sex+tanner, data=juul)
## tanner
## sex 1 2 3 4 5
## 1 291 55 34 41 124
## 2 224 48 38 40 204
xtabs(~sex+tanner, data=juul, subset=(menarche==1))
## tanner
## sex 1 2 3 4 5
## 2 221 43 32 14 2
## also select columns using subset function
subset(juul, tanner==2 & age<10, select=c(menarche,sex))
## menarche sex
## 192 NA 1
## 831 1 2
## 835 1 2
When checking for missingness, we need to use the is.na function.
titanic3$age[1:20]
## [1] 29.0000 0.9167 2.0000 30.0000 25.0000 48.0000 63.0000 39.0000 53.0000
## [10] 71.0000 47.0000 18.0000 24.0000 26.0000 80.0000 NA 24.0000 50.0000
## [19] 32.0000 36.0000
table(titanic3$age==NA)
## < table of extent 0 >
3==NA
## [1] NA
is.na(3)
## [1] FALSE
table(is.na(titanic3$age))
##
## FALSE TRUE
## 1046 263
## select those with missing igf1 value
subset(juul, tanner %in% c(1,5) & is.na(igf1) & (age<5|age>18))
## age menarche sex igf1 tanner testvol
## 630 2.64 1 2 NA 1 NA
## 1225 18.09 2 2 NA 5 NA
## 1239 18.44 2 2 NA 5 NA
#quantile(juul$age, prob=c(0.025,0.25,0.5,0.75,0.975)) # gives an error
quantile(juul$age, prob=c(0.025,0.25,0.5,0.75,0.975), na.rm=TRUE)
## 2.5% 25% 50% 75% 97.5%
## 2.88525 9.05250 12.56000 16.85500 51.48175
with(juul, table(sex, menarche))
## menarche
## sex 1 2
## 1 0 0
## 2 369 335
with(juul, table(sex, menarche, useNA="always"))
## menarche
## sex 1 2 <NA>
## 1 0 0 621
## 2 369 335 9
## <NA> 0 0 5
DiseaseState <- factor(c("Cancer", "Cancer", "Normal"))
DiseaseState
## [1] Cancer Cancer Normal
## Levels: Cancer Normal
str(DiseaseState)
## Factor w/ 2 levels "Cancer","Normal": 1 1 2
levels(DiseaseState)
## [1] "Cancer" "Normal"
typeof(DiseaseState) # the internal coding is via integers
## [1] "integer"
is.factor(DiseaseState)
## [1] TRUE
help(factor)
## change labels
factor(c("Cancer", "Cancer", "Normal"), labels=c("Dis","Norm"))
## [1] Dis Dis Norm
## Levels: Dis Norm
## change ordering of levels
DiseaseState2 <- factor(c("Cancer", "Cancer", "Normal"),levels=c("Normal","Cancer"))
table(DiseaseState)
## DiseaseState
## Cancer Normal
## 2 1
table(DiseaseState2)
## DiseaseState2
## Normal Cancer
## 1 2
## can also use relevel function
relevel(DiseaseState, ref="Normal")
## [1] Cancer Cancer Normal
## Levels: Normal Cancer
## see what changes internally
as.numeric(DiseaseState)
## [1] 1 1 2
as.numeric(relevel(DiseaseState, ref="Normal"))
## [1] 2 2 1
as.Date("15 April 1912", "%d %b %Y")
## [1] "1912-04-15"
julian(as.Date("15 April 1912", "%d %b %Y"))
## [1] -21080
## attr(,"origin")
## [1] "1970-01-01"
titanic3$dob <- as.Date("15 April 1912", "%d %b %Y")+ (-titanic3$age*365.25)
as.Date("15041912", "%d%m%Y")
## [1] "1912-04-15"
as.Date("150412", "%d%m%y")
## [1] "2012-04-15"
format(as.Date("1912April15", "%Y%b%d"),"%A %B %d, %Y")
## [1] "Monday April 15, 1912"
install.packages("lubridate")
## package 'lubridate' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\pdmoerland\AppData\Local\Temp\RtmpkfCiIZ\downloaded_packages
library(lubridate)
## Warning: package 'lubridate' was built under R version 4.0.4
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
wday(as.Date("1912April15", "%Y%b%d"),label=TRUE)
## [1] Mon
## Levels: Sun < Mon < Tue < Wed < Thu < Fri < Sat
#install.packages("swirl")
#library(swirl)
#install_from_swirl("R Programming")
#swirl()
x <- (0:100)/10
plot(x, x^3 - 13 * x^2 + 39 * x)
plot(x, x^3 - 13 * x^2 + 39 * x,cex.axis=1.5,cex.lab=1.5)
plot(x,x^3-13*x^2+39*x,type="l",xlab="time (hours)",ylab="temperature",main="Enhanced plot",cex.axis=1.5,cex.lab=1.5)
colors()
## [1] "white" "aliceblue" "antiquewhite"
## [4] "antiquewhite1" "antiquewhite2" "antiquewhite3"
## [7] "antiquewhite4" "aquamarine" "aquamarine1"
## [10] "aquamarine2" "aquamarine3" "aquamarine4"
## [13] "azure" "azure1" "azure2"
## [16] "azure3" "azure4" "beige"
## [19] "bisque" "bisque1" "bisque2"
## [22] "bisque3" "bisque4" "black"
## [25] "blanchedalmond" "blue" "blue1"
## [28] "blue2" "blue3" "blue4"
## [31] "blueviolet" "brown" "brown1"
## [34] "brown2" "brown3" "brown4"
## [37] "burlywood" "burlywood1" "burlywood2"
## [40] "burlywood3" "burlywood4" "cadetblue"
## [43] "cadetblue1" "cadetblue2" "cadetblue3"
## [46] "cadetblue4" "chartreuse" "chartreuse1"
## [49] "chartreuse2" "chartreuse3" "chartreuse4"
## [52] "chocolate" "chocolate1" "chocolate2"
## [55] "chocolate3" "chocolate4" "coral"
## [58] "coral1" "coral2" "coral3"
## [61] "coral4" "cornflowerblue" "cornsilk"
## [64] "cornsilk1" "cornsilk2" "cornsilk3"
## [67] "cornsilk4" "cyan" "cyan1"
## [70] "cyan2" "cyan3" "cyan4"
## [73] "darkblue" "darkcyan" "darkgoldenrod"
## [76] "darkgoldenrod1" "darkgoldenrod2" "darkgoldenrod3"
## [79] "darkgoldenrod4" "darkgray" "darkgreen"
## [82] "darkgrey" "darkkhaki" "darkmagenta"
## [85] "darkolivegreen" "darkolivegreen1" "darkolivegreen2"
## [88] "darkolivegreen3" "darkolivegreen4" "darkorange"
## [91] "darkorange1" "darkorange2" "darkorange3"
## [94] "darkorange4" "darkorchid" "darkorchid1"
## [97] "darkorchid2" "darkorchid3" "darkorchid4"
## [100] "darkred" "darksalmon" "darkseagreen"
## [103] "darkseagreen1" "darkseagreen2" "darkseagreen3"
## [106] "darkseagreen4" "darkslateblue" "darkslategray"
## [109] "darkslategray1" "darkslategray2" "darkslategray3"
## [112] "darkslategray4" "darkslategrey" "darkturquoise"
## [115] "darkviolet" "deeppink" "deeppink1"
## [118] "deeppink2" "deeppink3" "deeppink4"
## [121] "deepskyblue" "deepskyblue1" "deepskyblue2"
## [124] "deepskyblue3" "deepskyblue4" "dimgray"
## [127] "dimgrey" "dodgerblue" "dodgerblue1"
## [130] "dodgerblue2" "dodgerblue3" "dodgerblue4"
## [133] "firebrick" "firebrick1" "firebrick2"
## [136] "firebrick3" "firebrick4" "floralwhite"
## [139] "forestgreen" "gainsboro" "ghostwhite"
## [142] "gold" "gold1" "gold2"
## [145] "gold3" "gold4" "goldenrod"
## [148] "goldenrod1" "goldenrod2" "goldenrod3"
## [151] "goldenrod4" "gray" "gray0"
## [154] "gray1" "gray2" "gray3"
## [157] "gray4" "gray5" "gray6"
## [160] "gray7" "gray8" "gray9"
## [163] "gray10" "gray11" "gray12"
## [166] "gray13" "gray14" "gray15"
## [169] "gray16" "gray17" "gray18"
## [172] "gray19" "gray20" "gray21"
## [175] "gray22" "gray23" "gray24"
## [178] "gray25" "gray26" "gray27"
## [181] "gray28" "gray29" "gray30"
## [184] "gray31" "gray32" "gray33"
## [187] "gray34" "gray35" "gray36"
## [190] "gray37" "gray38" "gray39"
## [193] "gray40" "gray41" "gray42"
## [196] "gray43" "gray44" "gray45"
## [199] "gray46" "gray47" "gray48"
## [202] "gray49" "gray50" "gray51"
## [205] "gray52" "gray53" "gray54"
## [208] "gray55" "gray56" "gray57"
## [211] "gray58" "gray59" "gray60"
## [214] "gray61" "gray62" "gray63"
## [217] "gray64" "gray65" "gray66"
## [220] "gray67" "gray68" "gray69"
## [223] "gray70" "gray71" "gray72"
## [226] "gray73" "gray74" "gray75"
## [229] "gray76" "gray77" "gray78"
## [232] "gray79" "gray80" "gray81"
## [235] "gray82" "gray83" "gray84"
## [238] "gray85" "gray86" "gray87"
## [241] "gray88" "gray89" "gray90"
## [244] "gray91" "gray92" "gray93"
## [247] "gray94" "gray95" "gray96"
## [250] "gray97" "gray98" "gray99"
## [253] "gray100" "green" "green1"
## [256] "green2" "green3" "green4"
## [259] "greenyellow" "grey" "grey0"
## [262] "grey1" "grey2" "grey3"
## [265] "grey4" "grey5" "grey6"
## [268] "grey7" "grey8" "grey9"
## [271] "grey10" "grey11" "grey12"
## [274] "grey13" "grey14" "grey15"
## [277] "grey16" "grey17" "grey18"
## [280] "grey19" "grey20" "grey21"
## [283] "grey22" "grey23" "grey24"
## [286] "grey25" "grey26" "grey27"
## [289] "grey28" "grey29" "grey30"
## [292] "grey31" "grey32" "grey33"
## [295] "grey34" "grey35" "grey36"
## [298] "grey37" "grey38" "grey39"
## [301] "grey40" "grey41" "grey42"
## [304] "grey43" "grey44" "grey45"
## [307] "grey46" "grey47" "grey48"
## [310] "grey49" "grey50" "grey51"
## [313] "grey52" "grey53" "grey54"
## [316] "grey55" "grey56" "grey57"
## [319] "grey58" "grey59" "grey60"
## [322] "grey61" "grey62" "grey63"
## [325] "grey64" "grey65" "grey66"
## [328] "grey67" "grey68" "grey69"
## [331] "grey70" "grey71" "grey72"
## [334] "grey73" "grey74" "grey75"
## [337] "grey76" "grey77" "grey78"
## [340] "grey79" "grey80" "grey81"
## [343] "grey82" "grey83" "grey84"
## [346] "grey85" "grey86" "grey87"
## [349] "grey88" "grey89" "grey90"
## [352] "grey91" "grey92" "grey93"
## [355] "grey94" "grey95" "grey96"
## [358] "grey97" "grey98" "grey99"
## [361] "grey100" "honeydew" "honeydew1"
## [364] "honeydew2" "honeydew3" "honeydew4"
## [367] "hotpink" "hotpink1" "hotpink2"
## [370] "hotpink3" "hotpink4" "indianred"
## [373] "indianred1" "indianred2" "indianred3"
## [376] "indianred4" "ivory" "ivory1"
## [379] "ivory2" "ivory3" "ivory4"
## [382] "khaki" "khaki1" "khaki2"
## [385] "khaki3" "khaki4" "lavender"
## [388] "lavenderblush" "lavenderblush1" "lavenderblush2"
## [391] "lavenderblush3" "lavenderblush4" "lawngreen"
## [394] "lemonchiffon" "lemonchiffon1" "lemonchiffon2"
## [397] "lemonchiffon3" "lemonchiffon4" "lightblue"
## [400] "lightblue1" "lightblue2" "lightblue3"
## [403] "lightblue4" "lightcoral" "lightcyan"
## [406] "lightcyan1" "lightcyan2" "lightcyan3"
## [409] "lightcyan4" "lightgoldenrod" "lightgoldenrod1"
## [412] "lightgoldenrod2" "lightgoldenrod3" "lightgoldenrod4"
## [415] "lightgoldenrodyellow" "lightgray" "lightgreen"
## [418] "lightgrey" "lightpink" "lightpink1"
## [421] "lightpink2" "lightpink3" "lightpink4"
## [424] "lightsalmon" "lightsalmon1" "lightsalmon2"
## [427] "lightsalmon3" "lightsalmon4" "lightseagreen"
## [430] "lightskyblue" "lightskyblue1" "lightskyblue2"
## [433] "lightskyblue3" "lightskyblue4" "lightslateblue"
## [436] "lightslategray" "lightslategrey" "lightsteelblue"
## [439] "lightsteelblue1" "lightsteelblue2" "lightsteelblue3"
## [442] "lightsteelblue4" "lightyellow" "lightyellow1"
## [445] "lightyellow2" "lightyellow3" "lightyellow4"
## [448] "limegreen" "linen" "magenta"
## [451] "magenta1" "magenta2" "magenta3"
## [454] "magenta4" "maroon" "maroon1"
## [457] "maroon2" "maroon3" "maroon4"
## [460] "mediumaquamarine" "mediumblue" "mediumorchid"
## [463] "mediumorchid1" "mediumorchid2" "mediumorchid3"
## [466] "mediumorchid4" "mediumpurple" "mediumpurple1"
## [469] "mediumpurple2" "mediumpurple3" "mediumpurple4"
## [472] "mediumseagreen" "mediumslateblue" "mediumspringgreen"
## [475] "mediumturquoise" "mediumvioletred" "midnightblue"
## [478] "mintcream" "mistyrose" "mistyrose1"
## [481] "mistyrose2" "mistyrose3" "mistyrose4"
## [484] "moccasin" "navajowhite" "navajowhite1"
## [487] "navajowhite2" "navajowhite3" "navajowhite4"
## [490] "navy" "navyblue" "oldlace"
## [493] "olivedrab" "olivedrab1" "olivedrab2"
## [496] "olivedrab3" "olivedrab4" "orange"
## [499] "orange1" "orange2" "orange3"
## [502] "orange4" "orangered" "orangered1"
## [505] "orangered2" "orangered3" "orangered4"
## [508] "orchid" "orchid1" "orchid2"
## [511] "orchid3" "orchid4" "palegoldenrod"
## [514] "palegreen" "palegreen1" "palegreen2"
## [517] "palegreen3" "palegreen4" "paleturquoise"
## [520] "paleturquoise1" "paleturquoise2" "paleturquoise3"
## [523] "paleturquoise4" "palevioletred" "palevioletred1"
## [526] "palevioletred2" "palevioletred3" "palevioletred4"
## [529] "papayawhip" "peachpuff" "peachpuff1"
## [532] "peachpuff2" "peachpuff3" "peachpuff4"
## [535] "peru" "pink" "pink1"
## [538] "pink2" "pink3" "pink4"
## [541] "plum" "plum1" "plum2"
## [544] "plum3" "plum4" "powderblue"
## [547] "purple" "purple1" "purple2"
## [550] "purple3" "purple4" "red"
## [553] "red1" "red2" "red3"
## [556] "red4" "rosybrown" "rosybrown1"
## [559] "rosybrown2" "rosybrown3" "rosybrown4"
## [562] "royalblue" "royalblue1" "royalblue2"
## [565] "royalblue3" "royalblue4" "saddlebrown"
## [568] "salmon" "salmon1" "salmon2"
## [571] "salmon3" "salmon4" "sandybrown"
## [574] "seagreen" "seagreen1" "seagreen2"
## [577] "seagreen3" "seagreen4" "seashell"
## [580] "seashell1" "seashell2" "seashell3"
## [583] "seashell4" "sienna" "sienna1"
## [586] "sienna2" "sienna3" "sienna4"
## [589] "skyblue" "skyblue1" "skyblue2"
## [592] "skyblue3" "skyblue4" "slateblue"
## [595] "slateblue1" "slateblue2" "slateblue3"
## [598] "slateblue4" "slategray" "slategray1"
## [601] "slategray2" "slategray3" "slategray4"
## [604] "slategrey" "snow" "snow1"
## [607] "snow2" "snow3" "snow4"
## [610] "springgreen" "springgreen1" "springgreen2"
## [613] "springgreen3" "springgreen4" "steelblue"
## [616] "steelblue1" "steelblue2" "steelblue3"
## [619] "steelblue4" "tan" "tan1"
## [622] "tan2" "tan3" "tan4"
## [625] "thistle" "thistle1" "thistle2"
## [628] "thistle3" "thistle4" "tomato"
## [631] "tomato1" "tomato2" "tomato3"
## [634] "tomato4" "turquoise" "turquoise1"
## [637] "turquoise2" "turquoise3" "turquoise4"
## [640] "violet" "violetred" "violetred1"
## [643] "violetred2" "violetred3" "violetred4"
## [646] "wheat" "wheat1" "wheat2"
## [649] "wheat3" "wheat4" "whitesmoke"
## [652] "yellow" "yellow1" "yellow2"
## [655] "yellow3" "yellow4" "yellowgreen"
plot(x,x^3-13*x^2+39*x,pch=18,xlab="time (hours)",ylab="temperature",col="red",main="Enhanced plot",cex.axis=1.5,cex.lab=1.5)
palette()
## [1] "black" "#DF536B" "#61D04F" "#2297E6" "#28E2E5" "#CD0BBC" "#F5C710"
## [8] "gray62"
plot(x,x^3-13*x^2+39*x,pch=18,xlab="time (hours)",ylab="temperature",col=3,main="Enhanced plot",cex.axis=1.5,cex.lab=1.5)
plot(1:25, pch=1:25,cex=2,bg="grey")
x<-(0:100)/10
plot(x,x^3-13*x^2+39*x,type="l",xlab="time (hours)",ylab="temperature",cex.axis=1.5,cex.lab=1.5)
points(2,34,col="red",pch=16,cex=2)
arrows(4,50,2.2,34.5)
text(4.15,50,"local maximum",adj=0,col="blue",cex=1.5)
lines(x,30-50*sin(x/2),col="blue")
legend(x=0,y=80,legend=c("Sahara","Gobi"),lty=1,col=c("black","blue"),cex=1.5)
?par
par("bg")
## [1] "white"
par(bg="green")
par("bg")
## [1] "green"
# rerun the plot commands
x<-(0:100)/10
plot(x,x^3-13*x^2+39*x,type="l",xlab="time (hours)",ylab="temperature",cex.axis=1.5,cex.lab=1.5)
points(2,34,col="red",pch=16,cex=2)
arrows(4,50,2.2,34.5)
text(4.15,50,"local maximum",adj=0,col="blue",cex=1.5)
lines(x,30-50*sin(x/2),col="blue")
legend(x=0,y=80,legend=c("Sahara","Gobi"),lty=1,col=c("black","blue"),cex=1.5)
plot(1:10)
par(bg="white")
plot(1:10)
# save default graphical parameters to be able to restore them later on
par.defaults <- par(no.readonly=TRUE)
par(bg="green")
par("bg")
## [1] "green"
plot(1:10)
par(par.defaults)
par("bg")
## [1] "white"
plot(1:10)
x<-(0:100)/10
plot(x,x^3-13*x^2+39*x,type="l",xlab= "time (hours)",ylab="temperature",lwd=3,las=1,cex.axis=1.5,cex.lab=1.5)
hist(titanic3$age,breaks=15,freq=FALSE,cex.axis=1.5,cex.lab=1.5)
boxplot(titanic3$fare,ylim=c(0,300),ylab="fare",cex.axis=1.5,cex.lab=1.5)
boxplot(fare ~ pclass,data=titanic3,ylim=c(0,300),ylab="fare",cex.axis=1.5,cex.lab=1.5)
plot(fare ~ pclass,data=titanic3,ylim=c(0,300),ylab="fare",cex.axis=1.5,cex.lab=1.5)
# graphics
plot(fare~age,data=titanic3) # very skewed distribution
plot(log10(fare)~age,data=titanic3) # plot log10(y) vs x
plot(fare~age,data=titanic3,log="y") # original data, but log scale
## Warning in xy.coords(x, y, xlabel, ylabel, log): 17 y values <= 0 omitted from
## logarithmic plot
min(titanic3$fare,na.rm=TRUE) # zero values!
## [1] 0
sum(titanic3$fare==0,na.rm=TRUE) # zero values!
## [1] 17
install.packages("ggplot2")
## package 'ggplot2' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\pdmoerland\AppData\Local\Temp\RtmpkfCiIZ\downloaded_packages
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.0.4
qplot(age,fare,data=titanic3)
## Warning: Removed 264 rows containing missing values (geom_point).
qplot(age,fare,data=titanic3) + scale_y_log10() # log scale for y values
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 264 rows containing missing values (geom_point).
qplot(age,fare,data=titanic3,facets=~pclass) # one plot per pclass
## Warning: Removed 264 rows containing missing values (geom_point).
qplot(age,fare,data=titanic3,facets=~pclass,ylim=c(0,280)) # better for comparison
## Warning: Removed 268 rows containing missing values (geom_point).
qplot(age,fare,data=titanic3,facets=sex~pclass) + scale_y_log10() # even better
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 264 rows containing missing values (geom_point).
qplot(age,fare,data=titanic3,facets=pclass~sex) + scale_y_log10() # better for comparing males and females per class
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 264 rows containing missing values (geom_point).
#install.packages("devtools")
#library(devtools)
#install_github("rCharts", "ramnathv", ref = "dev")
#library(rCharts)
#titanic3$survived = factor(titanic3$survived, labels=c("no","yes"))
#rPlot(fare ~ age| sex + pclass, data = titanic3, type = "point", color = "survived", size = list(const = 2))
#dt <- dTable(titanic3,sPaginationType= "full_numbers")
#dt
## Open the PDF device
pdf("titanic3_ggplot_v1.pdf")
qplot(age,fare,data=titanic3,facets=pclass~sex) + scale_y_log10()
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 264 rows containing missing values (geom_point).
## Close the device again to save the file
dev.off()
## png
## 2
qplot(age,fare,data=titanic3,facets=pclass~sex) + scale_y_log10()
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 264 rows containing missing values (geom_point).
dev.print("titanic3_ggplot_v2.pdf",device=pdf)
## png
## 2
# Change size, for pdf device width and height are specified in inches
pdf("titanic3_ggplot_v1_big.pdf", width=21, height=21)
qplot(age,fare,data=titanic3,facets=pclass~sex) + scale_y_log10()
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 264 rows containing missing values (geom_point).
## Close the device again to save the file
dev.off()
## png
## 2
search()
## [1] ".GlobalEnv" "package:ggplot2" "package:lubridate"
## [4] "package:ISwR" "package:readxl" "package:foreign"
## [7] "package:stats" "package:graphics" "package:grDevices"
## [10] "package:utils" "package:datasets" "package:methods"
## [13] "Autoloads" "package:base"
ls() # objects in Workspace
## [1] "A" "a2" "age" "AMC"
## [5] "c" "DiseaseState" "DiseaseState2" "good.morning"
## [9] "indx" "m" "name" "par.defaults"
## [13] "pclass" "r" "survived" "titanic"
## [17] "titanic3" "tmp" "x"
ls("package:stats")[1:20] # some objects in stats package
## [1] "acf" "acf2AR" "add.scope"
## [4] "add1" "addmargins" "aggregate"
## [7] "aggregate.data.frame" "aggregate.ts" "AIC"
## [10] "alias" "anova" "ansari.test"
## [13] "aov" "approx" "approxfun"
## [16] "ar" "ar.burg" "ar.mle"
## [19] "ar.ols" "ar.yw"
ls("package:base")[1:30] # some objects in base package
## [1] "-" "-.Date" "-.POSIXt"
## [4] "!" "!.hexmode" "!.octmode"
## [7] "!=" "$" "$.DLLInfo"
## [10] "$.package_version" "$<-" "$<-.data.frame"
## [13] "%%" "%*%" "%/%"
## [16] "%in%" "%o%" "%x%"
## [19] "&" "&&" "&.hexmode"
## [22] "&.octmode" "(" "*"
## [25] "*.difftime" "/" "/.difftime"
## [28] ":" "::" ":::"
## hierarchical structure and duplicate names
data(quakes) # from package:datasets
## new quakes object in Workspace hides the other
quakes <- 1:10
quakes
## [1] 1 2 3 4 5 6 7 8 9 10
## removing it makes the other visible again
rm(quakes)
quakes$mag # original quakes not overwritten!
## [1] 4.8 4.2 5.4 4.1 4.0 4.0 4.8 4.4 4.7 4.3 4.4 4.6 4.4 4.4 6.1 4.3 6.0 4.5
## [19] 4.4 4.4 4.5 4.2 4.4 4.7 5.4 4.0 4.6 5.2 4.5 4.4 4.6 4.7 4.8 4.0 4.5 4.3
## [37] 4.5 4.6 4.1 4.4 4.7 4.6 4.4 4.3 4.6 4.9 4.5 4.4 4.3 5.1 4.2 4.0 4.6 4.3
## [55] 4.2 4.4 4.5 4.0 4.4 4.3 4.7 4.1 5.0 4.6 4.9 4.7 4.1 5.0 4.5 5.5 4.0 4.5
## [73] 4.3 5.2 4.4 4.3 4.1 4.5 4.2 5.3 5.2 4.5 4.6 4.3 4.0 4.3 4.7 4.5 4.2 4.3
## [91] 5.1 4.7 5.2 4.2 4.2 4.0 4.5 5.2 5.1 4.7 4.1 4.6 4.7 4.7 4.6 4.2 4.4 4.6
## [109] 5.7 5.0 4.5 4.2 4.0 4.8 4.4 4.2 5.3 4.7 4.8 4.2 4.8 4.3 4.7 4.5 4.4 5.1
## [127] 4.2 5.0 4.8 4.3 4.5 4.2 4.5 4.6 4.3 4.7 5.1 4.6 4.9 4.2 4.6 4.0 5.0 4.4
## [145] 4.2 4.2 4.4 4.9 5.3 4.0 5.7 6.4 4.3 4.2 4.7 4.7 4.2 4.3 4.9 4.6 4.1 4.8
## [163] 4.6 4.6 4.8 5.0 5.6 5.3 4.7 4.5 4.3 4.6 4.1 4.1 4.1 5.7 5.0 4.5 4.1 4.6
## [181] 4.5 4.3 4.4 4.2 4.1 4.9 4.3 4.9 4.6 4.6 5.3 4.7 4.6 4.1 4.6 4.2 4.6 4.5
## [199] 4.3 5.2 4.3 4.0 4.7 4.5 4.5 4.3 5.2 4.5 4.7 4.2 4.7 4.3 4.1 5.4 4.3 4.3
## [217] 4.2 4.6 4.4 4.2 4.7 4.6 4.9 4.2 4.5 4.9 4.2 4.5 5.0 5.0 4.7 4.3 4.4 4.9
## [235] 4.5 4.0 4.4 5.0 4.7 4.8 4.5 4.1 5.3 4.8 5.0 4.6 4.3 4.7 5.3 4.2 4.7 4.5
## [253] 5.1 4.9 4.4 4.2 4.6 4.7 4.2 4.9 5.1 4.7 4.4 4.4 4.2 4.9 4.6 4.4 4.6 4.5
## [271] 4.4 4.9 4.1 4.2 5.7 4.6 5.0 4.3 4.5 5.2 4.4 4.2 4.6 4.0 4.4 4.7 4.2 4.7
## [289] 4.5 5.0 5.0 4.8 4.6 4.6 5.0 5.0 5.6 4.0 4.0 4.6 4.4 4.8 4.7 4.6 4.3 4.7
## [307] 4.1 4.9 4.6 4.8 4.9 5.1 5.4 4.5 4.9 4.4 4.1 5.3 4.5 4.9 4.8 5.2 4.4 4.4
## [325] 4.9 4.4 4.2 4.6 4.6 5.3 5.3 4.5 4.4 5.0 5.1 4.5 4.5 5.4 4.5 4.7 4.2 4.8
## [343] 4.5 4.3 4.3 4.3 4.6 4.5 5.0 4.6 4.6 4.9 4.1 5.5 4.7 5.0 5.1 5.5 4.6 4.7
## [361] 4.2 4.0 5.4 4.3 4.4 4.6 5.1 4.5 4.2 4.1 5.1 5.4 5.1 5.1 4.4 5.7 4.4 5.1
## [379] 4.6 5.5 5.1 4.4 5.0 5.0 5.1 5.1 4.2 4.5 4.0 4.9 4.3 4.8 4.4 4.2 4.9 4.2
## [397] 5.3 5.0 5.7 5.3 4.7 4.6 4.3 5.4 4.4 4.3 4.3 4.7 4.4 4.2 4.5 4.8 4.8 4.3
## [415] 4.4 5.1 4.4 4.4 4.3 4.8 4.3 4.5 4.1 5.1 4.7 4.6 4.7 4.6 4.4 4.2 4.1 4.7
## [433] 4.0 4.8 4.4 4.3 4.4 4.5 4.5 4.7 4.3 4.7 4.8 4.6 5.1 4.7 4.3 5.1 5.5 4.5
## [451] 4.3 4.3 4.7 4.3 4.3 4.6 4.5 4.5 5.4 4.6 4.4 5.0 5.2 4.4 5.1 4.4 4.4 4.8
## [469] 4.4 4.7 4.7 4.4 4.3 5.0 4.5 4.6 5.4 4.6 4.5 4.4 4.5 4.1 4.0 4.9 4.1 5.2
## [487] 4.4 4.1 4.9 4.6 4.5 4.8 4.2 4.2 4.1 5.5 4.4 4.8 4.2 4.8 4.9 4.6 4.5 4.4
## [505] 4.6 4.2 4.7 4.4 4.5 4.9 4.7 5.5 4.7 4.6 4.1 4.7 4.6 4.3 4.4 4.7 4.3 4.1
## [523] 4.7 4.5 5.5 4.5 4.6 5.1 4.4 4.7 5.5 4.6 4.0 4.6 4.8 4.7 4.7 4.2 5.4 4.6
## [541] 5.5 4.2 4.5 4.5 4.8 4.6 5.4 4.6 5.0 4.3 4.3 4.8 4.7 4.7 4.8 4.2 4.2 5.9
## [559] 4.6 4.5 4.9 4.7 4.9 5.3 4.2 4.2 4.5 5.2 4.5 5.6 5.2 4.8 4.5 5.0 4.4 4.5
## [577] 4.6 4.5 5.2 5.1 4.8 4.3 5.1 4.7 4.6 4.8 4.6 4.4 4.6 5.1 4.3 4.4 4.7 4.8
## [595] 4.1 4.6 4.9 4.0 4.7 4.7 5.2 4.6 4.6 4.9 5.7 4.7 4.4 4.9 4.8 4.6 4.4 4.8
## [613] 4.7 4.2 5.1 4.8 4.9 5.1 4.3 4.4 4.7 4.7 5.3 5.1 4.9 4.5 4.7 4.2 5.1 4.8
## [631] 4.2 4.8 4.4 4.5 4.3 5.6 4.0 5.0 4.2 4.6 4.6 4.5 5.0 4.7 4.6 4.8 4.6 4.4
## [649] 5.6 4.2 5.4 4.8 5.6 4.4 4.7 4.6 5.1 4.6 4.5 4.2 4.8 4.9 5.5 5.0 4.2 5.2
## [667] 4.8 4.4 4.8 4.4 4.1 4.9 4.6 4.5 5.3 4.8 4.6 4.8 4.7 4.9 5.3 4.4 4.6 4.3
## [685] 4.4 4.2 4.1 4.2 5.0 4.1 4.3 5.2 4.2 4.5 4.4 4.4 5.0 4.0 4.8 5.0 4.2 5.2
## [703] 5.2 4.5 4.3 4.5 4.6 5.1 4.4 4.3 4.7 5.6 4.9 5.1 4.6 4.4 4.8 4.2 4.7 4.1
## [721] 4.7 4.0 4.9 5.0 4.5 4.4 4.0 4.5 4.7 4.4 4.7 4.7 4.0 4.2 4.5 4.7 4.5 4.1
## [739] 4.6 4.3 4.6 5.2 4.6 4.7 5.0 5.2 4.4 4.6 4.5 4.0 4.2 5.3 5.9 4.9 4.5 4.4
## [757] 5.4 5.3 5.1 4.1 4.3 4.5 4.1 5.1 5.3 4.5 4.1 4.4 4.7 4.0 5.1 4.0 4.3 4.3
## [775] 4.0 4.1 4.4 4.3 4.2 4.0 4.5 4.7 5.0 4.3 5.1 4.7 5.3 5.0 4.5 5.0 4.1 4.9
## [793] 4.1 4.0 4.4 4.5 4.4 4.5 4.3 4.3 5.0 4.5 4.5 4.4 4.5 4.1 4.9 4.7 4.3 4.6
## [811] 4.6 5.2 4.5 4.3 4.6 4.0 4.5 4.8 4.6 4.1 4.9 4.7 4.1 4.3 4.4 4.0 4.8 4.4
## [829] 4.4 4.6 4.2 4.1 4.2 4.0 4.1 4.1 4.7 4.8 5.1 5.0 4.8 4.4 5.0 5.2 4.2 4.1
## [847] 4.8 4.5 5.0 5.1 4.1 4.7 5.2 4.8 4.5 4.0 4.7 4.1 4.3 4.3 4.0 4.7 4.1 4.2
## [865] 4.8 4.8 4.2 4.2 5.7 6.0 4.2 4.5 4.9 4.4 4.0 4.3 4.2 4.5 4.8 4.4 4.2 4.2
## [883] 5.0 4.2 5.2 4.5 4.6 5.0 5.0 5.4 4.8 4.2 5.5 4.3 4.1 4.4 4.9 4.5 4.9 4.0
## [901] 4.5 5.0 4.9 4.5 4.2 4.2 4.2 5.2 4.3 5.1 4.6 4.6 4.4 4.4 4.9 5.1 4.2 4.7
## [919] 4.0 5.6 5.3 5.0 4.3 4.1 5.1 4.5 4.9 5.4 4.7 4.8 4.5 4.2 4.6 4.6 5.6 5.4
## [937] 4.0 5.2 4.2 4.7 4.3 4.8 4.6 5.4 4.8 4.6 4.2 5.5 4.7 4.7 4.5 5.5 4.5 4.1
## [955] 4.0 4.3 4.7 4.9 4.5 4.7 4.5 4.8 4.3 4.1 5.4 4.1 4.8 4.2 4.8 5.4 4.4 5.2
## [973] 4.7 4.9 4.9 4.2 4.4 4.4 4.3 4.9 5.0 4.7 4.9 4.3 4.5 4.2 5.2 4.8 4.0 4.7
## [991] 4.3 4.3 4.9 4.0 4.2 4.4 4.7 4.5 4.5 6.0
find("quakes")
## [1] "package:datasets"
save(titanic3, file="Titanic.RData")
rm(titanic3)
load("Titanic.RData")
getwd()
## [1] "C:/Users/pdmoerland/Dropbox/education/Computing in R"
#setwd("c:/AMC/Teaching/RCursus") # just an example, note that a forward slash is used
## sort
install.packages("dplyr")
## also installing the dependency 'lifecycle'
## package 'lifecycle' successfully unpacked and MD5 sums checked
## package 'dplyr' successfully unpacked and MD5 sums checked
## Warning: cannot remove prior installation of package 'dplyr'
## Warning in file.copy(savedcopy, lib, recursive = TRUE): problem copying C:
## \Users\pdmoerland\R\R-4.0.0\library\00LOCK\dplyr\libs\x64\dplyr.dll to C:
## \Users\pdmoerland\R\R-4.0.0\library\dplyr\libs\x64\dplyr.dll: Permission denied
## Warning: restored 'dplyr'
##
## The downloaded binary packages are in
## C:\Users\pdmoerland\AppData\Local\Temp\RtmpkfCiIZ\downloaded_packages
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.0.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
titanic.SortedAge <- arrange(titanic3, age)
head(titanic.SortedAge)
## pclass survived name sex age sibsp
## 764 3rd 1 Dean, Miss. Elizabeth Gladys \\"M female 0.1667 1
## 748 3rd 0 Danbom, Master. Gilbert Sigvard male 0.3333 0
## 1241 3rd 1 Thomas, Master. Assad Alexander male 0.4167 0
## 428 2nd 1 Hamalainen, Master. Viljo male 0.6667 1
## 658 3rd 1 Baclini, Miss. Eugenie female 0.7500 2
## 659 3rd 1 Baclini, Miss. Helene Barbara female 0.7500 2
## parch ticket fare cabin embarked boat body
## 764 2 C.A. 2315 20.5750 Southampton 10 NA
## 748 2 347080 14.4000 Southampton NA
## 1241 1 2625 8.5167 Cherbourg 16 NA
## 428 1 250649 14.5000 Southampton 4 NA
## 658 1 2666 19.2583 Cherbourg C NA
## 659 1 2666 19.2583 Cherbourg C NA
## home.dest dob family agecat
## 764 Devon, England Wichita, KS 1912-02-14 yes (0,18]
## 748 Stanton, IA 1911-12-15 yes (0,18]
## 1241 1911-11-14 yes (0,18]
## 428 Detroit, MI 1911-08-15 yes (0,18]
## 658 Syria New York, NY 1911-07-16 yes (0,18]
## 659 Syria New York, NY 1911-07-16 yes (0,18]
## base R
x <- c(5,8,1)
sort(x)
## [1] 1 5 8
order(x)
## [1] 3 1 2
x[order(x)]
## [1] 1 5 8
z <- data.frame(x=c(5,8,1), y=c("A","M","C"))
z
## x y
## 1 5 A
## 2 8 M
## 3 1 C
z[sort(z$x),]
## x y
## 1 5 A
## NA NA <NA>
## NA.1 NA <NA>
z[order(z$x),]
## x y
## 3 1 C
## 1 5 A
## 2 8 M
## merge
emb <- data.frame(embarked=c("Cherbourg","Queenstown","Southampton"), country=c("France","Ireland","United Kingdom"))
emb
## embarked country
## 1 Cherbourg France
## 2 Queenstown Ireland
## 3 Southampton United Kingdom
titanic3 <- left_join(titanic3, emb, by="embarked") # all rows from titanic 3, all columns from both
head(titanic3)
## pclass survived name sex age sibsp parch
## 1 1st 1 Allen, Miss. Elisabeth Walton female 29.0000 0 0
## 2 1st 1 Allison, Master. Hudson Trevor male 0.9167 1 2
## 3 1st 0 Allison, Miss. Helen Loraine female 2.0000 1 2
## 4 1st 0 Allison, Mr. Hudson Joshua Crei male 30.0000 1 2
## 5 1st 0 Allison, Mrs. Hudson J C (Bessi female 25.0000 1 2
## 6 1st 1 Anderson, Mr. Harry male 48.0000 0 0
## ticket fare cabin embarked boat body home.dest
## 1 24160 211.3375 B5 Southampton 2 NA St Louis, MO
## 2 113781 151.5500 C22 C26 Southampton 11 NA Montreal, PQ / Chesterville, ON
## 3 113781 151.5500 C22 C26 Southampton NA Montreal, PQ / Chesterville, ON
## 4 113781 151.5500 C22 C26 Southampton 135 Montreal, PQ / Chesterville, ON
## 5 113781 151.5500 C22 C26 Southampton NA Montreal, PQ / Chesterville, ON
## 6 19952 26.5500 E12 Southampton 3 NA New York, NY
## dob family agecat country
## 1 1883-04-14 no (18,30] United Kingdom
## 2 1911-05-16 yes (0,18] United Kingdom
## 3 1910-04-15 yes (0,18] United Kingdom
## 4 1882-04-14 yes (18,30] United Kingdom
## 5 1887-04-14 yes (18,30] United Kingdom
## 6 1864-04-14 no (40,80] United Kingdom
## base R
titanic3 <- merge(titanic3, emb)
## wide <-> long
install.packages("tidyr")
## also installing the dependency 'cpp11'
## package 'cpp11' successfully unpacked and MD5 sums checked
## package 'tidyr' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\pdmoerland\AppData\Local\Temp\RtmpkfCiIZ\downloaded_packages
library(tidyr)
## Warning: package 'tidyr' was built under R version 4.0.4
df1 <- data.frame(c("a", "a", "a", "a", "b", "b", "b", "b"), c(1,2,3,4,1,2,3,4), c(0.1, 0.2, 0.3, 0.4, 2.1, 2.2, 2.3, 2.4))
colnames(df1) <- c("Name", "Time", "Value")
df1
## Name Time Value
## 1 a 1 0.1
## 2 a 2 0.2
## 3 a 3 0.3
## 4 a 4 0.4
## 5 b 1 2.1
## 6 b 2 2.2
## 7 b 3 2.3
## 8 b 4 2.4
df1.wide <- spread(df1, Time, Value)
df1.wide
## Name 1 2 3 4
## 1 a 0.1 0.2 0.3 0.4
## 2 b 2.1 2.2 2.3 2.4
df1.long <- gather(df1.wide, "Time", "Value", 2:5)
df1.long
## Name Time Value
## 1 a 1 0.1
## 2 b 1 2.1
## 3 a 2 0.2
## 4 b 2 2.2
## 5 a 3 0.3
## 6 b 3 2.3
## 7 a 4 0.4
## 8 b 4 2.4
arrange(df1.long, Name, Time)
## Name Time Value
## 1 a 1 0.1
## 2 a 2 0.2
## 3 a 3 0.3
## 4 a 4 0.4
## 5 b 1 2.1
## 6 b 2 2.2
## 7 b 3 2.3
## 8 b 4 2.4
## base R
df1.wide <- reshape(df1, timevar="Time", idvar="Name", direction="wide", v.names="Value")
df1.wide
## Name Value.1 Value.2 Value.3 Value.4
## 1 a 0.1 0.2 0.3 0.4
## 5 b 2.1 2.2 2.3 2.4
df1.long <- reshape(df1.wide, varying=2:5, direction="long")
df1.long
## Name time Value id
## 1.1 a 1 0.1 1
## 2.1 b 1 2.1 2
## 1.2 a 2 0.2 1
## 2.2 b 2 2.2 2
## 1.3 a 3 0.3 1
## 2.3 b 3 2.3 2
## 1.4 a 4 0.4 1
## 2.4 b 4 2.4 2
df1.long[order(df1.long$Name,df1.long$time),]
## Name time Value id
## 1.1 a 1 0.1 1
## 1.2 a 2 0.2 1
## 1.3 a 3 0.3 1
## 1.4 a 4 0.4 1
## 2.1 b 1 2.1 2
## 2.2 b 2 2.2 2
## 2.3 b 3 2.3 2
## 2.4 b 4 2.4 2
## transformation using within
titanic3 <- within(titanic3, {
agecat <- cut(age, breaks=c(0,18,30,40,80))
family <- factor(ifelse(parch==0 & sibsp==0, "no", "yes"))
})
summary(titanic3[,c("age","agecat","family")])
## age agecat family
## Min. : 0.1667 (0,18] :193 no :788
## 1st Qu.:21.0000 (18,30]:416 yes:519
## Median :28.0000 (30,40]:209
## Mean :29.8426 (40,80]:226
## 3rd Qu.:39.0000 NA's :263
## Max. :80.0000
## NA's :263
## transformation using transform
titanic3 <- transform(titanic3,
agecat = cut(age, breaks=c(0,18,30,40,80)),
family = factor(ifelse(parch==0 & sibsp==0, "no", "yes"))
)
summary(titanic3[,c("age","agecat","family")])
## age agecat family
## Min. : 0.1667 (0,18] :193 no :788
## 1st Qu.:21.0000 (18,30]:416 yes:519
## Median :28.0000 (30,40]:209
## Mean :29.8426 (40,80]:226
## 3rd Qu.:39.0000 NA's :263
## Max. :80.0000
## NA's :263
# crosstable using descr package
install.packages("descr")
## package 'descr' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\pdmoerland\AppData\Local\Temp\RtmpkfCiIZ\downloaded_packages
library(descr)
## Warning: package 'descr' was built under R version 4.0.4
descr(titanic3$age)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.1667 21.0000 28.0000 29.8426 39.0000 80.0000 263
with(titanic3, CrossTable(pclass,sex,format="SPSS"))
## Cell Contents
## |-------------------------|
## | Count |
## | Chi-square contribution |
## | Row Percent |
## | Column Percent |
## | Total Percent |
## |-------------------------|
##
## ================================
## sex
## pclass female male Total
## --------------------------------
## 1st 142 179 321
## 6.900 3.798
## 44.2% 55.8% 24.6%
## 30.6% 21.2%
## 10.9% 13.7%
## --------------------------------
## 2nd 106 171 277
## 0.597 0.329
## 38.3% 61.7% 21.2%
## 22.8% 20.3%
## 8.1% 13.1%
## --------------------------------
## 3rd 216 493 709
## 5.064 2.787
## 30.5% 69.5% 54.2%
## 46.6% 58.5%
## 16.5% 37.7%
## --------------------------------
## Total 464 843 1307
## 35.5% 64.5%
## ================================
## summary by subgroup, all produce basically the same output
## basic R functions
aggregate(age~pclass, data=titanic3, FUN=summary)
## pclass age.Min. age.1st Qu. age.Median age.Mean age.3rd Qu. age.Max.
## 1 1st 0.91670 28.00000 39.00000 39.08304 49.75000 80.00000
## 2 2nd 0.66670 22.00000 29.00000 29.50670 36.00000 70.00000
## 3 3rd 0.16670 18.00000 24.00000 24.81637 32.00000 74.00000
by(titanic3,titanic3$pclass,FUN=function(x) summary(x$age))
## titanic3$pclass: 1st
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.9167 28.0000 39.0000 39.0830 49.7500 80.0000 39
## ------------------------------------------------------------
## titanic3$pclass: 2nd
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.6667 22.0000 29.0000 29.5067 36.0000 70.0000 16
## ------------------------------------------------------------
## titanic3$pclass: 3rd
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.1667 18.0000 24.0000 24.8164 32.0000 74.0000 208
with(titanic3, tapply(age,pclass,FUN=summary))
## $`1st`
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.9167 28.0000 39.0000 39.0830 49.7500 80.0000 39
##
## $`2nd`
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.6667 22.0000 29.0000 29.5067 36.0000 70.0000 16
##
## $`3rd`
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.1667 18.0000 24.0000 24.8164 32.0000 74.0000 208
## using doBy package
install.packages("doBy")
## package 'doBy' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\pdmoerland\AppData\Local\Temp\RtmpkfCiIZ\downloaded_packages
library(doBy)
## Warning: package 'doBy' was built under R version 4.0.4
##
## Attaching package: 'doBy'
## The following object is masked from 'package:dplyr':
##
## order_by
summaryBy(age~pclass, data=titanic3, FUN=summary)
## pclass age.Min. age.1st Qu. age.Median age.Mean age.3rd Qu. age.Max. age.NA's
## 1 1st 0.9167 28 39 39.08304 49.75 80 39
## 2 2nd 0.6667 22 29 29.50670 36.00 70 16
## 3 3rd 0.1667 18 24 24.81637 32.00 74 208
help(summary)
fit.age <- lm(age ~ pclass, data=titanic3)
summary(fit.age)
##
## Call:
## lm(formula = age ~ pclass, data = titanic3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -38.166 -7.816 -0.816 7.917 49.184
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 39.0830 0.7814 50.018 <2e-16 ***
## pclass2nd -9.5763 1.1270 -8.497 <2e-16 ***
## pclass3rd -14.2667 0.9768 -14.605 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 13.12 on 1041 degrees of freedom
## (263 observations deleted due to missingness)
## Multiple R-squared: 0.1702, Adjusted R-squared: 0.1686
## F-statistic: 106.8 on 2 and 1041 DF, p-value: < 2.2e-16
class(fit.age)
## [1] "lm"
help(summary.lm)
help(descriptive)
## No documentation for 'descriptive' in specified packages and libraries:
## you could try '??descriptive'
#help.search("descriptive statistics") # same as ??descriptive
#RSiteSearch("{descriptive statistics}") # braces to indicate that words are to be searched as one entity
#install.packages("sos")
#library(sos)
#findFn("{descriptive statistics}")
help.search("Student")
help(t.test)
with(titanic3,t.test(age[sex=="male"],age[sex=="female"]))
##
## Welch Two Sample t-test
##
## data: age[sex == "male"] and age[sex == "female"]
## t = 2.172, df = 796.3, p-value = 0.03015
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.1933196 3.8238645
## sample estimates:
## mean of x mean of y
## 30.58523 28.57664
t.test(subset(titanic3,sex=="male")$age,subset(titanic3,sex=="female")$age)
##
## Welch Two Sample t-test
##
## data: subset(titanic3, sex == "male")$age and subset(titanic3, sex == "female")$age
## t = 2.172, df = 796.3, p-value = 0.03015
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.1933196 3.8238645
## sample estimates:
## mean of x mean of y
## 30.58523 28.57664
test.age <- t.test(age ~ sex, data = titanic3)
test.age
##
## Welch Two Sample t-test
##
## data: age by sex
## t = -2.172, df = 796.3, p-value = 0.03015
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -3.8238645 -0.1933196
## sample estimates:
## mean in group female mean in group male
## 28.57664 30.58523
fit.age <- lm(age ~ pclass, data=titanic3)
fit.age
##
## Call:
## lm(formula = age ~ pclass, data = titanic3)
##
## Coefficients:
## (Intercept) pclass2nd pclass3rd
## 39.083 -9.576 -14.267
summary(fit.age)
##
## Call:
## lm(formula = age ~ pclass, data = titanic3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -38.166 -7.816 -0.816 7.917 49.184
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 39.0830 0.7814 50.018 <2e-16 ***
## pclass2nd -9.5763 1.1270 -8.497 <2e-16 ***
## pclass3rd -14.2667 0.9768 -14.605 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 13.12 on 1041 degrees of freedom
## (263 observations deleted due to missingness)
## Multiple R-squared: 0.1702, Adjusted R-squared: 0.1686
## F-statistic: 106.8 on 2 and 1041 DF, p-value: < 2.2e-16
## output of model is a list
str(fit.age)
## List of 14
## $ coefficients : Named num [1:3] 39.08 -9.58 -14.27
## ..- attr(*, "names")= chr [1:3] "(Intercept)" "pclass2nd" "pclass3rd"
## $ residuals : Named num [1:1044] 8.184 0.917 11.917 -15.816 16.917 ...
## ..- attr(*, "names")= chr [1:1044] "1" "2" "3" "4" ...
## $ effects : Named num [1:1044] -964.24 -6.27 -191.64 -15.77 16.13 ...
## ..- attr(*, "names")= chr [1:1044] "(Intercept)" "pclass2nd" "pclass3rd" "" ...
## $ rank : int 3
## $ fitted.values: Named num [1:1044] 24.8 39.1 39.1 24.8 39.1 ...
## ..- attr(*, "names")= chr [1:1044] "1" "2" "3" "4" ...
## $ assign : int [1:3] 0 1 1
## $ qr :List of 5
## ..$ qr : num [1:1044, 1:3] -32.311 0.0309 0.0309 0.0309 0.0309 ...
## .. ..- attr(*, "dimnames")=List of 2
## .. .. ..$ : chr [1:1044] "1" "2" "3" "4" ...
## .. .. ..$ : chr [1:3] "(Intercept)" "pclass2nd" "pclass3rd"
## .. ..- attr(*, "assign")= int [1:3] 0 1 1
## .. ..- attr(*, "contrasts")=List of 1
## .. .. ..$ pclass: chr "contr.treatment"
## ..$ qraux: num [1:3] 1.03 1.02 1.05
## ..$ pivot: int [1:3] 1 2 3
## ..$ tol : num 1e-07
## ..$ rank : int 3
## ..- attr(*, "class")= chr "qr"
## $ df.residual : int 1041
## $ na.action : 'omit' Named int [1:263] 5 13 27 37 39 45 47 48 59 71 ...
## ..- attr(*, "names")= chr [1:263] "5" "13" "27" "37" ...
## $ contrasts :List of 1
## ..$ pclass: chr "contr.treatment"
## $ xlevels :List of 1
## ..$ pclass: chr [1:3] "1st" "2nd" "3rd"
## $ call : language lm(formula = age ~ pclass, data = titanic3)
## $ terms :Classes 'terms', 'formula' language age ~ pclass
## .. ..- attr(*, "variables")= language list(age, pclass)
## .. ..- attr(*, "factors")= int [1:2, 1] 0 1
## .. .. ..- attr(*, "dimnames")=List of 2
## .. .. .. ..$ : chr [1:2] "age" "pclass"
## .. .. .. ..$ : chr "pclass"
## .. ..- attr(*, "term.labels")= chr "pclass"
## .. ..- attr(*, "order")= int 1
## .. ..- attr(*, "intercept")= int 1
## .. ..- attr(*, "response")= int 1
## .. ..- attr(*, ".Environment")=<environment: R_GlobalEnv>
## .. ..- attr(*, "predvars")= language list(age, pclass)
## .. ..- attr(*, "dataClasses")= Named chr [1:2] "numeric" "factor"
## .. .. ..- attr(*, "names")= chr [1:2] "age" "pclass"
## $ model :'data.frame': 1044 obs. of 2 variables:
## ..$ age : num [1:1044] 33 40 51 9 56 19 27 27 45 56 ...
## ..$ pclass: Factor w/ 3 levels "1st","2nd","3rd": 3 1 1 3 1 3 1 1 1 1 ...
## ..- attr(*, "terms")=Classes 'terms', 'formula' language age ~ pclass
## .. .. ..- attr(*, "variables")= language list(age, pclass)
## .. .. ..- attr(*, "factors")= int [1:2, 1] 0 1
## .. .. .. ..- attr(*, "dimnames")=List of 2
## .. .. .. .. ..$ : chr [1:2] "age" "pclass"
## .. .. .. .. ..$ : chr "pclass"
## .. .. ..- attr(*, "term.labels")= chr "pclass"
## .. .. ..- attr(*, "order")= int 1
## .. .. ..- attr(*, "intercept")= int 1
## .. .. ..- attr(*, "response")= int 1
## .. .. ..- attr(*, ".Environment")=<environment: R_GlobalEnv>
## .. .. ..- attr(*, "predvars")= language list(age, pclass)
## .. .. ..- attr(*, "dataClasses")= Named chr [1:2] "numeric" "factor"
## .. .. .. ..- attr(*, "names")= chr [1:2] "age" "pclass"
## ..- attr(*, "na.action")= 'omit' Named int [1:263] 5 13 27 37 39 45 47 48 59 71 ...
## .. ..- attr(*, "names")= chr [1:263] "5" "13" "27" "37" ...
## - attr(*, "class")= chr "lm"
names(fit.age)
## [1] "coefficients" "residuals" "effects" "rank"
## [5] "fitted.values" "assign" "qr" "df.residual"
## [9] "na.action" "contrasts" "xlevels" "call"
## [13] "terms" "model"
summary(fit.age$residuals)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -38.1663 -7.8164 -0.8164 0.0000 7.9170 49.1836
## or
summary(resid(fit.age))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -38.1663 -7.8164 -0.8164 0.0000 7.9170 49.1836
hist(resid(fit.age))
coef(fit.age)
## (Intercept) pclass2nd pclass3rd
## 39.083038 -9.576333 -14.266671
predict(fit.age, newdata=data.frame(pclass=factor(levels(titanic3$pclass))))
## 1 2 3
## 39.08304 29.50670 24.81637
update(fit.age,. ~ .+sex)
##
## Call:
## lm(formula = age ~ pclass + sex, data = titanic3)
##
## Coefficients:
## (Intercept) pclass2nd pclass3rd sexmale
## 37.060 -9.840 -14.875 3.778
## the output has a specific class, in this case lm
class(fit.age)
## [1] "lm"
## which functions have been written for this class?
help(methods)
methods(class="lm")
## [1] add1 alias anova case.names coerce
## [6] confint cooks.distance deviance dfbeta dfbetas
## [11] drop1 dummy.coef effects esticon extractAIC
## [16] family formula fortify hatvalues influence
## [21] initialize kappa labels linest logLik
## [26] model.frame model.matrix nobs plot predict
## [31] print proj qr residuals rstandard
## [36] rstudent show simulate slotsFromS3 summary
## [41] variable.names vcov
## see '?methods' for accessing help and source code
## we want to know what the summary function does with an object of class lm
help(summary) # does not give the desired help file
## we need to consult the help file for summary.lm
methods(summary)
## [1] summary,ANY-method summary,DBIObject-method
## [3] summary,diagonalMatrix-method summary,sparseMatrix-method
## [5] summary.aov summary.aovlist*
## [7] summary.aspell* summary.check_packages_in_dir*
## [9] summary.connection summary.data.frame
## [11] summary.Date summary.default
## [13] summary.Duration* summary.ecdf*
## [15] summary.esticon_class* summary.factor
## [17] summary.ggplot* summary.glm
## [19] summary.hcl_palettes* summary.infl*
## [21] summary.Interval* summary.linest_class*
## [23] summary.linest_matrix_class* summary.lm
## [25] summary.lmBy* summary.loess*
## [27] summary.loglm* summary.manova
## [29] summary.matrix summary.mlm*
## [31] summary.negbin* summary.nls*
## [33] summary.packageStatus* summary.Period*
## [35] summary.polr* summary.POSIXct
## [37] summary.POSIXlt summary.ppr*
## [39] summary.prcomp* summary.princomp*
## [41] summary.proc_time summary.rlang_error*
## [43] summary.rlang_trace* summary.rlm*
## [45] summary.shingle* summary.srcfile
## [47] summary.srcref summary.stepfun
## [49] summary.stl* summary.table
## [51] summary.trellis* summary.tukeysmooth*
## [53] summary.vctrs_sclr* summary.vctrs_vctr*
## [55] summary.warnings
## see '?methods' for accessing help and source code
help(summary.lm)
plot(fit.age)
plot(age ~ fare, data=titanic3)
## the type of plot is automaticaaly adapted to the type of variable
plot(age~pclass, data=titanic3)
x <- 10
z <- if (x < 2) 4 else 3
z
## [1] 3
A <- matrix(1:10, 2, 5)
rownames(A) <- c("gene 1", "gene 2")
colnames(A) <- c("array 1", "array 2", "array 3","array 4", "array 5")
results <- numeric(2)
results
## [1] 0 0
for (i in 1:2) {
results[i] <- mean(A[i, ])
}
results
## [1] 5 6
apply(A, 1, mean)
## gene 1 gene 2
## 5 6
titanic3 <- read.dta("Exercises/titanic3.dta", convert.underscore=TRUE)
head(titanic3[,c("fare","pclass")])
## fare pclass
## 1 211.3375 1st
## 2 151.5500 1st
## 3 151.5500 1st
## 4 151.5500 1st
## 5 151.5500 1st
## 6 26.5500 1st
with(titanic3,tapply(fare, pclass, mean,na.rm=T))
## 1st 2nd 3rd
## 87.50899 21.17920 13.30289
# Solution using the dplyr package
#install.packages("dplyr")
library(dplyr)
summarise(group_by(titanic3, pclass), mean(fare,na.rm=TRUE))
## # A tibble: 3 x 2
## pclass `mean(fare, na.rm = TRUE)`
## * <fct> <dbl>
## 1 1st 87.5
## 2 2nd 21.2
## 3 3rd 13.3