CourseMain.utf8

This is an R Markdown document for the course ‘Computing in R’. Using this document, you can easily run selected pieces of R code shown during the lectures from within RStudio.

Basics of R

2+7

## [1] 9

sqrt(2)

## [1] 1.414214

cos(pi)

## [1] -1

log10(10^3)

## [1] 3

x <- 2
print(x)

## [1] 2

## [1] 2

x^2

## [1] 4

## [1] 2

#y < - x

help(sqrt)

## starting httpd help server ... done

help.start()

## If nothing happens, you should open
## 'http://127.0.0.1:14020/doc/html/index.html' yourself

help(mean)
# ? is a shorthand for help()
?mean

Syntax: data

Data structures

x <- c(10, 9, 8, 7, 6, 5, 4, 3, 2, 1)
x

##  [1] 10  9  8  7  6  5  4  3  2  1

x <- seq(from = 10, to = 1, by = -1)
x

##  [1] 10  9  8  7  6  5  4  3  2  1

x <- seq(10, 1)
x

##  [1] 10  9  8  7  6  5  4  3  2  1

x <- 10:1
x

##  [1] 10  9  8  7  6  5  4  3  2  1

x[5]

## [1] 6

x[10]

## [1] 1

x[5] + x[10]

## [1] 7

indx <- c(5,10)
indx

## [1]  5 10

x[indx]

## [1] 6 1

x[c(5,10)]

## [1] 6 1

c(-5, -10)

## [1]  -5 -10

x[c(-5, -10)]

## [1] 10  9  8  7  5  4  3  2

x[4] <- 12
x

##  [1] 10  9  8 12  6  5  4  3  2  1

mean(x)

## [1] 6

x + 1

##  [1] 11 10  9 13  7  6  5  4  3  2

2*x

##  [1] 20 18 16 24 12 10  8  6  4  2

help(matrix)

A <- matrix(data = 1:10, nrow = 2, ncol = 5)
A <- matrix(1:10, 2, 5)
A

##      [,1] [,2] [,3] [,4] [,5]
## [1,]    1    3    5    7    9
## [2,]    2    4    6    8   10

A[2, 3]

## [1] 6

A[2,3] <- 12

A[1, ]

## [1] 1 3 5 7 9

A[1,1:5]

## [1] 1 3 5 7 9

A[, c(1, 5)]

##      [,1] [,2]
## [1,]    1    9
## [2,]    2   10

A[1:2, c(1,5)]

##      [,1] [,2]
## [1,]    1    9
## [2,]    2   10

dim(A[, c(1, 5)])

## [1] 2 2

#x[1,3]
x[c(1,3)]

## [1] 10  8

ls()

## [1] "A"    "indx" "r"    "x"

summary(x)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1.00    3.25    5.50    6.00    8.75   12.00

summary(A)

##        V1             V2             V3              V4             V5       
##  Min.   :1.00   Min.   :3.00   Min.   : 5.00   Min.   :7.00   Min.   : 9.00  
##  1st Qu.:1.25   1st Qu.:3.25   1st Qu.: 6.75   1st Qu.:7.25   1st Qu.: 9.25  
##  Median :1.50   Median :3.50   Median : 8.50   Median :7.50   Median : 9.50  
##  Mean   :1.50   Mean   :3.50   Mean   : 8.50   Mean   :7.50   Mean   : 9.50  
##  3rd Qu.:1.75   3rd Qu.:3.75   3rd Qu.:10.25   3rd Qu.:7.75   3rd Qu.: 9.75  
##  Max.   :2.00   Max.   :4.00   Max.   :12.00   Max.   :8.00   Max.   :10.00

a2 <- 1
#2a <- 1
c <- 1
c

## [1] 1

c(1,2)

## [1] 1 2

#else <- 1

c(1, 2, 3, 4)

## [1] 1 2 3 4

-2 < -2

## [1] FALSE

letters[1:3]

## [1] "a" "b" "c"

mode(x)

## [1] "numeric"

as.character(x)

##  [1] "10" "9"  "8"  "12" "6"  "5"  "4"  "3"  "2"  "1"

##  [1] 10  9  8 12  6  5  4  3  2  1

x[c(TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE,FALSE, TRUE, FALSE)]

## [1] 10  8  6  4  2

x>5

##  [1]  TRUE  TRUE  TRUE  TRUE  TRUE FALSE FALSE FALSE FALSE FALSE

x[x>5]

## [1] 10  9  8 12  6

length(x>5)

## [1] 10

length(x)

## [1] 10

x[x==5]   # == : test for equality

## [1] 5

x[x=5]    # = : assignment operator

## [1] 6

sum(x>5)

## [1] 5

TRUE & FALSE

## [1] FALSE

TRUE | FALSE

## [1] TRUE

##  [1] 10  9  8 12  6  5  4  3  2  1

x>5

##  [1]  TRUE  TRUE  TRUE  TRUE  TRUE FALSE FALSE FALSE FALSE FALSE

x<8

##  [1] FALSE FALSE FALSE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE

x>5 & x<8

##  [1] FALSE FALSE FALSE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE

sum(x>5 & x<8)

## [1] 1

x[x>5 & x<8]

## [1] 6

m <- c(1,2,3,4)
names(m) <- c("gene 1","gene 2","gene 3","gene 4")
m

## gene 1 gene 2 gene 3 gene 4 
##      1      2      3      4

rownames(A)

## NULL

rownames(A) <- c("gene 1", "gene 2")
colnames(A) <- c("array 1", "array 2", "array 3","array 4", "array 5")

##        array 1 array 2 array 3 array 4 array 5
## gene 1       1       3       5       7       9
## gene 2       2       4      12       8      10

A["gene 1", ]

## array 1 array 2 array 3 array 4 array 5 
##       1       3       5       7       9

c("gene 1", 5)

## [1] "gene 1" "5"

list(gene = "gene 1", number = 5)

## $gene
## [1] "gene 1"
## 
## $number
## [1] 5

x <- list(gene = "gene 1", number = 5)
x[1]

## $gene
## [1] "gene 1"

x[[1]]

## [1] "gene 1"

x$gene

## [1] "gene 1"

pclass <- c("1st","2nd","1st")
survived <- c(1,1,0)
name <- c("Elisabeth Walton","Hudson Trevor","Helen Loraine")
age <- c(29.0,0.9167,2.0)
titanic <- data.frame(pclass,survived,name,age)
titanic

##   pclass survived             name     age
## 1    1st        1 Elisabeth Walton 29.0000
## 2    2nd        1    Hudson Trevor  0.9167
## 3    1st        0    Helen Loraine  2.0000

titanic[c(2,3),]

##   pclass survived          name    age
## 2    2nd        1 Hudson Trevor 0.9167
## 3    1st        0 Helen Loraine 2.0000

titanic[,c("name","age")]

##               name     age
## 1 Elisabeth Walton 29.0000
## 2    Hudson Trevor  0.9167
## 3    Helen Loraine  2.0000

titanic[c(2,3),c("name","age")]

##            name    age
## 2 Hudson Trevor 0.9167
## 3 Helen Loraine 2.0000

titanic$age

## [1] 29.0000  0.9167  2.0000

titanic[["age"]]

## [1] 29.0000  0.9167  2.0000

titanic$status <- c("yes","yes","no")
titanic

##   pclass survived             name     age status
## 1    1st        1 Elisabeth Walton 29.0000    yes
## 2    2nd        1    Hudson Trevor  0.9167    yes
## 3    1st        0    Helen Loraine  2.0000     no

In the exercises, we use the Titanic data set. See here for a description of the data.

library(foreign)
titanic3 <- read.dta("Exercises/titanic3.dta", convert.underscore=TRUE) # convert.underscore: Convert "_" in Stata variable names to "." in R names?
dim(titanic3)

## [1] 1309   17

head(titanic3[,1:4])

##   pclass survived                            name    sex
## 1    1st        1   Allen, Miss. Elisabeth Walton female
## 2    1st        1  Allison, Master. Hudson Trevor   male
## 3    1st        0    Allison, Miss. Helen Loraine female
## 4    1st        0 Allison, Mr. Hudson Joshua Crei   male
## 5    1st        0 Allison, Mrs. Hudson J C (Bessi female
## 6    1st        1             Anderson, Mr. Harry   male

tail(titanic3[,1:4])

##      pclass survived                      name    sex
## 1304    3rd        0     Yousseff, Mr. Gerious   male
## 1305    3rd        0      Zabour, Miss. Hileni female
## 1306    3rd        0     Zabour, Miss. Thamine female
## 1307    3rd        0 Zakarian, Mr. Mapriededer   male
## 1308    3rd        0       Zakarian, Mr. Ortin   male
## 1309    3rd        0        Zimmerman, Mr. Leo   male

str(titanic3[,1:4])

## 'data.frame':    1309 obs. of  4 variables:
##  $ pclass  : Factor w/ 3 levels "1st","2nd","3rd": 1 1 1 1 1 1 1 1 1 1 ...
##  $ survived: int  1 1 0 0 0 1 1 0 1 0 ...
##  $ name    : chr  "Allen, Miss. Elisabeth Walton" "Allison, Master. Hudson Trevor" "Allison, Miss. Helen Loraine" "Allison, Mr. Hudson Joshua Crei" ...
##  $ sex     : Factor w/ 2 levels "female","male": 1 2 1 2 1 2 1 2 1 2 ...

Data import

In R, we can also import directly from a web site:

load(url("https://biostat.app.vumc.org/wiki/pub/Main/DataSets/titanic3.sav"))
ls()

##  [1] "A"        "a2"       "age"      "c"        "indx"     "m"       
##  [7] "name"     "pclass"   "r"        "survived" "titanic"  "titanic3"
## [13] "x"

An example using the readxl package for MS Excel files. The data set can be obtained from here. If the data set has been stored in a subfolder Exercises, it can be imported via

install.packages("readxl")

## package 'readxl' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\pdmoerland\AppData\Local\Temp\RtmpkfCiIZ\downloaded_packages

library(readxl)

## Warning: package 'readxl' was built under R version 4.0.4

titanic3 <- read_excel("Exercises/titanic3.xls")

## Warning in read_fun(path = enc2native(normalizePath(path)), sheet_i = sheet, :
## Coercing text to numeric in M1306 / R1306C13: '328'

## Explain warning message and repair by converting to number
## tibble
dim(titanic3)

## [1] 1309   14

head(titanic3[,1:4])

## # A tibble: 6 x 4
##   pclass survived name                                            sex   
##    <dbl>    <dbl> <chr>                                           <chr> 
## 1      1        1 Allen, Miss. Elisabeth Walton                   female
## 2      1        1 Allison, Master. Hudson Trevor                  male  
## 3      1        0 Allison, Miss. Helen Loraine                    female
## 4      1        0 Allison, Mr. Hudson Joshua Creighton            male  
## 5      1        0 Allison, Mrs. Hudson J C (Bessie Waldo Daniels) female
## 6      1        1 Anderson, Mr. Harry                             male

tail(titanic3[,1:4])

## # A tibble: 6 x 4
##   pclass survived name                      sex   
##    <dbl>    <dbl> <chr>                     <chr> 
## 1      3        0 Yousseff, Mr. Gerious     male  
## 2      3        0 Zabour, Miss. Hileni      female
## 3      3        0 Zabour, Miss. Thamine     female
## 4      3        0 Zakarian, Mr. Mapriededer male  
## 5      3        0 Zakarian, Mr. Ortin       male  
## 6      3        0 Zimmerman, Mr. Leo        male

## Last row: (not) empty?

We run the code to import the STATA file, which is what we will do in the exercises as well.

library(foreign)
titanic3 <- read.dta("Exercises/titanic3.dta", convert.underscore=TRUE)

Functions; special data formats; selections

Functions

Using the logarithm as an example, we show the different ways to obtain the 10-log of 100 (which should give 2 as outcome).

## Basic structure
log(100)  # does not give 2

## [1] 4.60517

help(log)
log(100, 10)

## [1] 2

log(10, 100) # does not give 2

## [1] 0.5

log(base=10, x=100)

## [1] 2

log(b=10,x=100)

## [1] 2

## Some functions have an ... argument
c

## [1] 1

c(3,6,8)

## [1] 3 6 8

AMC <- c("Academic","Medical","Center")
help(paste)
paste(AMC, collapse=" ")

## [1] "Academic Medical Center"

## Some functions have an alias that is closer to common use
## E.g. + is an alias for the "+" function
"+"(1,7)

## [1] 8

## To cite R in publications:
citation()

## 
## To cite R in publications use:
## 
##   R Core Team (2020). R: A language and environment for statistical
##   computing. R Foundation for Statistical Computing, Vienna, Austria.
##   URL https://www.R-project.org/.
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {R: A Language and Environment for Statistical Computing},
##     author = {{R Core Team}},
##     organization = {R Foundation for Statistical Computing},
##     address = {Vienna, Austria},
##     year = {2020},
##     url = {https://www.R-project.org/},
##   }
## 
## We have invested a lot of time and effort in creating R, please cite it
## when using it for data analysis. See also 'citation("pkgname")' for
## citing R packages.

We can see the contents of a function by leaving out the parentheses. And we can write our own functions.

help

## function (topic, package = NULL, lib.loc = NULL, verbose = getOption("verbose"), 
##     try.all.packages = getOption("help.try.all.packages"), help_type = getOption("help_type")) 
## {
##     types <- c("text", "html", "pdf")
##     help_type <- if (!length(help_type)) 
##         "text"
##     else match.arg(tolower(help_type), types)
##     if (!missing(package)) 
##         if (is.name(y <- substitute(package))) 
##             package <- as.character(y)
##     if (missing(topic)) {
##         if (!is.null(package)) {
##             if (interactive() && help_type == "html") {
##                 port <- tools::startDynamicHelp(NA)
##                 if (port <= 0L) 
##                   return(library(help = package, lib.loc = lib.loc, 
##                     character.only = TRUE))
##                 browser <- if (.Platform$GUI == "AQUA") {
##                   get("aqua.browser", envir = as.environment("tools:RGUI"))
##                 }
##                 else getOption("browser")
##                 browseURL(paste0("http://127.0.0.1:", port, "/library/", 
##                   package, "/html/00Index.html"), browser)
##                 return(invisible())
##             }
##             else return(library(help = package, lib.loc = lib.loc, 
##                 character.only = TRUE))
##         }
##         if (!is.null(lib.loc)) 
##             return(library(lib.loc = lib.loc))
##         topic <- "help"
##         package <- "utils"
##         lib.loc <- .Library
##     }
##     ischar <- tryCatch(is.character(topic) && length(topic) == 
##         1L, error = function(e) FALSE)
##     if (!ischar) {
##         reserved <- c("TRUE", "FALSE", "NULL", "Inf", "NaN", 
##             "NA", "NA_integer_", "NA_real_", "NA_complex_", "NA_character_")
##         stopic <- deparse1(substitute(topic))
##         if (!is.name(substitute(topic)) && !stopic %in% reserved) 
##             stop("'topic' should be a name, length-one character vector or reserved word")
##         topic <- stopic
##     }
##     paths <- index.search(topic, find.package(if (is.null(package)) 
##         loadedNamespaces()
##     else package, lib.loc, verbose = verbose))
##     try.all.packages <- !length(paths) && is.logical(try.all.packages) && 
##         !is.na(try.all.packages) && try.all.packages && is.null(package) && 
##         is.null(lib.loc)
##     if (try.all.packages) {
##         for (lib in .libPaths()) {
##             packages <- .packages(TRUE, lib)
##             packages <- packages[is.na(match(packages, .packages()))]
##             paths <- c(paths, index.search(topic, file.path(lib, 
##                 packages)))
##         }
##         paths <- paths[nzchar(paths)]
##     }
##     structure(unique(paths), call = match.call(), topic = topic, 
##         tried_all_packages = try.all.packages, type = help_type, 
##         class = "help_files_with_topic")
## }
## <bytecode: 0x00000000086a2828>
## <environment: namespace:utils>

good.morning <- function(work){ 
  if(work==TRUE) cat("wake up") else 
    cat("you can stay in bed")
}
good.morning

## function(work){ 
##   if(work==TRUE) cat("wake up") else 
##     cat("you can stay in bed")
## }

#good.morning()
good.morning(work=FALSE)

## you can stay in bed

Selections

install.packages("ISwR")

## package 'ISwR' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\pdmoerland\AppData\Local\Temp\RtmpkfCiIZ\downloaded_packages

library(ISwR)

## Warning: package 'ISwR' was built under R version 4.0.4

help(juul)
summary(juul)

##       age            menarche          sex             igf1      
##  Min.   : 0.170   Min.   :1.000   Min.   :1.000   Min.   : 25.0  
##  1st Qu.: 9.053   1st Qu.:1.000   1st Qu.:1.000   1st Qu.:202.2  
##  Median :12.560   Median :1.000   Median :2.000   Median :313.5  
##  Mean   :15.095   Mean   :1.476   Mean   :1.534   Mean   :340.2  
##  3rd Qu.:16.855   3rd Qu.:2.000   3rd Qu.:2.000   3rd Qu.:462.8  
##  Max.   :83.000   Max.   :2.000   Max.   :2.000   Max.   :915.0  
##  NA's   :5        NA's   :635     NA's   :5       NA's   :321    
##      tanner        testvol      
##  Min.   :1.00   Min.   : 1.000  
##  1st Qu.:1.00   1st Qu.: 1.000  
##  Median :2.00   Median : 3.000  
##  Mean   :2.64   Mean   : 7.896  
##  3rd Qu.:5.00   3rd Qu.:15.000  
##  Max.   :5.00   Max.   :30.000  
##  NA's   :240    NA's   :859

juul$age[1:10]

##  [1]   NA   NA   NA   NA   NA 0.17 0.17 0.17 0.17 0.17

juul[1:10,"age"]

##  [1]   NA   NA   NA   NA   NA 0.17 0.17 0.17 0.17 0.17

## head and tail functions
tail(juul[, c("menarche","sex")])

##      menarche sex
## 1334        2   2
## 1335        2   2
## 1336        2   2
## 1337        2   2
## 1338        2   2
## 1339        2   2

## in baby steps
tmp <- juul[, c("menarche","sex")] # all rows, two columns
tail(tmp) # last six rows

##      menarche sex
## 1334        2   2
## 1335        2   2
## 1336        2   2
## 1337        2   2
## 1338        2   2
## 1339        2   2

## or 
tail(juul)[, c("menarche","sex")]

##      menarche sex
## 1334        2   2
## 1335        2   2
## 1336        2   2
## 1337        2   2
## 1338        2   2
## 1339        2   2

## in baby steps 
tmp <- tail(juul) # last six rows, all columns
tmp[, c("menarche","sex")] # two columns

##      menarche sex
## 1334        2   2
## 1335        2   2
## 1336        2   2
## 1337        2   2
## 1338        2   2
## 1339        2   2

## some selections using subset function
subset(juul, tanner==2 & age<10)

##      age menarche sex igf1 tanner testvol
## 192 9.50       NA   1   NA      2       2
## 831 9.82        1   2   NA      2      NA
## 835 9.89        1   2  229      2      NA

subset(juul, tanner>=4 & age>45)

##        age menarche sex igf1 tanner testvol
## 1325 47.37        2   2  144      5      NA
## 1326 48.01        2   2  154      5      NA
## 1329 51.07        2   2  187      5      NA
## 1334 58.95        2   2  218      5      NA
## 1335 60.99        2   2  226      5      NA

subset(juul, tanner %in% c(1,5) & (age==0.25|age>50))

##        age menarche sex igf1 tanner testvol
## 13    0.25       NA   1   90      1      NA
## 14    0.25       NA   1  141      1      NA
## 628   0.25       NA   2   51      1      NA
## 1329 51.07        2   2  187      5      NA
## 1334 58.95        2   2  218      5      NA
## 1335 60.99        2   2  226      5      NA

with(juul, table(sex, tanner))

##    tanner
## sex   1   2   3   4   5
##   1 291  55  34  41 124
##   2 224  48  38  40 204

xtabs(~sex+tanner, data=juul)

##    tanner
## sex   1   2   3   4   5
##   1 291  55  34  41 124
##   2 224  48  38  40 204

xtabs(~sex+tanner, data=juul, subset=(menarche==1))

##    tanner
## sex   1   2   3   4   5
##   2 221  43  32  14   2

## also select columns using subset function
subset(juul, tanner==2 & age<10, select=c(menarche,sex))

##     menarche sex
## 192       NA   1
## 831        1   2
## 835        1   2

Some special data types

Missing data

When checking for missingness, we need to use the is.na function.

titanic3$age[1:20]

##  [1] 29.0000  0.9167  2.0000 30.0000 25.0000 48.0000 63.0000 39.0000 53.0000
## [10] 71.0000 47.0000 18.0000 24.0000 26.0000 80.0000      NA 24.0000 50.0000
## [19] 32.0000 36.0000

table(titanic3$age==NA)

## < table of extent 0 >

3==NA

## [1] NA

is.na(3)

## [1] FALSE

table(is.na(titanic3$age))

## 
## FALSE  TRUE 
##  1046   263

## select those with missing igf1 value
subset(juul, tanner %in% c(1,5) & is.na(igf1) & (age<5|age>18))

##        age menarche sex igf1 tanner testvol
## 630   2.64        1   2   NA      1      NA
## 1225 18.09        2   2   NA      5      NA
## 1239 18.44        2   2   NA      5      NA

#quantile(juul$age, prob=c(0.025,0.25,0.5,0.75,0.975)) # gives an error
quantile(juul$age, prob=c(0.025,0.25,0.5,0.75,0.975), na.rm=TRUE)

##     2.5%      25%      50%      75%    97.5% 
##  2.88525  9.05250 12.56000 16.85500 51.48175

with(juul, table(sex, menarche))

##    menarche
## sex   1   2
##   1   0   0
##   2 369 335

with(juul, table(sex, menarche, useNA="always"))

##       menarche
## sex      1   2 <NA>
##   1      0   0  621
##   2    369 335    9
##   <NA>   0   0    5

Factors

DiseaseState <- factor(c("Cancer", "Cancer", "Normal"))
DiseaseState

## [1] Cancer Cancer Normal
## Levels: Cancer Normal

str(DiseaseState)

##  Factor w/ 2 levels "Cancer","Normal": 1 1 2

levels(DiseaseState)

## [1] "Cancer" "Normal"

typeof(DiseaseState) # the internal coding is via integers

## [1] "integer"

is.factor(DiseaseState)

## [1] TRUE

help(factor)
## change labels
factor(c("Cancer", "Cancer", "Normal"), labels=c("Dis","Norm"))

## [1] Dis  Dis  Norm
## Levels: Dis Norm

## change ordering of levels
DiseaseState2 <- factor(c("Cancer", "Cancer", "Normal"),levels=c("Normal","Cancer"))
table(DiseaseState)

## DiseaseState
## Cancer Normal 
##      2      1

table(DiseaseState2)

## DiseaseState2
## Normal Cancer 
##      1      2

## can also use relevel function
relevel(DiseaseState, ref="Normal")

## [1] Cancer Cancer Normal
## Levels: Normal Cancer

## see what changes internally
as.numeric(DiseaseState)

## [1] 1 1 2

as.numeric(relevel(DiseaseState, ref="Normal"))

## [1] 2 2 1

Dates

as.Date("15 April 1912", "%d %b %Y")

## [1] "1912-04-15"

julian(as.Date("15 April 1912", "%d %b %Y"))

## [1] -21080
## attr(,"origin")
## [1] "1970-01-01"

titanic3$dob <- as.Date("15 April 1912", "%d %b %Y")+ (-titanic3$age*365.25)
as.Date("15041912", "%d%m%Y")

## [1] "1912-04-15"

as.Date("150412", "%d%m%y")

## [1] "2012-04-15"

format(as.Date("1912April15", "%Y%b%d"),"%A %B %d, %Y")

## [1] "Monday April 15, 1912"

install.packages("lubridate")

## package 'lubridate' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\pdmoerland\AppData\Local\Temp\RtmpkfCiIZ\downloaded_packages

library(lubridate)

## Warning: package 'lubridate' was built under R version 4.0.4

## 
## Attaching package: 'lubridate'

## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union

wday(as.Date("1912April15", "%Y%b%d"),label=TRUE)

## [1] Mon
## Levels: Sun < Mon < Tue < Wed < Thu < Fri < Sat

Swirl

#install.packages("swirl")
#library(swirl)
#install_from_swirl("R Programming")
#swirl()

Graphics

Basic graphics

x <- (0:100)/10
plot(x, x^3 - 13 * x^2 + 39 * x)

plot(x, x^3 - 13 * x^2 + 39 * x,cex.axis=1.5,cex.lab=1.5)

plot(x,x^3-13*x^2+39*x,type="l",xlab="time (hours)",ylab="temperature",main="Enhanced plot",cex.axis=1.5,cex.lab=1.5)

colors()

##   [1] "white"                "aliceblue"            "antiquewhite"        
##   [4] "antiquewhite1"        "antiquewhite2"        "antiquewhite3"       
##   [7] "antiquewhite4"        "aquamarine"           "aquamarine1"         
##  [10] "aquamarine2"          "aquamarine3"          "aquamarine4"         
##  [13] "azure"                "azure1"               "azure2"              
##  [16] "azure3"               "azure4"               "beige"               
##  [19] "bisque"               "bisque1"              "bisque2"             
##  [22] "bisque3"              "bisque4"              "black"               
##  [25] "blanchedalmond"       "blue"                 "blue1"               
##  [28] "blue2"                "blue3"                "blue4"               
##  [31] "blueviolet"           "brown"                "brown1"              
##  [34] "brown2"               "brown3"               "brown4"              
##  [37] "burlywood"            "burlywood1"           "burlywood2"          
##  [40] "burlywood3"           "burlywood4"           "cadetblue"           
##  [43] "cadetblue1"           "cadetblue2"           "cadetblue3"          
##  [46] "cadetblue4"           "chartreuse"           "chartreuse1"         
##  [49] "chartreuse2"          "chartreuse3"          "chartreuse4"         
##  [52] "chocolate"            "chocolate1"           "chocolate2"          
##  [55] "chocolate3"           "chocolate4"           "coral"               
##  [58] "coral1"               "coral2"               "coral3"              
##  [61] "coral4"               "cornflowerblue"       "cornsilk"            
##  [64] "cornsilk1"            "cornsilk2"            "cornsilk3"           
##  [67] "cornsilk4"            "cyan"                 "cyan1"               
##  [70] "cyan2"                "cyan3"                "cyan4"               
##  [73] "darkblue"             "darkcyan"             "darkgoldenrod"       
##  [76] "darkgoldenrod1"       "darkgoldenrod2"       "darkgoldenrod3"      
##  [79] "darkgoldenrod4"       "darkgray"             "darkgreen"           
##  [82] "darkgrey"             "darkkhaki"            "darkmagenta"         
##  [85] "darkolivegreen"       "darkolivegreen1"      "darkolivegreen2"     
##  [88] "darkolivegreen3"      "darkolivegreen4"      "darkorange"          
##  [91] "darkorange1"          "darkorange2"          "darkorange3"         
##  [94] "darkorange4"          "darkorchid"           "darkorchid1"         
##  [97] "darkorchid2"          "darkorchid3"          "darkorchid4"         
## [100] "darkred"              "darksalmon"           "darkseagreen"        
## [103] "darkseagreen1"        "darkseagreen2"        "darkseagreen3"       
## [106] "darkseagreen4"        "darkslateblue"        "darkslategray"       
## [109] "darkslategray1"       "darkslategray2"       "darkslategray3"      
## [112] "darkslategray4"       "darkslategrey"        "darkturquoise"       
## [115] "darkviolet"           "deeppink"             "deeppink1"           
## [118] "deeppink2"            "deeppink3"            "deeppink4"           
## [121] "deepskyblue"          "deepskyblue1"         "deepskyblue2"        
## [124] "deepskyblue3"         "deepskyblue4"         "dimgray"             
## [127] "dimgrey"              "dodgerblue"           "dodgerblue1"         
## [130] "dodgerblue2"          "dodgerblue3"          "dodgerblue4"         
## [133] "firebrick"            "firebrick1"           "firebrick2"          
## [136] "firebrick3"           "firebrick4"           "floralwhite"         
## [139] "forestgreen"          "gainsboro"            "ghostwhite"          
## [142] "gold"                 "gold1"                "gold2"               
## [145] "gold3"                "gold4"                "goldenrod"           
## [148] "goldenrod1"           "goldenrod2"           "goldenrod3"          
## [151] "goldenrod4"           "gray"                 "gray0"               
## [154] "gray1"                "gray2"                "gray3"               
## [157] "gray4"                "gray5"                "gray6"               
## [160] "gray7"                "gray8"                "gray9"               
## [163] "gray10"               "gray11"               "gray12"              
## [166] "gray13"               "gray14"               "gray15"              
## [169] "gray16"               "gray17"               "gray18"              
## [172] "gray19"               "gray20"               "gray21"              
## [175] "gray22"               "gray23"               "gray24"              
## [178] "gray25"               "gray26"               "gray27"              
## [181] "gray28"               "gray29"               "gray30"              
## [184] "gray31"               "gray32"               "gray33"              
## [187] "gray34"               "gray35"               "gray36"              
## [190] "gray37"               "gray38"               "gray39"              
## [193] "gray40"               "gray41"               "gray42"              
## [196] "gray43"               "gray44"               "gray45"              
## [199] "gray46"               "gray47"               "gray48"              
## [202] "gray49"               "gray50"               "gray51"              
## [205] "gray52"               "gray53"               "gray54"              
## [208] "gray55"               "gray56"               "gray57"              
## [211] "gray58"               "gray59"               "gray60"              
## [214] "gray61"               "gray62"               "gray63"              
## [217] "gray64"               "gray65"               "gray66"              
## [220] "gray67"               "gray68"               "gray69"              
## [223] "gray70"               "gray71"               "gray72"              
## [226] "gray73"               "gray74"               "gray75"              
## [229] "gray76"               "gray77"               "gray78"              
## [232] "gray79"               "gray80"               "gray81"              
## [235] "gray82"               "gray83"               "gray84"              
## [238] "gray85"               "gray86"               "gray87"              
## [241] "gray88"               "gray89"               "gray90"              
## [244] "gray91"               "gray92"               "gray93"              
## [247] "gray94"               "gray95"               "gray96"              
## [250] "gray97"               "gray98"               "gray99"              
## [253] "gray100"              "green"                "green1"              
## [256] "green2"               "green3"               "green4"              
## [259] "greenyellow"          "grey"                 "grey0"               
## [262] "grey1"                "grey2"                "grey3"               
## [265] "grey4"                "grey5"                "grey6"               
## [268] "grey7"                "grey8"                "grey9"               
## [271] "grey10"               "grey11"               "grey12"              
## [274] "grey13"               "grey14"               "grey15"              
## [277] "grey16"               "grey17"               "grey18"              
## [280] "grey19"               "grey20"               "grey21"              
## [283] "grey22"               "grey23"               "grey24"              
## [286] "grey25"               "grey26"               "grey27"              
## [289] "grey28"               "grey29"               "grey30"              
## [292] "grey31"               "grey32"               "grey33"              
## [295] "grey34"               "grey35"               "grey36"              
## [298] "grey37"               "grey38"               "grey39"              
## [301] "grey40"               "grey41"               "grey42"              
## [304] "grey43"               "grey44"               "grey45"              
## [307] "grey46"               "grey47"               "grey48"              
## [310] "grey49"               "grey50"               "grey51"              
## [313] "grey52"               "grey53"               "grey54"              
## [316] "grey55"               "grey56"               "grey57"              
## [319] "grey58"               "grey59"               "grey60"              
## [322] "grey61"               "grey62"               "grey63"              
## [325] "grey64"               "grey65"               "grey66"              
## [328] "grey67"               "grey68"               "grey69"              
## [331] "grey70"               "grey71"               "grey72"              
## [334] "grey73"               "grey74"               "grey75"              
## [337] "grey76"               "grey77"               "grey78"              
## [340] "grey79"               "grey80"               "grey81"              
## [343] "grey82"               "grey83"               "grey84"              
## [346] "grey85"               "grey86"               "grey87"              
## [349] "grey88"               "grey89"               "grey90"              
## [352] "grey91"               "grey92"               "grey93"              
## [355] "grey94"               "grey95"               "grey96"              
## [358] "grey97"               "grey98"               "grey99"              
## [361] "grey100"              "honeydew"             "honeydew1"           
## [364] "honeydew2"            "honeydew3"            "honeydew4"           
## [367] "hotpink"              "hotpink1"             "hotpink2"            
## [370] "hotpink3"             "hotpink4"             "indianred"           
## [373] "indianred1"           "indianred2"           "indianred3"          
## [376] "indianred4"           "ivory"                "ivory1"              
## [379] "ivory2"               "ivory3"               "ivory4"              
## [382] "khaki"                "khaki1"               "khaki2"              
## [385] "khaki3"               "khaki4"               "lavender"            
## [388] "lavenderblush"        "lavenderblush1"       "lavenderblush2"      
## [391] "lavenderblush3"       "lavenderblush4"       "lawngreen"           
## [394] "lemonchiffon"         "lemonchiffon1"        "lemonchiffon2"       
## [397] "lemonchiffon3"        "lemonchiffon4"        "lightblue"           
## [400] "lightblue1"           "lightblue2"           "lightblue3"          
## [403] "lightblue4"           "lightcoral"           "lightcyan"           
## [406] "lightcyan1"           "lightcyan2"           "lightcyan3"          
## [409] "lightcyan4"           "lightgoldenrod"       "lightgoldenrod1"     
## [412] "lightgoldenrod2"      "lightgoldenrod3"      "lightgoldenrod4"     
## [415] "lightgoldenrodyellow" "lightgray"            "lightgreen"          
## [418] "lightgrey"            "lightpink"            "lightpink1"          
## [421] "lightpink2"           "lightpink3"           "lightpink4"          
## [424] "lightsalmon"          "lightsalmon1"         "lightsalmon2"        
## [427] "lightsalmon3"         "lightsalmon4"         "lightseagreen"       
## [430] "lightskyblue"         "lightskyblue1"        "lightskyblue2"       
## [433] "lightskyblue3"        "lightskyblue4"        "lightslateblue"      
## [436] "lightslategray"       "lightslategrey"       "lightsteelblue"      
## [439] "lightsteelblue1"      "lightsteelblue2"      "lightsteelblue3"     
## [442] "lightsteelblue4"      "lightyellow"          "lightyellow1"        
## [445] "lightyellow2"         "lightyellow3"         "lightyellow4"        
## [448] "limegreen"            "linen"                "magenta"             
## [451] "magenta1"             "magenta2"             "magenta3"            
## [454] "magenta4"             "maroon"               "maroon1"             
## [457] "maroon2"              "maroon3"              "maroon4"             
## [460] "mediumaquamarine"     "mediumblue"           "mediumorchid"        
## [463] "mediumorchid1"        "mediumorchid2"        "mediumorchid3"       
## [466] "mediumorchid4"        "mediumpurple"         "mediumpurple1"       
## [469] "mediumpurple2"        "mediumpurple3"        "mediumpurple4"       
## [472] "mediumseagreen"       "mediumslateblue"      "mediumspringgreen"   
## [475] "mediumturquoise"      "mediumvioletred"      "midnightblue"        
## [478] "mintcream"            "mistyrose"            "mistyrose1"          
## [481] "mistyrose2"           "mistyrose3"           "mistyrose4"          
## [484] "moccasin"             "navajowhite"          "navajowhite1"        
## [487] "navajowhite2"         "navajowhite3"         "navajowhite4"        
## [490] "navy"                 "navyblue"             "oldlace"             
## [493] "olivedrab"            "olivedrab1"           "olivedrab2"          
## [496] "olivedrab3"           "olivedrab4"           "orange"              
## [499] "orange1"              "orange2"              "orange3"             
## [502] "orange4"              "orangered"            "orangered1"          
## [505] "orangered2"           "orangered3"           "orangered4"          
## [508] "orchid"               "orchid1"              "orchid2"             
## [511] "orchid3"              "orchid4"              "palegoldenrod"       
## [514] "palegreen"            "palegreen1"           "palegreen2"          
## [517] "palegreen3"           "palegreen4"           "paleturquoise"       
## [520] "paleturquoise1"       "paleturquoise2"       "paleturquoise3"      
## [523] "paleturquoise4"       "palevioletred"        "palevioletred1"      
## [526] "palevioletred2"       "palevioletred3"       "palevioletred4"      
## [529] "papayawhip"           "peachpuff"            "peachpuff1"          
## [532] "peachpuff2"           "peachpuff3"           "peachpuff4"          
## [535] "peru"                 "pink"                 "pink1"               
## [538] "pink2"                "pink3"                "pink4"               
## [541] "plum"                 "plum1"                "plum2"               
## [544] "plum3"                "plum4"                "powderblue"          
## [547] "purple"               "purple1"              "purple2"             
## [550] "purple3"              "purple4"              "red"                 
## [553] "red1"                 "red2"                 "red3"                
## [556] "red4"                 "rosybrown"            "rosybrown1"          
## [559] "rosybrown2"           "rosybrown3"           "rosybrown4"          
## [562] "royalblue"            "royalblue1"           "royalblue2"          
## [565] "royalblue3"           "royalblue4"           "saddlebrown"         
## [568] "salmon"               "salmon1"              "salmon2"             
## [571] "salmon3"              "salmon4"              "sandybrown"          
## [574] "seagreen"             "seagreen1"            "seagreen2"           
## [577] "seagreen3"            "seagreen4"            "seashell"            
## [580] "seashell1"            "seashell2"            "seashell3"           
## [583] "seashell4"            "sienna"               "sienna1"             
## [586] "sienna2"              "sienna3"              "sienna4"             
## [589] "skyblue"              "skyblue1"             "skyblue2"            
## [592] "skyblue3"             "skyblue4"             "slateblue"           
## [595] "slateblue1"           "slateblue2"           "slateblue3"          
## [598] "slateblue4"           "slategray"            "slategray1"          
## [601] "slategray2"           "slategray3"           "slategray4"          
## [604] "slategrey"            "snow"                 "snow1"               
## [607] "snow2"                "snow3"                "snow4"               
## [610] "springgreen"          "springgreen1"         "springgreen2"        
## [613] "springgreen3"         "springgreen4"         "steelblue"           
## [616] "steelblue1"           "steelblue2"           "steelblue3"          
## [619] "steelblue4"           "tan"                  "tan1"                
## [622] "tan2"                 "tan3"                 "tan4"                
## [625] "thistle"              "thistle1"             "thistle2"            
## [628] "thistle3"             "thistle4"             "tomato"              
## [631] "tomato1"              "tomato2"              "tomato3"             
## [634] "tomato4"              "turquoise"            "turquoise1"          
## [637] "turquoise2"           "turquoise3"           "turquoise4"          
## [640] "violet"               "violetred"            "violetred1"          
## [643] "violetred2"           "violetred3"           "violetred4"          
## [646] "wheat"                "wheat1"               "wheat2"              
## [649] "wheat3"               "wheat4"               "whitesmoke"          
## [652] "yellow"               "yellow1"              "yellow2"             
## [655] "yellow3"              "yellow4"              "yellowgreen"

plot(x,x^3-13*x^2+39*x,pch=18,xlab="time (hours)",ylab="temperature",col="red",main="Enhanced plot",cex.axis=1.5,cex.lab=1.5)

palette()

## [1] "black"   "#DF536B" "#61D04F" "#2297E6" "#28E2E5" "#CD0BBC" "#F5C710"
## [8] "gray62"

plot(x,x^3-13*x^2+39*x,pch=18,xlab="time (hours)",ylab="temperature",col=3,main="Enhanced plot",cex.axis=1.5,cex.lab=1.5)

plot(1:25, pch=1:25,cex=2,bg="grey")

x<-(0:100)/10
plot(x,x^3-13*x^2+39*x,type="l",xlab="time (hours)",ylab="temperature",cex.axis=1.5,cex.lab=1.5)
points(2,34,col="red",pch=16,cex=2)
arrows(4,50,2.2,34.5)
text(4.15,50,"local maximum",adj=0,col="blue",cex=1.5)
lines(x,30-50*sin(x/2),col="blue")
legend(x=0,y=80,legend=c("Sahara","Gobi"),lty=1,col=c("black","blue"),cex=1.5)

?par
par("bg")

## [1] "white"

par(bg="green")
par("bg")

## [1] "green"

# rerun the plot commands
x<-(0:100)/10
plot(x,x^3-13*x^2+39*x,type="l",xlab="time (hours)",ylab="temperature",cex.axis=1.5,cex.lab=1.5)
points(2,34,col="red",pch=16,cex=2)
arrows(4,50,2.2,34.5)
text(4.15,50,"local maximum",adj=0,col="blue",cex=1.5)
lines(x,30-50*sin(x/2),col="blue")
legend(x=0,y=80,legend=c("Sahara","Gobi"),lty=1,col=c("black","blue"),cex=1.5)

plot(1:10)
par(bg="white")
plot(1:10)

# save default graphical parameters to be able to restore them later on
par.defaults <- par(no.readonly=TRUE)
par(bg="green")
par("bg")

## [1] "green"

plot(1:10)
par(par.defaults)
par("bg")

## [1] "white"

plot(1:10)

x<-(0:100)/10
plot(x,x^3-13*x^2+39*x,type="l",xlab= "time (hours)",ylab="temperature",lwd=3,las=1,cex.axis=1.5,cex.lab=1.5)

hist(titanic3$age,breaks=15,freq=FALSE,cex.axis=1.5,cex.lab=1.5)

boxplot(titanic3$fare,ylim=c(0,300),ylab="fare",cex.axis=1.5,cex.lab=1.5)

boxplot(fare ~ pclass,data=titanic3,ylim=c(0,300),ylab="fare",cex.axis=1.5,cex.lab=1.5)
plot(fare ~ pclass,data=titanic3,ylim=c(0,300),ylab="fare",cex.axis=1.5,cex.lab=1.5)

Advanced graphics

# graphics
plot(fare~age,data=titanic3) # very skewed distribution

plot(log10(fare)~age,data=titanic3) # plot log10(y) vs x

plot(fare~age,data=titanic3,log="y") # original data, but log scale

## Warning in xy.coords(x, y, xlabel, ylabel, log): 17 y values <= 0 omitted from
## logarithmic plot

min(titanic3$fare,na.rm=TRUE) # zero values!

## [1] 0

sum(titanic3$fare==0,na.rm=TRUE) # zero values!

## [1] 17

install.packages("ggplot2")

## package 'ggplot2' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\pdmoerland\AppData\Local\Temp\RtmpkfCiIZ\downloaded_packages

library(ggplot2)

## Warning: package 'ggplot2' was built under R version 4.0.4

qplot(age,fare,data=titanic3)

## Warning: Removed 264 rows containing missing values (geom_point).

qplot(age,fare,data=titanic3) + scale_y_log10() # log scale for y values

## Warning: Transformation introduced infinite values in continuous y-axis

## Warning: Removed 264 rows containing missing values (geom_point).

qplot(age,fare,data=titanic3,facets=~pclass) # one plot per pclass

## Warning: Removed 264 rows containing missing values (geom_point).

qplot(age,fare,data=titanic3,facets=~pclass,ylim=c(0,280)) # better for comparison

## Warning: Removed 268 rows containing missing values (geom_point).

qplot(age,fare,data=titanic3,facets=sex~pclass) + scale_y_log10() # even better

## Warning: Transformation introduced infinite values in continuous y-axis

## Warning: Removed 264 rows containing missing values (geom_point).

qplot(age,fare,data=titanic3,facets=pclass~sex) + scale_y_log10() # better for comparing males and females per class

## Warning: Transformation introduced infinite values in continuous y-axis

## Warning: Removed 264 rows containing missing values (geom_point).

#install.packages("devtools")
#library(devtools)
#install_github("rCharts", "ramnathv", ref = "dev") 
#library(rCharts)
#titanic3$survived = factor(titanic3$survived, labels=c("no","yes"))
#rPlot(fare ~  age| sex + pclass, data = titanic3, type = "point", color = "survived", size = list(const = 2))

#dt <- dTable(titanic3,sPaginationType=  "full_numbers")
#dt

## Open the PDF device
pdf("titanic3_ggplot_v1.pdf")
qplot(age,fare,data=titanic3,facets=pclass~sex) + scale_y_log10()

## Warning: Transformation introduced infinite values in continuous y-axis

## Warning: Removed 264 rows containing missing values (geom_point).

## Close the device again to save the file
dev.off()

## png 
##   2

qplot(age,fare,data=titanic3,facets=pclass~sex) + scale_y_log10()

## Warning: Transformation introduced infinite values in continuous y-axis

## Warning: Removed 264 rows containing missing values (geom_point).

dev.print("titanic3_ggplot_v2.pdf",device=pdf)

## png 
##   2

# Change size, for pdf device width and height are specified in inches
pdf("titanic3_ggplot_v1_big.pdf", width=21, height=21)
qplot(age,fare,data=titanic3,facets=pclass~sex) + scale_y_log10()

## Warning: Transformation introduced infinite values in continuous y-axis

## Warning: Removed 264 rows containing missing values (geom_point).

## Close the device again to save the file
dev.off()

## png 
##   2

Structure of R

search()

##  [1] ".GlobalEnv"        "package:ggplot2"   "package:lubridate"
##  [4] "package:ISwR"      "package:readxl"    "package:foreign"  
##  [7] "package:stats"     "package:graphics"  "package:grDevices"
## [10] "package:utils"     "package:datasets"  "package:methods"  
## [13] "Autoloads"         "package:base"

ls() # objects in Workspace

##  [1] "A"             "a2"            "age"           "AMC"          
##  [5] "c"             "DiseaseState"  "DiseaseState2" "good.morning" 
##  [9] "indx"          "m"             "name"          "par.defaults" 
## [13] "pclass"        "r"             "survived"      "titanic"      
## [17] "titanic3"      "tmp"           "x"

ls("package:stats")[1:20] # some objects in stats package

##  [1] "acf"                  "acf2AR"               "add.scope"           
##  [4] "add1"                 "addmargins"           "aggregate"           
##  [7] "aggregate.data.frame" "aggregate.ts"         "AIC"                 
## [10] "alias"                "anova"                "ansari.test"         
## [13] "aov"                  "approx"               "approxfun"           
## [16] "ar"                   "ar.burg"              "ar.mle"              
## [19] "ar.ols"               "ar.yw"

ls("package:base")[1:30] # some objects in base package

##  [1] "-"                 "-.Date"            "-.POSIXt"         
##  [4] "!"                 "!.hexmode"         "!.octmode"        
##  [7] "!="                "$"                 "$.DLLInfo"        
## [10] "$.package_version" "$<-"               "$<-.data.frame"   
## [13] "%%"                "%*%"               "%/%"              
## [16] "%in%"              "%o%"               "%x%"              
## [19] "&"                 "&&"                "&.hexmode"        
## [22] "&.octmode"         "("                 "*"                
## [25] "*.difftime"        "/"                 "/.difftime"       
## [28] ":"                 "::"                ":::"

## hierarchical structure and duplicate names
data(quakes) # from package:datasets
## new quakes object in Workspace hides the other
quakes <- 1:10 
quakes

##  [1]  1  2  3  4  5  6  7  8  9 10

## removing it makes the other visible again
rm(quakes)
quakes$mag # original quakes not overwritten!

##    [1] 4.8 4.2 5.4 4.1 4.0 4.0 4.8 4.4 4.7 4.3 4.4 4.6 4.4 4.4 6.1 4.3 6.0 4.5
##   [19] 4.4 4.4 4.5 4.2 4.4 4.7 5.4 4.0 4.6 5.2 4.5 4.4 4.6 4.7 4.8 4.0 4.5 4.3
##   [37] 4.5 4.6 4.1 4.4 4.7 4.6 4.4 4.3 4.6 4.9 4.5 4.4 4.3 5.1 4.2 4.0 4.6 4.3
##   [55] 4.2 4.4 4.5 4.0 4.4 4.3 4.7 4.1 5.0 4.6 4.9 4.7 4.1 5.0 4.5 5.5 4.0 4.5
##   [73] 4.3 5.2 4.4 4.3 4.1 4.5 4.2 5.3 5.2 4.5 4.6 4.3 4.0 4.3 4.7 4.5 4.2 4.3
##   [91] 5.1 4.7 5.2 4.2 4.2 4.0 4.5 5.2 5.1 4.7 4.1 4.6 4.7 4.7 4.6 4.2 4.4 4.6
##  [109] 5.7 5.0 4.5 4.2 4.0 4.8 4.4 4.2 5.3 4.7 4.8 4.2 4.8 4.3 4.7 4.5 4.4 5.1
##  [127] 4.2 5.0 4.8 4.3 4.5 4.2 4.5 4.6 4.3 4.7 5.1 4.6 4.9 4.2 4.6 4.0 5.0 4.4
##  [145] 4.2 4.2 4.4 4.9 5.3 4.0 5.7 6.4 4.3 4.2 4.7 4.7 4.2 4.3 4.9 4.6 4.1 4.8
##  [163] 4.6 4.6 4.8 5.0 5.6 5.3 4.7 4.5 4.3 4.6 4.1 4.1 4.1 5.7 5.0 4.5 4.1 4.6
##  [181] 4.5 4.3 4.4 4.2 4.1 4.9 4.3 4.9 4.6 4.6 5.3 4.7 4.6 4.1 4.6 4.2 4.6 4.5
##  [199] 4.3 5.2 4.3 4.0 4.7 4.5 4.5 4.3 5.2 4.5 4.7 4.2 4.7 4.3 4.1 5.4 4.3 4.3
##  [217] 4.2 4.6 4.4 4.2 4.7 4.6 4.9 4.2 4.5 4.9 4.2 4.5 5.0 5.0 4.7 4.3 4.4 4.9
##  [235] 4.5 4.0 4.4 5.0 4.7 4.8 4.5 4.1 5.3 4.8 5.0 4.6 4.3 4.7 5.3 4.2 4.7 4.5
##  [253] 5.1 4.9 4.4 4.2 4.6 4.7 4.2 4.9 5.1 4.7 4.4 4.4 4.2 4.9 4.6 4.4 4.6 4.5
##  [271] 4.4 4.9 4.1 4.2 5.7 4.6 5.0 4.3 4.5 5.2 4.4 4.2 4.6 4.0 4.4 4.7 4.2 4.7
##  [289] 4.5 5.0 5.0 4.8 4.6 4.6 5.0 5.0 5.6 4.0 4.0 4.6 4.4 4.8 4.7 4.6 4.3 4.7
##  [307] 4.1 4.9 4.6 4.8 4.9 5.1 5.4 4.5 4.9 4.4 4.1 5.3 4.5 4.9 4.8 5.2 4.4 4.4
##  [325] 4.9 4.4 4.2 4.6 4.6 5.3 5.3 4.5 4.4 5.0 5.1 4.5 4.5 5.4 4.5 4.7 4.2 4.8
##  [343] 4.5 4.3 4.3 4.3 4.6 4.5 5.0 4.6 4.6 4.9 4.1 5.5 4.7 5.0 5.1 5.5 4.6 4.7
##  [361] 4.2 4.0 5.4 4.3 4.4 4.6 5.1 4.5 4.2 4.1 5.1 5.4 5.1 5.1 4.4 5.7 4.4 5.1
##  [379] 4.6 5.5 5.1 4.4 5.0 5.0 5.1 5.1 4.2 4.5 4.0 4.9 4.3 4.8 4.4 4.2 4.9 4.2
##  [397] 5.3 5.0 5.7 5.3 4.7 4.6 4.3 5.4 4.4 4.3 4.3 4.7 4.4 4.2 4.5 4.8 4.8 4.3
##  [415] 4.4 5.1 4.4 4.4 4.3 4.8 4.3 4.5 4.1 5.1 4.7 4.6 4.7 4.6 4.4 4.2 4.1 4.7
##  [433] 4.0 4.8 4.4 4.3 4.4 4.5 4.5 4.7 4.3 4.7 4.8 4.6 5.1 4.7 4.3 5.1 5.5 4.5
##  [451] 4.3 4.3 4.7 4.3 4.3 4.6 4.5 4.5 5.4 4.6 4.4 5.0 5.2 4.4 5.1 4.4 4.4 4.8
##  [469] 4.4 4.7 4.7 4.4 4.3 5.0 4.5 4.6 5.4 4.6 4.5 4.4 4.5 4.1 4.0 4.9 4.1 5.2
##  [487] 4.4 4.1 4.9 4.6 4.5 4.8 4.2 4.2 4.1 5.5 4.4 4.8 4.2 4.8 4.9 4.6 4.5 4.4
##  [505] 4.6 4.2 4.7 4.4 4.5 4.9 4.7 5.5 4.7 4.6 4.1 4.7 4.6 4.3 4.4 4.7 4.3 4.1
##  [523] 4.7 4.5 5.5 4.5 4.6 5.1 4.4 4.7 5.5 4.6 4.0 4.6 4.8 4.7 4.7 4.2 5.4 4.6
##  [541] 5.5 4.2 4.5 4.5 4.8 4.6 5.4 4.6 5.0 4.3 4.3 4.8 4.7 4.7 4.8 4.2 4.2 5.9
##  [559] 4.6 4.5 4.9 4.7 4.9 5.3 4.2 4.2 4.5 5.2 4.5 5.6 5.2 4.8 4.5 5.0 4.4 4.5
##  [577] 4.6 4.5 5.2 5.1 4.8 4.3 5.1 4.7 4.6 4.8 4.6 4.4 4.6 5.1 4.3 4.4 4.7 4.8
##  [595] 4.1 4.6 4.9 4.0 4.7 4.7 5.2 4.6 4.6 4.9 5.7 4.7 4.4 4.9 4.8 4.6 4.4 4.8
##  [613] 4.7 4.2 5.1 4.8 4.9 5.1 4.3 4.4 4.7 4.7 5.3 5.1 4.9 4.5 4.7 4.2 5.1 4.8
##  [631] 4.2 4.8 4.4 4.5 4.3 5.6 4.0 5.0 4.2 4.6 4.6 4.5 5.0 4.7 4.6 4.8 4.6 4.4
##  [649] 5.6 4.2 5.4 4.8 5.6 4.4 4.7 4.6 5.1 4.6 4.5 4.2 4.8 4.9 5.5 5.0 4.2 5.2
##  [667] 4.8 4.4 4.8 4.4 4.1 4.9 4.6 4.5 5.3 4.8 4.6 4.8 4.7 4.9 5.3 4.4 4.6 4.3
##  [685] 4.4 4.2 4.1 4.2 5.0 4.1 4.3 5.2 4.2 4.5 4.4 4.4 5.0 4.0 4.8 5.0 4.2 5.2
##  [703] 5.2 4.5 4.3 4.5 4.6 5.1 4.4 4.3 4.7 5.6 4.9 5.1 4.6 4.4 4.8 4.2 4.7 4.1
##  [721] 4.7 4.0 4.9 5.0 4.5 4.4 4.0 4.5 4.7 4.4 4.7 4.7 4.0 4.2 4.5 4.7 4.5 4.1
##  [739] 4.6 4.3 4.6 5.2 4.6 4.7 5.0 5.2 4.4 4.6 4.5 4.0 4.2 5.3 5.9 4.9 4.5 4.4
##  [757] 5.4 5.3 5.1 4.1 4.3 4.5 4.1 5.1 5.3 4.5 4.1 4.4 4.7 4.0 5.1 4.0 4.3 4.3
##  [775] 4.0 4.1 4.4 4.3 4.2 4.0 4.5 4.7 5.0 4.3 5.1 4.7 5.3 5.0 4.5 5.0 4.1 4.9
##  [793] 4.1 4.0 4.4 4.5 4.4 4.5 4.3 4.3 5.0 4.5 4.5 4.4 4.5 4.1 4.9 4.7 4.3 4.6
##  [811] 4.6 5.2 4.5 4.3 4.6 4.0 4.5 4.8 4.6 4.1 4.9 4.7 4.1 4.3 4.4 4.0 4.8 4.4
##  [829] 4.4 4.6 4.2 4.1 4.2 4.0 4.1 4.1 4.7 4.8 5.1 5.0 4.8 4.4 5.0 5.2 4.2 4.1
##  [847] 4.8 4.5 5.0 5.1 4.1 4.7 5.2 4.8 4.5 4.0 4.7 4.1 4.3 4.3 4.0 4.7 4.1 4.2
##  [865] 4.8 4.8 4.2 4.2 5.7 6.0 4.2 4.5 4.9 4.4 4.0 4.3 4.2 4.5 4.8 4.4 4.2 4.2
##  [883] 5.0 4.2 5.2 4.5 4.6 5.0 5.0 5.4 4.8 4.2 5.5 4.3 4.1 4.4 4.9 4.5 4.9 4.0
##  [901] 4.5 5.0 4.9 4.5 4.2 4.2 4.2 5.2 4.3 5.1 4.6 4.6 4.4 4.4 4.9 5.1 4.2 4.7
##  [919] 4.0 5.6 5.3 5.0 4.3 4.1 5.1 4.5 4.9 5.4 4.7 4.8 4.5 4.2 4.6 4.6 5.6 5.4
##  [937] 4.0 5.2 4.2 4.7 4.3 4.8 4.6 5.4 4.8 4.6 4.2 5.5 4.7 4.7 4.5 5.5 4.5 4.1
##  [955] 4.0 4.3 4.7 4.9 4.5 4.7 4.5 4.8 4.3 4.1 5.4 4.1 4.8 4.2 4.8 5.4 4.4 5.2
##  [973] 4.7 4.9 4.9 4.2 4.4 4.4 4.3 4.9 5.0 4.7 4.9 4.3 4.5 4.2 5.2 4.8 4.0 4.7
##  [991] 4.3 4.3 4.9 4.0 4.2 4.4 4.7 4.5 4.5 6.0

find("quakes")

## [1] "package:datasets"

save(titanic3, file="Titanic.RData")
rm(titanic3)
load("Titanic.RData")

getwd()

## [1] "C:/Users/pdmoerland/Dropbox/education/Computing in R"

#setwd("c:/AMC/Teaching/RCursus") # just an example, note that a forward slash is used

Data management

## sort
install.packages("dplyr")

## also installing the dependency 'lifecycle'

## package 'lifecycle' successfully unpacked and MD5 sums checked
## package 'dplyr' successfully unpacked and MD5 sums checked

## Warning: cannot remove prior installation of package 'dplyr'

## Warning in file.copy(savedcopy, lib, recursive = TRUE): problem copying C:
## \Users\pdmoerland\R\R-4.0.0\library\00LOCK\dplyr\libs\x64\dplyr.dll to C:
## \Users\pdmoerland\R\R-4.0.0\library\dplyr\libs\x64\dplyr.dll: Permission denied

## Warning: restored 'dplyr'

## 
## The downloaded binary packages are in
##  C:\Users\pdmoerland\AppData\Local\Temp\RtmpkfCiIZ\downloaded_packages

library(dplyr)

## Warning: package 'dplyr' was built under R version 4.0.3

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

titanic.SortedAge <- arrange(titanic3, age)
head(titanic.SortedAge)

##      pclass survived                              name    sex    age sibsp
## 764     3rd        1 Dean, Miss. Elizabeth Gladys \\"M female 0.1667     1
## 748     3rd        0   Danbom, Master. Gilbert Sigvard   male 0.3333     0
## 1241    3rd        1   Thomas, Master. Assad Alexander   male 0.4167     0
## 428     2nd        1         Hamalainen, Master. Viljo   male 0.6667     1
## 658     3rd        1            Baclini, Miss. Eugenie female 0.7500     2
## 659     3rd        1     Baclini, Miss. Helene Barbara female 0.7500     2
##      parch    ticket    fare cabin    embarked boat body
## 764      2 C.A. 2315 20.5750       Southampton   10   NA
## 748      2    347080 14.4000       Southampton        NA
## 1241     1      2625  8.5167         Cherbourg   16   NA
## 428      1    250649 14.5000       Southampton    4   NA
## 658      1      2666 19.2583         Cherbourg    C   NA
## 659      1      2666 19.2583         Cherbourg    C   NA
##                       home.dest        dob family agecat
## 764  Devon, England Wichita, KS 1912-02-14    yes (0,18]
## 748                 Stanton, IA 1911-12-15    yes (0,18]
## 1241                            1911-11-14    yes (0,18]
## 428                 Detroit, MI 1911-08-15    yes (0,18]
## 658          Syria New York, NY 1911-07-16    yes (0,18]
## 659          Syria New York, NY 1911-07-16    yes (0,18]

## base R
x <- c(5,8,1)
sort(x)

## [1] 1 5 8

order(x)

## [1] 3 1 2

x[order(x)]

## [1] 1 5 8

z <- data.frame(x=c(5,8,1), y=c("A","M","C"))
z

##   x y
## 1 5 A
## 2 8 M
## 3 1 C

z[sort(z$x),]

##       x    y
## 1     5    A
## NA   NA <NA>
## NA.1 NA <NA>

z[order(z$x),]

##   x y
## 3 1 C
## 1 5 A
## 2 8 M

## merge
emb <- data.frame(embarked=c("Cherbourg","Queenstown","Southampton"), country=c("France","Ireland","United Kingdom"))
emb

##      embarked        country
## 1   Cherbourg         France
## 2  Queenstown        Ireland
## 3 Southampton United Kingdom

titanic3 <- left_join(titanic3, emb, by="embarked") # all rows from titanic 3, all columns from both
head(titanic3)

##   pclass survived                            name    sex     age sibsp parch
## 1    1st        1   Allen, Miss. Elisabeth Walton female 29.0000     0     0
## 2    1st        1  Allison, Master. Hudson Trevor   male  0.9167     1     2
## 3    1st        0    Allison, Miss. Helen Loraine female  2.0000     1     2
## 4    1st        0 Allison, Mr. Hudson Joshua Crei   male 30.0000     1     2
## 5    1st        0 Allison, Mrs. Hudson J C (Bessi female 25.0000     1     2
## 6    1st        1             Anderson, Mr. Harry   male 48.0000     0     0
##   ticket     fare   cabin    embarked boat body                       home.dest
## 1  24160 211.3375      B5 Southampton    2   NA                    St Louis, MO
## 2 113781 151.5500 C22 C26 Southampton   11   NA Montreal, PQ / Chesterville, ON
## 3 113781 151.5500 C22 C26 Southampton        NA Montreal, PQ / Chesterville, ON
## 4 113781 151.5500 C22 C26 Southampton       135 Montreal, PQ / Chesterville, ON
## 5 113781 151.5500 C22 C26 Southampton        NA Montreal, PQ / Chesterville, ON
## 6  19952  26.5500     E12 Southampton    3   NA                    New York, NY
##          dob family  agecat        country
## 1 1883-04-14     no (18,30] United Kingdom
## 2 1911-05-16    yes  (0,18] United Kingdom
## 3 1910-04-15    yes  (0,18] United Kingdom
## 4 1882-04-14    yes (18,30] United Kingdom
## 5 1887-04-14    yes (18,30] United Kingdom
## 6 1864-04-14     no (40,80] United Kingdom

## base R
titanic3 <- merge(titanic3, emb)

## wide <-> long
install.packages("tidyr")

## also installing the dependency 'cpp11'

## package 'cpp11' successfully unpacked and MD5 sums checked
## package 'tidyr' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\pdmoerland\AppData\Local\Temp\RtmpkfCiIZ\downloaded_packages

library(tidyr)

## Warning: package 'tidyr' was built under R version 4.0.4

df1 <- data.frame(c("a", "a", "a", "a", "b", "b", "b", "b"), c(1,2,3,4,1,2,3,4), c(0.1, 0.2, 0.3, 0.4, 2.1, 2.2, 2.3, 2.4))
colnames(df1) <-  c("Name", "Time", "Value")
df1

##   Name Time Value
## 1    a    1   0.1
## 2    a    2   0.2
## 3    a    3   0.3
## 4    a    4   0.4
## 5    b    1   2.1
## 6    b    2   2.2
## 7    b    3   2.3
## 8    b    4   2.4

df1.wide <- spread(df1, Time, Value)
df1.wide

##   Name   1   2   3   4
## 1    a 0.1 0.2 0.3 0.4
## 2    b 2.1 2.2 2.3 2.4

df1.long <- gather(df1.wide, "Time", "Value", 2:5)
df1.long

##   Name Time Value
## 1    a    1   0.1
## 2    b    1   2.1
## 3    a    2   0.2
## 4    b    2   2.2
## 5    a    3   0.3
## 6    b    3   2.3
## 7    a    4   0.4
## 8    b    4   2.4

arrange(df1.long, Name, Time)

##   Name Time Value
## 1    a    1   0.1
## 2    a    2   0.2
## 3    a    3   0.3
## 4    a    4   0.4
## 5    b    1   2.1
## 6    b    2   2.2
## 7    b    3   2.3
## 8    b    4   2.4

## base R
df1.wide <- reshape(df1, timevar="Time", idvar="Name", direction="wide", v.names="Value")
df1.wide

##   Name Value.1 Value.2 Value.3 Value.4
## 1    a     0.1     0.2     0.3     0.4
## 5    b     2.1     2.2     2.3     2.4

df1.long <- reshape(df1.wide, varying=2:5, direction="long")
df1.long

##     Name time Value id
## 1.1    a    1   0.1  1
## 2.1    b    1   2.1  2
## 1.2    a    2   0.2  1
## 2.2    b    2   2.2  2
## 1.3    a    3   0.3  1
## 2.3    b    3   2.3  2
## 1.4    a    4   0.4  1
## 2.4    b    4   2.4  2

df1.long[order(df1.long$Name,df1.long$time),]

##     Name time Value id
## 1.1    a    1   0.1  1
## 1.2    a    2   0.2  1
## 1.3    a    3   0.3  1
## 1.4    a    4   0.4  1
## 2.1    b    1   2.1  2
## 2.2    b    2   2.2  2
## 2.3    b    3   2.3  2
## 2.4    b    4   2.4  2

## transformation using within
titanic3 <- within(titanic3, {
  agecat <- cut(age, breaks=c(0,18,30,40,80))
  family <- factor(ifelse(parch==0 & sibsp==0, "no", "yes"))
})
summary(titanic3[,c("age","agecat","family")])

##       age              agecat    family   
##  Min.   : 0.1667   (0,18] :193   no :788  
##  1st Qu.:21.0000   (18,30]:416   yes:519  
##  Median :28.0000   (30,40]:209            
##  Mean   :29.8426   (40,80]:226            
##  3rd Qu.:39.0000   NA's   :263            
##  Max.   :80.0000                          
##  NA's   :263

## transformation using transform
titanic3 <- transform(titanic3,
  agecat = cut(age, breaks=c(0,18,30,40,80)),
  family = factor(ifelse(parch==0 & sibsp==0, "no", "yes"))
)
summary(titanic3[,c("age","agecat","family")])

##       age              agecat    family   
##  Min.   : 0.1667   (0,18] :193   no :788  
##  1st Qu.:21.0000   (18,30]:416   yes:519  
##  Median :28.0000   (30,40]:209            
##  Mean   :29.8426   (40,80]:226            
##  3rd Qu.:39.0000   NA's   :263            
##  Max.   :80.0000                          
##  NA's   :263

# crosstable using descr package
install.packages("descr")

## package 'descr' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\pdmoerland\AppData\Local\Temp\RtmpkfCiIZ\downloaded_packages

library(descr)

## Warning: package 'descr' was built under R version 4.0.4

descr(titanic3$age)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  0.1667 21.0000 28.0000 29.8426 39.0000 80.0000     263

with(titanic3, CrossTable(pclass,sex,format="SPSS"))

##    Cell Contents 
## |-------------------------|
## |                   Count | 
## | Chi-square contribution | 
## |             Row Percent | 
## |          Column Percent | 
## |           Total Percent | 
## |-------------------------|
## 
## ================================
##           sex
## pclass    female    male   Total
## --------------------------------
## 1st         142     179     321 
##           6.900   3.798         
##            44.2%   55.8%   24.6%
##            30.6%   21.2%        
##            10.9%   13.7%        
## --------------------------------
## 2nd         106     171     277 
##           0.597   0.329         
##            38.3%   61.7%   21.2%
##            22.8%   20.3%        
##             8.1%   13.1%        
## --------------------------------
## 3rd         216     493     709 
##           5.064   2.787         
##            30.5%   69.5%   54.2%
##            46.6%   58.5%        
##            16.5%   37.7%        
## --------------------------------
## Total       464     843    1307 
##            35.5%   64.5%        
## ================================

## summary by subgroup, all produce basically the same output
## basic R functions
aggregate(age~pclass, data=titanic3, FUN=summary)

##   pclass age.Min. age.1st Qu. age.Median age.Mean age.3rd Qu. age.Max.
## 1    1st  0.91670    28.00000   39.00000 39.08304    49.75000 80.00000
## 2    2nd  0.66670    22.00000   29.00000 29.50670    36.00000 70.00000
## 3    3rd  0.16670    18.00000   24.00000 24.81637    32.00000 74.00000

by(titanic3,titanic3$pclass,FUN=function(x) summary(x$age))

## titanic3$pclass: 1st
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  0.9167 28.0000 39.0000 39.0830 49.7500 80.0000      39 
## ------------------------------------------------------------ 
## titanic3$pclass: 2nd
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  0.6667 22.0000 29.0000 29.5067 36.0000 70.0000      16 
## ------------------------------------------------------------ 
## titanic3$pclass: 3rd
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  0.1667 18.0000 24.0000 24.8164 32.0000 74.0000     208

with(titanic3, tapply(age,pclass,FUN=summary))

## $`1st`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  0.9167 28.0000 39.0000 39.0830 49.7500 80.0000      39 
## 
## $`2nd`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  0.6667 22.0000 29.0000 29.5067 36.0000 70.0000      16 
## 
## $`3rd`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  0.1667 18.0000 24.0000 24.8164 32.0000 74.0000     208

## using doBy package
install.packages("doBy")

## package 'doBy' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\pdmoerland\AppData\Local\Temp\RtmpkfCiIZ\downloaded_packages

library(doBy)

## Warning: package 'doBy' was built under R version 4.0.4

## 
## Attaching package: 'doBy'

## The following object is masked from 'package:dplyr':
## 
##     order_by

summaryBy(age~pclass, data=titanic3, FUN=summary)

##   pclass age.Min. age.1st Qu. age.Median age.Mean age.3rd Qu. age.Max. age.NA's
## 1    1st   0.9167          28         39 39.08304       49.75       80       39
## 2    2nd   0.6667          22         29 29.50670       36.00       70       16
## 3    3rd   0.1667          18         24 24.81637       32.00       74      208

Documentation and help

help(summary)
fit.age <- lm(age ~ pclass, data=titanic3)
summary(fit.age)

## 
## Call:
## lm(formula = age ~ pclass, data = titanic3)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -38.166  -7.816  -0.816   7.917  49.184 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  39.0830     0.7814  50.018   <2e-16 ***
## pclass2nd    -9.5763     1.1270  -8.497   <2e-16 ***
## pclass3rd   -14.2667     0.9768 -14.605   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 13.12 on 1041 degrees of freedom
##   (263 observations deleted due to missingness)
## Multiple R-squared:  0.1702, Adjusted R-squared:  0.1686 
## F-statistic: 106.8 on 2 and 1041 DF,  p-value: < 2.2e-16

class(fit.age)

## [1] "lm"

help(summary.lm)

help(descriptive)

## No documentation for 'descriptive' in specified packages and libraries:
## you could try '??descriptive'

#help.search("descriptive statistics") # same as ??descriptive
#RSiteSearch("{descriptive statistics}") # braces to indicate that words are to be searched as one entity
#install.packages("sos")
#library(sos)
#findFn("{descriptive statistics}")

Model fitting; formulas

help.search("Student")
help(t.test)
with(titanic3,t.test(age[sex=="male"],age[sex=="female"]))

## 
##  Welch Two Sample t-test
## 
## data:  age[sex == "male"] and age[sex == "female"]
## t = 2.172, df = 796.3, p-value = 0.03015
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.1933196 3.8238645
## sample estimates:
## mean of x mean of y 
##  30.58523  28.57664

t.test(subset(titanic3,sex=="male")$age,subset(titanic3,sex=="female")$age)

## 
##  Welch Two Sample t-test
## 
## data:  subset(titanic3, sex == "male")$age and subset(titanic3, sex == "female")$age
## t = 2.172, df = 796.3, p-value = 0.03015
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.1933196 3.8238645
## sample estimates:
## mean of x mean of y 
##  30.58523  28.57664

test.age <- t.test(age ~ sex, data = titanic3)
test.age

## 
##  Welch Two Sample t-test
## 
## data:  age by sex
## t = -2.172, df = 796.3, p-value = 0.03015
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -3.8238645 -0.1933196
## sample estimates:
## mean in group female   mean in group male 
##             28.57664             30.58523

fit.age <- lm(age ~ pclass, data=titanic3)
fit.age

## 
## Call:
## lm(formula = age ~ pclass, data = titanic3)
## 
## Coefficients:
## (Intercept)    pclass2nd    pclass3rd  
##      39.083       -9.576      -14.267

summary(fit.age)

## 
## Call:
## lm(formula = age ~ pclass, data = titanic3)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -38.166  -7.816  -0.816   7.917  49.184 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  39.0830     0.7814  50.018   <2e-16 ***
## pclass2nd    -9.5763     1.1270  -8.497   <2e-16 ***
## pclass3rd   -14.2667     0.9768 -14.605   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 13.12 on 1041 degrees of freedom
##   (263 observations deleted due to missingness)
## Multiple R-squared:  0.1702, Adjusted R-squared:  0.1686 
## F-statistic: 106.8 on 2 and 1041 DF,  p-value: < 2.2e-16

## output of model is a list
str(fit.age)

## List of 14
##  $ coefficients : Named num [1:3] 39.08 -9.58 -14.27
##   ..- attr(*, "names")= chr [1:3] "(Intercept)" "pclass2nd" "pclass3rd"
##  $ residuals    : Named num [1:1044] 8.184 0.917 11.917 -15.816 16.917 ...
##   ..- attr(*, "names")= chr [1:1044] "1" "2" "3" "4" ...
##  $ effects      : Named num [1:1044] -964.24 -6.27 -191.64 -15.77 16.13 ...
##   ..- attr(*, "names")= chr [1:1044] "(Intercept)" "pclass2nd" "pclass3rd" "" ...
##  $ rank         : int 3
##  $ fitted.values: Named num [1:1044] 24.8 39.1 39.1 24.8 39.1 ...
##   ..- attr(*, "names")= chr [1:1044] "1" "2" "3" "4" ...
##  $ assign       : int [1:3] 0 1 1
##  $ qr           :List of 5
##   ..$ qr   : num [1:1044, 1:3] -32.311 0.0309 0.0309 0.0309 0.0309 ...
##   .. ..- attr(*, "dimnames")=List of 2
##   .. .. ..$ : chr [1:1044] "1" "2" "3" "4" ...
##   .. .. ..$ : chr [1:3] "(Intercept)" "pclass2nd" "pclass3rd"
##   .. ..- attr(*, "assign")= int [1:3] 0 1 1
##   .. ..- attr(*, "contrasts")=List of 1
##   .. .. ..$ pclass: chr "contr.treatment"
##   ..$ qraux: num [1:3] 1.03 1.02 1.05
##   ..$ pivot: int [1:3] 1 2 3
##   ..$ tol  : num 1e-07
##   ..$ rank : int 3
##   ..- attr(*, "class")= chr "qr"
##  $ df.residual  : int 1041
##  $ na.action    : 'omit' Named int [1:263] 5 13 27 37 39 45 47 48 59 71 ...
##   ..- attr(*, "names")= chr [1:263] "5" "13" "27" "37" ...
##  $ contrasts    :List of 1
##   ..$ pclass: chr "contr.treatment"
##  $ xlevels      :List of 1
##   ..$ pclass: chr [1:3] "1st" "2nd" "3rd"
##  $ call         : language lm(formula = age ~ pclass, data = titanic3)
##  $ terms        :Classes 'terms', 'formula'  language age ~ pclass
##   .. ..- attr(*, "variables")= language list(age, pclass)
##   .. ..- attr(*, "factors")= int [1:2, 1] 0 1
##   .. .. ..- attr(*, "dimnames")=List of 2
##   .. .. .. ..$ : chr [1:2] "age" "pclass"
##   .. .. .. ..$ : chr "pclass"
##   .. ..- attr(*, "term.labels")= chr "pclass"
##   .. ..- attr(*, "order")= int 1
##   .. ..- attr(*, "intercept")= int 1
##   .. ..- attr(*, "response")= int 1
##   .. ..- attr(*, ".Environment")=<environment: R_GlobalEnv> 
##   .. ..- attr(*, "predvars")= language list(age, pclass)
##   .. ..- attr(*, "dataClasses")= Named chr [1:2] "numeric" "factor"
##   .. .. ..- attr(*, "names")= chr [1:2] "age" "pclass"
##  $ model        :'data.frame':   1044 obs. of  2 variables:
##   ..$ age   : num [1:1044] 33 40 51 9 56 19 27 27 45 56 ...
##   ..$ pclass: Factor w/ 3 levels "1st","2nd","3rd": 3 1 1 3 1 3 1 1 1 1 ...
##   ..- attr(*, "terms")=Classes 'terms', 'formula'  language age ~ pclass
##   .. .. ..- attr(*, "variables")= language list(age, pclass)
##   .. .. ..- attr(*, "factors")= int [1:2, 1] 0 1
##   .. .. .. ..- attr(*, "dimnames")=List of 2
##   .. .. .. .. ..$ : chr [1:2] "age" "pclass"
##   .. .. .. .. ..$ : chr "pclass"
##   .. .. ..- attr(*, "term.labels")= chr "pclass"
##   .. .. ..- attr(*, "order")= int 1
##   .. .. ..- attr(*, "intercept")= int 1
##   .. .. ..- attr(*, "response")= int 1
##   .. .. ..- attr(*, ".Environment")=<environment: R_GlobalEnv> 
##   .. .. ..- attr(*, "predvars")= language list(age, pclass)
##   .. .. ..- attr(*, "dataClasses")= Named chr [1:2] "numeric" "factor"
##   .. .. .. ..- attr(*, "names")= chr [1:2] "age" "pclass"
##   ..- attr(*, "na.action")= 'omit' Named int [1:263] 5 13 27 37 39 45 47 48 59 71 ...
##   .. ..- attr(*, "names")= chr [1:263] "5" "13" "27" "37" ...
##  - attr(*, "class")= chr "lm"

names(fit.age)

##  [1] "coefficients"  "residuals"     "effects"       "rank"         
##  [5] "fitted.values" "assign"        "qr"            "df.residual"  
##  [9] "na.action"     "contrasts"     "xlevels"       "call"         
## [13] "terms"         "model"

summary(fit.age$residuals)

##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -38.1663  -7.8164  -0.8164   0.0000   7.9170  49.1836

## or
summary(resid(fit.age))

##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -38.1663  -7.8164  -0.8164   0.0000   7.9170  49.1836

hist(resid(fit.age))

coef(fit.age)

## (Intercept)   pclass2nd   pclass3rd 
##   39.083038   -9.576333  -14.266671

predict(fit.age, newdata=data.frame(pclass=factor(levels(titanic3$pclass))))

##        1        2        3 
## 39.08304 29.50670 24.81637

update(fit.age,. ~ .+sex)

## 
## Call:
## lm(formula = age ~ pclass + sex, data = titanic3)
## 
## Coefficients:
## (Intercept)    pclass2nd    pclass3rd      sexmale  
##      37.060       -9.840      -14.875        3.778

## the output has a specific class, in this case lm
class(fit.age)

## [1] "lm"

## which functions have been written for this class?
help(methods)
methods(class="lm")

##  [1] add1           alias          anova          case.names     coerce        
##  [6] confint        cooks.distance deviance       dfbeta         dfbetas       
## [11] drop1          dummy.coef     effects        esticon        extractAIC    
## [16] family         formula        fortify        hatvalues      influence     
## [21] initialize     kappa          labels         linest         logLik        
## [26] model.frame    model.matrix   nobs           plot           predict       
## [31] print          proj           qr             residuals      rstandard     
## [36] rstudent       show           simulate       slotsFromS3    summary       
## [41] variable.names vcov          
## see '?methods' for accessing help and source code

## we want to know what the summary function does with an object of class lm 
help(summary) # does not give the desired help file
## we need to consult the help file for summary.lm
methods(summary)

##  [1] summary,ANY-method             summary,DBIObject-method      
##  [3] summary,diagonalMatrix-method  summary,sparseMatrix-method   
##  [5] summary.aov                    summary.aovlist*              
##  [7] summary.aspell*                summary.check_packages_in_dir*
##  [9] summary.connection             summary.data.frame            
## [11] summary.Date                   summary.default               
## [13] summary.Duration*              summary.ecdf*                 
## [15] summary.esticon_class*         summary.factor                
## [17] summary.ggplot*                summary.glm                   
## [19] summary.hcl_palettes*          summary.infl*                 
## [21] summary.Interval*              summary.linest_class*         
## [23] summary.linest_matrix_class*   summary.lm                    
## [25] summary.lmBy*                  summary.loess*                
## [27] summary.loglm*                 summary.manova                
## [29] summary.matrix                 summary.mlm*                  
## [31] summary.negbin*                summary.nls*                  
## [33] summary.packageStatus*         summary.Period*               
## [35] summary.polr*                  summary.POSIXct               
## [37] summary.POSIXlt                summary.ppr*                  
## [39] summary.prcomp*                summary.princomp*             
## [41] summary.proc_time              summary.rlang_error*          
## [43] summary.rlang_trace*           summary.rlm*                  
## [45] summary.shingle*               summary.srcfile               
## [47] summary.srcref                 summary.stepfun               
## [49] summary.stl*                   summary.table                 
## [51] summary.trellis*               summary.tukeysmooth*          
## [53] summary.vctrs_sclr*            summary.vctrs_vctr*           
## [55] summary.warnings              
## see '?methods' for accessing help and source code

help(summary.lm)
plot(fit.age)

plot(age ~ fare, data=titanic3)

## the type of plot is automaticaaly adapted to the type of variable
plot(age~pclass, data=titanic3)

Programming and ply functions

x <- 10
z <- if (x < 2) 4 else 3
z

## [1] 3

A <- matrix(1:10, 2, 5)
rownames(A) <- c("gene 1", "gene 2")
colnames(A) <- c("array 1", "array 2", "array 3","array 4", "array 5")
results <- numeric(2)
results

## [1] 0 0

for (i in 1:2) {
  results[i] <- mean(A[i, ])
}
results

## [1] 5 6

apply(A, 1, mean)

## gene 1 gene 2 
##      5      6

titanic3 <- read.dta("Exercises/titanic3.dta", convert.underscore=TRUE)
head(titanic3[,c("fare","pclass")])

##       fare pclass
## 1 211.3375    1st
## 2 151.5500    1st
## 3 151.5500    1st
## 4 151.5500    1st
## 5 151.5500    1st
## 6  26.5500    1st

with(titanic3,tapply(fare, pclass, mean,na.rm=T))

##      1st      2nd      3rd 
## 87.50899 21.17920 13.30289

# Solution using the dplyr package
#install.packages("dplyr")
library(dplyr)
summarise(group_by(titanic3, pclass), mean(fare,na.rm=TRUE))

## # A tibble: 3 x 2
##   pclass `mean(fare, na.rm = TRUE)`
## * <fct>                       <dbl>
## 1 1st                          87.5
## 2 2nd                          21.2
## 3 3rd                          13.3