resources – Page 3

Iteration

# Work on a small slice of the built-in mtcars data
mtc <- mtcars[, c(1, 2, 3)] # keep only the first three columns in a new object

#### basic approach:

# one explicit mean() call per column
mean(mtc[["mpg"]])
mean(mtc[["cyl"]])
mean(mtc[["disp"]])

#### for loop approach:

output <- numeric(ncol(mtc)) # reserve one numeric slot per column up front

# walk the column positions and store each column's mean in its slot
for (i in seq_len(ncol(mtc))) {
  output[i] <- mean(mtc[[i]])
}

# show the collected means
output

[1]  20.09062   6.18750 230.72188
# The loop left its helper objects behind: list what is defined at top level.
ls() # inspect global environment

[1] "i" "mtc" "output"
rm(list = c("i", "output")) # drop the loop's leftovers from the workspace
#### functional programming approach:

# Compute the mean of every column of a data frame.
#
# df: a data frame (or list); each element is passed to mean() in turn.
# Returns an unnamed vector holding one mean per column, in column order.
col_mean <- function(df) {
  n_cols <- length(df)
  means <- numeric(n_cols) # pre-allocate one slot per column
  for (j in seq_len(n_cols)) {
    means[j] <- mean(df[[j]])
  }
  means
}
# the loop is now reusable: the same call works on different data frames
col_mean(df = mtc)
col_mean(df = mtcars)
col_mean(df = iris[, 1:4]) # only the first four columns of iris

[1]  20.09062   6.18750 230.72188

[1]  20.090625   6.187500 230.721875 146.687500   3.596563   3.217250  17.848750   0.437500   0.406250   3.687500   2.812500

[1] 5.843333 3.057333 3.758000 1.199333
# List the top-level objects again to see what the function approach left behind.
ls()

[1] "col_mean" "mtc"   
#### apply approach:

# apply() calls a function along one margin of a dataset
apply(X = mtc, MARGIN = 1, FUN = mean) # MARGIN = 1: one mean per row
apply(X = mtc, MARGIN = 2, FUN = mean) # MARGIN = 2: one mean per column

# either way, apply() hands the results back as a vector
# sapply() calls a function on each column and also returns a vector
sapply(X = mtc, FUN = mean)

  mpg       cyl      disp 
20.09062   6.18750 230.72188 
# lapply() calls a function on each column and keeps the results in a list
lapply(X = mtc, FUN = mean)

$mpg
[1] 20.09062
$cyl
[1] 6.1875
$disp
[1] 230.7219
# tapply() splits a vector into groups defined by a second (INDEX) vector,
# calls a function on each group,
# and returns one result per group in a vector
tapply(X = mtc[["mpg"]], INDEX = mtc[["cyl"]], FUN = mean)

   4        6        8 
26.66364 19.74286 15.10000 
# Install purrr only when it is missing, so that re-running the script does not
# re-download the package (and does not fail when offline) every time.
if (!requireNamespace("purrr", quietly = TRUE)) {
  install.packages("purrr")
}
library(purrr) # attach purrr: provides the map() family and the %>% pipe used below
# map() always hands back a list, one element per column
map(.x = mtc, .f = mean)
$mpg
[1] 20.09062
$cyl
[1] 6.1875
$disp
[1] 230.7219
# map_lgl() returns a logical vector
# a mean is a number, not a logical, so a predicate function is used here instead
map_lgl(.x = mtc, .f = is.logical) # every column of mtc is numeric, hence FALSE
mpg   cyl  disp 
FALSE FALSE FALSE 
# map_int() returns an integer vector
# a mean is rarely a whole number, so a different function is used here as well
map_int(.x = mtc, .f = is.integer) # each FALSE is converted to the integer 0
mpg  cyl disp 
  0    0    0 
# map_dbl() returns a double vector
map_dbl(.x = mtc, .f = mean)
  mpg       cyl      disp 
20.09062   6.18750 230.72188
# map_chr returns a character vector.
# mean() yields doubles, and purrr >= 1.0.0 deprecates turning doubles into
# strings implicitly, so each mean is formatted explicitly. "%f" prints six
# decimals, e.g. "6.187500".
map_chr(mtc, ~ sprintf("%f", mean(.x)))
     mpg          cyl         disp 
"20.090625"   "6.187500" "230.721875" 

# append one all-NA row to mtc, then take the column means again
map_dbl(.x = rbind(mtc, rep(NA, 3)), .f = mean) # every mean is NA because of the missing row
map_dbl(.x = rbind(mtc, rep(NA, 3)), .f = mean, na.rm = TRUE) # na.rm is passed on to mean(), which then skips the NAs
mpg  cyl disp 
 NA   NA   NA 

     mpg       cyl      disp 
20.09062   6.18750 230.72188 
# Fit a separate linear model of mpg on disp for each number of cylinders.
mtc %>% 
  split(.$cyl) %>% # one data frame per distinct value of cyl, collected in a list
  map(~ lm(mpg ~ disp, data = .)) # fit the model on each piece; returns a list of lm fits

$4
Call:
lm(formula = mpg ~ disp, data = .)
Coefficients:
(Intercept)         disp  
    40.8720      -0.1351  
$6
Call:
lm(formula = mpg ~ disp, data = .)
Coefficients:
(Intercept)         disp  
  19.081987     0.003605  
$8
Call:
lm(formula = mpg ~ disp, data = .)
Coefficients:
(Intercept)         disp  
   22.03280     -0.01963  
# Same per-cylinder models, reduced to their coefficient tables.
mtc %>% 
  split(.$cyl) %>% # one data frame per distinct value of cyl
  map(~ lm(mpg ~ disp, data = .)) %>% # one lm fit per piece
  map(summary) %>% # returns a list of linear model summaries
  map("coefficients") # a string extracts the element of that name from each summary
$4
              Estimate Std. Error   t value     Pr(>|t|)
(Intercept) 40.8719553 3.58960540 11.386197 1.202715e-06
disp        -0.1351418 0.03317161 -4.074021 2.782827e-03
$6
                Estimate Std. Error   t value    Pr(>|t|)
(Intercept) 19.081987419 2.91399289 6.5483988 0.001243968
disp         0.003605119 0.01555711 0.2317344 0.825929685
$8
               Estimate  Std. Error   t value     Pr(>|t|)
(Intercept) 22.03279891 3.345241115  6.586311 2.588765e-05
disp        -0.01963409 0.009315926 -2.107584 5.677488e-02
# Download swirl only when it is not installed yet, so that re-running these
# lines does not reinstall the package each time.
if (!requireNamespace("swirl", quietly = TRUE)) {
  install.packages("swirl") # download swirl package
}
library(swirl) # load in swirl package

1. Resources

2. Functional programming example

Share this:

Step 1: An R Folder (15 min)

Step 2: Handy Cheat Sheets (15 min)

Step 3: swirl Away in RStudio (8h)

Step 4: A Pirate’s Guide to R (10h)

Step 5: R for Data Science (16h)

Step 6: Specialize (∞)

Share this:

Share this:

Table of Contents (clickable)

Introductory R

Introductory Books

Online Courses

Style Guides

Advanced R

Package Development

Non-standard Evaluation

Functional Programming

Cheat Sheets

Data Manipulation

Data Visualization

Colors

Interactive / HTML / JavaScript widgets

ggplot2

ggplot2 extensions

Miscellaneous

Shiny, Dashboards, & Apps

Markdown & Other Output Formats

Cloud, Server, & Database

Statistical Modeling & Machine Learning

Books

Courses

Cheat sheets

Time series

Survival analysis

Bayesian

Miscellaneous

Natural Language Processing & Text Mining

Regular Expressions

Geographic & Spatial mapping

Bioinformatics & Computational Biology

Integrated Development Environments (IDEs) & Graphical User Interfaces (GUIs)

R & other software and languages

R & Excel

R & Python

R & SQL

R Help, Connect, & Inspiration

R Blogs

R Conferences, Events, & Meetups

R Jobs

Share this:

Step 3: `swirl` Away in RStudio (8h)

Integrated Development Environments (IDEs) &
Graphical User Interfaces (GUIs)