# Quick code for starting the "ObjectTypes2" exercises in the browser.
learnr::run_tutorial(
  name = "ObjectTypes2",
  package = "DataScienceExercises",
  shiny_args = list("launch.browser" = TRUE)
)
🗓️ Session 4: Advanced object types in R
In this video-based lecture you learn about the most important advanced object types in R. The two object types covered in this lecture, factors and data frames, are advanced in the sense that they can be thought of as extensions of some of the basic object types you encountered before: factors are special kinds of integers, and data frames are special kinds of lists. This lecture concludes the first part of the lecture about the basics of R.
👨🏫 Lecture Slides
Either click on the slide area below or click here to download the slides.
The R script of this session
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Session Script on Advanced object types

# Digression
# A factor is an integer vector that carries a 'levels' and a 'class'
# attribute; the three calls below make this visible.
ff <- factor(c("F", "M", "M"), levels = c("F", "M", "D"))
attributes(ff) # See the class attribute 'factor'
typeof(ff)     # It still remains an integer type...
class(ff)      # ...but the class was changed
# Factors--------------------

## Intro factors-------------
# Build a factor from a character vector and fix the set of admissible levels.
f_1 <- factor(
  c(rep("F", 4), rep("D", 5), rep("M", 3)),
  levels = c("D", "F", "M")
)
f_1

## Mini exercise 1-----------

# What happens if we do not specify levels explicitly?
f_ex1 <- factor(c(rep("F", 2), rep("M", 3), rep("D", 3)))
f_ex1 # They are set automatically to all elements that occur at least once
levels(f_ex1) # Alternative to return levels as character

# What happens if the vector contains elements not pre-specified as levels?
f_ex2 <- factor(
  c(rep("F", 2), rep("M", 3), rep("D", 3)),
  levels = c("F", "M")
)
f_ex2 # Elements that are not among the levels are set to NA

## Ordered factors-----------
# With ordered = TRUE the order of 'levels' defines a ranking: low < mid < high
f_2 <- factor(
  c("high", "high", "low"),
  levels = c("low", "mid", "high"),
  ordered = TRUE
)
f_2

## Using table()-------------

# Gives a frequency table for a factor:
table(f_1)
table(f_ex1)
table(f_ex2) # NA entries are not counted by default

# Sometimes, there is a category for which no elements are available (or they
# were removed). In this case, it's good practice to set the levels explicitly
# such that these categories do not vanish:
f_3a <- factor(c(rep("M", 2), rep("D", 3)))
table(f_3a) # No females in the vector, but category not shown

f_3b <- factor(
  c(rep("M", 2), rep("D", 3)),
  levels = c("D", "F", "M")
)
table(f_3b) # Category now shown explicitly (with a count of zero)
# Data frames and tibbles----

## Data frames---------------

# Create from scratch
df_1 <- data.frame(
  "gender" = c(rep("male", 3), rep("female", 2)),
  "height" = c(189, 175, 180, 166, 150)
)

# Create from list: start with a plain named list whose elements all have the
# same length, then convert it.
df_1b <- list(
  "gender" = c(rep("male", 3), rep("female", 2)),
  "height" = c(189, 175, 180, 166, 150)
)
df_1b
is.data.frame(df_1b) # False

df_1b <- as.data.frame(df_1b)
df_1b
is.data.frame(df_1b) # Now true

# In any case, data frames are fancy lists:
typeof(df_1)
## Tibbles-------------------
# Convert the data frame created above into a tibble.
tb_1 <- tibble::as_tibble(df_1)
tb_1

## Extracting columns--------

# What's the difference between using [ or [[ ?
tb_1["gender"]   # [ keeps the container: a one-column tibble
tb_1[["gender"]] # [[ extracts the column itself: a character vector

## Useful functions----------
dplyr::glimpse(tb_1)
head(tb_1, n = 2)
# Final exercises - slide 12

# Create a factor with the levels "still", "medium" and "sparkling",
# and arbitrary instances of the three levels
f_ex3 <- factor(c(rep("still", 4), rep("medium", 5), rep("sparkling", 3)))

# Get the relative frequencies for "medium" of this factor
abs_freqs <- table(f_ex3)
n_elements <- length(f_ex3)

# Absolute freqs divided by total number (in percent, two decimals):
rel_freqs <- round(abs_freqs / n_elements * 100, 2)
rel_freqs           # All three categories
rel_freqs["medium"] # Only the category asked for
# Create a data frame with two columns, one called "nb" containing the
# numbers 1 to 5 as double, the other called "char" containing the
# numbers 6 to 10 as character

# Create columns
# Note: seq(1, 5) returns an *integer* vector, so "nb" in df_s_a is not yet
# of type double as the exercise demands.
nb_ <- seq(1, 5)
char_ <- as.character(seq(6, 10))

# Create data frame from the pre-built columns
df_s_a <- data.frame(
  "nb" = nb_,
  "char" = char_
)

# Same data frame, but with "nb" explicitly converted to double
df_s <- data.frame(
  "nb" = as.double(seq(1, 5)),
  "char" = as.character(seq(6, 10))
)
# Transform this data frame into a tibble!
tb_s <- tibble::as_tibble(df_s)
tb_s

# Extract the second column of this tibble such that you have a vector
tb_s[["char"]]
tb_s["char"] # Would have given a tibble
🎥 Lecture videos
All the videos are available via this playlist.
Expand to access the videos directly
📚 Mandatory Reading
Read the following tutorials:
🏆 Further readings
I suggest you read these references after you learned about data frames in session 4 and data wrangling techniques in sessions 8 and 9.
- Chapter 16 in Wickham et al. (2023).
✍️ Coursework
- Do the
ObjectTypes2
exercises of the package DataScienceExercises
Quick code for starting the exercises
- If you have questions or problems, please post them in the Moodle forum
References
Wickham, H., Çetinkaya-Rundel, M. and Grolemund, G. (2023) R for data science: Import, tidy, transform, visualize, and model data, 2nd edition., Beijing et al.: O’Reilly, available at https://r4ds.hadley.nz/.