# Quick start: launch the "Visualization1" exercises shipped with the
# DataScienceExercises package; "launch.browser" = TRUE is forwarded to
# shiny so the tutorial opens in the web browser.
learnr::run_tutorial(
  name = "Visualization1",
  package = "DataScienceExercises",
  shiny_args = list("launch.browser" = TRUE)
)
Session 7: Visualization
One area for which R is particularly well-known is visualization. This is mainly because of the package ggplot2. This session introduces ggplot2
and the general approach to generating visualizations in R. The good thing is that if you follow the approach described here, you can create basically every visualization type you can think of.
👨‍🏫 Lecture Slides
Either click on the slide area below or click here to download the slides.
The R script of this session
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Packages used throughout this script
library(DataScienceExercises)
library(ggplot2)
library(scales)

# Bubble plot ------
# Data set used for all bubble plot steps below
gdp_data <- DataScienceExercises::gdplifexp2007
# In the following, lines with changes are marked with '# <---'

# 1st step: create an empty plot object (no data, no mappings, no layers)
gdp_plot <- ggplot()
gdp_plot
# 2nd step: add reference to underlying data set
gdp_plot <- ggplot(data = gdp_data)
gdp_plot
# 3rd step: add aesthetic mappings (which variable goes on which axis)
gdp_plot <- ggplot(
  data = gdp_data,
  mapping = aes(x = gdpPercap, y = lifeExp) # <---
)
gdp_plot
# 4th step: add point geometry
gdp_plot <- ggplot(
  data = gdp_data,
  mapping = aes(x = gdpPercap, y = lifeExp)
) +
  geom_point() # <---
gdp_plot
# 5th step: add color and size aesthetics
gdp_plot <- ggplot(
  data = gdp_data,
  mapping = aes(
    y = lifeExp,
    color = continent, # <---
    size = pop, # <---
    x = gdpPercap
  )
) +
  geom_point()
gdp_plot
# 6th step: specify transparency of points and use a different shape to
# get a difference between the color and fill aesthetics
# source for different shapes (scroll down to the bottom):
# https://ggplot2.tidyverse.org/reference/aes_linetype_size_shape.html
gdp_plot <- ggplot(
  data = gdp_data,
  mapping = aes(
    y = lifeExp,
    fill = continent, # <---
    size = pop,
    x = gdpPercap
  )
) +
  geom_point(alpha = 0.65, shape = 21) # <---
gdp_plot
# 7th step: modify the scales for the fill, size, and x aesthetics (i.e. the
# way the values are mapped on these aesthetics):
gdp_plot <- ggplot(
  data = gdp_data,
  mapping = aes(
    y = lifeExp,
    fill = continent,
    size = pop,
    x = gdpPercap
  )
) +
  geom_point(alpha = 0.65, shape = 21) +
  scale_fill_brewer(palette = "Dark2") + # <---
  scale_size_continuous(range = c(0.1, 21)) + # <---
  scale_x_continuous(
    # Multiply axis values by 0.001 and append "k" to the labels
    labels = label_number(scale = 0.001, suffix = "k") # <---
  )
gdp_plot
# 8th step: specify the labels for x and y axis, title, and add caption
gdp_plot <- ggplot(
  data = gdp_data,
  mapping = aes(
    y = lifeExp,
    fill = continent,
    size = pop,
    x = gdpPercap
  )
) +
  geom_point(alpha = 0.65, shape = 21) +
  scale_fill_brewer(palette = "Dark2") +
  scale_size_continuous(range = c(0.1, 21)) +
  scale_x_continuous(
    labels = label_number(scale = 0.001, suffix = "k")
  ) +
  labs( # <---
    x = "GDP per capita", # <---
    y = "Life expectancy in years", # <---
    title = "Life expectancy and income per capita", # <---
    caption = "Data: Gapminder." # <---
  )
gdp_plot
# 9th step: Remove the legend for the size aesthetic (guide = "none")
gdp_plot <- ggplot(
  data = gdp_data,
  mapping = aes(
    y = lifeExp,
    fill = continent,
    size = pop,
    x = gdpPercap
  )
) +
  geom_point(alpha = 0.65, shape = 21) +
  scale_fill_brewer(palette = "Dark2") +
  scale_size_continuous(range = c(0.1, 21), guide = "none") + # <---
  scale_x_continuous(
    labels = label_number(scale = 0.001, suffix = "k")
  ) +
  labs(
    x = "GDP per capita",
    y = "Life expectancy in years",
    title = "Life expectancy and income per capita",
    caption = "Data: Gapminder."
  )
gdp_plot
# 10th step: Use theme_bw() to fix background and other smaller plot issues
# theme_bw() is one of the many ggplot2 themes that summarize many
# changes to the function theme() into one call. See an overview, e.g., here:
# https://ggplot2-book.org/themes#sec-themes
# Only theme_bw() is added here; the individual theme() adjustments are
# introduced one at a time in the 11th and 12th step.
gdp_plot <- ggplot(
  data = gdp_data,
  mapping = aes(
    y = lifeExp,
    fill = continent,
    size = pop,
    x = gdpPercap
  )
) +
  geom_point(alpha = 0.65, shape = 21) +
  scale_fill_brewer(palette = "Dark2") +
  scale_size_continuous(range = c(0.1, 21), guide = "none") +
  scale_x_continuous(
    labels = label_number(scale = 0.001, suffix = "k")
  ) +
  labs(
    x = "GDP per capita",
    y = "Life expectancy in years",
    title = "Life expectancy and income per capita",
    caption = "Data: Gapminder."
  ) +
  theme_bw() # <---
gdp_plot
# 11th step: Use theme() to fix position of the legend and remove legend title
gdp_plot <- ggplot(
  data = gdp_data,
  mapping = aes(
    y = lifeExp,
    fill = continent,
    size = pop,
    x = gdpPercap
  )
) +
  geom_point(alpha = 0.65, shape = 21) +
  scale_fill_brewer(palette = "Dark2") +
  scale_size_continuous(range = c(0.1, 21), guide = "none") +
  scale_x_continuous(
    labels = label_number(scale = 0.001, suffix = "k")
  ) +
  labs(
    x = "GDP per capita",
    y = "Life expectancy in years",
    title = "Life expectancy and income per capita",
    caption = "Data: Gapminder."
  ) +
  theme_bw() +
  theme(
    legend.position = "bottom", # <---
    legend.title = element_blank() # <---
  )
gdp_plot
# 12th step: Use theme() to fix the panel border and axis lines and ticks
gdp_plot <- ggplot(
  data = gdp_data,
  mapping = aes(
    y = lifeExp,
    fill = continent,
    size = pop,
    x = gdpPercap
  )
) +
  geom_point(alpha = 0.65, shape = 21) +
  scale_fill_brewer(palette = "Dark2") +
  scale_size_continuous(range = c(0.1, 21), guide = "none") +
  scale_x_continuous(
    labels = label_number(scale = 0.001, suffix = "k")
  ) +
  labs(
    x = "GDP per capita",
    y = "Life expectancy in years",
    title = "Life expectancy and income per capita",
    caption = "Data: Gapminder."
  ) +
  theme_bw() +
  theme(
    legend.position = "bottom",
    legend.title = element_blank(),
    panel.border = element_blank(), # <---
    axis.line = element_line(colour = "grey"), # <---
    axis.ticks = element_blank() # <---
  )
gdp_plot
# Line plot ------
gdp_data_time <- DataScienceExercises::aggGDPlifexp

# The following code is copy-pasted from the bubble plot above, with changes
# being marked by comments
gdp_line_plot <- ggplot(
  data = gdp_data_time, # Change data set
  mapping = aes( # Adjust mappings
    y = gdpPercap,
    color = continent,
    x = year
  )
) +
  geom_point(alpha = 0.65) + # Remove shape specification
  geom_line() + # Add lines to the plot
  scale_color_brewer(palette = "Dark2") + # Change fill to color
  # scale_size_continuous( # Not necessary any more
  #   range = c(0.1, 21), guide = "none") +
  scale_y_continuous( # Change from scale_x
    labels = scales::label_number(scale = 0.001, suffix = "k")
  ) +
  labs( # Adjust labels
    y = "GDP per capita",
    title = "Divergences in income",
    caption = "Data: Gapminder."
  ) +
  theme_bw() +
  theme(
    legend.position = "bottom",
    legend.title = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(colour = "grey"),
    axis.ticks = element_blank(),
    axis.title.x = element_blank() # Remove title for x axis
  )
gdp_line_plot
🎥 Lecture videos
Currently, no videos are available for this session. Check out the tutorial instead.
Mandatory Reading
- The tutorial visualization
Further Reading
- Wickham (2010), who introduces the theory underlying
ggplot2
- ggplot2 cheat sheet
- Browse the website from Data to Viz and try to re-create some of the figures yourself
Coursework
- Do the exercises
Visualization1
from the DataScienceExercises
package
Quick code for starting the exercises
References
Wickham, H. (2010) “A Layered Grammar of Graphics,” Journal of Computational and Graphical Statistics 19(1): 3–28.