Data visualization is a powerful tool for understanding your data and teasing out relationships. This lab will illustrate how to visualize the distribution of variables, as well as associations between variables, using base R and the tidyverse plotting package, ggplot2.
For this lab, we will use two important packages:
- dplyr to manage and manipulate our data
- ggplot2 to actually produce the visualizations
To see all of the cool data visualization that you can do in R, visit the R Graph Gallery: https://www.r-graph-gallery.com/
#install.packages("ggplot2")
#install.packages('palmerpenguins')
library(ggplot2)
library(dplyr)
library(palmerpenguins)
The data for this lab can be accessed through a package called palmerpenguins, which contains data on penguins’ species, island, and body dimensions. This dataset will not actually appear in the global environment when you load the palmerpenguins library, but it is still accessible.
We will start by examining some descriptive statistics for each variable using the summary() command.
# Print a per-column summary of the penguins data: level counts for the factor
# columns, min/quartiles/mean/max for the numeric columns, and NA counts.
summary(penguins)
## species island bill_length_mm bill_depth_mm
## Adelie :152 Biscoe :168 Min. :32.10 Min. :13.10
## Chinstrap: 68 Dream :124 1st Qu.:39.23 1st Qu.:15.60
## Gentoo :124 Torgersen: 52 Median :44.45 Median :17.30
## Mean :43.92 Mean :17.15
## 3rd Qu.:48.50 3rd Qu.:18.70
## Max. :59.60 Max. :21.50
## NA's :2 NA's :2
## flipper_length_mm body_mass_g sex year
## Min. :172.0 Min. :2700 female:165 Min. :2007
## 1st Qu.:190.0 1st Qu.:3550 male :168 1st Qu.:2007
## Median :197.0 Median :4050 NA's : 11 Median :2008
## Mean :200.9 Mean :4202 Mean :2008
## 3rd Qu.:213.0 3rd Qu.:4750 3rd Qu.:2009
## Max. :231.0 Max. :6300 Max. :2009
## NA's :2 NA's :2
# Base-R scatter plot of body mass against flipper length for all penguins,
# drawn with red open triangles (pch = 2) and no main title.
with(
  penguins,
  plot(
    x = flipper_length_mm,
    y = body_mass_g,
    pch = 2,
    col = "red",
    main = "",
    xlab = "Flipper length (mm)",
    ylab = "Body Mass (g)"
  )
)
# Save the same scatter plot to a 4 x 4 inch PDF: open the PDF device, draw the
# plot onto it, then close the device so the file is written to disk.
# NOTE(review): the file name says "WeightvsHeight", but the plot shows body
# mass against flipper length -- consider renaming the output file.
pdf(file = "WeightvsHeight.pdf", width = 4, height = 4)
with(
  penguins,
  plot(
    x = flipper_length_mm,
    y = body_mass_g,
    pch = 2,
    col = "red",
    main = "",
    xlab = "Flipper length (mm)",
    ylab = "Body Mass (g)"
  )
)
dev.off()
## quartz_off_screen
## 2
#install.packages('scales', dependencies = T)
library(scales)
# flipper_length_mm contains missing values, so a plain max() returns NA.
max(penguins$flipper_length_mm)
## [1] NA
# na.rm = TRUE drops the missing values before the maximum is taken.
max(penguins$flipper_length_mm, na.rm = TRUE)
## [1] 231
# Shared axis limits for the per-species plots: the observed range of each
# variable (ignoring NAs), padded by one unit on either side.
axis_padding <- c(-1, 1)
xlimits <- range(penguins$flipper_length_mm, na.rm = TRUE) + axis_padding
ylimits <- range(penguins$body_mass_g, na.rm = TRUE) + axis_padding
# Overlay all three species on one scatter plot, using the shared axis limits
# so that no points fall outside the plotting region.

# Adelie: navy open triangles. plot() sets up the axes and labels.
plot(penguins$flipper_length_mm[penguins$species == "Adelie"],
     penguins$body_mass_g[penguins$species == "Adelie"],
     xlim = xlimits,
     ylim = ylimits,
     xlab = "Flipper length (mm)",
     ylab = "Body Mass (g)",
     pch = 2, col = "navyblue")

# Chinstrap: solid pink circles, added to the existing plot with points().
points(penguins$flipper_length_mm[penguins$species == "Chinstrap"],
       penguins$body_mass_g[penguins$species == "Chinstrap"],
       pch = 19, col = "deeppink",
       lwd = 2)

# Gentoo: circles with a sky-blue border and a 50%-transparent fill
# (pch = 21 is the filled-circle symbol that honours `bg`).
points(penguins$flipper_length_mm[penguins$species == "Gentoo"],
       penguins$body_mass_g[penguins$species == "Gentoo"],
       pch = 21, col = "deepskyblue",
       bg = alpha("deepskyblue", 0.5))

# Legend symbols mirror the plotted symbols exactly: same pch, border colour,
# fill (pt.bg) and symbol line width (pt.lwd) as the plot()/points() calls.
# No lty/lwd is passed, so no empty line-segment space is reserved.
legend("topleft",
       legend = c("Adelie", "Chinstrap", "Gentoo"),
       pch = c(2, 19, 21),
       col = c("navyblue", "deeppink", "deepskyblue"),
       pt.bg = c(NA, NA, alpha("deepskyblue", 0.5)),
       pt.lwd = c(1, 2, 1),
       bty = "n")
# Draw one panel per species, side by side, on common axes so the panels are
# directly comparable.
panel_species <- c("Adelie", "Chinstrap", "Gentoo")
panel_colors <- c("navyblue", "deeppink", "deepskyblue")

# par() returns the previous settings; keep them so the 1 x 3 layout does not
# leak into later plots drawn on the same device.
old_par <- par(mfrow = c(1, 3))
for (i in seq_along(panel_species)) {
  in_panel <- penguins$species == panel_species[i]
  plot(penguins$flipper_length_mm[in_panel],
       penguins$body_mass_g[in_panel],
       xlim = xlimits,
       ylim = ylimits,
       xlab = "Flipper length (mm)",
       ylab = "Body Mass (g)",
       main = panel_species[i],
       pch = 19, col = panel_colors[i])
}
par(old_par)
# Species names double as plot titles; colors are matched to them by position.
titles <- c("Adelie", "Chinstrap", "Gentoo")
colors <- c("navyblue", "deeppink", "deepskyblue")

# Write one scatter plot per species to a 4 x 4 inch PDF, one plot per page.
pdf("MassvsFlipper_species.pdf", height = 4, width = 4)
# Single-panel layout; set once because it does not change between iterations.
par(mfrow = c(1, 1))
# seq_along() keeps the loop in step with `titles` if species are added/removed.
for (i in seq_along(titles)) {
  in_species <- penguins$species == titles[i]
  plot(penguins$flipper_length_mm[in_species],
       penguins$body_mass_g[in_species],
       xlim = xlimits,
       ylim = ylimits,
       xlab = "Flipper length (mm)",
       ylab = "Body Mass (g)",
       main = titles[i],
       pch = 2, col = colors[i])
}
# Close the PDF device so the file is flushed to disk.
dev.off()
## quartz_off_screen
## 2
# Open the help page for boxplot().
?boxplot
.