Plotting in R, continued

Data visualization is a powerful tool for understanding your data and teasing out relationships. This lab will illustrate how to visualize the distribution of variables, as well as associations between variables, using base R graphics and ggplot2, the tidyverse plotting package.

For this lab, we will use two important packages:

- dplyr to manage and manipulate our data
- ggplot2 to actually produce the visualizations

To see all of the cool data visualization that you can do in R, visit the R Graph Gallery: https://www.r-graph-gallery.com/

#install.packages("ggplot2")
#install.packages('palmerpenguins')
library(ggplot2)
library(dplyr)
library(palmerpenguins)

The data for this lab can be accessed through a package called palmerpenguins, which contains data on penguins’ species, island, and body dimensions. This dataset will not actually appear in the global environment when you load the palmerpenguins library, but it is still accessible.

We will start by examining some descriptive statistics for each variable using the summary() command.

# Print per-column descriptive statistics for the penguins data frame;
# the NA's rows in the output show how many values are missing per column.
summary(penguins)
##       species          island    bill_length_mm  bill_depth_mm  
##  Adelie   :152   Biscoe   :168   Min.   :32.10   Min.   :13.10  
##  Chinstrap: 68   Dream    :124   1st Qu.:39.23   1st Qu.:15.60  
##  Gentoo   :124   Torgersen: 52   Median :44.45   Median :17.30  
##                                  Mean   :43.92   Mean   :17.15  
##                                  3rd Qu.:48.50   3rd Qu.:18.70  
##                                  Max.   :59.60   Max.   :21.50  
##                                  NA's   :2       NA's   :2      
##  flipper_length_mm  body_mass_g       sex           year     
##  Min.   :172.0     Min.   :2700   female:165   Min.   :2007  
##  1st Qu.:190.0     1st Qu.:3550   male  :168   1st Qu.:2007  
##  Median :197.0     Median :4050   NA's  : 11   Median :2008  
##  Mean   :200.9     Mean   :4202                Mean   :2008  
##  3rd Qu.:213.0     3rd Qu.:4750                3rd Qu.:2009  
##  Max.   :231.0     Max.   :6300                Max.   :2009  
##  NA's   :2         NA's   :2
# Base-R scatterplot of body mass against flipper length for all penguins,
# drawn as red open triangles (pch = 2) with an empty title.
with(
  penguins,
  plot(
    x = flipper_length_mm,
    y = body_mass_g,
    main = "",
    xlab = "Flipper length (mm)",
    ylab = "Body Mass (g)",
    col = "red",
    pch = 2
  )
)

# Save the same scatterplot to a 4 x 4 inch PDF in the working directory.
# The file is named after what is actually plotted (body mass vs. flipper
# length), in line with the per-species file "MassvsFlipper_species.pdf".
pdf("MassvsFlipper.pdf", height = 4, width = 4)
plot(penguins$flipper_length_mm,
     penguins$body_mass_g,
     xlab = "Flipper length (mm)",
     ylab = "Body Mass (g)",
     main = "",
     pch = 2, col = "red")
# Close the PDF device; nothing further is drawn into the file after this.
dev.off()
## quartz_off_screen 
##                 2
# scales supplies alpha(), used below to make plotting colours semi-transparent.
#install.packages('scales', dependencies = TRUE)
library(scales)
# flipper_length_mm contains missing values, so a plain max() returns NA.
max(penguins$flipper_length_mm)
## [1] NA
# na.rm = TRUE drops the missing values before the maximum is taken.
max(penguins$flipper_length_mm, na.rm = TRUE)
## [1] 231
# Shared axis limits for the species plots below: the observed range of each
# variable (missing values ignored), widened by one unit on either side.
xlimits <- range(penguins$flipper_length_mm, na.rm = TRUE) + c(-1, 1)
ylimits <- range(penguins$body_mass_g, na.rm = TRUE) + c(-1, 1)

# Overlay the three species in one scatterplot of body mass vs. flipper
# length. Adelie opens the plot (and fixes the axes through xlimits/ylimits);
# the other two species are added on top with points().
plot(penguins$flipper_length_mm[penguins$species == "Adelie"],
     penguins$body_mass_g[penguins$species == "Adelie"],
     xlim = xlimits,
     ylim = ylimits,
     xlab = "Flipper length (mm)",
     ylab = "Body Mass (g)",
     pch = 2, col = "navyblue")
points(penguins$flipper_length_mm[penguins$species == "Chinstrap"],
       penguins$body_mass_g[penguins$species == "Chinstrap"],
       pch = 19, col = "deeppink",
       lwd = 2)
# pch = 21 is a circle with a separate outline (col) and fill (bg); the fill
# is made half-transparent with alpha() so overlapping points stay visible.
points(penguins$flipper_length_mm[penguins$species == "Gentoo"],
       penguins$body_mass_g[penguins$species == "Gentoo"],
       pch = 21, col = "deepskyblue",
       bg = alpha("deepskyblue", 0.5))
# The legend mirrors the plotted symbols exactly: same pch, outline colour,
# fill (pt.bg) and symbol line width (pt.lwd) as the plot()/points() calls.
# Only point symbols are described, so no lty/lwd line entries are needed.
legend("topleft",
       legend = c("Adelie", "Chinstrap", "Gentoo"),
       pch = c(2, 19, 21),
       col = c("navyblue", "deeppink", "deepskyblue"),
       pt.bg = c(NA, NA, alpha("deepskyblue", 0.5)),
       pt.lwd = c(1, 2, 1),
       bty = "n")

# One panel per species, side by side. par() returns the previous settings,
# which are kept so the 1 x 3 layout can be undone once the panels are drawn;
# otherwise every later plot on this device would still land in the grid.
old_par <- par(mfrow = c(1, 3))
# All three panels share xlimits/ylimits so they can be compared directly.
plot(penguins$flipper_length_mm[penguins$species == "Adelie"],
     penguins$body_mass_g[penguins$species == "Adelie"],
     xlim = xlimits,
     ylim = ylimits,
     xlab = "Flipper length (mm)",
     ylab = "Body Mass (g)",
     main = "Adelie",
     pch = 19, col = "navyblue")
plot(penguins$flipper_length_mm[penguins$species == "Chinstrap"],
     penguins$body_mass_g[penguins$species == "Chinstrap"],
     xlim = xlimits,
     ylim = ylimits,
     xlab = "Flipper length (mm)",
     ylab = "Body Mass (g)",
     main = "Chinstrap",
     pch = 19, col = "deeppink")
plot(penguins$flipper_length_mm[penguins$species == "Gentoo"],
     penguins$body_mass_g[penguins$species == "Gentoo"],
     xlim = xlimits,
     ylim = ylimits,
     xlab = "Flipper length (mm)",
     ylab = "Body Mass (g)",
     main = "Gentoo",
     pch = 19, col = "deepskyblue")
# Restore the layout that was active before this figure.
par(old_par)

# Species names (also used as panel titles) and the colour for each species,
# in matching order.
titles <- c("Adelie", "Chinstrap", "Gentoo")
colors <- c("navyblue", "deeppink", "deepskyblue")

# Write one scatterplot per species into a single 4 x 4 inch PDF; each plot()
# call goes onto its own page.
pdf("MassvsFlipper_species.pdf", height = 4, width = 4)
# One plot per page. This is set once, since it does not change per species.
par(mfrow = c(1, 1))
# seq_along() keeps the loop in step with titles if species are added/removed.
for (i in seq_along(titles)) {
  plot(penguins$flipper_length_mm[penguins$species == titles[i]],
       penguins$body_mass_g[penguins$species == titles[i]],
       xlim = xlimits,
       ylim = ylimits,
       xlab = "Flipper length (mm)",
       ylab = "Body Mass (g)",
       main = titles[i],
       pch = 2, col = colors[i])
}
# Close the PDF device; nothing further is drawn into the file after this.
dev.off()
## quartz_off_screen 
##                 2

Practice 2

  1. What are the mean bill length and mean bill depth in this dataset?
  2. If a bill length of 50 mm or more counts as long, how many long-billed penguins are there in the full dataset (all penguins, not only females)?
  3. Create a new variable in the dataset to represent the ratio of bill length to depth.
  4. Plot your new variable in a boxplot and change the color of the inside of the boxplot. HINT: see ?boxplot.