The penguins are back! And this time, it’s for a Beak-Off. They are organising themselves into teams to see which team can show off the biggest range of beak lengths. Retrace our steps in this code-along workshop where we built a graph, made it better (and interactive!), and optimised the parameterised plotting function to address all the judges’ requirements.
Final code, but see below for a few tweaks based on the Q&A
Demo
First we create the function… (unfold to see all the details!)
Code
library(ggplot2)# Set up the datapenguin_df <- palmerpenguins::penguins_raw |> janitor::clean_names() |> dplyr::filter(!is.na(culmen_length_mm)) |> dplyr::mutate(species =gsub("(.)( )(.*)", "\\1", species))# Set up the theme functiontheme_beak_off <-function(base_text_size =16,base_font ="Work Sans",title_font ="Poppins") {theme_minimal(base_size = base_text_size) +theme(text =element_text(family = base_font),axis.text =element_text(colour ="#495058"),legend.position ="none",axis.title =element_blank(),plot.title =element_text(family = title_font,face ="bold",size =rel(1.5) ),panel.grid =element_line(colour ="#FFFFFF"),plot.background =element_rect(fill ="#F4F5F6", colour ="#F4F5F6"),plot.caption =element_text(size =rel(0.8),margin =margin(base_text_size /2, 0, 0, 0) ),plot.margin =margin(rep(base_text_size *1.5, 4)) )}# Create the plot functionmake_beak_off_plot <-function(df = penguin_df, team_name,grouping_variable ="species",base_font ="Work Sans",title_font ="Arial",text_size =16, ...) { df <- dplyr::filter(df, !is.na(get(grouping_variable))) beak_means_df <- df |> dplyr::group_by(get(grouping_variable)) |> dplyr::summarise(mean_length =mean(culmen_length_mm, na.rm =TRUE)) |> dplyr::rename(group =`get(grouping_variable)`) beak_range_df <- df |> dplyr::filter( culmen_length_mm ==max(culmen_length_mm, na.rm =TRUE) | culmen_length_mm ==min(culmen_length_mm, na.rm =TRUE) ) interactive_plot <- df |>ggplot(aes(x = culmen_length_mm, y =get(grouping_variable))) +geom_vline(data = beak_range_df,aes(xintercept = culmen_length_mm),linetype =3,colour ="#1A242F" ) +geom_segment(data = beak_means_df,aes(x = mean_length,xend = mean_length,y =-Inf,yend = group ),linetype =3 ) + ggtext::geom_textbox(data = beak_range_df,aes(y =max(df |> dplyr::pull(get(grouping_variable))),label = dplyr::case_when( culmen_length_mm ==min(culmen_length_mm) ~paste0("🞀 ", culmen_length_mm, "mm"),TRUE~paste0(culmen_length_mm, "mm", " 🞂") ),hjust = dplyr::case_when( culmen_length_mm ==min(culmen_length_mm) ~0,TRUE~1 ),halign = dplyr::case_when( culmen_length_mm ==min(culmen_length_mm) ~0,TRUE~1 ) ),family = base_font,colour ="#1A242F",fontface ="bold",fill =NA,box.padding =unit(0, "pt"),box.colour =NA,nudge_y =0.33 ) + ggtext::geom_textbox(data = beak_means_df,aes(x = mean_length,y = group,label =paste0( stringr::str_to_sentence(group)," mean<br>**", janitor::round_half_up(mean_length),"mm**" ) ),hjust =0,# Thanks to Martine Jansen for this suggestion to avoid the label# boxes overlapping with the lines beyond what's necessary.# (They're deliberately not transparent because that would make# it hard to read if there was a genuine overlap - topical!)width =unit(6, "lines"),nudge_y =-0.3,box.colour =NA,family = base_font,colour ="#1A242F",fill ="#F4F5F6" ) + ggiraph::geom_jitter_interactive(aes(x = culmen_length_mm,y =get(grouping_variable),fill = species,size = body_mass_g,tooltip =paste0("<b>", individual_id, "</b> from ", island) ),shape =21,width =0,height =0.15,colour ="#1A242F",stroke =0.5 ) +labs(title =paste0("Team ", team_name," - ",max(beak_range_df$culmen_length_mm) -min(beak_range_df$culmen_length_mm),"mm" ) ) +scale_fill_manual(values =c("Adelie"="#F49F03","Chinstrap"="#F4B9C4","Gentoo"="#11541F" ) ) +scale_x_continuous(label =function(x) paste0(x, "mm"),limits =c(30, 62) ) +theme_beak_off(base_font = base_font,title_font = title_font,base_text_size = text_size ) +theme(axis.text.y =element_blank(), ...) ggiraph::girafe(ggobj = interactive_plot,options =list(ggiraph::opts_tooltip(css =paste0("background-color:#1A242F;color:#F4F5F6;padding:7.5px;letter-spacing:0.025em;line-height:1.3;border-radius:5px;","font-family:", base_font,";" ) )),# Here I've specified the size of the output, which I didn't do# during the demo because the fig.height specified in the slides# takes care of thatwidth_svg =9,height_svg =9 )}
And now, all we need to do is this…
make_beak_off_plot(# Take a random sample of the data - the penguins didn't want# any long-term damage to their friendshipsdf = dplyr::slice_sample(penguin_df, n =150),# Give the team a nameteam ="Beak Buddies")
or this…
make_beak_off_plot(# Take a different random sample of the datadf = dplyr::slice_sample(penguin_df, n =150),# Give the team a nameteam ="Islands Matter Too",grouping_variable ="island")
or this…
make_beak_off_plot(# Take a smaller sample of the datadf = dplyr::slice_sample(penguin_df, n =25),# Give the team a nameteam ="Small and Mighty")
or this…
make_beak_off_plot(# This team wanted to be more deliberate about representationdf = penguin_df |> dplyr::filter(sex =="MALE") |> dplyr::slice_sample(n =25) |>rbind( penguin_df |> dplyr::filter(sex =="FEMALE") |> dplyr::slice_sample(n =25) ),# Give the team a nameteam ="Equal Beakqual",grouping_variable ="sex")
Have a play around with the function and see what you’d improve! May the best penguins win!