Visualise, Optimise, Parameterise!

The penguins are back! And this time, it’s for a Beak-Off. They are organising themselves into teams to see which team can show off the biggest range of beak lengths. Retrace our steps in this code-along workshop where we built a graph, made it better (and interactive!), and optimised the parameterised plotting function to address all the judges’ requirements.

Published

June 11, 2025

Recording

Slides

View full screen

Code snippets and resources shared during the workshop

Demo

First we create the function… (unfold to see all the details!)

Code
library(ggplot2)

# Set up the data
penguin_df <- palmerpenguins::penguins_raw |>
  janitor::clean_names() |>
  dplyr::filter(!is.na(culmen_length_mm)) |>
  dplyr::mutate(species = gsub("(.)( )(.*)", "\\1", species))

# Set up the theme function
theme_beak_off <- function(
  base_text_size = 16,
  base_font = "Work Sans",
  title_font = "Poppins"
) {
  theme_minimal(base_size = base_text_size) +
    theme(
      text = element_text(family = base_font),
      axis.text = element_text(colour = "#495058"),
      legend.position = "none",
      axis.title = element_blank(),
      plot.title = element_text(
        family = title_font,
        face = "bold",
        size = rel(1.5)
      ),
      panel.grid = element_line(colour = "#FFFFFF"),
      plot.background = element_rect(fill = "#F4F5F6", colour = "#F4F5F6"),
      plot.caption = element_text(
        size = rel(0.8),
        margin = margin(base_text_size / 2, 0, 0, 0)
      ),
      plot.margin = margin(rep(base_text_size * 1.5, 4))
    )
}

# Create the plot function

make_beak_off_plot <- function(
  df = penguin_df,
  team_name,
  grouping_variable = "species",
  base_font = "Work Sans",
  title_font = "Arial",
  text_size = 16,
  ...
) {
  df <- dplyr::filter(df, !is.na(get(grouping_variable)))

  beak_means_df <- df |>
    dplyr::group_by(get(grouping_variable)) |>
    dplyr::summarise(mean_length = mean(culmen_length_mm, na.rm = TRUE)) |>
    dplyr::rename(group = `get(grouping_variable)`)

  beak_range_df <- df |>
    dplyr::filter(
      culmen_length_mm == max(culmen_length_mm, na.rm = TRUE) |
        culmen_length_mm == min(culmen_length_mm, na.rm = TRUE)
    )

  interactive_plot <- df |>
    ggplot(aes(x = culmen_length_mm, y = get(grouping_variable))) +
    geom_vline(
      data = beak_range_df,
      aes(xintercept = culmen_length_mm),
      linetype = 3,
      colour = "#1A242F"
    ) +
    geom_segment(
      data = beak_means_df,
      aes(
        x = mean_length,
        xend = mean_length,
        y = -Inf,
        yend = group
      ),
      linetype = 3
    ) +
    ggtext::geom_textbox(
      data = beak_range_df,
      aes(
        y = max(df |> dplyr::pull(get(grouping_variable))),
        label = dplyr::case_when(
          culmen_length_mm == min(culmen_length_mm) ~
            paste0("🞀 ", culmen_length_mm, "mm"),
          TRUE ~ paste0(culmen_length_mm, "mm", " 🞂")
        ),
        hjust = dplyr::case_when(
          culmen_length_mm == min(culmen_length_mm) ~ 0,
          TRUE ~ 1
        ),
        halign = dplyr::case_when(
          culmen_length_mm == min(culmen_length_mm) ~ 0,
          TRUE ~ 1
        )
      ),
      family = base_font,
      colour = "#1A242F",
      fontface = "bold",
      fill = NA,
      box.padding = unit(0, "pt"),
      box.colour = NA,
      nudge_y = 0.33
    ) +
    ggtext::geom_textbox(
      data = beak_means_df,
      aes(
        x = mean_length,
        y = group,
        label = paste0(
          stringr::str_to_sentence(group),
          " mean<br>**",
          janitor::round_half_up(mean_length),
          "mm**"
        )
      ),
      hjust = 0,
      # Thanks to Martine Jansen for this suggestion to avoid the label
      # boxes overlapping with the lines beyond what's necessary.
      # (They're deliberately not transparent because that would make
      # it hard to read if there was a genuine overlap - topical!)
      width = unit(6, "lines"),
      nudge_y = -0.3,
      box.colour = NA,
      family = base_font,
      colour = "#1A242F",
      fill = "#F4F5F6"
    ) +
    ggiraph::geom_jitter_interactive(
      aes(
        x = culmen_length_mm,
        y = get(grouping_variable),
        fill = species,
        size = body_mass_g,
        tooltip = paste0("<b>", individual_id, "</b> from ", island)
      ),
      shape = 21,
      width = 0,
      height = 0.15,
      colour = "#1A242F",
      stroke = 0.5
    ) +
    labs(
      title = paste0(
        "Team ",
        team_name,
        " - ",
        max(beak_range_df$culmen_length_mm) -
          min(beak_range_df$culmen_length_mm),
        "mm"
      )
    ) +
    scale_fill_manual(
      values = c(
        "Adelie" = "#F49F03",
        "Chinstrap" = "#F4B9C4",
        "Gentoo" = "#11541F"
      )
    ) +
    scale_x_continuous(
      label = function(x) paste0(x, "mm"),
      limits = c(30, 62)
    ) +
    theme_beak_off(
      base_font = base_font,
      title_font = title_font,
      base_text_size = text_size
    ) +
    theme(axis.text.y = element_blank(), ...)

  ggiraph::girafe(
    ggobj = interactive_plot,
    options = list(ggiraph::opts_tooltip(
      css = paste0(
        "background-color:#1A242F;color:#F4F5F6;padding:7.5px;letter-spacing:0.025em;line-height:1.3;border-radius:5px;",
        "font-family:",
        base_font,
        ";"
      )
    )),
    # Here I've specified the size of the output, which I didn't do
    # during the demo because the fig.height specified in the slides
    # takes care of that
    width_svg = 9,
    height_svg = 9
  )
}


And now, all we need to do is this…

make_beak_off_plot(
  # Take a random sample of the data - the penguins didn't want
  # any long-term damage to their friendships
  df = dplyr::slice_sample(penguin_df, n = 150),
  # Give the team a name
  team = "Beak Buddies"
)
55.8mm 🞂 🞀 33.5mm Adelie mean 39mm Chinstrap mean 49mm Gentoo mean 48mm 30mm 40mm 50mm 60mm Team Beak Buddies - 22.3mm


or this…

make_beak_off_plot(
  # Take a different random sample of the data
  df = dplyr::slice_sample(penguin_df, n = 150),
  # Give the team a name
  team = "Islands Matter Too",
  grouping_variable = "island"
)
59.6mm 🞂 🞀 32.1mm Biscoe mean 45mm Dream mean 44mm Torgersen mean 39mm 30mm 40mm 50mm 60mm Team Islands Matter Too - 27.5mm


or this…

make_beak_off_plot(
  # Take a smaller sample of the data
  df = dplyr::slice_sample(penguin_df, n = 25),
  # Give the team a name
  team = "Small and Mighty"
)
58mm 🞂 🞀 34.1mm Adelie mean 39mm Chinstrap mean 51mm Gentoo mean 48mm 30mm 40mm 50mm 60mm Team Small and Mighty - 23.9mm


or this…

make_beak_off_plot(
  # This team wanted to be more deliberate about representation
  df = penguin_df |>
    dplyr::filter(sex == "MALE") |>
    dplyr::slice_sample(n = 25) |>
    rbind(
      penguin_df |>
        dplyr::filter(sex == "FEMALE") |>
        dplyr::slice_sample(n = 25)
    ),
  # Give the team a name
  team = "Equal Beakqual",
  grouping_variable = "sex"
)
🞀 35.9mm 58mm 🞂 Female mean 44mm Male mean 46mm 30mm 40mm 50mm 60mm Team Equal Beakqual - 22.1mm


Have a play around with the function and see what you’d improve! May the best penguins win!

Reuse

Citation

For attribution, please cite this work as:
“Visualise, Optimise, Parameterise!” 2025. June 11, 2025. https://www.cararthompson.com/talks/visualise-optimise-parameterise/.