Visualise, Optimise, Parameterise!

data-visualisation
r-how-to
parameterised-plots
annotations

The penguins are back! And this time, it’s for a Beak-Off. They are organising themselves into teams to see which team can show off the biggest range of beak lengths. Retrace our steps in this code-along workshop where we built a graph, made it better (and interactive!), and optimised the parameterised plotting function to address all the judges’ requirements.

Published

June 11, 2025

Recording

Slides

View full screen

Code snippets and resources shared during the workshop

Demo

First we create the function… (unfold to see all the details!)

Code
library(ggplot2)

# Set up the data
penguin_df <- palmerpenguins::penguins_raw |>
  janitor::clean_names() |>
  dplyr::filter(!is.na(culmen_length_mm)) |>
  dplyr::mutate(species = gsub("(.)( )(.*)", "\\1", species))

# Set up the theme function
theme_beak_off <- function(
  base_text_size = 16,
  base_font = "Work Sans",
  title_font = "Poppins"
) {
  theme_minimal(base_size = base_text_size) +
    theme(
      text = element_text(family = base_font),
      axis.text = element_text(colour = "#495058"),
      legend.position = "none",
      axis.title = element_blank(),
      plot.title = element_text(
        family = title_font,
        face = "bold",
        size = rel(1.5)
      ),
      panel.grid = element_line(colour = "#FFFFFF"),
      plot.background = element_rect(fill = "#F4F5F6", colour = "#F4F5F6"),
      plot.caption = element_text(
        size = rel(0.8),
        margin = margin(base_text_size / 2, 0, 0, 0)
      ),
      plot.margin = margin(rep(base_text_size * 1.5, 4))
    )
}

# Create the plot function

make_beak_off_plot <- function(
  df = penguin_df,
  team_name,
  grouping_variable = "species",
  base_font = "Work Sans",
  title_font = "Arial",
  text_size = 16,
  ...
) {
  df <- dplyr::filter(df, !is.na(get(grouping_variable)))

  beak_means_df <- df |>
    dplyr::group_by(get(grouping_variable)) |>
    dplyr::summarise(mean_length = mean(culmen_length_mm, na.rm = TRUE)) |>
    dplyr::rename(group = `get(grouping_variable)`)

  beak_range_df <- df |>
    dplyr::filter(
      culmen_length_mm == max(culmen_length_mm, na.rm = TRUE) |
        culmen_length_mm == min(culmen_length_mm, na.rm = TRUE)
    )

  interactive_plot <- df |>
    ggplot(aes(x = culmen_length_mm, y = get(grouping_variable))) +
    geom_vline(
      data = beak_range_df,
      aes(xintercept = culmen_length_mm),
      linetype = 3,
      colour = "#1A242F"
    ) +
    geom_segment(
      data = beak_means_df,
      aes(
        x = mean_length,
        xend = mean_length,
        y = -Inf,
        yend = group
      ),
      linetype = 3
    ) +
    ggtext::geom_textbox(
      data = beak_range_df,
      aes(
        y = max(df |> dplyr::pull(get(grouping_variable))),
        label = dplyr::case_when(
          culmen_length_mm == min(culmen_length_mm) ~
            paste0("🞀 ", culmen_length_mm, "mm"),
          TRUE ~ paste0(culmen_length_mm, "mm", " 🞂")
        ),
        hjust = dplyr::case_when(
          culmen_length_mm == min(culmen_length_mm) ~ 0,
          TRUE ~ 1
        ),
        halign = dplyr::case_when(
          culmen_length_mm == min(culmen_length_mm) ~ 0,
          TRUE ~ 1
        )
      ),
      family = base_font,
      colour = "#1A242F",
      fontface = "bold",
      fill = NA,
      box.padding = unit(0, "pt"),
      box.colour = NA,
      nudge_y = 0.33
    ) +
    ggtext::geom_textbox(
      data = beak_means_df,
      aes(
        x = mean_length,
        y = group,
        label = paste0(
          stringr::str_to_sentence(group),
          " mean<br>**",
          janitor::round_half_up(mean_length),
          "mm**"
        )
      ),
      hjust = 0,
      # Thanks to Martine Jansen for this suggestion to avoid the label
      # boxes overlapping with the lines beyond what's necessary.
      # (They're deliberately not transparent because that would make
      # it hard to read if there was a genuine overlap - topical!)
      width = unit(6, "lines"),
      nudge_y = -0.3,
      box.colour = NA,
      family = base_font,
      colour = "#1A242F",
      fill = "#F4F5F6"
    ) +
    ggiraph::geom_jitter_interactive(
      aes(
        x = culmen_length_mm,
        y = get(grouping_variable),
        fill = species,
        size = body_mass_g,
        tooltip = paste0("<b>", individual_id, "</b> from ", island)
      ),
      shape = 21,
      width = 0,
      height = 0.15,
      colour = "#1A242F",
      stroke = 0.5
    ) +
    labs(
      title = paste0(
        "Team ",
        team_name,
        " - ",
        max(beak_range_df$culmen_length_mm) -
          min(beak_range_df$culmen_length_mm),
        "mm"
      )
    ) +
    scale_fill_manual(
      values = c(
        "Adelie" = "#F49F03",
        "Chinstrap" = "#F4B9C4",
        "Gentoo" = "#11541F"
      )
    ) +
    scale_x_continuous(
      label = function(x) paste0(x, "mm"),
      limits = c(30, 62)
    ) +
    theme_beak_off(
      base_font = base_font,
      title_font = title_font,
      base_text_size = text_size
    ) +
    theme(axis.text.y = element_blank(), ...)

  ggiraph::girafe(
    ggobj = interactive_plot,
    options = list(ggiraph::opts_tooltip(
      css = paste0(
        "background-color:#1A242F;color:#F4F5F6;padding:7.5px;letter-spacing:0.025em;line-height:1.3;border-radius:5px;",
        "font-family:",
        base_font,
        ";"
      )
    )),
    # Here I've specified the size of the output, which I didn't do
    # during the demo because the fig.height specified in the slides
    # takes care of that
    width_svg = 9,
    height_svg = 9
  )
}


And now, all we need to do is this…

make_beak_off_plot(
  # Take a random sample of the data - the penguins didn't want
  # any long-term damage to their friendships
  df = dplyr::slice_sample(penguin_df, n = 150),
  # Give the team a name
  team = "Beak Buddies"
)


or this…

make_beak_off_plot(
  # Take a different random sample of the data
  df = dplyr::slice_sample(penguin_df, n = 150),
  # Give the team a name
  team = "Islands Matter Too",
  grouping_variable = "island"
)


or this…

make_beak_off_plot(
  # Take a smaller sample of the data
  df = dplyr::slice_sample(penguin_df, n = 25),
  # Give the team a name
  team = "Small and Mighty"
)


or this…

make_beak_off_plot(
  # This team wanted to be more deliberate about representation
  df = penguin_df |>
    dplyr::filter(sex == "MALE") |>
    dplyr::slice_sample(n = 25) |>
    rbind(
      penguin_df |>
        dplyr::filter(sex == "FEMALE") |>
        dplyr::slice_sample(n = 25)
    ),
  # Give the team a name
  team = "Equal Beakqual",
  grouping_variable = "sex"
)


Have a play around with the function and see what you’d improve! May the best penguins win!

Reuse

Citation

For attribution, please cite this work as:
“Visualise, Optimise, Parameterise!” 2025. June 11, 2025. https://www.cararthompson.com/talks/visualise-optimise-parameterise/.