Beautifully reproducible: (mis)adventures in automating data-to-viz pipelines

Keynote talk for the NHS-OA 2025 conference, in which I explore different steps we can take to make our dataviz functions cope with whatever data we throw at them, and to approach R and package updates with confidence - dataviz tests!

Published

November 13, 2025

Recording

Coming soon!

Slides

View full screen

Final code for the parameterised interactive graph

Code
library(ggplot2)

# Tidy up the overall data ----
# Keep only penguins with a recorded beak (culmen) length, shorten each
# species label to the text before its first space, and fix the factor order
# that the plot later turns into y positions via as.numeric(species).
all_penguins <- palmerpenguins::penguins_raw |>
  janitor::clean_names() |>
  dplyr::filter(!is.na(culmen_length_mm)) |>
  dplyr::mutate(
    species = factor(
      sub(" .*", "", species),
      levels = c("Adelie", "Gentoo", "Chinstrap")
    ),
    # One uniform draw per row in a single vectorised call; this replaces a
    # rowwise step that generated 100 draws per row and kept only one.
    # No seed is set here, so the offsets differ from run to run.
    jitter_y = runif(dplyr::n(), -0.1, 0.1)
  )


# Set up the theme ----

#' Shared ggplot2 theme for the penguin plots
#'
#' Starts from `theme_minimal()` and adds the overrides below: custom fonts,
#' no legend, no axis titles, a centred bold title, white grid lines and a
#' pale plot background.
#'
#' @param base_text_size Base text size handed to `theme_minimal()`; twice
#'   this value is also passed to `margin_auto()` for the plot margin.
#' @param base_font Font family applied to all text.
#' @param title_font Font family applied to the plot title.
#' @return `theme_minimal()` with the overrides added, ready to `+` to a plot.
theme_penguins <- function(
  base_text_size = 16,
  base_font = "Work Sans",
  title_font = "Poppins"
) {
  # Same colour for the background fill and its outline
  backdrop <- "#F1F1F9"

  title_style <- element_text(
    family = title_font,
    face = "bold",
    hjust = 0.5,
    size = rel(1.5)
  )

  penguin_overrides <- theme(
    text = element_text(family = base_font),
    axis.text = element_text(colour = "#495058"),
    legend.position = "none",
    axis.title = element_blank(),
    plot.title = title_style,
    panel.grid = element_line(colour = "#FFFFFF"),
    plot.background = element_rect(fill = backdrop, colour = backdrop),
    plot.margin = margin_auto(base_text_size * 2)
  )

  theme_minimal(base_size = base_text_size) + penguin_overrides
}

# Set up the colours ----

# Named by species so that a manual fill scale can match colours to factor
# levels by name rather than by position.
penguin_colours <- c(
  Adelie = "#F49F03",
  Chinstrap = "#F4B9C4",
  Gentoo = "#11541F"
)

# Set up the dataviz function ----

#' Build the interactive beak-length plot
#'
#' Draws one jittered row of points per species, marks each species' mean
#' beak length with a dotted segment and a label, and marks the shortest and
#' longest beaks in `df` with dotted vertical lines and labels. The ggplot is
#' then wrapped in `ggiraph::girafe()` so every point carries a tooltip.
#'
#' @param df Data frame providing the columns used below: `species`
#'   (converted with `as.numeric()` to get the y position),
#'   `culmen_length_mm`, `jitter_y`, `individual_id` and `island`.
#' @param colour Fill colours handed to `scale_fill_manual(values = )`.
#'   Defaults to `penguin_colours`.
#' @return The object returned by `ggiraph::girafe()`.
make_beak_plot <- function(
  df,
  colour = penguin_colours
) {
  # One row per species with its mean beak length
  beak_means_df <- df |>
    dplyr::group_by(species) |>
    dplyr::summarise(mean_length = mean(culmen_length_mm, na.rm = TRUE))

  # Rows holding the overall shortest and longest beaks; ties on either
  # extreme keep every tied row
  beak_range_df <- df |>
    dplyr::filter(
      culmen_length_mm == max(culmen_length_mm, na.rm = TRUE) |
        culmen_length_mm == min(culmen_length_mm, na.rm = TRUE)
    )

  interactive_plot <- df |>
    ggplot(aes(x = culmen_length_mm, y = as.numeric(species))) +
    # Dotted vertical lines at the overall minimum and maximum
    geom_vline(
      data = beak_range_df,
      aes(xintercept = culmen_length_mm),
      linetype = 3,
      colour = "#1A242F"
    ) +
    # Dotted line from the bottom of the panel up to each species' row
    geom_segment(
      data = beak_means_df,
      aes(
        x = mean_length,
        xend = mean_length,
        y = -Inf,
        yend = as.numeric(species)
      ),
      linetype = 3
    ) +
    ggiraph::geom_point_interactive(
      aes(
        x = culmen_length_mm,
        y = as.numeric(species) + jitter_y,
        fill = species,
        tooltip = paste0("<b>", individual_id, "</b> from ", island)
      ),
      shape = 21,
      size = 8,
      colour = "#1A242F",
      stroke = 0.5,
      alpha = 0.9
    ) +
    # Labels for the overall minimum (left-aligned) and maximum (right-aligned)
    ggtext::geom_textbox(
      data = beak_range_df,
      aes(
        # we're using df to establish which species is at the top of the graph,
        # not beak_range_df, so we need to specify that here
        y = max(as.numeric(df$species)),
        # The arrows are written as Unicode escapes (U+1F780 left-pointing and
        # U+1F782 right-pointing triangles) so they survive any re-encoding
        # of this file.
        label = dplyr::case_when(
          culmen_length_mm == min(culmen_length_mm) ~
            paste0("\U0001F780 ", culmen_length_mm, "mm"),
          .default = paste0(culmen_length_mm, "mm", " \U0001F782")
        ),
        hjust = dplyr::case_when(
          culmen_length_mm == min(culmen_length_mm) ~ 0,
          .default = 1
        ),
        halign = dplyr::case_when(
          culmen_length_mm == min(culmen_length_mm) ~ 0,
          .default = 1
        )
      ),
      family = "Work Sans",
      colour = "#1A242F",
      fontface = "bold",
      fill = NA,
      size = 8,
      box.padding = unit(0, "pt"),
      box.colour = NA,
      nudge_y = 0.33
    ) +
    # Per-species mean labels; means above 45 are right-aligned, the rest
    # left-aligned
    ggtext::geom_textbox(
      data = beak_means_df,
      aes(
        x = mean_length,
        y = as.numeric(species),
        label = paste0(
          species,
          " mean<br>**",
          janitor::round_half_up(mean_length),
          "mm**"
        ),
        hjust = dplyr::case_when(mean_length > 45 ~ 1, .default = 0),
        halign = dplyr::case_when(mean_length > 45 ~ 1, .default = 0)
      ),
      nudge_y = -0.3,
      box.colour = NA,
      size = 6,
      family = "Work Sans",
      colour = "#1A242F",
      fill = NA
    ) +
    labs(title = "Beak lengths by species") +
    # Use the palette supplied by the caller; it used to be hard-coded here,
    # which silently ignored the `colour` argument.
    scale_fill_manual(values = colour) +
    scale_y_continuous(breaks = c(1, 2, 3)) +
    scale_x_continuous(
      # `labels` spelled out in full rather than relying on partial matching
      labels = function(x) paste0(x, "mm"),
      limits = c(32, 60)
    ) +
    theme_penguins() +
    theme(axis.text.y = element_blank(), panel.grid.minor.y = element_blank())

  # Same CSS string as before, split across lines for readability
  tooltip_css <- paste0(
    "background-color:#1A242F;",
    "color:#F4F5F6;",
    "padding:7.5px;",
    "letter-spacing:0.025em;",
    "line-height:1.3;",
    "border-radius:5px;",
    "font-family:Work Sans;"
  )

  ggiraph::girafe(
    ggobj = interactive_plot,
    options = list(ggiraph::opts_tooltip(css = tooltip_css)),
    height_svg = 9,
    width_svg = 9
  )
}

Once we have the function set up, we can create all the graphs we want!


# Random subset of 50 penguins; slice_sample() replaces the superseded
# sample_n()
make_beak_plot(dplyr::slice_sample(all_penguins, n = 50))
52.5mm 🞂 🞀 35.5mm Adelie mean 40mm Gentoo mean 47mm Chinstrap mean 48mm 40mm 50mm 60mm Beak lengths by species

# Random subset of 100 penguins; slice_sample() replaces the superseded
# sample_n()
make_beak_plot(dplyr::slice_sample(all_penguins, n = 100))
🞀 35mm 59.6mm 🞂 Adelie mean 39mm Gentoo mean 48mm Chinstrap mean 48mm 40mm 50mm 60mm Beak lengths by species

# Only the rows whose sex is recorded as "FEMALE"
all_penguins |>
  dplyr::filter(sex == "FEMALE") |>
  make_beak_plot()
🞀 32.1mm 58mm 🞂 Adelie mean 37mm Gentoo mean 46mm Chinstrap mean 47mm 40mm 50mm 60mm Beak lengths by species

# A single species: only the Gentoo rows
all_penguins |>
  dplyr::filter(species == "Gentoo") |>
  make_beak_plot()
🞀 40.9mm 59.6mm 🞂 Gentoo mean 48mm 40mm 50mm 60mm Beak lengths by species

Resources

Packages for setting up dataviz tests

  • {testthat}
  • {waldo}
  • {vdiffr}

Code along

If you want to retrace the steps towards the parameterised plot function, here’s a code-along workshop I did for RMedicine in which we built this function and added a few extra bells and whistles.

👉 Visualise, Optimise, Parameterise!

Happy datavizzing!

Reuse

Citation

For attribution, please cite this work as:
“Beautifully Reproducible: (Mis)adventures in Automating Data-to-Viz Pipelines.” 2025. November 13, 2025. https://www.cararthompson.com/talks/beautifully-reproducible-dataviz/.