Speed tests

On my Macbook Air, standard broadband connection.

Setup

library(apicheck)
library(microbenchmark)
library(ggplot2)
library(dplyr)
library(huxtable)

options(repos = "https://cran.rstudio.com")

make_test <- function(fn, search, min_version = NA) {
  if (is.na(min_version)) min_version <- NULL
  bquote(when_fn_exists(.(fn), search = .(search), min_version = .(min_version), progress = FALSE))
}

print_results <- . %>% 
      mutate(date = as.character(date)) %>% 
      as_hux()                          %>% 
      add_colnames()                    %>% 
      set_bold(1, everywhere, TRUE)     %>% 
      set_outer_borders(0.4)            %>% 
      set_number_format(NA)

pretty_print <- . %>% 
      summary()                                          %>% 
      arrange(as.character(expr))                        %>% 
      as_hux()                                           %>% 
      add_colnames()                                     %>% 
      set_bold(1, everywhere, TRUE)                      %>% 
      set_background_color(evens, everywhere, grey(0.9)) %>% 
      set_outer_borders(0.4)                             %>% 
      set_number_format(everywhere, -matches("neval"), 1)

pretty_plot <- function (result) {
  autoplot(result) + 
       geom_point(color = "red", alpha = 0.5, size = 0.5) + 
        scale_x_discrete(limits = rev(sort(names(tests))))
}

The tests

clipr is a small package with few versions. mice is a larger package which requires compilation, with more dependencies.

fns <- c("clipr::dr_clipr", "mice::ampute") 
min_versions <- c(NA, "2.20")
args <- expand.grid(
        fn          = fns,
        search      = c("binary", "forward", "backward", "all", "parallel"), 
        stringsAsFactors = FALSE
      )
args$min_version <- min_versions[match(args$fn, fns)]

test_df <- args %>% rowwise %>% do(expr = make_test(.$fn, .$search, .$min_version)) %>% bind_cols(args) %>% mutate(label = paste(fn, search, sep = " / "))
tests <- test_df$expr
names(tests) <- test_df$label

noquote(as.character(tests))
#>  [1] when_fn_exists("clipr::dr_clipr", search = "binary", min_version = NULL, progress = FALSE)  
#>  [2] when_fn_exists("mice::ampute", search = "binary", min_version = "2.20", progress = FALSE)   
#>  [3] when_fn_exists("clipr::dr_clipr", search = "forward", min_version = NULL, progress = FALSE) 
#>  [4] when_fn_exists("mice::ampute", search = "forward", min_version = "2.20", progress = FALSE)  
#>  [5] when_fn_exists("clipr::dr_clipr", search = "backward", min_version = NULL, progress = FALSE)
#>  [6] when_fn_exists("mice::ampute", search = "backward", min_version = "2.20", progress = FALSE) 
#>  [7] when_fn_exists("clipr::dr_clipr", search = "all", min_version = NULL, progress = FALSE)     
#>  [8] when_fn_exists("mice::ampute", search = "all", min_version = "2.20", progress = FALSE)      
#>  [9] when_fn_exists("clipr::dr_clipr", search = "parallel", min_version = NULL, progress = FALSE)
#> [10] when_fn_exists("mice::ampute", search = "parallel", min_version = "2.20", progress = FALSE)

Results

Backward search should work well for both these functions.

set_lib_dir(".apicheck")
for (f in fns) {
  print(f)
  print_results(when_fn_exists(f, search = "all", progress = FALSE))  
}
#> [1] "clipr::dr_clipr"
#> [1] "mice::ampute"

Pre-downloaded


# DO NOT EDIT: CACHE WILL RESET
  
# cache packages:
set_lib_dir(".apicheck")
invisible(mapply(when_fn_exists, fn = fns, min_version = ifelse(is.na(min_versions), "0.0", min_versions), search = "all", progress = FALSE))


cached <- microbenchmark(
        list = tests,
        times = 20
      )

pretty_print(cached)
expr min lq mean median uq max neval cld
clipr::dr_clipr / all 51.2 58.4 66.7 59.6 63.0 112.1 20 a
clipr::dr_clipr / backward 13.8 15.1 24.4 16.2 20.1 109.5 20 a
clipr::dr_clipr / binary 23.7 27.6 38.2 29.3 33.2 88.2 20 a
clipr::dr_clipr / forward 52.2 56.2 66.0 59.0 61.9 117.3 20 a
clipr::dr_clipr / parallel 18541.5 18957.8 19588.0 19173.4 19973.0 22905.7 20 b
mice::ampute / all 138.0 145.1 165.3 150.5 189.1 222.1 20 a
mice::ampute / backward 89.1 95.0 120.9 114.1 146.6 165.1 20 a
mice::ampute / binary 80.6 84.6 101.4 88.8 113.3 179.9 20 a
mice::ampute / forward 106.4 112.3 127.2 115.9 125.2 223.5 20 a
mice::ampute / parallel 56148.2 57917.1 60031.1 59130.9 61719.0 69071.7 20 c
pretty_plot(cached)
#> Scale for 'x' is already present. Adding another scale for 'x', which
#> will replace the existing scale.

With download


# DO NOT EDIT: CACHE WILL RESET
  
set_lib_dir(NULL)

tests2 <- lapply(tests, function (x) bquote(
  {.(x); clear_package_cache()})
)
names(tests2) <- names(tests)

uncached <- microbenchmark::microbenchmark(list = tests2, times = 5)

pretty_print(uncached)
expr min lq mean median uq max neval cld
clipr::dr_clipr / all 30.6 31.6 31.8 31.7 32.1 33.0 5 d
clipr::dr_clipr / backward 6.3 6.4 6.6 6.6 6.9 7.0 5 a
clipr::dr_clipr / binary 13.6 13.6 13.8 13.8 13.9 14.2 5 b
clipr::dr_clipr / forward 30.2 31.2 31.7 32.0 32.4 32.5 5 d
clipr::dr_clipr / parallel 18.7 18.7 19.1 19.0 19.1 20.1 5 c
mice::ampute / all 108.1 108.7 110.7 109.3 110.7 116.6 5 h
mice::ampute / backward 53.7 53.9 55.2 55.2 56.1 56.9 5 e
mice::ampute / binary 53.1 54.1 55.3 55.2 56.5 57.6 5 e
mice::ampute / forward 88.8 90.2 91.5 91.9 93.0 93.6 5 g
mice::ampute / parallel 58.2 58.9 60.4 60.3 61.5 63.0 5 f
pretty_plot(uncached)
#> Scale for 'x' is already present. Adding another scale for 'x', which
#> will replace the existing scale.

These results suggest that choosing the right algorithm matters as much as parallelism; and parallelism is costly once packages have already been downloaded.