apicheck Performance Tests

Setup

library(apicheck)
library(microbenchmark)
library(ggplot2)
library(dplyr)
library(huxtable)

options(repos = "https://cran.rstudio.com")

make_test <- function(fn, search, min_version = NA) {
  if (is.na(min_version)) min_version <- NULL
  bquote(when_fn_exists(.(fn), search = .(search), min_version = .(min_version), progress = FALSE))
}

print_results <- . %>% 
      mutate(date = as.character(date)) %>% 
      as_hux()                          %>% 
      add_colnames()                    %>% 
      set_bold(1, everywhere, TRUE)     %>% 
      set_outer_borders(0.4)            %>% 
      set_number_format(NA)

pretty_print <- . %>% 
      summary()                                          %>% 
      arrange(as.character(expr))                        %>% 
      as_hux()                                           %>% 
      add_colnames()                                     %>% 
      set_bold(1, everywhere, TRUE)                      %>% 
      set_background_color(evens, everywhere, grey(0.9)) %>% 
      set_outer_borders(0.4)                             %>% 
      set_number_format(everywhere, -matches("neval"), 1)

pretty_plot <- function (result) {
  autoplot(result) + 
       geom_point(color = "red", alpha = 0.5, size = 0.5) + 
        scale_x_discrete(limits = rev(sort(names(tests))))
}

The tests

clipr is a small package with few versions. mice is a larger package which requires compilation, with more dependencies.

fns <- c("clipr::dr_clipr", "mice::ampute") 
min_versions <- c(NA, "2.20")
args <- expand.grid(
        fn          = fns,
        search      = c("binary", "forward", "backward", "all", "parallel"), 
        stringsAsFactors = FALSE
      )
args$min_version <- min_versions[match(args$fn, fns)]

test_df <- args %>% rowwise %>% do(expr = make_test(.$fn, .$search, .$min_version)) %>% bind_cols(args) %>% mutate(label = paste(fn, search, sep = " / "))
tests <- test_df$expr
names(tests) <- test_df$label

noquote(as.character(tests))
#>  [1] when_fn_exists("clipr::dr_clipr", search = "binary", min_version = NULL, progress = FALSE)  
#>  [2] when_fn_exists("mice::ampute", search = "binary", min_version = "2.20", progress = FALSE)   
#>  [3] when_fn_exists("clipr::dr_clipr", search = "forward", min_version = NULL, progress = FALSE) 
#>  [4] when_fn_exists("mice::ampute", search = "forward", min_version = "2.20", progress = FALSE)  
#>  [5] when_fn_exists("clipr::dr_clipr", search = "backward", min_version = NULL, progress = FALSE)
#>  [6] when_fn_exists("mice::ampute", search = "backward", min_version = "2.20", progress = FALSE) 
#>  [7] when_fn_exists("clipr::dr_clipr", search = "all", min_version = NULL, progress = FALSE)     
#>  [8] when_fn_exists("mice::ampute", search = "all", min_version = "2.20", progress = FALSE)      
#>  [9] when_fn_exists("clipr::dr_clipr", search = "parallel", min_version = NULL, progress = FALSE)
#> [10] when_fn_exists("mice::ampute", search = "parallel", min_version = "2.20", progress = FALSE)

Results

Backward search should work well for both these functions.

set_lib_dir(".apicheck")
for (f in fns) {
  print(f)
  print_results(when_fn_exists(f, search = "all", progress = FALSE))  
}
#> [1] "clipr::dr_clipr"
#> [1] "mice::ampute"

Pre-downloaded


# DO NOT EDIT: CACHE WILL RESET
  
# cache packages:
set_lib_dir(".apicheck")
invisible(mapply(when_fn_exists, fn = fns, min_version = ifelse(is.na(min_versions), "0.0", min_versions), search = "all", progress = FALSE))


cached <- microbenchmark(
        list = tests,
        times = 20
      )


pretty_print(cached)

expr	min	lq	mean	median	uq	max	neval	cld
clipr::dr_clipr / all	51.2	58.4	66.7	59.6	63.0	112.1	20	a
clipr::dr_clipr / backward	13.8	15.1	24.4	16.2	20.1	109.5	20	a
clipr::dr_clipr / binary	23.7	27.6	38.2	29.3	33.2	88.2	20	a
clipr::dr_clipr / forward	52.2	56.2	66.0	59.0	61.9	117.3	20	a
clipr::dr_clipr / parallel	18541.5	18957.8	19588.0	19173.4	19973.0	22905.7	20	b
mice::ampute / all	138.0	145.1	165.3	150.5	189.1	222.1	20	a
mice::ampute / backward	89.1	95.0	120.9	114.1	146.6	165.1	20	a
mice::ampute / binary	80.6	84.6	101.4	88.8	113.3	179.9	20	a
mice::ampute / forward	106.4	112.3	127.2	115.9	125.2	223.5	20	a
mice::ampute / parallel	56148.2	57917.1	60031.1	59130.9	61719.0	69071.7	20	c

pretty_plot(cached)
#> Scale for 'x' is already present. Adding another scale for 'x', which
#> will replace the existing scale.

With download


# DO NOT EDIT: CACHE WILL RESET
  
set_lib_dir(NULL)

tests2 <- lapply(tests, function (x) bquote(
  {.(x); clear_package_cache()})
)
names(tests2) <- names(tests)

uncached <- microbenchmark::microbenchmark(list = tests2, times = 5)


pretty_print(uncached)

expr	min	lq	mean	median	uq	max	neval	cld
clipr::dr_clipr / all	30.6	31.6	31.8	31.7	32.1	33.0	5	d
clipr::dr_clipr / backward	6.3	6.4	6.6	6.6	6.9	7.0	5	a
clipr::dr_clipr / binary	13.6	13.6	13.8	13.8	13.9	14.2	5	b
clipr::dr_clipr / forward	30.2	31.2	31.7	32.0	32.4	32.5	5	d
clipr::dr_clipr / parallel	18.7	18.7	19.1	19.0	19.1	20.1	5	c
mice::ampute / all	108.1	108.7	110.7	109.3	110.7	116.6	5	h
mice::ampute / backward	53.7	53.9	55.2	55.2	56.1	56.9	5	e
mice::ampute / binary	53.1	54.1	55.3	55.2	56.5	57.6	5	e
mice::ampute / forward	88.8	90.2	91.5	91.9	93.0	93.6	5	g
mice::ampute / parallel	58.2	58.9	60.4	60.3	61.5	63.0	5	f

pretty_plot(uncached)
#> Scale for 'x' is already present. Adding another scale for 'x', which
#> will replace the existing scale.

These results suggest that choosing the right algorithm matters as much as parallelism; and parallelism is costly once packages have already been downloaded.

apicheck Performance Tests

David Hugh-Jones davidhughjones@gmail.com

2018-03-05

Speed tests

Setup

The tests

Results

Pre-downloaded

With download