On my Macbook Air, standard broadband connection.
library(apicheck)
library(microbenchmark)
library(ggplot2)
library(dplyr)
library(huxtable)
options(repos = "https://cran.rstudio.com")
make_test <- function(fn, search, min_version = NA) {
if (is.na(min_version)) min_version <- NULL
bquote(when_fn_exists(.(fn), search = .(search), min_version = .(min_version), progress = FALSE))
}
print_results <- . %>%
mutate(date = as.character(date)) %>%
as_hux() %>%
add_colnames() %>%
set_bold(1, everywhere, TRUE) %>%
set_outer_borders(0.4) %>%
set_number_format(NA)
pretty_print <- . %>%
summary() %>%
arrange(as.character(expr)) %>%
as_hux() %>%
add_colnames() %>%
set_bold(1, everywhere, TRUE) %>%
set_background_color(evens, everywhere, grey(0.9)) %>%
set_outer_borders(0.4) %>%
set_number_format(everywhere, -matches("neval"), 1)
pretty_plot <- function (result) {
autoplot(result) +
geom_point(color = "red", alpha = 0.5, size = 0.5) +
scale_x_discrete(limits = rev(sort(names(tests))))
}
clipr
is a small package with few versions. mice
is a larger package which requires compilation, with more dependencies.
fns <- c("clipr::dr_clipr", "mice::ampute")
min_versions <- c(NA, "2.20")
args <- expand.grid(
fn = fns,
search = c("binary", "forward", "backward", "all", "parallel"),
stringsAsFactors = FALSE
)
args$min_version <- min_versions[match(args$fn, fns)]
test_df <- args %>% rowwise %>% do(expr = make_test(.$fn, .$search, .$min_version)) %>% bind_cols(args) %>% mutate(label = paste(fn, search, sep = " / "))
tests <- test_df$expr
names(tests) <- test_df$label
noquote(as.character(tests))
#> [1] when_fn_exists("clipr::dr_clipr", search = "binary", min_version = NULL, progress = FALSE)
#> [2] when_fn_exists("mice::ampute", search = "binary", min_version = "2.20", progress = FALSE)
#> [3] when_fn_exists("clipr::dr_clipr", search = "forward", min_version = NULL, progress = FALSE)
#> [4] when_fn_exists("mice::ampute", search = "forward", min_version = "2.20", progress = FALSE)
#> [5] when_fn_exists("clipr::dr_clipr", search = "backward", min_version = NULL, progress = FALSE)
#> [6] when_fn_exists("mice::ampute", search = "backward", min_version = "2.20", progress = FALSE)
#> [7] when_fn_exists("clipr::dr_clipr", search = "all", min_version = NULL, progress = FALSE)
#> [8] when_fn_exists("mice::ampute", search = "all", min_version = "2.20", progress = FALSE)
#> [9] when_fn_exists("clipr::dr_clipr", search = "parallel", min_version = NULL, progress = FALSE)
#> [10] when_fn_exists("mice::ampute", search = "parallel", min_version = "2.20", progress = FALSE)
Backward search should work well for both these functions.
set_lib_dir(".apicheck")
for (f in fns) {
print(f)
print_results(when_fn_exists(f, search = "all", progress = FALSE))
}
#> [1] "clipr::dr_clipr"
#> [1] "mice::ampute"
# DO NOT EDIT: CACHE WILL RESET
# cache packages:
set_lib_dir(".apicheck")
invisible(mapply(when_fn_exists, fn = fns, min_version = ifelse(is.na(min_versions), "0.0", min_versions), search = "all", progress = FALSE))
cached <- microbenchmark(
list = tests,
times = 20
)
pretty_print(cached)
expr | min | lq | mean | median | uq | max | neval | cld |
clipr::dr_clipr / all | 51.2 | 58.4 | 66.7 | 59.6 | 63.0 | 112.1 | 20 | a |
clipr::dr_clipr / backward | 13.8 | 15.1 | 24.4 | 16.2 | 20.1 | 109.5 | 20 | a |
clipr::dr_clipr / binary | 23.7 | 27.6 | 38.2 | 29.3 | 33.2 | 88.2 | 20 | a |
clipr::dr_clipr / forward | 52.2 | 56.2 | 66.0 | 59.0 | 61.9 | 117.3 | 20 | a |
clipr::dr_clipr / parallel | 18541.5 | 18957.8 | 19588.0 | 19173.4 | 19973.0 | 22905.7 | 20 | b |
mice::ampute / all | 138.0 | 145.1 | 165.3 | 150.5 | 189.1 | 222.1 | 20 | a |
mice::ampute / backward | 89.1 | 95.0 | 120.9 | 114.1 | 146.6 | 165.1 | 20 | a |
mice::ampute / binary | 80.6 | 84.6 | 101.4 | 88.8 | 113.3 | 179.9 | 20 | a |
mice::ampute / forward | 106.4 | 112.3 | 127.2 | 115.9 | 125.2 | 223.5 | 20 | a |
mice::ampute / parallel | 56148.2 | 57917.1 | 60031.1 | 59130.9 | 61719.0 | 69071.7 | 20 | c |
pretty_plot(cached)
#> Scale for 'x' is already present. Adding another scale for 'x', which
#> will replace the existing scale.
# DO NOT EDIT: CACHE WILL RESET
set_lib_dir(NULL)
tests2 <- lapply(tests, function (x) bquote(
{.(x); clear_package_cache()})
)
names(tests2) <- names(tests)
uncached <- microbenchmark::microbenchmark(list = tests2, times = 5)
pretty_print(uncached)
expr | min | lq | mean | median | uq | max | neval | cld |
clipr::dr_clipr / all | 30.6 | 31.6 | 31.8 | 31.7 | 32.1 | 33.0 | 5 | d |
clipr::dr_clipr / backward | 6.3 | 6.4 | 6.6 | 6.6 | 6.9 | 7.0 | 5 | a |
clipr::dr_clipr / binary | 13.6 | 13.6 | 13.8 | 13.8 | 13.9 | 14.2 | 5 | b |
clipr::dr_clipr / forward | 30.2 | 31.2 | 31.7 | 32.0 | 32.4 | 32.5 | 5 | d |
clipr::dr_clipr / parallel | 18.7 | 18.7 | 19.1 | 19.0 | 19.1 | 20.1 | 5 | c |
mice::ampute / all | 108.1 | 108.7 | 110.7 | 109.3 | 110.7 | 116.6 | 5 | h |
mice::ampute / backward | 53.7 | 53.9 | 55.2 | 55.2 | 56.1 | 56.9 | 5 | e |
mice::ampute / binary | 53.1 | 54.1 | 55.3 | 55.2 | 56.5 | 57.6 | 5 | e |
mice::ampute / forward | 88.8 | 90.2 | 91.5 | 91.9 | 93.0 | 93.6 | 5 | g |
mice::ampute / parallel | 58.2 | 58.9 | 60.4 | 60.3 | 61.5 | 63.0 | 5 | f |
pretty_plot(uncached)
#> Scale for 'x' is already present. Adding another scale for 'x', which
#> will replace the existing scale.
These results suggest that choosing the right algorithm matters as much as parallelism; and parallelism is costly once packages have already been downloaded.