
Performance
David Hugh-Jones
2025-09-11
Source: vignettes/website-articles/performance.Rmd
performance.Rmd
Speed
The core of santoku is written in C++. It is reasonably fast; the benchmarks below compare it with `base::cut()` and `Hmisc::cut2()`:
# Print the installed santoku version alongside the timings below.
packageVersion("santoku")
#> [1] '1.1.0'
# Seed the random number generator so the random inputs start from a fixed
# state.
set.seed(27101975)
# Time three binning functions on 1e5 rnorm() draws with the breaks -2:2.
# Each expression calls rnorm() itself, so every timing also includes the
# cost of generating the data, and the three expressions do not see the
# same sample.
# min_iterations = 100 asks for at least 100 runs per expression.
# check = FALSE switches off bench::mark()'s comparison of the expressions'
# results against each other.
mb <- bench::mark(min_iterations = 100, check = FALSE,
santoku::chop(rnorm(1e5), -2:2),
base::cut(rnorm(1e5), -2:2),
Hmisc::cut2(rnorm(1e5), -2:2)
)
# Print the benchmark summary (recorded output follows).
mb
#> # A tibble: 3 × 6
#> expression min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <bch:t> <bch:t> <dbl> <bch:byt> <dbl>
#> 1 santoku::chop(rnorm(1e+05), -2:2) 7.22ms 7.55ms 132. 10.21MB 51.2
#> 2 base::cut(rnorm(1e+05), -2:2) 3.14ms 3.25ms 306. 2.35MB 17.5
#> 3 Hmisc::cut2(rnorm(1e+05), -2:2) 10.68ms 11.16ms 89.7 20.81MB 187.
# Plot the benchmark result as violins.
# NOTE(review): autoplot() is not namespace-qualified; presumably ggplot2 is
# attached in a setup chunk that is not visible here - confirm.
autoplot(mb, type = "violin")
Many breaks
# A dense grid of breaks from -2 to 2 in steps of 0.001, used to see how
# each function behaves when the number of breaks is large.
many_breaks <- seq(-2, 2, 0.001)
# Same three-way comparison as before, but on 1e4 rnorm() draws and with the
# dense break grid. As above, rnorm() runs inside each timed expression, and
# check = FALSE disables comparison of the expressions' results.
mb_breaks <- bench::mark(min_iterations = 100, check = FALSE,
santoku::chop(rnorm(1e4), many_breaks),
base::cut(rnorm(1e4), many_breaks),
Hmisc::cut2(rnorm(1e4), many_breaks)
)
# Print the benchmark summary (recorded output follows).
mb_breaks
#> # A tibble: 3 × 6
#> expression min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <bch:t> <bch:t> <dbl> <bch:byt> <dbl>
#> 1 santoku::chop(rnorm(10000), many… 21.39ms 21.81ms 45.6 5.14MB 6.81
#> 2 base::cut(rnorm(10000), many_bre… 2.45ms 2.56ms 391. 1.39MB 17.4
#> 3 Hmisc::cut2(rnorm(10000), many_b… 7.35ms 7.58ms 131. 5.86MB 25.0
# Plot the benchmark result as violins.
# NOTE(review): autoplot() is not namespace-qualified; presumably ggplot2 is
# attached in a setup chunk that is not visible here - confirm.
autoplot(mb_breaks, type = "violin")
Various chops
# Shared input for all expressions below: 9e4 rnorm() draws followed by 1e4
# values sampled with replacement from the integers -2:2 (1e5 values total).
# Presumably the repeated integers are there so that chop_spikes() and
# dissect() have frequently occurring values to pick out - confirm.
x <- c(rnorm(9e4), sample(-2:2, 1e4, replace = TRUE))
# Time each chop_* variant on the same precomputed x, so data generation is
# not part of these timings. check = FALSE disables bench::mark()'s
# comparison of results between the expressions.
# NOTE(review): the chop*() and dissect() calls are not namespace-qualified;
# presumably santoku is attached in a setup chunk not visible here - confirm.
mb_various <- bench::mark(min_iterations = 100, check = FALSE,
chop(x, -2:2),
chop_equally(x, groups = 20),
chop_n(x, n = 2e4),
chop_quantiles(x, c(0.05, 0.25, 0.5, 0.75, 0.95)),
chop_evenly(x, intervals = 20),
chop_width(x, width = 0.25),
chop_proportions(x, proportions = c(0.05, 0.25, 0.5, 0.75, 0.95)),
chop_mean_sd(x, sds = 1:4),
chop_fn(x, scales::breaks_extended(10)),
chop_pretty(x, n = 10),
chop_spikes(x, -2:2, prop = 0.01),
dissect(x, -2:2, prop = 0.01)
)
# Print the benchmark summary (recorded output follows).
mb_various
#> # A tibble: 12 × 6
#> expression min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <bch:t> <bch:t> <dbl> <bch:byt> <dbl>
#> 1 chop(x, -2:2) 4.76ms 5.12ms 194. 8.63MB 79.4
#> 2 chop_equally(x, groups = 20) 10.98ms 11.34ms 88.2 12.19MB 63.9
#> 3 chop_n(x, n = 20000) 8.09ms 8.6ms 115. 23.5MB 272.
#> 4 chop_quantiles(x, c(0.05, 0.25,… 6.61ms 6.99ms 143. 12.08MB 108.
#> 5 chop_evenly(x, intervals = 20) 5.74ms 5.95ms 168. 12.48MB 121.
#> 6 chop_width(x, width = 0.25) 6.19ms 6.49ms 154. 12.54MB 107.
#> 7 chop_proportions(x, proportions… 4.85ms 5.22ms 191. 12.48MB 139.
#> 8 chop_mean_sd(x, sds = 1:4) 5.42ms 5.64ms 176. 11.36MB 113.
#> 9 chop_fn(x, scales::breaks_exten… 5.41ms 5.78ms 174. 11.47MB 116.
#> 10 chop_pretty(x, n = 10) 5ms 5.26ms 189. 10.57MB 93.2
#> 11 chop_spikes(x, -2:2, prop = 0.0… 8.38ms 8.86ms 113. 14.62MB 138.
#> 12 dissect(x, -2:2, prop = 0.01) 13.14ms 13.55ms 73.9 22.27MB 154.
# Plot the benchmark result as violins.
# NOTE(review): autoplot() is not namespace-qualified; presumably ggplot2 is
# attached in a setup chunk that is not visible here - confirm.
autoplot(mb_various, type = "violin")