Skip to contents

Speed

The core of santoku is written in C++. It is reasonably fast:


# Print the installed santoku version; the recorded result is shown below.
packageVersion("santoku")
#> [1] '1.1.0'
# Fix the RNG seed so that the random inputs drawn later are reproducible.
set.seed(27101975)

# Benchmark santoku::chop() against base::cut() and Hmisc::cut2(), each
# cutting 1e5 standard normal draws at the breaks -2:2.
#
# Note: rnorm() is written inside every benchmarked expression, so it is
# re-run on each iteration and its cost is part of the reported time and
# memory for all three rows.
#
# check = FALSE turns off bench::mark()'s comparison of the results, which
# could not agree here because every call draws fresh random input.
mb <- bench::mark(
  santoku::chop(rnorm(1e5), -2:2),
  base::cut(rnorm(1e5), -2:2),
  Hmisc::cut2(rnorm(1e5), -2:2),
  min_iterations = 100,
  check = FALSE
)
mb
#> # A tibble: 3 × 6
#>   expression                            min  median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr>                        <bch:t> <bch:t>     <dbl> <bch:byt>    <dbl>
#> 1 santoku::chop(rnorm(1e+05), -2:2)  7.22ms  7.55ms     132.    10.21MB     51.2
#> 2 base::cut(rnorm(1e+05), -2:2)      3.14ms  3.25ms     306.     2.35MB     17.5
#> 3 Hmisc::cut2(rnorm(1e+05), -2:2)   10.68ms 11.16ms      89.7   20.81MB    187.
# Draw the benchmark timings of `mb` as violin plots.
# NOTE(review): autoplot() is not namespace-qualified and no library() call
# is visible here; presumably ggplot2 is attached in a setup chunk. Confirm.
autoplot(mb, type = "violin")

Many breaks


# A fine grid of break points from -2 to 2 in steps of 0.001 (about 4000
# breaks), to see how each function scales with the number of breaks.
many_breaks <- seq(from = -2, to = 2, by = 0.001)

# Same three-way comparison as before, now with 1e4 normal draws and the
# dense break vector. As rnorm() is inside each expression, generating the
# data is included in every timing. check = FALSE skips bench::mark()'s
# comparison of the results, which differ because each call draws new data.
mb_breaks <- bench::mark(
  santoku::chop(rnorm(1e4), many_breaks),
  base::cut(rnorm(1e4), many_breaks),
  Hmisc::cut2(rnorm(1e4), many_breaks),
  min_iterations = 100,
  check = FALSE
)

mb_breaks
#> # A tibble: 3 × 6
#>   expression                            min  median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr>                        <bch:t> <bch:t>     <dbl> <bch:byt>    <dbl>
#> 1 santoku::chop(rnorm(10000), many… 21.39ms 21.81ms      45.6    5.14MB     6.81
#> 2 base::cut(rnorm(10000), many_bre…  2.45ms  2.56ms     391.     1.39MB    17.4 
#> 3 Hmisc::cut2(rnorm(10000), many_b…  7.35ms  7.58ms     131.     5.86MB    25.0
# Draw the benchmark timings of `mb_breaks` as violin plots.
# NOTE(review): autoplot() is not namespace-qualified and no library() call
# is visible here; presumably ggplot2 is attached in a setup chunk. Confirm.
autoplot(mb_breaks, type = "violin")

Various chops


# Test data: 9e4 standard normal draws followed by 1e4 integers sampled with
# replacement from -2:2, so the vector contains many repeated values.
# x is built once, before bench::mark() runs, so data generation is not part
# of the timings below.
x <- c(rnorm(9e4), sample(-2:2, size = 1e4, replace = TRUE))

# Time a range of chop_*() variants (plus dissect()) on the same vector.
# check = FALSE skips bench::mark()'s comparison of results, since the
# different functions use different breaks.
# NOTE(review): these functions are called without a namespace prefix;
# presumably santoku is attached in a setup chunk not shown here. Confirm.
mb_various <- bench::mark(
  chop(x, -2:2),
  chop_equally(x, groups = 20),
  chop_n(x, n = 2e4),
  chop_quantiles(x, c(0.05, 0.25, 0.5, 0.75, 0.95)),
  chop_evenly(x, intervals = 20),
  chop_width(x, width = 0.25),
  chop_proportions(x, proportions = c(0.05, 0.25, 0.5, 0.75, 0.95)),
  chop_mean_sd(x, sds = 1:4),
  chop_fn(x, scales::breaks_extended(10)),
  chop_pretty(x, n = 10),
  chop_spikes(x, -2:2, prop = 0.01),
  dissect(x, -2:2, prop = 0.01),
  min_iterations = 100,
  check = FALSE
)

mb_various
#> # A tibble: 12 × 6
#>    expression                           min  median `itr/sec` mem_alloc `gc/sec`
#>    <bch:expr>                       <bch:t> <bch:t>     <dbl> <bch:byt>    <dbl>
#>  1 chop(x, -2:2)                     4.76ms  5.12ms     194.     8.63MB     79.4
#>  2 chop_equally(x, groups = 20)     10.98ms 11.34ms      88.2   12.19MB     63.9
#>  3 chop_n(x, n = 20000)              8.09ms   8.6ms     115.     23.5MB    272. 
#>  4 chop_quantiles(x, c(0.05, 0.25,…  6.61ms  6.99ms     143.    12.08MB    108. 
#>  5 chop_evenly(x, intervals = 20)    5.74ms  5.95ms     168.    12.48MB    121. 
#>  6 chop_width(x, width = 0.25)       6.19ms  6.49ms     154.    12.54MB    107. 
#>  7 chop_proportions(x, proportions…  4.85ms  5.22ms     191.    12.48MB    139. 
#>  8 chop_mean_sd(x, sds = 1:4)        5.42ms  5.64ms     176.    11.36MB    113. 
#>  9 chop_fn(x, scales::breaks_exten…  5.41ms  5.78ms     174.    11.47MB    116. 
#> 10 chop_pretty(x, n = 10)               5ms  5.26ms     189.    10.57MB     93.2
#> 11 chop_spikes(x, -2:2, prop = 0.0…  8.38ms  8.86ms     113.    14.62MB    138. 
#> 12 dissect(x, -2:2, prop = 0.01)    13.14ms 13.55ms      73.9   22.27MB    154.
# Draw the benchmark timings of `mb_various` as violin plots.
# NOTE(review): autoplot() is not namespace-qualified and no library() call
# is visible here; presumably ggplot2 is attached in a setup chunk. Confirm.
autoplot(mb_various, type = "violin")