Skip to contents

Speed

The core of santoku is written in C++. It is reasonably fast:


# Record the installed santoku version alongside the timings.
packageVersion("santoku")
#> [1] '0.10.0'
# Seed the RNG so the random inputs drawn below are reproducible.
set.seed(27101975)

# Compare santoku::chop() with base::cut() and Hmisc::cut2() on 1e5 standard
# normal values and the breaks -2:2.
# Note: rnorm(1e5) sits inside each benchmarked expression, so every timing
# includes generating the data and every evaluation sees a fresh sample.
mb <- bench::mark(
  santoku::chop(rnorm(1e5), -2:2),
  base::cut(rnorm(1e5), -2:2),
  Hmisc::cut2(rnorm(1e5), -2:2),
  min_iterations = 100,  # at least 100 evaluations per expression
  check = FALSE          # results differ (fresh data per call); skip comparison
)
mb
#> # A tibble: 3 × 6
#>   expression                            min  median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr>                        <bch:t> <bch:t>     <dbl> <bch:byt>    <dbl>
#> 1 santoku::chop(rnorm(1e+05), -2:2)  7.97ms  8.44ms     118.    15.13MB     53.2
#> 2 base::cut(rnorm(1e+05), -2:2)      4.27ms  4.43ms     225.     3.49MB     22.5
#> 3 Hmisc::cut2(rnorm(1e+05), -2:2)   12.55ms 13.48ms      74.5    26.3MB    116.
# autoplot() is ggplot2's generic (bench supplies the method for benchmark
# results); qualify it like every other call here so the snippet does not
# depend on ggplot2 having been attached elsewhere.
ggplot2::autoplot(mb, type = "violin")
#> Loading required namespace: tidyr

Dates


# 1e5 dates sampled with replacement from 2000-01-01 plus 0..364 days.
dates <- sample(as.Date("2000-01-01") + 0:364, size = 1e5, replace = TRUE)
# Three break points, 60, 120 and 180 days after 2000-01-01.
break_dates <- as.Date("2000-01-01") + c(60, 120, 180)

# Same three-way comparison as for numeric data, this time on Date input.
# The input vectors are built once above, so only the cutting is timed.
mb_dates <- bench::mark(
  santoku::chop(dates, break_dates),
  base::cut(dates, break_dates),
  Hmisc::cut2(dates, break_dates),
  min_iterations = 100,  # at least 100 evaluations per expression
  check = FALSE          # do not compare the three results with each other
)

mb_dates
#> # A tibble: 3 × 6
#>   expression                             min median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr>                        <bch:tm> <bch:>     <dbl> <bch:byt>    <dbl>
#> 1 santoku::chop(dates, break_dates)   5.87ms 6.22ms      162.   10.62MB     79.6
#> 2 base::cut(dates, break_dates)       3.02ms 3.18ms      314.    3.91MB     31.4
#> 3 Hmisc::cut2(dates, break_dates)     4.97ms 5.48ms      183.   15.35MB     98.7
# autoplot() is ggplot2's generic; qualify it like every other call here so
# the snippet does not depend on ggplot2 having been attached elsewhere.
ggplot2::autoplot(mb_dates, type = "violin")

Cutting characters (pure R implementation)


# Turn off the santoku.warn_character option for this benchmark (presumably
# it silences santoku's warning about character input -- confirm in the
# santoku docs). The previous option values are kept in `oo` and restored
# once the results have been printed.
oo <- options(santoku.warn_character = FALSE)


# 100 paragraphs of random lorem ipsum text, one string per paragraph.
lipsum <- stringi::stri_rand_lipsum(100)

# Chop 100 strings at 26 character breaks versus 100 numbers at 26 numeric
# breaks, so both expressions handle inputs of the same size.
# `length.out` is spelled out in full rather than relying on partial argument
# matching of `length`.
mb_pure_r <- bench::mark(
  santoku::chop(lipsum, letters),
  santoku::chop(seq(1, 26, length.out = 100), 1:26),
  min_iterations = 100,  # at least 100 evaluations per expression
  check = FALSE          # the two expressions are not expected to agree
)

mb_pure_r
#> # A tibble: 2 × 6
#>   expression                             min median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr>                           <bch> <bch:>     <dbl> <bch:byt>    <dbl>
#> 1 santoku::chop(lipsum, letters)       827µs  862µs     1154.     140KB     17.2
#> 2 santoku::chop(seq(1, 26, length.out… 410µs  427µs     2324.    39.4KB     30.2

# Restore the options that were in effect before the benchmark.
options(oo)
# autoplot() is ggplot2's generic; qualify it like every other call here so
# the snippet does not depend on ggplot2 having been attached elsewhere.
ggplot2::autoplot(mb_pure_r, type = "violin")

Many breaks


# 4001 equally spaced breaks from -2 to 2 in steps of 0.001.
many_breaks <- seq(-2, 2, by = 0.001)

# Three-way comparison with a large number of breaks and 1e4 normal values.
# Note: rnorm(1e4) sits inside each benchmarked expression, so every timing
# includes generating the data and every evaluation sees a fresh sample.
mb_breaks <- bench::mark(
  santoku::chop(rnorm(1e4), many_breaks),
  base::cut(rnorm(1e4), many_breaks),
  Hmisc::cut2(rnorm(1e4), many_breaks),
  min_iterations = 100,  # at least 100 evaluations per expression
  check = FALSE          # results differ (fresh data per call); skip comparison
)

mb_breaks
#> # A tibble: 3 × 6
#>   expression                            min  median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr>                        <bch:t> <bch:t>     <dbl> <bch:byt>    <dbl>
#> 1 santoku::chop(rnorm(10000), many… 22.64ms 23.16ms      43.1    5.11MB     5.33
#> 2 base::cut(rnorm(10000), many_bre…  3.48ms  3.65ms     274.     1.54MB    10.6 
#> 3 Hmisc::cut2(rnorm(10000), many_b…   9.8ms 10.11ms      98.6     6.2MB    14.7
# autoplot() is ggplot2's generic; qualify it like every other call here so
# the snippet does not depend on ggplot2 having been attached elsewhere.
ggplot2::autoplot(mb_breaks, type = "violin")