Skip to contents

Speed

The core of santoku is written in C++. It is reasonably fast. The benchmarks below compare `santoku::chop()` with `base::cut()` and `Hmisc::cut2()`:


# Show which santoku version produced these timings, then fix the RNG seed
# before any random input is drawn.
packageVersion("santoku")
#> [1] '1.0.0'
set.seed(27101975)

# Benchmark three ways of cutting 1e5 standard-normal draws at the breaks -2:2.
# Each expression calls rnorm() itself, so the sample generation is part of
# what is timed. check = FALSE turns off bench::mark()'s comparison of the
# results of the different expressions; min_iterations = 100 asks for at least
# 100 runs of each expression.
mb <- bench::mark(
        santoku::chop(rnorm(1e5), -2:2),
        base::cut(rnorm(1e5), -2:2),
        Hmisc::cut2(rnorm(1e5), -2:2),
        min_iterations = 100,
        check = FALSE
      )
mb
#> # A tibble: 3 × 6
#>   expression                            min  median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr>                        <bch:t> <bch:t>     <dbl> <bch:byt>    <dbl>
#> 1 santoku::chop(rnorm(1e+05), -2:2)  8.46ms  8.92ms     111.    15.05MB     50.0
#> 2 base::cut(rnorm(1e+05), -2:2)      4.47ms   4.7ms     211.     3.49MB     18.1
#> 3 Hmisc::cut2(rnorm(1e+05), -2:2)   13.39ms 14.53ms      68.8   26.39MB     96.6
# Plot the timing distributions as violins.
# NOTE(review): autoplot() is called unqualified; presumably ggplot2 is
# attached in a setup chunk that is not shown here - confirm.
autoplot(mb, type = "violin")
#> Loading required namespace: tidyr

Dates


# Input: 1e5 dates sampled with replacement from the 365 consecutive days
# starting at 2000-01-01. Breaks: the dates 60, 120 and 180 days after
# 2000-01-01.
dates <- sample(as.Date("2000-01-01") + 0:364, 1e5, replace = TRUE)
break_dates <- as.Date("2000-01-01") + c(60, 120, 180)

# Same three-way comparison as for numeric input, but on Date vectors. The
# input is built once above, so only the cutting itself is timed here.
mb_dates <- bench::mark(
              santoku::chop(dates, break_dates),
              base::cut(dates, break_dates),
              Hmisc::cut2(dates, break_dates),
              min_iterations = 100,
              check = FALSE
            )

mb_dates
#> # A tibble: 3 × 6
#>   expression                             min median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr>                        <bch:tm> <bch:>     <dbl> <bch:byt>    <dbl>
#> 1 santoku::chop(dates, break_dates)   6.19ms 6.69ms      149.   10.62MB     66.9
#> 2 base::cut(dates, break_dates)       2.92ms 3.09ms      321.    3.91MB     32.9
#> 3 Hmisc::cut2(dates, break_dates)      5.7ms 6.03ms      165.   18.41MB    115.
# Violin plot of the timing distributions for the Date benchmark.
autoplot(mb_dates, type = "violin")

Cutting characters (pure R implementation)


# Set the santoku.warn_character option to FALSE for this section, keeping the
# previous option values in `oo` so they can be restored afterwards.
# NOTE(review): presumably this silences a warning santoku emits when chopping
# character vectors - confirm against the santoku documentation.
oo <- options(santoku.warn_character = FALSE)


# Character input: 100 paragraphs of random lorem-ipsum text.
lipsum <- stringi::stri_rand_lipsum(100)

# Compare chopping the character vector at the 26 lower-case letters with
# chopping a numeric vector of the same length (100) at 26 numeric breaks.
# NOTE(review): `length = 100` reaches seq()'s `length.out` argument through
# partial matching; it is left as written because the expression text is
# echoed in the output table below.
mb_pure_r <- bench::mark(
               santoku::chop(lipsum, letters),
               santoku::chop(seq(1, 26, length = 100), 1:26),
               min_iterations = 100,
               check = FALSE
             )

mb_pure_r
#> # A tibble: 2 × 6
#>   expression                             min median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr>                           <bch> <bch:>     <dbl> <bch:byt>    <dbl>
#> 1 santoku::chop(lipsum, letters)       863µs  902µs     1093.   140.4KB     19.5
#> 2 santoku::chop(seq(1, 26, length = 1… 432µs  453µs     2159.    36.9KB     32.8

# Restore the options saved in `oo` at the start of this section.
options(oo)
autoplot(mb_pure_r, type = "violin")

Many breaks


# A dense grid of breaks: every 0.001 from -2 to 2 (about 4000 break points).
many_breaks <- seq(-2, 2, 0.001)

# Three-way comparison with many breaks and a smaller input (1e4 draws). As in
# the first benchmark, rnorm() is called inside each timed expression.
mb_breaks <- bench::mark(
        santoku::chop(rnorm(1e4), many_breaks),
        base::cut(rnorm(1e4), many_breaks),
        Hmisc::cut2(rnorm(1e4), many_breaks),
        min_iterations = 100,
        check = FALSE
      )

mb_breaks
#> # A tibble: 3 × 6
#>   expression                            min  median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr>                        <bch:t> <bch:t>     <dbl> <bch:byt>    <dbl>
#> 1 santoku::chop(rnorm(10000), many… 23.76ms 24.46ms      40.7    5.29MB     5.03
#> 2 base::cut(rnorm(10000), many_bre…  3.62ms  3.75ms     266.     1.54MB    10.7 
#> 3 Hmisc::cut2(rnorm(10000), many_b… 10.22ms  10.5ms      95.1    6.19MB    14.2
# Violin plot of the timing distributions for the many-breaks benchmark.
autoplot(mb_breaks, type = "violin")