Performance
David Hugh-Jones
2024-06-04
Source: vignettes/website-articles/performance.Rmd
performance.Rmd
Speed
The core of santoku is written in C++. It is reasonably fast:
# Print the installed santoku version so the timings below can be tied to it.
packageVersion("santoku")
#> [1] '1.0.0'
# Fix the RNG seed before the benchmarks draw random data.
set.seed(27101975)
# Benchmark santoku::chop() against base::cut() and Hmisc::cut2(), cutting
# 1e5 normal draws at the breaks -2:2.
# Each expression calls rnorm() itself, so generating the data is part of the
# timed (and memory-profiled) code, and every evaluation sees a fresh sample.
# check = FALSE is therefore needed: the three results are not comparable.
# min_iterations = 100 asks for at least 100 timed runs per expression.
mb <- bench::mark(
santoku::chop(rnorm(1e5), -2:2),
base::cut(rnorm(1e5), -2:2),
Hmisc::cut2(rnorm(1e5), -2:2),
min_iterations = 100,
check = FALSE
)
mb
#> # A tibble: 3 × 6
#> expression min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <bch:t> <bch:t> <dbl> <bch:byt> <dbl>
#> 1 santoku::chop(rnorm(1e+05), -2:2) 8.46ms 8.92ms 111. 15.05MB 50.0
#> 2 base::cut(rnorm(1e+05), -2:2) 4.47ms 4.7ms 211. 3.49MB 18.1
#> 3 Hmisc::cut2(rnorm(1e+05), -2:2) 13.39ms 14.53ms 68.8 26.39MB 96.6
# Plot the benchmark results as violin plots.
# NOTE(review): autoplot() is called unqualified; presumably ggplot2 is
# attached in a setup chunk that is not shown here -- confirm.
autoplot(mb, type = "violin")
#> Loading required namespace: tidyr
Dates
# Build the input once: 1e5 dates sampled with replacement from the 365
# consecutive days starting at 2000-01-01.
dates <- sample(as.Date("2000-01-01") + 0:364, 1e5, replace = TRUE)
# Three Date breaks, 60, 120 and 180 days after 2000-01-01.
break_dates <- as.Date("2000-01-01") + c(60, 120, 180)
# Benchmark the three cutting functions on the same precomputed Date vector.
# Unlike the numeric benchmark, no data generation happens inside the timed
# expressions. check = FALSE skips bench::mark()'s comparison of the results.
mb_dates <- bench::mark(
santoku::chop(dates, break_dates),
base::cut(dates, break_dates),
Hmisc::cut2(dates, break_dates),
min_iterations = 100,
check = FALSE
)
mb_dates
#> # A tibble: 3 × 6
#> expression min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <bch:tm> <bch:> <dbl> <bch:byt> <dbl>
#> 1 santoku::chop(dates, break_dates) 6.19ms 6.69ms 149. 10.62MB 66.9
#> 2 base::cut(dates, break_dates) 2.92ms 3.09ms 321. 3.91MB 32.9
#> 3 Hmisc::cut2(dates, break_dates) 5.7ms 6.03ms 165. 18.41MB 115.
# Plot the date benchmark results as violin plots.
autoplot(mb_dates, type = "violin")
Cutting characters (pure R implementation)
# Set santoku.warn_character to FALSE for this section, keeping the previous
# option values in `oo` so they can be restored afterwards.
# NOTE(review): presumably this silences a santoku warning about chopping
# character vectors -- confirm against the santoku documentation.
oo <- options(santoku.warn_character = FALSE)
# Character input for the benchmark, generated by stringi::stri_rand_lipsum(100).
lipsum <- stringi::stri_rand_lipsum(100)
# Compare chopping the character vector at the 26 lower-case letters with
# chopping 100 evenly spaced numbers in [1, 26] at the 26 breaks 1:26.
# NOTE(review): `length = 100` relies on partial matching of seq()'s
# `length.out` argument; spelling it out would change the expression label
# printed in the recorded output below, so it is left as is here.
mb_pure_r <- bench::mark(
santoku::chop(lipsum, letters),
santoku::chop(seq(1, 26, length = 100), 1:26),
min_iterations = 100,
check = FALSE
)
mb_pure_r
#> # A tibble: 2 × 6
#> expression min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <bch> <bch:> <dbl> <bch:byt> <dbl>
#> 1 santoku::chop(lipsum, letters) 863µs 902µs 1093. 140.4KB 19.5
#> 2 santoku::chop(seq(1, 26, length = 1… 432µs 453µs 2159. 36.9KB 32.8
# Restore the option values saved in `oo` above.
options(oo)
# Plot the character benchmark results as violin plots.
autoplot(mb_pure_r, type = "violin")
Many breaks
# A fine grid of breaks from -2 to 2 in steps of 0.001 (4001 values).
many_breaks <- seq(-2, 2, 0.001)
# Benchmark the three cutting functions on 1e4 normal draws with this large
# number of breaks.
# As in the first benchmark, rnorm() is called inside each timed expression,
# so data generation is included in the timings and inputs differ per run;
# check = FALSE skips the comparison of results.
mb_breaks <- bench::mark(
santoku::chop(rnorm(1e4), many_breaks),
base::cut(rnorm(1e4), many_breaks),
Hmisc::cut2(rnorm(1e4), many_breaks),
min_iterations = 100,
check = FALSE
)
mb_breaks
#> # A tibble: 3 × 6
#> expression min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <bch:t> <bch:t> <dbl> <bch:byt> <dbl>
#> 1 santoku::chop(rnorm(10000), many… 23.76ms 24.46ms 40.7 5.29MB 5.03
#> 2 base::cut(rnorm(10000), many_bre… 3.62ms 3.75ms 266. 1.54MB 10.7
#> 3 Hmisc::cut2(rnorm(10000), many_b… 10.22ms 10.5ms 95.1 6.19MB 14.2
# Plot the many-breaks benchmark results as violin plots.
autoplot(mb_breaks, type = "violin")