Performance
David Hugh-Jones
2024-01-02
Source: vignettes/website-articles/performance.Rmd
performance.Rmd
Speed
The core of santoku is written in C++. It is reasonably fast:
# Record which santoku version the timings below were produced with.
packageVersion("santoku")
#> [1] '0.10.0'
# Fix the RNG seed so the random inputs drawn below are reproducible.
set.seed(27101975)
# Benchmark three ways of cutting 1e5 normal draws at the integer breaks -2..2.
# rnorm() sits inside each timed expression, so every iteration draws a fresh
# sample and the reported timings include the cost of generating it.
mb <- bench::mark(
santoku::chop(rnorm(1e5), -2:2),
base::cut(rnorm(1e5), -2:2),
Hmisc::cut2(rnorm(1e5), -2:2),
# Run each expression at least 100 times.
min_iterations = 100,
# Do not require the expressions to return equal results: each call cuts a
# different random sample, so the results cannot match.
check = FALSE
)
mb
#> # A tibble: 3 × 6
#> expression min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <bch:t> <bch:t> <dbl> <bch:byt> <dbl>
#> 1 santoku::chop(rnorm(1e+05), -2:2) 7.97ms 8.44ms 118. 15.13MB 53.2
#> 2 base::cut(rnorm(1e+05), -2:2) 4.27ms 4.43ms 225. 3.49MB 22.5
#> 3 Hmisc::cut2(rnorm(1e+05), -2:2) 12.55ms 13.48ms 74.5 26.3MB 116.
# Plot the timing distribution of each expression as a violin plot.
# NOTE(review): autoplot() is called unqualified; presumably ggplot2 is
# attached in a setup chunk that is not shown here - confirm.
autoplot(mb, type = "violin")
#> Loading required namespace: tidyr
Dates
# Draw 1e5 dates, with replacement, from the 365 consecutive days starting at
# 2000-01-01. The input is built once, outside the timed expressions, so all
# three functions cut the same vector.
dates <- sample(as.Date("2000-01-01") + 0:364, 1e5, replace = TRUE)
# Three break points: 60, 120 and 180 days after 2000-01-01.
break_dates <- as.Date("2000-01-01") + c(60, 120, 180)
# Benchmark the three cutting functions on Date input.
mb_dates <- bench::mark(
santoku::chop(dates, break_dates),
base::cut(dates, break_dates),
Hmisc::cut2(dates, break_dates),
# Run each expression at least 100 times.
min_iterations = 100,
# Do not require the three expressions to return equal results.
check = FALSE
)
mb_dates
#> # A tibble: 3 × 6
#> expression min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <bch:tm> <bch:> <dbl> <bch:byt> <dbl>
#> 1 santoku::chop(dates, break_dates) 5.87ms 6.22ms 162. 10.62MB 79.6
#> 2 base::cut(dates, break_dates) 3.02ms 3.18ms 314. 3.91MB 31.4
#> 3 Hmisc::cut2(dates, break_dates) 4.97ms 5.48ms 183. 15.35MB 98.7
# Plot the timing distribution of each expression as a violin plot.
# NOTE(review): autoplot() is called unqualified; presumably ggplot2 is
# attached in a setup chunk that is not shown here - confirm.
autoplot(mb_dates, type = "violin")
Cutting characters (pure R implementation)
# Set the santoku.warn_character option to FALSE for this benchmark, keeping
# the previous value in `oo` so it can be restored afterwards. Presumably this
# silences santoku's warning about chopping character vectors - confirm.
oo <- options(santoku.warn_character = FALSE)
# Random lorem-ipsum text to use as character input.
lipsum <- stringi::stri_rand_lipsum(100)
# Compare chopping character data at the 26 lowercase letters with chopping
# numeric data at 26 numeric breaks (1:26), as a like-for-like baseline.
mb_pure_r <- bench::mark(
santoku::chop(lipsum, letters),
# NOTE(review): `length = 100` relies on partial matching of seq()'s
# `length.out` argument. Spelling it out would change the expression label
# echoed in the recorded output below, so it is left unchanged here.
santoku::chop(seq(1, 26, length = 100), 1:26),
# Run each expression at least 100 times.
min_iterations = 100,
# Do not require the expressions to return equal results; the two inputs
# are different data.
check = FALSE
)
mb_pure_r
#> # A tibble: 2 × 6
#> expression min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <bch> <bch:> <dbl> <bch:byt> <dbl>
#> 1 santoku::chop(lipsum, letters) 827µs 862µs 1154. 140KB 17.2
#> 2 santoku::chop(seq(1, 26, length = 1… 410µs 427µs 2324. 39.4KB 30.2
# Restore the option value saved in `oo` above.
options(oo)
# Plot the timing distribution of each expression as a violin plot.
# NOTE(review): autoplot() is called unqualified; presumably ggplot2 is
# attached in a setup chunk that is not shown here - confirm.
autoplot(mb_pure_r, type = "violin")
Many breaks
# A dense grid of breaks from -2 to 2 in steps of 0.001 (about 4,000 breaks),
# to show how each function scales with the number of breaks.
many_breaks <- seq(-2, 2, 0.001)
# Benchmark the three cutting functions on 1e4 normal draws. As in the first
# benchmark, rnorm() is inside each timed expression, so every iteration draws
# a fresh sample and the timings include that cost.
mb_breaks <- bench::mark(
santoku::chop(rnorm(1e4), many_breaks),
base::cut(rnorm(1e4), many_breaks),
Hmisc::cut2(rnorm(1e4), many_breaks),
# Run each expression at least 100 times.
min_iterations = 100,
# Do not require the expressions to return equal results: each call cuts a
# different random sample, so the results cannot match.
check = FALSE
)
mb_breaks
#> # A tibble: 3 × 6
#> expression min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <bch:t> <bch:t> <dbl> <bch:byt> <dbl>
#> 1 santoku::chop(rnorm(10000), many… 22.64ms 23.16ms 43.1 5.11MB 5.33
#> 2 base::cut(rnorm(10000), many_bre… 3.48ms 3.65ms 274. 1.54MB 10.6
#> 3 Hmisc::cut2(rnorm(10000), many_b… 9.8ms 10.11ms 98.6 6.2MB 14.7
# Plot the timing distribution of each expression as a violin plot.
# NOTE(review): autoplot() is called unqualified; presumably ggplot2 is
# attached in a setup chunk that is not shown here - confirm.
autoplot(mb_breaks, type = "violin")