mean2_subset - HenrikBengtsson/matrixStats GitHub Wiki

matrixStats: Benchmark report


mean2() benchmarks on subsetted computation

This report benchmarks the performance of mean2() on subsetted computation.

Data type "integer"

Data

> rvector <- function(n, mode = c("logical", "double", "integer"), range = c(-100, +100), na_prob = 0) {
+     mode <- match.arg(mode)
+     if (mode == "logical") {
+         x <- sample(c(FALSE, TRUE), size = n, replace = TRUE)
+     }     else {
+         x <- runif(n, min = range[1], max = range[2])
+     }
+     storage.mode(x) <- mode
+     if (na_prob > 0) 
+         x[sample(n, size = na_prob * n)] <- NA
+     x
+ }
> rvectors <- function(scale = 10, seed = 1, ...) {
+     set.seed(seed)
+     data <- list()
+     data[[1]] <- rvector(n = scale * 100, ...)
+     data[[2]] <- rvector(n = scale * 1000, ...)
+     data[[3]] <- rvector(n = scale * 10000, ...)
+     data[[4]] <- rvector(n = scale * 1e+05, ...)
+     data[[5]] <- rvector(n = scale * 1e+06, ...)
+     names(data) <- sprintf("n = %d", sapply(data, FUN = length))
+     data
+ }
> data <- rvectors(mode = mode)

Results

n = 1000 vector

> x <- data[["n = 1000"]]
> idxs <- sample.int(length(x), size = length(x) * 0.7)
> x_S <- x[idxs]
> gc()
           used  (Mb) gc trigger (Mb) max used  (Mb)
Ncells  3237341 172.9    5709258  305  5709258 305.0
Vcells 12940607  98.8   28038728  214 87357391 666.5
> stats <- microbenchmark(mean2_x_S = mean2(x_S, refine = TRUE), mean2_x_S_no_refine = mean2(x_S, refine = FALSE), 
+     `mean2(x, idxs)` = mean2(x, idxs = idxs, refine = TRUE), `mean2_no_refine(x, idxs)` = mean2(x, 
+         idxs = idxs, refine = FALSE), `mean2(x[idxs])` = mean2(x[idxs], refine = TRUE), `mean2_no_refine(x[idxs])` = mean2(x[idxs], 
+         refine = FALSE), unit = "ms")

Table: Benchmarking of mean2_x_S(), mean2_x_S_no_refine(), mean2(x, idxs)(), mean2_no_refine(x, idxs)(), mean2(x[idxs])() and mean2_no_refine(x[idxs])() on integer+n = 1000 data. The top panel shows times in milliseconds and the bottom panel shows relative times.

expr min lq mean median uq max
1 mean2_x_S 0.001391 0.0014275 0.0014746 0.0014445 0.0014750 0.001891
2 mean2_x_S_no_refine 0.001392 0.0014325 0.0014915 0.0014470 0.0015025 0.002195
4 mean2_no_refine(x, idxs) 0.002036 0.0020680 0.0021376 0.0020810 0.0021125 0.004911
3 mean2(x, idxs) 0.002022 0.0020710 0.0021228 0.0020830 0.0021230 0.002821
5 mean2(x[idxs]) 0.002792 0.0029155 0.0030071 0.0029735 0.0030495 0.003842
6 mean2_no_refine(x[idxs]) 0.002820 0.0029085 0.0039671 0.0029995 0.0031200 0.096489
expr min lq mean median uq max
1 mean2_x_S 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000
2 mean2_x_S_no_refine 1.000719 1.003503 1.011509 1.001731 1.018644 1.160762
4 mean2_no_refine(x, idxs) 1.463695 1.448686 1.449683 1.440637 1.432203 2.597039
3 mean2(x, idxs) 1.453631 1.450788 1.439660 1.442022 1.439322 1.491803
5 mean2(x[idxs]) 2.007189 2.042382 2.039327 2.058498 2.067458 2.031729
6 mean2_no_refine(x[idxs]) 2.027318 2.037478 2.690380 2.076497 2.115254 51.025383

Figure: Benchmarking of mean2_x_S(), mean2_x_S_no_refine(), mean2(x, idxs)(), mean2_no_refine(x, idxs)(), mean2(x[idxs])() and mean2_no_refine(x[idxs])() on integer+n = 1000 data. Outliers are displayed as crosses. Times are in milliseconds.

n = 10000 vector

> x <- data[["n = 10000"]]
> idxs <- sample.int(length(x), size = length(x) * 0.7)
> x_S <- x[idxs]
> gc()
           used  (Mb) gc trigger (Mb) max used  (Mb)
Ncells  3233777 172.8    5709258  305  5709258 305.0
Vcells 11808939  90.1   28038728  214 87357391 666.5
> stats <- microbenchmark(mean2_x_S = mean2(x_S, refine = TRUE), mean2_x_S_no_refine = mean2(x_S, refine = FALSE), 
+     `mean2(x, idxs)` = mean2(x, idxs = idxs, refine = TRUE), `mean2_no_refine(x, idxs)` = mean2(x, 
+         idxs = idxs, refine = FALSE), `mean2(x[idxs])` = mean2(x[idxs], refine = TRUE), `mean2_no_refine(x[idxs])` = mean2(x[idxs], 
+         refine = FALSE), unit = "ms")

Table: Benchmarking of mean2_x_S(), mean2_x_S_no_refine(), mean2(x, idxs)(), mean2_no_refine(x, idxs)(), mean2(x[idxs])() and mean2_no_refine(x[idxs])() on integer+n = 10000 data. The top panel shows times in milliseconds and the bottom panel shows relative times.

expr min lq mean median uq max
1 mean2_x_S 0.007993 0.0080635 0.0082027 0.0081170 0.0081970 0.011061
2 mean2_x_S_no_refine 0.007994 0.0080785 0.0083935 0.0081425 0.0082375 0.031185
4 mean2_no_refine(x, idxs) 0.013556 0.0136325 0.0137183 0.0136720 0.0137450 0.014743
3 mean2(x, idxs) 0.013537 0.0136295 0.0137370 0.0136985 0.0137950 0.014099
5 mean2(x[idxs]) 0.019365 0.0197740 0.0202089 0.0199010 0.0201385 0.033319
6 mean2_no_refine(x[idxs]) 0.019468 0.0198135 0.0203902 0.0199815 0.0201645 0.032497
expr min lq mean median uq max
1 mean2_x_S 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000
2 mean2_x_S_no_refine 1.000125 1.001860 1.023257 1.003142 1.004941 2.819365
4 mean2_no_refine(x, idxs) 1.695984 1.690643 1.672411 1.684366 1.676833 1.332881
3 mean2(x, idxs) 1.693607 1.690271 1.674683 1.687631 1.682933 1.274659
5 mean2(x[idxs]) 2.422745 2.452285 2.463677 2.451768 2.456813 3.012296
6 mean2_no_refine(x[idxs]) 2.435631 2.457184 2.485788 2.461685 2.459985 2.937980

Figure: Benchmarking of mean2_x_S(), mean2_x_S_no_refine(), mean2(x, idxs)(), mean2_no_refine(x, idxs)(), mean2(x[idxs])() and mean2_no_refine(x[idxs])() on integer+n = 10000 data. Outliers are displayed as crosses. Times are in milliseconds.

n = 100000 vector

> x <- data[["n = 100000"]]
> idxs <- sample.int(length(x), size = length(x) * 0.7)
> x_S <- x[idxs]
> gc()
           used  (Mb) gc trigger (Mb) max used  (Mb)
Ncells  3233876 172.8    5709258  305  5709258 305.0
Vcells 11872517  90.6   28038728  214 87357391 666.5
> stats <- microbenchmark(mean2_x_S = mean2(x_S, refine = TRUE), mean2_x_S_no_refine = mean2(x_S, refine = FALSE), 
+     `mean2(x, idxs)` = mean2(x, idxs = idxs, refine = TRUE), `mean2_no_refine(x, idxs)` = mean2(x, 
+         idxs = idxs, refine = FALSE), `mean2(x[idxs])` = mean2(x[idxs], refine = TRUE), `mean2_no_refine(x[idxs])` = mean2(x[idxs], 
+         refine = FALSE), unit = "ms")

Table: Benchmarking of mean2_x_S(), mean2_x_S_no_refine(), mean2(x, idxs)(), mean2_no_refine(x, idxs)(), mean2(x[idxs])() and mean2_no_refine(x[idxs])() on integer+n = 100000 data. The top panel shows times in milliseconds and the bottom panel shows relative times.

expr min lq mean median uq max
1 mean2_x_S 0.073192 0.0733580 0.0735297 0.0734010 0.0735615 0.076538
2 mean2_x_S_no_refine 0.073240 0.0733670 0.0739529 0.0734355 0.0735930 0.100184
4 mean2_no_refine(x, idxs) 0.139412 0.1395635 0.1418755 0.1396525 0.1398425 0.216691
3 mean2(x, idxs) 0.139339 0.1395675 0.1402904 0.1396880 0.1398400 0.163529
5 mean2(x[idxs]) 0.222900 0.2238220 0.2259060 0.2243030 0.2255700 0.293821
6 mean2_no_refine(x[idxs]) 0.223118 0.2237985 0.2253905 0.2243285 0.2250305 0.259972
expr min lq mean median uq max
1 mean2_x_S 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000
2 mean2_x_S_no_refine 1.000656 1.000123 1.005756 1.000470 1.000428 1.308945
4 mean2_no_refine(x, idxs) 1.904744 1.902499 1.929499 1.902597 1.901028 2.831156
3 mean2(x, idxs) 1.903746 1.902553 1.907943 1.903080 1.900994 2.136573
5 mean2(x[idxs]) 3.045415 3.051092 3.072310 3.055858 3.066414 3.838891
6 mean2_no_refine(x[idxs]) 3.048393 3.050772 3.065300 3.056205 3.059080 3.396640

Figure: Benchmarking of mean2_x_S(), mean2_x_S_no_refine(), mean2(x, idxs)(), mean2_no_refine(x, idxs)(), mean2(x[idxs])() and mean2_no_refine(x[idxs])() on integer+n = 100000 data. Outliers are displayed as crosses. Times are in milliseconds.

n = 1000000 vector

> x <- data[["n = 1000000"]]
> idxs <- sample.int(length(x), size = length(x) * 0.7)
> x_S <- x[idxs]
> gc()
           used  (Mb) gc trigger (Mb) max used  (Mb)
Ncells  3233975 172.8    5709258  305  5709258 305.0
Vcells 12502795  95.4   28038728  214 87357391 666.5
> stats <- microbenchmark(mean2_x_S = mean2(x_S, refine = TRUE), mean2_x_S_no_refine = mean2(x_S, refine = FALSE), 
+     `mean2(x, idxs)` = mean2(x, idxs = idxs, refine = TRUE), `mean2_no_refine(x, idxs)` = mean2(x, 
+         idxs = idxs, refine = FALSE), `mean2(x[idxs])` = mean2(x[idxs], refine = TRUE), `mean2_no_refine(x[idxs])` = mean2(x[idxs], 
+         refine = FALSE), unit = "ms")

Table: Benchmarking of mean2_x_S(), mean2_x_S_no_refine(), mean2(x, idxs)(), mean2_no_refine(x, idxs)(), mean2(x[idxs])() and mean2_no_refine(x[idxs])() on integer+n = 1000000 data. The top panel shows times in milliseconds and the bottom panel shows relative times.

expr min lq mean median uq max
2 mean2_x_S_no_refine 0.819987 0.864055 0.8904314 0.872912 0.9101030 1.059168
1 mean2_x_S 0.746681 0.864348 0.8836420 0.874443 0.9106205 1.020441
3 mean2(x, idxs) 2.070680 2.352730 2.4511763 2.445139 2.5226930 2.946063
4 mean2_no_refine(x, idxs) 2.045235 2.341083 2.4554177 2.446915 2.5772595 2.792526
5 mean2(x[idxs]) 3.233744 4.660319 4.9976479 4.812597 5.0010920 15.858845
6 mean2_no_refine(x[idxs]) 3.281051 4.671983 4.9904019 4.823245 4.9698770 16.243384
expr min lq mean median uq max
2 mean2_x_S_no_refine 1.000000 1.000000 1.0000000 1.000000 1.000000 1.0000000
1 mean2_x_S 0.910601 1.000339 0.9923752 1.001754 1.000569 0.9634364
3 mean2(x, idxs) 2.525260 2.722893 2.7527964 2.801129 2.771876 2.7814879
4 mean2_no_refine(x, idxs) 2.494229 2.709415 2.7575596 2.803164 2.831833 2.6365279
5 mean2(x[idxs]) 3.943653 5.393544 5.6126142 5.513267 5.495083 14.9729269
6 mean2_no_refine(x[idxs]) 4.001345 5.407044 5.6044766 5.525465 5.460785 15.3359845

Figure: Benchmarking of mean2_x_S(), mean2_x_S_no_refine(), mean2(x, idxs)(), mean2_no_refine(x, idxs)(), mean2(x[idxs])() and mean2_no_refine(x[idxs])() on integer+n = 1000000 data. Outliers are displayed as crosses. Times are in milliseconds.

n = 10000000 vector

> x <- data[["n = 10000000"]]
> idxs <- sample.int(length(x), size = length(x) * 0.7)
> x_S <- x[idxs]
> gc()
           used  (Mb) gc trigger  (Mb) max used  (Mb)
Ncells  3234071 172.8    5709258 305.0  5709258 305.0
Vcells 18803453 143.5   33726473 257.4 87357391 666.5
> stats <- microbenchmark(mean2_x_S = mean2(x_S, refine = TRUE), mean2_x_S_no_refine = mean2(x_S, refine = FALSE), 
+     `mean2(x, idxs)` = mean2(x, idxs = idxs, refine = TRUE), `mean2_no_refine(x, idxs)` = mean2(x, 
+         idxs = idxs, refine = FALSE), `mean2(x[idxs])` = mean2(x[idxs], refine = TRUE), `mean2_no_refine(x[idxs])` = mean2(x[idxs], 
+         refine = FALSE), unit = "ms")

Table: Benchmarking of mean2_x_S(), mean2_x_S_no_refine(), mean2(x, idxs)(), mean2_no_refine(x, idxs)(), mean2(x[idxs])() and mean2_no_refine(x[idxs])() on integer+n = 10000000 data. The top panel shows times in milliseconds and the bottom panel shows relative times.

expr min lq mean median uq max
2 mean2_x_S_no_refine 9.053797 12.12507 13.27875 12.24600 16.13622 18.12965
1 mean2_x_S 9.069925 11.65445 13.40851 12.38970 16.42170 18.62974
4 mean2_no_refine(x, idxs) 80.103785 95.49647 99.35035 97.53584 104.74655 117.62011
3 mean2(x, idxs) 79.685783 96.42031 100.16765 99.56728 105.24282 111.31561
5 mean2(x[idxs]) 128.063078 135.78485 146.60189 141.03012 145.42353 418.95981
6 mean2_no_refine(x[idxs]) 114.357685 136.14016 140.93776 141.52260 145.64412 165.26707
expr min lq mean median uq max
2 mean2_x_S_no_refine 1.000000 1.0000000 1.000000 1.000000 1.000000 1.000000
1 mean2_x_S 1.001781 0.9611864 1.009772 1.011735 1.017692 1.027584
4 mean2_no_refine(x, idxs) 8.847535 7.8759513 7.481908 7.964710 6.491391 6.487721
3 mean2(x, idxs) 8.801366 7.9521435 7.543457 8.130596 6.522146 6.139976
5 mean2(x[idxs]) 14.144682 11.1986848 11.040342 11.516423 9.012240 23.109094
6 mean2_no_refine(x[idxs]) 12.630909 11.2279881 10.613786 11.556639 9.025911 9.115844

Figure: Benchmarking of mean2_x_S(), mean2_x_S_no_refine(), mean2(x, idxs)(), mean2_no_refine(x, idxs)(), mean2(x[idxs])() and mean2_no_refine(x[idxs])() on integer+n = 10000000 data. Outliers are displayed as crosses. Times are in milliseconds.

Data type "double"

Data

> rvector <- function(n, mode = c("logical", "double", "integer"), range = c(-100, +100), na_prob = 0) {
+     mode <- match.arg(mode)
+     if (mode == "logical") {
+         x <- sample(c(FALSE, TRUE), size = n, replace = TRUE)
+     }     else {
+         x <- runif(n, min = range[1], max = range[2])
+     }
+     storage.mode(x) <- mode
+     if (na_prob > 0) 
+         x[sample(n, size = na_prob * n)] <- NA
+     x
+ }
> rvectors <- function(scale = 10, seed = 1, ...) {
+     set.seed(seed)
+     data <- list()
+     data[[1]] <- rvector(n = scale * 100, ...)
+     data[[2]] <- rvector(n = scale * 1000, ...)
+     data[[3]] <- rvector(n = scale * 10000, ...)
+     data[[4]] <- rvector(n = scale * 1e+05, ...)
+     data[[5]] <- rvector(n = scale * 1e+06, ...)
+     names(data) <- sprintf("n = %d", sapply(data, FUN = length))
+     data
+ }
> data <- rvectors(mode = mode)

Results

n = 1000 vector

> x <- data[["n = 1000"]]
> idxs <- sample.int(length(x), size = length(x) * 0.7)
> x_S <- x[idxs]
> gc()
           used  (Mb) gc trigger  (Mb) max used  (Mb)
Ncells  3234179 172.8    5709258 305.0  5709258 305.0
Vcells 17360460 132.5   40551767 309.4 87357391 666.5
> stats <- microbenchmark(mean2_x_S = mean2(x_S, refine = TRUE), mean2_x_S_no_refine = mean2(x_S, refine = FALSE), 
+     `mean2(x, idxs)` = mean2(x, idxs = idxs, refine = TRUE), `mean2_no_refine(x, idxs)` = mean2(x, 
+         idxs = idxs, refine = FALSE), `mean2(x[idxs])` = mean2(x[idxs], refine = TRUE), `mean2_no_refine(x[idxs])` = mean2(x[idxs], 
+         refine = FALSE), unit = "ms")

Table: Benchmarking of mean2_x_S(), mean2_x_S_no_refine(), mean2(x, idxs)(), mean2_no_refine(x, idxs)(), mean2(x[idxs])() and mean2_no_refine(x[idxs])() on double+n = 1000 data. The top panel shows times in milliseconds and the bottom panel shows relative times.

expr min lq mean median uq max
2 mean2_x_S_no_refine 0.001412 0.0014460 0.0014958 0.0014610 0.0014805 0.002581
4 mean2_no_refine(x, idxs) 0.002062 0.0020965 0.0021521 0.0021165 0.0021555 0.002529
1 mean2_x_S 0.002132 0.0021630 0.0022130 0.0021830 0.0022170 0.002508
3 mean2(x, idxs) 0.002782 0.0028230 0.0028867 0.0028415 0.0029060 0.004155
6 mean2_no_refine(x[idxs]) 0.002972 0.0031115 0.0034228 0.0031920 0.0033410 0.017197
5 mean2(x[idxs]) 0.003691 0.0038675 0.0040252 0.0039535 0.0041580 0.005126
expr min lq mean median uq max
2 mean2_x_S_no_refine 1.000000 1.000000 1.000000 1.000000 1.000000 1.0000000
4 mean2_no_refine(x, idxs) 1.460340 1.449862 1.438747 1.448665 1.455927 0.9798528
1 mean2_x_S 1.509915 1.495851 1.479460 1.494182 1.497467 0.9717164
3 mean2(x, idxs) 1.970255 1.952282 1.929779 1.944901 1.962850 1.6098411
6 mean2_no_refine(x[idxs]) 2.104816 2.151798 2.288177 2.184805 2.256670 6.6629213
5 mean2(x[idxs]) 2.614023 2.674620 2.690898 2.706023 2.808511 1.9860519

Figure: Benchmarking of mean2_x_S(), mean2_x_S_no_refine(), mean2(x, idxs)(), mean2_no_refine(x, idxs)(), mean2(x[idxs])() and mean2_no_refine(x[idxs])() on double+n = 1000 data. Outliers are displayed as crosses. Times are in milliseconds.

n = 10000 vector

> x <- data[["n = 10000"]]
> idxs <- sample.int(length(x), size = length(x) * 0.7)
> x_S <- x[idxs]
> gc()
           used  (Mb) gc trigger  (Mb) max used  (Mb)
Ncells  3234272 172.8    5709258 305.0  5709258 305.0
Vcells 17370413 132.6   40551767 309.4 87357391 666.5
> stats <- microbenchmark(mean2_x_S = mean2(x_S, refine = TRUE), mean2_x_S_no_refine = mean2(x_S, refine = FALSE), 
+     `mean2(x, idxs)` = mean2(x, idxs = idxs, refine = TRUE), `mean2_no_refine(x, idxs)` = mean2(x, 
+         idxs = idxs, refine = FALSE), `mean2(x[idxs])` = mean2(x[idxs], refine = TRUE), `mean2_no_refine(x[idxs])` = mean2(x[idxs], 
+         refine = FALSE), unit = "ms")

Table: Benchmarking of mean2_x_S(), mean2_x_S_no_refine(), mean2(x, idxs)(), mean2_no_refine(x, idxs)(), mean2(x[idxs])() and mean2_no_refine(x[idxs])() on double+n = 10000 data. The top panel shows times in milliseconds and the bottom panel shows relative times.

expr min lq mean median uq max
2 mean2_x_S_no_refine 0.008046 0.0082290 0.0084524 0.0082940 0.0083945 0.021646
4 mean2_no_refine(x, idxs) 0.013675 0.0138290 0.0139197 0.0139055 0.0139785 0.014878
1 mean2_x_S 0.015307 0.0154260 0.0155253 0.0155170 0.0156065 0.015898
3 mean2(x, idxs) 0.020930 0.0210695 0.0212282 0.0211535 0.0212655 0.024405
6 mean2_no_refine(x[idxs]) 0.021730 0.0222470 0.0230560 0.0224820 0.0227340 0.046212
5 mean2(x[idxs]) 0.029109 0.0295505 0.0299383 0.0297495 0.0299955 0.035199
expr min lq mean median uq max
2 mean2_x_S_no_refine 1.000000 1.000000 1.000000 1.000000 1.000000 1.0000000
4 mean2_no_refine(x, idxs) 1.699602 1.680520 1.646840 1.676573 1.665198 0.6873325
1 mean2_x_S 1.902436 1.874590 1.836793 1.870871 1.859134 0.7344544
3 mean2(x, idxs) 2.601293 2.560396 2.511502 2.550458 2.533266 1.1274600
6 mean2_no_refine(x[idxs]) 2.700721 2.703488 2.727750 2.710634 2.708202 2.1348979
5 mean2(x[idxs]) 3.617822 3.591020 3.541990 3.586870 3.573233 1.6261203

Figure: Benchmarking of mean2_x_S(), mean2_x_S_no_refine(), mean2(x, idxs)(), mean2_no_refine(x, idxs)(), mean2(x[idxs])() and mean2_no_refine(x[idxs])() on double+n = 10000 data. Outliers are displayed as crosses. Times are in milliseconds.

n = 100000 vector

> x <- data[["n = 100000"]]
> idxs <- sample.int(length(x), size = length(x) * 0.7)
> x_S <- x[idxs]
> gc()
           used  (Mb) gc trigger  (Mb) max used  (Mb)
Ncells  3234371 172.8    5709258 305.0  5709258 305.0
Vcells 17465522 133.3   40551767 309.4 87357391 666.5
> stats <- microbenchmark(mean2_x_S = mean2(x_S, refine = TRUE), mean2_x_S_no_refine = mean2(x_S, refine = FALSE), 
+     `mean2(x, idxs)` = mean2(x, idxs = idxs, refine = TRUE), `mean2_no_refine(x, idxs)` = mean2(x, 
+         idxs = idxs, refine = FALSE), `mean2(x[idxs])` = mean2(x[idxs], refine = TRUE), `mean2_no_refine(x[idxs])` = mean2(x[idxs], 
+         refine = FALSE), unit = "ms")

Table: Benchmarking of mean2_x_S(), mean2_x_S_no_refine(), mean2(x, idxs)(), mean2_no_refine(x, idxs)(), mean2(x[idxs])() and mean2_no_refine(x[idxs])() on double+n = 100000 data. The top panel shows times in milliseconds and the bottom panel shows relative times.

expr min lq mean median uq max
2 mean2_x_S_no_refine 0.073383 0.0735780 0.0748593 0.0739205 0.0747670 0.112598
1 mean2_x_S 0.145355 0.1455485 0.1467679 0.1458990 0.1471985 0.159998
4 mean2_no_refine(x, idxs) 0.151656 0.1520600 0.1542243 0.1522575 0.1524755 0.310611
3 mean2(x, idxs) 0.251454 0.2519835 0.2533889 0.2521780 0.2524295 0.288298
6 mean2_no_refine(x[idxs]) 0.257278 0.2661055 0.3567621 0.3921090 0.3957230 0.410325
5 mean2(x[idxs]) 0.331040 0.4081495 0.4351360 0.4647425 0.4678355 0.514656
expr min lq mean median uq max
2 mean2_x_S_no_refine 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000
1 mean2_x_S 1.980772 1.978152 1.960583 1.973728 1.968763 1.420967
4 mean2_no_refine(x, idxs) 2.066637 2.066650 2.060188 2.059747 2.039342 2.758584
3 mean2(x, idxs) 3.426597 3.424713 3.384868 3.411476 3.376215 2.560418
6 mean2_no_refine(x[idxs]) 3.505962 3.616645 4.765767 5.304469 5.292750 3.644159
5 mean2(x[idxs]) 4.511127 5.547168 5.812717 6.287058 6.257246 4.570738

Figure: Benchmarking of mean2_x_S(), mean2_x_S_no_refine(), mean2(x, idxs)(), mean2_no_refine(x, idxs)(), mean2(x[idxs])() and mean2_no_refine(x[idxs])() on double+n = 100000 data. Outliers are displayed as crosses. Times are in milliseconds.

n = 1000000 vector

> x <- data[["n = 1000000"]]
> idxs <- sample.int(length(x), size = length(x) * 0.7)
> x_S <- x[idxs]
> gc()
           used  (Mb) gc trigger  (Mb) max used  (Mb)
Ncells  3234470 172.8    5709258 305.0  5709258 305.0
Vcells 18410589 140.5   40551767 309.4 87357391 666.5
> stats <- microbenchmark(mean2_x_S = mean2(x_S, refine = TRUE), mean2_x_S_no_refine = mean2(x_S, refine = FALSE), 
+     `mean2(x, idxs)` = mean2(x, idxs = idxs, refine = TRUE), `mean2_no_refine(x, idxs)` = mean2(x, 
+         idxs = idxs, refine = FALSE), `mean2(x[idxs])` = mean2(x[idxs], refine = TRUE), `mean2_no_refine(x[idxs])` = mean2(x[idxs], 
+         refine = FALSE), unit = "ms")

Table: Benchmarking of mean2_x_S(), mean2_x_S_no_refine(), mean2(x, idxs)(), mean2_no_refine(x, idxs)(), mean2(x[idxs])() and mean2_no_refine(x[idxs])() on double+n = 1000000 data. The top panel shows times in milliseconds and the bottom panel shows relative times.

expr min lq mean median uq max
2 mean2_x_S_no_refine 1.109013 1.205344 1.248485 1.247311 1.268617 1.450124
1 mean2_x_S 2.270884 2.405847 2.495035 2.488111 2.528594 2.798773
4 mean2_no_refine(x, idxs) 5.103129 5.342005 5.462168 5.455947 5.544610 6.269207
6 mean2_no_refine(x[idxs]) 6.055889 7.278324 8.882099 9.561957 9.753134 18.643234
3 mean2(x, idxs) 8.936119 9.448269 9.695674 9.636117 9.842493 11.401684
5 mean2(x[idxs]) 7.863381 8.747281 10.445387 10.761168 11.028589 20.290996
expr min lq mean median uq max
2 mean2_x_S_no_refine 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000
1 mean2_x_S 2.047662 1.995984 1.998450 1.994780 1.993190 1.930023
4 mean2_no_refine(x, idxs) 4.601505 4.431934 4.375036 4.374168 4.370596 4.323221
6 mean2_no_refine(x[idxs]) 5.460611 6.038379 7.114300 7.666057 7.688008 12.856303
3 mean2(x, idxs) 8.057723 7.838649 7.765950 7.725513 7.758447 7.862558
5 mean2(x[idxs]) 7.090432 7.257083 8.366448 8.627494 8.693399 13.992594

Figure: Benchmarking of mean2_x_S(), mean2_x_S_no_refine(), mean2(x, idxs)(), mean2_no_refine(x, idxs)(), mean2(x[idxs])() and mean2_no_refine(x[idxs])() on double+n = 1000000 data. Outliers are displayed as crosses. Times are in milliseconds.

n = 10000000 vector

> x <- data[["n = 10000000"]]
> idxs <- sample.int(length(x), size = length(x) * 0.7)
> x_S <- x[idxs]
> gc()
           used  (Mb) gc trigger  (Mb) max used  (Mb)
Ncells  3234569 172.8    5709258 305.0  5709258 305.0
Vcells 27861305 212.6   48742120 371.9 87357391 666.5
> stats <- microbenchmark(mean2_x_S = mean2(x_S, refine = TRUE), mean2_x_S_no_refine = mean2(x_S, refine = FALSE), 
+     `mean2(x, idxs)` = mean2(x, idxs = idxs, refine = TRUE), `mean2_no_refine(x, idxs)` = mean2(x, 
+         idxs = idxs, refine = FALSE), `mean2(x[idxs])` = mean2(x[idxs], refine = TRUE), `mean2_no_refine(x[idxs])` = mean2(x[idxs], 
+         refine = FALSE), unit = "ms")

Table: Benchmarking of mean2_x_S(), mean2_x_S_no_refine(), mean2(x, idxs)(), mean2_no_refine(x, idxs)(), mean2(x[idxs])() and mean2_no_refine(x[idxs])() on double+n = 10000000 data. The top panel shows times in milliseconds and the bottom panel shows relative times.

expr min lq mean median uq max
2 mean2_x_S_no_refine 7.416486 10.97654 13.64744 12.08673 18.71371 21.16901
1 mean2_x_S 15.439379 20.59652 25.01414 23.11340 32.18062 36.78187
4 mean2_no_refine(x, idxs) 95.059996 132.90154 143.80882 140.24072 160.59192 189.81906
6 mean2_no_refine(x[idxs]) 159.109667 167.80084 180.08796 173.82238 186.44614 452.33853
5 mean2(x[idxs]) 169.306052 179.77445 199.31009 193.45681 198.79157 471.73459
3 mean2(x, idxs) 230.929040 297.02068 310.27159 309.52590 325.38551 386.11923
expr min lq mean median uq max
2 mean2_x_S_no_refine 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000
1 mean2_x_S 2.081765 1.876413 1.832882 1.912295 1.719628 1.737534
4 mean2_no_refine(x, idxs) 12.817390 12.107783 10.537422 11.602868 8.581514 8.966837
6 mean2_no_refine(x[idxs]) 21.453511 15.287228 13.195733 14.381259 9.963080 21.367959
5 mean2(x[idxs]) 22.828338 16.378064 14.604212 16.005720 10.622780 22.284206
3 mean2(x, idxs) 31.137258 27.059596 22.734786 25.608739 17.387551 18.239834

Figure: Benchmarking of mean2_x_S(), mean2_x_S_no_refine(), mean2(x, idxs)(), mean2_no_refine(x, idxs)(), mean2(x[idxs])() and mean2_no_refine(x[idxs])() on double+n = 10000000 data. Outliers are displayed as crosses. Times are in milliseconds.

Appendix

Session information

R version 3.6.1 Patched (2019-08-27 r77078)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 18.04.3 LTS

Matrix products: default
BLAS:   /home/hb/software/R-devel/R-3-6-branch/lib/R/lib/libRblas.so
LAPACK: /home/hb/software/R-devel/R-3-6-branch/lib/R/lib/libRlapack.so

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] microbenchmark_1.4-6    matrixStats_0.55.0-9000 ggplot2_3.2.1          
[4] knitr_1.24              R.devices_2.16.0        R.utils_2.9.0          
[7] R.oo_1.22.0             R.methodsS3_1.7.1       history_0.0.0-9002     

loaded via a namespace (and not attached):
 [1] Biobase_2.45.0       bit64_0.9-7          splines_3.6.1       
 [4] network_1.15         assertthat_0.2.1     highr_0.8           
 [7] stats4_3.6.1         blob_1.2.0           robustbase_0.93-5   
[10] pillar_1.4.2         RSQLite_2.1.2        backports_1.1.4     
[13] lattice_0.20-38      glue_1.3.1           digest_0.6.20       
[16] colorspace_1.4-1     sandwich_2.5-1       Matrix_1.2-17       
[19] XML_3.98-1.20        lpSolve_5.6.13.3     pkgconfig_2.0.2     
[22] genefilter_1.66.0    purrr_0.3.2          ergm_3.10.4         
[25] xtable_1.8-4         mvtnorm_1.0-11       scales_1.0.0        
[28] tibble_2.1.3         annotate_1.62.0      IRanges_2.18.2      
[31] TH.data_1.0-10       withr_2.1.2          BiocGenerics_0.30.0 
[34] lazyeval_0.2.2       mime_0.7             survival_2.44-1.1   
[37] magrittr_1.5         crayon_1.3.4         statnet.common_4.3.0
[40] memoise_1.1.0        laeken_0.5.0         R.cache_0.13.0      
[43] MASS_7.3-51.4        R.rsp_0.43.1         tools_3.6.1         
[46] multcomp_1.4-10      S4Vectors_0.22.1     trust_0.1-7         
[49] munsell_0.5.0        AnnotationDbi_1.46.1 compiler_3.6.1      
[52] rlang_0.4.0          grid_3.6.1           RCurl_1.95-4.12     
[55] cwhmisc_6.6          rappdirs_0.3.1       labeling_0.3        
[58] bitops_1.0-6         base64enc_0.1-3      boot_1.3-23         
[61] gtable_0.3.0         codetools_0.2-16     DBI_1.0.0           
[64] markdown_1.1         R6_2.4.0             zoo_1.8-6           
[67] dplyr_0.8.3          bit_1.1-14           zeallot_0.1.0       
[70] parallel_3.6.1       Rcpp_1.0.2           vctrs_0.2.0         
[73] DEoptimR_1.0-8       tidyselect_0.2.5     xfun_0.9            
[76] coda_0.19-3         

Total processing time was 2.85 mins.

Reproducibility

To reproduce this report, do:

html <- matrixStats:::benchmark('mean2_subset')

Copyright Dongcan Jiang. Last updated on 2019-09-10 21:06:50 (UTC-07:00). Powered by RSP.

<script> var link = document.createElement('link'); link.rel = 'icon'; link.href = "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAMAAABEpIrGAAAA21BMVEUAAAAAAP8AAP8AAP8AAP8AAP8AAP8AAP8AAP8AAP8AAP8AAP8AAP8AAP8AAP8AAP8AAP8AAP8AAP8AAP8AAP8AAP8AAP8AAP8AAP8AAP8AAP8AAP8AAP8AAP8AAP8AAP8AAP8AAP8AAP8AAP8AAP8AAP8AAP8AAP8AAP8AAP8BAf4CAv0DA/wdHeIeHuEfH+AgIN8hId4lJdomJtknJ9g+PsE/P8BAQL9yco10dIt1dYp3d4h4eIeVlWqWlmmXl2iYmGeZmWabm2Tn5xjo6Bfp6Rb39wj4+Af//wA2M9hbAAAASXRSTlMAAQIJCgsMJSYnKD4/QGRlZmhpamtsbautrrCxuru8y8zN5ebn6Pn6+///////////////////////////////////////////LsUNcQAAAS9JREFUOI29k21XgkAQhVcFytdSMqMETU26UVqGmpaiFbL//xc1cAhhwVNf6n5i5z67M2dmYOyfJZUqlVLhkKucG7cgmUZTybDz6g0iDeq51PUr37Ds2cy2/C9NeES5puDjxuUk1xnToZsg8pfA3avHQ3lLIi7iWRrkv/OYtkScxBIMgDee0ALoyxHQBJ68JLCjOtQIMIANF7QG9G9fNnHvisCHBVMKgSJgiz7nE+AoBKrAPA3MgepvgR9TSCasrCKH0eB1wBGBFdCO+nAGjMVGPcQb5bd6mQRegN6+1axOs9nGfYcCtfi4NQosdtH7dB+txFIpXQqN1p9B/asRHToyS0jRgpV7nk4nwcq1BJ+x3Gl/v7S9Wmpp/aGquum7w3ZDyrADFYrl8vHBH+ev9AUASW1dmU4h4wAAAABJRU5ErkJggg==" document.getElementsByTagName('head')[0].appendChild(link); </script>
⚠️ **GitHub.com Fallback** ⚠️