matrixStats: Benchmark report

weightedMean() benchmarks on subsetted computation

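This report benchmarks the performance of weightedMean() when computing on a subset of the data, comparing the built-in idxs argument against subsetting x and w before or within the call.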

Data type "integer"

Data

> rvector <- function(n, mode = c("logical", "double", "integer"), range = c(-100, +100), na_prob = 0) {
+     mode <- match.arg(mode)
+     if (mode == "logical") {
+         x <- sample(c(FALSE, TRUE), size = n, replace = TRUE)
+     }     else {
+         x <- runif(n, min = range[1], max = range[2])
+     }
+     storage.mode(x) <- mode
+     if (na_prob > 0) 
+         x[sample(n, size = na_prob * n)] <- NA
+     x
+ }
> rvectors <- function(scale = 10, seed = 1, ...) {
+     set.seed(seed)
+     data <- list()
+     data[[1]] <- rvector(n = scale * 100, ...)
+     data[[2]] <- rvector(n = scale * 1000, ...)
+     data[[3]] <- rvector(n = scale * 10000, ...)
+     data[[4]] <- rvector(n = scale * 1e+05, ...)
+     data[[5]] <- rvector(n = scale * 1e+06, ...)
+     names(data) <- sprintf("n = %d", sapply(data, FUN = length))
+     data
+ }
> data <- rvectors(mode = mode)
> data <- data[1:4]

Results

n = 1000 vector

> x <- data[["n = 1000"]]
> idxs <- sample.int(length(x), size = length(x) * 0.7)
> x_S <- x[idxs]
> w <- runif(length(x))
> w_S <- w[idxs]
> gc()
          used  (Mb) gc trigger  (Mb) max used  (Mb)
Ncells 3254530 173.9    5709258 305.0  5709258 305.0
Vcells 7938294  60.6   25448368 194.2 87357391 666.5
> stats <- microbenchmark(weightedMean_x_w_S = weightedMean(x_S, w = w_S, na.rm = FALSE), `weightedMean(x, w, idxs)` = weightedMean(x, 
+     w = w, idxs = idxs, na.rm = FALSE), `weightedMean(x[idxs], w[idxs])` = weightedMean(x[idxs], 
+     w = w[idxs], na.rm = FALSE), unit = "ms")

Table: Benchmarking of weightedMean_x_w_S, weightedMean(x, w, idxs) and weightedMean(x[idxs], w[idxs]) on integer data with n = 1000. The top panel shows times in milliseconds and the bottom panel shows times relative to weightedMean_x_w_S.

	expr	min	lq	mean	median	uq	max
1	weightedMean_x_w_S	0.001531	0.0015845	0.0016795	0.0016275	0.0016715	0.004561
2	weightedMean(x, w, idxs)	0.002270	0.0023210	0.0032812	0.0023775	0.0024460	0.089417
3	weightedMean(x[idxs], w[idxs])	0.004627	0.0048040	0.0049241	0.0048655	0.0049630	0.008610

	expr	min	lq	mean	median	uq	max
1	weightedMean_x_w_S	1.000000	1.000000	1.000000	1.000000	1.000000	1.000000
2	weightedMean(x, w, idxs)	1.482691	1.464815	1.953624	1.460830	1.463356	19.604692
3	weightedMean(x[idxs], w[idxs])	3.022208	3.031871	2.931873	2.989555	2.969189	1.887744

Figure: Benchmarking of weightedMean_x_w_S, weightedMean(x, w, idxs) and weightedMean(x[idxs], w[idxs]) on integer data with n = 1000. Outliers are displayed as crosses. Times are in milliseconds.

n = 10000 vector

> x <- data[["n = 10000"]]
> idxs <- sample.int(length(x), size = length(x) * 0.7)
> x_S <- x[idxs]
> w <- runif(length(x))
> w_S <- w[idxs]
> gc()
          used  (Mb) gc trigger  (Mb) max used  (Mb)
Ncells 3250877 173.7    5709258 305.0  5709258 305.0
Vcells 6820531  52.1   25448368 194.2 87357391 666.5
> stats <- microbenchmark(weightedMean_x_w_S = weightedMean(x_S, w = w_S, na.rm = FALSE), `weightedMean(x, w, idxs)` = weightedMean(x, 
+     w = w, idxs = idxs, na.rm = FALSE), `weightedMean(x[idxs], w[idxs])` = weightedMean(x[idxs], 
+     w = w[idxs], na.rm = FALSE), unit = "ms")

Table: Benchmarking of weightedMean_x_w_S, weightedMean(x, w, idxs) and weightedMean(x[idxs], w[idxs]) on integer data with n = 10000. The top panel shows times in milliseconds and the bottom panel shows times relative to weightedMean_x_w_S.

	expr	min	lq	mean	median	uq	max
1	weightedMean_x_w_S	0.008241	0.0084615	0.0088564	0.008524	0.0086355	0.036244
2	weightedMean(x, w, idxs)	0.016593	0.0168810	0.0173057	0.017060	0.0172010	0.035846
3	weightedMean(x[idxs], w[idxs])	0.033846	0.0345210	0.0354220	0.034843	0.0353440	0.047289

	expr	min	lq	mean	median	uq	max
1	weightedMean_x_w_S	1.000000	1.000000	1.000000	1.000000	1.000000	1.0000000
2	weightedMean(x, w, idxs)	2.013469	1.995036	1.954019	2.001408	1.991894	0.9890189
3	weightedMean(x[idxs], w[idxs])	4.107026	4.079773	3.999569	4.087635	4.092872	1.3047401

Figure: Benchmarking of weightedMean_x_w_S, weightedMean(x, w, idxs) and weightedMean(x[idxs], w[idxs]) on integer data with n = 10000. Outliers are displayed as crosses. Times are in milliseconds.

n = 100000 vector

> x <- data[["n = 100000"]]
> idxs <- sample.int(length(x), size = length(x) * 0.7)
> x_S <- x[idxs]
> w <- runif(length(x))
> w_S <- w[idxs]
> gc()
          used  (Mb) gc trigger  (Mb) max used  (Mb)
Ncells 3250949 173.7    5709258 305.0  5709258 305.0
Vcells 7037091  53.7   25448368 194.2 87357391 666.5
> stats <- microbenchmark(weightedMean_x_w_S = weightedMean(x_S, w = w_S, na.rm = FALSE), `weightedMean(x, w, idxs)` = weightedMean(x, 
+     w = w, idxs = idxs, na.rm = FALSE), `weightedMean(x[idxs], w[idxs])` = weightedMean(x[idxs], 
+     w = w[idxs], na.rm = FALSE), unit = "ms")

Table: Benchmarking of weightedMean_x_w_S, weightedMean(x, w, idxs) and weightedMean(x[idxs], w[idxs]) on integer data with n = 100000. The top panel shows times in milliseconds and the bottom panel shows times relative to weightedMean_x_w_S.

	expr	min	lq	mean	median	uq	max
1	weightedMean_x_w_S	0.073563	0.0756315	0.0773616	0.0776105	0.0782045	0.093444
2	weightedMean(x, w, idxs)	0.251108	0.2545820	0.2622681	0.2653170	0.2655925	0.286673
3	weightedMean(x[idxs], w[idxs])	0.410592	0.4208010	0.4347360	0.4344995	0.4377520	0.728485

	expr	min	lq	mean	median	uq	max
1	weightedMean_x_w_S	1.000000	1.000000	1.000000	1.000000	1.000000	1.000000
2	weightedMean(x, w, idxs)	3.413509	3.366084	3.390159	3.418571	3.396128	3.067859
3	weightedMean(x[idxs], w[idxs])	5.581502	5.563833	5.619533	5.598463	5.597530	7.795953

Figure: Benchmarking of weightedMean_x_w_S, weightedMean(x, w, idxs) and weightedMean(x[idxs], w[idxs]) on integer data with n = 100000. Outliers are displayed as crosses. Times are in milliseconds.

n = 1000000 vector

> x <- data[["n = 1000000"]]
> idxs <- sample.int(length(x), size = length(x) * 0.7)
> x_S <- x[idxs]
> w <- runif(length(x))
> w_S <- w[idxs]
> gc()
          used  (Mb) gc trigger  (Mb) max used  (Mb)
Ncells 3251021 173.7    5709258 305.0  5709258 305.0
Vcells 9197140  70.2   25448368 194.2 87357391 666.5
> stats <- microbenchmark(weightedMean_x_w_S = weightedMean(x_S, w = w_S, na.rm = FALSE), `weightedMean(x, w, idxs)` = weightedMean(x, 
+     w = w, idxs = idxs, na.rm = FALSE), `weightedMean(x[idxs], w[idxs])` = weightedMean(x[idxs], 
+     w = w[idxs], na.rm = FALSE), unit = "ms")

Table: Benchmarking of weightedMean_x_w_S, weightedMean(x, w, idxs) and weightedMean(x[idxs], w[idxs]) on integer data with n = 1000000. The top panel shows times in milliseconds and the bottom panel shows times relative to weightedMean_x_w_S.

	expr	min	lq	mean	median	uq	max
1	weightedMean_x_w_S	1.043374	1.214168	1.295131	1.264495	1.360686	1.683094
2	weightedMean(x, w, idxs)	8.374396	9.454982	9.922480	9.753282	10.199404	13.196994
3	weightedMean(x[idxs], w[idxs])	9.828954	14.754568	15.383674	15.084395	15.431275	26.039318

	expr	min	lq	mean	median	uq	max
1	weightedMean_x_w_S	1.000000	1.00000	1.00000	1.000000	1.00000	1.000000
2	weightedMean(x, w, idxs)	8.026265	7.78721	7.66137	7.713187	7.49578	7.840913
3	weightedMean(x[idxs], w[idxs])	9.420355	12.15200	11.87808	11.929189	11.34081	15.471101

Figure: Benchmarking of weightedMean_x_w_S, weightedMean(x, w, idxs) and weightedMean(x[idxs], w[idxs]) on integer data with n = 1000000. Outliers are displayed as crosses. Times are in milliseconds.

Data type "double"

Data

> rvector <- function(n, mode = c("logical", "double", "integer"), range = c(-100, +100), na_prob = 0) {
+     mode <- match.arg(mode)
+     if (mode == "logical") {
+         x <- sample(c(FALSE, TRUE), size = n, replace = TRUE)
+     }     else {
+         x <- runif(n, min = range[1], max = range[2])
+     }
+     storage.mode(x) <- mode
+     if (na_prob > 0) 
+         x[sample(n, size = na_prob * n)] <- NA
+     x
+ }
> rvectors <- function(scale = 10, seed = 1, ...) {
+     set.seed(seed)
+     data <- list()
+     data[[1]] <- rvector(n = scale * 100, ...)
+     data[[2]] <- rvector(n = scale * 1000, ...)
+     data[[3]] <- rvector(n = scale * 10000, ...)
+     data[[4]] <- rvector(n = scale * 1e+05, ...)
+     data[[5]] <- rvector(n = scale * 1e+06, ...)
+     names(data) <- sprintf("n = %d", sapply(data, FUN = length))
+     data
+ }
> data <- rvectors(mode = mode)
> data <- data[1:4]

Results

n = 1000 vector

> x <- data[["n = 1000"]]
> idxs <- sample.int(length(x), size = length(x) * 0.7)
> x_S <- x[idxs]
> w <- runif(length(x))
> w_S <- w[idxs]
> gc()
          used  (Mb) gc trigger  (Mb) max used  (Mb)
Ncells 3251099 173.7    5709258 305.0  5709258 305.0
Vcells 7356020  56.2   25448368 194.2 87357391 666.5
> stats <- microbenchmark(weightedMean_x_w_S = weightedMean(x_S, w = w_S, na.rm = FALSE), `weightedMean(x, w, idxs)` = weightedMean(x, 
+     w = w, idxs = idxs, na.rm = FALSE), `weightedMean(x[idxs], w[idxs])` = weightedMean(x[idxs], 
+     w = w[idxs], na.rm = FALSE), unit = "ms")

Table: Benchmarking of weightedMean_x_w_S, weightedMean(x, w, idxs) and weightedMean(x[idxs], w[idxs]) on double data with n = 1000. The top panel shows times in milliseconds and the bottom panel shows times relative to weightedMean_x_w_S.

	expr	min	lq	mean	median	uq	max
1	weightedMean_x_w_S	0.00153	0.0016075	0.0017469	0.0016625	0.0017975	0.004865
2	weightedMean(x, w, idxs)	0.00224	0.0022870	0.0026696	0.0023330	0.0024010	0.032507
3	weightedMean(x[idxs], w[idxs])	0.00457	0.0049725	0.0053566	0.0051435	0.0055415	0.011743

	expr	min	lq	mean	median	uq	max
1	weightedMean_x_w_S	1.000000	1.000000	1.000000	1.000000	1.000000	1.000000
2	weightedMean(x, w, idxs)	1.464052	1.422706	1.528164	1.403308	1.335744	6.681809
3	weightedMean(x[idxs], w[idxs])	2.986928	3.093313	3.066363	3.093835	3.082893	2.413772

Figure: Benchmarking of weightedMean_x_w_S, weightedMean(x, w, idxs) and weightedMean(x[idxs], w[idxs]) on double data with n = 1000. Outliers are displayed as crosses. Times are in milliseconds.

n = 10000 vector

> x <- data[["n = 10000"]]
> idxs <- sample.int(length(x), size = length(x) * 0.7)
> x_S <- x[idxs]
> w <- runif(length(x))
> w_S <- w[idxs]
> gc()
          used  (Mb) gc trigger  (Mb) max used  (Mb)
Ncells 3251165 173.7    5709258 305.0  5709258 305.0
Vcells 7381126  56.4   25448368 194.2 87357391 666.5
> stats <- microbenchmark(weightedMean_x_w_S = weightedMean(x_S, w = w_S, na.rm = FALSE), `weightedMean(x, w, idxs)` = weightedMean(x, 
+     w = w, idxs = idxs, na.rm = FALSE), `weightedMean(x[idxs], w[idxs])` = weightedMean(x[idxs], 
+     w = w[idxs], na.rm = FALSE), unit = "ms")

Table: Benchmarking of weightedMean_x_w_S, weightedMean(x, w, idxs) and weightedMean(x[idxs], w[idxs]) on double data with n = 10000. The top panel shows times in milliseconds and the bottom panel shows times relative to weightedMean_x_w_S.

	expr	min	lq	mean	median	uq	max
1	weightedMean_x_w_S	0.008132	0.0084440	0.0090100	0.0086185	0.0088180	0.023303
2	weightedMean(x, w, idxs)	0.016094	0.0166165	0.0174300	0.0169310	0.0171770	0.040581
3	weightedMean(x[idxs], w[idxs])	0.034302	0.0361780	0.0380877	0.0370390	0.0384335	0.060759

	expr	min	lq	mean	median	uq	max
1	weightedMean_x_w_S	1.000000	1.000000	1.000000	1.000000	1.000000	1.000000
2	weightedMean(x, w, idxs)	1.979095	1.967847	1.934503	1.964495	1.947947	1.741450
3	weightedMean(x[idxs], w[idxs])	4.218151	4.284462	4.227254	4.297616	4.358528	2.607347

Figure: Benchmarking of weightedMean_x_w_S, weightedMean(x, w, idxs) and weightedMean(x[idxs], w[idxs]) on double data with n = 10000. Outliers are displayed as crosses. Times are in milliseconds.

n = 100000 vector

> x <- data[["n = 100000"]]
> idxs <- sample.int(length(x), size = length(x) * 0.7)
> x_S <- x[idxs]
> w <- runif(length(x))
> w_S <- w[idxs]
> gc()
          used  (Mb) gc trigger  (Mb) max used  (Mb)
Ncells 3251237 173.7    5709258 305.0  5709258 305.0
Vcells 7628674  58.3   25448368 194.2 87357391 666.5
> stats <- microbenchmark(weightedMean_x_w_S = weightedMean(x_S, w = w_S, na.rm = FALSE), `weightedMean(x, w, idxs)` = weightedMean(x, 
+     w = w, idxs = idxs, na.rm = FALSE), `weightedMean(x[idxs], w[idxs])` = weightedMean(x[idxs], 
+     w = w[idxs], na.rm = FALSE), unit = "ms")

Table: Benchmarking of weightedMean_x_w_S, weightedMean(x, w, idxs) and weightedMean(x[idxs], w[idxs]) on double data with n = 100000. The top panel shows times in milliseconds and the bottom panel shows times relative to weightedMean_x_w_S.

	expr	min	lq	mean	median	uq	max
1	weightedMean_x_w_S	0.073528	0.0770400	0.0802106	0.0786250	0.0803380	0.102687
2	weightedMean(x, w, idxs)	0.234819	0.2360200	0.2477158	0.2485910	0.2556555	0.272487
3	weightedMean(x[idxs], w[idxs])	0.428035	0.4349115	0.4582200	0.4528735	0.4658515	0.843115

	expr	min	lq	mean	median	uq	max
1	weightedMean_x_w_S	1.000000	1.000000	1.000000	1.000000	1.000000	1.000000
2	weightedMean(x, w, idxs)	3.193600	3.063603	3.088319	3.161730	3.182249	2.653569
3	weightedMean(x[idxs], w[idxs])	5.821388	5.645269	5.712714	5.759917	5.798644	8.210533

Figure: Benchmarking of weightedMean_x_w_S, weightedMean(x, w, idxs) and weightedMean(x[idxs], w[idxs]) on double data with n = 100000. Outliers are displayed as crosses. Times are in milliseconds.

n = 1000000 vector

> x <- data[["n = 1000000"]]
> idxs <- sample.int(length(x), size = length(x) * 0.7)
> x_S <- x[idxs]
> w <- runif(length(x))
> w_S <- w[idxs]
> gc()
           used  (Mb) gc trigger  (Mb) max used  (Mb)
Ncells  3251309 173.7    5709258 305.0  5709258 305.0
Vcells 10104133  77.1   25448368 194.2 87357391 666.5
> stats <- microbenchmark(weightedMean_x_w_S = weightedMean(x_S, w = w_S, na.rm = FALSE), `weightedMean(x, w, idxs)` = weightedMean(x, 
+     w = w, idxs = idxs, na.rm = FALSE), `weightedMean(x[idxs], w[idxs])` = weightedMean(x[idxs], 
+     w = w[idxs], na.rm = FALSE), unit = "ms")

Table: Benchmarking of weightedMean_x_w_S, weightedMean(x, w, idxs) and weightedMean(x[idxs], w[idxs]) on double data with n = 1000000. The top panel shows times in milliseconds and the bottom panel shows times relative to weightedMean_x_w_S.

	expr	min	lq	mean	median	uq	max
1	weightedMean_x_w_S	1.222968	1.523889	1.611831	1.609341	1.700403	1.983512
2	weightedMean(x, w, idxs)	10.783339	13.336184	13.620122	13.594388	14.025677	14.598417
3	weightedMean(x[idxs], w[idxs])	12.677596	15.040961	16.662921	15.482764	16.042821	29.830299

	expr	min	lq	mean	median	uq	max
1	weightedMean_x_w_S	1.000000	1.000000	1.000000	1.000000	1.000000	1.000000
2	weightedMean(x, w, idxs)	8.817352	8.751414	8.450094	8.447177	8.248443	7.359883
3	weightedMean(x[idxs], w[idxs])	10.366253	9.870116	10.337884	9.620561	9.434717	15.039132

Figure: Benchmarking of weightedMean_x_w_S, weightedMean(x, w, idxs) and weightedMean(x[idxs], w[idxs]) on double data with n = 1000000. Outliers are displayed as crosses. Times are in milliseconds.

Appendix

Session information

R version 3.6.1 Patched (2019-08-27 r77078)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 18.04.3 LTS

Matrix products: default
BLAS:   /home/hb/software/R-devel/R-3-6-branch/lib/R/lib/libRblas.so
LAPACK: /home/hb/software/R-devel/R-3-6-branch/lib/R/lib/libRlapack.so

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] microbenchmark_1.4-6    matrixStats_0.55.0-9000 ggplot2_3.2.1          
[4] knitr_1.24              R.devices_2.16.0        R.utils_2.9.0          
[7] R.oo_1.22.0             R.methodsS3_1.7.1       history_0.0.0-9002     

loaded via a namespace (and not attached):
 [1] Biobase_2.45.0       bit64_0.9-7          splines_3.6.1       
 [4] network_1.15         assertthat_0.2.1     highr_0.8           
 [7] stats4_3.6.1         blob_1.2.0           robustbase_0.93-5   
[10] pillar_1.4.2         RSQLite_2.1.2        backports_1.1.4     
[13] lattice_0.20-38      glue_1.3.1           digest_0.6.20       
[16] colorspace_1.4-1     sandwich_2.5-1       Matrix_1.2-17       
[19] XML_3.98-1.20        lpSolve_5.6.13.3     pkgconfig_2.0.2     
[22] genefilter_1.66.0    purrr_0.3.2          ergm_3.10.4         
[25] xtable_1.8-4         mvtnorm_1.0-11       scales_1.0.0        
[28] tibble_2.1.3         annotate_1.62.0      IRanges_2.18.2      
[31] TH.data_1.0-10       withr_2.1.2          BiocGenerics_0.30.0 
[34] lazyeval_0.2.2       mime_0.7             survival_2.44-1.1   
[37] magrittr_1.5         crayon_1.3.4         statnet.common_4.3.0
[40] memoise_1.1.0        laeken_0.5.0         R.cache_0.13.0      
[43] MASS_7.3-51.4        R.rsp_0.43.1         tools_3.6.1         
[46] multcomp_1.4-10      S4Vectors_0.22.1     trust_0.1-7         
[49] munsell_0.5.0        AnnotationDbi_1.46.1 compiler_3.6.1      
[52] rlang_0.4.0          grid_3.6.1           RCurl_1.95-4.12     
[55] cwhmisc_6.6          rappdirs_0.3.1       labeling_0.3        
[58] bitops_1.0-6         base64enc_0.1-3      boot_1.3-23         
[61] gtable_0.3.0         codetools_0.2-16     DBI_1.0.0           
[64] markdown_1.1         R6_2.4.0             zoo_1.8-6           
[67] dplyr_0.8.3          bit_1.1-14           zeallot_0.1.0       
[70] parallel_3.6.1       Rcpp_1.0.2           vctrs_0.2.0         
[73] DEoptimR_1.0-8       tidyselect_0.2.5     xfun_0.9            
[76] coda_0.19-3

Total processing time was 14.03 secs.

Reproducibility

To reproduce this report, do:

html <- matrixStats:::benchmark('weightedMean_subset')

weightedMean_subset - HenrikBengtsson/matrixStats GitHub Wiki

weightedMean() benchmarks on subsetted computation

Data type "integer"

Data

Results

n = 1000 vector

n = 10000 vector

n = 100000 vector

n = 1000000 vector

Data type "double"

Data

Results

n = 1000 vector

n = 10000 vector

n = 100000 vector

n = 1000000 vector

Appendix

Session information

Reproducibility

⚠️ GitHub.com Fallback ⚠️

weightedMean_subset - HenrikBengtsson/matrixStats GitHub Wiki

weightedMean() benchmarks on subsetted computation

Data type "integer"

Data

Results

n = 1000 vector

n = 10000 vector

n = 100000 vector

n = 1000000 vector

Data type "double"

Data

Results

n = 1000 vector

n = 10000 vector

n = 100000 vector

n = 1000000 vector

Appendix

Session information

Reproducibility

⚠️ **GitHub.com Fallback** ⚠️

⚠️ GitHub.com Fallback ⚠️