matrixStats: Benchmark report

weightedMedian() benchmarks

This report benchmarks the performance of weightedMedian() against alternative methods.

Alternative methods

limma::weighted.median()
cwhmisc::w.median()
laeken::weightedMedian()

Data

> rvector <- function(n, mode = c("logical", "double", "integer"), range = c(-100, +100), na_prob = 0) {
+     mode <- match.arg(mode)
+     if (mode == "logical") {
+         x <- sample(c(FALSE, TRUE), size = n, replace = TRUE)
+     }     else {
+         x <- runif(n, min = range[1], max = range[2])
+     }
+     storage.mode(x) <- mode
+     if (na_prob > 0) 
+         x[sample(n, size = na_prob * n)] <- NA
+     x
+ }
> rvectors <- function(scale = 10, seed = 1, ...) {
+     set.seed(seed)
+     data <- list()
+     data[[1]] <- rvector(n = scale * 100, ...)
+     data[[2]] <- rvector(n = scale * 1000, ...)
+     data[[3]] <- rvector(n = scale * 10000, ...)
+     data[[4]] <- rvector(n = scale * 1e+05, ...)
+     data[[5]] <- rvector(n = scale * 1e+06, ...)
+     names(data) <- sprintf("n = %d", sapply(data, FUN = length))
+     data
+ }
> data <- rvectors(mode = "double")
> data <- data[1:3]

Results

n = 1000 vector

> x <- data[["n = 1000"]]
> w <- runif(length(x))
> gc()
          used  (Mb) gc trigger  (Mb) max used  (Mb)
Ncells 3282833 175.4    5709258 305.0  5709258 305.0
Vcells 6806071  52.0   26329732 200.9 87357391 666.5
> stats <- microbenchmark(weightedMedian = weightedMedian(x, w = w, ties = "mean", na.rm = FALSE), 
+     `limma::weighted.median` = limma_weighted.median(x, w = w, na.rm = FALSE), `cwhmisc::w.median` = cwhmisc_w.median(x, 
+         w = w), `laeken::weightedMedian` = laeken_weightedMedian(x, w = w), unit = "ms")

Table: Benchmarking of weightedMedian(), limma::weighted.median(), cwhmisc::w.median() and laeken::weightedMedian() on n = 1000 data. The top panel shows times in milliseconds and the bottom panel shows relative times.

	expr	min	lq	mean	median	uq	max
1	weightedMedian	0.041404	0.0478795	0.0495992	0.0498755	0.0512730	0.064162
2	limma::weighted.median	0.061138	0.0658225	0.0691307	0.0677805	0.0695805	0.176117
3	cwhmisc::w.median	0.083722	0.0920515	0.0949106	0.0946210	0.0981945	0.109411
4	laeken::weightedMedian	0.128815	0.1360515	0.1430878	0.1392040	0.1432375	0.458474

	expr	min	lq	mean	median	uq	max
1	weightedMedian	1.000000	1.000000	1.000000	1.000000	1.000000	1.000000
2	limma::weighted.median	1.476621	1.374753	1.393786	1.358994	1.357059	2.744880
3	cwhmisc::w.median	2.022075	1.922566	1.913551	1.897144	1.915131	1.705231
4	laeken::weightedMedian	3.111173	2.841540	2.884880	2.791030	2.793624	7.145569

Figure: Benchmarking of weightedMedian(), limma::weighted.median(), cwhmisc::w.median() and laeken::weightedMedian() on n = 1000 data. Outliers are displayed as crosses. Times are in milliseconds.

n = 10000 vector

> x <- data[["n = 10000"]]
> w <- runif(length(x))
> gc()
          used  (Mb) gc trigger  (Mb) max used  (Mb)
Ncells 3281145 175.3    5709258 305.0  5709258 305.0
Vcells 6406585  48.9   26329732 200.9 87357391 666.5
> stats <- microbenchmark(weightedMedian = weightedMedian(x, w = w, ties = "mean", na.rm = FALSE), 
+     `limma::weighted.median` = limma_weighted.median(x, w = w, na.rm = FALSE), `cwhmisc::w.median` = cwhmisc_w.median(x, 
+         w = w), `laeken::weightedMedian` = laeken_weightedMedian(x, w = w), unit = "ms")

Table: Benchmarking of weightedMedian(), limma::weighted.median(), cwhmisc::w.median() and laeken::weightedMedian() on n = 10000 data. The top panel shows times in milliseconds and the bottom panel shows relative times.

	expr	min	lq	mean	median	uq	max
2	limma::weighted.median	0.548102	0.6031085	0.6093292	0.6062180	0.6124475	0.718089
1	weightedMedian	0.560750	0.6194825	0.6355163	0.6356275	0.6517530	0.754366
4	laeken::weightedMedian	0.639606	0.7083430	0.7198366	0.7179295	0.7318340	0.887779
3	cwhmisc::w.median	0.674368	0.7343000	0.8258406	0.7468600	0.7648505	7.813275

	expr	min	lq	mean	median	uq	max
2	limma::weighted.median	1.000000	1.000000	1.000000	1.000000	1.000000	1.000000
1	weightedMedian	1.023076	1.027149	1.042977	1.048513	1.064178	1.050519
4	laeken::weightedMedian	1.166947	1.174487	1.181359	1.184276	1.194933	1.236308
3	cwhmisc::w.median	1.230369	1.217526	1.355328	1.231999	1.248843	10.880650

Figure: Benchmarking of weightedMedian(), limma::weighted.median(), cwhmisc::w.median() and laeken::weightedMedian() on n = 10000 data. Outliers are displayed as crosses. Times are in milliseconds.

n = 100000 vector

> x <- data[["n = 100000"]]
> w <- runif(length(x))
> gc()
          used  (Mb) gc trigger  (Mb) max used  (Mb)
Ncells 3281226 175.3    5709258 305.0  5709258 305.0
Vcells 6497151  49.6   26329732 200.9 87357391 666.5
> stats <- microbenchmark(weightedMedian = weightedMedian(x, w = w, ties = "mean", na.rm = FALSE), 
+     `limma::weighted.median` = limma_weighted.median(x, w = w, na.rm = FALSE), `cwhmisc::w.median` = cwhmisc_w.median(x, 
+         w = w), `laeken::weightedMedian` = laeken_weightedMedian(x, w = w), unit = "ms")

Table: Benchmarking of weightedMedian(), limma::weighted.median(), cwhmisc::w.median() and laeken::weightedMedian() on n = 100000 data. The top panel shows times in milliseconds and the bottom panel shows relative times.

	expr	min	lq	mean	median	uq	max
2	limma::weighted.median	4.734878	5.171025	5.623710	5.307669	5.583745	17.654005
4	laeken::weightedMedian	5.305862	5.653216	6.284446	5.776996	6.863891	18.354886
3	cwhmisc::w.median	6.091735	6.468177	7.590119	6.613154	7.929010	24.079816
1	weightedMedian	6.728040	7.339554	7.576383	7.496245	7.812422	9.315709

	expr	min	lq	mean	median	uq	max
2	limma::weighted.median	1.000000	1.000000	1.000000	1.000000	1.000000	1.0000000
4	laeken::weightedMedian	1.120591	1.093249	1.117491	1.088424	1.229263	1.0397010
3	cwhmisc::w.median	1.286566	1.250850	1.349664	1.245962	1.420017	1.3639860
1	weightedMedian	1.420953	1.419361	1.347222	1.412342	1.399137	0.5276825

Figure: Benchmarking of weightedMedian(), limma::weighted.median(), cwhmisc::w.median() and laeken::weightedMedian() on n = 100000 data. Outliers are displayed as crosses. Times are in milliseconds.

Appendix

Session information

R version 3.6.1 Patched (2019-08-27 r77078)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 18.04.3 LTS

Matrix products: default
BLAS:   /home/hb/software/R-devel/R-3-6-branch/lib/R/lib/libRblas.so
LAPACK: /home/hb/software/R-devel/R-3-6-branch/lib/R/lib/libRlapack.so

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] microbenchmark_1.4-6    matrixStats_0.55.0-9000 ggplot2_3.2.1          
[4] knitr_1.24              R.devices_2.16.0        R.utils_2.9.0          
[7] R.oo_1.22.0             R.methodsS3_1.7.1       history_0.0.0-9002     

loaded via a namespace (and not attached):
 [1] Biobase_2.45.0       bit64_0.9-7          splines_3.6.1       
 [4] network_1.15         assertthat_0.2.1     highr_0.8           
 [7] stats4_3.6.1         blob_1.2.0           robustbase_0.93-5   
[10] pillar_1.4.2         RSQLite_2.1.2        backports_1.1.4     
[13] lattice_0.20-38      limma_3.40.6         glue_1.3.1          
[16] digest_0.6.20        colorspace_1.4-1     sandwich_2.5-1      
[19] Matrix_1.2-17        XML_3.98-1.20        lpSolve_5.6.13.3    
[22] pkgconfig_2.0.2      genefilter_1.66.0    purrr_0.3.2         
[25] ergm_3.10.4          xtable_1.8-4         mvtnorm_1.0-11      
[28] scales_1.0.0         tibble_2.1.3         annotate_1.62.0     
[31] IRanges_2.18.2       TH.data_1.0-10       withr_2.1.2         
[34] BiocGenerics_0.30.0  lazyeval_0.2.2       mime_0.7            
[37] survival_2.44-1.1    magrittr_1.5         crayon_1.3.4        
[40] statnet.common_4.3.0 memoise_1.1.0        laeken_0.5.0        
[43] R.cache_0.13.0       MASS_7.3-51.4        R.rsp_0.43.1        
[46] tools_3.6.1          multcomp_1.4-10      S4Vectors_0.22.1    
[49] trust_0.1-7          munsell_0.5.0        AnnotationDbi_1.46.1
[52] compiler_3.6.1       rlang_0.4.0          grid_3.6.1          
[55] RCurl_1.95-4.12      cwhmisc_6.6          rappdirs_0.3.1      
[58] labeling_0.3         bitops_1.0-6         base64enc_0.1-3     
[61] boot_1.3-23          gtable_0.3.0         codetools_0.2-16    
[64] DBI_1.0.0            markdown_1.1         R6_2.4.0            
[67] zoo_1.8-6            dplyr_0.8.3          bit_1.1-14          
[70] zeallot_0.1.0        parallel_3.6.1       Rcpp_1.0.2          
[73] vctrs_0.2.0          DEoptimR_1.0-8       tidyselect_0.2.5    
[76] xfun_0.9             coda_0.19-3

Total processing time was 7.3 secs.

Reproducibility

To reproduce this report, do:

html <- matrixStats:::benchmark('weightedMedian')

weightedMedian - HenrikBengtsson/matrixStats GitHub Wiki

weightedMedian() benchmarks

Alternative methods

Data

Results

n = 1000 vector

n = 10000 vector

n = 100000 vector

Appendix

Session information

Reproducibility

⚠️ GitHub.com Fallback ⚠️

weightedMedian - HenrikBengtsson/matrixStats GitHub Wiki

weightedMedian() benchmarks

Alternative methods

Data

Results

n = 1000 vector

n = 10000 vector

n = 100000 vector

Appendix

Session information

Reproducibility

⚠️ GitHub.com Fallback ⚠️

⚠️ GitHub.com Fallback ⚠️