RStan サンプル - you1025/my_something_flagments GitHub Wiki
二項分布を仮定し 32/42, 27/41 の 2 パターンにおけるパラメータ theta
およびそれらの差を推定する。
library(tidyverse)
library(rstan)
library(HDInterval)
options(mc.cores = parallel::detectCores())
rstan_options(auto_write = TRUE)
stan_code <- "
data {
int n1;
int x1;
int n2;
int x2;
}
parameters {
real<lower=0, upper=1> theta1;
real<lower=0, upper=1> theta2;
}
model {
x1 ~ binomial(n1, theta1);
x2 ~ binomial(n2, theta2);
}
generated quantities {
real diff_thetas = (theta1 - theta2);
real is_1_greater_than_2 = (diff_thetas > 0);
}
"
model <- rstan::stan_model(model_code = stan_code)
fit <- rstan::sampling(
model,
data = list(
n1 = 42,
x1 = 32,
n2 = 41,
x2 = 27
),
chains = 4,
iter = 3500,
warmup = 1000,
seed = 1234
)
fit
#Inference for Stan model: 61a36f3e0f198d75bc1f8b3ca3bb3043.
#4 chains, each with iter=3500; warmup=1000; thin=1;
#post-warmup draws per chain=2500, total post-warmup draws=10000.
# mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
#theta1 0.75 0.00 0.06 0.61 0.71 0.75 0.79 0.86 8411 1
#theta2 0.65 0.00 0.07 0.50 0.60 0.65 0.70 0.78 7686 1
#diff_thetas 0.10 0.00 0.09 -0.09 0.04 0.10 0.16 0.28 8183 1
#is_1_greater_than_2 0.85 0.00 0.36 0.00 1.00 1.00 1.00 1.00 7136 1
#lp__ -53.55 0.01 0.99 -56.27 -53.95 -53.25 -52.85 -52.58 4587 1
#Samples were drawn using NUTS(diag_e) at Fri Oct 15 16:08:03 2021.
#For each parameter, n_eff is a crude measure of effective sample size,
#and Rhat is the potential scale reduction factor on split chains (at
#convergence, Rhat=1).
サンプル数に対する ESS(Effective Sample Size) の比率
0.1(=10%) より小さいとよろしくないらしい(https://dastatis.github.io/pdf/StanAdvent2018/bayesplot.html)
rstan::stan_ess(fit, c("theta1", "theta2", "diff_thetas"))$data
rstan::stan_ess(fit, c("theta1", "theta2", "diff_thetas"))$data %>%
tibble::as_tibble(rownames = "parameter") %>%
# 各推定値(の中央値)でソート
dplyr::mutate(parameter = forcats::fct_reorder(parameter, stat, .desc = T)) %>%
ggplot(aes(parameter, stat)) +
geom_col() +
scale_y_continuous(limits = c(0, 1))
mcmc_samples <- rstan::extract(fit, c("theta1", "theta2", "diff_thetas"))
eap1 <- mean(mcmc_samples$theta1) # 0.7488828
med1 <- median(mcmc_samples$theta1) # 0.753199
# 密度推定
d1 <- density(mcmc_samples$theta1)
map1 <- d1$x[which.max(d1$y)] # 0.7702992
quantile(mcmc_samples$theta1, probs = c(0.025, 0.25, 0.5, 0.75, 0.975))
# 2.5% 25% 50% 75% 97.5%
#0.6124567 0.7066960 0.7531990 0.7948876 0.8639592
# 密度推定
d1 <- density(mcmc_samples$theta1)
# 95% 区間
HDInterval::hdi(d1, credMass = 0.95)
# lower upper
# 0.6216844 0.8728245
# 99% 区間
HDInterval::hdi(d1, credMass = 0.99)
# lower upper
# 0.5708920 0.8972801
tibble::as_tibble(mcmc_samples) %>%
# 代表値および HDI の算出
dplyr::summarise(
# 各列(推定値)ごとに代表値および HDI を算出
dplyr::across(.fns = function(theta) {
# 密度推定
d <- density(theta)
# MAP
map <- d$x[which.max(d$y)]
# 95% HDI
hdi_95 <- HDInterval::hdi(d, credMass = 0.95)
list(
map = map,
mean = mean(theta),
median = median(theta),
hdi_95_lower = hdi_95[[1]],
hdi_95_upper = hdi_95[[2]]
)
})
) %>%
# 表形式に整形
dplyr::mutate(estimation = names(theta1)) %>%
tidyr::unnest(cols = -estimation) %>%
tidyr::pivot_longer(cols = -estimation, names_to = "parameter") %>%
tidyr::pivot_wider(names_from = estimation, values_from = value)
# A tibble: 3 × 6
# parameter map mean median hdi_95_lower hdi_95_upper
# <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
#1 theta1 0.767 0.749 0.754 0.620 0.872
#2 theta2 0.659 0.651 0.653 0.509 0.790
#3 diff_thetas 0.119 0.0986 0.0999 -0.0930 0.288