Mat*_*lke 5
如果你想坚持使用 tidyverse,这样的事情可能会起作用:
library(dplyr)
library(tidyr)
library(purrr)
library(ggplot2)
library(broom)
set.seed(42)
n_boot <- 1000
mtcars %>%
select(-c(cyl, vs:carb)) %>%
pivot_longer(-mpg) -> tbl_mtcars_long
tbl_mtcars_long %>%
nest(model_data = c(mpg, value)) %>%
# for mpg and value observations within each level of name (e.g., disp, hp, ...)
mutate(plot_data = map(model_data, ~ {
# calculate information about the observed mpg and value observations
# within each level of name to be used in each bootstrap sample
submodel_data <- .x
n <- nrow(submodel_data)
min_x <- min(submodel_data$value)
max_x <- max(submodel_data$value)
pred_x <- seq(min_x, max_x, length.out = 100)
# do the bootstrapping by
# 1) repeatedly sampling samples of size n with replacement n_boot times,
# 2) for each bootstrap sample, fit a model,
# 3) and make a tibble of predictions
# the _dfr means to stack each tibble of predictions on top of one another
map_dfr(1:n_boot, ~ {
submodel_data %>%
sample_n(n, TRUE) %>%
lm(mpg ~ value, .) %>%
# suppress augment() warnings about dropping columns
{ suppressWarnings(augment(., newdata = tibble(value = pred_x))) }
}) %>%
# the bootstrapping is finished at this point
# now work across bootstrap samples at each value
group_by(value) %>%
# to estimate the lower and upper 95% quantiles of predicted mpgs
summarize(l = quantile(.fitted, .025),
u = quantile(.fitted, .975),
.groups = "drop"
) %>%
arrange(value)
})) %>%
select(-model_data) %>%
unnest(plot_data) -> tbl_plot_data
ggplot() +
# observed points, model, and se
geom_point(aes(value, mpg), tbl_mtcars_long) +
geom_smooth(aes(value, mpg), tbl_mtcars_long,
method = "lm", formula = "y ~ x", alpha = 0.25, fill = "red") +
# overlay bootstrapped se for direct parison
geom_ribbon(aes(value, ymin = l, ymax = u), tbl_plot_data,
alpha = 0.25, fill = "blue") +
facet_wrap(~ name, scales = "free_x")
由reprex 包(v1.0.0)于 2021-07-19 创建
dww*_*dww 5
也许是这样的:
library(data.table)
mt = as.data.table(mtcars_long_numeric)
# helper function to return lm coefficients as a list
lm_coeffs = function(x, y) {
coeffs = as.list(coefficients(lm(y~x)))
names(coeffs) = c('C', "m")
coeffs
}
# generate bootstrap samples of slope ('m') and intercept ('C')
nboot = 100
mtboot = lapply (seq_len(nboot), function(i)
mt[sample(.N,.N,TRUE), lm_coeffs(values, mpg), by=names])
mtboot = rbindlist(mtboot)
# and plot:
ggplot(mt, aes(values, mpg)) +
geom_abline(aes(intercept=C, slope=m), data = mtboot, size=0.3, alpha=0.3, color='forestgreen') +
stat_smooth(method = "lm", colour = "red", geom = "ribbon", fill = NA, size=0.5, linetype='dashed') +
geom_point() +
facet_wrap(~names, scales = 'free_x')
PS 对于那些喜欢 dplyr (不是我)的人,这里是转换为该格式的相同逻辑:
lm_coeffs = function(x, y) {
coeffs = coefficients(lm(y~x))
tibble(C = coeffs[1], m=coeffs[2])
}
mtboot = lapply (seq_len(nboot), function(i)
mtcars_long_numeric %>%
group_by(names) %>%
slice_sample(prop=1, replace=TRUE) %>%
summarise(tibble(lm_coeffs2(values, mpg))))
mtboot = do.call(rbind, mtboot)
更多推荐
facet
发布评论