- n <- 1000 #样本数据集数量
- B <- 1000 # boot.samples
- alpha <- 0.05 #置信概率
- df <- 6
- true.mu <- 0 # 真是均值
- true.sd <- 1 # 真实标准差
- # 模拟数据生成
- x <- rnorm(n,mean = true.mu, sd = true.sd)
- sample.mean.x <- mean(x) #样本均值
- sample.sd.x <- sd(x) #样本标准差
- sample.se.x <- sample.sd.x/sqrt(n) #样本标准误
- sample.conf.x <- sample.mean.x + c(-1,1)*qnorm(alpha/2,lower.tail=F)*sample.se.x #样本95%置信区间
- # 为自助法预留缓存向量
- boot.x <- vector(mode="numeric",length=B)
- boot.mean <- vector(mode="numeric",length=B)
- boot.sd <- vector(mode="numeric",length=B)
- boot.t <- vector(mode="numeric",length=B)
- for(i in 1:B)
- {
- boot.x <- sample(x, size=n, replace=T)
- boot.mean[i] <- mean(boot.x)
- boot.sd[i] <- sd(boot.x)
- boot.t[i] <- (sample.mean.x-boot.mean[i])/(boot.sd[i]/sqrt(n))
- }
- # bootstrap 统计量
- boot.mean.x <- mean(boot.mean)
- boot.sd.x <- mean(boot.sd)
- boot.se.x <- boot.sd.x/sqrt(B)
- boot.t.quantile <- quantile(boot.t,probs = c(alpha/2,1-alpha/2))
- boot.mean.quantile <- quantile(boot.mean,probs = c(alpha/2,1-alpha/2))
- boot.conf.x <- boot.mean.x + c(-1,1)*qt(alpha/2,df=B-1,lower.tail=F)*boot.se.x
- boot.conf.x2 <- boot.mean.x + boot.t.quantile*boot.se.x
- true.conf.x <- 2*sample.mean.x - c(boot.mean.quantile[2],boot.mean.quantile[1])
- #boot.mean.x
- #boot.sd.x
- #boot.se.x
- #boot.conf.x
- #boot.conf.x2
- #true.conf.x
- #sample.mean.x
- #sample.sd.x
- #sample.se.x
- #sample.conf.x
- hist(boot.mean, col = "lightblue", border = "pink",freq = F, main="Normal distribution")
- lines(density(boot.mean),col="blue", lwd=2)
- abline(v=0, col="red",lwd=2)
- message("boot.mean.x = ", format(boot.mean.x,digits = 4)," sample.mean.x = ", format(sample.mean.x,digits = 4),"\n",
- "boot.sd.x = ", format(boot.sd.x,digits = 4)," sample.sd.x = ", format(sample.sd.x,digits = 4),"\n",
- "boot.conf.x = ", format(boot.conf.x,digits = 4)," sample.conf.x = ", format(sample.conf.x,digits = 4),"\n",
- "boot.conf.x2 = ", format(boot.conf.x2,digits = 4)," true.conf.x = ", format(true.conf.x,digits = 4),"\n")
复制代码 这段代码主要简单得阐述了bootstrap技术,并对比了在bootstrap统计量和样本统计量以及(真实)统计量的差异。 |