# 1. Stratified Sampling (分层抽样) ----
library(tidyverse)
# Fix the RNG seed so the simulated data and the draws are reproducible.
set.seed(1)

# Simulated population: four grades (the strata) with 100 students each and
# a normally distributed GPA (mean 85, sd 3).
df <- data.frame(
  grade = rep(c("Freshman", "Sophomore", "Junior", "Senior"), each = 100),
  gpa = rnorm(400, mean = 85, sd = 3)
)

# Stratified sample with a fixed number of rows (10) per grade.
# slice_sample() replaces the superseded sample_n(); ungroup() drops the
# grouping so later verbs do not silently operate per grade.
strat_sample <- df %>%
  group_by(grade) %>%
  slice_sample(n = 10) %>%
  ungroup()

# Stratified sample with a fixed proportion (15%) per grade.
# slice_sample(prop = ) replaces the superseded sample_frac().
# NOTE: this reuses the name `strat_sample`, replacing the fixed-size sample.
strat_sample <- df %>%
  group_by(grade) %>%
  slice_sample(prop = 0.15) %>%
  ungroup()

# Number of sampled rows per stratum.
table(strat_sample$grade)
# 2. Cluster Sampling (聚类抽样) ----
library(tidyverse)
# Fix the RNG seed so the simulated data and the cluster draw are reproducible.
set.seed(1)

# Simulated population: 10 tours (the clusters) with 20 customers each and a
# normally distributed experience score (mean 7, sd 1).
df <- data.frame(
  tour = rep(1:10, each = 20),
  experience = rnorm(200, mean = 7, sd = 1)
)

# Pick 4 distinct tours at random. `FALSE` is spelled out because `F` is an
# ordinary variable that can be reassigned.
clusters <- sample(unique(df$tour), size = 4, replace = FALSE)

# Keep every row belonging to one of the selected tours.
cluster_sample <- df[df$tour %in% clusters, ]

# Number of sampled rows per selected tour.
table(cluster_sample$tour)
# 3. Systematic Sampling (系统抽样) ----
library(tidyverse)
# Fix the RNG seed so the subsequent random draws are reproducible.
set.seed(1)
# Generate `n` random names, each built from five uppercase letters.
#
# Args:
#   n: number of names to generate (default 5000).
# Returns:
#   A character vector of length `n`.
randomNames <- function(n = 5000) {
  # One vector of `n` random letters per character position (five positions).
  letter_columns <- lapply(
    seq_len(5),
    function(i) sample(LETTERS, size = n, replace = TRUE)
  )
  # Paste the five positions together element-wise.
  do.call(paste0, letter_columns)
}
# Simulated roster of 500 students: random last names from randomNames() and
# normally distributed GPAs (mean 82, sd 3). The names are drawn first, then
# the GPAs.
df <- data.frame(last_name = randomNames(500))
df$gpa <- rnorm(500, mean = 82, sd = 3)
# Row indices for a systematic sample of size `n` from a population of `N`.
#
# A random start `r` is drawn from 1..k and every k-th unit after it is taken,
# where k = floor(N / n). Rounding the interval down guarantees that the last
# index, r + k * (n - 1), never exceeds N; rounding up could run past the end
# of the population whenever `n` does not divide `N`.
#
# Args:
#   N: population size (single number, at least 1).
#   n: desired sample size (single number, between 1 and N).
# Returns:
#   A numeric vector of `n` equally spaced indices within 1..N.
# Raises:
#   An error if `N` or `n` is not a single number, or if `n` is outside 1..N.
obtain_sys <- function(N, n) {
  stopifnot(
    is.numeric(N), length(N) == 1L,
    is.numeric(n), length(n) == 1L
  )
  if (n < 1 || n > N) {
    stop("`n` must be between 1 and `N`.", call. = FALSE)
  }
  k <- floor(N / n)
  # sample.int() avoids building 1:k just to draw a single start position.
  r <- sample.int(k, 1L)
  r + k * (seq_len(n) - 1L)
}
# Systematic sample of 100 rows: obtain_sys() supplies the row indices.
sys_sample_df <- df[obtain_sys(N = nrow(df), n = 100), ]

# Preview the first rows of the sample.
head(sys_sample_df)