Simulations in R Examples

Download Report

Transcript Simulations in R Examples

BSTA 670 – Statistical Computing
Lecture 16:
Some R Simulations
##############################################################################
# R program demonstrating use of Monte Carlo simulation to estimate the power
# of a two-sample unpaired t-test for the mean, for equal sample sizes.
##############################################################################
# Initialize parameters
# m1 is mean of normal distribution for group 1
# m2 is mean of normal distribution for group 2
# s is standard deviation of normal distribution for both groups
# n is sample size for both groups, observations in in simulated data set
# M is number of simulations to perform
# a is alpha level for test
m1<-5
m2<-7
s<-5
n<-100
M<-10000
a<-0.05
###############################################################################
# Create vector to receive results
reject<-NULL
# Simulation loop
for(i in 1:M) {
x1<-rnorm(n,m1,s) # Generate simulated data for group 1
x2<-rnorm(n,m2,s) # Generate simulated data gor group 2
tp<-t.test(x1,x2)$p.value # Perform t-test and obtain p-value
rejecti<-ifelse(tp<a,1,0) # Determine if test rejects
reject<-c(reject,rejecti) # Save result
}
print(paste("Estimated Power of Test: ", mean(reject)))
#
# Theoretical power of test
power.t.test(n=n,delta=m1-m2,sd=s,sig.level=a,type="two.sample",alternative="two.sided")
Sim_ttest_power1.s
##############################################################################
# R program demonstrating use of Monte Carlo simulation to estimate actual
# level of a two-sample unpaired t-test with equal sample sizes for the mean.
##############################################################################
# Initialize parameters
# m is mean of normal distributions
# s is standard deviation of normal distributions
# n is sample size, number of observations in simulated data sets
# M is number of simulations to perform
# a is confidence interval level
m<-7
s<-5
n<-100
M<-1000
a<-0.05
###############################################################################
# Create vector to receive results
reject<-NULL
# Simulation loop
for(i in 1:M) {
x1<-rnorm(n,m,s) # Generage data for group 1
x2<-rnorm(n,m,s) # Generate data for group 2
tp<-t.test(x1,x2)$p.value # Perform t-test and extract p-value
rejecti<-ifelse(tp<a,1,0) # Determine if test rejects
reject<-c(reject,rejecti) # Add rejection indicator to results vector
}
print(paste("Estimated Level of Test: ", mean(reject)))
Sim_ttest_alpha.s
##############################################################################
# R program demonstrating use of Monte Carlo simulation to estimate the power
# curve of a two-sample unpaired t-test for the mean, for equal sample sizes.
##############################################################################
# Initialize parameters
# s is standard deviation of normal distribution for both groups
# n is sample size for both groups, observations in in simulated data set
# M is number of simulations to perform
# a is alpha level for test
#d
s<-8
n<-100
M<-500
a<-0.05
k<-51
d<-5
###############################################################################
# Create matrix to receive results
pcurve<-NULL
# Loop over delta values
for( delta in seq(from=-d,to=d, by=2*d/k) ){
# Create vector to receive results for current delta
reject<-NULL
# Loop to estimate power ar specified delta
for(i in 1:M) {
x1<-rnorm(n,0,s) # Generate simulated data for group 1, mean=0
x2<-rnorm(n,delta,s) # Generate simulated data for group 2, mean=delta
tp<-t.test(x1,x2)$p.value # Perform t-test and extract p-value
rejecti<-ifelse(tp<a,1,0) # Determine if test rejects
reject<-c(reject,rejecti) # Save result for current simulation i
}
pcurve<-rbind( pcurve, c(mean(reject),delta) ) # Save result for current delta
}
p<-pcurve[,1]
m<-pcurve[,2]
plot(m,p, xlab="Delta",ylab="Power")
Sim_ttest_power_curve.s
##############################################################################
# R program demonstrating use of Monte Carlo simulation to estimate the power
# curve of a Wilcoxon test (two-sample unpaired) for the medians,
# for equal sample sizes.
##############################################################################
# Initialize parameters
# s is standard deviation of normal distribution for both groups
# n is sample size for both groups, observations in in simulated data set
# M is number of simulations to perform
# a is alpha level for test
#d
s<-8
n<-100
M<-500
a<-0.05
k<-51
d<-5
###############################################################################
# Create matrix to receive results
pcurve<-NULL
# Loop over delta values
for( delta in seq(from=-d,to=d, by=2*d/k) ){
# Create vector to receive results for current delta
reject<-NULL
# Loop to estimate power ar specified delta
for(i in 1:M) {
x1<-rnorm(n,0,s) # Generate simulated data for group 1, mean=0
x2<-rnorm(n,delta,s) # Generate simulated data for group 2, mean=delta
tp<-wilcox.test(x1,x2)$p.value # Perform Wilcoxon test and extract p-value
rejecti<-ifelse(tp<a,1,0) # Determine if test rejects
reject<-c(reject,rejecti) # Save result for current simulation i
}
pcurve<-rbind( pcurve, c(mean(reject),delta) ) # Save result for current delta
}
p<-pcurve[,1]
m<-pcurve[,2]
plot(m,p, xlab="Delta",ylab="Power")
Sim_wilcoxon_power_curve.s
##############################################################################
# R program demonstrating use of Monte Carlo simulation to estimate confidence
# interval coverage of asymptotic normal confidence interval for the mean.
# The results of the simulated confidence intervals are plotted, with a line
# denoting the actual mean.
##############################################################################
# Initialize parameters
# m is mean of normal distribution
# s is standard deviation of normal distribution
# n is sample size, number of observations in simulated data sets
# M is number of simulations to perform
# a is confidence interval level
m<-7
s<-5
n<-100
M<-50
a<-0.95
###############################################################################
Sim_coverage_normal.s
# Create vectors to receive results
covers<-NULL
lower<-NULL
upper<-NULL
yplot<-NULL
xplot<-NULL
# Simulation loop
for(i in 1:M) {
x<-rnorm(n,m,s) # Generate data
ci<-t.test(x, conf.level=a) # Obtain confidence interval
loweri<-ci$conf.int[1] # Extract lower confidence limit
upperi<-ci$conf.int[2] # Extract upper confidence limit
coveri<-ifelse(m>=loweri & m<=upperi,1,0) # determine if CI includes real mean
covers<-c(covers,coveri) # add coverage indicator, coveri, to results vector
lower<-c(lower,loweri) # add lower limit, loweri, to results vector
upper<-c(upper,upperi) # add upper limit, upperi, to results vector
xplot<-c(xplot,c(loweri,upperi,NA)) # update plotting vector for x
yplot<-c(yplot, c(i, i, i)) # update plotting vector for y
}
Continued on
next slide
print(paste("Estimated Coverage Probability: ", mean(covers)))
plot(xplot,yplot,type='l', xlab="Confidence Interval",ylab="Simulation Number")
abline(v=m)
Sim_coverage_normal.s
Continued from
previous slide
##############################################################################
# R program demonstrating generation of survival data with fitting of
# parametric survival regression and Cox regression models to verify
# relative rates
##############################################################################
library(survival)
Survival_exponential.s
# Initialize parameters
# n is sample size for both groups, observations in in simulated data set
# m1 is mean for group 1 survival time distribution
# m2 is mean for group 2 survival time distribution
# cm is mean for censoring time distribution
n<-200
m1<-0.5
m2<-0.6
cm<-1.0
###############################################################################
Continued on
next slide
# Create group indicator
x<-c( rep(0,n) , rep(1,n) )
# Generate group 1 and group 2 complete survival times.
y1<-rexp(n, rate=1/m1)
y2<-rexp(n, rate=1/m2)
y<-c(y1,y2)
print(paste("Mean of Group 1 survival time: ", mean(y1)))
print(paste("Mean of Group 2 survival time: ", mean(y2)))
Survival_exponential.s
# Generate censoring times
cen<-rexp(2*n, rate=1/cm)
# Create observed censored survival time
ycen<-pmin(y,cen)
# Create censoring indicator, 0 for censored (y>cen), 1 for complete (y<=cen)
censored<-as.numeric(y<=cen)
# Fit parametric survival regression
out<-survreg(Surv(ycen, censored)~x, dist="exponential")
print(out)
#
print(paste("Relative rate from simulation parameters: ", m2/m1))
print(paste("Estimated relative rate from parametric regression: ", exp(out$coefficients[2])))
#
# Fit Cox model
out<-coxph(formula = Surv(ycen, censored) ~ x)
print(out)
print(paste("Estimated relative rate from Cox regression: ", exp(-out$coef)))
Continued from
previous slide
##############################################################################
# R program demonstrating simulation to obtain power for Cox regression
# model based on specified parameters
##############################################################################
# Initialize parameters
# n is sample size for both groups, observations in in simulated data set
# m1 is mean for group 1 survival time distribution
# m2 is mean for group 2 survival time distribution
# cm is mean for censoring time distribution
# M is number of simulations to perform
# a is alpha level of test
n<-1000
m1<-0.5
m2<-0.6
cm<-1.0
M<-1000
a<-0.05
###############################################################################
# Create vector to receive results
reject<-NULL
Sim_survival_exponential_power.s
Continued on
next slide
# Simulation loop
for(i in 1:M) {
# Create group indicator
x<-c( rep(0,n) , rep(1,n) )
Sim_survival_exponential_power.s
# Generate group 1 and group 2 complete survival times.
y1<-rexp(n, rate=1/m1)
y2<-rexp(n, rate=1/m2)
y<-c(y1,y2)
# print(paste("Mean of Group 1 survival time: ", mean(y1)))
# print(paste("Mean of Group 2 survival time: ", mean(y2)))
# Generate censoring times
cen<-rexp(2*n, rate=1/cm)
# Create observed censored survival time
ycen<-pmin(y,cen)
# Create censoring indicator, 0 for censored (y>cen), 1 for complete (y<=cen)
censored<-as.numeric(y<=cen)
# Fit Cox model
out<-coxph(formula = Surv(ycen, censored) ~ x)
# print(out)
# print(paste("Estimated relative rate from Cox regression: ", exp(-out$coef)))
pvaluei<- 2*(1-pnorm(sqrt(out$wald.test))) # Obtain p-value from Wald test
rejecti<-ifelse(pvaluei<a,1,0) # Determine if test rejects
reject<-c(reject,rejecti) # Save result
}
print(paste("Estimated Power of Test: ", mean(reject)))
Continued from
previous slide
##############################################################################
# R program demonstrating simulation to obtain level for Cox regression
# model based on specified parameters
##############################################################################
# Initialize parameters
# n is sample size for both groups, observations in in simulated data set
# m1 is mean for group 1 survival time distribution
# m2 is mean for group 2 survival time distribution
# cm is mean for censoring time distribution
# M is number of simulations to perform
# a is alpha level of test
n<-1000
m<-0.5
cm<-1.0
M<-1000
a<-0.05
###############################################################################
# Create vector to receive results
reject<-NULL
Sim_survival_exponential_level.s
Continued on
next slide
# Simulation loop
for(i in 1:M) {
# Create group indicator
x<-c( rep(0,n) , rep(1,n) )
# Generate group 1 and group 2 complete survival times.
y1<-rexp(n, rate=1/m)
y2<-rexp(n, rate=1/m)
y<-c(y1,y2)
# print(paste("Mean of Group 1 survival time: ", mean(y1)))
# print(paste("Mean of Group 2 survival time: ", mean(y2)))
Sim_survival_exponential_level.s
# Generate censoring times
cen<-rexp(2*n, rate=1/cm)
# Create observed censored survival time
ycen<-pmin(y,cen)
# Create censoring indicator, 0 for censored (y>cen), 1 for complete (y<=cen)
censored<-as.numeric(y<=cen)
# Fit Cox model
out<-coxph(formula = Surv(ycen, censored) ~ x)
# print(out)
# print(paste("Estimated relative rate from Cox regression: ", exp(-out$coef)))
pvaluei<- 2*(1-pnorm(sqrt(out$wald.test))) # Obtain p-value from Wald test
rejecti<-ifelse(pvaluei<a,1,0) # Determine if test rejects
reject<-c(reject,rejecti) # Save result
}
print(paste("Estimated Power of Test: ", mean(reject)))
Continued from
previous slide
##############################################################################
# R program demonstrating simulation to obtain level for Cox regression
# model based on specified parameters, WITH TRUNCATION
# NOTE: Set tm1=tm2==0.0000001 (something near 0) to get no truncation.
# In this case get "a" for level.
##############################################################################
library(survival)
# Initialize parameters
# n is sample size for both groups, observations in in simulated data set
# m1 is mean for group 1 survival time distribution
# m2 is mean for group 2 survival time distribution
# cm is mean for censoring time distribution
# M is number of simulations to perform
# a is alpha level of test
# tm1 is truncation mean for group 1
# tm2 is truncation mean for group 2
# ntrunc1 is number obs truncated from group 1
# ntrunc2 is number obs truncated from group 2
# ptrunc1 is proportion truncated from group 1
# ptrunc2 is proportion truncated from group 2
n<-1000
m1<-0.5
m2<-0.5
cm<-1.0
tm1<-0.10
tm2<-0.10
M<-50
a<-0.05
Continued on
next slide
Sim_survival_exponential_truncated_level.s
###############################################################################
# Create vector to receive results
reject<-NULL
ntrunc1<-NULL
ntrunc2<-NULL
ptrunc1<-NULL
ptrunc2<-NULL
# Create group indicator
x<-c( rep(0,n) , rep(1,n) )
Continued from
previous slide
Sim_survival_exponential_truncated_level.s
# Simulation loop
for(i in 1:M) {
# Create vector to receive results
y<-NULL
trunc<-NULL
cen<-NULL
obs1<-0
ntrunci<-0
while(obs1<n) {
# Generate group 1 complete survival times.
yi<-rexp(1, rate=1/m1)
# Generate truncation time
trunci<-rexp(1, rate=1/tm1)
# Generate censoring times
ceni<-rexp(1, rate=1/cm)
# Determine if observation is truncated
truncated<-as.numeric(yi<trunci) # 1 if true
if(truncated==0) {
y<-c(y,yi)
trunc<-c(trunc,trunci)
cen<-c(cen,ceni)
obs1<-obs1+1
}
if(truncated==1) ntrunci<-ntrunci+1
}
ntrunc1<-c(ntrunc1,ntrunci)
ptrunc1<-c(ptrunc1, ntrunci/(n+ntrunci) )
Continued from
previous slide
Sim_survival_exponential_truncated_level.s
obs2<-0
ntrunci<-0
while(obs2<n) {
# Generate group 2 complete survival times.
yi<-rexp(1, rate=1/m2)
# Generate truncation time
trunci<-rexp(1, rate=1/tm2)
# Generate censoring times
ceni<-rexp(1, rate=1/cm)
# Determine if observation is truncated
truncated<-as.numeric(yi<trunci) # 1 if true
if(truncated==0) {
y<-c(y,yi)
trunc<-c(trunc,trunci)
cen<-c(cen,ceni)
obs2<-obs2+1
}
if(truncated==1) ntrunci<-ntrunci+1
}
ntrunc2<-c(ntrunc2,ntrunci)
ptrunc2<-c(ptrunc2, ntrunci/(n+ntrunci) )
Sim_survival_exponential_truncated_level.s
if(i==1) print(paste("Number truncated from group 2 while simulating: ", notused))
if(i==1) print(paste("Mean of Group 1 survival time: ", mean(y[1:n])))
if(i==1) print(paste("Mean of Group 2 survival time: ", mean(y[(n+1):(2*n)])))
# Create observed censored survival time
ycen<-pmin(y,cen)
# Create censoring indicator, 0 for censored (y>cen), 1 for complete (y<=cen)
censored<-as.numeric(y<=cen)
# Fit Cox model
out<-coxph(formula = Surv(ycen, censored) ~ x)
# print(out)
# print(paste("Estimated relative rate from Cox regression: ", exp(-out$coef)))
pvaluei<- 2*(1-pnorm(sqrt(out$wald.test))) # Obtain p-value from Wald test
rejecti<-ifelse(pvaluei<a,1,0) # Determine if test rejects
reject<-c(reject,rejecti) # Save result
}
Continued from
previous slide
print(paste("Estimated Power of Test: ", mean(reject)))
print(paste("Average number truncated from group 1 while simulating: ", mean(ntrunc1)))
print(paste("Average number truncated from group 2 while simulating: ", mean(ntrunc2)))
print(paste("Average proportion truncated from group 1 while simulating: ",
mean(ptrunc1)))
print(paste("Average proportion truncated from group 2 while simulating: ",
mean(ptrunc2)))
Continued from
previous slide
Sim_survival_exponential_truncated_level.s