###################################################################
# Simple simulation prograph for power calculation for logisitc
# Regression, assuming two groups as the main predictor, and no
# Other covariate or control variables. Notice that adding covariates
# May decrease the power if these variables are associated with Group.
# But may help power if it is associated with outcome but not with group.
# Version 1.0 - 29 April 2020
#########################################################################
library(dplyr)
library(ggplot2)
# Probability of event in one group. The power depends on whetehr we have a rare
# event or a more common event. Rare events will require larger sample sizes.
p1 = 0.5
# Effect Size = Odds Ratio that we want to have power to detect.
or = 2
# Some calculations to figure out the probability of event in the other group.
odds1 = p1/(1-p1)
odds2 = odds1/or
p2 = odds2/(1+odds2)
# Number of simulations. The more the better, but it can also take a long time
# so we recommend that you use a small number to start and see how long it takes
# then increase it. We start with 400, but larger values would be desirable.
nsim = 400
# Initializing the dataset. You need to do this before running the for loops below.
r <- data.frame()
# Here we define the range of sample sizes we want to test.
# In this case we will test sample size from 20 to 400, in steps of 10
# that is, 20 30 40 50 60 ... ... 380 390 400.
# Notice that in order to have it to run quicker, you can decrease the number
# of sample sizes to be tested. For example, you coudl have seq(20,400,30)
for (n in seq(20,400,10)) {
for (i in 1:nsim) {
s <- data.frame(x = rep(c(1,0), each = n/2),
y = c(rbinom(n/2,1,p1),rbinom(n/2,1,p2)))
m <- glm(y ~ x, data = s, family = binomial)
temp <- data.frame(N = n, coeff = m$coefficients[2],
or = exp(m$coefficients[2]),
p = summary(m)$coefficients[2,4],
sig = summary(m)$coefficients[2,4] < 0.05 )
r <- rbind(r,temp)
}
}
# Displaying the power as a table
power <- r %>%
group_by(N) %>%
summarise(power = mean(sig)) %>%
ungroup() %>%
arrange(N) %>%
print(n = 100)
# Power Graph.
ggplot(power, aes(x = N, y=power)) +
geom_point(size = 2)+
geom_smooth(se = F)+
theme_minimal()
# If the graph shows that the points are too noisy, you probably
# should increase the number of simulations (nsim parameter).