# ::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: #
# :::::::::::::::::  Jee-Seon Kim, Peter M. Steiner, & Wen-Chiang Ivan Lim (2016)  :::::::::::::: #
# ::: Data analsis in "Mixture modeling strategies for causal inference with multilevel data" ::: #
# :::::::::::::::  in Advances in Multilevel Modeling for Educational Research   :::::::::::::::: # 
# :::::::::  Edited by Jeffrey R. Harring, Laura M. Stapleton, and S.Natashs Beretvas  :::::::::: # 
# ::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: #


# libraries 

library(flexmix)  # mixture models
library(lme4)     # multilevel models 

# set working directory

setwd("specifiy/your/path/here")

# read QE functions

source("QE_functions.R")

# read data

pisa.complete <- read.table("PISA2012SINGAPORE.csv")

str(pisa.complete)
dim(pisa.complete)  # 2172 x 16
names(pisa.complete)

# create the treatment assignment variables Z1 & Z0 and select covariates 
 
pisa.complete$z <- pisa.complete$private
pisa.complete$z1 <- pisa.complete$z 
pisa.complete$z0 <- 1 - pisa.complete$z


# ::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
# ::::: Latent Class Logistic Regression for Selection Model :::
# ::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::

# starting value

set.seed(1297531)

# covariates in the selection models

cov.in.model <- c("SES","G9.or.lower","Eng.at.home","dad.college","two.parents","homebook25","str.moreinfo","private.sch","climate.sch","st.te.rel.sch","te.supp.sch") 

# Selection models with different numbers of latent classes (1 to 4)

(k1 <- flexmix(as.formula(paste('cbind(z1,z0)', '~', paste(cov.in.model, collapse = ' + '), '|', 'sch.id' )), data = pisa.complete, k = 1, model = FLXMRglm(family = "binomial")))

(k2 <- flexmix(as.formula(paste('cbind(z1,z0)', '~', paste(cov.in.model, collapse = ' + '), '|', 'sch.id' )), data = pisa.complete, k = 2, model = FLXMRglm(family = "binomial")))

(k3 <- flexmix(as.formula(paste('cbind(z1,z0)', '~', paste(cov.in.model, collapse = ' + '), '|', 'sch.id' )), data = pisa.complete, k = 3, model = FLXMRglm(family = "binomial")))

(k4 <- flexmix(as.formula(paste('cbind(z1,z0)', '~', paste(cov.in.model, collapse = ' + '), '|', 'sch.id' )), data = pisa.complete, k = 4, model = FLXMRglm(family = "binomial")))    

summary(k1)@logLik
summary(k2)@logLik
summary(k3)@logLik
summary(k4)@logLik

summary(k1)@AIC
summary(k2)@AIC # smallest 2814
summary(k3)@AIC
summary(k4)@AIC

prop.table(table(clusters(k2)))  # .78 .22

pisa.complete$class <- clusters(k2)

head(pisa.complete)


# ::::::::::::::::::::::::::::::::::::::::::::
# ::::::::: Selection models by class ::::::::
# ::::::::::::::::::::::::::::::::::::::::::::


(sel.mdl <- as.formula(paste('z', '~', paste(cov.in.model, collapse = ' + '), '+','(1','|', 'sch.id)'))) 

sel.whole.data <- glmer(sel.mdl, data = pisa.complete, family = 'binomial')
summary(sel.whole.data)

sel.class1 <- glmer(sel.mdl, data = pisa.complete, family = 'binomial', subset = class == 1)
summary(sel.class1)

sel.class2 <- glmer(sel.mdl, data = pisa.complete, family = 'binomial', subset = class == 2)
summary(sel.class2)

# class proportions
prop.table(table(pisa.complete$class))  # .78 .22

# latent class 1
pisa.c1 <- pisa.complete[pisa.complete["class"]==1,]
nrow(pisa.c1)                 # 1701 students
length(table(pisa.c1$sch.id)) #  132  schools

# latent class  2
pisa.c2 <- pisa.complete[pisa.complete["class"]==2,]
nrow(pisa.c2)                 #  471 students
length(table(pisa.c2$sch.id)) #   40  schools


# :::::::::::::::::::::::::::::::::::::::::
# ::::: Estimate the Propensity Score :::::
# :::::::::::::::::::::::::::::::::::::::::

# get PS and PS-logit from selection models 
# glmer() in package lme4 for multilevel logit regression  
# glmer fitted values are the PS

# overall, C1, C2 separately 

pisa.complete$lps <- predict(sel.whole.data)
pisa.complete$ps <- exp(pisa.complete$lps)/(1+exp(pisa.complete$lps))

pisa.complete$lps.LC[pisa.complete$class == 1]  <- predict(sel.class1)  
pisa.complete$lps.LC[pisa.complete$class == 2]  <- predict(sel.class2)   

pisa.complete$ps.LC <- exp(pisa.complete$lps.LC)/(1+exp(pisa.complete$lps.LC))

# inverse-propensity weighting for estimating ATE 
# w = 1/ps if z = 1, w = 1/(1-ps) if z = 0 
# for ATT
# w = 1 if z = 1, w = ps/(1-ps) if z = 0 

pisa.complete$ipwt <- with(pisa.complete, z / ps + (1 - z) / (1 - ps))

pisa.complete$ipwt.LC <- with(pisa.complete, z / ps.LC + (1 - z) / (1 - ps.LC))



# :::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
# ::::::::::: Covariante Balance & Overlap ::::::::::::::::::
# :::::::::::::::::::::::::::::::::::::::::::::::::::::::::::

# use QE functions

(imbal <- sapply(pisa.complete[, cov.in.model], select.diff, x = pisa.complete$z))

(imbal.c1 <- sapply(pisa.c1[, cov.in.model], select.diff, x = pisa.c1$z))

(imbal.c2 <- sapply(pisa.c2[, cov.in.model], select.diff, x = pisa.c2$z))

rbind(imbal[4, ], imbal.c1[4, ], imbal.c2[4, ])  


# plot standardized mean difference & variance ratio 

plot(imbal["Cohen's d", ], imbal["Var. Ratio", ], xlim = c(-1, 1), main = 'Imbalance', xlab = 'Std. Mean Difference', ylab = 'Variance Ratio')
abline(h = 1, v = 0)
abline(h = c(4/5, 5/4), v = c(-.1, .1), lty = 3)

plot(imbal.c1["Cohen's d", ], imbal.c1["Var. Ratio", ], xlim = c(-1, 1),  main = 'Imbalance', xlab = 'Std. Mean Difference', ylab = 'Variance Ratio')
abline(h = 1, v = 0)
abline(h = c(4/5, 5/4), v = c(-.1, .1), lty = 3)


plot(imbal.c2["Cohen's d", ], imbal.c2["Var. Ratio", ], xlim = c(-1, 1),   main = 'Imbalance', xlab = 'Std. Mean Difference', ylab = 'Variance Ratio')
abline(h = 1, v = 0)
abline(h = c(4/5, 5/4), v = c(-.1, .1), lty = 3)


# overlap 

m.mat <- with(pisa.complete, cbind(tapply(lps, z, mean), tapply(lps.LC, list(z, class), mean)))
par(mfrow = c(1,3), mar = c(5, 5, 3, 1))
with(pisa.complete, overlap(lps, z, 30), lim = c(-110, 100), lab = F, xlab = '', cex.lab = 1.7, main = 'Overall', cex.main = 2)
points(m.mat[, 1], c(0, 0), pch = 16, cex = 1.5, col = c('blue', 'red')); points(m.mat[, 1], c(0, 0), pch = 16, cex = .6, col = 'white')

with(pisa.complete[pisa.complete$class == 1, ], overlap(lps.LC, z, 30), lim = c(-110, 100), lab = T, xlab = 'PS-logit', cex.lab = 1.7, main = 'Class 1', cex.main = 2, ylab = '')
points(m.mat[, 2], c(0, 0), pch = 16, cex = 1.5, col = c('blue', 'red')); points(m.mat[, 2], c(0, 0), pch = 16, cex = .6, col = 'white')
with(pisa.complete[pisa.complete$class == 2, ], overlap(lps.LC, z, 30), lim = c(-110, 100), lab = F, xlab = '', cex.lab = 1.7, main = 'Class 2', cex.main = 2, ylab = '')
points(m.mat[, 3], c(0, 0), pch = 16, cex = 1.5, col = c('blue', 'red')); points(m.mat[, 3], c(0, 0), pch = 16, cex = .6, col = 'white')


# imbalance between treatement and comparisons groups 

with(dat <- pisa.complete, rbm.plot(dat[, cov.in.model], z, lps))
with(dat <- pisa.complete[pisa.complete$class == 1, ], rbm.plot(dat[, cov.in.model], z, lps.LC))
with(dat <- pisa.complete[pisa.complete$class == 2, ], rbm.plot(dat[, cov.in.model], z, lps.LC))

# balance after PS adjustment

with(dat <- pisa.complete, rbm.plot(dat[, cov.in.model], z, lps, wt = ipwt))
with(dat <- pisa.complete[pisa.complete$class == 1, ], rbm.plot(dat[, cov.in.model], z, lps.LC, wt = ipwt.LC))
with(dat <- pisa.complete[pisa.complete$class == 2, ], rbm.plot(dat[, cov.in.model], z, lps.LC, wt = ipwt.LC))



# :::::::::::::::::::::::::::::::::::::
# ::: Estimate the treatment effect ::: 
# :::::::::::::::::::::::::::::::::::::

# prima facie:  mean.lesson - mean.no.lesson 
# ignores selection bias & multilevel structure

(m.wt <- with (pisa.complete, tapply(math, z, mean)))
m.wt['1'] - m.wt['0']

# same as

summary(lm.out <- lm(math ~ z, data = pisa.complete))   # 2.36 (p = .58)

# multilevel models account for the nested structure but not selection bias

summary(hlm.out <- lmer(math ~ z + (1|sch.id), data = pisa.complete))  # -1.07 (t = -.30)


#::::: with PS adjustment (no covariate) these estimates are reported in the chapter ::::::

summary(ml.out.ipwt <- lmer(math ~ z + (1|sch.id), data = pisa.complete, weights = ipwt))  # -11 (t = -3.15)

summary(ml.out.ipwt.c1 <- lmer(math ~ z + (1|sch.id), data = pisa.complete, weights = ipwt.LC, subset = class == 1))  # -15.82 (t = -3.94)

summary(ml.out.ipwt.c2 <- lmer(math ~ z + (1|sch.id), data = pisa.complete, weights = ipwt.LC, subset = class == 2))  # not sig but large pos, 7.7



# :::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
# ::::::::: Multilevel outcome models with covariate adjustments  :::::::::::
# :::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
# 

(out.mdl <- as.formula(paste('math', '~', 'z', '+', paste(cov.in.model, collapse = ' + '), '+','(1','|', 'sch.id)'))) 

summary(ml0.out <- lmer(math ~  (1|sch.id), data = pisa.complete))  # empty model to check ICC

varcomp <- function(obj) {
  components <- unlist(lapply(VarCorr(obj), diag))
  residual <- attr(VarCorr(obj), "sc")
  variance_components <- c(components, Residual = residual ^ 2)
  ICC <- components / sum(variance_components)
  list(var.components = variance_components, ICC = ICC)
}

summary(ml0.out.c1 <- lmer(math ~ (1|sch.id), data = pisa.complete, subset = class == 1)) 

summary(ml0.out.c2 <- lmer(math ~ (1|sch.id), data = pisa.complete, subset = class == 2)) 

varcomp(ml0.out)     # ICC = .34
varcomp(ml0.out.c1)  # ICC = .35
varcomp(ml0.out.c2)  # ICC = .28


# doubly robust estimator: with PS & covariate adjustment

# with & without ipwt (doublely robust & covariate adjustment only)

# whole data
summary(out.reg.adj <- lmer(out.mdl, data = pisa.complete))
summary(out.reg.adj.ipwt <- lmer(out.mdl, data = pisa.complete, weights = ipwt))

fixef(out.reg.adj)["z"]        # -13 (t = -3.68)
fixef(out.reg.adj.ipwt)["z"]   # -12 (t = -3.68)

# latent class 1
summary(out.reg.adj.c1 <- lmer(out.mdl, data = pisa.complete, subset = class == 1))
summary(out.reg.adj.ipwt.c1 <- lmer(out.mdl, data = pisa.complete, weights = ipwt.LC, subset = class == 1))
fixef(out.reg.adj.c1)["z"]      # -18 (t = -4.17)
fixef(out.reg.adj.ipwt.c1)["z"] # -17 (t = -4.51)

# latent class 2
summary(out.reg.adj.c2 <- lmer(out.mdl, data = pisa.complete, subset = class == 2))
summary(out.reg.adj.ipwt.c2 <- lmer(out.mdl, data = pisa.complete, weights = ipwt.LC, subset = class == 2))
fixef(out.reg.adj.c2)["z"]      # 3 (t =  .35)
fixef(out.reg.adj.ipwt.c2)["z"] # 9 (t = 1.30)