# ::::: functions for PS techniques ::::: var.wt <- function(x.wt) { # computes weighted mean and variance (ML-estimators!) # x.wt ... matrix with two columns: cbind(x, wt) x.wt <- na.omit(x.wt) x <- x.wt[, 1] wt <- x.wt[, 2] m <- x %*% wt / sum(wt) v <- x^2 %*% wt / sum(wt) - m^2 res <- c(mean.wt = m, var.wt = v) res } rbm.logit <- function(lps, grp, wt = rep(1, length(lps))) { # Rubin Benchmark for propensity score logits (Rubin 2001) # lps ... logit of PS # grp ... grouping variable (treatment/assignment) - factor # wt ... weights (typically PS- or PS-strata weights) g <- length(table(grp)) stat <- unlist(by(cbind(lps, wt), grp, var.wt)) # weighted mean and variance (vector) m <- stat[seq(1, 2*g, by = 2)] v <- stat[seq(2, 2*g, by = 2)] B <- (m[2] - m[1]) / sqrt(sum(v) / 2) # standardized difference in means R <- v[2] / v[1] # variance ratio res <- c(B, R) names(res) <- c('B', 'R') res } rbm.plot <- function(covar, grp, lps = rep(1, length(grp)), wt = rep(1, length(grp)), plt = T) { # plot of descriptive metrics (Rubin 2001) # covar ... data.frame of covariates # lps ... PS-logit # grp ... grouping variable (treatment/assignment) - factor # wt ... weights (typically PS- or PS-strata weights) rbm.mat <- apply(covar, 2, rbm.logit, grp, wt) B <- rbm.mat[1, ] R <- rbm.mat[2, ] rbm.vec <- rbm.logit(lps, grp, wt) if (plt == T) { x11(5, 5); par(mar = c(4.5, 4.5, 1, 1)) plot(B, R, xlim = range(c(B[is.finite(B)], -1, 1)), ylim = range(c(R[is.finite(R)], .5, 2)), pch = 16, xlab = 'standardized difference in means', ylab = 'variance ratio', type = 'n') abline(v = 0, h = 1, col = 'grey50') abline(v = c(-.1, .1), lty = 3, col = 'grey50') abline(h = c(4/5, 5/4), lty = 3, col = 'grey50') points(B, R, pch = 16) points(rbm.vec[1], rbm.vec[2], pch = '+', col = 'red', cex = 2) ind1 <- (abs(B) > .1 | R < 4/5 | R > 5/4) & B < 0 & is.finite(B) ind2 <- (abs(B) > .1 | R < 4/5 | R > 5/4) & B > 0 & is.finite(R) if (sum(ind1) > 0) text(B[ind1], R[ind1], paste(names(covar)[ind1], ' '), adj = 1, cex = .7, xpd = T) if (sum(ind2) > 0) text(B[ind2], R[ind2], paste(' ', names(covar)[ind2]), adj = 0, cex = .7, xpd = T) } res <- round(cbind(B, R), 3) dimnames(res) <- list(names(covar), c('B', 'R')) res } overlap <- function(lps, z, bin = 20) { # plot a histogram of PS-logit by group # lps ... numeric vector of PS-logit # z ... treatment indicator (dummy) # bin ... number of bins for histogram r1 <- range(lps) c.dat <- hist(lps[z == 0], seq(r1[1], r1[2], length = bin), plot = F) t.dat <- hist(lps[z == 1], seq(r1[1], r1[2], length = bin), plot = F) t.dat$counts <- -t.dat$counts plot(c.dat, main = '', axes = F, ylim = c(min(t.dat$counts), max(c.dat$counts)), xlab = 'PS-logit') plot(t.dat, add = T, density = 30) axis(1) ax.loc <- axis(2, labels = F) axis(2, at = ax.loc, labels = abs(ax.loc)) y <- par('usr')[3:4] text(rep(max(lps), 2), c(y[2] - diff(y)*.05, y[1] + diff(y)*.05), c('Control', 'Treatment'), adj = 1, xpd = T) } select.diff <- function(x, y) { # extract only required stats from regression test # x ... independent variable (treatment indicator) # y ... dependent variable (covariates) if (is.factor(y)) y <- as.numeric(y) - 1 # transform dichotomeous factor into dummy rslt <- summary(lm(y ~ x))$coef[2, c(1:2, 4)] # run regression test grp.var <- tapply(y, x, var) grp.n <- tapply(y, x, length) pooled.var <- (grp.n - 1)%*%grp.var/(sum(grp.n)-length(grp.n)) rslt <- c(rslt, rslt[1] / sqrt(mean(grp.var)), rslt[1]/sqrt(pooled.var), grp.var[2] / grp.var[1]) names(rslt) <- c("Mean Diff.", "Std. Error", "p-value", "Cohen's d","d w. pooled.var", "Var. Ratio") round(rslt, 4) }