# ::::: functions for PS techniques :::::

var.wt <- function(x.wt)
{
  # computes weighted mean and variance (ML-estimators!)
  # x.wt ... matrix with two columns: cbind(x, wt)
  
  x.wt <- na.omit(x.wt)
  x <- x.wt[, 1]
  wt <- x.wt[, 2]
  m <- x %*% wt / sum(wt)
  v <- x^2 %*% wt / sum(wt) - m^2
  res <- c(mean.wt = m, var.wt = v)
  res
}

rbm.logit <- function(lps, grp, wt = rep(1, length(lps))) 
{
  # Rubin Benchmark for propensity score logits (Rubin 2001)
  # lps  ... logit of PS
  # grp  ... grouping variable (treatment/assignment) - factor
  # wt   ... weights (typically PS- or PS-strata weights)
  
  g <- length(table(grp))
  stat <- unlist(by(cbind(lps, wt), grp, var.wt))  # weighted mean and variance (vector)
  m <- stat[seq(1, 2*g, by = 2)]
  v <- stat[seq(2, 2*g, by = 2)]
  B <- (m[2] - m[1]) / sqrt(sum(v) / 2)   # standardized difference in means
  R <-  v[2] / v[1]                       # variance ratio
  res <- c(B, R)
  names(res) <- c('B', 'R')
  res
}

rbm.plot <- function(covar, grp, lps = rep(1, length(grp)), wt = rep(1, length(grp)), plt = T) 
{
  # plot of descriptive metrics (Rubin 2001)
  # covar ... data.frame of covariates
  # lps   ... PS-logit
  # grp   ... grouping variable (treatment/assignment) - factor
  # wt    ... weights (typically PS- or PS-strata weights)
  
  rbm.mat <- apply(covar, 2, rbm.logit, grp, wt)
  B <- rbm.mat[1, ]
  R <-  rbm.mat[2, ]
  rbm.vec <- rbm.logit(lps, grp, wt)
  if (plt == T) {
    x11(5, 5); par(mar = c(4.5, 4.5, 1, 1))
    plot(B, R, xlim = range(c(B[is.finite(B)], -1, 1)), ylim = range(c(R[is.finite(R)], .5, 2)), pch = 16,
         xlab = 'standardized difference in means', ylab = 'variance ratio', type = 'n')
    abline(v = 0, h = 1, col = 'grey50') 
    abline(v = c(-.1, .1), lty = 3, col = 'grey50')
    abline(h = c(4/5, 5/4), lty = 3, col = 'grey50')
    points(B, R, pch = 16)
    points(rbm.vec[1], rbm.vec[2], pch = '+', col = 'red', cex = 2)
    ind1 <- (abs(B) > .1 | R < 4/5 | R > 5/4) & B < 0 & is.finite(B)
    ind2 <- (abs(B) > .1 | R < 4/5 | R > 5/4) & B > 0 & is.finite(R)
    if (sum(ind1) > 0) text(B[ind1], R[ind1], paste(names(covar)[ind1], ' '), adj = 1, cex = .7, xpd = T)
    if (sum(ind2) > 0) text(B[ind2], R[ind2], paste(' ', names(covar)[ind2]), adj = 0, cex = .7, xpd = T)
  }
  res <- round(cbind(B, R), 3)
  dimnames(res) <- list(names(covar), c('B', 'R'))
  res
}


overlap <- function(lps, z, bin = 20)
{
  # plot a histogram of PS-logit by group
  # lps ... numeric vector of PS-logit
  # z   ... treatment indicator (dummy)
  # bin ... number of bins for histogram
  
  r1 <- range(lps)
  c.dat <- hist(lps[z == 0], seq(r1[1], r1[2], length = bin), plot = F)
  t.dat <- hist(lps[z == 1], seq(r1[1], r1[2], length = bin), plot = F)
  t.dat$counts <- -t.dat$counts
  plot(c.dat, main = '', axes = F, ylim = c(min(t.dat$counts), max(c.dat$counts)), 
       xlab = 'PS-logit')
  plot(t.dat, add = T, density = 30)
  axis(1)
  ax.loc <- axis(2, labels = F)
  axis(2, at = ax.loc, labels = abs(ax.loc))
  y <- par('usr')[3:4]
  text(rep(max(lps), 2), c(y[2] - diff(y)*.05, y[1] + diff(y)*.05), 
       c('Control', 'Treatment'), adj = 1, xpd = T)
}

select.diff <- function(x, y)

{
   
   # extract only required stats from regression test
   
   # x   ... independent variable (treatment indicator)
   
   # y   ... dependent variable (covariates)
  
 
   
   if (is.factor(y)) y <- as.numeric(y) - 1        # transform dichotomeous factor into dummy
   
   rslt <- summary(lm(y ~ x))$coef[2, c(1:2, 4)]   # run regression test
   grp.var <- tapply(y, x, var)                   
   grp.n <- tapply(y, x, length)
   
   pooled.var <- (grp.n - 1)%*%grp.var/(sum(grp.n)-length(grp.n))
   
   rslt <- c(rslt, rslt[1] / sqrt(mean(grp.var)), rslt[1]/sqrt(pooled.var), grp.var[2] / grp.var[1])
   
   names(rslt) <- c("Mean Diff.", "Std. Error", "p-value", "Cohen's d","d w. pooled.var", "Var. Ratio")
   
   round(rslt, 4)

}