Exp_out
# Quickly exponentiate a glm coefficient and compute its Wald 95% CI
exp_out <- function(model_object){
  # row 2 of the coefficient table = the first coefficient after the intercept
  est <- summary(model_object)$coefficients[2,1]
  se <- summary(model_object)$coefficients[2,2]
  out <- matrix(nrow=1, ncol=3)
  out[1,1] <- round(exp(est), 2)
  out[1,2] <- round(exp(est - 1.96*se), 2)
  out[1,3] <- round(exp(est + 1.96*se), 2)
  colnames(out) <- c('OR','Lower','Upper')
  rownames(out) <- names(model_object$coefficients[2])
  print(out)
}
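#--- Usage sketch (illustrative only; simulated data, not from the original appendix) ---
# exp_out() always reads row 2 of the coefficient table, i.e. the first term after the intercept.
set.seed(1)
x <- rnorm(200)
a <- rbinom(200, 1, plogis(0.3*x))
y <- rbinom(200, 1, plogis(-1 + 0.5*a + 0.2*x))
fit <- glm(y ~ a + x, family=binomial())
exp_out(fit) # OR and Wald 95% CI for the coefficient on a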
G Computation with Covariate Adjustment
#define inverse logit
expit <-function(x){
exp(x)/(1+exp(x))
}
# out_type is "linear", "binary", or "count"
# ate_type is "difference", "odds_ratio", or "rate_ratio" (ratio types require a binary or count outcome)
# est stacks the outcome-model coefficients followed by the ATE
U.DR = function(est,A,Y,X,OR.formula,out_type="linear",ate_type="difference"){
data = data.frame(A,Y,X)
Y_AX = model.matrix(data,object=OR.formula)
par.Y_AX = est[1:ncol(Y_AX)]
ATE = est[ncol(Y_AX)+1]
data1 = data.frame(A=1,Y,X)
data0 = data.frame(A=0,Y,X)
#### setup predicted outcomes
if (out_type=="binary") {
m1 = expit(model.matrix(data1,object=OR.formula)%*%par.Y_AX)
m0 = expit(model.matrix(data0,object=OR.formula)%*%par.Y_AX)
L1 <-mean(m1)
L0 <-mean(m0)
U.DR.Y_AX = c(Y-expit(Y_AX%*%par.Y_AX)) *(Y_AX)
} else if (out_type=="count") {
m1 = exp(model.matrix(data1,object=OR.formula)%*%par.Y_AX)
m0 = exp(model.matrix(data0,object=OR.formula)%*%par.Y_AX)
L1 <-mean(m1)
L0 <-mean(m0)
U.DR.Y_AX = c(Y-exp(Y_AX%*%par.Y_AX)) *(Y_AX)
} else if(out_type=="linear"){
m1 = model.matrix(data1,object=OR.formula)%*%par.Y_AX
m0 = model.matrix(data0,object=OR.formula)%*%par.Y_AX
U.DR.Y_AX = c(Y-(Y_AX%*%par.Y_AX)) *(Y_AX)
}
###construct estimating equations
if(ate_type=="odds_ratio"){
U.DR.ATE = ATE -log((L1/(1-L1))/(L0/(1-L0)))
} else if(ate_type=="rate_ratio"){
U.DR.ATE = ATE -log(L1/L0)
} else if(ate_type=="difference"){
U.DR.ATE = ATE -(m1-m0)
}
return(cbind(OR=U.DR.Y_AX,ATE=U.DR.ATE))
}
G = function(est,A,Y,X,OR.formula,out_type,ate_type){
return(
apply(U.DR(est,A,Y,X,OR.formula,out_type,ate_type),2,sum)
)
}
return.CI <-function(est,A,Y,X,OR.formula,out_type="linear",ate_type="difference"){
meat.half=U.DR(est,A,Y,X,OR.formula,out_type = out_type,ate_type = ate_type)
bread<-numDeriv::jacobian(func=G,x=est, A=A,Y=Y,X=X, OR.formula=OR.formula,out_type = out_type,ate_type = ate_type)
IF = meat.half%*%t(solve(-bread))
### ATE is the last element of est
ATE <- est[length(est)]
ATE.var = sum(IF[,ncol(IF)]^2)
out<-matrix(nrow=1,ncol=3)
if(ate_type=="difference"){
out[1,1] <-round(ATE,2)
out[1,2] <-round(ATE-qnorm(0.975)*sqrt(ATE.var),2)
out[1,3] <-round(ATE+qnorm(0.975)*sqrt(ATE.var),2)
} else {
out[1,1] <-round(exp(ATE),2)
out[1,2] <-round(exp(ATE-qnorm(0.975)*sqrt(ATE.var)),2)
out[1,3] <-round(exp(ATE+qnorm(0.975)*sqrt(ATE.var)),2)
}
colnames(out) <-c('ATE estimate','CI Lower','CI Upper')
print(out)
}
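#--- Usage sketch (illustrative only; simulated data) ---
# One way to solve the stacked estimating equations G(est) = 0 is rootSolve::multiroot,
# seeding the outcome-model coefficients from glm() and the ATE from zero.
# Assumes the rootSolve and numDeriv packages are installed.
library(rootSolve)
set.seed(2)
n <- 500
X <- data.frame(x1 = rnorm(n))
A <- rbinom(n, 1, plogis(0.4*X$x1))
Y <- rbinom(n, 1, plogis(-0.5 + 0.7*A + 0.3*X$x1))
OR.formula <- ~ A + x1
fit <- glm(Y ~ A + x1, data=data.frame(A, Y, X), family=binomial())
sol <- multiroot(function(est) G(est, A, Y, X, OR.formula,
                                 out_type="binary", ate_type="difference"),
                 start=c(coef(fit), ATE=0))
return.CI(sol$root, A, Y, X, OR.formula, out_type="binary", ate_type="difference")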
G Computation with Spline Propensity Score Adjustment
#define inverse logit
expit <-function(x){
exp(x)/(1+exp(x))
}
# out_type is "linear", "binary", or "count"
# ate_type is "difference", "odds_ratio", or "rate_ratio" (ratio types require a binary or count outcome)
# est stacks the propensity-model coefficients, then the outcome-model coefficients, then the ATE
U.DR_ps = function(est,A,Y,X,PS,PS.formula,OR.formula,out_type="linear",ate_type="difference"){
data = data.frame(A,Y,X,ps=PS) # name the PS column 'ps' so PS.formula/OR.formula can reference it
A_X = model.matrix(data=data,object=PS.formula)
Y_AX = model.matrix(data,object=OR.formula)
par.A_X = est[1:ncol(A_X)]
par.Y_AX = est[ncol(A_X)+ 1:ncol(Y_AX)]
ATE = est[ncol(A_X)+ncol(Y_AX)+1]
data1 = data.frame(A=1,Y,X,ps=PS)
data0 = data.frame(A=0,Y,X,ps=PS)
#### setup predicted outcomes
if (out_type=="binary") {
m1 = expit(model.matrix(data1,object=OR.formula)%*%par.Y_AX)
m0 = expit(model.matrix(data0,object=OR.formula)%*%par.Y_AX)
L1 <-mean(m1)
L0 <-mean(m0)
U.DR.A_X = c(A-expit(A_X%*%par.A_X)) *(A_X)
U.DR.Y_AX = c(Y-expit(Y_AX%*%par.Y_AX)) *(Y_AX)
} else if (out_type=="count") {
m1 = exp(model.matrix(data1,object=OR.formula)%*%par.Y_AX)
m0 = exp(model.matrix(data0,object=OR.formula)%*%par.Y_AX)
L1 <-mean(m1)
L0 <-mean(m0)
U.DR.A_X = c(A-exp(A_X%*%par.A_X)) *(A_X)
U.DR.Y_AX = c(Y-exp(Y_AX%*%par.Y_AX)) *(Y_AX)
} else if(out_type=="linear"){
m1 = model.matrix(data1,object=OR.formula)%*%par.Y_AX
m0 = model.matrix(data0,object=OR.formula)%*%par.Y_AX
U.DR.A_X = c(A-(A_X%*%par.A_X)) *(A_X)
U.DR.Y_AX = c(Y-(Y_AX%*%par.Y_AX)) *(Y_AX)
}
###construct estimating equations
if(ate_type=="odds_ratio"){
U.DR.ATE = ATE -log((L1/(1-L1))/(L0/(1-L0)))
} else if(ate_type=="rate_ratio"){
U.DR.ATE = ATE -log(L1/L0)
} else if(ate_type=="difference"){
U.DR.ATE = ATE -(m1-m0)
}
return(cbind(PS=U.DR.A_X,OR=U.DR.Y_AX,ATE=U.DR.ATE))
}
G_ps = function(est,A,Y,X,PS,PS.formula,OR.formula,out_type,ate_type){
return(
apply(U.DR_ps(est,A,Y,X,PS,PS.formula,OR.formula,out_type,ate_type),2,sum)
)
}
return.CI_ps <-function(est,A,Y,X,PS,PS.formula,OR.formula,out_type="linear",ate_type="difference"){
meat.half=U.DR_ps(est,A,Y,X,PS,PS.formula,OR.formula,out_type = out_type,ate_type = ate_type)
bread<-numDeriv::jacobian(func=G_ps,x=est, A=A,Y=Y,X=X,PS=PS, PS.formula=PS.formula, OR.formula=OR.formula,out_type = out_type,ate_type = ate_type)
IF = meat.half%*%t(solve(-bread))
### ATE is the last element of est
ATE <- est[length(est)]
ATE.var = sum(IF[,ncol(IF)]^2)
out<-matrix(nrow=1,ncol=3)
if(ate_type=="difference"){
out[1,1] <-round(ATE,2)
out[1,2] <-round(ATE-qnorm(0.975)*sqrt(ATE.var),2)
out[1,3] <-round(ATE+qnorm(0.975)*sqrt(ATE.var),2)
} else {
out[1,1] <-round(exp(ATE),2)
out[1,2] <-round(exp(ATE-qnorm(0.975)*sqrt(ATE.var)),2)
out[1,3] <-round(exp(ATE+qnorm(0.975)*sqrt(ATE.var)),2)
}
colnames(out) <-c('ATE estimate','CI Lower','CI Upper')
print(out)
}
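#--- Usage sketch (illustrative only; simulated data) ---
# The ps column holds the estimated propensity score and OR.formula adjusts for it
# with a natural cubic spline. Assumes rootSolve, numDeriv, and splines are available.
library(rootSolve)
library(splines)
set.seed(3)
n <- 500
X <- data.frame(x1 = rnorm(n))
A <- rbinom(n, 1, plogis(0.4*X$x1))
Y <- rbinom(n, 1, plogis(-0.5 + 0.7*A + 0.3*X$x1))
PS.formula <- ~ x1               # treatment model: A given covariates
OR.formula <- ~ A + ns(ps, df=3) # outcome model: A plus a spline of the PS
ps.fit <- glm(A ~ x1, data=X, family=binomial())
PS <- fitted(ps.fit)
out.fit <- glm(Y ~ A + ns(ps, df=3), data=data.frame(A, Y, X, ps=PS), family=binomial())
sol <- multiroot(function(est) G_ps(est, A, Y, X, PS, PS.formula, OR.formula,
                                    out_type="binary", ate_type="odds_ratio"),
                 start=c(coef(ps.fit), coef(out.fit), ATE=0))
return.CI_ps(sol$root, A, Y, X, PS, PS.formula, OR.formula,
             out_type="binary", ate_type="odds_ratio")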
Weighted RMST
#Original Code by Sarah C. Conner
# see https://github.com/s-conner/akm-rmst
# --- RMST Using Adjusted KM ---
# Time is the time to event
# Status is 0 if censored, 1 if event
# Group should be a factor variable
# Weights can be obtained separately, e.g., through logistic models
# Tau is a user-specified truncation point.
# If not specified, the default is the minimum of each group's last event time
akm_rmst <- function(time, status, group, weight=NULL, tau=NULL, alpha=.05,
xaxismin=0, xaxismax=max(time),plot=FALSE){
if(is.null(weight)){weight <- rep(1, length(time))}
if(sum(time<0)>0){stop("times must be positive.")}
if(sum(weight<=0)>0){stop("weights must be greater than 0.")}
if(sum(status!=0 & status!=1)>0){stop("status must be a vector of 0s and/or 1s.")}
data <- data.frame(time, status, group, weight)
data <- data[!is.na(data$group) & !is.na(data$time),]
data$group <- factor(data$group) # drop unused levels so unique() and levels() agree
data <- data[order(data$group),]
#--- If tau not specified, use minimum tau from all groups ---
j=length(unique(data$group))
if(is.null(tau)){
taui = rep(999, j)
for (i in (1:j)){
groupval <- (levels(data$group)[i])
dat_group <- data[which(data$group==(groupval)),]
taui[i] <- max(dat_group$time[dat_group$status==1])
}
tau <- min(taui)
}
#--- Calculate AKM RMST in each group ---
rmst <- rep(999, j)
groupval <- rep(999, j)
rmst_var <- rep(999, j)
rmst_se <- rep(999, j)
if(plot==TRUE){
plot(NULL, xlim=c(xaxismin, xaxismax), ylim=c(0,1), xlab='Time',ylab='Adjusted Survival Probability')
title(main='Adjusted Kaplan-Meier')
}
for (i in 1:j){
groupval[i] <- (levels(data$group)[i])
dat_group <- data[which(data$group==(groupval[i])),]
#--- AKM ---
# Based on 'adjusted.KM' function from {IPWsurvival} package
# Author: F. Le Borgne and Y. Foucher
tj <- c(0,sort(unique(dat_group$time[dat_group$status==1])))
dj <- sapply(tj, function(x){sum(dat_group$weight[dat_group$time==x & dat_group$status==1])})
yj <- sapply(tj, function(x){sum(dat_group$weight[dat_group$time>=x])})
st <- cumprod(1-(dj/yj))
m <- sapply(tj, function(x){sum((dat_group$weight[dat_group$time>=x])^2)})
mj <- ((yj^2)/m)
#ft <- data.frame(time=tj, n_risk=yj, n_event=dj, survival=st, variable=i, m=mj)
ft <- data.frame(tj, yj, dj, st, i, mj)
#--- RMST ---
# Based on 'rmst1 function' from {survRM2} package
# Author: Hajime Uno, Lu Tian, Angel Cronin, Chakib Battioui, Miki Horiguchi
rtime <- ft$tj<=tau
tj_r <- sort(c(ft$tj[rtime],tau))
st_r <- ft$st[rtime]
yj_r <- ft$yj[rtime]
dj_r <- ft$dj[rtime]
time_diff <- diff(c(0, tj_r))
areas <- time_diff * c(1, st_r)
rmst[i] <- sum(areas)
#--- Variance ---
mj_r <- ft$mj[rtime]
#var_r <- ifelse((yj_r-dj_r)==0, 0, dj_r /(mj_r *(yj_r - dj_r)))
var_r <- ifelse((yj_r-dj_r)==0, 0, dj_r /(yj_r *(yj_r - dj_r)))
var_r <- c(var_r,0)
rmst_var[i] <- sum(cumsum(rev(areas[-1]))^2 * rev(var_r)[-1])
rmst_se[i] <- sqrt(rmst_var[i])
#--- Plot AKM ---
if(plot==TRUE){
lines(ft$tj, ft$st,type="s", col=(i+2), lwd=2)
}
}
#--- Add legend and tau to plot ---
if(plot==TRUE){
abline(v=tau, col=1, lty=3, lwd=2)
legend('bottomleft', paste("Group", groupval), lty=rep(1, j), lwd=rep(2, j), col=3:(j+2),
cex=.75, bty ="n", inset = c(0, 0))
}
#--- Compare RMST between groups and compile output---
results <- data.frame(groupval,rmst,rmst_var,rmst_se,tau)
pwc <- ((j^2)-j)/2 #number of pairwise comparisons
label_diff <- rep(999,(pwc))
rmst_diff <- rep(999,(pwc))
rmst_diff_se <- rep(999,(pwc))
rmst_diff_low <- rep(999,(pwc))
rmst_diff_upp <- rep(999,(pwc))
rmst_diff_pval <- rep(999,(pwc))
label_ratio <- rep(999,(pwc))
rmst_logratio <- rep(999,(pwc))
rmst_logratio_se <- rep(999,(pwc))
rmst_ratio <- rep(999,(pwc))
rmst_ratio_low <- rep(999,(pwc))
rmst_ratio_upp <- rep(999,(pwc))
rmst_logratio_pval <- rep(999,(pwc))
output_diff <- data.frame(label_diff,rmst_diff,rmst_diff_se,rmst_diff_low,rmst_diff_upp,rmst_diff_pval)
output_ratio <- data.frame(label_ratio,rmst_logratio,rmst_logratio_se,rmst_ratio,rmst_ratio_low,rmst_ratio_upp,rmst_logratio_pval)
l <- 1
for (i in 1:(j-1)){
  for (jj in (i+1):j){ # jj, not j, so the outer loop bound is not clobbered
    # Based on the 'rmst2' function from the {survRM2} package
    # Authors: Hajime Uno, Lu Tian, Angel Cronin, Chakib Battioui, Miki Horiguchi
    #--- RMST Difference ---
    output_diff[l,]$label_diff <- paste('Groups',results[jj,]$groupval,'vs.',results[i,]$groupval,' ')
    output_diff[l,]$rmst_diff <- (results[jj,]$rmst - results[i,]$rmst)
    output_diff[l,]$rmst_diff_se <- sqrt(results[jj,]$rmst_var + results[i,]$rmst_var)
    output_diff[l,]$rmst_diff_low <- output_diff[l,]$rmst_diff - qnorm(1-alpha/2)*output_diff[l,]$rmst_diff_se
    output_diff[l,]$rmst_diff_upp <- output_diff[l,]$rmst_diff + qnorm(1-alpha/2)*output_diff[l,]$rmst_diff_se
    output_diff[l,]$rmst_diff_pval <- 2*(1-pnorm(abs(output_diff[l,]$rmst_diff)/output_diff[l,]$rmst_diff_se))
    #--- RMST Ratio ---
    output_ratio[l,]$label_ratio <- paste('Groups',results[jj,]$groupval,'vs.',results[i,]$groupval,' ')
    output_ratio[l,]$rmst_logratio <- (log(results[jj,]$rmst) - log(results[i,]$rmst))
    output_ratio[l,]$rmst_logratio_se <- sqrt(results[jj,]$rmst_var/(results[jj,]$rmst^2) + results[i,]$rmst_var/(results[i,]$rmst^2))
    output_ratio[l,]$rmst_ratio <- exp(output_ratio[l,]$rmst_logratio)
    output_ratio[l,]$rmst_ratio_low <- exp(output_ratio[l,]$rmst_logratio - qnorm(1-alpha/2)*output_ratio[l,]$rmst_logratio_se)
    output_ratio[l,]$rmst_ratio_upp <- exp(output_ratio[l,]$rmst_logratio + qnorm(1-alpha/2)*output_ratio[l,]$rmst_logratio_se)
    output_ratio[l,]$rmst_logratio_pval <- 2*(1-pnorm(abs(output_ratio[l,]$rmst_logratio)/output_ratio[l,]$rmst_logratio_se))
    l <- l+1 # move to next row
  }
}
#cat("\n\n\n")
#cat(paste('RMST calculated up to tau =',round(results$tau[1],3)))
#cat("\n\n\n")
#cat ("Restricted Mean Survival Time (RMST) per Group \n\n")
#colnames(results) <- c("Group", "RMST", "Var", "SE", "Tau")
#rownames(results) <- c(paste("Group", results$Group,' '))
#print(round(results[c(2,4)],3))
#cat("\n\n")
#cat ("Restricted Mean Survival Time (RMST) Differences \n\n")
#colnames(output_diff) <- c("Groups", "Est.", "SE", "CIL", "CIU", "p")
#rownames(output_diff) <- c(output_diff$Groups)
#print(round(output_diff[c(2,3,4,5,6)],3))
#cat("\n\n")
#cat ("Restricted Mean Survival Time (RMST) Ratios \n\n")
#colnames(output_ratio) <- c("Groups", "Log Est.", "SE", "Est.", "CIL", "CIU", "p")
#rownames(output_ratio) <- c(output_ratio$Groups)
#print(round(output_ratio[c(2,3,4,5,6,7)],3))
return(list(rmst=results, diff=output_diff, ratio=output_ratio)) # return all computed tables, not just the differences
}
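#--- Usage sketch (illustrative only; simulated data with IPTW weights) ---
set.seed(4)
n <- 300
x <- rnorm(n)
a <- rbinom(n, 1, plogis(0.5*x))
t_event <- rexp(n, rate=0.10*exp(-0.4*a + 0.2*x))
t_cens <- rexp(n, rate=0.05)
time <- pmin(t_event, t_cens)
status <- as.numeric(t_event <= t_cens)
group <- factor(a)
ps <- fitted(glm(a ~ x, family=binomial()))
w <- a/ps + (1-a)/(1-ps) # unstabilized IPT weights; stabilized weights also work
res <- akm_rmst(time, status, group, weight=w, plot=TRUE)
res$diff # adjusted RMST differences with CIs and p-values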
Sensitivity Analysis - Vibration of Effects
###Original Code from Chirag Patel chirag@hms.harvard.edu
# see https://github.com/chiragjp/voe/tree/gh-pages/vibration
###Modified for Propensity score applications
library(survival, quietly=T)
library(EValue)
library(survey)
library(MatchIt) # needed for run_model(family='match') and match.data()
run_model <- function(form, data, family='gaussian', weights=NULL,...) {
args <- list(form, data = data, ...)
if(family == 'gaussian') {
return(do.call(lm, args))
}
if(family == 'cox') {
return(do.call(coxph, args))
}
if(family == 'binomial') {
args <- list(form, data, family=binomial(),weights=weights, ...)
return(do.call(glm, args))
}
if(family == 'poisson') {
args <- list(form, data, family=poisson(), ...)
return(do.call(glm, args))
}
if(family=='match'){
args <- list(form, data, method = "nearest",caliper=.2,ratio=4, ...)
return(do.call(matchit, args))
}
if(family == 'quasibinomial') {
args <- list(form, design=data, family=binomial(link='logit'), ...)
return(do.call(svyglm, args))
}
}
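#--- Usage sketch (illustrative only) ---
# run_model() dispatches on `family`; e.g., a logistic propensity model:
set.seed(5)
cohort <- data.frame(x1=rnorm(100), x2=rnorm(100))
cohort$treatment <- rbinom(100, 1, plogis(cohort$x1))
ps.fit <- run_model(treatment ~ x1 + x2, data=cohort, family='binomial')
summary(ps.fit)$coefficients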
conductVibrationForK_ps <- function(base_formula,base_out_formula,dataFrame,adjustby,k=1,family=c('gaussian', 'binomial', 'cox','poisson'), print_progress=T, ...) {
initFrame <- function(nrows,ncols) {
matrix(NA,nrows,ncols)
}
addToBase <- function(base_formula, adjustingVariables) {
form <- base_formula
if(length(adjustingVariables)) {
addStr <- as.formula(sprintf('~ . + %s', paste(adjustingVariables, collapse='+')))
form <- update.formula(base_formula, addStr)
}
return(form)
}
variablename <- attr(terms(base_formula), 'term.labels')[1]
varname <-'treatment' #all.vars(as.formula(sprintf('~%s', variablename)))
if(print_progress) print(varname);
if(inherits(adjustby, 'formula')) {
adjustby <- attr(terms(adjustby), 'term.labels')
}
n <- length(adjustby)
varComb <- combn(n, k)
retFrame <- NULL
retFrameCounter <- 1
bicFrame <- NULL
for(ii in 1:ncol(varComb)) { # cycle through each possibility
if(print_progress) cat(sprintf('%i/%i\n',ii, ncol(varComb)));
adjustingVariables <- adjustby[varComb[, ii]]
strComb <- paste(sort(varComb[, ii]), collapse=',')
form <- addToBase(base_formula,adjustingVariables)
if(print_progress) print(form);
ps <-run_model(form,dataFrame,family='binomial')
#MODIFY SECTION BASED ON PROPENSITY METHOD
dataFrame$pr_score <- predict(ps, type="response")
dataFrame$pr_score_trim <-ifelse(dataFrame$pr_score<.01,.01,dataFrame$pr_score)
dataFrame$pr_score_trim <-ifelse(dataFrame$pr_score>.99,.99,dataFrame$pr_score_trim)
#IPTW
#dataFrame$IPTW <-dataFrame$treatment/dataFrame$pr_score_trim + (1-dataFrame$treatment)/(1-dataFrame$pr_score_trim)
#design.ps <- svydesign(ids=~1, weights=~IPTW, data=dataFrame)
#Matched
#matched <-run_model(form,dataFrame,family='match')
#matched_data <- match.data(matched)
## run the model
est <- tryCatch(
#matched
#run_model(base_out_formula,matched_data,family='binomial',weights =matched_data$weights ),
#spline
run_model(base_out_formula,dataFrame,family='binomial'),
#IPTW
#run_model(base_out_formula,design.ps,family='quasibinomial' ),
error=function(err) {
message(err)
return(NULL)
}
)
if(!is.null(est)) {
## collect the result
## do unweightedEst here...
frm <- coef(summary(est))
bicMod <- getBIC(est) # do BIC
## modify the above...
### need to get nlevels of variable
rowIndex <- grep(varname, rownames(frm))
nLevels <- length(rowIndex)
if(length(rowIndex) & is.null(retFrame)) {
nrows <- ncol(varComb) * nLevels
ncols <- ncol(frm)
retFrame <- initFrame(nrows,ncols+2) ## need to add 2 columns for the combination and factor_level
bicFrame <- initFrame(ncol(varComb), 3) #
colnames(retFrame) <- c(colnames(frm), 'combination_index', 'factor_level')
colnames(bicFrame) <- c('edf', 'bic', 'combination_index')
}
bicFrame[ii, 'combination_index'] <- ii
bicFrame[ii, 'edf'] <- bicMod[1]
bicFrame[ii, 'bic'] <- bicMod[2]
for(jj in 1:length(rowIndex)) {
retFrame[retFrameCounter, 1:ncol(frm)] <- frm[rowIndex[jj], ]
retFrame[retFrameCounter, ncol(frm)+1] <- ii
retFrame[retFrameCounter, ncol(frm)+2] <- jj
retFrameCounter <- retFrameCounter+1
}
}
}
return(list(vibration=retFrame,bic=bicFrame, k=k,combinations=varComb, family=family, base_formula=base_formula, adjust=adjustby))
}
getBIC <- function(mod) {
  # extractAIC with k = log(n) returns the BIC rather than the default AIC
  # (nobs() is available for the lm/glm/svyglm fits used here)
  return(extractAIC(mod, k = log(nobs(mod))))
}
recomputePvalue <- function(allData, zStatColName, pValColName) {
### some pvalues estimated at 0 because test statistics so large; recompute their pvalues
zeroPval <- !is.na(allData[,pValColName]) & (allData[,pValColName] == 0)
allData[zeroPval, pValColName] <- pnorm(abs(allData[zeroPval, zStatColName]), lower.tail=F)*2 #two sided pvalue
return(allData)
}
conductVibration_ps <- function(base_formula,base_out_formula,dataFrame,adjustby,family=c('gaussian', 'binomial', 'cox','poisson'), kMin=NULL, kMax=NULL, print_progress=T, ...) {
if(is.null(kMin)) {
kMin <- 1
}
if(is.null(kMax)) {
n <- length(attr(terms(adjustby), 'term.labels'))
kMax <- n - 1
}
cat(sprintf('running models; k start:%i, k stop:%i\n', kMin, kMax))
retFrame <- list()
ii <- 1
for(k in kMin:kMax) {
vib <- conductVibrationForK_ps(base_formula, base_out_formula,dataFrame, adjustby, k, family, print_progress, ...)
retFrame[[ii]] <- vib
ii <- ii + 1
}
ret <- gatherFrames(retFrame)
return(ret)
}
gatherVibration <- function(returnFrames) {
## gathers up results from multiple runs; see conductVibration
nrows <- c()
for(ii in 1:length(returnFrames)) {
nrows <- c(nrows, nrow(returnFrames[[ii]]$vibration))
}
retFrame <- matrix(nrow=sum(nrows), ncol=ncol(returnFrames[[1]]$vibration)+1)
colnames(retFrame) <- c(colnames(returnFrames[[1]]$vibration), 'k')
startIndex <- 1
for(ii in 1:length(returnFrames)) {
ncols <- ncol(returnFrames[[ii]]$vibration)
retFrame[startIndex:(startIndex+nrows[ii]-1), 1:ncols] <- returnFrames[[ii]]$vibration
retFrame[startIndex:(startIndex+nrows[ii]-1), ncols+1] <- returnFrames[[ii]]$k
startIndex <- startIndex+nrows[ii]
}
return(retFrame)
}
gatherVibrationBIC <- function(returnFrames) {
nrows <- c()
for(ii in 1:length(returnFrames)) {
nrows <- c(nrows, nrow(returnFrames[[ii]]$bic))
}
retFrame <- matrix(nrow=sum(nrows), ncol=ncol(returnFrames[[1]]$bic)+1)
colnames(retFrame) <- c(colnames(returnFrames[[1]]$bic), 'k')
startIndex <- 1
for(ii in 1:length(returnFrames)) {
ncols <- ncol(returnFrames[[ii]]$bic)
retFrame[startIndex:(startIndex+nrows[ii]-1), 1:ncols] <- returnFrames[[ii]]$bic
retFrame[startIndex:(startIndex+nrows[ii]-1), ncols+1] <- returnFrames[[ii]]$k
startIndex <- startIndex+nrows[ii]
}
return(retFrame)
}
column_headers <- function(vibFrame, family) {
existingColnames <- colnames(vibFrame)
newColnames <- NULL
if(family == 'cox') {
isRobust <- grep('robust', existingColnames)
# if(isRobust) {
#return(c('estimate', 'HR', 'se', 'robust_se', 'z', 'pvalue', 'combination_index', 'factor_level', 'k'))
# } else {
return(c('estimate', 'OR', 'se', 'z', 'pvalue', 'combination_index', 'factor_level', 'k'))
#}
} else if(family == 'gaussian') {
existingColnames[1] <- 'estimate'
# the p-value column (Pr(>|t|)) sits 3 from the end, before combination_index, factor_level, k
existingColnames[length(existingColnames) - 3] <- 'pvalue'
return(existingColnames)
} else if(family == 'binomial' | family == 'poisson') {
existingColnames[1] <- 'estimate'
# the p-value column (Pr(>|z|)) sits 3 from the end, before combination_index, factor_level, k
existingColnames[length(existingColnames) - 3] <- 'pvalue'
return(existingColnames)
}
### fill in the rest later for other families
return(existingColnames)
}
harmonizeFrame <- function(vibFrame, family) {
vibFrame <- as.data.frame(vibFrame)
colnames(vibFrame) <- column_headers(vibFrame, family)
if(family %in% c('binomial','poisson')) {
# kept under the name 'HR' for compatibility with the plotting helpers below,
# though for binomial/poisson fits this is an odds/rate ratio
vibFrame$HR <- exp(vibFrame$estimate)
}
return(vibFrame)
}
gatherFrames <- function(returnFrames) {
bic <- gatherVibrationBIC(returnFrames)
vibration <- gatherVibration(returnFrames)
combinations <- list()
for(ii in 1:length(returnFrames)) {
combinations[[ii]] <- returnFrames[[ii]]$combinations
}
family <- returnFrames[[1]]$family
base_formula <- returnFrames[[1]]$base_formula
adjust <- returnFrames[[1]]$adjust
vibration <- harmonizeFrame(vibration, family)
#change the test-statistic column based on method ('z value' for glm, 't value' for svyglm)
vibration <- recomputePvalue(vibration, 'z value','pvalue')
return(list(vibFrame=vibration, bicFrame=bic, combinations=combinations, adjust=adjust, family=family, base_formula=base_formula))
}
## vibration of effects
## plot the VoE distribution
## Chirag Patel cjp@stanford.edu
## 07/05/13
library(ggplot2)
library(RColorBrewer)
CBB_PALETTE <- c("#000000", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")
vib2d_cox <- function(vibObj, factor_num=1,nbins=20) {
vibFrame <- vibObj$vibFrame
nFactor <- length(unique(vibFrame$factor_level))
yRange <- range(c(-log10(.05), -log10(vibFrame$pvalue)), na.rm=T)
xRange <- range(vibFrame$HR, na.rm=T)
probs <- c( 0.5)
hProbs <- c(0.5)
subFrame <- subset(vibFrame, factor_level == factor_num)
subFrame$factor_level <- NULL
estLevel <- statPerK(subFrame)
estLevel$HR <- exp(estLevel$estimate)
pQuant <- quantilesPvalue(subFrame, probs)
hQuant <- quantilesHR(subFrame, hProbs)
#RHR <- round(hQuant[3,'HR']/hQuant[1,'HR'], 2)
#RPvalue <- round(-log10(pQuant[1,'pvalue']) + log10(pQuant[3,'pvalue']), 2)
p <- ggplot(subFrame, aes(HR, -log10(pvalue)))
if(sum(colnames(subFrame) == 'has_variable')) {
p <- p + geom_hex(aes(colour=factor(has_variable)),bins=nbins) + scale_fill_gradientn(name='', colours=c('blue','yellow'))
} else {
p <- p + geom_hex(bins=nbins) + scale_fill_gradientn(name='', colours=c('blue','yellow'))
}
p <- p + geom_point(data=estLevel, color='red', shape=1) + geom_line(data=estLevel, color='red')
p <- p + geom_text(aes(HR, -log10(pvalue), label=k,vjust=-1), data=estLevel, color='black')
pQuant$x <- max(subFrame$HR)
p <- p + geom_hline(aes(yintercept=-log10(pvalue)), alpha=.4, linetype='dashed', data=pQuant)
p <- p + geom_text(aes(x=x, y=-log10(pvalue), label=round(probs*100, 2), vjust=-.2), data=pQuant)
hQuant$y <- max(c(-log10(subFrame$pvalue), -log10(0.05)))
p <- p + geom_vline(aes(xintercept=HR), alpha=.4, linetype='dashed', data=hQuant)
p <- p + geom_text(aes(x=HR, y=y, label=round(probs*100, 2), hjust=-.1, vjust=-.1), data=hQuant)
p <- p + geom_hline(yintercept=-log10(0.05))
p <- p + scale_x_continuous(limits=xRange) + scale_y_continuous(limits=yRange)
#p <- p + ggtitle(sprintf('RHR = %.02f\nRP = %.02f', RHR, RPvalue))
p <- p + xlab('ATE (Odds Ratio Scale)') + ylab('-log10(pvalue)') + theme_bw()
return(p)
}
vibcontour_cox <- function(vibObj, factor_num=1, alpha=1) {
vibFrame <- vibObj$vibFrame
subFrame <- subset(vibObj$vibFrame, factor_level == factor_num)
contourData <- getContoursForPctile(subFrame)
subFrame$factor_level <- NULL
yRange <- range(c(-log10(.05), -log10(vibFrame$pvalue)), na.rm=T)
xRange <- range(vibFrame$HR, na.rm=T)
probs <- c(0.1,0.5,0.9)
hProbs <- c(0.1,0.5,0.9)
estLevel <- statPerK(subFrame)
estLevel$HR <- exp(estLevel$estimate)
pQuant <- quantilesPvalue(subFrame, probs)
hQuant <- quantilesHR(subFrame, hProbs)
maxk <- max(vibFrame$k)
sub_k <- vibFrame[vibFrame$k==maxk,]
rowid <- which(sub_k$HR == quantile(sub_k$HR, .5, type = 1, na.rm=TRUE))[1] # index within the k==maxk subset
medianOR <- sub_k$HR[rowid]
loor <- exp(log(medianOR) - 1.96*sub_k$`Std. Error`[rowid])
upor <- exp(log(medianOR) + 1.96*sub_k$`Std. Error`[rowid])
Evalues <-evalues.RR(medianOR,loor,upor)
if(loor<=1 & upor>=1){
FEvalue <-as.character(c(1,1,1))
} else {
FEvalue <-as.character(round(Evalues[2,],2))
}
#RHR <- round(hQuant[3,'HR']/hQuant[1,'HR'], 2)
#RPvalue <- round(-log10(pQuant[1,'pvalue']) + log10(pQuant[3,'pvalue']), 2)
p <- ggplot(subFrame, aes(x=HR, y=-log10(pvalue)))
if(sum(colnames(subFrame) == 'has_variable')) {
p <- p + geom_point(aes(colour=factor(has_variable)), alpha=alpha) + scale_colour_manual(values=CBB_PALETTE)
} else {
p <- p + geom_point(alpha=alpha,color="grey")
}
p <- p + geom_contour(data=contourData$densityData, aes(x=x,y=y,z=z), breaks=contourData$levels, size=.3,color="navy",alpha=alpha)
p <- p + geom_point(data=estLevel, aes(color=k))+ scale_color_gradient(low="yellow", high="red") + geom_line(data=estLevel, color='darkorange')
#p <- p + geom_text(aes(HR, -log10(pvalue), label=k,vjust=-1), data=estLevel, color='red4')
pQuant$x <- max(subFrame$HR)
p <- p + geom_hline(aes(yintercept=-log10(pvalue)), linetype='dashed', data=pQuant,alpha=0.3)
p <- p + geom_text(aes(x=x, y=-log10(pvalue), label=round(probs*100, 2), vjust=-.2), data=pQuant)
hQuant$y <- max(c(-log10(subFrame$pvalue), -log10(0.05)))
p <- p + geom_vline(aes(xintercept=HR), linetype='dashed', data=hQuant,alpha=0.3)
p <- p + geom_text(aes(x=HR, y=y, label=round(probs*100, 2), hjust=-.1, vjust=-.1), data=hQuant)
p <- p + geom_hline(yintercept=-log10(0.05),color="darkmagenta",size=1.1)
p <- p + scale_x_continuous(limits=xRange) + scale_y_continuous(limits=yRange)
#grob <- grobTree(textGrob(paste0('Evalue for Full PS Model:', FEvalue[1]), x=0.05, y=0.9, hjust=0,
#gp=gpar(col="black", fontsize=12, fontface="italic")))
#p <- p + annotation_custom(grob) # paste0('Evalue',": ", FEvalue[1]," ","(",FEvalue[2],",", FEvalue[3],")"),
# p <- p + ggtitle("Oral Therapy, IPTW")
p <- p + labs(color="K",
y='PValue (-log10 scale)',
caption=paste0('Evalue for Full PS Model:', FEvalue[1])) + theme_bw() + theme(legend.position = "none")
return(p)
}
find_adjustment_variable <- function(vibObj, adjustment_num=1) {
vibFrame <- vibObj$vibFrame
combinations <- vibObj$combinations
ks <- unique(vibFrame$k)
vibFrame[, 'has_variable'] <- 0
for(ii in 1:length(ks)) {
k <- ks[ii]
adjusters <- combinations[[ii]]
combIndex <- which(apply(adjusters, 2, function(arr) {sum(arr==adjustment_num)})==1) ## gives column
if(length(combIndex)) {
vibFrame[vibFrame$k == k & (vibFrame$combination_index %in% combIndex), 'has_variable'] <- 1
}
}
vibObj$vibFrame <- vibFrame
return(vibObj)
}
plot_vibration_cox <- function(vibObj, type=c('bin', 'contour'), factor_num=1, adjustment_num=NA, ...) {
### plots the vibration of effects for a cox model
if(length(type)) {
type <- type[1]
}
if(!is.na(adjustment_num)) {
vibObj <- find_adjustment_variable(vibObj, adjustment_num)
}
if(type == 'bin') {
return(vib2d_cox(vibObj, factor_num, ...))
} else if(type == 'contour') {
return(vibcontour_cox(vibObj, factor_num, ...))
}
return(NULL)
}
#other called functions
## Chirag Patel
## 4/18/2013
### functions to post processes a vibFrame
library(MASS)
meanEstimate <- function(subFrame) {
# despite the name, returns the median estimate and median p-value
pval <- median(subFrame$pvalue)
hr <- median(subFrame$estimate)
return(data.frame(estimate=hr, pvalue=pval))
}
mean_manhattan <- function(arr) {
### computes a manhattan distance (pairwise differences)
### then computes the relative distance and means it
dd <- as.matrix(dist(arr, method='manhattan'))
dd <- dd / abs(arr)
mean(dd[upper.tri(dd)])*100
}
cdfPerPvalue <- function(subFrame, pvalues=c(10^(-10:-2), .02, .03, .04, .05, .06, .07, .08, .09, .1)) {
Fn <- ecdf(subFrame$pvalue)
data.frame(pvalue=pvalues,cdf=Fn(pvalues), number=Fn(pvalues)*nrow(subFrame))
}
quantilesPvalue <- function(subFrame, probs=c(0.01, .25, 0.5, .75, 0.99)) {
qs <- quantile(subFrame$pvalue, probs)
data.frame(probs=probs, pvalue=qs)
}
quantilesHR <- function(subFrame, probs=c()) {
### change this to estimate.
qs <- quantile(subFrame$estimate, probs)
data.frame(probs=probs, HR=exp(qs))
}
quantilesEstimate <- function(subFrame, probs) {
qs <- quantile(subFrame$estimate, probs)
data.frame(probs=probs, estimate=qs)
}
statPerK <- function(vibFrame) {
### computes the median estimate and median p-value for each k (via meanEstimate)
estLevel <- data.frame()
ks <- sort(unique(vibFrame$k))
levs <- unique(vibFrame$factor_level)
for(ii in ks) {
subFrame <- subset(vibFrame, k==ii)
mn <- meanEstimate(subFrame)
estLevel <- rbind(estLevel, data.frame(k=ii, estimate=mn$estimate, pvalue=mn$pvalue))
}
estLevel
}
statPerKandFactor <- function(vibFrame) {
levs <- unique(vibFrame$factor_level)
estLevels <- data.frame()
for(ii in levs) {
subFrame <- subset(vibFrame, factor_level == ii)
estLevel <- statPerK(subFrame)
estLevel$factor_level <- ii
estLevels <- rbind(estLevels, estLevel)
}
return(estLevels)
}
summary.vibration <- function(vibFrame, bicFrame=NULL) {
### this is for cox model.
### take in a data.frame and compute all summary stats
## do per factor? -- yes.
# HR 99 and HR 1 -- if sign change for the 99 vs. 1?
# P 99 and P 1; how many < thresholds
# HR 99/ HR 1
# -log10P1 + log10P99
# stat per K (mean HR/ median p per K/factor)
levs <- unique(vibFrame$factor_level)
summaryFrame <- data.frame()
pvalue_cdf <- data.frame()
bestMod <- NULL;
if(!is.null(bicFrame)) {
combInd <- bicFrame[which.min(bicFrame[,2]), 3]
bestK <- bicFrame[which.min(bicFrame[,2]), 4]
bestMod <- subset(vibFrame, k == bestK & combination_index == combInd)
}
for(ii in levs) {
subFrame <- subset(vibFrame, factor_level == ii)
hrs <- quantilesHR(subFrame, probs=c(.01,.5, .99))
ps <- quantilesPvalue(subFrame, probs=c(.01,.5, .99))
hr_01 <- hrs[1, 'HR']
hr_50 <- hrs[2, 'HR']
hr_99 <- hrs[3, 'HR']
p_01 <- ps[1, 'pvalue']
p_50 <- ps[2, 'pvalue']
p_99 <- ps[3, 'pvalue']
RHR <- hr_99/hr_01
vibP <- -log10(p_01) + log10(p_99)
frm <- data.frame(HR_01=hr_01, HR_50=hr_50, HR_99=hr_99,pvalue_01=p_01,pvalue_50=p_50, pvalue_99=p_99, rHR=RHR, rPvalue=vibP, factor_level=ii)
if(!is.null(bestMod)) {
bestSub <- subset(bestMod, factor_level == ii)
frm$HR_bic <- bestSub[1, 'HR']
frm$pvalue_bic <- bestSub[1, 'pvalue']
}
summaryFrame <- rbind(summaryFrame, frm)
cdfPerP <- cdfPerPvalue(subFrame)
cdfPerP$factor_level <- ii
pvalue_cdf <- rbind(pvalue_cdf, cdfPerP)
}
perK <- statPerKandFactor(vibFrame)
return(list(summary=summaryFrame, pvalue_cdf = pvalue_cdf, summary_per_k=perK))
}
summary.vibration.stratum <- function(vibFrame) {
## gets a summary per stratum
## for cox model.
strata <- unique(vibFrame$stratum)
summaryFrame <- data.frame()
summary_per_k <- data.frame()
pvalue_cdf <- data.frame()
for(ii in strata) {
perStrat <- summary.vibration(subset(vibFrame, stratum == ii))
perStrat$summary[, 'stratum'] <- ii
perStrat$summary_per_k[, 'stratum'] <- ii
perStrat$pvalue_cdf[, 'stratum'] <- ii
summaryFrame <- rbind(summaryFrame, perStrat$summary)
summary_per_k <- rbind(summary_per_k, perStrat$summary_per_k)
pvalue_cdf <- rbind(pvalue_cdf, perStrat$pvalue_cdf)
}
return(list(summary=summaryFrame, pvalue_cdf=pvalue_cdf, summary_per_k=summary_per_k))
}
getContoursForPctile <- function(vib, pctiles=seq(.05, .95, by=.05)) {
dens <- kde2d(vib$HR, -log10(vib$pvalue), n = 200)
### this is from http://stackoverflow.com/questions/16225530/contours-of-percentiles-on-level-plot/16228938#16228938
### HPDRegionplot code in the emdbook package
dx <- diff(dens$x[1:2])
dy <- diff(dens$y[1:2])
sz <- sort(dens$z)
c1 <- cumsum(sz) * dx * dy
levels <- sapply(pctiles, function(x) {
approx(c1, sz, xout = 1 - x)$y
})
densityData <- data.frame(expand.grid(x = dens$x, y = dens$y), z = as.vector(dens$z))
return(list(levels=levels, densityData=densityData))
}
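#--- End-to-end usage sketch (illustrative only; simulated data) ---
# Vibrate the propensity model over all subsets of candidate confounders, then
# summarize and plot the resulting spread of treatment effects. The spline-of-PS
# outcome model below mirrors the active (uncommented) branch in conductVibrationForK_ps.
set.seed(6)
n <- 400
dat <- data.frame(x1=rnorm(n), x2=rnorm(n), x3=rnorm(n), x4=rnorm(n))
dat$treatment <- rbinom(n, 1, plogis(0.4*dat$x1 - 0.3*dat$x2))
dat$y <- rbinom(n, 1, plogis(-0.5 + 0.6*dat$treatment + 0.3*dat$x1))
vib <- conductVibration_ps(base_formula = treatment ~ x1,
                           base_out_formula = y ~ treatment + splines::ns(pr_score_trim, 3),
                           dataFrame = dat,
                           adjustby = ~ x2 + x3 + x4,
                           family = 'binomial')
summary.vibration(vib$vibFrame, vib$bicFrame)$summary
plot_vibration_cox(vib, type='contour')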