# CREATED ON : Wednesday, March 7, 2007 at 14:11 # MODIFIED ON : Monday, October 20, 2008 at 09:41 # AUTHOR : HUIXIA JUDY WANG # AFFILIATION : DEPARTMENT OF STATISTICS, NORTH CAROLINA STATE UNIVERSITY # EMAIL : WANG@STAT.NCSU.EDU # R version : R 2.5.1 # FUNCTIONS : Weighted Quantile Rank Score test WQRS1, and Confidence interval based on WQRS1. library(quantreg) #--------------------------------------------------------# # Analysis of a simulated apnea duration data set # # The data is simulated in a canonical form # # based on the real data set analyzed in the paper # #--------------------------------------------------------# # Feed=1: examiner-fed # Feed=2: self-fed # y is the log transformed apnea duration dat = read.csv("07-294-s1.csv") Feed = model.matrix(~factor(dat$Feed))[,2] #0: Fed, 1:self-Fed Vis = model.matrix(~factor(dat$Vis))[,2] #0: water, 1:pudding y = dat$y subject = model.matrix(~-1+factor(dat$ID)) Feed.Vis = Feed*Vis #1: self-Fed * pudding GRP = model.matrix(~-1+factor(dat$Vis)) x = cbind(Vis,Feed) z = Feed.Vis taus = c(0.1, 0.5, 0.9) for(tau in taus) { tmp1=WQRS1.CI(y, x, z, subject, tau = tau, GRP, level=0.05) rownames(tmp1) = c("intersp","Vis","Feed","Feed*Vis") assign(paste("CI.WQRS.tau",tau,sep=""),tmp1) } CI.WQRS.tau0.1 CI.WQRS.tau0.5 CI.WQRS.tau0.9 #--------------------------------# # Functions # #--------------------------------# WQRS1.CI <- function (y, x, z, subject, tau = 0.5, GRP, level=0.05) { # GRP: index of the heterogenous conditions (in this example, it is Viscosity) # level: alpha, confidence coefficient=1-alpha n = length(y) X <- as.matrix(cbind(1, x, z)) p <- ncol(X) # calculate the weight uhat0 = rq(y~X-1, tau=tau)$resid ind1 = which(GRP[,1]==1) ind2 = which(GRP[,2]==1) lo = round(min(uhat0)) up = round(max(uhat0)) d1 = density(uhat0[ind1], from=lo, to=up, cut=0.01, kernel="gaussian") d2 = density(uhat0[ind2], from=lo, to=up, cut=0.01, kernel="gaussian") f1 = d1$y[which(abs(d1$x)==min(abs(d1$x)))] f2 = d2$y[which(abs(d2$x)==min(abs(d2$x)))] w1 = c(f1/max(f1, f2), f2/max(f1, f2)) w = rep(1, n) w[ind1] = w1[1] w[ind2]= w1[2] # initial CI using weighted rq assuming independence rq1 = summary(rq(y~X-1, tau=tau), se="nid")$coef Beta = rq1[,1] coeff = matrix(0, nrow=p, ncol=3) Cut = qchisq(1-level, 1) for(j in 1:p) { b0 = Beta[j] se = rq1[j, 2] # search for the right end of the interval t1 = b0 t2 = b0 + 5*se dif1 = t2 - t1 tol = 1e-08 while(dif1>tol) { tt = (t1+t2)/2 ystar = y - X[,j] * tt ttest = as.data.frame(WQRS1.swallow(x=X[,-j], z=X[,j], ystar, subject=subject, GRP=GRP, tau=tau, weight=w))$Tn if(ttest > Cut) t2 = tt if(ttest < Cut) t1 = tt dif1 = t2-t1 #print(c(t1, t2)) } coeff[j, 1] = b0 coeff[j, 3] = (t1+t2)/2 # search for the left end of the interval t1 = b0 t2 = b0 - 5*se dif1 = t1 - t2 tol = 1e-08 while(dif1>tol) { tt = (t1+t2)/2 ystar = y - X[,j] * tt ttest = as.data.frame(WQRS1.swallow(x=X[,-j], z=X[,j], ystar, subject=subject, GRP=GRP, tau=tau, weight=w))$Tn if(ttest > Cut) t2 = tt if(ttest < Cut) t1 = tt dif1 = t1-t2 #print(c(t1, t2)) } coeff[j, 2] = (t1+t2)/2 } colnames(coeff) = c("coef","lower bd","upper bd") return(coeff) } WQRS1.swallow <- function(x, z, y, subject, GRP, tau=0.5, weight=NA) { if(is.factor(x)==TRUE) x=model.matrix(~-1+x)[,-1] if(is.factor(z)==TRUE) z=model.matrix(~-1+z)[,-1] if(is.factor(subject)==TRUE) subject <- model.matrix(~-1+subject) if(is.vector(subject)==TRUE) subject <- model.matrix(~-1+factor(subject)) block = matrix(apply(subject, 2, function(x) x*GRP),nrow=nrow(subject)) n <- length(y) JK = ncol(GRP) # J*K m = ncol(subject) # no of subjects dim(block) # n*(JK*m), thr first JK columns are for subject1,... mijk = t(as.matrix(t(subject)%*%GRP)) # number of replicates for each subject in J,K, dimension: JK*m q = qr(z)$rank p <- qr(x)$rank ###### calculate the weight if(is.na(weight)) { uhat0 = rq(y~x+z-1, tau=tau)$resid ind1 = which(GRP[,1]==1) ind2 = which(GRP[,2]==1) lo = round(min(uhat0)) up = round(max(uhat0)) d1 = density(uhat0[ind1], from=lo, to=up, cut=0.01, kernel="gaussian") d2 = density(uhat0[ind2], from=lo, to=up, cut=0.01, kernel="gaussian") f1 = d1$y[which(abs(d1$x)==min(abs(d1$x)))] f2 = d2$y[which(abs(d2$x)==min(abs(d2$x)))] w1 = c(f1/max(f1, f2), f2/max(f1, f2)) w = rep(1, n) w[ind1] = w1[1] w[ind2]= w1[2] } else w=weight zstar.w = z-x%*%solve(t(x)%*%diag(w^2)%*%x)%*%t(x)%*%diag(w^2)%*%z P1 <- crossprod(zstar.w) P2 <- t(zstar.w)%*%subject%*%t(subject)%*%zstar.w - P1 rq1 = rq(y~x-1, tau=tau, weights=w) uhat = rq1$resid bn = tau- 1*(uhat<0) an = bn+(1-tau) sn <- 1/sqrt(n)*crossprod(zstar.w, bn) nb = ncol(GRP) tmp = pdelta.est(an=an, subject, n, nb, m, GRP, p, tau, subtractP=TRUE) Sigma = tmp$Sigma delta = tmp$Delta Qn = 1/n*t(zstar.w)%*%Sigma%*%zstar.w + 0.00001 Tn <- t(sn)%*%solve(Qn)%*%sn WQRS1 = 1-pchisq(Tn, q) out = data.frame(WQRS1=WQRS1, Tn=Tn, sn=sn) return(out) } ############## subroutines: given estimated residuals's signs, calculate the covariance matrix V(\Delta). pdelta.est <- function(an=an.A, subject, n, nb, m, GRP, p, tau, subtractP) # x is a vector { #m is the no of subjects Delta=L=array(0,dim=c(m,nb,nb)) for(i in 1:m) { IF = NULL # indicator function block = subject[,i]*GRP tmp = (1-an)*block for(b in 1:nb) { ind = which(block[,b]!=0) IF[[b]] = tmp[ind,b] } for(j in 1:nb) { for(k in 1:nb) { if(j==k) tmp = interprod(IF[[j]], IF[[k]]) if(j!=k) tmp = Prod(IF[[j]], IF[[k]]) Delta[i,j,k] = tmp$prod L[i,j,k] = tmp$L } } } L2 = apply(L, c(2,3), sum) if (subtractP==TRUE) L2 = L2-p if (subtractP==FALSE) L2 = L2 Delta2 = apply(Delta, c(2,3), sum) Delta2 = Delta2/L2 # the matrix of delta Delta2 = pmin(Delta2, tau) Delta2 = pmax(Delta2, tau^2) Sigma = matrix(0,n,n) for(i in 1:m) { ind = which(subject[,i]!=0) block = subject[,i]*GRP mijk = apply(block, 2, sum) tmp = matrixrep(Delta2, mijk)-tau^2 diag(tmp) = tau-tau^2 if(length(ind)!=ncol(tmp)) warning("the no of obs in subject", i, "does not match!") Sigma[ind,ind] = tmp } return(list(Sigma=Sigma, Delta=Delta2)) } Prod <- function(x, y) # the prod of x and y including xi*yi, i.e. [sum_i(x_i)]*[sum_i(y_i))] { prod = sum(x)*sum(y) L = length(x)*length(y) out = list(prod=prod, L=L) return(out) } interprod <- function(x, y) # x and y are vectors # the prod of x and y not including xi*yi { n1 = length(x) n2 = length(y) if(n1==n2) prod = sum(x)*sum(y) - sum(x*y) if(n1>n2) prod = sum(x)*sum(y) - sum(x[1:n2]*y) if(n1