# CREATED ON    :   Wednesday, March 7, 2007 at 14:11
# MODIFIED ON   :   Monday, October 20, 2008 at 09:41
# AUTHOR        :   HUIXIA JUDY WANG
# AFFILIATION   :   DEPARTMENT OF STATISTICS, NORTH CAROLINA STATE UNIVERSITY
# EMAIL         :   WANG@STAT.NCSU.EDU
# R version     :   R 2.5.1
# FUNCTIONS     :   Weighted Quantile Rank Score test WQRS1, and Confidence interval based on WQRS1.

library(quantreg)

            #--------------------------------------------------------#
            #   Analysis of a simulated apnea duration data set      #
            #     The data is simulated in a canonical form          #
            #   based on the real data set analyzed in the paper     #
            #--------------------------------------------------------#
# Feed=1: examiner-fed
# Feed=2: self-fed
# y is the log transformed apnea duration

dat = read.csv("07-294-s1.csv")
Feed = model.matrix(~factor(dat$Feed))[,2]       #0: Fed, 1:self-Fed
Vis = model.matrix(~factor(dat$Vis))[,2]         #0: water, 1:pudding
y = dat$y
subject = model.matrix(~-1+factor(dat$ID))
Feed.Vis = Feed*Vis                              #1: self-Fed * pudding
GRP = model.matrix(~-1+factor(dat$Vis))
x = cbind(Vis,Feed)
z = Feed.Vis

taus = c(0.1, 0.5, 0.9)
for(tau in taus)
{
    tmp1=WQRS1.CI(y, x, z, subject, tau = tau, GRP, level=0.05)
    rownames(tmp1) = c("intersp","Vis","Feed","Feed*Vis")
    assign(paste("CI.WQRS.tau",tau,sep=""),tmp1)
}
CI.WQRS.tau0.1
CI.WQRS.tau0.5
CI.WQRS.tau0.9


                #--------------------------------#
                #          Functions             #
                #--------------------------------#
WQRS1.CI <- function (y, x, z, subject, tau = 0.5, GRP, level=0.05)
{
#  GRP: index of the heterogenous conditions (in this example, it is Viscosity)
# level: alpha, confidence coefficient=1-alpha
    n = length(y)
    X <- as.matrix(cbind(1, x, z))
    p <- ncol(X)

    # calculate the weight
    uhat0 = rq(y~X-1, tau=tau)$resid
    ind1 = which(GRP[,1]==1)
    ind2 = which(GRP[,2]==1)
    lo = round(min(uhat0))
    up = round(max(uhat0))
    d1 = density(uhat0[ind1], from=lo, to=up, cut=0.01, kernel="gaussian")
    d2 = density(uhat0[ind2], from=lo, to=up, cut=0.01, kernel="gaussian")
    f1 = d1$y[which(abs(d1$x)==min(abs(d1$x)))]
    f2 = d2$y[which(abs(d2$x)==min(abs(d2$x)))]
    w1 = c(f1/max(f1, f2), f2/max(f1, f2))
    w = rep(1, n)
    w[ind1] = w1[1]
    w[ind2]= w1[2]   
    # initial CI using weighted rq assuming independence
    rq1 = summary(rq(y~X-1, tau=tau), se="nid")$coef
    Beta = rq1[,1]
    coeff = matrix(0, nrow=p, ncol=3)
    Cut = qchisq(1-level, 1)
        for(j in 1:p)
        {
            b0 = Beta[j]
            se = rq1[j, 2]
            # search for the right end of the interval
            t1 = b0
            t2 = b0 + 5*se
            dif1 = t2 - t1
            tol = 1e-08
            while(dif1>tol)
            {
                tt = (t1+t2)/2
                ystar = y - X[,j] * tt
                ttest = as.data.frame(WQRS1.swallow(x=X[,-j], z=X[,j], ystar, subject=subject, GRP=GRP, tau=tau, weight=w))$Tn
                if(ttest > Cut) t2 = tt
                if(ttest < Cut) t1 = tt
                dif1 = t2-t1
                #print(c(t1, t2))
            }
            coeff[j, 1] = b0
            coeff[j, 3] = (t1+t2)/2
            # search for the left end of the interval
            t1 = b0
            t2 = b0 - 5*se
            dif1 = t1 - t2
            tol = 1e-08
            while(dif1>tol)
            {
                tt = (t1+t2)/2
                ystar = y - X[,j] * tt
                ttest = as.data.frame(WQRS1.swallow(x=X[,-j], z=X[,j], ystar, subject=subject, GRP=GRP, tau=tau, weight=w))$Tn
                if(ttest > Cut) t2 = tt
                if(ttest < Cut) t1 = tt
                dif1 = t1-t2
                #print(c(t1, t2))
            }
            coeff[j, 2] = (t1+t2)/2
        }
    colnames(coeff) = c("coef","lower bd","upper bd")
    return(coeff)
}


WQRS1.swallow <- function(x, z, y, subject, GRP, tau=0.5, weight=NA)
{
    if(is.factor(x)==TRUE) x=model.matrix(~-1+x)[,-1]
    if(is.factor(z)==TRUE) z=model.matrix(~-1+z)[,-1]
    if(is.factor(subject)==TRUE) subject <- model.matrix(~-1+subject) 
    if(is.vector(subject)==TRUE) subject <- model.matrix(~-1+factor(subject)) 
    block = matrix(apply(subject, 2, function(x) x*GRP),nrow=nrow(subject))
    n <- length(y)
    JK = ncol(GRP)      # J*K
    m = ncol(subject)   # no of subjects
    dim(block)          # n*(JK*m), thr first JK columns are for subject1,...
    mijk = t(as.matrix(t(subject)%*%GRP)) # number of replicates for each subject in J,K, dimension: JK*m
    q = qr(z)$rank
    p <- qr(x)$rank

    ###### calculate the weight
    if(is.na(weight))
    {
        uhat0 = rq(y~x+z-1, tau=tau)$resid
        ind1 = which(GRP[,1]==1)
        ind2 = which(GRP[,2]==1)
        lo = round(min(uhat0))
        up = round(max(uhat0))
        d1 = density(uhat0[ind1], from=lo, to=up, cut=0.01, kernel="gaussian")
        d2 = density(uhat0[ind2], from=lo, to=up, cut=0.01, kernel="gaussian")
        f1 = d1$y[which(abs(d1$x)==min(abs(d1$x)))]
        f2 = d2$y[which(abs(d2$x)==min(abs(d2$x)))]
        w1 = c(f1/max(f1, f2), f2/max(f1, f2))
        w = rep(1, n)
        w[ind1] = w1[1]
        w[ind2]= w1[2]    
    }
    else w=weight
    zstar.w = z-x%*%solve(t(x)%*%diag(w^2)%*%x)%*%t(x)%*%diag(w^2)%*%z  
    P1 <- crossprod(zstar.w)
    P2 <- t(zstar.w)%*%subject%*%t(subject)%*%zstar.w - P1
   
    rq1 = rq(y~x-1, tau=tau, weights=w)
    uhat = rq1$resid
    bn = tau- 1*(uhat<0)    
    an = bn+(1-tau)
    sn <- 1/sqrt(n)*crossprod(zstar.w, bn)

    nb = ncol(GRP)
    tmp = pdelta.est(an=an, subject, n, nb, m, GRP, p, tau, subtractP=TRUE) 
    Sigma = tmp$Sigma
    delta = tmp$Delta
    Qn = 1/n*t(zstar.w)%*%Sigma%*%zstar.w + 0.00001
    Tn <- t(sn)%*%solve(Qn)%*%sn
    WQRS1 = 1-pchisq(Tn, q)
    out = data.frame(WQRS1=WQRS1, Tn=Tn, sn=sn)
    return(out)
}

############## subroutines: given estimated residuals's signs, calculate the covariance matrix V(\Delta).
pdelta.est <- function(an=an.A, subject, n, nb, m, GRP, p, tau, subtractP)  # x is a vector
{
    #m is the no of subjects
    Delta=L=array(0,dim=c(m,nb,nb))
    for(i in 1:m)
    {
        IF = NULL # indicator function
        block = subject[,i]*GRP
        tmp = (1-an)*block
        for(b in 1:nb)
        {
            ind = which(block[,b]!=0)
            IF[[b]] = tmp[ind,b]
        }
        for(j in 1:nb)
        {
            for(k in 1:nb)
            {
                if(j==k) tmp = interprod(IF[[j]], IF[[k]])
                if(j!=k) tmp = Prod(IF[[j]], IF[[k]])
                Delta[i,j,k] = tmp$prod
                L[i,j,k] = tmp$L
            }
        }       
    }
    L2 = apply(L, c(2,3), sum)
    if (subtractP==TRUE)  L2 = L2-p
    if (subtractP==FALSE)  L2 = L2
    
    Delta2 = apply(Delta, c(2,3), sum)
    Delta2 = Delta2/L2 # the matrix of delta
    Delta2 = pmin(Delta2, tau)
    Delta2 = pmax(Delta2, tau^2)

    Sigma = matrix(0,n,n)
    for(i in 1:m)
    {
        ind = which(subject[,i]!=0)
        block = subject[,i]*GRP
        mijk = apply(block, 2, sum)
        tmp = matrixrep(Delta2, mijk)-tau^2
        diag(tmp) = tau-tau^2
        if(length(ind)!=ncol(tmp)) 
            warning("the no of obs in subject", i, "does not match!")
        Sigma[ind,ind] = tmp
    }
    return(list(Sigma=Sigma, Delta=Delta2))     
}
Prod <- function(x, y)
# the prod of x and y including xi*yi, i.e. [sum_i(x_i)]*[sum_i(y_i))]
{
    prod = sum(x)*sum(y)
    L = length(x)*length(y)
    out = list(prod=prod, L=L)
    return(out)
}

interprod <- function(x, y) # x and y are vectors
# the prod of x and y not including xi*yi
{
    n1 = length(x)
    n2 = length(y)
    if(n1==n2)  prod = sum(x)*sum(y) - sum(x*y)
    if(n1>n2)   prod = sum(x)*sum(y) - sum(x[1:n2]*y)
    if(n1<n2)   prod = sum(x)*sum(y) - sum(x*y[1:n1])
    L = n1*n2-min(n1,n2)
    out = list(prod=prod, L=L)
    return(out)
}
    

matrixrep <- function(R, Rep)
{
    # R is a square matrix. Rep is no of replication for each block
    nrep = length(Rep)
    if(ncol(R)!=nrep) warning("ncol(R) must equal to the length of Rep!")
    n.row = n.col = sum(Rep)
    out = matrix(0, nrow=n.row, ncol=n.col)
    out = NULL
    for(i in 1:nrep)
    {
        tmp.out = NULL
        for(j in 1:nrep)
        {
            tmp = kronecker(R[i,j], matrix(1, nrow=Rep[i], ncol=Rep[j]))
            tmp.out = cbind(tmp.out, tmp)
        }
        out = rbind(out, tmp.out)
    }
    return(out)
}