########################################################################
# Splus-functions for likelihood ratio tests in the linear mixed model #
########################################################################

# author: 	 Thomas Kneib
# email:	 kneib@stat.uni-muenchen.de
# last modified: 12.06.2003


zero.probability.anova<-function(J=5,N=5,nsim=1000,method="REML",lambda=0)
{
# computes the probability for a local maximum of the likelihood ratio in zero if lambda is the true parameter (by simulation)

if(lambda==0)
#case 1: lambda is zero (equation 22/23 in Crainiceanu/Ruppert/Vogelsang (2002))
  {
  if(method=="ML")
    {
    chi1<-rchisq(nsim,df=N-1)/N
    chi2<-rchisq(nsim,df=J*N-N)/(J*N-N)
    }
  else if(method=="REML")
    {
    d<-(J*N-J)/(J*N-1)
    chi1<-rchisq(nsim,df=N-1)*(J-d)/(J*N-N)
    chi2<-rchisq(nsim,df=J*N-N)*d/(J*N-N)
    }
  test<-1*(chi1<chi2)
  p<-sum(test)/nsim
  }
else
# case 2: lambda>0 (equation 14/15 in Crainiceanu/Ruppert/Vogelsang (2002))
  {
  X<-rep(1,J*N)
  Z<-incident(rep(1:N,each=J),1:N)
  V<-diag(rep(1,J*N))+lambda*Z%*%t(Z)
  e<-eigen(V,symmetric=T)
  sqrtV<-e$vectors%*%diag(sqrt(e$values))%*%t(e$vectors)
  P<-diag(rep(1,J*N))-X%*%solve(t(X)%*%X)%*%t(X)
  if(method=="ML")
    {
    A<-sqrtV%*%P%*%Z%*%t(Z)%*%P%*%sqrtV-sqrtV%*%P%*%sqrtV
    }
  else if(method=="REML")
    {
    A<-sqrtV%*%P%*%Z%*%t(Z)%*%P%*%sqrtV-1/(J*N-1)*spur(t(Z)%*%P%*%Z)*sqrtV%*%P%*%sqrtV
    }
  e<-eigen(A,symmetric=T)$values
  chi1<-rep(0,nsim)
  for(i in 1:(J*N))
    {
    chi1<-chi1+e[i]*rchisq(nsim,df=1)
    }
  test<-1*(chi1<0)
  p<-sum(test)/nsim
  }
return(p)
}

zero.probability.pspline<-function(x,nknot=20,lambda=0,ord=1,deg=0,method="REML",nsim=1000)
{
# computes the probability for a local maximum of the likelihood ratio in zero if lambda is the true parameter (by simulation)
# equation 12/18 in Crainiceanu/Ruppert/Vogelsang (2002)
n<-length(x)
xl<-min(x)
xr<-max(x)
xmin<-xl-(xr-xl)/100
xmax<-xr+(xr-xl)/100
dx<-(xmax-xmin)/(nknot-1)
knots<-seq(xmin-deg*dx,xmax+deg*dx,by=dx)
bs<-spline.des(knots,x,deg+1)
knots<-knots[1:(nknot+deg-1)]
D<-matrix(0,nknot+deg-ord-1,nknot+deg-1)                      
if(ord==1)
  {
  for(i in 1:(nknot+deg-2))
    {
    D[i,i]<--1
    D[i,i+1]<-1
    }
  X<-rep(1,n)
  }
else if(ord==2)
  {
  D<-matrix(0,nknot+deg-ord-1,nknot+deg-1)                      
  for(i in 1:(nknot+deg-3))
    {
    D[i,i]<-1
    D[i,i+1]<--2
    D[i,i+2]<-1
    }
  X<-cbind(rep(1,n),x)
  }
Z<-bs$design%*%t(D)%*%solve(D%*%t(D))
V<-diag(rep(1,n))+lambda*Z%*%t(Z)
e<-eigen(V,symmetric=T)
sqrtV<-e$vectors%*%diag(sqrt(e$values))%*%t(e$vectors)
P<-diag(rep(1,n))-X%*%solve(t(X)%*%X)%*%t(X)
if(method=="ML")
  {
  A<-sqrtV%*%P%*%Z%*%t(Z)%*%P%*%sqrtV-1/n*spur(t(Z)%*%Z)*sqrtV%*%P%*%sqrtV
  }
else if(method=="REML")
  {
  A<-sqrtV%*%P%*%Z%*%t(Z)%*%P%*%sqrtV-1/(n-ord)*spur(t(Z)%*%P%*%Z)*sqrtV%*%P%*%sqrtV
  }
e<-eigen(A,symmetric=T)$values
chi1<-rep(0,nsim)
for(i in 1:n)
  {
  chi1<-chi1+e[i]*rchisq(nsim,df=1)
  }
test<-1*(chi1<0)
p<-sum(test)/nsim
return(p)
}

zero.probability.mrf<-function(K,regions,reg,lambda=0,method="REML",nsim=1000)
{
# computes the probability for a local maximum of the likelihood ratio in zero if lambda is the true parameter (by simulation)
# equation 12/18 in Crainiceanu/Ruppert/Vogelsang (2002)
Z<-incident(reg,regions)
n<-length(reg)

e<-eigen(K,symmetric=T)
e$values[length(regions)]<-0
L<-multdiagback(e$vectors,sqrt(e$values))                             
L<-L[1:length(regions),1:(length(regions)-1)]

B<-Z%*%L%*%solve(t(L)%*%L)
X<-rep(1,n)
P<-diag(rep(1,n))-X%*%solve(t(X)%*%X)%*%t(X)

V<-diag(rep(1,n))+lambda*B%*%t(B)
e<-eigen(V,symmetric=T)
sqrtV<-e$vectors%*%diag(sqrt(e$values))%*%t(e$vectors)

if(method=="ML")
  {
  A<-sqrtV%*%P%*%B%*%t(B)%*%P%*%sqrtV-spur(t(B)%*%B)/n*sqrtV%*%P%*%sqrtV
  }
else if(method=="REML")
  {
  A<-sqrtV%*%P%*%B%*%t(B)%*%P%*%sqrtV-spur(t(B)%*%P%*%B)/(n-1)*sqrtV%*%P%*%sqrtV
  }

e<-eigen(A,symmetric=T)$values
chi1<-rep(0,nsim)
for(i in 1:(n))
  {
  chi1<-chi1+e[i]*rchisq(nsim,df=1)
  }
test<-1*(chi1<0)
p<-sum(test)/nsim
p
}


asy.zero.probability.anova<-function(N=5,method="REML")
{
# computes the probability for a local maximum of the asymptotic likelihood ratio in zero under H0 (exact)
# equation 9/10 in Crainiceanu/Ruppert (2002)

if(method=="REML")
  {
  p<-pchisq(N-1,df=N-1)
  }
else if(method=="ML")
  {
  p<-pchisq(N,df=N-1)
  }
return(p)
}

asy.zero.probability.pspline<-function(nknot=20,ord=1,nsim=1000)
{
# computes the probability for a local maximum of the asymptotic likelihood ratio in zero under H0 (by simulation)
# equation 8 in Crainiceanu/Ruppert (2002)

deg<-0
D<-matrix(0,nknot+deg-ord-1,nknot+deg-1)                      
if(ord==1)
  {
  for(i in 1:(nknot+deg-2))
    {
    D[i,i]<--1
    D[i,i+1]<-1
    }
  }
else if(ord==2)
  {
  for(i in 1:(nknot+deg-3))
    {
    D[i,i]<-1
    D[i,i+1]<--2
    D[i,i+2]<-1
    }
  }
e<-eigen(solve(D%*%t(D))%*%D%*%diag(rep(1/(nknot-1),dim(D)[2]))%*%t(D)%*%solve(D%*%t(D)))$values
nran<-nknot+deg-ord-1
chi1<-rep(0,nsim)
for(i in 1:nran)
  {
  chi1<-chi1+e[i]*rchisq(nsim,df=1)
  }
test<-1*(chi1<sum(e))
p<-sum(test)/nsim
return(p)
}

asy.zero.probability.mrf<-function(K,nsim=1000)
{
# computes the probability for a local maximum of the asymptotic likelihood ratio in zero under H0 (by simulation)
# equation 8 in Crainiceanu/Ruppert (2002)

nreg<-dim(K)[2]
e<-eigen(K,symmetric=T)
e$values[nreg]<-0
ew<-e$values[1:(nreg-1)]
L<-multdiagback(e$vectors,sqrt(e$values))                             
L<-L[1:nreg,1:(nreg-1)]

e<-eigen(diag(1/ew)%*%t(L)%*%diag(rep(1/nreg,nreg))%*%L%*%diag(1/ew))$values
nran<-nreg-1
chi1<-rep(0,nsim)
for(i in 1:nran)
  {
  chi1<-chi1+e[i]*rchisq(nsim,df=1)
  }
test<-1*(chi1<sum(e))
p<-sum(test)/nsim
return(p)
}


sim.distribution.anova<-function(N=5,nsim=1000,qqplot=T,quantiles=0.95)
{
# simulates the asymptotic distribution of the (restricted) likelihood ratio test statistic under H0
# equation 9/10 in Crainiceanu/Ruppert (2002)

chi<-rchisq(nsim,df=(N-1))
rlr<-(chi-(N-1)-(N-1)*log(chi/(N-1)))
rlr[chi<=(N-1)]<-0
lr<-(chi-N-N*log(chi/N))
lr[chi<=N]<-0

if(qqplot==T)
  {
  nqq1<-length(lr[lr>0])
  nqq2<-length(rlr[rlr>0])
  qq1<-sort(lr[lr>0])
  qq2<-sort(rlr[rlr>0])
  qq3<-sort(qchisq((0.5:(nqq1-0.5))/nqq1,df=1))

  par(mfrow=c(2,1))

  plot(qq3,qq1,type="l",xlab=" ",ylab=" ")
  lines(qq3,qq3,type="l",lty=2)
  title("REML")

  qq3<-sort(qchisq((0.5:(nqq2-0.5))/nqq2,df=1))
  plot(qq3,qq2,type="l",xlab=" ",ylab=" ")
  lines(qq3,qq3,type="l",lty=2)
  title("ML")
  }

p0<-matrix(0,2,1)
dimnames(p0)<-list(c("lr","rlr"),NULL)
p0[1,1]<-length(lr[lr==0])/nsim
p0[2,1]<-length(rlr[rlr==0])/nsim

quant<-matrix(0,2,length(quantiles))
help<-quantile(lr,quantiles)
quant[1,]<-help
quant[2,]<-quantile(rlr,quantiles)
dimnames(quant)<-list(c("lr","rlr"),names(help))

erg<-list()
erg$lr<-lr
erg$rlr<-rlr
erg$p0<-p0
erg$quantiles<-quant
return(erg)
}

sim.distribution.pspline<-function(nknot=20,ord=1,nsim=1000,ndgrid=100,lowert=-3,uppert=1.2,quantiles=0.95,qqplot=T)
{
# simulates the asymptotic distribution of the likelihood ratio test statistic under H0
# equation 6/7 inCrainiceanu/Ruppert (2002)

deg<-0
D<-matrix(0,nknot+deg-ord-1,nknot+deg-1)                      
if(ord==1)
  {
  for(i in 1:(nknot+deg-2))
    {
    D[i,i]<--1
    D[i,i+1]<-1
    }
  }
else if(ord==2)
  {
  for(i in 1:(nknot+deg-3))
    {
    D[i,i]<-1
    D[i,i+1]<--2
    D[i,i+2]<-1
    }
  }
e<-eigen(solve(D%*%t(D))%*%D%*%diag(rep(1/(nknot-1),dim(D)[2]))%*%t(D)%*%solve(D%*%t(D)))$values
#1/(nknot-1) is the number of observations per interval between two adjacent knots. 
#knots are assumed to be equidistant and design points are assumed to be uniformly distributed over the range of the knots.

#number of random effects in the model
nran<-nknot+deg-ord-1

w<-matrix(rchisq(nsim*nran,df=1),nrow=nsim,ncol=nran)
r<-matrix(0,nsim,ndgrid)
#dgrid<-seq(0,uppert,length=ndgrid)
dgrid<-c(0,10^(seq(lowert,uppert,length=ndgrid-1)))
i<-1
for(d in dgrid)
  {
  r[,i]<-w%*%(d*e/(1+d*e))-sum(log(1+d*e))
  i<-i+1
  }
lr<-apply(r,1,max)

if(qqplot==T)
# qq-plot of the asymptotic distribution of lr versus chi^2(1) (conditional on lr>0).
# distributions of lr an rlr are identical.
  {
  nqq<-length(lr[lr>0])
  qq1<-sort(qchisq((0.5:(nqq-0.5))/nqq,df=1))
  qq2<-sort(lr[lr>0])
  plot(qq1,qq2,type="l",xlab=" ",ylab=" ")
  lines(qq1,qq1,type="l",lty=2)
  }

#compute requested quantiles
lr[lr<=0]<-0
quant<-quantile(lr,quantiles)

if(length(lr[lr==r[,ndgrid]])>0)
  {
  cat("WARNING: There where ",length(lr[lr==r[,ndgrid]])," values on the upper boundary of the grid.\n",sep="")
  }

erg<-list()
erg$lr<-lr
erg$quantiles<-quant
erg$p0<-length(lr[lr==0])/nsim
return(erg)
}


sim.distribution.mrf<-function(K,nsim=1000,ndgrid=100,lowert=-3,uppert=1.2,qqplot=T,quantiles=0.95)
{
# simulates the asymptotic distribution of the likelihood ratio test statistic under H0
# equation 6/7 inCrainiceanu/Ruppert (2002)

nreg<-dim(K)[2]
e<-eigen(K,symmetric=T)
e$values[nreg]<-0
ew<-e$values[1:(nreg-1)]
L<-multdiagback(e$vectors,sqrt(e$values))                             
L<-L[1:nreg,1:(nreg-1)]

e<-eigen(diag(1/ew)%*%t(L)%*%diag(rep(1/nreg,nreg))%*%L%*%diag(1/ew))$values
nran<-nreg-1
w<-matrix(rchisq(nsim*nran,df=1),nrow=nsim,ncol=nran)
r<-matrix(0,nsim,ndgrid)
#dgrid<-seq(0,uppert,length=ndgrid)
dgrid<-c(0,10^(seq(lowert,uppert,length=ndgrid-1)))
i<-1
for(d in dgrid)
  {
  r[,i]<-w%*%(d*e/(1+d*e))-sum(log(1+d*e))
  i<-i+1
  }
lr<-apply(r,1,max)

if(qqplot==T)
# qq-plot of the asymptotic distribution of lr versus chi^2(1) (conditional on lr>0).
# distributions of lr an rlr are identical.
  {
  nqq<-length(lr[lr>0])
  qq1<-sort(qchisq((0.5:(nqq-0.5))/nqq,df=1))
  qq2<-sort(lr[lr>0])
  plot(qq1,qq2,type="l",xlab=" ",ylab=" ")
  lines(qq1,qq1,type="l",lty=2)
  }

#compute requested quantiles
lr[lr<=0]<-0
quant<-quantile(lr,quantiles)

if(length(lr[lr==r[,ndgrid]])>0)
  {
  cat("WARNING: There where ",length(lr[lr==r[,ndgrid]])," values on the upper boundary of the grid.\n",sep="")
  }

erg<-list()
erg$lr<-lr
erg$quantiles<-quant
erg$p0<-length(lr[lr==0])/nsim
return(erg)
}

# additional functions

incident<-function(a,b)
  {
# returns incident matrix for observed regions stored in a and possible regions stored in b
  erg<-matrix(0,length(a),length(b))
  for(j in 1:length(b))
    {
    erg[a==b[j],j]<-1
    }
  erg
  }

spur<-function(m)
  {
# computes the trace of m
  erg <- sum(diag(m))
  erg
  }

multdiagback<-function(m,w)
  {
#performs the matrix multiplication x*W with W=diag(w)
  for(i in 1:length(w))
    {
    m[,i]<-m[,i]*w[i]
    }
  m
  }