######################################################################## # Splus-functions for likelihood ratio tests in the linear mixed model # ######################################################################## # author: Thomas Kneib # email: kneib@stat.uni-muenchen.de # last modified: 12.06.2003 zero.probability.anova<-function(J=5,N=5,nsim=1000,method="REML",lambda=0) { # computes the probability for a local maximum of the likelihood ratio in zero if lambda is the true parameter (by simulation) if(lambda==0) #case 1: lambda is zero (equation 22/23 in Crainiceanu/Ruppert/Vogelsang (2002)) { if(method=="ML") { chi1<-rchisq(nsim,df=N-1)/N chi2<-rchisq(nsim,df=J*N-N)/(J*N-N) } else if(method=="REML") { d<-(J*N-J)/(J*N-1) chi1<-rchisq(nsim,df=N-1)*(J-d)/(J*N-N) chi2<-rchisq(nsim,df=J*N-N)*d/(J*N-N) } test<-1*(chi10 (equation 14/15 in Crainiceanu/Ruppert/Vogelsang (2002)) { X<-rep(1,J*N) Z<-incident(rep(1:N,each=J),1:N) V<-diag(rep(1,J*N))+lambda*Z%*%t(Z) e<-eigen(V,symmetric=T) sqrtV<-e$vectors%*%diag(sqrt(e$values))%*%t(e$vectors) P<-diag(rep(1,J*N))-X%*%solve(t(X)%*%X)%*%t(X) if(method=="ML") { A<-sqrtV%*%P%*%Z%*%t(Z)%*%P%*%sqrtV-sqrtV%*%P%*%sqrtV } else if(method=="REML") { A<-sqrtV%*%P%*%Z%*%t(Z)%*%P%*%sqrtV-1/(J*N-1)*spur(t(Z)%*%P%*%Z)*sqrtV%*%P%*%sqrtV } e<-eigen(A,symmetric=T)$values chi1<-rep(0,nsim) for(i in 1:(J*N)) { chi1<-chi1+e[i]*rchisq(nsim,df=1) } test<-1*(chi1<0) p<-sum(test)/nsim } return(p) } zero.probability.pspline<-function(x,nknot=20,lambda=0,ord=1,deg=0,method="REML",nsim=1000) { # computes the probability for a local maximum of the likelihood ratio in zero if lambda is the true parameter (by simulation) # equation 12/18 in Crainiceanu/Ruppert/Vogelsang (2002) n<-length(x) xl<-min(x) xr<-max(x) xmin<-xl-(xr-xl)/100 xmax<-xr+(xr-xl)/100 dx<-(xmax-xmin)/(nknot-1) knots<-seq(xmin-deg*dx,xmax+deg*dx,by=dx) bs<-spline.des(knots,x,deg+1) knots<-knots[1:(nknot+deg-1)] D<-matrix(0,nknot+deg-ord-1,nknot+deg-1) if(ord==1) { for(i in 1:(nknot+deg-2)) { D[i,i]<--1 D[i,i+1]<-1 } X<-rep(1,n) } else if(ord==2) { D<-matrix(0,nknot+deg-ord-1,nknot+deg-1) for(i in 1:(nknot+deg-3)) { D[i,i]<-1 D[i,i+1]<--2 D[i,i+2]<-1 } X<-cbind(rep(1,n),x) } Z<-bs$design%*%t(D)%*%solve(D%*%t(D)) V<-diag(rep(1,n))+lambda*Z%*%t(Z) e<-eigen(V,symmetric=T) sqrtV<-e$vectors%*%diag(sqrt(e$values))%*%t(e$vectors) P<-diag(rep(1,n))-X%*%solve(t(X)%*%X)%*%t(X) if(method=="ML") { A<-sqrtV%*%P%*%Z%*%t(Z)%*%P%*%sqrtV-1/n*spur(t(Z)%*%Z)*sqrtV%*%P%*%sqrtV } else if(method=="REML") { A<-sqrtV%*%P%*%Z%*%t(Z)%*%P%*%sqrtV-1/(n-ord)*spur(t(Z)%*%P%*%Z)*sqrtV%*%P%*%sqrtV } e<-eigen(A,symmetric=T)$values chi1<-rep(0,nsim) for(i in 1:n) { chi1<-chi1+e[i]*rchisq(nsim,df=1) } test<-1*(chi1<0) p<-sum(test)/nsim return(p) } zero.probability.mrf<-function(K,regions,reg,lambda=0,method="REML",nsim=1000) { # computes the probability for a local maximum of the likelihood ratio in zero if lambda is the true parameter (by simulation) # equation 12/18 in Crainiceanu/Ruppert/Vogelsang (2002) Z<-incident(reg,regions) n<-length(reg) e<-eigen(K,symmetric=T) e$values[length(regions)]<-0 L<-multdiagback(e$vectors,sqrt(e$values)) L<-L[1:length(regions),1:(length(regions)-1)] B<-Z%*%L%*%solve(t(L)%*%L) X<-rep(1,n) P<-diag(rep(1,n))-X%*%solve(t(X)%*%X)%*%t(X) V<-diag(rep(1,n))+lambda*B%*%t(B) e<-eigen(V,symmetric=T) sqrtV<-e$vectors%*%diag(sqrt(e$values))%*%t(e$vectors) if(method=="ML") { A<-sqrtV%*%P%*%B%*%t(B)%*%P%*%sqrtV-spur(t(B)%*%B)/n*sqrtV%*%P%*%sqrtV } else if(method=="REML") { A<-sqrtV%*%P%*%B%*%t(B)%*%P%*%sqrtV-spur(t(B)%*%P%*%B)/(n-1)*sqrtV%*%P%*%sqrtV } e<-eigen(A,symmetric=T)$values chi1<-rep(0,nsim) for(i in 1:(n)) { chi1<-chi1+e[i]*rchisq(nsim,df=1) } test<-1*(chi1<0) p<-sum(test)/nsim p } asy.zero.probability.anova<-function(N=5,method="REML") { # computes the probability for a local maximum of the asymptotic likelihood ratio in zero under H0 (exact) # equation 9/10 in Crainiceanu/Ruppert (2002) if(method=="REML") { p<-pchisq(N-1,df=N-1) } else if(method=="ML") { p<-pchisq(N,df=N-1) } return(p) } asy.zero.probability.pspline<-function(nknot=20,ord=1,nsim=1000) { # computes the probability for a local maximum of the asymptotic likelihood ratio in zero under H0 (by simulation) # equation 8 in Crainiceanu/Ruppert (2002) deg<-0 D<-matrix(0,nknot+deg-ord-1,nknot+deg-1) if(ord==1) { for(i in 1:(nknot+deg-2)) { D[i,i]<--1 D[i,i+1]<-1 } } else if(ord==2) { for(i in 1:(nknot+deg-3)) { D[i,i]<-1 D[i,i+1]<--2 D[i,i+2]<-1 } } e<-eigen(solve(D%*%t(D))%*%D%*%diag(rep(1/(nknot-1),dim(D)[2]))%*%t(D)%*%solve(D%*%t(D)))$values nran<-nknot+deg-ord-1 chi1<-rep(0,nsim) for(i in 1:nran) { chi1<-chi1+e[i]*rchisq(nsim,df=1) } test<-1*(chi10]) nqq2<-length(rlr[rlr>0]) qq1<-sort(lr[lr>0]) qq2<-sort(rlr[rlr>0]) qq3<-sort(qchisq((0.5:(nqq1-0.5))/nqq1,df=1)) par(mfrow=c(2,1)) plot(qq3,qq1,type="l",xlab=" ",ylab=" ") lines(qq3,qq3,type="l",lty=2) title("REML") qq3<-sort(qchisq((0.5:(nqq2-0.5))/nqq2,df=1)) plot(qq3,qq2,type="l",xlab=" ",ylab=" ") lines(qq3,qq3,type="l",lty=2) title("ML") } p0<-matrix(0,2,1) dimnames(p0)<-list(c("lr","rlr"),NULL) p0[1,1]<-length(lr[lr==0])/nsim p0[2,1]<-length(rlr[rlr==0])/nsim quant<-matrix(0,2,length(quantiles)) help<-quantile(lr,quantiles) quant[1,]<-help quant[2,]<-quantile(rlr,quantiles) dimnames(quant)<-list(c("lr","rlr"),names(help)) erg<-list() erg$lr<-lr erg$rlr<-rlr erg$p0<-p0 erg$quantiles<-quant return(erg) } sim.distribution.pspline<-function(nknot=20,ord=1,nsim=1000,ndgrid=100,lowert=-3,uppert=1.2,quantiles=0.95,qqplot=T) { # simulates the asymptotic distribution of the likelihood ratio test statistic under H0 # equation 6/7 inCrainiceanu/Ruppert (2002) deg<-0 D<-matrix(0,nknot+deg-ord-1,nknot+deg-1) if(ord==1) { for(i in 1:(nknot+deg-2)) { D[i,i]<--1 D[i,i+1]<-1 } } else if(ord==2) { for(i in 1:(nknot+deg-3)) { D[i,i]<-1 D[i,i+1]<--2 D[i,i+2]<-1 } } e<-eigen(solve(D%*%t(D))%*%D%*%diag(rep(1/(nknot-1),dim(D)[2]))%*%t(D)%*%solve(D%*%t(D)))$values #1/(nknot-1) is the number of observations per interval between two adjacent knots. #knots are assumed to be equidistant and design points are assumed to be uniformly distributed over the range of the knots. #number of random effects in the model nran<-nknot+deg-ord-1 w<-matrix(rchisq(nsim*nran,df=1),nrow=nsim,ncol=nran) r<-matrix(0,nsim,ndgrid) #dgrid<-seq(0,uppert,length=ndgrid) dgrid<-c(0,10^(seq(lowert,uppert,length=ndgrid-1))) i<-1 for(d in dgrid) { r[,i]<-w%*%(d*e/(1+d*e))-sum(log(1+d*e)) i<-i+1 } lr<-apply(r,1,max) if(qqplot==T) # qq-plot of the asymptotic distribution of lr versus chi^2(1) (conditional on lr>0). # distributions of lr an rlr are identical. { nqq<-length(lr[lr>0]) qq1<-sort(qchisq((0.5:(nqq-0.5))/nqq,df=1)) qq2<-sort(lr[lr>0]) plot(qq1,qq2,type="l",xlab=" ",ylab=" ") lines(qq1,qq1,type="l",lty=2) } #compute requested quantiles lr[lr<=0]<-0 quant<-quantile(lr,quantiles) if(length(lr[lr==r[,ndgrid]])>0) { cat("WARNING: There where ",length(lr[lr==r[,ndgrid]])," values on the upper boundary of the grid.\n",sep="") } erg<-list() erg$lr<-lr erg$quantiles<-quant erg$p0<-length(lr[lr==0])/nsim return(erg) } sim.distribution.mrf<-function(K,nsim=1000,ndgrid=100,lowert=-3,uppert=1.2,qqplot=T,quantiles=0.95) { # simulates the asymptotic distribution of the likelihood ratio test statistic under H0 # equation 6/7 inCrainiceanu/Ruppert (2002) nreg<-dim(K)[2] e<-eigen(K,symmetric=T) e$values[nreg]<-0 ew<-e$values[1:(nreg-1)] L<-multdiagback(e$vectors,sqrt(e$values)) L<-L[1:nreg,1:(nreg-1)] e<-eigen(diag(1/ew)%*%t(L)%*%diag(rep(1/nreg,nreg))%*%L%*%diag(1/ew))$values nran<-nreg-1 w<-matrix(rchisq(nsim*nran,df=1),nrow=nsim,ncol=nran) r<-matrix(0,nsim,ndgrid) #dgrid<-seq(0,uppert,length=ndgrid) dgrid<-c(0,10^(seq(lowert,uppert,length=ndgrid-1))) i<-1 for(d in dgrid) { r[,i]<-w%*%(d*e/(1+d*e))-sum(log(1+d*e)) i<-i+1 } lr<-apply(r,1,max) if(qqplot==T) # qq-plot of the asymptotic distribution of lr versus chi^2(1) (conditional on lr>0). # distributions of lr an rlr are identical. { nqq<-length(lr[lr>0]) qq1<-sort(qchisq((0.5:(nqq-0.5))/nqq,df=1)) qq2<-sort(lr[lr>0]) plot(qq1,qq2,type="l",xlab=" ",ylab=" ") lines(qq1,qq1,type="l",lty=2) } #compute requested quantiles lr[lr<=0]<-0 quant<-quantile(lr,quantiles) if(length(lr[lr==r[,ndgrid]])>0) { cat("WARNING: There where ",length(lr[lr==r[,ndgrid]])," values on the upper boundary of the grid.\n",sep="") } erg<-list() erg$lr<-lr erg$quantiles<-quant erg$p0<-length(lr[lr==0])/nsim return(erg) } # additional functions incident<-function(a,b) { # returns incident matrix for observed regions stored in a and possible regions stored in b erg<-matrix(0,length(a),length(b)) for(j in 1:length(b)) { erg[a==b[j],j]<-1 } erg } spur<-function(m) { # computes the trace of m erg <- sum(diag(m)) erg } multdiagback<-function(m,w) { #performs the matrix multiplication x*W with W=diag(w) for(i in 1:length(w)) { m[,i]<-m[,i]*w[i] } m }