diff --git a/pkg/R/Rallfun-v44.R b/pkg/R/Rallfun-v44.R index 5fc5483..6c36870 100644 --- a/pkg/R/Rallfun-v44.R +++ b/pkg/R/Rallfun-v44.R @@ -1,95550 +1,95550 @@ -# -#License: USC-RL v1.0 -#The Software is made available for academic or non-commercial purposes only. The license is for -#a copy of the program for an unlimited term. Individuals requesting a license for commercial use must pay for a commercial license. -# USC Stevens Institute for Innovation University of Southern California -#1150 S. Olive Street, Suite 2300 -#Los Angeles, CA 90115, USA -#ATTN: Accounting -#DISCLAIMER. USC MAKES NO EXPRESS OR IMPLIED WARRANTIES, EITHER IN FACT OR BY -#OPERATION OF LAW, BY STATUTE OR OTHERWISE, AND USC SPECIFICALLY AND EXPRESSLY -#DISCLAIMS ANY EXPRESS OR IMPLIED WARRANTY OF MERCHANTABILITY OR FITNESS FOR A -#PARTICULAR PURPOSE, VALIDITY OF THE SOFTWARE OR ANY OTHER INTELLECTUAL PROPERTY -#RIGHTS OR NON-INFRINGEMENT OF THE INTELLECTUAL PROPERTY OR OTHER RIGHTS OF ANY -#THIRD PARTY. SOFTWARE IS MADE AVAILABLE AS-IS. -#LIMITATION OF LIABILITY. TO THE MAXIMUM EXTENT PERMITTED BY LAW, IN NO EVENT WILL -#USC BE LIABLE TO ANY USER OF THIS CODE FOR ANY INCIDENTAL, CONSEQUENTIAL, EXEMPLARY -#OR PUNITIVE DAMAGES OF ANY KIND, LOST GOODWILL, LOST PROFITS, LOST BUSINESS AND/OR -#ANY INDIRECT ECONOMIC DAMAGES WHATSOEVER, REGARDLESS OF WHETHER SUCH DAMAGES -#ARISE FROM CLAIMS BASED UPON CONTRACT, NEGLIGENCE, TORT (INCLUDING STRICT LIABILITY -#OR OTHER LEGAL THEORY), A BREACH OF ANY WARRANTY OR TERM OF THIS AGREEMENT, AND -#REGARDLESS OF WHETHER USC WAS ADVISED OR HAD REASON TO KNOW OF THE POSSIBILITY OF -#INCURRING SUCH DAMAGES IN ADVANCE. -#For commercial license pricing and annual commercial update and support pricing, please -#contact: -# -#USC Stevens Institute for Innovation -#University of Southern California -#1150 S. Olive Street, Suite 2300 -#Los Angeles, CA 90015, USA -#Tel: -#Fax: +1 213-821-5001 -#Email: a -# -#and cc to: -#accounting@stevens.usc.edu - - -# Last update: -# May, 2024 - - -madsq<-function(x)mad(x)^2 - -listv2mat<-function(x){ -# -# Each x[[]] has a vector of same length, p -# store in a matrix with p columns -# -p=length(x[[1]]) -n=length(x) -m=matrix(NA,nrow=n,ncol=p) -for(i in 1:n)m[i,]=x[[i]] -m -} - - -DqdifMC<-function(x,y=NULL,q=.25,nboot=1000,plotit=TRUE,xlab='Group 1 - Group 2',SEED=TRUE,alpha=.05){ -# -# Compare two dependent groups by comparing the -# q and 1-q quantiles of the difference scores -# -# q should be < .5 -# if the groups do not differ, then the difference scores should be symmetric -# about zero. -# In particular, the sum of q and 1-q quantiles should be zero. -# -# q indicates the quantiles to be compared. By default, the .25 and .75 quantiles are used. 
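# A minimal usage sketch with made-up data (assumes the functions in this
# file have been sourced). madsq() is simply the squared MAD; listv2mat()
# stacks a list of equal-length vectors into a matrix, one element per row.
x <- list(c(1, 2), c(3, 4), c(5, 6))
listv2mat(x)        # 3 x 2 matrix; row i holds x[[i]]
madsq(rnorm(50))    # squared value of mad()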
-# -library(parallel) -if(SEED)set.seed(2) -if(q>=.5)stop('q should be less than .5') -if(!is.null(y)){ -xy=elimna(cbind(x,y)) -dif=xy[,1]-xy[,2] -} -if(is.null(y))dif=elimna(x) -x=as.matrix(x) -n=length(dif) -if(plotit)akerd(dif,xlab=xlab) -bvec=NA -data<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot) -data=listm(t(data)) -bvec<-mclapply(data,difQMC_sub,dif,q,mc.preschedule=TRUE) -bvec=matl(bvec) -est1=hd(dif,q=q) -est2=hd(dif,q=1-q) -pv=mean(bvec<0)+.5*mean(bvec==0) -p=2*min(c(pv,1-pv)) -low<-round((alpha/2)*nboot)+1 -up<-nboot-low -sbvec=sort(bvec) -ci=sbvec[low] -ci[2]=sbvec[up] -list(est.q=est1,est.1.minus.q=est2,conf.interval=ci,p.value=p) -} - -winsd<-function(x,tr=.2,na.rm=FALSE){ -val=sqrt(winvar(x,tr=tr,na.rm=na.rm)) -val -} - -winsd05<-function(x,tr=.2,na.rm=FALSE){ -val=sqrt(winvar(x,tr=tr,na.rm=na.rm)) -val -} - - - -difQMC_sub<-function(data,dif,q){ -es=hd(dif[data],q)+hd(dif[data],1-q) -es -} - - -ancGparMC<-function(x1,y1,x2,y2,regfun=tsreg,nboot=100,SEED=TRUE,xout=FALSE,eout=FALSE,outfun=outpro, -STAND=TRUE,plotit=TRUE,xlab="X",ylab="Y",ISO=FALSE,...){ -# -# Test hypothesis that for two independent groups, all regression parameters are equal -# By default the Theil--Sen estimator is used -# -# Strategy: Use bootstrap estimate of standard errors followed by -# Johansen type test statistic. -# -# ISO=TRUE, ignore intercept, test only the slope parameters. -# -x1=as.matrix(x1) -p=ncol(x1) -p1=p+1 -xy=elimna(cbind(x1,y1)) -x1=xy[,1:p] -y1=xy[,p1] -x2=as.matrix(x2) -p=ncol(x2) -p1=p+1 -xy=elimna(cbind(x2,y2)) -x2=xy[,1:p] -y2=xy[,p1] -if(plotit){ -xx1=x1 -yy1=y1 -xx2=x2 -yy2=y2 -if(ncol(as.matrix(x1))==1){ -if(eout){ -flag=outfun(cbind(x1,y1),plotit=FALSE,...)$keep -xx1=x1[flag] -yy1=y1[flag] -flag=outfun(cbind(x2,y2),plotit=FALSE,...)$keep -xx2=x2[flag] -yy2=y2[flag] -} -if(xout){ -flag=outfun(xx1,plotit=FALSE,...)$keep -xx1=x1[flag] -yy1=y1[flag] -flag=outfun(xx2,plotit=FALSE,...)$keep -xx2=x2[flag] -yy2=y2[flag] -} -plot(c(xx1,xx2),c(yy1,yy2),type="n",xlab=xlab,ylab=ylab) -points(xx1,yy1) -points(xx2,yy2,pch="+") -abline(regfun(xx1,yy1,...)$coef) -abline(regfun(xx2,yy2,...)$coef,lty=2) -}} -x=list() -y=list() -x[[1]]=x1 -x[[2]]=x2 -y[[1]]=y1 -y[[2]]=y2 -if(!ISO)output=reg1wayMC(x,y,regfun=regfun,nboot=nboot,xout=xout,outfun=outfun, -SEED=SEED,STAND=STAND,...) -if(ISO)output=reg1wayISOMC(x,y,regfun=regfun,nboot=nboot,xout=xout,outfun=outfun, -SEED=SEED,STAND=STAND,...) -output -} - - - -qcomhdMC<-function(x,y,est=hd,q=c(.1,.25,.5,.75,.9),nboot=4000,plotit=TRUE,SEED=TRUE,xlab="Group 1",ylab="Est.1-Est.2",alpha=.05,ADJ.CI=TRUE){ -# -# Compare quantiles using pb2gen -# via hd estimator. Tied values are allowed. -# -# ADJ.CI=TRUE means that the confidence intervals are adjusted based on the level used by the corresponding -# test statistic. If a test is performed with at the .05/3 level, for example, the confidence returned has -# 1-.05/3 probability coverage. -# -# When comparing lower or upper quartiles, both power and the probability of Type I error -# compare well to other methods that have been derived. -# q: can be used to specify the quantiles to be compared -# q defaults to comparing the .1,.25,.5,.75, and .9 quantiles -# -# Function returns p-values and critical p-values based on Hochberg's method. 
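# A minimal usage sketch for DqdifMC() and winsd() with made-up paired data.
# DqdifMC() needs parallel::mclapply plus helper functions defined elsewhere
# in this file (elimna, hd, listm, matl).
set.seed(1)
before <- rnorm(40)
after  <- before + rnorm(40, mean = 0.3)
DqdifMC(before, after, q = .25, nboot = 500, plotit = FALSE)
# returns $est.q, $est.1.minus.q, $conf.interval and $p.value for the sum of
# the .25 and .75 quantiles of the difference scores
winsd(before)       # 20% Winsorized standard deviation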
-# -library(parallel) -if(SEED)set.seed(2) -print('Can also use the function qcomhd with the argument MC=TRUE') -pv=NULL -output=matrix(NA,nrow=length(q),ncol=10) -dimnames(output)<-list(NULL,c("q","n1","n2","est.1","est.2","est.1_minus_est.2","ci.low","ci.up","p_crit","p-value")) -for(i in 1:length(q)){ -output[i,1]=q[i] -output[i,2]=length(elimna(x)) -output[i,3]=length(elimna(y)) -output[i,4]=hd(x,q=q[i]) -output[i,5]=hd(y,q=q[i]) -output[i,6]=output[i,4]-output[i,5] -temp=qcom.sub(x,y,nboot=nboot,q=q[i],SEED=FALSE,alpha=alpha) -output[i,7]=temp$ci[1] -output[i,8]=temp$ci[2] -output[i,10]=temp$p.value -} -temp=order(output[,10],decreasing=TRUE) -zvec=alpha/c(1:length(q)) -output[temp,9]=zvec -if(ADJ.CI){ -for(i in 1:length(q)){ -temp=pb2genMC(x,y,nboot=nboot,est=est,q=q[i],SEED=FALSE,alpha=output[i,9],pr=FALSE) -output[i,7]=temp$ci[1] -output[i,8]=temp$ci[2] -output[i,10]=temp$p.value -} -temp=order(output[,10],decreasing=TRUE) -} -output <- data.frame(output) -output$signif=rep("YES",nrow(output)) -for(i in 1:nrow(output)){ -if(output[temp[i],10]>output[temp[i],9])output$signif[temp[i]]="NO" -#if(output[temp[i],10]<=output[temp[i],9])break -} -if(plotit){ -xax=rep(output[,4],3) -yax=c(output[,6],output[,7],output[,8]) -plot(xax,yax,xlab=xlab,ylab=ylab,type="n") -points(output[,4],output[,6],pch="*") -lines(output[,4],output[,6]) -points(output[,4],output[,7],pch="+") -points(output[,4],output[,8],pch="+") -} -output -} - -qcom.sub<-function(x,y,q,alpha=.05,nboot=2000,SEED=TRUE){ -# -x<-x[!is.na(x)] # Remove any missing values in x -y<-y[!is.na(y)] # Remove any missing values in y -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -datax<-matrix(sample(x,size=length(x)*nboot,replace=TRUE),nrow=nboot) -datay<-matrix(sample(y,size=length(y)*nboot,replace=TRUE),nrow=nboot) -datax=listm(t(datax)) -datay=listm(t(datay)) -bvecx<-mclapply(datax,hd,q,mc.preschedule=TRUE) -bvecy<-mclapply(datay,hd,q,mc.preschedule=TRUE) -bvecx=as.vector(matl(bvecx)) -bvecy=as.vector(matl(bvecy)) -bvec<-sort(bvecx-bvecy) -low<-round((alpha/2)*nboot)+1 -up<-nboot-low -temp<-sum(bvec<0)/nboot+sum(bvec==0)/(2*nboot) -sig.level<-2*(min(temp,1-temp)) -se<-var(bvec) -list(est.1=hd(x,q),est.2=hd(y,q),ci=c(bvec[low],bvec[up]),p.value=sig.level,sq.se=se,n1=length(x),n2=length(y)) -} - - - -smeanMC<-function(m,cop=6,MM=FALSE,op=1,outfun=outogk,cov.fun=rmba,...){ -# -# m is an n by p matrix -# -# Compute a multivariate skipped measure of location -# -# op=1: -# Eliminate outliers using a projection method -# That is, first determine center of data using: -# if op=1, a multi-core processor is used via the -# package multicore -# -# cop=1 Donoho-Gasko median, -# cop=2 MCD, -# cop=3 marginal medians. -# cop=4 uses MVE center -# cop=5 uses TBS -# cop=6 uses rmba (Olive's median ball algorithm) -# -# For each point -# consider the line between it and the center, -# project all points onto this line, and -# check for outliers using -# -# MM=F, a boxplot rule. -# MM=T, rule based on MAD and median -# -# Repeat this for all points. A point is declared -# an outlier if for any projection it is an outlier -# using a modification of the usual boxplot rule. -# -# op=2 use mgv (function outmgv) method to eliminate outliers -# an outlier if for any projection it is an outlier -# using a modification of the usual boxplot rule. -# -# op=3 use outlier method indicated by outfun -# -# Eliminate any outliers and compute means -# using remaining data. 
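# A minimal usage sketch for qcomhdMC() with two made-up independent groups.
# It relies on parallel::mclapply plus hd(), qcom.sub() and pb2genMC() from
# this file; one row is returned per quantile in q, with Hochberg critical
# p-values.
set.seed(2)
g1 <- rexp(50)
g2 <- rexp(60, rate = 1.3)
qcomhdMC(g1, g2, q = c(.25, .5, .75), nboot = 2000, plotit = FALSE)
# data.frame with q, n1, n2, est.1, est.2, est.1_minus_est.2,
# ci.low, ci.up, p_crit, p-value and a YES/NO signif column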
-# -m<-elimna(m) -if(op==1){ -temp<-outproMC(m,plotit=FALSE,cop=cop,MM=MM)$keep -} -if(op==2)temp<-outmgv(m,plotit=FALSE,cov.fun=cov.fun)$keep -if(op==3)temp<-outfun(m,plotit=FALSE,...)$keep -val<-apply(m[temp,],2,mean) -val -} - - pb2genMC<-function(x,y,alpha=.05,nboot=2000,est=onestep,SEED=TRUE,pr=FALSE,...){ -# -# Compute a bootstrap confidence interval for the -# the difference between any two parameters corresponding to -# independent groups. -# By default, M-estimators are compared. -# Setting est=mean, for example, will result in a percentile -# bootstrap confidence interval for the difference between means. -# Setting est=onestep will compare M-estimators of location. -# The default number of bootstrap samples is nboot=2000 -# -library(parallel) -x<-x[!is.na(x)] # Remove any missing values in x -y<-y[!is.na(y)] # Remove any missing values in y -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -if(pr)print("Taking bootstrap samples. Please wait.") -datax<-matrix(sample(x,size=length(x)*nboot,replace=TRUE),nrow=nboot) -datay<-matrix(sample(y,size=length(y)*nboot,replace=TRUE),nrow=nboot) -# -datax=t(datax) -datay=t(datay) -datax=listm(datax) -datay=listm(datay) -bvecx<-mclapply(datax,est,mc.preschedule=TRUE,...) -bvecy<-mclapply(datay,est,mc.preschedule=TRUE,...) -bvec=sort(matl(bvecx)-matl(bvecy)) -low<-round((alpha/2)*nboot)+1 -up<-nboot-low -temp<-sum(bvec<0)/nboot+sum(bvec==0)/(2*nboot) -sig.level<-2*(min(temp,1-temp)) -se<-var(bvec) -list(est.1=est(x,...),est.2=est(y,...),ci=c(bvec[low],bvec[up]),p.value=sig.level,sq.se=se,n1=length(x),n2=length(y)) -} - -cbmhdMC<-function(x,y,alpha=.05,q=.25,plotit=FALSE,pop=0,fr=.8,rval=15,xlab="",ylab="",nboot=600,SEED=TRUE){ -# -# Compute a confidence interval for the sum of the qth and (1-q)th quantiles -# of the distribution of D=X-Y, where X and Y are two independent random variables. -# The Harrell-Davis estimator is used -# If the distribution of X and Y are identical, then in particular the -# distribution of D=X-Y is symmetric about zero. -# -# plotit=TRUE causes a plot of the difference scores to be created -# pop=0 adaptive kernel density estimate -# pop=1 results in the expected frequency curve. 
-# pop=2 kernel density estimate (Rosenblatt's shifted histogram) -# pop=3 boxplot -# pop=4 stem-and-leaf -# pop=5 histogram -# -library(parallel) -if(SEED)set.seed(2) -if(q>=.5)stop("q should be less than .5") -if(q<=0)stop("q should be greater than 0") -x<-x[!is.na(x)] -y<-y[!is.na(y)] -n1=length(x) -n2=length(y) -m<-outer(x,y,FUN="-") -q2=1-q -est1=hd(m,q) -est2=hd(m,q2) -data1<-matrix(sample(n1,size=n1*nboot,replace=TRUE),nrow=nboot) -data2<-matrix(sample(n2,size=n2*nboot,replace=TRUE),nrow=nboot) -data=cbind(data1,data2) -data=listm(t(data)) -bvec=NA -bvec<-mclapply(data,cbmhd_subMC,x=x,y=y,q=q,q2=q2,n1=n1,n2=n2,mc.preschedule=TRUE) -bvec=list2vec(bvec) -p=mean(bvec>0)+.5*mean(bvec==0) -p=2*min(c(p,1-p)) -sbv=sort(bvec) -ilow<-round((alpha/2) * nboot) -ihi<-nboot - ilow -ilow<-ilow+1 -ci=sbv[ilow] -ci[2]=sbv[ihi] -if(plotit){ -if(pop==1 || pop==0){ -if(length(x)*length(y)>2500){ -print("Product of sample sizes exceeds 2500.") -print("Execution time might be high when using pop=0 or 1") -print("If this is case, might consider changing the argument pop") -print("pop=2 might be better") -}} -MM=as.vector(m) -if(pop==0)akerd(MM,xlab=xlab,ylab=ylab) -if(pop==1)rdplot(MM,fr=fr,xlab=xlab,ylab=ylab) -if(pop==2)kdplot(MM,rval=rval,xlab=xlab,ylab=ylab) -if(pop==3)boxplot(MM) -if(pop==4)stem(MM) -if(pop==5)hist(MM,xlab=xlab) -if(pop==6)skerd(MM) -} -list(q=q,Est1=est1,Est2=est2,sum=est1+est2,ci=ci,p.value=p) -} - -cbmhd_subMC<-function(data,cbmhd_subMC,x,y,q,q2,n1,n2){ -np1=n1+1 -nall=n1+n2 -mb=outer(x[data[1:n1]],y[data[np1:nall]],"-") -est=hd(mb,q)+hd(mb,q2) -est -} - -lintestMC<-function(x,y,regfun=tsreg,nboot=500,alpha=.05,xout=FALSE,outfun=out,...){ -# -# Test the hypothesis that the regression surface is a plane. -# Stute et al. (1998, JASA, 93, 141-149). -# -library(parallel) -set.seed(2) -if(identical(regfun,tshdreg))print('When using tshdreg, be sure to include RES=TRUE') -#if(identical(regfun,Qreg))print('When using Qreg, be sure to include res.vals=TRUE') -x<-as.matrix(x) -d<-ncol(x) -temp<-elimna(cbind(x,y)) -x<-temp[,1:d] -x<-as.matrix(x) -y<-temp[,d+1] -if(xout){ -flag<-outfun(x)$keep -x<-x[flag,] -x<-as.matrix(x) -y<-y[flag] -} -mflag<-matrix(NA,nrow=length(y),ncol=length(y)) -for (j in 1:length(y)){ -for (k in 1:length(y)){ -mflag[j,k]<-(sum(x[j,]<=x[k,])==ncol(x)) -} -} -reg<-regfun(x,y,...) -yhat<-y-reg$residuals -print("Taking bootstrap samples, please wait.") -data<-matrix(runif(length(y)*nboot),nrow=nboot) -data<-sqrt(12)*(data-.5) # standardize the random numbers. -data=listm(t(data)) -rvalb<-mclapply(data,lintests1,yhat,reg$residuals,mflag,x,regfun,mc.preschedule=TRUE,...) -# An n x nboot matrix of R values -rvalb=matl(rvalb) -rvalb<-rvalb/sqrt(length(y)) -dstatb<-apply(abs(rvalb),2,max) -wstatb<-apply(rvalb^2,2,mean) -# compute test statistic -v<-c(rep(1,length(y))) -rval<-lintests1(v,yhat,reg$residuals,mflag,x,regfun,...) -rval<-rval/sqrt(length(y)) -dstat<-max(abs(rval)) -wstat<-mean(rval^2) -ib<-round(nboot*(1-alpha)) -p.value.d<-1-sum(dstat>=dstatb)/nboot -p.value.w<-1-sum(wstat>=wstatb)/nboot -list(dstat=dstat,wstat=wstat,p.value.d=p.value.d,p.value.w=p.value.w) -} - - - lloc<-function(x,est=tmean,...){ -if(is.data.frame(x)){ -x=as.matrix(x) -x=apply(x,2,as.numeric) # earlier versions of R require this command -} -if(!is.list(x))val<-est(x,...) -if(is.list(x))val=lapply(x,est,...) -if(is.matrix(x))val<-apply(x,2,est,...) 
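# A minimal usage sketch for pb2genMC() and lloc(), defined above, with
# made-up data. pb2genMC() needs parallel::mclapply plus listm() and matl();
# est can be any location estimator in this file (onestep, mean, hd, ...).
set.seed(3)
g1 <- rnorm(30); g2 <- rnorm(35, mean = .5)
pb2genMC(g1, g2, est = mean, nboot = 1000)   # $est.1, $est.2, $ci, $p.value
lloc(cbind(g1, g2[1:30]), est = median)      # one estimate per column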
-val -} - -reg2g.p2plot<-function(x1,y1,x2,y2,xout=FALSE,outfun=outpro,xlab="Var 1",ylab="Var 2",zlab="Var 3",regfun=tsreg,COLOR=TRUE,STAND=TRUE, -tick.marks=TRUE,type="p",pr=TRUE,...){ -# -# Create a 3D plot of points and plot regression surface for two groups. -# -# Assumes that the package scatterplot3d has been installed. -# If not, use the command install.packages("scatterplot3d") -# assuming you are connected to the web. -# -# The regression method used is specified with the argument -# regfun. -# -# type="p", points will be plotted. Use type="n" to get only regression planes plotted -# -x1=as.matrix(x1) -x2=as.matrix(x2) -if(ncol(x1)!=2)stop("Argument x1 must be stored in a matrix with 2 columns.") -if(ncol(x2)!=2)stop("Argument x2 must be stored in a matrix with 2 columns.") -xy1<-elimna(cbind(x1,y1)) -xy2<-elimna(cbind(x2,y2)) -if(xout){ -if(!STAND)flag1=outfun(xy1[,1:2],plotit=FALSE,...)$keep -if(STAND)flag1=outpro(xy1[,1:2],plotit=FALSE,STAND=TRUE,...)$keep -if(!STAND)flag2=outfun(xy2[,1:2],plotit=FALSE,...)$keep -if(STAND)flag2=outpro(xy2[,1:2],plotit=FALSE,STAND=TRUE,...)$keep -xy1=xy1[flag1,] -xy2=xy2[flag2,] -} -x1=xy1[,1:2] -x2=xy2[,1:2] -y1=xy1[,3] -y2=xy2[,3] -library(scatterplot3d) -temp<-scatterplot3d(rbind(xy1,xy2),xlab=xlab,ylab=ylab,zlab=zlab,tick.marks=tick.marks,type=type) -vals1<-regfun(x1,y1,...)$coef -vals2<-regfun(x2,y2,...)$coef -if(COLOR){ -if(pr)print("First group is blue") -temp$plane(vals1,col="blue") -temp$plane(vals2,col="red") -} -if(!COLOR){ -temp$plane(vals1) -temp$plane(vals2) -} -list(coef.group.1=vals1,coef.group.2=vals2) -} - - -regp2plot<-function(x,y,xout=FALSE,outfun=out,xlab="Var 1",ylab="Var 2",zlab="Var 3",regfun=tsreg,COLOR=FALSE,tick.marks=TRUE,...){ -# -# Create a 3D plot of points and plot regression surface. -# based on the regression estimator indicated by -# regfun -# -# Assumes that the package scatterplot3d has been installed. -# If not, use the command install.packages("scatterplot3d") -# assuming you are connected to the web. -# -# The regression method used is specified with the argument -# regfun. -# -# Package scatterplot3d is required. To install it, use the command -# install.packages("scatterplot3d") -# while connected to the web -# -x=as.matrix(x) -if(ncol(x)!=2)stop("Argument x must be stored in a matrix with 2 columns.") -xy<-elimna(cbind(x,y)) -if(xout){ -flag=outfun(xy[,1:2])$keep -xy=xy[flag,] -} -x=xy[,1:2] -y=xy[,3] -library(scatterplot3d) -temp<-scatterplot3d(xy,xlab=xlab,ylab=ylab,zlab=zlab,tick.marks=tick.marks) -vals<-regfun(x,y,...)$coef -if(COLOR)temp$plane(vals,col="blue") -if(!COLOR)temp$plane(vals) -} - - -reg2plot<-function(x1,y1,x2,y2,regfun=tsreg,xlab="X",ylab="Y",xout=FALSE,outfun=outpro,pch1='.',pch2='+',...){ -# -# For convenience -# plot two regression lines corresponding to two groups. -# -xy=elimna(cbind(x1,y1)) -x1=xy[,1] -y1=xy[,2] -xy=elimna(cbind(x2,y2)) -x2=xy[,1] -y2=xy[,2] -if(xout){ -if(identical(outfun,outblp))flag=outblp(x1,y1,plotit=FALSE)$keep -else -flag<-outfun(x1,plotit=FALSE,...)$keep -x1=x1[flag] -y1=y1[flag] -if(identical(outfun,outblp))flag=outblp(x2,y2,plotit=FALSE)$keep -else -flag<-outfun(x2,plotit=FALSE,...)$keep -x2=x2[flag] -y2=y2[flag] -} -plot(c(x1,x2),c(y1,y2),type="n",xlab=xlab,ylab=ylab) -points(x1,y1,pch=pch1) -points(x2,y2,pch=pch2) -abline(regfun(x1,y1,...)$coef) -abline(regfun(x2,y2,...)$coef,lty=2) -} - -ghdist<-function(n,g=0,h=0){ -# -# generate n observations from a g-and-h dist. 
-# -x<-rnorm(n) -if (g>0){ -ghdist<-(exp(g*x)-1)*exp(h*x^2/2)/g -} -if(g==0)ghdist<-x*exp(h*x^2/2) -ghdist -} - - -wincor<-function(x,y=NULL,tr=.2){ -# Compute the Winsorized correlation between x and y. -# -# tr is the amount of Winsorization -# This function also returns the Winsorized covariance -# -# Pairwise deletion of missing values is performed. -# -# x is a vector, or it can be a matrix with two columns when y=NULL -# - -if(!is.null(y[1])){ -m=cbind(x,y) -} -else m=x -m<-elimna(m) -nval=nrow(m) -if(ncol(m)==2){ -a=wincor.sub(m[,1],m[,2],tr=tr) -wcor=a$cor -wcov=a$cov -sig=a$p.value -} -if(ncol(m)>2){ -#if(is.data.frame(m))m=as.matrix(m) -if(!is.matrix(m))stop("The data must be stored in a n by p matrix") -wcor<-matrix(1,ncol(m),ncol(m)) -wcov<-matrix(0,ncol(m),ncol(m)) -siglevel<-matrix(NA,ncol(m),ncol(m)) -for (i in 1:ncol(m)){ -ip<-i -for (j in ip:ncol(m)){ -val<-wincor.sub(m[,i],m[,j],tr) -wcor[i,j]<-val$cor -wcor[j,i]<-wcor[i,j] -if(i==j)wcor[i,j]<-1 -wcov[i,j]<-val$cov -wcov[j,i]<-wcov[i,j] -if(i!=j){ -siglevel[i,j]<-val$p.value -siglevel[j,i]<-siglevel[i,j] -} -}} -sig=siglevel -} -list(n=nval,cor=wcor,cov=wcov,p.value=sig) -} - -wincor.sub<-function(x,y,tr=tr){ -sig<-NA -g<-floor(tr*length(x)) -xvec<-winval(x,tr) -yvec<-winval(y,tr) -wcor<-cor(xvec,yvec) -wcov<-var(xvec,yvec) -if(sum(x==y)!=length(x)){ -test<-wcor*sqrt((length(x)-2)/(1.-wcor^2)) -sig<-2*(1-pt(abs(test),length(x)-2*g-2)) -} -list(cor=wcor,cov=wcov,p.value=sig) -} - -bivar<-function(x){ -# compute biweight midvariance of x -m<-median(x) -u<-abs((x-m)/(9*qnorm(.75)*mad(x))) -av<-ifelse(u<1,1,0) -top<-length(x)*sum(av*(x-m)^2*(1-u^2)^4) -bot<-sum(av*(1-u^2)*(1-5*u^2)) -bi<-top/bot^2 -bi -} - -mjse<-function(x,q=.5,na.rm=FALSE){ -# -# Compute the Maritz-Jarrett estimate of the standard error of -# X sub m, m=[qn+.5] -# The default value for q is .5 -# -if(na.rm)x=elimna(x) -n<-length(x) -m<-floor(q*n+.5) -vec<-seq(along=x) -w<-pbeta(vec/n,m-1,n-m)-pbeta((vec-1)/n,m-1,n-m) # W sub i values -y<-sort(x) -c1<-sum(w*y) -c2<-sum(w*y*y) -mjse<-sqrt(c2-c1^2) -mjse -} - -pbvar<-function(x,beta=.2){ -# Compute the percentage bend midvariance -# -# beta is the bending constant for omega sub N. -# -pbvar=0 -x=elimna(x) -w<-abs(x-median(x)) -w<-sort(w) -m<-floor((1-beta)*length(x)+.5) -omega<-w[m] -if(omega>0){ -y<-(x-median(x))/omega -z<-ifelse(y>1,1,y) -z<-ifelse(z<(-1),-1,z) -pbvar<-length(x)*omega^2*sum(z^2)/(length(x[abs(y)<1]))^2 -} -pbvar -} - -win<-function(x,tr=.2){ -# -# Compute the gamma Winsorized mean for the data in the vector x. -# -# tr is the amount of Winsorization -# -y<-sort(x) -n<-length(x) -ibot<-floor(tr*n)+1 -itop<-length(x)-ibot+1 -xbot<-y[ibot] -xtop<-y[itop] -y<-ifelse(y<=xbot,xbot,y) -y<-ifelse(y>=xtop,xtop,y) -win<-mean(y) -win -} - -hd<-function(x,q=.5,na.rm=TRUE,STAND=NULL,tr=FALSE){ -# -# Compute the Harrell-Davis estimate of the qth quantile -# -# The vector x contains the data, -# and the desired quantile is q -# The default value for q is .5. -# -if(tr)e=thd(x,q=q) -else{ -if(na.rm)x=elimna(x) -n<-length(x) -m1<-(n+1)*q -m2<-(n+1)*(1-q) -vec<-seq(along=x) -w<-pbeta(vec/n,m1,m2)-pbeta((vec-1)/n,m1,m2) # W sub i values -y<-sort(x) -e<-sum(w*y) -} -e -} - -mestse<-function(x,bend=1.28,op=2){ -# -# Estimate the standard error of M-estimator using Huber's Psi -# using estimate of influence function -# -n<-length(x) -mestse<-sqrt(sum((ifmest(x,bend,op=2)^2))/(n*(n-1))) -mestse -} - -omega<-function(x,beta=.1){ -# Compute the estimate of the measure omega as described in -# chapter 3. 
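# A minimal usage sketch for hd() and wincor() with made-up data. hd() gives
# the Harrell-Davis estimate of the qth quantile, a weighted sum of all order
# statistics with beta-based weights; compare it with the ordinary sample
# quantile.
set.seed(4)
x <- rexp(50)
hd(x, q = .75)                 # Harrell-Davis .75 quantile
quantile(x, .75)               # conventional estimate, for comparison
y <- x + rnorm(50)
wincor(x, y, tr = .2)          # $cor, $cov and $p.value (20% Winsorized)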
-# The default value is beta=.1 because this function is used to -# compute the percentage bend midvariance. -# -y<-abs(x-median(x)) -y<-sort(y) -m<-floor((1-beta)*length(x)+.5) -omega<-y[m]/qnorm(1-beta/2) # omega is rescaled to equal sigma -# under normality -omega -} - -qse<-function(x,q=.5,op=3){ -# -# Compute the standard error of qth sample quantile estimator -# based on the single order statistic, x sub ([qn+.5]) (See Ch 3) -# -# Store the data in vector -# x, and the desired quantile in q -# The default value for q is .5 -# -# op=1 Use Rosenblatt's shifted histogram -# op=2 Use expected frequency curve -# op=3 Use adaptive kernel density estimator -# -y <- sort(x) -n <- length(x) -iq <- floor(q * n + 0.5) -qest <- y[iq] -fhat<-NA -if(op==1)fhat<-kerden(x,q) -if(op==2)fhat<-rdplot(x,pts=qest,pyhat=TRUE,plotit=FALSE) -if(op==3)fhat<-akerd(x,pts=qest,pyhat=TRUE,plotit=FALSE) -if(is.na(fhat[1]))stop("Something wrong, op should be 1 or 2 or 3") -qse<-1/(2*sqrt(length(x))*fhat) -qse -} - -winval<-function(x,tr=.2){ -# -# Winsorize the data in the vector x. -# tr is the amount of Winsorization which defaults to .2. -# -# This function is used by several other functions that come with this book. -# -y<-sort(x) -n<-length(x) -ibot<-floor(tr*n)+1 -itop<-length(x)-ibot+1 -xbot<-y[ibot] -xtop<-y[itop] -winval<-ifelse(x<=xbot,xbot,x) -winval<-ifelse(winval>=xtop,xtop,winval) -winval -} - -hdseb<-function(x,q=.5,nboot=100,SEED=TRUE){ -# -# Compute bootstrap estimate of the standard error of the -# Harrell-Davis estimator of the qth quantile. -# The default quantile is the median, q=.5 -# The default number of bootstrap samples is nboot=100 -# -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -data<-matrix(sample(x,size=length(x)*nboot,replace=TRUE),nrow=nboot) -bvec<-apply(data,1,hd,q) -hdseb<-sqrt(var(bvec)) -hdseb -} - -mestseb<-function(x,nboot=1000,bend=1.28,SEED=TRUE){ -# -# Compute bootstrap estimate of the standard error of the -# M-estimators with Huber's Psi. -# The default percentage bend is bend=1.28 -# The default number of bootstrap samples is nboot=100 -# -if(SEED)set.seed(1) # set seed of random number generator so that -# results can be duplicated. -data<-matrix(sample(x,size=length(x)*nboot,replace=TRUE),nrow=nboot) -bvec<-apply(data,1,mest,bend=bend) -mestseb<-sqrt(var(bvec)) -mestseb -} - -onestep<-function(x,bend=1.28,na.rm=FALSE,MED=TRUE){ -# -# Compute one-step M-estimator of location using Huber's Psi. -# The default bending constant is 1.28 -# -# MED=TRUE: initial estimate is the median -# Otherwise use modified one-step M-estimator -# -if(na.rm)x<-x[!is.na(x)] -if(MED)init.loc=median(x) -if(!MED)init.loc=mom(x,bend=bend) -y<-(x-init.loc)/mad(x) #mad in splus is madn in the book. -A<-sum(hpsi(y,bend)) -B<-length(x[abs(y)<=bend]) -onestep<-median(x)+mad(x)*A/B -onestep -} - - -trimse<-function(x,tr=.2,na.rm=FALSE){ -# -# Estimate the standard error of the gamma trimmed mean -# The default amount of trimming is tr=.2. -# -if(na.rm)x<-x[!is.na(x)] -trimse<-sqrt(winvar(x,tr))/((1-2*tr)*sqrt(length(x))) -trimse -} - -winvar<-function(x,tr=.2,na.rm=FALSE,STAND=NULL){ -# -# Compute the gamma Winsorized variance for the data in the vector x. -# tr is the amount of Winsorization which defaults to .2. 
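# A minimal usage sketch for winval(), win() and onestep() with made-up data.
# Winsorizing pulls the lowest and highest 20% of values in to the remaining
# extremes, so win(x) equals the mean of winval(x); onestep() is the one-step
# M-estimator with Huber's Psi (bend = 1.28).
set.seed(5)
x <- c(rnorm(28), 8, 12)       # two gross outliers
mean(winval(x, tr = .2))       # same value as win(x, tr = .2)
win(x, tr = .2)
onestep(x)                     # little affected by the outliers
mean(x)                        # pulled toward the outliers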
-# -remx=x -x<-x[!is.na(x)] -y<-sort(x) -n<-length(x) -ibot<-floor(tr*n)+1 -itop<-length(x)-ibot+1 -xbot<-y[ibot] -xtop<-y[itop] -y<-ifelse(y<=xbot,xbot,y) -y<-ifelse(y>=xtop,xtop,y) -wv<-var(y) -if(!na.rm)if(sum(is.na(remx)>0))wv=NA -wv -} - -mest<-function(x,bend=1.28,na.rm=FALSE){ -# -# Compute M-estimator of location using Huber's Psi. -# The default bending constant is 1.28 -# -if(na.rm)x<-x[!is.na(x)] -if(mad(x)==0)stop("MAD=0. The M-estimator cannot be computed.") -y<-(x-median(x))/mad(x) #mad in splus is madn in the book. -A<-sum(hpsi(y,bend)) -B<-length(x[abs(y)<=bend]) -mest<-median(x)+mad(x)*A/B -repeat{ -y<-(x-mest)/mad(x) -A<-sum(hpsi(y,bend)) -B<-length(x[abs(y)<=bend]) -newmest<-mest+mad(x)*A/B -if(abs(newmest-mest) <.0001)break -mest<-newmest -} -mest -} - - -hpsi<-function(x,bend=1.28){ -# -# Evaluate Huber`s Psi function for each value in the vector x -# The bending constant defaults to 1.28. -# -hpsi<-ifelse(abs(x)<=bend,x,bend*sign(x)) -hpsi -} - -hdci<-function(x,q=.5,alpha=.05,nboot=100,SEED=TRUE,pr=TRUE){ -# -# Compute a 1-alpha confidence for qth quantile using the -# Harrell-Davis estimator in conjunction with the -# bootstrap estimate of the standard error. -# -# The default quantile is .5. -# The default value for alpha is .05. -# -if(alpha!=.05)stop("Use the function qcipb. Generally works well even when alpha is not equal to .05") -x=elimna(x) -if(pr){ -if(sum(duplicated(x)>0))print("Duplicate values detected; use hdpb") -} -se<-hdseb(x,q,nboot,SEED=SEED) -crit<-.5064/(length(x)^(.25))+1.96 -if(q<=.2 || q>=.8){ -if(length(x) <=20)crit<-(-6.23)/length(x)+5.01 -} -if(q<=.1 || q>=.9){ -if(length(x) <=40)crit<-36.2/length(x)+1.31 -} -if(length(x)<=10){ -print("The number of observations is less than 11.") -print("Accurate critical values have not been determined for this case.") -} -low<-hd(x,q)-crit*se -hi<-hd(x,q)+crit*se -list(ci=c(low,hi),crit=crit,se=se) -} - -mestci<-function(x,alpha=.05,nboot=4000,bend=1.28,os=FALSE,pr=TRUE){ -# -# Compute a bootstrap, .95 confidence interval for the -# M-estimator of location based on Huber's Psi. -# The default percentage bend is bend=1.28 -# The default number of bootstrap samples is nboot=4000 -# -# By default, the fully iterated M-estimator is used. To use the -# one-step M-estimator instead, set os=TRUE -# -os<-as.logical(os) -if(pr){ -if(length(x) <=19) -print("The number of observations is less than 20.") -print("This function might fail due to division by zero,") -print("which in turn causes an error in function hpsi") -print("having to do with a missing value.") -} -set.seed(1) # set seed of random number generator so that -# results can be duplicated. -if(pr)print("Taking bootstrap samples. Please wait.") -data<-matrix(sample(x,size=length(x)*nboot,replace=TRUE),nrow=nboot) -if(!os)bvec<-apply(data,1,mest,bend) -if(os)bvec<-apply(data,1,onestep,bend) -bvec<-sort(bvec) -low<-round((alpha/2)*nboot) -up<-nboot-low -low<-low+1 -list(ci=c(bvec[low],bvec[up])) -} - - -sint<-function(x,alpha=.05,pr=FALSE){ -# -# Compute a 1-alpha confidence interval for the median using -# the Hettmansperger-Sheather interpolation method. -# -# The default value for alpha is .05. 
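# A minimal usage sketch with made-up data: hdci() combines the Harrell-Davis
# quantile estimate with a bootstrap standard error (alpha is fixed at .05),
# mest() is the fully iterated M-estimator with Huber's Psi, and trimse()
# estimates the standard error of a 20% trimmed mean from winvar().
set.seed(6)
x <- rlnorm(40)
hdci(x, q = .5, nboot = 100)   # $ci, $crit, $se for the median
mest(x)                        # M-estimator of location
trimse(x, tr = .2)             # SE of the 20% trimmed mean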
-# -x=elimna(x) -if(pr){ -if(sum(duplicated(x)>0))print("Duplicate values detected; hdpb might have more power") -} -k<-qbinom(alpha/2,length(x),.5) -gk<-pbinom(length(x)-k,length(x),.5)-pbinom(k-1,length(x),.5) -if(gk >= 1-alpha){ -gkp1<-pbinom(length(x)-k-1,length(x),.5)-pbinom(k,length(x),.5) -kp<-k+1 -} -if(gk < 1-alpha){ -k<-k-1 -gk<-pbinom(length(x)-k,length(x),.5)-pbinom(k-1,length(x),.5) -gkp1<-pbinom(length(x)-k-1,length(x),.5)-pbinom(k,length(x),.5) -kp<-k+1 -} -xsort<-sort(x) -nmk<-length(x)-k -nmkp<-nmk+1 -ival<-(gk-1+alpha)/(gk-gkp1) -lam<-((length(x)-k)*ival)/(k+(length(x)-2*k)*ival) -low<-lam*xsort[kp]+(1-lam)*xsort[k] -hi<-lam*xsort[nmk]+(1-lam)*xsort[nmkp] -sint<-c(low,hi) -sint -} - - - -b2ci<-function(x,y,alpha=.05,nboot=2000,est=bivar,SEED=TRUE,...){ -# -# Compute a bootstrap confidence interval for the -# the difference between any two parameters corresponding to -# independent groups. -# By default, biweight midvariances are compared. -# Setting est=mean, for example, will result in a percentile -# bootstrap confidence interval for the difference between means. -# The default number of bootstrap samples is nboot=399 -# -x<-x[!is.na(x)] # Remove any missing values in x -y<-y[!is.na(y)] # Remove any missing values in y -e1=est(x) -e2=est(y) -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -#print("Taking bootstrap samples. Please wait.") -datax<-matrix(sample(x,size=length(x)*nboot,replace=TRUE),nrow=nboot) -datay<-matrix(sample(y,size=length(y)*nboot,replace=TRUE),nrow=nboot) -bvecx<-apply(datax,1,est,...) -bvecy<-apply(datay,1,est,...) -bvec<-sort(bvecx-bvecy) -low <- round((alpha/2) * nboot) + 1 -up <- nboot - low -temp <- sum(bvec < 0)/nboot + sum(bvec == 0)/(2 * nboot) -sig.level <- 2 * (min(temp, 1 - temp)) -list(est1=e1,est2=e2,ratio=e1/e2,ci = c(bvec[low], bvec[up]), p.value = sig.level) -} - -ecdf<-function(x,val){ -# compute empirical cdf for data in x evaluated at val -# That is, estimate P(X <= val) -# -ecdf<-length(x[x<=val])/length(x) -ecdf -} - -kswsig<-function(m,n,val){ -# -# Compute significance level of the weighted -# Kolmogorov-Smirnov test statistic -# -# m=sample size of first group -# n=sample size of second group -# val=observed value of test statistic -# -mpn<-m+n -cmat<-matrix(0,m+1,n+1) -umat<-matrix(0,m+1,n+1) -for (i in 1:m-1){ -for (j in 1:n-1)cmat[i+1,j+1]<-abs(i/m-j/n)*sqrt(m*n/((i+j)*(1-(i+j)/mpn))) -} -cmat<-ifelse(cmat<=val,1,0) -for (i in 0:m){ -for (j in 0:n)if(i*j==0)umat[i+1,j+1]<-cmat[i+1,j+1] -else umat[i+1,j+1]<-cmat[i+1,j+1]*(umat[i+1,j]+umat[i,j+1]) -} -term<-lgamma(m+n+1)-lgamma(m+1)-lgamma(n+1) -kswsig<-1.-umat[m+1,n+1]/exp(term) -kswsig -} - - -binomci<-function(x=sum(y),nn=length(y),y=NULL,n=NA,alpha=.05){ -# Compute a 1-alpha confidence interval for p, the probability of -# success for a binomial distribution, using Pratt's method -# -# y is a vector of 1s and 0s. 
-# x is the number of successes observed among n trials -# -if(!is.null(y)){ -y=elimna(y) -nn=length(y) -} -if(nn==1)stop("Something is wrong: number of observations is only 1") -n<-nn -if(x!=n && x!=0){ -z<-qnorm(1-alpha/2) -A<-((x+1)/(n-x))^2 -B<-81*(x+1)*(n-x)-9*n-8 -C<-(0-3)*z*sqrt(9*(x+1)*(n-x)*(9*n+5-z^2)+n+1) -D<-81*(x+1)^2-9*(x+1)*(2+z^2)+1 -E<-1+A*((B+C)/D)^3 -upper<-1/E -A<-(x/(n-x-1))^2 -B<-81*x*(n-x-1)-9*n-8 -C<-3*z*sqrt(9*x*(n-x-1)*(9*n+5-z^2)+n+1) -D<-81*x^2-9*x*(2+z^2)+1 -E<-1+A*((B+C)/D)^3 -lower<-1/E -} -if(x==0){ -lower<-0 -upper<-1-alpha^(1/n) -} -if(x==1){ -upper<-1-(alpha/2)^(1/n) -lower<-1-(1-alpha/2)^(1/n) -} -if(x==n-1){ -lower<-(alpha/2)^(1/n) -upper<-(1-alpha/2)^(1/n) -} -if(x==n){ -lower<-alpha^(1/n) -upper<-1 -} -phat<-x/n -list(phat=phat,ci=c(lower,upper),n=n) -} - - - -kssig<-function(m,n,val){ -# -# Compute significance level of the Kolmogorov-Smirnov test statistic -# m=sample size of first group -# n=sample size of second group -# val=observed value of test statistic -# -cmat<-matrix(0,m+1,n+1) -umat<-matrix(0,m+1,n+1) -for (i in 0:m){ -for (j in 0:n)cmat[i+1,j+1]<-abs(i/m-j/n) -} -cmat<-ifelse(cmat<=val,1e0,0e0) -for (i in 0:m){ -for (j in 0:n)if(i*j==0)umat[i+1,j+1]<-cmat[i+1,j+1] -else umat[i+1,j+1]<-cmat[i+1,j+1]*(umat[i+1,j]+umat[i,j+1]) -} -term<-lgamma(m+n+1)-lgamma(m+1)-lgamma(n+1) -kssig<-1.-umat[m+1,n+1]/exp(term) -kssig=max(0,kssig) -kssig -} - -meemul<-function(x,alpha=.05){ -# -# Perform Mee's method for all pairs of J independent groups. -# The familywise type I error probability is controlled by using -# a critical value from the Studentized maximum modulus distribution. -# -# The data are assumed to be stored in $x$ in list mode. -# Length(x) is assumed to correspond to the total number of groups, J -# It is assumed all groups are independent. -# -# Missing values are automatically removed. -# -# The default value for alpha is .05. Any other value results in using -# alpha=.01. -# -if(!is.list(x))stop("Data must be stored in list mode.") -J<-length(x) -CC<-(J^2-J)/2 -test<-matrix(NA,CC,5) -for(j in 1:J){ -xx<-!is.na(x[[j]]) -val<-x[[j]] -x[[j]]<-val[xx] # Remove missing values -} -dimnames(test)<-list(NULL,c("Group","Group","phat","ci.lower","ci.upper")) -jcom<-0 -crit<-smmcrit(200,CC) -if(alpha!=.05)crit<-smmcrit01(200,CC) -alpha<-1-pnorm(crit) -for (j in 1:J){ -for (k in 1:J){ -if (j < k){ -temp<-mee(x[[j]],x[[k]],alpha) -jcom<-jcom+1 -test[jcom,1]<-j -test[jcom,2]<-k -test[jcom,3]<-temp$phat -test[jcom,4]<-temp$ci[1] -test[jcom,5]<-temp$ci[2] -}}} -list(test=test) -} - -tsub<-function(isub,x,y,tr){ -# -# Compute test statistic for trimmed means -# when comparing dependent groups. -# By default, 20% trimmed means are used. -# isub is a vector of length n, -# a bootstrap sample from the sequence of integers -# 1, 2, 3, ..., n -# -# This function is used by ydbt -# -tsub<-yuend(x[isub],y[isub],tr=tr)$teststat -tsub -} - -deciles<-function(x,HD=TRUE,type=7){ -# -# Estimate the deciles for the data in vector x -# HD=TRUE: use the Harrell-Davis estimate of the qth quantile -# HD=FALSE:use R function quantile -# -x=elimna(x) -if(HD){ -xs<-sort(x) -n<-length(x) -vecx<-seq(along=x) -xq<-0 -for (i in 1:9){ -q<-i/10 -m1<-(n+1)*q -m2<-(n+1)*(1-q) -wx<-pbeta(vecx/n,m1,m2)-pbeta((vecx-1)/n,m1,m2) # W sub i values -xq[i]<-sum(wx*xs) -}} -if(!HD){ -pts=seq(.1,.9,.1) -xq=quantile(x,probs=pts,type=type) -} -xq -} - - -kstiesig<-function(x,y,val){ -# -# Compute significance level of the Kolmogorov-Smirnov test statistic -# for the data in x and y. 
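# A minimal usage sketch for binomci() and deciles() with made-up inputs.
# binomci() uses Pratt's approximation for a binomial confidence interval;
# it accepts either counts (x successes out of nn trials) or a 0/1 vector y.
binomci(x = 15, nn = 20)               # $phat, $ci, $n
binomci(y = c(1, 0, 1, 1, 1, 0, 1))    # same idea from raw 0/1 data
set.seed(7)
deciles(rnorm(100))                    # Harrell-Davis estimates of the 9 deciles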
-# This function allows ties among the values. -# val=observed value of test statistic -# -m<-length(x) -n<-length(y) -z<-c(x,y) -z<-sort(z) -cmat<-matrix(0,m+1,n+1) -umat<-matrix(0,m+1,n+1) -for (i in 0:m){ -for (j in 0:n){ -if(abs(i/m-j/n)<=val)cmat[i+1,j+1]<-1e0 -k<-i+j -if(k > 0 && k.25)print("Warning: with tr>.25 type I error control might be poor") -x<-x[!is.na(x)] # Remove any missing values in x -y<-y[!is.na(y)] # Remove any missing values in y -h1<-length(x)-2*floor(tr*length(x)) -h2<-length(y)-2*floor(tr*length(y)) -q1<-(length(x)-1)*winvar(x,tr)/(h1*(h1-1)) -q2<-(length(y)-1)*winvar(y,tr)/(h2*(h2-1)) -df<-(q1+q2)^2/((q1^2/(h1-1))+(q2^2/(h2-1))) -crit<-qt(1-alpha/2,df) -dif<-mean(x,tr)-mean(y,tr) -low<-dif-crit*sqrt(q1+q2) -up<-dif+crit*sqrt(q1+q2) -test<-abs(dif/sqrt(q1+q2)) -yuen<-2*(1-pt(test,df)) -list(n1=length(x),n2=length(y),est.1=mean(x,tr),est.2=mean(y,tr),ci=c(low,up),p.value=yuen,dif=dif,se=sqrt(q1+q2),teststat=test,crit=crit,df=df) -} - -shifthd<-function(x,y,nboot=200,plotit=TRUE,plotop=FALSE,SEED=TRUE){ -# -# Compute confidence intervals for the difference between deciles -# of two independent groups. The simultaneous probability coverage is .95. -# The Harrell-Davis estimate of the qth quantile is used. -# The default number of bootstrap samples is nboot=200 -# -# The results are stored and returned in a 9 by 3 matrix, -# the ith row corresponding to the i/10 quantile. -# The first column is the lower end of the confidence interval. -# The second column is the upper end. -# The third column is the estimated difference between the deciles -# (second group minus first). -# -plotit<-as.logical(plotit) -x<-x[!is.na(x)] -y<-y[!is.na(y)] -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -crit<-80.1/(min(length(x),length(y)))^2+2.73 -m<-matrix(0,9,3) -for (i in 1:9){ -q<-i/10 -data<-matrix(sample(x,size=length(x)*nboot,replace=TRUE),nrow=nboot) -bvec<-apply(data,1,hd,q) -sex<-var(bvec) -data<-matrix(sample(y,size=length(y)*nboot,replace=TRUE),nrow=nboot) -bvec<-apply(data,1,hd,q) -sey<-var(bvec) -dif<-hd(y,q)-hd(x,q) -m[i,3]<-dif -m[i,1]<-dif-crit*sqrt(sex+sey) -m[i,2]<-dif+crit*sqrt(sex+sey) -} -dimnames(m)<-list(NULL,c("ci.lower","ci.upper","Delta.hat")) -if(plotit){ -if(plotop){ -xaxis<-c(1:9)/10 -xaxis<-c(xaxis,xaxis) -} -if(!plotop)xaxis<-c(deciles(x),deciles(x)) -par(pch="+") -yaxis<-c(m[,1],m[,2]) -if(!plotop)plot(xaxis,yaxis,ylab="delta",xlab="x (first group)") -if(plotop)plot(xaxis,yaxis,ylab="delta",xlab="Deciles") -par(pch="*") -if(!plotop)points(deciles(x),m[,3]) -if(plotop)points(c(1:9)/10,m[,3]) -} -m -} - -shiftdhd<-function(x,y,nboot=200,plotit=TRUE,plotop=FALSE,SEED=TRUE,pr=TRUE,xlab='x (first group)', -ylab='Delta'){ -# -# Compute confidence intervals for the difference between deciles -# of two dependent groups. The simultaneous probability coverage is .95. -# The Harrell-Davis estimate of the qth quantile is used. -# The default number of bootstrap samples is nboot=200 -# -# The results are stored and returned in a 9 by 4 matrix, -# the ith row corresponding to the i/10 quantile. -# The first column is the lower end of the confidence interval. -# The second column is the upper end. -# The third column is the estimated difference between the deciles -# (second group minus first). -# The fourth column contains the estimated standard error. -# -# No missing values are allowed. 
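# A minimal usage sketch for yuen() with two made-up independent groups.
# Yuen's test compares 20% trimmed means using a heteroscedastic standard
# error built from the Winsorized variances (winvar).
set.seed(8)
g1 <- rnorm(25)
g2 <- rnorm(30, mean = .6)
yuen(g1, g2, tr = .2)
# returns $ci, $p.value, $dif, $se, $teststat, $df and the two trimmed means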
-# -if(pr){ -print("NOTE: if the goal is to use an alpha value different from .05") -print("use the function qcomdhd or qdec2ci") -} -xy=elimna(cbind(x,y)) -x=xy[,1] -y=xy[,2] -plotit<-as.logical(plotit) -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -crit<-37/length(x)^(1.4)+2.75 -if(pr)print("The approximate .05 critical value is") -if(pr)print(crit) -m<-matrix(0,9,6) -if(pr)print("Taking Bootstrap Samples. Please wait.") -data<-matrix(sample(length(x),size=length(x)*nboot,replace=TRUE),nrow=nboot) -xmat<-matrix(x[data],nrow=nboot,ncol=length(x)) -ymat<-matrix(y[data],nrow=nboot,ncol=length(x)) -for (i in 1:9){ -q<-i/10 -bvec<-apply(xmat,1,hd,q)-apply(ymat,1,hd,q) -se<-sqrt(var(bvec)) -dif<-hd(x,q)-hd(y,q) -m[i,1]=hd(x,q) -m[i,2]=hd(y,q) -m[i,3]<-dif -m[i,4]<-dif-crit*se -m[i,5]<-dif+crit*se -m[i,6]<-se -} -dimnames(m)<-list(NULL,c('est.1','est.2','est.dif','ci.lower','ci.upper','se')) -if(plotit){ -if(plotop){ -xaxis<-c(1:9)/10 -xaxis<-c(xaxis,xaxis) -} -if(!plotop)xaxis<-c(deciles(x),deciles(x)) -par(pch="+") -#yaxis<-c(m[,1],m[,2]) -yaxis<-c(m[,4],m[,5]) -if(!plotop)plot(xaxis,yaxis,ylab=ylab,xlab=xlab) -if(plotop)plot(xaxis,yaxis,ylab="delta",xlab="Deciles") -par(pch="*") -if(!plotop)points(deciles(x),m[,3]) -if(plotop)points(c(1:9)/10,m[,3]) -} -m -} - - -smmcrit<-function(nuhat,C){ -# -# Determine the .95 quantile of the C-variate Studentized maximum -# modulus distribution using linear interpolation on inverse -# degrees of freedom -# If C=1, this function returns the .975 quantile of Student's t -# distribution. -# -if(C-round(C)!=0)stop("The number of contrasts, C, must be an integer") -if(C>=29)stop("C must be less than or equal to 28") -if(C<=0)stop("C must be greater than or equal to 1") -if(nuhat<2)stop("The degrees of freedom must be greater than or equal to 2") -if(C==1)smmcrit<-qt(.975,nuhat) -if(C>=2){ -C<-C-1 -m1<-matrix(0,20,27) -m1[1,]<-c(5.57,6.34,6.89,7.31,7.65,7.93,8.17,8.83,8.57, -8.74,8.89,9.03,9.16,9.28,9.39,9.49,9.59, 9.68, -9.77,9.85,9.92,10.00,10.07,10.13,10.20,10.26,10.32) -m1[2,]<-c(3.96,4.43,4.76,5.02,5.23,5.41,5.56,5.69,5.81, -5.92,6.01,6.10,6.18,6.26,6.33,6.39,6.45,6.51, -6.57,6.62,6.67,6.71,6.76,6.80,6.84,6.88, 6.92) -m1[3,]<-c(3.38,3.74,4.01,4.20,4.37,4.50,4.62,4.72,4.82, -4.89,4.97,5.04,5.11,5.17,5.22,5.27,5.32, 5.37, -5.41,5.45,5.49,5.52,5.56,5.59,5.63,5.66,5.69) -m1[4,]<-c(3.09,3.39,3.62,3.79,3.93,4.04,4.14,4.23,4.31, -4.38,4.45,4.51,4.56,4.61,4.66,4.70,4.74,4.78, -4.82,4.85,4.89,4.92,4.95,4.98,5.00,5.03,5.06) -m1[5,]<-c(2.92,3.19,3.39,3.54,3.66,3.77,3.86,3.94,4.01, -4.07,4.13,4.18,4.23,4.28,4.32,4.36,4.39,4.43, -4.46,4.49,4.52,4.55,4.58,4.60,4.63,4.65,4.68) -m1[6,]<-c(2.80,3.06,3.24,3.38,3.49,3.59,3.67,3.74,3.80, -3.86,3.92,3.96,4.01,4.05,4.09,4.13,4.16,4.19, -4.22,4.25,4.28,4.31,4.33,4.35,4.38,4.39,4.42) -m1[7,]<-c(2.72,2.96,3.13,3.26,3.36,3.45,3.53,3.60,3.66, -3.71,3.76,3.81,3.85,3.89,3.93,3.96,3.99, 4.02, -4.05,4.08,4.10,4.13,4.15,4.18,4.19,4.22,4.24) -m1[8,]<-c(2.66,2.89,3.05,3.17,3.27,3.36,3.43,3.49,3.55, -3.60,3.65,3.69,3.73,3.77,3.80,3.84,3.87,3.89, -3.92,3.95,3.97,3.99,4.02,4.04,4.06,4.08,4.09) -m1[9,]<-c(2.61,2.83,2.98,3.10,3.19,3.28,3.35,3.41,3.47, -3.52,3.56,3.60,3.64,3.68,3.71,3.74,3.77,3.79, -3.82,3.85,3.87,3.89,3.91,3.94,3.95, 3.97,3.99) -m1[10,]<-c(2.57,2.78,2.93,3.05,3.14,3.22,3.29,3.35,3.40, -3.45,3.49,3.53,3.57,3.60,3.63,3.66,3.69,3.72, -3.74,3.77,3.79,3.81,3.83,3.85,3.87,3.89,3.91) -m1[11,]<-c(2.54,2.75,2.89,3.01,3.09,3.17,3.24,3.29,3.35, 
-3.39,3.43,3.47,3.51,3.54,3.57,3.60,3.63,3.65, -3.68,3.70,3.72,3.74,3.76,3.78,3.80,3.82,3.83) -m1[12,]<-c(2.49,2.69,2.83,2.94,3.02,3.09,3.16,3.21,3.26, -3.30,3.34,3.38,3.41,3.45,3.48,3.50,3.53,3.55, -3.58,3.59,3.62,3.64,3.66,3.68,3.69,3.71,3.73) -m1[13,]<-c(2.46,2.65,2.78,2.89,2.97,3.04,3.09,3.15,3.19, -3.24,3.28,3.31,3.35,3.38,3.40,3.43,3.46,3.48, -3.50,3.52,3.54,3.56,3.58,3.59,3.61,3.63,3.64) -m1[14,]<-c(2.43,2.62,2.75,2.85,2.93,2.99,3.05,3.11,3.15, -3.19,3.23,3.26,3.29,3.32,3.35,3.38,3.40,3.42, -3.44,3.46,3.48,3.50,3.52,3.54,3.55,3.57,3.58) -m1[15,]<-c(2.41,2.59,2.72,2.82,2.89,2.96,3.02,3.07,3.11, -3.15,3.19,3.22,3.25,3.28,3.31,3.33,3.36,3.38, -3.39,3.42,3.44,3.46,3.47,3.49,3.50,3.52,3.53) -m1[16,]<-c(2.38,2.56,2.68,2.77,2.85,2.91,2.97,3.02,3.06, -3.09,3.13,3.16,3.19,3.22,3.25,3.27,3.29,3.31, -3.33,3.35,3.37,3.39,3.40,3.42,3.43,3.45,3.46) -m1[17,]<-c(2.35,2.52,2.64,2.73,2.80,2.87,2.92,2.96,3.01, -3.04,3.07,3.11,3.13,3.16,3.18,3.21,3.23,3.25, -3.27,3.29,3.30,3.32,3.33,3.35,3.36,3.37,3.39) -m1[18,]<-c(2.32,2.49,2.60,2.69,2.76,2.82,2.87,2.91,2.95, -2.99,3.02,3.05,3.08,3.09,3.12,3.14,3.17, 3.18, -3.20,3.22,3.24,3.25,3.27,3.28,3.29,3.31,3.32) -m1[19,]<-c(2.29,2.45,2.56,2.65,2.72,2.77,2.82,2.86,2.90, -2.93,2.96,2.99,3.02,3.04,3.06,3.08,3.10, 3.12, -3.14,3.16,3.17,3.19,3.20,3.21,3.23,3.24,3.25) -m1[20,]<-c(2.24,2.39,2.49,2.57,2.63,2.68,2.73,2.77,2.79, -2.83,2.86,2.88,2.91,2.93,2.95,2.97,2.98, 3.01, -3.02,3.03,3.04,3.06,3.07,3.08,3.09,3.11,3.12) -if(nuhat>=200)smmcrit<-m1[20,C] -if(nuhat<200){ -nu<-c(2,3,4,5,6,7,8,9,10,11,12,14,16,18,20,24,30,40,60,200) -temp<-abs(nu-nuhat) -find<-order(temp) -if(temp[find[1]]==0)smmcrit<-m1[find[1],C] -if(temp[find[1]]!=0){ -if(nuhat>nu[find[1]]){ -smmcrit<-m1[find[1],C]- -(1/nu[find[1]]-1/nuhat)*(m1[find[1],C]-m1[find[1]+1,C])/ -(1/nu[find[1]]-1/nu[find[1]+1]) -} -if(nuhat0)J<-length(grp) -nval<-1 -nrat<-1 -nmax<-0 -rbar<-1 -mrbar<-0 -for (j in grp){ -temp<-x[[j]] -temp<-temp[!is.na(temp)] #Missing values are removed. 
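# A minimal usage sketch for smmcrit(), defined above: the .95 quantile of the
# C-variate Studentized maximum modulus distribution used by the
# multiple-comparison functions in this file.
smmcrit(nuhat = 24, C = 3)     # critical value for 3 contrasts, 24 df
smmcrit(nuhat = 24, C = 1)     # reduces to qt(.975, 24)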
-nrat[j]<-(length(temp)-1)/length(temp) -nval[j]<-length(temp) -if(j==grp[1])xall<-temp -if(j!=grp[1])xall<-c(xall,temp) -if(length(temp)>nmax)nmax<-length(temp) -} -pv<-array(NA,c(J,nmax,J)) -tv<-matrix(NA,J,nmax) -rv<-matrix(0,J,nmax) -for (i in 1:J){ -data<-x[[i]] -data<-data[!is.na(data)] -for (j in 1:length(data)){ -tempr<-data[j]-xall -rv[i,j]<-length(tempr[tempr>=0]) -for (l in 1:J){ -templ<-x[[l]] -templ<-templ[!is.na(templ)] -temp<-data[j]-templ -pv[i,j,l]<-length(temp[temp>=0]) -} -tv[i,j]<-sum(pv[i,j,])-pv[i,j,i] -} -rbar[i]<-sum(rv[i,])/nval[i] -mrbar<-mrbar+sum(rv[i,]) -} -amat<-matrix(0,J,J) -for(i in 1:J){ -temptv<-tv[i,] -temptv<-temptv[!is.na(temptv)] -amat[i,i]<-(length(temptv)-1)*var(temptv) -for (l in 1:J){ -tempp<-pv[l,,i] -tempp<-tempp[!is.na(tempp)] -if(l!=i){ -amat[i,i]<-amat[i,i]+(length(tempp)-1)*var(tempp) -}} -for (j in 1:J){ -if(j>i){ -for (l in 1:J){ -temp1<-pv[l,,i] -temp2<-pv[l,,j] -temp1<-temp1[!is.na(temp1)] -temp2<-temp2[!is.na(temp2)] -#if(i!=l && l!=j)amat[i,j]<-(length(temp1)-1)*var(temp1,temp2) -if(i!=l && l!=j)amat[i,j]<-amat[i,j]+(length(temp1)-1)*var(temp1,temp2) -} -temp1<-pv[i,,j] -temp2<-tv[i,] -temp1<-temp1[!is.na(temp1)] -temp2<-temp2[!is.na(temp2)] -amat[i,j]<-amat[i,j]-(length(temp1)-1)*var(temp1,temp2) -temp1<-pv[j,,i] -temp2<-tv[j,] -temp1<-temp1[!is.na(temp1)] -temp2<-temp2[!is.na(temp2)] -amat[i,j]<-amat[i,j]-(length(temp1)-1)*var(temp1,temp2) -} -amat[j,i]<-amat[i,j] -}} -N<-sum(nval) -amat<-amat/N^3 -amati<-ginv(amat) -uvec<-1 -mrbar<-mrbar/N -for (i in 1:J)uvec[i]<-nval[i]*(rbar[i]-mrbar)/(N*(N+1)) -testv<-N*prod(nrat)*uvec%*%amati%*%uvec -test<-testv[1,1] -df<-J-1 -siglevel<-1-pchisq(test,df) -list(test=test,p.value=siglevel,df=df) -} - -apanova<-function(data,grp=0){ -# -# Perform Agresti-Pendergast rank test for J dependent groups -# The data are assumed to be stored in an n by J matrix or -# in list mode. In the latter case, length(data)=J. -# -if(is.list(data)){ -x<-matrix(0,length(data[[1]]),length(data)) -for (j in 1:length(data))x[,j]<-data[[j]] -} -if(is.matrix(data))x<-data -if(sum(grp==0))grp<-c(1:ncol(x)) -x<-x[,grp] -J<-ncol(x) -n<-nrow(x) -if(n<=20)print("With n<=20, suggest using bprm") -rm<-matrix(rank(x),n,J) -rv<-apply(rm,2,mean) -sm<-(n-1)*winall(rm,tr=0)$cov/(n-J+1) -jm1<-J-1 -cv<-diag(1,jm1,J) -for (i in 2:J){ -k<-i-1 -cv[k,i]<--1 -} -cr<-cv%*%rv -ftest<-n*t(cr)%*%solve(cv%*%sm%*%t(cv))%*%cr/(J-1) -df1<-J-1 -df2<-(J-1)*(n-1) -siglevel<-1-pf(ftest,df1,df2) -list(FTEST=ftest,df1=df1,df2=df2,p.value=siglevel) -} -box1way<-function(x,tr=.2,grp=c(1:length(x))){ -# -# A heteroscedastic one-way ANOVA for trimmed means -# using a generalization of Box's method. -# -# The data are assumed to be stored in $x$ in list mode. -# Length(x) is assumed to correspond to the total number of groups. -# By default, the null hypothesis is that all groups have a common mean. -# To compare a subset of the groups, use grp to indicate which -# groups are to be compared. For example, if you type the -# command grp<-c(1,3,4), and then execute this function, groups -# 1, 3, and 4 will be compared with the remaining groups ignored. -# -# Missing values are automatically removed. 
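# A minimal usage sketch for apanova(), defined above, with a made-up n by J
# matrix of dependent (repeated-measures) data; it applies the
# Agresti-Pendergast rank test and needs winall() from elsewhere in this file.
set.seed(9)
dat <- matrix(rnorm(30 * 3), ncol = 3)
dat[, 3] <- dat[, 3] + .5
apanova(dat)                   # $FTEST, $df1, $df2, $p.value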
-# -J<-length(grp) # The number of groups to be compared -print("The number of groups to be compared is") -print(J) -h<-vector("numeric",J) -w<-vector("numeric",J) -xbar<-vector("numeric",J) -svec<-vector("numeric",J) -for(j in 1:J){ -xx<-!is.na(x[[j]]) -val<-x[[j]] -x[[j]]<-val[xx] # Remove missing values -h[j]<-length(x[[grp[j]]])-2*floor(tr*length(x[[grp[j]]])) - # h is the number of observations in the jth group after trimming. -svec[j]<-((length(x[[grp[j]]])-1)*winvar(x[[grp[j]]],tr))/(h[j]-1) -xbar[j]<-mean(x[[grp[j]]],tr) -} -xtil<-sum(h*xbar)/sum(h) -fval<-h/sum(h) -TEST<-sum(h*(xbar-xtil)^2)/sum((1-fval)*svec) -nu1<-sum((1-fval)*svec) -nu1<-nu1^2/((sum(svec*fval))^2+sum(svec^2*(1-2*fval))) -nu2<-(sum((1-fval)*svec))^2/sum(svec^2*(1-fval)^2/(h-1)) -sig<-1-pf(TEST,nu1,nu2) -list(TEST=TEST,nu1=nu1,nu2=nu2,p.value=sig) -} - - - -pairdepb<-function(x,tr=.2,alpha=.05,grp=0,nboot=599){ -# -# Using the percentile t bootstrap method, -# compute a .95 confidence interval for all pairwise differences between -# the trimmed means of dependent groups. -# By default, 20% trimming is used with B=599 bootstrap samples. -# -# x can be an n by J matrix or it can have list mode -# -if(is.data.frame(x)) x <- as.matrix(x) -if(!is.list(x) && !is.matrix(x))stop("Data must be stored in a matrix or in list mode.") -if(is.list(x)){ -if(sum(grp)==0)grp<-c(1:length(x)) -# put the data in an n by J matrix -mat<-matrix(0,length(x[[1]]),length(grp)) -for (j in 1:length(grp))mat[,j]<-x[[grp[j]]] -} -if(is.matrix(x)){ -if(sum(grp)==0)grp<-c(1:ncol(x)) -mat<-x[,grp] -} -if(sum(is.na(mat)>=1))stop("Missing values are not allowed.") -J<-ncol(mat) -connum<-(J^2-J)/2 -bvec<-matrix(0,connum,nboot) -set.seed(2) # set seed of random number generator so that -# results can be duplicated. -#print("Taking bootstrap samples. Please wait.") -data<-matrix(sample(nrow(mat),size=nrow(mat)*nboot,replace=TRUE),nrow=nboot) -xcen<-matrix(0,nrow(mat),ncol(mat)) -for (j in 1:J)xcen[,j]<-mat[,j]-mean(mat[,j],tr) #Center data -it<-0 -for (j in 1:J){ -for (k in 1:J){ -if(j=2)kron<-rbind(kron,m3) -} -kron -} - -rmanova<-function(x,tr=.2,grp=c(1:length(x))){ -# -# A heteroscedastic one-way repeated measures ANOVA for trimmed means. -# -# The data are assumed to be stored in $x$ which can -# be either an n by J matrix, or an R variable having list mode. -# If the data are stored in list mode, -# length(x) is assumed to correspond to the total number of groups. -# By default, the null hypothesis is that all group have a common mean. -# To compare a subset of the groups, use grp to indicate which -# groups are to be compared. For example, if you type the -# command grp<-c(1,3,4), and then execute this function, groups -# 1, 3, and 4 will be compared with the remaining groups ignored. -# -if(!is.list(x) && !is.matrix(x))stop("Data must be stored in a matrix or in list mode.") -if(is.list(x)){ -J<-length(grp) # The number of groups to be compared -#print("The number of groups to be compared is") -#print(J) -m1<-matrix(x[[grp[1]]],length(x[[grp[1]]]),1) -for(i in 2:J){ # Put the data into an n by J matrix -m2<-matrix(x[[grp[i]]],length(x[[i]]),1) -m1<-cbind(m1,m2) -} -} -if(is.matrix(x)){ -if(length(grp)=ncol(x))m1<-as.matrix(x) -J<-ncol(x) -#print("The number of groups to be compared is") -#print(J) -} -# -# Raw data are now in the matrix m1 -# -m2<-matrix(0,nrow(m1),ncol(m1)) -xvec<-1 -g<-floor(tr*nrow(m1)) #2g is the number of observations trimmed. 
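# A minimal usage sketch for box1way(), defined above, with three made-up
# independent groups stored in list mode (the expected input format).
set.seed(10)
grps <- list(rnorm(20), rnorm(25, mean = .4), rnorm(22, sd = 2))
box1way(grps, tr = .2)         # $TEST, $nu1, $nu2, $p.value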
-for(j in 1:ncol(m1)){ # Putting Winsorized values in m2 -m2[,j]<-winval(m1[,j],tr) -xvec[j]<-mean(m1[,j],tr) -} -xbar<-mean(xvec) -qc<-(nrow(m1)-2*g)*sum((xvec-xbar)^2) -m3<-matrix(0,nrow(m1),ncol(m1)) -m3<-sweep(m2,1,apply(m2,1,mean)) # Sweep out rows -m3<-sweep(m3,2,apply(m2,2,mean)) # Sweep out columns -m3<-m3+mean(m2) # Grand Winsorized mean swept in -qe<-sum(m3^2) -test<-(qc/(qe/(nrow(m1)-2*g-1))) -# -# Next, estimate the adjusted degrees of freedom -# -v<-winall(m1,tr=tr)$cov -vbar<-mean(v) -vbard<-mean(diag(v)) -vbarj<-1 -for(j in 1:J){ -vbarj[j]<-mean(v[j,]) -} -A<-J*J*(vbard-vbar)^2/(J-1) -B<-sum(v*v)-2*J*sum(vbarj^2)+J*J*vbar^2 -ehat<-A/B -etil<-(nrow(m2)*(J-1)*ehat-2)/((J-1)*(nrow(m2)-1-(J-1)*ehat)) -etil<-min(1.,etil) -df1<-(J-1)*etil -df2<-(J-1)*etil*(nrow(m2)-2*g-1) -siglevel<-1-pf(test,df1,df2) -list(num.groups=J,test=test,df=c(df1,df2),p.value=siglevel,tmeans=xvec,ehat=ehat,etil=etil) -} - - - -trimpartt<-function(x,con){ -# -# This function is used by other functions described in chapter 6. -# -trimpartt<-sum(con*x) -trimpartt -} - -bptdmean<-function(isub,x,tr){ -# -# Compute trimmed means -# when comparing dependent groups. -# By default, 20% trimmed means are used. -# isub is a vector of length n, -# a bootstrap sample from the sequence of integers -# 1, 2, 3, ..., n -# -# This function is used by bptd. -# -bptdmean<-mean(x[isub],tr) -bptdmean -} - - -bptdpsi<-function(x,con){ -# Used by bptd to compute bootstrap psihat values -# -bptdpsi<-sum(con*x) -bptdpsi -} -bptdsub<-function(isub,x,tr,con){ -# -# Compute test statistic for trimmed means -# when comparing dependent groups. -# By default, 20% trimmed means are used. -# isub is a vector of length n, -# a bootstrap sample from the sequence of integers -# 1, 2, 3, ..., n -# con is a J by c matrix. The cth column contains -# a vector of contrast coefficients. -# -# This function is used by bptd. -# -h1 <- nrow(x) - 2 * floor(tr * nrow(x)) -se<-0 -for(j in 1:ncol(x)){ -for(k in 1:ncol(x)){ -djk<-(nrow(x) - 1) * wincor(x[isub,j],x[isub,k], tr)$cov -se<-se+con[j]*con[k]*djk -} -} -se/(h1*(h1-1)) -} - -selby2<-function(m,grpc,coln=NA){ -# Create categories according to the grpc[1] and grpc[2] columns -# of the matrix m. The function puts the values in column coln into -# a vector having list mode. 
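# A minimal usage sketch for rmanova(), the heteroscedastic repeated-measures
# ANOVA on 20% trimmed means defined above; the input is an n by J matrix (or
# list mode) and it needs winall() from elsewhere in this file.
set.seed(11)
dat <- matrix(rnorm(25 * 4), ncol = 4)
rmanova(dat, tr = .2)          # $test, $df, $p.value, $tmeans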
-# -if(is.na(coln))stop("The argument coln is not specified") -if(length(grpc)>4)stop("The argument grpc must have length less than or equal to 4") -x<-vector("list") -ic<-0 -if(length(grpc)==2){ -cat1<-selby(m,grpc[1],coln)$grpn -cat2<-selby(m,grpc[2],coln)$grpn -for (i1 in 1:length(cat1)){ -for (i2 in 1:length(cat2)){ -temp<-NA -it<-0 -for (i in 1:nrow(m)){ -if(sum(m[i,c(grpc[1],grpc[2])]==c(cat1[i1],cat2[i2]))==2){ -it<-it+1 -temp[it]<-m[i,coln] -} -} -if(!is.na(temp[1])){ -ic<-ic+1 -x[[ic]]<-temp -if(ic==1)grpn<-matrix(c(cat1[i1],cat2[i2]),1,2) -if(ic>1)grpn<-rbind(grpn,c(cat1[i1],cat2[i2])) -} -}} -} -if(length(grpc)==3){ -cat1<-selby(m,grpc[1],coln)$grpn -cat2<-selby(m,grpc[2],coln)$grpn -cat3<-selby(m,grpc[3],coln)$grpn -x<-vector("list") -ic<-0 -for (i1 in 1:length(cat1)){ -for (i2 in 1:length(cat2)){ -for (i3 in 1:length(cat3)){ -temp<-NA -it<-0 -for (i in 1:nrow(m)){ -if(sum(m[i,c(grpc[1],grpc[2],grpc[3])]==c(cat1[i1],cat2[i2],cat3[i3]))==3){ -it<-it+1 -temp[it]<-m[i,coln] -}} -if(!is.na(temp[1])){ -ic<-ic+1 -x[[ic]]<-temp -if(ic==1)grpn<-matrix(c(cat1[i1],cat2[i2],cat3[i3]),1,3) -if(ic>1)grpn<-rbind(grpn,c(cat1[i1],cat2[i2],cat3[i3])) -}}}} -} -if(length(grpc)==4){ -cat1<-selby(m,grpc[1],coln)$grpn -cat2<-selby(m,grpc[2],coln)$grpn -cat3<-selby(m,grpc[3],coln)$grpn -cat4<-selby(m,grpc[4],coln)$grpn -x<-vector("list") -ic<-0 -for (i1 in 1:length(cat1)){ -for (i2 in 1:length(cat2)){ -for (i3 in 1:length(cat3)){ -for (i4 in 1:length(cat4)){ -temp<-NA -it<-0 -for (i in 1:nrow(m)){ -if(sum(m[i,c(grpc[1],grpc[2],grpc[3],grpc[4])]==c(cat1[i1],cat2[i2],cat3[i3],cat4[i4]))==4){ -it<-it+1 -temp[it]<-m[i,coln] -}} -if(!is.na(temp[1])){ -ic<-ic+1 -x[[ic]]<-temp -if(ic==1)grpn<-matrix(c(cat1[i1],cat2[i2],cat3[i3],cat4[i4]),1,4) -if(ic>1)grpn<-rbind(grpn,c(cat1[i1],cat2[i2],cat3[i3],cat4[i4])) -}}}}} -} -list(x=x,grpn=grpn) -} - - -lindmsub<-function(isub,x,est,...){ -# -# isub is a vector of length n containing integers between -# randomly sampled with replacement from 1,...,n. -# -# Used by lindm to convert an n by B matrix of bootstrap values, -# randomly sampled from 1, ..., n, with replacement, to a -# J by B matrix of measures of location. -# -# -lindmsub<-est(x[isub],...) -lindmsub -} -lindm<-function(x,con=0,est=onestep,grp=0,alpha=.05,nboot=999,...){ -# -# Compute a 1-alpha confidence interval for a set of d linear contrasts -# involving M-estimators associated with the marginal distributions -# using a bootstrap method. -# Dependent groups are assumed. -# -# The data are assumed to be stored in x in list mode. Thus, -# x[[1]] contains the data for the first group, x[[2]] the data -# for the second group, etc. Length(x)=the number of groups = J, say. -# -# con is a J by d matrix containing the contrast coefficents of interest. -# If unspecified, all pairwise comparisons are performed. -# For example, con[,1]=c(1,1,-1,-1,0,0) and con[,2]=c(,1,-1,0,0,1,-1) -# will test two contrasts: (1) the sum of the first two trimmed means is -# equal to the sum of the second two, and (2) the difference between -# the first two is equal to the difference between the trimmed means of -# groups 5 and 6. -# -# The default number of bootstrap samples is nboot=399 -# -# This function uses the function trimpartt written for this -# book. 
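# A minimal usage sketch for selby2(), defined above: split the outcome in
# column 3 of a matrix into groups defined by the categories in columns 1 and
# 2. It needs selby() from elsewhere in this file.
set.seed(12)
m <- cbind(g1 = rep(1:2, each = 6), g2 = rep(1:3, 4), y = rnorm(12))
out <- selby2(m, grpc = c(1, 2), coln = 3)
out$x                          # list-mode data, one vector per cell
out$grpn                       # the category combination defining each cell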
-# -# -# -if(!is.list(x) && !is.matrix(x))stop("Data must be stored in a matrix or in list mode.") -if(is.list(x)){ -if(sum(grp)==0)grp<-c(1:length(x)) -# put the data in an n by J matrix -mat<-matrix(0,length(x[[1]]),length(grp)) -for (j in 1:length(grp))mat[,j]<-x[[grp[j]]] -} -if(is.matrix(x)){ -if(sum(grp)==0)grp<-c(1:ncol(x)) -mat<-x[,grp] -} -mat<-elimna(mat) -J<-ncol(mat) -Jm<-J-1 -d<-(J^2-J)/2 -if(sum(con^2)==0){ -con<-matrix(0,J,d) -id<-0 -for (j in 1:Jm){ -jp<-j+1 -for (k in jp:J){ -id<-id+1 -con[j,id]<-1 -con[k,id]<-0-1 -}}} -if(nrow(con)!=ncol(mat))stop("The number of groups does not match the number of contrast coefficients.") -m1<-matrix(0,J,nboot) -m2<-1 # Initialize m2 -mval<-1 -set.seed(2) # set seed of random number generator so that -# results can be duplicated. -print("Taking bootstrap samples. Please wait.") -data<-matrix(sample(nrow(mat),size=nrow(mat)*nboot,replace=TRUE),nrow=nboot) -# data is B by n matrix -xcen<-matrix(0,nrow(mat),ncol(mat)) #An n by J matrix -for (j in 1:J){xcen[,j]<-mat[,j]-est(mat[,j],...) #Center data -mval[j]<-est(mat[,j],...) -} -for (j in 1:J)m1[j,]<-apply(data,1,lindmsub,xcen[,j],est,...) # A J by nboot matrix. -m2<-var(t(m1)) # A J by J covariance matrix corresponding to the nboot values. -boot<-matrix(0,ncol(con),nboot) -bot<-1 -for (d in 1:ncol(con)){ -top<-apply(m1,2,trimpartt,con[,d]) -# A vector of length nboot containing psi hat values -consq<-con[,d]^2 -bot[d]<-trimpartt(diag(m2),consq) -for (j1 in 1:J){ -for (j2 in 1:J){ -if(j1=29)stop("C must be less than or equal to 28") -if(C<=0)stop("C must be greater than or equal to 1") -if(nuhat<2)stop("The degrees of freedom must be greater than or equal to 2") -if(C==1)smmcrit01<-qt(.995,nuhat) -if(C>=2){ -C<-C-1 -m1<-matrix(0,20,27) -m1[1,]<-c(12.73,14.44,15.65,16.59,17.35,17.99,18.53,19.01,19.43, -19.81,20.15,20.46,20.75,20.99,20.99,20.99,20.99,20.99, -22.11,22.29,22.46,22.63,22.78,22.93,23.08,23.21,23.35) -m1[2,]<-c(7.13,7.91,8.48,8.92,9.28,9.58,9.84,10.06,10.27, -10.45,10.61,10.76,10.90,11.03,11.15,11.26,11.37,11.47, -11.56,11.65,11.74,11.82,11.89,11.97,12.07,12.11,12.17) -m1[3,]<-c(5.46,5.99,6.36,6.66,6.89,7.09,7.27,7.43,7.57, -7.69,7.80,7.91,8.01,8.09,8.17,8.25,8.32,8.39, -8.45,8.51,8.57,8.63,8.68,8.73,8.78,8.83,8.87) -m1[4,]<-c(4.70,5.11,5.39,5.63,5.81,5.97,6.11,6.23,6.33, -6.43,6.52,6.59,6.67,6.74,6.81,6.87,6.93,6.98, -7.03,7.08,7.13,7.17,7.21,7.25,7.29,7.33,7.36) -m1[5,]<-c(4.27,4.61,4.85,5.05,5.20,5.33,5.45,5.55,5.64, -5.72,5.79,5.86,5.93,5.99,6.04,6.09,6.14,6.18, -6.23,6.27,6.31,6.34,6.38,6.41,6.45,6.48,6.51) -m1[6,]<-c(3.99,4.29,4.51,4.68,4.81,4.93,5.03,5.12,5.19, -5.27,5.33,5.39,5.45,5.50,5.55,5.59,5.64,5.68, -5.72,5.75,5.79,5.82,5.85,5.88,5.91,5.94,5.96) -m1[7,]<-c(3.81,4.08,4.27,4.42,4.55,4.65,4.74,4.82,4.89, -4.96,5.02,5.07,5.12,5.17,5.21,5.25,5.29, 5.33, -5.36,5.39,5.43,5.45,5.48,5.51,5.54,5.56,5.59) -m1[8,]<-c(3.67,3.92,4.10,4.24,4.35,4.45,4.53,4.61,4.67, -4.73,4.79,4.84,4.88,4.92,4.96,5.01,5.04,5.07, -5.10,5.13,5.16,5.19,5.21,5.24,5.26,5.29,5.31) -m1[9,]<-c(3.57,3.80,3.97,4.09,4.20,4.29,4.37,4.44,4.50, -4.56,4.61,4.66,4.69,4.74,4.78,4.81,4.84,4.88, -4.91,4.93,4.96,4.99,5.01,5.03,5.06,5.08,5.09) -m1[10,]<-c(3.48,3.71,3.87,3.99,4.09,4.17,4.25,4.31,4.37, -4.42,4.47,4.51,4.55,4.59,4.63,4.66,4.69,4.72, -4.75,4.78,4.80,4.83,4.85,4.87,4.89,4.91,4.93) -m1[11,]<-c(3.42,3.63,3.78,3.89,.99,4.08,4.15,4.21,4.26, -4.31,4.36,4.40,4.44,4.48,4.51,4.54,4.57,4.59, -4.62,4.65,4.67,4.69,4.72,4.74,4.76,4.78,4.79) -m1[12,]<-c(3.32,3.52,3.66,3.77,3.85,3.93,3.99,.05,4.10, 
-4.15,4.19,4.23,4.26,4.29,4.33,4.36,4.39,4.41, -4.44,4.46,4.48,4.50,4.52,4.54,4.56,4.58,4.59) -m1[13,]<-c(3.25,3.43,3.57,3.67,3.75,3.82,3.88,3.94,3.99, -4.03,4.07,4.11,4.14,4.17,4.19,4.23,4.25,4.28, -4.29,4.32,4.34,4.36,4.38,4.39,4.42,4.43,4.45) -m1[14,]<-c(3.19,3.37,3.49,3.59,3.68,3.74,3.80,3.85,3.89, -3.94,3.98,4.01,4.04,4.07,4.10,4.13,4.15,4.18, -4.19,4.22,4.24,4.26,4.28,4.29,4.31,4.33,4.34) -m1[15,]<-c(3.15,3.32,3.45,3.54,3.62,3.68,3.74,3.79,3.83, -3.87,3.91,3.94,3.97,3.99,4.03,4.05,4.07,4.09, -4.12,4.14,4.16,4.17,4.19,4.21,4.22,4.24,4.25) -m1[16,]<-c(3.09,3.25,3.37,3.46,3.53,3.59,3.64,3.69,3.73, -3.77,3.80,3.83,3.86,3.89,3.91,3.94,3.96,3.98, -4.00,4.02,4.04,4.05,4.07,4.09,4.10,4.12,4.13) -m1[17,]<-c(3.03,3.18,3.29,3.38,3.45,3.50,3.55,3.59,3.64, -3.67,3.70,3.73,3.76,3.78,3.81,3.83,3.85,3.87, -3.89,3.91,3.92,3.94,3.95,3.97,3.98,4.00,4.01) -m1[18,]<-c(2.97,3.12,3.22,3.30,3.37,3.42,3.47,3.51,3.55, -3.58,3.61,3.64,3.66,3.68,3.71,3.73,3.75,3.76, -3.78,3.80,3.81,3.83,3.84,3.85,3.87,3.88,3.89) -m1[19,]<-c(2.91,3.06,3.15,3.23,3.29,3.34,3.38,3.42,3.46, -3.49,3.51,3.54,3.56,3.59,3.61,3.63,3.64,3.66, -3.68,3.69,3.71,3.72,3.73,3.75,3.76,3.77,3.78) -m1[20,]<-c(2.81,2.93,3.02,3.09,3.14,3.19,3.23,3.26,3.29, -3.32,3.34,3.36,3.38,3.40,.42,.44,3.45,3.47, -3.48,3.49,3.50,3.52,3.53,3.54,3.55,3.56,3.57) -if(nuhat>=200)smmcrit01<-m1[20,C] -if(nuhat<200){ -nu<-c(2,3,4,5,6,7,8,9,10,11,12,14,16,18,20,24,30,40,60,200) -temp<-abs(nu-nuhat) -find<-order(temp) -if(temp[find[1]]==0)smmcrit01<-m1[find[1],C] -if(temp[find[1]]!=0){ -if(nuhat>nu[find[1]]){ -smmcrit01<-m1[find[1],C]- -(1/nu[find[1]]-1/nuhat)*(m1[find[1],C]-m1[find[1]+1,C])/ -(1/nu[find[1]]-1/nu[find[1]+1]) -} -if(nuhat=1))ikeep[i]<-0 -e<-m[ikeep[ikeep>=1],] -} -e -} - -pball<-function(m,beta=.2){ -# -# Compute the percentage bend correlation matrix for the -# data in the n by p matrix m. -# -# This function also returns the two-sided significance level -# for all pairs of variables, plus a test of zero correlations -# among all pairs. (See chapter 6 for details.) -# -if(!is.matrix(m))stop("Data must be stored in an n by p matrix") -pbcorm<-matrix(0,ncol(m),ncol(m)) -temp<-matrix(1,ncol(m),ncol(m)) -siglevel<-matrix(NA,ncol(m),ncol(m)) -cmat<-matrix(0,ncol(m),ncol(m)) -for (i in 1:ncol(m)){ -ip1<-i -for (j in ip1:ncol(m)){ -if(i1]) -sx<-ifelse(psi<(-1),0,x) -sx<-ifelse(psi>1,0,sx) -pbos<-(sum(sx)+omhatx*(i2-i1))/(length(x)-i1-i2) -pbos -} - - -tauall<-function(m){ -# -# Compute Kendall's tau for the -# data in the n-by-p matrix m. -# -# This function also returns the two-sided significance level -# for all pairs of variables, plus a test of zero correlations -# among all pairs. (See chapter 6 for details.) -# -if(!is.matrix(m))stop("Data must be stored in an n by p matrix") -taum<-matrix(0,ncol(m),ncol(m)) -siglevel<-matrix(NA,ncol(m),ncol(m)) -for (i in 1:ncol(m)){ -ip1<-i -for (j in ip1:ncol(m)){ -if(i=length(xv)/2)warning("More than half of the w values equal zero") -sumw<-sum(w[ee=.0001) -paste("failed to converge in",iter,"iterations") -list(coef=c(b0,slope),residuals=res) -} - -chreg<-function(x,y,bend=1.345,SEED=TRUE,xout=FALSE,outfun=outpro,pr=TRUE,...){ -# -# Compute Coakley Hettmansperger robust regression estimators -# JASA, 1993, 88, 872-880 -# -# x is a n by p matrix containing the predictor values. -# -# No missing values are allowed -# -# Comments in this function follow the notation used -# by Coakley and Hettmansperger -# -library(MASS) -# with old version of R, need library(lqs) when using ltsreg -# as the initial estimate. 
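-# Example (hypothetical data; the initial LTS fit via ltsReg is typically supplied
-# by the robustbase package):
-#   set.seed(1)
-#   x<-rnorm(40)
-#   y<-2+1.5*x+rnorm(40)
-#   chreg(x,y)$coef   # robust estimates of the intercept and slope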
-# -if(pr)print('If using chreg with a bootstrap method, use chregF instead') -if(SEED)set.seed(12) # Set seed so that results are always duplicated. -x<-as.matrix(x) -p<-ncol(x) -m<-elimna(cbind(x,y)) -x<-m[,1:p] -p1<-p+1 -y<-m[,p1] -if(xout){ -x<-as.matrix(x) -if(identical(outfun,outblp))flag=outblp(x,y,plotit=FALSE)$keep -else -flag<-outfun(x,plotit=plotit,...)$keep -x<-x[flag,] -y<-y[flag] -x<-as.matrix(x) -} -x<-as.matrix(x) -cutoff<-bend -mve<-vector("list") -if(ncol(x)==1){ -mve$center<-median(x) -mve$cov<-mad(x)^2 -} -if(ncol(x)>=2)mve<-cov.mve(x) # compute minimum volume ellipsoid measures of - # location and scale and store in mve. -reg0<-ltsReg(x,y) # compute initial regression est using least trimmed - # squares. -# Next, compute the rob-md2(i) values and store in rob -rob<-1 # Initialize vector rob -mx<-mve$center -rob<-mahalanobis(x,mx,mve$cov) -k21<-qchisq(.95,p) -c62<-k21/rob -vecone<-c(rep(1,length(y))) # Initialize vector vecone to 1 -c30<-pmin(vecone,c62) # mallows weights put in c30 -k81<-median(abs(reg0$residuals)) # median of absolute residuals -k72<-1.4826*(1+(5/(length(y)-p-1)))*k81 # lms scale -c60<-reg0$residuals/(k72*c30) # standardized residuals -# compute psi and store in c27 -cvec<-c(rep(cutoff,length(y))) # Initialize vector cvec to cutoff -c27<-pmin(cvec,c60) -c27<-pmax(-1*cutoff,c27) #c27 contains psi values -# -# compute B matrix and put in c66. -# Also, transform B so that i th diag elem = 0 if c27[i] is -# between -cutoff and cutoff, 1 otherwise. -# -c66<-ifelse(abs(c27)<=bend,1,0) # Have derivative of psi in c66 -m1<-cbind(1,x) # X matrix with col of 1's added -m2<-t(m1) #X transpose -m5<-diag(c30) # matrix W, diagonal contains weights -m4<-diag(c66) # B matrix -m6<-m4%*%m1 # BX -m7<-m2%*%m6 # X'BX (nD=X'BX) -m8<-solve(m7) #m8 = (X'-B-X)inverse -m9<-m8%*%m2 #m9=X prime-B-X inverse X' -m9<-m9%*%m5 # m9=X prime-B-X inverse X'W -m10<-m9%*%c27 -c20<-m10*k72 -c21<-reg0$coef+c20 #update initial estimate of parameters. -res<-y-m1%*%c21 -list(coef=t(c21),residuals=res) -} - -regboot<-function(isub,x,y,regfun,...){ -# -# Perform regression using x[isub] to predict y[isub] -# isub is a vector of length n, -# a bootstrap sample from the sequence of integers -# 1, 2, 3, ..., n -# -# This function is used by other functions when computing -# bootstrap estimates. -# -# regfun is some regression method already stored in R -# It is assumed that regfun$coef contains the intercept and slope -# estimates produced by regfun. The regression methods written for -# this book, plus regression functions in R, have this property. -# -# x is assumed to be a matrix containing values of the predictors. -# -xmat<-matrix(x[isub,],nrow(x),ncol(x)) -vals<-regfun(xmat,y[isub],...)$coef -vals -} - - -bmreg<-function(x,y,iter=20,bend=2*sqrt((ncol(x)+1)/nrow(x)),xout=FALSE,outfun=outpro,...){ -# compute a bounded M regression using Huber Psi and Schweppe weights. -# The predictors are assumed to be stored in the n by p matrix x. 
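-# Example (hypothetical data):
-#   set.seed(1)
-#   x<-matrix(rnorm(100),ncol=2)
-#   y<-x[,1]-x[,2]+rnorm(50)
-#   fit<-bmreg(x,y)
-#   fit$coef   # intercept and slopes; fit$w holds the Schweppe weights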
-# -x<-as.matrix(x) -p1<-ncol(x)+1 -p<-ncol(x) -xy<-cbind(x,y) -xy<-elimna(xy) -x<-xy[,1:p] -y<-xy[,p1] -if(xout){ -m<-cbind(x,y) -flag<-outfun(x,plotit=FALSE,...)$keep -m<-m[flag,] -x<-m[,1:p] -y<-m[,p1] -} -x<-as.matrix(x) -init<-lsfit(x,y) -resid<-init$residuals -x1<-cbind(1,x) -nu<-sqrt(1-hat(x1)) -low<-ncol(x)+1 -for(it in 1:iter){ -ev<-sort(abs(resid)) -scale<-median(ev[c(low:length(y))])/qnorm(.75) -rov<-(resid/scale)/nu -psi<-ifelse(abs(rov)<=bend,rov,bend*sign(rov)) # Huber Psi -wt<-nu*psi/(resid/scale) -new<-lsfit(x,y,wt) -if(max(abs(new$coef-init$coef))<.0001)break -init$coef<-new$coef -resid<-new$residuals -} -resid<-y-x1%*%new$coef -if(max(abs(new$coef-init$coef))>=.0001) -paste("failed to converge in",iter,"steps") -list(coef=new$coef,residuals=resid,w=wt) -} - - -reglev<-function(x,y,plotit=TRUE,SEED=TRUE,DIS=FALSE){ -# -# Search for good and bad leverage points using the -# Rousseuw and van Zomeren method. -# -# x is an n by p matrix -# -# The function returns the number of the rows in x that are identified -# as outliers. (The row numbers are stored in outliers.) -# It also returns the distance of the points identified as outliers -# in the variable dis. -# -library(MASS) -xy=elimna(cbind(x,y)) -x=as.matrix(x) -p=ncol(x) -p1=p+1 -x=xy[,1:p] -y=xy[,p1] -plotit<-as.logical(plotit) -if(SEED)set.seed(12) -x<-as.matrix(x) -res<-lmsreg(x,y)$resid -sighat<-sqrt(median(res^2)) -sighat<-1.4826*(1+(5/(length(y)-ncol(x)-1)))*sighat -stanres<-res/sighat -if(ncol(x)>=2)mve<-cov.mve(x) -if(ncol(x)==1){ -mve<-vector("list") -mve$center<-median(x) -mve$cov<-mad(x)^2 -} -dis<-mahalanobis(x,mve$center,mve$cov) -dis<-sqrt(dis) -crit<-sqrt(qchisq(.975,ncol(x))) -chk<-ifelse(dis>crit,1,0) -vec<-c(1:nrow(x)) -id<-vec[chk==1] -chkreg<-ifelse(abs(stanres)>2.5,1,0) -idreg<-vec[chkreg==1] -if(plotit){ -plot(dis,stanres,xlab="Robust distances",ylab="standardized residuals") -abline(-2.5,0) -abline(2.5,0) -abline(v=crit) -} -all=c(id,idreg) -ID=duplicated(all) -blp=all[ID] -vec=c(1:length(y)) -nkeep=vec -if(length(blp)>0)nkeep=vec[-blp] -if(!DIS)dis=NULL -list(levpoints=id,regout=idreg,bad.lev.points=blp,keep=nkeep,dis=dis,stanres=stanres,crit=crit) -} - -winreg<-function(x,y,iter=20,tr=.2,xout=FALSE,outfun=outpro,...){ -# -# Compute a Winsorized regression estimator -# The predictors are assumed to be stored in the n by p matrix x. -# -x<-as.matrix(x) -p1<-ncol(x)+1 -p<-ncol(x) -xy<-cbind(x,y) -xy<-elimna(xy) -x<-xy[,1:p] -y<-xy[,p1] -if(xout){ -m<-cbind(x,y) -flag<-outfun(x,plotit=FALSE,...)$keep -m<-m[flag,] -x<-m[,1:p] -y<-m[,p1] -} -x=as.matrix(x) -ma<-matrix(0,ncol(x),1) -m<-matrix(0,ncol(x),ncol(x)) -mvals<-apply(x,2,win,tr) -for (i in 1:ncol(x)){ -ma[i,1]<-wincor(x[,i],y,tr=tr)$cov -for (j in 1:ncol(x))m[i,j]<-wincor(x[,i],x[,j],tr=tr)$cov -} -slope<-solve(m,ma) -b0<-win(y,tr)-sum(slope%*%mvals) -for(it in 1:iter){ -res<-y-x%*%slope-b0 -for (i in 1:ncol(x))ma[i,1]<-wincor(x[,i],res,tr=tr)$cov -slopeadd<-solve(m,ma) -b0add<-win(res,tr)-sum(slopeadd%*%mvals) -if(max(abs(slopeadd),abs(b0add)) <.0001)break -slope<-slope+slopeadd -b0<-b0+b0add -} -if(max(abs(slopeadd),abs(b0add)) >=.0001) -paste("failed to converge in",iter,"iterations") -list(coef=c(b0,slope),resid=res) -} - - -anctgen<-function(x1,y1,x2,y2,pts,fr1=1,fr2=1,tr=.2){ -# -# Compare two independent groups using the ancova method -# in chapter 9. No assumption is made about the form of the regression -# lines--a running interval smoother is used. 
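-# Example (hypothetical data; design points supplied through pts):
-#   set.seed(1)
-#   x1<-rnorm(50); y1<-x1+rnorm(50)
-#   x2<-rnorm(50); y2<-.5*x2+rnorm(50)
-#   anctgen(x1,y1,x2,y2,pts=c(-1,0,1))   # compare the groups at three design points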
-# -# Assume data are in x1 y1 x2 and y2 -# Comparisons are made at the design points contained in the vector -# pts -# -# Comparisons can be made using at most 28 design points, otherwise -# a critical value for controlling the experimentwise type I error cannot -# be computed. -# -if(length(pts)>=29)stop("At most 28 points can be compared") -n1<-1 -n2<-1 -vecn<-1 -for(i in 1:length(pts)){ -n1[i]<-length(y1[near(x1,pts[i],fr1)]) -n2[i]<-length(y2[near(x2,pts[i],fr2)]) -} -mat<-matrix(NA,length(pts),8) -dimnames(mat)<-list(NULL,c("X","n1","n2","DIF","TEST","se","ci.low","ci.hi")) -for (i in 1:length(pts)){ -g1<-y1[near(x1,pts[i],fr1)] -g2<-y2[near(x2,pts[i],fr2)] -g1<-g1[!is.na(g1)] -g2<-g2[!is.na(g2)] -test<-yuen(g1,g2,tr=tr) -mat[i,1]<-pts[i] -mat[i,2]<-length(g1) -mat[i,3]<-length(g2) -mat[i,4]<-test$dif -mat[i,5]<-test$teststat -mat[i,6]<-test$se -if(length(pts)>=2)critv<-smmcrit(test$df,length(pts)) -if(length(pts)==1)critv<-qt(.975,test$df) -cilow<-test$dif-critv*test$se -cihi<-test$dif+critv*test$se -mat[i,7]<-cilow -mat[i,8]<-cihi -} -list(output=mat,crit=critv) -} - -near<-function(x,pt,fr=1){ -# determine which values in x are near pt -# based on fr * mad -if(!is.vector(x))stop('x should be a vector') -m<-mad(x) -if(m==0){ -temp<-idealf(x) -m<-(temp$qu-temp$ql)/(qnorm(.75)-qnorm(.25)) -} -if(m==0)m<-sqrt(winvar(x)/.4129) -if(m==0)stop("All measures of dispersion are equal to 0") -dis<-abs(x-pt) -dflag<-dis <= fr*m -dflag -} - -regpres1<-function(isub,x,y,regfun,mval){ -# -# Perform regression using x[isub] to predict y[isub] -# isub is a vector of length n, -# a bootstrap sample from the sequence of integers -# 1, 2, 3, ..., n -# -# This function is used by other functions when computing -# bootstrap estimates. -# -# regfun is some regression method already stored in R -# It is assumed that regfun$coef contains the intercept and slope -# estimates produced by regfun. The regression methods written for -# this book, plus regression functions in R, have this property. -# -# x is assumed to be a matrix containing values of the predictors. -# -xmat<-matrix(x[isub,],mval,ncol(x)) -regboot<-regfun(xmat,y[isub]) -regboot<-regboot$coef -regboot -} - -runhat<-function(x,y,pts=x,est=tmean,fr=1,nmin=1,...){ -# -# running interval smoother that can be used with any measure -# of location or scale. By default, a 20% trimmed mean is used. -# This function computes an estimate of y for each x value stored in pts -# -# fr controls amount of smoothing -rmd<-rep(NA,length(pts)) -for(i in 1:length(pts)){ -val<-y[near(x,pts[i],fr)] -if(length(val)>=nmin)rmd[i]<-est(val,...) -} -rmd -} - -sqfun<-function(y,na.rm=FALSE){ -# -sqfun<-sum(y^2,na.rm=na.rm) -sqfun -} - -absfun<-function(y,na.rm=FALSE){ -absfun<-sum(abs(y),na.rm=na.rm) -absfun -} - -ancbootg<-function(x1,y1,x2,y2,pts,fr1=1,fr2=1,tr=.2,nboot=599){ -# -# Compare two independent groups using the ancova method -# in chapter 9. No assumption is made about the form of the regression -# lines--a running interval smoother is used. 
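-# Example (hypothetical data; as in anctgen, but the critical value comes from the
-# bootstrap via linconb):
-#   set.seed(1)
-#   x1<-rnorm(50); y1<-x1+rnorm(50)
-#   x2<-rnorm(50); y2<-.5*x2+rnorm(50)
-#   ancbootg(x1,y1,x2,y2,pts=c(-1,0,1))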
-# -# Assume data are in x1 y1 x2 and y2 -# Comparisons are made at the design points contained in the vector -# pts -# -m1=elimna(cbind(x1,y1)) -x1=m1[,1] -y1=m1[,2] -m1=elimna(cbind(x2,y2)) -x2=m1[,1] -y2=m1[,2] -n1<-1 -n2<-1 -vecn<-1 -for(i in 1:length(pts)){ -n1[i]<-length(y1[near(x1,pts[i],fr1)]) -n2[i]<-length(y2[near(x2,pts[i],fr2)]) -} -mat<-matrix(NA,length(pts),8) -dimnames(mat)<-list(NULL,c("X","n1","n2","DIF","TEST","se","ci.low","ci.hi")) -gv<-vector("list",2*length(pts)) -for (i in 1:length(pts)){ -g1<-y1[near(x1,pts[i],fr1)] -g2<-y2[near(x2,pts[i],fr2)] -g1<-g1[!is.na(g1)] -g2<-g2[!is.na(g2)] -j<-i+length(pts) -gv[[i]]<-g1 -gv[[j]]<-g2 -} -I1<-diag(length(pts)) -I2<-0-I1 -con<-rbind(I1,I2) -test<-linconb(gv,con=con,tr=tr,nboot=nboot) -mat[,1]<-pts -mat[,2]<-n1 -mat[,3]<-n2 -mat[,4]<-test$psihat[,2] -mat[,5]<-test$test[,2] -mat[,6]<-test$test[,3] -mat[,7]<-test$psihat[,3] -mat[,8]<-test$psihat[,4] -list(output=mat,crit=test$crit) -} - -errfun<-function(yhat,y,error=sqfun){ -# -# Compute error terms for regpre -# -# yhat is an n by nboot matrix -# y is n by 1. -# -ymat<-matrix(y,nrow(yhat),ncol(yhat)) -blob<-yhat-ymat -errfun<-error(blob) -errfun -} - -near3d<-function(x,pt,fr=.8,m){ -# determine which values in x are near pt -# based on fr * cov.mve -# -# x is assumed to be an n by p matrix -# pt is a vector of length p (a point in p-space). -# m is cov.mve(x) computed by runm3d -# -library(MASS) -if(!is.matrix(x))stop("Data are not stored in a matrix.") -dis<-sqrt(mahalanobis(x,pt,m$cov)) -dflag<-dis < fr -dflag -} - -run3hat<-function(x,y,pts,fr=.8,tr=.2){ -# -# Compute y hat for each row of data in the matrix pts -# using a running interval method -# -# fr controls amount of smoothing -# tr is the amount of trimming -# x is an n by p matrix of predictors. -# pts is an m by p matrix, m>=1. -# -library(MASS) -set.seed(12) -if(!is.matrix(x))stop("Predictors are not stored in a matrix.") -if(!is.matrix(pts))stop("The third argument, pts, must be a matrix.") -m<-cov.mcd(x) -rmd<-1 # Initialize rmd -nval<-1 -for(i in 1:nrow(pts)){ -rmd[i]<-mean(y[near3d(x,pts[i,],fr,m)],tr) -nval[i]<-length(y[near3d(x,pts[i,],fr,m)]) -} -list(rmd=rmd,nval=nval) -} - - -idb<-function(x,n){ -# -# Determine whether a sequence of integers contains a 1, 2, ..., n. -# Return idb[i]=1 if the value i is in x; 0 otherwise. -# This function is used by regpre -# -m1<-matrix(0,n,n) -m1<-outer(c(1:n),x,"-") -m1<-ifelse(m1==0,1,0) -idb<-apply(m1,1,sum) -idb<-ifelse(idb>=1,0,1) -idb -} - -hratio<-function(x,y,regfun=bmreg){ -# -# Compute a p by p matrix of half-slope ratios -# -# regfun can be any R function that returns the coefficients in -# the vector regfun$coef, the first element of which contains the -# estimated intercept, the second element contains the estimate of -# the first predictor, etc. -# -# OUTPUT: -#The first row reports the half-slope -#ratios when the data are divided into two groups using the first predictor. -#The first column is the half-slope ratio for the first predictor, the -#second column is the half-slope ratio for the second predictor, and so forth. -#The second row contains the half-slope ratios when the data are divided -#into two groups using the second predictor, and so on. 
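-# Example (hypothetical data):
-#   set.seed(1)
-#   x<-matrix(rnorm(120),ncol=2)
-#   y<-x[,1]+x[,2]+rnorm(60)
-#   hratio(x,y)   # ratios near 1 are consistent with straight-line associations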
-# -x<-as.matrix(x) -xmat<-matrix(0,nrow(x),ncol(x)) -mval<-floor(length(y)/2) -mr<-length(y)-mval -xmatl<-matrix(0,mval,ncol(x)) -xmatr<-matrix(0,mr,ncol(x)) -hmat<-matrix(NA,ncol(x),ncol(x)) -isub<-c(1:length(y)) -ksub<-c(1:ncol(x))+1 -for (k in 1:ncol(x)){ -xord<-order(x[,k]) -yord<-y[xord] -yl<-yord[isub<=mval] -yr<-yord[isub>mval] -for (j in 1:ncol(x)){ -xmat[,j]<-x[xord,j] -xmatl[,j]<-xmat[isub<=mval,j] -xmatr[,j]<-xmat[isub>mval,j] -} -coefl<-regfun(xmatl,yl)$coef -coefr<-regfun(xmatr,yr)$coef -hmat[k,]<-coefr[ksub[ksub>=2]]/coefl[ksub[ksub>=2]] -} -hmat -} - - - -rung3d<-function(x,y,est=onestep,fr=1,plotit=TRUE,theta=50,phi=25,pyhat=FALSE,LP=FALSE, -expand=.5,scale=FALSE,zscale=TRUE, -nmin=0,xout=FALSE,eout=FALSE,outfun=out,SEED=TRUE,STAND=TRUE, -xlab="X",ylab="Y",zlab="",pr=TRUE,duplicate="error",ticktype="simple",...){ -# -# running mean using interval method -# - -# fr (the span) controls amount of smoothing -# est is the measure of location. -# (Goal is to determine est(y) given x.) -# x is an n by p matrix of predictors. -# -# pyhat=T, predicted values are returned. -# -library(MASS) -library(akima) -if(SEED)set.seed(12) # set seed for cov.mve -if(eout && xout)stop("Not allowed to have eout=xout=TRUE") -if(!is.matrix(x))stop("Data are not stored in a matrix.") -if(nrow(x) != length(y))stop("Number of rows in x does not match length of y") -temp<-cbind(x,y) -p<-ncol(x) -p1<-p+1 -temp<-elimna(temp) # Eliminate any rows with missing values. -if(eout){ -keepit<-outfun(temp,plotit=FALSE)$keep -x<-x[keepit,] -y<-y[keepit] -} -if(xout){ -keepit<-outfun(x,plotit=FALSE,STAND=STAND,...)$keep -x<-x[keepit,] -y<-y[keepit] -} -if(zscale){ -for(j in 1:p1){ -temp[,j]<-(temp[,j]-median(temp[,j]))/mad(temp[,j]) -}} -x<-temp[,1:p] -y<-temp[,p1] -m<-cov.mve(x) -iout<-c(1:nrow(x)) -rmd<-1 # Initialize rmd -nval<-1 -for(i in 1:nrow(x))rmd[i]<-est(y[near3d(x,x[i,],fr,m)],...) -for(i in 1:nrow(x))nval[i]<-length(y[near3d(x,x[i,],fr,m)]) -if(ncol(x)==2){ -if(plotit){ -if(pr){ -if(!scale)print("With dependence, suggest using scale=TRUE") -} -fitr<-rmd[nval>nmin] -y<-y[nval>nmin] -x<-x[nval>nmin,] -iout<-c(1:length(fitr)) -nm1<-length(fitr)-1 -for(i in 1:nm1){ -ip1<-i+1 -for(k in ip1:length(fitr))if(sum(x[i,]==x[k,])==2)iout[k]<-0 -} -fitr<-fitr[iout>=1] # Eliminate duplicate points in the x-y plane -# This is necessary when doing three dimensional plots -# with the R function interp -if(LP)fitr=lplot(x[iout>=1,],fitr,pyhat=TRUE,pr=FALSE,plotit=FALSE)$yhat -mkeep<-x[iout>=1,] -fit<-interp(mkeep[,1],mkeep[,2],fitr,duplicate=duplicate) -persp(fit,theta=theta,phi=phi,expand=expand, -scale=scale,xlab=xlab,ylab=ylab,zlab=zlab,ticktype=ticktype) -}} -if(pyhat)last<-rmd -if(!pyhat)last <- "Done" - last -} - -mbmreg<-function(x,y,iter=20,bend=2*sqrt(ncol(x)+1)/nrow(x),xout=FALSE,outfun=outpro,...){ -# -# Compute a bounded M regression estimator using -# Huber Psi and Schweppe weights with -# regression outliers getting a weight of zero. -# -# This is the modified M-regression estimator in Chapter 8 -# -# The predictors are assumed to be stored in the n by p matrix x. 
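-# Example (hypothetical data; points flagged as regression outliers get weight zero):
-#   set.seed(1)
-#   x<-matrix(rnorm(100),ncol=2)
-#   y<-x[,1]+x[,2]+rnorm(50); y[1]<-15
-#   mbmreg(x,y)$coef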
-# -x<-as.matrix(x) -p1<-ncol(x)+1 -p<-ncol(x) -xy<-cbind(x,y) -xy<-elimna(xy) -x<-xy[,1:p] -y<-xy[,p1] -if(xout){ -m<-cbind(x,y) -flag<-outfun(x,plotit=FALSE,...)$keep -m<-m[flag,] -x<-m[,1:p] -y<-m[,p1] -} -x<-as.matrix(x) -if(is.matrix(y)){ -if(ncol(y)==1)y=as.vector(y) -} -x1<-cbind(1,x) -library(MASS) -reslms<-lmsreg(x,y)$resid -sighat<-sqrt(median(reslms^2)) -sighat<-1.4826*(1+(5/(length(y)-ncol(x)-1)))*sighat -if(sighat==0)warning("The estimated measure of scale, based on the residuals using lms regression, is zero") -temp<-ifelse(sighat*reslms>0,abs(reslms)/sighat,0*reslms) -wt<-ifelse(temp<=2.5,1,0) -init<-lsfit(x,y,wt) -resid<-init$residuals -nu<-sqrt(1-hat(x1)) -low<-ncol(x)+1 -for(it in 1:iter){ -ev<-sort(abs(resid)) -scale<-median(ev[c(low:length(y))])/qnorm(.75) -rov<-(resid/scale)/nu -psi<-ifelse(abs(rov)<=bend,rov,bend*sign(rov)) # Huber Psi -wt<-nu*psi/(resid/scale) -wt<-ifelse(temp<=2.5,wt,0) -new<-lsfit(x,y,wt) -if(abs(max(new$coef-init$coef)<.0001))break -init$coef<-new$coef -resid<-new$residuals -} -resid<-y-x1%*%new$coef -if(abs(max(new$coef-init$coef)>=.0001)) -paste("failed to converge in",iter,"steps") -list(coef=new$coef,residuals=resid,w=wt) -} - -rankisub<-function(x,y){ -# -# compute phat and an estimate of its variance -# -x<-x[!is.na(x)] # Remove missing values from x -y<-y[!is.na(y)] # Remove missing values from y -u<-outer(x,y,FUN="<") -p1<-0 -p2<-0 -for (j in 1:length(y)){ -temp<-outer(u[,j],u[,j]) -p1<-p1+sum(temp)-sum(u[,j]*u[,j]) -} -for (i in 1: length(x)){ -temp<-outer(u[i,],u[i,]) -p2<-p2+sum(temp)-sum(u[i,]*u[i,]) -} -p<-sum(u)/(length(x)*length(y)) -pad<-p -if(p==0)pad<-.5/(length(x)*length(y)) -if(p==1)pad<-(1-.5)/(length(x)*length(y)) -p1<-p1/(length(x)*length(y)*(length(x)-1)) -p2<-p2/(length(x)*length(y)*(length(y)-1)) -var<-pad*(1.-pad)*(((length(x)-1)*(p1-p^2)/(pad*(1-pad))+1)/(1-1/length(y))+ -((length(y)-1)*(p2-p^2)/(pad*(1-pad))+1)/(1-1/length(x))) -var<-var/(length(x)*length(y)) -list(phat=p,sqse=var) -} - -pbcor<-function(x,y,beta=.2){ -# Compute the percentage bend correlation between x and y. -# -# beta is the bending constant for omega sub N. -# -if(length(x)!=length(y))stop("The vectors do not have equal lengths") -m1=cbind(x,y) -m1<-elimna(m1) -nval=nrow(m1) -x<-m1[,1] -y<-m1[,2] -# Have eliminated missing values -temp<-sort(abs(x-median(x))) -omhatx<-temp[floor((1-beta)*length(x))] -temp<-sort(abs(y-median(y))) -omhaty<-temp[floor((1-beta)*length(y))] -a<-(x-pbos(x,beta))/omhatx -b<-(y-pbos(y,beta))/omhaty -a<-ifelse(a<=-1,-1,a) -a<-ifelse(a>=1,1,a) -b<-ifelse(b<=-1,-1,b) -b<-ifelse(b>=1,1,b) -pbcor<-sum(a*b)/sqrt(sum(a^2)*sum(b^2)) -test<-pbcor*sqrt((length(x) - 2)/(1 - pbcor^2)) -sig<-2*(1 - pt(abs(test),length(x)-2)) -list(cor=pbcor,test=test,p.value=sig,n=nval) -} - -rmanovab<-function(x,tr=.2,alpha=.05,grp=0,nboot=599){ -# -# A bootstrap-t for comparing the trimmed means of dependent groups. -# By default, 20% trimming is used with B=599 bootstrap samples. -# -# The optional argument grp is used to select a subset of the groups -# and exclude the rest. 
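-# Example (hypothetical data):
-#   set.seed(1)
-#   dat<-matrix(rnorm(90),ncol=3)   # n=30 measures on J=3 dependent groups
-#   rmanovab(dat)   # bootstrap-t test statistic, critical value and p-value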
-# -# x can be an n by J matrix or it can have list mode -# -if(is.data.frame(x))x=as.matrix(x) -if(!is.list(x) && !is.matrix(x))stop("Data must be stored in a matrix or in list mode.") -if(is.list(x))x=matl(x) -#{ -#if(sum(grp)==0)grp<-c(1:length(x)) -# put the data in an n by J matrix -#mat<-matrix(0,length(x[[1]]),length(grp)) -#for (j in 1:length(grp))mat[,j]<-x[[grp[j]]] -#} -if(is.matrix(x)){ -if(sum(grp)==0)grp<-c(1:ncol(x)) -mat<-x[,grp] -} -mat=elimna(mat) -J<-ncol(mat) -connum<-(J^2-J)/2 -bvec<-matrix(0,connum,nboot) -set.seed(2) # set seed of random number generator so that -# results can be duplicated. -print("Taking bootstrap samples. Please wait.") -data<-matrix(sample(nrow(mat),size=nrow(mat)*nboot,replace=TRUE),nrow=nboot) -xcen<-matrix(0,nrow(mat),ncol(mat)) -for (j in 1:J)xcen[,j]<-mat[,j]-mean(mat[,j],tr) #Center data -bvec<-apply(data,1,tsubrmanovab,xcen,tr) -# bvec is vector of nboot bootstrap test statistics. -icrit<-round((1-alpha)*nboot) -bvec<-sort(bvec) -crit<-bvec[icrit] -test<-rmanova(mat,tr,grp)$test -pv=mean(test<=bvec) -list(teststat=test,crit=crit,p.value=pv) -} - - -tsubrmanovab<-function(isub,x,tr){ -# -# Compute test statistic for trimmed means -# when comparing dependent groups. -# By default, 20% trimmed means are used. -# isub is a vector of length n, -# a bootstrap sample from the sequence of integers -# 1, 2, 3, ..., n -# -# This function is used by rmanovab -# -tsub<-rmanovab1(x[isub,],tr=tr)$test -tsub -} - - - - - -rmanovab1<-function(x,tr=.2,grp=c(1:length(x))){ -# -# A heteroscedastic one-way repeated measures ANOVA for trimmed means. -# -# The data are assumed to be stored in $x$ which can -# be either an n by J matrix, or an R variable having list mode. -# If the data are stored in list mode, -# length(x) is assumed to correspond to the total number of groups. -# By default, the null hypothesis is that all group have a common mean. -# To compare a subset of the groups, use grp to indicate which -# groups are to be compared. For example, if you type the -# command grp<-c(1,3,4), and then execute this function, groups -# 1, 3, and 4 will be compared with the remaining groups ignored. -# -if(!is.list(x) && !is.matrix(x))stop("Data must be stored in a matrix or in list mode.") -if(is.list(x)){ -J<-length(grp) # The number of groups to be compared -m1<-matrix(x[[grp[1]]],length(x[[grp[1]]]),1) -for(i in 2:J){ # Put the data into an n by J matrix -m2<-matrix(x[[grp[i]]],length(x[[i]]),1) -m1<-cbind(m1,m2) -} -} -if(is.matrix(x)){ -if(length(grp)=ncol(x))m1<-as.matrix(x) -J<-ncol(x) -} -# -# Raw data are now in the matrix m1 -# -m2<-matrix(0,nrow(m1),ncol(m1)) -xvec<-1 -g<-floor(tr*nrow(m1)) #2g is the number of observations trimmed. 
-for(j in 1:ncol(m1)){ # Putting Winsorized values in m2 -m2[,j]<-winval(m1[,j],tr) -xvec[j]<-mean(m1[,j],tr) -} -xbar<-mean(xvec) -qc<-(nrow(m1)-2*g)*sum((xvec-xbar)^2) -m3<-matrix(0,nrow(m1),ncol(m1)) -m3<-sweep(m2,1,apply(m2,1,mean)) # Sweep out rows -m3<-sweep(m3,2,apply(m2,2,mean)) # Sweep out columns -m3<-m3+mean(m2) # Grand Winsorized mean swept in -qe<-sum(m3^2) -test<-(qc/(qe/(nrow(m1)-2*g-1))) -# -# Next, estimate the adjusted degrees of freedom -# -v<-winall(m1)$cov -vbar<-mean(v) -vbard<-mean(diag(v)) -vbarj<-1 -for(j in 1:J){ -vbarj[j]<-mean(v[j,]) -} -A<-J*J*(vbard-vbar)^2/(J-1) -B<-sum(v*v)-2*J*sum(vbarj^2)+J*J*vbar^2 -ehat<-A/B -etil<-(nrow(m2)*(J-1)*ehat-2)/((J-1)*(nrow(m2)-1-(J-1)*ehat)) -etil<-min(1.,etil) -df1<-(J-1)*etil -df2<-(J-1)*etil*(nrow(m2)-2*g-1) -siglevel<-1-pf(test,df1,df2) -list(test=test,df=c(df1,df2),p.value=siglevel,tmeans=xvec,ehat=ehat,etil=etil) -} - - - - -mee<-function(x,y,alpha=.05){ -# -# For two independent groups, compute a 1-\alpha confidence interval -# for p=P(X 0){print("Warning: Tied values detected") -print("so even if distributions are identical,") -print("P(X 0) -print("Tied values detected. Interchanging columns might give different results. That is, comparing rows based on P(XY)") -ck<-(K^2-K)/2 -cj<-(J^2-J)/2 -tc<-ck*cj -if(tc>28){ -print("Warning: The number of contrasts exceeds 28.") -print("The critical value being used is based on 28 contrasts") -tc<-28 -} -idmat<-matrix(NA,nrow=tc,ncol=8) -dimnames(idmat)<-list(NULL,c("row","row","col","col","ci.lower","ci.upper","estimate","test.stat")) -crit<-smmcrit(300,tc) -if(alpha != .05){ -crit<-smmcrit01(300,tc) -if(alpha != .01){print("Warning: Only alpha = .05 and .01 are allowed,") -print("alpha = .01 is being assumed.") -} -} -phatsqse<-0 -phat<-0 -allit<-0 -jcount<-0-K -it<-0 -for(j in 1:J){ -for(jj in 1:J){ -if(j < jj){ -for(k in 1:K){ -for(kk in 1:K){ -if(k < kk){ -it<-it+1 -idmat[it,1:4]<-c(j,jj,k,kk) -}}}}} -jcount<-jcount+K -for(k in 1:K){ -for(kk in 1:K){ -if(k < kk){ -allit<-allit+1 -xx<-x[[grp[k+jcount]]] -yy<-x[[grp[kk+jcount]]] -temp<-rankisub(xx,yy) -phat[allit]<-temp$phat -phatsqse[allit]<-temp$sqse -}}}} -# -# Compute the contrast matrix. Each row contains a 1, -1 and the rest 0 -# That is, all pairwise comparisons among K groups. -# -con<-matrix(0,cj,J) -id<-0 -Jm<-J-1 -for (j in 1:Jm){ -jp<-j+1 -for (k in jp:J){ -id<-id+1 -con[id,j]<-1 -con[id,k]<-0-1 -}} -IK<-diag(ck) -B<-kron(con,IK) -ntest<-ck*(J^2-J)/2 -test<-0 -civecl<-0 -civecu<-0 -for (itest in 1:ntest){ -temp1<-sum(B[itest,]*phat) -idmat[itest,7]<-temp1 -idmat[itest,8]<-temp1/sqrt(sum(B[itest,]^2*phatsqse)) -idmat[itest,5]<-temp1-crit*sqrt(sum(B[itest,]^2*phatsqse)) -idmat[itest,6]<-temp1+crit*sqrt(sum(B[itest,]^2*phatsqse)) -} -nsig<-sum((abs(idmat[,8])>crit)) -list(phat=phat,ci=idmat,crit=crit,nsig=nsig) -} - - -regts1<-function(vstar,yhat,res,mflag,x,tr){ -ystar<-yhat+res*vstar -bres<-ystar-mean(ystar,tr) -rval<-0 -for (i in 1:nrow(x)){ -rval[i]<-sum(bres[mflag[,i]]) -} -rval -} - -bptd<-function(x,tr=.2,alpha=.05,con=0,nboot=599){ -# -# Using the percentile t bootstrap method, -# compute a .95 confidence interval for all linear contasts -# specified by con, a J by C matrix, where C is the number of -# contrasts to be tested, and the columns of con are the -# contrast coefficients. -# -# If con is not specified, all pairwise comparisons are performed. -# -# The trimmed means of dependent groups are being compared. -# By default, 20% trimming is used with B=599 bootstrap samples. 
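-# Example (hypothetical data; no missing values allowed):
-#   set.seed(1)
-#   dat<-matrix(rnorm(120),ncol=4)
-#   bptd(dat)   # percentile t intervals for all pairwise contrasts of the trimmed means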
-# -# x can be an n by J matrix or it can have list mode -# -if(is.data.frame(x))x=as.matrix(x) -if(!is.list(x) && !is.matrix(x))stop("Data must be stored in a matrix or in list mode.") -if(is.list(x)){ -if(is.matrix(con)){ -if(length(x)!=nrow(con))stop("The number of rows in con is not equal to the number of groups.") - }} -if(is.list(x)){ -# put the data in an n by J matrix -mat<-matrix(0,length(x[[1]]),length(x)) -for (j in 1:length(x))mat[,j]<-x[[j]] -} -if(is.matrix(x))mat=x -J<-ncol(mat) -Jm<-J-1 -if(sum(con^2)==0){ -d<-(J^2-J)/2 -con<-matrix(0,J,d) -id<-0 -for (j in 1:Jm){ -jp<-j+1 -for (k in jp:J){ -id<-id+1 -con[j,id]<-1 -con[k,id]<-0-1 -}}} -if(is.matrix(x)){ -if(ncol(x)!=nrow(con))stop("The number of rows in con is not equal to the number of groups.") -mat<-x -} -if(sum(is.na(mat)>=1))stop("Missing values are not allowed.") -J<-ncol(mat) -connum<-ncol(con) -bvec<-matrix(0,connum,nboot) -set.seed(2) # set seed of random number generator so that -# results can be duplicated. -# data is an nboot by n matrix -xcen<-matrix(0,nrow(mat),ncol(mat)) #An n by J matrix -xbars<-matrix(0,nboot,ncol(mat)) -psihat<-matrix(0,connum,nboot) -print("Taking bootstrap samples. Please wait.") -data<-matrix(sample(nrow(xcen),size=nrow(mat)*nboot,replace=TRUE),nrow=nboot) -for (j in 1:J){ -xcen[,j]<-mat[,j]-mean(mat[,j],tr) #Center data -xbars[,j]<-apply(data,1,bptdmean,xcen[,j],tr) -} -for (ic in 1:connum){ -paste("Working on contrast number",ic) -bvec[ic,]<-apply(data,1,bptdsub,xcen,tr,con[,ic]) -# bvec is a connum by nboot matrix containing the bootstrap sq standard error -psihat[ic,]<-apply(xbars,1,bptdpsi,con[,ic]) -} -bvec<-psihat/sqrt(bvec) #bvec now contains bootstrap test statistics -bvec<-abs(bvec) #Doing two-sided confidence intervals -icrit<-round((1-alpha)*nboot) -critvec<-apply(bvec,2,max) -critvec<-sort(critvec) -crit<-critvec[icrit] -psihat<-matrix(0,connum,4) -dimnames(psihat)<-list(NULL,c("con.num","psihat","ci.lower","ci.upper")) -test<-matrix(NA,connum,3) -dimnames(test)<-list(NULL,c("con.num","test","se")) -isub<-c(1:nrow(mat)) -tmeans<-apply(mat,2,mean,trim=tr) -sqse<-1 -psi<-1 -for (ic in 1:ncol(con)){ -sqse[ic]<-bptdsub(isub,mat,tr,con[,ic]) -psi[ic]<-sum(con[,ic]*tmeans) -psihat[ic,1]<-ic -psihat[ic,2]<-psi[ic] -psihat[ic,3]<-psi[ic]-crit*sqrt(sqse[ic]) -psihat[ic,4]<-psi[ic]+crit*sqrt(sqse[ic]) -test[ic,1]<-ic -test[ic,2]<-psi[ic]/sqrt(sqse[ic]) -test[ic,3]<-sqrt(sqse[ic]) -} -list(test=test,psihat=psihat,crit=crit,con=con) -} - -twomanbt<-function(x,y,tr=.2,alpha=.05,nboot=599){ -# -# Two-sample Behrens-Fisher problem. -# -# For each of two independent groups, -# have p measures for each subject. The goal is to compare the -# trimmed means of the first measure, the trimmed means for the second -# and so on. So there are a total of p comparisons between the two -# groups, one for each measure. -# -# The percentile t bootstrap method is used to -# compute a .95 confidence interval. -# -# By default, 20% trimming is used with B=599 bootstrap samples. -# -# x contains the data for the first group; it -# can be an n by J matrix or it can have list mode. -# y contains the data for the second group. 
-# -if(!is.list(x) && !is.matrix(x))stop("Data must be stored in a matrix or in list mode.") -if(!is.list(y) && !is.matrix(y))stop("Data must be stored in a matrix or in list mode.") -if(is.list(x)){ -# put the data in an n by p matrix -matx<-matrix(0,length(x[[1]]),length(x)) -for (j in 1:length(x))matx[,j]<-x[[j]] -} -if(is.list(y)){ -# put the data in an n by p matrix -maty<-matrix(0,length(y[[1]]),length(y)) -for (j in 1:length(y))maty[,j]<-y[[j]] -} -if(is.matrix(x)){ -matx<-x -} -if(is.matrix(y)){ -maty<-y -} -if(ncol(matx)!=ncol(maty))stop("The number of variables for group one is not equal to the number for group 2") -if(sum(is.na(mat)>=1))stop("Missing values are not allowed.") -J<-ncol(mat) -connum<-ncol(matx) -bvec<-matrix(0,connum,nboot) -set.seed(2) # set seed of random number generator so that -# results can be duplicated. -xcen<-matrix(0,nrow(matx),ncol(matx)) -ycen<-matrix(0,nrow(maty),ncol(maty)) -for (j in 1:connum)xcen[,j]<-matx[,j]-mean(matx[,j],tr) #Center data -for (j in 1:connum)ycen[,j]<-maty[,j]-mean(maty[,j],tr) #Center data -print("Taking bootstrap samples. Please wait.") -bootx<-sample(nrow(matx),size=nrow(matx)*nboot,replace=TRUE) -booty<-sample(nrow(maty),size=nrow(maty)*nboot,replace=TRUE) -matval<-matrix(0,nrow=nboot,ncol=connum) -for (j in 1:connum){ -datax<-matrix(xcen[bootx,j],ncol=nrow(matx)) -datay<-matrix(ycen[booty,j],ncol=nrow(maty)) -paste("Working on variable", j) -top<- apply(datax, 1., mean, tr) - apply(datay, 1., mean, tr) -botx <- apply(datax, 1., trimse, tr) -boty <- apply(datay, 1., trimse, tr) -matval[,j]<-abs(top)/sqrt(botx^2. + boty^2.) -} -bvec<-apply(matval,1,max) -icrit<-round((1-alpha)*nboot) -bvec<-sort(bvec) -crit<-bvec[icrit] -psihat<-matrix(0,ncol=4,nrow=connum) -dimnames(psihat)<-list(NULL,c("con.num","psihat","ci.lower","ci.upper")) -test<-matrix(0,ncol=3,nrow=connum) -dimnames(test)<-list(NULL,c("con.num","test","se")) -for(j in 1:ncol(matx)){ -temp<-yuen(matx[,j],maty[,j],tr=tr) -test[j,1]<-j -test[j,2]<-abs(temp$test) -test[j,3]<-temp$se -psihat[j,1]<-j -psihat[j,2]<-mean(matx[,j],tr)-mean(maty[,j]) -psihat[j,3]<-mean(matx[,j],tr)-mean(maty[,j])-crit*temp$se -psihat[j,4]<-mean(matx[,j],tr)-mean(maty[,j])+crit*temp$se -} -list(psihat=psihat,teststat=test,critical.value=crit) -} - - - -bootdep<-function(x,tr=.2,nboot=500){ -# -# x is a matrix (n by p) or has list mode -# Goal: Obtain boostrap samples and compute -# the trimmed each for each of the p variables. -# Return the bootstrap means in a matrix -# -# tr is the amount of trimming -# nboot is the number of bootstrap samples -# -if(is.matrix(x))m1<-x -if(is.list(x)){ -# put the data into a matrix -m1<-matrix(NA,ncol=length(x)) -for(j in 1:length(x))m1[,j]<-x[[j]] -} -data<-matrix(sample(nrow(m1),size=nrow(m1)*nboot,replace=TRUE),nrow=nboot) -bvec<-matrix(NA,ncol=ncol(m1),nrow=nboot) -for(j in 1:ncol(m1)){ -temp<-m1[,j] -bvec[,j]<-apply(data, 1., bootdepsub,temp,tr) -} -# return a nboot by p matrix of bootstrap trimmed means. -bvec -} - -bootdepsub<-function(isub,x,tr){ -tsub<-mean(x[isub],tr) -tsub -} -corb<-function(x,y,corfun=pbcor,nboot=599,alpha=.05,plotit=FALSE,xlab='X',ylab='Y',SEED=TRUE,...){ -# -# Compute a 1-alpha confidence interval for a correlation. -# The default correlation is the percentage bend. -# -# The function corfun is any R function that returns a -# correlation coefficient in corfun$cor. The functions pbcor and -# wincor follow this convention. -# -# When using Pearson's correlation, and when n<250, use -# lsfitci instead. 
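-# Example (hypothetical data):
-#   set.seed(1)
-#   x<-rnorm(40); y<-x+rnorm(40)
-#   corb(x,y)                 # CI and p-value for the percentage bend correlation
-#   corb(x,y,corfun=wincor)   # same, using the Winsorized correlation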
-# -# The default number of bootstrap samples is nboot=599 -# -m1=cbind(x,y) -m1<-elimna(m1) # Eliminate rows with missing values -nval=nrow(m1) -x<-m1[,1] -y<-m1[,2] -est<-corfun(x,y,...)$cor -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot) -bvec<-apply(data,1,corbsub,x,y,corfun,...) # A 1 by nboot matrix. -ihi<-floor((1-alpha/2)*nboot+.5) -ilow<-floor((alpha/2)*nboot+.5) -bsort<-sort(bvec) -corci<-1 -corci[1]<-bsort[ilow] -corci[2]<-bsort[ihi] -phat <- sum(bvec < 0)/nboot -sig <- 2 * min(phat, 1 - phat) -if(plotit)outpro(cbind(x,y),xlab=xlab,ylab=ylab,plotit=TRUE) -list(cor.ci=corci,p.value=sig,cor.est=est) -} - -corbsub<-function(isub,x,y,corfun,...){ -# -# Compute correlation for x[isub] and y[isub] -# isub is a vector of length n, -# a bootstrap sample from the sequence of integers -# 1, 2, 3, ..., n -# -# This function is used by other functions when computing -# bootstrap estimates. -# -# corfun is some correlation function already stored in R -# -corbsub<-corfun(x[isub],y[isub],...)$cor -corbsub -} - - - -depreg<-function(x,y,xout=FALSE,outfun=out,...){ -# -# Compute the depth regression estimator. -# Only a single predictor is allowed in this version -# Perhaps use instead -# -if(is.matrix(x)){ -if(ncol(x)>=2)stop("Only a single predicor is allowed") -x<-as.vector(x) -} -xy=cbind(x,y) -xy=elimna(xy) -if(xout){ -flag<-outfun(xy[,1],plotit=FALSE,...)$keep -xy<-xy[flag,] -} -x=xy[,1] -y=xy[,2] -ord<-order(x) -xs<-x[ord] -ys<-y[ord] -vec1<-outer(ys,ys,"-") -vec2<-outer(xs,xs,"-") -v1<-vec1[vec2>0] -v2<-vec2[vec2>0] -slope<-v1/v2 -vec3<-outer(ys,ys,"+") -vec4<-outer(xs,xs,"+") -v3<-vec3[vec2>0] -v4<-vec4[vec2>0] -deep<-NA -inter<-v3/2-slope*v4/2 -temp<-matrix(c(inter,slope),ncol=2) -deep<-apply(temp,1,rdepth.orig,x,y) -best<-max(deep) -coef<-NA -coef[2]<-mean(slope[deep==best]) -coef[1]<-mean(inter[deep==best]) -res<-y-coef[2]*x-coef[1] -list(coef=coef,residuals=res) -} - -tsgreg<-function(x,y,tries=(length(y)^2-length(y))/2){ -# -# -x<-as.matrix(x) -if(nrow(x)!=length(y))stop("Length of y must match the number of rows of x") -# eliminate any rows with missing values. -m1<-cbind(x,y) -m1<-elimna(m1) -x<-m1[,1:ncol(x)] -y<-m1[,ncol(x)+1] -set.seed(2) -data<-matrix(NA,ncol=ncol(x)+1,nrow=tries) -for(i in 1:tries){ -data[i,]<-sample(length(y),size=ncol(x)+1,replace=FALSE) -} -bvec <- apply(data, 1,tsgregs1,x,y) -coef<-0 -numzero<-0 -loc<-0 -for (i in 1:ncol(x)){ -ip<-i+1 -temp<-bvec[ip,] -loc[i]<-median(x[,i]) -coef[i+1]<-median(temp[temp!=0]) -numzero[i]<-length(temp[temp==0]) -} -ip<-ncol(x)+1 -coef[1]<-median(y)-sum(coef[2:ip]*loc) -res<-y-x %*% coef[2:ip] - coef[1] -list(coef=coef,residuals=res,numzero=numzero) -} -tsgregs1<-function(isub,x,y){ -# -# This function is used by tsgreg -# -# Perform regression using x[isub,] to predict y[isub] -# isub is a vector of length nsub, determined by tsgreg -# -tsgregs1<-lsfit(x[isub,],y[isub])$coef -} - -lts1reg<-function(x,y,tr=.2,h=NA){ -# -# Compute the least trimmed squares regression estimator. 
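-# Example (hypothetical data):
-#   set.seed(1)
-#   x<-rnorm(30); y<-1+2*x+rnorm(30); y[1]<-20   # one gross outlier
-#   lts1reg(x,y)$coef   # intercept and slope resistant to the outlier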
-# Only a single predictor is allowed in this version -# -if(is.na(h))h<-length(x)-floor(tr * length(x)) -ord<-order(x) -xs<-x[ord] -ys<-y[ord] -vec1<-outer(ys,ys,"-") -vec2<-outer(xs,xs,"-") -v1<-vec1[vec2>0] -v2<-vec2[vec2>0] -slope<-v1/v2 -vec3<-outer(ys,ys,"+") -vec4<-outer(xs,xs,"+") -v3<-vec3[vec2>0] -v4<-vec4[vec2>0] -val<-NA -inter<-v3/2-slope*v4/2 -for(i in 1:length(slope)){ -#risk<-(y[vec2>0]-slope[i]*x[vec2>0]-inter[i])^2 -risk<-(y-slope[i]*x-inter[i])^2 -risk<-sort(risk) -val[i]<-sum(risk[1:h]) -} -best<-min(val) -coef<-NA -coef[2]<-mean(slope[val==best]) -coef[1]<-mean(inter[val==best]) -res<-y-coef[2]*x-coef[1] -list(coef=coef,residuals=res) -} - -man2pb<-function(x,y,alpha=.05,nboot=NA,crit=NA,SEED=TRUE){ -# -# Two-sample Behrens-Fisher problem. -# -# For each of two independent groups, -# have P measures for each subject. The goal is to compare the 20% -# trimmed means of the first group to the trimmed means for the second; -# this is done for each of the P measures. -# -# The percentile bootstrap method is used to -# compute a .95, or .975, or .99 confidence interval. -# -# Only 20% trimming is allowed. -# -# x contains the data for the first group; it -# can be an n by J matrix or it can have list mode. -# y contains the data for the second group. -# -# Vectors with missing values are eliminated from the analysis. -# -if(!is.list(x) && !is.matrix(x))stop("Data must be stored in a matrix or in list mode.") -if(!is.list(y) && !is.matrix(y))stop("Data must be stored in a matrix or in list mode.") -if(is.list(x)){ -# put the data in an n by p matrix -matx<-matrix(0,length(x[[1]]),length(x)) -for (j in 1:length(x))matx[,j]<-x[[j]] -} -if(is.list(y)){ -# put the data in an n by p matrix -maty<-matrix(0,length(y[[1]]),length(y)) -for (j in 1:length(y))maty[,j]<-y[[j]] -} -if(is.matrix(x)){ -matx<-x -} -if(is.matrix(y)){ -maty<-y -} -if(ncol(matx)!=ncol(maty))stop("The number of variables for group 1 is not equal to the number for group 2") -if(sum(is.na(matx)>=1))matx<-elimna(matx) -if(sum(is.na(maty)>=1))maty<-elimna(maty) -J<-ncol(matx) -connum<-ncol(matx) -if(is.na(nboot)){ -if(ncol(matx)<=4)nboot<-2000 -if(ncol(matx)>4)nboot<-5000 -} -# -# Determine critical value -# -if(ncol(matx)==2){ -if(alpha==.05)crit<-.0125 -if(alpha==.025)crit<-.0060 -if(alpha==.01)crit<-.0015 -} -if(ncol(matx)==3){ -if(alpha==.05)crit<-.007 -if(alpha==.025)crit<-.003 -if(alpha==.01)crit<-.001 -} -if(ncol(matx)==4){ -if(alpha==.05)crit<-.0055 -if(alpha==.025)crit<-.0020 -if(alpha==.01)crit<-.0005 -} -if(ncol(matx)==5){ -if(alpha==.05)crit<-.0044 -if(alpha==.025)crit<-.0016 -if(alpha==.01)crit<-.0005 -} -if(ncol(matx)==6){ -if(alpha==.05)crit<-.0038 -if(alpha==.025)crit<-.0018 -if(alpha==.01)crit<-.0004 -} -if(ncol(matx)==7){ -if(alpha==.05)crit<-.0028 -if(alpha==.025)crit<-.0010 -if(alpha==.01)crit<-.0002 -} -if(ncol(matx)==8){ -if(alpha==.05)crit<-.0026 -if(alpha==.025)crit<-.001 -if(alpha==.01)crit<-.0002 -} -if(ncol(matx)>8){ -# Use an approximation of the critical value -if(alpha==.025)warning("Can't determine a critical value when alpha=.025 and the number of groups exceeds 8.") -nmin<-min(nrow(matx),nrow(maty)) -if(alpha==.05){ -if(nmin<100)wval<-smmcrit(60,ncol(matx)) -if(nmin>=100)wval<-smmcrit(300,ncol(matx)) -wval<-0-wval -crit<-pnorm(wval) -} -if(alpha==.01){ -if(nmin<100)wval<-smmcrit01(60,ncol(matx)) -if(nmin>=100)wval<-smmcrit01(300,ncol(matx)) -wval<-0-wval -crit<-pnorm(wval) -} -} -if(is.na(crit))warning("Critical values can be determined for alpha=.05, .025 and .01 only") 
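-# The tabled per-comparison level crit determines which order statistics of the
-# sorted bootstrap differences form the confidence interval (the icl-th and icu-th values).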
-icl<-ceiling(crit*nboot) -icu<-ceiling((1-crit)*nboot) -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -print("Taking bootstrap samples. Please wait.") -bootx<-bootdep(matx,tr=.2,nboot) -booty<-bootdep(maty,tr=.2,nboot) - # - # Now have an nboot by J matrix of bootstrap values. - # -test<-1 -for (j in 1:connum){ -test[j]<-sum(bootx[,j].5)test[j]<-1-test[j] -} -output <- matrix(0, connum, 5) - dimnames(output) <- list(NULL, c("variable #", "psihat", "p.value", - "ci.lower", "ci.upper")) - tmeanx <- apply(matx, 2, mean, trim = .2) - tmeany <- apply(maty, 2, mean, trim = .2) - psi <- 1 - for(ic in 1:connum) { - output[ic, 2] <- tmeanx[ic]-tmeany[ic] - output[ic, 1] <- ic - output[ic, 3] <- test[ic] - temp <- sort(bootx[,ic]-booty[,ic]) -#print(length(temp)) - output[ic, 4] <- temp[icl] - output[ic, 5] <- temp[icu] - } - list(output = output, crit.p.value = crit) -} - - -qhatds1<-function(isubx,x,y){ -# -# function used by qhat when working on bootstrap estimates. -# -xx<-x[isubx] -yy<-y[isubx] -group<-disker(xx,yy,x,op=2)$zhat -group -} -qhatd<-function(x,y,nboot=50){ -# -# Estimate Q, a nonparametric measure of effect size, using -# the .632 method of estimating prediction error. -# (See Efron and Tibshirani, 1993, pp. 252--254) -# -# The default number of bootstrap samples is nboot=50 -# -# This function is for dependent groups. For independent groups, use -# qhati -# -set.seed(2) # set seed of random number generator so that -# results can be duplicated. -print("Taking bootstrap samples. Please wait.") -data<-matrix(sample(length(x),size=length(x)*nboot,replace=TRUE),nrow=nboot) -# data is an nboot by n matrix containing subscripts for bootstrap sample -bid<-apply(data,1,idb,length(x)) -# bid is a n by nboot matrix. If the jth bootstrap sample from -# 1, ..., n contains the value i, bid[i,j]=0; otherwise bid[i,j]=1 -yhat<-apply(data,1,qhatds1,x,y) -bi<-apply(bid,1,sum) # B sub i in notation of Efron and Tibshirani, p. 253 -temp<-(bid*yhat) -diff<-apply(temp,1,sum) -temp<-diff/bi -ep0<-sum(temp[!is.na(temp)])/length(y) -aperror<-disker(x,y)$phat # apparent error -regpre<-.368*aperror+.632*ep0 -list(app.error=aperror,qhat.632=regpre) -} - - -winmean<-function(x,tr=.2,na.rm=TRUE){ -if(na.rm)x=elimna(x) -winmean<-mean(winval(x,tr)) -winmean -} - - -kerden<-function(x,q=.5,xval=0){ -# Compute the kernel density estimator of the -# probability density function evaluated at the qth quantile. -# -# x contains vector of observations -# q is the quantile of interest, the default is the median. -# If you want to evaluate f hat at xval rather than at the -# q th quantile, set q=0 and xval to desired value. 
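-# Example (hypothetical data):
-#   set.seed(1)
-#   x<-rnorm(100)
-#   kerden(x)             # density estimate at the sample median
-#   kerden(x,q=0,xval=1)  # density estimate at the point x=1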
-# -y<-sort(x) -n<-length(x) -temp<-idealf(x) -h<-1.2*(temp$qu-temp$ql)/n^(.2) -iq<-floor(q*n+.5) -qhat<-y[iq] -if (q==0) qhat<-xval -xph<-qhat+h -A<-length(y[y<=xph]) -xmh<-qhat-h -B<-length(y[y0 & !is.na(l)])+length(u[u<0 & !is.na(u)]) -qhat<-c(1:length(x))/length(x) -m<-matrix(c(qhat,l,u),length(x),3) -dimnames(m)<-list(NULL,c('qhat','lower','upper')) -if(plotit){ -temp2 <- m[, 2] -temp2 <- temp2[!is.na(temp2)] -xsort<-sort(x) -ysort<-sort(y) -del<-0 -for (i in 1:length(x)){ -ival<-round(length(y)*i/length(x)) -if(ival<=0)ival<-1 -if(ival>length(y))ival<-length(y) -del[i]<-ysort[ival]-xsort[i] -} -xaxis<-c(xsort,xsort,xsort) -yaxis<-c(del,m[,2],m[,3]) -plot(xaxis,yaxis,type='n',ylab='delta',xlab='x (first group)') -lines(xsort,del) -lines(xsort,m[,2],lty=2) -lines(xsort,m[,3],lty=2) -temp <- summary(x) - text(temp[3], min(temp2), '+') - text(temp[2], min(temp2), 'o') - text(temp[5], min(temp2), 'o') -} -list(m=m,crit=crit,numsig=num,prob.coverage=1-kswsig(n1,n2,crit)) -} - - -runcor<-function(x,y,z,fr=1,corflag=FALSE,corfun=pbcor,plotit=TRUE,rhat=FALSE){ -# -# Estimate how the correlation between x and y varies with z -# -# running correlation using interval method -# -# fr controls amount of smoothing -# -# corfun is the correlation to be used. It is assumed that -# corfun is an R function that returns a correlation coefficient -# in corfun$cor -# -# To use Pearsons correlation, set corflag=T -# -temp<-cbind(x,y,z) # Eliminate any rows with missing values -temp<-elimna(temp) -x<-temp[,1] -y<-temp[,2] -z<-temp[,3] -plotit<-as.logical(plotit) -rmd<-NA -if(!corflag){ -for(i in 1:length(x)){ -flag<-near(z,z[i],fr) -if(sum(flag)>2)rmd[i]<-corfun(x[flag],y[flag])$cor -}} -if(corflag){ -for(i in 1:length(x)){ -flag<-near(z,z[i],fr) -if(sum(flag)>2)rmd[i]<-cor(x[flag],y[flag]) -}} -if(plotit){ -plot(c(max(z),min(z),z),c(1,-1,rmd),xlab="Modifier",ylab="Correlation",type="n") -sz<-sort(z) -zorder<-order(z) -sysm<-rmd[zorder] -lines(sz,sysm) -} -if(!rhat)rmd<-"Done" -rmd -} - - -pcorb<-function(x,y,SEED=TRUE){ -# Compute a .95 confidence interval for Pearson's correlation coefficient. -# -# This function uses an adjusted percentile bootstrap method that -# gives good results when the error term is heteroscedastic. -# -nboot<-599 #Number of bootstrap samples -xy<-elimna(cbind(x,y)) -x<-xy[,1] -y<-xy[,2] -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -#print("Taking bootstrap samples; please wait") -data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot) -bvec<-apply(data,1,pcorbsub,x,y) # A 1 by nboot matrix. -ilow<-15 -ihi<-584 -if(length(y) < 250){ -ilow<-14 -ihi<-585 -} -if(length(y) < 180){ -ilow<-11 -ihi<-588 -} -if(length(y) < 80){ -ilow<-8 -ihi<-592 -} -if(length(y) < 40){ -ilow<-7 -ihi<-593 -} -bsort<-sort(bvec) -r<-cor(x,y) -ci<-c(bsort[ilow],bsort[ihi]) -list(r=r,ci=ci) -} - - -twobici<-function(r1=sum(elimna(x)),n1=length(elimna(x)),r2=sum(elimna(y)),n2=length(elimna(y)), -x=NA,y=NA,alpha=.05){ -# -# Compute confidence interval for p1-p2, -# the difference between probabilities of -# success for a two binomials using Beal's method. 
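-# Example (hypothetical counts and 0-1 data):
-#   twobici(r1=16,n1=40,r2=9,n2=35)            # summary counts for the two groups
-#   twobici(x=c(1,0,1,1,0,1),y=c(0,0,1,0,1,1)) # raw data; r and n computed from x and y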
-# -# r is number of successes -# n is sample size -# if x contains data, r1 is taken to be the -# number of 1s in x and n1 is length(x) -# -if(length(r1)>1)stop("r1 must be a single number, not a vector") -if(length(n1)>1)stop("n1 must be a single number, not a vector") -if(length(r2)>1)stop("r2 must be a single number, not a vector") -if(!is.na(sum(r1)) || !is.na(sum(n1)) || !is.na(sum(r2)) || !is.na(sum(n2))){ -if(r1<0 || n1<0)stop("Both r1 and n1 must be greater than 0") -if(r1 > n1)stop("r1 can't be greater than n1") -if(r2<0 || n2<0)stop("Both r2 and n2 must be greater than 0") -if(r2 > n2)stop("r2 can't be greater than n2") -} -if(!is.na(sum(x))){ -r1<-sum(x) -n1<-length(x) -} -if(!is.na(sum(y))){ -r2<-sum(y) -n2<-length(y) -} -a<-(r1/n1)+(r2/n2) -b<-(r1/n1)-(r2/n2) -u<-.25*((1/n1)+(1/n2)) -v<-.25*((1/n1)-(1/n2)) -V<-u*((2-a)*a-b^2)+2*v*(1-a)*b -crit<-qchisq(1-alpha/2,1) -A<-sqrt(crit*(V+crit*u^2*(2-a)*a+crit*v^2*(1-a)^2)) -B<-(b+crit*v*(1-a))/(1+crit*u) -ci<-NA -ci[1]<-B-A/(1+crit*u) -ci[2]<-B+A/(1+crit*u) -p1<-r1/n1 -p2<-r2/n2 -list(ci=ci,p1=p1,p2=p2) -} - -runmean<-function(x,y,fr=1,tr=.2,pyhat=FALSE,eout=FALSE,outfun=out,plotit=TRUE,xout=FALSE, -xlab="x",ylab="y"){ -# -# running mean using interval method -# -# fr controls amount of smoothing -# tr is the amount of trimming -# -# Missing values are automatically removed. -# -if(eout && xout)xout<-FALSE -temp<-cbind(x,y) -temp<-elimna(temp) # Eliminate any rows with missing values -if(eout){ -flag<-outfun(temp,plotit=FALSE)$keep -temp<-temp[flag,] -} -if(xout){ -flag<-outfun(x,plotit=FALSE)$keep -temp<-temp[flag,] -} -x<-temp[,1] -y<-temp[,2] -pyhat<-as.logical(pyhat) -rmd<-c(1:length(x)) -for(i in 1:length(x))rmd[i]<-mean(y[near(x,x[i],fr)],tr) -if(pyhat)return(rmd) -if(plotit){ -plot(x,y,xlab=xlab,ylab=ylab) -sx<-sort(x) -xorder<-order(x) -sysm<-rmd[xorder] -tempx<-(!duplicated(sx)) -lines(sx[tempx], sysm[tempx]) -}} - -pcorbsub<-function(isub, x, y) -{ - # - # Compute Pearson's correlation using x[isub] and y[isub] - # isub is a vector of length n, - # a bootstrap sample from the sequence of integers - # 1, 2, 3, ..., n - # - pcorbsub<-cor(x[isub],y[isub]) - pcorbsub -} - -pow1<-function(n,Del,alpha){ -# -# Determine power of Student's T in the -# one-sided, one-sample case where -# -# n=sample size -# Del=(mu0-mu1)/sigma -# alpha=Type I error probability -# mu0 is hypothesized value -# mu1 is some non-null value for the mean. -# -Del<-abs(Del) -if(alpha<=0 || alpha>=1)stop("alpha must be between 0 and 1") -K11<-1-alpha -K5<-sqrt(n)*Del -# Next, use the Kraemer-Paik (1979, Technometrics, 21, 357-360) -# approximation of the noncentral T. -K6<-n-1 -K14<-qt(K11,K6) -K7<-K14*sqrt(1+K5*K5/K6) -K8<-K5*sqrt(1+K14*K14/K6) -K9<-K7-K8 -pow1<-1-pt(K9,K6) -pow1 -} - -stein1<-function(x,del,alpha=.05,pow=.8,oneside=FALSE,n=NULL,VAR=NULL){ -# -# Performs Stein's method on the data in x. -# In the event additional observations are required -# and can be obtained, use the R function stein2. 
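-# Example (hypothetical data):
-#   set.seed(1)
-#   x<-rnorm(15)       # first-stage data
-#   stein1(x,del=.5)   # total sample size needed for power .8 when del=.5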
-# -del<-abs(del) -if(is.null(n))n<-length(x) -if(is.null(VAR))VAR=var(x) -df<-n-1 -if(!oneside)alpha<-alpha/2 -d<-(del/(qt(pow,df)-qt(alpha,df)))^2 -N<-max(c(n,floor(VAR/d)+1)) -N -} - -stein2<-function(x1,x2,mu0=0,alpha=.05){ -# -# Do second stage of Stein's method -# x1 contains first stage data -# x2 contains first stage data -# mu0 is the hypothesized value -# -n<-length(x1) -df<-n-1 -N<-n+length(x2) -test<-sqrt(N)*(mean(c(x1,x2))-mu0)/sqrt(var(x1)) -crit <- qt(1 - alpha/2, df) -low<- mean(c(x1,x2))-crit*sqrt(var(x1)) -up<- mean(c(x1,x2))+crit*sqrt(var(x1)) -sig<-2*(1-pt(test,df)) -list(ci = c(low, up), siglevel =sig,mean=mean(c(x1,x2)), -teststat = test, crit = crit, df = df) -} - - -ci2bin<-function(r1=sum(x),n1=length(x),r2=sum(y),n2=length(y),x=NA,y=NA,alpha=0.05){ -# -# Compute a confidence interval for the -# difference between probability of success -# for two independent binomials -# -# r1=number of successes in group 1 -# n1=number of observations in group 1 -# -cr<-qchisq(1-alpha,1) -p1<-r1/n1 -p2<-r2/n2 -a<-p1+p2 -b<-p1-p2 -u<-.25*(1/n1+1/n2) -v<-.25*(1/n1-1/n2) -V<-u*((2-a)*a-b^2)+2*v*(1-a)*b -A<-sqrt(cr*(V+cr*u^2*(2-a)*a+cr*v^2*(1-a)^2)) -B<-(b+cr*v*(1-a))/(1+cr*u) -ci<-NA -ci[1]<-B-A/(1+cr*u) -ci[2]<-B+A/(1+cr*u) -list(ci=ci) -} -powt1est<-function(x,delta=0,ci=FALSE,nboot=800){ -# -# Estimate power for a given value of delta -# -# Only 20% trimming is allowed. -# -temp1<-powest(x,rep(0,5),delta,se=trimse(x)) -if(ci){ -set.seed(2) -pboot<-NA -datay<-rep(0,5) -print("Taking bootstrap samples. Please wait.") -datax <- matrix(sample(x, size = length(x) * nboot, replace = TRUE - ), nrow = nboot) -for(i in 1:nboot) { -se <- trimse(datax[i, ]) -pboot[i] <- powest(x, rep(0,5), delta, se) -} -temp <- sort(pboot) -} -ll<-floor(0.05 * nboot + 0.5) -list(est.power=temp1,ci=temp[ll]) -} - -powt1an<-function(x,ci=FALSE,plotit=TRUE,nboot=800){ -# -# Do a power analysis for the one-sample case with 20% trimmed -# mean and when the percentile bootstrap is to be used to test -# hypoltheses. -# -x<-x[!is.na(x)] -lp<-NA -se<-trimse(x) -gval<-NA -dv<-seq(0,3.5*se,length=15) -for(i in 1:length(dv)){ -gval[i]<-powest(x,rep(0,5),dv[i],se) -} -if(!ci){ -if(plotit){ -plot(dv,gval,type="n",xlab="delta",ylab="power") -lines(dv,gval) -}} -if(ci){ -set.seed(2) -print("Taking bootstrap samples. Please wait.") -datax <- matrix(sample(x, size = length(x) * nboot, replace = TRUE), - nrow = nboot) -pboot<-matrix(NA,nrow=nboot,ncol=length(dv)) -for(i in 1:nboot){ -se<-trimse(datax[i,]) -for(j in 1:length(dv)){ -pboot[i,j]<-powest(x,rep(0,5),dv[j],se) -}} -ll<-floor(.05*nboot+.5) -for(i in 1:15){ -temp<-sort(pboot[,i]) -lp[i]<-temp[ll] -} -plot(c(dv,dv),c(gval,lp),type="n",xlab="delta",ylab="power") -lines(dv,gval) -lines(dv,lp,lty=2) -} -list(delta=dv,power=gval,lowp=lp) -} - -trimpb2<-function(x,y,tr=.2,alpha=.05,nboot=2000,WIN=FALSE,win=.1,plotit=FALSE,op=4, -SEED=TRUE){ -# -# Compute a 1-alpha confidence interval for -# the difference between two 20% trimmed means. -# Independent groups are assumed. -# -# The default number of bootstrap samples is nboot=2000 -# -# tr is the amount of trimming -# -# win is the amount of Winsorizing before bootstrapping -# when WIN=T. -# -# Missing values are automatically removed. 
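-# Example (hypothetical data):
-#   set.seed(1)
-#   g1<-rnorm(25); g2<-rnorm(25)+.5
-#   trimpb2(g1,g2)   # CI and p-value for the difference between 20% trimmed means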
-# -x<-x[!is.na(x)] -y<-y[!is.na(y)] -if(WIN){ -if(win>tr)stop("Cannot Winsorize more than you trim") -if(tr < .2){print("When Winsorizing, the amount of trimming") -print("should be at least .2") -} -if(min(c(length(x),length(y))) < 15){ -print ("Warning: Winsorizing with sample sizes less than 15") -print("can result in poor control over the probability of a Type I error") -} -x<-winval(x,win) -y<-winval(y,win) -} -xx<-list() -xx[[1]]<-x -xx[[2]]<-y -e1=mean(xx[[1]],tr=tr) -e2=mean(xx[[2]],tr=tr) -#est.dif<-tmean(xx[[1]],tr=tr)-tmean(xx[[2]],tr=tr) -est.dif=e1-e2 -crit<-alpha/2 -temp<-round(crit*nboot) -icl<-temp+1 -icu<-nboot-temp -bvec<-matrix(NA,nrow=2,ncol=nboot) -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -for(j in 1:2){ -data<-matrix(sample(xx[[j]],size=length(xx[[j]])*nboot,replace=TRUE),nrow=nboot) -bvec[j,]<-apply(data,1,mean,tr) # Bootstrapped trimmed means for jth group -} -top<-bvec[1,]-bvec[2,] -test<-sum(top<0)/nboot+.5*sum(top==0)/nboot -if(test > .5)test<-1-test -top<-sort(top) -ci<-NA -ci[1]<-top[icl] -ci[2]<-top[icu] -if(plotit)g2plot(bvec[1,],bvec[2,],op=op) -list(Est1=e1,Est2=e2,p.value=2*test,ci=ci,est.dif=est.dif) -} - -twolsreg<-function(x1,y1,x2,y2){ -# -# Compute a .95 confidence interval for -# the difference between two regression slopes, -# estimated via least squares and -# corresponding to two independent groups. -# -# This function uses an adjusted percentile bootstrap method that -# gives good results when the error term is heteroscedastic. -# -# WARNING: If the number of boostrap samples is altered, it is -# unknown how to adjust the confidence interval when n1+n2 < 250. -# -nboot<-599 #Number of bootstrap samples -set.seed(2) # set seed of random number generator so that -# results can be duplicated. -#print("Taking bootstrap samples; please wait") -xy=elimna(cbind(x1,y1)) -if(ncol(xy)>2)stop("This function only allows one covariate") -x1=xy[,1] -y1=xy[,2] -xy=elimna(cbind(x2,y2)) -x2=xy[,1] -y2=xy[,2] -data1<-matrix(sample(length(y1),size=length(y1)*nboot,replace=TRUE),nrow=nboot) -bvec1<-apply(data1,1,twolsregsub,x1,y1) # A 1 by nboot matrix. -data2<-matrix(sample(length(y2),size=length(y2)*nboot,replace=TRUE),nrow=nboot) -bvec2<-apply(data2,1,twolsregsub,x2,y2) # A 1 by nboot matrix. -bvec<-bvec1-bvec2 -ilow<-15 -ihi<-584 -if(length(y1)+length(y2) < 250){ -ilow<-14 -ihi<-585 -} -if(length(y1)+length(y2) < 180){ -ilow<-11 -ihi<-588 -} -if(length(y1)+length(y2) < 80){ -ilow<-8 -ihi<-592 -} -if(length(y1)+length(y2) < 40){ -ilow<-7 -ihi<-593 -} -bsort<-sort(bvec) -b1<-lsfit(x1,y1)$coef[2] -b2<-lsfit(x2,y2)$coef[2] -ci<-c(bsort[ilow],bsort[ihi]) -list(b1=b1,b2=b2,ci=ci) -} - -twolsregsub<-function(isub, x, y) -{ - # - # Compute least squares estimate of the - # slope using x[isub] and y[isub] - # isub is a vector of length n, - # a bootstrap sample from the sequence of integers - # 1, 2, 3, ..., n - # - twolsregsub<-lsfit(x[isub],y[isub])$coef[2] - twolsregsub -} -bdanova1<-function(x,alpha=.05,power=.9,delta=NA){ -# -# Do the first stage of a Bishop-Dudewicz ANOVA method. -# That is, based on the data in x -# determine N_j, the number of observations needed -# in the jth group to achieve power 1-beta. 
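-# Example (hypothetical data; delta must be specified):
-#   set.seed(1)
-#   dat<-matrix(rnorm(60),ncol=3)   # first-stage data, J=3 groups
-#   bdanova1(dat,delta=1)   # required group sizes N for power .9 when delta=1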
-# -# The argument x is assumed to have list mode or the -# data is assumed to be stored in an n by J matrix -# -if(is.na(delta))stop("A value for delta was not specified") -if(!is.list(x)){ -if(!is.matrix(x))stop("Data must be stored in matrix or in list mode") -} -y<-x -if(is.list(y))y=matl(y) -x<-list() -for(j in 1:ncol(y))x[[j]]<-elimna(y[,j]) -nvec<-NA -svec<-NA -J<-length(x) -for(j in 1:length(x)){ -nvec[j]<-length(x[[j]]) -svec[j]<-var(x[[j]]) -} -nu<-nvec-1 -nu1<-sum(1/(nu-2)) -nu1<-J/nu1+2 -A<-(J-1)*nu1/(nu1-2) -B<-(nu1^2/J)*(J-1)/(nu1-2) -C<-3*(J-1)/(nu1-4) -D<-(J^2-2*J+3)/(nu1-2) -E<-B*(C+D) -M<-(4*E-2*A^2)/(E-A^2-2*A) -L<-A*(M-2)/M -f<-qf(1-alpha,L,M) -crit<-L*f -b<-(nu1-2)*crit/nu1 -zz<-qnorm(power) -A<-.5*(sqrt(2)*zz+sqrt(2*zz^2+4*(2*b-J+2))) -B<-A^2-b -d<-((nu1-2)/nu1)*delta/B -N<-NA -for(j in 1:length(x)){ -N[j]<-max(c(nvec[j]+1,floor(svec[j]/d)+1)) -} -list(N=N,d=d,crit=crit) -} - - -comvar2<-function(x,y,nboot=1000,SEED=TRUE){ -# -# Compare the variances of two independent groups. -# -x<-x[!is.na(x)] # Remove missing values in x -y<-y[!is.na(y)] # Remove missing values in y -# set seed of random number generator so that -# results can be duplicated. -est1=var(x) -est2=var(y) -sig<-est1-est2 -if(SEED)set.seed(2) -nmin<-min(length(x),length(y)) -datax<-matrix(sample(x,size=nmin*nboot,replace=TRUE),nrow=nboot) -datay<-matrix(sample(y,size=nmin*nboot,replace=TRUE),nrow=nboot) -v1<-apply(datax,1,FUN=var) -v2<-apply(datay,1,FUN=var) -boot<-v1-v2 -boot<-sort(boot) - ilow <- 15 - ihi <- 584 - if(nmin < 250) { - ilow <- 13 - ihi <- 586 - } - if(nmin < 180) { - ilow <- 10 - ihi <- 589 - } - if(nmin < 80) { - ilow <- 7 - ihi <- 592 - } - if(nmin < 40) { - ilow <- 6 - ihi <- 593 - } -ilow<-round((ilow/599)*nboot) -ihi<-round((ihi/599)*nboot) -ci<-c(boot[ilow+1],boot[ihi]) -list(n=c(length(x),length(y)),ci=ci,est.1=est1,est.2=est2,vardif=sig,ratio=est1/est2) -} - - -regi<-function(x,y,z,pt=median(z),fr=.8,est=onestep,regfun=tsreg,testit=FALSE,...){ -# -# split the data according to whether z is < or > pt, then -# use runmean2g to plot a smooth of the regression -# lines corresponding to these two groups. -# -m<-cbind(x,y,z) -m<-elimna(m) -x<-m[,1] -y<-m[,2] -z<-m[,3] -flag<-(z=nmin]) -isub[5]<-max(sub[vecn>=nmin]) -isub[3]<-floor((isub[1]+isub[5])/2) -isub[2]<-floor((isub[1]+isub[3])/2) -isub[4]<-floor((isub[3]+isub[5])/2) -mat<-matrix(NA,5,3) -dimnames(mat)<-list(NULL,c("X","n1","n2")) -for (i in 1:5){ -j<-i+5 -temp1<-y1[near(x1,x1[isub[i]],fr1)] -temp2<-y2[near(x2,x1[isub[i]],fr2)] -temp1<-temp1[!is.na(temp1)] -temp2<-temp2[!is.na(temp2)] -mat[i,1]<-x1[isub[i]] -mat[i,2]<-length(temp1) -mat[i,3]<-length(temp2) -gv1[[i]]<-temp1 -gv1[[j]]<-temp2 -} -I1<-diag(npt) -I2<-0-I1 -con<-rbind(I1,I2) -if(flag.est)test<-pbmcp(gv1,alpha=alpha,nboot=nboot,est=est,con=con,...) -if(!flag.est)test<-linconpb(gv1,alpha=alpha,nboot=nboot,est=est,con=con,...) 
-} -# -if(!is.na(pts[1])){ -npt<-length(pts) -n1<-1 -n2<-1 -vecn<-1 -for(i in 1:length(pts)){ -n1[i]<-length(y1[near(x1,pts[i],fr1)]) -n2[i]<-length(y2[near(x2,pts[i],fr2)]) -} -mat<-matrix(NA,length(pts),3) -dimnames(mat)<-list(NULL,c("X","n1","n2")) -gv<-vector("list",2*length(pts)) -for (i in 1:length(pts)){ -j<-i+npt -temp1<-y1[near(x1,pts[i],fr1)] -temp2<-y2[near(x2,pts[i],fr2)] -temp1<-temp1[!is.na(temp1)] -temp2<-temp2[!is.na(temp2)] -mat[i,1]<-pts[i] -if(length(temp1)<=5)paste("Warning, there are",length(temp1)," points corresponding to the design point X=",pts[i]) -if(length(temp2)<=5)paste("Warning, there are",length(temp2)," points corresponding to the design point X=",pts[i]) -mat[i,2]<-length(temp1) -mat[i,3]<-length(temp2) -gv1[[i]]<-temp1 -gv1[[j]]<-temp2 -} -I1<-diag(npt) -I2<-0-I1 -con<-rbind(I1,I2) -if(flag.est)test<-pbmcp(gv1,alpha=alpha,nboot=nboot,est=est,con=con,...) -if(!flag.est)test<-linconpb(gv1,alpha=alpha,nboot=nboot,est=est,con=con,...) -} -if(plotit){ -runmean2g(x1,y1,x2,y2,fr=fr1,est=est,LP=LP,xlab=xlab,ylab=ylab,pch1=pch1,pch2=pch2,...) -} -list(mat=mat,output=test$output,con=test$con,num.sig=test$num.sig) -} - -ancboot<-function(x1,y1,x2,y2,fr1=1,fr2=1,tr=.2,nboot=599,pts=NA,plotit=TRUE,xout=FALSE,outfun=outpro,...){ -# -# Compare two independent groups using the ancova method -# in chapter 12 of Wilcox, 2017, Intro to Robust Estimation and Hypothesis Testing. -# No assumption is made about the form of the regression -# lines--a running interval smoother is used. -# Confidence intervals are computed using a bootstrap-t bootstrap -# method. Comparisons are made at five empirically chosen design points. -# -# Assume data are in x1 y1 x2 and y2 -# -if(is.na(pts[1])){ -isub<-c(1:5) # Initialize isub -test<-c(1:5) -m1=elimna(cbind(x1,y1)) -x1=m1[,1] -y1=m1[,2] -m1=elimna(cbind(x2,y2)) -x2=m1[,1] -y2=m1[,2] -xorder<-order(x1) -y1<-y1[xorder] -x1<-x1[xorder] -xorder<-order(x2) -y2<-y2[xorder] -x2<-x2[xorder] -n1<-1 -n2<-1 -vecn<-1 -for(i in 1:length(x1))n1[i]<-length(y1[near(x1,x1[i],fr1)]) -for(i in 1:length(x1))n2[i]<-length(y2[near(x2,x1[i],fr2)]) -for(i in 1:length(x1))vecn[i]<-min(n1[i],n2[i]) -sub<-c(1:length(x1)) -isub[1]<-min(sub[vecn>=12]) -isub[5]<-max(sub[vecn>=12]) -isub[3]<-floor((isub[1]+isub[5])/2) -isub[2]<-floor((isub[1]+isub[3])/2) -isub[4]<-floor((isub[3]+isub[5])/2) -mat<-matrix(NA,5,8) -dimnames(mat)<-list(NULL,c("X","n1","n2","DIF","TEST","ci.low","ci.hi", -"p.value")) -gv1<-vector("list") -for (i in 1:5){ -j<-i+5 -temp1<-y1[near(x1,x1[isub[i]],fr1)] -temp2<-y2[near(x2,x1[isub[i]],fr2)] -temp1<-temp1[!is.na(temp1)] -temp2<-temp2[!is.na(temp2)] -mat[i,2]<-length(temp1) -mat[i,3]<-length(temp2) -gv1[[i]]<-temp1 -gv1[[j]]<-temp2 -} -I1<-diag(5) -I2<-0-I1 -con<-rbind(I1,I2) -test<-linconb(gv1,con=con,tr=tr,nboot=nboot) -for(i in 1:5){ -mat[i,1]<-x1[isub[i]] -} -mat[,4]<-test$psihat[,2] -mat[,5]<-test$test[,2] -mat[,6]<-test$psihat[,3] -mat[,7]<-test$psihat[,4] -mat[,8]<-test$test[,4] -} -if(!is.na(pts[1])){ -n1<-1 -n2<-1 -vecn<-1 -for(i in 1:length(pts)){ -n1[i]<-length(y1[near(x1,pts[i],fr1)]) -n2[i]<-length(y2[near(x2,pts[i],fr2)]) -if(n1[i]<=5)paste("Warning, there are",n1[i]," points corresponding to the design point X=",pts[i]) -if(n2[i]<=5)paste("Warning, there are",n2[i]," points corresponding to the design point X=",pts[i]) -} -mat<-matrix(NA,length(pts),9) -dimnames(mat)<-list(NULL,c("X","n1","n2","DIF","TEST","se","ci.low","ci.hi", -"p.value")) -gv<-vector("list",2*length(pts)) -for (i in 1:length(pts)){ -g1<-y1[near(x1,pts[i],fr1)] 
-g2<-y2[near(x2,pts[i],fr2)] -g1<-g1[!is.na(g1)] -g2<-g2[!is.na(g2)] -j<-i+length(pts) -gv[[i]]<-g1 -gv[[j]]<-g2 -} -I1<-diag(length(pts)) -I2<-0-I1 -con<-rbind(I1,I2) -test<-linconb(gv,con=con,tr=tr,nboot=nboot) -mat[,1]<-pts -mat[,2]<-n1 -mat[,3]<-n2 -mat[,4]<-test$psihat[,2] -mat[,5]<-test$test[,2] -mat[,6]<-test$test[,3] -mat[,7]<-test$psihat[,3] -mat[,8]<-test$psihat[,4] -mat[,9]<-test$test[,4] -} -if(plotit){ -if(xout){ -flag<-outfun(x1,...)$keep -x1<-x1[flag] -y1<-y1[flag] -flag<-outfun(x2,...)$keep -x2<-x2[flag] -y2<-y2[flag] -} -runmean2g(x1,y1,x2,y2,fr=fr1,est=mean,tr=tr) -} -list(output=mat,crit=test$crit) -} - -spear<-function(x,y=NULL){ -# Compute Spearman's rho -# -if(!is.null(y[1])){ -m=elimna(cbind(x,y)) -n=nrow(m) -x=m[,1] -y=m[,2] -corv<-cor(rank(x),rank(y)) -} -if(is.null(y[1])){ -x=elimna(x) -n=nrow(x) -m<-apply(x,2,rank) -corv<-cor(m) -} -test <-corv * sqrt((n - 2)/(1. - corv^2)) -sig <- 2 * (1 - pt(abs(test), length(x) - 2)) -if(is.null(y[1]))sig<-matrix(sig,ncol=sqrt(length(sig))) -list(cor=corv,p.value = sig) -} - - -linchk<-function(x,y,sp,pv=1,regfun=tsreg,plotit=TRUE,nboot=599,alpha=.05,pr=TRUE,xout=FALSE){ -# -# Split the data into two groups according to whether -# predictor variable pv has a value less than sp. -# Then test the hypothesis that slope coefficients, -# based on the regression method regfun, are equal. -# -x<-as.matrix(x) -if(pr)print(paste("Splitting data using predictor", pv)) -xx<-x[,pv] -flag<-(xx<=sp) -temp<-reg2ci(x[flag,],y[flag],x[!flag,],y[!flag],regfun=regfun,plotit=plotit,nboot=nboot,alpha=alpha,xout=xout) -temp -} - -trimci<-function(x,tr=.2,alpha=.05,null.value=0,pr=TRUE,nullval=NULL){ -# -# Compute a 1-alpha confidence interval for the trimmed mean -# -# The default amount of trimming is tr=.2 -# -if(pr){ -print("The p-value returned by this function is based on the") -print("null value specified by the argument null.value, which defaults to 0") -print('To get a measure of effect size using a Winsorized measure of scale, use trimciv2') -} -if(!is.null(nullval))null.value=nullval -x<-elimna(x) -se<-sqrt(winvar(x,tr))/((1-2*tr)*sqrt(length(x))) -trimci<-vector(mode="numeric",length=2) -df<-length(x)-2*floor(tr*length(x))-1 -trimci[1]<-mean(x,tr)-qt(1-alpha/2,df)*se -trimci[2]<-mean(x,tr)+qt(1-alpha/2,df)*se -test<-(mean(x,tr)-null.value)/se -sig<-2*(1-pt(abs(test),df)) -list(estimate=mean(x,tr),ci=trimci,test.stat=test,se=se,p.value=sig,n=length(x)) -} - -trimciv2<-function(x,tr=.2,alpha=.05,null.value=0,pr=TRUE){ -# -# Compute a 1-alpha confidence interval for the trimmed mean -# Same as trimci, only a standardized measure of effect size is reported: -# the difference between the trimmed mean and hypothesized value divided by -# the Winsorized standard deviation, rescaled to estimate the standard deviation -# when sampling from a normal distribution. 
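-#
-# Example (simulated data for illustration; uses the helper functions
-# winvar, area and dnormvar called below):
-#   set.seed(1)
-#   trimciv2(rnorm(40,mean=.5),null.value=0)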
-# -# The default amount of trimming is tr=.2 -# -library(MASS) -x<-elimna(x) -se<-sqrt(winvar(x,tr))/((1-2*tr)*sqrt(length(x))) -trimci<-vector(mode="numeric",length=2) -df<-length(x)-2*floor(tr*length(x))-1 -trimci[1]<-mean(x,tr)-qt(1-alpha/2,df)*se -trimci[2]<-mean(x,tr)+qt(1-alpha/2,df)*se -test<-(mean(x,tr)-null.value)/se -sig<-2*(1-pt(abs(test),df)) -if(tr==0)term=1 -if(tr>0)term=sqrt(area(dnormvar,qnorm(tr),qnorm(1-tr))+2*(qnorm(tr)^2)*tr) -epow=(mean(x,tr)-null.value)*term/sqrt(winvar(x,tr=tr,na.rm=TRUE)) -list(ci=trimci,estimate=mean(x,tr),test.stat=test,se=se,p.value=sig,n=length(x),Effect.Size=epow) -} - -trimciQS<-function(x,tr=.2,alpha=.05,null.value=0,pr=TRUE,nullval=NULL){ -# -# Compute a 1-alpha confidence interval for the trimmed mean -# Same as trimci plus quantile shift measure of effect size. -# -# The default amount of trimming is tr=.2 -# -if(pr){ -print("The p-value returned by this function is based on the") -print("null value specified by the argument null.value, which defaults to 0") -print('To get a measure of effect size using a Winsorized measure of scale, use trimciv2') -} -if(!is.null(nullval))null.value=nullval -x<-elimna(x) -se<-sqrt(winvar(x,tr))/((1-2*tr)*sqrt(length(x))) -trimci<-vector(mode="numeric",length=2) -df<-length(x)-2*floor(tr*length(x))-1 -trimci[1]<-mean(x,tr)-qt(1-alpha/2,df)*se -trimci[2]<-mean(x,tr)+qt(1-alpha/2,df)*se -test<-(mean(x,tr)-null.value)/se -sig<-2*(1-pt(abs(test),df)) -QS=depQS(x,locfun=tmean,tr=tr)$Q.effect -list(ci=trimci,estimate=mean(x,tr),test.stat=test,se=se,p.value=sig,n=length(x),Q.effect=QS) -} - - -msmed<-function(x,y=NA,con=0,alpha=.05){ -# -# Test a set of linear contrasts using Medians -# -# The data are assumed to be stored in $x$ in a matrix or in list mode. -# Length(x) is assumed to correspond to the total number of groups, J -# It is assumed all groups are independent. -# -# con is a J by d matrix containing the contrast coefficients that are used. -# If con is not specified, all pairwise comparisons are made. -# -# Missing values are automatically removed. -# -if(!is.na(y[1])){ -xx<-list() -xx[[1]]<-x -xx[[2]]<-y -if(is.matrix(x) || is.list(x))stop("When y is speficied, x should not have list mode or be a matrix") -x<-xx -} -if(is.matrix(x))x<-listm(x) -if(!is.list(x))stop("Data must be stored in a matrix or in list mode.") -con<-as.matrix(con) -J<-length(x) -h<-vector("numeric",J) -w<-vector("numeric",J) -xbar<-vector("numeric",J) -for(j in 1:J){ -xx<-!is.na(x[[j]]) -val<-x[[j]] -if(sum(duplicated(val)>0)){ -print(paste("Warning: Group",j, "has tied values. Might want to used medpb")) -} -x[[j]]<-val[xx] # Remove missing values -xbar[j]<-median(x[[j]]) -w[j]<-msmedse(x[[j]])^2 # Squared standard error. 
-} -if(sum(con^2!=0))CC<-ncol(con) -if(sum(con^2)==0){ -CC<-(J^2-J)/2 -psihat<-matrix(0,CC,5) -dimnames(psihat)<-list(NULL,c("Group","Group","psihat","ci.lower","ci.upper")) -test<-matrix(NA,CC,7) -dimnames(test)<-list(NULL,c("Group","Group","test","crit","se","p.value",'p.adjusted')) -jcom<-0 -for (j in 1:J){ -for (k in 1:J){ -if (j < k){ -jcom<-jcom+1 -test[jcom,3]<-abs(xbar[j]-xbar[k])/sqrt(w[j]+w[k]) -test[jcom,6]<-2*(1-pt(test[jcom,3],999)) -test[jcom,7]=1-psmm(abs(test[jcom,3]),CC,500) -sejk<-sqrt(w[j]+w[k]) -test[jcom,5]<-sejk -psihat[jcom,1]<-j -psihat[jcom,2]<-k -test[jcom,1]<-j -test[jcom,2]<-k -psihat[jcom,3]<-(xbar[j]-xbar[k]) -crit<-NA -if(CC==1)crit<-qnorm(1-alpha/2) -if(CC>1){ -crit=qsmm(1-alpha,CC,500) -} -test[jcom,4]<-crit -psihat[jcom,4]<-psihat[jcom,3]-crit*test[jcom,5] -psihat[jcom,5]<-psihat[jcom,3]+crit*test[jcom,5] -}}}} -if(sum(con^2)>0){ -if(nrow(con)!=length(x))warning("The number of groups does not match the number of contrast coefficients.") -psihat<-matrix(0,ncol(con),4) -dimnames(psihat)<-list(NULL,c("con.num","psihat","ci.lower","ci.upper")) -test<-matrix(0,ncol(con),6) -dimnames(test)<-list(NULL,c("con.num","test","crit","se","p.value",'p.adjusted')) -for (d in 1:ncol(con)){ -psihat[d,1]<-d -psihat[d,2]<-sum(con[,d]*xbar) -sejk<-sqrt(sum(con[,d]^2*w)) -test[d,1]<-d -test[d,2]<-sum(con[,d]*xbar)/sejk -test[d,5]<-2*(1-pt(abs(test[d,2]),999)) -test[d,6]=1-psmm(abs(test[d,2]),ncol(con),500) -crit<-NA -if(CC==1)crit<-qnorm(1-alpha/2) -if(CC>1)crit=qsmm(1-alpha,CC,500) -test[d,3]<-crit -test[d,4]<-sejk -psihat[d,3]<-psihat[d,2]-crit*sejk -psihat[d,4]<-psihat[d,2]+crit*sejk -}} -list(test=test,psihat=psihat) -} -selby<-function(m,grpc,coln){ -# -# -# A commmon situation is to have data stored in an n by p matrix where -# one or more of the columns are group identification numbers. -# This function groups all values in column coln according to the -# group numbers in column grpc and stores the results in list mode. -# -# More than one column of data can sorted -# -# grpc indicates the column of the matrix containing group id number -# -if(is.null(dim(m)))stop("Data must be stored in a matrix or data frame") -if(is.na(grpc[1]))stop("The argument grpc is not specified") -if(is.na(coln[1]))stop("The argument coln is not specified") -if(length(grpc)!=1)stop("The argument grpc must have length 1") -x<-vector("list") -grpn<-sort(unique(m[,grpc])) -it<-0 -for (ig in 1:length(grpn)){ -for (ic in 1:length(coln)){ -it<-it+1 -flag<-(m[,grpc]==grpn[ig]) -x[[it]]<-m[flag,coln[ic]] -}} -list(x=x,grpn=grpn) -} - - -med2way<-function(J,K,x,grp=c(1:p),p=J*K, ADJ.P.VALUE=TRUE, iter=5000,SEED=TRUE){ -# -# Perform a J by K (two-way) anova on medians where -# all jk groups are independent. -# -# The argument x is assumed to contain the raw -# data stored in list mode. -# If grp is unspecified, it is assumed x[[1]] contains the data -# for the first level of both factors: level 1,1. -# x[[2]] is assumed to contain the data for level 1 of the -# first factor and level 2 of the second factor: level 1,2 -# x[[j+1]] is the data for level 2,1, etc. -# If the data are in wrong order, grp can be used to rearrange the -# groups. For example, for a two by two design, grp<-c(2,4,3,1) -# indicates that the second group corresponds to level 1,1; -# group 4 corresponds to level 1,2; group 3 is level 2,1; -# and group 1 is level 2,2. -# -# It is assumed that the input variable x has length JK, the total number of -# groups being tested. If not, a warning message is printed. 
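-#
-# Example (a hypothetical 2-by-3 design with the six groups in list mode,
-# ordered level (1,1), (1,2), ..., (2,3)):
-#   set.seed(1)
-#   dat=list()
-#   for(j in 1:6)dat[[j]]=rnorm(20)
-#   med2way(2,3,dat)  # tests for Factor A, Factor B and the A-by-B interaction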
-# -if(L.ties(x))print("There are tied values, suggest using the function m2way instead") -if(is.matrix(x))x<-listm(x) -if(!is.list(x))stop("Data are not stored in a matrix or in list mode") -if(p!=length(x)){ -print("Warning: The number of groups in your data is not equal to JK") -} -xbar<-0 -h<-0 -d<-0 -R<-0 -W<-0 -d<-0 -r<-0 -w<-0 -nuhat<-0 -omegahat<-0 -DROW<-0 -DCOL<-0 -xtil<-matrix(0,J,K) -aval<-matrix(0,J,K) -for (j in 1:p){ -xbar[j]<-median(x[[grp[j]]]) -h[j]<-length(x[[grp[j]]]) -d[j]<-msmedse(x[[grp[j]]])^2 -} -d<-matrix(d,J,K,byrow=TRUE) -xbar<-matrix(xbar,J,K,byrow=TRUE) -h<-matrix(h,J,K,byrow=TRUE) -for(j in 1:J){ -R[j]<-sum(xbar[j,]) -nuhat[j]<-(sum(d[j,]))^2/sum(d[j,]^2/(h[j,]-1)) -r[j]<-1/sum(d[j,]) -DROW[j]<-sum(1/d[j,]) -} -for(k in 1:K){ -W[k]<-sum(xbar[,k]) -omegahat[k]<-(sum(d[,k]))^2/sum(d[,k]^2/(h[,k]-1)) -w[k]<-1/sum(d[,k]) -DCOL[k]<-sum(1/d[,k]) -} -D<-1/d -for(j in 1:J){ -for(k in 1:K){ -xtil[j,k]<-sum(D[,k]*xbar[,k]/DCOL[k])+sum(D[j,]*xbar[j,]/DROW[j])- -sum(D*xbar/sum(D)) -aval[j,k]<-(1-D[j,k]*(1/sum(D[j,])+1/sum(D[,k])-1/sum(D)))^2/(h[j,k]-3) -} -} -Rhat<-sum(r*R)/sum(r) -What<-sum(w*W)/sum(w) -Ba<-sum((1-r/sum(r))^2/nuhat) -Bb<-sum((1-w/sum(w))^2/omegahat) -Va<-sum(r*(R-Rhat)^2)/((J-1)*(1+2*(J-2)*Ba/(J^2-1))) -Vb<-sum(w*(W-What)^2)/((K-1)*(1+2*(K-2)*Bb/(K^2-1))) -sig.A<-1-pf(Va,J-1,9999999) -sig.B<-1-pf(Vb,K-1,9999999) -# Next, do test for interactions -Vab<-sum(D*(xbar-xtil)^2) -dfinter<-(J-1)*(K-1) -sig.AB<-1-pchisq(Vab,dfinter) -if(ADJ.P.VALUE){ -a=med2way.crit(J,K,h,iter=iter,SEED=SEED) -sig.A=mean(Va<=a$A.dist) -sig.B=mean(Vb<=a$B.dist) -sig.AB=mean(Vab<=a$AB.dist) -} -list(test.A=Va,p.val.A=sig.A,test.B=Vb,p.val.B=sig.B,test.AB=Vab,p.val.AB=sig.AB) -} - - -med2way.sub<-function(J,K,x,grp=c(1:p),p=J*K){ -# -# Perform a J by K (two-way) anova on medians where -# all jk groups are independent. -# -# The argument x is assumed to contain the raw -# data stored in list mode. -# If grp is unspecified, it is assumed x[[1]] contains the data -# for the first level of both factors: level 1,1. -# x[[2]] is assumed to contain the data for level 1 of the -# first factor and level 2 of the second factor: level 1,2 -# x[[j+1]] is the data for level 2,1, etc. -# If the data are in wrong order, grp can be used to rearrange the -# groups. For example, for a two by two design, grp<-c(2,4,3,1) -# indicates that the second group corresponds to level 1,1; -# group 4 corresponds to level 1,2; group 3 is level 2,1; -# and group 1 is level 2,2. -# -# It is assumed that the input variable x has length JK, the total number of -# groups being tested. If not, a warning message is printed. 
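-#
-# (Same computations as med2way but without the simulation-based p-value
-# adjustment; it is called repeatedly by med2way.crit to generate the null
-# distributions, e.g. med2way.sub(2,3,dat) with dat as in med2way above.)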
-# -if(is.matrix(x))x<-listm(x) -if(!is.list(x))stop('Data are not stored in a matrix or in list mode') -if(p!=length(x)){ -print('Warning: The number of groups in your data is not equal to JK') -} -xbar<-0 -h<-0 -d<-0 -R<-0 -W<-0 -d<-0 -r<-0 -w<-0 -nuhat<-0 -omegahat<-0 -DROW<-0 -DCOL<-0 -xtil<-matrix(0,J,K) -aval<-matrix(0,J,K) -for (j in 1:p){ -xbar[j]<-median(x[[grp[j]]]) -h[j]<-length(x[[grp[j]]]) -d[j]<-msmedse(x[[grp[j]]])^2 -} -d<-matrix(d,J,K,byrow=TRUE) -xbar<-matrix(xbar,J,K,byrow=TRUE) -h<-matrix(h,J,K,byrow=TRUE) -for(j in 1:J){ -R[j]<-sum(xbar[j,]) -nuhat[j]<-(sum(d[j,]))^2/sum(d[j,]^2/(h[j,]-1)) -r[j]<-1/sum(d[j,]) -DROW[j]<-sum(1/d[j,]) -} -for(k in 1:K){ -W[k]<-sum(xbar[,k]) -omegahat[k]<-(sum(d[,k]))^2/sum(d[,k]^2/(h[,k]-1)) -w[k]<-1/sum(d[,k]) -DCOL[k]<-sum(1/d[,k]) -} -D<-1/d -for(j in 1:J){ -for(k in 1:K){ -xtil[j,k]<-sum(D[,k]*xbar[,k]/DCOL[k])+sum(D[j,]*xbar[j,]/DROW[j])- -sum(D*xbar/sum(D)) -aval[j,k]<-(1-D[j,k]*(1/sum(D[j,])+1/sum(D[,k])-1/sum(D)))^2/(h[j,k]-3) -} -} -Rhat<-sum(r*R)/sum(r) -What<-sum(w*W)/sum(w) -Ba<-sum((1-r/sum(r))^2/nuhat) -Bb<-sum((1-w/sum(w))^2/omegahat) -Va<-sum(r*(R-Rhat)^2)/((J-1)*(1+2*(J-2)*Ba/(J^2-1))) -Vb<-sum(w*(W-What)^2)/((K-1)*(1+2*(K-2)*Bb/(K^2-1))) -sig.A<-1-pf(Va,J-1,9999999) -sig.B<-1-pf(Vb,K-1,9999999) -# Next, do test for interactions -Vab<-sum(D*(xbar-xtil)^2) -dfinter<-(J-1)*(K-1) -sig.AB<-1-pchisq(Vab,dfinter) -list(test.A=Va,p.val.A=sig.A,test.B=Vb,p.val.B=sig.B,test.AB=Vab,p.val.AB=sig.AB) -} - -L.ties<-function(x){ -# -# x is assumed to have list mode -# -# Goal: determine whether there are any tied values -# -a=FALSE -if(is.matrix(x))x=listm(x) -if(!is.list(x))stop('x should be a matrix or have list mode') -x=elimna(x) -J=length(x) -for(j in 1:J){ -u=unique(x[[j]]) -if(length(u)!=length(x[[j]]))a=TRUE -} -a -} - -med2way.crit<-function(J,K,n,iter,SEED=TRUE){ -# -# Estimate the null distribution for med2way -# -x=list() -p=J*K -A.dist=NA -B.dist=NA -AB.dist=NA -for(i in 1:iter){ -for(j in 1:p)x[[j]]=rmul(n[j]) -a=med2way.sub(J,K,x) -A.dist[i]=a$test.A -B.dist[i]=a$test.B -AB.dist[i]=a$test.AB -} -list(A.dist=A.dist,B.dist=B.dist,AB.dist=AB.dist) -} - - - -idealf<-function(x,na.rm=FALSE){ -# -# Compute the ideal fourths for data in x -# -if(na.rm)x<-x[!is.na(x)] -j<-floor(length(x)/4 + 5/12) -y<-sort(x) -g<-(length(x)/4)-j+(5/12) -ql<-(1-g)*y[j]+g*y[j+1] -k<-length(x)-j+1 -qu<-(1-g)*y[k]+g*y[k-1] -list(ql=ql,qu=qu) -} - -lintests1<-function(vstar,yhat,res,mflag,x,regfun,...){ -ystar<-yhat+res*vstar -bres<-regfun(x,ystar,...)$residuals -rval<-0 -for (i in 1:nrow(x)){ -rval[i]<-sum(bres[mflag[,i]]) -} -rval -} - - - -#Note: rdepth in library(mrfDepth) eliminates access to rdepth below and it handles p>1 Ind. Var. - - -rdepth.orig<-function(d, x, y, sortx = TRUE) -{ -########################################################################## -# This function computes the regression depth of a line with coordinates d -# relative to the bivariate data set (x,y). -# The first component of the vector d indicates the intercept of the line, -# the second component is the slope. -# -# Input : d : vector with two components -# x,y : vectors of equal length (data set) -# sortx : logical, to set to F if the data set (x,y) is -# already sorted by its x-coordinates -# -# Reference: -# Rousseeuw, P.J. and Hubert, M. (1996), -# Regression Depth, Technical report, University of Antwerp -# submitted for publication. 
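-#
-# Example (hypothetical bivariate data; regression depth of the line y=1+2x,
-# i.e. d=c(intercept,slope)):
-#   set.seed(1)
-#   x=rnorm(25)
-#   y=1+2*x+rnorm(25)
-#   rdepth.orig(c(1,2),x,y)
-#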
-########################################################################## - if(!is.vector(x) || !is.vector(y)) stop("x and y should be vectors") - n <- length(x) - if(n < 2) - stop("you need at least two observations") - xy <- cbind(x, y) - b <- d[1] - a <- d[2] - if(sortx) - xy <- xy[order(xy[, 1], xy[, 2]), ] - res <- xy[, 2] - a * xy[, 1] - b - res[abs(res) < 9.9999999999999995e-08] <- 0 - posres <- res >= 0 - negres <- res <= 0 - lplus <- cumsum(posres) - rplus <- lplus[n] - lplus - lmin <- cumsum(negres) - rmin <- lmin[n] - lmin - depth <- pmin(lplus + rmin, rplus + lmin) - min(depth) -} - -permg<-function(x,y,alpha=.05,est=mean,nboot=1000){ -# -# Do a two-sample permutation test based on means or any -# other measure of location or scale indicated by the -# argument est. -# -# The default number of permutations is nboot=1000 -# -x<-x[!is.na(x)] -y<-y[!is.na(y)] -xx<-c(x,y) -dif<-est(x)-est(y) -vec<-c(1:length(xx)) -v1<-length(x)+1 -difb<-NA -temp2<-NA -for(i in 1:nboot){ -data <- sample(xx, size = length(xx), replace = FALSE) -temp1<-est(data[c(1:length(x))]) -temp2<-est(data[c(v1:length(xx))]) -difb[i]<-temp1-temp2 -} -difb<-sort(difb) -icl<-floor((alpha/2)*nboot+.5) -icu<-floor((1-alpha/2)*nboot+.5) -reject<-"no" -if(dif>=difb[icu] || dif <=difb[icl])reject<-"yes" -list(dif=dif,lower=difb[icl],upper=difb[icu],reject=reject) -} - - -pb2gen<-function(x,y,alpha=.05,nboot=2000,est=onestep,SEED=TRUE,pr=FALSE,...){ -# -# Compute a bootstrap confidence interval for the -# the difference between any two parameters corresponding to -# independent groups. -# By default, M-estimators are compared. -# Setting est=mean, for example, will result in a percentile -# bootstrap confidence interval for the difference between means. -# Setting est=onestep will compare M-estimators of location. -# The default number of bootstrap samples is nboot=2000 -# -x<-x[!is.na(x)] # Remove any missing values in x -y<-y[!is.na(y)] # Remove any missing values in y -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -datax<-matrix(sample(x,size=length(x)*nboot,replace=TRUE),nrow=nboot) -datay<-matrix(sample(y,size=length(y)*nboot,replace=TRUE),nrow=nboot) -bvecx<-apply(datax,1,est,...) -bvecy<-apply(datay,1,est,...) -bvec<-sort(bvecx-bvecy) -low<-round((alpha/2)*nboot)+1 -up<-nboot-low -temp<-sum(bvec<0)/nboot+sum(bvec==0)/(2*nboot) -sig.level<-2*(min(temp,1-temp)) -se<-var(bvec) -list(est.1=est(x,...),est.2=est(y,...),est.dif=est(x,...)-est(y,...),ci=c(bvec[low],bvec[up]),p.value=sig.level,sq.se=se,n1=length(x),n2=length(y)) -} - - - - -tmean<-function(x,tr=.2,na.rm=FALSE,STAND=NULL){ -if(na.rm)x<-x[!is.na(x)] -val<-mean(x,tr) -val -} - -depth<-function(U,V,m){ -# -# Compute the halfspace depth of the point (u,v) for the pairs of points -# in the n by 2 matrix m. 
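-#
-# Example (halfspace depth of the origin relative to 50 simulated bivariate
-# normal points; values near .5 indicate a deeply nested point):
-#   set.seed(1)
-#   m=matrix(rnorm(100),ncol=2)
-#   depth(0,0,m)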
-# -X<-m[,1] -Y<-m[,2] -FV<-NA -NUMS<-0 -NUMH<-0 -SDEP<-0.0 -HDEP<-0.0 -N<-length(X) -P<-acos(-1) -P2<-P*2.0 -EPS<-0.000001 -ALPHA<-NA -NT<-0 -for(i in 1:nrow(m)){ - DV<-sqrt(((X[i]-U)*(X[i]-U)+(Y[i]-V)*(Y[i]-V))) - if (DV <= EPS){ - NT<-NT+1 - } - else{ - XU<-(X[i]-U)/DV - YU<-(Y[i]-V)/DV - if (abs(XU) > abs(YU)){ - if (X[i] >= U){ - ALPHA[i-NT]<-asin(YU) - if(ALPHA[i-NT] < 0.0) - ALPHA[i-NT]<-P2+ALPHA[i-NT] - } - else{ - ALPHA[i-NT]<-P-asin(YU) - } - } - else{ - if (Y[i] >= V) - ALPHA[i-NT]<-acos(XU) - else - ALPHA[i-NT]<-P2-acos(XU) - } - if (ALPHA[i-NT] >= P2-EPS) ALPHA[i-NT]<-0.0 - } -} -NN<-N-NT -if(NN<=1){ -NUMS<-NUMS+depths1(NT,1)*depths1(NN,2)+depths1(NT,2)*depths1(NN,1)+ -depths1(NT,3) - if(N >= 3)SDEP<-(NUMS+0.0)/(depths1(N,3)+0.0) - NUMH<-NUMH+NT - HDEP<-(NUMH+0.0)/(N+0.0) - return(HDEP) -} -ALPHA<-sort(ALPHA[1:NN]) -ANGLE<-ALPHA[1]-ALPHA[NN]+P2 -for(i in 2:NN){ -ANGLE<-max(c(ANGLE,ALPHA[i]-ALPHA[i-1])) - } -if(ANGLE > (P+EPS)){ -NUMS<-NUMS+depths1(NT,1)*depths1(NN,2)+depths1(NT,2)*depths1(NN,1)+ -depths1(NT,3) - if(N >= 3)SDEP<-(NUMS+0.0)/(depths1(N,3)+0.0) - NUMH<-NUMH+NT - HDEP<-(NUMH+0.0)/(N+0.0) - return(HDEP) - } -ANGLE<-ALPHA[1] -NU<-0 -for (i in 1:NN){ -ALPHA[i]<-ALPHA[i]-ANGLE -if(ALPHA[i]<(P-EPS))NU<-NU+1 - } -if(NU >= NN){ -NUMS<-NUMS+depths1(NT,1)*depths1(NN,2)+depths1(NT,2)*depths1(NN,1)+ -depths1(NT,3) - if(N >= 3)SDEP<-(NUMS+0.0)/(depths1(N,3)+0.0) - NUMH<-NUMH+NT - HDEP<-(NUMH+0.0)/(N+0.0) - return(HDEP) -} -# -# Mergesort the alpha with their antipodal angles beta, -# and at the same time update I, F(I), and NBAD. -# -JA<-1 -JB<-1 - ALPHK<-ALPHA[1] - BETAK<-ALPHA[NU+1]-P - NN2<-NN*2 - NBAD<-0 - I<-NU - NF<-NN -for(J in 1:NN2){ - ADD<-ALPHK+EPS - if (ADD < BETAK){ - NF<-NF+1 - if(JA < NN){ - JA<-JA+1 - ALPHK<-ALPHA[JA] - } - else - ALPHK<-P2+1.0 - } - else{ - I<-I+1 - NN1<-NN+1 - if(I==NN1){ - I<-1 - NF<-NF-NN - } - FV[I]<-NF - NFI<-NF-I - NBAD<-NBAD+depths1(NFI,2) - if(JB < NN){ - JB<-JB+1 - if(JB+NU <= NN) - BETAK<-ALPHA[JB+NU]-P - else - BETAK<-ALPHA[JB+NU-NN]+P - } - else - BETAK<-P2+1.0 - } -} -NUMS<-depths1(NN,3)-NBAD -# -# Computation of NUMH for halfspace depth. -# - GI<-0 - JA<-1 - ANGLE<-ALPHA[1] - dif<-NN-FV[1] - NUMH<-min(FV[1],dif) -for(I in 2:NN){ - AEPS<-ANGLE+EPS - if(ALPHA[I] <= AEPS){ - JA<-JA+1 - } - else{ - GI<-GI+JA - JA<-1 - ANGLE<-ALPHA[I] - } - KI<-FV[I]-GI - NNKI<-NN-KI - NUMH<-min(c(NUMH,min(c(KI,NNKI)))) - } -NUMS<-NUMS+depths1(NT,1)*depths1(NN,2)+depths1(NT,2)*depths1(NN,1)+ -depths1(NT,3) - if(N >= 3)SDEP<-(NUMS+0.0)/(depths1(N,3)+0.0) - NUMH<-NUMH+NT - HDEP<-(NUMH+0.0)/(N+0.0) - HDEP -} - -rtdep<-function(pts,m,nsamp=100,SEED=NA){ -# -# Determine Tukey depth by randomly sampling -# p-1 points from m (which has p columns), -# combine this with pt, fit a plane, check -# the residuals, and repeat many times. -# Count how many positive residuals -# there are, say pr, how many negative residuals, nr. -# The approximate depth is min (pr,nr) over all samples. 
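-#
-# Example (approximate Tukey depth of the origin for simulated trivariate data;
-# nsamp random regression fits are used to approximate the depth):
-#   set.seed(1)
-#   m=matrix(rnorm(150),ncol=3)
-#   rtdep(c(0,0,0),m,nsamp=100)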
-# -set.seed(2) -if(!is.na(SEED))set.seed(SEED) -if(!is.matrix(m))stop("Second argument is not a matrix") -if(ncol(m)==2)tdep<-depth(pts[1],pts[2],m) -if(ncol(m)>2){ -n<-nrow(m) -pts<-matrix(pts,ncol=ncol(m)) -mold<-m -p<-ncol(m) -pm1<-p-1 -mdup<-matrix(rep(pts,nrow(m)),ncol=ncol(m),byrow=TRUE) -dif<-abs(m-mdup) -chk<-apply(dif,1,sum) -flag<-(chk!=0) -m<-m[flag,] -m<-as.matrix(m) -dmin<-sum(chk==0) -m3<-rbind(m,pts) -tdep<-nrow(m)+1 -for(i in 1:nsamp){ -mat<-sample(nrow(m),pm1,T) -if(p>2)x<-rbind(m[mat,2:p],pts[,2:p]) -y<-c(m[mat,1],pts[1]) -if(prod(eigen(var(x))$values) >10^{-8}){ -#print(prod(eigen(var(x))$values)) -temp<-qr(x) -if(temp$rank[1]==ncol(x)){ -temp<-lsfit(x,y)$coef -m2<-cbind(rep(1,nrow(m3)),m3[,2:p]) -res<-m3[,1]-temp%*%t(m2) -p1<-sum((res>0)) -p2<-sum((res<0)) -tdep<-min(c(tdep,p1,p2)) -if(tdep EPS) - { - NSIN <- NSIN + 1 - foundSingular <- T - if (PRINT) - paste( "ERROR: No Eigenvalue = 0 for sample", NRAN) - next - } - - # ------------------------------------------ - # Need to test for singularity - # ------------------------------------------ - if (Eval[NP-1] <= EPS) - { - NSIN <- NSIN + 1 - } - - # ------------------------------------------ - # Projecting all pints on line through - # theta with direction given by the eigen - # vector of the smallest eigenvalue, i.e., - # the direction orthogonal on the hyperplane - # given by the NP-subset. - # Compute the one-dimensional halfspace depth - # of theta on this line. - # ------------------------------------------ - # in Splus the smallest eigenvalue is the - # last one and corresponding vector is the - # last one, hence Eval[NP] is the smallest - # and Evec[,NP] is the corresponding vector - # ------------------------------------------ - eigenVec <- Evec[,NP] - NT <- sum( ifelse( eigenVec <= EPS, 1, 0 ) ) - KT <- sum( ifelse( eigenVec > EPS, PNT * eigenVec, 0 ) ) - if (NT == NP) - { - NSIN <- NSIN + 1 - foundSingular <- T - if (PRINT) - paste( " ERROR: Eigenvector = 0 for sample", NRAN ) - if (foundSingular) next # Do next Sample - } - K <- X %*% eigenVec - K <- K - KT - NUMH <- sum( ifelse( K > EPS, 1, 0 ) ) - NT <- sum( ifelse( abs(K) <= EPS, 1, 0 ) ) - # ------------------------------------------- - # If all projections collapse with theta, - # return to reduce the dimension - # ------------------------------------------- - if (NT == N) - { - NSIN <- -1 - return( list( NDEP=NDEP, NSIN=NSIN, EVEC=Evec ) ) # Will need -#Eigen Vector matrix to reduce dimension - } - - # ------------------------------------------- - # Update halfspace depth - # ------------------------------------------- - NDEP <- min( NDEP, min( NUMH+NT,N-NUMH ) ) - } - - return( list( NDEP=NDEP, NSIN=NSIN, EVEC=Evec ) ) - } - - #================================================ - Reduce <- function( X, PNT, Evec ) - { - Det <- det(Evec) - if (Det==0) - { - return( list( X=X, PNT=PNT, DET=Det ) ) - } - NP <- ncol(X) - - # --------------------------------------- - # Compute (NP-1)-dimentional coordinates - # for all points and theta - # --------------------------------------- - RedEvec <- matrix(Evec[,1:(NP-1)],nrow=NP,ncol=(NP-1)) # Reducing - # dimension by removing the last dimension with 0 variance. 
- PNT <- PNT %*% RedEvec - X <- X %*% RedEvec - if (!is.matrix(X)) X <- matrix(X,ncol=(NP-1)) - return( list( X=X, PNT=PNT, DET=Det ) ) - } - -# -# PROGRAM BEGINS -# - if (!is.na(SEED)) set.seed( SEED ) - # --------------------------------------- - # Initialize Number of singular samples - # --------------------------------------- - Nsin <- 0 - - X <- as.matrix( X ) - N <- nrow( X ) - NP <- ncol( X ) - -if (length(PNT) != NP){print("Length of 'PNT' has to equal to") -stop("number of columns in X !!! " ) -} - - # --------------------------------------- - # Handle special case where N=1 - # --------------------------------------- - if (N==1) - { - NDEP <- ifelse( abs(X[1,]-PNT) > EPS, 0, 1 ) # if any dimension -# different from point PNT, NDEP=0, else = 1 - NDEP <- min( NDEP ) - DEPTH <- NDEP/ N - return( DEPTH ) - } - - # --------------------------------------- - # Handle special case where NP=1 - # --------------------------------------- - repeat #+++++++++++++++++++++++++++++++++ - { - # In this case depth is equal to number of points <= to T - if (NP==1) - { - MORE <- sum( ifelse( X[,1] >= (PNT-EPS), 1, 0 ) ) - LESS <- sum( ifelse( X[,1] <= (PNT+EPS), 1, 0 ) ) - NDEP <- min( LESS, MORE ) - DEPTH <- NDEP / N - return( DEPTH ) - } - - # --------------------------------------- - # General Case, call function DEP - # --------------------------------------- - if (N > NP) - { - RES <- DEP( X=X, PNT=PNT, NDIR=NDIR, EPS=EPS, PRINT=PRINT ) - NDEP <- RES$NDEP - NSIN <- RES$NSIN - EVEC <- RES$EVEC - } - else - { - NSIN <- -1 # Needs to reduce dimensions - EVEC <- eigen( var( X ) )[[2]] # Getting eigenvector - } - - # --------------------------------------- - # If all points and theta are identified - # as lying on the same hyperplane, reduce - # the dimension of the data set by projection - # on that hyperplane, and compute the depth - # on the reduced data set - # --------------------------------------- - if (NSIN == -1) - { - NSIN <- 0 - if (PRINT) print( " Direction with zero variance detected" ) - RED <- Reduce( X=X, PNT=PNT, Evec=EVEC ) - X <- RED$X - PNT <- RED$PNT - Det <- RED$DET - if (Det==0) - { -print("\n\n\t DIMENSION REDUCTION TERMINATED\n\t EIGENVECTORS ARE NOT") -stop("INDEPENDENT\n\n" ) - } - NP <- ncol(X) - if (PRINT) paste(" Dimension reduced to", NP ) - } - else - { - break # No need to reduce dimension of X and hence no need to -#return, breaks 'repeat' loop - } - } # End repeat+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - - - DEPTH <- NDEP / N - return( DEPTH ) -} - - - -depths1<-function(m,j){ -if(m < j)depths1<-0 -else{ -if(j==1)depths1<-m -if(j==2)depths1<-(m*(m-1))/2 -if(j==3)depths1<-(m*(m-1)*(m-2))/6 -} -depths1 -} - -outbox<-function(x,mbox=FALSE,gval=NA,plotit=FALSE,STAND=FALSE){ -# -# This function detects outliers using the -# boxplot rule, but unlike the R function boxplot, -# the ideal fourths are used to estimate the quartiles. -# -# Setting mbox=TRUE results in using the modification -# of the boxplot rule suggested by Carling (2000). 
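-#
-# Example (flag outliers in a sample containing one clearly aberrant value):
-#   set.seed(1)
-#   x=c(rnorm(20),10)
-#   outbox(x)           # boxplot rule based on the ideal fourths
-#   outbox(x,mbox=TRUE) # Carling's modification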
-# -x<-x[!is.na(x)] # Remove missing values -if(plotit)boxplot(x) -n<-length(x) -temp<-idealf(x) -if(mbox){ -if(is.na(gval))gval<-(17.63*n-23.64)/(7.74*n-3.71) -cl<-median(x)-gval*(temp$qu-temp$ql) -cu<-median(x)+gval*(temp$qu-temp$ql) -} -if(!mbox){ -if(is.na(gval))gval<-1.5 -cl<-temp$ql-gval*(temp$qu-temp$ql) -cu<-temp$qu+gval*(temp$qu-temp$ql) -} -flag<-NA -outid<-NA -vec<-c(1:n) -for(i in 1:n){ -flag[i]<-(x[i]< cl || x[i]> cu) -} -if(sum(flag)==0)outid<-NULL -if(sum(flag)>0)outid<-vec[flag] -keep<-vec[!flag] -outval<-x[flag] -n.out=sum(length(outid)) -list(out.val=outval,out.id=outid,keep=keep,n=n,n.out=n.out,cl=cl,cu=cu) -} - -mscov<-function(m,STAND=TRUE){ -# -# m is an n by p matrix -# -# Compute a skipped covariance matrix -# -# Eliminate outliers using a projection method -# That is, compute Donoho-Gasko median, for each point -# consider the line between it and the median, -# project all points onto this line, and -# check for outliers using a boxplot rule. -# Repeat this for all points. A point is declared -# an outlier if for any projection it is an outlier -# using a modification of the usual boxplot rule. -# -# Eliminate any outliers and compute covariances -# using remaining data. -# -m<-elimna(m) -temp<-outpro(m,plotit=FALSE,STAND=STAND)$keep -mcor<-var(m[temp,]) -mcor -} - -runm3d<-function(x,y,theta=50,phi=25,fr=.8,tr=.2,plotit=TRUE,pyhat=FALSE,nmin=0, -expand=.5,scale=FALSE,zscale=FALSE,xout=FALSE,outfun=out,eout=FALSE,xlab="X",ylab="Y",zlab="", -pr=TRUE,SEED=TRUE,ticktype="simple"){ -# -# running mean using interval method -# -# fr controls amount of smoothing -# tr is the amount of trimming -# x is an n by p matrix of predictors. -# -# Rows of data with missing values are automatically removed. -# -# When plotting, theta and phi can be used to change -# the angle at which the plot is viewed. -# -# theta is the azimuthal direction and phi the colatitude -# expand controls relative length of z-axis -# -library(MASS) -library(akima) -if(plotit){ -if(pr){ -print("Note: when there is independence, scale=F is probably best") -print("When there is dependence, scale=T is probably best") -}} -if(!is.matrix(x))stop("x should be a matrix") -if(nrow(x) != length(y))stop("number of rows of x should equal length of y") -temp<-cbind(x,y) -p<-ncol(x) -p1<-p+1 -temp<-elimna(temp) # Eliminate any rows with missing values. 
-if(xout){ -keepit<-rep(TRUE,nrow(x)) -flag<-outfun(x,plotit=FALSE)$out.id -keepit[flag]<-F -x<-x[keepit,] -y<-y[keepit] -} -if(zscale){ -for(j in 1:p1){ -temp[,j]<-(temp[,j]-median(temp[,j]))/mad(temp[,j]) -}} -x<-temp[,1:p] -y<-temp[,p1] -pyhat<-as.logical(pyhat) -plotit<-as.logical(plotit) -if(SEED)set.seed(12) -m<-cov.mve(x) -iout<-c(1:nrow(x)) -rmd<-1 # Initialize rmd -nval<-1 -for(i in 1:nrow(x))rmd[i]<-mean(y[near3d(x,x[i,],fr,m)],tr) -for(i in 1:nrow(x))nval[i]<-length(y[near3d(x,x[i,],fr,m)]) -if(plotit){ -if(ncol(x)!=2)stop("When plotting, x must be an n by 2 matrix") -fitr<-rmd[nval>nmin] -y<-y[nval>nmin] -x<-x[nval>nmin,] -iout<-c(1:length(fitr)) -nm1<-length(fitr)-1 -for(i in 1:nm1){ -ip1<-i+1 -for(k in ip1:length(fitr))if(sum(x[i,]==x[k,])==2)iout[k]<-0 -} -fitr<-fitr[iout>=1] # Eliminate duplicate points in the x-y plane -# This is necessary when doing three dimensional plots -# with the R function interp -mkeep<-x[iout>=1,] -fit<-interp(mkeep[,1],mkeep[,2],fitr) -persp(fit,theta=theta,phi=phi,xlab=xlab,ylab=ylab,zlab=zlab,expand=expand, -scale=scale,ticktype=ticktype) -} -last<-"Done" -if(pyhat)last<-rmd -last -} - -rdplot<-function(x,fr=NA,plotit=TRUE,theta=50,phi=25,expand=.5,pyhat=FALSE,pts=NA, -xlab="X",ylab="",ticktype="simple"){ -# -# Expected frequency curve -# -# fr controls amount of smoothing -# theta is the azimuthal direction and phi the colatitude -# -plotit<-as.logical(plotit) -x<-elimna(x) -x<-as.matrix(x) -rmd<-NA -if(ncol(x)==1){ -x=as.vector(x) -if(is.na(fr))fr<-.8 -if(is.na(pts[1]))pts<-x -for(i in 1:length(pts)){ -rmd[i]<-sum(near(x,pts[i],fr)) -} -if(mad(x)!=0)rmd<-rmd/(2*fr*mad(x)) -rmd<-rmd/length(x) -if(plotit){ -plot(pts,rmd,type="n",ylab=ylab,xlab=xlab) -sx<-sort(pts) -xorder<-order(pts) -sysm<-rmd[xorder] -lines(sx,sysm) -}} -x<-as.matrix(x) -if(ncol(x)>1){ -library(MASS) -if(is.na(fr))fr<-.6 -m<-covmve(x) -for(i in 1:nrow(x)){ -rmd[i]<-sum(near3d(x,x[i,],fr,m)) -} -rmd<-rmd/nrow(x) -if(plotit && ncol(x)==2){ -library(akima) -fitr<-rmd -iout<-c(1:length(fitr)) -nm1<-length(fitr)-1 -for(i in 1:nm1){ -ip1<-i+1 -for(k in ip1:length(fitr))if(sum(x[i,]==x[k,])==2)iout[k]<-0 -} -fitr<-fitr[iout>=1] -mkeep<-x[iout>=1,] -fit<-interp(mkeep[,1],mkeep[,2],fitr) -persp(fit,theta=theta,phi=phi,expand=expand,xlab="Var 1",ylab="Var 2",zlab="", -ticktype=ticktype) -} -} -if(pyhat)last<-rmd -if(!pyhat)last<-"Done" -last -} - - rimul<-function(J,K,x,alpha=.05,p=J*K,grp=c(1:p),plotit=TRUE,op=4){ -# -# Rank-based multiple comparisons for all interactions -# in J by K design. The method is based on an -# extension of Cliff's heteroscedastic technique for -# handling tied values and the Patel-Hoel definition of no interaction. -# -# The familywise type I error probability is controlled by using -# a critical value from the Studentized maximum modulus distribution. -# -# It is assumed all groups are independent. -# -# Missing values are automatically removed. -# -# The default value for alpha is .05. Any other value results in using -# alpha=.01. -# -# Argument grp can be used to rearrange the order of the data. 
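-#
-# Example (hypothetical 2-by-2 design with the four groups in list mode,
-# ordered level (1,1), (1,2), (2,1), (2,2)):
-#   set.seed(1)
-#   dat=list()
-#   for(j in 1:4)dat[[j]]=rnorm(25)
-#   rimul(2,2,dat)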
-# - df=Inf -if(is.matrix(x))x<-listm(x) -if(!is.list(x))stop("Data must be stored in a matrix or in list mode.") -CCJ<-(J^2-J)/2 -CCK<-(K^2-K)/2 -CC<-CCJ*CCK -test<-matrix(NA,CC,8) -test.p<-matrix(NA,CC,7) -for(j in 1:J){ -xx<-!is.na(x[[j]]) -val<-x[[j]] -x[[j]]<-val[xx] # Remove missing values -} -mat<-matrix(grp,ncol=K,byrow=TRUE) -dimnames(test)<-list(NULL,c("Factor A","Factor A","Factor B","Factor B","delta","ci.lower","ci.upper","p.value")) -jcom<-0 -crit=qsmm(1-alpha,CC,df) -#if(alpha!=.05)crit<-smmcrit01(200,CC) -alpha<-1-pnorm(crit) -for (j in 1:J){ -for (jj in 1:J){ -if (j < jj){ -for (k in 1:K){ -for (kk in 1:K){ -if (k < kk){ -jcom<-jcom+1 -test[jcom,1]<-j -test[jcom,2]<-jj -test[jcom,3]<-k -test[jcom,4]<-kk -temp1<-cid(x[[mat[j,k]]],x[[mat[j,kk]]],plotit=FALSE) -temp2<-cid(x[[mat[jj,k]]],x[[mat[jj,kk]]],plotit=FALSE) -delta<-temp2$d-temp1$d -sqse<-temp1$sqse.d+temp2$sqse.d -test[jcom,5]<-delta/2 -test[jcom,6]<-delta/2-crit*sqrt(sqse/4) -test[jcom,7]<-delta/2+crit*sqrt(sqse/4) -test[jcom,8]=2*(1-pnorm(abs((delta/2)/sqrt(sqse/4)))) -}}}}}} -if(J==2 & K==2){ -if(plotit){ -m1<-outer(x[[1]],x[[2]],FUN="-") -m2<-outer(x[[3]],x[[4]],FUN="-") -m1<-as.vector(m1) -m2<-as.vector(m2) -g2plot(m1,m2,op=op) -}} -list(test=test) -} - -ifmest<-function(x,bend=1.28,op=2){ -# -# Estimate the influence function of an M-estimator, using -# Huber's Psi, evaluated at x. -# -# Data are in the vector x, bend is the percentage bend -# -# op=2, use adaptive kernel estimator -# otherwise use Rosenblatt's shifted histogram -# -tt<-mest(x,bend) # Store M-estimate in tt -s<-mad(x)*qnorm(.75) -if(op==2){ -val<-akerd(x,pts=tt,plotit=FALSE,pyhat=TRUE) -val1<-akerd(x,pts=tt-s,plotit=FALSE,pyhat=TRUE) -val2<-akerd(x,pts=tt+s,plotit=FALSE,pyhat=TRUE) -} -if(op!=2){ -val<-kerden(x,0,tt) -val1<-kerden(x,0,tt-s) -val2<-kerden(x,0,tt+s) -} -ifmad<-sign(abs(x-tt)-s)-(val2-val1)*sign(x-tt)/val -ifmad<-ifmad/(2*.6745*(val2+val1)) -y<-(x-tt)/mad(x) -n<-length(x) -b<-sum(y[abs(y)<=bend])/n -a<-hpsi(y,bend)*mad(x)-ifmad*b -ifmest<-a/(length(y[abs(y)<=bend])/n) -ifmest -} - -qmjci<-function(x,q=.5,alpha=.05,op=1,pr=TRUE){ -# -# Compute a 1-alpha confidence for qth quantile using the -# Maritz-Jarrett estimate of the standard error. -# -# The default quantile is .5. -# The default value for alpha is .05. -# -x=elimna(x) -if(pr){ -if(sum(duplicated(x)>0))print("Duplicate values detected; use hdpb") -} -if(q <= 0 || q>=1)stop("q must be between 0 and 1") -y<-sort(x) -m<-floor(q*length(x)+.5) -crit<-qnorm(1-alpha/2) -qmjci<-vector(mode="numeric",2) -se<-NA -if(op==1)se<-mjse(x) -if(op==2){ -if(q!=.5)stop("op=2 works only with q=.5") -se<-msmedse(x) -} -if(op==3)se<-qse(x,q) -if(is.na(se))stop("Something is wrong, op should be 1, 2 or 3") -qmjci[1]<-y[m]-crit*se -qmjci[2]<-y[m]+crit*se -qmjci -} - - -bootdpci<-function(x,y,est=onestep,nboot=NA,alpha=.05,plotit=FALSE,dif=TRUE,BA=FALSE,SR=TRUE,...){ -# -# Use percentile bootstrap method, -# compute a .95 confidence interval for the difference between -# a measure of location or scale -# when comparing two dependent groups. -# By default, a one-step M-estimator (with Huber's psi) is used. -# If, for example, it is desired to use a fully iterated -# M-estimator, use fun=mest when calling this function. 
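-#
-# Example (two dependent measures on the same 30 cases, simulated for
-# illustration):
-#   set.seed(1)
-#   pre=rnorm(30)
-#   post=pre+rnorm(30,mean=.3)
-#   bootdpci(pre,post)          # one-step M-estimator (the default)
-#   bootdpci(pre,post,est=mean) # percentile bootstrap based on means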
-# -okay=FALSE -if(identical(est,onestep))okay=TRUE -if(identical(est,mom))okay=TRUE -if(!okay)SR=FALSE -output<-rmmcppb(x,y,est=est,nboot=nboot,alpha=alpha,SR=SR, -plotit=plotit,dif=dif,BA=BA,...)$output -list(output=output) -} - - -relfun<-function(xv,yv,C=36,epsilon=.0001,plotit=TRUE,pch='*',xlab='X',ylab='Y'){ -# Compute the measures of location, scale and correlation used in the -# bivariate boxplot of Goldberg and Iglewicz, -# Technometrics, 1992, 34, 307-320. -# -# The code in relplot plots the boxplot. -# -# This code assumes the data are in xv and yv -# -# This code uses the function biloc, stored in the file biloc.b7 and -# bivar stored in bivar.b7 -# -plotit<-as.logical(plotit) -# -# Do pairwise elimination of missing values -# -temp<-matrix(c(xv,yv),ncol=2) -temp<-elimna(temp) -xv<-temp[,1] -yv<-temp[,2] -tx<-biloc(xv) -ty<-biloc(yv) -sx<-sqrt(bivar(xv)) -sy<-sqrt(bivar(yv)) -z1<-(xv-tx)/sx+(yv-ty)/sy -z2<-(xv-tx)/sx-(yv-ty)/sy -ee<-((z1-biloc(z1))/sqrt(bivar(z1)))^2+ -((z2-biloc(z2))/sqrt(bivar(z2)))^2 -w<-(1-ee/C)^2 -if(length(w[w==0])>=length(xv)/2)warning("More than half of the w values equal zero") -sumw<-sum(w[ee1, a standard percentile bootstrap method is used -# with FWE (the probability of at least one type I error) -# controlled via the Bonferroni inequality. -# -# The predictor values are assumed to be in the n by p matrix x. -# The default number of bootstrap samples is nboot=599 -# -# SEED=T causes the seed of the random number generator to be set to 2, -# otherwise the seed is not set. -# -# Warning: probability coverage has been studied only when alpha=.05 -# -x<-as.matrix(x) -p<-ncol(x) -pp<-p+1 -temp<-elimna(cbind(x,y)) # Remove any missing values. -x<-temp[,1:p] -y<-temp[,p+1] -if(xout){ -m<-cbind(x,y) -if(identical(outfun,outblp))flag=outblp(x,y,plotit=FALSE)$keep -else -flag<-outfun(x,plotit=FALSE)$keep -m<-m[flag,] -x<-m[,1:p] -y<-m[,pp] -} -x<-as.matrix(x) -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -print("Taking bootstrap samples; please wait") -data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot) -bvec<-apply(data,1,regboot,x,y,lsfit) # A p+1 by n matrix. The first row -# contains the bootstrap intercepts, the second row -# contains the bootstrap values for first predictor, etc. 
-if(p==1){ -if(alpha != .05){print("Resetting alpha to .05") -print("With p=1, unknown how to adjust confidence interval") -print("when alpha is not equal to .05.") -} -ilow<-15 -ihi<-584 -if(length(y) < 250){ -ilow<-13 -ihi<-586 -} -if(length(y) < 180){ -ilow<-10 -ihi<-589 -} -if(length(y) < 80){ -ilow<-7 -ihi<-592 -} -if(length(y) < 40){ -ilow<-6 -ihi<-593 -} -ilow<-round((ilow/599)*nboot) -ihi<-round((ihi/599)*nboot) -} -if(p>1){ -ilow<-round(alpha*nboot/2)+1 -ihi<-nboot-ilow -} -lsfitci<-matrix(0,ncol(x),2) -for(i in 1:ncol(x)){ -ip<-i+1 -bsort<-sort(bvec[ip,]) -lsfitci[i,1]<-bsort[ilow+1] -lsfitci[i,2]<-bsort[ihi] -} -bsort<-sort(bvec[1,]) -interceptci<-c(bsort[15],bsort[584]) -crit.level<-NA -pmat<-NA -if(p>1){ -crit.level<-alpha/p -pmat<-matrix(NA,nrow=p,ncol=2) -dimnames(pmat) <- list(NULL, c("Slope","p-value")) -for(pv in 1:p){ -pmat[pv,1]<-pv -pp<-pv+1 -pmat[pv,2]<-(sum(bvec[pp,]<0)+.5*sum(bvec[pp,]==0))/nboot -temp3<-1-pmat[pv,2] -pmat[pv,2]<-2*min(pmat[pv,2],temp3) -}} -list(intercept.ci=interceptci,slope.ci=lsfitci,crit.level=crit.level, -p.values=pmat) -} - -wmve<-function(m,SEED=TRUE){ -# -# Compute skipped measure of location and scatter -# using MVE method -# -if(is.matrix(m))n<-nrow(m) -if(is.vector(m))n<-length(m) -flag<-rep(TRUE,n) -vec<-out(m,plotit=FALSE,SEED=SEED)$out.id -flag[vec]<-FALSE -if(is.vector(m)){ -center<-mean(m[flag]) -scatter<-var(m[flag]) -} -if(is.matrix(m)){ -center<-apply(m[flag,],2,mean) -scatter<-var(m[flag,]) -} -list(center=center,cov=scatter) -} - -wmw<-function(x,y){ -# -# Do Mann-Whitney test -# Return the usual p-value followed by adjusted -# p-value using Hodges, Ramsey and Wechsler (1990) method -# (See Wilcox, 2003, p. 559.) -# -x=elimna(x) -y=elimna(y) -m<-length(x) -n<-length(y) -com<-rank(c(x,y)) -xp1<-length(x)+1 -x<-com[1:length(x)] -y<-com[xp1:length(com)] -u<-sum(y)-n*(n+1)/2 -sigsq<-m*n*(n+m+1)/12 -yv<-(u+.5-m*n/2)/sqrt(sigsq) -kv<-20*m*n*(m+n+1)/(m^2+n^2+n*m+m+n) -S<-yv^2 -T1<-S-3 -T2<-(155*S^2-416*S-195)/42 -cv<-1+T1/kv+T2/kv^2 -sighrw<-2*(1-pnorm(abs(cv*yv))) -z<-(u-(.5*m*n))/sqrt(sigsq) -sig<-2*(1-pnorm(abs(z))) -list(p.value=sig,adj.p.value=sighrw,p.hat=u/(n*m)) -} - -lsfitNci<-function(x,y,alpha=.05){ -# -# Compute confidence interval for least squares -# regression using heteroscedastic method -# recommended by Long and Ervin (2000). -# -x<-as.matrix(x) -if(nrow(x) != length(y))stop("Length of y does not match number of x values") -m<-cbind(x,y) -m<-elimna(m) -y<-m[,ncol(x)+1] -temp<-lsfit(x,y) -x<-cbind(rep(1,nrow(x)),m[,1:ncol(x)]) -xtx<-solve(t(x)%*%x) -h<-diag(x%*%xtx%*%t(x)) -hc3<-xtx%*%t(x)%*%diag(temp$res^2/(1-h)^2)%*%x%*%xtx -df<-nrow(x)-ncol(x) -crit<-qt(1-alpha/2,df) -al<-ncol(x) -ci<-matrix(NA,nrow=al,ncol=3) -for(j in 1:al){ -ci[j,1]<-j -ci[j,2]<-temp$coef[j]-crit*sqrt(hc3[j,j]) -ci[j,3]<-temp$coef[j]+crit*sqrt(hc3[j,j]) -} -print("Confidence intervals for intercept followed by slopes:") -list(ci=ci,stand.errors=sqrt(diag(hc3))) -} - - - -pow2an<-function(x,y,ci=FALSE,plotit=TRUE,nboot=800){ -# -# Do a power analysis when comparing the 20% trimmed -# means of two independent groups with the percentile -# bootstrap method. -# -# -x<-x[!is.na(x)] -y<-y[!is.na(y)] -lp<-NA -se<-yuen(x,y)$se -gval<-NA -dv<-seq(0,3.5*se,length=15) -for(i in 1:length(dv)){ -gval[i]<-powest(x,y,dv[i],se) -} -if(!ci){ -if(plotit){ -plot(dv,gval,type="n",xlab="delta",ylab="power") -lines(dv,gval) -}} -if(ci){ -print("Taking bootstrap samples. 
Please wait.") -datax <- matrix(sample(x, size = length(x) * nboot, replace = TRUE), - nrow = nboot) -datay <- matrix(sample(y, size = length(y) * nboot, replace = TRUE), - nrow = nboot) -pboot<-matrix(NA,ncol=15,nrow=nboot) -for(i in 1:nboot){ -se<-yuen(datax[i,],datay[i,])$se -for(j in 1:length(dv)){ -pboot[i,j]<-powest(x,y,dv[j],se) -}} -ll<-floor(.05*nboot+.5) -for(i in 1:15){ -temp<-sort(pboot[,i]) -lp[i]<-temp[ll] -} -plot(c(dv,dv),c(gval,lp),type="n",xlab="delta",ylab="power") -lines(dv,gval) -lines(dv,lp,lty=2) -} -list(delta=dv,power=gval,lowp=lp) -} -powest<-function(x=NA,y=NA,delta=0,se=NA,wv1=NA,wv2=NA,n1=NA,n2=NA){ -# -# wv1 = Winsorized variance for group 1 -# wv2 = Winsorized variance for group 2 -# -# Only 20% trimming is allowed. -# -tr<-.2 -if(is.na(se)){ -if(is.na(wv1)){ -h1 <- length(x) - 2 * floor(tr * length(x)) -h2 <- length(y) - 2 * floor(tr * length(y)) -q1 <- ((length(x) - 1) * winvar(x, tr))/(h1 * (h1 - 1)) -q2 <- ((length(y) - 1) * winvar(y, tr))/(h2 * (h2 - 1)) -} -if(!is.na(wv1)){ -if(is.na(n1))stop("Need to specify sample size for group 1") -if(is.na(n2))stop("Need to specify sample size for group 2") -h1<-n1-2*floor(tr*n1) -h2<-n2-2*floor(tr*n2) -q1<-(n1-1)*wv1/(h1*(h1-1)) -q2<-(n2-1)*wv2/(h2*(h2-1)) -} -se<-sqrt(q1+q2) -} -ygam<-sqrt(2*.01155)*c(0:35)/8 -pow<-c(500.0,540.0,607.0, 706.0, 804.0,981.0,1176.0,1402.0,1681.0, 2008.0, - 2353.0, 2769.0, 3191.0, 3646.0, 4124.0, 4617.0, 5101.0, 5630.0, - 6117.0, 6602.0, 7058.0, 7459.0, 7812.0, 8150.0, 8479.0, 8743.0, - 8984.0, 9168.0, 9332.0, 9490.0, 9607.0, 9700.0, 9782.0, 9839.0, - 9868.0)/10000 -flag<-(delta==0 & se==0) -if(flag)powest<-.05 -else{ -chk<-floor(8*delta/se)+1 -chk1<-chk+1 -gval<-delta/se -d1<-(gval-(chk-1)/8)*8 -if(chk > length(pow))powest<-1 -if(chk == length(pow))pow[chk1]<-1 -if(chk <= length(pow)) -powest<-pow[chk]+d1*(pow[chk1]-pow[chk]) -} -powest -} - -twopcor<-function(x1,y1,x2,y2,SEED=TRUE){ -# -# Compute a .95 confidence interval for -# the difference between two Pearson -# correlations corresponding to two independent -# goups. -# -# This function uses an adjusted percentile bootstrap method that -# gives good results when the error term is heteroscedastic. -# -# WARNING: If the number of bootstrap samples is altered, it is -# unknown how to adjust the confidence interval when n1+n2 < 250. -# -nboot<-599 #Number of bootstrap samples -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -X<-elimna(cbind(x1,y1)) -x1<-X[,1] -y1<-X[,2] -X<-elimna(cbind(x2,y2)) -x2<-X[,1] -y2<-X[,2] -data1<-matrix(sample(length(y1),size=length(y1)*nboot,replace=TRUE),nrow=nboot) -bvec1<-apply(data1,1,pcorbsub,x1,y1) # A 1 by nboot matrix. -data2<-matrix(sample(length(y2),size=length(y2)*nboot,replace=TRUE),nrow=nboot) -bvec2<-apply(data2,1,pcorbsub,x2,y2) # A 1 by nboot matrix. 
-bvec<-bvec1-bvec2 -ilow<-15 -ihi<-584 -if(length(y1)+length(y2) < 250){ -ilow<-14 -ihi<-585 -} -if(length(y1)+length(y2) < 180){ -ilow<-11 -ihi<-588 -} -if(length(y1)+length(y2) < 80){ -ilow<-8 -ihi<-592 -} -if(length(y1)+length(y2) < 40){ -ilow<-7 -ihi<-593 -} -bsort<-sort(bvec) -r1<-cor(x1,y1) -r2<-cor(x2,y2) -ci<-c(bsort[ilow],bsort[ihi]) -list(r1=r1,r2=r2,ci=ci) -} - - -tworhobt<-function(X1,Y1,X2,Y2,alpha=.05,nboot=499,SEED=TRUE){ -# -# compare two independent correlations using a bootstrap-t method in conjunction with the HC4 estimator -# -if(SEED)set.seed(2) -r1=cor(X1,Y1) -r2=cor(X2,Y2) -n1=length(X1) -n2=length(X2) -v=NA -Nboot=nboot+1 -for(i in 1:Nboot){ -if(i<=nboot){ -id1=sample(n1,n1,replace=TRUE) -id2=sample(n2,n2,replace=TRUE) -} -if(i==Nboot){ -id1=c(1:n1) -id2=c(1:n2) -} -x1=X1[id1] -y1=Y1[id1] -x2=X2[id2] -y2=Y2[id2] -x1=(x1-mean(x1))/sd(x1) -y1=(y1-mean(y1))/sd(y1) -x2=(x2-mean(x2))/sd(x2) -y2=(y2-mean(y2))/sd(y2) -temp1=olshc4(x1,y1) -temp2=olshc4(x2,y2) -if(i<=nboot)v[i]=(temp1$ci[2,2]-r1-temp2$ci[2,2]+r2)/sqrt(temp1$ci[2,6]^2+temp2$ci[2,6]^2) -if(i==Nboot)v[i]=(temp1$ci[2,2]-temp2$ci[2,2])/sqrt(temp1$ci[2,6]^2+temp2$ci[2,6]^2) -} -ibot<-round(alpha*nboot/2) -itop<-nboot-ibot+1 -ibot=ibot+1 #adjusted so that p-value and confidence interval give consistent results. -vs=sort(v[1:nboot]) -crit=c(vs[ibot],vs[itop]) -test=v[Nboot] -if(test<0)G=mean(test>v[1:nboot]) -if(test>=0)G=mean(test1)pv=1 -if(pv<0)pv=0 -list(test=test,crit.val=crit,p.value=pv) -} - - - -indtall<-function(x,y=NULL,tr=0,nboot=500,SEED=TRUE){ -# -# Test the hypothesis of independence for -# 1. all pairs of variables in matrix x, if y=NA, or -# 2. between each variable stored in the matrix x and y. -# This is done by repeated calls to indt -# -x<-as.matrix(x) -# First, eliminate any rows of data with missing values. -if(!is.null(y[1])){ -temp <- cbind(x, y) - temp <- elimna(temp) - pval<-ncol(temp)-1 - x <- temp[,1:pval] - y <- temp[, pval+1] -} -x<-as.matrix(x) -if(is.null(y[1])){ -ntest<-(ncol(x)^2-ncol(x))/2 -if(ntest==0)stop("Something is wrong. Does x have only one column?") -output<-matrix(NA,nrow=ntest,ncol=4) -dimnames(output)<-list(NULL,c("VAR","VAR","Test Stat.","p-value")) -x<-elimna(x) -ic<-0 -for (j in 1:ncol(x)){ -for (jj in 1:ncol(x)){ -if(jyhat)/length(x) -zhat<-NA -if(!is.na(z[1])){ -# -# Make decisions for the data in z, -# set zhat=1 if decide it came from -# group 1. -# -zxhat<-0 -zyhat<-0 -zhat<-0 -if(op==2){ -zxhat<-akerd(x,pts=z,pyhat=TRUE,plotit=FALSE) -zyhat<-akerd(y,pts=z,pyhat=TRUE,plotit=FALSE) -} -for(i in 1:length(z)){ -if(op==1){ -zxhat[i]<-kerden(x,0,z[i]) -zyhat[i]<-kerden(y,0,z[i]) -} -zhat[i]<-1 -if(is.na(zxhat[i]) || is.na(zyhat[i])){ -# Missing values, -# data can't be used to make a decision, -# so make a random decision about whether a value -# came from first group. 
-arb<-runif(1) -zhat[i]<-1 -if(arb < .5)zhat[i]<-0 -} -else -if(zxhat[i]=2){ -library(akima) -if(ncol(x)==2 & !scale){ -if(pr){ -print("scale=FALSE is specified.") -print("If there is dependence, might use scale=TRUE") -print("To get a p-value, based on the measure of the") -print("strength of association based on this function,") -print("use the function lplotPV") -}} -x<-m[,1:d] -y<-m[,d+1] -if(eout & xout)stop("Can't have both eout and xout = FALSE") -if(eout){ -flag<-outfun(m,plotit=FALSE,...)$keep -m<-m[flag,] -n.keep=nrow(m) -} -if(xout){ -if(identical(outfun,outblp))flag=outblp(x,y,plotit=FALSE)$keep -else -flag<-outfun(x,plotit=FALSE,...)$keep -m<-m[flag,] -n.keep=nrow(m) -} -x<-m[,1:d] -y<-m[,d+1] -if(d==2)fitr<-fitted(loess(y~x[,1]*x[,2],span=span,family=family)) -if(d==3)fitr<-fitted(loess(y~x[,1]*x[,2]*x[,3],span=span,family=family)) -if(d==4)fitr<-fitted(loess(y~x[,1]*x[,2]*x[,3]*x[,4],span=span,family=family)) -if(d>4)stop("Can have at most four predictors") -last<-fitr -if(d==2 && plotit){ -iout<-c(1:length(fitr)) -nm1<-length(fitr)-1 -for(i in 1:nm1){ -ip1<-i+1 -for(k in ip1:length(fitr))if(sum(x[i,]==x[k,])==2)iout[k]<-0 -} -fitr<-fitr[iout>=1] # Eliminate duplicate points in the x-y plane -# This is necessary when doing three dimensional plots -# with the R function interp -mkeep<-x[iout>=1,] -fitr<-interp(mkeep[,1],mkeep[,2],fitr,duplicate=duplicate) -if(!ZLIM)persp(fitr,theta=theta,phi=phi,xlab=xlab,ylab=ylab,zlab=zlab,expand=expand, -scale=scale,ticktype=ticktype) -if(ZLIM)persp(fitr,theta=theta,phi=phi,xlab=xlab,ylab=ylab,zlab=zlab,expand=expand, -scale=scale,ticktype=ticktype,zlim=c(0,1)) #used by logreg.plot -}} -if(d==1){ -m<-elimna(cbind(x,y)) -x<-m[,1:d] -y<-m[,d+1] -if(eout && xout)stop("Cannot have both eout and xout = T") -if(eout){ -flag<-outfun(m,plotit=FALSE,...)$keep -m<-m[flag,] -n.keep=nrow(m) -} -if(xout){ -if(identical(outfun,outblp))flag=outblp(x,y,plotit=FALSE)$keep -else -flag<-outfun(x,plotit=FALSE,...)$keep -m<-m[flag,] -n.keep=nrow(m) -} -x<-m[,1:d] -y<-m[,d+1] -if(plotit){ -plot(x,y,xlab=xlab,ylab=ylab,pch=pc,frame=frame) -lines(lowess(x,y,f=low.span)) -} -tempxy<-lowess(x,y,f=low.span) -yyy<-tempxy$y -xxx<-tempxy$x -last<-yyy -chkit<-sum(duplicated(x)) -if(chkit>0){ -last<-rep(1,length(y)) -for(j in 1:length(yyy)){ -for(i in 1:length(y)){ -if(x[i]==xxx[j])last[i]<-yyy[j] -}} -} -} -if(!STR)E.power=NA -if(STR){ -E.power<-1 -if(!cor.op)E.power<-varfun(last[!is.na(last)])/varfun(y) -if(cor.op || E.power>=1){ -if(d==1){ -xord<-order(x) -E.power<-cor.fun(last,y[xord])$cor^2 -} -if(d>1)E.power<-cor.fun(last,y)$cor^2 -} -E.power=as.numeric(E.power) -} -if(!pyhat)last <- NULL -list(Strength.Assoc=sqrt(E.power),Explanatory.power=E.power,yhat.values=last,n=n.orig, -n.keep=n.keep) -} -qci<-function(x,q=.5,alpha=.05,op=3){ -# -# Compute a confidence interval for qth quantile -# using an estimate of standard error based on -# adaptive kernel density estimator. -# The qth quantile is estimated with a single order statistic. -# -# For argument op, see the function qse. -# -if(sum(duplicated(x)>0))stop("Duplicate values detected; use hdpb") -n<-length(x) -xsort<-sort(x) -iq <- floor(q * n + 0.5) -qest<-xsort[iq] -se<-qse(x,q,op=op) -crit<-qnorm(1-alpha/2) -ci.low<-qest-crit*se -ci.up<-qest+crit*se -list(ci.low=ci.low,ci.up=ci.up,q.est=qest) -} -qint<-function(x,q=.5,alpha=.05,pr=FALSE){ -# -# Compute a 1-alpha confidence interval for the qth quantile -# The function returns the exact probability coverage. 
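-#
-# Example (distribution-free confidence intervals based on order statistics):
-#   set.seed(1)
-#   x=rexp(40)
-#   qint(x)        # CI for the median
-#   qint(x,q=.25)  # CI for the lower quartile; ci.coverage is the exact coverage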
-# -if(pr){ -if(sum(duplicated(x)>0))print("Duplicate values detected; use hdpb") -} -n<-length(x) -ii<-floor(q*n+.5) -jj<-ii+1 -if(ii<=0)stop("Cannot compute a confidence interval for this q") -if(jj>n)stop("Cannot compute a confidence interval for this q") -jjm<-jj-1 -iim<-ii-1 -cicov<-pbinom(jjm,n,q)-pbinom(iim,n,q) -while(cicov<1-alpha){ -iim<-max(iim-1,0) -jjm<-min(jjm+1,n) -if(iim==0 && jjm==n)break -cicov<-pbinom(jjm,n,q)-pbinom(iim,n,q) -} -xsort<-sort(x) -low<-xsort[iim+1] -hi<-xsort[jjm+1] -if(cicov<1-alpha){ -if(pr)print("Warning: Desired probability coverage could not be achieved") -} -list(ci.low=low,ci.up=hi,ci.coverage=cicov) -} - - -qest<-function(x,q=.5,na.rm=TRUE){ -# -# Compute an estimate of qth quantile -# using a single order statistic -# -if(na.rm)x<-elimna(x) -if(q<=0 || q>=1)stop("q must be > 0 and < 1") -n<-length(x) -xsort<-sort(x) -iq <- floor(q * n + 0.5) -qest<-NA -if(iq>0 || iq<=n)qest<-xsort[iq] -qest -} -taureg<-function(m,y,corfun=tau,...){ -# -# Compute Kendall's tau between y and each of the -# p variables stored in the n by p matrix m. -# -# Alternative measures of correlation can be used via the -# argument corfun. The only requirement is that the function -# corfun returns the correlation in corfun$cor and the p-value -# in corfun$p.value. -# -# This function also returns the two-sided significance level -# for all pairs of variables, plus a test of zero correlations -# among all pairs. (See chapter 9 of Wilcox, 2005, for details.) -# -m<-as.matrix(m) -tauvec<-NA -siglevel<-NA -for (i in 1:ncol(m)){ -pbc<-corfun(m[,i],y,...) -tauvec[i]<-pbc$cor -siglevel[i]<-pbc$p.value -} -list(cor=tauvec,p.value=siglevel) -} - -cor2M=taureg - -correg.sub<-function(X,theta,corfun=tau){ -np<-ncol(X) -p<-np-1 -x<-X[,1:p] -y<-X[,np] -temp<-t(t(x)*theta) -yhat<-apply(temp,1,sum) -yhat<-yhat -res<-y-yhat -val<-sum(abs(taureg(x,res,corfun=corfun)$cor)) -val -} -correg<-function(x,y,corfun=tau,loc.fun=median){ -# -# A generalization of the Theil-Sen estimator -# Rather than use Kendall's tau, can use an alternative -# correlation via the argument corfun. -# loc.fun determines how the intercept is computed; -# -# The Nelder-Mead method is used rather than -# Gauss-Seidel. -# -# -X<-cbind(x,y) -X<-elimna(X) -np<-ncol(X) -N<-np-1 -temp<-tsreg(x,y)$coef -START<-temp[2:np] -temp<-nelderv2(X,N,FN=correg.sub,START=START,corfun=corfun) -x <- as.matrix(x) -alpha <- loc.fun(y - x %*% temp) -coef <- c(alpha,temp) -res <- y - x %*% temp - alpha -list(coef = coef, residuals = res) -} -rmulnorm<-function(n,p,cmat,SEED=FALSE){ -# -# Generate data from a multivariate normal -# n= sample size -# p= number of variables -# cmat is the covariance (or correlation) matrix -# -# Method (e.g. Browne, M. W. (1968) A comparison of factor analytic -# techniques. Psychometrika, 33, 267-334. -# Let U'U=R be the Cholesky decomposition of R. Generate independent data -# from some dist yielding X. Then XU has population correlation matrix R -# -if(SEED)set.seed(2) -y<-matrix(rnorm(n*p),ncol=p) -rval<-matsqrt(cmat) -y<-t(rval%*%t(y)) -y -} - - matsqrt <- function(x) { - xev1<-NA - xe <- eigen(x) - xe1 <- xe$values - if(all(xe1 >= 0)) { - xev1 <- diag(sqrt(xe1)) - } -if(is.na(xev1[1]))stop("The matrix has negative eigenvalues") - xval1 <- cbind(xe$vectors) - xval1i <- solve(xval1) - y <- xval1 %*% xev1 %*% xval1i -y - } - - -ghmul<-function(n,g=0,h=0,p=2,cmat=diag(rep(1,p)),SEED=FALSE){ -# -# generate n observations from a p-variate dist -# based on the g and h dist. 
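-# The argument g controls skewness and h controls tail heaviness; g=h=0 yields multivariate normal data.
-# Illustrative call: ghmul(100,g=.5,h=.2,p=2) generates 100 bivariate g-and-h observations.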
-# -# cmat is the correlation matrix -# -x<-rmulnorm(n,p,cmat,SEED=SEED) -for(j in 1:p){ -if (g>0){ -x[,j]<-(exp(g*x[,j])-1)*exp(h*x[,j]^2/2)/g -} -if(g==0)x[,j]<-x[,j]*exp(h*x[,j]^2/2) -} -x -} - -yhall<-function(x,y,tr=.2,alpha=.05){ -# -# Perform Yuen's test for trimmed means on the data in x and y -# in conjunction with Hall's transformation. -# The default amount of trimming is 20% -# Missing values (values stored as NA) are automatically removed. -# -# A confidence interval for the trimmed mean of x minus the -# the trimmed mean of y is computed and returned in yuen$ci. -# -x<-x[!is.na(x)] # Remove any missing values in x -y<-y[!is.na(y)] # Remove any missing values in y -winx<-winval(x,tr=tr) -winy<-winval(y,tr=tr) -m3x<-sum((winx-mean(winx))^3)/length(x) -m3y<-sum((winy-mean(winy))^3)/length(y) -h1<-length(x)-2*floor(tr*length(x)) -h2<-length(y)-2*floor(tr*length(y)) -mwx<-length(x)*m3x/h1 -mwy<-length(y)*m3y/h2 -q1<-(length(x)-1)*winvar(x,tr)/(h1*(h1-1)) -q2<-(length(y)-1)*winvar(y,tr)/(h2*(h2-1)) -sigtil<-q1+q2 -mtil<-(mwx/h1^2)-(mwy/h2^2) -dif<-mean(x,tr)-mean(y,tr) -thall<-dif+mtil/(6*sigtil)+mtil*dif^2/(3*sigtil^2)+mtil^2*dif^3/(27*sigtil^4) -thall<-thall/sqrt(sigtil) -nhat<-mtil/sigtil^1.5 -list(test.stat=thall,nu.tilda=nhat,sig.tilda=sqrt(sigtil)) -} - -linconm<-function(x,con=0,est=onestep,alpha=.05,nboot=500,pr=TRUE,...){ -# -# Compute a 1-alpha confidence interval for a set of d linear contrasts -# involving M-estimators using a bootstrap method. (See Chapter 6.) -# Independent groups are assumed. -# -# The data are assumed to be stored in x in list mode. Thus, -# x[[1]] contains the data for the first group, x[[2]] the data -# for the second group, etc. Length(x)=the number of groups = J, say. -# -# con is a J by d matrix containing the contrast coefficents of interest. -# If unspecified, all pairwise comparisons are performed. -# For example, con[,1]=c(1,1,-1,-1,0,0) and con[,2]=c(,1,-1,0,0,1,-1) -# will test two contrasts: (1) the sum of the first two measures of location is -# equal to the sum of the second two, and (2) the difference between -# the first two is equal to the difference between the measure of location for -# groups 5 and 6. -# -# The default number of bootstrap samples is nboot=399 -# -# This function uses the function trimpartt written for this -# book. -# -# -# -# -if(pr){ -print("Note: confidence intervals are adjusted to control FWE") -print("But p-values are not adjusted to control FWE") -} -if(is.matrix(x))x<-listm(x) -con<-as.matrix(con) -if(!is.list(x))stop("Data must be stored in list mode.") -J<-length(x) -Jm<-J-1 -d<-(J^2-J)/2 -if(sum(con^2)==0){ -con<-matrix(0,J,d) -id<-0 -for (j in 1:Jm){ -jp<-j+1 -for (k in jp:J){ -id<-id+1 -con[j,id]<-1 -con[k,id]<-0-1 -}}} -if(nrow(con)!=length(x))stop("The number of groups does not match the number of contrast coefficients.") -m1<-matrix(0,J,nboot) -m2<-1 # Initialize m2 -mval<-1 -set.seed(2) # set seed of random number generator so that -# results can be duplicated. -for(j in 1:J){ -mval[j]<-est(x[[j]],...) -xcen<-x[[j]]-est(x[[j]],...) -data<-matrix(sample(xcen,size=length(x[[j]])*nboot,replace=TRUE),nrow=nboot) -m1[j,]<-apply(data,1,est,...) # A J by nboot matrix. 
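-# The variance of the bootstrap estimates is used as the squared standard error for group j: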
-m2[j]<-var(m1[j,]) -} -boot<-matrix(0,ncol(con),nboot) -bot<-1 -for (d in 1:ncol(con)){ -top<-apply(m1,2,trimpartt,con[,d]) -# A vector of length nboot containing psi hat values -consq<-con[,d]^2 -bot[d]<-trimpartt(m2,consq) -boot[d,]<-abs(top)/sqrt(bot[d]) -} -testb<-apply(boot,2,max) -ic<-floor((1-alpha)*nboot) -testb<-sort(testb) -psihat<-matrix(0,ncol(con),6) -dimnames(psihat)<-list(NULL,c("con.num","psihat","ci.lower","ci.upper","se","p.value")) -for (d in 1:ncol(con)){ -psihat[d,1]<-d -psihat[d,2]<-trimpartt(mval,con[,d]) -psihat[d,3]<-psihat[d,2]-testb[ic]*sqrt(bot[d]) -psihat[d,4]<-psihat[d,2]+testb[ic]*sqrt(bot[d]) -psihat[d,5]<-sqrt(bot[d]) -pval<-mean((boot[d,]1)fval<-akerdmul(xx,pts=pts,hval=hval,aval=aval,fr=fr,pr=pyhat, -plotit=plotit,theta=theta,phi=phi,expand=expand,scale=scale,ticktype=ticktype) -plotit<-F -} -if(is.matrix(xx) && ncol(xx)==1)xx<-xx[,1] -if(!is.matrix(xx)){ -x<-sort(xx) -if(op==1){ -m<-mad(x) -if(m==0){ -temp<-idealf(x) -m<-(temp$qu-temp$ql)/(qnorm(.75)-qnorm(.25)) -} -if(m==0)m<-sqrt(winvar(x)/.4129) -if(m==0)stop("All measures of dispersion are equal to 0") -fhat <- rdplot(x,pyhat=TRUE,plotit=FALSE,fr=fr) -if(m>0)fhat<-fhat/(2*fr*m) -} -if(op==2){ -init<-density(xx) -fhat <- init$y -x<-init$x -} -n<-length(x) -if(is.na(hval)){ -sig<-sqrt(var(x)) -temp<-idealf(x) -iqr<-(temp$qu-temp$ql)/1.34 -A<-min(c(sig,iqr)) -if(A==0)A<-sqrt(winvar(x))/.64 -hval<-1.06*A/length(x)^(.2) -# See Silverman, 1986, pp. 47-48 -} -gm<-exp(mean(log(fhat[fhat>0]))) -alam<-(fhat/gm)^(0-aval) -dhat<-NA -if(is.na(pts[1]))pts<-x -pts<-sort(pts) -for(j in 1:length(pts)){ -temp<-(pts[j]-x)/(hval*alam) -epan<-ifelse(abs(temp)yq) -B<-mean(flag1*flag2) -flag1<-(x>xq) -flag2<-(y<=yq) -C1<-mean(flag1*flag2) -flag1<-(x>xq) -flag2<-(y>yq) -D1<-mean(flag1*flag2) -fx<-akerd(x,pts=xq,plotit=FALSE,pyhat=TRUE) -fy<-akerd(y,pts=yq,plotit=FALSE,pyhat=TRUE) -v1<-(q-1)^2*A -v2<-(q-1)*q*B -v3<-(q-1)*q*C1 -v4<-q*q*D1 -temp<-0-2*(v1+v2+v3+v4)/(fx*fy)+q*(1-q)/fx^2+q*(1-q)/fy^2 -val<-sqrt(temp/n) -val -} - -akerdmul<-function(x,pts=NA,hval=NA,aval=.5,fr=.8,pr=FALSE,plotit=TRUE,theta=50, -phi=25,expand=.5,scale=FALSE,xlab="X",ylab="Y",zlab="",ticktype="simple"){ -# -# Compute adaptive kernel density estimate -# for multivariate data -# (See Silverman, 1986) -# -# Use expected frequency as initial estimate of the density -# -# hval is the span used by the kernel density estimator -# fr is the span used by the expected frequency curve -# pr=T, returns density estimates at pts -# ticktype="detailed" will create ticks as done in two-dimensional plot -# -library(MASS) -library(akima) -if(is.na(pts[1]))pts<-x -if(ncol(x)!=ncol(pts))stop("Number of columns for x and pts do not match") -if(!is.matrix(x))stop("Data should be stored in a matrix") -fhat <- rdplot(x,pyhat=TRUE,plotit=FALSE,fr=fr) -n<-nrow(x) -d<-ncol(x) -pi<-gamma(.5)^2 -cd<-c(2,pi) -if(d==2)A<-1.77 -if(d==3)A<-2.78 -if(d>2){ -for(j in 3:d)cd[j]<-2*pi*cd[j-2]/n # p. 76 -} -if(d>3)A<-(8*d*(d+2)*(d+4)*(2*sqrt(pi))^d)/((2*d+1)*cd[d]) # p. 87 -if(is.na(hval))hval<-A*(1/n)^(1/(d+4)) # Silverman, p. 86 -svec<-NA -for(j in 1:d){ -sig<-sqrt(var(x[,j])) -temp<-idealf(x[,j]) -iqr<-(temp$qu-temp$ql)/1.34 -A<-min(c(sig,iqr)) -x[,j]<-x[,j]/A -svec[j]<-A -} -hval<-hval*sqrt(mean(svec^2)) # Silverman, p. 87 -# Now do adaptive; see Silverman, 1986, p. 
101 -gm<-exp(mean(log(fhat[fhat>0]))) -alam<-(fhat/gm)^(0-aval) -dhat<-NA -nn<-nrow(pts) -for(j in 1:nn){ -temp1<-t(t(x)-pts[j,])/(hval*alam) -temp1<-temp1^2 -temp1<-apply(temp1,1,FUN="sum") -temp<-.5*(d+2)*(1-temp1)/cd[d] -epan<-ifelse(temp1<1,temp,0) # Epanechnikov kernel, p. 76 -dhat[j]<-mean(epan/(alam*hval)^d) -} -if(plotit && d==2){ -fitr<-dhat -iout<-c(1:length(fitr)) -nm1<-length(fitr)-1 -for(i in 1:nm1){ -ip1<-i+1 -for(k in ip1:length(fitr))if(sum(x[i,]==x[k,])==2)iout[k]<-0 -} -fitr<-fitr[iout>=1] -mkeep<-x[iout>=1,] -fit<-interp(mkeep[,1],mkeep[,2],fitr) -persp(fit,theta=theta,phi=phi,xlab=xlab,ylab=ylab,zlab=zlab,expand=expand, -scale=scale,ticktype=ticktype) -} -m<-"Done" -if(pr)m<-dhat -m -} -cov2med<-function(x,y=NA,q=.5){ -# -# Estimate the covariance between two dependent -# order statistics -# By default, q=.5 meaning that an estimate of -# of covariance is made when a single order statistic -# is used to estimate the median. -# y=NA, function returns squared standard error. -# -if(is.na(y[1]))val<-qse(x,q=q,op=3)^2 -if(!is.na(y[1])){ -if(sum((x-y)^2)==0)val<-qse(x,q=q,op=3)^2 -if(sum((x-y)^2)>0){ -n<-length(x) -m<-floor(q*n+.5) -yord<-sort(y) -flag<-(y<=yord[m]) -xord<-sort(x) -xq<-xord[m] -yord<-sort(y) -yq<-yord[m] -flag1<-(x<=xq) -flag2<-(y<=yq) -A<-mean(flag1*flag2) -flag1<-(x<=xq) -flag2<-(y>yq) -B<-mean(flag1*flag2) -flag1<-(x>xq) -flag2<-(y<=yq) -C1<-mean(flag1*flag2) -flag1<-(x>xq) -flag2<-(y>yq) -D1<-mean(flag1*flag2) -fx<-akerd(x,pts=xq,plotit=FALSE,pyhat=TRUE) -fy<-akerd(y,pts=yq,plotit=FALSE,pyhat=TRUE) -v1<-(q-1)^2*A -v2<-(q-1)*q*B -v3<-(q-1)*q*C1 -v4<-q*q*D1 -val<-((v1+v2+v3+v4)/(fx*fy))/n -}} -val -} - - -covmmed<-function(x,p=length(x),grp=c(1:p),q=.5){ -# -# Estimate the covariance matrix for the sample medians -# based on a SINGLE order statistic, using -# the data in the R variable x. -# (x[[1]] contains the data for group 1, x[[2]] the data for group 2, etc.) -# The function returns a p by p matrix of covariances, the diagonal -# elements being equal to the squared standard error of the sample -# trimmed means, where p is the number of groups to be included. -# By default, all the groups in x are used, but a subset of -# the groups can be used via grp. For example, if -# the goal is to estimate the covariances between the medians -# for groups 1, 2, and 5, use the command grp<-c(1,2,5) -# before calling this function. -# -# Missing values (values stored as NA) are not allowed. -# -if(is.matrix(x))x<-listm(x) -if(!is.list(x))stop("The data are not stored in a matrix or list mode.") -p<-length(grp) -pm1<-p-1 -for (i in 1:pm1){ -ip<-i+1 -if(length(x[[grp[ip]]])!=length(x[[grp[i]]]))stop("The number of observations in each group must be equal") -} -n<-length(x[[grp[1]]]) -covest<-matrix(0,p,p) -for(j in 1:p){ -for(k in 1:p){ -if(j==k)covest[j,j]<-cov2med(x[[grp[j]]],q=q) -if(jnullval)p.value=min(p.value,alpha) #very remote chance ci and p.value differ. Force them to agree. 
-if(ybt[2]tval[1:nboot]) -if(test>=0)G=mean(test=20 -# -if(!is.na(y[1]))x<-cbind(x,y) -if(!is.matrix(x))stop("Something is wrong, with x or y") -x<-elimna(x) -y<-x[,2] -x<-x[,1] -n<-length(y) -df<-n-1 -if(is.na(se.val[1])){ -if(!bop)se.val<-sedm(x,y,q=q) -if(bop)se.val<-bootdse(x,y,est=qest,q=q,pr=FALSE,nboot=nboot) -} -test<-(qest(x,q)-qest(y,q))/se.val -sig.level<-2*(1-pt(abs(test),df)) -list(test.stat=test,p.value=sig.level,se=se.val) -} - -lincdm<-function(x,con=0,alpha=.05,q=.5,mop=FALSE,nboot=100,SEED=TRUE){ -# -# A heteroscedastic test of d linear contrasts among -# dependent groups using medians. -# -# The data are assumed to be stored in $x$ in list mode. -# Length(x) is assumed to correspond to the total number of groups, J -# It is assumed all groups are independent. -# -# con is a J by d matrix containing the contrast coefficients that are used. -# If con is not specified, all pairwise comparisons are made. -# -# q is the quantile used to compare groups. -# con contains contrast coefficients, -# con=0 means all pairwise comparisons are used -# mop=F, use single order statistic -# mop=T, use usual sample median, even if q is not equal to .5 -# in conjunction with a bootstrap estimate of covariances among -# the medians using -# nboot samples. -# -# Missing values are automatically removed. -# -# -if(mop && SEED)set.seed(2) -if(is.list(x)){ -x<-matl(x) -x<-elimna(x) -} -if(is.matrix(x))x<-listm(x) -if(!is.list(x))stop("Data must be stored in a matrix or in list mode.") -con<-as.matrix(con) -J<-length(x) -h<-length(x[[1]]) -w<-vector("numeric",J) -xbar<-vector("numeric",J) -for(j in 1:J){ -if(!mop)xbar[j]<-qest(x[[j]],q=q) -if(mop)xbar[j]<-median(x[[j]]) -} -if(sum(con^2)==0){ -temp<-qdmcp(x,alpha=alpha,q=q,pr=FALSE) -test<-temp$test -psihat<-temp$psihat -num.sig<-temp$num.sig -} -if(sum(con^2)>0){ -ncon<-ncol(con) -if(alpha==.05){ -dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) -if(ncon > 10){ -avec<-.05/c(11:ncon) -dvec<-c(dvec,avec) -}} -if(alpha==.01){ -dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) -if(ncon > 10){ -avec<-.01/c(11:ncon) -dvec<-c(dvec,avec) -}} -if(alpha != .05 && alpha != .01)dvec<-alpha/c(1:ncon) -if(nrow(con)!=length(x)){ -stop("The number of groups does not match the number of contrast coefficients.") -} -psihat<-matrix(0,ncol(con),4) -dimnames(psihat)<-list(NULL,c("con.num","psihat","ci.lower","ci.upper")) -test<-matrix(0,ncol(con),5) -dimnames(test)<-list(NULL,c("con.num","test","p.value","crit.p.value","se")) -df<-length(x[[1]])-1 -if(!mop)w<-covmmed(x,q=q) -if(mop)w<-bootcov(x,nboot=nboot,pr=FALSE) -for (d in 1:ncol(con)){ -psihat[d,1]<-d -psihat[d,2]<-sum(con[,d]*xbar) -cvec<-as.matrix(con[,d]) -sejk<-sqrt(t(cvec)%*%w%*%cvec) -test[d,1]<-d -test[d,2]<-sum(con[,d]*xbar)/sejk -test[d,3]<-2*(1-pt(abs(test[d,2]),df)) -test[d,5]<-sejk -} -temp1<-test[,3] -temp2<-order(0-temp1) -zvec<-dvec[1:ncon] -test[temp2,4]<-zvec -psihat[,3]<-psihat[,2]-qt(1-test[,4]/2,df)*test[,5] -psihat[,4]<-psihat[,2]+qt(1-test[,4]/2,df)*test[,5] -num.sig<-sum(test[,3]<=test[,4]) -} -list(test=test,psihat=psihat,num.sig=num.sig) -} -mwwmcp<-function(J,K,x,grp=c(1:p),p=J*K,q=.5,bop=FALSE,alpha=.05,nboot=100, -SEED=TRUE){ -# -# For a J by K anova using quantiles with -# repeated measures on both factors, -# Perform all multiple comparisons for main effects -# and interactions. 
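-# Illustrative call (x stores the J*K repeated measures in the order described below): mwwmcp(2,3,x)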
-# -# q=.5 by default meaning medians are compared -# bop=F means bootstrap option not used; -# with bop=T, function uses usual medians rather -# rather than a single order statistic to estimate median -# in conjunction with a bootstrap estimate of covariances -# among sample medians. -# -# The R variable data is assumed to contain the raw -# data stored in a matrix or in list mode. -# When in list mode data[[1]] contains the data -# for the first level of both factors: level 1,1. -# data[[2]] is assumed to contain the data for level 1 of the -# first factor and level 2 of the second: level 1,2 -# data[[K]] is the data for level 1,K -# data[[K+1]] is the data for level 2,1, data[2K] is level 2,K, etc. -# -# It is assumed that data has length JK, the total number of -# groups being tested, but a subset of the data can be analyzed -# using grp -# -Qa<-NA -Qab<-NA -if(is.data.frame(x))x=as.matrix(x) -if(is.list(x))x<-elimna(matl(x)) -if(is.matrix(x))x<-elimna(x) -data<-x -if(is.matrix(data))data<-listm(data) -if(!is.list(data))stop("Data are not stored in list mode or a matrix") -if(p!=length(data)){ -print("The total number of groups, based on the specified levels, is") -print(p) -print("The number of groups stored in x is") -print(length(data)) -print("Warning: These two values are not equal") -} -if(p!=length(grp))stop("Apparently a subset of the groups was specified that does not match the total number of groups indicated by the values for J and K.") -tmeans<-0 - # Create the three contrast matrices - # -Ja<-(J^2-J)/2 -Ka<-(K^2-K)/2 -JK<-J*K -conA<-matrix(0,nrow=JK,ncol=Ja) -ic<-0 -for(j in 1:J){ -for(jj in 1:J){ -if(j < jj){ -ic<-ic+1 -mat<-matrix(0,nrow=J,ncol=K) -mat[j,]<-1 -mat[jj,]<-0-1 -conA[,ic]<-t(mat) -}}} -conB<-matrix(0,nrow=JK,ncol=Ka) -ic<-0 -for(k in 1:K){ -for(kk in 1:K){ -if(k0){ -ncon<-ncol(con) -if(alpha==.05){ -dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) -if(ncon > 10){ -avec<-.05/c(11:ncon) -dvec<-c(dvec,avec) -}} -if(alpha==.01){ -dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) -if(ncon > 10){ -avec<-.01/c(11:ncon) -dvec<-c(dvec,avec) -}} -if(alpha != .05 && alpha != .01)dvec<-alpha/c(1:ncon) -if(nrow(con)!=length(x)){ -stop("The number of groups does not match the number of contrast coefficients.") -} -psihat<-matrix(0,ncol(con),4) -dimnames(psihat)<-list(NULL,c("con.num","psihat","ci.lower","ci.upper")) -test<-matrix(0,ncol(con),5) -dimnames(test)<-list(NULL,c("con.num","test","p.value","crit.p.value","se")) -df<-length(x[[1]])-1 -w<-covmtrim(x,tr=tr) -for (d in 1:ncol(con)){ -psihat[d,1]<-d -psihat[d,2]<-sum(con[,d]*xbar) -cvec<-as.matrix(con[,d]) -sejk<-sqrt(t(cvec)%*%w%*%cvec) -test[d,1]<-d -test[d,2]<-sum(con[,d]*xbar)/sejk -test[d,3]<-2*(1-pt(abs(test[d,2]),df)) -test[d,5]<-sejk -} -temp1<-test[,3] -temp2<-order(0-temp1) -zvec<-dvec[1:ncon] -test[temp2,4]<-zvec -psihat[,3]<-psihat[,2]-qt(1-test[,4]/2,df)*test[,5] -psihat[,4]<-psihat[,2]+qt(1-test[,4]/2,df)*test[,5] -num.sig<-sum(test[,3]<=test[,4]) -} -list(test=test,psihat=psihat,num.sig=num.sig) -} - -sintv2<-function(x,y=NULL,alpha=.05,nullval=0,null.value=NULL,pr=TRUE){ -# -# Compute a 1-alpha confidence interval for the median using -# the Hettmansperger-Sheather interpolation method. -# (See section 4.5.2.) -# -# The default value for alpha is .05. -# -# If y is not null, the function uses x-y, as might be done when comparing dependent variables. 
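-# Illustrative calls: sintv2(x) for a single sample; sintv2(x,y) for the median of the difference scores x-y.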
-# -if(!is.null(y))x=x-y -x=elimna(x) -if(!is.null(null.value))nullval=null.value -if(pr){ -if(sum(duplicated(x)>0))print("Duplicate values detected; hdpb might have more power") -} -ci<-sint(x,alpha=alpha,pr=FALSE) -alph<-c(1:99)/100 -for(i in 1:99){ -irem<-i -chkit<-sint(x,alpha=alph[i],pr=FALSE) -if(chkit[1]>nullval || chkit[2]nullval || chkit[2]nullval || chkit[2]nullval || chkit[2] 10){ -avec<-.05/c(11:ncon) -dvec<-c(dvec,avec) -}} -if(alpha==.01){ -dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) -if(ncon > 10){ -avec<-.01/c(11:ncon) -dvec<-c(dvec,avec) -}} -if(alpha != .05 && alpha != .01)dvec<-alpha/c(1:ncon) -psihat<-matrix(0,CC,5) -dimnames(psihat)<-list(NULL,c("Group","Group","psihat","ci.lower","ci.upper")) -test<-matrix(NA,CC,6) -dimnames(test)<-list(NULL,c("Group","Group","test","p-value","p.crit","se")) -if(bop)se.val<-bootdse(x,nboot=nboot,pr=pr) -temp1<-0 -jcom<-0 -for (j in 1:J){ -for (k in 1:J){ -if (j < k){ -jcom<-jcom+1 -if(!bop)temp<-qdtest(x[,j],x[,k],q=q,bop=bop) -if(bop)temp<-qdtest(x[,j],x[,k],se.val=se.val[jcom]) -sejk<-temp$se -test[jcom,6]<-sejk -test[jcom,3]<-temp$test.stat -test[jcom,4]<-temp$p.value -if(length(x[,j])<20)test[jcom,4]<-mrm1way(x[,c(j,k)],q=q,SEED=SEED)$p.value -psihat[jcom,1]<-j -psihat[jcom,2]<-k -test[jcom,1]<-j -test[jcom,2]<-k -psihat[jcom,3]<-(xbar[j]-xbar[k]) -}}} -temp1<-test[,4] -temp2<-order(0-temp1) -zvec<-dvec[1:ncon] -test[temp2,5]<-zvec -psihat[,4]<-psihat[,3]-qt(1-test[,5]/2,df)*test[,6] -psihat[,5]<-psihat[,3]+qt(1-test[,5]/2,df)*test[,6] -num.sig<-sum(test[,4]<=test[,5]) -list(test=test,psihat=psihat,num.sig=num.sig) -} - - - -qdmcpdif<-function(x, con = 0,alpha = 0.05){ -# -# MCP with medians on difference scores -# FWE controlled with Rom's method -# -if(is.data.frame(x))x=as.matrix(x) -if(!is.matrix(x))x<-matl(x) -if(!is.matrix(x))stop("Data must be stored in a matrix or in list mode.") -con<-as.matrix(con) -J<-ncol(x) -xbar<-vector("numeric",J) -x<-elimna(x) # Remove missing values -nval<-nrow(x) -h1<-nrow(x) -df<-h1-1 -if(sum(con^2!=0))CC<-ncol(con) -if(sum(con^2)==0)CC<-(J^2-J)/2 -ncon<-CC -if(alpha==.05){ -dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) -if(ncon > 10){ -avec<-.05/c(11:ncon) -dvec<-c(dvec,avec) -}} -if(alpha==.01){ -dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) -if(ncon > 10){ -avec<-.01/c(11:ncon) -dvec<-c(dvec,avec) -}} -if(alpha != .05 && alpha != .01)dvec<-alpha/c(1:ncon) -if(sum(con^2)==0){ -psihat<-matrix(0,CC,5) -dimnames(psihat)<-list(NULL,c("Group","Group","psihat","ci.lower","ci.upper")) -test<-matrix(NA,CC,5) -dimnames(test)<-list(NULL,c("Group","Group","p-value","p.crit","se")) -temp1<-0 -jcom<-0 -for (j in 1:J){ -for (k in 1:J){ -if (j < k){ -jcom<-jcom+1 -dv<-x[,j]-x[,k] -test[jcom,5]<-msmedse(dv) -temp<-sintv2(dv,alpha=alpha/CC) -temp1[jcom]<-temp$p.value -test[jcom,3]<-temp$p.value -psihat[jcom,1]<-j -psihat[jcom,2]<-k -test[jcom,1]<-j -test[jcom,2]<-k -psihat[jcom,3]<-median(dv) -psihat[jcom,4]<-temp$ci.low -psihat[jcom,5]<-temp$ci.up -}}} -temp2<-order(0-temp1) -zvec<-dvec[1:ncon] -sigvec<-(test[temp2]>=zvec) -if(sum(sigvec)0){ -if(nrow(con)!=ncol(x))print("WARNING: The number of groups does not match the number of contrast coefficients.") -ncon<-ncol(con) -psihat<-matrix(0,ncol(con),4) -dimnames(psihat)<-list(NULL,c("con.num","psihat","ci.lower","ci.upper")) -test<-matrix(0,ncol(con),4) -dimnames(test)<-list(NULL,c("con.num","sig","crit.sig","se")) -temp1<-NA -for (d in 1:ncol(con)){ -psihat[d,1]<-d -for(j in 
1:J){ -if(j==1)dval<-con[j,d]*x[,j] -if(j>1)dval<-dval+con[j,d]*x[,j] -} -temp3<-sintv2(dval) -temp1[d]<-temp3$p.value -test[d,1]<-d -test[d,4]<-msmedse(dval) -psihat[d,2]<-median(dval) -psihat[d,3]<-temp3$ci.low -psihat[d,4]<-temp3$ci.up -} -test[,2]<-temp1 -temp2<-order(0-temp1) -zvec<-dvec[1:ncon] -print(c(ncon,zvec)) -sigvec<-(test[temp2,2]>=zvec) -if(sum(sigvec)0)+ sum(psihat[,5]<0) -if(sum(con^2)>0)num.sig<-sum(psihat[,3]>0)+ sum(psihat[,4]<0) -list(test=test,psihat=psihat,con=con,num.sig=num.sig) -} - - - -l2dci<-function(x,y,est=median,alpha=.05,nboot=2000,SEED=TRUE,pr=TRUE,...){ -# -# Compute a bootstrap confidence interval for a -# measure of location associated with -# the distribution of x-y, where x and y are possibly dependent. -# est indicates which measure of location will be used -# -# Function returns confidence interval, p-value and estimate -# of square standard error of the estimator used. -# -x<-x[!is.na(x)] # Remove any missing values in x -y<-y[!is.na(y)] # Remove any missing values in y -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -if(pr)print("Taking bootstrap samples. Please wait.") -datax<-matrix(sample(x,size=length(x)*nboot,replace=TRUE),nrow=nboot) -datay<-matrix(sample(y,size=length(y)*nboot,replace=TRUE),nrow=nboot) -bvec<-NA -for(i in 1:nboot)bvec[i]<-loc2dif(datax[i,],datay[i,],est=est) -bvec<-sort(bvec) -low<-round((alpha/2)*nboot)+1 -up<-nboot-low -temp<-sum(bvec<0)/nboot+sum(bvec==0)/(2*nboot) -sig.level<-2*(min(temp,1-temp)) -se<-var(bvec) -list(ci=c(bvec[low],bvec[up]),p.value=sig.level,sq.se=se) -} - - -qdec2ci<-function(x,y=NA,nboot=500,alpha=.05,pr=FALSE,SEED=TRUE,plotit=TRUE){ -# -# Compare the deciles of two dependent groups -# with quantiles estimated with a single order statistic -# -# x: can be a matrix with two columns in which case -# y is ignored. -# -if(SEED)set.seed(2) -if(is.na(y[1])){ -y<-x[,2] -x<-x[,1] -} -xy=elimna(cbind(x,y)) -x=xy[,1] -y=xy[,2] -if(sum(duplicated(x))>0)stop('Tied values detected, use Dqcomhd') -if(sum(duplicated(y))>0)stop('Tied values detected, use Dqcomhd') -bvec<-matrix(NA,nrow=nboot,ncol=9) -if(pr)print("Taking bootstrap samples. Please Wait.") -data<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot) -for(i in 1:nboot)bvec[i,]<-qdec(x[data[i,]])-qdec(y[data[i,]]) -pval<-NA -m<-matrix(0,9,5) -dimnames(m)<-list(NULL,c("lower","upper","Delta.hat","p.values",'p.crit')) -crit <- alpha/2 -icl <- round(crit * nboot) + 1 -icu <- nboot - icl -for(i in 1:9){ -pval[i]<-(sum(bvec[,i]<0)+.5*sum(bvec[,i]==0))/nboot -pval[i]<-2*min(pval[i],1-pval[i]) -temp<-sort(bvec[,i]) -m[i,1]<-temp[icl] -m[i,2]<-temp[icu] -} -m[,3]<-qdec(x)-qdec(y) -m[,4]<-pval -temp=order(pval,decreasing=TRUE) -zvec=alpha/c(1:9) -m[temp,5]=zvec -if(plotit){ -xaxis<-c(qdec(x),qdec(x)) -par(pch="+") -yaxis<-c(m[,1],m[,2]) -plot(xaxis,yaxis,ylab="delta",xlab="x (first group)") -par(pch="*") -points(qdec(x),m[,3]) -} -m -} - - - - -ancovam<-function(x1,y1,x2,y2,fr1=1,fr2=1,alpha=.05,plotit=TRUE,pts=NA,sm=FALSE, -pr=TRUE){ -# -# Compare two independent groups using an ancova method -# No parametric assumption is made about the form of -# the regression lines--a running interval smoother is used. 
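-# Illustrative call: ancovam(x1,y1,x2,y2) compares the two groups at five design points chosen from x1
-# where both groups have enough nearby observations.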
-# -# This function is designed specifically for -# MEDIANS -# -# Assume data are in x1 y1 x2 and y2 -# -if(pr){ -print("NOTE: Confidence intervals are adjusted to control the probability") -print("of at least one Type I error.") -print("But p-values are not") -} -if(is.na(pts[1])){ -npt<-5 -isub<-c(1:5) # Initialize isub -test<-c(1:5) -xorder<-order(x1) -y1<-y1[xorder] -x1<-x1[xorder] -xorder<-order(x2) -y2<-y2[xorder] -x2<-x2[xorder] -n1<-1 -n2<-1 -vecn<-1 -for(i in 1:length(x1))n1[i]<-length(y1[near(x1,x1[i],fr1)]) -for(i in 1:length(x1))n2[i]<-length(y2[near(x2,x1[i],fr2)]) -for(i in 1:length(x1))vecn[i]<-min(n1[i],n2[i]) -sub<-c(1:length(x1)) -isub[1]<-min(sub[vecn>=12]) -isub[5]<-max(sub[vecn>=12]) -isub[3]<-floor((isub[1]+isub[5])/2) -isub[2]<-floor((isub[1]+isub[3])/2) -isub[4]<-floor((isub[3]+isub[5])/2) -mat<-matrix(NA,5,9) -dimnames(mat)<-list(NULL,c("X","n1","n2","DIF","TEST","se","ci.low","ci.hi","p.value")) -critv<-NA -critv=qsmm(1-alpha,5,500) -for (i in 1:5){ -g1<-y1[near(x1,x1[isub[i]],fr1)] -g2<-y2[near(x2,x1[isub[i]],fr2)] -g1<-g1[!is.na(g1)] -g2<-g2[!is.na(g2)] -test<-msmed(g1,g2) -mat[i,1]<-x1[isub[i]] -mat[i,2]<-length(g1) -mat[i,3]<-length(g2) -mat[i,4]<-median(g1)-median(g2) -mat[i,5]<-test$test[3] -mat[i,6]<-test$test[5] -cilow<-mat[i,4]-critv*mat[i,6] -cihi<-mat[i,4]+critv*mat[i,6] -mat[i,7]<-cilow -mat[i,8]<-cihi -mat[i,9]<-test$test[6] -}} -if(!is.na(pts[1])){ -if(length(pts)>=29)stop("At most 28 points can be compared") -n1<-1 -n2<-1 -vecn<-1 -for(i in 1:length(pts)){ -n1[i]<-length(y1[near(x1,pts[i],fr1)]) -n2[i]<-length(y2[near(x2,pts[i],fr2)]) -} -mat<-matrix(NA,length(pts),9) -dimnames(mat)<-list(NULL,c("X","n1","n2","DIF","TEST","se","ci.low","ci.hi", -"p.value")) -critv<-NA -if(length(pts)>=2){ -#if(alpha==.05)critv<-smmcrit(500,length(pts)) -#if(alpha==.01)critv<-smmcrit01(500,length(pts)) -#if(is.na(critv))critv<-smmval(rep(999,length(pts)),alpha=alpha) -critv=qsmm(1-alpha,length(pts),500) -} -if(length(pts)==1)critv<-qnorm(1-alpha/2) -for (i in 1:length(pts)){ -g1<-y1[near(x1,pts[i],fr1)] -g2<-y2[near(x2,pts[i],fr2)] -g1<-g1[!is.na(g1)] -g2<-g2[!is.na(g2)] -test<-msmed(g1,g2) -mat[i,1]<-pts[i] -mat[i,2]<-length(g1) -mat[i,3]<-length(g2) -if(length(g1)<=5)print(paste("Warning, there are",length(g1)," points corresponding to the design point X=",pts[i])) -if(length(g2)<=5)print(paste("Warning, there are",length(g2)," points corresponding to the design point X=",pts[i])) -mat[i,4]<-median(g1)-median(g2) -mat[i,5]<-test$test[3] -mat[i,6]<-test$test[5] -cilow<-mat[i,4]-critv*mat[i,6] -cihi<-mat[i,4]+critv*mat[i,6] -mat[i,7]<-cilow -mat[i,8]<-cihi -mat[i,9]<-test$test[6] -}} -if(plotit) -runmean2g(x1,y1,x2,y2,fr=fr1,est=median,sm=sm) -list(output=mat,crit=critv) -} - - -modgen<-function(p,adz=FALSE){ -# -# Used by regpre to generate all models -# p=number of predictors -# adz=T, will add the model where only a measure -# of location is used. 
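-# Example: modgen(2) returns the three possible models: 1, 2, and c(1,2).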
-# -# -model<-list() -if(p>5)stop("Current version is limited to 5 predictors") -if(p==1)model[[1]]<-1 -if(p==2){ -model[[1]]<-1 -model[[2]]<-2 -model[[3]]<-c(1,2) -} -if(p==3){ -for(i in 1:3)model[[i]]<-i -model[[4]]<-c(1,2) -model[[5]]<-c(1,3) -model[[6]]<-c(2,3) -model[[7]]<-c(1,2,3) -} -if(p==4){ -for(i in 1:4)model[[i]]<-i -model[[5]]<-c(1,2) -model[[6]]<-c(1,3) -model[[7]]<-c(1,4) -model[[8]]<-c(2,3) -model[[9]]<-c(2,4) -model[[10]]<-c(3,4) -model[[11]]<-c(1,2,3) -model[[12]]<-c(1,2,4) -model[[13]]<-c(1,3,4) -model[[14]]<-c(2,3,4) -model[[15]]<-c(1,2,3,4) -} -if(p==5){ -for(i in 1:5)model[[i]]<-i -model[[6]]<-c(1,2) -model[[7]]<-c(1,3) -model[[8]]<-c(1,4) -model[[9]]<-c(1,5) -model[[10]]<-c(2,3) -model[[11]]<-c(2,4) -model[[12]]<-c(2,5) -model[[13]]<-c(3,4) -model[[14]]<-c(3,5) -model[[15]]<-c(4,5) -model[[16]]<-c(1,2,3) -model[[17]]<-c(1,2,4) -model[[18]]<-c(1,2,5) -model[[19]]<-c(1,3,4) -model[[20]]<-c(1,3,5) -model[[21]]<-c(1,4,5) -model[[22]]<-c(2,3,4) -model[[23]]<-c(2,3,5) -model[[24]]<-c(2,4,5) -model[[25]]<-c(3,4,5) -model[[26]]<-c(1,2,3,4) -model[[27]]<-c(1,2,3,5) -model[[28]]<-c(1,2,4,5) -model[[29]]<-c(1,3,4,5) -model[[30]]<-c(2,3,4,5) -model[[31]]<-c(1,2,3,4,5) -} -if(adz){ -ic<-length(model)+1 -model[[ic]]<-0 -} -model -} - - - -locpre<-function(y,est=mean,error=sqfun,nboot=100,SEED=TRUE,pr=TRUE,mval=round(5*log(length(y)))){ -# -# Estimate the prediction error using a measure of location -# given by the argument -# est -# -# The .632 method is used. -# (See Efron and Tibshirani, 1993, pp. 252--254) -# -# Prediction error is the expected value of the function error. -# The argument error defaults to squared error. -# -# est can be any R function that returns a measure of location -# -# The default value for mval, the number of observations to resample -# for each of the B bootstrap samples is based on results by -# Shao (JASA, 1996, 655-665). (Resampling n vectors of observations -# model selection may not lead to the correct model as n->infinity. -# -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -data<-matrix(sample(length(y),size=mval*nboot,replace=TRUE),nrow=nboot) -bid<-apply(data,1,idb,length(y)) -# bid is an n by nboot matrix. If the jth bootstrap sample from -# 1, ..., mval contains the value i, bid[i,j]=0; otherwise bid[i,j]=1 -# -yhat<-apply(data,1,locpres1,y,est=est) -# yhat is nboot vector -# containing the bootstrap estimates -# -yhat<-matrix(yhat,nrow=length(y),ncol=nboot) # convert to n x nboot matrix -bi<-apply(bid,1,sum) # B sub i in notation of Efron and Tibshirani, p. 
253 -temp<-(bid*(yhat-y)) -diff<-apply(temp,1,error) -ep0<-sum(diff/bi)/length(y) -aperror<-error(y-est(y))/length(y) # apparent error -val<-.368*aperror+.632*ep0 -val -} - - -locpres1<-function(isub,x,est){ -# -# Compute a measure of location x[isub] -# isub is a vector of length mval, -# a bootstrap sample from the sequence of integers -# 1, 2, 3, ..., n -# -# mval is the sample size -# of the bootstrap sample, where mval1){ -#if(alpha==.05)crit<-smmcrit(500,CC) -#if(alpha==.01)crit<-smmcrit01(500,CC) -#if(is.na(crit))warning("Can only be used with alpha=.05 or .01") -crit=qsmm(1-alpha,CC,500) -} -test[jcom,4]<-crit -psihat[jcom,4]<-psihat[jcom,3]-crit*test[jcom,5] -psihat[jcom,5]<-psihat[jcom,3]+crit*test[jcom,5] -}}}} -if(sum(con^2)>0){ -if(nrow(con)!=length(x))warning("The number of groups does not match the number of contrast coefficients.") -psihat<-matrix(0,ncol(con),4) -dimnames(psihat)<-list(NULL,c("con.num","psihat","ci.lower","ci.upper")) -test<-matrix(0,ncol(con),5) -dimnames(test)<-list(NULL,c("con.num","test","crit","se","df")) -for (d in 1:ncol(con)){ -psihat[d,1]<-d -psihat[d,2]<-sum(con[,d]*xbar) -sejk<-sqrt(sum(con[,d]^2*w)) -test[d,1]<-d -test[d,2]<-sum(con[,d]*xbar)/sejk -crit<-NA -if(CC==1)crit<-qnorm(1-alpha/2) -#if(alpha==.05)crit<-smmcrit(500,ncol(con)) -#if(alpha==.01)crit<-smmcrit01(500,ncol(con)) -crit=qsmm(1-alpha,ncol(con),500) -test[d,3]<-crit -test[d,4]<-sejk -psihat[d,3]<-psihat[d,2]-crit*sejk -psihat[d,4]<-psihat[d,2]+crit*sejk -}} -list(test=test,psihat=psihat) -} - - - -bpmedse<-function(x){ -# -# compute standard error of the median using method -# recommended by Price and Bonett (2001) -# -y<-sort(x) -n<-length(x) -av<-round((n+1)/2-sqrt(n)) -if(av==0)av<-1 -avm<-av-1 -astar<-pbinom(avm,n,.5) #alpha*/2 -zval<-qnorm(1-astar) -top<-n-av+1 -sqse<-((y[top]-y[av])/(2*zval))^2 # The sq. standard error -se<-sqrt(sqse) -se -} -exmed<-function(x,y=NA,con=0,alpha=.05,iter=1000,se.fun=bpmedse,SEED=TRUE){ -# -# Test a set of linear contrasts using medians -# -# Get exact control over type I errors under normality, provided -# iter is sufficietly large. -# iter determines number of replications used in a simulation -# to determine critical value. -# -# se.fun indicates method used to estimate standard errors. -# default is the method used by Bonett and Price (2002) -# To use the McKean-Shrader method, -# set se.fun=msmedse -# -# The data are assumed to be stored in $x$ in a matrix or in list mode. -# Length(x) is assumed to correspond to the total number of groups, J -# It is assumed all groups are independent. -# -# con is a J by d matrix containing the contrast coefficients that are used. -# If con is not specified, all pairwise comparisons are made. -# -# Missing values are automatically removed. -# -# Function returns the critical value used so that FWE=alpha -# (under the column crit) -# p-values are determined for each test but are not adjusted so -# that FWE=alpha. -# The confidence intervals are adjusted so that the simultaneous -# probability coverage is 1-alpha. 
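-# Illustrative call (x a matrix or list containing J independent groups): exmed(x,iter=1000)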
-# -if(!is.na(y[1])){ -xx<-list() -xx[[1]]<-x -xx[[2]]<-y -if(is.matrix(x) || is.list(x))stop("When y is speficied, x should not have list mode or be a matrix") -x<-xx -} -if(is.data.frame(x))x=as.matrix(x) -if(is.matrix(x))x<-listm(x) -if(!is.list(x))stop("Data must be stored in a matrix or in list mode.") -con<-as.matrix(con) -J<-length(x) -h<-vector("numeric",J) -w<-vector("numeric",J) -nval<-vector("numeric",J) -xbar<-vector("numeric",J) -for(j in 1:J){ -xx<-!is.na(x[[j]]) -val<-x[[j]] -x[[j]]<-val[xx] # Remove missing values -xbar[j]<-median(x[[j]]) -nval[j]<-length(x[[j]]) -# w[j]<-msmedse(x[[j]])^2 - w[j]<-se.fun(x[[j]])^2 -} -if(sum(con^2!=0))CC<-ncol(con) -if(sum(con^2)==0){ -CC<-(J^2-J)/2 -psihat<-matrix(0,CC,5) -dimnames(psihat)<-list(NULL,c("Group","Group","psihat","ci.lower","ci.upper")) -test<-matrix(NA,CC,6) -dimnames(test)<-list(NULL,c("Group","Group","test","crit","se","p.value")) -jcom<-0 -for (j in 1:J){ -for (k in 1:J){ -if (j < k){ -jcom<-jcom+1 -test[jcom,3]<-abs(xbar[j]-xbar[k])/sqrt(w[j]+w[k]) -# Next determine p-value for each individual test -temp<-msmedsub(c(nval[j],nval[k]),se.fun=se.fun,SEED=SEED,iter=iter) -test[jcom,6]<-sum((test[jcom,3]<=temp))/iter -sejk<-sqrt(w[j]+w[k]) -test[jcom,5]<-sejk -psihat[jcom,1]<-j -psihat[jcom,2]<-k -test[jcom,1]<-j -test[jcom,2]<-k -psihat[jcom,3]<-(xbar[j]-xbar[k]) -# Determine critical value for controlling FWE -temp<-msmedsub(nval,se.fun=se.fun,SEED=SEED,iter=iter) -ic<-round((1-alpha)*iter) -crit<-temp[ic] -test[jcom,4]<-crit -psihat[jcom,4]<-psihat[jcom,3]-crit*test[jcom,5] -psihat[jcom,5]<-psihat[jcom,3]+crit*test[jcom,5] -}}}} -if(sum(con^2)>0){ -if(nrow(con)!=length(x))warning("The number of groups does not match the number of contrast coefficients.") -psihat<-matrix(0,ncol(con),4) -dimnames(psihat)<-list(NULL,c("con.num","psihat","ci.lower","ci.upper")) -test<-matrix(0,ncol(con),5) -dimnames(test)<-list(NULL,c("con.num","test","crit","se","p.value")) -# Determine critical value that controls FWE -temp<-msmedsub(nval,con=con,se.fun=se.fun,SEED=SEED,iter=iter) -ic<-round((1-alpha)*iter) -crit<-temp[ic] -for (d in 1:ncol(con)){ -flag<-(con[,d]==0) -nvec<-nval[!flag] -psihat[d,1]<-d -psihat[d,2]<-sum(con[,d]*xbar) -sejk<-sqrt(sum(con[,d]^2*w)) -test[d,1]<-d -test[d,2]<-sum(con[,d]*xbar)/sejk -# Determine p-value for individual (dth) test -temp<-msmedsub(nvec,iter=iter,se.fun=se.fun,SEED=SEED) -test[d,3]<-crit -test[d,4]<-sejk -test[d,5]<-sum(abs((test[d,2])<=temp))/iter -psihat[d,3]<-psihat[d,2]-crit*sejk -psihat[d,4]<-psihat[d,2]+crit*sejk -}} -list(test=test,psihat=psihat) -} -msmedsub<-function(n,con=0,alpha=.05,se.fun=bpmedse,iter=1000,SEED=TRUE){ -# -# Determine a Studentized critical value, assuming normality -# and homoscedasticity, for the function msmedv2 -# -# Goal: Test a set of linear contrasts using medians -# -# The data are assumed to be stored in $x$ in a matrix or in list mode. -# Length(x) is assumed to correspond to the total number of groups, J -# It is assumed all groups are independent. -# -# con is a J by d matrix containing the contrast coefficients that are used. -# If con is not specified, all pairwise comparisons are made. 
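-# The function returns the iter sorted maximum absolute test statistics computed from simulated standard normal samples.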
-# -if(SEED)set.seed(2) -con<-as.matrix(con) -J<-length(n) -h<-vector("numeric",J) -w<-vector("numeric",J) -xbar<-vector("numeric",J) -x<-list() -test<-NA -testmax<-NA -for (it in 1:iter){ -for(j in 1:J){ -x[[j]]<-rnorm(n[j]) -xbar[j]<-median(x[[j]]) - w[j]<-se.fun(x[[j]])^2 -} -if(sum(con^2!=0))CC<-ncol(con) -if(sum(con^2)==0){ -CC<-(J^2-J)/2 -jcom<-0 -for (j in 1:J){ -for (k in 1:J){ -if (j < k){ -jcom<-jcom+1 -test[jcom]<-abs(xbar[j]-xbar[k])/sqrt(w[j]+w[k]) -}}}} -if(sum(con^2)>0){ -for (d in 1:ncol(con)){ -sejk<-sqrt(sum(con[,d]^2*w)) -test[d]<-sum(con[,d]*xbar)/sejk -}} -testmax[it]<-max(abs(test)) -} -testmax<-sort(testmax) -testmax -} -cnorm<-function(n,epsilon=.1,k=10){ -# -# generate n observations from a contaminated normal -# distribution -# probability 1-epsilon from a standard normal -# probability epsilon from normal with mean 0 and standard deviation k -# -if(epsilon>1)stop("epsilon must be less than or equal to 1") -if(epsilon<0)stop("epsilon must be greater than or equal to 0") -if(k<=0)stop("k must be greater than 0") -val<-rnorm(n) -uval<-runif(n) -flag<-(uval<=1-epsilon) -val[!flag]<-k*val[!flag] -val -} -twwmcp<-function(J,K,x,grp=c(1:p),p=J*K,tr=.2,alpha=.05,dif=FALSE){ -# -# For a J by K anova using quantiles with -# repeated measures on both factors, -# Perform all multiple comparisons for main effects -# and interactions. -# -# tr=.2. default trimming -# bop=F means bootstrap option not used; -# with bop=T, function uses usual medians rather -# rather than a single order statistic to estimate median -# in conjunction with bootstrap estimate of covariances -# among the sample medians. -# -# The R variable data is assumed to contain the raw -# data stored in a matrix or in list mode. -# When in list mode data[[1]] contains the data -# for the first level of both factors: level 1,1. -# data[[2]] is assumed to contain the data for level 1 of the -# first factor and level 2 of the second: level 1,2 -# data[[K]] is the data for level 1,K -# data[[K+1]] is the data for level 2,1, data[2K] is level 2,K, etc. -# -# It is assumed that data has length JK, the total number of -# groups being tested, but a subset of the data can be analyzed -# using grp -# -Qa<-NA -Qab<-NA -if(is.list(x))x<-elimna(matl(x)) -if(is.data.frame(x))x=as.matrix(x) -if(is.matrix(x))x<-elimna(x) -data<-x -if(is.matrix(data))data<-listm(data) -if(!is.list(data))stop("Data are not stored in list mode or a matrix") -if(p!=length(data)){ -print("The total number of groups, based on the specified levels, is") -print(p) -print("The number of groups stored in x is") -print(length(data)) -print("Warning: These two values are not equal") -} -if(p!=length(grp))stop("Apparently a subset of the groups was specified that does not match the total number of groups indicated by the values for J and K.") -tmeans<-0 -temp<-con2way(J,K) # contrasts matrices stored in temp -Qa<-rmmcp(x,con=temp$conA,alpha=alpha,dif=dif,tr=tr) -# Do test for factor B -Qb<-rmmcp(x,con=temp$conB,alpha=alpha,dif=dif,tr=tr) -# Do test for factor A by B interaction -Qab<-rmmcp(x,con=temp$conAB,alpha=alpha,dif=dif,tr=tr) -list(Qa=Qa,Qb=Qb,Qab=Qab) -} - -medpb.old<-function(x,alpha=.05,nboot=NA,grp=NA,est=median,con=0,bhop=FALSE, -SEED=TRUE,...){ -# -# Multiple comparisons for J independent groups using medians. -# -# A percentile bootstrap method is used. FWE is controlled with Rom's method. -# -# The data are assumed to be stored in x -# which either has list mode or is a matrix. 
In the first case -# x[[1]] contains the data for the first group, x[[2]] the data -# for the second group, etc. Length(x)=the number of groups = J. -# If stored in a matrix, the columns of the matrix correspond -# to groups. -# -# est is the measure of location and defaults to the median -# ... can be used to set optional arguments associated with est -# -# The argument grp can be used to analyze a subset of the groups -# Example: grp=c(1,3,5) would compare groups 1, 3 and 5. -# -# -# con can be used to specify linear contrasts; see the function lincon -# -# Missing values are allowed. -# -con<-as.matrix(con) -if(is.data.frame(x))x=as.matrix(x) -if(is.matrix(x))x<-listm(x) -if(!is.list(x))stop("Data must be stored in list mode or in matrix mode.") -if(!is.na(sum(grp))){ # Only analyze specified groups. -xx<-list() -for(i in 1:length(grp))xx[[i]]<-x[[grp[i]]] -x<-xx -} -J<-length(x) -tempn<-0 -mvec<-NA -for(j in 1:J){ -temp<-x[[j]] -temp<-temp[!is.na(temp)] # Remove missing values. -tempn[j]<-length(temp) -x[[j]]<-temp -mvec[j]<-est(temp,...) -} -Jm<-J-1 -# -# Determine contrast matrix -# -if(sum(con^2)==0){ -ncon<-(J^2-J)/2 -con<-matrix(0,J,ncon) -id<-0 -for (j in 1:Jm){ -jp<-j+1 -for (k in jp:J){ -id<-id+1 -con[j,id]<-1 -con[k,id]<-0-1 -}}} -ncon<-ncol(con) -dvec<-alpha/c(1:ncon) -if(nrow(con)!=J)stop("Something is wrong with con; the number of rows does not match the number of groups.") -# Determine nboot if a value was not specified -if(is.na(nboot)){ -nboot<-5000 -if(J <= 8)nboot<-4000 -if(J <= 3)nboot<-2000 -} -# Determine critical values -if(!bhop){ -if(alpha==.05){ -dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) -if(ncon > 10){ -avec<-.05/c(11:ncon) -dvec<-c(dvec,avec) -}} -if(alpha==.01){ -dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) -if(ncon > 10){ -avec<-.01/c(11:ncon) -dvec<-c(dvec,avec) -}} -if(alpha != .05 && alpha != .01){ -dvec<-alpha/c(1:ncon) -} -} -if(bhop)dvec<-(ncon-c(1:ncon)+1)*alpha/ncon -bvec<-matrix(NA,nrow=J,ncol=nboot) -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -#print("Taking bootstrap samples. Please wait.") -for(j in 1:J){ -#print(paste("Working on group ",j)) -data<-matrix(sample(x[[j]],size=length(x[[j]])*nboot,replace=TRUE),nrow=nboot) -bvec[j,]<-apply(data,1,est,...) # Bootstrapped values for jth group -} -test<-NA -bcon<-t(con)%*%bvec #ncon by nboot matrix -tvec<-t(con)%*%mvec -for (d in 1:ncon){ -tv<-sum(bcon[d,]==0)/nboot -test[d]<-sum(bcon[d,]>0)/nboot+.5*tv -if(test[d]> .5)test[d]<-1-test[d] -} -test<-2*test -output<-matrix(0,ncon,6) -dimnames(output)<-list(NULL,c("con.num","psihat","p.value","p.crit","ci.lower","ci.upper")) -temp2<-order(0-test) -zvec<-dvec[1:ncon] -sigvec<-(test[temp2]>=zvec) -output[temp2,4]<-zvec -icl<-round(dvec[ncon]*nboot/2)+1 -icu<-nboot-icl-1 -for (ic in 1:ncol(con)){ -output[ic,2]<-tvec[ic,] -output[ic,1]<-ic -output[ic,3]<-test[ic] -temp<-sort(bcon[ic,]) -output[ic,5]<-temp[icl] -output[ic,6]<-temp[icu] -} -num.sig<-sum(output[,3]<=output[,4]) -list(output=output,con=con,num.sig=num.sig) -} - -medpb<-function(x,alpha=.05,nboot=NA,grp=NA,est=median,con=0,bhop=FALSE,method='hoch', -SEED=TRUE,...){ -# -# Multiple comparisons for J independent groups using medians. -# -# A percentile bootstrap method. -# FWE controlled via argument method -# method =hoch Hochberg;s method is used by default -# -# The data are assumed to be stored in x -# which either has list mode or is a matrix. 
In the first case -# x[[1]] contains the data for the first group, x[[2]] the data -# for the second group, etc. Length(x)=the number of groups = J. -# If stored in a matrix, the columns of the matrix correspond -# to groups. -# -# est is the measure of location and defaults to the median -# ... can be used to set optional arguments associated with est -# -# The argument grp can be used to analyze a subset of the groups -# Example: grp=c(1,3,5) would compare groups 1, 3 and 5. -# -# -# con can be used to specify linear contrasts; see the function lincon -# -# Missing values are allowed. -# -con<-as.matrix(con) -if(is.data.frame(x))x=as.matrix(x) -if(is.matrix(x))x<-listm(x) -if(!is.list(x))stop('Data must be stored in list mode or in matrix mode.') -if(!is.na(sum(grp))){ # Only analyze specified groups. -xx<-list() -for(i in 1:length(grp))xx[[i]]<-x[[grp[i]]] -x<-xx -} -J<-length(x) -tempn<-0 -mvec<-NA -for(j in 1:J){ -temp<-x[[j]] -temp<-temp[!is.na(temp)] # Remove missing values. -tempn[j]<-length(temp) -x[[j]]<-temp -mvec[j]<-est(temp,...) -} -Jm<-J-1 -# -# Determine contrast matrix -# -if(sum(con^2)==0){ -ncon<-(J^2-J)/2 -con<-matrix(0,J,ncon) -id<-0 -for (j in 1:Jm){ -jp<-j+1 -for (k in jp:J){ -id<-id+1 -con[j,id]<-1 -con[k,id]<-0-1 -}}} -ncon<-ncol(con) -dvec<-alpha/c(1:ncon) -if(nrow(con)!=J)stop('Something is wrong with con; the number of rows does not match the number of groups.') -# Determine nboot if a value was not specified -if(is.na(nboot)){ -nboot<-5000 -if(J <= 8)nboot<-4000 -if(J <= 3)nboot<-2000 -} -# Determine critical values -if(!bhop){ -if(alpha==.05){ -dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) -if(ncon > 10){ -avec<-.05/c(11:ncon) -dvec<-c(dvec,avec) -}} -if(alpha==.01){ -dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) -if(ncon > 10){ -avec<-.01/c(11:ncon) -dvec<-c(dvec,avec) -}} -if(alpha != .05 && alpha != .01){ -dvec<-alpha/c(1:ncon) -} -} -if(bhop)dvec<-(ncon-c(1:ncon)+1)*alpha/ncon -bvec<-matrix(NA,nrow=J,ncol=nboot) -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -for(j in 1:J){ -data<-matrix(sample(x[[j]],size=length(x[[j]])*nboot,replace=TRUE),nrow=nboot) -bvec[j,]<-apply(data,1,est,...) # Bootstrapped values for jth group -} -test<-NA -bcon<-t(con)%*%bvec #ncon by nboot matrix -tvec<-t(con)%*%mvec -for (d in 1:ncon){ -tv<-sum(bcon[d,]==0)/nboot -test[d]<-sum(bcon[d,]>0)/nboot+.5*tv -if(test[d]> .5)test[d]<-1-test[d] -} -test<-2*test -output<-matrix(0,ncon,7) -dimnames(output)<-list(NULL,c('con.num','psihat','p.value','p.crit','ci.lower','ci.upper','adj.p.value')) -temp2<-order(0-test) -zvec<-dvec[1:ncon] -sigvec<-(test[temp2]>=zvec) -output[temp2,4]<-zvec -icl<-round(dvec[ncon]*nboot/2)+1 -icu<-nboot-icl-1 -for (ic in 1:ncol(con)){ -output[ic,2]<-tvec[ic,] -output[ic,1]<-ic -output[ic,3]<-test[ic] -temp<-sort(bcon[ic,]) -output[ic,5]<-temp[icl] -output[ic,6]<-temp[icu] -} -num.sig<-sum(output[,3]<=output[,4]) -output[,7]=p.adjust(output[,3],method=method) - -list(output=output,con=con,num.sig=num.sig) -} - - -medmcp=medpb - -rbbinom<-function(n,nbin,r,s){ -# -# Generate n values from a beta-binomial, -# r and s are the parameters of the beta distribution. 
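-# Each value is obtained by drawing a success probability from a beta(r,s) distribution and then a binomial count.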
-# nbin is for the binomial distribution, -# Example: nbin=10 means the sample space=c(0:10) -# -x<-NA -for(i in 1:n){ -pval<-rbeta(1,r,s) -x[i]<-rbinom(1,nbin,pval) -} -x -} - -rbeta.binomial=rbbinom - -med2g<-function(x,y,alpha=.05,nboot=2000,SEED=TRUE,...){ -# -# Compare medians of two independent groups using percentile bootstrap -# -# Missing values are allowed. -# -x<-elimna(x) -y<-elimna(y) -mvec<-NA -mvec[1]<-median(x) -mvec[2]<-median(y) -bvec<-NA -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -datax<-matrix(sample(x,size=length(x)*nboot,replace=TRUE),nrow=nboot) -datay<-matrix(sample(y,size=length(x)*nboot,replace=TRUE),nrow=nboot) -bvec1<-apply(datax,1,median) # Bootstrapped values for jth group -bvec2<-apply(datay,1,median) # Bootstrapped values for jth group -test<-sum((bvec1>bvec2))/nboot -tv<-sum(bvec1==bvec2)/nboot -test<-test+.5*tv -if(test> .5)test<-1-test -test<-2*test -dvec<-sort(bvec1-bvec2) -icl<-round(alpha*nboot/2)+1 -icu<-nboot-icl-1 -cilow<-dvec[icl] -ciup<-dvec[icu] -list(p.value=test,est.1=mvec[1],est.2=mvec[2],est.dif=mvec[1]-mvec[2],ci.low=cilow,ci.up=ciup) -} - - -twobinom<-function(r1=sum(elimna(x)),n1=length(elimna(x)),r2=sum(elimna(y)),n2=length(elimna(y)),x=NA,y=NA,alpha=.05){ -# -# Test the hypothesis that two independent binomials have equal -# probability of success using the Storer--Kim method. -# -# r1=number of successes in group 1 -# n1=number of observations in group 1 -# -n1p<-n1+1 -n2p<-n2+1 -n1m<-n1-1 -n2m<-n2-1 -chk<-abs(r1/n1-r2/n2) -x<-c(0:n1)/n1 -y<-c(0:n2)/n2 -phat<-(r1+r2)/(n1+n2) -m1<-outer(x,y,"-") -m2<-matrix(1,n1p,n2p) -flag<-(abs(m1)>=chk) -m3<-m2*flag -b1<-1 -b2<-1 -xv<-c(1:n1) -yv<-c(1:n2) -xv1<-n1-xv+1 -yv1<-n2-yv+1 -dis1<-c(1,pbeta(phat,xv,xv1)) -dis2<-c(1,pbeta(phat,yv,yv1)) -pd1<-NA -pd2<-NA -for(i in 1:n1)pd1[i]<-dis1[i]-dis1[i+1] -for(i in 1:n2)pd2[i]<-dis2[i]-dis2[i+1] -pd1[n1p]<-phat^n1 -pd2[n2p]<-phat^n2 -m4<-outer(pd1,pd2,"*") -test<-sum(m3*m4) -list(p.value=test,p1=r1/n1,p2=r2/n2,est.dif=r1/n1-r2/n2) -} - -lband.fun<-function(x,y,crit){ -# -# function used to determine probability of type I error given crit -# -pi<-gamma(.5)^2 -xr<-rank(x) -yr<-rank(y) -temp<-apply(cbind(xr,yr),1,max) -n<-length(x) -fj<-NA -for(i in 1:n)fj[i]<-sum(temp==i) -v1<-NA -for(j in 1:n)v1[j]<-(j-sum(fj[1:j]))/n -psi<-rep(0,n) -for(j in 1:n){ -if(v1[j]>0)psi[j]<-crit*exp(0-crit^2/(2*v1[j]))/sqrt(2*pi*v1[j]^3) -} -res<-mean(fj*psi) -res -} - -lband.fun2<-function(m,crit,alpha=.05){ -x<-m[,1] -y<-m[,2] -val<-abs(alpha-lband.fun(x,y,crit)) -val -} -qdec<-function(x){ -# -# compute deciles using single order statistics -# (function deciles uses Harrell-Davis estimator) -# -vals<-NA -for(i in 1:9){ -vals[i]<-qest(x,i/10) -} -vals -} -m2way<-function(J,K,x,est=hd,alpha=.05,nboot=600,SEED=TRUE,grp=NA,pr=FALSE,...){ -# -# Two-way ANOVA based on forming averages -# -# By default -# est=hd meaning that medians are used with the Harrell-Davis estimator. -# -# The data are assumed to be stored in x in list mode or in a matrix. -# If grp is unspecified, it is assumed x[[1]] contains the data -# for the first level of both factors: level 1,1. -# x[[2]] is assumed to contain the data for level 1 of the -# first factor and level 2 of the second factor: level 1,2 -# x[[j+1]] is the data for level 2,1, etc. -# If the data are in wrong order, grp can be used to rearrange the -# groups. 
For example, for a two by two design, grp<-c(2,4,3,1) -# indicates that the second group corresponds to level 1,1; -# group 4 corresponds to level 1,2; group 3 is level 2,1; -# and group 1 is level 2,2. -# -# Missing values are automatically removed. -# -JK<-J*K -if(is.data.frame(x))x=as.matrix(x) -xcen<-list() - if(is.matrix(x)) - x <- listm(x) - if(!is.list(x)) - stop("Data must be stored in list mode or a matrix.") - if(!is.na(grp[1])) { - yy <- x - for(j in 1:length(grp)) - x[[j]] <- yy[[grp[j]]] - } -for(j in 1:JK){ -temp<-x[[j]] -temp<-temp[!is.na(temp)] # Remove missing values. -x[[j]]<-temp -} -xx<-list() -mloc<-NA -for(i in 1:JK){ -xx[[i]]<-x[[i]] -mloc[i]<-est(xx[[i]],...) -xcen[[i]]<-xx[[i]]-mloc[i] -} -x<-xx -mat<-matrix(mloc,nrow=J,ncol=K,byrow=TRUE) -leva<-apply(mat,1,mean) # J averages over columns -levb<-apply(mat,2,mean) -gm<-mean(levb) -testa<-sum((leva-mean(leva))^2) -testb<-sum((levb-mean(levb))^2) -testab<-NA -tempab<-matrix(NA,nrow=J,ncol=K) -for(j in 1:J){ -for(k in 1:K){ -tempab[j,k]<-mat[j,k]-leva[j]-levb[k]+gm -}} -testab<-sum(tempab^2) -bvec<-matrix(NA,nrow=JK,ncol=nboot) -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -if(pr)print("Taking bootstrap samples. Please wait.") -for(j in 1:JK){ -if(pr)print(paste("Working on group ",j)) -data<-matrix(sample(xcen[[j]],size=length(xcen[[j]])*nboot,replace=TRUE), -nrow=nboot) -bvec[j,]<-apply(data,1,est,...) # JK by nboot matrix, jth row contains -# bootstrapped estimates for jth group -} -boota<-NA -bootb<-NA -bootab<-NA -for(i in 1:nboot){ -mat<-matrix(bvec[,i],nrow=J,ncol=K,byrow=TRUE) -leva<-apply(mat,1,mean) # J averages over columns -levb<-apply(mat,2,mean) -gm<-mean(mat) -boota[i]<-sum((leva-mean(leva))^2) -bootb[i]<-sum((levb-mean(levb))^2) -for(j in 1:J){ -for(k in 1:K){ -tempab[j,k]<-mat[j,k]-leva[j]-levb[k]+gm -}} -bootab[i]<-sum(tempab^2)} -pvala<-1-sum(testa>=boota)/nboot -pvalb<-1-sum(testb>=bootb)/nboot -pvalab<-1-sum(testab>=bootab)/nboot -list(p.value.A=pvala,p.value.B=pvalb,p.value.AB=pvalab, -test.A=testa,test.B=testb, -test.AB=testab,est.loc=matrix(mloc,nrow=J,ncol=K,byrow=TRUE)) -} - - - -b1way<-function(x,est=onestep,nboot=599,SEED=TRUE,...){ -# -# Test the hypothesis that J measures of location are equal -# using the percentile bootstrap method. -# By default, M-estimators are compared using 599 bootstrap samples. -# -# The data are assumed to be stored in x in list mode. Thus, -# x[[1]] contains the data for the first group, x[[2]] the data -# for the second group, etc. Length(x)=the number of groups = J, say. -# -# -if(is.data.frame(x))x=as.matrix(x) -if(is.matrix(x))x<-listm(x) -if(!is.list(x))stop("Data must be stored in list mode or a matrix.") -J<-length(x) -for(j in 1:J)x[[j]]=elimna(x[[j]]) -nval<-vector("numeric",length(x)) -gest<-vector("numeric",length(x)) -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -bvec<-matrix(0,J,nboot) -#print("Taking bootstrap samples. Please wait.") -for(j in 1:J){ -#print(paste("Working on group ",j)) -nval[j]<-length(x[[j]]) -gest[j]<-est(x[[j]]) -xcen<-x[[j]]-est(x[[j]],...) -data<-matrix(sample(xcen,size=length(x[[j]])*nboot,replace=TRUE),nrow=nboot) -bvec[j,]<-apply(data,1,est,...) # A J by nboot matrix -# containing the bootstrap values of est. 
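-# (The data are centered before resampling, so the hypothesis of equal measures of location is true for the bootstrap samples.)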
-} -teststat<-wsumsq(gest,nval) -testb<-apply(bvec,2,wsumsq,nval) -p.value<-1 - sum(teststat >= testb)/nboot -teststat<-wsumsq(gest,nval) -if(teststat == 0)p.value <- 1 -list(teststat=teststat,p.value=p.value) -} - - -lintest<-function(x,y,regfun=tsreg,nboot=500,alpha=.05,xout=FALSE,SEED=TRUE, -outfun=out,...){ -# -# Test the hypothesis that the regression surface is a plane. -# Stute et al. (1998, JASA, 93, 141-149). -# -if(SEED)set.seed(2) -#if(identical(regfun,Qreg))print('When using Qreg, be sure to include res.vals=TRUE') -#if(identical(regfun,tshdreg))print('When using tshdreg, be sure to include RES=TRUE') -#if(identical(regfun,MMreg))print('When using MMreg, be sure to include RES=TRUE') # no longer necessary -x<-as.matrix(x) -d<-ncol(x) -temp<-elimna(cbind(x,y)) -x<-temp[,1:d] -x<-as.matrix(x) -y<-temp[,d+1] -if(xout){ -flag<-outfun(x,...)$keep -x<-x[flag,] -x<-as.matrix(x) -y<-y[flag] -} -mflag<-matrix(NA,nrow=length(y),ncol=length(y)) -for (j in 1:length(y)){ -for (k in 1:length(y)){ -mflag[j,k]<-(sum(x[j,]<=x[k,])==ncol(x)) -} -} -reg<-regfun(x,y,...) -yhat<-y-reg$residuals -#print("Taking bootstrap samples, please wait.") -data<-matrix(runif(length(y)*nboot),nrow=nboot) -data<-sqrt(12)*(data-.5) # standardize the random numbers. -rvalb<-apply(data,1,lintests1,yhat,reg$residuals,mflag,x,regfun,...) -# An n x nboot matrix of R values -rvalb<-rvalb/sqrt(length(y)) -dstatb<-apply(abs(rvalb),2,max) -wstatb<-apply(rvalb^2,2,mean) -# compute test statistic -v<-c(rep(1,length(y))) -rval<-lintests1(v,yhat,reg$residuals,mflag,x,regfun,...) -rval<-rval/sqrt(length(y)) -dstat<-max(abs(rval)) -wstat<-mean(rval^2) -ib<-round(nboot*(1-alpha)) -p.value.d<-1-sum(dstat>=dstatb)/nboot -p.value.w<-1-sum(wstat>=wstatb)/nboot -list(dstat=dstat,wstat=wstat,p.value.d=p.value.d,p.value.w=p.value.w) -} - - -tauloc<-function(x,cval=4.5){ -# -# Compute the tau measure of location as described in -# Yohai and Zamar (JASA, 83, 406-413). -# -x<-elimna(x) -s<-qnorm(.75)*mad(x) -y<-(x-median(x))/s -W<-(1-(y/cval)^2)^2 -flag<-(abs(W)>cval) -W[flag]<-0 -val<-sum(W*x)/sum(W) -val -} - -tauvar<-function(x,cval=3){ -# -# Compute the tau measure of scale as described in -# Yohai and Zamar (JASA, 1988, 83, 406-413). -# The computational method is described in Maronna and Zamar -# (Technometrics, 2002, 44, 307-317) -# see p. 310 -# -x<-elimna(x) -s<-qnorm(.75)*mad(x) -y<-(x-tauloc(x))/s -cvec<-rep(cval,length(x)) -W<-apply(cbind(y^2,cvec^2),1,FUN="min") -val<-s^2*sum(W)/length(x) -val -} - -gkcor<-function(x,y,varfun=tauvar,ccov=FALSE,...){ -# -# Compute a correlation coefficient using the Gnanadesikan-Ketterning -# estimator. -# ccov=T, computes covariance instead. -# (cf. Marrona & Zomar, 2002, Technometrics -# -val<-.25*(varfun(x+y,...)-varfun(x-y,...)) -if(!ccov)val<-val/(sqrt(varfun(x,...))*sqrt(varfun(y,...))) -val -} -covroc<-function(x){ -# -# compute Rocke's TBS covariance matrix -# - library(robust) -temp<-covRob(x,estim="M") -val<-temp[2]$cov -val -} -indt<-function(x,y,nboot=500,flag=1,SEED=TRUE,pr=TRUE){ -# -# Test the hypothesis of independence between x and y by -# testing the hypothesis that the regression surface is a horizontal plane. -# Stute et al. (1998, JASA, 93, 141-149). -# -# flag=1 gives Kolmogorov-Smirnov test statistic -# flag=2 gives the Cramer-von Mises test statistic -# flag=3 causes both test statistics to be reported. -# -# tr=0 results in the Cramer-von Mises test statistic when flag=2 -# With tr>0, a trimmed version of the test statistic is used. -# -# Modified Dec 2005. 
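-# Illustrative call: indt(x,y,flag=3) reports both the Kolmogorov-Smirnov and Cramer-von Mises test statistics.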
-# -tr=0 -#if(tr<0)stop("Amount trimmed must be > 0") -#if(tr>.5)stop("Amount trimmed must be <=.5") -if(SEED)set.seed(2) -x<-as.matrix(x) -# First, eliminate any rows of data with missing values. -temp <- cbind(x, y) - temp <- elimna(temp) - pval<-ncol(temp)-1 - x <- temp[,1:pval] - y <- temp[, pval+1] -x<-as.matrix(x) -mflag<-matrix(NA,nrow=length(y),ncol=length(y)) -for (j in 1:length(y)){ -for (k in 1:length(y)){ -mflag[j,k]<-(sum(x[j,]<=x[k,])==ncol(x)) -} -} -# ith row of mflag indicates which rows of the matrix x are less -# than or equal to ith row of x -# -yhat<-mean(y) -res<-y-yhat -if(pr)print("Taking bootstrap sample, please wait.") -data<-matrix(runif(length(y)*nboot),nrow=nboot)# -data<-(data-.5)*sqrt(12) # standardize the random numbers. -rvalb<-apply(data,1,regts1,yhat,res,mflag,x,tr) -# An n x nboot matrix of R values -rvalb<-rvalb/sqrt(length(y)) -dstatb<-apply(abs(rvalb),2,max) -wstatb<-apply(rvalb^2,2,mean,tr=tr) -v<-c(rep(1,length(y))) -rval<-regts1(v,yhat,res,mflag,x,tr=0) -rval<-rval/sqrt(length(y)) -dstat<-NA -wstat<-NA -critd<-NA -critw<-NA -p.vald<-NA -p.valw<-NA -if(flag==1 || flag==3){ -dstat<-max(abs(rval)) -p.vald<-1-sum(dstat>=dstatb)/nboot -} -if(flag==2 || flag==3){ -wstat<-mean(rval^2,tr=tr) -p.valw<-1-sum(wstat>=wstatb)/nboot -} -list(dstat=dstat,wstat=wstat,p.value.d=p.vald,p.value.w=p.valw) -} - - -taulc<-function(x,mu.too=FALSE){ -# -val<-tauvar(x) -if(mu.too){ -val[2]<-val -val[1]<-tauloc(x) -} -val -} - - -trimww.sub<-function(cmat,vmean,vsqse,h,J,K){ -# -# This function is used by trimww -# -# The function performs a variation of Johansen's test of C mu = 0 for -# a within by within design -# C is a k by p matrix of rank k and mu is a p by 1 matrix of -# of unknown medians. -# The argument cmat contains the matrix C. -# vmean is a vector of length p containing the p medians -# vsqe is matrix containing the -# estimated covariances among the medians -# h is the sample size -# -p<-J*K -yvec<-matrix(vmean,length(vmean),1) -test<-cmat%*%vsqse%*%t(cmat) -invc<-solve(test) -test<-t(yvec)%*%t(cmat)%*%invc%*%cmat%*%yvec -temp<-0 -mtem<-vsqse%*%t(cmat)%*%invc%*%cmat -temp<-(sum(diag(mtem%*%mtem))+(sum(diag(mtem)))^2)/(h-1) -A<-.5*sum(temp) -cval<-nrow(cmat)+2*A-6*A/(nrow(cmat)+2) -test<-test/cval -test -} - - - -trimww<-function(J,K,x,grp=c(1:p),p=J*K,tr=.2){ -# -# Perform a J by K anova using trimmed means with -# repeated measures on both factors. -# -# tr=.2 is default trimming -# -# The R variable data is assumed to contain the raw -# data stored in list mode. data[[1]] contains the data -# for the first level of both factors: level 1,1. -# data[[2]] is assumed to contain the data for level 1 of the -# first factor and level 2 of the second: level 1,2 -# data[[K]] is the data for level 1,K -# data[[K+1]] is the data for level 2,1, data[2K] is level 2,K, etc. 
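-# Illustrative usage of indt, defined above (added example, not part of the original
-# source; the simulated data are arbitrary):
-# set.seed(2)
-# x <- rnorm(50)
-# y <- x^2 + rnorm(50)   # dependent on x, yet Pearson's correlation is near zero
-# indt(x, y)             # wild bootstrap test of independence
-# indt(x, y, flag = 3)   # report both the Kolmogorov-Smirnov and Cramer-von Mises statistics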
-# -# It is assumed that data has length JK, the total number of -# groups being tested, but a subset of the data can be analyzed -# using grp -# -if(is.list(x))x<-elimna(matl(x)) -if(is.matrix(x))x<-elimna(x) -data<-x -if(is.matrix(data))data<-listm(data) -if(!is.list(data))stop("Data are not stored in list mode or a matrix") -if(p!=length(data)){ -print("The total number of groups, based on the specified levels, is") -print(p) -print("The number of groups stored in x is") -print(length(data)) -print("Warning: These two values are not equal") -} -if(p!=length(grp))stop("Apparently a subset of the groups was specified that does not match the total number of groups indicated by the values for J and K.") -tmeans<-0 -h<-length(data[[grp[1]]]) -v<-matrix(0,p,p) -for (i in 1:p)tmeans[i]<-mean(data[[grp[i]]],tr=tr,na.rm=TRUE) -v<-covmtrim(data,tr=tr) -ij<-matrix(c(rep(1,J)),1,J) -ik<-matrix(c(rep(1,K)),1,K) -jm1<-J-1 -cj<-diag(1,jm1,J) -for (i in 1:jm1)cj[i,i+1]<-0-1 -km1<-K-1 -ck<-diag(1,km1,K) -for (i in 1:km1)ck[i,i+1]<-0-1 -# Do test for factor A -cmat<-kron(cj,ik) # Contrast matrix for factor A -#Qa<-johansp(cmat,tmeans,v,h,J,K) -Qa<-trimww.sub(cmat,tmeans,v,h,J,K) -#Qa.siglevel<-1-pf(Qa$teststat,J-1,999) -Qa.siglevel<-1-pf(Qa,J-1,999) -# Do test for factor B -cmat<-kron(ij,ck) # Contrast matrix for factor B -#Qb<-johansp(cmat,tmeans,v,h,J,K) -Qb<-trimww.sub(cmat,tmeans,v,h,J,K) -Qb.siglevel<-1-pf(Qb,K-1,999) -# Do test for factor A by B interaction -cmat<-kron(cj,ck) # Contrast matrix for factor A by B -#Qab<-johansp(cmat,tmeans,v,h,J,K) -Qab<-trimww.sub(cmat,tmeans,v,h,J,K) -Qab.siglevel<-1-pf(Qab,(J-1)*(K-1),999) -list(Qa=Qa,Qa.siglevel=Qa.siglevel, -Qb=Qb,Qb.siglevel=Qb.siglevel, -Qab=Qab,Qab.siglevel=Qab.siglevel) -} - - -msmedci<-function(x,alpha=.05,nullval=0){ -# -# Confidence interval for the median -# -se<-msmedse(x) -est<-median(x) -ci.low<-est-qnorm(1-alpha/2)*se -ci.hi<-est+qnorm(1-alpha/2)*se -test<-(est-nullval)/se -p.value<-2*(1-pnorm(abs(test))) -list(test=test,ci.low=ci.low,ci.hi=ci.hi,p.value=p.value,median=est) -} -medcipb<-function(x,alpha=.05,null.val=NA,nboot=500,SEED=TRUE,...){ -# -# Bootstrap confidence interval for the median of single variable. -# The usual sample median is used. hdpb uses the Harrell--Davis estimator -# Missing values are allowed. -# -x<-elimna(x) -est=median(x) -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -#print("Taking bootstrap samples. Please wait.") -data<-matrix(sample(x,size=length(x)*nboot,replace=TRUE),nrow=nboot) -bvec<-apply(data,1,median) # Bootstrapped values -test<-NULL -if(!is.na(null.val)){ -tv<-sum(bvec==null.val)/nboot -test<-sum(bvec>null.val)/nboot+.5*tv -if(test> .5)test<-1-test -test<-2*test -} -bvec<-sort(bvec) -icl<-round(alpha*nboot/2)+1 -icu<-nboot-icl-1 -cilow<-bvec[icl] -ciup<-bvec[icu] -list(Est.=est,ci.low=cilow,ci.up=ciup,p.value=test) -} - -regtest<-function(x,y,regfun=tsreg,nboot=600,alpha=.05,plotit=TRUE, -grp=c(1:ncol(x)),nullvec=c(rep(0,length(grp))),xout=FALSE,outfun=outpro,SEED=TRUE,pr=TRUE,...){ -# -# Test the hypothesis that q of the p predictors are equal to -# some specified constants. By default, the hypothesis is that all -# p predictors have a coefficient equal to zero. -# The method is based on a confidence ellipsoid. -# The critical value is determined with the percentile bootstrap method -# in conjunction with Mahalanobis distance. 
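-# Illustrative usage of msmedci and medcipb, defined above (added example, not part
-# of the original source):
-# set.seed(3)
-# x <- rexp(40)
-# msmedci(x)                 # CI for the median based on an estimate of its standard error (msmedse)
-# medcipb(x, null.val = 1)   # percentile bootstrap CI plus a p-value for H0: median = 1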
-# -x<-as.matrix(x) -p1<-ncol(x)+1 -p<-ncol(x) -xy<-cbind(x,y) -xy<-elimna(xy) -x<-xy[,1:p] -y<-xy[,p1] -if(xout){ -if(pr)print("Default for outfun is now outpro") -m<-cbind(x,y) -if(identical(outfun,outblp))flag=outblp(x,y,regfun=regfun,plotit=FALSE)$keep -else -flag<-outfun(x,plotit=FALSE,...)$keep -m<-m[flag,] -x<-m[,1:p] -y<-m[,p1] -} -x<-as.matrix(x) -if(length(grp)!=length(nullvec))stop("The arguments grp and nullvec must have the same length.") -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -if(pr)print("Taking bootstrap samples. Please wait.") -data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot) -bvec<-apply(data,1,regboot,x,y,regfun) # A p+1 by nboot matrix. The first row -# contains the bootstrap intercepts, the second row -# contains the bootstrap values for first predictor, etc. -grp<-grp+1 #Ignore the intercept. -est<-regfun(x,y)$coef -estsub<-est[grp] -bsub<-t(bvec[grp,]) -if(length(grp)==1){ -m1<-sum((bvec[grp,]-est)^2)/(length(y)-1) -dis<-(bsub-estsub)^2/m1 -} -if(length(grp)>1){ -mvec<-apply(bsub,2,FUN=mean) -m1<-var(t(t(bsub)-mvec+estsub)) -dis<-mahalanobis(bsub,estsub,m1) -} -dis2<-order(dis) -dis<-sort(dis) -critn<-floor((1-alpha)*nboot) -crit<-dis[critn] -test<-mahalanobis(t(estsub),nullvec,m1) -sig.level<-1-sum(test>dis)/nboot -if(length(grp)==2 && plotit){ -plot(bsub,xlab="Parameter 1",ylab="Parameter 2") -points(nullvec[1],nullvec[2],pch=0) -xx<-bsub[dis2[1:critn],] -xord<-order(xx[,1]) -xx<-xx[xord,] -temp<-chull(xx) -lines(xx[temp,]) -lines(xx[c(temp[1],temp[length(temp)]),]) -} -list(test=test,crit=crit,p.value=sig.level,nullvec=nullvec,est=estsub,n=length(y)) -} - -reg2ci<-function(x,y,x1,y1,regfun=tsreg,nboot=599,alpha=.05,plotit=TRUE,SEED=TRUE, -xout=FALSE,outfun=outpro,xlab="X",ylab="Y",pr=FALSE,...){ -# -# Compute a .95 confidence interval for the difference between the -# the intercepts and slopes corresponding to two independent groups. -# The default regression method is Theil-Sen. -# -# The predictor values for the first group are -# assumed to be in the n by p matrix x. -# The predictors for the second group are in x1 -# -# The default number of bootstrap samples is nboot=599 -# -# regfun can be any R function that returns the coefficients in -# the vector regfun$coef, the first element of which contains the -# estimated intercept, the second element contains the estimate of -# the first predictor, etc. -# -x<-as.matrix(x) -xx<-cbind(x,y) -xx<-elimna(xx) -x<-xx[,1:ncol(x)] -x<-as.matrix(x) -y<-xx[,ncol(x)+1] -x1<-as.matrix(x1) -xx1<-cbind(x1,y1) -xx1<-elimna(xx1) -x1<-xx1[,1:ncol(x1)] -x1<-as.matrix(x1) -y1<-xx1[,ncol(x1)+1] -x=as.matrix(x) -x1=as.matrix(x1) -if(xout){ -if(pr)print("outfun now defaults to outpro rather than out") -if(identical(outfun,outblp)){ -flag1=outblp(x,y,plotit=FALSE)$keep -flag2=outblp(x1,y2,plotit=FALSE)$keep -} -if(!identical(outfun,outblp)){ -flag1=outfun(x,plotit=FALSE)$keep -flag2=outfun(x1,plotit=FALSE)$keep -} -x=x[flag1,] -y=y[flag1] -x1=x1[flag2,] -y1=y1[flag2] -} -n=length(y) -n[2]=length(y1) -x<-as.matrix(x) -x1<-as.matrix(x1) -est1=regfun(x,y,...)$coef -est2=regfun(x1,y1,...)$coef -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot) -bvec<-apply(data,1,regboot,x,y,regfun,xout=FALSE,...) # A p+1 by nboot matrix. 
The first row -# contains the bootstrap intercepts, the second row -# contains the bootstrap values for first predictor, etc. -data<-matrix(sample(length(y1),size=length(y1)*nboot,replace=TRUE),nrow=nboot) -bvec1<-apply(data,1,regboot,x1,y1,regfun,xout=FALSE,...) -bvec<-bvec-bvec1 -p1<-ncol(x)+1 -regci<-matrix(0,p1,6) -dimnames(regci)<-list(NULL, -c("Parameter","ci.lower","ci.upper","p.value","Group 1","Group 2")) -ilow<-round((alpha/2)*nboot)+1 -ihi<-nboot-(ilow-1) -for(i in 1:p1){ -temp<-sum(bvec[i,]<0)/nboot+sum(bvec[i,]==0)/(2*nboot) -regci[i,4]<-2*min(temp,1-temp) -bsort<-sort(bvec[i,]) -regci[i,2]<-bsort[ilow] -regci[i,3]<-bsort[ihi] -regci[,1]<-c(0:ncol(x)) -} -regci[,5]=est1 -regci[,6]=est2 -if(ncol(x)==1 && plotit){ -plot(c(x,x1),c(y,y1),type="n",xlab=xlab,ylab=ylab) -points(x,y) -points(x1,y1,pch="+") -abline(regfun(x,y,...)$coef) -abline(regfun(x1,y1,...)$coef,lty=2) -} -list(n=n,output=regci) -} - - -anova1<-function(x){ -# -# conventional one-way anova -# -if(is.matrix(x) || is.data.frame(x))x<-listm(x) -x=elimna(x) -A<-0 -B<-0 -C<-0 -N<-0 -for(j in 1:length(x)){ -N<-N+length(x[[j]]) -A<-A+sum(x[[j]]^2) -B<-B+sum(x[[j]]) -C<-C+(sum(x[[j]]))^2/length(x[[j]]) -} -SST<-A-B^2/N -SSBG<-C-B^2/N -SSWG<-A-C -nu1<-length(x)-1 -nu2<-N-length(x) -MSBG<-SSBG/nu1 -MSWG<-SSWG/nu2 -FVAL<-MSBG/MSWG -pvalue<-1-pf(FVAL,nu1,nu2) -list(F.test=FVAL,p.value=pvalue,df1=nu1,df2=nu2,MSBG=MSBG,MSWG=MSWG) -} -twodcor8<-function(x,y){ -# -# Compute a .95 confidence interval for -# the difference between two dependent -# correlations corresponding to two independent -# goups. -# -# -# x is a matrix with two columns, -# y is a vector -# Goal: test equality of Pearson correlation for x1, y versus x2, y. -# -# For general use, twodcor10 is probably better, -# which calls this function and estimates an adjusted p-value. -# -X<-elimna(cbind(x,y)) -Z1<-(X[,1]-mean(X[,1]))/sqrt(var(X[,1])) -Z2<-(X[,2]-mean(X[,2]))/sqrt(var(X[,2])) -temp<-cor.test(Z1-Z2,X[,3]) -temp<-temp[3]$p.value -list(p.value=temp) -} - -twodcor10<-function(x,y,nboot=500,SEED=TRUE,alpha=.05){ -# -# Compute a .95 confidence interval for -# the difference between two dependent -# correlations corresponding to two independent -# goups. -# -# x is a matrix with two columns, -# y is a vector -# Goal: test equality of Pearson correlation for x1, y versus x2, y. -# -# This function uses an adjusted p-value, the adjustment -# being made assuming normality. -# -# nboot indicates how many samples from a normal distribution -# are used to approximate the adjustment. -# -# Simulations suggest that this fucntion -# continues to work well under non-normality. 
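-# Illustrative usage of reg2ci, defined above (added example, not part of the original
-# source; the Theil-Sen estimator is the default and the simulated data are arbitrary):
-# set.seed(5)
-# x1 <- rnorm(40); y1 <- x1 + rnorm(40)
-# x2 <- rnorm(40); y2 <- 1.5 * x2 + rnorm(40)
-# reg2ci(x1, y1, x2, y2)   # CIs and p-values for the difference in intercept and slope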
-# -if(SEED)set.seed(2) -X<-elimna(cbind(x,y)) -if(ncol(X)!=3)stop("x should be a matrix with two columns") -n<-nrow(X) -cval<-cor(X) -nval<-(cval[1,3]+cval[2,3])/2 -cmat<-bdiag(1,3,nval) -cmat[1,2]<-nval -cmat[2,1]<-nval -pval<-NA -for(i in 1:nboot){ -d<-rmul(n,p=3,cmat=cmat) -pval[i]<-twodcor8(d[,1:2],d[,3])$p.value -} -pval<-sort(pval) -iv<-round(alpha*nboot) -est.p<-pval[iv] -adp<-alpha/est.p -test<-twodcor8(X[,1:2],X[,3])$p.value -p.value<-test*adp -if(p.value>1)p.value<-1 -list(p.value=p.value) -} - -matsplit<-function(m,coln=NULL){ -# -# Column coln of matrix m is assumed to have a binary variable -# This functions removes rows with missing values -# and then splits m into two matrices based on the values -# in column coln -# -if(is.null(coln))stop("specify coln") -x<-m[,coln] -val<-unique(x) -if(length(val)>2)stop("More than two values detected in specified column") -flag<-(x==val[1]) -m1<-m[flag,] -m2<-m[!flag,] -list(m1=m1,m2=m2) -} -tkmcp<-function(x,alpha=.05,ind.pval=TRUE){ -# -# conventional Tukey-Kramer multiple comparison procedure -# for all pairiwise comparisons. -# -# ind.pval=T, computes p-value for each individual test -# ind.pval=F computes p-value based on controlling the -# familywise error rate. (The alpha level at which the -# Tukey-Kramer test would reject.) -# -if(is.matrix(x))x<-listm(x) -J<-length(x) -A<-0 -B<-0 -C<-0 -N<-0 -for(j in 1:J){ -N<-N+length(x[[j]]) -A<-A+sum(x[[j]]^2) -B<-B+sum(x[[j]]) -C<-C+(sum(x[[j]]))^2/length(x[[j]]) -} -SST<-A-B^2/N -SSBG<-C-B^2/N -SSWG<-A-C -nu1<-length(x)-1 -nu2<-N-length(x) -MSBG<-SSBG/nu1 -MSWG<-SSWG/nu2 -numcom<-length(x)*(length(x)-1)/2 -output<-matrix(nrow=numcom,ncol=7) -dimnames(output)<-list(NULL,c("Group","Group","t.test","est.difference", -"ci.lower","ci.upper","p.value")) -ic<-0 -for (j in 1:J){ -for (k in 1:J){ -if (j < k){ -ic<-ic+1 -output[ic,1]<-j -output[ic,2]<-k -dif<-mean(x[[j]])-mean(x[[k]]) -output[ic,3]<-abs(dif)/sqrt(MSWG*(1/length(x[[j]])+1/length(x[[k]]))/2) -output[ic,4]<-dif -crit<-qtukey(1-alpha,length(x),nu2) -output[ic,5]<-dif-crit*sqrt(MSWG*(1/length(x[[j]])+1/length(x[[k]]))/2) -output[ic,6]<-dif+crit*sqrt(MSWG*(1/length(x[[j]])+1/length(x[[k]]))/2) -if(!ind.pval)output[ic,7]<-1-ptukey(output[ic,3],length(x),nu2) -if(ind.pval)output[ic,7]<-2*(1-pt(output[ic,3],nu2)) -}}} -output -} - -lstest4<-function(vstar,yhat,res,x){ -ystar <- yhat + res * vstar -p<-ncol(x) -pp<-p+1 -vals<-t(as.matrix(lsfit(x,ystar)$coef[2:pp])) -sa<-lsfitNci4(x, ystar)$cov[-1, -1] -sai<-solve(sa) -test<-(vals)%*%sai%*%t(vals) -test<-test[1,1] -test -} -twodcor10<-function(x,y,nboot=500,SEED=TRUE,alpha=.05){ -# -# Compute a .95 confidence interval for -# the difference between two dependent -# correlations corresponding to two independent -# goups. -# -# x is a matrix with two columns, -# y is a vector -# Goal: test equality of Pearson correlation for x1, y versus x2, y. -# -# This function uses an adjusted p-value, the adjustment -# being made assuming normality. -# -# nboot indicates how many samples from a normal distribution -# are used to approximate the adjustment. -# -# Simulations suggest that this fucntion -# continues to work well under non-normality. 
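-# Illustrative usage of twodcor10, defined above (added example, not part of the
-# original source; it relies on rmul and bdiag from this file, and the simulated
-# data are arbitrary):
-# set.seed(6)
-# x <- matrix(rnorm(100), ncol = 2)
-# y <- x[, 1] + rnorm(50)
-# twodcor10(x, y)   # adjusted p-value for H0: cor(x[,1], y) = cor(x[,2], y)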
-# -if(SEED)set.seed(2) -X<-elimna(cbind(x,y)) -if(ncol(X)!=3)stop("x should be a matrix with two columns") -n<-nrow(X) -cval<-cor(X) -nval<-(cval[1,3]+cval[2,3])/2 -cmat<-bdiag(1,3,nval) -cmat[1,2]<-nval -cmat[2,1]<-nval -pval<-NA -for(i in 1:nboot){ -d<-rmul(n,p=3,cmat=cmat) -pval[i]<-twodcor8(d[,1:2],d[,3])$p.value -} -pval<-sort(pval) -iv<-round(alpha*nboot) -est.p<-pval[iv] -adp<-alpha/est.p -test<-twodcor8(X[,1:2],X[,3])$p.value -p.value<-test*adp -if(p.value>1)p.value<-1 -list(p.value=p.value) -} - -twodcor8<-function(x,y){ -# -# Compute a .95 confidence interval for -# the difference between two dependent -# correlations corresponding to two independent -# goups. -# -# -# x is a matrix with two columns, -# y is a vector -# Goal: test equality of Pearson correlation for x1, y versus x2, y. -# -# For general use, twodcor10 is probably better, -# which calls this function and estimates an adjusted p-value. -# -X<-elimna(cbind(x,y)) -Z1<-(X[,1]-mean(X[,1]))/sqrt(var(X[,1])) -Z2<-(X[,2]-mean(X[,2]))/sqrt(var(X[,2])) -temp<-cor.test(Z1-Z2,X[,3]) -temp<-temp[3]$p.value -list(p.value=temp) -} - -lsfitNci4<-function(x,y,alpha=.05){ -# -# Compute confidence for least squares -# regression using heteroscedastic method -# recommended by Cribari-Neto (2004). -# -x<-as.matrix(x) -if(nrow(x) != length(y))stop("Length of y does not match number of x values") -m<-cbind(x,y) -m<-elimna(m) -y<-m[,ncol(x)+1] -temp<-lsfit(x,y) -x<-cbind(rep(1,nrow(x)),m[,1:ncol(x)]) -xtx<-solve(t(x)%*%x) -h<-diag(x%*%xtx%*%t(x)) -n<-length(h) -d<-(n*h)/sum(h) -for(i in 1:length(d)){ - d[i]<-min(4, d[i]) -} -hc4<-xtx%*%t(x)%*%diag(temp$res^2/(1-h)^d)%*%x%*%xtx -df<-nrow(x)-ncol(x) -crit<-qt(1-alpha/2,df) -al<-ncol(x) -ci<-matrix(NA,nrow=al,ncol=3) -for(j in 1:al){ -ci[j,1]<-j -ci[j,2]<-temp$coef[j]-crit*sqrt(hc4[j,j]) -ci[j,3]<-temp$coef[j]+crit*sqrt(hc4[j,j]) -} -list(ci=ci,stand.errors=sqrt(diag(hc4)), cov=hc4) -} - - -hc4qtest<-function(x,y,k,nboot=500,SEED=TRUE){ -# -# Test the hypothesis that a OLS slope is zero using HC4 wild bootstrap using quasi-t test. -# k is the index of coefficient being tested -# -if(SEED)set.seed(2) -x<-as.matrix(x) -# First, eliminate any rows of data with missing values. -temp <- cbind(x, y) - temp <- elimna(temp) - pval<-ncol(temp)-1 - x <- temp[,1:pval] - y <- temp[, pval+1] -x<-as.matrix(x) -p<-ncol(x) -pp<-p+1 -temp<-lsfit(x,y) -yhat<-mean(y) -res<-y-yhat -s<-lsfitNci4(x, y)$cov[-1, -1] -s<-as.matrix(s) -si<-s[k,k] -b<-temp$coef[2:pp] -qtest<-b[k]/sqrt(si) -data<-matrix(runif(length(y)*nboot),nrow=nboot) -data<-(data-.5)*sqrt(12) # standardize the random numbers. -rvalb<-apply(data,1,lsqtest4,yhat,res,x, k) -sum<-sum(abs(rvalb)>= abs(qtest[1])) -p.val<-sum/nboot -list(p.value=p.val) -} - -lsqtest4<-function(vstar,yhat,res,x, k){ -ystar <- yhat + res * vstar -p<-ncol(x) -pp<-p+1 -vals<-lsfit(x,ystar)$coef[2:pp] -sa<-lsfitNci4(x, ystar)$cov[-1, -1] -sa<-as.matrix(sa) -sai<-sa[k,k] -test<-vals[k]/sqrt(sai) -test -} -mrm1way<-function(x,q=.5,grp=NA,bop=FALSE,SEED=TRUE,mop=FALSE){ -# Perform a within groups one-way ANOVA using medians -# -# If grp specified, do analysis on only the groups in grp. -# Example: grp=(c(1,4)), compare groups 1 and 4 only. 
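-# Illustrative usage of lsfitNci4 and hc4qtest, defined above (added example, not
-# part of the original source):
-# set.seed(7)
-# x <- rnorm(60); y <- .5 * x + rnorm(60)
-# lsfitNci4(x, y)         # HC4-based confidence intervals for the OLS intercept and slope
-# hc4qtest(x, y, k = 1)   # HC4 wild bootstrap quasi-t test that the first slope is zero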
-# -# bop=F, use non-bootstrap estimate of covariance matrix -# bop=T, use bootstrap -# -# mop=T, use usual median, otherwise use single order statistic -# -if(is.data.frame(x))x=as.matrix(x) -if(SEED)set.seed(2) -if(is.matrix(x))x<-listm(x) -K<-length(x) # Number of groups -p<-K -if(is.na(grp[1]))grp<-c(1:p) -x<-x[grp] -if(!is.list(x))stop("Data are not stored in list mode or a matrix") -tmeans<-0 -n<-length(x[[1]]) -v<-matrix(0,p,p) -if(!mop){ -for (i in 1:p)tmeans[i]<-qest(x[[i]],q=q) -if(!bop)v<-covmmed(x,q=q) -if(bop)v<-bootcov(x,pr=FALSE,est=qest,q=q) -} -if(mop){ -tmeans[i]<-median(x[[i]]) -v<-bootcov(x,pr=FALSE) -} -km1<-K-1 -ck<-diag(1,km1,K) -for (i in 1:km1)ck[i,i+1]<-0-1 -Qb<-johansp(ck,tmeans,v,n,1,K) -p.value<-Qb$p.value -if(n>=20)p.value<-1-pf(Qb$teststat,K-1,999) -list(test.stat=Qb$teststat,p.value=p.value) -} - -rmul<-function(n,p=2,cmat=NULL,rho=0, -mar.fun=ghdist,OP=FALSE,g=0,h=0,...){ -# -# generate n observations from a p-variate dist -# By default, use normal distributions. -# -# Can generate data form a g-and-h distribution via the arguments -# g and h -# -# To adjust rho so that Pearson = remains equal to rho after transforming, use rngh -# -# Example rmul(30,p=4,rho=.3,g=.5,h=.2) will -# generate 30 vectors from a 4-variate distribution where the marginals -# have a g-and-h distribution with g=.5 and h=.2. -# -# This function is similar to ghmul, only here, generate the marginal values -# and then transform the data to have correlation matrix cmat -# -# cmat: if specified, is the correlation matrix that is used to generate data -# -# If not specified, data are generated with a common correlation -# rho -# -#OP= TRUE: -# Method (e.g. Browne, M. W. (1968) A comparison of factor analytic -# techniques. Psychometrika, 33, 267-334. -# Let U'U=R be the Cholesky decomposition of R. Generate independent data -# from some dist yielding X. Then XU has population correlation matrix R -# -# OP=FALSE, use mvrnorm to generate data then transform marginals to g-and-h distribution. -# -if(!is.null(cmat)){ -if(ncol(cmat)!=p)stop('cmat: number of columns must equal the value in the argument p') -} -if(abs(rho)>1)stop('rho must be between -1 and 1') -if(is.null(cmat)){ -cmat<-matrix(rho,p,p) -diag(cmat)<-1 -} -if(OP){ -np<-n*p -if(identical(mar.fun,ghdist))x<-matrix(mar.fun(np,g=g,h=h),nrow=n,ncol=p) -else x<-matrix(mar.fun(np,...),nrow=n,ncol=p) -rmat<-matsqrt(cmat) -x<-x%*%rmat -} -if(!OP){ -library(MASS) -x=mvrnorm(n,rep(0,p),cmat) -if(g==0)x=x*exp(h*x^2/2) -if(g>0)x=(exp(g*x)-1)*exp(h*x^2/2)/g -} -x -} - - -L1medcen <- function(X, tol = 1e-08, maxit = 200, m.init = apply(X, 2, median), - trace = FALSE) -{ - ## L1MEDIAN calculates the multivariate L1 median - ## I/O: mX=L1median(X,tol); - ## - ## X : the data matrix - ## tol: the convergence criterium: - ## the iterative process stops when ||m_k - m_{k+1}|| < tol. - ## maxit: maximum number of iterations - ## init.m: starting value for m; typically coordinatewise median - ## - ## Ref: Hossjer and Croux (1995) - ## "Generalizing Univariate Signed Rank Statistics for Testing - ## and Estimating a Multivariate Location Parameter"; - ## Non-parametric Statistics, 4, 293-308. - ## - ## Implemented by Kristel Joossens - ## Many thanks to Martin Maechler for improving the program! 
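-# Illustrative usage of mrm1way, defined above (added example, not part of the
-# original source; it relies on qest, covmmed and johansp from this file):
-# set.seed(8)
-# m <- matrix(rnorm(80), ncol = 4)   # four dependent measures on 20 subjects
-# mrm1way(m)                         # within-subjects one-way test based on medians (q = .5)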
- - ## slightly faster version of 'sweep(x, 2, m)': - centr <- function(X,m) X - rep(m, each = n) - ## computes objective function in m based on X and a: - mrobj <- function(X,m) sum(sqrt(rowSums(centr(X,m)^2))) - d <- dim(X); n <- d[1]; p <- d[2] - m <- m.init - if(!is.numeric(m) || length(m) != p) - stop("'m.init' must be numeric of length p =", p) - k <- 1 - if(trace) nstps <- 0 - while (k <= maxit) { - mold <- m - obj.old <- if(k == 1) mrobj(X,mold) else obj - X. <- centr(X, m) - Xnorms <- sqrt(rowSums(X. ^ 2)) - inorms <- order(Xnorms) - dx <- Xnorms[inorms] # smallest first, i.e., 0's if there are - X <- X [inorms,] - X. <- X.[inorms,] - ## using 1/x weighting {MM: should this be generalized?} - w <- ## (0 norm -> 0 weight) : - if (all(dn0 <- dx != 0)) 1/dx - else c(rep.int(0, length(dx)- sum(dn0)), 1/dx[dn0]) - delta <- colSums(X. * rep(w,p)) / sum(w) - nd <- sqrt(sum(delta^2)) - - maxhalf <- if (nd < tol) 0 else ceiling(log2(nd/tol)) - m <- mold + delta # computation of a new estimate - ## If step 'delta' is too far, we try halving the stepsize - nstep <- 0 - while ((obj <- mrobj(X, m)) >= obj.old && nstep <= maxhalf) { - nstep <- nstep+1 - m <- mold + delta/(2^nstep) - } - if(trace) { - if(trace >= 2) - cat(sprintf("k=%3d obj=%19.12g m=(",k,obj), - paste(formatC(m),collapse=","), - ")", if(nstep) sprintf(" nstep=%2d halvings",nstep) else "", - "\n", sep="") - nstps[k] <- nstep - } - if (nstep > maxhalf) { ## step halving failed; keep old - m <- mold - ## warning("step halving failed in ", maxhalf, " steps") - break - } - k <- k+1 - } - if (k > maxit) warning("iterations did not converge in ", maxit, " steps") - if(trace == 1) - cat("needed", k, "iterations with a total of", - sum(nstps), "stepsize halvings\n") -# return(m) -list(center=m) -} -spatcen<-function(x){ -# -# compute spatial median -# x is an n by p matrix -# -if(!is.matrix(x))stop("x must be a matrix") -x<-elimna(x) -START<-apply(x,2,median) -val=optim(START,spat.sub,x=x,method='BFGS')$par -list(center=val) -} -olswbtest<-function(x,y,nboot=500,SEED=TRUE,RAD=TRUE,alpha=.05){ -# -# Compute confidence intervals for all OLS slopes -# using HC4 wild bootstrap and Wald test. -# -# This function calls the functions -# olshc4 and -# lstest4 -# -if(SEED)set.seed(2) -x<-as.matrix(x) -# First, eliminate any rows of data with missing values. -temp <- cbind(x, y) - temp <- elimna(temp) - pval<-ncol(temp)-1 - x <- temp[,1:pval] - y <- temp[, pval+1] -x<-as.matrix(x) -p<-ncol(x) -pp<-p+1 -temp<-lsfit(x,y) -yhat<-mean(y) -res<-y-yhat -s<-olshc4(x, y)$cov[-1, -1] -si<-solve(s) -b<-temp$coef[2:pp] -test=abs(b)*sqrt(diag(si)) -if(RAD)data<-matrix(ifelse(rbinom(length(y)*nboot,1,0.5)==1,-1,1),nrow=nboot) -if(!RAD){ -data<-matrix(runif(length(y)*nboot),nrow=nboot) -data<-(data-.5)*sqrt(12) # standardize the random numbers. 
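-# Illustrative usage of L1medcen and spatcen, defined above (added example, not part
-# of the original source):
-# set.seed(9)
-# X <- matrix(rnorm(200), ncol = 2)
-# L1medcen(X)$center   # multivariate L1 (spatial) median via the Hossjer-Croux iteration
-# spatcen(X)$center    # the same quantity obtained by direct minimization with optim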
-} -rvalb<-apply(data,1,olswbtest.sub,yhat,res,x) #a p by nboot matrix -rvalb=abs(rvalb) -ic=round((1-alpha)*nboot) -if(p==1)rvalb=t(as.matrix(rvalb)) -temp=apply(rvalb,1,sort) # nboot by p matrix -pvals=NA -for(j in 1:p)pvals[j]=mean((rvalb[j,]>=test[j])) -cr=temp[ic,] -ci=b-cr/diag(sqrt(si)) #dividing because si is reciprocal of sq se -ci=cbind(ci,b+cr/diag(sqrt(si))) -ci=cbind(b,ci) -ci=cbind(c(1:nrow(ci)),ci,test,pvals) -dimnames(ci)<- -list(NULL,c("Slope_No.","Slope_est","Lower.ci","Upper.ci","Test.Stat","p.value")) -ci -} -olswbtest.sub<-function(vstar,yhat,res,x){ -ystar <- yhat + res * vstar -p<-ncol(x) -pp<-p+1 -vals<-t(as.matrix(lsfit(x,ystar)$coef[2:pp])) -sa<-olshc4(x, ystar)$cov[-1, -1] -sai<-solve(sa) -test<-vals*sqrt(diag(sai)) -test -} - - - -regpre<-function(x,y,regfun=lsfit,error=absfun,nboot=100,adz=TRUE, -mval=round(5*log(length(y))),model=NULL,locfun=mean,pr=FALSE, -xout=FALSE,outfun=out,STAND=TRUE, -plotit=TRUE,xlab="Model Number",ylab="Prediction Error",SEED=TRUE,...){ -# -# Estimate prediction error using the regression method -# regfun. The .632 method is used. -# (See Efron and Tibshirani, 1993, pp. 252--254) -# -# The predictor values are assumed to be in the n-by-p matrix x. -# The default number of bootstrap samples is nboot=100 -# -# Prediction error is the expected value of the function error. -# The argument error defaults to squared error. -# -# regfun can be any R function that returns the coefficients in -# the vector regfun$coef, the first element of which contains the -# estimated intercept, the second element contains the estimate of -# the first predictor, etc. -# -# The default value for mval, the number of observations to resample -# for each of the B bootstrap samples is based on results by -# Shao (JASA, 1996, 655-665). (Resampling n vectors of observations -# model selection may not lead to the correct model as n->infinity. -# -# The argument model should have list mode, model[[1]] indicates -# which predictors are used in the first model. For example, storing -# 1,4 in model[[1]] means predictors 1 and 4 are being considered. -# If model is not specified, and number of predictors is at most 5, -# then all models are considered. -# -# If adz=T, added to the models to be considered is where -# all regression slopes are zero. That is, use measure of location only -# corresponding to -# locfun. -# -if(pr){ -print("By default, least squares regression is used, ") -print("But from Wilcox, R. R. 2008, Journal of Applied Statistics, 35, 1-8") -print("Setting regfun=tsreg appears to be a better choice for general use.") -print("That is, replace least squares with the Theil-Sen estimator") -print("Note: Default for the argument error is now absfun") -print(" meaning absolute error is used") -print("To use squared error, set error=sqfun") -} -x<-as.matrix(x) -d<-ncol(x) -p1<-d+1 -temp<-elimna(cbind(x,y)) -x<-temp[,1:d] -y<-temp[,d+1] -x<-as.matrix(x) -if(xout){ -x<-as.matrix(x) -if(!STAND)flag<-outfun(x,plotit=FALSE,...)$keep -if(STAND)flag<-outpro(x,STAND=TRUE,plotit=FALSE)$keep -x<-x[flag,] -y<-y[flag] -x<-as.matrix(x) -} -if(is.null(model)){ -if(d<=5)model<-modgen(d,adz=adz) -if(d>5)model[[1]]<-c(1:ncol(x)) -} -mout<-matrix(NA,length(model),5,dimnames=list(NULL,c("apparent.error", -"boot.est","err.632","var.used","rank"))) -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. 
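-# Illustrative usage of olswbtest, defined above (added example, not part of the
-# original source; the simulated data are arbitrary):
-# set.seed(10)
-# x <- matrix(rnorm(150), ncol = 3)
-# y <- x[, 1] - x[, 3] + rnorm(50)
-# olswbtest(x, y)   # HC4 wild bootstrap CIs, test statistics and p-values for all three slopes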
-data<-matrix(sample(length(y),size=mval*nboot,replace=TRUE),nrow=nboot) -bid<-apply(data,1,idb,length(y)) -# bid is an n by nboot matrix. If the jth bootstrap sample from -# 1, ..., mval contains the value i, bid[i,j]=0; otherwise bid[i,j]=1 -for (imod in 1:length(model)){ -nmod=length(model[[imod]])-1 -temp=c(nmod:0) -mout[imod,4]=sum(model[[imod]]*10^temp) -if(sum(model[[imod]]==0)!=1){ -xx<-x[,model[[imod]]] -xx<-as.matrix(xx) -if(sum(model[[imod]]==0)!=1)bvec<-apply(data,1,regpres1,xx,y,regfun,mval,...) -# bvec is a p+1 by nboot matrix. The first row -# contains the bootstrap intercepts, the second row -# contains the bootstrap values for first predictor, etc. -if(sum(model[[imod]]==0)!=1)yhat<-cbind(1,xx)%*%bvec -if(sum(model[[imod]]==0)==1){ -bvec0<-matrix(0,nrow=p1,ncol=nboot) -for(it in 1:nboot){ -bvec0[1,it]<-locfun(y[data[it,]]) -} -yhat<-cbind(1,x)%*%bvec0 -} -# yhat is n by nboot matrix of predicted values based on - # bootstrap regressions. -bi<-apply(bid,1,sum) # B sub i in notation of Efron and Tibshirani, p. 253 -temp<-(bid*(yhat-y)) -diff<-apply(temp,1,error) -ep0<-sum(diff/bi)/length(y) -aperror<-error(regfun(xx,y,...)$resid)/length(y) # apparent error -regpre<-.368*aperror+.632*ep0 -mout[imod,1]<-aperror -mout[imod,3]<-regpre -temp<-yhat-y -diff<-apply(temp,1,error) -mout[imod,2]<-sum(diff)/(nboot*length(y)) -} -if(sum(model[[imod]]==0)==1){ -mout[imod,3]<-locpre(y,error=error,est=locfun,SEED=SEED,mval=mval) -}} -mout[,5]=rank(mout[,3]) -if(plotit)plot(c(1:nrow(mout)),mout[,3],xlab=xlab,ylab=ylab) -list(estimates=mout) -} -push<-function(mat){ -# -# For every column of mat, move entry down 1 -# -matn<-matrix(NA,nrow=nrow(mat),ncol=ncol(mat)) -Jm<-nrow(mat)-1 -for (k in 1:ncol(mat)){ -temp<-mat[,k] -vec<-0 -vec[2:nrow(mat)]<-temp[1:Jm] -matn[,k]<-vec -} -matn -} - -ancova<-function(x1,y1,x2,y2,fr1=1,fr2=1,tr=.2,alpha=.05,plotit=TRUE,pts=NA,sm=FALSE,method="EP",SEED=TRUE, -pr=TRUE,xout=FALSE,outfun=out,LP=FALSE,SCAT=TRUE,xlab='X',ylab='Y',pch1='*',pch2='+', -skip.crit=FALSE,nmin=12,crit.val=1.09,...){ -# -# Compare two independent groups using the ancova method with a single covariate -# No parametric assumption is made about the form of -# the regression lines--a running interval smoother is used. -# -# Assume data are in x1 y1 x2 and y2 -# -# sm=TRUE will create smooths using bootstrap bagging. -# pts can be used to specify the design points where the regression lines -# are to be compared. 
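-# Illustrative usage of regpre, defined above (added example, not part of the original
-# source; with two predictors all submodels, plus the intercept-only model, are compared):
-# set.seed(11)
-# x <- matrix(rnorm(120), ncol = 2)
-# y <- x[, 1] + rnorm(60)
-# regpre(x, y, nboot = 50, plotit = FALSE)   # .632 bootstrap estimates of prediction error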
-# -# Argument method indicates which measure of effect size will be used -# EP: explanatory measure of effect size (default) -# QS: quantile shift measure of effect size -# AKP: trimmed mean Winsorized variance analog of Cohen's d -# WMW: P(X1)stop('One covariate only is allowed with this function') -if(length(x1)!=length(y1))stop("x1 and y1 have different lengths") -if(length(x2)!=length(y2))stop("x2 and y2 have different lengths") -xy=elimna(cbind(x1,y1)) -x1=xy[,1] -y1=xy[,2] -xy=elimna(cbind(x2,y2)) -x2=xy[,1] -y2=xy[,2] -if(pr){ -print("NOTE: Confidence intervals are adjusted to control the probability") -print("of at least one Type I error.") -#print("But p-values are not") -print('Effect size is based on the argument method, default is explanatory measure of effect size') -print('Other options: QS, quantile shift; AKP, robust analog of Cohen d; WMW, P(X=nmin]) -isub[5]<-max(sub[vecn>=nmin]) -isub[3]<-floor((isub[1]+isub[5])/2) -isub[2]<-floor((isub[1]+isub[3])/2) -isub[4]<-floor((isub[3]+isub[5])/2) -mat<-matrix(NA,5,14) -dimnames(mat)<-list(NULL,c("X","n1","n2","Est1","Est2","DIF","TEST","se","ci.low","ci.hi","p.value","crit.val","Effect.Size",'p.adjusted')) -for (i in 1:5){ -g1<-y1[near(x1,x1[isub[i]],fr1)] -g2<-y2[near(x2,x1[isub[i]],fr2)] -g1<-g1[!is.na(g1)] -g2<-g2[!is.na(g2)] -test<-yuen(g1,g2,tr=tr) -mat[i,1]<-x1[isub[i]] -mat[i,2]<-length(g1) -mat[i,3]<-length(g2) -mat[i,4]<-test$est.1 -mat[i,5]<-test$est.2 -mat[i,6]<-test$dif -mat[i,7]<-test$teststat -mat[i,8]<-test$se -mat[i,13]=ESfun(g1,g2,method=method,pr=FALSE,SEED=SEED) -mat[i,14]=1-psmm(abs(test$teststat),5,test$df) -if(skip.crit)critv=crit.val -#if(!skip.crit){ -critv<-NA -#if(alpha==.05)critv<-smmcrit(test$df,5) -#if(alpha==.01)critv<-smmcrit01(test$df,5) -#if(is.na(critv))critv<-smmval(test$df,5,alpha=alpha) -critv=qsmm(1-alpha,5,test$df) -mat[i,12]<-critv -#} -cilow<-test$dif-critv*test$se -cihi<-test$dif+critv*test$se -mat[i,9]<-cilow -mat[i,10]<-cihi -mat[i,11]<-test$p.value -}} -if(!is.na(pts[1])){ -#if(!skip.crit){ -#if(length(pts)>=29)stop("At most 28 points can be compared") -#} -n1<-1 -n2<-1 -vecn<-1 -for(i in 1:length(pts)){ -n1[i]<-length(y1[near(x1,pts[i],fr1)]) -n2[i]<-length(y2[near(x2,pts[i],fr2)]) -} -mat<-matrix(NA,length(pts),14) -dimnames(mat)<-list(NULL,c("X","n1","n2","Est1","Est2","DIF","TEST","se","ci.low","ci.hi", -"p.value","crit.val","Effect.Size",'p.adjusted')) -for (i in 1:length(pts)){ -g1<-y1[near(x1,pts[i],fr1)] -g2<-y2[near(x2,pts[i],fr2)] -g1<-g1[!is.na(g1)] -g2<-g2[!is.na(g2)] -test<-yuen(g1,g2,tr=tr) -mat[i,1]<-pts[i] -mat[i,2]<-length(g1) -mat[i,3]<-length(g2) -if(length(g1)<=5)print(paste("Warning, there are",length(g1)," points corresponding to the design point X=",pts[i])) -if(length(g2)<=5)print(paste("Warning, there are",length(g2)," points corresponding to the design point X=",pts[i])) -mat[i,4]<-test$est.1 -mat[i,5]<-test$est.2 -mat[i,6]<-test$dif -mat[i,7]<-test$teststat -mat[i,8]<-test$se -mat[i,13]=ESfun(g1,g2,method=method,pr=FALSE,SEED=SEED) -mat[i,14]=1-psmm(abs(test$teststat),length(pts),test$df) -if(skip.crit)critv=crit.val -if(!skip.crit){ -if(length(pts)>=2)critv=qsmm(1-alpha,length(pts),test$df) #smmcrit(test$df,length(pts)) -if(length(pts)==1)critv<-qt(.975,test$df) -} -cilow<-test$dif-critv*test$se -cihi<-test$dif+critv*test$se -mat[i,9]<-cilow -mat[i,10]<-cihi -mat[i,11]<-test$p.value -mat[i,12]<-critv -}} -if(plotit){ -runmean2g(x1,y1,x2,y2,fr=fr1,est=tmean,tr=tr,sm=sm,xout=FALSE,LP=LP, -SCAT=SCAT,xlab=xlab,ylab=ylab,pch1=pch1,pch2=pch2,...) 
-} -list(output=mat) -} -miss2na<-function(m,na.val=NULL){ -# -# Convert any missing value, indicatd by na.val, -# to NA. -# -# Example, if 999 is missing value, use miss2na(m,999) -# -if(is.null(na.val))stop("Specify a missing value") -if(is.vector(m)){ -if(!is.list(m)){ -flag=(m==na.val) -m[flag]=NA -}} -if(is.matrix(m)){ -for(j in 1:ncol(m)){ -x=m[,j] -flag=(x==na.val) -x[flag]=NA -m[,j]=x -}} -if(is.list(m)){ -for(j in 1:length(m)){ -x=m[[j]] -flag=(x==na.val) -x[flag]=NA -m[[j]]=x -}} -m -} - -plotCI <- function (x, y = NULL, uiw=NULL, liw = uiw, aui=NULL, ali=aui, - err="y", ylim=NULL, sfrac = 0.01, gap=0, add=FALSE, - col=par("col"), lwd=par("lwd"), slty=par("lty"), xlab=NULL, - ylab=NULL, ...) { -## originally from Bill Venables, R-list - if (is.list(x)) { - y <- x$y - x <- x$x - } - if (is.null(y)) { - if (is.null(x)) - stop("both x and y NULL") - y <- as.numeric(x) - x <- seq(along = x) - } - if (missing(xlab)) xlab <- deparse(substitute(x)) - if (missing(ylab)) ylab <- deparse(substitute(y)) - if (missing(uiw)) { ## absolute limits - ui <- aui - li <- ali - } - else { ## relative limits - if (err=="y") z <- y else z <- x - if(is.null(uiw))stop("Argument uiw, the width of the interval, must be specified") - ui <- z + uiw - li <- z - liw - } - if (is.null(ylim)) ylim <- range(c(y, ui, li), na.rm=TRUE) - if (add) { - points(x, y, col=col, lwd=lwd, ...) - } else { - plot(x, y, ylim = ylim, col=col, lwd=lwd, xlab=xlab, ylab=ylab, ...) - } - if (gap==TRUE) gap <- 0.01 ## default gap size - ul <- c(li, ui) - if (err=="y") { - gap <- rep(gap,length(x))*diff(par("usr")[3:4]) # smidge <- diff(par("usr")[1:2]) * sfrac - smidge <- par("fin")[1] * sfrac -# segments(x , li, x, pmax(y-gap,li), col=col, lwd=lwd, lty=slty) -# segments(x , ui, x, pmin(y+gap,ui), col=col, lwd=lwd, lty=slty) - arrows(x , li, x, pmax(y-gap,li), col=col, lwd=lwd, lty=slty, angle=90, length=smidge, code=1) - arrows(x , ui, x, pmin(y+gap,ui), col=col, lwd=lwd, lty=slty, angle=90, length=smidge, code=1) - ## horizontal segments -# x2 <- c(x, x) -# segments(x2 - smidge, ul, x2 + smidge, ul, col=col, lwd=lwd) - } - else if (err=="x") { - gap <- rep(gap,length(x))*diff(par("usr")[1:2]) - smidge <- par("fin")[2] * sfrac -# smidge <- diff(par("usr")[3:4]) * sfrac - arrows(li, y, pmax(x-gap,li), y, col=col, lwd=lwd, lty=slty, angle=90, length=smidge, code=1) - arrows(ui, y, pmin(x+gap,ui), y, col=col, lwd=lwd, lty=slty, angle=90, length=smidge, code=1) - ## vertical segments -# y2 <- c(y, y) -# segments(ul, y2 - smidge, ul, y2 + smidge, col=col, lwd=lwd) - } - invisible(list(x = x, y = y)) -} -bdanova2<-function(x1,x2=NULL,alpha=.05,power=.9,delta){ -# -# Do the second stage of the Bishop-Duewicz ANOVA -# -if(is.null(x2[1])){ -stage1=bdanova1(x1,alpha=alpha,power=power,delta=delta) -return(list(N=stage1$N,d=stage1$d,crit=stage1$crit)) -} -if(!is.null(x2[1])){ -if(is.na(delta))stop("A value for delta was not specified") -if(!is.list(x1)){ -if(!is.matrix(x1))stop("Data must be stored in a matrix or in list mode") -y<-x1 -x1<-list() -for(j in 1:ncol(y))x1[[j]]<-y[,j] -} -if(is.na(delta))stop("A value for delta was not specified") -if(!is.list(x2)){ -if(!is.matrix(x2))stop("Data must be stored in matrix or in list mode") -y<-x2 -x2<-list() -for(j in 1:ncol(y))x2[[j]]<-y[,j] -} -if(length(x1)!=length(x2))stop("Length of x1 does not match the length of x2") -TT<-NA -U<-NA -J<-length(x1) -nvec<-NA -nvec2<-NA -svec<-NA -for(j in 1:length(x1)){ -nvec[j]<-length(x1[[j]]) -nvec2[j]<-length(x2[[j]]) -svec[j]<-var(x1[[j]]) 
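-# Illustrative usage of ancova, defined above (added example, not part of the original
-# source; the simulated data are arbitrary and a plot of the two smooths is produced
-# by default):
-# set.seed(12)
-# x1 <- runif(50); y1 <- x1 + rnorm(50, sd = .25)
-# x2 <- runif(50); y2 <- .5 * x2 + rnorm(50, sd = .25)
-# ancova(x1, y1, x2, y2)   # compares 20% trimmed means of Y at five empirically chosen covariate values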
-TT[j]<-sum(x1[[j]]) -U[j]<-sum(x2[[j]]) -} -temp<-bdanova1(x1,alpha=alpha,power=power,delta=delta) -need<-temp$N-nvec -#for(j in 1:length(x1))print(c(nvec2[j],need[j])) -for(j in 1:length(x1))if(nvec2[j]=dv[1:nboot])/nboot -if(op==4)print(sig.level) - -list(p.value=sig.level,output=output) -} - -rm2mcp<-function(J,K,x,est=tmean,alpha=.05,grp=NA,dif=TRUE,nboot=NA, -plotit=FALSE,BA=FALSE,hoch=FALSE,...){ -# -# This function performs multiple comparisons for -# dependent groups in a within by within designs. -# It creates the linear contrasts and calls rmmcppb -# assuming that main effects and interactions for a -# two-way design are to be tested. -# - # The data are assumed to be stored in x in list mode or in a matrix. - # If grp is unspecified, it is assumed x[[1]] contains the data - # for the first level of both factors: level 1,1. - # x[[2]] is assumed to contain the data for level 1 of the - # first factor and level 2 of the second factor: level 1,2 - # x[[j+1]] is the data for level 2,1, etc. - # If the data are in wrong order, grp can be used to rearrange the - # groups. For example, for a two by two design, grp<-c(2,4,3,1) - # indicates that the second group corresponds to level 1,1; - # group 4 corresponds to level 1,2; group 3 is level 2,1; - # and group 1 is level 2,2. - # - # Missing values are automatically removed. - # -if(is.data.frame(x))x=as.matrix(x) - JK <- J * K - if(is.matrix(x)) - x <- listm(x) - if(!is.na(grp[1])) { - yy <- x - for(j in 1:length(grp)) - x[[j]] <- yy[[grp[j]]] - } - if(!is.list(x)) - stop("Data must be stored in list mode or a matrix.") - for(j in 1:JK) { - xx <- x[[j]] - # xx[[j]] <- xx[!is.na(xx)] - x[[j]] <- xx[!is.na(xx)] - } - # - # Create the three contrast matrices - # -temp<-con2way(J,K) -conA<-temp$conA -conB<-temp$conB -conAB<-temp$conAB - ncon <- max(nrow(conA), nrow(conB), nrow(conAB)) -FacA<-rmmcppb(x,con=conA,est=est,plotit=plotit,dif=dif,grp=grp, -nboot=nboot,BA=TRUE,hoch=FALSE,...) -FacB<-rmmcppb(x,con=conB,est=est,plotit=plotit,dif=dif,grp=grp, -nboot=nboot,BA=TRUE,hoch=FALSE,...) -FacAB<-rmmcppb(x,con=conAB,est=est,plotit=plotit,dif=dif,grp=grp, -nboot=nboot,BA=TRUE,hoch=FALSE,...) -list(Factor.A=FacA,Factor.B=FacB,Factor.AB=FacAB) - -} - -acbinomci<-function(x=sum(y),nn=length(y),y=NULL,n=NA,alpha=.05){ -# -# Compute a 1-alpha confidence interval for p, the probability of -# success for a binomial distribution, using a generalization of the -# Agresti-Coull method that was studied by Brown, Cai DasGupta -# (Annals of Statistics, 2002, 30, 160-201.) -# -# y is a vector of 1s and 0s. -# x is number of successes. -# -if(!is.null(y[1])){ -y=elimna(y) -nn=length(y) -} -if(nn==1)stop("Something is wrong: number of observations is only 1") -n<-nn -cr=qnorm(1-alpha/2) -ntil=n+cr^2 -ptil=(x+cr^2/2)/ntil -if(x!=n && x!=0){ -lower=ptil-cr*sqrt(ptil*(1-ptil)/ntil) -upper=ptil+cr*sqrt(ptil*(1-ptil)/ntil) -} -if(x==0){ #Use Clopper-Pearson -lower<-0 -upper<-1-alpha^(1/n) -} -if(x==1){ -upper<-1-(alpha/2)^(1/n) -lower<-1-(1-alpha/2)^(1/n) -} -if(x==n-1){ -lower<-(alpha/2)^(1/n) -upper<-(1-alpha/2)^(1/n) -} -if(x==n){ -lower<-alpha^(1/n) -upper<-1 -} -phat<-x/n -list(phat=phat,se=sqrt(ptil*(1-ptil)/ntil),ci=c(lower,upper),n=n) -} - -covmtrim<-function(x,tr=.2,p=length(x),grp=c(1:p)){ -# -# Estimate the covariance matrix for the sample trimmed means corresponding -# to the data in the R variable x, -# which is assumed to be stored in list mode or a matrix. -# (x[[1]] contains the data for group 1, x[[2]] the data for group 2, etc.) 
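-# Illustrative usage of acbinomci, defined above (added example, not part of the
-# original source):
-# acbinomci(x = 12, nn = 30)         # Agresti-Coull type CI given 12 successes in 30 trials
-# y <- c(1, 0, 1, 1, 0, 1, 0, 0, 1, 1)
-# acbinomci(y = y)                   # the same interval computed from a vector of 0s and 1s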
-# The function returns a p by p matrix of covariances, the diagonal -# elements being equal to the squared standard error of the sample -# trimmed means, where p is the number of groups to be included. -# By default, all the groups in x are used, but a subset of -# the groups can be used via grp. For example, if -# the goal is to estimate the covariances between the sample trimmed -# means for groups 1, 2, and 5, use the command grp<-c(1,2,5) -# before calling this function. -# -# The default amount of trimming is 20% -# -# Missing values (values stored as NA) are not allowed. -# -# This function uses winvar from chapter 2. -# -if(is.list(x))x=matl(x) -x=elimna(x) -x=listm(x) -if(!is.list(x))stop("The data are not stored in list mode or a matrix.") -p<-length(grp) -pm1<-p-1 -for (i in 1:pm1){ -ip<-i+1 -if(length(x[[grp[ip]]])!=length(x[[grp[i]]]))stop("The number of observations in each group must be equal") -} -n<-length(x[[grp[1]]]) -h<-length(x[[grp[1]]])-2*floor(tr*length(x[[grp[1]]])) -covest<-matrix(0,p,p) -covest[1,1]<-(n-1)*winvar(x[[grp[1]]],tr)/(h*(h-1)) -for (j in 2:p){ -jk<-j-1 -covest[j,j]<-(n-1)*winvar(x[[grp[j]]],tr)/(h*(h-1)) -for (k in 1:jk){ -covest[j,k]<-(n-1)*wincor(x[[grp[j]]],x[[grp[k]]],tr)$cov/(h*(h-1)) -covest[k,j]<-covest[j,k] -} -} -covmtrim<-covest -covmtrim -} -bwwcovm<-function(J,K,L,x,tr=.2){ -# -# compute covariance matrix for a between by within by within design -# -p=J*K*L -idep=K*L -mat=matrix(0,nrow=p,ncol=p) -id=c(1:idep) -for(j in 1:J){ -mat[id,id]=covmtrim(x[id],tr=tr) -id=id+idep -} -mat -} -bwwmatna<-function(J,K,L,x){ -# -# data are assumed to be stored in a matrix -# for a between by within by within (three-way) anova, -# for the last two factors, eliminate any missing values -# and then store the data in list mode. -# -if(is.data.frame(x))x=as.matrix(x) -y=list() -ad=K*L -ilow=1 -iup=ad -ic=0 -for(j in 1:J){ -z=x[,ilow:iup] -d=elimna(z) -im=0 -for(k in 1:K){ -for(l in 1:L){ -ic=ic+1 -im=im+1 -y[[ic]]=d[,im] -}} -ilow=ilow+ad -iup=iup+ad -} -y -} -bwwna<-function(J,K,L,x){ -# -# data are assumed to be stored in list mode -# for a between by within by within (three-way) anova, -# for the last two factors, eliminate any missing values. -# -if(is.data.frame(x))x=as.matrix(x) -y=list() -ad=K*L -ilow=1 -iup=ad -ic=0 -for(j in 1:J){ -z=x[ilow:iup] -d=elimna(matl(z)) -#print(d) -im=0 -for(k in 1:K){ -for(l in 1:L){ -ic=ic+1 -im=im+1 -y[[ic]]=d[,im] -}} -ilow=ilow+ad -iup=iup+ad -} -y -} -bwwtrim<-function(J,K,L,data,tr=.2,grp=c(1:p),alpha=.05,p=J*K*L){ -# Perform a between by within by within (three-way) anova -# on trimmed means where -# -# J independent groups, KL dependent groups -# -# The variable data is assumed to contain the raw -# data stored in list mode. data[[1]] contains the data -# for the first level of all three factors: level 1,1,1. -# data][2]] is assumed to contain the data for level 1 of the -# first two factors and level 2 of the third factor: level 1,1,2 -# data[[L]] is the data for level 1,1,L -# data[[L+1]] is the data for level 1,2,1. data[[2L]] is level 1,2,L. -# data[[KL+1]] is level 2,1,1, etc. -# -# The default amount of trimming is tr=.2 -# -# It is assumed that data has length JKL, the total number of -# groups being tested. 
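-# Illustrative usage of covmtrim, defined above (added example, not part of the
-# original source; equal sample sizes are required across the dependent groups):
-# set.seed(13)
-# m <- matrix(rnorm(60), ncol = 3)   # three dependent measures on 20 subjects
-# covmtrim(m)                        # 3 by 3 covariance matrix of the 20% trimmed means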
-# -if(is.data.frame(data))data=as.matrix(data) -if(is.list(data))data=bwwna(J,K,L,data) # remove missing values -if(is.matrix(data))data=bwwmatna(J,K,L,data) #remove missing values -# and convert to list mode -if(!is.list(data))stop("The data are not stored in list mode or a matrix") -if(p!=length(data)){ -print("The total number of groups, based on the specified levels, is") -print(p) -print("The number of groups in data is") -print(length(data)) -print("Warning: These two values are not equal") -} -tmeans<-0 -h<-0 -v<-0 -for (i in 1:p){ -tmeans[i]<-mean(data[[grp[i]]],tr) -h[i]<-length(data[[grp[i]]])-2*floor(tr*length(data[[grp[i]]])) -# h is the effective sample size -} -v=bwwcovm(J,K,L,data,tr=tr) -ij<-matrix(c(rep(1,J)),1,J) -ik<-matrix(c(rep(1,K)),1,K) -il<-matrix(c(rep(1,L)),1,L) -jm1<-J-1 -cj<-diag(1,jm1,J) -cj<-diag(1,jm1,J) -for (i in 1:jm1)cj[i,i+1]<-0-1 -km1<-K-1 -ck<-diag(1,km1,K) -for (i in 1:km1)ck[i,i+1]<-0-1 -lm1<-L-1 -cl<-diag(1,lm1,L) -for (i in 1:lm1)cl[i,i+1]<-0-1 -# Do test for factor A -cmat<-kron(cj,kron(ik,il)) # Contrast matrix for factor A -Qa=bwwtrim.sub(cmat, tmeans, v, h,p) -Qa.siglevel <- 1 - pf(Qa, J - 1, 999) -# Do test for factor B -cmat<-kron(ij,kron(ck,il)) # Contrast matrix for factor B -Qb=bwwtrim.sub(cmat, tmeans, v, h,p) - Qb.siglevel <- 1 - pf(Qb, K - 1, 999) -# Do test for factor C -cmat<-kron(ij,kron(ik,cl)) # Contrast matrix for factor C -Qc<-bwwtrim.sub(cmat, tmeans, v, h,p) -Qc.siglevel <- 1 - pf(Qc, L - 1, 999) -# Do test for factor A by B interaction -cmat<-kron(cj,kron(ck,il)) # Contrast matrix for factor A by B -Qab<-bwwtrim.sub(cmat, tmeans, v, h,p) -Qab.siglevel <- 1 - pf(Qab, (J - 1) * (K - 1), 999) -# Do test for factor A by C interaction -cmat<-kron(cj,kron(ik,cl)) # Contrast matrix for factor A by C -Qac<-bwwtrim.sub(cmat, tmeans, v, h,p) -Qac.siglevel <- 1 - pf(Qac, (J - 1) * (L - 1), 999) -# Do test for factor B by C interaction -cmat<-kron(ij,kron(ck,cl)) # Contrast matrix for factor B by C -Qbc<-bwwtrim.sub(cmat, tmeans, v, h,p) -Qbc.siglevel <- 1 - pf(Qbc, (K - 1) * (L - 1), 999) -# Do test for factor A by B by C interaction -cmat<-kron(cj,kron(ck,cl)) # Contrast matrix for factor A by B by C -Qabc<-bwwtrim.sub(cmat, tmeans, v, h,p) -Qabc.siglevel <-1-pf(Qabc,(J-1)*(K-1)*(L-1), 999) -list(Qa=Qa,Qa.p.value=Qa.siglevel,Qb=Qb,Qb.p.value=Qb.siglevel, -Qc=Qc,Qc.p.value=Qc.siglevel,Qab=Qab,Qab.p.value=Qab.siglevel, -Qac=Qac,Qac.p.value=Qac.siglevel,Qbc=Qbc,Qbc.p.value=Qbc.siglevel, -Qabc=Qabc,Qabc.p.value=Qabc.siglevel) -} - - -bbwcovm<-function(J,K,L,x,tr=.2){ -# -# compute covariance matrix for a between by between by within design -# -p=J*K*L -idep=L -mat=matrix(0,nrow=p,ncol=p) -id=c(1:idep) -for(j in 1:J){ -for(k in 1:K){ -mat[id,id]=covmtrim(x[id],tr=tr) -id=id+idep -}} -mat -} -bbwmatna<-function(J,K,L,x){ -# -# data are assumed to be stored in a matrix -# for a between by within by within (three-way) anova. -# For the last factor, eliminate any missing values -# and then store the data in list mode. -# -y=list() -ad=L -ilow=1 -iup=ad -ic=0 -for(j in 1:J){ -for(k in 1:K){ -z=x[,ilow:iup] -d=elimna(z) -im=0 -for(l in 1:L){ -ic=ic+1 -im=im+1 -y[[ic]]=d[,im] -} -ilow=ilow+ad -iup=iup+ad -}} -y -} -bbwna<-function(J,K,L,x){ -# -# x: data are assumed to be stored in list mode -# for a between by within by within (three-way) anova. -# For the last factor, eliminate any missing values. 
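-# Illustrative usage of bwwtrim, defined above (added example, not part of the
-# original source; a 2-by-2-by-2 between-by-within-by-within design with the column
-# layout described above for matrix input):
-# set.seed(14)
-# dat <- matrix(rnorm(30 * 8), ncol = 8)   # columns 1-4: level 1 of factor A; columns 5-8: level 2
-# bwwtrim(2, 2, 2, dat)                    # trimmed-mean tests for A, B, C and all interactions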
-# -y=list() -ad=L -ilow=1 -iup=ad -ic=0 -for(j in 1:J){ -for(k in 1:K){ -z=x[ilow:iup] -d=as.matrix(elimna(matl(z))) -im=0 -ilow=ilow+ad -iup=iup+ad -for(l in 1:L){ -ic=ic+1 -im=im+1 -y[[ic]]=d[,im] -}} -} -y -} -bbwtrim<-function(J,K,L,data,tr=.2,alpha=.05,p=J*K*L){ -# Perform a between-between-within (three-way) anova on trimmed means where -# -# JK independent groups, L dependent groups -# -# The variable data is assumed to contain the raw -# data stored in list mode. data[[1]] contains the data -# for the first level of all three factors: level 1,1,1. -# data][2]] is assumed to contain the data for level 1 of the -# first two factors and level 2 of the third factor: level 1,1,2 -# data[[L]] is the data for level 1,1,L -# data[[L+1]] is the data for level 1,2,1. data[[2L]] is level 1,2,L. -# data[[KL+1]] is level 2,1,1, etc. -# -# The default amount of trimming is tr=.2 -# -# It is assumed that data has length JKL, the total number of -# groups being tested. -# -if(is.data.frame(data)) data <- as.matrix(data) -if(is.list(data))data=bbwna(J,K,L,data) -if(is.matrix(data))data=bbwmatna(J,K,L,data) -grp=c(1:p) -data=bbwna(J,K,L,data) -if(!is.list(data))stop("Data are not stored in list mode") -if(p!=length(data)){ -print("The total number of groups, based on the specified levels, is") -print(p) -print("The number of groups is") -print(length(data)) -print("Warning: These two values are not equal") -} -tmeans<-0 -h<-0 -v<-0 -for (i in 1:p){ -tmeans[i]<-mean(data[[grp[i]]],tr) -h[i]<-length(data[[grp[i]]])-2*floor(tr*length(data[[grp[i]]])) -# h is the effective sample size -} -v=bbwcovm(J,K,L,data,tr=tr) -ij<-matrix(c(rep(1,J)),1,J) -ik<-matrix(c(rep(1,K)),1,K) -il<-matrix(c(rep(1,L)),1,L) -jm1<-J-1 -cj<-diag(1,jm1,J) -cj<-diag(1,jm1,J) -for (i in 1:jm1)cj[i,i+1]<-0-1 -km1<-K-1 -ck<-diag(1,km1,K) -for (i in 1:km1)ck[i,i+1]<-0-1 -lm1<-L-1 -cl<-diag(1,lm1,L) -for (i in 1:lm1)cl[i,i+1]<-0-1 -# Do test for factor A -cmat<-kron(cj,kron(ik,il)) # Contrast matrix for factor A -Qa=bwwtrim.sub(cmat, tmeans, v, h,p) -Qa.siglevel <- 1 - pf(Qa, J - 1, 999) -# Do test for factor B -cmat<-kron(ij,kron(ck,il)) # Contrast matrix for factor B -Qb=bwwtrim.sub(cmat, tmeans, v, h,p) - Qb.siglevel <- 1 - pf(Qb, K - 1, 999) -# Do test for factor C -cmat<-kron(ij,kron(ik,cl)) # Contrast matrix for factor C -Qc<-bwwtrim.sub(cmat, tmeans, v, h,p) -Qc.siglevel <- 1 - pf(Qc, L - 1, 999) -# Do test for factor A by B interaction -cmat<-kron(cj,kron(ck,il)) # Contrast matrix for factor A by B -Qab<-bwwtrim.sub(cmat, tmeans, v, h,p) -Qab.siglevel <- 1 - pf(Qab, (J - 1) * (K - 1), 999) -# Do test for factor A by C interaction -cmat<-kron(cj,kron(ik,cl)) # Contrast matrix for factor A by C -Qac<-bwwtrim.sub(cmat, tmeans, v, h,p) -Qac.siglevel <- 1 - pf(Qac, (J - 1) * (L - 1), 999) -# Do test for factor B by C interaction -cmat<-kron(ij,kron(ck,cl)) # Contrast matrix for factor B by C -Qbc<-bwwtrim.sub(cmat, tmeans, v, h,p) -Qbc.siglevel <- 1 - pf(Qbc, (K - 1) * (L - 1), 999) -# Do test for factor A by B by C interaction -cmat<-kron(cj,kron(ck,cl)) # Contrast matrix for factor A by B by C -Qabc<-bwwtrim.sub(cmat, tmeans, v, h,p) -Qabc.siglevel <-1-pf(Qabc,(J-1)*(K-1)*(L-1), 999) -list(Qa=Qa,Qa.p.value=Qa.siglevel,Qb=Qb,Qb.p.value=Qb.siglevel, -Qc=Qc,Qc.p.value=Qc.siglevel,Qab=Qab,Qab.p.value=Qab.siglevel, -Qac=Qac,Qac.p.value=Qac.siglevel,Qbc=Qbc,Qbc.p.value=Qbc.siglevel, -Qabc=Qabc,Qabc.p.value=Qabc.siglevel) -} - - -bwwtrim.sub<-function(cmat,vmean,vsqse,h,p){ -# -# The function computes variation of Johansen's test 
statistic -# used to test the hypothesis C mu = 0 where -# C is a k by p matrix of rank k and mu is a p by 1 matrix of -# of unknown trimmed means. -# The argument cmat contains the matrix C. -# vmean is a vector of length p containing the p trimmed means -# vsqe is matrix containing the -# estimated covariances among the trimmed means -# h is the sample size -# -yvec<-matrix(vmean,length(vmean),1) -test<-cmat%*%vsqse%*%t(cmat) -invc<-solve(test) -test<-t(yvec)%*%t(cmat)%*%invc%*%cmat%*%yvec -temp<-0 -mtem<-vsqse%*%t(cmat)%*%invc%*%cmat -temp<-(sum(diag(mtem%*%mtem))+(sum(diag(mtem)))^2)/(h-1) -A<-.5*sum(temp) -cval<-nrow(cmat)+2*A-6*A/(nrow(cmat)+2) -test<-test/cval -test -} - -ghmean<-function(g,h){ -# -#Compute the mean and variance of a g-and-h distribution -# -val=0 -if(h==0){ -if(g>0){ -val=(exp(g^2/2)-1)/g -val2=(1-2*exp(g^2/2)+exp(2*g^2))/g^2 -val2=val2-val^2 -}} -if(g>0 & h!=0){ -if(h<1) -val=(exp(g^2/(2*(1-h)))-1)/(g*sqrt(1-h)) -val2=NA -if(h>0){ -if(h<.5) -val2=(exp(2*g^2/(1-2*h))-2*exp(g^2/(2*(1-2*h)))+1)/(g^2*sqrt(1-2*h))- -(exp(g^2/(2*(1-h)))-1)^2/(g^2*(1-h)) -}} -if(g==0){ -val=0 -val2=1/(1-2*h)^1.5 #Headrick et al. (2008) -} -list(mean=val,variance=val2) -} - -gskew<-function(g){ -# -# skew and kurtosis of a g-and-h distribution when h=0 -# -# -v1=sqrt(3*exp(2*g^2)+exp(3*g^2)-4) -v2=3*exp(2*g^2)+2*exp(3*g^2)+exp(4*g^2)-3 #Headrick has -6 not -3, but based on n=1000000, -3 works -list(skew=v1,kurtosis=v2) -} - - -skew<-function(x){ -# -# Compute skew and kurtosis -# -x=elimna(x) -m1<-mean(x) -m2<-var(x) -m3<-sum((x-m1)^3)/length(x) -m4<-sum((x-m1)^4)/length(x) -sk<-m3/m2^1.5 -ku<-m4/m2^2 -list(skew=sk,kurtosis=ku) -} - -t3pval<-function(cmat,tmeans,v,h){ -alph<-c(1:99)/100 -for(i in 1:99){ -irem<-i -chkit<-johan(cmat,tmeans,v,h,alph[i]) -if(chkit$teststat>chkit$crit)break -} -p.value <- irem/100 - if(p.value <= 0.1) { - iup <- (irem + 1)/100 - alph <- seq(0.001, iup, 0.001) - for(i in 1:length(alph)) { - p.value <- alph[i] - chkit<-johan(cmat,tmeans,v,h,alph[i]) -if(chkit$teststat>chkit$crit)break - } - } - if(p.value <= 0.001) { - alph <- seq(0.0001, 0.001, 0.0001) - for(i in 1:length(alph)) { - p.value <- alph[i] -chkit<-johan(cmat,tmeans,v,h,alph[i]) -if(chkit$teststat>chkit$crit)break - } - } -p.value -} - -t1way<-function(x,tr=.2,grp=NA,MAT=FALSE,lev.col=1,var.col=2,IV=NULL,pr=TRUE){ -# -# A heteroscedastic one-way ANOVA for trimmed means -# using a generalization of Welch's method. -# -# The data are assumed to be stored in $x$ in a matrix or in list mode. -# -# MAT=F, if x is a matrix, columns correspond to groups. -# if MAT=T, assumes argument -# lev.col -# indicates which column of x denotes the groups. And -# var.col indicates the column where the data are stored. -# -# if x has list mode: -# length(x) is assumed to correspond to the total number of groups. -# By default, the null hypothesis is that all groups have a common mean. -# To compare a subset of the groups, use grp to indicate which -# groups are to be compared. For example, if you type the -# command grp<-c(1,3,4), and then execute this function, groups -# 1, 3, and 4 will be compared with the remaining groups ignored. -# -# IV, if specified, taken to be the independent variable -# That is, the group id values -# and x is assumed to be a vector containing all of the data -# -# Missing values are automatically removed. 
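-# Illustrative usage of skew and ghmean, defined above (added example, not part of
-# the original source):
-# skew(rnorm(10000))      # sample skewness and (non-excess) kurtosis; about 0 and 3 under normality
-# ghmean(g = .5, h = 0)   # population mean and variance of a g-and-h distribution with g = .5, h = 0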
-# -if(is.data.frame(x))x=as.matrix(x) -if(tr==.5)print("Warning: Comparing medians should not be done with this function") -if(!is.null(IV[1])){ -if(pr)print("Assuming x is a vector containing all of the data, the dependent variable") -xi=elimna(cbind(x,IV)) -x=fac2list(xi[,1],xi[,2]) -} -if(MAT){ -if(!is.matrix(x))stop("With MAT=T, data must be stored in a matrix") -if(length(lev.col)!=1)stop("Argument lev.col should have 1 value") -temp=selby(x,lev.col,var.col) -x=temp$x -grp2=rank(temp$grpn) -x=x[grp2] -} -if(is.matrix(x))x<-listm(x) -#nv=lapply(x,length) -if(is.na(sum(grp[1])))grp<-c(1:length(x)) -if(!is.list(x))stop("Data are not stored in a matrix or in list mode.") -J<-length(grp) -h<-vector("numeric",J) -w<-vector("numeric",J) -xbar<-vector("numeric",J) -nv=NA -for(j in 1:J){ -xx<-!is.na(x[[j]]) -val<-x[[j]] -x[[j]]<-val[xx] # Remove missing values -nv[j]=length(x[[j]]) -h[j]<-length(x[[grp[j]]])-2*floor(tr*length(x[[grp[j]]])) - # h is the number of observations in the jth group after trimming. -if(winvar(x[[grp[j]]],tr)==0)print(paste('The Winsorized variance is zero for group',j)) -w[j]<-h[j]*(h[j]-1)/((length(x[[grp[j]]])-1)*winvar(x[[grp[j]]],tr)) -xbar[j]<-mean(x[[grp[j]]],tr) -} -u<-sum(w) -xtil<-sum(w*xbar)/u -A<-sum(w*(xbar-xtil)^2)/(J-1) -B<-2*(J-2)*sum((1-w/u)^2/(h-1))/(J^2-1) -TEST<-A/(B+1) -nu1<-J-1 -nu2<-1./(3*sum((1-w/u)^2/(h-1))/(J^2-1)) -sig<-1-pf(TEST,nu1,nu2) -list(TEST=TEST,nu1=nu1,nu2=nu2,n=nv,p.value=sig) -} - -t3wayv2<-function(J,K,L,x,tr=.2,grp=c(1:p),alpha=.05,p=J*K*L,MAT=FALSE, -lev.col=c(1:3),var.col=4,pr=TRUE){ -# Perform a J by K by L (three-way) anova on trimmed means where -# all JKL groups are independent. -# -# Same as t3way, only computes p-values -# -# if MAT=F (default) -# The R variable data is assumed to contain the raw -# data stored in list mode. data[[1]] contains the data -# for the first level of all three factors: level 1,1,1. -# data][2]] is assumed to contain the data for level 1 of the -# first two factors and level 2 of the third factor: level 1,1,2 -# data[[L]] is the data for level 1,1,L -# data[[L+1]] is the data for level 1,2,1. data[[2L]] is level 1,2,L. -# data[[KL+1]] is level 2,1,1, etc. -# -# MAT=T, assumes data are stored in matrix with 3 columns indicating -# levels of the three factors. -# That is, this function calls selby2 for you. -# -# The default amount of trimming is tr=.2 -# -# It is assumed that data has length JKL, the total number of -# groups being tested. 
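-# Illustrative usage of t1way, defined above (added example, not part of the original
-# source; the simulated data are arbitrary):
-# set.seed(15)
-# g <- list(rnorm(20), rnorm(25, .3), rnorm(30, .6))
-# t1way(g)                              # heteroscedastic one-way test on 20% trimmed means
-# t1way(rnorm(75), IV = rep(1:3, 25))   # the same test when the data are a vector plus group IDs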
-# -if(is.data.frame(x))x=as.matrix(x) -data=x #Yes, odd code -if(MAT){ -if(!is.matrix(data))stop("With MAT=T, data must be a matrix") -if(length(lev.col)!=3)stop("Argument lev.col should have 3 values") -temp=selby2(data,lev.col,var.col) -lev1=length(unique(temp$grpn[,1])) -lev2=length(unique(temp$grpn[,2])) -lev3=length(unique(temp$grpn[,3])) -gv=apply(temp$grpn,2,rank) -gvad=100*gv[,1]+10*gv[,2]+gv[,3] -grp=rank(gvad) -if(pr){ -print(paste("Factor 1 has", lev1, "levels")) -print(paste("Factor 2 has", lev2, "levels")) -print(paste("Factor 3 has", lev3, "levels")) -} -if(J!=lev1)warning("J is being reset to the number of levels found") -if(K!=lev2)warning("K is being reset to the number of levels found") -if(L!=lev3)warning("K is being reset to the number of levels found") -J=lev1 -K=lev2 -L=lev2 -data=temp$x -} -if(is.matrix(data))data=listm(data) -if(!is.list(data))stop("Data is not stored in list mode") -if(p!=length(data)){ -print("The total number of groups, based on the specified levels, is") -print(p) -print("The number of groups in data is") -print(length(data)) -print("Warning: These two values are not equal") -} -tmeans<-0 -h<-0 -v<-0 -for (i in 1:p){ -tmeans[i]<-mean(data[[grp[i]]],tr) -h[i]<-length(data[[grp[i]]])-2*floor(tr*length(data[[grp[i]]])) -# h is the effective sample size -v[i]<-(length(data[[grp[i]]])-1)*winvar(data[[grp[i]]],tr)/(h[i]*(h[i]-1)) -# v contains the squared standard errors -} -v<-diag(v,p,p) # Put squared standard errors in a diag matrix. -ij<-matrix(c(rep(1,J)),1,J) -ik<-matrix(c(rep(1,K)),1,K) -il<-matrix(c(rep(1,L)),1,L) -jm1<-J-1 -cj<-diag(1,jm1,J) -for (i in 1:jm1)cj[i,i+1]<-0-1 -km1<-K-1 -ck<-diag(1,km1,K) -for (i in 1:km1)ck[i,i+1]<-0-1 -lm1<-L-1 -cl<-diag(1,lm1,L) -for (i in 1:lm1)cl[i,i+1]<-0-1 -# Do test for factor A -cmat<-kron(cj,kron(ik,il)) # Contrast matrix for factor A -Qa <- johan(cmat, tmeans, v, h, alpha) -Qa.pv=t3pval(cmat, tmeans, v, h) -# Do test for factor B -cmat<-kron(ij,kron(ck,il)) # Contrast matrix for factor B -Qb<-johan(cmat,tmeans,v,h,alpha) -Qb.pv=t3pval(cmat, tmeans, v, h) -# Do test for factor C -cmat<-kron(ij,kron(ik,cl)) # Contrast matrix for factor C -Qc<-johan(cmat,tmeans,v,h,alpha) -Qc.pv=t3pval(cmat, tmeans, v, h) -# Do test for factor A by B interaction -cmat<-kron(cj,kron(ck,il)) # Contrast matrix for factor A by B -Qab<-johan(cmat,tmeans,v,h,alpha) -Qab.pv=t3pval(cmat, tmeans, v, h) -# Do test for factor A by C interaction -cmat<-kron(cj,kron(ik,cl)) # Contrast matrix for factor A by C -Qac<-johan(cmat,tmeans,v,h,alpha) -Qac.pv=t3pval(cmat, tmeans, v, h) -# Do test for factor B by C interaction -cmat<-kron(ij,kron(ck,cl)) # Contrast matrix for factor B by C -Qbc<-johan(cmat,tmeans,v,h,alpha) -Qbc.pv=t3pval(cmat, tmeans, v, h) -# Do test for factor A by B by C interaction -cmat<-kron(cj,kron(ck,cl)) # Contrast matrix for factor A by B by C -Qabc<-johan(cmat,tmeans,v,h,alpha) -Qabc.pv=t3pval(cmat, tmeans, v, h) -list(Qa=Qa$teststat,Qa.crit=Qa$crit,Qa.p.value=Qa.pv, -Qb=Qb$teststat,Qb.crit=Qb$crit,Qb.p.value=Qb.pv, -Qc=Qc$teststat,Qc.crit=Qc$crit,Qc.p.value=Qc.pv, -Qab=Qab$teststat,Qab.crit=Qab$crit,Qab.p.value=Qab.pv, -Qac=Qac$teststat,Qac.crit=Qac$crit,Qac.p.value=Qac.pv, -Qbc=Qbc$teststat,Qbc.crit=Qbc$crit,Qbc.p.value=Qbc.pv, -Qabc=Qabc$teststat,Qabc.crit=Qabc$crit,Qabc.p.value=Qabc.pv) -} - - -olshc4<-function(x,y,alpha=.05,CN=FALSE, -xout=FALSE,outfun=outpro,HC3=FALSE,plotit=FALSE,xlab = "X", ylab = "Y", zlab = "Z",...){ -# -# Compute confidence intervals via least squares -# regression using heteroscedastic 
method -# recommended by Cribari-Neto (2004). -# CN=F, degrees of freedom are n-p -# CN=T degrees of freedom are infinite, as done by Cribari-Neto (2004) -# All indications are that CN=F is best for general use. -# -# HC3=TRUE, will replace the HC4 estimator with the HC3 estimator. -# -x<-as.matrix(x) -pnum=ncol(x) -if(nrow(x) != length(y))stop("Length of y does not match number of x values") -m<-cbind(x,y) -m<-elimna(m) -y<-m[,ncol(x)+1] -x=m[,1:ncol(x)] -n=length(y) -nrem=n -n.keep=length(y) -x<-as.matrix(x) -if(xout){ -if(identical(outfun,outblp))flag=outblp(x,y,plotit=FALSE)$keep -else -flag<-outfun(x,plotit=FALSE,...)$keep -x<-as.matrix(x) -x<-x[flag,] -y<-y[flag] -n.keep=length(y) -x<-as.matrix(x) -} -temp<-lsfit(x,y) -rsq=Rsq.ols(x,y) -x<-cbind(rep(1,nrow(x)),x) -xtx<-solve(t(x)%*%x) -h<-diag(x%*%xtx%*%t(x)) -n<-length(h) -d<-(n*h)/sum(h) -for(i in 1:length(d)){ - d[i]<-min(4, d[i]) -} -if(HC3)d=2 -hc4<-xtx%*%t(x)%*%diag(temp$res^2/(1-h)^d)%*%x%*%xtx -df<-nrow(x)-ncol(x) -crit<-qt(1-alpha/2,df) -if(CN)crit=qnorm(1-alpha/2) -al<-ncol(x) -p=al-1 -ci<-matrix(NA,nrow=al,ncol=6) -lab.out=rep("Slope",p) -dimnames(ci)<-list(c("(Intercept)",lab.out),c("Coef.","Estimates", -"ci.lower","ci.upper","p-value","Std.Error")) -for(j in 1:al){ -ci[j,1]<-j-1 -ci[j,2]<-temp$coef[j] -ci[j,3]<-temp$coef[j]-crit*sqrt(hc4[j,j]) -ci[j,4]<-temp$coef[j]+crit*sqrt(hc4[j,j]) -test<-temp$coef[j]/sqrt(hc4[j,j]) -names(test)=NULL -ci[j,5]<-2*(1-pt(abs(test),df)) -if(CN)ci[j,5]<-2*(1-pnorm(abs(test),df)) -} -ci[,6]=sqrt(diag(hc4)) -if(plotit){ -if(pnum==1){ -plot(x[,-1],y,xlab=xlab,ylab=ylab) -abline(ci[,2]) -} -if(pnum==2){ -regp2plot(x[,-1],y,regfun=ols,xlab=xlab,ylab=ylab,zlab=zlab) -}} -list(n=nrem,n.keep=n.keep,ci=ci, cov=hc4, test.stat=test,R.squared=rsq) -} - -olsci<-olshc4 - -hc4test<-function(x,y,pval=c(1:ncol(x)),xout=FALSE,outfun=outpro,pr=TRUE,plotit=FALSE,xlab="X",ylab="Y",...){ -# -# Perform omnibus test using OLS and HC4 estimator -# That is, test the hypothesis that all of the slope parameters -# are equal to 0 in a manner that allows heteroscedasticity. -# -# recommended by Cribari-Neto (2004). 
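# Roughly, the HC4 estimator weights the ith squared residual by
# (1-h_i)^(-delta_i), where h_i is the ith leverage (hat) value and
# delta_i = min(4, n*h_i/sum(h)); this is what the code below computes.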
-# Seems to work well with p=1 but can be unsatisfactory wit p>4 predictors, -# Unknown how large n must be when p>1 -# -x<-as.matrix(x) -if(ncol(x)>1 && pr)print("WARNING: more than 1 predictor, olstest might be better") -if(nrow(x) != length(y))stop("Length of y does not match number of x values") -m<-cbind(x,y) -m<-elimna(m) -p=ncol(x) -p1=p+1 -y<-m[,p1] -x=m[,1:p] -nrem=length(y) -n=length(y) -n.keep=n -x<-as.matrix(x) -if(xout){ -if(identical(outfun,outblp))flag=outblp(x,y,plotit=FALSE)$keep -else -flag<-outfun(x,...)$keep -x<-as.matrix(x) -x<-x[flag,] -y<-y[flag] -n.keep=length(y) -x<-as.matrix(x) -} -n=n.keep -pvalp1<-pval+1 -temp<-lsfit(x,y) # unrestricted -if(plotit){ -if(p==1){ -plot(x[,1],y,xlab=xlab,ylab=ylab) -abline(temp$coef) -}} -x<-cbind(rep(1,nrow(x)),x) -hval<-x%*%solve(t(x)%*%x)%*%t(x) -hval<-diag(hval) -hbar<-mean(hval) -delt<-cbind(rep(4,n),hval/hbar) -delt<-apply(delt,1,min) -aval<-(1-hval)^(0-delt) -x2<-x[,pvalp1] -pval<-0-pvalp1 -x1<-x[,pval] -df<-length(pval) -x1<-as.matrix(x1) -imat<-diag(1,n) -M1<-imat-x1%*%solve(t(x1)%*%x1)%*%t(x1) -M<-imat-x%*%solve(t(x)%*%x)%*%t(x) -uval<-as.vector(M%*%y) -R2<-M1%*%x2 -rtr<-solve(t(R2)%*%R2) -temp2<-aval*uval^2 -S<-diag(aval*uval^2) -V<-n*rtr%*%t(R2)%*%S%*%R2%*%rtr -nvec<-as.matrix(temp$coef[pvalp1]) -test<-n*t(nvec)%*%solve(V)%*%nvec -test<-test[1,1] -p.value<-1-pchisq(test,df) -list(n=nrem,n.keep=n.keep,test=test,p.value=p.value,coef=temp$coef) -} - - - -standm<-function(x,locfun=lloc,est=mean,scat=var,...){ -# standardize a matrix x -# -x=elimna(x) -x=as.matrix(x) -m1=lloc(x,est=est) -v1=apply(x,2,scat) -p=ncol(x) -for(j in 1:p)x[,j]=(x[,j]-m1[j])/sqrt(v1[j]) -x -} - -t2way<-function(J,K,x,tr=.2,grp=c(1:p),p=J*K,MAT=FALSE, -lev.col=c(1:2),var.col=3,pr=TRUE,IV1=NULL,IV2=NULL){ -# Perform a J by K (two-way) ANOVA on trimmed means where -# all groups are independent. -# -# The R variable x is assumed to contain the raw -# data stored in list mode, or a matrix with columns -# corresponding to groups. If stored in list mode, x[[1]] contains the data -# for the first level of both factors: level 1,1,. -# x[[2]] is assumed to contain the data for level 1 of the -# first factor and level 2 of the second factor: level 1,2 -# -# The default amount of trimming is tr=.2 -# -# It is assumed that x has length JK, the total number of -# groups being tested. -# -# MAT=T, assumes x are stored in matrix with 3 columns -# with two of the columns indicated by the argument -# lev.col -# specifying the columns of x containing the values of the -# levels of the two factors. -# The outcome variable is in column -# var.col -# which defaults to column 3 -# That is, this function calls selby2 for you. 
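# Example (hypothetical data): t2way(2,3,m,MAT=TRUE) assumes columns 1 and 2
# of the matrix m contain the levels of the two factors and column 3 the
# outcome; the groups are then sorted via selby2 before the analysis is done.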
-# -# IV1 and IV2: if specified, taken to be the independent variable -# That is, the group id values -# and x is assumed to be a vector containing all of the data -# EXAMPLE: t2way(x=data,IV1=iv1,IV2=iv2) -# would do a two-way ANOVA based on group id's in iv1 and iv2 and -# dependent variable data -# -if(is.data.frame(x))data=as.matrix(x) -if(tr==.5){ -print("For medians, use med2way if there are no ties") -print("With ties, use linear contrasts in conjunction with medpb") -stop("") -} -if(MAT){ -if(!is.matrix(x))stop("With MAT=T, data must be a matrix") -if(length(lev.col)!=2)stop("Argument lev.col should have 3 values") -temp=selby2(x,lev.col,var.col) -lev1=length(unique(temp$grpn[,1])) -lev2=length(unique(temp$grpn[,2])) -gv=apply(temp$grpn,2,rank) -gvad=10*gv[,1]+gv[,2] -grp=rank(gvad) -if(pr){ -print(paste("Factor 1 has", lev1, "levels")) -print(paste("Factor 2 has", lev2, "levels")) -} -if(J!=lev1)warning("J is being reset to the number of levels found") -if(K!=lev2)warning("K is being reset to the number of levels found") -J=lev1 -K=lev2 -x=temp$x -} -if(!is.null(IV1[1])){ -if(is.null(IV2[1]))stop("IV2 is NULL") -if(pr)print("Assuming data is a vector containing all of the data; the dependent variable") -xi=elimna(cbind(x,IV1,IV2)) -J=length(unique(xi[,2])) -K=length(unique(xi[,3])) -x=fac2list(xi[,1],xi[,2:3]) -} -if(is.matrix(x))x=listm(x) -if(!is.list(x))stop("Data are not stored in list mode") -if(p!=length(x)){ -print("The total number of groups, based on the specified levels, is") -print(p) -print("The number of groups is") -print(length(x)) -print("Warning: These two values are not equal") -} -tmeans<-0 -h<-0 -v<-0 -for (i in 1:p){ -x[[grp[i]]]=elimna(x[[grp[i]]]) -tmeans[i]<-mean(x[[grp[i]]],tr) -h[i]<-length(x[[grp[i]]])-2*floor(tr*length(x[[grp[i]]])) -# h is the effective sample size -if(winvar(x[[grp[i]]],tr)==0)print(paste('The Winsorized variance is zero for group',i)) -v[i]<-(length(x[[grp[i]]])-1)*winvar(x[[grp[i]]],tr)/(h[i]*(h[i]-1)) -# v contains the squared standard errors -} -v<-diag(v,p,p) # Put squared standard errors in a diag matrix. 
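# Build the contrast matrices from Kronecker products: kron(cj,ik) isolates
# Factor A, kron(ij,ck) isolates Factor B, and kron(cj,ck) gives the A-by-B
# interaction.  Each is passed to johan() over a decreasing sequence of alpha
# values to approximate a p-value.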
-ij<-matrix(c(rep(1,J)),1,J) -ik<-matrix(c(rep(1,K)),1,K) -jm1<-J-1 -cj<-diag(1,jm1,J) -for (i in 1:jm1)cj[i,i+1]<-0-1 -km1<-K-1 -ck<-diag(1,km1,K) -for (i in 1:km1)ck[i,i+1]<-0-1 -# Do test for factor A -cmat<-kron(cj,ik) # Contrast matrix for factor A -alval<-c(1:999)/1000 -for(i in 1:999){ -irem<-i -Qa<-johan(cmat,tmeans,v,h,alval[i]) -if(i==1)dfA=Qa$df -if(Qa$teststat>Qa$crit)break -} -A.p.value=irem/1000 -# Do test for factor B -cmat<-kron(ij,ck) # Contrast matrix for factor B -for(i in 1:999){ -irem<-i -Qb<-johan(cmat,tmeans,v,h,alval[i]) -if(i==1)dfB=Qb$df -if(Qb$teststat>Qb$crit)break -} -B.p.value=irem/1000 -# Do test for factor A by B interaction -cmat<-kron(cj,ck) # Contrast matrix for factor A by B -for(i in 1:999){ -irem<-i -Qab<-johan(cmat,tmeans,v,h,alval[i]) -if(i==1)dfAB=Qab$df -if(Qab$teststat>Qab$crit)break -} -AB.p.value=irem/1000 -tmeans=matrix(tmeans,J,K,byrow=TRUE) -list(Qa=Qa$teststat,A.p.value=A.p.value, df.A=dfA, -Qb=Qb$teststat,B.p.value=B.p.value,df.B=dfB, -Qab=Qab$teststat,AB.p.value=AB.p.value,df.AB=dfAB,means=tmeans) -} - -mcskew <- function(z) -{ - n=length(z) - y1=0 - y2=0 - left=0 - right=0 - q=0 - p=0 - eps=0.0000000000001 - z=-z - xmed=pull(z,n,floor(n/2)+1) - if (n%%2 == 0) - { - xmed=(xmed+pull(z,n,floor(n/2)))/2 - } - z=z-xmed - y=-sort(z) - y1=y[y>-eps] - y2=y[y<=eps] - h1=length(y1) - h2=length(y2) - left[1:h2]=1 - right[1:h2]=h1 - nl=0 - nr=h1*h2 - knew=floor(nr/2)+1 - IsFound=0 - while ((nr-nl>n) & (IsFound==0)) - { - weight=0 - work=0 - j=1 - for (i in 1:h2) - { - if (left[i]<=right[i]) - { - weight[j]=right[i]-left[i]+1 - k=left[i]+floor(weight[j]/2) - work[j]=calwork(y1[k],y2[i],k,i,h1+1,eps) - j=j+1 - } - } - trial=whimed(work,weight,j-1) - j=1 - for (i in h2:1) - { - while ((j<=h1)&(calwork(y1[min(j,h1)],y2[i],j,i,h1+1,eps)>trial)) - { - j=j+1 - } - p[i]=j-1 - } - j=h1 - for (i in 1:h2) - { - while ((j>=1)&(calwork(y1[max(j,1)],y2[i],j,i,h1+1,eps)sumq) - { - left[1:h2]=q[1:h2] - nl=sumq - } - else - { - medc=trial - IsFound=1 - } - } - } - if (IsFound==0) - {work=0 - j=1 - for (i in 1:h2) - { - if (left[i]<=right[i]) - { - for (jj in left[i]:right[i]) - { - work[j]=0-calwork(y1[jj],y2[i],jj,i,h1+1,eps) - j=j+1 - } - } - } - medc=0-pull(work,j-1,knew-nl) - } - medc -} - -pull <- function(a,n,k) -{ - b=0 - b=a - l=1 - lr=n - while (lax) - { - j=j-1 - } - if (jnc<=j) - { - buffer=b[jnc] - b[jnc]=b[j] - b[j]=buffer - jnc=jnc+1 - j=j-1 - } - } - if (jtrial,rep(F,n-nn))]) - wmid=sum(iw[c(a[1:nn]==trial,rep(F,n-nn))]) - - if ((2*wrest+2*wleft)>wtotal) - { - i=c(a[1:nn]wtotal) - { - whmed=trial - IsFound=1 - } - else - { - i=c(a[1:nn]>trial,rep(F,n-nn)) - acand=a[i] - iwcand=iw[i] - nn=length(acand) -# nn_kcand_length(acand) - wrest=wrest+wleft+wmid - } - } - a[1:nn]=acand[1:nn] - iw[1:nn]=iwcand[1:nn] - } - whmed -} - -calwork <- function(a,b,ai,bi,ab,eps) -{ - if (abs(a-b) < 2.0*eps) - { - if (ai+bi==ab) - { - cwork=0 - } - else - { - if (ai+bi (n-p)/(2*n) ) r <- (n-p)/(2*n)} -# maximum achievable breakdown -# -# if rejection is not achievable, use c1=0 and best rejection -# - limvec <- rejpt.bt.lim(p,r) - if (1-limvec[2] <= alpha) - { - c1 <- 0 - M <- sqrt(qchisq(1-alpha,p)) - } - else - { - c1.plus.M <- sqrt(qchisq(1-alpha,p)) - M <- sqrt(p) - c1 <- c1.plus.M - M - iter <- 1 - crit <- 100 - eps <- 1e-5 - while ((crit > eps)&(iter<100)) - { - deps <- 1e-4 - M.old <- M - c1.old <- c1 - er <- erho.bt(p,c1,M) - fc <- er - r*(M^2/2+c1*(5*c1+16*M)/30) - fcc1 <- (erho.bt(p,c1+deps,M)-er)/deps - fcM <- (erho.bt(p,c1,M+deps)-er)/deps - fcp <- fcM - fcc1 - 
r*(M-(5*c1+16*M)/30+c1*9/30) - M <- M - fc/fcp - if (M >= c1.plus.M ){M <- (M.old + c1.plus.M)/2} - c1 <- c1.plus.M - M -# if (M-c1 < 0) M <- c1.old+(M.old-c1.old)/2 - crit <- abs(fc) - iter <- iter+1 - } - } -list(c1=c1,M=M,r1=r) -} -erho.bt.lim <- function(p,c1) -# expectation of rho(d) under chi-squared p - return(chi.int(p,2,c1)+c1^2*chi.int2(p,0,c1)) -erho.bt.lim.p <- function(p,c1) -# derivative of erho.bt.lim wrt c1 - return(chi.int.p(p,2,c1)+c1^2*chi.int2.p(p,0,c1)+2*c1*chi.int2(p,0,c1)) - - -rejpt.bt.lim <- function(p,r){ -# find p-value of translated biweight limit c -# that gives a specified breakdown - c1 <- 2*p - iter <- 1 - crit <- 100 - eps <- 1e-5 - while ((crit > eps)&(iter<100)) - { - c1.old <- c1 - fc <- erho.bt.lim(p,c1) - c1^2*r - fcp <- erho.bt.lim.p(p,c1) - 2*c1*r - c1 <- c1 - fc/fcp - if (c1 < 0) c1 <- c1.old/2 - crit <- abs(fc) - iter <- iter+1 - } - return(c(c1,pchisq(c1^2,p),log10(1-pchisq(c1^2,p)))) -} -chi.int.p <- function(p,a,c1) - return( exp(lgamma((p+a)/2)-lgamma(p/2))*2^{a/2}*dchisq(c1^2,p+a)*2*c1 ) -chi.int2.p <- function(p,a,c1) - return( -exp(lgamma((p+a)/2)-lgamma(p/2))*2^{a/2}*dchisq(c1^2,p+a)*2*c1 ) -ksolve.bt <- function(d,p,c1,M,b0){ -# find a constant k which satisfies the s-estimation constraint -# for modified biweight - k <- 1 - iter <- 1 - crit <- 100 - eps <- 1e-5 - while ((crit > eps)&(iter<100)) - { - k.old <- k - fk <- mean(rho.bt(d/k,c1,M))-b0 - fkp <- -mean(psi.bt(d/k,c1,M)*d/k^2) - k <- k - fk/fkp - if (k < k.old/2) k <- k.old/2 - if (k > k.old*1.5) k <- k.old*1.5 - crit <- abs(fk) -# print(c(iter,k.old,crit)) - iter <- iter+1 - } -# print(c(iter,k,crit)) - return(k) -} -rho.bt <- function(x,c1,M) -{ - x1 <- (x-M)/c1 - ivec1 <- (x1 < 0) - ivec2 <- (x1 > 1) - return(ivec1*(x^2/2) - +ivec2*(M^2/2+c1*(5*c1+16*M)/30) - +(1-ivec1-ivec2)*(M^2/2-M^2*(M^4-5*M^2*c1^2+15*c1^4)/(30*c1^4) - +(1/2+M^4/(2*c1^4)-M^2/c1^2)*x^2 - +(4*M/(3*c1^2)-4*M^3/(3*c1^4))*x^3 - +(3*M^2/(2*c1^4)-1/(2*c1^2))*x^4 - -4*M*x^5/(5*c1^4)+x^6/(6*c1^4))) -} -psi.bt <- function(x,c1,M) -{ - x1 <- (x-M)/c1 - ivec1 <- (x1 < 0) - ivec2 <- (x1 > 1) - return(ivec1*x+(1-ivec1-ivec2)*x*(1-x1^2)^2) -} -psip.bt <- function(x,c1,M) -{ - x1 <- (x-M)/c1 - ivec1 <- (x1 < 0) - ivec2 <- (x1 > 1) - return(ivec1+(1-ivec1-ivec2)*((1-x1^2)^2+4*x*x1*(1-x1^2)/c1)) -} -wt.bt <- function(x,c1,M) -{ - x1 <- (x-M)/c1 - ivec1 <- (x1 < 0) - ivec2 <- (x1 > 1) - return(ivec1+(1-ivec1-ivec2)*(1-x1^2)^2) -} -v.bt <- function(x,c1,M) return(x*psi.bt(x,c1,M)) - -rung3dlchk<-function(x,y,est=onestep,regfun=tsreg,beta=.2,plotit=FALSE,nmin=0, -fr=NA,...){ -# -# running mean using interval method -# Same as runm3d, but empirically determine the span, f, -# by maximizing the percentage bend correlation using the -# leave-three-out method. -# -# x is an n by p matrix of predictors. -# -# fr controls amount of smoothing and is determined by this function. -# If fr is missing, function first considers fr=.8(.05)1.2. If -# measure of scale of residuals is mininmized for fr=.8, then consider -# fr=.2(.05).75. -# -# -if(!is.matrix(x))stop("Data are not stored in a matrix.") -plotit<-as.logical(plotit) -chkcor<-1 -frtry<-c(.7,.75,.8,.85,.9,.95,1.,1.05,1.1,1.15,1.2) -if(!is.na(fr[1]))frtry<-fr -chkit<-0 -for (it in 1:length(frtry)){ -fr<-frtry[it] -rmd<-runm3ds1(x,y,fr,tr,FALSE,nmin) # Using leave-three-out method. 
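# For this candidate span, gauge how well the smooth predicts y by applying a
# robust measure of scatter (pbvar) to the leave-three-out residuals.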
-xm<-y[!is.na(rmd)] -rmd<-rmd[!is.na(rmd)] -dif<-xm-rmd -chkcor[it]<-pbvar(dif,beta) -} -if(sum(is.na(chkcor))== length(chkcor)) -{stop("A value for the span cannot be determined with these data.")} -tempc<-sort(chkcor) -chkcor[is.na(chkcor)]<-tempc[length(tempc)] -temp<-order(chkcor) -fr1<-frtry[temp[1]] -fr2<-fr1 -val1<-min(chkcor) -chkcor2<-0 -if(is.na(fr)){ -if(temp[1] == 1){ -frtry<-c(.2,.25,.3,.35,.4,.45,.5,.55,.6,.65,.7,.75) -for (it in 1:length(frtry)){ -fr<-frtry[it] -rmd<-runm3ds1(x,y,fr,tr,FALSE,nmin) -xm<-y[!is.na(rmd)] -rmd<-rmd[!is.na(rmd)] -dif<-xm-rmd -chkcor2[it]<-pbvar(dif,beta) -} -tempc<-sort(chkcor2) -chkcor2[is.na(chkcor2)]<-tempc[length(tempc)] -print(chkcor2) -temp2<-order(chkcor2) -fr2<-frtry[temp2[1]] -} -} -sortc<-sort(chkcor2) -chkcor2[is.na(chkcor2)]<-sortc[length(sortc)] -val2<-min(chkcor2) -fr<-fr1 -if(val2 < val1)fr<-fr2 -rmd<-runm3d(x,y,fr=fr,tr,plotit=FALSE,nmin,pyhat=TRUE,pr=FALSE) -xm<-y[!is.na(rmd)] -rmd<-rmd[!is.na(rmd)] -etasq<-pbcor(rmd,xm)$cor^2 -# Next, fit regression line -temp<-y-regfun(x,y)$res -pbc<-pbcor(temp,y)$cor^2 -temp<-(etasq-pbc)/(1-pbc) -list(gamma.L=temp,pbcorsq=pbc,etasq=etasq,fr=fr,rmd=rmd,yused=xm,varval=chkcor) -} - -near3dl1<-function(x,pt,fr=1,m){ -dis<-mahalanobis(x,pt,m$cov) -sdis<-sqrt(sort(dis)) -dflag<-(dis < fr & dis > sdis[3]) -dflag -} - -listm<-function(x){ -# -# Store the data in a matrix or data frame in a new -# R variable having list mode. -# Col 1 will be stored in y[[1]], col 2 in y[[2]], and so on. -# -if(is.null(dim(x)))stop("The argument x must be a matrix or data frame") -y<-list() -for(j in 1:ncol(x))y[[j]]<-x[,j] -y -} - -m2l=listm - -matrix2list=listm - -pbanova<-function(x,tr=.2,alpha=.05,nboot=NA,grp=NA,WIN=FALSE,win=.1){ -# -# Test the hypothesis that J independent groups have -# equal trimmed means using the percentile bootstrap method. -# -# The data are assumed to be stored in x -# which either has list mode or is a matrix. In the first case -# x[[1]] contains the data for the first group, x[[2]] the data -# for the second group, etc. Length(x)=the number of groups = J. -# If stored in a matrix, the columns of the matrix correspond -# to groups. -# -# tr is the amount of trimming -# -# The argument grp can be used to analyze a subset of the groups -# Example: grp=c(1,3,5) would compare groups 1, 3 and 5. -# -# WIN=T means data are Winsorized before taking bootstraps by the -# amount win. -# -# Missing values are allowed. -# -if(is.matrix(x))x<-listm(x) -if(!is.list(x))stop("Data must be stored in list mode or in matrix mode.") -if(!is.na(sum(grp))){ # Only analyze specified groups. -xx<-list() -for(i in 1:length(grp))xx[[i]]<-x[[grp[1]]] -x<-xx -} -J<-length(x) -tempn<-0 -for(j in 1:J){ -temp<-x[[j]] -temp<-temp[!is.na(temp)] # Remove missing values. 
-tempn[j]<-length(temp) -x[[j]]<-temp -} -Jm<-J-1 -if(WIN){ -if(tr < .2){print("Warning: When Winsorizing,") -print("the amount of trimming should be at least.2") -} -if(win > tr)stop("Amount of Winsorizing must be <= amount of trimming") -if(min(tempn) < 15){ -print("Warning: Winsorizing with sample sizes less than 15") -print("can result in poor control over the probability of a Type I error") -} -for (j in 1:J){ -x[[j]]<-winval(x[[j]],win) -} -} -con<-matrix(0,J,J-1) -for (j in 1:Jm){ -jp<-j+1 -con[j,j]<-1 -con[jp,j]<-0-1 -} -# Determine nboot if a value was not specified -if(is.na(nboot)){ -nboot<-5000 -if(J <= 8)nboot<-4000 -if(J <= 3)nboot<-2000 -} -# Determine critical values -if(alpha==.05){ -dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) -if(Jm > 10){ -avec<-.05/c(11:Jm) -dvec<-c(dvec,avec) -}} -if(alpha==.01){ -dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) -if(Jm > 10){ -avec<-.01/c(11:Jm) -dvec<-c(dvec,avec) -}} -if(alpha != .05 && alpha != .01)dvec<-alpha/c(1:Jm) -bvec<-matrix(NA,nrow=J,ncol=nboot) -set.seed(2) # set seed of random number generator so that -# results can be duplicated. -print("Taking bootstrap samples. Please wait.") -for(j in 1:J){ -paste("Working on group ",j) -data<-matrix(sample(x[[j]],size=length(x[[j]])*nboot,replace=TRUE),nrow=nboot) -bvec[j,]<-apply(data,1,mean,tr) # Bootstrapped trimmed means for jth group -} -test<-NA -for (d in 1:Jm){ -dp<-d+1 -test[d]<-sum(bvec[d,]>bvec[dp,])/nboot -if(test[d]> .5)test[d]<-1-test[d] -} -test<-(0-1)*sort(-2*test) -sig<-sum((test0)print("Significant result obtained: Reject") -if(sig==0)print("No significant result obtained: Fail to reject") -list(test.vec=test,crit.vec=dvec[1:Jm]) -} - -pbanovag<-function(x,alpha=.05,nboot=NA,grp=NA,est=onestep,...){ -# -# Test the hypothesis that J independent groups have -# equal measures of location using the percentile bootstrap method. -# (Robust measures of scale can be compared as well.) -# -# The data are assumed to be stored in x -# which either has list mode or is a matrix. In the first case -# x[[1]] contains the data for the first group, x[[2]] the data -# for the second group, etc. Length(x)=the number of groups = J. -# If stored in a matrix, the columns of the matrix correspond -# to groups. -# -# est is the measure of location and defaults to a M-estimator -# ... can be used to set optional arguments associated with est -# -# The argument grp can be used to analyze a subset of the groups -# Example: grp=c(1,3,5) would compare groups 1, 3 and 5. -# -# Missing values are allowed. -# -con<-as.matrix(con) -if(is.matrix(x))x<-listm(x) -if(!is.list(x))stop("Data must be stored in list mode or in matrix mode.") -if(!is.na(sum(grp))){ -# Only analyze specified groups. -xx<-list() -for(i in 1:length(grp))xx[[i]]<-x[[grp[1]]] -x<-xx -} -J<-length(x) -tempn<-0 -for(j in 1:J){ -temp<-x[[j]] -temp<-temp[!is.na(temp)] # Remove missing values. 
-tempn[j]<-length(temp) -x[[j]]<-temp -} -Jm<-J-1 -icl<-ceiling(crit*nboot) -icu<-ceiling((1-crit)*nboot) -con<-matrix(0,J,J-1) -for (j in 1:Jm){ -jp<-j+1 -con[j,j]<-1 -con[jp,j]<-0-1 -} -# Determine nboot if a value was not specified -if(is.na(nboot)){ -nboot<-5000 -if(J <= 8)nboot<-4000 -if(J <= 3)nboot<-2000 -} -# Determine critical values -if(alpha==.05){ -dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) -if(Jm > 10){ -avec<-.05/c(11:Jm) -dvec<-c(dvec,avec) -}} -if(alpha==.01){ -dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) -if(Jm > 10){ -avec<-.01/c(11:Jm) -dvec<-c(dvec,avec) -}} -if(alpha != .05 && alpha != .01)dvec<-alpha/c(1:Jm) -bvec<-matrix(NA,nrow=J,ncol=nboot) -set.seed(2) # set seed of random number generator so that -# results can be duplicated. -print("Taking bootstrap samples. Please wait.") -for(j in 1:J){ -paste("Working on group ",j) -data<-matrix(sample(x[[j]],size=length(x[[j]])*nboot,replace=TRUE),nrow=nboot) -bvec[j,]<-apply(data,1,est,...) # Bootstrapped trimmed means for jth group -} -test<-NA -for (d in 1:Jm){ -dp<-d+1 -test[d]<-sum(bvec[d,]>bvec[dp,])/nboot -if(test[d]> .5)test[d]<-1-test[d] -} -test<-(0-1)*sort(-2*test) -sig<-sum((test0)print("Significant result obtained: Reject") -if(sig==0)print("No significant result obtained: Fail to reject") -list(test.vec=test,crit.vec=dvec[1:Jm]) -} -bootse<-function(x,nboot=1000,est=median,SEED=TRUE,...){ -# -# Compute bootstrap estimate of the standard error of the -# estimator est -# The default number of bootstrap samples is nboot=100 -# -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -data<-matrix(sample(x,size=length(x)*nboot,replace=TRUE),nrow=nboot) -bvec<-apply(data,1,est,...) -bootse<-sqrt(var(bvec)) -bootse -} - - -rananova<-function(x,tr=.2,grp=NA){ -# -# A heteroscedastic one-way random effects ANOVA for trimmed means. -# -# The data are assumed to be stored in a matrix on in list mode. -# If in list mode, -# Length(x) is assumed to correspond to the total number of groups. -# If the data are stored in a matrix, groups correspond to columns. -# By default, the null hypothesis is that all group have a common mean. -# To compare a subset of the groups, use grp to indicate which -# groups are to be compared. For example, if you type the -# command grp<-c(1,3,4), and then execute this function, groups -# 1, 3, and 4 will be compared with the remaining groups ignored. -# -if(is.matrix(x))x<-listm(x) -if(is.na(grp[1]))grp<-c(1:length(x)) -if(!is.list(x))stop("Data are not stored in a matrix or in list mode") -J<-length(grp) # The number of groups to be compared -#if(pr)print("The number of groups to be compared is") -#print(J) -h<-1 -xbar<-1 -ybar<-1 -wvar<-1 -ell<-0 -for(j in 1:J){ -ell[j]<-length(x[[grp[j]]])/(length(x[[grp[j]]])+1) -h[j]<-length(x[[grp[j]]])-2*floor(tr*length(x[[grp[j]]])) - # h is the number of observations in the jth group after trimming. 
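# ybar, xbar and wvar store the Winsorized mean, trimmed mean and Winsorized
# variance of group j; these feed the test statistic D and the Winsorized
# intraclass correlation rho computed below.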
-ybar[j]<-winmean(x[[grp[j]]],tr) -xbar[j]<-mean(x[[grp[j]]],tr) -wvar[j]<-winvar(x[[grp[j]]],tr) -} -q<-NA -bsst<-var(xbar) -for (j in 1:J)q[j]<-(length(x[[grp[j]]]-1)-1)*wvar[j]/(h[j]*(h[j]-1)) -wssw<-mean(q) -D<-bsst/wssw -g<-q/J -nu1<-((J-1)*sum(q))^2/((sum(q))^2+(J-2)*J*sum(q^2)) -nu2<-(sum(J*q))^2/sum((J*q)^2/(h-1)) -sig<-1-pf(D,nu1,nu2) -# Next, estimate the Winsorized intraclass correlation -sighat<-mean(ell*(ybar-(sum(ell*ybar)/sum(ell)))^2) -rho<-sighat/(sighat+winmean(wvar,tr)) -list(teststat=D,df=c(nu1,nu2),p.value=sig,rho=rho,num.groups=J) -} - - -linpbg<-function(x,con=0,alpha=.05,nboot=NA,est=mest,...){ -# -# Compute a 1-alpha confidence interval -# for a set of d linear contrasts -# involving trimmed means using the percentile bootstrap method. -# Independent groups are assumed. -# -# The data are assumed to be stored in x in list mode or in a matrix. -# Thus, -# x[[1]] contains the data for the first group, x[[2]] the data -# for the second group, etc. -# If x has list mode, length(x)=the number of groups = J, say. -# -# Missing values are automatically removed. -# -# con is a J by d matrix containing the -# contrast coefficents of interest. -# If unspecified, all pairwise comparisons are performed. -# For example, con[,1]=c(1,1,-1,-1,0,0) -# and con[,2]=c(,1,-1,0,0,1,-1) -# will test two contrasts: (1) the sum of the first -# two trimmed means is -# equal to the sum of the second two, -# and (2) the difference between -# the first two is equal to the difference -# between the trimmed means of -# groups 5 and 6. -# -# -con<-as.matrix(con) -if(is.matrix(x))x<-listm(x) -if(!is.list(x))stop("Data must be stored in a matrix or in list mode.") -J<-length(x) -for(j in 1:J){ -xx<-x[[j]] -x[[j]]<-xx[!is.na(xx)] # Remove any missing values. -} -Jm<-J-1 -d<-(J^2-J)/2 -if(sum(con^2)==0){ -con<-matrix(0,J,d) -id<-0 -for (j in 1:Jm){ -jp<-j+1 -for (k in jp:J){ -id<-id+1 -con[j,id]<-1 #If con not specified do all pairwise comparisons -con[k,id]<-0-1 -}}} -if(nrow(con)!=length(x)){ -stop("The number of groups does not match the number of contrast coefficients.") -} -if(is.na(nboot)){ -nboot<-5000 -if(ncol(con)<=4)nboot<-2000 -} -m1<-matrix(0,nrow=J,ncol=nboot) -set.seed(2) # set seed of random number generator so that -# results can be duplicated. -print("Taking bootstrap samples. Please wait.") -for(j in 1:J){ -paste("Working on group ",j) -data<-matrix(sample(x[[j]],size=length(x[[j]])*nboot,replace=TRUE),nrow=nboot) -m1[j,]<-apply(data,1,est,...) 
-} -testb<-NA -boot<-matrix(0,ncol(con),nboot) -testvec<-NA -for (d in 1:ncol(con)){ -boot[d,]<-apply(m1,2,trimpartt,con[,d]) -# A vector of length nboot containing psi hat values -# and corresponding to the dth linear contrast -testb[d]<-sum((boot[d,]>0))/nboot -testvec[d]<-min(testb[d],1-testb[d]) -} -# -# Determine critical value -# -dd<-ncol(con) -if(alpha==.05){ -if(dd==1)crit<-alpha/2 -if(dd==2)crit<-.014 -if(dd==3)crit<-.0085 -if(dd==4)crit<-.007 -if(dd==5)crit<-.006 -if(dd==6)crit<-.0045 -if(dd==10)crit<-.0023 -if(dd==15)crit<-.0016 -} -else{ -crit<-alpha/(2*dd) -} -icl<-round(crit*nboot) -icu<-round((1-crit)*nboot) -psihat<-matrix(0,ncol(con),4) -test<-matrix(0,ncol(con),3) -dimnames(psihat)<-list(NULL,c("con.num","psihat","ci.lower","ci.upper")) -dimnames(test)<-list(NULL,c("con.num","test","crit.val")) -for (d in 1:ncol(con)){ -test[d,1]<-d -psihat[d,1]<-d -testit<-lincon(x,con[,d],tr) -test[d,2]<-testvec[d] -temp<-sort(boot[d,]) -psihat[d,3]<-temp[icl] -psihat[d,4]<-temp[icu] -psihat[d,2]<-testit$psihat[1,2] -test[d,3]<-crit -} -list(psihat=psihat,test=test,con=con) -} - - - - -lintpb<-function(x,con=0,tr=.2,alpha=.05,nboot=NA){ -# -# Compute a 1-alpha confidence interval -# for a set of d linear contrasts -# involving trimmed means using the percentile bootstrap method. -# Independent groups are assumed. -# -# The data are assumed to be stored in x in list mode or in a matrix. -# Thus, -# x[[1]] contains the data for the first group, x[[2]] the data -# for the second group, etc. -# If x has list mode, length(x)=the number of groups = J, say. -# -# Missing values are automatically removed. -# -# con is a J by d matrix containing the -# contrast coefficents of interest. -# If unspecified, all pairwise comparisons are performed. -# For example, con[,1]=c(1,1,-1,-1,0,0) -# and con[,2]=c(,1,-1,0,0,1,-1) -# will test two contrasts: (1) the sum of the first -# two trimmed means is -# equal to the sum of the second two, -# and (2) the difference between -# the first two is equal to the difference -# between the trimmed means of -# groups 5 and 6. -# -# -con<-as.matrix(con) -if(is.matrix(x))x<-listm(x) -if(!is.list(x))stop("Data must be stored in a matrix or in list mode.") -J<-length(x) -for(j in 1:J){ -xx<-x[[j]] -xx[[j]]<-xx[!is.na(xx)] # Remove any missing values. -} -Jm<-J-1 -d<-(J^2-J)/2 -if(sum(con^2)==0){ -con<-matrix(0,J,d) -id<-0 -for (j in 1:Jm){ -jp<-j+1 -for (k in jp:J){ -id<-id+1 -con[j,id]<-1 #If con not specified do all pairwise comparisons -con[k,id]<-0-1 -}}} -if(nrow(con)!=length(x)){ -stop("The number of groups does not match the number of contrast coefficients.") -} -if(is.na(nboot)){ -nboot<-5000 -if(ncol(con)<=4)nboot<-2000 -} -m1<-matrix(0,nrow=J,ncol=nboot) -set.seed(2) # set seed of random number generator so that -# results can be duplicated. -print("Taking bootstrap samples. 
Please wait.") -for(j in 1:J){ -paste("Working on group ",j) -data<-matrix(sample(x[[j]],size=length(x[[j]])*nboot,replace=TRUE),nrow=nboot) -m1[j,]<-apply(data,1,mean,tr) -} -testb<-NA -boot<-matrix(0,ncol(con),nboot) -testvec<-NA -for (d in 1:ncol(con)){ -boot[d,]<-apply(m1,2,trimpartt,con[,d]) -# A vector of length nboot containing psi hat values -# and corresponding to the dth linear contrast -testb[d]<-sum((boot[d,]>0))/nboot -testvec[d]<-min(testb[d],1-testb[d]) -} -# -# Determine critical value -# -dd<-ncol(con) -if(alpha==.05){ -if(dd==1)crit<-alpha/2 -if(dd==2)crit<-.014 -if(dd==3)crit<-.0085 -if(dd==4)crit<-.007 -if(dd==5)crit<-.006 -if(dd==6)crit<-.0045 -if(dd==10)crit<-.0023 -if(dd==15)crit<-.0016 -} -else{ -crit<-alpha/(2*dd) -} -icl<-round(crit*nboot) -icu<-round((1-crit)*nboot) -psihat<-matrix(0,ncol(con),4) -test<-matrix(0,ncol(con),3) -dimnames(psihat)<-list(NULL,c("con.num","psihat","ci.lower","ci.upper")) -dimnames(test)<-list(NULL,c("con.num","test","crit.val")) -for (d in 1:ncol(con)){ -test[d,1]<-d -psihat[d,1]<-d -testit<-lincon(x,con[,d],tr) -test[d,2]<-testvec[d] -temp<-sort(boot[d,]) -psihat[d,3]<-temp[icl] -psihat[d,4]<-temp[icu] -psihat[d,2]<-testit$psihat[1,2] -test[d,3]<-crit -} -list(psihat=psihat,test=test,con=con) -} - -t2waypbg<-function(J,K,x,alpha=.05,nboot=NA,grp=NA,est=onestep,...){ -# -# Two-way ANOVA for independent groups based on -# robust measures of location -# and a percentile bootstrap method. - -# The data are assumed to be stored in x in list mode or in a matrix. - # If grp is unspecified, it is assumed x[[1]] contains the data - # for the first level of both factors: level 1,1. - # x[[2]] is assumed to contain the data for level 1 of the - # first factor and level 2 of the second factor: level 1,2 - # x[[j+1]] is the data for level 2,1, etc. - # If the data are in wrong order, grp can be used to rearrange the - # groups. For example, for a two by two design, grp<-c(2,4,3,1) - # indicates that the second group corresponds to level 1,1; - # group 4 corresponds to level 1,2; group 3 is level 2,1; - # and group 1 is level 2,2. -# -# Missing values are automatically removed. -# -if(is.data.frame(x))x=as.matrix(x) -JK<-J*K -if(is.matrix(x))x<-listm(x) -if(!is.na(grp)){ -yy<-x -for(j in 1:length(grp)) -x[[j]]<-yy[[grp[j]]] -} -if(!is.list(x))stop("Data must be stored in list mode or a matrix.") -for(j in 1:JK){ -xx<-x[[j]] -x[[j]]<-xx[!is.na(xx)] # Remove any missing values. -} -# -# Create the three contrast matrices -# - ij <- matrix(c(rep(1, J)), 1, J) - ik <- matrix(c(rep(1, K)), 1, K) - jm1 <- J - 1 - cj <- diag(1, jm1, J) - for(i in 1:jm1) - cj[i, i + 1] <- 0 - 1 - km1 <- K - 1 - ck <- diag(1, km1, K) - for(i in 1:km1) - ck[i, i + 1] <- 0 - 1 -conA<-t(kron(cj,ik)) -conB<-t(kron(ij,ck)) -conAB<-t(kron(cj,ck)) -ncon<-max(nrow(conA),nrow(conB),nrow(conAB)) -if(JK!=length(x)){ -print("Warning: The number of groups does not match") -print("the number of contrast coefficients.") -} -if(is.na(nboot)){ -nboot<-5000 -if(ncon<=4)nboot<-2000 -} -m1<-matrix(0,nrow=JK,ncol=nboot) -set.seed(2) # set seed of random number generator so that -# results can be duplicated. -print("Taking bootstrap samples. Please wait.") -for(j in 1:JK){ -paste("Working on group ",j) -data<-matrix(sample(x[[j]],size=length(x[[j]])*nboot,replace=TRUE),nrow=nboot) -m1[j,]<-apply(data,1,est,...) 
-} -bootA<-matrix(0,ncol(conA),nboot) -bootB<-matrix(0,ncol(conB),nboot) -bootAB<-matrix(0,ncol(conAB),nboot) -testA<-NA -testB<-NA -testAB<-NA -testvecA<-NA -testvecB<-NA -testvecAB<-NA -for (d in 1:ncol(conA)){ -bootA[d,]<-apply(m1,2,trimpartt,conA[,d]) -# A vector of length nboot containing psi hat values -# corresponding to the dth linear contrast -testA[d]<-sum((bootA[d,]>0))/nboot -testA[d]<-min(testA[d],1-testA[d]) -} -for (d in 1:ncol(conB)){ -bootB[d,]<-apply(m1,2,trimpartt,conB[,d]) -# A vector of length nboot containing psi hat values -# corresponding to the dth linear contrast -testB[d]<-sum((bootB[d,]>0))/nboot -testB[d]<-min(testB[d],1-testB[d]) -} -for (d in 1:ncol(conAB)){ -bootAB[d,]<-apply(m1,2,trimpartt,conAB[,d]) -# A vector of length nboot containing psi hat values -# corresponding to the dth linear contrast -testAB[d]<-sum((bootAB[d,]>0))/nboot -testAB[d]<-min(testAB[d],1-testAB[d]) -} -# -# Determine critical value -# -Jm<-J-1 -Km<-K-1 -JKm<-(J-1)*(K-1) -dvecA <- alpha/c(1:Jm) -dvecB <- alpha/c(1:Km) -dvecAB <- alpha/c(1:JKm) -testA<-(0 - 1) * sort(-2 * testA) -testB<-(0 - 1) * sort(-2 * testB) -testAB<-(0 - 1) * sort(-2 * testAB) -sig <- sum((testA < dvecA[1:Jm])) -if(sig > 0) -print("Significant result obtained for Factor A: Reject") -if(sig == 0) -print("No significant result Factor A: Fail to reject") -sig <- sum((testB < dvecB[1:Km])) -if(sig > 0) -print("Significant result obtained for Factor B: Reject") -if(sig == 0) -print("No significant result Factor B: Fail to reject") -sig <- sum((testAB < dvecAB[1:JKm])) -if(sig > 0) -print("Significant Interaction: Reject") -if(sig == 0) -print("No significant Interaction: Fail to reject") -list(testA=testA,crit.vecA=dvecA,testB=testB,crit.vecB=dvecB,testAB=testAB,crit.vecAB=dvecAB) -} - -regout<-function(x,y,regest=stsreg,plotit=TRUE,mbox=TRUE){ -# -# Check for regression outliers by fitting a -# a line to data using regest and then applying -# a boxplot rule to the residuals. -# mbox=T uses Carling's method -# mbox=F uses ideal fourths with conventional boxplot rules. -# -chk<-regest(x,y) -flag<-outbox(chk$residuals,mbox=mbox)$out.id -if(plotit){ -plot(x,y) -points(x[flag],y[flag],pch="o") -abline(chk$coef) -} -list(out.id=flag) -} - -stsregp1<-function(x,y,sc=pbvar,xout=FALSE,outfun=out,...){ -# -# Compute the S-type modification of -# the Theil-Sen regression estimator. -# Only a single predictor is allowed in this version -# -xy=elimna(cbind(x,y)) -p=ncol(as.matrix(x)) -if(p!=1)stop("Current version is limited to one predictor") -p1=p+1 -x=xy[,1:p] -y=xy[,p1] -x<-as.matrix(x) -if(xout){ -x<-as.matrix(x) -flag<-outfun(x,plotit=plotit,...)$keep -x<-x[flag,] -y<-y[flag] -x<-as.matrix(x) -} -ord<-order(x) -xs<-x[ord] -ys<-y[ord] -vec1<-outer(ys,ys,"-") -vec2<-outer(xs,xs,"-") -v1<-vec1[vec2>0] -v2<-vec2[vec2>0] -slope<-v1/v2 -allvar<-NA -for(i in 1:length(slope))allvar[i]<-sc(y-slope[i]*x,...) 
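# Pick the candidate slope that minimizes the robust measure of scatter (sc)
# applied to the residuals; the intercept is then median(y)-slope*median(x).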
-temp<-order(allvar) -coef<-0 -coef[2]<-slope[temp[1]] -coef[1]<-median(y)-coef[2]*median(x) -res<-y-coef[2]*x-coef[1] -list(coef=coef,residuals=res) -} - -stsreg<-function(x,y,xout=FALSE,outfun=outpro,iter=10,sc=pbvar,varfun=pbvar, -corfun=pbcor,plotit=FALSE,...){ -# -# Compute Theil-Sen regression estimator -# -# Use Gauss-Seidel algorithm -# when there is more than one predictor -# -# -x<-as.matrix(x) -xx<-cbind(x,y) -xx<-elimna(xx) -x<-xx[,1:ncol(x)] -x<-as.matrix(x) -y<-xx[,ncol(x)+1] -temp<-NA -x<-as.matrix(x) -if(xout){ -x<-as.matrix(x) -if(identical(outfun,outblp))flag=outblp(x,y,plotit=FALSE)$keep -else -flag<-outfun(x,plotit=plotit,...)$keep -x<-x[flag,] -y<-y[flag] -x<-as.matrix(x) -} -if(ncol(x)==1){ -temp1<-stsregp1(x,y,sc=sc) -coef<-temp1$coef -res<-temp1$res -} -if(ncol(x)>1){ -for(p in 1:ncol(x)){ -temp[p]<-tsp1reg(x[,p],y)$coef[2] -} -res<-y-x%*%temp -alpha<-median(res) -r<-matrix(NA,ncol=ncol(x),nrow=nrow(x)) -tempold<-temp -for(it in 1:iter){ -for(p in 1:ncol(x)){ -r[,p]<-y-x%*%temp-alpha+temp[p]*x[,p] -temp[p]<-stsregp1(x[,p],r[,p],sc=sc)$coef[2] -} -alpha<-median(y-x%*%temp) -tempold<-temp -} -coef<-c(alpha,temp) -res<-y-x%*%temp-alpha -} -yhat<-y-res -stre=NULL -e.pow<-varfun(yhat)/varfun(y) -if(!is.na(e.pow)){ -if(e.pow>=1)e.pow<-corfun(yhat,y)$cor^2 -e.pow=as.numeric(e.pow) -stre=sqrt(e.pow) -} -list(coef=coef,residuals=res,Strength.Assoc=stre,Explanatory.Power=e.pow) -} - - - -yuend<-function(x,y,tr=.2,alpha=.05){ -# -# Compare the trimmed means of two dependent random variables -# using the data in x and y. -# The default amount of trimming is 20% -# -# Any pair with a missing value is eliminated -# The function rm2miss allows missing values. -# -# A confidence interval for the trimmed mean of x minus the -# the trimmed mean of y is computed and returned in yuend$ci. -# The significance level is returned in yuend$p.value -# -# For inferences based on difference scores, use trimci -# -if(length(x)!=length(y))stop("The number of observations must be equal") -m<-cbind(x,y) -m<-elimna(m) -x<-m[,1] -y<-m[,2] -h1<-length(x)-2*floor(tr*length(x)) -q1<-(length(x)-1)*winvar(x,tr) -q2<-(length(y)-1)*winvar(y,tr) -q3<-(length(x)-1)*wincor(x,y,tr)$cov -df<-h1-1 -se<-sqrt((q1+q2-2*q3)/(h1*(h1-1))) -crit<-qt(1-alpha/2,df) -dif<-mean(x,tr)-mean(y,tr) -low<-dif-crit*se -up<-dif+crit*se -test<-dif/se -yuend<-2*(1-pt(abs(test),df)) -list(ci=c(low,up),p.value=yuend,est1=mean(x,tr),est2=mean(y,tr),dif=dif,se=se,teststat=test,n=length(x),df=df) -} - - -rmmcppbtm<-function(x,alpha=.05,con=0,tr=.2,grp=NA,nboot=NA){ -# -# Using the percentile bootstrap method, -# compute a .95 confidence interval for all linear contasts -# specified by con, a J by C matrix, where C is the number of -# contrasts to be tested, and the columns of con are the -# contrast coefficients. -# -# The trimmed means of dependent groups are being compared. -# By default, 20% trimming is used. -# -# nboot is the bootstrap sample size. If not specified, a value will -# be chosen depending on the number of contrasts there are. -# -# x can be an n by J matrix or it can have list mode -# -# For alpha=.05, some critical values have been -# determined via simulations and are used by this function; -# otherwise an approximation is used. 
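# Example (hypothetical data): with m an n-by-3 matrix of dependent groups,
# rmmcppbtm(m) tests all pairwise contrasts among the 20% trimmed means;
# rmmcppbtm(m,con=matrix(c(1,-1,0),3,1)) would compare groups 1 and 2 only.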
-# -if(!is.list(x) && !is.matrix(x))stop("Data must be stored in a matrix or in list mode.") -if(is.list(x)){ -if(is.matrix(con)){ -if(length(x)!=nrow(con))stop("The number of rows in con is not equal to the number of groups.") -}} -if(is.list(x)){ -# put the data in an n by J matrix -mat<-matrix(0,length(x[[1]]),length(x)) -for (j in 1:length(x))mat[,j]<-x[[j]] -} -if(is.matrix(x) && is.matrix(con)){ -if(ncol(x)!=nrow(con))stop("The number of rows in con is not equal to the number of groups.") -mat<-x -} -if(is.matrix(x))mat<-x -if(!is.na(sum(grp)))mat<-mat[,grp] -mat<-elimna(mat) # Remove rows with missing values. -J<-ncol(mat) -Jm<-J-1 -if(sum(con^2)==0){ -d<-(J^2-J)/2 -con<-matrix(0,J,d) -id<-0 -for (j in 1:Jm){ -jp<-j+1 -for (k in jp:J){ -id<-id+1 -con[j,id]<-1 -con[k,id]<-0-1 -}}} -d<-ncol(con) -if(is.na(crit) && tr != .2){ -print("A critical value must be specified when") -stop("the amount of trimming differs from .2") -} -if(is.na(nboot)){ -if(d<=3)nboot<-1000 -if(d==6)nboot<-2000 -if(d==10)nboot<-4000 -if(d==15)nboot<-8000 -if(d==21)nboot<-8000 -if(d==28)nboot<-10000 -} -n<-nrow(mat) -crit<-NA -if(alpha==.05){ -if(d==1)crit<-alpha/2 -if(d==3){ -crit<-.004 -if(n>=15)crit<-.006 -if(n>=30)crit<-.007 -if(n>=40)crit<-.008 -if(n>=100)crit<-.009 -} -if(d==6){ -crit<-.001 -if(n>=15)crit<-.002 -if(n>=20)crit<-.0025 -if(n>=30)crit<-.0035 -if(n>=40)crit<-.004 -if(n>=60)crit<-.0045 -} -if(d==10){ -crit<-.00025 -if(n>=15)crit<-.00125 -if(n>=20)crit<-.0025 -} -if(d==15){ -crit<-.0005 -if(n>=20)crit<-.0010 -if(n>=30)crit<-.0011 -if(n>=40)crit<-.0016 -if(n>=100)crit<-.0019 -} -if(d==21){ -crit<-.00025 -if(n>=20)crit<-.00037 -if(n>=30)crit<-.00075 -if(n>=40)crit<-.00087 -if(n>=60)crit<-.00115 -if(n>=100)crit<-.00125 -} -if(d==28){ -crit<-.0004 -if(n>=30)crit<-.0006 -if(n>=60)crit<-.0008 -if(n>=100)crit<-.001 -} -} -if(is.na(crit)){ -crit<-alpha/(2*d) -if(n<20)crit<-crit/2 -if(n<=10)crit<-crit/2 -} -icl<-ceiling(crit*nboot)+1 -icu<-ceiling((1-crit)*nboot) -connum<-ncol(con) -set.seed(2) # set seed of random number generator so that -# results can be duplicated. -# data is an nboot by n matrix -xbars<-matrix(0,nboot,ncol(mat)) -psihat<-matrix(0,connum,nboot) -print("Taking bootstrap samples. Please wait.") -bvec<-bootdep(mat,tr,nboot) -# -# Now have an nboot by J matrix of bootstrap values. -# -test<-1 -for (ic in 1:connum){ -psihat[ic,]<-apply(bvec,1,bptdpsi,con[,ic]) -test[ic]<-sum((psihat[ic,]>0))/nboot -test[ic]<-min(test[ic],1-test[ic]) -} -print("Reminder: Test statistic must be less than critical value in order to reject.") -output<-matrix(0,connum,5) -dimnames(output)<-list(NULL,c("con.num","psihat","test","ci.lower","ci.upper")) -tmeans<-apply(mat,2,mean,trim=tr) -psi<-1 -for (ic in 1:ncol(con)){ -output[ic,2]<-sum(con[,ic]*tmeans) -output[ic,1]<-ic -output[ic,3]<-test[ic] -temp<-sort(psihat[ic,]) -output[ic,4]<-temp[icl] -output[ic,5]<-temp[icu] -} -list(output=output,crit=crit,con=con) -} - -mcppb20<-function(x,crit=NA,con=0,tr=.2,alpha=.05,nboot=2000,grp=NA,WIN=FALSE, -win=.1){ -# -# Compute a 1-alpha confidence interval for a set of d linear contrasts -# involving trimmed means using the percentile bootstrap method. -# Independent groups are assumed. -# -# The data are assumed to be stored in x in list mode. Thus, -# x[[1]] contains the data for the first group, x[[2]] the data -# for the second group, etc. Length(x)=the number of groups = J, say. -# -# By default, all pairwise comparisons are performed, but contrasts -# can be specified with the argument con. 
-# The columns of con indicate the contrast coefficients. -# Con should have J rows, J=number of groups. -# For example, con[,1]=c(1,1,-1,-1,0,0) and con[,2]=c(,1,-1,0,0,1,-1) -# will test two contrasts: (1) the sum of the first two trimmed means is -# equal to the sum of the second two, and (2) the difference between -# the first two is equal to the difference between the trimmed means of -# groups 5 and 6. -# -# The default number of bootstrap samples is nboot=2000 -# -# -con<-as.matrix(con) -if(is.matrix(x)){ -xx<-list() -for(i in 1:ncol(x)){ -xx[[i]]<-x[,i] -} -x<-xx -} -if(!is.list(x))stop("Data must be stored in list mode or in matrix mode.") -if(!is.na(sum(grp))){ # Only analyze specified groups. -xx<-list() -for(i in 1:length(grp))xx[[i]]<-x[[grp[1]]] -x<-xx -} -J<-length(x) -tempn<-0 -for(j in 1:J){ -temp<-x[[j]] -temp<-temp[!is.na(temp)] # Remove missing values. -tempn[j]<-length(temp) -x[[j]]<-temp -} -Jm<-J-1 -d<-ifelse(sum(con^2)==0,(J^2-J)/2,ncol(con)) -if(is.na(crit) && tr != .2){ -print("A critical value must be specified when") -stop("the amount of trimming differs from .2") -} -if(WIN){ -if(tr < .2){ -print("Warning: When Winsorizing, the amount") -print("of trimming should be at least .2") -} -if(win > tr)stop("Amount of Winsorizing must <= amount of trimming") -if(min(tempn) < 15){ -print("Warning: Winsorizing with sample sizes") -print("less than 15 can result in poor control") -print("over the probability of a Type I error") -} -for (j in 1:J){ -x[[j]]<-winval(x[[j]],win) -} -} -if(is.na(crit)){ -if(d==1)crit<-alpha/2 -if(d==2 && alpha==.05 && nboot==1000)crit<-.014 -if(d==2 && alpha==.05 && nboot==2000)crit<-.014 -if(d==3 && alpha==.05 && nboot==1000)crit<-.009 -if(d==3 && alpha==.05 && nboot==2000)crit<-.0085 -if(d==3 && alpha==.025 && nboot==1000)crit<-.004 -if(d==3 && alpha==.025 && nboot==2000)crit<-.004 -if(d==3 && alpha==.01 && nboot==1000)crit<-.001 -if(d==3 && alpha==.01 && nboot==2000)crit<-.001 -if(d==4 && alpha==.05 && nboot==2000)crit<-.007 -if(d==5 && alpha==.05 && nboot==2000)crit<-.006 -if(d==6 && alpha==.05 && nboot==1000)crit<-.004 -if(d==6 && alpha==.05 && nboot==2000)crit<-.0045 -if(d==6 && alpha==.025 && nboot==1000)crit<-.002 -if(d==6 && alpha==.025 && nboot==2000)crit<-.0015 -if(d==6 && alpha==.01 && nboot==2000)crit<-.0005 -if(d==10 && alpha==.05 && nboot<=2000)crit<-.002 -if(d==10 && alpha==.05 && nboot==3000)crit<-.0023 -if(d==10 && alpha==.025 && nboot<=2000)crit<-.0005 -if(d==10 && alpha==.025 && nboot==3000)crit<-.001 -if(d==15 && alpha==.05 && nboot==2000)crit<-.0016 -if(d==15 && alpha==.025 && nboot==2000)crit<-.0005 -if(d==15 && alpha==.05 && nboot==5000)crit<-.0026 -if(d==15 && alpha==.025 && nboot==5000)crit<-.0006 -} -if(is.na(crit) && alpha==.05)crit<-0.0268660714*(1/d)-0.0003321429 -if(is.na(crit))crit<-alpha/(2*d) -if(d> 10 && nboot <5000){ -print("Warning: Suggest using nboot=5000") -print("when the number of contrasts exceeds 10.") -} -icl<-round(crit*nboot)+1 -icu<-round((1-crit)*nboot) -if(sum(con^2)==0){ -con<-matrix(0,J,d) -id<-0 -for (j in 1:Jm){ -jp<-j+1 -for (k in jp:J){ -id<-id+1 -con[j,id]<-1 -con[k,id]<-0-1 -}}} -psihat<-matrix(0,ncol(con),6) -dimnames(psihat)<-list(NULL,c("con.num","psihat","se","ci.lower", -"ci.upper","p-value")) -if(nrow(con)!=length(x)){ -print("The number of groups does not match") -stop("the number of contrast coefficients.") -} -bvec<-matrix(NA,nrow=J,ncol=nboot) -set.seed(2) # set seed of random number generator so that -# results can be duplicated. -print("Taking bootstrap samples. 
Please wait.") -for(j in 1:J){ -paste("Working on group ",j) -data<-matrix(sample(x[[j]],size=length(x[[j]])*nboot,replace=TRUE),nrow=nboot) -bvec[j,]<-apply(data,1,mean,tr) # Bootstrapped trimmed means for jth group -} -test<-NA -for (d in 1:ncol(con)){ -top<-0 -for (i in 1:J){ -top<-top+con[i,d]*bvec[i,] -} -test[d]<-(sum(top>0)+.5*sum(top==0))/nboot -test[d]<-min(test[d],1-test[d]) -top<-sort(top) -psihat[d,4]<-top[icl] -psihat[d,5]<-top[icu] -} -for (d in 1:ncol(con)){ -psihat[d,1]<-d -testit<-lincon(x,con[,d],tr,pr=FALSE) -psihat[d,6]<-2*test[d] -psihat[d,2]<-testit$psihat[1,2] -psihat[d,3]<-testit$test[1,4] -} -list(psihat=psihat,crit.p.value=2*crit,con=con) -} - -comvar2d<-function(x,y,SEED=TRUE){ -# -# Compare the variances of two dependent groups. -# -nboot<-599 -m<-cbind(x,y) -m<-elimna(m) # Remove missing values -U<-m[,1]-m[,2] -V<-m[,1]+m[,2] -ci<-pcorb(U,V,SEED=SEED)$ci -list(n=nrow(m),ci=ci) -} -mom<-function(x,bend=2.24,na.rm=TRUE){ -# -# Compute MOM-estimator of location. -# The default bending constant is 2.24 -# -if(na.rm)x<-x[!is.na(x)] #Remove missing values -flag1<-(x>median(x)+bend*mad(x)) -flag2<-(xnull.value)+.5*mean(bvec==null.value) -pv=2*min(c(pv,1-pv)) -list(ci=c(bvec[low],bvec[up]),p.value=pv,est.mom=est) -} - - -rmanogsub<-function(isub,x,est=onestep,...){ -tsub <- est(x[isub],...) -tsub -} - -bd1way1<-function(isub,xcen,est,misran,...){ -# -# Compute test statistic for bd1way -# -# isub is a vector of length n, -# a bootstrap sample from the sequence of integers -# 1, 2, 3, ..., n -# -# xcen is an n by J matrix containing the input data -# -val<-vector("numeric") -for (j in 1:ncol(xcen))val[j]<-est(xcen[isub,j],na.rm=misran,...) -bd1way1<-(length(val)-1)*var(val) -bd1way1 -} - - -bicovm<-function(x){ -# -# compute a biweight midcovariance matrix for the vectors of -# observations in x, where x is assumed to have list mode, or -# x is an n by p matrix -# -if(is.matrix(x)){ -mcov<-matrix(0,ncol(x),ncol(x)) -mcor<-matrix(0,ncol(x),ncol(x)) -for (i in 1:ncol(x)){ -for (j in 1:ncol(x))mcov[i,j]<-bicov(x[,i],x[,j]) -} -} -if(is.list(x)){ -mcov<-matrix(0,length(x),length(x)) -mcor<-matrix(0,length(x),length(x)) -for (i in 1:length(x)){ -for (j in 1:length(x))mcov[i,j]<-bicov(x[[i]],x[[j]]) -} -} -for (i in 1:ncol(mcov)){ -for (j in 1:ncol(mcov))mcor[i,j]<-mcov[i,j]/sqrt(mcov[i,i]*mcov[j,j]) -} -list(mcov=mcov,mcor=mcor) -} - -bicovM<-function(x){ -M=bicovm(x)$mcov -M -} - -apdis<-function(m,est=sum,...){ -# -# For bivariate data, -# compute distance between each pair -# of points and measure depth of a point -# in terms of its distance to all -# other points -# -# m is an n by 2 matrix -# (In this version, ncol(m)=2 only, for general -# case, use apgdis -# -m<-elimna(m) # eliminate any missing values -disx<-outer(m[,1],m[,1],"-") -disy<-outer(m[,2],m[,2],"-") -temp<-sqrt(disx^2+disy^2) -dis<-apply(temp,1,est,...) -dis -temp2<-order(dis) -center<-m[temp2[1],] -list(center=center,distance=dis) -} - -onesampb<-function(x,est=onestep,alpha=.05,nboot=2000,SEED=TRUE,nv=0,null.value=NULL,...){ -# -# Compute a bootstrap, .95 confidence interval for the -# measure of location corresponding to the argument est. -# By default, a one-step -# M-estimator of location based on Huber's Psi is used. -# The default number of bootstrap samples is nboot=500 -# -# nv=null value when computing a p-value -# -if(!is.null(null.value))nv=null.value -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -#print("Taking bootstrap samples. 
Please wait.") -x=elimna(x) -data<-matrix(sample(x,size=length(x)*nboot,replace=TRUE),nrow=nboot) -bvec<-apply(data,1,est,...) -bvec<-sort(bvec) -low<-round((alpha/2)*nboot) -up<-nboot-low -low<-low+1 -pv=mean(bvec>nv)+.5*mean(bvec==nv) -pv=2*min(c(pv,1-pv)) -estimate=est(x,...) -list(ci=c(bvec[low],bvec[up]),n=length(x),estimate=estimate,p.value=pv) -} - - -pdep<-function(x,y,alpha=.05){ -# -# For two dependent variables, x and y, -# estimate p=P(X.5)pvec[i]<-1-pvec[i] -regci[i,1]<-bsort[ilow] -regci[i,2]<-bsort[ihi] -se[i]<-sqrt(var(bvec[i,])) -} -pvec<-2*pvec -list(regci=regci,p.value=pvec,se=se) -} - - -pbcan<-function(x,nboot=1000,grp=NA,est=onestep,...){ -# -# Test the hypothesis that J independent groups have -# equal measures of location using the percentile bootstrap method. -# in conjunction with a partially centering technique. -# -# The data are assumed to be stored in x -# which either has list mode or is a matrix. In the first case -# x[[1]] contains the data for the first group, x[[2]] the data -# for the second group, etc. Length(x)=the number of groups = J. -# If stored in a matrix, the columns of the matrix correspond -# to groups. -# -# est is the measure of location and defaults to an M-estimator -# ... can be used to set optional arguments associated with est -# -# The argument grp can be used to analyze a subset of the groups -# Example: grp=c(1,3,5) would compare groups 1, 3 and 5. -# -# Missing values are allowed. -# -if(is.matrix(x))x<-listm(x) -if(!is.list(x))stop("Data must be stored in list mode or in matrix mode.") -if(!is.na(sum(grp))){ # Only analyze specified groups. -xx<-list() -for(i in 1:length(grp))xx[[i]]<-x[[grp[1]]] -x<-xx -} -J<-length(x) -tempn<-0 -vecm<-0 -for(j in 1:J){ -temp<-x[[j]] -temp<-temp[!is.na(temp)] # Remove missing values. -tempn[j]<-length(temp) -x[[j]]<-temp -vecm[j]<-est(x[[j]],...) -} -xcen<-list() -flag<-rep(TRUE,J) -for(j in 1:J){ -flag[j]<-FALSE -temp<-mean(vecm[flag]) -xcen[[j]]<-x[[j]]-temp -flag[j]<-T -} -icrit<-round((1-alpha)*nboot) -bvec<-matrix(NA,nrow=J,ncol=nboot) -set.seed(2) # set seed of random number generator so that -# results can be duplicated. -print("Taking bootstrap samples. Please wait.") -for(j in 1:J){ -paste("Working on group ",j) -data<-matrix(sample(xcen[[j]],size=length(x[[j]])*nboot,replace=TRUE),nrow=nboot) -bvec[j,]<-apply(data,1,est,...) # Bootstrapped values for jth group -} -vvec<-NA -for(j in 1:J){ -vvec[j]<-sum((bvec[j,]-vecm[j])^2)/(nboot-1) -} -dis<-NA -for(i in 1:nboot){ -dis[i]<-sum((bvec[,i]-vecm)^2/vvec) -} -tvec<-sum((0-vecm)^2/vvec) -dis<-sort(dis) -print(tvec) -print(dis[icrit]) -print(vecm) -sig<-1-sum((tvec>=dis))/nboot -list(p.value=sig) -} - -rmaseq<-function(x,est=onestep,alpha=.05,grp=NA,nboot=NA,...){ -# -# Using the percentile bootstrap method, -# test hypothesis that all marginal distributions -# among J dependent groups -# have a common measure of location. -# This is done by using a sequentially rejective method -# of J-1 pairs of groups. -# That is, compare group 1 to group 2, group 2 to group 3, etc. -# -# By default, onestep M-estimator is used. -# -# nboot is the bootstrap sample size. If not specified, a value will -# be chosen depending on the number of groups -# -# x can be an n by J matrix or it can have list mode -# grp can be used to specify a subset of the groups for analysis -# -# the argument ... can be used to specify options associated -# with the argument est. 
-# -if(!is.list(x) && !is.matrix(x)){ -stop("Data must be stored in a matrix or in list mode.") -} -if(is.list(x)){ -# put the data in an n by J matrix -mat<-matrix(0,length(x[[1]]),length(x)) -for (j in 1:length(x))mat[,j]<-x[[j]] -} -if(is.matrix(x))mat<-x -mat<-elimna(mat) # Remove rows with missing values. -J<-ncol(mat) -Jm<-J-1 -con<-matrix(0,ncol=Jm,nrow=J) -for(j in 1:Jm){ -jp<-j+1 -for(k in j:jp){ -con[j,j]<-1 -con[jp,j]<-0-1 -}} -rmmcp(x,est=est,alpha=alpha,con=con,nboot=nboot,...) -} - -rmanog<-function(x,alpha=.05,est=onestep,grp=NA,nboot=NA,...){ -# -# Using the percentile bootstrap method, -# test the hypothesis that all differences among J -# dependent groups have a -# measure of location equal to zero. -# That is, if -# Dij is the difference between ith observations -# in groups j and j+1, -# and Dij has measure of location muj -# the goal is to test -# H0: mu1=mu2=...=0 -# -# By default, an M-estimator is used. -# -# nboot is the bootstrap sample size. If not specified, a value will -# be chosen depending on the number of groups -# -# x can be an n by J matrix or it can have list mode -# grp can be used to specify a subset of the groups for analysis -# -# the argument ... can be used to specify options associated -# with the argument est. -# -if(!is.list(x) && !is.matrix(x))stop("Data must be stored in a matrix or in list mode.") -if(is.list(x)){ -# put the data in an n by J matrix -mat<-matrix(0,length(x[[1]]),length(x)) -for (j in 1:length(x))mat[,j]<-x[[j]] -} -if(is.matrix(x))mat<-x -mat<-elimna(mat) # Remove rows with missing values. -J<-ncol(mat) -Jm<-J-1 -jp<-0 -dif<-matrix(NA,nrow=nrow(mat),ncol=Jm) -for(j in 1:Jm){ -jp<-j+1 -dif[,j]<-mat[,j]-mat[,jp] -} -if(is.na(nboot)){ -nboot<-5000 -if(Jm <= 4)nboot<-1000 -} -print("Taking bootstrap samples. Please wait.") -data <- matrix(sample(nrow(mat), size = nrow(mat) * nboot, replace = T), - nrow = nboot) -bvec <- matrix(NA, ncol = ncol(dif), nrow = nboot) - for(j in 1:ncol(dif)) { - temp <- dif[, j] - bvec[, j] <- apply(data, 1., rmanogsub, temp, est) - } #bvec is an nboot by Jm matrix -testvec<-NA -for(j in 1:Jm){ -testvec[j]<-sum(bvec[,j]>0)/nboot -if(testvec[j] > .5)testvec[j]<-1-testvec[j] -} -critvec<-alpha/c(1:Jm) -#testvec<-2*testvec[order(-1*testvec)] -test<-2*testvec -test.sort<-order(-1*test) -chk<-sum((test.sort <= critvec)) -if(chk > 0)print("Significant difference found") -output<-matrix(0,Jm,6) -dimnames(output)<-list(NULL,c("con.num","psihat","sig","crit.sig","ci.lower","ci.upper")) -tmeans<-apply(dif,2,est,...) 
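# tmeans holds the estimated location of each column of difference scores;
# these are reported as the psihat values in the output matrix below.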
-psi<-1 -output[,2]<-tmeans -for (ic in 1:Jm){ -output[ic,1]<-ic -output[ic,3]<-test[ic] -crit<-critvec[ic] -output[test.sort[ic],4]<-crit -} -for(ic in 1:Jm){ -icrit<-output[ic,4] -icl<-round(icrit*nboot/2)+1 -icu<-round((1-icrit/2)*nboot) -temp<-sort(bvec[,ic]) -output[ic,5]<-temp[icl] -output[ic,6]<-temp[icu] -} -list(output=output) -} - -ecor<-function(x,y,pcor=FALSE,regfun=tsreg,corfun=pbcor,outkeep=FALSE,outfun=outmgvf){ -# -# Estimate the explanatory correlation between x and y -# -# It is assumed that x is a vector or a matrix having one column only -xx<-elimna(cbind(x,y)) # Remove rows with missing values -x<-xx[,1] -y<-xx[,2] -x<-as.matrix(x) -if(ncol(x) > 1)stop("x must be a vector or matrix with one column") -flag<-rep(TRUE,nrow(x)) -if(!outkeep){ -temp<-outfun(cbind(x,y))$out.id -flag[temp]<-FALSE -} -coef<-regfun(x,y)$coef -ip<-ncol(x)+1 -yhat<-x %*% coef[2:ip] + coef[1] -if(pcor)epow2<-cor(yhat[flag],y[flag])^2 -if(!pcor)epow2<-corfun(yhat[flag],y[flag])$cor^2 -ecor<-sqrt(epow2)*sign(coef[2]) -ecor -} -ocor<-function(x,y,corfun=pbcor,outfun=outmgvf,pcor=FALSE,plotit=FALSE){ -# -# Compute a correlation when outliers are ignored. -# -xx<-elimna(cbind(x,y)) # Remove rows with missing values -x<-xx[,1] -y<-xx[,2] -flag<-rep(TRUE,length(x)) -temp<-outfun(cbind(x,y),plotit=plotit)$out.id -flag[temp]<-FALSE -if(pcor)ocor<-cor(x[flag],y[flag]) -if(!pcor)ocor<-corfun(x[flag],y[flag])$cor -list(cor=ocor) -} - - -rmdzero<-function(x,est=mom,grp=NA,nboot=500,SEED=TRUE,...){ -# -# Do ANOVA on dependent groups -# using # depth of zero among bootstrap values -# based on difference scores. -# -# The data are assumed to be stored in x in list mode -# or in a matrix. In the first case -# x[[1]] contains the data for the first group, x[[2]] the data -# for the second group, etc. Length(x)=the number of groups = J. -# If stored in a matrix, columns correspond to groups. -# -# grp is used to specify some subset of the groups, if desired. -# By default, all J groups are used. -# -# The default number of bootstrap samples is nboot=500 -# -if(!is.list(x) && !is.matrix(x))stop("Data must be stored in a matrix or in list mode.") -if(is.list(x)){ -# put the data in an n by J matrix -mat<-matrix(0,length(x[[1]]),length(x)) -for (j in 1:length(x))mat[,j]<-x[[j]] -} -if(is.matrix(x))mat<-x -if(!is.na(grp[1])){ -mat<-mat[,grp] -} -mat<-elimna(mat) # Remove rows with missing values. -J<-ncol(mat) -jp<-0 -Jall<-(J^2-J)/2 -dif<-matrix(NA,nrow=nrow(mat),ncol=Jall) -ic<-0 -for(j in 1:J){ -for(k in 1:J){ -if(jcrit,1,0) -id<-vec[chk==1] -keep<-vec[chk==0] -x<-as.matrix(x) -if(plotit && ncol(x)==2){ -plot(x[,1],x[,2],xlab="X",ylab="Y",type="n") -flag<-rep(TRUE,nrow(x)) -flag[id]<-FALSE -points(x[flag,1],x[flag,2]) -if(sum(chk)!=0)points(x[!flag,1],x[!flag,2],pch=outsym) -} -if(SEED) { - assign(x='.Random.seed', value=oldSeed, envir=.GlobalEnv) -} -list(out.id=id,keep.id=keep,dis=dis,crit=crit) -} - -rundis<-function(x,y,est=onestep,plotit=TRUE,pyhat=FALSE,...){ -# -# Do a smooth where x is discrete with a -# relatively small number of values. -# -temp<-sort(unique(x)) -yhat<-NA -for(i in 1:length(temp)){ -flag<-(temp[i]==x) -yhat[i]<-est(y[flag],...) -} -plot(x,y) -lines(temp,yhat) -output<-"Done" -if(pyhat)output<-yhat -output -} - -bdm<-function(x,grp=NA){ -# -# Perform the Brunner, Dette, Munk rank-based ANOVA -# (JASA, 1997, 92, 1494--1502) -# -# x can be a matrix with columns corresponding to groups -# or it can have list mode. 
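#
# Illustrative call (hypothetical data, not part of the original file):
#   g <- list(rnorm(15), rnorm(20, .5), rnorm(18, 1))
#   bdm(g)   # returns the BDM F statistic, degrees of freedom and p-value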
-# -if(is.matrix(x))x<-listm(x) -J<-length(x) -xx<-list() -if(is.na(grp[1]))grp<-c(1:J) -for(j in 1:J)xx[[j]]<-x[[grp[j]]] -Ja<-matrix(1,J,J) -Ia<-diag(1,J) -Pa<-Ia-Ja/J -cona<-Pa -outA<-bdms1(xx,cona) -outA -} -cori<-function(x,y,z,pt=median(z),fr=.8,est=onestep,corfun=pbcor,testit=FALSE, -nboot=599,sm=FALSE,xlab="X",ylab="Y",...){ -# -# Split the data according to whether z is < or > pt, then -# use runmean2g to plot a smooth of the regression -# lines corresponding to these two groups. -# -# If testit=T, the hypothesis of equal correlations is tested using the -# the R function twocor -# -m<-cbind(x,y,z) -m<-elimna(m) -x<-m[,1] -y<-m[,2] -z<-m[,3] -flag<-(z0] -v2<-vec2[vec2>0] -slope<-v1/v2 -tmin<-wrregfun(slope[1],x,y) -ikeep<-1 -for(i in 2:length(slope)){ -tryit<-wrregfun(slope[i],x,y) -if(tryit1){ -for(p in 1:ncol(x)){ -temp[p]<-wsp1reg(x[,p],y)$coef[2] -} -res<-y-x%*%temp -alpha<-median(res) -r<-matrix(NA,ncol=ncol(x),nrow=nrow(x)) -tempold<-temp -for(it in 1:iter){ -for(p in 1:ncol(x)){ -r[,p]<-y-x%*%temp-alpha+temp[p]*x[,p] -temp[p]<-wsp1reg(x[,p],r[,p],plotit=FALSE)$coef[2] -} -alpha<-median(y-x%*%temp) -if(max(abs(tempold-temp))<.0001)break -tempold<-temp -} -coef<-c(alpha,temp) -res<-y-x%*%temp-alpha -} -list(coef=coef,residuals=res) -} - -mgvar<-function(m,se=FALSE,op=0,cov.fun=covmve,SEED=TRUE){ -# -# Find the center of a scatterplot, add point that -# increases the generalized variance by smallest amount -# continue for all points -# return the generalized variance -# values corresponding to each point. -# The central values and point(s) closest to it get NA -# -# op=0 find central points using pairwise differences -# op!=0 find central points using measure of location -# used by cov.fun -# -# choices for cov.fun include -# covmve -# covmcd -# tbs (Rocke's measures of location -# rmba (Olive's median ball algorithm) -# -if(op==0)temp<-apgdis(m,se=se)$distance -if(op!=0)temp<-out(m,cov.fun=cov.fun,plotit=FALSE,SEED=SEED)$dis -flag<-(temp!=min(temp)) -temp2<-temp -temp2[!flag]<-max(temp) -flag2<-(temp2!=min(temp2)) -flag[!flag2]<-F -varvec<-NA -while(sum(flag)>0){ -ic<-0 -chk<-NA -remi<-NA -for(i in 1:nrow(m)){ -if(flag[i]){ -ic<-ic+1 -chk[ic]<-gvar(rbind(m[!flag,],m[i,])) -remi[ic]<-i -}} -sor<-order(chk) -k<-remi[sor[1]] -varvec[k]<-chk[sor[1]] -flag[k]<-F -} -varvec -} - -outmgv<-function(x,y=NULL,plotit=TRUE,outfun=outbox,se=TRUE,op=1,ndir=1000, -cov.fun=rmba,xlab="X",ylab="Y",SEED=TRUE,STAND=FALSE,...){ -# -# Check for outliers using mgv method -# -# NOTE: if columns of the input matrix are reordered, this can -# have an effect on the results due to rounding error when calling -# the R function eigen. -# -# (Argument STAND is included simply to avoid programming issues when outmgv is called by other functions.) -# -if(is.null(y[1]))m<-x -if(!is.null(y[1]))m<-cbind(x,y) -m=elimna(m) -m=as.matrix(m) -nv=nrow(m) -temp<-mgvar(m,se=se,op=op,cov.fun=cov.fun,SEED=SEED) -temp[is.na(temp)]<-0 -if(ncol(m)==1){ -temp2=outpro(m) -nout=temp2$n.out -keep=temp2$keep -temp2=temp2$out.id -} -if(ncol(m)>1){ -if(ncol(m)==2)temp2<-outfun(temp,...) 
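# For bivariate data the MGV distances in temp are screened with outfun
# (a boxplot rule by default); for three or more variables the boxplot rule
# with a chi-squared based gval, applied below, is used instead.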
-if(ncol(m)>2){ -temp2<-outbox(temp,mbox=TRUE,gval=sqrt(qchisq(.975,ncol(m)))) -} -if(plotit && ncol(m)==2){ -x<-m[,1] -y<-m[,2] -plot(x,y,type="n",xlab=xlab,ylab=ylab) -points(x[temp2$keep],y[temp2$keep],pch="*") -if(!is.null(temp2$out.id))points(x[temp2$out.id],y[temp2$out.id],pch="o") - -d=prodepth(m,ndir=ndir,SEED=SEED) -dis=1/d -id.cen=which(d==max(d)) -if(length(id.cen)==1)center=m[id.cen,] -else -center=apply(m[id.cen,],2,mean) -points(center[1],center[2],pch="+") -flag=which(d>=median(d)) -xx<-m[flag,] -xord<-order(xx[,1]) -xx<-xx[xord,] -temp<-chull(xx) -lines(xx[temp,]) -lines(xx[c(temp[1],temp[length(temp)]),]) -} -nout=0 -if(!is.na(temp2[1]))nout=length(temp2$out.id) -} -list(n=nv,n.out=nout,out.id=temp2$out.id,keep=temp2$keep) -} - -outmgvf<-function(x,y=NA,plotit=TRUE,outfun=outbox,se=TRUE,ndir=1000,SEED=TRUE,...){ -# -# Check for outliers using inward mgv method -# This method is faster than outmgv. -# -if(is.na(y[1]))m<-x -if(!is.na(y[1]))m<-cbind(x,y) -m<-elimna(m) # eliminate any rows with missing values -if(se){ -for(i in 1:ncol(m))m[,i]<-(m[,i]-median(m[,i]))/mad(m[,i]) -} -iflag<-rep(TRUE,nrow(m)) -dval<-0 -for(i in 1:nrow(m)){ -dval[i]<-gvar(m[-i,]) -} -temp2<-outfun(dval,...) -if(plotit && ncol(m)==2){ -flag=which(dval<=median(dval)) -x<-m[,1] -y<-m[,2] -plot(x,y,type="n",xlab="X",ylab="Y") -points(x[temp2$keep],y[temp2$keep],pch='*') -d=prodepth(m,ndir=ndir,SEED=SEED) -dis=1/d -id.cen=which(d==max(d)) -center=apply(m[id,],2,mean) -points(center[1],center[2],pch="+") -flag=which(d>=median(d)) -xx<-m[flag,] -xord<-order(xx[,1]) -xx<-xx[xord,] -temp<-chull(xx) -lines(xx[temp,]) -lines(xx[c(temp[1],temp[length(temp)]),]) -if(!is.null(temp2$out.id))points(x[temp2$out.id],y[temp2$out.id],pch="o") -} -list(n=temp2$n,out.id=temp2$out.id,keep=temp2$keep,out.val=m[temp2$out.id,],depth.values=dval) -} - -epow<-function(x,y,pcor=FALSE,regfun=tsreg,corfun=pbcor,outkeep=FALSE,outfun=outmgvf,varfun=pbvar,op=TRUE){ -# -# Estimate the explanatory power between x and y -# -xx<-elimna(cbind(x,y)) -pval<-1 -if(is.matrix(x))pval<-ncol(x) -pp<-pval+1 -x<-xx[,1:pval] -y<-xx[,pp] -x<-as.matrix(x) -flag<-rep(TRUE,nrow(x)) -temp<-regfun(x,y) -ip<-ncol(x)+1 -yhat<-y-temp$res -if(!outkeep){ -temp<-outfun(cbind(x,y),plotit=FALSE)$out.id -flag[temp]<-FALSE -} -epow1<-varfun(yhat[flag])/varfun(y[flag]) -if(pcor)epow2<-cor(yhat[flag],y[flag])^2 -if(!pcor)epow2<-corfun(yhat[flag],y[flag])$cor^2 -if(op)est<-epow2 -if(!op)est<-epow1 -est -} - -cmanova<-function(J,K,x,grp=c(1:JK),JK=J*K){ -# -# Perform the Choi and Marden -# multivariate one-way rank-based ANOVA -# (Choi and Marden, JASA, 1997, 92, 1581-1590. -# -# x can be a matrix with columns corresponding to groups -# or it can have list mode. 
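#
# Illustrative layout (hypothetical): with J = 2 independent groups and K = 3
# dependent measures, x contains 2*3 = 6 groups ordered (1,1),(1,2),(1,3),
# (2,1),(2,2),(2,3), and cmanova(2, 3, x) returns the test statistic,
# degrees of freedom and p-value.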
-# -# Have a J by K design with J independent levels and K dependent -# measures -# -# -x=elimna(x) -if(is.matrix(x))x<-listm(x) -xx<-list() -nvec<-NA -jk<-0 -for(j in 1:J){ -for(k in 1:K){ -jk<-jk+1 -xx[[jk]]<-x[[grp[jk]]] -if(k==1)nvec[j]<-length(xx[[jk]]) -}} -N<-sum(nvec) -RVALL<-matrix(0,nrow=N,K) -x<-xx -jk<-0 -rmean<-matrix(NA,nrow=J,ncol=K) -for(j in 1:J){ -RV<-matrix(0,nrow=nvec[j],ncol=K) -jk<-jk+1 -temp1<-matrix(x[[jk]],ncol=1) -for(k in 2:K){ -jk<-jk+1 -temp1<-cbind(temp1,x[[jk]]) -} -X<-temp1 -if(j==1)XALL<-X -if(j>1)XALL<-rbind(XALL,X) -n<-nvec[j] -for(i in 1:n){ -for (ii in 1:n){ -temp3<-sqrt(sum((X[i,]-X[ii,])^2)) -if(temp3 != 0)RV[i,]<-RV[i,]+(X[i,]-X[ii,])/temp3 -} -RV[i,]<-RV[i,]/nvec[j] -if(j==1 && i==1)sighat<-RV[i,]%*%t(RV[i,]) -if(j>1 || i>1)sighat<-sighat+RV[i,]%*%t(RV[i,]) -} -} -# Assign ranks to pooled data and compute R bar for each group -for(i in 1:N){ -for (ii in 1:N){ -temp3<-sqrt(sum((XALL[i,]-XALL[ii,])^2)) -if(temp3 != 0)RVALL[i,]<-RVALL[i,]+(XALL[i,]-XALL[ii,])/temp3 -} -RVALL[i,]<-RVALL[i,]/N -} -bot<-1-nvec[1] -top<-0 -for(j in 1:J){ -bot<-bot+nvec[j] -top<-top+nvec[j] -flag<-c(bot:top) -rmean[j,]<-apply(RVALL[flag,],2,mean) -} -sighat<-sighat/(N-J) -shatinv<-solve(sighat) -KW<-0 -for(j in 1:J){ -KW<-KW+nvec[j]*t(rmean[j,])%*%shatinv%*%rmean[j,] -} -df<-K*(J-1) -sig.level<-1-pchisq(KW,df) -list(test.stat=KW[1,1],df=df,p.value=sig.level) -} - - -signt<-function(x,y=NULL,dif=NULL,alpha=.05,method='AC',AUTO=TRUE,PVSD=FALSE){ -# -# Do a sign test on data in x and y -# If y=NA, assume x is a matrix with -# two columns or has list mode. -# -# Returns n, the original sample size -# N, number of paired observations that are not equal to one another. -# phat, an estimate of p, the probability that xnullval || chkit[2]nullval || chkit[2]nullval || chkit[2] 28)qval<-2.383904*connum^.1-.202 -aval<-4*(1-pnorm(qval)) -if(J==2 && K==2)aval<-.05 -if(J==5 && K==2)aval<-2*(1-pnorm(qval)) -if(J==3 && K==2)aval<-3*(1-pnorm(qval)) -if(J==4 && K==2)aval<-3*(1-pnorm(qval)) -if(J==2 && K==3)aval<-3*(1-pnorm(qval)) -for (j in 1:J){ -for (jj in 1:J){ -if(j=80, hochberg's method is used. -# -if(!is.null(y[1]))x<-cbind(x,y) -if(!is.list(x) && !is.matrix(x))stop("Data must be stored in a matrix or in list mode.") -if(is.list(x)){ -if(is.matrix(con)){ -if(length(x)!=nrow(con))stop("The number of rows in con is not equal to the number of groups.") -}} -if(is.list(x)){ -# put the data in an n by J matrix -mat<-matl(x) -} -if(is.matrix(x) && is.matrix(con)){ -if(ncol(x)!=nrow(con))stop("The number of rows in con is not equal to the number of groups.") -mat<-x -} -if(is.matrix(x))mat<-x -if(!is.na(sum(grp)))mat<-mat[,grp] -x<-mat -mat<-elimna(mat) # Remove rows with missing values. -x<-mat -J<-ncol(mat) -n=nrow(mat) -if(n>=80)hoch=TRUE -Jm<-J-1 -if(sum(con^2)==0){ -d<-(J^2-J)/2 -con<-matrix(0,J,d) -id<-0 -for (j in 1:Jm){ -jp<-j+1 -for (k in jp:J){ -id<-id+1 -con[j,id]<-1 -con[k,id]<-0-1 -}}} -d<-ncol(con) -if(is.na(nboot)){ -nboot<-5000 -if(d<=10)nboot<-3000 -if(d<=6)nboot<-2000 -if(d<=4)nboot<-1000 -} -n<-nrow(mat) -crit.vec<-alpha/c(1:d) -connum<-ncol(con) -# Create set of differences based on contrast coefficients -xx<-x%*%con -xx<-as.matrix(xx) -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -psihat<-matrix(0,connum,nboot) -bvec<-matrix(NA,ncol=connum,nrow=nboot) -data<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot) -# data is an nboot by n matrix -if(ncol(xx)==1){ -for(ib in 1:nboot)psihat[1,ib]<-est(xx[data[ib,]],...) 
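# Single-contrast case: each bootstrap estimate is stored in the one and only
# row of psihat.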
-} -if(ncol(xx)>1){ -for(ib in 1:nboot)psihat[,ib]<-apply(elimna(xx[data[ib,],]),2,est,...) -} -# -# Now have an nboot by connum matrix of bootstrap values. -# -test<-1 -icl<-round(alpha*nboot/2)+1 -icu<-nboot-icl-1 -cimat=matrix(NA,nrow=connum,ncol=2) -for (ic in 1:connum){ -test[ic]<-(sum(psihat[ic,]>0)+.5*sum(psihat[ic,]==0))/nboot -test[ic]<-min(test[ic],1-test[ic]) -temp=sort(psihat[ic,]) -cimat[ic,1]=temp[icl] -cimat[ic,2]=temp[icu] -} -test<-2*test -ncon<-ncol(con) -if(alpha==.05){ -dvec<-c(.025,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) -if(ncon > 10){ -avec<-.05/c(11:ncon) -dvec<-c(dvec,avec) -}} -if(alpha==.01){ -dvec<-c(.005,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) -if(ncon > 10){ -avec<-.01/c(11:ncon) -dvec<-c(dvec,avec) -}} -if(alpha != .05 && alpha != .01){ -dvec<-alpha/c(1:ncon) -dvec[2]<-alpha/2 -} -if(hoch)dvec<-alpha/(2*c(1:ncon)) -dvec<-2*dvec -if(plotit && connum==1){ -plot(c(psihat[1,],0),xlab="",ylab="Est. Difference") -points(psihat[1,]) -abline(0,0) -} -temp2<-order(0-test) -ncon<-ncol(con) -zvec<-dvec[1:ncon] -sigvec<-(test[temp2]>=zvec) -output<-matrix(0,connum,6) -dimnames(output)<-list(NULL,c("con.num","psihat","p.value","p.crit","ci.lower","ci.upper")) -tmeans<-apply(xx,2,est,...) -psi<-1 -output[temp2,4]<-zvec -for (ic in 1:ncol(con)){ -output[ic,2]<-tmeans[ic] -output[ic,1]<-ic -output[ic,3]<-test[ic] -output[ic,5:6]<-cimat[ic,] -} -num.sig<-sum(output[,3]<=output[,4]) -list(output=output,con=con,num.sig=num.sig) -} - - -bdms1<-function(x,con){ -# This function is used by bdm -# -# Pool all data and rank -pool<-x[[1]] -JK<-length(x) -for (j in 2:JK)pool<-c(pool,x[[j]]) -N<-length(pool) -rval<-rank(pool) -rvec<-list() -up<-length(x[[1]]) -rvec[[1]]<-rval[1:up] -rbar<-mean(rvec[[1]]) -nvec<-length(rvec[[1]]) -for(j in 2:JK){ -down<-up+1 -up<-down+length(x[[j]])-1 -rvec[[j]]<-rval[down:up] -nvec[j]<-length(rvec[[j]]) -rbar[j]<-mean(rvec[[j]]) -} -phat<-(rbar-.5)/N -phat<-as.matrix(phat) -svec<-NA -for(j in 1:JK)svec[j]<-sum((rvec[[j]]-rbar[j])^2)/(nvec[j]-1) -svec<-svec/N^2 -VN<-N*diag(svec/nvec) -top<-con[1,1]*sum(diag(VN)) -Ftest<-N*(t(phat)%*%con%*%phat)/top -nu1<-con[1,1]^2*sum(diag(VN))^2/sum(diag(con%*%VN%*%con%*%VN)) -lam<-diag(1/(nvec-1)) -nu2<-sum(diag(VN))^2/sum(diag(VN%*%VN%*%lam)) -sig<-1-pf(Ftest,nu1,nu2) -list(F=Ftest,nu1=nu1,nu2=nu2,q.hat=phat,p.value=sig) -} - -r1mcp<-function(x,alpha=.05,bhop=FALSE){ -# -# Do all pairwise comparisons using a modification of -# the Brunner, Dette and Munk (1997) rank-based method. -# FWE is controlled using Rom's technique. -# -# Setting bhop=T, FWE is controlled using the -# Benjamini-Hochberg Method. -# -# The data are assumed to be stored in x in list mode or in a matrix. -# -# Missing values are automatically removed. 
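#
# Illustrative call (hypothetical data, not part of the original file):
#   g <- list(rnorm(20), rnorm(20, .4), rnorm(25, .8))
#   r1mcp(g)                # Rom-adjusted critical p-values
#   r1mcp(g, bhop=TRUE)     # Benjamini-Hochberg critical p-values instead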
-# - if(is.matrix(x))x <- listm(x) - if(!is.list(x)) - stop("Data must be stored in list mode or a matrix.") -J<-length(x) - for(j in 1:J) { - xx <- x[[j]] - x[[j]] <- xx[!is.na(xx)] # Remove missing values - } -# -CC<-(J^2-J)/2 -# Determine critical values -ncon<-CC -if(!bhop){ -if(alpha==.05){ -dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) -if(ncon > 10){ -avec<-.05/c(11:ncon) -dvec<-c(dvec,avec) -}} -if(alpha==.01){ -dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) -if(ncon > 10){ -avec<-.01/c(11:ncon) -dvec<-c(dvec,avec) -}} -if(alpha != .05 && alpha != .01)dvec<-alpha/c(1:ncon) -} -if(bhop)dvec<-(ncon-c(1:ncon)+1)*alpha/ncon -output<-matrix(0,CC,5) -dimnames(output)<-list(NULL,c("Level","Level","test.stat","p.value","p.crit")) -ic<-0 -for(j in 1:J){ -for(jj in 1:J){ -if(j < jj){ -ic<-ic+1 -output[ic,1]<-j -output[ic,2]<-jj -temp<-bdm(x[c(j,jj)]) -output[ic,3]<-temp$F -output[ic,4]<-temp$p.value -}}} -temp2<-order(0-output[,4]) -output[temp2,5]<-dvec[1:length(temp2)] -list(output=output) -} - - -tamhane<-function(x,x2=NA,cil=NA,crit=NA){ -# -# First stage of Tamhane's method -# -# x contains first stage data -# x2 contains second stage data -# -# cil is the desired length of the confidence intervals. -# That is, cil is the distance between the upper and lower -# ends of the confidence intervals. -# -if(is.matrix(x))x<-listm(x) -if(!is.list(x))stop("Data must be stored in list mode or in matrix mode.") -J<-length(x) -tempn<-0 -svec<-NA -for(j in 1:J){ -temp<-x[[j]] -temp<-temp[!is.na(temp)] # Remove missing values. -tempn[j]<-length(temp) -x[[j]]<-temp -svec[j]<-var(temp) -} -A<-sum(1/(tempn-1)) -df<-J/A -paste("The degrees of freedom are:",df) -if(is.na(crit))stop("Enter a critical value and reexecute this function") -if(is.na(cil))stop("To proceed, you must specify the length of the confidence intervals.") -d<-(cil/(2*crit))^2 -n.vec<-NA -for(j in 1:J){ -n.vec[j]<-max(tempn[j]+1,floor(svec[j]/d)+1) -} -ci.mat<-NA -if(!is.na(x2[1])){ -if(is.matrix(x2))x2<-listm(x2) -if(!is.list(x2))stop("Data must be stored in list mode or in matrix mode.") -TT<-NA -U<-NA -J<-length(x) -nvec2<-NA -for(j in 1:length(x)){ -nvec2[j]<-length(x2[[j]]) -if(nvec2[j] 10){ -avec<-.05/c(11:ncon) -dvec<-c(dvec,avec) -}} -if(alpha==.01){ -dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) -if(ncon > 10){ -avec<-.01/c(11:ncon) -dvec<-c(dvec,avec) -}} -if(alpha != .05 && alpha != .01)dvec<-alpha/c(1:ncon) -} -if(bhop)dvec<-(ncon-c(1:ncon)+1)*alpha/ncon -Fac.A<-matrix(0,CC,5) -dimnames(Fac.A)<-list(NULL,c("Level","Level","test.stat","p.value","p.crit")) -mat<-matrix(c(1:JK),ncol=K,byrow=TRUE) -ic<-0 -for(j in 1:J){ -for(jj in 1:J){ -if(j < jj){ -ic<-ic+1 -Fac.A[ic,1]<-j -Fac.A[ic,2]<-jj -temp<-bdm2way(2,K,x[c(mat[j,],mat[jj,])]) -#Fac.A[ic,3]<-temp$outputA$F -#Fac.A[ic,4]<-temp$outputA$sig -Fac.A[ic,3]<-temp$A.F -Fac.A[ic,4]<-temp$p.valueA -}}} -temp2<-order(0-Fac.A[,4]) -Fac.A[temp2,5]<-dvec[1:length(temp2)] -CCB<-(K^2-K)/2 -ic<-0 -Fac.B<-matrix(0,CCB,5) -dimnames(Fac.B)<-list(NULL,c("Level","Level","test.stat","p.value","p.crit")) -for(k in 1:K){ -for(kk in 1:K){ -if(k1){ -for(k in 2:K){ -con1<-push(con1) -con<-cbind(con,con1) -}}} -d<-ncol(con) -if(is.na(nboot)){ -if(d<=4)nboot<-1000 -if(d>4)nboot<-5000 -} -# -# Now take bootstrap samples from jth level -# of Factor A and average K corresponding estimates -# of location. -# -bloc<-matrix(NA,nrow=J,ncol=nboot) -print("Taking bootstrap samples. 
Please wait.") -mvec<-NA -ik<-0 -for(j in 1:J){ -paste("Working on level ",j," of Factor A") -x<-matrix(NA,nrow=nvec[j],ncol=K) -# -for(k in 1:K){ -ik<-ik+1 -x[,k]<-xx[[ik]] -if(!avg)mvec[ik]<-est(xx[[ik]],...) -} -tempv<-apply(x,2,est,...) -data<-matrix(sample(nvec[j],size=nvec[j]*nboot,replace=TRUE),nrow=nboot) -bvec<-matrix(NA,ncol=K,nrow=nboot) -mat<-listm(x) -for(k in 1:K){ -temp<-x[,k] -bvec[,k]<-apply(data,1,rmanogsub,temp,est,...) # An nboot by K matrix -} -if(avg){ -mvec[j]<-mean(tempv) -bloc[j,]<-apply(bvec,1,mean) -} -if(!avg){ -if(j==1)bloc<-bvec -if(j>1)bloc<-cbind(bloc,bvec) -} -} -if(avg)bloc<-t(bloc) -connum<-d -psihat<-matrix(0,connum,nboot) -test<-1 -for (ic in 1:connum){ -psihat[ic,]<-apply(bloc,1,bptdpsi,con[,ic]) -#test[ic]<-sum((psihat[ic,]>0))/nboot -test[ic]<-(sum(psihat[ic,]>0)+.5*sum(psihat[ic,]==0))/nboot -test[ic]<-min(test[ic],1-test[ic]) -} -ncon<-ncol(con) -if(alpha==.05){ -dvec<-c(.025,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) -if(ncon > 10){ -avec<-.05/c(11:ncon) -dvec<-c(dvec,avec) -}} -if(alpha==.01){ -dvec<-c(.005,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) -if(ncon > 10){ -avec<-.01/c(11:ncon) -dvec<-c(dvec,avec) -}} -if(alpha != .05 && alpha != .01){ -dvec<-alpha/c(1:ncon) -dvec[1]<-alpha/2 -} -temp2<-order(0-test) -ncon<-ncol(con) -zvec<-dvec[1:ncon] -sigvec<-(test[temp2]>=zvec) -output<-matrix(0,connum,6) -dimnames(output)<-list(NULL,c("con.num","psihat","p.value","p.sig","ci.lower","ci.upper")) -tmeans<-mvec -psi<-1 -output[temp2,4]<-zvec -for (ic in 1:ncol(con)){ -output[ic,2]<-sum(con[,ic]*tmeans) -output[ic,1]<-ic -output[ic,3]<-test[ic] -temp<-sort(psihat[ic,]) -temp3<-round(output[ic,4]*nboot)+1 -icl<-round(dvec[ncon]*nboot)+1 -icu<-nboot-(icl-1) -output[ic,5]<-temp[icl] -output[ic,6]<-temp[icu] -} -output[,3]<-2*output[,3] -output[,4]<-2*output[,4] -num.sig<-sum(output[,3]<=output[,4]) -list(output=output,con=con,num.sig=num.sig) -} - -spmcpi<-function(J,K,x,est=tmean,JK=J*K,grp=c(1:JK),alpha=.05,nboot=NA, -SEED=TRUE,pr=TRUE,SR=FALSE,...){ -# -# Multiple comparisons for interactions -# in a split-plot design. -# The analysis is done by taking difference scores -# among all pairs of dependent groups and -# determining which of -# these differences differ across levels of Factor A. -# -# The R variable x is assumed to contain the raw -# data stored in list mode or in a matrix. -# If in list mode, x[[1]] contains the data -# for the first level of both factors: level 1,1. -# x[[2]] is assumed to contain the data for level 1 of the -# first factor and level 2 of the second: level 1,2 -# x[[K]] is the data for level 1,K -# x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc. -# -# If the data are in a matrix, column 1 is assumed to -# correspond to x[[1]], column 2 to x[[2]], etc. -# -# When in list mode x is assumed to have length JK, the total number of -# groups being tested, but a subset of the data can be analyzed -# using grp -# - if(is.matrix(x)) { - y <- list() - for(j in 1:ncol(x)) - y[[j]] <- x[, j] - x <- y -} -if(pr)print("As of Sept. 2005, est defaults to tmean") -JK<-J*K -if(JK!=length(x)){ -print("Something is wrong.") -paste(" Expected ",JK," groups but x contains ", length(x), "groups instead.") -stop() -} -MJ<-(J^2-J)/2 -MK<-(K^2-K)/2 -JMK<-J*MK -Jm<-J-1 -data<-list() -for(j in 1:length(x)){ -data[[j]]<-x[[grp[j]]] # Now have the groups in proper order. 
-} -x<-data -jp<-1-K -kv<-0 -kv2<-0 -for(j in 1:J){ -jp<-jp+K -xmat<-matrix(NA,ncol=K,nrow=length(x[[jp]])) -for(k in 1:K){ -kv<-kv+1 -xmat[,k]<-x[[kv]] -} -xmat<-elimna(xmat) -for(k in 1:K){ -kv2<-kv2+1 -x[[kv2]]<-xmat[,k] -}} -xx<-x -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -# Next determine the n_j values -nvec<-NA -jp<-1-K -for(j in 1:J){ -jp<-jp+K -nvec[j]<-length(x[[jp]]) -} -# -MJMK<-MJ*MK -con<-matrix(0,nrow=JMK,ncol=MJMK) -cont<-matrix(0,nrow=J,ncol=MJ) -ic<-0 -for(j in 1:J){ -for(jj in 1:J){ -if(j1){ -for(k in 2:MK){ -con1<-push(con1) -con<-cbind(con,con1) -}} -d<-ncol(con) -if(is.na(nboot)){ -if(d<=4)nboot<-1000 -if(d>4)nboot<-5000 -} -connum<-d -psihat<-matrix(0,connum,nboot) -# -# Now take bootstrap samples from jth level -# of Factor A and average K corresponding estimates -# of location. -# -bloc<-matrix(NA,ncol=J,nrow=nboot) -print("Taking bootstrap samples. Please wait.") -mvec<-NA -it<-0 -for(j in 1:J){ -paste("Working on level ",j," of Factor A") -x<-matrix(NA,nrow=nvec[j],ncol=MK) -# -im<-0 -for(k in 1:K){ -for(kk in 1:K){ -if(k1)bloc<-cbind(bloc,bvec) -} -test<-1 -for (ic in 1:connum){ -psihat[ic,]<-apply(bloc,1,bptdpsi,con[,ic]) -#test[ic]<-sum((psihat[ic,]>0))/nboot -test[ic]<-(sum(psihat[ic,]>0)+.5*sum(psihat[ic,]==0))/nboot -test[ic]<-min(test[ic],1-test[ic]) -} -ncon<-ncol(con) -dvec<-alpha/c(1:ncon) -if(SR){ -okay=FALSE -if(identical(est,onestep))okay=TRUE -if(identical(est,mom))okay=TRUE -if(!okay)stop('For estimators other than onestep and mom, use SR=FALSE') -if(alpha==.05){ -dvec<-c(.025,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) -if(ncon > 10){ -avec<-.05/c(11:ncon) -dvec<-c(dvec,avec) -}} -if(alpha==.01){ -dvec<-c(.005,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) -if(ncon > 10){ -avec<-.01/c(11:ncon) -dvec<-c(dvec,avec) -}} -if(alpha != .05 && alpha != .01){ -dvec<-alpha/c(1:ncon) -dvec[1]<-alpha/2 -}} -temp2<-order(0-test) -ncon<-ncol(con) -zvec<-dvec[1:ncon] -sigvec<-(test[temp2]>=zvec) -output<-matrix(0,connum,6) -dimnames(output)<-list(NULL,c("con.num","psihat","p.value","p.crit","ci.lower","ci.upper")) -tmeans<-mvec -psi<-1 -for (ic in 1:ncol(con)){ -output[ic,2]<-sum(con[,ic]*tmeans) -output[ic,1]<-ic -output[ic,3]<-test[ic] -output[temp2,4]<-zvec -temp<-sort(psihat[ic,]) -icl<-round(dvec[ncon]*nboot)+1 -icu<-nboot-(icl-1) -output[ic,5]<-temp[icl] -output[ic,6]<-temp[icu] -} -output[,3]<-2*output[,3] -if(SR)output[,4]<-2*output[,4] -num.sig<-sum(output[,3]<=output[,4]) -list(output=output,con=con,num.sig=num.sig) -} - -sppbb<-function(J,K,x,est=tmean,JK=J*K,grp=c(1:JK),nboot=500,SEED=TRUE,pr=TRUE,...){ -# -# A percentile bootstrap for main effects -# among dependent groups in a split-plot design -# The analysis is done based on all pairs -# of difference scores. The null hypothesis is that -# all such differences have a typical value of zero. -# -# The R variable x is assumed to contain the raw -# data stored in list mode or in a matrix. -# If in list mode, x[[1]] contains the data -# for the first level of both factors: level 1,1. -# x[[2]] is assumed to contain the data for level 1 of the -# first factor and level 2 of the second: level 1,2 -# x[[K]] is the data for level 1,K -# x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc. -# -# If the data are in a matrix, column 1 is assumed to -# correspond to x[[1]], column 2 to x[[2]], etc. 
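#
# Illustrative call (hypothetical): sppbb(2, 3, x), with x a matrix having
# 6 columns ordered level 1,1 through 2,3, tests whether all pairwise
# difference scores among the dependent groups have a typical value of zero.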
-# -# When in list mode x is assumed to have length JK, the total number of -# groups being tested, but a subset of the data can be analyzed -# using grp -# -if(pr)print('As of Oct, 2014, the argument est defaults to tmean') - if(is.matrix(x)) { - y <- list() - for(j in 1:ncol(x)) - y[[j]] <- x[, j] - x <- y -} - -JK<-J*K -data<-list() -for(j in 1:length(x)){ -data[[j]]<-x[[grp[j]]] # Now have the groups in proper order. -} -x<-data -jp<-1-K -kv<-0 -kv2<-0 -for(j in 1:J){ -jp<-jp+K -xmat<-matrix(NA,ncol=K,nrow=length(x[[jp]])) -for(k in 1:K){ -kv<-kv+1 -xmat[,k]<-x[[kv]] -} -xmat<-elimna(xmat) -for(k in 1:K){ -kv2<-kv2+1 -x[[kv2]]<-xmat[,k] -}} -xx<-x -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -# Next determine the n_j values -nvec<-NA -jp<-1-K -for(j in 1:J){ -jp<-jp+K -nvec[j]<-length(x[[jp]]) -} -# -# Now stack the data in an N by K matrix -# -x<-matrix(NA,nrow=nvec[1],ncol=K) -# -for(k in 1:K)x[,k]<-xx[[k]] -kc<-K -for(j in 2:J){ -temp<-matrix(NA,nrow=nvec[j],ncol=K) -for(k in 1:K){ -kc<-kc+1 -temp[,k]<-xx[[kc]] -} -x<-rbind(x,temp) -} -# Now call function rmdzero to do the analysis -temp<-rmdzero(x,est=est,nboot=nboot,...) -list(p.value=temp$p.value,center=temp$center) -} - - -spmcpb<-function(J,K,x,est=tmean,JK=J*K,grp=c(1:JK),dif=TRUE,alpha=.05,SEED=TRUE, -nboot=NA,...){ -# -# A percentile bootstrap for all pairwise -# multiple comparisons -# among dependent groups in a split-plot design -# -# Levels of A are ignored. -# -# If dif=T, the analysis is done based on all pairs -# of difference scores. -# Otherwise, marginal measures of location are used. -# -# The R variable x is assumed to contain the raw -# data stored in list mode or in a matrix. -# If in list mode, x[[1]] contains the data -# for the first level of both factors: level 1,1. -# x[[2]] is assumed to contain the data for level 1 of the -# first factor and level 2 of the second: level 1,2 -# x[[K]] is the data for level 1,K -# x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc. -# -# If the data are in a matrix, column 1 is assumed to -# correspond to x[[1]], column 2 to x[[2]], etc. -# -# When in list mode x is assumed to have length JK, the total number of -# groups being tested, but a subset of the data can be analyzed -# using grp -# - if(is.matrix(x) || is.data.frame(x))x=listm(x) -JK<-J*K -data<-list() -for(j in 1:length(x)){ -data[[j]]<-x[[grp[j]]] # Now have the groups in proper order. -} -x=data -x=pool.fun(J,K,x) -temp<-rmmcppb(x,est=est,nboot=nboot,dif=dif,alpha=alpha,plotit=FALSE,SEED=SEED,...) -list(output=temp$output,con=temp$con,num.sig=temp$num.sig) -} - - - -bwamcp<-function(J,K,x,tr=.2,JK=J*K,grp=c(1:JK),alpha=.05,op=TRUE){ -# -# All pairwise comparisons among levels of Factor A -# in a split-plot design using trimmed means. -# -# Data among dependent groups are pooled for each level -# of Factor A. -# Then this function calls lincon. -# -# The R variable x is assumed to contain the raw -# data stored in list mode or in a matrix. -# If in list mode, x[[1]] contains the data -# for the first level of both factors: level 1,1. -# x[[2]] is assumed to contain the data for level 1 of the -# first factor and level 2 of the second: level 1,2 -# x[[K]] is the data for level 1,K -# x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc. -# -# If the data are in a matrix, column 1 is assumed to -# correspond to x[[1]], column 2 to x[[2]], etc. 
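#
# Illustrative call (hypothetical): bwamcp(2, 3, x, tr = .2) compares the two
# levels of Factor A; with op=TRUE (the default) a contrast matrix is built and
# passed to lincon, with op=FALSE the K dependent measures are pooled first.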
-# -# When in list mode x is assumed to have length JK, the total number of -# groups being tested, but a subset of the data can be analyzed -# using grp -# - if(is.matrix(x)) { - y <- list() - for(j in 1:ncol(x)) - y[[j]] <- x[, j] - x <- y -} - -JK<-J*K -if(!op){ -data<-list() -for(j in 1:length(x)){ -data[[j]]<-x[[grp[j]]] # Now have the groups in proper order. -} -x<-data -data<-list() -jp<-1-K -kv<-0 -for(j in 1:J){ -jp<-jp+K -for(k in 1:K){ -kv<-kv+1 -if(k==1)temp<-x[[jp]] -if(k>1)temp<-c(temp,x[[kv]]) -} -data[[j]]<-temp -} -print("Group numbers refer to levels of Factor A") -temp<-lincon(data,tr=tr,alpha=alpha) -} -if(op){ -MJK<-K*(J^2-J)/2 # NUMBER OF COMPARISONS -JK<-J*K -MJ<-(J^2-J)/2 -cont<-matrix(0,nrow=J,ncol=MJ) -ic<-0 -for(j in 1:J){ -for(jj in 1:J){ -if(j1){ -for(k in 2:K){ -con1<-push(con1) -con<-cbind(con,con1) -}} -print("Contrast Matrix Used:") -print(con) -temp<-lincon(x,con=con,tr=tr,alpha=alpha) -} -temp -} - -pcor<-function(x,y=NA){ -if(!is.na(y[1]))temp<-wincor(x,y,tr=0) -if(is.na(y[1]))temp<-winall(x,tr=0) -list(cor=temp$cor,p.value=temp$p.value) -} - -apgdis<-function(m,est=sum,se=TRUE,...){ -# -# For multivariate data, -# compute distance between each pair -# of points and measure depth of a point -# in terms of its distance to all -# other points -# -# Using se=T ensures that ordering of distance -# will not change with a change in scale. -# -# m is an n by p matrix -# -m<-elimna(m) # eliminate any missing values -temp<-0 -if(se){ -for(j in 1:ncol(m))m[,j]<-(m[,j]-median(m[,j]))/mad(m[,j]) -} -for(j in 1:ncol(m)){ -disx<-outer(m[,j],m[,j],"-") -temp<-temp+disx^2 -} -temp<-sqrt(temp) -dis<-apply(temp,1,est,...) -temp2<-order(dis) -center<-m[temp2[1],] -list(center=center,distance=dis) -} - - -rd2plot<-function(x,y,fr=.8,xlab="",ylab=""){ -# -# Expected frequency curve -# for two groups. -# -# fr controls amount of smoothing -x<-elimna(x) -y<-elimna(y) -rmdx<-NA -rmdy<-NA -for(i in 1:length(x)){ -rmdx[i]<-sum(near(x,x[i],fr)) -} -for(i in 1:length(y)){ -rmdy[i]<-sum(near(y,y[i],fr)) -} -rmdx<-rmdx/length(x) -rmdy<-rmdy/length(y) -plot(c(x,y),c(rmdx,rmdy),type="n",ylab=ylab,xlab=xlab) -sx<-sort(x) -xorder<-order(x) -sysm<-rmdx[xorder] -lines(sx,sysm) -sy<-sort(y) -yorder<-order(y) -sysm<-rmdy[yorder] -lines(sy,sysm,lty=2) -} - -depth2<-function(x,pts=NA,plotit=TRUE,xlab="VAR 1",ylab="VAR 2"){ -# -# Compute exact depths for bivariate data -if(ncol(x)!=2)stop("x must be a matrix with 2 columns") -x<-elimna(x) -if(is.na(pts[1]))pts<-x -if(ncol(pts)!=2)stop("Argument pts must be stored as a matrix with 2 columns") -pts<-as.matrix(pts) -ndepth<-NA -for(i in 1:nrow(pts)){ -ndepth[i]<-depth(pts[i,1],pts[i,2],x) -} -if(plotit){ -m<-x -plot(m,xlab=xlab,ylab=ylab) -flag<-(ndepth==max(ndepth)) -if(sum(flag)==1)center<-m[flag,] -if(sum(flag)>1)center<-apply(m[flag,],2,mean) -points(center[1],center[2],pch="+") -temp<-ndepth -flag<-(temp>=median(temp)) -xx<-x[flag,] -xord<-order(xx[,1]) -xx<-xx[xord,] -temp<-chull(xx) -xord<-order(xx[,1]) -xx<-xx[xord,] -temp<-chull(xx) -lines(xx[temp,]) -lines(xx[c(temp[1],temp[length(temp)]),]) -} -ndepth -} - -fdepth<-function(m,pts=NA,plotit=TRUE,cop=3,center=NA,xlab="VAR 1", -ylab="VAR 2"){ -# -# Determine depth of points in pts, relative to -# points in m. If pts is not specified, -# depth of all points in m are determined. -# -# m and pts can be vectors or matrices with -# p columns (the number of variables). 
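#
# Illustrative calls (hypothetical): fdepth(m) returns the projection depth of
# each row of m relative to the cloud m; fdepth(m, pts = z) returns the depths
# of the rows of z relative to m.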
-# -# Determine center, for each point, draw a line -# connecting it with center, project points onto this line -# and determine depth of the projected points. -# The final depth of a point is its minimum depth -# among all projections. -# -# plotit=TRUE creates a scatterplot when working with -# bivariate data and pts=NA -# -# There are three options for computing the center of the -# cloud of points when computing projections, assuming center=NA: -# -# cop=2 uses MCD center -# cop=3 uses median of the marginal distributions. -# cop=4 uses MVE center -# -# If a value for center is passed to this function, -# this value is used to determine depths. -# -# When plotting, -# center is marked with a cross, +. -# -library(MASS) -if(cop!=2 && cop!=3 && cop!=4)stop("Only cop=2, 3 or 4 is allowed") -if(is.list(m))stop("Store data in a matrix; might use function listm") -m<-as.matrix(m) -pts<-as.matrix(pts) -if(!is.na(pts[1]))remm<-m -nm<-nrow(m) -nm1<-nm+1 -if(!is.na(pts[1])){ -if(ncol(m)!=ncol(pts))stop("Number of columns of m is not equal to number of columns for pts") -} -m<-elimna(m) # Remove missing values -m<-as.matrix(m) -if(ncol(m)==1)dep<-unidepth(as.vector(m[,1]),pts=pts) -if(ncol(m)>1){ -if(is.na(center[1])){ -if(cop==2){ -center<-cov.mcd(m)$center -} -if(cop==4){ -center<-cov.mve(m)$center -} -if(cop==3){ -center<-apply(m,2,median) -}} -if(is.na(pts[1])){ -mdep <- matrix(NA,nrow=nrow(m),ncol=nrow(m)) -} -if(!is.na(pts[1])){ -mdep <- matrix(NA,nrow=nrow(m),ncol=nrow(pts)) -} -for (i in 1:nrow(m)){ -B<-m[i,]-center -dis<-NA -BB<-B^2 -bot<-sum(BB) -if(bot!=0){ -if(is.na(pts[1])){ -for (j in 1:nrow(m)){ -A<-m[j,]-center -temp<-sum(A*B)*B/bot -dis[j]<-sign(sum(A*B))*sqrt(sum(temp^2)) -}} -if(!is.na(pts[1])){ -m<-rbind(remm,pts) -for (j in 1:nrow(m)){ -A<-m[j,]-center -temp<-sum(A*B)*B/bot -dis[j]<-sign(sum(A*B))*sqrt(sum(temp^2)) -}} -# -# For ith projection, store depths of -# points in mdep[i,] -# -if(is.na(pts[1]))mdep[i,]<-unidepth(dis) -if(!is.na(pts[1])){ -mdep[i,]<-unidepth(dis[1:nm],dis[nm1:nrow(m)]) -}} -if(bot==0)mdep[i,]<-rep(0,ncol(mdep)) -} -dep<-apply(mdep,2,min) -if(ncol(m)==2 && is.na(pts[1])){ -flag<-chull(m) -dep[flag]<-min(dep) -} -} -if(ncol(m)==2){ -if(is.na(pts[1]) && plotit){ -plot(m,xlab=xlab,ylab=ylab) -points(center[1],center[2],pch="+") -x<-m -temp<-dep -flag<-(temp>=median(temp)) -xx<-x[flag,] -xord<-order(xx[,1]) -xx<-xx[xord,] -temp<-chull(xx) -xord<-order(xx[,1]) -xx<-xx[xord,] -temp<-chull(xx) -lines(xx[temp,]) -lines(xx[c(temp[1],temp[length(temp)]),]) -}} -dep<-round(dep*nrow(m))/nrow(m) -dep -} - -unidepth<-function(x,pts=NA){ -# -# Determine depth of points in the vector x -# -if(!is.vector(x))stop("x should be a vector") -if(is.na(pts[1]))pts<-x -pup<-apply(outer(pts,x,FUN="<="),1,sum)/length(x) -pdown<-apply(outer(pts,x,FUN="<"),1,sum)/length(x) -pdown<-1-pdown -m<-matrix(c(pup,pdown),nrow=2,byrow=TRUE) -dep<-apply(m,2,min) -dep -} - -opreg<-function(x,y,regfun=tsreg,cop=3,MC=FALSE,varfun=pbvar,corfun=pbcor,STAND=TRUE,xout=FALSE){ -# -# Do regression on points not labled outliers -# using projection-type outlier detection method -# -# Note: argument xout is not relevant here, but is included to avoid conflicts when using regci. 
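#
# Illustrative call (hypothetical data, not part of the original file):
#   x <- matrix(rnorm(100), ncol = 2)
#   y <- as.vector(x %*% c(1, .5)) + rnorm(50)
#   opreg(x, y)$coef   # Theil-Sen fit after removing projection-based outliers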
-# -if(MC)library(parallel) -x<-as.matrix(x) -m<-cbind(x,y) -m<-elimna(m) # eliminate any rows with missing data -if(!MC)ivec<-outpro(m,plotit=FALSE,cop=cop,STAND=STAND)$keep -if(MC)ivec<-outproMC(m,plotit=FALSE,cop=cop,STAND=STAND)$keep -np1<-ncol(x)+1 -coef<-regfun(m[ivec,1:ncol(x)],m[ivec,np1])$coef -vec<-rep(1,length(y)) -residuals<-y-cbind(vec,x)%*%coef -stre=NULL -yhat<-y-residuals -e.pow<-varfun(yhat)/varfun(y) -if(!is.na(e.pow)){ -if(e.pow>=1)e.pow<-corfun(yhat,y)$cor^2 -stre=sqrt(e.pow) -} -list(coef=coef,residuals=residuals,Strength.Assoc=stre,Explanatory.Power=e.pow) -} - -mgvdep<-function(m,se=FALSE){ -# -# Find the center of a scatterplot, add point that -# increases the generalized variance by smallest amount -# continue for all points -# return the MGV depths. -# -# Essentially the same as mgvar which -# determine MGV distances, only here, -# follow convention that deepest points -# have the largest numerical value. Here -# depth of the deepest values equal one. -# -temp<-apgdis(m,se=se)$distance -icen<-ncol(m) -temp3<-order(temp) -chkit<-sum(duplicated(temp[temp3[1:icen]])) -icen<-icen+chkit -flag<-rep(TRUE,length(temp)) -flag[temp3[1:icen]]<-FALSE -# set duplicated central values to F -varvec<-0 -varvec[!flag]<-NA -while(sum(flag)>0){ -ic<-0 -chk<-NA -remi<-NA -for(i in 1:nrow(m)){ -if(flag[i]){ -ic<-ic+1 -chk[ic]<-gvar(rbind(m[!flag,],m[i,])) -remi[ic]<-i -}} -sor<-order(chk) -k<-remi[sor[1]] -varvec[k]<-chk[sor[1]] -flag[k]<-F -} -varvec[is.na(varvec)]<-0 -varvec<-1/(1+varvec) -varvec -} - - -fdepthv2<-function(m,pts=NA,plotit=TRUE){ -# -# Determine depth of points in pts relative to -# points in m -# -# Draw a line between each pair of distinct points -# and determine depth of the projected points. -# The final depth of a point is its minimum depth -# among all projections. -# -# This function is slower than fdepth and requires -# space for a nc by nc matrix, nc=(n^2-n)/2. -# But it allows -# data to have a singular covariance matrix -# and it provides a more accurate approximation of -# halfspace depth. -# -# plotit=TRUE creates a scatterplot when working with -# bivariate data and pts=NA -# -# When plotting, -# center is marked with a cross, +. 
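#
# Note: for n = 100 bivariate points this evaluates (100^2-100)/2 = 4950
# projections, so run times are noticeably longer than for fdepth.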
-# -m<-elimna(m) # Remove missing values -if(!is.na(pts[1]))remm<-m -if(!is.matrix(m))dep<-unidepth(m) -if(is.matrix(m)){ -nm<-nrow(m) -nt<-nm -nm1<-nm+1 -if(!is.na(pts[1])){ -if(ncol(m)!=ncol(pts))stop("Number of columns of m is not equal to number of columns for pts") -nt<-nm+nrow(pts) -}} -if(ncol(m)==1)depth<-unidepth(m) -if(ncol(m)>1){ -m<-elimna(m) # Remove missing values -nc<-(nrow(m)^2-nrow(m))/2 -if(is.na(pts[1]))mdep <- matrix(0,nrow=nc,ncol=nrow(m)) -if(!is.na(pts[1])){ -mdep <- matrix(0,nrow=nc,ncol=nrow(pts)) -} -ic<-0 -for (iall in 1:nm){ -for (i in 1:nm){ -if(iall < i){ -ic<-ic+1 -B<-m[i,]-m[iall,] -dis<-NA -BB<-B^2 -bot<-sum(BB) -if(bot!=0){ -if(is.na(pts[1])){ -for (j in 1:nrow(m)){ -A<-m[j,]-m[iall,] -temp<-sum(A*B)*B/bot -dis[j]<-sign(sum(A*B))*sqrt(sum(temp^2)) -}} -if(!is.na(pts[1])){ -m<-rbind(remm,pts) -for (j in 1:nrow(m)){ -A<-m[j,]-m[iall,] -temp<-sum(A*B)*B/bot -dis[j]<-sign(sum(A*B))*sqrt(sum(temp^2)) -}} -# -# For ic_th projection, store depths of -# points in mdep[ic,] -# -if(is.na(pts[1]))mdep[ic,]<-unidepth(dis) -if(!is.na(pts[1])){ -mdep[ic,]<-unidepth(dis[1:nm],dis[nm1:nrow(m)]) -}} -if(bot==0)mdep[ic,]<-rep(0,ncol(mdep)) -}}} -dep<-apply(mdep,2,min) -} -if(ncol(m)==2 &&is.na(pts[1])){ -flag<-chull(m) -dep[flag]<-min(dep) -} -if(ncol(m)==2){ -if(is.na(pts[1]) && plotit){ -plot(m) -x<-m -temp<-dep -flag<-(temp>=median(temp)) -xx<-x[flag,] -xord<-order(xx[,1]) -xx<-xx[xord,] -temp<-chull(xx) -xord<-order(xx[,1]) -xx<-xx[xord,] -temp<-chull(xx) -lines(xx[temp,]) -lines(xx[c(temp[1],temp[length(temp)]),]) -}} -dep -} - -g2plot<-function(x1,x2,op=4,rval=15,fr=.8,aval=.5,xlab="X",ylab=""){ -# -# plot estimates of the density functions for two groups. -# -# op=1: Use Rosenblatt shifted histogram -# -# op=2: -# Use kernel density estimate -# Using the built-in S+ function density, -# -# op=3: Use expected frequency curve. 
-# -# op=4: Use adaptive kernel estimator -# -x1<-elimna(x1) -x2<-elimna(x2) -if(op==3){ -rd2plot(x1,x2,fr=fr,xlab=xlab,ylab=ylab) -print("Might consider using op=4 if graph is ragged") -} -if(op==2){ -tempx<-density(x1,na.rm=TRUE,kernel="epanechnikov") -tempy<-density(x2,na.rm=TRUE,kernel="epanechnikov") -plot(c(tempx$x,tempy$x),c(tempx$y,tempy$y),type="n",xlab=xlab,ylab=ylab) -lines(tempx$x,tempx$y) -lines(tempy$x,tempy$y,lty=2) -} -if(op==1){ - y1 <- sort(x1) - z1 <- 1 - z2 <- 1 - par(yaxt = "n") - temp <- floor(0.01 * length(x1)) - if(temp == 0) - temp <- 5 - ibot <- y1[temp] - itop <- y1[floor(0.99 * length(x1))] - xaxis1 <- seq(ibot, itop, length = rval) - for(i in 1:rval) - z1[i] <- kerden(x1, 0, xaxis1[i]) - y2 <- sort(x2) - temp <- floor(0.01 * length(x2)) - if(temp == 0) - temp <- 5 - ibot <- y2[temp] - itop <- y2[floor(0.99 * length(x2))] - xaxis2 <- seq(ibot, itop, length = rval) - for(i in 1:rval) - z2[i] <- kerden(x2, 0, xaxis2[i]) -plot(c(xaxis1,xaxis2),c(z1,z2), xlab =xlab, ylab =ylab, type = "n") -lines(xaxis1,z1) -lines(xaxis2,z2,lty=2) -} -if(op==4){ -x1<-sort(x1) -x2<-sort(x2) -z1<-akerd(x1,aval=aval,fr=fr,pyhat=TRUE,plotit=FALSE) -z2<-akerd(x2,aval=aval,fr=fr,pyhat=TRUE,plotit=FALSE) -plot(c(x1,x2),c(z1,z2), xlab =xlab, ylab =ylab, type = "n") -lines(x1,z1) -lines(x2,z2,lty=2) -} -} - -mulwmw<-function(m1,m2,plotit=TRUE,cop=3,alpha=.05,nboot=1000,pop=4,fr=.8,pr=FALSE,SEED=TRUE,tr=.5,NC=TRUE){ -# -# -# Determine center correpsonding to two -# independent groups, project all points onto line -# connecting the centers, -# then based on the projected distances, -# estimate p=probability that a randomly sampled -# point from group 1 is less than a point from group 2 -# based on the projected distances. -# -# plotit=TRUE creates a plot of the projected data -# pop=1 plot two dotplots based on projected distances -# pop=2 boxplots -# pop=3 expected frequency curve. -# pop=4 adaptive kernel density -# -# There are three options for computing the center of the -# cloud of points when computing projections: -# cop=1 uses Donoho-Gasko median -# cop=2 uses MCD center -# cop=3 uses median of the marginal distributions. -# -# When using cop=2 or 3, default critical value for outliers -# is square root of the .975 quantile of a -# chi-squared distribution with p degrees -# of freedom. -# -# NC=F: critical values not computed -# -# Donoho-Gasko (Tukey) median is marked with a cross, +. 
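#
# Illustrative call (hypothetical data, not part of the original file):
#   m1 <- matrix(rnorm(60), ncol = 2)
#   m2 <- matrix(rnorm(80, .5), ncol = 2)
#   mulwmw(m1, m2)   # estimate of P(projected X < projected Y) plus critical values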
-# -if(is.null(dim(m1))||dim(m1)[2]<2){print("Data are assumed to be stored in") -print(" a matrix or data frame having two or more columns.") -stop(" For univariate data, use the function outbox or out") -} -m1<-elimna(m1) # Remove missing values -m2<-elimna(m2) -n1=nrow(m1) -n2=nrow(m2) -if(cop==1){ -if(ncol(m1)>2){ -center1<-dmean(m1,tr=.5) -center2<-dmean(m2,tr=.5) -} -if(ncol(m1)==2){ -tempd<-NA -for(i in 1:nrow(m1)) -tempd[i]<-depth(m1[i,1],m1[i,2],m1) -mdep<-max(tempd) -flag<-(tempd==mdep) -if(sum(flag)==1)center1<-m1[flag,] -if(sum(flag)>1)center1<-apply(m1[flag,],2,mean) -for(i in 1:nrow(m2)) -tempd[i]<-depth(m2[i,1],m2[i,2],m2) -mdep<-max(tempd) -flag<-(tempd==mdep) -if(sum(flag)==1)center2<-m2[flag,] -if(sum(flag)>1)center2<-apply(m2[flag,],2,mean) -}} -if(cop==2){ -center1<-cov.mcd(m1)$center -center2<-cov.mcd(m2)$center -} -if(cop==3){ -center1<-apply(m1,2,mean,tr=tr) -center2<-apply(m2,2,mean,tr=tr) -} -if(cop==4){ -center1<-smean(m1) -center2<-smean(m2) -} -center<-(center1+center2)/2 -B<-center1-center2 -if(sum(center1^2)2){ -center1<-dmean(m1,tr=.5) -center2<-dmean(m2,tr=.5) -} -if(ncol(m1)==2){ -tempd<-NA -for(i in 1:nrow(m1)) -tempd[i]<-depth(m1[i,1],m1[i,2],m1) -mdep<-max(tempd) -flag<-(tempd==mdep) -if(sum(flag)==1)center1<-m1[flag,] -if(sum(flag)>1)center1<-apply(m1[flag,],2,mean) -for(i in 1:nrow(m2)) -tempd[i]<-depth(m2[i,1],m2[i,2],m2) -mdep<-max(tempd) -flag<-(tempd==mdep) -if(sum(flag)==1)center2<-m2[flag,] -if(sum(flag)>1)center2<-apply(m2[flag,],2,mean) -}} -if(cop==2){ -center1<-cov.mcd(m1)$center -center2<-cov.mcd(m2)$center -} -if(cop==3){ -center1<-apply(m1,2,median) -center2<-apply(m2,2,median) -} -center<-(center1+center2)/2 -B<-center1-center2 -if(sum(center1^2)>sum(center2^2))B<-(0-1)*B -BB<-B^2 -bot<-sum(BB) -disx<-NA -disy<-NA -if(bot!=0){ -for (j in 1:nrow(m1)){ -AX<-m1[j,]-center -tempx<-sum(AX*B)*B/bot -disx[j]<-sign(sum(AX*B))*sqrt(sum(tempx^2)) -} -for (j in 1:nrow(m2)){ -AY<-m2[j,]-center -tempy<-sum(AY*B)*B/bot -disy[j]<-sign(sum(AY*B))*sqrt(sum(tempy^2)) -}} -m<-outer(disx,disy,FUN="-") -m<-sign(m) -val[it]<-(1-mean(m))/2 -if(bot==0)val[it]<-.5 -if(pr)print(paste("Iteration ",it," of ",iter," complete")) -} -val<-sort(val) -low<-round(alpha*iter/2)+1 -up<-iter-low -crit<-NA -crit[1]<-val[low] -crit[2]<-val[up] -crit -} - - -dmean<-function(m,tr=.2,dop=1,cop=2){ -# -# Compute multivariate measure of location -# using Donoho-Gasko method. -# -# dop=1, use fdepth to compute depths -# dop=2, use fdepthv2 to compute depths -# -# cop=1, Tukey median; can't be used here. 
-# cop=2, use MCD in fdepth -# cop=3, use marginal medians in fdepth -# cop=4, use MVE in fdepth -# -if(is.list(m))m<-matl(m) -if(!is.matrix(m))stop("Data must be stored in a matrix or in list mode.") -if(ncol(m)==1){ -if(tr==.5)val<-median(m) -if(tr>.5)stop("Amount of trimming must be at most .5") -if(tr<.5)val<-mean(m,tr) -} -if(ncol(m)>1){ -temp<-NA -if(ncol(m)!=2){ -# Use approximate depth -if(dop==1)temp<-fdepth(m,plotit=FALSE,cop=cop) -if(dop==2)temp<-fdepthv2(m) -} -# Use exact depth if ncol=2 -if(ncol(m)==2){ -for(i in 1:nrow(m)) -temp[i]<-depth(m[i,1],m[i,2],m) -} -mdep<-max(temp) -flag<-(temp==mdep) -if(tr==.5){ -if(sum(flag)==1)val<-m[flag,] -if(sum(flag)>1)val<-apply(m[flag,],2,mean) -} -if(tr<.5){ -flag2<-(temp>=tr) -if(sum(flag2)==0 && sum(flag)>1)val<-apply(as.matrix(m[flag,]),2,mean) -if(sum(flag2)==0 && sum(flag)==1)val=m[flag,] -if(sum(flag2)==1)val<-m[flag2,] -if(sum(flag2)>1)val<-apply(m[flag2,],2,mean) -}} -val -} - -lsqs2<-function(x,y,MD=FALSE,tr=.05,plotit=TRUE){ -# cf Liu and Singh, JASA 1993, 252-260 -# -if(is.list(x))x<-matl(x) -if(is.list(y))y<-matl(y) -disyx<-NA # depth of y in x -disxy<-NA # depth of x in y -if(!is.matrix(x) && !is.matrix(y)){ -x<-x[!is.na(x)] -y<-y[!is.na(y)] -# -tempxx<-NA -for(i in 1:length(x)){ -tempxx[i]<-sum(x[i]<=x)/length(x) -if(tempxx[i]>.5)tempxx[i]<-1-tempxx[i] -} -for(i in 1:length(x)){ -temp<-sum(x[i]<=y)/length(y) -if(temp>.5)temp<-1-temp -disxy[i]<-mean(temp>tempxx) -} -tempyy<-NA -for(i in 1:length(y)){ -tempyy[i]<-sum(y[i]<=y)/length(y) -if(tempyy[i]>.5)tempyy[i]<-1-tempyy[i] -} -for(i in 1:length(y)){ -temp<-sum(y[i]<=x)/length(x) -if(temp>.5)temp<-1-temp # depth of y_i in x -disyx[i]<-mean(temp>tempyy) -} -qhatxy<-mean(disyx) -qhatyx<-mean(disxy) -qhat<-(qhatxy+qhatyx)/2 -} -if(is.matrix(x) && is.matrix(x)){ -if(!MD){ -if(ncol(x)!=2 || ncol(y)!=2){ -# Use approximate depth -tempyy<-fdepth(y) -temp<-fdepth(y,x) -for(i in 1:nrow(x)){ -disxy[i]<-mean(temp[i]>tempyy) -} -tempxx<-NA -tempxx<-fdepth(x) -temp<-fdepth(x,pts=y) -for(i in 1:nrow(y)){ -disyx[i]<-mean(temp[i]>tempxx) -}} -if(ncol(x)==2 && ncol(y)==2){ -if(plotit){ -plot(rbind(x,y),type="n",xlab="Var 1",ylab="VAR 2") -points(x) -points(y,pch="o") -temp<-NA -for(i in 1:nrow(x)){ -temp[i]<-depth(x[i,1],x[i,2],x) -} -flag<-(temp>=median(temp)) -xx<-x[flag,] -xord<-order(xx[,1]) -xx<-xx[xord,] -temp<-chull(xx) -xord<-order(xx[,1]) -xx<-xx[xord,] -temp<-chull(xx) -lines(xx[temp,]) -lines(xx[c(temp[1],temp[length(temp)]),]) -temp<-NA -for(i in 1:nrow(y)){ -temp[i]<-depth(y[i,1],y[i,2],y) -} -flag<-(temp>=median(temp)) -xx<-y[flag,] -xord<-order(xx[,1]) -xx<-xx[xord,] -temp<-chull(xx) -flag<-(temp>=median(temp)) -xord<-order(xx[,1]) -xx<-xx[xord,] -temp<-chull(xx) -lines(xx[temp,],lty=2) -lines(xx[c(temp[1],temp[length(temp)]),],lty=2) -} -tempyy<-NA -for(i in 1:nrow(y))tempyy[i]<-depth(y[i,1],y[i,2],y) -for(i in 1:nrow(x)){ -temp<-depth(x[i,1],x[i,2],y) -disxy[i]<-mean(temp>tempyy) -} -tempxx<-NA -for(i in 1:nrow(x))tempxx[i]<-depth(x[i,1],x[i,2],x) -for(i in 1:nrow(y)){ -temp<-depth(y[i,1],y[i,2],x) -disyx[i]<-mean(temp>tempxx) -} -}} -if(MD){ -mx<-apply(x,2,median) -my<-apply(y,2,median) -vx<-apply(x,2,winval,tr=tr)-apply(x,2,mean,trim=tr)+mx -vx<-var(vx) -vy<-apply(y,2,winval,tr=tr)-apply(y,2,mean,trim=tr)+my -vy<-var(vy) -tempxx<-1/(1+mahalanobis(x,mx,vx)) -tempyx<-1/(1+mahalanobis(y,mx,vx)) -for(i in 1:nrow(y)){ -disyx[i]<-mean(tempyx[i]>tempxx) -} -tempyy<-1/(1+mahalanobis(y,my,vy)) -tempxy<-1/(1+mahalanobis(x,my,vy)) -for(i in 1:nrow(x)){ 
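# disxy[i]: proportion of y points whose Mahalanobis depth within y is below
# the depth of x[i,] within y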
-disxy[i]<-mean(tempxy[i]>tempyy) -} -} -qhatxy<-sum(disxy) -qhatyx<-sum(disyx) -qhat<-(qhatxy+qhatyx)/(length(disxy)+length(disyx)) -} -qhatyx<-mean(disyx) -qhatxy<-mean(disxy) -list(qhatxy,qhatyx,qhat) -} - -depthg2<-function(x,y,alpha=.05,nboot=500,MD=FALSE,plotit=TRUE,op=FALSE,fast=FALSE,SEED=TRUE, -xlab="VAR 1",ylab="VAR 2"){ -# -# Compare two independent groups based on p measures -# for each group. -# -# The method is based on Tukey's depth if MD=F; -# otherwise the Mahalanobis depth is used. -# If p>2, then Mahalanobis depth is used automatically -# -# The method is designed to be sensitive to differences in scale -# -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -x=elimna(x) -y=elimna(y) -x=as.matrix(x) -y=as.matrix(y) -if(is.matrix(x) && is.matrix(y)){ # YES, code is odd. -nv1<-nrow(x) -nv2<-nrow(y) -if(ncol(x)!=ncol(y))stop("Number of columns of x is not equal to number for y") -if(ncol(x) >2)MD<-T -if(ncol(x)==2 && plotit){ -plot(rbind(x,y),type="n",xlab=xlab,ylab=ylab) -points(x,pch="*") -points(y,pch="o") -temp<-NA -for(i in 1:nrow(x)){ -temp[i]<-depth(x[i,1],x[i,2],x) -} -flag<-(temp>=median(temp)) -xx<-x[flag,] -xord<-order(xx[,1]) -xx<-xx[xord,] -temp<-chull(xx) -xord<-order(xx[,1]) -xx<-xx[xord,] -temp<-chull(xx) -lines(xx[temp,]) -lines(xx[c(temp[1],temp[length(temp)]),]) -temp<-NA -for(i in 1:nrow(y)){ -temp[i]<-depth(y[i,1],y[i,2],y) -} -flag<-(temp>=median(temp)) -xx<-y[flag,] -xord<-order(xx[,1]) -xx<-xx[xord,] -temp<-chull(xx) -flag<-(temp>=median(temp)) -xord<-order(xx[,1]) -xx<-xx[xord,] -temp<-chull(xx) -lines(xx[temp,],lty=2) -lines(xx[c(temp[1],temp[length(temp)]),],lty=2) -} -print("Taking bootstrap samples. Please wait.") -data1<-matrix(sample(nv1,size=nv1*nboot,replace=TRUE),nrow=nboot) -data2<-matrix(sample(nv2,size=nv2*nboot,replace=TRUE),nrow=nboot) -qhatd<-NA -dhatb<-NA -for(ib in 1:nboot){ -if(op)print(paste("Bootstrap sample ",ib," of ",nboot, "is complete.")) -if(!fast)temp<-lsqs2(x[data1[ib,],],y[data2[ib,],],plotit=FALSE,MD=MD) -if(fast)temp<-lsqs2.for(x[data1[ib,],],y[data2[ib,],],plotit=FALSE,MD=MD) -qhatd[ib]<-temp[[1]]-temp[[2]] -} -temp<-sort(qhatd) -lv<-round(alpha*nboot/2) -uv<-nboot-lv -difci<-c(temp[lv+1],temp[uv]) -} -# -if(!is.matrix(x) && !is.matrix(y)){ -nv1<-length(x) -nv2<-length(y) -print("Taking bootstrap samples. Please wait.") -data1<-matrix(sample(nv1,size=nv1*nboot,replace=TRUE),nrow=nboot) -data2<-matrix(sample(nv2,size=nv2*nboot,replace=TRUE),nrow=nboot) -qhatd<-NA -dhatb<-NA -for(ib in 1:nboot){ -if(!fast)temp<-lsqs2(x[data1[ib,]],y[data2[ib,]],plotit=FALSE,MD=MD) -if(fast)temp<-lsqs2.for(x[data1[ib,]],y[data2[ib,]],plotit=FALSE,MD=MD) -qhatd[ib]<-temp[[1]]-temp[[2]] -dhatb[ib]<-(temp[[1]]+temp[[2]])/2 -}} -temp<-sort(qhatd) -temp2<-sort(dhatb) -lv<-round(alpha*nboot/2) -uv<-nboot-lv -difci<-c(temp[lv+1],temp[uv]) -list(difci=difci) -} - -hochberg<- -function(x,x2=NA,cil=NA,con=0,tr=.2,alpha=.05){ -# -# A generalization of Hochberg's two-stage method -# method to trimmed mean# -# -# THIS FUNCTION WAS UPDATED FEB., 2024. IT NOW HAS A MORE CONVENIENT AND -# SLIGHTLY MORE ACCURATE METHOD FOR -# COMPUTING THE CRITICAL VALUE; NO NEED TO USE TABLES AS BEFORE. -# -# x contains first stage data -# x2 contains second stage data -# -# cil is the desired length of the confidence intervals. -# That is, cil is the distance between the upper and lower -# ends of the confidence intervals. 
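#
# Illustrative calls (hypothetical data, not part of the original file):
#   hochberg(stage1, cil = 2)           # reports additional n needed per group
#   hochberg(stage1, stage2, cil = 2)   # second stage supplied: returns ci.mat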
-# -x3<-x2 -if(is.matrix(x))x<-listm(x) -if(!is.list(x))stop("Data must be stored in list mode or in matrix mode.") -J<-length(x) -tempn<-0 -svec<-NA -for(j in 1:J){ -temp<-x[[j]] -temp<-temp[!is.na(temp)] # Remove missing values. -tempn[j]<-length(temp) -x[[j]]<-temp -svec[j]<-winvar(temp,tr=tr)/(1-2*tr)^2 -} -tempt<-floor((1-2*tr)*tempn) -A<-sum(1/(tempt-1)) -df<-J/A -if(!is.list(x2) && !is.matrix(x2)){ -x2<-list() -for(j in 1:J)x2[[j]]<-NA -} -if(is.na(cil))stop("To proceed, you must specify the maximum length of the confidence intervals.") -#crit<-trange(tempn-1,alpha=alpha,iter=iter,SEED=SEED) #OLD CODE -crit=qtukey(1-alpha,J,df) -# -if(con[1] == 0){ - Jm<-J-1 - ncon <- (J^2 - J)/2 - con <- matrix(0, J, ncon) - id <- 0 - for(j in 1:Jm) { - jp <- j + 1 - for(k in jp:J) { - id <- id + 1 - con[j, id] <- 1 - con[k, id] <- 0 - 1 - } - } - } - ncon <- ncol(con) -avec<-NA -for(i in 1:ncon){ -temp<-con[,i] -avec[i]<-sum(temp[temp>0]) -} -dvec<-(cil/(2*crit*avec))^2 -d<-max(dvec) -n.vec<-NA -for(j in 1:J){ -n.vec[j]<-max(tempn[j],floor(svec[j]/d)+1) -print(paste("Need an additional ", n.vec[j]-tempn[j], -" observations for group", j)) -} -# -# Do second stage if data are supplied -# -ci.mat=NULL -if(!is.na(x2[1])){ -if(is.matrix(x2))x2<-listm(x2) -temp2<-n.vec-tempn -#if(!is.list(x3) && !is.matrix(x3) && sum(temp2)>0)stop("No second stage data supplied; this function is terminating") -if(length(x) != length(x2))warning("Number of groups in first stage data does not match the number in the second stage.") -ci.mat<-NA -if(!is.na(x2[1]) || sum(temp2)==0){ -xtil<-NA -nvec2<-NA -for(j in 1:J){ -nvec2[j]<-0 -temp<-x2[[j]] -if(!is.na(temp[1]))nvec2[j]<-length(x2[[j]]) -if(nvec2[j] 0]) -C<-0-sum(bvec[bvec<0]) -D<-max(A,C) -ci.mat[ic,2]<-sum(con[,ic]*xtil)-crit*D -ci.mat[ic,3]<-sum(con[,ic]*xtil)+crit*D -}}} -list(ci.mat=ci.mat,con=con) -} - -trange<-function(dfvec,iter=10000,alpha=.05,SEED=TRUE){ -if(SEED)set.seed(1) -dfv<-length(dfvec)/sum(1/dfvec) -vals<-NA -tvals<-NA -J<-length(dfvec) -for(i in 1:iter){ -for(j in 1:J){ -tvals[j]<-rt(1,dfvec[j]) -} -vals[i]<-max(tvals)-min(tvals) -} -vals<-sort(vals) -ival<-round((1-alpha)*iter) -qval<-vals[ival] -qval -} - - -lsqs3<-function(x,y,plotit=TRUE,cop=2,ap.dep=FALSE,v2=FALSE,pv=FALSE,SEED=TRUE,nboot=1000,ypch="o",xpch="+"){ -# -# Compute the typical depth of x in y, -# Compute the typical depth of y in x, -# use the maximum of the two typical depths -# as a test statistic. -# This method is designed to be sensitive to -# shifts in location. -# -# Use Tukey's depth; bivariate case only. 
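#
# Illustrative call (hypothetical data, not part of the original file):
#   x <- matrix(rnorm(60), ncol = 2)
#   y <- matrix(rnorm(60, .7), ncol = 2)
#   lsqs3(x, y, pv = TRUE)   # test statistic, critical value and bootstrap p-value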
-# -# cop=2 use MCD location estimator when -# computing depth with function fdepth -# cop=3 uses medians -# cop=3 uses MVE -# -# xpch="+" means when plotting the data, data from the first -# group are indicated by a + -# ypch="o" are data from the second group -# -if(is.list(x))x<-matl(x) -if(is.list(y))y<-matl(y) -x<-elimna(x) -y<-elimna(y) -x<-as.matrix(x) -y<-as.matrix(y) -nx=nrow(x) -ny=nrow(y) -if(ncol(x) != ncol(y))stop("Number of variables not equal") -disyx<-NA # depth of y in x -disxy<-NA # depth of x in y -# -if(ncol(x)==2){ -if(plotit){ -plot(rbind(x,y),type="n",xlab="VAR 1",ylab="VAR 2") -points(x,pch=xpch) -points(y,pch=ypch) -if(nrow(x)>50){ -if(!ap.dep){ -print("If execution time is high, might use ap.dep=FALSE") -} -if(!ap.dep)temp<-depth2(x,plotit=FALSE) -if(ap.dep)temp<-fdepth(x,plotit=FALSE,cop=cop) -} -if(!ap.dep)temp<-depth2(x,plotit=FALSE) -if(ap.dep)temp<-fdepth(x,plotit=FALSE,cop=cop) -flag<-(temp>=median(temp)) -xx<-x[flag,] -xord<-order(xx[,1]) -xx<-xx[xord,] -temp<-chull(xx) -xord<-order(xx[,1]) -xx<-xx[xord,] -temp<-chull(xx) -lines(xx[temp,]) -lines(xx[c(temp[1],temp[length(temp)]),]) -if(ap.dep)temp<-fdepth(y,plotit=FALSE,cop=cop) -if(!ap.dep)temp<-depth2(y,plotit=FALSE) -if(!ap.dep)temp<-depth2(y,plotit=FALSE) -if(!ap.dep)temp<-fdepth(y,plotit=FALSE) -flag<-(temp>=median(temp)) -xx<-y[flag,] -xord<-order(xx[,1]) -xx<-xx[xord,] -temp<-chull(xx) -flag<-(temp>=median(temp)) -xord<-order(xx[,1]) -xx<-xx[xord,] -temp<-chull(xx) -lines(xx[temp,],lty=2) -lines(xx[c(temp[1],temp[length(temp)]),],lty=2) -} -tempyx<-NA -tempxy<-NA -if(ap.dep)tempyx<-fdepth(x,y,plotit=FALSE,cop=cop) -if(!ap.dep)tempyx<-depth2(x,y,plotit=FALSE) -if(ap.dep)tempxy<-fdepth(y,x,plotit=FALSE,cop=cop) -if(!ap.dep)tempxy<-depth2(y,x,plotit=FALSE) -} -if(ncol(x)==1){ -tempyx<-unidepth(as.vector(x),as.vector(y)) -tempxy<-unidepth(as.vector(y),as.vector(x)) -} -if(ncol(x)>2){ -if(!v2){ -tempxy<-fdepth(y,x,plotit=FALSE,cop=cop) -tempyx<-fdepth(x,y,plotit=FALSE,cop=cop) -} -if(v2){ -tempxy<-fdepthv2(y,x,plotit=FALSE) -tempyx<-fdepthv2(x,y,plotit=FALSE) -}} -qhatxy<-mean(tempxy) -qhatyx<-mean(tempyx) -qhat<-max(c(qhatxy,qhatyx)) -n1<-nrow(x) -n2<-nrow(y) -nv<-(3*min(c(n1,n2))+max(c(n1,n2)))/4 -if(ncol(x)==1)crit<-.2536-.4578/sqrt(nv) -if(ncol(x)==2)crit<-.1569-.3/sqrt(nv) -if(ncol(x)==3)crit<-.0861-.269/sqrt(nv) -if(ncol(x)==4)crit<-.054-.1568/sqrt(nv) -if(ncol(x)==5)crit<-.0367-.0968/sqrt(nv) -if(ncol(x)==6)crit<-.0262-.0565/sqrt(nv) -if(ncol(x)==7)crit<-.0174-.0916/sqrt(nv) -if(ncol(x)>7)crit<-.013 -rej<-"Fail to reject" -if(qhat<=crit)rej<-"Reject" -testv=NULL -pval=NULL -if(pv){ -if(SEED)set.seed(2) -rej="NULL" -for(i in 1:nboot)testv[i]=lsqs3.sub(rmul(n1,ncol(x)),rmul(n2,ncol(x)),cop=cop,ap.dep=ap.dep,v2=v2,)$test -pval=mean(qhat>=testv) -} -list(n1=nx,n2=ny,avg.depth.of.x.in.y=qhatxy,avg.depth.of.y.in.x=qhatyx,test=qhat,crit=crit,Decision=rej,p.value=pval) -} - -# The next function is used to compute p-values for lsqs3; it avoids lsqs3 calling itself. - -lsqs3.sub<-function(x,y,plotit=FALSE,cop=2,ap.dep=FALSE,v2=FALSE,pv=FALSE,SEED=TRUE,nboot=1000,ypch="o",xpch="+"){ -# -# Compute the typical depth of x in y, -# Compute the typical depth of y in x, -# use the maximum of the two typical depths -# as a test statistic. -# This method is designed to be sensitive to -# shifts in location. -# -# Use Tukey's depth; bivariate case only. 
-# -# cop=2 use MCD location estimator when -# computing depth with function fdepth -# cop=3 uses medians -# cop=3 uses MVE -# -# xpch="+" means when plotting the data, data from the first -# group are indicated by a + -# ypch="o" are data from the second group -# -if(is.list(x))x<-matl(x) -if(is.list(y))y<-matl(y) -x<-elimna(x) -y<-elimna(y) -x<-as.matrix(x) -y<-as.matrix(y) -nx=nrow(x) -ny=nrow(y) -if(ncol(x) != ncol(y))stop("Number of variables not equal") -disyx<-NA # depth of y in x -disxy<-NA # depth of x in y -# -if(ncol(x)==2){ -if(plotit){ -plot(rbind(x,y),type="n",xlab="VAR 1",ylab="VAR 2") -points(x,pch=xpch) -points(y,pch=ypch) -if(nrow(x)>50){ -if(!ap.dep){ -print("If execution time is high, might use ap.dep=FALSE") -} -if(!ap.dep)temp<-depth2(x,plotit=FALSE) -if(ap.dep)temp<-fdepth(x,plotit=FALSE,cop=cop) -} -if(!ap.dep)temp<-depth2(x,plotit=FALSE) -if(ap.dep)temp<-fdepth(x,plotit=FALSE,cop=cop) -flag<-(temp>=median(temp)) -xx<-x[flag,] -xord<-order(xx[,1]) -xx<-xx[xord,] -temp<-chull(xx) -xord<-order(xx[,1]) -xx<-xx[xord,] -temp<-chull(xx) -lines(xx[temp,]) -lines(xx[c(temp[1],temp[length(temp)]),]) -if(ap.dep)temp<-fdepth(y,plotit=FALSE,cop=cop) -if(!ap.dep)temp<-depth2(y,plotit=FALSE) -if(!ap.dep)temp<-depth2(y,plotit=FALSE) -if(!ap.dep)temp<-fdepth(y,plotit=FALSE) -flag<-(temp>=median(temp)) -xx<-y[flag,] -xord<-order(xx[,1]) -xx<-xx[xord,] -temp<-chull(xx) -flag<-(temp>=median(temp)) -xord<-order(xx[,1]) -xx<-xx[xord,] -temp<-chull(xx) -lines(xx[temp,],lty=2) -lines(xx[c(temp[1],temp[length(temp)]),],lty=2) -} -tempyx<-NA -tempxy<-NA -if(ap.dep)tempyx<-fdepth(x,y,plotit=FALSE,cop=cop) -if(!ap.dep)tempyx<-depth2(x,y,plotit=FALSE) -if(ap.dep)tempxy<-fdepth(y,x,plotit=FALSE,cop=cop) -tempxy<-depth2(y,x,plotit=FALSE) -} -if(ncol(x)==1){ -tempyx<-unidepth(as.vector(x),as.vector(y)) -tempxy<-unidepth(as.vector(y),as.vector(x)) -} -if(ncol(x)>2){ -if(!v2){ -tempxy<-fdepth(y,x,plotit=FALSE,cop=cop) -tempyx<-fdepth(x,y,plotit=FALSE,cop=cop) -} -if(v2){ -tempxy<-fdepthv2(y,x,plotit=FALSE) -tempyx<-fdepthv2(x,y,plotit=FALSE) -}} -qhatxy<-mean(tempxy) -qhatyx<-mean(tempyx) -qhat<-max(c(qhatxy,qhatyx)) -n1<-nrow(x) -n2<-nrow(y) -nv<-(3*min(c(n1,n2))+max(c(n1,n2)))/4 -if(ncol(x)==1)crit<-.2536-.4578/sqrt(nv) -if(ncol(x)==2)crit<-.1569-.3/sqrt(nv) -if(ncol(x)==3)crit<-.0861-.269/sqrt(nv) -if(ncol(x)==4)crit<-.054-.1568/sqrt(nv) -if(ncol(x)==5)crit<-.0367-.0968/sqrt(nv) -if(ncol(x)==6)crit<-.0262-.0565/sqrt(nv) -if(ncol(x)==7)crit<-.0174-.0916/sqrt(nv) -if(ncol(x)>7)crit<-.013 -rej<-"Fail to reject" -if(qhat<=crit)rej<-"Reject" -testv=NULL -pval=NULL -if(pv){ -if(SEED)set.seed(2) -rej="NULL" -for(i in 1:nboot)testv[i]=lsqs3.sub(rmul(n1,ncol(x)),rmul(n2,ncol(x)),cop=cop,ap.dep=ap.dep,v2=v2,)$test -pval=mean(qhat>=testv) -} -list(n1=nx,n2=ny,avg.depth.of.x.in.y=qhatxy,avg.depth.of.y.in.x=qhatyx,test=qhat,crit=crit,Decision=rej,p.value=pval) -} - - - - -kercon<-function(x,y,pyhat=FALSE,cval=NA,plotit=TRUE,eout=FALSE,xout=FALSE, -outfun=out,iran=.05,xlab="X",ylab="Y",pch='.'){ -# -# Compute conditional local weighted regression with Epanechnikov kernel -# -# cf. Fan, Annals of Statistics, 1993, 21, 196-217. 
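-#
-# Illustrative call (not part of the original code; xmat is assumed to be an
-# n by 2 matrix of predictors):
-#   kercon(xmat,y)  # plots the smooth of y on xmat[,1] with xmat[,2] held near its
-#                   # lower quartile, median and upper quartile (the default cval)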
-# -d<-ncol(x) -if(d!=2)stop("Argument x should have two columns only") -np1<-d+1 -m<-elimna(cbind(x,y)) -x<-m[,1:d] -y<-m[,np1] -yhat1<-NA -if(eout && xout)stop("Can't have both eout and xout=F") -if(eout){ -flag<-outfun(m)$keep -m<-m[flag,] -} -if(xout){ -flag<-outfun(x)$keep -m<-m[flag,] -} -x<-m[,1:d] -y<-m[,np1] -if(is.na(cval[1])){temp<-idealf(x[,2]) -cval<-c(temp$ql,median(x[,2]),temp$qu) -} -xrem<-x -x2<-x[,2] -n<-nrow(x) -sig<-sqrt(var(x2)) -temp<-idealf(x2) -iqr<-(temp$qu-temp$ql)/1.34 -A1<-min(c(sig,iqr)) -A<-1.77 -hval<-A*(1/n)^(1/6) # Silverman, 1986, p. 86 -svec<-NA -for(j in 1:d){ -sig<-sqrt(var(x[,j])) -temp<-idealf(x[,j]) -iqr<-(temp$qu-temp$ql)/1.34 -A<-min(c(sig,iqr)) -svec[j]<-A -x[,j]<-x[,j]/A -} -hval<-hval*sqrt(mean(svec^2)) -ilow<-round(iran*length(y)) -iup<-round((1-iran)*length(y)) -for(il in 1:length(cval)){ -temp4<-NA -for(j in 1:nrow(x)){ -temp4[j]<-((x2[j]-cval[il])/A1)^2 -} -yhat<-NA -epan1<-ifelse(temp4<1,.75*(1-temp4),0) # Epanechnikov kernel for x2 -for(j in 1:n){ -yhat[j]<-NA -temp1<-cbind(x[,1]-x[j,1],x[,2]-cval[il]/A)/hval -temp1<-temp1^2 -temp1<-apply(temp1,1,FUN="sum") -temp<-.5*(d+2)*(1-temp1)/gamma(.5)^2 -epan<-ifelse(temp1<1,temp,0) # Epanechnikov kernel, for both x1 and x2 -if(epan1[j]>0)epan[j]<-epan[j]/epan1[j] -if(epan1[j]==0)epan[j]<-0 -chkit<-sum(epan!=0) -if(chkit >= np1){ -vals<-lsfit(x[,1],y,wt=epan)$coef -yhat[j]<-x[j,1]*vals[2]+vals[1] -}} -if(plotit){ -xorder<-order(xrem[,1]) -if(il==1)plot(xrem[,1],y,xlab=xlab,ylab=ylab,pch=pch) -lines(xrem[xorder[ilow:iup],1],yhat[xorder[ilow:iup]],lty=il) -}} -m<-"Done" -if(pyhat)m<-yhat -m -} - -mscor<-function(m,corfun=spear,cop=3,MM=FALSE,gval=NA,ap=TRUE,pw=TRUE,STAND=TRUE, -outfun=outpro,alpha=.05){ -# -# m is an n by p matrix -# -# Compute a skipped correlation matrix -# -# corfun indicates the correlation to be used -# corfun=pcor uses Pearson's correlation -# corfun=spear uses Spearman's correlation -# -# When calling outpro, -# STAND=T means marginals are first standardized. -# This function returns the p by p matrix of correlations -# -# Method: Eliminate outliers using a projection technique. -# That is, compute Donoho-Gasko median, for each point -# consider the line between it and the median, -# project all points onto this line, and -# check for outliers using a boxplot rule. -# Repeat this for all points. A point is declared -# an outlier if for any projection it is an outlier -# using a modification of the usual boxplot rule. -# -# cop determines how center of the scatterplot is -# estimated; see the function outpro. -# cop=l Donoho-Gasko halfspace median -# cop=2 MCD measure of location -# cop=3 marginal medians -# cop=4 MVE measure of location -# -# Eliminate any outliers and compute -# correlations using remaining data. -# -# gval is critical value for determining whether a point -# is an outlier. It is determined automatically if not specified, -# assuming that Spearman's correlation is used. Critical -# values when using some other correlation have not been -# determined. -# -# Hypothesis of zero correlations tested with FWE=.05 -# -# AGRUMENTS: -# MM; see function outpro -# ap=T all pairwise comparisons are tested -# ap=F first variable is tested versus all others -# (for a total of p-1 tests). -# pw=T, print message about high execution time -# pw=F, suppress the message. 
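-#
-# Illustrative calls (not part of the original code; m is assumed to be an
-# n by p numeric matrix):
-#   mscor(m)$cor        # skipped Spearman correlation matrix
-#   mscor(m,ap=FALSE)   # test variable 1 against the others only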
-# -if(alpha!=.05)stop('For alpha other than .05, use mscorpb or mscorpbMC') -m<-elimna(m) -p<-ncol(m) -pm<-p-1 -n<-nrow(m) -if(p<2)stop("Something wrong; number of variables is < 2") -if(pw && cop==1){ -print("If execution time is too high,") -print("use cop=2 or 4 rather than 1") -} -if(ap){ -inter<-c(2.374,2.780,3.030,3.208,3.372,3.502,3.722,3.825,3.943) -slope<-c(5.333,8.8,25.67,32.83,51.53,75.02,111.34,123.16,126.72) -expo<-c(-1,-1,-1.2,-1.2,-1.3,-1.4,-1.5,-1.5,-1.5) -if(p>10){ -qvec<-NA -for(i in 1:9)qvec[i]<-inter[i]+slope[i]*n^expo[i] -pval<-c(2:10) -temp<-lsfit(pval,qvec)$coef -} -} -if(!ap){ -inter<-c(2.374,2.54,2.666,2.92,2.999,3.097,3.414,3.286,3.258) -slope<-c(5.333,8.811,14.89,20.59,51.01,52.15,58.498,64.934,59.127) -expo<-c(-1,-1,-1.2,-1.2,-1.5,-1.5,-1.5,-1.5,-1.5) -if(p>10){ -qvec<-NA -for(i in 1:9)qvec[i]<-inter[i]+slope[i]*n^expo[i] -pval<-c(1:9) -temp<-lsfit(pval,qvec)$coef -} -} -if(p<=10)crit<-inter[pm]+slope[pm]*n^expo[pm] -if(p>10)crit<-temp[2]*p+temp[1] -if(cop!=1 && is.na(gval))gval<-sqrt(qchisq(.975,ncol(m))) -temp<-outfun(m,plotit=FALSE,MM=MM,gval=gval,cop=cop,STAND=STAND)$keep -mcor<-corfun(m[temp,])$cor -test<-abs(mcor*sqrt((nrow(m)-2)/(1-mcor^2))) -diag(test) <- NA -if(!ap){ -test<-as.matrix(test[1,]) -} -list(cor=mcor,crit.val=crit,test.stat=test) -} - -dfried<-function(m,plotit=TRUE,pop=0,fr=.8,v2=FALSE,op=FALSE){ -# -# Compare dependent groups using halfspace depth of -# 0 relative to distribution of differences. -# -# When plotting differences scores: -# pop=1 Plot expected frequency curve -# pop=2 kernel density estimate -# pop=3 S+ kernel density estimate -# pop=4 boxplot -# -if(is.list(m))m<-matl(m) -if(!is.matrix(m))stop("m should be a matrix having at least 2 columns.") -m<-elimna(m) -library(MASS) -K<-ncol(m) -n<-nrow(m) -if(n<=10 && !op)print("With n<=10, might want to use op=T") -J<-(K^2-K)/2 -dcen<-cov.mcd(m)$center -center<-NA -pval<-matrix(NA,ncol=J,nrow=nrow(m)) -zvec<-rep(0,J) -ic<-0 -for(k in 1:K){ -for(kk in 1:K){ -if(k1)temp<-fdepth(pval0,center=center) -} -if(v2){ -if(ncol(pval)>1)temp<-fdepthv2(pval0) -} -big.dep<-max(temp) -if(op){ -v3<-dmean(pval,tr=.5,dop=2) -v3<-t(as.matrix(v3)) -big.dep<-max(max(temp),fdepthv2(pval0,v3)) -} -phat<-temp[nrow(m)+1]/big.dep -# Determine critical value -if(K==2)crit<-0.95-1.46/n^.5 -if(K==3)crit<-1.00-1.71/n^.5 -if(K==4)crit<-1.06-1.77/n^.5 -if(K==5)crit<-1.11-1.76/n^.5 -if(K==6)crit<-1.41-1.62/n^.3 -if(K==7)crit<-1.49-1.71/n^.3 -if(K>=8)crit<-1.39-1.38/n^.3 -crit<-min(c(crit,1)) -if(plotit && ncol(pval)==1){ -if(pop==0)akerd(pval,fr=fr) -if(pop==1)rdplot(pval,fr=fr) -if(pop==2)kdplot(pval) -if(pop==3)skerd(pval) -if(pop==4)boxplot(pval) -} -list(phat=phat,crit.val=crit) -} - -wrregfun<-function(slope,x=x,y=y){ -x<-as.matrix(x) -res<-y-x%*%slope -v1<-rank(res) -v2<-sqrt(12)*(v1/(length(y)+1)-.5) -wrregfun<-sum(v2*res) -wrregfun -} - -spat.sub<-function(x,theta){ -xx<-x -for(i in 1:ncol(x))xx[,i]<-x[,i]-theta[i] -xx<-xx^2 -temp<-sqrt(apply(xx,1,sum)) -val<-mean(temp) -val -} -spat<-function(x){ -# -# compute spatial median -# x is an n by p matrix -# -if(!is.matrix(x))stop("x must be a matrix") -x<-elimna(x) -START<-apply(x,2,median) -val=optim(START,spat.sub,x=x,method='BFGS')$par -val -} - -rungen<-function(x,y,est=onestep,fr=1,plotit=TRUE,scat=TRUE,pyhat=FALSE, -eout=FALSE,xout=FALSE,xlab="x",ylab="y",outfun=out,LP=TRUE,pch='.',...){ -# -# running interval smoother that can be used with any measure -# of location or scale. By default, an M-estimator is used. 
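-#
-# Illustrative calls (not part of the original code; x and y are assumed to be
-# numeric vectors of equal length):
-#   rungen(x,y)                # smooth based on the default one-step M-estimator
-#   rungen(x,y,est=hd,q=.75)   # smooth the .75 quantile (Harrell-Davis estimator)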
-# -# LP=TRUE, the plot is further smoothed via lows -# -# fr controls amount of smoothing -plotit<-as.logical(plotit) -scat<-as.logical(scat) -m<-cbind(x,y) -m<-elimna(m) -if(eout && xout)stop("Not allowed to have eout=xout=T") -if(eout){ -flag<-outfun(m,plotit=FALSE)$keep -m<-m[flag,] -} -if(xout){ -flag<-outfun(m[,1])$keep -m<-m[flag,] -} -x=m[,1] -y=m[,2] -rmd<-c(1:length(x)) -for(i in 1:length(x))rmd[i]<-est(y[near(x,x[i],fr)],...) -if(LP){ -ord=order(x) -x=x[ord] -rmd=rmd[ord] -y=y[ord] -rmd=lplot(x,rmd,plotit=FALSE,pyhat=TRUE,pr=FALSE,STR=FALSE)$yhat -} -if(plotit){ -if(scat){ -plot(c(x,x),c(y,rmd),xlab=xlab,ylab=ylab,type="n") -points(x,y,pch=pch) -} -if(!scat)plot(c(x,x),c(y,rmd),type="n",ylab=ylab,xlab=xlab) -points(x,rmd,type="n") -sx<-sort(x) -xorder<-order(x) -sysm<-rmd[xorder] -lines(sx,sysm) -} -if(pyhat)output<-rmd -if(!pyhat)output<-"Done" -list(output=output) -} - - -#adpchk<-function(x,y,adfun=adrun,gfun=runm3d,xlab="First Fit", -adpchk<-function(x,y,adfun=adrun,gfun=rplot,xlab="First Fit", -ylab="Second Fit",...){ -# -# Compare adfun, usually an additive fit, to fit -# based on gfun. -# -fit1<-adfun(x,y,pyhat=TRUE,plotit=FALSE) -if(is.list(fit1))fit1=fit1$yhat -fit2<-gfun(x,y,pyhat=TRUE,plotit=FALSE)$yhat -if(is.list(fit2))fit2=fit2$yhat -plot(fit1,fit2,xlab=xlab,ylab=ylab) -abline(0,1) -} - - - -riplot<-function(x,y,adfun=adrun,plotfun=lplot,eout=FALSE,xout=TRUE,scale=FALSE){ -# -# Plot used to investigate regression interaction -# (the extent a generalized additive model does not fit data). -# Compute additive fit, plot residuals -# versus x, an n by 2 matrix. -# -if(!is.matrix(x))stop(" x must be a matrix") -if(ncol(x)!=2)stop(" x must have two columns only") -yhat<-adfun(x,y,pyhat=TRUE,eout=eout,xout=xout,plotit=FALSE) -plotfun(x,y-yhat,eout=eout,xout=xout,scale=scale) -} - -adtestv2<-function(x,y,est=tmean,nboot=500,alpha=.05,fr=NA,xout=TRUE,outfun=outpro,com.pval=FALSE,SEED=TRUE,qval=.5,...){ -# -# For two predictors, test the hypothesis that the regression model is additive. That is, there is no interaction. -# In essence, for the model Y=g_1(X_1)+g_2(X_2)+g_3(X_1X_2), test H_0: g_3(X_1X_2)=0 -# -# The method fits an additive model using running interval smoother and the backfitting -# algorithm and then tests the hypothesis that the median of X_1X_2, given the residuals, -# is a straight horizontal line. -# -if(ncol(x)!=2)stop("There should be two predictors") -temp<-cbind(x,y) -p<-ncol(x) -p1<-p+1 -temp<-elimna(temp) -x<-temp[,1:p] -x<-as.matrix(x) -y<-temp[,p1] -if(xout){ -keepit<-rep(TRUE,nrow(x)) -flag<-outfun(x,plotit=FALSE,...)$out.id -keepit[flag]<-FALSE -x<-x[keepit,] -y<-y[keepit] -} -if(alpha<.05 && nboot<=100)warning("You used alpha<.05 and nboot<=100") -if(is.na(fr)){ -fr<-.8 -if(ncol(x)==2){ -nval<-c(20,30,50,80,100,200,300,400) -fval<-c(0.40,0.36,0.3,0.25,0.23,.12,.08,.015) -if(length(y)<=400)fr<-approx(nval,fval,length(y))$y -if(length(y)>400)fr<-.01 -} -} -if(SEED)set.seed(2) -x<-as.matrix(x) -mflag<-matrix(NA,nrow=length(y),ncol=length(y)) -for (j in 1:length(y)){ -for (k in 1:length(y)){ -mflag[j,k]<-(sum(x[j,]<=x[k,])==ncol(x)) -} -} -yhat<-adrun(x,y,est=est,plotit=FALSE,fr=fr,pyhat=TRUE) -regres<-y-yhat -test2=medind(regres,x[,1]*x[,2],qval=qval,nboot=nboot,com.pval=com.pval,SEED=SEED,alpha=alpha, -pr=TRUE,xout=xout,outfun=outfun,...) 
-test2 -} - - -adtests1<-function(vstar,yhat,res,mflag,x,fr){ -ystar<-yhat+res*vstar -bres<-adrun(x,ystar,fr=fr,pyhat=TRUE,plotit=FALSE) -bres<-ystar-bres -rval<-0 -for (i in 1:nrow(x)){ -rval[i]<-sum(bres[mflag[,i]]) -} -rval -} -runsm2g<-function(x1,y1,x2,val=median(x2),est=tmean,sm=FALSE,fr=.8,xlab="X", -ylab="Y",...){ -# -# Plot of running interval smoother for two groups -# Groups are defined according to whether x2=1. -# -if(!is.matrix(x))stop("Predictors are not stored in a matrix.") -if(!is.matrix(pts))stop("The third argument, pts, must be a matrix.") -library(MASS) -if(DET)m=DETMCD(x) -else m<-cov.mve(x) -rmd<-1 # Initialize rmd -nval<-1 -for(i in 1:nrow(pts)){ -rmd[i]<-est(y[near3d(x,pts[i,],fr,m)],...) -nval[i]<-length(y[near3d(x,pts[i,],fr,m)]) -} -list(rmd=rmd,nval=nval) -} - - -lta.sub<-function(X,theta,h){ -np<-ncol(X) -p<-np-1 -x<-X[,1:p] -y<-X[,np] -temp<-t(t(x)*theta[2:np]) -yhat<-apply(temp,1,sum)+theta[1] -res<-abs(y-yhat) -res<-sort(res) -val<-sum(res[1:h]) -val -} - ltareg<-function(x, y, tr = 0.2, h = NA,op=2) -{ - # - # Compute the least trimmed absolute value regression estimator. - # The default amount of trimming is .2 -# op=1, use ltsreg as initial estimate -# op!=1, use tsreg -# -# If h is specfied, use h smallest residuals, and ignore tr -# -x<-as.matrix(x) -library(MASS) -if(is.na(h)) h <- length(y) - floor(tr * length(y)) -X<-cbind(x,y) -X<-elimna(X) -np<-ncol(X) -p<-np-1 -x<-X[,1:p] -x<-as.matrix(x) -y<-X[,np] -if(op==1)temp<-ltsreg(x,y)$coef -if(op!=1)temp<-tsreg(x,y)$coef -START<-temp -coef<-nelderv2(X,np,FN=lta.sub,START=START,h=h) - res <- y - x%*%coef[2:np] - coef[1] - list(coef = coef, residuals = res) -} - - - -nelderv2<-function(x,N,FN,START=c(rep(1,N)),STEP=c(rep(1,N)),REQMIN=.0001, -XMIN=c(rep(0,N)),XSEC=c(rep(0,N)),...){ -# NELDER-MEAD method for minimzing a function -# -# TAKEN FROM OLSSON, J QUALITY TECHNOLOGY, 1974, 6, 56. -# -# x= n by p matrix containing data; it is used by -# function to be minimized. -# N= number of parameters -# -# FN=the function to be minimized -# FORM: FN(x,theta), theta is vector containing -# values for N parameters. -# -# START = starting values. -# STEP=initial step. -# This function returns the N values for theta that minimize FN -# - ICOUNT<-500 - NN<-N+1 - P<-matrix(NA,nrow=N,ncol=NN) - P[,NN]<-START - PBAR<-NA - RCOEFF<-1 - ECOEFF<-2 - CCOEFF<-.5 - KCOUNT<-ICOUNT - ICOUNT<-0 - DABIT<-2.04067e-35 - BIGNUM<-1.e38 - KONVGE<-5 - XN<-N - DN<-N - Y<-rep(0,NN) - Y[NN]<-FN(x,START,...) - ICOUNT<-ICOUNT+1 - for(J in 1:N){ - DCHK<-START[J] - START[J]<-DCHK+STEP[J] - for(I in 1:N){ - P[I,J]<-START[I] -} - Y[J]<-FN(x,START,...) - ICOUNT<-ICOUNT+1 - START[J]<-DCHK -} - I1000<-TRUE - while(I1000){ - YLO<-Y[1] - YNEWLO<-YLO - ILO<-1 - IHI<-1 - for(I in 2:NN){ - if(Y[I] < YLO){ - YLO<-Y[I] - ILO<-I} - if(Y[I] > YNEWLO){ - YNEWLO<-Y[I] - IHI<-I} -} - DCHK<-(YNEWLO+DABIT)/(YLO+DABIT)-1 - if(abs(DCHK) < REQMIN){ - I1000<-FALSE - next -} - KONVGE<-KONVGE-1 - if(KONVGE == 0){ - KONVGE<-5 - for(I in 1:N){ - COORD1<-P[I,1] - COORD2<-COORD1 - for(J in 2:NN){ - if(P[I,J] < COORD1)COORD1<-P[I,J] - if(P[I,J] > COORD2)COORD2<-P[I,J] -} # 2010 CONTINUE - DCHK<-(COORD2+DABIT)/(COORD1+DABIT)-1 - if(abs(DCHK) > REQMIN)break -} -} - if(ICOUNT >= KCOUNT){ - I1000<-F - next -} - for(I in 1:N){ - Z<-0.0 - Z<-sum(P[I,1:NN]) # 6 - Z<-Z-P[I,IHI] - PBAR[I]<-Z/DN -} - PSTAR<-(1.+RCOEFF)*PBAR-RCOEFF*P[,IHI] - YSTAR<-FN(x,PSTAR,...) 
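-# PSTAR (just computed) is the reflection of the worst simplex vertex P[,IHI]
-# through the centroid PBAR of the remaining vertices; its objective value YSTAR
-# is compared with the best (YLO) and worst values below to decide whether to
-# expand, contract or simply replace the worst vertex.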
- ICOUNT<-ICOUNT+1 - if(YSTAR < YLO && ICOUNT >= KCOUNT){ - P[,IHI]<-PSTAR - Y[IHI]<-YSTAR - next -} - IFLAG<-TRUE - if(YSTAR < YLO){ - P2STAR<-ECOEFF*PSTAR+(1-ECOEFF)*PBAR - Y2STAR<-FN(x,P2STAR,...) - ICOUNT<-ICOUNT+1 - if(Y2STAR >= YSTAR){ - P[,IHI]<-PSTAR - Y[IHI]<-YSTAR - next #In essence, go to 19 which goes to 1000 -} - IFLAG<-TRUE - while(YSTAR < Y[IHI]){ - P[,IHI]<-P2STAR - Y[IHI]<-Y2STAR - IFLAG<-FALSE - break - L<-sum(Y[1:NN] > YSTAR) - if(L > 1){ - P[,IHI]<-PSTAR - Y[IHI]<-YSTAR - IFLAG<-TRUE - break -} - if(L > 1)break # go to 19 - if(L != 0){ - P[1:N,IHI]<-PSTAR[1:N] - Y[IHI]<-YSTAR -} -I1000<-FALSE -break - if(ICOUNT >= KCOUNT){ - I1000<-FALSE - next -} - P2STAR[1:N]<-CCOEFF*P[1:N,IHI]+(1-CCOEFF)*PBAR[1:N] - Y2STAR<-FN(x,P2STAR,...) - ICOUNT<-ICOUNT+1 -} # END WHILE -} -if(IFLAG){ -for(J in 1:NN){ -P[,J]=(P[,J]+P[,ILO])*.5 - XMIN<-P[,J] - Y[J]<-FN(x,XMIN,...) -} - ICOUNT<-ICOUNT+NN - if(ICOUNT < KCOUNT)next - I1000<-F -next -} - P[1:N,IHI]<-PSTAR[1:N] - Y[IHI]<-YSTAR -} - for(J in 1:NN){ - XMIN[1:N]<-P[1:N,J] -} - Y[J]<-FN(x,XMIN,...) - YNEWLO<-BIGNUM - for(J in 1:NN){ - if (Y[J] < YNEWLO){ - YNEWLO<-Y[J] - IBEST<-J -}} - Y[IBEST]<-BIGNUM - YSEC<-BIGNUM -for(J in 1:NN){ -if(Y[J] < YSEC){ - YSEC<-Y[J] - ISEC<-J -}} - XMIN[1:N]<-P[1:N,IBEST] - XSEC[1:N]<-P[1:N,ISEC] -XMIN -} - - - - -nelder<-function(x,N,FN,START=c(rep(1,N)),STEP=c(rep(1,N)), -XMIN=c(rep(0,N)),XSEC=c(rep(0,N))){ -# NELDER-MEAD method for minimzing a function -# -# TAKEN FROM OLSSON, J QUALITY TECHNOLOGY, 1974, 6, 56. -# -# x= n by p matrix containing data; it is used by -# function to be minimized. -# N= number of parameters -# -# FN=the function to be minimized -# FORM: FN(x,theta), theta is vector containing -# values for N parameters. -# -# START = starting values. -# STEP=initial step. 
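-#
-# Illustrative use (not part of the original code): to minimize the sum of squared
-# residuals for a simple regression stored in an n by 2 matrix X (predictor in
-# column 1, response in column 2), one could define
-#   ssfun<-function(X,theta)sum((X[,2]-theta[1]-theta[2]*X[,1])^2)
-# and call nelder(X,2,FN=ssfun,START=c(0,0)).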
-# This function returns the N values for theta that minimize FN -# - ICOUNT<-500 - REQMIN<-.0000001 - NN<-N+1 - P<-matrix(NA,nrow=N,ncol=NN) - P[,NN]<-START - PBAR<-NA - RCOEFF<-1 - ECOEFF<-2 - CCOEFF<-.5 - KCOUNT<-ICOUNT - ICOUNT<-0 - DABIT<-2.04067e-35 - BIGNUM<-1.e38 - KONVGE<-5 - XN<-N - DN<-N - Y<-rep(0,NN) - Y[NN]<-FN(x,START) - ICOUNT<-ICOUNT+1 - for(J in 1:N){ - DCHK<-START[J] - START[J]<-DCHK+STEP[J] - for(I in 1:N){ - P[I,J]<-START[I] -} - Y[J]<-FN(x,START) - ICOUNT<-ICOUNT+1 - START[J]<-DCHK -} - I1000<-T - while(I1000){ - YLO<-Y[1] - YNEWLO<-YLO - ILO<-1 - IHI<-1 - for(I in 2:NN){ - if(Y[I] < YLO){ - YLO<-Y[I] - ILO<-I} - if(Y[I] > YNEWLO){ - YNEWLO<-Y[I] - IHI<-I} -} - DCHK<-(YNEWLO+DABIT)/(YLO+DABIT)-1 - if(abs(DCHK) < REQMIN){ - I1000<-F - next -} - KONVGE<-KONVGE-1 - if(KONVGE == 0){ - KONVGE<-5 - for(I in 1:N){ - COORD1<-P[I,1] - COORD2<-COORD1 - for(J in 2:NN){ - if(P[I,J] < COORD1)COORD1<-P[I,J] - if(P[I,J] > COORD2)COORD2<-P[I,J] -} # 2010 CONTINUE - DCHK<-(COORD2+DABIT)/(COORD1+DABIT)-1 - if(abs(DCHK) > REQMIN)break -} -} - if(ICOUNT >= KCOUNT){ - I1000<-F - next -} - for(I in 1:N){ - Z<-0.0 - Z<-sum(P[I,1:NN]) # 6 - Z<-Z-P[I,IHI] - PBAR[I]<-Z/DN -} - PSTAR<-(1.+RCOEFF)*PBAR-RCOEFF*P[,IHI] - YSTAR<-FN(x,PSTAR) - ICOUNT<-ICOUNT+1 - if(YSTAR < YLO && ICOUNT >= KCOUNT){ - P[,IHI]<-PSTAR - Y[IHI]<-YSTAR - next -} - IFLAG<-T - if(YSTAR < YLO){ - P2STAR<-ECOEFF*PSTAR+(1-ECOEFF)*PBAR - Y2STAR<-FN(x,P2STAR) - ICOUNT<-ICOUNT+1 - if(Y2STAR >= YSTAR){ - P[,IHI]<-PSTAR - Y[IHI]<-YSTAR - next #In essence, go to 19 which goes to 1000 -} - IFLAG<-T - while(YSTAR < Y[IHI]){ - P[,IHI]<-P2STAR - Y[IHI]<-Y2STAR - IFLAG<-F - break - L<-sum(Y[1:NN] > YSTAR) - if(L > 1){ - P[,IHI]<-PSTAR - Y[IHI]<-YSTAR - IFLAG<-T - break -} - if(L > 1)break # go to 19 - if(L != 0){ - P[1:N,IHI]<-PSTAR[1:N] - Y[IHI]<-YSTAR -} -I1000<-F -break - if(ICOUNT >= KCOUNT){ - I1000<-F - next -} - P2STAR[1:N]<-CCOEFF*P[1:N,IHI]+(1-CCOEFF)*PBAR[1:N] - Y2STAR<-FN(x,P2STAR) - ICOUNT<-ICOUNT+1 -} # END WHILE -} -if(IFLAG){ -for(J in 1:NN){ -P[,J]<-(P[,J]+P[,ILO])*.5 - XMIN<-P[,J] - Y[J]<-FN(x,XMIN) -} - ICOUNT<-ICOUNT+NN - if(ICOUNT < KCOUNT)next - I1000<-F -next -} - P[1:N,IHI]<-PSTAR[1:N] - Y[IHI]<-YSTAR -} - for(J in 1:NN){ - XMIN[1:N]<-P[1:N,J] -} - Y[J]<-FN(x,XMIN) - YNEWLO<-BIGNUM - for(J in 1:NN){ - if (Y[J] < YNEWLO){ - YNEWLO<-Y[J] - IBEST<-J -}} - Y[IBEST]<-BIGNUM - YSEC<-BIGNUM -for(J in 1:NN){ -if(Y[J] < YSEC){ - YSEC<-Y[J] - ISEC<-J -}} - XMIN[1:N]<-P[1:N,IBEST] - XSEC[1:N]<-P[1:N,ISEC] -XMIN -} - - -stein1.tr<-function(x,del,alpha=.05,pow=.8,tr=.2){ -# -# Extension of Stein's method when performing all pairwise -# comparisons among J dependent groups. -# -# If x represents a single group, one-sample analysis is performed. 
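-#
-# Illustrative call (not part of the original code; m is assumed to be an n by J
-# matrix of first-stage data): stein1.tr(m,del=.5) estimates the number of
-# observations needed to detect a difference of .5 with power .8 (the default pow).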
-# -if(tr < 0 || tr >=.5)stop("Argument tr must be between 0 and .5") -if(is.matrix(x))m<-x -if(is.list(x))m<-matl(x) -if(!is.matrix(x) && !is.list(x))m<-matrix(x,ncol=1) -m<-elimna(m) -m<-as.matrix(m) -ntest<-1 -n<-nrow(m) -J<-ncol(m) -if(ncol(m) > 1)ntest<-(J^2-J)/2 -g<-floor(tr*nrow(m)) -df<-n-2*g-1 -t1<-qt(pow,df) -t2<-qt(alpha/(2*ntest),df) -dv<-(del/(t1-t2))^2 -nvec<-NA -if(ntest > 1){ -ic<-0 -for (j in 1:ncol(m)){ -for (jj in 1:ncol(m)){ -if(j=.5)stop("Argument tr must be between 0 and .5") -if(is.matrix(x))m<-x -if(is.list(x))m<-matl(x) -if(is.list(y))y<-matl(y) -if(!is.matrix(x) && !is.list(x))m<-matrix(x,ncol=1) -if(!is.matrix(y) && !is.list(y))y<-matrix(y,ncol=1) -m<-elimna(m) -m<-as.matrix(m) -g<-floor(tr*nrow(m)) -df<-nrow(m)-2*g-1 -m<-rbind(m,y) -ic<-0 -ntest<-(ncol(m)^2-ncol(m))/2 -if(ntest==0)ntest<-1 -test<-matrix(NA,ncol=3,nrow=ntest) -for (j in 1:ncol(m)){ -for (jj in 1:ncol(m)){ -if(j nmin -# atr is amount of trimming when averaging over the bagged -# values -# est is the measure of location to be estimated -# est=tmean means estimate 20% trimmed mean of y given x -# -if(SEED)set.seed(2) -temp<-cbind(x,y) -if(ncol(temp)>2)stop("Use run3bo with more than 1 predictor") -temp<-elimna(temp) # Eliminate any rows with missing values -if(eout && xout)stop("Not allowed to have eout=xout=T") -if(eout){ -flag<-outfun(temp,plotit=FALSE)$keep -temp<-temp[flag,] -} -if(xout){ -flag<-outfun(x,plotit=FALSE)$keep -temp<-temp[flag,] -} -x<-temp[,1] -y<-temp[,2] -pts<-as.matrix(pts) -mat<-matrix(NA,nrow=nboot,ncol=nrow(pts)) -vals<-NA -for(it in 1:nboot){ -idat<-sample(c(1:length(y)),replace=TRUE) -xx<-temp[idat,1] -yy<-temp[idat,2] -mat[it,]<-runhat(xx,yy,pts=pts,est=est,fr=fr,...) -} -rmd<-apply(mat,2,mean,na.rm=RNA,tr=atr) -if(plotit){ -if(scat){ -plot(c(x,x),c(y,rmd),xlab=xlab,ylab=ylab,type="n") -points(x,y,pch=pch) -} -if(!scat)plot(c(x,x),c(y,rmd),type="n",xlab=xlab,ylab=ylab) -points(x, rmd, type = "n") -sx <- sort(x) -xorder <- order(x) -sysm <- rmd[xorder] -lines(sx, sysm) -} -output="Done" -if(pyhat)output<-rmd -output -} - - -run3bo<-function(x,y,fr=1,est=tmean,theta = 50, phi = 25,nmin=0, -pyhat=FALSE,eout=FALSE,outfun=out,plotit=TRUE,xout=FALSE,nboot=40,SEED=TRUE,STAND=TRUE, -expand=.5,scale=FALSE,xlab="X",ylab="Y",zlab="",ticktype="simple",...){ -# -# running mean using interval method -# -# fr controls amount of smoothing -# tr is the amount of trimming -# -# Missing values are automatically removed. -# -library(MASS) -library(akima) -if(SEED)set.seed(2) -temp<-cbind(x,y) -x<-as.matrix(x) -p<-ncol(x) -p1<-p+1 -if(p>2)plotit<-FALSE -temp<-elimna(temp) # Eliminate any rows with missing values. 
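-# Split the cleaned data back into the predictor matrix x and the response y;
-# when xout=TRUE, rows flagged as outliers by outfun are then removed before smoothing.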
-x<-temp[,1:p] -x<-as.matrix(x) -y<-temp[,p1] -if(xout){ -keepit<-rep(TRUE,nrow(x)) -flag<-outfun(x,plotit=FALSE,STAND=STAND,...)$out.id -keepit[flag]<-FALSE -x<-x[keepit,] -y<-y[keepit] -} -mat<-matrix(NA,nrow=nboot,ncol=length(y)) -vals<-NA -for(it in 1:nboot){ -idat<-sample(c(1:length(y)),replace=TRUE) -xx<-temp[idat,1:p] -yy<-temp[idat,p1] -tmy<-rung3hat(xx,yy,pts=x,est=est,fr=fr,...)$rmd -mat[it,]<-tmy -} -rmd<-apply(mat,2,mean,na.rm=TRUE) -flag<-!is.na(rmd) -rmd<-elimna(rmd) -x<-x[flag,] -y<-y[flag] -nval<-NA -m<-cov.mve(x) -for(i in 1:nrow(x))nval[i]<-length(y[near3d(x,x[i,],fr,m)]) -if(plotit && ncol(x)==2){ -#if(ncol(x)!=2)stop("When plotting, x must be an n by 2 matrix") -fitr<-rmd[nval>nmin] -y<-y[nval>nmin] -x<-x[nval>nmin,] -iout<-c(1:length(fitr)) -nm1<-length(fitr)-1 -for(i in 1:nm1){ -ip1<-i+1 -for(k in ip1:length(fitr))if(sum(x[i,]==x[k,])==2)iout[k]<-0 -} -fitr<-fitr[iout>=1] # Eliminate duplicate points in the x-y plane -# This is necessary when doing three dimensional plots -# with the R function interp -mkeep<-x[iout>=1,] -fit<-interp(mkeep[,1],mkeep[,2],fitr) -persp(fit,theta=theta,phi=phi,xlab=xlab,ylab=ylab,zlab=zlab,expand=expand, -scale=scale,ticktype=ticktype) -} -last<-"Done" -if(pyhat)last<-rmd -list(output=last) -} - - -ancom<-function(x1,y1,x2,y2,dchk=FALSE,plotit=TRUE,plotfun=rplot,nboot=500, -alpha=.05,SEED=TRUE,PARTEST=FALSE,tr=0,...){ -# -# Omnibus ANCOVA -# tr=0 is recommended for general use. tr>0 might result in -# poor control over the probability of a Type I error. -# PARTEST=T will test the hypothesis of parallel regression lines. -# -# Setting plotfun=rplotsm will smooth the plots via bagging -# -# dchk=T, points in design space with a halfspace of zero are eliminated -# -# PARTEST=F tests hypothesis that regression surface is a horizontal -# plane through the origin -# PARTEST=T tests the hypothesis that the two regression surfaces -# are parallel. -# -flag1<-rep(TRUE,length(y1)) -flag2<-rep(TRUE,length(y2)) -if(dchk){ -dep1<-fdepth(x2,x1) # depth of points in x1 relative to x2 -dep2<-fdepth(x1,x2) -flag1<-(dep1>0) -flag2<-(dep2>0) -} -n1<-sum(flag1) -n2<-sum(flag2) -n<-n1+n2 -y<-c(n2*y1[flag1]/n,0-n1*y2[flag2]/n) -x1<-as.matrix(x1) -x1<-x1[flag1,] -x2<-as.matrix(x2) -x2<-x2[flag2,] -x1<-as.matrix(x1) -x2<-as.matrix(x2) -x<-rbind(x1,x2) -if(plotit){ -if(ncol(x)<=2)plotfun(x,y,...) -} -if(PARTEST)output<-indt(x,y,nboot=nboot,SEED=SEED) -if(!PARTEST)output<-indt0(x,y,nboot=nboot,alpha=alpha,SEED=SEED) -list(dstat=output$dstat,critd=output$critd) -} -indt0<-function(x,y,nboot=500,alpha=.05,flag=1,SEED=TRUE){ -# -# Test the hypothesis that the regression plane -# between x and y is a flat horizontal plane with intercept 0 -# The method is based on results in -# Stute et al. (1998, JASA, 93, 141-149). -# -# flag=1 gives Kolmogorov-Smirnov test statistic -# flag=2 gives the Cramer-von Mises test statistic -# flag=3 causes both test statistics to be reported. -# -if(SEED)set.seed(2) -x<-as.matrix(x) -# First, eliminate any rows of data with missing values. 
-temp <- cbind(x, y) - temp <- elimna(temp) - pval<-ncol(temp)-1 - x <- temp[,1:pval] - y <- temp[, pval+1] -x<-as.matrix(x) -mflag<-matrix(NA,nrow=length(y),ncol=length(y)) -for (j in 1:length(y)){ -for (k in 1:length(y)){ -mflag[j,k]<-(sum(x[j,]<=x[k,])==ncol(x)) -} -} -# ith row of mflag indicates which rows of the matrix x are less -# than or equal to ith row of x -# -yhat<-0 -res<-y-yhat -print("Taking bootstrap sample, please wait.") -data<-matrix(runif(length(y)*nboot),nrow=nboot) -data<-(data-.5)*sqrt(12) # standardize the random numbers. -rvalb<-apply(data,1,indt0sub,yhat,res,mflag,x,tr) -# An n x nboot matrix of R values -rvalb<-rvalb/sqrt(length(y)) -dstatb<-apply(abs(rvalb),2,max) -wstatb<-apply(rvalb^2,2,mean) -mstatb<-apply(abs(rvalb),2,median) -dstatb<-sort(dstatb) -wstatb<-sort(wstatb) -mstatb<-sort(mstatb) -# compute test statistic -v<-c(rep(1,length(y))) -rval<-indt0sub(v,yhat,res,mflag,x,tr) -rval<-rval/sqrt(length(y)) -dstat<-NA -wstat<-NA -critd<-NA -critw<-NA -ib<-round(nboot*(1-alpha)) -if(flag==1 || flag==3){ -dstat<-max(abs(rval)) -critd<-dstatb[ib] -} -if(flag==2 || flag==3){ -wstat<-mean(rval^2) -critw<-wstatb[ib] -} -list(dstat=dstat,wstat=wstat,critd=critd,critw=critw) -} - - -indt0sub<-function(vstar,yhat,res,mflag,x,tr){ -bres<-res*vstar -rval<-0 -for (i in 1:nrow(x)){ -rval[i]<-sum(bres[mflag[,i]]) -} -rval -} - -smeancr<-function(m,nullv=rep(0,ncol(m)),cop=3,MM=FALSE,SEED=TRUE,FAST=FALSE, -nboot=500,plotit=TRUE,xlab="VAR 1",ylab="VAR 2",STAND=TRUE){ -# -# m is an n by p matrix -# -# Test hypothesis that multivariate skipped estimators -# are all equal to the null value, which defaults to zero. -# The level of the test is .05. -# -# Eliminate outliers using a projection method -# That is, determine center of data using: -# -# cop=1 Donoho-Gasko median, -# cop=2 MCD, -# cop=3 marginal medians. -# cop=4 MVE -# -# For each point -# consider the line between it and the center -# project all points onto this line, and -# check for outliers using -# -# MM=F, a boxplot rule. -# MM=T, rule based on MAD and median -# -# Repeat this for all points. A point is declared -# an outlier if for any projection it is an outlier -# using a modification of the usual boxplot rule. -# -# Eliminate any outliers and compute means -# using remaining data. -# -if(SEED)set.seed(2) -#if(!is.na(SEED))set.seed(SEED) -m<-elimna(m) -n<-nrow(m) -crit.level<-.05 -if(n<=120)crit.level<-.045 -if(n<=80)crit.level<-.04 -if(n<=60)crit.level<-.035 -if(n<=40)crit.level<-.03 -if(n<=30)crit.level<-.025 -if(n<=20)crit.level<-.02 -data<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot) -val<-matrix(NA,ncol=ncol(m),nrow=nboot) -for(j in 1: nboot){ -mm<-m[data[j,],] -if(FAST)temp<-outpro.depth(mm,plotit=FALSE,SEED=FALSE)$keep -if(!FAST)temp<-outpro(mm,plotit=FALSE,cop=cop,STAND=STAND)$keep -val[j,]<-apply(mm[temp,],2,mean) -} -temp<-pdis(rbind(val,nullv)) -sig.level<-sum(temp[nboot+1]1){ -if(ncol(x)==2 && !scale){ -if(pr){ -print("scale=F is specified.") -print("If there is dependence, use scale=T") -}} -if(ncol(x)>2)plotit<-F -val<-run3bo(x,y,est=est,fr=fr,nmin=nmin,plotit=plotit,pyhat=TRUE,phi=phi, -theta=theta,xlab=xlab,ylab=ylab,ticktype=ticktype,STAND=STAND, -SEED=SEED,expand=expand,scale=scale,nboot=nboot,...) 
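-# run3bo returns a list; the bagged fitted values used below to estimate
-# explanatory power are stored in its $output component.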
-val<-val$output -} -E.power<-varfun(val[!is.na(val)])/varfun(y) -if(!pyhat)val <- NULL -E.power=as.numeric(E.power) -list(Strength.Assoc=sqrt(E.power),Explanatory.Power = E.power, yhat = val) -} - -zdepth<-function(m,pts=m,zloc=median,zscale=mad){ -# -# Compute depth of points as in Zuo, Annals, 2003 -# -if(!is.matrix(m))stop("argument m should be a matrix") -if(!is.matrix(pts))stop("argument pts should be a matrix") -if(ncol(m)!=ncol(pts))stop("Number of columns for m and pts are not equal") -np<-ncol(m) -val<-NA -for(i in 1:nrow(pts)){ -pval<-pts[i,] -START<-rep(1,np)/sqrt(np) -temp<-nelderv2(m,np,FN=zdepth.sub,START=START,zloc=zloc,zscale=zscale,pts=pval) -temp<-temp/sqrt(sum(temp^2)) -y<-t(t(m)*temp) -y<-apply(y,1,sum) -ppro<-sum(pval*temp) -val[i]<-abs(ppro-zloc(y))/zscale(y) -} -val -} - -zdepth.sub<-function(x,theta,zloc=median,zscale=mad,pts=NA){ -theta<-theta/sqrt(sum(theta^2)) -temp<-t(t(x)*theta) -ppro<-sum(t(t(pts)*theta)) -yhat<-apply(temp,1,sum) -val<-0-abs(ppro-zloc(yhat))/zscale(yhat) -val -} - -zdist=zdepth - -opregpb<-function(x,y,nboot=1000,alpha=.05,om=TRUE,ADJ=TRUE,SEED=TRUE, -nullvec=rep(0,ncol(x)+1),plotit=TRUE,opdis=2,gval=sqrt(qchisq(.95,ncol(x)+1))){ -# -# generate bootstrap estimates -# use projection-type outlier detection method followed by -# TS regression. -# -# om=T and ncol(x)>1, means an omnibus test is performed, -# otherwise only individual tests of parameters are performed. -# -# opdis=2, means that Mahalanobis distance is used -# opdis=1, means projection-type distance is used -# -# gval is critical value for projection-type outlier detection -# method -# -# ADJ=T, Adjust p-values as described in Section 11.1.5 of the text. -# -if(SEED)set.seed(2) -x<-as.matrix(x) -m<-cbind(x,y) -p1<-ncol(x)+1 -m<-elimna(m) # eliminate any rows with missing data -x<-m[,1:ncol(x)] -x<-as.matrix(x) -y<-m[,p1] -if(nrow(x)!=length(y))stop("Sample size of x differs from sample size of y") -if(!is.matrix(x))stop("Data should be stored in a matrix") -print("Taking bootstrap samples. Please wait.") -data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot) -bvec<-apply(data,1,regboot,x,y,regfun=opreg) -# bvec is a p+1 by nboot matrix. The first row -# contains the bootstrap intercepts, the second row -# contains the bootstrap values for first predictor, etc. 
-# using Hochberg method
-bvec<-t(bvec)
-dvec<-alpha/(c(1:ncol(x)))
-test<-NA
-icl0<-round(alpha*nboot/2)
-icl<-round(alpha*nboot/(2*ncol(x)))
-icu0<-nboot-icl0
-icu<-nboot-icl
-output<-matrix(0,p1,6)
-vlabs="Intercept"
-for(j in 2:p1)vlabs[j]=paste("Slope",j-1)
-dimnames(output)<-list(vlabs,c("Param.","p.value","p.crit",
-"ci.lower","ci.upper","s.e."))
-pval<-NA
-for(i in 1:p1){
-output[i,1]<-i-1
-se.val<-var(bvec[,i])
-temp<-sort(bvec[,i])
-output[i,6]<-sqrt(se.val)
-if(i==1){
-output[i,4]<-temp[icl0+1]
-output[i,5]<-temp[icu0]
-}
-if(i>1){
-output[i,4]<-temp[icl+1]
-output[i,5]<-temp[icu]
-}
-pval[i]<-sum((temp>nullvec[i]))/length(temp)
-if(pval[i]>.5)pval[i]<-1-pval[i]
-}
-fac<-2
-if(ADJ){
-# Adjust p-value if n<60
-nval<-length(y)
-if(nval<20)nval<-20
-if(nval>60)nval<-60
-fac<-2-(60-nval)/40
-}
-pval[1]<-2*pval[1]
-pval[2:p1]<-fac*pval[2:p1]
-output[,2]<-pval
-temp2<-order(0-pval[2:p1])
-zvec<-dvec[1:ncol(x)]
-sigvec<-(test[temp2]>=zvec)
-output[temp2+1,3]<-zvec
-output[1,3]<-NA
-output[,2]<-pval
-om.pval<-NA
-temp<-opreg(x,y)$coef
-if(om && ncol(x)>1){
-temp2<-rbind(bvec[,2:p1],nullvec[2:p1])
-if(opdis==1)dis<-pdis(temp2,center=temp[2:p1])
-if(opdis==2){
-cmat<-var(bvec[,2:p1]-apply(bvec[,2:p1],2,mean)+temp[2:p1])
-dis<-mahalanobis(temp2,temp[2:p1],cmat)
-}
-om.pval<-sum((dis[nboot+1]<=dis[1:nboot]))/nboot
-}
-# do adjusted p-value
-nval<-length(y)
-if(nval<20)nval<-20
-if(nval>60)nval<-60
-adj.pval<-om.pval/2+(om.pval-om.pval/2)*(nval-20)/40
-if(ncol(x)==2 && plotit){
-plot(bvec[,2],bvec[,3],xlab="Slope 1",ylab="Slope 2")
-temp.dis<-order(dis[1:nboot])
-ic<-round((1-alpha)*nboot)
-xx<-bvec[temp.dis[1:ic],2:3]
-xord<-order(xx[,1])
-xx<-xx[xord,]
-temp<-chull(xx)
-lines(xx[temp,])
-lines(xx[c(temp[1],temp[length(temp)]),])
-}
-list(output=output,om.pval=om.pval,adj.om.pval=adj.pval)
-}
-
-
-kslope<-function(x,y,pyhat=FALSE,pts=x){
-#
-# Estimate slope at points in pts using kernel method
-#
-# See Doksum et al. 1994, JASA, 89, 571-
-#
-m<-elimna(cbind(x,y))
-x<-m[,1]
-y<-m[,2]
-n<-length(y)
-sig<-sqrt(var(x))
-temp<-idealf(x)
-iqr<-(temp$qu-temp$ql)/1.34
-A<-min(c(sig,iqr))
-yhat<-NA
-vval<-NA
-vals<-NA
-rhosq<-NA
-for(k in 1:n){
-temp1<-NA
-for(j in 1:n){
-temp1[j]<-((x[j]-x[k])/A)^2
-}
-epan<-ifelse(temp1<1,.75*(1-temp1),0) # Epanechnikov kernel, p. 76
-chkit<-sum(epan!=0)
-if(chkit >= 2){
-temp4<-lsfit(x,y,wt=epan)
-vals[k]<-temp4$coef[2]
-}}
-vals
-}
-
-nearl<-function(x,pt,fr=1){
-# determine which values in x are near and less than pt
-# based on fr * mad
-m<-mad(x)
-if(m==0){
-temp<-idealf(x)
-m<-(temp$qu-temp$ql)/(qnorm(.75)-qnorm(.25))
-}
-if(m==0)m<-sqrt(winvar(x)/.4129)
-if(m==0)stop("All measures of dispersion are equal to 0")
-dis<-abs(x-pt)
-dflag<-dis <= fr*m
-flag2<-(x<pt)
-dflag<-dflag*flag2
-dflag
-}
-mgvmean<-function(m,op=0,outfun=outbox,se=TRUE){
-#
-# m is an n by p matrix
-#
-# Compute a multivariate skipped measure of location
-# using the MGV method
-#
-# Eliminate outliers using MGV method
-#
-# op=0 pairwise distances of points
-# op=1 MVE distances
-# op=2 MCD distances
-#
-# outfun indicates outlier rule to be applied to
-# the MGV distances.
-# By default, use boxplot rule
-#
-# Eliminate any outliers and compute means
-# using remaining data.
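-#
-# Illustrative call (not part of the original code; m is assumed to be an
-# n by p numeric matrix): mgvmean(m) returns the p marginal means computed after
-# discarding the points flagged as outliers by the MGV method.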
-# -m<-elimna(m) -temp<-outmgv(m,op=op,plotit=FALSE)$keep -val<-apply(m[temp,],2,mean) -val -} - -smgvcr<-function(m,nullv=rep(0,ncol(m)),SEED=TRUE,op=0, -nboot=500,plotit=TRUE){ -# -# m is an n by p matrix -# -# Test hypothesis that estimand of the MGV estimator -# is equal to the null value, which defaults to zero vector. -# The level of the test is .05. -# -# Argument op: See function outmgv -# -if(SEED)set.seed(2) -m<-elimna(m) -n<-nrow(m) -crit.level<-.05 -if(n<=120)crit.level<-.045 -if(n<=80)crit.level<-.04 -if(n<=60)crit.level<-.035 -if(n<=40)crit.level<-.03 -if(n<=30)crit.level<-.025 -if(n<=20)crit.level<-.02 -data<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot) -val<-matrix(NA,ncol=ncol(m),nrow=nboot) -for(j in 1: nboot){ -mm<-m[data[j,],] -temp<-outmgv(mm,plotit=FALSE,op=op)$keep -val[j,]<-apply(mm[temp,],2,mean) -} -temp<-mgvar(rbind(val,nullv),op=op) -flag2<-is.na(temp) -if(sum(flag2)>0)temp[flag2]<-0 -sig.level<-sum(temp[nboot+1]0)pts<-seq(min(x),max(x),length=np) -if(np==0)pts<-x -} -pts<-sort(pts) -for(i in 1:length(pts)){ -yhat[i]<-NA -for(j in 1:length(x)){ -temp[j]<-((x[j]-pts[i])/A)^2 -} -epan<-ifelse(temp<1,.75*(1-temp),0) -chkit<-sum(epan!=0) -if(chkit > 1){ -vals<-lsfit(x,y,wt=epan)$coef -yhat[i]<-vals[2]*pts[i]+vals[1] -} -} -if(plotit){ -plot(x,y,xlab=xlab,ylab=ylab,pch=pch) -if(np>0){ -ilow<-round(.1*np) -iup<-round(.9*np) -} -if(np==0){ -ilow<-1 -iup<-length(pts) -} -lines(pts[ilow:iup],yhat[ilow:iup]) -} -m<-"Done" -if(pyhat)m<-yhat -m -} - -qreg.sub<-function(X,theta,qval=.5){ -np<-ncol(X) -p<-np-1 -x<-X[,1:p] -y<-X[,np] -temp<-t(t(x)*theta[2:np]) -yhat<-apply(temp,1,sum)+theta[1] -res<-y-yhat -flag<-(res<=0) -rval<-(qval-flag)*res -val<-sum(rval) -val -} - -rmmcppb<-function(x,y=NULL,alpha=.05, -con=0,est=onestep,plotit=FALSE,dif=TRUE,grp=NA,nboot=NA,BA=FALSE,hoch=FALSE,xlab="Group 1",ylab="Group 2",pr=TRUE,SEED=TRUE,SR=FALSE,...){ -# -# Use a percentile bootstrap method to compare dependent groups. -# By default, -# compute a .95 confidence interval for all linear contrasts -# specified by con, a J-by-C matrix, where C is the number of -# contrasts to be tested, and the columns of con are the -# contrast coefficients. -# If con is not specified, all pairwise comparisons are done. -# -# If est=onestep or mom, method SR (see my book on robust methods) -# is used to control the probability of at least one Type I error. -# -# Otherwise, Hochberg is used. -# -# dif=T indicates that difference scores are to be used -# dif=F indicates that measure of location associated with -# marginal distributions are used instead. -# -# nboot is the bootstrap sample size. If not specified, a value will -# be chosen depending on the number of contrasts there are. -# -# x can be an n by J matrix or it can have list mode -# for two groups, data for second group can be put in y -# otherwise, assume x is a matrix (n by J) or has list mode. -# -# A sequentially rejective method is used to control alpha using method SR. -# -# Argument BA: When using dif=F, BA=T uses a correction term -# when computing a p-value. 
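-#
-# Illustrative calls (not part of the original code; m is assumed to be an
-# n by J matrix of dependent groups):
-#   rmmcppb(m)                       # all pairwise comparisons of difference scores (onestep)
-#   rmmcppb(m,est=tmean,dif=FALSE)   # compare the marginal 20% trimmed means instead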
-# -if(hoch)SR=FALSE #Assume Hochberg if hoch=TRUE even if SR=TRUE -if(SR){ -okay=FALSE -if(identical(est,onestep))okay=TRUE -if(identical(est,mom))okay=TRUE -SR=okay # 'Only use method SR (argument SR=TRUE) when est=onestep or mom -} -if(dif){ -if(pr){print("dif=TRUE, so analysis is done on difference scores.") -print(" Each confidence interval has probability coverage 1-alpha.") -print("Also note that a sequentially rejective method is being used") -} -temp<-rmmcppbd(x,y=y,alpha=alpha,con=con,est,plotit=plotit,grp=grp,nboot=nboot, SEED=SEED, -hoch=TRUE,...) -output<-temp$output -con<-temp$con -} -if(!dif){ -if(pr){ -print("dif=FALSE, so analysis is done on marginal distributions") -if(!BA){ -if(identical(est,onestep))print("With M-estimator or MOM, suggest using BA=TRUE and hoch=TRUE") -if(identical(est,mom))print("With M-estimator or MOM, suggest using BA=TRUE and hoch=TRUE") -}} -if(!is.null(y[1]))x<-cbind(x,y) -if(!is.list(x) && !is.matrix(x))stop("Data must be stored in a matrix or in list mode.") -if(is.list(x)){ -if(is.matrix(con)){ -if(length(x)!=nrow(con))stop("The number of rows in con is not equal to the number of groups.") -}} -if(is.list(x)){ -# put the data in an n by J matrix -mat<-matl(x) -} -if(is.matrix(x) && is.matrix(con)){ -if(ncol(x)!=nrow(con))stop("The number of rows in con is not equal to the number of groups.") -mat<-x -} -if(is.matrix(x))mat<-x -if(!is.na(sum(grp)))mat<-mat[,grp] -mat<-elimna(mat) # Remove rows with missing values. -x<-mat -J<-ncol(mat) -xcen<-x -for(j in 1:J)xcen[,j]<-x[,j]-est(x[,j],...) -Jm<-J-1 -if(sum(con^2)==0){ -d<-(J^2-J)/2 -con<-matrix(0,J,d) -id<-0 -for (j in 1:Jm){ -jp<-j+1 -for (k in jp:J){ -id<-id+1 -con[j,id]<-1 -con[k,id]<-0-1 -}}} -d<-ncol(con) -if(is.na(nboot)){ -if(d<=4)nboot<-1000 -if(d>4)nboot<-5000 -} -n<-nrow(mat) -crit.vec<-alpha/c(1:d) -connum<-ncol(con) -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -xbars<-apply(mat,2,est,...) -psidat<-NA -for (ic in 1:connum)psidat[ic]<-sum(con[,ic]*xbars) -psihat<-matrix(0,connum,nboot) -psihatcen<-matrix(0,connum,nboot) -bvec<-matrix(NA,ncol=J,nrow=nboot) -bveccen<-matrix(NA,ncol=J,nrow=nboot) -data<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot) -for(ib in 1:nboot){ -bvec[ib,]<-apply(x[data[ib,],],2,est,...) -bveccen[ib,]<-apply(xcen[data[ib,],],2,est,...) -} -# -# Now have an nboot by J matrix of bootstrap values. 
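-# For each contrast, the proportion of bootstrap psi-hat values above zero gives a
-# generalized p-value; when BA=TRUE that proportion is adjusted by a bias term
-# estimated from the centered bootstrap values.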
-# -test<-1 -bias<-NA -for (ic in 1:connum){ -psihat[ic,]<-apply(bvec,1,bptdpsi,con[,ic]) -psihatcen[ic,]<-apply(bveccen,1,bptdpsi,con[,ic]) -bias[ic]<-sum((psihatcen[ic,]>0))/nboot-.5 -ptemp<-(sum(psihat[ic,]>0)+.5*sum(psihat[ic,]==0))/nboot -if(BA)test[ic]<-ptemp-.1*bias[ic] -if(!BA)test[ic]<-ptemp -test[ic]<-min(test[ic],1-test[ic]) -test[ic]<-max(test[ic],0) # bias corrected might be less than zero -} -test<-2*test -ncon<-ncol(con) -dvec<-alpha/c(1:ncon) # Assume Hochberg unless specified otherwise -if(SR){ -if(alpha==.05){ -dvec<-c(.025,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) -dvecba<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) -if(ncon > 10){ -avec<-.05/c(11:ncon) -dvec<-c(dvec,avec) -}} -if(alpha==.01){ -dvec<-c(.005,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) -dvecba<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) -if(ncon > 10){ -avec<-.01/c(11:ncon) -dvec<-c(dvec,avec) -}} -if(alpha != .05 && alpha != .01){ -dvec<-alpha/c(1:ncon) -dvecba<-dvec -dvec[2]<-alpha -}} -if(hoch)dvec<-alpha/c(1:ncon) -dvecba<-dvec -if(plotit && ncol(bvec)==2){ -z<-c(0,0) -one<-c(1,1) -plot(rbind(bvec,z,one),xlab=xlab,ylab=ylab,type="n") -points(bvec) -totv<-apply(x,2,est,...) -cmat<-var(bvec) -dis<-mahalanobis(bvec,totv,cmat) -temp.dis<-order(dis) -ic<-round((1-alpha)*nboot) -xx<-bvec[temp.dis[1:ic],] -xord<-order(xx[,1]) -xx<-xx[xord,] -temp<-chull(xx) -lines(xx[temp,]) -lines(xx[c(temp[1],temp[length(temp)]),]) -abline(0,1) -} -temp2<-order(0-test) -ncon<-ncol(con) -zvec<-dvec[1:ncon] -if(BA)zvec<-dvecba[1:ncon] -sigvec<-(test[temp2]>=zvec) -output<-matrix(0,connum,6) -dimnames(output)<-list(NULL,c("con.num","psihat","p.value","p.crit","ci.lower","ci.upper")) -tmeans<-apply(mat,2,est,...) -psi<-1 -output[temp2,4]<-zvec -for (ic in 1:ncol(con)){ -output[ic,2]<-sum(con[,ic]*tmeans) -output[ic,1]<-ic -output[ic,3]<-test[ic] -temp<-sort(psihat[ic,]) -#icl<-round(output[ic,4]*nboot/2)+1 # This adjustment causes confusion; it's not based on Hochberg -icl<-round(alpha*nboot/2)+1 -icu<-nboot-(icl-1) -output[ic,5]<-temp[icl] -output[ic,6]<-temp[icu] -} -} -ids=NA -num.sig=nrow(output) -ior=order(output[,3],decreasing=TRUE) -for(j in 1:nrow(output)){ -if(output[ior[j],3]<=output[ior[j],4])break -else num.sig=num.sig-1 -} -list(output=output,con=con,num.sig=num.sig) -} - -linconb<-function(x,con=0,tr=.2,alpha=.05,nboot=599,pr=FALSE,SEED=TRUE,method='holm'){ -# -# Compute a 1-alpha confidence interval for a set of d linear contrasts -# involving trimmed means using the bootstrap-t bootstrap method. -# Independent groups are assumed. -# -# The data are assumed to be stored in x in list mode. Thus, -# x[[1]] contains the data for the first group, x[[2]] the data -# for the second group, etc. Length(x)=the number of groups = J, say. -# -# Missing values are automatically removed. -# -# con is a J by d matrix containing the contrast coefficents of interest. -# If unspecified, all pairwise comparisons are performed. -# For example, con[,1]=c(1,1,-1,-1,0,0) and con[,2]=c(,1,-1,0,0,1,-1) -# will test two contrasts: (1) the sum of the first two trimmed means is -# equal to the sum of the second two, and (2) the difference between -# the first two is equal to the difference between the trimmed means of -# groups 5 and 6. -# -# The default number of bootstrap samples is nboot=599 -# -# This function uses functions trimparts and trimpartt written for this -# book. 
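-#
-# Illustrative calls (not part of the original code; x is assumed to have list
-# mode with one vector per group, or to be an n by J matrix):
-#   linconb(x)                  # all pairwise comparisons of 20% trimmed means
-#   linconb(x,tr=.1,nboot=999)  # 10% trimming with 999 bootstrap samples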
-# -# -# -# -if(is.data.frame(x))x=as.matrix(x) -con<-as.matrix(con) -if(is.matrix(x))x<-listm(x) -if(!is.list(x))stop('Data must be stored in a matrix or in list mode.') -J<-length(x) -for(j in 1:J){ -xx<-x[[j]] -x[[j]]<-xx[!is.na(xx)] # Remove any missing values. -} -Jm<-J-1 -d<-(J^2-J)/2 -if(sum(con^2)==0){ -con<-matrix(0,J,d) -id<-0 -for (j in 1:Jm){ -jp<-j+1 -for (k in jp:J){ -id<-id+1 -con[j,id]<-1 -con[k,id]<-0-1 -}}} -if(nrow(con)!=length(x))stop('The number of groups does not match the number of contrast coefficients.') -bvec<-array(0,c(J,2,nboot)) -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -if(pr)print('Taking bootstrap samples. Please wait.') -nsam=matl(lapply(x,length)) -for(j in 1:J){ -paste('Working on group ',j) -xcen<-x[[j]]-mean(x[[j]],tr) -data<-matrix(sample(xcen,size=length(x[[j]])*nboot,replace=TRUE),nrow=nboot) -bvec[j,,]<-apply(data,1,trimparts,tr) # A 2 by nboot matrix. The first row -# contains the bootstrap trimmed means, the second row -# contains the bootstrap squared standard errors. -} -m1<-bvec[,1,] # J by nboot matrix containing the bootstrap trimmed means -m2<-bvec[,2,] # J by nboot matrix containing the bootstrap sq. se. -boot<-matrix(0,ncol(con),nboot) -for (d in 1:ncol(con)){ -top<-apply(m1,2,trimpartt,con[,d]) -# A vector of length nboot containing psi hat values -consq<-con[,d]^2 -bot<-apply(m2,2,trimpartt,consq) -boot[d,]<-abs(top)/sqrt(bot) -} -testb<-apply(boot,2,max) -ic<-floor((1-alpha)*nboot) -testb<-sort(testb) -psihat<-matrix(0,ncol(con),4) -test<-matrix(0,ncol(con),5) -dimnames(psihat)<-list(NULL,c('con.num','psihat','ci.lower','ci.upper')) -dimnames(test)<-list(NULL,c('con.num','test','se','p.value','p.adjusted')) -for (d in 1:ncol(con)){ -test[d,1]<-d -psihat[d,1]<-d -testit<-lincon(x,con[,d],tr,pr=FALSE) -test[d,2]<-testit$test[1,2] -pval<-mean((abs(testit$test[1,2])1){ -if(STAND){ -x=standm(x) -m1=apply(x,1,mean) -v=apply(x,1,sd) -for(j in 1:ncol(x))pts[,j]=(pts[,j]-m1[j])/v[j] -}} -outmat<-matrix(NA,ncol=nrow(x),nrow=nrow(pts)) -for(i in 1:nrow(pts)){ -center<-pts[i,] -if(!MC)blob<-pdis(x,center=center,MM=MM) -if(MC)blob<-pdisMC(x,center=center,MM=MM) -# -# Note: distances already divided by -# interquartile range -# -# Determine which points in m are close to pts -flag2<-(blob < fr) -outmat[i,]<-flag2 -} -# Return matrix, ith row indicates which points -# in x are close to pts[i,] -# -outmat -} - -adtestl<-function(x,y,est=tmean,nboot=100,alpha=.05,fr=NA,SEED=TRUE,...){ -# -# Test the hypothesis that the regression model is additive. -# Use a variation of Stute et al. (1998, JASA, 93, 141-149). 
-# method, and running interval version of the backfitting -# algorithm -# -if(!is.matrix(x))stop("X values should be stored in a matrix") -if(ncol(x)==1)stop("There should be two or more predictors") -temp<-cbind(x,y) -p<-ncol(x) -p1<-p+1 -temp<-elimna(temp) -x<-temp[,1:p] -x<-as.matrix(x) -y<-temp[,p1] -if(alpha<.05 && nboot<=100)warning("You used alpha<.05 and nboot<=100") -if(is.na(fr)){ -fr<-.8 -if(ncol(x)==2){ -nval<-c(20,30,50,80,150) -fval<-c(0.40,0.36,0.18,0.15,0.09) -if(length(y)<=150)fr<-approx(nval,fval,length(y))$y -if(length(y)>150)fr<-.09 -} -} -if(SEED)set.seed(2) -x<-as.matrix(x) -mflag<-matrix(NA,nrow=length(y),ncol=length(y)) -for (j in 1:length(y)){ -for (k in 1:length(y)){ -mflag[j,k]<-(sum(x[j,]<=x[k,])==ncol(x)) -} -} -yhat<-adrunl(x,y,plotit=FALSE,fr=fr,pyhat=TRUE) -regres<-y-yhat -print("Taking bootstrap sample, please wait.") -data<-matrix(runif(length(y)*nboot),nrow=nboot) -data<-sqrt(12)*(data-.5) # standardize the random numbers. -rvalb<-apply(data,1,adtestls1,yhat,regres,mflag,x,fr) -# An n x nboot matrix of R values -rvalb<-rvalb/sqrt(length(y)) -dstatb<-apply(abs(rvalb),2,max) -wstatb<-apply(rvalb^2,2,mean) -dstatb<-sort(dstatb) -wstatb<-sort(wstatb) -# compute test statistic -v<-c(rep(1,length(y))) -rval<-adtestls1(v,yhat,regres,mflag,x,fr) -rval<-rval/sqrt(length(y)) -dstat<-max(abs(rval)) -wstat<-mean(rval^2) -ib<-round(nboot*(1-alpha)) -critd<-dstatb[ib] -critw<-wstatb[ib] -list(dstat=dstat,wstat=wstat,critd=critd,critw=critw) -} - - -adtestls1<-function(vstar,yhat,res,mflag,x,fr){ -ystar<-yhat+res*vstar -bres<-adrunl(x,ystar,fr=fr,pyhat=TRUE,plotit=FALSE) -bres<-ystar-bres -rval<-0 -for (i in 1:nrow(x)){ -rval[i]<-sum(bres[mflag[,i]]) -} -rval -} -adcom<-function(x,y,est=mean,tr=0,nboot=600,alpha=.05,fr=NA, -jv=NA,SEED=TRUE,...){ -# -# Test the hypothesis that component -# jv -# is zero. That is, in a generalized additive model, test -# H_0: f_jv(X_jv) = 0. -# Use a variation of Stute et al. (1998, JASA, 93, 141-149). -# method, and running interval version of the backfitting -# algorithm -# -# if jv=NA, all components are tested. 
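-#
-# Illustrative call (not part of the original code; x is assumed to be an n by p
-# matrix with p>=2): adcom(x,y,jv=1) tests whether the additive component for the
-# first predictor can be dropped from the model.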
-# -# Current version allows only 0 or 20% trimming -# -x=as.matrix(x) -if(!is.matrix(x))stop("X values should be stored in a matrix") -if(ncol(x)==1)stop("There should be two or more predictors") -temp<-cbind(x,y) -p<-ncol(x) -p1<-p+1 -temp<-elimna(temp) -x<-temp[,1:p] -x<-as.matrix(x) -y<-temp[,p1] -if(is.na(fr)){ -if(tr==.2){ -nval<-c(20,40,60,80,120,160) -fval<-c(1.2,1,.85,.75,.65,.65) -if(length(y)<=160)fr<-approx(nval,fval,length(y))$y -if(length(y)>160)fr<-.65 -} -if(tr==0){ -nval<-c(20,40,60,80,120,160) -fval<-c(.8,.7,.55,.5,.5,.5) -if(length(y)<=160)fr<-approx(nval,fval,length(y))$y -if(length(y)>160)fr<-.6 -} -} -if(is.na(fr))stop("Span can be deteremined only for 0 or .2 trimming") -if(SEED)set.seed(2) -x<-as.matrix(x) -mflag<-matrix(NA,nrow=length(y),ncol=length(y)) -for (j in 1:length(y)){ -for (k in 1:length(y)){ -mflag[j,k]<-(sum(x[j,]<=x[k,])==ncol(x)) -} -} -if(!is.na(jv))prval<-jv -if(is.na(jv))prval<-c(1:ncol(x)) -c.sum<-matrix(NA,nrow=length(prval),ncol=2) -dimnames(c.sum)<-list(NULL,c("d.stat","p.value")) -for(ip in 1:length(prval)){ -flag<-rep(TRUE,ncol(x)) -flag[prval[ip]]<-FALSE -yhat<-adrun(x[,flag],y,plotit=FALSE,fr=fr,pyhat=TRUE) -regres<-y-yhat -temp<-indt(x[,!flag],regres) -c.sum[ip,1]<-temp$dstat -c.sum[ip,2]<-temp$p.value.d -} -list(results=c.sum) -} - -logadr<-function(x,y,est=mean,iter=10,pyhat=FALSE,plotit=TRUE,fr=.8,xout=FALSE,eout=xout, -outfun=out,theta=50,phi=25,expand=.5,STAND=TRUE,ticktype="simple",scale=FALSE,...){ -# -# additive model based on a variation of Copas' (1983) smooth -# for binary outcomes. -# (Use backfitting algorithm.) -# -m<-elimna(cbind(x,y)) -x<-as.matrix(x) -p<-ncol(x) -p1<-p+1 -y<-m[,p1] -x<-m[,1:p] -x<-as.matrix(x) -if(STAND){ -for (ip in 1:p)x[,ip]<-(x[,ip]-mean(x[,ip]))/sqrt(var(x[,ip])) -} -if(xout){ -keepit<-rep(TRUE,nrow(x)) -flag<-outfun(x,plotit=FALSE)$out.id -keepit[flag]<-FALSE -x<-x[keepit,] -y<-y[keepit] -} -x<-as.matrix(x) -if(p==1)val<-logrsm(x[,1],y,pyhat=TRUE,plotit=plotit,fr=fr,...)$output -if(p>1){ -np<-p+1 -x<-m[,1:p] -y<-m[,np] -fhat<-matrix(NA,ncol=p,nrow=length(y)) -fhat.old<-matrix(NA,ncol=p,nrow=length(y)) -res<-matrix(NA,ncol=np,nrow=length(y)) -dif<-1 -for(i in 1:p) -fhat.old[,i]<-logrsm(x[,i],y,pyhat=TRUE,plotit=FALSE,fr=fr)$output -eval<-NA -for(it in 1:iter){ -for(ip in 1:p){ -res[,ip]<-y -for(ip2 in 1:p){ -if(ip2 != ip)res[,ip]<-res[,ip]-fhat.old[,ip2] -} -fhat[,ip]=logrsm(x[,ip],y,pyhat=TRUE,plotit=FALSE,fr=fr)$output -} -eval[it]<-sum(abs(fhat/sqrt(sum(fhat^2))-fhat.old/sqrt(sum(fhat.old^2)))) -if(it > 1){ -itm<-it-1 -dif<-abs(eval[it]-eval[itm]) -} -fhat.old<-fhat -if(dif<.01)break -} -#print(fhat) -val<-apply(fhat,1,sum) -aval<-est(y-val,...) 
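-# Re-center the additive fit by adding back the location (est) of the raw
-# residuals, then truncate the fitted values so they remain in [0,1], the
-# probability scale appropriate for a binary outcome.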
-val<-val+aval -flag=(val<0) -val[flag]=0 -flag=(val>1) -val[flag]=1 -if(plotit && p==2){ -fitr<-val -iout<-c(1:length(fitr)) -nm1<-length(fitr)-1 -for(i in 1:nm1){ -ip1<-i+1 -for(k in ip1:length(fitr))if(sum(x[i,]==x[k,])==2)iout[k]<-0 -} -fitr<-fitr[iout>=1] # Eliminate duplicate points in the x-y plane -# This is necessary when doing three dimensional plots -# with the R function interp -mkeep<-x[iout>=1,] -fitr<-interp(mkeep[,1],mkeep[,2],fitr) -persp(fitr,theta=theta,phi=phi,expand=expand,xlab="x1",ylab="x2",zlab="", -scale=scale,ticktype=ticktype) -}} -if(!pyhat)val<-"Done" -val -} - -qhomtsub<-function(isub,x,y,qval){ -# -# Perform quantile regression using x[isub] to predict y[isub] -# isub is a vector of length n, -# a bootstrap sample from the sequence of integers -# 1, 2, 3, ..., n -# -# This function is used by other functions when computing -# bootstrap estimates. -# -# regfun is some regression method already stored in R -# It is assumed that regfun$coef contains the intercept and slope -# estimates produced by regfun. The regression methods written for -# this book, plus regression functions in R, have this property. -# -# x is assumed to be a matrix containing values of the predictors. -# -xmat<-matrix(x[isub,],nrow(x),ncol(x)) -temp<-qplotreg(xmat,y[isub],qval=qval,plotit=FALSE) -regboot<-temp[1,2]-temp[2,2] -regboot -} - -qplotreg<-function(x, y,qval=c(.2,.8),q=NULL,plotit=TRUE,xlab="X",ylab="Y",xout=FALSE, -outfun=outpro,pch='*',...){ -# -# Compute the quantile regression line for each of the -# quantiles indicated by qval. -# plotit=TRUE, plot the results. -# -if(!is.null(q))qval=q -xy=elimna(cbind(x,y)) -if(ncol(xy)>2)stop("Only One Predictor Allowed") -x=xy[,1] -y=xy[,2] -if(xout){ -x<-as.matrix(x) -flag<-outfun(x,...)$keep -x<-x[flag,] -y<-y[flag] -} -n<-length(qval) -coef<-matrix(NA,ncol=2,nrow=n) -x<-as.matrix(x) -if(ncol(x)>1)stop("This version allows one predictor only.") -if(plotit)plot(x,y,xlab=xlab,ylab=ylab,pch=pch) -for(it in 1:n){ -coef[it,]<-qreg(x,y,qval=qval[it],pr=FALSE)$coef -dimnames(coef)=list(NULL,c("Inter.","Slope")) -if(plotit)abline(coef[it,1],coef[it,2]) -} -coef -} - - -ancmpbpb<-function(x1,y1,x2,y2,fr1=1,fr2=1,alpha=.05,pts=NA,est=tmean,nboot=NA, -bhop=FALSE,SEED=TRUE,...){ -print("This function has been eliminated. Please use ancmppb instead.") -} - - -qsm<-function(x,y,qval=c(.2,.5,.8),fr=.8,plotit=TRUE,scat=TRUE,pyhat=FALSE,eout=FALSE,xout=FALSE,outfun=out,op=TRUE,LP=TRUE,tr=FALSE, -xlab='X',ylab='Y',pch='.'){ -# -# running interval smoother for the quantiles stored in -# qval -# -# fr controls amount of smoothing -# op=T, use Harrell-Davis estimator -# op=F, use single order statistic -# -# LP=TRUE: The initial smooth is smoothed again using LOESS -# -plotit<-as.logical(plotit) -scat<-as.logical(scat) -m<-cbind(x,y) -if(ncol(m)!=2)stop("Must have exactly one predictor. 
For more than one, use qhdsm.") -m<-elimna(m) -x<-m[,1] -y<-m[,2] -if(eout && xout)stop("Not allowed to have eout=xout=T") -if(eout){ -flag<-outfun(m,plotit=FALSE)$keep -m<-m[flag,] -} -if(xout){ -flag<-outfun(x)$keep -m<-m[flag,] -} -x<-m[,1] -y<-m[,2] -rmd<-c(1:length(x)) -if(pyhat)outval<-matrix(NA,ncol=length(qval),nrow=length(x)) -if(scat)plot(x,y,xlab=xlab,ylab=ylab,pch=pch) -if(!scat)plot(x,y,type="n",xlab=xlab,ylab=ylab) -for(it in 1:length(qval)){ -if(!op)for(i in 1:length(x))rmd[i]<-qest(y[near(x,x[i],fr)],q=qval[it]) -if(op)for(i in 1:length(x))rmd[i]<-hd(y[near(x,x[i],fr)],q=qval[it],tr=tr) -if(pyhat)outval[,it]<-rmd -points(x,rmd,type="n") -sx<-sort(x) -xorder<-order(x) -sysm<-rmd[xorder] -if(LP)sysm=lplot(sx,sysm,pyhat=TRUE,plotit=FALSE,pr=FALSE)$yhat.values -lines(sx,sysm) -} -if(pyhat)output<-outval -if(!pyhat)output<-"Done" -list(output=output) -} -locvar<-function(x,y,pyhat=FALSE,pts=x,plotit=TRUE){ -# -# For each x, estimate VAR(y|x) -# with the method used by Bjerve and Doksum -# i.e., use Fan's kernel regression method. -# -yhat<-locreg(x,y,pyhat=TRUE,plotit=FALSE,pts=x) -val<-locreg(x,(y-yhat)^2,pyhat=pyhat,pts=pts,plotit=plotit) -val -} - -smmval<-function(dfvec,iter=10000,alpha=.05,SEED=TRUE){ -# -# Determine the upper 1-alpha quantile of the maximum of -# K independent Student's T random variables. -# dfvec is a vector of length K containing the degrees of freedom -# -# So this distribution is similar to a Studentized maximum modulus distribution but -# the T statistics are not based on an estimate of an assumed common variance. -# -if(SEED)set.seed(1) -vals<-NA -tvals<-NA -J<-length(dfvec) -for(i in 1:iter){ -for(j in 1:J){ -tvals[j]<-rt(1,dfvec[j]) -} -vals[i]<-max(abs(tvals)) -} -vals<-sort(vals) -ival<-round((1-alpha)*iter) -qval<-vals[ival] -qval -} - - -bwmedimcp<-function(J,K,x,JK=J*K,grp=c(1:JK),alpha=.05){ -# -# Multiple comparisons for interactions -# in a split-plot design. -# The analysis is done by taking difference scores -# among all pairs of dependent groups and -# determining which of -# these differences differ across levels of Factor A -# using trimmed means. -# -# For MOM or M-estimators, use spmcpi which uses a bootstrap method -# -# The R variable x is assumed to contain the raw -# data stored in list mode or in a matrix. -# If in list mode, x[[1]] contains the data -# for the first level of both factors: level 1,1. -# x[[2]] is assumed to contain the data for level 1 of the -# first factor and level 2 of the second: level 1,2 -# x[[K]] is the data for level 1,K -# x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc. -# -# If the data are in a matrix, column 1 is assumed to -# correspond to x[[1]], column 2 to x[[2]], etc. -# -# When in list mode x is assumed to have length JK, the total number of -# groups being tested, but a subset of the data can be analyzed -# using grp -# - if(is.matrix(x)) { - y <- list() - for(j in 1:ncol(x)) - y[[j]] <- x[, j] - x <- y -} - -JK<-J*K -if(JK!=length(x))stop("Something is wrong. Expected ",JK," groups but x contains ", length(x), "groups instead.") -MJ<-(J^2-J)/2 -MK<-(K^2-K)/2 -JMK<-J*MK -MJMK<-MJ*MK -Jm<-J-1 -data<-list() -for(j in 1:length(x)){ -data[[j]]<-x[[grp[j]]] # Now have the groups in proper order. 
-} -x<-data -output<-matrix(0,MJMK,7) -dimnames(output)<-list(NULL,c("A","A","B","B","psihat","sig","crit.sig")) -jp<-1-K -kv<-0 -kv2<-0 -test<-NA -for(j in 1:J){ -jp<-jp+K -xmat<-matrix(NA,ncol=K,nrow=length(x[[jp]])) -for(k in 1:K){ -kv<-kv+1 -xmat[,k]<-x[[kv]] -} -xmat<-elimna(xmat) -for(k in 1:K){ -kv2<-kv2+1 -x[[kv2]]<-xmat[,k] -}} -m<-matrix(c(1:JK),J,K,byrow=TRUE) -ic<-0 -for(j in 1:J){ -for(jj in 1:J){ -if(j 10){ -avec<-.05/c(11:ncon) -dvec<-c(dvec,avec) -}} -if(alpha==.01){ -dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) -if(ncon > 10){ -avec<-.01/c(11:ncon) -dvec<-c(dvec,avec) -}} -if(alpha != .05 && alpha != .01){ -dvec<-alpha/c(1:ncon) -} -temp2<-order(0-test) -zvec<-dvec[1:ncon] -sigvec<-(test[temp2]>=zvec) -for (ic in 1:ncol(con)){ -output[temp2,7]<-zvec -} -output -} - - - - -bwmedbmcp<-function(J,K,x,JK=J*K,grp=c(1:JK),con=0,alpha=.05,dif=FALSE,pool=FALSE,bop=FALSE,nboot=100,SEED=TRUE){ -# -# All pairwise comparisons among levels of Factor B -# in a split-plot design using trimmed means. -# -# Data are pooled for each level -# of Factor B. -# bop=T, use bootstrap estimates of standard errors. -# FWE controlled with Rom's method -# -# The R variable x is assumed to contain the raw -# data stored in list mode or in a matrix. -# If in list mode, x[[1]] contains the data -# for the first level of both factors: level 1,1. -# x[[2]] is assumed to contain the data for level 1 of the -# first factor and level 2 of the second: level 1,2 -# x[[K]] is the data for level 1,K -# x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc. -# -# If the data are in a matrix, column 1 is assumed to -# correspond to x[[1]], column 2 to x[[2]], etc. -# -# When in list mode x is assumed to have length JK, the total number of -# groups being tested, but a subset of the data can be analyzed -# using grp -# - if(is.matrix(x)) { - y <- list() - for(j in 1:ncol(x)) - y[[j]] <- x[, j] - x <- y -} -JK<-J*K -data<-list() -for(j in 1:length(x)){ -data[[j]]<-x[[grp[j]]] # Now have the groups in proper order. -} -x<-data -if(pool){ -data<-list() -m1<-matrix(c(1:JK),J,K,byrow=TRUE) -for(k in 1:K){ -for(j in 1:J){ -flag<-m1[j,k] -if(j==1)temp<-x[[flag]] -if(j>1){ -temp<-c(temp,x[[flag]]) -}} -data[[k]]<-temp -} -print("Group numbers refer to levels of Factor B") -if(!dif)temp<-lincdm(data,con=con,alpha=alpha,nboot=nboot,mop=bop) -if(dif)temp<-qdmcpdif(data,con=con,alpha=alpha) -return(temp) -} -if(!pool){ -mat<-matrix(c(1:JK),ncol=K,byrow=TRUE) -for(j in 1:J){ -data<-list() -ic<-0 -for(k in 1:K){ -ic<-ic+1 -data[[ic]]<-x[[mat[j,k]]] -} -print(paste("For level ", j, " of Factor A:")) -if(!dif)temp<-lincdm(data,con=con,alpha=alpha,nboot=nboot,mop=bop) -if(dif)temp<-qdmcpdif(data,con=con,alpha=alpha) -print(temp$test) -print(temp$psihat) -}} -} - -gamplot<-function(x,y,sop=TRUE,pyhat=FALSE,eout=FALSE,xout=FALSE,outfun=out,plotit=TRUE, -xlab="X",ylab="",zlab="",theta=50,phi=25,expand=.5,scale=TRUE,ticktype="simple"){ -# -# Plot regression surface using generalized additive model -# -# sop=F, use usual linear model y~x1+x2... 
-# sop=T, use splines -# -library(akima) -library(mgcv) -x<-as.matrix(x) -np<-ncol(x) -np1<-np+1 -if(ncol(x)>4)stop("x should have at most four columns of data") -m<-elimna(cbind(x,y)) -x<-m[,1:np] -x<-as.matrix(x) -y<-m[,np1] -if(xout && eout)stop("Can't have xout=eout=T") -if(eout){ -flag<-outfun(m)$keep -m<-m[flag,] -} -if(xout){ -flag<-outfun(x,plotit=FALSE)$keep -m<-m[flag,] -} -x<-m[,1:np] -x<-as.matrix(x) -y<-m[,np1] -if(!sop){ -if(ncol(x)==1)fitr<-fitted(gam(y~x[,1])) -if(ncol(x)==2)fitr<-fitted(gam(y~x[,1]+x[,2])) -if(ncol(x)==3)fitr<-fitted(gam(y~x[,1]+x[,2]+x[,3])) -if(ncol(x)==4)fitr<-fitted(gam(y~x[,1]+x[,2]+x[,3]+x[,4])) -} -if(sop){ -if(ncol(x)==1)fitr<-fitted(gam(y~s(x[,1]))) -if(ncol(x)==2)fitr<-fitted(gam(y~s(x[,1])+s(x[,2]))) -if(ncol(x)==3)fitr<-fitted(gam(y~s(x[,1])+s(x[,2])+s(x[,3]))) -if(ncol(x)==4)fitr<-fitted(gam(y~s(x[,1])+s(x[,2])+s(x[,3])+s(x[,4]))) -} -last<-fitr -if(plotit){ -if(ncol(x)==1){ -plot(x,fitr,xlab=xlab,ylab=ylab) -} -if(ncol(x)==2){ -iout<-c(1:length(fitr)) -nm1<-length(fitr)-1 -for(i in 1:nm1){ -ip1<-i+1 -for(k in ip1:length(fitr))if(sum(x[i,]==x[k,])==2)iout[k]<-0 -} -fitr<-fitr[iout>=1] # Eliminate duplicate points in the x-y plane -# This is necessary when doing three dimensional plots -# with the R function interp -mkeep<-x[iout>=1,] -fitr<-interp(mkeep[,1],mkeep[,2],fitr) -persp(fitr,theta=theta,phi=phi,expand=expand,xlab=xlab,ylab=ylab,zlab=zlab, -scale=scale,ticktype=ticktype) -} -} -if(!pyhat)last <- "Done" -last -} - -rgvar<-function(x,est=covmcd,...){ -# -# compute a robust generalized variance -# -# choices for est are: -# var -# covmcd -# covmve -# skipcov with MM=F (boxplot) MM=T (MAD-MEDIAN), op=1 (MGV method) -# op=2 (projection method for outliers) -# covroc (S+ only as of Dec, 2005) -# Rocke's measure of scatter, this requires that the command -# library(robust) has been executed. -# -library(MASS) -val<-prod(eigen(est(x,...))$values) -val -} -rgvarseb<-function(x,nboot=100,est=skipcov,SEED=TRUE,...){ -# -n<-nrow(x) -val<-NA -for(i in 1:nboot){ -data<-sample(n,n,replace=TRUE) -val[i]<-rgvar(x[data,],est=est,...) -} -se<-sqrt(var(val)) -se -} -covmve<-function(x){ -library(MASS) -oldSeed <- .Random.seed -val<-cov.mve(x) -assign(x='.Random.seed', value=oldSeed, envir=.GlobalEnv) -list(center=val$center,cov=val$cov) -} - -mvecov<-function(x){ -library(MASS) -val<-cov.mve(x) -val$cov -} - - -rgvar2g<-function(x,y,nboot=100,est=covmcd,alpha=.05,cop=3,op=2,SEED=TRUE,...){ -# -# Two independent groups. -# Test hypothesis of equal generalized variances. -# -# Choices for est include: -# var -# covmcd -# covmve -# skipcov with MM=F (boxplot) MM=T (MAD-MEDIAN), op=1 (MGV method) -# op=2 (projection method for outliers) -# covroc Rocke's measure of scatter, this requires that the command -# library(robust) has been executed. -# -if(SEED)set.seed(2) -se1<-rgvarseb(x,nboot=nboot,est=est,SEED=SEED,...) -se2<-rgvarseb(y,nboot=nboot,est=est,SEED=SEED,...) -dif<-rgvar(x,est=est,...)-rgvar(y,est=est,...) 
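-# dif is the difference between the two robust generalized variances; the test
-# statistic below standardizes it by the combined bootstrap standard errors.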
-test.stat<-dif/sqrt(se1^2+se2^2) -test.stat -} - -covmcd<-function(x,nsamp="sample"){ -# -# nsamp="best" is the default used by R, -# meaning that the number of samples is chosen so that -# exhaustive enumeration is done up to 5000 samples -# nsamp="sample" the number of samples -# is min(5*p, 3000) -# -library(MASS) -oldSeed <- .Random.seed -val<-cov.mcd(x,nsamp=nsamp) -assign(x='.Random.seed', value=oldSeed, envir=.GlobalEnv) -list(center=val$center,cov=val$cov) -} - - -mcdcov<-function(x,nsamp="sample"){ -# -# nsamp="best" is the default used by R, -# meaning that the number of samples is chosen so that -# exhaustive enumeration is done up to 5000 samples -# nsamp="sample" the number of samples -# is min(5*p, 3000) -# -#library(lqs) -library(MASS) -oldSeed <- .Random.seed -val<-cov.mcd(x,nsamp=nsamp) - assign(x='.Random.seed', value=oldSeed, envir=.GlobalEnv) -val$cov -} - -ancdes<-function(x,depfun=fdepth,DH=FALSE,FRAC=.5,...){ -# -# Choose points for design of an ANCOVA -# x is the n by p matrix m. -# -# FRAC some value between 0 and 1. -# -# FRAC is the fraction of the least deep points that will not be returned when -# DH=TRUE -# That is, return 1-FRAC deepest points. -# For example, FRAC=.2 means that the deepest 80% of the -# data will be returned. -# -# DH=F, return deepest point and those points on the -# .5 depth contour -# -if(is.data.frame(x))x=as.matrix(x) -if(!is.matrix(x))stop("x must be a matrix or a data frame") -temp<-depfun(x,plotit=FALSE,...) -temp2<-order(temp) -if(!DH){ -val<-matrix(x[temp2[length(temp)],],ncol=ncol(x)) -nmid<-round(length(temp)/2) -id2<-(temp[temp2[nmid]]==temp) -val2<-matrix(x[id2,],ncol=ncol(x)) -if(!is.matrix(val2))val2<-t(as.matrix(val2)) -val<-rbind(val,val2) -} -if(DH){ -bot=round(length(temp)*FRAC) -val=matrix(x[temp2[bot:length(temp)],],ncol=ncol(x)) -} -val=elimna(val) -val -} - - -stacklist<-function(x){ -# -# Assumes x has list mode with each entry a -# matrix having p columns. -# -# Goal: stack the data into a matrix having p columns. -# -p<-ncol(x[[1]]) -xx<-as.matrix(x[[1]]) -for(j in 2:length(x)){ -temp<-as.matrix(x[[j]]) -xx<-rbind(xx,temp) -} -xx -} - -smvar<-function(x,y,fr=.6,xout=TRUE,eout=FALSE,xlab="X",ylab="VAR(Y|X)",pyhat=FALSE,plotit=TRUE,nboot=40, -RNA=FALSE,SEED=TRUE){ -# -# Estimate VAR(Y|X) using bagged version of running interval method -# -# xout=T eliminates all points for which x is an outlier. -# eout=F eliminates all points for which (x,y) is an outlier. -# -# pyhat=T will return estimate for each x. -# -# RNA=T removes missing values when applying smooth -# with RNA=F, might get NA for some pyhat values. -# -# plotit=TRUE, scatterplot of points x versus square of -# predicted y minus y -# stemming from a smooth. Then plots a line indicating -# var(y|x) using bagged smooth -# -temp <- cbind(x, y) -temp <- elimna(temp) -x <- temp[, 1] -y <- temp[, 2] -yhat<-lplot(x, y, pyhat = TRUE, plotit = FALSE)$yhat.values -yvar<-(y-yhat)^2 -estvar<-runmbo(x,y,est=var,pyhat=TRUE,fr=fr,plotit=FALSE,RNA=RNA,nboot=nboot) -if(plotit){ -plot(c(x,x),c(yvar,estvar),type="n",xlab=xlab,ylab=ylab) -points(x,yvar) -sx<-sort(x) -xorder<-order(x) -sysm<-estvar[xorder] -lines(sx,sysm) -} -output <- "Done" -if(pyhat)output <- estvar -output -} -locvarsm<-function(x,y,pyhat=FALSE,pts=x,plotit=TRUE,nboot=40,RNA=TRUE,xlab="X", -ylab="VAR(Y|X)",op=2,xout=TRUE,eout=FALSE,pr=TRUE,fr=.6,scat=TRUE,outfun=out,SEED=TRUE){ -# -# For each x, estimate VAR(y|x) using bootstrap bagging. -# with -# op=1 uses Fan's kernel method plus bootstrap bagging. 
-# op=2 uses running interval smoother plus bootstrap bagging -# -# xout=T eliminates points where there are outliers among x values -# this option applies only when using op=2 and when using -# running interval smoother. -# eout=T eliminates outliers among cloud of all data. -# -if(SEED)set.seed(2) -temp<-cbind(x,y) -temp<-elimna(temp) -x<-temp[,1] -y<-temp[,2] -if(op==2){ -if(pr){ -print("Running interval method plus bagging has been chosen") -print("op=1 will use Fan's method plus bagging") -}} -if(op==1){ -if(pr){ -print("Fan's method plus bagging has been chosen (cf. Bjerve and Doksum)") -print("op=2 will use running interval plus bagging") -} -mat <- matrix(NA, nrow = nboot, ncol = nrow(temp)) -for(it in 1:nboot) { -idat <- sample(c(1:length(y)), replace = T) -xx <- temp[idat, 1] -yy <- temp[idat, 2] -mat[it, ] <- locvar(xx,yy,pts=x,pyhat=TRUE,plotit=FALSE) -} -rmd<-apply(mat,2,mean) - if(plotit) { -plot(c(x, x), c(y, rmd), type = "n", xlab = xlab, ylab= ylab) -sx <- sort(x) -xorder <- order(x) -sysm <- rmd[xorder] -lines(sx, sysm) -} - -output<-"Done" -if(pyhat)output <- rmd -} -if(op==2){ -output<-runmbo(x,y,fr=fr,est=var,xlab=xlab,ylab=ylab,pyhat=pyhat,eout=eout, -xout=xout,RNA=RNA,plotit=plotit,scat=scat,nboot=nboot,outfun=outfun,SEED=SEED) -} -output -} - -mcp2atm<-function(J,K,x,tr=.2,alpha=.05,grp=NA,op=FALSE,pr=TRUE){ -# -# Test all linear contrasts associated with -# main effects for Factor A and B and all interactions based on trimmed means -# By default, -# tr=.2, meaning 20% trimming is used. -# -# bbmcpEP has an option for pooling over the levels of the factors. -# - # The data are assumed to be stored in x in list mode or in a matrix. - # If grp is unspecified, it is assumed x[[1]] contains the data - # for the first level of both factors: level 1,1. - # x[[2]] is assumed to contain the data for level 1 of the - # first factor and level 2 of the second factor: level 1,2 - # x[[j+1]] is the data for level 2,1, etc. - # If the data are in wrong order, grp can be used to rearrange the - # groups. For example, for a two by two design, grp<-c(2,4,3,1) - # indicates that the second group corresponds to level 1,1; - # group 4 corresponds to level 1,2; group 3 is level 2,1; - # and group 1 is level 2,2. - # - # Missing values are automatically removed. - # - JK <- J * K - if(is.matrix(x)) - x <- listm(x) - if(!is.na(grp[1])) { - yy <- x - x<-list() - for(j in 1:length(grp)) - x[[j]] <- yy[[grp[j]]] - } - if(!is.list(x)) - stop("Data must be stored in list mode or a matrix.") - for(j in 1:JK) { - xx <- x[[j]] - x[[j]] <- xx[!is.na(xx)] # Remove missing values - } - # - - if(JK != length(x)) - warning("The number of groups does not match the number of contrast coefficients.") -for(j in 1:JK){ -temp<-x[[j]] -temp<-temp[!is.na(temp)] # Remove missing values. 
-x[[j]]<-temp -} - # Create the three contrast matrices -temp<-con2way(J,K) -conA<-temp$conA -conB<-temp$conB -conAB<-temp$conAB -if(!op){ -Factor.A<-lincon(x,con=conA,tr=tr,alpha=alpha,pr=pr) -Factor.B<-lincon(x,con=conB,tr=tr,alpha=alpha,pr=FALSE) -Factor.AB<-lincon(x,con=conAB,tr=tr,alpha=alpha,pr=FALSE) -} -All.Tests<-NA -if(op){ -Factor.A<-NA -Factor.B<-NA -Factor.AB<-NA -con<-cbind(conA,conB,conAB) -All.Tests<-lincon(x,con=con,tr=tr,alpha=alpha) -} -list(Factor.A=Factor.A,Factor.B=Factor.B,Factor.AB=Factor.AB,All.Tests=All.Tests,conA=conA,conB=conB,conAB=conAB) -} - -bbmcp=mcp2atm - -mdifloc<-function(x,y,est=tukmed,...){ -# -# Compute multivariate measure of location associated -# with the distribution of x-y -# -# By default, use Tukey's median. -# -x<-as.matrix(x) -y<-as.matrix(y) -FLAG<-F -if(ncol(x)!=ncol(y))stop("x and y should have the same number of columns") -if(ncol(x)==1 && ncol(y)==1)FLAG<-T -if(FLAG)val<-loc2dif(x,y,est=est,...) -if(!FLAG){ -J<-(ncol(x)^2-ncol(x))/2 -mat<-matrix(NA,ncol=ncol(x),nrow=nrow(x)*nrow(y)) -for(j in 1:ncol(x))mat[,j]<-as.vector(outer(x[,j], y[,j], FUN = "-")) -val<-est(mat,...) -} -val -} -mdiflcr<-function(m1,m2,tr=.5,nullv=rep(0,ncol(m1)),plotit=TRUE, -SEED=TRUE,pop=1,fr=.8,nboot=600){ -# -# For two independent groups, let D=X-Y. -# Let theta_D be median of marginal distributions -# Goal: Test theta_D=0 -# -# This is a multivariate analog of Wilcoxon-Mann-Whitney method -# Only alpha=.05 can be used. -# -# When plotting: -# pop=1 Use scatterplot -# pop=2 Use expected frequency curve. -# pop=3 Use adaptive kernel density -# -if(!is.matrix(m1))stop("m1 is not a matrix") -if(!is.matrix(m2))stop("m2 is not a matrix") -if(ncol(m1)!=ncol(m2))stop("number of columns for m1 and m2 are not equal") -n1<-nrow(m1) -n2<-nrow(m2) -if(SEED)set.seed(2) -data1 <- matrix(sample(n1, size = n1 * nboot, replace = T), nrow = nboot) -data2 <- matrix(sample(n2, size = n2 * nboot, replace = T), nrow = nboot) -bcon <- matrix(NA, ncol = ncol(m1), nrow = nboot) -for(j in 1:nboot)bcon[j,]<-mdifloc(m1[data1[j,],],m2[data2[j,],],est=lloc,tr=tr) -tvec<-mdifloc(m1,m2,est=lloc,tr=tr) -tempcen <- apply(bcon, 1, mean) -smat <- var(bcon - tempcen + tvec) -temp <- bcon - tempcen + tvec -bcon <- rbind(bcon, nullv) -dv <- mahalanobis(bcon, tvec, smat) -bplus <- nboot + 1 -sig.level <- 1 - sum(dv[bplus] >= dv[1:nboot])/nboot -if(plotit && ncol(m1)==2){ -if(pop==2)rdplot(mdif,fr=fr) -if(pop==1){ -plot(mdif[,1],mdif[,2],xlab="VAR 1",ylab="VAR 2",type="n") -points(mdif[,1],mdif[,2],pch=".") -points(center[1],center[2],pch="o") -points(0,0,pch="+") -} -if(pop==3)akerdmul(mdif,fr=fr) -} -list(p.value=sig.level,center=tvec) -} - -mwmw<-function(m1,m2,cop=5,pr=TRUE,plotit=TRUE,pop=1,fr=.8,op=1,dop=1){ -# -# Compute measure of effect size, p, -# a multivariate analog of Wilcoxon-Mann-Whitney p -# -# When plotting: -# pop=1 Use scatterplot -# pop=2 Use expected frequency curve. 
-# pop=3 Use adaptive kernel density -# -# dop=1, use method A1 approximation of halfspace depth -# dop=2, use method A2 approximation of halfspace depth -# -# cop determines how center of data is determined when -# approximating halfspace depth -# cop=1, Halfspace median -# cop=2, MCD -# cop=3, marginal medians -# cop=4, MVE -# cop=5, skipped mean -# -library(akima) -if(is.null(dim(m1)))stop("m1 is not a matrix or data frame") -if(is.null(dim(m2)))stop("m2 is not a matrix or data frame") -if(ncol(m1)!=ncol(m2))stop("number of columns for m1 and m2 are not equal") -if(ncol(m1)==1)stop("Use R function cid or bmp") -nn<-min(c(nrow(m1),nrow(m2))) -mdif<-matrix(as.vector(outer(m1[,1],m2[,1],"-")),ncol=1) -for(j in 2:ncol(m1)){ -mdif<-cbind(mdif,matrix(as.vector(outer(m1[,j],m2[,j],"-")),ncol=1)) -} -if(op==1){ -if(ncol(m1)==2)temp2<-depth2(rbind(mdif,c(rep(0,ncol(m1))))) -#if(ncol(m1)==3)temp2<-depth3(rbind(mdif,c(rep(0,ncol(m1))))) -if(ncol(m1)>2){ -if(cop==1)center<-dmean(mdif,tr=.5,dop=dop) -if(cop==2)center<-cov.mcd(mdif)$center -if(cop==3)center<-apply(mdif,2,median) -if(cop==4)center<-cov.mve(mdif)$center -if(cop==5)center<-smean(mdif) -temp2<-fdepth(rbind(mdif,c(rep(0,ncol(m1))))) -}} -if(op==2){ -temp2<-pdis(rbind(mdif,c(rep(0,ncol(m1))))) -temp2<-1/(temp2+1) -} -center<-dmean(mdif,tr=.5,dop=dop) -phat<-temp2[nrow(mdif)+1]/max(temp2) -# phat is relative depth of zero vector -# Determine critical value -crit<-NA -alpha<-c(.1,.05,.025,.01) -crit[1]<-1-1.6338/sqrt(nn) -crit[2]<-1-1.8556/sqrt(nn) -crit[3]<-1-2.0215/sqrt(nn) -crit[4]<-1-2.1668/sqrt(nn) -if(pr){ -print("For alpha=.1,.05,.025,.01, the correspoding critical values are") -print(crit) -print("Reject if phat is less than or equal to the critical value") -} -if(plotit && ncol(m1)==2){ -if(pop==2)rdplot(mdif,fr=fr) -if(pop==1){ -plot(mdif[,1],mdif[,2],xlab="VAR 1",ylab="VAR 2",type="n") -points(mdif[,1],mdif[,2],pch=".") -points(center[1],center[2],pch="o") -points(0,0,pch="+") -} -if(pop==3)akerdmul(mdif,fr=fr) -} -list(phat=phat,center=center,crit.val=crit) -} - -qreg<-function(x, y,qval=.5, q=NULL,pr=FALSE,xout=FALSE, outfun=outpro,plotit=FALSE,xlab="X",ylab="Y",op=1,v2=TRUE,method='br',WARN=FALSE,...) -{ -# -# Compute the quantile regression line. That is, the goal is to -# determine the qth (qval) quantile of Y given X using the -# the Koenker-Bassett approach. -# -# v2=T, uses the function rq in the R library quantreg -# v2=F, uses an older and slower version -# op=1 has to do with the old version. -# -# method=scad, see Wu and Liu (2009). VARIABLE SELECTION IN QUANTILE REGRESSION, Statistica Sinica 19, 801-817. 
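-# Illustrative call (a sketch using simulated data; the objects x and y below are
-# hypothetical and not part of the documentation):
-# set.seed(1)
-# x <- rnorm(50)
-# y <- 2*x + rnorm(50)
-# qreg(x, y, qval=.75)$coef  # intercept and slope of the .75 quantile regression line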
-# -if(!is.null(q))qval=q -x<-as.matrix(x) -X<-cbind(x,y) -X<-elimna(X) -np<-ncol(X) -p<-np-1 -x<-X[,1:p] -x<-as.matrix(x) -y<-X[,np] -if(xout){ -x<-as.matrix(x) -flag<-outfun(x,plotit=FALSE,...)$keep -x<-x[flag,] -y<-y[flag] -x<-as.matrix(x) -} -if(!v2){ -temp<-ltareg(x,y,0,op=op) -if(qval==.5){ -coef<-temp$coef -res<-temp$res -} -if(qval!=.5){ -START<-temp$coef -coef<-nelderv2(X,np,FN=qreg.sub,START=START,qval=qval) -}} -if(v2){ -library(quantreg) -x<-as.matrix(x) -if(!WARN)options(warn=-1) -temp<-rq(y~x,tau=qval,method=method) -coef<-temp[1]$coefficients -if(!WARN)options(warn=0) -} -if(ncol(x)==1){ -if(plotit){ -plot(x,y,xlab=xlab,ylab=ylab) -abline(coef) -}} -res <- y - x%*%coef[2:np] - coef[1] -list(coef = coef, residuals = res) -} - - - -qindbt.sub<-function(isub,x,y,qval){ -# -# Perform regression using x[isub] to predict y[isub] -# isub is a vector of length n, -# a bootstrap sample from the sequence of integers -# 1, 2, 3, ..., n -# -# This function is used by other functions when computing -# bootstrap estimates. -# -# regfun is some regression method already stored in R -# It is assumed that regfun$coef contains the intercept and slope -# estimates produced by regfun. The regression methods written for -# this book, plus regression functions in R, have this property. -# -# x is assumed to be a matrix containing values of the predictors. -# -xmat<-matrix(x[isub,],nrow(x),ncol(x)) -regboot<-NA -for(i in 1:length(qval)){ -regboot[i]<-qreg(xmat,y[isub],qval[i])$coef[2] -} -regboot -} - - - - - -runmq<-function(x,y,HD=FALSE,qval=c(.2,.5,.8),xlab="X",ylab="Y",fr=1, -sm=FALSE,nboot=40,SEED=TRUE,eout=FALSE,xout=FALSE,...){ -# -# Plot of running interval smoother based on specified quantiles in -# qval -# -# fr controls amount of smoothing -# tr is the amount of trimming -# -# Missing values are automatically removed. -# -rmd1<-NA -xx<-cbind(x,y) -p<-ncol(xx)-1 -xx<-elimna(xx) -x<-xx[,1:p] -y<-xx[,ncol(xx)] -plot(x,y,xlab=xlab,ylab=ylab) -sx1<-sort(x) -xorder1<-order(x) -for(it in 1:length(qval)){ -if(!sm){ -if(!HD)temp<-rungen(x,y,est=qest,fr=fr,pyhat=TRUE,plotit=FALSE,q=qval[it]) -if(HD)temp<-rungen(x,y,est=hd,fr=fr,pyhat=TRUE,plotit=FALSE,q=qval[it]) -rmd1<-temp[1]$output -sysm1<-rmd1[xorder1] -lines(sx1,sysm1) -} -if(sm){ -if(!HD)temp<-runmbo(x,y,est=qest,fr=fr,pyhat=TRUE,plotit=FALSE,SEED=SEED, -nboot=nboot,eout=FALSE,xout=FALSE,q=qval[it]) -if(HD)temp<-runmbo(x,y,est=hd,fr=fr,pyhat=TRUE,plotit=FALSE,SEED=SEED, -nboot=nboot,eout=FALSE,xout=FALSE,q=qval[it]) -rmd1<-temp -sysm1<-rmd1[xorder1] -lines(sx1,sysm1) -} -}} - - -ritest<-function(x,y,adfun=adrun,plotfun=lplot,eout=FALSE,xout=TRUE,plotit=TRUE,flag=3, -nboot=500,alpha=.05,tr=.2,...){ -# -# There are two methods for testing for regression interactions -# using robust smooths. -# The first, performed by this function, fits an additive model -# and test the hypothesis that the residuals, given x, is a -# horizontal plane. -# -# The second, which is done by function adtest, tests the hypothesis -# that a generalized additive model fits the data. -# -# Plot used to investigate regression interaction -# (the extent a generalized additive model does not fit data). -# Compute additive fit, plot residuals -# versus x, an n by 2 matrix. 
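-# Illustrative call (a sketch using simulated data; the objects x and y below are
-# hypothetical):
-# set.seed(1)
-# x <- matrix(rnorm(100), ncol=2)
-# y <- x[,1] + x[,2] + x[,1]*x[,2] + rnorm(50)
-# ritest(x, y)$p.value.d  # a small p-value suggests the additive fit is inadequate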
-# -if(!is.matrix(x))stop(" x must be a matrix") -if(ncol(x)!=2)stop(" x must have two columns only") -yhat<-adfun(x,y,pyhat=TRUE,eout=eout,xout=xout,plotit=FALSE) -res<-y-yhat -output<-indt(x,res,flag=flag,nboot=nboot) -if(plotit)plotfun(x,y-yhat,eout=eout,xout=xout,expand = 0.5,scale=FALSE,xlab="X", -ylab="Y",zlab="",theta=50,phi=25,...) -output -} - -gvar2g<-function(x,y,nboot=100,DF=TRUE,eop=1,est=skipcov, -alpha=.05,cop=3,op=1,MM=FALSE,SEED=TRUE,pr=FALSE,fast=FALSE,...){ -# -# Two independent groups. -# Test hypothesis of equal generalized variances. -# -# DF=T, means skipcov with MM=F is used. -# -# That is, W-estimator based on a projection outlier detection method -# and Carling's method applied to projections. -# if equal sample sizes, adjusted critical value is used where appopriate -# -# DF=F -# no adjusted critical value is used and any robust measure of -# scatter can be used. -# -# Choices for est include: -# var -# covmcd -# covmve -# skipcov with MM=F (boxplot) MM=T (MAD-MEDIAN), op=1 (MGV method) -# op=2 (projection method for outliers) -# covroc Rocke's measure of scatter, -# -# op, cop and eop, see skipcov -# adjusted critical level should be used with -# skipcov and alpha=.05 only. -# fast=T, will use skipcov.for if it is available. -# -# Function returns ratio of first estimate divided by second estimate -# -if(SEED)set.seed(2) -#if(!is.matrix(x))stop("x should be a matrix with ncol>1") -if(is.null(dim(x)))stop("x should be a matrix or data frame with ncol>1") -if(is.null(dim(y)))stop("y should be a matrix or data frame with ncol>1") -#if(!is.matrix(y))stop("y should be a matrix with ncol>1") -if(ncol(x)==1 || ncol(y)==1)stop("Only multivariate data are allowed") -n1<-nrow(x) -n2<-nrow(y) -adalpha<-NA -if(DF){ -if(n1==n2 && alpha==.05){ -p1<-ncol(x) -if(p1==2){ -if(n1>=20)adalpha<-1.36/n1+.05 -} -if(p1==3){ -if(n1>=20)adalpha<-1.44/n+.05 -} -if(p1==4){ -if(n1>=40)adalpha<-2.47/n1+.05 -} -if(p1==5){ -if(n1>=40)adalpha<-3.43/n+.05 -} -if(p1==6){ -if(n1>=60)adalpha<-4.01/n1+.05 -}}} -val<-NA -for(j in 1:nboot) { - data1 <- sample(n1, size = n1, replace = T) - data2 <- sample(n2, size = n2, replace = T) -if(!DF){ -val[j]<-rgvar(as.matrix(x[data1,]),est=est,...)- -rgvar(as.matrix(y[data2,]),est=est,...) -} -if(DF){val[j]<- -if(!fast){ -rgvar(as.matrix(x[data1,]),est=skipcov,op=op,outpro.cop=cop,MM=MM,...)- -rgvar(as.matrix(y[data2,]),est=skipcov,op=op,outpro.cop=cop,MM=MM,...) -} -if(fast){ -rgvar(as.matrix(x[data1,]),est=skipcov.for,op=op,outpro.cop=cop,MM=MM,...)- -rgvar(as.matrix(y[data2,]),est=skipcov.for,op=op,outpro.cop=cop,MM=MM,...) 
-} -if(pr)print(c(j,val[j])) -}} -p.value<-sum(val<0)/nboot -p.value<-2*min(p.value,1-p.value) -est1=rgvar(x,est=est) -est2=rgvar(y,est=est) -list(p.value=p.value,adjusted.crit.level=adalpha,ratio.of.estimates=est1/est2,n1=n1,n2=n2) -} - -grit<-function(x,y,itest=1,sm.fun=rplot,nboot=500,alpha=.05,SEED=TRUE, -fr=1,plot.fun=rplot,plotit=TRUE,...){ -# -# Fit a running interval smoother using projection distances -# excluding the predictor variable itest -# itest=1 by default, meaning that the goal is to test -# the hypothesis that the first variable does not contribute -# to the regression model -# -# Method fits a smooth using x_1, ..., x_p, excluding variabe itest -# Then x_itest and the resulting residuals are passed to indt -# Alternative choices for smooth include -# sm.fun=lplot, and if p>2, runpd -# -if(!is.matrix(x))stop("Should have two or more predictors stored in a matrix") -p<-ncol(x) -pp<-p+1 -x<-elimna(cbind(x,y)) -y<-x[,pp] -x<-x[,1:p] -flag<-rep(TRUE,ncol(x)) -flag[itest]<-FALSE -temp<-sm.fun(x[,flag],y,plotit=FALSE,pyhat=TRUE,fr=fr) -res<-y-temp -test.it<-indt(x[,itest],res) -if(plotit)plot.fun(x[,itest],res,...) -test.it -} -stackit<-function(x,jval){ -# -# Take a matrix having p columns and convert -# it to a matrix having jval columns and np/jval rows -# So take first jval columns, and rbind this with -# next jval columns, etc. -# -x<-as.matrix(x) -chkit<-ncol(x)%%jval -if(chkit!=0)stop("ncol(x) is not a multiple of jval") -xval<-x[,1:jval] -xval<-as.matrix(xval) -iloop<-ncol(x)/jval-1 -il<-1 -iu<-jval -for(i in 1:iloop){ -il<-il+jval -iu<-iu+jval -temp<-x[,il:iu] -temp<-as.matrix(temp) -xval<-rbind(xval,temp) -} -xval -} -ancmg<-function(x,y,pool=TRUE,jcen=1,fr=1,depfun=fdepth,nmin=8,op=3,tr=.2,pts=NULL, -SEED=TRUE,pr=TRUE,cop=3,con=0,nboot=NA,alpha=.05,bhop=FALSE){ -# -# ANCOVA -# for two or more groups based on trimmed means or medians -# Two or more covariates is assumed. -# -# op=1 use omnibus test for trimmed means, with trimming given by tr -# op=2 use omnibus test for medians. -# (Not recommended when there are tied values, use op=4) -# op=3 multiple comparisons using trimming and percentile bootstrap. -# This method seems best for general use. -# op=4 multiple comparisons using medians and percentile bootstrap -# -# y is matrix with J columns, so have J groups. -# or y can have list mode with length J -# -# x is a matrix with Jp columns, so first p columns -# correspond to the p covariates in the first group, etc. -# Or, -# x can have list mode with length J and each component -# being a matrix with p columns. -# So if covariates for group 1 are in the matrix m1 -# x[[1]]<-m1 will store them in x, x having list mode -# -# nmin is the minimum sample size allowed for any group -# when testing hypotheses. 
-# If a design point results in a sample size ncol(x))stop("jcen has an invalid value") -xcen<-x[,js:jcenp] -} -if(is.list(x))xcen<-x[[jcen]] -if(pool){ -if(is.matrix(x))xval<-stackit(x,pval) -if(is.list(x))xval<-stacklist(x) -mval<-cov.mve(xval) -if(is.null(pts))pts<-ancdes(xval,depfun=depfun,cop=cop) -} -if(!pool){ -if(is.null(pts))pts<-ancdes(xcen,depfun=depfun,cop=cop) -mval<-cov.mve(xcen) -} -npts=1 -if(is.matrix(pts))npts=nrow(pts) -nval<-matrix(NA,ncol=J,nrow=npts) -icl<-0-pval+1 -icu<-0 -for(j in 1:J){ -icl<-icl+pval -icu<-icu+pval -for(i in 1:nrow(pts)){ -if(is.matrix(x) && is.matrix(y)){ -nval[i,j]<-length(y[near3d(x[,icl:icu],pts[i,],fr,mval),j]) -} -if(is.matrix(x) && is.list(y)){ -tempy<-y[[j]] -nval[i,j]<-length(tempy[near3d(x[,icl:icu],pts[i,],fr,mval)]) -} -if(is.list(x) && is.matrix(y)){ -xm<-as.matrix(x[[j]]) -nval[i,j]<-length(y[near3d(xm,pts[i,],fr,mval),j]) -} -if(is.list(x) && is.list(y)){ -tempy<-y[[j]] -xm<-as.matrix(x[[j]]) -nval[i,j]<-length(tempy[near3d(xm,pts[i,],fr,mval)]) -} -# -}} -flag<-rep(TRUE,nrow(pts)) -for(i in 1:npts){ -if(min(nval[i,])=nmin && sum(flagr)>=nmin){ -yl<-est(y[flagl],...) -yr<-est(y[flagr],...) -xl<-est(x[flagl],...) -xr<-est(x[flagr],...) -vals[i]<-(yr-yl)/(xr-xl) -}} -if(plotit){ -plot(c(x,x[1],x[2]),c(vals,-5,5),xlab=xlab,ylab=ylab) -xord<-order(x) -lines(x[xord],vals[xord]) -} -vals -} - - -rslopesm<-function(x,y,fr=1,est=tmean,nmin=10,pts=x,plotit=FALSE,xlab="X", -ylab="Y",SEED=TRUE,nboot=40,xout=FALSE,RNA=TRUE,atr=.2,scat=TRUE,pyhat=TRUE,...){ -# -# For a regression line predicting Y given X -# Estimate slope at points in pts with bagging -# followed by a smooth. -# -# pyhat=T, returns estimated slopes corresponding to the sorted -# x values. -# fr controls amount of smoothing -# atr controls the amount of trimming. -# -# OUTPUT: by default, the estimated slopes at -# X_1<=X_2<=...<=X_n -# That is, for the x values written in ascending order, the -# slope is estimated for each value. If the slope is not considered -# estimable, the estimate is set to NA. -# -# pts is used if the goal is to estimate the slope for some -# other collection of points. -# -# nmin controls how many points close to x are required when -# deciding that the slope is estimable. -# plotit=TRUE will plot the estimates. -# -# The plotted points are the estimates using rslope and -# the solid line gives the estimated values reported by this function -# -# Missing values are automatically removed. -# -if(SEED) set.seed(2) -temp<-cbind(x,y) -if(ncol(temp)!=2)stop("One predictor only is allowed") -temp<-elimna(temp) # Eliminate any rows with missing values -if(xout) { - flag <- outfun(temp[, 1], plotit = FALSE)$keep - temp <- temp[flag, ] -x<-temp[,1] -y<-temp[,2] -} -flag<-order(x) -x<-x[flag] -y<-y[flag] -mat<-matrix(NA,nrow=nboot,ncol=length(pts)) -vals<-NA - for(it in 1:nboot) { - idat <- sample(c(1:length(y)), replace = T) - xx <- temp[idat, 1] - yy <- temp[idat, 2] -# mat[it, ] <- runhat(xx, yy, pts = x, est = est, fr = fr, ...) -mat[it,]<-rslope(xx,yy,fr=fr,est=est,nmin=nmin,pts=x,plotit=FALSE) - } -rmd<-apply(mat,2,mean,na.rm=RNA,tr=atr) -flag<-is.na(rmd) -rmdsm<-lplot(x,rmd,pyhat=TRUE,plotit=plotit) -output<-"Done" -if(pyhat){ -temp<-rep(NA,length(x)) -temp[!flag]<-rmdsm$yhat.values -output<-temp -} -output -} - -m1way<-function(x,est=hd,nboot=599,SEED=TRUE,...){ -# -# Test the hypothesis that J measures of location are equal -# using the percentile bootstrap method. -# By default, medians are compared using 599 bootstrap samples. 
-# and the Harrell-Davis Estimator. To use the usual sample median, set -# est=median -# -# The data are assumed to be stored in x in list mode. Thus, -# x[[1]] contains the data for the first group, x[[2]] the data -# for the second group, etc. Length(x)=the number of groups = J, say. -# -# -if(is.matrix(x))x<-listm(x) -if(!is.list(x))stop("Data must be stored in list mode or a matrix.") -J<-length(x) -nval<-vector("numeric",length(x)) -gest<-vector("numeric",length(x)) -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -bvec<-matrix(0,J,nboot) -print("Taking bootstrap samples. Please wait.") -for(j in 1:J){ -print(paste("Working on group ",j)) -nval[j]<-length(x[[j]]) -gest[j]<-est(x[[j]]) -xcen<-x[[j]]-est(x[[j]],...) -data<-matrix(sample(xcen,size=length(x[[j]])*nboot,replace=TRUE),nrow=nboot) -bvec[j,]<-apply(data,1,est,...) # A J by nboot matrix -# containing the bootstrap values of est. -} -teststat<-wsumsq(gest,nval) -testb<-apply(bvec,2,wsumsq,nval) -p.value<-1 - sum(teststat >= testb)/nboot -teststat<-wsumsq(gest,nval) -list(teststat=teststat,p.value=p.value) -} - -oancpb<-function(x1,y1,x2,y2,est=tmean,tr=.2,pts=NA,fr1=1,fr2=1,nboot=600, -alpha=.05,plotit=TRUE,SEED=TRUE,PRO=FALSE,...){ -# -# Compare two independent groups using an ancova method -# with a percentile bootstrap combined with a running interval -# smooth. -# -# CURRENTLY SEEMS THAT THE R FUNCTION ancGLOB is better. -# -# This function performs an omnibus test using data corresponding -# to K design points specified by the argument pts. If -# pts=NA, K=5 points are chosen for you (see Introduction to Robust -# Estimation and Hypothesis Testing.) -# Null hypothesis is that conditional distribution of Y, given X for first -# group, minus the conditional distribution of Y, given X for second -# group is equal to zero. -# The strategy is to choose K specific X values -# and then test the hypothesis that all K differences are zero. -# -# If you want to choose specific X values, Use the argument -# pts -# Example: pts=c(1,3,5) will use X=1, 3 and 5. -# -# For multiple comparisons using these J points, use ancpb -# -# Assume data are in x1 y1 x2 and y2 -# -# PRO=F, means Mahalanobis distance is used. -# PRO=T, projection distance is used. -# -# fr1 and fr2 are the spans used to fit a smooth to the data. -# -stop('USE ancGLOB') -# -# -gv1<-vector("list") -if(is.na(pts[1])){ -isub<-c(1:5) # Initialize isub -test<-c(1:5) -xorder<-order(x1) -y1<-y1[xorder] -x1<-x1[xorder] -xorder<-order(x2) -y2<-y2[xorder] -x2<-x2[xorder] -n1<-1 -n2<-1 -vecn<-1 -for(i in 1:length(x1))n1[i]<-length(y1[near(x1,x1[i],fr1)]) -for(i in 1:length(x1))n2[i]<-length(y2[near(x2,x1[i],fr2)]) -for(i in 1:length(x1))vecn[i]<-min(n1[i],n2[i]) -sub<-c(1:length(x1)) -isub[1]<-min(sub[vecn>=12]) -isub[5]<-max(sub[vecn>=12]) -isub[3]<-floor((isub[1]+isub[5])/2) -isub[2]<-floor((isub[1]+isub[3])/2) -isub[4]<-floor((isub[3]+isub[5])/2) -for (i in 1:5){ -j<-i+5 -temp1<-y1[near(x1,x1[isub[i]],fr1)] -temp2<-y2[near(x2,x1[isub[i]],fr2)] -temp1<-temp1[!is.na(temp1)] -temp2<-temp2[!is.na(temp2)] -gv1[[i]]<-temp1 -gv1[[j]]<-temp2 -} -# -loc<-NA -if(SEED)set.seed(2) -bvec<-matrix(NA,nrow=nboot,ncol=5) -for(j in 1:5){ -k<-j+5 -loc[j]<-est(gv1[[j]])-est(gv1[[k]]) -xx<-matrix(sample(gv1[[j]],size=length(gv1[[j]])*nboot,replace=TRUE), -nrow=nboot) -yy<-matrix(sample(gv1[[k]],size=length(gv1[[k]])*nboot,replace=TRUE), -nrow=nboot) -bvec[,j]<-apply(xx,1,FUN=est,...)-apply(yy,1,FUN=est,...) 
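-# Column j of bvec now holds the bootstrap differences between the two groups'
-# location estimates at the jth design point.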
-} -nullv<-rep(0,5) -if(!PRO){ -mvec<-apply(bvec,2,FUN=mean) -m1<-var(t(t(bvec)-mvec+loc)) -temp<-mahalanobis(rbind(bvec,nullv),loc,m1) -} -if(PRO){ -temp<-pdis(rbind(bvec,nullv)) -} -sig.level<-sum(temp[nboot+1]nullval || chkit[2]nullval || chkit[2]150)fr<-7.57/length(y)+.05 -} -} -if(SEED)set.seed(2) -x<-as.matrix(x) -mflag<-matrix(NA,nrow=length(y),ncol=length(y)) -for (j in 1:length(y)){ -for (k in 1:length(y)){ -mflag[j,k]<-(sum(x[j,]<=x[k,])==ncol(x)) -} -} -yhat<-adrun(x,y,est=est,plotit=FALSE,fr=fr,pyhat=TRUE) -regres<-y-yhat -print("Taking bootstrap samples, please wait.") -data<-matrix(runif(length(y)*nboot),nrow=nboot) -data<-sqrt(12)*(data-.5) # standardize the random numbers. -rvalb<-apply(data,1,adtests1,yhat,regres,mflag,x,fr) -# An n x nboot matrix of R values -rvalb<-rvalb/sqrt(length(y)) -dstatb<-apply(abs(rvalb),2,max) -wstatb<-apply(rvalb^2,2,mean) -v<-c(rep(1,length(y))) -rval<-adtests1(v,yhat,regres,mflag,x,fr) -rval<-rval/sqrt(length(y)) -dstat<-max(abs(rval)) -wstat<-mean(rval^2) -p.value.d<-1-sum(dstat>=dstatb)/nboot -p.value.w<-1-sum(wstat>=wstatb)/nboot -list(dstat=dstat,wstat=wstat,p.value.d=p.value.d,p.value.w=p.value.w) -} - -rhom<-function(x,y,op=1,op2=FALSE,tr=.2,plotit=TRUE,xlab="NA",ylab="NA",zlab="ABS(res)", -est=median,sm=FALSE,SEED=TRUE,xout=FALSE,outfun=outpro,...){ -# For regression model, Y=m(X)+s(X)e, -# where s(X) models heteroscedasticity, and e has median 0, -# test hypothesis s(X)=1 for any X -# -# For p>1, method tests for each p whether residuals and x_j -# have a horizontal regression line. -# -# op2=F, tests for homogeneity using running interval smoother -# op2=T, test of independence based on Y-M(Y), M(Y) some measure -# of location given by argument est. -# In general, op2=T should NOT be used when the goal is to test -# the hypothesis of a homoscedastic error term. -# -# op=1 test using regression method (function regci) -# op=2 test using Winsorized correlation -# tr is amount of Winsorizing. A heteroscedastic bootstrap method is used. wincor is not asymptotically correct. -# op=3 test using a wild boostrap method -# -x<-as.matrix(x) -p<-ncol(x) -pp<-p+1 -xy<-elimna(cbind(x,y)) -x<-xy[,1:p] -y<-xy[,pp] -x<-as.matrix(x) -if(xout){ -m<-cbind(x,y) -flag<-outfun(x,plotit=FALSE,...)$keep -m<-m[flag,] -x<-m[,1:p] -y<-m[,pp] -x<-as.matrix(x) -} -output<-NA -if(ncol(x)==1){ -if(!op2)res<-y-runhat(x[,1],y,est=est,pts=x) -if(op2)res<-y-est(y) -if(op==1)output<-regci(x,abs(res),SEED=SEED,pr=FALSE)$regci[2,5] -if(op==2)output<-wincorci(x,abs(res),tr=tr,SEED=SEED)$p.value -if(op==3)output<-indt(x,abs(res),SEED=SEED)$p.value.d -} -if(ncol(x)>1){ -pv<-ncol(x)+1 -if(!op2)res<-y-rung3hat(x,y,est=est,pts=x)$rmd -if(op2)res<-y-est(y) -if(op==1)output<-regci(x,abs(res),pr=FALSE)$regci[2:pv,5] -if(op==2)output<-winall(cbind(x,abs(res)),tr=tr)$p.values[1:ncol(x),pv] -if(op==3)output<-indt(x,abs(res),SEED=SEED)$p.value.d -} -if(plotit){ -if(ncol(x)==1){ -if(xlab=='NA')xlab="X" -if(ylab=='NA')ylab="ABS(res)" -if(!sm)rungen(x,abs(res),est=est,xlab=xlab,ylab=ylab) -if(sm)runmbo(x,abs(res),est=est,xlab=xlab,ylab=ylab) -} -if(ncol(x)==2){ -if(xlab=='NA')xlab="X1" -if(ylab=='NA')ylab="X2" -if(sm)rung3d(x,abs(res),est=est,xlab=xlab,ylab=ylab,zlab=zlab) -if(!sm)run3bo(x,abs(res),est=est,xlab=xlab,ylab=ylab,zlab=zlab) -}} -list(p.value=output) -} - -gk.sigmamu <- function(x, c1 = 4.5, c2 = 3.0, mu.too = FALSE, ...) 
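-# Robust scale (and, with mu.too=TRUE, a weighted-mean location) computed from the
-# MAD-standardized data with tuning constants c1 and c2; gk() below combines two such
-# scales via cov(x,y)=(sigma(x+y)^2-sigma(x-y)^2)/4 to obtain a pairwise covariance.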
-{ - n <- length(x) - - medx <- median(x) - sigma0 <- median(abs(x - medx)) -w <- abs(x - medx) / sigma0 -w <- ifelse(w<=c1,(1.0 - (w / c1)^2)^2,0) - mu <- sum(x * w) / sum(w) - - x <- (x - mu) / sigma0 - rho <- x^2 - rho[rho > c2^2] <- c2^2 - sigma2 <- sigma0^2 / n * sum(rho) - - if(mu.too) - c(mu, sqrt(sigma2)) - else - sqrt(sigma2) -} - -gk <- function(x, y, ...) -{ - ((gk.sigmamu(x + y, ...))^2 - (gk.sigmamu(x - y, ...))^2) / 4.0 -} - -hard.rejection <- function(distances, p, beta = 0.9, ...) -{ - d0 <- qchisq(beta, p) * median(distances) / qchisq(0.5, p) - weights <- double(length(distances)) - weights[distances <= d0] <- 1.0 - weights -} -# -# -# - -gkcov<-function(x,y,gk.sigmamu=taulc,...){ -# -# Compute robust covariance using the Gnanadesikan-Kettenring -# estimator. -# (cf. Marrona & Zomar, 2002, Technometrics -# -val<-.25*(gk.sigmamu(x+y,...)-gk.sigmamu(x-y,...)) -val -} -covogk<-function(x,sigmamu=taulc,v=gkcov,n.iter=5,beta=.9,...){ -# -# Compute robust (weighted) covariance matrix in Maronna and Zamar -# (2002, Technometrics, eq. 7). -# -# x is an n by p matrix -# n.iter number of iterations. 1 seems to be best -# sigmamu is any user supplied function having the form -# sigmamu(x,mu.too=F) and which computes a robust measure of -# of dispersion if mu.too=F. If mu.too=T, it returns -# a robust measure of location as well. -# v is any robust covariance -# -if(!is.matrix(x))stop("x should be a matrix") -x<-elimna(x) # remove any rows with missing data -temp<-ogk.pairwise(x,sigmamu=sigmamu,v=v,n.iter=n.iter,beta=beta,...)$wcovmat -temp -} -ogk<-function(x,sigmamu=taulc,v=gkcov,n.iter=1,beta=.9,...){ -# -# Compute robust (weighted) covariance matrix in Maronna and Zamar -# (2002, Technometrics, eq. 7). -# -# x is an n by p matrix -# n.iter number of iterations. 1 seems to be best -# sigmamu is any user supplied function having the form -# sigmamu(x,mu.too=F) and which computes a robust measure of -# of dispersion if mu.too=F. If mu.too=T, it returns -# a robust measure of location as well. -# v is any robust covariance -# -if(!is.matrix(x))stop("x should be a matrix") -x<-elimna(x) # remove any rows with missing data -temp<-ogk.pairwise(x,sigmamu=sigmamu,v=v,n.iter=n.iter,beta=beta,...) -list(center=temp$wcenter,cov=temp$wcovmat) -} - -ogk.pairwise <- function(X,n.iter=1,sigmamu=taulc,v=gkcov,beta=.9,...) -#weight.fn=hard.rejection,beta=.9,...) -{ -# Downloaded (and modified slightly) from www.stats.ox.ac.uk/~konis/pairwise.q -# Corrections noted by V. Todorov have been incorporated -# - data.name <- deparse(substitute(X)) - X <- as.matrix(X) - n <- dim(X)[1] - p <- dim(X)[2] - Z <- X - U <- diag(p) - A <- list() - # Iteration loop. - for(iter in 1:n.iter) { - # Compute the vector of standard deviations d and - # the correlation matrix U. - d <- apply(Z, 2, sigmamu, ...) - Z <- sweep(Z, 2, d, '/') - - for(i in 1:(p - 1)) { - for(j in (i + 1):p) { - U[j, i] <- U[i, j] <- v(Z[ , i], Z[ , j], ...) - } - } - - # Compute the eigenvectors of U and store them in - # the columns of E. - - E <- eigen(U, symmetric = TRUE)$vectors - - # Compute A, there is one A for each iteration. - - A[[iter]] <- d * E - - # Project the data onto the eigenvectors. - - Z <- Z %*% E - } - - # End of orthogonalization iterations. - - # Compute the robust location and scale estimates for - # the transformed data. - -# sqrt.gamma <- apply(Z, 2, sigmamu, mu.too = TRUE, ...) 
- sqrt.gamma <- apply(Z, 2, sigmamu, mu.too = TRUE) - center <- sqrt.gamma[1, ] - sqrt.gamma <- sqrt.gamma[2, ] - - # Compute the mahalanobis distances. - - Z <- sweep(Z, 2, center) - Z <- sweep(Z, 2, sqrt.gamma, '/') - distances <- rowSums(Z^2) - - # From the inside out compute the robust location and - # covariance matrix estimates. See equation (5). - - covmat <- diag(sqrt.gamma^2) - - for(iter in seq(n.iter, 1, -1)) { - covmat <- A[[iter]] %*% covmat %*% t(A[[iter]]) - center <- A[[iter]] %*% center - } - - center <- as.vector(center) - - # Compute the reweighted estimate. First, compute the - # weights using the user specified weight function. - - #weights <- weight.fn(distances, p, ...) -weights <- hard.rejection(distances, p, beta=beta,...) - sweights <- sum(weights) - - # Then compute the weighted location and covariance - # matrix estimates. - - wcenter <- colSums(sweep(X, 1, weights, '*')) / sweights - - Z <- sweep(X, 2, wcenter) - Z <- sweep(Z, 1, sqrt(weights), '*') - wcovmat <- (t(Z) %*% Z) / sweights; - - list(center = center, - covmat = covmat, - wcenter = wcenter, - wcovmat = wcovmat, - distances = distances, - sigmamu = deparse(substitute(sigmamu)), - v = deparse(substitute(v)), - data.name = data.name, - data = X) -} - - -gk.sigmamu <- function(x, c1 = 4.5, c2 = 3.0, mu.too = FALSE, ...) -{ - n <- length(x) - - medx <- median(x) - sigma0 <- median(abs(x - medx)) -# w <- (x - medx) / sigma0 -# w <- (1.0 - (w / c1)^2)^2 - #w[w < 0.0] <- 0.0 -w <- abs(x - medx) / sigma0 -w <- ifelse(w<=c1,(1.0 - (w / c1)^2)^2,0) - mu <- sum(x * w) / sum(w) - - x <- (x - mu) / sigma0 - rho <- x^2 - rho[rho > c2^2] <- c2^2 - sigma2 <- sigma0^2 / n * sum(rho) - - if(mu.too) - c(mu, sqrt(sigma2)) - else - sqrt(sigma2) -} - -gk <- function(x, y, ...) -{ - ((gk.sigmamu(x + y, ...))^2 - (gk.sigmamu(x - y, ...))^2) / 4.0 -} - -hard.rejection <- function(distances, p, beta = 0.9, ...) -{ - d0 <- qchisq(beta, p) * median(distances) / qchisq(0.5, p) - weights <- double(length(distances)) - weights[distances <= d0] <- 1.0 - weights -} - -outogk<-function(x,sigmamu=taulc,v=gkcov,op=TRUE,SEED=FALSE, -beta=max(c(.95,min(c(.99,1/nrow(x)+.94)))),n.iter=1,plotit=TRUE,...){ -# -# Use the ogk estimator to -# determine which points are outliers -# -# op=T uses robust Mahalanobis distance based on -# the OGK estimator with beta adjusted so that -# the outside rate per observation is approximately .05 -# under normality. -# op=F returns the outliers based on the distances used -# by the OGK estimator -# (Currently, op=T seems best for detecting outliers.) -# -if(!is.matrix(x))stop("x should be a matrix") -x<-elimna(x) -if(!op){ -temp<-ogk.pairwise(x,sigmamu=sigmamu,v=v,beta=beta,n.iter=n.iter,...) -vals<-hard.rejection(temp$distances,p=ncol(x),beta=beta,...) -flag<-(vals==1) -vals<-c(1:nrow(x)) -outid<-vals[!flag] -keep<-vals[flag] -if(is.matrix(x)){ -if(ncol(x)==2 && plotit){ -plot(x[,1],x[,2],xlab="X", ylab="Y",type="n") -points(x[flag,1],x[flag,2]) -if(sum(!flag)>0)points(x[!flag,1],x[!flag,2],pch="o") -}}} -if(op){ -temp<-out(x,cov.fun=ogk,beta=beta,plotit=plotit,SEED=SEED) -outid<-temp$out.id -keep<-temp$keep -} -list(out.id=outid,keep=keep,distances=temp$dis) -} - -splot<-function(x,op=TRUE,VL=FALSE,xlab="X",ylab="Rel. Freq.",frame.plot=TRUE,plotit=TRUE){ -# -# Frequency plot -# -# For each unique value in x, -# the relatively frequency is determined and plotted. -# -# op=TRUE a line connecting the relative frequencies is drawn if VL=FALSE. 
-# VL=TRUE, a vertical line is drawn for each unique value in x; -# the height of the line indicates the relative frequency. -# -# op=FALSE. No lines are drawn -# -# The function returns the sample size as well as the frequencies -# associated with each unique value stored in x. -# -x<-x[!is.na(x)] -temp<-sort(unique(x)) -freq<-NA -for(i in 1:length(temp)){ -freq[i]<-sum(x==temp[i]) -} -rmfreq=freq -nval=sum(freq) -freq<-freq/length(x) -tfreq<-freq -tfreq[1]<-0 -tfreq[2]<-max(freq) -if(plotit){ -plot(temp,tfreq,xlab=xlab,ylab=ylab,type="n",frame.plot=frame.plot) -points(temp,freq,pch="*") -if(op) -if(!VL)lines(temp,freq) -if(VL){ -for(i in 1:length(temp))lines(c(temp[i],temp[i]),c(0,freq[i])) -}} -den=sum(rmfreq) -list(obs.values=temp,n=nval,frequencies=rmfreq,rel.freq=rmfreq/den) -} - -outcov<-function(x,y=NA,outfun=outogk,plotit=FALSE){ -# -# Remove outliers and compute covariances -# -if(!is.na(y[1]))x<-cbind(x,y) -keep<-outfun(x,plotit=plotit)$keep -val<-var(x[keep,]) -if(ncol(val)==2)val<-val[1,2] -list(cov=val) -} - -covout<-function(x,y=NA,outfun=outogk,plotit=FALSE){ -# -# Remove outliers and compute covariances -# -if(!is.na(y[1]))x<-cbind(x,y) -keep<-outfun(x,plotit=plotit)$keep -val<-var(x[keep,]) -if(ncol(val)==2)val<-val[1,2] -val -} - -tbscor<-function(x,y=NA){ -# -# Compute a correlation coefficient using the TBS measure of scatter -# -if(!is.na(y[1]))x<-cbind(x,y) -if(!is.matrix(x))stop("x should be a matrix") -x<-elimna(x) -n<-nrow(x) -p<-ncol(x) -temp<-tbs(x)$cov -val<-matrix(NA,p,p) -for(j in 1:p){ -for(k in 1:p){ -val[j,k]<-temp[k,j]/sqrt(temp[k,k]*temp[j,j]) -}} -test<-abs(val*sqrt((n-2)/(1-val^2))) -if(p==2){ -val<-val[1,2] -p.value<-c("Greater than .1") -crit<-20.20/n+1.89 -if(test>=crit)p.value<-c("Less than .1") -crit<-30.41/n+2.21 -if(test>=crit)p.value<-c("Less than .05") -crit<-39.72/n+2.5 -if(test>=crit)p.value<-c("Less than .025") -crit<-58.55/n+2.80 -if(test>=crit)p.value<-c("Less than .01") -} -list(cor=val,test.stat=test,p.value=p.value) -} - -skiptbs<-function(x,y=NA,plotit=FALSE){ -# -# Remove outliers and compute correlations -# -if(!is.na(y[1]))x<-cbind(x,y) -x<-elimna(x) -n<-nrow(x) -keep<-outtbs(x,plotit=plotit)$keep -val<-cor(x[keep,]) -p.value<-NA -test<-NA -crit.05<-30.41/n+2.21 -vat<-val -diag(vat)<-0 -test<-abs(vat*sqrt((n-2)/(1-vat^2))) -diag(test)<-NA -if(ncol(val)==2){ -p.value<-c("Greater than .1") -val<-val[1,2] -test<-abs(val*sqrt((n-2)/(1-val^2))) -p.value<-c("Greater than .1") -crit<-20.20/n+1.89 -if(test>=crit)p.value<-c("Less than .1") -crit<-30.41/n+2.21 -if(test>=crit)p.value<-c("Less than .05") -crit<-39.72/n+2.5 -if(test>=crit)p.value<-c("Less than .025") -crit<-58.55/n+2.80 -if(test>=crit)p.value<-c("Less than .01") -} -list(cor=val,test.stat=test,p.value=p.value,crit.05=crit.05) -} -skipogk<-function(x,y=NA,plotit=FALSE){ -# -# Remove outliers and compute correlations -# -if(!is.na(y[1]))x<-cbind(x,y) -x<-elimna(x) -n<-nrow(x) -keep<-outogk(x,plotit=plotit)$keep -val<-cor(x[keep,]) -p.value<-NA -test<-NA -crit.05<-15.49/n+2.68 -vat<-val -diag(vat)<-0 -test<-abs(vat*sqrt((n-2)/(1-vat^2))) -diag(test)<-NA -if(ncol(val)==2){ -p.value<-c("Greater than .1") -val<-val[1,2] -test<-abs(val*sqrt((n-2)/(1-val^2))) -crit<-4.8/n+2.72 -if(test>=crit)p.value<-c("Less than .1") -crit<-15.49/n+2.68 -if(test>=crit)p.value<-c("Less than .05") -crit<-14.22/n+3.26 -if(test>=crit)p.value<-c("Less than .025") -crit<-24.83/n+3.74 -if(test>=crit)p.value<-c("Less than .01") -} -list(cor=val,test.stat=test,p.value=p.value,crit.05=crit.05) -} - 
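-# Illustrative use of the skipped correlation above (a sketch with simulated data;
-# the object m is hypothetical):
-# set.seed(1)
-# m <- cbind(rnorm(60), rnorm(60))
-# m[1:3,] <- 8        # a few bivariate outliers
-# skipogk(m)$cor      # correlation after removing points flagged by outogk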
-rqfit<-function(x,y,qval=0.5,alpha=0.05,xout=FALSE,outfun=outpro,res=FALSE,method='br',...){ -# -# Do a quantile regression fit -# -if(alpha!=.05)stop("This function only allows alpha=0.05. Use qregci") -library(quantreg) -xx<-cbind(x,y) -p<-ncol(xx)-1 -xx<-elimna(xx) -x<-xx[,1:p] -y<-xx[,ncol(xx)] -x=as.matrix(x) -if(xout){ -flag<-outfun(x,...)$keep -x<-x[flag,] -y<-y[flag] -} -residuals<-NA -if(res)residuals<-rq(y~x)$residuals -temp<-summary(rq(y~x,tau=qval,alpha=alpha,method=method)) -temp0<-temp[[4]] -if(is.matrix(temp[[3]]))temp0<-temp[[3]] #Newer R version -temp<-temp0 -coef<-temp[,1] -ci<-temp[,2:3] -list(coef=coef,ci=ci,residuals=residuals) -} -rqtest.sub<-function(isub,x,y,qval=.5){ -# -# Perform regression using x[isub] to predict y[isub] -# isub is a vector of length n, -# a bootstrap sample from the sequence of integers -# 1, 2, 3, ..., n -# -# This function is used by other functions when computing -# bootstrap estimates. -# -# x is assumed to be a matrix containing values of the predictors. -# -xmat<-matrix(x[isub,],nrow(x),ncol(x)) -#regboot<-rqfit(xmat,y[isub],qval=qval)$coef -regboot<-qreg(xmat,y[isub],qval=qval)$coef -regboot -} - - - -erho.bt <- function(p,c1,M) -# expectation of rho(d) under chi-squared p - return(chi.int(p,2,M)/2 - +(M^2/2+c1*(5*c1+16*M)/30)*chi.int2(p,0,M+c1) - +(M^2/2-M^2*(M^4-5*M^2*c1^2+15*c1^4)/(30*c1^4))*( -chi.int(p,0,M+c1)-chi.int(p,0,M)) - +(1/2+M^4/(2*c1^4)-M^2/c1^2)*(chi.int(p,2,M+c1)-chi.int(p,2,M)) - +(4*M/(3*c1^2)-4*M^3/(3*c1^4))*(chi.int(p,3,M+c1)-chi.int(p,3,M)) - +(3*M^2/(2*c1^4)-1/(2*c1^2))*(chi.int(p,4,M+c1)-chi.int(p,4,M)) - -(4*M/(5*c1^4))*(chi.int(p,5,M+c1)-chi.int(p,5,M)) - +(1/(6*c1^4))*(chi.int(p,6,M+c1)-chi.int(p,6,M))) -chi.int <- function(p,a,c1) -# partial expectation d in (0,c1) of d^a under chi-squared p - return( exp(lgamma((p+a)/2)-lgamma(p/2))*2^{a/2}*pchisq(c1^2,p+a) ) -chi.int2 <- function(p,a,c1) -# partial expectation d in (c1,\infty) of d^a under chi-squared p - return( exp(lgamma((p+a)/2)-lgamma(p/2))*2^{a/2}*(1-pchisq(c1^2,p+a))) -cgen.bt <- function(n,p,r,alpha,asymp=FALSE){ -# find constants c1 and M that gives a specified breakdown r -# and rejection point alpha -if (asymp == FALSE){if (r > (n-p)/(2*n) ) r <- (n-p)/(2*n)} -# maximum achievable breakdown -# -# if rejection is not achievable, use c1=0 and best rejection -# - limvec <- rejpt.bt.lim(p,r) - if (1-limvec[2] <= alpha) - { - c1 <- 0 - M <- sqrt(qchisq(1-alpha,p)) - } - else - { - c1.plus.M <- sqrt(qchisq(1-alpha,p)) - M <- sqrt(p) - c1 <- c1.plus.M - M - iter <- 1 - crit <- 100 - eps <- 1e-5 - while ((crit > eps)&(iter<100)) - { - deps <- 1e-4 - M.old <- M - c1.old <- c1 - er <- erho.bt(p,c1,M) - fc <- er - r*(M^2/2+c1*(5*c1+16*M)/30) - fcc1 <- (erho.bt(p,c1+deps,M)-er)/deps - fcM <- (erho.bt(p,c1,M+deps)-er)/deps - fcp <- fcM - fcc1 - r*(M-(5*c1+16*M)/30+c1*9/30) - M <- M - fc/fcp - if (M >= c1.plus.M ){M <- (M.old + c1.plus.M)/2} - c1 <- c1.plus.M - M -# if (M-c1 < 0) M <- c1.old+(M.old-c1.old)/2 - crit <- abs(fc) - iter <- iter+1 - } - } -list(c1=c1,M=M,r1=r) -} -erho.bt.lim <- function(p,c1) -# expectation of rho(d) under chi-squared p - return(chi.int(p,2,c1)+c1^2*chi.int2(p,0,c1)) -erho.bt.lim.p <- function(p,c1) -# derivative of erho.bt.lim wrt c1 - return(chi.int.p(p,2,c1)+c1^2*chi.int2.p(p,0,c1)+2*c1*chi.int2(p,0,c1)) - - -rejpt.bt.lim <- function(p,r){ -# find p-value of translated biweight limit c -# that gives a specified breakdown - c1 <- 2*p - iter <- 1 - crit <- 100 - eps <- 1e-5 - while ((crit > eps)&(iter<100)) - { - c1.old <- c1 - 
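-    # Newton-Raphson step: solve erho.bt.lim(p,c1) = r*c1^2 for the rejection constant c1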
fc <- erho.bt.lim(p,c1) - c1^2*r - fcp <- erho.bt.lim.p(p,c1) - 2*c1*r - c1 <- c1 - fc/fcp - if (c1 < 0) c1 <- c1.old/2 - crit <- abs(fc) - iter <- iter+1 - } - return(c(c1,pchisq(c1^2,p),log10(1-pchisq(c1^2,p)))) -} -chi.int.p <- function(p,a,c1) - return( exp(lgamma((p+a)/2)-lgamma(p/2))*2^{a/2}*dchisq(c1^2,p+a)*2*c1 ) -chi.int2.p <- function(p,a,c1) - return( -exp(lgamma((p+a)/2)-lgamma(p/2))*2^{a/2}*dchisq(c1^2,p+a)*2*c1 ) -ksolve.bt <- function(d,p,c1,M,b0){ -# find a constant k which satisfies the s-estimation constraint -# for modified biweight - k <- 1 - iter <- 1 - crit <- 100 - eps <- 1e-5 - while ((crit > eps)&(iter<100)) - { - k.old <- k - fk <- mean(rho.bt(d/k,c1,M))-b0 - fkp <- -mean(psi.bt(d/k,c1,M)*d/k^2) - k <- k - fk/fkp - if (k < k.old/2) k <- k.old/2 - if (k > k.old*1.5) k <- k.old*1.5 - crit <- abs(fk) - iter <- iter+1 - } - return(k) -} -rho.bt <- function(x,c1,M) -{ - x1 <- (x-M)/c1 - ivec1 <- (x1 < 0) - ivec2 <- (x1 > 1) - return(ivec1*(x^2/2) - +ivec2*(M^2/2+c1*(5*c1+16*M)/30) - +(1-ivec1-ivec2)*(M^2/2-M^2*(M^4-5*M^2*c1^2+15*c1^4)/(30*c1^4) - +(1/2+M^4/(2*c1^4)-M^2/c1^2)*x^2 - +(4*M/(3*c1^2)-4*M^3/(3*c1^4))*x^3 - +(3*M^2/(2*c1^4)-1/(2*c1^2))*x^4 - -4*M*x^5/(5*c1^4)+x^6/(6*c1^4))) -} -psi.bt <- function(x,c1,M) -{ - x1 <- (x-M)/c1 - ivec1 <- (x1 < 0) - ivec2 <- (x1 > 1) - return(ivec1*x+(1-ivec1-ivec2)*x*(1-x1^2)^2) -} -psip.bt <- function(x,c1,M) -{ - x1 <- (x-M)/c1 - ivec1 <- (x1 < 0) - ivec2 <- (x1 > 1) - return(ivec1+(1-ivec1-ivec2)*((1-x1^2)^2+4*x*x1*(1-x1^2)/c1)) -} -wt.bt <- function(x,c1,M) -{ - x1 <- (x-M)/c1 - ivec1 <- (x1 < 0) - ivec2 <- (x1 > 1) - return(ivec1+(1-ivec1-ivec2)*(1-x1^2)^2) -} -v.bt <- function(x,c1,M) return(x*psi.bt(x,c1,M)) - - -olstests1<-function(vstar,yhat,res,x){ -ystar <- yhat + res * vstar -p<-ncol(x) -pp<-p+1 -vals<-lsfit(x,ystar)$coef[2:pp] -test<-sum(vals^2) -test -} -kerreg<-function(x,y,pyhat=FALSE,pts=NA,plotit=TRUE,theta=50,phi=25,expand=.5, -scale=FALSE,zscale=FALSE,eout=FALSE,xout=FALSE,outfun=out,np=100,xlab="X",ylab="Y",zlab="Z", -varfun=pbvar,e.pow=TRUE,pr=TRUE,ticktype="simple",pch='.',...){ -# -# Compute local weighted regression with Epanechnikov kernel -# -# See Fan, Annals of Statistics, 1993, 21, 196-217. -# cf. Bjerve and Doksum, Annals of Statistics, 1993, 21, 890-902 -# -# With a single predictor, this function calls locreg -# See locreg for information about np and plotting -# -library(akima) -x<-as.matrix(x) -xx<-cbind(x,y) -xx<-elimna(xx) -x<-xx[,1:ncol(x)] -x<-as.matrix(x) -y<-xx[,ncol(x)+1] -d<-ncol(x) -np1<-d+1 -m<-elimna(cbind(x,y)) -if(xout && eout)stop("Can't have eout=xout=T") -if(eout){ -flag<-outfun(m,plotit=FALSE,...)$keep -m<-m[flag,] -} -if(xout){ -flag<-outfun(x,plotit=FALSE,...)$keep -m<-m[flag,] -} -if(zscale){ -for(j in 1:np1){ -m[,j]<-(m[,j]-median(m[,j]))/mad(m[,j]) -}} -x<-m[,1:d] -x<-as.matrix(x) -y<-m[,np1] -n<-nrow(x) -if(d>1){ -xrem<-x -pi<-gamma(.5)^2 -cd<-c(2,pi) -if(d==2)A<-1.77 -if(d==3)A<-2.78 -if(d>2){ -for(j in 3:d)cd[j]<-2*pi*cd[j-2]/j # p. 76 -} -if(d>3)A<-(8*d*(d+2)*(d+4)*(2*sqrt(pi))^d)/((2*d+1)*cd[d]) # p. 87 -hval<-A*(1/n)^(1/(d+4)) # p. 86 -for(j in 1:d){ -sig<-sqrt(var(x[,j])) -temp<-idealf(x[,j]) -iqr<-(temp$qu-temp$ql)/1.34 -A<-min(c(sig,iqr)) -x[,j]<-x[,j]/A -} -xx<-cbind(rep(1,nrow(x)),x) -yhat<-NA -for(j in 1:n){ -yhat[j]<-NA -temp1<-t(t(x)-x[j,])/(hval) -temp1<-temp1^2 -temp1<-apply(temp1,1,FUN="sum") -temp<-.5*(d+2)*(1-temp1)/cd[d] -epan<-ifelse(temp1<1,temp,0) # Epanechnikov kernel, p. 
76 -chkit<-sum(epan!=0) -if(chkit >= np1){ -vals<-lsfit(x,y,wt=epan)$coef -yhat[j]<-xx[j,]%*%vals -}} -if(plotit && d==2){ -if(pr){ -if(!scale){ -print("scale=F is specified") -print("If there is dependence, might use scale=T") -}} -m<-elimna(cbind(xrem,yhat)) -xrem<-m[,1:d] -yhat<-m[,np1] -fitr<-yhat -iout<-c(1:length(fitr)) -nm1<-length(fitr)-1 -for(i in 1:nm1){ -ip1<-i+1 -for(k in ip1:length(fitr))if(sum(xrem[i,]==xrem[k,])==2)iout[k]<-0 -} -fitr<-fitr[iout>=1] -mkeep<-xrem[iout>=1,] -fit<-interp(mkeep[,1],mkeep[,2],fitr) -persp(fit,theta=theta,phi=phi,expand=expand,xlab=xlab,ylab=ylab,zlab=zlab, -scale=scale,ticktype=ticktype) -}} -if(d==1){ -yhat<-locreg(x[,1],y,pyhat=TRUE,np=np,plotit=plotit,pts=pts, -xlab=xlab,ylab=ylab,pch=pch) -yhat2<-locreg(x[,1],y,pyhat=TRUE,np=0,plotit=FALSE) -} -if(d>1)yhat2<-yhat -m<-NULL -#E.pow<-varfun(yhat2[!is.na(yhat2)])/varfun(y) -# Estimate of explanatory power performs poorly. -if(pyhat)m<-yhat -#list(Strength.Assoc=sqrt(E.pow),Explanatory.Power=E.pow,yhat=m) -m -} - - -attract<-function(X, Y, k = 5) -{ -# Works in Splus but not in R. -# For simple linear regression: plots k elemental starts and -# their domains of attraction. Calls conc2. - l1coef <- l1fit(X, Y)$coef - X <- as.matrix(X) - nr <- dim(X)[1] - nc <- dim(X)[2] + 1 - J <- 1:nc - dom <- matrix(nrow = k, ncol = nc) - par(mfrow = c(1, 2)) - plot(X, Y) - title("a) 5 Elemental Starts") - for(i in 1:k) { -## get J - J <- sample(nr, nc) ## get bJ, the elem fit - if(abs(X[J[1]] - X[J[2]]) < 1/100000000) { - slope <- 0 - } - else { - slope <- (Y[J[1]] - Y[J[2]])/(X[J[1]] - X[J[2]]) - } - int <- Y[J[1]] - slope * X[J[1]] - fit <- c(int, slope) - yhat <- X %*% fit[2:nc] + fit[1] - lines(X, yhat) - ## get the domain of attraction for LTA concentration - dom[i, ] <- conc2(X, Y, start = fit)$coef - } - plot(X, Y) - for(i in 1:k) { - fit <- dom[i, ] - yhat <- X %*% fit[2:nc] + fit[1] - lines(X, yhat) - } - title("b) The Corresponding Attractors") -} - -bg2ci<-function(x, alpha = 0.05) -{ -#gets BGse with middle n^0.8 cases for sample median and -#the corresponding robust 100 (1-alpha)% CI. This is optimal -#for estimating the SE but is not resistant. 
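-#The SE estimate se2 below uses the order statistics bracketing the middle n^0.8
-#cases: d[un]-d[ln] estimates roughly n^(-0.2)/f(median), so dividing by 2*n^0.3
-#recovers the usual 1/(2*f*sqrt(n)) standard error of the sample median.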
- n <- length(x) - up <- 1 - alpha/2 - med <- median(x) - ln <- max(1,floor(n/2) - ceiling(0.5 * n^0.8)) - un <- n - ln - rdf <- un - ln - 1 - cut <- qt(up, rdf) - d <- sort(x) - se2 <- (d[un] - d[ln])/(2 * n^0.3) - rval <- cut * se2 - rlo2 <- med - rval - rhi2 <- med + rval - #got low and high endpoints of robust CI - list(int = c(rlo2, rhi2), med = med, se2 = se2) -} - -cav<-function(alpha = 0.01, k = 5) -{ -#gets n(asy var) for the alpha trimmed mean -#and T_(A,n)(k) if errors are Cauchy(0,1) - z <- tan(pi * (alpha - 0.5)) - val <- (z - atan(z))/((1 - 2 * alpha) * atan(z)) - ntmav <- val + (2 * alpha * (tan(pi * (alpha - 0.5)))^2)/(1 - 2 * alpha - )^2 - zj <- k - alphaj <- 0.5 + atan( - k)/pi - alphaj <- ceiling(100 * alphaj)/100 - zj <- tan(pi * (alphaj - 0.5)) - val <- (zj - atan(zj))/((1 - 2 * alphaj) * atan(zj)) - natmav <- val + (2 * alphaj * (tan(pi * (alphaj - 0.5)))^2)/(1 - 2 * - alphaj)^2 - return(ntmav, natmav) -} - -cci<-function(x, alpha = 0.05) -{ -#gets classical 100 (1-alpha)% CI -#defaults are alpha = .05 - n <- length(x) - up <- 1 - alpha/2 - mn <- mean(x) - v <- var(x) - se <- sqrt(v/n) - val <- qt(up, n - 1) * se - lo <- mn - val - hi <- mn + val - list(int = c(lo, hi), mean = mn, se = se) -} - -cgci<-function(x, alpha = 0.05, ks = 3.5) -{ -#gets T_S,n with a coarse grid -# and the corresponding robust 100 (1-alpha)% CI - n <- length(x) - up <- 1 - alpha/2 - med <- median(x) - madd <- mad(x, constant = 1) - d <- sort(x) ##get robust T_S,n CI - lo <- sum(x < (med - ks * madd)) - hi <- sum(x > (med + ks * madd)) - tp <- max(hi, lo)/n - if(tp == 0) - tp <- 0 - if(tp > 0 && tp <= 0.01) - tp <- 0.01 - if(tp > 0.01 && tp <= 0.1) - tp <- 0.1 - if(tp > 0.1 && tp <= 0.25) - tp <- 0.25 - if(tp > 0.25 && tp <= 0.4) - tp <- 0.4 - if(tp > 0.4) - tp <- 0.49 - tstmn <- mean(x, trim = tp) - #have obtained the two stage trimmed mean - ln <- floor(n * tp) - un <- n - ln - if(ln > 0) { - d[1:ln] <- d[(ln + 1)] - d[(un + 1):n] <- d[un] - } - den <- ((un - ln)/n)^2 - swv <- var(d)/den - #got the scaled Winsorized variance - rdf <- un - ln - 1 - rval <- qt(up, rdf) * sqrt(swv/n) - tslo <- tstmn - rval - tshi <- tstmn + rval - ##got low and high endpoints of robust T_S,n CI - list(int = c(tslo, tshi), tp = tp) -} - - -cltv<- -function(gam = 0.5) -{ -# Gets asy var for lts(h) and lta(h)at Cauchy C(0,1) -# where h/n -> gam. 
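-# k is the Cauchy quantile for which the central gam fraction of the C(0,1)
-# distribution lies in (-k,k); ltsv and ltav are the corresponding limiting variances.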
- k <- tan((pi * gam)/2) - num <- 2 * k - pi * gam - den <- pi * (gam - (2 * k)/(pi * (1 + k^2)))^2 - ltsv <- num/den - num <- gam - den <- 4 * (1/pi - 1/(pi * (1 + k^2)))^2 - ltav <- num/den - return(ltsv, ltav) -} - -cmba2<- -function(x, csteps = 5, ii = 1) -{ -# gets the covmba estimator using 98, 95, 90, 80, 70, 60 and 50% trimming - n <- dim(x)[1] - p <- dim(x)[2] - mds <- matrix(nrow = n, ncol = 8, 0) ##get the DGK estimator - covs <- var(x) - mns <- apply(x, 2, mean) - cmd <- sqrt(mahalanobis(x, mns, covs)) ## concentrate - for(i in 1:csteps) { - md2 <- mahalanobis(x, mns, covs) - medd2 <- median(md2) - mns <- apply(x[md2 <= medd2, ], 2, mean) - covs <- var(x[md2 <= medd2, ]) - } - mds[, 8] <- sqrt(mahalanobis(x, mns, covs)) - covb <- covs - mnb <- mns ##get the square root of det(covb) - critb <- prod(diag(chol(covb))) ##get the resistant estimator - covv <- diag(p) - med <- apply(x, 2, median) - md2 <- mahalanobis(x, center = med, covv) - smd2 <- sort(md2) - val <- p + 3 - tem <- 1:7 - tem[1] <- smd2[val + floor(0.02 * n)] - tem[2] <- smd2[val + floor(0.05 * n)] - tem[3] <- smd2[val + floor(0.1 * n)] - tem[4] <- smd2[val + floor(0.2 * n)] - tem[5] <- smd2[val + floor(0.3 * n)] - tem[6] <- smd2[val + floor(0.4 * n)] - tem[7] <- median(md2) - medd2 <- tem[7] - for(j in ii:7) { -## get the start - val2 <- tem[j] - mns <- apply(x[md2 <= val2, ], 2, mean) - covs <- var(x[md2 <= val2, ]) ## concentrate - for(i in 1:csteps) { - md2 <- mahalanobis(x, mns, covs) - medd2 <- median(md2) - mns <- apply(x[md2 <= medd2, ], 2, mean) - covs <- var(x[md2 <= medd2, ]) - } - mds[, j] <- sqrt(mahalanobis(x, mns, covs)) - plot(cmd, mds[, j]) - identify(cmd, mds[, j]) - crit <- prod(diag(chol(covs))) - if(crit < critb) { - critb <- crit - covb <- covs - mnb <- mns - } - } - pairs(mds) ##scale for better performance at MVN - rd2 <- mahalanobis(x, mnb, covb) - const <- median(rd2)/(qchisq(0.5, p)) - covb <- const * covb - list(center = mnb, cov = covb, mds = mds) -} - -conc2<- -function(x, y, start = l1fit(x, y)$coef) -{ #Finds that LTA attractor of the start. - nc <- dim(x)[2] + 1 - res <- y - (x %*% start[2:nc] + start[1]) - ares <- abs(res) - cov <- ceiling(length(y)/2) - m <- sort(ares, partial = cov)[cov] - old <- sum(ares[ares <= m]) - new <- old - 1 - ct <- 0 - while(new < old) { - ct <- ct + 1 - start <- l1fit(x[ares <= m, ], y[ares <= - m])$coef - res <- y - (x %*% start[2:nc] + start[1 - ]) - ares <- abs(res) - m <- sort(ares, partial = cov)[cov] - new <- sum(ares[ares <= m]) #print(old) - if(new < old) { - old <- new - new <- new - 1 - } - } - list(coef = start, ct = ct) -} - -concmv<- -function(n = 100, csteps = 5, gam = 0.4, outliers = TRUE, start = 2) -{ -#Shows how concentration works when p = 2. 
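-# Illustrative usage (hypothetical; the plots are interactive, right-click to advance):
-# concmv(n = 100, gam = 0.4, outliers = TRUE, start = 2)
-# Note: as written, the "start >= 2" branch below overwrites the start == 2 (MBA sphere)
-# start with the MAD start; an R port may intend "start == 3" there.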
-# Use start = 1 for DGK, start = 2 for MBA sphere, start = 3 for MBA MAD - p <- 2 #A <- cbind(c(1, 0.9), c(0.9, 1)) - x <- matrix(rnorm(n * p), ncol = p, nrow = n) #A <- diag(sqrt(1:p)) -#if(outliers == T) { -# val <- floor(gam * n) -# tem <- 10 + 0 * 1:p -# x[1:val, ] <- x[1:val, ] + tem -#} -#x <- x %*% A - A <- cbind(c(1, 0.4), c(0.4, 1)) - B <- cbind(c(0.5, 0), c(0, 0.5)) - if(outliers == T) { - val <- floor(gam * n) - x[(val + 1):n, ] <- x[(val + 1):n, ] %*% A - x[1:val, ] <- x[1:val, ] %*% B - x[1:val, 1] <- x[1:val, 1] + 0 - x[1:val, 2] <- x[1:val, 2] + 6 - } - else { - x <- x %*% A - } - if(start == 1) { - covs <- var(x) - mns <- apply(x, 2, mean) - } - if(start == 2) { - covv <- diag(p) - med <- apply(x, 2, median) - md2 <- mahalanobis(x, center = med, covv) - medd2 <- median(md2) ## get the start - mns <- apply(x[md2 <= medd2, ], 2, mean) - covs <- var(x[md2 <= medd2, ]) - } - if(start >= 2) { - tem <- apply(x, 2, mad)^2 - covv <- diag(tem) - med <- apply(x, 2, median) - md2 <- mahalanobis(x, center = med, covv) - medd2 <- median(md2) ## get the start - mns <- apply(x[md2 <= medd2, ], 2, mean) - covs <- var(x[md2 <= medd2, ]) - } -## concentrate - for(i in 1:csteps) { - md2 <- mahalanobis(x, mns, covs) - medd2 <- median(md2) - mns <- apply(x[md2 <= medd2, ], 2, mean) - covs <- var(x[md2 <= medd2, ]) - plot(x[, 1], x[, 2]) - points(x[md2 <= medd2, 1], x[md2 <= medd2, 2], pch = 15) - identify(x[, 1], x[, 2]) - } -} - -concsim<- -function(n = 100, p = 2, steps = 5, gam = 0.4, runs = 20) -{ -# This Splus function is used to determine when the DD -# plot separates outliers from non-outliers for various starts. - A <- sqrt(diag(1:p)) - mbact <- 0 - fmcdct <- 0 - mbct <- 0 - madct <- 0 - dgkct <- 0 - for(i in 1:runs) { - x <- matrix(rnorm(n * p), ncol = p, nrow = n) - ## outliers have mean (10, 10 sqrt(2), ..., 10 sqrt(p))^T - val <- floor(gam * n) - tem <- 10 + 0 * 1:p - x[1:val, ] <- x[1:val, ] + tem - x <- x %*% A #MBA - out <- covmba(x, csteps = steps) - center <- out$center - cov <- out$cov - rd2 <- mahalanobis(x, center, cov) - if(min(rd2[1:val]) > max(rd2[(val + 1):n])) -mbact <- mbact + 1 - #DGK - covs <- var(x) - mns <- apply(x, 2, mean) ## concentrate - for(i in 1:steps) { - md2 <- mahalanobis(x, mns, covs) - medd2 <- median(md2) - mns <- apply(x[md2 <= medd2, ], 2, mean) - covs <- var(x[md2 <= medd2, ]) - } - rd2 <- mahalanobis(x, mns, covs) - if(min(rd2[1:val]) > max(rd2[(val + 1):n])) dgkct <- dgkct + 1 - #Median Ball start - covv <- diag(p) - med <- apply(x, 2, median) - md2 <- mahalanobis(x, center = med, covv) - medd2 <- median(md2) ## get the start - mns <- apply(x[md2 <= medd2, ], 2, mean) - covs <- var(x[md2 <= medd2, ]) ## concentrate - for(i in 1:steps) { - md2 <- mahalanobis(x, mns, covs) - medd2 <- median(md2) - mns <- apply(x[md2 <= medd2, ], 2, mean) - covs <- var(x[md2 <= medd2, ]) - } - rd2 <- mahalanobis(x, mns, covs) - if(min(rd2[1:val]) > max(rd2[(val + 1):n])) mbct <- mbct + 1 - #MAD start - tem <- apply(x, 2, mad)^2 - covv <- diag(tem) - md2 <- mahalanobis(x, center = med, covv) - medd2 <- median(md2) ## get the start - mns <- apply(x[md2 <= medd2, ], 2, mean) - covs <- var(x[md2 <= medd2, ]) ## concentrate - for(i in 1:steps) { - md2 <- mahalanobis(x, mns, covs) - medd2 <- median(md2) - mns <- apply(x[md2 <= medd2, ], 2, mean) - covs <- var(x[md2 <= medd2, ]) - } - rd2 <- mahalanobis(x, mns, covs) - if(min(rd2[1:val]) > max(rd2[(val + 1):n])) madct <- madct + 1 - #FMCD - out <- cov.mcd(x) - center <- out$center - cov <- out$cov - rd2 <- mahalanobis(x, 
center, cov) - if(min(rd2[1:val]) > max(rd2[(val + 1):n])) - fmcdct <- fmcdct + 1 - } - list(mbact = mbact, fmcdct = fmcdct, dgkct = dgkct, mbct = mbct, madct - = madct) -} - -corrsim<- -function(n = 100, p = 3, eps = 0.4, nruns = 100, type = 1) -{ -#For R, first type "library(lqs)" before using this function -# This function generates 100 n by p matrices x. -# The output is the 100 sample correlations between the MDi and RDi -# RDi uses covmba for type = 1, rmba for type = 2, cov.mcd for type = 3 -# mahalanobis gives squared Maha distances - corrs <- 1:nruns - for(i in 1:nruns) { - wt <- 0 * (1:n) - x <- matrix(rnorm(n * p), ncol = p, nrow = n) - #The following 3 commands make x elliptically contoured. -#zu <- runif(n) -#x[zu < eps,] <- x[zu < eps,]*5 -#x <- x^2 -# To make marginals of x lognormal, use -#x <- exp(x) - center <- apply(x, 2, mean) - cov <- var(x) - md2 <- mahalanobis(x, center, cov) - if(type == 1) { - out <- covmba(x) - } - if(type == 2) { - out <- rmba(x) - } - if(type == 3) { - out <- cov.mcd(x) - } - center <- out$center - cov <- out$cov - rd2 <- mahalanobis(x, center, cov) - # need square roots for the usual distances - md <- sqrt(md2) - rd <- sqrt(rd2) - const <- sqrt(qchisq(0.5, p))/median(rd) - rd <- const * rd - # wt[rd < sqrt(qchisq(0.975, p))] <- 1 -# corrs[i] <- cor(md[wt > 0], rd[wt > 0])} - corrs[i] <- cor(md, rd) - } - cmean <- mean(corrs) - cmin <- min(corrs) - clt95 <- sum(corrs < 0.95) - clt80 <- sum(corrs < 0.8) - list(cmean = cmean, cmin = cmin, clt95 = clt95, clt80 = clt80, - corrs = corrs) -} - - -covdgk<- -function(x, csteps = 10) -{ -#computes the scaled DGK multivariate estimator - p <- dim(x)[2] - covs <- var(x) - mns <- apply(x, 2, mean) ## concentrate - for(i in 1:csteps) { - md2 <- mahalanobis(x, mns, covs) - medd2 <- median(md2) - mns <- apply(x[md2 <= medd2, ], 2, - mean) - covs <- var(x[md2 <= medd2, ]) - } -##scale for consistency at MVN - rd2 <- mahalanobis(x, mns, covs) - const <- median(rd2)/(qchisq(0.5, p)) - covs <- const * covs - list(center = mns, cov = covs) -} - -covmba <- function(x, csteps = 5) -{ # gets the MBA estimator - zx <- x - x <- as.matrix(x) - p <- dim(x)[2] - ##get the DGK estimator - covs <- var(x) - mns <- apply(x, 2, mean) ## concentrate - for(i in 1:csteps) { - md2 <- mahalanobis(x, mns, covs) - medd2 <- median(md2) - if(p > 1){ - mns <- apply(x[md2 <= medd2, ], 2, - mean) - covs <- var(x[md2 <= medd2, ]) - } - if(p == 1){ - mns <- mean(x[md2 <= medd2]) - covs <- var(x[md2 <= medd2]) - } - } - covb <- covs - mnb <- mns ##get the square root of det(covb) - critb <- prod(diag(chol(covb))) - ##get the resistant estimator - covv <- diag(p) - med <- apply(x, 2, median) - md2 <- mahalanobis(x, center = med, covv) - medd2 <- median(md2) ## get the start - if(p > 1){ - mns <- apply(x[md2 <= medd2, ], 2, mean) - covs <- var(x[md2 <= medd2, ]) - } - if(p == 1){ - mns <- mean(zx[md2 <= medd2]) - covs <- var(zx[md2 <= medd2]) - } - ## concentrate - for(i in 1:csteps) { - md2 <- mahalanobis(x, mns, covs) - medd2 <- median(md2) - if(p > 1){ - mns <- apply(x[md2 <= medd2, ], 2, - mean) - covs <- var(x[md2 <= medd2, ]) - } - if(p == 1){ - mns <- mean(zx[md2 <= medd2]) - covs <- var(zx[md2 <= medd2]) - } - } - crit <- prod(diag(chol(covs))) - if(crit < critb) { - critb <- crit - covb <- covs - mnb <- mns - } -##scale for better performance at MVN - rd2 <- mahalanobis(x, mnb, covb) - const <- median(rd2)/(qchisq(0.5, p)) - covb <- const * covb - list(center = mnb, cov = covb) -} - -covmba2<- -function(x, csteps = 5) -{ # gets the MBA 
estimator, use covmba2 instead of covmba if p > 1 - p <- dim(x)[2] - ##get the DGK estimator - covs <- var(x) - mns <- apply(x, 2, mean) ## concentrate - for(i in 1:csteps) { - md2 <- mahalanobis(x, mns, covs) - medd2 <- median(md2) - mns <- apply(x[md2 <= medd2, ], 2, - mean) - covs <- var(x[md2 <= medd2, ]) - } - covb <- covs - mnb <- mns ##get the square root of det(covb) - critb <- prod(diag(chol(covb))) - ##get the resistant estimator - covv <- diag(p) - med <- apply(x, 2, median) - md2 <- mahalanobis(x, center = med, covv) - medd2 <- median(md2) ## get the start - mns <- apply(x[md2 <= medd2, ], 2, mean) - covs <- var(x[md2 <= medd2, ]) ## concentrate - for(i in 1:csteps) { - md2 <- mahalanobis(x, mns, covs) - medd2 <- median(md2) - mns <- apply(x[md2 <= medd2, ], 2, - mean) - covs <- var(x[md2 <= medd2, ]) - } - crit <- prod(diag(chol(covs))) - if(crit < critb) { - critb <- crit - covb <- covs - mnb <- mns - } -##scale for better performance at MVN - rd2 <- mahalanobis(x, mnb, covb) - const <- median(rd2)/(qchisq(0.5, p)) - covb <- const * covb - list(center = mnb, cov = covb) -} - -covsim2<- -function(n=100, p = 2, steps = 5, gam = 0.4, runs = 20) -{ -# This Splus function is used to determine when the DD -# plot separates outliers from non-outliers. - A <- sqrt(diag(1:p)) - mbact <- 0 - for(i in 1:runs) { - x <- matrix(rnorm(n * p), ncol = p, nrow = n) - ## outliers have mean (10, 10 sqrt(2), ..., 10 sqrt(p))^T - val <- floor(gam * n) - tem <- 10 + 0 * 1:p - x[1:val, ] <- x[1:val, ] + tem - x <- x %*% A - out <- covmba(x, csteps = steps) - center <- out$center - cov <- out$cov - rd2 <- mahalanobis(x, center, cov) - if(min(rd2[1:val]) > max(rd2[(val + 1):n])) - mbact <- mbact + 1 - } - list(mbact = mbact) -} - -ctrviews<- -function(x, Y, ii = 1) -{ -# Uses classical distances instead of robust distances. -# Trimmed views for 90, 80, ... 0 percent -# trimming. Allows visualization of m -# and crude estimatation of c beta in models -# of the form y = m(x^T beta) + e. -# Workstation: activate a graphics -# device with command "X11()" or "motif()." -# R needs command "library(lqs)." -# Advance the view with the right mouse button. -# In R, highight "stop." - x <- as.matrix(x) - center <- apply(x, 2, mean) - cov <- var(x) - rd2 <- mahalanobis(x, center, cov) - labs <- c("90%", "80%", "70%", "60%", "50%", "40%", "30%", "20%", "10%", - "0%") - tem <- seq(0.1, 1, 0.1) - for(i in ii:10) { - val <- quantile(rd2, tem[i]) - bhat <- lsfit(x[rd2 <= val, ], Y[rd2 <= val])$coef - ESP <- x %*% bhat[-1] - plot(ESP, Y) - title(labs[i]) - identify(ESP, Y) - print(bhat) - } -} - -ddcomp<- -function(x, steps = 5) -{ -# Makes 4 DD plots using the FMCD and MBA estimators. -# Click left mouse button to identify points. -# Click right mouse button to end the function. -# Unix systems turn on graphics device eg enter -# command "X11()" or "motif()" before using. -# R users need to type "library(lqs)" before using. 
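-# Illustrative usage, a hypothetical sketch (in current R, cov.mcd lives in the MASS
-# package, which absorbed the old lqs package):
-# x <- matrix(rnorm(200), ncol = 2)
-# ddcomp(x)   # four DD plots; left-click to label points, right-click to move on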
- p <- dim(x)[2] - par(mfrow = c(2, 2)) - center <- apply(x, 2, mean) - cov <- var(x) - md2 <- mahalanobis(x, center, cov) - # MD is the classical and RD the robust distance - MD <- sqrt(md2) #DGK start - md2 <- mahalanobis(x, center, cov) - medd2 <- median(md2) ## get the start - mns <- center - covs <- cov ## concentrate - for(i in 1:steps) { - md2 <- mahalanobis(x, mns, covs) - medd2 <- median(md2) - mns <- apply(x[md2 <= medd2, ], 2, mean) - covs <- var(x[md2 <= medd2, ]) - } - rd2 <- mahalanobis(x, mns, covs) - rd <- sqrt(rd2) #Scale the RD so the plot follows the 0-1 line -#if the data is multivariate normal. - const <- sqrt(qchisq(0.5, p))/median(rd) - RDdgk <- const * rd - plot(MD, RDdgk) - abline(0, 1) - identify(MD, RDdgk) - title("DGK DD Plot") #MBA - out <- covmba(x) - center <- out$center - cov <- out$cov - rd2 <- mahalanobis(x, center, cov) - rd <- sqrt(rd2) #Scale the RD so the plot follows the identity line -#if the data is multivariate normal. - const <- sqrt(qchisq(0.5, p))/median(rd) - RDm <- const * rd - plot(MD, RDm) - abline(0, 1) - identify(MD, RDm) - title("MBA DD Plot") #FMCD - out <- cov.mcd(x) - center <- out$center - cov <- out$cov - rd2 <- mahalanobis(x, center, cov) - rd <- sqrt(rd2) #Scale the RD so the plot follows the 0-1 line -#if the data is multivariate normal. - const <- sqrt(qchisq(0.5, p))/median(rd) - RDf <- const * rd - plot(MD, RDf) - abline(0, 1) - identify(MD, RDf) - title("FMCD DD Plot") #Median Ball start - covv <- diag(p) - med <- apply(x, 2, median) - md2 <- mahalanobis(x, center = med, covv) - medd2 <- median(md2) ## get the start - mns <- apply(x[md2 <= medd2, ], 2, mean) - covs <- var(x[md2 <= medd2, ]) ## concentrate - for(i in 1:steps) { - md2 <- mahalanobis(x, mns, covs) - medd2 <- median(md2) - mns <- apply(x[md2 <= medd2, ], 2, mean) - covs <- var(x[md2 <= medd2, ]) - } - rd2 <- mahalanobis(x, mns, covs) - rd <- sqrt(rd2) #Scale the RD so the plot follows the 0-1 line -#if the data is multivariate normal. - const <- sqrt(qchisq(0.5, p))/median(rd) - RDmb <- const * rd - plot(MD, RDmb) - abline(0, 1) - identify(MD, RDmb) - title("Med Ball DD Plot") -} - -ddmv<- -function(n = 100, p = 2, steps = 5, gam = 0.4, - outtype = 2, est = 1) -{ -# This Splus function is used to determine when the DD -# plot separates outliers from non-outliers for various starts. -# Workstation needs to activate a graphics -# device with the command "X11()" or "motif()." -# Advance the view with the right mouse button. 
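-# Illustrative usage (hypothetical): ddmv(n = 100, p = 2, gam = 0.4, est = 2)
-# draws one DD plot per concentration step, with the planted outliers shown as filled squares.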
-## est = 1 for DGK, 2 for median ball, 3 for MAD - A <- sqrt(diag(1:p)) - x <- matrix(rnorm(n * p), ncol = p, nrow - = n) - val <- floor(gam * n) - tem <- 10 + 0 * 1:p - x[1:val, ] <- x[1:val, ] + tem - #if outtype = 1, outliers are Np(10 1, Ip) nonoutliers Np(0,Ip) - if(outtype == 2) x <- x %*% A - ## outliers have mean (10, 10 sqrt(2), ..., 10 sqrt(p))^T -## get the start - if(est == 1) { -#DGK classical start - covs <- var(x) - mns <- apply(x, 2, mean) - } - if(est == 2) { -#Median Ball high breakdown start - covv <- diag(p) - med <- apply(x, 2, median) - md2 <- mahalanobis(x, center = - med, covv) - medd2 <- median(md2) - ## get the start - mns <- apply(x[md2 <= medd2, ], - 2, mean) - covs <- var(x[md2 <= medd2, ]) - } - if(est == 3) { -#MAD high breakdown start - tem <- apply(x, 2, mad)^2 - covv <- diag(tem) - med <- apply(x, 2, median) - md2 <- mahalanobis(x, center = - med, covv) - medd2 <- median(md2) - ## get the start - mns <- apply(x[md2 <= medd2, ], - 2, mean) - covs <- var(x[md2 <= medd2, ]) - } -## concentrate and plot, highlighting outliers - MD <- sqrt(mahalanobis(x, mns, covs)) - for(i in 1:steps) { - md <- sqrt(mahalanobis(x, mns, - covs)) - medd <- median(md) - mns <- apply(x[md <= medd, ], 2, - mean) - covs <- var(x[md <= medd, ]) - rd <- sqrt(mahalanobis(x, mns, - covs)) - plot(MD, rd) - points(MD[1:val], rd[1:val], pch - = 15) - identify(MD, rd) - } -} - - -ddplot<- -function(x) -{ -# Makes a DD plot. cov.mcd is used for the RDi. -# Click left mouse button to identify points. -# Click right mouse button to end the function. -# Unix systems turn on graphics device eg enter -# command "X11()" or "motif()" before using. -# R users need to type "library(lqs)" before using. - p <- dim(x)[2] - center <- apply(x, 2, mean) - cov <- var(x) - md2 <- mahalanobis(x, center, cov) - out <- cov.mcd(x) # or use out <- cov.mve(x) - center <- out$center - cov <- out$cov - rd2 <- mahalanobis(x, center, cov) - # md is the classical and rd the robust distance - MD <- sqrt(md2) - rd <- sqrt(rd2) - #Scale the RD so the plot follows the 0-1 line -#if the data is multivariate normal. - const <- sqrt(qchisq(0.5, p))/median(rd) - RD <- const * rd - plot(MD, RD) - abline(0, 1) - identify(MD, RD) # list(MD = MD, RD = RD) -} - - -ddsim<- -function(n = 100, p = 3, eps = 0.4, type = 1) -{ -# R: type "library(lqs)" before using if type = 3. -# Rapidly plots 20 DD plots in a row. -# Unix: type "X11()" or "motif()" to -# turn on a graphics device. -# RDi uses covmba for type = 1, rmba for type = 2, cov.mcd for type = 3 - med <- 1:20 - for(i in 1:20) { - x <- matrix(rnorm(n * p), ncol = p, nrow = n) - ## For elliptically contoured data, use: -#zu <- runif(n) -#x[zu < eps,] <- x[zu < eps,]*5 -#x <- x^2 -##For lognormal marginals, add: -#x <- exp(x) - center <- apply(x, 2, mean) - cov <- var(x) - md2 <- mahalanobis(x, center, cov) - if(type == 1) { - out <- covmba(x) - } - if(type == 2) { - out <- rmba(x) - } - if(type == 3) { - out <- cov.mcd(x) - } - center <- out$center - cov <- out$cov - rd2 <- mahalanobis(x, center, cov) - md <- sqrt(md2) - rd <- sqrt(rd2) #Scale the RDi so plot follows 0-1 line -#if the data is multivariate normal. 
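-# (For multivariate normal data the squared distances are approximately chi-squared with
-# p degrees of freedom, so multiplying by sqrt(qchisq(0.5, p))/median(rd) matches the
-# median robust distance to the median of that reference distribution.)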
- const <- sqrt(qchisq(0.5, p))/median(rd) - rd <- const * rd - plot(md, rd) - abline(0, 1) - med[i] <- median(md) #The following command can be inserted -#to slow down the plots "identify(md,rd)" - } - list(med = med) -} - - -deav<- -function(alpha = 0.01, k = 5) -{ -#gets n(asy var) for the alpha trimmed mean -#and T_(A,n)(k) if errors are DE(0,1) - z <- - log(2 * alpha) - num <- 2 - (2 + 2 * z + z^2) * exp( - z) - den <- (1 - exp( - z)) * (1 - 2 * alpha) - val1 <- num/den - num <- 2 * alpha * z^2 - den <- (1 - 2 * alpha)^2 - ntmav <- val1 + num/den - zj <- k * log(2) - alphaj <- 0.5 * exp( - zj) - alphaj <- ceiling(100 * alphaj)/100 - zj <- - log(2 * alphaj) - num <- 2 - (2 + 2 * zj + zj^2) * exp( - zj) - den <- (1 - exp( - zj)) * (1 - 2 * alphaj) - val1 <- num/den - num <- 2 * alphaj * zj^2 - den <- (1 - 2 * alphaj)^2 - natmav <- val1 + num/den - return(ntmav, natmav) -} - - -deltv<- -function(gam = 0.5) -{ -# Gets asy var for lts(h) and lta(h) at standard double exp -# where h/n -> gam. - k <- -1 * log(1 - gam) - num <- 2 - (2 + 2 * k + k^2) * exp( - k) - den <- (gam - k * exp( - k))^2 - ltsv <- num/den - ltav <- 1/gam - return(ltsv, ltav) -} - -diagplot<- -function(x, Y) -{ -# Scatterplot matrix of OLS diagnostics. -# Workstation need to activate a graphics -# device with command "X11()" or "motif()." - n <- length(Y) - rmat <- matrix(nrow = n, ncol = 7) - out <- lsfit(x, Y) - tem <- ls.diag(out) - rmat[, 1] <- tem$cooks - rmat[, 2] <- tem$hat - rmat[, 3] <- tem$std.res - rmat[, 4] <- tem$stud.res - rmat[, 5] <- tem$dfits - rmat[, 6] <- Y - out$resid - rmat[, 7] <- Y - pairs(rmat, labels = c("Cook's CD", "leverages", "stand resid", - "stud resid", "DFFITS", "YHAT", "Y")) -} - -ellipse <- function(x, center = apply(x, 2, mean), cov = var(x), alph = 0.95) -{# Makes a covering interval. The x should have 2 columns. - mu1 <- center[1] - mu2 <- center[2] - w <- solve(cov) - w11 <- w[1, 1] - w12 <- w[1, 2] - w22 <- w[2, 2] - tem <- x[, 2] - mu2 - y2 <- seq(min(tem), max(tem), length = 100) - xc <- qchisq(alph, 2) - el <- matrix(0, 2, 2) - ind <- 0 - for(i in 1:100) { - j1 <- (y2[i] * w12)^2 - j2 <- w11 * ((y2[i])^2 * w22 - xc) - # print(i) -# print(j1 - j2) - if((j1 - j2) >= 0) { - ind <- ind + 2 - tem <- (y2[i] * w12)^2 - tem <- tem - w11 * ((y2[i])^2 * - w22 - xc) - tem <- sqrt(tem) - term <- ( - y2[i] * w12 + tem)/ - w11 - el <- rbind(el, c((term + mu1), ( - y2[i] + mu2))) - term <- ( - y2[i] * w12 - tem)/ - w11 - el <- rbind(el, c((term + mu1), ( - y2[i] + mu2))) - } - } - el <- el[3:ind, ] - nn <- dim(x)[1] - if((ind - 2) > nn) { - tem <- sample((ind - 2), nn) - el <- el[tem, ] - } - xt <- cbind(x[, 1], el[, 1]) - yt <- cbind(x[, 2], el[, 2]) - matplot(xt, yt) -} - -essp<- -function(x, Y, M = 50) -{ -# Trimmed view or ESSP for M percent -# trimming. Allows visualization of g -# and crude estimation of c beta in models -# of the form y = g(x^T beta,e). -# Workstation need to activate a graphics -# device with command "X11()" or "motif()." -# R needs command "library(lqs)." -# Click on the right mouse button to finish. -# In R, highlight "stop." 
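-# Illustrative usage, a hypothetical sketch assuming MASS is loaded for cov.mcd:
-# x <- matrix(rnorm(300), ncol = 3)
-# Y <- (x %*% c(1, 1, 1))^2 + rnorm(100)
-# essp(x, Y, M = 50)   # returns the trimmed OLS slopes used to form the ESP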
- x <- as.matrix(x) - tval <- M/100 - out <- cov.mcd(x) - center <- out$center - cov <- out$cov - rd2 <- mahalanobis(x, center, cov) - val <- quantile(rd2, (1 - tval)) - bhat <- lsfit(x[rd2 <= val, ], Y[rd2 <= val])$ - coef - ESP <- x %*% bhat[-1] - plot(ESP, Y) - identify(ESP, Y) - return(bhat[-1]) -} - -ffL<- -function(x, y) -{ -# for unix, use X11() to turn on the graphics device before using this function -# this function makes a FF lambda plot where the competing models are Y^L - n <- length(y) - rmat <- matrix(nrow = n, ncol = 5) - rmat[, 1] <- y - lsfit(x, y)$resid - ytem <- (y^(0.5) - 1)/0.5 - rmat[, 2] <- ytem - lsfit(x, ytem)$resid - rmat[, 3] <- log(y) - lsfit(x, log(y))$resid - ytem <- (y^(-0.5) - 1)/-0.5 - rmat[, 4] <- ytem - lsfit(x, ytem)$resid - ytem <- (y^(-1) - 1)/-1 - rmat[, 5] <- ytem - lsfit(x, ytem)$resid - pairs(rmat, labels = c("YHAT", "YHAT^(0.5)", "YHAT^(0)", "YHAT^(-0.5)", - "YHAT^(-1)")) - min(cor(rmat)) -} - -fflynx<-function(){ -# R users need to type library(ts) and data(lynx) -Y <- log10(lynx) -FAR2 <- 1:114 -FAR11 <- 1:114 -FAR12 <- 1:114 -SETAR272 <- 1:114 -SETAR252 <- 1:114 -for(i in 3:114){ -FAR2[i ] <- 1.05 + 1.41*Y[i-1] -0.77*Y[i-2]} -for(i in 12:114){ -FAR11[i ] <- 1.13*Y[i-1] -0.51*Y[i-2] + .23*Y[i-3] -0.29*Y[i-4] - + .14*Y[i-5] -0.14*Y[i-6] + 0.08*Y[i-7] -0.04*Y[i-8] - + .13*Y[i-9] + 0.19*Y[i-10] - .31*Y[i-11] } -for(i in 13:114){ -FAR12[i ] <- 1.123 + 1.084*Y[i-1] -0.477*Y[i-2] + .265*Y[i-3] -0.218*Y[i-4] - + .180*Y[i-9] - .224*Y[i-12] } -for(i in 13:114){ -if( Y[i-2] <= 3.116){ -SETAR272[i ] <- 0.546 + 1.032*Y[i-1] -0.173*Y[i-2] + .171*Y[i-3] -0.431*Y[i-4] - + .332*Y[i-5] - .284*Y[i-6] + .210*Y[i-7]} -else {SETAR272[i ] <- 2.632 + 1.492*Y[i-1] -1.324*Y[i-2]} -} -for(i in 13:114){ -if( Y[i-2] <= 3.05){ -SETAR252[i ] <- 0.768 + 1.064*Y[i-1] -0.200*Y[i-2] + .164*Y[i-3] -0.428*Y[i-4] - + .181*Y[i-5] } -else {SETAR252[i ] <- 2.254 + 1.474*Y[i-1] -1.202*Y[i-2]} -} -x <- cbind(Y,FAR2,FAR11,FAR12,SETAR272,SETAR252) -x <- x[13:114,] -print(cor(x)) -pairs(x) -} - - -ffplot<- -function(x, y, nsamps = 7) -{ -# For Unix, use X11() to turn on the graphics device before -# using this function. For R, first type library(lqs). -# Makes an FF plot with several resistant estimators. -# Need the program mbareg.. - n <- length(y) - rmat <- matrix(nrow = n, ncol = 6) - lsfit <- y - lsfit(x, y)$residuals - print("got OLS") - l1fit <- y - l1fit(x, y)$residuals - print("got L1") - almsfit <- y - lmsreg(x, y)$resid - print("got ALMS") - altsfit <- y - ltsreg(x, y)$residuals - print("got ALTS") - mbacoef <- mbareg(x, y, nsamp = nsamps)$coef - MBAFIT <- mbacoef[1] + x %*% mbacoef[-1] - print("got MBA") - rmat[, 1] <- y - rmat[, 2] <- lsfit - rmat[, 3] <- l1fit - rmat[, 4] <- almsfit - rmat[, 5] <- altsfit - rmat[, 6] <- MBAFIT - pairs(rmat, labels = c("Y", "OLS Fit", "L1 Fit", "ALMS Fit", - "ALTS Fit", "MBAREG Fit")) -} - -ffplot2<- -function(x, y, nsamps = 7) -{ -# For Unix, use X11() to turn on the graphics device before -# using this function. For R, first type library(lqs). -# Makes an FF plot with several resistiant estimators. -# Need the program mbareg. 
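-# Illustrative usage (hypothetical): ffplot2(x, y) shows Y and the OLS, lmsreg, ltsreg and
-# mbareg fitted values in a scatterplot matrix; lmsreg and ltsreg are now in MASS.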
- n <- length(y) - rmat <- matrix(nrow = n, ncol = 5) - lsfit <- y - lsfit(x, y)$residuals - print("got OLS") - almsfit <- y - lmsreg(x, y)$resid - print("got ALMS") - altsfit <- y - ltsreg(x, y)$residuals - print("got ALTS") - mbacoef <- mbareg(x, y, nsamp = nsamps)$coef - MBAFIT <- mbacoef[1] + x %*% mbacoef[-1] - print("got MBA") - rmat[, 1] <- y - rmat[, 2] <- lsfit - rmat[, 3] <- almsfit - rmat[, 4] <- altsfit - rmat[, 5] <- MBAFIT - pairs(rmat, labels = c("Y", "OLS Fit", "ALMS Fit", "ALTS Fit", "MBAREG Fit")) -} - -fysim<-function( runs = 20) -{ -# 20 FY plots for simulated AR(2) time series data -fycorr <- 1:runs -for(i in 1: runs){ -Y <- ardata()$arts -out <- ar.yw(Y) -Yts <- Y[10:200] -FIT <- Yts - out$resid[10:200] -plot(FIT,Yts) -abline(0,1) -fycorr[i] <- cor(FIT,Yts) -} -list(fycorr=fycorr) -} - -gamper<- -function(h, k=500) -{ - n <- 10000 - c <- 5000 - gam0 <- min((n - c)/n, (1 - (1 - 0.2^(1/k))^(1/ - h))) * 100 - print(gam0) -} - -gamper2<- -function(p, k = 500) -{ -##estimates the amount of contamination fmcd can tolerate - n <- 10000 - c <- 5000 - h <- p + 1 - gam0 <- min((n - c)/n, (1 - (1 - 0.2^(1/k))^(1/h))) * 100 - print(gam0) -} - - -llrdata <- function(n = 100, q=5) -{ -# Generates data for loglinear regression. -# - y <- 0 * 1:n - beta <- 0 * 1:q - beta[1:3] <- 1 - alpha <- -2.5 - x <- matrix(rnorm(n * q), nrow = n, - ncol = q) - x <- 0.5*x + 1 - SP <- alpha + x%*%beta - y <- rpois(n,lambda=exp(SP)) - list(x=x,y=y) -} - -llressp <- function(x,y) -{ -# Makes the ESSP for loglinear regression. -# Workstation: need to activate a graphics -# device with command "X11()" or "motif()." -# -# If q is changed, change the formula in the glm statement. - q <- 5 -# change formula to x[,1]+ ... + x[,q] with q - out <- glm(y ~ x[, 1] + x[, 2] + x[, 3] + - x[, 4] + x[,5], family = poisson) - ESP <- x %*% out$coef[-1] + out$coef[1] - Y <- y - plot(ESP,Y) - abline(mean(y),0) - fit <- y - fit <- exp(ESP) - indx <- sort.list(ESP) - lines(ESP[indx],fit[indx]) - lines(lowess(ESP,y),type="s") - } - -llrplot<- -function(x, y) -{ -# Makes ESSP, the weighted forward response and residual plots for loglinear regression. -# -# If q is changed, change the formula in the glm statement. - q <- 5 # change formula to x[,1]+ ... + x[,q] with q - out <- glm(y ~ x[, 1] + x[, 2] + x[, 3] + x[, 4] + x[, 5], family = - poisson) - ESP <- x %*% out$coef[-1] + out$coef[1] - Y <- y - par(mfrow = c(2, 2)) - plot(ESP, Y) - abline(mean(y), 0) - Ehat <- exp(ESP) - indx <- sort.list(ESP) - lines(ESP[indx], Ehat[indx]) - lines(lowess(ESP, y), type = "s") - title("a) ESSP") - Vhat <- (y - Ehat)^2 - plot(Ehat, Vhat) - abline(0, 1) - #abline(lsfit(Ehat, Vhat)$coef) - title("b)") - Z <- y - Z[y < 1] <- Z[y < 1] + 0.5 - MWRES <- sqrt(Z) * (log(Z) - x %*% out$coef[-1] - out$coef[1]) - MWFIT <- sqrt(Z) * log(Z) - MWRES - plot(MWFIT, sqrt(Z) * log(Z)) - abline(0, 1) - #abline(lsfit(MWFIT, sqrt(Z) * log(Z))$coef) - title("c) WFRP Based on MLE") - plot(MWFIT, MWRES) - title("d) WRP Based on MLE") -} - -llrsim<- -function(n = 100, nruns = 1, type = 1) -{ -# Runs llrpot 10 times on simulated LLR. -# Type = 1 for Poisson data, Type = 2 for negative binomial data -# Calls llrdata, oddata, llrplot. - q <- 5 - for(i in 1:nruns) { - if(type == 1) - out <- llrdata(n, q) - else out <- oddata(n, q) - x <- out$x - y <- out$y - llrplot(x, y) #identify(MWFIT, MWRES) - } -} - -llrwtfrp <- function(x,y) -{ -# Makes the weighted forward response and residual plots for loglinear regression. 
-# Workstation: need to activate a graphics -# device with command "X11()" or "motif()." - -# -# If q is changed, change the formula in the glm statement. - q <- 5 -# change formula to x[,1]+ ... + x[,q] with q - out <- glm(y ~ x[, 1] + x[, 2] + x[, 3] + - x[, 4] + x[,5], family = poisson) - ESP <- x %*% out$coef[-1] + out$coef[1] - Z <- y - Z[y<1] <- Z[y<1] + 0.5 - out2<-lsfit(x,y=log(Z),wt=Z) - #WRES <- sqrt(Z)*(log(Z) - x%*%out2$coef[-1] - out2$coef[1]) - WRES <- out2$res - WFIT <- sqrt(Z)*log(Z) - WRES - MWRES <- sqrt(Z)*(log(Z) - x%*%out$coef[-1] - out$coef[1]) - MWFIT <- sqrt(Z)*log(Z) - MWRES - par(mfrow=c(2,2)) - plot(WFIT,sqrt(Z)*log(Z)) - abline(0,1) - title("a) Weighted Forward Response Plot") - plot(WFIT,WRES) - title("b) Weighted Residual Plot") - plot(MWFIT,sqrt(Z)*log(Z)) - abline(0,1) - title("c) WFRP Based on MLE") - plot(MWFIT,MWRES) - title("d) WRP Based on MLE") - } - -lmsviews<- -function(x, Y, ii = 1) -{ -# Trimmed views using lmsreg for 90, 80, ... 0 percent -# trimming. Allows visualization of m -# and crudely estimation of c beta in models -# of the form y = m(x^T beta) + e. -# Workstation: activate a graphics device -# with commands "X11()" or "motif()." -# R needs command "library(lqs)." -# Advance the view with the right mouse button and -# in R, highight "stop." - x <- as.matrix(x) - out <- cov.mcd(x) - center <- out$center - cov <- out$cov - rd2 <- mahalanobis(x, center, cov) - labs <- c("90%", "80%", "70%", "60%", "50%", "40%", "30%", "20%", "10%", - "0%") - tem <- seq(0.1, 1, 0.1) - for(i in ii:10) { - val <- quantile(rd2, tem[i]) - b <- lmsreg(x[rd2 <= val, ], Y[rd2 <= val])$coef - ESP <- x %*% b[-1] - plot(ESP, Y) - title(labs[i]) - identify(ESP, Y) - print(b) - } -} - -lrdata <- function(n = 200, type = 3) -{ -# Generates data for logistic regression. -# If X|y=1 ~ N(mu_1,I) and X|Y=0 ~ N(0,I) then beta = mu_1 and alpha = -0.5 ||mu_1||^2. -# -# If q is changed, change the formula in the glm statement. - q <- 5 - y <- 0 * 1:n - y[(n/2 + 1):n] <- y[(n/2 + 1):n] + 1 - beta <- 0 * 1:q - if(type == 1) { - beta[1] <- 1 - alpha <- -0.5 - } - if(type == 2) { - beta <- beta + 1 - alpha <- -q/2 - } - if(type == 3) { - beta[1:3] <- 1 - alpha <- -1.5 - } - x <- matrix(rnorm(n * q), nrow = n, - ncol = q) - if(type == 1) { - x[(n/2 + 1):n, 1] <- x[(n/2 + 1 - ):n, 1] + 1 - } - if(type == 2) { - x[(n/2 + 1):n, ] <- x[(n/2 + 1 - ):n, ] + 1 - } - if(type == 3) { - x[(n/2 + 1):n, 1:3 ] <- x[(n/2 + 1 - ):n, 1:3 ] + 1 - } - #X|y=0 ~ N(0, I) and X|y=1 ~ N(beta,I) - # change formula to x[,1]+ ... + x[,q] with q - out <- glm(y ~ x[, 1] + x[, 2] + x[, 3] + - x[, 4] + x[,5], family = binomial) - list(alpha = alpha, beta = beta, lrcoef = out$coef,x=x,y=y) -} - -lressp <- function(x,y,slices=10) -{ -# Makes the ESSP for logistic regression. -# If X|y=1 ~ N(mu_1,I) and X|Y=0 ~ N(0,I) then beta = mu_1 and alpha = ||mu_1||^2. -# Workstation need to activate a graphics -# device with command "X11()" or "motif()." -# R needs command "library(lqs)." -# Advance the view with the right mouse button. -# In R, highlight "stop." -# -# If q is changed, change the formula in the glm statement. - q <- 5 -# change formula to x[,1]+ ... 
+ x[,q] with q - out <- glm(y ~ x[, 1] + x[, 2] + x[, 3] + - x[, 4] + x[,5], family = binomial) - ESP <- x %*% out$coef[-1] + out$coef[1] - Y <- y - plot(ESP,Y) - abline(mean(y),0) - fit <- y - fit <- exp(ESP)/(1 + exp(ESP)) - # lines(sort(ESP),sort(fit)) - indx <- sort.list(ESP) - lines(ESP[indx],fit[indx]) - fit2 <- fit - n <- length(y) - val <- as.integer(n/slices) - for(i in 1: (slices-1)){ - fit2[((i-1)*val+1):(i*val)] <- mean(y[indx[((i-1)*val+1):(i*val)]]) - } - fit2[((slices-1)*val+1):n] <- mean(y[indx[((slices-1)*val+1):n]]) -# fit2 is already sorted in order corresponding to indx - lines(ESP[indx],fit2) -#list(fit2=fit2,n=n,slices=slices,val=val) - } - - -lsviews<- -function(x, Y, ii = 1) -{ -# This function is the same as tvreg except that the untrimmed -# cases are highlighted. It compares the LS fits for 90, 80, -# ..., 0 percent trimming. Used to visualize g if y = g(beta^T x,e). -# Workstation: activate a graphics -# device with command "X11()" or "motif()." -# R needs command "library(lqs)." -# Advance the view with the right mouse button. -# In R, highlight ``stop." - x <- as.matrix(x) - out <- cov.mcd(x) - center <- out$center - cov <- out$cov - rd2 <- mahalanobis(x, center, cov) - labs <- c("90%", "80%", "70%", "60%", "50%", "40%", "30%", "20%", "10%", - "0%") - tem <- seq(0.1, 1, 0.1) - for(i in ii:10) { - val <- quantile(rd2, tem[i]) - bhat <- lsfit(x[rd2 <= val, ], Y[rd2 <= val])$coef - ESP <- bhat[1] + x %*% bhat[-1] - plot(ESP, Y) - points(ESP[rd2 <= val], Y[rd2 <= val], pch = 15, cex = 1.4) - abline(0, 1) - title(labs[i]) - identify(ESP, Y) - print(bhat) - } -} - -maha<- -function(x) -{ -# Generates the classical mahalanobis distances. - center <- apply(x, 2, mean) - cov <- var(x) - return(sqrt(mahalanobis(x, center, cov))) -} - -mbalata<- -function(x, y, k=6, nsamp = 7) -{ -#gets the median ball fit with 7 centers, med resid crit, 7 ball sizes - x <- as.matrix(x) - n <- dim(x)[1] - q <- dim(x)[2] - # q + 1 is number of predictors including intercept - vals <- c(q + 3 + floor(n/100), q + 3 + floor(n/40), q + 3 + - floor(n/20), q + 3 + floor(n/10), q + 3 + floor(n/5), q + - 3 + floor(n/3), q + 3 + floor(n/2)) - covv <- diag(q) - centers <- sample(n, nsamp) - temp <- lsfit(x, y) - mbaf <- temp$coef ## get LATA criterion - res <- temp$residuals - crit <- k^2*median(res^2) - cn <- sum(res^2 <= crit) - absres <- sort(abs(res)) - critf <- sum(absres[1:cn]) ## - for(i in 1:nsamp) { - md2 <- mahalanobis(x, center = x[centers[i], ], covv) - smd2 <- sort(md2) - for(j in 1:7) { - temp <- lsfit(x[md2 <= smd2[vals[j]], ], y[md2 <= - smd2[vals[j]]]) - #Use OLS on rows with md2 <= cutoff = smd2[vals[j]] - res <- y - temp$coef[1] - x %*% temp$coef[-1] - ## get LATA criterion - crit <- k^2*median(res^2) - cn <- sum(res^2 <= crit) - absres <- sort(abs(res)) - crit <- sum(absres[1:cn]) ## - if(crit < critf) { - critf <- crit - mbaf <- temp$coef - } - } - } - list(coef = mbaf, critf = critf) -} - -mbamv<- -function(x, y, nsamp = 7) -{ -# This function is for simple linear regression. The -# highlighted boxes get weight 1. Click on right -# mouse button to advance plot. Only uses 50% trimming. 
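-# Illustrative usage, a hypothetical sketch with simulated data:
-# x <- rnorm(100)
-# y <- 2 + 3 * x + rnorm(100)
-# mbamv(x, y)   # one scatterplot per random center; right-click to advance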
- x <- as.matrix(x) - n <- dim(x)[1] - q <- dim(x)[2] - covv <- diag(q) - centers <- sample(n, nsamp) - for(i in 1:nsamp) { - md2 <- mahalanobis(x, center = x[centers[i], ], covv) - med <- median(md2) - plot(x, y) - points(x[md2 < med], y[md2 < med], pch = 15) - abline(lsfit(x[md2 < med],y[md2 < med])) - identify(x, y) - } -} - -mbamv2<- -function(x, Y, nsamp = 7) -{ -# This function is for multiple linear regression. The -# highlighted boxes get weight 1. Click on right -# mouse button to advance plot. Only uses 50% trimming. - x <- as.matrix(x) - n <- dim(x)[1] - q <- dim(x)[2] - covv <- diag(q) - centers <- sample(n, nsamp) - for(i in 1:nsamp) { - md2 <- mahalanobis(x, center = x[centers[i], ], covv) - med <- median(md2) - if(q ==1){out <- lsfit(x[md2 < med],Y[md2 < med])} - else{out <- lsfit(x[md2 < med,],Y[md2 < med])} - FIT <- out$coef[1] + x%*%out$coef[-1] - RES <- Y - FIT - par(mfrow=c(2,1)) - plot(FIT,Y) - points(FIT[md2 < med], Y[md2 < med], pch = 15) - abline(0,1) - identify(FIT, Y) - plot(FIT,RES) - points(FIT[md2 < med], RES[md2 < med], pch = 15) - abline(0,0) - identify(FIT, RES) - } -} - -mbareg<- -function(x, y, nsamp = 7) -{ -#gets the mbareg fit with 7 centers, med resid crit, 7 ball sizes - x <- as.matrix(x) - n <- dim(x)[1] - q <- dim(x)[2] # q + 1 is number of predictors including intercept - vals <- c(q + 3 + floor(n/100), q + 3 + floor(n/40), q + 3 + floor(n/20 - ), q + 3 + floor(n/10), q + 3 + floor(n/5), q + 3 + floor(n/3), - q + 3 + floor(n/2)) - covv <- diag(q) - centers <- sample(n, nsamp) - temp <- lsfit(x, y) - mbaf <- temp$coef - critf <- median(temp$residuals^2) - for(i in 1:nsamp) { - md2 <- mahalanobis(x, center = x[centers[i], ], covv) - smd2 <- sort(md2) - for(j in 1:7) { - temp <- lsfit(x[md2 <= smd2[vals[j]], ], y[md2 <= smd2[ - vals[j]]]) - #Use OLS on rows with md2 <= cutoff = smd2[vals[j]] - res <- y - temp$coef[1] - x %*% temp$coef[-1] - crit <- median(res^2) - if(crit < critf) { - critf <- crit - mbaf <- temp$coef - } - } - } - list(coef = mbaf, critf = critf) -} - -med2ci<- -function(x, cc = 4, alpha = 0.05) -{ -#gets ~ 50% trimmed mean se for sample median and the corresponding robust 100 (1-alpha)% CI -#defaults are alpha = .05, cc = 5 may be better than the default - up <- 1 - alpha/2 - n <- length(x) - med <- median(x) - ln <- floor(n/2) - ceiling(sqrt(n/cc)) - un <- n - ln - low <- ln + 1 - d <- sort(x) - if(ln > 0) { - d[1:ln] <- d[(low)] - d[(un + 1):n] <- d[un] - } - den <- ((un - ln)/n)^2 - swv <- var(d)/den - #got the scaled Winsorized variance - rdf <- un - low - rval <- qt(up, rdf) * sqrt(swv/n) - rlo <- med - rval - rhi <- med + rval - list(int = c(rlo, rhi), med = med, swv = swv) -} - -medci<- -function(x, alpha = 0.05) -{ -#gets Bloch and Gastwirth SE for sample median and the corresponding resistant 100 (1-alpha)% CI -#defaults are alpha = .05 - n <- length(x) - up <- 1 - alpha/2 - med <- median(x) - ln <- floor(n/2) - ceiling(sqrt(n/4)) - un <- n - ln - d <- sort(x) - rdf <- un - ln - 1 - cut <- qt(up, rdf) - sebg <- 0.5 * (d[un] - d[ln + 1]) - rval <- cut * sebg - rlo <- med - rval - rhi <- med + rval - list(int = c(rlo, rhi), med = med, sebg = sebg) -} -MLRplot<-function(x, Y) -{ -# Forward response plot and residual plot. -# R needs command "library(lqs)" if a robust estimator replaces lsfit. -# Advance the view with the right mouse button. 
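-# Illustrative usage, a hypothetical sketch with simulated data:
-# x <- matrix(rnorm(300), ncol = 3)
-# Y <- 1 + x %*% c(1, 2, 3) + rnorm(100)
-# MLRplot(x, Y)   # cases with Cook's distance above min(0.5, 2p/n) appear as filled squares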
- x <- as.matrix(x) - out <- lsfit(x, Y) - cook <- ls.diag(out)$cooks - n <- dim(x)[1] - p <- dim(x)[2] + 1 - tem <- cook > min(0.5, (2 * p)/n) - bhat <- out$coef - FIT <- bhat[1] + x %*% bhat[-1] - par(mfrow = c(2, 1)) - plot(FIT, Y) - abline(0, 1) - points(FIT[tem], Y[tem], pch = 15) - identify(FIT, Y) - title("Forward Response Plot") - RES <- Y - FIT - plot(FIT, RES) - points(FIT[tem], RES[tem], pch = 15) - identify(FIT, RES) - title("Residual Plot") -} - -mlrplot2 <- function(x, Y) -{ -# Forward response plot and residual plot for two mbareg estimators. -# Workstation need to activate a graphics -# device with command "X11()" or "motif()." -# R needs command "library(lqs)" if a robust estimator replaces lsfit. -# Advance the view with the right mouse button. - x <- as.matrix(x) - out <- mbareg(x, Y) - bhat <- out$coef - FIT <- bhat[1] + x %*% bhat[-1] - par(mfrow = c(2, 2)) - plot(FIT, Y) - abline(0, 1) - identify(FIT, Y) - title("MBA Forward Response Plot") - RES <- Y - FIT - plot(FIT, RES) - identify(FIT, RES) - title("MBA Residual Plot") -# - out <- mbalata(x, Y) - bhat <- out$coef - FIT <- bhat[1] + x %*% bhat[-1] - plot(FIT, Y) - abline(0, 1) - identify(FIT, Y) - title("MBALATA Forward Response Plot") - RES <- Y - FIT - plot(FIT, RES) - identify(FIT, RES) - title("MBALATA Residual Plot") -} - - -mplot<- -function(x) -{ -# Makes a DD plot only using the MDi, the RDi are not used. - p <- dim(x)[2] - center <- apply(x, 2, mean) - cov <- var(x) - md2 <- mahalanobis(x, center, cov) - md <- sqrt(md2) - rd <- md - const <- sqrt(qchisq(0.5, p))/median(rd) - rd <- const * rd - plot(md, rd) - abline(0, 1) - identify(md, rd) -} - -nav<- -function(alpha = 0.01, k = 5) -{ -#gets n(asy var) for the alpha trimmed mean -#and T_(A,n)(k) if errors are N(0,1) - z <- - qnorm(alpha) - den <- 1 - (2 * z * dnorm(z))/(2 * pnorm(z) - 1 - ) - val <- den/(1 - 2 * alpha) - ntmav <- val + (2 * alpha * z^2)/(1 - 2 * alpha - )^2 - zj <- k * qnorm(0.75) - alphaj <- pnorm( - zj) - alphaj <- ceiling(100 * alphaj)/100 - zj <- - qnorm(alphaj) - den <- 1 - (2 * zj * dnorm(zj))/(2 * pnorm(zj) - - 1) - val <- den/(1 - 2 * alphaj) - natmav <- val + (2 * alphaj * zj^2)/(1 - 2 * - alphaj)^2 - return(ntmav, natmav) -} - -nltv<- -function(gam = 0.5) -{ -# Gets asy var for lts(h) and lta(h) at standard normal -# where h/n -> gam. - k <- qnorm(0.5 + gam/2) - den <- gam - 2 * k * dnorm(k) - ltsv <- 1/den - tem <- (1 - exp( - (k^2)/2))^2 - ltav <- (2 * pi * gam)/(4 * tem) - return(ltsv, ltav) -} - -oddata<- -function(n = 100, q = 5, theta = 1) -{ -# Generates overdispersion (negative binomial) data for loglinear regression. -# - y <- 1:n - pr <- 1/(1 + theta) - beta <- 0 * 1:q - beta[1:3] <- 1 - alpha <- -2.5 - x <- matrix(rnorm(n * q), nrow = n, ncol = q) - x <- 0.5 * x + 1 - SP <- alpha + x %*% beta - y <- rnbinom(n, size = ceiling(exp(SP)), pr) - list(x = x, y = y) -} - -pifclean<- -function(k, gam) -{ - p <- floor(log(3/k)/log(1 - gam)) - list(p = p) -} - -piplot<-function(x, y, alpha = 0.05) -{ -# Makes an FY plot with prediction limits added. 
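-# Sketch of what is plotted: piplot(x, y) plots y against the OLS fit with the identity
-# line and marks the lower and upper prediction limits (triangles); each limit scales the
-# alpha/2 and 1 - alpha/2 residual quantiles by (1 + 15/n) * sqrt(n/(n - p)) * sqrt(1 + leverage),
-# the same correction factor used by pisim below.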
- x <- as.matrix(x) - p <- dim(x)[2] + 1 - n <- length(y) - up <- 1:n - low <- up - out <- lsfit(x, y) - tem <- ls.diag(out) - lev <- tem$hat - res <- out$residuals - FIT <- y - res - Y <- y - corfac <- (1 + 15/n)*sqrt(n/(n - p)) - val2 <- quantile(res, c(alpha/2, 1 - alpha/2)) - #get lower and upper PI limits for each case - for(i in 1:n) { - val <- sqrt(1 + lev[i]) - val3 <- as.single(corfac * val2[1] * val) - val4 <- as.single(corfac * val2[2] * val) - up[i] <- FIT[i] + val4 - low[i] <- FIT[i] + val3 - } - zy <- c(min(low), Y, max(up)) - zx <- c(min(FIT), FIT, max(FIT)) - #change labels so plot labels are good - ff <- FIT - yy <- Y - Y <- zy - FIT <- zx - plot(FIT, Y, type = "n") - points(ff, yy) - abline(0, 1) - points(ff, up, pch = 17) - points(ff, low, pch = 17) -} - -pisim<-function(n = 100, q = 7, nruns = 100, alpha = 0.05, eps = 0.1, shift = 9, type - = 1) -{ -# compares new and classical PIs for multiple linear regression -# if type = 1 for N(0,1) errors, 2 for t3 errors, 3 for exp(1) - 1 errors -# 4 for uniform(-1,1) errors, 5 for (1-eps) N(0,1) + eps N(0,(1+shift)^2) errors -# constant = 1 so there are p = q+1 coefficients - b <- 0 * 1:q + 1 - cpicov <- 0 - npicov <- 0 - acpicov <- 0 - opicov <- 0 - val3 <- 1:nruns - val4 <- val3 - val5 <- val3 - pilen <- matrix(0, nrow = nruns, ncol = 4) - coef <- matrix(0, nrow = nruns, ncol = q + 1) - corfac <- (1 + 15/n) * sqrt(n/(n - q - 1)) - corfac2 <- sqrt(n/(n - q - 1)) - for(i in 1:nruns) { - x <- matrix(rnorm(n * q), nrow = n, ncol = q) - if(type == 1) { - y <- 1 + x %*% b + rnorm(n) - xf <- rnorm(q) - yf <- 1 + xf %*% b + rnorm(1) - } - if(type == 2) { - y <- 1 + x %*% b + rt(n, df = 3) - xf <- rnorm(q) - yf <- 1 + xf %*% b + rt(1, df = 3) - } - if(type == 3) { - y <- 1 + x %*% b + rexp(n) - 1 - xf <- rnorm(q) - yf <- 1 + xf %*% b + rexp(1) - 1 - } - if(type == 4) { - y <- 1 + x %*% b + runif(n, min = -1, max = 1) - xf <- rnorm(q) - yf <- 1 + xf %*% b + runif(1, min = -1, max = 1) - } - if(type == 5) { - err <- rnorm(n, sd = 1 + rbinom(n, 1, eps) * shift) - y <- 1 + x %*% b + err - xf <- rnorm(q) - yf <- 1 + xf %*% b + rnorm(1, sd = 1 + rbinom(1, 1, eps - ) * shift) - } - out <- lsfit(x, y) - fres <- out$resid - coef[i, ] <- out$coef - yfhat <- out$coef[1] + xf %*% out$coef[-1] - w <- cbind(1, x) - xtxinv <- solve(t(w) %*% w) - xf <- c(1, xf) - hf <- xf %*% xtxinv - hf <- hf %*% xf - val <- sqrt(1 + hf) #get classical PI - mse <- sum(fres^2)/(n - q - 1) - val2 <- qt(1 - alpha/2, n - q - 1) * sqrt(mse) * val - up <- yfhat + val2 - low <- yfhat - val2 - pilen[i, 1] <- up - low - if(low < yf && up > yf) cpicov <- cpicov + 1 - #get semiparametric PI - val2 <- quantile(fres, c(alpha/2, 1 - alpha/2)) - val3[i] <- as.single(corfac * val2[1] * val) - val4[i] <- as.single(corfac * val2[2] * val) - up <- yfhat + val4[i] - low <- yfhat + val3[i] - pilen[i, 2] <- up - low - if(low < yf && up > yf) npicov <- npicov + 1 - # asymptotically conservative PI - val6 <- corfac2 * max(abs(val2)) - val5[i] <- val6 * val - up <- yfhat + val5[i] - low <- yfhat - val5[i] - pilen[i, 3] <- up - low - if(low < yf && up > yf) acpicov <- acpicov + 1 - # asymptotically optimal PI - sres <- sort(fres) - cc <- ceiling(n * (1 - alpha)) - rup <- sres[cc] - rlow <- sres[1] - olen <- rup - rlow - if(cc < n) { - for(j in (cc + 1):n) { - zlen <- sres[j] - sres[j - cc + 1] - if(zlen < olen) { - olen <- zlen - rup <- sres[j] - rlow <- sres[j - cc + 1] - } - } - } - up <- yfhat + corfac * val * rup - low <- yfhat + corfac * val * rlow - pilen[i, 4] <- up - low - if(low < yf 
&& up > yf) - opicov <- opicov + 1 - } - pimnlen <- apply(pilen, 2, mean) - mnbhat <- apply(coef, 2, mean) - lcut <- mean(val3) - hcut <- mean(val4) - accut <- mean(val5) - cpicov <- cpicov/nruns - npicov <- npicov/nruns - acpicov <- acpicov/nruns - opicov <- opicov/nruns - list(mnbhat = mnbhat, pimenlen = pimnlen, cpicov = cpicov, npicov = - npicov, acpicov = acpicov, opicov = opicov, lcut = lcut, hcut - = hcut, accut = accut) -} - -ratmn<- -function(x, k1 = 6, k2 = 6) -{ -#robust 2 stage asymmetically trimmed mean - madd <- mad(x, constant = 1) - med <- median(x) - LM <- sum(x < (med - k1 * madd)) - nmUM <- sum(x > (med + k2 * madd)) - n <- length(x) - # ll (hh) is the percentage to be trimmed to the left (right) - ll <- ceiling((100 * LM)/n) - hh <- ceiling((100 * (nmUM))/n) - tem <- sort(x) - ln <- floor((ll * n)/100) - un <- floor((n * (100 - hh))/100) - low <- ln + 1 - val1 <- tem[low] - val2 <- tem[un] - rtmn <- mean(x[(x >= val1) & (x <= val2)]) - trmn -} - -rmaha<- -function(x) -{ -# Produces robust Mahalanobis distances (scaled for normal data). - p <- dim(x)[2] - out <- cov.mcd(x) - center <- out$center - cov <- out$cov - rd <- mahalanobis(x, center, cov) - const <- sqrt(qchisq(0.5, p))/median(rd) - return(const * sqrt(rd)) -} - -robci <- function(x, alpha = 0.05, trmp = 0.25, ka = 6, ks = 3.5 - ) -{ -#Gets several robust 100 (1-alpha)% CI's for data x. -#defaults are alpha = .05 - n <- length(x) - up <- 1 - alpha/2 - med <- median(x) - madd <- mad(x, constant = 1) - d <- sort(x) - dtem <- d ## get the CI for T_A, - LM <- sum(x < (med - ka * madd)) - nmUM <- sum(x > (med + ka * madd)) - # ll (hh) is the percentage to be trimmed to the left (right) - ll <- ceiling((100 * LM)/n) - hh <- ceiling((100 * (nmUM))/n) - ln <- floor((ll * n)/100) - un <- floor((n * (100 - hh))/100) - low <- ln + 1 - val1 <- dtem[low] - val2 <- dtem[un] - tstmn <- mean(x[(x >= val1) & (x <= val2)]) - #have obtained the two stage asymmetrically trimmed mean - if(ln > 0) { - d[1:ln] <- d[low] - } - if(un < n) { - d[(un + 1):n] <- d[un] - } - den <- ((un - ln)/n)^2 - swv <- var(d)/den - #got the scaled Winsorized variance - rdf <- un - low - rval <- qt(up, rdf) * sqrt(swv/n) - talo <- tstmn - rval - tahi <- tstmn + rval - ##got low and high endpoints of robust T_A,n CI -##get robust T_S,n CI - d <- dtem - lo <- sum(x < (med - ks * madd)) - hi <- sum(x > (med + ks * madd)) - low <- ceiling((100 * lo)/n) - high <- ceiling((100 * hi)/n) - tp <- min(max(low, high)/100, 0.5) - tstmn <- mean(x, trim = tp) - #have obtained the two stage symetrically trimmed mean - ln <- floor(n * tp) - un <- n - ln - if(ln > 0) { - d[1:ln] <- d[(ln + 1)] - } - if(un < n) { - d[(un + 1):n] <- d[un] - } - den <- ((un - ln)/n)^2 - swv <- var(d)/den - #got the scaled Winsorized variance - rdf <- un - ln - 1 - rval <- qt(up, rdf) * sqrt(swv/n) - tslo <- tstmn - rval - tshi <- tstmn + rval - ##got low and high endpoints of robust T_S,n CI -##get median CI that uses a scaled Winsorized variance - d <- dtem - lnbg <- floor(n/2) - ceiling(sqrt(n/4)) - unbg <- n - lnbg - lowbg <- lnbg + 1 - if(lnbg > 0) { - d[1:lnbg] <- d[(lowbg)] - } - if(unbg < n) { - d[(unbg + 1):n] <- d[unbg] - } - den <- ((unbg - lnbg)/n)^2 - swv <- var(d)/den - #got the scaled Winsorized variance - rdf <- unbg - lnbg - 1 - cut <- qt(up, rdf) - rval <- cut * sqrt(swv/n) - rlo <- med - rval - rhi <- med + rval - ##got median CI that uses a scaled Winsorized variance -##get BG CI - se2 <- 0.5 * (d[unbg] - d[lowbg]) - rval <- cut * se2 - rlo2 <- med - rval - rhi2 <- med + 
rval - #got low and high endpoints of BG CI -## get classical CI - mn <- mean(x) - v <- var(x) - se <- sqrt(v/n) - val <- qt(up, n - 1) * se - lo <- mn - val - hi <- mn + val ##got classical CI endpoints -## get trimmed mean CI - d <- dtem - ln <- floor(n * trmp) - un <- n - ln - trmn <- mean(x, trim = trmp) - if(ln > 0) { - d[1:ln] <- d[(ln + 1)] - } - if(un < n) { - d[(un + 1):n] <- d[un] - } - den <- ((un - ln)/n)^2 - swv <- var(d)/den - #got the scaled Winsorized variance - rdf <- un - ln - 1 - rval <- qt(up, rdf) * sqrt(swv/n) - trlo <- trmn - rval - trhi <- trmn + rval - ##got trimmed mean CI endpoints - list(tint = c(lo, hi), taint = c(talo, tahi), - tsint = c(tslo, tshi), bgint = c(rlo2, - rhi2), mint = c(rlo, rhi), trint = c( - trlo, trhi)) -} - - -rrplot<- -function(x, y, nsamps = 7) -{ -# Makes an RR plot. Needs the mbareg function. - n <- length(y) - rmat <- matrix(nrow = n, ncol = 5) - lsres <- lsfit(x, y)$residuals - print("got OLS") - l1res <- l1fit(x, y)$residuals - print("got L1") - almsres <- lmsreg(x, y)$resid - print("got ALMS") - altsres <- ltsreg(x, y)$residuals - print("got ALTS") - out <- mba$coef - mbacoef <- mbareg(x, y, nsamp = nsamps)$coef - MBARES <- y - mbacoef[1] - x %*% mbacoef[-1] - print("got MBA") - rmat[, 1] <- lsres - rmat[, 2] <- l1res - rmat[, 3] <- almsres - rmat[, 4] <- altsres - rmat[, 5] <- MBARES - pairs(rmat, labels = c("OLS residuals", - "L1 residuals", "ALMS residuals", - "ALTS residuals", "MBA residuals")) -} - -rrplot2<- -function(x, y, nsamps = 7) -{ -# Makes an RR plot. Needs the mbareg function. - n <- length(y) - rmat <- matrix(nrow = n, ncol = 4) - lsres <- lsfit(x, y)$residuals - print("got OLS") - almsres <- lmsreg(x, y)$resid - print("got ALMS") - altsres <- ltsreg(x, y)$residuals - print("got ALTS") - out <- mba$coef - mbacoef <- mbareg(x, y, nsamp = nsamps)$coef - MBARES <- y - mbacoef[1] - x %*% mbacoef[-1] - print("got MBA") - rmat[, 1] <- lsres - rmat[, 2] <- almsres - rmat[, 3] <- altsres - rmat[, 4] <- MBARES - pairs(rmat, labels = c("OLS residuals", - "ALMS residuals", - "ALTS residuals", "MBA residuals")) -} - -rstmn<- -function(x, k1 = 5, k2=5) -{ -#robust symmetically trimmed 2 stage mean -#truncates too many cases when the contamination is asymmetric - madd <- mad(x, constant = 1) - med <- median(x) - LM <- sum(x < (med - k1 * madd)) - nmUM <- sum(x > (med + k2 * madd)) - n <- length(x) #ll (hh) is the percentage trimmed to the left (right) -# tp is the trimming proportion - ll <- ceiling((100 * LM)/n) - hh <- ceiling((100 * nmUM)/n) - tp <- min(max(ll, hh)/100, 0.5) - mean(x, trim = tp) -} - -sir<- -function(x, y, h) -{ -# Obtained from STATLIB. Contributed by Thomas Koetter. -# Calculates the effective dimension-reduction (e.d.r.) -# directions by Sliced Inverse Regression (K.C. Li 1991, JASA 86, 316-327) -# -# Input: x n x p matrix, explanatory variable -# y n x 1 vector, dependent variable -# h scalar: if h >= 2 number of slices -# if h <= -2 number of elements within a slice -# 0 < h < 1 width of a slice: h = slicewidth / -# range -# -# Output: list(edr, evalues) -# edr p x p matrix, estimates for the e.d.r. directions -# evalues p x 1 vector, the eigenvalues to the directions -# -# written by Thomas Koetter (thomas@wiwi.hu-berlin.de) 1995 -# last modification: 7/18/95 -# based on the implementation in XploRe -# a full description of the XploRe program can be found in (chapter 11) -# 'XploRe: An interactive statistical computing environment', -# W. Haerdle, S. Klinke, B.A. 
Turlach, Springer, 1995 -# -# This software can be freely used for non-commercial purposes and freely -# distributed. -#+-----------------------------------------------------------------------------+ -#| Thomas Koetter | -#| Institut fuer Statistik und Oekonometrie | -#| Fakultaet Wirtschaftswissenschaften | -#| Humboldt-Universitaet zu Berlin, 10178 Berlin, GERMANY | -#+-----------------------------------------------------------------------------+ -#| Tel. voice: +49 30 2468-321 | -#| Tel. FAX: +49 30 2468-249 | -#| E-mail: thomas@wiwi.hu-berlin.de | -#+-----------------------------------------------------------------------------+ - n <- nrow(x) - ndim <- ncol(x) - if(n != length(c(y))) { - stop("length of y doesn't match to number of rows of x !!") - } - if( - h > n) { - stop("Number of elements within slices can't exceed number of data !!" - ) - } -# stanardize the x variable to z (mean 0 and cov I) - xb <- apply(x, 2, mean) - si2 <- solve(chol(var(x))) - xt <- (x - matrix(xb, nrow(x), ncol(x), byrow = T)) %*% si2 - # sort the data regarding y. x values are now packed into slices - ord1 <- order(y) - data <- cbind(y[ord1], xt[ord1, ]) # determine slicing strategy - if(h <= -2) { -# abs(h) is number of elements per slice - h <- abs(h) - ns <- floor(n/h) - condit <- 1:n - choice <- (1:ns) * h - # if there are observations left, add them to the first and last slice - if(h * ns != n) { - hk <- floor((n - h * ns)/2) - choice <- choice + hk - choice[ns] <- n # to aviod numerical problems - } - } - else if(h >= 2) { -# h is number of slices - ns <- h - slwidth <- (data[n, 1] - data[1, 1])/ns - slend <- seq(data[1, 1] + slwidth, length = ns, by = slwidth) - slend[ns] <- data[n, 1] - condit <- c(data[, 1]) - choice <- slend - } - else if((0 < h) && (h < 1)) { -# h is widht of a slice divides by the range of y - ns <- floor(1/h) - slwidth <- (data[n, 1] - data[1, 1]) * h - slend <- seq(data[1, 1] + slwidth, length = ns, by = slwidth) - slend[ns] <- data[n, 1] # to aviod numerical problems - condit <- c(data[, 1]) - choice <- slend - } - else stop("values of third parameter not valid") - v <- matrix(0, ndim, ndim) # estimate for Cov(E[z|y]) - ind <- rep(TRUE, n) # index for already sliced elements - ndim <- ndim + 1 - j <- 1 # loop counter - while(j <= ns) { - sborder <- (condit <= choice[j]) & ind # index of slice j - if(any(sborder)) { -# are there elements in slice j ? - ind <- ind - sborder - xslice <- data[sborder, 2:ndim] - if(sum(sborder) == 1) { -# xslice is a vector ! - xmean <- xslice - v <- v + outer(xmean, xmean, "*") - } - else { - xmean <- apply(xslice, 2, mean) - v <- v + outer(xmean, xmean, "*") * nrow(xslice - ) - } - } - j <- j + 1 - } - if(any(ind)) { - print("Error: elements unused !!") - print(ind) - } - v <- (v + t(v))/(2 * n) # to prevent numerical errors (v is symmetric) - eig <- eigen(v) - b <- si2 %*% eig$vectors # estimates for e.d.r. directions - data <- sqrt(apply(b * b, 2, sum)) - b <- t(b)/data - return(list(edr = t(b), evalues = eig$values)) -} - -sirviews<- -function(x, Y, ii = 1) -{ -# Uses the function "sir" from STATLIB. -# Trimmed views for 90, 80, ... 0 percent -# trimming. Allows visualization of m -# and crude estimation of c beta in models -# of the form y = m(x^T beta) + e. -# beta is obtained from SIR. -# Workstation need to activate a graphics -# device with command "X11()" or "motif()." -# R needs command "library(lqs)." -# Advance the view with the right mouse button. -# In R, highlight "stop." 
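-# Illustrative usage, a hypothetical sketch assuming MASS (cov.mcd) and the sir() function above:
-# x <- matrix(rnorm(500), ncol = 5)
-# Y <- (x %*% c(1, 1, 0, 0, 0))^3 + rnorm(100)
-# sirviews(x, Y)   # ESP vs Y for 90%, 80%, ..., 0% trimming; right-click to advance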
- x <- as.matrix(x) - q <- dim(x)[2] - out <- cov.mcd(x) # or use out <- cov.mve(x) - center <- out$center - cov <- out$cov - rd2 <- mahalanobis(x, center, cov) - labs <- c("90%", "80%", "70%", "60%", "50%", "40%", "30%", "20%", "10%", - "0%") - tem <- seq(0.1, 1, 0.1) - h <- q + 7 - for(i in ii:10) { - val <- quantile(rd2, tem[i]) - b <- sir(x[rd2 <= val, ], Y[rd2 <= val], h)$edr[, 1] - ESP <- x %*% b - plot(ESP, Y) - title(labs[i]) - identify(ESP, Y) - print(b) - } -} - -stmci<- -function(x, alpha = 0.05, ks = 3.5) -{ -#gets se for sample median and the corresponding robust 100 (1-alpha)% CI -#defaults are alpha = .05 - n <- length(x) - up <- 1 - alpha/2 - med <- median(x) - madd <- mad(x, constant = 1) - lo <- sum(x < (med - ks * madd)) - hi <- sum(x > (med + ks * madd)) - low <- ceiling((100 * lo)/n) - high <- ceiling((100 * hi)/n) - tp <- min(max(low, high)/100, 0.5) - tstmn <- mean(x, trim = tp) - #have obtained the two stage symetrically trimmed mean - ln <- floor(n * tp) - un <- n - ln - d <- sort(x) - if(ln > 0) { - d[1:ln] <- d[(ln + 1)] - d[(un + 1):n] <- d[un] - } - den <- ((un - ln)/n)^2 - swv <- var(d)/den - #got the scaled Winsorized variance - rdf <- un - ln - 1 - rval <- qt(up, rdf) * sqrt(swv/n) - tslo <- tstmn - rval - tshi <- tstmn + rval - list(int = c(tslo, tshi), tp = tp) -} - -symviews<- -function(x, Y) -{ -# Makes trimmed views for 90, 80, ..., 0 -# percent trimming and sometimes works even if m -# is symmetric about E(x^t beta) where -# y = m(x^T beta ) + e. -# For work stations, activate a graphics -# device with command "X11()" or "motif()." -# For R, use "library(lqs)." -# Use the rightmost mouse button to advance -# the view. In R, highlight ``stop." - x <- as.matrix(x) - tem <- seq(0.1, 1, 0.1) - bols <- lsfit(x, Y)$coef - fit <- x %*% bols[-1] - temx <- x[fit > median(fit), ] - temy <- Y[fit > median(fit)] - out <- cov.mcd(temx) # or use out <- cov.mve(temx) - center <- out$center - cov <- out$cov - rd2 <- mahalanobis(temx, center, cov) - for(i in 1:10) { - val <- quantile(rd2, tem[i]) - bhat <- lsfit(temx[rd2 <= val, ], temy[rd2 <= val])$coef - ESP <- x %*% bhat[-1] - plot(ESP, Y) - identify(ESP, Y) - print(bhat) - } -} - - -tmci<- -function(x, alpha = 0.05, tp = 0.25) -{ -#gets se for the tp trimmed mean and the corresponding robust 100 (1-alpha)% CI -#defaults are alpha = .05 - n <- length(x) - up <- 1 - alpha/2 - tmn <- mean(x, trim = tp) - ln <- floor(n * tp) - un <- n - ln - d <- sort(x) - if(ln > 0) { - d[1:ln] <- d[(ln + 1)] - d[(un + 1):n] <- d[un] - } - den <- ((un - ln)/n)^2 - swv <- var(d)/den - #got the scaled Winsorized variance - rdf <- un - ln - 1 - rval <- qt(up, rdf) * sqrt(swv/n) - tmlo <- tmn - rval - tmhi <- tmn + rval - list(int = c(tmlo, tmhi), tp = tp) -} - -Tplt<- -function(x, y) -{ -# For Unix, use X11() to turn on the graphics device before using this function. -# This function plots y^L vs OLS fit. If plot is linear for L, use y^L instead of y. -# This is a graphical method for a response transform. 
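-# Illustrative usage, a hypothetical sketch (y must be positive for the power ladder):
-# x <- matrix(rnorm(200), ncol = 2)
-# y <- exp(1 + x %*% c(1, 1) + rnorm(100))
-# Tplt(x, y)   # the LOG(Y) panel should be roughly the most linear for this model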
- olsfit <- y - lsfit(x, y)$resid - lam <- c(-1, -2/3, -1/2, -1/3, -1/4, 0, 1/4, 1/ - 3, 1/2, 2/3, 1) - xl <- c("Y**(-1)", "Y**(-2/3)", "Y**(-0.5)", - "Y**(-1/3)", "Y**(-1/4)", "LOG(Y)", - "Y**(1/4)", "Y**(1/3)", "Y**(1/2)", - "Y**(2/3)", "Y") - for(i in 1:length(lam)) { - if(lam[i] == 0) - ytem <- log(y) - else if(lam[i] == 1) - ytem <- y - else ytem <- (y^lam[i] - 1)/lam[i] - plot(olsfit, ytem, xlab = "YHAT", ylab - = xl[i]) - abline(lsfit(olsfit, ytem)$coef) - identify(olsfit, ytem) - } -} - -trviews<- -function(x, Y, ii = 1) -{ -# Trimmed views for 90, 80, ... 0 percent -# trimming. Increase ii if 90% trimming is too harsh. -# Allows visualization of m and crudely estimation of -# c beta in models of the form y = m(x^T beta) + e. -# Workstation: activate a graphics device -# with commands "X11()" or "motif()." -# R needs command "library(lqs)." -# Advance the view with the right mouse button and -# in R, highight "stop." - x <- as.matrix(x) - out <- cov.mcd(x) - center <- out$center - cov <- out$cov - rd2 <- mahalanobis(x, center, cov) - labs <- c("90%", "80%", "70%", "60%", "50%", "40%", "30%", - "20%","10%","0%") - tem <- seq(0.1, 1, 0.1) - for(i in ii:10) { - val <- quantile(rd2, tem[i]) - b <- lsfit(x[rd2 <= val, ], Y[rd2 <= val])$coef - ESP <- x %*% b[-1] - plot(ESP, Y) - title(labs[i]) - identify(ESP, Y) - print(b) - } -} - -tvreg<- -function(x, Y, ii = 1) -{ -# Trimmed views (TV) regression for 90, 80, ..., 0 percent -# trimming. Increase ii if 90% trimming is too harsh. -# Workstation: activate a graphics device -# with commands "X11()" or "motif()." -# R needs command "library(lqs)." -# Advance the view with the right mouse button and -# in R, highight "stop." - x <- as.matrix(x) - out <- cov.mcd(x) - center <- out$center - cov <- out$cov - rd2 <- mahalanobis(x, center, cov) - labs <- c("90%", "80%", "70%", "60%", "50%", - "40%", "30%", "20%", "10%", "0%") - tem <- seq(0.1, 1, 0.1) - for(i in ii:10) { - val <- quantile(rd2, tem[i]) - b <- lsfit(x[rd2 <= val, ], Y[rd2 <= - val])$coef - FIT <- x %*% b[-1] + b[1] - plot(FIT, Y) - abline(0, 1) - title(labs[i]) - identify(FIT, Y) - print(b) - } -} - -tvreg2<- -function(X, Y, M = 0) -{ -# Trimmed views regression for M percent trimming. -# Workstation: activate a graphics device -# with commands "X11()" or "motif()." -# R needs command "library(lqs)." - X <- as.matrix(X) - out <- cov.mcd(X) - center <- out$center - cov <- out$cov - rd2 <- mahalanobis(X, center, cov) - tem <- (100 - M)/100 - val <- quantile(rd2, tem) - b <- lsfit(X[rd2 <= val, ], Y[rd2 <= val])$coef - FIT <- X %*% b[-1] + b[1] - plot(FIT, Y) - abline(0, 1) - identify(FIT, Y) - list(coef = b) -} - - -wddplot<- -function(x) -{# Shows the southwest corner of the DD plot. - n <- dim(x)[1] - wt <- 0 * (1:n) - p <- dim(x)[2] - center <- apply(x, 2, mean) - cov <- var(x) - md2 <- mahalanobis(x, center, cov) - out <- cov.mcd(x) - center <- out$center - cov <- out$cov - rd2 <- mahalanobis(x, center, cov) - md <- sqrt(md2) - rd <- sqrt(rd2) - const <- sqrt(qchisq(0.5, p))/median(rd) - rd <- const * rd - wt[rd < sqrt(qchisq(0.975, p))] <- 1 - MD <- md[wt > 0] - RD <- rd[wt > 0] - plot(MD, RD) -} - -skipcov<-function(m,cop=6,MM=FALSE,op=1,mgv.op=0,outpro.cop=3,STAND=TRUE){ -# -# m is an n by p matrix -# -# Compute skipped covariance matrix -# -# op=1: -# Eliminate outliers using a projection method -# That is, first determine center of data using: -# -# cop=1 Donoho-Gasko median, -# cop=2 MCD, -# cop=3 marginal medians. 
-# cop=4 uses MVE center -# cop=5 uses TBS -# cop=6 uses rmba (Olive's median ball algorithm) -# -# For each point -# consider the line between it and the center, -# project all points onto this line, and -# check for outliers using -# -# MM=F, a boxplot rule. -# MM=T, rule based on MAD and median -# -# Repeat this for all points. A point is declared -# an outlier if for any projection it is an outlier -# -# op=2 use mgv (function outmgv) method to eliminate outliers -# -# Eliminate any outliers and compute means -# using remaining data. -# mgv.op=0, mgv uses all pairwise distances to determine center of the data -# mgv.op=1 uses MVE -# mgv.op=2 uses MCD -# -temp<-NA -m<-elimna(m) -m<-as.matrix(m) -if(op==2)temp<-outmgv(m,plotit=FALSE,op=mgv.op)$keep -if(op==1)temp<-outpro(m,plotit=FALSE,MM=MM,cop=outpro.cop,STAND=STAND,pr=FALSE)$keep -val<-var(m[temp,]) -val -} - -hc4wtest<-function(x,y,nboot=500,SEED=TRUE,RAD=TRUE,xout=FALSE,outfun=outpro,...){ -# -# Test the hypothesis that all OLS slopes are zero -# using HC4 wild bootstrap using wald test. -# -# This function calls the functions -# olshc4 and -# lstest4 -# -if(SEED)set.seed(2) -x<-as.matrix(x) -# First, eliminate any rows of data with missing values. -m<-elimna(cbind(x,y)) -x<-as.matrix(x) -p<-ncol(x) -pp<-p+1 -x<-m[,1:p] -y<-m[,pp] -if(xout){ -flag<-outfun(x,...)$keep -x<-as.matrix(x) -x<-x[flag,] -y<-y[flag] -x<-as.matrix(x) -} -x<-as.matrix(x) -p<-ncol(x) -pp<-p+1 -temp<-lsfit(x,y) -Rsq=ols(x,y)$R.squared -yhat<-mean(y) -res<-y-yhat -s<-olshc4(x, y)$cov[-1, -1] -si<-solve(s) -b<-temp$coef[2:pp] -wtest<-t(b)%*%si%*%b -if(RAD)data<-matrix(ifelse(rbinom(length(y)*nboot,1,0.5)==1,-1,1),nrow=nboot) -if(!RAD){ -data<-matrix(runif(length(y)*nboot),nrow=nboot) -data<-(data-.5)*sqrt(12) # standardize the random numbers. -} -rvalb<-apply(data,1,lstest4,yhat,res,x) -sum<-sum(rvalb>= wtest[1,1]) -p.val<-sum/nboot -list(p.value=p.val,R.squared=Rsq) -} -lscale<-function(x,m,q) -{ -# -# Compute the L-scale as used by Marrona -# Technometrics, 2005, 47, 264-273 -# -# so it is assumed that values in x have been centered -# (a measure of location has been subtracted from each value) -# and the results squared. -# -# q is defined in Marrona. For principal components, want to reduce -# to p dimensional data, q=ncol(x)-p -# -hval<-floor((length(x)+m-q+2)/2) -flag<-(x<0) -if(sum(flag)>0)stop("For lscale, all values must be nonnegative") -x<-sort(x) -val<-sum(x[1:hval]) -val -} -ortho<-function(x){ -# Orthnormalize x -# -y<-qr(x) -y<-qr.Q(y) -y -} - -Mpca<-function(x,N1=3,N2=2,tol=.001,N2p=10,Nran=50, -Nkeep=10,SEED=TRUE,op.pro=.1,SCORES=FALSE,pval=NULL){ -# -# Robust PCA using Marrona's method (2005, Technometrics) -# -# x is an N by m matrix containing data -# N1, N2, N2p, Nran and Nkeep indicate how many -# iterations are used in the various portions of the -# Marrona robust PCA; see Marrona's paper. -# -# op.pro is the maximum proportion of unexplained -# variance that is desired. If pval is not specified, will -# add variables until this proportion is less than op.pro. -# -# pval, if specified, will use p=pval of the m variables only and report -# the proportion of unexplained variance. -# The weighted covariance matrix is returned as well. -# -# SCORES=T, scores are reported and return based on the number of -# variables indicated by pval. pval must be specified. -# -# pval not specified, computes proportion of unexplained variance -# using p=1, 2 ... 
variables; results returned in -# -scores<-NULL -wt.cov<-NULL -x<-elimna(x) -if(SEED)set.seed(2) -m<-ncol(x) -n<-nrow(x) -bot<-marpca(x,p=0,N1=N1,N2=N2,tol=tol,N2p=N2p,Nran=Nran,Nkeep=Nkeep,SEED=SEED) -bot<-bot$var.op -mn1<-m-1 -rat<-1 -it<-0 -ratval<-NULL -if(is.null(pval)){ -ratval<-matrix(nrow=mn1,ncol=2) -dimnames(ratval)<-list(NULL,c("p","pro.unex.var")) -ratval[,1]<-c(1:mn1) -for(it in 1:mn1){ -if(rat>op.pro){ -temp<-marpca(x,p=it,N1=N1,N2=N2,tol=tol,N2p=N2p,Nran=Nran,Nkeep=Nkeep, -SEED=SEED) -rat<-temp$var.op/bot -ratval[it,2]<-rat -}}} -if(!is.null(pval)){ -if(pval>=m)stop("This method assumes pval0))cor.b=rcovb/temp - list(center = rmnb, cov = rcovb, cor=cor.b) -} -tbscov <- function(x,eps=1e-3,maxiter=20,r=.45,alpha=.05){ -# Rocke's contrained s-estimator -# returns covariance matrix only. For both locatiion and scatter, use tbs -# -# r=.45 is the breakdown point -# alpha=.05 is the asymptotic rejection probability. -# -if(!is.matrix(x))stop("x should be a matrix with two or more columns") -x<-elimna(x) -library(MASS) -temp<-cov.mve(x) -t1<-temp$center -s<-temp$cov - n <- nrow(x) - p <- ncol(x) -if(p==1)stop("x should be a matrix with two or more columns") -c1M<-cgen.bt(n,p,r,alpha,asymp=FALSE) -c1<-c1M$c1 -if(c1==0)c1<-.001 #Otherwise get division by zero -M<-c1M$M - b0 <- erho.bt(p,c1,M) - crit <- 100 - iter <- 1 - w1d <- rep(1,n) - w2d <- w1d - while ((crit > eps)&(iter <= maxiter)) - { - t.old <- t1 - s.old <- s - wt.old <- w1d - v.old <- w2d - d2 <- mahalanobis(x,center=t1,cov=s) - d <- sqrt(d2) - k <- ksolve.bt(d,p,c1,M,b0) - d <- d/k - w1d <- wt.bt(d,c1,M) - w2d <- v.bt(d,c1,M) - t1 <- (w1d %*% x)/sum(w1d) - s <- s*0 - for (i in 1:n) - { - xc <- as.vector(x[i,]-t1) - s <- s + as.numeric(w1d[i])*(xc %o% xc) - } - s <- p*s/sum(w2d) - mnorm <- sqrt(as.vector(t.old) %*% as.vector(t.old)) - snorm <- eigen(s.old)$values[1] - crit1 <- max(abs(t1 - t.old)) -# crit <- max(crit1,crit2) - crit <- max(abs(w1d-wt.old))/max(w1d) - iter <- iter+1 - } -# mnorm <- sqrt(as.vector(t1) %*% as.vector(t1)) -# snorm <- eigen(s)$values[1] -# return(list(t1=t1,s=s)) -s -} -erho.bt <- function(p,c1,M) -# expectation of rho(d) under chi-squared p - return(chi.int(p,2,M)/2 - +(M^2/2+c1*(5*c1+16*M)/30)*chi.int2(p,0,M+c1) - +(M^2/2-M^2*(M^4-5*M^2*c1^2+15*c1^4)/(30*c1^4))*( -chi.int(p,0,M+c1)-chi.int(p,0,M)) - +(1/2+M^4/(2*c1^4)-M^2/c1^2)*(chi.int(p,2,M+c1)-chi.int(p,2,M)) - +(4*M/(3*c1^2)-4*M^3/(3*c1^4))*(chi.int(p,3,M+c1)-chi.int(p,3,M)) - +(3*M^2/(2*c1^4)-1/(2*c1^2))*(chi.int(p,4,M+c1)-chi.int(p,4,M)) - -(4*M/(5*c1^4))*(chi.int(p,5,M+c1)-chi.int(p,5,M)) - +(1/(6*c1^4))*(chi.int(p,6,M+c1)-chi.int(p,6,M))) -chi.int <- function(p,a,c1) -# partial expectation d in (0,c1) of d^a under chi-squared p - return( exp(lgamma((p+a)/2)-lgamma(p/2))*2^{a/2}*pchisq(c1^2,p+a) ) -chi.int2 <- function(p,a,c1) -# partial expectation d in (c1,\infty) of d^a under chi-squared p - return( exp(lgamma((p+a)/2)-lgamma(p/2))*2^{a/2}*(1-pchisq(c1^2,p+a))) -cgen.bt <- function(n,p,r,alpha,asymp=FALSE){ -# find constants c1 and M that gives a specified breakdown r -# and rejection point alpha -if (asymp == FALSE){if (r > (n-p)/(2*n) ) r <- (n-p)/(2*n)} -# maximum achievable breakdown -# -# if rejection is not achievable, use c1=0 and best rejection -# - limvec <- rejpt.bt.lim(p,r) - if (1-limvec[2] <= alpha) - { - c1 <- 0 - M <- sqrt(qchisq(1-alpha,p)) - } - else - { - c1.plus.M <- sqrt(qchisq(1-alpha,p)) - M <- sqrt(p) - c1 <- c1.plus.M - M - iter <- 1 - crit <- 100 - eps <- 1e-5 - while ((crit > eps)&(iter<100)) - { - deps <- 
1e-4 - M.old <- M - c1.old <- c1 - er <- erho.bt(p,c1,M) - fc <- er - r*(M^2/2+c1*(5*c1+16*M)/30) - fcc1 <- (erho.bt(p,c1+deps,M)-er)/deps - fcM <- (erho.bt(p,c1,M+deps)-er)/deps - fcp <- fcM - fcc1 - r*(M-(5*c1+16*M)/30+c1*9/30) - M <- M - fc/fcp - if (M >= c1.plus.M ){M <- (M.old + c1.plus.M)/2} - c1 <- c1.plus.M - M -# if (M-c1 < 0) M <- c1.old+(M.old-c1.old)/2 - crit <- abs(fc) - iter <- iter+1 - } - } -list(c1=c1,M=M,r1=r) -} -erho.bt.lim <- function(p,c1) -# expectation of rho(d) under chi-squared p - return(chi.int(p,2,c1)+c1^2*chi.int2(p,0,c1)) -erho.bt.lim.p <- function(p,c1) -# derivative of erho.bt.lim wrt c1 - return(chi.int.p(p,2,c1)+c1^2*chi.int2.p(p,0,c1)+2*c1*chi.int2(p,0,c1)) - - -rejpt.bt.lim <- function(p,r){ -# find p-value of translated biweight limit c -# that gives a specified breakdown - c1 <- 2*p - iter <- 1 - crit <- 100 - eps <- 1e-5 - while ((crit > eps)&(iter<100)) - { - c1.old <- c1 - fc <- erho.bt.lim(p,c1) - c1^2*r - fcp <- erho.bt.lim.p(p,c1) - 2*c1*r - c1 <- c1 - fc/fcp - if (c1 < 0) c1 <- c1.old/2 - crit <- abs(fc) - iter <- iter+1 - } - return(c(c1,pchisq(c1^2,p),log10(1-pchisq(c1^2,p)))) -} -chi.int.p <- function(p,a,c1) - return( exp(lgamma((p+a)/2)-lgamma(p/2))*2^{a/2}*dchisq(c1^2,p+a)*2*c1 ) -chi.int2.p <- function(p,a,c1) - return( -exp(lgamma((p+a)/2)-lgamma(p/2))*2^{a/2}*dchisq(c1^2,p+a)*2*c1 ) -ksolve.bt <- function(d,p,c1,M,b0){ -# find a constant k which satisfies the s-estimation constraint -# for modified biweight - k <- 1 - iter <- 1 - crit <- 100 - eps <- 1e-5 - while ((crit > eps)&(iter<100)) - { - k.old <- k - fk <- mean(rho.bt(d/k,c1,M))-b0 - fkp <- -mean(psi.bt(d/k,c1,M)*d/k^2) - k <- k - fk/fkp - if (k < k.old/2) k <- k.old/2 - if (k > k.old*1.5) k <- k.old*1.5 - crit <- abs(fk) - iter <- iter+1 - } - return(k) -} -rho.bt <- function(x,c1,M) -{ - x1 <- (x-M)/c1 - ivec1 <- (x1 < 0) - ivec2 <- (x1 > 1) - return(ivec1*(x^2/2) - +ivec2*(M^2/2+c1*(5*c1+16*M)/30) - +(1-ivec1-ivec2)*(M^2/2-M^2*(M^4-5*M^2*c1^2+15*c1^4)/(30*c1^4) - +(1/2+M^4/(2*c1^4)-M^2/c1^2)*x^2 - +(4*M/(3*c1^2)-4*M^3/(3*c1^4))*x^3 - +(3*M^2/(2*c1^4)-1/(2*c1^2))*x^4 - -4*M*x^5/(5*c1^4)+x^6/(6*c1^4))) -} -psi.bt <- function(x,c1,M) -{ - x1 <- (x-M)/c1 - ivec1 <- (x1 < 0) - ivec2 <- (x1 > 1) - return(ivec1*x+(1-ivec1-ivec2)*x*(1-x1^2)^2) -} -psip.bt <- function(x,c1,M) -{ - x1 <- (x-M)/c1 - ivec1 <- (x1 < 0) - ivec2 <- (x1 > 1) - return(ivec1+(1-ivec1-ivec2)*((1-x1^2)^2+4*x*x1*(1-x1^2)/c1)) -} -wt.bt <- function(x,c1,M) -{ - x1 <- (x-M)/c1 - ivec1 <- (x1 < 0) - ivec2 <- (x1 > 1) - return(ivec1+(1-ivec1-ivec2)*(1-x1^2)^2) -} -v.bt <- function(x,c1,M) return(x*psi.bt(x,c1,M)) - -gvarg<-function(m,var.fun=cov.mba,...){ -# -# Compute the generalized variance of a matrix m -# It is assumed that var.fun returns a covariance matrix only -# -# (Some functions return a covariance matrix in list mode: $cov -# These functions do not work here.) -# -# other possible choices for var.fun: -# skipcov -# tbscov -# covout -# covogk -# mgvcov -# mvecov -# mcdcov -# -m<-elimna(m) -m<-as.matrix(m) -temp<-var.fun(m,...) 
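# the generalized variance is the product of the eigenvalues of the estimated
# covariance matrix temp, i.e., its determinant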
-gvar<-prod(eigen(temp)$values) -gvar -} -marpca<-function(x,p=ncol(x)-1,N1=3,N2=2,tol=.001,N2p=10,Nran=50, -Nkeep=10,SEED=TRUE,LSCALE=TRUE,SCORES=FALSE){ -# -# Marrona (2005, Technometrics, 47, 264-273) robust PCA -# -# x is an n by m matrix, pNran)stop("Must have Nkeep<=Nran") -if(SEED)set.seed(2) -n<-nrow(x) -m<-ncol(x) -q<-m-p -if(q<0)stop("p should have value between 0 and ncol(x)") -if(q>0){ -bkeep<-array(dim=c(q,m,Nran)) -akeep<-matrix(nrow=Nran,ncol=q) -sig.val<-NA -for(it in 1:Nran){ -temp<-marpca.sub(x,p,N1=N1,N2=N2,tol=tol,LSCALE=LSCALE) -bkeep[,,it]<-temp$B -akeep[it,]<-temp$a -sig.val[it]<-temp$var.op -} -ord<-order(sig.val) -bkeep2<-array(dim=c(q,m,Nkeep)) -cmatkeep<-array(dim=c(m,m,Nkeep)) -akeep2<-matrix(nrow=Nkeep,ncol=q) -sig.val2<-NA -for(it in 1:Nkeep){ -temp<-marpca.sub(x,p,N1=0,N2=N2p,tol=tol,B=bkeep[,,ord[it]],a=akeep[ord[it],], -LSCALE=LSCALE) -bkeep2[,,it]<-temp$B -akeep2[it,]<-temp$a -sig.val2[it]<-temp$var.op -cmatkeep[,,it]<-temp$wt.cov -} -ord<-order(sig.val2) -B<-bkeep2[,,ord[1]] -a<-akeep2[ord[1],] -var.op<-sig.val2[ord[1]] -Cmat<-cmatkeep[,,ord[1]] -} -wt.mu<-NULL -if(q==0){ -output<-marpca.sub(x,0,LSCALE=LSCALE) -B<-output$B -a<-output$a -var.op<-output$var.op -wt.mu<-output$mu -Cmat<-output$wt.cov -} -scores<-NULL -if(SCORES){ -ev<-eigen(Cmat) -ord.val<-order(ev$values) -mn1<-m-p+1 -wt.mu<-marpca.sub(x,p=p)$mu -Bp<-ev$vectors[,ord.val[mn1:m]] #m by m -xmmu<-x -for(j in 1:m)xmmu[,j]<-x[,j]-wt.mu[j] -scores<-matrix(ncol=p,nrow=n) -for(i in 1:n)scores[i,]<-t(Bp)%*%as.matrix(xmmu[i,]) -} -list(B=B,a=a,var.op=var.op,wt.cov=Cmat,wt.mu=wt.mu,scores=scores) -} - - - -marpca.sub<-function(x,p=ncol(x)-1,N1=3,N2=2,tol=.001,B=NULL,a=NULL, -LSCALE=TRUE){ -# -# Marrona (2005, Technometrics, 47, 264-273) robust PCA -# -# Note: setting -# p=0 causes B to be the identity matrix, which is used in the case -# p=ncol(x) to estimate proportion of unexplained variance. -# -wt.cov<-NULL -if(!is.null(B)){ -B<-as.matrix(B) -if(ncol(B)==1)B<-t(B) -} -n<-nrow(x) -m<-ncol(x) -q<-m-p -if(q<0)stop("p and q should have values between 1 and ncol(x)") -hval<-floor((n + m - q + 2)/2) -DEL<-Inf -sig0<-Inf -if(is.null(B)){ -if(p>0 && ptol){ -r<-NA -for(i in 1:n)r[i]<-sum(Bx[i,]-a)^2 -if(LSCALE)sig<-lscale(r,m,q) -if(!LSCALE){ -delta<-delta<-(n-m+q-1)/(2*n) -sig<-mscale(r,delta) -} -DEL<-1-sig/sig0 -sig0<-sig -ord.r<-order(r) -w<-rep(0,n) -w[ord.r[1:hval]]<-1 -xx<-x -for(i in 1:n)xx[i,]<-x[i,]*w[i] -mu<-apply(xx,2,FUN="sum")/sum(w) #m by 1 locations -Cmat<-matrix(0,nrow=m,ncol=m) -for(i in 1:n){ -temp<-w[i]*as.matrix(x[i,]-mu)%*%t(as.matrix(x[i,]-mu)) -Cmat<-Cmat+temp -} -wt.cov<-Cmat/sum(w) -if(it>N1){ -temp<-eigen(wt.cov) -ord.eig<-order(temp$values) -for(iq in 1:q)B[iq,]<-temp$vectors[,ord.eig[iq]] -} -a<-B%*%mu -it<-it+1 -} -list(B=B,a=a,var.op=sig,mu=mu,wt.cov=wt.cov) -} - -qregsm<-function(x, y,est=hd,qval=.5,sm=TRUE,plotit=TRUE,pyhat=FALSE,fr=0.8,nboot=40,xlab="X", -ylab="Y",xout=FALSE,outfun=outpro,STAND=TRUE,...) -{ -# -# Do a smooth of x versus the quantiles of y -# -# qval indicates quantiles of interest. -# Example: qval=c(.2,.8) will create two smooths, one for the -# .2 quantile and the other for the .8 quantile. -# -# est can be any quantile estimator having the argument qval, indicating -# the quantile to be used. -# -# est = hd uses Harrel Davis estimator, -# est = qest uses a single order statistic. -# -# sm=T, bagging will be used. 
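#   (that is, the fitted values are averaged over nboot bootstrap samples via
#   the call to rplotsm below)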
-# pyhat=T returns the estimates -# -chk=FALSE -if(identical(est,hd))chk=TRUE -#if(identical(est,qest))chk=TRUE -if(!chk)stop('For current version, argument est must be hd') -x<-as.matrix(x) -X<-cbind(x,y) -X<-elimna(X) -np<-ncol(X) -p<-np-1 -x<-X[,1:p] -x<-as.matrix(x) -y<-X[,np] -if(xout){ -m<-cbind(x,y) -flag<-outfun(x,plotit=FALSE)$keep -m<-m[flag,] -x<-m[,1:p] -y<-m[,np] -} -vals<-matrix(NA,ncol=length(y),nrow=length(qval)) -for(i in 1:length(qval)){ -if(sm)vals[i,]<-rplotsm(x,y,est=est,q=qval[i],pyhat=TRUE,plotit=FALSE,fr=fr,nboot=nboot, -na.rm=FALSE,STAND=STAND)$yhat -#if(!sm)vals[i,]<-rungen(x,y,est=est,q=qval[i],pyhat=TRUE,plotit=FALSE,fr=fr,na.rm=FALSE)$output -if(!sm)vals[i,]<-rplot(x,y,est=est,q=qval[i],pyhat=TRUE,plotit=FALSE,fr=fr,na.rm=FALSE)$yhat -} -if(p==1){ -if(plotit){ -plot(x,y,xlab=xlab,ylab=ylab) -for(i in 1:length(qval)){ -sx <- sort(x) -xorder <- order(x) -sysm <- vals[i,] -#lines(sx, sysm) -lines(sx, sysm[xorder]) -}}} -output <- "Done" -if(pyhat)output <- vals -output -} - -L1median <- function(X, tol = 1e-08, maxit = 200, m.init = apply(X, 2, median), - trace = FALSE) -{ - ## L1MEDIAN calculates the multivariate L1 median - ## I/O: mX=L1median(X,tol); - ## - ## X : the data matrix - ## tol: the convergence criterium: - ## the iterative process stops when ||m_k - m_{k+1}|| < tol. - ## maxit: maximum number of iterations - ## init.m: starting value for m; typically coordinatewise median - ## - ## Ref: Hossjer and Croux (1995) - ## "Generalizing Univariate Signed Rank Statistics for Testing - ## and Estimating a Multivariate Location Parameter"; - ## Non-parametric Statistics, 4, 293-308. - ## - ## Implemented by Kristel Joossens - ## Many thanks to Martin Maechler for improving the program! - - ## slightly faster version of 'sweep(x, 2, m)': - centr <- function(X,m) X - rep(m, each = n) - ## computes objective function in m based on X and a: - mrobj <- function(X,m) sum(sqrt(rowSums(centr(X,m)^2))) - - d <- dim(X); n <- d[1]; p <- d[2] - m <- m.init - if(!is.numeric(m) || length(m) != p) - stop("'m.init' must be numeric of length p =", p) - k <- 1 - if(trace) nstps <- 0 - while (k <= maxit) { - mold <- m - obj.old <- if(k == 1) mrobj(X,mold) else obj - X. <- centr(X, m) - Xnorms <- sqrt(rowSums(X. ^ 2)) - inorms <- order(Xnorms) - dx <- Xnorms[inorms] # smallest first, i.e., 0's if there are - X <- X [inorms,] - X. <- X.[inorms,] - ## using 1/x weighting {MM: should this be generalized?} - w <- ## (0 norm -> 0 weight) : - if (all(dn0 <- dx != 0)) 1/dx - else c(rep.int(0, length(dx)- sum(dn0)), 1/dx[dn0]) - delta <- colSums(X. 
* rep(w,p)) / sum(w) - nd <- sqrt(sum(delta^2)) - - maxhalf <- if (nd < tol) 0 else ceiling(log2(nd/tol)) - m <- mold + delta # computation of a new estimate - ## If step 'delta' is too far, we try halving the stepsize - nstep <- 0 - while ((obj <- mrobj(X, m)) >= obj.old && nstep <= maxhalf) { - nstep <- nstep+1 - m <- mold + delta/(2^nstep) - } - if(trace) { - if(trace >= 2) - cat(sprintf("k=%3d obj=%19.12g m=(",k,obj), - paste(formatC(m),collapse=","), - ")", if(nstep) sprintf(" nstep=%2d halvings",nstep) else "", - "\n", sep="") - nstps[k] <- nstep - } - if (nstep > maxhalf) { ## step halving failed; keep old - m <- mold - ## warning("step halving failed in ", maxhalf, " steps") - break - } - k <- k+1 - } - if (k > maxit) warning("iterations did not converge in ", maxit, " steps") - if(trace == 1) - cat("needed", k, "iterations with a total of", - sum(nstps), "stepsize halvings\n") - return(m) -} -llocv2<-function(x,est=median,...){ -if(!is.list(x))val<-est(x,...) -if(is.list(x)){ -val<-NA -for(i in 1:length(x))val[i]<-est(x[[i]],...) -} -if(is.matrix(x))val<-apply(x,2,est,...) -list(center=val) -} -mcppb<-function(x,crit=NA,con=0,tr=.2,alpha=.05,nboot=2000,grp=NA,WIN=FALSE, -win=.1){ -# -# Compute a 1-alpha confidence interval for a set of d linear contrasts -# involving trimmed means using the percentile bootstrap method. -# Independent groups are assumed. -# -# The data are assumed to be stored in x in list mode. Thus, -# x[[1]] contains the data for the first group, x[[2]] the data -# for the second group, etc. Length(x)=the number of groups = J, say. -# -# Or the data can be stored in a matrix with J columns -# -# By default, all pairwise comparisons are performed, but contrasts -# can be specified with the argument con. -# The columns of con indicate the contrast coefficients. -# Con should have J rows, J=number of groups. -# For example, con[,1]=c(1,1,-1,-1,0,0) and con[,2]=c(,1,-1,0,0,1,-1) -# will test two contrasts: (1) the sum of the first two trimmed means is -# equal to the sum of the second two, and (2) the difference between -# the first two is equal to the difference between the trimmed means of -# groups 5 and 6. -# -# The default number of bootstrap samples is nboot=2000 -# -# -con<-as.matrix(con) -if(is.matrix(x)){ -xx<-list() -for(i in 1:ncol(x)){ -xx[[i]]<-x[,i] -} -x<-xx -} -if(!is.list(x))stop("Data must be stored in list mode or in matrix mode.") -if(!is.na(sum(grp))){ # Only analyze specified groups. -xx<-list() -for(i in 1:length(grp))xx[[i]]<-x[[grp[1]]] -x<-xx -} -J<-length(x) -tempn<-0 -for(j in 1:J){ -temp<-x[[j]] -temp<-temp[!is.na(temp)] # Remove missing values. 
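# record the sample size of group j after missing values are removed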
-tempn[j]<-length(temp) -x[[j]]<-temp -} -Jm<-J-1 -d<-ifelse(sum(con^2)==0,(J^2-J)/2,ncol(con)) -if(is.na(crit) && tr != .2)stop("A critical value must be specified when -the amount of trimming differs from .2") -if(WIN){ -if(tr < .2)warning("When Winsorizing, the amount of trimming should be at least -.2") -if(win > tr)stop("Amount of Winsorizing must <= amount of trimming") -if(min(tempn) < 15){warning("Winsorizing with sample sizes less than 15 can") -warning(" result in poor control over the probability of a Type I error") -} -for (j in 1:J){ -x[[j]]<-winval(x[[j]],win) -} -} -if(is.na(crit)){ -if(d==1)crit<-alpha/2 -if(d==2 && alpha==.05 && nboot==1000)crit<-.014 -if(d==2 && alpha==.05 && nboot==2000)crit<-.014 -if(d==3 && alpha==.05 && nboot==1000)crit<-.009 -if(d==3 && alpha==.05 && nboot==2000)crit<-.0085 -if(d==3 && alpha==.025 && nboot==1000)crit<-.004 -if(d==3 && alpha==.025 && nboot==2000)crit<-.004 -if(d==3 && alpha==.01 && nboot==1000)crit<-.001 -if(d==3 && alpha==.01 && nboot==2000)crit<-.001 -if(d==4 && alpha==.05 && nboot==2000)crit<-.007 -if(d==5 && alpha==.05 && nboot==2000)crit<-.006 -if(d==6 && alpha==.05 && nboot==1000)crit<-.004 -if(d==6 && alpha==.05 && nboot==2000)crit<-.0045 -if(d==6 && alpha==.025 && nboot==1000)crit<-.002 -if(d==6 && alpha==.025 && nboot==2000)crit<-.0015 -if(d==6 && alpha==.01 && nboot==2000)crit<-.0005 -if(d==10 && alpha==.05 && nboot<=2000)crit<-.002 -if(d==10 && alpha==.05 && nboot==3000)crit<-.0023 -if(d==10 && alpha==.025 && nboot<=2000)crit<-.0005 -if(d==10 && alpha==.025 && nboot==3000)crit<-.001 -if(d==15 && alpha==.05 && nboot==2000)crit<-.0016 -if(d==15 && alpha==.025 && nboot==2000)crit<-.0005 -if(d==15 && alpha==.05 && nboot==5000)crit<-.0026 -if(d==15 && alpha==.025 && nboot==5000)crit<-.0006 -} -if(is.na(crit) && alpha==.05)crit<-0.0268660714*(1/d)-0.0003321429 -if(is.na(crit))crit<-alpha/(2*d) -if(d> 10 && nboot <5000)warning("Suggest using nboot=5000 when the number -of contrasts exceeds 10.") -icl<-round(crit*nboot)+1 -icu<-round((1-crit)*nboot) -if(sum(con^2)==0){ -con<-matrix(0,J,d) -id<-0 -for (j in 1:Jm){ -jp<-j+1 -for (k in jp:J){ -id<-id+1 -con[j,id]<-1 -con[k,id]<-0-1 -}}} -psihat<-matrix(0,ncol(con),6) -dimnames(psihat)<-list(NULL,c("con.num","psihat","se","ci.lower", -"ci.upper","p.value")) -if(nrow(con)!=length(x))stop("The number of groups does not match the number - of contrast coefficients.") -bvec<-matrix(NA,nrow=J,ncol=nboot) -set.seed(2) # set seed of random number generator so that -# results can be duplicated. -for(j in 1:J){ -data<-matrix(sample(x[[j]],size=length(x[[j]])*nboot,replace=TRUE),nrow=nboot) -bvec[j,]<-apply(data,1,mean,tr) # Bootstrapped trimmed means for jth group -} -test<-NA -for (d in 1:ncol(con)){ -top<-0 -for (i in 1:J){ -top<-top+con[i,d]*bvec[i,] -} -test[d]<-sum((top>0))/nboot -test[d]<-min(test[d],1-test[d]) -top<-sort(top) -psihat[d,4]<-top[icl] -psihat[d,5]<-top[icu] -} -for (d in 1:ncol(con)){ -psihat[d,1]<-d -testit<-lincon(x,con[,d],tr,pr=FALSE) -psihat[d,6]<-test[d] -psihat[d,2]<-testit$psihat[1,2] -psihat[d,3]<-testit$test[1,4] -} -print("Reminder: To control FWE, reject if the p-value is less than") -print("the crit.p.value listed in the output.") -list(psihat=psihat,crit.p.value=crit,con=con) -} - -llocv2<-function(x,est=median,...){ -if(!is.list(x))val<-est(x,...) -if(is.list(x)){ -val<-NA -for(i in 1:length(x))val[i]<-est(x[[i]],...) -} -if(is.matrix(x))val<-apply(x,2,est,...) 
-list(center=val) -} -NMpca<-function(x,B,...){ -# -# Robust PCA using orthogonal matrices and -# robust generalized variance method -# This function is used by Ppca -# -n<-x[1] -m<-x[2] -p=x[3] -x=matrix(x[4:length(x)],ncol=m) -B=matrix(B,ncol=m) -vals<-NA -z<-matrix(nrow=n,ncol=p) -B <- t(ortho(t(B))) # so rows are orthogonal -for(i in 1:n)z[i,]<-B%*%as.matrix(x[i,]) -vals<-0-gvarg(z) -vals -} - -ancbbpb<-function(x1,y1,x2,y2,fr1=1,est=tmean,fr2=1,nboot=200,pts=NA,plotit=TRUE,SCAT=TRUE, -pch1='+',pch2='o', -SEED=TRUE,alpha=.05,RNA=TRUE,sm=FALSE,LP=TRUE,xout=FALSE,outfun=outpro,...){ -# -# Compare two independent groups using an ancova method. -# A running-interval smooth is used to estimate the regression lines and is -# based in part on bootstrap bagging. -# -# This function is limited to two groups and one covariate. -# -# No assumption is made about the parametric form of the regression -# lines. -# Confidence intervals are computed using a percentile bootstrap -# method. Comparisons are made at five empirically chosen design points when -# pts=NA. To compare groups at specified x values, use pts. -# Example: pts=c(60,70,80) will compare groups at the three design points -# 60, 70 and 80. -# -# xout=F, when plotting, keep leverage points -# sm=F, when plotting, do not use bootstrap bagging -# -# Assume data are in x1 y1 x2 and y2 -# -# fr1 and fr2 are the spans used by the smooth. -# -# SCAT=FALSE will suppress the scatterplot when plotting the regression lines. -# -# RNA=F, when computing bagged estimate, NA values are not removed -# resulting in no estimate of Y at the specified design point, -# RNA=T, missing values are removed and the remaining values are used. -# -xy=elimna(cbind(x1,y1)) -x1=xy[,1] -y1=xy[,2] -xy=elimna(cbind(x2,y2)) -x2=xy[,1] -y2=xy[,2] -# -if(xout){ -flag<-outfun(x1,...)$keep -x1<-x1[flag] -y1<-y1[flag] -flag<-outfun(x2,...)$keep -x2<-x2[flag] -y2<-y2[flag] -} -if(SEED)set.seed(2) -flag=TRUE -if(is.na(pts[1])){ -flag=FALSE -isub<-c(1:5) # Initialize isub -test<-c(1:5) -xorder<-order(x1) -y1<-y1[xorder] -x1<-x1[xorder] -xorder<-order(x2) -y2<-y2[xorder] -x2<-x2[xorder] -n1<-1 -n2<-1 -vecn<-1 -for(i in 1:length(x1))n1[i]<-length(y1[near(x1,x1[i],fr1)]) -for(i in 1:length(x1))n2[i]<-length(y2[near(x2,x1[i],fr2)]) -for(i in 1:length(x1))vecn[i]<-min(n1[i],n2[i]) -sub<-c(1:length(x1)) -isub[1]<-min(sub[vecn>=12]) -isub[5]<-max(sub[vecn>=12]) -isub[3]<-floor((isub[1]+isub[5])/2) -isub[2]<-floor((isub[1]+isub[3])/2) -isub[4]<-floor((isub[3]+isub[5])/2) -mat<-matrix(NA,5,8) -dimnames(mat)<-list(NULL,c("X","n1","n2","DIF","ci.low","ci.hi","p.value","p.crit")) -gv1<-vector("list") -for (i in 1:5){ -j<-i+5 -temp1<-y1[near(x1,x1[isub[i]],fr1)] -temp2<-y2[near(x2,x1[isub[i]],fr2)] -temp1<-temp1[!is.na(temp1)] -temp2<-temp2[!is.na(temp2)] -mat[i,1]<-x1[isub[i]] -mat[i,2]<-length(temp1) -mat[i,3]<-length(temp2) -mat[,4]<-runmbo(x1,y1,pts=x1[isub],pyhat=TRUE,plotit=FALSE,SEED=FALSE,est=est,RNA=RNA)- -runmbo(x2,y2,pts=x1[isub],pyhat=TRUE,plotit=FALSE,SEED=FALSE,est=est,RNA=RNA) -gv1[[i]]<-temp1 -gv1[[j]]<-temp2 -} -I1<-diag(5) -I2<-0-I1 -con<-rbind(I1,I2) -estmat1<-matrix(nrow=nboot,ncol=length(isub)) -estmat2<-matrix(nrow=nboot,ncol=length(isub)) -data1<-matrix(sample(length(y1),size=length(y1)*nboot,replace=TRUE),nrow=nboot) -data2<-matrix(sample(length(y2),size=length(y2)*nboot,replace=TRUE),nrow=nboot) -# -for(ib in 1:nboot){ -estmat1[ib,]=runmbo(x1[data1[ib,]],y1[data1[ib,]],pts=x1[isub], -pyhat=TRUE,plotit=FALSE,SEED=FALSE,est=est,...) 
-estmat2[ib,]=runmbo(x2[data2[ib,]],y2[data2[ib,]],pts=x1[isub], -pyhat=TRUE,plotit=FALSE,SEED=FALSE,est=est,...) -} -dif<-(estmat1 maxhalf) { ## step halving failed; keep old - m <- mold - ## warning("step halving failed in ", maxhalf, " steps") - break - } - k <- k+1 - } - if (k > maxit) warning("iterations did not converge in ", maxit, " steps") - if(trace == 1) - cat("needed", k, "iterations with a total of", - sum(nstps), "stepsize halvings\n") -# return(m) -list(center=m) -} - -matl<-function(x){ -# -# take data in list mode and store it in a matrix -# -J=length(x) -nval=NA -for(j in 1:J)nval[j]=length(x[[j]]) -temp<-matrix(NA,ncol=J,nrow=max(nval)) -for(j in 1:J)temp[1:nval[j],j]<-x[[j]] -temp -} - -list2mat=matl - -list2vec<-function(x){ -if(!is.list(x))stop("x should have list mode") -res=as.vector(matl(x)) -res -} - - -list2matrix<-function(x){ -# -# take data in list mode and store it in a matrix -# -J=length(x) -nval=NA -for(j in 1:J)nval[j]=length(x[[j]]) -temp<-matrix(NA,ncol=J,nrow=max(nval)) -for(j in 1:J)temp[1:nval[j],j]<-x[[j]] -temp -} -Aband<-function(x,alpha=.05,plotit=TRUE,sm=TRUE,SEED=TRUE,nboot=500,grp=c(1:4), -xlab="X (First Factor)",ylab="Delta",crit=NA,print.all=FALSE,plot.op=FALSE){ -# -# Apply the shift function when analyzing main effect in a -# 2 by 2 design. -# -# For variables x1, x2, x3 and x4, -# In effect, this function applies a shift function to the distributions -# d1=(x1+x2)/2 and d2=(x3+x4)/2 -# That is, focus on first factor. -# For second factor, use Bband. -# -# grp indicates the groups to be compared. By default grp=c(1,2,3,4) -# meaning that the first level of factor A consists of groups 1 and 2 -# and the 2nd level of factor A consists of groups 3 and 4. -# (So level 1 of factor B consists of groups 1 and 3 -# -# print.all=F, -# returns number sig, meaning number of confidence intervals that do not -# contain zero, -# the critical value used as well as the KS test statistics. -# print.all=T reports all confidence intervals, the number of which can -# be large. -# -if(!is.list(x) && !is.matrix(x))stop("store data in list mode or a matrix") -if(SEED)set.seed(2) -if(is.matrix(x))x<-listm(x) -for(j in 1:length(x))x[[j]]=elimna(x[[j]])/2 -if(length(grp)<4)stop("There must be at least 4 groups") -if(length(x)!=4)stop("The argument grp must have 4 values") -x<-x[grp] -n<-c(length(x[[1]]),length(x[[2]]),length(x[[3]]),length(x[[4]])) -# Approximate the critical value -# -vals<-NA -y<-list() -if(is.na(crit)){ -print("Approximating critical value. Please wait.") -for(i in 1:nboot){ -for(j in 1:4) -y[[j]]<-rnorm(n[j]) -temp<-ks.test(outer(y[[1]],y[[2]],FUN="+"),outer(y[[3]],y[[4]],FUN="+")) -vals[i]<-temp[1]$statistic -} -vals<-sort(vals) -ic<-(1-alpha)*nboot -crit<-vals[ic] -} -if(plot.op){ -plotit<-F -g2plot(v1,v2) -} -output<-sband(outer(x[[1]],x[[2]],FUN="+"),outer(x[[3]],x[[4]],FUN="+"), -plotit=plotit,crit=crit,flag=FALSE,sm=sm,xlab=xlab,ylab=ylab) -if(!print.all){ -numsig<-output$numsig -ks.test.stat<-ks.test(outer(x[[1]],x[[2]],FUN="+"), -outer(x[[3]],x[[4]],FUN="+"))$statistic -output<-matrix(c(numsig,crit,ks.test.stat),ncol=1) -dimnames(output)<-list(c("number sig","critical value","KS test statistics"), -NULL) -} -output -} - -Bband<-function(x,alpha=.05,plotit=TRUE,sm=TRUE,SEED=TRUE,nboot=500,grp=c(1:4), -xlab="X (First Level)",ylab="Delta",crit=NA,print.all=FALSE,plot.op=FALSE){ -# -# Apply the shift function when analyzing main effect in a -# 2 by 2 design. 
-# -# For variables x1, x2, x3 and x4, -# In effect, this function applies a shift function to the distributions -# d1=(x1+x3)/2 and d2=(x2+x4)/2. -# That is, focus on main effects of Factor B. -# -# grp indicates the groups to be compared. By default grp=c(1,2,3,4) -# meaning that the first level of factor A consists of groups 1 and 2 -# and the 2nd level of factor A consists of groups 3 and 4. -# (So level 1 of factor B consists of groups 1 and 3 -# -# print.all=F, -# returns number sig, meaning number of confidence intervals that do not -# contain zero, -# the critical value used as well as the KS test statistics. -# print.all=T reports all confidence intervals, the number of which can -# be large. -# -if(!is.list(x) && !is.matrix(x))stop("store data in list mode or a matrix") -if(SEED)set.seed(2) -if(is.matrix(x))x<-listm(x) -for(j in 1:length(x))x[[j]]=elimna(x[[j]])/2 -if(length(x)<4)stop("There must be at least 4 groups") -if(length(grp)!=4)stop("The argument grp must have 4 values") -x<-x[grp] -grp=c(1,3,2,4) -x<-x[grp] # Arrange groups for main effects on factor B -n<-c(length(x[[1]]),length(x[[2]]),length(x[[3]]),length(x[[4]])) -# Approximate the critical value -# -vals<-NA -y<-list() -if(is.na(crit)){ -print("Approximating critical value. Please wait.") -for(i in 1:nboot){ -for(j in 1:4) -y[[j]]<-rnorm(n[j]) -temp<-ks.test(outer(y[[1]],y[[2]],FUN="+"),outer(y[[3]],y[[4]],FUN="+")) -vals[i]<-temp[1]$statistic -} -vals<-sort(vals) -ic<-(1-alpha)*nboot -crit<-vals[ic] -} -if(plot.op){ -plotit<-F -g2plot(v1,v2) -} -output<-sband(outer(x[[1]],x[[2]],FUN="+"),outer(x[[3]],x[[4]],FUN="+"), -plotit=plotit,crit=crit,flag=FALSE,sm=sm,xlab=xlab,ylab=ylab) -if(!print.all){ -numsig<-output$numsig -ks.test.stat<-ks.test(outer(x[[1]],x[[2]],FUN="+"), -outer(x[[3]],x[[4]],FUN="+"))$statistic -output<-matrix(c(numsig,crit,ks.test.stat),ncol=1) -dimnames(output)<-list(c("number sig","critical value","KS test statistics"), -NULL) -} -output -} - -iband<-function(x,alpha=.05,q = c(0.1, 0.25, 0.5, 0.75, 0.9), method='BH', SW=FALSE, plotit=FALSE,SEED=TRUE,nboot=500,grp=c(1:4), -xlab='X'){ -# -# 2 by 2 design. 
-# -# For variables x1, x2, x3 and x4, -# This function compares the quantiles of the distributions -# d1=x1-x2 and d2=x3-x4 -# -# SW=TRUE: switch rows and columns -# -if(SEED)set.seed(2) -if(is.matrix(x) || is.data.frame(x))x<-listm(x) -if(length(x)!=4)stop('Should be exactly 4 groups') -for(j in 1:length(x))x[[j]]=elimna(x[[j]]) -if(SW)x=x[c(1,3,2,4)] -n<-c(length(x[[1]]),length(x[[2]]),length(x[[3]]),length(x[[4]])) -nq=length(q) -output=matrix(NA,nrow=length(q),ncol=8) -dimnames(output)=list(NULL,c('Quant','Est.Lev 1','Est.Lev 2','Dif','ci.low','ci.up','p-value','p.adj')) -output[,1]=q -for(j in 1:nq)output[j,2]=hd(outer(x[[1]],x[[2]],FUN='-'),q=q[j]) -for(j in 1:nq)output[j,3]=hd(outer(x[[3]],x[[4]],FUN='-'),q=q[j]) -output[,4]=output[,2]-output[,3] -e=lapply(q,iband.sub,x=x,nboot=nboot) -for(j in 1:nq)output[j,5]=e[[j]]$ci[1] -for(j in 1:nq)output[j,6]=e[[j]]$ci[2] -for(j in 1:nq)output[j,7]=e[[j]]$p.value -output[,8]=p.adjust(output[,7],method=method) -if(plotit){ -g2plot(outer(x[[1]],x[[2]],FUN='-'),outer(x[[3]],x[[4]],FUN='-'),xlab=xlab) - -} -output -} - -iband.sub<-function(q,x,nboot=500,alpha=.05,SEED=FALSE){ -# -# -# -if(SEED)set.seed(2) -if(is.matrix(x))x<-listm(x) -if(length(x)!=4)stop('There must be 4 groups') -for(j in 1:length(x))x[[j]]=elimna(x[[j]]) -v1=NA -v2=NA -B=list() -for(i in 1:nboot){ -for(j in 1:4)B[[j]]=sample(x[[j]],replace=TRUE) -v1[i]=hd(outer(B[[1]],B[[2]],FUN='-'),q=q) -v2[i]=hd(outer(B[[3]],B[[4]],FUN='-'),q=q) -} -p=mean(v10 & !is.na(l)])+length(u[u<0 & !is.na(u)]) -qhat<-c(1:length(x))/length(x) -m<-matrix(c(qhat,l,u),length(x),3) -dimnames(m)<-list(NULL,c("qhat","lower","upper")) -xsort<-sort(x) -ysort<-sort(y) -del<-0 -for (i in 1:length(x)){ -ival<-round(length(y)*i/length(x)) -if(ival<=0)ival<-1 -if(ival>length(y))ival<-length(y) -del[i]<-ysort[ival]-xsort[i] -} -if(iloop==1){ -allx<-c(xsort,xsort,xsort) -ally<-c(del,m[,2],m[,3]) -} -if(iloop==2){ -allx<-c(allx,xsort,xsort,xsort) -ally<-c(ally,del,m[,2],m[,3]) -plot(allx,ally,type="n",ylab=ylab,xlab=xlab) -} -ik<-rep(F,length(xsort)) -if(sm){ -if(op==1){ -ik<-duplicated(xsort) -del<-lowess(xsort,del)$y -} -if(op!=1)del<-runmean(xsort,del,pyhat=TRUE) -} -if(iloop==1){ -xsort1=xsort[!ik] -del1=del[!ik] -} -if(iloop==2){ -lines(xsort1,del1,lty=iloop) -lines(xsort[!ik],del[!ik],lty=iloop) -}} -done="Done" -done -} - - -scor<-function(x,y=NULL,corfun=pcor,gval=NA,plotit=FALSE,op=TRUE,MM=FALSE,cop=3,xlab='VAR 1', -ylab='VAR 2',STAND=TRUE,pr=TRUE,SEED=TRUE,MC=FALSE,RAN=FALSE){ -# -# Compute a skipped correlation coefficient. -# -# Eliminate outliers using a projection method -# That is, compute Donoho-Gasko median, for each point -# consider the line between it and the median, -# project all points onto this line, and -# check for outliers using a boxplot rule. -# Repeat this for all points. A point is declared -# an outlier if for any projection it is an outlier -# using a modification of the usual boxplot rule. -# -# For information about the argument cop, see the function -# outpro. -# -# Eliminate any outliers and compute correlation using -# remaining data. -# -# MC=TRUE, the multicore version of outpro is used -# -# corfun=pcor means Pearson's correlation is used. -# corfun=spear means Spearman's correlation is used. -# corfun=tau means Kendall tau is used. -# -#. 
RAN=TRUE uses random projections instead, which results in faster execution time -# -if(SEED){ -oldSeed <- .Random.seed -set.seed(12) # So when using MVE or MCD, get consistent results -} -if(identical(corfun,wincor))corfun=winall -if(is.null(y[1]))m<-x -if(!is.null(y[1]))m<-cbind(x,y) -m<-elimna(m) -if(!RAN){ -if(!MC)temp<-outpro(m,gval=gval,plotit=plotit,op=op,cop=cop,MM=MM, -xlab=xlab,ylab=ylab,STAND=STAND,pr=pr)$keep -if(MC)temp<-outproMC(m,gval=gval,plotit=plotit,op=op,cop=cop,MM=MM, -xlab=xlab,ylab=ylab,STAND=STAND,pr=pr)$keep -} -if(RAN)temp=outpro.depth(m,MM=MM,plotit=plotit)$keep -tcor<-corfun(m[temp,])$cor -if(!is.null(dim((tcor))))tcor<-tcor[1,2] -test<-abs(tcor*sqrt((nrow(m)-2)/(1-tcor**2))) -if(ncol(m)!=2)diag(test)<-NA -crit<-6.947/nrow(m)+2.3197 -if(SEED) { - assign(x='.Random.seed', value=oldSeed, envir=.GlobalEnv) -} -list(cor=tcor,test.stat=test,crit.05=crit) -} - - -cov.mba<-function(x,COR=FALSE){ -val<-covmba2(x)$cov -if(COR){ -val=val/outer(sqrt(diag(val)),sqrt(diag(val))) -} -val -} -qregci<-function(x,y,nboot=100,alpha=.05,qval=.5,q=NULL,SEED=TRUE,pr=TRUE,xout=FALSE,outfun=outpro,...){ -# -# Test the hypothesis that the quantile regression slopes are zero. -# -# qval=.5 i.e, default is to -# use the .5 quantile regression line only. -# -# Suggest only using quantiles between -# .2 and .8. If using both .2 and .8 quantiles, or -# the .2, .5 and .8 quantile regression lines. -# FWE is controlled for alpha=.1, .05, .025 and .01. -# -if(!is.null(q))qval=q -xx<-elimna(cbind(x,y)) -np<-ncol(xx) -p<-np-1 -y<-xx[,np] -x<-xx[,1:p] -x<-as.matrix(x) -if(xout){ -if(pr)print("Default for argument outfun is now outpro") -x<-as.matrix(x) -flag<-outfun(x,plotit=FALSE,...)$keep -x<-x[flag,] -y<-y[flag] -} -x<-as.matrix(x) -n<-length(y) -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -#if(pr)print("Taking bootstrap samples. Please wait.") -data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot) -# determine critical value. 
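# approximate critical values are available only for alpha=.1, .05, .025 and .01;
# crit.fwe is the adjusted critical value used when more than one slope is tested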
-crit<-NA -if(alpha==.1)crit<-1.645-1.19/sqrt(n) -if(alpha==.05)crit<-1.96-1.37/sqrt(n) -if(alpha==.025)crit<-2.24-1.18/sqrt(n) -if(alpha==.01)crit<-2.58-1.69/sqrt(n) -crit.fwe<-crit -if(length(qval)==2 || p==2){ -if(alpha==.1)crit.fwe<-1.98-1.13/sqrt(n) -if(alpha==.05)crit.fwe<-2.37-1.56/sqrt(n) -if(alpha==.025)crit.fwe<-2.60-1.04/sqrt(n) -if(alpha==.01)crit.fwe<-3.02-1.35/sqrt(n) -} -if(length(qval)==3 || p==3){ -if(alpha==.1)crit.fwe<-2.145-1.31/sqrt(n) -if(alpha==.05)crit.fwe<-2.49-1.49/sqrt(n) -if(alpha==.025)crit.fwe<-2.86-1.52/sqrt(n) -if(alpha==.01)crit.fwe<-3.42-1.85/sqrt(n) -} -if(is.na(crit.fwe)){ -print("Could not determine a critical value") -print("Only alpha=.1, .05, .025 and .01 are allowed") -} -if(p==1){ -bvec<-apply(data,1,qindbt.sub,x,y,qval=qval) -estsub<-NA -for(i in 1:length(qval)){ -estsub[i]<-qreg(x,y,qval[i])$coef[2] -} -if(is.matrix(bvec))se.val<-sqrt(apply(bvec,1,FUN=var)) -if(!is.matrix(bvec))se.val<-sqrt(var(bvec)) -test<-abs(estsub)/se.val -ci.mat<-matrix(nrow=length(qval),ncol=3) -dimnames(ci.mat)<-list(NULL,c("Quantile","ci.lower","ci.upper")) -ci.mat[,1]<-qval -ci.mat[,2]<-estsub-crit*se.val -ci.mat[,3]<-estsub+crit*se.val -} -if(p>1){ -if(length(qval)>1){ -print("With p>1 predictors,only the first qval value is used") -} -bvec<-apply(data,1,regboot,x,y,regfun=qreg,qval=qval[1]) -se.val<-sqrt(apply(bvec,1,FUN=var)) -estsub<-qreg(x,y,qval=qval[1])$coef -test<-abs(estsub)/se.val -ci.mat<-matrix(nrow=np,ncol=3) -dimnames(ci.mat)<-list(NULL,c("Predictor","ci.lower","ci.upper")) -ci.mat[,1]<-c(0:p) -ci.mat[,2]<-estsub-crit*se.val -ci.mat[,3]<-estsub+crit*se.val -} -list(test=test,se.val=se.val,crit.val=crit,crit.fwe=crit.fwe, -slope.est=estsub,ci=ci.mat) -} - - - - -covmba2<-function(x, csteps = 5) -{ -# Perform the median ball algorithm. -# -# It returns a measure of location and scatter for the -# multivariate data in x, which is assumed to have -# p>-2 column and n rows. 
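# (that is, x is assumed to be an n by p matrix with at least two columns)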
-# -# This code is based on a very slight modificatiion of code originally -# written by David Olive -# -x<-as.matrix(x) -if(!is.matrix(x))stop("x should be a matrix") - p <- dim(x)[2] -#if(p==1)stop("x should be a matrix with two or more columns of variables") - ##get the DGK estimator - covs <- var(x) - mns <- apply(x, 2, mean) ## concentrate - for(i in 1:csteps) { - md2 <- mahalanobis(x, mns, covs) - medd2 <- median(md2) -# mns <- apply(x[md2 <= medd2, ], 2, - mns <- apply(as.matrix(x[md2 <= medd2, ]), 2, - mean) - covs <- var(x[md2 <= medd2, ]) - } - covb <- covs - mnb <- mns ##get the square root of det(covb) - critb <- prod(diag(chol(covb))) - ##get the resistant estimator - covv <- diag(p) - med <- apply(x, 2, median) - md2 <- mahalanobis(x, center = med, covv) - medd2 <- median(md2) ## get the start -# mns <- apply(x[md2 <= medd2, ], 2, mean) - mns <- apply(as.matrix(x[md2 <= medd2, ]), 2, mean) - covs <- var(x[md2 <= medd2, ]) ## concentrate - for(i in 1:csteps) { - md2 <- mahalanobis(x, mns, covs) - medd2 <- median(md2) - # mns <- apply(x[md2 <= medd2, ], 2,mean) - mns <- apply(as.matrix(x[md2 <= medd2, ]), 2, mean) - covs <- var(x[md2 <= medd2, ]) - } - crit <- prod(diag(chol(covs))) - if(crit < critb) { - critb <- crit - covb <- covs - mnb <- mns - } -##scale for better performance at MVN - rd2 <- mahalanobis(x, mnb, covb) - const <- median(rd2)/(qchisq(0.5, p)) - covb <- const * covb - list(center = mnb, cov = covb) -} - - -rmmcp<-function(x, y=NULL,con = 0, tr = 0.2, alpha = 0.05,dif=TRUE,hoch=TRUE,na.rm=TRUE){ -# -# MCP on trimmed means with FWE controlled with Hochberg's method -# hoch=FALSE, will use Rom's method if alpha=.05 or .01 and number of tests is <=10 -# -# Note: confidence intervals are adjusted based on the corresponding critical p-value. 
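# A minimal usage sketch (hypothetical data): for J dependent groups stored in
# the columns of an n by J matrix m,
#   m <- matrix(rnorm(60), ncol = 3)
#   rmmcp(m)               # all pairwise comparisons based on difference scores
#   rmmcp(m, dif = FALSE)  # compare the marginal 20% trimmed means instead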
-# -if(!is.null(y))x=cbind(x,y) -flagcon=FALSE -if(!is.matrix(x))x<-matl(x) -if(!is.matrix(x))stop("Data must be stored in a matrix or in list mode.") -con<-as.matrix(con) -J<-ncol(x) -xbar<-vector("numeric",J) -x<-elimna(x) # Remove missing values -nval<-nrow(x) -h1<-nrow(x)-2*floor(tr*nrow(x)) -df<-h1-1 -for(j in 1: J)xbar[j]<-mean(x[,j],tr) -if(sum(con^2!=0))CC<-ncol(con) -if(sum(con^2)==0)CC<-(J^2-J)/2 -ncon<-CC -if(alpha==.05){ -dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) -if(ncon > 10){ -avec<-.05/c(11:ncon) -dvec<-c(dvec,avec) -}} -if(alpha==.01){ -dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) -if(ncon > 10){ -avec<-.01/c(11:ncon) -dvec<-c(dvec,avec) -}} -if(hoch)dvec<-alpha/c(1:ncon) -if(alpha != .05 && alpha != .01)dvec<-alpha/c(1:ncon) -if(sum(con^2)==0){ -flagcon<-TRUE -psihat<-matrix(0,CC,5) -dimnames(psihat)<-list(NULL,c("Group","Group","psihat","ci.lower","ci.upper")) -test<-matrix(NA,CC,6) -dimnames(test)<-list(NULL,c("Group","Group","test","p.value","p.crit","se")) -temp1<-0 -jcom<-0 -for (j in 1:J){ -for (k in 1:J){ -if (j < k){ -jcom<-jcom+1 -q1<-(nrow(x)-1)*winvar(x[,j],tr) -q2<-(nrow(x)-1)*winvar(x[,k],tr) -q3<-(nrow(x)-1)*wincor(x[,j],x[,k],tr)$cov -sejk<-sqrt((q1+q2-2*q3)/(h1*(h1-1))) -if(!dif){ -test[jcom,6]<-sejk -test[jcom,3]<-(xbar[j]-xbar[k])/sejk -temp1[jcom]<-2 * (1 - pt(abs(test[jcom,3]), df)) -test[jcom,4]<-temp1[jcom] -psihat[jcom,1]<-j -psihat[jcom,2]<-k -test[jcom,1]<-j -test[jcom,2]<-k -psihat[jcom,3]<-(xbar[j]-xbar[k]) -} -if(dif){ -dv<-x[,j]-x[,k] -test[jcom,6]<-trimse(dv,tr) -temp<-trimci(dv,alpha=alpha/CC,pr=FALSE,tr=tr) -test[jcom,3]<-temp$test.stat -temp1[jcom]<-temp$p.value -test[jcom,4]<-temp1[jcom] -psihat[jcom,1]<-j -psihat[jcom,2]<-k -test[jcom,1]<-j -test[jcom,2]<-k -psihat[jcom,3]<-mean(dv,tr=tr) -psihat[jcom,4]<-temp$ci[1] -psihat[jcom,5]<-temp$ci[2] -} -}}} -if(hoch)dvec<-alpha/c(1:ncon) -temp2<-order(0-temp1) -zvec<-dvec[1:ncon] -sigvec<-(test[temp2,4]>=zvec) -if(sum(sigvec)0){ -if(nrow(con)!=ncol(x))warning("The number of groups does not match the number - of contrast coefficients.") -ncon<-ncol(con) -psihat<-matrix(0,ncol(con),4) -dimnames(psihat)<-list(NULL,c("con.num","psihat","ci.lower","ci.upper")) -test<-matrix(0,ncol(con),5) -dimnames(test)<-list(NULL,c("con.num","test","p.value","p.crit","se")) -temp1<-NA -for (d in 1:ncol(con)){ -psihat[d,1]<-d -if(!dif){ -psihat[d,2]<-sum(con[,d]*xbar) -sejk<-0 -for(j in 1:J){ -for(k in 1:J){ -djk<-(nval-1)*wincor(x[,j],x[,k], tr)$cov/(h1*(h1-1)) -sejk<-sejk+con[j,d]*con[k,d]*djk -}} -sejk<-sqrt(sejk) -test[d,1]<-d -test[d,2]<-sum(con[,d]*xbar)/sejk -test[d,5]<-sejk -temp1[d]<-2 * (1 - pt(abs(test[d,2]), df)) -} -if(dif){ -for(j in 1:J){ -if(j==1)dval<-con[j,d]*x[,j] -if(j>1)dval<-dval+con[j,d]*x[,j] -} -temp1[d]<-trimci(dval,tr=tr,pr=FALSE)$p.value -test[d,1]<-d -test[d,2]<-trimci(dval,tr=tr,pr=FALSE)$test.stat -test[d,5]<-trimse(dval,tr=tr) -psihat[d,2]<-mean(dval,tr=tr) -}} -test[,3]<-temp1 -temp2<-order(0-temp1) -zvec<-dvec[1:ncon] -sigvec<-(test[temp2,3]>=zvec) -if(sum(sigvec)=K) -i0<-sum(flag) -il<-length(y)-i0+1 -res.sort<-sort(res.scale) -if(i0>0){ -dval<-pnorm(res.sort[il:length(y)])-c(il:length(y))/length(y) -} -if(i0<=0)dval<-0 -dval<-max(dval) -ndval<-floor(length(y)*dval) -if(ndval<0)ndval<-0 -iup<-length(y)-ndval -rord<-order(res.scale) -flag<-rord[1:iup] -x=as.matrix(x) -temp<-lsfit(x[flag,],y[flag]) -list(coef=temp$coef,res=temp$residual) -} - - - -bwrmcp<-function(J,K,x,grp=NA,alpha=.05,bhop=TRUE){ -# -# Do all pairwise 
comparisons of -# main effects for Factor A and B and all interactions -# using a rank-based method that tests for equal distributions. -# -# A between by within subjects design is assumed. -# Levels of Factor A are assumed to be independent and -# levels of Factor B are dependent. -# -# The data are assumed to be stored in x in list mode or in a matrix. -# If grp is unspecified, it is assumed x[[1]] contains the data -# for the first level of both factors: level 1,1. -# x[[2]] is assumed to contain the data for level 1 of the -# first factor and level 2 of the second factor: level 1,2 -# x[[j+1]] is the data for level 2,1, etc. -# If the data are in wrong order, grp can be used to rearrange the -# groups. For example, for a two by two design, grp<-c(2,4,3,1) -# indicates that the second group corresponds to level 1,1; -# group 4 corresponds to level 1,2; group 3 is level 2,1; -# and group 1 is level 2,2. -# -# Missing values are automatically removed. -# - if(is.list(x))xrem=matl(x) - JK <- J * K - if(is.matrix(x)){ - xrem=x - x <- listm(x) -} - - if(!is.na(grp[1])) { - yy <- x - x<-list() - for(j in 1:length(grp)) - x[[j]] <- yy[[grp[j]]] - } - if(!is.list(x)) - stop("Data must be stored in list mode or a matrix.") -# for(j in 1:JK) { -# xx <- x[[j]] -# x[[j]] <- xx[!is.na(xx)] # Remove missing values -# } - # -if(JK != length(x))warning("The number of groups does not match the number of contrast coefficients.") -for(j in 1:JK){ -temp<-x[[j]] -temp<-temp[!is.na(temp)] # Remove missing values. -x[[j]]<-temp -} -# -CC<-(J^2-J)/2 -# Determine critical values -ncon<-CC*(K^2-K)/2 -if(!bhop){ -if(alpha==.05){ -dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) -if(ncon > 10){ -avec<-.05/c(11:ncon) -dvec<-c(dvec,avec) -}} -if(alpha==.01){ -dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) -if(ncon > 10){ -avec<-.01/c(11:ncon) -dvec<-c(dvec,avec) -}} -if(alpha != .05 && alpha != .01)dvec<-alpha/c(1:ncon) -} -if(bhop)dvec<-(ncon-c(1:ncon)+1)*alpha/ncon -Fac.A<-matrix(0,CC,5) -dimnames(Fac.A)<-list(NULL,c("Level","Level","test.stat","p-value","sig.crit")) -mat<-matrix(c(1:JK),ncol=K,byrow=TRUE) -ic<-0 -for(j in 1:J){ -for(jj in 1:J){ -if(j < jj){ -ic<-ic+1 -Fac.A[ic,1]<-j -Fac.A[ic,2]<-jj -datsub=xrem[,c(mat[j,],mat[jj,])] -datsub=elimna(datsub) -#temp<-bwrank(2,K,elimna(x[,c(mat[j,],mat[jj,])])) -temp<-bwrank(2,K,datsub) -Fac.A[ic,3]<-temp$test.A -Fac.A[ic,4]<-temp$p.value.A -}}} -temp2<-order(0-Fac.A[,4]) -Fac.A[temp2,5]<-dvec[1:length(temp2)] -CCB<-(K^2-K)/2 -ic<-0 -Fac.B<-matrix(0,CCB,5) -dimnames(Fac.B)<-list(NULL,c("Level","Level","test.stat","p-value","sig.crit")) -for(k in 1:K){ -for(kk in 1:K){ -if(k=12]) -isub[5]<-max(sub[vecn>=12]) -isub[3]<-floor((isub[1]+isub[5])/2) -isub[2]<-floor((isub[1]+isub[3])/2) -isub[4]<-floor((isub[3]+isub[5])/2) -mat<-matrix(NA,5,7) -dimnames(mat)<-list(NULL,c("X","n1","n2","DIF","ci.low","ci.hi","p.value")) -gv1<-vector("list") -for (i in 1:5){ -j<-i+5 -temp1<-y1[near(x1,x1[isub[i]],fr1)] -temp2<-y2[near(x2,x1[isub[i]],fr2)] -temp1<-temp1[!is.na(temp1)] -temp2<-temp2[!is.na(temp2)] -mat[i,1]<-x1[isub[i]] -mat[i,2]<-length(temp1) -mat[i,3]<-length(temp2) -mat[,4]<-runmbo(x1,y1,pts=x1[isub],pyhat=TRUE,plotit=FALSE,SEED=FALSE,est=tmean)- -runmbo(x2,y2,pts=x1[isub],pyhat=TRUE,plotit=FALSE,SEED=FALSE,est=median) -gv1[[i]]<-temp1 -gv1[[j]]<-temp2 -} -I1<-diag(5) -I2<-0-I1 -con<-rbind(I1,I2) -estmat1<-matrix(nrow=nboot,ncol=length(isub)) -estmat2<-matrix(nrow=nboot,ncol=length(isub)) 
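# generate nboot bootstrap samples of row indices, separately for each group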
-data1<-matrix(sample(length(y1),size=length(y1)*nboot,replace=TRUE),nrow=nboot) -data2<-matrix(sample(length(y2),size=length(y2)*nboot,replace=TRUE),nrow=nboot) -# -for(ib in 1:nboot){ -estmat1[ib,]=runmbo(x1[data1[ib,]],y1[data1[ib,]],pts=x1[isub], -pyhat=TRUE,plotit=FALSE,SEED=FALSE,est=median) -estmat2[ib,]=runmbo(x2[data2[ib,]],y2[data2[ib,]],pts=x1[isub], -pyhat=TRUE,plotit=FALSE,SEED=FALSE,est=median) -} -dif<-(estmat1=.5)stop("Amount of trimming must be less than .5") -if(is.list(m))m<-matl(m) -if(!is.matrix(m))stop("Data must be stored in a matrix or in list mode.") -if(ncol(m)==1){ -if(tr<.5)val<-mean(m,tr) -} -if(ncol(m)>1){ -temp<-NA -if(ncol(m)!=2){ -# Use approximate depth -if(dop==1)temp<-fdepth(m,plotit=FALSE,cop=cop) -if(dop==2)temp<-fdepthv2(m) -} -# Use exact depth if ncol=2 -if(ncol(m)==2){ -for(i in 1:nrow(m)) -temp[i]<-depth(m[i,1],m[i,2],m) -}} -mdep<-max(temp) -flag<-(temp==mdep) -flag2<-(temp>=tr) -if(sum(flag2)==0)stop("Trimmed all of the data") -if(sum(flag2)==1){ -if(pr)print("Warning: Trimmed all but one point") -val<-0 -} -if(sum(flag2)>1)val<-var(m[flag2,]) -val -} - -medr<-function(x,est=median,alpha=.05,nboot=500,grp=NA,op=1,MM=FALSE,cop=3,pr=TRUE, -SEED=TRUE,...){ -# -# Test the hypothesis that the distribution for each pairwise -# difference has a measure of location = 0 -# By default, the median estimator is used -# -# Independent groups are assumed. -# -# The data are assumed to be stored in x in list mode or in a matrix. -# If stored in list mode, -# x[[1]] contains the data for the first group, x[[2]] the data -# for the second group, etc. Length(x)=the number of groups = J, say. -# If stored in a matrix, columns correspond to groups. -# -# By default, all pairwise differences are used, but contrasts -# can be specified with the argument con. -# The columns of con indicate the contrast coefficients. -# Con should have J rows, J=number of groups. -# For example, con[,1]=c(1,1,-1,-1,0,0) and con[,2]=c(,1,-1,0,0,1,-1) -# will test two contrasts: (1) the sum of the first -# two measures of location is -# equal to the sum of the second two, and (2) the difference between -# the first two is equal to the difference between the -# measures of location for groups 5 and 6. -# -# The default number of bootstrap samples is nboot=500 -# -# op controls how depth is measured -# op=1, Mahalanobis -# op=2, Mahalanobis based on MCD covariance matrix -# op=3, Projection distance -# op=4, Projection distance using FORTRAN version -# -# for arguments MM and cop, see pdis. -# -if(is.matrix(x)){ -xx<-list() -for(i in 1:ncol(x)){ -xx[[i]]<-x[,i] -} -x<-xx -} -if(!is.list(x))stop("Data must be stored in list mode or in matrix mode.") -if(!is.na(grp)){ # Only analyze specified groups. -xx<-list() -for(i in 1:length(grp))xx[[i]]<-x[[grp[1]]] -x<-xx -} -J<-length(x) -mvec<-NA -for(j in 1:J){ -temp<-x[[j]] -temp<-temp[!is.na(temp)] # Remove missing values. -x[[j]]<-temp -mvec[j]<-est(temp,...) -} -Jm<-J-1 -d<-(J^2-J)/2 -data<-list() -bvec<-matrix(NA,ncol=d,nrow=nboot) -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -if(pr)print("Taking bootstrap samples. 
Please wait.") -for(it in 1:nboot){ -for(j in 1:J)data[[j]]<-sample(x[[j]],size=length(x[[j]]),replace=TRUE) -dval<-0 -for(j in 1:J){ -for(k in 1:J){ -if(j=dv[1:nboot])/nboot -if(op==4)print(sig.level) -list(sig.level=sig.level,output=output) -} - -medind<-function(x,y,qval=.5,nboot=1000,com.pval=FALSE,SEED=TRUE,alpha=.05,pr=TRUE, -xout=FALSE,outfun=out,...){ -# -# Test the hypothesis that the regression surface is a flat -# horizontal plane. -# The method is based on a modification of a method derived by -# He and Zhu 2003, JASA, 98, 1013-1022. -# Here, resampling is avoided using approximate critical values if -# com.pval=F -# -# critical values are available for 10<=n<=400, p=1,...,8 and -# quantiles -# qval=.25,.5, .75. -# -# To get a p-value, via simulations, set com.pval=T -# nboot is number of simulations used to determine the p-value. -# -if(pr){ -if(!com.pval)print("To get a p-value, set com.pval=T") -print("Reject if the test statistic exceeds the critical value") -if(length(y)>400)print("If n>400, current version requires com.pval=TRUE, resulting in high execution time") -} -#store.it=F -x<-as.matrix(x) -p<-ncol(x) -pp1<-p+1 -p.val<-NULL -crit.val<-NULL -yx<-elimna(cbind(y,x)) #Eliminate missing values. -y<-yx[,1] -x<-yx[,2:pp1] -x<-as.matrix(x) -if(xout){ -flag<-outfun(x,...)$keep -x<-x[flag,] -y<-y[flag] -} -n<-length(y) -if(n>400)com.pval=T -if(qval==.5){ -resmat1=matrix(c( 0.0339384580, 0.044080032, 0.050923441, 0.064172557, - 0.0153224731, 0.021007108, 0.027687963, 0.032785044, - 0.0106482053, 0.014777728, 0.018249546, 0.023638611, - 0.0066190573, 0.009078091, 0.011690825, 0.014543009, - 0.0031558563, 0.004374515, 0.005519069, 0.007212951, - 0.0015448987, 0.002231473, 0.002748314, 0.003725916, - 0.0007724197, 0.001021767, 0.001370776, 0.001818037),ncol=4,nrow=7,byrow=TRUE) -resmat2=matrix(c( - 0.052847794, 0.061918744, 0.071346969, 0.079163419, - 0.021103277, 0.027198076, 0.031926052, 0.035083610, - 0.013720585, 0.018454145, 0.022177381, 0.026051716, - 0.008389969, 0.010590374, 0.012169233, 0.015346065, - 0.004261627, 0.005514060, 0.007132021, 0.008416836, - 0.001894753, 0.002416311, 0.003085230, 0.003924706, - 0.001045346, 0.001347837, 0.001579373, 0.001864344),ncol=4,nrow=7,byrow=TRUE) -resmat3=matrix(c( -0.071555715, 0.082937665, 0.089554679, 0.097538044, -0.031060795, 0.035798539, 0.043862556, 0.053712151, -0.019503635, 0.023776479, 0.027180121, 0.030991367, -0.011030001, 0.013419347, 0.015557409, 0.017979524, -0.005634478, 0.006804788, 0.007878358, 0.008807657, -0.002552182, 0.003603778, 0.004275965, 0.005021989, -0.001251044, 0.001531919, 0.001800608, 0.002037870),ncol=4,nrow=7,byrow=TRUE) -resmat4=matrix(c( -0.093267532, 0.101584002, 0.108733965, 0.118340448, -0.038677863, 0.045519806, 0.051402903, 0.060097046, -0.024205231, 0.029360145, 0.034267265, 0.039381482, -0.013739157, 0.015856343, 0.018065898, 0.019956084, -0.006467562, 0.007781030, 0.009037972, 0.010127143, -0.003197162, 0.003933525, 0.004656625, 0.005929469, -0.001652690, 0.001926060, 0.002363874, 0.002657071),ncol=4,nrow=7,byrow=TRUE) -resmat5=matrix(c( -0.117216934, 0.124714114, 0.129458602, 0.136456163, -0.048838630, 0.055608712, 0.060580045, 0.067943676, -0.030594644, 0.035003872, 0.040433885, 0.047648696, -0.016940240, 0.019527491, 0.022047442, 0.025313443, -0.008053039, 0.009778574, 0.011490394, 0.013383628, -0.003760567, 0.004376294, 0.005097890, 0.005866240, -0.001894616, 0.002253522, 0.002612405, 0.002938808),ncol=4,nrow=7,byrow=TRUE) -resmat6=matrix(c( -0.136961531, 0.144120225, 0.149003907, 
0.152667432, -0.055909481, 0.062627211, 0.069978086, 0.081189957, -0.034634825, 0.040740587, 0.044161376, 0.047722045, -0.020165417, 0.023074738, 0.025881208, 0.028479913, -0.009436297, 0.011246968, 0.013220963, 0.015100546, -0.004644596, 0.005334418, 0.006040595, 0.007237195, -0.002277590, 0.002635712, 0.002997398, 0.003669488),ncol=4,nrow=7,byrow=TRUE) -resmat7=matrix(c( - 0.156184672, 0.163226643, 0.171754686, 0.177142753, - 0.070117003, 0.077052773, 0.082728047, 0.090410797, - 0.041774517, 0.047379662, 0.053101833, 0.057674454, - 0.023384451, 0.026014421, 0.029609042, 0.032619018, - 0.010856382, 0.012567043, 0.013747870, 0.016257014, - 0.005164004, 0.006131755, 0.006868101, 0.008351046, - 0.002537642, 0.003044154, 0.003623654, 0.003974469),ncol=4,nrow=7,byrow=TRUE) -resmat8=matrix(c( -0.178399742, 0.180006714, 0.193799396, 0.199585892, -0.078032767, 0.085624186, 0.091511226, 0.102491785, -0.045997886, 0.052181615, 0.057362163, 0.062630424, -0.025895739, 0.029733034, 0.033764463, 0.037873655, -0.012195876, 0.013663248, 0.015487587, 0.017717864, -0.005892418, 0.006876488, 0.007893475, 0.008520783, -0.002839731, 0.003243909, 0.003738571, 0.004124057),ncol=4,nrow=7,byrow=TRUE) -crit5=array(cbind(resmat1,resmat2,resmat3,resmat4,resmat5,resmat6,resmat7, -resmat8),c(7,4,8)) -flag=TRUE -crit.val=NULL -if(p > 8)flag=FALSE -if(n<10 || n>=400)flag=FALSE -aval<-c(.1,.05,.025,.01) -aokay<-duplicated(c(alpha,aval)) -if(sum(aokay)==0)flag=FALSE -if(flag){ -nalpha=c(0:4) -asel=c(0,aval) -ialpha=nalpha[aokay] -critit=crit5[,ialpha,p] -nvec<-c(10,20,30,50,100,200,400) -nval<-duplicated(c(n,nvec)) -nval<-nval[2:8] -if(sum(nval)>0)crit.val<-critit[nval] -loc<-rank(c(n,nvec)) -xx<-c(1/nvec[loc[1]-1]^1.5,1/nvec[loc[1]]^1.5) -yy<-c(critit[loc[1]-1],critit[loc[1]]) -icoef<-tsp1reg(xx,yy)$coef -crit.val<-icoef[1]+icoef[2]/n^1.5 -}} -mqval<-min(c(qval,1-qval)) -if(mqval==.25){ -resmat1=matrix(c( - 0.029933486, 0.0395983678, 0.054087714, 0.062961453, - 0.011122294, 0.0149893431, 0.018154062, 0.022685244, - 0.009207200, 0.0113020766, 0.014872309, 0.019930730, - 0.004824185, 0.0070402246, 0.010356886, 0.013176896, - 0.002370379, 0.0033146605, 0.004428004, 0.005122988, - 0.001106460, 0.0016110185, 0.001984450, 0.002650256, - 0.000516646, 0.0006796144, 0.000868751, 0.001202042),ncol=4,nrow=7,byrow=TRUE) -resmat2=matrix(c( -0.0448417783, 0.0602598211, 0.066001091, 0.087040667, -0.0173410522, 0.0224713157, 0.027370822, 0.033435727, -0.0121205549, 0.0150409465, 0.018938516, 0.022643559, -0.0064894201, 0.0084611518, 0.010700320, 0.013232000, -0.0029734778, 0.0040641310, 0.004911086, 0.005769038, -0.0015149104, 0.0020584993, 0.002582982, 0.003114029, -0.0007984207, 0.0009929547, 0.001182739, 0.001398774),ncol=4,nrow=7,byrow=TRUE) -resmat3=matrix(c( -0.0636530860, 0.072974943, 0.083840562, 0.097222407, -0.0216586978, 0.027436566, 0.031875356, 0.036830302, -0.0152898678, 0.018964066, 0.021728817, 0.028959751, -0.0083568493, 0.010071525, 0.012712862, 0.015254576, -0.0039033578, 0.004764140, 0.005577071, 0.006660322, -0.0019139215, 0.002343152, 0.002833612, 0.003465269, -0.0009598105, 0.001146689, 0.001355930, 0.001547572),ncol=4,nrow=7,byrow=TRUE) -resmat4=matrix(c( - 0.085071252, 0.095947936, 0.104197413, 0.118449765, - 0.029503024, 0.034198704, 0.039543410, 0.045043759, - 0.019203266, 0.022768842, 0.026886843, 0.033481535, - 0.011440493, 0.013555017, 0.016138970, 0.018297815, - 0.004863139, 0.005756305, 0.007385239, 0.009114958, - 0.002635144, 0.003111160, 0.003769051, 0.004215897, - 0.001188837, 0.001435179, 0.001727871, 
0.001956372),ncol=4,nrow=7,byrow=TRUE) -resmat5=matrix(c( -0.102893512, 0.114258558, 0.122545016, 0.130222265, -0.036733497, 0.042504996, 0.048663576, 0.055456582, -0.024192946, 0.028805967, 0.032924489, 0.038209545, -0.012663224, 0.014635216, 0.017275594, 0.019736410, -0.006105572, 0.007310803, 0.008960242, 0.009745320, -0.003067163, 0.003614637, 0.003997615, 0.004812373, -0.001441008, 0.001732819, 0.002078651, 0.002307551),ncol=4,nrow=7,byrow=TRUE) -resmat6=matrix(c( -0.117642769, 0.126566104, 0.133106804, 0.142280074, -0.044309420, 0.049731991, 0.053912739, 0.060512997, -0.028607224, 0.033826020, 0.038616476, 0.043546500, -0.015445120, 0.017557181, 0.020040720, 0.022747707, -0.007334749, 0.008406468, 0.009392098, 0.010919651, -0.003352200, 0.003814582, 0.004380562, 0.005252154, -0.001703698, 0.002001713, 0.002338651, 0.002772864),ncol=4,nrow=7,byrow=TRUE) -resmat7=matrix(c( -0.106573121, 0.113058950, 0.117388191, 0.121286795, -0.052170054, 0.058363322, 0.064733684, 0.069749344, -0.030696897, 0.035506926, 0.039265698, 0.044437674, -0.016737307, 0.019605734, 0.021253610, 0.022922988, -0.007767232, 0.009231789, 0.010340874, 0.011471110, -0.003998261, 0.004590177, 0.005506926, 0.006217415, -0.001903372, 0.002174748, 0.002519055, 0.002858655),ncol=4,nrow=7,byrow=TRUE) -resmat8=matrix(c( - 0.119571179, 0.126977461, 0.130120853, 0.133258294, - 0.059499563, 0.067185338, 0.071283297, 0.079430577, - 0.034310968, 0.039827130, 0.044451690, 0.048512464, - 0.018599530, 0.021093909, 0.023273085, 0.027471116, - 0.009135712, 0.010901687, 0.012288682, 0.013729545, - 0.004382249, 0.005191810, 0.005598429, 0.006484433, - 0.002196973, 0.002525918, 0.002818550, 0.003242426),ncol=4,nrow=7,byrow=TRUE) -crit5=array(cbind(resmat1,resmat2,resmat3,resmat4,resmat5,resmat6,resmat7, -resmat8),c(7,4,8)) -flag=TRUE -crit.val=NULL -if(p > 8)flag=FALSE -if(n<10 || n>=400)flag=FALSE -aval<-c(.1,.05,.025,.01) -aokay<-duplicated(c(alpha,aval)) -if(sum(aokay)==0)flag=FALSE -if(flag){ -nalpha=c(0:4) -asel=c(0,aval) -ialpha=nalpha[aokay] -critit=crit5[,ialpha,p] -nvec<-c(10,20,30,50,100,200,400) -nval<-duplicated(c(n,nvec)) -nval<-nval[2:8] -if(sum(nval)>0)crit.val<-critit[nval,p] -loc<-rank(c(n,nvec)) -xx<-c(1/nvec[loc[1]-1]^1.5,1/nvec[loc[1]]^1.5) -yy<-c(critit[loc[1]-1],critit[loc[1]]) -icoef<-tsp1reg(xx,yy)$coef -crit.val<-icoef[1]+icoef[2]/n^1.5 -}} -if(is.null(crit.val))com.pval=TRUE -# no critical value found, so a p-value will be computed -# the code for checking the file medind.crit, which appears -# next, is not working yet. 
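-# What follows computes the He-Zhu type test statistic: the design matrix is
-# orthogonalized via ortho, the quantile residuals are scored with psi, partial
-# sums are accumulated in rnmat, and the test statistic is the largest
-# eigenvalue of the resulting matrix.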
-gdot<-cbind(rep(1,n),x) -gdot<-ortho(gdot) -x<-gdot[,2:pp1] -x<-as.matrix(x) -coef<-NULL -if(qval==.5)coef<-median(y) -if(qval==.25)coef<-idealf(y)$ql -if(qval==.75)coef<-idealf(y)$qu -if(is.null(coef))coef<-qest(y,q=qval) -res<-y-coef -psi<-NA -psi<-ifelse(res>0,qval,qval-1) -rnmat<-matrix(0,nrow=n,ncol=pp1) -ran.mat<-apply(x,2,rank) -flagvec<-apply(ran.mat,1,max) -for(j in 1:n){ -flag<-ifelse(flagvec<=flagvec[j],TRUE,FALSE) -flag<-as.numeric(flag) -rnmat[j,]<-apply(flag*psi*gdot,2,sum) -} -rnmat<-rnmat/sqrt(n) -temp<-matrix(0,pp1,pp1) -for(i in 1:n)temp<-temp+rnmat[i,]%*%t(rnmat[i,]) -temp<-temp/n -test<-max(eigen(temp)$values) -if(com.pval){ -if(SEED)set.seed(2) -p.val<-0 -rem<-0 -for(i in 1:nboot){ -yboot<-rnorm(n) -if(p==1)xboot<-rnorm(n) -if(p>1)xboot<-rmul(n,p=p) -temp3<-medindsub(x,yboot,qval=qval) -if(test>=temp3)p.val<-p.val+1 -rem[i]<-temp3 -} -ic10<-round(.9*nboot) -ic05<-round(.95*nboot) -ic025<-round(.975*nboot) -ic001<-round(.99*nboot) -rem<-sort(rem) -p.val<-1-p.val/nboot -# now remember the critical values by storing them in "medind.crit" -#if(store.it) -#write(c(n,p,qval,rem[ic10],rem[ic05],rem[ic025],rem[ic001]),"medind.crit", -#append=T,ncolumns=7) -print("The .1, .05, .025 and .001 critical values are:") -print(c(rem[ic10],rem[ic05],rem[ic025],rem[ic001])) -crit.val<-rem[ic05] -} -names(crit.val)="" -Decision="Fail To Reject" -if(test>=crit.val)Decision="Reject" -list(test.stat=test,crit.value=crit.val,p.value=p.val,Decision=Decision) -} - - -medindsub<-function(x,y,qval=.5){ -# -x<-as.matrix(x) -n<-length(y) -p<-ncol(x) -pp1<-p+1 -tvec<-c(qval,0-qval,1-qval,qval-1) -pval<-c((1-qval)/2,(1-qval)/2,qval/2,qval/2) -gdot<-cbind(rep(1,n),x) -gdot<-ortho(gdot) -x<-gdot[,2:pp1] -x<-as.matrix(x) -if(qval==.5)coef<-median(y) -if(qval!=.5)coef<-qest(y) -res<-y-coef -psi<-NA -psi<-ifelse(res>0,qval,qval-1) -rnmat<-matrix(0,nrow=n,ncol=pp1) -ran.mat<-apply(x,2,rank) -flagvec<-apply(ran.mat,1,max) -for(j in 1:n){ -flag<-ifelse(flagvec>=flagvec[j],T,F) -rnmat[j,]<-apply(flag*psi*gdot,2,sum) -} -rnmat<-rnmat/sqrt(n) -temp<-matrix(0,pp1,pp1) -for(i in 1:n)temp<-temp+rnmat[i,]%*%t(rnmat[i,]) -temp<-temp/n -test<-max(eigen(temp)$values) -test -} -linplot<-function(x,con=0,plotfun=akerd,nboot=800,plotit=TRUE,pyhat=FALSE,...){ -# -# plot distribtion of the linear contrast -# c_1X_2+c_2X_2+... -# -# con contains contrast coefficients. If not specified, -# con<-c(1,1,...,1) -# -if(is.matrix(x))x<-listm(x) -if(!is.list(x))stop("Data must be stored in list mode or in matrix mode.") -J<-length(x) -tempn<-0 -mvec<-NA -for(j in 1:J){ -temp<-x[[j]] -temp<-temp[!is.na(temp)] # Remove missing values. -tempn[j]<-length(temp) -x[[j]]<-temp -} -Jm<-J-1 -# -# Determine contrast matrix -# If not specified, assume distribution of the sum is to be plotted -# -if(sum(con^2)==0)con<-matrix(1,J,1) -bvec<-matrix(NA,nrow=J,ncol=nboot) -for(j in 1:J){ -data<-matrix(sample(x[[j]],size=nboot,replace=TRUE),nrow=nboot) -bvec[j,]<-data -} -bcon<-t(con)%*%bvec #ncon by nboot matrix -bcon<-as.vector(bcon) -dval<-plotfun(bcon,pyhat=pyhat,...) -dval -} -lin2plot<-function(x,con,op=4,nboot=800,plotit=TRUE,pyhat=FALSE){ -# -# plot two distribtions. -# The first is the distribtion of the linear contrast -# c_1X_2+c_2X_2+... c_i>0 -# and the second is the distribution of c_1X_2+c_2X_2+... c_i<0 -# -# con contains contrast coefficients. If not specified, -# function terminates. 
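-# Example (hypothetical data, minimal sketch): with four groups stored in list
-# mode, lin2plot(x, con = c(1, 1, -1, -1)) plots the bootstrap distribution
-# formed from the positive coefficients (groups 1 and 2) along with the
-# distribution formed from the negative coefficients (groups 3 and 4).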
-# -# -if(is.matrix(x))x<-listm(x) -if(!is.list(x))stop("Data must be stored in list mode or in matrix mode.") -J<-length(x) -if(J != length(con)){ -stop("Number of contrast coefficients must equal the number of groups") -} -for(j in 1:J){ -temp<-x[[j]] -temp<-temp[!is.na(temp)] # Remove missing values. -x[[j]]<-temp -} -# -# Determine contrast matrix for positive contrast coefficients -# -flag<-(con<0) -con1<-con -con1[flag]<-0 -# Determine contrast matrix for negative contrast coefficients -flag<-(con>0) -con2<-con -con2[flag]<-0 -bvec<-matrix(NA,nrow=J,ncol=nboot) -for(j in 1:J){ -data<-matrix(sample(x[[j]],size=nboot,replace=TRUE),nrow=nboot) -bvec[j,]<-data -} -bcon1<-t(con1)%*%bvec -bcon2<-t(con2)%*%bvec -bcon1<-as.vector(bcon1) -bcon2<-as.vector(bcon2) -fval<-g2plot(bcon1,bcon2,op=op,rval=15,fr=0.8,aval=0.5,xlab="X",ylab="") -fval -} -adrunl<-function(x,y,est=tmean,iter=10,pyhat=FALSE,plotit=TRUE,fr=.8, -xlab="x1",ylab="x2",zlab="",theta=50,phi=25,expand=.5,scale=FALSE, -zscale=TRUE,xout=FALSE,outfun=out,ticktype="simple",...){ -# -# additive model based on running interval smoother -# and backfitting algorithm -# -m<-elimna(cbind(x,y)) -x<-as.matrix(x) -p<-ncol(x) -if(p==1)val<-lplot(x[,1],y,pyhat=TRUE,plotit=plotit,span=fr,pr=FALSE)$yhat.values -if(p>1){ -library(MASS) -library(akima) -np<-p+1 -x<-m[,1:p] -y<-m[,np] -fhat<-matrix(NA,ncol=p,nrow=length(y)) -fhat.old<-matrix(NA,ncol=p,nrow=length(y)) -res<-matrix(NA,ncol=np,nrow=length(y)) -dif<-1 -for(i in 1:p) -fhat.old[,i]<-lplot(x[,i],y,pyhat=TRUE,plotit=FALSE,span=fr,pr=FALSE)$yhat.values -eval<-NA -for(it in 1:iter){ -for(ip in 1:p){ -res[,ip]<-y -for(ip2 in 1:p){ -if(ip2 != ip)res[,ip]<-res[,ip]-fhat.old[,ip2] -} -fhat[,ip]<-lplot(x[,ip],res[,ip],pyhat=TRUE,plotit=FALSE,span=fr,pr=FALSE)$yhat.values -} -eval[it]<-sum(abs(fhat/sqrt(sum(fhat^2))-fhat.old/sqrt(sum(fhat.old^2)))) -if(it > 1){ -itm<-it-1 -dif<-abs(eval[it]-eval[itm]) -} -fhat.old<-fhat -if(dif<.01)break -} -val<-apply(fhat,1,sum) -aval<-est(y-val,...) -val<-val+aval -if(plotit && p==2){ -fitr<-val -iout<-c(1:length(fitr)) -nm1<-length(fitr)-1 -for(i in 1:nm1){ -ip1<-i+1 -for(k in ip1:length(fitr))if(sum(x[i,]==x[k,])==2)iout[k]<-0 -} -fitr<-fitr[iout>=1] # Eliminate duplicate points in the x-y plane -# This is necessary when doing three dimensional plots -# with the R function interp -mkeep<-x[iout>=1,] -fitr<-interp(mkeep[,1],mkeep[,2],fitr) -persp(fitr,theta=theta,phi=phi,xlab=xlab,ylab=ylab,zlab=zlab,expand=expand, -scale=scale,ticktype=ticktype) -}} -if(!pyhat)val<-"Done" -val -} - - - - -Rpca<-function(x,p=ncol(x)-1,locfun=llocv2,loc.val=NULL,iter=100,SCORES=FALSE, -gvar.fun=cov.mba,SEED=TRUE,...){ -# -# Robust PCA using random orthogonal matrices and -# robust generalized variance method -# -# locfun, by default, use the marginal medians -# alternatives are mcd, tauloc, spat,... 
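-# Example (hypothetical data, minimal sketch): for a numeric matrix x with five
-# columns, Rpca(x, p = 2, SCORES = TRUE) searches random orthogonal projections,
-# returning the 2 by 5 matrix of projection coefficients in B, the corresponding
-# robust generalized variance, and the scores for each row of x.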
-# -if(SEED)set.seed(2) -x<-elimna(x) -n<-nrow(x) -m<-ncol(x) -if(is.null(loc.val))info<-locfun(x,...)$center -if(!is.null(loc.val))info<-loc.val -for(i in 1:n)x[i,]<-x[i,]-info -vals<-NA -z<-matrix(nrow=n,ncol=p) -bval<-array(NA,c(p,m,iter)) -for(it in 1:iter){ -B<-matrix(runif(p*m),nrow=p,ncol=m) -B <- t(ortho(t(B))) # so rows are orthogonal -bval[,,it]<-B -for(i in 1:n)z[i,]<-B%*%as.matrix(x[i,]) -#vals[it]<-gvar(z) -vals[it]<-gvarg(z,var.fun=gvar.fun) -} -iord<-order(vals) -Bop<-0-bval[,,iord[iter]] -zval<-NULL -if(SCORES){ -for(i in 1:n)z[i,]<-Bop%*%as.matrix(x[i,]) -zval<-z -} -list(B=Bop,gen.var=vals[iord[iter]],scores=zval) -} - -Rsq.ols<-function(x,y){ -res=lsfit(x,y)$residuals -yhat=y-res -rsq=var(yhat)/var(y) -rsq -} - -ols<-function(x,y,xout=FALSE,outfun=outpro,alpha=.05,plotit=FALSE,xlab='X',ylab='Y',zlab='Z',RES=TRUE,...){ -# -# Performs OLS regression calling built-in R function. -# -# xout=T will eliminate any leverage points (outliers among x values) -# if one predictor, -# plotit=TRUE will plot the points and the regression line -# -m<-elimna(cbind(x,y)) -n=nrow(m) -n.keep=n -x<-as.matrix(x) -p<-ncol(x) -pp<-p+1 -x<-m[,1:p] -y<-m[,pp] -if(xout){ -m<-cbind(x,y) -flag<-outfun(x,plotit=FALSE,...)$keep -m<-m[flag,] -x<-m[,1:p] -y<-m[,pp] -n.keep=length(y) -} -x<-as.matrix(x) -temp<-summary(lm(y~x)) -coef<-temp[4]$coefficients -CI=matrix(NA,nrow(coef),ncol=2) -CI[,1]=coef[,1]-qt(1-alpha/2,temp[10]$fstatistic[3])*coef[,2] -CI[,2]=coef[,1]+qt(1-alpha/2,temp[10]$fstatistic[3])*coef[,2] -dimnames(CI)=list(NULL,c("low.ci","up.ci")) -coef=cbind(coef,CI) -if(plotit){ -if(p==1){ -plot(x,y,xlab=xlab,ylab=ylab) -abline(coef[,1]) -} -if(p==2){ -regp2plot(x,y,regfun=ols,xlab=xlab,ylab=ylab,zlab=zlab) -}} -Ftest<-temp[10]$fstatistic -Ftest.p.value<-1-pf(Ftest[1],Ftest[2],Ftest[3]) -Rval=Rsq.ols(x,y) -res=NULL -if(RES)res=y-x%*%coef[2:pp,1]-coef[1,1] -list(n=n,n.keep=n.keep,summary=coef,coef=coef[,1],F.test=temp[10]$fstatistic[1],Ftest.p.value=Ftest.p.value, -F.test.degrees.of.freedom=temp[10]$fstatistic[2:3],R.squared=Rval,residuals=as.vector(res)) -} - -olstest<-function(x,y,nboot=500,SEED=TRUE,RAD=TRUE,xout=FALSE,outfun=outpro,...){ -# -# Test the hypothesis that all OLS slopes are zero. -# Heteroscedasticity is allowed. -# -# RAD=T: use Rademacher function to generate wild bootstrap values. -# RAD=F, use standardized uniform distribution. -# -if(SEED)set.seed(2) -m<-elimna(cbind(x,y)) -x<-as.matrix(x) -p<-ncol(x) -pp<-p+1 -x<-m[,1:p] -y<-m[,pp] -if(xout){ -m<-cbind(x,y) -if(identical(outfun,outblp))flag=outblp(x,y,plotit=FALSE)$keep -else -flag<-outfun(x,plotit=FALSE,...)$keep -m<-m[flag,] -x<-m[,1:p] -y<-m[,pp] -} -x<-as.matrix(x) -temp<-lsfit(x,y) -yhat<-mean(y) -res<-y-yhat -test<-sum(temp$coef[2:pp]^2) -if(RAD)data<-matrix(ifelse(rbinom(length(y)*nboot,1,0.5)==1,-1,1),nrow=nboot) -if(!RAD){ -data<-matrix(runif(length(y)*nboot),nrow=nboot)# -data<-(data-.5)*sqrt(12) # standardize the random numbers. -} -rvalb<-apply(data,1,olstests1,yhat,res,x) -p.val<-sum(rvalb>=test)/nboot -list(p.value=p.val) -} - -qrchkv2<-function(x,y,qval=.5,...){ -# -# Test of a linear fit based on quantile regression -# The method stems from He and Zhu 2003, JASA, 98, 1013-1022. -# Here, resampling is avoided using approximate critical values if -# com.pval=F -# -# To get a p-value, via simulations, set com.pval=T -# nboot is number of simulations used to determine p-value. 
-# Execution time can be quite high -# -# This function quickly determines .1, .05, .025 and .01 -# critical values for n<=400 and p<=6 (p= number of predictors) -# and when dealing with the .5 quantile. -# Otherwise, critical values are determined via simulations, which -# can have high execution time. -# -# But, once critical values are determined for a given n, p and -# quantile qval, the function will remember these values and use them -# in the future. They are stored in a file called qrchk.crit -# Currently, however, when you source the Rallfun files, these values -# will be lost. You might save the file qrchk.crit in another file, -# source Rallfun, then copy the save file back to qrchk.crit -# -x=as.matrix(x) -p<-ncol(x) -pp1<-p+1 -yx<-elimna(cbind(y,x)) #Eliminate missing values. -y<-yx[,1] -x<-yx[,2:pp1] -store.it=F -x<-as.matrix(x) -p.val<-NULL -crit.val<-NULL -x<-as.matrix(x) -# shift the marginal x values so that the test statistic is -# invariant under changes in location -n<-length(y) -x=standm(x) -gdot<-cbind(rep(1,n),x) -gdot<-ortho(gdot) -x<-gdot[,2:pp1] -x<-as.matrix(x) -temp<-rqfit(x,y,qval=qval,res=TRUE) -coef<-temp$coef -psi<-NA -psi<-ifelse(temp$residuals>0,qval,qval-1) -rnmat<-matrix(0,nrow=n,ncol=pp1) -ran.mat<-apply(x,2,rank) -flagvec<-apply(ran.mat,1,max) -for(j in 1:n){ -flag<-ifelse(flagvec<=flagvec[j],TRUE,FALSE) -flag<-as.numeric(flag) -rnmat[j,]<-apply(flag*psi*gdot,2,sum) -} -rnmat<-rnmat/sqrt(n) -temp<-matrix(0,pp1,pp1) -for(i in 1:n)temp<-temp+rnmat[i,]%*%t(rnmat[i,]) -temp<-temp/n -test<-max(eigen(temp)$values) -test -} - -sm2str<-function(xx,y,iv=c(1,2),nboot=100,SEED=TRUE,xout=FALSE,outfun=outpro, -STAND=TRUE,...){ -# -# Compare robust measures of association of two predictors -# based on a smooth -# -if(!is.matrix(xx))stop("x should be a matrix with 2 or more columns") -if(ncol(xx)<2)stop("x should be a matrix with 2 or more columns") -val1=NA -val2=NA -x=xx[,iv] -xy=elimna(cbind(x,y)) -x=xy[,1:2] -y=xy[,3] -if(xout){ -x<-as.matrix(x) -flag<-outfun(x,...)$keep -x<-x[flag,] -y<-y[flag] -x<-as.matrix(x) -} -if(SEED)set.seed(2) -data1<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot) -data2<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot) -bvec1=apply(data1,1,sm2str.sub,x[,1],y) # 2 by nboot matrix -bvec2=apply(data2,1,sm2str.sub,x[,2],y) # 2 by nboot matrix -bvecd=bvec1-bvec2 -pv=akerdcdf(bvecd,pts=0) -vcor=cor(x,method="kendall") -pv=2*min(c(pv,1-pv)) -p.crit=.25*abs(vcor[1,2])+.05+(100-length(y))/10000 -p.crit=max(c(.05,p.crit)) -list(p.value=pv,p.crit=p.crit) -} - -sm2str.sub<-function(isub,x,y){ -xmat<-x[isub] -val1<-lplot(xmat,y[isub],plotit=FALSE)$Explanatory.power -val1 -} - -akerdcdf<-function(xx,hval=NA,aval=.5,op=1,fr=.8,pyhat=TRUE,pts=0,plotit=FALSE, -xlab="",ylab=""){ -# -# Compute cumulative adaptive kernel density estimate -# for univariate data -# (See Silverman, 1986) -# By default (univiate case) determine P(X<=pts), -# pts=0 by default. 
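-# Example (hypothetical data, minimal sketch): akerdcdf(x, pts = c(-1, 0, 1))
-# estimates P(X <= -1), P(X <= 0) and P(X <= 1) from the adaptive kernel
-# density estimate of the data in x.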
-# -# op=1 Use expected frequency as initial estimate of the density -# op=2 Univariate case only -# Use normal kernel to get initial estimate of the density -# -fval<-"Done" -if(is.matrix(xx)){ -if(ncol(xx)>1)fval<-akerdmul(xx,pts=pts,hval=hval,aval=aval,fr=fr,pr=pyhat,plotit=plotit) -plotit<-F -} -if(is.matrix(xx) && ncol(xx)==1)xx<-xx[,1] -if(!is.matrix(xx)){ -x<-sort(xx) -if(op==1){ -m<-mad(x) -if(m==0){ -temp<-idealf(x) -m<-(temp$qu-temp$ql)/(qnorm(.75)-qnorm(.25)) -} -if(m==0)m<-sqrt(winvar(x)/.4129) -if(m==0)stop("All measures of dispersion are equal to 0") -fhat <- rdplot(x,pyhat=TRUE,plotit=FALSE,fr=fr) -if(m>0)fhat<-fhat/(2*fr*m) -} -if(op==2){ -init<-density(xx) -fhat <- init$y -x<-init$x -} -n<-length(x) -if(is.na(hval)){ -sig<-sqrt(var(x)) -temp<-idealf(x) -iqr<-(temp$qu-temp$ql)/1.34 -A<-min(c(sig,iqr)) -if(A==0)A<-sqrt(winvar(x))/.64 -hval<-1.06*A/length(x)^(.2) -# See Silverman, 1986, pp. 47-48 -} -gm<-exp(mean(log(fhat[fhat>0]))) -alam<-(fhat/gm)^(0-aval) -dhat<-NA -if(is.na(pts[1]))pts<-x -pts<-sort(pts) -for(j in 1:length(pts)){ -temp<-(pts[j]-x)/(hval*alam) -sq5=0-sqrt(5) -epan=.75*(temp-.2*temp^3/3)/sqrt(5)-.75*(sq5-.2*sq5^3/3)/sqrt(5) -flag=(temp>=sqrt(5)) -epan[flag]=1 -flag=(temp= 0 - negres <- res <= 0 - lplus <- cumsum(posres) - rplus <- lplus[n] - lplus - lmin <- cumsum(negres) - rmin <- lmin[n] - lmin - depth <- pmin(lplus + rmin, rplus + lmin) - min(depth) -} -depthcom<-function(x1,y1,x2,y2,est=tmean,fr=1){ -temp1=depthcomsub(x1,y1,x2,y2,est=est,fr=fr) -temp2=depthcomsub(x2,y2,x1,y1,est=est,fr=fr) -dep=max(c(abs(temp1$dep1-temp1$dep2),abs(temp2$dep1-temp2$dep2))) -dep -} -depthcomsub<-function(x1,y1,x2,y2,est=tmean,fr=1){ -x1=(x1-median(x1))/mad(x1) -x2=(x2-median(x2))/mad(x2) -yh1=runhat(x1,y1,est=tmean,fr=fr) -yh2=runhat(x2,y2,pts=x1,est=tmean,fr=fr) -flag=is.na(yh2) -res1=y1-yh1 -res2=y1[!flag]-yh2[!flag] -dep1=resdepth(x1,res1) -dep2=resdepth(x1[!flag],res2) -list(dep1=dep1,dep2=dep2) -} - -ancsm<-function(x1,y1,x2,y2,crit.mat=NULL,nboot=200,SEED=TRUE,REP.CRIT=FALSE,LP=TRUE, -est=tmean,fr=NULL,plotit=TRUE,sm=FALSE,xout=FALSE,outfun=out,xlab="X",ylab="Y",...){ -# -# Compare two nonparametric -# regression lines corresponding to two independent groups -# using the depths of smooths. -# One covariate only is allowed. -# -# A running interval smoother is used. -# -# sm=T will create smooths using bootstrap bagging. -# -if(ncol(as.matrix(x1))>1)stop("One covariate only is allowed") -if(xout){ -flag1=outfun(x1,...)$keep -flag2=outfun(x2,...)$keep -x1=x1[flag1] -y1=y1[flag1] -x2=x2[flag2] -y2=y2[flag2] -} -xy=elimna(cbind(x1,y1)) -x1=xy[,1] -xord=order(x1) -x1=x1[xord] -y1=xy[xord,2] -xy=elimna(cbind(x2,y2)) -x2=xy[,1] -xord=order(x2) -x2=x2[xord] -y2=xy[xord,2] -n1=length(y1) -n2=length(y2) -if(is.null(fr)){ -fr=1 -if(min(n1,n2)>150)fr=.2 -if(max(n1,n2)<35)fr=.5 -} -if(SEED)set.seed(2) -if(is.null(crit.mat[1])){ -crit.val=NA -yall=c(y1,y2) -xall=c(x1,x2) -nn=n1+n2 -il=n1+1 -for(i in 1:nboot){ -data=sample(nn,nn,TRUE) -yy1=yall[data[1:n1]] -yy2=yall[data[il:nn]] -xx1=xall[data[1:n1]] -xx2=xall[data[il:nn]] -crit.mat[i]=depthcom(xx1,yy1,xx2,yy2,est=est,fr=fr) -}} -if(plotit)runmean2g(x1,y1,x2,y2,fr=fr,est=est,sm=sm,xlab=xlab,ylab=ylab,LP=LP,...) 
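-# Next, the depth of the observed smooths is computed with depthcom and compared
-# to the bootstrap values in crit.mat to obtain a p-value.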
-dep=depthcom(x1,y1,x2,y2,est=est,fr=fr) -n=min(n1,n2) -pv=1-mean(crit.mat=crit)p.value<-c("Less than .1") -crit<-15.49/n+2.68 -if(test>=crit)p.value<-c("Less than .05") -crit<-14.22/n+3.26 -if(test>=crit)p.value<-c("Less than .025") -crit<-24.83/n+3.74 -if(test>=crit)p.value<-c("Less than .01") -p.values[ic,3]=p.value -}}} -list(cor=val,test.results=info,p.values=p.values) -} - - -resdepth.sub<-function(x,res) -{ -########################################################################## -# This function computes the regression depth of a regression line based -# on its residuals. The fit could be, for example, a nonparmatric -# regression or smooth. -# -# The algorithm is based on a simple modification of -# -# Rousseeuw, P.J. and Hubert, M. (1996), -# Regression Depth, Technical report, University of Antwerp -# -########################################################################## - if(!is.vector(x)) stop("x should be vectors") - n <- length(x) - if(n < 2) - stop("you need at least two observations") -flag=is.na(res) -x=x[!flag] -res[!flag] -xord=order(x) -x=x[xord] -res=res[xord] - posres <- res >= 0 - negres <- res <= 0 - lplus <- cumsum(posres) - rplus <- lplus[n] - lplus - lmin <- cumsum(negres) - rmin <- lmin[n] - lmin - depth <- pmin(lplus + rmin, rplus + lmin) - min(depth) -} - -tbs<- function(x,eps=1e-3,maxiter=20,r=.45,alpha=.05,init.est=OGK){ -# Rocke's contrained s-estimator -# -# r=.45 is the breakdown point -# alpha=.05 is the asymptotic rejection probability. -# -library(MASS) -x<-elimna(x) -x=as.matrix(x) - n <- nrow(x) - p <- ncol(x) -LIST=FALSE -if(p==1){ -LIST=T -p=2 -x=cbind(x,rnorm(nrow(x))) -# Yes, this code is odd, but for moment easiest way of handling p=1 -} -temp<-init.est(x) -# very poor outside rate per obs under normality. -t1<-temp$center -s<-temp$cov -c1M<-cgen.bt(n,p,r,alpha,asymp=FALSE) -c1<-c1M$c1 -if(c1==0)c1<-.001 #Otherwise get division by zero -M<-c1M$M - b0 <- erho.bt(p,c1,M) - crit <- 100 - iter <- 1 - w1d <- rep(1,n) - w2d <- w1d - while ((crit > eps)&(iter <= maxiter)) - { - t.old <- t1 - s.old <- s - wt.old <- w1d - v.old <- w2d - d2 <- mahalanobis(x,center=t1,cov=s) - d <- sqrt(d2) - k <- ksolve.bt(d,p,c1,M,b0) - d <- d/k - w1d <- wt.bt(d,c1,M) - w2d <- v.bt(d,c1,M) - t1 <- (w1d %*% x)/sum(w1d) - s <- s*0 - for (i in 1:n) - { - xc <- as.vector(x[i,]-t1) - s <- s + as.numeric(w1d[i])*(xc %o% xc) - } - s <- p*s/sum(w2d) - mnorm <- sqrt(as.vector(t.old) %*% as.vector(t.old)) - snorm <- eigen(s.old)$values[1] - crit1 <- max(abs(t1 - t.old)) -# crit <- max(crit1,crit2) - crit <- max(abs(w1d-wt.old))/max(w1d) - iter <- iter+1 - } -if(LIST){ -v1=t1[1] -v2=s[1,1] -return(list(center=v1,var=v2)) -} -if(!LIST)return(list(center=t1,cov=s)) -} - -pcorhc4sub<-function(x,y,CN=FALSE){ -# -# Compute a .95 confidence interval for Pearson's correlation coefficient. -# using the HC4 method -# -# CN=T degrees of freedom are infinite, as done by Cribari-Neto (2004) -# CN=F degrees of freedom are n-p -# -xy<-elimna(cbind(x,y)) -x<-xy[,1] -y<-xy[,2] -z1=(x-mean(x))/sqrt(var(x)) -z2=(y-mean(y))/sqrt(var(y)) -ans=olshc4sub(z1,z2,CN=CN) -ci=ans$ci[2,3:4] -ci -} - -TWOpNOV<-function(x,y,HC4=FALSE,alpha=.05){ -# -# Compute a .95 confidence interval -# for the difference between two dependent Pearson correlations, -# non-overlapping case. -# -# Both x and y are assumed to be matrices with two columns. -# The function compares the correlation between x[,1] and x[,2] -# to the correlation between y[,1] and y[,2]. -# -# For simulation results, see Wilcox (2009). 
-# COMPARING PEARSON CORRELATIONS: DEALING WITH -# HETEROSCEDASTICITY AND NON-NORMALITY, Communications in Statistics--Simulations -# and Computations, 38, 2220-2234. -# -# -if(!HC4 && alpha!=.05)stop('For alpha not equal to .05, must use HC4=TRUE') -#if(!is.matrix(x))stop("x should be a matrix") -#if(!is.matrix(y))stop("y should be a matrix") -if(ncol(x)!=2)stop("x should be a matrix or data a frame with 2 columns") -if(ncol(y)!=2)stop("y should be a matrix or a data frame with 2 columns") -xy=elimna(cbind(x,y)) -x1=xy[,1] -x2=xy[,2] -y1=xy[,3] -y2=xy[,4] -r12=cor(x1,x2) -r13=cor(x1,y1) -r14=cor(x1,y2) -r23=cor(x2,y1) -r24=cor(x2,y2) -r34=cor(y1,y2) -term1=.5*r12*r34*(r13^2+r14^2+r23^2+r24^2) -term2=r12*r13*r14+r12*r23*r24+r13*r23*r34+r14*r24*r34 -corhat=(term1+r13*r24+r14*r23-term2)/((1-r12^2)*(1-r34^2)) -if(!HC4)temp=pcorbv4(x1,x2,SEED=FALSE) -if(HC4)temp=pcorhc4(x1,x2,alpha=alpha) -ci12=temp$ci[1] -ci12[2]=temp$ci[2] -if(!HC4)temp=pcorbv4(y1,y2,SEED=FALSE) -if(HC4)temp=pcorhc4(y1,y2,alpha=alpha) -ci34=temp$ci[1] -ci34[2]=temp$ci[2] -terml=2*corhat*(r12-ci12[1])*(ci34[2]-r34) -termu=2*corhat*(ci12[2]-r12)*(r34-ci34[1]) -L=r12-r34-sqrt((r12-ci12[1])^2+(ci34[2]-r34)^2-terml) -U=r12-r34+sqrt((r12-ci12[2])^2+(ci34[1]-r34)^2-termu) -if(ZCI){ -if(is.na(L) || is.na(U))L=U=0 -} -list(est.1=r12,est.2=r34,ci.lower=L,ci.upper=U) -} - -TWOpov<-function(x,y,alpha=.05,CN=FALSE,BOOT=TRUE, nboot=499,SEED=TRUE,ZCI=FALSE){ -# -# Comparing two dependent correlations: Overlapping case -# -# x is assumed to be a matrix with 2 columns -# -# Compare correlation of x[,1] with y to x[,2] with y -# -# returns a confidence stored in -# ci -# -if(ncol(x)!=2)stop('x should be a matrix with two columns') -x1y=elimna(cbind(x[,1],y)) -x2y=elimna(cbind(x[,2],y)) -xx=elimna(x) -r12=cor(x1y[,1],x1y[,2]) -r13=cor(x2y[,1],x2y[,2]) -r23=cor(xx[,1],xx[,2]) -if(!BOOT){ -ci12=pcorhc4(x1y[,1],x1y[,2],alpha=alpha,CN=CN)$ci -ci13=pcorhc4(x2y[,1],x2y[,2],alpha=alpha,CN=CN)$ci -} -if(BOOT){ -ci12=rhohc4bt(x1y[,1],x1y[,2],alpha=alpha,SEED=SEED,nboot=nboot)$ci -ci13=rhohc4bt(x2y[,1],x2y[,2],alpha=alpha,SEED=SEED,nboot=nboot)$ci -} -corhat=((r23-.5*r12*r13)*(1-r12^2-r13^2-r23^2)+r23^3)/((1-r12^2)*(1-r13^2)) -term1=2*corhat*(r12-ci12[1])*(ci13[2]-r13) -term2=2*corhat*(r12-ci12[2])*(ci13[1]-r13) -L=r12-r13-sqrt((r12-ci12[1])^2+(ci13[2]-r13)^2-term1) -U=r12-r13+sqrt((r12-ci12[2])^2+(ci13[1]-r13)^2-term2) -if(ZCI){ -if(is.na(L) || is.na(U))L=U=0 -} -list(est.rho1=r12,est.rho2=r13,dif=r12-r13,ci=c(L,U)) -} - - - - -sm2strv7<-function(xx,y,iv=c(1,2),nboot=100,SEED=TRUE,xout=FALSE,outfun=outpro, -STAND=TRUE,...){ -# -# Compare robust measures of association of two predictors -# based on a smooth -# -# x is a matrix with two columns -# robust explanatory of x[,1] with y is compared to x[,2] with y. -# xout=T eliminates any leverage points found with outfun, which -# defaults to outpro, a projecion method for detecting outliers. -# -# iv: indicates the two columns of x that will be used. By default, col 1 and 2 are used. 
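-# Example (hypothetical data, minimal sketch): sm2strv7(x, y, iv = c(1, 3),
-# xout = TRUE) compares the strength of the association between y and column 1
-# of x with the strength of the association between y and column 3, after
-# removing leverage points.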
-# -if(!is.matrix(xx))stop("x should be a matrix with 2 or more columns") -if(ncol(xx)<2)stop("x should be a matrix with 2 or more columns") -val1=NA -val2=NA -x=xx[,iv] -xy=elimna(cbind(x,y)) -x=xy[,1:2] -y=xy[,3] -if(xout){ -x<-as.matrix(x) -flag<-outfun(x,...)$keep -x<-x[flag,] -y<-y[flag] -x<-as.matrix(x) -} -if(SEED)set.seed(2) -data1<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot) -data2<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot) -bvec1=apply(data1,1,sm2str.sub,x[,1],y) # 2 by nboot matrix -bvec2=apply(data2,1,sm2str.sub,x[,2],y) # 2 by nboot matrix -bvecd=bvec1-bvec2 -pv=akerdcdf(bvecd,pts=0) -vcor=cor(x,method="kendall") -pv=2*min(c(pv,1-pv)) -p.crit=.25*abs(vcor[1,2])+.05+(100-length(y))/10000 -p.crit=max(c(.05,p.crit)) -list(p.value=pv,p.crit=p.crit) -} - -pcorhc4<-function(x,y,alpha=.05,CN=FALSE,HC3=FALSE){ -# -# Compute a .95 confidence interval for Pearson's correlation coefficient. -# using the HC4 method -# -# CN=F, degrees of freedom are n-p; seems better for general use. -# CN=T degrees of freedom are infinite, as done by Cribari-Neto (2004) -# -print('Can return meaningless confidence interval due to outliers') -xy<-elimna(cbind(x,y)) -x<-xy[,1] -y<-xy[,2] -z1=(x-mean(x))/sqrt(var(x)) -z2=(y-mean(y))/sqrt(var(y)) -ans=olshc4(z1,z2,alpha=alpha,CN=CN,HC3=HC3) -list(r=ans$ci[2,2],ci=ans$ci[2,3:4],p.value=ans$ci[2,5],test.stat=ans$test.stat) -} -regpreS<-function(x,y,regfun=lsfit,error=absfun,nboot=100, -mval=round(5*log(length(y))),locfun=mean,pr=TRUE, -xout=FALSE,outfun=out, -plotit=TRUE,xlab="Model Number",ylab="Prediction Error",SEED=TRUE,...){ -# -# Stepwise selection of predictors based on -# estimates of prediction error using the regression method -# regfun, -# which defaults to least squares. Prediction error -# is estimated with .632 method. -# (See Efron and Tibshirani, 1993, pp. 252--254) -# -# The predictor values are assumed to be in the n by p matrix x. -# The default number of bootstrap samples is nboot=100 -# -# Prediction error is the expected value of the function error. -# The argument error defaults to absolute error. To use -# squared error, set error=sqfun. -# -# regfun can be any R function that returns the coefficients in -# the vector regfun$coef, the first element of which contains the -# estimated intercept, the second element contains the estimate of -# the first predictor, etc. -# -# The default value for mval, the number of observations to resample -# for each of the B bootstrap samples is based on results by -# Shao (JASA, 1996, 655-665). (Resampling n vectors of observations, -# model selection may not lead to the correct model as n->infinity. 
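-# Example (hypothetical data, minimal sketch): regpreS(x, y, regfun = tsreg)
-# runs the stepwise search using the Theil-Sen estimator in place of least
-# squares and returns the predictor (column) indices in the order determined by
-# the search.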
-# -if(SEED)set.seed(2) -q=ncol(x) -qm1=q-1 -x<-as.matrix(x) -d<-ncol(x) -p1<-d+1 -temp<-elimna(cbind(x,y)) -x<-temp[,1:d] -y<-temp[,d+1] -x<-as.matrix(x) -if(xout){ -x<-as.matrix(x) -flag<-outfun(x,SEED=FALSE,...)$keep -x<-x[flag,] -y<-y[flag] -x<-as.matrix(x) -} -adit=NULL -pval=c(1:ncol(x)) -#pval=c(1:q) -allp=pval -for(ip in 1:qm1){ -model=list() -for(j in 1:length(pval))model[[j]]=c(adit,pval[j]) -temp=regpre(x,y,model=model,pr=FALSE,plotit=FALSE,adz=FALSE,regfun=regfun, -SEED=SEED)$estimates -pbest=order(temp[,5]) -adit=model[[pbest[1]]] -pval=allp[-adit] -} -output=model[[pbest[1]]] -output=c(output,allp[-output]) -output -} - -akp.effect<-function(x,y,EQVAR=TRUE,tr=.2){ -# -# Computes the robust effect size suggested by -#Algina, Keselman, Penfield Psych Methods, 2005, 317-328 -library(MASS) -x<-elimna(x) -y<-elimna(y) -n1<-length(x) -n2<-length(y) -s1sq=winvar(x,tr=tr) -s2sq=winvar(y,tr=tr) -spsq<-(n1-1)*s1sq+(n2-1)*s2sq -sp<-sqrt(spsq/(n1+n2-2)) -cterm=1 -if(tr>0)cterm=area(dnormvar,qnorm(tr),qnorm(1-tr))+2*(qnorm(tr)^2)*tr -cterm=sqrt(cterm) -if(EQVAR)dval<-cterm*(tmean(x,tr)-tmean(y,tr))/sp -if(!EQVAR){ -dval<-cterm*(tmean(x,tr)-tmean(y,tr))/sqrt(s1sq) -dval[2]=cterm*(tmean(x,tr)-tmean(y,tr))/sqrt(s2sq) -} -dval -} - -akp.effect.ci<-function(x,y,alpha=.05,tr=.2,nboot=1000,SEED=TRUE,null.val=0){ -# -# Computes the robust effect size for two-sample case using -# Algina, Keselman, Penfield Pcyh Methods, 2005, 317-328 -# -# -if(SEED)set.seed(2) -x=elimna(x) -y=elimna(y) -n1=length(x) -n2=length(y) -be.f=NA -for(i in 1:nboot){ -X=sample(x,n1,replace=TRUE) -Y=sample(y,n2,replace=TRUE) -be.f[i]=akp.effect(X,Y,tr=tr) -} -L=alpha*nboot/2 -U=nboot-L -be.f=sort(be.f) -ci=be.f[L+1] -ci[2]=be.f[U] -est=akp.effect(x,y,tr=tr) -pv=mean(be.f0)cterm=area(dnormvar,qnorm(tr),qnorm(1-tr))+2*(qnorm(tr)^2)*tr -cterm=sqrt(cterm) -del=cterm*d1 #rescale for a normal distribution. -list(effect.size=del,Cohen.d.equiv=2*del) -} - - - - -wwwtrim<-function(J,K,L,data,tr=.2,grp=c(1:p),alpha=.05,p=J*K*L){ -# Perform a within by within by within (three-way) anova on trimmed means. -# -# That is, there are three factors with a total of JKL dependent groups. -# -# The argument data is assumed to contain the raw -# data stored in list mode. data[[1]] contains the data -# for the first level of all three factors: level 1,1,1. -# data][2]] is assumed to contain the data for level 1 of the -# first two factors and level 2 of the third factor: level 1,1,2 -# data[[L]] is the data for level 1,1,L -# data[[L+1]] is the data for level 1,2,1. data[[2L]] is level 1,2,L. -# data[[KL+1]] is level 2,1,1, etc. -# -# The default amount of trimming is tr=.2 -# -# It is assumed that data has length JKL, the total number of -# groups being tested. 
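-# Example (hypothetical data, minimal sketch): for a 2-by-2-by-2 within-subjects
-# design with the eight dependent groups stored in the list x in the order
-# 111, 112, 121, 122, 211, 212, 221, 222, wwwtrim(2, 2, 2, x) tests all main
-# effects and interactions using 20% trimmed means.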
-# -if(is.data.frame(data))data=as.matrix(data) -if(is.list(data))data=listm(elimna(matl(data))) -if(is.matrix(data))data=listm(elimna(data)) -if(!is.list(data))stop("Data are not stored in list mode or a matrix") -if(p!=length(data)){ -print("The total number of groups, based on the specified levels, is") -print(p) -print("The number of groups in data is") -print(length(data)) -print("Warning: These two values are not equal") -} -tmeans<-0 -h<-0 -v<-0 -for (i in 1:p){ -tmeans[i]<-mean(data[[grp[i]]],tr) -h[i]<-length(data[[grp[i]]])-2*floor(tr*length(data[[grp[i]]])) -# h is the effective sample size -} -v=covmtrim(data,tr=tr) -ij<-matrix(c(rep(1,J)),1,J) -ik<-matrix(c(rep(1,K)),1,K) -il<-matrix(c(rep(1,L)),1,L) -jm1<-J-1 -cj<-diag(1,jm1,J) -cj<-diag(1,jm1,J) -for (i in 1:jm1)cj[i,i+1]<-0-1 -km1<-K-1 -ck<-diag(1,km1,K) -for (i in 1:km1)ck[i,i+1]<-0-1 -lm1<-L-1 -cl<-diag(1,lm1,L) -for (i in 1:lm1)cl[i,i+1]<-0-1 -# Do test for factor A -cmat<-kron(cj,kron(ik,il)) # Contrast matrix for factor A -Qa=bwwtrim.sub(cmat, tmeans, v, h,p) -Qa.siglevel <- 1 - pf(Qa, J - 1, 999) -# Do test for factor B -cmat<-kron(ij,kron(ck,il)) # Contrast matrix for factor B -Qb=bwwtrim.sub(cmat, tmeans, v, h,p) - Qb.siglevel <- 1 - pf(Qb, K - 1, 999) -# Do test for factor C -cmat<-kron(ij,kron(ik,cl)) # Contrast matrix for factor C -Qc<-bwwtrim.sub(cmat, tmeans, v, h,p) -Qc.siglevel <- 1 - pf(Qc, L - 1, 999) -# Do test for factor A by B interaction -cmat<-kron(cj,kron(ck,il)) # Contrast matrix for factor A by B -Qab<-bwwtrim.sub(cmat, tmeans, v, h,p) -Qab.siglevel <- 1 - pf(Qab, (J - 1) * (K - 1), 999) -# Do test for factor A by C interaction -cmat<-kron(cj,kron(ik,cl)) # Contrast matrix for factor A by C -Qac<-bwwtrim.sub(cmat, tmeans, v, h,p) -Qac.siglevel <- 1 - pf(Qac, (J - 1) * (L - 1), 999) -# Do test for factor B by C interaction -cmat<-kron(ij,kron(ck,cl)) # Contrast matrix for factor B by C -Qbc<-bwwtrim.sub(cmat, tmeans, v, h,p) -Qbc.siglevel <- 1 - pf(Qbc, (K - 1) * (L - 1), 999) -# Do test for factor A by B by C interaction -cmat<-kron(cj,kron(ck,cl)) # Contrast matrix for factor A by B by C -Qabc<-bwwtrim.sub(cmat, tmeans, v, h,p) -Qabc.siglevel <-1-pf(Qabc,(J-1)*(K-1)*(L-1), 999) -list(Qa=Qa,Qa.p.value=Qa.siglevel,Qb=Qb,Qb.p.value=Qb.siglevel, -Qc=Qc,Qc.p.value=Qc.siglevel,Qab=Qab,Qab.p.value=Qab.siglevel, -Qac=Qac,Qac.p.value=Qac.siglevel,Qbc=Qbc,Qbc.p.value=Qbc.siglevel, -Qabc=Qabc,Qabc.p.value=Qabc.siglevel) -} - - -ltsR<-function(x,y,RES=FALSE,varfun=pbvar,corfun=pbcor){ -# -library(MASS) -xy=elimna(cbind(x,y)) -p1=ncol(xy) -p=p1-1 -x=xy[,1:p] -y=xy[,p1] -temp=ltsreg(x,y)$coef -x=as.matrix(x) -p=ncol(x)+1 -res<-y-x%*%temp[2:p]-temp[1] -yhat<-y-res -if(!RES)res=NULL -e.pow<-varfun(yhat)/varfun(y) -if(is.na(e.pow))e.pow<-1 -if(e.pow>=1)e.pow<-corfun(yhat,y)$cor^2 -list(coef=temp,residuals=res,Explanatory.Power=e.pow, -Strength.Assoc=sqrt(e.pow)) -} - -standmar<-function(x,locfun=lloc,est=mean,scat=var,...){ -# standardize a matrix x -# -x=as.matrix(x) -m1=apply(x,2,est,na.rm=TRUE) -v1=apply(x,2,scat,na.rm=TRUE) -p=ncol(x) -for(j in 1:p)x[,j]=(x[,j]-m1[j])/sqrt(v1[j]) -x -} - -qsmcobs<-function(x,y,qval=.5,xlab="X",ylab="Y",FIT=TRUE,pc=".",plotit=TRUE, -xout=FALSE,outfun=out,q=NULL,lambda=0,...){ -# -# Plots smooths of quantile regression lines using R package cobs -# -# qval is the quantile -# qsmcobs(x,y,qval=c(.2,.5,.8)) will plot three smooths corresponding to -# the .2, .5 and .8 quantile regression lines. 
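-# Setting xout=TRUE, e.g. qsmcobs(x, y, qval = .75, xout = TRUE), removes
-# leverage points (outliers among the x values) before fitting the smooth.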
-# -# FIT=T, uses the values returned by predict -# FIT=F, determines predicted Y for each X and plots the results -library(cobs) -if(!is.null(q))qval=q -x=as.matrix(x) -if(xout){ -flag<-outfun(x,...)$keep -x<-x[flag,] -y<-y[flag] -} -yhat=NULL -res=NULL -if(plotit)plot(x,y,xlab=xlab,ylab=ylab,pch=pc) -if(FIT){ -for(j in 1:length(qval)){ -if(plotit)lines(predict(cobs(x,y,tau=qval[j],lambda=lambda,print.mesg=FALSE,print.warn=FALSE))) -}} -if(!FIT){ -for(j in 1:length(qval)){ -temp=cobs(x,y,tau=qval[j],print.mesg=FALSE,print.warn=FALSE,lambda=lambda) -xord=order(x) -if(plotit)lines(x[xord],temp$fitted[xord]) -} -if(length(qval)==1){ -yhat=temp$fitted -#res=y-yhat - # yhat is only for the unique x values. If x has,say, -# three tied values = 6, then -# yhat contains only one predicted value for x=6, not three yhat values -# all equal to the predicted value at x=6 -} -} -list(yhat=yhat) -} - - -Qdepthcom<-function(x1,y1,x2,y2,qval){ -temp1=Qdepthcomsub(x1,y1,x2,y2,qval) -temp2=Qdepthcomsub(x2,y2,x1,y1,qval) -dep=max(c(abs(temp1$dep1-temp1$dep2),abs(temp2$dep1-temp2$dep2))) -dep -} -Qdepthcomsub<-function(x1,y1,x2,y2,qval){ -x1=(x1-median(x1))/mad(x1) -x2=(x2-median(x2))/mad(x2) -yh1=qsmcobs(x1,y1,FIT=FALSE,qval=qval,plotit=FALSE)$yhat -temp2=cobs(x2,y2,print.mesg=FALSE,print.warn=FALSE,tau=qval) -yh2=predict(temp2,z=x1) -yh2=yh2[,2] -flag=is.na(yh2) -res1=y1-yh1 -res2=y1[!flag]-yh2[!flag] -dep1=resdepth(x1,res1) -dep2=resdepth(x1[!flag],res2) -list(dep1=dep1,dep2=dep2) -} - - -mulgreg<-function(x,y,cov.fun=rmba){ -# -# Do Multivariate regression in Rousseeuw, Van Aelst, Van Driessen Agullo -# (2004) Technometrics, 46, 293-305 -# -# (y can be multivariate) -# -library(MASS) -if(!is.matrix(y))stop("y is not a matrix") -X<-cbind(x,y) -X<-elimna(X) -qy<-ncol(y) -qx<-ncol(x) -qxp1<-qx+1 -tqyqx<-qy+qx -y<-X[,qxp1:tqyqx] -# compute initial estimate of slopes and intercept: -locscat<-cov.fun(X) -sig<-locscat$cov -mu<-locscat$center -sigxx<-sig[1:qx,1:qx] -sigxy<-sig[1:qx,qxp1:tqyqx] -sigyy<-sig[qxp1:tqyqx,qxp1:tqyqx] -Bhat<-solve(sigxx)%*%sigxy -sige<-sigyy-t(Bhat)%*%sigxx%*%Bhat -sige.inv<-solve(sige) -Ahat<-t(mu[qxp1:tqyqx]-t(Bhat)%*%mu[1:qx]) -resL<-matrix(nrow=nrow(X),ncol=qy) -for(i in 1:nrow(X))resL[i,]<-y[i,]-t(Bhat)%*%X[i,1:qx] -for(j in 1:qy)resL[,j]<-resL[,j]-Ahat[j] -list(coef=rbind(Ahat,Bhat),residuals=resL) -} - -tsp1reg<-function(x,y,plotit=FALSE,HD=FALSE,OPT=TRUE,tr=FALSE){ -# -# Compute the Theil-Sen regression estimator. 
-# Only a single predictor is allowed in this version -# -# OPT=TRUE, compute the intercept using median(y)-beta_1median(X) -# OPT=FALSE compute the intercept using median of y-beta_1X -# -temp<-matrix(c(x,y),ncol=2) -temp<-elimna(temp) # Remove any pairs with missing values -x<-temp[,1] -y<-temp[,2] -ord<-order(x) -xs<-x[ord] -ys<-y[ord] -vec1<-outer(ys,ys,"-") -vec2<-outer(xs,xs,"-") -v1<-vec1[vec2>0] -v2<-vec2[vec2>0] -if(!HD)slope<-median(v1/v2,na.rm=TRUE) -if(HD)slope<-hd(v1/v2,na.rm=TRUE,tr=tr) -if(OPT){ -if(!HD)coef<-median(y,na.rm=TRUE)-slope*median(x,na.rm=TRUE) -if(HD)coef<-hd(y,na.rm=TRUE)-slope*hd(x,na.rm=TRUE,tr=tr) -} -if(!OPT){ -if(!HD)coef<-median(y-slope*x,na.rm=TRUE) -if(HD)coef<-hd(y-slope*x,na.rm=TRUE,tr=tr) -} -names(coef)<-"Intercept" -coef<-c(coef,slope) -if(plotit){ -plot(x,y,xlab="X",ylab="Y") -abline(coef) -} -res<-y-slope*x-coef[1] -list(coef=coef,residuals=res) -} - -gplot<-function(x,xlab="Group",ylab="",xnum=FALSE){ -if(is.matrix(x))x<-listm(x) -if(!xnum)par(xaxt="n") -mval<-NA -vals<-x[[1]] -gval<-rep(1,length(x[[1]])) -for(j in 2:length(x)){ -vals<-c(vals,x[[j]]) -gval<-c(gval,rep(j,length(x[[j]]))) -} -plot(gval,vals,xlab=xlab,ylab=ylab) -} - -trimpb<-function(x,y=NULL,tr=.2,alpha=.05,nboot=2000,WIN=FALSE,win=.1, -plotit=FALSE,pop=1,null.value=0,pr=TRUE,xlab="X",fr=NA,SEED=TRUE){ -# -# Compute a 1-alpha confidence interval for -# a trimmed mean. -# -# The default number of bootstrap samples is nboot=2000 -# -# win is the amount of Winsorizing before bootstrapping -# when WIN=T. -# -# Missing values are automatically removed. -# -# nv is null value. That test hypothesis trimmed mean equals nv -# -# plotit=TRUE gives a plot of the bootstrap values -# pop=1 results in the expected frequency curve. -# pop=2 kernel density estimate -# pop=3 boxplot -# pop=4 stem-and-leaf -# pop=5 histogram -# pop=6 adaptive kernel density estimate. -# -# fr controls the amount of smoothing when plotting the bootstrap values -# via the function rdplot. fr=NA means the function will use fr=.8 -# (When plotting bivariate data, rdplot uses fr=.6 by default.) -# -# If y is not null, the function uses x-y; so can be used for two dependent variables. -# -if(pr){ -print("The p-value returned by this function is based on the") -print("null value specified by the argument null.value, which defaults to 0") -} -if(!is.null(y))x=x-y -x<-x[!is.na(x)] -if(WIN){ -if(win > tr)stop("The amount of Winsorizing must be <= to the amount of trimming") -x<-winval(x,win) -} -crit<-alpha/2 -icl<-round(crit*nboot)+1 -icu<-nboot-icl -bvec<-NA -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -data<-matrix(sample(x,size=length(x)*nboot,replace=TRUE),nrow=nboot) -bvec<-apply(data,1,mean,tr) # Bootstrapped trimmed means -bvec<-sort(bvec) -#p.value<-sum(bvec9, this adjustment can be crucial -# -m=elimna(m) -m=as.matrix(m) -n=nrow(m) -if(SEED)set.seed(2) -z=array(rmul(n*iter*ncol(m)),c(iter,n,ncol(m))) -newq=0 -gtry=NA -for(itry in 1:ip){ -newq=newq+9/10^itry -gtry[itry]=newq -} -gtry=c(.95,.975,gtry[-1]) -if(pr)print("Computing adjustment") -for(itry in 1:ip){ -val=NA -for(i in 1:iter){ -temp=outpro(z[i,,],gval = sqrt(qchisq(gtry[itry],ncol(m))), -center=center,plotit=FALSE,op=op,MM=MM,cop=cop,STAND=STAND)$out.id -val[i]=length(temp) -} -erate=mean(val)/n -if(erate=zvec) -output<-matrix(0,connum,6) -dimnames(output)<-list(NULL,c("con.num","psihat","p.value", -"crit.sig","ci.lower","ci.upper")) -tmeans<-apply(x,2,est,na.rm=TRUE,...) 
-psi<-1 -output[temp2,4]<-zvec -for (ic in 1:ncol(con)){ -output[ic,2]<-sum(con[,ic]*tmeans) -output[ic,1]<-ic -output[ic,3]<-test[ic] -temp<-sort(psihat[ic,]) -icl<-round(output[ic,4]*nboot/2)+1 -icu<-nboot-(icl-1) -output[ic,5]<-temp[icl] -output[ic,6]<-temp[icu] -} -if(!flag.con){ -} -if(flag.con){ -CC=(J^2-J)/2 -test<-matrix(NA,CC,7) -dimnames(test)<-list(NULL,c("Group","Group","psi.hat","p.value","p.crit", -"ci.low","ci.upper")) -jcom<-0 -for (j in 1:J){ -for (k in 1:J){ -if (j < k){ -jcom<-jcom+1 -test[jcom,1]=j -test[jcom,2]=k -test[jcom,3:5]=output[jcom,2:4] -test[jcom,6:7]=output[jcom,5:6] -con=NULL -}}}} -if(!flag.con)test=output -#num.sig<-sum(output[,4]<=output[,5]) -if(flag.con)num.sig<-sum(test[,4]<=test[,5]) -if(!flag.con)num.sig<-sum(test[,3]<=test[,4]) -list(output=test,con=con,num.sig=num.sig) -} - - - -mulrank<-function(J,K,x,grp=c(1:p),p=J*K){ -# -# Perform the Munzel and Brunner -# multivariate one-way rank-based ANOVA -# (Munzel and Brunner, Biometrical J., 2000, 42, 837--854 -# -# x can be a matrix with columns corresponding to groups -# -# Have a J by K design with J independent levels and K dependent -# measures -# -# or it can have list mode. -# -newx=list() -GV=matrix(c(1:p),ncol=K,byrow=TRUE) -if(is.list(x)){ -temp=NA -jk=0 -for(j in 1:J){ -temp=elimna(matl(x[GV[j,]])) -for(k in 1:K){ -jk=jk+1 -newx[[jk]]=temp[,k] -}} -x=NA -x=newx -} -if(is.matrix(x)){ -x=elimna(x) -x<-listm(x) -} -xx<-list() -nvec<-NA -for(j in 1:p){ -xx[[j]]<-x[[grp[j]]] -nvec[j]<-length(xx[[j]]) -} -Nrow=nvec[GV[,1]] -v<-matrix(0,p,p) -Ja<-matrix(1,J,J) -Ia<-diag(1,J) -Pa<-Ia-Ja/J -Jb<-matrix(1,K,K) -Ib<-diag(1,K) -Pb<-Ib-Jb/K -cona<-kron(Pa,Ib) -xr<-list() -N<-0 -jj=0 -for(k in 1:K){ -temp<-x[[k]] -jk<-k -for (j in 2:J){ -jj=jj+1 -jk<-jk+K -temp<-c(temp,x[[jk]]) -} -N<-length(temp) -pr<-rank(temp) -xr[[k]]<-pr[1:nvec[k]] #Put ranks of pooled data for first -# variable in xr -top<-nvec[k] -jk<-k -bot<-1 -for (j in 2:J){ -jk<-jk+K -bot<-bot+nvec[jk] -top<-top+nvec[jk] -xr[[jk]]<-pr[bot:top] # Put midranks in xr -}} -phat<-NA -botk<-0 -for(j in 1:J){ -for(k in 1:K){ -botk<-botk+1 -phat[botk]<-(mean(xr[[botk]])-.5)/N -}} -klow<-1-K -kup<-0 -for(j in 1:J){ -klow<-klow+K -kup<-kup+K -sel<-c(klow:kup) -v[sel,sel]<-covmtrim(xr[klow:kup],tr=0)/N -} -qhat<-matrix(phat,J,K,byrow=TRUE) -test<-N*t(phat)%*%cona%*%phat/sum(diag(cona%*%v)) -nu1<-sum(diag(cona%*%v))^2/sum(diag(cona%*%v%*%cona%*%v)) -sig.level<-1-pf(test,nu1,1000000) -list(test.stat=test[1,1],nu1=nu1,p.value=sig.level,N=N,q.hat=qhat) -} - - -lincon.old<-function(x,con=0,tr=.2,alpha=.05,pr=TRUE,crit=NA,SEED=TRUE,KB=FALSE){ -# -# A heteroscedastic test of d linear contrasts using trimmed means. -# -# The data are assumed to be stored in $x$ in list mode, a matrix -# or a data frame. If in list mode, -# length(x) is assumed to correspond to the total number of groups. -# It is assumed all groups are independent. -# -# con is a J by d matrix containing the contrast coefficients that are used. -# If con is not specified, all pairwise comparisons are made. -# -# Missing values are automatically removed. 
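-# Example (hypothetical data, minimal sketch): with J = 4 independent groups,
-# con <- cbind(c(1, -1, 0, 0), c(0, 0, 1, -1))
-# lincon.old(x, con = con, tr = .2)
-# tests whether groups 1 and 2 differ and whether groups 3 and 4 differ, with
-# the confidence intervals adjusted to control FWE.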
-# -# To apply the Kaiser-Bowden method, use the function kbcon -# -if(tr==.5)stop("Use the R function medpb to compare medians") -if(is.data.frame(x))x=as.matrix(x) -if(KB)stop("Use the function kbcon") -flag<-TRUE -if(alpha!= .05 && alpha!=.01)flag<-FALSE -if(is.matrix(x))x<-listm(x) -if(!is.list(x))stop("Data must be stored in a matrix or in list mode.") -con<-as.matrix(con) -J<-length(x) -sam=NA -h<-vector("numeric",J) -w<-vector("numeric",J) -xbar<-vector("numeric",J) -for(j in 1:J){ -xx<-!is.na(x[[j]]) -val<-x[[j]] -x[[j]]<-val[xx] # Remove missing values -sam[j]=length(x[[j]]) -h[j]<-length(x[[j]])-2*floor(tr*length(x[[j]])) - # h is the number of observations in the jth group after trimming. -w[j]<-((length(x[[j]])-1)*winvar(x[[j]],tr))/(h[j]*(h[j]-1)) -xbar[j]<-mean(x[[j]],tr) -} -if(sum(con^2)==0){ -CC<-(J^2-J)/2 -if(CC>28)print("For faster execution time but less power, use kbcon") -psihat<-matrix(0,CC,8) -dimnames(psihat)<-list(NULL,c("Group","Group","psihat","ci.lower","ci.upper", -"p.value","Est.1","Est.2")) -test<-matrix(NA,CC,6) -dimnames(test)<-list(NULL,c("Group","Group","test","crit","se","df")) -jcom<-0 -for (j in 1:J){ -for (k in 1:J){ -if (j < k){ -jcom<-jcom+1 -test[jcom,3]<-abs(xbar[j]-xbar[k])/sqrt(w[j]+w[k]) -sejk<-sqrt(w[j]+w[k]) -test[jcom,5]<-sejk -psihat[jcom,1]<-j -psihat[jcom,2]<-k -test[jcom,1]<-j -test[jcom,2]<-k -psihat[jcom,3]<-(xbar[j]-xbar[k]) -df<-(w[j]+w[k])^2/(w[j]^2/(h[j]-1)+w[k]^2/(h[k]-1)) -test[jcom,6]<-df -psihat[jcom,6]<-2*(1-pt(test[jcom,3],df)) -psihat[jcom,7]=xbar[j] -psihat[jcom,8]=xbar[k] -if(!KB){ -if(CC>28)flag=FALSE -if(flag){ -if(alpha==.05)crit<-smmcrit(df,CC) -if(alpha==.01)crit<-smmcrit01(df,CC) -} -if(!flag || CC>28)crit<-smmvalv2(dfvec=rep(df,CC),alpha=alpha,SEED=SEED) -} -if(KB)crit<-sqrt((J-1)*(1+(J-2)/df)*qf(1-alpha,J-1,df)) -test[jcom,4]<-crit -psihat[jcom,4]<-(xbar[j]-xbar[k])-crit*sejk -psihat[jcom,5]<-(xbar[j]-xbar[k])+crit*sejk -}}}} -if(sum(con^2)>0){ -if(nrow(con)!=length(x)){ -stop("The number of groups does not match the number of contrast coefficients.") -} -psihat<-matrix(0,ncol(con),5) -dimnames(psihat)<-list(NULL,c("con.num","psihat","ci.lower","ci.upper", -"p.value")) -test<-matrix(0,ncol(con),5) -dimnames(test)<-list(NULL,c("con.num","test","crit","se","df")) -df<-0 -for (d in 1:ncol(con)){ -psihat[d,1]<-d -psihat[d,2]<-sum(con[,d]*xbar) -sejk<-sqrt(sum(con[,d]^2*w)) -test[d,1]<-d -test[d,2]<-sum(con[,d]*xbar)/sejk -df<-(sum(con[,d]^2*w))^2/sum(con[,d]^4*w^2/(h-1)) -if(flag){ -if(alpha==.05)crit<-smmcrit(df,ncol(con)) -if(alpha==.01)crit<-smmcrit01(df,ncol(con)) -} -if(!flag)crit<-smmvalv2(dfvec=rep(df,ncol(con)),alpha=alpha,SEED=SEED) -test[d,3]<-crit -test[d,4]<-sejk -test[d,5]<-df -psihat[d,3]<-psihat[d,2]-crit*sejk -psihat[d,4]<-psihat[d,2]+crit*sejk -psihat[d,5]<-2*(1-pt(abs(test[d,2]),df)) -} -} -if(pr){ -print("Note: confidence intervals are adjusted to control FWE") -print("But p-values are not adjusted to control FWE") -print('Adjusted p-values can be computed with the R function p.adjust') -} -list(n=sam,test=test,psihat=psihat) -} - -lincon.pool<-function(x,con=0,tr=.2,alpha=.05,POOL=FALSE){ -# -# Same as lincon but with a pooling option that is used when -# dealing with main effects in a two-way and three-way designs -# -# See, for example, the function twowayA.poolB -# - -if(tr==.5)stop('Use the R function medpb to compare medians') -if(is.data.frame(x))x=as.matrix(x) -if(is.matrix(x))x<-listm(x) -if(!is.list(x))stop('Data must be stored in a matrix or in list mode.') -if(sum(con^2)>0){ -if(POOL){ 
-ic=0 -y=list() -nc=ncol(con) -nc2=nc*2 -Ncon=matrix(0,nrow=nc2,ncol=nc) -for(k in 1:nc){ -id1=which(con[,k]==1) -id2=which(con[,k]==-1) -ic=ic+1 -print(ic) -Ncon[ic,k]=1 -y[[ic]]=pool.a.list(x[id1]) -ic=ic+1 -Ncon[ic,k]=-1 -y[[ic]]=pool.a.list(x[id2]) -} -res=lincon(y,con=Ncon,tr=tr) -print(Ncon) -}} -if(!POOL)res=lincon(x,con=con,tr=tr,alpha=alpha) -res -} - - - -poireg<-function(x,y,xout=FALSE,outfun=outpro,plotit=FALSE,xlab="X",ylab="Y", -varfun=var,YHAT=FALSE,STAND=TRUE,...){ -# -# Perform Poisson regression. -# The predictors are assumed to be stored in the n by p matrix x. -# The y values are typically count data (integers). -# -# xout=T will remove outliers from among the x values and then fit -# the regression line. -# Default: -# One predictor, a mad-median rule is used. -# With more than one, projection method is used. -# -# outfun=out will use MVE method -# -xy=elimna(cbind(x,y)) -x<-as.matrix(x) -x=xy[,1:ncol(x)] -y=xy[,ncol(xy)] -x<-as.matrix(x) -if(xout){ -flag<-outfun(x,...)$keep -x<-x[flag,] -y<-y[flag] -x<-as.matrix(x) -} -temp=glm(formula=y~x,family=poisson) -init=summary(temp) -yhat=temp$coef[1] -for(j in 1:ncol(x)){ -j1=j+1 -yhat=yhat+temp$coef[j1]*x[,j] -} -yhat=exp(yhat) -if(plotit){ -x=as.matrix(x) -if(ncol(x)>1)stop("Cannot plot with more than one predictor") -plot(x,y,xlab=xlab,ylab=ylab) -#points(x,yhat,pch=".") -xord=order(x) -lines(x[xord],yhat[xord]) -init$coef -} -ex=varfun(yhat)/varfun(y) -str=sqrt(ex) -hatv=NULL -if(YHAT)hatv=yhat -list(results=init,Explanatory.Power=ex,Strength.Assoc=str,yhat=hatv) -} - - -smcorcom<-function(x1,y1,x2,y2,nboot=200,pts=NA,plotit=TRUE, -SEED=TRUE,varfun=pbvar,xout=TRUE,outfun=out,...){ -# -# Compare strength of association of pairs of variables associated with -# two independent group. -# The strength of the association is based on Cleveland's LOWESS -# smoother coupled with a robust analog of explanatory power. -# -# The method generalizes the goal of compared the usual -# coefficient of determination associated with two independent groups. -# -# Assume data are in x1 y1 x2 and y2 -# -# Reject at the .05 level if the reported p-value is less than or -# equal to p.crit, which is returned by the function. 
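-# Example (hypothetical data, minimal sketch): smcorcom(x1, y1, x2, y2) bootstraps
-# the explanatory power of the LOWESS fit within each group and compares the two
-# resulting strengths of association.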
-# -if(xout){ -flag<-outfun(x1,...)$keep -x1<-x1[flag] -y1<-y1[flag] -} -m<-elimna(cbind(x2,y2)) -x2<-m[,1] -y2<-m[,2] -if(xout){ -flag<-outfun(x2,...)$keep -x2<-x2[flag] -y2<-y2[flag] -} -if(SEED)set.seed(2) -estmat1=NA -estmat2=NA -data1<-matrix(sample(length(y1),size=length(y1)*nboot,replace=TRUE),nrow=nboot) -data2<-matrix(sample(length(y2),size=length(y2)*nboot,replace=TRUE),nrow=nboot) -# -for(ib in 1:nboot){ -estmat1[ib]=lplot(x1[data1[ib,]],y1[data1[ib,]],plotit=FALSE, -varfun=varfun)$Explanatory.power -estmat2[ib]=lplot(x2[data2[ib,]],y2[data2[ib,]], -varfun=varfun,plotit=FALSE)$Explanatory.power -} -dif<-(estmat11){ -for(p in 1:ncol(x)){ -temp[p]<-tsp1reg(x[,p],y,OPT=OPT,tr=tr)$coef[2] -} -res<-y-x%*%temp -if(!HD)alpha<-median(res) -if(HD)alpha<-hd(res,tr=tr) -r<-matrix(NA,ncol=ncol(x),nrow=nrow(x)) -tempold<-temp -for(it in 1:iter){ -for(p in 1:ncol(x)){ -r[,p]<-y-x%*%temp-alpha+temp[p]*x[,p] -temp[p]<-tsp1reg(x[,p],r[,p],plotit=FALSE,OPT=OPT,tr=tr)$coef[2] -} -if(!HD)alpha<-median(y-x%*%temp) -if(HD)alpha<-hd(y-x%*%temp,tr=tr) -tempold<-temp -} -coef<-c(alpha,temp) -res<-y-x%*%temp-alpha -} -yhat<-y-res -stre=e.pow=NULL -if(do.stre){ -temp=varfun(y) -if(temp==0){ -if(WARN)print("Warning: When computing strength of association, measure of variation=0") -} -e.pow=NULL -if(temp>0){ -e.pow<-varfun(yhat)/varfun(y) -if(!is.na(e.pow)){ -if(e.pow>=1)e.pow<-corfun(yhat,y)$cor^2 -e.pow=as.numeric(e.pow) -stre=sqrt(e.pow) -}}} -if(plotit){ -if(ncol(x)==1){ -plot(x,y,xlab=xlab,ylab=ylab) -abline(coef) -}} -list(n=n,n.keep=n.keep, -coef=coef,residuals=res,Strength.Assoc=stre,Explanatory.Power=e.pow) -} - -lplotv2<-function(x,y,span=.75,pyhat=FALSE,eout=FALSE,xout=FALSE,outfun=out,plotit=TRUE, -expand=.5,low.span=2/3,varfun=pbvar,cor.op=FALSE,cor.fun=pbcor,ADJ=FALSE,nboot=20, -scale=TRUE,xlab="X",ylab="Y",zlab="",theta=50,phi=25,family="gaussian", -duplicate="error",pr=TRUE,SEED=TRUE,ticktype="simple"){ -# -# Plot regression surface using LOESS -# -# low.span is the span when lowess is used and there is one predictor -# span is the span when loess is used with two or more predictors -# pyhat=T will return Y hat values -# eout=T will eliminate outliers -# xout=T will eliminate points where X is an outliers -# family="gaussian"; see the description of the built-in function loess -# -# duplicate="error" -# In some situations where duplicate values occur, when plotting with -# two predictors, it is necessary to set duplicate="strip" -# -st.adj=NULL -e.adj=NULL -if(ADJ){ -if(SEED)set.seed(2) -} -si=1 -library(stats) -x<-as.matrix(x) -if(!is.matrix(x))stop("x is not a matrix") -d<-ncol(x) -if(d>=2){ -library(akima) -if(ncol(x)==2 && !scale){ -if(pr){ -print("scale=F is specified.") -print("If there is dependence, might use scale=T") -}} -m<-elimna(cbind(x,y)) -x<-m[,1:d] -y<-m[,d+1] -if(eout && xout)stop("Can't have both eout and xout = F") -if(eout){ -flag<-outfun(m,plotit=FALSE)$keep -m<-m[flag,] -} -if(xout){ -flag<-outfun(x,plotit=FALSE)$keep -m<-m[flag,] -} -x<-m[,1:d] -y<-m[,d+1] -if(d==2)fitr<-fitted(loess(y~x[,1]*x[,2],span=span,family=family)) -if(d==3)fitr<-fitted(loess(y~x[,1]*x[,2]*x[,3],span=span,family=family)) -if(d==4)fitr<-fitted(loess(y~x[,1]*x[,2]*x[,3]*x[,4],span=span,family=family)) -if(d>4)stop("Can have at most four predictors") -last<-fitr -if(d==2 && plotit){ -iout<-c(1:length(fitr)) -nm1<-length(fitr)-1 -for(i in 1:nm1){ -ip1<-i+1 -for(k in ip1:length(fitr))if(sum(x[i,]==x[k,])==2)iout[k]<-0 -} -fitr<-fitr[iout>=1] # Eliminate duplicate points in the x-y plane -# This is 
necessary when doing three dimensional plots -# with the R function interp -mkeep<-x[iout>=1,] -fitr<-interp(mkeep[,1],mkeep[,2],fitr,duplicate=duplicate) -persp(fitr,theta=theta,phi=phi,xlab=xlab,ylab=ylab,zlab=zlab,expand=expand, -scale=scale,ticktype=ticktype) -}} -if(d==1){ -m<-elimna(cbind(x,y)) -x<-m[,1:d] -y<-m[,d+1] -if(eout && xout)stop("Can't have both eout and xout = F") -if(eout){ -flag<-outfun(m)$keep -m<-m[flag,] -} -if(xout){ -flag<-outfun(x)$keep -m<-m[flag,] -} -x<-m[,1:d] -y<-m[,d+1] -if(plotit){ -plot(x,y,xlab=xlab,ylab=ylab) -lines(lowess(x,y,f=low.span)) -} -yyy<-lowess(x,y)$y -xxx<-lowess(x,y)$x -if(d==1){ -ordx=order(xxx) -yord=yyy[ordx] -flag=NA -for (i in 2:length(yyy))flag[i-1]=sign(yord[i]-yord[i-1]) -if(sum(flag)<0)si=-1 -} -last<-yyy -chkit<-sum(duplicated(x)) -if(chkit>0){ -last<-rep(1,length(y)) -for(j in 1:length(yyy)){ -for(i in 1:length(y)){ -if(x[i]==xxx[j])last[i]<-yyy[j] -}} -} -} -E.power<-1 -if(!cor.op)E.power<-varfun(last[!is.na(last)])/varfun(y) -if(cor.op || E.power>=1){ -if(d==1){ -xord<-order(x) -E.power<-cor.fun(last,y[xord])$cor^2 -} -if(d>1)E.power<-cor.fun(last,y)$cor^2 -} -if(ADJ){ -x=as.matrix(x) -val=NA -n=length(y) -data1<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot) -data2<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot) -for(i in 1:nboot){ -temp=lplot.sub(x[data1[i,],],y[data2[i,]],plotit=FALSE,pr=FALSE) -val[i]=temp$Explanatory.power -} -vindt=median(val) -v2indt=median(sqrt(val)) -st.adj=(sqrt(E.power)-max(c(0,v2indt)))/(1-max(c(0,v2indt))) -e.adj=(E.power-max(c(0,vindt)))/(1-max(c(0,vindt))) -st.adj=max(c(0,st.adj)) -e.adj=max(c(0,e.adj)) -} -if(!pyhat)last <- NULL -list(Strength.Assoc=si*sqrt(E.power),Explanatory.power=E.power, -Strength.Adj=st.adj,Explanatory.Adj=e.adj,yhat.values=last) -} -yuendna<-function(x,y=NULL,tr=.2,alpha=.05){ -# -# Compare the trimmed means of two dependent random variables -# using the data in x and y. -# The default amount of trimming is 20% -# -# If y is not supplied, this function assumes x is a matrix with 2 columns. -# -# pairs of observations, for which one value is missing, are NOT deleted. -# Marginal trimmed means are compared -# using all available data. -# -if(is.null(y)){ -if(!is.matrix(x))stop("y is null and x is not a matrix") -y=x[,2] -x=x[,1] -} -if(length(x)!=length(y))stop("The number of observations must be equal") -m<-cbind(x,y) -# first eliminate any rows with both values missing. 
-flag=(apply(is.na(m),1,sum)==2) -m=m[!flag,] -x<-m[,1] -y<-m[,2] -flagx=is.na(y) # Indicates observed x values for which y is missing -flagy=is.na(x) # Indicates the y values for which x is missing -m<-elimna(m) # m has data where both values are available--no missing values -n=nrow(m) -n1=sum(flagx) # number of x values for which y is missing -n2=sum(flagy) -h=n-2*floor(tr*n) -h1=n1-2*floor(tr*n1) -h2=n2-2*floor(tr*n2) -xbarn=mean(x,tr=tr,na.rm=TRUE) -xbarn1=0 -if(h1>0)xbarn1=mean(x[flagx],tr=tr) -ybarn=mean(y[!flagy],tr=tr,na.rm=TRUE) -ybarn1=0 -if(h2>0)ybarn1=mean(y[flagy],tr=tr) -lam1=h/(h+h1) -lam2=h/(h+h2) -est=lam1*xbarn-lam2*ybarn+(1-lam1)*xbarn1-(1-lam2)*ybarn1 -sex=trimse(elimna(x),tr=tr) -sey=trimse(elimna(y),tr=tr) -q1<-(n-1)*winvar(m[,1],tr) -q2<-(n-1)*winvar(m[,2],tr) -q3<-(n-1)*wincor(m[,1],m[,2],tr)$cov -sen=sqrt((lam1^2*q1+lam2^2*q2-2*lam1*lam2*q3)/(h*(h-1))) -SE=sqrt(sen^2+(1-lam1)^2*sex^2+(1-lam2)^2*sey^2) -test=est/SE -list(estimate=est,test=test,se=SE) -} - -rm2miss<-function(x,y=NULL,tr=0,nboot=1000,alpha=.05,SEED=TRUE){ -# -# Compare the marginal trimmed means of two dependent groups -# using a bootstrap t method that allows missing values -# -# If y is not supplied, this function assumes x is a matrix with 2 columns. -# -# NOTE: This function can fail if there are too many missing values -# get the error: incorrect number of dimensions -# -# -if(SEED)set.seed(2) -if(is.null(y)){ -if(!is.matrix(x))stop("y is null and x is not a matrix") -} -if(!is.null(y))x=cbind(x,y) -if(ncol(x)!=2) -print("warning: x has more than one column; columns 1 and 2 are used") -n=nrow(x) -test=yuendna(x,tr=tr) -cen=x -cen[,1]=cen[,1]-mean(x[,1],na.rm=TRUE,tr=tr) -cen[,2]=cen[,2]-mean(x[,2],na.rm=TRUE,tr=tr) -data=matrix(sample(n,n*nboot,replace=TRUE),ncol=nboot) -tval=apply(data,2,FUN=rm2miss.sub,x=cen,tr=tr) -tval=sort(abs(tval)) -icrit<-floor((1-alpha)*nboot+.5) -ci=test$est-tval[icrit]*test$se -ci[2]=test$est+tval[icrit]*test$se -pv=mean(abs(test$test)<=abs(tval)) -list(est.dif=test$est,ci=ci,p.value=pv) -} -rm2miss.sub<-function(data,x,tr){ -n=nrow(x) -m=x[data,] -ans=yuendna(m,tr=tr)$test -ans -} -ydbt<-function(x,y,tr=.2,alpha=.05,nboot=599,side=TRUE,plotit=FALSE,op=1,SEED=TRUE){ -# -# Using the bootstrap-t method, -# compute a .95 confidence interval for the difference between -# the marginal trimmed means of paired data. -# By default, 20% trimming is used with B=599 bootstrap samples. -# -# side=F returns equal-tailed ci -# side=T returns symmetric ci. -# -side<-as.logical(side) -if(length(x)!=length(y))stop("Must have equal sample sizes.") -m<-cbind(x,y) -m<-elimna(m) -x<-m[,1] -y<-m[,2] -if(sum(c(!is.na(x),!is.na(y)))!=(length(x)+length(y)))stop("Missing values are not allowed.") -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -#print("Taking bootstrap samples. Please wait.") -data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot) -e1=mean(x,tr) -e2=mean(y,tr) -xcen<-x-mean(x,tr) -ycen<-y-mean(y,tr) -bvec<-apply(data,1,tsub,xcen,ycen,tr) -# bvec is a 1 by nboot matrix containing the bootstrap test statistics. 
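-# With side=TRUE, the symmetric interval computed below is dif +/- t*se,
-# where t is the (1-alpha) quantile of the sorted absolute bootstrap test
-# statistics and se is the Yuen standard error of the difference between
-# the marginal trimmed means.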
-dotest=yuend(x,y,tr=tr) -estse<-dotest$se -p.value=NULL -dif<-mean(x,tr)-mean(y,tr) -if(!side){ -ilow<-round((alpha/2)*nboot) -ihi<-nboot-ilow -bsort<-sort(bvec) -ci<-0 -ci[1]<-dif-bsort[ihi]*estse -ci[2]<-dif-bsort[ilow+1]*estse -} -if(side){ -bsort<-sort(abs(bvec)) -ic<-round((1-alpha)*nboot) -ci<-0 -ci[1]<-dif-bsort[ic]*estse -ci[2]<-dif+bsort[ic]*estse -p.value<-(sum(abs(dotest$teststat)<=abs(bvec)))/nboot -} -if(plotit){ -if(op==1)akerd(bsort) -if(op==2)rdplot(bsort) -if(op==3)boxplot(bsort) -} -list(ci=ci,Est.1=e1,Est.2=e2,dif=dif,p.value=p.value) -} - - -rmrvar<-function(x,y=NA,alpha=.05,con=0,est=pbvar,plotit=FALSE,grp=NA, -hoch=TRUE,nboot=NA,xlab="Group 1",ylab="Group 2",pr=TRUE,SEED=TRUE,...){ -# -# Use a percentile bootstrap method to compare dependent groups. -# based on some robust measure of variation indicated by the argument -# est -# By default, est=pbvar, the percentage bend midvariance. -# -# The function computes a .95 confidence interval for all linear contrasts -# specified by con, a J by C matrix, where C is the number of -# contrasts to be tested, and the columns of con are the -# contrast coefficients. -# If con is not specified, all pairwise comparisons are done. -# -# nboot is the bootstrap sample size. If not specified, a value will -# be chosen depending on the number of contrasts there are. -# -# x can be an n by J matrix or it can have list mode -# for two groups, data for second group can be put in y -# otherwise, assume x is a matrix (n by J) or has list mode. -# -# Hochberg's sequentially rejective method is used to control alpha. -# -if(!is.na(y[1]))x=cbind(x,y) -if(is.list(x)){ -# put the data in an n by J matrix -mat<-matl(x) -} -if(is.matrix(x) && is.matrix(con)){ -if(ncol(x)!=nrow(con))stop("The number of rows in con is not equal to the -number of groups.") -mat<-x -} -if(is.matrix(x))mat<-x -if(!is.na(sum(grp)))mat<-mat[,grp] -mat<-elimna(mat) # Remove rows with missing values. -x<-mat -J<-ncol(mat) -Jm<-J-1 -if(sum(con^2)==0){ -d<-(J^2-J)/2 -con<-matrix(0,J,d) -id<-0 -for (j in 1:Jm){ -jp<-j+1 -for (k in jp:J){ -id<-id+1 -con[j,id]<-1 -con[k,id]<-0-1 -}}} -d<-ncol(con) -if(is.na(nboot)){ -if(d<=4)nboot<-1000 -if(d>4)nboot<-5000 -} -n<-nrow(mat) -crit.vec<-alpha/c(1:d) -connum<-ncol(con) -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -xbars<-apply(mat,2,est) -psidat<-NA -for (ic in 1:connum)psidat[ic]<-sum(con[,ic]*xbars) -psihat<-matrix(0,connum,nboot) -bvec<-matrix(NA,ncol=J,nrow=nboot) -print("Taking bootstrap samples. Please wait.") -data<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot) -for(ib in 1:nboot){ -bvec[ib,]<-apply(x[data[ib,],],2,est,...) -} -# -# Now have an nboot by J matrix of bootstrap values. 
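-# For each contrast, the proportion of bootstrap psihat values greater than
-# zero is computed next; twice the smaller of that proportion and its
-# complement is the p-value reported in the output matrix.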
-# -test<-1 -bias<-NA -for (ic in 1:connum){ -psihat[ic,]<-apply(bvec,1,bptdpsi,con[,ic]) -test[ic]<-sum((psihat[ic,]>0))/nboot -test[ic]<-min(test[ic],1-test[ic]) -} -test<-2*test -ncon<-ncol(con) -if(alpha==.05){ -dvec<-c(.025,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) -dvecba<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) -if(ncon > 10){ -avec<-.05/c(11:ncon) -dvec<-c(dvec,avec) -}} -if(alpha==.01){ -dvec<-c(.005,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) -dvecba<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) -if(ncon > 10){ -avec<-.01/c(11:ncon) -dvec<-c(dvec,avec) -}} -#if(hoch)dvec<-alpha/(2* c(1:ncon)) -#dvec<-2*dvec -if(alpha != .05 && alpha != .01){ -dvec<-alpha/c(1:ncon) -dvecba<-dvec -dvec[1]<-alpha/2 -} -if(hoch)dvec<-alpha/(c(1:ncon)) -if(plotit && ncol(bvec)==2){ -z<-c(0,0) -one<-c(1,1) -plot(rbind(bvec,z,one),xlab=xlab,ylab=ylab,type="n") -points(bvec) -totv<-apply(x,2,est,...) -cmat<-var(bvec) -dis<-mahalanobis(bvec,totv,cmat) -temp.dis<-order(dis) -ic<-round((1-alpha)*nboot) -xx<-bvec[temp.dis[1:ic],] -xord<-order(xx[,1]) -xx<-xx[xord,] -temp<-chull(xx) -lines(xx[temp,]) -lines(xx[c(temp[1],temp[length(temp)]),]) -abline(0,1) -} -temp2<-order(0-test) -ncon<-ncol(con) -zvec<-dvec[1:ncon] -sigvec<-(test[temp2]>=zvec) -output<-matrix(0,connum,6) -dimnames(output)<-list(NULL,c("con.num","est.var","p.value","crit.p.value", -"ci.lower","ci.upper")) -tmeans<-apply(mat,2,est,...) -psi<-1 -for (ic in 1:ncol(con)){ -output[ic,2]<-sum(con[,ic]*tmeans) -output[ic,1]<-ic -output[ic,3]<-test[ic] -output[temp2,4]<-zvec -temp<-sort(psihat[ic,]) -icl<-round(output[ic,4]*nboot/2)+1 -icu<-nboot-(icl-1) -output[ic,5]<-temp[icl] -output[ic,6]<-temp[icu] -} -num.sig<-sum(output[,3]<=output[,4]) -list(output=output,con=con,num.sig=num.sig) -} - -bprm<-function(x,y=NULL,grp=NA){ -# -# Perform Brunner-Puri within groups rank-based ANOVA -# -# x can be a matrix with columns corresponding to groups -# or it can have list mode. -# -# For computational details, see Brunner, B., Domhof, S. and Langer, F. (2002, -# section 7.2.2, Nonparametric Analysis of Longitudinal Data in -# Factorial Designs) -# -if(is.list(x))x<-matl(x) -if(!is.null(y[1]))x=cbind(x,y) -x<-elimna(x) -if(is.na(grp[1]))grp <- c(1:ncol(x)) -if(!is.matrix(x))stop("Data are not stored in a matrix or in list mode.") -K<-length(grp) # The number of groups. -Jb<-matrix(1,K,K) -Ib<-diag(1,K) -Pb<-Ib-Jb/K -y<-matrix(rank(x),ncol=ncol(x)) #ranks of pooled data -ybar<-apply(y,2,mean) # average of ranks -N<-ncol(x)*nrow(x) -vhat<-var(y)/N^2 -test<-nrow(x)*sum((ybar-(N+1)/2)^2)/N^2 -trval<-sum(diag(Pb%*%vhat)) -test<-test/trval # See Brunner, Domhof and Langer, p. 98, eq. 7.12 -nu1<-trval^2/sum(diag(Pb%*%vhat%*%Pb%*%vhat)) -sig.level<-1-pf(test,nu1,1000000) -list(test.stat=test,nu1=nu1,p.value=sig.level) -} - - - -effectg.sub<-function(x,y,locfun=tmean,varfun=winvarN,...){ -# -# Compute a robust-heteroscedastic measure of effect size -# based on the measure of location indicated by the argument -# locfun, and the measure of scatter indicated by -# varfun. -# -# This subfunction is for the equal sample size case and is called by -# effectg when sample sizes are not equal. -# -# varfun defaults to winvarN, the Winsorized variance rescaled so that -# it estimates the population variance under normality. -# -library(MASS) -x<-x[!is.na(x)] # Remove any missing values in x -y<-y[!is.na(y)] # Remove any missing values in y -m1=locfun(x,...) -m2=locfun(y,...) 
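-# Explanatory power is the variance of the two location estimates divided by
-# the variation (varfun) of the pooled data; the effect size is its square root.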
-top=var(c(m1,m2)) -pts=c(x,y) -# -bot=varfun(pts,...) -# -e.pow=top/bot -list(Var.Explained=e.pow,Effect.Size=sqrt(e.pow)) -} - - -effectg<-function(x,y,locfun=tmean,varfun=winvarN,nboot=100,SEED=TRUE,...){ -# -# Compute a robust heteroscedastic measure of effect size -# (explanatory power) based on the measures of location and scale -# indicated by the arguments locfun and varfun, respectively -# -library(MASS) -if(SEED)set.seed(2) -x<-x[!is.na(x)] # Remove any missing values in x -y<-y[!is.na(y)] # Remove any missing values in y -n1=length(x) -n2=length(y) -if(n1==n2){ -temp=effectg.sub(x,y,locfun=locfun,varfun=varfun,...) -e.pow=temp$Var.Explained -} -if(n1!=n2){ -N=min(c(n1,n2)) -vals=0 -for(i in 1:nboot)vals[i]=effectg.sub(sample(x,N),sample(y,N), -locfun=locfun,varfun=varfun,...)$Var.Explained -e.pow=mean(vals) -} -list(Explanatory.power=e.pow,Effect.Size=sqrt(e.pow)) -} - - -winvarN<-function(x,tr=.2){ -# -# rescale the Winsorized variance so that it equals one for the standard -# normal distribution -# -x=elimna(x) -library(MASS) -cterm=NULL -if(tr==0)cterm=1 -if(tr==0.1)cterm=0.6786546 -if(tr==0.2)cterm=0.4120867 -if(is.null(cterm))cterm=area(dnormvar,qnorm(tr),qnorm(1-tr))+2*(qnorm(tr)^2)*tr -bot=winvar(x,tr=tr)/cterm -bot -} -covloc<-function(x){ -# -# Return mean and covarinace matrix -# -loc=apply(x,2,mean) -mcov=cov(x) -list(center=loc,cov=mcov) -} -g2plotdifxy<-function(x,y,xlab="Difference",ylab=""){ -# -# Plot an estimate of the distribution of X-Y -# -x<-x[!is.na(x)] -y<-y[!is.na(y)] -m<-as.vector(outer(x,y,FUN="-")) -akerd(m,xlab=xlab,ylab=ylab) -} -sumplot2g<-function(x,y=NULL,xlab="X",ylab="",eblabx="Groups",eblaby="",nse=2){ -# -# create four plots useful when comparing two groups -# 1. error bars -# 2. boxplots -# 3. kernel density estimates -# 4 shift function -# -if(!is.null(y)){ -xy=list() -xy[[1]]=x -xy[[2]]=y -} -if(is.null(y)){ -if(is.matrix(x))xy=matl(x) -} -par(mfrow=c(2,2)) -par(oma=c(4,0,0,0)) -ebarplot(xy,xlab=eblabx,ylab=eblaby,nse=nse) -boxplot(xy) -g2plot(xy[[1]],xy[[2]]) -sband(xy[[1]],xy[[2]]) -par(mfrow=c(1,1)) -} - -yuenv2<-function(x,y=NULL,tr=.2,alpha=.05,plotit=FALSE,plotfun=splot,op=TRUE, VL=TRUE,cor.op=FALSE, loc.fun=median, -xlab="Groups",ylab="",PB=FALSE,nboot=100, SEED=TRUE){ -# -# Perform Yuen's test for trimmed means on the data in x and y. -# The default amount of trimming is 20% -# Missing values (values stored as NA) are automatically removed. -# -# A confidence interval for the trimmed mean of x minus the -# the trimmed mean of y is computed and returned in yuen$ci. -# The significance level is returned in yuen$p.value -# -# For an omnibus test with more than two independent groups, -# use t1way. -# -# Unlike the function yuen, a robust heteroscedastic measure -# of effect size is returned. -# PB=FALSE means that a Winsorized variation of prediction error is used to measure effect size. -# PB=TRUE: A percentage bend measure of variation is used instead. 
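-# Example (an illustrative sketch with simulated data):
-#   set.seed(1)
-#   g1=rnorm(30)
-#   g2=rnorm(30)+.5
-#   yuenv2(g1,g2,tr=.2)  # returns ci, p.value, Var.Explained and Effect.Size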
-# -if(tr==.5)stop("Use medpb to compare medians.") -if(tr>.5)stop("Can't have tr>.5") -if(is.null(y)){ -if(is.matrix(x) || is.data.frame(x)){ -y=x[,2] -x=x[,1] -} -if(is.list(x)){ -y=x[[2]] -x=x[[1]] -} -} -library(MASS) -x<-x[!is.na(x)] # Remove any missing values in x -y<-y[!is.na(y)] # Remove any missing values in y -n1=length(x) -n2=length(y) -h1<-length(x)-2*floor(tr*length(x)) -h2<-length(y)-2*floor(tr*length(y)) -q1<-(length(x)-1)*winvar(x,tr)/(h1*(h1-1)) -q2<-(length(y)-1)*winvar(y,tr)/(h2*(h2-1)) -df<-(q1+q2)^2/((q1^2/(h1-1))+(q2^2/(h2-1))) -crit<-qt(1-alpha/2,df) -m1=mean(x,tr) -m2=mean(y,tr) -mbar=(m1+m2)/2 -dif=m1-m2 -low<-dif-crit*sqrt(q1+q2) -up<-dif+crit*sqrt(q1+q2) -test<-abs(dif/sqrt(q1+q2)) -yuen<-2*(1-pt(test,df)) -xx=c(rep(1,length(x)),rep(2,length(y))) -if(h1==h2){ -pts=c(x,y) -top=var(c(m1,m2)) -# -if(!PB){ -if(tr==0)cterm=1 -if(tr>0)cterm=area(dnormvar,qnorm(tr),qnorm(1-tr))+2*(qnorm(tr)^2)*tr -bot=winvar(pts,tr=tr)/cterm -e.pow=top/bot -if(!is.na(e.pow)){ -if(e.pow>1){ -x0=c(rep(1,length(x)),rep(2,length(y))) -y0=c(x,y) -e.pow=wincor(x0,y0,tr=tr)$cor^2 -} -} -} -# -if(PB){ -bot=pbvar(pts) -e.pow=top/bot -} -# -} -if(n1!=n2){ -N=min(c(n1,n2)) -vals=0 -if(SEED)set.seed(2) -for(i in 1:nboot)vals[i]=yuen.effect(sample(x,N),sample(y,N),tr=tr)$Var.Explained -e.pow=loc.fun(vals) -} -if(plotit){ -plot(xx,pts,xlab=xlab,ylab=ylab) -if(op) -points(c(1,2),c(m1,m2)) -if(VL)lines(c(1,2),c(m1,m2)) -} -list(ci=c(low,up),n1=n1,n2=n2, -p.value=yuen,dif=dif,se=sqrt(q1+q2),teststat=test, -crit=crit,df=df,Var.Explained=e.pow,Effect.Size=sqrt(e.pow)) -} - -yuen.effect.ci<-function(x,y,SEED=TRUE,nboot=400,tr=.2,alpha=.05){ -# -# Compute a 1-alpha confidence interval -# for a robust, heteroscedastic measure of effect size -# The absolute value of the measure of effect size is used. -# -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -x=elimna(x) -y=elimna(y) -bvec=0 -datax<-matrix(sample(x,size=length(x)*nboot,replace=TRUE),nrow=nboot) -datay<-matrix(sample(y,size=length(x)*nboot,replace=TRUE),nrow=nboot) -for(i in 1:nboot){ -bvec[i]=yuenv2(datax[i,],datay[i,],tr=tr,SEED=FALSE)$Effect.Size -} -bvec<-sort(abs(bvec)) -crit<-alpha/2 -icl<-round(crit*nboot)+1 -icu<-nboot-icl -ci<-NA -ci[1]<-bvec[icl] -pchk=yuen(x,y,tr=tr)$p.value -if(pchk>alpha)ci[1]=0 -ci[2]<-bvec[icu] -if(ci[1]<0)ci[1]=0 -es=abs(yuenv2(x,y,tr=tr)$Effect.Size) -list(CI=ci,Effect.Size=es) -} - -interplot<-function(J,K,x,locfun=mean,locvec=NULL,na.rm=TRUE, -g1lev=NULL,g2lev=NULL,type = c("l", - "p", "b"), xlab = "Fac 1", ylab = "means",trace.label="Fac 2",...){ -if(is.null(locvec))locvec=lloc(x,est=locfun,na.rm=na.rm) -if(is.list(locvec))locvec=as.vector(matl(locvec)) -if(is.null(g1lev[1])){ -g1=c(rep(1,K)) -for(j in 2:J)g1=c(g1,rep(j,K)) -} -if(!is.null(g1lev)){ -g1=c(rep(g1lev[1],K)) -for(j in 2:J)g1=c(g1,rep(g1lev[j],K)) -} -g1=as.factor(g1) -if(is.null(g2lev[1]))g2=as.factor(rep(c(1:K),J)) -if(!is.null(g2lev[1]))g2=as.factor(rep(g2lev,J)) -g2=as.factor(g2) -interaction.plot(g1,g2,locvec, xlab = xlab, ylab = ylab, -trace.label=trace.label) -} - - - -pbad2way<-function(J,K,x,est=tmean,conall=TRUE,alpha=.05,nboot=2000,grp=NA, -op=FALSE,pro.dis=TRUE,MM=FALSE,pr=TRUE,...){ -# -# This function is like the function pbadepth, -# only it is assumed that main effects and interactions for a -# two-way design are to be tested. -# - # The data are assumed to be stored in x in list mode or in a matrix. 
- # If grp is unspecified, it is assumed x[[1]] contains the data - # for the first level of both factors: level 1,1. - # x[[2]] is assumed to contain the data for level 1 of the - # first factor and level 2 of the second factor: level 1,2 - # x[[j+1]] is the data for level 2,1, etc. - # If the data are in wrong order, grp can be used to rearrange the - # groups. For example, for a two by two design, grp<-c(2,4,3,1) - # indicates that the second group corresponds to level 1,1; - # group 4 corresponds to level 1,2; group 3 is level 2,1; - # and group 1 is level 2,2. - # - # Missing values are automatically removed. - # - if(pr){ - print('As of June, 2022, the default measure of location is tmean, a 20% trimmed mean') - print('The default for measuring depth is a projection method rather than Mahalanobis distance') - } - JK <- J * K - if(is.matrix(x)) - x <- listm(x) - if(!is.na(grp[1])) { - yy <- x - for(j in 1:length(grp)) - x[[j]] <- yy[[grp[j]]] - } - if(!is.list(x)) - stop("Data must be stored in list mode or a matrix.") - for(j in 1:JK) { - xx <- x[[j]] - x[[j]] <- xx[!is.na(xx)] - } - # - # Create the three contrast matrices - # - if(!conall){ - ij <- matrix(c(rep(1, J)), 1, J) - ik <- matrix(c(rep(1, K)), 1, K) - jm1 <- J - 1 - cj <- diag(1, jm1, J) - for(i in 1:jm1) - cj[i, i + 1] <- 0 - 1 - km1 <- K - 1 - ck <- diag(1, km1, K) - for(i in 1:km1) - ck[i, i + 1] <- 0 - 1 - conA <- t(kron(cj, ik)) - conB <- t(kron(ij, ck)) - conAB <- t(kron(cj, ck)) - conAB <- t(kron(abs(cj), ck)) -} -if(conall){ -temp<-con2way(J,K) -conA<-temp$conA -conB<-temp$conB -conAB<-temp$conAB -} - ncon <- max(nrow(conA), nrow(conB), nrow(conAB)) - if(JK != length(x)) - warning("The number of groups does not match the number of contrast coefficients.") -if(!is.na(grp[1])){ # Only analyze specified groups. -xx<-list() -for(i in 1:length(grp))xx[[i]]<-x[[grp[i]]] -x<-xx -} -mvec<-NA -for(j in 1:JK){ -temp<-x[[j]] -temp<-temp[!is.na(temp)] # Remove missing values. -x[[j]]<-temp -mvec[j]<-est(temp,...) -} -bvec<-matrix(NA,nrow=JK,ncol=nboot) -set.seed(2) # set seed of random number generator so that -# results can be duplicated. -#print("Taking bootstrap samples. Please wait.") -for(j in 1:JK){ -data<-matrix(sample(x[[j]],size=length(x[[j]])*nboot,replace=TRUE),nrow=nboot) -bvec[j,]<-apply(data,1,est,...) 
# J by nboot matrix, jth row contains
-# bootstrapped estimates for jth group
-}
-bconA<-t(conA)%*%bvec #C by nboot matrix
-tvecA<-t(conA)%*%mvec
-tvecA<-tvecA[,1]
-tempcenA<-apply(bconA,1,mean)
-veczA<-rep(0,ncol(conA))
-bconA<-t(bconA)
-smatA<-var(bconA-tempcenA+tvecA)
-bconA<-rbind(bconA,veczA)
-if(!pro.dis){
-if(!op)dv<-mahalanobis(bconA,tvecA,smatA)
-if(op){
-dv<-out(bconA)$dis
-}}
-if(pro.dis)dv=pdis(bconA,MM=MM)
-bplus<-nboot+1
-sig.levelA<-1-sum(dv[bplus]>=dv[1:nboot])/nboot
-bconB<-t(conB)%*%bvec #C by nboot matrix
-tvecB<-t(conB)%*%mvec
-tvecB<-tvecB[,1]
-tempcenB<-apply(bconB,1,mean)
-veczB<-rep(0,ncol(conB))
-bconB<-t(bconB)
-smatB<-var(bconB-tempcenB+tvecB)
-bconB<-rbind(bconB,veczB)
-if(!pro.dis){
-if(!op)dv<-mahalanobis(bconB,tvecB,smatB)
-if(op){
-dv<-out(bconB)$dis
-}}
-if(pro.dis)dv=pdis(bconB,MM=MM)
-sig.levelB<-1-sum(dv[bplus]>=dv[1:nboot])/nboot
-bconAB<-t(conAB)%*%bvec #C by nboot matrix
-tvecAB<-t(conAB)%*%mvec
-tvecAB<-tvecAB[,1]
-tempcenAB<-apply(bconAB,1,mean)
-veczAB<-rep(0,ncol(conAB))
-bconAB<-t(bconAB)
-smatAB<-var(bconAB-tempcenAB+tvecAB)
-bconAB<-rbind(bconAB,veczAB)
-if(!pro.dis){
-if(!op)dv<-mahalanobis(bconAB,tvecAB,smatAB)
-if(op){
-dv<-out(bconAB)$dis
-}}
-if(pro.dis)dv=pdis(bconAB,MM=MM)
-sig.levelAB<-1-sum(dv[bplus]>=dv[1:nboot])/nboot
-list(sig.levelA=sig.levelA,sig.levelB=sig.levelB,sig.levelAB=sig.levelAB,conA=conA,conB=conB,conAB=conAB)
-
-}
-
-
-
-
-t2way.no.p<-function(J,K,x,tr=.2,grp=c(1:p),alpha=.05,p=J*K){
-# Perform a J by K (two-way) anova on trimmed means where
-# all JK groups are independent.
-#
-# The R variable x is assumed to contain the raw
-# data stored in list mode.
-# If grp is unspecified, it is assumed x[[1]] contains the data
-# for the first level of both factors: level 1,1.
-# x[[2]] is assumed to contain the data for level 1 of the
-# first factor and level 2 of the second factor: level 1,2
-# x[[K+1]] is the data for level 2,1, etc.
-# If the data are in the wrong order, grp can be used to rearrange the
-# groups. For example, for a two by two design, grp<-c(2,4,3,1)
-# indicates that the second group corresponds to level 1,1;
-# group 4 corresponds to level 1,2; group 3 is level 2,1;
-# and group 1 is level 2,2.
-#
-# The default amount of trimming is tr=.2
-#
-# It is assumed that the input variable x has length JK, the total number of
-# groups being tested. If not, a warning message is printed.
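-# Example (an illustrative sketch): with the six groups of a 2-by-3 design
-# stored in list mode in the order 1,1 1,2 1,3 2,1 2,2 2,3,
-#   t2way.no.p(2,3,x)
-# returns Qa, Qb and Qab, p-values for the two main effects, and an
-# adjusted critical value (critinter) for the interaction.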
-# -if(is.data.frame(x))x=as.matrix(x) -if(is.matrix(x))x<-listm(x) -if(!is.list(x))stop("Data are not stored in a matrix or in list mode") -if(p!=length(x)){ -print("Warning: The number of groups in your data is not equal to JK") -} -for(j in 1:p)x[[j]]<-elimna(x[[j]]) -xbar<-0 -h<-0 -d<-0 -R<-0 -W<-0 -d<-0 -r<-0 -w<-0 -nuhat<-0 -omegahat<-0 -DROW<-0 -DCOL<-0 -xtil<-matrix(0,J,K) -aval<-matrix(0,J,K) -for (j in 1:p){ -xbar[j]<-mean(x[[grp[j]]],tr) -h[j]<-length(x[[grp[j]]])-2*floor(tr*length(x[[grp[j]]])) -d[j]<-(length(x[[grp[j]]])-1)*winvar(x[[grp[j]]],tr)/(h[j]*(h[j]-1)) -} -d<-matrix(d,J,K,byrow=TRUE) -xbar<-matrix(xbar,J,K,byrow=TRUE) -h<-matrix(h,J,K,byrow=TRUE) -for(j in 1:J){ -R[j]<-sum(xbar[j,]) -nuhat[j]<-(sum(d[j,]))^2/sum(d[j,]^2/(h[j,]-1)) -r[j]<-1/sum(d[j,]) -DROW[j]<-sum(1/d[j,]) -} -for(k in 1:K){ -W[k]<-sum(xbar[,k]) -omegahat[k]<-(sum(d[,k]))^2/sum(d[,k]^2/(h[,k]-1)) -w[k]<-1/sum(d[,k]) -DCOL[k]<-sum(1/d[,k]) -} -D<-1/d -for(j in 1:J){ -for(k in 1:K){ -xtil[j,k]<-sum(D[,k]*xbar[,k]/DCOL[k])+sum(D[j,]*xbar[j,]/DROW[j])- -sum(D*xbar/sum(D)) -aval[j,k]<-(1-D[j,k]*(1/sum(D[j,])+1/sum(D[,k])-1/sum(D)))^2/(h[j,k]-3) -} -} -Rhat<-sum(r*R)/sum(r) -What<-sum(w*W)/sum(w) -Ba<-sum((1-r/sum(r))^2/nuhat) -Bb<-sum((1-w/sum(w))^2/omegahat) -Va<-sum(r*(R-Rhat)^2)/((J-1)*(1+2*(J-2)*Ba/(J^2-1))) -Vb<-sum(w*(W-What)^2)/((K-1)*(1+2*(K-2)*Bb/(K^2-1))) -nu2<-(J^2-1)/(3*Ba) -sig.A<-1-pf(Va,J-1,nu2) -nu2<-(K^2-1)/(3*Bb) -sig.B<-1-pf(Vb,K-1,nu2) -# Next, do test for interactions -Vab<-sum(D*(xbar-xtil)^2) -dfinter<-(J-1)*(K-1) -crit<-qchisq(1-alpha,dfinter) -hc<-(crit/(2*dfinter))*(1+(3*crit)/(dfinter+2))*sum(aval) -adcrit<-crit+hc -list(Qa=Va,sig.A=sig.A,Qb=Vb,sig.B=sig.B,Qab=Vab,critinter=adcrit) -} - - -t2waybt<-function(J,K,x,tr=.2,grp=c(1:p),p=J*K,nboot=599,SEED=TRUE){ -# -# Two-way ANOVA based on trimmed means and a bootstrap-t method -# -# The data are assumed to be stored as described in the function t2way -# -# The default number of bootstrap samples is nboot=599 -# -if(is.data.frame(x))x=as.matrix(x) -if(is.matrix(x))x<-listm(x) -if(!is.list(x))stop("Data must be stored in a matrix or in list mode.") -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -# compute test statistics: -tests=t2way.no.p(J=J,K=K,x,tr=tr,grp=grp) -TA=NULL -TB=NULL -TAB=NULL -data=list() -xcen=list() -for(j in 1:length(x))xcen[[j]]<-x[[j]]-mean(x[[j]],tr) -print("Taking bootstrap samples. Please wait.") -for(b in 1:nboot){ -for(j in 1:length(x))data[[j]]<-sample(xcen[[j]],size=length(x[[j]]),replace=TRUE) -bt=t2way.no.p(J,K,data,tr=tr,grp=grp) -TA[b]=bt$Qa -TB[b]=bt$Qb -TAB[b]=bt$Qab -} -pA<-sum(tests$Qa<=TA)/nboot -pB<-sum(tests$Qb<=TB)/nboot -pAB<-sum(tests$Qab<=TAB)/nboot -list(A.p.value=pA,B.p.value=pB,AB.p.value=pAB) -} - - -t3way<-function(J,K,L,x,tr=.2,grp=c(1:p),alpha=.05,p=J*K*L,MAT=FALSE, -lev.col=c(1:3),var.col=4,pr=TRUE,IV1=NULL,IV2=NULL,IV3=NULL){ -# Perform a J by K by L (three-way) anova on trimmed means where -# all JKL groups are independent. -# -# The R variable data is assumed to contain the raw -# data stored in list mode. data[[1]] contains the data -# for the first level of all three factors: level 1,1,1. -# data][2]] is assumed to contain the data for level 1 of the -# first two factors and level 2 of the third factor: level 1,1,2 -# data[[L]] is the data for level 1,1,L -# data[[L+1]] is the data for level 1,2,1. data[[2L]] is level 1,2,L. -# data[[KL+1]] is level 2,1,1, etc. 
-#
-# The default amount of trimming is tr=.2
-#
-# It is assumed that data has length JKL, the total number of
-# groups being tested.
-#
-# MAT=T assumes data are stored in a matrix with 3 columns indicating the
-# levels of the three factors.
-# That is, this function calls selby2 for you.
-#
-if(is.data.frame(x))x=as.matrix(x)
-if(!is.null(IV1[1])){
-if(is.null(IV2[1]))stop("IV2 is NULL")
-if(is.null(IV3[1]))stop("IV3 is NULL")
-if(pr)print("Assuming x is a vector containing all of the data; the dependent variable")
-xi=elimna(cbind(x,IV1,IV2,IV3))
-x=fac2list(xi[,1],xi[,2:4])
-J=length(unique(IV1))
-K=length(unique(IV2))
-L=length(unique(IV3))
-p=J*K*L
-}
-data=x
-if(MAT){
-if(!is.matrix(data))stop("With MAT=T, data must be a matrix")
-if(length(lev.col)!=3)stop("Argument lev.col should have 3 values")
-temp=selby2(data,lev.col,var.col)
-lev1=length(unique(temp$grpn[,1]))
-lev2=length(unique(temp$grpn[,2]))
-lev3=length(unique(temp$grpn[,3]))
-gv=apply(temp$grpn,2,rank)
-gvad=100*gv[,1]+10*gv[,2]+gv[,3]
-grp=rank(gvad)
-if(pr){
-print(paste("Factor 1 has", lev1, "levels"))
-print(paste("Factor 2 has", lev2, "levels"))
-print(paste("Factor 3 has", lev3, "levels"))
-}
-if(J!=lev1)warning("J is being reset to the number of levels found")
-if(K!=lev2)warning("K is being reset to the number of levels found")
-if(L!=lev3)warning("L is being reset to the number of levels found")
-J=lev1
-K=lev2
-L=lev3
-data=temp$x
-}
-if(is.matrix(data))data=listm(data)
-if(!is.list(data))stop("Data are not stored in list mode")
-if(p!=length(data)){
-print("The total number of groups, based on the specified levels, is")
-print(p)
-print("The number of groups in data is")
-print(length(data))
-print("Warning: These two values are not equal")
-}
-tmeans<-0
-h<-0
-v<-0
-for (i in 1:p){
-tmeans[i]<-mean(data[[grp[i]]],tr)
-h[i]<-length(data[[grp[i]]])-2*floor(tr*length(data[[grp[i]]]))
-# h is the effective sample size
-v[i]<-(length(data[[grp[i]]])-1)*winvar(data[[grp[i]]],tr)/(h[i]*(h[i]-1))
-# v contains the squared standard errors
-}
-v<-diag(v,p,p) # Put squared standard errors in a diag matrix.
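-# The contrast matrices for the main effects and interactions are built next
-# as Kronecker products (kron) of difference matrices (cj, ck, cl) and row
-# vectors of ones (ij, ik, il); each is passed to johan. For factors A and B
-# the p-value comes from t3pval; for the remaining tests it is the smallest
-# alpha in alval at which the test statistic exceeds its critical value.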
-ij<-matrix(c(rep(1,J)),1,J) -ik<-matrix(c(rep(1,K)),1,K) -il<-matrix(c(rep(1,L)),1,L) -jm1<-J-1 -cj<-diag(1,jm1,J) -for (i in 1:jm1)cj[i,i+1]<-0-1 -km1<-K-1 -ck<-diag(1,km1,K) -for (i in 1:km1)ck[i,i+1]<-0-1 -lm1<-L-1 -cl<-diag(1,lm1,L) -for (i in 1:lm1)cl[i,i+1]<-0-1 -alval<-c(1:999)/1000 -# Do test for factor A -cmat<-kron(cj,kron(ik,il)) # Contrast matrix for factor A -Qa<-johan(cmat,tmeans,v,h,alpha) -A.p.value=t3pval(cmat,tmeans,v,h) -# Do test for factor B -cmat<-kron(ij,kron(ck,il)) # Contrast matrix for factor B -Qb<-johan(cmat,tmeans,v,h,alpha) -B.p.value=t3pval(cmat,tmeans,v,h) -# Do test for factor C -cmat<-kron(ij,kron(ik,cl)) # Contrast matrix for factor C -#Qc<-johan(cmat,tmeans,v,h,alpha) -for(i in 1:999){ -irem<-i -Qc<-johan(cmat,tmeans,v,h,alval[i]) -if(Qc$teststat>Qc$crit)break -} -C.p.value=irem/1000 -# Do test for factor A by B interaction -cmat<-kron(cj,kron(ck,il)) # Contrast matrix for factor A by B -for(i in 1:999){ -irem<-i -Qab<-johan(cmat,tmeans,v,h,alval[i]) -if(Qab$teststat>Qab$crit)break -} -AB.p.value=irem/1000 -# Do test for factor A by C interaction -cmat<-kron(cj,kron(ik,cl)) # Contrast matrix for factor A by C -for(i in 1:999){ -irem<-i -Qac<-johan(cmat,tmeans,v,h,alval[i]) -if(Qac$teststat>Qac$crit)break -} -AC.p.value=irem/1000 -#Qac<-johan(cmat,tmeans,v,h,alpha) -# Do test for factor B by C interaction -cmat<-kron(ij,kron(ck,cl)) # Contrast matrix for factor B by C -#Qbc<-johan(cmat,tmeans,v,h,alpha) -for(i in 1:999){ -irem<-i -Qbc<-johan(cmat,tmeans,v,h,alval[i]) -if(Qbc$teststat>Qbc$crit)break -} -BC.p.value=irem/1000 -# Do test for factor A by B by C interaction -cmat<-kron(cj,kron(ck,cl)) # Contrast matrix for factor A by B by C -#Qabc<-johan(cmat,tmeans,v,h,alpha) -for(i in 1:999){ -irem<-i -Qabc<-johan(cmat,tmeans,v,h,alval[i]) -if(Qabc$teststat>Qabc$crit)break -} -ABC.p.value=irem/1000 -list(Qa=Qa$teststat,Qa.crit=Qa$crit,A.p.value=A.p.value, -Qb=Qb$teststat,Qb.crit=Qb$crit, -B.p.value=B.p.value, -Qc=Qc$teststat,Qc.crit=Qc$crit,C.p.value=C.p.value, -Qab=Qab$teststat,Qab.crit=Qab$crit, -AB.p.value=AB.p.value, -Qac=Qac$teststat,Qac.crit=Qac$crit,AC.p.value=AC.p.value, -Qbc=Qbc$teststat,Qbc.crit=Qbc$crit, -BC.p.value=BC.p.value, -Qabc=Qabc$teststat,Qabc.crit=Qabc$crit,ABC.p.value=ABC.p.value) -} - -regciMC<- -function(x,y,regfun=tsreg,nboot=599,alpha=.05,plotit=FALSE,pr=FALSE, -null.val=NULL,method='hoch', -xlab='Predictor 1',ylab='Predictor 2',xout=FALSE,outfun=outpro,SEED=TRUE,...){ -# -# Compute a .95 confidence interval for each of the parameters of -# a linear regression equation. The default regression method is -# Theil-Sen estimator. -# -# When using the least squares estimator, and when n<250, use -# lsfitci instead. -# -# Same as the function regci, only a multi-core processor is used. -# -# The predictor values are assumed to be in the n by p matrix x. -# The default number of bootstrap samples is nboot=599 -# -# regfun can be any R function that returns the coefficients in -# the vector regfun$coef, the first element of which contains the -# estimated intercept, the second element contains the estimated of -# the first predictor, etc. -# -# plotit=TRUE: If there are two predictors, plot 1-alpha confidence region based -# on the bootstrap samples. 
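-# Example (an illustrative sketch with simulated data):
-#   set.seed(1)
-#   x=matrix(rnorm(100),ncol=2)
-#   y=x[,1]+rnorm(50)
-#   regciMC(x,y,nboot=599)  # CI, estimate, S.E. and p-value for the
-#                           # intercept and each slope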
-# -library(parallel) -x<-as.matrix(x) -p1<-ncol(x)+1 -p<-ncol(x) -xy<-cbind(x,y) -xy<-elimna(xy) -x<-xy[,1:p] -y<-xy[,p1] -nrem=length(y) -if(xout){ -if(pr)print('Default for argument outfun is now outpro') -m<-cbind(x,y) -if(identical(outfun,outblp))flag=outblp(x,y,plotit=FALSE)$keep -else -flag<-outfun(x,plotit=FALSE,...)$keep -m<-m[flag,] -x<-m[,1:p] -y<-m[,p1] -} -estit=regfun(x,y,...)$coef -if(is.null(null.val))null.val=rep(0,p1) -flagF=FALSE -flagF=identical(regfun,tsreg) -if(flagF){ -if(pr){ -if(sum(duplicated(y)>0))print('Duplicate values detected; tshdreg might have more power than tsreg') -}} -x=as.matrix(x) -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot) -data=listm(t(data)) -bvec<-mclapply(data,regbootMC,x,y,regfun,mc.preschedule=TRUE,xout=FALSE,...) -bvec=matl(bvec) -# bvec is a p+1 by nboot matrix. The first row -# contains the bootstrap intercepts, the second row -# contains the bootstrap values for first predictor, etc. -p1<-ncol(x)+1 -regci<-matrix(0,p1,6) -vlabs='Intercept' -for(j in 2:p1)vlabs[j]=paste('Slope',j-1) -dimnames(regci)<-list(vlabs,c('ci.low','ci.up','Estimate','S.E.','p-value','p.adj')) -ilow<-round((alpha/2) * nboot) -ihi<-nboot - ilow -ilow<-ilow+1 -se<-NA -sig.level<-NA -for(i in 1:p1){ -#temp=(sum(bvec[i,]<0)+.5*sum(bvec[i,]==0))/nboot -temp=(sum(bvec[i,]1){ -if(is.na(center[1])){ -if(cop==1)center<-dmean(m,tr=.5,dop=dop) -if(cop==2)center<-cov.mcd(m,print=FALSE)$center -if(cop==3)center<-apply(m,2,median) -if(cop==4)center<-cov.mve(m,print=FALSE)$center -if(cop==5)center<-smean(m) -} -cenmat=matrix(rep(center,nrow(m)),ncol=ncol(m),byrow=TRUE) -Amat=m-cenmat -B=listm(t(Amat)) # so rows are now in B[[1]]...B[[n]] -dis=mclapply(B,outproMC.sub,Amat,mc.preschedule=TRUE) -if(!MM){ -dmat<-mclapply(dis,IQRstand,mc.preschedule=TRUE) -} -if(MM)dmat<-mclapply(dis,MADstand,mc.preschedule=TRUE) -pdis<-apply(matl(dmat),1,max,na.rm=TRUE) -} -pdis -} -IQRstand<-function(x){ -vals=idealf(x) -res=x/(vals$qu-vals$ql) -res -} -MADstand<-function(x){ -val=x/mad(x) -val -} -regtestMC<-function(x,y,regfun=tsreg,nboot=600,alpha=.05,plotit=TRUE, -grp=c(1:ncol(x)),nullvec=c(rep(0,length(grp))),xout=FALSE,outfun=outpro,SEED=TRUE,pr=TRUE,...){ -# -# Test the hypothesis that q of the p predictors are equal to -# some specified constants. By default, the hypothesis is that all -# p predictors have a coefficient equal to zero. -# The method is based on a confidence ellipsoid. -# The critical value is determined with the percentile bootstrap method -# in conjunction with Mahalanobis distance. -# -library(parallel) -x<-as.matrix(x) -p1<-ncol(x)+1 -p<-ncol(x) -xy<-cbind(x,y) -xy<-elimna(xy) -x<-xy[,1:p] -y<-xy[,p1] -if(xout){ -if(pr)print("Default for outfun is now outpro") -m<-cbind(x,y) -if(identical(outfun,outblp))flag=outblp(x,y,plotit=FALSE)$keep -else -flag<-outfun(x,plotit=FALSE,...)$keep -m<-m[flag,] -x<-m[,1:p] -y<-m[,p1] -} -x<-as.matrix(x) -if(length(grp)!=length(nullvec))stop("The arguments grp and nullvec must have the same length.") -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot) -data=listm(t(data)) -# bvec<-apply(data,1,regboot,x,y,regfun) # A p+1 by nboot matrix. The first row -# contains the bootstrap intercepts, the second row -# contains the bootstrap values for first predictor, etc. 
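-# The global test computed further below uses the Mahalanobis distance
-# between the estimated coefficients and nullvec (based on the bootstrap
-# covariance); the p-value is the proportion of bootstrap distances at least
-# as large as this test statistic.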
-bvec=mclapply(data,regbootMC,x,y,regfun,mc.preschedule=TRUE) # list mode bvec[[1]] -# contains estimate from first bootstrap sample, etc. -bvec=matl(bvec) -grp<-grp+1 -est<-regfun(x,y)$coef -estsub<-est[grp] -bsub<-t(bvec[grp,]) -if(length(grp)==1){ -m1<-sum((bvec[grp,]-est)^2)/(length(y)-1) -dis<-(bsub-estsub)^2/m1 -} -if(length(grp)>1){ -mvec<-apply(bsub,2,FUN=mean) -m1<-var(t(t(bsub)-mvec+estsub)) -dis<-mahalanobis(bsub,estsub,m1) -} -dis2<-order(dis) -dis<-sort(dis) -critn<-floor((1-alpha)*nboot) -crit<-dis[critn] -test<-mahalanobis(t(estsub),nullvec,m1) -sig.level<-1-sum(test>dis)/nboot -if(length(grp)==2 && plotit){ -plot(bsub,xlab="Parameter 1",ylab="Parameter 2") -points(nullvec[1],nullvec[2],pch=0) -xx<-bsub[dis2[1:critn],] -xord<-order(xx[,1]) -xx<-xx[xord,] -temp<-chull(xx) -lines(xx[temp,]) -lines(xx[c(temp[1],temp[length(temp)]),]) -} -list(test=test,crit=crit,p.value=sig.level,nullvec=nullvec,est=estsub) -} - -pbadepth<-function(x,est=onestep,con=0,alpha=.05,nboot=2000,grp=NA,op=3,allp=TRUE, -MM=FALSE,MC=FALSE,cop=3,SEED=TRUE,na.rm=FALSE,...){ -# -# Test the hypothesis that C linear contrasts all have a value of zero. -# By default, an M-estimator is used -# -# Independent groups are assumed. -# -# The data are assumed to be stored in x in list mode or in a matrix. -# If stored in list mode, -# x[[1]] contains the data for the first group, x[[2]] the data -# for the second group, etc. Length(x)=the number of groups = J, say. -# If stored in a matrix, columns correspond to groups. -# -# By default, all pairwise differences are used, but contrasts -# can be specified with the argument con. -# The columns of con indicate the contrast coefficients. -# Con should have J rows, J=number of groups. -# For example, con[,1]=c(1,1,-1,-1,0,0) and con[,2]=c(,1,-1,0,0,1,-1) -# will test two contrasts: (1) the sum of the first -# two measures of location is -# equal to the sum of the second two, and (2) the difference between -# the first two is equal to the difference between the -# measures of location for groups 5 and 6. -# -# The default number of bootstrap samples is nboot=2000 -# -# op controls how depth is measured -# op=1, Mahalanobis -# op=2, Mahalanobis based on MCD covariance matrix -# op=3, Projection distance -# -# MC=TRUE, use a multicore processor when op=3 -# -# for arguments MM and cop, see pdis. -# -con<-as.matrix(con) -if(is.matrix(x) || is.data.frame(x))x=listm(x) -if(!is.list(x))stop("Data must be stored in list mode or in matrix mode.") -if(!is.na(grp)){ # Only analyze specified groups. -xx<-list() -for(i in 1:length(grp))xx[[i]]<-x[[grp[i]]] -x<-xx -} -J<-length(x) -mvec<-NA -nvec=NA -for(j in 1:J){ -temp<-x[[j]] -if(na.rm)temp<-temp[!is.na(temp)] # Remove missing values. -x[[j]]<-temp -mvec[j]<-est(temp,...) -nvec[j]=length(temp) -} -Jm<-J-1 -d<-ifelse(con==0,(J^2-J)/2,ncol(con)) -if(sum(con^2)==0){ -if(allp){ -con<-matrix(0,J,d) -id<-0 -for (j in 1:Jm){ -jp<-j+1 -for (k in jp:J){ -id<-id+1 -con[j,id]<-1 -con[k,id]<-0-1 -}}} -if(!allp){ -con<-matrix(0,J,Jm) -for (j in 1:Jm){ -jp<-j+1 -con[j,j]<-1 -con[jp,j]<-0-1 -}}} -bvec<-matrix(NA,nrow=J,ncol=nboot) -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -#print("Taking bootstrap samples. Please wait.") -for(j in 1:J){ -#print(paste("Working on group ",j)) -data<-matrix(sample(x[[j]],size=length(x[[j]])*nboot,replace=TRUE),nrow=nboot) -bvec[j,]<-apply(data,1,est,na.rm=na.rm,...) 
# J by nboot matrix, jth row contains -# bootstrapped estimates for jth group -} -chkna=sum(is.na(bvec)) -if(chkna>0){ -print("Bootstrap estimates of location could not be computed") -print("This can occur when using an M-estimator") -print("Might try est=tmean") -} -bcon<-t(con)%*%bvec #C by nboot matrix -tvec<-t(con)%*%mvec -tvec<-tvec[,1] -tempcen<-apply(bcon,1,mean) -vecz<-rep(0,ncol(con)) -bcon<-t(bcon) -smat<-var(bcon-tempcen+tvec) -temp<-bcon-tempcen+tvec -bcon<-rbind(bcon,vecz) -if(op==1)dv<-mahalanobis(bcon,tvec,smat) -if(op==2){ -smat<-cov.mcd(temp)$cov -dv<-mahalanobis(bcon,tvec,smat) -} -if(op==3){ -#print("Computing p-value. Might take a while with op=3") -if(!MC)dv<-pdis(bcon,MM=MM,cop=cop) -if(MC)dv<-pdisMC(bcon,MM=MM,cop=cop) -} -bplus<-nboot+1 -sig.level<-1-sum(dv[bplus]>=dv[1:nboot])/nboot -list(p.value=sig.level,psihat=tvec,con=con,n=nvec) -} - -outproMC.sub<-function(B,Amat){ -dis<-NA -bot<-sum(B^2) -Bmat=matrix(rep(B,nrow(Amat)),ncol=ncol(Amat),byrow=TRUE) -temp<-apply(Bmat*Amat,1,sum) -temp=matrix(rep(temp,ncol(Amat)),ncol=ncol(Amat)) -temp=temp*Bmat/bot -temp=temp^2 -dis=apply(temp,1,sum) -dis<-sqrt(dis) -flag=(dis==Inf) -dis[flag]=NA -dis -} -outproMC.sub2<-function(dis,MM,gval){ -temp<-idealf(dis) -if(!MM)cu<-median(dis)+gval*(temp$qu-temp$ql) -if(MM)cu<-median(dis)+gval*mad(dis) -outid<-NA -temp2<-(dis> cu) -flag<-rep(0,length(dis)) -flag[temp2]<-1 -flag -} -bdm2way<-function(J,K,x,grp=c(1:p),p=J*K){ -# -# Perform the Brunner, Dette, Munk rank-based ANOVA -# (JASA, 1997, 92, 1494--1502) -# for a J by K independent groups design. -# -# x can be a matrix with columns corresponding to groups -# or it can have list mode. -# -if(is.matrix(x))x<-listm(x) -xx<-list() -for(j in 1:p)xx[[j]]<-x[[grp[j]]] -Ja<-matrix(1,J,J) -Ia<-diag(1,J) -Pa<-Ia-Ja/J -Jb<-matrix(1,K,K) -Ib<-diag(1,K) -Pb<-Ib-Jb/K -cona<-kron(Pa,Jb/K) -conb<-kron(Ja/J,Pb) -conab<-kron(Pa,Pb) -outA<-bdms1(xx,cona) -releff=matrix(outA$q.hat,nrow=J,ncol=K,byrow=TRUE) -outB<-bdms1(xx,conb) -outAB<-bdms1(xx,conab) -# Could report degrees of freedom, but they are meaningless in terms of understanding the data. -list(p.valueA=outA$p.value,p.valueB=outB$p.value, p.valueAB=outAB$p.value, -Relative.Effects=releff,A.F=outA$F,B.F=outB$F,AB.F=outAB$F) -} -mregdepth<-function(X,RES){ -X=as.matrix(X) -XRES=elimna(cbind(X,RES)) -p=ncol(X) -p1=p+1 -vals=NA -for(j in 1:p)vals[j]=resdepth(XRES[,j],XRES[,p1]) -mdepthappr=min(vals) -mdepthappr -} - - -lband<-function(x,y=NULL,alpha=.05,plotit=TRUE,sm=TRUE,op=1,ylab='delta',CI=TRUE, -xlab='x (first group)'){ -# -# Compute a confidence band for the shift function. -# Assuming two dependent groups are being compared -# -# See Lombard (2005, Technometrics, 47, 364-369) -# -# if y=NA, assume x is a matrix with two columns or it has list mode -# -# If plotit=TRUE, a plot of the shift function is created, assuming that -# the graphics window has already been activated. -# -# sm=T, plot of shift function is smoothed using: -# expected frequency curve if op!=1 -# otherwise use S+ function lowess is used. -# -# This function removes all missing observations. -# -# When plotting, the median of x is marked with a + and the two -# quartiles are marked with o. 
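-# Example (an illustrative sketch): for paired data in the two columns of m,
-#   lband(m)
-# returns the band in $m (columns qhat, lower, upper), the critical value,
-# and the quantiles where the band excludes zero (q.sig.greater, q.sig.less).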
-# -if(!is.null(y[1]))x<-cbind(x,y) -if(is.list(x))x=matl(x) -if(ncol(x)!=2)stop('Should have two groups only') -m<-elimna(x) -y<-m[,2] -x<-m[,1] -n<-length(x) -crit<-nelderv2(m,1,lband.fun2,alpha=alpha) -plotit<-as.logical(plotit) -xsort<-sort(x) -ysort<-sort(y) -l<-0 -u<-0 -ysort[0]<-NA -ysort[n+1]<-NA -lsub<-c(1:n)-floor(sqrt(2*n)*crit) -usub<-c(1:n)+floor(sqrt(2*n)*crit) -for(ivec in 1:n){ -isub<-max(0,lsub[ivec]) -l[ivec]<-NA -if(isub>0)l[ivec]<-ysort[isub]-xsort[ivec] -isub<-min(n+1,usub[ivec]) -u[ivec]<-NA -if(isub <= n)u[ivec]<-ysort[isub]-xsort[ivec] -} -num<-length(l[l>0 & !is.na(l)])+length(u[u<0 & !is.na(u)]) -qhat<-c(1:n)/n -m<-cbind(qhat,l,u) -dimnames(m)<-list(NULL,c('qhat','lower','upper')) -if(plotit){ -xsort<-sort(x) -ysort<-sort(y) -del<-0 -for (i in 1:n)del[i]<-ysort[i]-xsort[i] -xaxis<-c(xsort,xsort) -yaxis<-c(m[,1],m[,2]) -allx<-c(xsort,xsort,xsort) -ally<-c(del,m[,2],m[,3]) -temp2<-m[,2] -temp2<-temp2[!is.na(temp2)] -plot(allx,ally,type='n',ylab=ylab,xlab=xlab) -ik<-rep(F,length(xsort)) -if(sm){ -if(op==1){ -ik<-duplicated(xsort) -del<-lowess(xsort,del)$y -} -if(op!=1)del<-runmean(xsort,del,pyhat=TRUE) -} -lines(xsort[!ik],del[!ik]) -lines(xsort,m[,2],lty=2) -lines(xsort,m[,3],lty=2) -temp<-summary(x) -text(temp[3],min(temp2),'+') -text(temp[2],min(temp2),'o') -text(temp[5],min(temp2),'o') -} -id.sig.greater=NULL -id.sig.less.than=NULL -num<-length(l[l>0 & !is.na(l)])+length(u[u<0 & !is.na(u)]) -id.sig.greater=which(l>0) -id.sig.less.than=which(u<0) - -flag=is.na(m[,2]) -m[flag,2]=-Inf -flag=is.na(m[,3]) -m[flag,3]=Inf -q.greater=NULL -if(length(id.sig.greater)>0)q.greater=m[id.sig.greater,1] -q.less=NULL -if(length(id.sig.less.than)>0)q.less=m[id.sig.less.than,1] -if(!CI)m=NULL -list(m=m,crit=crit,numsig=num,q.sig.greater=q.greater,q.sig.less=q.less) -} - -cov.ogk<-function(x,y=NA,n.iter=1,sigmamu=taulc,v=gkcov,beta=.9,...){ -# -# Compute robust (weighted) covariance matrix in Maronna and Zamar -# (2002, Technometrics, eq. 7). -# -# n.iter number of iterations. 1 seems to be best -# sigmamu computes a robust measure of location and scale for -# data stored in a single vector. -# v robust correlation coefficient -# estloc, a robust measure of location -# -if(!is.na(y[1]))x<-cbind(x,y) -if(!is.matrix(x))stop("x should be a matrix") -x<-elimna(x) -n<-nrow(x) -p<-ncol(x) -val<-matrix(NA,p,p) -temp<-ogk(x,sigmamu=sigmamu,v=v,n.iter=n.iter,beta=beta,...)$cov -temp -} -pbmcp<-function(x,alpha=.05,nboot=NA,grp=NA,est=onestep,con=0,bhop=FALSE, -SEED=TRUE,...){ -# -# Multiple comparisons for J independent groups. -# -# The data are assumed to be stored in x -# which either has list mode or is a matrix. In the first case -# x[[1]] contains the data for the first group, x[[2]] the data -# for the second group, etc. Length(x)=the number of groups = J. -# If stored in a matrix, the columns of the matrix correspond -# to groups. -# -# est is the measure of location and defaults to an M-estimator -# ... can be used to set optional arguments associated with est -# -# The argument grp can be used to analyze a subset of the groups -# Example: grp=c(1,3,5) would compare groups 1, 3 and 5. -# -# Missing values are allowed. -# -okay=FALSE -if(identical(est,onestep))okay=TRUE -if(identical(est,mom))okay=TRUE -if(!okay)stop('For estimators other than onestep and mom, use linconpb') -con<-as.matrix(con) -if(is.matrix(x))x<-listm(x) -if(!is.list(x))stop("Data must be stored in list mode or in matrix mode.") -if(!is.na(sum(grp))){ # Only analyze specified groups. 
-xx<-list() -for(i in 1:length(grp))xx[[i]]<-x[[grp[i]]] -x<-xx -} -J<-length(x) -tempn<-0 -mvec<-NA -for(j in 1:J){ -temp<-x[[j]] -temp<-temp[!is.na(temp)] # Remove missing values. -tempn[j]<-length(temp) -x[[j]]<-temp -mvec[j]<-est(temp,...) -} -nmax=max(tempn) -Jm<-J-1 -# -# Determine contrast matrix -# -if(sum(con^2)==0){ -ncon<-(J^2-J)/2 -con<-matrix(0,J,ncon) -id<-0 -for (j in 1:Jm){ -jp<-j+1 -for (k in jp:J){ -id<-id+1 -con[j,id]<-1 -con[k,id]<-0-1 -}}} -ncon<-ncol(con) -if(nrow(con)!=J){ -stop("Something is wrong with con; the number of rows does not match the number of groups.") -} -# Determine nboot if a value was not specified -if(is.na(nboot)){ -nboot<-5000 -if(J <= 8)nboot<-4000 -if(J <= 3)nboot<-2000 -} -# Determine critical values -if(!bhop){ -if(!identical(est,onestep))print('When est is not equal to onestep, suggest using bhop=TRUE') -if(alpha==.05){ -dvec<-c(.025,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) -if(ncon > 10){ -avec<-.05/c(11:ncon) -dvec<-c(dvec,avec) -}} -if(alpha==.01){ -dvec<-c(.005,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) -if(nmax>=100)dvec[1]=.01 -if(ncon > 10){ -avec<-.01/c(11:ncon) -dvec<-c(dvec,avec) -}} -if(alpha != .05 && alpha != .01){ -dvec<-alpha/c(1:ncon) -dvec[1]<-alpha/2 -} -dvec<-2*dvec -} -if(nmax>80){ -if(alpha==.05){ -dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) -if(ncon > 10){ -avec<-.05/c(11:ncon) -dvec<-c(dvec,avec) -}} -if(alpha==.01){ -dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) -if(ncon > 10){ -avec<-.01/c(11:ncon) -dvec<-c(dvec,avec) -}} -if(alpha != .05 && alpha != .01){ -dvec<-alpha/c(1:ncon) -} -} -if(bhop)dvec<-(ncon-c(1:ncon)+1)*alpha/ncon -bvec<-matrix(NA,nrow=J,ncol=nboot) -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -#print("Taking bootstrap samples. Please wait.") -for(j in 1:J){ -#paste("Working on group ",j) -data<-matrix(sample(x[[j]],size=length(x[[j]])*nboot,replace=TRUE),nrow=nboot) -bvec[j,]<-apply(data,1,est,...) # Bootstrapped values for jth group -} -chkna=sum(is.na(bvec)) -if(chkna>0){ -print("Bootstrap estimates of location could not be computed") -print("This can occur when using an M-estimator") -print("Might try est=tmean") -} -test<-NA -bcon<-t(con)%*%bvec #ncon by nboot matrix -tvec<-t(con)%*%mvec -for (d in 1:ncon){ -test[d]<-(sum(bcon[d,]>0)+.5*sum(bcon[d,]==0))/nboot -if(test[d]> .5)test[d]<-1-test[d] -} -test<-2*test -output<-matrix(0,ncon,6) -dimnames(output)<-list(NULL,c("con.num","psihat","p.value","p.crit", -"ci.lower","ci.upper")) -temp2<-order(0-test) -zvec<-dvec[1:ncon] -sigvec<-(test[temp2]>=zvec) -output[temp2,4]<-zvec -icl<-round(dvec[ncon]*nboot/2)+1 -icu<-nboot-icl-1 -for (ic in 1:ncol(con)){ -output[ic,2]<-tvec[ic,] -output[ic,1]<-ic -output[ic,3]<-test[ic] -temp<-sort(bcon[ic,]) -output[ic,5]<-temp[icl] -output[ic,6]<-temp[icu] -} -num.sig<-sum(output[,3]<=output[,4]) -list(output=output,con=con,num.sig=num.sig) -} - -pbmcpSR=pbmcp - -bmpmul<-function(x,alpha=.05){ -# -# Perform Brunner-Munzel method for all pairs of J independent groups. -# -# The familywise type I error probability is controlled by using -# a critical value from the Studentized maximum modulus distribution. -# -# The data are assumed to be stored in $x$ in list mode -# or in a matrix having J columns. -# -# Missing values are automatically removed. -# -# The default value for alpha is .05. Any other value results in using -# alpha=.01. 
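-# Example (an illustrative sketch): with four independent groups stored in
-# the columns of a matrix x,
-#   bmpmul(x)
-# performs all six pairwise Brunner-Munzel comparisons, using a Studentized
-# maximum modulus critical value to control FWE.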
-# -if(is.matrix(x))x<-listm(x) -if(!is.list(x))stop("Data must be stored in a matrix or in list mode.") -J<-length(x) -CC<-(J^2-J)/2 -test<-matrix(NA,CC,7) -for(j in 1:J){ -xx<-!is.na(x[[j]]) -val<-x[[j]] -x[[j]]<-val[xx] # Remove missing values -} -dimnames(test)<-list(NULL,c("Group","Group","P.hat","ci.lower","ci.upper","df","p.value")) -jcom<-0 -for (j in 1:J){ -for (k in 1:J){ -if (j < k){ -temp<-bmp(x[[j]],x[[k]],alpha) -crit<-0-smmcrit(temp$df,CC) -if(alpha!=.05)crit<-0-smmcrit01(temp$df,CC) -temp<-bmp(x[[j]],x[[k]],crit=crit) -jcom<-jcom+1 -test[jcom,1]<-j -test[jcom,2]<-k -test[jcom,3]<-temp$phat -test[jcom,4]<-temp$ci.p[1] -test[jcom,5]<-temp$ci.p[2] -test[jcom,6]<-temp$df -test[jcom,7]<-temp$p.value -}}} -list(test=test) -} -outproadMC<-function(m,center=NA,plotit=TRUE,op=TRUE,MM=TRUE,cop=3, -xlab="VAR 1",ylab="VAR 2",rate=.05,iter=100,ip=6,pr=TRUE,SEED=TRUE){ -# -# Adjusts the critical value, gval used by outpro, -# so that the outside rate per observation, under normality -# is approximatley equal to the value given by the argument -# rate, which defaults to .05. -# That is, expected proportion of points declared outliers under normality -# is intended to be rate=.05 -# -# When dealing with p-variate data, p>9, this adjustment can be crucial -# -library(parallel) -m=elimna(m) -m=as.matrix(m) -n=nrow(m) -z=array(rmul(n*iter*ncol(m)),c(iter,n,ncol(m))) -newq=0 -gtry=NA -for(itry in 1:ip){ -newq=newq+9/10^itry -gtry[itry]=newq -} -gtry=c(.95,.975,gtry[-1]) -if(pr)print("Computing adjustment") -val=NA -if(SEED)set.seed(2) -for(itry in 1:ip){ -for(i in 1:iter){ -temp=outproMC(z[i,,],gval = sqrt(qchisq(gtry[itry],ncol(m))), -center=center,plotit=FALSE,op=op,MM=MM,cop=cop)$out.id -val[i]=length(temp) -} -erate=mean(val)/n -if(erate0){ -if(nrow(con)!=length(x)){ -stop("The number of groups does not match the number of contrast coefficients.") -} -v1=nrow(con)-1 -psihat<-matrix(0,ncol(con),5) -dimnames(psihat)<-list(NULL,c("con.num","psihat","ci.lower","ci.upper", -"p.value")) -test<-matrix(0,ncol(con),5) -dimnames(test)<-list(NULL,c("con.num","test","crit","se","df")) -df<-0 -L=nrow(con) -for (d in 1:ncol(con)){ -psihat[d,1]<-d -psihat[d,2]<-sum(con[,d]*xbar) -sejk<-sqrt(sum(con[,d]^2*w)) -test[d,1]<-d -df<-(sum(con[,d]^2*w))^2/sum(con[,d]^4*w^2/(h-1)) -A=(L-1)*(1+(L-2)/df) -test[d,2]<-(sum(con[,d]*xbar)/(sqrt(A)*sejk))^2 -crit=qf(1-alpha,v1,df) -test[d,3]<-crit -test[d,4]<-sejk -test[d,5]<-df -psihat[d,3]<-psihat[d,2]-sqrt(crit*A)*sejk -psihat[d,4]<-psihat[d,2]+sqrt(crit*A)*sejk -psihat[d,5]<-1-pf(test[d,2],v1,df) -}} -# -if(pr){ -print("Note: confidence intervals are adjusted to control FWE") -print("But p-values are not adjusted to control FWE") -} -list(test=test,psihat=psihat) -} -smmvalv2<-function(dfvec,iter=20000,alpha=.05,SEED=TRUE){ -# -if(SEED)set.seed(1) -vals<-NA -tvals<-NA -J<-length(dfvec) -z=matrix(nrow=iter,ncol=J) -for(j in 1: J)z[,j]=abs(rt(iter,dfvec[j])) -vals=apply(z,1,max) -vals<-sort(vals) -ival<-round((1-alpha)*iter) -qval<-vals[ival] -qval -} -bwtrim<-function(J,K,data,tr=.2,grp=c(1:p),p=J*K,MAT=FALSE,grpc=1,coln=c(2:3)){ -# Perform a J-by-K anova on trimmed means with -# repeated measures on the second factor. That is, a split-plot design -# is assumed, with the first factor consisting of independent groups. -# -# If the data are stored in a matrix or data frame, it is converted to list mode. -# Once in list mode, -# data[[1]] contains the data -# for the first level of both factors: level 1,1. 
-# data[[2]] is assumed to contain the data for level 1 of the -# first factor and level 2 of the second: level 1,2 -# data[[K]] is the data for level 1,K -# data[[K+1]] is the data for level 2,1, data[2K] is level 2,K, etc. -# -# The default amount of trimming is tr=.2 -# -# It is assumed that data has length JK, the total number of -# groups being tested, but a subset of the data can be analyzed -# using grp -# -# If the between groups are denoted by groups numbers stored in a column -# of dat, you can set MAT=T, which will store the data in the format -# expected by this function -# -# Example, grpc=1 means group id numbers are in col 1. -# coln=c(3:6) means the within variables are stored in col 3-6. -# -# Or you can use the function selbybw to sort the data. -# -if(is.data.frame(data))data=as.matrix(data) -if(MAT) -data=selbybw(data,grpc=grpc,coln=coln)$x -x<-data - if(is.matrix(x) || is.data.frame(x)) { - y <- list() - for(j in 1:ncol(x)) - y[[j]] <- x[, j] - data <- y - } -if(!is.list(data))stop("Data are not stored in list mode or a matrix") -if(p!=length(data)){ -print("The total number of groups, based on the specified levels, is") -print(p) -print("The number of groups in data is") -print(length(data)) -print("Warning: These two values are not equal") -} -if(p!=length(grp))stop("Apparently a subset of the groups was specified that does not match the total number of groups indicated by the values for J and K.") -tmeans<-0 -h<-0 -v<-matrix(0,p,p) -klow<-1-K -kup<-0 -for (i in 1:p)tmeans[i]<-mean(data[[grp[i]]],tr,na.rm=TRUE) -for (j in 1:J){ -h[j]<-length(data[[grp[j]]])-2*floor(tr*length(data[[grp[j]]])) -# h is the effective sample size for the jth level of factor A -# Use covmtrim to determine blocks of squared standard errors and -# covariances. -klow<-klow+K -kup<-kup+K -sel<-c(klow:kup) -v[sel,sel]<-covmtrim(data[grp[klow:kup]],tr) -} -ij<-matrix(c(rep(1,J)),1,J) -ik<-matrix(c(rep(1,K)),1,K) -jm1<-J-1 -cj<-diag(1,jm1,J) -for (i in 1:jm1)cj[i,i+1]<-0-1 -km1<-K-1 -ck<-diag(1,km1,K) -for (i in 1:km1)ck[i,i+1]<-0-1 -# Do test for factor A -cmat<-kron(cj,ik) # Contrast matrix for factor A -Qa<-johansp(cmat,tmeans,v,h,J,K) -# Do test for factor B -cmat<-kron(ij,ck) # Contrast matrix for factor B -Qb<-johansp(cmat,tmeans,v,h,J,K) -# Do test for factor A by B interaction -cmat<-kron(cj,ck) # Contrast matrix for factor A by B -Qab<-johansp(cmat,tmeans,v,h,J,K) -list(Qa=Qa$teststat,Qa.p.value=Qa$p.value, -Qb=Qb$teststat,Qb.p.value=Qb$p.value, -Qab=Qab$teststat,Qab.p.value=Qab$p.value) -} - - -rmmest<-function(x,y=NA,alpha=.05,con=0,est=onestep,plotit=TRUE,dif=FALSE,grp=NA, -hoch=FALSE,nboot=NA,BA=TRUE,xlab="Group 1",ylab="Group 2",pr=TRUE,...){ -# -# Use a percentile bootstrap method to compare dependent groups. -# By default, -# compute a .95 confidence interval for all linear contasts -# specified by con, a J by C matrix, where C is the number of -# contrasts to be tested, and the columns of con are the -# contrast coefficients. -# If con is not specified, all pairwise comparisons are done. -# -# By default, a one-step M-estimator is used -# and a sequentially rejective method -# is used to control the probability of at least one Type I error. -# -# dif=T indicates that difference scores are to be used -# dif=F indicates that measure of location associated with -# marginal distributions are used instead. -# -# nboot is the bootstrap sample size. If not specified, a value will -# be chosen depending on the number of contrasts there are. 
-# -# x can be an n by J matrix or it can have list mode -# for two groups, data for second group can be put in y -# otherwise, assume x is a matrix (n by J) or has list mode. -# -# A sequentially rejective method is used to control alpha. -# -# Argument BA: When using dif=F, BA=T uses a correction term -# that is recommended when using MOM. -# -if(dif){ -if(pr)print("dif=T, so analysis is done on difference scores") -temp<-rmmcppbd(x,y=y,alpha=.05,con=con,est,plotit=plotit,grp=grp, -nboot=nboot,hoch=hoch,...) -output<-temp$output -con<-temp$con -} -if(!dif){ -if(pr)print("dif=F, so analysis is done on marginal distributions") -if(!is.na(y[1]))x<-cbind(x,y) -if(!is.list(x) && !is.matrix(x))stop("Data must be stored in a matrix or -in list mode.") -if(is.list(x)){ -if(is.matrix(con)){ -if(length(x)!=nrow(con))stop("The number of rows in con is not equal to the -number of groups.") -}} -if(is.list(x)){ -# put the data in an n by J matrix -mat<-matl(x) -} -if(is.matrix(x) && is.matrix(con)){ -if(ncol(x)!=nrow(con))stop("The number of rows in con is not equal to the -number of groups.") -mat<-x -} -n=nrow(x) -if(is.matrix(x))mat<-x -if(!is.na(sum(grp)))mat<-mat[,grp] -mat<-elimna(mat) # Remove rows with missing values. -x<-mat -J<-ncol(mat) -xcen<-x -for(j in 1:J)xcen[,j]<-x[,j]-est(x[,j]) -Jm<-J-1 -if(sum(con^2)==0){ -d<-(J^2-J)/2 -con<-matrix(0,J,d) -id<-0 -for (j in 1:Jm){ -jp<-j+1 -for (k in jp:J){ -id<-id+1 -con[j,id]<-1 -con[k,id]<-0-1 -}}} -d<-ncol(con) -if(is.na(nboot)){ -if(d<=4)nboot<-1000 -if(d>4)nboot<-5000 -} -n<-nrow(mat) -crit.vec<-alpha/c(1:d) -connum<-ncol(con) -set.seed(2) # set seed of random number generator so that -# results can be duplicated. -xbars<-apply(mat,2,est) -psidat<-NA -for (ic in 1:connum)psidat[ic]<-sum(con[,ic]*xbars) -psihat<-matrix(0,connum,nboot) -psihatcen<-matrix(0,connum,nboot) -bvec<-matrix(NA,ncol=J,nrow=nboot) -bveccen<-matrix(NA,ncol=J,nrow=nboot) -print("Taking bootstrap samples. Please wait.") -data<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot) -for(ib in 1:nboot){ -bvec[ib,]<-apply(x[data[ib,],],2,est,...) -bveccen[ib,]<-apply(xcen[data[ib,],],2,est,...) -} -# -# Now have an nboot by J matrix of bootstrap values. -# -test<-1 -bias<-NA -for (ic in 1:connum){ -psihat[ic,]<-apply(bvec,1,bptdpsi,con[,ic]) -psihatcen[ic,]<-apply(bveccen,1,bptdpsi,con[,ic]) -bias[ic]<-sum((psihatcen[ic,]>0))/nboot-.5 -if(BA){ -test[ic]<-sum((psihat[ic,]>0))/nboot-.1*bias[ic] -if(test[ic]<0)test[ic]<-0 -} -if(!BA)test[ic]<-sum((psihat[ic,]>0))/nboot -test[ic]<-min(test[ic],1-test[ic]) -} -test<-2*test -ncon<-ncol(con) -if(alpha==.05){ -dvec<-c(.025,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) -dvecba<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) -if(ncon > 10){ -avec<-.05/c(11:ncon) -dvec<-c(dvec,avec) -}} -if(alpha==.01){ -dvec<-c(.005,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) -dvecba<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) -if(ncon > 10){ -avec<-.01/c(11:ncon) -dvec<-c(dvec,avec) -}} -if(alpha != .05 && alpha != .01){ -dvec<-alpha/c(1:ncon) -dvecba<-dvec -dvec[1]<-alpha/2 -} -if(n>=80)hoch=T -if(hoch)dvec<-alpha/(c(1:ncon)) -if(plotit && ncol(bvec)==2){ -z<-c(0,0) -one<-c(1,1) -plot(rbind(bvec,z,one),xlab=xlab,ylab=ylab,type="n") -points(bvec) -totv<-apply(x,2,est,...) 
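-# The plot (two groups only) shows the bootstrap estimates, the convex hull
-# of the (1-alpha) fraction of points closest to the observed estimates in
-# Mahalanobis distance, and the line of equality.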
-cmat<-var(bvec) -dis<-mahalanobis(bvec,totv,cmat) -temp.dis<-order(dis) -ic<-round((1-alpha)*nboot) -xx<-bvec[temp.dis[1:ic],] -xord<-order(xx[,1]) -xx<-xx[xord,] -temp<-chull(xx) -lines(xx[temp,]) -lines(xx[c(temp[1],temp[length(temp)]),]) -abline(0,1) -} -temp2<-order(0-test) -ncon<-ncol(con) -zvec<-dvec[1:ncon] -if(BA)zvec<-dvecba[1:ncon] -sigvec<-(test[temp2]>=zvec) -output<-matrix(0,connum,6) -dimnames(output)<-list(NULL,c("con.num","psihat","sig.level","crit.sig", -"ci.lower","ci.upper")) -tmeans<-apply(mat,2,est,...) -psi<-1 -for (ic in 1:ncol(con)){ -output[ic,2]<-sum(con[,ic]*tmeans) -output[ic,1]<-ic -output[ic,3]<-test[ic] -output[temp2,4]<-zvec -temp<-sort(psihat[ic,]) -icl<-round(output[ic,4]*nboot/2)+1 -icu<-nboot-(icl-1) -output[ic,5]<-temp[icl] -output[ic,6]<-temp[icu] -} -} -num.sig<-sum(output[,3]<=output[,4]) -list(output=output,con=con,num.sig=num.sig) -} - -lindep<-function(x,con,cmat,alpha=.05,tr=.2){ -# -# Compute a test statistic based on the -# linear contrast coefficients in con and the covariance matrix -# cmat. -# -# The data are assumed to be stored in x in list mode -# or a matrix with columns correpsonding to groups. -# -# con is a J by d matrix containing the contrast coefficients that are used. -# d=number of linear contrasts -# -# -if(is.matrix(x))x<-listm(x) -if(!is.list(x))stop("Data must be stored in a matrix or in list mode.") -con<-as.matrix(con) -J<-length(x) -w<-vector("numeric",J) -xbar<-vector("numeric",J) -for(j in 1:J){ -xbar[j]<-mean(x[[j]],tr=tr,na.rm=TRUE) -} -ncon<-ncol(con) -psihat<-matrix(0,ncol(con),4) -dimnames(psihat)<-list(NULL,c("con.num","psihat","se","test")) -w<-cmat -for (d in 1:ncol(con)){ -psihat[d,1]<-d -psihat[d,2]<-sum(con[,d]*xbar) -cvec<-as.matrix(con[,d]) -sejk<-sqrt(t(cvec)%*%w%*%cvec) -psihat[d,3]<-sejk -psihat[d,4]<-psihat[d,2]/sejk -} -list(test.stat=psihat) -} - -bwmcp<-function(J, K, x, tr = 0.2, JK = J * K, con = 0, - alpha = 0.05, grp =c(1:JK), nboot = 599, method='hoch',SEED = TRUE, ...) -{ - # - # A bootstrap-t for multiple comparisons among - # for all main effects and interactions. - # The analysis is done by generating bootstrap samples and - # using an appropriate linear contrast. - # - # The R variable x is assumed to contain the raw - # data stored in list mode or in a matrix. - # If in list mode, x[[1]] contains the data - # for the first level of both factors: level 1,1. - # x[[2]] is assumed to contain the data for level 1 of the - # first factor and level 2 of the second: level 1,2 - # x[[K]] is the data for level 1,K - # x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc. - # - # If the data are in a matrix, column 1 is assumed to - # correspond to x[[1]], column 2 to x[[2]], etc. - # - # When in list mode x is assumed to have length JK, the total number - # groups being tested, but a subset of the data can be analyzed - # using grp - # - if(is.matrix(x)) { - y <- list() - for(j in 1:ncol(x)) - y[[j]] <- x[, j] -x=y -} - - conM = con2way(J, K) - p <- J * K - v <- matrix(0, p, p) - data <- list() -xx=list() - for(j in 1:length(x)) { - data[[j]] <- x[[grp[j]]] -xx[[j]]=x[[grp[j]]] # save input data - # Now have the groups in proper order. 
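-# Each group is centered at its trimmed mean so that bootstrap samples are
-# generated with the null hypothesis true; the centered values are used only
-# to obtain the bootstrap-t critical values.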
- data[[j]] = data[[j]] - mean(data[[j]], tr = tr,na.rm=TRUE) #centered data for bootstrapping - } -ilow=1-K -iup=0 -for(j in 1:J){ -ilow <- ilow + K - iup = iup + K -sel <- c(ilow:iup) -xx[sel]=listm(elimna(matl(xx[sel]))) - v[sel, sel] <- covmtrim(xx[sel], tr) - } -A=lindep(xx,conM$conA,cmat=v,tr=tr)$test.stat -B=lindep(xx,conM$conB,cmat=v,tr=tr)$test.stat -AB=lindep(xx,conM$conAB,cmat=v,tr=tr)$test.stat - x <- data - jp <- 1 - K - kv <- 0 - if(SEED) - set.seed(2) - # set seed of random number generator so that - # results can be duplicated. - # Next determine the n_j values - nvec <- NA - testA = NA - testB = NA - testAB = NA - bsam = list() - bdat = list() -aboot=matrix(NA,nrow=nboot,ncol=ncol(conM$conA)) -bboot=matrix(NA,nrow=nboot,ncol=ncol(conM$conB)) -abboot=matrix(NA,nrow=nboot,ncol=ncol(conM$conAB)) -# for(j in 1:J) -# nvec[j] = length(x[[j]]) - for(ib in 1:nboot) { - ilow <- 1 - K - iup = 0 - for(j in 1:J) { - ilow <- ilow + K - iup = iup + K -nv=length(xx[[ilow]]) -bdat[[j]] = sample(nv, size = nv, replace =TRUE) -for(k in ilow:iup){ -# bsam[[k]] = xx[[k]][bdat[[j]]] -bsam[[k]] = data[[k]][bdat[[j]]] # Use centered data to determine critical value. -} - } -ilow=0-K -iup=0 -for(j in 1:J){ -ilow <- ilow + K - iup = iup + K -sel <- c(ilow:iup) - v[sel, sel] <- covmtrim(bsam[sel], tr) - } - -temp=abs(lindep(bsam,conM$conA, cmat=v,tr=tr)$test.stat[,4]) -aboot[ib,]=temp -testA[ib] = max(temp) -temp=abs(lindep(bsam,conM$conB,cmat=v,tr=tr)$test.stat[,4]) -bboot[ib,]=temp -testB[ib]= max(temp) -temp=abs(lindep(bsam,conM$conAB,cmat=v,tr=tr)$test.stat[,4]) -testAB[ib] = max(temp) -abboot[ib,]=temp - } -pbA=NA -pbB=NA -pbAB=NA -for(j in 1:ncol(aboot))pbA[j]=mean((abs(A[j,4])length(x))stop('JKL is less than the Number of groups') -JK=J*K -KL=K*L - v <- matrix(0, p, p) - data <- list() -xx=list() - for(j in 1:length(x)) { - data[[j]] <- x[[grp[j]]] -xx[[j]]=x[[grp[j]]] # save input data - # Now have the groups in proper order. - data[[j]] = data[[j]] - mean(data[[j]], tr = tr) - } -ilow=1-KL -iup=0 -for(j in 1:J){ -ilow <- ilow + KL - iup = iup + KL -sel <- c(ilow:iup) -xx[sel]=listm(elimna(matl(xx[sel]))) - v[sel, sel] <- covmtrim(xx[sel], tr) - } -A=lindep(xx,conM$conA,cmat=v,tr=tr)$test.stat -B=lindep(xx,conM$conB,cmat=v,tr=tr)$test.stat -C=lindep(xx,conM$conC,cmat=v,tr=tr)$test.stat -AB=lindep(xx,conM$conAB,cmat=v,tr=tr)$test.stat -AC=lindep(xx,conM$conAC,cmat=v,tr=tr)$test.stat -BC=lindep(xx,conM$conBC,cmat=v,tr=tr)$test.stat -ABC=lindep(xx,conM$conABC,cmat=v,tr=tr)$test.stat - x <- data - if(SEED) - set.seed(2) - # set seed of random number generator so that - # results can be duplicated. 
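-# One matrix of bootstrap test statistics is allocated per effect (A, B, C
-# and their interactions); for every bootstrap sample the largest absolute
-# statistic across the contrasts of an effect is also recorded (testA,
-# testB, ...).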
- testA = NA - testB = NA -testC=NA - testAB = NA - testAC = NA - testBC = NA - testABC = NA - bsam = list() - bdat = list() -aboot=matrix(NA,nrow=nboot,ncol=ncol(conM$conA)) -bboot=matrix(NA,nrow=nboot,ncol=ncol(conM$conB)) -cboot=matrix(NA,nrow=nboot,ncol=ncol(conM$conC)) -abboot=matrix(NA,nrow=nboot,ncol=ncol(conM$conAB)) -acboot=matrix(NA,nrow=nboot,ncol=ncol(conM$conAC)) -bcboot=matrix(NA,nrow=nboot,ncol=ncol(conM$conBC)) -abcboot=matrix(NA,nrow=nboot,ncol=ncol(conM$conABC)) - for(ib in 1:nboot) { - ilow <- 1 - KL - iup = 0 - for(j in 1:J) { - ilow <- ilow + KL - iup = iup + KL -nv=length(x[[ilow]]) - bdat[[j]] = sample(nv, size = nv, replace =TRUE) -for(k in ilow:iup){ - bsam[[k]] = x[[k]][bdat[[j]]] -} - } -ilow=1-KL -iup=0 -for(j in 1:J){ -ilow <- ilow + KL - iup = iup + KL -sel <- c(ilow:iup) - v[sel, sel] <- covmtrim(bsam[sel], tr) - } -temp=abs(lindep(bsam,conM$conA, cmat=v,tr=tr)$test.stat[,4]) -aboot[ib,]=temp -testA[ib] = max(temp) -temp=abs(lindep(bsam,conM$conB,cmat=v,tr=tr)$test.stat[,4]) -bboot[ib,]=temp -testB[ib]= max(temp) - -temp=abs(lindep(bsam,conM$conC,cmat=v,tr=tr)$test.stat[,4]) -cboot[ib,]=temp -testC[ib]= max(temp) - -temp=abs(lindep(bsam,conM$conAC,cmat=v,tr=tr)$test.stat[,4]) -acboot[ib,]=temp -testAC[ib]= max(temp) - -temp=abs(lindep(bsam,conM$conBC,cmat=v,tr=tr)$test.stat[,4]) -bcboot[ib,]=temp -testBC[ib]= max(temp) - -temp=abs(lindep(bsam,conM$conAB,cmat=v,tr=tr)$test.stat[,4]) -testAB[ib] = max(temp) -abboot[ib,]=temp - -temp=abs(lindep(bsam,conM$conABC,cmat=v,tr=tr)$test.stat[,4]) -abcboot[ib,]=temp -testABC[ib]= max(temp) - - } -pbA=NA -pbB=NA -pbC=NA -pbAB=NA -pbAC=NA -pbBC=NA -pbABC=NA -for(j in 1:ncol(aboot))pbA[j]=mean((abs(A[j,4])length(x))stop("JKL is less than the Number of groups") -JK=J*K - v <- matrix(0, p, p) - data <- list() -xx=list() - for(j in 1:length(x)) { - data[[j]] <- x[[grp[j]]] -xx[[j]]=x[[grp[j]]] # save input data - # Now have the groups in proper order. - data[[j]] = data[[j]] - mean(data[[j]], tr = tr) - } -ilow=1-L -iup=0 -for(j in 1:JK){ -ilow <- ilow + L - iup = iup + L -sel <- c(ilow:iup) -xx[sel]=listm(elimna(matl(xx[sel]))) - v[sel, sel] <- covmtrim(xx[sel], tr) - } -A=lindep(xx,conM$conA,cmat=v,tr=tr)$test.stat -B=lindep(xx,conM$conB,cmat=v,tr=tr)$test.stat -C=lindep(xx,conM$conC,cmat=v,tr=tr)$test.stat -AB=lindep(xx,conM$conAB,cmat=v,tr=tr)$test.stat -AC=lindep(xx,conM$conAC,cmat=v,tr=tr)$test.stat -BC=lindep(xx,conM$conBC,cmat=v,tr=tr)$test.stat -ABC=lindep(xx,conM$conABC,cmat=v,tr=tr)$test.stat - x <- data - jp <- 1 - K - kv <- 0 - if(SEED) - set.seed(2) - # set seed of random number generator so that - # results can be duplicated. 
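-# In the bootstrap loop below, one vector of case indices is drawn for each
-# block of dependent groups and the same indices are applied to every group
-# in that block, so the dependence among the within-subject measures is
-# preserved in the bootstrap samples.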
- testA = NA - testB = NA -testC=NA - testAB = NA - testAC = NA - testBC = NA - testABC = NA - bsam = list() - bdat = list() -aboot=matrix(NA,nrow=nboot,ncol=ncol(conM$conA)) -bboot=matrix(NA,nrow=nboot,ncol=ncol(conM$conB)) -cboot=matrix(NA,nrow=nboot,ncol=ncol(conM$conC)) -abboot=matrix(NA,nrow=nboot,ncol=ncol(conM$conAB)) -acboot=matrix(NA,nrow=nboot,ncol=ncol(conM$conAC)) -bcboot=matrix(NA,nrow=nboot,ncol=ncol(conM$conBC)) -abcboot=matrix(NA,nrow=nboot,ncol=ncol(conM$conABC)) -# for(j in 1:JK) -# nvec[j] = length(x[[j]]) - for(ib in 1:nboot) { - ilow <- 1 - L - iup = 0 - for(j in 1:JK) { - ilow <- ilow + L - iup = iup + L -nv=length(x[[ilow]]) - bdat[[j]] = sample(nv, size = nv, replace =TRUE) -for(k in ilow:iup){ - bsam[[k]] = x[[k]][bdat[[j]]] -} - } -ilow=1-L -iup=0 -for(j in 1:JK){ -ilow <- ilow + L - iup = iup + L -sel <- c(ilow:iup) - v[sel, sel] <- covmtrim(bsam[sel], tr) - } -temp=abs(lindep(bsam,conM$conA, cmat=v,tr=tr)$test.stat[,4]) -aboot[ib,]=temp -testA[ib] = max(temp) -temp=abs(lindep(bsam,conM$conB,cmat=v,tr=tr)$test.stat[,4]) -bboot[ib,]=temp -testB[ib]= max(temp) - -temp=abs(lindep(bsam,conM$conC,cmat=v,tr=tr)$test.stat[,4]) -cboot[ib,]=temp -testC[ib]= max(temp) - -temp=abs(lindep(bsam,conM$conAC,cmat=v,tr=tr)$test.stat[,4]) -acboot[ib,]=temp -testAC[ib]= max(temp) - -temp=abs(lindep(bsam,conM$conBC,cmat=v,tr=tr)$test.stat[,4]) -bcboot[ib,]=temp -testBC[ib]= max(temp) - -temp=abs(lindep(bsam,conM$conAB,cmat=v,tr=tr)$test.stat[,4]) -testAB[ib] = max(temp) -abboot[ib,]=temp - -temp=abs(lindep(bsam,conM$conABC,cmat=v,tr=tr)$test.stat[,4]) -abcboot[ib,]=temp -testABC[ib]= max(temp) - - } -pbA=NA -pbB=NA -pbC=NA -pbAB=NA -pbAC=NA -pbBC=NA -pbABC=NA -for(j in 1:ncol(aboot))pbA[j]=mean((abs(A[j,4])crit,1,0) -id<-vec[chk==1] -keep<-vec[chk==0] -if(is.matrix(x)){ -if(ncol(x)==2 && plotit){ -plot(x[,1],x[,2],xlab=xlab,ylab=ylab,type="n") -flag<-rep(TRUE,nrow(x)) -flag[id]<-FALSE -points(x[flag,1],x[flag,2]) -if(sum(!flag)>0)points(x[!flag,1],x[!flag,2],pch="*") -}} -if(!is.matrix(x))outval<-x[id] -if(is.matrix(x))outval<-x[id,] -n=nrow(as.matrix(x)) -n.out=length(id) -assign(x='.Random.seed', value=oldSeed, envir=.GlobalEnv) -list(n=n,n.out=n.out,out.val=outval,out.id=id,keep=keep,dis=dis,crit=crit) -} - -lintestMC<-function(x,y,regfun=tsreg,nboot=500,alpha=.05,xout=FALSE,outfun=out,...){ -# -# Test the hypothesis that the regression surface is a plane. -# Stute et al. (1998, JASA, 93, 141-149). -# -if(identical(regfun,Qreg))print('When using Qreg, be sure to include res.vals=TRUE') -if(identical(regfun,tshdreg))print('When using tshdreg, be sure to include RES=TRUE') -if(identical(regfun,MMreg))print('When using MMreg, be sure to include RES=TRUE') -library(parallel) -set.seed(2) -x<-as.matrix(x) -d<-ncol(x) -temp<-elimna(cbind(x,y)) -x<-temp[,1:d] -x<-as.matrix(x) -y<-temp[,d+1] -if(xout){ -flag<-outfun(x,...)$keep -x<-x[flag,] -x<-as.matrix(x) -y<-y[flag] -} -mflag<-matrix(NA,nrow=length(y),ncol=length(y)) -for (j in 1:length(y)){ -for (k in 1:length(y)){ -mflag[j,k]<-(sum(x[j,]<=x[k,])==ncol(x)) -} -} -reg<-regfun(x,y,...) -yhat<-y-reg$residuals -#print("Taking bootstrap sample, please wait.") -data<-matrix(runif(length(y)*nboot),nrow=nboot) -data<-sqrt(12)*(data-.5) # standardize the random numbers. -data=listm(t(data)) -#rvalb<-apply(data,1,lintests1,yhat,reg$residuals,mflag,x,regfun,...) -rvalb<-mclapply(data,lintests1,yhat,reg$residuals,mflag,x,regfun,mc.preschedule=TRUE,...) 
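-# Each element of 'data' is one set of n random multipliers, rescaled from
-# uniform to have mean zero and variance one; lintests1 is applied to every
-# set in parallel via mclapply, giving the wild-bootstrap replications of
-# the Stute et al. test statistics.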
-# An n x nboot matrix of R values -rvalb=matl(rvalb) -rvalb<-rvalb/sqrt(length(y)) -dstatb<-apply(abs(rvalb),2,max) -wstatb<-apply(rvalb^2,2,mean) -# compute test statistic -v<-c(rep(1,length(y))) -rval<-lintests1(v,yhat,reg$residuals,mflag,x,regfun,...) -rval<-rval/sqrt(length(y)) -dstat<-max(abs(rval)) -wstat<-mean(rval^2) -ib<-round(nboot*(1-alpha)) -p.value.d<-1-sum(dstat>=dstatb)/nboot -p.value.w<-1-sum(wstat>=wstatb)/nboot -#critw<-wstatb[ib] -list(dstat=dstat,wstat=wstat,p.value.d=p.value.d,p.value.w=p.value.w) -} - - -yuen.effect<-function(x,y,tr=.2,alpha=.05,plotit=FALSE, -plotfun=splot,op=TRUE,VL=TRUE,cor.op=FALSE, -xlab="Groups",ylab="",PB=FALSE){ -# -# Same as yuen, only it computes explanatory power and the related -# measure of effect size. Only use this with n1=n2. Called by yuenv2 -# which allows n1!=n2. -# -# -# Perform Yuen's test for trimmed means on the data in x and y. -# The default amount of trimming is 20% -# Missing values (values stored as NA) are automatically removed. -# -# A confidence interval for the trimmed mean of x minus the -# the trimmed mean of y is computed and returned in yuen$ci. -# The p-valueis returned in yuen$p.value -# -# For an omnibus test with more than two independent groups, -# use t1way. -# This function uses winvar from chapter 2. -# -if(tr==.5)stop("Use medpb to compare medians.") -if(tr>.5)stop("Can't have tr>.5") -library(MASS) -x<-x[!is.na(x)] # Remove any missing values in x -y<-y[!is.na(y)] # Remove any missing values in y -h1<-length(x)-2*floor(tr*length(x)) -h2<-length(y)-2*floor(tr*length(y)) -q1<-(length(x)-1)*winvar(x,tr)/(h1*(h1-1)) -q2<-(length(y)-1)*winvar(y,tr)/(h2*(h2-1)) -df<-(q1+q2)^2/((q1^2/(h1-1))+(q2^2/(h2-1))) -crit<-qt(1-alpha/2,df) -m1=mean(x,tr) -m2=mean(y,tr) -mbar=(m1+m2)/2 -dif=m1-m2 -low<-dif-crit*sqrt(q1+q2) -up<-dif+crit*sqrt(q1+q2) -test<-abs(dif/sqrt(q1+q2)) -yuen<-2*(1-pt(test,df)) -xx=c(rep(1,length(x)),rep(2,length(y))) -pts=c(x,y) -top=var(c(m1,m2)) -# -if(!PB){ -if(tr==0)cterm=1 -if(tr>0)cterm=area(dnormvar,qnorm(tr),qnorm(1-tr))+2*(qnorm(tr)^2)*tr -bot=winvar(pts,tr=tr)/cterm -} -if(PB)bot=pbvar(pts)/1.06 -# -e.pow=top/bot -if(e.pow>1){ -x0=c(rep(1,length(x)),rep(2,length(y))) -y0=c(x,y) -e.pow=wincor(x0,y0,tr=tr)$cor^2 -} -if(plotit){ -plot(xx,pts,xlab=xlab,ylab=ylab) -if(op) -points(c(1,2),c(m1,m2)) -if(VL)lines(c(1,2),c(m1,m2)) -} -list(ci=c(low,up),p.value=yuen,dif=dif,se=sqrt(q1+q2),teststat=test, -crit=crit,df=df,Var.Explained=e.pow,Effect.Size=sqrt(e.pow)) -} - - bbbmcppb.sub<-function(J, K,L, x, est=tmean, JKL = J * K*L, con = 0, - alpha = 0.05, grp =c(1:JKL), nboot = 500, bhop=FALSE,SEED = TRUE, ...){ -# -# between-by-between-by-between design -# - # - # A percentile bootstrap for - # multiple comparisons for all main effects and interactions - # The analysis is done by generating bootstrap samples and - # using and appropriate linear contrast. - # - # The R variable x is assumed to contain the raw - # data stored in list mode or in a matrix. - # If in list mode, x[[1]] contains the data - # for the first level of both factors: level 1,1. - # x[[2]] is assumed to contain the data for level 1 of the - # first factor and level 2 of the second: level 1,2 - # x[[K]] is the data for level 1,K - # x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc. - # -# - # If the data are in a matrix, column 1 is assumed to - # correspond to x[[1]], column 2 to x[[2]], etc. 
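- # Illustration (not part of the original comments): for a 2-by-2-by-2
- # design the eight groups are assumed to be ordered with the last factor
- # changing fastest, i.e.
- #   x[[1]]=(1,1,1)  x[[2]]=(1,1,2)  x[[3]]=(1,2,1)  x[[4]]=(1,2,2)
- #   x[[5]]=(2,1,1)  x[[6]]=(2,1,2)  x[[7]]=(2,2,1)  x[[8]]=(2,2,2)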
- # - # When in list mode x is assumed to have length JKL, the total number - # groups being tested, but a subset of the data can be analyzed - # using grp - # - if(is.matrix(x)) { - y <- list() - for(j in 1:ncol(x)) - y[[j]] <- x[, j] -x=y -} -ncon=ncol(con) - p <- J*K*L -JKL=p -if(p>length(x))stop('JKL is less than the Number of groups') -JK=J*K -KL=K*L - data <- list() -xx=list() - for(j in 1:length(x)) { -xx[[j]]=x[[grp[j]]] # save input data -# # Now have the groups in proper order. - } -for(j in 1:p){ -xx[j]=elimna(xx[j]) -} - crit=alpha/2 - icl<-round(crit*nboot)+1 -icu<-nboot-icl - if(SEED) - set.seed(2) - # set seed of random number generator so that - # results can be duplicated. - # Next determine the n_j values - testA = NA - bsam = list() - bdat = list() -aboot=matrix(NA,nrow=nboot,ncol=ncol(con)) -tvec=NA -tvec=linhat(x,con,est=est,...) - for(ib in 1:nboot) { - for(j in 1:JKL) { -nv=length(x[[j]]) -bdat[[j]] = sample(nv, size = nv, replace =TRUE) -bsam[[j]] = x[[j]][bdat[[j]]] -} -aboot[ib,]=linhat(bsam,con=con,est=est,...) -} -pbA=NA -for(j in 1:ncol(aboot)){ -pbA[j]=mean(aboot[,j]>0) -pbA[j]=2*min(c(pbA[j],1-pbA[j])) -} -outputA<-matrix(0,ncol(con),6) -dimnames(outputA)<-list(NULL,c('con.num','psihat','p.value','p.adjust', -'ci.lower','ci.upper')) -test=pbA -outputA[,2]<-tvec -for (ic in 1:ncol(con)){ -outputA[ic,1]<-ic -outputA[ic,3]<-test[ic] -temp<-sort(aboot[,ic]) -outputA[ic,5]<-temp[icl] -outputA[ic,6]<-temp[icu] -} -outputA[,4]=p.adjust(outputA[,3],method='hoch') -outputA -} - -bbbmcppb<-function(J, K,L, x, est=tmean,JKL = J * K*L, - alpha = 0.05, grp =c(1:JKL), nboot = 2000, bhop=FALSE,SEED = TRUE,...) -{ -# -# BETWEEN-BETWEEN-BETWEEN DESIGN -# - # A percentile bootstrap for multiple comparisons among - # multiple comparisons for all main effects and interactions - # The analysis is done by generating bootstrap samples and - # using an appropriate linear contrast. - # - # The R variable x is assumed to contain the raw - # data stored in list mode or in a matrix. - # If in list mode, x[[1]] contains the data - # for the first level of both factors: level 1,1. - # x[[2]] is assumed to contain the data for level 1 of the - # first factor and level 2 of the second: level 1,2 - # x[[K]] is the data for level 1,K - # x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc. - # - # If the data are in a matrix, column 1 is assumed to - # correspond to x[[1]], column 2 to x[[2]], etc. - # - # When in list mode x is assumed to have length JK, the total number - # groups being tested, but a subset of the data can be analyzed - # using grp - # -con=con3way(J,K,L) -A=bbbmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conA, - alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...) -B=bbbmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conB, - alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...) -C=bbbmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conC, - alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...) -AB=bbbmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conAB, - alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...) -AC=bbbmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conAC, - alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...) -BC=bbbmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conBC, - alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...) -ABC=bbbmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conABC, - alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...) 
-list(Fac.A=A,Fac.B=B,Fac.C=C,Fac.AB=AB,Fac.AC=AC,Fac.BC=BC,Fac.ABC=ABC) -} - -linhat<-function(x,con,est=tmean,...){ -# -# estimate all linear contrasts specified by con -# -psihat=0 -xbar=llocv2(x,est=est,...)$center -for(i in 1:ncol(con))psihat[i]=sum(con[,i]*xbar) -psihat -} - -bbwmcppb<-function(J, K,L, x, est=tmean,JKL = J * K*L, - alpha = 0.05, grp =c(1:JKL), nboot = 2000, bhop=FALSE,SEED = TRUE,...) -{ -# -# BETWEEN-BETWEEN-WITHIN DESIGN -# - # A percentile bootstrap for multiple comparisons - # for all main effects and interactions - # The analysis is done by generating bootstrap samples and - # using an appropriate linear contrast. - # - # The R variable x is assumed to contain the raw - # data stored in list mode or in a matrix. - # If in list mode, x[[1]] contains the data - # for the first level of all three factors: level 1,1,1. - # x[[2]] is assumed to contain the data for level 1 of the - # first two factors and level 3 of the third: level 1,1,2 - # x[[K]] is the data for level 1,1,K - # x[[K+1]] is the data for level 1,2,1, x[[2K]] is level 1,2,K, etc. - # - # If the data are in a matrix, column 1 is assumed to - # correspond to x[[1]], column 2 to x[[2]], etc. - # - # When in list mode x is assumed to have length JKL, the total number - # groups being tested, but a subset of the data can be analyzed - # using grp - # -con=con3way(J,K,L) -A=bbwmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conA, - alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...) -B=bbwmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conB, - alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...) -C=bbwmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conC, - alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...) -AB=bbwmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conAB, - alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...) -AC=bbwmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conAC, - alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...) -BC=bbwmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conBC, - alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...) -ABC=bbwmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conABC, - alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...) -list(Fac.A=A,Fac.B=B,Fac.C=C,Fac.AB=AB,Fac.AC=AC,Fac.BC=BC,Fac.ABC=ABC) -} - -bbwmcppb.sub<-function(J, K,L, x, est=tmean, JKL = J * K*L, con = 0, - alpha = 0.05, grp =c(1:JKL), nboot = 500, bhop=FALSE,SEED = TRUE, ...){ - # - # A percentile bootstrap for multiple comparisons among - # multiple comparisons for all main effects and interactions - # The analysis is done by generating bootstrap samples and - # using an appropriate linear contrast. - # - # The R variable x is assumed to contain the raw - # data stored in list mode or in a matrix. - # If in list mode, x[[1]] contains the data - # for the first level of both factors: level 1,1,1. - # x[[2]] is assumed to contain the data for levels 1,1,2, etc. - # -# -# JK independent groups, L dependent groups -# - - # If the data are in a matrix, column 1 is assumed to - # correspond to x[[1]], column 2 to x[[2]], etc. 
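- # In the body below, missing values are removed casewise within each block
- # of L dependent groups (one block per combination of the two between
- # factors), so the dependent groups in a block keep equal lengths.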
- # - # When in list mode x is assumed to have length JKL, the total number - # groups being tested, but a subset of the data can be analyzed - # using grp - # - if(is.matrix(x)) { - y <- list() - for(j in 1:ncol(x)) - y[[j]] <- x[, j] -x=y -} -ncon=ncol(con) - p <- J*K*L -if(p>length(x))stop("JKL is less than the Number of groups") -JK=J*K -KL=K*L - data <- list() -xx=list() - for(j in 1:length(x)) { -xx[[j]]=x[[grp[j]]] # save input data -# # Now have the groups in proper order. - } -ilow=1-L -iup=0 -for(j in 1:JK){ -ilow <- ilow + L - iup = iup + L -sel <- c(ilow:iup) -xx[sel]=listm(elimna(matl(xx[sel]))) -} - - jp <- 1 - L - kv <- 0 - if(SEED) - set.seed(2) - # set seed of random number generator so that - # results can be duplicated. - # Next determine the n_j values - testA = NA - bsam = list() - bdat = list() -aboot=matrix(NA,nrow=nboot,ncol=ncol(con)) -tvec=NA -x=xx -tvec=linhat(x,con,est=est,...) - for(ib in 1:nboot) { - ilow <- 1 - L - iup = 0 - for(j in 1:JK) { - ilow <- ilow + L - iup = iup + L -nv=length(x[[ilow]]) -bdat[[j]] = sample(nv, size = nv, replace =TRUE) -for(k in ilow:iup){ - bsam[[k]] = x[[k]][bdat[[j]]] -} -} -ilow=0-L -iup=0 -aboot[ib,]=linhat(bsam,con=con,est=est,...) -} -pbA=NA -for(j in 1:ncol(aboot)){ -pbA[j]=mean(aboot[,j]>0) -pbA[j]=2*min(c(pbA[j],1-pbA[j])) -} -# Determine critical values -if(!bhop)dvec=alpha/c(1:ncol(con)) -if(bhop)dvec<-(ncol(con)-c(1:ncol(con))+1)*alpha/ncol(con) -outputA<-matrix(0,ncol(con),6) -dimnames(outputA)<-list(NULL,c("con.num","psihat","p.value","p.crit", -"ci.lower","ci.upper")) -test=pbA -temp2<-order(0-test) -zvec<-dvec[1:ncon] -sigvec<-(test[temp2]>=zvec) -outputA[temp2,4]<-zvec -icl<-round(dvec[ncon]*nboot/2)+1 -icu<-nboot-icl-1 -outputA[,2]<-tvec -for (ic in 1:ncol(con)){ -outputA[ic,1]<-ic -outputA[ic,3]<-test[ic] -temp<-sort(aboot[,ic]) -outputA[ic,5]<-temp[icl] -outputA[ic,6]<-temp[icu] -} -outputA -} - -bwwmcppb.sub<-function(J, K,L, x, est=tmean, JKL = J * K*L, con = 0, - alpha = 0.05, grp =c(1:JKL), nboot = 500, bhop=FALSE,SEED = TRUE, ...){ - # - # A percentile bootstrap for multiple comparisons - # for all main effects and interactions. - # The analysis is done by generating bootstrap samples and - # using an appropriate linear contrast. - # - # The R variable x is assumed to contain the raw - # data stored in list mode or in a matrix. - # If in list mode, x[[1]] contains the data - # for the first level of both factors: level 1,1. - # x[[2]] is assumed to contain the data for level 1 of the - # first factor and level 2 of the second: level 1,2 - # x[[K]] is the data for level 1,K - # x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc. - # -# -# J independent groups, KL dependent groups -# - - # If the data are in a matrix, column 1 is assumed to - # correspond to x[[1]], column 2 to x[[2]], etc. - # - # When in list mode x is assumed to have length JK, the total number - # groups being tested, but a subset of the data can be analyzed - # using grp - # - if(is.matrix(x)) { - y <- list() - for(j in 1:ncol(x)) - y[[j]] <- x[, j] -x=y -} -# nvec <- NA -#for(j in 1:length(x))nvec[j]=length(x[[j]]) -ncon=ncol(con) - p <- J*K*L -if(p>length(x))stop("JKL is less than the Number of groups") -JK=J*K -KL=K*L -# v <- matrix(0, p, p) - data <- list() -xx=list() - for(j in 1:length(x)) { -# data[[j]] <- x[[grp[j]]] -xx[[j]]=x[[grp[j]]] # save input data -# # Now have the groups in proper order. 
- } -ilow=1-KL -iup=0 -for(j in 1:J){ -ilow <- ilow + KL - iup = iup + KL -sel <- c(ilow:iup) -xx[sel]=listm(elimna(matl(xx[sel]))) -} - - jp <- 1 - KL - kv <- 0 - if(SEED) - set.seed(2) - # set seed of random number generator so that - # results can be duplicated. - # Next determine the n_j values - testA = NA - bsam = list() - bdat = list() -aboot=matrix(NA,nrow=nboot,ncol=ncol(con)) -tvec=NA -x=xx -tvec=linhat(x,con,est=est,...) - for(ib in 1:nboot) { - ilow <- 1 - KL - iup = 0 - for(j in 1:J) { - ilow <- ilow + KL - iup = iup + KL -nv=length(x[[ilow]]) -bdat[[j]] = sample(nv, size = nv, replace =TRUE) -for(k in ilow:iup){ - bsam[[k]] = x[[k]][bdat[[j]]] -} -} -ilow=1-KL -iup=0 -aboot[ib,]=linhat(bsam,con=con,est=est,...) -} -pbA=NA -for(j in 1:ncol(aboot)){ -pbA[j]=mean(aboot[,j]>0) -pbA[j]=2*min(c(pbA[j],1-pbA[j])) -} -# Determine critical values -if(!bhop){ -if(alpha==.05){ -dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) -if(ncol(con) > 10){ -avec<-.05/c(11:(ncol(con))) -dvec<-c(dvec,avec) -}} -if(alpha==.01){ -dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) -if(con > 10){ -avec<-.01/c(11:ncol(con)) -dvec<-c(dvec,avec) -}} -if(alpha != .05 && alpha != .01){ -dvec<-alpha/c(1:ncol(con)) -} -} -if(bhop)dvec<-(ncol(con)-c(1:ncol(con))+1)*alpha/ncol(con) -outputA<-matrix(0,ncol(con),6) -dimnames(outputA)<-list(NULL,c("con.num","psihat","p.value","p.crit", -"ci.lower","ci.upper")) -test=pbA -temp2<-order(0-test) -zvec<-dvec[1:ncon] -sigvec<-(test[temp2]>=zvec) -outputA[temp2,4]<-zvec -icl<-round(dvec[ncon]*nboot/2)+1 -icu<-nboot-icl-1 -outputA[,2]<-tvec -for (ic in 1:ncol(con)){ -outputA[ic,1]<-ic -outputA[ic,3]<-test[ic] -temp<-sort(aboot[,ic]) -outputA[ic,5]<-temp[icl] -outputA[ic,6]<-temp[icu] -} -outputA -} - -wwwmcppb.sub<-function(J, K,L, x, est=tmean, JKL = J * K*L, con = 0, - alpha = 0.05, grp =c(1:JKL), nboot = 500, bhop=FALSE,SEED = TRUE, ...){ - # - # A percentile bootstrap for multiple comparisons among - # multiple comparisons for all main effects and interactions - # The analysis is done by generating bootstrap samples and - # using an appropriate linear contrast. - # - # The R variable x is assumed to contain the raw - # data stored in list mode or in a matrix. - # If in list mode, x[[1]] contains the data - # for the first level of both factors: level 1,1. - # x[[2]] is assumed to contain the data for level 1 of the - # first factor and level 2 of the second: level 1,2 - # x[[K]] is the data for level 1,K - # x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc. - # -# -# within-by-within-by-within design -# -# JKL dependent groups -# - - # If the data are in a matrix, column 1 is assumed to - # correspond to x[[1]], column 2 to x[[2]], etc. - # - # When in list mode x is assumed to have length JK, the total number - # groups being tested, but a subset of the data can be analyzed - # using grp - # - if(is.matrix(x)) { - y <- list() - for(j in 1:ncol(x)) - y[[j]] <- x[, j] -x=y -} -# nvec <- NA -#for(j in 1:length(x))nvec[j]=length(x[[j]]) -ncon=ncol(con) - p <- J*K*L -JKL=p -if(p>length(x))stop("JKL is less than the Number of groups") -JK=J*K -KL=K*L -# v <- matrix(0, p, p) - data <- list() -xx=list() - for(j in 1:length(x)) { -# data[[j]] <- x[[grp[j]]] -xx[[j]]=x[[grp[j]]] # save input data -# # Now have the groups in proper order. - } - if(SEED) - set.seed(2) - # set seed of random number generator so that - # results can be duplicated. 
- # Next determine the n_j values - testA = NA - bsam = list() - bdat = list() -aboot=matrix(NA,nrow=nboot,ncol=ncol(con)) -tvec=NA -x=xx -tvec=linhat(x,con,est=est,...) -nv=length(x[[1]]) - for(ib in 1:nboot) { -bdat[[j]] = sample(nv, size = nv, replace =TRUE) -for(k in 1:JKL) bsam[[k]] = x[[k]][bdat[[j]]] -aboot[ib,]=linhat(bsam,con=con,est=est,...) -} -pbA=NA -for(j in 1:ncol(aboot)){ -pbA[j]=mean(aboot[,j]>0) -pbA[j]=2*min(c(pbA[j],1-pbA[j])) -} -# Determine critical values -if(!bhop){ -if(alpha==.05){ -dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) -if(ncol(con) > 10){ -avec<-.05/c(11:(ncol(con))) -dvec<-c(dvec,avec) -}} -if(alpha==.01){ -dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) -if(con > 10){ -avec<-.01/c(11:ncol(con)) -dvec<-c(dvec,avec) -}} -if(alpha != .05 && alpha != .01){ -dvec<-alpha/c(1:ncol(con)) -} -} -if(bhop)dvec<-(ncol(con)-c(1:ncol(con))+1)*alpha/ncol(con) -outputA<-matrix(0,ncol(con),6) -dimnames(outputA)<-list(NULL,c("con.num","psihat","p.value","p.crit", -"ci.lower","ci.upper")) -test=pbA -temp2<-order(0-test) -zvec<-dvec[1:ncon] -sigvec<-(test[temp2]>=zvec) -outputA[temp2,4]<-zvec -icl<-round(dvec[ncon]*nboot/2)+1 -icu<-nboot-icl-1 -outputA[,2]<-tvec -for (ic in 1:ncol(con)){ -outputA[ic,1]<-ic -outputA[ic,3]<-test[ic] -temp<-sort(aboot[,ic]) -outputA[ic,5]<-temp[icl] -outputA[ic,6]<-temp[icu] -} -outputA -} - -wwwmcppb.OLD<-function(J, K,L, x, est=tmean,JKL = J * K*L, - alpha = 0.05, grp =c(1:JKL), nboot = 2000, bhop=FALSE,SEED = TRUE,...) -{ - # - # A percentile bootstrap for - # multiple comparisons for all main effects and interactions - # The analysis is done by generating bootstrap samples and - # using an appropriate linear contrast. - # - # The R variable x is assumed to contain the raw - # data stored in list mode or in a matrix. - # If in list mode, x[[1]] contains the data - # for the first level of both factors: level 1,1. - # x[[2]] is assumed to contain the data for level 1 of the - # first factor and level 2 of the second: level 1,2 - # x[[K]] is the data for level 1,K - # x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc. - # - # If the data are in a matrix, column 1 is assumed to - # correspond to x[[1]], column 2 to x[[2]], etc. - # - # When in list mode x is assumed to have length JK, the total number - # groups being tested, but a subset of the data can be analyzed - # using grp - # -con=con3way(J,K,L) -A=wwwmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conA, - alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...) -B=wwwmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conB, - alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...) -C=wwwmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conC, - alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...) -AB=wwwmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conAB, - alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...) -AC=wwwmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conAC, - alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...) -BC=wwwmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conBC, - alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...) -ABC=wwwmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conABC, - alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...) 
-list(Fac.A=A,Fac.B=B,Fac.C=C,Fac.AB=AB,Fac.AC=AC,Fac.BC=BC,Fac.ABC=ABC) -} - -wwwmcppb<-function(J,K,L,x, alpha = 0.05, con = 0,est=tmean, plotit = FALSE, - dif = TRUE, grp = NA, nboot = NA, BA = TRUE, hoch = TRUE, xlab = "Group 1", - ylab = "Group 2", pr = TRUE, SEED = TRUE,...){ -# -# Do all multiple comparisons for a within-by-within-by-within design. -# using a percentile bootstrap method and trimmed means -# -if(pr){ -print('This new version includes the option to use difference scores and defaults to dif=TRUE') -print('Number of bootstrap samples differs from the old version') -print('To use the old version, use wwwmcppb.OLD') -} -conM=con3way(J,K,L) -A=rmmcppb(x,con=conM$conA,alpha=alpha,dif=dif,plotit=plotit,est=est, -nboot=nboot,BA=BA,hoch=hoch,SEED=SEED,xlab=xlab,ylab=ylab,pr=pr,...) -B=rmmcppb(x,con=conM$conB,alpha=alpha,dif=dif, -plotit=plotit,est=est,nboot=nboot,BA=BA,hoch=hoch, -SEED=SEED,xlab=xlab,ylab=ylab,pr=FALSE,...) -C=rmmcppb(x,con=conM$conC,alpha=alpha,dif=dif, -plotit=plotit,est=est,nboot=nboot,BA=BA,hoch=hoch, -SEED=SEED,xlab=xlab,ylab=ylab,pr=FALSE,...) -AB=rmmcppb(x,con=conM$conAB,alpha=alpha,dif=dif,plotit=plotit,est=est, -nboot=nboot,BA=BA,hoch=hoch,SEED=SEED,xlab=xlab,ylab=ylab,pr=FALSE,...) -AC=rmmcppb(x,con=conM$conAC,alpha=alpha,dif=dif,plotit=plotit,est=est, -nboot=nboot,BA=BA,hoch=hoch,SEED=SEED,xlab=xlab,ylab=ylab,pr=FALSE,...) -BC=rmmcppb(x,con=conM$conBC,alpha=alpha,dif=dif,plotit=plotit,est=est, -nboot=nboot,BA=BA,hoch=hoch,SEED=SEED,xlab=xlab,ylab=ylab,pr=FALSE,...) -ABC=rmmcppb(x,con=conM$conABC,alpha=alpha,dif=dif,plotit=plotit,est=est, -nboot=nboot,BA=BA,hoch=hoch,SEED=SEED,xlab=xlab,ylab=ylab,pr=FALSE,...) -list(Factor_A=A,Factor_B=B,Factor_AB=AB,Factor_AC=AC,Factor_BC=BC,Factor_ABC=ABC) -} - - - -bwwmcppb<-function(J, K,L, x, est=tmean,JKL = J * K*L, - alpha = 0.05, grp =c(1:JKL), nboot = 2000, bhop=FALSE,SEED = TRUE,...) -{ - # - # A percentile bootstrap for multiple comparisons - # for all main effects and interactions - # The analysis is done by generating bootstrap samples and - # using an appropriate linear contrast. - # - # The R variable x is assumed to contain the raw - # data stored in list mode or in a matrix. - # If in list mode, x[[1]] contains the data - # for the first level of both factors: level 1,1. - # x[[2]] is assumed to contain the data for level 1 of the - # first factor and level 2 of the second: level 1,2 - # x[[K]] is the data for level 1,K - # x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc. - # - # If the data are in a matrix, column 1 is assumed to - # correspond to x[[1]], column 2 to x[[2]], etc. - # - # When in list mode x is assumed to have length JK, the total number - # groups being tested, but a subset of the data can be analyzed - # using grp - # -con=con3way(J,K,L) -A=bwwmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conA, - alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...) -B=bwwmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conB, - alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...) -C=bwwmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conC, - alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...) -AB=bwwmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conAB, - alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...) -AC=bwwmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conAC, - alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...) -BC=bwwmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conBC, - alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...) 
-ABC=bwwmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conABC, - alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...) -list(Fac.A=A,Fac.B=B,Fac.C=C,Fac.AB=AB,Fac.AC=AC,Fac.BC=BC,Fac.ABC=ABC) -} - - -cjMAT<-function(J){ -L=(J^2-J)/2 -cj=matrix(0,nrow=L,ncol=J) -ic=0 -for(j in 1:J){ -for(k in 1:J){ -if(j0)e.pow=top/bot -if(bot==0)e.pow=1 -if(e.pow>=1){ -v1=NULL -v2=NULL -for(j in 1:J){ -v1=c(v1,rep(xbar[j],length(x[[j]]))) -v2=c(v2,x[[j]]) -} -e.pow=wincor(v1,v2,tr=tr)$cor^2 -} -list(TEST=TEST,nu1=nu1,nu2=nu2,p.value=sig,Var.Explained=e.pow, -Effect.Size=sqrt(e.pow)) -} - -snmreg<-function(x,y,SEED=TRUE,xout=FALSE,outfun=outpro,initreg=MMreg,...){ -# -# Compute regression S-estimator via Nelder-Mead method -# The measure of scale is taken to be the percentage bend midvariance -# -if(xout){ -x<-as.matrix(x) -flag<-outfun(x,plotit=plotit,...)$keep -x<-x[flag,] -y<-y[flag] -x<-as.matrix(x) -} -x <- as.matrix(x) -X<-cbind(x,y) -X<-elimna(X) -np<-ncol(X) -npm1=np-1 -x=X[,1:npm1] -x=as.matrix(x) -y=X[,np] -N<-np-1 -#temp<-initreg(x,y,SEED=SEED)$coef -temp<-initreg(x,y)$coef -START<-temp[2:np] -temp<-nelder(X,N,FN=snmreg.sub,START=START) -alpha <- median(y - x %*% temp) -coef <- c(alpha,temp) -res <- y - x %*% temp - alpha -list(coef = coef, residuals = res) -} -snmregv2<-function(x,y,SEED=TRUE){ -# -# Compute regression S-estimator -# remove points for which residuals are outliers -# then recompute the estimated slopes and intercept -# -res=snmreg(x,y,SEED=SEED)$residuals -chk<-abs(res-median(res))/mad(res) -x=as.matrix(x) -xx<-x[chk<=2,] -yy<-y[chk<=2] -temp<-snmreg(xx,yy,SEED=SEED) -list(coef=temp$coef,residuals=temp$residuals) -} - - -larsR<-function(x,y,type="lasso"){ -library(lars) -p=ncol(x) -p1=p+1 -xy=elimna(cbind(x,y)) -result=lars(xy[,1:p],xy[,p1],type=type) -result -} - -regvarp<-function(x,y,p=1,locfun=lloc,scat=var,est=mean,cov.fun=cov.mba){ -# -# Measure the importance of each of p variables in a regression -# problem, p>1 -# -xy=cbind(x,y) -xy<-elimna(xy) -m<-ncol(x) -x=xy[,1:m] -n<-nrow(x) -m1=m+1 -y=xy[,m1] -x=standm(x,locfun=locfun,est=est,scat=scat) -vals=NA -if(p==1)for(j in 1:m){ -vals[j]=gvarg(cbind(y,x[,j]),cov.fun) -} -if(p>1){ -temp=modgen(m) -ic=0 -for(j in 1:length(temp)){ -if(length(temp[[j]])==p){ -ic=ic+1 -vals[ic]=gvarg(cbind(y,x[,temp[[j]]]),cov.fun) -z=cbind(y,x[,temp[[j]]]) -}}} -vals -} - -bwmcppb<-function(J, K, x, est=tmean,JK = J * K, - alpha = 0.05, grp =c(1:JK), nboot = 500, bhop=TRUE,SEED = TRUE,...) -{ - # - # A percentile bootstrap for multiple comparisons - # for all main effects and interactions - # The analysis is done by generating bootstrap samples and - # using an appropriate linear contrast. - # - # The R variable x is assumed to contain the raw - # data stored in list mode or in a matrix. - # If in list mode, x[[1]] contains the data - # for the first level of both factors: level 1,1. - # x[[2]] is assumed to contain the data for level 1 of the - # first factor and level 2 of the second: level 1,2 - # x[[K]] is the data for level 1,K - # x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc. - # - # If the data are in a matrix, column 1 is assumed to - # correspond to x[[1]], column 2 to x[[2]], etc. - # - # When in list mode x is assumed to have length JK, the total number - # groups being tested, but a subset of the data can be analyzed - # using grp - # - # bhop=TRUE, use Benjaminin--Hochberg. When using a one-step M-estimator - # and the sample sizes are small, say less than 80, bhop=TRUE is a bit better. 
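- # Illustration (hypothetical data, not from the original comments; assumes
- # the other functions in this file are available):
- #   set.seed(1)
- #   x <- list()
- #   for(j in 1:6) x[[j]] <- rnorm(25)  # a 2-by-3 design stored as described above
- #   bwmcppb(2, 3, x)                   # returns $Fac.A, $Fac.B and $Fac.AB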
- # -con=con2way(J,K) -A=bwmcppb.sub(J=J, K=K, x, est=est,JK = J * K,con=con$conA, - alpha = alpha, grp =c(1:JK), nboot = nboot, bhop=bhop,SEED = SEED,...) -B=bwmcppb.sub(J=J, K=K, x, est=est,JK = J * K,con=con$conB, - alpha = alpha, grp =c(1:JK), nboot = nboot, bhop=bhop,SEED = SEED,...) -AB=bwmcppb.sub(J=J, K=K, x, est=est,JK = J * K,con=con$conAB, - alpha = alpha, grp =c(1:JK), nboot = nboot, bhop=bhop,SEED = SEED,...) -list(Fac.A=A,Fac.B=B,Fac.AB=AB) -} - -bwmcppb.sub<-function(J, K, x, est=tmean, JK = J * K, con = 0,method='hoch', - alpha = 0.05, grp =c(1:JK), nboot = 500, bhop=TRUE,SEED = TRUE, ...){ - # - # A percentile bootstrap for multiple comparisons - # for all main effects and interactions - # The analysis is done by generating bootstrap samples and - # using an appropriate linear contrast. - # - # The R variable x is assumed to contain the raw - # data stored in list mode or in a matrix. - # If in list mode, x[[1]] contains the data - # for the first level of both factors: level 1,1. - # x[[2]] is assumed to contain the data for level 1 of the - # first factor and level 2 of the second: level 1,2 - # x[[K]] is the data for level 1,K - # x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc. - # - # If the data are in a matrix, column 1 is assumed to - # correspond to x[[1]], column 2 to x[[2]], etc. - # - # When in list mode x is assumed to have length JK, the total number - # groups being tested, but a subset of the data can be analyzed - # using grp - # - if(is.matrix(x)) { - y <- list() - for(j in 1:ncol(x)) - y[[j]] <- x[, j] -x=y -} - nvec <- NA -for(j in 1:length(x))nvec[j]=length(x[[j]]) -nmax=max(nvec) -ncon=ncol(con) - p <- J * K - v <- matrix(0, p, p) - data <- list() -xx=list() - for(j in 1:length(x)) { -# data[[j]] <- x[[grp[j]]] -xx[[j]]=x[[grp[j]]] # save input data -# # Now have the groups in proper order. -# data[[j]] = data[[j]] - mean(data[[j]], tr = tr) - } -ilow=1-K -iup=0 -for(j in 1:J){ -ilow <- ilow + K - iup = iup + K -sel <- c(ilow:iup) -xx[sel]=listm(elimna(matl(xx[sel]))) - } - jp <- 1 - K - kv <- 0 - if(SEED) - set.seed(2) - # set seed of random number generator so that - # results can be duplicated. - # Next determine the n_j values - testA = NA - bsam = list() - bdat = list() -aboot=matrix(NA,nrow=nboot,ncol=ncol(con)) -tvec=NA -x=xx -tvec=linhat(x,con,est=est,...) - for(ib in 1:nboot) { - ilow <- 1 - K - iup = 0 - for(j in 1:J) { - ilow <- ilow + K - iup = iup + K -nv=length(x[[ilow]]) -bdat[[j]] = sample(nv, size = nv, replace =TRUE) -for(k in ilow:iup){ - bsam[[k]] = x[[k]][bdat[[j]]] -} -} -ilow=1-K -iup=0 -aboot[ib,]=linhat(bsam,con=con,est=est,...) 
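-# linhat applies every contrast in con to the estimates of location for the
-# bootstrap sample (see linhat above), giving one row of bootstrap psi-hat
-# values per replication.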
-} -pbA=NA -for(j in 1:ncol(aboot)){ -pbA[j]=mean(aboot[,j]>0) -pbA[j]=2*min(c(pbA[j],1-pbA[j])) -} -# Determine critical values -if(!bhop)dvec=alpha/c(1:ncol(con)) -if(bhop)dvec<-(ncol(con)-c(1:ncol(con))+1)*alpha/ncol(con) -outputA<-matrix(0,ncol(con),6) -dimnames(outputA)<-list(NULL,c("con.num","psihat","p.value","adj.p.value", -"ci.lower","ci.upper")) -test=pbA -temp2<-order(0-test) -zvec<-dvec[1:ncon] -sigvec<-(test[temp2]>=zvec) -outputA[temp2,4]<-zvec -icl<-round(dvec[ncon]*nboot/2)+1 -icu<-nboot-icl-1 -outputA[,2]<-tvec -for (ic in 1:ncol(con)){ -outputA[ic,1]<-ic -outputA[ic,3]<-test[ic] -temp<-sort(aboot[,ic]) -outputA[ic,5]<-temp[icl] -outputA[ic,6]<-temp[icu] -} -outputA[,4]=p.adjust(outputA[,3],method=method) -outputA -} - -D.akp.effect<-function(x,y=NULL,null.value=0,tr=.2){ -# -# Computes the robust effect size for one-sample case using -# a simple modification of -# Algina, Keselman, Penfield Pcyh Methods, 2005, 317-328 -# -# When comparing two dependent groups, data for the second group can be stored in -# the second argument y. The function then computes the difference scores x-y -# -library(MASS) -if(!is.null(y))x=x-y -x<-elimna(x) -s1sq=winvar(x,tr=tr) -cterm=1 -if(tr>0)cterm=area(dnormvar,qnorm(tr),qnorm(1-tr))+2*(qnorm(tr)^2)*tr -cterm=sqrt(cterm) -dval<-cterm*(tmean(x,tr=tr)-null.value)/sqrt(s1sq) -dval -} - - -smean2v2<-function(m1,m2,nullv=rep(0,ncol(m1)),cop=3,MM=FALSE,SEED=TRUE, -nboot=500,plotit=TRUE,MC=FALSE,STAND=TRUE){ -# -# m is an n by p matrix -# -# For two independent groups, -# test hypothesis that multivariate skipped estimators -# are all equal. -# -# The level of the test is .05. -# -# Skipped estimator is used, i.e., -# eliminate outliers using a projection method. -# That is, determine center of data using: -# -# cop=1 Donoho-Gasko median, -# cop=2 MCD, -# cop=3 marginal medians. -# cop=4 MVE -# -# For each point -# consider the line between it and the center, -# project all points onto this line, and -# check for outliers using -# -# MM=F, a boxplot rule. -# MM=T, rule based on MAD and median -# -# Repeat this for all points. A point is declared -# an outlier if for any projection is an outlier -# using a modification of the usual boxplot rule. -# -# Eliminate any outliers and compute means -# using remaining data. 
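-# Illustration (hypothetical data, not from the original comments):
-#   m1 <- matrix(rnorm(60), ncol=2)
-#   m2 <- matrix(rnorm(80), ncol=2)
-#   smean2v2(m1, m2)  # tests whether the two skipped mean vectors are equal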
-# -if(ncol(m1) != ncol(m2)){ -stop("Number of variables in group 1 does not equal the number in group 2.") -} -if(SEED)set.seed(2) -m1<-elimna(m1) -m2<-elimna(m2) -n1<-nrow(m1) -n2<-nrow(m2) -n<-min(c(n1,n2)) -crit.level<-.05 -if(n<=120)crit.level<-.045 -if(n<=80)crit.level<-.04 -if(n<=60)crit.level<-.035 -if(n<=40)crit.level<-.03 -if(n<=30)crit.level<-.025 -if(n<=20)crit.level<-.02 -val<-matrix(NA,ncol=ncol(m1),nrow=nboot) -est1=smean(m1) -est2=smean(m2) -#est=smean(m1)-smean(m2) -est=est1-est2 -for(j in 1: nboot){ -data1<-sample(n1,size=n1,replace=TRUE) -data2<-sample(n2,size=n2,replace=TRUE) -mm1<-m1[data1,] -temp<-outpro(mm1,plotit=FALSE,cop=cop,STAND=STAND)$keep -v1<-apply(mm1[temp,],2,mean) -mm2<-m2[data2,] -temp<-outpro(mm2,plotit=FALSE,cop=cop,STAND=STAND)$keep -v2<-apply(mm2[temp,],2,mean) -val[j,]<-v1-v2 -} -if(!MC)temp<-pdis(rbind(val,nullv)) -if(MC)temp<-pdisMC(rbind(val,nullv)) -sig.level<-sum(temp[nboot+1]2){ -center1<-dmean(m1,tr=.5) -center2<-dmean(m2,tr=.5) -} -if(ncol(m1)==2){ -tempd<-NA -for(i in 1:nrow(m1)) -tempd[i]<-depth(m1[i,1],m1[i,2],m1) -mdep<-max(tempd) -flag<-(tempd==mdep) -if(sum(flag)==1)center1<-m1[flag,] -if(sum(flag)>1)center1<-apply(m1[flag,],2,mean) -for(i in 1:nrow(m2)) -tempd[i]<-depth(m2[i,1],m2[i,2],m2) -mdep<-max(tempd) -flag<-(tempd==mdep) -if(sum(flag)==1)center2<-m2[flag,] -if(sum(flag)>1)center2<-apply(m2[flag,],2,mean) -}} -if(cop==2){ -center1<-cov.mcd(m1)$center -center2<-cov.mcd(m2)$center -} -if(cop==3){ -center1<-apply(m1,2,median) -center2<-apply(m2,2,median) -} -if(cop==4){ -center1<-smean(m1) -center2<-smean(m2) -} -center<-(center1+center2)/2 -B<-center1-center2 -if(sum(center1^2)=crit05)regci[ic,4]<-1 -}}} -regci=data.frame(regci) -flag=(regci[,4]==0) -regci[flag,4]="fail to reject" -regci[!flag,4]="reject" -list(crit.value=crit05,est=est,results=regci) -} - - -mopreg<-function(x,y,regfun=tsreg,cop=3,KEEP=TRUE,MC=FALSE,STAND=TRUE){ -# -# Do multiple (outcomes) regression on points not labled outliers -# using projection-type outlier detection method -# Arg=regfun determines regression method; -# by default, Theil-Sen is used. -# -# KEEP=F, outliers will be eliminated -# KEEP=T, outliers are not eliminated -# cop: see function outpro -library(MASS) -if(MC)library(parallel) -x<-as.matrix(x) -y<-as.matrix(y) -px<-ncol(x) -py<-ncol(y) -m<-cbind(x,y) -m<-elimna(m) # eliminate any rows with missing data -if(KEEP)ivec<-c(1:nrow(x)) -if(!KEEP){ -if(!MC)ivec<-outpro(m,plotit=FALSE,cop=cop,STAND=STAND)$keep -if(MC)ivec<-outproMC(m,plotit=FALSE,cop=cop,STAND=STAND)$keep -} -np1<-ncol(x)+1 -vec<-rep(1,nrow(m)) -pxpy<-px+py -coef<-matrix(ncol=py,nrow=np1) -res<-matrix(ncol=py,nrow=nrow(m)) -for(i in 1:py){ -pv<-px+i -coef[,i]<-regfun(m[ivec,1:ncol(x)],m[ivec,pv])$coef -vec<-as.matrix(vec) -res[,i]<-m[,pv]-cbind(vec,m[,1:ncol(x)])%*%coef[,i] -} -list(coef=coef,residuals=res) -} -robpcaS<-function(x,pval=ncol(x),SCORES=FALSE,STAND=TRUE,est=tmean,varfun=winvar,SEED=TRUE){ -# -# An abbreviated form of robpca. -# -# compute eigen values to determine proportion of scatter. -# Goal is to see how many components are needed -# -# pval indicates the number of principal components. -# -x=elimna(x) -if(STAND)x=standm(x,est=est,scat=varfun) -v=robpca(x,pval=pval,pr=FALSE,plotit=FALSE,SEED=SEED) -cumsum(v$L/sum(v$L)) -val=matrix(NA,ncol=length(v$L),nrow=4) -scores=NULL -if(SCORES)scores=v$T -dimnames(val)=list(c("Number of Comp.","Robust Stand Dev","Proportion Robust var","Cum. 
Proportion"), -NULL) -val[1,]=c(1:length(v$L)) -val[2,]=sqrt(v$L) -val[3,]=v$L/sum(v$L) -val[4,]=cumsum(v$L/sum(v$L)) -list(summary=val,scores=scores) -} - - -Ppca<-function(x,p=ncol(x)-1,locfun=L1medcen,loc.val=NULL,SCORES=FALSE, -gvar.fun=cov.mba,pr=TRUE,SEED=TRUE,gcov=rmba,SCALE=TRUE,...){ -# -# Robust PCA aimed at finding scores that maximize a -# robust generalized variance given the goal of reducing data from -# m dimensions to -# p, which defaults to m-1 -# -# locfun, location used to center design space. -# by default, use the spatial median -# alternatives are mcd, tauloc, ... -# -# # data are centered based on measure of location indicated by -# locfun: default is spatial median. -# -# SCALE=T means the marginal distributions are rescaled using the -# measure and scatter indicated by -# gcov, which defaults to median ball measure of location and variance -# -# Output: the projection matrix. If -# SCORES=T, the projected scores are returned. -# -x=as.matrix(x) -x<-elimna(x) -n<-nrow(x) -m<-ncol(x) -xdat=c(n,m,p,as.vector(x)) -if(!SCALE){ -if(is.null(loc.val))info<-locfun(x,...)$center -if(!is.null(loc.val))info<-loc.val -for(i in 1:n)x[i,]<-x[i,]-info -} -if(SCALE){ -ms=gcov(x) -for(i in 1:n)x[i,]<-x[i,]-ms$center -for(j in 1:m)x[,j]<-x[,j]/sqrt(ms$cov[j,j]) -} -vals<-NA -z<-matrix(nrow=n,ncol=p) -np=p*m -B=robpca(x,pval=p,plotit=FALSE,pr=pr,SEED=SEED,scree=FALSE)$P -B=t(B) -Bs=nelderv2(xdat,np,NMpca,START=B) -Bop=matrix(Bs,nrow=p,ncol=m) -Bop=t(ortho(t(Bop))) -z<-matrix(nrow=n,ncol=p) -zval<-NULL -for(i in 1:n)z[i,]<-Bop%*%as.matrix(x[i,]) -if(SCORES)zval<-z -val=gvarg(z) -list(B=Bop,gen.sd=sqrt(val),scores=zval) -} -Ppca.sum.sub<-function(j,x,SCALE=TRUE){ -# -res=Ppca(x,p=j,pr=FALSE,SCALE=SCALE)$gen.sd -res -} -Ppca.summary<-function(x,MC=FALSE,SCALE=TRUE,p=NULL){ -# -# x is assumed to be a matrix with p columns -# Using robust principal components (Ppca) -# compute generalized variance for each dimension reduction -# from 1 to p. -# -# report values plus proportion relative to largest value found -# -x=as.matrix(x) -if(!is.matrix(x))stop("x should be a matrix") -x=elimna(x) -gv=NA -if(is.null(p))p=ncol(x) -if(!MC)for(j in 1:p)gv[j]=Ppca(x,p=j,pr=FALSE,SCALE=SCALE)$gen.sd -if(MC){ -library(parallel) -y=list() -for(j in 1:p)y[[j]]=j -gv=mclapply(y,Ppca.sum.sub,x,SCALE=SCALE,mc.preschedule=TRUE) -gv=as.vector(matl(gv)) -} -res=matrix(NA,nrow=3,ncol=p) -res[1,]=c(1:p) -res[2,]=gv -res[3,]=gv/max(gv) -dimnames(res)=list(c("Num. 
of Comp.","Gen.Stand.Dev","Relative Size"),NULL) -list(summary=res) -} - -mdepreg.coef<-function(x,y,xout=FALSE,outfun=out,...){ -# -# multiple depth regression -# -X<-cbind(x,y) -X<-elimna(X) -p1=ncol(X) -p=p1-1 -if(xout){ -flag<-outfun(X[,1:p],plotit=FALSE,...)$keep -X<-X[flag,] -} -library(mrfDepth) -a=rdepthmedian(X)$deep -list(coef=a) -} - -mdepreg<-function(x,y,xout=FALSE,outfun=out,RES=FALSE,...){ -# -# multiple depth regression -# -X<-cbind(x,y) -X<-elimna(X) -n=nrow(X) -p1=ncol(X) -p=p1-1 -if(xout){ -flag<-outfun(X[,1:p],plotit=FALSE,...)$keep -X<-X[flag,] -} -n.keep=nrow(X) -library(mrfDepth) -a=rdepthmedian(X)$deepest -res=NA -if(RES)res=X[,p1]-X[,1:p]%*%a[2:p1]-a[1] -list(n=n,n.keep=n.keep,coef=a,residuals=res) -} - -# OLD CODE use rdepthmedian in package mdrDepth in new version: -mdepreg.orig<-function(x,y,xout=FALSE,outfun=outpro){ -# -# multiple depth regression -# -X<-cbind(x,y) -X<-elimna(X) -np=n.keep=ncol(X) -p=np-1 -if(xout){ -id=outfun(X[,1:p],plotit=FALSE)$keep -X=X[id,] -n.keep=nrow(X) -} -if(np==2){ -temp=depreg(X[,1],X[,2]) -coef=temp$coef -res=temp$residuals -} -if(np>2){ -N<-np-1 -x=X[,1:N] -y=X[,np] -START<-tsreg(x,y)$coef -coef<-nelderv2(X,np,FN=mdepreg.sub,START=START) -x <- as.matrix(x) -res <- y - x %*% coef[2:np] - coef[1] -} -list(n=n,n.keep=n.keep,coef = coef, residuals = res) -} - - - -l2plot<-function(x1,y1,x2,y2,f=2/3,SCAT=TRUE,xlab="x",ylab="y",pch='*', -eout=FALSE,xout=FALSE,...){ -# -# Plot LOESS smoother for two groups -# -# f is the span used by loess -# SCAT=F, scatterplot not created, just the regression lines -# Missing values are automatically removed. -# -m<-elimna(cbind(x1,y1)) -x1<-m[,1] -y1<-m[,2] -m<-elimna(cbind(x2,y2)) -x2<-m[,1] -y2<-m[,2] -plot(c(x1,x2),c(y1,y2),xlab=xlab,ylab=ylab,pch=pch) -lines(lowess(x1,y1,f=f)) -lines(lowess(x2,y2,f=f)) -} - -contab<-function(dat,alpha=.05){ -# dat is a 2-by-2 contingency table (matrix) -# Goal: compare the marginal probability of the first variable (e.g. Time 1) -# to the marginal probability of the second variable (e.g. Time 2). -# Issue: do the probabilities change from time 1 to time 2. -# -phat=dat -n=sum(phat) -phat=phat/n -p1.=phat[1,1]+phat[1,2] -p.1=phat[1,1]+phat[2,1] -del=p1.-p.1 -sigsq=p1.*(1-p1.)+p.1*(1-p.1)-2*(phat[1,1]*phat[2,2]-phat[1,2]*phat[2,1]) -sig=sqrt(sigsq/n) -test=abs(del)/sig -pv=2*(1-pnorm(test)) -ci=del-qnorm(1-alpha/2)*sig -ci[2]=del+qnorm(1-alpha/2)*sig -list(s.e.=sig,delta=del,CI=ci,p.value=pv) -} - - -Ckappa<-function (x,fleiss=FALSE,w = NULL){ -# -# compute Cohen's kappa -# if fleiss=T, compute weighted kappa with Fleiss weights if w=NULL -# if fleiss=F, w=.5^|i-j| is used. -# if argument w contains weights, they are used -# -if(!is.matrix(x))stop("x should be a square matrix") -if(ncol(x)!=nrow(x))stop("x should be a square matrix") - p <- dim(x)[2] - x <- as.matrix(x) - tot <- sum(x) - x <- x/tot - rs <- rowSums(x) - cs <- colSums(x) - prob <- rs %*% t(cs) - po <- sum(diag(x)) - pc <- sum(diag(prob)) - kappa <- (po - pc)/(1 - pc) - if (is.null(w)) { -v=outer(c(1:p),c(1:p),"-") -w=outer(c(1:p),c(1:p),"-") -if(fleiss)w=1-w^2/(p-1)^2 -if(!fleiss)w=.5^abs(w) -} - weighted.prob <- w * prob - weighted.obser <- w * x - wpo <- sum(weighted.obser) - wpc <- sum(weighted.prob) - wkappa <- (wpo - wpc)/(1 - wpc) - return(list(kappa = kappa, weighted.kappa = wkappa)) -} -ODDSR.CI<-function(x,y=NULL,alpha=.05){ -# -# Compute confidence interval of the odds ratio. 
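-# The odds ratio is estimated as exp(b1), where b1 is the slope from a
-# logistic regression of y on x, and the confidence interval runs from
-# exp(b1 - z*se) to exp(b1 + z*se), z being the 1-alpha/2 standard normal quantile.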
-# -# x is either a two-by-two contingency table or a -# vector of 0's and 1's, in which case -# y is also a vector of 0's and 1's -# -# if x is a 2-by-2 matrix, assume col 1 is X=1, col 2 is X=0 -# row 1 is Y=1 and row 2 is Y=0. -# -if(is.matrix(x)){ -if(ncol(x)!=2)stop("If x is a matrix, should have 2 columns") -if(nrow(x)!=2)stop("If x is a matrix, should have 2 rows") -n=sum(x) -x1=rep(1,x[1,1]) -y1=rep(1,x[1,1]) -x2=rep(0,x[1,2]) -y2=rep(1,x[1,2]) -x3=rep(1,x[2,1]) -y3=rep(0,x[2,1]) -x4=rep(0,x[2,2]) -y4=rep(0,x[2,2]) -x=c(x1,x2,x3,x4) -y=c(y1,y2,y3,y4) -} -temp=logreg(x,y) -z=qnorm(1-alpha/2) -ci=c(exp(temp[2,1]-z*temp[2,2]),exp(temp[2,1]+z*temp[2,2])) -list(odds.ratio=exp(temp[2,1]),ci=ci) -} - -smean<-function(m,cop=3,MM=FALSE,op=1,outfun=outogk,cov.fun=rmba,MC=FALSE,STAND=TRUE,...){ -# -# m is an n by p matrix -# -# Compute a multivariate skipped measure of location -# -# op=1: -# Eliminate outliers using a projection method -# If in addition, MC=T, a multicore processor is used -# assuming your computer has multiple cores and the package -# multicore has been installed. -# -# cop=1 Donoho-Gasko median, -# cop=2 MCD, -# cop=3 marginal medians. -# cop=4 uses MVE center -# cop=5 uses TBS -# cop=6 uses rmba (Olive's median ball algorithm) -# -# For each point -# consider the line between it and the center, -# project all points onto this line, and -# check for outliers using -# -# MM=FALSE, a boxplot rule. -# MM=TRUE, rule based on MAD and median -# -# Repeat this for all points. A point is declared -# an outlier if for any projection it is an outlier -# using a modification of the usual boxplot rule. -# -# op=2 use mgv (function outmgv) method to eliminate outliers -# an outlier if for any projection it is an outlier -# using a modification of the usual boxplot rule. -# -# op=3 use outlier method indicated by outfun -# -# Eliminate any outliers and compute means -# using remaining data. -# -m<-elimna(m) -m=as.matrix(m) -if(nrow(m)<14)op=2 -if(op==1){ -if(!MC)temp<-outpro(m,plotit=FALSE,cop=cop,MM=MM,STAND=STAND)$keep -if(MC)temp<-outproMC(m,plotit=FALSE,cop=cop,MM=MM,STAND=STAND)$keep -} -if(op==2)temp<-outmgv(m,plotit=FALSE,cov.fun=cov.fun)$keep -if(op==3)temp<-outfun(m,plotit=FALSE,...)$keep -val<-apply(m[temp,],2,mean) -val -} - -smeancrv2<-function(m,nullv=rep(0,ncol(m)),cop=3,MM=FALSE,SEED=TRUE, -nboot=500,plotit=TRUE,MC=FALSE,xlab="VAR 1",ylab="VAR 2",STAND=TRUE){ -# -# m is an n by p matrix -# -# Test hypothesis that multivariate skipped estimators -# are all equal to the null value, which defaults to zero. -# The level of the test is .05. -# -# Eliminate outliers using a projection method -# That is, determine center of data using: -# -# cop=1 Donoho-Gasko median, -# cop=2 MCD, -# cop=3 marginal medians. -# cop=4 MVE -# -# For each point -# consider the line between it and the center -# project all points onto this line, and -# check for outliers using -# -# MM=F, a boxplot rule. -# MM=T, rule based on MAD and median -# -# Repeat this for all points. A point is declared -# an outlier if for any projection it is an outlier -# using a modification of the usual boxplot rule. -# -# Eliminate any outliers and compute means -# using remaining data. 
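-# The p-value is based on projection distances (pdis): the null vector is
-# appended to the nboot bootstrap estimates, and the hypothesis is rejected
-# when the distance of the null vector from the skipped estimate is large
-# relative to the distances of the bootstrap estimates.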
-# -if(SEED)set.seed(2) -m<-elimna(m) -n<-nrow(m) -est=smean(m,MC=MC,cop=cop,STAND=STAND) -crit.level<-.05 -if(n<=120)crit.level<-.045 -if(n<=80)crit.level<-.04 -if(n<=60)crit.level<-.035 -if(n<=40)crit.level<-.03 -if(n<=30)crit.level<-.025 -if(n<=20)crit.level<-.02 -data<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot) -val<-matrix(NA,ncol=ncol(m),nrow=nboot) -for(j in 1: nboot){ -mm<-m[data[j,],] -val[j,]<-smean(mm,MC=MC,cop=cop,STAND=STAND) -} -if(!MC)temp<-pdis(rbind(val,nullv),center=est) -if(MC)temp<-pdisMC(rbind(val,nullv),center=est) -sig.level<-sum(temp[nboot+1] kmax) { - warning("Attention robpca: The number of principal components k = ", k, " is larger then kmax = ", kmax, "; k is set to ", kmax,".") - k <- kmax - } - if(!missing(h) & !missing(alpha)) { - stop("Error in robpca: Both inputarguments alpha and h are provided. Only one is required.") - } - if(missing(h) & missing(alpha)) { - h <- min(floor(2*floor((n+kmax+1)/2)-n+2*(n-floor((n+kmax+1)/2))*alpha),n) - } - if(!missing(h) & missing(alpha)) { - alpha <- h/n - if(k==0) { - if(h < floor((n+kmax+1)/2)) { - h <- floor((n+kmax+1)/2) - alpha <- h/n - warning("Attention robpca: h should be larger than (n+kmax+1)/2. It is set to its minimum value ", h, ".") - } - } - else { - if(h < floor((n+k+1)/2)) { - h <- floor((n+k+1)/2) - alpha <- h/n - warning("Attention robpca: h should be larger than (n+k+1)/2. It is set to its minimum value ", h, ".") - } - } - if(h > n) { - alpha <- 0.75 - if(k==0) { - h <- floor(2*floor((n+kmax+1)/2)-n+2*(n-floor((n+kmax+1)/2))*alpha) - } - else { - h <- floor(2*floor((n+k+1)/2)-n+2*(n-floor((n+k+1)/2))*alpha) - } - warning("Attention robpca: h should be smaller than n = ", n, ". It is set to its default value ", h, ".") - } - } - if(missing(h) & !missing(alpha)) { - if(alpha < 0.5) { - alpha <- 0.5 - warning("Attention robpca: Alpha should be larger then 0.5. It is set to 0.5.") - } - if(alpha >= 1) { - alpha <- 0.75 - warning("Attention robpca: Alpha should be smaller then 1. It is set to its default value 0.75.") - - - } - if(k==0) { - h <- floor(2*floor((n+kmax+1)/2)-n+2*(n-floor((n+kmax+1)/2))*alpha) - } - else { - h <- floor(2*floor((n+k+1)/2)-n+2*(n-floor((n+k+1)/2))*alpha) - } - } - labsd <- floor(max(0,min(labsd,n))) - labod <- floor(max(0,min(labod,n))) - - out <- list() - - Xa <- X.svd$scores - center <- X.svd$centerofX - rot <- X.svd$loadings - p1 <- ncol(Xa) - if( (p1 <= min(floor(n/5), kmax)) & (mcd == 1 ) ) { - if(k != 0) { - k <- min(k, p1) - } - else { - k <- p1 -# cat("Message from robpca: The number of principal -# components is defined by the algorithm. It is set to ", k,".\n", sep="") - } - if(h < floor((nrow(Xa) + ncol(Xa) +1)/2)) { - h <- floor((nrow(Xa) + ncol(Xa) +1)/2) - cat("Message from robpca: The number of non-outlying observations h is set to ", h," in order to make the mcd algorithm function.\n", sep="") - } -# Xa.mcd <- cov.mcd(as.data.frame(Xa), quan=h, print=FALSE) -Xa.mcd <- cov.mcd(as.data.frame(Xa), quan=h) # R version -#print(Xa.mcd$method) -#if(length(grep("equation", Xa.mcd$method)) == 1) { -# print(Xa.mcd$method) -# stop("The ROBPCA algorithm can not deal with this -# result from the FAST-MCD algorithm. 
The algorithm is aborted.") -# } -#print("OUT") - Xa.mcd.svd <- svd(Xa.mcd$cov) - scores <- (Xa - matrix(data=rep(Xa.mcd$center, times=nrow(Xa)), nrow=nrow(Xa), ncol=ncol(Xa), byrow=TRUE)) %*% Xa.mcd.svd$u - out$M <- center + as.vector(Xa.mcd$center %*% t(rot)) - out$L <- Xa.mcd.svd$d[1:k] -# -if(scree){ -pv=out$L -cs=pv/sum(pv) -cm=cumsum(cs) -plot(rep(c(1:ncol(x)),2),c(cs,cm),type="n",xlab=xlab,ylab=ylab) -points(c(1:ncol(x)),cs,pch="*") -lines(c(1:ncol(x)),cs,lty=1) -points(c(1:ncol(x)),cm,pch=".") -lines(c(1:ncol(x)),cm,lty=2) -} - - out$P <- X.svd$loadings %*% Xa.mcd.svd$u[,1:k] - out$T <- as.matrix(scores[,1:k]) - if(is.list(dimnames(data))) { - dimnames(out$T)[[1]] <- dimnames(data)[[1]] - } - out$h <- h - out$k <- k - out$alpha <- alpha - } - else { - directions <- choose(n,2) - ndirect <- min(250, directions) - all <- (ndirect == directions) - seed <- 0 - B <- extradir(Xa, ndirect, seed, all) - Bnorm <- vector(mode="numeric", length=nrow(B)) - Bnorm<-apply(B,1,vecnorm) - Bnormr <- Bnorm[Bnorm > 1.E-12] - B <- B[Bnorm > 1.E-12,] - A <- diag(1/Bnormr) %*% B - Y <- Xa %*% t(A) - Z <- matrix(data=0, nrow=n, ncol=length(Bnormr)) - for(i in 1:ncol(Z)) { - univ <- unimcd(Y[,i],quan = h) - if(univ$smcd < 1.E-12) { - r2 <- qr(data[univ$weights==1,])$rank - if(r2 == 1) { - stop("Error in robpca: At least ", sum(univ$weights), " observations are identical.") - } - } - else { - Z[,i] <- abs(Y[,i] - univ$tmcd) / univ$smcd - } - } - H0 <- order(apply(Z, 1, max)) - - Xh <- Xa[H0[1:h],] - Xh.svd <- classSVD(Xh) - - kmax <- min(Xh.svd$rank, kmax) - if( (k == 0) & (plots == 0) ) { - test <- which((Xh.svd$eigenvalues/Xh.svd$eigenvalues[1]) <= 1.E-3) - if(length(test) != 0) { - k <- min(min(Xh.svd$rank, test[1]), kmax) - } - else { - k <- min(Xh.svd$rank, kmax) - } - cumulative <- cumsum(Xh.svd$eigenvalues[1:k]) / sum(Xh.svd$eigenvalues) - if(cumulative[k] > 0.8) { - k <- which(cumulative >= 0.8)[1] - } - cat("Message from robpca: The number of principal components is set by the algorithm. 
It is set to ", k, ".\n", sep="") - } - else { - if( (k==0) & (plots != 0) ) { - loc <- 1:kmax - plot(loc, Xh.svd$eigenvalues[1:kmax], type='b', axes= FALSE, xlab="Component", ylab="Eigenvalue") - axis(2) - axis(1, at=loc) - cumv <- cumsum(Xh.svd$eigenvalues)/sum(Xh.svd$eigenvalues) - text(loc, Xh.svd$eigenvalues[1:kmax] + par("cxy")[2], as.character(signif(cumv[1:kmax], 2))) - box <- dialogbox(title="ROBPCA", controls=list(),buttons = c("OK")) - box <- dialogbox.add.control(box, where=1, statictext.control(paste("How many principal components would you like to retain?\nMaximum = ", kmax, sep=""), size=c(200,20))) - box <- dialogbox.add.control(box, where=2, editfield.control(label="Your choice:", size=c(30,10))) - input <- as.integer(dialogbox.display(box)$values$"Your choice:") - k <- max(min(min(Xh.svd$rank, input), kmax), 1) - } - else { - k <- min(min(Xh.svd$rank, k), kmax) - } - } - if(k!=X.svd$rank){ - XRc <- Xa-matrix(data=rep(Xh.svd$centerofX, times=nrow(Xa)), nrow=nrow(Xa), ncol=ncol(Xa), byrow=TRUE) - Xtilde <- XRc%*%Xh.svd$loadings[,1:k]%*%t(Xh.svd$loadings[,1:k]) - Rdiff <- XRc-Xtilde - odh <- apply(Rdiff,1,vecnorm) - ms <- unimcd(odh^(2/3),h) - cutoffodh <- sqrt(qnorm(0.975,ms$tmcd,ms$smcd)^3) - indexset <- (odh<=cutoffodh) - Xh.svd <- classSVD(Xa[indexset,]) - kmax <- min(Xh.svd$rank, kmax) - } - - center <- center + Xh.svd$centerofX %*% t(rot) - rot <- rot %*% Xh.svd$loadings - Xstar<- (Xa - matrix(data=rep(Xh.svd$centerofX, times=nrow(Xa)), nrow=nrow(Xa), ncol=ncol(Xa), byrow=TRUE)) %*% Xh.svd$loadings - Xstar <- as.matrix(Xstar[,1:k]) - rot <- as.matrix(rot[,1:k]) - mah <- mahalanobis(Xstar, center=rep(0, ncol(Xstar)), cov=diag(Xh.svd$eigenvalues[1:k], nrow=k)) - oldobj <- prod(Xh.svd$eigenvalues[1:k]) - niter <- 100 - for(j in 1:niter) { - mah.order <- order(mah) - Xh <- as.matrix(Xstar[mah.order[1:h],]) - Xh.svd <- classSVD(Xh) - obj <- prod(Xh.svd$eigenvalues) - Xstar <- (Xstar - matrix(data=rep(Xh.svd$centerofX, times=nrow(Xstar)), nrow=nrow(Xstar), ncol=ncol(Xstar), byrow=TRUE)) %*% Xh.svd$loadings - center <- center + Xh.svd$centerofX %*% t(rot) - rot <- rot %*% Xh.svd$loadings - mah <- mahalanobis(Xstar, center=rep(0, ncol(Xstar)), cov=diag(x=Xh.svd$eigenvalues, nrow=length(Xh.svd$eigenvalues))) - if( (Xh.svd$rank == k) & ( abs(oldobj - obj) < 1.E-12) ) { - break - } - else { - oldobj <- obj - if(Xh.svd$rank < k) { - j <- 1 - k <- Xh.svd$rank - } - } - } -Xstar.mcd <- cov.mcd(as.data.frame(Xstar), quan=h) # R version -# if(Xstar.mcd$raw.objective < obj) { - covf <- Xstar.mcd$cov - centerf <- Xstar.mcd$center -# } -# else { -# consistencyfactor <- median(mah)/qchisq(0.5,k) -# mah <- mah/consistencyfactor -# weights <- ifelse(mah <= qchisq(0.975, k), T, F) -# noMCD <- weightmecov(Xstar, weights, n, k) -# centerf <- noMCD$center -# covf <- noMCD$cov -# } - - covf.eigen <- eigen(covf) - covf.eigen.values.sort <- greatsort(covf.eigen$values) - P6 <- covf.eigen$vectors - P6 <- covf.eigen$vectors[,covf.eigen.values.sort$index] - -out$T <- (Xstar - matrix(data=rep(centerf, times=n), nrow=n, ncol=ncol(Xstar), byrow=TRUE)) %*% covf.eigen$vectors[,covf.eigen.values.sort$index] - - if(is.list(dimnames(data))) { - dimnames(out$T)[[1]] <- dimnames(data)[[1]] - } - out$P <- rot %*% covf.eigen$vectors[,covf.eigen.values.sort$index] - out$M <- as.vector(center + centerf %*% t(rot)) - out$L <- as.vector(covf.eigen$values) - out$k <- k - out$h <- h - - out$alpha <- alpha - } - oldClass(out) <- "robpca" - out <- CompRobustDist(data, X.svd$rank, out, classic) - if(classic == 1) { - out 
<- CompClassicDist(X.svd, out) - } - if(plots == 1) { - plot(out, classic, labod=labod, labsd=labsd) - } - return(out) -} -"greatsort"<-function(vec){ - x <- vec * (-1) - index <- order(x) - return(list(sortedvector=rev(sort(vec)), index=index)) -} -"classSVD"<-function(x){ - if(!is.matrix(x)) { - stop("The function classSVD requires input of type 'matrix'.") - } - n <- nrow(x) - p <- ncol(x) - if(n == 1) { - stop("The sample size is 1. No singular value decomposition can be performed.") - } - if(p < 5) { - tolerance <- 1E-12 - } - else { - if(p <= 8) { - tolerance <- 1E-14 - } - else { - tolerance <- 1E-16 - } - } - centerofX <- apply(x, 2, mean) - Xcentered <- scale(x, center=TRUE, scale=FALSE) - XcenteredSVD <- svd(Xcentered/sqrt(n-1)) - rank <- sum(XcenteredSVD$d > tolerance) - eigenvalues <- (XcenteredSVD$d[1:rank])^2 - loadings <- XcenteredSVD$v[,1:rank] - scores <- Xcentered %*% loadings - return(list(loadings=as.matrix(loadings), scores=as.matrix(scores), eigenvalues=as.vector(eigenvalues), rank=rank, - Xcentered=as.matrix(Xcentered), centerofX=as.vector(centerofX))) -} -"kernelEVD"<-function(x){ - if(!is.matrix(x)) { - stop("The function kernelEVD requires input of type 'matrix'.") - } - n <- nrow(x) - p <- ncol(x) - if(n > p) { - return(classSVD(x)) - } - else { - centerofX <- apply(x, 2, mean) - Xcentered <- scale(x, center=TRUE, scale=FALSE) - if(n == 1) { - stop("The sample size is 1. No singular value decomposition can be performed.") - } - eigen <- eigen(Xcentered %*% t(Xcentered)/(n-1)) - eigen.descending <- greatsort(eigen$values) - loadings <- eigen$vectors[,eigen.descending$index] - tolerance <- n * max(eigen$values) * .Machine$double.eps - rank <- sum(eigen.descending$sortedvector > tolerance) - eigenvalues <- eigen.descending$sortedvector[1:rank] - loadings <- t((Xcentered/sqrt(n-1))) %*% loadings[,1:rank] %*% diag(1/sqrt(eigenvalues), nrow=length(eigenvalues), ncol=length(eigenvalues)) - scores <- Xcentered %*% loadings - return(list(loadings=as.matrix(loadings), scores=as.matrix(scores), eigenvalues=as.vector(eigenvalues), rank=rank, - Xcentered=as.matrix(Xcentered), centerofX=as.vector(centerofX))) - } -} -"extradir"<-function(data, ndirect, seed=0, all=TRUE){ - n <- nrow(data) - p <- ncol(data) - B2 <- matrix(data=0, nrow = ndirect, ncol = p) - rowindex <- 1 - i <- 1 - if(all == T) { - while( (i < n) & (rowindex <= ndirect) ) { - j <- i + 1 - while( (j <= n) & (rowindex <= ndirect) ) { - B2[rowindex,] <- data[i,] - data[j,] - j <- j + 1 - rowindex <- rowindex + 1 - } - i <- i + 1 - } - } - else { - while(rowindex <= ndirect) { - sseed<-randomset(n,2,seed) - seed<-sseed$seed - B2[rowindex,] <- data[sseed$ranset[1],] - data[sseed$ranset[2],] - rowindex <- rowindex + 1 - } - } - return(B2) -} -"randomset"<-function(tot,nel,seed){ -out<-list() -for(j in 1:nel){ - randseed<-uniran(seed) - seed<-randseed$seed - num<-floor(randseed$random*tot)+1 - if(j > 1){ - while(any(out$ranset==num)){ - randseed<-uniran(seed) - seed<-randseed$seed - num<-floor(randseed$random*tot)+1 - - } - } - out$ranset[j]<-num - } - out$seed<-seed - return(out) -} -"uniran"<-function(seed = 0){ - out <- list() - seed<-floor(seed*5761)+999 - quot<-floor(seed/65536) - out$seed<-floor(seed)-floor(quot*65536) - out$random<-out$seed/65536 - return(out) -} -"unimcd"<-function(y,quan){ - out<-list() - ncas<-length(y) - len<-ncas-quan+1 - if(len==1){ - out$tmcd<-mean(y) - out$smcd<-sqrt(var(y)) - } - else { - ay<-c() - I<-order(y) - y<-y[I] - ay[1]<-sum(y[1:quan]) - for(samp in 2:len){ - 
ay[samp]<-ay[samp-1]-y[samp-1]+y[samp+quan-1] - } - ay2<-ay^2/quan - sq<-c() - sq[1]<-sum(y[1:quan]^2)-ay2[1] - for(samp in 2:len){ - sq[samp]<-sq[samp-1]-y[samp-1]^2+y[samp+quan-1]^2-ay2[samp]+ay2[samp-1] - } - sqmin<-min(sq) - Isq<-order(sq) - ndup<-sum(sq == sqmin) - ii<-Isq[1:ndup] - slutn<-c() - slutn[1:ndup]<-ay[ii] - initmean<-slutn[floor((ndup+1)/2)]/quan - initcov<-sqmin/(quan-1) - res<-(y-initmean)^2/initcov - sortres<-sort(res) - factor<-sortres[quan]/qchisq(quan/ncas,1) - initcov<-factor*initcov - res<-(y-initmean)^2/initcov - quantile<-qchisq(0.975,1) - out$weights<-(res9){ -if(pr)print("With more than 9 variables, might want to use ADJ=T") -} -if(!ADJ)flag<-outpro(x,cop=cop,STAND=STAND,plotit=FALSE)$keep -if(ADJ)flag<-outproad(x,cop=cop,SEED=SEED,STAND=STAND)$results$keep -remx<-x -temp2<-princomp(remx) -x<-x[flag,] -loc<-apply(x,2,mean) -temp<-princomp(x,cor=cor,scores=TRUE) -if(scree){ -z=temp$sdev -pv=z^2 -cs=pv/sum(pv) -cm=cumsum(cs) -plot(rep(c(1:ncol(x)),2),c(cs,cm),type="n",xlab=xlab,ylab=ylab) -points(c(1:ncol(x)),cs,pch="*") -lines(c(1:ncol(x)),cs,lty=1) -points(c(1:ncol(x)),cm,pch=".") -lines(c(1:ncol(x)),cm,lty=2) -} -if(!SCORES)temp<-summary(temp,loadings=loadings) -if(SCORES){ -if(is.null(pval)) -stop("When computing scores, specify pval, number of components") -if (!ALL)temp<-temp$scores[,1:pval] -if(ALL){ -temp<-summary(temp,loadings=TRUE) -B<-temp[2]$loadings[1:m,1:m] # Use robust loadings - z<-remx -for(i in 1:nrow(z))z[i,]<-z[i,]-loc -temp<-t(B)%*%t(z) -temp<-t(temp) -temp<-temp[,1:pval] -}} -temp -} - -mcp2a<-function(J,K,x,est=mom,con=NULL,alpha=.05,nboot=NA,grp=NA,...){ -# -# Do all pairwise comparisons of -# main effects for Factor A and B and all interactions -# - # The data are assumed to be stored in x - # in list mode or in a matrix. - # If grp is unspecified, it is assumed x[[1]] contains the data - # for the first level of both factors: level 1,1. - # x[[2]] is assumed to contain the data for level 1 of the - # first factor and level 2 of the second factor: level 1,2 - # x[[j+1]] is the data for level 2,1, etc. - # If the data are in wrong order, grp can be used to rearrange the - # groups. For example, for a two by two design, grp<-c(2,4,3,1) - # indicates that the second group corresponds to level 1,1; - # group 4 corresponds to level 1,2; group 3 is level 2,1; - # and group 1 is level 2,2. - # - # Missing values are automatically removed. - # - if(identical(est,median))print('Warning: med2mcp is a better when using the usual sample median') - JK <- J * K - if(is.matrix(x)) - x <- listm(x) - if(!is.na(grp)) { - yy <- x - for(j in 1:length(grp)) - x[[j]] <- yy[[grp[j]]] - } - if(!is.list(x)) - stop("Data must be stored in list mode or a matrix.") -mvec<-NA - tempn=0 - for(j in 1:JK) { - xx <- x[[j]] - x[[j]] <- xx[!is.na(xx)] - mvec[j]<-est(x[[j]],...) -tempn[j]=length(x[[j]]) - } -nmax=max(tempn) - # - # Create the three contrast matrices - # - if(JK != length(x)) - warning("The number of groups does not match the number of contrast coefficients.") -set.seed(2) # set seed of random number generator so that -# results can be duplicated. -# Determine nboot if a value was not specified -if(is.na(nboot)){ -nboot<-5000 -if(J <= 8)nboot<-4000 -if(J <= 3)nboot<-2000 -} -bvec<-matrix(NA,nrow=JK,ncol=nboot) -for(j in 1:JK){ -data<-matrix(sample(x[[j]],size=length(x[[j]])*nboot,replace=TRUE),nrow=nboot) -bvec[j,]<-apply(data,1,est,...) 
# J by nboot matrix, jth row contains -# bootstrapped estimates for jth group -} -outvec<-list() -if(!is.null(con))stop('Use linconm when specifying the linear contrast coefficients') -temp3<-con2way(J,K) -for(jj in 1:3){ -con<-temp3[[jj]] -con<-as.matrix(con) -ncon<-ncol(con) -# Determine critical values -if(alpha==.05){ -dvec<-c(.025,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) -if(ncon > 10){ -avec<-.05/c(11:ncon) -dvec<-c(dvec,avec) -}} -if(alpha==.01){ -dvec<-c(.005,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) -if(ncon > 10){ -avec<-.01/c(11:ncon) -dvec<-c(dvec,avec) -}} -if(nmax>80){ -if(alpha==.05){ -dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) -if(ncon > 10){ -avec<-.05/c(11:ncon) -dvec<-c(dvec,avec) -}} -if(alpha==.01){ -dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) -if(ncon > 10){ -avec<-.01/c(11:ncon) -dvec<-c(dvec,avec) -}} -if(alpha != .05 && alpha != .01){ -dvec<-alpha/c(1:ncon) -}} -if(alpha != .05 && alpha != .01)dvec<-alpha/c(1:ncon) -test<-NA -bcon<-t(con)%*%bvec #ncon by nboot matrix -tvec<-t(con)%*%mvec -for (d in 1:ncon){ -test[d]<-sum(bcon[d,]>0)/nboot -if(test[d]> .5)test[d]<-1-test[d] -} -output<-matrix(0,ncon,6) -dimnames(output)<-list(NULL,c("con.num","psihat","sig.test","sig.crit","ci.lower","ci.upper")) -temp2<-order(0-test) -zvec<-dvec[1:ncon] -sigvec<-(test[temp2]>=zvec) -if(sum(sigvec)0){ -print("Some bootstrap estimates of the test statistic could not be computed") -print("Effective number of bootstrap samples was") -print(sum(!is.na(testb))) -} -test<-t1wayv2(x,tr=tr,grp=grp) -pval<-mean(test$TEST<=testb,na.rm=TRUE) -list(test=test$TEST,p.value=pval,Var.Explained=test$Var.Explained,Effect.Size=test$Effect.Size) -} - -cidM<-function(x,nboot=1000,alpha=.05,MC=FALSE,SEED=TRUE,g=NULL,dp=NULL){ -# -# Variation of Cliff method based on median of X-Y -# i.e., use p=P(XY)","p.hat")) -dvec<-alpha/c(1:CC) -for(j in 1:J){ -for(k in 1:J){ -if(j0)+.5*mean(MAT[,jcom]==0) -pvec[jcom]=2*min(c(p.value,1-p.value)) -if(is.na(pvec[jcom]))pvec=1 -test[jcom,1]<-j -test[jcom,2]<-k -test[jcom,3]<-pvec[jcom] -test[jcom,5:7]<-cid(x[[j]],x[[k]])$summary.dvals -test[jcom,8]<-test[jcom,5]+.5*test[jcom,6] -}}} -temp2<-order(0-test[,3]) -test[temp2,4]=dvec -list(test=test) -} - -msmedse<-function(x){ -# -# Compute standard error of the median using method -# recommended by McKean and Shrader (1984). -# -x=elimna(x) -chk=sum(duplicated(x)) -if(chk>0){ -print("WARNING: tied values detected.") -print("Estimate of standard error might be highly inaccurate, even with n large") -} -y<-sort(x) -n<-length(x) -av<-round((n+1)/2-qnorm(.995)*sqrt(n/4)) -if(av==0)av<-1 -top<-n-av+1 -sqse<-((y[top]-y[av])/(2*qnorm(.995)))^2 -sqse<-sqrt(sqse) -sqse -} - - - - -t1waybtv2<-function(x,tr=.2,grp=NA,g=NULL,dp=NULL,nboot=599,SEED=TRUE){ -# -# Test the hypothesis of equal trimmed mdeans, corresponding to J independent -# groups, using a bootstrap-t method. -# -# The data are assumed to be stored in x in list mode -# or in a matrix. In the first case -# x[[1]] contains the data for the first group, x[[2]] the data -# for the second group, etc. Length(x)=the number of groups = J. -# If stored in a matrix, columns correspond to groups. -# -# grp is used to specify some subset of the groups, if desired. -# By default, all J groups are used. 
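-# For example, grp=c(1,3,5) would restrict the analysis to groups 1, 3 and 5.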
-# g=NULL, x is assumed to be a matrix or have list mode -# -# if g is specifed, it is assumed that column g of x is -# a factor variable and that the dependent variable of interest is in column -# dp of x, which can be a matrix or data frame. -# -# The default number of bootstrap samples is nboot=599 -# -if(!is.null(g)){ -if(is.null(dp))stop("Specify a value for dp, the column containing the data") -x=fac2list(x[,dp],x[,g]) -} -if(is.matrix(x))x<-listm(x) -if(!is.list(x))stop("Data must be stored in a matrix or in list mode.") -if(is.na(grp[1]))grp<-c(1:length(x)) -J<-length(grp) -nval=NA -x=lapply(x,elimna) -nval=lapply(x,length) -xbar=lapply(x,mean,tr=tr) -bvec<-array(0,c(J,2,nboot)) -hval<-vector("numeric",J) -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -print("Taking bootstrap samples. Please wait.") -for(j in 1:J){ -hval[j]<-length(x[[grp[j]]])-2*floor(tr*length(x[[grp[j]]])) - # hval is the number of observations in the jth group after trimming. -print(paste("Working on group ",grp[j])) -xcen<-x[[grp[j]]]-mean(x[[grp[j]]],tr) -data<-matrix(sample(xcen,size=length(x[[grp[j]]])*nboot,replace=TRUE),nrow=nboot) -bvec[j,,]<-apply(data,1,trimparts,tr) # A 2 by nboot matrix. The first row -# contains the bootstrap trimmed means, the second row -# contains the bootstrap squared standard errors. -} -m1<-bvec[,1,] # J by nboot matrix containing the bootstrap trimmed means -m2<-bvec[,2,] # J by nboot matrix containing the bootstrap sq standard errors -wvec<-1/m2 # J by nboot matrix of w values -uval<-apply(wvec,2,sum) # Vector having length nboot -blob<-wvec*m1 -xtil<-apply(blob,2,sum)/uval # nboot vector of xtil values -blob1<-matrix(0,J,nboot) -for (j in 1:J)blob1[j,]<-wvec[j,]*(m1[j,]-xtil)^2 -avec<-apply(blob1,2,sum)/(length(x)-1) -blob2<-(1-wvec/uval)^2/(hval-1) -cvec<-apply(blob2,2,sum) -cvec<-2*(length(x)-2)*cvec/(length(x)^2-1) -testb<-avec/(cvec+1) -# A vector of length nboot containing bootstrap test values -ct<-sum(is.na(testb)) -if(ct>0)print("Some bootstrap estimates of the test statistic could not be computed") -test<-t1way(x,tr=tr,grp=grp) -pval<-sum(test$TEST<=testb)/nboot -# -# Determine explanatory effect size -# -e.pow=t1wayv2(x)$Var.Explained -list(test=test$TEST,p.value=pval,Explanatory.Power=e.pow, -Effect.Size=sqrt(e.pow)) -} - - - - -t2wayv2<-function(J,K,data,tr=.2,grp=c(1:p),p=J*K,g=NULL,dp=NULL,pr=TRUE){ -# Perform a J by K (two-way) anova on trimmed means where -# all groups are independent. -# -# The R variable data is assumed to contain the raw -# data stored in list mode, or a matrix with columns -# corresponding to groups. If stored in list mode, data[[1]] contains the data -# for the first level of all three factors: level 1,1,. -# data[[2]] is assumed to contain the data for level 1 of the -# first factor and level 2 of the second factor: level 1,2 -# -# The default amount of trimming is tr=.2 -# -# It is assumed that data has length JK, the total number of -# groups being tested. -# -# g=NULL, x is assumed to be a matrix or have list mode -# -# if g is specifed, it is assumed that column g of x is -# a factor variable and that the dependent variable of interest is in column -# dp of x, which can be a matrix or data frame. 
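-# Example (a sketch): for a 2-by-3 design with the six groups stored in the columns of
-# the matrix m, in the order (1,1),(1,2),(1,3),(2,1),(2,2),(2,3),
-# t2wayv2(2,3,m)
-# returns the test statistics and p-values for Factor A, Factor B and the A-by-B
-# interaction, plus the matrix of trimmed means.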
-# -if(!is.null(g[1])){ -if(length(g)!=2)stop("Argument g should have two values") -if(is.null(dp[1])) -stop("Specify a value for dp, the column containing the data") -data=fac2list(data[,dp],data[,g]) -} -if(is.matrix(data))data=listm(data) -if(!is.list(data))stop("Data are not stored in list mode") -if(p!=length(data)){ -print("The total number of groups, based on the specified levels, is") -print(p) -print("The number of groups in data is") -print(length(data)) -print("Warning: These two values are not equal") -} -tmeans<-0 -h<-0 -v<-0 -for (i in 1:p){ -data[[grp[i]]]=elimna(data[[grp[i]]]) -tmeans[i]<-mean(data[[grp[i]]],tr) -h[i]<-length(data[[grp[i]]])-2*floor(tr*length(data[[grp[i]]])) -# h is the effective sample size - if(winvar(data[[grp[i]]],tr)==0)print(paste('The Winsorized variance is zero for group',i)) -v[i]<-(length(data[[grp[i]]])-1)*winvar(data[[grp[i]]],tr)/(h[i]*(h[i]-1)) -# v contains the squared standard errors -} -v<-diag(v,p,p) # Put squared standard errors in a diag matrix. -ij<-matrix(c(rep(1,J)),1,J) -ik<-matrix(c(rep(1,K)),1,K) -jm1<-J-1 -cj<-diag(1,jm1,J) -for (i in 1:jm1)cj[i,i+1]<-0-1 -km1<-K-1 -ck<-diag(1,km1,K) -for (i in 1:km1)ck[i,i+1]<-0-1 -# Do test for factor A -#cmat<-kron(cj,kron(ik,il)) # Contrast matrix for factor A -cmat<-kron(cj,ik) # Contrast matrix for factor A -alval<-c(1:999)/1000 -for(i in 1:999){ -irem<-i -Qa<-johan(cmat,tmeans,v,h,alval[i]) -if(Qa$teststat>Qa$crit)break -} -A.p.value=irem/1000 -# Do test for factor B -cmat<-kron(ij,ck) # Contrast matrix for factor B -for(i in 1:999){ -irem<-i -Qb<-johan(cmat,tmeans,v,h,alval[i]) -if(Qb$teststat>Qb$crit)break -} -B.p.value=irem/1000 -# Do test for factor A by B interaction -cmat<-kron(cj,ck) # Contrast matrix for factor A by B -for(i in 1:999){ -irem<-i -Qab<-johan(cmat,tmeans,v,h,alval[i]) -if(Qab$teststat>Qab$crit)break -} -AB.p.value=irem/1000 -tmeans=matrix(tmeans,J,K,byrow=TRUE) -list(Qa=Qa$teststat,A.p.value=A.p.value, -Qb=Qb$teststat,B.p.value=B.p.value, -Qab=Qab$teststat,AB.p.value=AB.p.value,means=tmeans) -} - - - -lpindt<-function(x,y,nboot=500,xout=FALSE,outfun=out){ -# -# Test the hypothesis of no association based on the fit obtained -# from lplot (Cleveland's LOESS) -# -m<-elimna(cbind(x,y)) -x<-as.matrix(x) -p<-ncol(x) -pp<-p+1 -x<-m[,1:p] -y<-m[,pp] -if(xout){ -m<-cbind(x,y) -flag<-outfun(x,plotit=FALSE)$keep -m<-m[flag,] -x<-m[,1:p] -y<-m[,pp] -} -n=length(y) -data1<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot) -data2<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot) -val=NA -x=as.matrix(x) -for(i in 1:nboot){ -val[i]=lplot(x[data1[i,],],y[data2[i,]],plotit=FALSE,pr=FALSE)$Strength.Assoc -} -val=sort(val) -est=lplot(x,y,plotit=FALSE,pr=FALSE)$Strength.Assoc -p.value=mean((est4)stop("x should have at most four columns of data") -m<-elimna(cbind(x,y)) -if(xout && eout)stop("Can't have xout=eout=T") -if(eout){ -flag<-outfun(m)$keep -m<-m[flag,] -} -if(xout){ -flag<-outfun(x,plotit=FALSE)$keep -m<-m[flag,] -} -x<-m[,1:np] -x=as.matrix(x) -y<-m[,np1] -if(!sop){ -if(ncol(x)==1)fitr<-fitted(gam(y~x[,1])) -if(ncol(x)==2)fitr<-fitted(gam(y~x[,1]+x[,2])) -if(ncol(x)==3)fitr<-fitted(gam(y~x[,1]+x[,2]+x[,3])) -if(ncol(x)==4)fitr<-fitted(gam(y~x[,1]+x[,2]+x[,3]+x[,4])) -} -if(sop){ -if(ncol(x)==1)fitr<-fitted(gam(y~s(x[,1]))) -if(ncol(x)==2)fitr<-fitted(gam(y~s(x[,1])+s(x[,2]))) -if(ncol(x)==3)fitr<-fitted(gam(y~s(x[,1])+s(x[,2])+s(x[,3]))) -if(ncol(x)==4)fitr<-fitted(gam(y~s(x[,1])+s(x[,2])+s(x[,3])+s(x[,4]))) -} -last<-fitr -if(plotit){ -if(ncol(x)==1){ 
-plot(x,fitr,xlab=xlab,ylab=ylab) -} -if(ncol(x)==2){ -iout<-c(1:length(fitr)) -nm1<-length(fitr)-1 -for(i in 1:nm1){ -ip1<-i+1 -for(k in ip1:length(fitr))if(sum(x[i,]==x[k,])==2)iout[k]<-0 -} -fitr<-fitr[iout>=1] # Eliminate duplicate points in the x-y plane -# This is necessary when doing three dimensional plots -# with the S-PLUS function interp -mkeep<-x[iout>=1,] -fitr<-interp(mkeep[,1],mkeep[,2],fitr) -persp(fitr,theta=theta,phi=phi,expand=expand,xlab="x1",ylab="x2",zlab="", -scale=scale,ticktype=ticktype) -} -} -top=varfun(last) -ep=top/varfun(y) -if(ep>=1)ep=cor.fun(last,y)$cor^2 -eta=sqrt(ep) -st.adj=NULL -e.adj=NULL -if(ADJ){ -x=as.matrix(x) -val=NA -n=length(y) -data1<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot) -data2<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot) -for(i in 1:nboot){ -temp=gamplotv2.sub(x[data1[i,],],y[data2[i,]],plotit=FALSE) -val[i]=temp$Explanatory.power -} -vindt=median(val) -v2indt=median(sqrt(val)) -st.adj=(sqrt(ep)-max(c(0,v2indt)))/(1-max(c(0,v2indt))) -e.adj=(ep-max(c(0,vindt)))/(1-max(c(0,vindt))) -st.adj=max(c(0,st.adj)) -e.adj=max(c(0,e.adj)) -} -eta=as.matrix(eta) -ep=as.matrix(ep) -dimnames(eta)=NULL -dimnames(ep)=NULL -eta=eta[1] -ep=ep[1] -list(Strength.Assoc=eta,Explanatory.power=ep, -Strength.Adj=st.adj,Explanatory.Adj=e.adj) -} - -cidmul<-function(x,alpha=.05,g=NULL,dp=NULL,pr=TRUE){ -# -# Perform Cliff's method for all pairs of J independent groups. -# Unlike the function meemul, ties are allowed. -# The familywise type I error probability is controlled by using -# a critical value from the Studentized maximum modulus distribution. -# -# The data are assumed to be stored in $x$ in list mode. -# Length(x) is assumed to correspond to the total number of groups, J. -# It is assumed all groups are independent. -# -# Missing values are automatically removed. -# -# The default value for alpha is .05. Any other value results in using -# alpha=.01. -# -if(pr)print('cidmulv2 might provide better power') -if(!is.null(g)){ -if(is.null(dp))stop("Specify a value for dp, the column containing the data") -x=fac2list(x[,dp],x[,g]) -} -if(is.matrix(x) || is.data.frame(x))x<-listm(x) -if(!is.list(x))stop("Data must be stored in a matrix or in list mode.") -J<-length(x) -CC<-(J^2-J)/2 -test<-matrix(NA,CC,7) -for(j in 1:J){ -xx<-!is.na(x[[j]]) -val<-x[[j]] -x[[j]]<-val[xx] # Remove missing values -} -dimnames(test)<-list(NULL,c("Group","Group","d","ci.lower","ci.upper", -"p.hat","p-value")) -jcom<-0 -crit<-smmcrit(200,CC) -if(alpha!=.05)crit<-smmcrit01(200,CC) -alpha<-1-pnorm(crit) -n=matl(lapply(x,length)) -for (j in 1:J){ -for (k in 1:J){ -if (j < k){ -temp<-cid(x[[j]],x[[k]],alpha,plotit=FALSE) -temp2<-cidv2(x[[j]],x[[k]],alpha,plotit=FALSE) -jcom<-jcom+1 -test[jcom,1]<-j -test[jcom,2]<-k -test[jcom,3]<-temp$d -test[jcom,4]<-temp$cl -test[jcom,5]<-temp$cu -test[jcom,6]<-temp$phat -test[jcom,7]<-temp2$p.value -}}} -list(n=n,test=test) -} - - cidmulv2<-function(x,alpha=.05,g=NULL,dp=NULL,CI.FWE=FALSE){ -# -# Perform Cliff's method for all pairs of J independent groups. -# The familywise type I error probability is controlled via -# Hochberg's method. -# -# The data are assumed to be stored in $x$ in list mode or in a -# matrix with J columns, columns corresponding to groups. -# -# It is assumed all groups are independent. -# -# Missing values are automatically removed. 
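-# Example (a sketch): with J=4 independent groups stored in the columns of the matrix m,
-# cidmulv2(m) applies Cliff's method to all 6 pairs of groups and reports each p-value
-# together with its Hochberg-adjusted critical value (the column p.crit).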
-# -# g=NULL, x is assumed to be a matrix or have list mode -# if g is specified, it is assumed that column g of x is -# a factor variable and that the dependent variable of interest is in column -# dp of x, which can be a matrix or data frame. -# -if(!is.null(g)){ -if(is.null(dp))stop("Specify a value for dp, the column containing the data") -x=fac2list(x[,dp],x[,g]) -} -if(is.data.frame(x))x=as.matrix(x) -if(is.matrix(x))x<-listm(x) -if(!is.list(x))stop("Data must be stored in a matrix or in list mode.") -J<-length(x) -CC<-(J^2-J)/2 -test<-matrix(NA,CC,7) -c.sum=matrix(NA,CC,5) -for(j in 1:J){ -xx<-!is.na(x[[j]]) -val<-x[[j]] -x[[j]]<-val[xx] # Remove missing values -} -dimnames(test)<-list(NULL,c("Group","Group","p.hat","p.ci.lower", -"p.ci.uppper","p-value","p.crit")) -dvec<-alpha/c(1:CC) -dimnames(c.sum)<-list(NULL,c("Group","Group","P(XY)")) -jcom<-0 -n=matl(lapply(x,length)) -for (j in 1:J){ -for (k in 1:J){ -if (j < k){ -temp<-cidv2(x[[j]],x[[k]],alpha,plotit=FALSE) -jcom<-jcom+1 -test[jcom,1]<-j -test[jcom,2]<-k -c.sum[jcom,1]<-j -c.sum[jcom,2]<-k -c.sum[jcom,3:5]=cid(x[[j]],x[[k]])$summary.dvals -test[jcom,3]<-temp$p.hat -test[jcom,4]<-temp$p.ci[1] -test[jcom,5]<-temp$p.ci[2] -test[jcom,6]<-temp$p.value -}}} -temp2<-order(0-test[,6]) -test[temp2,7]=dvec -if(CI.FWE){ -jcom<-0 -for (j in 1:J){ -for (k in 1:J){ -if (j < k){ -jcom<-jcom+1 -temp<-cidv2(x[[j]],x[[k]],alpha=test[jcom,7],plotit=FALSE) -test[jcom,4]<-temp$p.ci[1] -test[jcom,5]<-temp$p.ci[2] -}}}} -list(n=n,test=test,summary.dvals=c.sum) -} - -cidmcp=cidmulv2 - -fac2list<-function(x,g,pr=TRUE){ -# -# data are stored in x -# information about the level of the value in x is stored in g, -# which can be a matrix with up to 4 columns -# -# sort the data in x into groups based on values in g. -# store results in list mode. -# -# Example: fac2list(m[,2],m[,4]) would sort the values -# in column 2 of m according to the values in column 4 of m -# -g=as.data.frame(g) -ng=ncol(g)+1 -xg=cbind(x,g) -xg=elimna(xg) -x=xg[,1] -x=as.matrix(x) -g=xg[,2:ng] -g=as.data.frame(g) -L=ncol(g) -g=listm(g) -for(j in 1:L)g[[j]]=as.factor(g[[j]]) -g=matl(g) -Lp1=L+1 -if(L>4)stop("Can have at most 4 factors") -if(L==1){ -res=selby(cbind(x,g),2,1) -group.id=res$grpn -res=res$x -} -if(L>1){ -res=selby2(cbind(x,g),c(2:Lp1),1) -group.id=res$grpn -res=res$x -} -if(pr) -{print("Group Levels:") -print(group.id) -} -res=lapply(res,as.numeric) -res -} - -MMreg<-function(x,y,RES=TRUE,xout=FALSE,outfun=outpro,STAND=TRUE,varfun=pbvar,corfun=pbcor,WARN=FALSE,...){ -# -# Compute MM regression estimate derived by Yohai (1987) -# simply by calling the R function lmrob -# This function will remove leverage points when -# xout=T -# using the outlier detection method indicated by -# outfun, which defaults to the projection method. 
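-# Example (a sketch, not real output): with predictors in the matrix x and outcome y,
-# MMreg(x,y) returns the MM regression coefficients and residuals;
-# MMreg(x,y,xout=TRUE) does the same after removing leverage points flagged by outpro.
-# The component Strength.Assoc is the square root of the estimated explanatory power.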
-# -library('robustbase') -x<-as.matrix(x) -xx<-cbind(x,y) -xx<-elimna(xx) -x<-xx[,1:ncol(x)] -x<-as.matrix(x) -y<-xx[,ncol(x)+1] -temp<-NA -x<-as.matrix(x) -if(xout){ -x<-as.matrix(x) -if(identical(outfun,outblp))flag=outblp(x,y,plotit=FALSE)$keep -else flag<-outpro(x,STAND=STAND,plotit=FALSE,...)$keep -x<-x[flag,] -y<-y[flag] -x<-as.matrix(x) -} -if(!WARN)options(warn=-1) -temp=lmrob(y~x) -if(!WARN)options(warn=0) -coef=temp$coefficients -p1=ncol(x)+1 -res<-y-x%*%coef[2:p1]-coef[1] -yhat<-y-res -stre=NULL -e.pow<-varfun(yhat)/varfun(y) -if(!is.na(e.pow)){ -if(e.pow>=1)e.pow<-corfun(yhat,y)$cor^2 -e.pow=as.numeric(e.pow) -stre=sqrt(e.pow) -} -if(!RES)res=NULL -list(coef=coef,residuals=res,Strength.Assoc=stre) -} - -ks<-function(x,y,w=FALSE,sig=TRUE,alpha=.05){ -# Compute the Kolmogorov-Smirnov test statistic -# -# w=T computes the weighted version instead. -# -# sig=T indicates that the exact level is to be computed. -# If there are ties, the reported Type I error probability is exact when -# using the unweighted test, but for the weighted test the reported -# level is too high. -# -# This function uses the functions ecdf, kstiesig, kssig and kswsig -# -# This function returns the value of the test statistic, the approximate .05 -# critical value, and the exact level if sig=T. -# -# Missing values are automatically removed -# -x<-x[!is.na(x)] -y<-y[!is.na(y)] -n1 <- length(x) -n2 <- length(y) -w<-as.logical(w) -sig<-as.logical(sig) -tie<-logical(1) -siglevel<-NA -z<-sort(c(x,y)) # Pool and sort the observations -tie=FALSE -chk=sum(duplicated(x,y)) -if(chk>0)tie=TRUE -v<-1 # Initializes v -for (i in 1:length(z))v[i]<-abs(ecdf(x,z[i])-ecdf(y,z[i])) -ks<-max(v) -if(!tie)crit=ks.crit(n1=n1,n2=n2,alpha=alpha) -else crit=ksties.crit(x,y,alpha=alpha) -if(!w && sig && !tie)siglevel<-kssig(length(x),length(y),ks) -if(!w && sig && tie)siglevel<-kstiesig(x,y,ks) -if(w){ -crit=ksw.crit(length(x),length(y),alpha=alpha) -for (i in 1:length(z)){ -temp<-(length(x)*ecdf(x,z[i])+length(y)*ecdf(y,z[i]))/length(z) -temp<-temp*(1.-temp) -v[i]<-v[i]/sqrt(temp) -} -v<-v[!is.na(v)] -ks<-max(v)*sqrt(length(x)*length(y)/length(z)) -if(sig)siglevel<-kswsig(length(x),length(y),ks) -if(tie && sig) -warning(paste("Ties were detected. The reported significance level of the -weighted Kolmogorov-Smirnov test statistic is not exact.")) -} -list(test=ks,critval=crit,p.value=siglevel) -} - -ks.crit<-function(n1,n2,alpha=.05){ -# -# Compute a critical value so that probability coverage is approximately -# 1-alpha -# -START=sqrt(0-log(alpha/2)*(n1+n2)/(2*n1*n2)) -crit=optim(START,ks.sub,n1=n1,n2=n2,alpha=alpha,lower=.001,upper=.86,method='Brent')$par -crit -} - -ks.sub<-function(crit,n1,n2,alpha){ -v=kssig(n1,n2,crit) -dif=abs(alpha-v) -dif -} - - -ksw.crit<-function(n1,n2,alpha=.05){ -# -# Compute a critical value so that probability coverage is -# >= 1-alpha while being close as possible to 1-alpha -# -if(alpha>.1)stop('The function assumes alpha is at least .1') -crit=2.4 -del=.05 -pc=.12 -while(pc>alpha){ -crit=crit+.05 -pc=kswsig(n1,n2,crit) -} -crit -} - - -bbw2list<-function(x,grp.col,lev.col,pr=TRUE){ -# -# for a between-by-between-by-within design -# grp.col indicates the columns where values of the levels of between factor -# are stored. -# lev.col indicates the columns where repeated measures are contained. 
-# If, for example, there are data for three times, stored in columns -# 6, 8 and 11, set -# lev.col=c(6,8,11) -# -# Example: Have a 3 x 4 x 2 design -# values in columns 2 and 4 indicate the -# levels of the two between factors. -# column 3 contains time 1 data, -# column 7 contains time 2 data -# bbw2list(x,(c(2,4),c(3,7)) will store data in list mode that can be -# used by bbwtrim and related functions -# -res=selbybbw(x,grp.col,lev.col,pr=pr) -res -} - - -selbybbw<-function(m,grpc,coln,pr=TRUE){ -# -# For a between by-between-by-within design, -# a commmon situation is to have data stored in an n by p matrix where -# two column indicate a group identification numbers (levels) -# for the between factors, -# and two or more other columns contain the within group results. -# -# This function is used by bbw2list to store the data in list mode so -# that the R function bbwtrim can be use. -# -# m is a matrix containing the data. One column contains group -# identification values -# and two or more other columns contain repeated measures. -# -# This function groups all values in the columns -# indicated by coln according to the -# group numbers in column grpc and stores the results in list mode. -# -# So if grpc[1] has J values, grpc[2] has K values, -# and coln indicates L columns, -# this function returns the data stored in list mode have length JKL -# -# Example: -# y<-selbybbw(blob,c(2,3),c(7,9,11))$x -# will look for group numbers in col 2 and 3 of the matrix blob, -# which indicate levels for the between factors, -# and it assumes that times 1, 2 and 3 are stored in col 7, 9, and 11. -# -# Result: the data will now be stored in y having list mode. -# -#if(!is.matrix(m))stop("Data must be stored in a matrix") -if(is.na(grpc[1]))stop("The argument grpc is not specified") -if(is.na(coln[1]))stop("The argument coln is not specified") -if(length(grpc)!=2)stop("The argument grpc must have length 2") -mm=m -m<-as.data.frame(elimna(mm)) -x<-list() -grp1<-sort(unique(m[,grpc[1]])) -grp2<-sort(unique(m[,grpc[2]])) -if(pr){ -print("Levels for first factor:") -print(grp1) -print("Levels for second factor:") -print(grp2) -} -J<-length(grp1) -K<-length(grp2) -L<-length(coln) -JKL<-J*K*L -itt<-0 -it=0 -mm=as.matrix(m[,coln]) -gmat=matrix(NA,ncol=2,nrow=J*K) -for (ig1 in 1:length(grp1)){ -for (ig2 in 1:length(grp2)){ -itt=itt+1 -gmat[itt,]=c(grp1[ig1],grp2[ig2]) -for (ic in 1:length(coln)){ -it<-it+1 -flag<-(m[,grpc[1]]==grp1[ig1])*(m[,grpc[2]]==grp2[ig2]) -flag=as.logical(flag) -x[[it]]<-as.numeric(mm[flag,ic]) -}}} -x -} - -selbybw<-function(m,grpc,coln){ -# -# For a between by within design, -# a commmon situation is to have data stored in an n by p matrix where -# a column is a group identification number -# and the remaining columns are the within group results. -# -# m is a matrix containing the data. One column contains group -# identification values -# and two or more other columns contain repeated measures. -# -# This function groups all values in the columns -# indicated by coln according to the -# group numbers in column grpc and stores the results in list mode. -# -# So if grpc has J values, and coln indicates K columns, -# this function returns the data stored in list mode have length JK -# -# Example: y<-selbybw(blob,3,c(4,6,7))$x -# will look for group numbers in col 3 of the matrix blob, -# and it assumes within -# group data are stored in col 4, 6 and 7. 
-# Result: the data will now be stored in y having list mode -# - -#if(!is.matrix(m))stop("Data must be stored in a matrix") -if(is.na(grpc[1]))stop("The argument grpc is not specified") -if(is.na(coln[1]))stop("The argument coln is not specified") -if(length(grpc)!=1)stop("The argument grpc must have length 1") -x<-list() -m=m[,c(grpc,coln)] -m<-as.data.frame(elimna(m)) -grpn<-sort(unique(m[,1])) -J<-length(grpn) -K<-length(coln) -JK<-J*K -it<-0 -mm=as.data.frame(m[,2:ncol(m)]) -for (ig in 1:length(grpn)){ -for (ic in 1:length(coln)){ -it<-it+1 -flag<-(m[,1]==grpn[ig]) -x[[it]]<-as.numeric(mm[flag,ic]) -}} -list(x=x,grpn=grpn) -} - -bw2list<-function(x,between.col,within.col,grp.col=between.col,lev.col=within.col,pr=TRUE){ -# -# for a between by within design -# grp.col is column indicating levels of between factor. -# lev.col indicates the columns where repeated measures are contained -# -# Example: column 2 contains information on levels of between factor -# have a 3 by 2 design, column 3 contains time 1 data, -# column 7 contains time 2 -# bw2list(x,2,c(3,7)) will store data in list mode that can be -# used by rmanova and related functions -# -res=selbybw(x,grp.col,lev.col) -if(pr){ -print("Levels for between factor:") -print(unique(x[,grp.col])) -} -res$x -} - - -rmc2list<-function(x,grp.col,lev.col,pr=TRUE){ -# -# for a between by within design -# grp.col is column indicating levels of between factor. -# lev.col indicates the columns where repeated measures are contained -# -# Example: column 2 contains information on levels of between factor -# have a 3 by 2 design, column 3 contains time 1 data, -# column 7 contains time 2 -# rmc2list(x,2,c(3,7)) will store data in list mode that can be -# bw2list(x,2,c(3,7)) also can be used. -# used by rmanova and related functions -# -res=selbybw(x,grp.col,lev.col) -if(pr){ -print("Levels for between factor:") -print(unique(x[,grp.col])) -} -res$x -} - - -wlogregci<-function(x,y,nboot=400,alpha=.05,SEED=TRUE,MC=FALSE, -xlab="Predictor 1",ylab="Predictor 2",xout=FALSE,outfun=out,...){ -# -# Compute a confidence interval for each of the parameters of -# a log linear model based on a robust estimator -# -# The predictor values are assumed to be in the n by p matrix x. -# - -if(MC)library(parallel) -x<-as.matrix(x) -p1<-ncol(x)+1 -p<-ncol(x) -xy<-cbind(x,y) -xy<-elimna(xy) -x<-xy[,1:p] -y<-xy[,p1] -if(xout){ -m<-cbind(x,y) -flag<-outfun(x,plotit=FALSE)$keep -m<-m[flag,] -x<-m[,1:p] -y<-m[,p1] -} -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -n=length(y) -data<-matrix(sample(n,size=length(y)*nboot,replace=TRUE),nrow=n,ncol=nboot) -data=listm(data) -if(MC)bvec<-mclapply(data,wlogreg.sub,x,y,mc.preschedule=TRUE) -if(!MC)bvec<-lapply(data,wlogreg.sub,x,y) -bvec=matl(bvec) -# -# bvec is a p+1 by nboot matrix. The first row -# contains the bootstrap intercepts, the second row -# contains the bootstrap values for first predictor, etc. 
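-# (The confidence interval for each coefficient is formed below from the alpha/2 and
-# 1-alpha/2 quantiles of the corresponding row of bvec, and the p-value is based on
-# the proportion of bootstrap estimates that are less than zero.)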
-x=as.matrix(x) -p1<-ncol(x)+1 -regci<-matrix(0,p1,3) -VAL<-c("intercept",rep("X",ncol(x))) -dimnames(regci)<-list(VAL,c("Est.","ci.low","ci.up")) -se<-NA -sig.level<-NA -for(i in 1:p1){ -bna=elimna(bvec[i,]) -nbn=length(bna) -ilow<-round((alpha/2) * nbn) -ihi<-nbn - ilow -ilow<-ilow+1 -temp<-mean(bna<0) -sig.level[i]<-2*(min(temp,1-temp)) -bna<-sort(bna) -regci[i,2]<-bna[ilow] -regci[i,3]<-bna[ihi] -se[i]<-sqrt(var(elimna(bvec[i,]))) -} -regci[,1]=wlogreg(x,y)$coef -list(conf.interval=regci,p.values=sig.level,se=se) -} -wlogreg.sub<-function(data,x,y){ -x=as.matrix(x) -vals=wlogreg(x[data,],y[data])$coef -} - - - -# original version of logreg.plot is stored in logreg_plot_orig_chk.tex - - -logreg.plot<-function(x,y,MLE=TRUE,ROB=FALSE,xlab=NULL,ylab=NULL,zlab='P(Z=1)',xout=FALSE,outfun=outpro, -theta=50,phi=25,duplicate="error",LP=TRUE,Lspan=.75,pyhat=FALSE,LABELS=FALSE, -WARN=FALSE,BY=TRUE, -expand=.5,scale=TRUE,fr=2,ticktype="simple",pr=TRUE,...){ -# -# For one predictor, plot logistic regression line -# -# if x is a matrix with more than one column, plot is based on data in -# in column 1. -# -# MLE=T, will plot usual maximum likelihood estimate using a solid line -# ROB=T, will plot robust estimate, which is indicated by a -# dashed line. -# -library(robustbase) -xy=cbind(x,y) -xy=elimna(xy) -p1=ncol(xy) -if(p1>3)stop('Only one or two independent variables can be used') -if(!xout){ -if(pr)print('Suggest also looking at result using xout=TRUE') -} -p=p1-1 -x=xy[,1:p] -x=as.matrix(x) -y=xy[,p1] -if(xout){ -m<-cbind(x,y) -flag<-outfun(x,plotit=FALSE,...)$keep -m<-m[flag,] -x<-m[,1:p] -y<-m[,p1] -} -if(p==1){ -if(is.null(ylab))ylab='P(Y=1|X)' -if(is.matrix(x))x=x[,1] -xord=order(x) -xx=x[xord] -yy=y[xord] -est1=logreg(xx,yy)[1:2,1] -if(is.null(xlab))v='X' -if(is.null(ylab))ylab='P(Y=1|X)' -if(LABELS)v=labels(x)[[2]] -if(MLE){ -plot(xx,yy,xlab=v[1],ylab=ylab) -phat=logreg.pred(xx,yy,xx) -lines(xx,phat) -} -if(ROB){ -if(!WARN)options(warn=-1) -if(!BY)est2=wlogreg(xx,yy)$coef[1:2] -if(BY)est2=BYlogreg(xx,yy)$coef[1:2] -phat2=exp(est2[1]+est2[2]*xx)/(1+exp(est2[1]+est2[2]*xx)) -lines(xx,phat2,lty=2) -phat=cbind(xx,phat2) -dimnames(phat)=list(NULL,c(v,'Y.hat')) -if(!WARN)options(warn=0) -} -} -if(p==2){ -library(akima) -fitr=logreg.pred(x,y,x) -if(is.null(xlab))v='X' -if(is.null(ylab))v[2]='Y' -if(LABELS)v=labels(x)[[2]] -if(LP)lplot(x,fitr,xlab=v[1],ylab=v[2],zlab=xlab,z=zlab,ticktype=ticktype,theta=theta,phi=phi,pr=FALSE) -phat=cbind(x,fitr) -dimnames(phat)=list(NULL,c(v,'Y.hat')) -} -if(!pyhat)phat<-"Done" -phat -} - - -logreg.P.ci<-function(x,y,alpha=.05,plotit=TRUE, -xlab='X',ylab='P(Y=1|X)',xout=FALSE,outfun=outpro,...){ -# -# Assuming the logistic regression model provides an adequate fit, -# compute a confidence interval for P(Y=1|X) for each value stored in x. 
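-# Example (a sketch): logreg.P.ci(x,y) fits the usual logistic regression model via glm,
-# plots the estimate of P(Y=1|X) with pointwise confidence bands, and returns the
-# estimates and confidence limits for each observed x value in the component output.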
-# -xx<-elimna(cbind(x,y)) -x<-xx[,1] -y<-xx[,2] -if(xout){ -m<-cbind(x,y) -flag<-outfun(x,plotit=FALSE,...)$keep -m<-m[flag,] -x<-m[,1] -y<-m[,2] -} -if(length(unique(y))>2)stop('y should be binary') -# Next convert y to 0 and 1s -n=length(y) -yy=rep(0,n) -y=as.vector(y) -flag=y==max(y) -yy[flag]=1 -y=yy - -xord=order(x) -x=x[xord] -y=y[xord] -mod1 = glm(y ~ x, family=binomial(link='logit')) -v=predict(mod1,se.fit=TRUE) -top=v$fit+qnorm(1-alpha/2)*v$se.fit -bot=v$fit-qnorm(1-alpha/2)*v$se.fit -p=exp(v$fit)/(1+exp(v$fit)) -top=exp(top)/(1+exp(top)) -bot=exp(bot)/(1+exp(bot)) -est=cbind(x,p,bot,top) -dimnames(est)=list(NULL,c('X','est.p','ci.low','ci,up')) -if(plotit){ -plot(c(x,x,x),c(top,bot,p),ylim=c(0,1),type='n',xlab=xlab,ylab=ylab) -lines(x,p) -lines(x,bot,lty=2) -lines(x,top,lty=2) -} -list(Strength.Assoc=sd(p)/sd(y),output=est) -} - - -medpb2<-function(x,y=NULL,alpha=.05,nboot=2000,SEED=TRUE){ -# -# Compare 2 independent groups using medians. -# -# A percentile bootstrap method is used, which performs well when -# there are tied values. -# -# The data are assumed to be stored in x and y. If y=NULL, x is assumed to have two columns. -# -# Missing values are automatically removed. -# -if(is.null(y)){ -if(is.matrix(x) || is.data.frame(x)){ -y=x[,2] -x=x[,1] -} -if(is.list(x)){ -y=x[[2]] -x=x[[1]] -} -} -x=elimna(x) -y=elimna(y) -xx<-list() -xx[[1]]<-x -xx[[2]]<-y -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -est1=median(xx[[1]]) -est2=median(xx[[2]]) -est.dif<-median(xx[[1]])-median(xx[[2]]) -crit<-alpha/2 -temp<-round(crit*nboot) -icl<-temp+1 -icu<-nboot-temp -bvec<-matrix(NA,nrow=2,ncol=nboot) -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -for(j in 1:2){ -data<-matrix(sample(xx[[j]],size=length(xx[[j]])*nboot,replace=TRUE),nrow=nboot) -bvec[j,]<-apply(data,1,median) # Bootstrapped medians for jth group -} -top<-bvec[1,]-bvec[2,] -test<-sum(top<0)/nboot+.5*sum(top==0)/nboot -if(test > .5)test<-1-test -top<-sort(top) -ci<-NA -ci[1]<-top[icl] -ci[2]<-top[icu] -list(n1=length(x),n2=length(y),p.value=2*test,ci=ci,est1=est1,est2=est2, -est.dif=est.dif) -} -m2ci<-function(x,y,alpha=.05,nboot=1000,bend=1.28,os=FALSE){ -# -# Compute a bootstrap, .95 confidence interval for the -# the difference between two independent -# M-estimator of location based on Huber's Psi. -# The default percentage bend is bend=1.28 -# The default number of bootstrap samples is nboot=399 -# -# By default, the fully iterated M-estimator is used. To use the -# one-step M-estimator instead, set os=T -# -os<-as.logical(os) -x<-x[!is.na(x)] # Remove any missing values in x -y<-y[!is.na(y)] # Remove any missing values in y -if(length(x)<=19 || length(y)<=19) -warning(paste("The number of observations in at least one group -is less than 20. This function might fail due to division by zero, -which in turn causes an error in function hpsi having to do with -a missing value.")) -set.seed(2) # set seed of random number generator so that -# results can be duplicated. -print("Taking bootstrap samples. 
Please wait.") -datax<-matrix(sample(x,size=length(x)*nboot,replace=TRUE),nrow=nboot) -datay<-matrix(sample(y,size=length(y)*nboot,replace=TRUE),nrow=nboot) -if(!os){ -bvecx<-apply(datax,1,mest,bend) -bvecy<-apply(datay,1,mest,bend) -} -if(os){ -bvecx<-apply(datax,1,onestep,bend) -bvecy<-apply(datay,1,onestep,bend) -} -bvec<-sort(bvecx-bvecy) -test<-sum(bvec<0)/nboot+.5*sum(bvec==0)/nboot -pv=2*min(c(test,1-test)) -low<-round((alpha/2)*nboot) -up<-round((1-alpha/2)*nboot) -se<-sqrt(var(bvec)) -list(ci=c(bvec[low],bvec[up]),se=se,p.value=pv) -} - -qsplit<-function(x,y,split.val=NULL){ -# -# x assumed to be a matrix or data frame -# -# IF split.val=NULL, -# -# split the data in x into 3 groups: -# those for which y <= lower quartile -# those between lower and upper quartile -# those >= upper quartile -# -# IF split.val CONTAINS TWO VALUES, SPLIT THE DATA ACCORDING TO -# THE VALUES SPECIFIED. -# -if(!is.data.frame(x))x=as.matrix(x) -if(is.null(split.val)){ -v=idealf(y) -flag1=(y<=v$ql) -flag2=(y>=v$qu) -} -if(!is.null(split.val)){ -flag1=(y<=split.val[1]) -flag2=(y>=split.val[2]) -} -flag3=as.logical(as.numeric(!flag1)*as.numeric(!flag2)) -d1=x[flag1,] -d2=x[flag2,] -d3=x[flag3,] -list(lower=d1,middle=d3,upper=d2) -} -cohen2xi<-function(delta){ -xi=sqrt((2*delta^2)/(4+delta^2)) -xi -} -xi2cohen<-function(xi){ -delta=sqrt((4*xi^2)/(2-xi^2)) -delta -} - -cid<-function(x,y,alpha=.05,plotit=FALSE,pop=0,fr=.8,rval=15,xlab="",ylab=""){ -# -# For two independent groups, -# compute a confidence interval for P(XY)-P(X10^6)stop('Use bmp with a large sample size. If using rimul, use ribmp instead') -m<-outer(x,y,FUN="-") -msave<-m -m<-sign(m) -d<-mean(m) -phat<-(1-d)/2 -flag=TRUE -if(phat==0 || phat==1)flag=FALSE -q0<-sum(msave==0)/length(msave) -qxly<-sum(msave<0)/length(msave) -qxgy<-sum(msave>0)/length(msave) -c.sum<-matrix(c(qxly,q0,qxgy),nrow=1,ncol=3) -dimnames(c.sum)<-list(NULL,c("P(XY)")) -if(flag){ -sigdih<-sum((m-d)^2)/(length(x)*length(y)-1) -di<-NA -for (i in 1:length(x))di[i]<-sum(x[i]>y)/length(y)-sum(x[i]x)/length(x)-sum(y[i]2500){ -print("Product of sample sizes exceeds 2500.") -print("Execution time might be high when using pop=0 or 1") -print("If this is case, might consider changing the argument pop") -}} -if(pop==0)akerd(as.vector(msave),xlab=xlab,ylab=ylab) -if(pop==1)rdplot(as.vector(msave),fr=fr,xlab=xlab,ylab=ylab) -if(pop==2)kdplot(as.vector(msave),rval=rval,xlab=xlab,ylab=ylab) -if(pop==3)boxplot(as.vector(msave)) -if(pop==4)stem(as.vector(msave)) -if(pop==5)hist(as.vector(msave),xlab=xlab) -if(pop==6)skerd(as.vector(msave)) -} -if(flag)pci=c((1-cu)/2,(1-cl)/2) -if(!flag){ -pci=bci$ci -cl=1-2*pci[2] -cu=1-2*pci[1] -} -list(n1=length(x),n2=length(y),cl=cl,cu=cu,d=d,sqse.d=sh,phat=phat,summary.dvals=c.sum,ci.p=pci) -} - - -cidv2<-function(x,y,alpha=.05,plotit=FALSE,pop=0,fr=.8,rval=15,xlab='',ylab=''){ -# -# p-value for Cliff's analog of WMW test -# -# To compare the lower and upper quantiles of the distribution of D=X-Y, -# use cbmhd. 
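-# Example (a sketch): for two independent groups stored in x and y, cidv2(x,y) returns
-# the p-value (p.value), the estimate of P(X<Y)+.5P(X=Y) (p.hat) and a confidence
-# interval for this probability (p.ci).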
-# -if(length(x)*length(y)>10^6)stop('Use bmp with a large sample size.') -nullval<-0 -ci<-cid(x,y,alpha=alpha,plotit=plotit,pop=pop,fr=fr,rval=rval) -FLAG=TRUE -if(ci$phat==0 || ci$phat==1)FLAG=FALSE -if(FLAG){ -alph<-c(1:99)/100 -for(i in 1:99){ -irem<-i -chkit<-cid(x,y,alpha=alph[i],plotit=FALSE) -if(chkit[[3]]>nullval || chkit[[4]]nullval || chkit[[4]]nullval || chkit[[4]]Y)')) -if(!flag){ -nm=max(c(length(x),length(y))) -if(phat==1)A=binomcipv(nm,nm,alpha=alpha) -if(phat==0)A=binomcipv(0,nm,alpha=alpha) -ci.p=A$ci -sig=A$p.value -} - -if(plotit){ -msave<-outer(x,y,FUN='-') -if(pop==0){ -if(length(x)*length(y)>2500){ -print('Product of sample sizes exceeds 2500.') -print('Execution time might be high when plotting and when using pop=1') -print('If this is case, might consider changing the argument pop or using plotit=F') -} -akerd(as.vector(msave),fr=fr) -} -if(pop==1)rdplot(as.vector(msave),fr=fr,xlab=xlab,ylab=ylab) -if(pop==2)kdplot(as.vector(msave),rval=rval,xlab=xlab,ylab=ylab) -if(pop==3)boxplot(as.vector(msave)) -if(pop==4)stem(as.vector(msave)) -if(pop==5)hist(as.vector(msave)) -if(pop==6)skerd(as.vector(msave),xlab=xlab,ylab=ylab) -} -list(n1=n1,n2=n2,test.stat=bmtest,phat=phat,dhat=dhat,s.e.=se/N,p.value=sig,ci.p=ci.p,df=df,summary.dval=dval) -} - -ribmp<-function(J,K,x,alpha=.05,p=J*K,grp=c(1:p),plotit=TRUE,op=4){ -# -# Rank-based multiple comparisons for all interactions -# in J by K design. The method is based on an -# extension of Cliff's heteroscedastic technique for -# handling tied values and the Patel-Hoel definition of no interaction. -# -# The familywise type I error probability is controlled by using -# a critical value from the Studentized maximum modulus distribution. -# -# It is assumed all groups are independent. -# -# Missing values are automatically removed. -# -# The default value for alpha is .05. Any other value results in using -# alpha=.01. -# -# Argument grp can be used to rearrange the order of the data. 
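-# Example (a sketch): for a 2-by-3 design with the six groups stored in the columns of
-# the matrix m, ordered as in mat<-matrix(grp,ncol=K,byrow=TRUE),
-# ribmp(2,3,m)
-# tests all of the Patel-Hoel interaction contrasts, here 1*3=3 of them.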
-# -if(is.matrix(x))x<-listm(x) -if(!is.list(x))stop("Data must be stored in a matrix or in list mode.") -CCJ<-(J^2-J)/2 -CCK<-(K^2-K)/2 -CC<-CCJ*CCK -test<-matrix(NA,CC,8) -test.p<-matrix(NA,CC,7) -for(j in 1:J){ -xx<-!is.na(x[[j]]) -val<-x[[j]] -x[[j]]<-val[xx] # Remove missing values -} -mat<-matrix(grp,ncol=K,byrow=TRUE) -dimnames(test)<-list(NULL,c("Factor A","Factor A","Factor B","Factor B","delta","ci.lower","ci.upper","p.value")) -jcom<-0 -crit<-smmcrit(200,CC) -if(alpha!=.05)crit<-smmcrit01(200,CC) -alpha<-1-pnorm(crit) -for (j in 1:J){ -for (jj in 1:J){ -if (j < jj){ -for (k in 1:K){ -for (kk in 1:K){ -if (k < kk){ -jcom<-jcom+1 -test[jcom,1]<-j -test[jcom,2]<-jj -test[jcom,3]<-k -test[jcom,4]<-kk -temp1<-bmp(x[[mat[j,k]]],x[[mat[j,kk]]],plotit=FALSE,alpha=alpha) -temp2<-bmp(x[[mat[jj,k]]],x[[mat[jj,kk]]],plotit=FALSE,alpha=alpha) -delta<-temp1$phat-temp2$phat -sqse<-temp1$s.e.^2.+temp2$s.e.^2 -test[jcom,5]<-delta -test[jcom,6]<-delta-crit*sqrt(sqse) -test[jcom,7]<-delta+crit*sqrt(sqse) -test[jcom,8]=2*(1-pnorm(abs((delta)/sqrt(sqse)))) -}}}}}} -list(test=test) -} - -adjboxout<-function(x){ -# -# determine outliers using adjusted boxplot rule based on the -# medcouple -# -x=elimna(x) -n=length(x) -MC=mcskew(x) -val=idealf(x) -iqr=val$qu-val$ql -if(MC>=0){ -bot=val$ql-1.5*exp(0-4*MC)*iqr -top=val$qu+1.5*exp(3*MC)*iqr -} -if(MC<0){ -bot=val$ql-1.5*exp(0-3*MC)*iqr -top=val$qu+1.5*exp(4*MC)*iqr -} -flag=rep(F,length(x)) -fl=(xtop) -flag[fl]=T -flag[fu]=T -vec<-c(1:n) -outid<-NULL -if(sum(flag)>0)outid<-vec[flag] -keep<-vec[!flag] -outval<-x[flag] -keep=x[!flag] -list(out.val=outval,out.id=outid,keep=keep,cl=bot,cu=top) -} - -Mreglde.sub<-function(x,B){ -n=x[1] -ncx=x[2] -ncy=x[3] -nxx=n*ncx -nyy=n*ncy -ncx1=ncx+1 -B=matrix(B,nrow=ncx1,ncol=ncy) -iu=nxx+3 -xm=matrix(x[4:iu],ncol=ncx) -il=iu+1 -ym=matrix(x[il:length(x)],ncol=ncy) -ainit=B[1:ncy] -il=ncy+1 -Binit=matrix(B[il:length(B)],nrow=ncx,ncol=ncy) -yhat=matrix(0,nrow=n,ncol=ncy) -for(i in 1:n){ -z=as.matrix(xm[i,]) -yhat[i,]=t(Binit)%*%z -} -yhat=t(t(yhat)+ainit) -res=ym-yhat -res=sum(sqrt(apply(res^2,1,sum))) -res -} - -pbtrmcp<-function(x,alpha=.05,nboot=NA,grp=NA,con=0,bhop=FALSE,tr=.2,SEED=TRUE){ -# -# Multiple comparisons for J independent groups based on trimmed means. -# using a percentile bootstrap method -# -# The data are assumed to be stored in x -# which either has list mode or is a matrix. In the first case -# x[[1]] contains the data for the first group, x[[2]] the data -# for the second group, etc. Length(x)=the number of groups = J. -# If stored in a matrix, the columns of the matrix correspond -# to groups. -# - -# The argument grp can be used to analyze a subset of the groups -# Example: grp=c(1,3,5) would compare groups 1, 3 and 5. -# -# Missing values are allowed. -# -stop('Old function for trimmed means. Use bmcppb. (The function tmcppb gives the same results as bmcppb)') -con<-as.matrix(con) -if(is.matrix(x))x<-listm(x) -if(!is.list(x))stop("Data must be stored in list mode or in matrix mode.") -if(!is.na(sum(grp))){ # Only analyze specified groups. -xx<-list() -for(i in 1:length(grp))xx[[i]]<-x[[grp[i]]] -x<-xx -} -J<-length(x) -tempn<-0 -mvec<-NA -for(j in 1:J){ -temp<-x[[j]] -temp<-temp[!is.na(temp)] # Remove missing values. 
-tempn[j]<-length(temp) -x[[j]]<-temp -mvec[j]<-tmean(temp,tr=tr) -} -nmax=max(tempn) -Jm<-J-1 -# -# Determine contrast matrix -# -if(sum(con^2)==0){ -ncon<-(J^2-J)/2 -con<-matrix(0,J,ncon) -id<-0 -for (j in 1:Jm){ -jp<-j+1 -for (k in jp:J){ -id<-id+1 -con[j,id]<-1 -con[k,id]<-0-1 -}}} -ncon<-ncol(con) -if(nrow(con)!=J)stop("Something is wrong with con; the number of rows does not match the number of groups.") -# Determine nboot if a value was not specified -if(is.na(nboot)){ -nboot<-5000 -if(J <= 8)nboot<-4000 -if(J <= 3)nboot<-2000 -} -# Determine critical values -if(!bhop){ -if(alpha==.05){ -dvec<-c(.025,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) -if(ncon > 10){ -avec<-.05/c(11:ncon) -dvec<-c(dvec,avec) -}} -if(alpha==.01){ -dvec<-c(.005,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) -if(ncon > 10){ -avec<-.01/c(11:ncon) -dvec<-c(dvec,avec) -}} -if(alpha != .05 && alpha != .01){ -dvec<-alpha/c(1:ncon) -dvec[1]<-alpha/2 -} -dvec<-2*dvec -} -if(bhop)dvec<-(ncon-c(1:ncon)+1)*alpha/ncon -bvec<-matrix(NA,nrow=J,ncol=nboot) -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -for(j in 1:J){ -data<-matrix(sample(x[[j]],size=length(x[[j]])*nboot,replace=TRUE),nrow=nboot) -bvec[j,]<-apply(data,1,tmean,tr=tr) # Bootstrapped values for jth group -} -test<-NA -bcon<-t(con)%*%bvec #ncon by nboot matrix -tvec<-t(con)%*%mvec -for (d in 1:ncon){ -test[d]<-sum(bcon[d,]>0)/nboot -if(test[d]> .5)test[d]<-1-test[d] -} -test<-2*test -output<-matrix(0,ncon,6) -dimnames(output)<-list(NULL,c("con.num","psihat","sig.test","sig.crit","ci.lower","ci.upper")) -temp2<-order(0-test) -zvec<-dvec[1:ncon] -sigvec<-(test[temp2]>=zvec) -output[temp2,4]<-zvec -icl<-round(dvec[ncon]*nboot/2)+1 -icu<-nboot-icl-1 -for (ic in 1:ncol(con)){ -output[ic,2]<-tvec[ic,] -output[ic,1]<-ic -output[ic,3]<-test[ic] -temp<-sort(bcon[ic,]) -output[ic,5]<-temp[icl] -output[ic,6]<-temp[icu] -} -num.sig<-sum(output[,3]<=output[,4]) -list(output=output,con=con,num.sig=num.sig) -} -mcp3atm<-function(J,K,L, x,tr=.2,con=0,alpha=.05,grp=NA,op=FALSE,pr=TRUE){ -# -# Do all pairwise comparisons of -# main effects for Factor A and B and C and all interactions -# based on trimmed means -# - # The data are assumed to be stored in x in list mode or in a matrix. - # If grp is unspecified, it is assumed x[[1]] contains the data - # for the first level of both factors: level 1,1. - # x[[2]] is assumed to contain the data for level 1 of the - # first factor and level 2 of the second factor: level 1,2 - # x[[j+1]] is the data for level 2,1, etc. - # If the data are in wrong order, grp can be used to rearrange the - # groups. For example, for a two by two design, grp<-c(2,4,3,1) - # indicates that the second group corresponds to level 1,1; - # group 4 corresponds to level 1,2; group 3 is level 2,1; - # and group 1 is level 2,2. - # - # Missing values are automatically removed. - # -if(is.data.frame(x))x=as.matrix(x) - JKL <- J*K*L - if(is.matrix(x)) - x <- listm(x) - if(!is.na(grp[1])) { - yy <- x - x<-list() - for(j in 1:length(grp)) - x[[j]] <- yy[[grp[j]]] - } - if(!is.list(x)) - stop("Data must be stored in list mode or a matrix.") - for(j in 1:JKL) { - xx <- x[[j]] - x[[j]] <- xx[!is.na(xx)] # Remove missing values - } - # - - if(JKL != length(x)) - warning("The number of groups does not match the number of contrast coefficients.") -for(j in 1:JKL){ -temp<-x[[j]] -temp<-temp[!is.na(temp)] # Remove missing values. 
-x[[j]]<-temp -} - # Create the three contrast matrices -temp<-con3way(J,K,L) -conA<-temp$conA -conB<-temp$conB -conC<-temp$conC -conAB<-temp$conAB -conAC<-temp$conAC -conBC<-temp$conBC -conABC<-temp$conABC -if(!op){ -Factor.A<-lincon(x,con=conA,tr=tr,alpha=alpha,pr=pr) -Factor.B<-lincon(x,con=conB,tr=tr,alpha=alpha,pr=pr) -Factor.C<-lincon(x,con=conC,tr=tr,alpha=alpha,pr=pr) -Factor.AB<-lincon(x,con=conAB,tr=tr,alpha=alpha,pr=pr) -Factor.AC<-lincon(x,con=conAC,tr=tr,alpha=alpha,pr=pr) -Factor.BC<-lincon(x,con=conBC,tr=tr,alpha=alpha,pr=pr) -Factor.ABC<-lincon(x,con=conABC,tr=tr,alpha=alpha,pr=pr) -} -All.Tests<-NA -if(op){ -Factor.A<-NA -Factor.B<-NA -Factor.C<-NA -Factor.AB<-NA -Factor.AC<-NA -Factor.BC<-NA -Factor.ABC<-NA -con<-cbind(conA,conB,conB,conAB,conAC,conBC,conABC) -All.Tests<-lincon(x,con=con,tr=tr,alpha=alpha,,pr=pr) -} -list(Factor.A=Factor.A,Factor.B=Factor.B,Factor.C=Factor.C, -Factor.AB=Factor.AB,Factor.AC=Factor.AC,Factor.BC=Factor.BC, -Factor.ABC=Factor.ABC,All.Tests=All.Tests,conA=conA,conB=conB,conC=conC, -conAB=conAB,conAC=conAC,conBC=conBC,conABC=conABC) -} - -bbbmcp=mcp3atm - -mcp3med<-function(J,K,L, x,tr=.2,con=0,alpha=.05,grp=NA,op=FALSE){ -# -# Do all pairwise comparisons of -# main effects for Factor A and B and C and all interactions -# based on trimmed means -# - # The data are assumed to be stored in x in list mode or in a matrix. - # If grp is unspecified, it is assumed x[[1]] contains the data - # for the first level of both factors: level 1,1. - # x[[2]] is assumed to contain the data for level 1 of the - # first factor and level 2 of the second factor: level 1,2 - # x[[j+1]] is the data for level 2,1, etc. - # If the data are in wrong order, grp can be used to rearrange the - # groups. For example, for a two by two design, grp<-c(2,4,3,1) - # indicates that the second group corresponds to level 1,1; - # group 4 corresponds to level 1,2; group 3 is level 2,1; - # and group 1 is level 2,2. - # - # Missing values are automatically removed. - # -if(is.data.frame(x))x=as.matrix(x) - JKL <- J*K*L - if(is.matrix(x)) - x <- listm(x) - if(!is.na(grp[1])) { - yy <- x - x<-list() - for(j in 1:length(grp)) - x[[j]] <- yy[[grp[j]]] - } - if(!is.list(x)) - stop("Data must be stored in list mode or a matrix.") - for(j in 1:JKL) { - xx <- x[[j]] - x[[j]] <- xx[!is.na(xx)] # Remove missing values - } - # - - if(JKL != length(x)) - warning("The number of groups does not match the number of contrast coefficients.") -for(j in 1:JKL){ -temp<-x[[j]] -temp<-temp[!is.na(temp)] # Remove missing values. 
-x[[j]]<-temp
-}
- # Create the three contrast matrices
-temp<-con3way(J,K,L)
-conA<-temp$conA
-conB<-temp$conB
-conC<-temp$conC
-conAB<-temp$conAB
-conAC<-temp$conAC
-conBC<-temp$conBC
-conABC<-temp$conABC
-if(!op){
-Factor.A<-msmed(x,con=conA,alpha=alpha)
-Factor.B<-msmed(x,con=conB,alpha=alpha)
-Factor.C<-msmed(x,con=conC,alpha=alpha)
-Factor.AB<-msmed(x,con=conAB,alpha=alpha)
-Factor.AC<-msmed(x,con=conAC,alpha=alpha)
-Factor.BC<-msmed(x,con=conBC,alpha=alpha)
-Factor.ABC<-msmed(x,con=conABC,alpha=alpha)
-}
-All.Tests<-NA
-if(op){
-Factor.A<-NA
-Factor.B<-NA
-Factor.C<-NA
-Factor.AB<-NA
-Factor.AC<-NA
-Factor.BC<-NA
-Factor.ABC<-NA
-con<-cbind(conA,conB,conC,conAB,conAC,conBC,conABC)
-All.Tests<-msmed(x,con=con,alpha=alpha)
-}
-list(Factor.A=Factor.A,Factor.B=Factor.B,Factor.C=Factor.C,
-Factor.AB=Factor.AB,Factor.AC=Factor.AC,Factor.BC=Factor.BC,
-Factor.ABC=Factor.ABC,All.Tests=All.Tests,conA=conA,conB=conB,conC=conC,
-conAB=conAB,conAC=conAC,conBC=conBC,conABC=conABC)
-}
-
-bbtrim<-function(J,K,x,grp=c(1:p),p=J*K,tr=.2,nboot=600,alpha=.05,pr=FALSE){
-#
-# Perform a J by K anova for independent groups using trimmed means
-# and a bootstrap-t method
-#
-# tr=.2 is default trimming
-#
-#
-# The R variable x is assumed to contain the raw
-# data stored in list mode or a matrix with JK columns.
-# If in list mode, x[[1]] contains the data
-# for the first level of both factors: level 1,1.
-# x[[2]] is assumed to contain the data for level 1 of the
-# first factor and level 2 of the second: level 1,2
-# x[[K]] is the data for level 1,K
-# x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc.
-#
-# It is assumed that data has length JK, the total number of
-# groups being tested, but a subset of the data can be analyzed
-# using grp
-#
-if(is.list(x))x<-elimna(matl(x))
-if(is.matrix(x))x<-elimna(x)
-data<-x
-if(is.matrix(data))data<-listm(data)
-if(!is.list(data))stop("Data are not stored in list mode or a matrix")
-if(p!=length(data)){
-print("The total number of groups, based on the specified levels, is")
-print(p)
-print("The number of groups stored in x is")
-print(length(data))
-print("Warning: These two values are not equal")
-}
-if(p!=length(grp))stop("Apparently a subset of the groups was specified that does not match the total number of groups indicated by the values for J and K.")
-temp=con2way(J,K)
-conA<-temp$conA
-conB<-temp$conB
-conAB<-temp$conAB
-Factor.A<-linconb(x,con=conA,tr=tr,alpha=alpha,nboot=nboot,pr=pr)
-Factor.B<-linconb(x,con=conB,tr=tr,alpha=alpha,nboot=nboot,pr=pr)
-Factor.AB<-linconb(x,con=conAB,tr=tr,alpha=alpha,nboot=nboot,pr=pr)
-list(Factor.A=Factor.A,Factor.B=Factor.B,Factor.AB=Factor.AB,pr=pr)
-}
-
-bbbtrim<-function(J,K,L,data,tr=.2,grp=c(1:p),alpha=.05,p=J*K*L,nboot=600,pr=FALSE){
-#
-# Perform three-way anova, independent groups, based on trimmed means
-#
-# That is, there are three factors with a total of JKL independent groups.
-#
-# A bootstrap-t method is used to perform multiple comparisons
-# The variable data is assumed to contain the raw
-# data stored in list mode. data[[1]] contains the data
-# for the first level of all three factors: level 1,1,1.
-# data[[2]] is assumed to contain the data for level 1 of the
-# first two factors and level 2 of the third factor: level 1,1,2
-# data[[L]] is the data for level 1,1,L
-# data[[L+1]] is the data for level 1,2,1. data[[2L]] is level 1,2,L.
-# data[[KL+1]] is level 2,1,1, etc.
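-#
-# For example (hypothetical data, for orientation only): in a 2 by 2 by 2
-# design the 8 groups can be supplied as the columns of a matrix, column 1
-# holding level 1,1,1 and column 8 holding level 2,2,2:
-# dat=matrix(rnorm(8*25),ncol=8)
-# bbbtrim(2,2,2,dat)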
-# -# The default amount of trimming is tr=.2 -# -# It is assumed that data has length JKL, the total number of -# groups being tested. -# -if(is.list(data))data=listm(elimna(matl(data))) -if(is.matrix(data))data=listm(elimna(data)) -if(!is.list(data))stop("Data are not stored in list mode or a matrix") -if(p!=length(data)){ -print("The total number of groups, based on the specified levels, is") -print(p) -print("The number of groups in data is") -print(length(data)) -print("Warning: These two values are not equal") -} -x=data -temp=con3way(J,K,L) -conA<-temp$conA -conB<-temp$conB -conC<-temp$conC -conAB<-temp$conAB -conAC<-temp$conAC -conBC<-temp$conBC -conABC=temp$conABC -Factor.A<-linconb(x,con=conA,tr=tr,alpha=alpha,nboot=nboot,pr=pr) -Factor.B<-linconb(x,con=conB,tr=tr,alpha=alpha,nboot=nboot,pr=pr) -Factor.C<-linconb(x,con=conC,tr=tr,alpha=alpha,nboot=nboot,pr=pr) -Factor.AB<-linconb(x,con=conAB,tr=tr,alpha=alpha,nboot=nboot,pr=pr) -Factor.AC<-linconb(x,con=conAC,tr=tr,alpha=alpha,nboot=nboot,pr=pr) -Factor.BC<-linconb(x,con=conBC,tr=tr,alpha=alpha,nboot=nboot,pr=pr) -Factor.ABC<-linconb(x,con=conABC,tr=tr,alpha=alpha,nboot=nboot,pr=pr) -list(Factor.A=Factor.A,Factor.B=Factor.B,Factor.C=Factor.C, -Factor.AB=Factor.AB,Factor.AC=Factor.AC,Factor.BC=Factor.BC, -Factor.ABC=Factor.ABC,pr=pr) -} - - -pb2trmcp<-function(J,K,x,grp=c(1:p),p=J*K,tr=.2,nboot=NA,alpha=.05,SEED=TRUE,pr=TRUE, -bhop=FALSE){ -# -# Perform a J by K anova using trimmed means with -# for two independent groups using a bootstrap-t method -# -# tr=.2 is default trimming -# -# -# The R variable data is assumed to contain the raw -# data stored in list mode. data[[1]] contains the data -# for the first level of both factors: level 1,1. -# data[[2]] is assumed to contain the data for level 1 of the -# first factor and level 2 of the second: level 1,2 -# data[[K]] is the data for level 1,K -# data[[K+1]] is the data for level 2,1, data[2K] is level 2,K, etc. -# -# It is assumed that data has length JK, the total number of -# groups being tested, but a subset of the data can be analyzed -# using grp -# -if(SEED)set.seed(2) -if(is.list(x))x<-elimna(matl(x)) -if(is.matrix(x))x<-elimna(x) -data<-x -if(is.matrix(data))data<-listm(data) -if(!is.list(data))stop("Data are not stored in list mode or a matrix") -if(p!=length(data)){ -print("The total number of groups, based on the specified levels, is") -print(p) -print("The number of groups stored in x is") -print(length(data)) -print("Warning: These two values are not equal") -} -if(p!=length(grp))stop("Apparently a subset of the groups was specified that does not match the total number of groups indicated by the values for J and K.") -temp=con2way(J,K) -conA<-temp$conA -conB<-temp$conB -conAB<-temp$conAB -if(pr)print("Taking bootstrap samples") -Factor.A<-pbtrmcp(x,con=conA,tr=tr,alpha=alpha,nboot=nboot,bhop=bhop,SEED=FALSE) -Factor.B<-pbtrmcp(x,con=conB,tr=tr,alpha=alpha,nboot=nboot,bhop=bhop,SEED=FALSE) -Factor.AB<-pbtrmcp(x,con=conAB,tr=tr,alpha=alpha,nboot=nboot,bhop=bhop,SEED=FALSE) -list(Factor.A=Factor.A,Factor.B=Factor.B,Factor.AB=Factor.AB,bhop=bhop,SEED=FALSE) -} - - - -pb3trmcp<-function(J,K,L,data,tr=.2,grp=c(1:p),alpha=.05,p=J*K*L,nboot=NA, -SEED=TRUE,bhop=FALSE){ -# -# Multiple comparisons for a three-way anova, independent groups, -# based on trimmed means -# -# That is, there are three factors with a total of JKL independent groups. 
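-#
-# A hypothetical call, for orientation only: with J=K=L=2 the JKL=8 groups
-# can be stored as data[[1]],...,data[[8]] in list mode (or as the columns
-# of a matrix) and analyzed with pb3trmcp(2,2,2,dat)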
-#
-# A percentile bootstrap method is used to perform multiple comparisons
-# The variable data is assumed to contain the raw
-# data stored in list mode. data[[1]] contains the data
-# for the first level of all three factors: level 1,1,1.
-# data[[2]] is assumed to contain the data for level 1 of the
-# first two factors and level 2 of the third factor: level 1,1,2
-# data[[L]] is the data for level 1,1,L
-# data[[L+1]] is the data for level 1,2,1. data[[2L]] is level 1,2,L.
-# data[[KL+1]] is level 2,1,1, etc.
-#
-# The default amount of trimming is tr=.2
-#
-# It is assumed that data has length JKL, the total number of
-# groups being tested.
-#
-if(SEED)set.seed(2)
-if(is.list(data))data=listm(elimna(matl(data)))
-if(is.matrix(data))data=listm(elimna(data))
-if(!is.list(data))stop("Data are not stored in list mode or a matrix")
-if(p!=length(data)){
-print("The total number of groups, based on the specified levels, is")
-print(p)
-print("The number of groups in data is")
-print(length(data))
-print("Warning: These two values are not equal")
-}
-x=data # pbtrmcp below operates on x
-temp=con3way(J,K,L)
-conA<-temp$conA
-conB<-temp$conB
-conC<-temp$conC
-conAB<-temp$conAB
-conAC<-temp$conAC
-conBC<-temp$conBC
-conABC=temp$conABC
-Factor.A<-pbtrmcp(x,con=conA,tr=tr,alpha=alpha,nboot=nboot,bhop=bhop)
-Factor.B<-pbtrmcp(x,con=conB,tr=tr,alpha=alpha,nboot=nboot,bhop=bhop)
-Factor.C<-pbtrmcp(x,con=conC,tr=tr,alpha=alpha,nboot=nboot,bhop=bhop)
-Factor.AB<-pbtrmcp(x,con=conAB,tr=tr,alpha=alpha,nboot=nboot,bhop=bhop)
-Factor.AC<-pbtrmcp(x,con=conAC,tr=tr,alpha=alpha,nboot=nboot,bhop=bhop)
-Factor.BC<-pbtrmcp(x,con=conBC,tr=tr,alpha=alpha,nboot=nboot,bhop=bhop)
-Factor.ABC<-pbtrmcp(x,con=conABC,tr=tr,alpha=alpha,nboot=nboot,bhop=bhop)
-list(Factor.A=Factor.A,Factor.B=Factor.B,Factor.C=Factor.C,
-Factor.AB=Factor.AB,Factor.AC=Factor.AC,Factor.BC=Factor.BC,
-Factor.ABC=Factor.ABC)
-}
-
-
-med2mcp<-function(J,K,x,grp=c(1:p),p=J*K,nboot=NA,alpha=.05,SEED=TRUE,pr=TRUE,
-bhop=FALSE){
-#
-# Perform multiple comparisons for a J by K anova using medians
-# and a percentile bootstrap method
-#
-#
-# The R variable data is assumed to contain the raw
-# data stored in a matrix or in list mode.
-# If stored in list mode, data[[1]] contains the data
-# for the first level of both factors: level 1,1.
-# data[[2]] is assumed to contain the data for level 1 of the
-# first factor and level 2 of the second: level 1,2
-# data[[K]] is the data for level 1,K
-# data[[K+1]] is the data for level 2,1, data[[2K]] is level 2,K, etc.
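-#
-# Hypothetical example: for a 3 by 2 design the six groups can be passed as
-# a matrix with six columns (column 1 is level 1,1, column 6 is level 3,2):
-# med2mcp(3,2,dat)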
-# -# It is assumed that data has length JK, the total number of -# groups being tested, but a subset of the data can be analyzed -# using grp -# -if(SEED)set.seed(2) -if(is.list(x))x<-elimna(matl(x)) -if(is.matrix(x))x<-elimna(x) -data<-x -if(is.matrix(data))data<-listm(data) -if(!is.list(data))stop("Data are not stored in list mode or a matrix") -if(p!=length(data)){ -print("The total number of groups, based on the specified levels, is") -print(p) -print("The number of groups stored in x is") -print(length(data)) -print("Warning: These two values are not equal") -} -if(p!=length(grp))stop("Apparently a subset of the groups was specified that does not match the total number of groups indicated by the values for J and K.") -temp=con2way(J,K) -conA<-temp$conA -conB<-temp$conB -conAB<-temp$conAB -if(pr)print("Taking bootstrap samples") -Factor.A<-medpb(x,con=conA,alpha=alpha,nboot=nboot,bhop=bhop,SEED=FALSE) -Factor.B<-medpb(x,con=conB,alpha=alpha,nboot=nboot,bhop=bhop,SEED=FALSE) -Factor.AB<-medpb(x,con=conAB,alpha=alpha,nboot=nboot,bhop=bhop,SEED=FALSE) -list(Factor.A=Factor.A,Factor.B=Factor.B,Factor.AB=Factor.AB,bhop=bhop,SEED=FALSE) -} - - - - med3mcp<-function(J,K,L,x,grp=c(1:p),alpha=.05,p=J*K*L,nboot=NA, -SEED=TRUE,bhop=FALSE){ -# -# Multiple comparisons for a three-way anova, independent groups, -# based on medians using a percentile bootstrap method -# -# That is, there are three factors with a total of JKL independent groups. -# -# The variable x is assumed to contain the raw -# x stored in a matrix or in list mode. -# If in list mode, x[[1]] contains the x -# for the first level of all three factors: level 1,1,1. -# x][2]] is assumed to contain the x for level 1 of the -# first two factors and level 2 of the third factor: level 1,1,2 -# x[[L]] is the x for level 1,1,L -# x[[L+1]] is the x for level 1,2,1. x[[2L]] is level 1,2,L. -# x[[KL+1]] is level 2,1,1, etc. -# -# It is assumed that x has length JKL, the total number of -# groups being tested. -# -if(SEED)set.seed(2) -if(is.list(x))x=listm(elimna(matl(x))) -if(is.matrix(x))x=listm(elimna(x)) -if(!is.list(x))stop("x are not stored in list mode or a matrix") -if(p!=length(x)){ -print("The total number of groups, based on the specified levels, is") -print(p) -print("The number of groups in x is") -print(length(x)) -print("Warning: These two values are not equal") -} -temp=con3way(J,K,L) -conA<-temp$conA -conB<-temp$conB -conC<-temp$conC -conAB<-temp$conAB -conAC<-temp$conAC -conBC<-temp$conBC -conABC=temp$conABC -Factor.A<-medpb(x,con=conA,alpha=alpha,nboot=nboot,bhop=bhop) -Factor.B<-medpb(x,con=conB,alpha=alpha,nboot=nboot,bhop=bhop) -Factor.C<-medpb(x,con=conC,alpha=alpha,nboot=nboot,bhop=bhop) -Factor.AB<-medpb(x,con=conAB,alpha=alpha,nboot=nboot,bhop=bhop) -Factor.AC<-medpb(x,con=conAC,alpha=alpha,nboot=nboot,bhop=bhop) -Factor.BC<-medpb(x,con=conBC,alpha=alpha,nboot=nboot,bhop=bhop) -Factor.ABC<-medpb(x,con=conABC,alpha=alpha,nboot=nboot,bhop=bhop) -list(Factor.A=Factor.A,Factor.B=Factor.B,Factor.C=Factor.C, -Factor.AB=Factor.AB,Factor.AC=Factor.AC,Factor.BC=Factor.BC, -Factor.ABC=Factor.ABC) -} - -wmwaov<-function(x,est=median,nboot=500,MC=FALSE,SEED=TRUE,MM=FALSE){ -# -# Extension of WMW to J groups -# i.e., use p=P(Xdv[1:nboot])/nboot-.5*sum(dv[bplus]==dv[1:nboot])/nboot -p.value -} - - -wincov<-function(m,tr=.2){ -m=winall(m,tr=tr)$cov -m -} - -mgvreg<-function(x,y,regfun=tsreg,cov.fun=rmba,se=TRUE,varfun=pbvar,corfun=pbcor, -SEED=TRUE){ -# -# Do regression on points not labled outliers -# by the MGV method. 
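-#
-# In outline: outmgv flags outlying rows of cbind(x,y), those rows are
-# removed, and regfun (Theil-Sen by default) is fit to the points that
-# remain. A hypothetical call is mgvreg(x,y), which returns the
-# coefficients, residuals and an estimate of explanatory power.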
-# (This function replaces an older version of mgvreg as of 11/6/06) -# -# SEED=T so that results from outmgv are always duplicated using the same data -# -# In contrast to the old version, -# when calling outmgv, center of data is determined via -# the measure of location corresponding to cov.fun, which defaults -# to the median ball algorithm (MBA) -# -x=as.matrix(x) -m<-cbind(x,y) -m<-elimna(m) # eliminate any rows with missing data -ivec<-outmgv(m,plotit=FALSE,cov.fun=cov.fun,SEED=SEED)$keep -np1<-ncol(x)+1 -y=m[ivec,np1] -x=m[ivec,1:ncol(x)] -coef<-regfun(x,y)$coef -vec<-rep(1,length(y)) -residuals<-y-cbind(vec,x)%*%coef -stre=NULL -yhat<-y-residuals -e.pow<-varfun(yhat)/varfun(y) -if(!is.na(e.pow)){ -if(e.pow>=1)e.pow<-corfun(yhat,y)$cor^2 -stre=sqrt(e.pow) -} -list(coef=coef,residuals=residuals,Strength.Assoc=stre,Explanatory.Power=e.pow) -} -opregpbMC<-function(x,y,nboot=1000,alpha=.05,om=TRUE,ADJ=TRUE,cop=3,SEED=TRUE, -nullvec=rep(0,ncol(x)+1),plotit=TRUE,opdis=2,gval=sqrt(qchisq(.95,ncol(x)+1))){ -# -# Same as opregpb, only this function takes advantage of a multi-core -# processor assuming one is availabe and that the R package -# multicore has been installed. -# -# generate bootstrap estimates -# use projection-type outlier detection method followed by -# TS regression. -# -# om=T and ncol(x)>1, means an omnibus test is performed, -# otherwise only individual tests of parameters are performed. -# -# opdis=2, means that Mahalanobis distance is used -# opdis=1, means projection-type distance is used -# -# gval is critical value for projection-type outlier detection -# method -# -# ADJ=T, Adjust p-values as described in Section 11.1.5 of the text. -# -if(SEED)set.seed(2) -library(parallel) -x<-as.matrix(x) -m<-cbind(x,y) -p1<-ncol(x)+1 -m<-elimna(m) # eliminate any rows with missing data -x<-m[,1:ncol(x)] -x<-as.matrix(x) -y<-m[,p1] -if(nrow(x)!=length(y))stop("Sample size of x differs from sample size of y") -if(!is.matrix(x))stop("Data should be stored in a matrix") -print("Taking bootstrap samples. Please wait.") -data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot) -bvec<-apply(data,1,regboot,x,y,regfun=opregMC) -# bvec is a p+1 by nboot matrix. The first row -# contains the bootstrap intercepts, the second row -# contains the bootstrap values for first predictor, etc. 
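-#
-# Next, the bootstrap estimates for each parameter yield a p-value and a
-# percentile bootstrap confidence interval; the slope p-values are then
-# compared to critical values assigned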
-# using Hochberg method -bvec<-t(bvec) -dvec<-alpha/(c(1:ncol(x))) -test<-NA -icl0<-round(alpha*nboot/2) -icl<-round(alpha*nboot/(2*ncol(x))) -icu0<-nboot-icl0 -icu<-nboot-icl -output<-matrix(0,p1,6) -dimnames(output)<-list(NULL,c("Param.","p.value","crit.p.value", -"ci.lower","ci.upper","s.e.")) -pval<-NA -for(i in 1:p1){ -output[i,1]<-i-1 -se.val<-var(bvec[,i]) -temp<-sort(bvec[,i]) -output[i,6]<-sqrt(se.val) -if(i==1){ -output[i,4]<-temp[icl0+1] -output[i,5]<-temp[icu0] -} -if(i>1){ -output[i,4]<-temp[icl+1] -output[i,5]<-temp[icu] -} -pval[i]<-sum((temp>nullvec[i]))/length(temp) -if(pval[i]>.5)pval[i]<-1-pval[i] -} -fac<-2 -if(ADJ){ -# Adjust p-value if n<60 -nval<-length(y) -if(nval<20)nval<-20 -if(nval>60)nval<-60 -fac<-2-(60-nval)/40 -} -pval[1]<-2*pval[1] -pval[2:p1]<-fac*pval[2:p1] -output[,2]<-pval -temp2<-order(0-pval[2:p1]) -zvec<-dvec[1:ncol(x)] -sigvec<-(test[temp2]>=zvec) -output[temp2+1,3]<-zvec -output[1,3]<-NA -output[,2]<-pval -om.pval<-NA -temp<-opregMC(x,y)$coef -if(om && ncol(x)>1){ -temp2<-rbind(bvec[,2:p1],nullvec[2:p1]) -if(opdis==1)dis<-pdisMC(temp2,center=temp[2:p1]) -if(opdis==2){ -cmat<-var(bvec[,2:p1]-apply(bvec[,2:p1],2,mean)+temp[2:p1]) -dis<-mahalanobis(temp2,temp[2:p1],cmat) -} -om.pval<-sum((dis[nboot+1]<=dis[1:nboot]))/nboot -} -# do adjusted p-value -nval<-length(y) -if(nval<20)nval<-20 -if(nval>60)nval<-60 -adj.pval<-om.pval/2+(om.pval-om.pval/2)*(nval-20)/40 -if(ncol(x)==2 && plotit){ -plot(bvec[,2],bvec[,3],xlab="Slope 1",ylab="Slope 2") -temp.dis<-order(dis[1:nboot]) -ic<-round((1-alpha)*nboot) -xx<-bvec[temp.dis[1:ic],2:3] -xord<-order(xx[,1]) -xx<-xx[xord,] -temp<-chull(xx) -lines(xx[temp,]) -lines(xx[c(temp[1],temp[length(temp)]),]) -} -list(output=output,om.pval=om.pval,adj.om.pval=adj.pval) -} - - -opregMC<-function(x,y,regfun=tsreg,cop=3,fast=FALSE,pr=TRUE,prres=FALSE,STAND=TRUE,xout=FALSE){ -# -# Do regression on points not labled outliers -# using projection-type outlier detection method -# -# Note: argument xout is not relevant here, but is included to avoid conflicts when using regci. -# -library(parallel) -x<-as.matrix(x) -m<-cbind(x,y) -m<-elimna(m) # eliminate any rows with missing data -ivec<-outproMC(m,plotit=FALSE,cop=cop,fast=FALSE,pr=FALSE,STAND=STAND)$keep -np1<-ncol(x)+1 -coef<-regfun(m[ivec,1:ncol(x)],m[ivec,np1])$coef -vec<-rep(1,length(y)) -residuals<-y-cbind(vec,x)%*%coef -if(fast && pr){ -print("Intercept, followed by slopes:") -print(coef) -if(prres){ -print("Residuals:") -print(residuals) -}} -list(coef=coef,residuals=residuals) -} -twocor<-function(x1,y1,x2,y2,corfun=pbcor,nboot=599,alpha=.05,SEED=TRUE,...){ -# -# Compute a .95 confidence interval for the -# difference between two correlation coefficients -# corresponding to two independent groups. -# -# the function corfun is any R function that returns a -# correlation coefficient in corfun$cor. The functions pbcor and -# wincor follow this convention. -# -# For Pearson's correlation, use -# the function twopcor instead. -# -# The default number of bootstrap samples is nboot=599 -# -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -#print("Taking bootstrap samples. Please wait.") -data1<-matrix(sample(length(y1),size=length(y1)*nboot,replace=TRUE),nrow=nboot) -bvec1<-apply(data1,1,corbsub,x1,y1,corfun,...) # A 1 by nboot matrix. -data2<-matrix(sample(length(y2),size=length(y2)*nboot,replace=TRUE),nrow=nboot) -bvec2<-apply(data2,1,corbsub,x2,y2,corfun,...) # A 1 by nboot matrix. 
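-# bvec1 and bvec2 contain the bootstrap correlations for the two groups;
-# the confidence interval and p-value below are based on the distribution
-# of the differences bvec1-bvec2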
-bvec<-bvec1-bvec2 -bsort<-sort(bvec) -term<-alpha/2 -ilow<-round((alpha/2) * nboot) -ihi<-nboot - ilow -ilow<-ilow+1 -corci<-1 -corci[1]<-bsort[ilow] -corci[2]<-bsort[ihi] -pv<-(sum(bvec<0)+.5*sum(bvec==0))/nboot -pv=2*min(c(pv,1-pv)) -r1<-corfun(x1,y1)$cor -r2<-corfun(x2,y2)$cor -reject<-"NO" -if(corci[1]>0 || corci[2]<0)reject="YES" -list(r1=r1,r2=r2,ci.dif=corci,p.value=pv) -} - - -rm3mcp<-function(J,K,L, x,tr=.2,alpha=.05,dif=TRUE,op=FALSE,grp=NA){ -# -# MULTIPLE COMPARISONS FOR A 3-WAY within-by-within-by within ANOVA -# Do all multiple comparisons associated with -# main effects for Factor A and B and C and all interactions -# based on trimmed means -# - # The data are assumed to be stored in x in list mode or in a matrix. - # If grp is unspecified, it is assumed x[[1]] contains the data - # for the first level of both factors: level 1,1. - # x[[2]] is assumed to contain the data for level 1 of the - # first factor and level 2 of the second factor: level 1,2 - # x[[j+1]] is the data for level 2,1, etc. - # If the data are in wrong order, grp can be used to rearrange the - # groups. For example, for a two by two design, grp<-c(2,4,3,1) - # indicates that the second group corresponds to level 1,1; - # group 4 corresponds to level 1,2; group 3 is level 2,1; - # and group 1 is level 2,2. - # - # Missing values are automatically removed. - # -if(is.data.frame(x))x=as.matrix(x) - JKL <- J*K*L - if(is.matrix(x)) - x <- listm(x) - if(!is.na(grp[1])) { - yy <- x - x<-list() - for(j in 1:length(grp)) - x[[j]] <- yy[[grp[j]]] - } - if(!is.list(x)) - stop("Data must be stored in list mode or a matrix.") - for(j in 1:JKL) { - xx <- x[[j]] - x[[j]] <- xx[!is.na(xx)] # Remove missing values - } - # - - if(JKL != length(x)) - warning("The number of groups does not match the number of contrast coefficients.") -for(j in 1:JKL){ -temp<-x[[j]] -temp<-temp[!is.na(temp)] # Remove missing values. -x[[j]]<-temp -} - # Create the three contrast matrices -temp<-con3way(J,K,L) -conA<-temp$conA -conB<-temp$conB -conC<-temp$conC -conAB<-temp$conAB -conAC<-temp$conAC -conBC<-temp$conBC -conABC<-temp$conABC -Factor.A<-rmmcp(x,con=conA,tr=tr,alpha=alpha,dif=dif) -Factor.B<-rmmcp(x,con=conB,tr=tr,alpha=alpha,dif=dif) -Factor.C<-rmmcp(x,con=conC,tr=tr,alpha=alpha,dif=dif) -Factor.AB<-rmmcp(x,con=conAB,tr=tr,alpha=alpha,dif=dif) -Factor.AC<-rmmcp(x,con=conAC,tr=tr,alpha=alpha,dif=dif) -Factor.BC<-rmmcp(x,con=conBC,tr=tr,alpha=alpha,dif=dif) -Factor.ABC<-rmmcp(x,con=conABC,tr=tr,alpha=alpha,dif=dif) -list(Factor.A=Factor.A,Factor.B=Factor.B,Factor.C=Factor.C, -Factor.AB=Factor.AB,Factor.AC=Factor.AC,Factor.BC=Factor.BC, -Factor.ABC=Factor.ABC,conA=conA,conB=conB,conC=conC, -conAB=conAB,conAC=conAC,conBC=conBC,conABC=conABC) -} - -wwwmcp=rm3mcp - -tmcppb<-function(x,alpha=.05,nboot=NA,grp=NA,est=tmean,con=0,bhop=FALSE,SEED=TRUE,...){ -# -# Multiple comparisons for J independent groups using trimmed means -# -# A percentile bootstrap method with Rom's method is used. -# -# The data are assumed to be stored in x -# which either has list mode or is a matrix. In the first case -# x[[1]] contains the data for the first group, x[[2]] the data -# for the second group, etc. Length(x)=the number of groups = J. -# If stored in a matrix, the columns of the matrix correspond -# to groups. -# -# est is the measure of location and defaults to the median -# ... 
can be used to set optional arguments associated with est -# -# The argument grp can be used to analyze a subset of the groups -# Example: grp=c(1,3,5) would compare groups 1, 3 and 5. -# -# Missing values are allowed. -# -con<-as.matrix(con) -if(is.matrix(x))x<-listm(x) -if(!is.list(x))stop("Data must be stored in list mode or in matrix mode.") -if(!is.na(sum(grp))){ # Only analyze specified groups. -xx<-list() -for(i in 1:length(grp))xx[[i]]<-x[[grp[i]]] -x<-xx -} -J<-length(x) -tempn<-0 -mvec<-NA -for(j in 1:J){ -temp<-x[[j]] -temp<-temp[!is.na(temp)] # Remove missing values. -tempn[j]<-length(temp) -x[[j]]<-temp -mvec[j]<-est(temp,...) -} -Jm<-J-1 -# -# Determine contrast matrix -# -if(sum(con^2)==0){ -ncon<-(J^2-J)/2 -con<-matrix(0,J,ncon) -id<-0 -for (j in 1:Jm){ -jp<-j+1 -for (k in jp:J){ -id<-id+1 -con[j,id]<-1 -con[k,id]<-0-1 -}}} -ncon<-ncol(con) -if(nrow(con)!=J)stop("Something is wrong with con; the number of rows does not match the number of groups.") -# Determine nboot if a value was not specified -if(is.na(nboot)){ -nboot<-5000 -if(J <= 8)nboot<-4000 -if(J <= 3)nboot<-2000 -} -# Determine critical values -if(!bhop){ -if(alpha==.05){ -dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) -if(ncon > 10){ -avec<-.05/c(11:ncon) -dvec<-c(dvec,avec) -}} -if(alpha==.01){ -dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) -if(ncon > 10){ -avec<-.01/c(11:ncon) -dvec<-c(dvec,avec) -}} -if(alpha != .05 && alpha != .01){ -dvec<-alpha/c(1:ncon) -} -} -if(bhop)dvec<-(ncon-c(1:ncon)+1)*alpha/ncon -bvec<-matrix(NA,nrow=J,ncol=nboot) -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -#print("Taking bootstrap samples. Please wait.") -for(j in 1:J){ -#print(paste("Working on group ",j)) -data<-matrix(sample(x[[j]],size=length(x[[j]])*nboot,replace=TRUE),nrow=nboot) -bvec[j,]<-apply(data,1,est,...) # Bootstrapped values for jth group -} -test<-NA -bcon<-t(con)%*%bvec #ncon by nboot matrix -tvec<-t(con)%*%mvec -for (d in 1:ncon){ -tv<-sum(bcon[d,]==0)/nboot -test[d]<-sum(bcon[d,]>0)/nboot+.5*tv -if(test[d]> .5)test[d]<-1-test[d] -} -test<-2*test -output<-matrix(0,ncon,6) -dimnames(output)<-list(NULL,c("con.num","psihat","p.value","p.crit","ci.lower","ci.upper")) -temp2<-order(0-test) -zvec<-dvec[1:ncon] -sigvec<-(test[temp2]>=zvec) -output[temp2,4]<-zvec -icl<-round(dvec[ncon]*nboot/2)+1 -icu<-nboot-icl-1 -for (ic in 1:ncol(con)){ -output[ic,2]<-tvec[ic,] -output[ic,1]<-ic -output[ic,3]<-test[ic] -temp<-sort(bcon[ic,]) -output[ic,5]<-temp[icl] -output[ic,6]<-temp[icu] -} -num.sig<-sum(output[,3]<=output[,4]) -list(output=output,con=con,num.sig=num.sig) -} - -#linconpb=tmcppb - -bbmcppb<-function(J, K, x, est=tmean,JK = J*K, - alpha = 0.05, grp =c(1:JK), nboot = 2000, bhop=FALSE,SEED = TRUE,...) -{ -# -# BETWEEN-BY-BETWEEN DESIGN -# - # A percentile bootstrap for multiple comparisons - # for all main effects and interactions - # The analysis is done by generating bootstrap samples and - # using an appropriate linear contrast. - # - # The R variable x is assumed to contain the raw - # data stored in list mode or in a matrix. - # If in list mode, x[[1]] contains the data - # for the first level of both factors: level 1,1. - # x[[2]] is assumed to contain the data for level 1 of the - # first factor and level 2 of the second: level 1,2 - # x[[K]] is the data for level 1,K - # x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc. 
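- #
- # A hypothetical call, for illustration: for a 2 by 3 design with the
- # six groups stored as the columns of a matrix m,
- # bbmcppb(2,3,m,est=tmean,tr=.2)
- # returns percentile bootstrap contrasts for Factor A, Factor B and the
- # A by B interaction.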
- # - # If the data are in a matrix, column 1 is assumed to - # correspond to x[[1]], column 2 to x[[2]], etc. - # - # When in list mode x is assumed to have length JK, the total number - # groups being tested, but a subset of the data can be analyzed - # using grp - # - -if(!is.null(dim(x)))x= listm(x) -x=elimna(x) -n=lapply(x,length) -con=con2way(J,K) -A=bbmcppb.sub(J=J, K=K, x, est=est,con=con$conA, - alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...) -B=bbmcppb.sub(J=J, K=K, x, est=est,con=con$conB, - alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...) -AB=bbmcppb.sub(J=J, K=K, x, est=est,con=con$conAB, - alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...) -list(n=n,Fac.A=A,Fac.B=B,Fac.AB=AB) -} - - bbmcppb.sub<-function(J, K, x, est=tmean, JK = J*K, con = 0, - alpha = 0.05, grp =c(1:JK), nboot = 500, bhop=FALSE,SEED = TRUE, ...){ -# -# between-by-between design -# - # - # A percentile bootstrap for multiple comparisons among - # all main effects and interactions - # The analysis is done by generating bootstrap samples and - # using an appropriate linear contrast. - # - # The R variable x is assumed to contain the raw - # data stored in list mode or in a matrix. - # If in list mode, x[[1]] contains the data - # for the first level of both factors: level 1,1. - # x[[2]] is assumed to contain the data for level 1 of the - # first factor and level 2 of the second: level 1,2 - # x[[K]] is the data for level 1,K - # x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc. - # -# -# JK independent groups -# - - # If the data are in a matrix, column 1 is assumed to - # correspond to x[[1]], column 2 to x[[2]], etc. - # - # When in list mode x is assumed to have length JK, the total number - # groups being tested, but a subset of the data can be analyzed - # using grp - # - if(is.matrix(x)) { - y <- list() - for(j in 1:ncol(x)) - y[[j]] <- x[, j] -x=y -} -ncon=ncol(con) - p <- J*K -JK=p -if(p>length(x))stop('JK is less than the Number of groups') -JK=J*K - data <- list() -xx=list() - for(j in 1:length(x)) { -xx[[j]]=x[[grp[j]]] # save input data -# # Now have the groups in proper order. - } -for(j in 1:p){ -xx[[j]]=elimna(xx[[j]]) -} -x=xx - crit=alpha/2 - icl<-round(crit*nboot)+1 -icu<-nboot-icl - if(SEED) - set.seed(2) - # set seed of random number generator so that - # results can be duplicated. - # Next determine the n_j values - testA = NA - bsam = list() - bdat = list() - aboot=matrix(NA,nrow=nboot,ncol=ncol(con)) -tvec=NA -tvec=linhat(x,con,est=est,...) - for(ib in 1:nboot) { - for(j in 1:JK) { -nv=length(x[[j]]) -bdat[[j]] = sample(nv, size = nv, replace =TRUE) -bsam[[j]] = x[[j]][bdat[[j]]] -} -aboot[ib,]=linhat(bsam,con=con,est=est,...) 
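-# each bootstrap replication resamples every group separately; linhat then
-# stores the resulting contrast estimates in one row of aboot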
-} -pbA=NA -for(j in 1:ncol(aboot)){ -pbA[j]=mean(aboot[,j]>0) -pbA[j]=2*min(c(pbA[j],1-pbA[j])) -} -# Determine critical values -outputA<-matrix(0,ncol(con),6) -dimnames(outputA)<-list(NULL,c('con.num','psihat','p.value','p.adjust', -'ci.lower','ci.upper')) -test=pbA -temp2<-sort(test) #order(0-test) -outputA[,2]<-tvec -for (ic in 1:ncol(con)){ -outputA[ic,1]<-ic -outputA[ic,3]<-test[ic] -temp<-sort(aboot[,ic]) -outputA[ic,5]<-temp[icl] -outputA[ic,6]<-temp[icu] -} -outputA[,4]=p.adjust(outputA[,3],method='hoch') -outputA -} - -t2waypb<-bbmcppb - -ols.plot.inter<-function(x,y, pyhat = FALSE, eout = FALSE, xout = FALSE, outfun = out, - plotit = TRUE, expand = 0.5, scale = TRUE, xlab = "X", - ylab = "Y", zlab = "", theta = 50, phi = 25, family = "gaussian", - duplicate = "error",ticktype="simple",...){ -# -# Plot regression surface based on the classic interaction model: -# usual product term -# -# x is assumed to be a matrix with two columns (two predictors) -library(akima) -x<-as.matrix(x) -xx<-cbind(x,y) -xx<-elimna(xx) -x<-xx[,1:ncol(x)] -x<-as.matrix(x) -y<-xx[,ncol(x)+1] -if(ncol(x)!=2)stop("x should have two columns") -if(xout){ -m<-cbind(x,y) -flag<-outfun(x,plotit=FALSE,...)$keep -m<-m[flag,] -x<-m[,1:2] -y<-m[,3] -} -xx=cbind(x,x[,1]*x[,2]) -temp=lsfit(xx,y) -fitr=y-temp$residuals -iout<-c(1:length(fitr)) -nm1<-length(fitr)-1 -for(i in 1:nm1){ -ip1<-i+1 -for(k in ip1:length(fitr))if(sum(x[i,]==x[k,])==2)iout[k]<-0 -} -fitr<-fitr[iout>=1] # Eliminate duplicate points in the x-y plane -# This is necessary when doing three dimensional plots -# with the R function interp -mkeep<-x[iout>=1,] -fit<-interp(mkeep[,1],mkeep[,2],fitr,duplicate=duplicate) -persp(fit,theta=theta,phi=phi,expand=expand, -scale=scale,xlab=xlab,ylab=ylab,zlab=zlab,ticktype=ticktype) -} - -gamplotINT<-function(x,y,pyhat=FALSE,plotit=TRUE,theta=50,phi=25,expand=.5,xout=FALSE, -SCALE=FALSE,zscale=TRUE,eout=FALSE,outfun=out,ticktype="simple",xlab = "X", ylab = "Y", zlab = "",...){ -# -# Plot regression surface, assuming two predictors in -# n by 2 matrix x using gam (generalized additive model) -# Same as gamplot, only a product term is included. 
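-#
-# The fit comes from mgcv via gam(y~s(x1)+s(x2)+s(x1,x2)), so the product
-# term is the bivariate smooth s(x1,x2). A hypothetical call that skips the
-# plot and only returns the fitted values:
-# yhat=gamplotINT(x,y,plotit=FALSE,pyhat=TRUE)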
-# -if(eout && xout)stop("Not allowed to have eout=xout=T") -x<-as.matrix(x) -if(ncol(x)!=2)stop("x must be an n by 2 matrix") -library(akima) -library(mgcv) -np=ncol(x) -np1=np+1 -m<-elimna(cbind(x,y)) -x<-m[,1:np] -x<-as.matrix(x) -y<-m[,np1] -if(xout){ -flag<-outfun(x,...)$keep -m<-m[flag,] -} -if(eout){ -flag<-outfun(m,...)$keep -m<-m[flag,] -} -x1<-m[,1] -x2<-m[,2] -y<-m[,3] -xrem<-m[,1:2] -n<-nrow(x) -fitr<-fitted(gam(y~s(x1)+s(x2)+s(x1,x2))) -allfit<-fitr -if(plotit){ -iout<-c(1:length(fitr)) -nm1<-length(fitr)-1 -for(i in 1:nm1){ -ip1<-i+1 -for(k in ip1:length(fitr))if(sum(xrem[i,]==xrem[k,])==2)iout[k]<-0 -} -fitr<-fitr[iout>=1] -mkeep<-xrem[iout>=1,] -fit<-interp(mkeep[,1],mkeep[,2],fitr) -persp(fit,theta=theta,phi=phi,expand=expand,xlab=xlab,ylab=ylab,zlab=zlab, -scale=scale,ticktype=ticktype) -} -m<-"Done" -if(pyhat)m<-allfit -m -} - - - -reg.plot.inter<-function(x,y, regfun=tsreg, - pyhat = FALSE, eout = FALSE, xout = FALSE, outfun = out, - plotit = TRUE, expand = 0.5, scale = TRUE, xlab = "X", - ylab = "Y", zlab = "", theta = 50, phi = 25, family = "gaussian", - duplicate = "error",ticktype="simple",...){ -# -# Plot regression surface based on the classic interaction model: -# usual product term -# -# x is assumed to be a matrix with two columns (two predictors) -library(akima) -x<-as.matrix(x) -xx<-cbind(x,y) -xx<-elimna(xx) -x<-xx[,1:ncol(x)] -x<-as.matrix(x) -y<-xx[,ncol(x)+1] -if(xout){ -p=ncol(x) -p1=p+1 -m<-cbind(x,y) -if(identical(outfun,outblp))flag=outblp(x,y,plotit=FALSE)$keep -else -flag<-outfun(x,plotit=FALSE,...)$keep -m<-m[flag,] -x<-m[,1:p] -y<-m[,p1] -} - -if(!scale)print("scale=F. If there is an association, try scale=T") -if(ncol(x)!=2)stop("x should have two columns") -xx=cbind(x,x[,1]*x[,2]) -temp=regfun(xx,y) -fitr=y-temp$residuals -iout<-c(1:length(fitr)) -nm1<-length(fitr)-1 -for(i in 1:nm1){ -ip1<-i+1 -for(k in ip1:length(fitr))if(sum(x[i,]==x[k,])==2)iout[k]<-0 -} -fitr<-fitr[iout>=1] # Eliminate duplicate points in the x-y plane -# This is necessary when doing three dimensional plots -# with the R function interp -mkeep<-x[iout>=1,] -fit<-interp(mkeep[,1],mkeep[,2],fitr,duplicate=duplicate) -persp(fit,theta=theta,phi=phi,expand=expand, -scale=scale,xlab=xlab,ylab=ylab,zlab=zlab,ticktype=ticktype) -} - -bwrank<-function(J,K,x,grp=c(1:p),p=J*K){ -# -# Between by within rank-based ANOVA -# That is, have a J by K design with J independent levels and K dependent -# measures -# -# x can be a matrix with columns corresponding to groups -# or it can have list mode. -# -# -if(is.data.frame(x))data=as.matrix(x) -if(is.matrix(x))x<-listm(x) -x=x[grp] -xx<-list() -nvec<-NA -alldat<-NA -klow<-1-K -kup<-0 -iall=0 -for (j in 1:J){ -klow<-klow+K -kup<-kup+K -mtemp=elimna(matl(x[klow:kup])) -for(k in 1:K){ -iall=iall+1 -xx[[iall]]=mtemp[,k] -}} -for(j in 1:p){ -alldat<-c(alldat,xx[[j]]) -nvec[j]<-length(xx[[j]]) -} -# -# Check sample sizes -# -nmat<-matrix(nvec,J,K,byrow=TRUE) -for(j in 1:J){ -if(var(nmat[j,]) !=0){ -warning("Number of observations among dependent groups for level",paste(j)," of Factor A are unequal") -print("Matrix of sample sizes:") -print(nmat) -}} -if(sum(is.na(alldat[2:length(alldat)])>0))stop("Missing values not allowed") -rval<-rank(alldat[2:length(alldat)]) -rdd<-mean(rval) # R bar ... 
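-# rval contains the ranks of the pooled data; next these ranks are split
-# back into the p=J*K groups so the rank means R bar_.jk can be computed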
-xr<-list() -il<-1-nvec[1] -iu<-0 -for(j in 1:p){ -il<-il+nvec[j] -iu<-iu+nvec[j] -xr[[j]]<-rval[il:iu] -} -v<-matrix(0,p,p) -Ja<-matrix(1,J,J) -Ia<-diag(1,J) -Pa<-Ia-Ja/J -Jb<-matrix(1,K,K) -Ib<-diag(1,K) -Pb<-Ib-Jb/K -cona<-kron(Pa,Ib) -conb<-kron(Ia,Pb) -conab<-kron(Pa,Pb) -for(k in 1:K){ -temp<-x[[k]] -bigm<-matrix(temp,ncol=1) -jk<-k -for (j in 2:J){ -jk<-jk+K -tempc<-matrix(x[[jk]],ncol=1) -bigm<-rbind(bigm,tempc) -temp<-c(temp,x[[jk]]) -}} -N<-length(temp) -rbbd<-NA -for(k in 1:K){ -bigm<-xr[[k]] -jk<-k -for (j in 2:J){ -jk<-jk+K -bigm<-c(bigm,xr[[jk]]) -}} -rbjk<-matrix(NA,nrow=J,ncol=K) #R_.jk -ic<-0 -for (j in 1:J){ -for(k in 1:K){ -ic<-ic+1 -rbjk[j,k]<-mean(xr[[ic]]) # R bar_.jk -}} -for(k in 1:K)rbbd[k]<-mean(rbjk[,k]) -rbj<-1 # R_.j. -sigv<-0 -njsam<-0 # n_j -icc<-1-K -ivec<-c(1:K)-K -for (j in 1:J){ -icc<-icc+K -ivec<-ivec+K -temp<-xr[ivec[1]:ivec[K]] -temp<-matl(temp) -tempv<-apply(temp,1,mean) -njsam[j]<-nvec[icc] -rbj[j]<-mean(rbjk[j,]) -sigv[j]<-var(tempv) # var of R bar_ij. -} -nv<-sum(njsam) -phat<-(rbjk-.5)/(nv*K) -sv2<-sum(sigv/njsam) -uv<-sum((sigv/njsam)^2) -dv<-sum((sigv/njsam)^2/(njsam-1)) -testA<-J*var(rbj)/sv2 -klow<-1-K -kup<-0 -sv<-matrix(0,nrow=K,ncol=K) -rvk<-NA -for(j in 1:J){ -klow<-klow+K -kup<-kup+K -sel<-c(klow:kup) -m<-matl(xr[klow:kup]) -m<-elimna(m) -xx<-listm(m) -xx<-listm(m) -vsub<-nv*var(m)/(nv*K*nv*K*njsam[j]) -v[sel,sel]<-vsub -sv<-sv+vsub -} -sv<-sv/J^2 -testB<-nv/(nv*K*nv*K*sum(diag(Pb%*%sv)))*sum((rbbd-mean(rbbd))^2) -testAB<-0 -for (j in 1:J){ -for (k in 1:K){ -testAB<-testAB+(rbjk[j,k]-rbj[j]-rbbd[k]+rdd)^2 -}} -testAB<-nv*testAB/(nv*K*nv*K*sum(diag(conab%*%v))) -nu1B<-(sum(diag(Pb%*%sv)))^2/sum((diag(Pb%*%sv%*%Pb%*%sv))) -nu1A<-(J-1)^2/(1+J*(J-2)*uv/sv2^2) -nu2A<-sv2^2/dv -nu1AB<-(sum(diag(conab%*%v)))^2/sum(diag(conab%*%v%*%conab%*%v)) -sig.A<-1-pf(testA,nu1A,nu2A) -sig.B<-1-pf(testB,nu1B,1000000) -sig.AB<-1-pf(testAB,nu1AB,1000000) -list(test.A=testA,p.value.A=sig.A,test.B=testB,p.value.B=sig.B,test.AB=testAB, -p.value.AB=sig.AB,avg.ranks=rbjk,rel.effects=phat) -} - - - -rqtest<-function(x,y,qval=.5,nboot=200,alpha=.05,SEED=TRUE,xout=FALSE,outfun=outpro,...){ -# -# Omnibus test when using a quantile regression estimator -# -x<-as.matrix(x) -if(xout){ -x<-as.matrix(x) -flag<-outfun(x,...)$keep -x<-x[flag,] -y<-y[flag] -} -x<-as.matrix(x) -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -print("Taking bootstrap samples. Please wait.") -data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot) -bvec<-apply(data,1,rqtest.sub,x,y,qval=qval) -# bvec is a p+1 by nboot matrix. The first row -# contains the bootstrap intercepts, the second row -# contains the bootstrap values for first predictor, etc. -p<-ncol(x) -if(p==1)stop("Use qregci when p=1") -n<-length(y) -np<-p+1 -bvec<-t(bvec) -semat<-var(bvec[,2:np]) - -#temp<-rqfit(x,y,qval=qval)$coef[2:np] -temp<-qreg(x,y,qval=qval)$coef[2:np] -temp<-as.matrix(temp) -test<-t(temp)%*%solve(semat)%*%temp -test<-test*(n-p)/((n-1)*p) -p.value<-1-pf(test,p,n-p) -# Determine adjusted critical level, if possible. 
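-# (For n<=60 and alpha=.1, .05, .025 or .01 the adjusted level is the linear
-# function b1*n+b0, with b0 and b1 depending on the number of predictors p,
-# and is not allowed to fall below alpha; otherwise NULL is returned.)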
-adjusted.alpha=NULL -b1=NULL -if(n<=60){ -if(alpha==.1){ -if(p==2){ -b1<-0-0.001965 -b0<-.2179 -} -if(p==3){ -b1<-0-.003 -b0<-.2814 -} -if(p==4){ -b1<-0-.0058 -b0<-.4478 -} -if(p==5){ -b1<-0-.00896 -b0<-.6373 -} -if(p>=6){ -b1<-0-.0112 -b0<-.7699 -}} -if(alpha==.05){ -if(p==2){ -b1<-0-0.001173 -b0<-.1203 -} -if(p==3){ -b1<-0-.00223 -b0<-.184 -} -if(p==4){ -b1<-0-.00476 -b0<-.3356 -} -if(p==5){ -b1<-0-.0063 -b0<-.425 -} -if(p==6){ -b1<-0-.00858 -b0<-.5648 -}} -if(alpha==.025){ -if(p==2){ -b1<-0-0.00056 -b0<-.05875 -} -if(p==3){ -b1<-0-.00149 -b0<-.1143 -} -if(p==4){ -b1<-0-.00396 -b0<-.2624 -} -if(p==5){ -b1<-0-.00474 -b0<-.3097 -} -if(p==6){ -b1<-0-.0064 -b0<-.4111 -}} -if(alpha==.01){ -if(p==2){ -b1<-0-0.00055 -b0<-.043 -} -if(p==3){ -b1<-0-.00044 -b0<-.0364 -} -if(p==4){ -b1<-0-.0024 -b0<-.1546 -} -if(p==5){ -b1<-0-.00248 -b0<-.159 -} -if(p==6){ -b1<-0-.00439 -b0<-.2734 -}} -if(!is.null(b1))adjusted.alpha<-b1*n+b0 -adjusted.alpha<-max(alpha,adjusted.alpha) -} -list(test.stat=test,p.value=p.value,adjusted.alpha=adjusted.alpha) -} - - -runpd<-function(x,y,pts=x,est=tmean,fr=.8,plotit=TRUE,pyhat=FALSE,nmin=0,scale=TRUE, -expand=.5,xout=FALSE,outfun=out,pr=TRUE,xlab="X1",ylab="X2",zlab="",LP=TRUE, -theta=50,phi=25,duplicate="error",MC=FALSE,ticktype="simple",...){ -# -# running mean using interval method -# Distances from a point are determined using a projection method -# see function pdclose -# -# fr controls amount of smoothing -# tr is the amount of trimming -# x is an n by p matrix of predictors. -# -if(is.list(x))stop("Data should not stored be stored in list mode") -x<-as.matrix(x) -pval<-ncol(x) -xx<-cbind(x,y) -xx<-elimna(xx) -x<-xx[,1:pval] -x<-as.matrix(x) -y<-xx[,pval+1] -if(xout){ -keepit<-outfun(x,plotit=FALSE)$keep -x<-x[keepit,] -y<-y[keepit] -} -plotit<-as.logical(plotit) -iout<-c(1:nrow(x)) -rmd<-1 # Initialize rmd -nval<-1 -nmat<-pdclose(x,pts,fr=fr,MC=MC) -for(i in 1:nrow(pts))rmd[i]<-est(y[nmat[i,]],...) -for(i in 1:nrow(pts))nval[i]<-sum(nmat[i,]) -if(ncol(x)==2){ -if(plotit){ -library(akima) -fitr<-rmd[nval>nmin] -y<-y[nval>nmin] -x<-x[nval>nmin,] -iout<-c(1:length(fitr)) -nm1<-length(fitr)-1 -for(i in 1:nm1){ -ip1<-i+1 -for(k in ip1:length(fitr))if(sum(x[i,]==x[k,])==2)iout[k]<-0 -} -if(plotit){ -if(pr){ -if(!scale)print("With dependence, suggest using scale=T") -} -fitr<-rmd[nval>nmin] -iout<-c(1:length(fitr)) -nm1<-length(fitr)-1 -for(i in 1:nm1){ -ip1<-i+1 -for(k in ip1:length(fitr))if(sum(x[i,]==x[k,])==2)iout[k]<-0 -} -fitr<-fitr[iout>=1] # Eliminate duplicate points in the x-y plane -# This is necessary when doing three dimensional plots -# with the R function interp -mkeep<-x[iout>=1,] -if(LP)fitr=lplot(x[iout>=1,],fitr,pyhat=TRUE,pr=FALSE,plotit=FALSE)$yhat -fit<-interp(mkeep[,1],mkeep[,2],fitr,duplicate=duplicate) -persp(fit,theta=theta,phi=phi,expand=expand, -scale=scale,xlab=xlab,ylab=ylab,zlab=zlab,ticktype=ticktype) -}}} -if(pyhat)last<-rmd -if(!pyhat)last <- "Done" - last -} - -sppbi<-function(J,K,x,est=tmean,JK=J*K,grp=c(1:JK),nboot=500,SEED=TRUE,pr=TRUE,...){ -# -# A percentile bootstrap for interactions -# in a split-plot design. -# The analysis is done by taking difference scores -# among all pairs of dependent groups and seeing whether -# these differences differ across levels of Factor A. -# -# The R variable x is assumed to contain the raw -# data stored in list mode or in a matrix. -# If in list mode, x[[1]] contains the data -# for the first level of both factors: level 1,1. 
-# x[[2]] is assumed to contain the data for level 1 of the -# first factor and level 2 of the second: level 1,2 -# x[[K]] is the data for level 1,K -# x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc. -# -# If the data are in a matrix, column 1 is assumed to -# correspond to x[[1]], column 2 to x[[2]], etc. -# -# -# When in list mode x is assumed to have length JK, the total number of -# groups being tested, but a subset of the data can be analyzed -# using grp -# -if(pr)print('As of Oct. 2014, argument est defaults to tmean') -library(MASS) - if(is.matrix(x)) { - y <- list() - for(j in 1:ncol(x)) - y[[j]] <- x[, j] - x <- y -} - -JK<-J*K -MJ<-(J^2-J)/2 -MK<-(K^2-K)/2 -JMK<-J*MK -Jm<-J-1 -data<-list() -for(j in 1:length(x)){ -data[[j]]<-x[[grp[j]]] # Now have the groups in proper order. -} -x<-data -jp<-1-K -kv<-0 -kv2<-0 -for(j in 1:J){ -jp<-jp+K -xmat<-matrix(NA,ncol=K,nrow=length(x[[jp]])) -for(k in 1:K){ -kv<-kv+1 -xmat[,k]<-x[[kv]] -} -xmat<-elimna(xmat) -for(k in 1:K){ -kv2<-kv2+1 -x[[kv2]]<-xmat[,k] -}} -xx<-x -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -# Next determine the n_j values -nvec<-NA -jp<-1-K -for(j in 1:J){ -jp<-jp+K -nvec[j]<-length(x[[jp]]) -} -# -# Now take bootstrap samples from jth level -# of Factor A and average K corresponding estimates -# of location. -# -bloc<-matrix(NA,ncol=J,nrow=nboot) -#print("Taking bootstrap samples. Please wait.") -mvec<-NA -it<-0 -for(j in 1:J){ -paste("Working on level ",j," of Factor A") -x<-matrix(NA,nrow=nvec[j],ncol=MK) -# -im<-0 -for(k in 1:K){ -for(kk in 1:K){ -if(k1)bloc<-cbind(bloc,bvec) -} -# -MJMK<-MJ*MK -con<-matrix(0,nrow=JMK,ncol=MJMK) -cont<-matrix(0,nrow=J,ncol=MJ) -ic<-0 -for(j in 1:J){ -for(jj in 1:J){ -if(j1){ -for(k in 2:MK){ -con1<-push(con1) -con<-cbind(con,con1) -}} -bcon<-t(con)%*%t(bloc) #C by nboot matrix -tvec<-t(con)%*%mvec -tvec<-tvec[,1] -tempcen<-apply(bcon,1,mean) -vecz<-rep(0,ncol(con)) -bcon<-t(bcon) -temp=bcon -for(ib in 1:nrow(temp))temp[ib,]=temp[ib,]-tempcen+tvec -smat<-var(temp) -if(sum(is.na(smat))==0){ -chkrank<-qr(smat)$rank -bcon<-rbind(bcon,vecz) -if(chkrank==ncol(smat))dv<-mahalanobis(bcon,tvec,smat) -if(chkrank0)print('Computational Problem. Try est=tmean or use function spmcpi or tsplitbt') -bplus<-nboot+1 -sig.level<-1-sum(dv[bplus]>=dv[1:nboot])/nboot -list(p.value=sig.level,psihat=tvec,con=con) -} -sppba<-function(J,K,x,est=tmean,JK=J*K,grp=c(1:JK),avg=TRUE,nboot=500,SEED=TRUE, -MC=FALSE,MDIS=FALSE,pr=TRUE,...){ -# -# A percentile bootstrap for main effects -# among independent groups in a split-plot design -# -# avg=T: The analysis is done by averaging K measures of -# location for each level of Factor A, -# and then comparing averages by testing the hypothesis -# that all pairwise differences are equal to zero. -# -# avg=F: The analysis is done by testing whether $K$ equalities are -# simultaneously true. For kth level of Factor B, the kth equality is -# theta_{1k}= ... theta_{Jk}, k=1,...,K. -# -# The R variable x is assumed to contain the raw -# data stored in list mode or in a matrix. -# If in list mode, x[[1]] contains the data -# for the first level of both factors: level 1,1. -# x[[2]] is assumed to contain the data for level 1 of the -# first factor and level 2 of the second: level 1,2 -# x[[K]] is the data for level 1,K -# x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc. -# -# If the data are in a matrix, column 1 is assumed to -# correspond to x[[1]], column 2 to x[[2]], etc. 
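-#
-# As an illustration (hypothetical data): three independent groups each
-# measured at four times (J=3, K=4), with the 12 groups stored as the
-# columns of a matrix m: sppba(3,4,m,est=tmean,avg=TRUE)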
-# -# When in list mode x is assumed to have length JK, the total number of -# groups being tested, but a subset of the data can be analyzed -# using grp -# -if(pr)print('As of Oct. 2014 the argument est defaults to tmean') -library(MASS) - if(is.matrix(x)) { - y <- list() - for(j in 1:ncol(x)) - y[[j]] <- x[, j] - x <- y -} - -JK<-J*K -data<-list() -for(j in 1:length(x)){ -data[[j]]<-x[[grp[j]]] # Now have the groups in proper order. -} -x<-data -jp<-1-K -kv<-0 -kv2<-0 -for(j in 1:J){ -jp<-jp+K -xmat<-matrix(NA,ncol=K,nrow=length(x[[jp]])) -for(k in 1:K){ -kv<-kv+1 -xmat[,k]<-x[[kv]] -} -xmat<-elimna(xmat) -for(k in 1:K){ -kv2<-kv2+1 -x[[kv2]]<-xmat[,k] -} -} -xx<-x -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -# Next determine the n_j values -nvec<-NA -jp<-1-K -for(j in 1:J){ -jp<-jp+K -nvec[j]<-length(x[[jp]]) -} -# -# Now take bootstrap samples from jth level -# of Factor A. -# -bloc<-matrix(NA,nrow=J,ncol=nboot) -#print("Taking bootstrap samples. Please wait.") -mvec<-NA -ik<-0 -for(j in 1:J){ -paste("Working on level ",j," of Factor A") -x<-matrix(NA,nrow=nvec[j],ncol=K) -# -for(k in 1:K){ -ik<-ik+1 -x[,k]<-xx[[ik]] -if(!avg)mvec[ik]<-est(xx[[ik]],...) -} -tempv<-apply(x,2,est,...) -data<-matrix(sample(nvec[j],size=nvec[j]*nboot,replace=TRUE),nrow=nboot) -bvec<-matrix(NA,ncol=K,nrow=nboot) -for(k in 1:K){ -temp<-x[,k] -bvec[,k]<-apply(data,1,rmanogsub,temp,est,...) # An nboot by K matrix -} -if(avg){ -mvec[j]<-mean(tempv) -bloc[j,]<-apply(bvec,1,mean) -} -if(!avg){ -if(j==1)bloc<-bvec -if(j>1)bloc<-cbind(bloc,bvec) -} -} -if(avg){ -d<-(J^2-J)/2 -con<-matrix(0,J,d) -id<-0 -Jm<-J-1 -for (j in 1:Jm){ -jp<-j+1 -for(k in jp:J){ -id<-id+1 -con[j,id]<-1 -con[k,id]<-0-1 -}}} -if(!avg){ -MJK<-K*(J^2-J)/2 # NUMBER OF COMPARISONS -JK<-J*K -MJ<-(J^2-J)/2 -cont<-matrix(0,nrow=J,ncol=MJ) -ic<-0 -for(j in 1:J){ -for(jj in 1:J){ -if(j1){ -for(k in 2:K){ -con1<-push(con1) -con<-cbind(con,con1) -}}} -if(!avg)bcon<-t(con)%*%t(bloc) #C by nboot matrix -if(avg)bcon<-t(con)%*%(bloc) -tvec<-t(con)%*%mvec -tvec<-tvec[,1] -tempcen<-apply(bcon,1,mean) -vecz<-rep(0,ncol(con)) -bcon<-t(bcon) -temp=bcon -for(ib in 1:nrow(temp))temp[ib,]=temp[ib,]-tempcen+tvec -bcon<-rbind(bcon,vecz) -if(!MDIS){ -if(!MC)dv=pdis(bcon,center=tvec,na.rm=FALSE) -if(MC)dv=pdisMC(bcon,center=tvec) -lbcon=length(elimna(bcon)) -bplus<-nboot+1 -if(lbcon=dv[1:nboot])/nboot -list(p.value=sig.level,psihat=tvec,con=con) -} - -outpro<-function(m,gval=NA,center=NA,plotit=TRUE,op=TRUE,MM=FALSE,cop=3, -xlab="VAR 1",ylab="VAR 2",STAND=TRUE,tr=.2,q=.5,pr=TRUE,...){ -# -# Detect outliers using a modification of the -# Stahel-Donoho projection method. -# -# Determine center of data cloud, for each point, -# connect it with center, project points onto this line -# and use distances between projected points to detect -# outliers. A boxplot method is used on the -# projected distances. -# -# plotit=TRUE creates a scatterplot when working with -# bivariate data. -# -# op=T -# means the .5 depth contour is plotted -# based on data with outliers removed. -# -# op=F -# means .5 depth contour is plotted without removing outliers. -# -# MM=F Use interquatile range when checking for outliers -# MM=T uses MAD. -# -# If value for center is not specified, -# there are four options for computing the center of the -# cloud of points when computing projections: -# -# cop=2 uses MCD center -# cop=3 uses median of the marginal distributions. 
-# cop=4 uses MVE center -# cop=5 uses TBS -# cop=6 uses rmba (Olive's median ball algorithm)# cop=7 uses the spatial (L1) median -# -# args q and tr having are not used by this function. They are included to deal -# with situations where smoothers have optional arguments for q and tr -# -# When using cop=2, 3 or 4, default critical value for outliers -# is square root of the .975 quantile of a -# chi-squared distribution with p degrees -# of freedom. -# -# STAND=T means that marginal distributions are standardized before -# checking for outliers. -# -# Donoho-Gasko (Tukey) median is marked with a cross, +. -# -m<-as.matrix(m) -if(pr){ -if(!STAND){ -if(ncol(m)>1)print("STAND=FALSE. If measures are on different scales, might want to use STAND=TRUE") -}} -library(MASS) -m=elimna(m) -m<-as.matrix(m) -nv=nrow(m) -if(ncol(m)==1){ -dis<-(m-median(m,na.rm=TRUE))^2/mad(m,na.rm=TRUE)^2 -dis<-sqrt(dis) -dis[is.na(dis)]=0 -crit<-sqrt(qchisq(.975,1)) -chk<-ifelse(dis>crit,1,0) -vec<-c(1:nrow(m)) -outid<-vec[chk==1] -keep<-vec[chk==0] -} -if(ncol(m)>1){ -M=m -if(STAND)m=standm(m,est=median,scat=mad) -if(is.na(gval) && cop==1)gval<-sqrt(qchisq(.95,ncol(m))) -if(is.na(gval) && cop!=1)gval<-sqrt(qchisq(.975,ncol(m))) -if(cop==1 && is.na(center[1])){ -if(ncol(m)>2)center<-dmean(m,tr=.5,cop=1) -if(ncol(m)==2){ -tempd<-NA -for(i in 1:nrow(m)) -tempd[i]<-depth(m[i,1],m[i,2],m) -mdep<-max(tempd) -flag<-(tempd==mdep) -if(sum(flag)==1)center<-m[flag,] -if(sum(flag)>1)center<-apply(m[flag,],2,mean) -}} -if(cop==2 && is.na(center[1])){ -center<-cov.mcd(m)$center -} -if(cop==4 && is.na(center[1])){ -center<-cov.mve(m)$center -} -if(cop==3 && is.na(center[1])){ -center<-apply(m,2,median) -} -if(cop==5 && is.na(center[1])){ -center<-tbs(m)$center -} -if(cop==6 && is.na(center[1])){ -center<-rmba(m)$center -} -if(cop==7 && is.na(center[1])){ -center<-spat(m) -} -flag<-rep(0, nrow(m)) -outid <- NA -vec <- c(1:nrow(m)) -for (i in 1:nrow(m)){ -B<-m[i,]-center -dis<-NA -BB<-B^2 -bot<-sum(BB) -if(bot!=0){ -for (j in 1:nrow(m)){ -A<-m[j,]-center -temp<-sum(A*B)*B/bot -dis[j]<-sqrt(sum(temp^2)) -} -temp<-idealf(dis) -if(!MM)cu<-median(dis)+gval*(temp$qu-temp$ql) -if(MM)cu<-median(dis)+gval*mad(dis) -outid<-NA -temp2<-(dis> cu) -flag[temp2]<-1 -}} -if(sum(flag) == 0) outid <- NA -if(sum(flag) > 0)flag<-(flag==1) -outid <- vec[flag] -idv<-c(1:nrow(m)) -keep<-idv[!flag] -if(ncol(m)==2){ -if(plotit){ -m=M # plot data using the original scale. -plot(m[,1],m[,2],type="n",xlab=xlab,ylab=ylab) -points(m[keep,1],m[keep,2],pch="*") -if(length(outid)>0)points(m[outid,1],m[outid,2],pch="o") -if(op){ -tempd<-NA -keep<-keep[!is.na(keep)] -mm<-m[keep,] -for(i in 1:nrow(mm))tempd[i]<-depth(mm[i,1],mm[i,2],mm) -mdep<-max(tempd) -flag<-(tempd==mdep) -if(sum(flag)==1)center<-mm[flag,] -if(sum(flag)>1)center<-apply(mm[flag,],2,mean) -m<-mm -} -points(center[1],center[2],pch="+") -x<-m -temp<-fdepth(m,plotit=FALSE) -flag<-(temp>=median(temp)) -xx<-x[flag,] -xord<-order(xx[,1]) -xx<-xx[xord,] -temp<-chull(xx) -lines(xx[temp,]) -lines(xx[c(temp[1],temp[length(temp)]),]) -}}} -list(n=nv,n.out=length(outid),out.id=outid,keep=keep) -} - -skerd<-function(x,op=TRUE,kernel="gaussian",xlab='X',ylab=''){ -# -# Compute kernel density estimate -# for univariate data using S+ function density -# -# kernel=epanechnikov will use the Epanechnikov kernel. 
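-#
-# op=TRUE calls density() with its default bandwidth; op=FALSE uses the
-# Sheather-Jones (dpi) bandwidth via bandwidth.sj.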
-# -if(!op)temp<-density(x,na.rm=TRUE,width=bandwidth.sj(x,method="dpi"),n=256) -if(op)temp<-density(x) -plot(temp$x,temp$y,type="n",ylab=ylab,xlab=xlab) -lines(temp$x,temp$y) -} - - -bkreg<-function(x,y,kerfun=akerd,pyhat=FALSE,plotit=TRUE,xlab="X",ylab="Y", -zlab="Z",xout=FALSE,outfun=outpro,pr=TRUE,theta=50,phi=25,duplicate="error", -expand=.5,scale=FALSE,ticktype="simple",...){ -# -# Kernel estimator for binary regression. -# (See Signorini and Jones, JASA, 2004, 119-) -# -x=as.matrix(x) -p=ncol(x) -p1=p+1 -xx<-elimna(cbind(x,y)) -x<-xx[,1:p] -y<-xx[,p1] -if(xout){ -m<-cbind(x,y) -flag<-outfun(x,plotit=FALSE,...)$keep -m<-m[flag,] -x<-m[,1:p] -y<-m[,p1] -} -x=as.matrix(x) -flag<-(y==1) -mv=sum(flag) -nv=sum(!flag) -phat<-NA -fhat<-kerfun(x[flag,],pyhat=TRUE,plotit=FALSE,pts=x) -ghat<-kerfun(x[!flag,],pyhat=TRUE,plotit=FALSE,pts=x) -phat<-mv*fhat/(mv*fhat+nv*ghat) -if(p==1){ -if(plotit){ -plot(x,y,xlab=xlab,ylab=ylab) -flag2<-order(x) -#lines(x[flag2],phat[flag2]) -lines(x[flag2],phat) -}} -if(p==2){ -if(plotit){ -library(akima) -if(pr){ -if(!scale)print("With dependence, suggest using scale=T") -} -fitr<-phat -iout<-c(1:length(fitr)) -nm1<-length(fitr)-1 -for(i in 1:nm1){ -ip1<-i+1 -for(k in ip1:length(fitr))if(sum(x[i,]==x[k,])==2)iout[k]<-0 -} -fitr<-fitr[iout>=1] # Eliminate duplicate points in the x-y plane -# This is necessary when doing three dimensional plots -# with the R function interp -mkeep<-x[iout>=1,] -fit<-interp(mkeep[,1],mkeep[,2],fitr,duplicate=duplicate) -persp(fit,theta=theta,phi=phi,expand=expand, -scale=scale,xlab=xlab,ylab=ylab,zlab=zlab,ticktype=ticktype) -}} -if(!pyhat)phat<-"Done" -phat -} - -logSM<-function(x,y,pyhat=FALSE,plotit=TRUE,xlab="X",ylab="Pred.Prob", -zlab=" ",xout=FALSE,outfun=outpro,pr=TRUE,theta=50,phi=25,duplicate="error",LP=TRUE,Lspan=.75, -expand=.5,scale=TRUE,fr=2,ticktype="simple",...){ -# -# A smoother designed specifically for binary outcomes -# LP=TRUE: With two independent variables, smooth the initial smooth using LOESS -# -# fr is span -# Lspan: when plotting the regression surface, -# LP =TRUE -# means that the plot will be smoothed using LOESS -# Lspan is the span used by LOESS -# -y=chbin2num(y) -x=as.matrix(x) -p=ncol(x) -p1=p+1 -xx<-elimna(cbind(x,y)) -x<-xx[,1:p] -y<-xx[,p1] -if(xout){ -m<-cbind(x,y) -flag<-outfun(x,plotit=FALSE,...)$keep -m<-m[flag,] -x<-m[,1:p] -y<-m[,p1] -} -if(length(unique(y))>2)stop('y should be binary') -# Next convert y to 0 and 1s -n=length(y) -yy=rep(0,n) -y=as.vector(y) -flag=y==max(y) -yy[flag]=1 -y=yy -x=as.matrix(x) -library(MASS) -m=cov.mve(x) -flag<-(y==1) -phat<-NA -m1=matrix(NA,nrow=length(y),ncol=length(y)) -for(i in 1:nrow(x))m1[,i]<-mahalanobis(x,x[i,],m$cov) -m2<-exp(-1*m1)*(sqrt(m1)<=fr) -m3<-matrix(y,length(y),length(y))*m2 -phat=apply(m3,2,sum)/apply(m2,2,sum) -if(p==1){ -if(plotit){ -plot(x,y,xlab=xlab,ylab=ylab) -flag2<-order(x) -lines(x[flag2],phat[flag2]) -}} -if(p==2){ -if(plotit){ -library(akima) -if(pr){ -if(!scale)print("With dependence, suggest using scale=TRUE") -} -fitr<-phat -iout<-c(1:length(fitr)) -nm1<-length(fitr)-1 -for(i in 1:nm1){ -ip1<-i+1 -for(k in ip1:length(fitr))if(sum(x[i,]==x[k,])==2)iout[k]<-0 -} -fitr<-fitr[iout>=1] # Eliminate duplicate points in the x-y plane -# This is necessary when doing three dimensional plots -# with the R function interp -if(LP){ -fitr=lplot(x[iout>=1,],fitr,pyhat=TRUE,pr=FALSE,plotit=FALSE,span=Lspan)$yhat -fitr[fitr>1]=1 -fitr[fitr<0]=0 -} -mkeep<-x[iout>=1,] -fit<-interp(mkeep[,1],mkeep[,2],fitr,duplicate=duplicate) 
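-# draw the interpolated surface with persp; zlim=c(0,1) keeps the vertical
-# axis on the probability scale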
-persp(fit,theta=theta,phi=phi,expand=expand,zlim=c(0,1), -scale=scale,xlab=xlab,ylab=ylab,zlab=zlab,ticktype=ticktype) -}} -if(!pyhat)phat<-"Done" -phat -} - -logreg.pred<-function(x,y,pts=x,xout=FALSE,outfun=outpro,ROB=FALSE,ridge=FALSE){ -# -# logistic regression: estimate the probability of success for points in pts -# Default is to use pts=x -# -if(!ridge){ -if(!ROB)est=logreg(x,y,xout=xout,outfun=outfun)[,1] -else -est=wlogreg(x,y,)$coef -} -if(ridge)est=logistic.ridge(x,y,xout=xout,outfun=outfun,ROB=ROB)$ridge.est -p=length(est) -if(p==2){z=exp(est[1]+est[2]*pts) -pr=z/(1+z) -} -if(p>2){ -pr=NA -pts=as.matrix(pts) -if(ncol(pts)==1)pts=t(pts) -n=nrow(pts) -if(!is.matrix(pts))stop('pts should be a matrix') -if(ncol(pts)!=ncol(x))stop('pts should have the same number of col. as x') -for(i in 1:n){ -z=exp(est[1]+sum(est[2:p]*pts[i,])) -pr[i]=z/(1+z) -} -} -pr -} - - - -YYmanova<-function(x1,x2,tr=.2){ -# -# Do MANOVA using generalization of -# Yanagihara, H. \& Yuan, K. H. (2005). -# Three approximate solutions to the -# multivariate Behrens-Fisher problem. Communications in Statistics-- -# Simulation and Computation, 34, 975--988; see their eq. (2.7). -# -# x1 and x2 are assumed to be matrices -# -x1=elimna(x1) -x2=elimna(x2) -s1=winall(x1,tr=tr)$cov -s2=winall(x2,tr=tr)$cov -n1=nrow(x1) -n2=nrow(x2) -n=n1+n2 -g1=floor(n1*tr) -g2=floor(n2*tr) -h1=n1-2*g1 -h2=n2-2*g2 -h=h1+h2 -sbar=n2*s1/n+n1*s2/n -sbarinv=solve(sbar) -psi1=n2^2*(n-2)*(sum(diag(s1%*%sbarinv)))^2/(n^2*(n1-1))+ -n1^2*(n-2)*(sum(diag(s2%*%sbarinv)))^2/(n^2*(n2-1)) -psi2=n2^2*(n-2)*(sum(diag(s1%*%sbarinv%*%s1%*%sbarinv)))/(n^2*(n1-1))+ -n1^2*(n-2)*(sum(diag(s2%*%sbarinv%*%s2%*%sbarinv)))/(n^2*(n2-1)) -p=ncol(x1) -theta1=(p*psi1+(p-2)*psi2)/(p*(p+2)) -theta2=(psi1+2*psi2)/(p*(p+2)) -nuhat=(h-2-theta1)^2/((h-2)*theta2-theta1) -xb1=apply(x1,2,mean,tr=tr) -xb2=apply(x2,2,mean,tr=tr) -dif=xb1-xb2 -dif=as.matrix(dif) -Ttest=t(dif)%*%solve((n1-1)*s1/(h1*(h1-1))+(n2-1)*s2/(h2*(h2-1)))%*%dif -TF=(n-2-theta1)*Ttest/((n-2)*p) -pv=1-pf(TF,p,nuhat) -list(test.stat=TF,p.value=pv) -} - - -logreg<-function(x,y,xout=FALSE,outfun=outpro,plotit=FALSE,POLY=FALSE, -xlab='X',ylab='Y',zlab='',scale=TRUE ,expand=.5,theta=50,phi=25, -duplicate='error',ticktype='simple',...){ -# -# Perform logistic regression. -# The predictors are assumed to be stored in the n by p matrix x. -# The y values should be 1 or 0. -# -# xout=TRUE will remove outliers from among the x values and then fit -# the regression line. -# Default: -# One predictor, a mad-median rule is used. -# With more than one, projection method is used. 
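-# For example (hypothetical data): logreg(x,y,xout=TRUE) removes outlying
-# rows of x with outpro before fitting, and returns the glm coefficient
-# table with Hochberg-adjusted p-values for the slopes appended.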
-# -# outfun=out will use MVE method -# -# plotit=TRUE will plot regression line -# POLY=T, will plot regression line assuming predictor -# is in col 1 of x and other columns are x (in col 1) raised to some power -# or some other function of x -# -y=chbin2num(y) -x<-as.matrix(x) -p=ncol(x) -xy=elimna(cbind(x,y)) -n=nrow(xy) -x=xy[,1:ncol(x)] -y=xy[,ncol(xy)] -x<-as.matrix(x) - -yy=rep(1,n) -vals=sort(unique(y)) -if(length(vals)!=2)stop('y should be binary') -flag=y==vals[2] -yy[!flag]=0 -y=yy - -if(xout){ -flag<-outfun(x,plotit=FALSE,...)$keep -x<-x[flag,] -y<-y[flag] -} -x<-as.matrix(x) -if(p==1 || POLY){ -xord=order(x[,1]) -x=x[xord,] -y=y[xord] -} -fitit=glm(formula=y~x,family=binomial) -init<-summary(fitit) -if(plotit){ -vals=fitted.values(fitit) -if(p==1){ -plot(x,y,xlab=xlab,ylab=ylab) -lines(x,vals) -} -if(p==2){ -if(!scale)print('With dependence, suggest using scale=TRUE') -fitr=vals -iout<-c(1:length(fitr)) -nm1<-length(fitr)-1 -for(i in 1:nm1){ -ip1<-i+1 -for(k in ip1:length(fitr))if(sum(x[i,]==x[k,])==2)iout[k]<-0 -} -fitr<-fitr[iout>=1] # Eliminate duplicate points in the x-y plane -# This is necessary when doing three dimensional plots -# with the R function interp -mkeep<-x[iout>=1,] -fit<-interp(mkeep[,1],mkeep[,2],fitr,duplicate=duplicate) -persp(fit,theta=theta,phi=phi,expand=expand, -scale=scale,xlab=xlab,ylab=ylab,zlab=zlab,ticktype=ticktype) -} -} -init$coef -p1=p+1 -p.adjusted.slopes=c(init$coef[1,1],p.adjust(init$coef[2:p1,4],method='hoch')) -p.adjusted.slopes[1]=NA -a=cbind(init$coef,p.adjusted.slopes) -a -} - -rplot.bin<-function(x,y,est=mean,scat=TRUE,fr=NULL,plotit=TRUE,pyhat=FALSE,pts=x,LP=FALSE, -theta=50,phi=25,scale=TRUE,expand=.5,SEED=TRUE, -nmin=0,xout=FALSE,outfun=outpro,xlab=NULL,ylab=NULL, -zlab='P(Y=1)',pr=TRUE,duplicate='error',...){ -# -# This function applies the running interval smoother, but is designed -# specifically for situations where y is binary. -# -# duplicate='error' -# In some situations where duplicate values occur, when plotting with -# two predictors, it is necessary to set duplicate='strip' -# -y=chbin2num(y) -x<-as.matrix(x) -p1<-ncol(x)+1 -p<-ncol(x) -xy<-cbind(x,y) -xy<-elimna(xy) -x<-xy[,1:p] -y<-xy[,p1] -x<-as.matrix(x) -if(length(unique(y))!=2)stop('y is not binary') -n=length(y) -Y=rep(0,n) -flag=y==max(y) -Y[flag]=1 -y=Y -x<-as.matrix(x) -if(ncol(x)==1){ -if(is.null(ylab))ylab='P(Y=1)' -if(is.null(xlab))ylab='X' -if(is.null(fr))fr=.8 -a=rplot(x,y,est=mean,xout=xout,outfun=outfun,fr=fr,xlab=xlab,ylab=ylab,pr=FALSE,LP=LP) -} -if(ncol(x)>1){ -id=chk4binary(x) -Lid=length(id) -if(Lid>0)stop('Binary independent variables detected. Use rplot.binv2') -if(is.null(xlab))xlab='X1' -if(is.null(ylab))ylab='X2' -if(is.null(fr))fr=1.2 -if(ncol(x)==2){ -if(scale){ -if(pr){print('scale=T is specified.') -print('If there is independence, might want to use scale=F') -a=rplot(x,y,est=mean,xout=xout,outfun=outfun,fr=fr,xlab=xlab,ylab=ylab,zlab=zlab,scale=scale,pr=FALSE) -}}}} -if(!pyhat)val <- 'DONE' -if(pyhat)val=rplot.pred(x,y,pts=pts,est=mean,xout=xout,outfun=outfun,fr=fr) -val -} - - - -rplot.binCI<-function(x,y,pts=NULL,alpha=.05,nmin=5,xout=FALSE,outfun=outpro,fr=.5,tr.plot=FALSE, -method=NULL,plotit=TRUE,LP=TRUE,xlab='X',ylab='P(Y=1|X)',...){ -# -# An alternative to logistic regression. -# -# For a collection of intervals among the values in -# x, compute the probability of success and a confidence based on the corresponding y values -# -# Default: use the deciles to define the intervals -# -# Example: pts=c(-1,0,1,2). 
The intervals would be (-1,0), (0,1), (1,2). -# -y=chbin2num(y) -xx<-elimna(cbind(x,y)) -x<-xx[,1] -y<-xx[,2] -if(is.null(pts)){ -id=duplicated(x) -pts=x[!id] -} -else plotit=FALSE -if(xout){ -m<-cbind(x,y) -if(ncol(m)!=2)stop('Only one explanatory variable is allowed') -if(identical(outfun,outblp))flag=outblp(x,y,plotit=FALSE)$keep -else -flag<-outfun(x,plotit=FALSE,...)$keep -m<-m[flag,] -x<-m[,1] -y<-m[,2] -} -n=length(x) -xor=order(x) -x=x[xor] -y=y[xor] -pts=sort(pts) -npts=length(pts) -# -if(length(unique(y))>2)stop('y should be binary') -# -# Determine which method will be used: -if(is.null(method)){ -if(n<80)method='AC' -if(n>=80)method='CP' -} - -# Next convert y to 0 and 1s if not already 0 and 1s -yy=rep(0,n) -yc=as.character(y) -flag=yc==max(yc) -yy[flag]=1 -y=yy -# -rmd<-matrix(NA,nrow=npts,ncol=7) -for(i in 1:npts){ -isub=near(x,pts[i],fr) -if(sum(isub)>=nmin){ -z=y[isub] -v=binom.conf(y=z,method=method,alpha=alpha,pr=FALSE) -rmd[i,1]=v$n -rmd[i,2]=min(x[isub]) -rmd[i,3]=max(x[isub]) -rmd[i,4]=pts[i] -rmd[i,5]=v$phat -rmd[i,6:7]=v$ci -}} -rs=elimna(rmd) -dimnames(rmd)=list(NULL,c('n','low.end','upper.end','pts','p.hat','ci.low','ci.up')) -if(plotit){ -if(tr.plot){ -v=quantile(rmd[,4],probs=c(.1,.9),na.rm=TRUE) -flag=(rmd[,4]>=v[1] & rmd[,4]<=v[2]) -rmd=rmd[flag,] -} -ys=rs[,5] -plot(rs[,4],rs[,5],ylim=c(0,1),xlab=xlab,ylab=ylab,type='n') -if(LP){ -z1=lplot.pred(rmd[,4],rmd[,5],pts=rmd[,2])$yhat -flag=z1>1 -z1[flag]=1 -flag=z1<0 -z1[flag]=0 -z2=lplot.pred(rmd[,4],rmd[,6],pts=rmd[,2])$yhat -flag=z2>1 -z2[flag]=1 -flag=z2<0 -z2[flag]=0 -z3=lplot.pred(rmd[,4],rmd[,7],pts=rmd[,2])$yhat -flag=z3>1 -z3[flag]=1 -flag=(z3<0) -z3[flag]=0 -lines(rmd[,4],z1) -lines(rmd[,4],z2,lty=2) -lines(rmd[,4],z3,lty=2) -} -if(!LP){ -lines(rmd[,4],rmd[,5]) -lines(rmd[,4],rmd[,6],lty=2) -lines(rmd[,4],rmd[,7],lty=2) -}} -id=duplicated(rmd[,2:3]) -rmd=elimna(rmd[!id,]) -rmd -} - - -wlogregv2<-function(x0,y,initwml=FALSE,const=0.5,kmax=1e3,maxhalf=10) -{ -# Computation of the estimator of Bianco and Yohai (1996) in logistic regression -# ------------- -# This is a slightly modified version of code due to -# Christophe Croux, Gentiane Haesbroeck, and Kristel Joossens -# (Here initwml defaults to F -# -# This program computes the estimator of Bianco and Yohai in -# logistic regression. By default, an intercept term is included -# and p parameters are estimated. -# -# For more details we refer to -# Croux, C., and Haesbroeck, G. (2003), ``Implementing the Bianco and Yohai -# estimator for Logistic Regression'', -# Computational Statistics and Data Analysis, 44, 273-295 -# -#Input: -#------- -# x0= n x (p-1) matrix containing the explanatory variables; -# y= n-vector containing binomial response (0 or 1); -# -# initwml= logical value for selecting one of the two possible methods for computing -# the initial value of the optimization process. If initwml=T (default), a -# weighted ML estimator is computed with weights derived from the MCD estimator -# computed on the explanatory variables. If initwml=F, a classical ML fit is perfomed. -# When the explanatory variables contain binary observations, it is recommended -# to set initwml to F or to modify the code of the algorithm to compute the weights -# only on the continuous variables. -# const= tuning constant used in the computation of the estimator (default=0.5); -# kmax= maximum number of iterations before convergence (default=1000); -# maxhalf= max number of step-halving (default=10). 
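-# Note: unlike the original Croux--Haesbroeck code, initwml defaults to FALSE here,
-# so a classical ML fit initializes the optimization unless initwml=TRUE is requested.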
-# -# Example: -# x0=matrix(rnorm(100,1)) -# y0=numeric(runif(100)>0.5) -# BYlogreg(x0,y) -# -#Output: -#-------- -# list with -# 1st component: T or F if convergence achieved or not -# 2nd component: value of the objective function at the minimum -# p next components: estimates for the parameters. -# p last components: standard errors of the parameters (if first component is T) - -library(MASS) -x0=as.matrix(x0) -# n=nrow(x0) - p=ncol(x0)+1 -p0=p-1 - #Smallest value of the scale parameter before implosion - sigmamin=1e-4 - -# eliminate any rows with missing values -zz=elimna(cbind(x,y)) -x=as.matrix(zz[,1:p0]) -y=zz[,p] -n=nrow(x) -# x=as.matrix(cbind(rep(1,n),x0)) - x=as.matrix(cbind(rep(1,n),x)) - y=as.numeric(y) - - # Computation of the initial value of the optimization process - if (initwml==TRUE) - { - hp=floor(n*(1-0.25))+1 - mcdx=cov.mcd(x0, quantile.used =hp,method="mcd") - rdx=sqrt(mahalanobis(x0,center=mcdx$center,cov=mcdx$cov)) - vc=sqrt(qchisq(0.975,p-1)) - wrd=(rdx<=vc) - gstart=glm(y~x0,family=binomial,subset=wrd)$coef - } -else {gstart=glm(y~x0,family=binomial)$coef} - sigmastart=1/sqrt(sum(gstart^2)) - xistart=gstart*sigmastart - stscores=x %*% xistart -sigma1=sigmastart - #Initial value for the objective function - oldobj=mean(phiBY3(stscores/sigmastart,y,const)) - kstep=jhalf=1 - while ((kstep < kmax) & (jhalfoldobj)){ - hstep=hstep/2 - xi1=xistart+finalstep*hstep - xi1=xi1/sqrt(sum(xi1^2)) - scores1=x%*%xi1/sigma1 - newobj=mean(phiBY3(scores1,y,const)) - jhalf=jhalf+1 - } - CONV=FALSE - if ((jhalf==maxhalf+1) & (newobj>oldobj)) {CONV=TRUE - } else { - jhalf=1 - xistart=xi1 - oldobj=newobj - stscores=x%*% xi1 - kstep=kstep+1 - } - } - } - - if (kstep == kmax) { -CONV=FALSE # print("No convergence") - result=list(convergence=FALSE,objective=0,coef=t(rep(NA,p))) - } else { - gammaest=xistart/sigma1 - stander=sterby3(x0,y,const,gammaest) - result=list(convergence=CONV,coef=t(gammaest),sterror=stander) - } - return(result) -} - - - - -############################################################### -############################################################### -#Functions needed for the computation of estimator of Bianco and Yohai - -phiBY3 <- function(s,y,c3) -{ - s=as.double(s) - dev=log(1+exp(-abs(s)))+abs(s)*((y-0.5)*s<0) - return(rhoBY3(dev,c3)+GBY3Fs(s,c3)+GBY3Fsm(s,c3)) -} -rhoBY3 <- function(t,c3) -{ - (t*exp(-sqrt(c3))*as.numeric(t <= c3))+ - (((exp(-sqrt(c3))*(2+(2*sqrt(c3))+c3))-(2*exp(-sqrt(t))*(1+sqrt(t))))*as.numeric(t >c3)) -} -psiBY3 <- function(t,c3) -{(exp(-sqrt(c3))*as.numeric(t <= c3))+(exp(-sqrt(t))*as.numeric(t >c3))} -derpsiBY3 <- function(t,c3) -{ -res=NULL - for (i in 1:length(t)) -{ -if (t[i] <= c3) - { res=rbind(res,0) } -else -{res=rbind(res,-exp(-sqrt(t[i]))/(2*sqrt(t[i]))) } -} -res -} - - -sigmaBY3<-function(sigma,s,y,c3) {mean(phiBY3(s/sigma,y,c3))} - -derphiBY3=function(s,y,c3) -{ - Fs= exp(-(log(1+exp(-abs(s)))+abs(s)*(s<0))) - ds=Fs*(1-Fs) - dev=log(1+exp(-abs(s)))+abs(s)*((y-0.5)*s<0) - Gprim1=log(1+exp(-abs(s)))+abs(s)*(s<0) - Gprim2=log(1+exp(-abs(s)))+abs(s)*(s>0) - return(-psiBY3(dev,c3)*(y-Fs)+((psiBY3(Gprim1,c3)-psiBY3(Gprim2,c3))*ds)) -} - -der2phiBY3=function(s,y,c3) -{ - s=as.double(s) - Fs= exp(-(log(1+exp(-abs(s)))+abs(s)*(s<0))) - ds=Fs*(1-Fs) - dev=log(1+exp(-abs(s)))+abs(s)*((y-0.5)*s<0) - Gprim1=log(1+exp(-abs(s)))+abs(s)*(s<0) - Gprim2=log(1+exp(-abs(s)))+abs(s)*(s>0) - der2=(derpsiBY3(dev,c3)*(Fs-y)^2)+(ds*psiBY3(dev,c3)) - der2=der2+(ds*(1-2*Fs)*(psiBY3(Gprim1,c3)-psiBY3(Gprim2,c3))) - 
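-# Subtract the remaining term; the completed second derivative der2 is returned below.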
der2=der2-(ds*((derpsiBY3(Gprim1,c3)*(1-Fs))+(derpsiBY3(Gprim2,c3)*Fs))) - der2 -} - - -GBY3Fs <- function(s,c3) -{ - Fs= exp(-(log(1+exp(-abs(s)))+abs(s)*(s<0))) - resGinf=exp(0.25)*sqrt(pi)*(pnorm(sqrt(2)*(0.5+sqrt(-log(Fs))))-1) - resGinf=(resGinf+(Fs*exp(-sqrt(-log(Fs)))))*as.numeric(s <= -log(exp(c3)-1)) - resGsup=((Fs*exp(-sqrt(c3)))+(exp(0.25)*sqrt(pi)*(pnorm(sqrt(2)*(0.5+sqrt(c3)))-1)))*as.numeric(s > -log(exp(c3)-1)) - return(resGinf+resGsup) -} - - -GBY3Fsm <- function(s,c3) -{ - Fsm=exp(-(log(1+exp(-abs(s)))+abs(s)*(s>0))) - resGinf=exp(0.25)*sqrt(pi)*(pnorm(sqrt(2)*(0.5+sqrt(-log(Fsm))))-1) - resGinf=(resGinf+(Fsm*exp(-sqrt(-log(Fsm)))))*as.numeric(s >= log(exp(c3)-1)) - resGsup=((Fsm*exp(-sqrt(c3)))+(exp(0.25)*sqrt(pi)*(pnorm(sqrt(2)*(0.5+sqrt(c3)))-1)))*as.numeric(s < log(exp(c3)-1)) - return(resGinf+resGsup) -} - -sterby3 <- function(x0,y,const,estim) -{ - n=nrow(x0) - p=ncol(x0)+1 - - z=cbind(matrix(1,nrow=n),x0) - argum=z %*% estim - - matM=matrix(data=0,nrow=p,ncol=p) - IFsquar=matrix(data=0,nrow=p,ncol=p) - for (i in 1:n) -{ -myscalar=as.numeric(der2phiBY3(argum[i],y[i],const)) -matM=matM+myscalar * (z[i,] %*% t(z[i,])) -IFsquar=IFsquar+myscalar^2 * (z[i,] %*% t(z[i,])) -} - matM=matM/n - matMinv=solve(matM) - IFsquar=IFsquar/n - asvBY=matMinv %*% IFsquar %*% t(matMinv) - sqrt(diag(asvBY))/sqrt(n) -} - - -long2mat<-function(x,Sid.col,dep.col){ -# -# Have data in a matrix or data frame, x -# Sid.col indicates Subject's id -# Here, each subject has one or more rows of data -# -# Goal: store the data in a data frame where -# each row contains all of the data for an individual -# subject. -# -# dep.col indicates column of the outcome (dependent) variable -# This version assumed a single column of outcome values are to be -# rearranged. -# -if(length(dep.col)!=1)stop("Argument dep.col must have a single value") -if(is.null(dim(x)))stop("x must be a matrix or data frame") -Sid=unique(x[,Sid.col]) -n=nrow(x) -nid=length(Sid) -flag=(x[,Sid.col]==Sid[1]) -num.out=sum(flag) -res=matrix(NA,nrow=nid,ncol=num.out) -for(i in 1:nid){ -flag=(x[,Sid.col]==Sid[i]) -res[i,]=x[flag,dep.col] -} -res -} - -wlogreg<-function(x,y,initwml=FALSE,const=0.5,kmax=1e3,maxhalf=10){ -# -# -# Bianco and Yohai (1996) in logistic regression -# -# -options(warn=-1) -xy=cbind(x,y) -p1=ncol(xy) -xy=elimna(xy) -p=p1-1 -if(p==1){ -library(robustbase) -a=BYlogreg(x,y,initwml=initwml,const=const,kmax=kmax,maxhalf=maxhalf) -} -else -a=wlogregv2(x,y,initwml=initwml,const=const,kmax=kmax,maxhalf=maxhalf) -options(warn=0) -a -} - - - - - - -longcov2mat<-function(x,Sid.col,dep.col){ -# -# Have data in a matrix or data frame, x -# Sid.col indicates Subject's id -# Here, each subject has one or more rows of data -# -# In a regression setting, each subject has -# one or more covariates corresponding to columns. -# For example, two covariates might be stored in columns -# 3 and 6. -# -# Goal: For ith subject, store the covariate data in -# list mode, which is a matrix. -# So for ith subject, store covariate data in z[[i]], say, which -# contains a matrix of dimension m by p, -# m is the number of observations for ith subject and p -# the number of covariates. 
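-# Example: with subject ids in column 1 (say) and covariates in columns 3 and 6,
-# longcov2mat(x,Sid.col=1,dep.col=c(3,6)) returns a list whose ith element is the
-# m by 2 matrix of those two covariates for the ith subject.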
-# -# dep.col, having length p, indicates columns containe the covariates -# Column Sid.col indicates the column containing subject's id -# -if(is.null(dim(x)))stop("x must be a matrix or data frame") -Sid=unique(x[,Sid.col]) -res=list() -nid=length(Sid) -p=length(dep.col)# Number of covariates for each subject -n=nrow(x) -flag=(x[,Sid.col]==Sid[1]) -n.each.s=sum(flag) # the number of rows for each subject -ns=n/n.each.s # the number of subjects -if(!is.wholenumber(ns))stop("Not all S's have same number of rows of data") -for(i in 1:ns){ -#res[[i]]=matrix(NA,nrow=n.each.s,ncol=p) -flag=(x[,Sid.col]==Sid[i]) -res[[i]]=as.matrix(x[flag,dep.col]) -} -res -} -is.wholenumber <- - function(x, tol = .Machine$double.eps^0.5) abs(x - round(x)) < tol - -long2g<-function(x,x.col,y.col,s.id,grp.id,regfun=tsreg,MAR=TRUE,tr=.2){ -# -# x is a matrix or data frame. -# -# Longitudinal data, compare two groups, where the groups correspond to two -# values in column -# grp.id. -# The outcome (dependent) variable is assumed to be stored in -# the column indicated by the argument y.col. -# Example, y.col=3 means the outcome variable of interest is in col. 3 -# Predictors are stored in columns indicated by -# x.col. -# s.id indicates column where subject's id is stored. -# -# Assuming data are stored as for example in the R variable -# Orthodont, -# which can be accessed via the command library(nlme) -# -m=matsplit(x,grp.id) -g1=longreg(m$m1,x.col,y.col,s.id,regfun)$est.S -g2=longreg(m$m2,x.col,y.col,s.id,regfun)$est.S -res=list() -if(MAR){ -for(iv in 1:ncol(g1))res[[iv]]=yuen(g1[,iv],g2[,iv],tr=tr) -} -if(!MAR)res=smean2(g1,g2) -res -} - -longreg.plot<-function(x,x.col,y.col,s.id,regfun=tsreg,scat=TRUE,xlab="X", -ylab="Y"){ -# -# x is a data frame or matrix -# -# Longitudinal data: plot regression lines -# -# For each subject, fit a regression line -# using outcome data in col y.col and predictors, usually times -# when measures were taken, in columns indicated by x.col. -# s.id indicates column where subject's id is stored. -# -# Assuming data are stored as for example in the R variable -# Orthodont, -# which can be accessed via the command library(nlme) -# For this data set, x.col=2 would indicated that the -# participants age at the time of being measured, is used -# to predict the outcome variable. -# -ymat=long2mat(x,s.id,y.col) # matrix, ith row contains outcome y -# for the ith subject. 
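-# Below, row i of ymat is paired with the ith covariate matrix in xvals when a
-# regression line is fit for each subject.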
-# -xvals=longcov2mat(x,s.id,x.col)# list mode -n=nrow(ymat) -p=length(x.col)+1 -if(p!=2)stop("Plot allows a single covariate only") -outmat=matrix(NA,nrow=n,ncol=p) -datx=NULL -daty=NULL -for(i in 1:n){ -outmat[i,]=regfun(as.matrix(xvals[[i]]),ymat[i,])$coef -temp=as.matrix(xvals[[i]]) -datx=c(datx,temp) -daty=c(daty,ymat[i,]) -} -if(!scat)plot(datx,daty,type="n",xlab=xlab,ylab=ylab) -if(scat)plot(datx,daty,xlab=xlab,ylab=ylab) -for(i in 1:n)abline(outmat[i,1],outmat[i,2]) -} - -hotel1.tr<-function(x,null.value=0,tr=.2) { -# -# Perform a trimmed analog of Hotelling's (one-sample) T^2 test -# That is, for p-variate data, test the hypothesis that the p marginal -# trimmed means are equal to the value specified by -# the argument null.value -# -if (is.data.frame(x)) - x <- as.matrix(x) -x=elimna(x) - if(!is.matrix(x)) - stop("'x' must be a numeric matrix or a data frame") - n <- nrow(x) - p <- ncol(x) - mu=null.value -xbar=apply(x,2,mean,tr=tr) - if(!is.numeric(mu) || ((lmu <- length(mu)) > 1 & lmu != p)) - stop("'null.value' must be a numeric vector of length ", p) -if(lmu == 1) mu <- rep(mu, p) - xbar.mu <- xbar - mu - V <- winall(x,tr=tr)$cov -h=n-2*floor(n*tr) - k <- h / (n - 1) * (h - p) / p - stat <- k * crossprod(xbar.mu, solve(V, xbar.mu))[1, ] - pvalue <- 1 - pf(stat, p, h - p) -list(test.statistic = stat, degrees_of_freedom = c(p, h - p), p.value = -pvalue, estimate = xbar, - null.value = mu) -} - -hotel1<-function(x,null.value=0,tr=0) { -# -# Perform a trimmed analog of Hotelling's (one-sample) T^2 test -# That is, for p-variate data, test the hypothesis that the p marginal -# trimmed means are equal to the value specified by -# the argument null.value -# -if (is.data.frame(x)) - x <- as.matrix(x) -x=elimna(x) - if(!is.matrix(x)) - stop("'x' must be a numeric matrix or a data frame") - n <- nrow(x) - p <- ncol(x) - mu=null.value -xbar=apply(x,2,mean,tr=tr) - if(!is.numeric(mu) || ((lmu <- length(mu)) > 1 & lmu != p)) - stop("'null.value' must be a numeric vector of length ", p) -if(lmu == 1) mu <- rep(mu, p) - xbar.mu <- xbar - mu - V <- winall(x,tr=tr)$cov -h=n-2*floor(n*tr) - k <- h / (n - 1) * (h - p) / p - stat <- k * crossprod(xbar.mu, solve(V, xbar.mu))[1, ] - pvalue <- 1 - pf(stat, p, h - p) -list(test.statistic = stat, degrees_of_freedom = c(p, h - p), p.value = -pvalue, estimate = xbar, - null.value = mu) -} - - wwmcp<-function(J,K,x,tr=.2,alpha=.05,dif=TRUE,method='hoch'){ -# -# Do all multiple comparisons for a within-by-within design -# using trimmed means -# -conM=con2way(J,K) -A=rmmcp(x,con=conM$conA,tr=tr,alpha=alpha,dif=dif) - A$test[,4]=p.adjust(A$test[,3],method=method) - dimnames(A$test)=list(NULL,c('con.num', 'test', 'p.value','adj.p.value', 'se')) -B=rmmcp(x,con=conM$conB,tr=tr,alpha=alpha,dif=dif) - B$test[,4]=p.adjust(B$test[,3],method=method) - dimnames(B$test)=list(NULL,c('con.num', 'test', 'p.value','adj.p.value', 'se')) -AB=rmmcp(x,con=conM$conAB,tr=tr,alpha=alpha,dif=dif) - AB$test[,4]=p.adjust(AB$test[,3],method=method) - dimnames(AB$test)=list(NULL,c('con.num', 'test', 'p.value','adj.p.value', 'se')) -list(Factor_A=A,Factor_B=B,Factor_AB=AB) -} - -wwmcpES<-function(J,K,x,tr=.2,alpha=.05,dif=TRUE){ -# -# Do all multiple comparisons for a within-by-within design -# using trimmed means -# -stop('Use ww.es instead') -conM=con2way(J,K) -A=rmmcpES(x,con=conM$conA,tr=tr,alpha=alpha,dif=dif) -B=rmmcpES(x,con=conM$conB,tr=tr,alpha=alpha,dif=dif) -AB=rmmcpES(x,con=conM$conAB,tr=tr,alpha=alpha,dif=dif) -list(Factor_A=A,Factor_B=B,Factor_AB=AB) -} - - 
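-# Example (illustrative sketch): for a 2-by-2 within-by-within design with the four
-# repeated measures stored in the columns of a matrix m (ordered as level 1,1; 1,2;
-# 2,1; 2,2), wwmcp(2,2,m) tests the Factor A, Factor B and interaction contrasts
-# based on 20% trimmed means of difference scores, with Hochberg-adjusted p-values.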
-wwmcpbt<-function(J,K,x, tr=.2, dif=TRUE, alpha = 0.05, nboot = 599){ -# -# Do multiple comparisons for a within-by-within design. -# using a bootstrap-t method and trimmed means. -# All linear contrasts relevant to main effects and interactions -# are tested. -# -# -conM=con2way(J,K) -A=lindepbt(x,con=conM$conA,alpha=alpha,tr=tr,dif=dif,nboot=nboot) -B=lindepbt(x,con=conM$conB,alpha=alpha,tr=tr,dif=dif,nboot=nboot) -AB=lindepbt(x,con=conM$conAB,alpha=alpha,tr=tr,dif=dif,nboot=nboot) -list(Factor_A=A,Factor_B=B,Factor_AB=AB) -} - - -wwmcppb<-function(J,K,x, alpha = 0.05, con = 0,est=tmean, plotit = FALSE, - dif = TRUE, grp = NA, nboot = NA, BA = TRUE, hoch = TRUE, xlab = "Group 1", - ylab = "Group 2", pr = TRUE, SEED = TRUE,...){ -# -# Do all multiple comparisons for a within-by-within design. -# using a percentile bootstrap method and trimmed means -# -conM=con2way(J,K) -A=rmmcppb(x,con=conM$conA,alpha=alpha,dif=dif,plotit=plotit,est=est, -nboot=nboot,BA=BA,hoch=hoch,SEED=SEED,xlab=xlab,ylab=ylab,pr=pr,...) -B=rmmcppb(x,con=conM$conB,alpha=alpha,dif=dif, -plotit=plotit,est=est,nboot=nboot,BA=BA,hoch=hoch, -SEED=SEED,xlab=xlab,ylab=ylab,pr=pr,...) -AB=rmmcppb(x,con=conM$conAB,alpha=alpha,dif=dif,plotit=plotit,est=est, -nboot=nboot,BA=BA,hoch=hoch,SEED=SEED,xlab=xlab,ylab=ylab,pr=pr,...) -list(Factor_A=A,Factor_B=B,Factor_AB=AB) -} - -wmcppb<-function(x, y=NULL,alpha = 0.05, con = 0,est=tmean, plotit = FALSE, - dif = TRUE, grp = NA, nboot = NA, BA = TRUE, hoch = TRUE, xlab = "Group 1", - ylab = "Group 2", pr = TRUE, SEED = TRUE, ...){ -# -# Do all multiple comparisons for a repeated measures design. -# using a percentile bootstrap method and trimmed means -# -if(!is.null(y))x=cbind(x,y) -A=rmmcppb(x,con=con,alpha=alpha,dif=dif,plotit=plotit,est=est, -nboot=nboot,BA=BA,hoch=hoch,SEED=SEED,xlab=xlab,ylab=ylab,pr=pr,...) -A -} - -lindepbt<-function(x, con = NULL, tr = 0.2, alpha = 0.05,nboot=599,dif=TRUE,method='holm', -SEED=TRUE){ -# -# MCP on trimmed means with FWE controlled with Rom's method -# Using a bootstrap-t method. -# -# dif=T, difference scores are used. And for linear contrasts a simple -# extension is used. -# -# dif=F, hypotheses are tested based on the marginal trimmed means. 
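-#
-# Example (illustrative sketch): with four dependent groups stored in the columns of
-# a matrix m, lindepbt(m) tests all pairwise differences based on 20% trimmed means
-# of the difference scores, using bootstrap-t critical values (nboot=599 by default).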
-# -if(SEED)set.seed(2) -if(is.data.frame(x))x=as.matrix(x) -if(is.list(x))x=matl(x) -if(is.null(con))con=con.all.pairs(ncol(x)) # all pairwise -x=elimna(x) -n=nrow(x) -flagcon=FALSE -if(!is.matrix(x))x<-matl(x) -if(!is.matrix(x))stop("Data must be stored in a matrix or in list mode.") -con<-as.matrix(con) -J<-ncol(x) -xbar<-vector("numeric",J) -nval<-nrow(x) -h1<-nrow(x)-2*floor(tr*nrow(x)) -df<-h1-1 -xbar=apply(x,2,mean,tr=tr) -if(sum(con^2!=0))CC<-ncol(con) -ncon<-CC -if(alpha==.05){ -dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) -if(ncon > 10){ -avec<-.05/c(11:ncon) -dvec<-c(dvec,avec) -}} -if(alpha==.01){ -dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) -if(ncon > 10){ -avec<-.01/c(11:ncon) -dvec<-c(dvec,avec) -}} -if(alpha != .05 && alpha != .01)dvec<-alpha/c(1:ncon) -if(nrow(con)!=ncol(x))warning("The number of groups does not match the number - of contrast coefficients.") -ncon<-ncol(con) -psihat<-matrix(0,ncol(con),4) -dimnames(psihat)<-list(NULL,c("con.num","psihat","ci.lower","ci.upper")) -test<-matrix(0,ncol(con),6) -dimnames(test)<-list(NULL,c("con.num","test","p.value","p.crit","se",'p.adjusted')) -temp1<-NA -for (d in 1:ncol(con)){ -psihat[d,1]<-d -# -# !dif Use marginal trimmed means -# -if(!dif){ -psihat[d,2]<-sum(con[,d]*xbar) -# -# -sejk<-0 -for(j in 1:J){ -for(k in 1:J){ -djk<-(nval-1)*wincor(x[,j],x[,k], tr)$cov/(h1*(h1-1)) -sejk<-sejk+con[j,d]*con[k,d]*djk -}} -sejk<-sqrt(sejk) -test[d,1]<-d -test[d,2]<-sum(con[,d]*xbar)/sejk -test[d,5]<-sejk -# -# now use bootstrap-t to determine p-value -# -data<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot) -xcen=x -for(j in 1:ncol(x))xcen[,j]=xcen[,j]-tmean(x[,j],tr=tr) -bvec=apply(data,1,lindep.sub,xcen,con[,d],tr) -bsort<-sort(abs(bvec)) -ic<-round((1-alpha)*nboot) -ci<-0 -psihat[d,3]<-psihat[d,2]-bsort[ic]*test[d,5] -psihat[d,4]<-psihat[d,2]+bsort[ic]*test[d,5] -p.value<-mean(abs(test[d,2])<=abs(bvec)) -temp1[d]=p.value -} -if(dif){ -for(j in 1:J){ -if(j==1)dval<-con[j,d]*x[,j] -if(j>1)dval<-dval+con[j,d]*x[,j] -} -temp=trimcibt(dval,tr=tr,alpha=alpha,nboot=nboot,pr=FALSE) -temp1[d]<-temp$p.value #trimci(dval,tr=tr,pr=FALSE)$p.value -test[d,1]<-d -test[d,2]=temp$test.stat -test[d,5]<-trimse(dval,tr=tr) -psihat[d,2]<-mean(dval,tr=tr) -psihat[d,3]<-temp$ci[1] #psihat[,2]-qt(1-test[,4]/2,df)*test[,5] -psihat[d,4]<-temp$ci[2] #psihat[,2]+qt(1-test[,4]/2,df)*test[,5] -}} -# -# d ends here -# -test[,3]<-temp1 -temp2<-order(0-temp1) -zvec<-dvec[1:ncon] -sigvec<-(test[temp2,3]>=zvec) -test[temp2,4]<-zvec -if(flagcon)num.sig<-sum(test[,4]<=test[,5]) -if(!flagcon){num.sig<-sum(test[,3]<=test[,4]) -test[,6]=p.adjust(test[,3],method=method) -} -list(test=test,psihat=psihat,con=con,num.sig=num.sig) -} - -lindep.sub<-function(data,x,con=con,tr=tr){ -con=as.matrix(con) -res=rmmcp(x[data,],con=con,tr=tr,dif=FALSE)$test[,2] -res -} - -mcp.nestAP<-function(x,tr=.2){ -# -# Nested ANOVA -# -# Strategy: for each level of factor A, pool the data -# and then perform the analysis -# -# x is assumed to have list mode with length J, -# the number of independent groups. -# -# x[[1]] contains an n by K matrix, the nested data -# for the first level of the first factor. -# x[[2]] contains an n by K matrix, the nested data -# for the second level of the first factor, etc. 
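-#
-# The pooled data for each level of Factor A are then compared via lincon, so the
-# output is the usual set of pairwise comparisons based on trimmed means.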
-# - xx=list() -for(j in 1: length(x))xx[[j]]=as.vector(x[[j]]) -results=lincon(xx,tr=tr) -results -} - -outmgvad<-function(m,center=NA,plotit=TRUE,op=1, -xlab="VAR 1",ylab="VAR 2",rate=.05,iter=100,ip=6,pr=TRUE){ -# -# Adjusts the critical value, gval used by outmgv, -# so that the outside rate per observation, under normality -# is approximately equal to the value given by the argument -# rate, which defaults to .05. -# That is, expected proportion of points declared outliers under normality -# is intended to be rate=.05 -# -# When dealing with p-variate data, p>9, this adjustment can be crucial -# -m=elimna(m) -n=nrow(m) -newgval=sqrt(qchisq(.975,ncol(m))) -z=array(rmul(n*iter*ncol(m)),c(iter,n,ncol(m))) -newq=0 -gtry=NA -val=NULL -for(itry in 1:ip){ -newq=newq+9/10^itry -gtry[itry]=newq -} -gtry=c(.95,.975,gtry[-1]) -if(pr)print("Computing adjustment") -for(itry in 1:ip){ -for(i in 1:iter){ -temp=outmgv.v2(z[i,,],gval=gval,op=op)$out.id -val[i]=length(temp) -} -erate=mean(val)/n -if(erate1)temp$points(x[outid,],col="red") -} -if(!COLOR){ -if(length(outid)==1)temp$points(t(as.matrix(x[outid,])),pch="*") -if(length(outid)>1)temp$points(x[outid,],pch="*") -} -} -if(reg.plane){ -vals<-regfun(x[,1:2],x[,3],...)$coef -if(COLOR)temp$plane(vals,col="blue") -if(!COLOR)temp$plane(vals) -} -} - -ees.ci<-function(x,y,SEED=TRUE,nboot=400,tr=.2,alpha=.05,pr=TRUE){ -# -# Compute a 1-alpha confidence interval -# for a robust, heteroscedastic measure of effect size -# -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -x=elimna(x) -y=elimna(y) -bvec=0 -datax<-matrix(sample(x,size=length(x)*nboot,replace=TRUE),nrow=nboot) -datay<-matrix(sample(y,size=length(x)*nboot,replace=TRUE),nrow=nboot) -for(i in 1:nboot){ -bvec[i]=yuenv2(datax[i,],datay[i,],tr=tr,SEED=FALSE)$Var.Explained -} -bvec<-sort(bvec) -crit<-alpha/2 -icl<-round(crit*nboot)+1 -icu<-nboot-icl -ci<-NA -ci[1]<-bvec[icl] -pchk=yuen(x,y,tr=tr)$p.value -if(pchk>alpha)ci[1]=0 -ci[2]<-bvec[icu] -if(ci[1]<0)ci[1]=0 -ci=sqrt(ci) -ci -} -wwwtrimbt<-function(J, K,L, x, tr = 0.2, JKL = J * K*L, con = 0, - alpha = 0.05, grp =c(1:JKL), nboot = 599,SEED = TRUE, ...){ - # - # A bootstrap-t for a within-by-within-by-within omnibus tests - # for all main effects and interactions - # - # The R variable x is assumed to contain the raw - # data stored in list mode or in a matrix. - # If in list mode, x[[1]] contains the data - # for the first level of both factors: level 1,1. - # x[[2]] is assumed to contain the data for level 1 of the - # first factor and level 2 of the second: level 1,2 - # x[[K]] is the data for level 1,K - # x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc. - # -# -# within-by-within-by-within design -# -# JKL dependent groups -# - - # If the data are in a matrix, column 1 is assumed to - # correspond to x[[1]], column 2 to x[[2]], etc. - # - # When in list mode x is assumed to have length JK, the total number - # groups being tested, but a subset of the data can be analyzed - # using grp - # -if(is.data.frame(x))x=as.matrix(x) - if(is.matrix(x)) { - y <- list() - for(j in 1:ncol(x)) - y[[j]] <- x[, j] -x=y -} -ncon=ncol(con) - p <- J*K*L -JKL=p -if(p>length(x))stop("JKL is less than the Number of groups") -JK=J*K -KL=K*L -# v <- matrix(0, p, p) - data <- list() -xx=list() - for(j in 1:length(x)) { -xx[[j]]=x[[grp[j]]] # save input data -data[[j]] = xx[[j]] - mean(xx[[j]], tr = tr) -# # Now have the groups in proper order. 
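-# (data now holds the trimmed-mean-centered values used to generate the bootstrap
-# null distribution)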
- } - if(SEED)set.seed(2) - # set seed of random number generator so that - # results can be duplicated. - # Next determine the n_j values - bsam = list() - bdat = list() -aboot=NA -bboot=NA -cboot=NA -abboot=NA -acboot=NA -bcboot=NA -abcboot=NA -test.stat=wwwtrim(J,K,L,xx,tr=tr) -nv=length(x[[1]]) - for(ib in 1:nboot) { -bdat[[j]] = sample(nv, size = nv, replace =TRUE) -for(k in 1:JKL) bsam[[k]] = data[[k]][bdat[[j]]] -temp=wwwtrim(J,K,L,bsam,tr=tr) -aboot[ib]=temp$Qa -bboot[ib]=temp$Qb -cboot[ib]=temp$Qc -acboot[ib]=temp$Qac -bcboot[ib]=temp$Qbc -abboot[ib]=temp$Qab -abcboot[ib]=temp$Qabc -} -pbA=NA -pbB=NA -pbC=NA -pbAB=NA -pbAC=NA -pbBC=NA -pbABC=NA -pbA=mean(test.stat$Qa[1,1]length(x))stop("JKL is less than the Number of groups") -JK=J*K -KL=K*L - v <- matrix(0, p, p) - data <- list() -xx=list() - for(j in 1:length(x)) { - data[[j]] <- x[[grp[j]]] -xx[[j]]=x[[grp[j]]] # save input data - # Now have the groups in proper order. - data[[j]] = data[[j]] - mean(data[[j]], tr = tr) - } - x <- data # centered data xx has original data -test=bwwtrim(J,K,L,xx,tr=tr) - if(SEED) - set.seed(2) - # set seed of random number generator so that - # results can be duplicated. - bsam = list() - bdat = list() -aboot=NA -bboot=NA -cboot=NA -abboot=NA -acboot=NA -bcboot=NA -abcboot=NA - for(ib in 1:nboot) { - ilow <- 1 - KL - iup = 0 - for(j in 1:J) { - ilow <- ilow + KL - iup = iup + KL -nv=length(x[[ilow]]) - bdat[[j]] = sample(nv, size = nv, replace =TRUE) -for(k in ilow:iup){ - bsam[[k]] = x[[k]][bdat[[j]]] -} -} -temp=bwwtrim(J,K,L,bsam,tr=tr) -aboot[ib]=temp$Qa -bboot[ib]=temp$Qb -cboot[ib]=temp$Qc -acboot[ib]=temp$Qac -bcboot[ib]=temp$Qbc -abboot[ib]=temp$Qab -abcboot[ib]=temp$Qabc - } -pbA=NA -pbB=NA -pbC=NA -pbAB=NA -pbAC=NA -pbBC=NA -pbABC=NA -pbA=mean(test$Qa[1,1]length(x))stop("JKL is less than the Number of groups") -JK=J*K - v <- matrix(0, p, p) - data <- list() -xx=list() - for(j in 1:length(x)) { - data[[j]] <- x[[grp[j]]] -xx[[j]]=x[[grp[j]]] # save input data - # Now have the groups in proper order. - data[[j]] = data[[j]] - mean(data[[j]], tr = tr) - } -#ilow=0-L -#iup=0 -#for(j in 1:JK){ -#ilow <- ilow + L -# iup = iup + L -#sel <- c(ilow:iup) -#xx[sel]=listm(elimna(matl(xx[sel]))) -# v[sel, sel] <- covmtrim(xx[sel], tr) -# } -test.stat=bbwtrim(J,K,L,xx,tr=tr) - x <- data # Centered data -# jp <- 1 - K -# kv <- 0 - if(SEED) - set.seed(2) - # set seed of random number generator so that - # results can be duplicated. - testA = NA - testB = NA - testC=NA - testAB = NA - testAC = NA - testBC = NA - testABC = NA - bsam = list() - bdat = list() -aboot=NA -bboot=NA -cboot=NA -abboot=NA -acboot=NA -bcboot=NA -abcboot=NA -nvec=NA - for(j in 1:JK){ - nvec[j] = length(x[[j]]) - for(ib in 1:nboot) { - ilow <- 1 - L - iup = 0 - for(j in 1:JK) { - ilow <- ilow + L - iup = iup + L -nv=length(x[[ilow]]) - bdat[[j]] = sample(nv, size = nv, replace =TRUE) -for(k in ilow:iup){ - bsam[[k]] = x[[k]][bdat[[j]]] -} -} -temp=bbwtrim(J,K,L,bsam,tr=tr) -aboot[ib]=temp$Qa -bboot[ib]=temp$Qb -cboot[ib]=temp$Qc -acboot[ib]=temp$Qac -bcboot[ib]=temp$Qbc -abboot[ib]=temp$Qab -abcboot[ib]=temp$Qabc -}} -pbA=NA -pbB=NA -pbC=NA -pbAB=NA -pbAC=NA -pbBC=NA -pbABC=NA -pbA=mean(test.stat$Qa[1,1]4)nboot<-5000 -} -n<-nrow(mat) -crit.vec<-alpha/c(1:d) -connum<-ncol(con) -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. 
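-# Estimate the location of each marginal distribution; these estimates determine
-# the observed value of each linear contrast below.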
-xbars<-apply(mat,2,est) -psidat<-NA -for (ic in 1:connum)psidat[ic]<-sum(con[,ic]*xbars) -psihat<-matrix(0,connum,nboot) -psihatcen<-matrix(0,connum,nboot) -bvec<-matrix(NA,ncol=J,nrow=nboot) -bveccen<-matrix(NA,ncol=J,nrow=nboot) -print("Taking bootstrap samples. Please wait.") -data<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot) -for(ib in 1:nboot){ -bvec[ib,]<-apply(x[data[ib,],],2,est,...) -bveccen[ib,]<-apply(xcen[data[ib,],],2,est,...) -} -# -# Now have an nboot by J matrix of bootstrap values. -# -test<-1 -bias<-NA -tval<-NA -tvalcen<-NA -for (ic in 1:connum){ -psihat[ic,]<-apply(bvec,1,bptdpsi,con[,ic]) -psihatcen[ic,]<-apply(bveccen,1,bptdpsi,con[,ic]) -tvalcen[ic]<-sum((psihatcen[ic,]==0))/nboot -bias[ic]<-sum((psihatcen[ic,]>0))/nboot+sum((psihatcen[ic,]==0))/nboot-.5 -tval[ic]<-sum((psihat[ic,]==0))/nboot -if(BA){ -test[ic]<-sum((psihat[ic,]>0))/nboot+tval[ic]-.1*bias[ic] -if(test[ic]<0)test[ic]<-0 -} -if(!BA)test[ic]<-sum((psihat[ic,]>0))/nboot+tval[ic] -test[ic]<-min(test[ic],1-test[ic]) -} -test<-2*test -ncon<-ncol(con) -if(alpha==.05){ -dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) -dvecba<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) -if(ncon > 10){ -avec<-.05/c(11:ncon) -dvec<-c(dvec,avec) -}} -if(alpha==.01){ -dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) -dvecba<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) -if(ncon > 10){ -avec<-.01/c(11:ncon) -dvec<-c(dvec,avec) -}} -if(hoch)dvec<-alpha/(2* c(1:ncon)) -dvec<-2*dvec -if(alpha != .05 && alpha != .01){ -dvec<-alpha/c(1:ncon) -dvecba<-dvec -dvec[1]<-alpha/2 -} -if(plotit && ncol(bvec)==2){ -z<-c(0,0) -one<-c(1,1) -plot(rbind(bvec,z,one),xlab=xlab,ylab=ylab,type="n") -points(bvec) -totv<-apply(x,2,est,...) -cmat<-var(bvec) -dis<-mahalanobis(bvec,totv,cmat) -temp.dis<-order(dis) -ic<-round((1-alpha)*nboot) -xx<-bvec[temp.dis[1:ic],] -xord<-order(xx[,1]) -xx<-xx[xord,] -temp<-chull(xx) -lines(xx[temp,]) -lines(xx[c(temp[1],temp[length(temp)]),]) -abline(0,1) -} -temp2<-order(0-test) -ncon<-ncol(con) -zvec<-dvec[1:ncon] -if(BA)zvec<-dvecba[1:ncon] -sigvec<-(test[temp2]>=zvec) -output<-matrix(0,connum,6) -dimnames(output)<-list(NULL,c("con.num","psihat","p-value","p.crit", -"ci.lower","ci.upper")) -tmeans<-apply(mat,2,est,...) -psi<-1 -for (ic in 1:ncol(con)){ -output[ic,2]<-sum(con[,ic]*tmeans) -output[ic,1]<-ic -output[ic,3]<-test[ic] -output[temp2,4]<-zvec -temp<-sort(psihat[ic,]) -icl<-round(output[ic,4]*nboot/2)+1 -icu<-nboot-(icl-1) -output[ic,5]<-temp[icl] -output[ic,6]<-temp[icu] -} -} -if(PCI){ -if(dif){ -plotCI(output[,2],ali=output[,5],aui=output[,6],xlab='Difference',ylab=ylab.ebar) -}} -num.sig<-sum(output[,3]<=output[,4]) -list(output=output,con=con,num.sig=num.sig) -} -wwtrimbt<-function(J, K, x, tr = 0.2, JK = J*K, con = 0, - alpha = 0.05, grp =c(1:JK), nboot = 599,SEED = TRUE, ...){ - # - # A bootstrap-t for a within-by-within omnibus tests - # for all main effects and interactions - # - # The R variable x is assumed to contain the raw - # data stored in list mode or in a matrix. - # If in list mode, x[[1]] contains the data - # for the first level of both factors: level 1,1. - # x[[2]] is assumed to contain the data for level 1 of the - # first factor and level 2 of the second: level 1,2 - # x[[K]] is the data for level 1,K - # x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc. - # - # If the data are in a matrix, column 1 is assumed to - # correspond to x[[1]], column 2 to x[[2]], etc. 
- # - # When in list mode x is assumed to have length JK, the total number - # groups being tested, but a subset of the data can be analyzed - # using grp - # -if(is.data.frame(x))x=as.matrix(x) - if(is.matrix(x)) { - y <- list() - for(j in 1:ncol(x)) - y[[j]] <- x[, j] -x=y -} -ncon=ncol(con) - p <- J*K -JK=p -if(p>length(x))stop("JK is less than the Number of groups") -JK=J*K - data <- list() -xx=list() - for(j in 1:length(x)) { -xx[[j]]=x[[grp[j]]] # save input data -data[[j]] = xx[[j]] - mean(xx[[j]], tr = tr) -# # Now have the groups in proper order. - } - if(SEED)set.seed(2) - # set seed of random number generator so that - # results can be duplicated. - bsam = list() - bdat = list() -aboot=NA -bboot=NA -cboot=NA -abboot=NA -test.stat=wwtrim(J,K,xx,tr=tr) -nv=length(x[[1]]) - for(ib in 1:nboot) { -bdat[[j]] = sample(nv, size = nv, replace =TRUE) -for(k in 1:JK) bsam[[k]] = data[[k]][bdat[[j]]] -temp=wwtrim(J,K,bsam,tr=tr) -aboot[ib]=temp$Qa -bboot[ib]=temp$Qb -abboot[ib]=temp$Qab -} -pbA=NA -pbB=NA -pbAB=NA -pbA=mean(test.stat$Qa[1,1]0 || ci.up<0){ -pv=alpha[i] -flag=T -} -if(flag)break -} -if(!flag){ -alpha=c(1:99)/100 -for(i in 1:length(alpha)){ -ilow<-round(alpha[i]*nboot/2) -il<-ilow+1 -uval<-nboot-ilow -b.low<-3*((1+nhat*val[il]-nhat/6)^{1/3})/nhat-3/nhat -b.hi<-3*((1+nhat*val[uval]-nhat/6)^{1/3})/nhat-3/nhat -ci.low<-dif-sigtil*b.hi -ci.up<-dif-sigtil*b.low -if(ci.low>0 || ci.up<0){ -pv=alpha[i] -flag=T -} -if(flag)break -} -}}} -list(est.dif=dif,conf.interval=c(ci.LOW,ci.UP),p.value=pv) -} - -mlrregCI<-function(x,y,nboot=300,MC=FALSE,SEED=TRUE,op.dis=TRUE){ -# -# Based on Rousseeuw et al. -# multivariate regression estimator -# compute p-value for each of the parameters using a percentile -# bootstrap method. -# -if(SEED)set.seed(2) -if(MC)library(parallel) -est=mlrreg(x,y)$coef -pval=est -n=nrow(x) -JK=(ncol(x)+1)*ncol(y) -vals=matrix(0,nrow=nboot,ncol=JK) -data<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot) -if(!MC)for(ib in 1:nboot){ -vals[ib,]=mlrreg(x[data[ib,],],y[data[ib,],])$coef -} -if(MC){ -data=listm(t(data)) -vals=mclapply(data,mlrreg.est,x,y,mc.preschedule=TRUE) -vals=t(matl(vals)) -} -pv=NULL -for(j in 1:JK){ -pv[j]=mean(vals[,j]>0)+.5*mean(vals[,j]==0) -pv[j]=2*min(c(pv[j],1-pv[j])) -} -ic=0 -il=1 -iu=ncol(x)+1 -for(iy in 1:ncol(y)){ -pval[,iy]=pv[il:iu] -il=il+ncol(x)+1 -iu=iu+ncol(x)+1 -} -list(estimates=est,p.values=pval) -} -mlrreg.est<-function(data,x,y){ -xv=x[data,] -yv=y[data,] -vals=as.vector(mlrreg(xv,yv)$coef) -vals -} -bmcppb<-function(x,alpha=.05,nboot=NA,grp=NA,est=tmean,con=0,bhop=FALSE,SEED=TRUE, -...){ -# -# Multiple comparisons for J independent groups using trimmed means -# -# A percentile bootstrap method with Rom's method is used. -# -# The data are assumed to be stored in x -# which either has list mode or is a matrix. In the first case -# x[[1]] contains the data for the first group, x[[2]] the data -# for the second group, etc. Length(x)=the number of groups = J. -# If stored in a matrix, the columns of the matrix correspond -# to groups. -# -# est is the measure of location and defaults to the median -# ... can be used to set optional arguments associated with est -# -# The argument grp can be used to analyze a subset of the groups -# Example: grp=c(1,3,5) would compare groups 1, 3 and 5. -# -# Missing values are allowed. -# -con<-as.matrix(con) -if(is.matrix(x))x<-listm(x) -if(!is.list(x))stop("Data must be stored in list mode or in matrix mode.") -if(!is.na(sum(grp))){ # Only analyze specified groups. 
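-# Retain only the groups listed in grp, in the order given.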
-xx<-list() -for(i in 1:length(grp))xx[[i]]<-x[[grp[i]]] -x<-xx -} -J<-length(x) -tempn<-0 -mvec<-NA -for(j in 1:J){ -temp<-x[[j]] -temp<-temp[!is.na(temp)] # Remove missing values. -tempn[j]<-length(temp) -x[[j]]<-temp -mvec[j]<-est(temp,...) -} -Jm<-J-1 -# -# Determine contrast matrix -# -if(sum(con^2)==0){ -ncon<-(J^2-J)/2 -con<-matrix(0,J,ncon) -id<-0 -for (j in 1:Jm){ -jp<-j+1 -for (k in jp:J){ -id<-id+1 -con[j,id]<-1 -con[k,id]<-0-1 -}}} -ncon<-ncol(con) -if(nrow(con)!=J)stop("Something is wrong with con; the number of rows does not match the number of groups.") -# Determine nboot if a value was not specified -if(is.na(nboot)){ -nboot<-5000 -if(J <= 8)nboot<-4000 -if(J <= 3)nboot<-2000 -} -# Determine critical values -if(!bhop){ -if(alpha==.05){ -dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) -if(ncon > 10){ -avec<-.05/c(11:ncon) -dvec<-c(dvec,avec) -}} -if(alpha==.01){ -dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) -if(ncon > 10){ -avec<-.01/c(11:ncon) -dvec<-c(dvec,avec) -}} -if(alpha != .05 && alpha != .01){ -dvec<-alpha/c(1:ncon) -} -} -if(bhop)dvec<-(ncon-c(1:ncon)+1)*alpha/ncon -bvec<-matrix(NA,nrow=J,ncol=nboot) -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -#print("Taking bootstrap samples. Please wait.") -for(j in 1:J){ -#print(paste("Working on group ",j)) -data<-matrix(sample(x[[j]],size=length(x[[j]])*nboot,replace=TRUE),nrow=nboot) -bvec[j,]<-apply(data,1,est,...) # Bootstrapped values for jth group -} -test<-NA -bcon<-t(con)%*%bvec #ncon by nboot matrix -tvec<-t(con)%*%mvec -for (d in 1:ncon){ -tv<-sum(bcon[d,]==0)/nboot -test[d]<-sum(bcon[d,]>0)/nboot+.5*tv -if(test[d]> .5)test[d]<-1-test[d] -} -test<-2*test -output<-matrix(0,ncon,6) -dimnames(output)<-list(NULL,c("con.num","psihat","p.value","p.crit","ci.lower","ci.upper")) -temp2<-order(0-test) -zvec<-dvec[1:ncon] -sigvec<-(test[temp2]>=zvec) -output[temp2,4]<-zvec -icl<-round(dvec[ncon]*nboot/2)+1 -icu<-nboot-icl-1 -for (ic in 1:ncol(con)){ -output[ic,2]<-tvec[ic,] -output[ic,1]<-ic -output[ic,3]<-test[ic] -temp<-sort(bcon[ic,]) -output[ic,5]<-temp[icl] -output[ic,6]<-temp[icu] -} -num.sig<-sum(output[,3]<=output[,4]) -list(output=output,con=con,num.sig=num.sig) -} -mlrregWtest<-function(x,y,nboot=300,MC=FALSE,SEED=TRUE){ -# -# Test hypothesis that all slopes=0 based on Rousseeuw et al. -# multivariate regression estimator -# -# Strategy: a variation of the wild bootstrap method, percentile version. -# -if(SEED)set.seed(2) -if(MC)library(parallel) -estit=mlrreg.subest(y,x) #YES, y before x -n=nrow(x) -JK=ncol(x)*ncol(y) -vals=matrix(0,nrow=nboot,ncol=JK) -data=list() -for(i in 1:nboot){ -bsam=sample(n,replace=TRUE) -data[[i]]=y[bsam,] -} -if(!MC){ -vals=lapply(data,mlrreg.subest,x) -} -if(MC){ -vals=mclapply(data,mlrreg.subest,x,mc.preschedule=TRUE) -} -vals=t(matl(vals)) -nullv=rep(0,JK) -vals=rbind(vals,estit) -cen=rep(0,ncol(vals)) -if(MC)dv=pdisMC(vals,center=cen) -if(!MC)dv=pdis(vals,center=cen) -bplus=nboot+1 -pv=1-sum(dv[bplus]>=dv[1:nboot])/nboot -list(p.value=pv) -} -mlrreg.subest<-function(data,x){ -vals=as.vector(mlrreg(x,data)$coef[-1,]) -vals -} -btrim<-function(x,tr=.2,grp=NA,g=NULL,dp=NULL,nboot=599,SEED=TRUE){ -# -# Test the hypothesis of equal trimmed means, corresponding to J independent -# groups, using a bootstrap-t method. -# -# The data are assumed to be stored in x in list mode -# or in a matrix. 
In the first case -# x[[1]] contains the data for the first group, x[[2]] the data -# for the second group, etc. Length(x)=the number of groups = J. -# If stored in a matrix, columns correspond to groups. -# -# grp is used to specify some subset of the groups, if desired. -# By default, all J groups are used. -# g=NULL, x is assumed to be a matrix or have list mode -# -# if g is specifed, it is assumed that column g of x is -# a factor variable and that the dependent variable of interest is in column -# dp of x, which can be a matrix or data frame. -# -# The default number of bootstrap samples is nboot=599 -# -if(!is.null(g)){ -if(is.null(dp))stop("Specify a value for dp, the column containing the data") -x=fac2list(x[,dp],x[,g]) -} -if(is.data.frame(x))x=as.matrix(x) -if(is.matrix(x))x<-listm(x) -if(!is.list(x))stop("Data must be stored in a matrix or in list mode.") -if(is.na(grp[1]))grp<-c(1:length(x)) -J<-length(grp) -nval=NA -x=lapply(x,elimna) -nval=lapply(x,length) -xbar=lapply(x,mean,tr=tr) -bvec<-array(0,c(J,2,nboot)) -hval<-vector("numeric",J) -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -print("Taking bootstrap samples. Please wait.") -for(j in 1:J){ -hval[j]<-length(x[[grp[j]]])-2*floor(tr*length(x[[grp[j]]])) - # hval is the number of observations in the jth group after trimming. -print(paste("Working on group ",grp[j])) -xcen<-x[[grp[j]]]-mean(x[[grp[j]]],tr) -data<-matrix(sample(xcen,size=length(x[[grp[j]]])*nboot,replace=TRUE),nrow=nboot) -bvec[j,,]<-apply(data,1,trimparts,tr) # A 2 by nboot matrix. The first row -# contains the bootstrap trimmed means, the second row -# contains the bootstrap squared standard errors. -} -m1<-bvec[,1,] # J by nboot matrix containing the bootstrap trimmed means -m2<-bvec[,2,] # J by nboot matrix containing the bootstrap sq standard errors -wvec<-1/m2 # J by nboot matrix of w values -uval<-apply(wvec,2,sum) # Vector having length nboot -blob<-wvec*m1 -xtil<-apply(blob,2,sum)/uval # nboot vector of xtil values -blob1<-matrix(0,J,nboot) -for (j in 1:J)blob1[j,]<-wvec[j,]*(m1[j,]-xtil)^2 -avec<-apply(blob1,2,sum)/(length(x)-1) -blob2<-(1-wvec/uval)^2/(hval-1) -cvec<-apply(blob2,2,sum) -cvec<-2*(length(x)-2)*cvec/(length(x)^2-1) -testb<-avec/(cvec+1) -# A vector of length nboot containing bootstrap test values -ct<-sum(is.na(testb)) -if(ct>0)print("Some bootstrap estimates of the test statistic could not be computed") -test<-t1way(x,tr=tr,grp=grp) -pval<-sum(test$TEST<=testb)/nboot -# -# Determine explanatory effect size -# -e.pow=t1wayv2(x)$Effect.Size -list(test=test$TEST,p.value=pval,Explanatory.Power=e.pow, -Effect.Size=e.pow) -} - - -linconMpb.sub<-function(data,x,est,...){ -res=apply(x[data,],2,est,...) -res -} -mcdcen<-function(x){ -# -# Compute MCD measure of location only. -# -res=covmcd(x)$center -res -} -mvecen<-function(x){ -# -# Compute MCD measure of location only. -# -res=covmve(x)$center -res -} - -linconSpb.sub<-function(data,x,est,...){ -res=est(x[data,],...) -res -} - -fac2Mlist<-function(x,grp.col,lev.col,pr=TRUE){ -# -# sort and store data in a matrix or data frame into -# groups, where the jth group -# has p-variate data -# -# grp.col is column indicating levels of between factor. 
-# lev.col indicates the columns where repeated measures are contained -# -# Example: column 2 contains information on levels of between factor -# have a 3 by 2 design, column 3 contains time 1 data, -# column 7 contains time 2 -# fac2Mlist(x,2,c(3,7)) will store data in list mode, having length -# 2 (the number of levels), with each level containing a -# matrix having two columns. The first column is based on values -# in column 3 of the matrix x, and the second column is based on -# data in column 7 of x. -# -res=selbybw(x,grp.col,lev.col) -if(pr){ -print("Levels for between factor:") -print(sort(unique(x[,grp.col]))) -} -res=res$x -p=length(lev.col) -J=length(unique(x[,grp.col])) -y=list() -ic=1-p -iu=0 -for(j in 1:J){ -ic=ic+p -iu=iu+p -y[[j]]=matl(res[ic:iu]) -} -y -} - - - -fac2BBMlist<-function(x,grp.col,lev.col,pr=TRUE){ -# -# This function is useful when dealing with a two-way MANOVA -# It takes data stored in x, a matrix or data frame, -# and creates groups based on the data in the two columns -# indicated by the argument -# -# grp.col -# lev.col indicates the columns where p-variate are contained. -# -# Example: -# z=fac2BBMlist(plasma,c(2,3),c(7,8)) -# creates groups based on values in columns 2 (Factor A) and 3 (Factor B). -# z[[1]] contains a matrix having two columns; the data are taken -# from columns 7 and 8 of plasma -# -res=selbybbw(x,grp.col,lev.col,pr=pr) -p=length(lev.col) -J=length(unique(x[,grp.col[1]])) -K=length(unique(x[,grp.col[2]])) -y=list() -ic=1-p -iu=0 -jk=0 -for(j in 1:J){ -for(k in 1:K){ -ic=ic+p -iu=iu+p -jk=jk+1 -y[[jk]]=matl(res[ic:iu]) -}} -y -} - - -regmediate<-function(x,y,regfun=tsreg,nboot=400,alpha=.05,xout=FALSE,outfun=out,MC=FALSE,SEED=TRUE,...){ -# -# In a mediation analysis, two of the linear equations that play a role are -# y=b_{01} + b_{11}x + e_1 -# y=b_{03} + b_{13}x + b_{23} x_m + e_3 -# where x_m is the mediator variable. -# An additional assumption is -# x_m=b_{02} + b_{12}x + \epsilon_2. -# Goal: Compute a confidence interval for b_{11}-b_{13} -# -# The default regression method is the Theil-Sen estimator. -# -# The predictor values are assumed to be in the n-by-2 matrix x, with the -# mediator variable in column 2. -# MC=T. A multicore processor will be used. -# xout=T will remove leverage points using the function indicated by the argument out. -# -if(MC)library(parallel) -x<-as.matrix(x) -p1<-ncol(x)+1 -p<-ncol(x) -if(p!=2)stop("Argument x should have two columns") -xy<-cbind(x,y) -xy<-elimna(xy) -x<-xy[,1:p] -y<-xy[,p1] -if(xout){ -m<-cbind(x,y) -flag<-outfun(x,plotit=FALSE)$keep -m<-m[flag,] -x<-m[,1:p] -y<-m[,p1] -} -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),ncol=nboot) -data=listm(data) -if(MC){ -bvec1<-mclapply(data,regbootMC,as.matrix(x[,1]),y,regfun,mc.preschedule=TRUE) -bvec2<-mclapply(data,regbootMC,x,y,regfun,mc.preschedule=TRUE) -} -if(!MC){ -bvec1<-lapply(data,regboot,as.matrix(x[,1]),y,regfun) -bvec2<-lapply(data,regboot,x,y,regfun) -} -bvec1=matl(bvec1) -bvec2=matl(bvec2) -# bvec is a p+1 by nboot matrix. The first row -# contains the bootstrap intercepts, the second row -# contains the bootstrap values for first predictor, etc. 
-dif=bvec1[2,]-bvec2[2,] -ilow<-round((alpha/2) * nboot) -ihi<-nboot - ilow -ilow<-ilow+1 -sig.level<-NA -temp<-mean(dif<0) -sig.level<-2*(min(temp,1-temp)) -bsort<-sort(dif) -regci<-bsort[ilow] -regci[2]<-bsort[ihi] -list(conf.interval=regci,p.value=sig.level) -} - - - -regmed2<-function(x,y,regfun=tsreg,nboot=400,alpha=.05,xout=FALSE,outfun=out,MC=FALSE, -SEED=TRUE,pr=TRUE,...){ -# -# In a mediation analysis, two of the linear equations that play a role are -# y=b_{01} + b_{11}x + e_1 -# y=b_{03} + b_{13}x + b_{23} x_m + e_3 -# where x_m is the mediator variable. -# An additional assumption is -# x_m=b_{02} + b_{12}x + \epsilon_2. -# Goal: Test hypotheses b_{12}=0 and b_{23}=0 -# -# The default regression method is the Theil-Sen estimator. -# -# The predictor values are assumed to be in the n-by-2 matrix x, with the -# mediator variable in column 2. -# MC=T. A multicore processor will be used. -# xout=T will remove leverage points using the function indicated by the argument out. -# -if(MC)library(parallel) -x<-as.matrix(x) -p1<-ncol(x)+1 -p<-ncol(x) -if(p!=2)stop("Argument x should have two columns") -xy<-cbind(x,y) -xy<-elimna(xy) -x<-xy[,1:p] -y<-xy[,p1] -if(xout){ -m<-cbind(x,y) -flag<-outfun(x,plotit=FALSE)$keep -m<-m[flag,] -x<-m[,1:p] -y<-m[,p1] -} -if(MC){ -temp1=regciMC(x[,1],x[,2],regfun=regfun,nboot=nboot,alpha=alpha,SEED=SEED,pr=FALSE) -temp2=regciMC(x,y,regfun=regfun,nboot=nboot,alpha=alpha,SEED=SEED,pr=FALSE) -} -if(!MC){ -temp1=regci(x[,1],x[,2],regfun=regfun,nboot=nboot,alpha=alpha,SEED=SEED,pr=FALSE) -temp2=regci(x,y,regfun=regfun,nboot=nboot,alpha=alpha,SEED=SEED,pr=FALSE) -} -if(pr){ -print("Output returned in res1 is for the slope of the regression line") -print("where the goal is to predict the mediator variable given the other") -print("predictor variable stored in column 1 of x.") -print("Output in res2 is for slope of the mediator when both predictors are used.") -} -res1=c(temp1$regci[2,],temp1$p.value[2]) -z1=t(as.matrix(res1)) -dimnames(z1)=list(NULL,c("ci.low","ci.up",'Estimate','S.E.',"p.value")) -res2=c(temp2$regci[3,],temp2$p.value[3]) -z2=t(as.matrix(res2)) -dimnames(z2)=list(NULL,c("ci.low","ci.up",'Estimate','S.E.',"p.value")) -list(res1=z1,res2=z2) -} - - -ogk.center<-function(x,beta=.9,...){ -# -# Compute OGK multivariate measure of location -# -center=ogk(x,beta=beta,...)$center -center -} -sdwe<-function(m,K=3){ -# -# Stahel-Donoho W-estimator implemented as suggested by -# Zuo, Cui and He 2004, Annals of Statistics, 32, 167--188 -# -m=elimna(m) -pd=1/(1+zdepth(m)) # projection depth -MPD=median(pd) # C in Zuo et al. notation -flag=(pd 10){ -avec<-.05/c(11:ncon) -dvec<-c(dvec,avec) -}} -if(alpha==.01){ -dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) -if(ncon > 10){ -avec<-.01/c(11:ncon) -dvec<-c(dvec,avec) -}} -if(alpha != .05 && alpha != .01){ -dvec<-alpha/c(1:ncon) -} -} -if(bhop)dvec<-(ncon-c(1:ncon)+1)*alpha/ncon -bvec<-matrix(NA,nrow=J,ncol=nboot) -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. 
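-# Resample whole rows of x; column i of bvec stores the values returned by est for
-# the ith bootstrap sample.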
-data<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot) -for(i in 1:nboot)bvec[,i]=est(x[data[i,],]) -test<-NA -bcon<-t(con)%*%bvec #ncon by nboot matrix -tvec<-t(con)%*%mvec -for (d in 1:ncon){ -tv<-sum(bcon[d,]==0)/nboot -test[d]<-sum(bcon[d,]>0)/nboot+.5*tv -if(test[d]> .5)test[d]<-1-test[d] -} -test<-2*test -output<-matrix(0,ncon,6) -dimnames(output)<-list(NULL,c("con.num","psihat","p.value","p.crit","ci.lower","ci.upper")) -temp2<-order(0-test) -zvec<-dvec[1:ncon] -sigvec<-(test[temp2]>=zvec) -output[temp2,4]<-zvec -icl<-round(dvec[ncon]*nboot/2)+1 -icu<-nboot-icl-1 -for (ic in 1:ncol(con)){ -output[ic,2]<-tvec[ic,] -output[ic,1]<-ic -output[ic,3]<-test[ic] -temp<-sort(bcon[ic,]) -output[ic,5]<-temp[icl] -output[ic,6]<-temp[icu] -} -num.sig<-sum(output[,3]<=output[,4]) -list(output=output,con=con,num.sig=num.sig) -} - -COVreg<-function(x,y,cov.fun=MARest,loc.fun=MARest,xout=FALSE,outfun=out,...){ -# -# Regression estimation can be done via the usual maximum likelihood -# covariance matrix. This function uses the same approach -# using a robust covariance matrix instead. -# -# The predictors are assumed to be stored in the n-by-p matrix x. -# -xy=elimna(cbind(x,y)) -p1=ncol(xy) -p=p1-1 -x=xy[,1:p] -y=xy[,p1] -x<-as.matrix(x) -if(xout){ -x<-as.matrix(x) -flag<-outfun(x,...)$keep -x<-x[flag,] -y<-y[flag] -x<-as.matrix(x) -} -AC=cov.fun(cbind(x,y),...)$cov -ma<-AC[1:p,p1] -m<-AC[1:p,1:p] -slope<-solve(m,ma) -mvals<-loc.fun(cbind(x,y))$center -b0<-mvals[p1]-sum(slope%*%mvals[1:p]) -res<-y-x%*%slope-b0 -list(coef=c(b0,slope),residuals=res) -} - - - - -dmedpb<-function(x,y=NULL,alpha=.05,con=0,est=median,plotit=TRUE,dif=FALSE,grp=NA, -hoch=TRUE,nboot=NA,xlab="Group 1",ylab="Group 2",ylab.ebar=NULL, -pr=TRUE,SEED=TRUE,BA=FALSE,PCI=FALSE,EBAR=PCI,...){ -# -# Use a percentile bootstrap method to compare -# medians of dependent groups. -# -# This is essentially the function rmmcppb, but set to compare medians -# by default. -# And it is adjusted to handle tied values. -# -# dif=T indicates that difference scores are to be used -# dif=F indicates that measure of location associated with -# marginal distributions are used instead. -# -# nboot is the bootstrap sample size. If not specified, a value will -# be chosen depending on the number of contrasts there are. -# -# x can be an n by J matrix or it can have list mode -# for two groups, data for second group can be put in y -# otherwise, assume x is a matrix (n-by-J) or has list mode. -# -# PCI=TRUE, if dif=TRUE and est=median, confidence intervals for difference scores are plottted -# So this is like plotting error bars. -# -# -if(dif){ -if(pr){ -print("dif=T, so analysis is done on difference scores.") -print(" Each confidence interval has probability coverage 1-alpha.") -print(" Also note a sequentially rejective method is being used.") -} -temp<-rmmcppbd(x,y=y,alpha=alpha,con=con,est=est,plotit=plotit,grp=grp, -nboot=nboot,hoch=hoch,...) 
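-# With dif=TRUE the computations are delegated to rmmcppbd; only its output matrix
-# and contrast matrix are retained here.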
-output<-temp$output -con<-temp$con -} -if(!dif){ -if(pr){ -print("dif=F, so analysis is done on marginal distributions.") -print(" Each confidence interval has probability coverage 1-alpha.") -print(" Also note that a sequentially rejective method is being used") -} -if(!is.null(y[1]))x<-cbind(x,y) -if(is.data.frame(x))x=as.matrix(x) -if(!is.list(x) && !is.matrix(x)) -stop("Data must be stored in a matrix or in list mode.") -if(is.list(x)){ -if(is.matrix(con)){ -if(length(x)!=nrow(con)) -stop("The number of rows in con is not equal to the number of groups.") -}} -if(is.list(x)){ -# put the data in an n by J matrix -mat<-matl(x) -} -if(is.matrix(x) && is.matrix(con)){ -if(ncol(x)!=nrow(con)) -stop("The number of rows in con is not equal to the number of groups.") -mat<-x -} -if(is.matrix(x))mat<-x -if(!is.na(sum(grp)))mat<-mat[,grp] -mat<-elimna(mat) # Remove rows with missing values. -x<-mat -J<-ncol(mat) -xcen<-x -for(j in 1:J)xcen[,j]<-x[,j]-est(x[,j]) -Jm<-J-1 -if(sum(con^2)==0){ -d<-(J^2-J)/2 -con<-matrix(0,J,d) -id<-0 -for (j in 1:Jm){ -jp<-j+1 -for (k in jp:J){ -id<-id+1 -con[j,id]<-1 -con[k,id]<-0-1 -}}} -d<-ncol(con) -if(is.na(nboot)){ -if(d<=4)nboot<-1000 -if(d>4)nboot<-5000 -} -n<-nrow(mat) -crit.vec<-alpha/c(1:d) -connum<-ncol(con) -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -xbars<-apply(mat,2,est) -psidat<-NA -for (ic in 1:connum)psidat[ic]<-sum(con[,ic]*xbars) -psihat<-matrix(0,connum,nboot) -psihatcen<-matrix(0,connum,nboot) -bvec<-matrix(NA,ncol=J,nrow=nboot) -bveccen<-matrix(NA,ncol=J,nrow=nboot) -if(pr)print("Taking bootstrap samples. Please wait.") -data<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot) -for(ib in 1:nboot){ -bvec[ib,]<-apply(x[data[ib,],],2,est,...) -bveccen[ib,]<-apply(xcen[data[ib,],],2,est,...) -} -# -# Now have an nboot by J matrix of bootstrap values. -# -test<-1 -bias<-NA -tval<-NA -tvalcen<-NA -icl=round(alpha*nboot/2)+1 -icu<-nboot-(icl-1) -cimat=matrix(NA,nrow=connum,ncol=2) -for (ic in 1:connum){ -psihat[ic,]<-apply(bvec,1,bptdpsi,con[,ic]) -tp=sort(psihat[ic,]) -cimat[ic,1]=tp[icl] -cimat[ic,2]=tp[icu] -psihatcen[ic,]<-apply(bveccen,1,bptdpsi,con[,ic]) -tvalcen[ic]<-sum((psihatcen[ic,]==0))/nboot -bias[ic]<-sum((psihatcen[ic,]>0))/nboot+sum((psihatcen[ic,]==0))/nboot-.5 -tval[ic]<-sum((psihat[ic,]==0))/nboot -if(BA){ -test[ic]<-sum((psihat[ic,]>0))/nboot+tval[ic]-.1*bias[ic] -if(test[ic]<0)test[ic]<-0 -} -if(!BA)test[ic]<-sum((psihat[ic,]>0))/nboot+.5*tval[ic] -test[ic]<-min(test[ic],1-test[ic]) -} -test<-2*test -ncon<-ncol(con) -dvec<-alpha/c(1:ncon) -if(alpha==.05){ -dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) -dvecba<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) -if(ncon > 10){ -avec<-.05/c(11:ncon) -dvec<-c(dvec,avec) -}} -if(alpha==.01){ -dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) -dvecba<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) -if(ncon > 10){ -avec<-.01/c(11:ncon) -dvec<-c(dvec,avec) -}} -if(hoch)dvec<-alpha/(2* c(1:ncon)) -dvec<-2*dvec -if(alpha != .05 && alpha != .01){ -dvec<-alpha/c(1:ncon) -dvecba<-dvec -#dvec[1]<-alpha/2 -} -if(!EBAR){ -if(plotit && ncol(bvec)==2){ -z<-c(0,0) -one<-c(1,1) -plot(rbind(bvec,z,one),xlab=xlab,ylab=ylab,type="n") -points(bvec) -totv<-apply(x,2,est,...) 
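-# Mahalanobis distances of the bootstrap estimates from the observed estimates are
-# used below to outline an approximate 1-alpha confidence region with a convex hull.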
-cmat<-var(bvec) -dis<-mahalanobis(bvec,totv,cmat) -temp.dis<-order(dis) -ic<-round((1-alpha)*nboot) -xx<-bvec[temp.dis[1:ic],] -xord<-order(xx[,1]) -xx<-xx[xord,] -temp<-chull(xx) -lines(xx[temp,]) -lines(xx[c(temp[1],temp[length(temp)]),]) -abline(0,1) -}} -temp2<-order(0-test) -ncon<-ncol(con) -zvec<-dvec[1:ncon] -if(BA)zvec<-dvecba[1:ncon] -sigvec<-(test[temp2]>=zvec) -output<-matrix(0,connum,6) -dimnames(output)<-list(NULL,c("con.num","psihat","p-value","p.crit", -"ci.lower","ci.upper")) -tmeans<-apply(mat,2,est,...) -psi<-1 -for (ic in 1:ncol(con)){ -output[ic,2]<-sum(con[,ic]*tmeans) -output[ic,1]<-ic -output[ic,3]<-test[ic] -output[temp2,4]<-zvec -temp<-sort(psihat[ic,]) -#print(psihat[ic,]) -#icl=round(alpha*nboot/2)+1 -#icl<-round(output[ic,4]*nboot/2)+1 # This adjustment causes confusion, it is not based on Hochberg -#icu<-nboot-(icl-1) -#output[ic,5]<-temp[icl] -#output[ic,6]<-temp[icu] -output[ic,5:6]<-cimat[ic,] -} -} -num.sig=nrow(output) -ior=order(output[,3],decreasing=TRUE) -for(j in 1:nrow(output)){ -if(output[ior[j],3]<=output[ior[j],4])break -else num.sig=num.sig-1 -} -num.sig<-sum(output[,3]<=output[,4]) -#if(nrow(output)>1)ids=which(output[,3]<=output[,4]) -if(EBAR ){ -if(identical(est,median)){ -if(dif){ -plotCI(output[,2],ali=output[,5],aui=output[,6],xlab='Difference',ylab=ylab.ebar) -}}} -list(output=output,con=con,num.sig=num.sig) -} -MAT2list<-function(x,J=NULL,p=NULL){ -# -# Store the data in a matrix or data frame in a new -# R variable having list mode. -# The results are stored in y, having list mode -# Col 1 to p of x will be stored as a matrix in y[[1]], -# Col p+1 to 2p are stored in y[[2]], and so on. -# -# The function assumes ncol(x)=J*P -# either J, the number of groups, or p, the number of variables, -# must be specified. -# -# This function is used by the R function linconMpb when testing -# hypotheses about linear contrasts based on multivariate data. -# -if(is.null(dim(x)))stop("The argument x must be a matrix or data frame") -y<-list() -if(is.null(J) && is.null(p))stop("Specify J or P") -if(is.null(J))J=ncol(x)/p -if(is.null(p))p=ncol(x)/J -Jp=floor(J)*floor(p) -if(Jp != ncol(x))stop("Jp is not equal to the number of columns") -lp=1-p -up=0 -for(j in 1:J){ -lp=lp+p -up=up+p -y[[j]]<-as.matrix(x[,lp:up]) -} -y -} -linconMpb<-function(x,alpha=.05,nboot=1000,grp=NA,est=tmean,con=0,bhop=FALSE, -SEED=TRUE,PDIS=FALSE,J=NULL,p=NULL,...){ -# -# Multiple comparisons for J independent groups using trimmed means -# with multivariate data for each group. -# -# A percentile bootstrap method with Rom's method is used. -# -# The data are assumed to be stored in x -# which has list mode, -# x[[1]] contains the data for the first group in the form of a -# matrix, x[[2]] the data -# for the second group, etc. Length(x)=the number of groups = J. -# -# est is the measure of location and defaults to the median -# ... can be used to set optional arguments associated with est -# -# The argument grp can be used to analyze a subset of the groups -# Example: grp=c(1,3,5) would compare groups 1, 3 and 5. -# -# Missing values are automatically removed. -# -con<-as.matrix(con) -if(is.matrix(x) || is.data.frame(x)){ -if(is.null(J) && is.null(p))stop("Specify J or P") -x=MAT2list(x,p=p,J=J) -} -if(!is.list(x))stop("Data must be stored in list mode.") -if(!is.na(sum(grp))){ # Only analyze specified groups. 
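-# Copy the requested groups into a new list; the contrast matrix and sample sizes
-# below then refer to the groups in the order given by grp.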
-xx<-list() -for(i in 1:length(grp))xx[[i]]<-x[[grp[i]]] -x<-xx -} -J<-length(x) -nullvec=rep(0,ncol(x[[1]])) -bplus=nboot+1 -tempn<-0 -mvec<-list -for(j in 1:J){ -x[[j]]<-elimna(x[[j]]) -} -Jm<-J-1 -# -# Determine contrast matrix -# -if(sum(con^2)==0){ -ncon<-(J^2-J)/2 -con<-matrix(0,J,ncon) -id<-0 -for (j in 1:Jm){ -jp<-j+1 -for (k in jp:J){ -id<-id+1 -con[j,id]<-1 -con[k,id]<-0-1 -}}} -ncon<-ncol(con) -if(nrow(con)!=J)stop("Something is wrong with con; the number of rows does not match the number of groups.") -# Determine critical levels -if(!bhop){ -if(alpha==.05){ -dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) -if(ncon > 10){ -avec<-.05/c(11:ncon) -dvec<-c(dvec,avec) -}} -if(alpha==.01){ -dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) -if(ncon > 10){ -avec<-.01/c(11:ncon) -dvec<-c(dvec,avec) -}} -if(alpha != .05 && alpha != .01){ -dvec<-alpha/c(1:ncon) -} -} -if(bhop)dvec<-(ncon-c(1:ncon)+1)*alpha/ncon -bvec<-array(NA,c(J,nboot,ncol(x[[1]]))) -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -#print("Taking bootstrap samples. Please wait.") -nvec=lapply(x,nrow) -for(j in 1:J){ -data<-matrix(sample(nvec[[j]],size=nvec[[j]]*nboot,replace=TRUE),nrow=nboot) -bvec[j,,]<-apply(data,1,linconMpb.sub,x[[j]],est,...) # Bootstrapped values for jth group -} -test<-NA -for (d in 1:ncon){ -tv=matrix(0,nboot,ncol(x[[1]])) #nboot by p matrix reflecting Psi hat -estit=rep(0,ncol(x[[1]])) -for(j in 1:J){ -tv=tv+con[j,d]*bvec[j,,] -estit=estit+con[j,d]*apply(x[[j]],2,est,...) -} -if(!PDIS)m1=cov(tv) -tv=rbind(tv,nullvec) -if(!PDIS)dv=mahalanobis(tv,center=estit,m1) -if(PDIS)dv=pdis(tv,center=estit) # projection distances -test[d]=1-sum(dv[bplus]>=dv[1:nboot])/nboot -} -output<-matrix(0,ncon,3) -dimnames(output)<-list(NULL,c("con.num","p.value","p.crit")) -temp2<-order(0-test) -zvec<-dvec[1:ncon] -sigvec<-(test[temp2]>=zvec) -output[temp2,3]<-zvec -for (ic in 1:ncol(con)){ -output[ic,1]<-ic -output[ic,2]<-test[ic] -} -num.sig<-sum(output[,2]<=output[,3]) -list(output=output,con=con,num.sig=num.sig) -} -linconMpb.sub<-function(data,x,est,...){ -res=apply(x[data,],2,est,...) -res -} -linconSpb<-function(x,alpha=.05,nboot=1000,grp=NA,est=smean,con=0,bhop=FALSE, -SEED=TRUE,PDIS=FALSE,J=NULL,p=NULL,...){ -# -# Multiple comparisons for J independent groups -# with multivariate data for each group. -# That is, linear contrasts relevant to MANOVA can be tested. -# The method can handle -# multivariate measures of location that take into account -# the overall structure of the data, as opposed to using, for example -# the marginal trimmed means, which is done by default when using -# linconMpb. -# The argument -# -# est=smean, -# -# means that by default the skipped measure of location, based on -# on projection method for detecting outliers, is used. -# -# Mahalanobis distance is used to compute a p-value, but projection -# distances could be used by setting PDIS=T. -# -# A percentile bootstrap method with Rom's method is used. -# -# alpha=.05 means the probability of one or more type I errors is .05. -# -# The data are assumed to be stored in x -# which has list mode, -# x[[1]] contains the data for the first group in the form of a -# matrix, x[[2]] the data -# for the second group, etc. Length(x)=the number of groups = J. -# -# est is the measure of location and defaults to the median -# ... 
can be used to set optional arguments associated with est -# -# The argument grp can be used to analyze a subset of the groups -# Example: grp=c(1,3,5) would compare groups 1, 3 and 5. -# -# Missing values are automatically removed. -# -if(is.matrix(x) || is.data.frame(x)){ -if(is.null(J) && is.null(p))stop("Specify J or P") -x=MAT2list(x,p=p,J=J) -} -con<-as.matrix(con) -if(!is.list(x))stop("Data must be stored in list mode.") -if(!is.na(sum(grp))){ # Only analyze specified groups. -xx<-list() -for(i in 1:length(grp))xx[[i]]<-x[[grp[i]]] -x<-xx -} -J<-length(x) -nullvec=rep(0,ncol(x[[1]])) -bplus=nboot+1 -tempn<-0 -mvec<-list -for(j in 1:J){ -x[[j]]<-elimna(x[[j]]) -} -Jm<-J-1 -# -# Determine contrast matrix -# -if(sum(con^2)==0){ -ncon<-(J^2-J)/2 -con<-matrix(0,J,ncon) -id<-0 -for (j in 1:Jm){ -jp<-j+1 -for (k in jp:J){ -id<-id+1 -con[j,id]<-1 -con[k,id]<-0-1 -}}} -ncon<-ncol(con) -if(nrow(con)!=J)stop("Something is wrong with con; the number of rows does not match the number of groups.") -# Determine critical levels -if(!bhop){ -if(alpha==.05){ -dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) -if(ncon > 10){ -avec<-.05/c(11:ncon) -dvec<-c(dvec,avec) -}} -if(alpha==.01){ -dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) -if(ncon > 10){ -avec<-.01/c(11:ncon) -dvec<-c(dvec,avec) -}} -if(alpha != .05 && alpha != .01){ -dvec<-alpha/c(1:ncon) -} -} -if(bhop)dvec<-(ncon-c(1:ncon)+1)*alpha/ncon -bvec<-array(NA,c(J,nboot,ncol(x[[1]]))) -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -#print("Taking bootstrap samples. Please wait.") -nvec=lapply(x,nrow) -for(j in 1:J){ -data<-matrix(sample(nvec[[j]],size=nvec[[j]]*nboot,replace=TRUE),nrow=nboot) -bvec[j,,]<-apply(data,1,linconSpb.sub,x[[j]],est,...) # Bootstrapped values for jth group -} -test<-NA -for (d in 1:ncon){ -tv=matrix(0,nboot,ncol(x[[1]])) #nboot by p matrix reflecting Psi hat -estit=rep(0,ncol(x[[1]])) -for(j in 1:J){ -tv=tv+con[j,d]*bvec[j,,] -estit=estit+con[j,d]*est(x[[j]],...) -} -if(!PDIS)m1=cov(tv) -tv=rbind(tv,nullvec) -if(!PDIS)dv=mahalanobis(tv,center=estit,m1) -if(PDIS)dv=pdis(tv,center=estit) # projection distances -test[d]=1-sum(dv[bplus]>=dv[1:nboot])/nboot -} -output<-matrix(0,ncon,3) -dimnames(output)<-list(NULL,c("con.num","p.value","p.crit")) -temp2<-order(0-test) -zvec<-dvec[1:ncon] -sigvec<-(test[temp2]>=zvec) -output[temp2,3]<-zvec -for (ic in 1:ncol(con)){ -output[ic,1]<-ic -output[ic,2]<-test[ic] -} -num.sig<-sum(output[,2]<=output[,3]) -list(output=output,con=con,num.sig=num.sig) -} -linconSpb.sub<-function(data,x,est,...){ -res=est(x[data,],...) -res -} - -MULtr.anova<-function(x,J=NULL,p=NULL,tr=.2,alpha=.05){ -# -# Do Multivariate ANOVA with trimmed means using -# Johansen's method -# -# x is assumed to have list mode with length(x)=J=number of groups and -# x[[j]] is an n_j-by-p matrix, p is the number of variables. -# -# x can also be a matrix when J and p are specified. It is assumed the data are stored in -# a matrix in the same manner expected by bwtrim. 
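-# Example (illustrative): with the data for group j stored as a matrix in x[[j]],
-# MULtr.anova(x,tr=.2,alpha=.05) returns the Johansen-type test statistic and the
-# corresponding .05 critical value.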
-# -# To get a p-value, use the function MULAOVp -# -if(is.matrix(x) || is.data.frame(x)){ -if(is.null(J) && is.null(p))stop("Specify J or P") -x=MAT2list(x,p=p,J=J) -} -x=lapply(x,as.matrix) -x=lapply(x,elimna) -p=ncol(x[[1]]) -iden=diag(p) -J=length(x) -tvec=list() -nval=lapply(x,nrow) -Rtil=lapply(x,wincov,tr=tr) -tvec=lapply(x,mmean,tr=tr) -g=list() -gmean=rep(0,p) # grand mean eventually -groupm=list() -Wsum=matrix(0,ncol=p,nrow=p) -W=list() -f=0 -Aw=0 -for(j in 1:J){ -dimnames(x[[j]])=list(NULL,NULL) -tvec[[j]]=as.matrix(tvec[[j]]) -g[[j]]=floor(nval[[j]]*tr) -Rtil[[j]]=Rtil[[j]]*(nval[[j]]-1)/((nval[[j]]-2*g[[j]])*(nval[[j]]-2*g[[j]]-1)) -f[j]=nval[[j]]-2*g[[j]]-1 -W[[j]]=solve(Rtil[[j]]) -groupm[[j]]=apply(x[[j]],2,tmean,tr=tr) -Wsum=Wsum+W[[j]] -gmean=gmean+W[[j]]%*%tvec[[j]] -} -Wsuminv=solve(Wsum) -for(j in 1:J){ -temp=iden-Wsuminv%*%W[[j]] -tempsq=temp%*%temp -Aw=Aw+(sum(diag(tempsq))+(sum(diag(temp)))^2)/f[j] -} -Aw=Aw/2 -gmean=as.matrix(gmean) -gmean=solve(Wsum)%*%gmean # Final weighted grand mean -df=p*(J-1) -crit<-qchisq(1-alpha,df) -crit<-crit+(crit/(2*df))*(Aw+3*Aw*crit/(df+2)) -test=0 -for(k in 1:p){ -for(m in 1:p){ -for(j in 1:J){ -test=test+W[[j]][k,m]*(groupm[[j]][m]-gmean[m])*(groupm[[j]][k]-gmean[k]) -}}} -list(test.stat=test,crit.value=crit) -} - - -MULAOVp<-function(x,J=NULL,p=NULL,tr=.2){ -# -# Do Multivariate ANOVA with trimmed means using -# Johansen's method -# -# x is assumed to have list mode with J=number of groups -# x[[j]] is an n_j by p matrix -# -alval<-c(1:999)/1000 -for(i in 1:999){ -irem<-i -Qa<-MULtr.anova(x,J=J,p=p,tr=tr,alpha=alval[i]) -if(Qa$test.stat>Qa$crit.value)break -} -list(test.stat=Qa$test.stat,p.value=alval[i]) -} - -YYmcp<-function(x,alpha=.05,grp=NA,tr=.2,bhop=FALSE,J=NULL,p=NULL,...){ -# -# All pairwise comparisons among J independent groups using trimmed means -# with multivariate data for each group. -# The method applies the Yanagihara - Yuan for each pair of groups -# and controls FWE via Rom's method if bhop=F. -# bhop=T, use Benjamini-Hochberg method -# -# The data are assumed to be stored in x -# which has list mode, -# x[[1]] contains the data for the first group in the form of a -# matrix, x[[2]] the data -# for the second group, etc., each matrix having the same -# number of columns Length(x)=the number of groups = J. -# -# The data can be stored in a single matrix having Jp columns -# J = number of groups. -# If this is the case, specify the argument J or p(number of variables) - -# est is the measure of location and defaults to the median -# ... can be used to set optional arguments associated with est -# -# The argument grp can be used to analyze a subset of the groups -# Example: grp=c(1,3,5) would compare groups 1, 3 and 5. -# -# Missing values are automatically removed. -# -con<-as.matrix(con) -if(is.matrix(x) || is.data.frame(x)){ -if(is.null(J) && is.null(p))stop("Specify J or P") -x=MAT2list(x,p=p,J=J) -} -if(!is.list(x))stop("Data must be stored in list mode.") -if(!is.na(sum(grp))){ # Only analyze specified groups. 
-xx<-list() -for(i in 1:length(grp))xx[[i]]<-x[[grp[i]]] -x<-xx -} -J<-length(x) -nullvec=rep(0,ncol(x[[1]])) -bplus=nboot+1 -tempn<-0 -mvec<-list -for(j in 1:J){ -x[[j]]<-elimna(x[[j]]) -} -Jm<-J-1 -# -# Determine contrast matrix -# -ncon<-(J^2-J)/2 -if(!bhop){ -if(alpha==.05){ -dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) -if(ncon > 10){ -avec<-.05/c(11:ncon) -dvec<-c(dvec,avec) -}} -if(alpha==.01){ -dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) -if(ncon > 10){ -avec<-.01/c(11:ncon) -dvec<-c(dvec,avec) -}} -if(alpha != .05 && alpha != .01){ -dvec<-alpha/c(1:ncon) -} -} -if(bhop)dvec<-(ncon-c(1:ncon)+1)*alpha/ncon -# -output<-matrix(0,ncon,4) -dimnames(output)<-list(NULL,c("Group","Group","p.value","p.crit")) -ic=0 -for (j in 1:Jm){ -jp<-j+1 -for (k in jp:J){ -ic=ic+1 -output[ic,1]=j -output[ic,2]=k -output[ic,3]<-YYmanova(x[[j]],x[[k]],tr=tr)$p.value -}} -test=output[,3] -temp2<-order(0-test) -zvec<-dvec[1:ncon] -sigvec<-(test[temp2]>=zvec) -output[temp2,4]<-zvec -num.sig<-sum(output[,3]<=output[,4]) -list(output=output,num.sig=num.sig) -} - - -loc2dif<-function(x,y=NULL,est=median,na.rm=TRUE,plotit=FALSE,xlab="",ylab="",...){ -# -# Compute a measure of location associated with the -# distribution of x-y, the typical difference between two randomly sampled values. -# The measure of location is indicated by the argument -# est. -# x and y are paired data or independent variables having the same length. -# If x and y have different lengths, use the function wmwloc -# -# Advantage of this estimator: relatively high efficiency even under normality versus -# using sample means. -# -if(is.null(y)){ -if(ncol(x)!=2)stop("x should be an n-by-2 matrix") -y=x[,2] -x=x[,1] -if(na.rm)m=elimna(cbind(x,y)) -x=m[,1] -y=m[,2] -} -x=elimna(x) -y=elimna(y) -temp=as.vector(outer(x,y,FUN="-")) -val<-est(temp,na.rm=TRUE,...) -if(plotit)akerd(temp,xlab=xlab,ylab=ylab) -val -} - -mlrreg<-function(x,y,cov.fun=cov.mcd,ols.op=TRUE,mcd.op=TRUE, -quantile.used=floor(.75*n),RES=FALSE,...){ -# -# Do Multivariate regression, using by default the method -# in Rousseeuw, Van Aelst, Van Driessen Agullo -# Technometrics, 46, 293-305 -# -# Note, to use the method recommended by Rousseeuw et al., the argument -# quantile.used=.75*n is used when calling cov.mcd. -# -# RES=T, the residuals will be returned. -# -# y is assumed to be multivariate with data stored in a matrix. -# -# an initial fit is found using the measures of scatter and location -# corresponding to cof.fun and mcd.op. If -# mcd.op=T, cov.mcd is used with quanitle.used=.75n -# mcd.op=F, cov.fun is used and defaults to cov.mcd with the -# default value usded by R for the argument quanitle.used -# But any function that returns location and scatter in $center and $cov -# can be used. -# -# if ols.op=T, OLS is applied after points are removed based on iniital fit -# if ols.op=F, Theil-Sen is used by calling the function mopreg -# -# Early version of this function considered estimating -# explanatory power in terms of the generalized variance -# of the predicted y values and the observed y values -# epow.cov determines which robust covariance matrix will be used. 
-# This idea has not been explored enough -# Some choices are: -# cov (the usual generalized variance) -# skipcov -# tbscov -# covout -# covogk -# mgvcov -# mvecov -# mcdcov -# -library(MASS) -if(!is.matrix(y))stop("y is not a matrix") -X<-cbind(x,y) -X<-elimna(X) -n<-nrow(X) -qy<-ncol(y) -qx<-ncol(x) -qxp1<-qx+1 -tqyqx<-qy+qx -y<-X[,qxp1:tqyqx] -# compute initial estimate of slopes and intercept: -if(!mcd.op)locscat<-cov.fun(X,...) -if(mcd.op)locscat<-cov.mcd(X,quan=quantile.used) -sig<-locscat$cov -mu<-locscat$center -sigxx<-sig[1:qx,1:qx] -sigxy<-sig[1:qx,qxp1:tqyqx] -sigyy<-sig[qxp1:tqyqx,qxp1:tqyqx] -Bhat<-solve(sigxx)%*%sigxy -sige<-sigyy-t(Bhat)%*%sigxx%*%Bhat -sige.inv<-solve(sige) -Ahat<-t(mu[qxp1:tqyqx]-t(Bhat)%*%mu[1:qx]) -resL<-matrix(nrow=nrow(X),ncol=qy) -for(i in 1:nrow(X))resL[i,]<-y[i,]-t(Bhat)%*%X[i,1:qx] -for(j in 1:qy)resL[,j]<-resL[,j]-Ahat[j] -drL<-NA -for(i in 1:nrow(X))drL[i]<-t(resL[i,])%*%sige.inv%*%resL[i,] -# In Rousseeuw notation, drL<- is d^2 -w<-rep(0,nrow(X)) -qdr<-qchisq(.99,qy) -iflag<-(drLalpha) -if(!FWE)id2=which(mat[,8]>alpha) -points(pts[id2,1],pts[id2,2],pch='*') -if(length(id)>0)points(pts[id,1],pts[id,2],pch='+') -} -sig.pts=unique(sig.pts) -list(points=pts,output=mat,crit=critv,sig.pts=sig.pts) -} - -rplot2g<-runmean2g - -Qancsm<-function(x1,y1,x2,y2,crit.mat=NULL,nboot=200,SEED=TRUE,REP.CRIT=FALSE, -qval=.5,q=NULL,xlab="X",ylab="Y",plotit=TRUE,pr=TRUE,xout=FALSE,outfun=out,...){ -# -# Compare two nonparametric -# regression lines corresponding to two independent groups -# using the depths of smooths. -# -# NULL hypothesis: regression lines are identical in terms of the median -# of Y, given$X, for all X -# The method is based on comparing the depth of the fitted regression lines -# and is essentially a slight variation of the method in Wilcox -# (in press) Journal of Data Science. -# -# One covariate only is allowed. -# -if(ncol(as.matrix(x1))>1)stop("One covariate only is allowed") -if(!is.null(q))qval=q -if(xout){ -flag1=outfun(x1)$keep -flag2=outfun(x2)$keep -x1=x1[flag1] -y1=y1[flag1] -x2=x2[flag2] -y2=y2[flag2] -} -if(SEED)set.seed(2) -xy=elimna(cbind(x1,y1)) -x1=xy[,1] -xord=order(x1) -x1=x1[xord] -y1=xy[xord,2] -xy=elimna(cbind(x2,y2)) -x2=xy[,1] -xord=order(x2) -x2=x2[xord] -y2=xy[xord,2] -n1=length(y1) -n2=length(y2) -if(is.null(crit.mat[1])){ -if(pr)print("Determining critical value. 
This might take a while") -crit.val=NA -yall=c(y1,y2) -xall=c(x1,x2) -nn=n1+n2 -il=n1+1 -for(i in 1:nboot){ -data=sample(nn,nn,T) -yy1=yall[data[1:n1]] -yy2=yall[data[il:nn]] -xx1=xall[data[1:n1]] -xx2=xall[data[il:nn]] -crit.mat[i]=Qdepthcom(xx1,yy1,xx2,yy2,qval=qval) -}} -dep=Qdepthcom(x1,y1,x2,y2,qval=qval) -pv=1-mean(crit.mat2)stop("This function only allows one covariate") -if(xout){ -m<-cbind(x1,y1) -flag<-outfun(x1,plotit=FALSE)$keep -m<-m[flag,] -x1<-m[,1] -y1<-m[,2] -m<-cbind(x2,y2) -flag<-outfun(x2,plotit=FALSE)$keep -m<-m[flag,] -x2<-m[,1] -y2<-m[,2] -} -x=c(x1,x2) -y=c(y1,y2) -g=c(rep(0,length(x1)),rep(1,length(x2))) -xgy=elimna(cbind(x,g,x*g,y)) -xg=xgy[,1:3] -y=xgy[,4] -res=olswbtest(xg,y,nboot=nboot,SEED=SEED,RAD=RAD,alpha=alpha) -res[3,6] -} - -regpreCV<-function(x,y,regfun=tsreg,varfun=pbvar,adz=TRUE,model=NULL,locfun=mean, -xout=FALSE,outfun=out, -plotit=TRUE,xlab="Model Number",ylab="Prediction Error",...){ -# -# Estimate the prediction error using the regression method -# regfun in conjunction with leave-one-out cross-validation -# -# The argument model should have list mode, model[[1]] indicates -# which predictors are used in the first model. For example, storing -# 1,4 in model[[1]] means predictors 1 and 4 are being considered. -# If model is not specified, and number of predictors is at most 5, -# then all models are considered. -# -# If adz=T, added to the models to be considered is where -# all regression slopes are zero. That is, use measure of location only -# corresponding to -# locfun. -# -x<-as.matrix(x) -d<-ncol(x) -p1<-d+1 -temp<-elimna(cbind(x,y)) -x<-temp[,1:d] -y<-temp[,d+1] -x<-as.matrix(x) -if(xout){ -x<-as.matrix(x) -flag<-outfun(x,...)$keep -x<-x[flag,] -y<-y[flag] -x<-as.matrix(x) -} -if(is.null(model)){ -if(d<=5)model<-modgen(d,adz=adz) -if(d>5)model[[1]]<-c(1:ncol(x)) -} -mout<-matrix(NA,length(model),3,dimnames=list(NULL,c("est.error", -"var.used","rank"))) -for (imod in 1:length(model)){ -nmod=length(model[[imod]])-1 -temp=c(nmod:0) -mout[imod,2]=sum(model[[imod]]*10^temp) -# -if(sum(model[[imod]]==0)!=1){ -xx<-x[,model[[imod]]] -xx<-as.matrix(xx) -mout[imod,1]<-regpecv(xx,y,regfun=regfun,varfun=varfun,...) -} -# -if(sum(model[[imod]]==0)==1){ -mout[imod,1]<-locCV(y,varfun=varfun,locfun=locfun) -}} -mout[,3]=rank(mout[,1]) -if(plotit)plot(c(1:nrow(mout)),mout[,1],xlab=xlab,ylab=ylab) -mout -} - -locCV<-function(y,varfun=pbvar,locfun=median,...){ -vals=NA -n=length(y) -est=locfun(y) -for(i in 1:n)vals[i]=y[i]-locfun(y[-i],...) -res=varfun(vals) -res -} - - -esI<-function(x,tr=.2,nboot=100,SEED=TRUE){ -# -# Explanatory measure of effect size for an interaction in -# a 2-by-2 ANOVA -# -# Assume x is a mtrix with 4 columns or has list mode with length 4 -# Also assume interaction is for x_1-x_2 versus x_3-x_4 -# -if(is.matrix(x)|| is.data.frame(x))x=listm(x) -es=yuenv2(outer(x[[1]],x[[2]],"-"),outer(x[[3]],x[[4]],"-"), -tr=tr,nboot=nboot,SEED=SEED)$Effect.Size -list(Effect.Size=es) -} - - -esImcp<-function(J,K,x,tr=0.2,nboot=100,SEED=TRUE){ -# -# Compute measure of effect size for all interactions in a J-by-K design -# A robust, heteroscedastic measure of effect (explanatory measure of -# effect size) is used. 
-# -if(is.matrix(x)|| is.data.frame(x))x=listm(x) -con=con2way(J,K)$conAB -es=NULL -for (j in 1:ncol(con)){ -flag=(con[,j]!=0) -es[j]=esI(x[flag],tr=tr,nboot=nboot,SEED=SEED)$Effect.Size -} -list(Effect.Sizes=es,contrast.coef=con) -} - - -ESmainMCP<-function(J,K,x,tr=0.2,nboot=100,SEED=TRUE){ -# -# Compute explanatory measure of effect size for all main effects -# in a two-way design. That is, for Factor A, compute it for all levels j < j' -# For Factor B, compute it for all level kobs]) - if (sumpr0))print("Duplicate values detected; tshdreg might have more power than tsreg") -}} -nv=length(y) -x<-as.matrix(x) -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -if(pr)print("Taking bootstrap samples. Please wait.") -data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot) -if(!WARNS)options(warn=-1) -bvec<-apply(data,1,regboot,x,y,regfun,xout=FALSE,...) -options(warn=0) -#Leverage points already removed. -# bvec is a p+1 by nboot matrix. The first row -# contains the bootstrap intercepts, the second row -# contains the bootstrap values for first predictor, etc. -regci<-matrix(0,p1,6) -vlabs="Intercept" -for(j in 2:p1)vlabs[j]=paste("Slope",j-1) -if(LABELS)vlabs[2:p1]=labels(x)[[2]] -dimnames(regci)<-list(vlabs,c("ci.low","ci.up","Estimate","S.E.","p-value",'p.adj')) -ilow<-round((alpha/2) * nboot) -ihi<-nboot - ilow -ilow<-ilow+1 -se<-NA -pvec<-NA -for(i in 1:p1){ -bsort<-sort(bvec[i,]) -#pvec[i]<-(sum(bvec[i,]<0)+.5*sum(bvec[i,]==0))/nboot -pvec[i]<-(sum(bvec[i,].5)pvec[i]<-1-pvec[i] -regci[i,1]<-bsort[ilow] -regci[i,2]<-bsort[ihi] -se[i]<-sqrt(var(bvec[i,])) -} -if(p1==3){ -if(plotit){ -plot(bvec[2,],bvec[3,],xlab=xlab,ylab=ylab) -}} -regci[,3]=estit -pvec<-2*pvec -regci[,4]=se -regci[,5]=regci[,6]=pvec -regci[2:p1,6]=p.adjust(pvec[2:p1],method=method) -list(regci=regci,n=nrem,n.keep=nv) -} -M2m.loc<-function(m,grpc,col.dat,locfun=tmean,...){ -# -# m is a matrix or data frame. -# Compute a measure of location for each of several categories, with -# categories indicated by the values in the column of m given by the -# argument grpc. -# The argument grpc can have up to 4 values, which correspond to factors. -# -# col.dat indicates the column of m containing the outcome measure -# of interest. -# locfun indicates the measure of location, which defaults to the 20% -# trimmed mean. -# -# Example, -# M2m.loc(x,c(1,4),5,locfun=mean) -# indicates that there are 2 factors, with levels of the factors indicated -# by the values in columns 1 and 4 of the matrix x. For each combination -# of levels, -# locfun=mean -# indicates that the sample mean will be computed. -# -flagit=F -if(is.null(dim(m)))stop("Data must be stored in a matrix or data frame") -if(is.na(grpc[1]))stop("The argument grpc is not specified") -if(is.na(col.dat[1]))stop("The argument col.dat is not specified") -if(length(grpc)>4)stop("grpc must have length <= 4") -m=as.data.frame(m) -if(length(grpc)==1){ -p1=ncol(m)+1 -dum=rep(1,nrow(m)) -flagit=T -m=cbind(m,dum) -grpc=c(NULL,gprc,p1) -cat1<-sort(unique(m[,grpc[1]])) -M=NULL -for (ig1 in 1:length(cat1)){ -flag1=(m[,grpc[1]]==cat1[ig1]) -flag=(flag1==1) -msub=as.data.frame(m[flag,]) -loc=locfun(m[flag,col.dat],...) 
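-# Append the factor level(s) for this cell together with its location estimate.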
-M=rbind(M,as.data.frame(cbind(msub[1,grpc],loc))) -} -M=M[,c(1,3)] -} -if(length(grpc)==2){ -cat1<-sort(unique(m[,grpc[1]])) -cat2<-sort(unique(m[,grpc[2]])) -M=NULL -for (ig1 in 1:length(cat1)){ -for (ig2 in 1:length(cat2)){ -flag1=(m[,grpc[1]]==cat1[ig1]) -flag2=(m[,grpc[2]]==cat2[ig2]) -flag=(flag1*flag2==1) -msub=m[flag,] -loc=locfun(m[flag,col.dat],...) -M=rbind(M,as.data.frame(cbind(msub[1,grpc],loc))) -}}} -if(length(grpc)==3){ -cat1<-sort(unique(m[,grpc[1]])) -cat2<-sort(unique(m[,grpc[2]])) -cat3<-sort(unique(m[,grpc[3]])) -M=NULL -for (ig1 in 1:length(cat1)){ -for (ig2 in 1:length(cat2)){ -for (ig3 in 1:length(cat3)){ -flag1=(m[,grpc[1]]==cat1[ig1]) -flag2=(m[,grpc[2]]==cat2[ig2]) -flag3=(m[,grpc[3]]==cat3[ig3]) -flag=(flag1*flag2*flag3==1) -msub=m[flag,] -loc=locfun(m[flag,col.dat],...) -M=rbind(M,as.data.frame(cbind(msub[1,grpc],loc))) -}}}} -if(length(grpc)==4){ -cat1<-sort(unique(m[,grpc[1]])) -cat2<-sort(unique(m[,grpc[2]])) -cat3<-sort(unique(m[,grpc[3]])) -cat4<-sort(unique(m[,grpc[4]])) -M=NULL -for (ig1 in 1:length(cat1)){ -for (ig2 in 1:length(cat2)){ -for (ig3 in 1:length(cat3)){ -for (ig4 in 1:length(cat4)){ -flag1=(m[,grpc[1]]==cat1[ig1]) -flag2=(m[,grpc[2]]==cat2[ig2]) -flag3=(m[,grpc[3]]==cat3[ig3]) -flag4=(m[,grpc[4]]==cat4[ig4]) -flag=(flag1*flag2*flag3*flag4==1) -msub=m[flag,] -loc=locfun(m[flag,col.dat],...) -M=rbind(M,as.data.frame(cbind(msub[1,grpc],loc))) -}}}}} -if(flagit)M=M[,c(1,3)] -M -} -skip<-function(m,cop=6,MM=FALSE,op=1,mgv.op=0,outpro.cop=3,STAND=TRUE,pr=TRUE){ -# -# m is an n by p matrix -# -# Compute skipped location and covariance matrix -# -# op=1: -# Eliminate outliers using a projection method -# That is, first determine center of data using: -# -# cop=1 Donoho-Gasko median, -# cop=2 MCD, -# cop=3 marginal medians. -# cop=4 uses MVE center -# cop=5 uses TBS -# cop=6 uses rmba (Olive's median ball algorithm) -# -# For each point -# consider the line between it and the center, -# project all points onto this line, and -# check for outliers using -# -# MM=F, a boxplot rule. -# MM=T, rule based on MAD and median -# -# Repeat this for all points. A point is declared -# an outlier if for any projection it is an outlier -# -# op=2 use mgv (function outmgv) method to eliminate outliers -# -# Eliminate any outliers and compute means -# using remaining data. -# mgv.op=0, mgv uses all pairwise distances to determine center of the data -# mgv.op=1 uses MVE -# mgv.op=2 uses MCD -# -temp<-NA -m<-elimna(m) -if(op==2)temp<-outmgv(m,plotit=FALSE,op=mgv.op)$keep -if(op==1)temp<-outpro(m,plotit=FALSE,MM=MM,cop=outpro.cop,STAND=STAND,pr=pr)$keep -val<-var(m[temp,]) -loc<-apply(m[temp,],2,mean) -list(center=loc,cov=val) -} - -ancmppb<-function(x1,y1,x2,y2,fr1=1,fr2=1,alpha=.05,pts=NA,est=tmean,nboot=NA, -bhop=TRUE,SEED=TRUE,cov.fun=skip,cop=NULL,COV.both=FALSE,pr=TRUE,...){ -# -# Compare two independent groups using the ancova method -# with multiple covariates. -# No parametric assumption is made about the form of -# the regression lines--a running interval smoother is used. -# Design points are chosen based on depth of points in x1 if pts=NA -# Assume data are in x1 y1 x2 and y2 -# -# cov.fun determines the location and -# scatter matrix used to find closest points to -# a design point. It is used by ancdes. 
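-# The result of cov.fun for each group (its center and covariance) is also passed
-# to near3d when determining which points are close to a given design point.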
-# -# Choices for cov.fun include -# cov.mve -# cov.mcd -# rmba -# skip -# tbs -# -#if(pr)print("For the old version of this function, use ancmpbpb") -x1=as.matrix(x1) -y1=as.matrix(y1) -if(ncol(x1)==1)stop("Use a function designed for one covariate only") -x2=as.matrix(x2) -y2=as.matrix(y2) -if(ncol(x1)!=ncol(x2)) -stop("Number of covariates must be the same for each group") -xy=elimna(cbind(x1,y1)) -p=ncol(x1) -p1=p+1 -x1=xy[,1:p] -y1=xy[,p1] -xy=elimna(cbind(x2,y2)) -x2=xy[,1:p] -y2=xy[,p1] -x1=as.matrix(x1) -x2=as.matrix(x2) -mval1=cov.fun(x1) -mval2=cov.fun(x2) -if(is.na(pts[1])){ -x1<-as.matrix(x1) -if(!COV.both){ -if(!is.null(cop))pts<-ancdes(x1,cop=cop) -if(is.null(cop))pts=ancdes(x1,center=mval1$center) -} -if(COV.both){ -if(!is.null(cop))pts<-ancdes(rbind(x1,x2),cop=cop) -if(is.null(cop))pts=ancdes(rbind(x1,x2),center=mval1$center) -} -} -pts<-as.matrix(pts) -if(nrow(pts)>=29){ -print("WARNING: More than 28 design points") -print("Only first 28 are used.") -pts<-pts[1:28,] -} -n1<-1 -n2<-1 -vecn<-1 -for(i in 1:nrow(pts)){ -n1[i]<-length(y1[near3d(x1,pts[i,],fr1,mval1)]) -n2[i]<-length(y2[near3d(x2,pts[i,],fr2,mval2)]) -} -flag<-rep(TRUE,nrow(pts)) -for(i in 1:nrow(pts))if(n1[i]<10 || n2[i]<10)flag[i]<-FALSE -flag=as.logical(flag) -pts<-pts[flag,] -if(sum(flag)==1)pts<-t(as.matrix(pts)) -if(sum(flag)==0)stop("No comparable design points found, might increase span.") -mat<-matrix(NA,nrow(pts),7) -dimnames(mat)<-list(NULL,c("n1","n2","DIF","TEST","se","ci.low","ci.hi")) -g1<-list() -ip<-nrow(pts) -ncom<-0 -nc2<-ip -con<-matrix(0,nrow=2*ip,ncol=nrow(pts)) -for (i in 1:nrow(pts)){ -ip<-ip+1 -ncom<-ncom+1 -nc2<-nc2+1 -con[ncom,i]<-1 -con[nc2,i]<-0-1 -temp<-y1[near3d(x1,pts[i,],fr1,mval1)] -g1[[i]]<-temp[!is.na(temp)] -temp<-y2[near3d(x2,pts[i,],fr2,mval2)] -g1[[ip]]<-temp[!is.na(temp)] -} -flag.est=FALSE -if(identical(est,onestep))flag.est=TRUE -if(identical(est,mom))flag.est=TRUE -if(flag.est)mat<-pbmcp(g1,alpha=alpha,nboot=nboot,est=est,con=con,bhop=bhop,SEED=SEED,...) -if(!flag.est)mat<-linconpb(g1,alpha=alpha,nboot=nboot,est=est,con=con,bhop=bhop,SEED=SEED,...) -list(points=pts,output=mat) -} - - -hc4wmc<-function(x,y,nboot=599,k=2,grp=NA,con=0,SEED=TRUE,STOP=TRUE,...){ -# -# Test the hypothesis that J independent groups have identical slopes. -# Using least squares regression -# Data are stored in list mode or in a matrix. In the first case -# x[[1]] contains the data for the first group, x[[2]] the data -# for the second group, etc. Length(x)=the number of groups = J. -# If stored in a matrix, the columns of the matrix correspond -# to groups. -# -# Similarly, y[[1]] contains the data for the first group, -# y[[2]] the data for the second groups, etc. -# -# The argument grp can be used to analyze a subset of the groups -# Example: grp=c(1,3,5) would compare groups 1, 3 and 5. -# -# Missing values are allowed. -# -if(STOP)stop('Suggest ols1way. This function assumes equal n. To use anyway, set STOP=FALSE') -con<-as.matrix(con) -if(is.matrix(x))x<-listm(x) -if(!is.list(x))stop("Data must be stored in list mode or in matrix mode.") -if(is.matrix(y))y<-listm(y) -if(!is.list(y))stop("Data must be stored in list mode or in matrix mode.") -if(!is.na(sum(grp))){ # Only analyze specified groups. 
-xx<-list() -yy<-list() -for(i in 1:length(grp)) -xx[[i]]<-x[[grp[i]]] -yy[[i]]<-y[[grp[i]]] -x<-xx -y<-yy -} -J<-length(x) -n<-length(x[[1]]) -tempn<-0 -slopes<-NA -covar<-NA -stemp<-NA -yhat<-numeric(J) -res<-matrix(,ncol=J, nrow=n) -for(j in 1:J){ -temp<-cbind(x[[j]], y[[j]]) -temp<-elimna(temp) # Remove missing values. -#n<-length(y[[j]]) -tempn[j]<-length(temp) -x[[j]]<-temp[,1] -y[[j]]<-temp[,2] -tempx<-as.matrix(x[[j]]) -tempy<-as.matrix(y[[j]]) -#Getting yhat and residuals for wild bootstrap -yhat[j]<-mean(tempy) -res[,j]<-tempy-yhat[j] -#original Slope and SE -stemp<-lsfit(tempx, tempy) -slopes[j]<-stemp$coef[k] #Slopes for original data -covar[j]<-lsfitNci4(tempx, tempy)$cov[k,k] #original HC4 for coefficient(slope) -} -# -Jm<-J-1 -# -# Determine contrast matrix -# -if(sum(con^2)==0){ -ncon<-(J^2-J)/2 -con<-matrix(0,J,ncon) -id<-0 -for (j in 1:Jm){ -jp<-j+1 -for (h in jp:J){ -id<-id+1 -con[j,id]<-1 -con[h,id]<-0-1 -}}} -ncon<-ncol(con) -if(nrow(con)!=J){ -stop("Something is wrong with con; the number of rows does not match the number of groups.") -} -#calculating original statistic -dif.slopes<-t(con)%*%slopes -o.se<-t(con^2)%*%covar -o.stat<-dif.slopes/sqrt(o.se) #original test statistics -# -om<-max(abs(o.stat)) #Max. absolute test statistics -# -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -# -data<-matrix(ifelse(rbinom(n*nboot*J,1,0.5)==1,-1,1),ncol=nboot*J) #discrete wild bootstrap sample -test<-numeric(nboot) -u<-rep(1, n) -c<-1 -for (i in 1:nboot*J-J+1){ -d<-data[,i:i+J-1] -ystar<-u%*%t(yhat)+res*d -ystar<-listm(ystar) -i<-i+J -test[c]<-mcslope(x,ystar, con, k) -# -c<-c+1 -} -sum<-sum(test>= om) -p.val<-sum/nboot -list(p.value=p.val) -} -mcslope<-function(X, Y, con, k){ -J=length(X) -slopes<-numeric(J) -covar<-numeric(J) -for(j in 1:J){ -tempx<-as.matrix(X[[j]]) -tempy<-as.matrix(Y[[j]]) -slopes[j]<-lsfit(tempx, tempy)$coef[k] #Slopes for original data -covar[j]<-lsfitNci4(tempx, tempy)$cov[k,k] #original HC4 for coefficient(slope) -} -dif.slopes<-t(con)%*%slopes -o.se<-t(con^2)%*%covar -o.stat<-dif.slopes/sqrt(o.se) #original test statistics -om<-max(abs(o.stat)) -om -} - - -ZYmediate<-function(x,y,nboot=2000,alpha=.05,kappa=.05,SEED=TRUE,xout=FALSE,outfun=out){ -# -# Robust mediation analysis using M-estimator as -# described in Zu and Yuan, 2010, MBR, 45, 1--44. -# -# x[,1] is predictor -# x[,2] is mediator variable -# y is outcome variable. -ep=0.00000001 # convergence criteria -B=nboot # the number of bootstrap replications -kappa # the percent of cases to be controlled when robust method is used - # Zu and Yuan used .05, so this is the default value used here. 
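-# kappa is used only through HuberTun below, which converts it into the Huber
-# tuning constant r and the consistency correction tau.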
-level=alpha # alpha level -if(SEED)set.seed(2) -Z=elimna(cbind(x,y)) -if(xout){ -flag<-outfun(Z[,1],plotit=FALSE,SEED=SEED)$keep -Z<-Z[flag,] -} -p=3 -n=nrow(Z) -HT=HuberTun(kappa,p) -r=HT$r -tau=HT$tau -H=robEst(Z,r,tau,ep) -R.v=H$u2*tau -oH=order(R.v) -oCaseH=(1:n)[oH] # case number with its Ri increases -oR.v=R.v[oH] - -thetaH=H$theta -aH=thetaH[1] -bH=thetaH[2] -abH=aH*bH - -muH=H$mu -SigmaH=H$Sigma -dH=H$d - - -### Use robust method -# point estimate -thetaH=H$theta -aH=thetaH[1] -bH=thetaH[2] -abH=aH*bH - -muH=H$mu -SigmaH=H$Sigma -dH=H$d - -#Standard errors -RH=SErob(Z,muH,SigmaH,thetaH,dH,r,tau) - -Zr=RH$Zr -SEHI=RH$inf -SEHS=RH$sand - -#Standard errors -RH=SErob(Z,muH,SigmaH,thetaH,dH,r,tau) - -Zr=RH$Zr -SEHI=RH$inf -SEHS=RH$sand - -#Standard errors -RH=SErob(Z,muH,SigmaH,thetaH,dH,r,tau) - -Zr=RH$Zr -SEHI=RH$inf -SEHS=RH$sand -ParEstH<-round(cbind(thetaH,SEHI[1:6],SEHS[1:6]),3) -rnames<-c("a","b","c","vx","vem","vey") -ParEstH<-cbind(rnames,ParEstH) -res=t(ParEstH) -# -Res=BCI(Z,Zr,ab=3,abH,B,level) -list(CI.ab=Res$CI,p.value=Res$pv,a.est=aH,b.est=bH,ab.est=abH) -} - - -#------------------------------------------------------------ -# Tunning parameter when use Huber type weight -#------------------------------------------------------------ -# Input: - #kappa: the proportion of cases to be controlled - #p: the number of variables -# Output - # r: the critical value of Mahalalanobis distance, as defined in (20) - # tau: the constant to make the robust estimator of Sigma to be unbiased, as defined in (20) - -HuberTun=function(kappa,p){ - prob=1-kappa - chip=qchisq(prob,p) - r=sqrt(chip) - tau=(p*pchisq(chip,p+2)+ chip*(1-prob))/p - Results=list(r=r,tau=tau) - return(Results) -} - -robEst=function(Z,r,tau,ep){ - - p=ncol(Z) - n=nrow(Z) - # Starting values - mu0=MeanCov(Z)$zbar - Sigma0=MeanCov(Z)$S - Sigin=solve(Sigma0) - - diverg=0 # convergence flag - - for (k in 1:200) { - sumu1=0 - mu=matrix(0,p,1) - Sigma=matrix(0,p,p) - d=rep(NA,n) - u1=rep(NA,n) - u2=rep(NA,n) - - for (i in 1:n) { zi=Z[i,] - zi0=zi-mu0 - di2=t(zi0)%*%Sigin%*%zi0 - di=as.numeric(sqrt(di2)) - d[i]=di - - #get u1i,u2i - if (di<=r) { - u1i=1.0 - u2i=1.0/tau - }else { - u1i=r/di - u2i=u1i^2/tau - } - u1[i]=u1i - u2[i]=u2i - - sumu1=sumu1+u1i - mu=mu+u1i*zi - Sigma=Sigma+u2i*zi0%*%t(zi0) - - } # end of loop i - - mu1=mu/sumu1 - Sigma1=Sigma/n - Sigdif=Sigma1-Sigma0 - dt=sum(Sigdif^2) - - mu0=mu1 - Sigma0=Sigma1 - Sigin=solve(Sigma0) - if (dt0) -pv=2*min(c(pstar,1-pstar)) -# Results=list(BP=BP) -# return(Results) -list(BP,pv) -} - -RobRsq<-function(x,y){ -library(robust) -z=lmRob(y~x) -res=robR2w(z) -res -} - -robR2w = function (rob.obj, correc=1.2076) { - ## R2 in robust regression, see - ## Renaud, O. & Victoria-Feser, M.-P. (2010). A robust coefficient of determination for regression. - ## Journal of Statistical Planning and Inference, 140, 1852-1862. - ## rob.obj is an lmRob object. correc is the correction for consistancy. Call: - ## - ## library(robust) - ## creat.lmRob = lmRob(original1 ~ approprie1+approprie2+creativite1+creativite2, data=creatif) - ## summary(creat.lmRob) - ## robR2w(creat.lmRob) - - ## Weights in robust regression - wt.bisquare = function(u, c = 4.685) { - U <- abs(u/c) - w <- ((1. + U) * (1. - U))^2. - w[U > 1.] <- 0. 
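-  # residuals with |u| > c receive zero weight; the weight vector w is returned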
- w - } - weight.rob=function(rob.obj){ - resid.rob=rob.obj$resid - scale.rob=(rob.obj$scale)*rob.obj$df.residual/length(resid.rob) - resid.rob= resid.rob/scale.rob - weight=wt.bisquare(resid.rob) - } - - if (attr(rob.obj, "class") !="lmRob") - stop("This function works only on lmRob objects") - pred = rob.obj$fitted.values - resid = rob.obj$resid - resp = resid+pred - wgt = weight.rob(rob.obj) - scale.rob = rob.obj$scale - resp.mean = sum(wgt*resp)/sum(wgt) - pred.mean = sum(wgt*pred)/sum(wgt) - yMy = sum(wgt*(resp-resp.mean)^2) - rMr = sum(wgt*resid^2) - r2 = (yMy-rMr) / yMy - r2correc= (yMy-rMr) / (yMy-rMr +rMr*correc) - r2adjcor = 1-(1-r2correc) * (length(resid)-1) / (length(resid)-length(rob.obj$coefficients)-1) - return(list(robR2w.NoCorrection=r2, robR2w.WithCorrection=r2correc, robR2w.AdjustedWithCorrection=r2adjcor)) -} - -bi2KMSv2<-function(r1=sum(elimna(x)),n1=length(elimna(x)),r2=sum(elimna(y)),n2=length(elimna(y)), -x=NA,y=NA,nullval=0,alpha=.05){ -# -# Test the hypothesis that two independent binomials have equal -# probability of success using method KMS. -# -# Unlike the function bi2KMS, a p-value is returned -# -# r1=number of successes in group 1 -# n1=number of observations in group 1 -# -# Uses Kulinskaya et al. method American Statistician, 2010, 64, 350- -# -# null value is the hypothesized value for p1-p2 -# -alph<-c(1:99)/100 -for(i in 1:99){ -irem<-i -chkit<-bi2KMS(r1=r1,n1=n1,r2=r2,n2=n2,x=x,y=x,alpha=alph[i]) -if(chkit$ci[1]>nullval || chkit$ci[2]nullval || chkit$ci[2]TB)-.5*mean(test==TB) -list(test=test,p.value=pv) -} - -wmwloc<-function(x,y,na.rm=TRUE,est=median,...){ -# -# Estimate the median of the distribution of x-y -# -if(na.rm){ -x<-x[!is.na(x)] -y<-y[!is.na(y)] -} -m<-outer(x,y,FUN="-") -est=est(m,na.rm=TRUE,...) -est -} - - -DEPanc<-function(x1,y1,y2,fr1=1,tr=.2,alpha=.05,plotit=TRUE,DISDIF=FALSE,DIF=TRUE, -pts=NULL,sm=FALSE,xout=FALSE,outfun=out,nboot=500){ -# -# Compare two dependent groups using a covariate -# -# x1 is the covariate and -# y1 and y2 are the two measures. For instance time 1 and time 2. -# -# No parametric assumption is made about the form of -# the regression lines--a running interval smoother is used. -# -# fr1 is span for running interval smoother -# -# sm=T will create smooths using bootstrap bagging. -# -# pts can be used to specify the design points where the regression lines -# are to be compared. -# -# If DISDIF=T: 1. compare groups using median of distribution of D=Y1-Y2 -# 2. if na.rm=T, case wise deletion is used, otherwise all of the data are used. 
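-# Example (illustrative): with the defaults (DIF=TRUE, tr=.2), DEPanc(x,y1,y2)
-# picks five design points from the covariate x and, at each point, compares the
-# 20% trimmed mean of the difference scores y1-y2 for the cases whose covariate
-# values are close to that point.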
-# -# Also see the R function DEPancB, which includes alternative methods for handling missing values -# -m=cbind(x1,y1,y2) -flag=is.na(x1) -m=m[!flag,] -if(is.null(pts[1])){ -npt<-5 -isub<-c(1:5) # Initialize isub -test<-c(1:5) -xorder<-order(x1) -y1<-y1[xorder] -x1<-x1[xorder] -y2<-y2[xorder] -vecn<-1 -for(i in 1:length(x1))vecn[i]<-length(y1[near(x1,x1[i],fr1)]) -sub<-c(1:length(x1)) -isub[1]<-min(sub[vecn>=12]) -isub[5]<-max(sub[vecn>=12]) -isub[3]<-floor((isub[1]+isub[5])/2) -isub[2]<-floor((isub[1]+isub[3])/2) -isub[4]<-floor((isub[3]+isub[5])/2) -} -if(!is.null(pts[1]))isub=c(1:length(pts)) -mat<-matrix(NA,length(isub),8) -dimnames(mat)<-list(NULL,c("X","n","DIF","TEST","se","ci.low","ci.hi", -"p.value")) -for (i in 1:length(isub)){ -if(is.null(pts)){ -ch=near(x1,x1[isub[i]],fr1) -mat[i,1]=x1[isub[i]] -} -if(!is.null(pts)){ -ch=near(x1,pts[i],fr1) -mat[i,1]=pts[i] -} -mat[i,2]=sum(ch) -if(!DISDIF){ -if(!DIF){ -test<-yuend(m[ch,2],m[ch,3],tr=tr) -mat[i,3]=mean(m[ch,2],tr=tr)-mean(m[ch,3],tr=tr) -mat[i,4]<-test$teststat -mat[i,5]<-test$se -mat[i,6]<-test$ci[1] -mat[i,7]<-test$ci[2] -mat[i,8]<-test$siglevel -} -if(DIF){ -test=trimci(m[ch,2]-m[ch,3],tr=tr,pr=FALSE) -mat[i,3]=mean(m[ch,2]-m[ch,3],tr=tr) -mat[i,4]<-test$test.stat -mat[i,5]<-test$se -mat[i,6]<-test$ci[1] -mat[i,7]<-test$ci[2] -mat[i,8]<-test$p.value -}} -if(DISDIF){ -test=l2drmci(m[ch,2:3],pr=FALSE,nboot=nboot,na.rm=na.rm) -mat[i,3]<-loc2dif(m[ch,2],m[ch,3],na.rm=na.rm) -mat[i,4]<-NA -mat[i,5]<-NA -mat[i,6]<-test$ci[1] -mat[i,7]<-test$ci[2] -mat[i,8]<-test$p.value -}} -if(plotit) -runmean2g(x1,y1,x1,y2,fr=fr1,est=mean,tr=tr,sm=sm,xout=xout,outfun=outfun) -list(output=mat) -} - - -DEPancpb<-function(x1,y1,y2,fr1=1,est=tmean,alpha=.05,plotit=TRUE,DISDIF=FALSE,DIF=TRUE,TLS=FALSE,SEED=TRUE, -pts=NULL,sm=FALSE,xout=FALSE,outfun=out,nboot=500,pr=FALSE,na.rm=TRUE,xlab="Group 1", ylab="Group 2",...){ -# -# Compare two dependent groups using a covariate -# -# same as DEPanc, only use bootstrap methods in all cases. -# -# x1 is the covariate and -# y1 and y2 are the two measures. For instance time 1 and time 2. -# -# case wise deletion of missing values used by default. -# To use all of the data not missing, set DIF=F and na.rm=F -# For the special case where the goal is to compare means, also set TLS=T -# (But this can produce an error if too many missing values) -# -# No parametric assumption is made about the form of -# the regression lines--a running interval smoother is used. -# -# TLS=F, use percentile bootstrap when DIF=FALSE; -# otherwise (TLS=TRUE) use Lin-Stivers method for means -# fr1 is span for running interval smoother -# -# sm=T will create smooths using bootstrap bagging. -# -# pts can be used to specify the design points where the regression lines -# are to be compared. 
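-# Example (illustrative): with the defaults (DIF=TRUE, est=tmean), DEPancpb(x,y1,y2)
-# applies a percentile bootstrap (via onesampb) to the trimmed mean of the
-# difference scores y1-y2 at each design point.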
-# -m=cbind(x1,y1,y2) -flag=is.na(x1) -if(na.rm)m=elimna(m) -if(!na.rm){ -m=m[!flag,] -} -x1=m[,1] -y1=m[,2] -y2=m[,3] -if(is.null(pts[1])){ -npt<-5 -isub<-c(1:5) # Initialize isub -test<-c(1:5) -xorder<-order(x1) -y1<-y1[xorder] -x1<-x1[xorder] -y2<-y2[xorder] -vecn<-1 -for(i in 1:length(x1))vecn[i]<-length(y1[near(x1,x1[i],fr1)]) -sub<-c(1:length(x1)) -isub[1]<-min(sub[vecn>=12]) -isub[5]<-max(sub[vecn>=12]) -isub[3]<-floor((isub[1]+isub[5])/2) -isub[2]<-floor((isub[1]+isub[3])/2) -isub[4]<-floor((isub[3]+isub[5])/2) -} -if(!is.null(pts[1]))isub=c(1:length(pts)) -mat<-matrix(NA,length(isub),6) -dimnames(mat)<-list(NULL,c("X","n","DIF","ci.low","ci.hi", -"p.value")) -for (i in 1:length(isub)){ -if(is.null(pts)){ -ch=near(x1,x1[isub[i]],fr1) -mat[i,1]=x1[isub[i]] -} -if(!is.null(pts)){ -ch=near(x1,pts[i],fr1) -mat[i,1]=pts[i] -} -mat[i,2]=sum(ch) -if(!DISDIF){ -if(!DIF){ -if(!TLS){ -test=rmmismcp(m[ch,2],m[ch,3],alpha=alpha,SEED=SEED,est=est,plotit = FALSE, - grp = grp, nboot = 500, xlab = xlab, ylab = ylab, pr = pr, ...) -mat[i,3]=est(m[ch,2],na.rm=TRUE)-est(m[ch,3],na.rm=TRUE) -mat[i,4]<-test$output[1,6] -mat[i,5]<-test$output[1,7] -mat[i,6]<-test$output[1,4] -} -if(TLS){ -test=rm2miss(m[ch,2],m[ch,3], nboot = nboot, alpha = alpha, SEED = SEED) -mat[i,3]=mean(m[ch,2],na.rm=TRUE)-mean(m[ch,3],na.rm=TRUE) -mat[i,4]<-test$ci[1] -mat[i,5]<-test$ci[2] -mat[i,6]<-test$p.value -}} -if(DIF){ -test=onesampb(m[ch,2]-m[ch,3],est=est,nboot=nboot,alpha=alpha,SEED=SEED,...) -mat[i,3]=est(m[ch,2]-m[ch,3],na.rm=TRUE,...) -mat[i,4]<-test$ci[1] -mat[i,5]<-test$ci[2] -mat[i,6]<-test$p.value -}} -if(DISDIF){ -test=l2drmci(m[ch,2:3],pr=FALSE,nboot=nboot,na.rm=na.rm) -mat[i,3]<-loc2dif(m[ch,2],m[ch,3],na.rm=na.rm) -mat[i,4]<-test$ci[1] -mat[i,5]<-test$ci[2] -mat[i,6]<-test$p.value -}} -if(plotit) -runmean2g(x1,y1,x1,y2,fr=fr1,est=est,sm=sm,xout=xout,outfun=outfun,...) -list(output=mat) -} - - -lplotPV<-function(x,y, span = 0.75, xout = FALSE,pr=TRUE, - outfun = out,nboot=1000,SEED=TRUE,plotit=TRUE,pyhat = FALSE, expand = 0.5, low.span = 2/3, - varfun = pbvar, cor.op = FALSE, cor.fun = pbcor, scale = FALSE, - xlab = "X", ylab = "Y", zlab = "", theta = 50, phi = 25, - family = "gaussian", duplicate = "error", pc = "*", ticktype = "simple",...){ -# -# Compute a p-value based on the Strength of Association estimated via lplot -# If significant, conclude there is dependence. -# -if(SEED)set.seed(2) -x=as.matrix(x) -if(ncol(x)==2 && !scale){ -if(pr){ -print("scale=F is specified.") -print("If there is dependence, might use scale=T") -}} -vals=NA -nv=ncol(x) -m=elimna(cbind(x,y)) -x<-m[,1:nv] -y<-m[,nv+1] -if(xout){ -flag<-outfun(x,plotit=FALSE,...)$keep -m<-m[flag,] -x<-m[,1:nv] -y<-m[,nv+1] -} -x=as.matrix(x) -est=lplot(x,y,span=span,plotit=plotit,pr=FALSE, pyhat = pyhat, - outfun = outfun, expand = expand, low.span = low.span, - varfun = varfun, cor.op =cor.op, cor.fun = cor.fun, scale = scale, - xlab = xlab, ylab = ylab, zlab =zlab, theta =theta, phi = phi, - family = family, duplicate = duplicate, pc = pc, ticktype = ticktype,...) 
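-# Bootstrap under independence: the rows of x and the values of y are resampled
-# separately, so the resulting values of Strength.Assoc reflect the null hypothesis
-# of no association.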
-n=nrow(x) -data1<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot) -data2<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot) -for(i in 1:nboot){ -vals[i]=lplot(x[data1[i,],],y[data2[i,]],plotit=FALSE,pr=FALSE)$Strength.Assoc -} -p=mean(est$Strength2)xx=cbind(x,x[,3:p]) -vlabs=c('Intercept','x1','x2','x1*x2') -if(p>2){ -p4=p+2 -for(j in 5:p4)vlabs[j]=paste('x',j-1) -if(LABELS)vlabs[5:p4]=labels(x)[[2]][3:p] -} -clabs=c('ci.low','ci.up','Estimate','S.E.','p-value','p.adj') -if(!MC)a=regci(xx,y,regfun = regfun, nboot = nboot, alpha =alpha, SEED = SEED, pr =pr,...) -if(MC)a=regciMC(xx,y,regfun = regfun, nboot = nboot, alpha =alpha, SEED = SEED, pr =pr,...) -output=a$regci -dimnames(output)=list(vlabs,clabs) -list(output=output,n=a$n,n.keep=a$n.keep) -} - -ancovaG<-function(x1,y1,x2,y2,fr1=1,fr2=1,tr=.2,alpha=.05,plotit=FALSE,pts=NULL,sm=FALSE, -pr=TRUE,xout=FALSE,outfun=out,test=medpb2,...){ -# -# This function generalizes the R function ancova so that any hypothesis testing method -# can be used to compare groups at specified design points. -# -# Compare two independent groups using the ancova method coupled with method -# indicated by the argument test. -# No parametric assumption is made about the form of -# the regression lines--a running interval smoother is used. -# -# Assume data are in x1 y1 x2 and y2 -# -# sm=T will create smooths using bootstrap bagging. -# pts can be used to specify the design points where the regression lines -# are to be compared. -# -xy=elimna(cbind(x1,y1)) -x1=xy[,1] -y1=xy[,2] -xy=elimna(cbind(x2,y2)) -x2=xy[,1] -y2=xy[,2] -output=list() -if(is.null(pts[1])){ -mat<-matrix(NA,5,3) -dimnames(mat)<-list(NULL,c("X","n1","n2")) -npt<-5 -isub<-c(1:5) # Initialize isub -xorder<-order(x1) -y1<-y1[xorder] -x1<-x1[xorder] -xorder<-order(x2) -y2<-y2[xorder] -x2<-x2[xorder] -n1<-1 -n2<-1 -vecn<-1 -for(i in 1:length(x1))n1[i]<-length(y1[near(x1,x1[i],fr1)]) -for(i in 1:length(x1))n2[i]<-length(y2[near(x2,x1[i],fr2)]) -for(i in 1:length(x1))vecn[i]<-min(n1[i],n2[i]) -sub<-c(1:length(x1)) -isub[1]<-min(sub[vecn>=12]) -isub[5]<-max(sub[vecn>=12]) -isub[3]<-floor((isub[1]+isub[5])/2) -isub[2]<-floor((isub[1]+isub[3])/2) -isub[4]<-floor((isub[3]+isub[5])/2) -for (i in 1:5){ -mat[i,1]=x1[isub[i]] -g1<-y1[near(x1,x1[isub[i]],fr1)] -g2<-y2[near(x2,x1[isub[i]],fr2)] -g1<-g1[!is.na(g1)] -g2<-g2[!is.na(g2)] -mat[i,2]=length(g1) -mat[i,3]=length(g2) -output[[i]]<-test(g1,g2,...) -}} -if(!is.null(pts[1])){ -mat<-matrix(NA,length(pts),3) -dimnames(mat)<-list(NULL,c("X","n1","n2")) -n1<-1 -n2<-1 -vecn<-1 -for(i in 1:length(pts)){ -n1[i]<-length(y1[near(x1,pts[i],fr1)]) -n2[i]<-length(y2[near(x2,pts[i],fr2)]) -} -for (i in 1:length(pts)){ -mat[i,1]=pts[i] -g1<-y1[near(x1,pts[i],fr1)] -g2<-y2[near(x2,pts[i],fr2)] -g1<-g1[!is.na(g1)] -g2<-g2[!is.na(g2)] -mat[i,2]=length(g1) -mat[i,3]=length(g2) -output[[i]]<-test(g1,g2,...) -}} -if(plotit) -runmean2g(x1,y1,x2,y2,fr=fr1,est=mean,tr=tr,sm=sm,xout=xout,outfun=outfun,...) -list(mat,output) -} - -mat2list<-function(m,grp.dat){ -# -# For data in a matrix m, divide the data into groups based -# on the values in column indicated -# by the argument grp.dat -# and store the data in list mode. 
-# -# This function is like fac2list, only it handles matrices -# -# Example: z=mat2list(m[,2:5],m[,9]) -# will divide the rows of data in columns 2-5 into groups based -# on the group id data in column 9 -# This is done via the function mat2grp -# -# z[[1]] will contain the data in m[,2:5] that is associated with first group -# z[[2]] will contain the data in m[,2:5] that is associated with second group, etc. -# -# If any entry in grp.dat is NA, this row is eliminated from m -# -if(!is.null(dim(m)))m=as.matrix(m) -if(!is.matrix(m))stop("Data must be stored in a matrix or data frame") -p=ncol(m) -p1=p+1 -M=cbind(m,grp.dat) -#print(dim(M)) -x<-mat2grp(M[,1:p1],p1) -for(i in 1:length(x))x[[i]]=x[[i]][,1:p] -x -} - -regpecv<-function(x,y,regfun=tsreg,varfun=pbvar,...){ -# -# Estimate prediction error via leave-one-out cross-validation -# -# regfun defaults to Theil-Sen estimator -# function returns measure of prediction error: robust measure of variation -# applied to the n differences y_i-y_{-i}, i=1,...,n -# where y_{-1} is estimate of y when ith vector of observations is omitted. -# -xy=elimna(cbind(x,y)) -x=as.matrix(x) -px=ncol(x) -px1=px+1 -n=nrow(xy) -vals=NA -for(i in 1:n){ -est=regfun(xy[-i,1:px],xy[-i,px1])$coef -vals[i]=xy[i,px1]-(est[1]+sum(est[2:px1]*xy[i,1:px])) -} -pe=varfun(vals) -pe -} - - -idmatch<-function(m1,m2,id.col1,id.col2=id.col1){ -# -# for the id data in column id.col of matrices m1 and m2 -# pull out data for which both m1 and m2 have matching id's -# return the data in a matrix, M1 before data and M2, the matching data time 2. -# -flag=!is.na(m1[,id.col1]) -m1=m1[flag,] # eliminate any rows where ID is missing -flag=!is.na(m2[,id.col1]) -m2=m2[flag,] -M1=NULL -#if(sum(duplicated(m1))>0)stop('Duplicate ids detected in m1') -#if(sum(duplicated(m2))>0)stop('Duplicate ids detected in m2') -#print(m1[,id.col1]) -if(sum(duplicated(m1[,id.col1]))>0)stop('Duplicate ids detected in m1') -if(sum(duplicated(m2[,id.col2]))>0)stop('Duplicate ids detected in m2') -for(i in 1:nrow(m1)){ -flag=duplicated(c(m1[i,id.col1],m2[,id.col2])) -if(sum(flag>0)){ -if(is.data.frame(m1)){ -if(!is.null(dim(M1)))M1=rbind(M1,as.data.frame(m1[i,])) -if(is.null(dim(M1)))M1=as.data.frame(m1[i,]) -} -if(!is.data.frame(m1)){ -if(!is.null(dim(M1)))M1=rbind(M1,m1[i,]) -if(is.null(dim(M1)))M1=matrix(m1[i,],nrow=1) -} -}} -M2=NULL -for(i in 1:nrow(m2)){ -flag=duplicated(c(m2[i,id.col2],m1[,id.col1])) -if(sum(flag>0)){ -if(is.data.frame(m2)){ -if(!is.null(dim(M2)))M2=rbind(M2,as.data.frame(m2[i,])) -if(is.null(dim(M2)))M2=as.data.frame(m2[i,]) -} -if(!is.data.frame(m2)){ -if(!is.null(dim(M2)))M2=rbind(M2,m2[i,]) -if(is.null(dim(M2)))M2=matrix(m2[i,],nrow=1) -} -}} -#m=cbind(M1[,id.col1],M1[,-id.col1],M2[,-id.col2]) -list(M1=M1,M2=M2) -} - - -rplotCV<-function(x,y,fr=NA,varfun=pbvar,est=tmean,xout=FALSE,outfun=out,eout=FALSE,corfun=pbvar,...){ -# -# Estimate prediction error based on -# a running interval smoother in conjunction with -# a leave-one-out cross validation method -# -# varfun is the measure of variation used on the predicted Y values. -# est is the measure of location used by the running interval smoother. 
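-# The prediction errors are the leave-one-out residuals y_i - yhat_(-i), obtained
-# from runhat (one predictor) or rung3hat (two or more predictors).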
-# The estimate is returned in VAR.Y.HAT -# -m=elimna(cbind(x,y)) -if(eout){ -flag<-outfun(m,plotit=FALSE)$keep -m=m[flag,] -} -x=as.matrix(x) -p=ncol(x) -p1=p+1 -x=as.matrix(m[,1:p]) -y=m[,p1] -vals=NA -if(is.na(fr)){ -if(p==1)fr=.8 -if(p>1)fr=1 -} -if(xout){ -keepit<-outfun(x,plotit=FALSE,...)$keep -x<-x[keepit,] -y<-y[keepit] -} -x=as.matrix(x) -for(i in 1:nrow(x)){ -if(p==1)vals[i]=runhat(x[-i,],y[-i],fr=fr,est=est,pts=x[i,],...) -if(p>1)vals[i]=rung3hat(x[-i,],y[-i],fr=fr,pts=t(as.matrix(x[i,])))$rmd -} -dif=y-vals -ans=varfun(elimna(dif)) -list(VAR.Y.HAT=ans) -} - -SMpre<-function(x,y,est=tmean,fr=NA,varfun=pbvar,model=NULL,adz=TRUE, -xout=FALSE,outfun=out,...){ -# -# Estimate prediction error for all of the models specified by the -# the argument model, which has list mode. -# Leave-one-out cross-validation is used in conjunction with a running interval smoother -# -x=as.matrix(x) -p=ncol(x) -p1=p+1 -xy=elimna(cbind(x,y)) -x=xy[,1:p] -y=xy[,p1] -n=nrow(x) -if(xout){ -x<-as.matrix(x) -flag<-outfun(x,...)$keep -x<-x[flag,] -y<-y[flag] -} -if(p>5)stop("Can have at most 5 predictors") -if(is.null(model))model=modgen(p) -mout<-matrix(NA,length(model),3,dimnames=list(NULL,c("error", -"var.used","rank"))) -for(imod in 1:length(model)){ -nmod=length(model[[imod]])-1 -temp=c(nmod:0) -mout[imod,2]=sum(model[[imod]]*10^temp) -mout[imod,1]=rplotCV(x[,model[[imod]]],y,fr=fr,est=est,varfun=varfun)$VAR.Y.HAT -} -if(adz){ -va=0 - for(i in 1:n)va[i]=y[i]-tmean(y[-i]) -no=pbvar(va) -mout=rbind(mout,c(no,0,NA)) -} -mout[,3]=rank(mout[,1]) -list(estimates=mout) -} - -mch2num<-function(x){ -# convert character, stored in matrix, to numeric data. -m=matrix(NA,nrow=nrow(x),ncol=ncol(x)) -for(j in 1:ncol(x))m[,j]=as.numeric(x[,j]) -m -} - -ddep<-function(x,est=onestep,alpha=.05,grp=NA,nboot=500,plotit=TRUE,SEED=TRUE,pr=TRUE,WT=TRUE,...){ -# -# Do ANOVA on dependent groups -# using the partially centered method plus -# depth of zero among bootstrap values. -# -# An improved version of ddep that better handles heteroscedasticity -# (A weighted grand mean is used in this version.) -# -# The data are assumed to be stored in x in list mode -# or in a matrix. In the first case -# x[[1]] contains the data for the first group, x[[2]] the data -# for the second group, etc. Length(x)=the number of groups = J. -# If stored in a matrix, columns correspond to groups. -# -# grp is used to specify some subset of the groups, if desired. -# By default, all J groups are used. -# -# The default number of bootstrap samples is nboot=2000 -# -if(pr)print("Warning: Might not be level robust if the number of groups is relatively large and n is small") -if(pr)print("Currently seems that rmmismcp is preferable") -if(is.list(x)){ -nv<-NA -for(j in 1:length(x))nv[j]<-length(x[[j]]) -if(var(nv) !=0){ -stop("The groups are stored in list mode and appear to have different sample sizes") -} -temp<-matrix(NA,ncol=length(x),nrow=nv[1]) -for(j in 1:length(x))temp[,j]<-x[[j]] -x<-temp -} -J<-ncol(x) -if(!is.na(grp[1])){ #Select the groups of interest -J<-length(grp) -for(j in 1:J)temp[,j]<-x[,grp[j]] -x<-temp -} -x<-elimna(x) # Remove any rows with missing values. -bvec<-matrix(0,ncol=J,nrow=nboot) -hval<-vector("numeric",J) -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -print("Taking bootstrap samples. Please wait.") -n<-nrow(x) -totv<-apply(x,2,est,na.rm=TRUE,...) 
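-# Resample rows (not columns) so the dependence among the J groups is preserved
-# within each bootstrap sample.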
-data<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot) -for(ib in 1:nboot)bvec[ib,]<-apply(x[data[ib,],],2,est,na.rm=TRUE,...) #nboot by J matrix -if(!WT){ -gv<-rep(mean(totv),J) #Grand mean -#m1<-rbind(bvec,gv) -} -bplus<-nboot+1 -center<-totv -cmat<-var(bvec) -if(WT){ -wt=1/diag(cmat) -ut=sum(wt) -gv<-rep(sum(wt*totv)/ut,J) #Grand mean -} -m1<-rbind(bvec,gv) -discen<-mahalanobis(m1,totv,cmat) -#print("Bootstrap complete; computing significance level") -if(plotit && ncol(x)==2){ -plot(bvec,xlab="Group 1",ylab="Group 2") -temp.dis<-order(discen[1:nboot]) -ic<-round((1-alpha)*nboot) -xx<-bvec[temp.dis[1:ic],] -xord<-order(xx[,1]) -xx<-xx[xord,] -temp<-chull(xx) -lines(xx[temp,]) -lines(xx[c(temp[1],temp[length(temp)]),]) -abline(0,1) -} -sig.level<-sum(discen[bplus]<=discen)/bplus -list(p.value=sig.level,center=totv,grand.mean=gv) -} - -ddeptr<-function(x,na.rm=TRUE,alpha=.05,grp=NA,nboot=500,plotit=TRUE,SEED=TRUE,op=FALSE,tr=.2,...){ -# -# Do ANOVA on dependent groups -# using the partially centered method plus -# depth of zero among bootstrap values. -# -# The method is like the method used by the R function ddep, -# but a weighted estimate of the grand mean is used. -# This helps deal the heteroscedasticity among the marginal distributions. -# -# The data are assumed to be stored in x in list mode -# or in a matrix. In the first case -# x[[1]] contains the data for the first group, x[[2]] the data -# for the second group, etc. Length(x)=the number of groups = J. -# If stored in a matrix, columns correspond to groups. -# -# trimmed means are compared -# -# grp is used to specify some subset of the groups, if desired. -# By default, all J groups are used. -# -# The default number of bootstrap samples is nboot=500 -# -# na.rm=T, all rows of data with missing values are removed. -# na.rm=F will use all of the data assuming missing values occur at random -# -if(is.list(x)){ -nv<-NA -for(j in 1:length(x))nv[j]<-length(x[[j]]) -if(var(nv) !=0){ -stop("The groups are stored in list mode and appear to have different sample sizes") -} -temp<-matrix(NA,ncol=length(x),nrow=nv[1]) -for(j in 1:length(x))temp[,j]<-x[[j]] -x<-temp -} -J<-ncol(x) -if(!is.na(grp[1])){ #Select the groups of interest -J<-length(grp) -for(j in 1:J)temp[,j]<-x[,grp[j]] -x<-temp -} -if(na.rm)x<-elimna(x) # Remove any rows with missing values. -bvec<-matrix(0,ncol=J,nrow=nboot) -hval<-vector("numeric",J) -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -if(op)print("Taking bootstrap samples. Please wait.") -n<-nrow(x) -wt=apply(x,2,trimse,...) -wt=1/wt^2 -ut=sum(wt) -totv<-apply(x,2,tmean,na.rm=TRUE,...) -gv<-rep(sum(wt*totv)/ut,J) #Weighted grand mean -data<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot) -for(ib in 1:nboot)bvec[ib,]<-apply(x[data[ib,],],2,tmean,na.rm=TRUE,...) 
#nboot by J matrix -bplus<-nboot+1 -m1<-rbind(bvec,gv) -center<-totv -cmat<-var(bvec) -discen<-mahalanobis(m1,totv,cmat) -if(op)print("Bootstrap complete; computing significance level") -if(plotit && ncol(x)==2){ -plot(bvec,xlab="Group 1",ylab="Group 2") -temp.dis<-order(discen[1:nboot]) -ic<-round((1-alpha)*nboot) -xx<-bvec[temp.dis[1:ic],] -xord<-order(xx[,1]) -xx<-xx[xord,] -temp<-chull(xx) -lines(xx[temp,]) -lines(xx[c(temp[1],temp[length(temp)]),]) -abline(0,1) -} -sig.level<-sum(discen[bplus]<=discen)/bplus -list(p.value=sig.level,center=totv,weighted.grand.mean=gv[1]) -} - - -qhdplotsm<-function(x,y,q=.5,xlab="X",ylab="Y",pc=".", -xout=FALSE,outfun=out,nboot=40,fr=1,...){ -# -# Plots smooths of quantile regression lines for one or more quantiles -# using rplotsm with Harrell--Davis estimator -# -# q indicates the quantiles to be used. -# -# EXAMPLE: -# qhdplotsm(x,y,q=c(.2,.5,.8)) will plot three smooths corresponding to -# the .2, .5 and .8 quantile regression lines. -# -xy=elimna(cbind(x,y)) -x=as.matrix(x) -if(ncol(x)!=1)stop("Only one predictor is allowed") -x=xy[,1] -y=xy[,2] -if(xout){ -flag<-outfun(x,...)$keep -x<-x[flag] -y<-y[flag] -} -plot(x,y,xlab=xlab,ylab=ylab,pch=pc) -xord=order(x) -for(j in 1:length(q)){ -yhat=rplotsm(x,y,fr=fr,pyhat=TRUE,est=hd,q=q[j],plotit=FALSE,nboot=nboot)$yhat -lines(x[xord],yhat[xord]) -} -print("Done") -} - -outmah<-function(x,qval=pnorm(3),plotit=TRUE,xlab="VAR 1",ylab="VAR 2"){ -# -# Detect outliers using Mahalanobis distance -# For demonstration purposes only. Suggest -# using a method that avoids masking. -# -# In the univariate case, the default strategy is to use a 3 standard deviation rule -# -x=elimna(x) -x=as.matrix(x) -m=apply(x,2,mean) -v=cov(x) -dis=mahalanobis(x,m,v) -crit<-sqrt(qchisq(qval,ncol(x))) -vec<-c(1:nrow(x)) -dis[is.na(dis)]=0 -dis<-sqrt(dis) -chk<-ifelse(dis>crit,1,0) -id<-vec[chk==1] -keep<-vec[chk==0] -if(is.matrix(x)){ -if(ncol(x)==2 && plotit){ -plot(x[,1],x[,2],xlab=xlab,ylab=ylab,type="n") -flag<-rep(TRUE,nrow(x)) -flag[id]<-FALSE -points(x[flag,1],x[flag,2]) -if(sum(!flag)>0)points(x[!flag,1],x[!flag,2],pch="*") -}} -if(!is.matrix(x))outval<-x[id] -if(is.matrix(x))outval<-x[id,] -list(out.val=outval,out.id=id,keep=keep,dis=dis,crit=crit) -} - -difQplot<-function(x,y=NULL,xlab="Quantile",ylab="Effect Size"){ -# -# Plot that provides perspective on the degree to which a distribution is symmetric about zero. -# This function plots the sum of q and 1-q quantiles. If the distributions are symmetric -# the plot should be approximately a horizontal line. If in addition the median -# of the difference scores is zero, the horizontal line will intersect the y-axis at zero. -# -if(is.null(y))dif=x -if(!is.null(y))dif=x-y -dif=elimna(dif) -qd=NA -for(i in 1:99)qd[i]=hd(dif,.5-i/200)+hd(dif,.5+i/200) -plot(.5-c(1:99)/200,qd,xlab=xlab,ylab=ylab) -} - -Dqcomhd<-function(x,y,est=hd,q=c(1:9)/10,nboot=2000,pr=TRUE, -plotit=FALSE,SEED=TRUE,xlab='Group 1', -ylab='Est.1-Est.2',na.rm=TRUE,alpha=rep(.05,length(q))){ -# -# Compare the quantiles of the marginal distributions associated with two dependent groups -# via hd estimator. Tied values are allowed. -# -# est=thd would use trimmed hd estimator -# -# When comparing lower or upper quartiles, both power and the probability of Type I error -# compare well to other methods that have been derived.
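-# EXAMPLE (hypothetical paired data in vectors x and y): -# Dqcomhd(x,y,q=c(.25,.5,.75)) compares the lower quartiles, medians and upper quartiles -# of the two marginal distributions.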
-# -# x: data for group 1 -# y: data for group 2 -# q: the quantiles to be compared -# nboot: Number of bootstrap samples -# -# -if(pr){ -print('Note: confidence intervals are not adjusted to control the simultaneous probability coverage') -} -if(SEED)set.seed(2) -if(na.rm){ -xy=elimna(cbind(x,y)) -x=xy[,1] -y=xy[,2] -} -pv=NULL -output=matrix(NA,nrow=length(q),ncol=10) -dimnames(output)<-list(NULL,c('q','n1','n2','est.1','est.2','est.1_minus_est.2','ci.low','ci.up','p-value','adj.p.value')) -for(i in 1:length(q)){ -output[i,1]=q[i] -output[i,2]=length(elimna(x)) -output[i,3]=length(elimna(y)) -output[i,4]=hd(x,q=q[i]) -output[i,5]=hd(y,q=q[i]) -output[i,6]=output[i,4]-output[i,5] -if(na.rm){ -temp=bootdpci(x,y,est=est,q=q[i],dif=FALSE,plotit=FALSE,pr=FALSE,nboot=nboot,alpha=alpha[i],SEED=FALSE) -output[i,7]=temp$output[1,5] -output[i,8]=temp$output[1,6] -output[i,9]=temp$output[1,3] -} -if(!na.rm){ -temp=rmmismcp(x,y,est=est,q=q[i],plotit=FALSE,pr=FALSE,nboot=nboot,alpha=alpha[i],SEED=FALSE) -output[i,7]=temp$output[1,6] -output[i,8]=temp$output[1,7] -output[i,9]=temp$output[1,4] -} -} -output[,10]=p.adjust(output[,9],method='hoch') -if(plotit){ -xax=rep(output[,4],3) -yax=c(output[,6],output[,7],output[,8]) -plot(xax,yax,xlab=xlab,ylab=ylab,type='n') -points(output[,4],output[,6],pch='*') -lines(output[,4],output[,6]) -points(output[,4],output[,7],pch='+') -lines(output[,4],output[,7],lty=2) -points(output[,4],output[,8],pch='+') -lines(output[,4],output[,8],lty=2) -} -output -} - - - -Dqdif<-function(x,y=NULL,q=.25,nboot=1000,plotit=TRUE,xlab="Group 1 - Group 2",SEED=TRUE,alpha=.05){ -# -# Compare two dependent groups by comparing the -# q and 1-q quantiles of the difference scores -# -# q should be < .5 -# if the groups do not differ, then the difference scores should be symmetric -# about zero. -# In particular, the sum of q and 1-q quantiles should be zero. -# -# q indicates the quantiles to be compared. By default, the .25 and .75 quantiles are used. -# -if(SEED)set.seed(2) -if(q>=.5)stop("q should be less than .5") -if(!is.null(y)){ -xy=elimna(cbind(x,y)) -dif=xy[,1]-xy[,2] -} -if(is.null(y))dif=elimna(x) -n=length(dif) -if(plotit)akerd(dif,xlab=xlab) -bvec=NA -data<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot) -for(ib in 1:nboot){ -bvec[ib]<-hd(dif[data[ib,]],q=q)+hd(dif[data[ib,]],q=1-q) -} -est1=hd(dif,q=q) -est2=hd(dif,q=1-q) -pv=mean(bvec<0)+.5*mean(bvec==0) -p=2*min(c(pv,1-pv)) -low<-round((alpha/2)*nboot)+1 -up<-nboot-low -sbvec=sort(bvec) -ci=sbvec[low] -ci[2]=sbvec[up] -list(est.q=est1,est.1.minus.q=est2,conf.interval=ci,p.value=p) -} - -qwmwhd<-function(x,y,q=seq(5,40,5)/100,xlab="Quantile",ylab="Sum of q and 1-q Quantiles",plotit=TRUE,alpha=.05,nboot=1000,SEED=TRUE){ -# -# Plot that provides perspective on the degree a distribution is symmetric about zero. -# This function plots the sum of q and 1-q quantiles of the distribution of D=X-Y, X and Y independent. -# A 1-alpha confidence interval for the sum is indicated by a + -# If the distribution is symmetric -# the plot should be approximately a horizontal line. -# -# FWE is controlled via Hochberg's method, which was used to determine critical -# p-values based on the argument -# alpha. 
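-# EXAMPLE (hypothetical independent samples x and y): -# qwmwhd(x,y) tests the sum of the q and 1-q quantiles of D=X-Y for q=.05,.10,...,.40.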
-# -# Can alter the quantiles compared via the argument -# q -# q must be less than .5 -# -if(SEED)set.seed(2) -x=elimna(x) -y=elimna(y) -n1=length(x) -n2=length(y) -output=matrix(NA,ncol=8,nrow=length(q)) -dimnames(output)=list(NULL,c("quantile","Est.1","Est.2","SUM","ci.low","ci.up","p_crit","p-value")) -for(i in 1:length(q)){ -test=cbmhd(x,y,q=q[i],plotit=FALSE,nboot=nboot,SEED=SEED) -output[i,1]=q[i] -output[i,2]=test$Est1 -output[i,3]=test$Est2 -output[i,4]=test$sum -output[i,8]=test$p.value -output[i,5]=test$ci[1] -output[i,6]=test$ci[2] -} -temp=order(output[,8],decreasing=TRUE) -zvec=alpha/c(1:length(q)) -output[temp,7]=zvec -output <- data.frame(output) -output$signif=rep("YES",nrow(output)) -for(i in 1:nrow(output)){ -if(output[temp[i],8]>output[temp[i],7])output$signif[temp[i]]="NO" -if(output[temp[i],8]<=output[temp[i],7])break -} -if(plotit){ -plot(rep(q,3),c(output[,4],output[,5],output[,6]),type="n",xlab=xlab,ylab=ylab) -points(q,output[,6],pch="+") -points(q,output[,5],pch="+") -points(q,output[,4],pch="*") -} -list(n=c(n1,n2),output=output) -} - - -difQpci<-function(x,y=NULL,q=seq(5,40,5)/100,xlab="Quantile",ylab="Group 1 minus Group 2",plotit=TRUE,alpha=.05,nboot=1000,SEED=TRUE,LINE=FALSE){ -# -# x can be a vector, in which case compare quantiels of distribution of data in x -# x can be a matrix with 2 columns, in which case analysis is done on dif=x[,1]-x[,2] -# y supplied, then do analysis of dif=x-y -# -# Plot that provides perspective on the degree a distribution is symmetric about zero. -# This function plots the sum of q and 1-q quantiles. A 1-alpha confidence interval for the sum is indicated by a + -# If the distributions are symmetric -# the plot should be approximately a horizontal line. If in addition the median -# of the difference scores is zero, the horizontal line will intersect the y-axis at zero. -# -# Similar to difQplot, only plots fewer quantiles by default and returns p-values for -# each quantile indicated by the argument q. -# -# FWE is controlled via Hochberg's method, which was used to determine critical -# p-values based on the argument -# alpha. -# -# Can alter the quantiles compared via the argument -# q -# q must be less than .5 -# -# LINE=TRUE. When plotting, a line connecting the estimates will be included. -# -x=as.matrix(x) -if(is.null(y))dif=x -if(ncol(x)>2)stop("Should be at most two groups") -if(ncol(x)==2)dif=x[,1]-x[,2] -if(!is.null(y))dif=x-y -dif=elimna(dif) -nv=length(dif) -output=matrix(NA,ncol=8,nrow=length(q)) -dimnames(output)=list(NULL,c("quantile","Est_q","Est_1.minus.q","SUM","ci.low","ci.up","p_crit","p-value")) -for(i in 1:length(q)){ -test=Dqdif(dif,q=q[i],plotit=FALSE,nboot=nboot,SEED=SEED) -output[i,1]=q[i] -output[i,2]=test$est.q -output[i,3]=test$est.1.minus.q -output[i,8]=test$p.value -output[i,5]=test$conf.interval[1] -output[i,6]=test$conf.interval[2] -} -temp=order(output[,8],decreasing=TRUE) -zvec=alpha/c(1:length(q)) -output[temp,7]=zvec -output <- data.frame(output) -output$signif=rep("YES",nrow(output)) -for(i in 1:nrow(output)){ -if(output[temp[i],8]>output[temp[i],7])output$signif[temp[i]]="NO" -if(output[temp[i],8]<=output[temp[i],7])break -} -output[,4]=output[,2]+output[,3] -if(plotit){ -plot(rep(q,3),c(output[,4],output[,5],output[,6]),type="n",xlab=xlab,ylab=ylab) -points(q,output[,6],pch="+") -points(q,output[,5],pch="+") -points(q,output[,4],pch="*") -if(LINE)lines(q,output[,4],pch="*") -} -list(n=nv,output=output) -} - -bsqrm<-function(x,y,alpha=0.05,bend=1.28){ -# -# Computes Bsqrm test statistic. 
This test statistic is from Ozdemir (2012) -# "mestse" was used as the standard error of one-step M-estimator and -# "mad" was used as a measure of scale. -# -x<-x[!is.na(x)] # Remove any missing values in x -y<-y[!is.na(y)] # Remove any missing values in y -zc<-qnorm(alpha/2) -x2<-(x-median(x))/mad(x) -y2<-(y-median(y))/mad(y) -C<-length(x[abs(x2)>bend]) -D<-length(y[abs(y2)>bend]) -e<-c(C,D) -alist<-list(x,y) -f<-(sapply(alist,length))-e -s=sapply(alist,mestse)^2 -wden=sum(1/s) -w=(1/s)/wden -yplus<-sum(w*(sapply(alist,onestep))) -tt<-((sapply(alist,onestep))-yplus)/sqrt(s) -v<-(f-1) -z<-((4*v^2)+(5*((2*(zc^2))+3)/24))/((4*v^2)+v+(((4*(zc^2))+9)/12))*sqrt(v)*(sqrt(log(1+(tt^2/v)))) -teststat<-sum(z^2) -list(teststat=teststat) -} - -bsqrmbt<-function(x,y,alpha=.05,bend=1.28,nboot=599,SEED=TRUE){ -# -# Goal: Test hypothesis that two independent groups have -# equal population M-measures of location. -# A bootstrap-t method is used. -# The method used was derived by F. Ozdemir -# -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -T<-bsqrm(x,y,alpha,bend)$teststat -TT<-0 -bsqrmbt<-numeric(2) -xone<-onestep(x,bend=bend) -yone<-onestep(y,bend=bend) -for(j in 1:nboot) - { - xx<-(sample(x,length(x),replace=TRUE)-xone) - yy<-(sample(y,length(y),replace=TRUE)-yone) - TT[j]<-bsqrm(xx,yy,alpha,bend)$teststat - } -TT<-sort(TT) -bott<-round(alpha*nboot)+1 -bsqrmbt<-TT[nboot-bott] -pv=mean(T<=TT) -list(critval=bsqrmbt,teststat=TRUE,p.value=pv) -} - -M2gbt=bsqrmbt - -qregplots<-function(x, y,qval=.5,q=NULL,op=1,pr=FALSE,xout=FALSE,outfun=out,plotit=FALSE,xlab="X",ylab="Y",...){ -# -# Compute the quantile regression line for one or more quantiles and plot the results -# That is, determine the qth (qval) quantile of Y given X using the -# the Koenker-Bassett approach. -# -# One predictor only is allowed -# -# v2=T, uses the function rq in the R library quantreg -# v2=F, uses an older and slower version -# -# Example: qregplots(x,y,q=c(.25,.5,.75)) will plot the regression lines for -# predicting quartiles. -# -if(!is.null(q))qval=q -x<-as.matrix(x) -if(ncol(x)!=1)stop("Current version allows only one predictor") -X<-cbind(x,y) -X<-elimna(X) -np<-ncol(X) -p<-np-1 -x<-X[,1:p] -x<-as.matrix(x) -y<-X[,np] -if(xout){ -x<-as.matrix(x) -flag<-outfun(x,...)$keep -x<-x[flag,] -y<-y[flag] -x<-as.matrix(x) -} -est=matrix(NA,ncol=3,nrow=length(qval)) -dimnames(est)=list(NULL,c("q","Inter","Slope")) -library(quantreg) -x<-as.matrix(x) -plot(x,y,xlab=xlab,ylab=ylab) -if(ncol(x)!=1)stop("Current version allows only one predictor") -for(j in 1:length(qval)){ -coef=coefficients((rq(y~x,tau=qval[j]))) -est[j,1]=qval[j] -est[j,2:3]=coef -abline(coef) -} -list(coef = est) -} -acbinomciv2<-function(x=sum(y),nn=length(y),y=NULL,n=NA,alpha=.05,nullval=.5){ -# Compute a p-value when testing the hypothesis that the probability of -# success for a binomial distribution is equal to -# nullval, which defaults to .5 -# The Agresti-Coull method is used. -# -# y is a vector of 1s and 0s. -# Or can use the argument -# x = the number of successes observed among -# n=nn trials. 
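-# EXAMPLE (hypothetical): acbinomciv2(x=15,nn=40,nullval=.5) returns a p-value for testing -# the hypothesis that the probability of success is .5, based on 15 successes in 40 trials.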
-# -res=acbinomci(x=x,nn=nn,y=y,alpha=alpha) -ci=res$ci -alph<-c(1:99)/100 -for(i in 1:99){ -irem<-i -chkit<-acbinomci(x=x,nn=nn,y=y,alpha=alph[i])$ci -if(chkit[1]>nullval || chkit[2]nullval || chkit[2]nullval || chkit[2]1)stop("With more than one predictor, use logSM") -xy=elimna(cbind(x,y)) -x=xy[,1:ncol(x)] -y=xy[,ncol(xy)] -x<-as.vector(x) -if(xout){ -flag<-outfun(x,...)$keep -x<-x[flag] -y<-y[flag] -} -if(STAND)x<-(x-median(x))/mad(x) -m1<-outer(x,x,"-")^2 -m2<-exp(-1*m1)*(sqrt(m1)<=fr) -m3<-matrix(y,length(y),length(y))*m2 -yhat<-apply(m3,2,sum)/apply(m2,2,sum) #sum over rows for each column -if(plotit){ -xor<-order(x) -plot(x,y,xlab=xlab,ylab=ylab) -if(!LP)lines(x[xor],yhat[xor]) -if(LP){ -Yhat=lplot(x[xor],yhat[xor],pyhat=TRUE,plotit=FALSE)$yhat.values -lines(x[xor],Yhat) -} -} -output<-"Done" -if(pyhat)output<-yhat -list(output=output) -} - -coefalpha<-function(x){ -library(psych) -x=elimna(x) -res=alpha(x) -res -} - - -z.power<-function(n,alpha=.05,del=NULL,var=NULL){ - q=qnorm(1-alpha/2) - sig=sqrt(var) - p1=pnorm(0-q-(sqrt(n)*del)/sig) - p2=1-pnorm(q-(sqrt(n)*del)/sig) - p=p1+p2 - list(power=p) - } - -hdpb<-function(x,est=hd,alpha=.05,nboot=2000,SEED=TRUE,nv=0,...){ -# -# Compute a bootstrap, .95 confidence interval for the -# measure of location corresponding to the argument est. -# By default, the Harrell-Davis estimator is used -# -# The default number of bootstrap samples is nboot=2000 -# -# The parameter q determines the quantile estimated via the function hd -# This function is the same as onesampb, only for convenience it defaults -# to using an estimate of the median. -# -# nv=null value when computing a p-value -# -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -#print("Taking bootstrap samples. Please wait.") -x=elimna(x) -data<-matrix(sample(x,size=length(x)*nboot,replace=TRUE),nrow=nboot) -bvec<-apply(data,1,est,...) -bvec<-sort(bvec) -low<-round((alpha/2)*nboot) -up<-nboot-low -low<-low+1 -pv=mean(bvec>nv)+.5*mean(bvec==nv) -pv=2*min(c(pv,1-pv)) -estimate=est(x,...) -list(ci=c(bvec[low],bvec[up]),n=length(x),estimate=estimate,p.value=pv) -} - -vecnorm<-function(x, p=2) sum(x^p)^(1/p) - -regYvar<-function(x,y,regfun=tsreg,pts=x,nboot=100,xout=FALSE,outfun=out,SEED=TRUE,...){ -# -# Estimate standard error of predicted value of Y using regression estimator -# corresponding to the points in the argument -# pts -# A bootstrap estimate is used -# nboot=100 indicates that 100 bootstrap samples will be used. -# regfun indicates the regression estimator that will be used. -# Theil--Sen is used by default. -# -xy=elimna(cbind(x,y)) -x<-as.matrix(x) -p=ncol(x) -p1=p+1 -vals=NA -x<-xy[,1:p] -y<-xy[,p1] -if(xout){ -m<-cbind(x,y) -if(identical(outfun,outblp))flag=outblp(x,y,plotit=FALSE)$keep -else -flag<-outfun(x,plotit=FALSE,...)$keep -m<-m[flag,] -x<-m[,1:p] -y<-m[,p1] -} -nv=length(y) -x<-as.matrix(x) -pts=as.matrix(pts) -nvpts=nrow(pts) -bvec=matrix(NA,nrow=nboot,ncol=nvpts) -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot) -for(ib in 1:nboot){ -bvec[ib,]=regYsub(x[data[ib,],],y[data[ib,]],xr=pts,p1=p1,regfun=regfun,...) 
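-# Each bootstrap sample yields predicted Y values at every point in pts, stored as one row of bvec.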
-} -sqsd=apply(bvec,2,var) -sqsd -} - -regYsub<-function(x,y,xr,p1,regfun=tsreg,...){ -est=regfun(x,y,...)$coef -xr=as.matrix(xr) -yhat=est[1]+xr%*%est[2:p1] -yhat -} - -regYband<-function(x,y,regfun=tsreg,npts=NULL,nboot=100,xout=FALSE,outfun=outpro,SEED=TRUE, -alpha=.05,crit=NULL,xlab="X",ylab="Y",SCAT=TRUE,ADJ=TRUE,pr=TRUE,nreps=1000, -MC=FALSE,pch='.',...){ -# -# Plot confidence band for the predicted Y value -# if ADJ=FALSE, plot confidence intervals for -# npts points between min(x) and max(x) -# if npts=NULL, then npts=20 is used. -# if ADJ=TRUE, plot confidence band for the predicted Y value for all x values such that -# the simultaneous probability coverage is .95. -# -# npts=NULL and ADJ=FALSE, npts will be set equal to 20. That is, computed confidence -# intervals for 20 point covariate values even space between min(x) and max(x). -# -# -if(!ADJ){ -if(is.null(npts))npts=20 -if(pr)print('To adjust the confidence band so that the simultaneous probability coverage is .95, set ADJ=TRUE') -} -xy=elimna(cbind(x,y)) -x<-as.matrix(x) -p=ncol(x) -if(p!=1)stop("This function assumes a single predictor only") -p1=p+1 -vals=NA -x<-xy[,1:p] -y<-xy[,p1] -if(xout){ -m<-cbind(x,y) -flag<-outfun(x,plotit=FALSE,...)$keep -m<-m[flag,] -x<-m[,1:p] -y<-m[,p1] -} -if(SEED)set.seed(2) -if(!ADJ)pts=seq(min(x),max(x),length.out=npts) -if(ADJ)pts=sort(unique(x)) -res=regYci(x,y,pts=pts,regfun=regfun,xout=FALSE,SEED=SEED,alpha=alpha,ADJ=ADJ,nreps=nreps,MC=MC,...) -plot(c(x,pts,pts),c(y,res[,2],res[,3]),xlab=xlab,ylab=ylab,type="n") -abline(regfun(x,y,...)$coef) -if(SCAT)points(x,y,pch=pch) -lines(pts,res[,3],lty=2) -lines(pts,res[,4],lty=2) -res -} - -ols.pred.ci<-function(x,y,xlab="X",ylab="Y",alpha=.05,xout=FALSE,RETURN=FALSE,newx=NULL){ -# -# plot the ols regression line and a 1-alpha -# confidence interval for the predicted values -# -# RETURN=T means the function will return predicted values and -# and confidence interval for the x values indicated by the argument -# newx -# newx=NULL, means predicted Y will be for seq(min(x), max(x), 0.1) -# -# xout=T removes leverage points. -# -if(ncol(as.matrix(x))!=1)stop("One predictor is allowed") -xy=elimna(cbind(x,y)) -x=xy[,1] -y=xy[,2] -if(xout){ -flag=out(x)$keep -x=x[flag] -y=y[flag] -} -tmp.lm=lm(y~x) -if(is.null(newx))newx=seq(min(x), max(x), 0.1) -a=predict(tmp.lm,interval="confidence",level=1-alpha,newdata=data.frame(x=newx)) -plot(x,y,xlab=xlab,ylab=ylab) -abline(ols(x,y,plotit=FALSE)$coef) -lines(newx,a[,2],lty=2) -lines(newx,a[,3],lty=2) -res=NULL -if(RETURN)res=a -res -} - -regYhat<-function(x,y,xr=x,regfun=tsreg,xout=FALSE,outfun=outpro,pr=FALSE,plot.pts=FALSE,pts=NULL,...){ -# -# For convenience, return estimate of Y based on data in xr (or pts) using -# regression line based on regfun -# -xy=elimna(cbind(x,y)) -x<-as.matrix(x) -xr=as.matrix(xr) -p=ncol(x) -p1=p+1 -vals=NA -x<-xy[,1:p] -y<-xy[,p1] -#print(xr[1:10,]) -if(xout){ -m<-cbind(x,y) -if(identical(outfun,outblp))flag=outblp(x,y,plotit=FALSE)$keep -else -flag<-outfun(x,plotit=FALSE,...)$keep -m<-m[flag,] -x<-m[,1:p] -y<-m[,p1] -} -if(!is.null(pts[1]))xr=pts -xr=as.matrix(xr) -est=regfun(x,y,...)$coef -if(ncol(xr)!=p)xr=t(xr) # for a single point, need to transpose. 
-yhat=est[1]+xr%*%est[2:p1] -if(plot.pts)points(xr,yhat) -yhat -} - -reg.pred<-regYhat - -reg1way<-function(x,y,regfun=tsreg,nboot=100,SEED=TRUE,xout=FALSE,outfun=outpro,STAND=TRUE,AD=FALSE,alpha=.05,pr=TRUE,...){ -# -# Test hypothesis that for two or more independent groups, all regression parameters -# (the intercepts and slopes) are equal -# By default the Theil--Sen estimator is used -# -# Strategy: Use bootstrap estimate of standard errors followed by -# Johansen MANOVA type test statistic. -# -# x and y are assumed to have list mode having length J equal to the number of groups -# For example, x[[1]] and y[[1]] contain the data for group 1. -# -# xout=T will eliminate leverage points using the function outfun, which defaults to the MVE method. -# -# OUTPUT: -# n is sample size after missing values are removed -# nv.keep is sample size after leverage points are removed. -# -if(pr){ -if(!xout)print("Might want to consider xout=T to remove leverage points") -} -if(SEED)set.seed(2) -if(!is.list(x))stop("Argument x should have list mode") -J=length(x) # number of groups -x=lapply(x,as.matrix) -pchk=lapply(x,ncol) -temp=matl(pchk) -if(var(as.vector(temp))!=0)stop("Something is wrong. Number of covariates differs among the groups being compared") -nv=NULL -p=ncol(x[[1]]) -p1=p+1 -for(j in 1:J){ -xy=elimna(cbind(x[[j]],y[[j]])) -x[[j]]=xy[,1:p] -y[[j]]=xy[,p1] -x[[j]]=as.matrix(x[[j]]) -nv=c(nv,nrow(x[[j]])) -} -nv.keep=nv -if(xout){ -temp=lapply(x,outfun,plotit=FALSE,STAND=STAND,...) -for(j in 1:J){ -x[[j]]=x[[j]][temp[[j]]$keep,] -y[[j]]=y[[j]][temp[[j]]$keep] -}} -x=lapply(x,as.matrix) -K=p1 -est=matrix(NA,nrow=J,ncol=p1) -grpnum=NULL -for(j in 1:J)grpnum[j]=paste("Group",j) -vlabs="Intercept" -for(j in 2:p1)vlabs[j]=paste("Slope",j-1) -dimnames(est)=list(grpnum,vlabs) -ecov=list() -ecovinv=list() -W=rep(0,p1) -gmean=rep(0,p1) -for(j in 1:J){ -est[j,]=regfun(x[[j]],y[[j]],xout=FALSE,...)$coef -nv.keep[j]=nrow(x[[j]]) -vals=matrix(NA,nrow=nboot,ncol=p1) -data<-matrix(sample(length(y[[j]]),size=length(y[[j]])*nboot,replace=TRUE),ncol=nboot) -data=listm(data) -bvec<-lapply(data,regbootMC,x[[j]],y[[j]],regfun,...) -# bvec is a p+1 by nboot matrix. -vals=t(matl(bvec)) -ecov[[j]]=var(vals) -ecovinv[[j]]=solve(ecov[[j]]) #W_j -gmean=gmean+ecovinv[[j]]%*%est[j,] -W=W+ecovinv[[j]] -} -estall=solve(W)%*%gmean -F=0 -for(k in 1:K){ -for(m in 1:K){ -for(j in 1:J){ -F=F+ecovinv[[j]][k,m]*(est[j,k]-estall[k])*(est[j,m]-estall[m]) -}}} -pvalad=NULL -# if xout=F or AD=T, compute corrected critical value, stemming from Johansen -df=K*(J-1) -if(!xout || AD){ -iden=diag(p1) -Aw=0 -for(j in 1:J){ -temp=iden-solve(W)%*%ecovinv[[j]] -tempsq=temp%*%temp -Aw=Aw+(sum(diag(tempsq))+(sum(diag(temp)))^2)/(nv[j]-1) -} -Aw=Aw/2 -alval<-c(1:999)/1000 -for(i in 1:999){ -irem<-i -crit=qchisq(alval[i],df) -critad=crit+(crit/(2*df))*(Aw+3*Aw*crit/(df+2)) -if(F2)stop("Only one covariate is allowed. Use ancJNmp") -x1=xy[,1] -y1=xy[,2] -nv1=length(y1) -xy=elimna(cbind(x2,y2)) -if(ncol(xy)>2)stop("Only one covariate is allowed. 
Use ancJNmp") -x2=xy[,1] -y2=xy[,2] -nv2=length(y2) -if(xout){ -m<-cbind(x1,y1) -p1=ncol(m) -p=p1-1 -if(identical(outfun,outblp))flag=outblp(x1,y1,plotit=FALSE)$keep -else -flag<-outfun(x1,plotit=FALSE)$keep -m<-m[flag,] -x1<-m[,1:p] -y1<-m[,p1] -m<-cbind(x2,y2) -if(identical(outfun,outblp))flag=outblp(x2,y2,plotit=FALSE)$keep -else -flag<-outfun(x2,plotit=FALSE)$keep -m<-m[flag,] -x2<-m[,1:p] -y2<-m[,p1] -} -if(is.null(pts[1])){ -xall=unique(c(x1,x2)) -pts=seq(min(xall),max(xall),length.out=Npts) -if(Dpts){ -npt<-5 -isub<-c(1:5) # Initialize isub -test<-c(1:5) -xorder<-order(x1) -y1<-y1[xorder] -x1<-x1[xorder] -xorder<-order(x2) -y2<-y2[xorder] -x2<-x2[xorder] -n1<-1 -n2<-1 -vecn<-1 -for(i in 1:length(x1))n1[i]<-length(y1[near(x1,x1[i],fr1)]) -for(i in 1:length(x1))n2[i]<-length(y2[near(x2,x1[i],fr2)]) -for(i in 1:length(x1))vecn[i]<-min(n1[i],n2[i]) -sub<-c(1:length(x1)) -isub[1]<-min(sub[vecn>=12]) -isub[5]<-max(sub[vecn>=12]) -isub[3]<-floor((isub[1]+isub[5])/2) -isub[2]<-floor((isub[1]+isub[3])/2) -isub[4]<-floor((isub[3]+isub[5])/2) -pts=x1[isub] -} -mat<-matrix(NA,5,10) -dimnames(mat)<-list(NULL,c("X","Est1","Est2","DIF","TEST","se","ci.low","ci.hi","p.value",'adj.p.values')) -mat[,1]=pts -sqsd1=regYvar(x1,y1,pts=pts,regfun=regfun,nboot=nboot,SEED=FALSE,xout=FALSE,outfun=outfun,...) -sqsd2=regYvar(x2,y2,pts=pts,regfun=regfun,nboot=nboot,SEED=FALSE,xout=FALSE,outfun=outfun,...) -est1=regYhat(x1,y1,xr=pts,regfun=regfun,xout=FALSE,outfun=outfun,...) -est2=regYhat(x2,y2,xr=pts,regfun=regfun,xout=FALSE,outfun=outfun,...) -mat[,2]=est1 -mat[,3]=est2 -est=est1-est2 -mat[,4]=est -sd=sqrt(sqsd1+sqsd2) -mat[,6]=sd -tests=(est1-est2)/sd -mat[,5]=tests -pv=2*(1-pnorm(abs(tests))) -mat[,9]=pv -crit<-smmcrit(Inf,5) -mat[,7]=est-crit*sd -mat[,8]=est+crit*sd -} -if(!is.null(FLAG)){ -n1=NA -n2=NA -for(i in 1:length(pts)){ -n1[i]<-length(y1[near(x1,pts[i],fr1)]) -n2[i]<-length(y2[near(x2,pts[i],fr2)]) -} -mat<-matrix(NA,length(pts),10) -dimnames(mat)<-list(NULL,c('X','Est1','Est2','DIF','TEST','se','ci.low','ci.hi','p.value','p.adjust')) -mat[,1]<-pts -sqsd1=regYvar(x1,y1,regfun=regfun,pts=pts,nboot=nboot,SEED=FALSE,xout=FALSE,outfun=outfun,...) -sqsd2=regYvar(x2,y2,regfun=regfun,pts=pts,nboot=nboot,SEED=FALSE,xout=FALSE,outfun=outfun,...) -est1=regYhat(x1,y1,regfun=regfun,xr=pts,xout=FALSE,outfun=outfun,...) -est2=regYhat(x2,y2,regfun=regfun,xr=pts,xout=FALSE,outfun=outfun,...) 
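-# Store the estimates, their difference, the standard error, test statistic, p-value and confidence limits for each covariate point.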
-mat[,2]=est1 -mat[,3]=est2 -est=est1-est2 -mat[,4]=est -sd=sqrt(sqsd1+sqsd2) -mat[,6]=sd -tests=(est1-est2)/sd -mat[,5]=tests -pv=2*(1-pnorm(abs(tests))) -mat[,9]=pv -crit<-smmcrit(Inf,length(pts)) -mat[,7]=est-crit*sd -mat[,8]=est+crit*sd -} -reg1=regfun(x1,y1,...)$coef -reg2=regfun(x2,y2,...)$coef -if(plotit){ -if(xout){ -if(identical(outfun,outblp))flag=outblp(x1,y1,plotit=FALSE)$keep -else -flag<-outfun(x1,...)$keep -x1<-x1[flag] -y1<-y1[flag] -if(identical(outfun,outblp))flag=outblp(x2,y2,plotit=FALSE)$keep -else -flag<-outfun(x2,...)$keep -x2<-x2[flag] -y2<-y2[flag] -} -plot(c(x1,x2),c(y1,y2),type="n",xlab=xlab,ylab=ylab) -if(SCAT){ -points(x1,y1,pch=pch1) -points(x2,y2,pch=pch2) -} -abline(reg1) -abline(reg2,lty=2) -} -mat[,10]=p.adjust(mat[,9],method='hoch') -list(n=c(nv1,nv2),intercept.slope.group1=reg1,intercept.slope.group2=reg2,output=mat) -} - -ancJN<-function(x1,y1,x2,y2,pts=NULL,Npts=5,Dpts=FALSE,regfun=tsreg,fr1=1,fr2=1,SCAT=TRUE,pch1='*',pch2='+', -alpha=.05,plotit=TRUE,xout=FALSE,outfun=out,nboot=100,SEED=TRUE,xlab='X',ylab='Y',...){ -# -# Compare the regression lines of two independent groups at specified design points -# using a robust regression estimator. -# By default, use the Theil--Sen estimator -# -# Assume data are in x1 y1 x2 and y2 -# -# pts can be used to specify the design points where the regression lines -# are to be compared. -# -# Dpts=FALSE: Five covariate points are chosen uniformly space between the smallest and largest -# values observed. -# Dpts=TRUE: Five covariate points are chosen in the same manner as done by the function ancova -# -# Npts: number of points used -# -if(identical(outfun,boxplot))stop('Use outfun=outbox') -if(SEED)set.seed(2) -FLAG=pts -xy=elimna(cbind(x1,y1)) -if(ncol(xy)>2)stop('Only one covariate is allowed. Use ancJNmp') -x1=xy[,1] -y1=xy[,2] -nv1=length(y1) -xy=elimna(cbind(x2,y2)) -if(ncol(xy)>2)stop('Only one covariate is allowed. Use ancJNmp') -x2=xy[,1] -y2=xy[,2] -nv2=length(y2) -if(xout){ -m<-cbind(x1,y1) -p1=ncol(m) -p=p1-1 -flag<-outfun(x1,plotit=FALSE,...)$keep -m<-m[flag,] -x1<-m[,1:p] -y1<-m[,p1] -m<-cbind(x2,y2) -flag<-outfun(x2,plotit=FALSE,...)$keep -m<-m[flag,] -x2<-m[,1:p] -y2<-m[,p1] -} -if(is.null(pts[1])){ -xall=unique(c(x1,x2)) -pts=seq(min(xall),max(xall),length.out=Npts) -if(Dpts){ -npt<-5 -isub<-c(1:5) # Initialize isub -test<-c(1:5) -xorder<-order(x1) -y1<-y1[xorder] -x1<-x1[xorder] -xorder<-order(x2) -y2<-y2[xorder] -x2<-x2[xorder] -n1<-1 -n2<-1 -vecn<-1 -for(i in 1:length(x1))n1[i]<-length(y1[near(x1,x1[i],fr1)]) -for(i in 1:length(x1))n2[i]<-length(y2[near(x2,x1[i],fr2)]) -for(i in 1:length(x1))vecn[i]<-min(n1[i],n2[i]) -sub<-c(1:length(x1)) -isub[1]<-min(sub[vecn>=12]) -isub[5]<-max(sub[vecn>=12]) -isub[3]<-floor((isub[1]+isub[5])/2) -isub[2]<-floor((isub[1]+isub[3])/2) -isub[4]<-floor((isub[3]+isub[5])/2) -pts=x1[isub] -} -npts=length(pts) -mat<-matrix(NA,npts,10) -dimnames(mat)<-list(NULL,c('X','Est1','Est2','DIF','TEST','se','ci.low','ci.hi','p.value','adj.p.values')) -mat[,1]=pts -sqsd1=regYvar(x1,y1,pts=pts,regfun=regfun,nboot=nboot,SEED=FALSE,xout=FALSE,outfun=outfun,...) -sqsd2=regYvar(x2,y2,pts=pts,regfun=regfun,nboot=nboot,SEED=FALSE,xout=FALSE,outfun=outfun,...) -est1=regYhat(x1,y1,xr=pts,regfun=regfun,xout=FALSE,outfun=outfun,...) -est2=regYhat(x2,y2,xr=pts,regfun=regfun,xout=FALSE,outfun=outfun,...) 
-mat[,2]=est1 -mat[,3]=est2 -est=est1-est2 -mat[,4]=est -sd=sqrt(sqsd1+sqsd2) -mat[,6]=sd -tests=(est1-est2)/sd -mat[,5]=tests -pv=2*(1-pnorm(abs(tests))) -mat[,9]=pv -crit<-smmcrit(Inf,5) -mat[,7]=est-crit*sd -mat[,8]=est+crit*sd -} -if(!is.null(FLAG)){ -n1=NA -n2=NA -for(i in 1:length(pts)){ -n1[i]<-length(y1[near(x1,pts[i],fr1)]) -n2[i]<-length(y2[near(x2,pts[i],fr2)]) -} -mat<-matrix(NA,length(pts),10) -dimnames(mat)<-list(NULL,c('X','Est1','Est2','DIF','TEST','se','ci.low','ci.hi','p.value','p.adjust')) -mat[,1]<-pts -sqsd1=regYvar(x1,y1,regfun=regfun,pts=pts,nboot=nboot,SEED=FALSE,xout=FALSE,outfun=outfun,...) -sqsd2=regYvar(x2,y2,regfun=regfun,pts=pts,nboot=nboot,SEED=FALSE,xout=FALSE,outfun=outfun,...) -est1=regYhat(x1,y1,regfun=regfun,xr=pts,xout=FALSE,outfun=outfun,...) -est2=regYhat(x2,y2,regfun=regfun,xr=pts,xout=FALSE,outfun=outfun,...) -mat[,2]=est1 -mat[,3]=est2 -est=est1-est2 -mat[,4]=est -sd=sqrt(sqsd1+sqsd2) -mat[,6]=sd -tests=(est1-est2)/sd -mat[,5]=tests -pv=2*(1-pnorm(abs(tests))) -mat[,9]=pv -crit<-smmcrit(Inf,length(pts)) -mat[,7]=est-crit*sd -mat[,8]=est+crit*sd -} -reg1=regfun(x1,y1,...)$coef -reg2=regfun(x2,y2,...)$coef -if(plotit){ -if(xout){ -flag<-outfun(x1,...)$keep -x1<-x1[flag] -y1<-y1[flag] -flag<-outfun(x2,...)$keep -x2<-x2[flag] -y2<-y2[flag] -} -plot(c(x1,x2),c(y1,y2),type='n',xlab=xlab,ylab=ylab) -if(SCAT){ -points(x1,y1,pch=pch1) -points(x2,y2,pch=pch2) -} -abline(reg1) -abline(reg2,lty=2) -} -mat[,10]=p.adjust(mat[,9],method='hoch') -list(n=c(nv1,nv2),intercept.slope.group1=reg1,intercept.slope.group2=reg2,output=mat) -} - -block.diag<-function(mat){ -# -# mat is assumed to have list mode with -# mat[[1]]...mat[[p]] each having n-by-n matrices -# -# Create a np-by-np block diagonal matrix -# -# So p is the number of blocks -# -if(!is.list(mat))stop("mat should have list mode") -np<-length(mat)*ncol(mat[[1]]) -m<-matrix(0,np,np) -n=nrow(mat[[1]]) -p=length(mat) -ilow<-1-n -iup<-0 -for(i in 1:p){ -ilow<-ilow+n -iup<-iup+n -m[ilow:iup,ilow:iup]<-mat[[i]] -} -m -} - -reg1wayMC<-function(x,y,regfun=tsreg,nboot=100,SEED=TRUE,xout=FALSE,outfun=outpro, -STAND=TRUE,alpha=.05,pr=TRUE,AD=FALSE,...){ -# -# Test hypothesis that for two or more independent groups, all regression parameters are equal -# By default the Theil--Sen estimator is used -# -# Strategy: Use bootstrap estimate of standard errors followed by -# Johansen MANOVA type test statistic -# -# x and y are assumed to have list mode having length J equal to the number of groups -# For example, x[[1]] and y[[1]] contain the data for group 1. -# -# xout=T will eliminate leverage points using the function outfun -# -# OUTPUT: -# n is sample size after missing values are removed -# nv.keep is sample size after leverage points are removed. -# -library(parallel) -if(pr){ -if(!xout)print("Might want to consider xout=T to remove leverage points") -} -if(SEED)set.seed(2) -if(!is.list(x))stop("Argument x should have list mode") -if(pr){ -if(xout)print("xout=T, so an adjusted critical is not computed and apparently not needed") -} -J=length(x) # number of groups -x=lapply(x,as.matrix) -pchk=lapply(x,ncol) -temp=matl(pchk) -if(var(as.vector(temp))!=0)stop("Something is wrong. Number of covariates differs among the groups being compared") -nv=NULL -nv.keep=NULL -nv.all=NULL -p=ncol(x[[1]]) -p1=p+1 -for(j in 1:J){ -xy=elimna(cbind(x[[j]],y[[j]])) -x[[j]]=xy[,1:p] -y[[j]]=xy[,p1] -x[[j]]=as.matrix(x[[j]]) -nv.all[j]=c(nv,nrow(x[[j]])) -} -if(xout){ -temp=lapply(x,outfun,plotit=FALSE,STAND=STAND,...) 
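-# Retain, for each group, only the rows not flagged as leverage points.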
-for(j in 1:J){ -x[[j]]=x[[j]][temp[[j]]$keep,] -y[[j]]=y[[j]][temp[[j]]$keep] -}} -x=lapply(x,as.matrix) -p1=ncol(x[[1]])+1 -K=p1 -est=matrix(NA,nrow=J,ncol=p1) -hlabs=NULL -vlabs="Intercept" -for(j in 1:J)hlabs[j]=paste("Group",j) -for(j in 2:p1)vlabs[j]=paste("Slope",j-1) -dimnames(est)<-list(hlabs,vlabs) -nv=NA -ecov=list() -ecovinv=list() -W=rep(0,p1) -gmean=rep(0,p1) -for(j in 1:J){ -est[j,]=regfun(x[[j]],y[[j]])$coef -nv.keep[j]=nrow(x[[j]]) -nv[j]=nv.keep[j] -vals=matrix(NA,nrow=nboot,ncol=p1) -data<-matrix(sample(nv[j],size=nv[j]*nboot,replace=TRUE),ncol=nboot) -data=listm(data) -bvec<-mclapply(data,regbootMC,x[[j]],y[[j]],regfun,...) -vals=t(matl(bvec)) -ecov[[j]]=var(vals) -ecovinv[[j]]=solve(ecov[[j]]) #W_j -gmean=gmean+ecovinv[[j]]%*%est[j,] -W=W+ecovinv[[j]] -} -estall=solve(W)%*%gmean -F=0 -for(k in 1:K){ -for(m in 1:K){ -for(j in 1:J){ -F=F+ecovinv[[j]][k,m]*(est[j,k]-estall[k])*(est[j,m]-estall[m]) -}}} -df=K*(J-1) -pvalad=NULL -# if xout=F, compute corrected critical value, stemming from Johansen -df=K*(J-1) -if(!xout || AD){ -iden=diag(p1) -Aw=0 -for(j in 1:J){ -temp=iden-solve(W)%*%ecovinv[[j]] -tempsq=temp%*%temp -Aw=Aw+(sum(diag(tempsq))+(sum(diag(temp)))^2)/(nv[j]-1) -} -Aw=Aw/2 -alval<-c(1:999)/1000 -for(i in 1:999){ -irem<-i -crit=qchisq(alval[i],df) -critad=crit+(crit/(2*df))*(Aw+3*Aw*crit/(df+2)) -if(F1){ -if(ntests<=28){ -if(alpha==.05)crit<-smmcrit(Inf,ntests) -if(alpha==.01)crit<-smmcrit01(Inf,ntests) -} -if(ntests>28)crit=smmvalv2(dfvec=rep(Inf,nrow(pts)),alpha=alpha) -if(is.null(crit))crit=smmvalv2(dfvec=rep(Inf,nrow(pts)),alpha=alpha) -} -mat[,6]=est-crit*sd -mat[,7]=est+crit*sd -list(n=nv,points=pts,output=mat) -} - -ancpar<-function(x1,y1,x2,y2,pts=NULL,regfun=tsreg,fr1=1,fr2=1,alpha=.05,plotit=TRUE,xout=FALSE,outfun=out,nboot=100,SEED=TRUE,xlab="X",ylab="Y",...){ -# -# Compare the regression lines of two independent groups at specified design points. -# By default, use the Theil--Sen estimator -# -# Assume data are in x1 y1 x2 and y2 -# -# pts can be used to specify the design points where the regression lines -# are to be compared. -# For p>1 predictors, pts should be a matrix with p columns -# -x1=as.matrix(x1) -x2=as.matrix(x2) -if(ncol(x1)!=ncol(x2))stop("x1 and x2 have different number of columns") -if(ncol(x1)==1)output=ancts(x1,y1,x2,y2,pts=pts,regfun=regfun,fr1=fr1,fr2=fr2,alpha=alpha, -plotit=plotit,xout=xout,outfun=outfun,nboot=nboot,SEED=SEED,xlab=xlab,ylab=ylab,...) -if(ncol(x1)>1)output=anctsmp(x1,y1,x2,y2,regfun=regfun,alpha=alpha,pts=pts,SEED=SEED,xout=xout,outfun=outfun,nboot=nboot,...) -output -} - - - ols.coef<-function(x,y,xout=FALSE){ - # In some cases, want the OLS estimate returned in $coef - res=ols(x,y,xout=xout)$coef[,1] - list(coef=res) - } - - -reg2ciMC<-function(x,y,x1,y1,regfun=tsreg,nboot=599,alpha=.05,plotit=TRUE,SEED=TRUE, -xout=FALSE,outfun=outpro,pr=FALSE,xlab='X',ylab='Y',...){ -# -# Compute a .95 confidence interval for the difference between the -# the intercepts and slopes corresponding to two independent groups. -# The default regression method is Theil-Sen. -# -# Same as reg2ci, only takes advantage of a multi-core processor -# -# The predictor values for the first group are -# assumed to be in the n by p matrix x. 
-# The predictors for the second group are in x1 -# -# The default number of bootstrap samples is nboot=599 -# -# regfun can be any R function that returns the coefficients in -# the vector regfun$coef, the first element of which contains the -# estimated intercept, the second element contains the estimate of -# the first predictor, etc. -# -library(parallel) -x<-as.matrix(x) -xx<-cbind(x,y) -xx<-elimna(xx) -x<-xx[,1:ncol(x)] -x<-as.matrix(x) -y<-xx[,ncol(x)+1] -x1<-as.matrix(x1) -xx1<-cbind(x1,y1) -xx1<-elimna(xx1) -x1<-xx1[,1:ncol(x1)] -x1<-as.matrix(x1) -y1<-xx1[,ncol(x1)+1] -x=as.matrix(x) -x1=as.matrix(x1) -if(xout){ -if(identical(outfun,outblp)){ -flag1=outblp(x,y,plotit=FALSE)$keep -flag2=outblp(x1,y1,plotit=FALSE)$keep -} -if(!identical(outfun,outblp)){ -flag1=outfun(x,plotit=FALSE)$keep -flag2=outfun(x1,plotit=FALSE)$keep -} -x=x[flag1,] -y=y[flag1] -x1=x1[flag2,] -y1=y1[flag2] -} -n=length(y) -n[2]=length(y1) -x<-as.matrix(x) -x1<-as.matrix(x1) -est1=regfun(x,y)$coef -est2=regfun(x1,y1)$coef -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -# -data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot) -data=listm(t(data)) -bvec<-mclapply(data,regbootMC,x,y,regfun,mc.preschedule=TRUE,xout=FALSE,...) -bvec=matl(bvec) -data<-matrix(sample(length(y1),size=length(y1)*nboot,replace=TRUE),nrow=nboot) -data=listm(t(data)) -bvec1<-mclapply(data,regbootMC,x1,y1,regfun,mc.preschedule=TRUE,xout=FALSE,...) -bvec1=matl(bvec1) -bvec<-bvec-bvec1 -p1<-ncol(x)+1 -regci<-matrix(0,p1,6) -dimnames(regci)<-list(NULL, -c("Parameter","ci.lower","ci.upper","p.value","Group 1","Group 2")) -ilow<-round((alpha/2)*nboot)+1 -ihi<-nboot-(ilow-1) -for(i in 1:p1){ -temp<-sum(bvec[i,]<0)/nboot+sum(bvec[i,]==0)/(2*nboot) -regci[i,4]<-2*min(temp,1-temp) -bsort<-sort(bvec[i,]) -regci[i,2]<-bsort[ilow] -regci[i,3]<-bsort[ihi] -regci[,1]<-c(0:ncol(x)) -} -regci[,5]=est1 -regci[,6]=est2 -if(ncol(x)==1 && plotit){ -plot(c(x,x1),c(y,y1),type="n",xlab=xlab,ylab=ylab) -points(x,y) -points(x1,y1,pch="+") -abline(regfun(x,y,...)$coef) -abline(regfun(x1,y1,...)$coef,lty=2) -} -list(n=n,output=regci) -} - -reg2difplot<-function(x1,y1,x2,y2,regfun=tsreg,pts=x1,xlab="VAR 1",ylab="VAR 2",zlab="Group 2 minus Group 1",xout=FALSE,outfun=out,ALL=TRUE,pts.out=FALSE,SCAT=FALSE,theta=50,phi=25,ticktype='simple', -pr=TRUE,...){ -# -# Fit a regression model to both groups, assuming both have two predictors. -# Get predicted Y values based on points in pts. By default, use -# pts=x1 -# -# x1 a matrix containing two predictors -# x2 a matrix containing two predictors -# -# Compute differences in predicted values and plot the results as a function of the points in pts -# pts=x1 by default. -# ALL=T, pts is taken to be all points in x1 and x2. -# -# pts.out=T will remove leverage points from pts. -# -if(!is.matrix(x1))stop("x1 should be a matrix") -if(!is.matrix(x2))stop("x2 should be a matrix") -if(!is.matrix(pts))stop("pts should be a matrix") -if(ncol(x1)!=2)stop("x1 should be a matrix with two columns") -if(ncol(x2)!=2)stop("x2 should be a matrix with two columns") -if(ncol(pts)!=2)stop("pts should be a matrix with two columns") -if(ALL)pts=rbind(x1,x2) -if(pts.out){ -flag=outfun(pts,plotit=FALSE,...)$keep -pts=pts[flag,] -} -e1=regYhat(x1,y1,xout=xout,regfun=regfun,outfun=outfun,xr=pts,...) -e2=regYhat(x2,y2,xout=xout,regfun=regfun,outfun=outfun,xr=pts,...)
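-# Plot the difference in predicted values (group 2 minus group 1) as a function of the two covariates.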
-if(SCAT){ -library(scatterplot3d) -scatterplot3d(cbind(pts,e2-e1),xlab=xlab,ylab=ylab,zlab=zlab) -} -if(!SCAT)rplot(pts,e2-e1,xlab=xlab,ylab=ylab,zlab=zlab,theta=theta,phi=phi,pr=FALSE,ticktype=ticktype,prm=FALSE) -} - -cbmhd<-function(x,y,qest=hd,alpha=.05,q=.25,plotit=FALSE,pop=0,fr=.8,rval=15,xlab="",ylab="",nboot=600,SEED=TRUE){ -# -# Compute a confidence interval for the sum of the qth and (1-q)th quantiles -# of the distribution of D=X-Y, where X and Y are two independent random variables. -# The Harrell-Davis estimator is used -# If the distribution of X and Y are identical, then in particular the -# distribution of D=X-Y is symmetric about zero. -# -# plotit=TRUE causes a plot of the difference scores to be created -# pop=0 adaptive kernel density estimate -# pop=1 results in the expected frequency curve. -# pop=2 kernel density estimate (Rosenblatt's shifted histogram) -# pop=3 boxplot -# pop=4 stem-and-leaf -# pop=5 histogram -# -if(SEED)set.seed(2) -if(q>=.5)stop("q should be less than .5") -if(q<=0)stop("q should be greater than 0") -x<-x[!is.na(x)] -y<-y[!is.na(y)] -n1=length(x) -n2=length(y) -m<-outer(x,y,FUN="-") -q2=1-q -est1=qest(m,q) -est2=qest(m,q2) -data1<-matrix(sample(n1,size=n1*nboot,replace=TRUE),nrow=nboot) -data2<-matrix(sample(n2,size=n2*nboot,replace=TRUE),nrow=nboot) -bvec=NA -for(i in 1:nboot){ -mb=outer(x[data1[i,]],y[data2[i,]],"-") -bvec[i]=qest(mb,q)+qest(mb,q2) -} -p=mean(bvec>0)+.5*mean(bvec==0) -p=2*min(c(p,1-p)) -sbv=sort(bvec) -ilow<-round((alpha/2) * nboot) -ihi<-nboot - ilow -ilow<-ilow+1 -ci=sbv[ilow] -ci[2]=sbv[ihi] -if(plotit){ -if(pop==1 || pop==0){ -if(length(x)*length(y)>2500){ -print("Product of sample sizes exceeds 2500.") -print("Execution time might be high when using pop=0 or 1") -print("If this is case, might consider changing the argument pop") -print("pop=2 might be better") -}} -MM=as.vector(m) -if(pop==0)akerd(MM,xlab=xlab,ylab=ylab) -if(pop==1)rdplot(MM,fr=fr,xlab=xlab,ylab=ylab) -if(pop==2)kdplot(MM,rval=rval,xlab=xlab,ylab=ylab) -if(pop==3)boxplot(MM) -if(pop==4)stem(MM) -if(pop==5)hist(MM,xlab=xlab) -if(pop==6)skerd(MM) -} -list(q=q,Est1=est1,Est2=est2,sum=est1+est2,ci=ci,p.value=p) -} - -reg1wayISO<-function(x,y,regfun=tsreg,nboot=100,SEED=TRUE,xout=FALSE,outfun=outpro,STAND=TRUE,alpha=.05,pr=TRUE,...){ -# -# Test hypothesis that for two or more independent groups, all slope parameters -# are equal. -# By default the Theil--Sen estimator is used -# -# Strategy: Use bootstrap estimate of standard errors followed by -# Johansen MANOVA type test statistic. -# -# x and y are assumed to have list mode having length J equal to the number of groups -# For example, x[[1]] and y[[1]] contain the data for group 1. -# -# xout=T will eliminate leverage points using the function outfun -# -if(SEED)set.seed(2) -if(pr){ -if(!xout)print("Might want to consider xout=T to remove leverage points") -} -if(!is.list(x))stop("Argument x should have list mode") -J=length(x) # number of groups -x=lapply(x,as.matrix) -pchk=lapply(x,ncol) -temp=matl(pchk) -if(var(as.vector(temp))!=0)stop("Something is wrong. Number of covariates differs among the groups being compared") -nv=NULL -p=ncol(x[[1]]) -p1=p+1 -for(j in 1:J){ -xy=elimna(cbind(x[[j]],y[[j]])) -x[[j]]=xy[,1:p] -y[[j]]=xy[,p1] -x[[j]]=as.matrix(x[[j]]) -nv=c(nv,nrow(x[[j]])) -} -nv.keep=nv -if(xout){ -temp=lapply(x,outfun,plotit=FALSE,STAND=STAND,...) 
-for(j in 1:J){ -x[[j]]=x[[j]][temp[[j]]$keep,] -y[[j]]=y[[j]][temp[[j]]$keep] -}} -x=lapply(x,as.matrix) -K=p1 -est=matrix(NA,nrow=J,ncol=p1) -nv.keep=NULL -ecov=list() -ecovinv=list() -W=rep(0,p1) -gmean=rep(0,p) -for(j in 1:J){ -est[j,]=regfun(x[[j]],y[[j]],xout=FALSE,...)$coef -nv.keep[j]=nrow(x[[j]]) -vals=matrix(NA,nrow=nboot,ncol=p1) -data<-matrix(sample(length(y[[j]]),size=length(y[[j]])*nboot,replace=TRUE),ncol=nboot) -data=listm(data) -bvec<-lapply(data,regbootMC,x[[j]],y[[j]],regfun,...) -# bvec is a p+1 by nboot matrix. -vals=t(matl(bvec)) -ecov[[j]]=var(vals) -ecovinv[[j]]=solve(ecov[[j]]) #W_j -gmean=gmean+ecovinv[[j]][2:K,2:K]%*%est[j,2:K] -W=W+ecovinv[[j]] -} -estall=solve(W[2:K,2:K])%*%gmean -estall=c(0,estall) -F=0 -for(k in 2:K){ -for(m in 2:K){ -for(j in 1:J){ -F=F+ecovinv[[j]][k,m]*(est[j,k]-estall[k])*(est[j,m]-estall[m]) -}}} -df=p*(J-1) -pvalad=NULL -AD=FALSE # Seems adjusted critical is not needed -if(AD){ -iden=diag(p1) -Aw=0 -for(j in 1:J){ -temp=iden-solve(W)%*%ecovinv[[j]] -tempsq=temp%*%temp -Aw=Aw+(sum(diag(tempsq))+(sum(diag(temp)))^2)/(nv[j]-1) -} -Aw=Aw/2 -alval<-c(1:999)/1000 -for(i in 1:999){ -irem<-i -crit=qchisq(alval[i],df) -critad=crit+(crit/(2*df))*(Aw+3*Aw*crit/(df+2)) -if(F2)stop("Should be at most two groups") -if(ncol(x)==2)dif=x[,1]-x[,2] -if(!is.null(y))dif=x-y -dif=elimna(dif) -dif=as.matrix(dif) -nv=length(dif) -output=matrix(NA,ncol=8,nrow=length(q)) -dimnames(output)=list(NULL,c('quantile','Est_q','Est_1.minus.q','SUM','ci.low','ci.up','p_crit','p-value')) -for(i in 1:length(q)){ -test=DqdifMC(dif,q=q[i],plotit=FALSE,nboot=nboot,SEED=SEED) -output[i,1]=q[i] -output[i,2]=test$est.q -output[i,3]=test$est.1.minus.q -output[i,8]=test$p.value -output[i,5]=test$conf.interval[1] -output[i,6]=test$conf.interval[2] -} -temp=order(output[,8],decreasing=TRUE) -zvec=alpha/c(1:length(q)) -output[temp,7]=zvec -output <- data.frame(output) -output$signif=rep('YES',nrow(output)) -for(i in 1:nrow(output)){ -if(output[temp[i],8]>output[temp[i],7])output$signif[temp[i]]='NO' -if(output[temp[i],8]<=output[temp[i],7])break -} -output[,4]=output[,2]+output[,3] -if(plotit){ -plot(rep(q,3),c(output[,4],output[,5],output[,6]),type='n',xlab=xlab,ylab=ylab) -points(q,output[,6],pch='+') -points(q,output[,5],pch='+') -points(q,output[,4],pch='*') -} -list(n=nv,output=output) -} -tsregF<-function(x,y,xout=FALSE,outfun=out,iter=10,varfun=pbvar, -corfun=pbcor,plotit=FALSE,tol=.0001,...){ -# -# Compute Theil-Sen regression estimator -# -# Use Gauss-Seidel algorithm -# when there is more than one predictor -# -# -x<-as.matrix(x) -xx<-cbind(x,y) -xx<-elimna(xx) -x<-xx[,1:ncol(x)] -x<-as.matrix(x) -y<-xx[,ncol(x)+1] -temp<-NA -x<-as.matrix(x) -if(xout){ -x<-as.matrix(x) -flag<-outfun(x,plotit=plotit,...)$keep -x<-x[flag,] -y<-y[flag] -x<-as.matrix(x) -} -if(ncol(x)==1){ -temp1<-tsp1reg(x,y) -coef<-temp1$coef -res<-temp1$res -} -if(ncol(x)>1){ -for(p in 1:ncol(x)){ -temp[p]<-tsp1reg(x[,p],y)$coef[2] -} -res<-y-x%*%temp -alpha<-median(res) -r<-matrix(NA,ncol=ncol(x),nrow=nrow(x)) -tempold<-temp -for(it in 1:iter){ -for(p in 1:ncol(x)){ -r[,p]<-y-x%*%temp-alpha+temp[p]*x[,p] -temp[p]<-tsp1reg(x[,p],r[,p],plotit=FALSE)$coef[2] -} -if(max(abs(temp-tempold))0){ -e.pow<-varfun(yhat)/varfun(y) -if(!is.na(e.pow)){ -if(e.pow>=1)e.pow<-corfun(yhat,y)$cor^2 -e.pow=as.numeric(e.pow) -stre=sqrt(e.pow) -}} -res=NULL -list(coef=coef,residuals=res,Strength.Assoc=stre,Explanatory.Power=e.pow) -} - - -outproMC<-function(m,gval=NA,center=NA,plotit=TRUE,op=TRUE,MM=FALSE,cop=3, -xlab="VAR 
1",ylab="VAR 2",STAND=TRUE,tr=.2,q=.5,pr=TRUE,...){ -# -# same as function outpro, only it takes advantage of multiple core -# processors -# -# Detect outliers using a modification of the -# Stahel-Donoho projection method. -# -# Determine center of data cloud, for each point, -# connect it with center, project points onto this line -# and use distances between projected points to detect -# outliers. A boxplot method is used on the -# projected distances. -# -# plotit=T creates a scatterplot when working with -# bivariate data. -# -# op=T -# means the .5 depth contour is plotted -# based on data with outliers removed. -# -# op=F -# means .5 depth contour is plotted without removing outliers. -# -# MM=F Use interquatile range when checking for outliers -# MM=T uses MAD. -# -# If value for center is not specified, -# there are four options for computing the center of the -# cloud of points when computing projections: -# -# cop=2 uses MCD center -# cop=3 uses median of the marginal distributions. -# cop=4 uses MVE center -# cop=5 uses TBS -# cop=6 uses rmba (Olive's median ball algorithm)# cop=7 uses the spatial (L1) median -# -# args q and tr having are not used by this function. They are included to deal -# with situations where smoothers have optional arguments for q and tr -# -# STAND=T means that marginal distributions are standardized before -# checking for outliers - -# When using cop=2, 3 or 4, default critical value for outliers -# is square root of the .975 quantile of a -# chi-squared distribution with p degrees -# of freedom. -# -# Donoho-Gasko (Tukey) median is marked with a cross, +. -# -library(parallel) -library(MASS) -m<-as.matrix(m) -if(pr){ -if(!STAND){ -if(ncol(m)>1)print('STAND=FALSE. If measures are on different scales, might want to use STAND=TRUE') -}} -if(ncol(m)==1){ -dis<-(m-median(m))^2/mad(m)^2 -dis<-sqrt(dis) -crit<-sqrt(qchisq(.975,1)) -chk<-ifelse(dis>crit,1,0) -vec<-c(1:nrow(m)) -outid<-vec[chk==1] -keep<-vec[chk==0] -} -if(ncol(m)>1){ -if(STAND)m=standm(m,est=median,scat=mad) -if(is.na(gval) && cop==1)gval<-sqrt(qchisq(.95,ncol(m))) -if(is.na(gval) && cop!=1)gval<-sqrt(qchisq(.975,ncol(m))) -m<-elimna(m) # Remove missing values -if(cop==1 && is.na(center[1])){ -if(ncol(m)>2)center<-dmean(m,tr=.5,cop=1) -if(ncol(m)==2){ -tempd<-NA -for(i in 1:nrow(m)) -tempd[i]<-depth(m[i,1],m[i,2],m) -mdep<-max(tempd) -flag<-(tempd==mdep) -if(sum(flag)==1)center<-m[flag,] -if(sum(flag)>1)center<-apply(m[flag,],2,mean) -}} -if(cop==2 && is.na(center[1])){ -center<-cov.mcd(m)$center -} -if(cop==4 && is.na(center[1])){ -center<-cov.mve(m)$center -} -if(cop==3 && is.na(center[1])){ -center<-apply(m,2,median) -} -if(cop==5 && is.na(center[1])){ -center<-tbs(m)$center -} -if(cop==6 && is.na(center[1])){ -center<-rmba(m)$center -} -if(cop==7 && is.na(center[1])){ -center<-spat(m) -} -flag<-rep(0, nrow(m)) -outid <- NA -vec <- c(1:nrow(m)) -cenmat=matrix(rep(center,nrow(m)),ncol=ncol(m),byrow=TRUE) -Amat=m-cenmat -B=listm(t(Amat)) # so rows are now in B[[1]]...B[[n]] -dis=mclapply(B,outproMC.sub,Amat) -flag=mclapply(dis,outproMC.sub2,MM,gval) -flag=matl(flag) -flag=apply(flag,1,max) -} -if(sum(flag) == 0) outid <- NA -if(sum(flag) > 0)flag<-(flag==1) -outid <- vec[flag] -idv<-c(1:nrow(m)) -keep<-idv[!flag] -if(ncol(m)==2){ -if(plotit){ -plot(m[,1],m[,2],type="n",xlab=xlab,ylab=ylab) -points(m[keep,1],m[keep,2],pch="*") -if(length(outid)>0)points(m[outid,1],m[outid,2],pch="o") -if(op){ -tempd<-NA -keep<-keep[!is.na(keep)] -mm<-m[keep,] -for(i in 
1:nrow(mm))tempd[i]<-depth(mm[i,1],mm[i,2],mm) -mdep<-max(tempd) -flag<-(tempd==mdep) -if(sum(flag)==1)center<-mm[flag,] -if(sum(flag)>1)center<-apply(mm[flag,],2,mean) -m<-mm -} -points(center[1],center[2],pch="+") -x<-m -temp<-fdepth(m,plotit=FALSE) -flag<-(temp>=median(temp)) -xx<-x[flag,] -xord<-order(xx[,1]) -xx<-xx[xord,] -temp<-chull(xx) -xord<-order(xx[,1]) -xx<-xx[xord,] -temp<-chull(xx) -lines(xx[temp,]) -lines(xx[c(temp[1],temp[length(temp)]),]) -}} -list(out.id=outid,keep=keep) -} - - - -olsJ2<-function(x1,y1,x2,y2,xout=FALSE,outfun=outpro, -STAND=TRUE,plotit=TRUE,xlab="X",ylab="Y",ISO=FALSE,...){ -# -# Test hypothesis that for two independent groups, all regression parameters are equal -# Least squares regression is used. -# -# Strategy: Use HC4 estimate of standard errors followed by -# Johansen type test statistic. -# -# ISO=TRUE, test slopes, ignoring intercept. -# -x1=as.matrix(x1) -p=ncol(x1) -p1=p+1 -xy=elimna(cbind(x1,y1)) -x1=xy[,1:p] -y1=xy[,p1] -x2=as.matrix(x2) -p=ncol(x2) -p1=p+1 -xy=elimna(cbind(x2,y2)) -x2=xy[,1:p] -y2=xy[,p1] -if(plotit){ -xx1=x1 -yy1=y1 -xx2=x2 -yy2=y2 -if(ncol(as.matrix(x1))==1){ -if(xout){ -flag=outfun(xx1,plotit=FALSE,...)$keep -xx1=x1[flag] -yy1=y1[flag] -flag=outfun(xx2,plotit=FALSE,...)$keep -xx2=x2[flag] -yy2=y2[flag] -} -plot(c(xx1,xx2),c(yy1,yy2),type="n",xlab=xlab,ylab=ylab) -points(xx1,yy1) -points(xx2,yy2,pch="+") -abline(lsfit(xx1,yy1,...)$coef) -abline(lsfit(xx2,yy2,...)$coef,lty=2) -}} -x=list() -y=list() -x[[1]]=x1 -x[[2]]=x2 -y[[1]]=y1 -y[[2]]=y2 -if(!ISO)output=ols1way(x,y,xout=xout,outfun=outfun,STAND=STAND,...) -if(ISO)output=ols1wayISO(x,y,xout=xout,outfun=outfun,STAND=STAND,...) -output -} -ebarplot.med<-function(x,y=NULL,alpha=.05,nse=2, liw = uiw, aui=NULL, ali=aui, -err="y", tr=0,ylim=NULL, sfrac = 0.01, gap=0, add=FALSE, -col=par("col"), lwd=par("lwd"), slty=par("lty"), xlab="Group", - ylab=NULL, ...) { -# plots error bars using the data in -# x, which is assumed to be a matrix with J columns (J groups) or -# x has list mode. -# nse indicates how many standard errors to use when plotting. -# -# Designed specifically for medians -# Uses distribution-free confidence intervals -# -# Missing values are automatically removed. -# -if(!is.null(y)){ -if(is.matrix(x))stop("When y is given, x should not be a matrix") -if(is.list(x))stop("When y is given, x should not be in list mode") -rem=x -x=list() -x[[1]]=rem -x[[2]]=y -} -if(is.matrix(x))x<-listm(x) -mval<-NA -if(!is.list(x) && is.null(y))stop("This function assumes there - are two or more groups") -aui=NA -ali=NA -for(j in 1:length(x)){ -mval[j]<-median(x[[j]],na.rm=TRUE) -temp=sint(x[[j]],alpha=alpha,pr=FALSE) -ali[j]=temp[1] -aui[j]=temp[2] -} - -plotCI(mval,y=NULL, liw = uiw, aui=aui, ali=ali, - err="y", ylim=NULL, sfrac = 0.01, gap=0, add=FALSE, - col=par("col"), lwd=par("lwd"), slty=par("lty"), xlab=xlab, - ylab=ylab) -} -MULtsreg<-function(x,y,tr=.2,RMLTS=TRUE){ -# Multivariate Least Trimmed Squares Estimator -# Input: -# x: data-matrix (n,p) -# y: data-matrix (n,q) -# tr: proportion of trimming -# This function calls an R function written by Kristel Joossens -# -# Output: -# If MLTS=T coef: matrix (p,q) of MLTS-regression coefficients -# IF MLTS=F betaR : matrix (p,q) of RMLTS-regression coefficients -# -# Ref: Agullo,J., Croux, C., and Van Aelst, S. (2008) -# The Multivariate Least Trimmed Squares Estimator, -# Journal of multivariate analysis, 99, 311-338. 
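-# EXAMPLE (hypothetical): MULtsreg(x,y,tr=.2) returns the RMLTS coefficients in $coef; -# set RMLTS=FALSE to get the (non-reweighted) MLTS coefficients instead.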
-# -x=as.matrix(x) -xy=elimna(cbind(x,y)) -xx=as.matrix(cbind(rep(1,nrow(xy)),xy[,1:ncol(x)])) -p1=ncol(x)+1 -y=as.matrix(xy[,p1:ncol(xy)]) -outp=mlts(xx,y,tr) -if(!RMLTS)coef=outp$beta -if(RMLTS)coef=outp$betaR -list(coef=coef) -} -mlts<-function(x,y,gamma,ns=500,nc=10,delta=0.01) -{ - d <- dim(x); n <- d[1]; p <- d[2] - q <- ncol(y) - h <- floor(n*(1-gamma))+1 - obj0 <- 1e10 - for (i in 1:ns) - { sorted <- sort(runif(n),na.last = NA,index.return=TRUE) - istart <- sorted$ix[1:(p+q)] - xstart <- x[istart,] - ystart <- y[istart,] - bstart <- solve(t(xstart)%*%xstart,t(xstart)%*%ystart) - sigmastart <- (t(ystart-xstart%*%bstart))%*%(ystart-xstart%*%bstart)/q - for (j in 1:nc) - { res <- y - x %*% bstart - tres <- t(res) - dist2 <- colMeans(solve(sigmastart,tres)*tres) - sdist2 <- sort(dist2,na.last = NA,index.return = TRUE) - idist2 <- sdist2$ix[1:h] - xstart <- x[idist2,] - ystart <- y[idist2,] - bstart <- solve(t(xstart)%*%xstart,t(xstart)%*%ystart) - sigmastart <- (t(ystart-xstart%*%bstart))%*%(ystart-xstart%*%bstart)/(h-p) - } - obj <- det(sigmastart) - if (obj < obj0) - { result.beta <- bstart - result.sigma <- sigmastart - obj0 <- obj - } - } - cgamma <- (1-gamma)/pchisq(qchisq(1-gamma,q),q+2) - result.sigma <- cgamma * result.sigma - res <- y - x %*% result.beta - tres<-t(res) - result.dres <- colSums(solve(result.sigma,tres)*tres) - result.dres <- sqrt(result.dres) - - qdelta <- sqrt(qchisq(1-delta,q)) - good <- (result.dres <= qdelta) - xgood <- x[good,] - ygood <- y[good,] - result.betaR <- solve(t(xgood)%*%xgood,t(xgood)%*%ygood) - result.sigmaR <- (t(ygood-xgood%*%result.betaR)) %*% - (ygood-xgood%*%result.betaR)/(sum(good)-p) - cdelta <- (1-delta)/pchisq(qdelta^2,q+2) - result.sigmaR<-cdelta*result.sigmaR - resR<-y-x%*%result.betaR - tresR<-t(resR) - result.dresR <- colSums(solve(result.sigmaR,tresR)*tresR) - result.dresR <- sqrt(result.dresR) - list(beta=result.beta,sigma=result.sigma,dres=result.dres, - betaR=result.betaR,sigmaR=result.sigmaR,dresR=result.dresR) -} - -ancCR<-function(x1,y1,x2,y2){ -v=optim(0,JNH_sub1,x1=x1,y1=y1,x2=x2,y2=y2,method='BFGS')$par -v[2]=optim(0,JNH_sub2,x1=x1,y1=y1,x2=x2,y2=y2,method='BFGS')$par -a=min(v) -v=c(a,max(v)) -} - - -tsregNW<-function(x,y,xout=FALSE,outfun=out,iter=10,varfun=pbvar, -corfun=pbcor,plotit=FALSE,tol=.0001,...){ -# -# Compute Theil-Sen regression estimator -# -# Use Gauss-Seidel algorithm -# when there is more than one predictor -# -# -x<-as.matrix(x) -xx<-cbind(x,y) -xx<-elimna(xx) -x<-xx[,1:ncol(x)] -x<-as.matrix(x) -y<-xx[,ncol(x)+1] -temp<-NA -x<-as.matrix(x) -if(xout){ -x<-as.matrix(x) -flag<-outfun(x,plotit=plotit,...)$keep -x<-x[flag,] -y<-y[flag] -x<-as.matrix(x) -} -if(ncol(x)==1){ -temp1<-tsp1reg(x,y) -coef<-temp1$coef -res<-temp1$res -} -if(ncol(x)>1){ -for(p in 1:ncol(x)){ -temp[p]<-tsp1reg(x[,p],y)$coef[2] -} -res<-y-x%*%temp -alpha<-median(res) -r<-matrix(NA,ncol=ncol(x),nrow=nrow(x)) -tempold<-temp -for(it in 1:iter){ -for(p in 1:ncol(x)){ -r[,p]<-y-x%*%temp-alpha+temp[p]*x[,p] -temp[p]<-tsp1reg(x[,p],r[,p],plotit=FALSE)$coef[2] -} -if(max(abs(temp-tempold))0){ -e.pow<-varfun(yhat)/varfun(y) -if(!is.na(e.pow)){ -if(e.pow>=1)e.pow<-corfun(yhat,y)$cor^2 -e.pow=as.numeric(e.pow) -stre=sqrt(e.pow) -}} -res=NULL -list(coef=coef,residuals=res,Strength.Assoc=stre,Explanatory.Power=e.pow) -} - - -reg2cimcp<-function(x,y,regfun=tsreg,nboot=599,alpha=0.05, -SEED=TRUE,xout=FALSE,outfun=out,...){ -# -# Like reg2ci only x1 etc have list mode containing -# data for J>1 groups. 
All pairs of groups are compared via a -# call to reg2ci. -# -# x has list mode; each element contains a matrix of predictors. -# x[[1]] contains predictors for first group -# y[[1]] dependent variable for first group. -# -# -if(!is.list(x))stop('x and y should have list mode') -J=length(x) # number of groups -jcom<-0 -for (j in 1:J){ -for (k in 1:J){ -if (j < k){ -res=reg2ci(x[[j]],y[[j]],x[[k]],y[[k]],regfun=regfun,nboot=nboot,alpha=alpha, -plotit=FALSE,xout=xout,outfun=outfun,WARN=FALSE,...) -print(paste('Group', j,'Group', k)) -print(res) -}}} -} - - -epowv2<-function(x,y,pcor=FALSE,regfun=tsreg,corfun=pbcor,varfun=pbvar,xout=FALSE,outfun=outpro,plotit=FALSE,...){ -# -# Estimate the explanatory correlation between x and y -# -# It is assumed that x is a vector or a matrix having one column only -xx<-elimna(cbind(x,y)) # Remove rows with missing values -p1=ncol(xx) -p=p1-1 -x<-xx[,1:p] -y<-xx[,p1] -x<-as.matrix(x) -if(xout){ -flag<-outfun(x,plotit=plotit,...)$keep -x=x[flag,] -y=y[flag] -} -coef<-regfun(x,y)$coef -yhat<-x %*% coef[2:p1] + coef[1] -stre=NULL -temp=varfun(y) -e.pow=NULL -if(temp>0)e.pow<-varfun(yhat)/temp -if(e.pow>1)e.pow=corfun(y,yhat)$cor^2 -list(Strength.Assoc=sqrt(e.pow),Explanatory.Power=e.pow) -} -rmblo<-function(x,y){ -# -# Remove only bad leverage points and return the -# remaining data -# -x=as.matrix(x) -p=ncol(x) -p1=p+1 -xy=elimna(cbind(x,y)) -x=xy[,1:p] -y=xy[,p1] -temp1=reglev(x,y,plotit=FALSE) -ad1=c(temp1$levpoints,temp1$regout) -flag1=duplicated(ad1) -if(sum(flag1)>0){ -flag1=ad1[flag1] -x=as.matrix(x) -x1=x[-flag1,] -y1=y[-flag1] -xy=cbind(x1,y1) -} -list(x=xy[,1:p],y=xy[,p1]) -} - - - -ols1way<-function(x,y,xout=FALSE,outfun=outpro,STAND=TRUE, -alpha=.05,pr=TRUE,BLO=FALSE,HC3=FALSE,...){ -# -# Test hypothesis that for two or more independent groups, all regression parameters -# (the intercepts and slopes) are equal -# using OLS estimator. -# -# (To compare slopes only, use ols1way2g) -# -# Strategy: Use HC4 or HC3 estimate of standard errors followed by -# Johansen MANOVA type test statistic. -# -# x and y are assumed to have list mode having length J equal to the number of groups -# For example, x[[1]] and y[[1]] contain the data for group 1. -# -# xout=T will eliminate leverage points using the function outfun, -# which defaults to outpro, a projection-type outlier detection method. -# -# BLO=TRUE, only bad leverage points are removed. -# -# OUTPUT: -# n is sample size after missing values are removed -# nv.keep is sample size after leverage points are removed. -# -if(!is.list(x))stop('Argument x should have list mode') -J=length(x) # number of groups -x=lapply(x,as.matrix) -pchk=lapply(x,ncol) -temp=matl(pchk) -if(var(as.vector(temp))!=0)stop('Something is wrong. Number of covariates differs among the groups being compared') -nv=NULL -p=ncol(x[[1]]) -p1=p+1 -for(j in 1:J){ -xy=elimna(cbind(x[[j]],y[[j]])) -x[[j]]=xy[,1:p] -y[[j]]=xy[,p1] -x[[j]]=as.matrix(x[[j]]) -nv=c(nv,nrow(x[[j]])) -} -nv.keep=nv -critrad=NULL -if(BLO)xout=FALSE -if(xout){ -temp1=lapply(x,outfun,plotit=FALSE,STAND=STAND,...)
-for(j in 1:J){ -x[[j]]=x[[j]][temp1[[j]]$keep,] -y[[j]]=y[[j]][temp1[[j]]$keep] -}} -if(BLO){ -for(j in 1:J){ -temp1=reglev(x[[j]],y[[j]],plotit=FALSE) -ad1=c(temp1$levpoints,temp1$regout) -flag1=duplicated(ad1) -if(sum(flag1)>0){ -flag1=ad1[flag1] -x[[j]]=as.matrix(x[[j]]) -x[[j]]=x[[j]][-flag1,] -y[[j]]=y[[j]][-flag1] -}}} -x=lapply(x,as.matrix) -K=p1 -est=matrix(NA,nrow=J,ncol=p1) -grpnum=NULL -for(j in 1:J)grpnum[j]=paste("Group",j) -vlabs="Intercept" -for(j in 2:p1)vlabs[j]=paste("Slope",j-1) -dimnames(est)=list(grpnum,vlabs) -ecov=list() -ecovinv=list() -W=rep(0,p1) -gmean=rep(0,p1) -for(j in 1:J){ -est[j,]=ols(x[[j]],y[[j]],xout=FALSE,plotit=FALSE,...)$coef -nv.keep[j]=nrow(x[[j]]) -ecov[[j]]=olshc4(x[[j]],y[[j]],HC3=HC3)$cov -ecovinv[[j]]=solve(ecov[[j]]) #W_j -gmean=gmean+ecovinv[[j]]%*%est[j,] -W=W+ecovinv[[j]] -} -estall=solve(W)%*%gmean -F=0 -for(k in 1:K){ -for(m in 1:K){ -for(j in 1:J){ -F=F+ecovinv[[j]][k,m]*(est[j,k]-estall[k])*(est[j,m]-estall[m]) -}}} -pvalad=NULL -df=K*(J-1) -iden=diag(p1) -Aw=0 -for(j in 1:J){ -temp=iden-solve(W)%*%ecovinv[[j]] -tempsq=temp%*%temp -Aw=Aw+(sum(diag(tempsq))+(sum(diag(temp)))^2)/(nv[j]-1) -} -Aw=Aw/2 -crit=qchisq(alpha,df) -crit=crit+(crit/(2*df))*(Aw+3*Aw*crit/(df+2)) -alval<-c(1:999)/1000 -for(i in 1:999){ -irem<-i -crit=qchisq(alval[i],df) -critad=crit+(crit/(2*df))*(Aw+3*Aw*crit/(df+2)) -if(F0){ -flag1=ad1[flag1] -x[[j]]=as.matrix(x[[j]]) -x[[j]]=x[[j]][-flag1,] -y[[j]]=y[[j]][-flag1] -}}} -x=lapply(x,as.matrix) -K=p1 -est=matrix(NA,nrow=J,ncol=p1) -grpnum=NULL -for(j in 1:J)grpnum[j]=paste("Group",j) -vlabs="Intercept" -for(j in 2:p1)vlabs[j]=paste("Slope",j-1) -dimnames(est)=list(grpnum,vlabs) -ecov=list() -ecovinv=list() -W=rep(0,p1) -gmean=rep(0,p) -for(j in 1:J){ -est[j,]=ols(x[[j]],y[[j]],xout=FALSE,plotit=FALSE,...)$coef -nv.keep[j]=nrow(x[[j]]) -ecov[[j]]=olshc4(x[[j]],y[[j]])$cov -ecovinv[[j]]=solve(ecov[[j]]) #W_j -gmean=gmean+ecovinv[[j]][2:K,2:K]%*%est[j,2:K] -W=W+ecovinv[[j]] -} -estall=solve(W[2:K,2:K])%*%gmean -estall=c(0,estall) -F=0 -for(k in 2:K){ -for(m in 2:K){ -for(j in 1:J){ -F=F+ecovinv[[j]][k,m]*(est[j,k]-estall[k])*(est[j,m]-estall[m]) -}}} -pvalad=NULL -df=p*(J-1) -# Adjust critical value: -iden=diag(p) -Aw=0 -for(j in 1:J){ -temp=iden-solve(W[2:K,2:K])%*%ecovinv[[j]][2:K,2:K] -tempsq=temp%*%temp -Aw=Aw+(sum(diag(tempsq))+(sum(diag(temp)))^2)/(nv[j]-1) -} -Aw=Aw/2 -crit=qchisq(alpha,df) -crit=crit+(crit/(2*df))*(Aw+3*Aw*crit/(df+2)) -alval<-c(1:999)/1000 -for(i in 1:999){ -irem<-i -crit=qchisq(alval[i],df) -critad=crit+(crit/(2*df))*(Aw+3*Aw*crit/(df+2)) -if(F0){ -if(sig>alpha)sig=a$Estimates[2] -if(sig>alpha)sig=.95*alpha -}} -list(cor.ci=corci,p.value=sig,cor.est=est) -} - -scorsubMC<-function(isub,x,y,pr=FALSE,STAND=TRUE,corfun=corfun,cop=cop,CPP=FALSE,RAN=FALSE,...){ -isub=as.vector(isub) -if(!CPP)corbsub<-scor(x[isub],y[isub],plotit=FALSE,pr=FALSE,STAND=STAND,corfun=corfun,cop=cop, -SEED=FALSE,RAN=RAN,...)$cor -if(CPP)stop('Need to use RStudio with WRScpp installed and use the file WRSC++') -corbsub -} -normTmm<-function(x,SEED=TRUE,nboot=2000){ -# -# Test that the tails of the distribution of x -# have more outliers than expected under normality -# -if(SEED)set.seed(45) -no=out(x,SEED=FALSE)$n.out -val=NA -x=elimna(x) -n=length(x) -for(i in 1:nboot)val[i]=out(rnorm(n),SEED=FALSE)$n.out -list(n.out=no,p.value=mean(val>=no)) -} - -rplot<-function(x,y,est=tmean,scat=TRUE,fr=NA,plotit=TRUE,pyhat=FALSE,efr=.5, -theta=50,phi=25,scale=TRUE,expand=.5,SEED=TRUE,varfun=pbvar,outfun=outpro, 
-nmin=0,xout=FALSE,out=FALSE,eout=FALSE,xlab='X',ylab='Y',zscale=FALSE, -zlab=' ',pr=TRUE,duplicate='error',ticktype='simple',LP=TRUE,OLD=FALSE,pch='.',prm=TRUE,...){ -# duplicate='error' -# In some situations where duplicate values occur, when plotting with -# two predictors, it is necessary to set duplicate='strip' -# -# LP=TRUE, the plot of the smooth is further smoothed via lplot (lowess) -# To get a plot as done with old version set -# LP=FALSE -# -# zscale=TRUE will standardize the dependent variable when plotting with 2 independent variables. -# -# efr is the span when computing explanatory strength of association -# -# cf qplot in the R package ggplot2 -# -if(pr){ -if(!xout)print('Suggest also looking at result using xout=TRUE') -} -x<-as.matrix(x) -p=ncol(x) -p1=p+1 -if(pr && !OLD){ -print('A new estimate of the strength of the association is used by default.') -print(' To get the old estimate, set OLD=TRUE') -} -xx<-cbind(x,y) -xx<-elimna(xx) -n=nrow(xx) -if(eout){ -flag=outfun(xx,plotit=FALSE,...)$keep -xx=xx[flag,] -} -if(xout){ -if(identical(outfun,outblp))flag=outblp(xx[,1:p],xx[,p1],plotit=FALSE)$keep -else -flag=outfun(xx[,1:p],plotit=FALSE,...)$keep -xx=xx[flag,] -} -n.keep=nrow(xx) -x<-xx[,1:p] -x<-as.matrix(x) -p1=ncol(x)+1 -y<-xx[,p1] -if(ncol(x)==1){ -if(is.na(fr))fr<-.8 -val<-rungen(x,y,est=est,scat=scat,fr=fr,plotit=plotit,pyhat=TRUE, -xlab=xlab,ylab=ylab,LP=LP,pch=pch,...) -val2<-rungen(x,y,est=est,fr=efr,plotit=FALSE,pyhat=TRUE,LP=FALSE,...)$output -val<-val$output -} -if(ncol(x)>1){ -id=chk4binary(x) -Lid=length(id) -if(Lid>0)Stop('Binary independent variables detected, use rplotv2') -if(ncol(x)==2 && !scale){ -if(pr){print('scale=FALSE is specified.') -print('If there is dependence, might want to use scale=T') -}} -if(is.na(fr))fr<-1 -val<-rung3d(x,y,est=est,fr=fr,plotit=plotit,pyhat=TRUE,SEED=SEED,nmin=nmin,LP=LP, -scale=scale,phi=phi,theta=theta,expand=expand,zscale=zscale,pr=FALSE, -duplicate='error',xlab=xlab,ylab=ylab,zlab=zlab,ticktype=ticktype,...) -} -E.power=NULL -if(OLD){ -E.power=varfun(val)/varfun(y) -names(E.power)='' -if(E.power>1)E.power=.99 -} -if(!OLD)E.power=smRstr(x,y,fr=fr)$str^2 -stra=sqrt(E.power) -# Best correction at the moment. Not sure when or if needed. -# Maybe a correlation option is better, but need to check this. -xvals=x -if(ncol(x)==1)xvals=sort(xvals) -if(!pyhat){ -val <- NULL -xvals=NULL -} -if(!prm){ -stra=NULL -E.power=NULL -val=NULL -} -list(n=n,n.keep=n.keep,Strength.Assoc=stra,Explanatory.Power = E.power, xvals=xvals,yhat = val) -} - -Rfit<-function(x,y,xout=FALSE,outfun=outpro,...){ -# -# Fit regression line using rank-based method based -# Jaeckel's dispersion function -# via the R package Rfit -# -library(Rfit) -if(xout){ -m<-cbind(x,y) -p1=ncol(m) -p=p1-1 -flag<-outfun(x,plotit=FALSE,...)$keep -m<-m[flag,] -x<-m[,1:p] -y<-m[,p1] -} -fit=rfit(y~x) -output=summary(fit) -list(summary=output[1]$coefficients,coef=output[1]$coefficients[,1],Drop_test=output[2]$dropstat, - Drop_test_p.value=output[3]$droppval,Mult_R_squared=output[4]$R2) -} - -regunstack<-function(x,grp,xcols,ycol){ -# -# x is assumed to be a matrix or a data frame -# -# sort data in x into group indicated by col grp of x, -# Designed for a one-way ANOVA where goal is to compare slopes -# corresponding to two or more groups. -# -# returns the independent variables in x having list mode -# x[[1]] would be a matrix for group 1, x[[2]] a matrix for group 2, etc -# y[[1]] is the dependent variable for group 1, etc. 
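# A minimal usage sketch (hypothetical data frame dat): with the grouping
# variable in column 1, a single covariate in column 2 and the outcome in
# column 3,
#   z <- regunstack(dat, grp = 1, xcols = 2, ycol = 3)
# returns z$x and z$y in the list mode expected by functions such as ols1way.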
-# -# xcols indicates the columns of x containing independent variables -# ycol indicates the column of x containing dependent variables -# -x=elimna(x) -val=sort(unique(x[,grp])) -xs=list() -ys=list() -for(i in 1:length(val)){ -flag=(x[,grp]==val[i]) -xs[[i]]=x[flag,xcols] -ys[[i]]=x[flag,ycol] -} -list(num.grps=length(val),x=xs,y=ys) -} - - - - -ols1way2g<-function(x,y,grp=c(1,2),iv=1,xout=FALSE,outfun=outpro,STAND=TRUE,alpha=.05,pr=TRUE,BLO=FALSE,...){ -# -# Test hypothesis that for two or more independent groups, all slope parameters -# are equal using OLS estimator. -# -# (ols1way tests the hypothesis that all parameters are equal, not just slopes.) -# -# Use Johansen MANOVA type test statistic in conjunction with HC4 estimate of covariances. -# -# x and y are assumed to have list mode having length J equal to the number of groups -# For example, x[[1]] and y[[1]] contain the data for group 1. -# -# xout=T will eliminate leverage points using the function outfun, -# which defaults to the MVE method. -# -# BLO=TRUE, only bad leverage points are removed. -# -# OUTPUT: -# n is sample size after missing values are removed -# nv.keep is sample size after leverage points are removed. -# -if(!is.list(x))stop('Argument x should have list mode') -iv1=iv+1 -J=length(x) # number of groups -x=lapply(x,as.matrix) -pchk=lapply(x,ncol) -temp=matl(pchk) -if(var(as.vector(temp))!=0)stop('Something is wrong. Number of covariates differs among the groups being compared') -nv=NULL -p=ncol(x[[1]]) -p1=p+1 -for(j in 1:J){ -xy=elimna(cbind(x[[j]],y[[j]])) -x[[j]]=xy[,1:p] -y[[j]]=xy[,p1] -x[[j]]=as.matrix(x[[j]]) -nv=c(nv,nrow(x[[j]])) -} -nv.keep=nv -critrad=NULL -if(BLO)xout=FALSE -if(xout){ -temp=lapply(x,outfun,plotit=FALSE,STAND=STAND,...) -for(j in 1:J){ -x[[j]]=x[[j]][temp[[j]]$keep,] -y[[j]]=y[[j]][temp[[j]]$keep] -}} -if(BLO){ -for(j in 1:J){ -temp1=reglev(x[[j]],y[[j]],plotit=FALSE) -ad1=c(temp1$levpoints,temp1$regout) -flag1=duplicated(ad1) -if(sum(flag1)>0){ -flag1=ad1[flag1] -x[[j]]=as.matrix(x[[j]]) -x[[j]]=x[[j]][-flag1,] -y[[j]]=y[[j]][-flag1] -}}} -x=lapply(x,as.matrix) -K=p1 -est=matrix(NA,nrow=J,ncol=p1) -grpnum=NULL -for(j in 1:J)grpnum[j]=paste("Group",j) -vlabs="Intercept" -for(j in 2:p1)vlabs[j]=paste("Slope",j-1) -dimnames(est)=list(grpnum,vlabs) -ecov=list() -ecovinv=list() -W=rep(0,p1) -gmean=rep(0,K) -for(j in 1:J){ -est[j,]=ols(x[[j]],y[[j]],xout=FALSE,plotit=FALSE,...)$coef -nv.keep[j]=nrow(x[[j]]) -ecov[[j]]=olshc4(x[[j]],y[[j]])$cov -ecovinv[[j]]=solve(ecov[[j]]) #W_j -gmean=gmean+ecovinv[[j]]%*%est[j,] -W=W+ecovinv[[j]] -} -estall=solve(W)%*%gmean -F=0 -for(j in 1:2){ -F=F+ecovinv[[grp[j]]][iv1,iv1]*(est[grp[j],iv1]-estall[iv1])*(est[grp[j],iv1]-estall[iv1]) -} -pvalad=NULL -df=1 -# Adjust critical value: -iden=1 -Aw=0 -for(j in 1:J){ -temp=iden-solve(W[iv1,iv1])%*%ecovinv[[grp[j]]][iv1,iv1] -tempsq=temp%*%temp -Aw=Aw+(sum(diag(tempsq))+(sum(diag(temp)))^2)/(nv[grp[j]]-1) -} -Aw=Aw/2 -crit=qchisq(alpha,df) -crit=crit+(crit/(2*df))*(Aw+3*Aw*crit/(df+2)) -alval<-c(1:999)/1000 -for(i in 1:999){ -irem<-i -crit=qchisq(alval[i],df) -critad=crit+(crit/(2*df))*(Aw+3*Aw*crit/(df+2)) -if(F0){ -flag1=ad1[flag1] -x[[j]]=as.matrix(x[[j]]) -x[[j]]=x[[j]][-flag1,] -y[[j]]=y[[j]][-flag1] -}}} -x=lapply(x,as.matrix) -K=p1 -est=matrix(NA,nrow=J,ncol=p1) -grpnum=NULL -for(j in 1:J)grpnum[j]=paste("Group",j) -vlabs="Intercept" -for(j in 2:p1)vlabs[j]=paste("Slope",j-1) -dimnames(est)=list(grpnum,vlabs) -ecov=list() -ecovinv=list() -W=rep(0,p1) -for(j in 1:J){ 
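# For every group, compute the OLS coefficients and the HC4 (or HC3) estimate of
# their covariance matrix via olshc4; the slopes of the two groups indicated by
# grp are then compared with a Welch-type statistic using Satterthwaite degrees
# of freedom.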
-est[j,]=ols(x[[j]],y[[j]],xout=FALSE,plotit=FALSE,...)$coef -nv.keep[j]=nrow(x[[j]]) -ecov[[j]]=olshc4(x[[j]],y[[j]],HC3=HC3)$cov -} -q1=ecov[[grp[1]]][iv1,iv1] -q2=ecov[[grp[2]]][iv1,iv1] -top=est[grp[1]]-est[grp[2]] -F=(est[grp[1],iv1]-est[grp[2],iv1])/sqrt(q1+q2) -df=(q1+q2)^2/(q1^2/(nv[grp[1]]-1)+q2^2/(nv[grp[2]]-1)) -pv=2*(1-pt(abs(F),df)) -crit=qt(1-alpha/2,df) -ci=est[grp[1],iv1]-est[grp[2],iv1]-crit*sqrt(q1+q2) -ci[2]=est[grp[1],iv1]-est[grp[2],iv1]+crit*sqrt(q1+q2) -list(n=nv,n.keep=nv.keep,test.stat=F,conf.interval=ci, -est=c(est[grp[1],iv1],est[grp[2],iv1]),est.dif=est[grp[1],iv1]-est[grp[2],iv1],p.value=pv) -} - -cov.roc<-function(x){ -library(robust) -temp<-covRob(x,estim='M') -val<-temp -list(center=val[3]$center,cov=val[2]$cov) -} -reg1mcp<-function(x,y,regfun=tsreg,SEED=TRUE,nboot=100,xout=FALSE,outfun=outpro,STAND=TRUE,alpha=.05, -pr=TRUE,MC=FALSE,...){ -# -# Perform all pairwise comparisons of intercepts among J independent groups -# Do the same of the first slope, followed by the 2nd slope, etc. -# -# Control FWE via Hochberg's methods for each set of -# (J^2-J)/2 parameters. That is, control FWE for the intercepts -# Do the same for the first slope, etc. -# -# # x and y are assumed to have list mode having -# length J equal to the number of groups -# For example, x[[1]] and y[[1]] contain the data for group 1. -# -# xout=T will eliminate leverage points using the function outfun, -# which defaults to the projection method. -# -# OUTPUT: -# n is sample size after missing values are removed -# nv.keep is sample size after leverage points are removed. -# output contains all pairwise comparisons -# For each parameter, FWE is controlled using Hochberg's method -# So by default, for the intercepts, -# all pairwise comparisons are performed with FWE=.05 -# For the first slope, all pairwise comparisons are performed -# with FWE=.05, etc. -# -if(SEED)set.seed(2) -if(!is.list(x))stop('Argument x should have list mode') -if(!is.list(y))stop('Argument y should have list mode') -J=length(x) # number of groups -x=lapply(x,as.matrix) -pchk=lapply(x,ncol) -temp=matl(pchk) -if(var(as.vector(temp))!=0)stop('Something is wrong. Number of covariates differs among the groups being compared') -nv=NULL -p=ncol(x[[1]]) -p1=p+1 -for(j in 1:J){ -xy=elimna(cbind(x[[j]],y[[j]])) -x[[j]]=xy[,1:p] -y[[j]]=xy[,p1] -x[[j]]=as.matrix(x[[j]]) -nv=c(nv,nrow(x[[j]])) -} -nv.keep=nv -critrad=NULL -if(xout){ -temp=lapply(x,outfun,plotit=FALSE,STAND=STAND,...) 
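# As in ols1way, rows flagged as leverage points by outfun are dropped from each
# group before the pairwise comparisons; nv.keep records the reduced sample sizes.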
-for(j in 1:J){ -x[[j]]=x[[j]][temp[[j]]$keep,] -y[[j]]=y[[j]][temp[[j]]$keep] -nv.keep[j]=length(y[[j]]) -}} -tot=(J^2-J)/2 -dvec<-alpha/c(1:tot) -outl=list() -nr=tot*p1 -outp=matrix(NA,ncol=7,nrow=nr) -x=lapply(x,as.matrix) -rlab=rep('Intercept',tot) -xx=list() -yy=list() -iall=0 -ivp=c(1,tot)-tot -for(ip in 1:p){ -#iv=ip-1 -rlab=c(rlab,rep(paste('slope',ip),tot)) -} -i=0 -sk=1+tot*p -st=seq(1,sk,tot) -st=st-1 -for(j in 1:J){ -for(k in 1:J){ -if(j < k){ -i=i+1 -st=st+1 -xx[[1]]=x[[j]][,1:p] -xx[[2]]=x[[k]][,1:p] -yy[[1]]=y[[j]] -yy[[2]]=y[[k]] -if(!MC)temp=reg2ci(xx[[1]],yy[[1]],xx[[2]],yy[[2]],regfun=regfun)$output -if(MC)temp=reg2ci(xx[[1]],yy[[1]],xx[[2]],yy[[2]],regfun=regfun)$output -iall=iall+1 -outp[iall,1]=j -outp[iall,2]=k -outp[st,3]=temp[,4] -outp[st,5]=temp[,2] -outp[st,6]=temp[,3] -}}} -for(i in 1:p1){ -ivp=ivp+tot -temp2<-order(0-outp[ivp[1]:ivp[2],3]) -icc=c(ivp[1]:ivp[2]) -icc[temp2]=dvec -outp[ivp[1]:ivp[2],4]=icc -} -flag=(outp[,3]<=outp[,4]) -outp[,7]=rep(0,nr) -outp[flag,7]=1 -v=outp[1:tot,1] -vall=rep(v,p1) -outp[,1]=vall -v=outp[1:tot,2] -vall=rep(v,p1) -outp[,2]=vall -#outp[,7]=p.adjust(outp[,3],method=method) -dimnames(outp)=list(rlab,c('Group','Group','p.value','p.crit','ci.low','ci.hi','Sig')) -list(n=nv,n.keep=nv.keep,output=outp) -} - - - -qcorp1<-function(x,y,qest=hd,q=.5,xout=FALSE,outfun=outpro,plotit=FALSE,...){ -# -# Compute a measure of the strength of the association -# based on the quantile regression lines -# -X=cbind(x,y) -X=elimna(X) -x<-as.matrix(x) -p=ncol(x) -x=X[,1:p] -p1=p+1 -y=X[,p1] -if(xout){ -x<-as.matrix(x) -flag<-outfun(x,plotit=plotit,...)$keep -x<-x[flag,] -y<-y[flag] -x<-as.matrix(x) -X=cbind(x,y) -} -est=qreg(x,y,q=q)$coef -top=qreg.sub(X,est,qval=q) -null=qest(y,q) -v=c(null,rep(0,p)) -bot=qreg.sub(X,v,qval=q) -ce=sqrt(1-top/bot) -if(p==1)ce=sign(est[2])*ce -list(cor=ce) -} - -scorciMC<-function(x,y,nboot=1000,alpha=.05,V2=TRUE,SEED=TRUE,plotit=TRUE,STAND=TRUE,corfun=pcor,pr=TRUE,cop=3,...){ -# -# Compute a 1-alpha confidence interval for the skipped correlation. -# alpha=0.05 is the default. -# By default, Pearson's correlation is computed after outliers are removed via the R function outdoor -# corfun=spear, for example would replace Pearson's correlation with Spearman's correlation. -# -# The default number of bootstrap samples is nboot=1000 -# -# This function uses the R package parallel -# -if(pr){ -print('As of Sept. 4, 2019, an improved version of this function is used when n<120. To use the old version, set V2=FALSE') -} -if(ncol(as.matrix(x))!=1)stop('x should be a single vector') -if(!V2){ -m1=cbind(x,y) -m1<-elimna(m1) # Eliminate rows with missing values -nval=nrow(m1) -x<-m1[,1] -y<-m1[,2] -est<-scor(x,y,plotit=plotit,STAND=STAND,corfun=corfun,SEED=SEED,cop=cop,pr=FALSE,...)$cor -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -# -data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot) -data=listm(t(data)) -library(parallel) -bvec<-mclapply(data,scorsubMC,x,y,STAND=STAND,corfun=corfun,cop=cop,...) -bvec=matl(bvec) # A 1 by nboot matrix. 
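# Percentile bootstrap: sort the nboot skipped correlations, take the alpha/2 and
# 1-alpha/2 order statistics as the confidence interval, and base the p-value on
# the proportion of bootstrap correlations falling below zero.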
-bvec=as.vector(bvec) -ihi<-floor((1-alpha/2)*nboot+.5) -ilow<-floor((alpha/2)*nboot+.5) -bsort<-sort(bvec) -corci<-1 -corci[1]<-bsort[ilow] -corci[2]<-bsort[ihi] -phat <- sum(bvec < 0)/nboot -sig <- 2 * min(phat, 1 - phat) -} -if(V2){ -a=scorregciH(x,y,nboot=nboot,alpha=alpha,pr=FALSE,SEED=SEED,STOP=FALSE) -est=a$Estimates[1] -sig=a$Estimates[2] -corci=a$confidence.int[2:3] -chk=sign(corci[1]*corci[2]) -if(chk>0){ -if(sig>alpha)sig=a$Estimates[2] -if(sig>alpha)sig=.95*alpha -}} -list(cor.ci=corci,p.value=sig,cor.est=est) -} - -olsLmcp<-function(x,y,xout=TRUE,outfun=outpro,ISO=FALSE,STAND=TRUE,alpha=.05,pr=TRUE,BLO=FALSE,HC3=FALSE,...){ -# -# All pairwise comparison of regression models among J independent groups -# That is, for groups j and k, all j0)rlab=c(rlab,rep(paste('slope',iv),tot)) -for(j in 1:J){ -for(k in 1:J){ -if(j < k){ -i=i+1 -xx[[1]]=x[[j]][,1:p] -xx[[2]]=x[[k]][,1:p] -yy[[1]]=y[[j]] -yy[[2]]=y[[k]] -all=olsW2g(xx,yy,iv=iv,BLO=BLO,HC3=HC3) -temp=all$p.value -iall=iall+1 -outp[iall,1]=j -outp[iall,2]=k -outp[iall,3]=all$conf.interval[1] -outp[iall,4]=all$conf.interval[2] -outp[iall,5]=temp -}}} -ivp=ivp+tot -temp2<-order(0-outp[ivp[1]:ivp[2],5]) -icc=c(ivp[1]:ivp[2]) -icc[temp2]=dvec -outp[ivp[1]:ivp[2],6]=icc -D=rep('NO',tot) -flag=(outp[ivp[1]:ivp[2],5]<=outp[ivp[1]:ivp[2],4]) -} -flag=(outp[,5]<=outp[,6]) -outp[,7]=rep(0,nr) -outp[flag,7]=1 -dimnames(outp)=list(rlab,c('Group','Group','ci.low','ci.up','p.value','p.crit','sig')) -list(n=nv,n.keep=nv.keep,output=outp) -} - - - -anctsmcp<-function(x,y,regfun=tsreg,nboot=599,alpha=0.05,pts=NULL, -SEED=TRUE,xout=FALSE,outfun=out,fr1=1,fr2=1,...){ -# -# Like reg2ci only x1 etc have list mode containing -# data for J>1 groups. For all pairs of groups are compared via a -# call to ancova. -# -# x list mode contain a matrix of predictors. -# x[[1]] contains predictors for first group -# y[[1]] dependent variable for first group. -# -# -if(!is.list(x))stop('x and y should have list mode') -J=length(x) # number of groups -jcom<-0 -for (j in 1:J){ -for (k in 1:J){ -if (j < k){ -res=ancts(x[[j]],y[[j]],x[[k]],y[[k]],regfun=regfun,pts=pts, -nboot=nboot,alpha=alpha,fr1=fr1,fr2=fr2, -plotit=FALSE,xout=xout,outfun=outfun,WARN=FALSE,...) -print(paste('Group', j,'Group', k)) -print(res) -}}} -} - -chregF<-function(x,y,bend=1.345,SEED=FALSE,xout=FALSE,outfun=out,...){ -# -# Compute Coakley Hettmansperger robust regression estimators -# JASA, 1993, 88, 872-880 -# -# x is a n by p matrix containing the predictor values. -# -# No missing values are allowed -# -# Comments in this function follow the notation used -# by Coakley and Hettmansperger -# -library(MASS) -# with old version of R, need library(lqs) when using ltsreg -# as the initial estimate. -# -if(SEED)set.seed(12) # Set seed so that results are always duplicated. -x<-as.matrix(x) -p<-ncol(x) -m<-elimna(cbind(x,y)) -x<-m[,1:p] -p1<-p+1 -y<-m[,p1] -if(xout){ -x<-as.matrix(x) -flag<-outfun(x,plotit=plotit,...)$keep -x<-x[flag,] -y<-y[flag] -x<-as.matrix(x) -} -x<-as.matrix(x) -cutoff<-bend -mve<-vector('list') -if(ncol(x)==1){ -mve$center<-median(x) -mve$cov<-mad(x)^2 -} -if(ncol(x)>=2)mve<-cov.mve(x) # compute minimum volume ellipsoid measures of - # location and scale and store in mve. -reg0<-ltsReg(x,y) # compute initial regression est using least trimmed - # squares. 
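# NOTE: ltsReg() is provided by the robustbase package (cf. the ltsreg() wrapper
# defined later in this file); the lowercase ltsreg()/lqs() routine referred to
# above is the older MASS/lqs version.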
-# Next, compute the rob-md2(i) values and store in rob -rob<-1 # Initialize vector rob -mx<-mve$center -rob<-mahalanobis(x,mx,mve$cov) -k21<-qchisq(.95,p) -c62<-k21/rob -vecone<-c(rep(1,length(y))) # Initialize vector vecone to 1 -c30<-pmin(vecone,c62) # mallows weights put in c30 -k81<-median(abs(reg0$residuals)) # median of absolute residuals -k72<-1.4826*(1+(5/(length(y)-p-1)))*k81 # lms scale -c60<-reg0$residuals/(k72*c30) # standardized residuals -# compute psi and store in c27 -cvec<-c(rep(cutoff,length(y))) # Initialize vector cvec to cutoff -c27<-pmin(cvec,c60) -c27<-pmax(-1*cutoff,c27) #c27 contains psi values -# -# compute B matrix and put in c66. -# Also, transform B so that i th diag elem = 0 if c27[i] is -# between -cutoff and cutoff, 1 otherwise. -# -c66<-ifelse(abs(c27)<=bend,1,0) # Have derivative of psi in c66 -m1<-cbind(1,x) # X matrix with col of 1's added -m2<-t(m1) #X transpose -m5<-diag(c30) # matrix W, diagonal contains weights -m4<-diag(c66) # B matrix -m6<-m4%*%m1 # BX -m7<-m2%*%m6 # X'BX (nD=X'BX) -m8<-solve(m7) #m8 = (X'-B-X)inverse -m9<-m8%*%m2 #m9=X prime-B-X inverse X' -m9<-m9%*%m5 # m9=X prime-B-X inverse X'W -m10<-m9%*%c27 -c20<-m10*k72 -c21<-reg0$coef+c20 #update initial estimate of parameters. -res<-y-m1%*%c21 -list(coef=t(c21),residuals=res) -} - -DregGOLS<-function(x1,y1,x2,y2,xout=FALSE,outfun=outpro,SEED=TRUE,nboot=200, -STAND=TRUE,...){ -# -# Global test that two dependent (time 1 and time 2) -# OLS regression lines are identical -# -# Use a variation of Hotelling's test coupled with a bootstrap -# estimate of the relevant covariance matrix associated with the differences -# in the estimates of the parameters. -# -if(SEED)set.seed(2) -X=elimna(cbind(x1,y1,x2,y2)) -x1=as.matrix(x1) -x2=as.matrix(x2) -p=ncol(x1) -p1=p+1 -p2=p+2 -p3=p1+p -p4=p3+1 -x1=X[,1:p] -y1=X[,p1] -x2=X[,p2:p3] -y2=X[,p4] -n=length(y1) -if(xout){ -opf=identical(outfun,outpro) -if(!opf){ -flag1=outfun(x1)$out.id -flag2=outfun(x2)$out.id -} -if(opf){ -flag1=outpro(x1,STAND=STAND)$out.id -flag2=outfun(x2,STAND=STAND)$out.id -} -flag=unique(c(flag1,flag2)) -if(length(flag)>0)X=X[-flag,] -x1=X[,1:p] -y1=X[,p1] -x2=X[,p2:p3] -y2=X[,p4] -} -nk=length(y1) -x1=as.matrix(x1) -x2=as.matrix(x2) -data<-matrix(sample(length(y1),size=length(y1)*nboot,replace=TRUE),nrow=nboot) -bvec1<-apply(data,1,regboot,x1,y1,regfun=lsfit,...) -# bvec is a p+1 by nboot matrix. The first row -# contains the bootstrap intercepts, the second row -# contains the bootstrap values for first predictor, etc. -bvec2<-apply(data,1,regboot,x2,y2,regfun=lsfit,...) 
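# The same bootstrap row indices are used for both occasions, so bvec1-bvec2
# reflects the dependence between the two sets of estimates; cov() of these
# differences supplies the covariance matrix used in the Hotelling-type
# statistic below.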
-dif=t(bvec1-bvec2) -S=cov(dif) -est1=lsfit(x1,y1)$coef -est2=lsfit(x2,y2)$coef -est=est1-est2 -k <- (nk-p1)/((nk - 1)*p1) - stat <- k * crossprod(est, solve(S, est))[1, ] - pvalue <- 1 - pf(stat, p1, nk - p1) -list(test.statistic = stat, degrees_of_freedom = c(p1, nk - p1), p.value = -pvalue,est.1=est1,est.2=est2,estimate.dif = est) -} - -difregOLS<-function(x1,y1,x2,y2,regfun=lsfit,xout=FALSE,outfun=outpro,nboot=200, -alpha=.05,SEED=TRUE,plotit=FALSE,xlab='X',ylab='Y',...){ -# -# OLS regression data from two different times i.e., two dependent groups -# -# compute confidence interval for the difference between intercepts -# and the slopes -# -if(SEED)set.seed(2) -X=elimna(cbind(x1,y1,x2,y2)) -x1=as.matrix(x1) -x2=as.matrix(x2) -p=ncol(x1) -p1=p+1 -p2=p+2 -p3=p1+p -p4=p3+1 -x1=X[,1:p] -y1=X[,p1] -x2=X[,p2:p3] -y2=X[,p4] -n=length(y1) -if(xout){ -flag1=outfun(x1)$out.id -flag2=outfun(x2)$out.id -flag=unique(c(flag1,flag2)) -if(length(flag)>0)X=X[-flag,] -x1=X[,1:p] -y1=X[,p1] -x2=X[,p2:p3] -y2=X[,p4] -} -nk=length(y1) -x1=as.matrix(x1) -x2=as.matrix(x2) -data<-matrix(sample(length(y1),size=length(y1)*nboot,replace=TRUE),nrow=nboot) -bvec1<-apply(data,1,regboot,x1,y1,regfun,...) -# bvec is a p+1 by nboot matrix. The first row -# contains the bootstrap intercepts, the second row -# contains the bootstrap values for first predictor, etc. -bvec2<-apply(data,1,regboot,x2,y2,regfun,...) -dif=t(bvec1)-t(bvec2) -est1=lsfit(x1,y1)$coef -est2=lsfit(x2,y2)$coef -estdif=est1-est2 -se=apply(dif,2,sd) -pvec=NA -test=NA -test=estdif/se -df=nk-1 -pvec=2*(1-pt(abs(test),df)) -if(plotit){ -reg2plot(x1,y1,x2,y2,xlab=xlab,ylab=ylab) -} -lvec='Intercept' -ci=matrix(NA,nrow=p1,ncol=3) -ci[,1]=c(0:p) -ci[,2]=estdif+qt(alpha/2,df)*se -ci[,3]=estdif-qt(alpha/2,df)*se -dimnames(ci)=list(NULL,c('Param','ci.low','ci.hi')) -for(j in 2:p1)lvec=c(lvec,paste('slope',j-1)) -pvec=array(pvec,dimnames=lvec) -list(n=n,n.keep=nk,est.dif=estdif,est.1=est1,est.2=est2, -test.stat=test,standard.error=se,p.values=pvec,conf.intervals=ci) -} - -Dancols<-function(x1,y1,x2,y2,pts=NULL,fr1=1,fr2=1,alpha=.05,plotit=TRUE,xout=FALSE,outfun=out,nboot=100,SEED=TRUE,xlab='X',ylab='Y',CR=FALSE,...){ -# -# Compare the OLS regression lines of two dependent (within) groups -# at specified design points -# -# Assume data are in x1 y1 x2 and y2 -# -# pts can be used to specify the design points where the regression lines -# are to be compared. -# If not specified, points are chosen for you. -# -# CR=TRUE: determine interval outside of which the lines cross. -# (Analog of Johnson--Neyman method) -# -# OUTPUT: -# cross.interval indicates interval outside of which the lines have crossed. 
-# output cr.quant.grp1 indicates that quantiles of group 1 corresponding to -# to the end of the intervals returned in cross.interval -# -x1=as.matrix(x1) -x2=as.matrix(x2) -if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') -if(SEED)set.seed(2) -FLAG=pts -X=elimna(cbind(x1,y1,x2,y2)) -if(ncol(X)>4)stop('Only one covariate is allowed') -x1=as.matrix(x1) -x2=as.matrix(x2) -p=ncol(x1) -p1=p+1 -p2=p+2 -p3=p1+p -p4=p3+1 -x1=X[,1:p] -y1=X[,p1] -x2=X[,p2:p3] -y2=X[,p4] -n=length(y1) -if(xout){ -flag1=outfun(x1,SEED=SEED,...)$out.id -flag2=outfun(x2,SEED=SEED,...)$out.id -flag=unique(c(flag1,flag2)) -if(length(flag)>0)X=X[-flag,] -x1=X[,1:p] -y1=X[,p1] -x2=X[,p2:p3] -y2=X[,p4] -} -n.keep=length(y1) -if(is.null(pts[1])){ -npt<-5 -isub<-c(1:5) # Initialize isub -test<-c(1:5) -xorder<-order(x1) -y1<-y1[xorder] -x1<-x1[xorder] -xorder<-order(x2) -y2<-y2[xorder] -x2<-x2[xorder] -n1<-1 -n2<-1 -vecn<-1 -for(i in 1:length(x1))n1[i]<-length(y1[near(x1,x1[i],fr1)]) -for(i in 1:length(x1))n2[i]<-length(y2[near(x2,x1[i],fr2)]) -for(i in 1:length(x1))vecn[i]<-min(n1[i],n2[i]) -sub<-c(1:length(x1)) -isub[1]<-min(sub[vecn>=12]) -isub[5]<-max(sub[vecn>=12]) -isub[3]<-floor((isub[1]+isub[5])/2) -isub[2]<-floor((isub[1]+isub[3])/2) -isub[4]<-floor((isub[3]+isub[5])/2) -pts=x1[isub] -pts=unique(pts) -npt=nrow(as.matrix(pts)) -mat<-matrix(NA,npt,9) -dimnames(mat)<-list(NULL,c('X','Est1','Est2','DIF','TEST','se','ci.low','ci.hi','p.value')) -mat[,1]=pts -sqsd=difregYvar(x1,y1,x2,y2,regfun=lsfit,pts=pts,nboot=nboot,SEED=SEED) -est1=regYhat(x1,y1,xr=pts,regfun=lsfit) #Note: if xout=T, leverage points already removed -est2=regYhat(x2,y2,xr=pts,regfun=lsfit) -mat[,2]=est1 -mat[,3]=est2 -est=est1-est2 -mat[,4]=est -sd=sqrt(sqsd) -mat[,6]=sd -tests=(est1-est2)/sd -mat[,5]=tests -df=length(y1)-1 -pv=2*(1-pt(abs(tests),df)) -mat[,9]=pv -crit<-smmcrit(df,5) -mat[,7]=est-crit*sd -mat[,8]=est+crit*sd -} -if(!is.null(FLAG)){ -for(i in 1:length(pts)){ -n1[i]<-length(y1[near(x1,pts[i],fr1)]) -n2[i]<-length(y2[near(x2,pts[i],fr2)]) -} -pts=unique(pts) -mat<-matrix(NA,length(pts),9) -dimnames(mat)<-list(NULL,c('X','Est1','Est2','DIF','TEST','se','ci.low','ci.hi','p.value')) -mat[,1]<-pts -sqsd=difregYvar(x1,y1,x2,y2,regfun=lsfit,pts=pts,nboot=nboot,SEED=SEED) -est1=regYhat(x1,y1,xr=pts,regfun=lsfit,,...) -est2=regYhat(x2,y2,xr=pts,regfun=lsfit,,...) 
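# At each user-specified design point, the difference between the two OLS
# predicted values is divided by a bootstrap estimate of its standard error
# (difregYvar above); the confidence intervals use a Studentized maximum modulus
# critical value (smmcrit) so that the simultaneous coverage over the points
# considered is approximately 1-alpha.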
-mat[,2]=est1 -mat[,3]=est2 -est=est1-est2 -mat[,4]=est -sd=sqrt(sqsd) -mat[,6]=sd -tests=(est1-est2)/sd -mat[,5]=tests -df=length(y1)-1 -pv=2*(1-pt(abs(tests),df)) -mat[,9]=pv -crit<-smmcrit(df,length(pts)) -mat[,7]=est-crit*sd -mat[,8]=est+crit*sd -} -if(plotit){ -plot(c(x1,x2),c(y1,y2),type='n',xlab=xlab,ylab=ylab) -points(x1,y1,pch='o') -points(x2,y2,pch='+') -abline(lsfit(x1,y1)$coef) -abline(lsfit(x2,y2)$coef,lty=2) -} -int=NULL -crq=NULL -crq2=NULL -if(CR){ -if(ncol(as.matrix(x1))>1)stop('CR=T only allowed with one covariate') -int=DancCR(x1,y1,x2,y2) -crq=mean(x1<=int[1]) -crq[2]=mean(x1<=int[2]) -crq2=mean(x2<=int[1]) -crq2[2]=mean(x2<=int[2]) -} - -list(n=n,n.keep=n.keep,output=mat,cross.interval=int,cr.quant.grp1=crq, -cr.quant.grp2=crq2) -} -Dancols_sub1<-function(pts,x1,y1,x2,y2){ -# -# -ci=abs(Dancols_sub(x1,y1,x2,y2,pts=pts)$output[1,7]) -ci -} -Dancols_sub2<-function(pts,x1,y1,x2,y2){ -# -# -ci=abs(Dancols_sub(x1,y1,x2,y2,pts=pts)$output[1,8]) -ci -} -Dancols_sub<-function(x1,y1,x2,y2,pts=NULL,fr1=1,fr2=1,alpha=.05,plotit=FALSE,xout=FALSE,outfun=out,nboot=100,SEED=TRUE,xlab='X',ylab='Y',...){ -# -# Compare the OLS regression lines of two dependent (within) groups -# at specified design points -# -# Assume data are in x1 y1 x2 and y2 -# -# pts can be used to specify the design points where the regression lines -# are to be compared. -# If not specified, points are chosen for you. -# -x1=as.matrix(x1) -x2=as.matrix(x2) -if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') -if(SEED)set.seed(2) -FLAG=pts -X=elimna(cbind(x1,y1,x2,y2)) -if(ncol(X)>4)stop('Only one covariate is allowed') -x1=as.matrix(x1) -x2=as.matrix(x2) -p=ncol(x1) -p1=p+1 -p2=p+2 -p3=p1+p -p4=p3+1 -x1=X[,1:p] -y1=X[,p1] -x2=X[,p2:p3] -y2=X[,p4] -n=length(y1) -if(xout){ -flag1=outfun(x1,SEED=SEED,...)$out.id -flag2=outfun(x2,SEED=SEED,...)$out.id -flag=unique(c(flag1,flag2)) -if(length(flag)>0)X=X[-flag,] -x1=X[,1:p] -y1=X[,p1] -x2=X[,p2:p3] -y2=X[,p4] -} -n.keep=length(y1) -if(is.null(pts[1])){ -npt<-5 -isub<-c(1:5) # Initialize isub -test<-c(1:5) -xorder<-order(x1) -y1<-y1[xorder] -x1<-x1[xorder] -xorder<-order(x2) -y2<-y2[xorder] -x2<-x2[xorder] -n1<-1 -n2<-1 -vecn<-1 -for(i in 1:length(x1))n1[i]<-length(y1[near(x1,x1[i],fr1)]) -for(i in 1:length(x1))n2[i]<-length(y2[near(x2,x1[i],fr2)]) -for(i in 1:length(x1))vecn[i]<-min(n1[i],n2[i]) -sub<-c(1:length(x1)) -isub[1]<-min(sub[vecn>=12]) -isub[5]<-max(sub[vecn>=12]) -isub[3]<-floor((isub[1]+isub[5])/2) -isub[2]<-floor((isub[1]+isub[3])/2) -isub[4]<-floor((isub[3]+isub[5])/2) -mat<-matrix(NA,5,9) -dimnames(mat)<-list(NULL,c('X','Est1','Est2','DIF','TEST','se','ci.low','ci.hi','p.value')) -pts=x1[isub] -mat[,1]=pts -sqsd=difregYvar(x1,y1,x2,y2,regfun=lsfit,pts=pts,nboot=nboot,SEED=SEED) -est1=regYhat(x1,y1,xr=pts,regfun=lsfit) #Note: if xout=T, leverage points already removed -est2=regYhat(x2,y2,xr=pts,regfun=lsfit) -mat[,2]=est1 -mat[,3]=est2 -est=est1-est2 -mat[,4]=est -sd=sqrt(sqsd) -mat[,6]=sd -tests=(est1-est2)/sd -mat[,5]=tests -df=length(y1)-1 -pv=2*(1-pt(abs(tests),df)) -mat[,9]=pv -crit<-smmcrit(df,5) -mat[,7]=est-crit*sd -mat[,8]=est+crit*sd -} -if(!is.null(FLAG)){ -n1=1 -n2=1 -for(i in 1:length(pts)){ -n1[i]<-length(y1[near(x1,pts[i],fr1)]) -n2[i]<-length(y2[near(x2,pts[i],fr2)]) -} -mat<-matrix(NA,length(pts),9) -dimnames(mat)<-list(NULL,c('X','Est1','Est2','DIF','TEST','se','ci.low','ci.hi','p.value')) -mat[,1]<-pts -sqsd=difregYvar(x1,y1,x2,y2,regfun=lsfit,pts=pts,nboot=nboot,SEED=SEED) 
-est1=regYhat(x1,y1,xr=pts,regfun=lsfit,,...) -est2=regYhat(x2,y2,xr=pts,regfun=lsfit,,...) -mat[,2]=est1 -mat[,3]=est2 -est=est1-est2 -mat[,4]=est -sd=sqrt(sqsd) -mat[,6]=sd -tests=(est1-est2)/sd -mat[,5]=tests -df=length(y1)-1 -pv=2*(1-pt(abs(tests),df)) -mat[,9]=pv -crit<-smmcrit(df,length(pts)) -mat[,7]=est-crit*sd -mat[,8]=est+crit*sd -} -if(plotit){ -if(xout){ -flag<-outfun(x1,...)$keep -x1<-x1[flag] -y1<-y1[flag] -flag<-outfun(x2,...)$keep -x2<-x2[flag] -y2<-y2[flag] -} -plot(c(x1,x2),c(y1,y2),type='n',xlab=xlab,ylab=ylab) -points(x1,y1,pch='o') -points(x2,y2,pch='+') -abline(lsfit(x1,y1)$coef) -abline(lsfit(x2,y2)$coef,lty=2) -} -list(n=n,n.keep=n.keep,output=mat) -} -DancCR<-function(x1,y1,x2,y2){ -v=optim(0,Dancols_sub1,x1=x1,y1=y1,x2=x2,y2=y2,method='BFGS')$par -v[2]=optim(0,Dancols_sub2,x1=x1,y1=y1,x2=x2,y2=y2,method='BFGS')$par -a=min(v) -v=c(a,max(v)) -} - -difregYvar<-function(x1,y1,x2,y2,regfun=tsreg,pts=NULL, -nboot=100,xout=FALSE,outfun=out,SEED=TRUE,...){ -# -# Estimate standard error of difference between the predicted value of Y -# corresponding to two dependent groups using regression estimator indicated by -# the argument -# regfun -# corresponding to the points in -# pts -# regfun defaults to tsreg, the Theil--Sen estimator -# pts default is to use all unique points among x1 and x2 -# -X=elimna(cbind(x1,y1,x2,y2)) -x1=as.matrix(x1) -x2=as.matrix(x2) -p=ncol(x1) -p1=p+1 -p2=p+2 -p3=p1+p -p4=p3+1 -x1=X[,1:p] -y1=X[,p1] -x2=X[,p2:p3] -y2=X[,p4] -x1<-as.matrix(x1) -x2=as.matrix(x2) -if(is.null(pts)){ -pts=rbind(x1,x2) -pts=unique(pts) -} -pts=as.matrix(pts) -nvpts=nrow(pts) -bvec1=matrix(NA,nrow=nboot,ncol=nvpts) -bvec2=matrix(NA,nrow=nboot,ncol=nvpts) -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -data<-matrix(sample(length(y1),size=length(y1)*nboot,replace=TRUE),nrow=nboot) -for(ib in 1:nboot){ -bvec1[ib,]=regYsub(x1[data[ib,],],y1[data[ib,]],pts,p1=p1,regfun=regfun,...) -bvec2[ib,]=regYsub(x2[data[ib,],],y2[data[ib,]],pts,p1=p1,regfun=regfun,...) -} -bvec=bvec1-bvec2 -sqsd=apply(bvec,2,var) -sqsd -} - -difreg<-function(x1,y1,x2,y2,regfun=tsreg,xout=FALSE,outfun=outpro,nboot=599, -alpha=.05,SEED=TRUE,plotit=FALSE,xlab='X',ylab='Y',pr=TRUE,...){ -# -# regression data from two different times i.e., two dependent groups -# -# compute confidence interval for the difference in the slopes -# -if(SEED)set.seed(2) -X=elimna(cbind(x1,y1,x2,y2)) -x1=as.matrix(x1) -x2=as.matrix(x2) -p=ncol(x1) -p1=p+1 -p2=p+2 -p3=p1+p -p4=p3+1 -x1=X[,1:p] -y1=X[,p1] -x2=X[,p2:p3] -y2=X[,p4] -n=length(y1) -if(xout){ -flag1=outfun(x1,...)$out.id -flag2=outfun(x2,...)$out.id -flag=unique(c(flag1,flag2)) -X=X[-flag,] -x1=X[,1:p] -y1=X[,p1] -x2=X[,p2:p3] -y2=X[,p4] -} -flagF=identical(regfun,tsreg) -if(flagF){ -if(pr){ -if(sum(duplicated(y1)>0))print('Duplicate values detected; regfun=tshdreg might have more power than tsreg') -pr=FALSE -} -if(pr){ -if(sum(duplicated(y2)>0))print('Duplicate values detected; regfun=tshdreg might have more power than tsreg') -}} -nk=length(y1) -x1=as.matrix(x1) -x2=as.matrix(x2) -data<-matrix(sample(length(y1),size=length(y1)*nboot,replace=TRUE),nrow=nboot) -data=listm(t(data)) -bvec1<-lapply(data,regboot,x1,y1,regfun,xout=FALSE,...) -# bvec is a p+1 by nboot matrix. The first row -# contains the bootstrap intercepts, the second row -# contains the bootstrap values for first predictor, etc. -bvec2<-lapply(data,regboot,x2,y2,regfun,xout=FALSE,...) 
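# Because each bootstrap sample uses the same resampled rows for both occasions,
# the columns of dif (below) are bootstrap estimates of the paired differences in
# the intercept and slopes; percentile confidence intervals and p-values are then
# read off from the sorted differences.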
-bvec1=matl(bvec1) -bvec2=matl(bvec2) -dif=t(bvec1)-t(bvec2) -dif.sort=apply(dif,2,sort) -pvec=NA -for(i in 1:p1){ -pvec[i]<-(sum(dif[,i]<0)+.5*sum(dif[,i]==0))/nboot -if(pvec[i]>.5)pvec[i]<-1-pvec[i] -} -pvec<-2*pvec -ilow<-round((alpha/2) * nboot) -ihi<-nboot - ilow -ilow<-ilow+1 -ci=matrix(NA,nrow=p1,ncol=3) -ci[,1]=c(0:p) -for(i in 1:p1){ -ci[i,2]=dif.sort[ilow,i] -ci[i,3]=dif.sort[ihi,i] -} -dimnames(ci)=list(NULL,c('Param','ci.low','ci.hi')) -if(plotit){ -reg2plot(x1,y1,x2,y2,xlab=xlab,ylab=ylab,regfun=regfun,...) -} -lvec='Intercept' -for(j in 2:p1)lvec=c(lvec,paste('slope',j-1)) -#pvec=array(pvec,dimnames=lvec) -est1=regfun(x1,y1,xout=FALSE,...)$coef -est2=regfun(x2,y2,xout=FALSE,...)$coef -list(n=n,n.keep=nk,param=lvec,p.values=pvec,est.grp1=est1,est.grp2=est2,conf.intervals=ci) -} - -Dancts<-function(x1,y1,x2,y2,pts=NULL,regfun=tsreg,fr1=1,fr2=1,alpha=.05,plotit=TRUE,xout=FALSE, -outfun=out,nboot=100,SEED=TRUE,xlab='X',ylab='Y',pr=TRUE,...){ -# -# Compare the regression lines of two dependent groups using -# the robust regression indicated by the argument -# regfun. Default is modified Theil--Sen estimator -# -# Comparisons are done at specified design points -# This is a robust Johnson-Neyman method for dependent groups. -# -# For OLS, use Dancols -# Assume data are in x1 y1 x2 and y2 -# -# pts can be used to specify the design points where the regression lines -# are to be compared. -# -x1=as.matrix(x1) -x2=as.matrix(x2) -if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') -if(SEED)set.seed(2) -FLAG=pts -X=elimna(cbind(x1,y1,x2,y2)) -if(ncol(X)>4)stop('Only one covariate is allowed') -x1=as.matrix(x1) -x2=as.matrix(x2) -p=ncol(x1) -p1=p+1 -p2=p+2 -p3=p1+p -p4=p3+1 -x1=X[,1:p] -y1=X[,p1] -x2=X[,p2:p3] -y2=X[,p4] -if(xout){ -if(identical(outfun,outblp)){ -flag1=outblp(x1,y1,plotit=FALSE)$bad.lev -flag2=outblp(x2,y2,plotit=FALSE)$bad.lev -} -else{ -flag1=outfun(x1)$out.id -flag2=outfun(x2)$out.id -} -flag=unique(c(flag1,flag2)) -if(length(flag)>0)X=X[-flag,] -x1=X[,1:p] -y1=X[,p1] -x2=X[,p2:p3] -y2=X[,p4] -} -flagF=identical(regfun,tsreg) -if(flagF){ -if(pr){ -if(sum(duplicated(y1)>0))print('Duplicate values detected; regfun=tshdreg might have more power than tsreg') -pr=FALSE -} -if(pr){ -if(sum(duplicated(y2)>0))print('Duplicate values detected; regfun=tshdreg might have more power than tsreg') -}} -if(is.null(pts[1])){ -npt<-5 -isub<-c(1:5) # Initialize isub -test<-c(1:5) -xorder<-order(x1) -y1<-y1[xorder] -x1<-x1[xorder] -xorder<-order(x2) -y2<-y2[xorder] -x2<-x2[xorder] -n1<-1 -n2<-1 -vecn<-1 -for(i in 1:length(x1))n1[i]<-length(y1[near(x1,x1[i],fr1)]) -for(i in 1:length(x1))n2[i]<-length(y2[near(x2,x1[i],fr2)]) -for(i in 1:length(x1))vecn[i]<-min(n1[i],n2[i]) -sub<-c(1:length(x1)) -isub[1]<-min(sub[vecn>=12]) -isub[5]<-max(sub[vecn>=12]) -isub[3]<-floor((isub[1]+isub[5])/2) -isub[2]<-floor((isub[1]+isub[3])/2) -isub[4]<-floor((isub[3]+isub[5])/2) -mat<-matrix(NA,5,9) -dimnames(mat)<-list(NULL,c('X','Est1','Est2','DIF','TEST','se','ci.low','ci.hi','p.value')) -pts=x1[isub] -mat[,1]=pts -sqsd=difregYvar(x1,y1,x2,y2,regfun=regfun,pts=pts,nboot=nboot,SEED=SEED) -est1=regYhat(x1,y1,xr=pts,regfun=regfun) #Note: if xout=T, leverage points already removed -est2=regYhat(x2,y2,xr=pts,regfun=regfun) -mat[,2]=est1 -mat[,3]=est2 -est=est1-est2 -mat[,4]=est -sd=sqrt(sqsd) -mat[,6]=sd -tests=(est1-est2)/sd -mat[,5]=tests -pv=2*(1-pnorm(abs(tests))) -mat[,9]=pv -crit<-smmcrit(Inf,5) -mat[,7]=est-crit*sd -mat[,8]=est+crit*sd -} -if(!is.null(FLAG)){ -n1=1 -n2=1 -for(i in 
1:length(pts)){ -n1[i]<-length(y1[near(x1,pts[i],fr1)]) -n2[i]<-length(y2[near(x2,pts[i],fr2)]) -} -mat<-matrix(NA,length(pts),9) -dimnames(mat)<-list(NULL,c('X','Est1','Est2','DIF','TEST','se','ci.low','ci.hi','p.value')) -mat[,1]<-pts -sqsd=difregYvar(x1,y1,x2,y2,regfun=regfun,pts=pts,nboot=nboot,SEED=SEED) -est1=regYhat(x1,y1,xr=pts,regfun=regfun) -est2=regYhat(x2,y2,xr=pts,regfun=regfun) -mat[,2]=est1 -mat[,3]=est2 -est=est1-est2 -mat[,4]=est -sd=sqrt(sqsd) -mat[,6]=sd -tests=(est1-est2)/sd -mat[,5]=tests -pv=2*(1-pnorm(abs(tests))) -mat[,9]=pv -crit<-smmcrit(Inf,length(pts)) -mat[,7]=est-crit*sd -mat[,8]=est+crit*sd -} -if(plotit){ -#if(xout){ #Leverage points already removed if xout=TRUE -#flag<-outfun(x1,...)$keep -#x1<-x1[flag] -#y1<-y1[flag] -#flag<-outfun(x2,...)$keep -#x2<-x2[flag] -#y2<-y2[flag] -#} -plot(c(x1,x2),c(y1,y2),type='n',xlab=xlab,ylab=ylab) -points(x1,y1,pch='o') -points(x2,y2,pch='+') -abline(regfun(x1,y1)$coef) -abline(regfun(x2,y2)$coef,lty=2) -} -list(output=mat) -} - - -tshd<-function(x,y,HD=TRUE,plotit=FALSE,xlab='X',ylab='Y',OPT=FALSE,tr=FALSE){ -# -# Compute the Theil-Sen regression estimator. -# Only a single predictor is allowed in this version -# -# HD=TRUE, use Harrell-Davis for slopes -# HD=FALSE, use usual median -# -# OPT=TRUE, compute the intercept using median(y)-b_1median(X) -# OPT=FALSE compute the intercept using median of y-b_1X -# -# -temp<-matrix(c(x,y),ncol=2) -temp<-elimna(temp) # Remove any pairs with missing values -x<-temp[,1] -y<-temp[,2] -ord<-order(x) -xs<-x[ord] -ys<-y[ord] -vec1<-outer(ys,ys,'-') -vec2<-outer(xs,xs,'-') -v1<-vec1[vec2>0] -v2<-vec2[vec2>0] -if(!HD)slope<-median(v1/v2,na.rm=TRUE) -if(HD)slope<-hd(v1/v2,na.rm=TRUE,tr=tr) -res=y-slope*x -if(!OPT)int=hd(res,tr=tr) -if(OPT)int=hd(y,na.rm=TRUE)-slope*hd(x,na.rm=TRUE,tr=tr) -coef=c(int,slope) -if(plotit){ -plot(x,y,xlab=xlab,ylab=ylab) -abline(coef) -} -list(coef=coef) -} - -tshdreg<-function(x,y,HD=TRUE,xout=FALSE,outfun=out,iter=5,varfun=pbvar,tr=FALSE,do.stre=TRUE, -corfun=pbcor,plotit=FALSE,tol=.0001,RES=TRUE,OPT=FALSE,xlab='X',ylab='Y',...){ -# -# Compute Theil-Sen regression estimator -# -# Use back-fitting -# when there is more than one predictor -# and estimate intercept using Harrel-Davis estimator -# -x<-as.matrix(x) -xx<-cbind(x,y) -xx<-elimna(xx) -x<-xx[,1:ncol(x)] -x<-as.matrix(x) -y<-xx[,ncol(x)+1] -temp<-NA -x<-as.matrix(x) -if(xout){ -x<-as.matrix(x) -flag<-outfun(x,plotit=plotit,...)$keep -x<-x[flag,] -y<-y[flag] -x<-as.matrix(x) -} -if(ncol(x)==1){ -temp1<-tshd(x,y,HD=HD,plotit=plotit,xlab=xlab,ylab=ylab,OPT=OPT,tr=tr) -coef<-temp1$coef -res<-y-coef[2]*x-coef[1] -} -if(ncol(x)>1){ -for(p in 1:ncol(x)){ -temp[p]<-tshd(x[,p],y)$coef[2] -} -res<-y-x%*%temp -alpha<-hd(res) -r<-matrix(NA,ncol=ncol(x),nrow=nrow(x)) -tempold<-temp -for(it in 1:iter){ -for(p in 1:ncol(x)){ -r[,p]<-y-x%*%temp-alpha+temp[p]*x[,p] -temp[p]<-tshd(x[,p],r[,p],plotit=FALSE,tr=tr)$coef[2] -} -if(max(abs(temp-tempold))0){ -e.pow<-varfun(yhat)/varfun(y) -if(!is.na(e.pow)){ -if(e.pow>=1)e.pow<-corfun(yhat,y)$cor^2 -e.pow=as.numeric(e.pow) -stre=sqrt(e.pow) -}}} -if(!RES)res=NULL -list(coef=coef,residuals=res,Strength.Assoc=stre,Explanatory.Power=e.pow,residuals=res) -} - - -ltsreg<-function(x,y,tr=.5,xout=FALSE,outfun=outpro,STAND=TRUE,...){ -# -# Leasts trimmed squares regression via the function ltsReg in the -# R package robustbase -# -x<-as.matrix(x) -xx<-cbind(x,y) -xx<-elimna(xx) -x<-xx[,1:ncol(x)] -x<-as.matrix(x) -y<-xx[,ncol(x)+1] -temp<-NA -x<-as.matrix(x) -if(xout){ 
-x<-as.matrix(x) -flag<-outfun(x,plotit=plotit,...)$keep -x<-x[flag,] -y<-y[flag] -x<-as.matrix(x) -} -library(robustbase) -temp=ltsReg(y~x,alpha=1-tr) -#coef=ltsReg(y~x)[8]$coefficients -coef=temp[8]$coefficients -res=temp[7]$raw.resid -list(coef=coef,residuals=res) -} - -# For convenience when doing robust ridge regression: -ltsreg.2<-function(x,y,tr=.2,xout=FALSE,outfun=outpro,STAND=TRUE,...){ -# -# Leasts trimmed squares regression via the function ltsReg in the -# R package robustbase -# -x<-as.matrix(x) -xx<-cbind(x,y) -xx<-elimna(xx) -x<-xx[,1:ncol(x)] -x<-as.matrix(x) -y<-xx[,ncol(x)+1] -temp<-NA -x<-as.matrix(x) -if(xout){ -x<-as.matrix(x) -flag<-outfun(x,plotit=plotit,...)$keep -x<-x[flag,] -y<-y[flag] -x<-as.matrix(x) -} -library(robustbase) -temp=ltsReg(y~x,alpha=1-tr) -coef=temp[8]$coefficients -res=temp[7]$raw.resid -list(coef=coef,residuals=res) -} - - - - -DregG<-function(x1,y1,x2,y2,nullv=NULL,regfun=tshdreg,nboot=500,xout=FALSE,outfun=outpro, -SEED=TRUE,plotit=FALSE,pr=TRUE,...){ -# -# Global test that two dependent groups have identical -# regression parameters. -# -# Use a variation of Hotelling's test coupled with a bootstrap -# estimate of the relevant covariance matrix associated with the differences -# in the estimates of the parameters.# For OLS, use DregGOLS -# -# (plotit=F is used so that in simulations, if xout=T, the seed is not -# set everytime outpro is called.) -# -if(SEED)set.seed(2) -X=elimna(cbind(x1,y1,x2,y2)) -x1=as.matrix(x1) -x2=as.matrix(x2) -p=ncol(x1) -p1=p+1 -p2=p+2 -p3=p1+p -p4=p3+1 -if(is.null(nullv))nullv=rep(0,p1) -x1=X[,1:p] -y1=X[,p1] -x2=X[,p2:p3] -y2=X[,p4] -n=length(y1) -if(xout){ -flag1=outfun(x1,...)$out.id -flag2=outfun(x2,...)$out.id -flag=unique(c(flag1,flag2)) -if(length(flag)>0)X=X[-flag,] -x1=X[,1:p] -y1=X[,p1] -x2=X[,p2:p3] -y2=X[,p4] -} -x1=as.matrix(x1) -x2=as.matrix(x2) -flagF=FALSE -flagF1=identical(regfun,tsreg) -flagF1[2]=identical(regfun,tshdreg) -#flagF1[3]=identical(regfun,tshdreg_C) obsolete,now it causes an error -if(sum(flagF1)>0)flagF=TRUE -if(!flagF){if(pr){ -if(sum(duplicated(y1)>0))print('Duplicate values detected; regfun=tshdreg might have more power than tsreg') -pr=FALSE -} -if(pr){ -if(sum(duplicated(y2)>0))print('Duplicate values detected; regfun=tshdreg might have more power than tsreg') -}} -data<-matrix(sample(length(y1),size=length(y1)*nboot,replace=TRUE),nrow=nboot) -bvec1<-apply(data,1,regboot,x1,y1,regfun=regfun,xout=FALSE,...) -# bvec is a p+1 by nboot matrix. The first row -# contains the bootstrap intercepts, the second row -# contains the bootstrap values for first predictor, etc. -bvec2<-apply(data,1,regboot,x2,y2,regfun=regfun,xout=FALSE,...) -dif=t(bvec1-bvec2) -temp<-pdis(rbind(dif,nullv)) -sig.level<-sum(temp[nboot+1]0)X=X[-flag,] -x1=X[,1:p] -y1=X[,p1] -x2=X[,p2:p3] -y2=X[,p4] -} -flagF=FALSE -flagF1=identical(regfun,tsreg) -flagF1[2]=identical(regfun,tshdreg) -#flagF1[3]=identical(regfun,tshdreg_C) -if(sum(flagF1)>0)flagF=TRUE -if(!flagF){ -if(pr){ -if(sum(duplicated(y1)>0))print('Duplicate values detected; regfun=tshdreg might have more power than tsreg') -pr=FALSE -} -if(pr){ -if(sum(duplicated(y2)>0))print('Duplicate values detected; regfun=tshdreg might have more power than tsreg') -}} -x1=as.matrix(x1) -x2=as.matrix(x2) -data<-matrix(sample(length(y1),size=length(y1)*nboot,replace=TRUE),nrow=nboot) -data=listm(t(data)) -bvec1=mclapply(data,regbootMC,x1,y1,regfun,xout=FALSE,...) -bvec1=matl(bvec1) -# bvec is a p+1 by nboot matrix. 
The first row -# contains the bootstrap intercepts, the second row -# contains the bootstrap values for first predictor, etc. -bvec2=mclapply(data,regbootMC,x2,y2,regfun,xout=FALSE,...) -bvec2=matl(bvec2) -dif=t(bvec1-bvec2) -temp<-pdisMC(rbind(dif,nullv)) -sig.level<-sum(temp[nboot+1]0))print('Duplicate values detected; regfun=tshdreg might have more power than tsreg') -pr=FALSE -} -if(pr){ -if(sum(duplicated(y2)>0))print('Duplicate values detected; regfun=tshdreg might have more power than tsreg') -}} -nk=length(y1) -x1=as.matrix(x1) -x2=as.matrix(x2) -data<-matrix(sample(length(y1),size=length(y1)*nboot,replace=TRUE),nrow=nboot) -data=listm(t(data)) -bvec1<-mclapply(data,regboot,x1,y1,regfun,mc.preschedule=TRUE,xout=FALSE,...) -# bvec is a p+1 by nboot matrix. The first row -# contains the bootstrap intercepts, the second row -# contains the bootstrap values for first predictor, etc. -bvec2<-mclapply(data,regboot,x2,y2,regfun,mc.preschedule=TRUE,xout=FALSE,...) -bvec1=matl(bvec1) -bvec2=matl(bvec2) -dif=t(bvec1)-t(bvec2) -dif.sort=apply(dif,2,sort) -pvec=NA -for(i in 1:p1){ -pvec[i]<-(sum(dif[,i]<0)+.5*sum(dif[,i]==0))/nboot -if(pvec[i]>.5)pvec[i]<-1-pvec[i] -} -pvec<-2*pvec -ilow<-round((alpha/2) * nboot) -ihi<-nboot - ilow -ilow<-ilow+1 -ci=matrix(NA,nrow=p1,ncol=3) -ci[,1]=c(0:p) -for(i in 1:p1){ -ci[i,2]=dif.sort[ilow,i] -ci[i,3]=dif.sort[ihi,i] -} -dimnames(ci)=list(NULL,c('Param','ci.low','ci.hi')) -if(plotit){ -reg2plot(x1,y1,x2,y2,xlab=xlab,ylab=ylab,regfun=regfun,...) -} -lvec='Intercept' -for(j in 2:p1)lvec=c(lvec,paste('slope',j-1)) -#pvec=array(pvec,dimnames=lvec) -est1=regfun(x1,y1,xout=FALSE,...)$coef -est2=regfun(x2,y2,xout=FALSE,...)$coef -list(n=n,n.keep=nk,param=lvec,p.values=pvec,est.grp1=est1,est.grp2=est2,conf.intervals=ci) -} - -qcipb<-function(x,q=.5,alpha=.05,nboot=2000,SEED=TRUE,nv=0,...){ -# -# Compute a bootstrap, .95 confidence interval for the -# qth quantile via the Harrell--Davis estimator. -# -# Default is q=.5, meaning a confidence interval for the median is -# computed. -# -# Appears to be best method when there are tied values -# -# nv=null value when computing a p-value -# -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -x=elimna(x) -data<-matrix(sample(x,size=length(x)*nboot,replace=TRUE),nrow=nboot) -bvec<-apply(data,1,hd,q=q) -bvec<-sort(bvec) -low<-round((alpha/2)*nboot) -up<-nboot-low -low<-low+1 -pv=mean(bvec>nv)+.5*mean(bvec==nv) -pv=2*min(c(pv,1-pv)) -estimate=hd(x,q=q) -list(ci=c(bvec[low],bvec[up]),n=length(x),estimate=estimate,p.value=pv) -} -Qreg<-function(x,y,q=.5,xout=FALSE,outfun=outpro,res.vals=TRUE,plotit=FALSE,xlab='X',ylab='Y',pch='*',...){ -# -# Quantile regression. 
Like the function qreg, but avoids computational -# problems that can arise when there are tied values among the dependent -# variable -# -x<-as.matrix(x) -xx<-cbind(x,y) -xx<-elimna(xx) -xx=as.matrix(xx) -x<-xx[,1:ncol(x)] -x<-as.matrix(x) -y<-xx[,ncol(x)+1] -temp<-NA -x<-as.matrix(x) -if(xout){ -x<-as.matrix(x) -if(identical(outfun,outblp))flag=outblp(x,y,plotit=FALSE)$keep -else -flag<-outfun(x,plotit=plotit,...)$keep -x<-x[flag,] -y<-y[flag] -x<-as.matrix(x) -} -init=ols(x,y)$coef -v=optim(init,qfun,x=x,y=y,q=q,method='BFGS')$par -p1=ncol(x)+1 -res=NULL -if(res.vals)res<-y-x%*%v[2:p1]-v[1] -if(ncol(x)==1){ -if(plotit){ -plot(x,y,xlab=xlab,ylab=ylab,pch=pch) -abline(v) -}} -list(coef=v,residuals=res) -} -qfun<-function(x,y,coef,q){ -x=as.matrix(x) -p1=ncol(x)+1 - r=y-coef[1]-x%*%coef[2:p1] - rhoq=sum(r*(q-as.numeric((r<0)))) - s=sum(rhoq) - s - } -Rcoefalpha<-function(x,cov.fun=wincov,pr=FALSE,...){ -# Compute coefficient alpha plus a robust analog) -# -# x is assumed to be a matrix -# output: -# coefficient alpha plus robust version -# -# NOTE: now use cov.fun=wincov by default. Use skipcov in earlier version, -# But it might not be computable. -# -# Possible choices for cov.fun: -# skipcov -# tbscov -# covout -# covogk -# mgvcov -# mvecov -# mcdcov -# wincov -# bicovM -# -x=elimna(x) -x=as.matrix(x) -mcor=winall(x,tr=0)$cov -term=sum(mcor) -diag(mcor)=0 -term1=sum(mcor) -k=ncol(x) -lam=k*term1/(k-1) -res1=lam/term -# -mcor=cov.fun(x,...) -term=sum(mcor) -diag(mcor)=0 -term1=sum(mcor) -k=ncol(x) -lam=k*term1/(k-1) -lam=lam/term -list(coef.alpha=res1,robust.alpha=lam) -} - -Dancovamp<-function(x1,y1,x2=NULL,y2,fr1=1,fr2=1,tr=0.2,alpha=0.05, pts=NULL,SEED=TRUE,DIF=TRUE,cov.fun=skipcov,...){ -# -# Compare two dependent groups using a nonparametric ANCOVA method. -# Multiple covariates are allowed. -# No parametric assumption is made about the form of -# the regression lines--a running interval smoother is used. 
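# For each design point, the observations in x1 and x2 that are close to it are
# identified via near3d using the measure of scatter indicated by cov.fun; the
# corresponding paired y values are compared with yuend (DIF=FALSE) or with
# trimci applied to the difference scores (DIF=TRUE), with Hochberg-adjusted
# p-values reported.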
-# Design points are chosen based on depth of points in x1 if pts=NULL -# Assume data are in x1 y1 x2 and y2 -# -# Choices for cov.fun include -# skipcov -# tbscov -# covogk -# mgvcov -# mvecov -# mcdcov -# wincov -# -if(is.null(x2))x2=x1 -flag=identical(cov.fun,cov.mve) -if(flag)if(SEED)set.seed(2) # now cov.mve always returns same result -x1=as.matrix(x1) -x2=as.matrix(x2) -if(ncol(x1)!=ncol(x2))stop('x1 and x2 should have same number of columns') -if(ncol(x1)==1)stop('For one covariate, use Dancova') -if(nrow(x1)!=nrow(x2))stop('x1 and x2 should have same number of rows') -if(length(y1)!=length(y2))stop('y1 and y2 should have same length') -p=ncol(x1) -p1=p+1 -m1=elimna(cbind(x1,y1,x2,y2)) -x1=m1[,1:p] -y1=m1[,p1] -p2=p1+1 -p3=p2+p-1 -p4=p3+1 -x2=m1[,p2:p3] -y2=m1[,p4] -if(is.null(pts[1])){ -x1<-as.matrix(x1) -x2<-as.matrix(x2) -pts<-ancdes(x1) -} -pts<-as.matrix(pts) -flag<-rep(TRUE,nrow(pts)) -if(!DIF){ -mat<-matrix(NA,nrow(pts),10) -dimnames(mat)<-list(NULL,c('n','est1','est2','DIF','TEST','se','ci.low','ci.hi','p.value','p.adj')) -} -if(DIF){ -mat<-matrix(NA,nrow(pts),8) -dimnames(mat)<-list(NULL,c('n','DIF','TEST','se','ci.low','ci.hi','p.value','p.adj')) -} -n<-1 -vecn<-1 -mval1<-cov.funl(cov.fun(x1,...)) -mval2<-cov.funl(cov.fun(x2,...)) -for(i in 1:nrow(pts)){ -t1=near3d(x1,pts[i,],fr1,mval1) -t2=near3d(x2,pts[i,],fr2,mval2) -pick=as.logical(t1*t2) -n[i]<-length(y1[pick]) -if(n[i]<5)flag[i]<-FALSE -if(n[i]>=5){ -if(!DIF){ -test<-yuend(y1[pick],y2[pick],tr=tr,alpha=alpha) -mat[i,2]=test$est1 -mat[i,3]=test$est2 -mat[i,4]=test$dif -mat[i,5]=test$teststat -mat[i,6]=test$se -mat[i,7]=test$ci[1] -mat[i,8]=test$ci[2] -mat[i,9]=test$p.value -} -if(DIF){ -test<-trimci(y1[pick]-y2[pick],tr=tr,pr=FALSE,alpha=alpha) -mat[i,2]=test$estimate -mat[i,3]=test$test.stat -mat[i,4]=test$se -mat[i,5]=test$ci[1] -mat[i,6]=test$ci[2] -mat[i,7]=test$p.value -} -} -mat[i,1]<-n[i] -} -if(!DIF)mat[,10]=p.adjust(mat[,9],method='hoch') -if(DIF)mat[,8]=p.adjust(mat[,7],method='hoch') -if(sum(flag)==0)print('No comparable design points found, might increase span.') -list(pts=pts,output=mat) -} - - -cov.funl<-function(m){ -list(cov=m) -} -rplotCIS<-function(x,y,tr=.2,fr=.8,plotit=TRUE,scat=TRUE,pyhat=FALSE,SEED=TRUE,dfmin=8, -eout=FALSE,xout=FALSE,xlab='x',ylab='y',outfun=out,LP=TRUE,alpha=.05,pch='.',...){ -# -# A simple method for computing a confidence band based on -# running interval smoother and a trimmed mean. 
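# A minimal usage sketch (hypothetical data):
#   set.seed(1)
#   x <- rnorm(50); y <- x + rnorm(50)
#   rplotCIS(x, y)  # smooth based on 20% trimmed means with a pointwise .95 band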
-# -# rplotCI adjusts the band so that FWE=1-alpha -# -# LP=TRUE, the plot is further smoothed via lowess -# -# fr controls amount of smoothing -plotit<-as.logical(plotit) -scat<-as.logical(scat) -str=rplot(x,y,tr=tr,xout=xout,plotit=FALSE,LP=LP,fr=fr,pr=FALSE)$Strength.Assoc -m<-cbind(x,y) -if(ncol(m)>2)stop('Only one independent variable can be used') -m<-elimna(m) -nv=nrow(m) -if(eout && xout)stop('Not allowed to have eout=xout=T') -if(eout){ -flag<-outfun(m,plotit=FALSE)$keep -m<-m[flag,] -} -if(xout){ -flag<-outfun(m[,1])$keep -m<-m[flag,] -} -x=m[,1] -y=m[,2] -n.keep=length(y) -rmd<-c(1:length(x)) -for(i in 1:length(x))rmd[i]<-mean(y[near(x,x[i],fr=fr)],tr=tr) -sedf=runse(x,y,fr=fr,tr=tr,pts=x,SEED=SEED) -df=sedf$df -flag=df>dfmin -se=sedf$se -low=rmd[flag]-qt(1-alpha/2,df[flag])*se[flag] -up=rmd[flag]+qt(1-alpha/2,df[flag])*se[flag] -rmd=rmd[flag] -x=x[flag] -y=y[flag] -if(plotit){ -ord=order(x) -x=x[ord] -rmd=rmd[ord] -up=up[ord] -low=low[ord] -if(LP){ -rmd=lplot(x,rmd,plotit=FALSE,pyhat=TRUE,pr=FALSE)$yhat -up=lplot(x,up,plotit=FALSE,pyhat=TRUE,pr=FALSE)$yhat -low=lplot(x,low,plotit=FALSE,pyhat=TRUE,pr=FALSE)$yhat -} -if(scat){ -plot(c(x,x),c(y,rmd),xlab=xlab,ylab=ylab,type='n') -lines(x,up,lty=2) -lines(x,low,lty=2) -points(x,y,pch=pch) -} -if(!scat)plot(c(x,x),c(y,rmd),type='n',ylab=ylab,xlab=xlab) -points(x,rmd,type='n') -sx<-sort(x) -xorder<-order(x) -sysm<-rmd[xorder] -lines(sx,sysm) -lines(x,up,lty=2) -lines(x,low,lty=2) -} -if(pyhat){output<-cbind(x,rmd,low,up) -dimnames(output)=list(NULL,c('x','y.hat','ci.low','ci.up')) -} -if(!pyhat)output<-'Done' -list(output=output,str=str,n=nv,n.keep=n.keep) -} -runse<-function(x,y,fr=1,tr=.2,pts=x,RNA=FALSE,outfun=out,xout=FALSE,SEED=TRUE){ -# -# Estimate SE of Yhat when using a running interval smooth -# based on a trimmed mean. -# fr controls amount of smoothing -# -# Missing values are automatically removed. -# -# RNA=F, do not remove missing values when averaging -# (computing the smooth) at x -# xout=T removes points for which x is an outlier -# -if(SEED)set.seed(2) -temp<-cbind(x,y) -if(ncol(temp)>2)stop(' 1 predictor only is allowed') -temp<-elimna(temp) # Eliminate any rows with missing values -if(xout){ -flag<-outfun(x,plotit=FALSE)$keep -temp<-temp[flag,] -} -x<-temp[,1] -y<-temp[,2] -pts<-as.matrix(pts) -vals<-NA -WSE=NA -df=NA -h=NA -for(i in 1:length(pts)){ -ysub=y[near(x,pts[i],fr)] -v=trimse(ysub,tr=tr,na.rm=TRUE) -if(is.na(v))v=0 -if(v>0){ -WSE[i]=trimse(ysub,tr=tr,na.rm=TRUE) -df[i]=length(ysub)-2*floor(tr*length(ysub))-1 -} -if(v==0){ -df[i]=0 -WSE[i]=0 -}} -list(se=WSE,df=df) -} - -rplotpbCI<-function(x,y,est=onestep,fr=1,plotit=TRUE,scat=TRUE,pyhat=FALSE, -xout=FALSE,xlab='x',ylab='y',outfun=out,LP=TRUE,alpha=.05, -nboot=500,SEED=TRUE,...){ -# -# running interval smoother based on any measure of location -# Unlike rplotCI, uses a percentile bootstrap -# method to get a confidence band -# -# LP=TRUE, the plot is further smoothed via lowess -# -# fr controls amount of smoothing -plotit<-as.logical(plotit) -scat<-as.logical(scat) -m<-cbind(x,y) -if(ncol(m)>2)stop('Only one independent variable can be used') -m<-elimna(m) -x=m[,1] -y=m[,2] -if(xout){ -flag<-outfun(m[,1])$keep -m<-m[flag,] -} -x=m[,1] -y=m[,2] -low=rep(NA,length(y)) -up=rep(NA,length(y)) -rmd<-NA -for(i in 1:length(x)){ -sel=y[near(x,x[i],fr)] -temp=onesampb(sel,est=est,nboot=nboot,alpha=alpha,SEED=SEED,...) 
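# For each design point x[i], onesampb returns a percentile bootstrap confidence
# interval and an estimate of the location (est) of the y values whose x value
# lies within the span fr of x[i]; these per-point intervals form the plotted band.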
-low[i]=temp$ci[1] -up[i]=temp$ci[2] -rmd[i]=temp$estimate -} -all=elimna(cbind(x,low,up,y,rmd)) -x=all[,1] -low=all[,2] -up=all[,3] -y=all[,4] -rmd=all[,5] -if(plotit){ -ord=order(x) -x=x[ord] -y=y[ord] -rmd=rmd[ord] -up=up[ord] -low=low[ord] -if(LP){ -rmd=lplot(x,rmd,plotit=FALSE,pyhat=TRUE,pr=FALSE)$yhat -up=lplot(x,up,plotit=FALSE,pyhat=TRUE,pr=FALSE)$yhat -low=lplot(x,low,plotit=FALSE,pyhat=TRUE,pr=FALSE)$yhat -} -if(scat){ -plot(c(x,x),c(y,rmd),xlab=xlab,ylab=ylab,type='n') -points(x,y) -lines(x,up,lty=2) -lines(x,low,lty=2) -} -if(!scat)plot(c(x,x),c(y,rmd),type='n',ylab=ylab,xlab=xlab) -points(x,rmd,type='n') -sx<-sort(x) -xorder<-order(x) -sysm<-rmd[xorder] -lines(sx,sysm) -lines(x,up,lty=2) -lines(x,low,lty=2) -} -if(pyhat)output<-rmd -if(!pyhat)output<-'Done' -list(output=output) -} - -Danctspb<-function(x1,y1,x2,y2,pts=NULL,regfun=tsreg,fr1=1,fr2=1,alpha=.05,plotit=TRUE,xout=FALSE,SCAT=TRUE, -outfun=outpro,BLO=FALSE,nboot=500,SEED=TRUE,xlab='X',ylab='Y',pr=TRUE,eout=FALSE,...){ -# -# Compare the regression lines of two dependent groups at specified design points using -# the robust regression estimator indicated by the argument -# regfun. Default is modified Theil--Sen estimator -# -# Comparisons are done at specified design points -# This is a robust Johnson-Neyman method for dependent groups. -# -# For OLS, use Dancols -# Assume data are in x1 y1 x2 and y2 -# -# pts can be used to specify the design points where the regression lines -# are to be compared. -# -# Uses bootstrap samples based on resamples of the points followed by a regression fit. -# In contrast, Dancts uses bootstrap estimate of the se of Yhat followed by a pivotal test -# statistic. -# -x1=as.matrix(x1) -x2=as.matrix(x2) -if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') -if(SEED)set.seed(2) -FLAG=pts -X=elimna(cbind(x1,y1,x2,y2)) -if(ncol(X)>4)stop('Only one covariate is allowed') -x1=as.matrix(x1) -x2=as.matrix(x2) -p=ncol(x1) -p1=p+1 -p2=p+2 -p3=p1+p -p4=p3+1 -x1=X[,1:p] -y1=X[,p1] -x2=X[,p2:p3] -y2=X[,p4] -if(xout){ -flag1=outfun(x1)$out.id -flag2=outfun(x2)$out.id -flag=unique(c(flag1,flag2)) -if(length(flag)>0)X=X[-flag,] -x1=X[,1:p] -y1=X[,p1] -x2=X[,p2:p3] -y2=X[,p4] -} -flagF=identical(regfun,tsreg) -if(identical(regfun,tshdreg))flagF=FALSE -if(flagF){ -if(pr){ -if(sum(duplicated(y1)>0))print('Duplicate values detected; tshdreg might have more power than tsreg') -pr=FALSE -} -if(pr){ -if(sum(duplicated(y2)>0))print('Duplicate values detected; tshdreg might have more power than tsreg') -}} -if(is.null(pts[1])){ -npt<-5 -isub<-c(1:5) # Initialize isub -test<-c(1:5) -xorder<-order(x1) -y1<-y1[xorder] -x1<-x1[xorder] -xorder<-order(x2) -y2<-y2[xorder] -x2<-x2[xorder] -n1<-1 -n2<-1 -vecn<-1 -for(i in 1:length(x1))n1[i]<-length(y1[near(x1,x1[i],fr1)]) -for(i in 1:length(x1))n2[i]<-length(y2[near(x2,x1[i],fr2)]) -for(i in 1:length(x1))vecn[i]<-min(n1[i],n2[i]) -sub<-c(1:length(x1)) -isub[1]<-min(sub[vecn>=12]) -isub[5]<-max(sub[vecn>=12]) -isub[3]<-floor((isub[1]+isub[5])/2) -isub[2]<-floor((isub[1]+isub[3])/2) -isub[4]<-floor((isub[3]+isub[5])/2) -pts=x1[isub] -} -for(i in 1:length(pts)){ -n1<-1 -n2<-1 -n1[i]<-length(y1[near(x1,pts[i],fr1)]) -n2[i]<-length(y2[near(x2,pts[i],fr2)]) -} -mat<-matrix(NA,length(pts),7) -dimnames(mat)<-list(NULL,c('X','Est1','Est2','DIF','ci.low','ci.hi','p.value')) -mat[,1]=pts -n=length(y1) -x1=as.matrix(x1) -x2=as.matrix(x2) -data<-matrix(sample(length(y1),size=n*nboot,replace=TRUE),nrow=nboot) 
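# Each bootstrap sample resamples rows of the paired data; Danctspb.sub refits
# the regression on those rows and returns the fitted values at pts. The p-values
# below are twice the smaller of the proportions of bootstrap differences falling
# below or above zero (ties counted half), and the limits are percentile
# bootstrap limits.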
-est1=apply(data,1,Danctspb.sub,x1,y1,xr=pts,regfun=regfun,xout=FALSE,...) -est2=apply(data,1,Danctspb.sub,x2,y2,xr=pts,regfun=regfun,xout=FALSE,...) -mat[,2]=regYhat(x1,y1,xr=pts,regfun=regfun,...) -mat[,3]=regYhat(x2,y2,xr=pts,regfun=regfun,...) -est=est1-est2 -if(!is.matrix(est))est=matrix(est,nrow=1) -mat[,4]=mat[,2]-mat[,3] -pv1=apply(est<0,1,mean,na.rm=TRUE) -pv2=apply(est==0,1,mean,na.rm=TRUE) -pv=pv1+.5*pv2 -pv1m=1-pv -pv=2*apply(cbind(pv,pv1m),1,min) -mat[,7]=pv -ilow<-round((alpha/2) * nboot) -ihi<-nboot - ilow -ilow<-ilow+1 -estsort=apply(est,1,sort) -mat[,5]=estsort[ilow,] -mat[,6]=estsort[ihi,] -if(plotit){ -if(eout && xout)stop('Cannot have both eout and xout = F') -if(eout){ -flag<-outfun(cbind(x1,y1),plotit=FALSE,...)$keep -x1<-x1[flag] -y1<-y1[flag] -flag<-outfun(cbind(x2,y2),plotit=FALSE,...)$keep -x2<-x2[flag] -y2<-y2[flag] -} -if(xout){ -flag<-outfun(x1,...)$keep -x1<-x1[flag] -y1<-y1[flag] -flag<-outfun(x2,...)$keep -x2<-x2[flag] -y2<-y2[flag] -} -plot(c(x1,x2),c(y1,y2),type='n',xlab=xlab,ylab=ylab) -if(SCAT)points(x1,y1,pch='o') -if(SCAT)points(x2,y2,pch='+') -abline(regfun(x1,y1)$coef) -abline(regfun(x2,y2)$coef,lty=2) -} -list(output=mat) -} - -Danctspb.sub<-function(data,x,y,xr,regfun,...){ -x=as.matrix(x) -yhat=regYhat(x[data,],y[data],xr=xr,regfun=regfun,...) -yhat -} - -DanctspbMC<-function(x1,y1,x2,y2,pts=NULL,regfun=tshdreg,fr1=1,fr2=1,alpha=.05,SCAT=TRUE, -plotit=TRUE,xout=FALSE,outfun=outpro,nboot=500,SEED=TRUE,xlab='X',ylab='Y',WARN=FALSE,pr=TRUE,eout=FALSE,...){ -# -# Compare the regression lines of two dependent groups at specified design points using -# the robust regression estimator indicated by the argument -# regfun. Default is modified Theil--Sen estimator -# -# Similar to Dancts, which uses a bootstrap estimate of se of Y hat -# Here, do bootstrap based on bootstrap samples from the data -# as done for example by regci -# -# Comparisons are done at specified design points -# This is a robust Johnson-Neyman method for dependent groups. -# -# For OLS, use Dancols -# Assume data are in x1 y1 x2 and y2 -# -# pts can be used to specify the design points where the regression lines -# are to be compared. 
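-# Example (illustrative call with hypothetical x1, y1, x2, y2):
-# DanctspbMC(x1,y1,x2,y2,pts=c(1,3,5)) compares the two regression lines at X=1, 3 and 5;
-# with pts=NULL, five design points are chosen empirically.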
-# -x1=as.matrix(x1) -x2=as.matrix(x2) -if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') -if(SEED)set.seed(2) -FLAG=pts -X=elimna(cbind(x1,y1,x2,y2)) -if(ncol(X)>4)stop('Only one covariate is allowed') -x1=as.matrix(x1) -x2=as.matrix(x2) -p=ncol(x1) -p1=p+1 -p2=p+2 -p3=p1+p -p4=p3+1 -x1=X[,1:p] -y1=X[,p1] -x2=X[,p2:p3] -y2=X[,p4] -if(xout){ -flag1=outfun(x1)$out.id -flag2=outfun(x2)$out.id -flag=unique(c(flag1,flag2)) -if(length(flag)>0)X=X[-flag,] -x1=X[,1:p] -y1=X[,p1] -x2=X[,p2:p3] -y2=X[,p4] -} -flagF=identical(regfun,tsreg) -if(identical(regfun,tshdreg))flagF=FALSE -if(flagF){ -if(pr){ -if(sum(duplicated(y1)>0))print('Duplicate values detected; regfun=tshdreg might have more power than tsreg') -pr=FALSE -} -if(pr){ -if(sum(duplicated(y2)>0))print('Duplicate values detected; regfun=tshdreg might have more power than tsreg') -}} -if(is.null(pts[1])){ -npt<-5 -isub<-c(1:5) # Initialize isub -test<-c(1:5) -xorder<-order(x1) -y1<-y1[xorder] -x1<-x1[xorder] -xorder<-order(x2) -y2<-y2[xorder] -x2<-x2[xorder] -n1<-1 -n2<-1 -vecn<-1 -for(i in 1:length(x1))n1[i]<-length(y1[near(x1,x1[i],fr1)]) -for(i in 1:length(x1))n2[i]<-length(y2[near(x2,x1[i],fr2)]) -for(i in 1:length(x1))vecn[i]<-min(n1[i],n2[i]) -sub<-c(1:length(x1)) -isub[1]<-min(sub[vecn>=12]) -isub[5]<-max(sub[vecn>=12]) -isub[3]<-floor((isub[1]+isub[5])/2) -isub[2]<-floor((isub[1]+isub[3])/2) -isub[4]<-floor((isub[3]+isub[5])/2) -pts=x1[isub] -} -for(i in 1:length(pts)){ -n1[i]<-length(y1[near(x1,pts[i],fr1)]) -n2[i]<-length(y2[near(x2,pts[i],fr2)]) -} -mat<-matrix(NA,length(pts),7) -dimnames(mat)<-list(NULL,c('X','Est1','Est2','DIF','ci.low','ci.hi','p.value')) -mat[,1]=pts -n=length(y1) -x1=as.matrix(x1) -x2=as.matrix(x2) -data<-matrix(sample(length(y1),size=n*nboot,replace=TRUE),nrow=nboot) -data=listm(t(data)) -library(parallel) -est1=mclapply(data,Danctspb.sub,x1,y1,xr=pts,regfun=regfun,xout=FALSE,...) -est2=mclapply(data,Danctspb.sub,x2,y2,xr=pts,regfun=regfun,xout=FALSE,...) -est1=matl(est1) -est2=matl(est2) -mat[,2]=regYhat(x1,y1,xr=pts,regfun=regfun,...) -mat[,3]=regYhat(x2,y2,xr=pts,regfun=regfun,...) -est=est1-est2 -if(!is.matrix(est))est=matrix(est,nrow=1) -mat[,4]=mat[,2]-mat[,3] -pv1=apply(est<0,1,mean,na.rm=TRUE) -pv2=apply(est==0,1,mean,na.rm=TRUE) -pv=pv1+.5*pv2 -pv1m=1-pv -pv=2*apply(cbind(pv,pv1m),1,min) -mat[,7]=pv -ilow<-round((alpha/2) * nboot) -ihi<-nboot - ilow -ilow<-ilow+1 -estsort=apply(est,1,sort) -mat[,5]=estsort[ilow,] -mat[,6]=estsort[ihi,] -if(plotit){ -if(eout && xout)stop('Cannot have both eout and xout = F') -if(eout){ -flag<-outfun(cbind(x1,y1),plotit=FALSE,...)$keep -x1<-x1[flag] -y1<-y1[flag] -flag<-outfun(cbind(x2,y2),plotit=FALSE,...)$keep -x2<-x2[flag] -y2<-y2[flag] -} -if(xout){ -flag<-outfun(x1,...)$keep -x1<-x1[flag] -y1<-y1[flag] -flag<-outfun(x2,...)$keep -x2<-x2[flag] -y2<-y2[flag] -} -plot(c(x1,x2),c(y1,y2),type='n',xlab=xlab,ylab=ylab) -if(SCAT)points(x1,y1,pch='o') -if(SCAT)points(x2,y2,pch='+') -abline(regfun(x1,y1)$coef) -abline(regfun(x2,y2)$coef,lty=2) -} -list(output=mat) -} - -Danctspb.sub<-function(data,x,y,xr,regfun,...){ -x=as.matrix(x) -yhat=regYhat(x[data,],y[data],xr=xr,regfun=regfun,...) -yhat -} - -anctspb<-function(x1,y1,x2,y2,pts=NULL,regfun=tshdreg,fr1=1,fr2=1,alpha=.05,plotit=TRUE,xout=FALSE,outfun=outpro,nboot=500,SEED=TRUE,xlab='X',ylab='Y',...){ -# -# Compare the regression lines of two independent groups -# at specified design points using a robust regression estimator. -# -# Like ancts but uses -# a percentile bootstrap method is used. 
-# This might help when there are tied values among the dependent variable. -# -# Assume data are in x1 y1 x2 and y2 -# -# pts can be used to specify the design points where the regression lines -# are to be compared. -# -if(SEED)set.seed(2) -FLAG=pts -xy=elimna(cbind(x1,y1)) -if(ncol(xy)>2)stop('Only one covariate is allowed') -x1=xy[,1] -y1=xy[,2] -xy=elimna(cbind(x2,y2)) -if(ncol(xy)>2)stop('Only one covariate is allowed') -x2=xy[,1] -y2=xy[,2] -if(is.null(pts[1])){ -npt<-5 -isub<-c(1:5) # Initialize isub -test<-c(1:5) -xorder<-order(x1) -y1<-y1[xorder] -x1<-x1[xorder] -xorder<-order(x2) -y2<-y2[xorder] -x2<-x2[xorder] -n1<-1 -n2<-1 -vecn<-1 -for(i in 1:length(x1))n1[i]<-length(y1[near(x1,x1[i],fr1)]) -for(i in 1:length(x1))n2[i]<-length(y2[near(x2,x1[i],fr2)]) -for(i in 1:length(x1))vecn[i]<-min(n1[i],n2[i]) -sub<-c(1:length(x1)) -isub[1]<-min(sub[vecn>=12]) -isub[5]<-max(sub[vecn>=12]) -isub[3]<-floor((isub[1]+isub[5])/2) -isub[2]<-floor((isub[1]+isub[3])/2) -isub[4]<-floor((isub[3]+isub[5])/2) -mat<-matrix(NA,length(pts),9) -dimnames(mat)<-list(NULL,c('X','Est1','Est2','DIF','TEST','se','ci.low','ci.hi','p.value')) -pts=x1[isub] -} -mat<-matrix(NA,length(pts),7) -dimnames(mat)<-list(NULL,c('X','Est1','Est2','DIF','ci.low','ci.hi','p.value')) -mat[,1]<-pts -bvec1=matrix(NA,nrow=nboot,ncol=length(pts)) -bvec2=matrix(NA,nrow=nboot,ncol=length(pts)) -x1=as.matrix(x1) -x2=as.matrix(x2) -p1=ncol(x1)+1 -data<-matrix(sample(length(y1),size=length(y1)*nboot,replace=TRUE),nrow=nboot) -for(ib in 1:nboot){ -bvec1[ib,]=regYsub(x1[data[ib,],],y1[data[ib,]],pts,p1=p1,regfun=regfun,...) -} -data<-matrix(sample(length(y2),size=length(y2)*nboot,replace=TRUE),nrow=nboot) -for(ib in 1:nboot){ -bvec2[ib,]=regYsub(x2[data[ib,],],y2[data[ib,]],pts,p1=p1,regfun=regfun,...) -} -dif=bvec10)stop('Duplicate ids detected in m1') -if(sum(duplicated(m2))>0)stop('Duplicate ids detected in m2') -for(i in 1:nrow(m1)){ -flag=duplicated(c(m1[i,id.col1],m2[,id.col2])) -if(sum(flag)==0){ -ic1=ic1+1 -idnm1[ic1]=i -} -if(sum(flag>0)){ -if(is.data.frame(m1)){ -if(!is.null(dim(M1)))M1=rbind(M1,as.data.frame(m1[i,])) -if(is.null(dim(M1)))M1=as.data.frame(m1[i,]) -} -if(!is.data.frame(m1)){ -if(!is.null(dim(M1)))M1=rbind(M1,m1[i,]) -if(is.null(dim(M1)))M1=matrix(m1[i,],nrow=1) -} -}} -M2=NULL -for(i in 1:nrow(m2)){ -flag=duplicated(c(m2[i,id.col2],m1[,id.col1])) -if(sum(flag)==0){ -ic2=ic2+1 -idnm2[ic2]=i -} -if(sum(flag>0)){ -if(is.data.frame(m2)){ -if(!is.null(dim(M2)))M2=rbind(M2,as.data.frame(m2[i,])) -if(is.null(dim(M2)))M2=as.data.frame(m2[i,]) -} -if(!is.data.frame(m2)){ -if(!is.null(dim(M2)))M2=rbind(M2,m2[i,]) -if(is.null(dim(M2)))M2=matrix(m2[i,],nrow=1) -} -}} -m=cbind(M1[,id.col1],M1[,-id.col1],M2[,-id.col2]) -nc1=ncol(m2)-1 -m1u=NULL -if(!is.null(idnm1))m1u=m1[idnm1,] -m2u=NULL -if(!is.null(idnm2))m2u=m2[idnm2,] -list(m=m,idnm1=idnm1,idnm2=idnm2,m1.no=m1u,m2.no=m2u) -} - -regcits<-function(x,y,regfun=tshdreg,nboot=599,alpha=.05,SEED=TRUE,pr=TRUE, -xout=FALSE,outfun=outpro,plotit=FALSE,xlab='Predictor 1',ylab='Predictor 2', -MC=TRUE,...){ -if(MC)v=regciMC(x,y,regfun=regfun,nboot=nboot,alpha=alpha,SEED=SEED,pr=pr,xout=xout, -outfun=outfun,plotit=plotit,xlab=xlab,ylab=ylab,...) -if(!MC)v=regci(x,y,regfun=regfun,nboot=nboot,alpha=alpha,SEED=SEED,pr=pr,xout=xout, -outfun=outfun,plotit=plotit,xlab=xlab,ylab=ylab,...) 
-v -} - -qhdsm<-function(x,y,qval=0.5,q=NULL,pr=FALSE, -xout=FALSE,outfun=outpro,plotit=TRUE,xlab='X',ylab='Y',zlab='Z',pyhat=FALSE,fr=NULL,LP=FALSE,theta=50,phi=25,ticktype='simple',nmin=0,scale=TRUE,pr.qhd=TRUE,pch='.',...){ -# -# Compute the quantile regression line for one or more quantiles -# using combination of hd, running interval smoother and LOESS -# That is, determine the qth (qval) quantile of Y given X using the -# -# plotit=TRUE will plot the lines. WIth p=1 predictor, multiple lines can be plotted. -# Example: qhdsm(x,y,q=c(.25,.5,.75)) will plot the regression lines for -# predicting quartiles. - # -if(!is.null(q))qval=q -x<-as.matrix(x) -X<-cbind(x,y) -X<-elimna(X) -np<-ncol(X) -p<-np-1 -if(p>1 & length(q)>1)print('Only first quantile specified can be plotted') -x<-X[,1:p] -x<-as.matrix(x) -y<-X[,np] -if(xout){ -x<-as.matrix(x) -flag<-outfun(x,plotit=FALSE,...)$keep -x<-x[flag,] -y<-y[flag] -x<-as.matrix(x) -} -if(p==1){ -if(is.null(fr))fr=.8 -ord=order(x) -x=sort(x) -y=y[ord] -est=matrix(NA,ncol=3,nrow=length(qval)) -dimnames(est)=list(NULL,c('q','Inter','Slope')) -#x<-as.matrix(x) -qest=matrix(NA,ncol=length(qval),nrow=length(y)) -for(j in 1:length(qval)){ -rmd=NA -for(i in 1:length(x))rmd[i]<-hd(y[near(x,x[i],fr)],q=qval[j]) -if(LP)rmd=lplot(x,rmd,plotit=FALSE,pyhat=TRUE,pr=FALSE)$yhat -qest[,j]=rmd -} -if(plotit){ -plot(x,y,xlab=xlab,ylab=ylab,pch=pch) -for(j in 1:ncol(qest))lines(x,qest[,j]) -} -if(!pyhat)qest='DONE' -} -if(p>1){ -if(is.null(fr))fr=1 -if(p==2){ -if(pr.qhd){ -if(!scale)print('scale=F is specified. If there is dependence, might want to use scale=TRUE') -}} -qest=rplot(x,y,est=hd,q=qval[1],fr=fr,plotit=plotit,pyhat=pyhat,theta=theta, -phi=phi,scale=scale,SEED=FALSE,varfun=pbvar,xlab=xlab,ylab=ylab,zlab=zlab, -ticktype=ticktype,nmin=nmin,pr=pr) -if(!pyhat)qest='DONE' -if(pyhat)qest=qest$yhat -} -qest -} -skip.cov<-function(x,cop = 6, MM = FALSE, op = 1, mgv.op = 0, outpro.cop = 3, - STAND = FALSE){ -ans=skipcov(x,cop=cop,MM=MM,op=op,mgv.op=mgv.op,outpro.cop=outpro.cop,STAND=STAND) -list(cov=ans) -} - -skipSPR<-function(x,cop=6,MM=FALSE,op=1,mgv.op=0,outpro.cop=3,pr=FALSE){ -v=skip(x,pr=pr,STAND=TRUE,cop=cop,op=op,mgv.op=mgv.op,outpro.cop=outpro.cop) -v -} -rmdzeroG<-function(x,est=skipSPR,grp=NA,nboot=500,SEED=TRUE,...){ -# -# Do ANOVA on dependent groups -# using # depth of zero among bootstrap values -# based on difference scores. -# -# Like rmdzero, only designed for multivariate estimators such as -# computed by the R functions, skip and dmean for example. -# -# The data are assumed to be stored in x in list mode -# or in a matrix. In the first case -# x[[1]] contains the data for the first group, x[[2]] the data -# for the second group, etc. Length(x)=the number of groups = J. -# If stored in a matrix, columns correspond to groups. -# -# grp is used to specify some subset of the groups, if desired. -# By default, all J groups are used. -# -# The default number of bootstrap samples is nboot=500 -# -if(!is.list(x) && !is.matrix(x))stop('Data must be stored in a matrix or in list mode.') -if(is.list(x)){ -# put the data in an n by J matrix -mat<-matrix(0,length(x[[1]]),length(x)) -for (j in 1:length(x))mat[,j]<-x[[j]] -} -if(is.matrix(x))mat<-x -if(!is.na(grp[1])){ -mat<-mat[,grp] -} -FLAG=FALSE -if(ncol(mat)<3)FLAG=TRUE -#if(ncol(mat)<3)stop('This function is for three or more measures of location') -mat<-elimna(mat) # Remove rows with missing values. 
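-# The global test is based on the pairwise difference scores among the J groups and the
-# depth of the zero vector among the bootstrap estimates of their measures of location.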
-J<-ncol(mat) -jp<-0 -Jall<-(J^2-J)/2 -dif<-matrix(NA,nrow=nrow(mat),ncol=Jall) -ic<-0 -for(j in 1:J){ -for(k in 1:J){ -if(j.5)stop('tr must be between 0 and .5') -res=yuend(x=x,y=y,tr=tr,alpha=alpha) -# -#if(tr==0)term=1 -#if(tr>0)term=sqrt(area(dnormvar,qnorm(tr),qnorm(1-tr))+2*(qnorm(tr)^2)*tr) -#epow=(res$dif-null.value)*term/sqrt(winvar(x-y,tr=tr,na.rm=TRUE)) -epow=yuenv2(x,y,tr=tr)$Effect.Size -list(ci=res$ci,p.value=res$p.value,est1=res$est1,est2=res$est2,dif=res$dif,se=res$se, -teststat=res$teststat,n=res$n,df=res$df,Effect.Size=epow) -} -qhdsm2g<-function(x1,y1,x2,y2,q=.5,qval=NULL,LP=TRUE,fr=.8,xlab='X',ylab='Y',xout=FALSE,outfun=outpro,...){ -# -# Plot of quantile smoother for two groups using qhdsm -# -# fr controls amount of smoothing -# Missing values are automatically removed. -# -if(!is.null(qval))q=qval -m1<-elimna(cbind(x1,y1)) -if(ncol(m1)>3)stop('One covariate only is allowed') -m2<-elimna(cbind(x2,y2)) -x1<-m1[,1] -y1<-m1[,2] -x2<-m2[,1] -y2<-m2[,2] -if(xout){ -flag<-outfun(m1[,1],plotit=FALSE,...)$keep -m1<-m1[flag,] -x1<-m1[,1] -y1<-m1[,2] -flag<-outfun(m2[,1],plotit=FALSE,...)$keep -m2<-m2[flag,] -x2<-m2[,1] -y2<-m2[,2] -} -flag=order(x1) -x1=x1[flag] -y1=y1[flag] -flag=order(x2) -x2=x2[flag] -y2=y2[flag] -rmd1=NA -rmd2=NA -for(i in 1:length(x1))rmd1[i]<-hd(y1[near(x1,x1[i],fr)],q=q) -for(i in 1:length(x2))rmd2[i]<-hd(y2[near(x2,x2[i],fr)],q=q) -if(LP){ -rmd1=lplot(x1,rmd1,plotit=FALSE,pyhat=TRUE,pr=FALSE)$yhat -rmd2=lplot(x2,rmd2,plotit=FALSE,pyhat=TRUE,pr=FALSE)$yhat -} -plot(c(x1,x2),c(y1,y2),type='n',xlab=xlab,ylab=ylab) -points(x1,y1) -points(x2,y2,pch='+') -lines(x1,rmd1) -lines(x2,rmd2,lty=2) -} - - - -ancGLOB_sub3<-function(x1,y1,x2,y2,xout=FALSE,outfun=outpro,est=tmean,pcrit=NULL,p.crit=NULL,iter=100, -nboot=500,SEED=TRUE,MC=FALSE,nmin=12,pts=NULL,fr1=1,fr2=1,plotit=TRUE,xlab='X',ylab='Y',LP=TRUE,...){ -# -# -if(SEED)set.seed(2) -x1<-as.matrix(x1) -p1<-ncol(x1)+1 -p<-ncol(x1) -if(p>1)stop('Current version is for one independent variable only') -xy<-cbind(x1,y1) -xy<-elimna(xy) -x1<-xy[,1:p] -y1<-xy[,p1] -xy<-cbind(x2,y2) -xy<-elimna(xy) -x2<-xy[,1:p] -y2<-xy[,p1] -if(xout){ -m<-cbind(x1,y1) -flag<-outfun(x1,plotit=FALSE)$keep -m<-m[flag,] -x1<-m[,1:p] -y1<-m[,p1] -m<-cbind(x2,y2) -flag<-outfun(x2,plotit=FALSE)$keep -m<-m[flag,] -x2<-m[,1:p] -y2<-m[,p1] -} -N1=length(y1) -N2=length(y2) -if(is.null(pts[1])){ -isub<-c(1:5) # Initialize isub -test<-c(1:5) -xorder<-order(x1) -y1<-y1[xorder] -x1<-x1[xorder] -xorder<-order(x2) -y2<-y2[xorder] -x2<-x2[xorder] -n1<-1 -n2<-1 -vecn<-1 -for(i in 1:length(x1))n1[i]<-length(y1[near(x1,x1[i],fr1)]) -for(i in 1:length(x1))n2[i]<-length(y2[near(x2,x1[i],fr2)]) -for(i in 1:length(x1))vecn[i]<-min(n1[i],n2[i]) -sub<-c(1:length(x1)) -isub[1]<-min(sub[vecn>=nmin]) -isub[5]<-max(sub[vecn>=nmin]) -isub[3]<-floor((isub[1]+isub[5])/2) -isub[2]<-floor((isub[1]+isub[3])/2) -isub[4]<-floor((isub[3]+isub[5])/2) -pts=x1[isub] -g1=list() -g2=list() -for (i in 1:5){ -g1[[i]]<-y1[near(x1,x1[isub[i]],fr1)] -g2[[i]]<-y2[near(x2,x1[isub[i]],fr2)] -}} -if(!is.null(pts[1])){ -if(length(pts)<2)stop('Should have at least two points (With one point, use the R function ancova)') -g1=list() -g2=list() -for (i in 1:length(pts)){ -g1[[i]]<-y1[near(x1,pts[i],fr1)] -g2[[i]]<-y2[near(x2,pts[i],fr2)] -} -} -n1=lapply(g1,length) -nv=(min(as.vector(matl(n1)))) -res=aov2depth(g1,g2,est=est,SEED=SEED,nboot=nboot,...) -if(plotit)runmean2g(x1,y1,x2,y2,nboot=nboot,fr=fr1,est=est,xout=xout,LP=LP,...) 
-list(p.value=res$p.value,est1=res$est1,est2=res$est2,dif=res$dif,pts=pts,n1=res$n1,n2=res$n2) -} - - - - - - - - -ancGLOB_sub4<-function(x1,y1,x2,y2,xout=FALSE,outfun=outpro,est=tmean,pcrit=NULL,p.crit=NULL,iter=100, -nboot=500,SEED=TRUE,MC=FALSE,nmin=12,pts=NULL,fr1=1,fr2=1,plotit=TRUE,xlab='X',ylab='Y',LP=TRUE,...){ -# -# -if(SEED)set.seed(2) -x1<-as.matrix(x1) -p1<-ncol(x1)+1 -p<-ncol(x1) -if(p>1)stop('Current version is for one independent variable only') -xy<-cbind(x1,y1) -xy<-elimna(xy) -x1<-xy[,1:p] -y1<-xy[,p1] -xy<-cbind(x2,y2) -xy<-elimna(xy) -x2<-xy[,1:p] -y2<-xy[,p1] -if(xout){ -m<-cbind(x1,y1) -flag<-outfun(x1,plotit=FALSE)$keep -m<-m[flag,] -x1<-m[,1:p] -y1<-m[,p1] -m<-cbind(x2,y2) -flag<-outfun(x2,plotit=FALSE)$keep -m<-m[flag,] -x2<-m[,1:p] -y2<-m[,p1] -} -N1=length(y1) -N2=length(y2) -if(is.null(pts[1])){ -isub<-c(1:5) # Initialize isub -test<-c(1:5) -xorder<-order(x1) -y1<-y1[xorder] -x1<-x1[xorder] -xorder<-order(x2) -y2<-y2[xorder] -x2<-x2[xorder] -n1<-1 -n2<-1 -vecn<-1 -for(i in 1:length(x1))n1[i]<-length(y1[near(x1,x1[i],fr1)]) -for(i in 1:length(x1))n2[i]<-length(y2[near(x2,x1[i],fr2)]) -for(i in 1:length(x1))vecn[i]<-min(n1[i],n2[i]) -sub<-c(1:length(x1)) -isub[1]<-min(sub[vecn>=nmin]) -isub[5]<-max(sub[vecn>=nmin]) -isub[3]<-floor((isub[1]+isub[5])/2) -isub[2]<-floor((isub[1]+isub[3])/2) -isub[4]<-floor((isub[3]+isub[5])/2) -pts=x1[isub] -g1=list() -g2=list() -for (i in 1:5){ -g1[[i]]<-y1[near(x1,x1[isub[i]],fr1)] -g2[[i]]<-y2[near(x2,x1[isub[i]],fr2)] -}} -if(!is.null(pts[1])){ -if(length(pts)<2)stop('Should have at least two points (use the R function ancova)') -g1=list() -g2=list() -for (i in 1:length(pts)){ -g1[[i]]<-y1[near(x1,pts[i],fr1)] -g2[[i]]<-y2[near(x2,pts[i],fr2)] -}} -n1=lapply(g1,length) -nv=(min(as.vector(matl(n1)))) -res=aov2depth(g1,g2,est=est,SEED=SEED,nboot=nboot,...) -if(plotit)runmean2g(x1,y1,x2,y2,nboot=nboot,fr=fr1,est=est,xout=xout,LP=LP,...) -list(p.value=res$p.value,est1=res$est1,est2=res$est2,dif=res$dif,pts=pts,n1=res$n1,n2=res$n2) -} - - - - - - - - -ancGLOB_sub5<-function(x1,y1,x2,y2,xout=FALSE,outfun=outpro,est=tmean,pcrit=NULL,p.crit=NULL,iter=100, -nboot=500,SEED=TRUE,MC=FALSE,nmin=12,pts=NULL,fr1=1,fr2=1,xlab='X',ylab='Y',LP=TRUE,...){ -# -# -if(is.null(pts))stop('pts should be specified') -if(SEED)set.seed(2) -x1<-as.matrix(x1) -p1<-ncol(x1)+1 -p<-ncol(x1) -if(p>1)stop('Current version is for one independent variable only') -xy<-cbind(x1,y1) -xy<-elimna(xy) -x1<-xy[,1:p] -y1<-xy[,p1] -xy<-cbind(x2,y2) -xy<-elimna(xy) -x2<-xy[,1:p] -y2<-xy[,p1] -if(xout){ -m<-cbind(x1,y1) -flag<-outfun(x1,plotit=FALSE)$keep -m<-m[flag,] -x1<-m[,1:p] -y1<-m[,p1] -m<-cbind(x2,y2) -flag<-outfun(x2,plotit=FALSE)$keep -m<-m[flag,] -x2<-m[,1:p] -y2<-m[,p1] -} -N1=length(y1) -N2=length(y2) -if(length(pts)<2)stop('Should have at least two points (With one point, use the R function ancova)') -g1=list() -g2=list() -for (i in 1:length(pts)){ -g1[[i]]<-y1[near(x1,pts[i],fr1)] -g2[[i]]<-y2[near(x2,pts[i],fr2)] -} -n1=lapply(g1,length) -nv=(min(as.vector(matl(n1)))) -res=aov2depth(g1,g2,est=est,SEED=SEED,nboot=nboot,nmin=nmin,...)$p.value -res -} - - -ancGLOB_pv<-function(n1,n2,est=tmean,fr1=.8,fr2=.8,nboot=500,SEED=TRUE,iter=1000,nmin=12,MC=TRUE,alpha=.05,PRM=FALSE,pts=NULL,...){ -# -# Determine critical p-value when using the function ancGLOB -# Strategy: generage data from a normal distribution, NULL true -# compute p-value, repeat -# iter times (iter=100 is default) -# (a larger choice for iter is recommended. 
To reduce execution time use ancGLOB_pv_C -# -# returns: -# p.crit, the critical p-value for the specified alpha value -# if PRM=T, all p-values that were computed. -# ef.iter, the actual number of iterations, which might differ from iter -# due to sample sizes where it makes no sense to compute a p-value -# based on the generated data. -# -if(SEED)set.seed(45) -bvec=list() -np1=min(c(n1,n2))+1 -nmax=max(c(n1,n2)) -for(i in 1:iter){ -bvec[[i]]=rmul(nmax,p=4) -if(n1!=n2)bvec[[i]][np1:nmax,1:2]=NA -} -if(MC){ -library(parallel) -prm=mclapply(bvec,ancGLOB_sub2,fr1=fr1,fr2=fr2,est=est,SEED=SEED,nboot=nboot,pts=pts,...) -} -if(!MC)prm=lapply(bvec,ancGLOB_sub2,fr1=fr1,fr2=fr2,est=est,SEED=SEED,nboot=nboot,pts=pts,...) -prm=elimna(as.vector(matl(prm))) -ef.iter=length(prm) -p.crit=hd(prm,alpha) -prm=sort(elimna(prm)) -if(!PRM)prm=NULL -list(p.crit=p.crit,prm=prm,ef.iter=ef.iter) -} - -ancGLOB_sub2<-function(bvec,fr1=fr1,fr2=fr2,est=est,SEED=SEED,nmin=12,nboot=nboot,pts=pts,...){ -p=ancGLOB_sub3(bvec[,1],bvec[,2],bvec[,3],bvec[,4],est=est,SEED=SEED,fr1=fr1,fr2=fr2,nboot=nboot, -plotit=FALSE,nmin=12,pts=pts,...)$p.value -p -} - - -ancGLOB_pv_pts<-function(x1,x2,est=tmean,fr1=1,fr2=1,nboot=500,SEED=TRUE,iter=1000,nmin=12,MC=TRUE,alpha=.05,PRM=FALSE,pts=NULL,...){ -# -# Determine critical p-value when using the function ancGLOB and pts is specified. -# Strategy: generage data from a normal distribution, NULL true -# compute p-value, repeat -# iter times (iter=1000 is default) -# -# pts is used to indicate the covariate values where comparisons are to be made. -# Example: pts=c(1,4,6) will compare regression lines at X=1, 4 and 6 -# if pts is not specified, the function terminates with an error. -# -# -# returns: -# p.crit, the critical p-value for the specified alpha value -# if PRM=T, all p-values that were computed. -# ef.iter, the actual number of interations, which might differ from iter -# due to sample sizes where it makes no sense to compute a p-value -# based on the generated data. -# -# Like ancGLOB_pv, only pts is specified and use data in x1 and x2 -# -if(is.null(pts[1]))stop('pts is null, use ancGLOB_pv') -x1=elimna(x1) -x2=elimna(x2) -n1=length(x1) -n2=length(x2) - -if(SEED)set.seed(45) -bvec=list() -np1=min(c(n1,n2))+1 -nmax=max(c(n1,n2)) -for(i in 1:iter){ -bvec[[i]]=rmul(nmax,p=4) -if(n1!=n2)bvec[[i]][np1:nmax,1:2]=NA -bvec[[i]][1:n1,1]=x1 -bvec[[i]][1:n2,3]=x2 -} -prm=NA -if(MC){ -library(parallel) -prm=mclapply(bvec,ancGLOB_sub4,fr1=fr1,fr2=fr2,est=est,SEED=SEED,nboot=nboot,pts=pts,...) -} -#if(!MC)prm=lapply(bvec,ancGLOB_sub4,fr1=fr1,fr2=fr2,est=est,SEED=SEED,nboot=nboot,pts=pts,...) -if(!MC){ -for(ij in 1:length(bvec)){ -bv=as.matrix(bvec[[ij]]) -prm[ij]=ancGLOB_sub4(bv,fr1=fr1,fr2=fr2,est=est,SEED=SEED,nboot=nboot,pts=pts,nmin=nmin,...) -}} -prm=elimna(as.vector(matl(prm))) -ef.iter=length(prm) -p.crit=hd(prm,alpha) -prm=sort(elimna(prm)) -if(!PRM)prm=NULL -list(p.crit=p.crit,prm=prm,ef.iter=ef.iter) -} - -ancGLOB_sub4<-function(bvec,fr1=fr1,fr2=fr2,est=est,SEED=SEED,nmin=12,nboot=nboot,pts=pts,...){ -p=ancGLOB_sub5(bvec[,1],bvec[,2],bvec[,3],bvec[,4],est=est,SEED=SEED,fr1=fr1,fr2=fr2,nboot=nboot,nmin=12,pts=pts,...) -p -} - - - -q2by2<-function(x,q = c(0.1, 0.25, 0.5, 0.75, 0.9), nboot = 2000,SEED=TRUE){ -# -# For a 2 by 2 ANOVA, independent groups, test main effects -# and interaction for all quantiles indicated by argument q -# -if(SEED)set.seed(2) -if(is.matrix(x))x<-listm(x) -if(length(x)!=4)stop('Current version is for a 2-by-2 ANOVA only. 
Should have four groups.') -A=matrix(NA,nrow=length(q),6) -B=matrix(NA,nrow=length(q),6) -AB=matrix(NA,nrow=length(q),6) -dimnames(A)=list(NULL,c('q','psihat','p.value','ci.lower','ci.upper','p.hoch')) -dimnames(B)=list(NULL,c('q','psihat','p.value','ci.lower','ci.upper','p.hoch')) -dimnames(AB)=list(NULL,c('q','psihat','p.value','ci.lower','ci.upper','p.hoch')) -con=con2way(2,2) - -for(i in 1:length(q)){ -A[i,1]=q[i] -B[i,1]=q[i] -AB[i,1]=q[i] -a=linconpb(x,nboot=nboot,est=hd,con=con$conA,SEED=FALSE,q=q[i]) -b=linconpb(x,nboot=nboot,est=hd,con=con$conB,SEED=FALSE,q=q[i]) -ab=linconpb(x,nboot=nboot,est=hd,con=con$conAB,SEED=FALSE,q=q[i]) -A[i,2:5]=a$output[,c(2,3,5,6)] -B[i,2:5]=b$output[,c(2,3,5,6)] -AB[i,2:5]=ab$output[,c(2,3,5,6)] -} -A[,6]=p.adjust(A[,3],method='hoch') -B[,6]=p.adjust(B[,3],method='hoch') -AB[,6]=p.adjust(AB[,3],method='hoch') -list(A=A,B=B,AB=AB) -} -bd1GLOB<-function(x,est=spatcen,nboot=599,alpha=.05,SEED=TRUE,MC=FALSE,...){ -# -# Test the hypothesis of equal measures of location for J -# dependent groups using a -# percentile bootstrap method. -# -# Same as bd1way, only designed for estimators that take into account the -# overall structure of the data when dealing with outliers -# -# By default, use spatial median estimator -# est=dmean.cen will use the Donoho-Gasko trimmed mean. -# -# argument est is location estimator that returns value in $center -# (So, for example, est=dmean will not run.) -# -# Data are assumed to be stored in list mode or an n by J matrix. -# misran=F means missing values do not occur at random, case wise deletion is used. -# -#if(MC){ -#if(identical(est,dmean_C))stop('Using dmean_C with MC=T can cause R to crash. Use MC=F') -#library(parallel) -#} -# Last 3 commands cause an error unless WRScpp is available. -if(!is.list(x) && !is.matrix(x))stop('Data must be store in list mode or in an n by J matrix.') -if(is.list(x)){ -m<-matrix(0,length(x[[1]]),length(x)) -for (j in 1:length(x))m[,j]<-x[[j]] -} -if(is.matrix(x))m<-x -xcen<-m -locval=est(m,...)$center -locval=as.vector(locval) -for (j in 1:ncol(m))xcen[,j]<-m[,j]-locval[j] -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -data<-matrix(sample(nrow(m),size=nrow(m)*nboot,replace=TRUE),nrow=nboot) -data=listm(t(data)) -if(MC)bvec<-mclapply(data,bd1GLOB1,xcen=xcen,est=est,...) -if(!MC)bvec<-lapply(data,bd1GLOB1,xcen=xcen,est=est,...) -bvec=as.vector(matl(bvec)) -# A vector of nboot test statistics. -icrit<-floor((1-alpha)*nboot+.5) -test<-(length(locval)-1)*var(locval) -pv=mean((test2){ -zvec<-rep(0,Jall) -m1<-rbind(dif,zvec) -bplus<-nboot+1 -cmat=var(dif) -dv<-pdisMC(m1,center=cdif) -bplus<-nboot+1 -sig.level<-1-sum(dv[bplus]>=dv[1:nboot])/nboot -} -list(p.value=sig.level,center=center) -} - - -Dancovapb<-function(x1,y1,x2=x1,y2,fr1=1,fr2=1,est=hd,alpha=.05,nboot=500,pr=TRUE,SEED=TRUE, -plotit=TRUE, pts=NA,sm=FALSE,xout=FALSE,outfun=out,DIF=FALSE,na.rm=TRUE,...){ -# -# Compare two dependent groups using the ancova method -# (a method similar to the one used by the R function ancova). -# No parametric assumption is made about the form of -# the regression lines--a running interval smoother is used. -# -# percentile bootstrap method is used. -# -# est indicates estimator to be used; Harrell-Davis median estimator is default. -# -# Assume data are in x1 y1 x2 and y2 -# -# sm=T will create smooths using bootstrap bagging. -# pts can be used to specify the design points where the regression lines -# are to be compared. 
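-# Example (illustrative call with hypothetical x1, y1, x2, y2):
-# Dancovapb(x1,y1,x2,y2,pts=c(1,3)) compares the two dependent groups at the design points
-# X=1 and X=3.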
-# pts=NA means five points will be picked empirically. -# -# -if(SEED)set.seed(2) -if(DIF & !na.rm){ -if(pr)stop('With na.rm=TRUE, must have DIF=FALSE') -} -if(ncol(as.matrix(x1))>1)stop('One covariate only is allowed with this function') -if(length(x1)!=length(y1))stop('x1 and y1 have different lengths') -if(length(x1)!=length(x2))stop('x1 and y2 have different lengths') -if(length(x2)!=length(y2))stop('x2 and y2 have different lengths') -if(length(y1)!=length(y2))stop('y1 and y2 have different lengths') -xy=elimna(cbind(x1,y1,x2,y2)) -x1=xy[,1] -y1=xy[,2] -x2=xy[,3] -y2=xy[,4] -if(is.na(pts[1])){ -npt<-5 -isub<-c(1:5) # Initialize isub -test<-c(1:5) -xorder<-order(x1) -y1<-y1[xorder] -x1<-x1[xorder] -xorder<-order(x2) -y2<-y2[xorder] -x2<-x2[xorder] -n1<-1 -n2<-1 -vecn<-1 -for(i in 1:length(x1))n1[i]<-length(y1[near(x1,x1[i],fr1)]) -for(i in 1:length(x1))n2[i]<-length(y2[near(x2,x1[i],fr2)]) -for(i in 1:length(x1))vecn[i]<-min(n1[i],n2[i]) -sub<-c(1:length(x1)) -isub[1]<-min(sub[vecn>=12]) -isub[5]<-max(sub[vecn>=12]) -isub[3]<-floor((isub[1]+isub[5])/2) -isub[2]<-floor((isub[1]+isub[3])/2) -isub[4]<-floor((isub[3]+isub[5])/2) -mat<-matrix(NA,5,7) -dimnames(mat)<-list(NULL,c('X','n','DIF','ci.low','ci.hi','p.value','p.adjust')) -for (i in 1:5){ -t1=near(x1,x1[isub[i]],fr1) -t2=near(x2,x1[isub[i]],fr2) -pick=as.logical(t1*t2) -if(!na.rm)test=rmmismcp(y1[pick],y2[pick],est=est,,nboot=nboot,alpha=alpha,pr=FALSE, -plotit=FALSE,SEED=FALSE,...) -if(na.rm){ -test=rmmcppb(y1[pick],y2[pick],est=est,dif=DIF,nboot=nboot,plotit=FALSE,alpha=alpha, -pr=FALSE,SEED=SEED,...) -mat[i,1]<-x1[isub[i]] -mat[i,2]<-length(y1[pick]) -mat[i,3]<-test$output[,2] -mat[i,4]<-test$output[,5] -mat[i,5]<-test$output[,6] -mat[i,6]<-test$output[,3] -} -if(!na.rm){ -test=rmmismcp(y1[pick],y2[pick],est=est,nboot=nboot,alpha=alpha,pr=FALSE, -plotit=FALSE,SEED=SEED,...) -mat[i,1]<-x1[isub[i]] -mat[i,2]<-length(y1[pick]) -mat[i,3]<-test$output[,3] -mat[i,4]<-test$output[,6] -mat[i,5]<-test$output[,7] -mat[i,6]<-test$output[,4] -} -} -temp2<-order(0-mat[,6]) -bot=c(1:nrow(mat)) -dvec=sort(alpha/bot,decreasing=TRUE) -#mat[temp2,7]=dvec -mat[,7]=p.adjust(mat[,6],method='hoch') -} -if(!is.na(pts[1])){ -n1<-1 -n2<-1 -vecn<-1 -for(i in 1:length(pts)){ -n1[i]<-length(y1[near(x1,pts[i],fr1)]) -n2[i]<-length(y2[near(x2,pts[i],fr2)]) -} -# First check sample size -# -flage=rep(TRUE,length(pts)) -for (i in 1:length(pts)){ -t1<-near(x1,pts[i],fr1) -t2<-near(x2,pts[i],fr2) -pick=as.logical(t1*t2) -if(sum(pick)<=5){print(paste('Warning: there are',sum(pick),' points corresponding to the design point X=',pts[i])) -flage[i]=FALSE -}} -pts=pts[flage] -mat<-matrix(NA,length(pts),7) -dimnames(mat)<-list(NULL,c('X','n','DIF','ci.low','ci.hi', -'p.value','p.crit')) -for (i in 1:length(pts)){ -t1<-near(x1,pts[i],fr1) -t2<-near(x2,pts[i],fr2) -pick=as.logical(t1*t2) -#print(y1[pick]) -test=rmmcppb(y1[pick],y2[pick],est=est,dif=DIF,plotit=FALSE,alpha=alpha,pr=FALSE,SEED=FALSE,...) -mat[i,3]<-test$output[,2] -mat[i,1]<-pts[i] -mat[i,2]<-length(y1[pick]) -mat[i,4]<-test$output[,5] -mat[i,5]<-test$output[,6] -mat[i,6]<-test$output[,3] -} -#temp2<-order(0-mat[,6]) -mat[,7]=p.adjust(mat[,6],method='hoch') -bot=c(1:nrow(mat)) -dvec=sort(alpha/bot,decreasing=TRUE) -#mat[temp2,7]=dvec -} -if(plotit){ -runmean2g(x1,y1,x2,y2,fr=fr1,est=est,sm=sm,xout=xout,outfun=outfun,,...) 
-} -list(output=mat) -} -ancdifplot<-function(x1,y1,x2,y2,fr1=1,fr2=1,tr=.2,alpha=.05,pr=TRUE,xout=FALSE,outfun=out,LP=TRUE, -nmin=8,scat=TRUE,xlab='X',ylab='Y',report=FALSE,...){ -# -# Compare two independent groups using the ancova method -# No parametric assumption is made about the form of -# the regression lines--a running interval smoother is used. -# -# Assume data are in x1 y1 x2 and y2 -# -# nmin indicates minimun number of values close to a point -# -# Similar to ancova, only compute a confidence band for the difference and plot it. -# -if(ncol(as.matrix(x1))>1)stop('One covariate only is allowed with this function') -if(length(x1)!=length(y1))stop('x1 and y1 have different lengths') -if(length(x2)!=length(y2))stop('x2 and y2 have different lengths') -xy=elimna(cbind(x1,y1)) -x1=xy[,1] -y1=xy[,2] -xy=elimna(cbind(x2,y2)) -x2=xy[,1] -y2=xy[,2] -if(xout){ -flago<-outfun(x1,...)$keep -x1<-x1[flago] -y1<-y1[flago] -flag<-outfun(x2,...)$keep -x2<-x2[flago] -y2<-y2[flago] -} -xorder<-order(x1) -y1<-y1[xorder] -x1<-x1[xorder] -xorder<-order(x2) -y2<-y2[xorder] -x2<-x2[xorder] -n1<-1 -n2<-1 -vecn<-1 -for(i in 1:length(x1))n1[i]<-length(y1[near(x1,x1[i],fr1)]) -for(i in 1:length(x1))n2[i]<-length(y2[near(x2,x1[i],fr2)]) -for(i in 1:length(x1))vecn[i]<-min(n1[i],n2[i]) -flag=vecn>=nmin -ptsum=sum(flag) -est=NA -low=NA -up=NA -ic=0 -xp1=NA -xp2=NA -pv=NA -for (i in 1:length(x1)){ -if(flag[i]){ -g1<-y1[near(x1,x1[i],fr1)] -g2<-y2[near(x2,x2[i],fr2)] -test<-yuen(g1,g2,tr=tr) -ic=ic+1 -xp1[ic]=x1[i] -xp2[ic]=x2[i] -est[ic]=test$dif -low[ic]=test$ci[1] -up[ic]=test$ci[2] -pv[ic]=test$p.value -}} -#print(length(pv)) -#print(length(xp1)) -if(LP){ -xy=elimna(cbind(xp1,est,low,up,pv)) -est=lplot(xy[,1],xy[,2],plotit=FALSE,pyhat=TRUE,pr=FALSE)$yhat -up=lplot(xy[,1],xy[,4],plotit=FALSE,pyhat=TRUE,pr=FALSE)$yhat -low=lplot(xy[,1],xy[,3],plotit=FALSE,pyhat=TRUE,pr=FALSE)$yhat -} -if(!report)output='DONE' -plot(c(x1,x2),c(y1,y2),xlab=xlab,ylab=ylab,type='n') -if(!LP){ -lines(xp1,up,lty=2) -lines(xp1,low,lty=2) -lines(xp1,est) -if(scat)points(c(x1,x2),c(y1,y2)) -if(report){ -output=cbind(xp1,est,low,up,pv) -dimnames(output)=list(NULL,c(xlab,'est.dif','lower.ci','upper.ci','p-value')) -}} -if(LP){ -lines(xy[,1],up,lty=2) -lines(xy[,1],low,lty=2) -lines(xy[,1],est) - if(scat)points(c(x1,x2),c(y1,y2)) -if(report){ -output=cbind(xy[,1],est,low,up,xy[,5]) -dimnames(output)=list(NULL,c(xlab,'est.dif','lower.ci','upper.ci','p-value')) -} -} -output -} -ancGLOB<-function(x1,y1,x2,y2,xout=FALSE,outfun=outpro,est=tmean,p.crit=NULL,nreps=500, -alpha=.05,pr=TRUE,nboot=500,SEED=TRUE,MC=FALSE,CR=FALSE, nmin=12,pts=NULL,fr1=1,fr2=1,plotit=TRUE,SCAT=TRUE,pch1='+',pch2='o', -xlab='X',ylab='Y',LP=TRUE,cpp=FALSE,...){ -# -# Like the function ancova, only performs a global test that the measures of location -# are equal among all the covariate values that are chosen. -# -# pts = NULL, the function picks five covariate values. -# iter=500 means that when the critical p-value is determined, simulations with 500 -# replications are used to determine the critical p-value. -# -# Reject if the p-value is less than the critical p-value. -# Works well with alpha=.05. Uncertain about alpha <.05. -# -# cpp=TRUE, a C++ function is used to determine the critical p-value -# assuming the library WRScpp has been installed. 
This is done as follows: -# install.packages('devtools') -# library("devtools") -# install_github( "WRScpp", "mrxiaohe") -# -# CR=TRUE: If number of points is two or three, plot 1-alpha confidence region -# -# -if(CR)plotit=FALSE # Can't plot both regression lines and confidence region -if(SEED)set.seed(2) -iter=nreps -pts.flag=is.null(pts) -if(!is.null(pts))cpp=FALSE -x1<-as.matrix(x1) -p1<-ncol(x1)+1 -p<-ncol(x1) -if(p>1)stop('Current version is for one independent variable only') -xy<-cbind(x1,y1) -xy<-elimna(xy) -x1<-xy[,1:p] -y1<-xy[,p1] -xy<-cbind(x2,y2) -xy<-elimna(xy) -x2<-xy[,1:p] -y2<-xy[,p1] -if(xout){ -m<-cbind(x1,y1) -flag<-outfun(x1,plotit=FALSE)$keep -m<-m[flag,] -x1<-m[,1:p] -y1<-m[,p1] -m<-cbind(x2,y2) -flag<-outfun(x2,plotit=FALSE)$keep -m<-m[flag,] -x2<-m[,1:p] -y2<-m[,p1] -} -N1=length(y1) -N2=length(y2) -if(is.null(pts[1])){ -isub<-c(1:5) # Initialize isub -test<-c(1:5) -xorder<-order(x1) -y1<-y1[xorder] -x1<-x1[xorder] -xorder<-order(x2) -y2<-y2[xorder] -x2<-x2[xorder] -n1<-1 -n2<-1 -vecn<-1 -for(i in 1:length(x1))n1[i]<-length(y1[near(x1,x1[i],fr1)]) -for(i in 1:length(x1))n2[i]<-length(y2[near(x2,x1[i],fr2)]) -for(i in 1:length(x1))vecn[i]<-min(n1[i],n2[i]) -sub<-c(1:length(x1)) -isub[1]<-min(sub[vecn>=nmin]) -isub[5]<-max(sub[vecn>=nmin]) -isub[3]<-floor((isub[1]+isub[5])/2) -isub[2]<-floor((isub[1]+isub[3])/2) -isub[4]<-floor((isub[3]+isub[5])/2) -pts=x1[isub] -g1=list() -g2=list() -for (i in 1:5){ -g1[[i]]<-y1[near(x1,x1[isub[i]],fr1)] -g2[[i]]<-y2[near(x2,x1[isub[i]],fr2)] -}} -if(!is.null(pts[1])){ -if(length(pts)<2)stop('Should have at least two points (use the R function ancova)') -g1=list() -g2=list() -for (i in 1:length(pts)){ -g1[[i]]<-y1[near(x1,pts[i],fr1)] -g2[[i]]<-y2[near(x2,pts[i],fr2)] -} -} -p.alpha=NULL -if(is.null(p.crit)){ -if(pts.flag){ -if(cpp){ -library(WRScpp) -ve=ancGLOB_pv_C(N1,N2,est=est,iter=iter,fr1=fr1,fr2=fr2,nboot=nboot,SEED=SEED,...) -v=hd(ve,q=alpha) -} -else{ - v=ancGLOB_pv(N1,N2,est=est,iter=iter,fr1=fr1,fr2=fr2,nboot=nboot, -PRM=FALSE,SEED=SEED,alpha=alpha,xlab=xlab,ylab=ylab,...)$p.crit -} -} -if(!pts.flag)v=ancGLOB_pv_pts(x1,x2,pts=pts,nmin=nmin,iter=iter,est=est,fr1=fr1,fr2=fr2, -nboot=nboot,SEED=SEED,alpha=alpha,MC=MC)$p.crit -} -if(!is.null(p.crit))v=p.crit -res=aov2depth(g1,g2,est=est,SEED=SEED,CR=CR,alpha=v,...) -if(pr)print('Reject if p.test is less than p.crit') -if(plotit)runmean2g(x1,y1,x2,y2,fr=fr1,est=est,xout=FALSE,LP=LP,xlab=xlab,ylab=ylab, -SCAT=SCAT,pch1=pch1,pch2=pch2,...) -list(p.test=res$p.value,p.crit=v,est1=res$est1,est2=res$est2,dif=res$dif,pts=pts,n1=res$n1,n2=res$n2) -} - - -aov2depth<-function(x1,x2,est=tmean,nboot=500,SEED=TRUE,nmin=12,CR=FALSE, -xlab=' DIF 1',ylab='DIF 2',zlab='DIF 3',alpha=.05,...){ -# -# 2 by K ANOVA independent group (K levels not necessarily independent and -# not completely dependent -# -# Main effect Factor A only -# -# Strategy: Use depth of zero based on estimated -# differences for each column of the K levels of Factor B -# That is, testing no main effects for Factor A in -# a manner that takes into account the pattern of the -# measures of location rather then simply averaging -# across columns. -# -# x1 can be a matrix with K columns corrspoding to groups, ditto for x2 -# Or x1 and x2 can have list mode. -# Assuming x1 and x2 contain data for indepedendent groups. 
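-# Example (illustrative): with x1 and x2 hypothetical matrices, each having K=3 columns,
-# aov2depth(x1,x2,est=tmean) tests for a main effect of Factor A, taking into account the
-# pattern of the K measures of location rather than averaging across the levels of Factor B.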
-# -if(is.matrix(x1)||is.data.frame(x1))x1=listm(x1) -if(is.matrix(x2)||is.data.frame(x2))x2=listm(x2) -J=length(x1) -if(J!=length(x2))stop('x1 and x2 should have same number of groups') -if(SEED)set.seed(2) -for(j in 1:J){ -x1[[j]]=na.omit(x1[[j]]) -x2[[j]]=na.omit(x2[[j]]) -} -n1=mapply(x1,FUN=length) -n2=mapply(x2,FUN=length) -bplus=nboot+1 -bvec1=matrix(NA,nrow=nboot,ncol=J) -bvec2=matrix(NA,nrow=nboot,ncol=J) -for(j in 1:J){ -data1=matrix(sample(x1[[j]],size=n1[j]*nboot,replace=TRUE),nrow=nboot) -data2=matrix(sample(x2[[j]],size=n2[j]*nboot,replace=TRUE),nrow=nboot) -bvec1[,j]=apply(data1,1,est,...) -bvec2[,j]=apply(data2,1,est,...) -} -difb=bvec1-bvec2 -est1=mapply(x1,FUN=est,...) -est2=mapply(x2,FUN=est,...) -dif=est1-est2 -m1=var(difb) -nullvec=rep(0,J) -difz=rbind(difb,nullvec) -dis=mahalanobis(difz,dif,m1) -sig=sum(dis[bplus]<=dis)/bplus -if(CR){ -dis2<-order(dis[1:nboot]) -dis<-sort(dis) -critn<-floor((1-alpha)*nboot) -if(J==2){ -plot(difb[,1],difb[,2],xlab=xlab,ylab=ylab) -points(0,0,pch=0) -xx<-difb[dis2[1:critn],] -xord<-order(xx[,1]) -xx<-xx[xord,] -temp<-chull(xx) -lines(xx[temp,]) -lines(xx[c(temp[1],temp[length(temp)]),]) -} -if(J==3){ -scatterplot3d(difb[dis2[1:critn],],xlab=xlab,ylab=ylab,zlab=zlab,tick.marks=TRUE) -} - -} -list(p.value=sig,est1=est1,est2=est2,dif=dif,n1=n1,n2=n2) -} - -ancovaWMW<-function(x1,y1,x2,y2,fr1=1,fr2=1,alpha=.05,sm=FALSE,est=tmean, -plotit=TRUE,pts=NA,xout=FALSE,outfun=out,LP=TRUE,...){ -# -# Compare two independent groups using the ancova method in conjunction -# with Cliff's improvement on the Wilcoxon-Mann-Whitney test. -# No parametric assumption is made about the form of -# the regression lines--a running interval smoother is used. -# -# Assume data are in x1 y1 x2 and y2 -# -# OLD version: sm=TRUE will use bootstrap bagging when plotting the regression lines -# The plot is based on measure of location indicated by the argument -# est. Default is the Harrell-Davis estimate of the median. Not working, took this out. 
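-# Example (illustrative call with hypothetical data): ancovaWMW(x1,y1,x2,y2,pts=c(2,4,6))
-# compares the two groups via Cliff's method at the covariate values X=2, 4 and 6;
-# with pts=NA, five design points are chosen empirically.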
-# -# LP=TRUE: use running interval smoother followed by LOESS -# -if(ncol(as.matrix(x1))>1)stop('One covariate only is allowed with this function') -if(length(x1)!=length(y1))stop('x1 and y1 have different lengths') -if(length(x2)!=length(y2))stop('x2 and y2 have different lengths') -xy=elimna(cbind(x1,y1)) -x1=xy[,1] -y1=xy[,2] -xy=elimna(cbind(x2,y2)) -x2=xy[,1] -y2=xy[,2] -if(xout){ -flag<-outfun(x1,...)$keep -x1<-x1[flag] -y1<-y1[flag] -flag<-outfun(x2,...)$keep -x2<-x2[flag] -y2<-y2[flag] -} -dv.sum=NULL -if(is.na(pts[1])){ -npt<-5 -CC=5 -isub<-c(1:5) # Initialize isub -test<-c(1:5) -xorder<-order(x1) -y1<-y1[xorder] -x1<-x1[xorder] -xorder<-order(x2) -y2<-y2[xorder] -x2<-x2[xorder] -n1<-1 -n2<-1 -vecn<-1 -for(i in 1:length(x1))n1[i]<-length(y1[near(x1,x1[i],fr1)]) -for(i in 1:length(x1))n2[i]<-length(y2[near(x2,x1[i],fr2)]) -for(i in 1:length(x1))vecn[i]<-min(n1[i],n2[i]) -sub<-c(1:length(x1)) -isub[1]<-min(sub[vecn>=12]) -isub[5]<-max(sub[vecn>=12]) -isub[3]<-floor((isub[1]+isub[5])/2) -isub[2]<-floor((isub[1]+isub[3])/2) -isub[4]<-floor((isub[3]+isub[5])/2) -mat<-matrix(NA,5,8) -dimnames(mat)<-list(NULL,c('X','n1','n2','p.hat','ci.low','ci.hi','p.value','p.crit')) -for (i in 1:5){ -g1<-y1[near(x1,x1[isub[i]],fr1)] -g2<-y2[near(x2,x1[isub[i]],fr2)] -g1<-g1[!is.na(g1)] -g2<-g2[!is.na(g2)] -test<-cidv2(g1,g2,alpha=alpha) -dv.sum=rbind(dv.sum,test$summary.dvals) -mat[i,1]<-x1[isub[i]] -mat[i,2]<-length(g1) -mat[i,3]<-length(g2) -mat[i,4]<-test$p.hat -mat[i,5]<-test$p.ci[1] -mat[i,6]<-test$p.ci[2] -mat[i,7]<-test$p.value -}} -if(!is.na(pts[1])){ -CC=length(pts) -n1<-1 -n2<-1 -vecn<-1 -for(i in 1:length(pts)){ -n1[i]<-length(y1[near(x1,pts[i],fr1)]) -n2[i]<-length(y2[near(x2,pts[i],fr2)]) -} -mat<-matrix(NA,length(pts),8) -dimnames(mat)<-list(NULL,c('X','n1','n2','p.hat','ci.low','ci.hi','p.value','p.crit')) -for (i in 1:length(pts)){ -g1<-y1[near(x1,pts[i],fr1)] -g2<-y2[near(x2,pts[i],fr2)] -g1<-g1[!is.na(g1)] -g2<-g2[!is.na(g2)] -test=cidv2(g1,g2,alpha=alpha) -dv.sum=rbind(dv.sum,test$summary.dvals) -mat[i,1]<-pts[i] -mat[i,2]<-length(g1) -mat[i,3]<-length(g2) -if(length(g1)<=5)print(paste('Warning, there are',length(g1),' points corresponding to the design point X=',pts[i])) -if(length(g2)<=5)print(paste('Warning, there are',length(g2),' points corresponding to the design point X=',pts[i])) -mat[i,4]<-test$p.hat -mat[i,5]<-test$p.ci[1] -mat[i,6]<-test$p.ci[2] -mat[i,7]<-test$p.value -}} -dvec<-alpha/c(1:CC) -temp2<-order(0-mat[,6]) -mat[temp2,8]=dvec -if(plotit){ -runmean2g(x1,y1,x2,y2,fr=fr1,est=est,sm=sm,xout=FALSE,LP=LP,...) -} -list(output=mat,summary=dv.sum) -} - -ghtrim<-function(tr=.2,g=0,h=0){ -# -# Compute trimmed mean of a g-and-h distribution. -# -# -if(g==0)val=0 -if(g>0){ -low=qnorm(tr) -up=-1*low -val=integrate(ftrim,low,up,tr=tr,g=g,h=h)$value -val=val/(1-2*tr) -} -val -} - -ftrim<-function(z,tr,g,h){ -gz=(exp(g*z)-1)*exp(h*z^2/2)/g -res=dnorm(z)*gz -res -} - -DancovaV2<-function(x1=NULL,y1=NULL,x2=NULL,y2=NULL,xy=NULL,fr1=1,fr2=1, -est=tmean,alpha=.05,plotit=TRUE,xlab='X',ylab='Y',qvals=c(.25,.5,.75),sm=FALSE, -xout=FALSE,eout=FALSE,outfun=out,DIF=FALSE,LP=TRUE,method='hochberg', -nboot=500,SEED=TRUE,nreps=2000,MC=TRUE,cpp=FALSE, -SCAT=TRUE,pch1='*',pch2='+', -nmin=12,q=.5,...){ -# -# Compare two dependent groups using the ancova method. -# No parametric assumption is made about the form of -# the regression lines--a running interval smoother is used. 
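-# Example (illustrative call with hypothetical x1, y1, x2, y2):
-# DancovaV2(x1,y1,x2,y2) compares the two dependent groups at the .25, .5 and .75 quantiles
-# of the time 1 covariate, the default choice indicated by qvals=c(.25,.5,.75).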
-# -# Like Dancova, only bootstrap samples are obtained by resampling -# from c(x1,y1,x2,y2) rather than conditioning on the x value as done by Dancova. -# This function tends to have more power than Dancova. -# -# One covariate only is allowed. -# -# method='hochberg -# By default, family wise error rate is controlled by Hochberg's methoe - -# To get critical p-value, need the following commands to get access to the software. -# library(`devtools') -# install_github( `WRScpp', `mrxiaohe') - -# Assume data are in xy having four columns: x1, y1, x2 and y2. -# -# Or can have the -# data stored in four separate variables: -# x1 y1 x2 and y2 -# -# x1 y1 are measures at time 1 -# x2 y2 are measures at time 2 -# -# LP=T, when plotting, running interval smoother is smoothed again using lplot. -# sm=T will create smooths using bootstrap bagging. -# pts can be used to specify the design points where the regression lines -# are to be compared. -# -# q=.5 means when est=hd (Harrell-Davis estimator), median is estimated. -# -# eout=TRUE will eliminate all outliers when plotting. -# -if(SEED)set.seed(2) -iter=nreps -if(!is.null(x1[1])){ -if(ncol(as.matrix(x1))>1)stop('One covariate only is allowed with this function') -if(length(x1)!=length(y1))stop('x1 and y1 have different lengths') -if(length(x1)!=length(x2))stop('x1 and y2 have different lengths') -if(length(x2)!=length(y2))stop('x2 and y2 have different lengths') -if(length(y1)!=length(y2))stop('y1 and y2 have different lengths') -xy=cbind(x1,y1,x2,y2) -} -n=nrow(elimna(xy)) -if(plotit){ -ef=identical(est,hd) -if(!ef)runmean2g(xy[,1],xy[,2],xy[,3],xy[,4],fr=fr1,est=est,sm=sm,xout=xout,LP=LP,eout=eout, -xlab=xlab,ylab=ylab,SCAT=SCAT,pch1=pch1,pch2=pch2,...) -if(ef)runmean2g(xy[,1],xy[,2],xy[,3],xy[,4],fr=fr1,est=hd,sm=sm,xout=xout,LP=LP,q=q,eout=eout, -xlab=xlab,ylab=ylab,SCAT=SCAT,pch1=pch1,pch2=pch2,...) -} - -#eliminate this code and use Hochberg instead -#if(is.null(p.crit)){ -#if(cpp)library(WRScpp) -#p.crit=DancGLOB_pv(n,fr1=fr1,fr2=fr2,nboot=nboot,est=est,SEED=SEED,iter=iter, -#nmin=nmin,MC=MC,alpha=alpha,qvals=qvals,cpp=cpp)$p.crit -# -#} - -pts=NULL -#if(is.null(pts)){ -for(i in 1:length(qvals))pts=c(pts,qest(xy[,1],qvals[i])) -#} -if(SEED)set.seed(2) -ef=identical(est,hd) -n=nrow(xy) -est1=NA -est2=NA -J=length(pts) -est1=matrix(NA,nrow=nboot,ncol=J) -est2=matrix(NA,nrow=nboot,ncol=J) -# -data=matrix(sample(n,size=n*nboot,replace=TRUE),ncol=nboot,nrow=n) -if(!MC){ -if(!ef){ -est1=apply(data,2,DancGLOB_sub,xy=xy[,1:2],pts=pts,est=est,fr=fr1,nmin=nmin,...) -est2=apply(data,2,DancGLOB_sub,xy=xy[,3:4],pts=pts,est=est,fr=fr2,nmin=nmin,...) -} -if(ef){ -est1=apply(data,2,DancGLOB_sub,xy=xy[,1:2],pts=pts,est=hd,fr=fr1,nmin=nmin,q=q,...) -est2=apply(data,2,DancGLOB_sub,xy=xy[,3:4],pts=pts,est=hd,fr=fr2,nmin=nmin,q=q,...) -} -est1=t(as.matrix(est1)) -est2=t(as.matrix(est2)) -} -if(MC){ -library(parallel) -data=listm(data) -if(!ef){ -est1=mclapply(data,DancGLOB_sub,xy=xy[,1:2],pts=pts,est=est,fr=fr1,nmin=nmin,...) -est2=mclapply(data,DancGLOB_sub,xy=xy[,3:4],pts=pts,est=est,fr=fr2,nmin=nmin,...) -} -if(ef){ -est1=mclapply(data,DancGLOB_sub,xy=xy[,1:2],pts=pts,est=hd,fr=fr1,nmin=nmin,q=q,...) -est2=mclapply(data,DancGLOB_sub,xy=xy[,3:4],pts=pts,est=hd,fr=fr2,nmin=nmin,q=q,...) 
-} -est1=t(matl(est1)) -est2=t(matl(est2)) -} -pv=NA -for(j in 1:J){ -pv[j]=mean(est1[,j]1)stop('One covariate only is allowed with this function') -if(length(x1)!=length(y1))stop('x1 and y1 have different lengths') -if(length(x2)!=length(y2))stop('x2 and y2 have different lengths') -xy1=elimna(cbind(x1,y1)) -xy2=elimna=cbind(x2,y2) -n1=nrow(xy1) -n2=nrow(xy2) -if(plotit){ -ef=identical(est,hd) -if(!ef)runmean2g(xy1[,1],xy1[,2],xy2[,1],xy2[,2],fr=fr1,est=est,sm=sm,xout=xout,LP=LP,eout=eout, -xlab=xlab,ylab=ylab,SCAT=SCAT,pch1=pch1,pch2=pch2,...) -if(ef)runmean2g(xy1[,1],xy1[,2],xy2[,1],xy2[,2],fr=fr1,est=hd,sm=sm,xout=xout,LP=LP,q=q,eout=eout, -xlab=xlab,ylab=ylab,SCAT=SCAT,pch1=pch1,pch2=pch2,...) -} -if(is.null(p.crit)){ -if(FAST){ -if(alpha==.05){ -nm=max(c(n1,n2)) -if(nm<=800){ -nv=c(50,60,80,100,200,300,500,800) -if(qpts){ -pv=c(.02709,.0283,.0306,.02842,.02779,.02410,.02683,.01868,.02122) -p.crit=lplot.pred(1/nv,pv,1/n1)$yhat -} -if(!qpts){ -pv=c(.020831,.017812,.015796,.014773,.012589,.015664,.011803,.012479) -p.crit=lplot.pred(1/nv,pv,1/n1)$yhat -}} -}}} -if(is.null(p.crit)){ -p.crit=ancovaV2.pv(n1,n2,nreps=nreps,MC=MC,qpts=qpts,est=est,qvals=qvals,SEED=SEED, -alpha=alpha,nboot=nboot)$p.crit -} -pts=NULL -if(qpts)for(i in 1:length(qvals))pts=c(pts,qest(xy1[,1],qvals[i])) -if(!qpts)pts=ancova(x1,y1,x2,y2,pr=FALSE,plotit=FALSE)$output[,1] -if(SEED)set.seed(2) -ef=identical(est,hd) -est1=NA -est2=NA -J=length(pts) -est1=matrix(NA,nrow=nboot,ncol=J) -est2=matrix(NA,nrow=nboot,ncol=J) -# -data1=matrix(sample(n1,size=n1*nboot,replace=TRUE),ncol=nboot,nrow=n1) -data2=matrix(sample(n2,size=n2*nboot,replace=TRUE),ncol=nboot,nrow=n2) -if(!MC){ -if(!ef){ -est1=apply(data1,2,DancGLOB_sub,xy=xy1[,1:2],pts=pts,est=est,fr=fr1,nmin=nmin,...) -est2=apply(data2,2,DancGLOB_sub,xy=xy2[,1:2],pts=pts,est=est,fr=fr2,nmin=nmin,...) -} -if(ef){ -est1=apply(data1,2,DancGLOB_sub,xy=xy1[,1:2],pts=pts,est=hd,fr=fr1,nmin=nmin,q=q,...) -est2=apply(data2,2,DancGLOB_sub,xy=xy2[,1:2],pts=pts,est=hd,fr=fr2,nmin=nmin,q=q,...) -} -est1=t(as.matrix(est1)) -est2=t(as.matrix(est2)) -} -if(MC){ -library(parallel) -data1=listm(data1) -data2=listm(data2) -if(!ef){ -est1=mclapply(data1,DancGLOB_sub,xy=xy1[,1:2],pts=pts,est=est,fr=fr1,nmin=nmin,...) -est2=mclapply(data2,DancGLOB_sub,xy=xy2[,1:2],pts=pts,est=est,fr=fr2,nmin=nmin,...) -} -if(ef){ -est1=mclapply(data1,DancGLOB_sub,xy=xy1[,1:2],pts=pts,est=hd,fr=fr1,nmin=nmin,q=q,...) -est2=mclapply(data2,DancGLOB_sub,xy=xy2[,1:2],pts=pts,est=hd,fr=fr2,nmin=nmin,q=q,...) -} -est1=t(matl(est1)) -est2=t(matl(est2)) -} -pv=NULL -if(J==1){ -est1=t(as.matrix(est1)) -est2=t(as.matrix(est2)) -} -for(j in 1:J){ -pv[j]=mean(est1[,j]=crit -out.id=z[flag] -n.out=sum(flag) -nums=c(1:length(x)) -keep=nums[!flag] -} -if(ncol(x)>1)stop('Use function out with outfun=wmean.cov') -list(n=length(x),n.out=n.out,out.value=x[flag],out.id=nums[flag],keep=keep) -} -wmean.cov<-function(x,tr=0){ -# -# Compute Winsoriced mean and covariance for data in x -# -loc=apply(x,2,mean,tr=tr) -cv=wincov(x,tr=tr) -list(center=loc,cov=cv) -} -rngh<-function(n,rho=0,p=2,g=0,h=0,ADJ=TRUE,pr=TRUE){ -# -# Generate data from a multivariate distribution where the marginal distributions -# are g-and-h distributions that have common correlation rho. -# Strategy: adjust the correlation when generating data from multivariate normal normal distribution so that -# when transforming the marginal distributions to a g-and-h distribution, the correlation is rho. 
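-# Example (illustrative): rngh(100,rho=.5,p=3,g=.5,h=.2) generates 100 observations from a
-# trivariate distribution having g-and-h marginals (g=.5, h=.2) and, when ADJ=TRUE, a common
-# correlation approximately equal to .5.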
-# -# -library(MASS) -if(ADJ){ -adjrho=rngh.sub(n,g,h,rho)$rho.adjusted -rho=adjrho -if(pr)print(paste('Adjusted rho',rho)) -} -cmat<-matrix(rho,p,p) -diag(cmat)<-1 -x=mvrnorm(n=n, mu=rep(0,p), Sigma=cmat) -for(i in 1:p){ -if (g>0){ -x[,i]<-(exp(g*x[,i])-1)*exp(h*x[,i]^2/2)/g -} -if(g==0)x[,i]<-x[,i]*exp(h*x[,i]^2/2) -} -x -} - - -rngh.sub<-function(n,g,h,rho,ntest=1000000){ -# -# Determine adjusted value for rho so that -# the actual correlation is some desired value -# -# rho: desired correlation -vals=seq(rho,.99,.01) -for(i in 1:length(vals)){ -adj=vals[i] -cmat<-matrix(vals[i],2,2) -diag(cmat)<-1 -x=mvrnorm(ntest,mu=c(0,0),Sigma=cmat) -for(i in 1:2){ -if (g>0){ -x[,i]<-(exp(g*x[,i])-1)*exp(h*x[,i]^2/2)/g -} -if(g==0)x[,i]<-x[,i]*exp(h*x[,i]^2/2) -} -chk=cor(x) -if(abs(chk[1,2]-rho)<.01)break -if(chk[1,2]>=rho)break -} -list(rho.adjusted=adj,rho.actual=chk[1,2]) -} -rplot.res<-function(x,y,pv=1,est = tmean, scat = TRUE, fr = NA, plotit = TRUE, -pyhat = FALSE, efr = 0.5, theta = 50, phi = 25, scale = TRUE, -expand = 0.5, SEED = TRUE, varfun = pbvar, outfun = outpro,STAND=TRUE, -nmin = 0, xout = FALSE, out = FALSE, eout = FALSE, xlab='X', -ylab ='Y',zscale=FALSE,zlab=' ', pr=TRUE,duplicate='error', -ticktype='simple',LP=TRUE,...){ -# -# Apply rplot excluding the independent variable indicated by the argument -# pv. -# So pv=1 means will exclude the first predictor. -# Fit a smooth using the remaing variables, compute the residuals, then plot -# the smooth using the residuals as the dependent variable and -# the variables indicated by pv as the independent variables. -# -xy=na.omit(cbind(x,y)) -p=ncol(x) -p1=p+1 -if(xout){ -flag=outfun(xy[,1:p],plotit=FALSE,STAND=STAND,...)$keep -xy=xy[flag,] -} -x=xy[,1:p] -y=xy[,p1] -res=y-rplot(x[,1:2],y,est=est,scat=scat,varfun=varfun,expand=expand,nmin=nmin, -pyhat=TRUE,plotit=FALSE,fr=fr,xout=FALSE)$yhat -outp=rplot(x[,pv],res,fr=fr,xout=FALSE,efr=efr,theta=theta,phi=phi, -scale=scale,SEED=SEED,xlab=xlab,ylab=ylab,zlab=zlab,pr=FALSE, -ticktype=ticktype,LP=LP,...) -outp -} - -trimcimul<-function(x, tr = 0.2, alpha = 0.05,null.value=0){ -# -# For J dependent random variables, apply trimci to each -# FWE controlled with Rom-Hochberg method -# -# x is a matrix having J columns. (Can have list mode as well.) 
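-# Example (illustrative): with m a hypothetical n-by-4 matrix, trimcimul(m,tr=.2) returns the
-# 20% trimmed mean, test and confidence interval for each of the four variables, with FWE
-# controlled via the Rom-Hochberg critical p-values.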
-if(!is.matrix(x))x<-matl(x) -if(!is.matrix(x))stop('Data must be stored in a matrix or in list mode.') -J<-ncol(x) -xbar<-vector('numeric',J) -ncon<-J -if(alpha==.05){ -dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) -if(ncon > 10){ -avec<-.05/c(11:ncon) -dvec<-c(dvec,avec) -}} -if(alpha==.01){ -dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) -if(ncon > 10){ -avec<-.01/c(11:ncon) -dvec<-c(dvec,avec) -}} -if(alpha != .05 && alpha != .01)dvec<-alpha/c(1:ncon) -psihat<-matrix(0,J,6) -dimnames(psihat)<-list(NULL,c('Variable','estimate','ci.lower','ci.upper','adj.ci.lower','adj.ci.upper')) -test<-matrix(0,J,5) -dimnames(test)<-list(NULL,c('Variable','test','p.value','p.crit','se')) -temp1<-NA -nval=NULL -for (d in 1:J){ -psihat[d,1]<-d -dval=na.omit(x[,d]) -nval[d]=length(dval) -temp=trimci(dval,tr=tr,pr=FALSE,null.value=null.value) -test[d,1]<-d -test[d,2]<-temp$test.stat -test[d,3]=temp$p.value -test[d,5]<-temp$se -psihat[d,2]<-temp$estimate -psihat[d,3]<-temp$ci[1] -psihat[d,4]<-temp$ci[2] -} -temp1=test[,3] -temp2<-order(0-temp1) -zvec<-dvec[1:ncon] -test[temp2,4]<-zvec -num.sig=sum(test[,3]<=test[,4]) -# compute adjusted confidence intervals having simultaneous probability coverage 1-alpha -for(d in 1:J){ -dval=na.omit(x[,d]) -psihat[d,5:6]=trimci(dval,tr=tr,alpha=test[d,4],pr=FALSE,null.value=null.value)$ci -} -list(n=nval,test=test,psihat=psihat,num.sig=num.sig) -} - -g5plot<-function(x1,x2,x3=NULL,x4=NULL,x5=NULL,fr=.8,aval=.5,xlab='X',ylab='',color=rep('black',5),main=NULL,sub=NULL){ -# -# plot estimates of the density functions for up to 5 groups. -# using an adaptive kernel density estimator -# -if(is.matrix(x1)||is.data.frame(x1))x1=listm(x1) -if(is.list(x1)){ -x=x1 -J=length(x) -ic=0 -for(j in 1:J){ -ic=ic+1 -if(ic==1)x1=x[[1]] -if(ic==2)x2=x[[2]] -if(ic==3)x3=x[[3]] -if(ic==4)x4=x[[4]] -if(ic==5)x5=x[[5]] -} -} -x1<-elimna(x1) -x2<-elimna(x2) -x1<-sort(x1) -x2<-sort(x2) -if(!is.null(x3))x3<-sort(x3) -if(!is.null(x4))x4<-sort(x4) -if(!is.null(x5))x5<-sort(x5) -z3=NULL -z4=NULL -z5=NULL -z1<-akerd(x1,aval=aval,fr=fr,pyhat=TRUE,plotit=FALSE) -z2<-akerd(x2,aval=aval,fr=fr,pyhat=TRUE,plotit=FALSE) -if(!is.null(x3))z3=akerd(x3,aval=aval,fr=fr,pyhat=TRUE,plotit=FALSE) -if(!is.null(x4))z4=akerd(x4,aval=aval,fr=fr,pyhat=TRUE,plotit=FALSE) -if(!is.null(x5))z5=akerd(x5,aval=aval,fr=fr,pyhat=TRUE,plotit=FALSE) -plot(c(x1,x2,x3,x4,x5),c(z1,z2,z3,z4,z5), xlab =xlab, ylab =ylab, type = 'n',main=main,sub=sub) -lines(x1,z1,col=color[1]) -lines(x2,z2,lty=2,col=color[2]) -if(!is.null(x3))lines(x3,z3,lty=3,col=color[3]) -if(!is.null(x4))lines(x4,z4,lty=4,col=color[4]) -if(!is.null(x5))lines(x5,z5,lty=5,col=color[5]) -} - -MEDanova<-function(x,op=3,nboot=600,MC=FALSE,SEED=TRUE){ -# -# Test global hypothesis that J independent groups -# have equal medians. -# Performs well when there are tied values. -# -# Basically, use pbadepth in conjunction with the Harrell--Davis -# estimator. 
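-# Example (illustrative): MEDanova(x), where x is a hypothetical matrix with J columns or a
-# list having J independent groups, tests the hypothesis that all J population medians are equal.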
-# -output=pbadepth(x,est=hd,allp=TRUE,SEED=SEED,op=op,nboot=nboot,MC=MC) -output -} - -Qmcp<-function(x,q=.5,con=0,SEED=TRUE,THD=FALSE,nboot=NA,alpha=.05,HOCH=FALSE){ -# -# Multiple comparisons among independent groups -# based on the quantile indicated by the argument -# q -# -# THD=TRUE would use the trimmed Harrell--Davis estimator -# The default is the Harrell--Davis estimator -# Familywise error is controlled with the Hochberg's method - -# -# The Harrell--Davis estimator is used in order to deal with tied values -# -est=hd -if(THD)est=thd -res=linconpb(x,est=est,q=q,nboot=nboot,SEED=SEED,con=con,method='hoch') -res -} - - -Qinterplot<-function(x,q=.5){ -# -# Plot interactions based on quantiles estimated via the -# Harrell--Davis estimator -# -if(is.matrix(x) || is.data.frame(x))x=listm(x) -if(length(x)!=4)stop('Should have a 2 by 2 design for a total of four groups') -qv=lapply(x,hd,q=q) -qv=as.vector(matl(qv)) -interplot(2,2,locvec=qv,xlab='Fac 1',ylab=paste(q,'Quantile'),trace.label='Fac 2') -} - - -lplot.pred<-function(x,y,pts=NULL,xout=FALSE,outfun=outpro,span=2/3,family='gaussian',...){ -# -# Using loess, compute predicted values based on the data in pts -# -x<-as.matrix(x) -d=ncol(x) -dp1=d+1 -m<-elimna(cbind(x,y)) -n.orig=nrow(m) -n.keep=n.orig -if(xout){ -flag<-outfun(m[,1:d],plotit=FALSE,...)$keep -m<-m[flag,] -n.keep=nrow(m) -} -x<-m[,1:d] -y<-m[,dp1] -if(is.null(pts))pts=x -fit=loess(y~x,span=span,family=family) -pred=predict(fit,pts) -list(n=n.orig,n.keep=n.keep,x.used=x,yhat=pred) -} - -regse<-function(x,y,xout=FALSE,regfun=tsreg,outfun=outpro,nboot=200,SEED=TRUE,...){ -# -# Estimate the standard errors and -# covariance matrix associated with the estimates of -# the regression parameters based on the estimator indicated by the -# argument -# regfun: default is Theil--Sen. -# So the diagonal elements of the matrix returned by this function -# are the squared standard errors of the intercept estimator, etc. -# -# Function returns -# param.estimates: the estimate of the intercept and slopes -# covar: the covariance matrix associated with the estimator used -# s.e.: the standard errors. -# - -if(SEED)set.seed(2) -x<-as.matrix(x) -p1<-ncol(x)+1 -p<-ncol(x) -xy<-cbind(x,y) -xy<-elimna(xy) -x<-xy[,1:p] -y<-xy[,p1] -nrem=length(y) -estit=regfun(x,y,xout=xout,...)$coef -if(xout){ -m<-cbind(x,y) -flag<-outfun(x,plotit=FALSE,...)$keep -m<-m[flag,] -x<-m[,1:p] -y<-m[,p1] -} -x<-as.matrix(x) -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot) -bvec<-apply(data,1,regboot,x,y,regfun,xout=FALSE,...) -#Leverage points already removed. -# bvec is a p+1 by nboot matrix. The first row -# contains the bootstrap intercepts, the second row -# contains the bootstrap values for first predictor, etc. -sqe=var(t(bvec)) -list(param.estimates=estit,covar=sqe,s.e.=sqrt(diag(sqe))) -} -smRstr<-function(x,y,fr=1,est=tmean,nmin=1,varfun=winvar,xout=FALSE,outfun=outpro,...){ -# -# Estimate explanatory strength of an association (a generlization of -# Pearson's correlation) based on a running interval smoother -# and a leave-one-out cross-validation technique. -# Prediction error is estimated as well. 
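-# Example (illustrative call with hypothetical x and y): smRstr(x,y,fr=.8) returns str, the
-# estimated strength of the association, and pred.error, the leave-one-out estimate of
-# prediction error.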
-# -# Arguments: -# est: the measure of location to be used by rplot -# varfun: the measure of variation used when estimating prediction error -# Example: varfun=pbvar would compute the percentage bend measure of -# of varition between observed and predicted values of the -# dependent variable. -# fr: the span used by rplot -# -# Function returns -# str: strength of the association -# pred.error: prediction error -# -xy=elimna(cbind(x,y)) -if(xout){ -flag=outfun(x,plotit=FALSE)$keep -xy=xy[flag,] -} -p=ncol(xy) -pm1=p-1 -x=xy[,1:pm1] -y=xy[,p] -x=as.matrix(x) -px=ncol(x) -px1=px+1 -n=nrow(xy) -val=NA -for(i in 1:n){ -if(px==1)val[i]=runhat(xy[-i,1:px],xy[-i,px1],x[i,1:px],fr=fr,nmin=nmin,est=est) -if(px>1)val[i]=rung3hat(xy[-i,1:px],xy[-i,px1],pts=t(as.matrix(x[i,1:px])),fr=fr,est=est,...)$rmd -} -dif=y-val -dif=elimna(dif) -pe=varfun(dif) -nopre=locCV(y,varfun=varfun,locfun=est,...)# no predictor -rat=(nopre-pe)/nopre -str=0 -if(rat>0)str=sqrt(rat) -list(str=str,pred.error=pe) -} - - -clnorm<-function(n,epsilon=.1,k=10){ -# -# generate n observations from a contaminated lognormal -# distribution -# -# Using default values, median is approximately 1.14 and 20% trimmed mean is 1.33 -if(epsilon>1)stop('epsilon must be less than or equal to 1') -if(epsilon<0)stop('epsilon must be greater than or equal to 0') -if(k<=0)stop('k must be greater than 0') -val<-rlnorm(n) -uval<-runif(n) -flag<-(uval<=1-epsilon) -val[!flag]<-k*val[!flag] -val -} -twoKlin<-function(x=NULL,x1=NULL,x2=NULL,tr=.2,alpha=.05,pr=TRUE,opt=1){ -# -# A step-down MCP based on K independent tests. -# It is essential that the tests are independent. -# -# Use Fisher method based on p-values coupled with Hochberg -# -# Data are assumed to be stored in two R variables, x1 and x2 or in one -# R variable, x -# -# If stored in x1 and x2, they are assumed to be matrices with K columns -# or to have list mode, both having length K. -# -# If the data are stored in x, -# x is assumed to have 2K columns if a matrix or length 2K if it has list mode. -# -# If data are stored in x1 and x2, for each column, compute a p-value. -# That is, perform a test based on the data in column 1 of x1 and x2, -# followed by a test using the data in column 2 of x1 and x2, etc. -# -# If data are stored in x, the first test is based -# on the data in columns 1 and K+1, -# the second test is based on columns 2 and K+2, etc. 
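-#
-# For example (an illustrative sketch; the data are hypothetical): with K=3 tests
-# stored in a single matrix x having 2K=6 columns, yuen is applied to columns
-# 1 and 4, then to columns 2 and 5, then to columns 3 and 6:
-# x=matrix(rnorm(120),ncol=6)
-# twoKlin(x=x,tr=.2)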
-# -# opt=1 Fisher's method -# opt=2 Chen-Nadarajah method -# opt=3 Max method -# -if(is.null(x[1])){ -if(is.matrix(x1))x=cbind(x1,x2) -if(is.list(x1))x=c(x1,x2) -} -if(is.matrix(x))x=listm(x) -crit=NA -n1=NA -n2=NA -if(is.matrix(x) || is.data.frame(x))K2=ncol(x) -if(is.list(x))K2=length(x) -K=floor(K2/2) -if(2*K!=K2)stop('Total number of groups, K2, should be an even number') -ic=0 -ic2=K -pv=NULL -for(i in 1:K){ -ic=ic+1 -ic2=ic2+1 -testit=yuen(x[[ic]],x[[ic2]],tr=tr,alpha=alpha) -n1[ic]=testit$n1 -n2[ic]=testit$n2 -pv[ic]=testit$p.value -} -pick=NULL -v=order(pv) -ic=0 -for(i in K:1){ -K2=2*K -flag=TRUE -if(opt==1){ -i2=i*2 -if(i==K)res=(0-2)*sum(log(pv)) # Fisher test statistic -if(ialpha)flag=TRUE -if(pvF<=alpha/(K+1-i)){ -ic=ic+1 -pick=c(pick,v[ic]) -flag=FALSE -if(pv[v[ic]]>alpha)flag=TRUE -} -if(flag)break -} -Decision=rep('Not Sig',length(pv)) -if(!is.null(pick))Decision[pick]='Reject' -nsig=sum(length(pick)) -list(n1=n1,n2=n2,p.values=pv, -Decisions=as.matrix(Decision),num.sig=nsig) -} - - - - -twobicipv<-function(r1=sum(x),n1=length(x),r2=sum(y),n2=length(y),x=NA,y=NA,alpha=.05){ -# -# Compute a p-value based on Beal's method for comparing two independent -# binomials. -# -alph=seq(.001,.999,.001) -for(i in 1:length(alph)){ -pv=alph[i] -chk=twobici(r1=r1,n1=n1,r2=r2,n2=n2,x=x,y=y,alpha=alph[i])$ci #$ -if(chk[1]>0 && chk[2]>0)break -if(chk[1]<0 && chk[2]<0)break -} -reg=twobici(r1=r1,n1=n1,r2=r2,n2=n2,x=x,y=y,alpha=alpha) -list(p.value=pv,ci=reg$ci,p1=reg$p1,p2=reg$p2) -} - -ols2ci<-function(x1,y1,x2,y2,xout=FALSE,outfun=outpro,STAND=TRUE,alpha=05, -method='hoch',SO=TRUE,HC3=FALSE,plotit=TRUE,xlab='X',ylab='Y',...){ -# -# Compare the OLS regression parameters for two independent groups -# SO=TRUE means p-values adjusted only for the slopes. SO=FALSE -#. include the intercept when adjusting -# -p=ncol(as.matrix(x1)) -p1=p+1 -if(p==1 && plotit)reg2plot(x1,y1,x2,y2,xlab='X',ylab='Y',xout=xout,outfun=outpro,regfun=ols,...) -m1=elimna(cbind(x1,y1)) -m2=elimna(cbind(x2,y2)) -x1=m1[,1:p] -y1=m1[,p1] -x2=m2[,1:p] -y2=m2[,p1] -x=list() -y=list() -x[[1]]=x1 -x[[2]]=x2 -y[[1]]=y1 -y[[2]]=y2 -ivl=c(1:ncol(as.matrix(x1))) -iv=ncol(as.matrix(x1)) -iv1=iv+1 -rlab=paste('slope',ivl) -rlab=c('intercept',rlab) -res=olsWmcp(x,y,xout=xout,outfun=outfun,STAND=STAND,alpha=alpha,HC3=HC3) -outp=matrix(NA,nrow=nrow(res$output),ncol=7) -dimnames(outp)=list(rlab,c('Est.1','Est.2','Dif','ci.low','ci.up','p.value','adj.p.value')) -print(res) -outp[,1]=ols(x1,y1,xout=xout,outfun=outfun)$coef -outp[,2]=ols(x2,y2,xout=xout,outfun=outfun)$coef -outp[,3]=outp[,1]-outp[,2] -outp[,4]=res$output[,3] -outp[,5]=res$output[,4] -outp[,6]=res$output[,5] -if(!SO)outp[,7]=p.adjust(outp[,6],method=method) -else outp[2:p1,7]=p.adjust(outp[2:p1,6],method=method) -list(n=res$n,output=outp) -} - - -ancovampG<-function(x1,y1,x2,y2,fr1=1,fr2=1, tr=.2, -alpha=.05, pts=NULL,SEED=TRUE,test=yuen,DH=FALSE,FRAC=.5,cov.fun=skip.cov,ZLIM=TRUE, -pr=FALSE,q=.5,plotit=FALSE,LP=FALSE,theta=50,xlab=' X1',ylab='X2 ',SCAT=FALSE,zlab='p.value ',ticktype='detail',...){ -# -# ANCOVA: -# -# This function generalizes the R function ancovamp -# so that any hypothesis testing method -# can be used to compare groups at specified design points. -# -# No parametric assumption is made about the form of -# the regression surface--a running interval smoother is used. 
-# Design points are chosen based on depth of points in x1 if pts=NULL -# Assume data are in x1 y1 x2 and y2, can have more than one covariate -# -# test: argument test determines the method that will be used to compare groups. -# two choices: yuen, qcomhd qcomhdMC -# Example: test=qcomhd would compare medians using a percentile bootstrap -# q: controls the quantile used by qcomhd. -# -# pts: a matrix of design points at which groups are compared -# -# DH=TRUE, groups compared at the deepest (1-FRAC) design points. -# if DH=TRUE, there are two covariates and plot=TRUE, plot a smooth with dependent variable=p.values if pv=TRUE -# or the estimated difference in the measures of location if pv=FALSE -# If SCAT=TRUE, instead create a scatterplot of the points used in pts, the covariate values -# and mark the significant ones with * -# -# theta can be use to rotate the plot. -# -# SEED=TRUE sets the seed for the random number generator -# so that same result is always returned when -# using a bootstrap method or when using cov.mve or cov.mcd -# -# cov.fun: returns covariance matrix in $cov (e.g. -# skipcov does not return it in $cov, but skip.cov does. So cov.mve could be used) -# -# Returns: -# designs points where comparisons were made. -# n's used, p-values -# crit.p.value: critical p-value based on Hochberg's method for controlling FWE -# sig=1 if a signficant result based on Hochberg; 0 otherwise -# -t.sel=0 -if(identical(test,yuen))t.sel=1 -if(identical(test,qcomhd))t.sel=2 -if(identical(test,qcomhdMC))t.sel=2 -if(identical(test,binom2g))t.sel=3 -if(t.sel==0)stop('Argument test should be either yuen, qcomhd, qcomhd or binom2g') -x1=as.matrix(x1) -p=ncol(x1) -p1=p+1 -m1=elimna(cbind(x1,y1)) -x1=m1[,1:p] -y1=m1[,p1] -x2=as.matrix(x2) -p=ncol(x2) -p1=p+1 -m2=elimna(cbind(x2,y2)) -x2=m2[,1:p] -y2=m2[,p1] -# -# -# -if(is.null(pts[1])){ -x1<-as.matrix(x1) -pts<-ancdes(x1,DH=DH,FRAC=FRAC) -pts=unique(pts) -} -pts<-as.matrix(pts) -n1<-1 -n2<-1 -vecn<-1 -mval1<-cov.fun(x1) -mval2<-cov.fun(x2) -for(i in 1:nrow(pts)){ -n1[i]<-length(y1[near3d(x1,pts[i,],fr1,mval1)]) -n2[i]<-length(y2[near3d(x2,pts[i,],fr2,mval2)]) -} -flag<-rep(TRUE,nrow(pts)) -for(i in 1:nrow(pts))if(n1[i]<10 || n2[i]<10)flag[i]<-F -flag=as.logical(flag) -pts<-pts[flag,] -if(sum(flag)==1)pts<-t(as.matrix(pts)) -dd=NULL -if(sum(flag)==0){ -print('No comparable design points found, might increase span.') -pts=NULL -mat=NULL -dd=NULL -} -if(sum(flag)>0){ -mat<-matrix(NA,nrow(pts),6) -mat[,5]=0 -dimnames(mat)<-list(NULL,c('n1','n2','p.value','crit.p.value','Sig','est.dif')) -output=list() -for (i in 1:nrow(pts)){ -g1<-y1[near3d(x1,pts[i,],fr1,mval1)] -g2<-y2[near3d(x2,pts[i,],fr2,mval2)] -g1<-g1[!is.na(g1)] -g2<-g2[!is.na(g2)] -temp=NULL -if(identical(test,qcomhd))temp=qcomhd(g1,g2,q=q,plotit=FALSE) -if(identical(test,qcomhdMC))temp=qcomhdMC(g1,g2,q=q,plotit=FALSE) -if(identical(test,yuen))temp=yuen(g1,g2,tr=tr) -if(identical(test,binom2g))temp=binom2g(x=g1,y=g2) -if(is.null(temp$p.value))print('Argument test should be yuen, or qcomhd or qcomhdMC') -mat[i,3]=temp$p.value -output[[i]]=temp -mat[i,1]<-length(g1) -mat[i,2]<-length(g2) -if(t.sel==1)mat[i,6]=mean(g1,tr=tr)-mean(g2,tr=tr) -if(t.sel==2)mat[i,6]=hd(g1,q=q)-hd(g2,q=q) -if(t.sel==3)mat[i,6]=mean(g1)-mean(g2) -if(length(g1)<=5)print(paste('Warning, there are',length(g1),' points corresponding to the design point X=',pts[i,])) -if(length(g2)<=5)print(paste('Warning, there are',length(g2),' points corresponding to the design point X=',pts[i,])) -} -npt=nrow(pts) -dvec=alpha/c(1:npt) 
-temp2<-order(0-mat[,3])
-sigvec<-(mat[temp2,3]>=dvec)
-dd=0
-if(sum(sigvec)<npt)dd=npt-sum(sigvec) # number of significant results based on Hochberg's method
-mat[temp2,4]=dvec # critical p-values based on Hochberg's method
-flag=(mat[,3]<=mat[,4])
-if(sum(flag)>0)mat[flag,5]=1
-}
-if(plotit){
-if(!LP){
-library(scatterplot3d)
-scatterplot3d(pts[,1],pts[,2],mat[,3],xlab=xlab, ylab=ylab,zlab='p.value',zlim=c(0,1))
-}
-if(LP)lplot(pts,mat[,3],xlab=xlab, ylab=ylab,zlab='p.value',theta=theta,ZLIM=ZLIM,ticktype=ticktype)
-}
-list(points=pts,results=mat,num.sig=dd)
-}
-
-anclog<-function(x1,y1,x2,y2,fr1=1,fr2=1,
-alpha=.05, pts=NULL,SEED=TRUE,DH=FALSE,FRAC=.5,cov.fun=skip.cov,
-pr=FALSE,q=.5,plotit=FALSE,pv=FALSE,theta=50,xlab=' ',ylab=' ',SCAT=FALSE,zlab=' ',...){
-res=ancovampG(x1=x1,y1=y1,x2=x2,y2=y2,fr1=fr1,fr2=fr2, tr=.2,
-alpha=alpha, pts=pts,SEED=SEED,test=twobinom,DH=DH,FRAC=FRAC,cov.fun=cov.fun,
-pr=pr,q=q,plotit=plotit,pv=pv,theta=theta,xlab=xlab,ylab=ylab,SCAT=SCAT,zlab=zlab,...)
-list(points=res$points,results=res$results,num.sig=res$num.sig)
-}
-
-Qregci<-function(x,y,nboot=100,alpha=.05,
-qval=.5,q=NULL,SEED=TRUE,pr=TRUE,xout=FALSE,outfun=outpro,...){
-#
-# Test the hypothesis that the quantile regression slopes are zero.
-# Can use the .5 quantile regression line only,
-# the .2 and .8 quantile regression lines, or
-# the .2, .5 and .8 quantile regression lines.
-# In the latter two cases, FWE is controlled for alpha=.1, .05, .025 and .01.
-#
-if(!is.null(q))qval=q
-xx<-elimna(cbind(x,y))
-np<-ncol(xx)
-p<-np-1
-y<-xx[,np]
-x<-xx[,1:p]
-x<-as.matrix(x)
-if(xout){
-x<-as.matrix(x)
-flag<-outfun(x,plotit=FALSE,...)$keep
-x<-x[flag,]
-y<-y[flag]
-}
-x<-as.matrix(x)
-n<-length(y)
-if(SEED)set.seed(2) # set seed of random number generator so that
-# results can be duplicated.
-if(pr)print("Taking bootstrap samples. Please wait.")
-data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot)
-# determine critical value.
-crit<-NA -if(alpha==.1)crit<-1.645-1.19/sqrt(n) -if(alpha==.05)crit<-1.96-1.37/sqrt(n) -if(alpha==.025)crit<-2.24-1.18/sqrt(n) -if(alpha==.01)crit<-2.58-1.69/sqrt(n) -crit.fwe<-crit -if(length(qval)==2 || p==2){ -if(alpha==.1)crit.fwe<-1.98-1.13/sqrt(n) -if(alpha==.05)crit.fwe<-2.37-1.56/sqrt(n) -if(alpha==.025)crit.fwe<-2.60-1.04/sqrt(n) -if(alpha==.01)crit.fwe<-3.02-1.35/sqrt(n) -} -if(length(qval)==3 || p==3){ -if(alpha==.1)crit.fwe<-2.145-1.31/sqrt(n) -if(alpha==.05)crit.fwe<-2.49-1.49/sqrt(n) -if(alpha==.025)crit.fwe<-2.86-1.52/sqrt(n) -if(alpha==.01)crit.fwe<-3.42-1.85/sqrt(n) -} -if(is.na(crit.fwe)){ -print("Could not determine a critical value") -print("Only alpha=.1, .05, .025 and .01 are allowed") -} -if(p==1){ -bvec<-apply(data,1,Qindbt.sub,x,y,q=qval) -estsub<-NA -for(i in 1:length(qval)){ -estsub[i]<-Qreg(x,y,q=qval[i])$coef[2] -} -if(is.matrix(bvec))se.val<-sqrt(apply(bvec,1,FUN=var)) -if(!is.matrix(bvec))se.val<-sqrt(var(bvec)) -test<-abs(estsub)/se.val -ci.mat<-matrix(nrow=length(qval),ncol=3) -dimnames(ci.mat)<-list(NULL,c("Quantile","ci.lower","ci.upper")) -ci.mat[,1]<-qval -ci.mat[,2]<-estsub-crit*se.val -ci.mat[,3]<-estsub+crit*se.val -} -if(p>1){ -if(length(qval)>1){ -print("With p>1 predictors,only the first qval value is used") -} -bvec<-apply(data,1,regboot,x,y,regfun=Qreg,qval=qval[1]) -se.val<-sqrt(apply(bvec,1,FUN=var)) -estsub<-Qreg(x,y,q=qval[1])$coef -test<-abs(estsub)/se.val -ci.mat<-matrix(nrow=np,ncol=3) -dimnames(ci.mat)<-list(NULL,c("Predictor","ci.lower","ci.upper")) -ci.mat[,1]<-c(0:p) -ci.mat[,2]<-estsub-crit*se.val -ci.mat[,3]<-estsub+crit*se.val -} -list(n=length(y),test=test,se.val=se.val,crit.val=crit,crit.fwe=crit.fwe,est.values=estsub,ci=ci.mat) -} - - - - -Qindbt.sub<-function(isub,x,y,qval){ -# -# Perform regression using x[isub] to predict y[isub] -# isub is a vector of length n, -# a bootstrap sample from the sequence of integers -# 1, 2, 3, ..., n -# -# This function is used by other functions when computing -# bootstrap estimates. -# -# -# x is assumed to be a matrix containing values of the predictors. -# -xmat<-matrix(x[isub,],nrow(x),ncol(x)) -regboot<-NA -for(i in 1:length(qval)){ -regboot[i]<-Qreg(xmat,y[isub],q=qval[i])$coef[2] -} -regboot -} -binomcipv<-function(x=sum(y),nn=length(y),y=NULL,n=NA,alpha=.05,nullval=.5){ -# Compute a p-value when testing the hypothesis that the probability of -# success for a binomial distribution is equal to -# nullval, which defaults to .5 -# Pratt's method is used. -# -# y is a vector of 1s and 0s. -# Or can use the argument -# x = the number of successes observed among -# n=nn trials. -# -if(is.logical(y)){ -y=elimna(y) -temp=rep(0,length(y)) -temp[y]=1 -y=temp -} -res=binomci(x=x,nn=nn,y=y,alpha=alpha) -ci=res$ci -alph<-c(1:99)/100 -for(i in 1:99){ -irem<-i -chkit<-binomci(x=x,nn=nn,y=y,alpha=alph[i])$ci -if(chkit[1]>nullval || chkit[2]nullval || chkit[2]nullval || chkit[2]2)stop('One or two predictors only is allowed,') -p=ncol(x) -p1=p+1 -x=xy[,1:p] -y=xy[,p1] -if(xout){ -xy=cbind(x,y) -if(identical(outfun,outblp))flag=outblp(x,y,plotit=FALSE)$keep -else -flag=outfun(x)$keep -x=xy[flag,1:p] -y=xy[flag,p1] -} -if(p==1){ -plot(x,y,xlab=xlab,ylab=ylab) -abline(regfun(x,y,...)$coef) -} -if(p==2){ -pyhat=regYhat(x,y,regfun=regfun,...) 
-temp=rplot(x,pyhat,scat=FALSE,theta=theta,phi=phi,xlab=xlab,ylab=ylab,zlab=zlab,ticktype=ticktype,pr=FALSE) -} -} -FisherLSD<-function(x,alpha=.05){ -# -# Perform Fisher's LSD method -# x is assumed to be a matrix, or data frame, or to have list mode -# -if(is.matrix(x) || is.data.frame(x))x=listm(x) -n=lapply(x,length) -J=length(x) -g=NULL -X=NULL -for(j in 1:J){ -g=c(g,rep(j,n[j])) -X=c(X,x[[j]]) -} -FT=anova1(x) -res=NULL -if(FT$p.value<=alpha)res=pairwise.t.test(X,g,p.adjust.method='none') -list(ANOVA_F_p.value=FT$p.value,LSD=res) -} -dat2form<-function(x,alpha=.05){ -# -# Perform Fisher's LSD method -# x is assumed to be a matrix, or data frame, or to have list mode -# -if(is.matrix(x) || is.data.frame(x))x=listm(x) -n=lapply(x,length) -J=length(x) -g=NULL -X=NULL -for(j in 1:J){ -g=c(g,rep(j,n[j])) -X=c(X,x[[j]]) -} -g=as.factor(g) -list(x=X,g=g) -} -T.HSD<-function(x,alpha=.05,plotit=FALSE){ -# -# Perform Tukey--Kramer MCP -# -z=dat2form(x) -temp=aov(z$x~as.factor(z$g)) -v=TukeyHSD(temp,conf.level=1-alpha) -if(plotit)plot(v) -v -} -Scheffe<-function(x,con=0,alpha=.05,WARN=TRUE){ -# -# Scheffe's MCP -# -# The data are assumed to be stored in $x$ in list mode, a matrix -# or a data frame. If in list mode, -# length(x) is assumed to correspond to the total number of groups. -# It is assumed all groups are independent. -# -# con is a J by d matrix containing the contrast coefficients that are used. -# If con is not specified, all pairwise comparisons are made. -# -# Missing values are automatically removed. -# -# -if(WARN)print('WARNING: Suggest using lincon instead') -if(is.data.frame(x))x=as.matrix(x) -if(is.matrix(x))x<-listm(x) -if(!is.list(x))stop('Data must be stored in a matrix or in list mode.') -con<-as.matrix(con) -J<-length(x) -n=NA -xbar<-NA -for(j in 1:J){ -xx<-!is.na(x[[j]]) -val<-x[[j]] -x[[j]]<-val[xx] # Remove missing values -n[j]=length(x[[j]]) -xbar[j]<-mean(x[[j]]) -} -N=sum(n) -df2=N-J -AOV=anova1(x) -if(sum(con^2)==0){ -CC<-(J^2-J)/2 -df1=J-1 -psihat<-matrix(0,CC,6) -dimnames(psihat)<-list(NULL,c('Group','Group','psihat','ci.lower','ci.upper', -'p.value')) -test<-matrix(NA,CC,5) -dimnames(test)<-list(NULL,c('Group','Group','test','crit','se')) -jcom<-0 -for (j in 1:J){ -for (k in 1:J){ -if (j < k){ -jcom<-jcom+1 -test[jcom,3]<-abs(xbar[j]-xbar[k])/sqrt((J-1)*AOV$MSWG*(1/n[j]+1/n[k])) -sejk<-sqrt((CC-1)*AOV$MSWG*(1/n[j]+1/n[k])) -test[jcom,5]<-sqrt(AOV$MSWG*(1/n[j]+1/n[k])) -psihat[jcom,1]<-j -psihat[jcom,2]<-k -test[jcom,1]<-j -test[jcom,2]<-k -psihat[jcom,3]<-(xbar[j]-xbar[k]) -psihat[jcom,6]<-1-pf(test[jcom,3]^2,df1,df2) -crit=sqrt(qf(1-alpha,df1,df2)) -test[jcom,4]<-crit -psihat[jcom,4]<-(xbar[j]-xbar[k])-crit*sejk -psihat[jcom,5]<-(xbar[j]-xbar[k])+crit*sejk -}}}} -if(sum(con^2)>0){ -if(nrow(con)!=length(x)){ -stop('The number of groups does not match the number of contrast coefficients.') -} -psihat<-matrix(0,ncol(con),5) -dimnames(psihat)<-list(NULL,c('con.num','psihat','ci.lower','ci.upper', -'p.value')) -test<-matrix(0,ncol(con),4) -dimnames(test)<-list(NULL,c('con.num','test','crit','se')) -df1<-nrow(con)-1 -df2=N-nrow(con) -crit=sqrt(qf(1-alpha,df1,df2)) -for (d in 1:ncol(con)){ -psihat[d,1]<-d -psihat[d,2]<-sum(con[,d]*xbar) -sejk<-sqrt(df1*AOV$MSWG*sum(con[,d]^2/n)) -test[d,1]<-d -test[d,2]<-sum(con[,d]*xbar)/sejk -test[d,3]<-crit -test[d,4]=sqrt(AOV$MSWG*sum(con[,d]^2/n)) -psihat[d,3]<-psihat[d,2]-crit*sejk -psihat[d,4]<-psihat[d,2]+crit*sejk -psihat[d,5]<-(1-pf(test[d,2]^2,df1,df2)) -} -} -list(n=n,test=test,psihat=psihat) -} - -sintmcp<-function(x, 
con=0, alpha=0.05){ -# -# Dependent groups -# Multiple comparisons using medians on difference scores -# -flagcon=F -if(!is.matrix(x))x<-matl(x) -if(!is.matrix(x))stop('Data must be stored in a matrix or in list mode.') -con<-as.matrix(con) -J<-ncol(x) -#xbar<-NULL -x<-elimna(x) # Remove missing values -nval<-nrow(x) -if(sum(con^2!=0))CC<-ncol(con) -if(sum(con^2)==0)CC<-(J^2-J)/2 -ncon<-CC -if(alpha==.05){ -dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) -if(ncon > 10){ -avec<-.05/c(11:ncon) -dvec<-c(dvec,avec) -}} -if(alpha==.01){ -dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) -if(ncon > 10){ -avec<-.01/c(11:ncon) -dvec<-c(dvec,avec) -}} -if(alpha != .05 && alpha != .01)dvec<-alpha/c(1:ncon) -if(sum(con^2)==0){ -flagcon<-TRUE -psihat<-matrix(0,CC,7) -dimnames(psihat)<-list(NULL,c('Group','Group','psihat','ci.lower','ci.upper','p.value','p.crit')) -temp1<-0 -jcom<-0 -for (j in 1:J){ -for (k in 1:J){ -if (j < k){ -jcom<-jcom+1 -dv<-x[,j]-x[,k] -temp=sintv2(dv,pr=FALSE) -temp1[jcom]<-temp$p.value -psihat[jcom,1]<-j -psihat[jcom,2]<-k -psihat[jcom,3]<-median(dv) -psihat[jcom,4]<-temp$ci.low -psihat[jcom,5]<-temp$ci.up -psihat[jcom,6]<-temp$p.value -}}} -temp2<-order(0-temp1) -zvec<-dvec[1:ncon] -sigvec<-(psihat[temp2,6]>=zvec) -dd=0 -if(sum(sigvec)0){ -if(nrow(con)!=ncol(x))warning('The number of groups does not match the number - of contrast coefficients.') -ncon<-ncol(con) -psihat<-matrix(0,ncol(con),6) -dimnames(psihat)<-list(NULL,c('con.num','psihat','ci.lower','ci.upper','p.value','p.crit')) -temp1<-NA -for (d in 1:ncol(con)){ -psihat[d,1]<-d -for(j in 1:J){ -if(j==1)dval<-con[j,d]*x[,j] -if(j>1)dval<-dval+con[j,d]*x[,j] -} -temp=sintv2(dval,pr=FALSE) -temp1[d]=temp$p.value -psihat[d,5]=temp$p.value -psihat[d,2]<-median(dval) -psihat[d,3]<-temp$ci.low -psihat[d,4]<-temp$ci.up -} -temp2<-order(0-temp1) -zvec<-dvec[1:ncon] -sigvec<-(psihat[temp2,5]>=zvec) -psihat[temp2,6]<-zvec -dd=0 -if(sum(sigvec)0)x[1:N,1:3]=NA -data[[i]]=x -} -if(MC){ -library(parallel) -res=mclapply(data,anc2COV.sub,FRAC=FRAC,TPM=TPM,tau=tau) -} -if(!MC)res=lapply(data,anc2COV.sub,FRAC=FRAC,TPM=TPM,tau=tau) -M=as.vector(list2mat(res)) -M=sort(M) -ic=round(alpha*iter) -crit=M[ic] -list(crit.val=crit,M=M) -} - -anc2COV.sub<-function(data,FRAC=.5,TPM=FALSE,tau=.05){ -val=ancovampG(data[,1:2],data[,3],data[,4:5],data[,6],DH=TRUE,SEED=FALSE,test=yuen,FRAC=FRAC,cov.fun=covl)$results[,3] -val=elimna(val) -if(!TPM)val=mean(val,na.rm=TRUE) -if(TPM)val=sum(log(val[val<=tau])) -val -} -covl<-function(x){ -res=cov(x) -list(cov=res) -} - -twoDcorR_sub<-function(data,x,y,corfun=wincor,...){ -# -# Used by TwoDcorR -# -rv=corfun(x[data,1],y[data],...)$cor -rv[2]=corfun(x[data,2],y[data],...)$cor -rv -} - - -################################################## -# R code written by: # -# # -# Jimmy A. Doi (jdoi@calpoly.edu) # -# Department of Statistics # -# Cal Poly State Univ, San Luis Obispo # -# Web: www.calpoly.edu/~jdoi # -# # -# ............................................ # -# # -# If using please cite: # -# # -# Schilling, M., Doi, J. # -# "A Coverage Probability Approach to Finding # -# an Optimal Binomial Confidence Procedure", # -# The American Statistician, 68, 133-145. # -# # -# ............................................ # -# # -# Shiny app site: jdoi.shinyapps.io/LCO-CI # -# # -# ............................................ 
# -# # -# Code updated on: 1AUG2014 # -################################################## - - -############################################################################## -# The function LCO.CI() generates the LCO confidence intervals # -# for X = 0, 1, ..., n for a specified n and confidence level. # -# # -# Example: To generate all LCO confidence intervals at n=20, # -# level=.90, and 3rd decimal place accuracy, use # -# # -# > LCO.CI(20,.90,3) # -############################################################################## - - -LCO.CI <- function(n,level,dp) -{ - - # For desired decimal place accuracy of dp, search on grid using (dp+1) - # accuracy then round final results to dp accuracy. - iter <- 10**(dp+1) - - p <- seq(0,.5,1/iter) - - - ############################################################################ - # Create initial cpf with AC[l,u] endpoints by choosing coverage - # probability from highest acceptance curve with minimal span. - - - cpf.matrix <- matrix(NA,ncol=3,nrow=iter+1) - colnames(cpf.matrix)<-c("p","low","upp") - - for (i in 1:(iter/2+1)){ - p <- (i-1)/iter - - bin <- dbinom(0:n,n,p) - x <- 0:n - pmf <- cbind(x,bin) - - # Binomial probabilities ordered in descending sequence - pmf <- pmf[order(-pmf[,2],pmf[,1]),] - pmf <- data.frame(pmf) - - # Select the endpoints (l,u) such that AC[l,u] will - # be at least equal to LEVEL. The cumulative sum of - # the ordered pmf will identify when this occurs. - m.row <- min(which((cumsum(pmf[,2])>=level)==TRUE)) - low.val <-min(pmf[1:m.row,][,1]) - upp.val <-max(pmf[1:m.row,][,1]) - - cpf.matrix[i,] <- c(p,low.val,upp.val) - - # cpf flip only for p != 0.5 - - if (i != iter/2+1){ - n.p <- 1-p - n.low <- n-upp.val - n.upp <- n-low.val - - cpf.matrix[iter+2-i,] <- c(n.p,n.low,n.upp) - } - } - - - ############################################################################ - # LCO Gap Fix - # If the previous step yields any violations in monotonicity in l for - # AC[l,u], this will cause a CI gap. Apply fix as described in Step 2 of - # algorithm as described in paper. - - # For p < 0.5, monotonicity violation in l can be determined by using the - # lagged difference in l. If the lagged difference is -1 a violation has - # occurred. The NEXT lagged difference of +1 identifies the (l,u) pair to - # substitute with. The range of p in violation would be from the lagged - # difference of -1 to the point just before the NEXT lagged difference of - # +1. Substitute the old (l,u) with updated (l,u) pair. Then make required - # (l,u) substitutions for corresponding p > 0.5. - - # Note the initial difference is defined as 99 simply as a place holder. 
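- # For instance (illustrative numbers, not produced by this code): if the lower
- # endpoints l were 0,1,2,1,1,2,... then diff.l would begin 99,1,1,-1,0,1,...;
- # the -1 at position 4 flags the violation and the next +1 at position 6 supplies
- # the (l,u) pair that is substituted over positions 4 through 5.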
- - diff.l <- c(99,diff(cpf.matrix[,2],differences = 1)) - - if (min(diff.l)==-1){ - - for (i in which(diff.l==-1)){ - j <- min(which(diff.l==1)[which(diff.l==1)>i]) - new.low <- cpf.matrix[j,2] - new.upp <- cpf.matrix[j,3] - cpf.matrix[i:(j-1),2] <- new.low - cpf.matrix[i:(j-1),3] <- new.upp - } - - # cpf flip - pointer.1 <- iter - (j - 1) + 2 - pointer.2 <- iter - i + 2 - - cpf.matrix[pointer.1:pointer.2,2]<- n - new.upp - cpf.matrix[pointer.1:pointer.2,3]<- n - new.low - } - - - ############################################################################ - # LCO CI Generation - - ci.matrix <- matrix(NA,ncol=3,nrow=n+1) - rownames(ci.matrix) <- c(rep("",nrow(ci.matrix))) - colnames(ci.matrix) <- c("x","lower","upper") - - # n%%2 is n mod 2: if n%%2 == 1 then n is odd - # n%/%2 is the integer part of the division: 5/2 = 2.5, so 5%/%2 = 2 - - if (n%%2==1) x.limit <- n%/%2 - if (n%%2==0) x.limit <- n/2 - - for (x in 0:x.limit) - { - num.row <- nrow(cpf.matrix[(cpf.matrix[,2]<=x & x<=cpf.matrix[,3]),]) - - low.lim <- - round(cpf.matrix[(cpf.matrix[,2]<=x & x<=cpf.matrix[,3]),][1,1], - digits=dp) - - upp.lim <- - round(cpf.matrix[(cpf.matrix[,2]<=x & x<=cpf.matrix[,3]),][num.row,1], - digits=dp) - - ci.matrix[x+1,]<-c(x,low.lim,upp.lim) - - # Apply equivariance - n.x <- n-x - n.low.lim <- 1 - upp.lim - n.upp.lim <- 1 - low.lim - - ci.matrix[n.x+1,]<-c(n.x,n.low.lim,n.upp.lim) - } - - - heading <- matrix(NA,ncol=1,nrow=1) - - heading[1,1] <- - paste("LCO Confidence Intervals for n = ",n," and Level = ",level,sep="") - - rownames(heading) <- c("") - colnames(heading) <- c("") - -# print(heading,quote=FALSE) - - # print(ci.matrix) -ci.matrix -} - - -############################################################################## -# The function LCO.CI() generates the LCO confidence intervals # -# for X = 0, 1, ..., n for a specified n and confidence level. # -# # -# Example: To generate all LCO confidence intervals at n=20, # -# level=.90, and 3rd decimal place accuracy, use # -# # -# > LCO.CI(20,.90,3) # -############################################################################## - -binomLCO<-function (x = sum(y), nn = length(y), y = NULL, n = NA, alpha = 0.05){ -# -# Compute a confidence interval for the probability of success using the method in -# -# Schilling, M., Doi, J. (2014) -# A Coverage Probability Approach to Finding -# an Optimal Binomial Confidence Procedure, -# The American Statistician, 68, 133-145. -# -if(!is.null(y)){ -y=elimna(y) -nn=length(y) -} -if(nn==1)stop('Something is wrong: number of observations is only 1') -cis=LCO.CI(nn,1-alpha,3) -ci=cis[x+1,2:3] -list(phat=x/nn,ci=ci,n=nn) -} - -twoDcorR<-function(x,y,corfun=wincor,alpha=.05,nboot=500,SEED=TRUE,MC=FALSE,outfun=outpro,...){ -# -# Comparing two robust dependent correlations: Overlapping case -# Winsorized correlation is used by default. -# -# x is assumed to be a matrix with 2 columns -# -# Compare correlation of x[,1] with y to x[,2] with y -# -# The confidence interval is returned in ci -# The estimates of the correlations are returned in est.rho1 and est.rho2 -# -if(nrow(x)!=length(y))stop('x and y have different sample sizes; should be equal') -if(ncol(x)!=2)stop('Argument x should have two columns') -m1=cbind(x,y) -m1<-elimna(m1) # Eliminate rows with missing values -nval=nrow(m1) -x<-m1[,1:2] -y=m1[,3] -est<-cor2xy(x,y,corfun=corfun,...)$cor -r12=est[1] -r13=est[2] -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. 
-# -# If you use corfun=scor, set plotit=F -# -data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot) -data=listm(t(data)) -if(MC){ -library(parallel) -bvec<-mclapply(data,twoDcorR_sub,x,y,corfun,...) -} -if(!MC)bvec<-lapply(data,twoDcorR_sub,x,y,corfun,...) -mat=matrix(NA,nrow=nboot,ncol=2) -for(i in 1:nboot)mat[i,]=bvec[[i]] -ihi<-floor((1-alpha/2)*nboot+.5) -ilow<-floor((alpha/2)*nboot+.5) -bsort<-sort(mat[,1]-mat[,2]) -ci12<-1 -ci12[1]<-bsort[ilow] -ci12[2]<-bsort[ihi] -pv=mean(bsort<0)+.5*mean(bsort==0) -pv=2*min(c(pv,1-pv)) -list(est.rho1=r12,est.rho2=r13,ci=ci12,p.value=pv) -} - -spearci<-function(x,y,nboot=1000,alpha=.05,SEED=TRUE,MC=FALSE){ -if(!MC)res=corb(x,y,corfun=spear,nboot=nboot,alpha=alpha,SEED=SEED) -if(MC)res=corbMC(x,y,corfun=spear,nboot=nboot,alpha=alpha,SEED=SEED) -res -} -tauci<-function(x,y,nboot=1000,alpha=.05,SEED=TRUE,MC=FALSE){ -if(!MC)res=corb(x,y,corfun=tau,nboot=nboot,alpha=alpha,SEED=SEED) -if(MC)res=corbMC(x,y,corfun=tau,nboot=nboot,alpha=alpha,SEED=SEED) -res -} -tscor<-function(x,y,xout = FALSE, outfun = out, varfun = winvar, -WARN = TRUE, HD = FALSE, ...){ -# -# Correlation coefficient (explanatory measure of association) -# based on the Theil--Sen estimator -# -# To get a p.value, use the R function corb -# -temp=tsreg(x,y,varfun=varfun,xout=xout,outfun=outfun,HD=HD) -val=sign(temp$coef[2])*temp$Strength.Assoc -list(cor=val) -} - - -tscorci<-function(x,y,nboot=599,alpha=.05,SEED=TRUE,MC=FALSE){ -if(!MC)res=corb(x,y,corfun=tscor,nboot=nboot,alpha=alpha,SEED=SEED) -if(MC)res=corbMC(x,y,corfun=tscor,nboot=nboot,alpha=alpha,SEED=SEED) -res -} -wincorci<-function(x,y,nboot=1000,alpha=.05,SEED=TRUE,MC=FALSE,tr=0.2){ -if(!MC)res=corb(x,y,corfun=wincor,nboot=nboot,alpha=alpha,SEED=SEED,tr=tr) -if(MC)res=corbMC(x,y,corfun=wincor,nboot=nboot,alpha=alpha,SEED=SEED,tr=tr) -res -} -twoDNOV<-function(x,y,corfun=wincor,alpha=.05,nboot=500,SEED=TRUE,MC=FALSE){ -# -# Comparing two robust dependent correlations: Non-overlapping case -# Winsorized correlation is used by default. -# -# Both x and y are assumed to be a matrix with 2 columns -# -# Compare correlation of x[,1] x[,2] to the correlation between -# y[,1] and y[,2] -# -if(nrow(x)!=nrow(y))stop('x and y have different sample sizes; should be equal') -m1=cbind(x,y) -if(ncol(m1)!=4)stop('Both x and y should have two columns') -m1<-elimna(m1) # Eliminate rows with missing values -nval=nrow(m1) -x<-m1[,1:2] -y=m1[,3:4] -r12=corfun(x[,1],x[,2])$cor -r13=corfun(y[,1],y[,2])$cor -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -# -# If you use corfun=scor, set plotit=F -# -data<-matrix(sample(nrow(y),size=nrow(y)*nboot,replace=TRUE),nrow=nboot) -data=listm(t(data)) -if(MC){ -library(parallel) -bvec1<-mclapply(data,corbsub,x[,1],x[,2],corfun) -bvec2<-mclapply(data,corbsub,y[,1],y[,2],corfun) -} -if(!MC){ -bvec1<-lapply(data,corbsub,x[,1],x[,2],corfun) -bvec2<-lapply(data,corbsub,y[,1],y[,2],corfun) -} -mat1=matl(bvec1) -mat2=matl(bvec2) -ihi<-floor((1-alpha/2)*nboot+.5) -ilow<-floor((alpha/2)*nboot+.5) -bsort<-sort(mat1-mat2) -ci12<-bsort[ilow] -ci12[2]<-bsort[ihi] -ci12 -pv=mean(bsort<0) -pv=2*min(c(pv,1-pv)) -list(est.rho1=r12,est.rho2=r13,est.dif=r12-r13,ci=ci12,p.value=pv) -} - -comdvar<-function(x,y,alpha=.05){ -# -# Test the hypothesis that two dependent variables have equal variances. -# A heteroscedastic version of the Morgan-Pitman test is used. 
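-#
-# Example (a minimal sketch using simulated data; the data are hypothetical):
-# set.seed(3)
-# x=rnorm(40)
-# y=rnorm(40,sd=2)+x
-# comdvar(x,y) # tests H0: VAR(X)=VAR(Y) via the association between X-Y and X+Y
-#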
-# (The HC4 estimator is used to deal with heteroscedasticity) -# -xy=elimna(cbind(x,y)) -est1=var(xy[,1]) -est2=var(xy[,2]) -pv=pcorhc4(xy[,1]-xy[,2],xy[,1]+xy[,2],alpha=alpha) -list(p.value=pv$p.value, est1=est1, est2=est2,test.stat=pv$test.stat) -} -ptests<-function(pv,Fisher=TRUE){ -# -# pv: p-values based on N independent tests -# Test hypothesis that all N null hypotheses are true. -# Fisher=TRUE, use Fisher's method -# Fisher=FALSE, use Chen-Nadarajah method -# -ntests=length(pv) -if(Fisher){ -res=(0-2)*sum(log(pv)) # Fisher test statistic -pvF=1-pchisq(res,2*ntests) #Fisher p-value based on all tests. -} -if(!Fisher){ -res=sum(qnorm(pv/2)^2) # C-N test -pvF=1-pchisq(res,ntests) -} -list(test.stat=res,p.value=pvF) -} -mcpPV<-function(pv,alpha=.05,opt=1){ -# -# pv: A collection of p-values based on independent tests -# -# Perform the step-down multiple comparison method in -# Wilcox, R. R. \& Clark, F. (in press). -# Robust multiple comparisons based on combined -# probabilities from independent tests. Journal of Data Science -# based on K independent p-values -# -# opt=1 Fisher's method -# opt=2 Chen-Nadarajah method -# opt=3 Max method -# -K=length(pv) -pick=NULL -v=order(pv) -ic=0 -for(i in K:1){ -flag=TRUE -if(opt==1){ -i2=i*2 -if(i==K)res=(0-2)*sum(log(pv)) # Fisher test statistic -if(ialpha)flag=TRUE -if(pvF<=alpha/(K+1-i)){ -ic=ic+1 -pick=c(pick,v[ic]) -flag=FALSE -if(pv[v[ic]]>alpha)flag=TRUE -} -if(flag)break -} -Decision=rep('Not Sig',length(pv)) -if(!is.null(pick))Decision[pick]='Reject' -nsig=sum(length(pick)) -list(p.values=pv, -Decisions=as.matrix(Decision),num.sig=nsig) -} -rungenv2<-function(x, y, est = onestep, fr = 1, LP = TRUE, ...){ -# -# Return x and predicted y values not sorted in ascending order, -# rather, keep x as originally entered and return corresponding Yhat values -# -xord=order(x) -res=rungen(x,y,est=est,fr=fr,LP=LP,pyhat=TRUE,plotit=FALSE)$output -res[order(xord)] -} - - -adrun<-function(x,y,est=tmean,iter=10,pyhat=FALSE,plotit=TRUE,fr=1,xlab='X', -ylab='Y',zlab='', -theta=50,phi=25,expand=.5,scale=TRUE,zscale=TRUE,xout=FALSE,eout=xout,outfun=out,ticktype='simple',...){ -# -# additive model based on running interval smoother -# and backfitting algorithm -# -m<-elimna(cbind(x,y)) -if(xout){ -flag<-outfun(x,plotit=FALSE)$keep -x=x[flag,] -y=y[flag] -} -x<-as.matrix(x) -p<-ncol(x) -if(p==1)val<-rungen(x[,1],y,est=est,pyhat=TRUE,plotit=plotit,fr=fr, -xlab=xlab,ylab=ylab,...)$output -if(p>1){ -library(MASS) -library(akima) -np<-p+1 -x<-m[,1:p] -y<-m[,np] -fhat<-matrix(NA,ncol=p,nrow=length(y)) -fhat.old<-matrix(NA,ncol=p,nrow=length(y)) -res<-matrix(NA,ncol=np,nrow=length(y)) -dif<-1 -for(i in 1:p) -fhat.old[,i]<-rungenv2(x[,i],y,est=est,pyhat=TRUE,plotit=FALSE,fr=fr,...) -eval<-NA -for(it in 1:iter){ -for(ip in 1:p){ -res[,ip]<-y -for(ip2 in 1:p){ -if(ip2 != ip)res[,ip]<-res[,ip]-fhat.old[,ip2] -} -fhat[,ip]<-rungenv2(x[,ip],res[,ip],est=est,pyhat=TRUE,plotit=FALSE,fr=fr,...) -} -eval[it]<-sum(abs(fhat/sqrt(sum(fhat^2))-fhat.old/sqrt(sum(fhat.old^2)))) -if(it > 1){ -itm<-it-1 -dif<-abs(eval[it]-eval[itm]) -} -fhat.old<-fhat -if(dif<.01)break -} -val<-apply(fhat,1,sum) -aval<-est(y-val,...) 
-val<-val+aval
-if(plotit && p==2){
-fitr<-val
-iout<-c(1:length(fitr))
-nm1<-length(fitr)-1
-for(i in 1:nm1){
-ip1<-i+1
-for(k in ip1:length(fitr))if(sum(x[i,]==x[k,])==2)iout[k]<-0
-}
-fitr<-fitr[iout>=1] # Eliminate duplicate points in the x-y plane
-# This is necessary when doing three dimensional plots
-# with the R function interp
-mkeep<-x[iout>=1,]
-fitr<-interp(mkeep[,1],mkeep[,2],fitr)
-persp(fitr,theta=theta,phi=phi,xlab=xlab,ylab=ylab,zlab=zlab,expand=expand,
-scale=scale,ticktype=ticktype)
-}}
-if(!pyhat)val<-'Done'
-val
-}
-ancov2COV<-function(x1,y1,x2,y2,tr=.2,test=yuen,cr=NULL,pr=TRUE,DETAILS=FALSE,cp.value=FALSE,
-plotit=FALSE,xlab='X',ylab='Y',zlab=NULL,span=.75,PV=TRUE,FRAC=.5,MC=FALSE,q=.5,
-iter=1000,alpha=.05,TPM=FALSE,tau=.05,est=tmean,fr=1,...){
-#
-# ANCOVA with two covariates, no parametric assumption about the regression surface.
-# Use all design points nested deeply within the cloud of data.
-# Global test statistic is the average of the p-values based on
-# Yuen's test performed at each of the deepest
-# points where comparisons can be made.
-#
-# TPM=TRUE: replace the average of the p-values with the test statistic
-# studied by
-# Zaykin, D. V., Zhivotovsky, L. A., Westfall, P. H., & Weir, B.S. (2002).
-# Truncated product method for combining p-values.
-# Genetic Epidemiology 22, 170--185.
-#
-# x1 and x2 are assumed to be a matrix or data frame with two columns
-#
-# if plotit=TRUE, then if
-# PV=TRUE, create a plot of the p.values as a function of the two covariates
-# using LOESS.
-# if PV=FALSE, plot the estimated difference between the dependent variables as a function of
-# the covariates
-#
-# By default, Yuen's test is used, but other tests can be used via the argument
-# test (allowed choices: yuen or qcomhd)
-#
-# pr=TRUE: warning messages will be printed
-#
-# DETAILS=TRUE: all p.values are reported for all covariate points used.
-#
-# span: the span used by LOESS
-
-# fr: span used by rung3hat when estimating the difference between
-# predicted Y for group 1 minus predicted Y for group 2.
-#
-# FRAC is the fraction of least deep covariate points that are ignored
-#
-# MC=TRUE: use a multicore processor to compute a critical value and global p.value
-#
-# cp.value=TRUE: compute a p.value for the global hypothesis of no differences.
-#
-# iter=1000: number of iterations used to compute a critical value or global p.value
-#
-# Function returns:
-# test.stat: the test statistic (the average of the p-values, or their truncated product when TPM=TRUE)
-# crit.value: the critical value; reject if test.stat<=crit.value
-# min.p.val.point: the values of the covariate that had the smallest p-value
-# min.p.value: the minimum p-value among all p-values that were computed.
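-#
-# Example (a minimal sketch using simulated data; the data are hypothetical):
-# set.seed(6)
-# x1=matrix(rnorm(200),ncol=2); y1=rnorm(100)+x1[,1]
-# x2=matrix(rnorm(200),ncol=2); y2=rnorm(100)
-# ancov2COV(x1,y1,x2,y2,tr=.2)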
-# -com.p.value=cp.value -if(pr)print('Reject if test.stat is less than or equal to crit.value') -if(FRAC<=0 || FRAC >=1)stop('FRAC should be a value between 0 and 1.') -if(ncol(x1)!=2)stop('Should have two covariates') -xy1=elimna(cbind(x1,y1)) -x1=xy1[,1:2] -y1=xy1[,3] -xy2=elimna(cbind(x2,y2)) -x2=xy2[,1:2] -y2=xy2[,3] -n=min(c(nrow(x1),nrow(x2))) -if(n<50){ -if(pr)print('Warning: sample size is less than 50; critical value unknown') -} -if(is.null(cr)){ -if(n>=50 & n<=80)cr=as.vector(regYhat(c(50,75),c(.23,.264),xr=n)) -if(n>80)cr=.27 -} -flag0=is.null(cr) -flag1=FRAC!=.5 -flag3=flag0+flag1+com.p.value+TPM -if(flag3>0){ -comp.pv=anc2COV.CV(nrow(x1),nrow(x2),iter=iter,MC=MC,TPM=TPM,tau=tau) -MV=sort(comp.pv$M) -ic=round(alpha*iter) -cr=MV[ic] -} -DONE=FALSE -if(identical(test,qcomhd)){ -val=ancovampG(x1,y1,x2,y2,DH=TRUE,SEED=TRUE,test=qcomhd,q=q,FRAC=FRAC) -DONE=TRUE -} -if(identical(test,qcomhdMC)){ -val=ancovampG(x1,y1,x2,y2,DH=TRUE,SEED=TRUE,test=qcomhdMC,q=q,FRAC=FRAC) -DONE=TRUE -} -if(!DONE){ -if(identical(test,yuen))val=ancovampG(x1,y1,x2,y2,DH=TRUE,SEED=TRUE,test=yuen,tr=tr,FRAC=FRAC) -#if(!identical(test,yuen))val=ancovampG(x1,y1,x2,y2,DH=TRUE,SEED=TRUE,test=test,FRAC=FRAC,...) -} -est.dif=rung3hat(x1,y1,fr=fr,pts=val$points,est=est)$rmd-rung3hat(x2,y2,pts=val$points,fr=fr,est=est)$rmd -pavg=mean(val$results[,3]) -if(TPM){ -vals=val$results[,3] -vals=elimna(vals) -pavg=sum(log(vals[vals<=tau])) -} -mpv=which(val$results[,3]==min(val$results[,3])) -points=val$points -results=val$results -rem.res=results[mpv,3] -rem.points=points[mpv,] -points=cbind(points,est.dif) -dimnames(points)=list(NULL,c('COV 1','COV 2','EST.DIF')) -if(plotit){ -if(is.null(zlab)){ -if(PV)zlab='P-Value' -if(!PV)zlab='Est.Dif' -} -if(PV)lplot(points[,1:2],results[,3],xlab=xlab,ylab=ylab,zlab=zlab,tick='det',span=span) -if(!PV)lplot(points[,1:2],est.dif,xlab=xlab,ylab=ylab,zlab=zlab,tick='det',span=span) -} -nk=nrow(points) -if(!DETAILS){ -points=NULL -results=NULL -} -pval=NULL -if(com.p.value || TPM)pval=1-mean(pavg<=comp.pv$M) -list(num.points.used=nk,test.stat=pavg,crit.value=cr,GLOBAL.p.value=pval,min.p.val.point=rem.points,min.p.value=rem.res,all.points.used=points,all.results=results[,1:3]) -} - -Qanova<-function(x,q=.5,op=3,nboot=2000,MC=FALSE,SEED=TRUE){ -# -# Test global hypothesis that J independent groups -# have equal medians. -# Performs well when there are tied values. -# -# Basically, use pbadepth in conjunction with the Harrell--Davis -# estimator. -# -# -if(is.matrix(x) || is.data.frame(x))x=listm(x) -chkcar=NA -for(j in 1:length(x))chkcar[j]=length(unique(x[[j]])) -if(min(chkcar<20)){ -print('Warning: Sample size is less than') -print('20 for one or more groups. Type I error might not be controlled') -} -output=pbadepth(x,est=hd,q=q,allp=TRUE,SEED=SEED,op=op,nboot=nboot,MC=MC,na.rm=TRUE) -output -} - -runYhat<-function(x,y,pts=NULL,est=tmean,fr=1,nmin=1,xout=FALSE,outfun=outpro,XY.used=FALSE,...){ -# -# Fit a running interval smoother using the data in x and y -# Use the fit to estimate the typical value of Y -# corresponding to the covariates values in pts -# -# pts=NULL means all points in x, after missing values are removed, are used. That is, predict y for each x -# -x<-as.matrix(x) -p=ncol(x) -p1=p+1 -xx<-cbind(x,y) -xx<-elimna(xx) -x=xx[,1:p] -y=xx[,p1] -if(is.null(pts))pts=x -x=as.matrix(x) -if(xout){ -flag<-outfun(x,plotit=FALSE,...)$keep -x<-x[flag,] -y<-y[flag] -x<-as.matrix(x) -} -if(ncol(x)==1){ -vals=runhat(x[,1],y,pts=pts,est=est,fr=fr,nmin=nmin,...) 
-nvals=1 -for(i in 1:length(pts)){ -nvals[i]<-length(y[near(x[,1],pts[i],fr=fr)]) -} -} -if(ncol(x)>1){ -temp=rung3hat(x,y,pts=pts,est=est,fr=fr,...) -vals=temp$rmd -nvals=temp$nval -} -XY=NULL -if(XY.used)XY=cbind(x,y) -list(Y.hat=vals,nvals=nvals,xy.used=XY,pts.used=pts) -} - -rplot.pred=runYhat - -wmwpb<-function(x,y=NULL,est=median,alpha=.05,nboot=2000,SEED=TRUE,pr=TRUE, -na.rm=TRUE,...){ -# -# Compute a bootstrap confidence interval for a -# measure of location associated with -# the distribution of x-y, -# est indicates which measure of location will be used -# x and y are possibly dependent -# -# loc2dif.ci computes a non-bootstrap confidence interval -# -if(is.null(y[1])){ -if(!is.matrix(x) & !is.data.frame(x))stop('With y missing, x should be a matrix') -y=x[,2] -x=x[,1] -} -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -data1<-matrix(sample(length(x),size=length(x)*nboot,replace=TRUE),nrow=nboot) -data2<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot) -bvec<-NA -for(i in 1:nboot)bvec[i]<-wmwloc(x[data1[i,]],y[data2[i,]],est=est,na.rm=na.rm,...) -bvec<-sort(bvec) -low<-round((alpha/2)*nboot)+1 -up<-nboot-low -temp<-sum(bvec<0)/nboot+sum(bvec==0)/(2*nboot) -sig.level<-2*(min(temp,1-temp)) -estdiff=wmwloc(x,y,est=est,na.rm=na.rm,...) -list(estimate=estdiff,ci=c(bvec[low],bvec[up]),p.value=sig.level) -} -idealfIQR<-function(x){ -# -# Compute the interquartile range using the ideal fourths. -x=elimna(x) -res=idealf(x)$qu-idealf(x)$ql -res -} -conCON<-function(J,conG=1){ -# -# Create contrast coefficients for comparisons to a controll -# -# J = number of groups including the control group. -# conG = the group that is the control. By default, assume group 1 is the control -# -Jm1=J-1 -A=matrix(rep(1,Jm1^2+Jm1),nrow=J) -A[-conG,]=-1*diag(Jm1) -if(conG>1)A=-1*A -list(conCON=A) -} - -btsqrk<-function(alist,alpha=0.05,tr=0.2){ -#computes B2_tk test statistics for k independent samples. -#alist should be a list type object -#s's are computed by trimse which can be found in all Rallfun files written by Wilcox Rand -k<-length(alist) -# Remove any missing values in alist -for (i in 1:k){alist[[i]]<-alist[[i]][!is.na(alist[[i]])]} -zc<-qnorm(alpha/2) -e=trunc(tr*sapply(alist,length)) -f<-(sapply(alist,length))-(2*e) -s=sapply(alist,trimse,tr=tr)^2 -wden=sum(1/s) -w=(1/s)/wden -yplus<-sum(w*(sapply(alist,mean,trim=tr))) -tt<-((sapply(alist,mean,trim=tr))-yplus)/sqrt(s) -v<-(f-1) -z<-((4*v^2)+(5*((2*(zc^2))+3)/24))/((4*v^2)+v+(((4*(zc^2))+9)/12))*sqrt(v)*(sqrt(log(1+(tt^2/v)))) -teststat<-sum(z^2) -crit<-qchisq(1-alpha,k-1) -bt2pvalue<-1-(pchisq(teststat,k-1)) -list(p.value=bt2pvalue,teststat=teststat,crit=crit,e=e,f=f,s=s,w=w,tt=tt) -} - -t1waybtsqrk<-function(x,alpha=.05,nboot=599,tr=0.2,SEED=TRUE){ -# -# One-way ANOVA for trimmed means, independent groups. -# Uses a method studied by Ozdemir et al. 
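-#
-# Example (a minimal sketch using simulated data; the data are hypothetical):
-# set.seed(4)
-# dat=list(rnorm(20),rnorm(20,.5),rexp(20))
-# t1waybtsqrk(dat,tr=.2,nboot=599)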
-#
-if(SEED)set.seed(2)
-B=nboot
-if(is.matrix(x))x=listm(x)
-x=lapply(x,elimna)
-T.test<-btsqrk(x,alpha=alpha,tr=tr)$teststat
-means<-c()
-ylist<-list(0)
-TT<-c()
-b<-floor((1-alpha)*B)
-means<-sapply(x,mean,tr)
-k<-length(x)
-for (i in 1:B)
-{
- for (j in 1:k)
- {ylist[[j]]<-(sample(x[[j]],length(x[[j]]),replace=TRUE)-means[j])}
- TT<-c(TT,btsqrk(ylist,alpha,tr)$teststat)
-}
-TT=sort(TT)
-pval<-mean(T.test<=TT,na.rm=TRUE) # proportion of bootstrap statistics >= the observed test statistic
-list(test.stat=T.test,crit.value=TT[b],p.value=pval)
-}
-logSMpred<-function(x,y,pts=NULL,fr=2,LP=TRUE,xout=FALSE,outfun=outpro,...){
-#
-# A smoother designed specifically for binary outcomes
-# LP=TRUE: With two independent variables, smooth the initial smooth using LOESS
-# Return predicted probability of 1 for points in
-# pts
-# based on the data in x and y
-#
-#
-x=as.matrix(x)
-p=ncol(x)
-p1=p+1
-xx<-elimna(cbind(x,y))
-x<-xx[,1:p]
-y<-xx[,p1]
-
-n=nrow(xx)
-yy=rep(1,n)
-vals=sort(unique(y))
-if(length(vals)>2)stop('y should be binary')
-flag=y==vals[2]
-yy[!flag]=0
-y=yy
-
-if(xout){
-m<-cbind(x,y)
-flag<-outfun(x,plotit=FALSE,...)$keep
-m<-m[flag,]
-x<-m[,1:p]
-y<-m[,p1]
-}
-x=as.matrix(x)
-if(is.null(pts))pts=x
-pts=as.matrix(pts)
-library(MASS)
-m=covmve(x)
-phat<-NA
-m1=matrix(NA,nrow=nrow(pts),ncol=nrow(pts))
-yhat=NULL
-for(i in 1:nrow(pts)){
-d<-mahalanobis(x,pts[i,],m$cov)
-flag=sqrt(d)<=fr
-w=flag*exp(-1*d)
-yhat[i]=sum(w*y)/sum(w)
-}
-yhat
-}
-idrange<-function(x,na.rm=FALSE){
-#
-# Compute the interquartile range based on the ideal fourths.
-#
-temp=idealf(x,na.rm=na.rm)
-res=temp$qu-temp$ql
-res
-}
-prodepth<-function(x,pts=x,ndir=1000,SEED=TRUE){
-#
-# Determine an approximation of the projection depth of
-# pts in
-# x
-# using the R package library(DepthProc)
-#
-# ndir indicates how many randomly chosen projections will be used
-#
-# Advantage over zoudepth, much faster execution time.
-# Should be noted, however, that using the function twice on the same
-# data generally results in different values for the depths.
-# Setting
-# SEED=TRUE
-# avoids this.
-#
-#
-if(SEED){
-oldSeed <- .Random.seed
-set.seed(45)
-}
-library(DepthProc)
-res=as.vector(depthProjection(pts,x,ndir=ndir))
-if(SEED) {
- assign(x='.Random.seed', value=oldSeed, envir=.GlobalEnv)
-}
-res
-}
-zoudepth<-function(x,pts=x, zloc = median, zscale = mad, SEED=TRUE){
-#
-# Determine projection depth using the R function zdepth
-# The Nelder--Mead method for finding the maximum of a function is used
-#
-# SEED, included for convenience when this function is used with certain classification techniques.
-#
-res=1/(1+zdepth(x,pts,zloc,zscale))
-res
-}
-KNN<-function(train=NULL,test=NULL,g=NULL,x1=NULL,x2=NULL,k=3,prob=TRUE,SEED=NULL){
-#
-# Do classification using the kNN method
-#
-# k: number of nearest neighbors
-#
-# train is the training set
-# test is the test data
-# g contains labels for the data in the training set,
-# If data for the two groups are stored in
-# x1
-# and
-# x2,
-# the function creates labels for you.
-#
-# This function removes the need to call library class.
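-#
-# Example (a minimal sketch using simulated data; the data are hypothetical):
-# x1=matrix(rnorm(40),ncol=2)
-# x2=matrix(rnorm(40,1),ncol=2)
-# test=matrix(rnorm(20),ncol=2)
-# KNN(test=test,x1=x1,x2=x2,k=3) # labels 0 and 1 are created from x1 and x2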
-# For more information, use the command ?knn -# -# SEED=NULL, used for convenience when called by other functions that expect SEED -# -library(class) -if(!is.null(train)){ -if(is.null(g))stop('Argument g, group ids, must be specified') -if(is.matrix(g)){ -if(dim(g)>1)stop('Argument g should be a vector') -}} -if(!is.null(x1)){ -if(!is.null(x2)){ -if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') -x1=elimna(x1) -x2=elimna(x2) -n1=nrow(x1) -n2=nrow(x2) -g=c(rep(0,n1),rep(1,n2)) -train=rbind(x1,x2) -}} -res=knn(train,test,cl=as.factor(g),k=k,prob=prob) -res -} - -KNNv2<-function(train=NULL,test=NULL,g=NULL,x1=NULL,x2=NULL,k=3,prob=TRUE,SEED=NULL){ -# -# Do classification using the kNN method -# -# -# train is the training set -# test is the test data -# g contains labels for the data in the training set, -# -# This function removes the need to call library class. -# For more information, use the command ?knn -# -# SEED=NULL, used for convenience when called by other functions that expect SEED -# -library(class) -if(!is.null(train)){ -if(is.null(g))stop('Argument g, group ids, must be specified') -if(is.matrix(g)){ -if(dim(g)>1)stop('Argument g should be a vector') -}} -if(!is.null(x1)){ -if(!is.null(x2)){ -if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') -x1=elimna(x1) -x2=elimna(x2) -n1=nrow(x1) -n2=nrow(x2) -g=c(rep(0,n1),rep(1,n2)) -train=rbind(x1,x2) -}} -res=knn(train,test,cl=as.factor(g),k=k,prob=prob) -res=as.numeric(as.vector(res))+1 -res -} - -estBetaParams <- function(mu, var) { -# -# Estimate parameters of the beta distribution, r and s, given the mean and variance. - alpha <- ((1 - mu) / var - 1 / mu) * mu ^ 2 - beta <- alpha * (1 / mu - 1) -list(r=alpha,s=beta) -} -KNNdist<-function(train=NULL,test=NULL,g,k=3,x1=NULL,x2=NULL,prob=FALSE,plotit=FALSE,SEED=NULL, -xlab='Group 1',ylab='Group 2',depthfun=prodepth,...){ -# -# Do classification using depths and the kNN method. -# Points are transformed to their depth in each group and knn is applied -# using the resulting depth values. -# See Li et al., 2012, DD-classifier: nonparametric classification -# procedure based on DD-plot. Journal of the American Statistical Association, -# 107, 737--753 -# -# depthfun indicates how the depth of a point is computed. -# By default, projection distances are used. -# -# train is the training set -# test is the test data -# g: labels for the data in the training set. -# -# depthfun must be a function having the form depthfun(x,pts). -# That is, compute depth for the points in pts relative to points in x. 
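-#
-# For example (an illustrative sketch; mydepth is a hypothetical wrapper):
-# mydepth<-function(x,pts)prodepth(x,pts=pts,ndir=500)
-# could be passed via depthfun=mydepth so that depths are based on 500 projections.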
-# -# SEED: not used here, included for convenience when this function is called by other functions -# -library(class) -if(!is.null(train)){ -if(is.null(g))stop('Argument g, group ids, must be specified') -if(is.matrix(g)){ -if(dim(g)>1)stop('Argument g should be a vector') -}} -if(!is.null(x1)){ -if(!is.null(x2)){ -if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') -x1=elimna(x1) -x2=elimna(x2) -n1=nrow(x1) -n2=nrow(x2) -g=c(rep(0,n1),rep(1,n2)) -train=rbind(x1,x2) -}} -g=as.numeric(as.vector(g)) -train=elimna(train) -if(is.null(test))stop('Argument test is null, no data') -test=elimna(test) -train=as.matrix(train) -test=as.matrix(test) -if(ncol(train)!=ncol(test))stop('The first two arguments, train and test, should have the same number of coluimns') -P=ncol(train) -P1=P+1 -xall=as.data.frame(matrix(NA,nrow=nrow(train),ncol=P1)) -xall[,1:P]=train -xall[,P1]=g -xall=elimna(xall) -x1=xall[,1:P] -xall=as.matrix(xall) -x1=as.matrix(x1) -g=as.vector(xall[,P1]) -ids=unique(g) -x2=elimna(test) -x1=as.matrix(x1) -x2=as.matrix(x2) -n=nrow(x1) -n2=nrow(x2) -p=length(ids) -d=matrix(NA,nrow=n,ncol=p) -D=matrix(NA,nrow=n2,ncol=p) -for(i in 1:length(ids)){ -flag=g==ids[i] -d[,i]=depthfun(as.matrix(x1[flag,]),pts=x1,...) -D[,i]=depthfun(as.matrix(x1[flag,]),pts=x2,...) -} -res=NULL -res=knn(d,D,cl=as.factor(g),k=k,prob=prob) -if(plotit){ -if(p==2){ -plot(d[,1],d[,2],xlab=xlab,ylab=ylab,type='n') -flag=g==ids[1] -points(d[flag,1],d[flag,2]) -points(d[!flag,1],d[!flag,2],pch='*') -}} -res=as.numeric(res) -res -} -olshomci<-function(x,y,alpha=.05){ -# -# Computes confidence interval for the slope of the -# least squares regression line assuming homoscedasticity and that there is -# only one independent variable. -# -# Not recommended in practice; use a method that allows heteroscedasticity -# -if(length(x)!=length(y))stop('x and y have different lengths') -temp=ols(x,y) -df=temp$n-2 #degrees of freedom -df -temp$summary -ci=temp$summar[2,1]-qt(1-alpha/2,df)*temp$summary[2,2] -ci[2]=temp$summar[2,1]+qt(1-alpha/2,df)*temp$summary[2,2] -list(ci=ci) -} -qcor<-function(x,y,q=.5,qfun=qest,xout=FALSE,outfun=outpro){ -# -# Compute quantile correlation as in Li, Li and Tsai, JASA 2015 -# -if(xout){ -flag<-outfun(x,plotit=plotit)$keep -x<-x[flag] -y<-y[flag] -} -dif=y-qfun(x,q) -flag=dif<0 -psi=q-flag -qcov=mean(psi*(x-mean(x))) -qc=qcov/sqrt((q-q^2)*var(x)) -list(cor=qc,cov=qcov) -} -stepmcp<-function(x,tr=.2,alpha=.05){ -# -# Step-down MCP method based on trimmed means -# -# x is assumed to have list mode, or a matrix or data with J columns -# J=number of groups. 
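-#
-# Example (a minimal sketch using simulated data; the data are hypothetical):
-# set.seed(5)
-# dat=list(rnorm(25),rnorm(25),rnorm(25,.6))
-# stepmcp(dat,tr=.2)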
-# -if(is.matrix(x) || is.data.frame(x))x=listm(x) -J=length(x) -if(J<3)stop('For two groups only, use yuen') -if(J>5)stop('Currently limited to at most five groups') -com=modgen(J) -jp1=J+1 -mout=matrix(NA,nrow=length(com),ncol=3, -dimnames=list(NULL,c('Groups','p-value','p.crit'))) -mout[,3]=alpha -jm2=J-2 -com=com[jp1:length(com)] -mout=mout[jp1:nrow(mout),] -for(i in 1:length(com)){ -nmod=length(com[[i]])-1 -temp=c(nmod:0) -mout[i,1]=sum(com[[i]]*10^temp) -temp=t1way(x[com[[i]]],tr=tr)$p.value -pnum=length(com[[i]]) -pe=1-(1-alpha)^(pnum/J) -if(length(com[[i]])<=jm2)mout[i,3]=pe -mout[i,2]=t1way(x[com[[i]]],tr=tr)$p.value -} -mout -} - - -rfit.est<-function(x,y,xout=FALSE,outfun=outpro,...){ -# -# Fit regression line using rank-based method based -# Jaeckel's dispersion function -# via the R package Rfit -# -library(Rfit) -library(quantreg) -if(xout){ -m<-cbind(x,y) -p1=ncol(m) -p=p1-1 -flag<-outfun(x,plotit=FALSE,...)$keep -m<-m[flag,] -x<-m[,1:p] -y<-m[,p1] -} -fit=rfitv2(y~x) -fit=fit$coef -list(coef=fit) -} - -Rfit.est=rfit.est - -rfitv2 <- function (formula, data, subset, yhat0 = NULL, - scores = Rfit::wscores, symmetric = FALSE, TAU = 'F0', ...) { - -# Below is taken from quantreg (under GPL) # - call<-match.call() - mf<-match.call(expand.dots=FALSE) - m<-match(c('formula','data','subset'),names(mf),0) - mf<-mf[c(1,m)] - mf[[1]]<-as.name("model.frame") - mf<-eval.parent(mf) -# - - x <- model.matrix(attr(mf, "terms"), data = mf) - if( abs(max(x) - min(x)) < .Machine$double.eps ^ 0.5 ) stop("x cannot only contain an intercept") - x1 <- as.matrix(x[,colnames(x)!='(Intercept)']) - x1 <- as.matrix(cbind(rep(1,nrow(x1)),x1)) - - y <- model.response(mf) - - qrx <- qr(x1) - Q<-as.matrix(qr.Q(qrx)) - q1<-Q[,1] - xq<-as.matrix(Q[,2:qrx$rank]) - - if( is.null(yhat0) ) { - fit0<-suppressWarnings(rq(y~xq-1)) - } else { - fit0 <- lsfit(xq, yhat0, intercept = FALSE) - } -# ord<-order(fit0$resid) - -## 20141211: set initial fit to null model if it has lower dispersion - betahat0 <- fit0$coef - if( disp(betahat0, xq, y, scores) > disp(rep(0,length(betahat0)), xq, y, scores) ) { - betahat0 <- rep(0, length(betahat0) ) - } - ord <- order(y - xq%*%betahat0) -## - - fit <- jaeckel(as.matrix(xq[ord,]), y[ord], betahat0, scores=scores, ...) - if( fit$convergence != 0 ) { - fit2 <- jaeckel(as.matrix(xq[ord,]), y[ord], jitter(fit$par), scores=scores, ...) - if( fit$convergence != 0 ) { - warning("rfit: Convergence status not zero in jaeckel") - if( fit2$value < fit$value ) fit <- fit2 - } else { - fit <- fit2 - } - rm(fit2) - } - rm(ord) - betahat <- fit$par - - yhat <- xq %*% betahat - ehat <- y - yhat - alphahat <- ifelse(symmetric, signedrank(ehat), median(ehat)) - ehat <- ehat - alphahat - yhat <- yhat+alphahat - - bhat <- lsfit(x,yhat,intercept=FALSE)$coef - - r.gettau <- switch(TAU, - F0 = gettauF0, - R = gettau, - N = function(...) NA - ) - - tauhat <- r.gettau(ehat, ncol(xq), scores, ...) - if (symmetric) { - taushat <- tauhat - } else { - taushat <- taustar(ehat, qrx$rank) - } - - res <- list( coefficients = bhat, residuals = ehat, fitted.values = yhat, - scores = scores, x = x, y = y, tauhat = tauhat, qrx1=qrx, - taushat = taushat, symmetric = symmetric, betahat = bhat,disp=fit$value) - res$call <- call - class(res) <- list("rfit") - res - -} -loc2plot<-function(x,y,plotfun=akerd,xlab='X',ylab='',...){ -# -# Plot an estimate of the distribution of X-Y -# By default, -# plotfun=akerd, meaning that a kernel adaptive estimator is used. 
-# Other options are: -# skerd -# kdplot -# rdplot -# -# See Wilcox Introduction to Robust Estimation and Hypothesis Testing -# section 3.2 for details. -# -m=elimna(cbind(x,y)) -x=m[,1] -y=m[,2] -temp=temp=as.vector(outer(x,y,FUN='-')) -plotfun(temp,xlab=xlab,ylab=ylab,...) -} -q2gci<-function(x,y,q=c(.1,.25,.5,.75,.9),nboot=2000,plotit=TRUE,SEED=TRUE,xlab='Group 1',ylab='Est.1-Est.2',alpha=.05){ -# -# Compare quantiles using pb2gen -# via hd estimator. Tied values are allowed. -# When comparing lower or upper quartiles, both power and the probability of Type I error -# compare well to other methods that have been derived. -# q: can be used to specify the quantiles to be compared -# q defaults to comparing the .1,.25,.5,.75, and .9 quantiles -# Function returns p-values and critical p-values based on Hochberg's method. -# -x=elimna(x) -y=elimna(y) -if(sum(duplicated(x)>0))stop('Duplicate values were detected; use qcomhd or medpb2') -if(sum(duplicated(y)>0))stop('Duplicate values were detected; use qcomhd or medpb2') -if(SEED)set.seed(2) -pv=NULL -output=matrix(NA,nrow=length(q),ncol=10) -dimnames(output)<-list(NULL,c('q','n1','n2','est.1','est.2','est.1_minus_est.2','ci.low','ci.up','p_crit','p-value')) -for(i in 1:length(q)){ -output[i,1]=q[i] -output[i,2]=length(elimna(x)) -output[i,3]=length(elimna(y)) -output[i,4]=qest(x,q=q[i]) -output[i,5]=qest(y,q=q[i]) -output[i,6]=output[i,4]-output[i,5] -temp=pb2gen(x,y,nboot=nboot,est=qest,q=q[i],SEED=FALSE,alpha=alpha,pr=FALSE) -output[i,7]=temp$ci[1] -output[i,8]=temp$ci[2] -output[i,10]=temp$p.value -} -temp=order(output[,10],decreasing=TRUE) -zvec=alpha/c(1:length(q)) -output[temp,9]=zvec -#print(output) -output <- data.frame(output) -output$signif=rep('YES',nrow(output)) -for(i in 1:nrow(output)){ -if(output[temp[i],10]>output[temp[i],9])output$signif[temp[i]]='NO' -if(output[temp[i],10]<=output[temp[i],9])break -} -if(plotit){ -xax=rep(output[,4],3) -yax=c(output[,6],output[,7],output[,8]) -plot(xax,yax,xlab=xlab,ylab=ylab,type='n') -points(output[,4],output[,6],pch='*') -lines(output[,4],output[,6]) -points(output[,4],output[,7],pch='+') -points(output[,4],output[,8],pch='+') -} -output -} -cov2cor<-function(x){ -# -# Convert a covariance matrix to a correlation matrix -# -p=ncol(x) -m=x -for(i in 1:p){ -for(j in 1:p){ -m[i,j]=m[i,j]/sqrt(x[i,i]*x[j,j]) -}} -m -} - - - - -linpairpb<-function(x,tr=.2,alpha=.05,nboot=NA,est=tmean,method='hoch',bhop=FALSE,SEED=TRUE,...){ -# -# Report results for all pairwise comparsisons -# in a format convenient when using other functions that use this function -# -if(is.matrix(x))x=listm(x) -J=length(x) -con=con.all.pairs(J) -if(bhop)method='BH' -a=linconpb(x,tr=tr,alpha=alpha,nboot=nboot,est=est,SEED=SEED,method=method,...) -r=nrow(a$output) -cc=ncol(a$output) -cp1=cc+3 -mat=matrix(NA,nrow=r,ncol=cp1) -for(i in 1:r){ -g=which(a$con[,i]!=0) -z=c(g,a$output[i,2:cc],est(x[[g[1]]],...),est(x[[g[2]]],...)) -e1=est(x[[g[1]]],...) -e2=est(x[[g[2]]],...) -mat[i,]=c(g,a$output[i,2:cc],e1,e2) -} -num.sig<-sum(mat[,4]<=mat[,5]) -dimnames(mat)=list(NULL,c('Group','Group','psihat','p.value','p.crit','ci.lower','ci.upper','Est .1','Est. 
2','p.adjusted')) -list(output=mat,num.sig=num.sig) -} - -func.plot<-function(fit, x = NULL, method ='MBD', depth = NULL, plotit = TRUE, - prob = 0.5, color = 6, outliercol = 2, barcol = 4, fullout = FALSE, - factor = 1.5, xlim = c(1, nrow(fit)), ylim = c(min(fit) - - 0.5 * diff(range(fit)), max(fit) + 0.5 * diff(range(fit))),xlab='Time',ylab='Y', - ...){ -# -# functional boxplot for functional data using -# method in Sun and Genton. -# -# -# fit is assumed to be an n-by-p matrix -# n= number of subjects -# p= number points where the function has been evaluated. -# -# rows with missing values are automatically removed. -# -library(fda) -elimna(fit) -fit=t(fit) -res=fbplot(fit, x = NULL, method = method, depth = depth, plot = plotit, - prob =prob, color =color, outliercol =outliercol, barcol = barcol, -fullout = fullout, factor = factor, xlim =xlim, ylim = ylim, xlab=xlab,ylab=ylab,...) -res -} - -func.out<-function(x,xlab='Time',ylab=' '){ -# -# A spaghetti plot for functional data that indicates outliers with a dashed line -# x is a matrix with n rows and p columns -# -# It is assumed that the function is measured at times 1, 2, ..., p -# -x=elimna(t(x)) # colums with missing data are automatically removed -x=t(x) -p=ncol(x) -n=nrow(x) -plot(c(1:p),seq(min(x),max(x),length.out=p),type='n',xlab=xlab,ylab=ylab) -flag=func.plot(x,plotit=FALSE)$outpoint -chk=c(1:n) -flag2=chk -nsub=length(flag) -if(nsub>0)flag2=chk[-flag] -for(j in 1:length(flag2))lines(c(1:p),x[flag2[j],]) -if(nsub>0)for(j in 1:nsub)lines(c(1:p),x[flag[j],],lty=2) -} - -spag.plot<-function(x, regfun=tsreg,type = c('l', - 'p', 'b', 'o', 'c'), legend = FALSE, trace.label = deparse(substitute(trace.factor)), - fixed = FALSE, xlab = 'Time', ylab ='', - xtick = FALSE, xaxt = par('xaxt'), axes = TRUE, fit.lin=FALSE,...){ -# -# Create a spaghetti plot for data stored in a matrix with -# n rows and p columns. The p columns -# contain measures taken at p times for each subject. -# This function converts x into a form that can be used by interaction.plot -# -# fit.line=TRUE means that a linear fit is plotted. -# -# regfun: The linear fit is based on the regression estimator indicated by -# regfun. The default is Theil--Sen estimator -# -# -# type: the type of plot (see plot.default): lines or points or both. -# -x=as.matrix(x) -n=nrow(x) -p=ncol(x) -np=n*p -m=matrix(NA,nrow=np,3) -pvec=c(1:p) -ic=1-p -iu=0 -for(i in 1:n){ -ic=ic+p -iu=iu+p -m[ic:iu,1]=i # create Subject id. -m[ic:iu,2]=pvec -m[ic:iu,3]=x[i,] -} -if(!fit.lin)interaction.plot(m[,2],m[,1],m[,3],xlab=xlab,ylab=ylab,legend=legend, -xtick=xtick,xaxt=xaxt,axes=axes) -if(fit.lin){ -fit=by(m[,2:3],m[,1],regYval,regfun=regfun) -fit1 <- unlist(fit) -names(fit1) <- NULL -#plotting the linear fit by id -interaction.plot(m[,2],m[,1], fit1, - xlab=xlab, ylab=ylab, legend=legend) -} -} -regYval<-function(m,regfun=tsreg){ -val=as.vector(regYhat(m[,1],m[,2],regfun=regfun)) -val -} -FBplot=func.plot - -mcpKadjp <- function (p, k=1, proc = c('Holm'), rawp=p) { -# -# MCP method based on results in -# -# Keselman, H. J., Miller, C. E., & Holland, B. (2011). -# Many tests of significance: New methods for controlling Type I errors. -# Psychological Methods, 16, 420-431. -# -# Also see -# Keselman, H. J., & Miller, C. E. (2012). -# Correction to many tests of significance: -# New methods for controlling Type I errors. Psychological Methods, 17(4), 679. -# -# p: The p-values to be adjusted. -# k: The value for k-FWER -# proc: indicates the method to be used. 
Choices are: -#' Holm' -# 'Hochberg', -#' 'RS', Romano-Shaikh procedure - # 'Sarkar', - # 'BH' , Benjamini--Hochberg -# -## Generalized Hochberg is valid under MTP2 condition of the joint null -## distribution of the p-values -## Sarkar procedure is only valid for independent test statistics -# -D1 <- function(k=1, s=1000) { -#To calculate D1 values for Romano-Shaikh procedure - alpha <- NULL - for (i in 1:s) { - if (i <= k) alpha[i]=k/s - else alpha[i]=k/(s+k-i) - } - S <- NULL - S[1:k] <- 0 - for (I in (k+1):s) { - tmp <- NULL - tmp[1:(k-1)] <- 0 - tmp[k]=I*alpha[s-I+k]/k - for (j in (k+1):I) tmp[j]=I*(alpha[s-I+j]-alpha[s-I+j-1])/j - S[I] <- sum(tmp) - if (S[I] < S[I-1]) break - - maxI <- I - maxS <- round(S[I],4) - } - return(list(S=S, maxI=maxI, maxS=maxS)) -} -#modified the function mt.rawp2adjp from MTP package to k-FWER procedures - m <- length(rawp) - n <- length(proc) - index <- order(rawp) - spval <- rawp[index] - adjp <- matrix(0, m, n + 1) - dimnames(adjp) <- list(NULL, c('rawp', proc)) - adjp[, 1] <- spval - -#################### Calculate adjusted p-values ###################### - -#generalized Holm procedure based on Lehmann and Romano (2005) - if (is.element('Holm', proc)) { - crit <- sapply(c(rep(k,k-1),k:m), function(i) - k/(m-i+k)) - tmp <- 1/crit*spval - tmp[tmp>1] <- 1 - for (i in 2:m) tmp[i] <- max(tmp[i-1], tmp[i]) - adjp[, 'Holm'] <- tmp - } -#generalized Hochberg procedure (Step-up version of Lehmann and Romano) - if (is.element('Hochberg', proc)) { - crit <- sapply(c(rep(k,k-1),k:m), function(i) - k/(m-i+k)) - tmp <- 1/crit*spval - tmp[tmp>1] <- 1 - for (i in (m-1):1) tmp[i] <- min(tmp[i+1], tmp[i]) - adjp[, 'Hochberg'] <- tmp - } -#generalized Hochberg procedure based on Romano and Shaikh(2006) - if (is.element('RS', proc)) { - d <- D1(k,m)$maxS - crit <- sapply(c(rep(k,k-1),k:m), function(i) - k/(m-i+k)/d) - tmp <- 1/crit*spval - tmp[tmp>1] <- 1 - for (i in (m-1):1) tmp[i] <- min(tmp[i+1], tmp[i]) - adjp[, 'RS'] <- tmp - } -#generalized Hochberg procedure based on Sarkar(2008) - ### Only for independent case ### - if (is.element('Sarkar', proc)) { - crit <- sapply(c(rep(k,k-1),k:m), function(i) - (prod((1:k)/(m-i+(1:k))))) - tmp <- 1/crit*(spval^k) - tmp[tmp>1] <- 1 - for (i in (m-1):1) tmp[i] <- min(tmp[i+1], tmp[i]) - # Next line used to protect against possibility of adjp1] <- 1 - for (i in (m-1):1) tmp[i] <- min(tmp[i+1], tmp[i]) - adjp[, 'BH'] <- tmp - } -### The following line returns original order of p-values - adjp <- adjp[order(index),] - return(adjp) -} - -discANOVA<-function(x,nboot=500,SEED=TRUE){ -# -# Test the global hypothesis that for two or more independent groups, -# the corresponding discrete distributions are identical. -# That is, test the hypothesis that independent groups have identical -# multinomial distributions. A generalization of the Storer--Kim method is used. -# -# Could also use a chi-squared test via the function: disc2.chi.sq -# -# The method is designed for situations where the -# sample size is relatively small. The method can be sensitive to -# differences that are missed using a measure of location. -# -# Control over the Type I error probability is excellent, even when n=10 -# -# x is a matrix with n rows and J columns -# or it can have list mode. 
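-#
-# Illustrative example (added comment; simulated data, not from the original source;
-# assumes the rest of this file has been sourced):
-#   set.seed(1)
-#   g1=rbinom(15,4,.5); g2=rbinom(15,4,.5); g3=rbinom(15,4,.7)
-#   discANOVA(list(g1,g2,g3),nboot=500)
-# The function returns the test statistic and a bootstrap p-value for the
-# hypothesis that the three discrete distributions are identical.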
-# -if(is.matrix(x) || is.data.frame(x))x=listm(x) -library(mc2d) -if(SEED)set.seed(2) -vals=lapply(x,unique) -vals=sort(elimna(list2vec(vals))) -K=length(unique(vals)) -n=lapply(x,length) -n=list2vec(n) -J=length(x) -step1=discANOVA.sub(x) -test=step1$test -C1=step1$C1 -HT=NULL -for(i in 1:K)HT[i]=mean(C1[i,]) -tv=NULL -TB=NA -VP=NA -B1hat=NA -xx=list() -for(ib in 1:nboot){ -xx=list() -for(j in 1:J){ -temp=rmultinomial(n[j],1,HT) -xx[[j]]=which(temp[1,]==1) -for(i in 2:n[j])xx[[j]][i]=which(temp[i,]==1) -} -TB[ib]=discANOVA.sub(xx)$test -} -pv=1-mean(test>TB)-.5*mean(test==TB) -list(test=test,p.value=pv) -} -discANOVA.sub<-function(x){ -# -# -x=lapply(x,elimna) -vals=lapply(x,unique) -vals=sort(elimna(unique(list2vec(vals)))) -n=lapply(x,length) -n=list2vec(n) -K=length(vals) -J=length(x) -C1=matrix(0,nrow=K,ncol=J) -for(j in 1:J){ -for(i in 1:K){ -C1[i,j]=C1[i,j]+sum(x[[j]]==vals[i]) -} -C1[,j]=C1[,j]/n[j] -} -test=0 -for(i in 1:K)test=test+var(C1[i,]) -list(test=test,C1=C1) -} - - -discmcp<-function(x,alpha=.05,nboot=500,SEED=TRUE,...){ -# -# Multiple comparisons for J independent groups -# having discrete distributions. -# The method is based on a chi-squared test for each pair of groups to be compared -# -# The data are assumed to be stored in x -# which either has list mode or is a matrix. In the first case -# x[[1]] contains the data for the first group, x[[2]] the data -# for the second group, etc. Length(x)=the number of groups = J. -# If stored in a matrix, the columns of the matrix correspond -# to groups. -# -# Missing values are allowed. -# -# Probability of one or more Type I errors controlled using Hochberg's method. -# -if(is.matrix(x))x<-listm(x) -if(!is.list(x))stop('Data must be stored in list mode or in matrix mode.') -J<-length(x) -ncon=(J^2-J)/2 -Jm<-J-1 -# -# Determine critical values -dvec=alpha/c(1:ncon) -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -output<-matrix(NA,nrow=ncon,ncol=4) -dimnames(output)<-list(NULL,c('Group','Group','p.value','p.crit')) -ic=0 -for(j in 1:J){ -for(k in 1:J){ -if(j=zvec) -output[temp2,4]<-zvec -num.sig<-sum(output[,3]<=output[,4]) -list(output=output,num.sig=num.sig) -} -discstep<-function(x,nboot=500,alpha=.05,SEED=TRUE){ -# -# Step-down multiple comparison procedure for comparing -# J independent discrete random variables. -# The method is based on a generalization of the Storer--Kim method -# comparing independent binomials; it can be sensitive to differences -# not detected by measures of location. 
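-#
-# Illustrative example (added comment; simulated data, not from the original source;
-# assumes the rest of this file has been sourced):
-#   set.seed(1)
-#   x=matrix(sample(1:3,60,replace=TRUE),ncol=3)
-#   discstep(x,nboot=500)
-# Each row of the output corresponds to a subset of two or more of the groups,
-# with a p-value and a critical p-value.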
-# -# x is a matrix with n rows and J columns -# or it can have list mode -# -if(is.matrix(x) || is.data.frame(x))x=listm(x) -library(mc2d) -if(SEED)set.seed(2) -vals=lapply(x,unique) -vals=sort(elimna(list2vec(vals))) -K=length(unique(vals)) -n=lapply(x,length) -n=list2vec(n) -J=length(x) -if(J==2)stop('For 2 groups use disc2com') -if(J>5)stop('Designed for 5 groups or less') -com=modgen(J) -ntest=length(com) -jp1=J+1 -com=com[jp1:length(com)] -ntest=length(com) -mout=matrix(NA,nrow=ntest,ncol=3) -dimnames(mout)=list(NULL,c('Groups','p-value','p.crit')) -test=NULL -for(i in 1:ntest){ -test[i]=discANOVA.sub(x[com[[i]]])$test #$ -nmod=length(com[[i]])-1 -temp=c(nmod:0) -mout[i,1]=sum(com[[i]]*10^temp) -} -mout[,3]=alpha -xx=list() -pv=NA -jm2=J-2 -mout[,3]=alpha -TB=matrix(NA,nrow=nboot,ncol=ntest) -step1=discANOVA.sub(x) -C1=step1$C1 -HT=NULL -for(i in 1:K)HT[i]=mean(C1[i,]) -for(ib in 1:nboot){ -xx=list() -for(j in 1:J){ -temp=rmultinomial(n[j],1,HT) -xx[[j]]=which(temp[1,]==1) -for(i in 2:n[j])xx[[j]][i]=which(temp[i,]==1) -} -for(k in 1:ntest)TB[ib,k]=discANOVA.sub(xx[com[[k]]])$test #$ -} -for(k in 1:ntest){ -mout[k,2]=1-mean(test[k]>TB[,k])-.5*mean(test[k]==TB[,k]) -pnum=length(com[[k]]) -pe=1-(1-alpha)^(pnum/J) -if(length(com[[k]])<=jm2)mout[k,3]=pe -} -list(results=mout[nrow(mout):1,]) -} - -medcurve<-function(x){ -# -# returns the median curve for functional data -# -chk=FBplot(x,plot=FALSE)$depth -id=which(chk==max(chk)) -if(length(id)==1)est=x[id,] -if(length(id)>1)est=apply(x[id,],2,mean) -est -} -cumrelf<-function(x,y=NA,xlab='X',ylab='CUM REL FREQ',pr.freq=FALSE){ -# -# plot the cumulative relative frequencies for 1 or more groups -# -# x can be a matrix, columns corresponding to groups, or x -# x can have list mode. -# y=NA, if data are stored in y, it is assumes there two groups -# with data for the second group stored in y -# -xu=NULL -cf=NULL -if(!is.na(y[1])){ -xx=list() -xx[[1]]=x -xx[[2]]=y -x=xx -} -if(is.matrix(x) || is.data.frame(x))x=listm(x) -if(length(x)==1)stop('This function is designed for two or more groups') -x=elimna(x) -for(j in 1:length(x)){ -z=splot(x[[j]],plotit=FALSE) -xu=c(xu,sort(unique(x[[j]]))) -cf=c(cf,cumsum(z$frequencies)/length(x[[j]])) -} -plot(xu,cf,,type='n',xlab=xlab,ylab=ylab) -for(j in 1:length(x)){ -z=splot(x[[j]],plotit=FALSE) -if(pr.freq)print(z) -lines(sort(unique(x[[j]])),cumsum(z$frequencies)/length(x[[j]]),lty=j) -} -} -regGmcp<-function(x,y,regfun=tsreg,SEED=TRUE,nboot=100,xout=FALSE,AD=FALSE, - outfun=outpro,STAND=TRUE,alpha=0.05,pr=TRUE,MC=FALSE,ISO=TRUE,...) -{ -# -# If ISO = FALSE: -# All pairwise comparisons of regression parameters are performed among J independent groups -# That is, for groups j and k, all j1)stop('One covariate only is allowed with this function') -if(length(x1)!=length(y1))stop('x1 and y1 have different lengths') -if(length(x1)!=length(x2))stop('x1 and y2 have different lengths') -if(length(x2)!=length(y2))stop('x2 and y2 have different lengths') -if(length(y1)!=length(y2))stop('y1 and y2 have different lengths') -xy=elimna(cbind(x1,y1,x2,y2)) -} -if(is.null(pts)){ -for(i in 1:length(qvals))pts=c(pts,qest(xy[,1],qvals[i])) -} -if(SEED)set.seed(2) -n=nrow(xy) -est1=NA -est2=NA -J=length(pts) -est1=matrix(NA,nrow=nboot,ncol=J) -est2=matrix(NA,nrow=nboot,ncol=J) - -data=matrix(sample(n,size=n*nboot,replace=TRUE),ncol=nboot,nrow=n) -if(!MC){ -est1=apply(data,2,DancGLOB_sub,xy=xy[,1:2],pts=pts,est=est,fr=fr1,nmin=nmin,...) -est2=apply(data,2,DancGLOB_sub,xy=xy[,3:4],pts=pts,est=est,fr=fr2,nmin=nmin,...) 
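-# Each column of data holds one bootstrap sample of row indices; applying
-# DancGLOB_sub column by column yields, for each of the two dependent groups,
-# the estimates at the design points in pts based on that bootstrap sample.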
-est1=t(as.matrix(est1)) -est2=t(as.matrix(est2)) -} - -if(MC){ -library(parallel) -data=listm(data) -est1=mclapply(data,DancGLOB_sub,xy=xy[,1:2],pts=pts,est=est,fr=fr1,nmin=nmin,...) -est2=mclapply(data,DancGLOB_sub,xy=xy[,3:4],pts=pts,est=est,fr=fr2,nmin=nmin,...) -est1=t(matl(est1)) -est2=t(matl(est2)) -} - -e1=runhat(xy[,1],xy[,2],pts=pts,est=est,fr=fr1,...) -e2=runhat(xy[,3],xy[,4],pts=pts,est=est,fr=fr2,...) -dif=e1-e2 - -pv=NA -for(j in 1:J){ -pv[j]=mean(est1[,j]=zvec) -output[temp2,7]<-zvec -output[,7]<-output[,7] -INT=bw.es.I(J,K,x,CI=CI,tr=tr,alpha=alpha,REL.MAG=REL.M)$Interaction.ES -list(output=output,Effect.Sizes=INT) -} - - -bwiJ2plot<-function(J,K,x,fr=.8,aval=.5,xlab = 'X', ylab = '', -color = rep('black', 5),BOX=FALSE){ -# -# This function is for a J by 2 between by within design -# -# Plot distribution of the difference scores for -# each of the J independpent groups -# -# x: can be a matrix, organized as expected by bwimcp -# or it can have list mode. -# -if(K!=2)stop('Should have only two dependent variables') -if(J>5)stop('Can only have five levels for the independent factor') - if(is.matrix(x)) { - y <- list() - for(j in 1:ncol(x)) - y[[j]] <- x[, j] - x <- y -} -dif=list() -for(j in 1:5)dif[[j]]=NULL -JK=J*K -m<-matrix(c(1:JK),J,K,byrow=TRUE) -ic<-c(-1,0) -for(j in 1:J){ -ic<-ic+2 -dif[[j]]=x[[ic[1]]]-x[[ic[2]]] -} -if(!BOX)g5plot(dif[[1]],dif[[2]],dif[[3]],dif[[4]],dif[[5]],fr = fr, - aval = aval, xlab = xlab, ylab = ylab, color = color) -if(BOX)boxplot(dif) -} - -Dancova<-function(x1,y1,x2=x1,y2,fr1=1,fr2=1,tr=.2,alpha=.05,plotit=TRUE,pts=NA, -sm=FALSE,xout=FALSE,outfun=out,DIF=FALSE,LP=FALSE,xlab='X',ylab='Y',...){ -# -# Compare two dependent groups using a method similar to the one used by the R function ancova -# No parametric assumption is made about the form of -# the regression lines--a running interval smoother is used. -# -# Assume data are in x1 y1 x2 and y2 -# -# sm=T will create smooths using bootstrap bagging. -# pts can be used to specify the design points where the regression lines -# are to be compared. -# -# DIF=FALSE: marginal trimmed means are compared -# DIF=TRUE: Trimmed means of difference scores are used. -# -if(!is.null(x2)){ -if(ncol(as.matrix(x1))>1)stop('One covariate only is allowed with this function') -if(length(x1)!=length(y1))stop('x1 and y1 have different lengths') -if(length(x2)!=length(y2))stop('x2 and y2 have different lengths') -if(length(x1)!=length(x2))stop('x1 and y2 have different lengths') - -if(length(y1)!=length(y2))stop('y1 and y2 have different lengths') -xy=elimna(cbind(x1,y1,x2,y2)) -x1=xy[,1] -y1=xy[,2] -x2=xy[,3] -y2=xy[,4] -xorder<-order(x1) -y1<-y1[xorder] -x1<-x1[xorder] -xorder<-order(x2) -y2<-y2[xorder] -x2<-x2[xorder] -if(!is.na(pts[1]))mat=Dancovapts(x1,y1,x2,y2,fr1=fr1,fr2=fr2,tr=tr,alpha=alpha, -plotit=FALSE,pts=pts,sm=sm,xout=xout,outfun=outfun,DIF=DIF,...) 
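-# When pts is not specified, five design points are chosen empirically in the
-# code below: the smallest and largest covariate values for which both groups
-# have at least 12 points close to them (based on the spans fr1 and fr2), plus
-# three covariate values located between these two extremes.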
-if(is.na(pts[1])){ -npt<-5 -isub<-c(1:5) # Initialize isub -test<-c(1:5) -xorder<-order(x1) -y1<-y1[xorder] -x1<-x1[xorder] -xorder<-order(x2) -y2<-y2[xorder] -x2<-x2[xorder] -n1<-1 -n2<-1 -vecn<-1 -n=length(y1) -ivals=c(1:n) -for(i in 1:length(x1))n1[i]<-length(y1[near(x1,x1[i],fr1)]) -for(i in 1:length(x1))n2[i]<-length(y2[near(x2,x1[i],fr2)]) -for(i in 1:length(x1))vecn[i]<-min(n1[i],n2[i]) -sub<-c(1:length(x1)) -isub[1]<-min(sub[vecn>=12]) -isub[5]<-max(sub[vecn>=12]) -isub[3]<-floor((isub[1]+isub[5])/2) -isub[2]<-floor((isub[1]+isub[3])/2) -isub[4]<-floor((isub[3]+isub[5])/2) -mat<-matrix(NA,5,9) -dimnames(mat)<-list(NULL,c('X','n','DIF','TEST','se','ci.low','ci.hi','p.value','p.adjust')) -for (i in 1:5){ -t1=near(x1,x1[isub[i]],fr1) -t2=near(x2,x1[isub[i]],fr2) -iv1=ivals[t1] -iv2=ivals[t2] -pick=unique(c(iv1,iv2)) -mat[i,2]<-length(y1[pick]) -if(!DIF)test<-yuend(y1[pick],y2[pick],tr=tr) -if(DIF)test<-trimci(y1[pick]-y2[pick],tr=tr,pr=FALSE) -mat[i,1]<-x1[isub[i]] -if(!DIF){ -mat[i,4]<-test$teststat -mat[i,3]<-test$dif -} -if(DIF){ -mat[i,4]<-test$test.stat -mat[i,3]<-test$estimate -} -mat[i,5]<-test$se -mat[i,6]<-test$ci[1] -mat[i,7]<-test$ci[2] -mat[i,8]<-test$p.value -} -temp2<-order(0-mat[,8]) -bot=c(1:nrow(mat)) -dvec=sort(alpha/bot,decreasing=TRUE) -#mat[temp2,9]=dvec -mat[,9]=p.adjust(mat[,8],method='hoch') -} -if(plotit){ -if(xout){ -flag<-outfun(x1,...)$keep -x1<-x1[flag] -y1<-y1[flag] -flag<-outfun(x2,...)$keep -x2<-x2[flag] -y2<-y2[flag] -} -runmean2g(x1,y1,x2,y2,fr=fr1,est=tmean,sm=sm,xout=FALSE,LP=LP,xlab=xlab,ylab=ylab,...) -}} -list(output=mat) -} - - - -Dancovapts<-function(x1,y1,x2,y2,fr1=1,fr2=1,tr=.2,alpha=.05,plotit=TRUE,pts=NA,sm=FALSE,xout=FALSE,outfun=out,DIF=FALSE,LP=TRUE,...){ -# -# Compare two dependent groups using a method similar to the one used by the R function ancova -# No parametric assumption is made about the form of -# the regression lines--a running interval smoother is used. -# -# Assume data are in x1 y1 x2 and y2 -# -# sm=T will create smooths using bootstrap bagging. -# pts can be used to specify the design points where the regression lines -# are to be compared. -# -# DIF=FALSE: marginal trimmed means are compared -# DIF=TRUE: Trimmed means of difference scores are used. 
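-#
-# This function is called by Dancova when the argument pts is specified.
-# Illustrative example (added comment; simulated data, not from the original source):
-#   set.seed(1)
-#   x1=rnorm(50); y1=x1+rnorm(50); y2=x1+rnorm(50)+.5
-#   Dancova(x1,y1,x1,y2,pts=c(-1,0,1),plotit=FALSE)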
-# -if(ncol(as.matrix(x1))>1)stop('One covariate only is allowed with this function') -if(length(x1)!=length(y1))stop('x1 and y1 have different lengths') -if(length(x1)!=length(x2))stop('x1 and y2 have different lengths') -if(length(x2)!=length(y2))stop('x2 and y2 have different lengths') -if(length(y1)!=length(y2))stop('y1 and y2 have different lengths') -xy=elimna(cbind(x1,y1,x2,y2)) -x1=xy[,1] -y1=xy[,2] -x2=xy[,3] -y2=xy[,4] -xorder<-order(x1) -y1<-y1[xorder] -x1<-x1[xorder] -xorder<-order(x2) -y2<-y2[xorder] -x2<-x2[xorder] -n=length(y1) -npts=length(pts) -mat<-matrix(NA,nrow=npts,nco=9) -dimnames(mat)<-list(NULL,c('X','n','DIF','TEST','se','ci.low','ci.hi', -'p.value','p.crit')) -for (i in 1:npts){ -t1=near(x1,pts[i],fr1) -t2=near(x2,pts[i],fr2) -ivals=c(1:n) -iv1=ivals[t1] -iv2=ivals[t2] -pick=unique(c(iv1,iv2)) -mat[i,2]<-length(y1[pick]) -if(!DIF)test<-yuend(y1[pick],y2[pick],tr=tr,alpha=alpha) -if(DIF)test<-trimci(y1[pick]-y2[pick],tr=tr,pr=FALSE,alpha=alpha) -mat[i,1]<-pts[i] -if(!DIF){ -mat[i,4]<-test$teststat -mat[i,3]<-test$dif -} -if(DIF){ -mat[i,4]<-test$test.stat -mat[i,3]<-test$estimate -} -mat[i,5]<-test$se -mat[i,6]<-test$ci[1] -mat[i,7]<-test$ci[2] -mat[i,8]<-test$p.value -} -temp2<-order(0-mat[,8]) -bot=c(1:nrow(mat)) -dvec=sort(alpha/bot,decreasing=TRUE) -mat[temp2,9]=dvec -mat -} -qrchk<-function(x,y,qval=.5,q=NULL,nboot=1000,com.pval=FALSE,SEED=TRUE,alpha=.05,pr=TRUE, -xout=FALSE,outfun=out,chk.table=FALSE,MC=FALSE,...){ -# -# Test of a linear fit based on quantile regression -# The method stems from He and Zhu 2003, JASA, 98, 1013-1022. -# Here, resampling is avoided using approximate critical values if -# com.pval=F -# -# To get a p-value, via simulations, set com.pval=T -# nboot is number of simulations used to determine p-value. -# Execution time can be quite high -# -# This function quickly determines .1, .05, .025 and .01 -# critical values for n<=400 and p<=6 (p= number of predictors) -# and when dealing with the .5 quantile. -# Otherwise, critical values are determined via simulations, which -# can have high execution time. -# -if(!is.null(q))qval=q -if(pr){ -if(!com.pval)print('To get a p-value, set com.pval=T and use MC=T if a multicore processor is available') -print('Reject if test statistic is >= critical value') -} -x<-as.matrix(x) -p<-ncol(x) -pp1<-p+1 -yx<-elimna(cbind(y,x)) #Eliminate missing values. 
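-# Split the cleaned data back into the response y and the design matrix x;
-# when xout=TRUE, rows flagged as leverage points by outfun are removed
-# before the test statistic is computed.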
-y<-yx[,1] -x<-yx[,2:pp1] -store.it=F -x<-as.matrix(x) -p.val<-NULL -crit.val<-NULL -x<-as.matrix(x) -if(xout){ -flag<-outfun(x,...)$keep -x<-x[flag,] -y<-y[flag] -} -# shift the marginal x values so that the test statistic is -# invariant under changes in location -n<-length(y) -x=standm(x) -if(p<=6){ -if(qval==.5){ -aval<-c(.1,.05,.025,.01) -aokay<-duplicated(c(alpha,aval)) -aokay<-sum(aokay) -if(aokay>0){ -crit10<-matrix(c(.0254773,.008372,.00463254,.0023586,.000959315,.00042248, -.00020069, -.039728,.012163,.0069332,.0036521,.001571,.0006882, .0003621, -.055215,.0173357,.009427,.004581,.0021378,.00093787,.00045287, -.075832,.0228556,.0118571,.005924,.00252957,.0011593,.00056706, -.103135,.0298896,.0151193,.0073057,.00305456,.0014430,.000690435, -.12977,.03891,.018989,.009053,.0036326,.001617,.000781457),ncol=6,nrow=7) -crit05<-matrix(c(.031494,.010257,.00626,.00303523,.0012993,.000562247, -.00025972, -.046296,.015066,.00885556,.0045485,.0110904,.00086946,.000452978, -.063368,.0207096546,.010699,.005341,.0025426,.0011305,.000539873, -.085461,.027256,.014067,.0071169,.002954,.0013671,.000660338, -.11055,.03523,.017511,.0084263,.0036533,.0016338,.00081289, -.13692,.043843,.0222425,.0102265,.004283,.0019,.000907241),ncol=6,nrow=7) -crit025<-matrix(c(.0361936,.012518,.007296,.0036084,.00172436,.000725365, -.000327776, -.05315,.017593,.0102389,.0055043,.00227459,.0010062,.000523526, -.07214,.023944,.013689,.0060686,.0028378,.00136379,.000635645, -.093578,.0293223,.0156754,.0086059,.0035195,.001694,.00074467, -.118414,.03885,.0201468,.0094298,.0040263,.00182437,.000916557, -.14271,.047745,.0253974,.011385,.004725,.00207588,.0010191),ncol=6,nrow=7) -crit01<-matrix(c(.0414762,.0146553,.0098428,.0045274,.00219345,.00096244, -.000443827, -.058666,.020007,.01129658,.0063092,.002796,.0011364,.000628054, -.079446,.0267958,.015428,.0071267,.0034163,.0015876,.000734865, -.102736,.0357572,.017786,.0093682,.0042367,.0019717,.000868506, -.125356,.041411,.0234916,.0106895,.0047028,.0020759,.00101052, -.14837,.053246,.027759,.012723,.00528,.002437,.00116065),ncol=6,nrow=7) -if(alpha==.1)critit<-crit10 -if(alpha==.05)critit<-crit05 -if(alpha==.025)critit<-crit025 -if(alpha==.01)critit<-crit01 -nvec<-c(10,20,30,50,100,200,400) -nval<-duplicated(c(n,nvec)) -nval<-nval[2:7] -if(sum(nval)>0)crit.val<-critit[nval,p] -if(is.null(crit.val)){ -if(n<=400){ -loc<-rank(c(n,nvec)) -xx<-c(1/nvec[loc[1]-1]^1.5,1/nvec[loc[1]]^1.5) -yy<-c(critit[loc[1]-1,p],critit[loc[1],p]) -} -icoef<-lsfit(xx,yy)$coef -crit.val<-icoef[1]+icoef[2]/n^1.5 -}}}} -if(is.null(crit.val)){ -# no critical value found -if(!com.pval){ -print('Critical values not available, will set com.pval=T') -print('and compute a p-value') -com.pval<-T -}} -gdot<-cbind(rep(1,n),x) -gdot<-ortho(gdot) -x<-gdot[,2:pp1] -x<-as.matrix(x) -temp<-rqfit(x,y,qval=qval,res=TRUE) -coef<-temp$coef -psi<-NA -psi<-ifelse(temp$residuals>0,qval,qval-1) -rnmat<-matrix(0,nrow=n,ncol=pp1) -ran.mat<-apply(x,2,rank) -flagvec<-apply(ran.mat,1,max) -for(j in 1:n){ -flag<-ifelse(flagvec<=flagvec[j],TRUE,FALSE) -flag<-as.numeric(flag) -rnmat[j,]<-apply(flag*psi*gdot,2,sum) -} -rnmat<-rnmat/sqrt(n) -temp<-matrix(0,pp1,pp1) -for(i in 1:n)temp<-temp+rnmat[i,]%*%t(rnmat[i,]) -temp<-temp/n -test<-max(eigen(temp)$values) -if(com.pval){ -if(SEED)set.seed(2) -if(MC)library(parallel) -xy=list() -p1=p+1 -for(i in 1:nboot)xy[[i]]=rmul(n,p=p1) -if(MC)temp3=mclapply(xy,qrchkv2.sub2,qval=qval,mc.preschedule=TRUE) -if(!MC)temp3=lapply(xy,qrchkv2.sub2,qval=qval) -rem=matl(temp3) -p.val=sum(test>=rem) 
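-# rem contains the simulated null values of the test statistic: the p-value is
-# the proportion of simulated values exceeding the observed statistic, and the
-# 1-alpha quantile of the sorted simulated values is used as the critical value.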
-rem<-sort(rem) -p.val<-1-p.val/nboot -ic<-round((1-alpha)*nboot) -crit.val<-rem[ic] -} -de='Fail to reject' -if( test>=crit.val)de='Reject' -list(test.stat=test,crit.value=crit.val,p.value=p.val,Decision=de) -} -qrchkv2.sub2<-function(xy,qval){ -p1=ncol(xy) -p=p1-1 -val=qrchkv2(xy[,1:p],xy[,p1],qval=qval) -val -} - -regYciCV<-function(n,alpha=.05,nboot=1000,regfun=tsreg,SEED=TRUE,MC=FALSE,null.value=0,xout=FALSE,...){ -# -# Determine a critical value for regYci -# -if(SEED)set.seed(2) -mv=NA -chk=0 -if(MC)library(parallel) -xy=list() -for (i in 1:nboot)xy[[i]]=rmul(n) -if(!MC)est=lapply(xy,regciCV.sub,regfun=regfun,null.value=null.value,...) -if(MC)est=mclapply(xy,regciCV.sub,regfun=regfun,null.value=null.value,...) -est=as.vector(matl(est)) -est=sort(est) -ic=round(alpha*nboot) -crit=est[ic] -crit -} -regciCV.sub<-function(xy,regfun,null.value=0,xout=FALSE,...){ - pv=regYci(xy[,1],xy[,2],SEED=FALSE,regfun=regfun,null.value=null.value,xout=xout,...)[,5] - min(pv) -} - -regYci.sum<-function(x,y,regfun=tsreg,pts=x,nboot=100,xout=FALSE,outfun=out,SEED=TRUE,alpha=.05,crit=NULL,null.value=0,plotPV=FALSE,ADJ=FALSE,MC=FALSE, -scale=FALSE,span=.75,xlab='X',xlab1='X1',xlab2='X2',ylab='p-values',theta=50,phi=25,pch='*',...){ -# -# Summarize results from regYci so that results are easier to read. -# single independent variable is assumed. -# -xy=elimna(cbind(x,y)) -x<-as.matrix(x) -p=ncol(x) -if(p>1)stop('This function is designed for one independent variable only') -p1=p+1 -x<-xy[,1:p] -y<-xy[,p1] -x<-as.matrix(x) -if(xout){ -m<-cbind(x,y) -flag<-outfun(x,plotit=FALSE,...)$keep -m<-m[flag,] -x<-m[,1:p] -y<-m[,p1] -x=as.matrix(x) -} -res=regYci(x,y,regfun=regfun,nboot=nboot,null.value=null.value,alpha=alpha,crit=crit, -ADJ=ADJ,MC=MC,plotPV=plotPV,...) -xord=order(pts) -outp=cbind(pts[xord],res[xord,]) -dimnames(outp)=list(NULL,c('X','Pred. Y','Lower.ci','Upper.ci','p.value')) -outp -} - -trq.fit<- -function(x, y, a1 = 0.1, a2, z, int = TRUE, method = "primal", tol = 0.0001) -{ -#compute trimmed regression quantiles -#z is the rq strcture - if(missing(a2)) a2 <- a1 - if(a1 < 0 | a2 < 0) - stop("trimming proportion negative.") - if(a1 + a2 - 1 > tol) - stop("trimming proportion greater than 1.") - if(method!="primal" & method!="dual") - stop("invalid method: should be 'primal' or 'dual'.") - x <- as.matrix(x) - if(missing(z)){ -# z <- rq.fit.br(x, y, tau = -1) #This function is not working properly. 
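-# When z is not supplied, the full regression quantile solution path is
-# computed here by adding an intercept column explicitly and calling
-# rq.fit.br with tau = -1; z$sol and z$dsol are used below.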
-ny=length(y) -xx=cbind(rep(1,ny),x) - z <- rq.fit.br(xx, y, tau = -1) -print(z) -} - p <- z$sol[1, ] - q <- matrix(z$sol[ - c(1:3), ], nrow(z$sol) - 3, ncol(z$sol)) - n <- nrow(z$dsol) - s <- NULL - if(length(dimnames(x)[[2]]) == 0) - dimnames(x) <- list(NULL, paste("X", 1:(nrow(q) - 1), sep = "") - ) - if(int) { - x <- cbind(1, x) - dimnames(x)[[2]][1] <- 'Intercept' - } - xbar <- apply(x, 2, "mean") - xxinv <- solve(t(x) %*% x) - if(abs(a1 + a2 - 1) <= tol) { - -#single quantile case - i <- sum(p < a1) - s$coef <- q[, i] - names(s$coef) <- dimnames(x)[[2]] - s$resid <- y - x %*% s$coef - PI <- 3.14159 - x0 <- qnorm(a1) - d0 <- (1/sqrt(2 * PI)) * exp( - (x0^2/2)) - d0 <- ((4.5 * d0^4)/(2 * x0^2 + 1)^2)^0.2 - d <- d0 * (length(s$resid) - length(s$coef))^(-0.2) - if(d > min(a1, 1 - a1)) - d <- min(a1, 1 - a1) - s$d <- d - i <- sum(p < a1 + d) - j <- sum(p < a1 - d) - shat <- as.numeric(xbar %*% t(q[, i] - q[, j]))/(2 * d) - s$int <- int - s$v <- a1 * (1 - a1) * shat^2 - s$cov <- s$v * xxinv - } - else { -#real trimming - p1 <- p[-1] - f <- 1/(1 - a1 - a2) - d <- pmax((pmin(p1, 1 - a2) - c(a1, pmax(p1[ - length(p1)], - a1))), 0) - if(method == "primal") { - s$coef <- q[, 1:length(p1)] %*% d * f - s$resid <- y - x %*% s$coef - s$int <- int - } - else { -#Jureckova-Gutenbrunner trimmed least squares - i <- max(1, sum(p < a1)) - g <- (z$dsol[, i + 1] - z$dsol[, i])/(p[i + 1] - p[ - i]) - wa <- z$dsol[, i] + (a1 - p[i]) * g - j <- sum(p < 1 - a2) - g <- (z$dsol[, j + 1] - z$dsol[, j])/(p[j + 1] - p[ - j]) - wb <- z$dsol[, j] + (1 - a2 - p[j]) * g - wt <- wa - wb - if(min(wt) < 0) - warning("some weights negative!") - s <- lsfit(x, y, abs(wt), int = F) - } -#now compute covariance matrix estimate - mu <- xbar %*% s$coef - v <- d %*% (z$sol[2, 1:length(d)] - mu)^2 - k <- qrq(z, c(a1, a2)) - mu - v <- v + a1 * k[1]^2 + a2 * k[2]^2 + (a1 * k[1] + a2 * k[2])^2 - names(s$coef) <- dimnames(x)[[2]] - s$v <- as.vector(f^2 * v) - s$cov <- s$v * xxinv - } - class(s) <- "trq" - s -} -"print.trq"<- -function(object, digits = 4) -{ - n <- length(object$resid) - p <- length(object$coef) - options(warn = -1) - if(object$int) { - df.num <- p - 1 - fstat <- c(t(object$coef[-1]) %*% solve(object$cov[-1, -1]) %*% - (object$coef[-1]))/df.num - } - else { - df.num <- p - fstat <- t(object$coef) %*% solve(object$cov) %*% (object$coef - )/df.num - } - pvalue <- 1 - pf(fstat, df.num, (n - p)) - regstat <- c(sqrt(object$v), n, fstat, df.num, (n - p), pvalue) - names(regstat) <- c("rse", "n", "F.stat", "df.num", "df.den", "p.value" - ) - err <- sqrt(diag(object$cov)) - tstat <- c(object$coef/err) - tabcoef <- cbind(object$coef, err, tstat, 2 * (1 - pt(abs(tstat), - n - p))) - dimnames(tabcoef) <- list(names(object$coef), c("coef", "std.err", - "t.stat", "p.value")) - options(warn = 0) - print(round(tabcoef, digits)) - cat(paste("Winsorized Standard Error of Regression= ", format(round( - sqrt(object$v), digits)), "\n", "N = ", format(n), - ", F-statistic = ", format(round(fstat, digits)), " on ", - format(df.num), " and ", format((n - p)), " df, ", "p-value = ", - format(round(pvalue, digits)), "\n\n", sep = "")) - invisible(list(summary = regstat, coef.table = tabcoef)) -} -"qrq"<- -function(s, a) -{ -#computes linearized quantiles from rq data structure -#v is the rq structure e.g. 
rq(x,y) -#a is a vector of quantiles required - if(min(a) < 0 | max(a) > 1) stop("alphas out of range [0,1]") - r <- s$sol[1, ] - q <- s$sol[2, ] - q <- c(q[1], q) - J <- length(r) - r <- c(0, (r[1:J - 1] + r[2:J])/2, 1) - u <- rep(0, length(a)) - for(k in 1:length(a)) { - i <- sum(r < a[k]) - w <- (a[k] - r[i])/(r[i + 1] - r[i]) - u[k] <- w * q[i + 1] + (1 - w) * q[i] - } - u -} -trqreg<-function(x,y,a1=.1,a2,xout=FALSE,outfun=outpro){ -library(quantreg) -x<-as.matrix(x) -p1<-ncol(x)+1 -p<-ncol(x) -xy<-cbind(x,y) -xy<-elimna(xy) -x<-xy[,1:p] -y<-xy[,p1] -if(xout){ -m<-cbind(x,y) -flag<-outfun(x,plotit=FALSE)$keep -m<-m[flag,] -x<-m[,1:p] -y<-m[,p1] -} -n=length(y) -xx=cbind(rep(1,n),x) -z <- rq.fit.br(xx, y, tau = -1) -res=trq.fit(x,y,z=z,a1=a1,a2) -coef=res$coef[1:p1] -list(coef=coef) -} -cor2xy<-function(x,y,corfun=spear,...){ - est1=corfun(x[,1],y,...)$cor - est2=corfun(x[,2],y,...)$cor - list(cor=c(est1,est2)) - } -TWOpovPV<-function(x,y,alpha=.05,CN=FALSE){ -# -# Comparing two dependent correlations: Overlapping case -# -# x is assumed to be a matrix with 2 columns -# -# Compare correlation of x[,1] with y to x[,2] with y -# returns a confidence interval stored in -# ci -# -# This function is exactly like TWOpov, only it returns a p-value as well. -# -alph<-c(1:99)/100 -for(i in 1:99){ -irem<-i -chkit<-TWOpov(x,y,alpha=alph[i],CN=CN,ZCI=TRUE)$ci -if(sign(chkit[1]*chkit[2])==1)break -} -p.value<-irem/100 -if(p.value<=.1){ -iup<-(irem+1)/100 -alph<-seq(.001,iup,.001) -for(i in 1:length(alph)){ -irem=i -p.value<-alph[i] -chkit<-TWOpov(x,y,alpha=alph[i],CN=CN,ZCI=TRUE)$ci -if(sign(chkit[1]*chkit[2])==1)break -}} -if(p.value<=.1){ -iup<-(irem+1)/100 -alph<-seq(.001,iup,.001) -for(i in 1:length(alph)){ -p.value<-alph[i] -chkit<-TWOpov(x,y,alpha=alph[i],CN=CN,ZCI=TRUE)$ci -if(sign(chkit[1]*chkit[2])==1)break -}} -res=TWOpov(x,y,alpha=alpha,CN=CN) -list(p.value=p.value,est.rho1=res$est.rho1,est.rho2=res$est.rho2,ci=res$ci) -} - -TWOpNOVPV<-function(x,y,HC4=TRUE,alpha=.05){ -# -# Comparing two dependent correlations: Non-overlapping case -# -# Compute a .95 confidence interval -# for the difference between two dependent Pearson correlations, -# non-overlapping case. -# -# Both x and y are assumed to be matrices with two columns. -# The function compares the correlation between x[,1] and x[,2] -# to the correlation between y[,1] and y[,2]. -# -# For simulation results, see Wilcox (2009). -# COMPARING PEARSON CORRELATIONS: DEALING WITH -# HETEROSCEDASTICITY AND NON-NORMALITY, Communications in Statistics--Simulations -# and Computations, 38, 2220-2234. -# -# This function is exactly like TWOpNOV, only it returns a p-value as well. -# -# Note: To get a p-value, HC4=TRUE must be used. 
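-# The p-value is obtained by searching over a grid of alpha values for the
-# smallest alpha at which the TWOpNOV confidence interval excludes zero,
-# then refining the search on a finer grid when the p-value is small.
-#
-# Illustrative example (added comment; simulated data, not from the original source):
-#   set.seed(1)
-#   x=matrix(rnorm(100),ncol=2)
-#   y=matrix(rnorm(100),ncol=2)
-#   TWOpNOVPV(x,y)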
-# -alph<-c(1:99)/100 -for(i in 1:99){ -irem<-i -chkit<-TWOpNOV(x,y,alpha=alph[i],HC4=TRUE) -chkit=c(chkit$ci.lower,chkit$ci.upper) -if(sign(chkit[1]*chkit[2])==1)break -} -p.value<-irem/100 -if(p.value<=.1){ -iup<-(irem+1)/100 -alph<-seq(.001,iup,.001) -for(i in 1:length(alph)){ -p.value<-alph[i] -alph<-c(1:99)/100 -for(i in 1:99){ -irem<-i -chkit<-TWOpNOV(x,y,alpha=alph[i],HC4=TRUE) -chkit=c(chkit$ci.lower,chkit$ci.upper) -if(sign(chkit[1]*chkit[2])==1)break -}}} -p.value<-irem/100 -if(p.value<=.1){ -iup<-(irem+1)/100 -alph<-seq(.001,iup,.001) -for(i in 1:length(alph)){ -p.value<-alph[i] -chkit<-TWOpNOV(x,y,alpha=alph[i],HC4=TRUE) -chkit=c(chkit$ci.lower,chkit$ci.upper) -if(sign(chkit[1]*chkit[2])==1)break -}} -if(p.value<=.001){ -alph<-seq(.0001,.001,.0001) -for(i in 1:length(alph)){ -p.value<-alph[i] -chkit<-TWOpNOV(x,y,alpha=alph[i],HC4=TRUE) -chkit=c(chkit$ci.lower,chkit$ci.upper) -if(sign(chkit[1]*chkit[2])==1)break -}} -if(p.value<=.001){ -alph<-seq(.0001,.001,.0001) -for(i in 1:length(alph)){ -p.value<-alph[i] -chkit<-TWOpNOV(x,y,alpha=alph[i],HC4=TRUE) -chkit=c(chkit$ci.lower,chkit$ci.upper) -if(sign(chkit[1]*chkit[2])==1)break -}} -res=TWOpNOV(x,y,alpha=alpha,HC4=TRUE) -ci=c(res$ci.lower,res$ci.upper) -list(p.value=p.value,est.1=res$est.1,est.2=res$est.2,ci=ci) #ci.lower=res$ci.lower,ci.upper=res$ci.upper) -} - -regYci<-function(x,y,regfun=tsreg,pts=unique(x),nboot=100,ADJ=FALSE,xout=FALSE,outfun=out,SEED=TRUE,alpha=.05,crit=NULL,null.value=0,plotPV=FALSE,scale=TRUE,span=.75, -xlab='X',xlab1='X1',xlab2='X2',ylab='p-values',zlab='p-values', -theta=50,phi=25,MC=FALSE,nreps=1000,SM=FALSE,pch='*',...){ -# -# Compute confidence interval for the typical value of Y, given X, based on some regression estimator -# By default, -# regfun=tsreg meaning that the Theil--Sen estimator is used. -# -# ADJ=TRUE, the critical value is adjusted so that the simultaneous probability coverage is 1-alpha. -# The adjustment has been studied with one independent variable. It is unknown how well it works with -# more than one independent variable. -# -# If there is a single independent variable, -# regfun=tsreg, ols or qreg, and alpha=.05, an adjustment can be made quickly. Otherwise an -# adjustment must be computed, which can require relatively high execution time. -# To reduce execution time, set -# MC=TRUE, assuming a multi-core processor is available. -# -# nreps: Number of replications used to compute a critical value. Execution time can be high -# MC=TRUE can reduce execution time considerably if a multi-core processor is available. -# - -xy=elimna(cbind(x,y)) -x<-as.matrix(x) -p=ncol(x) -p1=p+1 -vals=NA -x<-xy[,1:p] -y<-xy[,p1] -x<-as.matrix(x) -n=nrow(x) -if(xout){ -m<-cbind(x,y) -flag<-outfun(x,plotit=FALSE,...)$keep -m<-m[flag,] -n=nrow(m) -x<-m[,1:p] -y<-m[,p1] -x=as.matrix(x) -} -if(ADJ){ -if(n<10)stop('Should have a sample size of at least 10') -if(alpha==.05){ -alpha=.01 # assuming tsreg,tsreg_C, tshdreg or qreg are being used. -if(identical(regfun,ols)){ -nv=c(10,20,50,100,400) -pval=c(.001,.004, .008, .008, .01) -ipos=sum(nv<=n) -alpha=pval[ipos] -} -if(identical(regfun,tshdreg))alpha=.009 -if(identical(regfun,qreg))alpha=.009 -crit=qnorm(1-alpha/2) -} -} -if(SEED)set.seed(2) -if(is.null(crit)){ -if(!ADJ)crit=qnorm(1-alpha/2) -if(ADJ){ -padj=regYciCV(n,nboot=nreps,regfun=regfun,MC=MC,SEED=FALSE, -null.value=0,...) -crit=qnorm(1-padj/2) -}} -sqsd=regYvar(x,y,regfun=regfun,pts=pts,nboot=nboot,SEED=SEED,...) -sd=sqrt(sqsd) -est=regYhat(x,y,regfun=regfun,xr=pts,...) 
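-# Illustrative example (added comment; simulated data, not from the original source):
-#   set.seed(1)
-#   x=rnorm(40); y=1+.5*x+rnorm(40)
-#   regYci(x,y,pts=c(-1,0,1))
-# For each value in pts this returns the predicted Y based on the Theil--Sen
-# fit, a confidence interval, and a p-value for the hypothesis that the
-# typical value of Y equals null.value.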
-pv=2*(1-pnorm(abs(est-null.value)/sd)) -if(length(pts)==1)est=matrix(c(est,est-crit*sd,est+crit*sd,pv),nrow=1) -if(length(pts)>1)est=cbind(est,est-crit*sd,est+crit*sd,pv) -dimnames(est)=list(NULL,c('Pred. Y','Lower.ci','Upper.ci','p.value')) -if(plotPV){ -if(ncol(x)>2)stop('Can plot only with one or two independent variables') -if(ncol(x)==1)plot(pts,pv,xlab=xlab,ylab=ylab,pch=pch) -if(ncol(x)==2){ -if(SM)lplot(pts,pv,xlab=xlab1,ylab=xlab2,zlab=zlab,span=span,ticktype='detail',scale=scale,theta=theta,phi=phi) -if(!SM){ -library(scatterplot3d) -scatterplot3d(pts[,1],pts[,2],pv,xlab=xlab1,ylab=xlab2,zlab=zlab) -} -}} -if(p==1){ -xord=order(pts) -if(length(pts)==1)outp=matrix(c(pts[xord],est[xord,]),nrow=1) -if(length(pts)>1)outp=cbind(pts[xord],est[xord,]) -dimnames(outp)=list(NULL,c('X','Pred. Y','Lower.ci','Upper.ci','p.value')) -est=outp -} -est -} - - - -anclin<-function(x1,y1,x2,y2,regfun=tsreg,pts=NULL,ALL=FALSE,npts=25,plotit=TRUE,SCAT=TRUE, -pch1='*',pch2='+', -nboot=100,ADJ=TRUE,xout=FALSE,outfun=outpro,SEED=TRUE,p.crit=.015, -alpha=.05,crit=NULL,null.value=0,plotPV=FALSE,scale=TRUE,span=.75,xlab='X',ylab='p-values',ylab2='Y',MC=FALSE,nreps=1000,pch='*',...){ -# -# ANCOVA: -# For two independent groups, compute confidence intervals for difference between -# the typical value of Y, given X, -# based on some regression estimator -# By default, -# regfun=tsreg meaning that the Theil--Sen estimator is used. -# -# The functions anclin and regYci2g are identical. -# -# In contrast to the function ancJN, this function can deal with a larger number of -# covariate values and it controls the probability of one or more Type I errors using -# a method that is better, in terms of power, than using Hochberg or Hommel. -# -# ADJ=TRUE, the critical value is adjusted so that the simultaneous -# probability coverage is 1-alpha. -# A single covariate is assumed. -# If alpha=.05, an adjustment can be made quickly. Otherwise an -# adjustment must be computed, which can require relatively high execution time. -# To reduce execution time, set -# MC=TRUE, assuming a multi-core processor is available. 
-# If n1<20 and n2<100, assuming that n11)stop('Current version allows one covariate only') -p1=p+1 -vals=NA -x1<-xy[,1:p] -y1<-xy[,p1] -x1<-as.matrix(x1) -xy=elimna(cbind(x2,y2)) -x2<-as.matrix(x2) -p=ncol(x2) -if(p>1)stop('Current version allows one covariate only') -p1=p+1 -vals=NA -x2<-xy[,1:p] -y2<-xy[,p1] -x2<-as.matrix(x2) -n1=length(y1) -n2=length(y2) -n=min(c(n1,n2)) -if(xout){ -m<-cbind(x1,y1) -flag<-outfun(x1,plotit=FALSE)$keep -m<-m[flag,] -n1=nrow(m) -x1<-m[,1:p] -y1<-m[,p1] -x1=as.matrix(x1) -m<-cbind(x2,y2) -flag<-outfun(x2,plotit=FALSE)$keep -m<-m[flag,] -n2=nrow(m) -n=min(c(n1,n2)) -x2<-m[,1:p] -y2<-m[,p1] -x2=as.matrix(x2) -} -if(is.null(pts)){ -xall=unique(c(x1,x2)) -if(ALL)pts=xall -if(!ALL)pts=seq(min(xall),max(xall),length.out=npts) -} -if(ADJ){ -if(n<10)stop('Should have a sample size of at least 10') -if(alpha==.05){ -alpha=p.crit -crit=qnorm(1-alpha/2) -} -if(!ADJ)p.crit=alpha -if(n<20 & max(c(n1,n2))<100) crit=NULL -if(p>1)crit=NULL -} -if(is.null(crit) & !ADJ)crit=qnorm(1-alpha/2) -if(is.null(crit) & ADJ){ -if(SEED)set.seed(2) -padj=regYciCV2G(n1,n2,nboot=nreps,regfun=regfun,MC=MC,SEED=FALSE,ALL=ALL, -null.value=null.value,pts=pts,alpha=alpha,...)$crit.est -crit=qnorm(1-padj/2) -p.crit=padj -} -sqsd1=regYvar(x1,y1,regfun=regfun,pts=pts,nboot=nboot,SEED=SEED) -sqsd2=regYvar(x2,y2,regfun=regfun,pts=pts,nboot=nboot,SEED=SEED) -sd=sqrt(sqsd1+sqsd2) -est1=regYhat(x1,y1,regfun=regfun,xr=pts,...) -est2=regYhat(x2,y2,regfun=regfun,xr=pts,...) -chk.test=abs(est1-est2-null.value)/sd -pv=2*(1-pnorm(abs(est1-est2-null.value)/sd)) -est=cbind(pts,est1-est2,est1-est2-crit*sd,est1-est2+crit*sd,pv) -dimnames(est)=list(NULL,c('X','Est.Dif','Lower.ci','Upper.ci','p.value')) -if(plotit){ -plotPV=FALSE -plot(c(x1,x2),c(y1,y2),type="n",xlab=xlab,ylab=ylab2) -reg1=regfun(x1,y1,...)$coef -reg2=regfun(x2,y2,...)$coef -if(SCAT){ -points(x1,y1,pch=pch1) -points(x2,y2,pch=pch2) -} -abline(reg1) -abline(reg2,lty=2) -} -if(plotPV){ -plot(pts,pv,xlab=xlab,ylab=ylab,pch=pch) -} -list(output=est,p.crit=p.crit,crit.value=crit,num.sig=sum(est[,5]<=p.crit)) -} - -regYciCV2G<-function(n1,n2,crit=NULL,g=0,h=0,nboot=1000,regfun=tsreg,ALL=TRUE, -alpha=.05,SEED=TRUE,MC=FALSE,null.value=0,pts=NULL,npts=100,nmiss=0,...){ -n=max(n1,n2) -if(nmiss>n)stop('Number of missing values is greater than max(n1,n2)') -if(SEED)set.seed(2) -mv=NA -chk=0 -if(n1!=n2)nmiss=max(c(n1,n2))-min(c(n1,n2)) -if(MC)library(parallel) -xy=list() -for (i in 1:nboot){ -x1=ghdist(n,g=g,h=h) -x2=ghdist(n,g=g,h=h) -if(nmiss>0)x2[1:nmiss]=NA -xx=c(x1,x2) -xx=elimna(xx) -if(is.null(pts)){ -if(!ALL)pts=seq(min(xx),max(xx),length.out = npts) -if(ALL)pts=unique(xx) -} -y1=ghdist(n,g=g,h=h) -y2=ghdist(n,g=g,h=h) -xy[[i]]=cbind(x1,y1,x2,y2) -} -if(!MC)est=lapply(xy,regciCV2G.sub,regfun=regfun,null.value=null.value,npts=npts,...) -if(MC)est=mclapply(xy,regciCV2G.sub,regfun=regfun,null.value=null.value,pts=pts,npts=npts,...) 
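-# est now holds, for each simulated data set, the smallest p-value over the
-# covariate points; the alpha quantile of these minima (computed with hd)
-# is returned as the estimated critical p-value.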
-est=as.vector(matl(est)) -type1=NULL -if(!is.null(crit))type1=mean(est<=crit) -list(global.p.value=type1,crit.est=hd(est,alpha)) -} - -regYci2G=anclin - -regY2G.sub<-function(xy,regfun,null.value=0,...){ - pv=regYci2Gv2(xy[,1],xy[,2],xy[,3],xy[,4],SEED=FALSE,regfun=regfun,null.value=null.value,...)[,4] - min(pv) -} - - -regciCV2G.sub<-function(xy,regfun,null.value=0,pts=NULL,npts=100,...){ - pv=regYci2Gv2(xy[,1],xy[,2],xy[,3],xy[,4],SEED=FALSE,regfun=regfun,null.value=null.value,plotit=FALSE, -npts=npts,pts=pts,...)$output[,5] - min(pv) -} - -# To avoid nested calls, need: - -regYci2Gv2<-function(x1,y1,x2,y2,regfun=tsreg,pts=NULL,ALL=FALSE,npts=25,plotit=TRUE,SCAT=TRUE, -pch1='*',pch2='+', -nboot=100,ADJ=FALSE,xout=FALSE,outfun=outpro,SEED=TRUE,p.crit=.015, -alpha=.05,crit=NULL,null.value=0,plotPV=FALSE,scale=TRUE,span=.75,xlab='X',xlab1='X1',xlab2='X2',ylab='p-values',ylab2='Y',theta=50,phi=25,MC=FALSE,nreps=1000,pch='*',...){ -# -# ANCOVA: -# For two independent groups, compute confidence intervals for difference between -# the typical value of Y, given X, -# based on some regression estimator -# By default, -# regfun=tsreg meaning that the Theil--Sen estimator is used. -# -# The functions anclin and regYci2g are identical. -# -# In contrast to the function ancJN, this function can deal with a larger number of -# covariate values and it controls the probability of one or more Type I errors using -# a method that is better, in terms of power, than using Hochberg or Hommel. -# -# ADJ=TRUE, the critical value is adjusted so that the simultaneous -# probability coverage is 1-alpha. -# If there is a single covariate, -# regfun=tsreg or tshdreg, and alpha=.05, an adjustment can be made quickly. Otherwise an -# adjustment must be computed, which can require relatively high execution time. -# To reduce execution time, set -# MC=TRUE, assuming a multi-core processor is available. -# If n1<20 and n2<100, assuming that n11)stop('Current version allows one covariate only') -p1=p+1 -vals=NA -x1<-xy[,1:p] -y1<-xy[,p1] -x1<-as.matrix(x1) -xy=elimna(cbind(x2,y2)) -x2<-as.matrix(x2) -p=ncol(x2) -p1=p+1 -vals=NA -x2<-xy[,1:p] -y2<-xy[,p1] -x2<-as.matrix(x2) -n1=length(y1) -n2=length(y2) -n=min(c(n1,n2)) -#print(c(n1,n2,n)) -if(xout){ -m<-cbind(x1,y1) -flag<-outfun(x1,plotit=FALSE)$keep -m<-m[flag,] -n1=nrow(m) -x1<-m[,1:p] -y1<-m[,p1] -x1=as.matrix(x1) -m<-cbind(x2,y2) -flag<-outfun(x2,plotit=FALSE)$keep -m<-m[flag,] -n2=nrow(m) -n=min(c(n1,n2)) -x2<-m[,1:p] -y2<-m[,p1] -x2=as.matrix(x2) -} -if(is.null(pts)){ -xall=unique(c(x1,x2)) -if(ALL)pts=xall -if(!ALL)pts=seq(min(xall),max(xall),length.out=npts) -} -if(ADJ){ -if(n<10)stop('Should have a sample size of at least 10') -if(alpha==.05){ -#if(identical(regfun,tsreg) || identical(regfun,tsreg_C))alpha=p.crit causes an error if WRScpp not installed -#if(identical(regfun,tsreg))alpha=p.crit -alpha=p.crit -crit=qnorm(1-alpha/2) -} -if(!ADJ)p.crit=alpha -if(n<20 & max(c(n1,n2))<100) crit=NULL -if(p>1)crit=NULL -} -if(is.null(crit) & !ADJ)crit=qnorm(1-alpha/2) -if(is.null(crit) & ADJ){ -if(SEED)set.seed(2) -print(c(n1,n2)) -padj=regYciCV2G(n1,n2,nboot=nreps,regfun=regfun,MC=MC,SEED=FALSE,ALL=ALL, -null.value=null.value,pts=pts,alpha=alpha,...)$crit.est -crit=qnorm(1-padj/2) -p.crit=padj -} -sqsd1=regYvar(x1,y1,regfun=regfun,pts=pts,nboot=nboot,SEED=SEED) -sqsd2=regYvar(x2,y2,regfun=regfun,pts=pts,nboot=nboot,SEED=SEED) -sd=sqrt(sqsd1+sqsd2) -est1=regYhat(x1,y1,regfun=regfun,xr=pts,...) -est2=regYhat(x2,y2,regfun=regfun,xr=pts,...) 
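-# At each covariate value the two predicted Y values are compared via a normal
-# approximation: the difference, minus null.value, is divided by the square
-# root of the sum of the two bootstrap squared standard errors and referred
-# to a standard normal distribution to get a two-sided p-value.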
-pv=2*(1-pnorm(abs(est1-est2-null.value)/sd)) -est=cbind(pts,est1-est2,est1-est2-crit*sd,est1-est2+crit*sd,pv) -dimnames(est)=list(NULL,c('X','Est.Dif','Lower.ci','Upper.ci','p.value')) -if(plotit){ -plotPV=FALSE -plot(c(x1,x2),c(y1,y2),type="n",xlab=xlab,ylab=ylab2) -reg1=regfun(x1,y1,...)$coef -reg2=regfun(x2,y2,...)$coef -if(SCAT){ -points(x1,y1,pch=pch1) -points(x2,y2,pch=pch2) -} -abline(reg1) -abline(reg2,lty=2) -} -if(plotPV){ -if(ncol(x1)>2)stop('Can plot only with one or two independent variables') -if(ncol(x1)==1)plot(pts,pv,xlab=xlab,ylab=ylab,pch=pch) -if(ncol(x2)==2)lplot(pts,pv,xlab=xlab1,ylab=xlab2,zlab=ylab,span=span,ticktype='detail',scale=scale,theta=theta,phi=phi) -} -list(output=est,p.crit=p.crit,crit.value=crit,num.sig=sum(est[,5]<=p.crit)) -} - - - - -ancdet<-function(x1,y1,x2,y2,fr1=1,fr2=1,tr=.2, -alpha=.05,method="EP",plotit=TRUE,plot.dif=FALSE,pts=NA,sm=FALSE, -pr=TRUE,xout=FALSE,outfun=out,MC=FALSE, -npts=25,p.crit=NULL,nreps=5000,SEED=TRUE,EST=FALSE, -SCAT=TRUE,xlab='X',ylab='Y',pch1='*',pch2='+',...){ -# -# Like the function ancova, but a more detailed analysis -# plot.dif=TRUE: plot difference in the estimates plus a -# confidence band having simultaneous probability coverate 1-alpha -# -# npts = number of covariate values to be used -# -# Argument method indicates which measure of effect size will be used -# EP: explanatory measure of effect size -# QS: quantile shift measure of effect size -# AKP: trimmed mean Winsorized variance analog of Cohen's d -# WMW: P(X1)stop('One covariate only is allowed with this function') -if(is.null(p.crit))set.seed(2) -xy=elimna(cbind(x1,y1)) -x1=xy[,1] -y1=xy[,2] -xy=elimna(cbind(x2,y2)) -x2=xy[,1] -y2=xy[,2] -if(xout){ -flag<-outfun(x1)$keep -x1<-x1[flag] -y1<-y1[flag] -flag<-outfun(x2)$keep -x2<-x2[flag] -y2<-y2[flag] -} -res1=ancova(x1,y1,x2,y2,pr=FALSE,plotit=FALSE,method=method)$output -pts=seq(res1[1,1],res1[5,1],length.out=npts) - -if(alpha!=.05)EST=TRUE -if(is.null(p.crit)){ -if(!EST){ -nv=c(30, 50, 60, 70, 80, 100, -150, 200, 300, 400, 500, 600, 800) -pv=c(0.00824497,0.00581, 0.005435089, 0.004763079, -0.00416832, 0.004406774, 0.00388228,0.003812836,0.003812836,0.003453055, 0.003625061, -.003372966, 0.003350022) -n1= length(y1) - n2=length(y2) -p.crit=(lplot.pred(1/nv,pv,1/n1)$yhat+lplot.pred(1/nv,pv,1/n2)$yhat)/2 -} -if(EST)p.crit=ancdet.pv(length(y1),length(y2),nreps=nreps,tr=tr,npts=npts,MC=MC) -} - -if(plot.dif)plotit=FALSE -critv=qnorm(1-p.crit/2) -res=ancova(x1,y1,x2,y2,fr1=fr1,fr2=fr2,tr=tr,alpha=alpha,pr=FALSE,plotit=plotit,pts=pts,SCAT=SCAT)$output -res[,7]=res[,4]-critv*res[,6] # adjust confidence interval based on adjusted p-value -res[,8]=res[,4]+critv*res[,6] # adjust confidence interval based on adjusted p-value -if(plot.dif){ -yhat=plot(c(res[,1],res[,1],res[,1]),c(res[,4],res[,7],res[,8]),type='n',xlab=xlab,ylab=ylab) -z1=lplot(res[,1],res[,4],plotit=FALSE,pyhat=TRUE)$yhat -z2=lplot(res[,1],res[,7],plotit=FALSE,pyhat=TRUE)$yhat -z3=lplot(res[,1],res[,8],plotit=FALSE,pyhat=TRUE)$yhat -lines(res[,1],z1) -lines(res[,1],z2,lty=2) -lines(res[,1],z3,lty=2) -} -sig=rep(0,nrow(res)) -sig[res[,9]<=p.crit]=1 -sig=as.matrix(sig,ncol=1) -dimnames(sig)=list(NULL,'Sig.Dif') -res=cbind(res,sig) -list(p.crit=p.crit,output=res[,-10],num.sig=sum(sig),p.crit=p.crit) -} - - -ancdet.sub<-function(xy,tr=.2, -alpha=.05,plotit=FALSE,plot.dif=FALSE,pts=NA,sm=FALSE, -pr=TRUE,xout=FALSE,outfun=out,LP=TRUE, -npts=25,p.crit=NULL,nreps=2000, -SCAT=TRUE,xlab='X',ylab='Y',pch1='*',pch2='+',...){ -# -# Like ancova, but a more 
detailed analysis based on using -# npts covariate values -# -xy1=elimna(xy[,1:2]) -xy2=elimna(xy[,3:4]) -x1=xy1[,1] -y1=xy1[,2] -x2=xy2[,1] -y2=xy2[,2] -if(ncol(as.matrix(x1))>1)stop('One covariate only is allowed with this function') -res1=ancova(x1,y1,x2,y2,pr=FALSE,plotit=FALSE)$output -pts=seq(res1[1,1],res1[5,1],length.out=npts) -res=ancova(x1,y1,x2,y2,tr=tr,alpha=alpha,plotit=FALSE, -pr=FALSE,pts=pts,skip.crit=TRUE)$output -res.out=min(res[,9]) -res.out -} - -ancdet.pv<-function(n1,n2,nreps=2000,alpha=.05,npts=25,tr=.2,MC=FALSE,SEED=TRUE){ -if(SEED)set.seed(2) -pvals=NA -xy=list() -n=max(c(n1,n2)) -nmiss=n-min(c(n1,n2)) -for (i in 1:nreps){ -xy[[i]]=rmul(n,p=4) -xy[[i]][1:nmiss,1:2]=NA -} -if(!MC)pvals=lapply(xy,ancdet.sub,npts=npts,tr=tr) -if(MC){ -library(parallel) -pvals=mclapply(xy,ancdet.sub,npts=npts,tr=tr) -} -pvals=matl(pvals) -pv=hd(pvals,alpha) -pv -} - -ancdet2C<-function(x1,y1,x2,y2,fr1=1,fr2=1,tr=.2,test=yuen,q=.5, -alpha=.05,plotit=TRUE,op=FALSE,pts=NA,sm=FALSE,FRAC=.5, -pr=TRUE,xout=FALSE,outfun=outpro,MC=FALSE, -p.crit=NULL,nreps=2000,SEED=TRUE,FAST=TRUE,ticktype='detail', -xlab='X1',ylab='X2',zlab='Y',pch1='*',pch2='+',...){ -# -# method MC3 in Wilcox (2017, Intro to Robust Estimation and Hypothesis Testing, 4th ed.) -# Multiple comparisons using an improvement on Hochberg to control FWE -# -# Like ancdet, only two covariate values can be used. -# Like method MC2, -# use the deepest half of the covariate values. -# -# politit=TRUE. Plot covariate points. Significant points are indicated by -# pch='+' -# -# test can have one of three values: yuen (default), qcomhd or qcomhdMC -# -if(ncol(as.matrix(x1))!=2)stop('Two covariates only can be used') -if(is.null(p.crit))set.seed(2) -xy=elimna(cbind(x1,y1)) -x1=xy[,1:2] -y1=xy[,3] -xy=elimna(cbind(x2,y2)) -x2=xy[,1:2] -y2=xy[,3] -if(min(length(y1),length(y2))<50)stop('The minimum sample size must be greater than or equal to 50') -if(xout){ -flag<-outfun(x1,plotit=FALSE)$keep -x1<-x1[flag,] -y1<-y1[flag] -flag<-outfun(x2,plotit=FALSE)$keep -x2<-x2[flag,] -y2<-y2[flag] -} -if(FAST){ -if(FRAC==.5){ -if(is.null(p.crit)){ -if(alpha==.05){ -nv=c(50, 55, 60, 70, 80, 100, 200, 300, 400, 500, 600,800) -pv=c(0.004585405, 0.003199894, 0.002820089, 0.002594342, 0.002481210, 0.001861313, - 0.001419821, 0.001423000, 0.001313700, 0.001351900, 0.001075, 0.00095859) - n1= length(y1) - n2=length(y2) -p.crit=(lplot.pred(1/nv,pv,1/n1)$yhat+lplot.pred(1/nv,pv,1/n2)$yhat)/2 - # Using K=n1 points, i.e., K=n1 tests are performed -if(max(n1,n2)>max(nv)){ -p.crit=min(pv) -print('Warning: p.crit has not been computed exactly for sample sizes greater than 800') -if(n1>800)p.crit1=regYhat(1/pv[8:12,1],pv[8:12,2],1/n1) -if(n1<=800)p.crit1=lplot.pred(1/nv,pv,1/n1)$yhat -if(n2>800)p.crit1=regYhat(1/pv[8:12,1],pv[8:12,2],1/n2) -if(n2<=800)p.crit1=lplot.pred(1/nv,pv,1/n2)$yhat -p.crit=(p.crit1+p.crit2)/2 -} -}}}} -res1=ancov2COV(x1,y1,x2,y2,DETAILS=TRUE,pr=FALSE,FRAC=FRAC,tr=tr,test=test,q=q,MC=MC) -if(is.null(p.crit))p.crit=ancdet2C.pv(length(y1),length(y2),MC=MC,nreps=nreps, -SEED=SEED) -LL=length(ncol(res1$all.results)) -if(LL==1)num.sig=sum(res1$all.results[,3]<=p.crit) -if(LL==0)num.sig=NA -sig.points=NA -if(LL==1){ -flag=res1$all.results[,3]<=p.crit -sig.points=res1$all.points.used[flag,1:2] -} -if(plotit){ -if(!op){ -if(pr)print('To plot the estimated differences for the covariate points used, set op=TRUE') -if(LL==0)plot(res1$all.points.used[,1],res1$all.points.used[,2],xlab=xlab,ylab=ylab) -if(LL==1){ 
-plot(res1$all.points.used[,1],res1$all.points.used[,2],type='n',xlab=xlab,ylab=ylab) -points(res1$all.points.used[!flag,1],res1$all.points.used[!flag,2],pch=pch1) -points(res1$all.points.used[flag,1],res1$all.points.used[flag,2],pch=pch2) -}} -if(op) -lplot(res1$all.points.used[,1:2],res1$all.points.used[,3],xlab=xlab,ylab=ylab,zlab=zlab,ticktype=ticktype) -} -list(num.sig=num.sig,p.crit=p.crit,points.used=cbind(res1$all.points.used[,1:3],res1$all.results),sig.points=sig.points) -} - - -ancdet2C.pv<-function(n1,n2,nreps=2000,alpha=.05,FRAC=.5,tr=.2,MC=FALSE,SEED=TRUE){ -pvals=NA -xy=list() -n=max(c(n1,n2)) -nmiss=n-min(c(n1,n2)) -for (i in 1:nreps){ -xy[[i]]=rmul(n,p=6) -xy[[i]][1:nmiss,1:3]=NA -} -if(!MC)pvals=lapply(xy,ancdet2C.sub,tr=tr,FRAC=FRAC) -if(MC){ -library(parallel) -pvals=mclapply(xy,ancdet2C.sub,tr=tr,FRAC=FRAC) -} -pvals=matl(pvals) -pv=hd(pvals,alpha) -pv -} - -ancdet2C.sub<-function(xy,tr=.2,FRAC=.5){ -# -xy1=elimna(xy[,1:3]) -xy2=elimna(xy[,4:6]) -x1=xy1[,1:2] -y1=xy1[,3] -x2=xy2[,1:2] -y2=xy2[,3] -res1=ancov2COV(x1,y1,x2,y2,pr=FALSE,FRAC=FRAC)$min.p.value -res1 -} - -Dancdet<-function(x1,y1,x2,y2,fr1=1,fr2=1,tr=.2,DIF=TRUE, -alpha=.05,plotit=TRUE,plot.dif=FALSE,pts=NA,sm=FALSE, -pr=TRUE,xout=FALSE,outfun=out,MC=FALSE, -npts=25,p.crit=NULL,nreps=2000,SEED=TRUE, -SCAT=TRUE,xlab='X',ylab='Y',pch1='*',pch2='+',...){ -# -# ANCOVA for dependent groups. -# -# Like Dancova, but a more detailed analysis -# plot.dif=TRUE: plot difference in the estimates plus a -# confidence band having simultaneous probability coverate 1-alpha -# -# npts = number of covariate values to be used -# -if(ncol(as.matrix(x1))>1)stop('One covariate only is allowed with this function') -if(ncol(as.matrix(x1))>1)stop('One covariate only is allowed with this function') -if(length(x1)!=length(y1))stop('x1 and y1 have different lengths') -if(length(x1)!=length(x2))stop('x1 and y2 have different lengths') -if(length(x2)!=length(y2))stop('x2 and y2 have different lengths') -if(length(y1)!=length(y2))stop('y1 and y2 have different lengths') -xy=elimna(cbind(x1,y1,x2,y2)) -x1=xy[,1] -y1=xy[,2] -x2=xy[,3] -y2=xy[,4] -#if(is.null(p.crit))set.seed(2) -if(alpha!=.05)p.crit=ancdet.pv(length(y1),length(y2),nreps=nreps,tr=tr,npts=npts,MC=MC) -else{ -if(n>800)p.crit=0.00335002 -if(n <= 800){ -nv=c(30, 50, 60, 70, 80, 100, -150, 200, 300, 400, 500, 600, 800) -pv=c(0.00824497,0.00581, 0.005435089, 0.004763079, -0.00416832, 0.004406774, 0.00388228,0.003812836,0.003812836,0.003453055, 0.003625061, -.003372966, 0.003350022) -n1= length(y1) - n2=length(y2) -p.crit=(lplot.pred(1/nv,pv,1/n1)$yhat+lplot.pred(1/nv,pv,1/n2)$yhat)/2 -}} - -res1=ancova(x1,y1,x2,y2,pr=FALSE,plotit=FALSE)$output # Get lowest and covariate -# values where comparisons can be made. 
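-# res1[1,1] and res1[5,1] are the smallest and largest covariate values chosen
-# by ancova; comparisons are then made at npts evenly spaced points covering
-# this range.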
-pts=seq(res1[1,1],res1[5,1],length.out=npts) -#if(is.null(p.crit))p.crit=ancdet.pv(length(y1),length(y2),nreps=nreps,tr=tr,npts=npts,MC=MC) -if(plot.dif)plotit=FALSE -res=Dancova(x1,y1,x2,y2,fr1=fr1,fr2=fr2,tr=tr,alpha=p.crit, -DIF=DIF,plotit=plotit,pts=pts,SCAT=SCAT)$output -if(plot.dif){ -yhat=plot(c(res[,1],res[,1],res[,1]),c(res[,3],res[,6],res[,7]),type='n',xlab=xlab,ylab=ylab) -z1=lplot(res[,1],res[,3],plotit=FALSE,pyhat=T)$yhat -z2=lplot(res[,1],res[,6],plotit=FALSE,pyhat=T)$yhat -z3=lplot(res[,1],res[,7],plotit=FALSE,pyhat=T)$yhat -lines(res[,1],z1) -lines(res[,1],z2,lty=2) -lines(res[,1],z3,lty=2) -} -sig=rep(0,nrow(res)) -sig[res[,8]<=p.crit]=1 -sig=as.matrix(sig) -res=res[,-9] -dimnames(sig)=list(NULL,'Sig.Dif') -res=cbind(res,sig) -list(p.crit=p.crit,output=res,num.sig=sum(sig),p.crit=p.crit) -} - -dmedian<-function(x,depfun=pdepth,...){ -# -# Compute the median based on the deepest point for the multivariate -# data in x -# -# For continuous variables, this function returns a unique median -# -# Projection distances are used by default. -# Another option is depfun=zdepth -# -if(is.null(dim(x)) || dim(x)==1)stop('x should be a matrix with two or more columns') -val=depfun(x,...) -id=which(val==max(val)) -list(center=x[id,]) -} - - -ancJNmp<-function(x1,y1,x2,y2,regfun=qreg,p.crit=NULL,DEEP=FALSE,WARN=FALSE, -plotit=TRUE,xlab='X1',ylab='X2',null.value=0,FRAC=.5,cov1=FALSE,SMM=TRUE,ALL=TRUE,pr=TRUE, -alpha=.05,nreps=1000, MC=FALSE, pts=NULL,SEED=TRUE,nboot=100,xout=FALSE,outfun=outpro,...){ -# -# Compare two independent groups using a generalization of the ancts function that -# allows more than one covariate. -# -# DEEP=FALSE: If pts=NULL, design points are chosen to be deepest point in -# x1 plus points on the .5 depth contour. -# -# DEEP=TRUE, choose deepest half of c(x1,x2) and use critical p-value indicated by -# p.crit, the critical p-value,which defaults to .015 when alpha=.05. -# If alpha!=.05, p.crit must be computed, which can require high execution time. -# MC=TRUE will reduce execution time considerably. -# -# cov1=TRUE: the covariates that are used are taken to be the points in x1. If -# -# plotit=TRUE: if p=2 covariates, plot covariate points with non-significant points indicated by * and -# significant points by + - -# (This function replaces anctsmp, which does not have an option for using the deepest half of covariate points.) 
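-#
-# Note: the body below stops immediately with a message indicating that the
-# improved function ancJNPVAL should be used instead.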
-# -if(SEED)set.seed(2) # now cov.mve always returns same result - -stop('This function has been replaced by an improved ancJNPVAL') -x1=as.matrix(x1) -x2=as.matrix(x2) -if(ncol(x1)!=ncol(x2))stop('x1 and x2 have a different number of columns') -p=ncol(x1) -p1=p+1 -m1=elimna(cbind(x1,y1)) -x1=m1[,1:p] -y1=m1[,p1] -m2=elimna(cbind(x2,y2)) -x2=m2[,1:p] -y2=m2[,p1] -# -if(xout){ -m<-cbind(x1,y1) -flag<-outfun(x1,plotit=FALSE)$keep -m<-m[flag,] -x1<-m[,1:p] -y1<-m[,p1] -m<-cbind(x2,y2) -flag<-outfun(x2,plotit=FALSE)$keep -m<-m[flag,] -x2<-m[,1:p] -y2<-m[,p1] -} -n1=length(y1) -n2=length(y2) -nv=c(n1,n2) -if(cov1){ -pts=unique(x1) -if(alpha==.05)p.crit=0.00676 -} -if(DEEP)pts=NULL -if(!is.null(pts[1])){ -p.crit=NULL -DEEP=FALSE -} -if(is.null(pts[1])){ -if(!DEEP){ -x1<-as.matrix(x1) -pts<-ancdes(unique(rbind(x1,x2))) -p.crit=NULL -} -if(DEEP){ -pts=ancov2COV(x1,y1,x2,y2,DETAILS=TRUE,cr=.27,pr=FALSE,FRAC=FRAC)$all.points.used[,1:2] -}} -pts<-as.matrix(pts) -ntests=nrow(pts) -mat<-matrix(NA,ntests,8) -dimnames(mat)<-list(NULL,c('Est 1', 'Est 2','DIF','TEST','se','ci.low','ci.hi','p.value')) -sqsd1=regYvar(x1,y1,pts=pts,regfun=regfun,nboot=nboot,SEED=FALSE,xout=FALSE,outfun=outfun,...) -sqsd2=regYvar(x2,y2,pts=pts,regfun=regfun,nboot=nboot,SEED=FALSE,xout=FALSE,outfun=outfun,...) -# xout=F because leverage points have already been removed. -est1=regYhat(x1,y1,regfun=regfun,xr=pts,xout=FALSE,outfun=outfun,...) -est2=regYhat(x2,y2,regfun=regfun,xr=pts,xout=FALSE,outfun=outfun,...) -mat[,1]=est1 -mat[,2]=est2 -est=est1-est2 -mat[,3]=est -sd=sqrt(sqsd1+sqsd2) -mat[,5]=sd -tests=(est1-est2)/sd -mat[,4]=tests -pv=2*(1-pnorm(abs(tests))) -mat[,8]=pv -crit=NULL -if(!cov1){ -if(!DEEP){ -if(ntests==1)crit=qnorm(1-alpha/2) -if(length(pts)>1){ -if(ntests<=28){ -if(alpha==.05)crit<-smmcrit(Inf,ntests) -if(alpha==.01)crit<-smmcrit01(Inf,ntests) -} -if(ntests>28 || is.null(crit))crit=smmvalv2(dfvec=rep(Inf,nrow(pts)),alpha=alpha) -}}} -if(cov1){ -if(!DEEP){ -if(alpha==.05)p.crit=0.00676 -if(alpha!=.05)p.crit=ancJNmpcp(n1,n2,alpha=alpha,regfun=regfun,nreps=nreps,MC=MC,cov1=cov1)$pc.est -crit=qnorm(1-p.crit/2) -}} -if(DEEP){ -if(p==2){ -p.crit=.012 -if(alpha!=.05)p.crit=ancJNmpcp(n1,n2,alpha=alpha,regfun=regfun,nreps=nreps,MC=MC,cov1=cov1)$pc.est -crit=qnorm(1-p.crit/2) -} -if(p>2){ -if(length(pts)>1){ -if(SMM){ -if(ntests<=28){ -if(alpha==.05)crit<-smmcrit(Inf,ntests) -if(alpha==.01)crit<-smmcrit01(Inf,ntests) -} -if(ntests>28 || is.null(crit))crit=smmvalv2(dfvec=rep(Inf,nrow(pts)),alpha=alpha) -} -if(!SMM){ -p.crit=ancJNmpcp(n1,n2,alpha=alpha,regfun=regfun,nreps=nreps,MC=MC,cov1=FALSE) -crit=qnorm(1-p.crit/2) -}}}} -mat[,6]=est-crit*sd -mat[,7]=est+crit*sd -flag=rep(FALSE,nrow(mat)) -flag.chk1=as.logical(mat[,6]>null.value) -flag.chk2=(mat[,7]0) -num.sig=sum(flag.chk) -if(p==2){ -if(plotit){ -plot(pts[,1],pts[,2],xlab=xlab,ylab=ylab,type='n') -flag[flag.chk]=TRUE -points(pts[!flag,1],pts[!flag,2],pch='*') -points(pts[flag,1],pts[flag,2],pch='+') #significant points -}} -output.sig=NULL -if(p==2){ -if(num.sig>0){ -output.sig=matrix(NA,nrow=num.sig,ncol=8) -output.sig[,1]=pts[flag,1] -output.sig[,2]=pts[flag,2] -output.sig[,3]=mat[flag,1] -output.sig[,4]=mat[flag,2] -output.sig[,5]=mat[flag,3] -output.sig[,6]=mat[flag,6] -output.sig[,7]=mat[flag,7] -output.sig[,8]=mat[flag,8] -dimnames(output.sig)<-list(NULL,c('COV 1','COV 2','Est 1', 'Est 2','DIF','ci.low','ci.hi','p.value')) -if(!ALL){ -mat=NULL -pts=NULL -} -if(pr){ -if(ALL)print('To get only the results for all covariate points where this is a significant 
result, set ALL=FALSE') -} -}} -list(n=nv,num.sig=num.sig,p.crit=p.crit,points=pts,output.sig=output.sig,output=mat) -} - -ancJNmpcp<-function(n1,n2,regfun=qreg,CPP=FALSE,nreps=1000,alpha=.05,MC=FALSE, -SEED=TRUE,cov1=FALSE){ -if(CPP)library(WRScpp) -if(MC)library(parallel) -if(SEED)set.seed(2) -x=list() -n=max(c(n1,n2)) -nmiss=n-min(c(n1,n2)) -for(i in 1:nreps){ -x[[i]]=rmul(n,p=6) -if(n10)x[[i]][1:nmiss,1:3]=NA -} -if(n1>n2){ -if(nmiss>0)x[[i]][1:nmiss,4:6]=NA -} -} - -if(!MC)vals=lapply(x,ancJNmpcp.sub,regfun=regfun,cov1=cov1) -if(MC)vals=mclapply(x,ancJNmpcp.sub,regfun=regfun,cov1=cov1) -vals=as.vector(matl(vals)) -pc.est=hd(vals,alpha) -list(pc.est=pc.est) -} - -ancJNmpcp.sub<-function(x,regfun=qreg,cov1=FALSE){ -pts=NULL -z=elimna(x[,1:3]) -z2=elimna(x[,4:6]) -if(cov1)pts=z[,1:2] -res1=ancJNmp(z[,1:2],z[,3],z2[,1:2],z2[,3],SEED=TRUE,plotit=FALSE, -regfun=regfun,pts=pts)$output -v=min(res1[,8]) -} - - -ancovaV2.pv<-function(n1,n2,nreps=2000,MC=FALSE,qpts=FALSE,qvals = c(0.25, 0.5, 0.75), -nboot=500,SEED=TRUE,est=tmean,alpha=.05){ -iter=nreps -if(SEED)set.seed(45) -xy=list() -for(i in 1:iter){ -xy[[i]]=list() -xy[[i]][[1]]=rnorm(n1) -xy[[i]][[2]]=rnorm(n1) -xy[[i]][[3]]=rnorm(n2) -xy[[i]][[4]]=rnorm(n2) -} -if(!MC)pv=lapply(xy,ancovaV2pv.sub,qpts=qpts,qvals=qvals,nboot=nboot,MC=FALSE,est=est) -if(MC){ -library(parallel) -pv=mclapply(xy,ancovaV2pv.sub,qpts=qpts,qvals=qvals,nboot=nboot,MC=FALSE,est=est) -} -pv=as.vector(matl(pv)) -p=hd(pv,q=alpha) -list(p.crit=p) -} - -ancovaV2pv.sub<-function(xy,qpts=FALSE,qvals = c(0.25, 0.5, 0.75),nboot=500,MC=TRUE, -est=tmean){ -res=ancovaV2(xy[[1]],xy[[2]],xy[[3]],xy[[4]],est=est,plotit=FALSE,p.crit=.03,SEED=TRUE,qpts=qpts, -nboot=nboot,MC=MC) -rm=min(res$output[,2]) -rm -} - -ancovaUB.pv=ancovaV2.pv - -list.dif<-function(x1,x2){ -# -# Form all differences -# -if(!is.list(x1))stop('Argument x1 should have list mode') -if(!is.list(x2))stop('Argument x2 should have list mode') -if(length(x1)!=length(x2))stop('x1 and x2 have different lengths') -dif=list() -for(j in 1:length(x1))dif[[j]]=x1[[j]]-x2[[j]] -dif -} -kerSORT<-function(x,xlab='',ylab='',pts=NA){ -# -# kernel density estimator using Silverman's rule of thumb -# -# -A=min(c(sd(x),idealfIQR(x)/1.34)) -bw=1.06*A/n^.2 -init=density(x,bw=bw,kernel='epanechnikov') -plot(init$x,init$y,xlab=xlab,ylab=ylab,type='n') -lines(init$x,init$y) -} - -twohc4cor<-function(x1,y1,x2,y2,alpha=.05){ -# -# Compare two independent Pearson correlations using the HC4 method -# -# -X<-elimna(cbind(x1,y1)) -x1<-X[,1] -y1<-X[,2] -X<-elimna(cbind(x2,y2)) -x2<-X[,1] -y2<-X[,2] -x1=(x1-mean(x1))/sd(x1) -y1=(y1-mean(y1))/sd(y1) -x2=(x2-mean(x2))/sd(x2) -y2=(y2-mean(y2))/sd(y2) -temp1=olshc4(x1,y1) -temp2=olshc4(x2,y2) -test=(temp1$ci[2,2]-temp2$ci[2,2])/sqrt(temp1$ci[2,6]^2+temp2$ci[2,6]^2) -df=length(x1)+length(x2)-4 -pv=2*(1-pt(abs(test),df)) -pv -} - -#BD2 -BD2=function(matrizDatos){ - n=dim(matrizDatos)[1] - p=dim(matrizDatos)[2] - cont=rep(0,n) - for (i in 1:(n-1)){ - for (j in (i+1):n){ - cont=cont+estaEntre(c(i,j),matrizDatos) - } - } - contg=(cont/combinat(n,2)) -} - -#indicator function -estaEntre=function(v,matrizDatos){ - n=dim(matrizDatos)[1] - p=dim(matrizDatos)[2] - Z=matrizDatos - inf=t(apply(Z[v,],2,min)) - sup=t(apply(Z[v,],2,max)) - resultados=colSums((t(Z)<=t(sup)%*%rep(1,n))* (t(Z)>=t(inf)%*%rep(1,n)))==p -} - -#combination -combinat=function(n,p){ - if (n=t(inf)%*%rep(1,n))) - resultado=(resul/p) -} - - - - -#function boxplot -#fit: p by n functional data matrix, n is the number of curves -#method: BD2, 
BD3, MBD -fbplot<-function(fit,x=NULL,method='MBD',depth=NULL,plot=TRUE,prob=0.5,color=6,outliercol=2,barcol=4,fullout=FALSE, factor=1.5,xlab='Time',ylab='Y',...){ - - if(is.fdSmooth(fit) | is.fdPar(fit)){ fit = fit$fd } - if(is.fd(fit)){ - if(length(x)==0){ - x = seq(fit$basis$rangeval[1],fit$basis$rangeval[2],len=101) - } - fit = eval.fd(x,fit) - } - - tp=dim(fit)[1] - n=dim(fit)[2] - if (length(x)==0) {x=1:tp} - #compute band depth - if (length(depth)==0){ - if (method=='BD2') {depth=BD2(t(fit))} - else if (method=='BD3') {depth=BD3(t(fit))} - else if (method=='MBD') {depth=MBD(t(fit))} - else if (method=='Both') {depth=round(BD2(t(fit)),4)*10000+MBD(t(fit))} - } - - dp_s=sort(depth,decreasing=TRUE) - index=order(depth,decreasing=TRUE) - if (plot) { - plot(x,fit[,index[1]],lty=1,lwd=2,col=1,type='l',xlab=xlab,ylab=ylab,...) - } - for (pp in 1:length(prob)){ - m=ceiling(n*prob[pp])#at least 50% - center=fit[,index[1:m]] - out=fit[,index[(m+1):n]] - inf=apply(center,1,min) - sup=apply(center,1,max) - - if (prob[pp]==0.5){ #check outliers - dist=factor*(sup-inf) - upper=sup+dist - lower=inf-dist - outly=(fit<=lower)+(fit>=upper) - outcol=colSums(outly) - remove=(outcol>0) - #outlier column - colum=1:n - outpoint=colum[remove==1] - out=fit[,remove] - woout=fit - good=woout[,(remove==0),drop=FALSE] - maxcurve=apply(good,1,max) - mincurve=apply(good,1,min) - if (sum(outly)>0){ - if (plot) { - matlines(x,out,lty=2,col=outliercol,type='l',...) - } - } - barval=(x[1]+x[tp])/2 - bar=which(sort(c(x,barval))==barval)[1] - if (plot) { - lines(c(x[bar],x[bar]),c(maxcurve[bar],sup[bar]),col=barcol,lwd=2) - lines(c(x[bar],x[bar]),c(mincurve[bar],inf[bar]),col=barcol,lwd=2) - } - } - xx=c(x,x[order(x,decreasing=TRUE)]) - supinv=sup[order(x,decreasing=TRUE)] - yy=c(inf,supinv) - if (plot) { - if (prob[pp]==0.5) {polygon(xx,yy,col=color[pp],border=barcol,lwd=2)} - else {polygon(xx,yy,col=color[pp],border=NA)} - } - } - if (plot) { - lines(x,fit[,index[1]],lty=1,lwd=2,col=1,type='l') - lines(x,maxcurve,col=barcol,lwd=2) - lines(x,mincurve,col=barcol,lwd=2) - if (fullout) { - if (sum(outly)>0){ - if (plot) { - matlines(x,out,lty=2,col=outliercol,type='l',...) - } - } - } - } - return(list(depth=depth,outpoint=outpoint)) -} - - - -funyuenpb<-function(x1,x2,tr=.2,pts=NULL,npts=25,plotit=TRUE,alpha=.05, -SEED=TRUE, -nboot=2000,xlab='T',ylab='Est.dif',FBP=TRUE,method='hochberg',COLOR=TRUE){ -# -# x1 and x2 are n-by-p matrices, -# Designed for functional data. -# For example, p measures taken over time where p is typically large -# -# Goal: at speficied times, compare the two groups. 
-# pts: Can specify time points where comparisons are to be made -# if pts=NULL, pick -# npts points evenly space between min and max time points -# -p=ncol(x1) -pm1=p-1 -if(p!=ncol(x2))stop('ncol(x1) is not equal to ncol(x2)') -n1=nrow(x1) -n2=nrow(x2) -if(SEED)set.seed(2) -if(is.null(pts)){ -np=round(p/npts) -if(np==0)np=1 -pts=seq(2,pm1,np) -notpts=-1*length(pts) -pts=pts[c(-1,notpts)] -} -npts=length(pts) -xsub1=x1[,pts] -xsub2=x2[,pts] -res=NA -dif=NA -bvals=matrix(nrow=nboot,ncol=npts) -for(j in 1:nboot){ -data1=sample(n1,size=n1,replace=TRUE) -data2<-sample(n2,size=n2,replace=TRUE) -bvals[j,]=apply(xsub1[data1,],2,tmean,tr=tr)-apply(xsub2[data2,],2,tmean,tr=tr) -} -bsort=apply(bvals,2,sort) -crit<-alpha/2 -icl<-round(crit*nboot)+1 -icu<-nboot-icl -op=matrix(NA,nrow=length(pts),ncol=7) -dimnames(op)=list(NULL,c('est 1','est 2','dif','p.value', -'adjust.p.value','ci.low','ci.hi')) -op[,1]=apply(xsub1,2,tmean,tr=tr) -op[,2]=apply(xsub2,2,tmean,tr=tr) -op[,3]=op[,1]-op[,2] -bsort=apply(bvals,2,sort) -bs=bvals<0 -pv=apply(bs,2,mean) -pv2=rbind(pv,1-pv) -pv2=apply(pv2,2,min) -op[,4]=2*pv2 -#flag0=op[,4]==0 -#op[flag0,4]=.004 -op[,5]=p.adjust(op[,4],method=method) -op[,6]=bsort[icl,] -op[,7]=bsort[icu,] -if(plotit){ -if(!FBP){ -xlow=c(1:nrow(op)) -xax=rep(c(1:nrow(op)),3) -rplot(xlow,op[,3],xlab=xlab,ylab=ylab,scat=FALSE) -plot(xax,as.vector(op[,c(3,6,7)]),type='n',xlab=xlab,ylab=ylab) -lines(xlow,op[,3]) -lines(xlow,op[,6],lty=2) -lines(xlow,op[,7],lty=2) -} -if(FBP){ -par(mfrow=c(1,2)) -if(COLOR)FBplot(x1) -if(!COLOR)func.out(x1) -lines(medcurve(x2)) -if(COLOR)FBplot(x2) -if(!COLOR)func.out(x2) -lines(medcurve(x1)) -par(mfrow=c(1,1)) -}} -op=cbind(pts,op) -op -} -anova.nestA<-function(x,tr=.2){ -# -# J-by-K nested ANOVA -# x is assumed to have list mode with length J. -# x[[j]] is assumed to be a matrix with n_j rows and K columns -# j=1, ..., J -# -# Strategy: For fixed level of factor A compute trimmed mean for each -# level of factor B and use these trimmed means as the unit of analysis -# That is, perform an ANOVA using these trimmed means -# -if(!is.list(x))stop('x should have list mode') -y=list() -J=length(x) -for(j in 1:J)y[[j]]=apply(x[[j]],2,tmean,tr=tr) -res=t1way(y,tr=tr) -res -} - -anova.nestAP<-function(x,tr=.2){ -# -# J-by-K nested ANOVA -# x is assumed to have list mode with length J. -# x[[j]] is assumed to be a matrix with n_j rows and K columns -# j=1, ..., J -# -# pool data for each level of A and do anova -# -if(!is.list(x))stop('x should have list mode') -y=list() -J=length(x) -for(j in 1:J)y[[j]]=as.vector(x[[j]]) -res=t1way(y,tr=tr) -res -} - -r.gauss.pro<-function(n,C,M,t){ -# -# generate data from a Gaussian Process -# -# n is the sample size -# C is the covariance function -# for example: -# C <- function(x, y) 0.01 * exp(-10000 * (x - y)^2) # covariance function -# M is the mean function. 
For example -#M <- function(x) sin(x) # mean function -# t is the interval over which the mean is computed -#t <- seq(0, 1, by = 0.01) # will sample the GP at these points -# -library(MASS) -k <- length(t) -m <- M(t) -S <- matrix(nrow = k, ncol = k) -for (i in 1:k) for (j in 1:k) S[i, j] = C(t[i], t[j]) -z=matrix(NA,nrow=n,ncol=k) -#for(i in 1:n) -#z[i,] <- mvrnorm(1, m, S) -z=mvrnorm(n, m, S) -z -} - -Flplot<-function(x,est=mean,xlab='Time',ylab='Y',plotit=TRUE){ -# -# average n curves and plot results -# -es=apply(x,2,est) -if(plotit){ -plot(es,xlab=xlab,ylab=ylab,type='n') -lines(es) -} -es -} - - - -FQplot<-function(x,xlab='Time',ylab='Y',plotit=TRUE){ -# -# Compute the median and quartiles of n curves and plot results -# -es=apply(x,2,hd) -es1=apply(x,2,hd,q=.25) -es2=apply(x,2,hd,q=.75) -if(plotit){ -plot(rep(c(1:ncol(x)),3),c(es,es1,es2),xlab=xlab,ylab=ylab,type='n') -lines(es) -lines(es1,lty=2) -lines(es2,lty=2) -} -es -} - - - -Flplot2g<-function(x1,x2,est=mean,xlab='Time',ylab='Y',plotit=TRUE){ -# -# average n curves and plot results -# -x1=elimna(x1) -x2=elimna(x2) -if(ncol(x1)!=ncol(x2))stop('x1 and x2 should have the same number of columns') -x1=elimna(x1) -x2=elimna(x2) -es1=apply(x1,2,est) -es2=apply(x2,2,est) -if(plotit){ -plot(rep(1:ncol(x1),2),c(es1,es2),xlab=xlab,ylab=ylab,type='n') -lines(es1) -lines(es2,lty=2) -} -list(est.1=es1,est.2=es2) -} - - - -funloc<-function(x,tr=.2,pts=NULL,npts=25,plotit=TRUE,alpha=.05,nv=rep(0,ncol(x)), -xlab='T',ylab='Est.',FBP=TRUE,method='hochberg',COLOR=TRUE){ -# -# x1 and x2 are n-by-p matrices, -# Designed for functional data. -# For example, p measures taken over time where p is typically large -# -# nv is the null value when testing some hypothesis -# -# Goal: at speficied times, compute measures of location and confidence intervals. -# pts: Can specify time points where comparisons are to be made -# if pts=NULL, pick -# npts points evenly space between min and max time points -# -# FBP=T: creates a functional boxplot -# FBP=F: plot an estimate of the typical value plus 1-alpha confidence intervals. -# -p=ncol(x) -pm1=p-1 -if(is.null(pts)){ -np=round(p/npts) -if(np==0)np=1 -pts=seq(2,pm1,np) -} -res=NA -dif=NA -op=matrix(NA,nrow=length(pts),ncol=6) -dimnames(op)=list(NULL,c('est.','s.e.','p.value', -'adjust.p.value','ci.low','ci.hi')) -for(i in 1:length(pts)){ -z=trimci(x[,i],tr=tr,null.value=nv[i],pr=FALSE) -op[i,1]=z$estimate -op[i,2]=z$se -op[i,3]=z$p.value -op[i,5]=z$ci[1] -op[i,6]=z$ci[2] -} -op[,4]=p.adjust(op[,3],method=method) -if(plotit){ -if(!FBP){ -xlow=c(1:nrow(op)) -xax=rep(c(1:nrow(op)),3) -plot(xax,as.vector(op[,c(3,5,6)]),type='n',xlab=xlab,ylab=ylab) -lines(xlow,op[,1]) -lines(xlow,op[,5],lty=2) -lines(xlow,op[,6],lty=2) -} -if(FBP){ -if(COLOR)FBplot(x) -if(!COLOR)func.out(x) -}} -op=cbind(pts,op) -op - -} - - -funlocpb<-function(x,est=tmean,nboot=2000,SEED=TRUE, -pts=NULL,npts=25,plotit=TRUE,alpha=.05,nv=rep(0,ncol(x)), -xlab='T',ylab='Est.',FBP=TRUE,method='hochberg',COLOR=TRUE,...){ -# -# x1 and x2 are n-by-p matrices, -# Designed for functional data. -# For example, p measures taken over time where p is typically large -# -# nv is the null value when testing some hypothesis -# -# Goal: at speficied times, compute measures of location and confidence intervals. 
-# pts: Can specify time points where comparisons are to be made -# if pts=NULL, pick -# npts points evenly space between min and max time points -# -# FBP=T: creates a functional boxplot -# FBP=F: plot an estimate of the typical value plus 1-alpha confidence intervals. -# -p=ncol(x) -pm1=p-1 -if(is.null(pts)){ -np=round(p/npts) -if(np==0)np=1 -pts=seq(2,pm1,np) -} -res=NA -dif=NA -op=matrix(NA,nrow=length(pts),ncol=5) -dimnames(op)=list(NULL,c('est.','p.value', -'adjust.p.value','ci.low','ci.hi')) -x=elimna(x) -n=nrow(x) -for(i in 1:length(pts)){ -z=onesampb(x[,i],est=est,nboot=nboot,alpha=alpha,SEED=SEED,nv=nv,...) -op[i,1]=z$estimate -op[i,2]=z$p.value -op[i,4]=z$ci[1] -op[i,5]=z$ci[2] -} -op[,3]=p.adjust(op[,2],method=method) -if(plotit){ -if(!FBP){ -xlow=c(1:nrow(op)) -xax=rep(c(1:nrow(op)),3) -plot(xax,as.vector(op[,c(3,5,6)]),type='n',xlab=xlab,ylab=ylab) -lines(xlow,op[,1]) -lines(xlow,op[,5],lty=2) -lines(xlow,op[,6],lty=2) -} -if(FBP){ -if(COLOR)FBplot(x) -if(!COLOR)func.out(x) -}} -op=cbind(pts,op) -op - -} - - -MULMreg<-function(x,y,regfun=MMreg, -xout=FALSE,outfun=outpro,...){ -# -# Multivariate regression: simply estimate parameters for -# for each column of Y values based on some multivariate regression -# estimator. -# -# Use MMreg by default -# -# x and y are assumed to be matrices with two or more columns -# -# -x<-as.matrix(x) -y<-as.matrix(y) -n.keep=nrow(x) -if(xout){ -x<-as.matrix(x) -flag<-outfun(x,plotit=FALSE)$keep -x<-x[flag,] -y<-y[flag,] -x<-as.matrix(x) -y<-as.matrix(y) -n.keep=nrow(x) -} -p1=ncol(x)+1 -q=ncol(y) -est=matrix(NA,nrow=p1,ncol=q) -dimnames(est)=list(c('Inter',rep('Slope',ncol(x))),NULL) -for(i in 1:q)est[,i]=regfun(x,y[,i],...)$coef -list(coef=est) -} - -MULR.yhat<-function(x,y,pts=x,regfun=MULMreg, -xout=FALSE,outfun=outpro,...){ -# -# Compute predicted Y values based on some multivariate regression -# estimator. -# -# Use MULMreg by default -# -# -x<-as.matrix(x) -y<-as.matrix(y) -n.keep=nrow(x) -if(xout){ -x<-as.matrix(x) -flag<-outfun(x,plotit=FALSE)$keep -x<-x[flag,] -y<-y[flag,] -x<-as.matrix(x) -n.keep=nrow(x) -} -if(is.null(pts))pts=x -q=ncol(y) -p1=ncol(x)+1 -yhat=matrix(NA,nrow=nrow(pts),ncol=q) -coef=regfun(x,y)$coef -slope=as.matrix(coef[2:p1,]) -for(j in 1:q){ -for(i in 1:nrow(pts)){ -yhat[i,j]=coef[1,j]+sum(slope[,j]*x[i,]) -}} -list(yhat=yhat) -} -corCOMmcp_sub<-function(data,x,y,corfun=wincor,...){ -# -# -rv=NA -for(j in 1:ncol(x))rv[j]=corfun(x[data,j],y[data])$cor -rv -} -corCOMmcp<-function(x,y,corfun=wincor,alpha=.05,nboot=500,SEED=TRUE,MC=FALSE,xout=FALSE,outfun=outpro,method='hommel',...){ -# -# Comparing robust dependent correlations: Overlapping case -# That is, have two or more independent variables, compare -# cor(y,x_j) to cor(y,x_k) for each j0)stop('IV1 and IV2 have duplicate values making this method invalid') -x<-as.matrix(x) -p1<-ncol(x)+1 -p<-ncol(x) -#if(length(IV1)+length(IV2) != p)stop('ncol(x) should equal the number of variables indicated by IV1 and IV2') -if(length(IV1)+length(IV2) > p)stop('IV!+IV2 should be less than or equal ncol(x)') -if(max(c(IV1,IV2))>p)stop('IV1 or IV2 has a value that exceeds the number of col. 
in x') -xy<-cbind(x,y) -xy<-elimna(xy) -x<-xy[,1:p] -y<-xy[,p1] -nrem=length(y) -if(xout){ -m<-cbind(x,y) -if(identical(outfun,outblp))flag=outblp(x,y,plotit=FALSE)$keep -else -flag<-outfun(x,plotit=FALSE,...)$keep -m<-m[flag,] -x<-m[,1:p] -y<-m[,p1] -} -nkeep=length(y) -#estit=regfun(x,y,xout=xout,...)$coef[2:p1] -nv=length(y) -x<-as.matrix(x) -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -for(k in 1:2){ -data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot) -if(!MC){ -bvec<-apply(data,1,regboot,x,y,regfun,xout=FALSE,...) -if(k==1)bvec1=bvec -if(k==2)bvec2=bvec -} -if(MC){ -library(parallel) -data=listm(t(data)) -bvec<-mclapply(data,regbootMC,x,y,regfun,mc.preschedule=TRUE,xout=FALSE,...) -if(k==1)bvec1=matl(bvec) -if(k==2)bvec2=matl(bvec) -data=t(matl(data)) -}} -#Leverage points already removed. -# bvec is a p+1 by nboot matrix. The first row -# contains the bootstrap intercepts, the second row -# contains the bootstrap values for first predictor, etc. -bvec1=bvec1[2:p1,] # don't need the intercept -bvec2=bvec2[2:p1,] # don't need the intercept -v1=NA -v2=NA -for(i in 1:nboot){ -v1[i]=regIVcom_sub(bvec1[IV1,i],x[data[i,],IV1],tr=tr) -v2[i]=regIVcom_sub(bvec2[IV2,i],x[data[i,],IV2],tr=tr) -} -pv=bmp(v1,v2)$phat -pv=2*min(c(pv,1-pv)) -est=regfun(x,y)$coef[2:p1] -e1=regIVcom_sub(est[IV1],x[,IV1],tr=tr) -e2=regIVcom_sub(est[IV2],x[,IV2],tr=tr) -rat=NA -if(e2>0)rat=e1/e2 -ep1=e1/winvar(y,tr=tr) -ep2=e2/winvar(y,tr=tr) -list(n=nrem,n.keep=nkeep,est.1=e1,est.2=e2,e.pow1=ep1,e.pow2=ep2,strength.assoc.1=sqrt(ep1), -strength.assoc.2=sqrt(ep2), -ratio=rat,strength.ratio=sqrt(rat),p.value=pv) -} - -regIVcom_sub<-function(slope,x,tr){ -yhat=apply(t(slope*t(x)),1,sum) -str=winvar(yhat,tr=tr) -str -} - -regIVstr<-function(x,y,regfun=tsreg,xout=FALSE,outfun=outpro,tr=.2,...){ -# -# Estimate strength of each independent variable -# when all of them are entered into the model. -# -xy=cbind(x,y) -xy=elimna(xy) -if(xout){ -x<-as.matrix(x) -flag<-outfun(x,plotit=FALSE)$keep -x<-x[flag,] -y<-y[flag] -x<-as.matrix(x) -} -p=ncol(x) -p1=p+1 -est=regfun(x,y,...)$coef[2:p1] -top=NA -for(j in 1:p)top[j]=regIVcom_sub(est[j],x[,j],tr=tr) -bot=winvar(y,tr=tr) -str=top/bot -list(explanatory.power=str,explanatory.strength=sqrt(str)) -} - - - - -Q2anova<-function(J,K,x,alpha=.05,nboot=2000,MC=FALSE){ -# -# Two-way ANOVA for medians, tied values allowed. -# -if(is.matrix(x)|| is.data.frame(x))x=listm(x) -if(J*K != length(x))stop('Total number of groups is not equal to JK') -chkcar=NA -for(j in 1:length(x))chkcar[j]=length(unique(x[[j]])) -if(min(chkcar)<20){ -print('Warning: Sample size is less than') -print('20 for one more groups. Type I error might not be controlled') -} -con=con2way(J,K) -A=pbadepth(x,est=hd,con=con$conA,alpha=alpha,nboot=nboot,MC=MC) -B=pbadepth(x,est=hd,con=con$conB,alpha=alpha,nboot=nboot,MC=MC) -AB=pbadepth(x,est=hd,con=con$conAB,alpha=alpha,nboot=nboot,MC=MC) -list(Fac.A=A,Fac.B=B,Fac.AB=AB) -} -Q3anova<-function(J,K,L,x,alpha=.05,nboot=600,MC=FALSE){ -# -# Three-way ANOVA for medians, tied values allowed. -# -if(is.matrix(x)|| is.data.frame(x))x=listm(x) -if(J*K*L != length(x))stop('Total number of groups is not equal to JKL') -chkcar=NA -for(j in 1:length(x))chkcar[j]=length(unique(x[[j]])) -if(min(chkcar)<20){ -print('Warning: Sample size is less than') -print('20 for one more groups. 
Type I error might not be controlled') -} -con=con3way(J,K,L) -A=pbadepth(x,est=hd,con=con$conA,alpha=alpha,nboot=nboot,MC=MC) -B=pbadepth(x,est=hd,con=con$conB,alpha=alpha,nboot=nboot,MC=MC) -C=pbadepth(x,est=hd,con=con$conC,alpha=alpha,nboot=nboot,MC=MC) -AB=pbadepth(x,est=hd,con=con$conAB,alpha=alpha,nboot=nboot,MC=MC) -AC=pbadepth(x,est=hd,con=con$conAC,alpha=alpha,nboot=nboot,MC=MC) -BC=pbadepth(x,est=hd,con=con$conBC,alpha=alpha,nboot=nboot,MC=MC) -ABC=pbadepth(x,est=hd,con=con$conABC,alpha=alpha,nboot=nboot,MC=MC) -list(Fac.A=A,Fac.B=B,Fac.C=C,Fac.AB=AB,Fac.AC=AC,Fac.BC=BC,Fac.ABC=ABC) -} -scat3d2g<-function(x1,x2,pch1='+',pch2='*',tick.marks=TRUE, -xlab='X1', ylab='X1', zlab='X3'){ -# -# plot data in x1 and x2 -# marking points in x1 with symbol indicated by pch1 -# marking points in x2 with symbol indicated by pch2 -# -if(ncol(x1)!=3)stop('x1 should be a matrix with 3 columns') -if(ncol(x2)!=3)stop('x2 should be a matrix with 3 columns') -library(scatterplot3d) -temp=scatterplot3d(x=c(x1[,1],x2[,1]),y=c(x1[,2],x2[,2]), -z=c(x1[,3],x2[,3]),type='n',tick.marks=tick.marks, -xlab=xlab, ylab=ylab, zlab=zlab) -temp$points(x1,pch=pch1) -temp$points(x2,pch=pch2) -} -scat2d2g<-function(x1,x2,xlab='X1',ylab='X2',ticktype='detailed',pch1='+', -pch2='*'){ -# -# Create a scatterplot marking data from the first group with the symbol -# indicated by pch1 and the symbol indicated by pch2 for group 2. -# -if(ncol(x1)!=2)stop('x1 should be a matrix with 2 columns') -if(ncol(x2)!=2)stop('x2 should be a matrix with 2 columns') -plot(rbind(x1,x2),type='n',xlab=xlab,ylab=ylab) -points(x1,pch=pch1) -points(x2,pch=pch2) -} -qhdsm.pred<-function(x,y,pts=x,q=.5,fr=1,nmin=1,xout=FALSE,outfun=outpro,...){ -# -# Predict the qth quantile of Y based on the values in pts, using the -# the data in x and y. -# -xy=elimna(cbind(x,y)) -p1=ncol(xy) -p=p1-1 -x=xy[,1:p] -y=xy[,p1] -x=as.matrix(x) -if(xout){ -flag<-outfun(x,plotit=FALSE,...)$keep -x<-x[flag,] -y<-y[flag] -x<-as.matrix(x) -} -if(ncol(x)==1){ -vals=runhat(x[,1],y,pts=pts,est=hd,q=q,fr=fr,nmin=nmin,...) -nvals=1 -for(i in 1:length(pts)){ -nvals[i]<-length(y[near(x,pts[i],fr=fr)]) -} -} -if(ncol(x)>1){ -temp=rung3hat(x,y,pts=pts,est=hd,q=q,fr=fr,...) -vals=temp$rmd -nvals=temp$nval -} -list(Y.hat=vals,nvals=nvals) -} -skmcp<-function(x,alpha=.05){ -# -# Multiple comparisons for J independent groups -# and binary data. -# The method is based on the Storer--Kim -# method for comparing independent binomials. -# -# The data are assumed to be stored in x -# which either has list mode or is a matrix. In the first case -# x[[1]] contains the data for the first group, x[[2]] the data -# for the second group, etc. Length(x)=the number of groups = J. -# If stored in a matrix, the columns of the matrix correspond -# to groups. -# -# Missing values are allowed. -# -# Probability of one or more Type I errors controlled using Hochberg's method. 
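-#
-# Illustrative sketch (hypothetical data): three groups of binary responses
-# stored as the columns of a matrix,
-# m=cbind(rbinom(30,1,.3),rbinom(30,1,.5),rbinom(30,1,.7))
-# skmcp(m)
-# returns, for each pair of groups, a p-value based on the Storer--Kim method
-# together with the critical p-value used by Hochberg's method.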
-# -# -if(is.matrix(x))x<-listm(x) -if(!is.list(x))stop('Data must be stored in list mode or in matrix mode.') -J<-length(x) -ncon=(J^2-J)/2 -Jm<-J-1 -# -# Determine critical values -dvec=alpha/c(1:ncon) -output<-matrix(NA,nrow=ncon,ncol=4) -dimnames(output)<-list(NULL,c('Group','Group','p.value','p.crit')) -ic=0 -for(j in 1:J){ -for(k in 1:J){ -if(j=zvec) -output[temp2,4]<-zvec -num.sig<-sum(output[,3]<=output[,4]) -list(output=output,num.sig=num.sig) -} - -smmvalv3<-function(ntests,df,iter=20000,alpha=.05,SEED=TRUE){ -# -# ntests=number of tests to be performed -# -if(SEED)set.seed(1) -vals<-NA -tvals<-NA -dfvec=rep(df,ntests) -z=matrix(nrow=iter,ncol=ntests) -for(j in 1: ntests)z[,j]=rt(iter,dfvec[j]) -vals=apply(abs(z),1,max) -vals<-sort(vals) -ival<-round((1-alpha)*iter) -qval<-vals[ival] -qval -} -binmat<-function(m,col,lower, upper,INC=TRUE){ -# -# pull out the rows of the matrix m based on the values in the column -# indicated by the argument -# col -# that are between lower and upper, inclusive. Note: the built-in function findInterval could be used instead -# -# Example: binmat(m,3,10,15) will return all rows such that the -# values in column 3 are between 10 and 15, inclusive. -# -if(is.vector(m)){ -m=as.matrix(m) -col=1 -} -if(INC){ -flag1=m[,col]<=upper -flag2=m[,col]>=lower -} -if(!INC){ -flag1=m[,col]lower -} -flag=as.logical(flag1*flag2) -m[flag,] -} - - -boxdif<-function(x,names){ -# -# For J dependent groups, compute all pairwise differences and then -# create boxplots for all pairs of groups. -# -if(is.null(dim(x)))stop('x should be a matrix or data frame') -ic=0 -J=ncol(x) -n=nrow(x) -N=(J^2-J)/2 -ic=0 -dif=matrix(NA,nrow=n,ncol=N) -for(j in 1:J){ -for(k in 1:J){ -if(jbest){ -best=new -idk=j -idcost=k -}}} -# Now check to see whether cost matters for the kernel that is being used -temp=NA -for(k in 1:length(cost))temp[k]=comdepthsvm(x1,x2,kernel=kvals[idk],cost=cost[k])$est.prob -if(var(temp)==0)flag=FALSE -fin=comdepthsvm(x1,x2,MISS=MISS,best.kernel=kvals[idk],best.cost=cost[idc],TABLE=TABLE) -list(best.kernel=kvals[idk],best.cost=cost[idcost],est.prob=fin$est.prob,cost.matters=flag, -miss.class.vectors=fin$miss.class.vectors,TABLE=fin$TABLE) -} - - - -cumrelfT<-function(x,y,pts=NULL,q=c(.1,.25,.5,.75,.9),xlab='X',ylab='CUM REL FREQ',plotit=TRUE, -op=1){ -# -# Compare the cumulative relative frequencies for 2 independent groups -# based on the values in pts. -# -# x and y are vectors. 
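-#
-# Illustrative sketch (hypothetical data):
-# x=rnorm(50); y=rnorm(50,.5)
-# cumrelfT(x,y)
-# compares the two cumulative relative frequencies at the .1, .25, .5, .75
-# and .9 quantiles of x (with pts=NULL, pts is taken to be these quantiles of x).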
-# -# op=1 use twobinom -# op=2 usd bi2KMS - -x=elimna(x) -y=elimna(y) -if(is.null(pts)){ -for(i in 1:length(q))pts[i]=qest(x,q[i]) -} -output=matrix(NA,nrow=length(pts),ncol=5) -for(j in 1:length(pts)){ -flag1=x<=pts[j] -flag2=y<=pts[j] -temp=NULL -if(op==1)temp=twobinom(x=flag1,y=flag2) -if(op==2)temp=bi2KMSv2(x=flag1,y=flag2) -if(is.null(temp))stop('op should be equal to 1 or 2') -output[j,2]=temp$p1 -output[j,3]=temp$p2 -output[j,4]=temp$est.dif -output[j,5]=temp$p.value -} -output[,1]=pts -if(plotit){ -m=list() -m[[1]]=x -m[[2]]=y -cumrelf(m,xlab=xlab,ylab=ylab) -} -#output[,6]=p.adjust(output[,5],method='hoch') # can beat this adjusted p-value -#dimnames(output)=list(NULL,c('pts','est.p1','est.p2','est.dif','p.value','p.adjusted')) -dimnames(output)=list(NULL,c('pts','est.p1','est.p2','est.dif','p.value')) -output -} -rplotCITAP.pv<-function(n,nreps=2000,alpha=.05,npts=25,tr=.2,fr=5,MC=FALSE,nmin=12,SEED=TRUE,LP=FALSE){ -if(SEED)set.seed(2) -pvals=NA -xy=list() -for (i in 1:nreps){ -xy[[i]]=rmul(n) -} -if(!MC)pvals=lapply(xy,rplotCITAP.sub,npts=npts,tr=tr,fr=fr,alpha=alpha,nmin=nmin) -if(MC){ -library(parallel) -pvals=mclapply(xy,rplotCITAP.sub,npts=npts,tr=tr,fr=fr,alpha=alpha,nmin=nmin) -} -pvals=matl(pvals) -pv=hd(pvals,alpha) -pv -} - -rplotCITAP.sub<-function(xy,tr=.2,fr=NA,SEED=TRUE,nmin=12, -pts=NA,npts=25,LP=FALSE,alpha=.05,xout=FALSE,...){ -# -# prediction interval running interval smoother based on a trimmed mean. -# Unlike rplot, includes a confidence band. -# -x=xy[,1] -y=xy[,2] -xord=order(x) -x=x[xord] -y=y[xord] -infit=rplot(x,y,tr=tr,xout=xout,plotit=FALSE,LP=LP,fr=fr,pr=FALSE,pyhat=TRUE,nmin=nmin) -res1=ancova(x,y,x,y,pr=FALSE,plotit=FALSE,fr1=fr,fr2=fr)$output -pts=seq(res1[1,1],res1[5,1],length.out=npts) -rmd=infit$pyhat -res1=ancova(x,y,x,y,pr=FALSE,plotit=FALSE,fr1=fr,fr2=fr,nmin=nmin)$output -pts=seq(res1[1,1],res1[5,1],length.out=npts) -y.hat=NA -pv=NA -civ=matrix(NA,nrow=npts,ncol=2) -for(i in 1:length(pts)){ -doit=trimci(y[near(x,pts[i],fr)],tr=tr,alpha=alpha,pr=FALSE) -pv[i]=doit$p.value -} -min(pv) -} - -rplotCI.pv<-rplotCITAP.pv - -rplotCIv2.pv<-function(n,nreps=4000,alpha=.05,tr=.2,fr=.5, -MC=TRUE,nmin=12,SEED=TRUE){ -if(SEED)set.seed(2) -pvals=NA -xy=list() -for (i in 1:nreps){ -xy[[i]]=rmul(n) -} -if(!MC)pvals=lapply(xy,rplotCIv2.sub,tr=tr,fr=fr,nmin=nmin) -if(MC){ -library(parallel) -pvals=mclapply(xy,rplotCIv2.sub,tr=tr,fr=fr,nmin=nmin) -} -pvals=matl(pvals) -pv=hd(pvals,alpha) -pv -} - -rplotCIv2.sub<-function(xy,nmin,tr,fr){ -x=xy[,1] -y=xy[,2] -n=length(y) -nv=NA -for(j in 1:n)nv[j]=sum(near(x,x[j],fr=fr)) -pts=x[nv>=nmin] -n.keep=length(pts) -for(j in 1:n.keep)nv[j]=sum(near(x,x[j],fr=fr)) -pts=x[nv>=nmin] -rmd=NA -for(i in 1:length(pts))rmd[i]<-trimci(y[near(x,pts[i],fr)],tr=tr,pr=FALSE)$p.value -pv=min(rmd) -pv -} - - -bdmP<-function(x){ -# -# Test the null hypothesis in Bruner et al. 
(2016) -# for a one-way design -# -if(is.matrix(x))x=listm(x) -J=length(x) -library(rankFD) -nv=lapply(x,length) -nv=as.vector(matl(nv)) -z=x[[1]] -g=rep(1,nv[1]) -for(j in 2:J){ -z=c(z,x[[j]]) -g=c(g,rep(j,nv[j])) -} -xg=cbind(z,g) -xg=as.data.frame(xg) -res=rankFD(z~g,data=xg,hypothesis = 'H0p') -w=as.vector(res$ANOVA.Type.Statistic) -list(test.stat=w[1],df1=w[2],df2=w[3],p.value=w[4],q.hat=bdm(x)$q.hat) -} -rplotCIM<-function(x,y,est=hd,fr=.5,p.crit=NA,plotit=TRUE,scat=TRUE, -pyhat=FALSE, pts=NA,npts=25,xout=FALSE, -xlab='X',ylab='Y',low.span=2/3,nmin=16, -outfun=out,LP=TRUE,LPCI=FALSE,MID=TRUE,alpha=.05,pch='.',...){ -# -# Confidence interval for running interval smoother based on a median -# Unlike rplot, includes a confidence band having simultaneous probability -# coverage equal to 1-alpha. -# -# LP=TRUE, the plot is further smoothed via lowess -# -# fr controls amount of smoothing. If the association is relatively strong, might want to use fr=.2 -# -chk=FALSE -if(identical(est,hd))chk=TRUE -if(!chk)stop('Current version, argument est must be hd') -n=length(y) -if(n<50)stop('Need at least n=50') -xord=order(x) -x=x[xord] -y=y[xord] -infit=rplot(x,y,est=est,xout=xout,plotit=plotit,LP=LP,fr=fr,pr=FALSE,pyhat=TRUE,xlab=xlab,ylab=ylab) -rmd=infit$yhat -m<-cbind(x,y) -if(ncol(m)>2)stop('One covariate only is allowed with this function') -m<-elimna(m) -nv=nrow(m) -if(xout){ -flag<-outfun(m[,1])$keep -m<-m[flag,] -} -x=m[,1] -y=m[,2] -n.keep=length(y) -res1=ancova(x,y,x,y,pr=FALSE,plotit=FALSE,fr1=fr,fr2=fr,nmin=nmin)$output -pts=seq(res1[1,1],res1[5,1],length.out=npts) -flag=duplicated(pts) -npts=length(pts) -civ=matrix(NA,nrow=npts,ncol=2) -for(i in 1:length(pts)){ -xx=y[near(x,pt=pts[i],fr)] -civ[i,]=sint(xx,alpha=alpha/npts) -} -up=civ[!flag,2] -low=civ[!flag,1] -if(plotit){ -if(LPCI){ -up=lplot(pts,up,plotit=FALSE,pyhat=TRUE,pr=FALSE,low.span=low.span)$yhat -low=lplot(pts,low,plotit=FALSE,pyhat=TRUE,pr=FALSE,low.span=low.span)$yhat -} -pts=pts[!flag] -lines(pts,up,lty=2) -lines(pts,low,lty=2) -} -if(pyhat){output<-cbind(pts,rmd[!flag],low,up) -dimnames(output)=list(NULL,c('pts','y.hat','ci.low','ci.up')) -} -if(!pyhat)output<-'Done' -list(output=output,str=infit$Strength.Assoc,n=nv,n.keep=n.keep) -} -lplotCI<-function(x,y,plotit=TRUE,xlab='X',ylab='Y',p.crit=NULL,alpha=.05,span=NULL, -CIV=FALSE,xout=FALSE,outfun=outpro, pch='.',SEED=TRUE,nboot=100,pts=NULL,npts=25,nreps=2000,...){ -# -# Confidence band using LOESS -# -# Method allows heteroscedasticity and adjust the confidence intervals -# so that the simultaneous probabillty coverage is approximately 1-alpha -# -# If CIV=FALSE and plotit=TRUE, creates a plot with the confidence intervals. -# CIV=TRUE, returns the confidence intervals for the points in pts -# pts =NULL, the function picks -# npts points, extending between M-1.5*mad(x) and M+1.5*mad(x) -# -# -# For alpha=0.05, n <=2000 execution time is low. Otherwise -# the adjusted critical value must be computed. -# -# p.crit=NULL: If alpha=.05, determined quickly, otherwise it is computed. 
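-#
-# Illustrative sketch (hypothetical data):
-# x=rnorm(100); y=x+rnorm(100)
-# lplotCI(x,y)           # plot the LOESS fit with an approximate confidence band
-# lplotCI(x,y,CIV=TRUE)  # also return the confidence intervals for the points in pts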
-# -xy=elimna(cbind(x,y)) -if(ncol(xy)>2)stop('Current version limited to a single predictor variable') -if(xout){ -flag<-outfun(xy[,1],plotit=FALSE,...)$keep -xy<-xy[flag,] -} -n=nrow(xy) -if(is.null(span)){ -span=2/3 -if(n >=300)span=.5 -if(n >=800)span=.3 -} -x=xy[,1] -y=xy[,2] -xord=order(x) -y=y[xord] -x=x[xord] -M=median(x) -low=M-1.5*mad(x) -up=M+1.5*mad(x) -if(is.null(pts))pts=seq(low,up,length.out=npts) -if(npts<=5)p.crit=alpha/npts -if(alpha==.05){ -if(is.null(p.crit)){ -if(n<30)stop('Should have n>=30') -nv=c(30,50,70,100,150, 200,300, 500, 1000, 2000) -pv=c(0.003599898, 0.002661925, 0.002399994, 0.002877103, 0.003000428, 0.003538190, - 0.003872710, 0.004396500, 0.004075000, 0.0045161) - -if(npts==25){ -if(n<=2000)p.crit=lplot.pred(1/nv,pv,pts=1/n)$yhat -if(n>2000)p.crit=.00452 -}}} -if(is.null(p.crit)){ -print('p.crit is being computed, this might take some time.') -pts.stand=NULL -if(!is.null(pts))pts.stand=(median(x)-pts)/mad(x) -p.crit=lplotbsepvv3(n,nreps=nreps,npts=npts,pts=pts.stand,alpha=alpha) -} -plx<-predict(loess(y ~ x,span=span), se=TRUE) -se=lplotse(x,y,nboot=nboot,SEED=SEED,pts=pts,span=span) -lfit=lplot.pred(x,y,pts=pts,span=2/3)$yhat -if(plotit){ -plot(x,y,xlab=xlab,ylab=ylab,pch=pch) -lines(x,plx$fit) -if(is.null(p.crit))p.crit=alpha -lines(pts,lfit - qt(1-p.crit/2,plx$df)*se, lty=2) -lines(pts,lfit + qt(1-p.crit/2,plx$df)*se, lty=2) -} -ci.low=lfit - qt(1-p.crit/2,plx$df)*se -ci.up=lfit + qt(1-p.crit/2,plx$df)*se -if(!CIV)ci=NULL -if(CIV){ -ci=cbind(pts,lfit,ci.low,ci.up) -dimnames(ci)=list(NULL,c('X','Y.hat','ci.low','ci.up')) -} -list(p.crit=p.crit,Conf.Intervals=ci) -} - - -lplotbsepvv3<-function(n,nreps=2000,alpha=0.05,pts=NULL,npts=25){ -# -# Determine critical p-value for lplotCI. -# -set.seed(2) -pv=NA -for(i in 1:nreps){ -x=rnorm(n) -y=rnorm(n) -xord=order(x) -y=y[xord] -x=x[xord] -M=median(x) -low=M-1.5*mad(x) -up=M+1.5*mad(x) -if(is.null(pts))pts=seq(low,up,length.out=npts) -plx<-predict(loess(y ~ x), se=TRUE) -est=lplot.pred(x,y,pts=pts)$yhat -se=lplotse(x,y,SEED=FALSE,pts=pts) -test=abs(est/se) -pall=2*(1-pt(abs(test),plx$df)) -pv[i]=min(elimna(pall)) -} -hd(pv,alpha) -} - -lplotse<-function(x,y,pts=x,nboot=100,SEED=TRUE,span=2/3){ -# -# compute estimae of SE -# return the values corresponding to the order x values -# -xord=order(x) -y=y[xord] -x=x[xord] -if(SEED)set.seed(2) -n=length(y) -ev=matrix(NA,nrow=nboot,ncol=length(pts)) -for(i in 1:nboot){ -data<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot) -ev[i,]=lplot.pred(x[data[i,]],y[data[i,]],pts=pts,span=span)$yhat -} -se=apply(ev,2,sd) -se -} -BFBANOVA<-function(x,nboot=1000,SEED=TRUE){ -# -# One-way ANOVA bootstrap version of Brown-Forsyhte -# -if(SEED)set.seed(2) -if(is.data.frame(x))x=as.matrix(x) -if(is.matrix(x))x<-listm(x) -J=length(x) -for(j in 1:J)x[[j]]=elimna(x[[j]]) -TV=BFANOVA(x)$test.statistic -ylist<-list() -dat=list() -TT<-NA -#means<-sapply(x,mean) -for (j in 1:J)ylist[[j]]<-x[[j]]-mean(x[[j]]) -for (i in 1:nboot){ -for(j in 1:J)dat[[j]]=sample(ylist[[j]],length(ylist[[j]]),replace=TRUE) -TT[i]<-BFANOVA(dat)$test.statistic -} -pval<-mean(TV<=TT,na.rm=TRUE) -list(test.stat=TV,p.value=pval) -} -BFANOVA<-function(x){ -# -# Brown-Forsyhte ANOVA, generalized to trimmed means -# -# (Not known whether a generalization to trimmed means performs relatively well.) 
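-#
-# Illustrative sketch (hypothetical data): three independent groups stored in
-# list mode,
-# x=list(rnorm(20),rnorm(25,.5),rnorm(30))
-# BFANOVA(x)
-# returns the test statistic, the degrees of freedom and the p-value;
-# BFBANOVA(x) gives the bootstrap version above.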
-# -if(is.data.frame(x))x=as.matrix(x) -if(is.matrix(x))x<-listm(x) -J=length(x) -for(j in 1:J)x[[j]]=elimna(x[[j]]) -xall=elimna(as.vector(matl(x))) -YB=mean(xall) -ybar=lapply(x,mean) -v=lapply(x,var) -n=lapply(x,length) -ybar=matl(ybar) -n=as.vector(matl(n)) -v=as.vector(matl(v)) -ybar=as.vector(matl(ybar)) -w=n/v -N=sum(n) -top=sum(n*(ybar-YB)^2) -bot=sum((1-n/N)*v) -FS=top/bot -df1=J-1 -fv=NA -for(j in 1:J)fv[j]=(1-n[j]/N)*v[j] -df2=1/sum(fv^2/(n-1)) -pv=1-pf(FS,df1,df2) -list(test.statistic=FS,df1=df1,df2=df2,p.value=pv) -} -olshc4.band<-function(x,y,alpha=.05,xout=FALSE,outfun=outpro,plotit=TRUE,pr=TRUE, -xlab='X',ylab='Y',nreps=5000,pch='.',CI=FALSE,ADJ=TRUE,SEED=TRUE){ -# -# Heterocedastic confidence band with simultaneous -# probability coverate 1-alpha -# -# CI=TRUE, confidence intervals are returned. -# CI=FALSE, only a plot is created. -# -# -if(!CI){ -if(pr)print('This function returns the confidence intervals when CI=TRUE') -} -xy=elimna(cbind(x,y)) -if(ncol(xy)!=2)stop('Only one independent variable allowed') -x=xy[,1] -y=xy[,2] -if(xout){ -m<-cbind(x,y) -flag<-outfun(x,plotit=FALSE)$keep -m<-m[flag,] -x<-m[,1] -y<-m[,2] -} -flagdup=duplicated(x) -xu=x[!flagdup] -yu=y[!flagdup] -xs=order(xu) -xu=xu[xs] -yu=yu[xs] -n=length(yu) -df=n-2 -nv=c(20,30,40,50,75,100,200,300,500,1000,2000) -cr=c(0.009649481,0.01115032,0.0125336,0.01196315,0.01350826, -0.01346237,0.01326992,0.01291531,0.01465537,0.01488745, 0.01595954) -ysqse=trimse(y,tr=0)^2 -temp=olshc4(x,y) -b1sqse=temp$cov[2,2] -yhat=temp$ci[1,2]+temp$ci[2,2]*xu -se=sqrt(ysqse+b1sqse*(xu-mean(xu))^2) -ci=matrix(NA,ncol=4,nrow=length(xu)) -ci[,1]=xu -ci[,2]=yhat -if(!ADJ)crit=qt(1-alpha/2,df) -if(ADJ){ -adj=NA -if(alpha==.05){ -if(n>=20 && n<=2000)adj=lplot.pred(1/nv,cr,1/n)$yhat -if(is.na(adj))adj=olshc4.bandCV(n=n,nreps=nreps,alpha=alpha,SEED=SEED) -} -crit=qnorm(1-adj/2) # Don't need Student's T, adjustment deals with n. -} -ci[,3]=yhat-crit*se -ci[,4]=yhat+crit*se -dimnames(ci)=list(NULL,c('X','Yhat','ci.low','ci.up')) -if(plotit){ -plot(x,y,pch=pch,xlab=xlab,ylab=ylab) -abline(temp$ci[1,2],temp$ci[2,2]) -lines(xu,ci[,3],lty=2) -lines(xu,ci[,4],lty=2) -} -if(!CI)ci=NA -list(conf.intervals=ci) -} - - -olshc4.bandCV<-function(n,nreps=5000,alpha=.05,SEED=TRUE){ -# -# Heterocedastic confidence band with simultaneous -# probability coverage 1-alpha -# -if(SEED)set.seed(2) -pv=NA -for(i in 1:nreps){ -x=rnorm(n) -y=rnorm(n) -ysqse=trimse(y,tr=0)^2 -temp=olshc4(x,y) -ysqse=trimse(y,tr=0)^2 -b1sqse=temp$cov[2,2] -yhat=temp$ci[1,2]+temp$ci[2,2]*x -se=sqrt(ysqse+b1sqse*(x-mean(x))^2) -pv[i]=min(2*(1-pnorm(abs(yhat)/se))) -} -hd(pv,q=alpha) -} - -olshc4band=olshc4.band - -lplotcom2<-function(x,y,xout=FALSE,pts1=NULL,pts2=NULL,outfun=outpro,span=2/3,npts=10,tr=.2,...){ -# -# For two independent variables, estimate their relative importance when using LOESS -# -library(stats) -x<-as.matrix(x) -m<-elimna(cbind(x,y)) -n.orig=nrow(m) -n.keep=n.orig -d<-ncol(x) -if(d!=2)stop('Current version is for two independent variables only') -if(xout){ -flag<-outfun(m[,1:2],plotit=FALSE,...)$keep -m<-m[flag,] -} -n.keep=nrow(m) -M=apply(m,2,median) -SD=apply(m,2,mad) -low=M-1.5*SD -up=M+1.5*SD -if(is.null(pts1))pts1=seq(low[1],up[1],length.out=npts) -if(is.null(pts2))pts2=seq(low[2],up[2],length.out=npts) -e1=NA # -e2=NA -for(j in 1:length(pts1)){ # Determine strength of x2 given a value stored in pts1. 
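-# (The first covariate is held fixed at pts1[j]; the LOESS predictions over the
-# observed values of the second covariate are computed, and their Winsorized
-# standard deviation, relative to that of y, measures the strength of x2.)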
-v2=cbind(rep(pts1[j],n.keep),m[,2]) -vals=lplot.pred(m[,1:2],m[,3],v2,span=span)$yhat -vals=elimna(vals) -nv=length(vals) -e2[j]=NA -if(nv>=10)e2[j]=winsd(vals,tr=tr,na.rm=TRUE)/winsd(m[,3],tr=tr,na.rm=TRUE) -} -for(j in 1:length(pts2)){ # Determine strength of x1 given a value stored in pts2. -v1=cbind(m[,1],rep(pts2[j],n.keep)) -vals=lplot.pred(m[,1:2],m[,3],v1,span=span)$yhat -vals=elimna(vals) -nv=length(vals) -e1[j]=NA -if(nv>=10)e1[j]=winsd(vals,tr=tr,na.rm=TRUE)/winsd(m[,3],tr=tr,na.rm=TRUE) - } -p=mean(outer(e1,e2,FUN='-')<0,na.rm=TRUE) -list(str1=e1,str2=e2,p=p,mean.str1=mean(e1),mean.str2=mean(e2)) -} - -lplotCIMC<-function(data,x,y,pts1,pts2,npts,tr,span){ -temp=lplotcom2(x[data,],y[data],pts1=pts1,pts2=pts2,npts=npts,tr=tr,span=span) -v=c(temp$mean.str1,temp$mean.str2) -} -linWMW<-function(x,con,locfun=median,nreps=100,SEED=TRUE){ -# -# Determine distribution of Y_i=sum_j c_jX_j -# Then estimate P(Y<0) and measure of location -# based on -# locfun, which defaults to the median. -# -con=as.vector(con) -if(sum(con)!=0)stop('Contrast coefficients must sum to zero') -if(SEED)set.seed(2) -if(is.data.frame(x))x=as.matrix(x) -if(is.matrix(x))x<-listm(x) -if(!is.list(x))stop('Data must be stored in a matrix or in list mode.') -J<-length(x) -if(length(con)!=J)stop('Length of con should equal number of groups') -x=elimna(x) -nv=as.vector(matl(lapply(x,FUN='length'))) -nmin=min(nv) -est=NA -p=NA -B=list() -M=matrix(NA,nrow=nmin,ncol=J) -for(i in 1:nreps){ -for(j in 1:J)M[,j]=sample(x[[j]],nmin) -B[[i]]=M -} -L=lapply(B,linWMWMC.sub,con=con) -est=lapply(L,locfun) -p=lapply(L,linWMWMC.sub2) -est=as.vector(matl(est)) -p=as.vector(matl(p)) -list(p=mean(p),center=mean(est)) -} - -linWMWMC.sub<-function(M,con){ -L=apply(t(con*t(M)),1,sum) -L -} - -linWMWMC.sub2<-function(L){ -phat=mean(L<0)+.5*mean(L==0) -phat -} - - -linEP<-function(x,con,locfun=tmean,tr=.2,nreps=200,SEED=TRUE){ -# -# Estimate exlanatory power for a linear contrast aimed at main effects -# -# con = contrast coefficients -# x is a matrix or has list mode. -# -# When dealing with main effects. Could pool the data and use yuenv2. -# Or could estimate distribution of the linear contrast, which is the -# strategy here. Note: for interactions, this latter strategy is needed. -# -# (Uses the function linWMWMC.sub) -# -if(sum(con)!=0)stop('Contrast coefficients must sum to zero') -if(SEED)set.seed(2) -if(is.data.frame(x))x=as.matrix(x) -if(is.matrix(x))x<-listm(x) -if(!is.list(x))stop('Data must be stored in a matrix or in list mode.') -J<-length(x) -if(length(con)!=J)stop('Length of con should be equal to the number of groups') -x=elimna(x) -nv=as.vector(matl(lapply(x,FUN='length'))) -nmin=min(nv) -B=list() -M=matrix(NA,nrow=nmin,ncol=J) -for(i in 1:nreps){ -for(j in 1:J)M[,j]=sample(x[[j]],nmin) -B[[i]]=M -} -if(length(con)==2)ef.size=yuenv2(x[[1]],x[[2]],tr=tr)$Effect.Size -else{ -flag=con==1 -con1=con -con1[!flag]=0 -con2=abs(con) -con2[flag]=0 -L1=lapply(B,linWMWMC.sub,con=con1) -L2=lapply(B,linWMWMC.sub,con=con2) -ef.size=NA -for(j in 1:length(L1))ef.size[j]=yuenv2(L1[[j]],L2[[j]],SEED=FALSE)$Effect.Size -} -list(Effect.Size=mean(ef.size)) -} -linconEP<-function(x,con=0,tr=.2,alpha=.05,pr=TRUE,crit=NA,SEED=TRUE,INT=FALSE,nreps=200,POOL=FALSE){ -# -# -# This function is used when estimating effect size via -# a variation of explanatory power. -# -# It is restricted to the usual main effects and interactions in a two-way design. -# This function is used by bbmcpEP. 
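-# For example, for a 2-by-3 design with the six groups stored in list mode in x,
-# bbmcpEP(2,3,x) builds the contrast matrices via con2way(2,3) and calls this
-# function for the main effects and the interactions.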
-# -# con: used to indicate main effects and is passed to this function via bbmcpEP -# -# POOL=TRUE: For the usual main effects in a two-way where -# for a fixed level of Factor A, say, one can simply pool the data over the -# levels of Factor A. POOL=TRUE means that data with contrast coefficients -# = 1 are pooled, the same is for data with contrast coefficients -# = -1 and the resulting two groups are compared. -# -# A heteroscedastic test of d linear contrasts using trimmed means. -# -# The data are assumed to be stored in $x$ in list mode, a matrix -# or a data frame. If in list mode, -# length(x) is assumed to correspond to the total number of groups. -# It is assumed all groups are independent. -# -# con is a J by d matrix containing the contrast coefficients that are used. -# If con is not specified, all pairwise comparisons are made. -# -# Missing values are automatically removed. -# -# -if(tr==.5)stop('Use the R function medpb to compare medians') -if(is.data.frame(x))x=as.matrix(x) -flag<-TRUE -if(alpha!= .05 && alpha!=.01)flag<-FALSE -if(is.matrix(x))x<-listm(x) -if(!is.list(x))stop('Data must be stored in a matrix or in list mode.') -J<-length(x) -sam=NA -h<-vector('numeric',J) -w<-vector('numeric',J) -xbar<-vector('numeric',J) -for(j in 1:J){ -xx<-!is.na(x[[j]]) -val<-x[[j]] -x[[j]]<-val[xx] # Remove missing values -sam[j]=length(x[[j]]) -h[j]<-length(x[[j]])-2*floor(tr*length(x[[j]])) - # h is the number of observations in the jth group after trimming. -w[j]<-((length(x[[j]])-1)*winvar(x[[j]],tr))/(h[j]*(h[j]-1)) -xbar[j]<-mean(x[[j]],tr) -} -if(sum(con^2)==0){ -CC<-(J^2-J)/2 -psihat<-matrix(0,CC,7) -dimnames(psihat)<-list(NULL,c('Group','Group','psihat','ci.lower','ci.upper', -'p.value','Effect.Size')) -test<-matrix(NA,CC,6) -dimnames(test)<-list(NULL,c('Group','Group','test','crit','se','df')) -jcom<-0 -for (j in 1:J){ -for (k in 1:J){ -if (j < k){ -jcom<-jcom+1 -test[jcom,3]<-abs(xbar[j]-xbar[k])/sqrt(w[j]+w[k]) -sejk<-sqrt(w[j]+w[k]) -test[jcom,5]<-sejk -psihat[jcom,1]<-j -psihat[jcom,2]<-k -test[jcom,1]<-j -test[jcom,2]<-k -psihat[jcom,3]<-(xbar[j]-xbar[k]) -df<-(w[j]+w[k])^2/(w[j]^2/(h[j]-1)+w[k]^2/(h[k]-1)) -test[jcom,6]<-df -psihat[jcom,6]<-2*(1-pt(test[jcom,3],df)) -psihat[jcom,7]=yuenv2(x[[j]],x[[k]])$Effect.Size -if(CC>28)flag=FALSE -if(flag){ -if(alpha==.05)crit<-smmcrit(df,CC) -if(alpha==.01)crit<-smmcrit01(df,CC) -if(!flag || CC>28)crit<-smmvalv2(dfvec=rep(df,CC),alpha=alpha,SEED=SEED) -} -test[jcom,4]<-crit -psihat[jcom,4]<-(xbar[j]-xbar[k])-crit*sejk -psihat[jcom,5]<-(xbar[j]-xbar[k])+crit*sejk -}}}} -if(sum(con^2)>0){ -if(nrow(con)!=length(x)){ -stop('The number of groups does not match the number of contrast coefficients.') -} -psihat<-matrix(0,ncol(con),6) -dimnames(psihat)<-list(NULL,c('con.num','psihat','ci.lower','ci.upper', -'p.value','Effect.Size')) -test<-matrix(0,ncol(con),5) -dimnames(test)<-list(NULL,c('con.num','test','crit','se','df')) -df<-0 -for (d in 1:ncol(con)){ -if(POOL){ -id1=which(con[,d]==1) -id2=which(con[,d]==-1) -y1=pool.a.list(x[id1]) -y2=pool.a.list(x[id2]) -xx=list(y1,y2) -conP=matrix(c(1,-1)) -} -psihat[d,1]<-d -psihat[d,2]<-sum(con[,d]*xbar) -sejk<-sqrt(sum(con[,d]^2*w)) -test[d,1]<-d -test[d,2]<-sum(con[,d]*xbar)/sejk -df<-(sum(con[,d]^2*w))^2/sum(con[,d]^4*w^2/(h-1)) -if(flag){ -if(alpha==.05)crit<-smmcrit(df,ncol(con)) -if(alpha==.01)crit<-smmcrit01(df,ncol(con)) -} -if(!flag)crit<-smmvalv2(dfvec=rep(df,ncol(con)),alpha=alpha,SEED=SEED) -test[d,3]<-crit -test[d,4]<-sejk -test[d,5]<-df 
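-# Effect size for this contrast: based on explanatory power via linEP, or via
-# Inter.EP (using the groups with nonzero contrast coefficients) when INT=TRUE.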
-if(!POOL)temp=linEP(x,con[,d],tr=tr,nreps=nreps,SEED=SEED) -if(POOL)temp=linEP(xx,conP,tr=tr,nreps=nreps,SEED=SEED) -if(!INT){ -psihat[d,6]=linEP(x,con[,d],tr=tr,nreps=nreps,SEED=SEED)$Effect.Size -} -if(INT){ -id=con[,d]!=0 -psihat[d,6]=Inter.EP(x[id],tr=tr,nreps=nreps,SEED=SEED)$Effect.Size -} -psihat[d,3]<-psihat[d,2]-crit*sejk -psihat[d,4]<-psihat[d,2]+crit*sejk -psihat[d,5]<-2*(1-pt(abs(test[d,2]),df)) -} -} -list(n=sam,test=test,psihat=psihat) -} -bbmcpEP<-function(J,K,x,tr=.2,alpha=.05,grp=NA,op=FALSE,nreps=200,SEED=TRUE,pr=TRUE,POOL=TRUE){ -# -# Test all linear contrasts associated with -# main effects for Factor A and B and all interactions based on trimmed means -# By default, -# tr=.2, meaning 20% trimming is used. -# -# This function is the same as bbmpc, only it also reports a measures of effect size -# based on explanatory power. -# -# - # The data are assumed to be stored in x in list mode or in a matrix. - # If grp is unspecified, it is assumed x[[1]] contains the data - # for the first level of both factors: level 1,1. - # x[[2]] is assumed to contain the data for level 1 of the - # first factor and level 2 of the second factor: level 1,2 - # x[[j+1]] is the data for level 2,1, etc. - # If the data are in wrong order, grp can be used to rearrange the - # groups. For example, for a two by two design, grp<-c(2,4,3,1) - # indicates that the second group corresponds to level 1,1; - # group 4 corresponds to level 1,2; group 3 is level 2,1; - # and group 1 is level 2,2. - # - # Missing values are automatically removed. - # - JK <- J * K - if(is.matrix(x)) - x <- listm(x) - if(!is.na(grp[1])) { - yy <- x - x<-list() - for(j in 1:length(grp)) - x[[j]] <- yy[[grp[j]]] - } - if(!is.list(x)) - stop('Data must be stored in list mode or a matrix.') - for(j in 1:JK) { - xx <- x[[j]] - x[[j]] <- xx[!is.na(xx)] # Remove missing values - } - # - - if(JK != length(x)) - warning('The number of groups does not match the number of contrast coefficients.') -for(j in 1:JK){ -temp<-x[[j]] -temp<-temp[!is.na(temp)] # Remove missing values. -x[[j]]<-temp -} - # Create the three contrast matrices -temp<-con2way(J,K) -conA<-temp$conA -conB<-temp$conB -conAB<-temp$conAB -if(!op){ -Factor.A<-linconEP(x,con=conA,tr=tr,nreps=nreps,INT=FALSE,pr=FALSE,POOL=POOL) -Factor.B<-linconEP(x,con=conB,tr=tr,nreps=nreps,INT=FALSE,pr=FALSE,POOL=POOL) -Factor.AB<-linconEP(x,con=conAB,tr=tr,nreps=nreps,INT=TRUE,pr=FALSE,POOL=FALSE) -} -All.Tests<-NA -if(op){ -Factor.A<-NA -Factor.B<-NA -Factor.AB<-NA -con<-cbind(conA,conB,conAB) -All.Tests<-lincon(x,con=con,tr=tr,alpha=alpha) -} -list(Factor.A=Factor.A,Factor.B=Factor.B,Factor.AB=Factor.AB,All.Tests=All.Tests,conA=conA,conB=conB,conAB=conAB) -} - -rmmcpES<-function(x, con = 0, tr = 0.2, alpha = 0.05,dif=TRUE,hoch=TRUE,pr=TRUE){ -# -# Like rmmcp,only a robust version of Cohen's d is included. -# Designed only for all pairwise comparisons. 
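-#
-# Illustrative sketch (hypothetical data): four dependent measures stored as the
-# columns of a matrix,
-# m=matrix(rnorm(80),ncol=4)
-# rmmcpES(m)
-# performs all pairwise comparisons; with dif=TRUE the effect size is based on
-# difference scores via D.akp.effect.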
-# -if(con!=0)stop('This function is for all pairwise comparisons only') -if(!is.matrix(x))x<-matl(x) -if(!is.matrix(x))stop('Data must be stored in a matrix or in list mode.') -a=rmmcp(x,con=con,tr=tr,alpha=alpha,dif=dif,hoch=hoch) -test=a$test -J=ncol(x) -CC=(J^2-J)/2 -psihat<-matrix(0,CC,6) -dimnames(psihat)<-list(NULL,c('Group','Group','psihat','ci.lower','ci.upper','Effect.Size')) -psihat[,1:5]=a$psihat -ic=0 -for (j in 1:J){ -for (k in 1:J){ -if (j < k){ -ic=ic+1 -if(dif)psihat[ic,6]=D.akp.effect(x[,j],x[,k],tr=tr) -if(!dif){ -psihat[ic,6]=yuendv2(x[,j],x[,k],tr=tr)$Effect.Size -if(pr)print('Note: With dif=FALSE, explanatory measure of effect size is used') -} -}}} -list(test=test,psihat=psihat) -} - -interWMWpb<-function(x,nreps=100,SEED=TRUE,nboot=500,alpha=.05,nmax=10^8,MC=TRUE){ -# -# -# -if(MC)library(parallel) -if(SEED)set.seed(2) -if(is.data.frame(x))x=as.matrix(x) -if(is.matrix(x))x<-listm(x) -if(!is.list(x))stop('Data must be stored in a matrix or in list mode.') -x=elimna(x) -J<-length(x) -if(J!=4)stop('Number of groups should be four') -nv=lapply(x,length) -y=list() -pv=NA -N=max(pool.a.list(nv)) -mat=matrix(NA,nrow=N,ncol=4) -for(i in 1:nboot){ -for(j in 1:4)mat[1:nv[[j]],j]=sample(x[[j]],nv[[j]],replace=TRUE) -y[[i]]=mat -} -if(!MC)pv=lapply(y,interWMWpb.lsub) -if(MC)pv=mclapply(y,interWMWpb.lsub) -pv=pool.a.list(pv) -est=interWMW(x,nreps=nreps,SEED=SEED,nmax=nmax) -pv=sort(pv) -ilow<-round((alpha/2) * nboot) -ihi<-nboot - ilow -ilow<-ilow+1 -ci=pv[ilow] -ci[2]=pv[ihi] -pval=mean(pv<.5)+.5*mean(pv==.5) -pval=2*min(c(pval,1-pval)) -list(p.est=est$p.est,ci=ci,p.value=pval,row.results=est$results.4.rows) -} - -interWMWpb.lsub<-function(x,nreps=nreps){ -v=interWMW(x,nreps=nreps,SEED=FALSE)$p.est -v -} - - -linWMWpb<-function(x,con,nreps=100,SEED=TRUE,nboot=500,alpha=.05,MC=FALSE){ -# -# Compute a confidence interval for the probability that a linear contrast -# is less than zero. -# -con=as.vector(con) -if(SEED)set.seed(2) -if(is.data.frame(x))x=as.matrix(x) -if(is.matrix(x))x<-listm(x) -if(!is.list(x))stop('Data must be stored in a matrix or in list mode.') -x=elimna(x) -J<-length(x) -if(J!=length(con))stop('Number of groups should be equal to the number of rows in con') -nv=lapply(x,length) -N=max(pool.a.list(nv)) -mat=matrix(NA,nrow=N,ncol=J) -y=list() -pv=NA -est=linWMW(x,con=con,nreps=nreps,SEED=SEED)$p -for(i in 1:nboot){ -#for(j in 1:J)y[[j]]=sample(x[[j]],nv[[j]],replace=TRUE) -for(j in 1:J)mat[1:nv[[j]],j]=sample(x[[j]],nv[[j]],replace=TRUE) -y[[i]]=mat -} -if(!MC)pv=lapply(y,linWMWpb.lsub,con=con,nreps=nreps,SEED=SEED) -if(MC){ -library(parallel) -pv=mclapply(y,linWMWpb.lsub,con=con,nreps=nreps,SEED=SEED) -} -pv=pool.a.list(pv) -pv=sort(pv) -ilow<-round((alpha/2) * nboot) -ihi<-nboot - ilow -ilow<-ilow+1 -ci=pv[ilow] -ci[2]=pv[ihi] -pval=mean(pv<.5)+.5*mean(pv==.5) -pval=2*min(c(pval,1-pval)) -list(p.est=est,ci=ci,p.value=pval) -} - -linWMWpb.lsub<-function(x,nreps=nreps,con=con,SEED=SEED){ -v=linWMW(x,nreps=nreps,con=con,SEED=SEED)$p -v -} - -permg.t<-function(x,y,alpha=.05,tr=0,nboot=1000,SEED=TRUE){ -# -# Do a two-sample permutation test based on trimmed means using the -# Chung--Romano version of a permuation test. 
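-#
-# Illustrative sketch (hypothetical data):
-# x=rnorm(30); y=rnorm(30,.5)
-# permg.t(x,y,tr=.2)
-# returns the Yuen test statistic for the observed data along with the lower
-# and upper permutation critical values.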
- -# The default number of permutations is nboot=1000 -# -if(SEED)set.seed(2) -x<-x[!is.na(x)] -y<-y[!is.na(y)] -xx<-c(x,y) -tval<-yuen(x,y,tr=tr)$teststat -vec<-c(1:length(xx)) -v1<-length(x)+1 -difb<-NA -tv<-NA -for(i in 1:nboot){ -data <- sample(xx, size = length(xx), replace = FALSE) -temp1<-data[c(1:length(x))] -temp2<-data[c(v1:length(xx))] -tv[i]<-yuen(temp1,temp2,tr=tr)$teststat -} -tv<-sort(tv) -icl<-floor((alpha/2)*nboot+.5) -icu<-floor((1-alpha/2)*nboot+.5) -reject<-'no' -list(teststat=tval,lower.crit=tv[icl],upper.crit=tv[icu],reject=reject) -} - -loc2dif.ci<-function(x,y,est=median,alpha=.05,nboot=2000,SEED=TRUE){ -# -# Confidence interval for the median of D=X-Y, -# where X and Y are independent -# -x=elimna(x) -y=elimna(y) -n1=length(x) -n2=length(y) -es=loc2dif(x,y) -FLAG=FALSE -cliff=cid(x,y,alpha=alpha)$ci.p -del1=WMW2med(x,y,cliff[1]) -del2=WMW2med(x,y,cliff[2]) -ci=loc2dif(x+del1,y,est=est) -ci[2]=loc2dif(x+del2,y,est=est) -if(var(cliff)==0)FLAG=TRUE -if(escliff[2])FLAG=TRUE -if(FLAG)ci=wmwpb(x,y,est=est,alpha=alpha,nboot=nboot,SEED=SEED,pr=FALSE)$ci -list(n1=n1,n2=n2,est=es,conf.int=ci) -} - - - -WMW2med<-function(x,y,q){ -# -# If P(Xalpha)flag=TRUE -if(pvF<=alpha/(K+1-i)){ -ic=ic+1 -pick=c(pick,v[ic]) -flag=FALSE -if(pv[v[ic]]>alpha)flag=TRUE -} -if(flag)break -} -Decision=rep('Not Sig',length(pv)) -if(!is.null(pick))Decision[pick]='Reject' -nsig=sum(length(pick)) -list(n1=n1,n2=n2,p.values=pv, -Decisions=as.matrix(Decision),num.sig=nsig) -} - -shiftPBci<-function(x,y,locfun=median,alpha=.05,null.val=.5,nboot=500,SEED=TRUE,...){ -# -# confidence interval for the quantile shift measure of effect size. -# (Same as shiftQSci) -# -if(SEED)set.seed(2) -x=elimna(x) -y=elimna(y) -n1=length(x) -n2=length(y) -v=NA -ef=shiftes(x,y,locfun=locfun,SEED=FALSE)$Q.Effect -for(i in 1:nboot){ -X=sample(x,n1,replace=TRUE) -Y=sample(y,n2,replace=TRUE) -v[i]=shiftes(X,Y,locfun=locfun,SEED=FALSE)$Q.Effect -} -v=sort(v) -ilow<-round((alpha/2) * nboot) -ihi<-nboot - ilow -ilow<-ilow+1 -ci=v[ilow] -ci[2]=v[ihi] -pv=mean(v=10 & n2>=10){ -x=elimna(x) -y=elimna(y) -nt=n1*n2 -if(nt<10^6)L=outer(x,y,FUN='-') -if(nt>=10^6){ -if(SEED)set.seed(2) -L=NULL -nmin=min(c(n1,n2,100)) -vef=NA -vefND=NA - -for(i in 1:iter){ -id1=sample(n1,nmin) -id2=sample(n2,nmin) -L=outer(x[id1],y[id2],FUN='-') -est=locfun(L,...) -vef[i]=mean(L-est<=est) -if(est<0)ef.sizeND=mean(L-est>=est) -} -ef.size=mean(vef) -} -if(nt<10^6){ -est=locfun(L,...) -ef.size=mean(L-est<=est) -}} - -list(Q.Effect=ef.size) -} - - -shiftQS=shiftes - -shiftesci<-function(x,y,locfun=median,alpha=.05,nboot=500,SEED=TRUE,...){ -# -# confidence interval for the quantile shift measure of effect size. -# -if(SEED)set.seed(2) -x=elimna(x) -y=elimna(y) -n1=length(x) -n2=length(y) -v=NA -for(i in 1:nboot){ -X=sample(x,n1,replace=TRUE) -Y=sample(y,n2,replace=TRUE) -v[i]=shiftes(X,Y,locfun=locfun)$Q.Effect -} -v=sort(v) -ilow<-round((alpha/2) * nboot) -ihi<-nboot - ilow -ilow<-ilow+1 -ci=v[ilow] -ci[2]=v[ihi] -ci -} - - -shiftci<-function(x,y,locfun=median,alpha=.05,pr=TRUE,...){ -# -# confidence interval for the quantile shift measure of effect size. 
-# -# OLD VERSION USE shiftesci -# -x=elimna(x) -y=elimna(y) -if(pr){ -if(sum(duplicated(x)>0))print('Duplicate values detected; suggest using shiftPBci') -if(sum(duplicated(y)>0))print('Duplicate values detected; suggest using shiftPBci') -} -n1=length(x) -n2=length(y) -if(pr){ -if(min(c(n1,n2)<40))print('Minimum sample size is less than 40; suggest using shiftPBci') -} -L=outer(x,y,FUN='-') -L=as.vector(L) -est=locfun(L,...) -ef=shiftes(x,y,locfun=locfun)$Q.Effect -ci=cidv2(x-2*est,y,alpha=alpha)$p.ci -list(n1=n1,n2=n2,effect.size=ef,conf.int=ci) -} - - -medpb.es<-function(x,alpha=.05,nboot=NA,grp=NA,est=median,con=0,bhop=FALSE, -SEED=TRUE,INT=FALSE,...){ -# -# Multiple comparisons for J independent groups using medians. -# -# A percentile bootstrap method with Rom's method is used. -# -# The data are assumed to be stored in x -# which either has list mode or is a matrix. In the first case -# x[[1]] contains the data for the first group, x[[2]] the data -# for the second group, etc. Length(x)=the number of groups = J. -# If stored in a matrix, the columns of the matrix correspond -# to groups. -# -# est is the measure of location and defaults to the median -# ... can be used to set optional arguments associated with est -# -# The argument grp can be used to analyze a subset of the groups -# Example: grp=c(1,3,5) would compare groups 1, 3 and 5. -# -# -# con can be used to specify linear contrasts; see the function lincon -# -# Missing values are allowed. -# -# A shift-type measure of effect size,Q, is reported. No effect, Q=.5 -# For two groups, let D=X-Y, let M be the population median of D. -# Let F be the distribution D-M. Then -# Q=F(M). If the median of D is M, there is no effect. -# Q represents a shift in location to some relatively high or low quantile associated with $F_0$ -# -con<-as.matrix(con) -if(is.data.frame(x))x=as.matrix(x) -if(is.matrix(x))x<-listm(x) -if(!is.list(x))stop('Data must be stored in list mode or in matrix mode.') -if(!is.na(sum(grp))){ # Only analyze specified groups. -xx<-list() -for(i in 1:length(grp))xx[[i]]<-x[[grp[i]]] -x<-xx -} -J<-length(x) -tempn<-0 -mvec<-NA -for(j in 1:J){ -temp<-x[[j]] -temp<-temp[!is.na(temp)] # Remove missing values. -tempn[j]<-length(temp) -x[[j]]<-temp -mvec[j]<-est(temp,...) -} -Jm<-J-1 -# -# Determine contrast matrix -# -if(sum(con^2)==0){ -ncon<-(J^2-J)/2 -con<-matrix(0,J,ncon) -id<-0 -for (j in 1:Jm){ -jp<-j+1 -for (k in jp:J){ -id<-id+1 -con[j,id]<-1 -con[k,id]<-0-1 -}}} -ncon<-ncol(con) -dvec<-alpha/c(1:ncon) -if(nrow(con)!=J)stop('Something is wrong with con; the number of rows does not match the number of groups.') -# Determine nboot if a value was not specified -if(is.na(nboot)){ -nboot<-5000 -if(J <= 8)nboot<-4000 -if(J <= 3)nboot<-2000 -} -# Determine critical values -if(!bhop){ -if(alpha==.05){ -dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) -if(ncon > 10){ -avec<-.05/c(11:ncon) -dvec<-c(dvec,avec) -}} -if(alpha==.01){ -dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) -if(ncon > 10){ -avec<-.01/c(11:ncon) -dvec<-c(dvec,avec) -}} -if(alpha != .05 && alpha != .01){ -dvec<-alpha/c(1:ncon) -} -} -if(bhop)dvec<-(ncon-c(1:ncon)+1)*alpha/ncon -bvec<-matrix(NA,nrow=J,ncol=nboot) -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -for(j in 1:J){ -#print(paste('Working on group ',j)) -data<-matrix(sample(x[[j]],size=length(x[[j]])*nboot,replace=TRUE),nrow=nboot) -bvec[j,]<-apply(data,1,est,...) 
# Bootstrapped values for jth group -} -test<-NA -bcon<-t(con)%*%bvec #ncon by nboot matrix -tvec<-t(con)%*%mvec -for (d in 1:ncon){ -tv<-sum(bcon[d,]==0)/nboot -test[d]<-sum(bcon[d,]>0)/nboot+.5*tv -if(test[d]> .5)test[d]<-1-test[d] -} -test<-2*test -output<-matrix(0,ncon,7) -dimnames(output)<-list(NULL,c('con.num','psihat','p.value','p.crit','ci.lower','ci.upper','Q.effect')) -temp2<-order(0-test) -zvec<-dvec[1:ncon] -sigvec<-(test[temp2]>=zvec) -output[temp2,4]<-zvec -icl<-round(dvec[ncon]*nboot/2)+1 -icu<-nboot-icl-1 -output[,2]=tvec -for (ic in 1:ncol(con)){ -output[ic,1]<-ic -output[ic,3]<-test[ic] -temp<-sort(bcon[ic,]) -output[ic,5]<-temp[icl] -output[ic,6]<-temp[icu] -see=lin.ES(x,con=con[,ic],locfun=median) -if(!INT)output[ic,7]=lin.ES(x,con=con[,ic],locfun=median)$Effect.Size -if(!INT)output[ic,7]=lin.ES(x,con=con[,ic],locfun=median)$Effect.Size -if(INT){ -id=which(con[,ic]!=0) -output[ic,7]=interQS(x[id],locfun=median,SEED=SEED)$Q.Effect -} -} -num.sig<-sum(output[,3]<=output[,4]) -list(output=output,con=con,num.sig=num.sig) -} - -medpbQS=medpb.es - -rplotCI<-function(x,y,tr=.2,fr=.5,p.crit=NA,plotit=TRUE,scat=TRUE, -SEED=TRUE,pyhat=FALSE,npts=25,xout=FALSE, -xlab='X',ylab='Y',low.span=2/3,nmin=12,pr=TRUE, -outfun=outpro,LPCI=FALSE,MID=TRUE,alpha=.05,pch='.',...){ -# -# Confidence interval for running interval smoother based on a trimmed mean. -# Unlike rplot, includes an approximate confidence band having simultaneous probability -# coverage equal to 1-alpha. More precisely, the simultaneous probability -# is for K=npts points -# -# LP=TRUE, the plot is further smoothed via lowess -# -# fr controls amount of smoothing -# -# To specify the points where confidence intervals are computed, -# use rplotCIsmm -# -if(pr){ -if(!LPCI)print('To get smoother plot, set LPCI=TRUE') -} -m<-cbind(x,y) -if(ncol(m)>2)stop('Only one independent variable can be used') -m<-elimna(m) -x=m[,1] -y=m[,2] -if(xout){ -xy=cbind(x,y) -flag=outfun(x,plotit=FALSE)$keep -x=xy[flag,1] -y=xy[flag,2] -} -n.used=NA -n=length(y) -if(n<50)stop('Need at least n=50') -nv=c(50, 60, 70, 80, 100, -150, 200, 300, 400, 500, 600, 800, 1000) -if(npts==25) pv=c(0.004846408, -0.004553274, -0.004236101, -0.004099674, - 0.00353898, #n=100 - 0.003366984, -0.003038767, - 0.003061386, - 0.002793521, - 0.002479689, - 0.002606313, - 0.0026630370, - 0.002836043) -if(npts==10) pv=c( -0.007612451, -0.008383655, -0.006992874, - 0.0068073, -0.005733889, -0.005767139, -0.006130155, -0.005447419, -0.005555951, -0.005228471, -0.005642503, -0.005402162, -0.005569217) -FLAG=FALSE -if(npts==25 || npts==10)FLAG=TRUE -if(alpha!=.05 || !FLAG){ -if(is.na(p.crit)){ -print('p.crit must be estimated, execution time might be high') -print('Or use the R function rplotCIsmm') -} -p.crit=rplotCITAP.pv(n,tr=tr,fr=fr,alpha=alpha,nmin=nmin,npts=npts,nreps=nreps) -} -rem.n=n -if(n>1000)n=1000 -if(is.na(p.crit))p.crit=lplot.pred(1/nv,pv,1/n)$yhat -n=rem.n -xord=order(x) -x=x[xord] -y=y[xord] -infit=rplot(x,y,tr=tr,xout=FALSE,plotit=plotit,LP=LPCI,fr=fr,pr=FALSE,pyhat=TRUE,xlab=xlab, -ylab=ylab) -rmd=infit$pyhat -m<-cbind(x,y) -if(ncol(m)>2)stop('One covariate only is allowed with this function') -m<-elimna(m) -nv=nrow(m) -if(xout){ -flag<-outfun(m[,1])$keep -m<-m[flag,] -} -x=m[,1] -y=m[,2] -n.keep=length(y) -res1=ancova(x,y,x,y,pr=FALSE,plotit=FALSE,fr1=fr,fr2=fr,nmin=nmin)$output -pts=seq(res1[1,1],res1[5,1],length.out=npts) -y.hat=NA -civ=matrix(NA,nrow=npts,ncol=2) -for(i in 1:length(pts)){ -xx=y[near(x,pt=pts[i],fr)] 
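-# For each design point pts[i], a confidence interval for the trimmed mean of y
-# among the x values close to pts[i] is computed, using the adjusted level p.crit
-# so that the npts intervals have simultaneous coverage of approximately 1-alpha.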
-doit=trimci(xx,tr=tr,alpha=p.crit,pr=FALSE) -civ[i,]=doit$ci -y.hat[i]=doit$estimate -n.used[i]=doit$n -} -up=civ[,2] -low=civ[,1] -if(plotit){ -if(LPCI){ -up=lplot(pts,up,plotit=FALSE,pyhat=TRUE,pr=FALSE,low.span=low.span)$yhat -y.hat=lplot(pts,y.hat,plotit=FALSE,pyhat=TRUE,pr=FALSE,low.span=low.span)$yhat -low=lplot(pts,low,plotit=FALSE,pyhat=TRUE,pr=FALSE,low.span=low.span)$yhat -} -lines(pts,up,lty=2) -lines(pts,low,lty=2) -} -if(pyhat){output<-cbind(pts,y.hat,low,up,n.used) -dimnames(output)=list(NULL,c('pts','y.hat','ci.low','ci.up','n.used')) -} -if(!pyhat)output<-'Done' -list(output=output,str=infit$Strength.Assoc,n=nv,n.keep=n.keep) -} - -signmcp<-function(x,y = NULL, alpha = 0.05, method='AC' , AUTO=TRUE,Method="hochberg"){ -# -# Dependent groups -# Perform sign test for all pairwise differences -# -if(!is.matrix(x))x<-matl(x) -if(!is.matrix(x))stop("Data must be stored in a matrix or in list mode.") -J<-ncol(x) -nval<-nrow(x) -ncon<-(J^2-J)/2 -dvec<-alpha/c(1:ncon) -psihat<-matrix(NA,ncon,9) -dimnames(psihat)<-list(NULL,c("Group","Group","n","N","Prob_x_less_than_y","ci.lower","ci.upper", -"p.value","p.adjusted")) -jcom<-0 -for (j in 1:J){ -for (k in 1:J){ -if (j < k){ -jcom<-jcom+1 -temp=signt(x[,j],x[,k],alpha=alpha,method=method,AUTO=AUTO) -psihat[jcom,1]<-j -psihat[jcom,2]<-k -psihat[jcom,3]<-temp$n -psihat[jcom,4]<-temp$N -psihat[jcom,5]<-temp$Prob_x_less_than_y -psihat[jcom,6:7]=temp$ci -if(method!='SD')psihat[jcom,8]=temp$p.value -}}} -if(method!='SD')psihat[,9]=p.adjust(psihat[,8],method=Method) -list(output=psihat) -} - - - - - -loc2difpb<-function(x,y,est=median,alpha=.05,nboot=2000,SEED=TRUE){ -# -# A percentile bootstrap -# confidence interval for the median of D=X-Y -# -if(SEED)set.seed(2) -x=elimna(x) -y=elimna(y) -n1=length(x) -n2=length(y) -v=NA -for(i in 1:nboot){ -X=sample(x,n1,replace=TRUE) -Y=sample(y,n2,replace=TRUE) -v[i]=loc2dif(X,Y) -} -pv=mean(v<0)+.5*mean(v==0) -pv=2*min(c(pv,1-pv)) -vs=sort(v) -crit<-alpha/2 -icl<-round(crit*nboot)+1 -icu<-nboot-icl -ci=vs[icl] -ci[2]=vs[icu] -list(ci=ci,p.value=pv) -} - -Dcbmhd<-function(x=NULL,y=NULL,d=NULL,qest=hd,alpha=.05,q=.25,plotit=FALSE,pop=0, -fr=.8,rval=15,xlab='',ylab='',nboot=600,SEED=TRUE){ -# -# -# Compute a confidence interval for the sum of the qth and (1-q)th quantiles -# of the distribution of D=X-Y, where X and Y are two -# dependent random variables. -# The Harrell-Davis estimator is used -# If the distribution of X and Y are identical, then in particular the -# distribution of D=X-Y is symmetric about zero. -# -# plotit=TRUE causes a plot of the difference scores to be created -# pop=0 adaptive kernel density estimate -# pop=1 results in the expected frequency curve. 
-# pop=2 kernel density estimate (Rosenblatt's shifted histogram) -# pop=3 boxplot -# pop=4 stem-and-leaf -# pop=5 histogram -# -if(SEED)set.seed(2) -if(q>=.5)stop('q should be less than .5') -if(q<=0)stop('q should be greater than 0') -if(is.null(d))d=x-y -if(is.null(d))stop('Apparently x or y contain no data') -d=elimna(d) -n=length(d) -q2=1-q -est1=qest(d,q) -est2=qest(d,q2) -data<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot) -bvec=NA -for(i in 1:nboot){ -bvec[i]=qest(d[data[i,]],q)+qest(d[data[i,]],q2) -} -p=mean(bvec>0)+.5*mean(bvec==0) -p=2*min(c(p,1-p)) -sbv=sort(bvec) -ilow<-round((alpha/2) * nboot) -ihi<-nboot - ilow -ilow<-ilow+1 -ci=sbv[ilow] -ci[2]=sbv[ihi] -if(plotit){ -if(pop==1 || pop==0){ -if(length(x)*length(y)>2500){ -print('Product of sample sizes exceeds 2500.') -print('Execution time might be high when using pop=0 or 1') -print('If this is case, might consider changing the argument pop') -print('pop=2 might be better') -}} -MM=d -if(pop==0)akerd(MM,xlab=xlab,ylab=ylab) -if(pop==1)rdplot(MM,fr=fr,xlab=xlab,ylab=ylab) -if(pop==2)kdplot(MM,rval=rval,xlab=xlab,ylab=ylab) -if(pop==3)boxplot(MM) -if(pop==4)stem(MM) -if(pop==5)hist(MM,xlab=xlab) -if(pop==6)skerd(MM) -} -list(q=q,n=n,Est1=est1,Est2=est2,sum=est1+est2,ci=ci,p.value=p) -} -Dqcihd<-function(x,y,alpha=.05,q=c(1:9/10), -plotit=FALSE,pop=0,fr=.8,rval=15,xlab="",ylab="",nboot=600,SEED=TRUE){ -# -# Compute a confidence interval for the quantiles for D=X-Y, X and Y independent. -# -# The Harrell-Davis estimator is used -# If the distribution of X and Y are identical, then in particular the -# distribution of D=X-Y is symmetric about zero. -# -# plotit=TRUE causes a plot of the difference scores to be created -# pop=0 adaptive kernel density estimate -# pop=1 results in the expected frequency curve. 
-# pop=2 kernel density estimate (Rosenblatt's shifted histogram) -# pop=3 boxplot -# pop=4 stem-and-leaf -# pop=5 histogram -# -if(SEED)set.seed(2) -x<-x[!is.na(x)] -y<-y[!is.na(y)] -n1=length(x) -n2=length(y) -m<-outer(x,y,FUN="-") -est=NA -for(i in 1:length(q))est[i]=hd(m,q=q[i]) -data1<-matrix(sample(n1,size=n1*nboot,replace=TRUE),nrow=nboot) -data2<-matrix(sample(n2,size=n2*nboot,replace=TRUE),nrow=nboot) -bvec=matrix(NA,nrow=nboot,ncol=length(q)) -for(i in 1:nboot){ -mb=outer(x[data1[i,]],y[data2[i,]],"-") -for(j in 1:length(q)) -bvec[i,j]=hd(mb,q=q[j]) -} -p=NA -ci=matrix(NA,nrow=length(q),ncol=2) -for(j in 1:length(q)){ -p[j]=mean(bvec[,j]>0)+.5*mean(bvec[,j]==0) -p[j]=2*min(c(p[j],1-p[j])) -sbv=sort(bvec[,j]) -ilow<-round((alpha/2) * nboot) -ihi<-nboot - ilow -ilow<-ilow+1 -ci[j,1]=sbv[ilow] -ci[j,2]=sbv[ihi] -} -if(plotit){ -if(pop==1 || pop==0){ -if(length(x)*length(y)>2500){ -print("Product of sample sizes exceeds 2500.") -print("Execution time might be high when using pop=0 or 1") -print("If this is case, might consider changing the argument pop") -print("pop=2 might be better") -}} -MM=as.vector(m) -if(pop==0)akerd(MM,xlab=xlab,ylab=ylab) -if(pop==1)rdplot(MM,fr=fr,xlab=xlab,ylab=ylab) -if(pop==2)kdplot(MM,rval=rval,xlab=xlab,ylab=ylab) -if(pop==3)boxplot(MM) -if(pop==4)stem(MM) -if(pop==5)hist(MM,xlab=xlab) -if(pop==6)skerd(MM) -} -output=cbind(as.matrix(q),as.matrix(est),ci,as.matrix(p)) -dimnames(output)=list(NULL,c("Quantile","Estimates","ci.low","ci.up","p-value")) -output -} -lplotcom2v2<-function(x,y,xout=FALSE,pts1=NULL,pts2=NULL,outfun=outpro,span=2/3,npts=10,tr=.2,...){ -# -# For two independent variables, estimate their relative importance when using LOESS -# -library(stats) -x<-as.matrix(x) -m<-elimna(cbind(x,y)) -n.orig=nrow(m) -n.keep=n.orig - -d<-ncol(x) -if(d!=2)stop('Current version is for two independent variables only') -if(xout){ -flag<-outfun(x,plotit=FALSE,...)$keep -m<-m[flag,] -} #$ -n.keep=nrow(m) -M=apply(m,2,median) -SD=apply(m,2,mad) -low=M-1.5*SD -up=M+1.5*SD -if(is.null(pts1))pts1=seq(low[1],up[1],length.out=npts) -if(is.null(pts2))pts2=seq(low[2],up[2],length.out=npts) -e1=NA -e2=NA -for(j in 1:length(pts1)){ # Determine strength of x2 given a value stored in pts1. -v2=cbind(rep(pts1[j],n.keep),m[,2]) -vals=lplot.pred(m[,1:2],m[,3],v2,span=span)$yhat -vals=elimna(vals) -nv=length(vals) -e2[j]=NA -if(nv>=10)e2[j]=winsd(vals,tr=tr,na.rm=TRUE) -} -for(j in 1:length(pts2)){ # Determine strength of x1 given a value stored in pts2. -v1=cbind(m[,1],rep(pts2[j],n.keep)) -vals=lplot.pred(m[,1:2],m[,3],v1,span=span)$yhat -vals=elimna(vals) -nv=length(vals) -e1[j]=NA -if(nv>=10)e1[j]=winsd(vals,tr=tr,na.rm=TRUE) - } -p=mean(outer(e1,e2,FUN='-')<0,na.rm=TRUE) -list(str1=e1,str2=e2,p=p,mean.str1=mean(e1),mean.str2=mean(e2)) -} - -lplotcomBCI<-function(x,y,xout=FALSE,pts1=NULL,pts2=NULL,p.crit=NULL, -outfun=outpro,span=2/3,npts=10,tr=.2,nboot=500, -SEED=TRUE,SEQ=FALSE,MAD.OP=FALSE,plotit=TRUE,ticktype='simple', -xlab='X1',ylab='X2',zlab='Y',reverse.x1=FALSE,reverse.x2=FALSE,pr=FALSE, -MEDIAN=FALSE,Q1=FALSE,Q2=FALSE,alpha=.05,MC=FALSE,...){ -# -# For two independent variables, estimate their relative importance when using LOESS -# p.crit is the critical p-value. If not specified, the function returns the approximate 0.05 critical p-value -# -# By default, use the average of the strength of the associations, so essentially a global test based on the quartiles -# MEDIAN=TRUE, use the median of the independent variables only. 
-# Q1=TRUE, use the lower quartile of the independent variables only. -# Q2=TRUE, use the upper quartile of the independent variables only. -# -# ADJ.CI=TRUE: Confidence intervals are based on the critical p-value -# otherwise use alpha -# -if(SEED)set.seed(2) -library(stats) -x<-as.matrix(x) -m<-elimna(cbind(x,y)) -n=nrow(m) -x=m[,1:2] -y=m[,3] -if(xout){ -flag=outfun(x,plotit=FALSE)$keep -x=x[flag,] -y=y[flag] -n=nrow(x) -} -if(n<50)stop('The sample size must be greater than or equal to 50') -if(MEDIAN){ -pts1=median(x[,1]) -pts2=median(x[,2]) -if(is.null(p.crit)){ -if(n<=200)p.crit=regYhat(c(1/50,1/100,1/200),c(.114,.080,.065),1/n) -if(n>200)p.crit=.062 -} -} -if(Q1){ -pts1=qest(x[,1],.25) -pts2=qest(x[,2],.25) -if(is.null(p.crit)){ -if(n<=200)p.crit=regYhat(c(1/50,1/100,1/200),c(.142,.095,.082),1/n) -if(n>200)p.crit=.062 -} -} -if(Q2){ -pts1=qest(x[,1],.75) -pts2=qest(x[,2],.75) -if(is.null(p.crit)){ -if(n<=200)p.crit=regYhat(c(1/50,1/100,1/200),c(.142,.095,.082),1/n) -if(n>200)p.crit=.062 -} -} -if(is.null(pts1)){ -pts1=qest(x[,1],c(.25,.5,.75)) -if(is.null(p.crit)){ -if(n<=200)p.crit=regYhat(c(1/50,1/100,1/200),c(.082,.076,.067),1/n) -if(n>200)p.crit=.06 -} -if(reverse.x1)pts1=sort(pts1,TRUE) -if(is.null(pts2))pts2=qest(x[,2],c(.25,.5,.75)) -if(reverse.x2)pts2=sort(pts2,TRUE) -} -if(SEQ){ -if(MAD.OP){ -M=apply(m,2,median) -SD=apply(m,2,mad) - -low=M-1.5*SD -up=M+1.5*SD -} -else{ -low=apply(m,2,qest,.25) -hi=apply(m,2,qest,.75) -} -pts1=seq(low[1],up[1],length.out=npts) -pts2=seq(low[2],up[2],length.out=npts) -} -v1=NA -v2=NA -data<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot) -if(!MC){ -for(i in 1:nboot){ -ib=data[i,] -temp=lplotcom2v2(x[ib,],y[ib],pts1=pts1,pts2=pts2,npts=npts,tr=tr,span=span) -v1[i]=temp$mean.str1 -v2[i]=temp$mean.str2 -}} -if(MC){ -library(parallel) -data=listm(t(data)) -bvec<-mclapply(data,lplotCIMCv2,x,y,pts1=pts1,pts2=pts2,npts=npts,tr=tr,span=span) -bvec=matl(bvec) # a 2-by-nboot matrix. -dif=sort(bvec[1,]-bvec[2,]) -} -if(!MC)dif=sort(v1-v2) -nbl=length(dif) -#ilow<-round((alpha/2) * nbl) -ilow<-round((p.crit/2) * nbl) -ihi<-nbl - ilow -ilow<-ilow+1 -ci.low=dif[ilow] -ci.hi=dif[ihi] -pv=mean(dif<0,na.rm=TRUE) -pv=2*min(pv,1-pv) -est=lplotcom2(x,y,xout=FALSE,pts1=pts1,pts2=pts2,outfun=outfun,span=span, -npts=npts,tr=tr) -if(plotit)lplot(x,y,ticktype=ticktype,xlab=xlab,ylab=ylab,zlab=zlab,pr=pr) -list(p.crit=p.crit,p.value=pv,str.x1.given.x2=est$str1,str.x2.given.x1=est$str2,mean.str1=est$mean.str1, -mean.str2=est$mean.str2, -ci.low=ci.low,ci.hi=ci.hi,pts.x1=pts1,pts.x2=pts2) -} - -lplotcomBCI9<-function(x,y,xout=FALSE,pr=TRUE, -outfun=outpro,span=2/3,npts=10,tr=.2,nboot=500, -SEED=TRUE,plotit=TRUE,ticktype='simple',ADJ.CI=TRUE, -xlab='X1',ylab='X2',zlab='Y',alpha=.05,MC=FALSE,...){ -# -# For two independent variables, estimate their relative importance when using LOESS -# Focus on the quartiles: none tests based on all possible combinations. -# -p.crit=NA -if(pr){ -if(alpha!=.05){ -if(pr)print('Critical p-value is taken to be the value of alpha. 
Unknown how to adjust when alpha is not .05') -p.crit=alpha -} -if(ADJ.CI)print('Confidence intervals are based on the critical p-value') -} -if(SEED)set.seed(2) -library(stats) -x<-as.matrix(x) -m<-elimna(cbind(x,y)) -n=nrow(m) -n.orig=n -x=m[,1:2] -y=m[,3] -if(xout){ -flag=outfun(x,plotit=FALSE)$keep -x=x[flag,] -y=y[flag] -n=nrow(x) -} -if(is.na(p.crit)){ -if(n<=100)p.crit=regYhat(c(1/50,1/100),c(.042,.025),1/n) -else p.crit=.025 -} -output<-matrix(NA,nrow=9,ncol=7) -dimnames(output)=list(NULL,c('pts1','pts2','p-value','str.x1.given.x2','str.x2.given.x1','ci.low','ci.hi')) -pts1=qest(x[,1],c(.25,.5,.75)) -pts2=qest(x[,2],c(.25,.5,.75)) - -v1=matrix(NA,nrow=nboot,ncol=3) -v2=matrix(NA,nrow=nboot,ncol=3) - -data<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot) -if(!MC){ -for(i in 1:nboot){ -ib=data[i,] -for(j in 1:3){ -temp=lplotcom2v2(x[ib,],y[ib],pts1=pts1[j],pts2=pts2[j],npts=npts,tr=tr,span=span) -v1[i,j]=temp$mean.str1 -v2[i,j]=temp$mean.str2 -}}} -if(MC){ -library(parallel) -data=listm(t(data)) -for(j in 1:3){ -bvec<-mclapply(data,lplotCIMCv2,x,y,pts1=pts1[j],pts2=pts2[j],npts=npts,tr=tr,span=span) -bvec=matl(bvec) # a 2-by-nboot matrix. -v1[,j]=bvec[1,] -v2[,j]=bvec[2,] -} -} -pc=matrix(NA,3,3) #rows are for pts1, columns for pts2 -ic=0 -for(j in 1:3){ -for(k in 1:3){ -est=lplotcom2(x,y,xout=FALSE,pts1=pts1[j],pts2=pts2[k],outfun=outfun,span=span, -npts=npts,tr=tr) -ic=ic+1 -output[ic,1]=pts1[j] -output[ic,2]=pts2[k] -dif=sort(v1[,j]-v2[,k]) -nbl=length(dif) -if(ADJ.CI)ilow<-round((p.crit/2) * nbl) -else ilow<-round((alpha/2) * nbl) -ihi<-nbl - ilow -ilow<-ilow+1 -ci.low=dif[ilow] -ci.hi=dif[ihi] -pv=mean(dif<0,na.rm=TRUE) -pc[j,k]=2*min(pv,1-pv) -output[ic,3]=pc[j,k] -output[ic,6]=ci.low -output[ic,7]=ci.hi -output[ic,4]=est$mean.str1 -output[ic,5]=est$mean.str2 -}} -if(plotit)lplot(x,y,ticktype=ticktype,xlab=xlab,ylab=ylab,zlab=zlab) -list(n=n.orig,n.keep=n,p.crit=p.crit,output=output) -} - -lplotCIMCv2<-function(data,x,y,pts1,pts2,npts,tr,span){ -temp=lplotcom2v2(x[data,],y[data],pts1=pts1,pts2=pts2,npts=npts,tr=tr,span=span) -v=c(temp$mean.str1,temp$mean.str2) -} - - -linconES<-function(x,con=0,tr=.2,alpha=.05,pr=TRUE,crit=NA,SEED=TRUE,INT=FALSE, -locfun=tmean){ -# -# Like the function lincon, only -# this function estimates effect size via -# quantile shift perspective. -# -# A heteroscedastic test of d linear contrasts using trimmed means. -# -# The data are assumed to be stored in $x$ in list mode, a matrix -# or a data frame. If in list mode, -# length(x) is assumed to correspond to the total number of groups. -# It is assumed all groups are independent. -# -# con is a J by d matrix containing the contrast coefficients that are used. -# If con is not specified, all pairwise comparisons are made. -# -# Missing values are automatically removed. -# -# -if(tr==.5)stop('Use the R function medpb to compare medians') -if(is.data.frame(x))x=as.matrix(x) -flag<-TRUE -if(alpha!= .05 && alpha!=.01)flag<-FALSE -if(is.matrix(x))x<-listm(x) -if(!is.list(x))stop('Data must be stored in a matrix or in list mode.') -con<-as.matrix(con) -J<-length(x) -sam=NA -h<-vector('numeric',J) -w<-vector('numeric',J) -xbar<-vector('numeric',J) -for(j in 1:J){ -xx<-!is.na(x[[j]]) -val<-x[[j]] -x[[j]]<-val[xx] # Remove missing values -sam[j]=length(x[[j]]) -h[j]<-length(x[[j]])-2*floor(tr*length(x[[j]])) - # h is the number of observations in the jth group after trimming. 
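-# w[j] estimates the squared standard error of the trimmed mean for group j:
-# (n_j-1) times the Winsorized variance divided by h_j(h_j-1). These quantities
-# are used below in the test statistics and the Satterthwaite-type degrees of freedom.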
-w[j]<-((length(x[[j]])-1)*winvar(x[[j]],tr))/(h[j]*(h[j]-1)) -xbar[j]<-mean(x[[j]],tr) -} -if(sum(con^2)==0){ -CC<-(J^2-J)/2 -psihat<-matrix(0,CC,8) -dimnames(psihat)<-list(NULL,c('Group','Group','psihat','ci.lower','ci.upper', -'p.value','Q.effect','Rel.Q')) -test<-matrix(NA,CC,6) -dimnames(test)<-list(NULL,c('Group','Group','test','crit','se','df')) -jcom<-0 -for (j in 1:J){ -for (k in 1:J){ -if (j < k){ -jcom<-jcom+1 -test[jcom,3]<-abs(xbar[j]-xbar[k])/sqrt(w[j]+w[k]) -sejk<-sqrt(w[j]+w[k]) -test[jcom,5]<-sejk -psihat[jcom,1]<-j -psihat[jcom,2]<-k -test[jcom,1]<-j -test[jcom,2]<-k -psihat[jcom,3]<-(xbar[j]-xbar[k]) -df<-(w[j]+w[k])^2/(w[j]^2/(h[j]-1)+w[k]^2/(h[k]-1)) -test[jcom,6]<-df -psihat[jcom,6]<-2*(1-pt(test[jcom,3],df)) -psihat[jcom,7]=lin.ES(x[c(j,k)],con=c(1,-1))$Effect.Size -psihat[jcom,8]=(psihat[jcom,7]-.5)/.5 -if(CC>28)flag=FALSE -if(flag){ -if(alpha==.05)crit<-smmcrit(df,CC) -if(alpha==.01)crit<-smmcrit01(df,CC) -} -if(!flag || CC>28)crit<-smmvalv2(dfvec=rep(df,CC),alpha=alpha,SEED=SEED) -test[jcom,4]<-crit -psihat[jcom,4]<-(xbar[j]-xbar[k])-crit*sejk -psihat[jcom,5]<-(xbar[j]-xbar[k])+crit*sejk -}}}} -if(sum(con^2)>0){ -if(nrow(con)!=length(x)){ -stop('The number of groups does not match the number of contrast coefficients.') -} -psihat<-matrix(0,ncol(con),6) -dimnames(psihat)<-list(NULL,c('con.num','psihat','ci.lower','ci.upper', -'p.value','Q.effect')) -test<-matrix(0,ncol(con),5) -dimnames(test)<-list(NULL,c('con.num','test','crit','se','df')) -df<-0 -for (d in 1:ncol(con)){ -psihat[d,1]<-d -psihat[d,2]<-sum(con[,d]*xbar) -sejk<-sqrt(sum(con[,d]^2*w)) -test[d,1]<-d -test[d,2]<-sum(con[,d]*xbar)/sejk -df<-(sum(con[,d]^2*w))^2/sum(con[,d]^4*w^2/(h-1)) -if(flag){ -if(alpha==.05)crit<-smmcrit(df,ncol(con)) -if(alpha==.01)crit<-smmcrit01(df,ncol(con)) -} -if(!flag)crit<-smmvalv2(dfvec=rep(df,ncol(con)),alpha=alpha,SEED=SEED) -test[d,3]<-crit -test[d,4]<-sejk -test[d,5]<-df -if(!INT)psihat[d,6]=lin.ES(x,con[,d],tr=tr,nreps=nreps,SEED=SEED)$Effect.Size -if(INT){ -id=con[,d]!=0 -psihat[d,6]=interQS(x[id],nreps=nreps,locfun=locfun,SEED=SEED)$Q.Effect -} -psihat[d,3]<-psihat[d,2]-crit*sejk -psihat[d,4]<-psihat[d,2]+crit*sejk -psihat[d,5]<-2*(1-pt(abs(test[d,2]),df)) -} -} -if(pr){ -print('Note: confidence intervals are adjusted to control FWE') -print('But p-values are not adjusted to control FWE') -print('Adjusted p-values can be computed with the R function p.adjusted') -print('Under normality and homoscedasticity, Cohen d= .2, .5, .8') -print('corresponds approximately to Rel.Q = 0.55, 0.65 and 0.70, respectively') -} -list(n=sam,test=test,psihat=psihat) -} - -STRIPchart<-function(x,method ='overplot', jitter = 0.1, offset = 1/3, - vertical = FALSE, group.names, add = FALSE, - at = NULL, xlim = NULL, ylim = NULL, - ylab = NULL, xlab = NULL, dlab ='', glab ='', - log = '', pch = 0, col = par('fg'), cex = par('cex'), - axes = TRUE, frame.plot = axes, ...){ -# -# Same as stripchart, only it accepts a matrix, unlike stripchart, which -# allows x to be a data frame or list mode, but not a matrix. -# -if(is.matrix(x))x=listm(x) -stripchart(x,method=method,jitter=jitter,offset = offset, - vertical = vertical, group.names=group.names, add = add, - at =at, xlim = xlim, ylim = ylim, - ylab = ylab, xlab = xlab, dlab = dlab, glab = glab, - log = log, pch = pch, col = col, cex = cex, - axes = axes, frame.plot = frame.plot, ...) -} -interWMW<-function(x,locfun=median,nreps=200,SEED=TRUE,nmax=10^8){ -# -# Goal: estimate P(X_1-X_2 < X_3-X_4). 
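-# If X_1-X_2 and X_3-X_4 have identical distributions, this probability is .5,
-# so values away from .5 reflect an interaction.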
-# -# That is, dealing with an interaction in a 2-by-2 ANOVA design based on -# a Wilcoxon--Mann--Whitney approach but allow heteroscedasticity. -# -# Strategy: estimate the distribution of X_1-X_2, non-parametrically do the same -# for X_3-X_4, then estimate P(X_1-X_2< X_3-X_4) -# -# x should be a matrix with four columns or have list mode with length=4 -# -if(SEED)set.seed(2) -if(is.data.frame(x))x=as.matrix(x) -if(is.matrix(x))x<-listm(x) -if(!is.list(x))stop('Data must be stored in a matrix or in list mode.') -x=elimna(x) -J<-length(x) -if(J!=4)stop('Number of groups should be four') -#nx=pool.a.list(lapply(x,FUN='length')) -LL=list() -LL[[1]]=outer(x[[1]],x[[2]],FUN='-') -LL[[2]]=outer(x[[3]],x[[4]],FUN='-') -nv=c(length(LL[[1]]),length(LL[[2]])) -ntot=nv[1]*nv[2] -if(ntot<=nmax)p=bmp(LL[[1]],LL[[2]])$phat -else{ -nmin=min(nv) -est=NA -p=NA -pest=NA -B=list() -M=matrix(NA,nrow=nmin,ncol=2) -for(i in 1:nreps){ -for(j in 1:2)M[,j]=sample(LL[[j]],nmin) -B[[i]]=M -pest[i]=mean(M[,1]10^3){ -if(SEED)set.seed(2) -Nmin1=min(c(nv[1],nv[2],100)) -Nmin2=min(c(nv[3],nv[4],100)) -for(i in 1:iter){ -id1=sample(nv[1],Nmin1) -id2=sample(nv[2],Nmin1) -L1=outer(x[[1]][id1],x[[2]][id2],FUN='-') -id1=sample(nv[3],Nmin2) -id2=sample(nv[4],Nmin2) -L2=outer(x[[3]][id1],x[[4]][id2],FUN='-') -ef[i]=pxly(L1,L2,iter=iter,SEED=SEED) -}} -if(nt<=10^3){ -L1=outer(x[[1]],x[[2]],FUN='-') -L2=outer(x[[3]],x[[4]],FUN='-') -ef=pxly(L1,L2,iter=iter,SEED=SEED) -} -ef=mean(ef) -ef -} - -interWMWAP<-function(x,nreps=100,SEED=TRUE,nboot=500,alpha=.05,nmax=10^8,MC=TRUE){ -# -# Interaction in a 2-by-2 design using P(X_1-X_210^3){ -if(SEED)set.seed(2) -for(i in 1:nreps){ -for(j in 1:4)M[,j]=sample(x[[j]],nmin) -L1=outer(M[,g[1]],M[,g[2]],FUN='+') -L2=outer(M[,g[3]],M[,g[4]],FUN='+') -ef[i]=shiftes(L1,L2,locfun=locfun)$Q.Effect -}} -else{ -L1=outer(x[[g[1]]],x[[g[2]]],FUN='+') -L2=outer(x[[g[3]]],x[[g[4]]],FUN='+') -ef=shiftes(L1,L2,locfun=locfun)$Q.Effect -} -es=mean(ef) -list(Q.Effect=es) -} - -QS1way<-function(x,locfun=median,alpha=0.05,SEED=TRUE,nboot=500,CI=TRUE){ -# -# Estimate quantile shift function when comparing all -# pairs of groups in a one-way (independent) groups design -# -# CI=TRUE: confidence intervals for the measure of effect size are computed. -# -if(is.matrix(x) || is.data.frame(x))x<-listm(x) -if(!is.list(x))stop('Data must be stored in a matrix or in list mode.') -J=length(x) -Jall=(J^2-J)/2 -con1=con1way(J) -output=matrix(NA,nrow=Jall,ncol=5) -dimnames(output)=list(NULL,c('Group','Group','Effect.Size','low.ci','up.ci')) -ic=0 -for(j in 1:J){ -for(k in 1:J){ -if(j10^3){ -if(SEED)set.seed(2) -for(i in 1:nreps){ -for(j in 1:4)M[,j]=sample(x[[j]],nmin) -L1=outer(M[,1],M[,2],FUN='-') -L2=outer(M[,3],M[,4],FUN='-') -ef[i]=shiftes(L1,L2,locfun=locfun)$Q.Effect -}} -else{ -L1=outer(x[[1]],x[[2]],FUN='-') -L2=outer(x[[3]],x[[4]],FUN='-') -ef=shiftes(L1,L2,locfun=locfun,...)$Q.effect -} -es=mean(ef) -list(Q.Effect=es) -} - - -linconQS<-function(x,con=0,tr=.2,alpha=.05,pr=TRUE,crit=NA,SEED=TRUE,INT=FALSE, -locfun=tmean){ -# -# -# This function is used when estimating effect size via -# quantile shift perspective. -# -# A heteroscedastic test of d linear contrasts using trimmed means. -# -# The data are assumed to be stored in x in list mode, a matrix -# or a data frame. If in list mode, -# length(x) is assumed to correspond to the total number of groups. -# It is assumed all groups are independent. -# -# con is a J by d matrix containing the contrast coefficients that are used. 
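-# For example, with J=3 groups, con=matrix(c(1,-1,0,1,0,-1),nrow=3) has two columns
-# specifying the contrasts group 1 minus group 2 and group 1 minus group 3.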
-# If con is not specified, all pairwise comparisons are made. -# -# Missing values are automatically removed. -# -# -if(tr==.5)stop('Use the R function medpb to compare medians') -if(is.data.frame(x))x=as.matrix(x) -flag<-TRUE -if(alpha!= .05 && alpha!=.01)flag<-FALSE -if(is.matrix(x))x<-listm(x) -if(!is.list(x))stop('Data must be stored in a matrix or in list mode.') -con<-as.matrix(con) -J<-length(x) -sam=NA -h<-vector('numeric',J) -w<-vector('numeric',J) -xbar<-vector('numeric',J) -for(j in 1:J){ -xx<-!is.na(x[[j]]) -val<-x[[j]] -x[[j]]<-val[xx] # Remove missing values -sam[j]=length(x[[j]]) -h[j]<-length(x[[j]])-2*floor(tr*length(x[[j]])) - # h is the number of observations in the jth group after trimming. -w[j]<-((length(x[[j]])-1)*winvar(x[[j]],tr))/(h[j]*(h[j]-1)) -xbar[j]<-mean(x[[j]],tr) -} -if(sum(con^2)==0){ -CC<-(J^2-J)/2 -psihat<-matrix(0,CC,8) -dimnames(psihat)<-list(NULL,c('Group','Group','psihat','ci.lower','ci.upper', -'p.value','Q.effect','Rel.Q')) -test<-matrix(NA,CC,6) -dimnames(test)<-list(NULL,c('Group','Group','test','crit','se','df')) -jcom<-0 -for (j in 1:J){ -for (k in 1:J){ -if (j < k){ -jcom<-jcom+1 -test[jcom,3]<-abs(xbar[j]-xbar[k])/sqrt(w[j]+w[k]) -sejk<-sqrt(w[j]+w[k]) -test[jcom,5]<-sejk -psihat[jcom,1]<-j -psihat[jcom,2]<-k -test[jcom,1]<-j -test[jcom,2]<-k -psihat[jcom,3]<-(xbar[j]-xbar[k]) -df<-(w[j]+w[k])^2/(w[j]^2/(h[j]-1)+w[k]^2/(h[k]-1)) -test[jcom,6]<-df -psihat[jcom,6]<-2*(1-pt(test[jcom,3],df)) -psihat[jcom,7]=lin.ES(x[c(j,k)],con=c(1,-1))$Effect.Size -psihat[jcom,8]=(psihat[jcom,7]-.5)/.5 -if(CC>28)flag=FALSE -if(flag){ -if(alpha==.05)crit<-smmcrit(df,CC) -if(alpha==.01)crit<-smmcrit01(df,CC) -} -if(!flag || CC>28)crit<-smmvalv2(dfvec=rep(df,CC),alpha=alpha,SEED=SEED) -test[jcom,4]<-crit -psihat[jcom,4]<-(xbar[j]-xbar[k])-crit*sejk -psihat[jcom,5]<-(xbar[j]-xbar[k])+crit*sejk -}}}} -if(sum(con^2)>0){ -if(nrow(con)!=length(x)){ -stop('The number of groups does not match the number of contrast coefficients.') -} -psihat<-matrix(0,ncol(con),6) -dimnames(psihat)<-list(NULL,c('con.num','psihat','ci.lower','ci.upper', -'p.value','Q.effect')) -test<-matrix(0,ncol(con),5) -dimnames(test)<-list(NULL,c('con.num','test','crit','se','df')) -df<-0 -for (d in 1:ncol(con)){ -psihat[d,1]<-d -psihat[d,2]<-sum(con[,d]*xbar) -sejk<-sqrt(sum(con[,d]^2*w)) -test[d,1]<-d -test[d,2]<-sum(con[,d]*xbar)/sejk -df<-(sum(con[,d]^2*w))^2/sum(con[,d]^4*w^2/(h-1)) -if(flag){ -if(alpha==.05)crit<-smmcrit(df,ncol(con)) -if(alpha==.01)crit<-smmcrit01(df,ncol(con)) -} -if(!flag)crit<-smmvalv2(dfvec=rep(df,ncol(con)),alpha=alpha,SEED=SEED) -test[d,3]<-crit -test[d,4]<-sejk -test[d,5]<-df -if(!INT)psihat[d,6]=lin.ES(x,con[,d],tr=tr,nreps=nreps,SEED=SEED)$Effect.Size -if(INT){ -id=con[,d]!=0 -psihat[d,6]=interQS(x[id],nreps=nreps,locfun=locfun,SEED=SEED)$Q.Effect -} -psihat[d,3]<-psihat[d,2]-crit*sejk -psihat[d,4]<-psihat[d,2]+crit*sejk -psihat[d,5]<-2*(1-pt(abs(test[d,2]),df)) -} -} -if(pr){ -print('Note: confidence intervals are adjusted to control FWE') -print('But p-values are not adjusted to control FWE') -print('Adjusted p-values can be computed with the R function p.adjusted') -print('Under normality and homoscedasticity, Cohen d= 0, .2, .5, .8') -print('correspond approximately to Q.effect = 0.5, 0.55, 0.65 and 0.70, respectively') -} -list(n=sam,test=test,psihat=psihat) -} - -bbmcpQS<-function(J,K,x,locfun,nreps=100,SEED=TRUE,POOL=TRUE,pr=TRUE){ -# -# For independent groups, -# compute quantile shift measure of effect size for all main effects and interactions. 
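-# The first two arguments give the number of levels of Factors A and B; the JK
-# groups are assumed to be stored in x in the order described below (level 1,1,
-# then level 1,2, and so on).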
-# -# To get an explanatory measure of effect size, use bbmcpEP -# - # The data are assumed to be stored in x in list mode or in a matrix. - # If grp is unspecified, it is assumed x[[1]] contains the data - # for the first level of both factors: level 1,1. - # x[[2]] is assumed to contain the data for level 1 of the - # first factor and level 2 of the second factor: level 1,2 - # x[[j+1]] is the data for level 2,1, etc. - # If the data are in wrong order, grp can be used to rearrange the - # groups. For example, for a two by two design, grp<-c(2,4,3,1) - # indicates that the second group corresponds to level 1,1; - # group 4 corresponds to level 1,2; group 3 is level 2,1; - # and group 1 is level 2,2. - # - # Missing values are automatically removed. - # - JK <- J * K - if(is.matrix(x) || is.data.frame(x)) - x <- listm(x) - if(!is.list(x)) - stop('Data must be stored in list mode or a matrix.') - if(JK != length(x)) - print('Warning: JK does not match the number of groups.') -x=elimna(x) # Remove missing values. -DONE=FALSE -if(J==2 & K==2){ -Factor.A=list() -Factor.A[[1]]=linconQS(x[1:2],pr=FALSE) -Factor.A[[2]]=linconQS(x[3:4],pr=FALSE) -Factor.B=list() -Factor.B[[1]]=linconQS(x[c(1,3)],pr=FALSE) -Factor.B[[2]]=linconQS(x[c(2,4)],pr=FALSE) -Factor.AB=linconQS(x,con=c(1,-1,-1,1),INT=TRUE,pr=FALSE) -DONE=TRUE -} -temp<-con2way(J,K) -conA<-temp$conA -conB<-temp$conB -conAB<-temp$conAB -if(!DONE){ - # Create the three contrast matrices -if(!POOL){ # For each level of Factor A, compute effect size -# for all pairwise comparisons among the levels of B -ID=matrix(c(1:JK),nrow=J,ncol=K,byrow=TRUE) -Factor.A=list() -for(j in 1:J){ -id=as.vector(ID[j,]) -Factor.A[[j]]=linconQS(x[id],pr=FALSE) -} -Factor.B=list() -ID=t(ID) -for(k in 1:K){ -id=as.vector(ID[k,]) -Factor.B[[k]]=linconQS(x[id],pr=FALSE) -} -}} -# Do interactions -Factor.AB=list() -for(l in 1:ncol(conAB)){ -#id=which(conAB[,l]!=0) -Factor.AB[[l]]=linconQS(x,con=conAB[,l],INT=TRUE,pr=FALSE) -} -# -if(POOL){ -ID=matrix(c(1:JK),nrow=J,ncol=K,byrow=TRUE) -LEV.A=list() -for(j in 1:J){ -id=as.vector(ID[j,]) -LEV.A[[j]]=pool.a.list(x[id]) -} -Factor.A=linconQS(LEV.A,pr=FALSE) -ID=t(ID) -LEV.B=list() -for(k in 1:K){ -id=as.vector(ID[k,]) -LEV.B[[k]]=pool.a.list(x[id]) -} -Factor.B=linconQS(LEV.B,pr=FALSE) -} -if(pr){ -print('The columns of conAB contain the contrast coefficients for the interactions.') -print('For example, the output in FactorAB[[1]] are the results based') -print('on the contrast coefficients in column 1') -print('which is the interaction for the first two rows and the first two columns') -print(' ') -print('Note: Under normality and homoscedasticity, Cohen d= 0, .2, .5, .8') -print('correspond approximately to Q.effect = 0.5, 0.55, 0.65 and 0.70, respectively') -} -if(!POOL){ -print('Factor.A: for each row of 1st factor, perform all pairwise') -print(' among the levels of Factor B and store the results in Factor.A') -print('Do the same for the second factor and store the results in Factor.B') -} -if(POOL){ -print('Factor.A: for each row of 1st factor, pool the data over the levels') -print('of Factor B. Then do all pairwise comparisons and store the results in Factor.A') -print('Do the same for the second factor and store the results in Factor.B') -} -list(Factor.A=Factor.A,Factor.B=Factor.B,Factor.AB=Factor.AB,conAB=conAB) -} - - -bwiDIF<-function(J,K,x,JK=J*K,grp=c(1:JK),alpha=.05,SEED=TRUE){ -# -# Same as bwimcp only use a Patel type approach -# -# Multiple comparisons for interactions -# in a split-plot design. 
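-# (That is, a between-by-within design: the levels of Factor A correspond to
-# independent groups and the levels of Factor B to dependent groups.)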
-# The analysis is done by taking difference scores -# among all pairs of dependent groups and -# determining which of -# these differences differ across levels of Factor A -# using trimmed means. -# -# FWE is controlled via Hochberg's method -# To adjusted p-values, use the function p.adjust -# -# For MOM or M-estimators, use spmcpi which uses a bootstrap method -# -# The R variable x is assumed to contain the raw -# data stored in list mode or in a matrix. -# If in list mode, x[[1]] contains the data -# for the first level of both factors: level 1,1. -# x[[2]] is assumed to contain the data for level 1 of the -# first factor and level 2 of the second: level 1,2 -# x[[K]] is the data for level 1,K -# x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc. -# -# If the data are in a matrix, column 1 is assumed to -# correspond to x[[1]], column 2 to x[[2]], etc. -# -# When in list mode x is assumed to have length JK, the total number of -# groups being tested, but a subset of the data can be analyzed -# using grp -# - if(is.matrix(x)) { - y <- list() - for(j in 1:ncol(x)) - y[[j]] <- x[, j] - x <- y -} - -JK<-J*K -if(JK!=length(x))stop('Something is wrong. Expected ',JK,' groups but x contains ', length(x), 'groups instead.') -MJ<-(J^2-J)/2 -MK<-(K^2-K)/2 -JMK<-J*MK -MJMK<-MJ*MK -Jm<-J-1 -data<-list() -for(j in 1:length(x)){ -data[[j]]<-x[[grp[j]]] # Now have the groups in proper order. -} -x<-data -output<-matrix(0,MJMK,9) -dimnames(output)<-list(NULL,c('A','A','B','B','p.hat','p.value','ci.low','ci.up','p.adjust')) -jp<-1-K -kv<-0 -kv2<-0 -test<-NA -for(j in 1:J){ -jp<-jp+K -xmat<-matrix(NA,ncol=K,nrow=length(x[[jp]])) -for(k in 1:K){ -kv<-kv+1 -xmat[,k]<-x[[kv]] -} -xmat<-elimna(xmat) -for(k in 1:K){ -kv2<-kv2+1 -x[[kv2]]<-xmat[,k] -}} -m<-matrix(c(1:JK),J,K,byrow=TRUE) -ic<-0 -for(j in 1:J){ -for(jj in 1:J){ -if(j=zvec) -output[temp2,7]<-zvec -output[,7]<-output[,7] -output -} - -bwmcpAKP<-function(J,K,x,tr=.2,pr=TRUE){ -# -# Compute Algina et al measure of effect size for all pairwise comparisons -# in a between-by-within design -# -if(pr){ -print('A[[1]] contains the estimated effect size for level 1 of Factor A;') -print(' all pairwise comparisons over Factor B') -print('A[[2]] contains results for level 2, etc.') -} -if(is.matrix(x) || is.data.frame(x))x<-listm(x) -JK=J*K -ID=matrix(c(1:JK),nrow=J,ncol=K,byrow=TRUE) -A=list() -for (j in 1:J)A[[j]]=wmcpAKP(x[ID[j,]]) -B=list() -for(k in 1:K)B[[k]]=bmcpAKP(x[ID[,k]],tr=tr) -AB=bwimcpAKP(J,K,x)[,c(1:4,8)] -list(Factor.A=A,Factor.B=B,interactions=AB) -} - -bmcpQS<-function(x,locfun=median,...){ -# -# Compute quantile shift measure of effect size for all pairs of J independent groups -# -if(is.matrix(x) || is.data.frame(x))x<-listm(x) -J=length(x) -C=(J^2-J)/2 -A=matrix(NA,nrow=C,ncol=3) -dimnames(A)=list(NULL,c('Group','Group','Effect.Size')) -ic=0 -for (j in 1:J){ -for (k in 1:J){ -if(j=null.value)ef.sizeND=mean(L-est+null.value<=est) -ef.size=mean(L-est+null.value<=est) -if(est=est) -list(Q.effect=ef.size) -} - - - -linQS<-function(x,con,locfun=median,nreps=200,SEED=TRUE){ -# -# Determine distribution of Y_i=sum_j c_jX_j -# Then estimate quantile shift in location measure of effect size -# locfun, which defaults to the median. 
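-# Because the groups are independent and can have unequal sample sizes, the
-# distribution of Y is approximated by repeatedly (nreps times) subsampling
-# nmin=min(n_j) values from each group without replacement, forming the linear
-# combination for each subsample, and averaging the resulting estimates.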
-# -if(sum(con)!=0)stop('Contrast coefficients must sum to zero') -if(SEED)set.seed(2) -if(is.data.frame(x))x=as.matrix(x) -if(is.matrix(x))x<-listm(x) -if(!is.list(x))stop('Data must be stored in a matrix or in list mode.') -J<-length(x) -if(length(con)!=J)stop('Length of con should equal number of groups') -x=elimna(x) -nv=as.vector(matl(lapply(x,FUN='length'))) -nmin=min(nv) -est=NA -p=NA -B=list() -M=matrix(NA,nrow=nmin,ncol=J) -for(i in 1:nreps){ -for(j in 1:J)M[,j]=sample(x[[j]],nmin) -B[[i]]=M -} -L=lapply(B,linWMWMC.sub,con=con) -est=lapply(L,locfun) -p=lapply(L,linQS.sub2,locfun=locfun) -est=as.vector(matl(est)) -p=as.vector(matl(p)) -list(Q.effect=mean(p),center=mean(est)) -} - -linQS.sub2<-function(L,locfun=median){ -phat=mean(L-locfun(L)11)*(nval2>11)) -flag[i]=flag.chk -if(flag.chk){ -Y1=y1[near3d(x1,pts[i,],fr=fr1,m=m1)] -Y2=y2[near3d(x2,pts[i,],fr=fr2,m=m2)] -temp=yuen(Y1,Y2,tr=tr) -temp=pool.a.list(temp[1:7]) -ic=ic+1 -output[ic,]=temp -}} -sel.pts=NULL -sig.points=NULL -if(ic>0){ -n.sel=sum(flag) -output=output[1:n.sel,] -sel.pts=pts[flag,] -} - -if(sum(flag)==0){ -print('Could not find any point with 12 or more nearest neighbors') -output=matrix(NA,nrow=1,ncol=8) -n.sel=0 -num.sig=0 -} -id.sig=NULL -padj=NULL -p.crit=NULL -if(n.sel>0){ -sel.id=c(1:n.sel) -if(n.sel<=25){ -if(n.sel==1)padj=output[7] -else -padj=p.adjust(output[,7],method='hoch') -flag=padj<=alpha -if(sum(flag)==1)sig.points=sel.pts[flag] -if(sum(flag)>1)sig.points=sel.pts[flag,] -num.sig=sum(flag) -id.sig=sel.id[flag] -} -if(n.sel>25){ -if(n.sel<=100)p.crit=0.0806452604/n.sel-0.0002461736 -if(n.sel>100)p.crit=6.586286e-02/n.sel+4.137143e-05 -flag=output[,7]<=p.crit -if(sum(flag)>0)sig.points=sel.pts[flag,] -} -num.sig=sum(flag) -id.sig=sel.id[flag] -} -list(selected.points=sel.pts,output=output,significant.points=sig.points,num.sig=num.sig,id.sig=id.sig) -} - - - -reg.vs.rplot<-function(x,y,xout=FALSE,fr=1,est=median,regfun=Qreg,Qreg.plot=TRUE,qv=c(.25,.75),SMQ=FALSE, -pr=TRUE,xlab='Reg.Est',ylab='Rplot.Est',pch='*'){ -# -# If the linear model is correct, the plot returned here should be -# tightly clustered around a line having slope=1 and intercept=0, indicated -# by a dashed line. -# -if(pr)print('This function was updated July 2022') -xy=elimna(cbind(x,y)) -p1=ncol(xy) -p=p1-1 -x=xy[,1:p] -y=xy[,p1] -e1=regYhat(x,y,xout=xout,regfun=regfun) -e2=rplot.pred(x,y,xout=xout,est=est,fr=fr)$Y.hat -if(Qreg.plot){ -if(!SMQ)qplotreg(e1,e2,xlab=xlab,ylab=ylab,pch=pch,qval=qv) -if(SMQ)qhdsm(e1,e2,xlab=xlab,ylab=ylab,pch=pch,qval=qv,LP=TRUE) -} -if(!(Qreg.plot)) lplot(e1,e2,xlab=xlab,ylab=ylab,pc=pch) -abline(0,1,lty=2) -} - -reg.vs.lplot<-function(x,y,xout=FALSE,Qreg.plot=TRUE,qv=c(.25,.75),SMQ=FALSE,pch='*',pr=TRUE, -outfun=outpro,fr=1,est=mean,regfun=tsreg,xlab='Reg.est',ylab='Lplot.est',span=.75,...){ -# -# -# -if(pr)print('This function was updated July 2022') -xy=elimna(cbind(x,y)) -p1=ncol(xy) -p=p1-1 -x=xy[,1:p] -y=xy[,p1] -if(xout){ -m<-cbind(x,y) -flag<-outfun(x,plotit=FALSE,...)$keep -m<-m[flag,] -x<-m[,1:p] -y<-m[,p1] -} -e1=regYhat(x,y,regfun=regfun) -e2=lplot.pred(x,y,,est=est,span=span)$yhat -if(Qreg.plot){ -if(!SMQ)qplotreg(e1,e2,xlab=xlab,ylab=ylab,pch=pch,qval=qv) -if(SMQ)qhdsm(e1,e2,xlab=xlab,ylab=ylab,pch=pch,qval=qv,LP=TRUE) -} -if(!(Qreg.plot)) lplot(e1,e2,xlab=xlab,ylab=ylab,pc=pch) -abline(0,1,lty=2) -} - -bbdetmcp<-function(J,K,x,tr=0.2){ -# -# For each level of Factor A, do all pairiwise comparisons -# among levels of B and store results in A in list mode. 
-# -# For each level of Factor B, do all pairiwise comparisons -# among levels of A and store results in B in list mode. -# -if(is.matrix(x) || is.data.frame(x))x=listm(x) -A=list() -B=list() -JK=J*K -mat<-matrix(c(1:JK),ncol=K,byrow=TRUE) -for(j in 1:J)A[[j]]=lincon(x[mat[j,]],tr=tr,pr=FALSE) -for(k in 1:K)B[[k]]=lincon(x[mat[,k]],tr=tr,pr=FALSE) -list(Levels.of.A=A,Level.of.B=B) -} - -bbdetmcpQS<-function(J,K,x,tr=0.2){ -# -# For each level of Factor A, do all pairiwise comparisons -# among levels of B and store results in A in list mode. -# -# For each level of Factor B, do all pairiwise comparisons -# amonglevels of A andstore results in B in list mode. -# -if(is.matrix(x) || is.data.frame(x))x=listm(x) -A=list() -B=list() -JK=J*K -mat<-matrix(c(1:JK),ncol=K,byrow=TRUE) -for(j in 1:J)A[[j]]=linconQS(x[mat[j,]],tr=tr,pr=FALSE) -for(k in 1:K)B[[k]]=linconQS(x[mat[,k]],tr=tr,pr=FALSE) -list(Levels.of.A=A,Level.of.B=B) -} - - -ancJNPVAL<-function(x1,y1,x2,y2,regfun=MMreg,p.crit=NULL,DEEP=TRUE, -plotit=TRUE,xlab='X1',ylab='X2',null.value=0,WARNS=FALSE, -alpha=.05, pts=NULL,SEED=TRUE,nboot=100,xout=FALSE,outfun=outpro,...){ -# -# Compare two independent groups using a generalization of the ancts function that -# allows more than one covariate. -# -# Design points can be specified via the argument -# pts: a matrix with p=ncol(x1) columns. -# -# DEEP=FALSE: If pts=NULL, design points are chosen to be deepest point in -# rbind(x1,x2) plus points on the .5 depth contour. -# -# DEEP=TRUE, choose deepest half of the points in rbind(x1,x2) and use critical p-value indicated by -# p.crit. -# -# alpha=.05, refers to the desired probability of one or more Type I errors. If -# p.crit=NULL, -# when alpha=.05 or .01 and number of covariates is <=6, p.crit is -# determined quickly by this function. That is, the familywise error will be approximately alpha. -# -# If number of covariates is > 6, unknown how to adjust p.crit to control familywise error. -# -# plotit=TRUE: if p=2 covariates, plot covariate points with -# non-significant points indicated by * and significant points by + - -# (This function replaces anctsmp, which does not have an option for -# using the deepest half of the covariate points.) -# -if(SEED)set.seed(2) -if(!is.null(pts[1]))DEEP=FALSE -x1=as.matrix(x1) -x2=as.matrix(x2) -if(ncol(x1)!=ncol(x2))stop('x1 and x2 have a different number of columns') -p=ncol(x1) -if(p==1)stop('Should have at least two covariates') -if(p>6)stop('Current version is limited to six covariates or less') -p1=p+1 -m1=elimna(cbind(x1,y1)) -x1=m1[,1:p] -y1=m1[,p1] -m2=elimna(cbind(x2,y2)) -x2=m2[,1:p] -y2=m2[,p1] -# -if(xout){ -m<-cbind(x1,y1) -flag<-outfun(x1,plotit=FALSE)$keep -m<-m[flag,] -x1<-m[,1:p] -y1<-m[,p1] -m<-cbind(x2,y2) -flag<-outfun(x2,plotit=FALSE)$keep -m<-m[flag,] -x2<-m[,1:p] -y2<-m[,p1] -} -n1=length(y1) -n2=length(y2) -if(is.null(pts[1])){ -if(!DEEP){ -x1<-as.matrix(x1) -pts<-ancdes(unique(rbind(x1,x2))) -p.crit=NULL -} -if(DEEP){ -xall=unique(rbind(x1,x2)) -pd=pdepth(xall) -id.keep=which(pd>median(pd)) -pts=xall[id.keep,] -pts=unique(pts) -}} -pts<-as.matrix(pts) -ntests=nrow(pts) -mat<-matrix(NA,ntests,8) -dimnames(mat)<-list(NULL,c('Est 1', 'Est 2','DIF','TEST','se','ci.low','ci.hi','p.value')) -if(!WARNS)options(warn=-1) -sqsd1=regYvar(x1,y1,pts=pts,regfun=regfun,nboot=nboot,SEED=FALSE,xout=FALSE,outfun=outfun,...) -sqsd2=regYvar(x2,y2,pts=pts,regfun=regfun,nboot=nboot,SEED=FALSE,xout=FALSE,outfun=outfun,...) -# xout=F because leverage points have already been removed. 
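-# regYvar provides bootstrap estimates of the squared standard error of the
-# predicted Y at each design point, and regYhat the predicted values themselves.
-# The test at each point is the difference between the two predictions divided by
-# the square root of the summed squared standard errors, referred to a standard normal.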
-est1=regYhat(x1,y1,regfun=regfun,xr=pts,xout=FALSE,outfun=outfun,...) -est2=regYhat(x2,y2,regfun=regfun,xr=pts,xout=FALSE,outfun=outfun,...) -if(!WARNS)options(warn=0) #turn warnings back on -mat[,1]=est1 -mat[,2]=est2 -est=est1-est2 -mat[,3]=est -sd=sqrt(sqsd1+sqsd2) -mat[,5]=sd -tests=(est1-est2)/sd -mat[,4]=tests -pv=2*(1-pnorm(abs(tests))) -mat[,8]=pv -crit=NULL -if(ntests==1)crit=qnorm(1-alpha/2) -if(nrow(pts)>1){ -if(ntests<=25){ -if(alpha==.05)crit<-smmcrit(Inf,ntests) -if(alpha==.01)crit<-smmcrit01(Inf,ntests) -}} -if(ntests>25){ -pvals.05=c(NA,.00615847,0.002856423,.00196,0.001960793,0.001120947) -pvals.01=c(NA,0.001006744,0.000237099,0.0003169569,0.0002031497,9.442465e-05) -if(alpha==.05){ -crit=qnorm(1-pvals.05[p]/2) -p.crit=pvals.05[p] -} -if(alpha==.01){ -crit=qnorm(1-pvals.01[p]/2) -p.crit=pvals.01[p] -} -} -mat[,6]=est-crit*sd -mat[,7]=est+crit*sd -flag=rep(FALSE,nrow(mat)) -flag.chk1=as.logical(mat[,6]>null.value) -flag.chk2=(mat[,7]0) -#if(!is.null(p.crit))num.sig=sum(mat[,8]<=p.crit) -num.sig=sum(mat[,8]<=p.crit) -if(p==2){ -if(plotit){ -plot(pts[,1],pts[,2],xlab=xlab,ylab=ylab,type='n') -flag[flag.chk]=TRUE -points(pts[!flag,1],pts[!flag,2],pch='*') -points(pts[flag,1],pts[flag,2],pch='+') -}} -sig.points=NULL -if(!is.null(p.crit)){ -if(num.sig>0){ -pick=which(mat[,8]<=p.crit) -sig.points=pts[pick,] -}} -list(n1=n1,n2=n2,num.sig=num.sig,p.crit=p.crit,points=pts,output=mat, significant.points=sig.points) -} - - -qhatDEP<-function(x1,x2,depthfun=prodepth,...){ -# -# Compute apparent probability of correct classification -# -x1<-x1[!is.na(x1)] -x2<-x2[!is.na(x2)] -x1=as.matrix(x1) -x2=as.matrix(x2) -tv=c(rep(1,nrow(x1)),rep(2,nrow(x2))) -see=discdepth(x1,x2,z=rbind(x1,x2)) -qhat=mean(tv==see) -qhat -} -qhatdepPB<-function(x1,x2,nboot=500,alpha=.05,depthfun=prodepth, -SEED=TRUE,...){ -# -# -if(SEED)set.seed(2) -bvec=NA -x1=as.matrix(x1) -x2=as.matrix(x2) -n1=nrow(x1) -n2=nrow(x2) -for(i in 1:nboot){ -dat1=sample(n1,n1,replace=TRUE) -dat2=sample(n2,n2,replace=TRUE) -bvec[i]=qhatDEP(x1[dat1,],x2[dat2,],depthfun=depthfun) -} -est=qhatDEP(x1,x2) -bvec=sort(bvec) -crit<-alpha/2 -icl<-round(crit*nboot)+1 -icu<-nboot-icl -ci<-bvec[icl] -ci[2]<-bvec[icu] -list(estimate=est,ci=ci) -} - - -discdepth<-function(train=NULL,test=NULL,g,x1=NULL,x2=NULL,depthfun=prodepth,...){ -# -# x1 and x2 contain the data for the two groups -# Goal, classify the values in test using depths associated with the training data -# -if(!is.null(train)){ -if(is.null(g))stop('Argument g, group ids, must be specified') -if(is.matrix(g)){ -if(dim(g)>1)stop('Argument g should be a vector') -} -Train=cbind(train,g) -Train=elimna(Train) -p=ncol(train) -p1=p+1 -train=Train[,1:p] -g=Train[,p1] -flag=g==min(g) -x1=Train[flag,1:p] -x2=Train[!flag,1:p] -} -if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') -x1=elimna(x1) -x2=elimna(x2) -n1=nrow(x1) -n2=nrow(x2) -if(is.null(test))stop('No test data, argument test is NULL') -test=elimna(test) -z=as.matrix(test) -x1=as.matrix(x1) -x2=as.matrix(x2) -z=as.matrix(z) -d1=depthfun(x1,pts=z,...) -d2=depthfun(x2,pts=z,...) -flag=d1>d2 -N=nrow(z) -id=rep(2,N) -id[flag]=1 -id -} - -dis.depth.bag<-function(train=NULL,test=NULL,g=NULL,x1=NULL,x2=NULL,depthfun=prodepth,nboot=100,SEED=TRUE,...){ -# -# -# g=class id -# if there are two classes and the training data are stored in separate variables, can enter -# the data for each class via the arguments -# x1 and x2. -# The function will then create appropriate labels and store them in g. -# -# Uses data depths. 
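-# A simple bagging strategy is used: nboot bootstrap samples of equal size
-# n=min(n1,n2) are drawn from each group, every test point is classified via
-# discdepth for each bootstrap pair, and the majority vote over the nboot
-# classifications is returned (ties are assigned to the first group).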
-# KNNdist uses data depths, for the n1!=n2 it can be a bit biased, meaning that -# when there is no association, the probability of a correct classification will be less than .5 -# -# -if(is.null(test))stop('test =NULL, no test data provided') -if(SEED)set.seed(2) -if(!is.null(train)){ -if(is.null(g))stop('Argument g, group labels, must be specified') -if(is.matrix(g)){ -if(dim(g)>1)stop('Argument g should be a vector') -traing=elimna(cbind(train,g)) -p=ncol(train) -p1=p+1 -train=traing[,1:p] -test=traing[,p1] -if(length(unique(g))>2)stop('Only two groups allowed, g has more than two unique values') -} -x=fac2list(train,g) -x1=x[[1]] -x2=x[[2]] -} -test=as.matrix(test) -n.test=nrow(test) -if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') -if(ncol(test)!=ncol(x2))stop('test and x2 have different number of columns') -x1=elimna(x1) -x2=elimna(x2) -n1=nrow(x1) -n2=nrow(x2) -n=min(c(n1,n2)) -dvec=matrix(NA,nrow=nboot,ncol=n.test) -for(i in 1:nboot){ -id1=sample(n1,n,replace=TRUE) -id2=sample(n2,n,replace=TRUE) - -dvec[i,]=discdepth(x1=x1[id1,],x2=x2[id2,],test=test,depthfun=depthfun,...) -} -dec=rep(1,n.test) -test1=dvec==1 -test2=dvec==2 -chk1=apply(test1,2,sum) -chk2=apply(test2,2,sum) -idec=chk2>chk1 -dec[idec]=2 -dec -} - - -pdepth<-function(m,pts=m,MM=FALSE,cop=3,dop=1,center=NA, SEED=TRUE){ -# -# projection depth -# -# SEED, included for convenience when this function is used with certain classification techniques. -# -v=pdis(m,pts=pts,MM=MM,cop=cop,dop=dop,center=center) -v=1/(1+v) -v -} -pdis<-function(m,pts=m,MM=FALSE,cop=3,dop=1,center=NA,na.rm=TRUE){ -# -# Compute projection distances for points in pts relative to points in m -# That is, the projection distance from the center of m -# -# -# MM=F Projected distance scaled -# using interquatile range. -# MM=T Scale projected distances using MAD. -# -# There are five options for computing the center of the -# cloud of points when computing projections: -# cop=1 uses Donoho-Gasko median -# cop=2 uses MCD center -# cop=3 uses median of the marginal distributions. -# cop=4 uses MVE center -# cop=5 uses skipped mean -# -m<-elimna(m) # Remove missing values -pts=elimna(pts) -m<-as.matrix(m) -nm=nrow(m) -pts<-as.matrix(pts) -if(ncol(m)>1){ -if(ncol(pts)==1)pts=t(pts) -} -npts=nrow(pts) -mp=rbind(m,pts) -np1=nrow(m)+1 -if(ncol(m)==1){ -m=as.vector(m) -pts=as.vector(pts) -if(is.na(center[1]))center<-median(m) -dis<-abs(pts-center) -disall=abs(m-center) -temp=idealf(disall) -if(!MM){ -pdis<-dis/(temp$qu-temp$ql) -} -if(MM)pdis<-dis/mad(disall) -} -else{ -if(is.na(center[1])){ -if(cop==1)center<-dmean(m,tr=.5,dop=dop) -if(cop==2)center<-cov.mcd(m)$center -if(cop==3)center<-apply(m,2,median) -if(cop==4)center<-cov.mve(m)$center -if(cop==5)center<-smean(m) -} -dmat<-matrix(NA,ncol=nrow(mp),nrow=nrow(mp)) -for (i in 1:nrow(mp)){ -B<-mp[i,]-center -dis<-NA -BB<-B^2 -bot<-sum(BB) -if(bot!=0){ -for (j in 1:nrow(mp)){ -A<-mp[j,]-center -temp<-sum(A*B)*B/bot -dis[j]<-sqrt(sum(temp^2)) -} -dis.m=dis[1:nm] -if(!MM){ -#temp<-idealf(dis) -temp<-idealf(dis.m) -dmat[,i]<-dis/(temp$qu-temp$ql) -} -if(MM)dmat[,i]<-dis/mad(dis.m) -}} -pdis<-apply(dmat,1,max,na.rm=na.rm) -pdis=pdis[np1:nrow(mp)] -} -pdis -} -yuenQS<-function(x,y=NULL,tr=.2,alpha=.05, plotit=FALSE,op=TRUE, -cor.op=FALSE,loc.fun=median,pr=TRUE,xlab='X',ylab=' ' ){ -# -# Perform Yuen's test for trimmed means on the data in x and y. -# The default amount of trimming is 20% -# Missing values (values stored as NA) are automatically removed. 
-# -# A confidence interval for the trimmed mean of x minus the -# the trimmed mean of y is computed and returned in yuen$ci. -# The significance level is returned in yuen$p.value -# -# Unlike the function yuen, a robust quantile shift measure -# of effect size is returned. -# -if(pr){ -print('Note: Under normality and homoscedasticity, Cohen d= 0, .2, .5, .8') -print('correspond approximately to Q.Effect = 0.5, 0.55, 0.65 and 0.70, respectively') -} -if(tr==.5)stop('Use medpb to compare medians.') -if(tr>.5)stop('cannot have tr>.5') -if(is.null(y)){ -if(is.matrix(x) || is.data.frame(x)){ -y=x[,2] -x=x[,1] -} -if(is.list(x)){ -y=x[[2]] -x=x[[1]] -} -} -library(MASS) -x<-x[!is.na(x)] # Remove any missing values in x -y<-y[!is.na(y)] # Remove any missing values in y -n1=length(x) -n2=length(y) -h1<-length(x)-2*floor(tr*length(x)) -h2<-length(y)-2*floor(tr*length(y)) -q1<-(length(x)-1)*winvar(x,tr)/(h1*(h1-1)) -q2<-(length(y)-1)*winvar(y,tr)/(h2*(h2-1)) -df<-(q1+q2)^2/((q1^2/(h1-1))+(q2^2/(h2-1))) -crit<-qt(1-alpha/2,df) -m1=mean(x,tr) -m2=mean(y,tr) -mbar=(m1+m2)/2 -dif=m1-m2 -low<-dif-crit*sqrt(q1+q2) -up<-dif+crit*sqrt(q1+q2) -test<-abs(dif/sqrt(q1+q2)) -yuen<-2*(1-pt(test,df)) -e.pow=shiftQS(x,y,tmean,tr=tr)$Q.Effect -if(plotit){ -g2plot(x,y,xlab=xlab,ylab=ylab) -} -list(ci=c(low,up),n1=n1,n2=n2, -p.value=yuen,dif=dif,se=sqrt(q1+q2),teststat=test, -crit=crit,df=df,Q.Effect=e.pow) -} - -regbtci<-function(x,y,regfun=qreg,alpha=.05,nboot=300,xout=FALSE,outfun=outpro,SEED=TRUE,...){ -# -# Bootstrap-t confidence intervals for regression parameters -# -if(SEED)set.seed(2) -xx<-elimna(cbind(x,y)) -np<-ncol(xx) -p<-np-1 -y<-xx[,np] -x<-xx[,1:p] -x<-as.matrix(x) -n.orig=length(y) -if(xout){ -x<-as.matrix(x) -flag<-outfun(x,plotit=FALSE,...)$keep -x<-x[flag,] -y<-y[flag] -} -vlabs='Intercept' -for(j in 2:np)vlabs[j]=paste('Slope',j-1) -regout<-matrix(0,np,5) -dimnames(regout)<-list(vlabs,c('ci.low','ci.up','Estimate','S.E.','p-value')) -val=regse(x,y,regfun=regfun,nboot=nboot,SEED=SEED,...) -tests=val$param.estimates/val$s.e. -pv=2*(1-pnorm(abs(tests))) -est=regfun(x,y,...) -regout[,3]=est$coef -regout[,1]=est$coef-qnorm(1-alpha/2)*val$s.e. -regout[,2]=est$coef+qnorm(1-alpha/2)*val$s.e. -regout[,4]=val$s.e. 
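-# The standard errors in column 4 are the bootstrap estimates returned by regse;
-# the confidence limits and p-values then use a normal approximation:
-# estimate +/- qnorm(1-alpha/2)*SE and 2*(1-pnorm(abs(estimate/SE))).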
-regout[,5]=pv -list(output=regout,n=n.orig,n.keep=length(y)) -} - - - -dtrimQS<-function(x,y=NULL,tr=.2,pr=TRUE){ -# -# Trimmed mean based on difference scores -# Also returns quantile shift measure of location -# -# -if(pr){ -print('Note: Under normality and homoscedasticity, Cohen d= 0, .2, .5, .8') -print('correspond approximately to Q.effect = 0.5, 0.55, 0.65 and 0.70, respectively') -} -if(!is.null(y))L=x-y -else L=x -L=elimna(L) -output=trimci(L,tr=tr,pr=FALSE) -ef=depQS(L,locfun=mean,tr=tr) -list(ci=output$ci,estimate=output$estimate,test=output$test.stat, -se=output$se,p.value=output$p.value,n=output$n,Q.effect=ef$Q.effect) -} -lindQS<-function(x,con,locfun=median,...){ -# -# For dependent variables X_1...X_J -# compute quantile shift measure of effect size for -# Y_i=sum_j c_jX_j -# -x=elimna(x) -if(sum(con)!=0)stop('Contrast coefficients must sum to zero') -if(is.data.frame(x))x=as.matrix(x) -if(is.list(x))x<-matl(x) -J<-ncol(x) -if(length(con)!=J)stop('Length of con should equal number of groups') -L=linWMWMC.sub(x,con=con) -est=locfun(L) -p=linQS.sub2(L,locfun=locfun) -list(Q.effect=p,center=est) -} - - - - -bwiQS<-function(J,K,x,locfun=median,...){ -# -# Quantile shift measure of effect size for interactions in a -# between-by-within design -# -# The R variable x is assumed to contain the raw -# data stored in list mode or in a matrix. -# If in list mode, x[[1]] contains the data -# for the first level of both factors: level 1,1. -# x[[2]] is assumed to contain the data for level 1 of the -# first factor and level 2 of the second: level 1,2 -# x[[K]] is the data for level 1,K -# x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc. -# -# If the data are in a matrix, column 1 is assumed to -# correspond to x[[1]], column 2 to x[[2]], etc. -# -JK<-J*K -MJ<-(J^2-J)/2 -MK<-(K^2-K)/2 -MJMK<-MJ*MK -if(is.matrix(x) || is.data.frame(x))x=listm(x) -if(JK!=length(x))stop('Something is wrong. 
Expected ',JK,' groups but x contains ', length(x), ' groups instead.') -m=matrix(c(1:JK),nrow=J,byrow=TRUE) -output=matrix(NA,ncol=5,nrow=MJMK) -dimnames(output)<-list(NULL,c('A','A','B','B','Q.Effect')) -ic=0 -for(j in 1:J){ -for(jj in 1:J){ -if(j1)stop('One covariate only is allowed with this function') -xy=elimna(cbind(x1,y1)) -x1=xy[,1] -y1=xy[,2] -xy=elimna(cbind(x2,y2)) -x2=xy[,1] -y2=xy[,2] -if(xout){ -flag<-outfun(x1,...)$keep -x1<-x1[flag] -y1<-y1[flag] -flag<-outfun(x2,...)$keep -x2<-x2[flag] -y2<-y2[flag] -} -xor1=order(x1) -xor2=order(x2) -x1=x1[xor1] -x2=x2[xor2] -y1=y1[xor1] -y2=y2[xor2] -n1<-1 -n2<-1 -vecn<-1 -for(i in 1:length(x1))n1[i]<-length(y1[near(x1,x1[i],fr1)]) -for(i in 1:length(x1))n2[i]<-length(y2[near(x2,x1[i],fr2)]) -for(i in 1:length(x1))vecn[i]<-min(n1[i],n2[i]) -sub<-c(1:length(x1)) -bot<-min(sub[vecn>=nmin]) -itop<-max(sub[vecn>=nmin]) -xbot=x1[bot] -xup=x1[itop] - -if(BOTH){ -vecn=1 -n1=1 -n2=1 -for(i in 1:length(x2))n1[i]<-length(y1[near(x1,x1[i],fr1)]) -for(i in 1:length(x2))n2[i]<-length(y2[near(x2,x1[i],fr2)]) -for(i in 1:length(x2))vecn[i]<-min(n1[i],n2[i]) -sub<-c(1:length(x2)) -bot<-max(sub[vecn>=nmin]) -itop<-min(sub[vecn>=nmin]) -xbot[2]=x2[itop] #CORRECT, need to switch -xup[2]=x2[bot] -} -xbot=max(xbot) -xup=min(xup) -pts=seq(xbot,xup,length.out=npts) -if(alpha!=.05)EST=TRUE -if(is.null(p.crit)){ -nv=c(30, 50, 60, 70, 80, 100, 150, 200, -300, 400, 500, 600, 800) -if(Ycrit)pv=c(0.00824497,0.00581, 0.005435089, 0.004763079, -0.00416832, 0.004406774, 0.00388228,0.003812836,0.003812836,0.003453055, 0.003625061, -.003372966, 0.003350022) -if(!Ycrit) -pv=c(0.008566, # 30 -0.0083847, # 50 -0.006758, # 60 -0.006871, # 70 -0.006157, # 80 -0.006629, #100 -0.006629, # 150 -0.004681, # 200 -0.004537, # 300 -0.004952, # 400 - 0.004294, # 500 - 0.004288, # 600 - 0.004148) -n1= length(y1) - n2=length(y2) -p.crit=(lplot.pred(1/nv,pv,1/n1)$yhat+lplot.pred(1/nv,pv,1/n2)$yhat)/2 -p.crit=(alpha/.05)*p.crit # Crude approximation when alpha != .05, tends to be conservative. -} -temp=ancovaWMW(x1,y1,x2,y2,pts=pts,fr1=fr1,fr2=fr2,alpha=p.crit,plotit=plotit) -res=temp$output[,1:7] -if(plotit){ -x=res[,1] -y=res[,4] -minx=min(x) -maxx=max(x) -plot(c(minx,maxx,x),c(0,1,y),xlab=xlab,ylab=ylab,type='n') -points(x,y,pch=pc) -if(!sm){lines(res[,1],res[,5],lty=2) -lines(res[,1],res[,6],lty=2) -lines(res[,1],res[,4]) -} -if(sm){ -plin=lplot.pred(res[,1],res[,4],span=span)$yhat -lines(res[,1],plin) -low.line=lplot.pred(res[,1],res[,5],span=span)$yhat -lines(res[,1],low.line,lty=2) -up.line=lplot.pred(res[,1],res[,6],span=span)$yhat -lines(res[,1],up.line,lty=2) -} - -} -sig=rep(0,nrow(res)) -sig[res[,7]<=p.crit]=1 -sig=as.matrix(sig,ncol=1) -dimnames(sig)=list(NULL,'Sig.Dif') -res=cbind(res,sig) -list(p.crit=p.crit,output=res,summary=temp$summary,num.sig=sum(sig)) -} - -ancdetwmwQ<-function(x1,y1,x2,y2,fr1=1,fr2=1,nmin=8,q=.05, -alpha=.05,plotit=TRUE,pts=NA,span=2/3,sm=TRUE, xout=FALSE,outfun=out,MC=FALSE, -npts=25,p.crit=NULL, -SCAT=TRUE,xlab='X',ylab='P.hat',pc='.',...){ -# -# Like the function ancdet, only use analog of Wilcoxon--Mann--Whitney -# plot=TRUE: plot estimates P.hat plus a -# confidence band having simultaneous probability coverage 1-alpha -# -# span = the span when using loess to plot the regression line. 
-# -# npts = number of covariate values to be used -# -# sm=TRUE will smooth the plot using lowess -# -# Covariate points are chosen that lie between the q and 1-q quantiles -# -if(ncol(as.matrix(x1))>1)stop('One covariate only is allowed with this function') -xy=elimna(cbind(x1,y1)) -x1=xy[,1] -y1=xy[,2] -xy=elimna(cbind(x2,y2)) -x2=xy[,1] -y2=xy[,2] -if(xout){ -flag<-outfun(x1,...)$keep -x1<-x1[flag] -y1<-y1[flag] -flag<-outfun(x2,...)$keep -x2<-x2[flag] -y2<-y2[flag] -} -xor1=order(x1) -xor2=order(x2) -x1=x1[xor1] -x2=x2[xor2] -y1=y1[xor1] -y2=y2[xor2] -n1<-1 -n2<-1 -vecn<-1 - -xbot=max(qest(x1,q),qest(x2,q)) -xup=min(qest(x1,1-q),qest(x2,1-q)) - -pts=seq(xbot,xup,length.out=npts) - -nchk1=0 -for(i in 1:length(pts))nchk1[i]=length(y1[near(x1,pts[i],fr1)]) -nchk2=0 -for(i in 1:length(pts))nchk2[i]=length(y2[near(x2,pts[i],fr2)]) -flag1=nchk1>=nmin -flag2=nchk2>=nmin -flag=as.logical(flag1*flag2) -pts=pts[flag] -if(is.null(p.crit)){ -nv=c(30, 50, 60, 70, 80, 100, 150, 200, -300, 400, 500, 600, 800) -pv=c(0.00824497,0.00581, 0.005435089, 0.004763079, -0.00416832, 0.004406774, 0.00388228,0.003812836,0.003812836,0.003453055, 0.003625061, -.003372966, 0.003350022) -n1= length(y1) - n2=length(y2) -p.crit=(lplot.pred(1/nv,pv,1/n1)$yhat+lplot.pred(1/nv,pv,1/n2)$yhat)/2 -p.crit=(alpha/.05)*p.crit # Crude approximation when alpha != .05, tends to be conservative. -} -temp=ancovaWMW(x1,y1,x2,y2,pts=pts,fr1=fr1,fr2=fr2,alpha=p.crit,plotit=plotit) -res=temp$output -if(plotit){ -x=res[,1] -y=res[,4] -minx=min(x) -maxx=max(x) -plot(c(minx,maxx,x),c(0,1,y),xlab=xlab,ylab=ylab,type='n') -points(x,y,pch=pc) -if(!sm){lines(res[,1],res[,5],lty=2) -lines(res[,1],res[,6],lty=2) -lines(res[,1],res[,4]) -} -if(sm){ -plin=lplot.pred(res[,1],res[,4],span=span)$yhat -lines(res[,1],plin) -low.line=lplot.pred(res[,1],res[,5],span=span)$yhat -lines(res[,1],low.line,lty=2) -up.line=lplot.pred(res[,1],res[,6],span=span)$yhat -lines(res[,1],up.line,lty=2) -} - -} -sig=rep(0,nrow(res)) -sig[res[,7]<=p.crit]=1 -sig=as.matrix(sig,ncol=1) -dimnames(sig)=list(NULL,'Sig.Dif') -res=cbind(res,sig) -list(p.crit=p.crit,output=res,summary=temp$summary,num.sig=sum(sig),p.crit=p.crit) -} - - - -regIVcommcp<-function(x,y,regfun = tsreg, nboot = 200, - xout = FALSE, outfun = outpro, SEED = TRUE, MC = FALSE, tr = 0.2, - ...){ -# -# For each pair of the independent variables in x, compare strength -# when both are included in the model. -# -x<-as.matrix(x) -J=ncol(x) -A=(J^2-J)/2 -output=matrix(NA,nrow=A,ncol=6) -ic=0 -for(i in 1:J){ -for(k in 1:J){ -if(i=crit) -mtest=max(abs(res[flag])) -hdPV=optimize(hdpv,interval=c(.001,.999),dat=tval,obs=mtest) -if(is.na(num.sig))num.sig=0 -list(n=n,cor=test$cor,test.stats=res,crit.val=crit, -num.sig=num.sig,p.value=1-hdPV$minimum) -} - -hdpv=function(val,dat,obs){z=abs(hd(dat,val)-obs) -z -} - -mscorpbMC<-function(x,corfun=pcor,nboot=500,alpha=0.05,SEED=TRUE,WARN=FALSE, -outfun=outpro,pr=TRUE){ -# -# For p-variate data, test the hypothesis that the -# skipped correlation is zero for all pairs of variables. 
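-# The null distribution of the largest absolute test statistic is approximated by
-# resampling each column of x independently (which breaks any association among
-# the variables); the observed statistics are then compared to the 1-alpha
-# quantile of this null distribution.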
-# The probability of one or more Type I errors is indicated by the -# argument -# alpha -# -if(pr)print('Here, the p-value is the smallest alpha value for which one or more hypotheses are rejected') -library(parallel) -if(SEED)set.seed(2) -x=elimna(x) -n=nrow(x) -tval=NA -y=list() -for(i in 1:nboot)y[[i]]=apply(x,2,sample,replace=TRUE) -v=mclapply(y,mscor,corfun=corfun,outfun=outpro) -for(i in 1:nboot)tval[i]=max(abs(elimna(as.vector(v[[i]]$test.stat)))) - -crit=hd(tval,q=1-alpha) -test=mscor(x) -res=test$test.stat -flag=upper.tri(res) -num.sig=sum(abs(res[flag])>=crit) -mtest=max(abs(res[flag])) -if(!WARN)options(warn=-1) -hdPV=optimize(hdpv,interval=c(.001,.999),dat=tval,obs=mtest) -if(!WARN)options(warn=0) -if(is.na(num.sig))num.sig=0 -list(n=n,cor=test$cor,test.stats=res,crit.val=crit, -num.sig=num.sig,p.value=1-hdPV$minimum) -} - - - -mscorci.cr<-function(n,p,iter=500,corfun=pcor,alpha=c(.05,.025,.01),TV=FALSE,SEED=TRUE){ -# -# Determine critical p-values for the function mscorci -# Returns the estimate of the distribution of the null minimum p-value -# plus the critical p-values corresponding to the levels indicated by -# alpha. -# -if(SEED)set.seed(65) -x=list() -library(parallel) -for(i in 1:iter)x[[i]]=rmul(n,p=p) -tval=mclapply(x,mscorci.cr.sub,corfun=corfun,nboot=iter) -tval=list2vec(tval) -crit.p=NA -for(j in 1:length(alpha))crit.p[j]=hd(tval,alpha[j]) -if(!TV)tval=NULL -list(crit.p.values=crit.p,tval=tval) -} - -mscorci.cr.sub<-function(x,corfun,nboot=500){ -v=mscorci(x,SEED=FALSE,corfun=corfun,nboot=nboot,crit.pv=1)$p.values -mp=min(as.vector(v),na.rm=T) -mp -} -scorv2<-function(x,y=NULL,corfun=pcor,gval=NA,plotit=FALSE,op=TRUE,cop=3,xlab="VAR 1", -ylab="VAR 2",STAND=TRUE,pr=TRUE,SEED=TRUE,MC=FALSE){ -# -# Compute a skipped correlation coefficient. -# -# Eliminate outliers using a projection method -# That is, compute Donoho-Gasko median, for each point -# consider the line between it and the median, -# project all points onto this line, and -# check for outliers using a boxplot rule. -# Repeat this for all points. A point is declared -# an outlier if for any projection it is an outlier -# using a modification of the usual boxplot rule. -# -# For information about the argument cop, see the function -# outpro. -# -# Eliminate any outliers and compute correlation using -# remaining data. -# -# Nearly the same as scor, but does not reset the SEED, which corrects problems with other functions -# -# MC=TRUE, the multicore version of outpro is used -# -# corfun=pcor means Pearson's correlation is used. -# corfun=spear means Spearman's correlation is used. -# corfun=tau means Kendall tau is used. -if(SEED){ -set.seed(12) # So when using MVE or MCD, get consistent results -} -if(is.null(y[1]))m<-x -if(!is.null(y[1]))m<-cbind(x,y) -m<-elimna(m) -if(!MC)temp<-outpro(m,gval=gval,plotit=plotit,op=op,cop=cop, -xlab=xlab,ylab=ylab,STAND=STAND,pr=pr)$keep -if(MC)temp<-outproMC(m,gval=gval,plotit=plotit,op=op,cop=cop, -xlab=xlab,ylab=ylab,STAND=STAND,pr=pr)$keep -tcor<-corfun(m[temp,])$cor -if(!is.null(dim((m))))tcor<-tcor[1,2] -test<-abs(tcor*sqrt((nrow(m)-2)/(1-tcor**2))) -if(ncol(m)!=2)diag(test)<-NA -crit<-6.947/nrow(m)+2.3197 -list(cor=tcor,test.stat=test,crit.05=crit) -} - -scorreg<-function(x,y,corfun=spear,cop=3,MM=FALSE,gval=NA, -outfun=outpro,alpha=.05,MC=NULL,SEED=TRUE,ALL=TRUE,...){ -# -# x is an n by p matrix -# -# Compute a skipped correlation matrix between y and each variable in x. 
-# -# corfun indicates the correlation to be used -# corfun=pcor uses Pearson's correlation -# corfun=spear uses Spearman's correlation -# -# ALL=TRUE: eliminate all outliers among cbind(x,y) -# ALL=FALSE: skipped correlation is computed for each x[,j] and y. So outliers are eliminated only -# for these two variables and this done for j=1,...p, p=number of predictors. -# -# This function returns the p by p matrix of correlations -# -# Method: Eliminate outliers using a projection technique. -# That is, compute Donoho-Gasko median, for each point -# consider the line between it and the median, -# project all points onto this line, and -# check for outliers using a boxplot rule. -# Repeat this for all points. A point is declared -# an outlier if for any projection it is an outlier -# using a modification of the usual boxplot rule. -# -# cop determines how center of the scatterplot is -# estimated; see the function outpro. -# cop=l Donoho-Gasko halfspace median -# cop=2 MCD measure of location -# cop=3 marginal medians -# cop=4 MVE measure of location -# -# gval is critical value for determining whether a point -# is an outlier. It is determined automatically if not specified, -# assuming that Spearman's correlation is used. Critical -# values when using some other correlation have not been -# determined. -# -m<-elimna(cbind(x,y)) -m=as.matrix(m) -p1<-ncol(m) -p=p1-1 -n<-nrow(m) -e=NA -if(!ALL){ -if(is.null(MC)){ -if(n>=200)MC=TRUE -else MC=FALSE -} -e=NA -for(j in 1:p)e[j]=scorv2(m[,j],m[,p1],MC=MC,corfun=corfun,SEED=SEED)$cor.value -} -if(ALL){ -if(n<500) -flag=outpro(m,cop=cop,plotit=FALSE)$keep -else flag=outpro.depth(m,plotit=FALSE,SEED=SEED)$keep -xy=m[flag,] -for(j in 1:p)e[j]=corfun(xy[,j],xy[,p1],...)$cor -} -list(cor=e) -} - -mscorci<-function(x,y=NULL,nboot=1000,alpha=c(.05,.025,.01),SEED=TRUE, -STAND=TRUE,corfun=pcor,outfun=outpro, crit.pv=NULL, -pvals=NULL,hoch=FALSE,iter=500,pval.SEED=TRUE,pr=TRUE){ -# -# For p-variate data, test the hypothesis of a zero skipped correlation for each pair of variables in a manner -# that controls the probability of one or more Type I errors. -# -# The function also returns confidence intervals for each of the skipped correlations when hoch=FALSE. -# alpha=0.05 is the default. -# By default, Pearson's correlation is computed after outliers are removed via the R function indicated by -# outfun, which defaults to a projection-type method. -# corfun=spear, for example would replace Pearson's correlation with Spearman's correlation. -# -# alpha=c(.05,.025,.01) is the default, meaning that when determining critical p-values, this is done for -# for alpha .05, 0.25 and .01. So can use different alpha values if desired. -# For other purposes the family wise error (FWE) rate is taken to be -# alpha[1]=.05 by default. So setting the argument alpha=.01, FWE is taken to be .01 and a critical p-value is -# computed for the.01 level only. -# -# The default number of bootstrap samples is -# nboot=500 -# -# hoch=TRUE is the default in order to reduce execution time. -# If n>=60, this might suffice when testing at the 0.05 level. But power might be increased by using -# hoch=FALSE at the expense of higher execution time. -# -# If alpha is less than .05, say .025 or .01, hoch=FALSE is recommended. -# -# Note: confidence intervals are reported only when hoch=FALSE. -# -# pvals can be used to supply a vector of p-values estimating the distribution of the minimum p-value among the tests that are -# are performed when all hypotheses are true. 
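# (In outline, the critical p-value is obtained as follows: data for which all of the null
# hypotheses hold are generated, the smallest p-value over the tests is recorded for each of the
# iter replications, and the critical p-value is the alpha quantile, estimated via hd, of these
# minima; see mscorci.cr and mscorci.cr.sub above.)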
-# -# iter=500: number of replications used to estimate the distribution of the minimum p-value. -# Or use the argument crit.pv as indicated below. -# Note: in the journal article dealing with this method, iter=1000 was used. - -# By default -# pvals=NULL, the functions computes these values if the p-values suggest that there might be -# significant results and hoch=FALSE; this can result in high execution time. -# The pvals are computed via the R function -# mscorci.cr(n,p,iter=500,corfun=pcor,alpha=alpha,SEED=TRUE). -# -# Critical p-values are a function of n and p. Once known, can supply them via the argument -# crit.pv as follows: -# -# pv=mscorci.cr(n,p)$crit.p.values -# mscorci(x,crit.pv=pv) -# -# -# -if(pr){ -if(!hoch){print('To reduce execution time, critical p-values are not computed when the observed p.values are too large to') -print('reject at the 0.05 level. To compute them any way, use the R function mscorci.cr') -} -if(hoch){ -print('Hochberg adjusted p-values are used.') -print('This is reasonable when n>120 and alpha=.05. Otherwise suggest using hoch=FALSE') -print('With hoch=TRUE, unadjusted 1-alphaj[1] confidence intervals are reported') -}} -if(SEED)set.seed(2) -if(!is.null(y))x=cbind(x,y) -x<-elimna(x) # Eliminate rows with missing values -nval=nrow(x) -p=ncol(x) -J=(p^2-p)/2 -est<-mscor(x,STAND=STAND,corfun=corfun,outfun=outfun)$cor -flag=upper.tri(est) -est=est[flag] -data<-matrix(sample(nval,size=nval*nboot,replace=TRUE),nrow=nboot) -data=listm(t(data)) -bvec<-lapply(data,scorci.sub,x,STAND=STAND,corfun=corfun,outfun=outfun) -bvec=matl(bvec) # A J by nboot matrix. - -phat=0 -sig=0 -for(j in 1:J){ -phat[j]=sum(bvec[j,] < 0)/nboot -sig[j] <- 2 * min(phat[j], 1 - phat[j]) -} -# -# Compute critical p-values if any of the p-values are sufficiently small. -# -FLAG=FALSE -if(p==2 && sig[1]<=.15){ -FLAG=TRUE -if(hoch){ -if(pr)print('If the p.value is <=.15, suggest using hoch=FALSE') -}} -if(p>2){ -if(min(sig)<=alpha[1])FLAG=TRUE -} -if(FLAG){ -if(is.null(crit.pv)){ -if(!hoch){ -if(pr)print('Computing critical p-values. Execution time might require several minutes') -temp=mscorci.cr(nval,p,iter=iter,corfun=corfun,alpha=alpha,SEED=pval.SEED,TV=TRUE) #returns tval in case want to adjust p-values. -# Need to add code to do this. (See mscorpbMC for how this might be done.) 
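# Below, each row of bvec, containing the bootstrap estimates for one correlation, is sorted and
# the percentile confidence limits implied by the critical p-value are read off.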
-crit.pv=temp$crit.p.values -}}} -ci.mat=matrix(NA,nrow=J,ncol=4) -dimnames(ci.mat)=list(NULL,c('Var i','Var j','ci.low','ci.up')) -for(j in 1:J)bvec[j,]<-sort(bvec[j,]) -if(J==1)bvec=as.matrix(bvec) -ic=0 -if(is.null(crit.pv))crit.pv=alpha[1] -for(j in 1:p){ -for(k in 1:p){ -if(j40){ -if(n<=70){ -vv=p.crit.n60(alpha[1],sig[j]) -sigadj[j]=vv$adj.p.value -crit.p=vv$crit.p.value -}} -if(n>70){ -if(n<=100){ -vv=p.crit.n80(alpha[1],sig[j]) -sigadj[j]=vv$adj.p.value -crit.p=vv$crit.p.value -} -} -if(n>100){ -if(n<=120) -{ -vv=p.crit.n100(alpha[1],sig[j]) -crit.p=vv$crit.p.value -sigadj[j]=vv$adj.p.value -}} -if(n>120){ # no adjustment -sigadj[j]=sig[j] #i.e., no adjustment -crit.p=alpha[1] -}} -hadj=p.adjust(sig,method='hoch') -ci.mat=matrix(NA,nrow=p,ncol=3) -dimnames(ci.mat)=list(NULL,c('Var','ci.low','ci.up')) -for(j in 1:p)bvec[j,]<-sort(bvec[j,]) -if(p==1)bvec=as.matrix(bvec) -ic=0 -if(is.null(crit.pv))crit.pv=alpha[1] -for(j in 1:p){ -ic=ic+1 -ci.mat[ic,1]=j -ihi<-floor((1-crit.p[1]/2)*nboot+.5) -ilow<-floor((crit.p[1]/2)*nboot+.5) -ci.mat[ic,2]=bvec[ic,ilow] -ci.mat[ic,3]=bvec[ic,ihi] -} -p.mat=matrix(NA,nrow=p,ncol=4) -p.mat[,1]=est -p.mat[,2]=sig -p.mat[,3]=sigadj -adj.p=p.adjust(sigadj,method='hochberg') -p.mat[,4]=adj.p -dimnames(p.mat)=list(NULL,c('Est.','p-value','adjusted p.value','Hoch.adjusted.p.value')) -list(Estimates=p.mat,confidence.int=ci.mat) -} - - -scorreg.sub<-function(data,xy,corfun=corfun,outfun=outfun,ALL=ALL){ -p1=ncol(xy) -p=p1-1 -est<-scorreg(xy[data,1:p],xy[data,p1],corfun=corfun,SEED=FALSE,ALL=ALL)$cor -est -} - - -p.crit.n30<-function(alpha=.05,p.obs=NULL){ -p.table=c( -0.696 ,0.828 ,0.208 ,0.328 ,0.152 -,0.632 ,0.184 ,0.452 ,0.86 ,0.988 -,0.772 ,0.832 ,0.288 ,0.288 ,0.944 -,0.672 ,0.868 ,0.476 ,0.148 ,0.292 -,0.792 ,0.852 ,0.236 ,0.9, 1 -,0.484 ,0.932 ,0.704 ,0.4 ,0.904 -,0.656 ,0.32 ,0.104 ,0.676 ,0.572 -,0.936 ,0.14 ,0.148 ,0.86 ,0.508 -,0.748 ,0.328 ,0.816 ,0.268 ,0.364 -,0.152 ,0.816 ,0.5 ,0.972 ,0.684 -,0.156 ,0.676 ,0.244 ,0.948 ,0.612 -,0.28 ,0.092 ,0.712 ,0.152 ,0.704 -,0.192 ,0.904 ,0.372 ,0.908 ,0.992 -,0.692 ,0.956 ,0.704 ,0.964 ,0.484 -,0.496 ,0.768 ,0.172 ,0.336 ,0.108 -,0.052 ,0.62 ,0.192 ,0.664 ,0.716 -,0.148 ,0.6 ,0.384 ,0.52 ,0.536 -,0.992 ,0.272 ,0.68 ,0.232 ,0.368 -,0.788 ,0.572 ,0.516 ,0.476 ,0.832 -,0.988 ,0.72 ,0.432 ,0.756 ,0.564 -,0.792 ,0.44 ,0.996 ,0.388 ,0.456 -,0.848 ,0.364, 1 ,0.584 ,0.104 -,0.68 ,0.848 ,0.268 ,0.424 ,0.836 -,0.316 ,0.828 ,0.12 ,0.392 ,0.588 -,0.38 ,0.544 ,0.108 ,0.248 ,0.972 -,0.94 ,0.184 ,0.156 ,0.444 ,0.28 -,0.612 ,0.22 ,0.544 ,0.888 ,0.808 -,0.436 ,0.736 ,0.424 ,0.792 ,0.324 -,0.672 ,0.012 ,0.36 ,0.656 ,0.54 -,0.872 ,0.596 ,0.788 ,0.896 ,0.532 -,0.34 ,0.664 ,0.28 ,0.484 ,0.888 -,0.66 ,0.824 ,0.032 ,0.524 ,0.496 -,0.84 ,0.564 ,0.432 ,0.668 ,0.664 -,0.332 ,0.576 ,0.568 ,0.388 ,0.876 -,0.06 ,0.948 ,0.664 ,0.764 ,0.12 -,0.416 ,0.956 ,0.936 ,0.848 ,0.904 -,0.596 ,0.792 ,0.232 ,0.68 ,0.404 -,0.556 ,0.356 ,0.44 ,0.936 ,0.748 -,0.968 ,0.528 ,0.8 ,0.152 ,0.68 -,0.792 ,0.664 ,0.872 ,0.856 ,0.176 -,0.908 ,0.124 ,0.744 ,0.708 ,0.632 -,0.68 ,0.972 ,0.244 ,0.984 ,0.76 -,0.828 ,0.256 ,0.888 ,0.688 ,0.312 -,0.828 ,0.124 ,0.296 ,0.396 ,0.8 -,0.756 ,0.104 ,0.228 ,0.884 ,0.948 -,0.96 ,0.52 ,0.724 ,0.824 ,0.436 -,0.672 ,0.868 ,0.772 ,0.612 ,0.48 -,0.036 ,0.868 ,0.52 ,0.268 ,0.232 -,0.608 ,0.676 ,0.476 ,0.588 ,0.904 -,0.508 ,0.236 ,0.952 ,0.848 ,0.628 -,0.924 ,0.132 ,0.812 ,0.696 ,0.3 -,0.948 ,0.904 ,0.868 ,0.392 ,0.072 -,0.38 ,0.624 ,0.608 ,0.756 ,0.332 -,0.088 ,0.42 ,0.764 ,0.648 ,0.084 -,0.428 ,0.04 ,0.408 ,0.548 ,0.216 -,0.636 
,0.784 ,0.24 ,0.9 ,0.512 -,0.476 ,0.504 ,0.288 ,0.812 ,0.6 -,0.696 ,0.492 ,0.42 ,0.068 ,0.236 -,0.604 ,0.564 ,0.888 ,0.816 ,0.52 -,0.092 ,0.096 ,0.372 ,0.54 ,0.328 -,0.96 ,0.276 ,0.38 ,0.1 ,0.412 -,0.732 ,0.184 ,0.044 ,0.772 ,0.892 -,0.244 ,0.344 ,0.976 ,0.04 ,0.088 -,0.032 ,0.796 ,0.24 ,0.524 ,0.808 -,0.472 ,0.472 ,0.152 ,0.696 ,0.728 -,0.756 ,0.784 ,0.452 ,0.764 ,0.764 -,0.144 ,0.988 ,0.552 ,0.788 ,0.5 -,0.46 ,0.42 ,0.468 ,0.516 ,0.832 -,0.528 ,0.724 ,0.148 ,0.648 ,0.456 -,0.28 ,0.804 ,0.496 ,0.464 ,0.52 -,0.864 ,0.228 ,0.544 ,0.708 ,0.912 -,0.528 ,0.18 ,0.188 ,0.092 ,0.44 -,0.452 ,0.596 ,0.424 ,0.32 ,0.808 -,0.036 ,0.508 ,0.836 ,0.064 ,0.924 -,0.4 ,0.324 ,0.464 ,0.888 ,0.948 -,0.688 ,0.856 ,0.76 ,0.16 ,0.44 -,0.372 ,0.328 ,0.088 ,0.984 ,0.496 -,0.428 ,0.892 ,0.636 ,0.236 ,0.704 -,0.704 ,0.416 ,0.9 ,0.716 ,0.976 -,0.908 ,0.524 ,0.604 ,0.436 ,0.332 -,0.996 ,0.428, 1 ,0.244 ,0.712 -,0.456 ,0.808 ,0.984 ,0.804 ,0.62 -,0.552 ,0.732 ,0.264 ,0.488 ,0.604 -,0.424 ,0.936 ,0.808 ,0.356 ,0.164 -,0.152 ,0.34 ,0.34 ,0.644 ,0.4 -,0.784 ,0.308 ,0.296 ,0.672 ,0.664 -,0.64 ,0.76 ,0.24 ,0.464 ,0.656 -,0.84 ,0.76 ,0.176 ,0.148 ,0.184 -,0.296 ,0.516 ,0.62 ,0.396 ,0.384 -,0.84 ,0.984 ,0.964 ,0.46 ,0.224 -,0.968 ,0.292 ,0.78 ,0.696 ,0.128 -,0.384 ,0.98 ,0.852 ,0.408 ,0.644 -,0.744 ,0.876 ,0.688 ,0.924 ,0.06 -,0.36 ,0.4 ,0.528 ,0.084 ,0.216 -,0.4 ,0.984 ,0.488 ,0.152 ,0.608 -,0.332 ,0.5 ,0.884 ,0.78 ,0.912 -,0.236 ,0.368 ,0.276 ,0.74 ,0.96 -,0.912 ,0.36 ,0.608 ,0.804 ,0.9 -,0.688 ,0.348 ,0.748 ,0.544 ,0.956 -,0.384 ,0.892 ,0.728 ,0.164 ,0.392 -,0.876 ,0.836 ,0.54 ,0.604 ,0.456 -,0.144 ,0.3 ,0.848 ,0.272 ,0.668 -,0.908 ,0.004 ,0.812 ,0.408 ,0.676 -,0.928 ,0.224 ,0.052 ,0.756 ,0.928 -,0.428 ,0.096 ,0.996 ,0.996 ,0.828 -,0.504 ,0.616 ,0.788 ,0.644 ,0.26 -,0.764 ,0.616 ,0.248 ,0.556 ,0.972 -,0.912 ,0.66 ,0.72 ,0.792 ,0.204 -,0.904 ,0.32 ,0.228 ,0.628 ,0.912 -,0.804 ,0.072 ,0.656 ,0.456 ,0.992 -,0.3 ,0.808 ,0.692 ,0.84 ,0.544 -,0.072 ,0.652 ,0.524 ,0.884 ,0.168 -,0.208 ,0.216 ,0.948 ,0.896 ,0.92 -,0.964 ,0.784 ,0.812 ,0.708 ,0.936 -,0.508 ,0.488 ,0.156 ,0.94 ,0.088 -,0.508 ,0.72 ,0.636 ,0.552 ,0.016 -,0.464 ,0.348 ,0.576 ,0.904 ,0.248 -,0.324 ,0.516 ,0.988 ,0.616 ,0.716 -,0.664 ,0.576 ,0.336 ,0.792 ,0.824 -,0.896 ,0.804 ,0.524 ,0.332 ,0.804 -,0.94 ,0.424 ,0.964 ,0.644 ,0.604 -,0.4 ,0.984 ,0.38 ,0.696 ,0.248 -,0.244 ,0.772 ,0.836 ,0.048 ,0.696 -,0.724 ,0.576 ,0.6 ,0.348 ,0.88 -,0.776 ,0.376 ,0.644 ,0.648 ,0.08 -,0.424 ,0.912 ,0.964 ,0.224 ,0.984 -,0.476 ,0.928 ,0.64 ,0.944 ,0.512 -,0.644 ,0.596 ,0.388 ,0.28 ,0.124 -,0.212 ,0.388 ,0.416 ,0.884 ,0.964 -,0.996 ,0.428 ,0.832 ,0.464 ,0.88 -,0.984 ,0.256 ,0.664 ,0.344 ,0.496 -,0.192 ,0.124 ,0.392 ,0.268 ,0.4 -,0.944 ,0.816 ,0.648 ,0.252 ,0.16 -,0.24 ,0.716 ,0.272 ,0.136 ,0.832 -,0.212 ,0.548 ,0.776 ,0.328 ,0.492 -,0.952 ,0.62 ,0.688 ,0.26 ,0.084 -,0.264 ,0.856 ,0.912 ,0.796 ,0.78 -,0.276 ,0.692 ,0.628 ,0.26 ,0.592 -,0.66 ,0.66 ,0.912 ,0.84 ,0.244 -,0.66 ,0.892 ,0.332 ,0.092 ,0.584 -,0.804 ,0.408 ,0.036 ,0.22 ,0.02 -,0.648 ,0.52 ,0.212 ,0.34 ,0.4 -,0.38 ,0.156 ,0.464 ,0.32 ,0.944 -,0.84 ,0.98 ,0.676 ,0.396 ,0.86 -,0.884 ,0.272 ,0.712 ,0.444 ,0.24 -,0.296 ,0.956 ,0.436 ,0.096 ,0.448 -,0.796 ,0.084 ,0.872 ,0.368 ,0.828 -,0.656 ,0.192 ,0.984 ,0.668 ,0.452 -,0.992 ,0.904 ,0.572 ,0.768 ,0.42 -,0.444 ,0.42, 0 ,0.456 ,0.464 -,0.908 ,0.884 ,0.704 ,0.164 ,0.604 -,0.924 ,0.748 ,0.688 ,0.648 ,0.968 -,0.332 ,0.636 ,0.472 ,0.956 ,0.924 -,0.6 ,0.788 ,0.488 ,0.156 ,0.904 -,0.892 ,0.372 ,0.948 ,0.868 ,0.06 -,0.58 ,0.604 ,0.9 ,0.212 ,0.824 -,0.632 ,0.416 ,0.5 ,0.576 ,0.932 -,0.472 ,0.932 
,0.936 ,0.96 ,0.26 -,0.556 ,0.372 ,0.748 ,0.368 ,0.256 -,0.076 ,0.676 ,0.292 ,0.504 ,0.6 -,0.216 ,0.796 ,0.488 ,0.132 ,0.076 -,0.02 ,0.48 ,0.848 ,0.772 ,0.524 -,0.22 ,0.908 ,0.432 ,0.952 ,0.556 -,0.12 ,0.868 ,0.756 ,0.732 ,0.56 -,0.084 ,0.7 ,0.34 ,0.2 ,0.704 -,0.336 ,0.092 ,0.22 ,0.944 ,0.044 -,0.844 ,0.356 ,0.72 ,0.276 ,0.664 -,0.828 ,0.492 ,0.392 ,0.368 ,0.32 -,0.304 ,0.804 ,0.856 ,0.528 ,0.6 -,0.056 ,0.908 ,0.124 ,0.448 ,0.632 -,0.232 ,0.008 ,0.2 ,0.552 ,0.884 -,0.82 ,0.92 ,0.744 ,0.26 ,0.492 -,0.94 ,0.96 ,0.572 ,0.536 ,0.196 -,0.992 ,0.524 ,0.356 ,0.116 ,0.072 -,0.084 ,0.46 ,0.604 ,0.884 ,0.752 -,0.812 ,0.36 ,0.492 ,0.508 ,0.42 -,0.54 ,0.132 ,0.084 ,0.328 ,0.984 -,0.104 ,0.592 ,0.172 ,0.992 ,0.688 -,0.572 ,0.312 ,0.304 ,0.596 ,0.796 -,0.488 ,0.388 ,0.188 ,0.456 ,0.716 -,0.168 ,0.292 ,0.36 ,0.848 ,0.02 -,0.756 ,0.6 ,0.956 ,0.676 ,0.864 -,0.96 ,0.304 ,0.276 ,0.576 ,0.32 -,0.324 ,0.776 ,0.66 ,0.652 ,0.832 -,0.052 ,0.24 ,0.08 ,0.844 ,0.668 -,0.44 ,0.844 ,0.476 ,0.224 ,0.604 -,0.876 ,0.436 ,0.8 ,0.228 ,0.364 -,0.792 ,0.052 ,0.94 ,0.444 ,0.796 -,0.436 ,0.276 ,0.908 ,0.092 ,0.74 -,0.128 ,0.76 ,0.256 ,0.56 ,0.376 -,0.604 ,0.82 ,0.864 ,0.328 ,0.24 -,0.244 ,0.28 ,0.648 ,0.452 ,0.56 -,0.712 ,0.14 ,0.908 ,0.256 ,0.544 -,0.176 ,0.36 ,0.924 ,0.584 ,0.216 -,0.68 ,0.82 ,0.628 ,0.828 ,0.316 -,0.52 ,0.34 ,0.172 ,0.916 ,0.54 -,0.88 ,0.636 ,0.796 ,0.696 ,0.976 -,0.68 ,0.368 ,0.456 ,0.764 ,0.736 -,0.356 ,0.188 ,0.992 ,0.94 ,0.572 -,0.112 ,0.736 ,0.476 ,0.58 ,0.772 -,0.944 ,0.348 ,0.248 ,0.292 ,0.992 -,0.916 ,0.128 ,0.904 ,0.804 ,0.66 -,0.972 ,0.044 ,0.228 ,0.82 ,0.296 -,0.92 ,0.368 ,0.924 ,0.96 ,0.928 -,0.38 ,0.184 ,0.86 ,0.8 ,0.136 -,0.304 ,0.512 ,0.684 ,0.612 ,0.624 -,0.868 ,0.908 ,0.548 ,0.396 ,0.436 -,0.668 ,0.92 ,0.196 ,0.156 ,0.176 -,0.088 ,0.888 ,0.524 ,0.196 ,0.736 -,0.736 ,0.884 ,0.072 ,0.824 ,0.456 -,0.404 ,0.212 ,0.664 ,0.404 ,0.608 -,0.532 ,0.62 ,0.816 ,0.496 ,0.836 -,0.328 ,0.868 ,0.48 ,0.636 ,0.836 -,0.668 ,0.424 ,0.364 ,0.276 ,0.376 -,0.744 ,0.228 ,0.604 ,0.656 ,0.936 -,0.344 ,0.54 ,0.868 ,0.876 ,0.184 -,0.204 ,0.976 ,0.752 ,0.796 ,0.324 -,0.88 ,0.108 ,0.552 ,0.92 ,0.132 -,0.44 ,0.312 ,0.184 ,0.936 ,0.44 -,0.62 ,0.492 ,0.976 ,0.764 ,0.94 -,0.48 ,0.908 ,0.888 ,0.332 ,0.74 -,0.532 ,0.64 ,0.976 ,0.668 ,0.992 -,0.988 ,0.892 ,0.516 ,0.496 ,0.56 -,0.016 ,0.616 ,0.224 ,0.3 ,0.684 -,0.616 ,0.452 ,0.976 ,0.248 ,0.132 -,0.256 ,0.136 ,0.956 ,0.144 ,0.96 -,0.664 ,0.26 ,0.772 ,0.108 ,0.868 -,0.516 ,0.268 ,0.376 ,0.532 ,0.68 -,0.56 ,0.428 ,0.64 ,0.272 ,0.808 -,0.22 ,0.156 ,0.184 ,0.436 ,0.452 -,0.128 ,0.924 ,0.488 ,0.268 ,0.584 -,0.596 ,0.892 ,0.284 ,0.916 ,0.424 -,0.576 ,0.844 ,0.212 ,0.696 ,0.2 -,0.88 ,0.548 ,0.728 ,0.88 ,0.72 -,0.468 ,0.208 ,0.524 ,0.896 ,0.06 -,0.516 ,0.736 ,0.508 ,0.524 ,0.9 -,0.408 ,0.82 ,0.68 ,0.16 ,0.776 -,0.84 ,0.756 ,0.236 ,0.8 ,0.84 -,0.548 ,0.628 ,0.54 ,0.768 ,0.328 -,0.476 ,0.604 ,0.22 ,0.844 ,0.396 -,0.704 ,0.556 ,0.128 ,0.068 ,0.08 -,0.424 ,0.544 ,0.556 ,0.464 ,0.74 -,0.716 ,0.752 ,0.068 ,0.804 ,0.024 -,0.632 ,0.68 ,0.868 ,0.328 ,0.448 -,0.14 ,0.364 ,0.596 ,0.916 ,0.148 -,0.504 ,0.62 ,0.3 ,0.536 ,0.024 -,0.892 ,0.932 ,0.056 ,0.532 ,0.084 -,0.248 ,0.268 ,0.944 ,0.212 ,0.92 -,0.5 ,0.1 ,0.736 ,0.648 ,0.648 -,0.236 ,0.604 ,0.588 ,0.416 ,0.88 -,0.92 ,0.956 ,0.6 ,0.988 ,0.848 -,0.54 ,0.384 ,0.868 ,0.748 ,0.256 -,0.18 ,0.196 ,0.988 ,0.588 ,0.94 -,0.856 ,0.856 ,0.512 ,0.008 ,0.748 -,0.46 ,0.672 ,0.848 ,0.932 ,0.14 -,0.708 ,0.812 ,0.608 ,0.692 ,0.756 -,0.424 ,0.84 ,0.16 ,0.744 ,0.92 -,0.892 ,0.676 ,0.68 ,0.164 ,0.796 -,0.656 ,0.496 ,0.576 ,0.44 ,0.088 -,0.232 ,0.592 ,0.04 ,0.808 
,0.632 -,0.86 ,0.112 ,0.392 ,0.196 ,0.696 -,0.912 ,0.872 ,0.72 ,0.484 ,0.348 -,0.424 ,0.556 ,0.408 ,0.612 ,0.592 -,0.636 ,0.584 ,0.088 ,0.28 ,0.444 -,0.332 ,0.444 ,0.952 ,0.172 ,0.664 -,0.008 ,0.468 ,0.624 ,0.7 ,0.3 -,0.356 ,0.516 ,0.308 ,0.964 ,0.38 -,0.984 ,0.956 ,0.96 ,0.604 ,0.044 -,0.436 ,0.956 ,0.192 ,0.24 ,0.164 -,0.888 ,0.904 ,0.98 ,0.924 ,0.584 -,0.22 ,0.988 ,0.644 ,0.644 ,0.652 -,0.712 ,0.676 ,0.136 ,0.144 ,0.656 -,0.548 ,0.836 ,0.804 ,0.856 ,0.492 -,0.86 ,0.744 ,0.808 ,0.404 ,0.62 -,0.772 ,0.852 ,0.712 ,0.44 ,0.5 -,0.768 ,0.728 ,0.276 ,0.776 ,0.316 -,0.396 ,0.656 ,0.676 ,0.764 ,0.4 -,0.988 ,0.276 ,0.952 ,0.32 ,0.552 -,0.976 ,0.244 ,0.676 ,0.916 ,0.204 -,0.152 ,0.548 ,0.708 ,0.764 ,0.524 -,0.564 ,0.244 ,0.656 ,0.928 ,0.068 -,0.984 ,0.524 ,0.9 ,0.792 ,0.636 -,0.488 ,0.56 ,0.352 ,0.452 ,0.328 -,0.504 ,0.348 ,0.804 ,0.272 ,0.348 -,0.6 ,0.972 ,0.816 ,0.208 ,0.28 -,0.652 ,0.944 ,0.468 ,0.1 ,0.676 -,0.7 ,0.664 ,0.948 ,0.688 ,0.112 -,0.816 ,0.088 ,0.572 ,0.236 ,0.912 -,0.408 ,0.752 ,0.532 ,0.84 ,0.464 -,0.292 ,0.052 ,0.088 ,0.784 ,0.396 -,0.592 ,0.652 ,0.3 ,0.24 ,0.588 -,0.936 ,0.084 ,0.696 ,0.74 ,0.516 -,0.952 ,0.684 ,0.564 ,0.636 ,0.968 -,0.184 ,0.3 ,0.256 ,0.804 ,0.64 -,0.256 ,0.844 ,0.8 ,0.992 ,0.66 -,0.492 ,0.428 ,0.94 ,0.064 ,0.748 -,0.424 ,0.212 ,0.092 ,0.076 ,0.144 -,0.776 ,0.228 ,0.48 ,0.596 ,0.324 -,0.348 ,0.804 ,0.812 ,0.944 ,0.976 -,0.864 ,0.956 ,0.996 ,0.54 ,0.736 -,0.408 ,0.172 ,0.732 ,0.876 ,0.564 -,0.028 ,0.2 ,0.444 ,0.612 ,0.252 -,0.584 ,0.208 ,0.992 ,0.32 ,0.684 -,0.144 ,0.38 ,0.852 ,0.084 ,0.292 -,0.576 ,0.504 ,0.532 ,0.788 ,0.768 -,0.664 ,0.86 ,0.728 ,0.556 ,0.664 -,0.28 ,0.588 ,0.48 ,0.616 ,0.576 -,0.796 ,0.412 ,0.596 ,0.216 ,0.972 -,0.952 ,0.572 ,0.836 ,0.772 ,0.672 -,0.176 ,0.96 ,0.892 ,0.04 ,0.416 -,0.808 ,0.78 ,0.68 ,0.896 ,0.424 -,0.404 ,0.556 ,0.824 ,0.004 ,0.816 -,0.632 ,0.06 ,0.708 ,0.352 ,0.136 -,0.416 ,0.78 ,0.94 ,0.872 ,0.128 -,0.072 ,0.74 ,0.96 ,0.308 ,0.472 -,0.252 ,0.112 ,0.376 ,0.816 ,0.408 -,0.332 ,0.964 ,0.364 ,0.624 ,0.728 -,0.764 ,0.088 ,0.024 ,0.052 ,0.032 -,0.348 ,0.388 ,0.672 ,0.816 ,0.188 -,0.064 ,0.62 ,0.744 ,0.408 ,0.572 -,0.672 ,0.06 ,0.38 ,0.92 ,0.676 -,0.848 ,0.756 ,0.504 ,0.92 ,0.092 -,0.532 ,0.928 ,0.076 ,0.552 ,0.572 -,0.28 ,0.916 ,0.788 ,0.312 ,0.868 -,0.536 ,0.448 ,0.724 ,0.032 ,0.38 -,0.828 ,0.836 ,0.328 ,0.18 ,0.544 -,0.228 ,0.352 ,0.572 ,0.4 ,0.872 -,0.5 ,0.36 ,0.48 ,0.324 ,0.656 -,0.96 ,0.484 ,0.152 ,0.744 ,0.804 -,0.908 ,0.844 ,0.216 ,0.968 ,0.784 -,0.476 ,0.84 ,0.384 ,0.26 ,0.576 -,0.444 ,0.472 ,0.636 ,0.272 ,0.8 -,0.476 ,0.512 ,0.54 ,0.512 ,0.96 -,0.272 ,0.764 ,0.324 ,0.952 ,0.604 -,0.568 ,0.764 ,0.912 ,0.652 ,0.988 -,0.192 ,0.408 ,0.684 ,0.208 ,0.164 -,0.16 ,0.36 ,0.22 ,0.512 ,0.636 -,0.372 ,0.376 ,0.548 ,0.636 ,0.824 -,0.132 ,0.308 ,0.72 ,0.916 ,0.688 -,0.556 ,0.556 ,0.608 ,0.708 ,0.22 -,0.308 ,0.272 ,0.612 ,0.936 ,0.5 -,0.608 ,0.956 ,0.76 ,0.832 ,0.668 -,0.776 ,0.852 ,0.728 ,0.812 ,0.892 -,0.408 ,0.96 ,0.708 ,0.744 ,0.408 -,0.832 ,0.556 ,0.764 ,0.116 ,0.896 -,0.052 ,0.452 ,0.9 ,0.232 ,0.484 -,0.776 ,0.672 ,0.536 ,0.252 ,0.504 -,0.044 ,0.584 ,0.908 ,0.96 ,0.932 -,0.24 ,0.824 ,0.84 ,0.672 ,0.856 -,0.116 ,0.104 ,0.912 ,0.648 ,0.852 -,0.644 ,0.612 ,0.82 ,0.408 ,0.86 -,0.724 ,0.684 ,0.68 ,0.516 ,0.78 -,0.632 ,0.432 ,0.98 ,0.956 ,0.216 -,0.228 ,0.576 ,0.304 ,0.4 ,0.448 -,0.616 ,0.292 ,0.412 ,0.572 ,0.136 -,0.568 ,0.488 ,0.228 ,0.46 ,0.5 -,0.452 ,0.804 ,0.596 ,0.076 ,0.22 -,0.92 ,0.868 ,0.492 ,0.428 ,0.524 -,0.424 ,0.212 ,0.512 ,0.592 ,0.604 -,0.784 ,0.688 ,0.48 ,0.588 ,0.564 -,0.052 ,0.484 ,0.176 ,0.932 ,0.196 -,0.968 ,0.744 ,0.9 
,0.648 ,0.832 -,0.836 ,0.22 ,0.632 ,0.804 ,0.436 -,0.184 ,0.588 ,0.864 ,0.884 ,0.82 -,0.696 ,0.58 ,0.768 ,0.584 ,0.148 -,0.66 ,0.696 ,0.268 ,0.04 ,0.716 -,0.116 ,0.536 ,0.988 ,0.704 ,0.612 -,0.872 ,0.052 ,0.352 ,0.624 ,0.624 -,0.332 ,0.044 ,0.8 ,0.476 ,0.992 -,0.788 ,0.872 ,0.276 ,0.208 ,0.632 -,0.804 ,0.644 ,0.4 ,0.868 ,0.708 -,0.636 ,0.088 ,0.036 ,0.936 ,0.04 -,0.804 ,0.532 ,0.396 ,0.284 ,0.652 -,0.928 ,0.768 ,0.82 ,0.668 ,0.408 -,0.944 ,0.972 ,0.84 ,0.608 ,0.652 -,0.304 ,0.316 ,0.728 ,0.968 ,0.804 -,0.284 ,0.644 ,0.244 ,0.532 ,0.076 -,0.748 ,0.048 ,0.384 ,0.424 ,0.752 -,0.748 ,0.984 ,0.148 ,0.58 ,0.58 -,0.28 ,0.248 ,0.7 ,0.572 ,0.648 -,0.632 ,0.276 ,0.888 ,0.932 ,0.572 -,0.308 ,0.072 ,0.5 ,0.392 ,0.068 -,0.468 ,0.856 ,0.232 ,0.268,1 -,0.164 ,0.484 ,0.032 ,0.852 ,0.86 -,0.912 ,0.272 ,0.724 ,0.536 ,0.304 -,0.456 ,0.752 ,0.344 ,0.18 ,0.968 -,0.676 ,0.204 ,0.664 ,0.948 ,0.504 -,0.696 ,0.468 ,0.508 ,0.324 ,0.292 -,0.364 ,0.476 ,0.152 ,0.272 ,0.948 -,0.684 ,0.352 ,0.26 ,0.836 ,0.084 -,0.912 ,0.876 ,0.944 ,0.164 ,0.504 -,0.704 ,0.604 ,0.216 ,0.248 ,0.536 -,0.112 ,0.48 ,0.976 ,0.94 ,0.648 -,0.764 ,0.664 ,0.54 ,0.012 ,0.424 -,0.02 ,0.48 ,0.512 ,0.02 ,0.436 -,0.42 ,0.072 ,0.54 ,0.76 ,0.576 -,0.192 ,0.096 ,0.436 ,0.58 ,0.86 -,0.196 ,0.368 ,0.984 ,0.552 ,0.632 -,0.72 ,0.644 ,0.952 ,0.228 ,0.056 -,0.376 ,0.368 ,0.04 ,0.316 ,0.464) -cp=hd(p.table,alpha) -pv=NULL -if(!is.null(p.obs))w=optimize(hdpv,interval=c(.001,.999),dat=p.table,obs=p.obs)$minimum -list(crit.p.value=cp,adj.p.value=w) -} -p.crit.n60<-function(alpha, p.obs = NULL){ -p.table=c(0.24 ,0.656 ,0.648 ,0.364 ,0.856 -,0.052 ,0.476 ,0.304 ,0.216 ,0.72 -,0.476 ,0.908 ,0.912 ,0.216 ,0.212 -,0.476 ,0.488 ,0.868 ,0.072 ,0.088 -,0.504 ,0.26 ,0.192 ,0.244 ,0.512 -,0.432 ,0.32 ,0.236, 1 ,0.396 -,0.504 ,0.4 ,0.756 ,0.996 ,0.08 -,0.14 ,0.224 ,0.964 ,0.492 ,0.94 -,0.528 ,0.652 ,0.884 ,0.84 ,0.648 -,0.592 ,0.844 ,0.572 ,0.104 ,0.712 -,0.948 ,0.612 ,0.88 ,0.684 ,0.552 -,0.716 ,0.156 ,0.996 ,0.296 ,0.62 -,0.02 ,0.164 ,0.532 ,0.372 ,0.104 -,0.78 ,0.996 ,0.84 ,0.552 ,0.588 -,0.668 ,0.088 ,0.78 ,0.76 ,0.708 -,0.208 ,0.976 ,0.336 ,0.052 ,0.904 -,0.648 ,0.588 ,0.668 ,0.108 ,0.996 -,0.808 ,0.824 ,0.312 ,0.808 ,0.936 -,0.616 ,0.212 ,0.496 ,0.628 ,0.736 -,0.152 ,0.24 ,0.504 ,0.964 ,0.808 -,0.528 ,0.232 ,0.2 ,0.356 ,0.26 -,0.984 ,0.832 ,0.424 ,0.584 ,0.2 -,0.356 ,0.432 ,0.568 ,0.348 ,0.784 -,0.364 ,0.368 ,0.92 ,0.124 ,0.556 -,0.096 ,0.828 ,0.676 ,0.752 ,0.724 -,0.724 ,0.168 ,0.524 ,0.064 ,0.876 -,0.112 ,0.408 ,0.544 ,0.56 ,0.104 -,0.288 ,0.808 ,0.116 ,0.54 ,0.008 -,0.988 ,0.46 ,0.616 ,0.644 ,0.64 -,0.996 ,0.36 ,0.472 ,0.544 ,0.316 -,0.848 ,0.868 ,0.872 ,0.46 ,0.816 -,0.16 ,0.444 ,0.688 ,0.008 ,0.96 -,0.296 ,0.132 ,0.868 ,0.452 ,0.352 -,0.996 ,0.696 ,0.816 ,0.668 ,0.748 -,0.024 ,0.968 ,0.692 ,0.8 ,0.2 -,0.548 ,0.632 ,0.824 ,0.668 ,0.216 -,0.228 ,0.336 ,0.388 ,0.824 ,0.824 -,0.376 ,0.728 ,0.72 ,0.932 ,0.1 -,0.136 ,0.1 ,0.96 ,0.988 ,0.516 -,0.86 ,0.576 ,0.952 ,0.78 ,0.84 -,0.948 ,0.94 ,0.536 ,0.704 ,0.816 -,0.352 ,0.164 ,0.716 ,0.264 ,0.94 -,0.228 ,0.404 ,0.704 ,0.744 ,0.308 -,0.156 ,0.468 ,0.124 ,0.708 ,0.676 -,0.432 ,0.472 ,0.244 ,0.124 ,0.124 -,0.908 ,0.36 ,0.668 ,0.34 ,0.8 -,0.48 ,0.112 ,0.792 ,0.428 ,0.724 -,0.28 ,0.724 ,0.768 ,0.972 ,0.524 -,0.436 ,0.008 ,0.664 ,0.648 ,0.704 -,0.94 ,0.12 ,0.308 ,0.884 ,0.824 -,0.248 ,0.112 ,0.572 ,0.492 ,0.052 -,0.664 ,0.788 ,0.604 ,0.344 ,0.288 -,0.996 ,0.696 ,0.996 ,0.852 ,0.28 -,0.004 ,0.276 ,0.732 ,0.964 ,0.248 -,0.456 ,0.044 ,0.232 ,0.776 ,0.196 -,0.344 ,0.248 ,0.84 ,0.716 ,0.764 -,0.628 ,0.312 ,0.616 ,0.352 ,0.944 -,0.156 
,0.032 ,0.948 ,0.532 ,0.3 -,0.792 ,0.844 ,0.148 ,0.224 ,0.512 -,0.328 ,0.104 ,0.344 ,0.652 ,0.932 -,0.972 ,0.356 ,0.168 ,0.284 ,0.364 -,0.276 ,0.68 ,0.376 ,0.436 ,0.016 -,0.936 ,0.416 ,0.212 ,0.664 ,0.824, -1 ,0.9 ,0.652 ,0.836 ,0.2 -,0.036 ,0.072 ,0.88 ,0.748 ,0.668 -,0.964 ,0.6 ,0.772 ,0.288 ,0.968 -,0.484 ,0.928 ,0.436 ,0.588 ,0.976 -,0.364 ,0.508 ,0.064 ,0.784 ,0.884 -,0.54 ,0.08 ,0.252 ,0.768 ,0.156 -,0.872, 0 ,0.672 ,0.572 ,0.94 -,0.272 ,0.936 ,0.792 ,0.824 ,0.092 -,0.884 ,0.492 ,0.336 ,0.724 ,0.64 -,0.124 ,0.896 ,0.308 ,0.224 ,0.64 -,0.932 ,0.712 ,0.1 ,0.884 ,0.76 -,0.808 ,0.1 ,0.384 ,0.416 ,0.828 -,0.992 ,0.6 ,0.288 ,0.02 ,0.392 -,0.34 ,0.9 ,0.444 ,0.892 ,0.76 -,0.964 ,0.88 ,0.428 ,0.612 ,0.728 -,0.104 ,0.268 ,0.488 ,0.348 ,0.488 -,0.208 ,0.52 ,0.96 ,0.572 ,0.18 -,0.976 ,0.812 ,0.668 ,0.064 ,0.768 -,0.98 ,0.484 ,0.84 ,0.876 ,0.132 -,0.56 ,0.392 ,0.536 ,0.48 ,0.096 -,0.608 ,0.556 ,0.196 ,0.884 ,0.744 -,0.944 ,0.432 ,0.104 ,0.444 ,0.324 -,0.896 ,0.472 ,0.716 ,0.792 ,0.984 -,0.528 ,0.472 ,0.716 ,0.368 ,0.644 -,0.432 ,0.288 ,0.004 ,0.716 ,0.688 -,0.684 ,0.94 ,0.772 ,0.856 ,0.16 -,0.784 ,0.244 ,0.412 ,0.384 ,0.264 -,0.028 ,0.128 ,0.14 ,0.328 ,0.896 -,0.224 ,0.676 ,0.32 ,0.668 ,0.156 -,0.852 ,0.648 ,0.232 ,0.6 ,0.804 -,0.692 ,0.528 ,0.728 ,0.588 ,0.64 -,0.252 ,0.304 ,0.8 ,0.052 ,0.492 -,0.76 ,0.4 ,0.708 ,0.628 ,0.888 -,0.992 ,0.22 ,0.276 ,0.64 ,0.644 -,0.232 ,0.476 ,0.584 ,0.72 ,0.5 -,0.688 ,0.08 ,0.208 ,0.488 ,0.836 -,0.708, 1 ,0.948 ,0.364 ,0.424 -,0.636 ,0.94 ,0.308 ,0.172 ,0.352 -,0.536 ,0.112 ,0.968 ,0.832 ,0.192 -,0.36 ,0.888 ,0.552 ,0.784 ,0.376 -,0.956 ,0.2 ,0.504 ,0.676 ,0.224 -,0.66 ,0.172 ,0.256 ,0.664 ,0.324 -,0.648 ,0.936 ,0.676 ,0.184 ,0.552 -,0.648 ,0.812 ,0.304 ,0.464 ,0.564 -,0.264 ,0.14 ,0.088 ,0.52 ,0.516 -,0.404 ,0.464 ,0.852 ,0.2 ,0.66 -,0.668 ,0.228 ,0.024 ,0.5 ,0.436 -,0.712 ,0.776 ,0.264 ,0.576 ,0.956 -,0.912 ,0.892 ,0.864 ,0.268 ,0.56 -,0.528 ,0.368 ,0.036 ,0.536 ,0.412 -,0.784 ,0.776 ,0.484 ,0.7 ,0.016 -,0.732 ,0.672 ,0.544 ,0.312 ,0.128 -,0.292 ,0.832 ,0.34 ,0.74 ,0.916 -,0.764 ,0.34 ,0.256 ,0.164 ,0.704 -,0.552 ,0.3 ,0.032 ,0.736 ,0.62 -,0.192 ,0.532 ,0.38 ,0.552 ,0.264 -,0.504 ,0.148 ,0.856 ,0.16 ,0.252 -,0.236 ,0.048 ,0.144 ,0.08 ,0.912 -,0.08 ,0.828 ,0.072 ,0.708 ,0.5 -,0.324 ,0.196 ,0.48 ,0.304 ,0.28 -,0.764 ,0.516 ,0.42 ,0.948 ,0.352 -,0.408 ,0.248 ,0.212 ,0.596 ,0.252 -,0.54 ,0.008 ,0.984 ,0.512 ,0.46 -,0.716 ,0.396 ,0.288 ,0.416 ,0.544 -,0.164 ,0.676 ,0.636 ,0.68 ,0.52 -,0.108 ,0.484 ,0.936 ,0.892 ,0.292 -,0.988 ,0.808 ,0.432 ,0.712 ,0.96 -,0.296 ,0.956 ,0.4 ,0.576 ,0.52 -,0.328 ,0.708 ,0.272 ,0.58 ,0.832 -,0.476 ,0.544 ,0.676 ,0.636 ,0.096 -,0.712 ,0.076 ,0.98 ,0.584 ,0.816 -,0.524 ,0.08 ,0.244 ,0.392 ,0.784 -,0.588 ,0.256 ,0.372 ,0.212 ,0.512 -,0.936 ,0.176 ,0.636 ,0.088 ,0.62 -,0.928 ,0.608 ,0.564 ,0.54 ,0.152 -,0.736 ,0.732 ,0.1 ,0.412 ,0.596 -,0.58 ,0.044 ,0.792 ,0.876 ,0.464 -,0.88 ,0.732 ,0.112 ,0.304 ,0.88 -,0.748 ,0.944 ,0.64 ,0.74 ,0.26 -,0.184 ,0.532 ,0.256 ,0.172 ,0.808 -,0.828 ,0.44 ,0.976 ,0.356 ,0.316, -1 ,0.736, 1 ,0.116 ,0.292 -,0.908 ,0.86 ,0.344 ,0.236 ,0.476 -,0.444 ,0.64 ,0.668 ,0.98 ,0.416 -,0.344 ,0.916 ,0.84 ,0.5 ,0.548 -,0.92 ,0.68 ,0.82 ,0.956 ,0.368 -,0.764 ,0.536 ,0.192 ,0.272 ,0.892 -,0.148 ,0.2 ,0.644 ,0.252 ,0.468 -,0.304 ,0.248 ,0.96 ,0.768 ,0.632 -,0.268 ,0.588 ,0.54 ,0.444 ,0.548 -,0.856 ,0.732 ,0.884 ,0.672 ,0.324 -,0.62 ,0.524 ,0.568 ,0.08 ,0.992 -,0.744 ,0.484 ,0.628 ,0.644 ,0.612, -0 ,0.592 ,0.948 ,0.128 ,0.892 -,0.972 ,0.108 ,0.68 ,0.72 ,0.876 -,0.328 ,0.236 ,0.612 ,0.716 ,0.912 -,0.168 ,0.612 ,0.12 ,0.676 ,0.58 
-,0.504 ,0.34 ,0.964 ,0.256 ,0.668 -,0.584 ,0.388 ,0.016 ,0.796 ,0.68 -,0.628 ,0.476 ,0.776 ,0.696 ,0.348 -,0.656 ,0.036 ,0.036 ,0.596 ,0.824 -,0.464 ,0.732 ,0.58 ,0.364 ,0.38 -,0.632 ,0.488 ,0.108 ,0.832 ,0.856 -,0.448 ,0.272 ,0.932 ,0.388 ,0.788 -,0.476 ,0.576 ,0.776 ,0.672 ,0.312 -,0.7 ,0.86 ,0.784 ,0.988 ,0.328 -,0.792 ,0.196 ,0.236 ,0.344 ,0.396 -,0.596 ,0.98 ,0.972 ,0.492 ,0.624 -,0.68 ,0.744 ,0.996 ,0.548 ,0.976 -,0.828 ,0.764 ,0.784 ,0.408 ,0.768 -,0.452 ,0.232 ,0.572 ,0.112 ,0.468 -,0.96 ,0.772 ,0.668 ,0.928 ,0.788 -,0.832 ,0.128 ,0.104 ,0.652 ,0.972 -,0.888 ,0.852 ,0.572 ,0.484 ,0.272 -,0.44 ,0.948 ,0.352 ,0.684 ,0.932 -,0.46 ,0.576 ,0.044 ,0.456 ,0.972 -,0.904 ,0.784 ,0.048 ,0.312 ,0.352 -,0.844 ,0.8 ,0.616 ,0.676 ,0.604 -,0.836 ,0.936 ,0.732 ,0.728 ,0.372 -,0.34 ,0.344 ,0.988 ,0.312 ,0.688 -,0.04 ,0.272 ,0.632 ,0.576 ,0.244 -,0.188 ,0.46 ,0.78 ,0.648 ,0.248 -,0.308 ,0.02 ,0.712 ,0.572 ,0.392 -,0.884 ,0.64 ,0.048 ,0.34 ,0.724 -,0.832 ,0.932 ,0.664 ,0.336 ,0.8 -,0.52 ,0.352 ,0.844 ,0.084 ,0.424 -,0.548 ,0.692 ,0.188 ,0.364 ,0.872 -,0.136 ,0.084 ,0.952 ,0.652 ,0.592 -,0.028 ,0.836 ,0.804 ,0.536 ,0.984 -,0.932 ,0.984 ,0.628 ,0.84 ,0.8 -,0.496 ,0.196 ,0.664 ,0.608 ,0.416 -,0.032 ,0.448 ,0.348 ,0.984 ,0.912 -,0.448 ,0.856 ,0.588 ,0.896 ,0.776 -,0.092 ,0.728 ,0.928 ,0.496 ,0.432 -,0.056 ,0.468 ,0.364 ,0.488 ,0.224 -,0.788 ,0.084 ,0.096 ,0.788 ,0.62 -,0.22 ,0.804 ,0.18 ,0.532 ,0.368 -,0.496 ,0.476 ,0.904 ,0.156 ,0.744 -,0.344 ,0.396 ,0.152 ,0.644 ,0.5 -,0.888 ,0.276 ,0.756 ,0.604 ,0.76 -,0.92 ,0.412 ,0.872 ,0.536 ,0.612 -,0.54 ,0.216 ,0.668 ,0.6 ,0.148 -,0.78 ,0.672 ,0.472 ,0.816 ,0.844 -,0.964 ,0.42 ,0.824 ,0.78 ,0.296 -,0.956 ,0.104 ,0.704 ,0.288 ,0.92 -,0.984 ,0.228 ,0.528 ,0.804 ,0.5 -,0.668 ,0.356 ,0.86 ,0.536 ,0.412 -,0.028 ,0.04 ,0.284 ,0.536 ,0.348 -,0.524 ,0.796 ,0.872 ,0.912 ,0.216 -,0.496 ,0.912 ,0.684 ,0.18 ,0.14 -,0.132 ,0.48 ,0.64 ,0.524 ,0.992 -,0.416 ,0.764 ,0.484 ,0.848 ,0.788 -,0.764 ,0.588 ,0.284 ,0.7 ,0.056 -,0.76 ,0.408 ,0.664 ,0.744 ,0.104 -,0.86 ,0.96 ,0.66 ,0.816 ,0.256 -,0.892 ,0.62 ,0.692 ,0.832 ,0.592 -,0.052 ,0.84 ,0.96 ,0.888 ,0.828 -,0.244 ,0.388 ,0.036 ,0.188 ,0.34 -,0.94 ,0.304 ,0.472 ,0.436 ,0.728 -,0.636 ,0.796 ,0.836 ,0.748 ,0.328 -,0.452 ,0.248 ,0.352 ,0.212 ,0.892 -,0.8 ,0.892 ,0.816 ,0.708 ,0.356 -,0.344 ,0.5 ,0.232 ,0.548 ,0.744 -,0.56 ,0.28 ,0.964 ,0.284 ,0.352 -,0.268 ,0.908 ,0.924 ,0.664 ,0.788 -,0.56 ,0.616 ,0.748 ,0.208 ,0.476 -,0.632 ,0.88 ,0.364 ,0.192 ,0.824 -,0.368 ,0.188 ,0.86 ,0.872 ,0.196 -,0.86 ,0.204 ,0.616 ,0.572 ,0.56 -,0.44 ,0.972 ,0.952 ,0.812 ,0.268 -,0.14 ,0.276 ,0.736 ,0.6 ,0.312 -,0.404 ,0.636 ,0.376 ,0.064 ,0.416 -,0.4 ,0.248 ,0.904 ,0.412 ,0.748 -,0.316 ,0.064 ,0.524 ,0.632 ,0.588 -,0.536 ,0.656 ,0.768 ,0.68 ,0.14 -,0.448 ,0.568 ,0.708 ,0.156 ,0.628 -,0.596 ,0.84 ,0.788 ,0.052 ,0.14 -,0.724 ,0.856 ,0.544 ,0.616 ,0.544 -,0.496 ,0.268 ,0.776 ,0.316 ,0.152 -,0.72 ,0.684 ,0.1 ,0.84 ,0.04 -,0.788 ,0.948 ,0.868 ,0.196 ,0.508 -,0.212 ,0.412 ,0.872 ,0.652 ,0.072 -,0.412 ,0.988 ,0.86 ,0.912 ,0.156 -,0.432 ,0.54 ,0.492 ,0.624 ,0.508 -,0.508 ,0.992 ,0.852 ,0.788 ,0.244 -,0.632 ,0.824 ,0.112 ,0.036 ,0.184 -,0.668 ,0.756 ,0.884 ,0.788 ,0.52 -,0.44 ,0.084 ,0.932 ,0.752 ,0.6 -,0.656 ,0.84 ,0.832 ,0.6 ,0.664 -,0.96 ,0.664 ,0.984 ,0.652 ,0.832 -,0.928 ,0.592 ,0.44 ,0.304 ,0.86 -,0.04 ,0.724 ,0.636 ,0.188 ,0.96 -,0.9 ,0.564 ,0.28 ,0.34 ,0.492 -,0.456 ,0.988 ,0.328 ,0.612 ,0.76 -,0.964 ,0.736 ,0.744 ,0.64 ,0.272 -,0.436 ,0.064 ,0.688 ,0.416 ,0.956 -,0.824 ,0.14 ,0.648 ,0.492 ,0.072 -,0.244 ,0.436 ,0.728 ,0.48 ,0.188 -,0.168 ,0.432 ,0.46 
,0.044 ,0.824 -,0.924 ,0.336 ,0.328 ,0.224 ,0.96 -,0.552 ,0.948 ,0.712 ,0.96 ,0.396 -,0.716 ,0.16 ,0.176 ,0.988 ,0.62 -,0.892 ,0.18 ,0.576 ,0.264 ,0.924 -,0.16 ,0.356 ,0.724 ,0.772 ,0.532 -,0.944 ,0.744 ,0.268 ,0.184 ,0.972 -,0.968 ,0.044 ,0.732 ,0.732 ,0.616 -,0.716 ,0.804 ,0.86 ,0.344 ,0.704 -,0.932 ,0.568 ,0.356 ,0.944 ,0.232 -,0.452 ,0.212 ,0.692 ,0.88 ,0.86 -,0.4 ,0.904 ,0.008 ,0.528 ,0.576 -,0.86 ,0.696 ,0.116 ,0.444 ,0.472 -,0.224 ,0.372 ,0.248 ,0.68 ,0.104 -,0.476 ,0.228 ,0.532 ,0.24 ,0.648 -,0.828 ,0.416 ,0.08 ,0.48 ,0.76 -,0.3 ,0.556 ,0.628 ,0.396 ,0.864 -,0.272 ,0.564 ,0.984 ,0.312 ,0.968 -,0.448 ,0.044 ,0.664 ,0.408 ,0.732 -,0.464 ,0.532 ,0.76 ,0.712 ,0.132 -,0.324 ,0.936 ,0.872 ,0.768 ,0.432 -,0.848 ,0.464 ,0.72 ,0.496 ,0.464 -,0.752 ,0.808 ,0.372 ,0.204 ,0.604 -,0.432 ,0.128 ,0.268 ,0.336 ,0.728 -,0.824 ,0.212 ,0.704 ,0.172 ,0.408 -,0.9 ,0.924 ,0.448 ,0.912 ,0.688 -,0.748 ,0.672 ,0.044 ,0.704 ,0.568 -,0.356 ,0.116 ,0.94 ,0.688 ,0.948 -,0.776 ,0.664 ,0.732 ,0.108 ,0.72 -,0.24 ,0.964 ,0.42 ,0.412 ,0.764 -,0.104 ,0.868 ,0.308 ,0.62 ,0.608 -,0.404 ,0.6 ,0.664 ,0.152 ,0.432 -,0.544 ,0.26 ,0.604 ,0.584 ,0.64 -,0.404 ,0.08 ,0.244 ,0.452 ,0.78 -,0.7 ,0.896 ,0.66, 0 ,0.5 -,0.368 ,0.468 ,0.432 ,0.76 ,0.68 -,0.528 ,0.548 ,0.076 ,0.448 ,0.288 -,0.592 ,0.64 ,0.668 ,0.548 ,0.32 -,0.852 ,0.916 ,0.9 ,0.696 ,0.6 -,0.968 ,0.22 ,0.992 ,0.456 ,0.788 -,0.168 ,0.988 ,0.896 ,0.268 ,0.552 -,0.596 ,0.54 ,0.384 ,0.596 ,0.896 -,0.896 ,0.14 ,0.588 ,0.52 ,0.508 -,0.784 ,0.892 ,0.548 ,0.652 ,0.34 -,0.94 ,0.78 ,0.76 ,0.8 ,0.22 -,0.78 ,0.428 ,0.504 ,0.592 ,0.084 -,0.928 ,0.324 ,0.664 ,0.732 ,0.784 -,0.98 ,0.38 ,0.812 ,0.236 ,0.092 -,0.156 ,0.712 ,0.424 ,0.776 ,0.612 -,0.156 ,0.544 ,0.332 ,0.292 ,0.644 -,0.804 ,0.42 ,0.368 ,0.004 ,0.74 -,0.52 ,0.472 ,0.06 ,0.664 ,0.572 -,0.684 ,0.592 ,0.476 ,0.116 ,0.296 -,0.564 ,0.24 ,0.556 ,0.488 ,0.588 -,0.168 ,0.324 ,0.408 ,0.284 ,0.472 -,0.56 ,0.752 ,0.76 ,0.992 ,0.16 -,0.42 ,0.564 ,0.984 ,0.82 ,0.72 -,0.356 ,0.328 ,0.96 ,0.356 ,0.644 -,0.268 ,0.544 ,0.104 ,0.84 ,0.972 -,0.556 ,0.248 ,0.04 ,0.372 ,0.592 -,0.588 ,0.468 ,0.968 ,0.44 ,0.64 -,0.216 ,0.792 ,0.476 ,0.724 ,0.068 -,0.472 ,0.992 ,0.484 ,0.888 ,0.908 -,0.376 ,0.84 ,0.468 ,0.748 ,0.356 -,0.712 ,0.628 ,0.912 ,0.496 ,0.648 -,0.124 ,0.396 ,0.508 ,0.312 ,0.128 -,0.788 ,0.92 ,0.468 ,0.372 ,0.216 -,0.54 ,0.84 ,0.608 ,0.464 ,0.744 -,0.336 ,0.184 ,0.496 ,0.44 ,0.444 -,0.136 ,0.504 ,0.568 ,0.852 ,0.804 -,0.376 ,0.708 ,0.676 ,0.476 ,0.708 -,0.06 ,0.98 ,0.436 ,0.796 ,0.448 -,0.46 ,0.452 ,0.144 ,0.504 ,0.592 -,0.848 ,0.628 ,0.5 ,0.784 ,0.492 -,0.444 ,0.196 ,0.876 ,0.832 ,0.636 -,0.24 ,0.908 ,0.484 ,0.544 ,0.808 -,0.256 ,0.664 ,0.272 ,0.716 ,0.196 -,0.272 ,0.484 ,0.94 ,0.168 ,0.956 -,0.856 ,0.82, 0 ,0.868 ,0.796 -,0.44 ,0.656 ,0.82 ,0.208 ,0.924 -,0.352 ,0.832 ,0.844 ,0.324 ,0.216 -,0.832 ,0.348 ,0.904 ,0.244 ,0.324 -,0.816 ,0.64 ,0.892 ,0.116 ,0.392 -,0.472 ,0.772 ,0.464 ,0.556 ,0.892 -,0.232 ,0.224 ,0.788 ,0.5 ,0.04 -,0.532 ,0.284 ,0.62 ,0.464 ,0.924 -,0.804 ,0.252 ,0.224 ,0.38 ,0.244 -,0.076 ,0.424 ,0.988 ,0.78 ,0.324 -,0.076 ,0.496 ,0.844 ,0.496 ,0.288 -,0.556 ,0.696 ,0.76 ,0.352 ,0.952 -,0.26 ,0.752 ,0.084 ,0.08 ,0.324 -,0.776 ,0.632 ,0.712 ,0.868 ,0.12 -,0.808 ,0.76 ,0.444 ,0.664 ,0.16 -,0.308 ,0.912 ,0.16 ,0.04 ,0.692 -,0.336 ,0.672 ,0.664 ,0.556 ,0.876 -,0.172 ,0.52 ,0.188 ,0.904 ,0.552 -,0.32 ,0.292 ,0.216 ,0.58 ,0.988 -,0.724 ,0.704 ,0.212 ,0.74 ,0.348 -,0.912 ,0.592 ,0.108 ,0.332 ,0.536 -,0.22 ,0.452 ,0.124 ,0.98 ,0.364 -,0.908 ,0.272 ,0.564 ,0.556 ,0.668 -,0.432 ,0.928 ,0.596 ,0.992 ,0.456 -,0.832 ,0.624 ,0.832 
,0.064 ,0.52 -,0.096 ,0.492 ,0.62 ,0.416 ,0.448 -,0.928 ,0.68 ,0.976 ,0.192 ,0.728 -,0.496 ,0.756 ,0.372 ,0.18 ,0.196 -,0.808 ,0.816 ,0.996 ,0.352 ,0.648 -,0.48 ,0.172 ,0.94 ,0.864 ,0.844 -,0.228 ,0.4 ,0.12 ,0.12 ,0.344 -,0.952 ,0.632 ,0.376 ,0.264 ,0.88 -,0.252 ,0.332 ,0.56 ,0.756 ,0.468 -,0.144 ,0.26 ,0.316 ,0.528 ,0.224 -,0.512 ,0.568 ,0.724 ,0.912 ,0.384 -,0.616 ,0.304 ,0.652 ,0.2 ,0.996 -,0.292 ,0.36 ,0.788 ,0.612 ,0.768 -,0.748 ,0.624 ,0.664 ,0.696 ,0.792 -,0.64 ,0.176 ,0.78 ,0.8 ,0.6 -,0.108 ,0.568 ,0.168 ,0.92 ,0.044 -,0.872 ,0.848 ,0.296 ,0.9 ,0.648 -,0.544 ,0.124 ,0.66 ,0.664 ,0.28 -,0.164 ,0.564 ,0.768 ,0.552 ,0.852 -,0.508 ,0.652 ,0.8 ,0.532 ,0.596 -,0.204 ,0.036 ,0.22 ,0.076 ,0.972 -,0.684 ,0.148 ,0.248 ,0.24 ,0.948 -,0.356 ,0.06 ,0.684 ,0.244 ,0.516 -,0.192 ,0.912 ,0.388 ,0.656 ,0.852 -,0.644 ,0.704 ,0.976 ,0.9 ,0.664 -,0.98 ,0.744 ,0.156 ,0.676 ,0.78 -,0.936 ,0.78 ,0.284 ,0.3 ,0.904 -,0.38 ,0.324 ,0.524 ,0.228 ,0.7 -,0.264 ,0.868 ,0.62 ,0.416 ,0.356 -,0.772 ,0.464 ,0.92 ,0.9 ,0.148 -,0.204 ,0.364 ,0.956 ,0.888 ,0.536 -,0.196 ,0.048 ,0.232 ,0.872 ,0.496 -,0.524 ,0.576 ,0.7 ,0.368 ,0.248 -,0.532 ,0.408 ,0.372 ,0.492 ,0.432 -,0.508 ,0.468 ,0.576 ,0.704 ,0.84 -,0.472 ,0.08 ,0.728 ,0.548 ,0.336 -,0.572 ,0.564 ,0.032 ,0.352 ,0.84) -cp=hd(p.table,alpha) -pv=NULL -if(!is.null(p.obs))w=optimize(hdpv,interval=c(.001,.999),dat=p.table,obs=p.obs)$minimum -list(crit.p.value=cp,adj.p.value=w) -} -p.crit.n80<-function(alpha=.05,p.obs=NULL){ -p.table=c( -0.46 ,0.36 ,0.66 ,0.56 ,0.704 -,0.848 ,0.008 ,0.232 ,0.072 ,0.784 -,0.944 ,0.096, 0 ,0.252 ,0.464 -,0.132, 1 ,0.116 ,0.288 ,0.236 -,0.512 ,0.056 ,0.68 ,0.356 ,0.164 -,0.36 ,0.444 ,0.448 ,0.656 ,0.464 -,0.616 ,0.296 ,0.7 ,0.34 ,0.152 -,0.248 ,0.776 ,0.516 ,0.084 ,0.908 -,0.084 ,0.268 ,0.048 ,0.612 ,0.876 -,0.752 ,0.108 ,0.916 ,0.756 ,0.424 -,0.772 ,0.044 ,0.788 ,0.936 ,0.48 -,0.824 ,0.784 ,0.944 ,0.5 ,0.236 -,0.564 ,0.956 ,0.1 ,0.536 ,0.772 -,0.82 ,0.956 ,0.556 ,0.76 ,0.78 -,0.144 ,0.512 ,0.964 ,0.928 ,0.04 -,0.86 ,0.364 ,0.98 ,0.252 ,0.548 -,0.252 ,0.264 ,0.96 ,0.46 ,0.744 -,0.932 ,0.58 ,0.448 ,0.708 ,0.928 -,0.976 ,0.288 ,0.224 ,0.436 ,0.84 -,0.056 ,0.68 ,0.04 ,0.848 ,0.46 -,0.104 ,0.5 ,0.736 ,0.808 ,0.436 -,0.692 ,0.944 ,0.552 ,0.024 ,0.36 -,0.668 ,0.764 ,0.952 ,0.62 ,0.072 -,0.972 ,0.816 ,0.392 ,0.964 ,0.356 -,0.62 ,0.272 ,0.416 ,0.68 ,0.116 -,0.632 ,0.904 ,0.36 ,0.212 ,0.632 -,0.664 ,0.576 ,0.56 ,0.516 ,0.876 -,0.82 ,0.736 ,0.044 ,0.648 ,0.36 -,0.328 ,0.42 ,0.708 ,0.868 ,0.356 -,0.076 ,0.856 ,0.828 ,0.124 ,0.72 -,0.4 ,0.5 ,0.028 ,0.64 ,0.936 -,0.492 ,0.96 ,0.28 ,0.688 ,0.488 -,0.684 ,0.24 ,0.828 ,0.624 ,0.928 -,0.492 ,0.696 ,0.2 ,0.424 ,0.868 -,0.22 ,0.532 ,0.204 ,0.376 ,0.256 -,0.612 ,0.64 ,0.72 ,0.76 ,0.216 -,0.468 ,0.756 ,0.576 ,0.856 ,0.132 -,0.916 ,0.248 ,0.364 ,0.828 ,0.896 -,0.536 ,0.792 ,0.6 ,0.72 ,0.86 -,0.592 ,0.056 ,0.628 ,0.016 ,0.688 -,0.656 ,0.484 ,0.26 ,0.66 ,0.304 -,0.476 ,0.648 ,0.848 ,0.68 ,0.832 -,0.92 ,0.568, 1 ,0.056 ,0.236 -,0.648 ,0.42 ,0.284 ,0.708 ,0.296 -,0.944 ,0.952 ,0.336 ,0.48 ,0.356 -,0.86 ,0.496 ,0.976 ,0.692 ,0.624 -,0.1 ,0.568 ,0.236 ,0.88 ,0.36 -,0.864 ,0.124 ,0.94 ,0.884 ,0.512 -,0.22 ,0.896 ,0.3 ,0.684 ,0.7 -,0.316 ,0.696 ,0.66 ,0.864 ,0.548 -,0.884 ,0.656 ,0.204 ,0.88 ,0.936 -,0.264 ,0.604 ,0.34 ,0.832 ,0.728 -,0.644 ,0.924 ,0.524 ,0.808 ,0.612 -,0.36 ,0.936 ,0.884 ,0.904 ,0.748 -,0.6 ,0.648 ,0.16 ,0.8 ,0.312 -,0.42 ,0.544 ,0.744 ,0.292 ,0.5 -,0.028 ,0.804 ,0.9 ,0.648 ,0.984 -,0.432 ,0.844 ,0.936 ,0.796 ,0.948 -,0.608 ,0.976 ,0.552 ,0.94 ,0.424 -,0.848 ,0.916 ,0.728 ,0.764 ,0.604 -,0.508 ,0.74 
,0.468 ,0.268 ,0.748 -,0.072 ,0.468 ,0.82 ,0.24 ,0.596 -,0.18 ,0.188 ,0.612 ,0.152 ,0.996 -,0.96 ,0.332 ,0.72 ,0.44 ,0.364 -,0.704 ,0.612 ,0.248 ,0.72 ,0.568 -,0.956 ,0.524 ,0.352 ,0.708 ,0.368 -,0.924 ,0.384 ,0.476 ,0.912 ,0.736 -,0.368 ,0.412 ,0.232 ,0.348 ,0.016 -,0.568 ,0.34 ,0.608 ,0.356 ,0.772 -,0.944 ,0.336 ,0.504 ,0.908 ,0.812 -,0.292 ,0.904 ,0.16 ,0.076 ,0.928 -,0.912 ,0.12 ,0.28 ,0.156 ,0.248 -,0.988 ,0.44 ,0.764 ,0.088 ,0.256 -,0.208 ,0.08 ,0.288 ,0.172 ,0.428 -,0.428 ,0.276 ,0.084 ,0.344 ,0.132 -,0.492 ,0.728 ,0.26 ,0.956 ,0.56 -,0.344 ,0.176 ,0.864 ,0.54 ,0.24 -,0.724 ,0.384 ,0.916 ,0.956 ,0.692 -,0.88 ,0.66 ,0.372 ,0.128 ,0.568 -,0.636 ,0.28 ,0.288 ,0.888 ,0.872 -,0.42 ,0.356 ,0.604 ,0.72 ,0.852 -,0.408 ,0.976 ,0.52 ,0.556 ,0.9 -,0.364 ,0.716 ,0.588 ,0.72 ,0.312 -,0.224 ,0.26 ,0.116 ,0.952 ,0.404 -,0.952 ,0.948 ,0.22 ,0.676 ,0.58 -,0.724 ,0.144 ,0.084 ,0.396 ,0.664 -,0.16 ,0.412 ,0.796 ,0.476 ,0.284 -,0.8 ,0.348 ,0.736 ,0.26 ,0.672 -,0.372 ,0.904 ,0.768 ,0.82 ,0.736 -,0.548 ,0.788 ,0.068 ,0.008 ,0.548 -,0.304, 1 ,0.2 ,0.12 ,0.168 -,0.4 ,0.504 ,0.68 ,0.96 ,0.924 -,0.884 ,0.348 ,0.044 ,0.236 ,0.416 -,0.32 ,0.612 ,0.512 ,0.34 ,0.604 -,0.868 ,0.412 ,0.376 ,0.376 ,0.88 -,0.864 ,0.928 ,0.364 ,0.42 ,0.048 -,0.116 ,0.66 ,0.916 ,0.344 ,0.596 -,0.768 ,0.84 ,0.964 ,0.92 ,0.948 -,0.54 ,0.828 ,0.44 ,0.932 ,0.972 -,0.244 ,0.948 ,0.1 ,0.228 ,0.88 -,0.808 ,0.404 ,0.016 ,0.996 ,0.236 -,0.88 ,0.076 ,0.156 ,0.172 ,0.692 -,0.312 ,0.248 ,0.968 ,0.264 ,0.088 -,0.296 ,0.824 ,0.444 ,0.24 ,0.996 -,0.42 ,0.744 ,0.5 ,0.872 ,0.556 -,0.68 ,0.172 ,0.216 ,0.688 ,0.94 -,0.136 ,0.78 ,0.408 ,0.768 ,0.348 -,0.568 ,0.324 ,0.116 ,0.968 ,0.132 -,0.528 ,0.92 ,0.98 ,0.308 ,0.528 -,0.112 ,0.056 ,0.1 ,0.616 ,0.636 -,0.628 ,0.288 ,0.576 ,0.296 ,0.992 -,0.048 ,0.088 ,0.664, 1 ,0.044 -,0.796 ,0.284 ,0.02 ,0.692 ,0.488 -,0.524 ,0.344 ,0.472 ,0.796 ,0.244 -,0.112 ,0.9 ,0.012 ,0.328 ,0.508 -,0.664 ,0.892 ,0.404 ,0.792 ,0.744 -,0.752 ,0.864 ,0.448 ,0.756 ,0.252 -,0.1 ,0.788 ,0.948 ,0.448 ,0.964 -,0.416 ,0.5 ,0.236 ,0.828 ,0.344 -,0.964 ,0.552 ,0.392 ,0.948 ,0.864 -,0.908 ,0.12, 0 ,0.14 ,0.516 -,0.856 ,0.476 ,0.828 ,0.232 ,0.636 -,0.612 ,0.668 ,0.892 ,0.792 ,0.76 -,0.968 ,0.072 ,0.896 ,0.636 ,0.62 -,0.32 ,0.072 ,0.684 ,0.6 ,0.9 -,0.452 ,0.196 ,0.892 ,0.788 ,0.532 -,0.46 ,0.576 ,0.6 ,0.948 ,0.98 -,0.992 ,0.156 ,0.292 ,0.956 ,0.7 -,0.472 ,0.428 ,0.6 ,0.772 ,0.864 -,0.388 ,0.636 ,0.308 ,0.492 ,0.188 -,0.144 ,0.916 ,0.808 ,0.76 ,0.212 -,0.516 ,0.556 ,0.056 ,0.2 ,0.676 -,0.076 ,0.62 ,0.984 ,0.824 ,0.204 -,0.024 ,0.656 ,0.176 ,0.804 ,0.936 -,0.576 ,0.316 ,0.544 ,0.94 ,0.128 -,0.62 ,0.464 ,0.116 ,0.188 ,0.372 -,0.732 ,0.956 ,0.256 ,0.832 ,0.816 -,0.152 ,0.22 ,0.632 ,0.712 ,0.364 -,0.988 ,0.504 ,0.728 ,0.984 ,0.776 -,0.8 ,0.876 ,0.612 ,0.896 ,0.152 -,0.532 ,0.88 ,0.968 ,0.256 ,0.456 -,0.552 ,0.056 ,0.352 ,0.808 ,0.64 -,0.172 ,0.176 ,0.092 ,0.62 ,0.9 -,0.768 ,0.024 ,0.1 ,0.896 ,0.36 -,0.212 ,0.42 ,0.52 ,0.884 ,0.684 -,0.896 ,0.596 ,0.664 ,0.848 ,0.432 -,0.04 ,0.688 ,0.884 ,0.032 ,0.628 -,0.2 ,0.832 ,0.508 ,0.784 ,0.476 -,0.956 ,0.628 ,0.232 ,0.844 ,0.94 -,0.068 ,0.952 ,0.212 ,0.352 ,0.6 -,0.196 ,0.808 ,0.628 ,0.112 ,0.628 -,0.94 ,0.216 ,0.816 ,0.212 ,0.788 -,0.524 ,0.9 ,0.72 ,0.364 ,0.436 -,0.152 ,0.176 ,0.544 ,0.56 ,0.152 -,0.772 ,0.732 ,0.324 ,0.224 ,0.456 -,0.732 ,0.304 ,0.124 ,0.524 ,0.256 -,0.376 ,0.464 ,0.836 ,0.66 ,0.964 -,0.784 ,0.772 ,0.624 ,0.56 ,0.04 -,0.584 ,0.168 ,0.132 ,0.62 ,0.276 -,0.096 ,0.928 ,0.588 ,0.66 ,0.952 -,0.908 ,0.224 ,0.348 ,0.64 ,0.456 -,0.22 ,0.052 ,0.232 ,0.908 ,0.92 -,0.648 ,0.492 ,0.416 ,0.444 ,0.98 
-,0.92 ,0.624 ,0.612 ,0.684 ,0.744 -,0.624 ,0.068 ,0.58 ,0.048 ,0.656 -,0.352 ,0.56 ,0.708 ,0.124 ,0.7 -,0.9 ,0.268 ,0.836 ,0.208 ,0.752 -,0.136 ,0.54 ,0.136 ,0.336 ,0.988 -,0.78 ,0.612 ,0.772 ,0.36 ,0.452 -,0.616 ,0.424 ,0.736 ,0.856 ,0.5 -,0.472 ,0.62 ,0.736 ,0.896 ,0.08 -,0.8 ,0.276 ,0.124 ,0.116 ,0.692 -,0.404 ,0.78 ,0.484 ,0.268 ,0.624 -,0.776 ,0.28 ,0.908 ,0.576 ,0.208 -,0.028 ,0.752 ,0.58 ,0.904 ,0.672 -,0.716 ,0.364 ,0.732 ,0.3 ,0.444 -,0.568 ,0.388 ,0.476 ,0.356 ,0.124 -,0.432 ,0.996 ,0.492 ,0.964 ,0.356 -,0.58 ,0.792 ,0.948 ,0.204 ,0.392 -,0.808 ,0.296 ,0.252 ,0.404, 0 -,0.836 ,0.096 ,0.336 ,0.892 ,0.112 -,0.476 ,0.54 ,0.364 ,0.916 ,0.9 -,0.548 ,0.808 ,0.272 ,0.212 ,0.38 -,0.384 ,0.656 ,0.38 ,0.436 ,0.58 -,0.728 ,0.464 ,0.88 ,0.988 ,0.888 -,0.208 ,0.476 ,0.28 ,0.984 ,0.536 -,0.692 ,0.28 ,0.396 ,0.632 ,0.66 -,0.812 ,0.636 ,0.728 ,0.12 ,0.896 -,0.548 ,0.536 ,0.032 ,0.74 ,0.336 -,0.572 ,0.932 ,0.188 ,0.196 ,0.82 -,0.456 ,0.892 ,0.424 ,0.276 ,0.848 -,0.948 ,0.952 ,0.656 ,0.332 ,0.92 -,0.552 ,0.664 ,0.536 ,0.708 ,0.972 -,0.44 ,0.864 ,0.076, 1 ,0.104 -,0.416 ,0.104 ,0.324 ,0.24 ,0.708 -,0.992 ,0.996 ,0.184 ,0.156 ,0.18 -,0.852 ,0.836 ,0.092 ,0.896 ,0.08 -,0.816 ,0.384 ,0.268 ,0.324 ,0.1 -,0.708 ,0.188 ,0.732 ,0.056 ,0.776 -,0.768 ,0.744 ,0.22 ,0.648 ,0.236 -,0.648 ,0.752 ,0.936 ,0.672 ,0.796 -,0.4 ,0.732 ,0.64 ,0.9 ,0.208 -,0.536 ,0.828 ,0.54 ,0.4 ,0.04 -,0.084 ,0.5 ,0.232 ,0.724 ,0.752 -,0.076 ,0.564 ,0.836 ,0.352 ,0.808 -,0.916 ,0.928 ,0.752 ,0.584 ,0.344 -,0.584 ,0.62 ,0.316 ,0.072 ,0.604 -,0.656 ,0.676 ,0.524 ,0.392 ,0.168 -,0.744 ,0.56 ,0.568 ,0.936 ,0.792 -,0.592 ,0.804 ,0.2 ,0.852 ,0.972 -,0.736 ,0.616 ,0.744 ,0.284 ,0.764 -,0.824 ,0.484 ,0.76 ,0.804 ,0.472 -,0.968 ,0.868 ,0.72 ,0.496 ,0.168 -,0.868 ,0.676 ,0.388 ,0.948 ,0.06 -,0.104 ,0.552 ,0.432 ,0.368 ,0.472 -,0.376 ,0.244 ,0.556 ,0.86 ,0.976 -,0.116 ,0.784 ,0.748 ,0.736 ,0.68 -,0.828 ,0.156 ,0.916 ,0.22 ,0.16 -,0.576 ,0.408 ,0.752 ,0.92 ,0.216 -,0.484 ,0.832 ,0.96 ,0.584 ,0.616 -,0.968 ,0.728 ,0.776 ,0.664 ,0.796 -,0.36 ,0.656 ,0.336 ,0.816 ,0.892 -,0.808 ,0.728 ,0.432 ,0.376 ,0.456 -,0.496 ,0.524 ,0.684 ,0.84 ,0.7 -,0.448 ,0.264 ,0.28 ,0.476 ,0.68 -,0.328 ,0.844 ,0.24 ,0.528 ,0.456 -,0.872 ,0.924 ,0.58 ,0.78 ,0.772 -,0.636 ,0.144 ,0.684 ,0.212 ,0.284 -,0.22 ,0.324 ,0.28 ,0.468 ,0.692 -,0.604 ,0.008 ,0.888 ,0.236 ,0.88 -,0.468 ,0.984 ,0.656 ,0.736 ,0.532 -,0.084 ,0.444 ,0.824 ,0.552 ,0.288 -,0.588 ,0.148 ,0.704 ,0.632 ,0.556 -,0.024 ,0.508 ,0.852 ,0.18 ,0.112 -,0.164 ,0.248 ,0.364 ,0.588 ,0.268 -,0.192 ,0.564 ,0.172 ,0.052 ,0.98 -,0.22 ,0.648 ,0.032 ,0.84 ,0.512 -,0.528 ,0.324 ,0.28 ,0.296 ,0.108 -,0.02 ,0.368 ,0.704 ,0.048 ,0.36 -,0.536 ,0.36 ,0.252 ,0.732 ,0.816 -,0.544 ,0.216 ,0.416 ,0.924 ,0.64 -,0.14 ,0.8 ,0.528 ,0.196 ,0.884 -,0.08 ,0.572 ,0.748 ,0.372 ,0.6 -,0.928 ,0.144 ,0.668 ,0.896 ,0.524 -,0.636 ,0.792 ,0.46 ,0.748 ,0.996 -,0.02 ,0.94 ,0.18 ,0.12 ,0.236 -,0.072 ,0.256 ,0.992 ,0.98 ,0.204 -,0.796 ,0.556 ,0.844 ,0.952 ,0.924 -,0.72 ,0.004 ,0.712 ,0.62 ,0.864 -,0.4 ,0.724 ,0.86 ,0.876, 0 -,0.192 ,0.532 ,0.28 ,0.128 ,0.576 -,0.14 ,0.124 ,0.648 ,0.924 ,0.284 -,0.724 ,0.46 ,0.208 ,0.94 ,0.256 -,0.828 ,0.048 ,0.608 ,0.36 ,0.94 -,0.924 ,0.664 ,0.988 ,0.476 ,0.88 -,0.204 ,0.188 ,0.656 ,0.192 ,0.416 -,0.528 ,0.408 ,0.032 ,0.448 ,0.572 -,0.352 ,0.18 ,0.032 ,0.46 ,0.468 -,0.276 ,0.944 ,0.304 ,0.072 ,0.316 -,0.504 ,0.76 ,0.904 ,0.076 ,0.46 -,0.592 ,0.768 ,0.42 ,0.484 ,0.728 -,0.528 ,0.06 ,0.924 ,0.616 ,0.34 -,0.208, 1 ,0.428 ,0.564 ,0.26 -,0.136 ,0.608 ,0.544 ,0.508 ,0.82 -,0.264 ,0.296 ,0.156 ,0.192 ,0.628 -,0.472 ,0.996 
,0.136 ,0.104 ,0.112 -,0.412 ,0.896 ,0.776 ,0.016 ,0.232 -,0.116 ,0.408 ,0.26 ,0.372 ,0.46 -,0.944 ,0.28 ,0.932 ,0.636 ,0.632 -,0.168 ,0.124 ,0.412 ,0.228 ,0.292 -,0.676 ,0.68 ,0.644 ,0.996 ,0.948 -,0.312 ,0.444 ,0.832 ,0.356 ,0.408 -,0.952 ,0.848 ,0.184 ,0.404 ,0.892 -,0.92 ,0.896 ,0.604 ,0.064 ,0.416 -,0.436 ,0.312 ,0.668 ,0.948 ,0.172 -,0.996 ,0.508 ,0.536 ,0.444 ,0.832 -,0.772 ,0.26 ,0.916 ,0.12 ,0.436 -,0.652 ,0.732 ,0.872 ,0.104 ,0.02 -,0.328 ,0.692 ,0.464 ,0.096 ,0.148 -,0.348 ,0.772 ,0.84 ,0.472 ,0.416 -,0.132 ,0.388 ,0.168 ,0.92 ,0.012 -,0.764 ,0.484 ,0.148 ,0.3 ,0.392 -,0.36 ,0.68 ,0.5 ,0.18, 1 -,0.676 ,0.596 ,0.856 ,0.952 ,0.992 -,0.812 ,0.612 ,0.66 ,0.66 ,0.132 -,0.344 ,0.784 ,0.052 ,0.356 ,0.464 -,0.476 ,0.24 ,0.296 ,0.768 ,0.584 -,0.588 ,0.628 ,0.484 ,0.228 ,0.556 -,0.34 ,0.94 ,0.964 ,0.42 ,0.008 -,0.268 ,0.976 ,0.288 ,0.276 ,0.344 -,0.496 ,0.384 ,0.796 ,0.692 ,0.244 -,0.368 ,0.764 ,0.672 ,0.24 ,0.204 -,0.224 ,0.276 ,0.764 ,0.34 ,0.9 -,0.548 ,0.532 ,0.54 ,0.388 ,0.788 -,0.168 ,0.532 ,0.172 ,0.976 ,0.788 -,0.724 ,0.972 ,0.628 ,0.616 ,0.408 -,0.832 ,0.968 ,0.788 ,0.384 ,0.34 -,0.636 ,0.592 ,0.404 ,0.168 ,0.792 -,0.572 ,0.636 ,0.656 ,0.892 ,0.06 -,0.22 ,0.832 ,0.404 ,0.496 ,0.256 -,0.136 ,0.82 ,0.212 ,0.948 ,0.696 -,0.508 ,0.892 ,0.64 ,0.3 ,0.564 -,0.996 ,0.42 ,0.232 ,0.08 ,0.952 -,0.544 ,0.464 ,0.304 ,0.876 ,0.608 -,0.372 ,0.1 ,0.74 ,0.968 ,0.996 -,0.692 ,0.876 ,0.62 ,0.672 ,0.648 -,0.316 ,0.816 ,0.352 ,0.376 ,0.972 -,0.844 ,0.528 ,0.752 ,0.916 ,0.448 -,0.244 ,0.668 ,0.232 ,0.744 ,0.132 -,0.524 ,0.492 ,0.912 ,0.936 ,0.94 -,0.188 ,0.516 ,0.18 ,0.312 ,0.16 -,0.932 ,0.084 ,0.384 ,0.604 ,0.688 -,0.48 ,0.116 ,0.936 ,0.576 ,0.8 -,0.952 ,0.316 ,0.488 ,0.084 ,0.736 -,0.392 ,0.232 ,0.576 ,0.396 ,0.896 -,0.692 ,0.292 ,0.788 ,0.864 ,0.192 -,0.28 ,0.044 ,0.14 ,0.808 ,0.992 -,0.876 ,0.288 ,0.86 ,0.908 ,0.064 -,0.228 ,0.964 ,0.796 ,0.272 ,0.344 -,0.524 ,0.232 ,0.648 ,0.68 ,0.064 -,0.104 ,0.392 ,0.772 ,0.196 ,0.868 -,0.08 ,0.428 ,0.856 ,0.98 ,0.076 -,0.456 ,0.556 ,0.22 ,0.816 ,0.512 -,0.128 ,0.276 ,0.64 ,0.316 ,0.516 -,0.768 ,0.368 ,0.828 ,0.044 ,0.676 -,0.8 ,0.28 ,0.176 ,0.936 ,0.056 -,0.016 ,0.34 ,0.72 ,0.764 ,0.272 -,0.604 ,0.152 ,0.86 ,0.096 ,0.292 -,0.984 ,0.832 ,0.356 ,0.152 ,0.732 -,0.812 ,0.304 ,0.276 ,0.76 ,0.88 -,0.676 ,0.212 ,0.204 ,0.352 ,0.476 -,0.244 ,0.84 ,0.812 ,0.176 ,0.028 -,0.352 ,0.54 ,0.832 ,0.48 ,0.416 -,0.732 ,0.376 ,0.528 ,0.416 ,0.012 -,0.196 ,0.46 ,0.7 ,0.5 ,0.096 -,0.072 ,0.612 ,0.304 ,0.472 ,0.376 -,0.476 ,0.408 ,0.5 ,0.828 ,0.32 -,0.22 ,0.036 ,0.172 ,0.712 ,0.756 -,0.52 ,0.632 ,0.26 ,0.316 ,0.16 -,0.688 ,0.484 ,0.692 ,0.336 ,0.736 -,0.444 ,0.936 ,0.624 ,0.276 ,0.504 -,0.58 ,0.376 ,0.648 ,0.296 ,0.12 -,0.864, 1 ,0.832 ,0.668 ,0.924 -,0.916 ,0.668 ,0.48 ,0.828 ,0.724 -,0.448 ,0.624 ,0.82 ,0.624 ,0.944 -,0.1 ,0.044 ,0.804 ,0.436 ,0.22 -,0.808 ,0.548 ,0.32 ,0.264 ,0.284 -,0.872 ,0.876 ,0.74 ,0.568 ,0.824 -,0.608 ,0.544 ,0.176 ,0.288 ,0.084 -,0.092 ,0.916 ,0.764 ,0.168 ,0.872 -,0.376 ,0.52 ,0.288 ,0.88 ,0.888 -,0.904 ,0.892 ,0.372 ,0.42 ,0.984 -,0.52 ,0.372 ,0.476 ,0.348 ,0.756 -,0.044 ,0.736 ,0.252 ,0.732 ,0.776 -,0.632 ,0.976 ,0.08 ,0.36 ,0.596 -,0.72 ,0.228 ,0.36 ,0.2 ,0.924 -,0.676 ,0.744 ,0.58 ,0.644 ,0.3 -,0.82 ,0.296 ,0.44 ,0.516 ,0.716 -,0.46 ,0.428 ,0.46 ,0.372 ,0.604 -,0.16 ,0.484 ,0.164 ,0.38 ,0.708 -,0.964 ,0.988 ,0.844 ,0.216 ,0.912 -,0.228 ,0.368 ,0.22 ,0.064 ,0.384 -,0.72 ,0.636 ,0.852 ,0.776 ,0.444 -,0.944 ,0.992 ,0.74 ,0.384 ,0.528 -,0.536 ,0.296 ,0.056 ,0.34 ,0.152 -,0.152 ,0.316 ,0.148 ,0.816 ,0.576 -,0.936 ,0.896 ,0.212 ,0.792 ,0.392 -,0.728 ,0.692 
,0.324 ,0.496 ,0.68 -,0.536 ,0.372 ,0.316 ,0.276 ,0.9 -,0.664 ,0.008 ,0.464 ,0.564 ,0.088 -,0.52 ,0.032 ,0.584 ,0.396 ,0.864 -,0.18 ,0.708 ,0.62 ,0.82 ,0.248 -,0.736 ,0.496 ,0.284 ,0.496 ,0.152 -,0.66 ,0.524 ,0.536 ,0.56 ,0.232 -,0.084 ,0.396 ,0.38 ,0.028 ,0.708 -,0.184 ,0.824 ,0.264 ,0.892 ,0.62 -,0.204 ,0.62 ,0.728 ,0.984 ,0.948 -,0.656 ,0.876 ,0.92 ,0.84 ,0.932 -,0.572 ,0.516 ,0.024 ,0.248 ,0.756 -,0.428 ,0.28 ,0.572 ,0.936 ,0.228 -,0.416 ,0.04 ,0.44 ,0.252 ,0.872 -,0.408 ,0.484 ,0.468 ,0.036 ,0.388 -,0.1 ,0.956 ,0.64 ,0.904 ,0.436 -,0.152 ,0.144 ,0.428 ,0.628 ,0.748 -,0.632 ,0.132 ,0.204 ,0.6 ,0.236 -,0.732 ,0.508 ,0.128 ,0.42 ,0.724 -,0.04 ,0.876 ,0.528 ,0.852 ,0.844 -,0.652 ,0.968 ,0.26 ,0.924 ,0.124 -,0.312 ,0.884 ,0.96 ,0.132 ,0.464 -,0.304 ,0.1 ,0.684 ,0.22 ,0.42 -,0.404 ,0.94 ,0.244 ,0.884 ,0.484 -,0.788 ,0.42 ,0.88 ,0.836 ,0.112 -,0.468 ,0.928 ,0.52 ,0.592 ,0.452 -,0.192 ,0.408 ,0.94 ,0.148 ,0.216 -,0.456 ,0.06 ,0.968 ,0.444 ,0.236 -,0.348 ,0.652 ,0.716 ,0.628 ,0.16 -,0.084 ,0.392 ,0.284 ,0.34 ,0.988 -,0.404 ,0.476 ,0.724 ,0.108 ,0.988 -,0.632 ,0.84 ,0.588 ,0.744 ,0.008 -,0.232 ,0.336 ,0.804 ,0.368 ,0.604) -cp=hd(p.table,alpha) -pv=NULL -if(!is.null(p.obs))w=optimize(hdpv,interval=c(.001,.999),dat=p.table,obs=p.obs)$minimum -list(crit.p.value=cp,adj.p.value=w) -} - -p.crit.n100<-function(alpha=.05,p.obs=NULL){ -p.table=c( -0.516 ,0.516 ,0.14 ,0.42 ,0.124 -,0.872 ,0.396 ,0.692 ,0.92 ,0.452 -,0.048 ,0.34 ,0.18 ,0.952 ,0.348 -,0.004 ,0.74 ,0.04 ,0.612 ,0.052 -,0.888 ,0.416 ,0.096 ,0.16 ,0.62 -,0.132 ,0.536 ,0.96 ,0.24 ,0.512 -,0.672 ,0.54 ,0.204 ,0.128 ,0.296 -,0.828 ,0.4 ,0.324 ,0.388 ,0.988 -,0.568 ,0.764 ,0.82 ,0.392 ,0.992 -,0.996 ,0.804 ,0.416 ,0.308 ,0.924 -,0.1 ,0.016 ,0.368 ,0.264 ,0.94 -,0.56 ,0.156 ,0.68 ,0.616 ,0.252 -,0.724 ,0.38 ,0.544 ,0.02 ,0.092 -,0.692 ,0.912 ,0.04 ,0.58 ,0.072 -,0.416 ,0.912, 1 ,0.252 ,0.428 -,0.068 ,0.504 ,0.692 ,0.452 ,0.932 -,0.668 ,0.912 ,0.056 ,0.86 ,0.908 -,0.276 ,0.54 ,0.424 ,0.556 ,0.248 -,0.42 ,0.94 ,0.632 ,0.544 ,0.168 -,0.824 ,0.44 ,0.136 ,0.9 ,0.984 -,0.34 ,0.984 ,0.428 ,0.208 ,0.216 -,0.024 ,0.744 ,0.34 ,0.644 ,0.488 -,0.716 ,0.984 ,0.292 ,0.512 ,0.908 -,0.336 ,0.908 ,0.528 ,0.364 ,0.924 -,0.884 ,0.928 ,0.608 ,0.06 ,0.624 -,0.588 ,0.536 ,0.88 ,0.224 ,0.124 -,0.184 ,0.416 ,0.004 ,0.84 ,0.784 -,0.016 ,0.692 ,0.892 ,0.04 ,0.084 -,0.844 ,0.38 ,0.512 ,0.392 ,0.28 -,0.312 ,0.96 ,0.084 ,0.216 ,0.34 -,0.544 ,0.952 ,0.88 ,0.024 ,0.292 -,0.456 ,0.076 ,0.932 ,0.78 ,0.304 -,0.06 ,0.692 ,0.996 ,0.132 ,0.82 -,0.592 ,0.872 ,0.812 ,0.132 ,0.392 -,0.84 ,0.736 ,0.188 ,0.132 ,0.736 -,0.236 ,0.44 ,0.684 ,0.62 ,0.484 -,0.828 ,0.2 ,0.688 ,0.404 ,0.78 -,0.948 ,0.968 ,0.284 ,0.776 ,0.672 -,0.016 ,0.088 ,0.712 ,0.776 ,0.324 -,0.788 ,0.74 ,0.584 ,0.456 ,0.572 -,0.896 ,0.976 ,0.164 ,0.576 ,0.112 -,0.724 ,0.268 ,0.18 ,0.232 ,0.224 -,0.996 ,0.692 ,0.552 ,0.524 ,0.756 -,0.1 ,0.684 ,0.416 ,0.98 ,0.02 -,0.452 ,0.6 ,0.94 ,0.148 ,0.808 -,0.272 ,0.484 ,0.404 ,0.064 ,0.64 -,0.564 ,0.108 ,0.832 ,0.232 ,0.784 -,0.504 ,0.364 ,0.784 ,0.584 ,0.616 -,0.708 ,0.576 ,0.668 ,0.984 ,0.428 -,0.984 ,0.72 ,0.736 ,0.66 ,0.876 -,0.508 ,0.2 ,0.444 ,0.94 ,0.808 -,0.404 ,0.436 ,0.104 ,0.656 ,0.616 -,0.524 ,0.884 ,0.24 ,0.34 ,0.424 -,0.052 ,0.46 ,0.872 ,0.452 ,0.132 -,0.704 ,0.508 ,0.22 ,0.972 ,0.632 -,0.5 ,0.544 ,0.02 ,0.112, 1 -,0.204 ,0.728 ,0.46 ,0.556 ,0.732 -,0.228 ,0.472 ,0.068 ,0.344 ,0.972 -,0.404 ,0.816 ,0.368 ,0.38 ,0.912 -,0.948 ,0.108 ,0.308 ,0.584 ,0.816 -,0.692 ,0.024 ,0.604 ,0.74 ,0.696 -,0.48 ,0.888 ,0.356 ,0.464 ,0.548 -,0.232 ,0.76 ,0.892 ,0.096 ,0.18 -,0.596 ,0.556 
,0.56 ,0.776 ,0.368 -,0.66 ,0.856 ,0.4 ,0.968 ,0.356 -,0.752 ,0.648 ,0.62 ,0.888 ,0.932 -,0.86 ,0.448 ,0.248 ,0.936 ,0.22 -,0.364 ,0.92 ,0.424 ,0.316 ,0.684 -,0.812 ,0.652 ,0.372 ,0.308 ,0.616 -,0.816 ,0.04 ,0.304 ,0.864 ,0.356 -,0.632 ,0.82 ,0.976 ,0.612 ,0.112 -,0.088 ,0.7 ,0.568 ,0.948 ,0.084 -,0.848 ,0.452 ,0.004 ,0.392 ,0.796 -,0.528 ,0.372 ,0.472 ,0.404 ,0.48 -,0.244 ,0.144 ,0.944 ,0.852 ,0.52 -,0.436 ,0.396 ,0.388 ,0.596 ,0.496 -,0.972 ,0.424 ,0.424 ,0.144 ,0.772 -,0.1 ,0.748 ,0.192 ,0.508 ,0.78 -,0.176 ,0.944 ,0.136 ,0.564 ,0.724 -,0.76 ,0.6 ,0.032 ,0.504 ,0.78 -,0.936 ,0.76 ,0.124 ,0.62 ,0.092 -,0.768 ,0.852 ,0.556 ,0.456 ,0.468 -,0.984 ,0.7 ,0.568 ,0.288 ,0.048 -,0.928 ,0.892 ,0.728 ,0.32 ,0.836 -,0.788 ,0.352 ,0.748 ,0.128 ,0.92 -,0.352 ,0.236 ,0.56 ,0.192 ,0.72 -,0.516 ,0.668 ,0.652 ,0.056 ,0.876 -,0.916 ,0.124 ,0.176 ,0.652 ,0.088 -,0.508 ,0.8 ,0.112 ,0.1 ,0.264 -,0.836 ,0.912 ,0.376 ,0.792 ,0.876 -,0.892 ,0.284 ,0.892 ,0.088 ,0.376 -,0.24 ,0.98 ,0.416 ,0.96 ,0.848 -,0.632 ,0.304 ,0.5 ,0.716 ,0.388 -,0.976 ,0.24 ,0.32 ,0.808 ,0.168 -,0.352 ,0.064 ,0.7 ,0.42 ,0.224 -,0.176 ,0.988 ,0.224 ,0.904 ,0.592 -,0.696 ,0.42 ,0.376 ,0.696 ,0.44 -,0.328 ,0.816 ,0.312 ,0.968 ,0.432 -,0.784 ,0.92 ,0.6 ,0.928 ,0.784 -,0.96 ,0.54 ,0.872 ,0.816 ,0.728 -,0.688 ,0.436 ,0.796 ,0.42 ,0.464 -,0.112 ,0.328 ,0.168 ,0.244 ,0.052 -,0.864 ,0.312 ,0.992 ,0.832 ,0.584 -,0.396 ,0.596 ,0.72 ,0.684 ,0.448 -,0.88 ,0.9 ,0.748 ,0.424 ,0.66 -,0.644 ,0.188 ,0.528 ,0.98 ,0.58 -,0.716 ,0.352 ,0.512 ,0.716 ,0.548 -,0.988, 0 ,0.36 ,0.4 ,0.784 -,0.212 ,0.756 ,0.956 ,0.5 ,0.512 -,0.852 ,0.216 ,0.004 ,0.596 ,0.504 -,0.56 ,0.672 ,0.988 ,0.76 ,0.924 -,0.108 ,0.62 ,0.412 ,0.44 ,0.816 -,0.664 ,0.924 ,0.62 ,0.816 ,0.184 -,0.244 ,0.644 ,0.172 ,0.3 ,0.22 -,0.02 ,0.572 ,0.156 ,0.768 ,0.484 -,0.576 ,0.992 ,0.192 ,0.428 ,0.736 -,0.708 ,0.888 ,0.136 ,0.192 ,0.948 -,0.244 ,0.528 ,0.188 ,0.956 ,0.036 -,0.016 ,0.636 ,0.048 ,0.42 ,0.184 -,0.16 ,0.496 ,0.512 ,0.18 ,0.052 -,0.972 ,0.488 ,0.988 ,0.284 ,0.252 -,0.876 ,0.244 ,0.072 ,0.712 ,0.684 -,0.78 ,0.468 ,0.504 ,0.732 ,0.84 -,0.496 ,0.168 ,0.256 ,0.092 ,0.916 -,0.76 ,0.152 ,0.72 ,0.14 ,0.356 -,0.824 ,0.172 ,0.248 ,0.624 ,0.932 -,0.856 ,0.612 ,0.52 ,0.584 ,0.904 -,0.576 ,0.528 ,0.092 ,0.132 ,0.288 -,0.096 ,0.516 ,0.316 ,0.448 ,0.792 -,0.756 ,0.748 ,0.016 ,0.52 ,0.64 -,0.924 ,0.688 ,0.824 ,0.392 ,0.908 -,0.852 ,0.652 ,0.184 ,0.244 ,0.288 -,0.324 ,0.324 ,0.736 ,0.628 ,0.816 -,0.868 ,0.576 ,0.584 ,0.92 ,0.812 -,0.104 ,0.48 ,0.884 ,0.816 ,0.24 -,0.024 ,0.204 ,0.32 ,0.452 ,0.44 -,0.036 ,0.96 ,0.716 ,0.648 ,0.04 -,0.256 ,0.684 ,0.296 ,0.484 ,0.772 -,0.52 ,0.068 ,0.26 ,0.352 ,0.096 -,0.348 ,0.18 ,0.556 ,0.276 ,0.956 -,0.944 ,0.052 ,0.916 ,0.52 ,0.624 -,0.668 ,0.98 ,0.112 ,0.144 ,0.644 -,0.348 ,0.488 ,0.88 ,0.16 ,0.056 -,0.66 ,0.964 ,0.316 ,0.14 ,0.36 -,0.62 ,0.168 ,0.124 ,0.852 ,0.58 -,0.512 ,0.424 ,0.024 ,0.676 ,0.192 -,0.636 ,0.544 ,0.78 ,0.712 ,0.368 -,0.156 ,0.068 ,0.056 ,0.112 ,0.26 -,0.448 ,0.68 ,0.316 ,0.9 ,0.78 -,0.62 ,0.264 ,0.856 ,0.04 ,0.936 -,0.6 ,0.128 ,0.328 ,0.924 ,0.94 -,0.708 ,0.476 ,0.044 ,0.172 ,0.648 -,0.356 ,0.688 ,0.888 ,0.136 ,0.976 -,0.212 ,0.652 ,0.624 ,0.9 ,0.76 -,0.396 ,0.736 ,0.812 ,0.1 ,0.868 -,0.844 ,0.952 ,0.076 ,0.616 ,0.964 -,0.064 ,0.28 ,0.576 ,0.944 ,0.532 -,0.596 ,0.704 ,0.952 ,0.832 ,0.18 -,0.928 ,0.504 ,0.072 ,0.4 ,0.432 -,0.644 ,0.168 ,0.784 ,0.516 ,0.408 -,0.224 ,0.476 ,0.992 ,0.588 ,0.668 -,0.324 ,0.064 ,0.592 ,0.96 ,0.652 -,0.24 ,0.964 ,0.988 ,0.06 ,0.068 -,0.828 ,0.736 ,0.432 ,0.508 ,0.92 -,0.34 ,0.832 ,0.504 ,0.192 ,0.88 -,0.532 ,0.168 ,0.128 ,0.46 
,0.456 -,0.992 ,0.328 ,0.728 ,0.72 ,0.488 -,0.728 ,0.848 ,0.372 ,0.688 ,0.324 -,0.748 ,0.628 ,0.936 ,0.556 ,0.8 -,0.088 ,0.26 ,0.66 ,0.412 ,0.264 -,0.432 ,0.148 ,0.24 ,0.672 ,0.216 -,0.268 ,0.512 ,0.296 ,0.412 ,0.564 -,0.224 ,0.196 ,0.42 ,0.424 ,0.168 -,0.852 ,0.144 ,0.96 ,0.008 ,0.568 -,0.196 ,0.704 ,0.624 ,0.6 ,0.452 -,0.628 ,0.272 ,0.056 ,0.536 ,0.364 -,0.364 ,0.696 ,0.272 ,0.472 ,0.008 -,0.112 ,0.924 ,0.82 ,0.428 ,0.96 -,0.748 ,0.308 ,0.912 ,0.472 ,0.544 -,0.744 ,0.072 ,0.36 ,0.604 ,0.98 -,0.308 ,0.304 ,0.352 ,0.024 ,0.956 -,0.82 ,0.692 ,0.572 ,0.036 ,0.86 -,0.896 ,0.976 ,0.284 ,0.764 ,0.24 -,0.68 ,0.788 ,0.316 ,0.068 ,0.06 -,0.688 ,0.26 ,0.496 ,0.416 ,0.388 -,0.58 ,0.2 ,0.508 ,0.412 ,0.344 -,0.212 ,0.932 ,0.588 ,0.212 ,0.936 -,0.232 ,0.26 ,0.82 ,0.764 ,0.648 -,0.924 ,0.508 ,0.84 ,0.66 ,0.46 -,0.788 ,0.596 ,0.092 ,0.656 ,0.244 -,0.68 ,0.628 ,0.732 ,0.56 ,0.596 -,0.908 ,0.376 ,0.64 ,0.628 ,0.824 -,0.36 ,0.764 ,0.484 ,0.892 ,0.576 -,0.128 ,0.972 ,0.4 ,0.444 ,0.856 -,0.744 ,0.32 ,0.004 ,0.58 ,0.116 -,0.636 ,0.368 ,0.696 ,0.904 ,0.536 -,0.04 ,0.132 ,0.812 ,0.916 ,0.468 -,0.736 ,0.108 ,0.684 ,0.96 ,0.284 -,0.068 ,0.832 ,0.736 ,0.248 ,0.624 -,0.26 ,0.964 ,0.316 ,0.504 ,0.2 -,0.36 ,0.46 ,0.8 ,0.164 ,0.284 -,0.372 ,0.792 ,0.808 ,0.716 ,0.148 -,0.232 ,0.724 ,0.86 ,0.692 ,0.204 -,0.484 ,0.444 ,0.432 ,0.384 ,0.256 -,0.732 ,0.084 ,0.316 ,0.264 ,0.18 -,0.236 ,0.592 ,0.42 ,0.76 ,0.556 -,0.116 ,0.72 ,0.252 ,0.632 ,0.72 -,0.476 ,0.896 ,0.784 ,0.328 ,0.852 -,0.548 ,0.132 ,0.692 ,0.92 ,0.596 -,0.268 ,0.204 ,0.852 ,0.948 ,0.88 -,0.82 ,0.328 ,0.7 ,0.684 ,0.16 -,0.868 ,0.44 ,0.912 ,0.192 ,0.168 -,0.844 ,0.4 ,0.32 ,0.768 ,0.52 -,0.8 ,0.464 ,0.884 ,0.448 ,0.908 -,0.116 ,0.136 ,0.036 ,0.368 ,0.076 -,0.424 ,0.224 ,0.72 ,0.008 ,0.932 -,0.732 ,0.764 ,0.088 ,0.788 ,0.852 -,0.392 ,0.66 ,0.904 ,0.524 ,0.612 -,0.256 ,0.892 ,0.912 ,0.512 ,0.984 -,0.912 ,0.94 ,0.204 ,0.408 ,0.156 -,0.868 ,0.1 ,0.988) -cp=hd(p.table,alpha) -pv=NULL -if(!is.null(p.obs))w=optimize(hdpv,interval=c(.001,.999),dat=p.table,obs=p.obs)$minimum -list(crit.p.value=cp,adj.p.value=w) -} - - -mscorciH<-function(x,nboot=1000,alpha=.05,SEED=TRUE,method='hoch', -corfun=pcor,outfun=outpro, crit.pv=NULL,ALL=TRUE,MC=TRUE,pr=TRUE){ -# -# Test the hypothesis of a zero skipped correlation for each pair of variables in -# x, an n-by-p matrix. -# -# Use Hochberg adjusted critical p-values based on adjusted p-values to control the probability of one or more Type I errors. -# -# The function also returns 1-alpha confidence intervals for each of the skipped correlations -# alpha=0.05 is the default. -# By default, Pearson's correlation is computed after outliers are removed via the R function indicated by -# outfun, which defaults to a projection-type method. -# corfun=spear, for example would replace Pearson's correlation with Spearman's correlation. -# -# The default number of bootstrap samples is -# nboot=500 -# -# -if(pr){ -print('Each confidence interval has, approximately, 1-alpha probability coverage') -} -if(SEED)set.seed(2) -xy=elimna(x) -x=as.matrix(x) -p=ncol(x) -p1=p+1 -J=(p^2-p)/2 -x=as.matrix(x) -n=nrow(x) -est<-mscor(x,corfun=corfun,outfun=outfun)$cor -data<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot) -data=listm(t(data)) -if(!MC)bvec<-lapply(data,scorci.sub,x,corfun=corfun,outfun=outfun,STAND=TRUE) -if(MC){ -library(parallel) -bvec<-mclapply(data,scorci.sub,x,corfun=corfun,outfun=outfun,STAND=TRUE) -} -bvec=matl(bvec) # A J by nboot matrix. J=(p^2-p)/2, p=number of IV variables. 
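# Each row of bvec holds the nboot bootstrap estimates for one pair of variables. As in mscorci,
# the two-sided p-value for a pair is based on 2*min(P,1-P), where P is the proportion of bootstrap
# estimates below zero; for n<=120 the p-values are then adjusted using the simulated null
# distributions tabled in p.crit.n30 through p.crit.n100, and for n>120 no adjustment is made.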
- -# -# Compute critical p-values when p=1 and then use Hochberg adjustment -# - -phat=0 -sig=matrix(NA,p,p) -sigadj=matrix(NA,p,p) -ic=0 -for(j in 1:p){ -for(k in 1:p){ -if(j40){ -if(n<=70){ -vv=p.crit.n60(alpha[1],sig[j,k]) -sigadj[j,k]=vv$adj.p.value -crit.p=vv$crit.p.value -} -} -if(n>70){ -if(n<=100){ -vv=p.crit.n80(alpha[1],sig[j,k]) -sigadj[j,k]=vv$adj.p.value -crit.p=vv$crit.p.value -} -} -if(n>100){ -if(n<=120) -{ -vv=p.crit.n100(alpha[1],sig[j,k]) -crit.p=vv$crit.p.value -sigadj[j,k]=vv$adj.p.value -}} -if(n>120){ # no adjustment -sigad[j,k]=sig[j,k] #i.e., no adjustment -crit.p=alpha -}}}} -ci.mat=matrix(NA,nrow=J,ncol=7) -dimnames(ci.mat)=list(NULL,c('Var i','Var j','Est','ci.low','ci.up','P-value','FWE Adjusted p-value' )) -crit.pv=crit.p -for(j in 1:J)bvec[j,]<-sort(bvec[j,]) -if(J==1)bvec=as.matrix(bvec) -ic=0 -if(is.null(crit.pv))crit.pv=alpha[1] -for(j in 1:p){ -for(k in 1:p){ -if(j=60, this might suffice when testing at the 0.05 level. But power might be increased by using -# hoch=FALSE at the expense of higher execution time. -# -# If alpha is less than .05, say .025 or .01, hoch=FALSE is recommended. -# -# Note: confidence intervals are reported only when hoch=FALSE. -# -# pvals can be used to supply a vector of p-values estimating the distribution of the minimum p-value among the tests that are -# are performed when all hypotheses are true. -# -# iter=500: number of replications used to estimate the distribution of the minimum p-value. -# Or use the argument crit.pv as indicated below. -# Note: in the journal article dealing with this method, iter=1000 was used. - -# By default -# pvals=NULL, the functions computes these values if the p-values suggest that there might be -# significant results and hoch=FALSE; this can result in high execution time. -# The pvals are computed via the R function -# mscorci.cr(n,p,iter=500,corfun=pcor,alpha=alpha,SEED=TRUE). -# -# Critical p-values are a function of n and p. Once known, can supply them via the argument -# crit.pv as follows: -# -# pv=scorregci.cr(n,p)$crit.p.values -# scorregci(x,crit.pv=pv) -# -# When hoch=TRUE, unadjusted confidence intervals are returned. -# -# -# -if(pr){ -if(!hoch){print('To reduce execution time, critical p-values are not computed when the observed p.values are too large to') -print('reject at the 0.05 level. To compute them any way, use the R function scorregci.cr') -} -if(hoch){ -print('Hochberg adjusted p-values are used.') -print('This is reasonable when n>=60 and alpha=.05. Otherwise suggest using hoch=FALSE') -print('To get confidence intervals, use hoch=FALSE') -}} -if(SEED)set.seed(2) -xy=elimna(cbind(x,y)) -x=as.matrix(x) -p=ncol(x) -p1=p+1 -x=xy[,1:p] -y=xy[,p1] -x=as.matrix(x) -n=nrow(x) -est<-scorreg(x,y,corfun=corfun,outfun=outfun,SEED=FALSE,ALL=ALL,...)$cor -data<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot) -data=listm(t(data)) -if(!MC)bvec<-lapply(data,scorreg.sub,xy,corfun=corfun,outfun=outfun,ALL=ALL,...) -if(MC){ -library(parallel) -bvec<-mclapply(data,scorreg.sub,xy,corfun=corfun,outfun=outfun,ALL=ALL,...) -} -bvec=matl(bvec) # A p by nboot matrix. - -phat=0 -sig=0 -for(j in 1:p){ -phat[j]=sum(bvec[j,] < 0)/nboot -sig[j] <- 2 * min(phat[j], 1 - phat[j]) -} -# -# Compute critical p-values if any of the p-values are sufficiently small. 
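-# (These critical p-values estimate the distribution of the minimum p-value over the p
-# tests when all of the hypotheses are true. Computing them is expensive, so the code
-# below calls scorregci.cr only when at least one observed p-value is small enough that
-# a rejection is possible.)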
-# -FLAG=FALSE -if(p==2 && sig[1]<=.15){ -FLAG=TRUE -if(hoch){ -if(pr)print('If the p.value is <=.15, suggest using hoch=FALSE') -}} -if(p>2){ -if(min(sig)<=alpha[1])FLAG=TRUE -} -if(FLAG){ -if(is.null(crit.pv)){ -if(!hoch){ -if(pr)print('Computing critical p-values. Execution time might require several minutes') -temp=scorregci.cr(nval,p,iter=iter,corfun=corfun,alpha=alpha,SEED=pval.SEED,TV=TRUE,ALL=ALL) #returns tval in case want to adjust p-values. -# Need to add code to do this. (See mscorpbMC for how this might be done.) -crit.pv=temp$crit.p.values -}}} -ci.mat=matrix(NA,nrow=p,ncol=3) -dimnames(ci.mat)=list(NULL,c('Var','ci.low','ci.up')) -for(j in 1:p)bvec[j,]<-sort(bvec[j,]) -if(p==1)bvec=as.matrix(bvec) -ic=0 -if(is.null(crit.pv))crit.pv=alpha[1] -for(j in 1:p){ -ic=ic+1 -ci.mat[ic,1]=j -ihi<-floor((1-crit.pv[1]/2)*nboot+.5) -ilow<-floor((crit.pv[1]/2)*nboot+.5) -ci.mat[ic,2]=bvec[ic,ilow] -ci.mat[ic,3]=bvec[ic,ihi] -} - -p.mat=matrix(NA,nrow=p,ncol=3) -p.mat[,1]=est -p.mat[,2]=sig -adj.p=NULL -if(hoch){ -adj.p=p.adjust(sig,method='hochberg') -p.mat[,3]=adj.p -} -dimnames(p.mat)=list(NULL,c('Est.','p-value','adjusted p.value')) -list(Estimates=p.mat,confidence.int=ci.mat,critical.p.values=crit.pv) -} - -scorreg.sub<-function(data,xy,corfun=corfun,outfun=outfun,ALL=ALL,...){ -p1=ncol(xy) -p=p1-1 -est<-scorreg(xy[data,1:p],xy[data,p1],corfun=corfun,SEED=FALSE,ALL=ALL,...)$cor -est -} - -scorreg.cr<-function(n,p,iter=500,nboot=500,corfun=pcor,alpha=c(.05,.025,.01),TV=FALSE,ALL=TRUE,SEED=TRUE,outfun=outpro){ -# -# Determine critical p-values for the function scorregci -# Returns the estimate of the distribution of the null minimum p-value -# plus the critical p-values corresponding to the levels indicated by -# alpha. -# -# p = number or predictors -# -# Function assumes that a multicore processor is used and that the R package parallel has been installed. -# -if(SEED)set.seed(65) -x=list() -library(parallel) -p1=p+1 -for(i in 1:iter){ -x[[i]]=rmul(n,p=p1) -} -tval=mclapply(x,scorreg.cr.sub,p=p,corfun=corfun,nboot=nboot,ALL=ALL) -tval=list2vec(tval) -crit.p=NA -for(j in 1:length(alpha))crit.p[j]=hd(tval,alpha[j]) -if(!TV)tval=NULL -list(crit.p.values=crit.p,tval=tval) -} - -scorreg.cr.sub<-function(x,corfun,p=p,nboot=500,ALL=ALL,outfun=outfun){ -p1=p+1 -v=scorregci(x[,1:p],x[,p1],SEED=FALSE,corfun=corfun,nboot=nboot,crit.pv=1,pr=FALSE,hoch=TRUE,ALL=ALL,outfun=outfun)$Estimates[,2] -mp=min(as.vector(v),na.rm=T) -mp -} - -ols.ridge<-function(x,y,k=NULL,xout=FALSE,outfun=outpro,MSF=TRUE){ -library(MASS) -x=as.matrix(x) -if(ncol(x)==1)stop('Should have two or more independent variables.') -xy=elimna(cbind(x,y)) -x=as.matrix(x) -p=ncol(x) -p1=p+1 -x=xy[,1:p] -y=xy[,p1] -x=as.matrix(x) -if(xout){ -flag<-outfun(x)$keep -x<-x[flag,] -x<-as.matrix(x) -y<-y[flag] -} -if(is.null(k)){ -if(!MSF)k=ridge.est.k(x,y) -else{ -ires=ols(x,y)$residuals -sigh=sqrt(sum(ires^2)/(n-p-1)) -k=p^(1+1/p)*sigh -} -} -a=lm.ridge(y~x,lambda=k) -a=coef(a) -list(coef=a) -} - -rob.ridge<-function(x,y,Regfun=tsreg,k=NULL,xout=FALSE,outfun=outpro,plotit=FALSE,MSF=TRUE, -STAND=TRUE,INT=FALSE,locfun=median,...){ -# -# Do robust regression based on the robust estimator indicated by -# Regfun -# which defaults to Theil--Sen -# -# When MSF=FALSE, the bias parameter, k, is estimated based on results in -# Kidia, G. (2003). -# Performance of Some New Ridge -# Regression Estimators Communications in Statistics - # Simulation and Computation - #Vol. 32, No. 2, pp. 
419--435 (in file CommB2003.pdf) -# The method was derived when using OLS. -# -# MSF=TRUE, use the method in Shabbir et al.(2032). doi.org/10.1080/03610918.2023.2186874 -# -# For results on the LTS version see -# Kan et al. 2013, J Applied Statistics, 40, 644-655 -# However, suggest using this function when testing hypotheses in conjunction with regci or regciMC -# -x=as.matrix(x) -xy=elimna(cbind(x,y)) -x=as.matrix(x) -n=length(y) -p=ncol(x) -p1=p+1 -x=xy[,1:p] -y=xy[,p1] -x=as.matrix(x) -if(xout){ -if(identical(outfun,outblp))flag=outblp(x,y,plotit=FALSE)$keep -else flag<-outfun(x,plotit=plotit)$keep -x<-x[flag,] -x<-as.matrix(x) -y<-y[flag] -} -if(STAND){ -x=standm(x) -y=y-mean(y) -} -if(is.null(k)){ -if(!MSF)k=ridge.est.k(x,y,regfun=Regfun,...) -else{ -ires=Regfun(x,y)$residuals -sigh=sqrt(sum(ires^2)/(n-p-1)) -k=p^(1+1/p)*sigh -} -} -n=nrow(x) -init=Regfun(x,y,...)$coef -y=as.matrix(y) -if(!INT){ -kbeta=diag(k,nrow=p,ncol=p) -slopes=as.matrix(init[2:p1]) -xtx=t(x)%*%x -beta=solve(xtx+kbeta)%*%xtx -beta=beta%*%slopes -res=y-x%*%beta -b0=locfun(res) -beta=as.vector(beta) -beta=c(b0,beta) -} -if(INT){ -kbeta=diag(k,nrow=p1,ncol=p1) -slopes=as.matrix(init) -x1=cbind(rep(1,n),x) -xtx=t(x1)%*%x1 -beta=solve(xtx+kbeta)%*%xtx -beta=beta%*%slopes -} -res<-y-x%*%beta[2:p1]-beta[1] -list(coef=as.vector(beta),k=k,residuals=res) -} - -rob.ridge.test<-function(x,y,regfun=tsreg,xout=FALSE,outfun=outpro,MC=FALSE,method='hoch', -nboot=599,alpha = 0.05,MSF=TRUE,SEED=TRUE,...){ -# -# -# Test the global hypothesis that all slopes are equal to zero, -# If it rejects, it also suggests which slope is significant, but it cannot reject more than one slope. -# -# A robust ridge estimator is used base on the robust estimator indicated by the argument -# regfun -# -# MC=TRUE: takes advantage of a multicore processor. -# -p=ncol(x) -if(p==1)stop('Should have two or more independent variables') -p1=p+1 -if(!MC)a=regci(x,y,regfun=rob.ridge,Regfun=regfun,xout=xout, -alpha=alpha,SEED=SEED,MSF=MSF)$regci -if(MC)a=regciMC(x,y,regfun=rob.ridge,Regfun=regfun,xout=xout, -alpha=alpha,SEED=SEED,MSF=MSF)$regci -pv=a[2:p1,5] -padj=min(p.adjust(pv,method=method)) -id=NULL -if(padj<=alpha)id=which(pv==min(pv)) -list(p.value=padj,a.sig.slope=id) -} - -ridge.test<-function(x,y,k=NULL,alpha=.05,pr=TRUE,xout=FALSE,outfun=outpro,STAND=TRUE,method='hoch', -locfun=mean,scat=var,MSF=TRUE,...){ -# -# -# Using a ridge estimator -# test the hypothesis of a zero slope for each of p independent variables. -# If the smallest p-value is less than or equal to alpha, reject the corresponding hypothesis -# as well as the hypothesis that all slopes are zero. -# But no other slopes can be declared significant due to the bias associated with the -# ridge estimator. -# -# The method uses an analog of the heteroscedastic method -# recommended by Long and Ervin (2000). -# p-values are adjusted to control the probability of one or more Type I errors;' -# Hochberg's method is used by default. -# -# Advantage: -# Power tends to be at least as high as OLS and potentially much higher -# But when the null hypothsis is false, confidence intervals can be highly -# inaccurate. -# -# -# STAND=TRUE: x is standardized and y is centered, based on measues of location -# and scatter indicatd by -# locfun and scat -# locfun=median would use the median and scat=madsq uses MAD. -# For n<=40, this helps control the Type I error probability. 
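-# Illustrative usage, a sketch only (simulated data; ridge.test relies on other functions
-# in this file such as ols, ols.ridge, standm and outpro):
-# set.seed(1)
-# x=matrix(rnorm(200),ncol=4)   # n=50 observations on 4 independent variables
-# y=x[,1]+rnorm(50)
-# ridge.test(x,y)               # test each slope via the ridge estimator
-# ridge.test(x,y,xout=TRUE)     # same analysis with leverage points removed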
-# -x<-as.matrix(x) -if(nrow(x) != length(y))stop('Length of y does not match number of x values') -m<-cbind(x,y) -m<-elimna(m) -p=ncol(x) -if(p==1)stop('There should be at least two independent variables') -p1=p+1 -x=m[,1:p] -y=m[,p1] -n=nrow(x) -df=n-p1 -if(xout){ -if(identical(outfun,outblp))flag=outblp(x,y,plotit=FALSE)$keep -else - flag<-outfun(x,plotit=FALSE)$keep -x<-x[flag,] -x<-as.matrix(x) -y<-y[flag] -n=nrow(x) -df=n-p1 -} -if(STAND){ -x=standm(x,locfun=locfun,scat=scat) -y=y-locfun(y) -} -if(is.null(k)){ -if(!MSF)k=ridge.est.k(x,y) -else{ -ires=ols(x,y)$residuals -sigh=sqrt(sum(ires^2)/(n-p-1)) -k=p^(1+1/p)*sigh -} -} -x1=cbind(rep(1,n),x) -ols.est=ols(x,y)$coef -est=ols.ridge(x,y,k=k)$coef -x<-cbind(rep(1,nrow(x)),x[,1:ncol(x)]) -res<-y-x1%*%est -p=ncol(x) -kmat=matrix(0,p,p) -diag(kmat)=k -xtx<-solve(t(x)%*%x+kmat) -h<-diag(x%*%xtx%*%t(x)) -hc3<-xtx%*%t(x)%*%diag(as.vector(res^2/(1-h)^2))%*%x%*%xtx -df<-nrow(x)-ncol(x) -crit<-qt(1-alpha/2,df) -al<-ncol(x) -ci<-matrix(NA,nrow=p,ncol=6) -se=sqrt(diag(hc3)) -p=p-1 -for(j in 2:p1){ -ci[j,1]=se[j] -ci[j,2]=est[j]/sqrt(hc3[j,j]) -ci[j,3]=2*(1-pt(abs(ci[j,2]),df)) -ci[j,5]=est[j] -ci[j,6]=ols.est[j] -} -ci[,4]=p.adjust(ci[,3],method=method) -sig='No slope is signficant' -if(sum(ci[2:al,4]<=alpha)>0){ -id=which(ci[2:al,4]==min(ci[2:al,4])) -sig=paste('Slope',id, 'is signficant') -} -ci=ci[2:p1,] # Eliminate results related to the intercept, not relevant -vlabs=NA -for(j in 1:p)vlabs[j]=paste('Slope',j) -dimnames(ci)=list(vlabs,c('s.e.','test.stat','p-value','Adjusted p','Ridge.Est.','OLS.est')) -list(output=ci,Sig.slope=sig) -} - -lasso.est<-function(x,y,xout=FALSE,STAND=TRUE,outfun=outpro,regout=FALSE,lam=NULL,...){ -# -# Lasso regression via the R package glmnet. -# This function includes the option of eliminating leverage points -# This function is for convenience, it returns the estimates of the -# coefficients only. The R function cv.glmnet provides more complete details -# and includes other options. -# -# xout=TRUE eliminate leverage points with the function -# outfun -# regout=TRUE eliminate regression outliers with the function elo -# -library(glmnet) -x<-as.matrix(x) -p1<-ncol(x)+1 -p<-ncol(x) -xy<-cbind(x,y) -xy<-elimna(xy) -x<-xy[,1:p] -y<-xy[,p1] -nrem=length(y) -if(regout){ -flag=elo(x,y,outfun=outfun,lev=TRUE,reg=xout)$keep -xy<-xy[flag,] -x<-xy[,1:p] -y<-xy[,p1] -xout=FALSE -} -if(xout){ -flag<-outfun(x,plotit=FALSE,...)$keep -xy<-xy[flag,] -x<-xy[,1:p] -y<-xy[,p1] -} -if(STAND)x=standm(x) -z=cv.glmnet(x, y, family='gaussian',lam=lam) -e=coef(z,s=z$lambda.min) -e=as.vector(e) -list(coef=e,lambda.min.used=z$lambda.min) -} - -elo<-function(x,y,lev=TRUE,reg=TRUE,outfun=outpro,plotit=FALSE,SEED=TRUE){ -# -# -# lev=TRUE, remove points flagged as leverage points -# reg=TRUE, remove points flagged as regression outliers -# So lev=TRUE and reg=TRUE removes both -# -# For regression outliers, the Rousseeuw and van Zomeren (1990) method is used. -# (See section 10.15.1 in Wilcox, 2017, Intro to Robust Estimation and -# Hypothsis Testing) -# -# outfun: the function used to check for leverage points. -# - -a=reglev(x,y,plotit=plotit,SEED=SEED) -o=outfun(x,plotit=plotit) -L=NULL -B=NULL -if(lev){ -if(length(o$out.id)>0)L=o$out.id -} -if(reg){ -if(length(a$regout)>0)B=a$regout -} -e=unique(c(L,B)) -n=length(y) -id=c(1:n) -keep=id[-e] -list(keep=keep,reg.out.id=L,leverage.id=B) -} - - -scorall<-function(x,outfun=outpro,corfun=pcor,RAN=FALSE,...){ -# -# Eliminate outliers and compute a correlation based on the -# remaining data. 
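-# Illustrative usage, a sketch only (outpro and pcor are defined elsewhere in this file):
-# set.seed(1)
-# x=matrix(rnorm(100),ncol=2)
-# scorall(x)                    # skipped Pearson correlation
-# scorall(x,corfun=spear)       # skipped Spearman correlation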
-# -x=elimna(x) -if(!RAN)flag=outpro(x)$keep -if(RAN)flag=outpro.depth(x)$keep -est=corfun(x[flag,],...)$cor -est -} - - - -splotg5<-function(x1,x2=NULL,x3=NULL,x4=NULL,x5= NULL,xlab="X",ylab="Rel. Freq."){ -# -# Frequency plot for up to five variables. -# -# -freqx2=NULL -freqx3=NULL -freqx4=NULL -freqx5=NULL -x1<-x1[!is.na(x1)] -x2<-x2[!is.na(x2)] -x3<-x3[!is.na(x3)] -x4<-x4[!is.na(x4)] -x5<-x5[!is.na(x5)] - -xall=c(x1,x2,x3,x4,x5) -xall=xall[!is.na(xall)] -temp=sort(unique(xall)) -XL=list(x1,x2,x3,x4,x5) -NN=0 -for(j in 1:5)if(!is.null(XL[[j]]))NN=NN+1 -freqx1<-NA -for(i in 1:length(temp)){ -freqx1[i]<-sum(x1==temp[i]) -} -freqx1<-freqx1/length(x1) -if(!is.null(x2)){ -freqx2<-NA -for(i in 1:length(temp)){ -freqx2[i]<-sum(x2==temp[i]) -} -freqx2<-freqx2/length(x2) -} -if(!is.null(x3)){ -freqx3<-NA -for(i in 1:length(temp)){ -freqx3[i]<-sum(x3==temp[i]) -} -freqx3<-freqx3/length(x3) -} -if(!is.null(x4)){ -x4<-x4[!is.na(x4)] -freqx4<-NA -for(i in 1:length(temp)){ -freqx4[i]<-sum(x4==temp[i]) -} -freqx4<-freqx4/length(x4) -} -if(!is.null(x5)){ -x5<-x5[!is.na(x5)] -freqx5<-NA -for(i in 1:length(temp)){ -freqx5[i]<-sum(x5==temp[i]) -} -freqx5<-freqx5/length(x5) -} -X=rep(temp,NN) -pts=c(freqx1,freqx2,freqx3,freqx4,freqx5) -plot(X,pts,type="n",xlab=xlab,ylab=ylab) -points(X,pts) -lines(temp,freqx1) -if(NN>=2)lines(temp,freqx2,lty=2) -if(NN>=3)lines(temp,freqx3,lty=3) -if(NN>=4)lines(temp,freqx4,lty=4) -if(NN>=5)lines(temp,freqx5,lty=5) -} - -freq5<-function(x1,x2=NULL,x3=NULL,x4=NULL,x5= NULL,xlab="X",ylab="Rel. Freq."){ -# -# Compute relative frequencies associated with the sample space for up to five variables. -# -# -temp2=NULL -temp3=NULL -temp4=NULL -temp5=NULL -freqx2=NULL -freqx3=NULL -freqx4=NULL -freqx5=NULL - -x1<-x1[!is.na(x1)] -temp1<-sort(unique(x1)) -freqx1<-NA -for(i in 1:length(temp1)){ -freqx1[i]<-sum(x1==temp1[i]) -} -freqx1<-freqx1/length(x1) -N=1 - -if(!is.null(x2)){ -N=2 -x2<-x2[!is.na(x2)] -temp2<-sort(unique(x2)) -freqx2<-NA -for(i in 1:length(temp2)){ -freqx2[i]<-sum(x2==temp2[i]) -} -freqx2<-freqx2/length(x2) -} - -if(!is.null(x3)){ -N=3 -x3<-x3[!is.na(x3)] -temp3<-sort(unique(x3)) -freqx3<-NA -for(i in 1:length(temp3)){ -freqx3[i]<-sum(x3==temp3[i]) -} -freqx3<-freqx3/length(x3) -} - -if(!is.null(x4)){ -N=4 -x4<-x4[!is.na(x4)] -temp4<-sort(unique(x4)) -freqx4<-NA -for(i in 1:length(temp4)){ -freqx4[i]<-sum(x4==temp4[i]) -} -freqx4<-freqx4/length(x4) -} - -if(!is.null(x5)){ -N=5 -x5<-x5[!is.na(x5)] -temp5<-sort(unique(x5)) -freqx5<-NA -for(i in 1:length(temp5)){ -freqx5[i]<-sum(x5==temp5[i]) -} -freqx5<-freqx5/length(x5) -} - -v=list() -v[[1]]=cbind(temp1,freqx1) -v[[2]]=cbind(temp2,freqx2) -v[[3]]=cbind(temp3,freqx3) -v[[4]]=cbind(temp4,freqx4) -v[[5]]=cbind(temp5,freqx5) -for(j in 1:N)dimnames(v[[j]])=list(NULL,c('Value','Rel. Freq')) -v -} - -splotg2=splotg5 -#s2plot=splotg5 #Used in earlier versions. - -trim2gmul<-function(x,y, tr = 0.2, alpha = 0.05){ -# -# For two independent p-variate distributions, apply yuen to each column of data -# FWE controlled with Hochberg's method -# -# x and y are matrices having p columns. (Can have list mode as well.) 
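-# Illustrative usage, a sketch only (yuen is defined elsewhere in this file):
-# set.seed(1)
-# x=matrix(rnorm(60),ncol=3)    # group 1: n=20 on p=3 variables
-# y=matrix(rnorm(75),ncol=3)    # group 2: n=25 on p=3 variables
-# trim2gmul(x,y)                # Yuen's test on each variable, FWE controlled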
-# -if(!is.matrix(x))x<-matl(x) -if(!is.matrix(x))stop('Data must be stored in a matrix or in list mode.') -if(!is.matrix(y))y<-matl(y) -if(!is.matrix(y))stop('Data must be stored in a matrix or in list mode.') - -J<-ncol(x) -if(J!=ncol(y))stop('x and y should have the same number of columns') - -xbar<-vector('numeric',J) -ncon<-J -dvec<-alpha/c(1:ncon) -psihat<-matrix(0,J,4) -dimnames(psihat)<-list(NULL,c('Variable','difference','ci.lower','ci.upper')) -test<-matrix(0,J,5) -dimnames(test)<-list(NULL,c('Variable','test','p.value','p.crit','se')) -temp1<-NA -nval=NULL -for (d in 1:J){ -psihat[d,1]<-d -#dval=na.omit(x[,d]) -#nval[d]=length(dval) -temp=yuen(x[,d],y[,d],tr=tr) -test[d,1]<-d -test[d,2]<-temp$teststat -test[d,3]=temp$p.value -test[d,5]<-temp$se -psihat[d,2]<-temp$dif -psihat[d,3]<-temp$ci[1] -psihat[d,4]<-temp$ci[2] -} -temp1=test[,3] -temp2<-order(0-temp1) -zvec<-dvec[1:ncon] -test[temp2,4]<-zvec -num.sig=sum(test[,3]<=test[,4]) -list(n=c(nrow(x),nrow(y)),test=test,psihat=psihat,num.sig=num.sig) -} - -loc2gmulpb<-function(x,y,est=tmean,nboot=2000,alpha = 0.05,SEED=TRUE,...){ -# -# For two independent p-variate distributions, apply yuen to each column of data -# FWE controlled with Hochberg's method -# -# x and y are matrices having p columns. (Can have list mode as well.) -# -if(!is.matrix(x))x<-matl(x) -if(!is.matrix(x))stop('Data must be stored in a matrix or in list mode.') -if(!is.matrix(y))y<-matl(y) -if(!is.matrix(y))stop('Data must be stored in a matrix or in list mode.') - -J<-ncol(x) -if(J!=ncol(y))stop('x and y should have the same number of columns') - -xbar<-vector('numeric',J) -ncon<-J -dvec<-alpha/c(1:ncon) -psihat<-matrix(0,J,4) -dimnames(psihat)<-list(NULL,c('Variable','difference','ci.lower','ci.upper')) -test<-matrix(0,J,4) -dimnames(test)<-list(NULL,c('Variable','p.value','p.crit','se')) -temp1<-NA -nval=NULL -for (d in 1:J){ -psihat[d,1]<-d -#temp=yuen(x[,d],y[,d],tr=tr) -temp=pb2gen(x[,d],y[,d],est=est,SEED=SEED,...) -test[d,1]<-d -#test[d,2]<-temp$teststat -test[d,2]=temp$p.value -test[d,4]<-sqrt(temp$sq.se) -psihat[d,2]<-temp$est.dif -psihat[d,3]<-temp$ci[1] -psihat[d,4]<-temp$ci[2] -} -temp1=test[,2] -temp2<-order(0-temp1) -zvec<-dvec[1:ncon] -test[temp2,3]<-zvec -num.sig=sum(test[,2]<=test[,3]) -list(n=c(nrow(x),nrow(y)),test=test,psihat=psihat,num.sig=num.sig) -} - - -trimmulCI<-function(x, tr = 0.2, alpha = 0.05,null.value=0,nboot=2000,SEED=TRUE,MC=TRUE){ -# -# For J dependent random variables, apply trimci to each. -# Confidence intervals are designed to have simultaneous probability coverage 1-alpha -# Useful when the number of variables is large say >20 and n is very small <=20 -# -# x is a matrix having J columns. (Can have list mode as well.) -# -# Output: -# num.sig = number of significant results. 
-# -# -if(SEED)set.seed(2) -if(!is.matrix(x))x<-matl(x) -if(!is.matrix(x))stop('Data must be stored in a matrix or in list mode.') -J<-ncol(x) -xbar<-vector('numeric',J) -ncon<-J -psihat<-matrix(0,J,4) -dimnames(psihat)<-list(NULL,c('Variable','estimate','ci.lower','ci.upper')) -test<-matrix(0,J,4) -dimnames(test)<-list(NULL,c('Variable','test','p.value','se')) -# Determine critical p-value -p.crit=trimmul.crit(x,tr=tr,nboot=nboot,alpha=alpha,SEED=SEED,MC=MC) -# -for (d in 1:J){ -psihat[d,1]<-d -dval=na.omit(x[,d]) -nval[d]=length(dval) -temp=trimci(dval,tr=tr,pr=FALSE,null.value=null.value,alpha=p.crit) -test[d,1]<-d -test[d,2]<-temp$test.stat -test[d,3]=temp$p.value -test[d,4]<-temp$se -psihat[d,2]<-temp$estimate -psihat[d,3]<-temp$ci[1] -psihat[d,4]<-temp$ci[2] -} -CI.sig=sum(psihat[,3]>null.value)+sum(psihat[,4]nmax){ -nmin=min(c(nmin,100)) -} -B=list() -M=matrix(NA,nrow=nmin,ncol=J) -for(i in 1:nreps){ -for(j in 1:J)M[,j]=sample(x[[j]],nmin) -B[[i]]=M -} -L=lapply(B,linWMWMC.sub,con=con) -ef.size=NA -for(j in 1:length(L))ef.size[j]=linES.sub(L[[j]],locfun=locfun,...) -ef=mean(ef.size) -} -if(POOL){ -y=list() -id1=which(con==1) -id2=which(con==-1) -v1=pool.a.list(x[id1]) -v2=pool.a.list(x[id2]) -if(length(v1)*length(v2)=nmax){ -B=list() -M=matrix(NA,nrow=nmin,ncol=J) -for(i in 1:nreps){ -for(j in 1:J)M[,j]=sample(x[[j]],nmin) -B[[i]]=M -} -L=lapply(B,linWMWMC.sub,con=con) -ef.size=NA -for(j in 1:length(L))ef.size[j]=linES.sub(L[[j]],locfun=locfun,...) -ef=mean(ef.size) -}} -list(Effect.Size=ef) -} - -linES.sub<-function(L,locfun,...){ -est=locfun(L,...) -if(est>=0)ef.size=mean(L-est<=est) -ef.size -} - - -rmlinES<-function(x, con = NULL){ -# -# Dependent groups: -# For each linear contrast, compute Algina et al. effect size based on the linear sum -# -if(!is.matrix(x))x<-matl(x) -if(!is.matrix(x))stop('Data must be stored in a matrix or in list mode.') -J=ncol(x) -if(is.null(con)){ -C=(J^2-J)/2 -con=matrix(0,ncol=C,nrow=J) -ic=0 -for(j in 1:J){ -for(k in 1:J){ -if(j 10){ -avec<-.05/c(11:ncon) -dvec<-c(dvec,avec) -}} -if(alpha==.01){ -dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) -if(ncon > 10){ -avec<-.01/c(11:ncon) -dvec<-c(dvec,avec) -}} -if(hoch)dvec<-alpha/c(1:ncon) -if(alpha != .05 && alpha != .01)dvec<-alpha/c(1:ncon) -if(sum(con^2)==0){ -flagcon<-TRUE -psihat<-matrix(0,CC,6) -dimnames(psihat)<-list(NULL,c('Group','Group','psihat','ci.lower','ci.upper','Q.effect')) -test<-matrix(NA,CC,6) -dimnames(test)<-list(NULL,c('Group','Group','test','p.value','p.crit','se')) -temp1<-0 -jcom<-0 -for (j in 1:J){ -for (k in 1:J){ -if (j < k){ -jcom<-jcom+1 -q1<-(nrow(x)-1)*winvar(x[,j],tr) -q2<-(nrow(x)-1)*winvar(x[,k],tr) -q3<-(nrow(x)-1)*wincor(x[,j],x[,k],tr)$cov -sejk<-sqrt((q1+q2-2*q3)/(h1*(h1-1))) -if(!dif){ -test[jcom,6]<-sejk -test[jcom,3]<-(xbar[j]-xbar[k])/sejk -temp1[jcom]<-2 * (1 - pt(abs(test[jcom,3]), df)) -test[jcom,4]<-temp1[jcom] -psihat[jcom,1]<-j -psihat[jcom,2]<-k -test[jcom,1]<-j -test[jcom,2]<-k -psihat[jcom,3]<-(xbar[j]-xbar[k]) -} -if(dif){ -dv<-x[,j]-x[,k] -test[jcom,6]<-trimse(dv,tr) -temp<-trimci(dv,alpha=alpha/CC,pr=FALSE,tr=tr) -test[jcom,3]<-temp$test.stat -temp1[jcom]<-temp$p.value -test[jcom,4]<-temp1[jcom] -psihat[jcom,1]<-j -psihat[jcom,2]<-k -test[jcom,1]<-j -test[jcom,2]<-k -psihat[jcom,3]<-mean(dv,tr=tr) -psihat[jcom,4]<-temp$ci[1] -psihat[jcom,5]<-temp$ci[2] -psihat[jcom,6]=depQS(x[,j],x[,k],locfun=locfun,...)$Q.effect -} -}}} -if(hoch)dvec<-alpha/c(1:ncon) -temp2<-order(0-temp1) -zvec<-dvec[1:ncon] 
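-# Step-down assignment of critical levels (added clarification): the p-values are ordered
-# from largest to smallest and paired with the critical values in dvec (.05, .025, .0169,
-# ... when alpha=.05, or alpha/c(1:ncon) when hoch=TRUE); the level paired with each test
-# is reported in the p.crit column, and a result is significant when its p-value is at
-# most its p.crit value.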
-sigvec<-(test[temp2,4]>=zvec) -if(sum(sigvec)0){ -if(nrow(con)!=ncol(x))warning('The number of groups does not match the number - of contrast coefficients.') -ncon<-ncol(con) -psihat<-matrix(0,ncol(con),5) -dimnames(psihat)<-list(NULL,c('con.num','psihat','ci.lower','ci.upper','Q.effect')) -test<-matrix(0,ncol(con),5) -dimnames(test)<-list(NULL,c('con.num','test','p.value','p.crit','se')) -temp1<-NA -for (d in 1:ncol(con)){ -psihat[d,1]<-d -psihat[d,5]=lindQS(x,con[,d],locfun=locfun,...)$Q.effect -if(!dif){ -psihat[d,2]<-sum(con[,d]*xbar) -sejk<-0 -for(j in 1:J){ -for(k in 1:J){ -djk<-(nval-1)*wincor(x[,j],x[,k], tr)$cov/(h1*(h1-1)) -sejk<-sejk+con[j,d]*con[k,d]*djk -}} -sejk<-sqrt(sejk) -test[d,1]<-d -test[d,2]<-sum(con[,d]*xbar)/sejk -test[d,5]<-sejk -temp1[d]<-2 * (1 - pt(abs(test[d,2]), df)) -} -if(dif){ -for(j in 1:J){ -if(j==1)dval<-con[j,d]*x[,j] -if(j>1)dval<-dval+con[j,d]*x[,j] -} -temp1[d]<-trimci(dval,tr=tr,pr=FALSE)$p.value -test[d,1]<-d -test[d,2]<-trimci(dval,tr=tr,pr=FALSE)$test.stat -test[d,5]<-trimse(dval,tr=tr) -psihat[d,2]<-mean(dval,tr=tr) -}} -test[,3]<-temp1 -temp2<-order(0-temp1) -zvec<-dvec[1:ncon] -sigvec<-(test[temp2,3]>=zvec) -if(sum(sigvec) 10){ -avec<-.05/c(11:ncon) -dvec<-c(dvec,avec) -}} -if(alpha==.01){ -dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) -if(ncon > 10){ -avec<-.01/c(11:ncon) -dvec<-c(dvec,avec) -}} -if(alpha != .05 && alpha != .01)dvec<-alpha/c(1:ncon) -if(sum(con^2)==0){ -flagcon<-TRUE -psihat<-matrix(0,CC,7) -dimnames(psihat)<-list(NULL,c('Group','Group','psihat','ci.lower','ci.upper','p.value','adj.p.value')) -temp1<-0 -jcom<-0 -for (j in 1:J){ -for (k in 1:J){ -if (j < k){ -jcom<-jcom+1 -dv<-x[,j]-x[,k] -temp=sintv2(dv,pr=FALSE) -temp1[jcom]<-temp$p.value -psihat[jcom,1]<-j -psihat[jcom,2]<-k -psihat[jcom,3]<-median(dv) -psihat[jcom,4]<-temp$ci.low -psihat[jcom,5]<-temp$ci.up -psihat[jcom,6]<-temp$p.value -}}} -temp2<-order(0-temp1) -zvec<-dvec[1:ncon] -sigvec<-(psihat[temp2,6]>=zvec) -dd=0 -if(sum(sigvec)0){ -if(nrow(con)!=ncol(x))warning('The number of groups does not match the number - of contrast coefficients.') -ncon<-ncol(con) -psihat<-matrix(0,ncol(con),6) -dimnames(psihat)<-list(NULL,c('con.num','psihat','ci.lower','ci.upper','p.value','adj.p.value')) -temp1<-NA -for (d in 1:ncol(con)){ -psihat[d,1]<-d -for(j in 1:J){ -if(j==1)dval<-con[j,d]*x[,j] -if(j>1)dval<-dval+con[j,d]*x[,j] -} -temp=sintv2(dval,pr=FALSE) -temp1[d]=temp$p.value -psihat[d,5]=temp$p.value -psihat[d,2]<-median(dval) -psihat[d,3]<-temp$ci.low -psihat[d,4]<-temp$ci.up -} -temp2<-order(0-temp1) -zvec<-dvec[1:ncon] -sigvec<-(psihat[temp2,5]>=zvec) -psihat[temp2,6]<-zvec -dd=0 -if(sum(sigvec)n/2)stop('n.id should be less than n/2') -if(n.id<=0)stop('n.id should be greater than zero') -d=ics.distances(v) -dr=rank(d) -idout=n+1-n.id -id=which(dr>=idout) -j=c(1:n) -} -list(n=n,out.id=id,keep=j[-id]) -} - -ridge.Gtest<-function(x,y,k=NULL,regfun=tsreg,xout=FALSE,outfun=outpro,STAND=FALSE,PV=FALSE,iter=5000, -locfun=mean,scat=var,MC=FALSE,MSF=TRUE,...){ -# -# Test the hypothesis that all slope parameters are zero. -# using a robust analog of a ridge estimator. -# -# -# PV=TRUE: computes a p-value at the expense of higher execution time. -# Otherwise, simply test at the 0.05 level using an approximate critical value. -# -# -# STAND=TRUE: x is standardized and y is centered, based on measures of location -# and scatter indicated by -# locfun and scat -# locfun=median would use the median and scat=madsq uses MAD. 
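-# Illustrative usage, a sketch only (simulated data; tsreg, ols, rob.ridge and lplot.pred
-# are defined elsewhere in this file):
-# set.seed(1)
-# x=matrix(rnorm(150),ncol=3)   # n=50 observations on 3 independent variables
-# y=x[,1]+rnorm(50)
-# ridge.Gtest(x,y)              # compare F.test with critical.05.value
-# ridge.Gtest(x,y,PV=TRUE)      # also get a p-value (higher execution time)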
-# -x<-as.matrix(x) -if(nrow(x) != length(y))stop("Length of y does not match number of x values") -m<-cbind(x,y) -m<-elimna(m) -p=ncol(x) -if(p==1)stop('There should be at least two independent variables') -p1=p+1 -x=m[,1:p] -y=m[,p1] -if(xout){ -flag<-outfun(x,plotit=FALSE)$keep -x<-x[flag,] -x<-as.matrix(x) -y<-y[flag] -} -n=nrow(x) -if(STAND){ -if(!PV)print('Suggest using PV=TRUE when STAND=TRUE') -x=standm(x,locfun=locfun,scat=scat) -y=y-locfun(y) -} -if(is.null(k)){ -if(!MSF)k=ridge.est.k(x,y) -else{ -ires=ols(x,y)$residuals -sigh=sqrt(sum(ires^2)/(n-p-1)) -k=p^(1+1/p)*sigh -}} -est=rob.ridge(x,y,k=k,Regfun=regfun,MSF=MSF)$coef -est=as.matrix(est) -x<-cbind(rep(1,nrow(x)),x[,1:ncol(x)]) -res<-y-x%*%est -p=ncol(x) -kmat=matrix(0,p,p) -diag(kmat)=k -xtx<-solve(t(x)%*%x+kmat) -h<-diag(x%*%xtx%*%t(x)) -hc3<-xtx%*%t(x)%*%diag(as.vector(res^2/(1-h)^2))%*%x%*%xtx -slopes=as.matrix(est[2:p1]) -Ssq=hc3[2:p1,2:p1] -f.test=t(slopes)%*%solve(Ssq)%*%slopes -f.test=(n-p)*f.test/((n-1)*p) -crit.val=NULL -if(!PV){ -if(n<20)crit.val=2.56 -if(n>500)crit.val=2.24 -if(is.null(crit.val)){ -nx=c(20,30,40, -50,75,100, -200,500) -ny=c(2.56,2.51,2.40, -2.41,2.34,2.30, - 2.29,2.24) -options(warn=-1) -if(n<=500)crit.f=lplot.pred(1/nx,ny,1/n)$yhat -options(warn=0) -}} -f.test=as.vector(f.test) -pv=NULL -if(PV){ -crit.f=NULL -if(!MC)v=ridgeGnull(n,p,regfun=regfun,iter=iter) -if(MC)v=ridgeGnullMC(n,p,regfun=regfun,iter=iter) -pv=mean(v>=f.test) -} -list(n=n,Ridge.est=est,F.test=f.test,critical.05.value=crit.f,p.value=pv) -} - - -ridgeGnullMC<-function(n,p,regfun=MMreg,iter=5000,SEED=TRUE){ -# -# Determine null distribution of ridge.Gtest -# -if(SEED)set.seed(45) -fv=NA -p1=p+1 -a=list() -library(parallel) -for(i in 1:iter)a[[i]]=rmul(n,p1) -fv=mclapply(a,ridgeGnullMC.sub,p=p,regfun=regfun) -fv=matl(fv) -fv=as.vector(fv) -fv -} -ridgeGnullMC.sub<-function(x,p,regfun=regfun){ -p1=p+1 -v=ridgeG.sub(x[,1:p],x[,p1],regfun=regfun)$F.test -v -} - - -dlinplot<-function(x,con,xlab='DV',ylab='',sym.test=FALSE){ -# -# For dependent variables, -# determine distribution of Y_i=sum_j c_jX_j -# and then plot the distribution -# -# The function also tests the hypothesis that Y has a median of zero. -# sym.test=TRUE: test the hypothesis that Y is symmetric. -# -# A quantile shift measure of effect size is returned as well. -# -if(is.matrix(con)){ -if(ncol(con>1))print('Warning: Argument con should be a vector. 
Only the first contrast coefficients are used.') -} -con=as.vector(con) -if(sum(con)!=0)stop('Contrast coefficients must sum to zero') -if(is.data.frame(x))x=as.matrix(x) -if(is.list(x))x=matl(x) -x=elimna(x) -n=nrow(x) -if(!is.matrix(x))stop('Data must be stored in a matrix or in list mode.') -J<-length(x) -if(length(con)!=ncol(x))stop('Length of con should equal number of groups') -x=elimna(x) -L=NA -linv=NA -for(i in 1:n){ -L[i]=sum(con*x[i,]) -} -akerd(L,xlab=xlab,ylab=ylab) -mt=sintv2(L) -sym=NULL -Q=depQS(L) -if(sym.test)sym=Dqdif(L) -list(median=mt$median,n=mt$n,ci.low=mt$ci.low,ci.up=mt$ci.up, -p.value=mt$p.value,Q.effect=Q$Q.effect,sym.test=sym) -} - - dlin.sign<-function(x,con){ - # For dependent variables, -# determine distribution of Y_i=sum_j c_jX_{ij} -# and then do a sign test -con=as.vector(con) -if(sum(con)!=0)stop('Contrast coefficients must sum to zero') -if(is.data.frame(x))x=as.matrix(x) -if(is.list(x))x=matl(x) -x=elimna(x) -n=nrow(x) -if(!is.matrix(x))stop('Data must be stored in a matrix or in list mode.') -J<-length(x) -if(length(con)!=ncol(x))stop('Length of con should equal number of groups') -x=elimna(x) -L=NA -linv=NA -for(i in 1:n){ -L[i]=sum(con*x[i,]) -} -a=signt(dif=L) -list(Prob_a_value_is_less_than_zerro=a$Prob_x_less_than_y,ci=a$ci,n=a$n,N=a$N,p.value=a$p.value) - } - - -wwwmcppbtr<-function(J,K,L, x,tr=.2,alpha=.05,dif=TRUE,op=FALSE,grp=NA,nboot=2000,SEED=TRUE,pr=TRUE){ -# -# Based on a percentile bootstrap method. -# -# dif=TRUE: use a linear combination of the variables, test the hypothesis that the trimmed mean is zero -# dif=FALSE: Use the marginal trimmed means instead. -# -# MULTIPLE COMPARISONS FOR A 3-WAY within-by-within-by within ANOVA -# Do all multiple comparisons associated with -# main effects for Factor A and B and C and all interactions -# based on trimmed means -# - # The data are assumed to be stored in x in list mode or in a matrix. - # If grp is unspecified, it is assumed x[[1]] contains the data - # for the first level of both factors: level 1,1. - # x[[2]] is assumed to contain the data for level 1 of the - # first factor and level 2 of the second factor: level 1,2 - # x[[j+1]] is the data for level 2,1, etc. - # If the data are in wrong order, grp can be used to rearrange the - # groups. For example, for a two by two design, grp<-c(2,4,3,1) - # indicates that the second group corresponds to level 1,1; - # group 4 corresponds to level 1,2; group 3 is level 2,1; - # and group 1 is level 2,2. - # - # Missing values are automatically removed. - # -if(is.data.frame(x))x=as.matrix(x) - JKL <- J*K*L - if(is.matrix(x)) - x <- listm(x) - if(!is.na(grp[1])) { - yy <- x - x<-list() - for(j in 1:length(grp)) - x[[j]] <- yy[[grp[j]]] - } - if(!is.list(x)) - stop("Data must be stored in list mode or a matrix.") - for(j in 1:JKL) { - xx <- x[[j]] - x[[j]] <- xx[!is.na(xx)] # Remove missing values - } - # - - if(JKL != length(x)) - warning("The number of groups does not match the number of contrast coefficients.") -for(j in 1:JKL){ -temp<-x[[j]] -temp<-temp[!is.na(temp)] # Remove missing values. 
-x[[j]]<-temp -} - # Create the three contrast matrices -temp<-con3way(J,K,L) -conA<-temp$conA -conB<-temp$conB -conC<-temp$conC -conAB<-temp$conAB -conAC<-temp$conAC -conBC<-temp$conBC -conABC<-temp$conABC -Factor.A<-rmmcppb(x,con=conA,est=tmean,tr=tr,alpha=alpha,dif=dif,nboot=nboot,SEED=SEED,pr=pr) -Factor.B<-rmmcppb(x,con=conB,est=tmean,tr=tr,alpha=alpha,dif=dif,nboot=nboot,SEED=SEED,pr=FALSE) -Factor.C<-rmmcppb(x,con=conC,est=tmean,tr=tr,alpha=alpha,dif=dif,nboot=nboot,SEED=SEED,pr=FALSE) -Factor.AB<-rmmcppb(x,con=conAB,est=tmean,tr=tr,alpha=alpha,dif=dif,nboot=nboot,SEED=SEED,pr=FALSE) -Factor.AC<-rmmcppb(x,con=conAC,est=tmean,tr=tr,alpha=alpha,dif=dif,nboot=nboot,SEED=SEED,pr=FALSE) -Factor.BC<-rmmcppb(x,con=conBC,est=tmean,tr=tr,alpha=alpha,dif=dif,nboot=nboot,SEED=SEED,pr=FALSE) -Factor.ABC<-rmmcppb(x,con=conABC,est=tmean,tr=tr,alpha=alpha,dif=dif,nboot=nboot,SEED=SEED,pr=FALSE) -list(Factor.A=Factor.A,Factor.B=Factor.B,Factor.C=Factor.C, -Factor.AB=Factor.AB,Factor.AC=Factor.AC,Factor.BC=Factor.BC, -Factor.ABC=Factor.ABC,conA=conA,conB=conB,conC=conC, -conAB=conAB,conAC=conAC,conBC=conBC,conABC=conABC) -} - - - - -lsa.linear<-function(x,y){ - require(lars) - - ## Least square approximation. This version Oct 19, 2006 - ## Reference Wang, H. and Leng, C. (2006) and Efron et al. (2004). - ## - ## Written by Chenlei Leng - ## - ## Input - ## obj: lm/glm/coxph or other object - ## - ## Output - ## beta.ols: the MLE estimate - ## beta.bic: the LSA-BIC estimate - ## beta.aic: the LSA-AIC estimate - - lsa <- function(obj) - { - intercept <- attr(obj$terms,'intercept') - if(class(obj)[1]=='coxph') intercept <- 0 - n <- length(obj$residuals) - Sigma <- vcov(obj) - SI <- solve(Sigma) - - beta.ols <- coef(obj) - - l.fit <- lars.lsa(SI, beta.ols, intercept, n) - - t1 <- sort(l.fit$BIC, ind=T) - - t2 <- sort(l.fit$AIC, ind=T) - - beta <- l.fit$beta - - if(intercept) { - beta0 <- l.fit$beta0+beta.ols[1] - beta.bic <- c(beta0[t1$ix[1]],beta[t1$ix[1],]) - beta.aic <- c(beta0[t2$ix[1]],beta[t2$ix[1],]) - } - - else { - beta0 <- l.fit$beta0 - beta.bic <- beta[t1$ix[1],] - beta.aic <- beta[t2$ix[1],] - } - - - - obj <- list(beta.ols=beta.ols, beta.bic=beta.bic, - beta.aic = beta.aic) - obj - } - - ################################### - ## lars variant for LSA - lars.lsa <- function (Sigma0, b0, intercept, n, - type = c("lasso", "lar"), - eps = .Machine$double.eps,max.steps) - { - type <- match.arg(type) - TYPE <- switch(type, lasso = "LASSO", lar = "LAR") - - n1 <- dim(Sigma0)[1] - - ## handle intercept - if (intercept) { - a11 <- Sigma0[1,1] - a12 <- Sigma0[2:n1,1] - a22 <- Sigma0[2:n1,2:n1] - Sigma <- a22-outer(a12,a12)/a11 - b <- b0[2:n1] - beta0 <- crossprod(a12,b)/a11 - } - - else { - Sigma <- Sigma0 - b <- b0 - } - - Sigma <- diag(abs(b))%*%Sigma%*%diag(abs(b)) - b <- sign(b) - - nm <- dim(Sigma) - m <- nm[2] - im <- inactive <- seq(m) - - Cvec <- drop(t(b)%*%Sigma) - ssy <- sum(Cvec*b) - if (missing(max.steps)) - max.steps <- 8 * m - beta <- matrix(0, max.steps + 1, m) - Gamrat <- NULL - arc.length <- NULL - R2 <- 1 - RSS <- ssy - first.in <- integer(m) - active <- NULL - actions <- as.list(seq(max.steps)) - drops <- FALSE - Sign <- NULL - R <- NULL - k <- 0 - ignores <- NULL - - while ((k < max.steps) & (length(active) < m)) { - action <- NULL - k <- k + 1 - C <- Cvec[inactive] - Cmax <- max(abs(C)) - if (!any(drops)) { - new <- abs(C) >= Cmax - eps - C <- C[!new] - new <- inactive[new] - for (inew in new) { - R <- updateR(Sigma[inew, inew], R, drop(Sigma[inew, active]), - Gram = TRUE,eps=eps) 
- if(attr(R, "rank") == length(active)) { - ##singularity; back out - nR <- seq(length(active)) - R <- R[nR, nR, drop = FALSE] - attr(R, "rank") <- length(active) - ignores <- c(ignores, inew) - action <- c(action, - inew) - } - else { - if(first.in[inew] == 0) - first.in[inew] <- k - active <- c(active, inew) - Sign <- c(Sign, sign(Cvec[inew])) - action <- c(action, inew) - } - } - } - else action <- -dropid - Gi1 <- backsolve(R, backsolvet(R, Sign)) - dropouts <- NULL - A <- 1/sqrt(sum(Gi1 * Sign)) - w <- A * Gi1 - if (length(active) >= m) { - gamhat <- Cmax/A - } - else { - a <- drop(w %*% Sigma[active, -c(active,ignores), drop = FALSE]) - gam <- c((Cmax - C)/(A - a), (Cmax + C)/(A + a)) - gamhat <- min(gam[gam > eps], Cmax/A) - } - if (type == "lasso") { - dropid <- NULL - b1 <- beta[k, active] - z1 <- -b1/w - zmin <- min(z1[z1 > eps], gamhat) - # cat('zmin ',zmin, ' gamhat ',gamhat,'\n') - if (zmin < gamhat) { - gamhat <- zmin - drops <- z1 == zmin - } - else drops <- FALSE - } - beta[k + 1, ] <- beta[k, ] - beta[k + 1, active] <- beta[k + 1, active] + gamhat * w - - Cvec <- Cvec - gamhat * Sigma[, active, drop = FALSE] %*% w - Gamrat <- c(Gamrat, gamhat/(Cmax/A)) - - arc.length <- c(arc.length, gamhat) - if (type == "lasso" && any(drops)) { - dropid <- seq(drops)[drops] - for (id in rev(dropid)) { - R <- downdateR(R,id) - } - dropid <- active[drops] - beta[k + 1, dropid] <- 0 - active <- active[!drops] - Sign <- Sign[!drops] - } - - actions[[k]] <- action - inactive <- im[-c(active)] - } - beta <- beta[seq(k + 1), ] - - dff <- b-t(beta) - - RSS <- diag(t(dff)%*%Sigma%*%dff) - - if(intercept) - beta <- t(abs(b0[2:n1])*t(beta)) - else - beta <- t(abs(b0)*t(beta)) - - if (intercept) { - beta0 <- as.vector(beta0)-drop(t(a12)%*%t(beta))/a11 - } - else { - beta0 <- rep(0,k+1) - } - dof <- apply(abs(beta)>eps,1,sum) - BIC <- RSS+log(n)*dof - AIC <- RSS+2*dof - object <- list(AIC = AIC, BIC = BIC, - beta = beta, beta0 = beta0) - object - } - - ##This part is written by Hansheng Wang. - vcov.rq <- function(object,...) - { - q=object$tau - x=as.matrix(object$x) - resid=object$residuals - f0=density(resid,n=1,from=0,to=0)$y - COV=q*(1-q)*solve(t(x)%*%x)/f0^2 - COV - } - - # adaptive lasso for linear reg, tuning parameter by bic - # calls software from Wang and Leng (2007, JASA). - ok<-complete.cases(x,y) - x<-x[ok,] # get rid of na's - y<-y[ok] # since regsubsets can't handle na's - m<-ncol(x) - n<-nrow(x) - as.matrix(x)->x - lm(y~x)->out - lsa(out)->out.lsa - coeff<-out.lsa$beta.bic - coeff2<-coeff[2:(m+1)] # get rid of intercept - pred<-x%*%coeff2+coeff[1] - st<-sum(coeff2 !=0) # number nonzero - mse<-sum((y-pred)^2)/(n-st-1) - if(st>0) x.ind<-as.vector(which(coeff2 !=0)) else x.ind<-0 - return(list(fit=pred,st=st,mse=mse,x.ind=x.ind,coeff=coeff2, - intercept=coeff[1])) -} - -LADlasso.Z <- function(x , y , STAND = TRUE, grid = seq(log(0.01),log(1400),length.out=100),xout=FALSE,outfun=outpro,...){ -# -# Zheng, Q., Gallagher, C., and Kulasekera, K. (2016). Robust adaptive lasso for variable -# selection. Communications in Statistics - Theory and Methods, 46(9):4642-4659. 
-# -# The code used here is a slight modification of code supplied by Qi Zheng -# - ####X = matrix, list, or dataframe of predictor values - ####Y = vector or single-dimension matrix/list/dataframe of outcome values - ####grid = grid of lambda(tuning parameter) values to check - - library(lars) - library(quantreg) - - X=x - Y=y -p=ncol(X) -p1=p+1 -xy=elimna(cbind(X,Y)) -X=xy[,1:p] -Y=xy[,p1] - if(STAND)X=standm(X) - if(xout){ -X<-as.matrix(X) -flag<-outfun(X,plotit=FALSE,...)$keep -X<-X[flag,] -Y<-Y[flag] -X<-as.matrix(X) -n.keep=nrow(X) -} - object1=lsa.linear(X,Y); - adlasso=object1$coef; - n=length(Y); - grid=exp(grid); - rqob=rq(Y~0+X); - BIC=rep(0,100); - weights=1/abs(rqob$coef); - for ( i in 1:100){ - rqfit=rq.fit.lasso(X,Y,lambda=grid[i]*weights); - betalad_tmp=rqfit$coef; - betalad_tmp=betalad_tmp*(betalad_tmp>1e-8); - mse=mean(abs(rqfit$resi)); - mdsize=length(which(betalad_tmp!=0)); - BIC[i]=log(mse)+mdsize*log(n)/n; - } - step=which.min(BIC); - betalad=rq.fit.lasso(X,Y,lambda=grid[step]*weights)$coef; - ladlasso=betalad*(betalad>1e-8) - colnames(ladlasso) <- names(X) - # add an intercept for convenience - alpha<-median(Y-X%*%ladlasso) - coef<-c(alpha,ladlasso) -res<-Y-X%*%ladlasso-alpha - list(coef=coef,residuals=res) -} - -RA.lasso=LADlasso.Z - -H.lasso<- function(x,y,lambda.lasso.try=NULL,k=1.5, STAND=TRUE, xout=FALSE,outfun=outpro,...){ -# -# A slight modification of code supplied by Jung et al. (2016) -# -# -X=x -Y=y - -library(glmnet) -X=as.matrix(X) -p1<-ncol(X)+1 -p<-ncol(X) -xy<-cbind(X,Y) -xy<-elimna(xy) -X<-xy[,1:p] -Y<-xy[,p1] -if(STAND)X=standm(X) -if(is.null(lambda.lasso.try))lambda.lasso.try=seq(0.01,0.6,length.out=100) -library(glmnet) -if(xout){ -X<-as.matrix(X) -flag<-outfun(X,plotit=FALSE,...)$keep -X<-X[flag,] -Y<-Y[flag] -X<-as.matrix(X) -n.keep=nrow(X) -} -n<-length(Y) -Y.orgn<- Y -model.for.cv<- cv.glmnet(X, Y, family='gaussian',lambda=lambda.lasso.try) -lambda.lasso.opt<- model.for.cv$lambda.min -model.est<- glmnet(X,Y,family='gaussian',lambda=lambda.lasso.opt) -fit.lasso<- predict(model.est,X,s=lambda.lasso.opt) -res.lasso<- Y-fit.lasso -sigma.init<- mad(Y-fit.lasso) -beta.pre<- c(model.est$a0,as.numeric(model.est$beta)) -Y.old<- Y -tol = 10 -n.iter <- 0 -while(tol>1e-4 & n.iter<100) -{ -Y.new<- fit.lasso + winsorized(res.lasso,a=k, sigma=sigma.init) -model.for.cv<- cv.glmnet(X,Y.new, family='gaussian',lambda=lambda.lasso.try) -model.est<- glmnet(X,Y.new,family='gaussian',lambda=model.for.cv$lambda.min) -fit.lasso<- predict(model.est,X,s=model.for.cv$lambda.min) -res.lasso<- Y.new-fit.lasso -beta.post <- c(model.est$a0,as.numeric(model.est$beta)) -tol<- sum((beta.pre-beta.post)^2) -n.iter<- n.iter+1 -beta.pre<- beta.post -} -sigma.est<- mean((Y.new-cbind(rep(1,n),X)%*%beta.post)^2) -Y.fit<- cbind(rep(1,n),X)%*%beta.post -Y.res<- Y.new - Y.fit -#object<- list(coefficient=beta.post,fit=Y.fit, iter = n.iter, sigma.est = sigma.est, -list(coef=beta.post,fit=Y.fit, iter = n.iter, sigma.est = sigma.est, -lambda.lasso.opt = model.est$lambda, residuals = Y.res) -} - -winsorized<- function(x,a=1.5,sigma=1) { -s<-sigma -newx<-x -indp<-x>(a*s) -newx[indp]<-(a*s) -indn<- x<(a*-s) -newx[indn]<- (-a*s) -newx -} - -OS.lasso<- function(x,y,lambda.lasso.try=NULL,lambda.gamma.try=NULL,xout=FALSE,outfun=outpro,details=FALSE,...){ -# -# Outlier Shifting lasso -# Jung, Y., Lee, S., and Hu, J. (2016). Robust regression for highly corrupted response -# by shifting outliers. Statistical Modelling, 16(1):1--23. 
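-# Illustrative usage, a sketch only (simulated data; requires the glmnet package and,
-# as written, a sample size that is a multiple of the five cross-validation folds):
-# set.seed(1)
-# x=matrix(rnorm(300),ncol=3)   # n=100, p=3
-# y=x[,1]+rnorm(100)
-# y[1:5]=y[1:5]+20              # corrupt a few of the responses
-# OS.lasso(x,y)$coef            # intercept followed by the three slope estimates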
-# -# -X=x -Y=y -if(is.null(lambda.lasso.try))lambda.lasso.try=seq(0.01,0.6,length.out=100) -if(is.null(lambda.gamma.try))lambda.gamma.try = seq(1,4,length.out=50) -library(glmnet) -X<-as.matrix(X) -p1<-ncol(X)+1 -p<-ncol(X) -xy<-cbind(X,Y) -xy<-elimna(xy) -X<-xy[,1:p] -Y<-xy[,p1] -if(xout){ -X<-as.matrix(X) -flag<-outfun(X,plotit=FALSE,...)$keep -X<-X[flag,] -Y<-Y[flag] -X<-as.matrix(X) -n.keep=nrow(X) -} -x=X -y=Y -n<-length(Y) -Y.orgn<- Y -model.for.cv<- cv.glmnet(X, Y, family="gaussian",lambda=lambda.lasso.try) -lambda.lasso.opt<- model.for.cv$lambda.min -model.est<- glmnet(X,Y,family="gaussian",lambda=lambda.lasso.opt) -fit.lasso<- predict(model.est,X,s=lambda.lasso.opt) -res.lasso<- Y - fit.lasso -sigma.est<- mad(Y-fit.lasso) -beta.est<- as.numeric(model.est$beta) -gamma.est<-rep(0,n) -n.fold<- 5 -n.cv <- n/n.fold -CV.error2<-CV.error<-rep(NA,length(lambda.gamma.try)) -Y.pred.cv<-matrix(NA,nrow=length(Y),ncol=length(lambda.gamma.try)) -Y.new<- Y -for (tt in 1:length(lambda.gamma.try)) -{ -gamma.est.cv<-rep(0,n-n.cv) -for (jj in 1:n.fold) -{ -sample.out.index<- (1+n.cv*(jj-1)):(n.cv*(jj)) -X.train<- X[-sample.out.index,] -Y.train<- Y[-sample.out.index] -X.test<- X[sample.out.index,] -model.train.temp<- glmnet(X.train,Y.train,family="gaussian", -lambda=lambda.lasso.opt) -beta.pre<-beta.post<- c(model.train.temp$a0, -as.numeric(model.train.temp$beta)) -tol<-100; n.iter <- 0 -while(tol>1e-6 & n.iter<100) -{resid.temp<- Y.train-cbind(rep(1,n-n.cv),X.train)%*%beta.pre -nonzero<-which(abs(resid.temp)>=sigma.est*lambda.gamma.try[tt]) -gamma.est.cv[nonzero]<- resid.temp[nonzero] -Y.train.new <- Y.train - gamma.est.cv -model.train.temp<-glmnet(X.train,Y.train.new, -family="gaussian",lambda=lambda.lasso.opt) -beta.post <- c(model.train.temp$a0, -as.numeric(model.train.temp$beta)) -tol<- sum((beta.pre-beta.post)^2) -n.iter<- n.iter+1 -beta.pre<-beta.post -} -Y.pred.cv[sample.out.index,tt] <-cbind(rep(1,n.cv), -X.test)%*%beta.post -} -CV.error[tt]<- mean((Y.pred.cv[,tt]-Y.orgn)^2) -CV.error2[tt]<- mean(abs(Y.pred.cv[,tt]-Y.orgn)) -} -lambda.gamma.opt<- lambda.gamma.try[which.min(CV.error)] -model.opt<- glmnet(X,Y.orgn,family="gaussian",lambda=lambda.lasso.opt) -beta.pre<- beta.post<- c(model.opt$a0,as.numeric(model.opt$beta)) -tol<-100; n.iter <- 0 -while(tol>1e-6 & n.iter<100) -{ -resid.opt<- Y.orgn-cbind(rep(1,n),X)%*%beta.pre -nonzero<-which(abs(resid.opt)>=sigma.est*lambda.gamma.opt) -gamma.est[nonzero]<- resid.opt[nonzero] -Y.new <- Y.orgn - gamma.est -model.opt<- glmnet(X,Y.new,family="gaussian",lambda=lambda.lasso.opt) -beta.post <- c(model.opt$a0,as.numeric(model.opt$beta)) -tol<- mean((beta.pre-beta.post)^2) -n.iter<- n.iter+1 -beta.pre<-beta.post} -Y.fit<- cbind(rep(1,n),X)%*%beta.post -res=y-Y.fit -if(details) -list(coef=beta.post,fit=fit.lasso,iter = n.iter, -sigma.est = sigma.est,CV.error=CV.error, -n.outlier=length(which(gamma.est!=0)), -gamma.est=gamma.est,lambda.opt=lambda.gamma.opt) -else list(coef=beta.post,residuals=res) -} - -plot3D<-function(x,y,xlab='X1',ylab='X2',zlab='Y',theta=50,phi=25, -duplicate='error',pc='*',ticktype='simple',expand=.5){ -# -# A 3D plot: supplied for convenience -# -# Example: plot a regression surface -# x and y generated from regression model with no error term. 
-# -x=as.matrix(x) -if(ncol(x)!=2)stop('x should have two columns only') -library(akima) -fitr<-interp(x[,1],x[,2],y,duplicate=duplicate) -persp(fitr,theta=theta,phi=phi,xlab=xlab,ylab=ylab,zlab=zlab,expand=expand, -scale=scale,ticktype=ticktype) -} - -LAD.lasso<-function(x,y,lam=NULL,WARN=FALSE,xout=FALSE,outfun=outpro,STAND=TRUE){ -# -# LAD (weighted) lasso based on -# Wang et al. Journal of Business and Economic Statistics -# -x=as.matrix(x) -p=ncol(x) -p1=p+1 -xy=cbind(x,y) -xy=elimna(xy) -if(STAND)xy=standm(xy) -x=xy[,1:p] -y=xy[,p1] - -if(xout){ -flag<-outfun(x,plotit=FALSE)$keep -xy=cbind(x,y) -x=xy[flag,1:p] -y=xy[flag,p1] -} -n=nrow(x) -if(p==1)stop('Should have two or more independent variables') -library(quantreg) -if(!WARN)options(warn=-1) -temp<-rq(y~x) -init<-temp[1]$coefficients -if(is.null(lam))lam=log(n)/(n*abs(init[2:p1])) -yy=c(y,rep(0,p)) -M=diag(n*lam) -xx=rbind(x,M) -coef=rq(yy~xx)[1]$coefficients -if(!WARN)options(warn=0) -res=y-x%*%coef[2:p1]-coef[1] -rv=order(lam) -list(coef=coef,lambda=lam,slopes.rank=rv,residuals=res) -} - - -ESfun<-function(x,y,QSfun=median,method=c('EP','QS','QStr','AKP','WMW','KMS'),tr=.2,pr=TRUE,SEED=TRUE){ -type=match.arg(method) -switch(type, - EP=yuenv2(x,y,tr=tr,SEED=SEED)$Effect.Size, #Explanatory power - QS=shiftQS(x,y,locfun=QSfun)$Q.Effect, #Quantile shift based on the medians - QStr=yuenQS(x,y,tr=tr,pr=pr)$Q.Effect, #Based on trimmed means - AKP=akp.effect(x,y,tr=tr), #Robust analog of Cohen's d - WMW=pxly(x,y,SEED=SEED), # P(X 0)flag<-(flag==1) -idv<-c(1:n) -outid <- idv[flag] -keep<-idv[!flag] -n.out=length(outid) -list(n=n,n.out=n.out,out.id=outid,keep=keep) -} - - Rdepth<-function(x,y,z=NULL, ndir = NULL){ -# -# -# z: -# An m by p+1 matrix containing row wise the hyperplanes for which to compute -# the regression depth. The first column should contain the intercepts. -# If z is not specified, it is set equal to cbind(x,y). -# -# Required: mrfDepth - -# For convenience, the arguments correspond to conventions in WRS - -x=cbind(x,y) -library(mrfDepth) -a=rdepth(x,z=z,ndir=ndir) -a -} - - -multsm<-function(x,y,pts=x,fr=.5,xout=FALSE,outfun=outpro,plotit=TRUE,pr=TRUE, -xlab='X',ylab='Prob',ylab2='Y',zlab='Prob',ticktype='det',vplot=NULL,scale=TRUE, -L=TRUE,...){ -# -# -# A smoother for multinomial regression based on logSM -# -# Example: Assuming x is a vector, and possible values -# for y are 0,1 and 2. -# multsm(x,y,c(-1,0,1)) -# This would estimate -# P(Y=0|x=-1), P(Y=1|x=-1), P(Y=2|x=-1), P(Y=0|x=0), etc. -# -# Returns estimates of the probabilities associated with -# each possible value of y given a value for independent variable that is stored in pts -# -# vplot indicates the value of the dependent variable for which probabilities will be plotted. -# vplot=1 means that the first largest value will be used. -# By default, vplot=NULL meaning that all values of y will when there is a single independent variable. -# -# vplot=c(1,3) means that the first and third values will be used. If the first value is 5, say and the third is 8, -# plot P(y=5|pts) and P(Y=8|pts) -# For more than one independent variable, the first value in vplot is used only. If no value is specified, the smallest y value is used. -# -# scale =TRUE is the default and is relevant when plotting and there are two dependent variables. See the function lplot. -# -# -# L=TRUE: for p=2, use LOESS (lplot) to plot the regression surface; otherwise use a running interval smoother (rplot). 
-# -# VALUE: -# For each value in pts, returns the probabilities for each of the y values. -# -if(pr){ -if(!xout)print('Suggest also looking at result using xout=TRUE') -} -xy=cbind(x,y) -xy=elimna(xy) -p1=ncol(xy) -p=p1-1 -if(p==1)pts=sort(pts) -x=xy[,1:p] -y=xy[,p1] -x=as.matrix(x) -if(xout){ -flag=outfun(x,plotit=FALSE,...)$keep -x=x[flag,] -y=y[flag] -} -x=standm(x,est=median,scar=madsq) -rempts=pts -pts=standm(pts,est=median,scar=madsq) -n=length(y) -temp<-sort(unique(y)) -nv=length(temp) -nv1=nv+1 -if(p==1){ -x=as.matrix(x) -pts=sort(pts) -} -pts=as.matrix(pts) -res=matrix(NA,nrow=nrow(pts),ncol=nv) -lab=NULL -for(k in 1:nv){ -est=logSMpred(x,y==temp[k],pts=pts) -res[,k]=est -lab=c(lab,paste('Y value',temp[k])) -} -dimnames(res)=list(NULL,lab) -if(plotit){ -if(is.null(vplot))vplot=c(1:nv) -#vplot=vplot+1 # adjustment to match col of res -if(p==1){ -nlines=min(ncol(res),6) -nlines=nlines-1 -plot(c(rep(rempts,length(vplot))),c(as.vector(res[,vplot])),type='n',xlab=xlab,ylab=ylab,ylim=c(0,1)) -for(k in 1:length(vplot))lines(rempts,res[,vplot[k]],lty=k) -} -if(p>1){ -if(p==2){ -if(L)lplot(rempts,res[,vplot[1]],xlab=xlab,ylab=ylab2,zlab=zlab,ticktype=ticktype,scale=scale,pr=FALSE) -if(!L)rplot(rempts,res[,vplot[1]],xlab=xlab,ylab=ylab2,zlab=zlab,ticktype=ticktype,scale=scale,pr=FALSE) -} -}} -list(estimates=res,pts=pts) -} - - - -multireg.prob<-function(x,y,pts=x,xout=FALSE,outfun=outpro,plotit=TRUE,xlab='X',ylab='Prob',zlab='Prob',ticktype='det',vplot=NULL, -L=TRUE,scale=TRUE,...){ -# -# -# Returns estimate of P(Y=k|X=pts) -# for all possible values of k and all points stored in pts. -# using a multinomial logit model -# -# Requires R package nnet -# -# scale =TRUE is the default: -# if there is only p=1 independent variable, the y-axis of the plot of the regression line will range between 0 and 1. -# This can provide a useful perspective, particularly when there is no association. -# if scale=TRUE, the y-axis is limited to the range of estimated probabilities. -# -library(nnet) -xy=cbind(x,y) -xy=elimna(xy) -p1=ncol(xy) -p=p1-1 -x=xy[,1:p] -y=xy[,p1] -if(p==1){ -pts=sort(pts) -} -x=as.matrix(x) -if(xout){ -flag=outfun(x,plotit=FALSE,...)$keep -x=x[flag,] -y=y[flag] -} -pts=as.matrix(pts) -npts=nrow(pts) -est=summary(multinom(y~x))$coefficients -x=as.matrix(x) -nv=length(unique(y)) -nvm1=nv-1 -w=NA -pr=NA -if(is.null(dim(est)))est=matrix(est,nrow=1) -ans=matrix(NA,nrow=npts,ncol=nvm1) -for(k in 1:nrow(pts)){ -for(j in 1:nvm1){ -w[j]=exp(est[j,1]+sum(est[j,2:p1]*pts[k,])) -} -bot=1+sum(w) -ans[k,]=w/bot -} -v0=1-apply(ans,1,sum) -ptn=c(1:nrow(pts)) -res=cbind(ptn,v0,ans) -temp=sort(unique(y)) -dimnames(res)=list(NULL,c('pts.no',temp)) -if(plotit){ -if(is.null(vplot))vplot=max(y) -vplot=vplot+1 # adjustment to match col of res -if(p==1){ -nlines=min(ncol(res),6) -nlines=nlines-1 -if(scale)plot(c(pts[1:2],rep(pts,length(vplot))),c(0,1,as.vector(res[,vplot])),type='n',xlab=xlab,ylab=ylab) -if(!scale)plot(rep(pts,length(vplot)),as.vector(res[,vplot]),type='n',xlab=xlab,ylab=ylab) -for(k in 1:length(vplot))lines(pts,res[,vplot[k]],lty=k) -} -if(p>1){ -if(ylab=='Prob')ylab='Y' -if(p==2){ -if(L)lplot(pts,res[,vplot[1]],xlab=xlab,ylab=ylab,zlab=zlab,ticktype=ticktype,scale=scale,pr=FALSE) -if(!L)rplot(pts,res[,vplot[1]],xlab=xlab,ylab=ylab,zlab=zlab,ticktype=ticktype,scale=scale,pr=FALSE) -} -} -} -list(estimates=res,pts=pts) -} - - - - - - - -logIVcom<-function(x,y,IV1=1,IV2=2,nboot=500,xout=FALSE,outfun=outpro,SEED=TRUE, -val=NULL,...){ -# -# For binary dependent variables. 
Assumes the logistic regression model is true. -# -# compare strength of the association for two subsets of independent variables -# IV1 and IV2 indicate the two sets of independent variables to be compared -# Example: IV1=c(1,2), IV2=1 would compare the first two independent -# variables to the third. -# -# If y is not binary, it has K possible values -# val: determines the value that will be used. -# Example: sort(unique(y))= 2, 4, 8 -# val=2, the function focuses on P(Y=4|X), the second possible -# value in y -# Default is to use the largest value, 8 in the example. -# -options(warn=-1) -library(parallel) -x<-as.matrix(x) -p1<-ncol(x)+1 -p<-ncol(x) -if(max(c(IV1,IV2))>p)stop('IV1 or IV2 has a value that exceeds the number of col. in x') -xy<-cbind(x,y) -xy<-elimna(xy) -x<-xy[,1:p] -y<-xy[,p1] -nrem=length(y) -if(xout){ -m<-cbind(x,y) -flag<-outfun(x,plotit=FALSE,...)$keep -m<-m[flag,] -x<-m[,1:p] -y<-m[,p1] -} -nkeep=length(y) -temp<-sort(unique(y)) -nv=length(temp) -if(is.null(val))idv=nv -y=y==temp[nv] -y=as.numeric(y) -est1=sd(logreg.pred(x[,IV1],y,x[,IV1])) -est2=sd(logreg.pred(x[,IV2],y,x[,IV2])) -nv=length(y) -x<-as.matrix(x) -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. - -data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot) -data=listm(t(data)) - -bvec1<-mclapply(data,regIVbinv2_sub,x[,IV1],y,x[,IV1]) -bvec2<-mclapply(data,regIVbinv2_sub,x[,IV2],y,x[,IV2]) -bvec1=as.vector(matl(bvec1)) -bvec2=as.vector(matl(bvec2)) -# bvec1 and bvec2 are nboot standard deviations based on bootstrap predict prob(Y|X) -pv1=mean(bvec1=nmin]) -UP<-max(sub[vecn>=nmin]) -pts=seq(x1[LOW],x1[UP],length.out=npts) -} -ES=NA -for (i in 1:length(pts)){ -g1<-y1[near(x1,pts[i],fr1)] -g2<-y2[near(x2,pts[i],fr2)] -g1<-g1[!is.na(g1)] -g2<-g2[!is.na(g2)] -ES[i]=ESfun(g1,g2,method=method,pr=FALSE) -} -res=cbind(pts,ES) -dimnames(res)=list(NULL,c('pts','ES')) -if(plotit)lplot(res[,1],res[,2],xlab=xlab,ylab='ES') -} -if(p>1){ -if(SEED)set.seed(2) # now cov.mve always returns same result -ES=NA -x1=as.matrix(x1) -p=ncol(x1) -p1=p+1 -m1=elimna(cbind(x1,y1)) -x1=m1[,1:p] -y1=m1[,p1] -x2=as.matrix(x2) -p=ncol(x2) -p1=p+1 -m2=elimna(cbind(x2,y2)) -x2=m2[,1:p] -y2=m2[,p1] -# -if(is.na(pts[1])){ -x1<-as.matrix(x1) -pts<-ancdes(x1,FRAC=FRAC,DH=TRUE) -} -pts<-as.matrix(pts) -n1<-1 -n2<-1 -vecn<-1 -mval1<-cov.mve(x1) -mval2<-cov.mve(x2) -for(i in 1:nrow(pts)){ -n1[i]<-length(y1[near3d(x1,pts[i,],fr1,mval1)]) -n2[i]<-length(y2[near3d(x2,pts[i,],fr2,mval2)]) -} -flag<-rep(TRUE,nrow(pts)) -for(i in 1:nrow(pts))if(n1[i]<10 || n2[i]<10)flag[i]<-F -flag=as.logical(flag) -pts<-pts[flag,] -if(sum(flag)==1)pts<-t(as.matrix(pts)) -if(sum(flag)==0)stop('No comparable design points found, might increase span.') -for (i in 1:nrow(pts)){ -g1<-y1[near3d(x1,pts[i,],fr1,mval1)] -g2<-y2[near3d(x2,pts[i,],fr2,mval2)] -g1<-g1[!is.na(g1)] -g2<-g2[!is.na(g2)] -ES[i]=ESfun(g1,g2,method=method,tr=tr,pr=FALSE) -} -if(p==2){ -if(plotit) lplot(pts,ES,xlab=xlab,ylab=ylab,zlab=zlab,ticktype=ticktype) -} -res=cbind(pts,ES) -vlabs=NA -for(j in 1:ncol(pts))vlabs[j]=paste('X',j) -dimnames(res)=list(NULL,c(vlabs,'ES')) -} -res -} - - -sband<-function(x,y,plotit=TRUE,CI=TRUE,alpha=.05,crit=NULL, -sm=TRUE,op=1,xlab='First Group',ylab='Est. 2 - Est. 1'){ -# -# Compute a confidence band for the shift function. -# Assuming two independent groups are being compared -# -# The default critical value is the approximate .05 critical value. 
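-# Illustrative usage, a sketch only (simulated data; ks.crit and kssig are defined
-# elsewhere in this file):
-# set.seed(1)
-# x=rnorm(50)
-# y=rnorm(60,.5)
-# sband(x,y)   # plot the shift function with an approximate .95 simultaneous band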
-# -# If flag=TRUE, the exact simultaneous probability coverage isomputed -# based on the critical value indicated the the argument -# crit. The default value yields, approximately, a .95 confidence band. -# -# If plotit=TRUE, a plot of the shift function is created, assuming that -# the graphics window has already been activated. -# -# This function removes all missing observations. -# -# When plotting, the median of x is marked with a + and the two -# quaratiles are marked with o. -# -# sm=TRUE, shift function is smoothed using: -# op!=1, running interval smoother, -# otherwise use lowess. -# -# Note: which group is the reference group matters. -# sband(x,y) often gives different results than sband(y,x). -# -x<-x[!is.na(x)] # Remove missing values from x. -y<-y[!is.na(y)] # Remove missing values from y. -n1=length(x) -n2=length(y) -if(is.null(crit))crit=ks.crit(n1=n1,n2=n2,alpha=alpha) -plotit<-as.logical(plotit) -pc<-NA -pc<-1-kssig(length(x),length(y),crit) -chk=sum(duplicated(x,y)) -if(chk>0){ -crit=ksties.crit(x,y,alpha) -pc=1-kstiesig(x,y,crit) -} -xsort<-sort(x) -ysort<-c(NA,sort(y)) -l<-0 -u<-0 -ysort[length(y)+1+1]<-NA -for(ivec in 1:length(x)) -{ -isub<-max(0,ceiling(length(y)*(ivec/length(x)-crit))) -l[ivec]<-ysort[isub+1]-xsort[ivec] -isub<-min(length(y)+1,floor(length(y)*(ivec/length(x)+crit))+1) -u[ivec]<-ysort[isub+1]-xsort[ivec] -} -id.sig.greater=NULL -id.sig.less.than=NULL -num<-length(l[l>0 & !is.na(l)])+length(u[u<0 & !is.na(u)]) -id.sig.greater=which(l>0) -id.sig.less.than=which(u<0) -qhat<-c(1:length(x))/length(x) -m<-matrix(c(qhat,l,u),length(x),3) -dimnames(m)<-list(NULL,c('qhat','lower','upper')) -if(plotit){ -xsort<-sort(x) -ysort<-sort(y) -del<-0 -for (i in 1:length(x)){ -ival<-round(length(y)*i/length(x)) -if(ival<=0)ival<-1 -if(ival>length(y))ival<-length(y) -del[i]<-ysort[ival]-xsort[i] -} -xaxis<-c(xsort,xsort) -yaxis<-c(m[,1],m[,2]) -allx<-c(xsort,xsort,xsort) -ally<-c(del,m[,2],m[,3]) -temp2<-m[,2] -temp2<-temp2[!is.na(temp2)] -plot(allx,ally,type='n',ylab=ylab,xlab=xlab) -ik<-rep(F,length(xsort)) -if(sm){ -if(op==1){ -ik<-duplicated(xsort) -del<-lowess(xsort,del)$y -} -if(op!=1)del<-runmean(xsort,del,pyhat=TRUE) -} -lines(xsort[!ik],del[!ik]) -lines(xsort,m[,2],lty=2) -lines(xsort,m[,3],lty=2) -temp<-summary(x) -text(temp[3],min(temp2),"+") -text(temp[2],min(temp2),"o") -text(temp[5],min(temp2),"o") -} -flag=is.na(m[,2]) -m[flag,2]=-Inf -flag=is.na(m[,3]) -m[flag,3]=Inf -q.greater=NULL -if(length(id.sig.greater)>0)q.greater=m[id.sig.greater,1] -q.less=NULL -if(length(id.sig.less.than)>0)q.less=m[id.sig.less.than,1] -if(!CI)m=NULL -list(m=m,crit=crit,numsig=num,q.sig.greater=q.greater,q.sig.less=q.less,prob.coverage=pc) -} - - -ancESband<-function(x1=NULL,y1=NULL,x2=NULL,y2=NULL,fr1=1,fr2=1,method='WMW', -pr=TRUE,FAST=TRUE,alpha=.05,plotit=TRUE,xlab='X',ylab='ES',npts=25, -xout=FALSE,outfun=out,nboot=500,SEED=TRUE, -nmin=12,SCAT=TRUE,pc='.',...){ -# -# Compute a measure of effect size for -# two independent groups when there is a single covariate. -# -# No parametric assumption is made about the form of -# the regression lines--a running interval smoother is used. -# -# Confidence intervals are computed so that the simultaneous probability -# coverage is approximately .95 when npts=25 covariate points are used. 
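-# (Added clarification: simultaneous coverage is achieved by replacing alpha with a
-# smaller, sample-size dependent critical p-value when forming each percentile bootstrap
-# confidence interval; that critical value is obtained from stored values via the
-# smoother lplot.pred as a function of the two sample sizes.)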
-# -# Three methods can be used: -# -# 'AKP': trimmed-Winsorized analog of Cohen's d -# 'QS': quantile shift -# 'WMW': P(Y11)stop('One covariate only is allowed with this function') -if(method=='EP')stop('Using method EP not recommended at this time') -if(length(x1)!=length(y1))stop('x1 and y1 have different lengths') -if(length(x2)!=length(y2))stop('x2 and y2 have different lengths') -xy1=elimna(cbind(x1,y1)) -xy2=elimna=cbind(x2,y2) -x1=xy1[,1] -y1=xy1[,2] -x2=xy2[,1] -y2=xy2[,2] -n1.in=nrow(xy1) -n2.in=nrow(xy2) - -if(xout){ -flag<-outfun(x1,...)$keep -x1<-x1[flag] -y1<-y1[flag] -flag<-outfun(x2,...)$keep -x2<-x2[flag] -y2<-y2[flag] -} -xor1=order(x1) -xor2=order(x2) -x1=x1[xor1] -x2=x2[xor2] -y1=y1[xor1] -y2=y2[xor2] -n1n=length(y1) -n2n=length(y2) -nv=c(30, 50, 60, 70, 80, 100, -150, 200, 300, 400, 500, 600, 800) -pv=c(0.00824497,0.00581, 0.005435089, 0.004763079, -0.00416832, 0.004406774, 0.00388228,0.003812836,0.003812836,0.003453055, 0.003625061, -.003372966, 0.003350022) -p.crit=(lplot.pred(1/nv,pv,1/n1n)$yhat+lplot.pred(1/nv,pv,1/n2n)$yhat)/2 -if(alpha!=.05){ -p.crit=p.crit*alpha/.05 # A crude adjustment -} -if(npts<=15)p.crit=alpha/npts -qmin=nboot*p.crit -bmin=ceiling(1/p.crit) -if(qmin<1){ -stop(paste('nboot must be at least ',bmin)) -} -EST=ancES(x1,y1,x2,y2,plotit=FALSE,npts=npts,method=method) -pts=EST[,1] -MAT=matrix(NA,nrow=nboot,ncol=length(pts)) -for(i in 1:nboot){ -id1=sample(n1n,n1n,replace=TRUE) -id2=sample(n2n,n2n,replace=TRUE) -B=ancES(x1[id1],y1[id1],x2[id2],y2[id2],plotit=FALSE,method=method,pts=pts,npts=npts,SEED=FALSE) -MAT[i,]=B[,2] -} - -flag1=MAT<.5 -flag2=MAT==.5 -pv1=apply(flag1,2,mean,na.rm=TRUE) -pv2=apply(flag2,2,mean,na.rm=TRUE) -pv=pv1+.5*pv2 -one.m.pv=1-pv -pv=2*apply(rbind(pv,one.m.pv),2,min) -ci.low=NA -ci.up=NA -qlow=p.crit/2 -qhi=1-p.crit/2 -ci.low=NA -ci.up=NA -for(i in 1:length(pts)){ -if(!is.na(pts[i]))ci.low[i]=qest(MAT[,i],qlow) -if(!is.na(pts[i]))ci.up[i]=qest(MAT[,i],qhi) -} -pvm=matrix(NA,nrow=length(pts),ncol=5) -pvm[,1]=pts -pvm[,2]=EST[,2] -pvm[,3]=pv -pvm[,4]=ci.low -pvm[,5]=ci.up -num.sig=sum(pv=nmin]) -UP<-max(sub[vecn>=nmin]) -pts=seq(x1[LOW],x1[UP],length.out=npts) -} -ES=NA -for (i in 1:length(pts)){ -g1<-y1[near(x1,pts[i],fr1)] -g2<-y2[near(x2,pts[i],fr2)] -g1<-g1[!is.na(g1)] -g2<-g2[!is.na(g2)] -ES[i]=ESfun(g1,g2,method=method,pr=FALSE,tr=tr) -} -res=cbind(pts,ES) -dimnames(res)=list(NULL,c('pts','ES')) -if(plotit){ -vp=0 -if(method=='AKP' || method=='QS')vp=-1 -plot(c(res[1,1],res[1,1],res[,1]),c(1,vp,res[,2]),xlab=xlab,ylab='ES',type='n') -v=lplot(res[,1],res[,2],xlab=xlab,ylab='ES',plotit=FALSE,pyhat=TRUE)$yhat.values -points(res[,1],res[,2],pch=pch) -lines(res[,1],v) -}} -if(p>1){ -if(SEED)set.seed(2) # now cov.mve always returns same result -ES=NA -x1=as.matrix(x1) -p=ncol(x1) -p1=p+1 -m1=elimna(cbind(x1,y1)) -x1=m1[,1:p] -y1=m1[,p1] -x2=as.matrix(x2) -p=ncol(x2) -p1=p+1 -m2=elimna(cbind(x2,y2)) -x2=m2[,1:p] -y2=m2[,p1] -# -if(is.na(pts[1])){ -x1<-as.matrix(x1) -pts<-ancdes(x1,FRAC=FRAC,DH=TRUE) -} -pts<-as.matrix(pts) -n1<-1 -n2<-1 -vecn<-1 -mval1<-cov.mve(x1) -mval2<-cov.mve(x2) -for(i in 1:nrow(pts)){ -n1[i]<-length(y1[near3d(x1,pts[i,],fr1,mval1)]) -n2[i]<-length(y2[near3d(x2,pts[i,],fr2,mval2)]) -} -flag<-rep(TRUE,nrow(pts)) -for(i in 1:nrow(pts))if(n1[i]<10 || n2[i]<10)flag[i]<-F -flag=as.logical(flag) -pts<-pts[flag,] -if(sum(flag)==1)pts<-t(as.matrix(pts)) -if(sum(flag)==0)stop('No comparable design points found, might increase span.') -for (i in 1:nrow(pts)){ -g1<-y1[near3d(x1,pts[i,],fr1,mval1)] 
-g2<-y2[near3d(x2,pts[i,],fr2,mval2)] -g1<-g1[!is.na(g1)] -g2<-g2[!is.na(g2)] -ES[i]=ESfun(g1,g2,method=method,tr=tr,pr=FALSE) -} -if(p==2){ -if(plotit) lplot(pts,ES,xlab=xlab,ylab=ylab,zlab=zlab,ticktype=ticktype) -} -res=cbind(pts,ES) -vlabs=NA -for(j in 1:ncol(pts))vlabs[j]=paste('X',j) -dimnames(res)=list(NULL,c(vlabs,'ES')) -} -res -} - -MULNC<-function(x1,x2,alpha=.05,SEED=TRUE,nboot=500,nullval=.5,PV=FALSE,pr=TRUE){ -# -# bivariate analog of Cliff's method, which is an analog of the -# Wilcoxon--Mann--Whitney test -# -# -# Let PG= event that x1 'dominates' x2. -# That is, for the ith and jth randomly sampled points -# x1[i,1]>x2[j,1] and x1[i,2]>x2[j,2] -# PL= event that x2 is 'dominates' x1. -# -# Function returns: -# -# phat.GT: the estimated probability of event PG -# phat.LT: the estimated probability of event PL -# d.ci: confidence interval for Pr(PL)-Pr(PG), the difference between the probabilities of these two events. -# phat, the estimated probability that the event PL is more likely than PG -# phat.ci: confidence interval for the estimand corresponding to phat -# p.value: testing Pr(PL)=Pr(PG) -# -x1=elimna(x1) -x2=elimna(x2) -n1=nrow(x1) -n2=nrow(x2) -m=matrix(0,nrow=n1,ncol=n2) -if(ncol(x1)!=ncol(x2))stop('x1 and x2 should be matrices with two columns') -if(ncol(x1)!=2)stop('x1 and x2 should be matrices with two columns') -for(i in 1:n1){ -m[i,]=(x1[i,1]>x2[,1])*(x1[i,2]>x2[,2]) -id=x1[i,1]nullval || chkit[2]nullval || chkit[2]nullval || chkit[2]nullval || chkit[2]nullval || chkit[2] Y_1 and X_2 < Y_2 or if -# X_1 < Y_1 and X_2 > Y_2. -# -# Let PG= event that simulatensouly X_1 > Y_1 and X_2 < Y_2 -# PL= event thatsimulatensouly X_1 < Y_1 and X_2 > Y_2 -# -# Function returns: -# -# phat1: the estimated probability of event PG -# phat2: the estimated probability of event PL - -# phat, the estimated probability that the event PL is more likely than PG -# ci.p = A confidence interval for the estimand corresponding to phat -# -if(SEED)set.seed(2) -x1=elimna(x1) -x2=elimna(x2) -n1=nrow(x1) -n2=nrow(x2) -d=NA -ci.mat=matrix(NA,nrow=nboot,ncol=2) -d.mat=matrix(NA,nrow=nboot,ncol=2) -n=min(c(n1,n2)) -for(i in 1:nboot){ -id1=sample(n,n,replace=TRUE) -id2=sample(n,n,replace=TRUE) -v=MULNC.sub(x1[id1,],x2[id2,],alpha=alpha) -ci.mat[i,]=v$ci -d.mat[i,]=v$d.ci -} -v=MULNC.sub(x1,x2) -ci=mean(ci.mat[,1]) -ci[2]=mean(ci.mat[,2]) -dci=mean(d.mat[,1]) -dci[2]=mean(d.mat[,2]) -list(n1=n1,n2=n2,phat.GT=v$phat.GT,phat.LT=v$phat.LT,d.ci=dci,phat=v$phat,ci.p=ci) -} - - -MULNC.int<-function(J,K,x,x1=NULL,x2=NULL,x3=NULL,x4=NULL,alpha=.05,plotit=TRUE,SEED=TRUE,pr=TRUE){ -# -# Rank-based multiple comparisons for all interactions when dealing with bivariate data. -# in J by K design. The method is based on an -# extension of the Patel-Hoel definition of no interaction. -# -# The familywise type I error probability is controlled by using -# a critical value from the Studentized maximum modulus distribution. -# -# It is assumed all groups are independent. -# -# Missing values are automatically removed. -# -# x is assumed to have list mode, x[[1]]... x[[JK]] contain bivariate data stored in an n-by-2 matrix -# -# Consider a 2-by-2 design. -# Let PG= event that x1 is 'dominates' x2. -# That is, for the ith and jth randomly sampled points -# x1[i,1]>x2[j,1] and x1[i,2]>x2[j,2] -# PL= event that x2 is 'dominates' x1. -# Let P1= Pr(PL)-Pr(PG), the difference between the probabilities of these two events. -# Define P2 in an analogous fashion for x3 and x4 -# no interaction is taken to mean P1=P2. 
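# Illustrative usage sketch for MULNC.int (hypothetical data). The groups are
# stored in list mode, x[[1]],...,x[[JK]], each an n-by-2 matrix; equal sample
# sizes are required here, otherwise use MULNCpb.int.
#   set.seed(3)
#   x <- lapply(1:6, function(i) matrix(rnorm(60), ncol = 2))   # a 2-by-3 design
#   MULNC.int(2, 3, x)$test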
- -# -if(!is.null(x1)){ -x=list(x1,x2,x3,x4) -J=2 -K=2 -} -if(!is.list(x))stop('Data for each group must be stored in list mode.') -p=J*K -grp=c(1:p) -if(p>4){ -if(pr)print('Confidence intervals are adjusted so that the simultaneous probability coverage is approximately 1-alpha') -} -CCJ<-(J^2-J)/2 -CCK<-(K^2-K)/2 -CC<-CCJ*CCK -test<-matrix(NA,CC,8) -test.p<-matrix(NA,CC,7) -nv=NA -for(j in 1:p){ -if(ncol(x[[j]])!=2)stop('One or more groups do not contain bivariate data') -x[[j]]=elimna(x[[j]]) -nv[j]=length(x[[j]]) -} -if(var(nv)!=0)stop('Unequal sample sizes detected, use MULNCpb.int instead') -mat<-matrix(grp,ncol=K,byrow=TRUE) -dimnames(test)<-list(NULL,c('Factor A','Factor A','Factor B','Factor B','delta','ci.lower','ci.upper','p.value')) -jcom<-0 -crit<-smmcrit(200,CC) -if(alpha!=.05)crit<-smmcrit01(200,CC) -alpha<-1-pnorm(crit) -for (j in 1:J){ -for (jj in 1:J){ -if (j < jj){ -for (k in 1:K){ -for (kk in 1:K){ -if (k < kk){ -jcom<-jcom+1 -test[jcom,1]<-j -test[jcom,2]<-jj -test[jcom,3]<-k -test[jcom,4]<-kk -temp1<-MULNC.sub(x[[mat[j,k]]],x[[mat[j,kk]]]) -temp2<-MULNC.sub(x[[mat[jj,k]]],x[[mat[jj,kk]]]) -delta=temp2$d-temp1$d -sqse<-temp1$sq.se+temp2$sq.se -test[jcom,5]<-delta/2 -test[jcom,6]<-delta/2-crit*sqrt(sqse/4) -test[jcom,7]<-delta/2+crit*sqrt(sqse/4) -test[jcom,8]=2*(1-pnorm(abs((delta/2)/sqrt(sqse/4)))) -}}}}}} -list(test=test) -} -MULNC.sub<-function(x1,x2,alpha=.05){ -# -# bivariate analog of Cliff's method, which is an analog of the -# Wilcoxon--Mann--Whitney test -# -# -# Let PG= event that x1 is 'greater than' x2. That is, for the ith and jth randomly sampled points -# x1[i,1]>x2[j,1] and x1[i,2]>x2[j,2] -# PL= event that x1 is 'less than' x2. -# -# Function returns: -# -# phat.GT: the estimated probability of event PG -# phat.LT: the estimated probability of event PL - -# phat, the estimated probability that the event PL is more likely than PG -# -x1=elimna(x1) -x2=elimna(x2) -n1=nrow(x1) -n2=nrow(x2) -m=matrix(0,nrow=n1,ncol=n2) -if(ncol(x1)!=ncol(x2))stop('x1 and x2 should be matrices with two columns') -if(ncol(x1)!=2)stop('x1 and x2 should be matrices with two columns') -for(i in 1:n1){ -m[i,]=(x1[i,1]>x2[,1])*(x1[i,2]>x2[,2]) -id=x1[i,1]x2[j,1] and x1[i,2]>x2[j,2] -# PL= event that x1 is 'less than' x2. -# -# Function returns: -# -# phat.GT: the estimated probability of event PG -# phat.LT: the estimated probability of event PL - -# phat, the estimated probability that the event PL is more likely than PG -# -if(SEED)set.seed(2) -x1=elimna(x1) -x2=elimna(x2) -n1=nrow(x1) -n2=nrow(x2) -d=NA -for(i in 1:nboot){ -id1=sample(n1,n1,replace=TRUE) -id2=sample(n2,n2,replace=TRUE) -d[i]=MULNC.sub(x1[id1,],x2[id2,])$phat -} -low=round(alpha*nboot/2) -up=nboot-low -low=low+1 -ds=sort(d) -ci=ds[low] -ci[2]=ds[up] -pv=mean(ds<.5)+.5*mean(ds==.5) -pv=2*min(c(pv,1-pv)) -phat=MULNC.sub(x1,x2)$phat -list(n1=n1,n2=n2,phat=phat,ci.p=ci,p.value=pv) -} - -MULNCpb.int<-function(J,K,x,x1=NULL,x2=NULL,x3=NULL,x4=NULL,alpha=.05,plotit=TRUE,nboot=500,SEED=TRUE,method='hoch'){ -# -# Rank-based multiple comparisons for all interactions when dealing with bivariate data -# in a J-by-K design. The method is based on an -# extension of the Patel-Hoel definition of no interaction. -# -# It is assumed all groups are independent. -# -# Missing values are automatically removed. -# -# x is assumed to have list mode, x[[1]]... 
x[[JK]] contain bivariate data stored in an n-by-2 matrix -# -# For a 2-by-2 design, data can be stored in the arguments -# x1, x2, x3, x4 -# where each of these arguments is an n-by-2 matrix. -# -# Consider a 2-by-2 design. -# Let PG= event that x1 is 'dominates' x2. -# That is, for the ith and jth randomly sampled points -# x1[i,1]>x2[j,1] and x1[i,2]>x2[j,2] -# PL= event that x2 is 'dominates' x1. -# Let P1= Pr(PL)-Pr(PG), the difference between the probabilities of these two events. -# Define P2 in an analogous fashion for x3 and x4 -# no interaction is taken to mean P1=P2. - -# -if(!is.null(x1)){ -x=list(x1,x2,x3,x4) -J=2 -K=2 -} -if(!is.list(x))stop('Data for each group must be stored in list mode.') -if(SEED)set.seed(2) -p=J*K -grp=c(1:p) -CCJ<-(J^2-J)/2 -CCK<-(K^2-K)/2 -CC<-CCJ*CCK -test<-matrix(NA,CC,9) -test.p<-matrix(NA,CC,7) -for(j in 1:p){ -if(ncol(x[[j]])!=2)stop('One or more groups do not contain bivariate data') -x[[j]]=elimna(x[[j]]) -} -mat<-matrix(grp,ncol=K,byrow=TRUE) -dimnames(test)<-list(NULL,c('Factor A','Factor A','Factor B','Factor B','phat','ci.lower','ci.upper','p.value','adjusted p-value')) -jcom<-0 -low=round(alpha*nboot/2)+1 -up=nboot-low -for (j in 1:J){ -for (jj in 1:J){ -if (j < jj){ -for (k in 1:K){ -for (kk in 1:K){ -if (k < kk){ -jcom<-jcom+1 -test[jcom,1]<-j -test[jcom,2]<-jj -test[jcom,3]<-k -test[jcom,4]<-kk -d=NA -for(b in 1:nboot){ -n1=nrow(x[[mat[j,k]]]) -n2=nrow(x[[mat[j,kk]]]) -n3=nrow(x[[mat[jj,k]]]) -n4=nrow(x[[mat[jj,kk]]]) -X1=x[[mat[j,k]]] -X2=x[[mat[j,kk]]] -X3=x[[mat[jj,k]]] -X4=x[[mat[jj,kk]]] -id1=sample(n1,n1,replace=TRUE) -id2=sample(n2,n2,replace=TRUE) -id3=sample(n3,n3,replace=TRUE) -id4=sample(n4,n4,replace=TRUE) -d[b]=MULNC.sub(X1[id1,],X2[id2,])$phat-MULNC.sub(X3[id3,],X4[id4,])$phat -} -d=sort(d) -ci=d[low+1] -ci[2]=d[up] -pv=mean(d<0) -pv=2*min(c(pv,1-pv)) -test[jcom,5]=MULNC.sub(X1,X2)$phat-MULNC.sub(X3,X4)$phat -test[jcom,6]<-ci[1] -test[jcom,7]<-ci[2] -test[jcom,8]<-pv -}}}}}} -test[,9]=p.adjust(test[,8],method=method) -list(test=test) -} - -perm.rho<-function(x,y,alpha=.05,nboot=1000,SEED=TRUE){ -# -# Do a permutation test based on Pearson's correlation -# Diciccio--Romano version of a permuation test (JASA, 2017, 112, 1211-1220) -# - -# The default number of permutations is nboot=1000 -# -if(SEED)set.seed(2) -xx<-cbind(x,y) -xx=elimna(xx) -x=xx[,1] -y=xx[,2] -n=length(x) -tval<-perm.rho.sub(x,y) -vec<-c(1:length(xx)) -v1<-length(x)+1 -difb<-NA -tv<-NA -for(i in 1:nboot){ -id=sample(n,n) -tv[i]<-perm.rho.sub(x,y[id]) -} -tv<-sort(tv) -icl<-floor((alpha/2)*nboot+.5) -icu<-floor((1-alpha/2)*nboot+.5) -reject<-0 -if(tval>=tv[icu] || tval <=tv[icl])reject<-1 -list(teststat=tval,lower.crit=tv[icl],upper.crit=tv[icu],reject=reject) -} - -perm.rho.sub<-function(x,y){ -rho=cor(x,y) -n=length(x) -xbar=mean(x) -ybar=mean(y) -m22=sum((x-xbar)^2*(y-ybar)^2)/n -m20=sum((x-xbar)^2)/n -m02=sum((y-ybar)^2)/n -tau=sqrt(m22/(m20*m02)) -S=sqrt(n)*rho/tau -S -} - -binomCP<-function(x = sum(y), nn = length(y), y = NULL, n = NA, alpha = 0.05){ -# -# Clopper-Pearson -# -# y is a vector of 1s and 0s. -# x is the number of successes observed among n trials -# -if(!is.na(n))nn=n -q=binom.test(x,nn,conf.level=1-alpha)[4] -ci=q$conf.int[1:2] -list(phat=x/nn,ci=ci,n=nn) -} - -kmsbinomci<-function(x = sum(y), nn = length(y), y = NULL, n = NA, alpha = 0.05){ -# -# Boinomial -# Confidence interval for the probability of success. -# Kulinskaya, E., Morgenthaler, S. & Staudte, R. (2008). 
-# Meta Analysis: A guide to calibrating and combining statistical evidence p. 140 -# -if(!is.null(y[1])){ -y=elimna(y) -nn=length(y) -} -if(nn==1)stop('Something is wrong: number of observations is only 1') -n<-nn -cr=qnorm(1-alpha/2) -ntil=n+.75 -ptil=(x+3/8)/ntil -crit=qnorm(1-alpha/2) -if(x!=n && x!=0){ -term1=sin(asin(sqrt(ptil))-crit/(2*sqrt(n))) -term2=sin(asin(sqrt(ptil))+crit/(2*sqrt(n))) -lower=term1^2 -upper=term2^2 -} -if(x==0){ #Use Clopper-Pearson -lower<-0 -upper<-1-alpha^(1/n) -} -if(x==1){ -upper<-1-(alpha/2)^(1/n) -lower<-1-(1-alpha/2)^(1/n) -} -if(x==n-1){ -lower<-(alpha/2)^(1/n) -upper<-(1-alpha/2)^(1/n) -} -if(x==n){ -lower<-alpha^(1/n) -upper<-1 -} -phat=x/n -list(phat=phat,se=sqrt(ptil*(1-ptil)/ntil),ci=c(lower,upper),n=n) -} - - - -binom.conf<-function(x = sum(y), nn = length(y),AUTO=TRUE,pr=TRUE, -method=c('AC','P','CP','KMS','WIL','SD'), y = NULL, n = NA, alpha = 0.05){ -# -# -# P: Pratt's method -# AC: Agresti--Coull -# CP: Clopper--Pearson -# KMS: Kulinskaya et al. 2008, p. 140 -# WIL: Wilson type CI. Included for completeness; was used in simulations relevant to binom2g -# SD: Schilling--Doi -# -if(pr) print('Note: To perform the sign test, use the the R function signt') -if(nn<35){ -if(AUTO)method='SD' -} -type=match.arg(method) -switch(type, - P=binomci(x=x,nn=nn,y=y,n=n,alpha=alpha), - AC=acbinomci(x=x,nn=nn,y=y,n=n,alpha=alpha), - CP=binomCP(x=x,nn=nn,y=y,n=n,alpha=alpha), - KMS=kmsbinomci(x=x,nn=nn,y=y,n=n,alpha=alpha), - WIL=wilbinomci(x=x,y=y,n=nn,alpha=alpha), - SD=binomLCO(x=x,nn=nn,y=y,alpha=alpha), - ) -} - -binom.conf.pv<-function(x = sum(y), nn = length(y),y=NULL,method='AC',AUTO=TRUE, pr=FALSE, PVSD=TRUE,alpha=.05,nullval=.5,NOTE=TRUE){ -# -# p-value for the methods available in binom.conf -# AC: Agresti--Coull -# P: Pratt's method -# CP: Clopper--Pearson -# KMS: Kulinskaya et al. 2008, p. 140 -# WIL: Wilson type CI. Included for completeness; was used in simulations relevant to binom2g -# SD: Schilling--Doi -# -# AUTO=TRUE, use SD if n<35 -# PVSD=FALSE: no p-value when using SD to avoid possibly high execution time -# use p-value based on what method indicates. 
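# Illustrative usage sketch for binom.conf and binom.conf.pv (hypothetical 0-1
# data). AUTO=FALSE keeps the requested Agresti--Coull method rather than
# switching to SD for small n.
#   y <- c(1, 0, 1, 1, 0, 1, 1, 1, 0, 1)
#   binom.conf(y = y, method = 'AC', AUTO = FALSE, pr = FALSE)     # phat and ci
#   binom.conf.pv(sum(y), length(y), method = 'AC', AUTO = FALSE)  # adds a p-value for p = .5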
Default is AC, Agresti--Coull -# -if(pr) print('Note: To perform the sign test, use the the R function signt') -if(!PVSD & nn<35)AUTO=FALSE -if(AUTO){ -if(nn<35)method='SD' -} -ci<-binom.conf(x,nn,alpha=alpha,method=method,AUTO=FALSE,pr=FALSE) -pv=NULL -if(method=='SD'){ -if(!PVSD){ -if(NOTE)print('To get a p-value when method=SD, set PVSD=TRUE, but execution time might be high') -}} -if(method!='SD' || PVSD){ -alph<-c(1:99)/100 -for(i in 1:99){ -irem<-i -chkit<-binom.conf(x,nn,alpha=alph[i],method=method,AUTO=FALSE,pr=FALSE)$ci -if(chkit[[1]]>nullval || chkit[[2]]nullval || chkit[[2]]2){ -mat=cbind(con[,k]*est[,2],con[,k]*est[,3]) -LM=apply(mat,1,min) -UM=apply(mat,1,max) -term1=sum(con[,k]*est[,1]) -EST[k]=term1 -term2=sqrt(sum((con[,k]*est[,1]-LM)^2)) -term3=sqrt(sum((con[,k]*est[,1]-UM)^2)) -L[k]=term1-term2 -U[k]=term1+term3 -PV[k]=lincon.binPV(r,n,con=con[,k],nullval=null.value,binCI=acbinomci)$p.value -} -} -adj=p.adjust(PV,method='hoch') -CI=cbind(EST,L,U,PV,adj) -dimnames(CI)=list(NULL,c('Est','ci.low','ci.hi','p-value','Adjusted p.value')) -list(p.hat=est[,1],CI=CI,con=con) -} - - -lincon.bin.sub<-function(r,n,con=NULL,alpha=.05,null.value=0,x=NULL,binCI=acbinomci){ -# -# r: number of successes for J independent groups -# n: corresponding sample sizes -# -# Compute confidence interval for a linear combination of independent binomials -# using: -# A note on confidence interval estimation for a linear function -# of binomial proportion. -# Zou, G. Y., Huang, W. & Zheng, X (2009) CSDA, 53, 1080-1085 -# -# con: contrast coeffiients -# if NULL, all pairwise comparisons are performed. -# -# x: if not NULL, taken to be a matrix containing 0s and 1s, columns correspond to groups -# r and n are computed using the data in x -# -# binCI defaults to Agresti--Coull -# Other choices for binCI: -# binomci: Pratt's method -# binomCP: Clopper--Pearson -# kmsbinomci: Kulinskaya et al -# wilbinomci: Wilson -# binomLCO: Schilling--Doi -# -if(!is.null(x)){ -r=apply(x,2,sum) -n=rep(nrow(x),ncol(x)) -} -J=length(r) -est=matrix(NA,nrow=J,ncol=3) -for(j in 1:J){ -v=binCI(r[j],n[j],alpha=alpha) -est[j,]=c(v$phat,v$ci) -} -if(!is.null(con))con=as.matrix(con) -if(is.null(con))con=con.all.pairs(J) -NT=ncol(con) -L=NA -U=NA -EST=NA -for(k in 1:NT){ -mat=cbind(con[,k]*est[,2],con[,k]*est[,3]) -LM=apply(mat,1,min) -UM=apply(mat,1,max) -term1=sum(con[,k]*est[,1]) -EST[k]=term1 -term2=sqrt(sum((con[,k]*est[,1]-LM)^2)) -term3=sqrt(sum((con[,k]*est[,1]-UM)^2)) -L[k]=term1-term2 -U[k]=term1+term3 -} -CI=cbind(EST,L,U) -dimnames(CI)=list(NULL,c('Est','ci.low','ci.hi')) -list(p.hat=est[,1],CI=CI,con=con) -} - - -lincon.binPV<-function(r,n,con=NULL,alpha=.05,nullval=0,binCI=acbinomci){ -# -# Compare two binomials using the method in Zou et al.2009 CSDA. -# -# x and y are vectors of 1s and 0s. 
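# Illustrative usage sketch for lincon.bin (hypothetical counts). r contains the
# number of successes and n the sample sizes for independent groups; as with
# lincon.bin.sub above, omitting con is assumed to yield all pairwise comparisons.
#   r <- c(12, 20, 25)
#   n <- c(40, 42, 45)
#   lincon.bin(r, n)$CI   # estimates, confidence intervals, p-values, adjusted p-values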
-# Or can use the argument -# r1 = the number of successes observed among group 1 -# r2 = the number of successes observed among group 2 -# n1 = sample size for group 1 -# n2 = sample size for group 2 -# -# nullval is the hypothesized value of the linear contrast -# -# binCI defaults to Agresti--Coull -# Other choices for binCI: -# binomci: Pratt's method -# binomCP: Clopper--Pearson -# kmsbinomci: Kulinskaya et al -# wilbinomci: Wilson -# binomLCO: Schilling--Doi -# -ci=lincon.bin.sub(r=r,n=n,alpha=alpha,con=con,binCI=binCI) -p.value=1 -p1.hat=r1/n1 -p2.hat=r2/n2 -alph<-c(1:99)/100 -for(i in 1:99){ -irem<-i -chkit<-lincon.bin.sub(r=r,n=n,alpha=alph[i],con=con,binCI=binCI)$CI[2:3] -if(chkit[1]>nullval || chkit[2]nullval || chkit[2]nullval || chkit[2]nullval || chkit[2]nullval || chkit[2]=5)res=binom.conf(B,n,method=method,alpha=alpha,pr=FALSE) -res -} - - -smbinRC<-function(x,y,IV=c(1,2),Qsplit1=.5,Qsplit2=.5,method='ACSK',nboot=1000,est=tmean,alpha=.05,FWE.method='hoch', -xout=FALSE,outfun=outpro,SEED=TRUE,...){ -# -# y is assumed to be binary -# -# Split data based on the covariates indicated by -# IV -# -# Qsplit: split the independent variable based on the -# quantiles indicated by Qsplit -# Example -# Qsplit1=c(.25,.5,.75) -# Qsplit2=.5 -# would split based on the quartiles for the first independent variable and the median -# for the second independent variable -# -# Then compare the probability of a success corresponding to the resulting groups -# -# IV[1]: indicates the column of containing the first independent variable to use. -# IV[2]: indicates the column of containing the second independent variable to use. -# -p=ncol(x) -if(p==1)stop('There should be two or more independent variables') -p1=p+1 -xy<-elimna(cbind(x,y)) -x<-xy[,1:p] -y<-xy[,p1] -v=NULL -# Next convert y to 0 and 1s -n=length(y) -yy=rep(0,n) -flag=which(y==max(y)) -yy[flag]=1 -y=yy -if(xout){ -flag<-outfun(x,plotit=FALSE,...)$keep -x<-x[flag,] -y<-y[flag] -} -if(length(unique(y))>2)stop('y should be binary') -z=list() -group=list() -N.int=length(Qsplit1)+1 -N.int2=length(Qsplit2)+1 -est.mat=matrix(NA,nrow=N.int,ncol=N.int2) -L1=NULL -L2=NULL -for(i in 1:N.int)L1[i]=paste('IV1.G',i) -for(i in 1:N.int2)L2[i]=paste('IV2.G',i) -dimnames(est.mat)=list(L1,L2) -qv=quantile(x[,IV[1]],Qsplit1) -qv=c(min(x[,IV[1]]),qv,max(x[,IV[1]])) -qv2=quantile(x[,IV[2]],Qsplit2) -qv2=c(min(x[,IV[2]]),qv2,max(x[,IV[2]])) -ic=0 -for(j in 1:N.int){ -j1=j+1 -xsub=binmat(xy,IV[1],qv[j],qv[j1]) -for(k in 1:N.int2){ -k1=k+1 -xsub2=binmat(xsub,IV[2],qv2[k],qv2[k1]) -est.mat[j,k]=est(xsub2,...) 
-ic=ic+1 -z[[ic]]=xsub2[,p1] -if(length(z[[ic]])>6)group[[ic]]=summary(xsub2[,1:p]) -} -} -n=NA -for(j in 1:length(z)){ -n[j]=length(z[[j]]) -} -if(min(n)<=5){ -id=which(n>5) -del=which(n<=5) -n=n[id] -if(pr)print(paste('eliminated group',del,'The sample size is less than 6')) -} -NT=N.int*N.int2 -MID=matrix(c(1:NT),nrow=N.int,ncol=N.int2,byrow=TRUE) -# pull out groups indicated by the columns of MID and do tests -IV1res=NULL -a=NULL -r=NA -n=NA -for(j in 1:N.int2){ -zsub=z[MID[,j]] -r=lapply(zsub,sum) -n=lapply(zsub,length) -r=as.vector(matl(r)) -n=as.vector(matl(n)) -a=binpair(r,n,method=method,alpha=alpha) -IV1res=rbind(IV1res,a[,3:11]) -} -# update adjusted p-value -IV1res[,9]=p.adjust(IV1res[,8],method=FWE.method) -#Now do IV2 -IV2res=NULL -r=NA -n=NA -a=NULL -for(j in 1:N.int){ -zsub=z[MID[j,]] -r=lapply(zsub,sum) -n=lapply(zsub,length) -r=as.vector(matl(r)) -n=as.vector(matl(n)) -a=binpair(r,n,method=method,alpha=alpha) -IV2res=rbind(IV2res,a[,3:11]) -IV2res[,9]=p.adjust(IV2res[,8],method=FWE.method) -} -list(Independent.variables.summary=group,Res.4.IV1=IV1res,Res.4.IV2=IV2res) -} - -smgridRC<-function(x,y,IV=c(1,2),Qsplit1=.5,Qsplit2=.5,tr=.2,alpha=.05,PB=FALSE,est=tmean,nboot=1000,pr=TRUE,method='hoch', -xout=FALSE,outfun=outpro,SEED=TRUE,...){ -# -# Compare measures of location among grids defined by quantiles of two IVs. By default 20% trimming is used -# est=median would use medians -# est=hd would use the Harrell-Davis estimator for the median. -# -# Qsplit: split the independent variable based on the -# quantiles indicated by Qsplit -# Example -# Qsplit1=c(.25,.5,.75) -# Qsplit2=.5 -# would split based on the quartiles for the first independent variable and the median -# for the second independent variable -# Then compare binomials# -# -# IV[1]: indicates the column of containing the first independent variable to use. -# IV[2]: indicates the column of containing the second independent variable to use. -# -# tr: amount of trimming -# -x=as.matrix(x) -p=ncol(x) -if(p==1)stop('There should be two or more independent variables') -p1=p+1 -xy<-elimna(cbind(x,y)) -x<-xy[,1:p] -y<-xy[,p1] -v=NULL -if(xout){ -flag<-outfun(x,plotit=FALSE)$keep -x<-x[flag,] -y<-y[flag] -} -if(identical(est,median))PB=TRUE -if(identical(est,hd))PB=TRUE -z=list() -group=list() -N.int=length(Qsplit1)+1 -N.int2=length(Qsplit2)+1 -est.mat=matrix(NA,nrow=N.int,ncol=N.int2) -n.mat=matrix(NA,nrow=N.int,ncol=N.int2) -DV.mat=matrix(NA,nrow=N.int,ncol=N.int2) -L1=NULL -L2=NULL -for(i in 1:N.int)L1[i]=paste('IV1.G',i) -for(i in 1:N.int2)L2[i]=paste('IV2.G',i) -dimnames(est.mat)=list(L1,L2) -qv=quantile(x[,IV[1]],Qsplit1) -qv=c(min(x[,IV[1]]),qv,max(x[,IV[1]])) -qv2=quantile(x[,IV[2]],Qsplit2) -qv2=c(min(x[,IV[2]]),qv2,max(x[,IV[2]])) -ic=0 -for(j in 1:N.int){ -j1=j+1 -xsub=binmat(xy,IV[1],qv[j],qv[j1]) -for(k in 1:N.int2){ -k1=k+1 -xsub2=binmat(xsub,IV[2],qv2[k],qv2[k1]) -est.mat[j,k]=est(xsub2[,p1],...) 
-n.mat[j,k]=length(xsub2[,p1]) -ic=ic+1 -z[[ic]]=xsub2[,p1] -if(length(z[[ic]])>6)group[[ic]]=summary(xsub2[,1:p]) -} -} -n=NA -for(j in 1:length(z)){ -n[j]=length(z[[j]]) -} -if(min(n)<=5){ -id=which(n>5) -del=which(n<=5) -n=n[id] -if(pr)print(paste('eliminated group',del,'The sample size is less than 6')) -} -NT=N.int*N.int2 -MID=matrix(c(1:NT),nrow=N.int,ncol=N.int2,byrow=TRUE) -# pull out groups indicated by the columns of MID and do tests -IV1res=NULL -a=NULL -for(j in 1:N.int2){ -zsub=z[MID[,j]] -DV.mat[,j]=matl(lapply(zsub,est,...)) -if(!PB)a=lincon(zsub,tr=tr,pr=FALSE,alpha=alpha)$psihat[,3:8] - -if(PB){ -if(identical(est,tmean))a=linpairpb(zsub,nboot=nboot,alpha=alpha,est=est,SEED=SEED,tr=tr)$output[,c(3:9)] -else -a=linpairpb(zsub,nboot=nboot,alpha=alpha,est=est,method=method,SEED=SEED,...)$output[,c(3:9)] -} -IV1res=rbind(IV1res,a) -} -#Now do IV2 -IV2res=NULL -a=NULL -for(j in 1:N.int){ -zsub=z[MID[j,]] -if(!PB){ -a=lincon(zsub,tr=tr,pr=FALSE,alpha=alpha)$psihat[,3:8] -} -if(PB){ -print(zsub) -if(identical(est,tmean))a=linpairpb(zsub,nboot=nboot,alpha=alpha,est=est,SEED=SEED,tr=tr)$output[,c(3:9)] -else -a=linpairpb(zsub,nboot=nboot,alpha=alpha,est=est,SEED=SEED,...)$output[,c(3:9)] -} -IV2res=rbind(IV2res,a) -} -if(!PB){ #fix labels add adjusted p-value -IV1res=cbind(IV1res[,1:4],p.adjust(IV1res[,4],method=method),IV1res[,5:6]) -IV2res=cbind(IV2res[,1:4],p.adjust(IV2res[,4],method=method),IV2res[,5:6]) -dimnames(IV1res)=list(NULL,c('psihat','ci.lower','ci.upper','p.value','p.adjust','Est.1','Est.2')) -dimnames(IV2res)=list(NULL,c('psihat','ci.lower','ci.upper','p.value','p.adjust','Est.1','Est.2')) -} -if(PB){ -IV1res[,3]=p.adjust(IV1res[,2],method=method) -IV2res[,3]=p.adjust(IV2res[,2],method=method) -IV1res=IV1res[,c(1,4,5,2,3,6,7)] -IV2res=IV2res[,c(1,4,5,2,3,6,7)] -dimnames(IV1res)=list(NULL,c('psihat','ci.lower','ci.upper','p.value','p.adjust','Est.1','Est.2')) -dimnames(IV2res)=list(NULL,c('psihat','ci.lower','ci.upper','p.value','p.adjust','Est.1','Est.2')) -} -list(est.loc.4.DV=est.mat,n=n.mat,Independent.variables.summary=group,Res.4.IV1=IV1res,Res.4.IV2=IV2res) -} - -sm.inter<-function(x,y,IV=c(1,2),Qsplit1=.5,Qsplit2=.5,PB=FALSE,est=tmean,tr=.2,nboot=1000,pr=TRUE,con=NULL, -xout=FALSE,outfun=outpro,SEED=TRUE,...){ -# -# Split on two variables. -# Look for interactions -# PB=FALSE: use trimmed means -# PB=TRUE: use percentile bootstrap. -# -# TR: amount of trimming when using a non-bootstrap method. To alter the amount of trimming when using -# a bootstrap method use -# tr. Example, tr=.25 would use 25% trimming. -# -# est=median would use medians -# est=hd would use the Harrell-Davis estimator for the median. -# -# Qsplit: split the independent variable based on the -# quantiles indicated by Qsplit -# Example -# Qsplit1=c(.25,.5,.75) -# Qsplit2=.5 -# would split based on the quartiles for the first independent variable and the median -# for the second independent variable -# - -# Then test the hypothesis of equal measures of location -# IV[1]: indicates the column of containing the first independent variable to use. -# IV[2]: indicates the column of containing the second independent variable to use. 
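# Illustrative usage sketch for sm.inter (hypothetical data). The first two
# covariates are split at their medians (Qsplit1=Qsplit2=.5) and an interaction
# is tested using 20% trimmed means.
#   set.seed(4)
#   x <- matrix(rnorm(400), ncol = 2)
#   y <- x[, 1] * x[, 2] + rnorm(200)
#   res <- sm.inter(x, y)
#   res$test; res$Effect.Sizes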
-# -# if(length(unique(y)>2))stop('y should be binary') -x=as.matrix(x) -p=ncol(x) -if(p==1)stop('There should be two or more independent variables') -p1=p+1 -xy<-elimna(cbind(x,y)) -x<-xy[,1:p] -y<-xy[,p1] -v=NULL -if(xout){ -flag<-outfun(x,plotit=FALSE,...)$keep -x<-x[flag,] -y<-y[flag] -} -if(identical(est,median))PB=TRUE -if(identical(est,hd))PB=TRUE -z=list() -group=list() -N.int=length(Qsplit1)+1 -N.int2=length(Qsplit2)+1 -est.mat=matrix(NA,nrow=N.int,ncol=N.int2) -L1=NULL -L2=NULL -for(i in 1:N.int)L1[i]=paste('IV1.G',i) -for(i in 1:N.int2)L2[i]=paste('IV2.G',i) -dimnames(est.mat)=list(L1,L2) -qv=quantile(x[,IV[1]],Qsplit1) -qv=c(min(x[,IV[1]]),qv,max(x[,IV[1]])) -qv2=quantile(x[,IV[2]],Qsplit2) -qv2=c(min(x[,IV[2]]),qv2,max(x[,IV[2]])) -ic=0 -for(j in 1:N.int){ -j1=j+1 -xsub=binmat(xy,IV[1],qv[j],qv[j1]) -for(k in 1:N.int2){ -k1=k+1 -xsub2=binmat(xsub,IV[2],qv2[k],qv2[k1]) -est.mat[j,k]=est(xsub2[,p1],...) -ic=ic+1 -z[[ic]]=xsub2[,p1] -if(length(z[[ic]])>6)group[[ic]]=summary(xsub2[,1:p]) -} -} -n=NA -for(j in 1:length(z)){ -n[j]=length(z[[j]]) -} -if(min(n)<=5){ -id=which(n>5) -del=which(n<=5) -n=n[id] -if(pr)print(paste('For group',del,'The sample size is less than 6')) -} -test=NULL -if(is.null(con))con=con2way(N.int,N.int2)$conAB -if(!PB){ -a=lincon(z,con=con,tr=tr,pr=FALSE) -test=a$test -psihat=a$psihat -} -if(PB){ -a=linconpb(z,con=con,est=est,...) -psihat=a$output -} -ES=IND.PAIR.ES(z,con=con)$effect.size -list(Group.summary=group,loc.est=est.mat,test=test,psihat=psihat,con=con,Effect.Sizes=ES) -} - -smgridLC=sm.inter - -Depth.class<-function(train=NULL,test=NULL,g=NULL,x1=NULL,x2=NULL,depthfun=prodepth,DIST=TRUE,SEED=TRUE,...){ -# -# Do classification using max depths or max depth distribution as suggested by -# Makinde and Fasoranbaku (2018). JAS -# -# depthfun indicates how the depth of a point is computed. -# By default, projection depth is used. depthfun=zonoid would use zonoid depth -# -# train is the training set -# test is the test data -# g: labels for the data in the training set. -# -# depthfun must be a function having the form depthfun(x,pts). -# That is, compute depth for the points in pts relative to points in x. -# -# -library(class) -if(!is.null(train)){ -if(is.null(g))stop('Argument g, group ids, must be specified') -if(is.matrix(g)){ -if(dim(g)>1)stop('Argument g should be a vector') -} -Train=cbind(train,g) -Train=elimna(Train) -p=ncol(train) -p1=p+1 -train=Train[,1:p] -g=Train[,p1] -flag=g==min(g) -x1=Train[flag,1:p] -x2=Train[!flag,1:p] -} -x1=elimna(x1) -x2=elimna(x2) -x1=as.matrix(x1) -x2=as.matrix(x2) -if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') -n1=nrow(x1) -n2=nrow(x2) -g=c(rep(0,n1),rep(1,n2)) -train=rbind(x1,x2) -train=elimna(train) -test=elimna(test) -train=as.matrix(train) -test=as.matrix(test) -if(ncol(test)==1)test=t(test) # If test is a vector, a single point, transpose to get correct number of columns. -if(ncol(test)!=ncol(train))stop('test and train do not have the same number of columns') -ntest=nrow(test) -P=ncol(train) -P1=P+1 -xall=as.data.frame(matrix(NA,nrow=nrow(train),ncol=P1)) -xall[,1:P]=train -xall[,P1]=g -xall=elimna(xall) -x1=xall[,1:P] -xall=as.matrix(xall) -g=as.vector(xall[,P1]) -ids=unique(g) # Number of categories -x2=elimna(test) -x1=as.matrix(x1) -x2=as.matrix(x2) -n=nrow(x1) -n2=nrow(x2) -p=length(ids) -D=matrix(NA,nrow=p,ncol=n2) -for(i in 1:p){ -flag=g==ids[i] -D[i,]=depthfun(as.matrix(x1[flag,]),pts=x2,SEED=SEED,...) 
# depth of test points relative to train data in cat i -} -if(!DIST)res=apply(D,2,which.max) -if(DIST){ -res=NA -all.dep=list() -for(i in 1:p){ -flag=g==ids[i] -all.dep[[i]]=depthfun(x1[flag,],pts=x1[flag,],...) #Have depth of all point in the training set for class i -} -for(j in 1:ntest){ -dt=NA -cum.depth=NA -for(i in 1:p){ -flag=g==ids[i] -chkpt=matrix(x2[j,],nrow=1) -#dt[i]=depthfun(x1[flag,],pts=x2[j,],...) -dt[i]=depthfun(x1[flag,],chkpt,...) -cum.depth[i]=mean(all.dep[[i]]<=dt[i]) -} -chkit=which(cum.depth==max(cum.depth)) -res[j]=chkit[1] -}} -res -} - -Depth.class.bag<-function(train=NULL,test=NULL,g=NULL,x1=NULL,x2=NULL,depthfun=prodepth,DIST=FALSE,nboot=100,SEED=TRUE,...){ -# -# -# g=class id -# if there are two classes and the training data are stored in separate variables, can enter -# the data for each class via the arguments -# x1 and x2. -# The function will then create appropriate labels and store them in g. -# -# KNN classification using data depths. -# KNNdist uses data depths, for the n1!=n2 it can be a bit biased, meaning that -# when there is no association, the probability of a correct classification will be less than .5 -# -# -if(SEED)set.seed(2) -if(is.null(test))stop('test =NULL, no test data provided') -if(!is.null(train)){ -if(is.null(g))stop('Argument g, group labels, must be specified') -if(is.matrix(g)){ -if(dim(g)>1)stop('Argument g should be a vector') -traing=elimna(cbind(train,g)) -p=ncol(train) -p1=p+1 -train=traing[,1:p] -test=traing[,p1] -if(length(unique(g))>2)stop('Only two groups allowed, g has more than two unique values') -} -x=fac2list(train,g) -x1=x[[1]] -x2=x[[2]] -} -test=as.matrix(test) -n.test=nrow(test) -if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') -if(ncol(test)!=ncol(x2))stop('test and x2 have different number of columns') -x1=elimna(x1) -x2=elimna(x2) -n1=nrow(x1) -n2=nrow(x2) -n=min(c(n1,n2)) -dvec=matrix(NA,nrow=nboot,ncol=n.test) -for(i in 1:nboot){ -id1=sample(n1,n,replace=TRUE) -id2=sample(n2,n,replace=TRUE) -dvec[i,]=Depth.class(x1=x1[id1,],x2=x2[id2,],test=test,depthfun=depthfun,DIST=DIST,...) -} -dec=rep(1,n.test) -test1=dvec==1 -test2=dvec==2 -chk1=apply(test1,2,sum) -chk2=apply(test2,2,sum) -idec=chk2>chk1 -dec[idec]=2 -dec -} - - -smbin.test<-function(x,y,IV=1,Qsplit=.5,method='SK',nboot=1000, -xout=FALSE,outfun=outpro,SEED=TRUE,...){ -# -# -# Qsplit: split the independent variable indicated by the argument -# IV; it defaults to 1, the first variable , column 1, in x. -# Example: -# IV=1: split on the first independent variable, -# IV=2: split on the second independent variable, -# -# Qsplit indicates the quantiles to be used and defaults to .5. -# Example Qsplit=c(.25,.5,.75) would split based on the quartiles -# -# Then compare the probability of success corresponding to the groups. 
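# Illustrative usage sketch for smbin.test (hypothetical data; y is binary and x
# must contain at least two covariates). The first covariate is split at its
# median and the resulting success probabilities are compared with the
# Storer--Kim method.
#   set.seed(5)
#   x <- matrix(rnorm(300), ncol = 2)
#   y <- rbinom(150, 1, plogis(x[, 1]))
#   smbin.test(x, y, IV = 1, Qsplit = .5)$output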
-# -# if(length(unique(y)>2))stop('y should be binary') -x=as.matrix(x) -p=ncol(x) -if(p==1)stop('There should be two or more independent variables') -p1=p+1 -xy<-elimna(cbind(x,y)) -x<-xy[,1:p] -y<-xy[,p1] -v=NULL -if(xout){ -flag<-outfun(x,plotit=FALSE,...)$keep -m<-xy[flag,] -x<-m[,1:p] -y<-m[,p1] -} -z=list() -N.int=length(Qsplit)+1 -qv=quantile(x[,IV],Qsplit) -qv=c(min(x[,IV]),qv,max(x[,IV])) -group=list() -for(j in 1:N.int){ -j1=j+1 -xsub=binmat(xy,IV,qv[j],qv[j1]) -z[[j]]=xsub[,p1] -group[[j]]=summary(xsub[,IV]) -} -r=NA -n=NA -for(j in 1:N.int){ -r[j]=sum(z[[j]]) -n[j]=length(z[[j]]) -} -a=binpair(r,n,method=method) -list(Group.summary=group,output=a) -} - -smbinAB<-function(x,y,IV=c(1,2),Qsplit1=.5,Qsplit2=.5,tr=.2,method='KMS', -xout=FALSE,outfun=outpro,...){ -# -# y is assumed to be binary -# -# x a matrix or data frame -# -# Split on two indepnddent variables. -# -# Qsplit: split the independent variable based on the -# quantiles indicated by Qsplit -# Example -# Qsplit1=c(.25,.5,.75) -# Qsplit2=.5 -# would split based on the quartiles for the first independent variable and the median -# for the second independent variable -# - -# Then test the hypotheses about the probability of a success. -# -# IV[1]: indicates the column containing the first independent variable to use. -# IV[2]: indicates the column of containing the second independent variable to use. -# -# -x=as.matrix(x) -p=ncol(x) -if(p==1)stop('There should be two or more independent variables') -p1=p+1 -xy<-elimna(cbind(x,y)) -x<-xy[,1:p] -y<-xy[,p1] -if(length(unique(y))>2)stop('y should be binary') -flag=max(y) -y[flag]=1 -y[!flag]=0 -v=NULL -if(xout){ -flag<-outfun(x,plotit=FALSE,...)$keep -x<-x[flag,] -y<-y[flag] -} -J=length(Qsplit1)+1 -K=length(Qsplit2)+1 -JK=J*K -MAT=matrix(1:JK,J,K,byrow=TRUE) -z=list() -group=list() -N.int=length(Qsplit1)+1 -N.int2=length(Qsplit2)+1 -est.mat=matrix(NA,nrow=N.int,ncol=N.int2) -n.mat=matrix(NA,nrow=N.int,ncol=N.int2) -Nsuc=matrix(NA,nrow=N.int,ncol=N.int2) -DV.mat=matrix(NA,nrow=N.int,ncol=N.int2) -L1=NULL -L2=NULL -qv=quantile(x[,IV[1]],Qsplit1) -qv=c(min(x[,IV[1]]),qv,max(x[,IV[1]])) -qv2=quantile(x[,IV[2]],Qsplit2) -qv2=c(min(x[,IV[2]]),qv2,max(x[,IV[2]])) -ic=0 -for(j in 1:N.int){ -j1=j+1 -xsub=binmat(xy,IV[1],qv[j],qv[j1]) -for(k in 1:N.int2){ -k1=k+1 -xsub2=binmat(xsub,IV[2],qv2[k],qv2[k1]) -Nsuc[j,k]=sum(xsub2[,p1]==1) -est.mat[j,k]=mean(xsub2[,p1]) -n.mat[j,k]=length(xsub2[,p1]) -ic=ic+1 -z[[ic]]=xsub2[,p1] -if(length(z[[ic]])>6)group[[ic]]=summary(xsub2[,1:p]) -} -} -n=NA -for(j in 1:length(z)){ -n[j]=length(z[[j]]) -} -if(min(n)<=5){ -id=which(n>5) -del=which(n<=5) -n=n[id] -if(pr)print(paste('For group',del,'the sample size is less than 6')) -} -A=list() -B=list() -for(j in 1:J)A[[j]]=lincon.bin(Nsuc[j,],n.mat[j,],method=method)$CI -for(j in 1:K)B[[j]]=lincon.bin(Nsuc[,j],n.mat[,j],method=method)$CI -list(est.loc.4.DV=est.mat,n=n.mat,A=A,B=B) -} - -smgrid<-function(x,y,IV=c(1,2),Qsplit1=.5,Qsplit2=.5,tr=.2,PB=FALSE,est=tmean,nboot=1000,pr=TRUE, -xout=FALSE,outfun=outpro,SEED=TRUE,...){ -# -# Split on two variables, not just one. -# -# Qsplit: split the independent variable based on the -# quantiles indicated by Qsplit -# Example -# Qsplit1=c(.25,.5,.75) -# Qsplit2=.5 -# would split based on the quartiles for the first independent variable and the median -# for the second independent variable -# - -# Then test the hypothesis of equal measures of location -# IV[1]: indicates the column of containing the first independent variable to use. 
-# IV[2]: indicates the column of containing the second independent variable to use. -# -# if(length(unique(y)>2))stop('y should be binary') -x=as.matrix(x) -p=ncol(x) -if(p==1)stop('There should be two or more independent variables') -p1=p+1 -xy<-elimna(cbind(x,y)) -x<-xy[,1:p] -y<-xy[,p1] -v=NULL -if(xout){ -flag<-outfun(x,plotit=FALSE,...)$keep -x<-x[flag,] -y<-y[flag] -} -z=list() -group=list() -N.int=length(Qsplit1)+1 -N.int2=length(Qsplit2)+1 -est.mat=matrix(NA,nrow=N.int,ncol=N.int2) -L1=NULL -L2=NULL -for(i in 1:N.int)L1[i]=paste('IV1.G',i) -for(i in 1:N.int2)L2[i]=paste('IV2.G',i) -dimnames(est.mat)=list(L1,L2) -qv=quantile(x[,IV[1]],Qsplit1) -qv=c(min(x[,IV[1]]),qv,max(x[,IV[1]])) -qv2=quantile(x[,IV[2]],Qsplit2) -qv2=c(min(x[,IV[2]]),qv2,max(x[,IV[2]])) -ic=0 -for(j in 1:N.int){ -j1=j+1 -xsub=binmat(xy,IV[1],qv[j],qv[j1]) -for(k in 1:N.int2){ -k1=k+1 -xsub2=binmat(xsub,IV[2],qv2[k],qv2[k1]) -est.mat[j,k]=est(xsub2,...) -ic=ic+1 -z[[ic]]=xsub2[,p1] -if(length(z[[ic]])>6)group[[ic]]=summary(xsub2[,1:p]) -} -} -n=NA -for(j in 1:length(z)){ -n[j]=length(z[[j]]) -} -if(min(n)<=5){ -id=which(n>5) -del=which(n<=5) -n=n[id] -if(pr)print(paste('eliminated group',del,'The sample size is less than 6')) -} -test=NULL -ES=IND.PAIR.ES(z)$effect.size -if(!PB){ -a=lincon(z,tr=tr,pr=FALSE) -chk.sig=sign(a$psihat[,4])*sign(a$psihat[,5]) -num.sig=sum(chk.sig>=0) -test=a$test -res=a$psihat -} -if(PB){ -a=linpairpb(z,est=est,nboot=nboot,...) -num.sig=a$num.sig -res=a$output -} -list(Group.summary=group,loc.est=est.mat,test=test,output=res,num.sig=num.sig,Effect.Sizes=ES) -} - - -reg.hyp.split<-function(x,y,split.reg=Qreg,TR=.2,alpha = 0.05, PB = FALSE, est = tmean, nboot = 1000, pr = TRUE, - method = "hoch", xout = FALSE, outfun = outpro, SEED = TRUE, ...){ -# -# Split design space based on the hyperplane associated with the argument -# split.reg -# Default is a quantile regression estimate based on the data in x -# Split the original data then split the results again to get a 2-by-2 ANOVA design -# -# Compare measures of location based on the resulting splits -# -# Choices for split.reg: any R function that returns coefficients in $coef -# Ex. split.reg=depreg would use a deepest regression estimator. -# Could get different split using different quantiles -# Ex.split.reg=Qreg,q=.25, would split the design space based .25 quantile hyperplanes. -# split.reg=mdepreg.coef would use the deepest regression line estimator. -# -# -p=ncol(x) -xy=elimna(cbind(x,y)) -if(xout){ -flag<-outfun(xy[,1:p],plotit=FALSE,...)$keep -xy<-xy[flag,] -} -if(identical(est,median))PB=TRUE -if(identical(est,hd))PB=TRUE -if(p<2)stop('Should have two or more independent variables') -pm1=p-1 -p1=p+1 -hat=reg.pred(xy[,1:pm1],xy[,p],regfun=split.reg,...) -res=xy[,p]-hat -flag=res>0 -x1=xy[flag,] -x2=xy[!flag,] -# -hat=reg.pred(x1[,1:pm1],x1[,p],regfun=split.reg,...) -res=x1[,p]-hat -flag=res>0 -xy1=x1[flag,] -xy2=x1[!flag,] -# -hat=reg.pred(x2[,1:pm1],x2[,p],regfun=split.reg,...) -res=x2[,p]-hat -flag=res>0 -xy3=x2[flag,] -xy4=x2[!flag,] -y=list() -y[[1]]=xy1[,p1] -y[[2]]=xy2[,p1] -y[[3]]=xy3[,p1] -y[[4]]=xy4[,p1] -group=list() -group[[1]]=summary(xy1[,1:p]) -group[[2]]=summary(xy2[,1:p]) -group[[3]]=summary(xy3[,1:p]) -group[[4]]=summary(xy4[,1:p]) -if(!PB)a=lincon(y,tr=TR) -if(PB)a=linconpb(y,est=est,nboot=nboot,...) 
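# Illustrative usage sketch for reg.hyp.split (hypothetical data). The design
# space is split twice on quantile-regression hyperplanes fitted among the
# covariates, and the 20% trimmed means of y for the four resulting groups are
# compared.
#   set.seed(6)
#   x <- matrix(rnorm(600), ncol = 3)
#   y <- rowSums(x) + rnorm(200)
#   reg.hyp.split(x, y)$output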
-list(Independent.variables.summary=group,output=a) -} - -regbin.hyp.split<-function(x,y,split.reg=Qreg,alpha = 0.05, nboot = 1000, - method ='SK', xout = FALSE, outfun = outpro, SEED = TRUE, ...){ -# -# y is assumed to be binary -# -# Split design space based on the hyperplane associated with the argument -# split.reg -# Default is a squantile regression estimate based on the data in x -# Split the original data then split the results again to get a 2-by-2 ANOVA design -# -# Compare binomial distributions using based on the argument -# method, which defaults to Storer--Kim. To get confidence intervals use -# method='KMS' -# -# Choices for split.reg: any R function that returns coefficients in $coef -# Ex. split.reg=depreg would use a deepest regression estimator. -# Could get different split using different quantiles -# Ex.split.reg=Qreg,q=.25, would split the design space based .25 quantile hyperplanes. -# split.reg=mdepreg.coef would use the deepest regression line estimator. -# -# -p=ncol(x) -xy=elimna(cbind(x,y)) -if(xout){ -flag<-outfun(xy[,1:p],plotit=FALSE,...)$keep -xy<-xy[flag,] -} -if(length(unique(y))>2)stop('y should be binary') -n=length(y) -yy=rep(0,n) -flag=which(y==max(y)) -yy[flag]=1 -y=yy -if(p<2)stop('Should have two or more independent variables') -pm1=p-1 -p1=p+1 -hat=reg.pred(xy[,1:pm1],xy[,p],regfun=split.reg,...) -res=xy[,p]-hat -flag=res>0 -x1=xy[flag,] -x2=xy[!flag,] -# -hat=reg.pred(x1[,1:pm1],x1[,p],regfun=split.reg,...) -res=x1[,p]-hat -flag=res>0 -xy1=x1[flag,] -xy2=x1[!flag,] -# -hat=reg.pred(x2[,1:pm1],x2[,p],regfun=split.reg,...) -res=x2[,p]-hat -flag=res>0 -xy3=x2[flag,] -xy4=x2[!flag,] -r=NA -r[1]=sum(xy1[,p1]) -r[2]=sum(xy2[,p1]) -r[3]=sum(xy3[,p1]) -r[4]=sum(xy4[,p1]) -n=NA -n[1]=nrow(xy1) -n[2]=nrow(xy2) -n[3]=nrow(xy3) -n[4]=nrow(xy4) -group=list() -group[[1]]=summary(xy1[,1:p]) -group[[2]]=summary(xy2[,1:p]) -group[[3]]=summary(xy3[,1:p]) -group[[4]]=summary(xy4[,1:p]) -a=binpair(r,n,method=method,alpha=alpha) -list(Independent.variables.summary=group,output=a) -} - -KNNbag<-function(train=NULL,test=NULL,g=NULL,x1=NULL,x2=NULL,depthfun=prodepth,nboot=100,SEED=TRUE,...){ -# -# -# g=class id -# if there are two classes and the training data are stored in separate variables, can enter -# the data for each class via the arguments -# x1 and x2. -# The function will then create appropriate labels and store them in g. -# -# KNN classification using data depths. 
-# KNNdist uses data depths, for the n1!=n2 it can be a bit biased, meaning that -# when there is no association, the probability of a correct classification will be less than .5 -# It removes any row vector with missing values -# -# -if(SEED)set.seed(2) -if(!is.null(train)){ -if(is.null(g))stop('Argument g, group ids, must be specified') -if(is.matrix(g)){ -if(dim(g)>1)stop('Argument g should be a vector') -} -Train=cbind(train,g) -Train=elimna(Train) -p=ncol(train) -p1=p+1 -train=Train[,1:p] -g=Train[,p1] -flag=g==min(g) -x1=train[flag,] -x2=train[!flag,] -if(length(unique(g))>2)stop('Only two groups allowed, g has more than two unique values') -} -x1=elimna(x1) -x2=elimna(x2) -test=elimna(test) -test=as.matrix(test) -n.test=nrow(test) -if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') -if(ncol(test)!=ncol(x2))stop('test and x2 have different number of columns') -x1=elimna(x1) -x2=elimna(x2) -n1=nrow(x1) -n2=nrow(x2) -n=min(c(n1,n2)) -dvec=matrix(NA,nrow=nboot,ncol=n.test) -for(i in 1:nboot){ -id1=sample(n1,n,replace=TRUE) -id2=sample(n2,n,replace=TRUE) -dvec[i,]=KNNdist(x1=x1[id1,],x2=x2[id2,],test=test,depthfun=depthfun) -} -dec=rep(1,n.test) -test1=dvec==1 -test2=dvec==2 -chk1=apply(test1,2,sum) -chk2=apply(test2,2,sum) -idec=chk2>chk1 -dec[idec]=2 -dec -} -SVMbag<-function(train=NULL,test=NULL,g=NULL,x1=NULL,x2=NULL,depthfun=prodepth,kernel='radial',nboot=100,SEED=TRUE,...){ -# -# -# g=class id -# if there are two classes and the training data are stored in separate variables, can enter -# the data for each class via the arguments -# x1 and x2. -# The function will then create appropriate labels and store them in g. -# -# Support Vector Machine classification method. -# Unlike standard SVM this function has the following property. Suppose n1!=n2 and n2/n1 is small. If there is no -# association between the training data and the labels, the probability of a misclassification is .5 -# In contrast, using standard SVM, it is approximately n2/(n1+n2) -# -if(is.null(test))stop('Argument test is null, contains no data') -if(SEED)set.seed(2) -if(!is.null(train)){ -if(is.null(g))stop('Argument g, group ids, must be specified') -if(is.matrix(g)){ -if(dim(g)>1)stop('Argument g should be a vector') -traing=elimna(cbind(train,g)) -p=ncol(train) -p1=p+1 -train=traing[,1:p] -test=traing[,p1] -if(length(unique(g))>2)stop('Only two groups allowed, g has more than two unique values') -} -x=fac2list(train,g) -x1=x[[1]] -x2=x[[2]] -} -test=as.matrix(test) -n.test=nrow(test) -if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') -if(ncol(test)!=ncol(x2))stop('test and x2 have different number of columns') -x1=elimna(x1) -x2=elimna(x2) -n1=nrow(x1) -n2=nrow(x2) -n=min(c(n1,n2)) -dvec=matrix(NA,nrow=nboot,ncol=n.test) -for(i in 1:nboot){ -id1=sample(n1,n,replace=TRUE) -id2=sample(n2,n,replace=TRUE) -dvec[i,]=SVM(x1=x1[id1,],x2=x2[id2,],test=test,kernel=kernel) -} -dec=rep(1,n.test) -test1=dvec==1 -test2=dvec==2 -chk1=apply(test1,2,sum) -chk2=apply(test2,2,sum) -idec=chk2>chk1 -dec[idec]=2 -dec -} - -mulwmw.dist.new<-function(m1,m2,new,cop=3){ -# -# -# Determine center corresponding to two -# independent groups, project all points onto line -# connecting the centers based on m1 and m2. Return projected distances for m1 -# m2. -# new: new data, not known whether it came from group 1 or 2. -# This function is used in pro.class, a classification method. 
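# Illustrative usage sketch for the bagged classifiers above (KNNbag, SVMbag),
# using hypothetical data with unequal training-group sizes. Equal-sized bootstrap
# samples are drawn from each group and a majority vote over nboot=100 resamples
# is returned; the SVM wrapper relies on the svm function in the e1071 package.
#   set.seed(7)
#   x1 <- matrix(rnorm(120), ncol = 2)            # class 1 training data
#   x2 <- matrix(rnorm(60, mean = 1), ncol = 2)   # class 2 training data
#   test <- matrix(rnorm(20), ncol = 2)
#   SVMbag(x1 = x1, x2 = x2, test = test)         # returns 1 or 2 for each row of test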
-# -# -# There are three options for computing the center of the -# cloud of points when computing projections: -# cop=1 uses Donoho-Gasko median -# cop=2 uses MCD center -# cop=3 uses median of the marginal distributions. -# -# When using cop=2 or 3, default critical value for outliers -# is square root of the .975 quantile of a -# chi-squared distribution with p degrees -# of freedom. -# -if(is.null(dim(m1))||dim(m1)[2]<2){stop("m1 and m2 should have two or more columns") -} -m1<-elimna(m1) # Remove missing values -m2<-elimna(m2) -new=elimna(new) -FLAG=FALSE -new=as.matrix(new) -if(ncol(new)==1){ -FLAG=TRUE -new=t(new) # If test is a vector, a single point, transpose to get correct number of columns. -new=rbind(new,new) #avoid R from aborting. -} -n1=nrow(m1) -n2=nrow(m2) -if(cop==1){ -if(ncol(m1)>2){ -center1<-dmean(m1,tr=.5) -center2<-dmean(m2,tr=.5) -} -if(ncol(m1)==2){ -tempd<-NA -for(i in 1:nrow(m1)) -tempd[i]<-depth(m1[i,1],m1[i,2],m1) -mdep<-max(tempd) -flag<-(tempd==mdep) -if(sum(flag)==1)center1<-m1[flag,] -if(sum(flag)>1)center1<-apply(m1[flag,],2,mean) -for(i in 1:nrow(m2)) -tempd[i]<-depth(m2[i,1],m2[i,2],m2) -mdep<-max(tempd) -flag<-(tempd==mdep) -if(sum(flag)==1)center2<-m2[flag,] -if(sum(flag)>1)center2<-apply(m2[flag,],2,mean) -}} -if(cop==2){ -center1<-cov.mcd(m1)$center -center2<-cov.mcd(m2)$center -} -if(cop==3){ -center1<-apply(m1,2,median) -center2<-apply(m2,2,median) -} -if(cop==4){ -center1<-smean(m1) -center2<-smean(m2) -} -center<-(center1+center2)/2 -B<-center1-center2 -if(sum(center1^2).5 -# -# SEED=NULL, done for convenience when this function is called by other functions. -# -if(is.null(test))stop('Argument test is null, contains no data') -if(is.null(test))stop('Argument test is null, contains no data') -if(!is.null(train)){ -if(is.null(g))stop('Argument g, group ids, must be specified') -if(is.matrix(g)){ -if(dim(g)>1)stop('Argument g should be a vector') -}} -CHK=FALSE -if(!is.null(x1)){ -if(!is.null(x2)){ -if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') -x1=elimna(x1) -x2=elimna(x2) -n1=nrow(x1) -n2=nrow(x2) -g=c(rep(0,n1),rep(1,n2)) -CHK=TRUE -d=mulwmw.dist.new(x1,x2,test) -}} -if(!CHK){ -if(is.null(g))stop('The argument g should contain the group id values') -xg=elimna(cbind(train,g)) -p=ncol(train) -p1=p+1 -train=xg[,1:p] -g=xg[,p1] -if(length(unique(g))!=2)stop('Should have only two unique values in g') -flag=g==min(g) -x1=train[flag,] -x2=train[!flag,] - d=mulwmw.dist.new(x1,x2,test) -} -pdf1=NA -pdf2=NA -for(i in 1:length(d$dis.new)){ #Avoid sorting issue done by akerd -pdf1[i]=akerd(d$dist1,pts=d$dis.new[i],pyhat=TRUE,plotit=FALSE) -pdf2[i]=akerd(d$dist2,pts=d$dis.new[i],pyhat=TRUE,plotit=FALSE) -} -dec=rep(2,nrow(test)) -dec[pdf1/pdf2>rule]=1 -dec -} - -pro.classPD.bag<-function(train=NULL,test=NULL,g=NULL,x1=NULL,x2=NULL,rule=1,nboot=100,SEED=TRUE,...){ -# -# -# A bagged version of pro.classPD -# -# g=class id -# if there are two classes and the training data are stored in separate variables, can enter -# the data for each class via the arguments -# x1 and x2. -# The function will then create appropriate labels and store them in g. 
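# Illustrative usage sketch for pro.classPD.bag (hypothetical data). Test points
# are projected onto the line connecting the two group centers and classified by
# comparing kernel density estimates of the projected distances, aggregated over
# nboot bootstrap samples of equal size.
#   set.seed(8)
#   x1 <- matrix(rnorm(100), ncol = 2)
#   x2 <- matrix(rnorm(100, mean = 1), ncol = 2)
#   test <- matrix(rnorm(10), ncol = 2)
#   pro.classPD.bag(x1 = x1, x2 = x2, test = test)   # returns 1 or 2 for each row of test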
-# -# -if(is.null(test))stop('Argument test is null, contains no data') -if(SEED)set.seed(2) -if(!is.null(train)){ -if(is.null(g))stop('Argument g, group labels, must be specified') -if(is.matrix(g)){ -if(dim(g)>1)stop('Argument g should be a vector') -traing=elimna(cbind(train,g)) -p=ncol(train) -p1=p+1 -train=traing[,1:p] -test=traing[,p1] -if(length(unique(g))>2)stop('Only two groups allowed, g has more than two unique values') -} -x=fac2list(train,g) -x1=x[[1]] -x2=x[[2]] -} -test=as.matrix(test) -n.test=nrow(test) -if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') -if(ncol(test)!=ncol(x2))stop('test and x2 have different number of columns') -x1=elimna(x1) -x2=elimna(x2) -n1=nrow(x1) -n2=nrow(x2) -n=min(c(n1,n2)) -dvec=matrix(NA,nrow=nboot,ncol=n.test) -for(i in 1:nboot){ -id1=sample(n1,n,replace=TRUE) -id2=sample(n2,n,replace=TRUE) -dvec[i,]=pro.classPD(x1=x1[id1,],x2=x2[id2,],test=test,rule=rule) -} -dec=rep(1,n.test) -test1=dvec==1 -test2=dvec==2 -chk1=apply(test1,2,sum) -chk2=apply(test2,2,sum) -idec=chk2>chk1 -dec[idec]=2 -dec -} - - - -UB.class<-function(train=NULL,test=NULL,g=NULL,x1=NULL,x2=NULL, -method=c('DIS','DEP','PRO'),depthfun=prodepth,...){ -# -# A collection of classification methods for which the error rate is not -# impacted by unequal sample sizes. -# Bagged version are available in class.bag -# -# DIS: Points classified based on their depths -# DEP: Uses depths as suggested by Makinde and Fasoranbaku (2018). JAS -# PRO: project the points onto a line connecting the centers of the data clouds. -# Then use estimate of the pdf for each group to make a decision about future points. -# -type=match.arg(method) -switch(type, - DIS=discdepth(train=train,test=test,g=g,x1=x1,x2=x2), - DEP=Depth.class(train=train,test=test,g=g,x1=x1,x2=x2), - PRO=pro.classPD(train=train,test=test,g=g,x1=x1,x2=x2), - ) -} - -smbin.inter<-function(x,y,IV=c(1,2),Qsplit1=.5,Qsplit2=.5,alpha=.05,con=NULL,xout=FALSE,outfun=outpro,SEED=TRUE,...){ -# -# Split on two variables. -# Look for interactions when dealing with binary dependent variable. -# -# Qsplit: split the independent variable based on the -# quantiles indicated by Qsplit -# Example -# Qsplit1=c(.25,.5,.75) -# Qsplit2=.5 -# would split based on the quartiles for the first independent variable and the median -# for the second independent variable -# -# IV[1]: indicates the column of containing the first independent variable to use. -# IV[2]: indicates the column of containing the second independent variable to use. -# -x=as.matrix(x) -p=ncol(x) -if(p==1)stop('There should be two or more independent variables') -p1=p+1 -xy<-elimna(cbind(x,y)) -x<-xy[,1:p] -y<-xy[,p1] - if(length(unique(y))>2)stop('y should be binary') -v=NULL -if(xout){ -flag<-outfun(x,plotit=FALSE,...)$keep -x<-x[flag,] -y<-y[flag] -} -z=list() -group=list() -N.int=length(Qsplit1)+1 -N.int2=length(Qsplit2)+1 -if(is.null(con))con=con2way(N.int,N.int2)$conAB -est.mat=matrix(NA,nrow=N.int,ncol=N.int2) -L1=NULL -L2=NULL -for(i in 1:N.int)L1[i]=paste('IV1.G',i) -for(i in 1:N.int2)L2[i]=paste('IV2.G',i) -dimnames(est.mat)=list(L1,L2) -qv=quantile(x[,IV[1]],Qsplit1) -qv=c(min(x[,IV[1]]),qv,max(x[,IV[1]])) -qv2=quantile(x[,IV[2]],Qsplit2) -qv2=c(min(x[,IV[2]]),qv2,max(x[,IV[2]])) -ic=0 -for(j in 1:N.int){ -j1=j+1 -xsub=binmat(xy,IV[1],qv[j],qv[j1]) -for(k in 1:N.int2){ -k1=k+1 -xsub2=binmat(xsub,IV[2],qv2[k],qv2[k1]) -est.mat[j,k]=mean(xsub2[,p1],...) 
-ic=ic+1 -z[[ic]]=xsub2[,p1] -if(length(z[[ic]])>6)group[[ic]]=summary(xsub2[,1:p]) -} -} -r=NA -n=NA -for(j in 1:length(z)){ -r[j]=sum(z[[j]]) -n[j]=length(z[[j]]) -} -if(min(n)<=5){ -del=which(n<=5) -if(pr)print(paste('For group',del,'the sample size is less than 6')) -} -test=lincon.bin(r,n,con=con) -list(Group.summary=group,Prob.est=est.mat,output=test$CI,con=con) -} - -class.logR<-function(train=NULL,test=NULL,g=NULL,x1=NULL,x2=NULL,sm=TRUE,fr=2,rule=.5,SEED=NULL){ -# -# Do classification using logistic or a smoother -# sm=TRUE: a smoother will be used with the span taken to be -# fr is this span -# sm=FALSE: use logistic regression. -# -# -# train is the training set -# test is the test data -# g contains labels for the data in the training set, -# -# This function removes the need to call library class. -# For more information, use the command ?knn -# -# SEED=NULL, used for convenience when called by other functions that expect SEED -# -if(!is.null(train)){ -train=as.matrix(train) -if(is.null(g))stop('Argument g, group ids, must be specified') -if(is.matrix(g))if(dim(g)>1)stop('Argument g should be a vector') -flag=g==min(g) -x1=train[flag,] -x2=train[!flag,] -} -x1=as.matrix(x1) -x2=as.matrix(x2) -if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') -if(is.null(x1))stop('Something is wrong, no data in x1') -if(is.null(x2))stop('Something is wrong, no data in x2') -if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') -x1=elimna(x1) -x2=elimna(x2) -x1=as.matrix(x1) -x2=as.matrix(x2) -n1=nrow(x1) -n2=nrow(x2) -train=rbind(x1,x2) -g=c(rep(1,n1),rep(2,n2)) -if(!sm)e=logreg.pred(train,g,test) -if(sm)e=logSMpred(train,g,test,fr=fr) -if(is.null(test))stop('Argument test is null, contains no data') -test=as.matrix(test) -res=rep(1,nrow(test)) -flag=e>rule -res[flag]=2 -res -} - - - -UBROC<-function(train=NULL,g=NULL,x1=NULL,x2=NULL,method='KNN',reps=10,pro.p=.8,SEED=TRUE,POS=TRUE,EN=TRUE,...){ -# -# Compute ROC curve based on an 'unbiased' classification method if EN=TRUE, possible bias if EN=FALSE -# method indicates the method to be used -# -# Current choices available: -# KNN: Nearest neighbor using robust depths -# DIS: Points classified based on their depths -# DEP: Uses depths as suggested by Makinde and Fasoranbaku (2018). JAS -# SVM: support vector machine -# RF: Random forest -# NN: neural network -# ADA: ada boost -# PRO: project the points onto a line connecting the centers of the data clouds. -# Then use estimate of the pdf for each group to make a decision about future points. -# -# -# reps: number of resamples resamples -# pro.p controls the proportion used in the training, the rest are used in the test set -# -library(ROCR) -CHK=FALSE -if(!is.null(x1)){ -if(!is.null(x2)){ -if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') -x1=elimna(x1) -x2=elimna(x2) -n1=nrow(x1) -n2=nrow(x2) -g=c(rep(0,n1),rep(1,n2)) -CHK=TRUE -}} -if(!CHK){ -if(is.null(g))stop('The argument g should contain the group id values') -xg=elimna(cbind(train,g)) -p=ncol(train) -p1=p+1 -train=xg[,1:p] -g=xg[,p1] -if(length(unique(g))!=2)stop('Should have only two unique values in g') -flag=g==min(g) -x1=train[flag,] -x2=train[!flag,] -} -n1=nrow(x1) -n2=nrow(x2) -ns1=round(pro.p*n1) -ns2=round(pro.p*n2) -if(EN)ns1=ns2=min(ns1,ns2) -PRED=NULL -LABS=NULL -for(j in 1:reps){ -N1=sample(n1,ns1) -N2=sample(n2,ns2) -test1=x1[-N1,] -test2=x2[-N2,] -a1=CLASS.fun(x1=x1[N1,],x2=x2[N2,],test=test1,method=method,...) 
-a2=CLASS.fun(x1=x1[N1,],x2=x2[N2,],test=test2,method=method,...) -PRED=c(PRED,c(a1,a2)) -LABS=c(LABS,c(rep(1,length(a1)),rep(2,length(a2)))) -pred=prediction(PRED,LABS) -} -if(POS)perf <- performance(pred,'tpr','fpr') -if(!POS)perf <- performance(pred,'tnr','fnr') -plot(perf) -} - -ROCmul.curve<-function(train=NULL,g=NULL,x1=NULL,x2=NULL,method=c('KNN','DIS'),pro.p=.8, -SEED=TRUE,reps=10,POS=TRUE,...){ -# -# Required ROCR -# -# Using cross validation -# -# Plot ROC curves based on two or more methods -# Current choices available: -# KNN: Nearest neighbor using robust depths -# DIS: Points classified based on their depths -# DEP: Uses depths as suggested by Makinde and Fasoranbaku (2018). JAS -# SVM: support vector machine -# RF: Random forest -# NN: neural network -# ADA: ada boost -# PRO: project the points onto a line connecting the centers of the data clouds. -# Then use estimate of the pdf for each group to make a decision about future points. -# -# reps number of resamples, the resulting ROC curves are averaged and the average is plotted. -# pro.p controls the proportion used in the training, the rest are used in the test set -# -library(ROCR) -n.meth=length(method) -UBROC(train=train,g=g,x1=x1,x2=x2,method=method[1],reps=reps,pro.p=pro.p,SEED=SEED,POS=POS,...) -if(!is.null(train)){ -if(is.null(g))stop('Argument g, group ids, must be specified') -if(is.matrix(g)){ -if(dim(g)>1)stop('Argument g should be a vector') -} -flag=g==min(g) -x1=train[flag,] -x2=train[!flag,] -if(length(unique(g))>2)stop('Only two groups allowed, g has more than two unique values') -} -x1=elimna(x1) -x2=elimna(x2) -n1=nrow(x1) -n2=nrow(x2) -g=c(rep(0,n1),rep(1,n2)) -k=1 -if(n.meth>1){ -for(k in 2:n.meth){ -CHK=TRUE -if(!CHK){ -if(is.null(g))stop('The argument g should contain the group id values') -xg=elimna(cbind(train,g)) -p=ncol(train) -p1=p+1 -train=xg[,1:p] -g=xg[,p1] -if(length(unique(g))!=2)stop('Should have only two unique values in g') -flag=g==min(g) -x1=train[flag,] -x2=train[!flag,] -} -ns1=round(pro.p*n1) -ns2=round(pro.p*n2) -PRED=NULL -LABS=NULL -for(j in 1:reps){ -N1=sample(n1,ns1) -N2=sample(n2,ns2) -test1=x1[-N1,] -test2=x2[-N2,] -a1=CLASS.fun(x1=x1[N1,],x2=x2[N2,],test=test1,method=method[k],...) -a2=CLASS.fun(x1=x1[N1,],x2=x2[N2,],test=test2,method=method[k],...) -PRED=c(PRED,c(a1,a2)) -LABS=c(LABS,c(rep(1,length(a1)),rep(2,length(a2)))) -pred=prediction(PRED,LABS) -} -if(POS)perf <- performance(pred,'tpr','fpr') -if(!POS)perf <- performance(pred,'tnr','fnr') -plot(perf,lty=k,add=TRUE) -}} -} -# perf=performance(pred, "spec") -# auroc<- perf@y.values -pro.class.probs<-function(train=NULL,test=NULL,g=NULL,x1=NULL,x2=NULL,nonpar=TRUE,rule=.5,fr=2){ -# -# Same as pro.class, but also reports probabilities fo being in second class for each vector in test. -# -# project the data onto a line, then estimate the probability that -# a value in test data is in first group. 
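# Illustrative usage sketch for pro.class.probs (hypothetical data). In addition
# to the classification, the estimated probability of membership in the second
# class is returned for each row of test (component prob.in.second.class, as used
# by pro.class.bag).
#   set.seed(9)
#   x1 <- matrix(rnorm(100), ncol = 2)
#   x2 <- matrix(rnorm(100, mean = 1), ncol = 2)
#   test <- matrix(rnorm(10), ncol = 2)
#   pro.class.probs(x1 = x1, x2 = x2, test = test)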
-# -if(rule<=0 || rule>=1)stop('rule should be greater than 0 and less than 1') -if(!is.null(train)){ -if(is.null(g))stop('Argument g, group ids, must be specified') -if(is.matrix(g)){ -if(dim(g)>1)stop('Argument g should be a vector') -}} -CHK=FALSE -if(!is.null(x1)){ -if(!is.null(x2)){ -if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') -x1=elimna(x1) -x2=elimna(x2) -n1=nrow(x1) -n2=nrow(x2) -g=c(rep(0,n1),rep(1,n2)) -CHK=TRUE -d=mulwmw.dist.new(x1,x2,test) -}} -if(!CHK){ -if(is.null(g))stop('The argument g should contain the group id values') -xg=elimna(cbind(train,g)) -p=ncol(train) -p1=p+1 -train=xg[,1:p] -g=xg[,p1] -if(length(unique(g))!=2)stop('Should have only two unique values in g') -flag=g==min(g) -x1=train[flag,] -x2=train[!flag,] - d=mulwmw.dist.new(x1,x2,test) -} -flag=g==min(g) -gnum=g -gnum[flag]=0 -gnum[!flag]=1 -x=c(d$dist1,d$dist2) -if(nonpar){ -v=logSMpred(x,gnum,d[[3]],fr=fr) -} -if(!nonpar){ -v=logreg.pred(x,gnum,d[[3]]) -} -dec=rep(2,nrow(test)) -dec[v1)stop('Argument g should be a vector') -traing=elimna(cbind(train,g)) -p=ncol(train) -p1=p+1 -train=traing[,1:p] -test=traing[,p1] -if(length(unique(g))>2)stop('Only two groups allowed, g has more than two unique values') -} -x=fac2list(train,g) -x1=x[[1]] -x2=x[[2]] -} -test=as.matrix(test) -n.test=nrow(test) -if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') -if(ncol(test)!=ncol(x2))stop('test and x2 have different number of columns') -x1=elimna(x1) -x2=elimna(x2) -n1=nrow(x1) -n2=nrow(x2) -n=min(c(n1,n2)) -dvec=matrix(NA,nrow=nboot,ncol=n.test) -for(i in 1:nboot){ -id1=sample(n,n,replace=TRUE) -id2=sample(n,n,replace=TRUE) -if(!PR)dvec[i,]=pro.class(x1=x1[id1,],x2=x2[id2,],test=test) -if(PR)dvec[i,]=pro.class.probs(x1=x1[id1,],x2=x2[id2,],test=test)$prob.in.second.class -} -dec=rep(1,n.test) -test1=dvec==1 -test2=dvec==2 -chk1=apply(test1,2,sum) -chk2=apply(test2,2,sum) -if(!PR)idec=chk2>chk1 -if(PR){ -chk=apply(dvec,2,mean) -idec=chk>.5 -} -dec[idec]=2 -dec -} - - - -pro.class<-function(train=NULL,test=NULL,g=NULL,x1=NULL,x2=NULL,nonpar=TRUE,rule=.5,fr=2,SEED=TRUE){ -# -# Project the data onto a line, then estimate the probability that -# a value in test data is in first group. Impacted by unequal sample sizes. To avoid this use -# pro.class.bag. Or use this function but with equal samples sizes for the test data. -# -# nonpar=TRUE: use a smoother to estimate probabilities -# in which case -# fr is the span. 
-# FALSE: use logistic regression -# -if(is.null(test))stop('Argument test is NULL') -if(!is.null(train)){ -if(is.null(g))stop('Argument g, group labels, must be specified') -if(is.matrix(g)){ -if(dim(g)>1)stop('Argument g should be a vector') -}} -CHK=FALSE -if(!is.null(x1)){ -if(!is.null(x2)){ -if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') -x1=elimna(x1) -x2=elimna(x2) -n1=nrow(x1) -n2=nrow(x2) -g=c(rep(0,n1),rep(1,n2)) -CHK=TRUE -d=mulwmw.dist.new(x1,x2,test) -}} -if(!CHK){ -if(is.null(g))stop('The argument g should contain the group id values') -xg=elimna(cbind(train,g)) -p=ncol(train) -p1=p+1 -train=xg[,1:p] -g=xg[,p1] -if(length(unique(g))!=2)stop('Should have only two unique values in g') -flag=g==min(g) -x1=train[flag,] -x2=train[!flag,] - d=mulwmw.dist.new(x1,x2,test) -} -flag=g==min(g) -gnum=g -gnum[flag]=0 -gnum[!flag]=1 -x=c(d$dist1,d$dist2) -if(nonpar){ -g1=rep(0,n1) -v=logSMpred(x,gnum,d[[3]],fr=fr,SEED=SEED) -} -if(!nonpar){ -v=logreg.pred(x,gnum,d[[3]]) -} -dec=rep(2,nrow(test)) -dec[v2){ -center1<-dmean(m1,tr=.5) -center2<-dmean(m2,tr=.5) -} -if(ncol(m1)==2){ -tempd<-NA -for(i in 1:nrow(m1)) -tempd[i]<-depth(m1[i,1],m1[i,2],m1) -mdep<-max(tempd) -flag<-(tempd==mdep) -if(sum(flag)==1)center1<-m1[flag,] -if(sum(flag)>1)center1<-apply(m1[flag,],2,mean) -for(i in 1:nrow(m2)) -tempd[i]<-depth(m2[i,1],m2[i,2],m2) -mdep<-max(tempd) -flag<-(tempd==mdep) -if(sum(flag)==1)center2<-m2[flag,] -if(sum(flag)>1)center2<-apply(m2[flag,],2,mean) -}} -if(cop==2){ -center1<-cov.mcd(m1)$center -center2<-cov.mcd(m2)$center -} -if(cop==3){ -center1<-apply(m1,2,median) -center2<-apply(m2,2,median) -} -if(cop==4){ -center1<-smean(m1) -center2<-smean(m2) -} -center<-(center1+center2)/2 -B<-center1-center2 -if(sum(center1^2)1)stop('Argument g should be a vector') -}} -if(!is.null(x1)){ -if(!is.null(x2)){ -if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') -x1=elimna(x1) -x2=elimna(x2) -n1=nrow(x1) -n2=nrow(x2) -g=c(rep(0,n1),rep(1,n2)) -train=rbind(x1,x2) -}} -g=as.numeric(as.vector(g)) -flag=g==min(g) -gnum=g -gnum[flag]=0 -gnum[!flag]=1 -g=gnum -test=as.matrix(test) -FLAG=FALSE -if(ncol(test)==1){ -FLAG=TRUE -test=t(test) # If test is a vector, a single point, transpose to get correct number of columns. -test=rbind(test,test) #avoid R from aborting. -} -svm_model=svm(train,as.factor(g),kernel=kernel) -dec=predict(svm_model,as.matrix(test)) -dec=as.vector(as.numeric(dec)) -if(FLAG)dec=dec[1] -dec -} - -UB.class.error<-function(train=NULL,g=NULL,x1=NULL,x2=NULL,method=c('KNN','SVM','DIS','DEP','PRO','PROBAG'), -alpha=.05,pro.p=.8,SEED=TRUE,...){ -# -# Use cross validation to estimate error rates associated with the classification method indicated by the argument -# method -# -# By default, estimates are computed for each method listed in the argument' -# method -# -# To include a neural net method, included 'NNbag' in methods; not included automatically to avoid high execution time. -# -# Also reports estimates of a false positive or false negative, but no confidence interval is included. The obvious approach performs poorly -# -# pro.p = proportion used from each of the two training groups; remainder used as test data. -# -# g=class id -# if there are two classes and the training data are stored in separate variables, can enter -# the data for each class via the arguments -# x1 and x2. 
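# A hypothetical call, with x1 and x2 matrices having the same number of columns:
# UB.class.error(x1=x1,x2=x2,method=c('KNN','SVM','RF'),pro.p=.8)
# The rows of the returned matrix correspond to the requested methods; the columns are
# the estimated false negative, false positive and total error rates described next.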
-# -# FN: False negative, assign to group 2 by mistake e.g., NULL predict no fracture but Non-null gets a fracture -# FP: False positive, assign to group 1 by mistake like NULL, e.g, will not have a fracture, but did -# TE: Overall all error rate. -# -# -n.est=length(method) -if(SEED)set.seed(2) -if(!is.null(train)){ -if(is.null(g))stop('Argument g, group ids, must be specified') -if(is.matrix(g)){ -if(dim(g)>1)stop('Argument g should be a vector') -} -Train=cbind(train,g) -Train=elimna(Train) -p=ncol(train) -p1=p+1 -train=Train[,1:p] -g=Train[,p1] -flag=g==min(g) -x1=Train[flag,1:p] -x2=Train[!flag,1:p] -} -if(is.null(x1))stop('Something is wrong, no data in x1') -if(is.null(x2))stop('Something is wrong, no data in x2') -if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') -x1=elimna(x1) -x2=elimna(x2) -n1=nrow(x1) -n2=nrow(x2) -ns1=round(pro.p*n1) -ns2=round(pro.p*n2) -ciFN=matrix(NA,nrow=reps,ncol=2) -ciFP=matrix(NA,nrow=reps,ncol=2) -ciTE=matrix(NA,nrow=reps,ncol=2) -phat.FN=NA -phat.FP=NA -phat.TE=NA -RES=matrix(NA,nrow=n.est,ncol=3) -dimnames(RES)=list(method,c('phat.FN','phat.FP','phat.TE')) -for(k in 1:n.est){ -N1=sample(n1,ns1) -N2=sample(n2,ns2) -test1=x1[-N1,] -test2=x2[-N2,] -a1=UB.class(x1=x1[N1,],x2=x2[N2,],test=test1,method=method[k],SEED=SEED,...) -a2=UB.class(x1=x1[N1,],x2=x2[N2,],test=test2,method=method[k],SEED=SEED,...) -flag1=a1!=1 # ID False negatives e.g., predict no fracture but fracture occurred. -flag2=a2!=2 # ID False positives e.g., predict fracture but no fracture occurred. -FN=binom.conf(y=flag1,alpha=alpha,pr=FALSE) -FP=binom.conf(y=flag2,alpha=alpha,pr=FALSE) #CI does not work well when P(FP)=.5 -TE=binom.conf(y=c(flag1,flag2),alpha=alpha,pr=FALSE) #total error -RES[k,]=c(FN$phat,FP$phat,TE$phat) -} -RES -} - -quantregForest <-function(x,y, nthreads = 1, keep.inbag=FALSE, ...){ -# -# This function does robust random Forest regression based on -# Nicolai Meinshausen (2006) Quantile Regression Forests, -# Journal of Machine Learning Research, 7, 983-999. -# The code used here is based on a modification code downloaded from github, which is maintained by -# Loris Michel -# -# - -x=as.data.frame(x) - if(is.null(nrow(x)) || is.null(ncol(x))) - stop(' x contains no data ') - if( nrow(x) != length(y) ) - stop(' predictor variables and response variable must contain the same number of samples ') - - if (any(is.na(x))) stop('NA not permitted in predictors') - if (any(is.na(y))) stop('NA not permitted in response') - ## Check for categorial predictors with too many categories (copied from randomForest package) - if (is.data.frame(x)) { - ncat <- sapply(x, function(x) if(is.factor(x) && !is.ordered(x)) - length(levels(x)) else 1) - } else { - ncat <- 1 - } - maxcat <- max(ncat) - if (maxcat > 32) - stop('Can not handle categorical predictors with more than 32 categories.') - ## Note that crucial parts of the computation - ## are only invoked by the predict method - cl <- match.call() - cl[[1]] <- as.name('quantregForest') - qrf <- if(nthreads > 1){ - parallelRandomForest(x=x, y=y, nthreads = nthreads,keep.inbag=keep.inbag, ...) - }else{ - randomForest( x=x,y=y ,keep.inbag=keep.inbag,...) 
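   # The code that follows the if/else mirrors Meinshausen's quantregForest: for every
   # terminal node of every tree, one randomly chosen training response that fell into
   # that node is stored in valuesNodes. predict.robust.Forest later aggregates these
   # per-node values using the location estimator (or quantiles) given by its argument what.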
- } - nodesX <- attr(predict(qrf,x,nodes=TRUE),'nodes') - rownames(nodesX) <- NULL - nnodes <- max(nodesX) - ntree <- ncol(nodesX) - n <- nrow(x) - valuesNodes <- matrix(nrow=nnodes,ncol=ntree) - for (tree in 1:ntree){ - shuffledNodes <- nodesX[rank(ind <- sample(1:n,n)),tree] - useNodes <- sort(unique(as.numeric(shuffledNodes))) - valuesNodes[useNodes,tree] <- y[ind[match(useNodes,shuffledNodes )]] - } - - qrf[['call']] <- cl - qrf[['valuesNodes']] <- valuesNodes - if(keep.inbag){ - # - # create a prediction vector with same shape as predictOOBNodes - predictOOBNodes <- attr(predict(qrf,newdata=x,nodes=TRUE),'nodes') - rownames(predictOOBNodes) <- NULL - valuesPredict <- 0*predictOOBNodes - ntree <- ncol(valuesNodes) - valuesPredict[qrf$inbag >0] <- NA - # - # for each tree and observation sample another observation of the same node - for (tree in 1:ntree){ - is.oob <- qrf$inbag[,tree] == 0 - n.oob <- sum(is.oob) - if(n.oob!=0) { - y.oob <- sapply(which(is.oob), - function(i) { - cur.node <- nodesX[i, tree] - y.sampled <- if (length(cur.y <- y[setdiff(which(nodesX[,tree] == cur.node) - ,i)])!=0) { - cur.y[sample(x = 1:length(cur.y), size = 1)] - } else { - NA - } - return(y.sampled) - }) - valuesPredict[is.oob, tree] <- y.oob - } - } - - minoob <- min( apply(!is.na(valuesPredict),1,sum)) - if(minoob<10) stop('need to increase number of trees for sufficiently many out-of-bag observations') - valuesOOB <- t(apply( valuesPredict,1 , function(x) sample( x[!is.na(x)], minoob))) - qrf[['valuesOOB']] <- valuesOOB - } - class(qrf) <- c('quantregForest','randomForest') - - return(qrf) -} - -predict.robust.Forest<-function(object,newdata=NULL, what=tmean,... ){ -# -# Goal: estimate a measure of location for newdata based on the regions stemming from the Random Forest method -# -# what: a function indicating the measure of location to be estimated. Default is a 20% trimmed mean -# -# Example: -# a=quantregForest(x,y) -# predict.robust.Forest(a,newdata = new,what=hd) -# -# For convenience, these steps are combined in the function regR.Forest, which calls this function and eliminates the need for the -# library command. -# -# For each region generated by the random Forest method,this would estimate the median based on the Harrell-Davis estimator. -# -# predict.robust.Forest(a,newdata = new,what=mean,tr=.1) 10% trimmed mean -# -# To estimate one or more quantiles, can use predict.quantregForest or could use this function -# with what containing the quantiles to be estimated. For example what=c(.25,.75) would estimate the -# lower and upper quartiles. -# -# This code is based on modifications of code written by L. Michel -# - class(object) <- 'randomForest' - if(is.null(newdata)){ - if(is.null(object[['valuesOOB']])) stop('need to fit with option keep.inbag=TRUE if trying to get out-of-bag observations') - valuesPredict <- object[['valuesOOB']] - }else{ - predictNodes <- attr(predict(object,newdata=newdata,nodes=TRUE),'nodes') - rownames(predictNodes) <- NULL - valuesPredict <- 0*predictNodes - ntree <- ncol(object[['valuesNodes']]) - for (tree in 1:ntree){ - valuesPredict[,tree] <- object[['valuesNodes']][ predictNodes[,tree],tree] - } - } - if(is.function(what)){ - if(is.function(what(1:4))){ - result <- apply(valuesPredict,1,what) - }else{ - if(length(what(1:4))==1){ - result <- apply(valuesPredict,1,what,...) 
- }else{ - result <- t(apply(valuesPredict,1,what)) - } - } - }else{ - if( !is.numeric(what)) stop(' argument what needs to be either a function or a vector with quantiles') - if( min(what)<0) stop(' if what specifies quantiles, the minimal values needs to be non-negative') - if( max(what)>1) stop(' if what specifies quantiles, the maximal values cannot exceed 1') - if(length(what)==1){ - result <- apply( valuesPredict,1,quantile, what,na.rm=TRUE) - }else{ - result <- t(apply( valuesPredict,1,quantile, what,na.rm=TRUE)) - colnames(result) <- paste('quantile=',what) - } - } - return(result) -} - -regR.Forest<-function(x,y,newdata=NULL,pts=x,pyhat=FALSE,loc.fun=tmean,xout=FALSE,plotit=TRUE,outfun=outpro, span = 0.75,LP=TRUE,pch='.', -ZLIM = FALSE, scale = TRUE, xlab = 'X', ylab = 'Y', ticktype='simple',frame=TRUE,eout=FALSE, - zlab ='', theta = 50, phi = 25,...){ - -# Goal: estimate a measure of location for newdata based -# on the Random Forest method -# Default, estimate measure of location for training data x -# -# loc.fun: a function indicating the measure of location to be estimated. -# Default is a 20% trimmed mean -# -# Method, initially use random forest then smooth using LOESS -# pyhat=TRUE: return the predicted values -# if LP=FALSE, return the random forest predicted values instead. -# -x<-as.matrix(x) -p=ncol(x) -p1=p+1 -if(p==1){ -xs=order(x) -x=x[xs] -y=y[xs] -} -xx<-cbind(x,y) -xx<-elimna(xx) -x<-xx[,1:p] -x<-as.matrix(x) -y<-xx[,p1] -x<-as.data.frame(x) -if(xout){ -x<-as.data.frame(x) -flag<-outfun(x,plotit=FALSE)$keep -x<-x[flag,] -y<-y[flag] -x<-as.data.frame(x) -} -if(is.null(newdata))newdata=as.data.frame(x) -library(randomForest) -a=quantregForest(x,y) -res=predict.robust.Forest(a,newdata=newdata,what=loc.fun,...) -if(plotit){ -if(p==2) -lplot(x,res,ZLIM=ZLIM,span=span,scale=scale,xlab=xlab,ylab=ylab,zlab,zlab,ticktype=ticktype,frame=frame,theta=theta, -phi=phi,pyhat=pyhat,eout=eout,xout=FALSE,pr=FALSE) -if(p==1){ -plot(x[,1],y,xlab=xlab,ylab=ylab,pch=pch) -xs=order(x[,1]) -e=lplot.pred(x[xs,1],res[xs],span=span)$yhat -lines(x[,1],e) -} -} -if(LP)res=lplot.pred(x,res,pts=pts) -if(!pyhat)res=NULL -res -} - - -RFreg=regR.Forest - -class.forest<-function(train=NULL,test=NULL,g=NULL,x1=NULL,x2=NULL,sm=FALSE,fr=2,SEED=NULL){ -# -# Do classification using random forest -# -# -# train is the training set -# test is the test data -# g contains labels for the data in the training set, -# -# Alternatively, store the data for the two groups in -# x1 and x2, in which case the function creates labels, i.e., no need to specify train and g. 
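# A hypothetical call (test must be a matrix or data frame, not a single vector):
# class.forest(x1=x1,x2=x2,test=test)   # returns a 1 or a 2 for each row of test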
-# -# SEED=NULL, used for convenience when called by other functions that expect SEED -# -if(is.null(test))stop('Argument test is null, contains no data') -library(randomForest) -if(!is.null(train)){ -if(is.null(g))stop('Argument g, group ids, must be specified') -if(is.matrix(g)){ -if(dim(g)>1)stop('Argument g should be a vector') -} -g=as.numeric(as.vector(g)) -flag=g==min(g) -x1=train[flag,] -x2=train[!flag,] -} -x1=as.matrix(x1) -x2=as.matrix(x2) -if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') -x1=elimna(x1) -x2=elimna(x2) -x1=as.matrix(x1) -x2=as.matrix(x2) -n1=nrow(x1) -n2=nrow(x2) -g=c(rep(0,n1),rep(1,n2)) -train=rbind(x1,x2) -if(is.vector(test))stop('Argument test is a vector, should contain two or more variables') -g=as.factor(g) -train=as.data.frame(train) -d=randomForest(g~., train) -test=as.data.frame(test) -e=predict(d,newdata=test) -res=as.numeric(as.vector(e))+1 -res -} - - -RFbag<-function(train=NULL,test=NULL,g=NULL,x1=NULL,x2=NULL,depthfun=prodepth,kernel='radial',nboot=100,SEED=TRUE,...){ -# -# -# g=class id -# if there are two classes and the training data are stored in separate variables, can enter -# the data for each class via the arguments -# x1 and x2. -# The function will then create appropriate labels and store them in g. -# -# Random forest classification using data depths. -# class., for the n1!=n2 it can be a bit biased, meaning that -# when there is no association, the probability of a correct classification will be less than .5 -# -# -if(SEED)set.seed(2) -if(!is.null(train)){ -if(is.null(g))stop('Argument g, group labels, must be specified') -if(is.matrix(g)){ -if(dim(g)>1)stop('Argument g should be a vector') -traing=elimna(cbind(train,g)) -p=ncol(train) -p1=p+1 -train=traing[,1:p] -test=traing[,p1] -if(length(unique(g))>2)stop('Only two groups allowed, g has more than two unique values') -} -x=fac2list(train,g) -x1=x[[1]] -x2=x[[2]] -} -test=as.matrix(test) -n.test=nrow(test) -if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') -if(ncol(test)!=ncol(x2))stop('test and x2 have different number of columns') -x1=elimna(x1) -x2=elimna(x2) -n1=nrow(x1) -n2=nrow(x2) -n=min(c(n1,n2)) -dvec=matrix(NA,nrow=nboot,ncol=n.test) -for(i in 1:nboot){ -id1=sample(n,n,replace=TRUE) -id2=sample(n,n,replace=TRUE) -xs1=as.data.frame(x1[id1,]) -xs2=as.data.frame(x2[id2,]) -dvec[i,]=class.forest(x1=xs1,x2=xs2,test=test) -} -dec=rep(1,n.test) -test1=dvec==1 -test2=dvec==2 -chk1=apply(test1,2,sum) -chk2=apply(test2,2,sum) -idec=chk2>chk1 -dec[idec]=2 -dec -} - -class.gbm<-function(train=NULL,test=NULL,g=NULL,x1=NULL,x2=NULL,SEED=NULL,n.trees=100){ -# -# Do classification using boosting via the R package gdm -# -# -# train is the training set -# test is the test data -# g contains labels for the data in the training set, -# -# Alternatively, store the data for the two groups in -# x1 and x2, in which case the function creates labels, i.e., no need to specify train and g. -# -# This function removes the need to call library gbm. 
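# Note: with distribution='bernoulli', the predicted values returned by gbm are on the
# log-odds (link) scale, so the rule flag=e>0 used below assigns a test point to the
# second group when its estimated probability of membership exceeds .5.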
-# -# SEED=NULL, used for convenience when called by other functions that expect SEED -# -if(is.null(test))stop('Argument test is null, contains no data') -library(gbm) -if(!is.null(train)){ -if(is.null(g))stop('Argument g, group ids, must be specified') -if(is.matrix(g)){ -if(dim(g)>1)stop('Argument g should be a vector') -} -flag=g==min(g) -x1=train[flag,] -x2=train[!flag,] -} - -x1=as.matrix(x1) -x2=as.matrix(x2) -if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') -x1=elimna(x1) -x2=elimna(x2) -x1=as.matrix(x1) -x2=as.matrix(x2) -n1=nrow(x1) -n2=nrow(x2) -g=c(rep(0,n1),rep(1,n2)) -train=rbind(x1,x2) -if(is.vector(test))stop('Argument test is a vector, should contain two or more variables') -if(ncol(train)!=ncol(test))stop('training data and test data have different number of columns') -data=data.frame(g,train) -a=gbm(g~., data=data,distribution ='bernoulli',n.trees=n.trees) -test=data.frame(test) -e=predict(a,newdata=test,n.trees=n.trees) -res=rep(1,nrow(test)) -flag=e>0 -res[flag]=2 -res -} - -nearNN<-function(x,pt=x,K=10,mcov,...){ -# -# identify the K rows in x that are closest to vector in pt -# mcov: some type of covariance matrix associated with x -# -if(!is.matrix(x)& !is.data.frame(x))stop('Data are not stored in a matrix or data frame.') -pt=as.vector(pt) -x=elimna(x) -n=nrow(pts) -if(K>nrow(x))stop(' Cannot have K>n') -dis=sqrt(mahalanobis(x,t(pt),mcov)) -chk.dup=sum(duplicated(dis)) -if(chk.dup>0)dis=jitter(dis) -ord=sort(dis) -id=which(dis<=ord[K]) -id -} - -KNNreg<-function(x,y,pts=NULL,K=10,est=tmean,cov.fun=covmcd, -xout=FALSE,outfun=outpro,...){ -x<-as.matrix(x) -if(ncol(x)==1)stop('Should have two or more independent variables') -xx<-cbind(x,y) -xx<-elimna(xx) -x<-xx[,1:ncol(x)] -x<-as.matrix(x) -y<-xx[,ncol(x)+1] -temp<-NA -x<-as.matrix(x) -if(xout){ -x<-as.matrix(x) -flag<-outfun(x,plotit=plotit,...)$keep -x<-x[flag,] -y<-y[flag] -x<-as.matrix(x) -} -if(is.null(pts))pts=x -n=1 -if(is.matrix(pts) || is.data.frame(pts)) -n=nrow(pts) -if(n==1)pts=matrix(pts,nrow=1) -e=NA -mcov=cov.fun(x)$cov -for(i in 1:n){ -e[i]=est(y[nearNN(x,pt=pts[i,],K=K,mcov=mcov,...)]) -} -e -} - -NN.class<-function(train=NULL,test=NULL,g=NULL,x1=NULL,x2=NULL,SEED=NULL){ -# -# Do classification using the neural network method via the R package neuralnet. -# This function provides another way of applying this approach using R commands -# consistent with other classification methods in Rallfun -# -# -# train is the training set -# test is the test data -# g contains labels for the data in the training set, -# -# Alternatively, store the data for the two groups in -# x1 and x2, in which case the function creates labels, i.e., no need to specify train and g. 
-# -# SEED=NULL, used for convenience when called by other functions that expect SEED -# -if(is.null(test))stop('Argument test is NULL, contains no data') -library(neuralnet) - -if(!is.null(train)){ -if(is.null(g))stop('Argument g, group ids, must be specified') -if(is.matrix(g)){ -if(dim(g)>1)stop('Argument g should be a vector') -}} -if(!is.null(x1)){ -if(!is.null(x2)){ -if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') -x1=elimna(x1) -x2=elimna(x2) -n1=nrow(x1) -n2=nrow(x2) -g=c(rep(0,n1),rep(1,n2)) -train=rbind(x1,x2) -}} -g=as.numeric(as.vector(g)) -train=elimna(train) -if(is.null(test))stop('Argument test is null, no data') -test=elimna(test) -train=as.matrix(train) -test=as.matrix(test) - - -if(is.vector(test))stop('Argument test is a vector, should contain two or more variables') -# Next, store data as expected by neuralnet -ddata=as.matrix(cbind(g,train)) -dimnames(ddata)=list(NULL,NULL) -p1=ncol(ddata) -p=p1-1 -ddata=as.data.frame(ddata) -if(p>8)stop('Current version limited to 8 independent variables') -if(p==2)d=neuralnet(V1~V2+V3,data=ddata) -if(p==3)d=neuralnet(V1~V2+V3+V4,data=ddata) -if(p==4)d=neuralnet(V1~V2+V3+V4+V5,data=ddata) -if(p==5)d=neuralnet(V1~V2+V3+V4+V5+V6,data=ddata) -if(p==6)d=neuralnet(V1~V2+V3+V4+V5+V6+V7,data=ddata) -if(p==7)d=neuralnet(V1~V2+V3+V4+V5+V6+V7+V8,data=ddata) -if(p==8)d=neuralnet(V1~V2+V3+V4+V5+V6+V7+V8+V9,data=ddata) -e=predict(d,newdata=test) -res=rep(1,nrow(test)) -flag=e>.5 -res[flag]=2 -res -} - -NNbag<-function(train=NULL,test=NULL,g=NULL,x1=NULL,x2=NULL,nboot=100,SEED=TRUE,...){ -# -# -# g=class id -# if there are two classes and the training data are stored in separate variables, can enter -# the data for each class via the arguments -# x1 and x2. -# The function will then create appropriate labels and store them in g. -# -# KNN classification using data depths. -# KNNdist uses data depths, for the n1!=n2 it can be a bit biased, meaning that -# when there is no association, the probability of a correct classification will be less than .5 -# -# -if(SEED)set.seed(2) -if(!is.null(train)){ -if(is.null(g))stop('Argument g, group labels, must be specified') -if(is.matrix(g)){ -if(dim(g)>1)stop('Argument g should be a vector') -traing=elimna(cbind(train,g)) -p=ncol(train) -p1=p+1 -train=traing[,1:p] -test=traing[,p1] -if(length(unique(g))>2)stop('Only two groups allowed, g has more than two unique values') -} -x=fac2list(train,g) -x1=x[[1]] -x2=x[[2]] -} -test=as.matrix(test) -n.test=nrow(test) -if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') -if(ncol(test)!=ncol(x2))stop('test and x2 have different number of columns') -x1=elimna(x1) -x2=elimna(x2) -n1=nrow(x1) -n2=nrow(x2) -n=min(c(n1,n2)) -dvec=matrix(NA,nrow=nboot,ncol=n.test) -for(i in 1:nboot){ -id1=sample(n1,n,replace=TRUE) -id2=sample(n2,n,replace=TRUE) -dvec[i,]=NN.class(x1=x1[id1,],x2=x2[id2,],test=test) -} -dec=rep(1,n.test) -test1=dvec==1 -test2=dvec==2 -chk1=apply(test1,2,sum) -chk2=apply(test2,2,sum) -idec=chk2>chk1 -dec[idec]=2 -dec -} - - - class.ada.bag<-function(train=NULL,test=NULL,g=NULL,x1=NULL,x2=NULL,nboot=100, -SEED=TRUE,baselearner='bbs',...){ -# -# class.bag: for n1!=n2 -# when there is no association, the expected probability of a correct classification can differ from .5 -# -# This function deals with this via bootstrap bagging -# g=class labels -# if there are two classes and the training data are stored in separate variables, can enter -# the data for each class via the arguments -# x1 and x2. 
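# (Each bootstrap iteration below draws samples of size n=min(n1,n2) from both groups so
# that class.ada is trained on balanced data; the reported label for each test point is
# the majority vote over the nboot iterations.)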
-# The function will then create appropriate labels and store them in g. -# -# nboot: number of bootstrap sample. Using nboot=20, bias remains with n1=200, n2=100 -# -if(SEED)set.seed(2) -if(!is.null(train)){ -if(is.null(g))stop('Argument g, group labels, must be specified') -if(is.matrix(g)){ -if(dim(g)>1)stop('Argument g should be a vector') -traing=elimna(cbind(train,g)) -p=ncol(train) -p1=p+1 -train=traing[,1:p] -test=traing[,p1] -if(length(unique(g))>2)stop('Only two groups allowed, g has more than two unique values') -} -x=fac2list(train,g) -x1=x[[1]] -x2=x[[2]] -} -test=as.matrix(test) -#test=as.data.frame(test) -n.test=nrow(test) -if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') -if(ncol(test)!=ncol(x2))stop('test and x2 have different number of columns') -x1=elimna(x1) -x2=elimna(x2) -n1=nrow(x1) -n2=nrow(x2) -n=min(c(n1,n2)) -dvec=matrix(NA,nrow=nboot,ncol=n.test) -for(i in 1:nboot){ -id1=sample(n1,n,replace=TRUE) -id2=sample(n2,n,replace=TRUE) -dvec[i,]=class.ada(x1=x1[id1,],x2=x2[id2,],test=test,baselearner=baselearner) -} -dec=rep(1,n.test) -test1=dvec==1 -test2=dvec==2 -chk1=apply(test1,2,sum) -chk2=apply(test2,2,sum) -idec=chk2>chk1 -dec[idec]=2 -dec -} - -CLASS.BAG<-function(train=NULL,test=NULL,g=NULL,x1=NULL,x2=NULL,SEED=TRUE,kernel='radial',nboot=100, -method=c('KNN','SVM','DIS','DEP','PRO','NN','RF','ADA','LSM'),depthfun=prodepth,baselearner ='bbs',sm=TRUE,rule=.5,...){ -# -# A collection of classification methods for which the error rate is not -# impacted by unequal sample sizes. -# Bagged version of various classification methods is used. -# -# For methods that do not require bagging, see UB.class. -# -# KNN: calls KNNbag: a robust analog of the K nearest neighbor method -# SVM: a type of bagging method used in conjunction with support vector machine -# DIS: Points classified based on their depths -# DEP: Uses depths as suggested by Makinde and Fasoranbaku (2018). JAS -# PRO: project the points onto a line connecting the centers of the data clouds. -# Then use estimate of the pdf for each group to make a decision about future points. -# NN: Neural network -# RF: Random forest -# ADA: adaboost method -# LSM: Uses a smoother designed for a binary dependent variable. sm=FALSE, uses logistic regression -# -type=match.arg(method) -switch(type, - KNN=KNNbag(train=train,test=test,g=g,x1=x1,x2=x2,SEED=SEED,nboot=nboot), - SVM=SVMbag(train=train,test=test,g=g,x1=x1,x2=x2,depthfun=depthfun,SEED=SEED,nboot=nboot,...), - DIS=dis.depth.bag(train=train,test=test,g=g,x1=x1,x2=x2,SEED=SEED,nboot=nboot), - DEP=Depth.class.bag(train=train,test=test,g=g,x1=x1,x2=x2,SEED=SEED,nboot=nboot), - PRO=pro.class.bag(train=train,test=test,g=g,x1=x1,x2=x2,SEED=SEED,nboot=nboot,...), - NN=NNbag(train=train,test=test,g=g,x1=x1,x2=x2,SEED=SEED,...), - RF=RFbag(train=train,test=test,g=g,x1=x1,x2=x2,SEED=SEED,kernel=kernel,nboot=nboot,...), - ADA=class.ada.bag(train=train,test=test,g=g,x1=x1,x2=x2,baselearner=baselearner,SEED=SEED,nboot=nboot,...), - LSM=LSMbag(train=train,test=test,g=g,x1=x1,x2=x2,sm=sm,rule=rule,SEED=SEED,nboot=nboot,...), - ) -} - -qloc.dif<-function(x,y,est=tmean,...){ -# -# Compute a measure of location for each group. -# Using the data in first group, determine what quantiles -# these measure of location correspond to. -# The difference is used as a measure of effect size. 
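# For example, if est is the 20% trimmed mean, group 1's trimmed mean sits at the .5
# quantile of group 1 and group 2's trimmed mean sits at the .7 quantile of group 1,
# the value returned is .5-.7=-.2.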
-# -m1=est(x) -m2=est(y) -q1=mean(x<=m1) -q2=mean(x<=m2) -delta=q1-q2 -delta -} - -loc.dif.summary<-function(x,y){ -# -# Estimate the difference between a collection of measures of location:: -# MEAN: -# MEAN20: 20% mean -# MED: median -# OS: One-step M-estimator -# Mdif: median of typical difference -# -x=elimna(x) -y=elimna(y) -output=matrix(NA,ncol=1,nrow=5) -output[1,1]=tmean(x,tr=0)-tmean(y,tr=0) -output[2,1]=tmean(x,tr=0.2)-tmean(y,tr=0.2) -output[3,1]=median(x)-median(y) -output[4,1]=onestep(x)-onestep(y) -output[5,1]=wmwloc(x,y) -dimnames(output)=list(c('MEAN','MEAN.20%','MEDIAN','M-EST','Mdif'),c('Est')) -output -} - - -dep.loc.summary<-function(x,y){ -# -# Estimate the measures of location based on difference scores: -# MEAN: -# MEAN20: 20% mean -# MED: median -# OS: One-step M-estimator -# Mdif: median of typical difference -# -d=NULL -chk=ncol(x) -if(is.vector(x))d=x-y -if(!is.null(chk)){ -if(dim(x)[2]==2)d=x[,1]-x[,2] -} -if(is.null(d))stop('x and y should be vectors, or x should have two columns') -d=elimna(d) -output=matrix(NA,ncol=1,nrow=4) -output[1,1]=tmean(d,tr=0) -output[2,1]=tmean(d,tr=0.2) -output[3,1]=median(d) -output[4,1]=onestep(d) -dimnames(output)=list(c('MEAN','MEAN.20%','MEDIAN','M-EST'),c('Est')) -output -} - - - -ES.sum.REL.MAG<-function(REL.M,n = 10000,reps=10){ -# -# Determine small medium and large equivalent measures of effect size based on the values in -# REL.M -# -if(length(REL.M)!=3)stop('Should have three value in REL.M') -if(n>10000)n=10000 -x=rnorm(n) -y=rnorm(n) -output=matrix(0,ncol=3,nrow=6) -int=matrix(NA,ncol=3,nrow=6) -dimnames(output)=list(c('AKP','EP','QS (median)','QStr','WMW','KMS'),c('S','M','L')) -for(k in 1:reps){ -for(j in 1:3)int[,j]=ES.summary(x,y-REL.M[j],)[,1] -output=output+int -} -output=output/reps -output -} - -ES.summary<-function(x,y,tr=.2,NULL.V=c(0,0,.5,.5,.5,0),REL.MAG=NULL, REL.M=NULL,n.est=1000000){ -# -# Estimate a collection of effect sizes: -# AKP: Homoscedastic robust analog of Cohen's d -# EP: Explanatory power -# QS: Quantile shift based on the median of the distribution of X-Y, -# QStr: Quantile shift based on the trimmed mean of the distribution of X-Y -# WMW: P(X.5 & output[5,5]< .5){ -output[5,3:5]=1-output[5,3:5] -} -if(output[5,1]<.5 & output[5,5]> .5){ -output[5,3:5]=1-output[5,3:5] -} - -a=akp.effect.ci(x,y,tr=tr,alpha=alpha,nboot=nboot,SEED=SEED) -output[1,6:7]=a$ci -output[1,8]=a$p.value -a=EPci(x,y,tr=tr,alpha=alpha,SEED=SEED,nboot=nboot) -output[2,6:7]=a$ci -output[2,8]=yuen(x,y,tr=tr)$p.value -a=shiftPBci(x,y,locfun=QSfun,alpha=alpha,nboot=nboot,SEED=SEED) -output[3,6:7]=a$ci -output[3,8]=a$p.value -a=shiftPBci(x,y,locfun=tmean,alpha=alpha,nboot=nboot,SEED=SEED) -output[4,6:7]=a$ci -output[4,8]=a$p.value -a=cidv2(x,y,alpha=alpha) -output[5,6:7]=a$p.ci -output[5,8]=a$p.value -a=KMS.ci(x,y,alpha=alpha,nboot=nboot,SEED=SEED) -output[6,6:7]=a$ci -output[6,8]=a$p.value -if(output[6,1]<0)output[6,3:5]=-1*output[6,3:5] -output[,9]=p.adjust(output[,8],method=method) -output -} - - -class.ada<-function(train=NULL,test=NULL,g=NULL,x1=NULL,x2=NULL,sm=FALSE,fr=2,SEED=NULL,baselearner='btree'){ -# -# Do classification using adaboost -# -# baselearner='btree': Stumps -# bbs: Splines -# bols: linear models -# -# train is the training set -# test is the test data -# g contains labels for the data in the training set, -# -# This function removes the need to call library mboost. 
-# -# SEED=NULL, used for convenience when called by other functions that expect SEED -# -library(mboost) -if(!is.null(train)){ -if(is.null(g))stop('Argument g, group ids, must be specified') -if(is.matrix(g)){ -if(dim(g)>1)stop('Argument g should be a vector') -} -g=as.numeric(as.vector(g)) -flag=g==min(g) -x1=train[flag,] -x2=train[!flag,] -} -x1=as.matrix(x1) -x2=as.matrix(x2) -if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') -x1=elimna(x1) -x2=elimna(x2) -x1=as.matrix(x1) -x2=as.matrix(x2) -n1=nrow(x1) -n2=nrow(x2) -g=c(rep(0,n1),rep(1,n2)) -train=rbind(x1,x2) -if(is.null(test))stop('Argument test is null, contains no data') -if(is.vector(test))stop('Argument test is a vector, should contain two or more variables') -g=as.factor(g) -ddata=data.frame(g,train) -d=mboost(g~., data=ddata,family=AdaExp(),baselearner=baselearner) -test=data.frame(test) -e=predict(d,newdata=test) -res=rep(1,nrow(test)) -flag=e>0 -res[flag]=2 -res -} - -CLASS.fun<-function(train=NULL,test=NULL,g=NULL,x1=NULL,x2=NULL, -method=c('KNN','DIS','DEP','SVM','RF','NN','ADA','PRO','LSM','GBT'), -depthfun=prodepth,kernel='radial',baselearner='btree',sm=TRUE,rule=.5,...){ -# -# A collection of classification methods: -# -# KNN: Nearest neighbor using robust depths -# DIS: Points classified based on their depths -# DEP: Uses depths as suggested by Makinde and Fasoranbaku (2018). JAS -# SVM: support vector machine -# RF: Random forest -# NN: neural network -# ADA: ada boost -# PRO: project the points onto a line connecting the centers of the data clouds. -# Then use estimate of the pdf for each group to make a decision about future points. -# GBT: Gradient boosted trees: requires R package gbm -# -type=match.arg(method) -switch(type, - KNN=KNNdist(train=train,test=test,g=g,x1=x1,x2=x2,depthfun=depthfun), - DIS=discdepth(train=train,test=test,g=g,x1=x1,x2=x2), - DEP=Depth.class(train=train,test=test,g=g,x1=x1,x2=x2), - SVM=SVM(train=train,test=test,g=g,x1=x1,x2=x2,kernel=kernel), - RF=class.forest(train=train,test=test,g=g,x1=x1,x2=x2), - NN=NN.class(train=train,test=test,g=g,x1=x1,x2=x2), - ADA=class.ada(train=train,test=test,g=g,x1=x1,x2=x2,baselearner=baselearner), - PRO=pro.classPD(train=train,test=test,g=g,x1=x1,x2=x2), - LSM=class.logR(train=train,test=test,g=g,x1=x1,x2=x2,sm=sm,rule=rule), - GBT=class.gbm(train=train,test=test,g=g,x1=x1,x2=x2), - ) -} - - - -BWPHmcp<-function(J,K, x, method='KMS'){ -# -# For a between-by-within design: -# Check for interactions by comparing binomials -# That is, use a Patel--Hoel approach. -# -# KMS is the Kulinskaya et al. method. 
Other options: -# 'ZHZ' -# 'SK' -# -# -p<-J*K -connum<-(J^2-J)*(K^2-K)/4 -if(is.matrix(x))x<-listm(x) -if(!is.list(x))stop('Data must be stored in a matrix or in list mode') -imap<-matrix(c(1:p),J,K,byrow=TRUE) -outm<-matrix(NA,ncol=10,nrow=connum) -outsk<-matrix(NA,ncol=8,nrow=connum) -dimnames(outsk)<-list(NULL,c('Fac.A','Fac.A','Fac.B','Fac.B','p1','p2','p.value','p.adj')) -dimnames(outm)<-list(NULL,c('Fac.A','Fac.A','Fac.B','Fac.B','p1','p2','ci.low','ci.up','p.value','p.adj')) -ic<-0 -for (j in 1:J){ -for (jj in 1:J){ -if(j1)stop('Argument g should be a vector') -traing=elimna(cbind(train,g)) -p=ncol(train) -p1=p+1 -train=traing[,1:p] -test=traing[,p1] -if(length(unique(g))>2)stop('Only two groups allowed, g has more than two unique values') -} -x=fac2list(train,g) -x1=x[[1]] -x2=x[[2]] -} -test=as.matrix(test) -n.test=nrow(test) -if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') -if(ncol(test)!=ncol(x2))stop('test and x2 have different number of columns') -x1=elimna(x1) -x2=elimna(x2) -n1=nrow(x1) -n2=nrow(x2) -n=min(c(n1,n2)) -dvec=matrix(NA,nrow=nboot,ncol=n.test) -for(i in 1:nboot){ -id1=sample(n,n,replace=TRUE) -id2=sample(n,n,replace=TRUE) -dvec[i,]=class.logR(x1=x1[id1,],x2=x2[id2,],test=test,sm=sm,rule=rule) -} -dec=rep(1,n.test) -test1=dvec==1 -test2=dvec==2 -chk1=apply(test1,2,sum) -chk2=apply(test2,2,sum) -idec=chk2>chk1 -dec[idec]=2 -dec -} - - - -class.error.com<-function(x1=NULL,x2=NULL,train=NULL,g=NULL,method=NULL, -pro.p=.8,nboot=100,EN=FALSE,FAST=TRUE, -SEED=TRUE,...){ -# -# For two classification methods indicated by the arguments -# class.fun1 and -# class.fun2 -# use cross validation coupled with resampling to estimate the probability that of a correct classification. -# -# The data for the two groups can be entered via the arguments -# x1 and x2 -# or -# store all of the data in the argument train in which case g specifies the group -# -# Choices for these two arguments: -# -# Current choices available: -# KNN: Nearest neighbor using robust depths -# DIS: Points classified based on their depths -# DEP: Uses depths as suggested by Makinde and Fasoranbaku (2018). JAS -# SVM: support vector machine -# RF: Random forest -# NN: neural network -# ADA: ada boost -# PRO: project the points onto a line connecting the centers of the data clouds. -# Then use estimate of the pdf for each group to make a decision about future points. -# LSM: smooth version of logistic regression when sm=TRUE; otherwise use logistic regression. -# -# EN=TRUE; use equal samples for the test data to deal with classification bias -# Otherwise, the ratio of the sample sizes is n1/n2 -# -# method=NULL All of the methods listed above will be compared if -# FAST=FALSE -# For method 'PRO', execution time might take several minutes if the sample sizes are large -# For this reason.PRO is is not used if FAST=TRUE -# -# pro.p=.8 means 80% of the data will be used as training data -# nboot=number of bootstrap samples - -# Returns estimate of the error rate plus -# FP (false positive): average proportion of values in x1 erroneously classified as coming from x2 -# Example, x1 contains no fracture, x2 contains fractures. 
-# FN (false negative): average proportion of values in x2 erroneously classified as coming from x1 -# -# -# -if(is.null(method))method=c('KNN','DIS','DEP','SVM','RF','NN','ADA','PRO','LSM') -if(SEED)set.seed(2) -if(!is.null(train)){ -if(is.null(g))stop('Argument g, group ids, must be specified') -if(is.matrix(g))if(dim(g)>1)stop('Argument g should be a vector') -flag=g==min(g) -x1=train[flag,] -x2=train[!flag,] -} -if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') -x1=elimna(x1) -x2=elimna(x2) -if(is.null(x1))stop('Something is wrong, no data in x1') -if(is.null(x2))stop('Something is wrong, no data in x2') -if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') -x1=elimna(x1) -x2=elimna(x2) -x1=as.matrix(x1) -x2=as.matrix(x2) -dimnames(x1)=list(NULL,NULL) # can be necessary to eliminate labels to avoid an error in randomForest. -dimnames(x2)=list(NULL,NULL) -n1=nrow(x1) -n2=nrow(x2) -ns1=round(pro.p*n1) -ns2=round(pro.p*n2) -if(EN)ns1=ns2=min(c(ns1,ns2)) -P1hat=NA -P2hat=NA -Av=NA -Bv=NA -Cv=NA -Dv=NA - -J=length(method) -TE=matrix(NA,nrow=nboot,ncol=J) -FP=matrix(NA,nrow=nboot,ncol=J) -FN=matrix(NA,nrow=nboot,ncol=J) - -for(k in 1:nboot){ -N1=sample(n1,ns1) -N2=sample(n2,ns2) -test1=x1[-N1,] -test2=x2[-N2,] -for(j in 1:J){ -a1=CLASS.fun(x1=x1[N1,],x2=x2[N2,],test=test1,method=method[j],...) -a2=CLASS.fun(x1=x1[N1,],x2=x2[N2,],test=test2,method=method[j],...) -flag1=a1!=1 # ID False negatives e..g., method 1 predict no fracture but fracture occurred. So !flag1 is correct decision -flag2=a2!=2 # ID False positives e..g., predict fracture but no fracture occurred. -flag=c(flag1,flag2) #Overall mistakes -TE[k,j]=mean(flag) -FN[k,j]=mean(flag1) -FP[k,j]=mean(flag2) -}} - -ERR=matrix(NA,nrow=3,ncol=J) -dimnames(ERR)=list(c('TE','FP','FN'),method) -v=apply(TE,2,mean) -ERR[1,]=v -v=apply(FP,2,mean) -ERR[2,]=v -v=apply(FN,2,mean) -ERR[3,]=v - -list(Error.rates=ERR) -} - -runstest.med<-function(x){ -# -# runs test based on whether values are < or > than the median -# -library(tseries) -x=elimna(x) -n=length(x) -g=rep(1,n) -flag=x 8, might include the argument -# op=3 and -# outfun=outproadj -# But this will increase execution time considerably. -# -# m1 is an n by p matrix -# -# For single distribution, m2=NULL, -# test the hypothesis that the measure of location estimated by -# locfun is equal to the value specified by -# nullv. -# -# For two dependent groups, meaning that -# m2 is not mull -# test hypothesis that the difference scores have measures of locations that are equal to nullv -# -# This is done by computing a confidence interval for each of the -# p variables under study using a percentile bootstrap. -# -# -if(is.null(m2))D=m1 -else{ -if(ncol(m1) != ncol(m2))stop('Number of variables in group 1 does not equal the number in group 2.') -D=m1-m2 -} -names(D)=NULL -p=ncol(D) -nb1=nboot+1 -if(SEED)set.seed(2) -D<-elimna(D) -n<-nrow(D) -val<-matrix(0,ncol=p,nrow=nboot) -bvec=matrix(NA,nboot,p) -ci=matrix(NA,2,p) -est=locfun(D,...) -if(is.list(est))est=est$center -for(j in 1: nboot){ -id<-sample(n,size=n,replace=TRUE) -v1<-locfun(D[id,],...) 
-if(is.list(v1)){ -val[j,]=(v1$centerY)')) -res -} - - - -anc.best.ex<-function(x,tr=.2){ -# -# Used by anc.best.crit -pvec=NA -x=elimna(x) -if(is.matrix(x))x=listm(x) -J=length(x) -est=lapply(x,tmean,tr=tr) -est=matl(est) -R=order(est,decreasing = TRUE) -ic=0 -for(j in 2:J){ -ic=ic+1 -pvec[ic]=yuen(x[[R[1]]],x[[R[[j]]]],tr=tr)$p.value -} -pvec -} - -anc.bestH<-function(x,rem=NULL,alpha=.05,tr=.2,iter=5000,SEED=TRUE){ -# -# -# Identify group with largest trimmed mean -# Make a decision if every p.value<=p.crit -# -# p.crit: If NULL, critical p-values are determined so that that FWE is alpha -# This is done using a simulation to determine the null distribution based on -# iter=5000 replications. -# -# -# Returns: -# Best='No Decision' if not significant -# Best= the group with largest measure if a decision can be made. -# -# Confidence intervals having simultaneous probability coverage 1-alpha -# using the adjusted level. -# -x=elimna(x) -if(is.matrix(x))x=listm(x) -J=length(x) -if(J<3)stop('Should have 3 or more groups') -Jm1=J-1 -est=lapply(x,tmean,tr=tr) -n=lapply(x,length) -est=matl(est) -n=as.vector(matl(n)) -R=order(est,decreasing = TRUE) -pvec=NA -if(is.null(rem)){ -pvdist=anc.bestH.crit(J=J,n=n,iter=iter,alpha=alpha,SEED=SEED) -} -output<-matrix(NA,Jm1,8) -dimnames(output)=list(NULL,c('Est.Best','Grp','Est','Dif','ci.low','ci.up','p.value','p.adj')) -for(i in 2:J){ -im1=i-1 -a=yuen(x[[R[1]]],x[[R[[i]]]],alpha=qest(pvdist[,im1],alpha)) -pvec[im1]=mean(pvdist<=a$p.value) -output[im1,1:7]=c(a$est.1,R[[i]],a$est.2,a$dif,a$ci[1],a$ci[2],a$p.value) -} -output[,8]=p.adjust(output[,7]) -Best='No Decisions' -id=output[,8]<=alpha -if(sum(id>0))Best=output[id,2] -if(sum(id)==Jm1)Best='All' -setClass('BIN',slots=c('Group.with.largest.estimate','Larger.than','n','output')) -put=new('BIN',Group.with.largest.estimate=R[[1]],Larger.than=Best,n=n,output=output) -put -} - -anc.best<-function(x,p.crit=NULL,alpha=.05,tr=.2,iter=5000,SEED=TRUE,NEG=FALSE){ -# -# -# For J independent groups -# Identify group with largest trimmed mean -# Make a decision if every p.value<=p.crit -# -# p.crit: If NULL, critical p-values are determined so that that FWE is alpha -# This is done using a simulation to determine the null distribution based on -# iter=5000 replications. -# -# -# Returns: -# Best='No Decision' if not significant -# Best= the group with largest measure if a decision can be made. -# -# Confidence intervals DO NOT necessarily have simultaneous probability coverage 1-alpha -# using the adjusted level. 
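# A hypothetical call with data for three or more groups stored in list mode or in the
# columns of a matrix:
# anc.best(x,tr=.2,alpha=.05)
# The group with the largest 20% trimmed mean is compared to every other group via yuen()
# at the adjusted critical levels; the slot Larger.than reports the groups it can be
# declared larger than, 'All', or 'No Decisions'.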
-# -x=elimna(x) -if(is.matrix(x))x=listm(x) -J=length(x) -if(J<3)stop('Should have 3 or more groups') -if(NEG)for(j in 1:J)x[[j]]=0-x[[j]] -Jm1=J-1 -est=lapply(x,tmean,tr=tr) -n=lapply(x,length) -est=matl(est) -n=as.vector(matl(n)) -R=order(est,decreasing = TRUE) -pvec=NA -if(is.null(p.crit)){ -v=anc.best.crit(J=J,n=n,iter=iter,alpha=alpha,SEED=SEED) -p.crit=v$fin.crit -pvdist=v$pvdist -} -output<-matrix(NA,Jm1,8) -dimnames(output)=list(NULL,c('Est.Best','Grp','Est','Dif','ci.low','ci.up','p.value','p.crit')) -for(i in 2:J){ -im1=i-1 -a=yuen(x[[R[1]]],x[[R[[i]]]],alpha=p.crit[im1]) -pvec[im1]=a$p.value -output[im1,]=c(a$est.1,R[[i]],a$est.2,a$dif,a$ci[1],a$ci[2],a$p.value,p.crit[im1]) -} -Best='No Decisions' -flag=sum(output[,7]<=output[,8]) -id=output[,7]<=output[,8] -if(sum(id>0))Best=output[id,2] -if(flag==Jm1)Best='All' -#setClass('BIN',slots=c('Group.with.largest.estimate','Select.Best.p.value','Larger.than','n','output')) -setClass('BIN',slots=c('Group.with.largest.estimate','Larger.than','n','output')) -#put=new('BIN',Group.with.largest.estimate=R[[1]],Select.Best.p.value=dpv,Larger.than=Best,n=n,output=output) -put=new('BIN',Group.with.largest.estimate=R[[1]],Larger.than=Best,n=n,output=output) -put -} - -anc.best.crit<-function(J,n=30,alpha=.05,tr=.2,iter=5000,SEED=TRUE){ -# -# Determine critical p-values for anc.best -# -if(SEED)set.seed(2) -Jm1=J-1 -rem=matrix(NA,iter,Jm1) -for(k in 1:iter){ -if(length(n)==1){ -x=rmul(n,p=J) -x=listm(x) -} -else{ -x=list() -if(length(n)!=J)stop('J is not equal to the length of n') -for(j in 1:J)x[[j]]=rnorm(n[j]) -} -rem[k,]=anc.best.ex(x,tr=tr) -} -# -init=apply(rem,2,qest,alpha) -z=optim(0,anc.best.fun,init=init,iter=iter,rem=rem,Jm1=Jm1,alpha=alpha,method='Brent',lower=0,upper=1) -fin.crit=z$par*init -list(fin.crit=fin.crit,pvdist=rem) -} - -anc.best.fun<-function(a,init,iter,rem,Jm1,alpha){ -# -chk=0 -init=a*init -for(i in 1:iter){ -flag=0 -for(j in 1:Jm1)if(rem[i,j]<=init[j])flag=flag+1 -if(flag>0)chk=chk+1 -} -chk=chk/iter -dif=abs(chk-alpha) -dif -} - - -anc.best.fun<-function(a,init,iter,rem,Jm1,alpha){ -# -chk=0 -init=a*init -for(i in 1:iter){ -flag=0 -for(j in 1:Jm1)if(rem[i,j]<=init[j])flag=flag+1 -if(flag>0)chk=chk+1 -} -chk=chk/iter -dif=abs(chk-alpha) -dif -} - - - - -anc.bestpb<-function(x,loc.fun=tmean,nboot=3000,p.crit=NULL,alpha=.05,iter=5000,SEED=TRUE,...){ -# -# -# Identify group with largest trimmed mean -# Make a decision if every p.value<=p.crit -# -# p.crit: If NULL, critical p-values are determined so that that FWE is alpha -# This is done using a simulation to determine the null distribution based on -# iter=5000 replications. -# -# -# Returns: -# Best='No Decision' if not significant -# Best= the group with largest measure if a decision can be made. -# -# Confidence intervals having simultaneous probability coverage 1-alpha -# using the adjusted level. -# -x=elimna(x) -if(is.matrix(x))x=listm(x) -J=length(x) -if(J<3)stop('Should have 3 or more groups') -Jm1=J-1 -est=lapply(x,loc.fun,...) -n=lapply(x,length) -est=matl(est) -n=as.vector(matl(n)) -R=order(est,decreasing = TRUE) -pvec=NA -if(is.null(p.crit))p.crit=anc.best.crit(J=J,n=n,iter=iter,alpha=alpha,SEED=SEED)$fin.crit -output<-matrix(NA,Jm1,8) -dimnames(output)=list(NULL,c('Est.Best','Grp','Est','Dif','ci.low','ci.up','p.value','p.crit')) -for(i in 2:J){ -im1=i-1 -a=pb2gen(x[[R[1]]],x[[R[[i]]]],alpha=p.crit[im1],nboot=nboot,est=loc.fun,SEED=SEED,...) 
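# pb2gen returns the percentile bootstrap p-value and confidence interval for the
# difference between the group with the largest estimate and group R[i], computed at the
# adjusted level p.crit[im1] so that the family-wise error rate is approximately alpha.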
-pvec[im1]=a$p.value -output[im1,]=c(a$est.1,R[[i]],a$est.2,a$est.dif,a$ci[1],a$ci[2],a$p.value,p.crit[im1]) -} -Best='No Decision' -flag=sum(pvec<=p.crit) -if(flag==Jm1)Best=R[[1]] -list(Group.with.largest.est=R[[1]],Best=Best,n=n,output=output) -} - - anc.bestpb.PV<-function(x,loc.fun=tmean,nboot=2000,alpha=.05,iter=5000,SEED=TRUE,...){ -# -# -# Identify group with largest trimmed mean -# Make a decision if every p.value<=p.crit -# -# p.crit: If NULL, critical p-values are determined so that that FWE is alpha -# This is done using a simulation to determine the null distribution based on -# iter=5000 replications. -# -# -# Returns: -# a p-value related to making a decision about which group has the largest measure of location. -# Best='No Decision' if not significant -# Best= the group with largest measure if a decision can be made. -# -# Confidence intervals having simultaneous probability coverage 1-alpha -# using the adjusted level. -# -x=elimna(x) -if(is.matrix(x))x=listm(x) -J=length(x) -if(J<3)stop('Should have 3 or more groups') -Jm1=J-1 -est=lapply(x,loc.fun,...) -n=lapply(x,length) -est=matl(est) -n=as.vector(matl(n)) -R=order(est,decreasing = TRUE) -pvec=NA -aval=c(seq(.001,.1,.001),seq(.11,.99,.01)) -id=which(aval==alpha) -if(length(id)==0)stop('alpha be one of the values .001(.001).1 or 11(.01).99') -v=anc.best.crit.det(J=J,n=n,iter=iter,alpha=alpha,SEED=SEED) -p.crit=v[id,] -if(is.null(p.crit))p.crit=anc.best.crit(J=J,n=n,iter=iter,alpha=alpha,SEED=SEED) -output<-matrix(NA,Jm1,8) -dimnames(output)=list(NULL,c('Est.Best','Grp','Est','Dif','ci.low','ci.up','p.value','p.crit')) -for(i in 2:J){ -im1=i-1 -a=pb2gen(x[[R[1]]],x[[R[[i]]]],alpha=p.crit[im1],nboot=nboot,est=loc.fun,SEED=SEED,...) -pvec[im1]=a$p.value -output[im1,]=c(a$est.1,R[[i]],a$est.2,a$est.dif,a$ci[1],a$ci[2],a$p.value,p.crit[im1]) -} -# Determine p-value for overall decision -na=length(aval) -for(i in 1:na){ -chk=sum(output[,7]<=v[i,]) -pv=aval[i] -if(chk==Jm1)break -} -Best='No Decisions' -flag=sum(output[,7]<=output[,8]) -id=output[,7]<=output[,8] -if(sum(id>0))Best=output[id,2] -if(flag==Jm1)Best='All' -setClass('BIN',slots=c('Group.with.largest.estimate','Select.Best.p.value','Larger.than','n','output')) -put=new('BIN',Group.with.largest.estimate=R[[1]],Select.Best.p.value=pv,Larger.than=Best,n=n,output=output) -put -} - - -anc.best.PV<-function(x,alpha=.05,tr=.2,iter=5000,SEED=TRUE){ -# -# -# Identify group with largest trimmed mean -# Make a decision if every p.value<=p.crit - -# Unlike ancbest, this function returns a p-value associated with making a decision -# about which group has the largest trimmed mean. -# -# -# Returns: -# Best='No Decision' if not significant -# Best= the group with largest measure if a decision can be made. -# -# Confidence intervals having simultaneous probability coverage 1-alpha -# using the adjusted level. 
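# The returned Select.Best.p.value is approximated by searching the grid of levels
# .001(.001).1 and .11(.01).99 for the smallest alpha at which every comparison with the
# group having the largest trimmed mean is significant based on the corresponding
# adjusted critical p-values.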
-# -x=elimna(x) -if(is.matrix(x))x=listm(x) -J=length(x) -if(J<3)stop('Should have 3 or more groups') -Jm1=J-1 -est=lapply(x,tmean,tr=tr) -n=lapply(x,length) -est=matl(est) -n=as.vector(matl(n)) -R=order(est,decreasing = TRUE) -pvec=NA - -aval=c(seq(.001,.1,.001),seq(.11,.99,.01)) -id=which(aval==alpha) -if(length(id)==0)stop('alpha be one one values .001(.001).1 or 11(.01).99') -v=anc.best.crit.det(J=J,n=n,iter=iter,alpha=alpha,SEED=SEED) -p.crit=v[id,] - - -output<-matrix(NA,Jm1,8) -dimnames(output)=list(NULL,c('Est.Best','Grp','Est','Dif','ci.low','ci.up','p.value','p.crit')) -for(i in 2:J){ -im1=i-1 -a=yuen(x[[R[1]]],x[[R[[i]]]],alpha=p.crit[im1]) -pvec[im1]=a$p.value -output[im1,]=c(a$est.1,R[[i]],a$est.2,a$dif,a$ci[1],a$ci[2],a$p.value,p.crit[im1]) -} - -# Determine p-value for overall decision -na=length(aval) -for(i in 1:na){ -chk=sum(output[,7]<=v[i,]) -pv=aval[i] -if(chk==Jm1)break -} -Best='No Decisions' -flag=sum(output[,7]<=output[,8]) -id=output[,7]<=output[,8] -if(sum(id>0))Best=output[id,2] -if(flag==Jm1)Best='All' -setClass('BIN',slots=c('Group.with.largest.estimate','Select.Best.p.value','Larger.than','n','output')) -put=new('BIN',Group.with.largest.estimate=R[[1]],Select.Best.p.value=pv,Larger.than=Best,n=n,output=output) -put -} - - - - -anc.best.crit.det<-function(J,n,alpha=.05,tr=.2,iter=5000,SEED=TRUE){ -# -# Determine critical p-values for anc.best -# -if(SEED)set.seed(2) -Jm1=J-1 -rem=matrix(NA,iter,Jm1) -for(k in 1:iter){ -if(length(n)==1){ -x=rmul(n,p=J) -x=listm(x) -} -else{ -x=list() -if(length(n)!=J)stop('J is not equal to the length of n') -for(j in 1:J)x[[j]]=rnorm(n[j]) -} -rem[k,]=anc.best.ex(x,tr=tr) -} -aval=c(seq(.001,.1,.001),seq(.011,.99,.01)) -na=length(aval) -fin.crit=matrix(NA,na,Jm1) -for(i in 1:na){ -init=apply(rem,2,qest,aval[i]) -z=optim(0,anc.best.fun,init=init,iter=iter,rem=rem,Jm1=Jm1,alpha=aval[i],method='Brent',lower=0,upper=1) -fin.crit[i,]=z$par*init -} -fin.crit -} - - - -IND.PAIR.ES<-function(x,con=NULL,fun=ES.summary,...){ -# -# J independent groups -# For each column of a specified matrix of linear contrast -# coefficients, pool the data having a contrast coefficient of 1 into one -# group, do the same for contrast coefficient of -1, -# then estimate measures of effect size. -# -# Default, all pairwise comparisons using all of the measures of effect size -# via ES.summary -# -# To get individual measures of effect size, use -# fun=ESfun and include the argument -# method. -# Example: fun=ESfun, method='EP' does explanatory power. 
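# A hypothetical call with the data for three or more groups stored in list mode or in
# the columns of a matrix:
# IND.PAIR.ES(x)                         # all pairwise comparisons, all measures in ES.summary
# IND.PAIR.ES(x,fun=ESfun,method='AKP')  # a single measure of effect size for every pair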
-# Choices for method are: -# EP: Explanatory power -# QS: Quantile shift based on the medians -# QStr: Based on trimmed means -# AKP: Robust analog of Cohen's d -# WMW: P(X10^3){ -if(SEED)set.seed(2) -Nmin1=min(c(nv[1],nv[2],100)) -Nmin2=min(c(nv[3],nv[4],100)) -for(i in 1:iter){ -id1=sample(nv[1],Nmin1) -id2=sample(nv[2],Nmin1) -L1=outer(x[[1]][id1],x[[2]][id2],FUN='-') -id1=sample(nv[3],Nmin2) -id2=sample(nv[4],Nmin2) -L2=outer(x[[3]][id1],x[[4]][id2],FUN='-') -ef[i]=ESfun(L1,L2,method=method,tr=tr,pr=pr,SEED=FALSE) -}} -else{ -L1=outer(x[[1]],x[[2]],FUN='-') -L2=outer(x[[3]],x[[4]],FUN='-') -ef=ESfun(L1,L2,method=method,tr=tr,pr=pr) -} -} -ef=mean(ef) -ef -} - -inter.TDES.sub<-function(x,method='QS',iter=5,SEED=TRUE,tr=.2,pr=FALSE,switch=FALSE){ -# -# -# Measures of effect size for an interaction in a 2-by-2 design -# For level 1 of Factor A, estimate the distribution of the -# the typical difference for levels 1 and 2 Factor B -# Do the same for level 2 of Factor A, and compute a measure of -# effect size based on these two distributions. -# -# swithch=TRUE, interchange the rows and columns -# -# Choices for the argument method: -# 'DNT',`EP',`QS',`QStr',`AKP',`KMS' -# DNT= De Neve and Thas P(X_1-X_2 < X_3-X_4) so a WMW-type measure -# EP=explanatory power, -# QS= quantile shift (median, -# QStr= quantile shift (trimmed mean) , -# AKP =trimmed mean version of Cohen's d, -# KMS=heteroscedastic analog of Cohen's d -# -# -if(is.matrix(x))x=listm(x) -if(switch)x=x[1,3,2,4] -ef=NA -if(length(x)!=4)stop('Limited to a two-by-two design') -x=elimna(x) -FLAG=TRUE -if(method=='DNT'){ -ef=WMWinter.est(x,iter=iter,SEED=SEED) -FLAG=FALSE -} -if(FLAG){ -nv=as.vector(matl(lapply(x,FUN='length'))) -nt=prod(nv) -if(nt>10^3){ -if(SEED)set.seed(2) -Nmin1=min(c(nv[1],nv[2],100)) -Nmin2=min(c(nv[3],nv[4],100)) -for(i in 1:iter){ -id1=sample(nv[1],Nmin1) -id2=sample(nv[2],Nmin1) -L1=outer(x[[1]][id1],x[[2]][id2],FUN='-') -id1=sample(nv[3],Nmin2) -id2=sample(nv[4],Nmin2) -L2=outer(x[[3]][id1],x[[4]][id2],FUN='-') -ef[i]=ESfun(L1,L2,method=method,tr=tr,pr=pr,SEED=FALSE) -}} -else{ -L1=outer(x[[1]],x[[2]],FUN='-') -L2=outer(x[[3]],x[[4]],FUN='-') -ef=ESfun(L1,L2,method=method,tr=tr,pr=pr) -} -} -ef=mean(ef) -ef -} - - - inter.TDES<-function(x,iter=5,SEED=TRUE,tr=.2,pr=FALSE,switch=FALSE){ -# -# -# Compute six measures of effect size for an interaction in a 2-by-2 design -# For level 1 of Factor A, estimate the distribution of the -# the typical difference for levels 1 and 2 Factor B -# Do the same for level 2 of Factor A, and compute a measure of -# effect size based on these two distributions. -# -# swithch=TRUE, interchange the rows and columns -# -# The measues are: -# 'DNT',`EP',`QS',`QStr',`AKP',`KMS' -# DNT= De Neve and Thas P(X_1-X_2 < X_3-X_4) so a WMW-type measure -# EP=explanatory power, -# QS= quantile shift (median), -# QStr= quantile shift (trimmed mean, trimming controlled by the argument tr), -# AKP =trimmed mean version of Cohen's d, -# KMS=heteroscedastic analog of Cohen's d. 
Under normality and homoscedasticity, 2(KMS) = Cohen's d -# -# -meth=c('DNT','EP','QS','QStr','AKP','KMS') -est=matrix(NA,ncol=2,nrow=6) -est[,1]=c(0.5, 0.0, 0.5 ,0.5, 0.0, 0.0) -for(j in 1:6)est[j,2]=inter.TDES.sub(x,method=meth[j],iter=iter,SEED=SEED,tr=tr,pr=pr,switch=switch) -dimnames(est)=list(c('DNT','EP','QS','QStr','AKP','KMS'),c('NULL','EST')) -est -} - - -interES.2by2<-function(x,tr=.2,SW=FALSE){ -# -# Estimate a collection of effect sizes -# for the first row of a 2-by-2 design -# do the same for the second row -# return estimates of the differences -# -# AKP: Homoscedastic robust analog of Cohen's d -# EP: Explanatory power -# QS: Quantile shift based on the median of the distribution of X-Y, -# QStr: Quantile shift based on the trimmed mean of the distribution of X-Y -# KMS: Robust heteroscedastic analog of Cohen's d -# PH: Patel--Hoel, uses Cliff'a analog of Wilcoxon--Mann--Whitney -# -# switch=TRUE: reverses rows and columns - -if(is.matrix(x) || is.data.frame(x))x=listm(x) -if(SW)x=x[c(1,3,2,4)] -J=length(x) -if(J!=4)stop('Should have four groups; designed for a 2-by-2 ANOVA only') -a=c('AKP','EP','QS','QStr','KMS','WMW') -output=matrix(NA,ncol=4,nrow=6) -output[,1]=c(0.0,0.0,0.5,0.5,0.0,0.5) -for(j in 1:6){ -output[j,2]=ESfun(x[[1]],x[[2]],method=a[j],tr=tr,pr=FALSE) -output[j,3]=ESfun(x[[3]],x[[4]],method=a[j],tr=tr,pr=FALSE) -output[j,4]=output[j,2]-output[j,3] -} -dimnames(output)=list(c('AKP','EP','QS (median)','QStr', -'KMS','PH'),c('NULL','Est 1','Est 2','Diff')) -output -} - -interJK.ESmul<-function(J,K,x,method='QS',tr=.2,SEED=TRUE){ -# -# Compute measures of effect size for interactions associated with -# in J-by-K design. -# This is done for all relevant tetrad cells using interES.2by2 -# Missing values are automatically removed. 
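# For every pair of rows j<jj and every pair of columns k<kk, the effect size for levels
# k versus kk of Factor B is estimated within row j and again within row jj; the column
# Diff of the output is the difference between these two estimates, a measure of the
# interaction for that tetrad of cells.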
-# -# Methods, see the R function ESfun -# Defaults to quantile shfit -# -if(is.matrix(x))x<-listm(x) -if(!is.list(x))stop("Data must be stored in a matrix or in list mode.") -CCJ<-(J^2-J)/2 -CCK<-(K^2-K)/2 -CC<-CCJ*CCK -JK=J*K -test<-matrix(NA,CC,7) -x=elimna(x) -mat=matrix(c(1:JK),nrow=J,ncol=K,byrow=TRUE) -dimnames(test)<-list(NULL,c("Factor A","Factor A","Factor B","Factor B","Effect Size 1","Effect Size 2","Diff")) -jcom<-0 -for (j in 1:J){ -for (jj in 1:J){ -if (j < jj){ -for (k in 1:K){ -for (kk in 1:K){ -if (k < kk){ -jcom<-jcom+1 -test[jcom,1]<-j -test[jcom,2]<-jj -test[jcom,3]<-k -test[jcom,4]<-kk -id1=mat[j,k] -id2=mat[j,kk] -a=ESfun(x[[id1]],x[[id2]],method=method,tr=tr,pr=FALSE,SEED=SEED) -id1=mat[jj,k] -id2=mat[jj,kk] -b=ESfun(x[[id1]],x[[id2]],method=method,tr=tr,pr=FALSE,SEED=SEED) -test[jcom,5:7]<-c(a,b,a-b) -}}}}}} -list(EFFECT.est=test) -} - -linsign<-function(x,con,nreps=200,SEED=TRUE,nmax=10^8){ -# -# Estimate the probability that a linear contrast is less than zero -# -if(sum(con)!=0)stop('Contrast coefficients must sum to zero') -if(SEED)set.seed(2) -if(is.data.frame(x))x=as.matrix(x) -if(is.matrix(x))x<-listm(x) -nv=as.vector(matl(lapply(x,FUN='length'))) -if(!is.list(x))stop('Data must be stored in a matrix or in list mode.') -J<-length(x) -nv=as.vector(matl(lapply(x,FUN='length'))) -if(length(con)!=J)stop('Length of con should equal number of groups') -x=elimna(x) -B=list() -np=prod(nv) -nmin=min(nv) -if(np>nmax)nmin=min(c(nmin,100)) -M=matrix(NA,nrow=nmin,ncol=J) -for(i in 1:nreps){ -for(j in 1:J)M[,j]=sample(x[[j]],nmin) -B[[i]]=M -} -L=lapply(B,linWMWMC.sub,con=con) -ef.size=NA -for(j in 1:length(L))ef.size[j]=mean(L[[j]]<0) -ef=mean(ef.size) -ef -} - -LCES<-function(x,con,nreps=200,tr=.2,SEED=TRUE){ -# -# For each column of con, compute four measures of effect size: -# quantile shift based on median -# quantile shift based on a trimmed mean -# AKP generalization of Cohen's d -# SIGN: analog of the sign test. -# -if(is.data.frame(x))x=as.matrix(x) -if(is.matrix(x))x<-listm(x) -x=elimna(x) -con=as.matrix(con) -d=ncol(con) -mat=matrix(NA,nrow=4,ncol=d) -LAB=NULL -for(i in 1:d){ -LAB[i]=paste('Con',i) -mat[1,i]=lin.ES(x,as.vector(con[,i]),locfun=median,nreps=nreps)$Effect.Size -mat[2,i]=lin.ES(x,as.vector(con[,i]),locfun=mean,nreps=nreps,tr=tr)$Effect.Size -mat[3,i]=lin.akp(x,con[,i],locfun=mean,nreps=nreps,tr=tr)$Effect.Size -mat[4,i]=linsign(x,con[,i],nreps=nreps) -} -mat=cbind(c(0.5,0.5,0.0,0.5),mat) -LAB=c('NULL',LAB) -dimnames(mat)=list(c('QS','Qstr','AKP','SIGN'),LAB) -list(EST=mat,con=con) -} - -qno.est<-function(x,q=.5){ -# -# Estimate of the qth quantile -# In some situations, offers a distinct advantage over the Harrell-Davis estimator when -# comparing extreme quantiles and distributions have heavy tails. -# -n<-length(x) -x<-sort(x) -s<-numeric() -ifelse(n>2, {for(g in 1:(n-2)){ -s[g]<-x[g+1]*(dbinom(g, size=n, prob=q)*(1-q)+dbinom(g+1, size=n, prob=q)*q) -} -sum(s,na.rm = TRUE) -t1<-(2*dbinom(0, size=n, prob=q)*q+dbinom(1, size=n, prob=q)*q)*x[1] -t2<-(2*(1-q)*dbinom(n, size=n, prob=q)+dbinom(n-1, size=n, prob=q)*(1-q))*x[n] -t3<-dbinom(0, size=n, prob=q)*(2-3*q)*x[2]-dbinom(0, size=n, prob=q)*(1-q)*x[3]- -dbinom(n, size=n, prob=q)*q*x[n-2]+dbinom(n, size=n, prob=q)*(3*q-1)*x[n-1] -quan<-sum(s,na.rm = T)+t1+t2+t3}, -ifelse(n==2,{quan <- (1-q)*x[1]+q*x[2]},quan<-x)) -quan -} - -bw.es.A<-function(J,K,x,tr=.2,pr=TRUE,fun=ES.summary,...){ -# -# Between-by-within design. 
- -#Using REL.M, can change default values for small, medium and large -# Example REL.M=c(.1,.3,.5) -# -# For each level of Factor B, compute effect sizes -# for all pairs of levels of Factor A . -# -# The R variable x is assumed to contain the raw -# x stored in list mode. x[[1]] contains the x -# for the first level of both factors: level 1,1. -# x[[2]] is assumed to contain the x for level 1 of the -# first factor and level 2 of the second: level 1,2 -# x[[K]] is the x for level 1,K -# x[[K+1]] is the x for level 2,1, x[2K] is level 2,K, etc. -# -# The default amount of trimming is tr=.2 -# -# It is assumed that x has length JK, the total number of -# groups being tested, but a subset of the x can be analyzed -# using grp -# -# -if(pr){ -if(!identical(fun,ES.summary.CI))print('To get confidence intervals, set the argument fun=ES.summary.CI') -} -if(is.matrix(x) || is.data.frame(x))x=listm(x) -JK=J*K -mat=matrix(c(1:JK),J,K,byrow=TRUE) -B=list() -for(k in 1:K){ -B[[k]]=IND.PAIR.ES(x[mat[,k]],fun=fun,...) -if(k==1){ -if(pr){ -print('B[[1]] contains pairwise measures of effect size for all levels of Factor A') -print(' and level 1 of Factor B') -print(' B[[2]] contains pairwise measures of effect size for all levels of Factor A') -print('and level 2 of Factor B') -}}} -list(B=B) -} - -dep.ES.summary<-function(x,y=NULL,tr=.2, alpha=.05, REL.MAG=NULL,SEED=TRUE,nboot=2000){ -# -# -# For two dependent groups, -# compute confidence intervals for four measures of effect size based on difference scores: -# -# AKP: robust standardized difference similar to Cohen's d -# QS: Quantile shift based on the median of the distribution of difference scores, -# QStr: Quantile shift based on the trimmed mean of the distribution of X-Y -# SIGN: P(X0.5)REL.EF[4,]=.5-(REL.EF[4,]-.5) -output[,3:5]=REL.EF -output -} - -dep.ES.summary.sub<-function(x,y=NULL,tr=.2){ -# -# -# Used to determine equivalent effect size based on specified standard deviations -# - -if(!is.null(y))x=x-y -output=matrix(NA,ncol=2,nrow=4) -dimnames(output)=list(c('AKP','QS (median)','QStr','SIGN'),c('NULL','Est')) -output[1,1:2]=c(0,D.akp.effect(x)) -output[2,1:2]=c(0.5,depQS(x)$Q.effect) -output[3,1:2]=c(0.5,depQS(x,locfun=mean,tr=tr)$Q.effect) -output[4,1:2]=c(0.5,mean(x[x!=0]<0)) -output -} - - -dep.ES.summary.CI<-function(x,y=NULL,tr=.2, alpha=.05, REL.MAG=NULL,SEED=TRUE,nboot=1000,AUTO=FALSE){ -# -# -# For two dependent groups, -# compute confidence intervals for four measures of effect size based on difference scores: -# -# AKP: robust standardized difference similar to Cohen's d -# QS: Quantile shift based on the median of the distribution of difference scores, -# QStr: Quantile shift based on the trimmed mean of the distribution of X-Y -# SIGN: P(X0.5)REL.EF[4,]=.5-(REL.EF[4,]-.5) -output[,3:5]=REL.EF -a=D.akp.effect.ci(x,alpha=alpha,SEED=SEED,tr=tr,nboot=nboot) -output[1,6:7]=a$ci -output[1,8]=a$p.value -#output[1,6:7]=D.akp.effect.ci(x,alpha=alpha,SEED=SEED,tr=tr,nboot=nboot)$ci -a=depQSci(x,alpha=alpha,SEED=SEED,nboot=nboot) -output[2,6:7]=a$ci -output[2,8]=a$p.value -a=depQSci(x,locfun=tmean,alpha=alpha, SEED=SEED,tr=tr,nboot=nboot) -output[3,6:7]=a$ci -output[3,8]=a$p.value -Z=sum(x<0) -nm=length(x[x!=0]) -a=binom.conf.pv(Z,nm,alpha=alpha,AUTO=AUTO,pr=FALSE) -output[4,6:7]=a$ci -output[4,8]=a$p.value -output -} - - -bw.es.B<-function(J,K,x,tr=.2,POOL=FALSE,OPT=FALSE,CI=FALSE,SEED=TRUE,REL.MAG=NULL,pr=TRUE){ -# -# Between-by-within design. 
-# -# For each level of Factor A, compute effect sizes -# for all j0))Best=output[id,2] -if(flag==Jm1)Best='All' -#setClass('BIN',slots=c('Group.with.largest.estimate','Select.Best.p.value','Larger.than','n','output')) #not sure select p.value is valid -#put=new('BIN',Group.with.largest.estimate=R[[1]],Select.Best.p.value=dpv,Larger.than=Best,n=n,output=output) -setClass('BIN',slots=c('Group.with.largest.estimate','Larger.than','n','output')) -put=new('BIN',Group.with.largest.estimate=R[[1]],Larger.than=Best,n=n,output=output) -put -} - -bin.best.crit<-function(p,n,iter=5000,SEED=TRUE){ -# -# -# -if(SEED)set.seed(2) -J=length(n) #Number of groups -Jm1=J-1 -pv.mat=matrix(NA,iter,Jm1) -for(i in 1:iter){ -x=rbinom(J,n,p) -pv.mat[i,]=bin.best.sub(x,n) -} -pv.mat -} - -bin.best.sub<-function(x,n,p.crit=NULL,alpha=.05,iter=5000,SEED=TRUE){ -# -# Used by bin.best.crit -# -# x is a vector containing the number of successes. -# n is a vector indicating the sample sizes. -# -# -J=length(x) -if(J<3)stop('Should have 3 or more groups') -Jm1=J-1 -est=x/n -R=order(est,decreasing = TRUE) -pvec=NA -for(i in 2:J){ -im1=i-1 -a=bi2KMSv2(x[R[1]],n[R[1]],x[R[i]],n[R[i]],alpha=p.crit[im1]) -pvec[im1]=a$p.value -} -pvec=as.vector(matl(pvec)) -pvec -} - - -bin.best.PV<-function(x,n,alpha=.05,iter=5000,SEED=TRUE){ -# -# For J independent groups, -# identify the group with highest probability of success. -# Make a decision if every p.value<=p.crit -# -# x is a vector containing the number of successes. -# n is a vector indicating the sample sizes. -# -# p.crit: If NULL, critical p-values are determined so that that FWE is alpha -# This is done using a simulation to determine the null distribution based on -# iter=5000 replications. -# -# -# Returns: -# Best='No Decision' if not significant -# Best= the group with largest measure if a decision can be made. -# -# Confidence intervals having simultaneous probability coverage 1-alpha -# using the adjusted level. 
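-# Illustrative call (an added sketch, not part of the original source): with,
-# say, 12, 9 and 20 successes out of 40, 35 and 42 trials,
-#   bin.best.PV(x=c(12,9,20), n=c(40,35,42), alpha=.05)
-# compares the group having the largest estimated probability of success to
-# each of the remaining groups and reports the adjusted critical p-values.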
-# -J=length(x) -if(J<2)stop('Should have 2 or more groups') -Jm1=J-1 -est=x/n -R=order(est,decreasing = TRUE) -pvec=NA - -phat=sum(x)/sum(n) - - -aval=c(seq(.001,.1,.001),seq(.11,.99,.01)) -id=which(aval==alpha) -if(length(id)==0)stop('alpha be one one values .001(.001).1 or 11(.01).99') - -v=bin.best.crit.det(phat,n=n,iter=iter,SEED=SEED) -p.crit=v[id,] - - -output<-matrix(NA,Jm1,8) -dimnames(output)=list(NULL,c('Est.Best','Grp','Est','Dif','ci.low','ci.up','p.value','p.crit')) -for(i in 2:J){ -im1=i-1 -a=bi2KMSv2(x[R[1]],n[R[1]],x[R[i]],n[R[i]],alpha=p.crit[im1]) -pvec[im1]=a$p.value -output[im1,1:7]=c(a$p1, R[i], a$p2,a$est.dif,a$ci[1],a$ci[2],a$p.value) -} -output[,8]=p.crit - - -# Determine p-value for overall decision -na=length(aval) -for(i in 1:na){ -chk=sum(output[,7]<=v[i,]) -pv=aval[i] -if(chk==Jm1)break -} -Best='No Decisions' -flag=sum(output[,7]<=output[,8]) -id=output[,7]<=output[,8] -if(sum(id>0))Best=output[id,2] -if(flag==Jm1)Best='All' -setClass('BIN',slots=c('Group.with.largest.estimate','Select.Best.p.value','Larger.than','n','output')) -put=new('BIN',Group.with.largest.estimate=R[[1]],Select.Best.p.value=pv,Larger.than=Best,n=n,output=output) -put -} - - -bin.best.crit.det<-function(p,n,iter=5000,SEED=TRUE){ -# -# -# -if(SEED)set.seed(2) -J=length(n) #Number of groups -Jm1=J-1 -pv.mat=matrix(NA,iter,Jm1) -for(i in 1:iter){ -x=rbinom(J,n,p) -pv.mat[i,]=bin.best.sub(x,n) -} -rem=pv.mat -aval=c(seq(.001,.1,.001),seq(.011,.99,.01)) -na=length(aval) -fin.crit=matrix(NA,na,Jm1) -for(i in 1:na){ -init=apply(rem,2,qest,aval[i]) -z=optim(0,anc.best.fun,init=init,iter=iter,rem=rem,Jm1=Jm1,alpha=aval[i],method='Brent',lower=0,upper=1) -fin.crit[i,]=z$par*init -} -fin.crit -} - - -bin.best.EQA<-function(x,n,p.crit=NULL,alpha=.05,iter=5000,SEED=TRUE){ -# -# -# Identify the group with highest probability of success. -# Make a decision if every p.value<=p.crit -# -# x is a vector containing the number of successes. -# n is a vector indicating the sample sizes. -# -# p.crit: If NULL, critical p-values are determined so that that FWE is alpha -# This is done using a simulation to determine the null distribution based on -# iter=5000 replications. -# -# -# Returns: -# Best='No Decision' if not significant -# Best= the group with largest measure if a decision can be made. -# -# Confidence intervals having simultaneous probability coverage 1-alpha -# using the adjusted level. -# -J=length(x) -if(J<2)stop('Should have 2 or more groups') -Jm1=J-1 -est=x/n -R=order(est,decreasing = TRUE) -pvec=NA -init=rep(alpha,Jm1) -if(is.null(p.crit)){ -phat=sum(x)/sum(n) -pv.mat=bin.best.crit(phat,n=n,iter=iter,SEED=SEED) -z=optim(0,anc.best.fun,init=init,iter=iter,rem=pv.mat,Jm1=Jm1,alpha=alpha,method='Brent',lower=0,upper=1) -p.crit=z$par*init -} -output<-matrix(NA,Jm1,8) -dimnames(output)=list(NULL,c('Est.Best','Grp','Est','Dif','ci.low','ci.up','p.value','p.crit')) -for(i in 2:J){ -im1=i-1 -a=bi2KMSv2(x[R[1]],n[R[1]],x[R[i]],n[R[i]],alpha=p.crit[im1]) -pvec[im1]=a$p.value -output[im1,]=c(a$p1, R[i], a$p2,a$est.dif,a$ci[1],a$ci[2],a$p.value,p.crit[im1]) -} -Best='No Decisions' -flag=sum(output[,7]<=output[,8]) -id=output[,7]<=output[,8] -if(sum(id>0))Best=output[id,2] -if(flag==Jm1)Best='All' -setClass('BIN',slots=c('Group.with.largest.estimate','Larger.than','n','output')) -put=new('BIN',Group.with.largest.estimate=R[[1]],Larger.than=Best,n=n,output=output) -put -} - - - -binmcp<-function(x,n,p.crit=NULL,alpha=.05,iter=2000,SEED=TRUE){ -# -# -# x is a vector containing the number of successes. 
-# n is a vector indicating the sample sizes. -# -# p.crit: If NULL, critical p-values are determined so that that FWE is alpha -# This is done using a simulation to determine the null distribution based on -# iter=5000 replications. -# -# Confidence intervals having simultaneous probability coverage 1-alpha -# using the adjusted level. -# -J=length(x) -A=(J^2-J)/2 -if(J<2)stop('Should have 2 or more groups') -Jm1=J-1 -est=x/n -pvec=NA -init=rep(alpha,Jm1) -if(is.null(p.crit)){ -phat=sum(x)/sum(n) -pv.mat=binmcp.crit(phat,n=n,iter=iter,SEED=SEED) -} -p.crit=qest(pv.mat,alpha) -output<-matrix(NA,A,9) -dimnames(output)=list(NULL,c('Grp','Grp','Est 1','Est 2','Dif','ci.low','ci.up','p.value','p.crit')) -p.crit=p.crit/A -ic=0 -for(j in 1:J){ -for(k in 1:J){ -if(j0))Best=output[id,2] -if(flag==Jm1)Best='All' -setClass('BIN',slots=c('Group.with.largest.estimate','Larger.than','n','output')) -put=new('BIN',Group.with.largest.estimate=R[[1]],Larger.than=Best,n=n,output=output) -put -} - - -rmanc.best.crit<-function(x,alpha=.05,tr=.2,iter=5000,SEED=TRUE,...){ -# -# Determine critical p-values for rmanc.best -# -if(SEED)set.seed(2) -library(MASS) -J=ncol(x) -n=nrow(x) -Jm1=J-1 -rem=matrix(NA,iter,Jm1) -A=winall(x,tr=tr)$cov -for(k in 1:iter){ -xs=mvrnorm(n,mu=rep(0,J),Sigma=A) -rem[k,]=rmanc.best.ex(xs,tr=tr) -} -init=apply(rem,2,qest,alpha) -z=optim(0,anc.best.fun,init=init,iter=iter,rem=rem,Jm1=Jm1,alpha=alpha,method='Brent',lower=0,upper=1) -fin.crit=z$par*init -fin.crit -} - -rmanc.best.PV<-function(x,alpha=.05,tr=.2,iter=5000,SEED=TRUE){ -# -# -# For J dependent groups, -# identify the group with largest trimmed mean -# Make a decision if every p.value<=p.crit -# -# p.crit is determined via -# a simulation to determine the null distribution based on -# iter=5000 replications. -# -# -# Returns: -# Best='No Decision' if not significant -# Best= the group with largest measure of location if a decision can be made. -# -# Confidence intervals having simultaneous probability coverage 1-alpha -# using the adjusted level. 
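-# Illustrative call (an added sketch, not part of the original source): if m is
-# an n-by-J matrix of J>=3 dependent groups (m is a hypothetical name),
-#   rmanc.best.PV(m, alpha=.05, tr=.2)
-# compares the group having the largest 20% trimmed mean to each of the other
-# groups via yuend, using critical p-values determined via simulation.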
-# -x=elimna(x) -flag=TRUE -if(is.list(x))stop('x should be a matrix or a data frame') -J=ncol(x) -if(J<3)stop('Should have 3 or more groups') -Jm1=J-1 -est=apply(x,2,tmean,tr=tr) -n=nrow(x) -est=matl(est) -R=order(est,decreasing = TRUE) -pvec=NA - -aval=c(seq(.001,.1,.001),seq(.11,.99,.01)) -id=which(aval==alpha) -if(length(id)==0)stop('alpha be one one values .001(.001).1 or 11(.01).99') -v=rmanc.best.crit.det(x,iter=iter,alpha=alpha,tr=tr,SEED=SEED) -p.crit=v[id,] -output<-matrix(NA,Jm1,8) -dimnames(output)=list(NULL,c('Est.Best','Grp','Est','Dif','ci.low','ci.up','p.value','p.crit')) -for(i in 2:J){ -im1=i-1 -a=yuend(x[,R[1]],x[,R[i]],alpha=p.crit[im1],tr=tr) -pvec[im1]=a$p.value -output[im1,]=c(a$est1,R[[i]],a$est2,a$dif,a$ci[1],a$ci[2],a$p.value,p.crit[im1]) -} - -# Determine p-value for overall decision -na=length(aval) -for(i in 1:na){ -chk=sum(output[,7]<=v[i,]) -pv=aval[i] -if(chk==Jm1)break -} -Best='No Decisions' -flag=sum(output[,7]<=output[,8]) -id=output[,7]<=output[,8] -if(sum(id>0))Best=output[id,2] -if(flag==Jm1)Best='All' -setClass('BIN',slots=c('Group.with.largest.estimate','Select.Best.p.value','Larger.than','n','output')) -put=new('BIN',Group.with.largest.estimate=R[[1]],Select.Best.p.value=pv,Larger.than=Best,n=n,output=output) -put -} - -rmanc.best.crit.det<-function(x,tr=.2,iter=5000,SEED=TRUE,...){ -# -# Determine critical p-values for rmanc.best -# -if(SEED)set.seed(2) -J=ncol(x) -n=nrow(x) -Jm1=J-1 -rem=matrix(NA,iter,Jm1) -A=winall(x,tr=tr)$cov -for(k in 1:iter){ -xs=mvrnorm(n,mu=rep(0,J),Sigma=A) -rem[k,]=rmanc.best.ex(xs,tr=tr) -} - -aval=c(seq(.001,.1,.001),seq(.011,.99,.01)) -na=length(aval) -fin.crit=matrix(NA,na,Jm1) -for(i in 1:na){ -init=apply(rem,2,qest,aval[i]) -z=optim(0,anc.best.fun,init=init,iter=iter,rem=rem,Jm1=Jm1,alpha=aval[i],method='Brent',lower=0,upper=1) -fin.crit[i,]=z$par*init -} -fin.crit -} - -ancJN.LC<-function(x,y,pts=NULL,con=NULL,regfun=tsreg,nmin=12,npts=5, -alpha=.05,xout=FALSE,outfun=out,nboot=100,SEED=TRUE,pr=TRUE,...){ -# -# ANCOVA: Linear contrasts -# J independent groups -# using a robust regression estimator. -# By default, use the Theil--Sen estimator -# -# Assume data are in -# x and y: list mode with length J or matrices with J columns -# -# pts can be used to specify the design points where the regression lines -# are to be compared. -# -# npts=5 points are used that are equally spaced -# npts=25 would use 25 points equally spaced. -# -# To get adjusted p-values that control FWE, set -# p6n50=p5n50 and p6n100=p6n100 -# where the R variables p6n5 and p6n100 contain the data in the files -# p6n5.csv and p6n100.csv, which are stored at -# https://dornsife.usc.edu/cf/labs/wilcox/wilcox-faculty-display.cfm -# in the directory labeled datasets -# These adjusted p-values are based on an estimate of the null distribution of the p-values using 10,000 replications. 
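-# Illustrative call (an added sketch, not part of the original source): with
-# three groups stored in list mode and a single contrast comparing group 1 to
-# groups 2 and 3,
-#   ancJN.LC(x, y, con=matrix(c(2,-1,-1), ncol=1), regfun=tsreg, npts=5)
-# evaluates the contrast at five covariate values equally spaced over the range
-# common to all of the groups.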
-# -# -if(identical(outfun,boxplot))stop('Use outfun=outbox') -if(SEED)set.seed(2) -FLAG=pts -if(is.matrix(x) || is.data.frame(x))x=listm(x) -if(is.matrix(y) || is.data.frame(y))y=listm(y) -J=length(x) -if(is.null(con))con=con.all.pairs(J) -con=as.matrix(con) - -for(j in 1:J){ -xy=cbind(x[[j]],y[[j]]) -xy=elimna(xy) -x[[j]]=xy[,1] -y[[j]]=xy[,2] -} - -LEV=seq(.001,.1,.001) -# Critical p-values: -PC=c( 0.0004715554, 0.0010600032, 0.0014646097, 0.0018389954, 0.0022100770, 0.0026392236 - , 0.0030845986, 0.0034799136, 0.0039374557, 0.0045480591, 0.0052206792, 0.0057883125 - , 0.0062902648, 0.0068322005, 0.0074001036, 0.0079687149, 0.0085694544, 0.0091944370 - , 0.0097899444, 0.0103380231, 0.0108787002, 0.0114575017, 0.0120748898, 0.0127022411 - , 0.0133151567, 0.0138929664, 0.0144234632, 0.0149174499, 0.0153969778, 0.0158737390 - , 0.0163486640, 0.0168226766, 0.0172990400, 0.0177788317, 0.0182590407, 0.0187350584 - , 0.0192063600, 0.0196827391, 0.0201828045, 0.0207226980, 0.0213060840, 0.0219224603 - , 0.0225496919, 0.0231594943, 0.0237279940, 0.0242453799, 0.0247167117, 0.0251561867 - , 0.0255817130, 0.0260113587, 0.0264602401, 0.0269376359, 0.0274445303, 0.0279729461 - , 0.0285099972, 0.0290459058, 0.0295790691, 0.0301128895, 0.0306482248, 0.0311800075 - , 0.0317011819, 0.0322091285, 0.0327081943, 0.0332067908, 0.0337124366, 0.0342287456 - , 0.0347555836, 0.0352908030, 0.0358314032, 0.0363734449, 0.0369117409, 0.0374406811 - , 0.0379564126, 0.0384591067, 0.0389535674, 0.0394475575, 0.0399489437, 0.0404633647 - , 0.0409932052, 0.0415375872, 0.0420929727, 0.0426543011, 0.0432164300, 0.0437752831 - , 0.0443282924, 0.0448743464, 0.0454136998, 0.0459479133, 0.0464794759, 0.0470108970 - , 0.0475436440, 0.0480776649, 0.0486119396, 0.0491457340, 0.0496797020, 0.0502161233 - , 0.0507581656, 0.0513086116, 0.0518687047, 0.0524377104) - PC100=c( 0.0002966929, 0.0007169931, 0.0011232960, 0.0014665833, 0.0018499989, 0.0021811159, 0.0025574707, 0.0030353289, 0.0035106136, -0.0039372118, 0.0043468911, 0.0047755738, 0.0052219494, 0.0056692252, 0.0061033036, 0.0065103109, 0.0068936246, 0.0072767310 -, 0.0076766388, 0.0080950473, 0.0085363254, 0.0090079848, 0.0095103514, 0.0100413399, 0.0105923616, 0.0111373687, 0.0116501421 -, 0.0121316345, 0.0126067868, 0.0130940807, 0.0135865147, 0.0140673848, 0.0145340038, 0.0149959496, 0.0154605814, 0.0159310004 -, 0.0164110067, 0.0169026157, 0.0173999561, 0.0178905940, 0.0183633096, 0.0188126308, 0.0192375443, 0.0196398972, 0.0200251178 -, 0.0204024531, 0.0207816755, 0.0211673500, 0.0215562764, 0.0219417240, 0.0223201432, 0.0226935235, 0.0230670390, 0.0234465536 -, 0.0238377401, 0.0242449082, 0.0246692644, 0.0251088323, 0.0255604045, 0.0260209857, 0.0264872622, 0.0269547078, 0.0274182559 -, 0.0278743138, 0.0283226660, 0.0287673658, 0.0292163846, 0.0296798737, 0.0301671058, 0.0306828917, 0.0312249485, 0.0317836349 -, 0.0323445314, 0.0328929974, 0.0334187668, 0.0339185027, 0.0343952967, 0.0348558545, 0.0353073007, 0.0357552402, 0.0362033565 -, 0.0366537740, 0.0371074184, 0.0375642517, 0.0380236755, 0.0384852821, 0.0389497084, 0.0394190068, 0.0398960179, 0.0403828416 -, 0.0408792748, 0.0413823211, 0.0418872318, 0.0423894946, 0.0428866160, 0.0433788246, 0.0438685719, 0.0443592827, 0.0448539520 -, 0.0453540386) - -LV10=seq(.11,.99,.01) - -PC2=c( 0.05847911,0.06353127,0.06846620,0.07310904,0.07881193,0.08499317 - ,0.09026885,0.09574866,0.10124244,0.10716984,0.11236755,0.11770113 - ,0.12277641,0.12816436,0.13376686,0.13841793,0.14410387,0.14968273 
-,0.15593615,0.16189668,0.16835575,0.17408581,0.17990687,0.18615084 -,0.19255775,0.19789818,0.20461883,0.21144604,0.21805108,0.22337111 -,0.22987129,0.23652924,0.24281857,0.24931816,0.25527023,0.26170124 -,0.26891556,0.27539636,0.28170631,0.28851563,0.29594635,0.30318192 -,0.31045830,0.31845106,0.32544425,0.33439997,0.34149133,0.34720162 -,0.35473714,0.36186855,0.36966857,0.37744663,0.38495268,0.39335818 -,0.40139728,0.40976955,0.41707974,0.42741844,0.43614133,0.44525646 -,0.45212461,0.46081343,0.46949317,0.47873498,0.48961349,0.50015426 -,0.50931802,0.52037963,0.53148887,0.54267461,0.55344746,0.56735092 -,0.57914832,0.58912604,0.60177045,0.61639495,0.62953494,0.64515539 -,0.66071262,0.67259936,0.68883014,0.70680224,0.72531254,0.74408131 -,0.76444699,0.79049224,0.81775009,0.84817210,0.88710556) - -PC100v2=c( 0.05001203,0.05433510,0.05923540,0.06449529,0.06983196,0.07506101 - ,0.07980333,0.08542380,0.09160087,0.09662086,0.10166432,0.10676042 -,0.11225467,0.11892794,0.12375516,0.12925937,0.13452963,0.14002073 -,0.14507355,0.15091410,0.15606790,0.16249155,0.16797190,0.17373478 -,0.17882609,0.18493900,0.19193563,0.19735772,0.20348841,0.20935194 -,0.21635380,0.22225273,0.22983262,0.23667483,0.24413437,0.25020140 -,0.25702517,0.26403520,0.27014313,0.27804532,0.28442682,0.29088905 -,0.29821809,0.30537715,0.31289211,0.32086820,0.32832301,0.33561776 -,0.34266364,0.34882764,0.35668900,0.36457176,0.37183425,0.37923028 -,0.38652374,0.39483644,0.40189416,0.41095361,0.42066139,0.43029008 -,0.43909925,0.44914388,0.45986646,0.47000245,0.48024114,0.48927833 -,0.50263045,0.51392305,0.52498817,0.53697186,0.54793443,0.55984674 -,0.57110087,0.58246374,0.59547592,0.61054637,0.62515843,0.63817892 -,0.65448463,0.67194322,0.69024711,0.70584982,0.72322165,0.74211690 -,0.76534343,0.78913944,0.81819273,0.84990380,0.89251684) - -LV=c(LEV,LV10) -PC50=c(PC,PC2) -PC100.all=c(PC100,PC100v2) -n=lapply(y,length) -n=as.vector(matl(n)) -nmin=min(n) -if(nmin<=75)cp4=lplot.pred(LV,PC50,alpha)$yhat -else -cp4=lplot.pred(LV,PC100.all,alpha)$yhat -crit=qnorm(1-cp4/2) -if(xout){ -for(j in 1:J){ -flag=outfun(x[[j]],plotit=FALSE,...)$keep -m<-cbind(x[[j]],y[[j]]) -p1=ncol(m) -p=p1-1 -m<-m[flag,] -x[[j]]<-m[,1:p] -y[[j]]<-m[,p1] -}} -if(!is.null(pts))npts=length(pts) -if(is.null(pts[1])){ -xall=lapply(x,unique) -L=lapply(xall,min) -U=lapply(xall,max) -L=matl(L) -U=matl(U) -L=max(L) -U=min(U) -if(L>=U)stop('The range of covariate values is not sufficiently similar among the groups') -pts=seq(max(L),min(U),length.out=npts) -} -NT=ncol(con) -CON=list() -mat<-matrix(NA,npts,8) -dimnames(mat)<-list(NULL,c('X','Est','TEST','se','ci.low','ci.hi','p.value','Adj.p.value')) -mat[,1]=pts -sqsd=list() -est=list() -for(j in 1:J){ -sqsd[[j]]=regYvar(x[[j]],y[[j]],pts=pts,regfun=regfun,nboot=nboot,SEED=FALSE,xout=FALSE,outfun=outfun,...) -est[[j]]=regYhat(x[[j]],y[[j]],xr=pts,regfun=regfun,xout=FALSE,outfun=outfun,...) 
-} -est=matl(est) -sqsd=matl(sqsd) -for(K in 1:NT){ -for(k in 1:npts){ -EST=0 -SD=0 -EST=sum(con[,K]*est[k,]) -SD=sum(con[,K]^2*sqsd[k,]) -sd=sqrt(SD) -mat[k,4]=sd -tests=EST/sd -mat[k,3]=tests -pv=2*(1-pnorm(abs(tests))) -mat[k,7]=pv -mat[k,5]=EST-crit*sd -mat[k,6]=EST+crit*sd -mat[k,2]=EST -mat[k,8]=NA -# Compute a p-value -if(nmin<=75){ -flag=mat[k,7]>=PC50 -ID=which(flag==TRUE) -ic=max(ID,1) -mat[k,8]=LV[ic] -} -else{ -flag=mat[k,7]>=PC100.all -ID=which(flag==TRUE) -ic=max(ID,1) -mat[k,8]=LV[ic] -} -} -CON[[K]]=mat -} -pts=as.matrix(pts,ncol=1) -g.est=cbind(pts,est) -LAB='X' -J1=J+1 -for(j in 2:J1)LAB[j]=paste('GRP',j-1) -dimnames(g.est)=list(NULL,LAB) -list(n=n,crit.p.value=cp4,CON=CON,con=con,GRP.est=g.est) -} - -elimna2g<-function(x,y){ -# -# Assume both are matrices or list mode -# -if(is.matrix(x)){ -J=ncol(x) -if(J!=ncol(y))stop('x and y have different number of columns') -J1=J+1 -J2=2*J -xy=elimna(cbind(x,y)) -x=xy[,1:J] -y=xy[,J1:J2] -} -if(is.list(x)){ -J=length(x) -J1=J+1 -J2=2*J -if(J!=length(y))stop('x and y have different lengths') -xy=elimna(c(x,y)) -x=xy[1:J] -y=xy[J1:J2] -} -list(x=x,y=y) -} - - -smgridVRC<-function(x,y,IV=c(1,2),Qsplit1=.5,Qsplit2=.5,TR=.2,alpha=.05,VAL1=NULL,VAL2=NULL,PB=FALSE,est=tmean,nboot=1000,pr=TRUE,method='hoch', -xout=FALSE,outfun=outpro,SEED=TRUE,...){ -# -# Compare measures of location among grids defined by quantiles of two IVs. By default 20% trimming is used -# est=median would use medians -# est=hd would use the Harrell-Davis estimator for the median. -# -# Qsplit: split the independent variable based on the -# quantiles indicated by Qsplit -# Example -# Qsplit1=c(.25,.5,.75) -# Qsplit2=.5 -# would split based on the quartiles for the first independent variable and the median -# for the second independent variable -# - -# Then test the hypothesis of equal measures of location -# IV[1]: indicates the column of containing the first independent variable to use. -# IV[2]: indicates the column of containing the second independent variable to use. -# -# TR: amount of trimming when using a non-bootstrap method. To alter the amount of trimming when using -# a bootstrap method use -# tr. Example, tr=.25 would use 25% trimming. -# -x=as.matrix(x) -p=ncol(x) -if(p==1)stop('There should be two or more independent variables') -p1=p+1 -xy<-elimna(cbind(x,y)) -x<-xy[,1:p] -y<-xy[,p1] -v=NULL -if(xout){ -flag<-outfun(x,plotit=FALSE,...)$keep -x<-x[flag,] -y<-y[flag] -} -if(identical(est,median))PB=TRUE -if(identical(est,hd))PB=TRUE -z=list() -group=list() -if(is.null(VAL1) || is.null(VA2)){ -N.int=length(Qsplit1)+1 -N.int2=length(Qsplit2)+1 -} -else { -N.int=length(VAL1)+1 -N.int2=length(VAL2)+1 -} -est.mat=matrix(NA,nrow=N.int,ncol=N.int2) -n.mat=matrix(NA,nrow=N.int,ncol=N.int2) -DV.mat=matrix(NA,nrow=N.int,ncol=N.int2) -L1=NULL -L2=NULL -for(i in 1:N.int)L1[i]=paste('IV1.G',i) -for(i in 1:N.int2)L2[i]=paste('IV2.G',i) -dimnames(est.mat)=list(L1,L2) - -if(is.null(VAL1) || is.null(VA2)){ -qv=quantile(x[,IV[1]],Qsplit1) -qv=c(min(x[,IV[1]]),qv,max(x[,IV[1]])) -qv2=quantile(x[,IV[2]],Qsplit2) -qv2=c(min(x[,IV[2]]),qv2,max(x[,IV[2]])) -} -else{ -qv=VAL1 -qv=c(min(x[,IV[1]]),qv,max(x[,IV[1]])) -qv2=VAL2 -qv2=c(min(x[,IV[2]]),qv2,max(x[,IV[2]])) -} -ic=0 -for(j in 1:N.int){ -j1=j+1 -xsub=binmat(xy,IV[1],qv[j],qv[j1]) -for(k in 1:N.int2){ -k1=k+1 -xsub2=binmat(xsub,IV[2],qv2[k],qv2[k1]) -est.mat[j,k]=est(xsub2[,p1],...) 
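-# (Added comment.) The next lines record the sample size for this grid cell and
-# save its y values so that the pairwise comparisons of cells can be performed
-# further below.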
-n.mat[j,k]=length(xsub2[,p1]) -ic=ic+1 -z[[ic]]=xsub2[,p1] -if(length(z[[ic]])>6)group[[ic]]=summary(xsub2[,1:p]) -} -} -n=NA -for(j in 1:length(z)){ -n[j]=length(z[[j]]) -} -if(min(n)<=5){ -id=which(n>5) -del=which(n<=5) -n=n[id] -if(pr)print(paste('eliminated group',del,'The sample size is less than 6')) -} -NT=N.int*N.int2 -MID=matrix(c(1:NT),nrow=N.int,ncol=N.int2,byrow=TRUE) -# pull out s indicated by the columns of MID and do tests -IV1res=NULL -a=NULL -for(j in 1:N.int2){ -zsub=z[MID[,j]] -DV.mat[,j]=matl(lapply(zsub,est,...)) -if(!PB)a=lincon(zsub,tr=TR,pr=FALSE,alpha=alpha)$psihat[,3:8] -if(PB)a=linpairpb(zsub,nboot=nboot,alpha=alpha,SEED=SEED,...)$output[,c(3:9)] -IV1res=rbind(IV1res,a) -} -#Now do IV2 -IV2res=NULL -a=NULL -for(j in 1:N.int){ -zsub=z[MID[j,]] -if(!PB){ -a=lincon(zsub,tr=TR,pr=FALSE,alpha=alpha)$psihat[,3:8] -} -if(PB){ -a=linpairpb(zsub,nboot=nboot,alpha=alpha,est=est,SEED=SEED,...)$output[,c(3:9)] -} -IV2res=rbind(IV2res,a) -} -if(!PB){ #fix labels add adjusted p-value -IV1res=cbind(IV1res[,1:4],p.adjust(IV1res[,4],method=method),IV1res[,5:6]) -IV2res=cbind(IV2res[,1:4],p.adjust(IV2res[,4],method=method),IV2res[,5:6]) -} -if(PB){ -IV1res[,3]=p.adjust(IV1res[,2],method=method) -IV2res[,3]=p.adjust(IV2res[,2],method=method) -IV1res=IV1res[,c(1,4,5,2,3,6,7)] -IV2res=IV2res[,c(1,4,5,2,3,6,7)] -} -nr=nrow(IV1res) -Lnam1=NULL -for(j in 1:nr)Lnam1=c(Lnam1,paste(' IV1Level',j)) -print(dim(IV1res)) -print(Lnam1) -nr=nrow(IV2res) -Lnam2=NULL -for(j in 1:nr)Lnam2=c(Lnam2,paste('IV2 Level',j)) -dimnames(IV1res)=list(Lnam1,c('psihat','ci.lower','ci.upper','p.value','p.adjust','Est.1','Est.2')) -dimnames(IV2res)=list(Lnam2,c('psihat','ci.lower','ci.upper','p.value','p.adjust','Est.1','Est.2')) -list(est.loc.4.DV=est.mat,n=n.mat,Independent.variables.summary=group,Res.4.IV1=IV1res,Res.4.IV2=IV2res) -} - - - -# -------------------------------------------------------------- -# Code adapted from RGenData::GenDataPopulation from John Ruscio -# -------------------------------------------------------------- - -# Reference -# Ruscio, J. & Kaczetow, W. (2008) -# Simulating Multivariate Nonnormal Data Using an Iterative Algorithm. -# Multivariate Behav Res, 43, 355-381. -# https://www.ncbi.nlm.nih.gov/pubmed/26741201 - -# License: MIT -# Copyright <2018> - -# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- -# Original -# https://github.com/cran/RGenData/blob/master/R/EFAGenData.R - -# Simulate multivariate g-and-h data using an iterative algorithm -# -# Args: -# n.cases : Number of observations for each variable - default 1000 -# n.variables : Number of variables - default 2 -# g : g parameter of the g-and-h distribution - default 0 -# h : h parameter of the g-and-h distribution - default 0 -# rho : Target correlation between variables - default 0 -# corr.type : Type of correlation - default "pearson", alternative "spearman" -# -# Returns: -# data : Population of data - matrix n.cases rows by n.variables columns -# -gengh <- function(n, p = 2, - g = 0, h = 0, rho = 0, - corr.type = "pearson"){ -n.cases=n -n.variables=p -target.corr <- matrix(c(1, rho, rho, 1), nrow = 2, byrow = TRUE) # covariance matrix - -n.factors <- 0 # Number of factors (scalar) -max.trials <- 5 # Maximum number of trials (scalar) -initial.multiplier <- 1 # Value of initial multiplier (scalar) - -# generate g-and-h data -distributions <- matrix(NA, nrow = n.cases, ncol = n.variables) - for (V in 1:n.variables){ - distributions[,V] <- sort(ghdist(n.cases, g=g, h=h)) - } - -data <- matrix(0, nrow = n.cases, ncol = n.variables) -iteration <- 0 -best.rmsr <- 1 -trials.without.improvement <- 0 -intermediate.corr <- target.corr - -# If number of latent factors was not specified, determine it -if (n.factors == 0){ - Eigenvalues.Observed <- eigen(intermediate.corr)$values - Eigenvalues.Random <- matrix(0, nrow = 100, ncol = n.variables) - Random.Data <- matrix(0, nrow = n.cases, ncol = n.variables) - for (i in 1:100){ - for (j in 1:n.variables){ - Random.Data[,j] <- sample(distributions[,j], size = n.cases, replace = TRUE) - } - Eigenvalues.Random[i,] <- eigen(cor(Random.Data))$values - } - Eigenvalues.Random <- apply(Eigenvalues.Random, 2, mean) # calculate mean eigenvalue for each factor - n.factors <- max(1, sum(Eigenvalues.Observed > Eigenvalues.Random)) -} - -shared.comp <- matrix(rnorm(n.cases * n.factors, 0, 1), nrow = n.cases, - ncol = n.factors) -unique.comp <- matrix(rnorm(n.cases * n.variables, 0, 1), nrow = n.cases, - ncol = n.variables) -shared.load <- matrix(0, nrow = n.variables, ncol = n.factors) -unique.load <- matrix(0, nrow = n.variables, ncol = 1) -while (trials.without.improvement < max.trials) { - iteration <- iteration + 1 - factor.analysis <- FactorAnalysis(intermediate.corr, corr.matrix = TRUE, - max.iteration = 50, n.factors, corr.type) - if (n.factors == 1) { - shared.load[, 1] <- factor.analysis$loadings - } else { - for (i in 1:n.factors) - shared.load[, i] <- factor.analysis$loadings[, i] - } - shared.load[shared.load > 1] <- 1 - shared.load[shared.load < -1] <- -1 - if (shared.load[1, 1] < 0) - shared.load <- shared.load * -1 - for (i in 1:n.variables) - if (sum(shared.load[i, ] * shared.load[i, ]) < 1) { - unique.load[i, 1] <- (1 - sum(shared.load[i, ] * shared.load[i, ])) - } else { - unique.load[i, 1] <- 0 - } - unique.load <- sqrt(unique.load) - for (i in 1:n.variables) - data[, i] <- (shared.comp %*% t(shared.load))[, i] + unique.comp[, i] * - unique.load[i, 1] - for (i in 1:n.variables) { - data <- data[sort.list(data[, i]), ] - data[, i] <- distributions[, i] - } - reproduced.corr <- cor(data, method = corr.type) - residual.corr <- target.corr - reproduced.corr - rmsr <- sqrt(sum(residual.corr[lower.tri(residual.corr)] * - residual.corr[lower.tri(residual.corr)]) / - (.5 * (n.variables * n.variables - n.variables))) - if (rmsr < best.rmsr) { - best.rmsr <- rmsr - best.corr <- 
intermediate.corr - best.res <- residual.corr - intermediate.corr <- intermediate.corr + initial.multiplier * - residual.corr - trials.without.improvement <- 0 - } else { - trials.without.improvement <- trials.without.improvement + 1 - current.multiplier <- initial.multiplier * - .5 ^ trials.without.improvement - intermediate.corr <- best.corr + current.multiplier * best.res - } -} - -factor.analysis <- FactorAnalysis(best.corr, corr.matrix = TRUE, - max.iteration = 50, n.factors, - corr.type) -if (n.factors == 1) { - shared.load[, 1] <- factor.analysis$loadings -} else { - for (i in 1:n.factors) - shared.load[, i] <- factor.analysis$loadings[, i] -} -shared.load[shared.load > 1] <- 1 -shared.load[shared.load < -1] <- -1 -if (shared.load[1, 1] < 0) - shared.load <- shared.load * -1 -for (i in 1:n.variables) - if (sum(shared.load[i, ] * shared.load[i, ]) < 1) { - unique.load[i, 1] <- (1 - sum(shared.load[i, ] * shared.load[i, ])) - } else { - unique.load[i, 1] <- 0 - } -unique.load <- sqrt(unique.load) -for (i in 1:n.variables) - data[, i] <- (shared.comp %*% t(shared.load))[, i] + unique.comp[, i] * - unique.load[i, 1] -data <- apply(data, 2, scale) # standardizes each variable in the matrix -for (i in 1:n.variables) { - data <- data[sort.list(data[, i]), ] - data[, i] <- distributions[, i] -} -data -} - -################################################################################ -FactorAnalysis <- function(data, corr.matrix = FALSE, max.iteration = 50, - n.factors = 0, corr.type = "pearson") { -# Analyzes comparison data with known factorial structures -# -# Args: -# data : Matrix to store the simulated data. -# corr.matrix : Correlation matrix (default is FALSE) -# max.iteration : Maximum number of iterations (scalar, default is 50). -# n.factors : Number of factors (scalar, default is 0). -# corr.type : Type of correlation (character, default is "pearson", -# user can also call "spearman"). -# -# Returns: -# $loadings : Factor loadings (vector, if one factor. matrix, if multiple -# factors) -# $factors : Number of factors (scalar). -# - data <- as.matrix(data) - n.variables <- dim(data)[2] - if (n.factors == 0) { - n.factors <- n.variables - determine <- TRUE - } else { - determine <- FALSE - } - if (!corr.matrix) { - corr.matrix <- cor(data, method = corr.type) - } else { - corr.matrix <- data - } - criterion <- .001 - old.h2 <- rep(99, n.variables) - h2 <- rep(0, n.variables) - change <- 1 - iteration <- 0 - factor.loadings <- matrix(nrow = n.variables, ncol = n.factors) - while ((change >= criterion) & (iteration < max.iteration)) { - iteration <- iteration + 1 - eigenvalue <- eigen(corr.matrix) - l <- sqrt(eigenvalue$values[1:n.factors]) - for (i in 1:n.factors) - factor.loadings[, i] <- eigenvalue$vectors[, i] * l[i] - for (i in 1:n.variables) - h2[i] <- sum(factor.loadings[i, ] * factor.loadings[i, ]) - change <- max(abs(old.h2 - h2)) - old.h2 <- h2 - diag(corr.matrix) <- h2 - } - if (determine) n.factors <- sum(eigenvalue$values > 1) - return(list(loadings = factor.loadings[, 1:n.factors], - factors = n.factors)) -} - - -rmVARcom<-function(x,y=NULL,alpha=.05,est=bivar,plotit=TRUE,nboot=500,SEED=TRUE,...){ -# -# Use a percentile bootstrap method to compare dependent groups. -# based on some robust measure of variation. -# -# if y=NULL, assume x is a matrix or data frame with two columns. -# -# -# nboot is the number of bootstrap samples. 
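-# Illustrative call (an added sketch, not part of the original source): if m is
-# a matrix with two columns of dependent measurements (m is a hypothetical
-# name),
-#   rmVARcom(m, est=bivar, nboot=500)
-# resamples the paired observations nboot times and compares the two groups
-# based on the measure of variation indicated by est.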
-# -# -if(!is.null(y[1]))x<-cbind(x,y) -if(ncol(x)>2)stop('x should have at most two columns') -x=elimna(x) -n=nrow(x) -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -data<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot) -bvec=matrix(NA,nboot,2) -for(ib in 1:nboot){ -bvec[ib,]<-apply(x[data[ib,],],2,est,...) -} -# -# Now have an nboot by 2 matrix of bootstrap values. -# -pstar=mean(bvec[,1]2, or data.frame or list mode with length >2') -x=elimna(x) -if(is.matrix(x))x=listm(x) -J=length(x) -if(J<3)stop('Should have 3 or more groups') -Jm1=J-1 -est=lapply(x,tmean,tr=tr) -n=lapply(x,length) -est=matl(est) -n=as.vector(matl(n)) -R=order(est) -pvec=NA -aval=c(seq(.001,.1,.001),seq(.11,.99,.01)) -if(length(id)==0)stop('alpha must be one of values .001(.001).1 or 11(.01).99') -v=ord.loc.crit.det(J=J,n=n,iter=iter,alpha=alpha,SEED=SEED) -p.crit=v[id,] - -pvdist=NULL -if(is.null(p.crit)){ -v=ord.loc.crit(J=J,n=n,iter=iter,alpha=alpha,SEED=SEED) -p.crit=v$fin.crit -pvdist=v$pvdist -} -output<-matrix(NA,Jm1,9) -dimnames(output)=list(NULL,c('Grp.L','Grp.R','Est.L','Est.R','Dif','ci.low','ci.up','p.value','p.crit')) -for(i in 2:J){ -im1=i-1 -a=yuen(x[[R[im1]]],x[[R[[i]]]],alpha=p.crit[im1]) -pvec[im1]=a$p.value -output[im1,]=c(R[im1],R[i],a$est.1,a$est.2,a$dif,a$ci[1],a$ci[2],a$p.value,p.crit[im1]) -} -dpv=NA -if(!is.null(pvdist)){ -chk=0 -for(i in 1:iter){ -flag=0 -for(j in 1:Jm1)if(pvdist[i,j]<=output[j,7])flag=flag+1 -if(flag>0)chk=chk+1 -} -dpv=chk/iter -} -# Determine p-value for overall decision -na=length(aval) -for(i in 1:na){ -chk=sum(output[,7]<=v[i,]) -pv=aval[i] -if(chk==Jm1)break -} -ORD.ID='NO' -id=output[,8]<=output[,9] -if(sum(id)==Jm1)ORD.ID='YES' -setClass('BIN',slots=c('Make.a.Decison','Decision.p.value','Estimates','n','output')) -put=new('BIN',Make.a.Decison=ORD.ID,Decision.p.value=pv,Estimates=est,n=n,output=output) -put -} - -ord.loc.crit<-function(J,n=30,alpha=.05,tr=.2,iter=5000,SEED=TRUE){ -# -# Determine critical p-values for ord.loc -# -if(SEED)set.seed(2) -Jm1=J-1 -rem=matrix(NA,iter,Jm1) -for(k in 1:iter){ -if(length(n)==1){ -x=rmul(n,p=J) -x=listm(x) -} -else{ -x=list() -if(length(n)!=J)stop('J is not equal to the length of n') -for(j in 1:J)x[[j]]=rnorm(n[j]) -} -rem[k,]=ord.loc.ex(x,tr=tr) -} -init=apply(rem,2,qest,alpha) -#print(apply(rem,2,mean)) -#print(init) -z=optim(0,ord.loc.fun,init=init,iter=iter,rem=rem,Jm1=Jm1,alpha=alpha,method='Brent',lower=0,upper=1) -fin.crit=z$par*init -list(fin.crit=fin.crit,pvdist=rem) -} - -ord.loc.fun<-function(a,init,iter,rem,Jm1,alpha){ -# -chk=0 -init=a*init -for(i in 1:iter){ -flag=0 -for(j in 1:Jm1)if(rem[i,j]<=init[j])flag=flag+1 -if(flag>0)chk=chk+1 -} -chk=chk/iter -dif=abs(chk-alpha) -dif -} - -ord.loc.crit.det<-function(J,n,alpha=.05,tr=.2,iter=5000,SEED=TRUE){ -# -# Determine critical p-values for anc.best -# -if(SEED)set.seed(2) -Jm1=J-1 -rem=matrix(NA,iter,Jm1) -for(k in 1:iter){ -if(length(n)==1){ -x=rmul(n,p=J) -x=listm(x) -} -else{ -x=list() -if(length(n)!=J)stop('J is not equal to the length of n') -for(j in 1:J)x[[j]]=rnorm(n[j]) -} -rem[k,]=ord.loc.ex(x,tr=tr) -} -aval=c(seq(.001,.1,.001),seq(.011,.99,.01)) -na=length(aval) -fin.crit=matrix(NA,na,Jm1) -for(i in 1:na){ -init=apply(rem,2,qest,aval[i]) -z=optim(0,ord.loc.fun,init=init,iter=iter,rem=rem,Jm1=Jm1,alpha=aval[i],method='Brent',lower=0,upper=1) -fin.crit[i,]=z$par*init -} -fin.crit -} - -RMcomvar.locdis<-function(x,y, -loc.fun=median,CI=FALSE,plotit=TRUE,xlab='First Group', -ylab='Est.1 - 
Est.2',ylabQCOM='Est.2 - Est.1',sm=TRUE,QCOM=TRUE,q=c(.1,.25,.75,.9),MC=FALSE,nboot=2000,PR=TRUE,...){ -# -# Compare the marginal distributions of two dependent groups in terms of the -# variation in the tails using all of the quantiles -# after centering the data. -# -# CI=FALSE, suppresses confidence intervals -# -if(!QCOM){ -if(PR){ -print('Interpretation: when using QCOM=F: If values in q.sig.greater are less than .5') -print('this indicates more variation in the lower tail for group 1') -print('Interpretation: If values in q.sig.greater are greater than .5') -print('This indicates more variation in the lower tail for group 2') - -print('Interpretation: If values in q.sig.less are less than .5') -print('this indicates more variation in the upper tail for group 2') -print('Interpretation: If values in q.sig.less are greater than .5') -print('This indicates more variation in the upper tail for group 1') -} -} -x=elimna(x) -y=elimna(y) -mx=loc.fun(x,...) -my=loc.fun(y,...) -X=x-mx -Y=y-my -if(!QCOM){ -a=lband(X,Y,plotit=plotit,xlab=xlab,ylab=ylabQCOM,sm=sm,CI=CI) -if(!CI)a$m=NULL -} -else{ -a=Dqcomhd(X,Y,q=q,nboot=nboot,plotit=plotit,xlab=xlab,ylab=ylab) -} -a -} - -g5.cen.plot<-function(x1, x2, x3 = NULL, x4 = NULL, x5 = NULL, fr = 0.8, - aval = 0.5, xlab = 'X', ylab ='', color = rep('black', 5), - main = NULL, sub = NULL,loc.fun=median){ -# -# Same a g5plot, only center the data based on the -# measure of location indicated by the argument -# loc.fun -# -x1=elimna(x1) -x2=elimna(x2) -x1=x1-loc.fun(x1) -x2=x2-loc.fun(x2) -if(!is.null(x3))x3=x3-loc.fun(x3) -if(!is.null(x4))x4=x4-loc.fun(x4) -if(!is.null(x5))x5=x5-loc.fun(x5) -g5plot(x1=x1, x2=x2, x3=x3, x4 = x4, x5 = x5, fr = fr, - aval = aval, xlab = xlab, ylab =ylab, color = color, - main = main, sub = sub) -} - - -rmul.MAR<-function(n,p=2,g=rep(0,p),h=rep(0,p),rho=0,cmat=NULL){ -# -# Generate multivariate normal data and transform the marginal -# distributions to g-and-h distributions -# -if(!is.null(cmat)){ -if(ncol(cmat)!=p)stop('cmat: number of columns must equal the value in the argument p') -} -if(abs(rho)>1)stop('rho must be between -1 and 1') -if(is.null(cmat)){ -cmat<-matrix(rho,p,p) -diag(cmat)<-1 -} -if(length(g)!=p)stop('Length of g should equal p') -if(length(h)!=p)stop('Length of h should equal p') -library(MASS) -x=mvrnorm(n,rep(0,p),cmat) -for(j in 1:p){ -if(g[j]==0)x[,j]=x[,j]*exp(h[j]*x[,j]^2/2) -if(g[j]>0)x[,j]=(exp(g[j]*x[,j])-1)*exp(h[j]*x[,j]^2/2)/g[j] -} -x -} - -lnormsd=function()sqrt(exp(1))*sqrt(exp(1)-1) #standard deviation of a lognormal distribution. - - - -varcom.IND.MP<-function(x,y,SEED=TRUE){ -# -# -# Compare the variances of two independent variables. -# Uses an updated Morgan-Pitman test based on a random -# permutations of the data. -# -# Returns a p-value and estimates of the variances -# No confidence interval -# -if(SEED)set.seed(2) -x=elimna(x) -y=elimna(y) -e1=var(x) -e2=var(y) -if(length(x)>length(y)){ -tempx=x -tempy=y -x=tempy -y=tempx -} -n1=length(x) -n2=length(y) -n=min(n1,n2) -nmax=max(n1,n2) -X=sample(x,n1) -Y=sample(y,n2) -p1=comdvar(X[1:n],Y[1:n])$p.value -PV=p1 -if(n1!=n2){ -neq=floor(nmax/n) -EQ=neq*n1 -A=matrix(c(1:EQ),nrow=n1) -PV=NA -J=ncol(A) -for(j in 1:J)PV[j]=comdvar(X[1:n],Y[A[,j]])$p.value -if(nmax>EQ){ -d=n2-n1+1 -Y2=Y[n2:d] #deliberately reversed the order. 
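-# (Added comment.) When the sample sizes differ, the last n observations of the
-# permuted Y, taken in reverse order, are compared with X[1:n] so that the
-# leftover observations are used; the resulting p-values are then adjusted via
-# Hochberg's method and the smallest is reported.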
-p2=comdvar(X[1:n],Y2)$p.value -PV=c(PV,p2) -}} -PV=min(p.adjust(PV,method='hoch')) -list(est1=e1,est2=e2,p.value=PV) -} - - - -selvar.ind.ex<-function(x){ -# -pvec=NA -x=elimna(x) -if(is.matrix(x))x=listm(x) -J=length(x) -EST=lapply(x,var) -EST=matl(EST) -R=order(EST) -ic=0 -for(j in 2:J){ -ic=ic+1 -pvec[ic]=varcom.IND.MP(x[[R[1]]],x[[R[[j]]]],SEED=FALSE)$p.value -} -pvec -} - -selvar.ind.crit<-function(J,n,alpha=.05,iter=1000,...){ -# -# Determine null distribution of p-values for selvar.ind.MP -# -Jm1=J-1 -rem=matrix(NA,iter,Jm1) -XS=list() -for(k in 1:iter){ -for(j in 1:J)XS[[j]]=rnorm(n[j]) -rem[k,]=selvar.ind.ex(XS) -} -rem -} - -selvar.ind.MP<-function(x,alpha=.05, rem=NULL,iter=2000,p.crit=NULL,SEED=TRUE){ -# -# For J independent groups, -# identify the group with smallest variance -# Make a decision if every p.value<=p.crit -# -# x is a matrix or data frame or can have list mode. -# -# rem: The distribution of the null p-value under normality. -# If not specified, the funcion computes it using a simulation with iter replications. -# Or, one could compute it with the R function selva.ind.crit, store the -# results in some R variable, say Z, then use rem=Z in future applications -# that have the same sample sizes -# -# -# -# Returns: -# Best='No Decision' if not significant -# Best= the group with largest measure if a decision can be made. -# -# -if(is.matrix(x) || is.data.frame(x))x=listm(x) -x=elimna(x) -J=length(x) -if(J<3)stop('Should have 3 or more groups') -Jm1=J-1 -est=lapply(x,var) -n=lapply(x,length) -est=as.vector(matl(est)) -n=as.vector(matl(n)) -R=order(est) -pvec=NA -if(is.null(rem))rem=selvar.ind.crit(J=J,n=n,iter=iter,SEED=SEED) -output<-matrix(NA,Jm1,5) -dimnames(output)=list(NULL,c('Smallest.Est','Grp','Est','p.value','p.adj')) -for(i in 2:J){ -im1=i-1 -SM=est[R[1]] -a=varcom.IND.MP(x[[R[1]]],x[[R[i]]],SEED=SEED) -pvec[im1]=a$p.value -pv=mean(rem[,im1]<=pvec[im1]) -output[im1,1:4]=c(SM,R[i],est[R[i]],pv) -} -output[,5]=p.adjust(output[,4],method='hoch') -Best='No Decision' -flag=sum(output[,4]<=alpha) -id=output[,5]<=alpha -if(sum(id>0))Best=output[id,2] -if(flag==Jm1)Best='Smaller.than.all' -setClass('SSV',slots=c('Group.with.smallest.estimate','Less.than','n','output')) -put=new('SSV',Group.with.smallest.estimate=R[[1]],Less.than=Best,n=n,output=output) -put -} - -selvar.ind.MP<-function(x,alpha=.05, rem=NULL,iter=2000,p.crit=NULL,SEED=TRUE){ -# -# For J independent groups, -# identify the group with smallest variance -# Make a decision if every p.value<=p.crit -# -# x is a matrix or data frame or can have list mode. -# -# rem: The distribution of the null p-value under normality. -# If not specified, the funcion computes it using a simulation with iter replications. -# Or, one could compute it with the R function selva.ind.crit, store the -# results in some R variable, say Z, then use rem=Z in future applications -# that have the same sample sizes -# -# -# -# Returns: -# Best='No Decision' if not significant -# Best= the group with largest measure if a decision can be made. 
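-# Illustrative call (an added sketch, not part of the original source): with
-# the groups stored in list mode or as the columns of a matrix named dat (a
-# hypothetical name),
-#   selvar.ind.MP(dat, alpha=.05, iter=2000)
-# compares the group having the smallest sample variance to each of the other
-# groups via the updated Morgan-Pitman test (varcom.IND.MP) and reports
-# Hochberg-adjusted p-values.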
-# -# -if(is.matrix(x) || is.data.frame(x))x=listm(x) -x=elimna(x) -J=length(x) -if(J<3)stop('Should have 3 or more groups') -Jm1=J-1 -est=lapply(x,var) -n=lapply(x,length) -est=as.vector(matl(est)) -n=as.vector(matl(n)) -R=order(est) -pvec=NA -if(is.null(rem))rem=selvar.ind.crit(J=J,n=n,iter=iter,SEED=SEED) -output<-matrix(NA,Jm1,5) -dimnames(output)=list(NULL,c('Smallest.Est','Grp','Est','p.value','p.adj')) -for(i in 2:J){ -im1=i-1 -SM=est[R[1]] -a=varcom.IND.MP(x[[R[1]]],x[[R[i]]],SEED=SEED) -pvec[im1]=a$p.value -pv=mean(rem[,im1]<=pvec[im1]) -output[im1,1:4]=c(SM,R[i],est[R[i]],pv) -} -output[,5]=p.adjust(output[,4],method='hoch') -Best='No Decision' -flag=sum(output[,5]<=alpha) -id=output[,5]<=alpha -if(sum(flag>0))Best=output[id,2] -if(flag==Jm1)Best='Smaller.than.all' -setClass('SSV',slots=c('Group.with.smallest.estimate','Less.than','n','output')) -put=new('SSV',Group.with.smallest.estimate=R[[1]],Less.than=Best,n=n,output=output) -put -} - -corCOM.DVvsIV<-function(x,y,com.p.dist=FALSE,corfun=wincor,iter=200,PV=NULL,pr=TRUE,neg.col=NULL,LARGEST=TRUE, -alpha=.05,nboot=500,SEED=TRUE,MC=FALSE,xout=FALSE,outfun=outpro,FWE.method='hoch',...){ -# -# Regresiion: -# Consider the IV with the largest correlation estimate with the DV -# Is it reasonable to decide that it has the highest population -# correlation? -# -# That is, have two or more independent variables, compare -# cor(y,x_I) to cor(y,x_k) for all k!=I, where -# cor(i,x_I) is the highest correlation -# Winsorized correlation is used by default. -# Hochberg's method is used to control FWE. -# -# x is assumed to be a matrix or data frame -# -# Possible alternative choices for corfun include: -# spear -# tau -# pbcor -# bicor -# scor -# mve.cor -# mcd.cor -# -# -if(nrow(x)!=length(y))stop('x and y have different sample sizes; should be equal') -p=ncol(x) -if(p<=2)stop('Should have 3 or more independent variables. With two, use TWOpov') -pm1=p-1 -p1=p+1 -m1=cbind(x,y) -m1<-elimna(m1) # Eliminate rows with missing values -if(xout){ -m<-cbind(x,y) -flag<-outfun(x,plotit=FALSE)$keep -m<-m[flag,] -x<-m[,1:p] -y<-m[,p1] -} -nval=nrow(x) -x=neg.colM(x,neg.col) -L=c(seq(.001,.1,.001),seq(.11,.99,.01)) - -if(!is.null(PV)){ -rem=matrix(NA,length(L),pm1) -for(k in 1:pm1){ -for(i in 1:length(L))rem[i,k]=hd(PV[,k],q=L[i]) -}} -if(p>6)com.p.dist=TRUE -if(com.p.dist){ -if(is.null(PV)){ -if(pr)print('Computing the null distribution can take several minutes') -PV=corCOM.DVvsIV.crit(p,nval,iter=iter,MC=MC,...) 
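-# (Added comment.) The lines below estimate, for each of the p-1 comparisons,
-# quantiles of the simulated null p-values at the levels in L using the
-# Harrell-Davis estimator hd, storing them in rem (one column per comparison).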
-rem=matrix(NA,length(L),pm1) -for(k in 1:pm1){ -for(i in 1:length(L))rem[i,k]=hd(PV[,k],q=L[i]) -}}} - -P3=matrix(c( 0.002175049 ,1.335108e-05 - ,0.005872039 ,7.055924e-05 - ,0.010619227 ,2.497530e-04 - ,0.016286140 ,5.345789e-04 - ,0.022398371 ,7.976283e-04 - ,0.028262446 ,1.277063e-03 - ,0.033345906 ,1.627195e-03 - ,0.037456504 ,1.939087e-03 - ,0.040694218 ,2.234302e-03 - ,0.043307931 ,2.546080e-03 - ,0.045568092 ,2.903338e-03 - ,0.047696567 ,3.323658e-03 - ,0.049845675 ,3.813582e-03 - ,0.052103950 ,4.371974e-03 - ,0.054511258 ,4.993196e-03 - ,0.057074761 ,5.668892e-03 - ,0.059782746 ,6.388620e-03 - ,0.062615440 ,7.140126e-03 - ,0.065552496 ,7.909937e-03 - ,0.068577121 ,7.684452e-03 - ,0.071677317 ,9.451369e-03 - ,0.074845101 ,1.020104e-02 - ,0.078074759 ,1.092742e-02 - ,0.081360990 ,1.162839e-02 - ,0.084697501 ,1.230540e-02 - ,0.088076254 ,1.296274e-02 - ,0.091487319 ,1.360651e-02 - ,0.094919200 ,1.424359e-02 - ,0.098359413 ,1.488081e-02 - ,0.101795143 ,1.552446e-02 - ,0.105213802 ,1.617997e-02 - ,0.108603375 ,1.685188e-02 - ,0.111952481 ,1.754394e-02 - ,0.115250189 ,1.825919e-02 - ,0.118485658 ,1.900005e-02 - ,0.121647762 ,1.976830e-02 - ,0.124724850 ,2.056506e-02 - ,0.127704758 ,2.139068e-02 - ,0.130575140 ,2.224462e-02 - ,0.133324080 ,2.312538e-02 - ,0.135940914 ,2.403049e-02 - ,0.138417122 ,2.495656e-02 - ,0.140747141 ,2.589933e-02 - ,0.142928988 ,2.685388e-02 - ,0.144964592 ,2.781478e-02 - ,0.146859829 ,2.877632e-02 - ,0.148624253 ,2.973273e-02 - ,0.150270597 ,3.067838e-02 - ,0.151814117 ,3.160798e-02 - ,0.153271855 ,3.251674e-02 - ,0.154661905 ,3.340049e-02 - ,0.156002725 ,3.425580e-02 - ,0.157312540 ,3.508002e-02 - ,0.158608833 ,3.587127e-02 - ,0.159907941 ,3.662845e-02 - ,0.161224730 ,3.735119e-02 - ,0.162572340 ,3.803977e-02 - ,0.163961992 ,3.869502e-02 - ,0.165402841 ,3.931829e-02 - ,0.166901885 ,3.991132e-02 - ,0.168463921 ,4.047615e-02 - ,0.170091558 ,4.101511e-02 - ,0.171785292 ,4.153070e-02 - ,0.173543642 ,4.202554e-02 - ,0.175363353 ,4.250234e-02 - ,0.177239642 ,4.296380e-02 - ,0.179166497 ,4.341259e-02 - ,0.181137006 ,4.385123e-02 - ,0.183143686 ,4.428208e-02 - ,0.185178820 ,4.470724e-02 - ,0.187234759 ,4.512852e-02 - ,0.189304201 ,4.554735e-02 - ,0.191380404 ,4.596483e-02 - ,0.193457363 ,4.638162e-02 - ,0.195529910 ,4.679804e-02 - ,0.197593772 ,4.721406e-02 - ,0.199645563 ,4.762936e-02 - ,0.201682735 ,4.804340e-02 - ,0.203703491 ,4.845551e-02 - ,0.205706665 ,4.886496e-02 - ,0.207691581 ,4.927106e-02 - ,0.209657914 ,4.967325e-02 - ,0.211605535 ,5.007115e-02 - ,0.213534385 ,5.046464e-02 - ,0.215444346 ,5.085386e-02 - ,0.217335153 ,5.123927e-02 - ,0.219206331 ,5.162162e-02 - ,0.221057158 ,5.200196e-02 - ,0.222886667 ,5.238156e-02 - ,0.224693687 ,5.276190e-02 - ,0.226476899 ,5.314459e-02 - ,0.228234929 ,5.353130e-02 - ,0.229966452 ,5.392373e-02 - ,0.231670301 ,5.432350e-02 - ,0.233345582 ,5.473211e-02 - ,0.234991770 ,5.515090e-02 - ,0.236608793 ,5.558098e-02 - ,0.238197083 ,5.602325e-02 - ,0.239757602 ,5.647831e-02 - ,0.241291828 ,5.694653e-02 - ,0.255725281 ,6.227133e-02 - ,0.269643155 ,6.812649e-02 - ,0.282572599 ,7.379864e-02 - ,0.294230675 ,7.937555e-02 - ,0.305279532 ,7.560157e-02 - ,0.316231246 ,9.290848e-02 - ,0.327118515 ,1.006091e-01 - ,0.337725949 ,1.077792e-01 - ,0.348507973 ,1.143072e-01 - ,0.360275335 ,1.203813e-01 - ,0.372732081 ,1.259302e-01 - ,0.384865494 ,1.309854e-01 - ,0.396170468 ,1.359841e-01 - ,0.406525702 ,1.414512e-01 - ,0.416179917 ,1.474082e-01 - ,0.425830260 ,1.532244e-01 - ,0.435731315 ,1.584113e-01 - ,0.445574831 ,1.634671e-01 - ,0.455403556 
,1.693949e-01 - ,0.465473322 ,1.764807e-01 - ,0.475493015 ,1.840579e-01 - ,0.485104441 ,1.914634e-01 - ,0.494622601 ,1.987244e-01 - ,0.504479896 ,2.062144e-01 - ,0.514530980 ,2.140417e-01 - ,0.524379809 ,2.218487e-01 - ,0.533811067 ,2.291090e-01 - ,0.542964071 ,2.357491e-01 - ,0.552415670 ,2.422511e-01 - ,0.562702410 ,2.489342e-01 - ,0.573662536 ,2.556239e-01 - ,0.584499138 ,2.621385e-01 - ,0.594424353 ,2.684997e-01 - ,0.603091449 ,2.746540e-01 - ,0.610580104 ,2.804293e-01 - ,0.617291412 ,2.857687e-01 - ,0.623819392 ,2.908378e-01 - ,0.630625766 ,2.959208e-01 - ,0.637780329 ,3.012891e-01 - ,0.645128388 ,3.071430e-01 - ,0.652676811 ,3.135295e-01 - ,0.660706577 ,3.202344e-01 - ,0.669513580 ,3.269345e-01 - ,0.679074179 ,3.335610e-01 - ,0.688924859 ,3.403140e-01 - ,0.698359030 ,3.473033e-01 - ,0.706803246 ,3.544525e-01 - ,0.714138033 ,3.617598e-01 - ,0.720773425 ,3.694019e-01 - ,0.727376023 ,3.775318e-01 - ,0.734427437 ,3.860345e-01 - ,0.741910594 ,3.945174e-01 - ,0.749301181 ,4.026368e-01 - ,0.756038316 ,4.104358e-01 - ,0.762108143 ,4.183084e-01 - ,0.767991314 ,4.266920e-01 - ,0.774125688 ,4.358112e-01 - ,0.780642168 ,4.456474e-01 - ,0.787654735 ,4.560432e-01 - ,0.795560943 ,4.667688e-01 - ,0.804494568 ,4.776577e-01 - ,0.813689728 ,4.888006e-01 - ,0.822246533 ,5.003803e-01 - ,0.830074100 ,5.121099e-01 - ,0.837601422 ,5.231564e-01 - ,0.845257402 ,5.330505e-01 - ,0.853115769 ,5.423039e-01 - ,0.860852876 ,5.518459e-01 - ,0.868387995 ,5.622222e-01 - ,0.875848963 ,5.737404e-01 - ,0.882870692 ,5.869893e-01 - ,0.889442420 ,6.017184e-01 - ,0.896636569 ,6.161664e-01 - ,0.904626820 ,6.294051e-01 - ,0.911772218 ,6.420279e-01 - ,0.917549883 ,6.554093e-01 - ,0.923211670 ,6.714817e-01 - ,0.928746246 ,6.890702e-01 - ,0.934160521 ,7.060012e-01 - ,0.941177134 ,7.215806e-01 - ,0.949363922 ,7.357055e-01 - ,0.956847590 ,7.494285e-01 - ,0.963599180 ,7.621246e-01 - ,0.969055149 ,7.763082e-01 - ,0.973136108 ,7.936805e-01 - ,0.976868136 ,7.142080e-01 - ,0.981258840 ,7.460053e-01 - ,0.988264909 ,7.786117e-01 - ,0.994433844 ,9.192672e-01), - byrow=TRUE,ncol=2) - - -P4=matrix(c(0.02825196 ,0.002694346 ,2.999028e-06 - ,0.03650906 ,0.004146146 ,4.263801e-05 - ,0.04206787 ,0.005668266 ,1.964571e-04 - ,0.04755122 ,0.007287032 ,4.847254e-04 - ,0.05291511 ,0.009334544 ,8.277385e-04 - ,0.05813244 ,0.011977936 ,1.156019e-03 - ,0.06348677 ,0.015026277 ,1.458581e-03 - ,0.06908051 ,0.018151721 ,1.739547e-03 - ,0.07468576 ,0.021131256 ,1.999549e-03 - ,0.08003211 ,0.023887028 ,2.244613e-03 - ,0.08506408 ,0.026419261 ,2.483839e-03 - ,0.08992250 ,0.028745172 ,2.721838e-03 - ,0.09476611 ,0.030872849 ,2.960090e-03 - ,0.09964373 ,0.032802489 ,3.201924e-03 - ,0.10449749 ,0.034540459 ,3.452190e-03 - ,0.10923896 ,0.036109254 ,3.712771e-03 - ,0.11381116 ,0.037544213 ,3.980112e-03 - ,0.11820149 ,0.038882089 ,4.247472e-03 - ,0.12242309 ,0.040152826 ,4.509743e-03 - ,0.12649443 ,0.041380182 ,4.767249e-03 - ,0.13043191 ,0.042587785 ,5.026383e-03 - ,0.13425237 ,0.043803447 ,5.297105e-03 - ,0.13797722 ,0.045057120 ,5.588809e-03 - ,0.14163298 ,0.046373077 ,5.906670e-03 - ,0.14524737 ,0.047760788 ,6.249983e-03 - ,0.14884258 ,0.049209682 ,6.612920e-03 - ,0.15242802 ,0.050690819 ,6.986926e-03 - ,0.15599509 ,0.052164992 ,7.363452e-03 - ,0.15951646 ,0.053593885 ,7.735951e-03 - ,0.16295105 ,0.054949857 ,8.100655e-03 - ,0.16625418 ,0.056221112 ,8.456323e-03 - ,0.16938955 ,0.057411310 ,8.803438e-03 - ,0.17233932 ,0.058534904 ,9.143341e-03 - ,0.17510887 ,0.059610687 ,9.477586e-03 - ,0.17772492 ,0.060655906 ,9.807601e-03 - ,0.18022813 ,0.061682484 
,1.013458e-02 - ,0.18266319 ,0.062695748 ,1.045946e-02 - ,0.18506945 ,0.063695282 ,1.078300e-02 - ,0.18747477 ,0.064677064 ,1.110569e-02 - ,0.18989354 ,0.065635990 ,1.142782e-02 - ,0.19232847 ,0.066568052 ,1.174942e-02 - ,0.19477450 ,0.067471733 ,1.207037e-02 - ,0.19722304 ,0.068348479 ,1.239045e-02 - ,0.19966526 ,0.069202350 ,1.270946e-02 - ,0.20209341 ,0.070039138 ,1.302729e-02 - ,0.20450072 ,0.070865253 ,1.334392e-02 - ,0.20688005 ,0.071686698 ,1.365930e-02 - ,0.20922248 ,0.072508287 ,1.397321e-02 - ,0.21151643 ,0.073333231 ,1.428515e-02 - ,0.21374770 ,0.074163062 ,1.459419e-02 - ,0.21590048 ,0.074997845 ,1.489902e-02 - ,0.21795919 ,0.075836565 ,1.519811e-02 - ,0.21991052 ,0.076677610 ,1.548999e-02 - ,0.22174535 ,0.077519237 ,1.577353e-02 - ,0.22346014 ,0.078359986 ,1.604830e-02 - ,0.22505752 ,0.079198980 ,1.631478e-02 - ,0.22654605 ,0.080036104 ,1.657442e-02 - ,0.22793930 ,0.080872053 ,1.682959e-02 - ,0.22925445 ,0.081708279 ,1.708334e-02 - ,0.23051059 ,0.082546845 ,1.733898e-02 - ,0.23172712 ,0.083390212 ,1.759979e-02 - ,0.23292230 ,0.084240996 ,1.786853e-02 - ,0.23411229 ,0.085101710 ,1.814722e-02 - ,0.23531044 ,0.085974525 ,1.843693e-02 - ,0.23652714 ,0.086861072 ,1.873778e-02 - ,0.23776992 ,0.087762307 ,1.904896e-02 - ,0.23904386 ,0.088678461 ,1.936895e-02 - ,0.24035207 ,0.089609084 ,1.969576e-02 - ,0.24169630 ,0.090553177 ,2.002710e-02 - ,0.24307742 ,0.091509412 ,2.036069e-02 - ,0.24449595 ,0.092476402 ,2.069439e-02 - ,0.24595237 ,0.093452991 ,2.102637e-02 - ,0.24744741 ,0.094438523 ,2.135516e-02 - ,0.24898216 ,0.095433051 ,2.167968e-02 - ,0.25055813 ,0.096437453 ,2.199923e-02 - ,0.25217708 ,0.097453412 ,2.231345e-02 - ,0.25384088 ,0.098483278 ,2.262224e-02 - ,0.25555107 ,0.099529803 ,2.292570e-02 - ,0.25730850 ,0.100595780 ,2.322410e-02 - ,0.25911286 ,0.101683637 ,2.351787e-02 - ,0.26096228 ,0.102795033 ,2.380753e-02 - ,0.26285307 ,0.103930520 ,2.409377e-02 - ,0.26477965 ,0.105089312 ,2.437741e-02 - ,0.26673470 ,0.106269188 ,2.465942e-02 - ,0.26870964 ,0.107466555 ,2.494090e-02 - ,0.27069514 ,0.108676633 ,2.522304e-02 - ,0.27268195 ,0.109893752 ,2.550708e-02 - ,0.27466156 ,0.111111714 ,2.579422e-02 - ,0.27662692 ,0.112324166 ,2.608553e-02 - ,0.27857284 ,0.113524958 ,2.638190e-02 - ,0.28049622 ,0.114708449 ,2.668392e-02 - ,0.28239603 ,0.115869742 ,2.699191e-02 - ,0.28427298 ,0.117004848 ,2.730585e-02 - ,0.28612913 ,0.118110784 ,2.762538e-02 - ,0.28796727 ,0.119185602 ,2.794990e-02 - ,0.28979037 ,0.120228378 ,2.827855e-02 - ,0.29160108 ,0.121239153 ,2.861033e-02 - ,0.29340136 ,0.122218858 ,2.894414e-02 - ,0.29519222 ,0.123169211 ,2.927891e-02 - ,0.29697369 ,0.124092605 ,2.961360e-02 - ,0.31397659 ,0.132621459 ,3.287664e-02 - ,0.32905317 ,0.141687415 ,3.635344e-02 - ,0.34142264 ,0.150239874 ,4.011146e-02 - ,0.35320262 ,0.157769982 ,4.336456e-02 - ,0.36543162 ,0.165856637 ,4.597872e-02 - ,0.37719191 ,0.174946656 ,4.927422e-02 - ,0.38887463 ,0.183829329 ,5.322584e-02 - ,0.40152779 ,0.191918219 ,5.706844e-02 - ,0.41555978 ,0.200422337 ,6.072351e-02 - ,0.42908873 ,0.209907793 ,6.413420e-02 - ,0.44052818 ,0.219253257 ,6.745447e-02 - ,0.45132279 ,0.227775172 ,7.077917e-02 - ,0.46314082 ,0.234996714 ,7.417266e-02 - ,0.47488247 ,0.241586411 ,7.752946e-02 - ,0.48469259 ,0.248330403 ,8.082463e-02 - ,0.49326509 ,0.255625615 ,8.409529e-02 - ,0.50168814 ,0.263258846 ,8.746989e-02 - ,0.51028363 ,0.271055706 ,9.086717e-02 - ,0.51912560 ,0.278856970 ,9.420999e-02 - ,0.52769780 ,0.286320658 ,9.786763e-02 - ,0.53603821 ,0.293266278 ,1.020140e-01 - ,0.54478766 ,0.299467332 ,1.062560e-01 - ,0.55366271 
,0.305690735 ,1.102368e-01 - ,0.56272309 ,0.312768360 ,1.140490e-01 - ,0.57233129 ,0.319963119 ,1.178623e-01 - ,0.58200310 ,0.326558078 ,1.219016e-01 - ,0.59099024 ,0.332901385 ,1.263491e-01 - ,0.59937345 ,0.339303862 ,1.310123e-01 - ,0.60758641 ,0.345851059 ,1.357467e-01 - ,0.61598033 ,0.352190085 ,1.405864e-01 - ,0.62528643 ,0.357913761 ,1.455560e-01 - ,0.63543095 ,0.363356622 ,1.504331e-01 - ,0.64471050 ,0.369132056 ,1.551387e-01 - ,0.65228925 ,0.375308548 ,1.599456e-01 - ,0.65934374 ,0.381636254 ,1.650710e-01 - ,0.66706291 ,0.388370558 ,1.704709e-01 - ,0.67515257 ,0.395534711 ,1.758367e-01 - ,0.68284073 ,0.402421344 ,1.812059e-01 - ,0.69038336 ,0.408623429 ,1.868699e-01 - ,0.69845919 ,0.414460275 ,1.926662e-01 - ,0.70715142 ,0.420523246 ,1.981833e-01 - ,0.71574865 ,0.427206110 ,2.033596e-01 - ,0.72320765 ,0.434498031 ,2.084776e-01 - ,0.72954754 ,0.442149909 ,2.137674e-01 - ,0.73571186 ,0.449959300 ,2.191326e-01 - ,0.74226506 ,0.457421738 ,2.245519e-01 - ,0.74905392 ,0.464237364 ,2.303037e-01 - ,0.75612564 ,0.470902809 ,2.367213e-01 - ,0.76388865 ,0.477994733 ,2.435881e-01 - ,0.77159609 ,0.485500173 ,2.500961e-01 - ,0.77797709 ,0.493064413 ,2.561486e-01 - ,0.78328352 ,0.500852021 ,2.622563e-01 - ,0.78864834 ,0.509132560 ,2.684670e-01 - ,0.79436056 ,0.517227408 ,2.749325e-01 - ,0.80015524 ,0.524694907 ,2.817636e-01 - ,0.80624435 ,0.532053355 ,2.884733e-01 - ,0.81293829 ,0.540118580 ,2.947507e-01 - ,0.82015085 ,0.548921459 ,3.010033e-01 - ,0.82763569 ,0.557637151 ,3.078365e-01 - ,0.83505891 ,0.566310996 ,3.155196e-01 - ,0.84198778 ,0.575253522 ,3.239425e-01 - ,0.84848454 ,0.583976152 ,3.326209e-01 - ,0.85494638 ,0.592377395 ,3.411542e-01 - ,0.86153456 ,0.600715905 ,3.495319e-01 - ,0.86786548 ,0.608554390 ,3.581296e-01 - ,0.87340081 ,0.615887245 ,3.669205e-01 - ,0.87845635 ,0.623939370 ,3.759445e-01 - ,0.88379941 ,0.632832612 ,3.856236e-01 - ,0.88951674 ,0.641696896 ,3.957400e-01 - ,0.89459349 ,0.651106256 ,4.056838e-01 - ,0.89882091 ,0.662461064 ,4.146894e-01 - ,0.90377294 ,0.675025139 ,4.226494e-01 - ,0.90970927 ,0.687246375 ,4.314995e-01 - ,0.91531400 ,0.699233434 ,4.438556e-01 - ,0.92062598 ,0.710363456 ,4.575252e-01 - ,0.92625531 ,0.720885287 ,4.697992e-01 - ,0.93147783 ,0.730627162 ,4.799491e-01 - ,0.93661779 ,0.740878409 ,4.909327e-01 - ,0.94256155 ,0.753110750 ,5.054689e-01 - ,0.94879244 ,0.766049888 ,5.218112e-01 - ,0.95441461 ,0.779461673 ,5.382110e-01 - ,0.95922582 ,0.795415271 ,5.580688e-01 - ,0.96461303 ,0.812122645 ,5.830378e-01 - ,0.96954495 ,0.827981003 ,6.131022e-01 - ,0.97446302 ,0.845818141 ,6.404610e-01 - ,0.97912580 ,0.862982643 ,6.619244e-01 - ,0.98539520 ,0.887825781 ,6.905761e-01 - ,0.98986795 ,0.916261737 ,7.236242e-01 - ,0.99487474 ,0.960335712 ,7.689094e-01), - byrow=TRUE,ncol=3) - -P5=matrix(c( 0.05652412 ,0.01305194 ,0.002390178 ,3.378326e-12 - ,0.06574073 ,0.01487696 ,0.003342965 ,7.244389e-11 - ,0.07151676 ,0.01720008 ,0.004648512 ,7.773423e-10 - ,0.07511946 ,0.01981078 ,0.006063858 ,5.567153e-09 - ,0.07828081 ,0.02244535 ,0.007485693 ,2.994702e-08 - ,0.08180130 ,0.02494366 ,0.008893920 ,1.291037e-07 - ,0.08583995 ,0.02730528 ,0.010278443 ,4.647955e-07 - ,0.09025786 ,0.02960906 ,0.011622679 ,1.437865e-06 - ,0.09481793 ,0.03192766 ,0.012912280 ,3.903397e-06 - ,0.09930536 ,0.03429573 ,0.014138866 ,9.451086e-06 - ,0.10358876 ,0.03671695 ,0.015295959 ,2.067608e-05 - ,0.10762929 ,0.03918039 ,0.016375245 ,4.130899e-05 - ,0.11145588 ,0.04167054 ,0.017367679 ,7.605707e-05 - ,0.11512861 ,0.04416935 ,0.018267991 ,1.300654e-04 - ,0.11870678 ,0.04665486 ,0.019078858 
,2.080362e-04 - ,0.12222956 ,0.04910107 ,0.019812158 ,3.132019e-04 - ,0.12571005 ,0.05148055 ,0.020487010 ,4.464557e-04 - ,0.12913936 ,0.05376876 ,0.021125903 ,6.059430e-04 - ,0.13249584 ,0.05594791 ,0.021750598 ,7.872902e-04 - ,0.13575514 ,0.05800867 ,0.022379035 ,9.844397e-04 - ,0.13889807 ,0.05994945 ,0.023023715 ,1.190860e-03 - ,0.14191516 ,0.06177395 ,0.023691420 ,1.400786e-03 - ,0.14480785 ,0.06348830 ,0.024383868 ,1.610156e-03 - ,0.14758751 ,0.06509889 ,0.025098853 ,1.817036e-03 - ,0.15027304 ,0.06661130 ,0.025831529 ,2.021495e-03 - ,0.15288821 ,0.06803034 ,0.026575628 ,2.225055e-03 - ,0.15545896 ,0.06936082 ,0.027324476 ,2.429942e-03 - ,0.15801110 ,0.07060840 ,0.028071770 ,2.638347e-03 - ,0.16056820 ,0.07178026 ,0.028812074 ,2.851885e-03 - ,0.16315001 ,0.07288527 ,0.029541074 ,3.071323e-03 - ,0.16577118 ,0.07393380 ,0.030255603 ,3.296558e-03 - ,0.16844049 ,0.07493712 ,0.030953535 ,3.526795e-03 - ,0.17116069 ,0.07590682 ,0.031633586 ,3.760793e-03 - ,0.17392879 ,0.07685417 ,0.032295108 ,3.997135e-03 - ,0.17673683 ,0.07778972 ,0.032937919 ,4.234427e-03 - ,0.17957306 ,0.07872305 ,0.033562189 ,4.471420e-03 - ,0.18242312 ,0.07966272 ,0.034168393 ,4.707065e-03 - ,0.18527142 ,0.08061630 ,0.034757295 ,4.940502e-03 - ,0.18810221 ,0.08159041 ,0.035329958 ,5.171027e-03 - ,0.19090055 ,0.08259081 ,0.035887740 ,5.398048e-03 - ,0.19365295 ,0.08362233 ,0.036432265 ,5.621047e-03 - ,0.19634778 ,0.08468879 ,0.036965369 ,5.839553e-03 - ,0.19897548 ,0.08579287 ,0.037489019 ,6.053142e-03 - ,0.20152856 ,0.08693588 ,0.038005212 ,6.261435e-03 - ,0.20400154 ,0.08811768 ,0.038515895 ,6.464128e-03 - ,0.20639083 ,0.08933656 ,0.039022878 ,6.661029e-03 - ,0.20869457 ,0.09058924 ,0.039527788 ,6.852104e-03 - ,0.21091258 ,0.09187106 ,0.040032038 ,7.037528e-03 - ,0.21304622 ,0.09317615 ,0.040536826 ,7.217739e-03 - ,0.21509838 ,0.09449787 ,0.041043146 ,7.393470e-03 - ,0.21707346 ,0.09582915 ,0.041551812 ,7.565765e-03 - ,0.21897727 ,0.09716299 ,0.042063485 ,7.735961e-03 - ,0.22081697 ,0.09849292 ,0.042578697 ,7.905643e-03 - ,0.22260093 ,0.09981337 ,0.043097865 ,8.076563e-03 - ,0.22433851 ,0.10112002 ,0.043621312 ,8.250539e-03 - ,0.22603978 ,0.10240999 ,0.044149265 ,8.429335e-03 - ,0.22771524 ,0.10368199 ,0.044681863 ,8.614546e-03 - ,0.22937544 ,0.10493623 ,0.045219159 ,8.807490e-03 - ,0.23103064 ,0.10617433 ,0.045761127 ,9.009122e-03 - ,0.23269046 ,0.10739901 ,0.046307667 ,9.219978e-03 - ,0.23436362 ,0.10861381 ,0.046858620 ,9.440156e-03 - ,0.23605766 ,0.10982269 ,0.047413783 ,9.669322e-03 - ,0.23777881 ,0.11102960 ,0.047972925 ,9.906756e-03 - ,0.23953181 ,0.11223815 ,0.048535807 ,1.015141e-02 - ,0.24131994 ,0.11345120 ,0.049102198 ,1.040201e-02 - ,0.24314491 ,0.11467064 ,0.049671889 ,1.065710e-02 - ,0.24500695 ,0.11589718 ,0.050244706 ,1.091518e-02 - ,0.24690483 ,0.11713029 ,0.050820511 ,1.117476e-02 - ,0.24883595 ,0.11836818 ,0.051399206 ,1.143446e-02 - ,0.25079643 ,0.11960797 ,0.051980725 ,1.169304e-02 - ,0.25278126 ,0.12084583 ,0.052565028 ,1.194945e-02 - ,0.25478439 ,0.12207730 ,0.053152083 ,1.220290e-02 - ,0.25679899 ,0.12329752 ,0.053741852 ,1.245283e-02 - ,0.25881759 ,0.12450159 ,0.054334269 ,1.269891e-02 - ,0.26083237 ,0.12568485 ,0.054929219 ,1.294106e-02 - ,0.26283537 ,0.12684313 ,0.055526514 ,1.317939e-02 - ,0.26481879 ,0.12797294 ,0.056125873 ,1.341420e-02 - ,0.26677527 ,0.12907169 ,0.056726901 ,1.364589e-02 - ,0.26869809 ,0.13013771 ,0.057329079 ,1.387498e-02 - ,0.27058146 ,0.13117030 ,0.057931750 ,1.410201e-02 - ,0.27242063 ,0.13216971 ,0.058534126 ,1.432755e-02 - ,0.27421212 ,0.13313704 ,0.059135301 
,1.455210e-02 - ,0.27595371 ,0.13407414 ,0.059734269 ,1.477611e-02 - ,0.27764451 ,0.13498346 ,0.060329960 ,1.499992e-02 - ,0.27928490 ,0.13586793 ,0.060921287 ,1.522377e-02 - ,0.28087643 ,0.13673079 ,0.061507187 ,1.544778e-02 - ,0.28242170 ,0.13757549 ,0.062086680 ,1.567195e-02 - ,0.28392419 ,0.13840552 ,0.062658920 ,1.589617e-02 - ,0.28538803 ,0.13922434 ,0.063223239 ,1.612023e-02 - ,0.28681780 ,0.14003533 ,0.063779188 ,1.634387e-02 - ,0.28821831 ,0.14084167 ,0.064326561 ,1.656674e-02 - ,0.28959443 ,0.14164635 ,0.064865403 ,1.678851e-02 - ,0.29095083 ,0.14245215 ,0.065396011 ,1.700879e-02 - ,0.29229189 ,0.14326158 ,0.065918911 ,1.722723e-02 - ,0.29362151 ,0.14407696 ,0.066434823 ,1.744351e-02 - ,0.29494305 ,0.14490033 ,0.066944618 ,1.765732e-02 - ,0.29625927 ,0.14573355 ,0.067449265 ,1.786841e-02 - ,0.29757232 ,0.14657825 ,0.067949777 ,1.807657e-02 - ,0.29888374 ,0.14743586 ,0.068447155 ,1.828162e-02 - ,0.30019451 ,0.14830758 ,0.068942336 ,1.848344e-02 - ,0.31328362 ,0.15791008 ,0.073880466 ,2.031038e-02 - ,0.32658914 ,0.16858633 ,0.078745227 ,2.182686e-02 - ,0.34081183 ,0.17902263 ,0.083679715 ,2.336938e-02 - ,0.35529268 ,0.18923993 ,0.089192608 ,2.528373e-02 - ,0.36855379 ,0.19911099 ,0.095098319 ,2.734008e-02 - ,0.37994080 ,0.20799604 ,0.100761289 ,2.928966e-02 - ,0.39062100 ,0.21671444 ,0.105774473 ,3.127971e-02 - ,0.40257143 ,0.22609178 ,0.110115264 ,3.337375e-02 - ,0.41621275 ,0.23569341 ,0.113958744 ,3.539360e-02 - ,0.43046818 ,0.24490377 ,0.117643306 ,3.736688e-02 - ,0.44486669 ,0.25330316 ,0.121679236 ,3.972222e-02 - ,0.45923530 ,0.26068382 ,0.126584071 ,4.280986e-02 - ,0.47296930 ,0.26745795 ,0.132471529 ,4.640810e-02 - ,0.48590094 ,0.27415019 ,0.138753880 ,5.001684e-02 - ,0.49834493 ,0.28093903 ,0.144554010 ,5.345159e-02 - ,0.51026131 ,0.28775596 ,0.149452848 ,5.679297e-02 - ,0.52119211 ,0.29432490 ,0.153686512 ,5.996537e-02 - ,0.53082589 ,0.30059787 ,0.157685666 ,6.275756e-02 - ,0.53943126 ,0.30692404 ,0.161762480 ,6.518147e-02 - ,0.54774724 ,0.31353121 ,0.166120746 ,6.753031e-02 - ,0.55642024 ,0.32037096 ,0.170885289 ,7.007491e-02 - ,0.56550181 ,0.32736781 ,0.176132244 ,7.279915e-02 - ,0.57464883 ,0.33444881 ,0.181843360 ,7.550823e-02 - ,0.58368440 ,0.34149272 ,0.187714538 ,7.816054e-02 - ,0.59259791 ,0.34836154 ,0.193239542 ,8.092069e-02 - ,0.60135765 ,0.35491577 ,0.198212454 ,8.389308e-02 - ,0.60997532 ,0.36111908 ,0.202943209 ,8.697786e-02 - ,0.61843676 ,0.36715577 ,0.207920518 ,9.000649e-02 - ,0.62674513 ,0.37332556 ,0.213468203 ,9.287236e-02 - ,0.63517287 ,0.37979485 ,0.219617217 ,9.553586e-02 - ,0.64411712 ,0.38648760 ,0.226087437 ,9.803810e-02 - ,0.65355691 ,0.39327005 ,0.232464891 ,1.005292e-01 - ,0.66286128 ,0.40022900 ,0.238568656 ,1.032200e-01 - ,0.67142296 ,0.40767245 ,0.244631428 ,1.062392e-01 - ,0.67945227 ,0.41574779 ,0.250960847 ,1.095391e-01 - ,0.68784816 ,0.42420043 ,0.257504402 ,1.129935e-01 - ,0.69717228 ,0.43269300 ,0.263984690 ,1.165367e-01 - ,0.70693776 ,0.44108963 ,0.270350361 ,1.201702e-01 - ,0.71609195 ,0.44926200 ,0.276782149 ,1.239319e-01 - ,0.72398625 ,0.45697627 ,0.283281861 ,1.278724e-01 - ,0.73066318 ,0.46420271 ,0.289549848 ,1.319571e-01 - ,0.73650853 ,0.47133730 ,0.295322586 ,1.360494e-01 - ,0.74191863 ,0.47888254 ,0.300607528 ,1.400640e-01 - ,0.74721366 ,0.48699345 ,0.305518411 ,1.440467e-01 - ,0.75262380 ,0.49549336 ,0.310111010 ,1.480611e-01 - ,0.75824105 ,0.50420009 ,0.314535705 ,1.520995e-01 - ,0.76409670 ,0.51305053 ,0.319195032 ,1.561658e-01 - ,0.77030423 ,0.52194190 ,0.324545735 ,1.603832e-01 - ,0.77696430 ,0.53068348 ,0.330761451 
,1.648740e-01 - ,0.78392814 ,0.53921911 ,0.337687815 ,1.695305e-01 - ,0.79087142 ,0.54764584 ,0.345040336 ,1.740919e-01 - ,0.79764992 ,0.55599003 ,0.352535977 ,1.784667e-01 - ,0.80440374 ,0.56418827 ,0.359947893 ,1.828362e-01 - ,0.81125920 ,0.57217204 ,0.367126284 ,1.874504e-01 - ,0.81807396 ,0.57997214 ,0.374007327 ,1.923635e-01 - ,0.82455298 ,0.58778776 ,0.380706775 ,1.973552e-01 - ,0.83054464 ,0.59568049 ,0.387507714 ,2.021553e-01 - ,0.83619452 ,0.60335949 ,0.394648281 ,2.067057e-01 - ,0.84182788 ,0.61067252 ,0.402210773 ,2.111209e-01 - ,0.84769382 ,0.61810882 ,0.410130422 ,2.155355e-01 - ,0.85379684 ,0.62646991 ,0.418239217 ,2.201354e-01 - ,0.85982991 ,0.63596326 ,0.426436850 ,2.251539e-01 - ,0.86534864 ,0.64569215 ,0.434743378 ,2.307076e-01 - ,0.87020343 ,0.65434970 ,0.443196556 ,2.368692e-01 - ,0.87472363 ,0.66158858 ,0.451892254 ,2.437247e-01 - ,0.87942450 ,0.66828595 ,0.460851245 ,2.509675e-01 - ,0.88437025 ,0.67525025 ,0.469770141 ,2.580093e-01 - ,0.88904758 ,0.68246978 ,0.478325822 ,2.648763e-01 - ,0.89322701 ,0.68982898 ,0.486536728 ,2.723872e-01 - ,0.89745058 ,0.69753791 ,0.494776780 ,2.812096e-01 - ,0.90239053 ,0.70572617 ,0.503530501 ,2.908907e-01 - ,0.90800180 ,0.71430596 ,0.513032547 ,3.003753e-01 - ,0.91346775 ,0.72311282 ,0.523668882 ,3.096709e-01 - ,0.91813323 ,0.73249598 ,0.535903771 ,3.196147e-01 - ,0.92221702 ,0.74332982 ,0.549315193 ,3.304979e-01 - ,0.92622555 ,0.75519029 ,0.562491098 ,3.426984e-01 - ,0.93049876 ,0.76676088 ,0.574311153 ,3.573481e-01 - ,0.93559016 ,0.77792982 ,0.584778988 ,3.742742e-01 - ,0.94158878 ,0.78972489 ,0.594741057 ,3.918491e-01 - ,0.94715113 ,0.80271250 ,0.605197756 ,4.107232e-01 - ,0.95168452 ,0.81675031 ,0.616562519 ,4.307354e-01 - ,0.95643627 ,0.83145104 ,0.629586884 ,4.491496e-01 - ,0.96150476 ,0.84592600 ,0.643095195 ,4.683523e-01 - ,0.96619142 ,0.86139307 ,0.658098224 ,4.899430e-01 - ,0.97232678 ,0.87780728 ,0.682404452 ,5.109344e-01 - ,0.97800785 ,0.89418487 ,0.715182394 ,5.334721e-01 - ,0.98168187 ,0.90711236 ,0.750757190 ,5.668841e-01 - ,0.98661473 ,0.92175945 ,0.795670468 ,6.221022e-01 - ,0.99161624 ,0.94482117 ,0.844332168 ,6.873383e-01), - byrow=TRUE,ncol=4) - - P6=matrix(c( 0.02845841 ,0.009644671 ,0.0009664242 ,0.0007502456 ,2.594514e-11 - ,0.03862415 ,0.017910204 ,0.0031407471 ,0.0015589418 ,5.022928e-10 - ,0.05220254 ,0.025693640 ,0.0057938158 ,0.0021160814 ,4.868380e-09 - ,0.06621145 ,0.032227944 ,0.0082558224 ,0.0025765248 ,3.151134e-08 - ,0.07867811 ,0.037545052 ,0.0103200867 ,0.0030739790 ,1.532944e-07 - ,0.08886734 ,0.041725975 ,0.0120483568 ,0.0036182088 ,5.980879e-07 - ,0.09687788 ,0.044882223 ,0.0135631342 ,0.0041650257 ,1.950299e-06 - ,0.10316830 ,0.047217776 ,0.0149788321 ,0.0046727336 ,5.469913e-06 - ,0.10824058 ,0.048995043 ,0.0163941682 ,0.0051213088 ,1.347713e-05 - ,0.11250832 ,0.050462880 ,0.0178877558 ,0.0055125816 ,2.965286e-05 - ,0.11627805 ,0.051812087 ,0.0195084853 ,0.0058643506 ,5.903392e-05 - ,0.11976844 ,0.053169834 ,0.0212684847 ,0.0062028177 ,1.075073e-04 - ,0.12313219 ,0.054614572 ,0.0231443453 ,0.0065546842 ,1.807643e-04 - ,0.12647380 ,0.056192980 ,0.0250870063 ,0.0069403944 ,2.829186e-04 - ,0.12986454 ,0.057930892 ,0.0270368653 ,0.0073701618 ,4.152024e-04 - ,0.13335383 ,0.059838004 ,0.0289391057 ,0.0078435357 ,5.752174e-04 - ,0.13697580 ,0.061909087 ,0.0307548517 ,0.0083519589 ,7.570695e-04 - ,0.14075114 ,0.064124538 ,0.0324658987 ,0.0088828750 ,9.524158e-04 - ,0.14468601 ,0.066452103 ,0.0340732641 ,0.0094238009 ,1.152134e-03 - ,0.14877041 ,0.068850570 ,0.0355916148 ,0.0099652366 ,1.348124e-03 - 
,0.15297783 ,0.071275211 ,0.0370422456 ,0.0105019844 ,1.534731e-03 - ,0.15726699 ,0.073684014 ,0.0384468508 ,0.0110330581 ,1.709452e-03 - ,0.16158577 ,0.076043401 ,0.0398233432 ,0.0115607019 ,1.872830e-03 - ,0.16587656 ,0.078332254 ,0.0411839642 ,0.0120890846 ,2.027675e-03 - ,0.17008218 ,0.080543571 ,0.0425352335 ,0.0126230926 ,2.177920e-03 - ,0.17415147 ,0.082683715 ,0.0438790091 ,0.0131674283 ,2.327430e-03 - ,0.17804364 ,0.084769760 ,0.0452139704 ,0.0137260428 ,2.479054e-03 - ,0.18173091 ,0.086825700 ,0.0465370496 ,0.0143018329 ,2.634074e-03 - ,0.18519926 ,0.088878323 ,0.0478445734 ,0.0148965065 ,2.792108e-03 - ,0.18844742 ,0.090953392 ,0.0491330541 ,0.0155105477 ,2.951390e-03 - ,0.19148459 ,0.093072518 ,0.0503996631 ,0.0161432460 ,3.109303e-03 - ,0.19432745 ,0.095250927 ,0.0516424566 ,0.0167927877 ,3.263029e-03 - ,0.19699700 ,0.097496165 ,0.0528604171 ,0.0174564124 ,3.410139e-03 - ,0.19951573 ,0.099807706 ,0.0540533669 ,0.0181306383 ,3.549062e-03 - ,0.20190550 ,0.102177381 ,0.0552217983 ,0.0188115440 ,3.679327e-03 - ,0.20418614 ,0.104590523 ,0.0563666642 ,0.0194950810 ,3.801605e-03 - ,0.20637484 ,0.107027661 ,0.0574891666 ,0.0201773871 ,3.917550e-03 - ,0.20848607 ,0.109466576 ,0.0585905770 ,0.0208550641 ,4.029514e-03 - ,0.21053195 ,0.111884485 ,0.0596721117 ,0.0215253931 ,4.140185e-03 - ,0.21252275 ,0.114260111 ,0.0607348702 ,0.0221864686 ,4.252232e-03 - ,0.21446746 ,0.116575429 ,0.0617798326 ,0.0228372461 ,4.367992e-03 - ,0.21637417 ,0.118816908 ,0.0628079009 ,0.0234775060 ,4.489244e-03 - ,0.21825036 ,0.120976170 ,0.0638199623 ,0.0241077482 ,4.617087e-03 - ,0.22010296 ,0.123050046 ,0.0648169547 ,0.0247290363 ,4.751923e-03 - ,0.22193833 ,0.125040101 ,0.0657999179 ,0.0253428107 ,4.893526e-03 - ,0.22376206 ,0.126951747 ,0.0667700240 ,0.0259506919 ,5.041168e-03 - ,0.22557886 ,0.128793104 ,0.0677285875 ,0.0265542902 ,5.193786e-03 - ,0.22739239 ,0.130573768 ,0.0686770614 ,0.0271550377 ,5.350146e-03 - ,0.22920525 ,0.132303638 ,0.0696170311 ,0.0277540524 ,5.508989e-03 - ,0.23101900 ,0.133991919 ,0.0705502132 ,0.0283520430 ,5.669140e-03 - ,0.23283434 ,0.135646375 ,0.0714784671 ,0.0289492606 ,5.829573e-03 - ,0.23465133 ,0.137272882 ,0.0724038164 ,0.0295454968 ,5.989428e-03 - ,0.23646965 ,0.138875254 ,0.0733284760 ,0.0301401298 ,6.148001e-03 - ,0.23828893 ,0.140455337 ,0.0742548720 ,0.0307322114 ,6.304704e-03 - ,0.24010890 ,0.142013293 ,0.0751856416 ,0.0313205878 ,6.459029e-03 - ,0.24192967 ,0.143548021 ,0.0761236012 ,0.0319040403 ,6.610507e-03 - ,0.24375172 ,0.145057640 ,0.0770716723 ,0.0324814318 ,6.758695e-03 - ,0.24557590 ,0.146539976 ,0.0780327640 ,0.0330518431 ,6.903176e-03 - ,0.24740333 ,0.147992990 ,0.0790096154 ,0.0336146829 ,7.043585e-03 - ,0.24923515 ,0.149415131 ,0.0800046099 ,0.0341697603 ,7.179648e-03 - ,0.25107232 ,0.150805565 ,0.0810195786 ,0.0347173115 ,7.311226e-03 - ,0.25291533 ,0.152164307 ,0.0820556131 ,0.0352579785 ,7.438362e-03 - ,0.25476396 ,0.153492230 ,0.0831129092 ,0.0357927443 ,7.561312e-03 - ,0.25661711 ,0.154791002 ,0.0841906620 ,0.0363228330 ,7.680559e-03 - ,0.25847267 ,0.156062941 ,0.0852870246 ,0.0368495882 ,7.796806e-03 - ,0.26032752 ,0.157310848 ,0.0863991407 ,0.0373743441 ,7.910946e-03 - ,0.26217762 ,0.158537805 ,0.0875232506 ,0.0378983036 ,8.024013e-03 - ,0.26401815 ,0.159746996 ,0.0886548636 ,0.0384224367 ,8.137120e-03 - ,0.26584379 ,0.160941546 ,0.0897889847 ,0.0389474062 ,8.251387e-03 - ,0.26764897 ,0.162124387 ,0.0909203766 ,0.0394735277 ,8.367871e-03 - ,0.26942825 ,0.163298178 ,0.0920438371 ,0.0400007620 ,8.487505e-03 - ,0.27117659 ,0.164465254 
,0.0931544716 ,0.0405287383 ,8.611045e-03 - ,0.27288969 ,0.165627620 ,0.0942479401 ,0.0410567998 ,8.739032e-03 - ,0.27456420 ,0.166786971 ,0.0953206650 ,0.0415840663 ,8.871781e-03 - ,0.27619794 ,0.167944735 ,0.0963699857 ,0.0421095038 ,9.009371e-03 - ,0.27779004 ,0.169102127 ,0.0973942565 ,0.0426319942 ,9.151667e-03 - ,0.27934099 ,0.170260209 ,0.0983928834 ,0.0431503996 ,9.298339e-03 - ,0.28085262 ,0.171419936 ,0.0993663043 ,0.0436636169 ,9.448902e-03 - ,0.28232802 ,0.172582199 ,0.1003159175 ,0.0441706205 ,9.602751e-03 - ,0.28377144 ,0.173747838 ,0.1012439683 ,0.0446704936 ,9.759205e-03 - ,0.28518812 ,0.174917647 ,0.1021534027 ,0.0451624485 ,9.917545e-03 - ,0.28658401 ,0.176092356 ,0.1030476997 ,0.0456458386 ,1.007705e-02 - ,0.28796561 ,0.177272586 ,0.1039306913 ,0.0461201648 ,1.023703e-02 - ,0.28933969 ,0.178458806 ,0.1048063812 ,0.0465850770 ,1.039685e-02 - ,0.29071301 ,0.179651274 ,0.1056787690 ,0.0470403750 ,1.055595e-02 - ,0.29209211 ,0.180849979 ,0.1065516870 ,0.0474860075 ,1.071386e-02 - ,0.29348304 ,0.182054592 ,0.1074286553 ,0.0479220717 ,1.087020e-02 - ,0.29489121 ,0.183264423 ,0.1083127575 ,0.0483488124 ,1.102471e-02 - ,0.29632114 ,0.184478407 ,0.1092065408 ,0.0487666200 ,1.117722e-02 - ,0.29777639 ,0.185695102 ,0.1101119397 ,0.0491760269 ,1.132768e-02 - ,0.29925947 ,0.186912720 ,0.1110302259 ,0.0495777007 ,1.147610e-02 - ,0.30077178 ,0.188129170 ,0.1119619808 ,0.0499724340 ,1.162261e-02 - ,0.30231363 ,0.189342139 ,0.1129070920 ,0.0503611291 ,1.176737e-02 - ,0.30388432 ,0.190549177 ,0.1138647708 ,0.0507447791 ,1.191065e-02 - ,0.30548226 ,0.191747804 ,0.1148335893 ,0.0511244432 ,1.205270e-02 - ,0.30710505 ,0.192935620 ,0.1158115351 ,0.0515012188 ,1.219386e-02 - ,0.30874972 ,0.194110415 ,0.1167960815 ,0.0518762105 ,1.233443e-02 - ,0.31041287 ,0.195270270 ,0.1177842705 ,0.0522504975 ,1.247476e-02 - ,0.31209087 ,0.196413653 ,0.1187728072 ,0.0526251016 ,1.261515e-02 - ,0.31378008 ,0.197539481 ,0.1197581619 ,0.0530009554 ,1.275592e-02 - ,0.33073053 ,0.207947746 ,0.1287029996 ,0.0569307733 ,1.424281e-02 - ,0.34766587 ,0.218180516 ,0.1351175921 ,0.0609818273 ,1.602065e-02 - ,0.36497276 ,0.229372730 ,0.1404381947 ,0.0646360301 ,1.803053e-02 - ,0.38214131 ,0.240774517 ,0.1460402987 ,0.0678929574 ,1.984669e-02 - ,0.39892707 ,0.251527352 ,0.1517699541 ,0.0710908941 ,2.140172e-02 - ,0.41477035 ,0.261312042 ,0.1577197965 ,0.0744655082 ,2.290802e-02 - ,0.42985193 ,0.270169908 ,0.1649510495 ,0.0779547358 ,2.442240e-02 - ,0.44459125 ,0.278214867 ,0.1733065465 ,0.0816344914 ,2.596410e-02 - ,0.45830629 ,0.285626732 ,0.1811614844 ,0.0857820745 ,2.747529e-02 - ,0.47040113 ,0.293106158 ,0.1877528247 ,0.0903640622 ,2.897806e-02 - ,0.48149178 ,0.301009058 ,0.1935701784 ,0.0951257701 ,3.068337e-02 - ,0.49230962 ,0.308762383 ,0.1992769670 ,0.0998239781 ,3.268333e-02 - ,0.50293366 ,0.315833032 ,0.2049784969 ,0.1044290674 ,3.485126e-02 - ,0.51329043 ,0.322327249 ,0.2104162141 ,0.1090755934 ,3.704227e-02 - ,0.52322279 ,0.328643258 ,0.2155878925 ,0.1137388083 ,3.919411e-02 - ,0.53250776 ,0.334876631 ,0.2207245049 ,0.1182751586 ,4.126276e-02 - ,0.54136758 ,0.341122307 ,0.2258390954 ,0.1227003370 ,4.321786e-02 - ,0.55057692 ,0.347936973 ,0.2307672090 ,0.1272211470 ,4.510598e-02 - ,0.56083373 ,0.355714012 ,0.2355401547 ,0.1320061535 ,4.697462e-02 - ,0.57188476 ,0.364046026 ,0.2403678821 ,0.1369508295 ,4.879706e-02 - ,0.58258251 ,0.372072572 ,0.2454048413 ,0.1418771565 ,5.057703e-02 - ,0.59206406 ,0.379279106 ,0.2507385738 ,0.1468283555 ,5.241157e-02 - ,0.60043277 ,0.385862014 ,0.2565083544 ,0.1518653755 
,5.442717e-02 - ,0.60824065 ,0.392470283 ,0.2627890921 ,0.1568425686 ,5.672558e-02 - ,0.61587328 ,0.399665003 ,0.2693240828 ,0.1615409971 ,5.932340e-02 - ,0.62350556 ,0.407442902 ,0.2757405639 ,0.1659131190 ,6.211202e-02 - ,0.63130400 ,0.415294312 ,0.2820549478 ,0.1701469869 ,6.493040e-02 - ,0.63950683 ,0.422769887 ,0.2885097791 ,0.1744470541 ,6.766549e-02 - ,0.64822576 ,0.429800809 ,0.2949604623 ,0.1788535306 ,7.024569e-02 - ,0.65718853 ,0.436524548 ,0.3009940240 ,0.1832922406 ,7.260984e-02 - ,0.66586026 ,0.443072284 ,0.3066417800 ,0.1877498418 ,7.477539e-02 - ,0.67385499 ,0.449570002 ,0.3124210302 ,0.1923869531 ,7.686614e-02 - ,0.68116860 ,0.456177204 ,0.3186629632 ,0.1973744564 ,7.901179e-02 - ,0.68817020 ,0.463016405 ,0.3252135392 ,0.2026604329 ,8.127321e-02 - ,0.69545456 ,0.470153870 ,0.3317877025 ,0.2080443155 ,8.365480e-02 - ,0.70339322 ,0.477628656 ,0.3382869934 ,0.2133829471 ,8.614100e-02 - ,0.71168363 ,0.485354548 ,0.3446879623 ,0.2185816498 ,8.874283e-02 - ,0.71966391 ,0.493111522 ,0.3508452160 ,0.2235543145 ,9.151748e-02 - ,0.72706877 ,0.500759585 ,0.3565974060 ,0.2283233375 ,9.452512e-02 - ,0.73411394 ,0.508436756 ,0.3620578719 ,0.2330030834 ,9.779298e-02 - ,0.74096543 ,0.516596717 ,0.3676326358 ,0.2376654675 ,1.013167e-01 - ,0.74754062 ,0.525572730 ,0.3736020261 ,0.2423531847 ,1.050525e-01 - ,0.75389357 ,0.534868176 ,0.3798150608 ,0.2471555672 ,1.089039e-01 - ,0.76044477 ,0.543419973 ,0.3859717103 ,0.2520806896 ,1.127538e-01 - ,0.76755285 ,0.550743471 ,0.3920256074 ,0.2568981812 ,1.165591e-01 - ,0.77509023 ,0.557269510 ,0.3981237467 ,0.2613608036 ,1.203644e-01 - ,0.78258820 ,0.563602135 ,0.4043785159 ,0.2656200877 ,1.241878e-01 - ,0.78956970 ,0.569952952 ,0.4108934218 ,0.2702271522 ,1.280316e-01 - ,0.79578357 ,0.576313169 ,0.4177625748 ,0.2756524098 ,1.320053e-01 - ,0.80125054 ,0.582816465 ,0.4248741636 ,0.2818778472 ,1.362441e-01 - ,0.80604156 ,0.589718190 ,0.4319593756 ,0.2884428281 ,1.407053e-01 - ,0.81021918 ,0.597012063 ,0.4388877066 ,0.2948281054 ,1.452044e-01 - ,0.81409451 ,0.604298241 ,0.4456550600 ,0.3008228462 ,1.496867e-01 - ,0.81820723 ,0.611181771 ,0.4521718937 ,0.3065488400 ,1.543527e-01 - ,0.82291940 ,0.617566520 ,0.4583418105 ,0.3121847541 ,1.593959e-01 - ,0.82816550 ,0.623606817 ,0.4642293076 ,0.3178061250 ,1.646657e-01 - ,0.83358635 ,0.629645757 ,0.4699982684 ,0.3234987538 ,1.698169e-01 - ,0.83888900 ,0.636078808 ,0.4758026388 ,0.3294433637 ,1.747666e-01 - ,0.84407903 ,0.643093954 ,0.4817498902 ,0.3359109428 ,1.797028e-01 - ,0.84924411 ,0.650551157 ,0.4878352970 ,0.3432352168 ,1.847478e-01 - ,0.85426493 ,0.658159827 ,0.4939669306 ,0.3514380763 ,1.899971e-01 - ,0.85905395 ,0.665825808 ,0.5002414181 ,0.3599104495 ,1.956613e-01 - ,0.86383966 ,0.673703674 ,0.5069596693 ,0.3680164470 ,2.017933e-01 - ,0.86878464 ,0.681723132 ,0.5141410970 ,0.3757733259 ,2.081422e-01 - ,0.87369919 ,0.689369371 ,0.5213869817 ,0.3836108960 ,2.144345e-01 - ,0.87848296 ,0.696307162 ,0.5284253727 ,0.3917481718 ,2.205671e-01 - ,0.88333342 ,0.703025539 ,0.5354226066 ,0.3997804393 ,2.267723e-01 - ,0.88852444 ,0.710392130 ,0.5425937450 ,0.4071416647 ,2.334748e-01 - ,0.89422273 ,0.718655581 ,0.5500700208 ,0.4139631116 ,2.405566e-01 - ,0.90022856 ,0.727393600 ,0.5580678239 ,0.4208102626 ,2.474752e-01 - ,0.90634711 ,0.736233079 ,0.5663716281 ,0.4277100246 ,2.541958e-01 - ,0.91250898 ,0.745181126 ,0.5745840700 ,0.4341555467 ,2.613699e-01 - ,0.91826744 ,0.754393531 ,0.5831569010 ,0.4404809807 ,2.695945e-01 - ,0.92342137 ,0.763619264 ,0.5926394880 ,0.4480172195 ,2.787520e-01 - ,0.92839321 
,0.772228663 ,0.6031756664 ,0.4576132755 ,2.886374e-01 - ,0.93344816 ,0.780039421 ,0.6154126922 ,0.4690720632 ,2.991463e-01 - ,0.93844307 ,0.787892760 ,0.6293505344 ,0.4817779858 ,3.100554e-01 - ,0.94305951 ,0.796994339 ,0.6430075903 ,0.4942618672 ,3.217248e-01 - ,0.94732447 ,0.807084776 ,0.6556595336 ,0.5058265444 ,3.343702e-01 - ,0.95204731 ,0.817744490 ,0.6691307167 ,0.5191457332 ,3.476188e-01 - ,0.95754260 ,0.828993245 ,0.6843916089 ,0.5349257669 ,3.612037e-01 - ,0.96262769 ,0.840489010 ,0.7010625113 ,0.5506951842 ,3.762375e-01 - ,0.96744559 ,0.851185162 ,0.7179714379 ,0.5654664242 ,3.939733e-01 - ,0.97198841 ,0.862041313 ,0.7355142033 ,0.5808629729 ,4.145818e-01 - ,0.97581215 ,0.873028712 ,0.7534527187 ,0.6022223081 ,4.363161e-01 - ,0.98074122 ,0.884033783 ,0.7720247523 ,0.6336556646 ,4.631840e-01 - ,0.98656778 ,0.899552193 ,0.7975923713 ,0.6670769076 ,4.902773e-01 - ,0.99129231 ,0.924710622 ,0.8318770069 ,0.7077418476 ,5.161952e-01 - ,0.99566295 ,0.958284024 ,0.8782468473 ,0.7720592633 ,5.667889e-01), - byrow=TRUE,ncol=5) - -if(is.null(PV)){ -if(!com.p.dist){ -if(p==3)rem=P3 -if(p==4)rem=P4 -if(p==5)rem=P5 -if(p==6)rem=P6 -} } - -est=NA -for(j in 1:p)est[j]=corfun(x[,j],y,...)$cor -id=which(est==max(est)) -R=order(est,decreasing=LARGEST) -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -# -# If you use corfun=scor, set plotit=F -# -data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot) -data=listm(t(data)) -if(MC){ -library(parallel) -bvec<-mclapply(data,corCOMmcp_sub,x,y,corfun,...) -} -if(!MC)bvec<-lapply(data,corCOMmcp_sub,x,y,corfun,...) -output=matrix(NA,nrow=pm1,ncol=8) -if(LARGEST)dimnames(output)=list(NULL,c('IV','Est.', 'Largest.Est', 'Dif','ci.low','ci.hi','p.value','adj.p.value')) -if(!LARGEST)dimnames(output)=list(NULL,c('IV','Est.', 'Smallest.Est', 'Dif','ci.low','ci.hi','p.value','adj.p.value')) -mat=matrix(NA,nrow=nboot,ncol=p) -ihi<-floor((1-alpha/2)*nboot+.5) -ilow<-floor((alpha/2)*nboot+.5) -for(i in 1:nboot)mat[i,]=bvec[[i]] -for(j in 2:p){ -k=j-1 -output[k,1]=R[j] -output[k,3]=est[R[1]] -output[k,2]=est[R[j]] -bsort<-sort(mat[,R[1]]-mat[,R[j]]) -output[k,4]=est[R[1]]-est[R[j]] -output[k,5]=bsort[ilow] -output[k,6]=bsort[ihi] -pv=mean(bsort<0)+.5*mean(bsort==0) -output[k,7]=2*min(c(pv,1-pv)) -flag=output[k,7]>=rem[,k] -ID=which(flag==TRUE) -ic=max(ID,1) -output[k,7]=L[ic] -} -Best='No Decision' -CH=R[1] -names(CH)='IV.w.Largest.Est' -if(!LARGEST)names(CH)='IV.w.Smallest.Est' -output[,8]=p.adjust(output[,7],method=FWE.method) -if(sum(output[,8]<=alpha)==pm1)Best='Decide' -list(CH,Conclusion=Best,results=output) -} - - - -corREGorder<-function(x,y,com.p.dist=FALSE, corfun=wincor,iter=1000,PV=NULL,pr=TRUE, -alpha=.05,nboot=500,SEED=TRUE,MC=FALSE,xout=FALSE,outfun=outpro,method='hoch',...){ -# -# Regresion: -# -# Have two or more independent variables, compare -# cor(y,x_I) to cor(y,x_k) for all k!=I, where -# cor(i,x_I) is the highest -# Winsorized correlation is used by default. -# Hochberg's method is used to control FWE. 
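-#
-# Illustrative call (a minimal sketch added for clarity; the data below are
-# hypothetical, and the helper functions in this file, e.g. wincor, elimna,
-# outpro and corCOMmcp_sub, must already be sourced):
-#   set.seed(1)
-#   x=matrix(rnorm(150),ncol=3)    # three candidate independent variables
-#   y=x[,1]+rnorm(50)              # y is most strongly related to the first IV
-#   corREGorder(x,y,nboot=500)     # compares the ordered correlations pairwise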
-# -# x is assumed to be a matrix or data frame -# -# -if(nrow(x)!=length(y))stop('x and y have different sample sizes; should be equal') -p=ncol(x) -pm1=p-1 -p1=p+1 -m1=cbind(x,y) -m1<-elimna(m1) # Eliminate rows with missing values -x=m1[,1:pm1] -y=m1[,p1] -if(xout){ -m<-cbind(x,y) -flag<-outfun(x,plotit=FALSE,...)$keep -m<-m[flag,] -x<-m[,1:p] -y<-m[,p1] -} -nval=nrow(x) - -if(pr){ -if(!com.p.dist){ -if(is.null(PV)){ -if(p>6 || nval>350)print('Might need to use com.p.dist=TRUE') -}}} - -if(is.null(rem))rem=corREGorder.crit(p,nval,iter=iter) -x<-m1[,1:p] -y=m1[,p1] - -L=c(seq(.001,.1,.001),seq(.11,.99,.01)) -if(!is.null(PV)){ -rem=matrix(NA,length(L),pm1) -for(k in 1:pm1){ -for(i in 1:length(L))rem[i,k]=hd(PV[,k],q=L[i]) -}} - -if(p>6)com.p.dist=TRUE -if(com.p.dist){ -if(is.null(PV)){ -if(pr)print('Computing the null distribution can take several minutes') -PV=corREGorder.crit(p,nval,iter=iter,MC=MC) -rem=matrix(NA,length(L),pm1) -for(k in 1:pm1){ -for(i in 1:length(L))rem[i,k]=hd(PV[,k],q=L[i]) -}}} -P3=matrix(c(0.002175049 ,0.01338189 - ,0.005872039 ,0.02090430 - ,0.010619227 ,0.02738247 - ,0.016286140 ,0.03219214 - ,0.022398371 ,0.03579440 - ,0.028262446 ,0.03877950 - ,0.033345906 ,0.04156497 - ,0.037456504 ,0.04436593 - ,0.040694218 ,0.04726051 - ,0.043307931 ,0.05025906 - ,0.045568092 ,0.05334328 - ,0.047696567 ,0.05648043 - ,0.049845675 ,0.05962933 - ,0.052103950 ,0.06274849 - ,0.054511258 ,0.06580645 - ,0.057074761 ,0.06878943 - ,0.059782746 ,0.07170227 - ,0.062615440 ,0.07456228 - ,0.065552496 ,0.07738946 - ,0.068577121 ,0.08019727 - ,0.071677317 ,0.08298752 - ,0.074845101 ,0.08575039 - ,0.078074759 ,0.08846894 - ,0.081360990 ,0.09112571 - ,0.084697501 ,0.09370929 - ,0.088076254 ,0.09621874 - ,0.091487319 ,0.09866517 - ,0.094919200 ,0.10107020 - ,0.098359413 ,0.10346231 - ,0.101795143 ,0.10587191 - ,0.105213802 ,0.10832639 - ,0.108603375 ,0.11084604 - ,0.111952481 ,0.11344138 - ,0.115250189 ,0.11611213 - ,0.118485658 ,0.11884783 - ,0.121647762 ,0.12162960 - ,0.124724850 ,0.12443290 - ,0.127704758 ,0.12723046 - ,0.130575140 ,0.12999536 - ,0.133324080 ,0.13270348 - ,0.135940914 ,0.13533548 - ,0.138417122 ,0.13787790 - ,0.140747141 ,0.14032355 - ,0.142928988 ,0.14267126 - ,0.144964592 ,0.14492507 - ,0.146859829 ,0.14709314 - ,0.148624253 ,0.14918647 - ,0.150270597 ,0.15121765 - ,0.151814117 ,0.15319972 - ,0.153271855 ,0.15514520 - ,0.154661905 ,0.15706537 - ,0.156002725 ,0.15896974 - ,0.157312540 ,0.16086577 - ,0.158608833 ,0.16275881 - ,0.159907941 ,0.16465206 - ,0.161224730 ,0.16654682 - ,0.162572340 ,0.16844268 - ,0.163961992 ,0.17033786 - ,0.165402841 ,0.17222951 - ,0.166901885 ,0.17411410 - ,0.168463921 ,0.17598769 - ,0.170091558 ,0.17784632 - ,0.171785292 ,0.17968623 - ,0.173543642 ,0.18150411 - ,0.175363353 ,0.18329733 - ,0.177239642 ,0.18506405 - ,0.179166497 ,0.18680333 - ,0.181137006 ,0.18851520 - ,0.183143686 ,0.19020068 - ,0.185178820 ,0.19186171 - ,0.187234759 ,0.19350113 - ,0.189304201 ,0.19512259 - ,0.191380404 ,0.19673036 - ,0.193457363 ,0.19832922 - ,0.195529910 ,0.19992426 - ,0.197593772 ,0.20152065 - ,0.199645563 ,0.20312347 - ,0.201682735 ,0.20473749 - ,0.203703491 ,0.20636692 - ,0.205706665 ,0.20801528 - ,0.207691581 ,0.20968521 - ,0.209657914 ,0.21137841 - ,0.211605535 ,0.21309554 - ,0.213534385 ,0.21483623 - ,0.215444346 ,0.21659918 - ,0.217335153 ,0.21838219 - ,0.219206331 ,0.22018238 - ,0.221057158 ,0.22199635 - ,0.222886667 ,0.22382040 - ,0.224693687 ,0.22565074 - ,0.226476899 ,0.22748367 - ,0.228234929 ,0.22931582 - ,0.229966452 ,0.23114424 - ,0.231670301 
,0.23296658 - ,0.233345582 ,0.23478109 - ,0.234991770 ,0.23658668 - ,0.236608793 ,0.23838287 - ,0.238197083 ,0.24016976 - ,0.239757602 ,0.24194791 - ,0.241291828 ,0.24371824 - ,0.255725281 ,0.26121652 - ,0.269643155 ,0.27818483 - ,0.282572599 ,0.29361598 - ,0.294230675 ,0.30801197 - ,0.305279532 ,0.32217337 - ,0.316231246 ,0.33525837 - ,0.327118515 ,0.34679924 - ,0.337725949 ,0.35781816 - ,0.348507973 ,0.36999019 - ,0.360275335 ,0.38380923 - ,0.372732081 ,0.39753304 - ,0.384865494 ,0.40899196 - ,0.396170468 ,0.41839542 - ,0.406525702 ,0.42826562 - ,0.416179917 ,0.44045085 - ,0.425830260 ,0.45430132 - ,0.435731315 ,0.46791777 - ,0.445574831 ,0.47988279 - ,0.455403556 ,0.48962935 - ,0.465473322 ,0.49746681 - ,0.475493015 ,0.50444076 - ,0.485104441 ,0.51166945 - ,0.494622601 ,0.51972051 - ,0.504479896 ,0.52847494 - ,0.514530980 ,0.53743526 - ,0.524379809 ,0.54615592 - ,0.533811067 ,0.55455128 - ,0.542964071 ,0.56289375 - ,0.552415670 ,0.57149093 - ,0.562702410 ,0.58046156 - ,0.573662536 ,0.58975098 - ,0.584499138 ,0.59931329 - ,0.594424353 ,0.60929957 - ,0.603091449 ,0.61973844 - ,0.610580104 ,0.63005679 - ,0.617291412 ,0.63940637 - ,0.623819392 ,0.64740125 - ,0.630625766 ,0.65425937 - ,0.637780329 ,0.66050475 - ,0.645128388 ,0.66675110 - ,0.652676811 ,0.67358269 - ,0.660706577 ,0.68141986 - ,0.669513580 ,0.69030010 - ,0.679074179 ,0.69978580 - ,0.688924859 ,0.70926800 - ,0.698359030 ,0.71838801 - ,0.706803246 ,0.72709029 - ,0.714138033 ,0.73536725 - ,0.720773425 ,0.74316348 - ,0.727376023 ,0.75049022 - ,0.734427437 ,0.75739163 - ,0.741910594 ,0.76379473 - ,0.749301181 ,0.76965778 - ,0.756038316 ,0.77526923 - ,0.762108143 ,0.78110525 - ,0.767991314 ,0.78741712 - ,0.774125688 ,0.79407849 - ,0.780642168 ,0.80072386 - ,0.787654735 ,0.80714732 - ,0.795560943 ,0.81355663 - ,0.804494568 ,0.82019692 - ,0.813689728 ,0.82693733 - ,0.822246533 ,0.83343489 - ,0.830074100 ,0.83948569 - ,0.837601422 ,0.84515904 - ,0.845257402 ,0.85070450 - ,0.853115769 ,0.85637797 - ,0.860852876 ,0.86248746 - ,0.868387995 ,0.86942987 - ,0.875848963 ,0.87706919 - ,0.882870692 ,0.88443942 - ,0.889442420 ,0.89118298 - ,0.896636569 ,0.89779842 - ,0.904626820 ,0.90423655 - ,0.911772218 ,0.91069863 - ,0.917549883 ,0.91754899 - ,0.923211670 ,0.92378977 - ,0.928746246 ,0.92901361 - ,0.934160521 ,0.93466485 - ,0.941177134 ,0.94142063 - ,0.949363922 ,0.94828957 - ,0.956847590 ,0.95420934 - ,0.963599180 ,0.95946403 - ,0.969055149 ,0.96451675 - ,0.973136108 ,0.97019334 - ,0.976868136 ,0.97565816 - ,0.981258840 ,0.98193948 - ,0.988264909 ,0.98843665 - ,0.994433844 ,0.99456404), - byrow=TRUE,ncol=2) - -P4=matrix(c( 0.01551012 ,0.04057726 ,0.04378631 - ,0.02786269 ,0.06546848 ,0.04944117 - ,0.03553717 ,0.08711494 ,0.05573588 - ,0.04023774 ,0.10496114 ,0.06252382 - ,0.04411887 ,0.12025416 ,0.06944572 - ,0.04807201 ,0.13340708 ,0.07602629 - ,0.05230897 ,0.14435931 ,0.08201922 - ,0.05685652 ,0.15314426 ,0.08742154 - ,0.06171756 ,0.16004283 ,0.09233439 - ,0.06689524 ,0.16549047 ,0.09686627 - ,0.07238520 ,0.16994001 ,0.10110915 - ,0.07816280 ,0.17377046 ,0.10514421 - ,0.08417136 ,0.17725430 ,0.10904320 - ,0.09031706 ,0.18056527 ,0.11285974 - ,0.09647542 ,0.18380356 ,0.11661935 - ,0.10250880 ,0.18702173 ,0.12031681 - ,0.10828903 ,0.19024325 ,0.12392259 - ,0.11371720 ,0.19347300 ,0.12739517 - ,0.11873498 ,0.19670257 ,0.13069417 - ,0.12332546 ,0.19991412 ,0.13379041 - ,0.12750572 ,0.20308477 ,0.13667102 - ,0.13131499 ,0.20619197 ,0.13933985 - ,0.13480237 ,0.20921901 ,0.14181443 - ,0.13801702 ,0.21215938 ,0.14412125 - ,0.14100194 ,0.21501886 
,0.14629078 - ,0.14379136 ,0.21781520 ,0.14835323 - ,0.14641091 ,0.22057551 ,0.15033563 - ,0.14887943 ,0.22333212 ,0.15226034 - ,0.15121153 ,0.22611802 ,0.15414467 - ,0.15342004 ,0.22896253 ,0.15600151 - ,0.15551793 ,0.23188795 ,0.15784045 - ,0.15751958 ,0.23490765 ,0.15966900 - ,0.15944129 ,0.23802542 ,0.16149379 - ,0.16130115 ,0.24123624 ,0.16332148 - ,0.16311848 ,0.24452789 ,0.16515915 - ,0.16491299 ,0.24788319 ,0.16701445 - ,0.16670375 ,0.25128243 ,0.16889527 - ,0.16850811 ,0.25470562 ,0.17080920 - ,0.17034067 ,0.25813429 ,0.17276281 - ,0.17221242 ,0.26155271 ,0.17476099 - ,0.17413006 ,0.26494845 ,0.17680637 - ,0.17609563 ,0.26831228 ,0.17889891 - ,0.17810649 ,0.27163767 ,0.18103585 - ,0.18015567 ,0.27491998 ,0.18321179 - ,0.18223251 ,0.27815551 ,0.18541909 - ,0.18432367 ,0.28134075 ,0.18764844 - ,0.18641426 ,0.28447176 ,0.18988943 - ,0.18848908 ,0.28754393 ,0.19213127 - ,0.19053376 ,0.29055199 ,0.19436336 - ,0.19253579 ,0.29349030 ,0.19657586 - ,0.19448531 ,0.29635330 ,0.19876015 - ,0.19637555 ,0.29913601 ,0.20090917 - ,0.19820304 ,0.30183457 ,0.20301766 - ,0.19996751 ,0.30444661 ,0.20508235 - ,0.20167155 ,0.30697148 ,0.20710203 - ,0.20332016 ,0.30941038 ,0.20907756 - ,0.20492013 ,0.31176625 ,0.21101180 - ,0.20647945 ,0.31404361 ,0.21290945 - ,0.20800667 ,0.31624825 ,0.21477685 - ,0.20951041 ,0.31838693 ,0.21662164 - ,0.21099886 ,0.32046697 ,0.21845237 - ,0.21247950 ,0.32249597 ,0.22027815 - ,0.21395877 ,0.32448147 ,0.22210809 - ,0.21544200 ,0.32643073 ,0.22395091 - ,0.21693331 ,0.32835049 ,0.22581448 - ,0.21843563 ,0.33024683 ,0.22770540 - ,0.21995078 ,0.33212513 ,0.22962873 - ,0.22147965 ,0.33398991 ,0.23158777 - ,0.22302229 ,0.33584491 ,0.23358389 - ,0.22457818 ,0.33769307 ,0.23561658 - ,0.22614641 ,0.33953655 ,0.23768352 - ,0.22772593 ,0.34137682 ,0.23978072 - ,0.22931573 ,0.34321472 ,0.24190279 - ,0.23091509 ,0.34505052 ,0.24404323 - ,0.23252371 ,0.34688402 ,0.24619468 - ,0.23414188 ,0.34871459 ,0.24834929 - ,0.23577053 ,0.35054128 ,0.25049895 - ,0.23741133 ,0.35236284 ,0.25263561 - ,0.23906664 ,0.35417779 ,0.25475146 - ,0.24073945 ,0.35598448 ,0.25683915 - ,0.24243332 ,0.35778113 ,0.25889193 - ,0.24415220 ,0.35956584 ,0.26090380 - ,0.24590024 ,0.36133667 ,0.26286960 - ,0.24768166 ,0.36309169 ,0.26478508 - ,0.24950046 ,0.36482898 ,0.26664698 - ,0.25136029 ,0.36654674 ,0.26845307 - ,0.25326420 ,0.36824330 ,0.27020217 - ,0.25521450 ,0.36991721 ,0.27189420 - ,0.25721259 ,0.37156729 ,0.27353012 - ,0.25925888 ,0.37319266 ,0.27511196 - ,0.26135269 ,0.37479283 ,0.27664276 - ,0.26349226 ,0.37636767 ,0.27812647 - ,0.26567472 ,0.37791746 ,0.27956791 - ,0.26789620 ,0.37944287 ,0.28097264 - ,0.27015188 ,0.38094492 ,0.28234681 - ,0.27243611 ,0.38242495 ,0.28369706 - ,0.27474260 ,0.38388453 ,0.28503033 - ,0.27706456 ,0.38532538 ,0.28635373 - ,0.27939486 ,0.38674928 ,0.28767441 - ,0.28172621 ,0.38815796 ,0.28899935 - ,0.30368556 ,0.40162893 ,0.30368088 - ,0.32094974 ,0.41407715 ,0.32178330 - ,0.33413706 ,0.42686681 ,0.33940421 - ,0.34519638 ,0.44106918 ,0.35258534 - ,0.35553655 ,0.45434941 ,0.36200235 - ,0.36622977 ,0.46608461 ,0.37040153 - ,0.37786945 ,0.47769452 ,0.37925941 - ,0.39037354 ,0.48941876 ,0.38854815 - ,0.40339828 ,0.50079642 ,0.39825958 - ,0.41579925 ,0.51188178 ,0.40920109 - ,0.42617477 ,0.52311541 ,0.42158463 - ,0.43455327 ,0.53487858 ,0.43453761 - ,0.44208791 ,0.54714403 ,0.44723309 - ,0.45001905 ,0.55923271 ,0.45932100 - ,0.45896481 ,0.57027401 ,0.47072048 - ,0.46860843 ,0.57999513 ,0.48143650 - ,0.47834450 ,0.58893700 ,0.49173131 - ,0.48781034 ,0.59778542 ,0.50198123 - 
,0.49695736 ,0.60684063 ,0.51232433 - ,0.50614965 ,0.61608565 ,0.52269385 - ,0.51570653 ,0.62525226 ,0.53300494 - ,0.52544301 ,0.63382683 ,0.54336275 - ,0.53507224 ,0.64135887 ,0.55402012 - ,0.54466956 ,0.64795069 ,0.56477936 - ,0.55450045 ,0.65423352 ,0.57492627 - ,0.56470767 ,0.66082598 ,0.58412311 - ,0.57520589 ,0.66798477 ,0.59274171 - ,0.58563795 ,0.67557212 ,0.60127760 - ,0.59553109 ,0.68316811 ,0.60997760 - ,0.60465763 ,0.69037519 ,0.61886863 - ,0.61309294 ,0.69714665 ,0.62782311 - ,0.62096747 ,0.70381712 ,0.63662138 - ,0.62843358 ,0.71066866 ,0.64498294 - ,0.63586964 ,0.71749020 ,0.65265906 - ,0.64375353 ,0.72384456 ,0.65957668 - ,0.65212793 ,0.72967232 ,0.66583193 - ,0.66049812 ,0.73529566 ,0.67159225 - ,0.66842274 ,0.74091198 ,0.67713840 - ,0.67595398 ,0.74641608 ,0.68293746 - ,0.68342673 ,0.75170365 ,0.68940237 - ,0.69097598 ,0.75681883 ,0.69648797 - ,0.69842249 ,0.76180033 ,0.70370713 - ,0.70563560 ,0.76670054 ,0.71064719 - ,0.71281012 ,0.77173041 ,0.71735349 - ,0.72033581 ,0.77705000 ,0.72412267 - ,0.72848382 ,0.78241378 ,0.73111595 - ,0.73708729 ,0.78733284 ,0.73832883 - ,0.74551054 ,0.79160726 ,0.74567870 - ,0.75319837 ,0.79552364 ,0.75291314 - ,0.76015349 ,0.79957172 ,0.75966877 - ,0.76670769 ,0.80408855 ,0.76584461 - ,0.77313827 ,0.80924036 ,0.77174026 - ,0.77962403 ,0.81516519 ,0.77765648 - ,0.78632238 ,0.82177046 ,0.78360390 - ,0.79330110 ,0.82852505 ,0.78963534 - ,0.80032049 ,0.83486410 ,0.79614367 - ,0.80690242 ,0.84071720 ,0.80341723 - ,0.81285318 ,0.84643390 ,0.81116026 - ,0.81843087 ,0.85232962 ,0.81884205 - ,0.82389821 ,0.85834921 ,0.82628132 - ,0.82925515 ,0.86412609 ,0.83357420 - ,0.83452095 ,0.86947172 ,0.84068096 - ,0.84009069 ,0.87468947 ,0.84740433 - ,0.84641252 ,0.88012125 ,0.85369747 - ,0.85337749 ,0.88550274 ,0.85984420 - ,0.86056675 ,0.89031731 ,0.86629374 - ,0.86782542 ,0.89466471 ,0.87323085 - ,0.87514888 ,0.89925142 ,0.88037505 - ,0.88234515 ,0.90465670 ,0.88731009 - ,0.88906611 ,0.91058956 ,0.89382801 - ,0.89489509 ,0.91600949 ,0.89989887 - ,0.89964387 ,0.92037164 ,0.90556334 - ,0.90392518 ,0.92425694 ,0.91107573 - ,0.90874993 ,0.92837270 ,0.91667095 - ,0.91433102 ,0.93276449 ,0.92229467 - ,0.92009330 ,0.93716660 ,0.92780387 - ,0.92614330 ,0.94129808 ,0.93326004 - ,0.93274464 ,0.94523346 ,0.93882150 - ,0.93904729 ,0.94970628 ,0.94451555 - ,0.94498605 ,0.95507385 ,0.95042484 - ,0.95172002 ,0.96051456 ,0.95623595 - ,0.95904715 ,0.96546866 ,0.96109440 - ,0.96528014 ,0.96983221 ,0.96583206 - ,0.96981308 ,0.97446466 ,0.97124046 - ,0.97390026 ,0.97945170 ,0.97556419 - ,0.97783109 ,0.98385517 ,0.97915772 - ,0.98192499 ,0.98887894 ,0.98323212 - ,0.98701317 ,0.99271102 ,0.98660775 - ,0.99226134 ,0.99482149 ,0.99079872), - byrow=TRUE,ncol=3) - - P5=matrix(c( 0.05652412 ,0.1064700 ,0.05210169 ,0.005493583 - ,0.06574073 ,0.1193620 ,0.06365731 ,0.013235067 - ,0.07151676 ,0.1298226 ,0.07635689 ,0.024181097 - ,0.07511946 ,0.1389007 ,0.09074149 ,0.036214033 - ,0.07828081 ,0.1482306 ,0.10635395 ,0.047401883 - ,0.08180130 ,0.1583439 ,0.12166792 ,0.056974651 - ,0.08583995 ,0.1691003 ,0.13539360 ,0.065203466 - ,0.09025786 ,0.1802110 ,0.14713767 ,0.072752928 - ,0.09481793 ,0.1914175 ,0.15726850 ,0.080170849 - ,0.09930536 ,0.2024707 ,0.16646093 ,0.087701224 - ,0.10358876 ,0.2130961 ,0.17530679 ,0.095324733 - ,0.10762929 ,0.2230221 ,0.18413231 ,0.102886098 - ,0.11145588 ,0.2320476 ,0.19299485 ,0.110212699 - ,0.11512861 ,0.2400992 ,0.20177360 ,0.117183521 - ,0.11870678 ,0.2472403 ,0.21027870 ,0.123747085 - ,0.12222956 ,0.2536340 ,0.21833522 ,0.129906857 - ,0.12571005 
,0.2594800 ,0.22582830 ,0.135695457 - ,0.12913936 ,0.2649560 ,0.23271445 ,0.141152226 - ,0.13249584 ,0.2701803 ,0.23901157 ,0.146309897 - ,0.13575514 ,0.2752025 ,0.24477977 ,0.151189960 - ,0.13889807 ,0.2800167 ,0.25010143 ,0.155803801 - ,0.14191516 ,0.2845862 ,0.25506478 ,0.160156707 - ,0.14480785 ,0.2888690 ,0.25975221 ,0.164252701 - ,0.14758751 ,0.2928368 ,0.26423308 ,0.168098998 - ,0.15027304 ,0.2964849 ,0.26856022 ,0.171709323 - ,0.15288821 ,0.2998329 ,0.27276929 ,0.175105614 - ,0.15545896 ,0.3029190 ,0.27688008 ,0.178317905 - ,0.15801110 ,0.3057918 ,0.28089924 ,0.181382526 - ,0.16056820 ,0.3085024 ,0.28482365 ,0.184339066 - ,0.16315001 ,0.3110976 ,0.28864393 ,0.187226779 - ,0.16577118 ,0.3136161 ,0.29234767 ,0.190081080 - ,0.16844049 ,0.3160868 ,0.29592209 ,0.192930735 - ,0.17116069 ,0.3185293 ,0.29935598 ,0.195796047 - ,0.17392879 ,0.3209550 ,0.30264093 ,0.198688172 - ,0.17673683 ,0.3233687 ,0.30577201 ,0.201609481 - ,0.17957306 ,0.3257704 ,0.30874798 ,0.204554768 - ,0.18242312 ,0.3281569 ,0.31157121 ,0.207513022 - ,0.18527142 ,0.3305232 ,0.31424738 ,0.210469505 - ,0.18810221 ,0.3328636 ,0.31678506 ,0.213407838 - ,0.19090055 ,0.3351728 ,0.31919516 ,0.216311897 - ,0.19365295 ,0.3374465 ,0.32149031 ,0.219167333 - ,0.19634778 ,0.3396823 ,0.32368416 ,0.221962623 - ,0.19897548 ,0.3418799 ,0.32579080 ,0.224689627 - ,0.20152856 ,0.3440415 ,0.32782411 ,0.227343686 - ,0.20400154 ,0.3461714 ,0.32979739 ,0.229923353 - ,0.20639083 ,0.3482759 ,0.33172307 ,0.232429882 - ,0.20869457 ,0.3503624 ,0.33361256 ,0.234866606 - ,0.21091258 ,0.3524387 ,0.33547633 ,0.237238310 - ,0.21304622 ,0.3545126 ,0.33732401 ,0.239550684 - ,0.21509838 ,0.3565905 ,0.33916450 ,0.241809893 - ,0.21707346 ,0.3586772 ,0.34100612 ,0.244022277 - ,0.21897727 ,0.3607754 ,0.34285661 ,0.246194145 - ,0.22081697 ,0.3628855 ,0.34472307 ,0.248331635 - ,0.22260093 ,0.3650055 ,0.34661180 ,0.250440593 - ,0.22433851 ,0.3671314 ,0.34852808 ,0.252526433 - ,0.22603978 ,0.3692577 ,0.35047594 ,0.254593979 - ,0.22771524 ,0.3713776 ,0.35245788 ,0.256647268 - ,0.22937544 ,0.3734840 ,0.35447483 ,0.258689342 - ,0.23103064 ,0.3755696 ,0.35652605 ,0.260722057 - ,0.23269046 ,0.3776277 ,0.35860928 ,0.262745921 - ,0.23436362 ,0.3796525 ,0.36072093 ,0.264760011 - ,0.23605766 ,0.3816393 ,0.36285653 ,0.266761969 - ,0.23777881 ,0.3835848 ,0.36501105 ,0.268748094 - ,0.23953181 ,0.3854871 ,0.36717945 ,0.270713533 - ,0.24131994 ,0.3873455 ,0.36935709 ,0.272652546 - ,0.24314491 ,0.3891604 ,0.37154010 ,0.274558844 - ,0.24500695 ,0.3909334 ,0.37372567 ,0.276425957 - ,0.24690483 ,0.3926667 ,0.37591220 ,0.278247625 - ,0.24883595 ,0.3943630 ,0.37809926 ,0.280018169 - ,0.25079643 ,0.3960256 ,0.38028752 ,0.281732830 - ,0.25278126 ,0.3976578 ,0.38247846 ,0.283388055 - ,0.25478439 ,0.3992633 ,0.38467408 ,0.284981717 - ,0.25679899 ,0.4008454 ,0.38687646 ,0.286513250 - ,0.25881759 ,0.4024077 ,0.38908746 ,0.287983708 - ,0.26083237 ,0.4039536 ,0.39130828 ,0.289395740 - ,0.26283537 ,0.4054864 ,0.39353919 ,0.290753487 - ,0.26481879 ,0.4070095 ,0.39577932 ,0.292062415 - ,0.26677527 ,0.4085258 ,0.39802648 ,0.293329080 - ,0.26869809 ,0.4100386 ,0.40027722 ,0.294560863 - ,0.27058146 ,0.4115507 ,0.40252683 ,0.295765673 - ,0.27242063 ,0.4130649 ,0.40476958 ,0.296951643 - ,0.27421212 ,0.4145841 ,0.40699893 ,0.298126831 - ,0.27595371 ,0.4161107 ,0.40920786 ,0.299298952 - ,0.27764451 ,0.4176472 ,0.41138921 ,0.300475144 - ,0.27928490 ,0.4191957 ,0.41353603 ,0.301661783 - ,0.28087643 ,0.4207583 ,0.41564194 ,0.302864352 - ,0.28242170 ,0.4223369 ,0.41770144 ,0.304087372 - ,0.28392419 
,0.4239330 ,0.41971011 ,0.305334383 - ,0.28538803 ,0.4255480 ,0.42166488 ,0.306607981 - ,0.28681780 ,0.4271829 ,0.42356406 ,0.307909888 - ,0.28821831 ,0.4288386 ,0.42540740 ,0.309241059 - ,0.28959443 ,0.4305155 ,0.42719605 ,0.310601803 - ,0.29095083 ,0.4322137 ,0.42893242 ,0.311991913 - ,0.29229189 ,0.4339327 ,0.43061997 ,0.313410782 - ,0.29362151 ,0.4356718 ,0.43226306 ,0.314857513 - ,0.29494305 ,0.4374295 ,0.43386662 ,0.316331000 - ,0.29625927 ,0.4392040 ,0.43543592 ,0.317829987 - ,0.29757232 ,0.4409928 ,0.43697631 ,0.319353103 - ,0.29888374 ,0.4427930 ,0.43849299 ,0.320898866 - ,0.30019451 ,0.4446010 ,0.43999077 ,0.322465682 - ,0.31328362 ,0.4621878 ,0.45451950 ,0.338826479 - ,0.32658914 ,0.4768889 ,0.46817448 ,0.354761127 - ,0.34081183 ,0.4889837 ,0.48092037 ,0.369582715 - ,0.35529268 ,0.4997010 ,0.49462451 ,0.384419904 - ,0.36855379 ,0.5096083 ,0.51023274 ,0.398755180 - ,0.37994080 ,0.5184617 ,0.52647200 ,0.411411797 - ,0.39062100 ,0.5263287 ,0.54163677 ,0.423038765 - ,0.40257143 ,0.5344950 ,0.55461543 ,0.434627278 - ,0.41621275 ,0.5440195 ,0.56505570 ,0.446414188 - ,0.43046818 ,0.5547192 ,0.57379461 ,0.458794883 - ,0.44486669 ,0.5653656 ,0.58229160 ,0.471543195 - ,0.45923530 ,0.5750124 ,0.59101203 ,0.483555129 - ,0.47296930 ,0.5839699 ,0.59924570 ,0.494652841 - ,0.48590094 ,0.5928607 ,0.60697069 ,0.505540897 - ,0.49834493 ,0.6017113 ,0.61531580 ,0.516236508 - ,0.51026131 ,0.6105778 ,0.62505014 ,0.526149184 - ,0.52119211 ,0.6200780 ,0.63576320 ,0.535122517 - ,0.53082589 ,0.6304166 ,0.64643772 ,0.543660340 - ,0.53943126 ,0.6407196 ,0.65638365 ,0.552285332 - ,0.54774724 ,0.6500929 ,0.66564380 ,0.561135243 - ,0.55642024 ,0.6586570 ,0.67450792 ,0.570308867 - ,0.56550181 ,0.6670994 ,0.68302769 ,0.580053861 - ,0.57464883 ,0.6756273 ,0.69143503 ,0.590349969 - ,0.58368440 ,0.6838335 ,0.70025072 ,0.600590044 - ,0.59259791 ,0.6914073 ,0.70958149 ,0.610032910 - ,0.60135765 ,0.6984377 ,0.71882369 ,0.618579939 - ,0.60997532 ,0.7051144 ,0.72721163 ,0.626728815 - ,0.61843676 ,0.7115369 ,0.73447399 ,0.634788345 - ,0.62674513 ,0.7178305 ,0.74089008 ,0.642664491 - ,0.63517287 ,0.7242564 ,0.74693265 ,0.650337943 - ,0.64411712 ,0.7310644 ,0.75290477 ,0.658048604 - ,0.65355691 ,0.7382701 ,0.75884913 ,0.665979304 - ,0.66286128 ,0.7456401 ,0.76482766 ,0.674038165 - ,0.67142296 ,0.7528643 ,0.77103158 ,0.682044900 - ,0.67945227 ,0.7597670 ,0.77745500 ,0.689918781 - ,0.68784816 ,0.7663382 ,0.78372370 ,0.697608584 - ,0.69717228 ,0.7725694 ,0.78940484 ,0.705026668 - ,0.70693776 ,0.7783837 ,0.79438682 ,0.712091065 - ,0.71609195 ,0.7837138 ,0.79895071 ,0.718801927 - ,0.72398625 ,0.7886294 ,0.80353613 ,0.725335927 - ,0.73066318 ,0.7934308 ,0.80838726 ,0.731990069 - ,0.73650853 ,0.7984516 ,0.81339086 ,0.738959992 - ,0.74191863 ,0.8036594 ,0.81827024 ,0.746236537 - ,0.74721366 ,0.8086236 ,0.82286981 ,0.753653660 - ,0.75262380 ,0.8129806 ,0.82718266 ,0.760952429 - ,0.75824105 ,0.8167987 ,0.83122205 ,0.767851565 - ,0.76409670 ,0.8204615 ,0.83502651 ,0.774128504 - ,0.77030423 ,0.8243454 ,0.83871807 ,0.779748097 - ,0.77696430 ,0.8286333 ,0.84243305 ,0.784919804 - ,0.78392814 ,0.8333291 ,0.84622101 ,0.789918249 - ,0.79087142 ,0.8383467 ,0.85001054 ,0.794848310 - ,0.79764992 ,0.8435563 ,0.85367430 ,0.799645577 - ,0.80440374 ,0.8488329 ,0.85720937 ,0.804300457 - ,0.81125920 ,0.8540607 ,0.86077075 ,0.808937266 - ,0.81807396 ,0.8590979 ,0.86444685 ,0.813668806 - ,0.82455298 ,0.8638895 ,0.86820083 ,0.818594382 - ,0.83054464 ,0.8685076 ,0.87202674 ,0.823874450 - ,0.83619452 ,0.8729580 ,0.87586813 ,0.829667963 - ,0.84182788 
,0.8772068 ,0.87951882 ,0.836051379 - ,0.84769382 ,0.8813966 ,0.88292402 ,0.842860562 - ,0.85379684 ,0.8857317 ,0.88639513 ,0.849605729 - ,0.85982991 ,0.8902404 ,0.89031782 ,0.855810573 - ,0.86534864 ,0.8948524 ,0.89481091 ,0.861500169 - ,0.87020343 ,0.8994987 ,0.89970471 ,0.867139594 - ,0.87472363 ,0.9041210 ,0.90468146 ,0.872950260 - ,0.87942450 ,0.9088721 ,0.90940470 ,0.878638514 - ,0.88437025 ,0.9139336 ,0.91367910 ,0.883816792 - ,0.88904758 ,0.9189854 ,0.91755701 ,0.888324527 - ,0.89322701 ,0.9235781 ,0.92120549 ,0.892386541 - ,0.89745058 ,0.9278084 ,0.92477201 ,0.896563267 - ,0.90239053 ,0.9320707 ,0.92830567 ,0.901221708 - ,0.90800180 ,0.9364567 ,0.93180350 ,0.906121919 - ,0.91346775 ,0.9405897 ,0.93545855 ,0.911051876 - ,0.91813323 ,0.9441004 ,0.93949383 ,0.916500400 - ,0.92221702 ,0.9470722 ,0.94376981 ,0.922684763 - ,0.92622555 ,0.9500208 ,0.94792822 ,0.928942208 - ,0.93049876 ,0.9535137 ,0.95168820 ,0.934711316 - ,0.93559016 ,0.9573979 ,0.95514668 ,0.940041161 - ,0.94158878 ,0.9611006 ,0.95847721 ,0.945997043 - ,0.94715113 ,0.9646607 ,0.96144331 ,0.952681937 - ,0.95168452 ,0.9683622 ,0.96448503 ,0.957991575 - ,0.95643627 ,0.9719767 ,0.96793903 ,0.962249417 - ,0.96150476 ,0.9751701 ,0.97130901 ,0.967311290 - ,0.96619142 ,0.9782121 ,0.97505959 ,0.972704298 - ,0.97232678 ,0.9821594 ,0.97956025 ,0.977392196 - ,0.97800785 ,0.9866658 ,0.98474153 ,0.982039939 - ,0.98168187 ,0.9907911 ,0.98956488 ,0.987315734 - ,0.98661473 ,0.9937451 ,0.99413910 ,0.991894481 - ,0.99161624 ,0.9972316 ,0.99730242 ,0.996798896), - byrow=TRUE,ncol=4) - - P6=matrix(c(0.02845841 ,0.1323308 ,0.1003219 ,0.1384688 ,0.05477254 - ,0.03862415 ,0.1504455 ,0.1516471 ,0.1577896 ,0.05960211 - ,0.05220254 ,0.1708109 ,0.1948779 ,0.1744245 ,0.06519134 - ,0.06621145 ,0.1900433 ,0.2222013 ,0.1878881 ,0.07081226 - ,0.07867811 ,0.2067988 ,0.2375538 ,0.1993997 ,0.07609186 - ,0.08886734 ,0.2210344 ,0.2471487 ,0.2097566 ,0.08088784 - ,0.09687788 ,0.2332937 ,0.2552052 ,0.2193390 ,0.08524504 - ,0.10316830 ,0.2441869 ,0.2636724 ,0.2284077 ,0.08932865 - ,0.10824058 ,0.2541494 ,0.2731186 ,0.2372217 ,0.09334220 - ,0.11250832 ,0.2634030 ,0.2834439 ,0.2459903 ,0.09745755 - ,0.11627805 ,0.2720146 ,0.2942621 ,0.2547909 ,0.10177306 - ,0.11976844 ,0.2799750 ,0.3051103 ,0.2635490 ,0.10630380 - ,0.12313219 ,0.2872606 ,0.3155798 ,0.2720913 ,0.11099790 - ,0.12647380 ,0.2938675 ,0.3253870 ,0.2802295 ,0.11576708 - ,0.12986454 ,0.2998218 ,0.3343885 ,0.2878280 ,0.12051851 - ,0.13335383 ,0.3051771 ,0.3425552 ,0.2948321 ,0.12517872 - ,0.13697580 ,0.3100060 ,0.3499315 ,0.3012592 ,0.12970519 - ,0.14075114 ,0.3143922 ,0.3565967 ,0.3071707 ,0.13408620 - ,0.14468601 ,0.3184238 ,0.3626399 ,0.3126418 ,0.13833266 - ,0.14877041 ,0.3221871 ,0.3681465 ,0.3177403 ,0.14246686 - ,0.15297783 ,0.3257612 ,0.3731941 ,0.3225181 ,0.14651218 - ,0.15726699 ,0.3292136 ,0.3778517 ,0.3270116 ,0.15048655 - ,0.16158577 ,0.3325961 ,0.3821812 ,0.3312470 ,0.15440006 - ,0.16587656 ,0.3359439 ,0.3862375 ,0.3352459 ,0.15825605 - ,0.17008218 ,0.3392769 ,0.3900693 ,0.3390287 ,0.16205392 - ,0.17415147 ,0.3426022 ,0.3937200 ,0.3426162 ,0.16579194 - ,0.17804364 ,0.3459191 ,0.3972286 ,0.3460302 ,0.16946914 - ,0.18173091 ,0.3492229 ,0.4006301 ,0.3492928 ,0.17308558 - ,0.18519926 ,0.3525091 ,0.4039573 ,0.3524272 ,0.17664150 - ,0.18844742 ,0.3557754 ,0.4072408 ,0.3554574 ,0.18013586 - ,0.19148459 ,0.3590236 ,0.4105092 ,0.3584099 ,0.18356508 - ,0.19432745 ,0.3622586 ,0.4137885 ,0.3613127 ,0.18692243 - ,0.19699700 ,0.3654879 ,0.4171007 ,0.3641954 ,0.19019849 - ,0.19951573 ,0.3687203 ,0.4204629 
,0.3670873 ,0.19338240 - ,0.20190550 ,0.3719641 ,0.4238853 ,0.3700154 ,0.19646371 - ,0.20418614 ,0.3752258 ,0.4273710 ,0.3730020 ,0.19943431 - ,0.20637484 ,0.3785096 ,0.4309154 ,0.3760622 ,0.20229001 - ,0.20848607 ,0.3818167 ,0.4345067 ,0.3792026 ,0.20503150 - ,0.21053195 ,0.3851457 ,0.4381272 ,0.3824203 ,0.20766451 - ,0.21252275 ,0.3884927 ,0.4417550 ,0.3857039 ,0.21019930 - ,0.21446746 ,0.3918524 ,0.4453661 ,0.3890342 ,0.21264952 - ,0.21637417 ,0.3952189 ,0.4489369 ,0.3923868 ,0.21503090 - ,0.21825036 ,0.3985867 ,0.4524457 ,0.3957345 ,0.21735972 - ,0.22010296 ,0.4019509 ,0.4558750 ,0.3990498 ,0.21965153 - ,0.22193833 ,0.4053081 ,0.4592123 ,0.4023072 ,0.22192010 - ,0.22376206 ,0.4086562 ,0.4624507 ,0.4054849 ,0.22417673 - ,0.22557886 ,0.4119939 ,0.4655884 ,0.4085662 ,0.22642994 - ,0.22739239 ,0.4153207 ,0.4686284 ,0.4115399 ,0.22868536 - ,0.22920525 ,0.4186356 ,0.4715768 ,0.4144002 ,0.23094602 - ,0.23101900 ,0.4219366 ,0.4744418 ,0.4171462 ,0.23321268 - ,0.23283434 ,0.4252203 ,0.4772317 ,0.4197810 ,0.23548425 - ,0.23465133 ,0.4284812 ,0.4799538 ,0.4223106 ,0.23775824 - ,0.23646965 ,0.4317114 ,0.4826136 ,0.4247430 ,0.24003124 - ,0.23828893 ,0.4349012 ,0.4852138 ,0.4270871 ,0.24229916 - ,0.24010890 ,0.4380394 ,0.4877545 ,0.4293518 ,0.24455754 - ,0.24192967 ,0.4411139 ,0.4902331 ,0.4315456 ,0.24680175 - ,0.24375172 ,0.4441127 ,0.4926456 ,0.4336759 ,0.24902706 - ,0.24557590 ,0.4470248 ,0.4949865 ,0.4357493 ,0.25122873 - ,0.24740333 ,0.4498409 ,0.4972506 ,0.4377710 ,0.25340210 - ,0.24923515 ,0.4525542 ,0.4994334 ,0.4397457 ,0.25554261 - ,0.25107232 ,0.4551608 ,0.5015319 ,0.4416770 ,0.25764589 - ,0.25291533 ,0.4576600 ,0.5035450 ,0.4435684 ,0.25970787 - ,0.25476396 ,0.4600538 ,0.5054743 ,0.4454232 ,0.26172490 - ,0.25661711 ,0.4623474 ,0.5073235 ,0.4472447 ,0.26369385 - ,0.25847267 ,0.4645479 ,0.5090988 ,0.4490365 ,0.26561225 - ,0.26032752 ,0.4666643 ,0.5108083 ,0.4508025 ,0.26747840 - ,0.26217762 ,0.4687065 ,0.5124617 ,0.4525472 ,0.26929144 - ,0.26401815 ,0.4706847 ,0.5140696 ,0.4542750 ,0.27105140 - ,0.26584379 ,0.4726089 ,0.5156436 ,0.4559907 ,0.27275919 - ,0.26764897 ,0.4744884 ,0.5171951 ,0.4576988 ,0.27441663 - ,0.26942825 ,0.4763314 ,0.5187352 ,0.4594038 ,0.27602636 - ,0.27117659 ,0.4781447 ,0.5202743 ,0.4611098 ,0.27759181 - ,0.27288969 ,0.4799335 ,0.5218217 ,0.4628199 ,0.27911708 - ,0.27456420 ,0.4817019 ,0.5233854 ,0.4645368 ,0.28060691 - ,0.27619794 ,0.4834523 ,0.5249719 ,0.4662620 ,0.28206660 - ,0.27779004 ,0.4851863 ,0.5265858 ,0.4679962 ,0.28350186 - ,0.27934099 ,0.4869043 ,0.5282303 ,0.4697390 ,0.28491882 - ,0.28085262 ,0.4886062 ,0.5299067 ,0.4714888 ,0.28632382 - ,0.28232802 ,0.4902914 ,0.5316147 ,0.4732435 ,0.28772339 - ,0.28377144 ,0.4919591 ,0.5333528 ,0.4749997 ,0.28912404 - ,0.28518812 ,0.4936083 ,0.5351181 ,0.4767537 ,0.29053216 - ,0.28658401 ,0.4952382 ,0.5369065 ,0.4785012 ,0.29195381 - ,0.28796561 ,0.4968483 ,0.5387135 ,0.4802377 ,0.29339456 - ,0.28933969 ,0.4984381 ,0.5405341 ,0.4819586 ,0.29485931 - ,0.29071301 ,0.5000073 ,0.5423631 ,0.4836596 ,0.29635210 - ,0.29209211 ,0.5015561 ,0.5441954 ,0.4853366 ,0.29787602 - ,0.29348304 ,0.5030846 ,0.5460263 ,0.4869864 ,0.29943305 - ,0.29489121 ,0.5045932 ,0.5478516 ,0.4886062 ,0.30102400 - ,0.29632114 ,0.5060823 ,0.5496679 ,0.4901943 ,0.30264856 - ,0.29777639 ,0.5075524 ,0.5514723 ,0.4917498 ,0.30430529 - ,0.29925947 ,0.5090041 ,0.5532629 ,0.4932728 ,0.30599175 - ,0.30077178 ,0.5104381 ,0.5550384 ,0.4947644 ,0.30770464 - ,0.30231363 ,0.5118548 ,0.5567981 ,0.4962267 ,0.30943996 - ,0.30388432 ,0.5132549 ,0.5585419 ,0.4976622 
,0.31119330 - ,0.30548226 ,0.5146391 ,0.5602700 ,0.4990746 ,0.31295994 - ,0.30710505 ,0.5160078 ,0.5619830 ,0.5004678 ,0.31473518 - ,0.30874972 ,0.5173619 ,0.5636814 ,0.5018461 ,0.31651448 - ,0.31041287 ,0.5187020 ,0.5653660 ,0.5032140 ,0.31829362 - ,0.31209087 ,0.5200289 ,0.5670370 ,0.5045763 ,0.32006892 - ,0.31378008 ,0.5213431 ,0.5686948 ,0.5059374 ,0.32183726 - ,0.33073053 ,0.5339107 ,0.5844004 ,0.5201736 ,0.33893240 - ,0.34766587 ,0.5455699 ,0.5977995 ,0.5358098 ,0.35618985 - ,0.36497276 ,0.5565563 ,0.6088557 ,0.5512613 ,0.37599206 - ,0.38214131 ,0.5670711 ,0.6182221 ,0.5653348 ,0.39630223 - ,0.39892707 ,0.5774423 ,0.6269864 ,0.5781859 ,0.41294824 - ,0.41477035 ,0.5887255 ,0.6365914 ,0.5904167 ,0.42604441 - ,0.42985193 ,0.6012620 ,0.6472327 ,0.6023352 ,0.43772949 - ,0.44459125 ,0.6137931 ,0.6578100 ,0.6136908 ,0.44944166 - ,0.45830629 ,0.6247817 ,0.6680975 ,0.6238971 ,0.46148163 - ,0.47040113 ,0.6338022 ,0.6784202 ,0.6325018 ,0.47293114 - ,0.48149178 ,0.6415214 ,0.6879435 ,0.6396385 ,0.48317936 - ,0.49230962 ,0.6486243 ,0.6955894 ,0.6462754 ,0.49290670 - ,0.50293366 ,0.6556637 ,0.7014553 ,0.6530889 ,0.50315292 - ,0.51329043 ,0.6629905 ,0.7067582 ,0.6598945 ,0.51404269 - ,0.52322279 ,0.6704942 ,0.7127512 ,0.6664369 ,0.52474488 - ,0.53250776 ,0.6779803 ,0.7196826 ,0.6730249 ,0.53501309 - ,0.54136758 ,0.6852235 ,0.7269392 ,0.6804293 ,0.54550010 - ,0.55057692 ,0.6920138 ,0.7339733 ,0.6891016 ,0.55652673 - ,0.56083373 ,0.6985598 ,0.7407423 ,0.6983890 ,0.56770747 - ,0.57188476 ,0.7051878 ,0.7473342 ,0.7069275 ,0.57851707 - ,0.58258251 ,0.7118141 ,0.7535933 ,0.7139242 ,0.58875425 - ,0.59206406 ,0.7181377 ,0.7592423 ,0.7197399 ,0.59840912 - ,0.60043277 ,0.7241198 ,0.7641102 ,0.7253388 ,0.60737553 - ,0.60824065 ,0.7302008 ,0.7682440 ,0.7313809 ,0.61559579 - ,0.61587328 ,0.7369912 ,0.7720497 ,0.7377497 ,0.62335187 - ,0.62350556 ,0.7444895 ,0.7761923 ,0.7438837 ,0.63120528 - ,0.63130400 ,0.7518477 ,0.7811232 ,0.7494397 ,0.63946844 - ,0.63950683 ,0.7582114 ,0.7868131 ,0.7545253 ,0.64791259 - ,0.64822576 ,0.7634978 ,0.7929559 ,0.7594398 ,0.65618729 - ,0.65718853 ,0.7682302 ,0.7991861 ,0.7643607 ,0.66415285 - ,0.66586026 ,0.7729213 ,0.8051205 ,0.7692685 ,0.67170795 - ,0.67385499 ,0.7777031 ,0.8105010 ,0.7741140 ,0.67868318 - ,0.68116860 ,0.7824831 ,0.8154188 ,0.7789630 ,0.68495291 - ,0.68817020 ,0.7872791 ,0.8202914 ,0.7838794 ,0.69056808 - ,0.69545456 ,0.7921821 ,0.8255185 ,0.7888123 ,0.69590049 - ,0.70339322 ,0.7971795 ,0.8311156 ,0.7937833 ,0.70157680 - ,0.71168363 ,0.8021996 ,0.8366622 ,0.7989904 ,0.70809578 - ,0.71966391 ,0.8071535 ,0.8415997 ,0.8045273 ,0.71550505 - ,0.72706877 ,0.8119058 ,0.8456326 ,0.8101695 ,0.72339718 - ,0.73411394 ,0.8163800 ,0.8489277 ,0.8155690 ,0.73113224 - ,0.74096543 ,0.8207198 ,0.8519149 ,0.8205280 ,0.73821297 - ,0.74754062 ,0.8252428 ,0.8548854 ,0.8251003 ,0.74459805 - ,0.75389357 ,0.8301610 ,0.8578935 ,0.8295502 ,0.75057740 - ,0.76044477 ,0.8353000 ,0.8609909 ,0.8341226 ,0.75637074 - ,0.76755285 ,0.8401622 ,0.8643055 ,0.8388245 ,0.76200276 - ,0.77509023 ,0.8444077 ,0.8678815 ,0.8434774 ,0.76755578 - ,0.78258820 ,0.8482099 ,0.8716303 ,0.8478628 ,0.77338458 - ,0.78956970 ,0.8520035 ,0.8754454 ,0.8518595 ,0.77990334 - ,0.79578357 ,0.8559626 ,0.8792329 ,0.8555642 ,0.78708237 - ,0.80125054 ,0.8599094 ,0.8828409 ,0.8592369 ,0.79436547 - ,0.80604156 ,0.8636647 ,0.8861239 ,0.8631591 ,0.80125859 - ,0.81021918 ,0.8672377 ,0.8890983 ,0.8674684 ,0.80774304 - ,0.81409451 ,0.8707223 ,0.8919124 ,0.8719953 ,0.81404504 - ,0.81820723 ,0.8742131 ,0.8946943 ,0.8764030 ,0.82035059 - 
,0.82291940 ,0.8778013 ,0.8975358 ,0.8804936 ,0.82673907 - ,0.82816550 ,0.8815756 ,0.9005363 ,0.8842174 ,0.83309877 - ,0.83358635 ,0.8855560 ,0.9037186 ,0.8875872 ,0.83911146 - ,0.83888900 ,0.8896385 ,0.9070040 ,0.8907332 ,0.84454564 - ,0.84407903 ,0.8937433 ,0.9103602 ,0.8937858 ,0.84960856 - ,0.84924411 ,0.8979929 ,0.9137611 ,0.8967297 ,0.85480217 - ,0.85426493 ,0.9025422 ,0.9170020 ,0.8996128 ,0.86038748 - ,0.85905395 ,0.9072085 ,0.9198988 ,0.9027462 ,0.86618304 - ,0.86383966 ,0.9115513 ,0.9226146 ,0.9064442 ,0.87189147 - ,0.86878464 ,0.9153553 ,0.9254626 ,0.9106255 ,0.87743613 - ,0.87369919 ,0.9187912 ,0.9285133 ,0.9149145 ,0.88292991 - ,0.87848296 ,0.9221554 ,0.9315980 ,0.9190752 ,0.88839609 - ,0.88333342 ,0.9255519 ,0.9345754 ,0.9230635 ,0.89375641 - ,0.88852444 ,0.9289291 ,0.9374606 ,0.9268703 ,0.89911378 - ,0.89422273 ,0.9325025 ,0.9403301 ,0.9305157 ,0.90464514 - ,0.90022856 ,0.9367256 ,0.9431860 ,0.9339928 ,0.91036426 - ,0.90634711 ,0.9416530 ,0.9460100 ,0.9373097 ,0.91620446 - ,0.91250898 ,0.9465501 ,0.9489720 ,0.9406272 ,0.92204347 - ,0.91826744 ,0.9504197 ,0.9522643 ,0.9440552 ,0.92778824 - ,0.92342137 ,0.9530621 ,0.9556061 ,0.9473789 ,0.93323589 - ,0.92839321 ,0.9551899 ,0.9585219 ,0.9505219 ,0.93796702 - ,0.93344816 ,0.9574912 ,0.9611978 ,0.9538301 ,0.94208767 - ,0.93844307 ,0.9600012 ,0.9642909 ,0.9574326 ,0.94613939 - ,0.94305951 ,0.9625902 ,0.9679124 ,0.9608894 ,0.95006010 - ,0.94732447 ,0.9652668 ,0.9714955 ,0.9636651 ,0.95357045 - ,0.95204731 ,0.9681020 ,0.9746738 ,0.9659957 ,0.95718333 - ,0.95754260 ,0.9712236 ,0.9775883 ,0.9688384 ,0.96124457 - ,0.96262769 ,0.9744829 ,0.9802432 ,0.9724819 ,0.96510323 - ,0.96744559 ,0.9773397 ,0.9826635 ,0.9763755 ,0.96890250 - ,0.97198841 ,0.9802640 ,0.9855485 ,0.9794530 ,0.97335289 - ,0.97581215 ,0.9841504 ,0.9886425 ,0.9820909 ,0.97807428 - ,0.98074122 ,0.9873238 ,0.9909320 ,0.9853537 ,0.98318562 - ,0.98656778 ,0.9900431 ,0.9927363 ,0.9882823 ,0.98878283 - ,0.99129231 ,0.9932818 ,0.9951413 ,0.9923500 ,0.99385907 - ,0.99566295 ,0.9970155 ,0.9968195 ,0.9963426 ,0.99728643), - byrow=TRUE,ncol=5) - - if(is.null(PV)){ - if(!com.p.dist){ -if(p==3)rem=P3 -if(p==4)rem=P4 -if(p==5)rem=P5 -if(p==6)rem=P6 -}} -est=NA -for(j in 1:p)est[j]=corfun(x[,j],y)$cor -#id=which(est==max(est)) -R=order(est,decreasing=TRUE) -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -# -# If you use corfun=scor, set plotit=F -# -data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot) -data=listm(t(data)) -if(MC){ -library(parallel) -bvec<-mclapply(data,corCOMmcp_sub,x,y,corfun,...) -} -if(!MC)bvec<-lapply(data,corCOMmcp_sub,x,y,corfun,...) 
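-# Each element of bvec contains the p bootstrap correlation estimates for one
-# bootstrap sample; below, consecutive pairs of the ordered correlations (R) are
-# compared and the resulting p-values are calibrated against the stored estimate
-# of their null distribution.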
-output=matrix(NA,nrow=pm1,ncol=9) -dimnames(output)=list(NULL,c('IV.1','IV.2','Est.1', -'Est.2','Dif','ci.low','ci.hi','p.value','adj.p.value')) -mat=matrix(NA,nrow=nboot,ncol=p) -ihi<-floor((1-alpha/2)*nboot+.5) -ilow<-floor((alpha/2)*nboot+.5) -for(i in 1:nboot)mat[i,]=bvec[[i]] -for(j in 2:p){ -k=j-1 -output[k,1]=R[k] -output[k,2]=R[j] -output[k,3]=est[R[k]] -output[k,4]=est[R[j]] -bsort<-sort(mat[,R[k]]-mat[,R[j]]) -output[k,5]=est[R[k]]-est[R[j]] -output[k,6]=bsort[ilow] -output[k,7]=bsort[ihi] -pv=mean(bsort<0)+.5*mean(bsort==0) -output[k,8]=2*min(c(pv,1-pv)) -flag=output[k,8]>=rem[,k] -ID=which(flag==TRUE) -ic=max(ID,1) -output[k,8]=L[ic] -} -output[,9]=p.adjust(output[,8],method=method) -list(results=output) -} - - - - -corREGorder.crit<-function(p,n,corfun=wincor,iter=1000,nboot=1000,SEED=TRUE,MC=FALSE,pr=TRUE,...){ -# -# Estimate null distribution of the p-values for corREGorde -# -# -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -# -if(pr)print('Execution time might take several minutes') -pm1=p-1 -rem=matrix(NA,iter,pm1) -p1=p+1 -for(I in 1:iter){ -x=rmul(n,p) -y=rnorm(n) -est=NA -for(j in 1:p)est[j]=corfun(x[,j],y)$cor -R=order(est,decreasing=TRUE) -# -# If you use corfun=scor, set plotit=F -# -data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot) -data=listm(t(data)) -if(MC){ -library(parallel) -bvec<-mclapply(data,corCOMmcp_sub,x,y,corfun,...) -} -if(!MC)bvec<-lapply(data,corCOMmcp_sub,x,y,corfun,...) -output=matrix(NA,nrow=pm1,ncol=8) -dimnames(output)=list(NULL,c('IV','Larges.Est','Est.2','Dif','ci.low','ci.hi','p.value','adj.p.value')) -mat=matrix(NA,nrow=nboot,ncol=p) -ihi<-floor((1-alpha/2)*nboot+.5) -ilow<-floor((alpha/2)*nboot+.5) -for(i in 1:nboot)mat[i,]=bvec[[i]] -for(j in 2:p){ -k=j-1 -bsort<-sort(mat[,R[k]]-mat[,R[j]]) -pv=mean(bsort<0)+.5*mean(bsort==0) -rem[I,k]=2*min(c(pv,1-pv)) -} -} -rem -} - -corCOM.DVvsIV.crit<-function(p,n,corfun=wincor,iter=1000,nboot=500,SEED=TRUE,MC=FALSE,...){ -# -# Null p-value distribution for corCOM.DVvsIV -# -# p: number of independent variables -# n: sample size -# -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -# -pm1=p-1 -rem=matrix(NA,iter,pm1) -p1=p+1 -for(I in 1:iter){ -x=rmul(n,p) -y=rnorm(n) -est=NA -for(j in 1:p)est[j]=corfun(x[,j],y)$cor -id=which(est==max(est)) -R=order(est,decreasing=TRUE) -# -# If you use corfun=scor, set plotit=F -# -data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot) -data=listm(t(data)) -if(MC){ -library(parallel) -bvec<-mclapply(data,corCOMmcp_sub,x,y,corfun,...) -} -if(!MC)bvec<-lapply(data,corCOMmcp_sub,x,y,corfun,...) -mat=matrix(NA,nrow=nboot,ncol=p) -ihi<-floor((1-alpha/2)*nboot+.5) -ilow<-floor((alpha/2)*nboot+.5) -for(i in 1:nboot)mat[i,]=bvec[[i]] -for(j in 2:p){ -k=j-1 -bsort<-sort(mat[,R[1]]-mat[,R[j]]) -pv=mean(bsort<0)+.5*mean(bsort==0) -rem[I,k]=2*min(c(pv,1-pv)) -} -} -rem -} - -rexgauss<-function(n,mu=0,sigma=1,rate=1){ -# -# Generate data from an Ex-Gaussian distribution -# -x=rnorm(n,mean=mu,sd=sigma) -y=rexp(n,rate=rate) -z=x+y -z -} - - - -t1way.EXES.ci<-function(x,alpha=.05,tr=0,nboot=500,SEED=TRUE,ITER=5,adj=TRUE,...){ -# -# Confidence interval for explanatory measure of effect size -# -# ITER: yuenv2, for unequal sample sizes. 
iterates to get estimate -# -if(is.data.frame(x))x=as.matrix(x) -if(is.matrix(x))x=listm(x) -J=length(x) -n=lapply(x,length) -if(SEED)set.seed(2) -chk=t1wayv2(x,tr=tr,SEED=FALSE) -v=list() -val=NA -x=elimna(x) -for(i in 1:nboot){ -for(j in 1:J)v[[j]]=sample(x[[j]],replace=TRUE) -val[i]=t1wayv2(v,tr=tr,nboot=ITER,SEED=FALSE)$Effect.Size -} -ilow<-round((alpha/2) * nboot) -ihi<-nboot - ilow -ilow<-ilow+1 -val=sort(val) -ci=val[ilow] -ci[2]=val[ihi] -if(chk$p.value>alpha)ci[1]=0 -if(adj){ -fix=c(1, 1.268757, 1.467181, 1.628221, 1.763191, 1.856621, 1.993326) -if(J>8)print('No adjustment available when J>8') -J1=J-1 -if(j<=8){ -chk$Effect.Size=fix[J1]*chk$Effect.Size -ci=fix[J1]*ci -} -} -list(Effect.Size=chk$Effect.Size,ci=ci) -} - - -KMSmcp.ci<-function(x,tr=.2,alpha=0.05,SEED=TRUE,nboot=500,CI=TRUE,method='hoch'){ -# -# Estimate KMS effect size when comparing all -# pairs of groups in a one-way (independent) groups design -# -# CI=TRUE: confidence intervals for the measure of effect size are computed. -# -if(is.matrix(x) || is.data.frame(x))x<-listm(x) -if(!is.list(x))stop('Data must be stored in a matrix or in list mode.') -J=length(x) -Jall=(J^2-J)/2 -con1=con1way(J) -output=matrix(NA,nrow=Jall,ncol=7) -dimnames(output)=list(NULL,c('Group','Group','Effect.Size','low.ci','up.ci','p.value','p.adjust')) -ic=0 -for(j in 1:J){ -for(k in 1:J){ -if(j0))Best=output[id,2] -if(flag==Jm1)Best='All' -setClass('BIN',slots=c('Group.with.largest.estimate','Select.Best.p.value','Larger.than','n','output')) -put=new('BIN',Group.with.largest.estimate=R[[1]],Select.Best.p.value=pv,Larger.than=Best,n=n,output=output) -put -} - -deplin.ES.summary.CI<-function(x,con=NULL,tr=.2,REL.MAG=NULL,SEED=TRUE,nboot=1000){ -# -# For J dependent variables, -# compute four measures of effect size based on a linear contrast of the J variables specified by the argument -# con -# -# Generalizes dep.ES.summary.CI -# Example: -# If x is a matrix with two columns and con=c(1,-1), get the same results as dep.ES.summary.CI -# -# By default, do all pairwise comparisons -# -# Measures of effect size: -# -# AKP: robust standardized difference similar to Cohen's d -# QS: Quantile shift based on the median of the distribution of difference scores, -# QStr: Quantile shift based on the trimmed mean of the distribution of X-Y -# SIGN: P(X1){ -temp<-c(temp,x[[flag]]) -}} -data[[k]]<-temp -} -POOLED<-rmmcp(data,con=con,tr=tr,alpha=alpha,dif=dif,hoch=hoch) -PSI=NULL -A=NULL -} -if(!pool){ -A=list() -PSI=list() -mat<-matrix(c(1:JK),ncol=K,byrow=TRUE) -for(j in 1:J){ -data<-list() -ic<-0 -for(k in 1:K){ -ic<-ic+1 -data[[ic]]<-x[[mat[j,k]]] -} -temp=rmmcp(data,con=con,tr=tr,alpha=alpha,dif=dif,hoch=hoch) -A[[j]]=temp$test -PSI[[j]]=temp$psihat -POOLED=NULL -}} -list(TESTS.4.EACH.LEVEL.OF.A=A,PSIHAT.4.EACH.LEVEL.OF.A=PSI,POOLED.RESULTS=POOLED) -} - - -in.interval<-function(x,low,up){ -# -# flag values in the vector x between low and up -# -x=elimna(x) -n=length(x) -id=rep(FALSE,n) -flag1=x<=up -flag2=x>=low -flag=flag1*flag2 -id[flag]=TRUE -id -} - - -wwlin.es<-function(J,K,x,tr = 0.2, REL.MAG = NULL, SEED = TRUE, nboot = 1000){ -# -# # For within-by-within -# -# Effect sizes based on linear sum of the random variables. 
-# Simplest case, compute effect sizes on x-y, difference scores -# -con=con2way(J,K) -A=deplin.ES.summary.CI(x,con=con$conA,tr=tr,SEED=SEED,REL.MAG=REL.MAG,nboot=nboot) -B=deplin.ES.summary.CI(x,con=con$conB,tr=tr,SEED=SEED,REL.MAG=REL.MAG,nboot=nboot) -AB=deplin.ES.summary.CI(x,con=con$conAB,tr=tr,SEED=SEED,REL.MAG=REL.MAG,nboot=nboot) -list(Factor.A=A,Factor.B=B,Interactions=AB) -} - -wwwlin.es<-function(J,K,L,x,tr = 0.2, REL.MAG = NULL, SEED = TRUE, nboot = 1000){ -# -# For within-by-within-by-within -# -# Effect sizes based on linear sum of the random variables. -# Simplest case, compute effect sizes based on x-y, difference scores -# -con=con3way(J,K,L) -A=deplin.ES.summary.CI(x,con=con$conA,tr=tr,SEED=SEED,REL.MAG=REL.MAG,nboot=nboot) -B=deplin.ES.summary.CI(x,con=con$conB,tr=tr,SEED=SEED,REL.MAG=REL.MAG,nboot=nboot) -C=deplin.ES.summary.CI(x,con=con$conC,tr=tr,SEED=SEED,REL.MAG=REL.MAG,nboot=nboot) -AB=deplin.ES.summary.CI(x,con=con$conAB,tr=tr,SEED=SEED,REL.MAG=REL.MAG,nboot=nboot) -AC=deplin.ES.summary.CI(x,con=con$conAC,tr=tr,SEED=SEED,REL.MAG=REL.MAG,nboot=nboot) -BC=deplin.ES.summary.CI(x,con=con$conBC,tr=tr,SEED=SEED,REL.MAG=REL.MAG,nboot=nboot) -ABC=deplin.ES.summary.CI(x,con=con$conABC,tr=tr,SEED=SEED,REL.MAG=REL.MAG,nboot=nboot) -list(Factor.A=A,Factor.B=B,Factor.C=C,Inter.AB=AB,Inte.AC=AC,Inter.BC=BC,Inter.ABC=ABC) -} - -bwwA.es<-function(J,K,L,x,fun=QSci,nboot=1000,...){ -# -# For every two levels of Factor A, compute effect size -# and do this for each -# level of Factors B and C. -# -if(is.matrix(x) || is.data.frame(x))x=listm(x) -JKL=J*K*L -KL=K*L -id=matrix(c(1:JKL),ncol=KL,byrow=TRUE) -con=con.all.pairs(J) -A=list() -N=(J^2-J)/2 -for(i in 1:N){ -w=which(con[,i]!=0) -isel=id[w,] -A[[i]]=bwwA.es.sub(2,K,L,x[isel],fun=fun,nboot=nboot,...) 
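-# For this pair of levels of Factor A (the nonzero rows of con[,i]), bwwA.es.sub
-# computes an effect size for every combination of levels of Factors B and C.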
-} -list(A=A,con=con) -} - - - -bwwA.es.sub<-function(J,K,L,x,fun=QSci,nboot=1000,...){ -# -# Effect sizes for the between factor, computed -# for each level of the within factors -# -if(J!=2)stop('Must have J=2') -JKL=J*K*L -KL=K*L -ic=0 -LOW=1 -UP=1+KL -id=matrix(c(1:JKL),ncol=KL,byrow=TRUE) -ES=matrix(NA,nrow=KL,ncol=7) -for(k in 1:K){ -for(l in 1:L){ -ic=ic+1 -ES[ic,1]=k -ES[ic,2]=l -isel=id[,ic] -d=IND.PAIR.ES(x[id[,ic]],fun=fun,...)$effect.size -temp=c(d[[1]]$n1,d[[1]]$n2,d[[1]]$Q.Effect,d[[1]][4]$ci) -ES[ic,3:7]=temp -}} -dimnames(ES)=list(NULL,c('B.Level','C.Level','n1','n2','Efect.Size','ci.low','ci.up')) -ES -} - -anc.ES.sum<-function(x1,y1,x2,y2,fr1=1,fr2=1,tr=.2,alpha=.05,pts=NA,SEED=TRUE,nboot=1000, -pr=TRUE,xout=FALSE,outfun=out, nmin=12,NULL.V = c(0, 0, 0.5, 0.5, 0.5, 0), REL.M = NULL, n.est = 1e+06,...){ -# -# -# For each point where the regression lines are compared, -# compute several measures of effect size via the R function ESsummary.CI -# -# Results for the ith point are returned in ES.4.Each.pt[[i]] -# -# -if(ncol(as.matrix(x1))>1)stop('One covariate only is allowed with this function') -if(length(x1)!=length(y1))stop('x1 and y1 have different lengths') -if(length(x2)!=length(y2))stop('x2 and y2 have different lengths') -xy=elimna(cbind(x1,y1)) -x1=xy[,1] -y1=xy[,2] -xy=elimna(cbind(x2,y2)) -x2=xy[,1] -y2=xy[,2] -A=list() -if(xout){ -flag<-outfun(x1,...)$keep -x1<-x1[flag] -y1<-y1[flag] -flag<-outfun(x2,...)$keep -x2<-x2[flag] -y2<-y2[flag] -} -FLAG=TRUE -if(is.na(pts[1])){ -FLAG=FALSE -npt<-5 -isub<-c(1:5) # Initialize isub -test<-c(1:5) -xorder<-order(x1) -y1<-y1[xorder] -x1<-x1[xorder] -xorder<-order(x2) -y2<-y2[xorder] -x2<-x2[xorder] -n1<-1 -n2<-1 -vecn<-1 -for(i in 1:length(x1))n1[i]<-length(y1[near(x1,x1[i],fr1)]) -for(i in 1:length(x1))n2[i]<-length(y2[near(x2,x1[i],fr2)]) -for(i in 1:length(x1))vecn[i]<-min(n1[i],n2[i]) -sub<-c(1:length(x1)) -isub[1]<-min(sub[vecn>=nmin]) -isub[5]<-max(sub[vecn>=nmin]) -isub[3]<-floor((isub[1]+isub[5])/2) -isub[2]<-floor((isub[1]+isub[3])/2) -isub[4]<-floor((isub[3]+isub[5])/2) -pts=NA -n1=NA -n2=NA -for (i in 1:5){ -g1<-y1[near(x1,x1[isub[i]],fr1)] -g2<-y2[near(x2,x1[isub[i]],fr2)] -pts[i]=x1[isub[i]] -g1<-g1[!is.na(g1)] -g2<-g2[!is.na(g2)] -n1[i]=length(g1) -n2[i]=length(g2) -A[[i]]=ES.summary.CI(g1,g2,tr=tr,SEED=SEED,alpha=alpha,nboot=nboot,NULL.V=NULL.V, REL.M =REL.M,n.est=n.est) -}} -if(FLAG){ -n1<-1 -n2<-1 -vecn<-1 -for(i in 1:length(pts)){ -n1[i]<-length(y1[near(x1,pts[i],fr1)]) -n2[i]<-length(y2[near(x2,pts[i],fr2)]) -} -for (i in 1:length(pts)){ -g1<-y1[near(x1,pts[i],fr1)] -g2<-y2[near(x2,pts[i],fr2)] -g1<-g1[!is.na(g1)] -g2<-g2[!is.na(g2)] -A[[i]]=ES.summary.CI(g1,g2,tr=tr,SEED=SEED,alpha=alpha,nboot=nboot,NULL.V=NULL.V, REL.M =REL.M,n.est=n.est) -}} -list(n1=n1,n2=n2,pts=pts,ES.4.Each.pt=A) -} - -Dancova.ES.sum<-function(x1,y1,x2=x1,y2,fr1=1,fr2=1,tr=.2,alpha=.05,pts=NA,xout=FALSE,outfun=out, -REL.MAG=NULL, SEED=TRUE,nboot=1000,...){ -# -# Compute measures of effect size based on difference scores. -# This is done for each covariate value where the regression lines are compared as indicated the the argument -# pts -# This is done via the R function dep.ES.summary.CI -# -# No parametric assumption is made about the form of -# the regression lines--a running interval smoother is used. 
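-# A minimal usage sketch (hypothetical data): with x1 the covariate and y1, y2 the
-# outcomes at times 1 and 2, all vectors of the same length,
-# Dancova.ES.sum(x1,y1,y2=y2)
-# returns the dep.ES.summary.CI effect sizes at five covariate values chosen from the data.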
-# -# Assume data are in x1 y1 x2 and y2 -# -A=list() -N=NA -if(ncol(as.matrix(x1))>1)stop('One covariate only is allowed with this function') -if(length(x1)!=length(y1))stop('x1 and y1 have different lengths') -if(length(x1)!=length(x2))stop('x1 and y2 have different lengths') -if(length(x2)!=length(y2))stop('x2 and y2 have different lengths') -if(length(y1)!=length(y2))stop('y1 and y2 have different lengths') -xy=elimna(cbind(x1,y1,x2,y2)) -x1=xy[,1] -y1=xy[,2] -x2=xy[,3] -y2=xy[,4] -xorder<-order(x1) -y1<-y1[xorder] -x1<-x1[xorder] -xorder<-order(x2) -y2<-y2[xorder] -x2<-x2[xorder] -FLAG=TRUE -n=length(y1) -ivals=c(1:n) -if(is.na(pts[1])){ -FLAG=FALSE -isub<-c(1:5) # Initialize isub -test<-c(1:5) -xorder<-order(x1) -y1<-y1[xorder] -x1<-x1[xorder] -xorder<-order(x2) -y2<-y2[xorder] -x2<-x2[xorder] -n1<-1 -n2<-1 -vecn<-1 -for(i in 1:length(x1))n1[i]<-length(y1[near(x1,x1[i],fr1)]) -for(i in 1:length(x1))n2[i]<-length(y2[near(x2,x1[i],fr2)]) -for(i in 1:length(x1))vecn[i]<-min(n1[i],n2[i]) -sub<-c(1:length(x1)) -isub[1]<-min(sub[vecn>=12]) -isub[5]<-max(sub[vecn>=12]) -isub[3]<-floor((isub[1]+isub[5])/2) -isub[2]<-floor((isub[1]+isub[3])/2) -isub[4]<-floor((isub[3]+isub[5])/2) -for (i in 1:5){ -t1=near(x1,x1[isub[i]],fr1) -t2=near(x2,x1[isub[i]],fr2) -iv1=ivals[t1] -iv2=ivals[t2] -pick=unique(c(iv1,iv2)) -N[i]=length(pick) -pts[i]=x1[isub[i]] -A[[i]]=dep.ES.summary.CI(y1[pick],y2[pick], tr=tr, alpha=alpha, REL.MAG=REL.MAG, SEED=SEED,nboot=nboot) -}} -if(FLAG){ -xorder<-order(x1) -y1<-y1[xorder] -x1<-x1[xorder] -xorder<-order(x2) -y2<-y2[xorder] -x2<-x2[xorder] -npts=length(pts) -for(i in 1:npts){ -t1=near(x1,pts[i],fr1) -t2=near(x2,pts[i],fr2) -iv1=ivals[t1] -iv2=ivals[t2] -pick=unique(c(iv1,iv2)) -N[i]=length(pick) -A[[i]]=dep.ES.summary.CI(y1[pick],y2[pick], tr=tr, alpha=alpha, REL.MAG=REL.MAG, SEED=SEED,nboot=nboot) -}} -list(n=N,pts=pts,ES.4.Each.pt=A) -} - -pool.fun<-function(J,K,x){ -# -# x is assumed to have list mode. -# -# For a between-by-within design -# For data in list mode, pool the data -# over Factor A (between) -# and store in a new variable have list model with length K -# -# That is, ignore levels of A -# -JK=J*K -imat=matrix(c(1:JK),ncol=K,byrow=TRUE) -B=list() -for(k in 1:K){ -id=imat[,k] -B[[k]]=as.vector(matl(x[id])) -} -B -} - -spmcpbA<-function(J,K,x,est=tmean,JK=J*K,grp=c(1:JK),dif=TRUE,alpha=.05, -nboot=NA,SEED=TRUE,...){ -# -# For each level of Factor A -# use a percentile bootstrap for all pairwise -# multiple comparisons -# among dependent groups in a split-plot design -# -# -# If dif=T, the analysis is done based on all pairs -# of difference scores. -# Otherwise, marginal measures of location are used. -# -# The R variable x is assumed to contain the raw -# data stored in list mode or in a matrix. -# If in list mode, x[[1]] contains the data -# for the first level of both factors: level 1,1. -# x[[2]] is assumed to contain the data for level 1 of the -# first factor and level 2 of the second: level 1,2 -# x[[K]] is the data for level 1,K -# x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc. -# -# If the data are in a matrix, column 1 is assumed to -# correspond to x[[1]], column 2 to x[[2]], etc. 
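-# For example, with J=2 and K=3: x[[1]], x[[2]], x[[3]] contain the data for levels 1,1
-# through 1,3, and x[[4]], x[[5]], x[[6]] the data for levels 2,1 through 2,3.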
-# -# When in list mode x is assumed to have length JK, the total number of -# groups being tested, but a subset of the data can be analyzed -# using grp -# - - if(is.matrix(x) || is.data.frame(x)) { - y <- list() - for(j in 1:ncol(x)) - y[[j]] <- x[, j] - x <- y -} -JK<-J*K -data<-list() -for(j in 1:length(x)){ -data[[j]]<-x[[grp[j]]] # Now have the groups in proper order. -} -x<-data -A=list() -imat=matrix(c(1:JK),nrow=J,byrow=TRUE) -for(j in 1:J){ -# Now call function rmmcppb to do the analysis -id=imat[j,] -A[[j]]<-rmmcppb(x[id],est=est,pr=FALSE,nboot=nboot,dif=dif,alpha=alpha,plotit=FALSE,SEED=SEED,...) -} -list(A.Level=A) -} - - -anc.grid<-function(x1,y1,x2,y2, alpha=.05, -#IV=c(1,2), -Qsplit1=.5,Qsplit2=.5, SV1=NULL,SV2=NULL, -tr=.2,PB=FALSE,est=tmean,nboot=1000,CI=FALSE, -xout=FALSE,outfun=outpro,SEED=TRUE,...){ -# -# Two independent groups. -# Split on two independent variables based on data in x1. Compare the corresponding regions -# -# -# Qsplit: split the independent variable based on the -# quantiles indicated by Qsplit -# Example -# Qsplit1=c(.25,.5,.75) -# Qsplit2=.5 -# would split based on the quartiles for the first independent variable and the median -# for the second independent variable -# -# Alternatively, the data can be split based in values stored in the arguments -# SV1 and SV2. -# - -# Then test the hypothesis of equal measures of location -# IV[1]: indicates the column containing the first independent variable to use. -# IV[2]: indicates the column containing the second independent variable to use. -# -# if(length(unique(y)>2))stop('y should be binary') -p=ncol(x1) -if(p==1)stop('There should be two or more independent variables') -p1=p+1 -if(p!=ncol(x2))stop('x2 and x1 do not have the same of variables, ncol(x1)!=ncol(x2)') - -if(ncol(x1) != 2 || ncol(x2) !=2)stop('Should have two covariates') - -xy1<-elimna(cbind(x1,y1)) -x1<-xy1[,1:p] -y1<-xy1[,p1] - -xy2<-elimna(cbind(x2,y2)) -x2<-xy2[,1:p] -y2<-xy2[,p1] -ES=list() -if(xout){ -flag<-outfun(x1,plotit=FALSE)$keep -x1<-x1[flag,] -y1<-y1[flag] -flag<-outfun(x2,plotit=FALSE)$keep -x2<-x2[flag,] -y2<-y2[flag] -} -J=length(Qsplit1)+1 -K=length(Qsplit2)+1 -if(!is.null(SV1))J=length(SV1)+1 -if(!is.null(SV2))K=length(SV2)+1 - -JK=J*K -MAT=matrix(1:JK,J,K,byrow=TRUE) -z=list() -group=list() -N.int=J -N.int2=K - -NG=N.int*N.int2 -GRID=matrix(NA,NG,9) -GI=matrix(NA,NG,4) # grid intervals -L1=NULL -L2=NULL -qv=quantile(x1[,1],Qsplit1) -if(!is.null(SV1))qv=SV1 -qv=c(min(x1[,1]),qv,max(x1[,1])) -qv2=quantile(x2[,2],Qsplit2) -if(!is.null(SV2))qv2=SV2 -qv2=c(min(x2[,2]),qv2,max(x2[,2])) -ic=0 -for(j in 1:N.int){ -j1=j+1 -xsub1.1=binmat(xy1,1,qv[j],qv[j1]) # split, group 1 -xsub1.2=binmat(xy2,1,qv[j],qv[j1]) #split, group 2 -for(k in 1:N.int2){ -k1=k+1 -xsub2.1=binmat(xsub1.1,2,qv2[k],qv2[k1]) -xsub2.2=binmat(xsub1.2,2,qv2[k],qv2[k1]) -ic=ic+1 -if(length(xsub2.1[,3])<=7 || length(xsub2.2[,3])<=7)print('Not enough data in one or more grids') -GI[ic,]=c(qv[j],qv[j1],qv2[k],qv2[k1]) - -if(length(xsub2.1[,3])>7 || length(xsub2.2[,3])>7){ -a=yuen(xsub2.1[,3],xsub2.2[,3],tr=tr,alpha=alpha) -a=pool.a.list(a) -a=a[c(1:4,8,5:7)] -if(PB){ -pbv=trimpb2(xsub2.1[,3],xsub2.2[,3],tr=tr,alpha=alpha,nboot=nboot) -pbv=pool.a.list(pbv) -a[6:8]=pbv[c(2,3,1)] -} -GRID[ic,1:8]=a[1:8] -if(!CI)ES[[ic]]=ES.summary(xsub2.1,xsub2.2,tr=tr) -if(CI)ES[[ic]]=ES.summary.CI(xsub2.1,xsub2.2,tr=tr) -}} -} -dimnames(GI)=list(NULL,c('Int.1.low','Int.1.up','Int.2.low','Int.2.up')) -GRID[,9]=p.adjust(GRID[,8],method='hoch') 
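-# Column 9: the p-values in column 8 adjusted across all grid cells via Hochberg's method.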
-dimnames(GRID)=list(NULL,c('n1','n2','est.1','est.2','dif','ci.low','ci.up','p.value','adj.p.value')) -list(GRID.INTERVALS=GI,GRID=GRID, Effect.Sizes=ES) -} - -anc.grid.bin<-function(x1,y1,x2,y2, alpha=.05,method='KMS', -Qsplit1=.5,Qsplit2=.5, SV1=NULL,SV2=NULL, -xout=FALSE,outfun=outpro,SEED=TRUE,...){ -# -# Two independent groups. -# Split on two independent variables based on data in x1. Compare the corresponding regions -# -# -# Qsplit: split the independent variable based on the -# quantiles indicated by Qsplit -# Example -# Qsplit1=c(.25,.5,.75) -# Qsplit2=.5 -# would split based on the quartiles for the first independent variable and the median -# for the second independent variable -# -# The argument method can be 'KMS, 'SK' or 'ECP' -# See the 5th edition of Wilcox, Intro to Robust Estimation and Hypothesis Testing -# details. -# -# Alternatively, the data can be split based in values stored in the arguments -# SV1 and SV2. -# -if(identical(method,'ZHZ'))stop('Argument method should be KMS, SK or ECP') -if(length(unique(y1))>2)stop('y1 should be binary') -if(length(unique(y2))>2)stop('y2 should be binary') -p=ncol(x1) -if(p==1)stop('There should be two or more independent variables') -p1=p+1 -if(p!=ncol(x2))stop('x2 and x1 do not have the same of variables, ncol(x1)!=ncol(x2)') -if(ncol(x1) != 2 || ncol(x2) !=2)stop('Should have two covariates') -xy1<-elimna(cbind(x1,y1)) -x1<-xy1[,1:p] -y1<-xy1[,p1] -xy2<-elimna(cbind(x2,y2)) -x2<-xy2[,1:p] -y2<-xy2[,p1] -if(xout){ -flag<-outfun(x1,plotit=FALSE)$keep -x1<-x1[flag,] -y1<-y1[flag] -flag<-outfun(x2,plotit=FALSE)$keep -x2<-x2[flag,] -y2<-y2[flag] -} -J=length(Qsplit1)+1 -K=length(Qsplit2)+1 -if(!is.null(SV1))J=length(SV1)+1 -if(!is.null(SV2))K=length(SV2)+1 -JK=J*K -MAT=matrix(1:JK,J,K,byrow=TRUE) -z=list() -group=list() -N.int=J -N.int2=K -NG=N.int*N.int2 -GRID=matrix(NA,NG,9) -GI=matrix(NA,NG,4) # grid intervals -L1=NULL -L2=NULL -qv=quantile(x1[,1],Qsplit1) -if(!is.null(SV1))qv=SV1 -qv=c(min(x1[,1]),qv,max(x1[,1])) -qv2=quantile(x2[,2],Qsplit2) -if(!is.null(SV2))qv2=SV2 -qv2=c(min(x2[,2]),qv2,max(x2[,2])) -ic=0 -for(j in 1:N.int){ -j1=j+1 -xsub1.1=binmat(xy1,1,qv[j],qv[j1]) # split, group 1 -xsub1.2=binmat(xy2,1,qv[j],qv[j1]) #split, group 2 -for(k in 1:N.int2){ -k1=k+1 -xsub2.1=binmat(xsub1.1,2,qv2[k],qv2[k1]) -xsub2.2=binmat(xsub1.2,2,qv2[k],qv2[k1]) -ic=ic+1 -if(length(xsub2.1[,3])<=7 || length(xsub2.2[,3])<=7)print('Not enough data in one or more grids') -GI[ic,]=c(qv[j],qv[j1],qv2[k],qv2[k1]) - -if(length(xsub2.1[,3])>7 || length(xsub2.2[,3])>7){ -a=binom2g(sum(xsub2.1[,3]),length(xsub2.1[,3]), -sum(xsub2.2[,3]),length(xsub2.2[,3]), method=method,alpha=alpha) -if(identical(method,'KMS')){ -a=pool.a.list(a) -#print(a) -a=c(length(xsub2.1[,3]),length(xsub2.2[,3]),a[c(3:5,1:2,6)]) -} -if(identical(method,'SK')){ -a=c(length(xsub2.1[,3]),length(xsub2.2[,3]),a$p1,a$p2,a$p1-a$p2,NA,NA,a$p.value) -} -if(identical(method,'ECP')){ -a=c(length(xsub2.1[,3]),length(xsub2.2[,3]),a$output[1,3:8]) -} -GRID[ic,1:8]=a[1:8] -}} -} -dimnames(GI)=list(NULL,c('Int.1.low','Int.1.up','Int.2.low','Int.2.up')) -GRID[,9]=p.adjust(GRID[,8],method='hoch') -dimnames(GRID)=list(NULL,c('n1','n2','est.1','est.2','dif','ci.low','ci.up','p.value','adj.p.value')) -list(GRID.INTERVALS=GI,GRID=GRID) -} - -best.cell.sub<-function(x,alpha=.05,LARGEST=TRUE,method='AC',AUTO=FALSE){ -# -# For a multinomial distribution, can a decision be made -# about which cell has the highest probability -# -# x Assumed to contain the cell frequencies -# -x=elimna(x) -n=sum(x) 
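-# Order the cells by their frequencies and compare the cell with the largest
-# (or smallest, if LARGEST=FALSE) estimate to each of the remaining cells;
-# the returned vector contains the p-values from cell.com.pv.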
-NCELL=length(x) -NCm1=NCELL-1 -xor=order(x,decreasing = LARGEST) -output=NA -ic=0 -for(j in 2:NCELL){ -ic=ic+1 -output[ic]=cell.com.pv(x,xor[1],xor[j]) -} -output -} - - -cell.com.pv<-function(x,i=1,j=2,method='AC'){ -# -# For a multinomial distribution, compute a confidence interval -# for p_i-p_j, the difference between the probabilities asscoiated with cells i and j -# -# x= cell frequencies -# -n=sum(x) -p1=x[i]/n -p2=x[j]/n -COR=0-sqrt(p1*p1/((1-p1)*(1-p2))) -a=seq(.001,.1,.001) -a=c(a,seq(.1,.99,.01)) -a=rev(a) - -if(x[i]==x[j])pv=1 -if(x[i]!=x[j]){ -for(k in 1:length(a)){ -c2=acbinomci(x[j],n,alpha=a[k])$ci -c1=acbinomci(x[i],n,alpha=a[k])$ci -T1=(p1-c1[1])^2+(p2-c2[2])^2-2*COR*(p1-c1[1])*(c2[2]-p2) -T2=(p1-c1[2])^2+(p2-c2[1])^2-2*COR*(c1[2]-p1)*(p2-c2[1]) -T2=max(c(0,T2)) -T1=max(c(0,T1)) -L=p1-p2-sqrt(T1) -U=p1-p2+sqrt(T2) -pv=a[k] -if(sign(L*U)<0)break -}} -if(n<=35){ -if(x[i]==x[j])pvnew=1 -else{ -pv.up=pv+.1 -anew=seq(pv,pv.up,.01) -for(k in 1:length(anew)){ -c1=binom.conf(x[i],n,AUTO=TRUE,method=method,alpha=anew[k],pr=FALSE)$ci -c2=binom.conf(x[j],n,AUTO=TRUE,method=method,alpha=anew[k],pr=FALSE)$ci -T1=(p1-c1[1])^2+(p2-c2[2])^2-2*COR*(p1-c1[1])*(c2[2]-p2) -T2=(p1-c1[2])^2+(p2-c2[1])^2-2*COR*(c1[2]-p1)*(p2-c2[1]) -T2=max(c(0,T2)) -T1=max(c(0,T1)) -L=p1-p2-sqrt(T1) -U=p1-p2+sqrt(T2) -pvnew=anew[k] -if(sign(L*U)>0)break -}} -pv=pvnew -} -pv -} - - -cell.com<-function(x,i=1,j=2,alpha=.05,AUTO=TRUE,method='AC'){ -# -# For a multinomial distribution, compute a confidence interval -# for p_i-p_j, the difference between the probabilities associated with cells i and j -# -# x= cell frequencies -# -n=sum(x) -c1=binom.conf(x[i],n,AUTO=AUTO,method=method,alpha=alpha,pr=FALSE)$ci -c2=binom.conf(x[j],n,AUTO=AUTO,method=method,alpha=alpha,pr=FALSE)$ci -p1=x[i]/n -p2=x[j]/n -COR=0-sqrt(p1*p1/((1-p1)*(1-p2))) -T1=(p1-c1[1])^2+(p2-c2[2])^2-2*COR*(p1-c1[1])*(c2[2]-p2) -T2=(p1-c1[2])^2+(p2-c2[1])^2-2*COR*(c1[2]-p1)*(p2-c2[1]) -T2=max(c(0,T2)) -T1=max(c(0,T1)) -L=p1-p2-sqrt(T1) -U=p1-p2+sqrt(T2) -list(ci=c(L,U)) -} - -best.cell.crit<-function(N,ncell,LARGEST=TRUE,iter=1000,alpha=.05,SEED=TRUE,AUTO=FALSE){ -# -# -# N sample size -# ncell number of cells -# -if(SEED)set.seed(2) -NCm1=ncell-1 -pv=NA -a=rmultinom(iter,N,rep(1/ncell,ncell)) -pv.mat=apply(a,2,best.cell.sub,AUTO=AUTO,LARGEST=LARGEST) -init=apply(pv.mat,1,qest,alpha) -pv.mat=t(pv.mat) # For simplicity when using extant code related to this function -z=optim(0,anc.best.fun,init=init,iter=iter,rem=pv.mat,Jm1=NCm1, -alpha=alpha,method='Brent',lower=0,upper=1) -p.crit=z$par*init -p.crit -} - -BEST.cell<-function(x,alpha=.05,LARGEST=TRUE,method='AC',p.crit=NULL,AUTO=FALSE,iter=2000,SEED=TRUE,pr=TRUE){ -# -# For a multinomial distribution, can a decision be made about -# about which cell has the highest probability? 
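-# Hypothetical example: BEST.cell(c(34,21,19,26)) asks whether the cell with the
-# largest observed frequency can be declared the cell with the highest probability.
-# Execution time can be noticeable because the critical p-values are simulated.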
-# -# PV if specified, is a N by iter matrix of p-values that can be computed via best.cell.crit -# N=number of cells -# x Assumed to contain the cell frequencies -# -if(pr)print('Confidence intervals are based on the critical p-values') -if(SEED)set.seed(2) -x=elimna(x) -n=sum(x) -NCELL=length(x) -NCm1=NCELL-1 -xor=order(x,decreasing = LARGEST) -IND.pv=NA -ic=0 -CI=matrix(NA,nrow=NCm1,ncol=2) -for(j in 2:NCELL){ -ic=ic+1 -IND.pv[ic]=cell.com.pv(x,xor[1],xor[j]) -} -if(is.null(p.crit))p.crit=best.cell.crit(n,NCELL,LARGEST=LARGEST,iter=iter,AUTO=FALSE,SEED=SEED) -output=matrix(NA,nrow=NCm1,8) -output[,1]=rep(x[xor[1]]/n,NCm1) -output[,2]=xor[2:NCELL] -output[,3]=x[xor[2:NCELL]]/n -output[,4]=output[,1]-output[,3] -ic=0 -for(j in 2:NCELL){ -ic=ic+1 -CI=cell.com(x,xor[1],xor[j],AUTO=AUTO,method=method,alpha=p.crit[ic]) -output[ic,5:6]=CI$ci -} -output[,7]=IND.pv -output[,8]=p.crit -dimnames(output)=list(NULL,c('Largest.Est','CELL','Est','Dif','ci.low','ci.up','p.value','p.crit')) -flag=IND.pv<=p.crit -id=output[flag,2] -setClass('BIN',slots=c('Cell.with.largest.estimate','Larger.than','n','output')) -put=new('BIN',Cell.with.largest.estimate=xor[1],Larger.than=id,n=n,output=output) - -if(!LARGEST){ -dimnames(output)=list(NULL,c('Smallest.Est','CELL','Est','Dif','ci.low','ci.up','p.value','p.crit')) -setClass('BIN',slots=c('Cell.with.smallest.estimate','smaller.than','n','output')) -put=new('BIN',Cell.with.smallest.estimate=xor[1],smaller.than=id,n=n,output=output) -} -put -} - - anc.grid.cat<-function(x1,y1,x2,y2, alpha=.05,KMS=FALSE, -Qsplit1=.5,Qsplit2=.5, SV1=NULL,SV2=NULL,pr=TRUE, -xout=FALSE,outfun=outpro){ -# -# Two independent groups. -# Split on two independent variables based on data in x1. Compare the corresponding regions -# -# -# Qsplit: split the independent variable based on the -# quantiles indicated by Qsplit -# Example -# Qsplit1=c(.25,.5,.75) -# Qsplit2=.5 -# would split based on the quartiles for the first independent variable and the median -# for the second independent variable -# -# The argument method can be 'KMS, 'SK' or 'ECP' -# See the 5th edition of Wilcox, Intro to Robust Estimation and Hypothesis Testing -# details. -# -# Alternatively, the data can be split based in values stored in the arguments -# SV1 and SV2. 
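-# Hypothetical call: with x1 and x2 each containing two covariates,
-# anc.grid.cat(x1,y1,x2,y2,Qsplit1=c(.25,.5,.75),Qsplit2=.5)
-# compares the two groups via binband within each region of the resulting 4-by-2 grid.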
-# -if(!KMS){ -if(pr)print('To get confidence intervals, set KMS=TRUE') -} -p=ncol(x1) -if(p==1)stop('There should be two or more independent variables') -p1=p+1 -if(p!=ncol(x2))stop('x2 and x1 do not have the same of variables, ncol(x1)!=ncol(x2)') -if(ncol(x1) != 2 || ncol(x2) !=2)stop('Should have two covariates') -xy1<-elimna(cbind(x1,y1)) -x1<-xy1[,1:p] -y1<-xy1[,p1] -xy2<-elimna(cbind(x2,y2)) -x2<-xy2[,1:p] -y2<-xy2[,p1] -if(xout){ -flag<-outfun(x1,plotit=FALSE)$keep -x1<-x1[flag,] -y1<-y1[flag] -flag<-outfun(x2,plotit=FALSE)$keep -x2<-x2[flag,] -y2<-y2[flag] -} -J=length(Qsplit1)+1 -K=length(Qsplit2)+1 -if(!is.null(SV1))J=length(SV1)+1 -if(!is.null(SV2))K=length(SV2)+1 -JK=J*K -MAT=matrix(1:JK,J,K,byrow=TRUE) -z=list() -group=list() -N.int=J -N.int2=K -NG=N.int*N.int2 -#GRID=matrix(NA,NG,9) -GRID=list() -GI=matrix(NA,NG,4) # grid intervals -L1=NULL -L2=NULL -qv=quantile(x1[,1],Qsplit1) -if(!is.null(SV1))qv=SV1 -qv=c(min(x1[,1]),qv,max(x1[,1])) -qv2=quantile(x2[,2],Qsplit2) -if(!is.null(SV2))qv2=SV2 -qv2=c(min(x2[,2]),qv2,max(x2[,2])) -ic=0 -for(j in 1:N.int){ -j1=j+1 -xsub1.1=binmat(xy1,1,qv[j],qv[j1]) # split, group 1 -xsub1.2=binmat(xy2,1,qv[j],qv[j1]) #split, group 2 -for(k in 1:N.int2){ -k1=k+1 -xsub2.1=binmat(xsub1.1,2,qv2[k],qv2[k1]) -xsub2.2=binmat(xsub1.2,2,qv2[k],qv2[k1]) -ic=ic+1 -if(length(xsub2.1[,3])<=7 || length(xsub2.2[,3])<=7)print('Not enough data in one or more grids') -GI[ic,]=c(qv[j],qv[j1],qv2[k],qv2[k1]) -if(length(xsub2.1[,3])>7 || length(xsub2.2[,3])>7){ -GRID[[ic]]=binband(xsub2.1[,3],xsub2.2[,3],alpha=alpha,KMS=KMS,plotit=FALSE,pr=FALSE) -}}} -dimnames(GI)=list(NULL,c('Int.1.low','Int.1.up','Int.2.low','Int.2.up')) -list(GRID.INTERVALS=GI,GRID=GRID) -} - - -PMD.PCD<-function(n=NULL,delta=.5,x=NULL, tr=.2, SIG=NULL,alpha=.05,p.crit=NULL,iter=5000,SEED=TRUE){ -# -# Which group has the largest measures of location? -# -# Use an indifference zone. Given -# n a vector of sample sizes, determine the -# probability of making a decision and the probability of -# of correct decision given that a decision is made. -# -# Number of groups is length(n) -# -if(is.null(n) & is.null(x))stop('Either n or x must be specified') -if(SEED)set.seed(2) -if(!is.null(x)){ -x=elimna(x) -if(is.matrix(x) || is.data.frame(x))x=listm(x) -J=length(x) -est=lapply(x,tmean,tr=tr) -est=as.vector(matl(est)) -ID=which(est==max(est)) -n=as.vector(matl(lapply(x,length))) -} -J=length(n) -if(J<=1)stop('n should have 2 or more values') -if(is.null(p.crit))p.crit=anc.best.crit(J,n,alpha=alpha,tr=tr,iter=iter,SEED=SEED)$fin.crit -z=list() -PMD=0 -PCD=0 -sig=rep(1,J) -if(is.null(SIG)){ -if(!is.null(x)){ -for(j in 1:J)sig[j]=sd(x[[j]]) -}} -if(!is.null(SIG))sig=SIG -for(i in 1:iter){ -for(j in 1:J)z[[j]]=sig[j]*rnorm(n[j]) -z[[1]]=z[[1]]+delta*sig[ID] -pv=anc.best(z,p.crit=p.crit, tr=tr,SEED=F) -if(pv@Larger.than[1]=='All'){ -PMD=PMD+1 -if(pv@Group.with.largest.estimate==1)PCD=PCD+1 -}} -PCD.ci=NA -PMD.ci=binom.conf(PMD,iter,pr=FALSE)$ci -if(PMD>0)PCD.ci=binom.conf(PCD,PMD,pr=FALSE)$ci -PCD=PCD/max(PMD,1) -PMD=PMD/iter -list(PMD=PMD,PMD.ci=PMD.ci,PCD=PCD,PCD.ci=PCD.ci) -} - - -bin.PMD.PCD<-function(n,p,DO=TRUE,alpha=.05,p.crit=NULL,iter=5000,SEED=TRUE){ -# -# Which group has the largest probability of success? -# -# Use an indifference zone. Given -# n a vector of sample sizes, determine the -# probability of making a decision and the probability of -# of correct decision given that a decision is made. 
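-# Here p is the vector of assumed true probabilities of success, one per group;
-# the group with the largest value in p is treated as the correct choice when estimating PCD.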
-# -# Number of groups is length(n) -# x if specified contain the number of succcess in which case p=mean(x/n) -# - -if(SEED)set.seed(2) -pmax=which(p==max(p)) -J=length(n) -if(!is.null(x))p=rep(mean(x/n),J) -Jm1=J-1 -if(J<=1)stop('n should have 2 or more values') -remp=p -id=which(p==max(p))[1] -if(is.null(p.crit)){ -pv.mat=bin.best.crit(remp,n,iter=iter,SEED=FALSE) -init=apply(pv.mat,2,qest,alpha) -z=optim(0,anc.best.fun,init=init,iter=iter,rem=pv.mat,Jm1=Jm1,alpha=alpha,method='Brent',lower=0,upper=1) -p.crit=z$par*init -} -PMD=0 -PCD=0 -for(i in 1:iter){ -x=rbinom(J,n,p) -if(!DO){ -pv=bin.best(x,n,p.crit=p.crit,SEED=FALSE) -if(pv@Larger.than[1]=='All'){ -PMD=PMD+1 -if(pv@Group.with.largest.estimate==id)PCD=PCD+1 -}} -if(DO){ -a=bin.best.DO(x,n) -if(a$p.value<=alpha){ -PMD=PMD+1 -if(max(a$Est)==pmax)PCD=PCD+1 -}} -} -PCD=PCD/PMD -PMD=PMD/iter -list(PMD=PMD,PCD=PCD) -} - - -WINCOR<-function(x,tr=.2){ -# -# For convenience, compute Winsorized correlation matrix only. -# -a=winall(x,tr=tr)$cor -a -} - - MVECOR<-function(x){ -library(MASS) -val<-cov.mve(x) -val=cov2cor(val$cov) -val -} - -MCDCOR<-function(x){ -library(MASS) -#val<-cov.mcd(x) -val<-DetMCD(x) -val=cov2cor(val$cov) -val -} - -COR.ROB<-function(x,method=c('WIN','PB','skip','mve','mcd','Ken','Spear','BIC'),tr=.2,...){ -# -# -# WIN: Winsorized -# PB: Percentage Bend -# skip: Skipped correlation based on projection-type outlier detection method -# mve: minimum volume ellipsoid -# mcd: minimum covariance determinant -# Ken: Kendall's tau -# Spear: Spearman's rho -# BIC: biweight correlation. -# -type=match.arg(method) -switch(type, - WIN=winall(m=x,tr=tr)$cor, - PB=pball(m=x,...)$pbcorm, - skip=scorall(x=x,...), - mve=MVECOR(x=x), - mcd=MCDCOR(x=x), - Ken=tauall(m=x)$taum, - Spear=spear(x=x)$cor, - BIC=bicovm(x)$mcor, - ) -} - - -COR.PAIR<-function(x,y,method=c('WIN','PB','skip','mve','mcd','Ken','Spear','BIC'),skip.cor=pcor,tr=.2,...){ -# -# For the bivariat case, compute a correlation -# -# WIN: Winsorized -# PB: Percentage Bend -# skip: Skipped correlation based on projection-type outlier detection method -# mve: minimum volume ellipsoid -# mcd: minimum covariance determinant -# Ken: Kendall's tau -# Spear: Spearman's rho -# BIC: biweight correlation. -# -x=cbind(x,y) -type=match.arg(method) -switch(type, - WIN=list(cor=winall(m=x,tr=tr)$cor[1,2]), - PB=list(cor=pball(m=x,...)$pbcorm[1,2]), - skip=list(cor=scor(x=x,corfun=skip.cor)$cor), - mve=list(cor=MVECOR(x=x)[1,2]), - mcd=list(cor=MCDCOR(x=x)[1,2]), - Ken=list(cor=tauall(m=x)$taum[1,2]), - Spear=list(cor=spear(x=x)$cor[1,2]), - BIC=list(cor=bicovm(x)$mcor[1,2]), - ) -} - -bicorM<-function(x){ -a=bicovM(x) -a=cov2cor(a) -a -} - -bicor<-function(x,y){ -a=bicovM(cbind(x,y)) -a=cov2cor(a) -list(cor=a[1,2]) -} - -corregci<-function(x,y,corfun=wincor,nboot=599,alpha=.05,SEED=TRUE,pr=TRUE,...){ -# -# Deals with correlations between some dependent variable y and p independent variables, x -# Compute confidence intervals for correlation coefficients and p-values when testing the -# hypothesis of a zero correlation, -# Also reported are adjusted p-values based on Hochberg's method. -# -# The predictor values are assumed to be in the n by p matrix x. -# The default number of bootstrap samples is nboot=599 -# -# corfun can be any R function that returns a correlation having the form -# the vector corfun$cor. Examples are pbcor and bicor, spear and tau. 
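-# Usage sketch (hypothetical data): with x an n-by-p matrix of predictors and y a vector,
-# corregci(x,y) returns, for each column of x, a bootstrap confidence interval for its
-# Winsorized correlation with y; corregci(x,y,corfun=pbcor) uses the percentage bend correlation.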
-# -x<-as.matrix(x) -p1<-ncol(x)+1 -p<-ncol(x) -xy<-cbind(x,y) -xy<-elimna(xy) -x<-xy[,1:p] -y<-xy[,p1] -nrem=length(y) -estit=NA -for(j in 1:p)estit[j]=corfun(x[,j],y,...)$cor -nv=length(y) -x<-as.matrix(x) -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot) -bvec<-apply(data,1,corregci.sub,x,y,corfun=corfun) -#Leverage points already removed. -# bvec is a p by nboot matrix. The first row -# contains the bootstrap correlations for the first IV,, the second row -# contains the bootstrap values for first predictor, etc. -regci<-matrix(0,p,5) -vlabs=NA -for(j in 1:p)vlabs[j]=paste("ind.var",j) -i#vlabs[1:p]=labels(x)[[2]] -dimnames(regci)<-list(vlabs,c("ci.low","ci.up","Estimate","p-value",'Adj.p.value')) -ilow<-round((alpha/2) * nboot) -ihi<-nboot - ilow -ilow<-ilow+1 -se<-NA -pvec<-NA -for(i in 1:p){ -bsort<-sort(bvec[i,]) -pvec[i]<-sum(bvec[i,]<0)/nboot #+.5*sum(bvec[i,]==0)/nboot -if(pvec[i]>.5)pvec[i]<-1-pvec[i] -regci[i,1]<-bsort[ilow] -regci[i,2]<-bsort[ihi] -} -regci[,3]=estit -pvec<-2*pvec -regci[,4]=pvec -regci[,5]=p.adjust(pvec,method='hoch') -num.sig=sum(regci[,5]<=alpha) -list(output=regci,n=nrem,num.sig=num.sig) -} - -corregci.sub<-function(isub,x,y,corfun){ -p=ncol(x) -xmat<-matrix(x[isub,],nrow(x),ncol(x)) -e=NA -for(j in 1:p)e[j]=corfun(xmat[,j],y[isub])$cor -e -} - -neg.colM<-function(x,id=NULL){ -# -# Columns of the matrix are mutliplied by -1 -# -x[,id]=x[,id]*-1 -x -} - -mve.cor<-function(x,y){ -xy=cbind(x,y) -a=MVECOR(x=xy)[1,2] -list(cor=a) -} - -mcd.cor<-function(x,y){ -xy=cbind(x,y) -a=MCDCOR(x=xy)[1,2] -list(cor=a) -} - - - -HQreg<-function(x,y,alpha=1,xout=FALSE,method='huber',tau=.5,outfun=outpro,...){ -# -# -# Robust elastic net -# Yi, C. & Huang, J. (2016) Semismooth Newton coordinate descent algorithm for elastic-net penalized -# Huber loss regression and quantile regression. (https://arxiv.org/abs/1509.02957) -# Journal of Computational and Graphical Statistics -# http://www.tandfonline.com/doi/full/10.1080/10618600.2016.1256816 - -# -library(hqreg) -x<-as.matrix(x) -xx<-cbind(x,y) -xx<-elimna(xx) -x<-xx[,1:ncol(x)] -x<-as.matrix(x) -y<-xx[,ncol(x)+1] -temp<-NA -x<-as.matrix(x) -if(xout){ -x<-as.matrix(x) -flag<-outfun(x,plotit=plotit,...)$keep -x<-x[flag,] -y<-y[flag] -x<-as.matrix(x) -} -a=hqreg(x,y,method=method,alpha=alpha,tau=tau)$beta -list(coef=a[,100]) -} - - -LTS.EN<-function(x,y,xout=FALSE,family='gaussian',alphas=NULL,lambdas=NULL,outfun=outpro,...){ -# -# -# Robust elastic net -# Yi, C. & Huang, J. (2016) Semismooth Newton coordinate descent algorithm for elastic-net penalized -# Huber loss regression and quantile regression. 
(https://arxiv.org/abs/1509.02957) -# Journal of Computational and Graphical Statistics -# http://www.tandfonline.com/doi/full/10.1080/10618600.2016.1256816 -# -# -library(enetLTS) -x<-as.matrix(x) -xx<-cbind(x,y) -xx<-elimna(xx) -x<-xx[,1:ncol(x)] -x<-as.matrix(x) -y<-xx[,ncol(x)+1] -temp<-NA -x<-as.matrix(x) -if(xout){ -x<-as.matrix(x) -flag<-outfun(x,plotit=plotit,...)$keep -x<-x[flag,] -y<-y[flag] -x<-as.matrix(x) -} -if(!is.null(alphas) & !is.null(lambdas))a=enetLTS(x,y,alphas=alphas,lambdas=lambdas,family=family,plot=FALSE) -if(!is.null(alphas) & is.null(lambdas))a=enetLTS(x,y,alphas=alphas,family=family,plot=FALSE) -if(is.null(alphas) & is.null(lambdas))a=enetLTS(x,y,family=family,plot=FALSE) -list(coef=a[6]$raw.coefficients) -} - -corCOM.PMDPCD<-function(n,p,rho=0,delta=.3,corfun=wincor,LARGEST=TRUE,alpha=.05, -x=NULL,y=NULL,iter=500,pr=TRUE,SEED=TRUE,MC=TRUE,FUN=mean,...){ -# -# Given n and p, the number of explanatory variables, -# determine the probability of making a decision about which independent variable has the -# the largest correlation with the dependent variable -# in the context of an indifference zone -# -# -# All independent variables have a same correlation with dependent variable indicated by the argument -# rho, -# except the first independent variable, which has correlation -# rho+delta. -# Default is rho=0 and delta= .3 -# (Cohen's suggestion for a small medium and large correlation -# are .1, .3 and .5) -# This is designed for situations where the goal is to make a decision about which IV has the largest correlation. -# -# If LARGEST=TRUE; the function default to rho=0 for the first IV and delta for the remaining IVs -# -# Possible alternative choices for corfun include: -# spear -# tau -# pbcor -# bicor -# scor -# mve.cor -# mcd.cor -# -if(pr)print('Execution time can be high') -use.cor=FALSE -if(!is.null(x) & !is.null(y)){ -xy=cbind(y,x) -R=COR.ROB(xy,method=COR.method) -rho=FUN(R[upper.tri(R)]) -n=nrow(x) -p=ncol(x) -use.cor=TRUE -} - -if(rho+delta>1)stop('rho+delta is greater than 1') -if(rho+delta<0-1)stop('rho+delta is less than -1') -if(MC)library(parallel) -if(SEED)set.seed(2) -if(delta<0-1 || delta>1)stop('rho + delta should be between -1 and 1') -PMD=0 -PCD=0 -p1=p+1 -if(LARGEST){ -COV=matrix(rho,p1,p1) -COV[1,2]=COV[2,1]=rho+delta -} -if(!LARGEST){ -COV=matrix(rho+delta,p1,p1) -COV[1,2]=COV[2,1]=rho -} -diag(COV)=1 -if(use.cor)COV=R #Over rule using an indifference zone; use estimate of the correlation matrix -x=list() -for(i in 1:iter)x[[i]]=rmulnorm(n,p1,COV) -if(!MC)a=lapply(x,corCOM.PMDPCD.sub,corfun=corfun,LARGEST=LARGEST,...) -if(MC)a=mclapply(x,corCOM.PMDPCD.sub,corfun=corfun,LARGEST=LARGEST,...) -for(i in 1:iter){ -if(a[[i]]$Conclusion=='Decide'){ -PMD=PMD+1 -if(a[[i]][1]==1)PCD=PCD+1 -}} -PCD.CI=binom.conf(PCD,PMD,alpha=alpha,pr=FALSE)$ci -PMD.CI=binom.conf(PMD,iter,alpha=alpha,pr=FALSE)$ci -PCD=PCD/max(1,PMD) -PMD=PMD/iter -list(PMD=PMD,PMD.CI=PMD.CI, PCD=PCD, PCD.CI=PCD.CI) -} - -corCOM.PMDPCD.sub<-function(x,corfun,LARGEST=LARGEST,...){ -p1=ncol(x) -a=corCOM.DVvsIV(x[,2:p1],x[,1],corfun=corfun,SEED=FALSE,LARGEST=LARGEST,...) -a -} - -RM.PMD.PCD<-function(x,tr=.2,delta=.5,alpha=.05,p.crit=NULL,iter=5000,nboot=500,SEED=TRUE){ -# -# -# Use an indifference zone. 
Given x -# determine the -# probability of making a decision and the probability of -# of correct decision given that a decision is made -# within the context of an indiffernce zone -# -if(is.list(x))stop('x should be a matrix or a data frame') -PMD=0 -PCD=0 -x=elimna(x) -est=apply(x,2,tmean,tr=tr) -ID=which(est==max(est)) -n=nrow(x) -# First, determine decision rule -# -A=cov(x) -J=ncol(x) -aval=c(seq(.001,.1,.001),seq(.11,.99,.01)) -id=which(aval==alpha) -if(length(id)==0)stop('alpha be one one values .001(.001).1 or 11(.01).99') -p.crit=rmanc.best.crit(x,iter=iter,alpha=alpha,tr=tr,SEED=SEED) -# -# Now simulate indifference zone. -for(i in 1:nboot){ -x=mvrnorm(n,mu=rep(0,J),Sigma=A) -x[,1]=x[,1]+delta*sqrt(A[ID,ID]) -est=apply(x,2,tmean,tr=tr) -id=which(est==max(est)) -a=rmanc.best.ex(x,tr=tr) -if(sum(a<=p.crit)){ -PMD=PMD+1 -if(id==1)PCD=PCD+1 -}} -PMD.ci=binom.conf(PMD,nboot,pr=FALSE)$ci -PCD.ci=binom.conf(PCD,PMD,pr=FALSE)$ci -PCD=PCD/max(PMD,1) -PMD=PMD/nboot -list(PMD=PMD,PMD.ci=PMD.ci,PCD=PCD,PCD.ci=PCD.ci) -} - -RS.LOC.IZ<-function(n,J=NULL,locfun=tmean,delta,iter=10000,SEED=TRUE,...){ -# -# Estimate probability of a correct decision based on an indifference zone -# -if(SEED)set.seed(2) -PCD=0 -if(length(n)==1){ -if(is.null(J))stop('Number of groups, J, was not specified') -n=rep(n,J) -} -if(length(n)>1){ -x=list() -J=length(n) -for(i in 1:iter){ -for(j in 1:J)x[[j]]=rnorm(n[j]) -x[[1]]=x[[1]]+delta -est=lapply(x,locfun,...) -est=as.vector(list2mat(est)) -id=which(est==max(est)) -if(id==1)PCD=PCD+1 -}} -PCD.ci=binom.conf(PCD,iter,pr=FALSE)$ci -PCD=PCD/iter -list(PCD=PCD,PCD.ci=PCD.ci) -} - -dep.dif.fun<-function(x,y,tr=.2,alpha=.05,AUTO=TRUE,PVSD=FALSE,nboot=2000,method=c('TR','TRPB','HDPB','MED','AD','SIGN')){ -# -# -# For two dependent groups, -# compute confidence intervals based on difference scores -# -# TR: trimmed mean Tukey--McLaughlin -# TRPB: trimmed means percentile bootstrap -# MED: median of the difference scores. -#.HDPB: median of the difference scores using Harrell--Davis and a percentile bootstrap -# AD: based on the median of the distribution of x-y, which can differ from the median of the difference scores. -# SIGN: P(X7){ -a=yuend(xsub2.1[,3],xsub2.1[,4],tr=tr,alpha=alpha) -GRID[ic,1]=a$n -GRID[ic,2]=a$est1 -GRID[ic,3]=a$est2 -GRID[ic,4]=a$dif -GRID[ic,5]=a$ci[1] -GRID[ic,6]=a$ci[2] -GRID[ic,7]=a$p.value -if(PB){ -pbv=trimpb2(xsub2.1[,3],xsub2.1[,4],tr=tr,alpha=alpha,nboot=nboot) -GRID[ic,5]=pbv$ci[1] -GRID[ic,6]=pbv$ci[2] -GRID[ic,7]=pbv$p.value -} -}} -if(DIF){ -if(length(xsub2.1[,3])>7){ -d=dep.dif.fun(xsub2.1[,3],xsub2.1[,4],tr=tr,alpha=alpha,method=METHOD,AUTO=AUTO,PVSD=PVSD) -J=which(LAB==METHOD) -d=pool.a.list(d) -GRID[ic,1]=length(xsub2.1[,3]) -GRID[ic,2:5]=d[IZ[J,]] -}} -ES[[ic]]=dep.ES.summary.CI(xsub2.1[,3],xsub2.1[,4],tr=tr) -}} -dimnames(GI)=list(NULL,c('Int.1.low','Int.1.up','Int.2.low','Int.2.up')) -if(DIF){ -dimnames(GRID)=list(NULL,c('n','Est','ci.low','ci.up','p.value','adj.p.value')) -GRID[,6]=p.adjust(GRID[,5],method='hoch') -} -if(!DIF){ -dimnames(GRID)=list(NULL,c('n','est.1','est.2','DIF','ci.low','ci.up','p.value','adj.p.value')) -GRID[,8]=p.adjust(GRID[,7],method='hoch') -} -list(GRID.INTERVALS=GI,GRID=GRID, Effect.Sizes=ES) -} - -comdvar.mcp<-function(x,method='hoch'){ -# -# Compare the variances of J depenent variables. 
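-# (For illustration: with the J measures stored in the columns of a matrix m,
-# comdvar.mcp(m) returns one row per pair of columns.)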
-# Perform all pairwise comparisons using the HC4 extension of the Morgan-Pitman test -# -if(is.null(dim(x)))stop('x should be a matrix or data frame') -J=ncol(x) -CC=(J^2-J)/2 -output<-matrix(0,CC,7) -dimnames(output)<-list(NULL,c('Var','Var','Est. 1','Est 2','Dif','p.value','Adj.p.value')) -ic=0 -for (j in 1:J){ -for (k in 1:J){ -if (j < k){ -ic=ic+1 -a=comdvar(x[,j],x[,k]) -output[ic,1]=j -output[ic,2]=k -output[ic,3]=a$est1 -output[ic,4]=a$est2 -output[ic,5]=a$est1-a$est2 -output[ic,6]=a$p.value -}}} -output[,7]=p.adjust(output[,6],method=method) -output -} - - - -rmVARcom.mcp<-function(x,est=winvar,alpha=.05,nboot=500,method='hoch',SEED=TRUE){ -# -# Compare the variances of J dependent variables. -# Perform all pairwise comparisons using the HC4 extension of the Morgan-Pitman test -# -if(is.null(dim(x)))stop('x should be a matrix or data frame') -J=ncol(x) -CC=(J^2-J)/2 -output<-matrix(0,CC,9) -dimnames(output)<-list(NULL,c('Var','Var','Est. 1','Est 2','Dif','cilow','ci.up','p.value','Adj.p.value')) -ic=0 -for (j in 1:J){ -for (k in 1:J){ -if (j < k){ -ic=ic+1 -a=rmVARcom(x[,j],x[,k],est=est,alpha=alpha,nboot=nboot,plotit=FALSE,SEED=SEED) -output[ic,1]=j -output[ic,2]=k -output[ic,3:4]=a$estimate -output[ic,5]=a$dif -output[ic,6]=a$ci[1] -output[ic,7]=a$ci[2] -output[ic,8]=a$p.value -}}} -output[,9]=p.adjust(output[,8],method=method) -output -} - -comvar.mcp<-function(x,method='hoch',SEED=TRUE){ -# -# Compare the variances of J indepenent variables. -# Perform all pairwise comparisons using -# a slight extension of HC4 vesion of the Morgan-Pitman test -# -if(is.matrix(x) || is.data.frame(x))x=listm(x) -J=length(x) -CC=(J^2-J)/2 -output<-matrix(0,CC,7) -dimnames(output)<-list(NULL,c('Var','Var','SD 1','SD 2','Dif','p.value','Adj.p.value')) -ic=0 -for (j in 1:J){ -for (k in 1:J){ -if (j < k){ -ic=ic+1 -a=varcom.IND.MP(x[[j]],x[[k]],SEED=SEED) -a=pool.a.list(a) -output[ic,1]=j -output[ic,2]=k -output[ic,3:4]=a[1:2] -output[ic,3:4]=sqrt(output[ic,3:4]) -output[ic,5]=output[ic,3]-output[ic,4] -output[ic,6]=a[3] -}}} -output[,7]=p.adjust(output[,6],method=method) -output -} - -robVARcom.mcp<-function(x,est=winvar,alpha=.05,nboot=2000,method='hoch',SEED=TRUE){ -# -# Compare the robust variances of J indepenent variables. -# Perform all pairwise comparisons using the HC4 extension of the Morgan-Pitman test -# -if(is.null(dim(x)))stop('x should be a matrix or data frame') -J=ncol(x) -CC=(J^2-J)/2 -output<-matrix(0,CC,9) -dimnames(output)<-list(NULL,c('Var','Var','Est. 1','Est 2','Dif','cilow','ci.up','p.value','Adj.p.value')) -ic=0 -for (j in 1:J){ -for (k in 1:J){ -if (j < k){ -ic=ic+1 -a=b2ci(x[,j],x[,k],est=est,SEED=SEED,nboot=nboot,alpha=alpha) -output[ic,1]=j -output[ic,2]=k -output[ic,3]=a$est1 -output[ic,4]=a$est2 -output[ic,5]=a$est1-a$est2 -output[ic,6]=a$ci[1] -output[ic,7]=a$ci[2] -output[ic,8]=a$p.value -}}} -output[,9]=p.adjust(output[,8],method=method) -output -} - -oph.ind.comvar<-function(x,y=NULL,method='hommel',invalid=4,SEED=TRUE,STOP=TRUE){ -# -# This function is designed specifically for dealing with -# Prediction Error for Intraocular Lens Power Calculation -# It is assumed that any value less than -3 diopters or greater than 3 diopters -# is invalid. The argument invalid can be used to change this decision rule. -# -# Goal: compare the variances of J independent measures. -# All pairwise comparisons are performed using -# a slight extension of the HC4 vesion of the Morgan-Pitman test -# -# x can be a matrix, a data frame or it can have list mode. 
-# if y is not NULL, the function assumes x is a vector -# and the goal is to compare the variances of the data in x and y. -# -# By default, Hochberg's method is used to control the probability of one -# or more TypeI errors -# -if(!is.null(y))x=list(x,y) -if(is.matrix(x) || is.data.frame(x))x=listm(x) -J=length(x) -for(j in 1:J)x[[j]]=elimna(x[[j]]) -for(j in 1:J){ -flag=abs(elimna(x[[j]]))>invalid -if(sum(flag,na.rm=TRUE)>0){ -print(paste('The value of argument invalid indicates that any value greater in absolute value than', invalid,' is invalid')) -print(paste('Variable', j, 'has one or more invalid values')) -print('They occur in the following positions') -nr=c(1:length(x[[j]])) -print(nr[flag]) -if(STOP)stop() -} -} -CC=(J^2-J)/2 -output<-matrix(0,CC,7) -dimnames(output)<-list(NULL,c('Var','Var','SD 1','SD 2','Dif','p.value','Adj.p.value')) -ic=0 -for (j in 1:J){ -for (k in 1:J){ -if (j < k){ -ic=ic+1 -a=varcom.IND.MP(x[[j]],x[[k]],SEED=SEED) -a=pool.a.list(a) -output[ic,1]=j -output[ic,2]=k -output[ic,3:4]=a[1:2] -output[ic,3:4]=sqrt(output[ic,3:4]) -output[ic,5]=output[ic,3]-output[ic,4] -output[ic,6]=a[3] -}}} -output[,7]=p.adjust(output[,6],method=method) -output -} - - -oph.dep.comvar<-function(x, y=NULL, invalid=4, method='hommel',STOP=TRUE){ -# -# This function is designed specifically for dealing with -# Prediction Error for Intraocular Lens Power Calculation -# It is assumed that any value less than -3 diopters or greater than 3 diopters -# is invalid. The argument invalid can be used to change this decision rule. -# -# Goal: compare the variances of J dependent measures. -# All pairwise comparisons are performed using -# a slight extension of the HC4 version of the Morgan-Pitman test -# Compare the variances of J dependent variables. -# Perform all pairwise comparisons using the HC4 extension of the Morgan-Pitman test - -# x can be a matrix, a data frame or it can have list mode. -# if y is not NULL, the function assumes x is a vector -# and the goal is to compare the variances of the data in x and y. 
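-# Hypothetical example: with the prediction errors for three IOL formulas, measured on the
-# same eyes, stored in the columns of a matrix m, oph.dep.comvar(m) first screens for invalid
-# values and then compares the three standard deviations in a pairwise manner.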
-# -# By default, Hochberg's method is used to control the probability of one -# or more Type I errors -# -if(!is.null(y))x=cbind(x,y) -if(is.list(x)){ -n=pool.a.list(lapply(x,length)) -if(var(n)!=0)stop('lengths have different values') -x=matl(x) -} -J=ncol(x) -flag=abs(elimna(x))>invalid -if(sum(flag,na.rm=TRUE)>0){ -nr=c(1:nrow(x)) -if(sum(flag)>1){ -print(paste('The value of argument invalid indicates that any value greater in absolute value than', invalid,' is invalid')) -print('The following rows have invalid values') -} -if(sum(flag)==1){ -print(paste('The value of argument invalid indicates that any value greater in absolute value than', invalid,' is invalid')) -print('The following row has an invalid value') -} -irow=NA -ic=0 -N=nrow(x) -for(i in 1:N){ -iflag=abs(x[i,])>invalid -if(sum(iflag,na.rm=TRUE)>0){ -ic=ic+1 -irow[ic]=i -}} -print(irow) -if(STOP)stop() -} -CC=(J^2-J)/2 -output<-matrix(0,CC,7) -dimnames(output)<-list(NULL,c('Var','Var','SD 1','SD 2','Dif','p.value','Adj.p.value')) -ic=0 -for (j in 1:J){ -for (k in 1:J){ -if (j < k){ -ic=ic+1 -a=comdvar(x[,j],x[,k]) -output[ic,1]=j -output[ic,2]=k -output[ic,3]=a$est1 -output[ic,4]=a$est2 -output[ic,3]=sqrt(a$est1) -output[ic,4]=sqrt(a$est2) -output[ic,5]=sqrt(a$est1)- sqrt(a$est2) -output[ic,6]=a$p.value -}}} -output[,7]=p.adjust(output[,6],method=method) -output -} - - -ancM.COV.ES<-function(x1,y1,x2,y2,fr1=1,fr2=1,tr=.2,pts=NULL,xout=FALSE,outfun=outpro,...){ -# -# -# For two or more covariates, estimate effect sizes for -# a collection of points. -# -# That is, for each point of interest, determine -# a cloud of points close to it and based on the -# corresponding y values, compute measures of effect size -# -# If pts=NULL -# the significant points returned by -# ancdetM4 are used -# -p=ncol(x1) -if(p<2)stop('This function is for two or more covariates') -p1=p+1 -if(ncol(x2)!=p)stop('x1 and x2 do not have the same number of columns') -xy=elimna(cbind(x1,y1)) -x1=xy[,1:p] -y1=xy[,p1] -xy=elimna(cbind(x2,y2)) -x2=xy[,1:p] -y2=xy[,p1] -if(min(length(y1),length(y2))<50)stop('The minimum sample size must be greater than or equal to 50') -if(xout){ -flag<-outfun(x1,...)$keep -x1<-x1[flag,] -y1<-y1[flag] -flag<-outfun(x2,...)$keep -x2<-x2[flag,] -y2<-y2[flag] -} -guide1=NA -guide2=NA -x1=as.matrix(x1) -x2=as.matrix(x2) -a=ancdetM4(x1=x1,y1=y1,x2=x2,y2=y2,fr1=fr1,fr2=fr2,tr=tr,pts=pts,...) 
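-# ancdetM4 compares the regression surfaces at a collection of covariate points;
-# when pts is not supplied, the points where a significant difference was found are used next.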
-if(is.null(pts))pts=a$significant.points -M=NA -SML.class=NA -if(!is.null(pts)){ -if(is.vector(pts))pts=matrix(pts,nrow=1) -nr=nrow(pts) -M=matrix(NA,nr,6) -SML.class=matrix(0,nr,6) -m1<-covmcd(x1) -m2<-covmcd(x2) -for(i in 1:nr){ -id1=near3d(x1,pts[i,],fr1,m1) -id2=near3d(x2,pts[i,],fr2,m2) -if(sum(id1)<10 ||sum(id2)<10)print(paste('For point',j,'not enough nearest neighbors')) -if(sum(id1)>=10 & sum(id2)>10){ -ES=ES.summary(y1[id1],y2[id2]) -M[i,]=ES[,1] -}} -dimnames(M)=list(NULL,names(ES[,1])) -dum1=rnorm(50) -dum2=rnorm(50) -guide1=ES.summary(dum1,dum2+3)[,-1] -guide2=ES.summary(dum1,dum2-3)[,-1] -} -if(!is.na(M[1])){ -for(i in 1:nr){ -flag=M[i,] <= guide1[,2] || M[i,]>= guide2[,2] -SML.class[i,flag]=1 -flag=M[i,] <= guide1[,3] || M[i,]>= guide2[,3] -SML.class[i,flag]=2 -flag=M[i,] <= guide1[,4] || M[i,]>= guide2[,4] -SML.class[i,flag]=3 -}} -leg1='0=At most small, 1=between S and M, 2=between M and L, 3=greater than L' -list(ES.REL.MAG.G1.less.than.G2=guide1, ES.REL.MAG.G1.greater.than.G2=guide2,Est=M,legend.4.SML.class=leg1,SML.class=SML.class) -} - -ancDEP.MULC.ES<-function(x1,y1,y2,fr1=1.5,fr2=1.5,tr=.2,pts=NULL,xout=FALSE,outfun=outpro,cov.fun=skip.cov,...){ -# -# -# Dependent groups -# For two or more covariates, estimate effect sizes for -# a collection of points. -# -# That is, for each point of interest, determine -# a cloud of points close to it and based on the -# corresponding y values, compute measures of effect size -# -# If pts=NULL -# the significant points returned by -# ancdetM4 are used -# -library(MASS) -x2=x1 -p=ncol(x1) -if(p<2)stop('This function is for two or more covariates') -p1=p+1 -#if(ncol(x2)!=p)stop('x1 and x2 do not have the same number of columns') -xy=elimna(cbind(x1,y1)) -x1=xy[,1:p] -y1=xy[,p1] -xy=elimna(cbind(x2,y2)) -x2=xy[,1:p] -y2=xy[,p1] -if(min(length(y1),length(y2))<50)stop('The minimum sample size must be greater than or equal to 50') -if(xout){ -flag<-outfun(x1,...)$keep -x1<-x1[flag,] -y1<-y1[flag] -flag<-outfun(x2,...)$keep -x2<-x2[flag,] -y2<-y2[flag] -} -guide1=NA -guide2=NA -x1=as.matrix(x1) -x2=as.matrix(x2) -a=Dancovamp(x1=x1,y1=y1,x2=x2,y2=y2,fr1=fr1,fr2=fr2,tr=tr,pts=pts,...) -if(is.null(pts))pts=a$pts -M=NA -SML.class=NA -if(!is.null(pts)){ -if(is.vector(pts))pts=matrix(pts,nrow=1) -nr=nrow(pts) -M=matrix(NA,nr,4) -SML.class=matrix(0,nr,4) -m1<-cov.fun(x1) -m2<-cov.fun(x2) -for(i in 1:nr){ -id1=near3d(x1,pts[i,],fr1,m1) -id2=near3d(x2,pts[i,],fr2,m2) -if(sum(id1)<10 ||sum(id2)<10)print(paste('For point',j,'not enough nearest neighbors')) -if(sum(id1)>=10 & sum(id2)>10){ -ES=dep.ES.summary(y1[id1],y2[id2]) -M[i,]=ES[,2] -}} -dimnames(M)=list(NULL,names(ES[,1])) -dum1=rnorm(50) -dum2=rnorm(50) -guide1=dep.ES.summary(dum1,dum2+3)[,-2] -guide2=dep.ES.summary(dum1,dum2-3)[,-2] -} -if(!is.na(M[1])){ -for(i in 1:nr){ -flag=M[i,] <= guide1[,2] || M[i,]>= guide2[,2] -SML.class[i,flag]=1 -flag=M[i,] <= guide1[,3] || M[i,]>= guide2[,3] -SML.class[i,flag]=2 -flag=M[i,] <= guide1[,4] || M[i,]>= guide2[,4] -SML.class[i,flag]=3 -}} -leg1='0=At most small, 1=between S and M, 2=between M and L, 3=greater than L' -list(ES.REL.MAG.G1.less.than.G2=guide1, ES.REL.MAG.G1.greater.than.G2=guide2,Est=M,legend.4.SML.class=leg1,SML.class=SML.class) -} - -rmESPRO.null<-function(n,J,est=tmean,nboot=2000,SEED=TRUE,...){ - # - # Determine null distribution - # for rmES.pro - # - if(SEED)set.seed(2) - v=NA - for(i in 1:nboot){ - x=rmul(n,J,g=0,h=0,rho=0) -E=apply(x,2,est,...) 
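-# E holds the J estimates for this simulated sample (g-and-h data with g=h=0, rho=0,
-# i.e., independent standard normal variables); the projection distance of the
-# grand-mean vector, computed next, provides one value from the null distribution.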
- GM=mean(E) - GMvec=rep(GM,J) - DN=pdis(x,GMvec,center=E) -# DN=pdis(x,E,center=GMvec) -v[i]=DN -} -v -} - - -rmES.dif.pro<-function(x,est=tmean,...){ -# -# Global measure of effect size, -# based on difference scores, -# relative to the null distribution -# -if(is.list(x))x=matl(x) -x=elimna(x) -n=nrow(x) -n1=n+1 -J=ncol(x) -ALL=(J^2-J)/2 -M=matrix(NA,nrow=n,ncol=ALL) -ic=0 -for(j in 1:J){ -for(k in 1:J){ -if(j0)PCD.ci=binom.conf(PCD,PMD,pr=FALSE)$ci -PCD=PCD/max(PMD,1) -PMD=PMD/iter -list(PMD=PMD,PMD.ci=PMD.ci,PCD=PCD,PCD.ci=PCD.ci) -} - -RMPB.PMD.PCD<-function(x,est=tmean,delta=.5,alpha=.05,iter=500,nboot=1000,SEED=TRUE,...){ -# -# -# Use an indifference zone. Given x -# determine the -# probability of making a decision and the probability of -# of correct decision given that a decision is made -# within the context of an indifference zone -# -if(SEED)set.seed(2) -if(is.list(x))stop('x should be a matrix or a data frame') -PMD=0 -PCD=0 -x=elimna(x) -n=nrow(x) -E=apply(x,2,est,...) -ID=which(E==max(E)) -# -A=cov(x) -J=ncol(x) -# -# Now simulate indifference zone. -for(i in 1:iter){ -x=mvrnorm(n,mu=rep(0,J),Sigma=A) -x[,1]=x[,1]+delta*sqrt(A[ID,ID]) -e=apply(x,2,est,...) -id=which(e==max(e)) -a=rmanc.best.PB(x,est=est,nboot=nboot,SEED=FALSE,...) -if(sum(a$p.value<=alpha)){ -PMD=PMD+1 -if(id==1)PCD=PCD+1 -}} -PMD.ci=binom.conf(PMD,iter,pr=FALSE)$ci -PCD.ci=binom.conf(PCD,PMD,pr=FALSE)$ci -PCD=PCD/max(PMD,1) -PMD=PMD/iter -list(PMD=PMD,PMD.ci=PMD.ci,PCD=PCD,PCD.ci=PCD.ci) -} - - -ID.sm.varPB<-function(x,var.fun=winvar,nboot=500,NARM=FALSE,na.rm=TRUE,SEED=TRUE,...){ -# -# -# Strategy: suppose group 2 has the lowest estimate. -# Generate a bootstrap sample and determine whether -# the lowest bootstrap estimate corresponds to group 2. -# Repeat nboot times and let P denote the proportion of times group 2 has the lowest estimate -# Make a decision if this proportion is sufficiently high. -# P yields a pseudo p-value -# -# -x=elimna(x) -J=ncol(x) -chk=0 -if(is.list(x))x<-matl(x) -if(NARM)x=elimna(x) -e=apply(x,2,var.fun,...) -id=which(e==min(e)) -n=nrow(x) -J=length(x) -for(i in 1:nboot){ -isam=sample(n,replace=TRUE) -b=apply(x[isam,],2,var.fun,na.rm=na.rm,...) -ichk=which(b==min(b)) -if(id==ichk)chk=chk+1 -} -pv=chk/nboot -pv=2*min(pv,1-pv) -list(n=n,Est=e,p.value=pv) -} - -bin.best.DO<-function(x,n){ -# -# Determine whether it is reasonable to -# decide which group has largest probability of success -# -# x= vector number of successes -# n=sample sizes -# -chk=0 -e=x/n -J=length(x) -id=which(e==max(e))[1] -CON=conCON(J,id)$conCON -a=lincon.bin(x,n,con=CON) -pv=max(a$CI[,4]) -list(Est.=e,p.value=pv) -} - - -rplotCIsmm<-function(x,y,tr=.2,fr=.5,plotit=TRUE,scat=TRUE,pyhat=FALSE,SEED=TRUE, -dfmin=2,pts=NULL,npts=25,nmin=12, -eout=FALSE,xout=FALSE,xlab='x',ylab='y',outfun=out,LP=TRUE,MID=TRUE,alpha=.05,pch='.',...){ -# -# Confidence interval for running interval smoother based on a trimmed mean. -# -# rplotCI will provide shorter and more accurate confidence intervals but -# is limited to 10 or 25 points and alpha=.05. -# This functions returns confidence intervals that are generally a bit wider -# but it has low execution time if alpha differs from 0.5 or there is interest -# using something other than 10 or 25 points. -# -# Unlike rplot,a confidence band based on the Studentized maximum modulus dist -# is computed, -# unless alpha is not equal to .05 or the number of confidence intervals -# is greater than npts=28, in which case the distribution of the max of npts -# random variables is used. 
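-#
-# Illustrative example (sketch only, hypothetical data):
-# x=rnorm(100); y=x+rnorm(100)
-# rplotCIsmm(x,y,tr=.2)  # smooth of the .2 trimmed mean of Y given X with a .95 confidence band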
-# -# LP=TRUE, the plot is further smoothed via lowess -# -# fr controls amount of smoothing -# -xord=order(x) -x=x[xord] -y=y[xord] -if(!is.null(pts))pts=sort(pts) -str=rplot(x,y,tr=tr,xout=xout,plotit=FALSE,LP=LP,fr=fr,pr=FALSE)$Strength.Assoc -m<-cbind(x,y) -if(ncol(m)>2)stop('To get a smooth with more than one covariate, use rplot') -m<-elimna(m) -nv=nrow(m) -if(eout && xout)stop('Not allowed to have eout=xout=T') -if(eout){ -flag<-outfun(m,plotit=FALSE)$keep -m<-m[flag,] -} -if(xout){ -flag<-outfun(m[,1])$keep -m<-m[flag,] -} -if(is.null(pts)){ -res1=ancova(x,y,x,y,pr=FALSE,plotit=FALSE,fr1=fr,fr2=fr,nmin=nmin)$output -pts=seq(res1[1,1],res1[5,1],length.out=npts) -} -x=m[,1] -y=m[,2] -n.keep=length(y) -if(is.null(pts)){ -if(!MID)pts=seq(min(x),max(x),length.out=npts) -vv=idealf(x) -if(MID)pts=seq(vv$ql,vv$qu,length.out=npts) -} -rmd=NA -for(i in 1:length(pts))rmd[i]<-mean(y[near(x,pts[i],fr)],tr=tr) -sedf=runse(x,y,fr=fr,tr=tr,pts=pts,SEED=SEED) -df=sedf$df -flag=df>dfmin -se=sedf$se -ntest=length(df[flag]) -mdif=min(df[flag]) -crit=NA -dfval=df[flag] -for(it in 1:ntest)crit[it]=qsmm(1-alpha,ntest,dfval[it]) -low=rmd[flag]-crit*se[flag] -up=rmd[flag]+crit*se[flag] -ptsall=pts -rmdall=rmd -rmd=rmd[flag] -pts=pts[flag] -if(plotit){ -ord=order(x) -x=x[ord] -y=y[ord] -if(LP){ -rmd=lplot(pts,rmd,plotit=FALSE,pyhat=TRUE,pr=FALSE)$yhat -up=lplot(pts,up,plotit=FALSE,pyhat=TRUE,pr=FALSE)$yhat -low=lplot(pts,low,plotit=FALSE,pyhat=TRUE,pr=FALSE)$yhat -} -plot(c(x,pts),c(y,rmd),xlab=xlab,ylab=ylab,type='n') -if(scat)points(x,y,pch=pch) -lines(pts,up,lty=2) -lines(pts,low,lty=2) -lines(pts,rmd) -} -if(pyhat){output<-cbind(pts,rmd,low,up) -dimnames(output)=list(NULL,c('pts','y.hat','ci.low','ci.up')) -} -if(!pyhat)output<-'Done' -list(output=output,str=str,n=nv,n.keep=n.keep) -} - -corREG.best<-function(x,y,corfun=wincor,alpha=.05,nboot=500, neg.col=NULL,LARGEST=TRUE, SEED=TRUE,MC=FALSE,xout=FALSE,outfun=outpro,...){ -# -# Can a decision be made about which IV -# has the strongest correlation with the DV -# Winsorized correlation is used by default. -# -# x is assumed to be a matrix -# -# -if(nrow(x)!=length(y))stop('x and y have different sample sizes; should be equal') -p=ncol(x) -p1=p+1 -m1=cbind(x,y) -m1<-elimna(m1) # Eliminate rows with missing values -nval=nrow(m1) -x<-m1[,1:p] -y=m1[,p1] -if(xout){ -m<-cbind(x,y) -flag<-outfun(x,plotit=FALSE,...)$keep -m<-m[flag,] -x<-m[,1:p] -y<-m[,p1] -} -x=neg.colM(x,neg.col) -est=NA -for(j in 1:p)est[j]=corfun(x[,j],y)$cor -if(LARGEST)ID=which(est==max(est)) -if(!LARGEST)ID=which(est==min(est)) -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -# -# If you use corfun=scor, set plotit=F -# -data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot) -data=listm(t(data)) -if(MC){ -library(parallel) -bvec<-mclapply(data,corCOMmcp_sub,x,y,corfun) -} -IB=NA -if(!MC)bvec<-lapply(data,corCOMmcp_sub,x,y,corfun) -for(i in 1:nboot){ -if(LARGEST)IB[i]=which(bvec[[i]]==max(bvec[[i]])) -if(!LARGEST)IB[i]=which(bvec[[i]]==min(bvec[[i]])) -} -PC=mean(IB==ID) -PC=2*min(PC,1-PC) -list(Est.=est,p.value=PC) -} - -corREG.best.DO=corREG.best - -rung3hatCI<-function(x,y,pts=x,tr=.2,alpha=.05,fr=1,nmin=12,ADJ=FALSE,iter=1000,...){ -# -# Compute y hat for each row of data in the matrix pts -# use a running interval smoother to compute a confidence interval for trimmed mean of Y given X -# -# fr controls amount of smoothing -# tr is the amount of trimming -# x is an n by p matrix of predictors. 
-# pts is an m by p matrix, m>=1. -# -oldSeed <- .Random.seed -set.seed(12) # So get consistent results from near3d -if(ADJ)alpha=rung3hat.pcrit(x,pts=pts,tr=tr,nmin=nmin,fr=fr,iter=iter) -x=as.matrix(x) -p=ncol(x) -pts=as.matrix(pts) -library(MASS) -m<-cov.mve(x) -rmd<-1 # Initialize rmd -nval<-1 -ci=matrix(NA,nrow=nrow(pts),ncol=2) -x.used=matrix(NA,nrow=nrow(pts),ncol=p) -for(i in 1:nrow(pts)){ -flag=near3d(x,pts[i,],fr,m) -rmd[i]<-mean(y[flag],tr) -nval[i]<-length(y[flag]) -if(nval[i]>nmin){ -ci[i,]=trimci(y[flag],tr=tr,alpha=alpha,pr=FALSE)$ci -x.used[i,]=pts[i,] -} -} -flag=!is.na(x.used[,1]) -x.used=x.used[flag,] -rmd=rmd[flag] -nval=nval[flag] -ci=ci[flag,] -output=cbind(nval,rmd,ci) -dimnames(output)=list(NULL,c('n','Est.','ci.low','ci.up')) -assign(x='.Random.seed', value=oldSeed, envir=.GlobalEnv) -list(pts.used=x.used,output=output,alpha.used=alpha) -} - -rung3hat.pcrit<-function(x,pts=x,alpha=.05,iter=1000,tr=.2,fr=1,nmin=12,...){ -# -# Compute critical p-value for rung3hatCI. -# -x=as.matrix(x) -n=nrow(x) -pts=as.matrix(pts) -pvdist=NA -m<-cov.mve(x) -for(i in 1:iter){ -y=rnorm(n) -a=rung3hat.sub(x,y,pts=pts,m=m,tr=tr,fr=fr,nmin=nmin) -pvdist[i]=min(a,na.rm=TRUE) -} -pc=hd(pvdist,alpha) -pc -} - - -rung3hat.sub<-function(x,y,pts,m,tr=.2,fr,nmin){ -pv=NA -for(i in 1:nrow(pts)){ -flag=near3d(x,pts[i,],fr,m) -if(sum(flag)>nmin)pv[i]=trimci(y[flag],tr=tr,pr=FALSE)$p.value -} -pv -} - -sm.vs.sm<-function(x,y,method1='RUN',method2='RF',xout=FALSE,outfun=outpro,xlab='Est1', -ylab='Est2',pch='.',pr=TRUE,xoutL=FALSE,...){ -# -# If the smoothers give similar results, the plot returned here should be -# tightly clustered around a line having slope=1 and intercept=0, indicated -# by a dashed line. -# -# if(!xoutL)print('Suggest also looking at result using xoutL=TRUE) -e1=smpred(x,y,method=method1,xout=xout,outfun=outfun,...) -e2=smpred(x,y,method=method2,xout=xout,outfun=outfun,...) -lplot(e1,e2,xlab=xlab,ylab=ylab,pc=pch,xout=xoutL,pr=FALSE) -abline(0,1,lty=2) -} - -best.DO<-function(x,tr=.2,...){ -# -# Determine whether it is reasonable to -# decide which group has largest measure of location -# -# -chk=0 -if(is.matrix(x)||is.data.frame(x))x<-listm(x) -x=elimna(x) -J=length(x) -e=lapply(x,tmean,tr) -e=pool.a.list(e) -id=which(e==max(e)) -CON=conCON(J,id)$conCON -a=lincon(x,con=CON,pr=FALSE) -pv=max(a$psihat[,5]) -list(Est.=e,p.value=pv) -} - -rmanc.best.DO<-function(x,tr=.2,...){ -# -# Determine whether it is reasonable to -# decide which group has largest measure of location -# -# -if(is.list(x))x=matl(x) -x=elimna(x) -x<-listm(x) -J=length(x) -e=lapply(x,tmean,tr) -e=pool.a.list(e) -id=which(e==max(e)) -id=id[1] -e=lapply(x,tmean,tr) -e=pool.a.list(e) -id=which(e==max(e)) -CON=conCON(J,id)$conCON -a=rmmcp(x,con=CON,dif=FALSE,tr=tr) -pv=max(a$test[,3]) -list(Best.Group=id,Est.=e,p.value=pv) -} - - - - - -rmbestPB.DO<-function(x,est=tmean,nboot=NA,SEED=TRUE,...){ -# -# Determine whether it is reasonable to -# decide which group has largest measure of location -# -# -if(is.list(x))x=matl(x) -x=elimna(x) -x<-listm(x) -J=length(x) -e=lapply(x,est,...) -e=pool.a.list(e) -id=which(e==max(e)) -CON=conCON(J,id)$conCON -a=rmmcppb(x,con=CON,dif=FALSE,est=est,nboot=nboot,SEED=SEED,pr=FALSE,...) 
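-# Column 3 of a$output contains the p-values for comparing the apparent best
-# group to each of the remaining groups; the largest p-value is reported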
-pv=max(a$output[,3]) -list(Est.=e,p.value=pv,con=CON) -} - - - -rmanc.bestPB<-function(x,alpha=.05,est=tmean,iter=5000,SEED=TRUE,nboot=2000,PB=FALSE,...){ -# -# -# For J dependent groups, -# identify the group with largest trimmed mean -# Make a decision if every p.value<=p.crit -# -# p.crit is determined via -# a simulation to determine the null distribution based on -# iter=5000 replications. -# -# PB=FALSE: Determine critical values via rmanc.best.crit. Faster execution time but can differ somewhat -# from values based on PB method -# -# -# Returns: -# Best='No Decision' if not significant -# Best= the group with largest measure of location if a decision can be made. -# -# Confidence intervals having simultaneous probability coverage 1-alpha -# using the adjusted level. -# -if(is.list(x))x=matl(x) -x=elimna(x) -flag=TRUE -J=ncol(x) -if(J<3)stop('Should have 3 or more groups') -Jm1=J-1 -est=apply(x,2,tmean,tr=tr) -n=nrow(x) -est=matl(est) -R=order(est,decreasing = TRUE) -pvec=NA -if(!PB)p.crit=rmanc.best.crit(x,iter=iter,alpha=alpha,SEED=SEED) -if(PB)p.crit=rmanc.best.critPB(x,iter=iter,alpha=alpha,SEED=SEED) -output<-matrix(NA,Jm1,8) -dimnames(output)=list(NULL,c('Est.Best','Grp','Est','Dif','ci.low','ci.up','p.value','p.crit')) -for(i in 2:J){ -im1=i-1 -a=yuend(x[,R[1]],x[,R[i]],alpha=p.crit[im1],tr=tr) -pvec[im1]=a$p.value -output[im1,]=c(a$est1,R[[i]],a$est2,a$dif,a$ci[1],a$ci[2],a$p.value,p.crit[im1]) -} -Best='No Decisions' -flag=sum(output[,7]<=output[,8]) -id=output[,7]<=output[,8] -if(sum(id>0))Best=output[id,2] -if(flag==Jm1)Best='All' -setClass('BIN',slots=c('Group.with.largest.estimate','Larger.than','n','output')) -put=new('BIN',Group.with.largest.estimate=R[[1]],Larger.than=Best,n=n,output=output) -put -} - -cell.com<-function(x,i=1,j=2,alpha=.05,AUTO=TRUE,method='AC',data=NULL){ -# -# For a multinomial distribution, compuate a confidence interval -# for p_i-p_j, the difference between the probabilities asscoiated with cells i and j -# -# x= cell frequencies -# -if(!is.null(data))x=splot(data)$frequencies -n=sum(x) -c1=binom.conf(x[i],n,AUTO=AUTO,method=method,alpha=alpha,pr=FALSE)$ci -c2=binom.conf(x[j],n,AUTO=AUTO,method=method,alpha=alpha,pr=FALSE)$ci -p1=x[i]/n -p2=x[j]/n -COR=0-sqrt(p1*p1/((1-p1)*(1-p2))) -T1=(p1-c1[1])^2+(p2-c2[2])^2-2*COR*(p1-c1[1])*(c2[2]-p2) -T2=(p1-c1[2])^2+(p2-c2[1])^2-2*COR*(c1[2]-p1)*(p2-c2[1]) -T2=max(c(0,T2)) -T1=max(c(0,T1)) -L=p1-p2-sqrt(T1) -U=p1-p2+sqrt(T2) -list(cells.compared=c(i,j),dif=p1-p2,Estimates=x/n,ci=c(L,U)) -} - - -cell.com.pv<-function(x,i=1,j=2,method='AC',data=NULL){ -# -# For a multinomial distribution, compute a confidence interval -# for p_i-p_j, the difference between the probabilities asscoiated with cells i and j -# -# x= cell frequencies -# -if(!is.null(data))x=splot(data)$frequencies -n=sum(x) -p1=x[i]/n -p2=x[j]/n -COR=0-sqrt(p1*p1/((1-p1)*(1-p2))) -a=seq(.001,.1,.001) -a=c(a,seq(.1,.99,.01)) -a=rev(a) - -if(x[i]==x[j])pv=1 -else{ -for(k in 1:length(a)){ -c2=acbinomci(x[j],n,alpha=a[k])$ci -c1=acbinomci(x[i],n,alpha=a[k])$ci -T1=(p1-c1[1])^2+(p2-c2[2])^2-2*COR*(p1-c1[1])*(c2[2]-p2) -T2=(p1-c1[2])^2+(p2-c2[1])^2-2*COR*(c1[2]-p1)*(p2-c2[1]) -T2=max(c(0,T2)) -T1=max(c(0,T1)) -L=p1-p2-sqrt(T1) -U=p1-p2+sqrt(T2) -pv=a[k] -if(sign(L*U)<0)break -}} -if(n<=35){ -if(x[i]==x[j])pvnew=1 -else{ -pv.up=pv+.1 -anew=seq(pv,pv.up,.01) -for(k in 1:length(anew)){ -c1=binom.conf(x[i],n,AUTO=TRUE,method=method,alpha=anew[k],pr=FALSE)$ci -c2=binom.conf(x[j],n,AUTO=TRUE,method=method,alpha=anew[k],pr=FALSE)$ci 
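-# Combine the two marginal intervals into an interval for p_i-p_j at level anew[k];
-# the first level at which zero is excluded is taken as the p-value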
-T1=(p1-c1[1])^2+(p2-c2[2])^2-2*COR*(p1-c1[1])*(c2[2]-p2) -T2=(p1-c1[2])^2+(p2-c2[1])^2-2*COR*(c1[2]-p1)*(p2-c2[1]) -T2=max(c(0,T2)) -T1=max(c(0,T1)) -L=p1-p2-sqrt(T1) -U=p1-p2+sqrt(T2) -pvnew=anew[k] -if(sign(L*U)>0)break -}} -pv=pvnew -} -pv -} - - -TM<-function(x,bend=1.28){ - - ## x has list mode - ## Computes TM test statistic. - ## "mestse" is used as the standard error of one-step M-estimator and - ## "mad" is used as a measure of scale. - - X<-lapply(x,na.omit) - f1<-function(t){length(t[abs((t-median(t))/mad(t))>bend])} - alist<-X - f<-(sapply(alist,length))-(sapply(alist,f1)) - s=sapply(alist,mestse)^2 - wden=sum(1/s) - w=(1/s)/wden - xplus<-sum(w*(sapply(alist,onestep))) - tt<-((sapply(alist,onestep))-xplus)/sqrt(s) - TM<-sum(tt^2) - list(TM=TM) -} - -boot.TM<-function(x,nboot=599,alpha=.05,SEED=TRUE){ -# -# Global test for equal M-measures of location, J independent groups -# -# This is method TM in 5th Ed of Intro to Robust Estimation and Testing -# -if(SEED)set.seed(2) -B=nboot - if(is.matrix(x) || is.data.frame(x))xlist=listm(x) - else xlist=x - xlist=elimna(xlist) - T.test<-TM(xlist)$TM - k<-length(xlist) - ylist<-vector(mode="list",length=k) - TT<-numeric(B) - b<-floor((1-alpha)*B) - onesteps<-sapply(xlist,onestep) - for (i in 1:B){ - j<-1 - repeat { - ylist[[j]]<-(sample(xlist[[j]],length(xlist[[j]]),replace=T)-onesteps[j]) - if (mad(ylist[[j]])>0) j<-j+1 #MAD must be greater than zero for every bootstrap sample - if (j>k)break - } - TT[i]<-TM(ylist,alpha)$TM - } - TT=sort(TT) - if(T.test>=TT[b]){1} else{0} -pv=mean(T.test<=TT) -list(Est.=onesteps,p.value=pv) -} - - - -cat.dat.ci<-function(x,alpha=.05){ -# -# x is assumed to be discrete with a relatively small -# sample space. -# For each oberved value, x, compute a confidence interval -# for the probability that x occurs -# -x=elimna(x) -n=length(x) -v=unique(x) -v=sort(v) -N=length(v) -M=matrix(NA,nrow=N,ncol=4) -for(i in 1:N){ -M[i,1]=v[i] -z=sum(x==v[i]) -a=binom.conf(z,n,pr=FALSE) -M[i,2]=a$phat -M[i,3:4]=a$ci -} -dimnames(M)=list(NULL,c('x','Est.','ci.low','ci.up')) -list(output=M) -} - -smvar.DO<-function(x,est=winsd,nboot=1000,SEED=TRUE,pr=TRUE,...){ -# -# For J independent groups. -# Determine whether it is reasonable to -# decide which group has smallest robust measure of variation -# -# Default is the Winsorized standard deviation -# -if(is.matrix(x)||is.data.frame(x))x<-listm(x) -J=length(x) -e=lapply(x,est,...) -e=pool.a.list(e) -id=which(e==min(e)) -id=id[1] -e=lapply(x,est,...) -e=pool.a.list(e) -pv=NA -CON=conCON(J,id)$conCON -a=linconpb(x,con=CON,est=est,nboot=nboot,SEED=SEED,...)$output[,3] -pv=max(a) -list(Group.Smallest=id,Est.=e,p.value=pv) -} - -manES<-function(x1,x2,method=NULL,pro.p=0.8,nboot=100,...){ -# -# Estimate probability of a correct classification -# for two independent groups having -# unknown multivariate distributions -# -# The function estimates misclassification rates using -# techniques indicated by the argument -# method. -# method=NULL means that methods 'KNN','DIS','DEP','SVM','RF','NN','PRO','LSM','GBT' -# are used See function CLASS.fun -# -# The lowest value is used as the -# estimate or a correct classification. -# -if(is.null(method))method=c('KNN','DIS','DEP','SVM','RF','NN','PRO','LSM','GBT') -if(method[1]=='ALL')method=NULL -a=class.error.com(x1,x2,method=method,pro.p=pro.p,nboot=nboot,...) 
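-# Rank the methods by their estimated error rate; one minus the smallest rate
-# estimates the probability of a correct classification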
-IOR=order(a$Error.rates[1,]) -e=1-min(a$Error.rates[1,]) -LAB=dimnames(a$Error.rates)[[2]][IOR[1]] -list(Method.Used=LAB,Prob.Correct.Decision=e) -} - -MCWB<-function(x,tr=.2,alpha=.05,SEED=TRUE,REPS=5000,...){ -# -# J independent groups -# Multiple comparisons with the best based on trimmed means -# Control FWE when all J have a common trimmed mean. -# -# -chk=0 -if(is.matrix(x)||is.data.frame(x))x<-listm(x) -J=length(x) -for(j in 1:J)x[[j]]=elimna(x[[j]]) -e=lapply(x,tmean,tr) -e=pool.a.list(e) -n=pool.a.list(lapply(x,length)) -id=which(e==max(e)) -CON=conCON(J,id)$conCON -pcrit=MCWB.crit(n=n,alpha=alpha,SEED=SEED,REPS=REPS,...) -a=lincon(x,con=CON,pr=FALSE) -numsig=sum(a$psihat[,5]<=pcrit) -list(n=a$n,tests=a$test,psihat=a$psihat,con=CON, -Best.Group=id,Est.=e,IND.p.values=a$psihat[,5],p.crit=pcrit, -num.sig=numsig) -} - -MCWB.crit<-function(n,alpha,SEED=TRUE,REPS=5000,...){ -if(SEED)set.seed(3) -J=length(n) -z=list() -REM=NA -for(i in 1:REPS){ -for(j in 1:J)z[[j]]=rnorm(n[j]) -e=lapply(z,tmean,tr) -e=pool.a.list(e) -id=which(e==max(e)) -CON=conCON(J,id)$conCON -a=lincon(z,con=CON,pr=FALSE) -REM[i]=min(a$psihat[,5]) -} -hd(REM,alpha) -} - -ESprodis<-function(x,est=tmean,REP=10,DIF=FALSE,SEED=TRUE,...){ -# -# Independent groups. -# Compute an effect size based on projection distances -# -if(SEED)set.seed(2) -if(is.matrix(x))x=listm(x) -J=length(x) -n=pool.a.list(lapply(x,length)) -nmin=min(n) -V=var(n) -if(V==0)E=ESprodis.EQ(x,est=est,DIF=DIF,REP=REP,...) -if(V!=0){ -E=NA -XS=list() -for(i in 1:REP){ -for(j in 1:J)XS[[j]]=sample(x[[j]],nmin) - -E[i]=ESprodis.EQ(XS,est=est,DIF=DIF,...) -} -E=mean(E) -} -E -} - - -ESprodis.EQ<-function(x,est=tmean,REP=1,DIF=TRUE,iter=1,...){ -# -# Independent groups. -# Compute an effect size based on projection distances -# Equal sample sizes -# -if(is.matrix(x))x=listm(x) -J=length(x) -n=pool.a.list(lapply(x,length)) -nord=order(n) -nmin=n[nord[1]] -XS=list() -E=NA -for(k in 1:REP){ -for(j in 1:J)XS[[j]]=sample(x[[j]]) -if(!DIF)E[k]=rmES.pro(XS,est=est,iter=iter,...)$effect.size -if(DIF)E[k]=rmES.dif.pro(XS,est=est,...) -} -E=mean(E) -E -} - - - -PcorREG.best.DO<-function(x,y,neg.col=NULL, -LARGEST=TRUE,xout=FALSE,outfun=outpro,...){ -# -# Can a decision be made about which IV -# has the strongest Pearson correlation with the DV -# -# x is assumed to be a matrix or data frame -# -# -if(nrow(x)!=length(y))stop('x and y have different sample sizes; should be equal') -p=ncol(x) -p1=p+1 -pm1=p-1 -m1=cbind(x,y) -m1<-elimna(m1) # Eliminate rows with missing values -nval=nrow(m1) -x<-m1[,1:p] -y=m1[,p1] -if(xout){ -m<-cbind(x,y) -flag<-outfun(x,plotit=FALSE,...)$keep -m<-m[flag,] -x<-m[,1:p] -y<-m[,p1] -} -x=neg.colM(x,neg.col) -est=NA -for(j in 1:p)est[j]=cor(x[,j],y) -if(LARGEST)ID=which(est==max(est)) -if(!LARGEST)ID=which(est==min(est)) -a=matrix(NA,nrow=pm1,ncol=7) -dimnames(a)=list(NULL,c('Best.IV','IV','Est.best','Est','dif','ci.low','ci.up')) -ic=0 -for(j in 1:p){ -if(j!=ID){ -ic=ic+1 -b=TWOpov(x[,c(ID,j)],y) -a[ic,]=c(ID,j,b$est.rho1,b$est.rho2,b$dif,b$ci[1],b$ci[2]) -}} -chk=sign(a[,6]*a[,7]) -D='No Decision' -if(sum(chk)==pm1)D=paste('Decide IV',ID,' is best') -list(output=a, Result=D) -} - -two.dep.pb<-function(x,y=NULL,alpha=.05,est=tmean,plotit=FALSE,dif=TRUE, -nboot=NA,xlab='Group 1',ylab='Group 2',pr=TRUE,SEED=TRUE,...){ -# -# Two dependent groups -# Compare measures of location via a percentile bootstrap. -# Trimmed mean used by default. 
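-#
-# Illustrative example (sketch only, hypothetical data):
-# before=rnorm(40); after=before+rnorm(40,mean=.3)
-# two.dep.pb(before,after)  # dif=TRUE: tests whether the typical difference score is zero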
-# -# Calls rmmcppb, provided for convenience -# nboot, number of bootstrap samples defaults to 1000 -# -if(pr){ -if(dif)print('dif=TRUE, difference scores were used') -if(!dif)print('dif=FALSE, marginal trimmed means were used') -} -if(is.null(y)){ -if(ncol(x)!=2)stop('y is null so x should have two columns') -} -if(!is.null(y)){ -xy=cbind(x,y) -xy=elimna(xy) -x=xy[,1] -y=xy[,2] -} -e=apply(cbind(x,y),2,est,...) -a=rmmcppb(x,y,est=est,nboot=nboot,alpha=alpha,SR=FALSE,SEED=SEED, -plotit=plotit,dif=dif,BA=FALSE,pr=FALSE,...)$output -if(!dif){ -output=matrix(c(e[1],e[2],a[1,2],a[1,3],a[1,5],a[1,6]),nrow=1) -dimnames(output)=list(NULL,c('Est.1','Est.2','Est.dif','p.value','ci.lower','ci.upper')) -} -if(dif){ -output=matrix(c(a[1,2],a[1,3],a[1,5],a[1,6]),nrow=1) -dimnames(output)=list(NULL,c('Est.typical.dif','p.value','ci.lower','ci.upper')) -} -output -} - -bmean<-function(x,na.rm=TRUE){ -# -# Compute a skipped estimator of location. -# where outliers are flagged based on a boxplot rule -# -if(na.rm)x<-x[!is.na(x)] #Remove missing values -flag<-outbox(x)$keep -es<-mean(x[flag]) -es -} - -zwe<-function(x,k=3,C=0.2){ -# -# Zuo's (2010) weighted estimator -# -x=elimna(x) -SD=abs((x-median(x)))/mad(x,constant=1) -D=1/(SD+1) -n=length(x) -IDGE =rep(0,n) -flag=D >= C -IDGE[flag]=1 -IDLT=rep(0,n) -flag=D0.5 || a$lims[iu,1]<0.5)break -} -A=wmw.bca(x,y,alpha=alpha,nboot=nboot,SEED=SEED) -list(n1=n1,n2=n2,phat=est$phat,ci.low=A$ci.low,ci.upper=A$ci.upper,p.value=pv) -} - - -wmw.bca<-function(x,y,alpha=.05,nboot=1000,SEED=TRUE,...){ -# -# BCA confidence interval for P(X0. || a$lims[iu,1]<0.)break -} -A=cor.skip.com(x,y,corfun=corfun,outfun=outpro,alpha=.05,nboot=nboot,SEED=SEED,...) -ci=c(a$lims[1,1],a$lims[3,1]) -list(n=nrow(m),Est1=est1,Est2=est2,difference=dif,ci.low=A[1],ci.upper=A[2],p.value=pv) -} - -runbin.CI<-function(x,y,pts=NULL,fr=1.2,xout=FALSE,outfun=outpro){ -# -# Based on running interval smoother, for each point in pts, compute a confidence -# interval for probability of success based on the nearest neighbors -# -xx<-cbind(x,y) -xx<-elimna(xx) -n=nrow(xx) -p1=ncol(xx) -p=p1-1 -x=xx[,1:p] -y=xx[,p1] -if(is.null(pts))pts=x -pts=unique(pts) -pts=as.matrix(pts) -x=as.matrix(x) -if(p>1)m=cov.mve(x) -npts=nrow(pts) -output=matrix(NA,npts,5) -dimnames(output)=list(NULL,c('n','pts.no','Est','ci.low','ci.upper')) -for(i in 1:npts){ -if(p==1)Z=y[near(x[,1],pts[i,],fr)] -if(p>1)Z=y[near3d(x,pts[i,],fr,m)] -if(length(Z)>5){ -a=binom.conf(sum(Z),length(Z),pr=FALSE) -output[i,3]=a$phat -output[i,2]=i -output[i,1]=a$n -output[i,4]=a$ci[1] -output[i,5]=a$ci[2] -}} -list(points=pts,output=output) -} - - -wmw.RZR<-function(x,y,nboot=1000,SEED=TRUE){ -# -# Perform the Reiczigel et al. 
(2005) improvement of of the -# Wilcoxon--Mann--Whitney test -# -if(SEED)set.seed(2) -val=0 -n1=length(x) -n2=length(y) -xy=rank(c(x,y)) -N=n1+n2 -n1p1=n1+1 -a=yuen(xy[1:n1],xy[n1p1:N],tr=0)$teststat -#print(a) -LOC=loc2dif(x,y) -x=x-a -y=y-a -#print(yuen(x,x2,tr=0)) -bval=0 -for(i in 1:nboot){ -z1=sample(x,n1,replace=TRUE) -z2=sample(y,n2,replace=TRUE) -XY=rank(c(z1,z2)) -bval[i]=yuen(XY[1:n1],XY[n1p1:N],tr=0)$teststat -} -#print(bval[1:10]) -pv1=mean(a>bval) -pv2=mean(a8)print('No adjustment available when J>8') -else es=fix[J1]*es -} -es -} - - -KMS.inter.pbci<-function(x,tr=.2,alpha=.05,nboot=1000,SEED=TRUE,SW=FALSE){ -# -# For a 2-by-2 design, compare -# explanatory power associated with the two levels of the first factor -# -# SW=TRUE, switches rows and column - -if(SEED)set.seed(2) -if(is.matrix(x) || is.data.frame(x))x=listm(x) -if(length(x)!=4)stop('Should have four groups exactly') -for(j in 1:4)x[[j]]=elimna(x[[j]]) -if(SW)x=x[c(1,3,2,4)] -v=list() -dif=NA -for(i in 1:nboot){ -for(j in 1:4)v[[j]]=sample(x[[j]],replace=TRUE) -a1=kms.effect(v[[1]],v[[2]],tr=tr)$effect.size -a2=kms.effect(v[[3]],v[[4]],tr=tr)$effect.size -dif[i]=a1-a2 -} -dif=sort(dif) -ilow<-round((alpha/2) * nboot) -ihi<-nboot - ilow -ilow<-ilow+1 -ci=dif[ilow] -ci[2]=dif[ihi] -pv=mean(dif<0)+.5*mean(dif==0) -pv=2*min(pv,1-pv) -a1=kms.effect(x[[1]],x[[2]],tr=tr)$effect.size -a2=kms.effect(x[[3]],x[[4]],tr=tr)$effect.size -Dif=a1-a2 -list(Est.1=a1, Est.2=a2,Dif=Dif,ci=ci,p.value=pv) -} - -KMSinter.mcp<-function(J,K,x,tr=.2,alpha=.05,nboot=999,SEED=TRUE,SW=FALSE){ -# -# Interactions based on KMS measure of effect size -# -if(is.matrix(x) || is.data.frame(x))x=listm(x) -con=con2way(J,K)$conAB -if(SW){ -JK=J*K -M=matrix(c(1:JK),nrow=J,byrow=TRUE) -M=as.vector(M) -x=x[M] -con=con2way(K,J)$conAB -} -num=ncol(con) -CON=matrix(NA,nrow=num,ncol=8) -dimnames(CON)=list(NULL,c('Con.num','Est.1','Est.2','Dif','ci.low','ci.up','p.value','p.adjusted')) -#CON=list() -for(j in 1:ncol(con)){ -id=which(con[,j]!=0) -dat=x[id] -temp=pool.a.list(KMS.inter.pbci(dat,tr=tr,alpha=alpha,nboot=nboot,SEED=SEED)) -CON[j,1]=j -CON[j,2:7]=temp -} -CON[,8]=p.adjust(CON[,7],method='hoch') -list(CON=CON,con=con) -} - -QS.inter.pbci<-function(x,locfun=median,alpha=.05,nboot=1000,SEED=TRUE,SW=FALSE){ -# -# For a 2-by-2 design, characterize an interaction -# in terms of a quantile shift measure of effect size -# -# SW=TRUE, switches rows and column -# -if(SEED)set.seed(2) -if(is.matrix(x))x=listm(x) -if(length(x)!=4)stop('There should be exactly four groups') -for(j in 1:4)x[[j]]=elimna(x[[j]]) -if(SW)x=x[c(1,3,2,4)] -v=list() -dif=NA -a1=NA -a2=NA -for(i in 1:nboot){ -for(j in 1:4)v[[j]]=sample(x[[j]],replace=TRUE) -a1[i]=shiftQS(v[[1]],v[[2]],locfun=locfun)$Q.Effect -a2[i]=shiftQS(v[[3]],v[[4]],locfun=locfun)$Q.Effect -} -dif=a1-a2 -dif=sort(dif) -ilow<-round((alpha/2) * nboot) -ihi<-nboot - ilow -ilow<-ilow+1 -ci=dif[ilow] -ci[2]=dif[ihi] -pv=mean(dif<0)+.5*mean(dif==0) -pv=2*min(pv,1-pv) -a1=shiftQS(x[[1]],x[[2]],locfun=locfun)$Q.Effect -a2=shiftQS(x[[3]],x[[4]],locfun=locfun)$Q.Effect -Dif=a1-a2 -list(Est.1=a1, Est.2=a2,Dif=Dif,ci=ci,p.value=pv) -} - - -QSinter.mcp<-function(J,K,x,alpha=.05,nboot=999,SEED=TRUE,SW=FALSE){ -# -# Interactions based on measure of effect size -# -if(is.matrix(x) || is.data.frame(x))x=listm(x) -con=con2way(J,K)$conAB -if(SW){ -JK=J*K -M=matrix(c(1:JK),nrow=J,byrow=TRUE) -M=as.vector(M) -x=x[M] -con=con2way(K,J)$conAB -} -num=ncol(con) -CON=matrix(NA,nrow=num,ncol=8) 
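-# Each row of CON stores one interaction contrast: the two quantile shift effect
-# sizes, their difference, a percentile bootstrap confidence interval, the p-value
-# and the adjusted p-value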
-dimnames(CON)=list(NULL,c('Con.num','Est.1','Est.2','Dif','ci.low','ci.up','p.value','p.adjusted')) -for(j in 1:ncol(con)){ -id=which(con[,j]!=0) -dat=x[id] -temp=pool.a.list(QS.inter.pbci(dat,alpha=alpha,nboot=nboot,SEED=SEED)) -CON[j,1]=j -CON[j,2:7]=temp -} -CON[,8]=p.adjust(CON[,7],method='hoch') -list(CON=CON,con=con) -} - -smgrid2M<-function(x,y,IV=c(1,2),Qsplit1=.5,Qsplit2=.5,tr=.2,PB=FALSE,est=tmean,nboot=1000,pr=TRUE,fun=ES.summary, -xout=FALSE,outfun=outpro,SEED=TRUE,...){ -# -# Split on two variables, -# -# Qsplit: split the independent variable based on the -# quantiles indicated by Qsplit -# Example -# Qsplit1=c(.25,.5,.75) -# Qsplit2=.5 -# would split based on the quartiles for the first independent variable and the median -# for the second independent variable -# - -# Then test main effects based on trimmed means -# IV[1]: indicates the column of containing the first independent variable to use. -# IV[2]: indicates the column of containing the second independent variable to use. -# -# if(length(unique(y)>2))stop('y should be binary') -x=as.matrix(x) -p=ncol(x) -if(p==1)stop('There should be two or more independent variables') -p1=p+1 -xy<-elimna(cbind(x,y)) -x<-xy[,1:p] -y<-xy[,p1] -v=NULL -if(xout){ -flag<-outfun(x,plotit=FALSE,...)$keep -x<-x[flag,] -y<-y[flag] -xy=cbind(x,y) -} -J=length(Qsplit1)+1 -K=length(Qsplit2)+1 -JK=J*K -MAT=matrix(1:JK,J,K,byrow=TRUE) -z=list() -group=list() -N.int=length(Qsplit1)+1 -N.int2=length(Qsplit2)+1 -est.mat=matrix(NA,nrow=N.int,ncol=N.int2) -n.mat=matrix(NA,nrow=N.int,ncol=N.int2) -DV.mat=matrix(NA,nrow=N.int,ncol=N.int2) -L1=NULL -L2=NULL -qv=quantile(x[,IV[1]],Qsplit1) -qv=c(min(x[,IV[1]]),qv,max(x[,IV[1]])) -qv2=quantile(x[,IV[2]],Qsplit2) -qv2=c(min(x[,IV[2]]),qv2,max(x[,IV[2]])) -ic=0 -for(j in 1:N.int){ -j1=j+1 -xsub=binmat(xy,IV[1],qv[j],qv[j1]) -for(k in 1:N.int2){ -k1=k+1 -xsub2=binmat(xsub,IV[2],qv2[k],qv2[k1]) -est.mat[j,k]=est(xsub2[,p1],...) -n.mat[j,k]=length(xsub2[,p1]) -ic=ic+1 -z[[ic]]=xsub2[,p1] -if(length(z[[ic]])>6)group[[ic]]=summary(xsub2[,1:p]) -} -} -n=NA -for(j in 1:length(z)){ -n[j]=length(z[[j]]) -} -if(min(n)<=5){ -id=which(n>5) -del=which(n<=5) -n=n[id] -if(pr)print(paste('For group',del,'the sample size is less than 6')) -} -A=t2way(J,K,z,tr=tr) -A -} - -ANOG2KMS<-function(J,K,x,tr=.2,alpha=.05,iter=5000,nulldist=NULL,SEED=TRUE,FAC.B=FALSE,...){ -# -# Two-way ANOVA independent groups. -# Compare global KMS measure of effect size for all pairs of rows -# Example: for row of Factor A, compute KMS global effect size. Do the sample for -# row 2 and test the hypothesis that they are the same. Do this for all pairs of rows. -# -# Can do the same for Factor B by setting -# FAC.B=TRUE. 
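-#
-# Illustrative example (sketch only, hypothetical data for a 2-by-3 design):
-# dat=lapply(1:6,function(i)rnorm(30))
-# ANOG2KMS(2,3,dat)            # compare the global KMS effect size of row 1 to row 2
-# ANOG2KMS(2,3,dat,FAC.B=TRUE) # do the analogous comparisons for the columns (Factor B)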
-# -# -if(is.matrix(x) || is.data.frame(x))x=listm(x) -JK=J*K -mat=matrix(c(1:JK),nrow=J,byrow=TRUE) -if(FAC.B){ -ic=0 -y=list() -for(j in 1:J){ -for(k in 1:K){ -ic=ic+1 -y[ic]=x[mat[j,k]] -}} -x=y -rem.J=J -J=K -K=rem.J -mat=t(mat) -} -num=(J^2-J)/2 -n=pool.a.list(lapply(x,length)) -if(!is.null(nulldist))V=ND -if(is.null(nulldist)){ -if(SEED)set.seed(2) -ndist=NA -V=matrix(NA,iter,num) -ic=0 -for(j in 1:J){ -for(jj in 1:J){ -if(j6)group[[ic]]=summary(xsub2[,1:p]) -} -} -n=NA -for(j in 1:length(z)){ -n[j]=length(z[[j]]) -} -if(min(n)<=5){ -id=which(n>5) -del=which(n<=5) -n=n[id] -if(pr)print(paste('eliminated group',del,'The sample size is less than 6')) -} -a=AOV2KMS.mcp(J,K,z,tr=tr,nboot=nboot) -b=AOV2KMS.mcp(J,K,z,tr=tr,nboot=nboot,FAC.B=TRUE) -list(Factor.A=a,Factor.B=b) -} - -AOV2KMS.mcp<-function(J,K,x,tr=.2,alpha=.05,nboot=500,SEED=TRUE,FAC.B=FALSE,...){ -# -# Two-way ANOVA independent groups. -# Compare average KMS measure of effect size for all pairs of rows and columns -# Example: for row of Factor A, compute KMS global effect size. Do the sample for -# row 2 and test the hypothesis that the averages are the same. Do this for all pairs of rows. -# -# -if(SEED)set.seed(2) -if(is.matrix(x) || is.data.frame(x))x=listm(x) -JK=J*K -mat=matrix(c(1:JK),nrow=J,byrow=TRUE) -if(FAC.B){ -ic=0 -y=list() -for(j in 1:J){ -for(k in 1:K){ -ic=ic+1 -y[ic]=x[mat[j,k]] -}} -x=y -rem.J=J -J=K -K=rem.J -mat=t(mat) -} -num=(J^2-J)/2 -A=matrix(NA,num,8) -ic=0 -for (j in 1:J){ -for(jj in 1:J){ -if(j6)group[[ic]]=summary(xsub2[,1:p]) -} -} -n=NA -for(j in 1:length(z)){ -n[j]=length(z[[j]]) -} -if(min(n)<=5){ -id=which(n>5) -del=which(n<=5) -n=n[id] -if(pr)print(paste('eliminated group',del,'The sample size is less than 6')) -} -a=ANOG2KMS(J,K,z,tr=tr,iter=iter,nulldist=nulldist.a) -b=ANOG2KMS(J,K,z,tr=tr,iter=iter,FAC.B=TRUE,nulldist=nulldist.b) -list(Factor.A=a,Factor.B=b) -} - -anova.KMS.ND<-function(n,tr=.2,iter=5000,nulldist=nulldist,SEED=TRUE){ -# -# Null distribution for anova.KMS -# -if(SEED)set.seed(2) -v=NA -dat=list() -J=length(n) # number of groups -for(i in 1:iter){ -for(L in 1:J)dat[[L]]=ghdist(n[L],g=0.75) -v[i]=KS.ANOVA.ES(dat,tr=tr) -} -v -} - - -wAKP.avg<-function(x,tr=.2){ -# -# Have J dependent groups. For each pair of groups, compute -# AKP type measure of effect size and average the results -# -# For tr=0, get Cohen d type measure -# -# -a=wmcpAKP(x,tr=tr) -e=mean(a[,3]) -e -} - -AOV2KMS<-function(J,K,x,tr=.2,alpha=.05,nboot=500,SEED=TRUE,...){ -# -# Two-way ANOVA independent groups. -# Compare averages KMS measure of effect size for all pairs of rows and columns -# -if(SEED)set.seed(2) -A=AOV2KMS.mcp(J,K,x,tr=tr,alpha=alpha,SEED=FALSE) -B=AOV2KMS.mcp(J,K,x,tr=tr,alpha=alpha,SEED=FALSE,FAC.B=TRUE) -list(Factor.A=A,Factor.B=B) -} - - -ANOG2KMS.ND<-function(J,K,n,tr=.2,iter=5000,SEED=TRUE,FAC.B=FALSE,...){ -# -# Two-way ANOVA independent groups. -# Compare global KMS measure of effect size for all pairs of rows and columns -# Example: for row of Factor A, compute KMS global effect size. Do the sample for -# row 2 and test the hypothesis that they are the same. Do this for all pairs of rows. -# -# Repeat for the columns of Factor B. 
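-# The null distribution is generated by simulation using the sample sizes in n;
-# the result is intended to be supplied to ANOG2KMS through its nulldist argument
-# so that it does not have to be recomputed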
-# -# -if(SEED)set.seed(2) -ndist=NA -JK=J*K -mat=matrix(c(1:JK),nrow=J,byrow=TRUE) -num=(J^2-J)/2 -if(FAC.B)num(K^2-K)/2 -V=matrix(NA,iter,num) -ic=0 -if(!FAC.B){ -for(j in 1:J){ -for(jj in 1:J){ -if(j2))stop('y should be binary') -x=as.matrix(x) -p=ncol(x) -if(p==1)stop('There should be two or more independent variables') -p1=p+1 -xy<-elimna(cbind(x,y)) -x<-xy[,1:p] -y<-xy[,p1] -v=NULL -if(xout){ -flag<-outfun(x,plotit=FALSE,...)$keep -x<-x[flag,] -y<-y[flag] -xy=cbind(x,y) -} -J=length(Qsplit1)+1 -K=length(Qsplit2)+1 -z=list() -group=list() -if(is.null(VAL1) || is.null(VAL2)){ -N.int=length(Qsplit1)+1 -N.int2=length(Qsplit2)+1 -} -else { -J=length(VAL1)+1 -K=length(VAL2)+1 -N.int=length(VAL1)+1 -N.int2=length(VAL2)+1 -} -JK=J*K -MAT=matrix(1:JK,J,K,byrow=TRUE) -est.mat=matrix(NA,nrow=N.int,ncol=N.int2) -n.mat=matrix(NA,nrow=N.int,ncol=N.int2) -DV.mat=matrix(NA,nrow=N.int,ncol=N.int2) -L1=NULL -L2=NULL -if(is.null(VAL1) || is.null(VAL2)){ -qv=quantile(x[,IV[1]],Qsplit1) -qv=c(min(x[,IV[1]]),qv,max(x[,IV[1]])) -qv2=quantile(x[,IV[2]],Qsplit2) -qv2=c(min(x[,IV[2]]),qv2,max(x[,IV[2]])) -} -else{ -qv=VAL1 -qv=c(min(x[,IV[1]]),qv,max(x[,IV[1]])) -qv2=VAL2 -qv2=c(min(x[,IV[2]]),qv2,max(x[,IV[2]])) -} -ic=0 -for(j in 1:N.int){ -j1=j+1 -xsub=binmat(xy,IV[1],qv[j],qv[j1]) -for(k in 1:N.int2){ -k1=k+1 -xsub2=binmat(xsub,IV[2],qv2[k],qv2[k1]) -est.mat[j,k]=est(xsub2[,p1],...) -n.mat[j,k]=length(xsub2[,p1]) -ic=ic+1 -z[[ic]]=xsub2[,p1] -if(length(z[[ic]])>6)group[[ic]]=summary(xsub2[,1:p]) -} -} -n=NA -for(j in 1:length(z)){ -n[j]=length(z[[j]]) -} -if(min(n)<=5){ -id=which(n>5) -del=which(n<=5) -n=n[id] -if(pr)print(paste('For group',del,'the sample size is less than 6')) -} -A=t2way(J,K,z) -A -} - -KMS.ci<-function(x,y,tr=.2,alpha=.05,null.val=0,nboot=500,SEED=TRUE,...){ -# -# confidence interval for the difference between to KMS -# measures of effect size. -# -if(SEED)set.seed(2) -x=elimna(x) -y=elimna(y) -n1=length(x) -n2=length(y) -v=NA -ef=kms.effect(x,y)$effect.size -for(i in 1:nboot){ -X=sample(x,n1,replace=TRUE) -Y=sample(y,n2,replace=TRUE) -v[i]=kms.effect(X,Y)$effect.size -} -v=sort(v) -ilow<-round((alpha/2) * nboot) -ihi<-nboot - ilow -ilow<-ilow+1 -ci=v[ilow] -ci[2]=v[ihi] -pv=mean(v0)points(x[a$out.id,1],x[a$out.id,2]) -flag=which(d>=median(d)) -xx<-x[flag,] -xord<-order(xx[,1]) -xx<-xx[xord,] -temp<-chull(xx) -lines(xx[temp,]) -lines(xx[c(temp[1],temp[length(temp)]),]) -}} -a -} - -KMSgrid.mcp<-function(x,y,IV=c(1,2),Qsplit1=.5,Qsplit2=.5,VAL1=NULL,VAL2=NULL,alpha=05,SW=FALSE, -nulldist=NULL,est=tmean,iter=1000,pr=TRUE,method='hoch', -xout=FALSE,outfun=outpro,SEED=TRUE,...){ -# -# Compare robust, heteroscedastic measures of effect size, the KMS measure for two or more groups -# among grids defined by quantiles of two IVs. -# Uses the sign version of KMS (the two group) case rather than the variation measure used by KMSgridRC -# -# The method tests for main effects based on the -# signed version (not the squared version) of the KMS measure of effect size. -# -# Qsplit: split the independent variable based on the -# quantiles indicated by Qsplit -# Example -# Qsplit1=c(.25,.5,.75) -# Qsplit2=.5 -# would split based on the quartiles for the first independent variable and the median -# for the second independent variable -# -# Basically, reduce the data to a two-way ANOVA design and examine main effects. 
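-# For instance, Qsplit1=c(.25,.5,.75) and Qsplit2=.5 yield a 4-by-2 grid of groups,
-# since each split produces length(Qsplit)+1 intervals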
-# -# -if(!is.null(VAL1))Qsplit1=PVALS(x[,IV[1]],VAL1) -if(!is.null(VAL2))Qsplit2=PVALS(x[,IV[2]],VAL2) -J=length(Qsplit1)+1 -K=length(Qsplit2)+1 -x=as.matrix(x) -p=ncol(x) -if(p==1)stop('There should be two or more independent variables') -p1=p+1 -xy<-elimna(cbind(x,y)) -x<-xy[,1:p] -y<-xy[,p1] -v=NULL -if(xout){ -flag<-outfun(x,plotit=FALSE,...)$keep -x<-x[flag,] -y<-y[flag] -xy=cbind(x,y) -} -z=list() -group=list() -N.int=length(Qsplit1)+1 -N.int2=length(Qsplit2)+1 -est.mat=matrix(NA,nrow=N.int,ncol=N.int2) -n.mat=matrix(NA,nrow=N.int,ncol=N.int2) -DV.mat=matrix(NA,nrow=N.int,ncol=N.int2) -L1=NULL -L2=NULL -for(i in 1:N.int)L1[i]=paste('IV1.G',i) -for(i in 1:N.int2)L2[i]=paste('IV2.G',i) -dimnames(est.mat)=list(L1,L2) -qv=quantile(x[,IV[1]],Qsplit1) -qv=c(min(x[,IV[1]]),qv,max(x[,IV[1]])) -qv2=quantile(x[,IV[2]],Qsplit2) -qv2=c(min(x[,IV[2]]),qv2,max(x[,IV[2]])) -ic=0 -for(j in 1:N.int){ -j1=j+1 -xsub=binmat(xy,IV[1],qv[j],qv[j1]) -for(k in 1:N.int2){ -k1=k+1 -xsub2=binmat(xsub,IV[2],qv2[k],qv2[k1]) -est.mat[j,k]=est(xsub2[,p1],...) -n.mat[j,k]=length(xsub2[,p1]) -ic=ic+1 -z[[ic]]=xsub2[,p1] -if(length(z[[ic]])>6)group[[ic]]=summary(xsub2[,1:p]) -} -} -n=NA -for(j in 1:length(z)){ -n[j]=length(z[[j]]) -} -if(min(n)<=5){ -id=which(n>5) -del=which(n<=5) -n=n[id] -if(pr)print(paste('eliminated group',del,'The sample size is less than 6')) -} -a=KMSinter.mcp(J,K,z,tr=tr,SW=SW) -a -} - -smgrid.est<-function(x,y,est=tmean,IV=c(1,2),Qsplit1 = c(.3,.7),Qsplit2 = c(.3,.7),tr=.2,xout=FALSE,outfun=outpro,...){ -# -# Splits the data into groups -# -x=as.matrix(x) -p=ncol(x) -if(p==1)stop('There should be two or more independent variables') -p1=p+1 -xy<-elimna(cbind(x,y)) -x<-xy[,1:p] -y<-xy[,p1] -v=NULL -if(xout){ -flag<-outfun(x,plotit=FALSE,...)$keep -x<-x[flag,] -y<-y[flag] -} -J=length(Qsplit1)+1 -K=length(Qsplit2)+1 -JK=J*K -qv=quantile(x[,IV[1]],Qsplit1) -qv=c(min(x[,IV[1]]),qv,max(x[,IV[1]])) -qv2=quantile(x[,IV[2]],Qsplit2) -qv2=c(min(x[,IV[2]]),qv2,max(x[,IV[2]])) -ic=0 -N.int=length(Qsplit1)+1 -N.int2=length(Qsplit2)+1 -est.mat=matrix(NA,nrow=N.int,ncol=N.int2) -for(j in 1:N.int){ -j1=j+1 -xsub=binmat(xy,IV[1],qv[j],qv[j1]) -for(k in 1:N.int2){ -k1=k+1 -xsub2=binmat(xsub,IV[2],qv2[k],qv2[k1]) -if(!is.null(dim(xsub2))) -est.mat[j,k]=est(xsub2[,p1],...) -}} -est.mat -} - -KMS.ES.M<-function(x,y){ -# -# Computes the robust effect size using a simple generalization of the method in -# Kulinskaya, E., Morgenthaler, S. & Staudte, R. (2008). -# Meta Analysis: A guide to calibrating and combining statistical evidence p. 177 -# based on an M-estimator and percentage bend variance -#Cohen d=.2, .5 .8 correspond to .1, .25 and .4') (KMS p. 180) - -library(MASS) -x<-elimna(x) -y<-elimna(y) -n1<-length(x) -n2<-length(y) -N=n1+n2 -q=n1/N -s1sq=pbvar(x) -s2sq=pbvar(y) -t1=onestep(x) -t2=onestep(y) -top=q*s1sq+(1-q)*s2sq -bot=q*(1-q) -sigsq=top/bot # Quantity in brackets KMS p. 176 eq 21.1 -varrho=s2sq/s1sq -d1=(t1-t2)/sqrt(sigsq) -list(effect.size=d1,Cohen.d.equiv=2*d1) -} - -rplotN<-function(x,y,nsub=1000,est=tmean,fr=1,xout=FALSE,xlab='X',ylab='Y',zlab='',ticktype = 'simple',theta = 50, phi = 25, scale = TRUE, - expand = 0.5, SEED = TRUE,frame=TRUE){ -# - # Running interval smoother, good for large sample sizes or plots of the - # regression surface without a scatter plot. 
- # - # nsub is size of the random sample of the data used to predict outcome using all of the data - # - if(SEED)set.seed(2) - x=as.matrix(x) -p=ncol(x) -p1=p+1 - xy=cbind(x,y) - xy=elimna(xy) - n=nrow(xy) - nsub=min(n,nsub) - id=sample(n,nsub) -x=xy[,1:p] -y=xy[,p1] -x=as.matrix(x) -w=rplot.pred(x,y,pts=x[id,],fr=fr)$Y.hat -a=lplot(x[id,],w,xlab=xlab,ylab=ylab,zlab=zlab,ticktype=ticktype,frame=frame,phi=phi,theta=theta,scale=scale,pr=FALSE) -} - -lplotN<-function(x,y,nsub=1000,est=tmean,fr=1,xout=FALSE,xlab='X',ylab='Y',zlab='', -ticktype = 'simple',theta = 50, phi = 25, scale = TRUE, - expand = 0.5, SEED = TRUE,frame=TRUE){ -# - # Running interval smoother, good for large sample sizes or plots of the - # regression surface without a scatter plot. - # - # nsub is size of the random sample of the data used to predict outcome using all of the data - # - if(SEED)set.seed(2) - x=as.matrix(x) -p=ncol(x) -p1=p+1 - xy=cbind(x,y) - xy=elimna(xy) - n=nrow(xy) - nsub=min(n,nsub) - id=sample(n,nsub) -x=xy[,1:p] -y=xy[,p1] -x=as.matrix(x) -w=lplot.pred(x,y,pts=x[id,],fr=fr)$yhat -a=lplot(x[id,],w,xlab=xlab,ylab=ylab,zlab=zlab,ticktype=ticktype, -frame=frame,phi=phi,theta=theta,scale=scale,pr=FALSE) -} - -bwquantile<-function(M1,M2,alpha=.05,nboot=1000,SEED=TRUE,q=.5,...){ -# -# M1 and M2 are assumed to be matrices with two columns -# They are random samples from some bivariate distribution from -# two independent groups -# -# For example, -# have two dependent groups, e.g., same subjects under two conditions, -# Have two independent groups, e.g., male and female -# -# Consider difference between males and females at condition 1, estimate difference between quantiles -# Under condition 2, does this difference differ from the difference under condition 1? -# -# q indicates the quantile to be used -# -# -# REQUIRES WRS PACKAGE OR THE FUNCTIONS IN RALLFUN-V38 -# -M1=elimna(M1) -M2=elimna(M2) -n1=nrow(M1) -n2=nrow(M2) -e1=apply(M1,2,hd,q) -e2=apply(M2,2,hd,q) -dif1=e1[1]-e2[1] -dif2=e1[2]-e2[2] -dif=dif1-dif2 -DIF=NA -for(i in 1:nboot){ -id1=sample(n1,replace=TRUE) -id2=sample(n2,replace=TRUE) -B1=apply(M1[id1,],2,hd,q) -B2=apply(M2[id2,],2,hd,q) -DIF[i]=B1[1]-B2[1]-B1[2]+B2[2] -} -DIF=sort(DIF) -pv=mean(DIF<0) -pv=2*min(pv,1-pv) -ilow<-round((alpha/2) * nboot) -ihi<-nboot - ilow -ilow<-ilow+1 -ci=DIF[ilow] -ci[2]=DIF[ihi] -list(p.value=pv,ci=ci) -} - -ghtransform<-function(x,g=0,h=0){ -# -# transform normal data in x to a g-and-h distribution - if (g>0){ - ghdist<-(exp(g*x)-1)*exp(h*x^2/2)/g - } - if(g==0)ghdist<-x*exp(h*x^2/2) - ghdist - } - - plot.ghdist<-function(g=0,h=0,xlab='',ylab='f(x)'){ -# -# plot density function of a g-and-h distribution -# -x=seq(-3,3,.05) -pf=dnorm(x) -xs=ghtransform(x,g=g,h=h) -plot(xs,pf,type='n',xlab=xlab,ylab=ylab) -lines(xs,pf) -} - - - -mulcen.region<-function(m,region=.05,plotit=TRUE,est=median, -xlab="VAR 1",ylab="VAR 2",...){ -# -# -# m is an n-by-2 matrix -# -# region=.05 means that the function -# determine the 1-.05=.95 deepest points and then plots the convex hull -# containing these points. 
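-#
-# Illustrative example (sketch only, hypothetical data):
-# m=matrix(rnorm(200),ncol=2)
-# mulcen.region(m,region=.1)  # plot and return the convex hull of the .9 deepest points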
-# -# Returns the points that form the convex hull -# -# -m<-as.matrix(m) -est=apply(m,2,est) -if(ncol(m)!=2)stop('Argument m should be a matrix with two columns') -temp<-fdepth(m,plotit=FALSE,center=est) #Defaults to using the marginal medians -flag=(temp>=qest(temp,region)) -xx<-m[flag,] -xord<-order(xx[,1]) -xx<-xx[xord,] -temp<-chull(xx) -if(plotit){ -plot(m[,1],m[,2],xlab=xlab,ylab=ylab) -lines(xx[temp,]) -lines(xx[c(temp[1],temp[length(temp)]),]) -} -list(center=est,convex.hull.pts=xx[temp,]) -} - -mulcen.region.MF<-function(m,region=.05,est=median){ -# -# region=.05 means that the function -# determine the 1-.05=.95 -# This is done for each formula -# Assume m is a matrix or data frame having -# J columns. First two columns first formula, next two columns next formula.. -# -# So J should be an even integer -# -J=ncol(m) -N=J/2 -if(N != floor(N))stop('Should have an even number of columns') -region=list() -centers=list() -id=c(-1,0) -for(j in 1:N){ -id=id+2 -a=mulcen.region(m[,id],plotit=FALSE,est=est) -centers[[j]]=a$center -region[[j]]=a$convex.hull.pts -n=nrow(elimna(m[id])) -n=as.integer(n) -centers[[j]]=c(a$center,n) -names(centers[[j]])=c('V1','V2','N') -} -list(centers=centers,convex.hull.pts=region) -} - - -oph.astig.datasetconvexpoly.median<-function(m,Region=.05,plotit=FALSE,xlab='V1',ylab='V2'){ -# -# region=.05 means that the function -# determine the 1-.05=.95 -# This is done for each formula -# Assume m is a matrix or data frame having -# J columns. First two columns first formula, next two columns next formula.. -# -# So J should be an even integer -# -J=ncol(m) -N=J/2 -if(N != floor(N))stop('Should have an even number of columns') -region=list() -centers=list() -id=c(-1,0) -for(j in 1:N){ -id=id+2 -a=mulcen.region(elimna(m[,id]),region=Region,plotit=FALSE,xlab=xlab,ylab=ylab) -centers[[j]]=a$center -region[[j]]=a$convex.hull.pts -n=nrow(elimna(m[id,])) -n=as.integer(n) -centers[[j]]=c(a$center,n) -names(centers[[j]])=c('V1','V2','N') -} -if(plotit){ -M=m -if(N>1)par(mfrow=c(2,2)) -id=c(-1,0) -for(j in 1:N){ -id=id+2 -m=M[,id] -m=elimna(m) -m=as.matrix(m) -temp<-fdepth(m,plotit=FALSE) #Defaults to using the marginal medians -flag=(temp>=qest(temp,Region)) -xx<-m[flag,] -xord<-order(xx[,1]) -xx<-xx[xord,] -temp<-chull(xx) -plot(m[,1],m[,2],xlab=xlab,ylab=ylab,pch=pch,xlim=c(-1.5,1.5),ylim=c(-1.5,1.5),asp=2/3) -lines(xx[temp,]) -lines(xx[c(temp[1],temp[length(temp)]),]) -} -par(mfrow=c(1,1)) -} -list(centers=centers,convex.hull.pts=region) -} - -oph.astig.datasetconvexpoly.mean<-function(m,Region=.05,plotit=FALSE,xlab='V1',ylab='V2'){ -# -# region=.05 means that the function -# determine the 1-.05=.95 -# This is done for each formula -# Assume m is a matrix or data frame having -# J columns. First two columns first formula, next two columns next formula.. 
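-# For example, with three formulas m would have six columns.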
-# -# So J should be an even integer -# -J=ncol(m) -N=J/2 -if(N != floor(N))stop('Should have an even number of columns') -region=list() -centers=list() -id=c(-1,0) -for(j in 1:N){ -id=id+2 -a=mulcen.region(elimna(m[,id]),region=Region,plotit=FALSE,xlab=xlab,ylab=ylab,est=mean) -centers[[j]]=a$center -region[[j]]=a$convex.hull.pts -n=nrow(elimna(m[id,])) -n=as.integer(n) -centers[[j]]=c(a$center,n) -names(centers[[j]])=c('V1','V2','N') -} -if(plotit){ -M=m -if(N>1)par(mfrow=c(2,2)) -id=c(-1,0) -for(j in 1:N){ -id=id+2 -m=M[,id] -m=elimna(m) -m=as.matrix(m) -temp<-fdepth(m,plotit=FALSE) #Defaults to using the marginal medians -flag=(temp>=qest(temp,Region)) -xx<-m[flag,] -xord<-order(xx[,1]) -xx<-xx[xord,] -temp<-chull(xx) -plot(m[,1],m[,2],xlab=xlab,ylab=ylab) -lines(xx[temp,]) -lines(xx[c(temp[1],temp[length(temp)]),]) -} -par(mfrow=c(1,1)) -} -list(centers=centers,convex.hull.pts=region) -} - - - -psmm.x=function(x, c, r, nu) { - snu = sqrt(nu) - sx = snu * x - lgx = log(snu) - lgamma(nu/2) + (1 - nu/2) * log(2) + - (nu - 1) * log(sx) + (-sx^2/2) - exp(r * log(2 * pnorm(c * x) - 1) + lgx) - } - -psmm = function(x, r, nu) { - res = integrate(psmm.x, 0, Inf, c = x, r = r, nu = nu) - res$value - } - - qsmm<-function(q, r, nu) { - #r=number of comparisons - if (!is.finite(nu)) - return(qnorm(1 - 0.5 * (1 - q^(1/r)))) - res = uniroot(function(c, r, nu, q) { - psmm(c, r = r, nu = nu) - q - }, - c(0, 100), r = r, nu = nu, q = q) - res$root - } - - lincon<-function(x,con=0,tr=.2,alpha=.05,pr=FALSE){ -# -# A heteroscedastic test of d linear contrasts using trimmed means. -# -# This version uses an improved method for computing the quantiles of a -# Studentized maximum modulus distriburtion -# -# The data are assumed to be stored in $x$ in list mode, a matrix -# or a data frame. If in list mode, -# length(x) is assumed to correspond to the total number of groups. -# It is assumed all groups are independent. -# -# con is a J by d matrix containing the contrast coefficients that are used. -# If con is not specified, all pairwise comparisons are made. -# -# Missing values are automatically removed. -# -# pr=FALSE included to avoid errors using an earlier version of this function when -# dealing with two-way and higher designs -# -# Adjusted p-values are based on the Studentized maximum modulus distribution with the -# goal of controlling FWE -# -# To apply the Kaiser-Bowden method, use the function kbcon -# -if(tr==.5)stop('Use the R function medpb to compare medians') -if(is.data.frame(x))x=as.matrix(x) -if(is.matrix(x))x<-listm(x) -if(!is.list(x))stop('Data must be stored in a matrix or in list mode.') -con<-as.matrix(con) -J<-length(x) -sam=NA -h<-vector('numeric',J) -w<-vector('numeric',J) -xbar<-vector('numeric',J) -for(j in 1:J){ -xx<-!is.na(x[[j]]) -val<-x[[j]] -x[[j]]<-val[xx] # Remove missing values -sam[j]=length(x[[j]]) -h[j]<-length(x[[j]])-2*floor(tr*length(x[[j]])) - # h is the number of observations in the jth group after trimming. 
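-# w[j] is the estimated squared standard error of the jth trimmed mean:
-# (n_j - 1) times the Winsorized variance divided by h_j(h_j - 1)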
-w[j]<-((length(x[[j]])-1)*winvar(x[[j]],tr))/(h[j]*(h[j]-1)) -xbar[j]<-mean(x[[j]],tr) -} -if(sum(con^2)==0){ -CC<-(J^2-J)/2 -psihat<-matrix(0,CC,9) -dimnames(psihat)<-list(NULL,c('Group','Group','psihat','ci.lower','ci.upper', -'p.value','Est.1','Est.2','adj.p.value')) -test<-matrix(NA,CC,6) -dimnames(test)<-list(NULL,c('Group','Group','test','crit','se','df')) -jcom<-0 -for (j in 1:J){ -for (k in 1:J){ -if (j < k){ -jcom<-jcom+1 -test[jcom,3]<-abs(xbar[j]-xbar[k])/sqrt(w[j]+w[k]) -sejk<-sqrt(w[j]+w[k]) -test[jcom,5]<-sejk -psihat[jcom,1]<-j -psihat[jcom,2]<-k -test[jcom,1]<-j -test[jcom,2]<-k -psihat[jcom,3]<-(xbar[j]-xbar[k]) -df<-(w[j]+w[k])^2/(w[j]^2/(h[j]-1)+w[k]^2/(h[k]-1)) -test[jcom,6]<-df -psihat[jcom,6]<-2*(1-pt(test[jcom,3],df)) -psihat[jcom,7]=xbar[j] -psihat[jcom,8]=xbar[k] -crit=qsmm(1-alpha,CC,df) -test[jcom,4]<-crit -psihat[jcom,4]<-(xbar[j]-xbar[k])-crit*sejk -psihat[jcom,5]<-(xbar[j]-xbar[k])+crit*sejk -psihat[jcom,9]=1-psmm(test[jcom,3],CC,df) -}}}} -if(sum(con^2)>0){ -if(nrow(con)!=length(x)){ -stop('The number of groups does not match the number of contrast coefficients.') -} -CC=ncol(con) -psihat<-matrix(0,ncol(con),6) -dimnames(psihat)<-list(NULL,c('con.num','psihat','ci.lower','ci.upper', -'p.value','adj.p.value')) -test<-matrix(0,ncol(con),5) -dimnames(test)<-list(NULL,c('con.num','test','crit','se','df')) -df<-0 -for (d in 1:ncol(con)){ -psihat[d,1]<-d -psihat[d,2]<-sum(con[,d]*xbar) -sejk<-sqrt(sum(con[,d]^2*w)) -test[d,1]<-d -test[d,2]<-sum(con[,d]*xbar)/sejk -df<-(sum(con[,d]^2*w))^2/sum(con[,d]^4*w^2/(h-1)) -crit=qsmm(1-alpha,CC,df) -test[d,3]<-crit -test[d,4]<-sejk -test[d,5]<-df -psihat[d,3]<-psihat[d,2]-crit*sejk -psihat[d,4]<-psihat[d,2]+crit*sejk -psihat[d,5]<-2*(1-pt(abs(test[d,2]),df)) -psihat[d,6]=1-psmm(abs(test[d,2]),CC,df) -} -} -list(n=sam,test=test,psihat=psihat) -} - -linconpb<-function(x,alpha=.05,nboot=NA,grp=NA,est=tmean,con=0,method='holm',bhop=FALSE,SEED=TRUE,...){ -# -# Multiple comparisons for J independent groups using trimmed means -# -# A percentile bootstrap method with Rom's method is used. -# -# The data are assumed to be stored in x -# which either has list mode or is a matrix. In the first case -# x[[1]] contains the data for the first group, x[[2]] the data -# for the second group, etc. Length(x)=the number of groups = J. -# If stored in a matrix, the columns of the matrix correspond -# to groups. -# -# est is the measure of location and defaults to the median -# ... can be used to set optional arguments associated with est -# -# The argument grp can be used to analyze a subset of the groups -# Example: grp=c(1,3,5) would compare groups 1, 3 and 5. -# -# Missing values are allowed. -# -con<-as.matrix(con) -if(is.matrix(x) || is.data.frame(x))x<-listm(x) -if(bhop)method='BH' -if(!is.list(x))stop('Data must be stored in list mode or in matrix mode.') -if(!is.na(sum(grp))){ # Only analyze specified groups. -xx<-list() -for(i in 1:length(grp))xx[[i]]<-x[[grp[i]]] -x<-xx -} -J<-length(x) -tempn<-0 -mvec<-NA -for(j in 1:J){ -temp<-x[[j]] -temp<-temp[!is.na(temp)] # Remove missing values. -tempn[j]<-length(temp) -x[[j]]<-temp -mvec[j]<-est(temp,...) 
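-# mvec[j] is the location estimate for the jth group after removing missing values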
-} -Jm<-J-1 -# -# Determine contrast matrix -# -if(sum(con^2)==0){ -ncon<-(J^2-J)/2 -con<-matrix(0,J,ncon) -id<-0 -for (j in 1:Jm){ -jp<-j+1 -for (k in jp:J){ -id<-id+1 -con[j,id]<-1 -con[k,id]<-0-1 -}}} -ncon<-ncol(con) -if(nrow(con)!=J)stop('Something is wrong with con; the number of rows does not match the number of groups.') -# Determine nboot if a value was not specified -if(is.na(nboot)){ -nboot<-5000 -if(J <= 8)nboot<-4000 -if(J <= 3)nboot<-2000 -} -# Determine critical values -if(method!='BH'){ -if(alpha==.05){ -dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) -if(ncon > 10){ -avec<-.05/c(11:ncon) -dvec<-c(dvec,avec) -}} -if(alpha==.01){ -dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) -if(ncon > 10){ -avec<-.01/c(11:ncon) -dvec<-c(dvec,avec) -}} -if(alpha != .05 && alpha != .01){ -dvec<-alpha/c(1:ncon) -} -} -if(method=='BH')dvec<-(ncon-c(1:ncon)+1)*alpha/ncon -bvec<-matrix(NA,nrow=J,ncol=nboot) -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -for(j in 1:J){ -data<-matrix(sample(x[[j]],size=length(x[[j]])*nboot,replace=TRUE),nrow=nboot) -bvec[j,]<-apply(data,1,est,...) # Bootstrapped values for jth group -} -test<-NA -bcon<-t(con)%*%bvec #ncon by nboot matrix -tvec<-t(con)%*%mvec -for (d in 1:ncon){ -tv<-sum(bcon[d,]==0)/nboot -test[d]<-sum(bcon[d,]>0)/nboot+.5*tv -if(test[d]> .5)test[d]<-1-test[d] -} -test<-2*test -output<-matrix(0,ncon,7) -dimnames(output)<-list(NULL,c('con.num','psihat','p.value','p.crit','ci.lower','ci.upper','p.adjusted')) -temp2<-order(0-test) -zvec<-dvec[1:ncon] -sigvec<-(test[temp2]>=zvec) -output[temp2,4]<-zvec -icl<-round(dvec[ncon]*nboot/2)+1 -icu<-nboot-icl-1 -for (ic in 1:ncol(con)){ -output[ic,2]<-tvec[ic,] -output[ic,1]<-ic -output[ic,3]<-test[ic] -temp<-sort(bcon[ic,]) -output[ic,5]<-temp[icl] -output[ic,6]<-temp[icu] -} -output[,7]=p.adjust(output[,3],method=method) -num.sig<-sum(output[,3]<=output[,4]) -list(output=output,con=con,num.sig=num.sig) -} - wwmcp.miss<-function(J,K,x,tr=.2,alpha=.05,nboot=500,SEED=TRUE){ -# -# Do all multiple comparisons for a within-by-within design -# using trimmed means in a manner that uses all of the data when some -# values are missing. -# -conM=con2way(J,K) -A=rmmismcp(x,con=conM$conA,tr=tr,alpha=alpha,SEED=SEED,nboot=nboot) -B=rmmismcp(x,con=conM$conB,tr=tr,alpha=alpha,nboot=nboot,SEED=SEED) -AB=rmmismcp(x,con=conM$conAB,tr=tr,alpha=alpha,nboot=nboot,SEED=SEED) -list(Factor_A=A,Factor_B=B,Factor_AB=AB) -} - -smgridAB<-function(x,y,IV=c(1,2),Qsplit1=.5,Qsplit2=.5,tr=.2,VAL1=NULL,VAL2=NULL, -PB=FALSE,est=tmean,nboot=1000,pr=TRUE,fun=ES.summary, -xout=FALSE,outfun=outpro,SEED=TRUE,...){ -# -# Split on two variables, not just one. -# -# Qsplit: split the independent variable based on the -# quantiles indicated by Qsplit -# Example -# Qsplit1=c(.25,.5,.75) -# Qsplit2=.5 -# would split based on the quartiles for the first independent variable and the median -# for the second independent variable -# -# Alternatively, can split the data based on specified values indicating by the arguments -# VAL1 and VAL2 -# -# Then test the hypothesis of equal measures of location -# IV[1]: indicates the column of containing the first independent variable to use. -# IV[2]: indicates the column of containing the second independent variable to use. 
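-#
-# Illustrative example (sketch only, hypothetical data):
-# x=matrix(rnorm(400),ncol=2); y=x[,1]+rnorm(200)
-# smgridAB(x,y,Qsplit1=c(.25,.5,.75),Qsplit2=.5)  # 4-by-2 grid; levels compared via lincon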
-# -# if(length(unique(y)>2))stop('y should be binary') -x=as.matrix(x) -p=ncol(x) -if(p==1)stop('There should be two or more independent variables') -p1=p+1 -xy<-elimna(cbind(x,y)) -x<-xy[,1:p] -y<-xy[,p1] -v=NULL -if(xout){ -flag<-outfun(x,plotit=FALSE,...)$keep -x<-x[flag,] -y<-y[flag] -xy=cbind(x,y) -} -J=length(Qsplit1)+1 -K=length(Qsplit2)+1 -z=list() -group=list() -if(is.null(VAL1) || is.null(VAL2)){ -N.int=length(Qsplit1)+1 -N.int2=length(Qsplit2)+1 -} -else { -J=length(VAL1)+1 -K=length(VAL2)+1 -N.int=length(VAL1)+1 -N.int2=length(VAL2)+1 -} -JK=J*K -MAT=matrix(1:JK,J,K,byrow=TRUE) -est.mat=matrix(NA,nrow=N.int,ncol=N.int2) -n.mat=matrix(NA,nrow=N.int,ncol=N.int2) -DV.mat=matrix(NA,nrow=N.int,ncol=N.int2) -L1=NULL -L2=NULL -if(is.null(VAL1) || is.null(VAL2)){ -qv=quantile(x[,IV[1]],Qsplit1) -qv=c(min(x[,IV[1]]),qv,max(x[,IV[1]])) -qv2=quantile(x[,IV[2]],Qsplit2) -qv2=c(min(x[,IV[2]]),qv2,max(x[,IV[2]])) -} -else{ -qv=VAL1 -qv=c(min(x[,IV[1]]),qv,max(x[,IV[1]])) -qv2=VAL2 -qv2=c(min(x[,IV[2]]),qv2,max(x[,IV[2]])) -} -ic=0 -for(j in 1:N.int){ -j1=j+1 -xsub=binmat(xy,IV[1],qv[j],qv[j1]) -for(k in 1:N.int2){ -k1=k+1 -xsub2=binmat(xsub,IV[2],qv2[k],qv2[k1]) -est.mat[j,k]=est(xsub2[,p1],...) -n.mat[j,k]=length(xsub2[,p1]) -ic=ic+1 -z[[ic]]=xsub2[,p1] -if(length(z[[ic]])>6)group[[ic]]=summary(xsub2[,1:p]) -} -} -n=NA -for(j in 1:length(z)){ -n[j]=length(z[[j]]) -} -if(min(n)<=5){ -id=which(n>5) -del=which(n<=5) -n=n[id] -if(pr)print(paste('For group',del,'the sample size is less than 6')) -} -A=list() -B=list() -A.ES=list() -B.ES=list() -for(j in 1:J)A.ES[[j]]=IND.PAIR.ES(z[MAT[j,]],fun=fun)$effect.size[[1]] -for(j in 1:K)B.ES[[j]]=IND.PAIR.ES(z[MAT[,j]],fun=fun)$effect.size[[1]] -if(!PB){ -for(j in 1:J)A[[j]]=lincon(z[MAT[j,]],tr=tr,pr=FALSE)$psihat -for(j in 1:K)B[[j]]=lincon(z[MAT[,j]],tr=tr,pr=FALSE)$psihat -} -if(PB){ -for(j in 1:J)A[[j]]=linpairpb(z[MAT[j,]],est=est,nboot=nboot,...)$output -for(j in 1:K)B[[j]]=linpairpb(z[MAT[,j]],est=est,nboot=nboot,...)$output -} -list(est.loc.4.DV=est.mat,n=n.mat,A=A,B=B,A.effect.sizes=A.ES,B.effect.sizes=B.ES) -} - - -linconbt<-function(x,con=0,tr=.2,alpha=.05,nboot=599,pr=FALSE,SEED=TRUE,method='holm'){ -# -# Compute a 1-alpha confidence interval for a set of d linear contrasts -# involving trimmed means using the bootstrap-t bootstrap method. -# Independent groups are assumed. -# -# The data are assumed to be stored in x in list mode. Thus, -# x[[1]] contains the data for the first group, x[[2]] the data -# for the second group, etc. Length(x)=the number of groups = J, say. -# -# Missing values are automatically removed. -# -# con is a J by d matrix containing the contrast coefficents of interest. -# If unspecified, all pairwise comparisons are performed. -# For example, con[,1]=c(1,1,-1,-1,0,0) and con[,2]=c(,1,-1,0,0,1,-1) -# will test two contrasts: (1) the sum of the first two trimmed means is -# equal to the sum of the second two, and (2) the difference between -# the first two is equal to the difference between the trimmed means of -# groups 5 and 6. -# -# The default number of bootstrap samples is nboot=599 -# -# This function uses functions trimparts and trimpartt written for this -# book. -# -# -# -# -if(is.data.frame(x))x=as.matrix(x) -con<-as.matrix(con) -if(is.matrix(x))x<-listm(x) -if(!is.list(x))stop('Data must be stored in a matrix or in list mode.') -J<-length(x) -for(j in 1:J){ -xx<-x[[j]] -x[[j]]<-xx[!is.na(xx)] # Remove any missing values. 
-} -Jm<-J-1 -d<-(J^2-J)/2 -FLAG=FALSE -if(sum(con^2)==0){ -FLAG=TRUE -con=con.all.pairs(J) -} -if(nrow(con)!=length(x))stop('The number of groups does not match the number of contrast coefficients.') -bvec<-array(0,c(J,2,nboot)) -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -nsam=matl(lapply(x,length)) -for(j in 1:J){ -xcen<-x[[j]]-mean(x[[j]],tr) -data<-matrix(sample(xcen,size=length(x[[j]])*nboot,replace=TRUE),nrow=nboot) -bvec[j,,]<-apply(data,1,trimparts,tr) # A 2 by nboot matrix. The first row -# contains the bootstrap trimmed means, the second row -# contains the bootstrap squared standard errors. -} -m1<-bvec[,1,] # J by nboot matrix containing the bootstrap trimmed means -m2<-bvec[,2,] # J by nboot matrix containing the bootstrap sq. se. -boot<-matrix(0,ncol(con),nboot) -for (d in 1:ncol(con)){ -top<-apply(m1,2,trimpartt,con[,d]) -# A vector of length nboot containing psi hat values -consq<-con[,d]^2 -bot<-apply(m2,2,trimpartt,consq) -boot[d,]<-abs(top)/sqrt(bot) -} -testb<-apply(boot,2,max) -ic<-floor((1-alpha)*nboot) -ic.crit=ic -testb<-sort(testb) -psihat<-matrix(0,ncol(con),4) -test<-matrix(0,ncol(con),5) -dimnames(psihat)<-list(NULL,c('con.num','psihat','ci.lower','ci.upper')) -dimnames(test)<-list(NULL,c('con.num','test','se','p.value','p.adjusted')) -for (d in 1:ncol(con)){ -test[d,1]<-d -psihat[d,1]<-d -testit<-lincon(x,con[,d],tr,pr=FALSE) -test[d,2]<-testit$test[1,2] -pval<-mean((abs(testit$test[1,2])invalid) -if(sum(flag,na.rm=TRUE)>0){ -print(paste('The value of argument invalid indicates that any value greater than', invalid,' is invalid')) -print(paste('Variable', j, 'has one or more invalid values')) -print('They occur in the following positions') -nr=c(1:length(x[[j]])) -print(nr[flag]) -if(STOP)stop() -}} -for(j in 1:J){ -flag=as.logical(x[[j]]<0) -if(sum(flag)>0){ -print(paste('Values less than zero were detected')) -print(paste('Variable', j, 'has one or more values=0')) -print('They occur in the following positions') -nr=c(1:length(x[[j]])) -print(nr[flag]) -if(STOP)stop() -} -} -Jm<-J-1 -d<-(J^2-J)/2 -FLAG=FALSE -if(sum(con^2)==0){ -FLAG=TRUE -con=con.all.pairs(J) -} -if(nrow(con)!=length(x))stop('The number of groups does not match the number of contrast coefficients.') -bvec<-array(0,c(J,2,nboot)) -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -nsam=matl(lapply(x,length)) -for(j in 1:J){ -xcen<-x[[j]]-mean(x[[j]],tr) -data<-matrix(sample(xcen,size=length(x[[j]])*nboot,replace=TRUE),nrow=nboot) -bvec[j,,]<-apply(data,1,trimparts,tr) # A 2 by nboot matrix. The first row -# contains the bootstrap trimmed means, the second row -# contains the bootstrap squared standard errors. -} -m1<-bvec[,1,] # J by nboot matrix containing the bootstrap trimmed means -m2<-bvec[,2,] # J by nboot matrix containing the bootstrap sq. se. 
-boot<-matrix(0,ncol(con),nboot) -for (d in 1:ncol(con)){ -top<-apply(m1,2,trimpartt,con[,d]) -# A vector of length nboot containing psi hat values -consq<-con[,d]^2 -bot<-apply(m2,2,trimpartt,consq) -boot[d,]<-abs(top)/sqrt(bot) -} -testb<-apply(boot,2,max) -ic<-floor((1-alpha)*nboot) -ic.crit=ic -testb<-sort(testb) -psihat<-matrix(0,ncol(con),4) -test<-matrix(0,ncol(con),5) -dimnames(psihat)<-list(NULL,c('con.num','psihat','ci.lower','ci.upper')) -dimnames(test)<-list(NULL,c('con.num','test','se','p.value','p.adjusted')) -for (d in 1:ncol(con)){ -test[d,1]<-d -psihat[d,1]<-d -testit<-lincon(x,con[,d],tr,pr=FALSE) -test[d,2]<-testit$test[1,2] -pval<-mean((abs(testit$test[1,2])invalid -if(sum(flag,na.rm=TRUE)>0){ -print(paste('The value of argument invalid indicates that any value greater than', invalid,' is invalid')) -print(paste('Variable', j, 'has one or more invalid values')) -print('They occur in the following positions') -nr=c(1:length(x[[j]])) -print(nr[flag]) -if(STOP)stop() -}} -for(j in 1:J){ -flag=as.logical(x[[j]]<0) -if(sum(flag)>0){ -print(paste('Values less than zero were detected')) -print(paste('Variable', j, 'has one or more values=0')) -print('They occur in the following positions') -nr=c(1:length(x[[j]])) -print(nr[flag]) -if(STOP)stop() -} -} -Jm<-J-1 -d<-(J^2-J)/2 -# -CC<-(J^2-J)/2 -output<-matrix(0,CC,7) -dimnames(output)<-list(NULL,c('V1','V2','n','Mean.1','Mean.2','p.value','adj.p.value')) -ic=0 -for(j in 1:J){ -for(k in 1:J){ -if(jSP2) -n[2]=sum(flag,na.rm=TRUE) -flag=(x[,1]>SP1 & x[,2]<=SP2) -n[3]=sum(flag,na.rm=TRUE) -flag=(x[,1]>SP1 & x[,2]>SP2) -n[4]=sum(flag,na.rm=TRUE) -m=matrix(n,2,2,byrow=TRUE) -dimnames(m)=list(c('V1.less','V1.greater'),c('V2.less','V2.greater')) -m -} - - - - -oph.astig.mcnemar<-function(x,method='holm',invalid=4){ -# -# Astigmatism: compare prediction formulas -# -if(is.null(dim(x)))stop('x should be a matrix or data frame') -J=ncol(x) #number of formulas -flag=abs(elimna(x))>invalid -if(sum(flag,na.rm=TRUE)>0){ -nr=c(1:nrow(x)) -if(sum(flag)>1){ -print(paste('The value of argument invalid indicates that any value greater in absolute value than', invalid,' is invalid')) -print('The following rows have invalid values') -} -if(sum(flag)==1){ -print(paste('The value of argument invalid indicates that any value greater in absolute value than', invalid,' is invalid')) -print('The following row has an invalid value') -} -irow=NA -ic=0 -N=nrow(x) -for(i in 1:N){ -iflag=abs(x[i,])>invalid -if(sum(iflag,na.rm=TRUE)>0){ -ic=ic+1 -irow[ic]=i -}} -print(irow) -istop() -} -CC=(J^2-J)/2 -output<-matrix(0,CC,9) -dimnames(output)<-list(NULL,c('D', ' Var', 'N< ' , '%<', 'Var', 'N<', '%< ', -'p.value','Adj.p.value')) -E=list() -TAB=list() -D=seq(.25,2,.25) #D intervals from .25 to 2 -for(L in 1:length(D)){ -ic=0 -for (j in 1:J){ -for (k in 1:J){ -if (j < k){ -ic=ic+1 -a=mat2table(x[,c(j,k)],D[L],D[L]) -n1=sum(x[[j]]<=D[L]) -pn1=mean(x[[j]]<=D[L]) -n2=sum(x[[k]]<=D[L]) -pn2=mean(x[[k]]<=D[L]) -if(sum(is.na(a)>0))print(paste('No data for VAR',j,'VAR',k,'D=',D[L])) -if(sum(is.na(a))==0){ -mct=mcnemar.test(a) -output[ic,1]=D[L] -output[ic,2]=j -output[ic,3]=n1 -output[ic,4]=pn1 -output[ic,5]=k -output[ic,6]=n2 -output[ic,7]=pn2 -output[ic,8]=mct[[3]] -if(a[1,2]==0 &a[2,1]==0)output[ic,8]=1 -}}}} -output[,9]=p.adjust(output[,8],method=method) -E[[L]]=output -} -E -} - -skipreg<-function(x,y,outfun=outpro.depth,Regfun=ols,...){ -# -# Skipped regression: remove outliers from cbind(x,y) using a method -# that takes into account the overall structure of the data 
cloud. -# -# other choices for outfun: -# outpro -# outmgv -# out -# -x<-as.matrix(x) -xx<-cbind(x,y) -xx<-elimna(xx) -temp<-NA -x<-as.matrix(x) -n=nrow(x) -a=outfun(xx) -id=a$keep -x<-xx[id,1:ncol(x)] -x<-as.matrix(x) -y<-xx[id,ncol(x)+1] -b=Regfun(x,y) -list(n=a$n,n.keep=a$n.keep,coef=b$coef) -} - - -oph.astig.datasetconvexpoly<-function(m,Region=.05,plotit=FALSE,xlab='V1',ylab='V2',pch='.',reset=TRUE){ -# -# region=.05 means that the function -# determine the 1-.05=.95 -# This is done for each formula -# Assume m is a matrix or data frame having -# J columns. First two columns first formula, next two columns next formula.. -# -# So J should be an even integer -# -J=ncol(m) -N=J/2 -if(N != floor(N))stop('Should have an even number of columns') -region=list() -centers=list() -id=c(-1,0) -for(j in 1:N){ -id=id+2 -a=mulcen.region(m[,id],region=Region,plotit=FALSE,xlab=xlab,ylab=ylab) -centers[[j]]=a$center -region[[j]]=a$convex.hull.pts -n=nrow(elimna(m[id,])) -n=as.integer(n) -centers[[j]]=c(a$center,n) -names(centers[[j]])=c('V1','V2','N') -} -if(plotit){ -par(pty='s') -M=m -if(N>1)par(mfrow=c(2,2)) -id=c(-1,0) -for(j in 1:N){ -id=id+2 -m=M[,id] -m=as.matrix(m) -temp<-fdepth(m,plotit=FALSE) #Defaults to using the marginal medians -flag=(temp>=qest(temp,Region)) -xx<-m[flag,] -xord<-order(xx[,1]) -xx<-xx[xord,] -temp<-chull(xx) -plot(m[,1],m[,2],xlab=xlab,ylab=ylab,pch=pch,xlim=c(-1.5,1.5),ylim=c(-1.5,1.5),asp=1) -lines(xx[temp,]) -lines(xx[c(temp[1],temp[length(temp)]),]) -} -if(reset)par(mfrow=c(1,1)) -} -list(centers=centers,convex.hull.pts=region) -} - - -oph.astig.indepintervals<-function(m,method='holm',invalid=4){ -# -# For column of x, compare frequencies using KMS method -# -# -# n: sample sizes -# x is a matrix or data frame with 8 rows -# -# -E=list() -ic=0 -J=ncol(m) -x=m -flag=abs(elimna(x))>invalid -if(sum(flag,na.rm=TRUE)>0){ -nr=c(1:nrow(x)) -if(sum(flag)>1){ -print(paste('The value of argument invalid indicates that any value greater in absolute value than', invalid,' is invalid')) -print('The following rows have invalid values') -} -if(sum(flag)==1){ -print(paste('The value of argument invalid indicates that any value greater in absolute value than', invalid,' is invalid')) -print('The following row has an invalid value') -} -irow=NA -ic=0 -N=nrow(x) -for(i in 1:N){ -iflag=abs(x[i,])>invalid -if(sum(iflag,na.rm=TRUE)>0){ -ic=ic+1 -irow[ic]=i -}} -print(irow) -stop() -} -id=matrix(NA,8,2) -x=matrix(NA,8,2) -INT=c(0.25,0.50, 0.75,1,1.25,1.5,1.75,2) -dimnames(id)=list(NULL,ncol=c('S1','S2')) -for (j in 1:J){ - for (k in 1:J){ - if (j < k){ - ic=ic+1 -id[,1]=rep(j,8) -id[,2]=rep(k,8) -# Next determine frequencies -S1=elimna(m[,j]) -S2=elimna(m[,k]) -n1=length(S1) -n2=length(S2) -for(L in 1:8){ -x[L,1]=sum(S1<=INT[L]) -x[L,2]=sum(S2<=INT[L]) -} -a=srg1.vs.2(c(n1,n2),x) -Adj.p.value=p.adjust(a[,3],method=method) -E[[ic]]=cbind(id,a,Adj.p.value) - }}} -E -} - -srg1.vs.2<-function(n,m,alpha=.05){ -# -# Goal: Compare proportions for each of 8 intervals. -# -# m has two columns. -# Col1: frequencies for first person -# Col2: frequencies for second person -# n: indicates the totals (number of cases) for each -# So n=c(100,120) would indicate 100 surgeries for the first -# and 120 for the second. 
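-# A minimal sketch with hypothetical counts (cumulative frequencies at the 8 cutoffs):
-# m=cbind(c(20,45,70,85,92,96,98,100),c(30,60,80,95,105,112,116,120))
-# srg1.vs.2(n=c(100,120),m)
-# Each row of the output compares the two proportions via binom2g and reports a p-value.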
- -INT=c( -'<= 0.25', -'<= 0.50', -'<= 0.75', -'<= 1.00', -'<= 1.25', -'<= 1.50', -'<= 1.75', -'<= 2.00') -output<-matrix(NA,ncol=3,nrow=8) -n1=n[1] -n2=n[2] -for(j in 1:8){ -r1=m[j,1] -r2=m[j,2] -a=binom2g(r1,n1,r2,n2,alpha=alpha) -output[j,]=c(a$p1,a$p2,a$p.value) -} -dimnames(output)=list(INT,c('p1','p2','p-value')) -output -} - -ABES.KS<-function(J,K,x,tr=0.2){ -# -# Effect size for Factor A, ignoring B and -# Factor B, ignoring A - # - # A robust heteroscedastic analog of Cohen's d is used - # - if(is.data.frame(x))x=as.matrix(x) - if(is.matrix(x))x=listm(x) - JK=J*K - mat=matrix(c(1:JK),nrow=J,byrow=TRUE) - A=list() - for(j in 1:J){ - id=mat[j,] - z=pool.a.list(x[id]) - A[[j]]=z - } - - B=list() - for(k in 1:K){ - id=mat[,k] - z=pool.a.list(x[id]) - B[[k]]=z - } - E1=KS.ANOVA.ES(A,tr=tr) - E2=KS.ANOVA.ES(B,tr=tr) -list(A.Effect.Size=E1,B.Effect.Size=E2) -} - -rmdif.scores<-function(x){ -# -# Compute all pairwise difference scores -# - - if(!is.matrix(x) & !is.data.frame(x))stop('x should be matrix or data frame') - x=elimna(x) - n=nrow(x) - J=ncol(x) - ALL=(J^2-J)/2 - M=matrix(NA,nrow=n,ncol=ALL) - ic=0 - for(j in 1:J){ - for(k in 1:J){ - if(j0 & ci[1]<0)ci=0.001 -#if(DIF<0 & ci[2]>0)ci[2]=0-0.001 -#} -#Eif(pv>=alpha){ -#if(DIF>0 & ci[1]>0)ci[1]=0-0.001 -#if(DIF<0 & ci[2]<0)ci[2]=-0.001 -#} -list(n1=n1,n2=n2,Est1=E1,Est2=E2,p.value=pv) -} - -bwESP.GLOB.B.NULL<-function(n1,n2,K,tr=.2,MM=FALSE,SEED=TRUE,iter=1000,g=0.,rho=0){ -if(SEED)set.seed(2) -ND=NA -for(i in 1:iter){ -M1=rmul(n1,p=K,g=g,rho=rho) -M2=rmul(n2,p=K,g=g,rho=rho) -ND[i]=rmESPRO.est(M1,MM=MM)-rmESPRO.est(M2,MM=MM) -} -ND -} - - - - rmESPRO.est<-function(x,est=tmean,MM=FALSE,...){ - # - # Estimate projection measure of effect size - # - if(is.list(x))x=matl(x) - x=elimna(x) - n=nrow(x) - E=apply(x,2,est,...) - GM=mean(E) - J=ncol(x) - GMvec=rep(GM,J) - GMvec=rep(GM,J) - DN=pdis(x,E,center=GMvec,MM=MM) - DN -} - -smean.depth<-function(m){ -# -# Skipped estimator based on projection for removing outliers. -# Uses random projections -# -m=elimna(m) -id=outpro.depth(m)$keep -val=apply(m[id,],2,mean) -val -} - -oph.astig.depbivmeans<-function(m,alpha=.05,nboot=1999,SEED=TRUE,tr=0){ -# -# This function is designed to compare two bivariate distributions relevant to -# prediction errors when dealing with astigmatism. -# -# Assume m is a matrix or data frame having -# J columns. First two columns first formula, next two columns next formula.. -# -# So J should be an even integer -# -# Compare col 1-2 to 3-4, then 1-2 vs 5-6, etc -# using difference scores. That is, col1 and 3, use difference scores, col 2 and 4, then col 1 and 5, etc. -# -# returns confidence interval for pairwise difference scores. -# alpha = .05 = .95 confidence intervals -# -# Estimates are adjusted if outliers are found based on a projection method. 
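-# A minimal sketch of the expected layout (hypothetical data): with four formulas,
-# m has eight columns; columns 1-2 hold the bivariate prediction errors for formula 1,
-# columns 3-4 for formula 2, and so on.
-# m=matrix(rnorm(800),ncol=8)
-# oph.astig.depbivmeans(m,alpha=.05,nboot=1999)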
-# -nullv=rep(0,2) -J=ncol(m) -N=J/2 -J1=J-1 -chk.n=names(m) -MAT=matrix(NA,nrow=2,ncol=4) -dimnames(MAT)=list(NULL,c('Mean 1','Mean 2','p.value','p.adjusted')) -if(N != floor(N))stop('Should have an even number of columns') -results=list() -mat=matrix(NA,nrow=N,ncol=2,byrow=TRUE) -v1=seq(1,J1,2) -mat[,1]=v1 -mat[,2]=v1+1 -ic=0 -for(j in 1:N){ -for(k in 1:N){ -if(j1)){ -d=jitter(m[id,j])/100 -m[id,j]=d -}} -if(plotit){ -plot.new() -if(N>1)par(mfrow=c(2,2)) -} -region=list() -centers=list() -val=list() -pv=list() -CENTERS=list() -id=c(-1,0) -for(j in 1:N){ -id=id+2 -a=smeancr.cord.oph(m[,id],SEED=SEED,plotit=FALSE,xlab=xlab,ylab=ylab,nboot=nboot) -centers[[j]]=a$center -region[[j]]=a$conf.region.points -val[[j]]=a$boot.vals -centers[[j]]=a$center -n=nrow(elimna(m[,id])) -n=as.integer(n) -CENTERS[[j]]=c(a$center,n) -names(CENTERS[[j]])=c('V1','V2','N') -pv[[j]]=a$p.value -} -VAL=val -#if(N>1)par(mfrow=c(2,2)) -id=c(-1,0) -for(j in 1:N){ -id=id+2 -n=nrow(m[,id]) -crit.level<-.05 -if(n<=120)crit.level<-.045 -if(n<=80)crit.level<-.04 -if(n<=60)crit.level<-.035 -if(n<=40)crit.level<-.03 -if(n<=30)crit.level<-.025 -if(n<=20)crit.level<-.02 -ic<-round((1-crit.level)*nboot) -val=VAL[[j]] -est=centers[[j]] -temp3<-est -ic<-round((1-crit.level)*nboot) -if(!MC)temp<-pdis(val,center=est) -if(MC)temp<-pdisMC(val,center=est) -temp.dis<-order(temp) -xx<-val[temp.dis[1:ic],] -xord<-order(xx[,1]) -xx<-xx[xord,] -temp<-chull(xx) -region[[j]]=xx[temp,] -if(plotit){ -#if(N>1)par(mfrow=c(2,2)) -plot(val[,1],val[,2],xlab=xlab,ylab=ylab) -points(temp3[1],temp3[2],pch="+") -lines(xx[temp,]) -lines(xx[c(temp[1],temp[length(temp)]),]) -} -} -par(mfrow=c(1,1)) -list(centers=CENTERS,conf.region.points=region,p.values=pv) -} - - -smeancr.cord.oph<-function(m,nullv=rep(0,ncol(m)),cop=3,MM=FALSE,SEED=TRUE, -nboot=500,plotit=TRUE,MC=FALSE,xlab="VAR 1",ylab="VAR 2",STAND=TRUE){ -# -# m is an n by p matrix -# -# Test hypothesis that multivariate skipped estimators -# are all equal to the null value, which defaults to zero. -# The level of the test is .05. -# -# Eliminate outliers using a projection method -# That is, determine center of data using: -# -# cop=1 Donoho-Gasko median, -# cop=2 MCD, -# cop=3 marginal medians. -# cop=4 MVE -# -# For each point -# consider the line between it and the center -# project all points onto this line, and -# check for outliers using -# -# MM=F, a boxplot rule. -# MM=T, rule based on MAD and median -# -# Repeat this for all points. A point is declared -# an outlier if for any projection it is an outlier -# using a modification of the usual boxplot rule. -# -# Eliminate any outliers and compute means -# using remaining data. 
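-# A minimal illustration with hypothetical bivariate data:
-# m=cbind(rnorm(50),rnorm(50))
-# smeancr.cord.oph(m,nboot=500)
-# returns the skipped-mean center, the points defining an approximate .95 confidence
-# region, and a p-value for H0: the skipped means equal nullv (zero by default).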
-# -if(SEED)set.seed(2) -m<-elimna(m) -n<-nrow(m) -est=smean(m,MC=MC,cop=cop,STAND=STAND) -crit.level<-.05 -if(n<=120)crit.level<-.045 -if(n<=80)crit.level<-.04 -if(n<=60)crit.level<-.035 -if(n<=40)crit.level<-.03 -if(n<=30)crit.level<-.025 -if(n<=20)crit.level<-.02 -data<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot) -val<-matrix(NA,ncol=ncol(m),nrow=nboot) -for(j in 1: nboot){ -mm<-m[data[j,],] -val[j,]<-smean(mm,MC=MC,cop=cop,STAND=STAND) -} -if(!MC)temp<-pdis(rbind(val,nullv),center=est) -if(MC)temp<-pdisMC(rbind(val,nullv),center=est) -sig.level<-sum(temp[nboot+1]1)par(mfrow=c(2,2)) -} -region=list() -centers=list() -val=list() -pv=list() -CENTERS=list() -id=c(-1,0) -for(j in 1:N){ -id=id+2 -a=meancr.cord.oph(m[,id],SEED=SEED,plotit=FALSE,xlab=xlab,ylab=ylab,nboot=nboot) -centers[[j]]=a$center -region[[j]]=a$conf.region.points -val[[j]]=a$boot.vals -centers[[j]]=a$center -n=nrow(elimna(m[,id])) -n=as.integer(n) -CENTERS[[j]]=c(a$center,n) -names(CENTERS[[j]])=c('V1','V2','N') -pv[[j]]=a$p.value -} -VAL=val -plot.new() -if(N>1)par(mfrow=c(2,2)) -id=c(-1,0) -for(j in 1:N){ -id=id+2 -n=nrow(m[,id]) -crit.level<-.05 -if(n<=120)crit.level<-.045 -if(n<=80)crit.level<-.04 -if(n<=60)crit.level<-.035 -if(n<=40)crit.level<-.03 -if(n<=30)crit.level<-.025 -if(n<=20)crit.level<-.02 -ic<-round((1-crit.level)*nboot) -val=VAL[[j]] -est=centers[[j]] -temp3<-est -ic<-round((1-crit.level)*nboot) -if(!MC)temp<-pdis(val,center=est) -if(MC)temp<-pdisMC(val,center=est) -temp.dis<-order(temp) -xx<-val[temp.dis[1:ic],] -xord<-order(xx[,1]) -xx<-xx[xord,] -temp<-chull(xx) -region[[j]]=xx[temp,] -if(plotit){ -plot(val[,1],val[,2],xlab=xlab,ylab=ylab) -points(temp3[1],temp3[2],pch="+") -lines(xx[temp,]) -lines(xx[c(temp[1],temp[length(temp)]),]) -}} -par(mfrow=c(1,1)) -list(centers=CENTERS,conf.region.points=region,p.values=pv) -} - - -meancr.cord.oph<-function(m,nullv=rep(0,ncol(m)),cop=3,MM=FALSE,SEED=TRUE,tr=0, -nboot=500,plotit=TRUE,MC=FALSE,xlab="VAR 1",ylab="VAR 2",STAND=TRUE){ -# -# m is an n by p matrix -# -# Test hypothesis that the means -# are all equal to the null value, which defaults to zero. -# The level of the test is .05. -# -# Eliminate outliers using a projection method -# That is, determine center of data using: -# -# cop=1 Donoho-Gasko median, -# cop=2 MCD, -# cop=3 marginal medians. -# cop=4 MVE -# -# For each point -# consider the line between it and the center -# project all points onto this line, and -# check for outliers using -# -# MM=F, a boxplot rule. -# MM=T, rule based on MAD and median -# -# Repeat this for all points. A point is declared -# an outlier if for any projection it is an outlier -# using a modification of the usual boxplot rule. -# -# Eliminate any outliers and compute means -# using remaining data. 
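-# Same usage as smeancr.cord.oph, but the center is the (trimmed) mean rather than the
-# projection-based skipped estimator; a minimal sketch with hypothetical data:
-# m=cbind(rnorm(50),rnorm(50))
-# meancr.cord.oph(m,tr=0,nboot=500)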
-# -if(SEED)set.seed(2) -m<-elimna(m) -n<-nrow(m) -#est=smean(m,MC=MC,cop=cop,STAND=STAND) -est=apply(m,2,mean,tr=tr) -crit.level<-.05 -if(n<=120)crit.level<-.045 -if(n<=80)crit.level<-.04 -if(n<=60)crit.level<-.035 -if(n<=40)crit.level<-.03 -if(n<=30)crit.level<-.025 -if(n<=20)crit.level<-.02 -data<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot) -val<-matrix(NA,ncol=ncol(m),nrow=nboot) -for(j in 1: nboot){ -mm<-m[data[j,],] -#val[j,]<-smean(mm,MC=MC,cop=cop,STAND=STAND) -val[j,]=apply(mm,2,mean) -} -if(!MC)temp<-pdis(rbind(val,nullv),center=est) -if(MC)temp<-pdisMC(rbind(val,nullv),center=est) -sig.level<-sum(temp[nboot+1]=v[j] & x[,xcol]=1)e.pow<-corfun(yhat,y)$cor^2 -stre=sqrt(e.pow) -} -list(coef=coef,residuals=residuals,Strength.Assoc=stre,Explanatory.Power=e.pow) -} - -rotate.points<-function(x,y,deg=NULL,rad=NULL){ -# -# Rotate points deg degrees or rad radians -# -if(!is.null(deg))rad=degrees.2.radians(deg) -if(is.null(rad))stop(' deg or rad need to be indicated') -xp=x*cos(rad)-y*sin(rad) -yp=y*cos(rad)+x*sin(rad) -d=cbind(xp,yp) -d -} - -degrees.2.radians<-function(d)d*pi/180 - -radians.2.degrees<-function(rad)rad*180/pi - -skip.gen.cor<-function(m,y=NULL,outfun=outpro.depth,...){ -# -# Eliminate outliers using -# outfun -# estimate correlation using remaining data -# -m=cbind(m,y) -id=outfun(m,...)$keep -if(ncol(m)==2)val=cor(m[id,])[1,2] -else val=cor(m[id,]) -val -} - -oph.astig.Dataset.Means.ConfEllipses<-function(m,plotit=TRUE,alpha=.05,reset=FALSE,POLY=FALSE, -xlab='X',ylab='Y',pch='.'){ -# -# See Hotelling_Bivariate_Transformation_Rand_10Jul21.docx in rfun -# -# -n=nrow(m) -J=ncol(m) -N=J/2 -if(N>1)par(mfrow=c(2,2)) -if(N != floor(N))stop('Should have an even number of columns') -results=list() -MAT=matrix(NA,nrow=N,ncol=10) -dimnames(MAT)=list(NULL,c('N','SW.px','SW.py','Mean.x','Mean.y','sd`x','sd`y','T','Cor','Ro.Ang.Deg')) -id=c(-1,0) -for(j in 1:N){ -id=id+2 -d=m[,id] -ntest1=round(shapiro.test(d[,1])$p.value,4) -ntest2=round(shapiro.test(d[,2])$p.value,4) -M=apply(d,2,mean) -M=round(M,4) -sd=apply(d,2,sd) -sd=round(sd,4) -Tsq=2*(n-1)*qf(1-alpha,2,n-2)/(n-2) -Tv=sqrt(Tsq) -P.cor=cor(d[,1],d[,2]) -P.cor=round(P.cor,4) -term=(2*P.cor*sd[1]*sd[2])/(var(d[,1])-var(d[,2])) -two.phi.rad=.5*atan(term) -two.phi.degrees=radians.2.degrees(two.phi.rad) # this actually phi, 2*phi is used when plotting -two.phi.degrees=round(two.phi.degrees,4) -if(sd[1]=int1[2] -flag3=m[,col[2]]<=int2[1] -flag4=m[,col[2]]>=int2[2] -} -if(!INC){ -flag1=m[,col[1]]int1[2] -flag3=m[,col[2]]int2[2] - -} -flag=as.logical(flag1*flag2*flag3*flag4) -m[flag,] -} - -harmonic.mean<-function(x)1/mean(1/x) - -rplotv2<- -function(x,y,est=tmean,scat=TRUE,fr=NA,plotit=TRUE,pyhat=FALSE,efr=.5,pch1='*',pch2='.', -theta=50,phi=25,scale=TRUE,expand=.5,SEED=TRUE,varfun=pbvar,outfun=outpro, -nmin=0,xout=FALSE,out=FALSE,eout=FALSE,xlab='X',ylab='Y',zscale=FALSE, -zlab=' ',pr=TRUE,duplicate='error',ticktype='simple',LP=TRUE,OLD=FALSE,pch='.',prm=TRUE,...){ -# -# Like rplot but can handle one or two binary independent variables, -# at least one non-binary independent variable is required. -# -# duplicate='error' -# In some situations where duplicate values occur, when plotting with -# two predictors, it is necessary to set duplicate='strip' -# -# LP=TRUE, the plot of the smooth is further smoothed via lplot (lowess) -# To get a plot as done with old version set -# LP=FALSE -# -# zscale=TRUE will standardize the dependent variable when plotting with 2 independent variables. 
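-# A minimal usage sketch (hypothetical data, one binary and one continuous predictor):
-# x=cbind(rbinom(100,1,.5),rnorm(100))
-# y=rnorm(100)
-# rplotv2(x,y,fr=.8)
-# The binary column is detected via chk4binary and a separate running-interval
-# smooth is plotted for each of its two levels.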
-# -# efr is the span when computing explanatory strength of association -# -# cf qplot in the R package ggplot2 -# -if(pr){ -if(!xout)print('Suggest also looking at result using xout=TRUE') -} -x<-as.matrix(x) -p=ncol(x) -xx<-cbind(x,y) -xx<-elimna(xx) -n=nrow(xx) -if(eout){ -flag=outfun(xx,plotit=FALSE,...)$keep -xx=xx[flag,] -} -if(xout){ -flag=outfun(xx[,1:p],plotit=FALSE,...)$keep -xx=xx[flag,] -} -n.keep=nrow(xx) -x<-xx[,1:p] -x<-as.matrix(x) -p1=ncol(x)+1 -y<-xx[,p1] -if(ncol(x)==1){ -if(is.na(fr))fr<-.8 -val<-rungen(x,y,est=est,scat=scat,fr=fr,plotit=plotit,pyhat=TRUE, -xlab=xlab,ylab=ylab,LP=LP,pch=pch,...) -val2<-rungen(x,y,est=est,fr=efr,plotit=FALSE,pyhat=TRUE,LP=FALSE,...)$output -val<-val$output -} -if(ncol(x)>1){ -xvals=list() -id=chk4binary(x) -Lid=length(id) -if(Lid==ncol(x))stop('All independent variables are binary, a smoother is inappropriate') -if(Lid>2)stop('Can have a most two binary independent variables') -val=list() -if(Lid==1){ -xval=list() -yhat=list() -if(is.na(fr))fr=.8 -irow0=which(x[,id]==0) -val[[1]]=rplot(x[irow0,-id],y[irow0],pyhat=TRUE,plotit=FALSE,est=est,xlab=xlab,ylab=ylab,pr=FALSE)$yhat -irow1=which(x[,id]==1) -#print(x[irow1,-id]) -val[[2]]=rplot(x[irow1,-id],y[irow1],pyhat=TRUE,plotit=FALSE,est=est,xlab=xlab,ylab=ylab,pr=FALSE)$yhat -rplot2g(x[irow0,-id],y[irow0],x[irow1,-id],y[irow1],est=est,xlab=xlab,ylab=ylab,fr=fr,pch1=pch1,pch2=pch2) #,xout=xout,SEED=SEED) -xvals[[1]]=x[irow0,-id] -xvals[[2]]=x[irow1,-id] -} -if(Lid==2){ -if(ncol(x)>3)stop(' With two binary IVs, current version limited to a third continuous IV') -xval=NULL -yhat=NULL -xval=list() -yhat=list() -xy=cbind(x[,id],x[,-id],y) -v=bin2binary.IV(xy) -val=rplot4g(v,est=est,xlab=xlab,ylab=ylab,pyhat=pyhat) -if(pyhat){ -xvals=list() -val=list() -for(j in 1:4)xvals[[j]]=v[[j]][,3] -for(j in 1:4)val[[j]]=v[[j]][,4] -} -} - -if(Lid==0){ -if(pr && !OLD){ -print('A new estimate of the strength of the association is used by default.') -print(' To get the old estimate, set OLD=TRUE') -} -if(ncol(x)==2 && !scale){ -if(pr){print('scale=FALSE is specified.') -print('If there is dependence, might want to use scale=T') -}} -if(is.na(fr))fr<-1 -val<-rung3d(x,y,est=est,fr=fr,plotit=plotit,pyhat=TRUE,SEED=SEED,nmin=nmin,LP=LP, -scale=scale,phi=phi,theta=theta,expand=expand,zscale=zscale,pr=FALSE, -duplicate='error',xlab=xlab,ylab=ylab,zlab=zlab,ticktype=ticktype,...) -E.power=NULL -if(OLD){ -E.power=varfun(val)/varfun(y) -names(E.power)='' -if(E.power>1)E.power=.99 -} -if(!OLD)E.power=smRstr(x,y,fr=fr)$str^2 -stra=sqrt(E.power) -# Best correction at the moment. Not sure when or if needed. -# Maybe a correlation option is better, but need to check this. 
-xvals=x -if(ncol(x)==1)xvals=sort(xvals) -if(!pyhat){ -val <- NULL -xvals=NULL -} -if(!prm){ -stra=NULL -E.power=NULL -val=NULL -}}} -list(n=n,n.keep=n.keep,xvals=xvals,yhat = val) -} - -regIQR<-function(x,y,xr=x,regfun=Qreg,xout=FALSE,outfun=outpro,...){ -# -# -IQR=regYhat(x,y,xr=xr,regfun=regfun,q=.75)-regYhat(x,y,xr=xr,regfun=regfun,q=.25) -IQR -} - -ESfun.CI<-function(x,y,QSfun=median,method=c('KMS','EP','QS','QStr','AKP','WMW'),tr=.2,pr=TRUE,alpha=.05, -nboot=2000,SEED=TRUE){ -type=match.arg(method) -switch(type, - KMS=KMS.ci(x,y,alpha=alpha,nboot=nboot,SEED=SEED), - EP=EPci(x,y,tr=tr,alpha=alpha,SEED=SEED,nboot=nboot), - QS=shiftPBci(x,y,locfun=QSfun,alpha=alpha,nboot=nboot,SEED=SEED), - QStr=shiftPBci(x,y,locfun=tmean,alpha=alpha,nboot=nboot,SEED=SEED), - AKP=akp.effect.ci(x,y,tr=tr,alpha=alpha,nboot=nboot,SEED=SEED), - WMW=cidv2(x,y)) -} - - -anclin.QS<-function(x1,y1,x2,y2,pts=NULL,xout=FALSE,ALL=FALSE,npts=10,outfun=outpro,REQMIN=.001,...){ -# -# x1, y1 is the control group -# x2 y2 is the experimental group -# -# For Exp group, estimate the median of Y given the x values stored in -# pts -# pts=NULL: If ALL=TRUE, 20 points are chosen by this function -# otherwise three points are used. -# -# The QS effect size is the conditional quantile of the control group corresponding -# to the median of Y, given x, for the experimental group. -# -# -xy=elimna(cbind(x1,y1)) -x1<-as.matrix(x1) -p=ncol(x1) -if(p>1)stop('Current version allows one covariate only') -p1=p+1 -vals=NA -x1<-xy[,1:p] -y1<-xy[,p1] -x1<-as.matrix(x1) -xy=elimna(cbind(x2,y2)) -x2<-as.matrix(x2) -p=ncol(x2) -if(p>1)stop('Current version allows one covariate only') -p1=p+1 -vals=NA -x2<-xy[,1:p] -y2<-xy[,p1] -x2<-as.matrix(x2) -xorder<-order(x1) -y1<-y1[xorder] -x1<-x1[xorder] -xorder<-order(x2) -y2<-y2[xorder] -x2<-x2[xorder] -n1=length(y1) -n2=length(y2) -n=min(c(n1,n2)) -if(xout){ -m<-cbind(x1,y1) -flag<-outfun(x1,plotit=FALSE)$keep -m<-m[flag,] -n1=nrow(m) -x1<-m[,1:p] -y1<-m[,p1] -x1=as.matrix(x1) -m<-cbind(x2,y2) -flag<-outfun(x2,plotit=FALSE)$keep -m<-m[flag,] -n2=nrow(m) -n=min(c(n1,n2)) -x2<-m[,1:p] -y2<-m[,p1] -x2=as.matrix(x2) -} -if(!is.null(pts))npts=length(pts) -if(is.null(pts)){ -xall=unique(c(x1,x2)) -if(ALL)pts=xall -if(!ALL){ -L1=qest(x1,.2) -L2=qest(x2,.2) -U1=qest(x1,.8) -U2=qest(x2,.8) -L=max(L1,L2) -U=min(U1,U2) -if(ALL)pts=seq(L,U,length.out=npts) -else{pts=c(L,(L+U)/2,U) -npts=3 -} -}} -e=reg.pred(x2,y2,xr=pts,regfun=Qreg,q=.5,xout=FALSE) -qs=NA -for(i in 1:npts){ -qs[i]=qinvreg(x1,y1,pts[i],e[i],REQMIN=REQMIN) -} -M=cbind(pts,e,qs) -dimnames(M)=list(NULL,c('Pts','Y.hat4ExpGrp','QS.Effect.Size')) -M -} - -qinvreg<-function(x,y,pt,v,REQMIN=.001){ -# -# Find q such that for Qreg Y hat equals v -# -xy=cbind(x,y) -a=nelderv2(xy,1,qinvreg.sub,START=.5,pt=pt,v=v,REQMIN=REQMIN) -# note: using optim, even with BFGS method, can result in highly inaccurate values -a -} -qinvreg.sub<-function(xy,q,pt,v){ -e=reg.pred(xy[,1],xy[,2],xr=pt,regfun=Qreg,q=q,xout=FALSE) -a=abs(e-v) -a -} - -anclin.QS.CIpb<-function(x1,y1,x2,y2,alpha=.05,pts=NULL,xout=FALSE,ALL=FALSE,npts=10,outfun=outpro,nboot=200, -MC=TRUE,REQMIN=.01,SEED=TRUE,...){ -# -# ANCOVA -# -# Compute a confidence interval for the conditional quantile shift -# measure of effect size -# x1, y1 is the control group -# x2 y2 is the experimental group -# -# for Exp group, estimate the median of Y given x for values stored in -# pts -# pts=NULL: If ALL=TRUE, 20 points are chosen by this function -# otherwise three points are used to reduce execution time. 
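-# A minimal sketch with hypothetical control (x1,y1) and experimental (x2,y2) data:
-# x1=rnorm(60); y1=x1+rnorm(60)
-# x2=rnorm(60); y2=x2+.5+rnorm(60)
-# anclin.QS.CIpb(x1,y1,x2,y2,nboot=200,MC=FALSE)
-# For each covariate point the output gives a percentile bootstrap confidence interval
-# and a p-value for H0: QS=.5 (no shift).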
-# -# The QS effect size is the conditional quantile of the control group corresponding -# to the median of Y for the experimental group. -# -# -if(SEED)set.seed(2) -xy=elimna(cbind(x1,y1)) -x1=as.matrix(x1) -p=ncol(x1) -if(p>1)stop('Current version allows one covariate only') -p1=p+1 -vals=NA -x1<-xy[,1:p] -y1<-xy[,p1] -x1<-as.matrix(x1) -xy=elimna(cbind(x2,y2)) -x2<-as.matrix(x2) -p=ncol(x2) -if(p>1)stop('Current version allows one covariate only') -p1=p+1 -vals=NA -x2<-xy[,1:p] -y2<-xy[,p1] -x2<-as.matrix(x2) -xorder<-order(x1) -y1<-y1[xorder] -x1<-x1[xorder] -xorder<-order(x2) -y2<-y2[xorder] -x2<-x2[xorder] -n1=length(y1) -n2=length(y2) -n=min(c(n1,n2)) -if(xout){ -m<-cbind(x1,y1) -flag<-outfun(x1,plotit=FALSE,...)$keep -m<-m[flag,] -n1=nrow(m) -x1<-m[,1:p] -y1<-m[,p1] -x1=as.matrix(x1) -m<-cbind(x2,y2) -flag<-outfun(x2,plotit=FALSE,...)$keep -m<-m[flag,] -n2=nrow(m) -n=min(c(n1,n2)) -x2<-m[,1:p] -y2<-m[,p1] -x2=as.matrix(x2) -} -if(!is.null(pts))npts=length(pts) -if(is.null(pts)){ -xall=unique(c(x1,x2)) -if(ALL)pts=xall -if(!ALL){ -L1=qest(x1,.2) -L2=qest(x2,.2) -U1=qest(x1,.8) -U2=qest(x2,.8) -L=max(L1,L2) -U=min(U1,U2) -if(ALL)pts=seq(L,U,length.out=npts) -else{pts=c(L,(L+U)/2,U) -npts=3 -} -}} -v=NA -m=list() -for(i in 1:nboot){ -id1=sample(n1,replace=TRUE) -id2=sample(n2,replace=TRUE) -m[[i]]=list(x1[id1],y1[id1],x2[id2],y2[id2]) -} -if(!MC)v=lapply(m,anclinQS.sub,pts=pts,npts=npts,...) -if(MC){ -library(parallel) -v=mclapply(m,anclinQS.sub,pts=pts,npts=npts,...) -} -v=matl(v) -ilow<-round((alpha/2) * nboot) -ihi<-nboot - ilow -ilow<-ilow+1 -nv=nrow(v) -CI=matrix(NA,nv,2) -pv=NA -for(i in 1:nv){ -pv[i]=mean(v[i,]<0.5) -pv[i]=2*min(pv[i],1-pv[i]) -sv=sort(v[i,]) -CI[i,1]=sv[ilow] -CI[i,2]=sv[ihi] -} -output=matrix(NA,nrow=nv,ncol=4) -output[,1]=pts -output[,2]=pv -output[,3:4]=CI -e=anclin.QS(x1,y1,x2,y2,pts=pts) -e=as.matrix(e[,2:3]) -if(nv==1)e=t(e) -output=cbind(output,e) -dimnames(output)=list(NULL,c('X','p.value','ci.low','ci.hi','Median.ExpGrp','QS.effect')) -output -} -anclinQS.sub<-function(m,pts,npts=npts,...){ -v=anclin.QS(m[[1]],m[[2]],m[[3]],m[[4]],pts=pts,npts=npts,...)[,3] -v -} - -anclinQS.plot<-function(x1,y1,x2,y2,pts=NULL,q=0.1,xout=FALSE,ALL=TRUE,npts=10,line=TRUE, -xlab='X',ylab='QS.Effect',outfun=outpro,REQMIN=.001,...){ -# -# x1, y1 is the control group -# x2 y2 is the experimental group -# -# For Exp group, estimate the median of Y given the x values stored in -# pts -# pts=NULL: If ALL=TRUE, 20 points are chosen by this function -# otherwise three points are used. -# -# The QS effect size is the conditional quantile of the control group corresponding -# to the median of Y, given x, for the experimental group. 
-# The function plots estimates of the QS effect size for the points in pts -# -# -xy=elimna(cbind(x1,y1)) -x1<-as.matrix(x1) -p=ncol(x1) -if(p>1)stop('Current version allows one covariate only') -p1=p+1 -vals=NA -x1<-xy[,1:p] -y1<-xy[,p1] -x1<-as.matrix(x1) -xy=elimna(cbind(x2,y2)) -x2<-as.matrix(x2) -p=ncol(x2) -if(p>1)stop('Current version allows one covariate only') -p1=p+1 -vals=NA -x2<-xy[,1:p] -y2<-xy[,p1] -x2<-as.matrix(x2) -xorder<-order(x1) -y1<-y1[xorder] -x1<-x1[xorder] -xorder<-order(x2) -y2<-y2[xorder] -x2<-x2[xorder] -n1=length(y1) -n2=length(y2) -n=min(c(n1,n2)) -if(xout){ -m<-cbind(x1,y1) -flag<-outfun(x1,plotit=FALSE,...)$keep -m<-m[flag,] -n1=nrow(m) -x1<-m[,1:p] -y1<-m[,p1] -x1=as.matrix(x1) -m<-cbind(x2,y2) -flag<-outfun(x2,plotit=FALSE,...)$keep -m<-m[flag,] -n2=nrow(m) -n=min(c(n1,n2)) -x2<-m[,1:p] -y2<-m[,p1] -x2=as.matrix(x2) -} -if(!is.null(pts))npts=length(pts) -if(is.null(pts)){ -if(q<=0 || q>=1)stop('Argument q must be greater than 0 and less than 1') -qu=1-q -L1=qest(x1,q) -L2=qest(x2,q) -U1=qest(x1,qu) -U2=qest(x2,qu) -L=max(L1,L2) -U=min(U1,U2) -if(ALL)pts=seq(L,U,length.out=npts) -else{pts=c(L,(L+U)/2,U) -npts=3 -}} -e=reg.pred(x2,y2,xr=pts,regfun=Qreg,q=.5,xout=FALSE) -qs=NA -for(i in 1:npts){ -qs[i]=qinvreg(x1,y1,pts[i],e[i],REQMIN=REQMIN) -} -M=cbind(pts,e,qs) -if(line){ -plot(pts,qs,xlab=xlab,ylab=ylab,ylim=c(0,1),type='n') -lines(pts,qs) -} -else -plot(pts,qs,xlab=xlab,ylab=ylab,ylim=c(0,1)) -dimnames(M)=list(NULL,c('Pts','Y.hat4ExpGrp','QS.Effect.Size')) -M -} - -ES.summary.sub<-function(x,n1,n2){ -id1=c(1:n1) -n1p=n1+1 -N=n1+n2 -id2=c(n1p:N) -a=ES.summary.CI(x[id1],x[id2],SEED=F)[,8] -} - -ksnorm.test<-function(z)ks.test(z,'pnorm',mean=mean(z),sd=sd(z)) #KS test for normality - -reg.reglev<-function(x,y,plotit=TRUE,xlab='X',ylab='Y',GEN=TRUE,regfun=tsreg,outfun=outpro,pr=TRUE,...){ - -# -# Remove any bad leverage points detected by -# the fit using the estimator indicated by regun -# -# GEN=TRUE: use a generalization of the Rousseeuw van Zomeren method -# GEN=FALSE: usw the Rousseeuw van Zomeren method. Unknown when if ever this older approach -# offers an advantage. -# -xy=elimna(cbind(x,y)) -n=nrow(xy) -x=as.matrix(x) -p=ncol(x) -p1=p+1 -x=xy[,1:p] -y=xy[,p1] -x<-as.matrix(x) -keep=c(1:n) -if(!GEN)a=reglev(x,y,plotit=FALSE,SEED=FALSE)$bad.lev.points -else a=reglev.gen(x,y,plotit=FALSE,regfun=regfun,outfun=outfun)$bad.lev -if(length(a)>0)keep=keep[-a] -nk=length(y[keep]) -e=regfun(x[keep,],y[keep],...) -list(n=n,n.keep=nk,coef=e$coef) -} - - -oph.astig.depbivtotvars<-function(m,alpha=.05){ -# -# This function is designed to compare two variances dependent variables based -# prediction errors when dealing with astigmatism. -# -# Assume m is a matrix or data frame having -# J columns. First two columns first formula, next two columns next formula.. -# -# So J should be an even integer -# -# Compare col 1-2 to 3-4, then 1-2 vs 5-6, etc -# -# returns confidence interval for pairwise difference scores. So using difference scores for 1 and 3 as well as 2 and 4 -# alpha = .05 = .95 confidence intervals -# -# Estimates are adjusted if outliers are found based on a projection method. 
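-# A minimal sketch (hypothetical data, same column layout as oph.astig.depbivmeans):
-# m=matrix(rnorm(800),ncol=8)
-# oph.astig.depbivtotvars(m,alpha=.05)
-# For each pair of formulas the output reports the two total variances, their ratio,
-# and an adjusted p-value.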
-# -J=ncol(m) -nv=NA -for(j in 1:J)nv[j]=length(elimna(m[,j])) -N=J/2 -J1=J-1 -F=NULL -for(j in 1:N)F=c(F,paste('F',j)) -chk.n=names(m) -MAT=matrix(NA,nrow=N,ncol=6) -dimnames(MAT)=list(NULL,c('Form', 'Form','Tot Var 1','Tot Var 2','Ratio','p.adjusted')) -MAT=as.data.frame(MAT) -if(N != floor(N))stop('Should have an even number of columns') -results=list() -results.total=list() -mat=matrix(NA,nrow=N,ncol=2,byrow=TRUE) -v1=seq(1,J1,2) -mat[,1]=v1 -mat[,2]=v1+1 -ic=0 -for(j in 1:N){ -for(k in 1:N){ -if(j1){ -if(p!=ncol(pts))stop('pts should be a matrix with',paste(p),'columns') -} - -x2<-as.matrix(x2) -if(p!=ncol(x2))stop('Number of col. for x1 is not equal to the number of col. for x2') -xy<-cbind(x2,y2) -xy<-elimna(xy) -x2<-xy[,1:p] -y2<-xy[,p1] - -if(xout){ -m<-cbind(x1,y1) -flag<-outfun(x1,plotit=FALSE,...)$keep -m<-m[flag,] -x1<-m[,1:p] -y1<-m[,p1] -m<-cbind(x2,y2) -flag<-outfun(x2,plotit=FALSE,...)$keep -m<-m[flag,] -x2<-m[,1:p] -y2<-m[,p1] -} - -e=NA -for(i in 1:nrow(pts)){ -d1=reg.con.dist(x1,y1,pts=pts[i,]) -d2=reg.con.dist(x2,y2,pts=pts[i,]) -p=NA -for(j in 1:99)p[j]=mean(d1[j]<=d2) -e[i]=mean(p) -} -e -} - - -outmc<-function(x,plotit=FALSE){ -# -# Detect outliers using a modification of Carling's method -# that takes into account skewness -# -x=elimna(x) -temp<-idealf(x) -gval<-(17.63*n-23.64)/(7.74*n-3.71) -M=median(x) -cl=M-gval*2*(M-temp$ql) -cu=M+gval*2*(temp$qu-M) -n=length(x) -flag<-NA -outid<-NA -vec<-c(1:n) -for(i in 1:n){ -flag[i]<-(x[i]< cl || x[i]> cu) -} -if(sum(flag)==0)outid<-NULL -if(sum(flag)>0)outid<-vec[flag] -keep<-vec[!flag] -outval<-x[flag] -n.out=sum(length(outid)) -list(out.val=outval,out.id=outid,keep=keep,n=n,n.out=n.out,cl=cl,cu=cu) -} - -reglev.gen<-function(x,y,regfun=tsreg,outfun=outpro.depth,regout=outpro,crit=sqrt(qchisq(.975,1)), -plotit=TRUE,xlab='X',ylab='Y',outplot=FALSE,DIS=FALSE,...){ -# -# Search for good and bad leverage points using the regression method -# indicated by -# regfun -# -# This is a more general version of reglev. -# Here, can specify the regression estimator and outlier detection method. -# -#. plotit=TRUE. Point marked o are bad leverage points -# -# When x is univariate and has a skewed distribution, suggest using outfun=outmc -# -# x is an n by p matrix -# -# Strategy: first determine whether there are any leverage points -# If yes, remove them and estimate the slopes and intercept -# Based on this fit, compute residuals using all of the data. -# Check for outliers among the residuals using MAD-median rule -# Bad leverage point is a leverage points for which the residual is an outlier. -# -# VALUE: -# keep indicates which points are not bad leverage points. -# -# if DIS=TRUE, distances used to determine leverage points are returned. -# -xy=elimna(cbind(x,y)) -nkeep=c(1:nrow(xy)) -x=as.matrix(x) -p=ncol(x) -p1=p+1 -x=xy[,1:p] -y=xy[,p1] -x<-as.matrix(x) -d=outfun(x,plotit=outplot,...) 
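-# d$out.id identifies leverage points (outlying x values); the fit computed without
-# them is then used to flag which of these are bad leverage points, namely those
-# whose residuals are outliers by the MAD-median rule.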
-iout=d$out.id #leverage points -glp=iout -nlp=length(iout) -keep=d$keep -est=regfun(x[keep,],y[keep])$coef -yhat=est[1]+x%*%est[2:p1] -res=y-yhat -dis=abs(res-median(res))/mad(res) -chk<-ifelse(dis>crit,1,0) #residuals outliers -vec<-c(1:nrow(x)) -outid=resid=vec[chk==1] # id which are residuals outliers -keep<-vec[chk==0] -both=c(iout,outid) -blp=duplicated(both) -if(sum(!blp)>0)blp=unique(both[blp]) -else blp=NULL -if(length(blp)>0){ -flag=NULL -for(k in 1:length(blp)){ -flag=c(flag,which(iout==blp[k])) -} -glp=iout[-flag] -} -if(!is.null(blp))regout.n=length(blp) -nkeep=c(1:length(y)) -if(length(blp)>0)nkeep=vec[-blp] -if(ncol(xy)==2){ -if(plotit){ -plot(x,y,xlab=xlab,ylab=ylab,type='n') -points(x[keep],y[keep],pch='.') -points(x[glp],y[glp],pch='*') -points(x[blp],y[blp],pch='o') -}} -list(n.lev=d$n.out,lev.pts=iout,good.lev=glp,bad.lev=blp,res.out.id=resid,keep=nkeep) -} - - -outblp=reglev.gen - -B.outbox<-function(x,mbox=FALSE,gval=NA,plotit=FALSE,STAND=FALSE){ -# -# Uses the method derived by -# Walker, M. L., Dovoedo, Y. H., Chakraborti, S. \& Hilton, C. W. (2018). -# An Improved Boxplot for Univariate Data. {\em American Statistician, 72}, 348--353. -# -# -x<-x[!is.na(x)] # Remove missing values -if(plotit)boxplot(x) -n<-length(x) -temp<-idealf(x) -M=median(x) -Bc=(temp$qu+temp$ql-2*M)/(temp$qu-temp$ql) -if(is.na(gval))gval<-1.5 -cl<-temp$ql-gval*(temp$qu-temp$ql)*((1-Bc)/(1+Bc)) -cu<-temp$qu+gval*(temp$qu-temp$ql)*((1+Bc)/(1-Bc)) -flag<-NA -outid<-NA -vec<-c(1:n) -for(i in 1:n){ -flag[i]<-(x[i]< cl || x[i]> cu) -} -if(sum(flag)==0)outid<-NULL -if(sum(flag)>0)outid<-vec[flag] -keep<-vec[!flag] -outval<-x[flag] -n.out=sum(length(outid)) -list(out.val=outval,out.id=outid,keep=keep,n=n,n.out=n.out,cl=cl,cu=cu) -} - -anc.plot.es<-function(x1,y1,x2,y2,fr1=1,fr2=1,tr=.2,alpha=.05,plotit=TRUE,pts=x1,method='QS',CI=FALSE, -pr=TRUE,xout=FALSE,outfun=out,xlab='X',ylab='Effect.Size',pch='*',pts.only=TRUE,low.span=2/3, -nmin=12,...){ - -# Plot effect size curve. 
Done for each point in x1 for which the number of nearest neighbors for -# both x1 and x2 is > nmin -# nmim default =12 -# -# pts.only=TRUE: plot the estimates -# pts.only=FALSE: add a smoother to the points using LOESS -# low.span control the span -# -# fr1 and fr2 are the spans when looking for the nearest neighbors -# see function near -# -if(pr){ -print('Effect size is based on the argument method, default is quantile shift measure of effect size') -print('Other options: EP, explanatory power; AKP, robust analog of Cohen d; WMW, P(X1)stop('One covariate only is allowed with this function') -if(length(x1)!=length(y1))stop('x1 and y1 have different lengths') -if(length(x2)!=length(y2))stop('x2 and y2 have different lengths') -xy=elimna(cbind(x1,y1)) -x1=xy[,1] -y1=xy[,2] -xy=elimna(cbind(x2,y2)) -x2=xy[,1] -y2=xy[,2] -if(xout){ -flag<-outfun(x1,...)$keep -x1<-x1[flag] -y1<-y1[flag] -flag<-outfun(x2,...)$keep -x2<-x2[flag] -y2<-y2[flag] -} - -xorder<-order(x1) -y1<-y1[xorder] -x1<-x1[xorder] -xorder<-order(x2) -y2<-y2[xorder] -x2<-x2[xorder] -e=NA -x=NA -ic=0 -n1=NA -ci.low=NA -ci.hi=NA -pv=NA -n=length(pts) -for(i in 1:n){ -ysub1=y1[near(x1,pts[i],fr1)] -ysub2=y2[near(x2,pts[i],fr2)] -n1=length(ysub1) -n2=length(ysub2) -if(n1>=nmin & n2>=nmin){ -ic=ic+1 -e[ic]=ESfun(ysub1,ysub2,method=method) -x[ic]=pts[i] -if(CI){ -temp=ESfun.CI(ysub1,ysub2,method=method) -if(identical(method,'WMW')){ -ci.low[ic]=temp$p.ci[1] -ci.hi[ic]=temp$p.ci[2] -pv[ic]=temp$p.value -} -if(!identical(method,'WMW')){ -ci.low[i]=temp$ci[1] -ci.hi[i]=temp$ci[2] -pv[ic]=temp$p.value -} -}}} -if(plotit){ -if(pts.only)plot(x,e,pch=pch,xlab=xlab,ylab=ylab) -else -lplot(x,e,pr=FALSE,xlab=xlab,ylab=ylab,low.span=low.span) -M='Done' -} -if(CI){ -M=cbind(x,e,ci.low,ci.hi,pv) -dimnames(M)=list(NULL,c('X','Est','ci.low','ci.hi','p.value')) -} -M -} - -ancova.KMS.plot<-function(x1,y1,x2,y2,pts=NULL,xlab='X',ylab='Effect Size',xout=FALSE,outfun=outpro,pch='x',line=TRUE){ -# -# -# Plot the robust KMS measure of effect size for the covariate values in pts -# -# pts=NULL, use the uniques values in x1 and x2 -# -xy=elimna(cbind(x1,y1)) -if(ncol(xy)!=2)stop('Only one covariate can be used') -x1=xy[,1] -y1=xy[,2] -xy=elimna(cbind(x2,y2)) -x2=xy[,1] -y2=xy[,2] -if(xout){ -m<-cbind(x1,y1) -flag<-outfun(x1,plotit=FALSE)$keep -m<-m[flag,] -x1<-m[,1] -y1<-m[,2] -m<-cbind(x2,y2) -flag<-outfun(x2,plotit=FALSE)$keep -m<-m[flag,] -x2<-m[,1] -y2<-m[,2] -} -if(is.null(pts)){ -id1=duplicated(x1) -id2=duplicated(x2) -X1=sort(x1[!id1]) -X2=sort(x2[!id2]) -n1=length(X1) -n2=length(X2) -low=max(X1[1],X2[1]) -up=min(X1[n1],X2[n2]) -X12=sort(c(X1,X2)) -flag=(X12>=low & X12<=up) -pts=X12[flag] -} -e=ancova.ES(x1,y1,x2,y2,pts=pts,plotit=FALSE) -plot(e[,1],e[,2],xlab=xlab,ylab=ylab,type='n') -if(line)lines(e[,1],e[,2]) -else -points(e[,1],e[,2],pch=pch) -} - -logistic.LR<-function(x,y,xout=FALSE,outfun=outpro,ROB=FALSE,ADJ=TRUE,reps=5000,SEED=TRUE){ -# -# Logistic regression: -# Likelihood ratio test that all slope parameters are equal to zero. -# ROB = True, initial estimate is based on the Bianco and Yohai (1996) estimator -# -x<-as.matrix(x) -xx<-cbind(x,y) -p1=ncol(xx) -p=p1-1 -xx<-elimna(xx) -x<-xx[,1:ncol(x)] -x<-as.matrix(x) -y<-xx[,p1] -n=nrow(x) -n.keep=n -if(ADJ){ -if(SEED)set.seed(2) -rem=NA -for(i in 1:reps){ -xx=rmul(n,p=p) -yy=rbinom(n,1,.5) -rem[i]=logistic.LR.sub(xx,yy,n) -} -} -if(ROB)xout=FALSE # ROB=T deals with leverage points. 
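-# When xout=TRUE (and ROB=FALSE), leverage points are removed via outfun before the
-# likelihood ratio statistic is computed from the fitted logistic model.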
-if(xout){ -x<-as.matrix(x) -flag<-outfun(x,plotit=FALSE,...)$keep -x<-x[flag,] -y<-y[flag] -x<-as.matrix(x) -n.keep=nrow(x) -} -xx=cbind(rep(n.keep,1),x) -e=logreg.pred(x,y,pts=x) -LF=sum(y*log(e))+sum((1-y)*log(1-e)) -py=mean(y) -LN=sum(y*log(py))+sum((1-y)*log(1-py)) -LR.test=-2*(LN-LF) -pv=1-pchisq(LR.test,p) -if(ADJ)pv=1-mean(pv<=rem) -pv -} -logistic.LR.sub<-function(x,y,n){ -p=ncol(x) -xx=cbind(rep(n,1),x) -e=logreg.pred(x,y,pts=x) -LF=sum(y*log(e))+sum((1-y)*log(1-e)) -py=mean(y) -LN=sum(y*log(py))+sum((1-y)*log(1-py)) -LR.test=-2*(LN-LF) -pv=1-pchisq(LR.test,p) -pv -} - - - - -out.dummy<-function(x,outfun=outpro,id,plotit=FALSE,...){ -# -# When using dummy coding in regression -# -# remove col indicated by -# id -# then check for outliers using -# outfun -x=as.matrix(x) -if(ncol(x)==1)stop(' Should have two or more columns') -X=x[,-id] -a=outfun(X,plotit=FALSE) -a -} - -out.by.groups<-function(x,grp.col,outfun=outpro,pr=TRUE,plotit=FALSE,...){ -# -# divide data into groups, id outliers in each group -# return: -# keep = id rows in x not outliers -# out.id =rows containing outliers -# -x=elimna(x) -p=ncol(x) -p1=p+1 -pv=c(1:p) -pv=pv[-grp.col] -#pv=c(pv,p1) -n=nrow(x) -ones=c(1:n) -w=cbind(x,ones) -z=fac2Mlist(w,grp.col=grp.col,c(1:p1),pr=FALSE) -MAT=NULL -for(j in 1:length(z)){ -m=z[[j]] -a=outfun(m[,pv],plotit=FALSE) -MAT=rbind(MAT,m[a$keep,]) -} -keep=MAT[,p1] -ou=ones[-keep] -list(out.id=ou,keep=keep) -} - - -risk.ratio<-function(x1,n1,x2,n2,alpha=.05){ -# -# Risk ratio confidence interval -# -p1=x1/n1 -p2=x2/n2 -rat=p1/p2 -term1=((n1-x1)/x1)/n1 -term2=((n2-x2)/x2)/n2 -term=sqrt(term1+term2) -z=qnorm(1-alpha/2) -LL=log(rat) -v1=LL-z*term -v2=LL+z*term -ci=c(exp(v1),exp(v2)) -# Compute p-value -pv=seq(.001,.999,.001) -for(j in 1:length(pv)){ -pv.rem=pv[j] -z=qnorm(1-pv[j]/2) -v1=LL-z*term -v2=LL+z*term -chk=c(exp(v1),exp(v2)) -if(chk[1]>1 || chk[2]<1)break -} -if(p1==p2)pv.rem=1 -list(p1=p1,p2=p2,RR.est=rat,ci=ci,p.value=pv.rem) -} - -KMS2way<-function(J,K,x,tr=.2,alpha=.05,nboot=999,SEED=TRUE,SW=FALSE){ -# -# Compare robust, heteroscedastic measures of effect size, the KMS measure effect size -# For main effects pool the data over levels and do all pairwise comparisons -# -# Do all interactions -# -if(is.matrix(x) || is.data.frame(x))x=listm(x) -JK=J*K -mat=matrix(1:JK,J,K,byrow=TRUE) -# First do Factor A -dat=list() -for(j in 1:J){ -DAT=NULL -for(k in 1:K){ -DAT=c(as.vector(matl(x[mat[j,k]]))) -} -dat[[j]]=DAT -} -A=KMSmcp.ci(dat,SEED=SEED) -# -# Factor B next -# -dat=list() -for(k in 1:K){ -DAT=NULL -for(j in j:J){ -DAT=c(as.vector(matl(x[mat[j,k]]))) -} -dat[[k]]=DAT -} -B=KMSmcp.ci(dat,SEED=SEED) -AB=KMSinter.mcp(J,K,x,tr=tr,SEED=SEED,SW=SW) -list(Factor.A=A,Factor.B=B,Interactions=AB) -} - -KMSgridAB<-function(x,y,IV=c(1,2),Qsplit1=.5,Qsplit2=.5,tr=.2,VAL1=NULL,VAL2=NULL,PB=FALSE,est=tmean,nboot=1000,pr=TRUE,fun=ES.summary, -xout=FALSE,outfun=outpro,SEED=TRUE,...){ -# -# Split on two variables, not just one. -# -# Qsplit: split the independent variable based on the -# quantiles indicated by Qsplit -# Example -# Qsplit1=c(.25,.5,.75) -# Qsplit2=.5 -# would split based on the quartiles for the first independent variable and the median -# for the second independent variable -# -# Alternatively, can split the data based on specified values indicating by the arguments -# VAL1 and VAL2 -# -# Then test the hypothesis of equal measures of location -# IV[1]: indicates the column of containing the first independent variable to use. 
-# IV[2]: indicates the column of containing the second independent variable to use. -# -# if(length(unique(y)>2))stop('y should be binary') -x=as.matrix(x) -p=ncol(x) -if(p==1)stop('There should be two or more independent variables') -p1=p+1 -xy<-elimna(cbind(x,y)) -x<-xy[,1:p] -y<-xy[,p1] -v=NULL -if(xout){ -flag<-outfun(x,plotit=FALSE,...)$keep -x<-x[flag,] -y<-y[flag] -xy=cbind(x,y) -} -J=length(Qsplit1)+1 -K=length(Qsplit2)+1 -z=list() -group=list() -if(is.null(VAL1) || is.null(VAL2)){ -N.int=length(Qsplit1)+1 -N.int2=length(Qsplit2)+1 -} -else { -J=length(VAL1)+1 -K=length(VAL2)+1 -N.int=length(VAL1)+1 -N.int2=length(VAL2)+1 -} -JK=J*K -MAT=matrix(1:JK,J,K,byrow=TRUE) -est.mat=matrix(NA,nrow=N.int,ncol=N.int2) -n.mat=matrix(NA,nrow=N.int,ncol=N.int2) -DV.mat=matrix(NA,nrow=N.int,ncol=N.int2) -L1=NULL -L2=NULL -if(is.null(VAL1) || is.null(VAL2)){ -qv=quantile(x[,IV[1]],Qsplit1) -qv=c(min(x[,IV[1]]),qv,max(x[,IV[1]])) -qv2=quantile(x[,IV[2]],Qsplit2) -qv2=c(min(x[,IV[2]]),qv2,max(x[,IV[2]])) -} -else{ -qv=VAL1 -qv=c(min(x[,IV[1]]),qv,max(x[,IV[1]])) -qv2=VAL2 -qv2=c(min(x[,IV[2]]),qv2,max(x[,IV[2]])) -} -ic=0 -for(j in 1:N.int){ -j1=j+1 -xsub=binmat(xy,IV[1],qv[j],qv[j1]) -for(k in 1:N.int2){ -k1=k+1 -xsub2=binmat(xsub,IV[2],qv2[k],qv2[k1]) -est.mat[j,k]=est(xsub2[,p1],...) -n.mat[j,k]=length(xsub2[,p1]) -ic=ic+1 -z[[ic]]=xsub2[,p1] -if(length(z[[ic]])>6)group[[ic]]=summary(xsub2[,1:p]) -} -} -n=NA -for(j in 1:length(z)){ -n[j]=length(z[[j]]) -} -if(min(n)<=5){ -id=which(n>5) -del=which(n<=5) -n=n[id] -if(pr)print(paste('For group',del,'the sample size is less than 6')) -} -A=list() -B=list() -for(j in 1:J)A[[j]]=lincon(z[MAT[j,]],tr=tr,pr=FALSE)$psihat -for(j in 1:K)B[[j]]=lincon(z[MAT[,j]],tr=tr,pr=FALSE)$psihat -list(est.loc.4.DV=est.mat,n=n.mat,A=A,B=B,A.effect.sizes=A,B.effect.sizes=B) -} - -AKPmcp.ci<-function(x,tr=.2,alpha=0.05,SEED=TRUE,nboot=500,CI=TRUE,method='hoch'){ -# -# Estimate AKP effect size when comparing all -# pairs of groups in a one-way (independent) groups design -# -# CI=TRUE: confidence intervals for the measure of effect size are computed. -# -if(is.matrix(x) || is.data.frame(x))x<-listm(x) -if(!is.list(x))stop('Data must be stored in a matrix or in list mode.') -J=length(x) -Jall=(J^2-J)/2 -con1=con1way(J) -output=matrix(NA,nrow=Jall,ncol=7) -dimnames(output)=list(NULL,c('Group','Group','Effect.Size','low.ci','up.ci','p.value','p.adjust')) -ic=0 -for(j in 1:J){ -for(k in 1:J){ -if(j1){ -if(p!=ncol(pts))stop('pts should be a matrix with',paste(p),'columns') -} - -x2<-as.matrix(x2) -if(p!=ncol(x2))stop('Number of col. for x1 is not equal to the number of col. for x2') -xy<-cbind(x2,y2) -xy<-elimna(xy) -x2<-xy[,1:p] -y2<-xy[,p1] - -if(xout){ -m<-cbind(x1,y1) -flag<-outfun(x1,plotit=FALSE,...)$keep -m<-m[flag,] -x1<-m[,1:p] -y1<-m[,p1] -m<-cbind(x2,y2) -flag<-outfun(x2,plotit=FALSE,...)$keep -m<-m[flag,] -x2<-m[,1:p] -y2<-m[,p1] -} -e=NA -for(i in 1:nrow(pts)){ -d1=reg.con.dist(x1,y1,pts=pts[i,]) -d2=reg.con.dist(x2,y2,pts=pts[i,]) -e[i]=shiftQS(d1,d2) -} -e -} -QS.ancbse<-function(x1,y1,x2,y2,pts,nboot=100,SEED=TRUE,MC=FALSE,null.value=.5, -xout=FALSE,outfun=outpro,alpha=.05,...){ -# -# ANCOVA based on quantile shift measure of effect size. -# -# -# pts indicates the covariance values for which the groups will be compared. - -x1<-as.matrix(x1) -p1<-ncol(x1)+1 -p<-ncol(x1) -xy<-cbind(x1,y1) -xy<-elimna(xy) -x1<-xy[,1:p] -y1<-xy[,p1] - -x2<-as.matrix(x2) -if(p!=ncol(x2))stop('Number of col. for x1 is not equal to the number of col. 
for x2') -xy<-cbind(x2,y2) -xy<-elimna(xy) -x2<-xy[,1:p] -y2<-xy[,p1] - -if(xout){ -m<-cbind(x1,y1) -flag<-outfun(x1,plotit=FALSE,...)$keep -m<-m[flag,] -x1<-m[,1:p] -y1<-m[,p1] -m<-cbind(x2,y2) -flag<-outfun(x2,plotit=FALSE,...)$keep -m<-m[flag,] -x2<-m[,1:p] -y2<-m[,p1] -} -if(SEED)set.seed(2) -e=QSanc(x1,y1,x2,y2,pts=pts) -e=as.vector(matl(e)) -npt=length(pts) -ci=matrix(NA,npt,2) -LAB=NULL -for(j in 1:npt)LAB[j]=paste('pts',j) -dimnames(ci)=list(LAB,c('ci.low','ci.up')) -E=matrix(NA,nboot,npt) -n1=length(y1) -n2=length(y2) -bl=list() -for(k in 1:nboot){ -id1=sample(n1,replace=TRUE) -id2=sample(n2,replace=TRUE) -bl[[k]]=list(x1[id1],y1[id1],x2[id2],y2[id2]) -} -if(!MC)temp=lapply(bl,QS.ancbse.sub,pts) -else{ -library(parallel) -temp=mclapply(bl,QS.ancbse.sub,pts) -} -tv=list() -for(j in 1:nboot)tv[[j]]=as.vector(matl(temp[[j]][1:npt])) -E=matl(tv) -E=as.matrix(E) -se=apply(E,1,sd) -ci[,1]<-e-qnorm(1-alpha/2)*se -ci[,2]<-e+qnorm(1-alpha/2)*se -test<-(e-null.value)/se -sig<-2*(1-pnorm(abs(test))) -list(Est=e,SE=se,test.stat=test,conf.int=ci,p.value=sig) -} - -QS.ancbse.sub<-function(m,pts){ -v=QSanc(m[[1]],m[[2]],m[[3]],m[[4]],pts=pts) -v -} - -getBetaHdi <- function(a, b, width) { -eps <- 1e-9 -if (a < 1 + eps & b < 1 + eps) # Degenerate case -return(c(NA, NA)) -if (a < 1 + eps & b > 1) # Left border case -return(c(0, width)) -if (a > 1 & b < 1 + eps) # Right border case -return(c(1 - width, 1)) -if (width > 1 - eps) -return(c(0, 1)) -# Middle case -mode <- (a - 1) / (a + b - 2) -pdf <- function(x) dbeta(x, a, b) -l <- uniroot( -f = function(x) pdf(x) - pdf(x + width), -lower = max(0, mode - width), -upper = min(mode, 1 - width), -tol = 1e-9 -)$root -r <- l + width -return(c(l, r)) -} - -thd<- function(x, q=.5, width = 1 / sqrt(length(x))) -sapply(q, function(p) { -# -# q =quantiles to be estimated -# -n <- length(x) -if (n == 0) return(NA) -if (n == 1) return(x) -x <- sort(x) -a <- (n + 1) * p -b <- (n + 1) * (1 - p) -hdi <- getBetaHdi(a, b, width) -hdiCdf <- pbeta(hdi, a, b) -cdf <- function(xs) { -xs[xs <= hdi[1]] <- hdi[1] -xs[xs >= hdi[2]] <- hdi[2] -(pbeta(xs, a, b) - hdiCdf[1]) / (hdiCdf[2] - hdiCdf[1]) -} -iL <- floor(hdi[1] * n) -iR <- ceiling(hdi[2] * n) -cdfs <- cdf(iL:iR/n) -W <- tail(cdfs, -1) - head(cdfs, -1) -sum(x[(iL+1):iR] * W) -}) - -t2way.KMS.curve<-function(x,y,pts=NULL,SW=FALSE,npts=15,xlab='X',ylab='Effect.Size'){ -# -# For a 2-by-2 design, compare -# KMS measure of effect size associated with the two levels of the first factor -# plots an interaction effect when there is a covariate. -# -# SW=TRUE, switches rows and column - -if(is.matrix(x) || is.data.frame(x))x=listm(x) -if(is.matrix(y) || is.data.frame(y))y=listm(y) -if(length(x)!=4)stop('Should have four groups exactly. Fix argument x') -if(length(y)!=4)stop('Should have four groups exactly. 
Fix argument y') -for(j in 1:4){ -xy=elimna(cbind(x[[j]],y[[j]])) -x[[j]]=xy[,1] -y[[j]]=xy[,2] -} -n=lapply(y,length) -n=as.vector(matl(n)) -adj=4.4/n+1.00086 -flag=n>150 -adj[flag]=1 -adj=mean(adj) -nmax=max(n) -if(is.null(pts)){ -xlow=max(matl((lapply(x,qest,.1)))) -xhi=min(matl((lapply(x,qest,.9)))) -pts=seq(xlow,xhi,length=npts) -} -nv=lapply(x,length) -if(SW){ -x=x[c(1,3,2,4)] -y=y[c(1,3,2,4)] -} -v1=ancova.KMS(x[[1]],y[[1]],x[[2]],y[[2]],pts=pts,plotit=FALSE)[,2] -v2=ancova.KMS(x[[3]],y[[3]],x[[4]],y[[4]],pts=pts,plotit=FALSE)[,2] -v=v1-v2 -plot(pts,v,xlab=xlab,ylab=ylab,type='n') -lines(pts,v) -} - - -t2way.KMS.interbt<-function(x,y,pts=NULL,alpha=.05,nboot=100,MC=FALSE,SEED=TRUE,SW=FALSE){ -# -# For a 2-by-2 design, compare -# KMS measure of effect size associated with the two levels of the first factor -# to get an interaction effect when there is a covariate. -# -# SW=TRUE, switches rows and column - -if(SEED)set.seed(2) -if(is.matrix(x) || is.data.frame(x))x=listm(x) -if(is.matrix(y) || is.data.frame(y))y=listm(y) -if(length(x)!=4)stop('Should have four groups exactly. Fix argument x') -if(length(y)!=4)stop('Should have four groups exactly. Fix argument y') -for(j in 1:4){ -xy=elimna(cbind(x[[j]],y[[j]])) -x[[j]]=xy[,1] -y[[j]]=xy[,2] -} -n=lapply(y,length) -nn=as.vector(matl(n)) -adj=4.4/nn+1.00086 -flag=nn>150 -adj[flag]=1 -adj=mean(adj) -nmax=max(nn) -if(is.null(pts)){ -xlow=max(matl((lapply(x,qest,.1)))) -xhi=min(matl((lapply(x,qest,.9)))) -pts=c(xlow,(xlow+xhi)/2,xhi) -} -nv=lapply(x,length) -if(SW){ -x=x[c(1,3,2,4)] -y=y[c(1,3,2,4)] -} -npts=length(pts) -MAT1=matrix(NA,nmax,4) -MAT2=matrix(NA,nmax,4) -dat1=list() -dat2=list() -for(i in 1:nboot){ -id1=sample(nv[[1]],replace=TRUE) -id2=sample(nv[[2]],replace=TRUE) -MAT1[1:nv[[1]],1:2]=cbind(x[[1]][id1],y[[1]][id1]) -MAT1[1:nv[[2]],3:4]=cbind(x[[2]][id2],y[[2]][id2]) -dat1[[i]]=MAT1 -id1=sample(nv[[3]],replace=TRUE) -id2=sample(nv[[4]],replace=TRUE) -MAT2[1:nv[[3]],1:2]=cbind(x[[3]][id1],y[[3]][id1]) -MAT2[1:nv[[4]],3:4]=cbind(x[[4]][id2],y[[4]][id2]) -dat2[[i]]=MAT2 -} -if(MC){ -library(parallel) -a1=mclapply(dat1,t2way.KMS.inter.sub,pts=pts) -a2=mclapply(dat2,t2way.KMS.inter.sub,pts=pts) -} -if(!MC){ -a1=lapply(dat1,t2way.KMS.inter.sub,pts=pts) -a2=lapply(dat2,t2way.KMS.inter.sub,pts=pts) -} -a1=t(matl(a1)) -a2=t(matl(a2)) - -zq=qnorm(1-alpha/2) -sqse1=NA -sqse2=NA -for(j in 1:npts)sqse1[j]=var(a1[,j]) -for(j in 1:npts)sqse2[j]=var(a2[,j]) -Results=matrix(NA,npts,10) -Results[,1:2]=ancova.KMS(x[[1]],y[[1]],x[[2]],y[[2]],pts=pts,plotit=FALSE) -Results[,3]=ancova.KMS(x[[3]],y[[3]],x[[4]],y[[4]],pts=pts,plotit=FALSE)[,2] -Results[,4]=Results[,2]-Results[,3] -Results[,5]=adj*Results[,4]/sqrt(sqse1+sqse2) # n=40 1.15 work well get .054 -pv=2*(1-pnorm(abs(Results[,5]))) -Results[,6]=pv -Results[,7]=Results[,4]-zq*sqrt(sqse1+sqse2)/adj -Results[,8]=Results[,4]+zq*sqrt(sqse1+sqse2)/adj -Results[,9]=sqrt(sqse1+sqse2)/adj -Results[,10]=p.adjust(pv,method='hoch') -dimnames(Results)=list(NULL,c('pts','Est1','Est2','Dif','Test.Stat','p.value','ci.low','ci.up','SE','p.adjusted')) -n=matl(nv) -n=as.vector(n) -list(n=nn,Results=Results) -} - -t2way.KMS.inter.sub<-function(z,pts){ -ancova.KMS(z[,1],z[,2],z[,3],z[,4],pts=pts,plotit=FALSE)[,2] -} - -regblp.ci<- -function(x,y,regfun=tsreg,GEN=TRUE,nboot=599,alpha=.05,plotit=FALSE,pr=FALSE,MC=FALSE, -xlab='Predictor 1',ylab='Predictor 2',SEED=TRUE,...){ -# -# Compute a .95 confidence interval for each of the parameters of -# a linear regression equation using a method that removes bad -# 
leverage points -# -# GEN=TRUE: Use a modified version of the Rousseeuw and van Zomeren method, recommended. -# else, use Rousseeuw and van Zomeren method -# -# The predictor values are assumed to be in the n by p matrix x. -# The default number of bootstrap samples is nboot=599 -# -# plotit=TRUE: If there are two predictors, plot 1-alpha confidence region based -# on the bootstrap samples. -# -x<-as.matrix(x) -p1<-ncol(x)+1 -p<-ncol(x) -xy<-cbind(x,y) -xy<-elimna(xy) -x<-xy[,1:p] -y<-xy[,p1] -x=as.matrix(x) -n=nrow(x) -# Check or bad leverage points and remove any that are found -# -if(GEN)id=reglev.gen(x,y,regfun=regfun,plotit=FALSE)$bad.lev -else -id=reglev(x,y)$bad.lev.points -if(length(id)>0){ -xy=cbind(x,y) -xy=xy[-id,] -x<-xy[,1:p] -y<-xy[,p1] -} -nk=nrow(xy) -if(MC)e=regciMC(x,y,nboot=nboot,regfun=regfun,alpha=alpha,SEED=SEED,plotit=plotit,xlab=xlab,ylab=ylab,pr=pr) -if(!MC)e=regci(x,y,nboot=nboot,regfun=regfun,alpha=alpha,SEED=SEED,plotit=plotit,xlab=xlab,ylab=ylab,pr=pr) -e$n=n -e$n.keep=nk -e -} - -part.cor<-function(x,y,z,corfun=wincor,regfun=MMreg,plotit=FALSE,xout=FALSE,GEN=TRUE,BOOT=TRUE,SEED=TRUE,nboot=599, -XOUT.blp=TRUE,plot.out=FALSE, -outfun=outpro,plotfun=plot,xlab='Res 1',ylab='Res 2',...){ -# -# Robust partial correlation. -# Uses the correlation between the residuals of x with z and y with z -# -# Default is a Winsorized correlation between x and y controlling for z -# XOUT.blp=TRUE means that if any bad leverage points are detected, they are removed -# -# -# If XOUT.blp=FALSE -# and -# xout=TRUE remove leverage points. If -# GEN =TRUE, remove only bad leverage when dealing with the association between -#. x and z as well as y and z. In contrast, -# XOUT.blp=TRUE permanently removes bad leverage points associated the -# regression line for x and y, where x is the independent variable. -# -# if z contains a dummy variable, can ignore the corresponding col when removing outliers -#. Example -# part.cor(x,y,z,GEN=FALSE,outfun=out.dummy,id=2,xout=TRUE) -# -# Examples: -# part.cor(x,y,z,regfun=MMreg,corfun=wincor) -# part.cor(x,y,z,regfun=MMreg,corfun=cor.test,method='kendall') -# part.cor(x,y,z,regfun=MMreg,corfun=cor.test,method='spear') -# part.cor(x,y,z,regfun=MMreg,corfun=scor) #skpped correlation, -# -# -xyz=elimna(cbind(x,y,z)) -p3=ncol(xyz) -p1=p3-1 -x=xyz[,1] -y=xyz[,2] -z=xyz[,3:p3] -z=as.matrix(z) -if(XOUT.blp){ -id=outblp(x,y)$keep -x=x[id] -y=y[id] -z=z[id,] -xout=FALSE -} -if(xout){ -if(GEN){ -e1=reg.reglev(z,x,regfun=regfun)$coef -e2=reg.reglev(z,y,regfun=regfun)$coef -} -else{ -e1=regfun(z,x,xout=xout,outfun=outfun,...)$coef -e2=regfun(z,y,xout=xout,outfun=outfun,...)$coef -} -} -if(!xout){ -e1=regfun(z,x)$coef -e2=regfun(z,y)$coef -} -z=as.matrix(z) -res1=x-z%*%e1[2:p1]-e1[1] -res2=y-z%*%e2[2:p1]-e2[1] -if(plotit){ -if(plot.out){ -id=outpro(res1)$keep -res1=res1[id] -res2=res2[id] -} -if(identical(plotfun,plot))plot(res1,res2,xlab=xlab,ylab=ylab) -else plotfun(res1,res2,xlab=xlab,ylab=ylab,pr=FALSE) -} -if(BOOT)est=corb(res1,res2,corfun,SEED=SEED,nboot=nboot) -else -est=corfun(res1,res2) -est -} - -qcomthd<-function(x,y,q=c(.1,.25,.5,.75,.9),nboot=2000,plotit=TRUE,SEED=TRUE,xlab='Group 1',ylab='Est.1-Est.2',alpha=.05,ADJ.CI=TRUE,MC=FALSE){ -# -# Compare quantiles using pb2gen using trimmed version of the Harrell-Davis estimator -#Tied values are allowed. -# -# ADJ.CI=TRUE means that the confidence intervals are adjusted based on the level used by the corresponding -# test statistic. 
If a test is performed with at the .05/3 level, for example, the confidence returned has -# 1-.05/3 probability coverage. -# -# When comparing lower or upper quartiles, both power and the probability of Type I error -# compare well to other methods that have been derived. -# q: can be used to specify the quantiles to be compared -# q defaults to comparing the .1,.25,.5,.75, and .9 quantiles -# -# Function returns p-values and critical p-values based on Hochberg's method. -# - -if(SEED)set.seed(2) -pv=NULL -output=matrix(NA,nrow=length(q),ncol=10) -dimnames(output)<-list(NULL,c('q','n1','n2','est.1','est.2','est.1_minus_est.2','ci.low','ci.up','p-value','adj.p.value')) -for(i in 1:length(q)){ -output[i,1]=q[i] -output[i,2]=length(elimna(x)) -output[i,3]=length(elimna(y)) -output[i,4]=thd(x,q=q[i]) -output[i,5]=thd(y,q=q[i]) -output[i,6]=output[i,4]-output[i,5] -temp=qcomthd.sub(x,y,nboot=nboot,q=q[i],SEED=FALSE,alpha=alpha,MC=MC) -output[i,7]=temp$ci[1] -output[i,8]=temp$ci[2] -output[i,9]=temp$p.value -} -temp=order(output[,9],decreasing=TRUE) -zvec=alpha/c(1:length(q)) -zvec[temp]=zvec -if(ADJ.CI){ -for(i in 1:length(q)){ -if(!MC)temp=pb2gen(x,y,nboot=nboot,est=thd,q=q[i],SEED=FALSE,alpha=zvec[i],pr=FALSE) -else -temp=pb2genMC(x,y,nboot=nboot,est=thd,q=q[i],SEED=FALSE,alpha=zvec[i],pr=FALSE) -output[i,7]=temp$ci[1] -output[i,8]=temp$ci[2] -output[i,9]=temp$p.value -} -temp=order(output[,10],decreasing=TRUE) -} -output[,10]=p.adjust(output[,9],method='hoch') - - -if(plotit){ -xax=rep(output[,4],3) -yax=c(output[,6],output[,7],output[,8]) -plot(xax,yax,xlab=xlab,ylab=ylab,type='n') -points(output[,4],output[,6],pch='*') -lines(output[,4],output[,6]) -points(output[,4],output[,7],pch='+') -points(output[,4],output[,8],pch='+') -} -output -} - -qcomhd.sub<-function(x,y,q,alpha=.05,nboot=2000,SEED=TRUE,MC=TRUE){ -# -x<-x[!is.na(x)] # Remove any missing values in x -y<-y[!is.na(y)] # Remove any missing values in y -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -datax<-matrix(sample(x,size=length(x)*nboot,replace=TRUE),nrow=nboot) -datay<-matrix(sample(y,size=length(y)*nboot,replace=TRUE),nrow=nboot) -datax=listm(t(datax)) -datay=listm(t(datay)) -if(MC){ -library(parallel) -bvecx<-mclapply(datax,hd,q,mc.preschedule=TRUE) -bvecy<-mclapply(datay,hd,q,mc.preschedule=TRUE) -} -else{ -bvecx<-lapply(datax,hd,q) -bvecy<-lapply(datay,hd,q) -} -bvecx=as.vector(matl(bvecx)) -bvecy=as.vector(matl(bvecy)) -bvec<-sort(bvecx-bvecy) -low<-round((alpha/2)*nboot)+1 -up<-nboot-low -temp<-sum(bvec<0)/nboot+sum(bvec==0)/(2*nboot) -sig.level<-2*(min(temp,1-temp)) -se<-var(bvec) -list(est.1=hd(x,q),est.2=hd(y,q),ci=c(bvec[low],bvec[up]),p.value=sig.level,sq.se=se,n1=length(x),n2=length(y)) -} - -qcomthd.sub<-function(x,y,q,alpha=.05,nboot=2000,SEED=TRUE,MC=TRUE){ -# -x<-x[!is.na(x)] # Remove any missing values in x -y<-y[!is.na(y)] # Remove any missing values in y -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. 
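-# The lines below carry out the percentile bootstrap step: nboot bootstrap
-# samples are drawn from each group, the trimmed Harrell-Davis estimate thd()
-# of the qth quantile is computed for every bootstrap sample, and the sorted
-# differences bvecx-bvecy supply the percentile confidence interval and p-value.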
-datax<-matrix(sample(x,size=length(x)*nboot,replace=TRUE),nrow=nboot) -datay<-matrix(sample(y,size=length(y)*nboot,replace=TRUE),nrow=nboot) -datax=listm(t(datax)) -datay=listm(t(datay)) -if(MC){ -library(parallel) -bvecx<-mclapply(datax,thd,q,mc.preschedule=TRUE) -bvecy<-mclapply(datay,thd,q,mc.preschedule=TRUE) -} -else{ -bvecx<-lapply(datax,thd,q) -bvecy<-lapply(datay,thd,q) -} -bvecx=as.vector(matl(bvecx)) -bvecy=as.vector(matl(bvecy)) -bvec<-sort(bvecx-bvecy) -low<-round((alpha/2)*nboot)+1 -up<-nboot-low -temp<-sum(bvec<0)/nboot+sum(bvec==0)/(2*nboot) -sig.level<-2*(min(temp,1-temp)) -se<-var(bvec) -list(est.1=thd(x,q),est.2=thd(y,q),ci=c(bvec[low],bvec[up]),p.value=sig.level,sq.se=se,n1=length(x),n2=length(y)) -} - - -bw.2by2.int.es<-function(x,CI=FALSE){ -# -# Form difference scores and compute several measures of effect size -# -# if CI=TRUE, compute confidence intervals -# -if(is.matrix(x) || is.data.frame(x))x=listm(x) -d1=x[[1]]-x[[2]] -d2=x[[3]]-x[[4]] -if(!CI)a=ES.summary(d1,d2) -else -a=ES.summary.CI(d1,d2) -a -} - -bw.int.es<-function(J,K,x,method='KMS',tr=.2,SEED=TRUE,nboot=2000,CI=FALSE){ -# -# All Interactions based on difference scores -# -#. Choices for method: 'EP','QS','QStr','AKP','WMW','KMS' -# -if(is.matrix(x) || is.data.frame(x))x=listm(x) -con=con2way(J,K)$conAB -num=ncol(con) -if(CI){ -CON=matrix(NA,nrow=num,ncol=8) -dimnames(CON)=list(NULL,c('Con.num','n1','n2','Est.','ci.low','ci.up','p.value','p.adjusted')) -} -if(!CI){ -CON=matrix(NA,nrow=num,ncol=2) -dimnames(CON)=list(NULL,c('Con.num','Est.')) -} - -for(j in 1:ncol(con)){ -id=which(con[,j]!=0) -dat=x[id] -d1=dat[[1]]-dat[[2]] -d2=dat[[3]]-dat[[4]] -if(!CI){ -temp=ESfun(d1,d2) -CON[j,1]=j -CON[j,2]=temp -} -else{ -temp=ESfun.CI(d1,d2) -CON[j,1]=j -a=ESfun.CI(d1,d2) -CON[j,2]=a$n1 -CON[j,3]=a$n2 -CON[j,4]=a$effect.size -CON[j,5]=a$ci[1] -CON[j,6]=a$ci[2] -CON[j,7]=a$p.value -} -} -if(CI) -CON[,8]=p.adjust(CON[,7],method='hoch') -list(CON=CON,con=con) -} - -quant<-function(x,q=.5,names=TRUE,na.rm=TRUE,type=8){ -# -# For convenience, follow style of other functions in Rallfun -# when using the R function quanitle. Also, use by default the estimator -# recommended by Hyndman and Fan (1996). -# -a=quantile(x,probs=q,names=names,type=type,na.rm=na.rm) -a -} - -qghdist<-function(q=.5,g=0,h=0){ -# -# Determine quantile of a g-and-h distribution -# -e=qnorm(q) -v=ghtransform(e,g=g,h=h) -v -} - -MED.ES<-function(x,tr=.25,null.val=0,est=median){ -# -# One-sample effect size analog of Cohen's d based on the median -# and either MAD or Winsorized standard deviation rescaled to estimate the standard deviation when -# sampling from a normal distribution -# -x=elimna(x) -e=est(x) -bot=mad(x) -if(bot==0)bot=winsdN(x,tr=tr) -if(bot==0)stop('Both measures of scale are equal to zero') -es=(e-null.val)/bot -es -} - - -winsdN<-function(x,tr=.2){ -# -# Rescale a Winsorized standard deviation so that it estimates -# the population standard deviation under normality. -# -library(MASS) -x=elimna(x) -e=winsd(x,tr=tr) -if(tr==0)cterm=1 -else -cterm=area(dnormvar,qnorm(tr),qnorm(1-tr))+2*(qnorm(tr)^2)*tr -cterm=sqrt(cterm) -e=e/cterm -e -} - -qcomhd<-function(x,y,est=hd,q=c(.1,.25,.5,.75,.9),nboot=4000,plotit=TRUE,SEED=TRUE,xlab='Group 1',ylab='Est.1-Est.2',alpha=.05,ADJ.CI=TRUE,MC=FALSE){ -# -# Compare quantiles using pb2gen using trimmed version of the Harrell-Davis estimator -# Tied values are allowed. -# -# ADJ.CI=TRUE means that the confidence intervals are adjusted based on the level used by the corresponding -# test statistic. 
If a test is performed with at the .05/3 level, for example, the confidence returned has -# 1-.05/3 probability coverage. -# -# When comparing lower or upper quartiles, both power and the probability of Type I error -# compare well to other methods that have been derived. -# q: can be used to specify the quantiles to be compared -# q defaults to comparing the .1,.25,.5,.75, and .9 quantiles -# -# Function returns p-values and critical p-values based on Hochberg's method. -# - -if(SEED)set.seed(2) -pv=NULL -output=matrix(NA,nrow=length(q),ncol=10) -dimnames(output)<-list(NULL,c('q','n1','n2','est.1','est.2','est.1_minus_est.2','ci.low','ci.up','p-value','adj.p.value')) -for(i in 1:length(q)){ -output[i,1]=q[i] -output[i,2]=length(elimna(x)) -output[i,3]=length(elimna(y)) -output[i,4]=hd(x,q=q[i]) -output[i,5]=hd(y,q=q[i]) -output[i,6]=output[i,4]-output[i,5] -temp=qcomhd.sub(x,y,nboot=nboot,q=q[i],SEED=FALSE,alpha=alpha,MC=MC) -output[i,7]=temp$ci[1] -output[i,8]=temp$ci[2] -output[i,9]=temp$p.value -} -temp=order(output[,9],decreasing=TRUE) -zvec=alpha/c(1:length(q)) -zvec[temp]=zvec -if(ADJ.CI){ -for(i in 1:length(q)){ -if(!MC)temp=pb2gen(x,y,nboot=nboot,est=est,q=q[i],SEED=FALSE,alpha=zvec[i],pr=FALSE) -else -temp=pb2genMC(x,y,nboot=nboot,est=est,q=q[i],SEED=FALSE,alpha=zvec[i],pr=FALSE) -output[i,7]=temp$ci[1] -output[i,8]=temp$ci[2] -output[i,9]=temp$p.value -} -temp=order(output[,10],decreasing=TRUE) -} -output[,10]=p.adjust(output[,9],method='hoch') -if(plotit){ -xax=rep(output[,4],3) -yax=c(output[,6],output[,7],output[,8]) -plot(xax,yax,xlab=xlab,ylab=ylab,type='n') -points(output[,4],output[,6],pch='*') -lines(output[,4],output[,6]) -points(output[,4],output[,7],pch='+') -points(output[,4],output[,8],pch='+') -} -output -} - - chk.lin<-function(x,y,regfun=tsreg,xout=FALSE,outfun=outpro,LP=TRUE,...){ -# -# Check for linearity by plotting predicted vs residuals. -# -xy=cbind(x,y) -xy=elimna(xy) -p1=ncol(xy) -p=p1-1 -x=xy[,1:p] -y=xy[,p1] -x=as.matrix(x) -if(xout){ -id=outfun(x)$keep -x=x[id,] -y=y[id] -} -x=as.matrix(x) -if(identical(regfun,MMreg))res=MMreg(x,y,RES=TRUE)$residuals -else -res=regfun(x,y)$residuals -pre=reg.pred(x,y) -if(LP)q=lplot(pre,res,ylab='Res',xlab='Yhat',pr=FALSE) -else -plot(pre,res,ylab='Res',xlab='Yhat') -} - -wincovN<-function(x,y=NULL,tr=0.2){ -# -# Winsorized covariance rescaled to est cov under normality when there is no trimming -# -library(MASS) -e=wincor(x,y,tr=tr)$cov -if(tr==0)cterm=1 -else cterm=area(dnormvar,qnorm(tr),qnorm(1-tr))+2*(qnorm(tr)^2)*tr -e=e/cterm -e -} - -rm.marg.es<-function(x,y=NULL,tr=0.2){ -# -# Analog of robust version KMS measure of effect size for two -# dependent groups -# -library(MASS) -if(!is.null(y))x=cbind(x,y) -x=elimna(x) -if(ncol(x)>2)stop('Should have only two variables') -v1=winsdN(x[,1],tr=tr)^2 -v2=winsdN(x[,2],tr=tr)^2 -v3=wincovN(x,tr=tr) -a=v1+v2 -2*v3 -e=sqrt(2)*(mean(x[,1],tr=tr)-mean(x[,2],tr=tr))/sqrt(a) -e -} - -rm.marg.esCI<-function(x,y=NULL,tr=.2,nboot=1000,SEED=TRUE,alpha=.05, -null.val=0,MC=FALSE,...){ -# -# Two dependent groups. -# Confidence interval for effect size that takes into account heteroscedasticity as well as the -# association between X and Y based on the marginal distributions, not the -# difference scores. For robust estimators, these two approaches generally give -# different results. 
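-# Example (illustrative only; pre and post are hypothetical vectors of
-# dependent measures):
-# pre=rnorm(40); post=pre+rnorm(40,.3)
-# rm.marg.esCI(pre,post,tr=.2,nboot=1000)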
-# -library(MASS) -if(!is.null(y))x=cbind(x,y) -x=elimna(x) -if(SEED)set.seed(2) -e=rm.marg.es(x,tr=tr) -n=nrow(x) -if(!MC){ -v=NA -for(i in 1:nboot){ -id=sample(n,replace=TRUE) -v[i]=rm.marg.es(x[id,],tr=tr) -} -} -if(MC){ -library(parallel) -d=list() -for(j in 1:nboot){ -id=sample(n,replace=TRUE) -d[[j]]=x[id,] -} -v=mclapply(d,rm.marg.es,tr=tr) -v=matl(v) -} - -v=sort(v) -ilow<-round((alpha/2) * nboot) -ihi<-nboot - ilow -ilow<-ilow+1 -ci=v[ilow] -ci[2]=v[ihi] -pv=mean(v0){ -print("Bootstrap estimates of location could not be computed") -print("This can occur when using an M-estimator") -print("Might try est=tmean") -} -bcon<-t(con)%*%bvec #C by nboot matrix -tvec<-t(con)%*%mvec -tvec<-tvec[,1] -tempcen<-apply(bcon,1,mean) -vecz<-rep(0,ncol(con)) -bcon<-t(bcon) -smat<-var(bcon-tempcen+tvec) -temp<-bcon-tempcen+tvec -bcon<-rbind(bcon,vecz) -if(op==1)dv<-mahalanobis(bcon,tvec,smat) -if(op==2){ -smat<-cov.mcd(temp)$cov -dv<-mahalanobis(bcon,tvec,smat) -} -if(op==3){ -if(!MC)dv<-pdis(bcon,MM=MM,cop=cop) -if(MC)dv<-pdisMC(bcon,MM=MM,cop=cop) -} -bplus<-nboot+1 -sig.level<-1-sum(dv[bplus]>=dv[1:nboot])/nboot -list(p.value=sig.level,psihat=tvec,con=con,n=nvec) -} - -pbad3way<-function(J,K,L,x,est=tmean,alpha=.05,nboot=2000,MC=FALSE){ -# -# Three-way ANOVA for robust measures of locaton -# To compare medians, use est=hd, in case which tied values are allowed. -# -if(is.matrix(x)|| is.data.frame(x))x=listm(x) -chkcar=NA -for(j in 1:length(x))chkcar[j]=length(unique(x[[j]])) -if(min(chkcar)<14){ -print('Warning: Sample size is less than') -print('14 for one more groups. Type I error might not be controlled') -} -con=con3way(J,K,L) -A=pbadepth(x,est=est,con=con$conA,alpha=alpha,nboot=nboot,MC=MC) -B=pbadepth(x,est=est,con=con$conB,alpha=alpha,nboot=nboot,MC=MC) -C=pbadepth(x,est=est,con=con$conC,alpha=alpha,nboot=nboot,MC=MC) -AB=pbadepth(x,est=est,con=con$conAB,alpha=alpha,nboot=nboot,MC=MC) -AC=pbadepth(x,est=est,con=con$conAC,alpha=alpha,nboot=nboot,MC=MC) -BC=pbadepth(x,est=est,con=con$conBC,alpha=alpha,nboot=nboot,MC=MC) -ABC=pbadepth(x,est=est,con=con$conABC,alpha=alpha,nboot=nboot,MC=MC) -list(Fac.A=A,Fac.B=B,Fac.C=C,Fac.AB=AB,Fac.AC=AC,Fac.BC=BC,Fac.ABC=ABC) -} - - ph.inter<-function(x,alpha=.05,p=J*K,grp=c(1:p),plotit=TRUE,op=4){ -# -# Patel--Hoel interaction for a -# in 2 by 2 design. The method is based on an -# extension of Cliff's heteroscedastic technique for -# handling tied values and the Patel-Hoel definition of no interaction. -# -# It is assumed all groups are independent. -# -# Missing values are automatically removed. -# -# The default value for alpha is .05. Any other value results in using -# alpha=.01. -# -# Argument grp can be used to rearrange the order of the data. -# - if(is.matrix(x))x<-listm(x) -if(!is.list(x))stop('Data must be stored in a matrix or in list mode.') -a=rimul(2,2,x,alpha=alpha,plotit=FALSE) - e1=cid(x[[1]],x[[2]],,plotit=FALSE)$phat - e2=cid(x[[3]],x[[4]],,plotit=FALSE)$phat -if(plotit){ -m1<-outer(x[[1]],x[[2]],FUN='-') -m2<-outer(x[[3]],x[[4]],FUN='-') -m1<-as.vector(m1) -m2<-as.vector(m2) -g2plot(m1,m2,op=op) -} -list(Est.1=e1,Est.2=e2,dif=e1-e2,ci.lower=a$test[1,6],ci.upper=a$test[1,7],p.value=a$test[8]) -} - - ph.inter<-function(x,alpha=.05,p=J*K,grp=c(1:p),plotit=TRUE,op=4,SW=FALSE){ -# -# Patel--Hoel interaction for a -# in 2 by 2 design. The method is based on an -# extension of Cliff's heteroscedastic technique for -# handling tied values and the Patel-Hoel definition of no interaction. 
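-# In the output, Est.1 and Est.2 are the phat estimates returned by cid for
-# the two pairs of groups, and dif=Est.1-Est.2 estimates the Patel-Hoel
-# interaction.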
-# -# The function rimul deals with the J by K design -# -# It is assumed all groups are independent. -# -# Missing values are automatically removed. -# -# The default value for alpha is .05. Any other value results in using -# alpha=.01. -# -# Argument grp can be used to rearrange the order of the data. -# - if(is.matrix(x))x<-listm(x) -if(!is.list(x))stop('Data must be stored in a matrix or in list mode.') -if(SW)x=x[c(1,3,2,4)] -a=rimul(2,2,x,alpha=alpha,plotit=FALSE) - e1=cid(x[[1]],x[[2]],,plotit=FALSE)$phat - e2=cid(x[[3]],x[[4]],,plotit=FALSE)$phat -if(plotit){ -m1<-outer(x[[1]],x[[2]],FUN='-') -m2<-outer(x[[3]],x[[4]],FUN='-') -m1<-as.vector(m1) -m2<-as.vector(m2) -g2plot(m1,m2,op=op) -} -list(Est.1=e1,Est.2=e2,dif=e1-e2,ci.lower=a$test[1,6],ci.upper=a$test[1,7],p.value=a$test[8]) -} - -ESmcp.CI<-function(x,method='KMS',alpha=.05,nboot=2000,SEED=TRUE,pr=TRUE){ -# -# All -# Choices for method: -# 'EP','QS','QStr','AKP','WMW','KMS' -# -# - -if(is.data.frame(x))x=as.matrix(x) -if(SEED)set.seed(2) -if(is.matrix(x))x<-listm(x) -x=elimna(x) -n=lapply(x,length) -J<-length(x) -JALL=(J^2-J)/2 -if(identical(method,'EP')){ -if(pr)print('Note: A method for computing a p.value for EP is not yet available') -output=matrix(NA,JALL,5) -ic=0 -for(j in 1:J){ -for(k in 1:J){ -if(j Eigenvalues.Random)) -} - -shared.comp <- matrix(rnorm(n.cases * n.factors, 0, 1), nrow = n.cases, - ncol = n.factors) -unique.comp <- matrix(rnorm(n.cases * n.variables, 0, 1), nrow = n.cases, - ncol = n.variables) -shared.load <- matrix(0, nrow = n.variables, ncol = n.factors) -unique.load <- matrix(0, nrow = n.variables, ncol = 1) -while (trials.without.improvement < max.trials) { - iteration <- iteration + 1 - factor.analysis <- FactorAnalysis(intermediate.corr, corr.matrix = TRUE, - max.iteration = 50, n.factors, corr.type) - if (n.factors == 1) { - shared.load[, 1] <- factor.analysis$loadings - } else { - for (i in 1:n.factors) - shared.load[, i] <- factor.analysis$loadings[, i] - } - shared.load[shared.load > 1] <- 1 - shared.load[shared.load < -1] <- -1 - if (shared.load[1, 1] < 0) - shared.load <- shared.load * -1 - for (i in 1:n.variables) - if (sum(shared.load[i, ] * shared.load[i, ]) < 1) { - unique.load[i, 1] <- (1 - sum(shared.load[i, ] * shared.load[i, ])) - } else { - unique.load[i, 1] <- 0 - } - unique.load <- sqrt(unique.load) - for (i in 1:n.variables) - data[, i] <- (shared.comp %*% t(shared.load))[, i] + unique.comp[, i] * - unique.load[i, 1] - for (i in 1:n.variables) { - data <- data[sort.list(data[, i]), ] - data[, i] <- distributions[, i] - } - reproduced.corr <- cor(data, method = corr.type) - residual.corr <- target.corr - reproduced.corr - rmsr <- sqrt(sum(residual.corr[lower.tri(residual.corr)] * - residual.corr[lower.tri(residual.corr)]) / - (.5 * (n.variables * n.variables - n.variables))) - if (rmsr < best.rmsr) { - best.rmsr <- rmsr - best.corr <- intermediate.corr - best.res <- residual.corr - intermediate.corr <- intermediate.corr + initial.multiplier * - residual.corr - trials.without.improvement <- 0 - } else { - trials.without.improvement <- trials.without.improvement + 1 - current.multiplier <- initial.multiplier * - .5 ^ trials.without.improvement - intermediate.corr <- best.corr + current.multiplier * best.res - } -} - -factor.analysis <- FactorAnalysis(best.corr, corr.matrix = TRUE, - max.iteration = 50, n.factors, - corr.type) -if (n.factors == 1) { - shared.load[, 1] <- factor.analysis$loadings -} else { - for (i in 1:n.factors) - shared.load[, i] <- factor.analysis$loadings[, i] 
-} -shared.load[shared.load > 1] <- 1 -shared.load[shared.load < -1] <- -1 -if (shared.load[1, 1] < 0) - shared.load <- shared.load * -1 -for (i in 1:n.variables) - if (sum(shared.load[i, ] * shared.load[i, ]) < 1) { - unique.load[i, 1] <- (1 - sum(shared.load[i, ] * shared.load[i, ])) - } else { - unique.load[i, 1] <- 0 - } -unique.load <- sqrt(unique.load) -for (i in 1:n.variables) - data[, i] <- (shared.comp %*% t(shared.load))[, i] + unique.comp[, i] * - unique.load[i, 1] -data <- apply(data, 2, scale) # standardizes each variable in the matrix -for (i in 1:n.variables) { - data <- data[sort.list(data[, i]), ] - data[, i] <- distributions[, i] -} -data -} - -################################################################################ -FactorAnalysis <- function(data, corr.matrix = FALSE, max.iteration = 50, - n.factors = 0, corr.type = "pearson") { -# Analyzes comparison data with known factorial structures -# -# Args: -# data : Matrix to store the simulated data. -# corr.matrix : Correlation matrix (default is FALSE) -# max.iteration : Maximum number of iterations (scalar, default is 50). -# n.factors : Number of factors (scalar, default is 0). -# corr.type : Type of correlation (character, default is "pearson", -# user can also call "spearman"). -# -# Returns: -# $loadings : Factor loadings (vector, if one factor. matrix, if multiple -# factors) -# $factors : Number of factors (scalar). -# - data <- as.matrix(data) - n.variables <- dim(data)[2] - if (n.factors == 0) { - n.factors <- n.variables - determine <- TRUE - } else { - determine <- FALSE - } - if (!corr.matrix) { - corr.matrix <- cor(data, method = corr.type) - } else { - corr.matrix <- data - } - criterion <- .001 - old.h2 <- rep(99, n.variables) - h2 <- rep(0, n.variables) - change <- 1 - iteration <- 0 - factor.loadings <- matrix(nrow = n.variables, ncol = n.factors) - while ((change >= criterion) & (iteration < max.iteration)) { - iteration <- iteration + 1 - eigenvalue <- eigen(corr.matrix) - l <- sqrt(eigenvalue$values[1:n.factors]) - for (i in 1:n.factors) - factor.loadings[, i] <- eigenvalue$vectors[, i] * l[i] - for (i in 1:n.variables) - h2[i] <- sum(factor.loadings[i, ] * factor.loadings[i, ]) - change <- max(abs(old.h2 - h2)) - old.h2 <- h2 - diag(corr.matrix) <- h2 - } - if (determine) n.factors <- sum(eigenvalue$values > 1) - return(list(loadings = factor.loadings[, 1:n.factors], - factors = n.factors)) -} - - -rmm.mar<-function(x, tr = 0.2, alpha = 0.05,BH=FALSE,ADJ.CI=FALSE){ -# -# Dependent groups -# Pairwise comparisons based on trimmed means of the marginal distributions -# ADJ.CI=TRUE: Confidence interval adjusted based on Hochberg or, if BH=TRUE, Benjamini--Hochberg - -if(!is.matrix(x))x<-matl(x) -if(!is.matrix(x))stop('Data must be stored in a matrix or in list mode.') -J<-ncol(x) -L=(J^2-J)/2 -psihat<-matrix(0,L,7) -testt<-matrix(0,L,4) -dimnames(psihat)<-list(NULL,c('Group','Group','est 1','est 2','dif','ci.lower','ci.upper')) -test<-matrix(NA,L,4) -dimnames(test)<-list(NULL,c('Group','Group','p.value','p.adjust')) -jcom<-0 -for (j in 1:J){ -for (k in 1:J){ -if (j < k){ -jcom<-jcom+1 -test[jcom,1]<-j -test[jcom,2]<-k -psihat[jcom,1]<-j -psihat[jcom,2]<-k -a=yuend(x[,j],x[,k],tr=tr) -psihat[jcom,3]=a$est1 -psihat[jcom,4]=a$est2 -psihat[jcom,5]=a$dif -test[jcom,1]<-j -test[jcom,2]<-k -test[jcom,3]<-a$p.value -psihat[jcom,6]=a$ci[1] -psihat[jcom,7]=a$ci[2] -}}} -if(ADJ.CI){ -ior=order(0-test[,3]) -adj=alpha/c(1:L) #Hoch -if(BH)adj=alpha*(L-1:L+1) -# -# Adjust confidence intervals -# -jcom<-0 -for (j 
in 1:J){ -for (k in 1:J){ -if (j < k){ -jcom<-jcom+1 -print(adj[ior[[jcom]]]) -a=yuend(x[,j],x[,k],alpha=adj[ior[[jcom]]],tr=tr) -psihat[jcom,6]=a$ci[1] -psihat[jcom,7]=a$ci[2] -}}} -} -test[,4]=p.adjust(test[,3],method='hoch') -if(BH)test[,4]=p.adjust(test[,3],method='BH') -nval=nrow(x) -list(n=nval,test=test,psihat=psihat) -} -rmm.dif<-function(x, tr = 0.2, alpha = 0.05,BH=FALSE,ADJ.CI=FALSE){ -# -# Dependent groups -# Pairwise comparisons, trimmed means based on difference scores -# ADJ.CI=TRUE: Confidence interval adjusted based on Hochberg or, if BH=TRUE, Benjamini--Hochberg -# -if(!is.matrix(x))x<-matl(x) -if(!is.matrix(x))stop('Data must be stored in a matrix or in list mode.') -J<-ncol(x) -L=(J^2-J)/2 -psihat<-matrix(0,L,5) -testt<-matrix(0,L,4) -dimnames(psihat)<-list(NULL,c('Group','Group','est','ci.lower','ci.upper')) -test<-matrix(NA,L,4) -dimnames(test)<-list(NULL,c('Group','Group','p.value','p.adjust')) -jcom<-0 -for (j in 1:J){ -for (k in 1:J){ -if (j < k){ -jcom<-jcom+1 -test[jcom,1]<-j -test[jcom,2]<-k -psihat[jcom,1]<-j -psihat[jcom,2]<-k -a=trimci(x[,j]-x[,k],tr=tr,pr=FALSE) -psihat[jcom,3]=a$estimate -test[jcom,1]<-j -test[jcom,2]<-k -test[jcom,3]<-a$p.value -psihat[jcom,4]=a$ci[1] -psihat[jcom,5]=a$ci[2] -}}} -if(ADJ.CI){ -ior=rev(rank(test[,3])) -adj=alpha/c(1:L) #Hoch -if(BH)adj=alpha*(L-1:L+1) -# -# Adjust confidence intervals -# -jcom<-0 -for (j in 1:J){ -for (k in 1:J){ -if (j < k){ -jcom<-jcom+1 -a=trimci(x[,j]-x[,k],alpha=adj[ior[[jcom]]],tr=tr,pr=FALSE) -psihat[jcom,4]=a$ci[1] -psihat[jcom,5]=a$ci[2] -}}} -} -test[,4]=p.adjust(test[,3],method='hoch') -if(BH)test[,4]=p.adjust(test[,3],method='BH') -nval=nrow(x) -list(n=nval,test=test,psihat=psihat) -} - -rmm.difpb<-function(x, est=tmean, alpha = 0.05,nboot=NA,SEED=TRUE,BH=FALSE,ADJ.CI=FALSE,...){ -# -# Dependent groups -# Pairwise comparisons, trimmed means based on difference scores -# ADJ.CI=TRUE: Confidence interval adjusted based on Hochberg or, if BH=TRUE, Benjamini--Hochberg -# -if(!is.matrix(x))x<-matl(x) -if(!is.matrix(x))stop('Data must be stored in a matrix or in list mode.') -J<-ncol(x) -L=(J^2-J)/2 -psihat<-matrix(0,L,5) -testt<-matrix(0,L,4) -dimnames(psihat)<-list(NULL,c('Group','Group','est','ci.lower','ci.upper')) -test<-matrix(NA,L,4) -dimnames(test)<-list(NULL,c('Group','Group','p.value','p.adjust')) -jcom<-0 -for (j in 1:J){ -for (k in 1:J){ -if (j < k){ -jcom<-jcom+1 -test[jcom,1]<-j -test[jcom,2]<-k -psihat[jcom,1]<-j -psihat[jcom,2]<-k -a=rmmcppbd(x[,j],x[,k],est=est,alpha=alpha,nboot=nboot,SEED=SEED,plotit=FALSE,...)$output -psihat[jcom,3]=a[2] -test[jcom,1]<-j -test[jcom,2]<-k -test[jcom,3]<-a[3] -psihat[jcom,4]=a[5] -psihat[jcom,5]=a[6] -}}} -if(ADJ.CI){ -ior=rev(rank(test[,3])) -adj=alpha/c(1:L) #Hoch -if(BH)adj=alpha*(L-1:L+1) -jcom<-0 -for (j in 1:J){ -for (k in 1:J){ -if (j < k){ -jcom<-jcom+1 -print(adj[ior[[jcom]]]) -#a=trimci(x[,j]-x[,k],alpha=adj[ior[[jcom]]],tr=tr) -#a=two.dep.pb(x[,j],x[,k],dif=TRUE,est=est,alpha=adj[ior[[jcom]]],nboot=nboot,SEED=SEED,pr=FALSE,...) 
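-# Recompute the percentile bootstrap confidence interval for this pair using
-# the adjusted alpha level adj[ior[[jcom]]] so that the interval is consistent
-# with the Hochberg (or, if BH=TRUE, Benjamini--Hochberg) critical value.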
-a=rmmcppbd(x[,j],x[,k],est=est,alpha=adj[ior[[jcom]]],nboot=nboot,SEED=SEED,plotit=FALSE,...)$output -psihat[jcom,4]=a[5] -psihat[jcom,5]=a[6] -}}} -} -test[,4]=p.adjust(test[,3],method='hoch') -if(BH)test[,4]=p.adjust(test[,3],method='BH') -nval=nrow(x) -list(n=nval,test=test,psihat=psihat) -} - -rmm.marpb<-function(x, est=tmean, alpha = 0.05,nboot=NA,BH=FALSE,SEED=TRUE,ADJ.CI=FALSE,...){ -# -# Dependent groups -# Pairwise comparisons based on trimmed means of the marginal distributions -# ADJ.CI=TRUE: Confidence interval adjusted based on Hochberg or, if BH=TRUE, Benjamini--Hochberg -# -if(!is.matrix(x))x<-matl(x) -if(!is.matrix(x))stop('Data must be stored in a matrix or in list mode.') -J<-ncol(x) -if(nrow(x)<80){ -if(identical(est,mom))stop('Use rmmcppb with argument BA=TRUE') -if(identical(est,onestep))stop('Use rmmcppb with argument BA=TRUE') -} -L=(J^2-J)/2 -psihat<-matrix(0,L,7) -testt<-matrix(0,L,4) -dimnames(psihat)<-list(NULL,c('Group','Group','est 1','est 2','dif','ci.lower','ci.upper')) -test<-matrix(NA,L,4) -dimnames(test)<-list(NULL,c('Group','Group','p.value','p.adjust')) -jcom<-0 -for (j in 1:J){ -for (k in 1:J){ -if (j < k){ -jcom<-jcom+1 -test[jcom,1]<-j -test[jcom,2]<-k -psihat[jcom,1]<-j -psihat[jcom,2]<-k -#a=yuend(x[,j],x[,k],tr=tr) -a=two.dep.pb(x[,j],x[,k],dif=FALSE,est=est,nboot=nboot,SEED=SEED,pr=FALSE,...) -psihat[jcom,3]=a[1] -psihat[jcom,4]=a[2] -psihat[jcom,5]=a[3] -test[jcom,1]<-j -test[jcom,2]<-k -test[jcom,3]<-a[4] -psihat[jcom,6]=a[5] -psihat[jcom,7]=a[6] -}}} -if(ADJ.CI){ -ior=order(0-test[,3]) -adj=alpha/c(1:L) #Hoch -if(BH)adj=alpha*(L-1:L+1) -# -#. Next, adjust the confidence intervals -# -jcom<-0 -for (j in 1:J){ -for (k in 1:J){ -if (j < k){ -jcom<-jcom+1 -#a=yuend(x[,j],x[,k],alpha=adj[ior[[jcom]]],tr=tr) -a=two.dep.pb(x[,j],x[,k],dif=FALSE,est=est,alpha=adj[ior[[jcom]]],nboot=nboot,SEED=SEED,pr=FALSE,...) -psihat[jcom,6]=a[5] -psihat[jcom,7]=a[6] -}}} -} -test[,4]=p.adjust(test[,3],method='hoch') -if(BH)test[,4]=p.adjust(test[,3],method='BH') -nval=nrow(x) -list(n=nval,test=test,psihat=psihat) -} - -dat2dif<-function(x){ -# -# x is assumed to be a matrix or data frame with at least 2 columns -# -# For J dependent groups, compute all pairwise differences and return the results -# -# -if(is.null(dim(x)))stop('x should be a matrix or data frame') -ic=0 -J=ncol(x) -n=nrow(x) -N=(J^2-J)/2 -ic=0 -dif=matrix(NA,nrow=n,ncol=N) -for(j in 1:J){ -for(k in 1:J){ -if(j1)stop('Argument g should be a vector') -flag=g==min(g) -x1=train[flag,] -x2=train[!flag,] -} -if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') -x1=elimna(x1) -x2=elimna(x2) -if(is.null(x1))stop('Something is wrong, no data in x1') -if(is.null(x2))stop('Something is wrong, no data in x2') -if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') -x1=elimna(x1) -x2=elimna(x2) -x1=as.matrix(x1) -x2=as.matrix(x2) -dimnames(x1)=list(NULL,NULL) # can be necessary to eliminate labels to avoid an error in randomForest. 
-dimnames(x2)=list(NULL,NULL) -n1=nrow(x1) -n2=nrow(x2) -ns1=min(n1,nboot) -ns2=min(n2,nboot) -nm=min(ns1,ns2) -if(EN)ns1=ns2=min(c(ns1,ns2)) -P1hat=NA -P2hat=NA -Av=NA -Bv=NA -Cv=NA -Dv=NA - -J=length(method) -TE=matrix(NA,nrow=nm,ncol=J) -FP=matrix(NA,nrow=nm,ncol=J) -FN=matrix(NA,nrow=nm,ncol=J) -TP=matrix(NA,nrow=nm,ncol=J) -TN=matrix(NA,nrow=nm,ncol=J) - -isub1=sample(c(1:nm)) -isub2=sample(c(1:nm)) -for(k in 1:nm){ -N1=isub1[k] -N2=isub2[k] -train1=x1[-N1,] -train2=x2[-N2,] -test=rbind(x1[N1,],x2[N2,]) -for(j in 1:J){ -a=CLASS.fun(x1=train1,x2=train2,test=test,method=method[j],...) -a1=a[1] -a2=a[2] -flag1=a1!=1 # ID False negatives e..g., method 1 predict no fracture but fracture occurred. So !flag1 is correct decision -flag2=a2!=2 # ID False positives e..g., predict fracture but no fracture occurred. -flag=c(flag1,flag2) #Overall mistakes -TE[k,j]=mean(flag) -FN[k,j]=mean(flag1) -FP[k,j]=mean(flag2) -flag3=a1==1 -flag4=a2==2 -TP[k,j]=mean(flag3) #method 1 predict fracture and fracture occurred -TN[k,j]=mean(flag4) #method 1 predict no fracture and no fracture occurred -}} -ERR=matrix(NA,nrow=5,ncol=J) -dimnames(ERR)=list(c('TE','FP','FN','TP','TN'),method) -#dimnames(CAT)=list(c('TRUE 1','TRUE 2'),c('PRED 1','PRED2')) -v=apply(TE,2,mean) -ERR[1,]=v -v=apply(FP,2,mean) -ERR[2,]=v -v=apply(FN,2,mean) -ERR[3,]=v -v=apply(TP,2,mean) -ERR[4,]=v -v=apply(TN,2,mean) -ERR[5,]=v -list(Error.rates=ERR) -} - -class.error.CM<-function(x1=NULL,x2=NULL,train=NULL,g=NULL,method='KNN',nboot=100,EN=TRUE,FAST=TRUE, -AUC=FALSE,SEED=TRUE,...){ -# -# For a classification methods indicated by the argument -# method -# use cross validation leaving one out. -# -#. Return a confusion matrix: unconditional, To get a conditional result use class.error.CP -# -# The data for the two groups can be entered via the arguments -# x1 and x2 -# or -# store all of the data in the argument train in which case g specifies the group -# AUC=TRUE, returns auc. Default is FALSE because conditions can be created where -# Error: $ operator is invalid for atomic vectors -# -# Current choices available: -# KNN: Nearest neighbor using robust depths -# DIS: Points classified based on their depths -# DEP: Uses depths as suggested by Makinde and Fasoranbaku (2018). JAS -# SVM: support vector machine -# RF: Random forest -# NN: neural network -# ADA: ada boost -# PRO: project the points onto a line connecting the centers of the data clouds. -# Then use estimate of the pdf for each group to make a decision about future points. -# LSM: smooth version of logistic regression when sm=TRUE; otherwise use logistic regression. -# -# Returns confusion matrix -# -# -# method='KNN' is default -# -# nboot=number of samples -# -if(length(method)!=1)stop('Only one method at a time is allowed') -if(SEED)set.seed(2) -if(!is.null(train)){ -if(is.null(g))stop('Argument g, group ids, must be specified') -if(is.matrix(g))if(dim(g)>1)stop('Argument g should be a vector') -flag=g==min(g) -x1=train[flag,] -x2=train[!flag,] -} -if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') -x1=elimna(x1) -x2=elimna(x2) -if(is.null(x1))stop('Something is wrong, no data in x1') -if(is.null(x2))stop('Something is wrong, no data in x2') -if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') -x1=elimna(x1) -x2=elimna(x2) -x1=as.matrix(x1) -x2=as.matrix(x2) -dimnames(x1)=list(NULL,NULL) # can be necessary to eliminate labels to avoid an error in randomForest. 
-dimnames(x2)=list(NULL,NULL) -n1=nrow(x1) -n2=nrow(x2) -ns1=min(n1,nboot) -ns2=min(n2,nboot) -mn=min(ns1,ns2) -CM=matrix(0,2,2) -isub1=sample(c(1:ns1)) -isub2=sample(c(1:ns2)) -A1=NULL -A2=NULL -ic1=0 -ic2=0 -for(k in 1:mn){ -N1=isub1[k] -N2=isub2[k] -train1=x1[-N1,] -train2=x2[-N2,] -test=rbind(x1[N1,],x2[N2,]) -a=CLASS.fun(x1=train1,x2=train2,test=test,method=method,...) -a1=a[1] -a2=a[2] -A1[k]=a1 -A2[k]=a2 -if(a1==1)CM[1,1]=CM[1,1]+1 #true = 1 pred 1 -else -CM[1,2]=CM[1,2]+1 -if(a2==2)CM[2,2]=CM[2,2]+1 #true =2 and pred 2 -else -CM[2,1]=CM[2,1]+1 -} -FREQ=CM -CM=CM/(2*nboot) -F=matrix(NA,3,3) -dimnames(F)=list(c('True 1','True 2','Sum'),c('Pred 1','Pred 2','Sum')) -F[1,1]=FREQ[1,1] -F[1,2]=FREQ[1,2] -F[2,1]=FREQ[2,1] -F[2,2]=FREQ[2,2] -F[1,3]=F[1,1]+F[1,2] -F[2,3]=F[2,1]+F[2,2] -F[3,1]=F[1,1]+F[2,1] -F[3,2]=F[1,2]+F[2,2] -F[3,3]=F[1,3]+F[2,3] -RES=F/F[3,3] -auroc=NULL -if(AUC){ -library(ROCR) -PRED=c(A1,A2) -LABS=c(rep(1,length(A1)),rep(2,length(A2))) -pred=prediction(PRED,LABS) -perf=performance(pred, "auc") - auroc<- perf@y.values[[1]] -} -dimnames(RES)=list(c('True 1','True 2','Sum'),c('Pred 1','Pred 2','Sum')) -list(C.MAT=RES,COUNTS=F,AUC=auroc[[1]]) -} - - -regtest.blp<-function(x,y,regfun=tsreg,nboot=600,alpha=.05,plotit=TRUE, -grp=c(1:ncol(x)),nullvec=c(rep(0,length(grp))),SEED=TRUE,pr=TRUE,...){ -# -# Test the hypothesis that q of the p predictors are equal to -# some specified constants. By default, the hypothesis is that all -# p predictors have a coefficient equal to zero. -# The method is based on a confidence ellipsoid. -# The critical value is determined with the percentile bootstrap method -# in conjunction with Mahalanobis distance. -# -x<-as.matrix(x) -p1<-ncol(x)+1 -p<-ncol(x) -xy<-cbind(x,y) -xy<-elimna(xy) -x<-xy[,1:p] -y<-xy[,p1] -m<-cbind(x,y) -flag<-reglev.gen(x,y,regfun=regfun,plotit=FALSE)$keep -m<-m[flag,] -x<-m[,1:p] -y<-m[,p1] -x<-as.matrix(x) -if(length(grp)!=length(nullvec))stop('The arguments grp and nullvec must have the same length.') -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -#print('Taking bootstrap samples. Please wait.') -data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot) -bvec<-apply(data,1,regboot,x,y,regfun) # A p+1 by nboot matrix. The first row -# contains the bootstrap intercepts, the second row -# contains the bootstrap values for first predictor, etc. -grp<-grp+1 #Ignore the intercept. 
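-# Remaining steps: estimate the coefficients from the data, extract the
-# coefficients indicated by grp, compute the distance (squared standardized
-# distance for a single coefficient, Mahalanobis distance otherwise) of each
-# bootstrap estimate from the estimate, and compare the distance between the
-# estimate and nullvec to the 1-alpha quantile of the bootstrap distances;
-# the p-value is the proportion of bootstrap distances at least as large as
-# this test statistic.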
-est<-regfun(x,y)$coef -estsub<-est[grp] -bsub<-t(bvec[grp,]) -if(length(grp)==1){ -m1<-sum((bvec[grp,]-est)^2)/(length(y)-1) -dis<-(bsub-estsub)^2/m1 -} -if(length(grp)>1){ -mvec<-apply(bsub,2,FUN=mean) -m1<-var(t(t(bsub)-mvec+estsub)) -dis<-mahalanobis(bsub,estsub,m1) -} -dis2<-order(dis) -dis<-sort(dis) -critn<-floor((1-alpha)*nboot) -crit<-dis[critn] -test<-mahalanobis(t(estsub),nullvec,m1) -sig.level<-1-sum(test>dis)/nboot -print(length(grp)) -if(length(grp)==2 && plotit){ -plot(bsub,xlab='Parameter 1',ylab='Parameter 2') -points(nullvec[1],nullvec[2],pch=0) -xx<-bsub[dis2[1:critn],] -xord<-order(xx[,1]) -xx<-xx[xord,] -temp<-chull(xx) -lines(xx[temp,]) -lines(xx[c(temp[1],temp[length(temp)]),]) -} -list(test=test,crit=crit,p.value=sig.level,nullvec=nullvec,est=estsub,n=length(y)) -} - -chbin2num<-function(x){ -# -# Make sure x is binary and numeric -# -n=length(x) -v=rep(NA,n) -y=elimna(x) -chk=unique(y) -if(length(chk)!=2)stop('Should have binary data after NA removed') -M=max(chk) -id=x==M -v[id]=1 -M=min(chk) -id=x==M -v[id]=0 -v -} - -wmw.anc.plot<-function(x1,y1,x2,y2,q1=c(.1,.9),q2=c(.1,.9),npts=20, -pts=NULL,xout=FALSE,outfun=outpro,xlab='X',ylab='P(Y1=1)stop('Argument q must be greater than 0 and less than 1') -qu=1-ql -L1=qest(x1,ql) -L2=qest(x2,ql) -U1=qest(x1,qu) -U2=qest(x2,qu) -L=max(L1,L2) -U=min(U1,U2) -pts=seq(L,U,length.out=npts) -} -s1sq=regIQRsd(x1,y1,pts=pts) -s2sq=regIQRsd(x2,y2,pts=pts) -e1=regYhat(x1,y1,xr=pts,regfun=Qreg) -e2=regYhat(x2,y2,xr=pts,regfun=Qreg) -v1=s1sq^2 -v2=s2sq^2 -n1=length(y1) -n2=length(y2) -N=n1+n2 -q=n1/N -top=(1-q)*v1+q*v2 -bot=q*(1-q) -sigsq=top/bot # Quantity in brackets KMS p. 176 eq 21.1 -es=(e1-e2)/sqrt(sigsq) -mat=cbind(pts,es) -#if(plotit)reg2plot(x1,y1,x2,y2,regfun=Qreg,xlab=xlab,ylab=ylab) -if(plotit)anclinKMS.plot(x1,y1,x2,y2,pts=pts,line=line,xlab=xlab,ylab=ylab,ylim=ylim) -dimnames(mat)=list(NULL,c('pts','Effect.size')) -mat -} - - anclinKMS.plot<-function(x1,y1,x2,y2,pts=NULL,q=0.1,xout=FALSE,ALL=TRUE,npts=10,line=TRUE, -xlab='X',ylab='KMS.Effect',outfun=outpro,ylim=NULL,...){ -# -# Plot KMS measure of effect size -# pts=NULL: If ALL=TRUE, 10 points are chosen by this function -# otherwise three points are used. 
-# -# The KMS effect size is a heteroscedastic robust analog of Cohen's d -# -# -xy=elimna(cbind(x1,y1)) -x1<-as.matrix(x1) -p=ncol(x1) -if(p>1)stop('Current version allows one covariate only') -p1=p+1 -vals=NA -x1<-xy[,1:p] -y1<-xy[,p1] -x1<-as.matrix(x1) -xy=elimna(cbind(x2,y2)) -x2<-as.matrix(x2) -p=ncol(x2) -if(p>1)stop('Current version allows one covariate only') -p1=p+1 -vals=NA -x2<-xy[,1:p] -y2<-xy[,p1] -x2<-as.matrix(x2) -xorder<-order(x1) -y1<-y1[xorder] -x1<-x1[xorder] -xorder<-order(x2) -y2<-y2[xorder] -x2<-x2[xorder] -n1=length(y1) -n2=length(y2) -n=min(c(n1,n2)) -if(xout){ -m<-cbind(x1,y1) -flag<-outfun(x1,plotit=FALSE,...)$keep -m<-m[flag,] -n1=nrow(m) -x1<-m[,1:p] -y1<-m[,p1] -x1=as.matrix(x1) -m<-cbind(x2,y2) -flag<-outfun(x2,plotit=FALSE,...)$keep -m<-m[flag,] -n2=nrow(m) -n=min(c(n1,n2)) -x2<-m[,1:p] -y2<-m[,p1] -x2=as.matrix(x2) -} -if(!is.null(pts))npts=length(pts) -if(is.null(pts)){ -if(q<=0 || q>=1)stop('Argument q must be greater than 0 and less than 1') -qu=1-q -L1=qest(x1,q) -L2=qest(x2,q) -U1=qest(x1,qu) -U2=qest(x2,qu) -L=max(L1,L2) -U=min(U1,U2) -if(ALL)pts=seq(L,U,length.out=npts) -else{pts=c(L,(L+U)/2,U) -npts=3 -}} - -#e=reg.pred(x2,y2,xr=pts,regfun=Qreg,q=.5,xout=FALSE) -qs=ancova.ES(x1,y1,x2,y2,pts=pts,plotit=FALSE)[,2] -M=cbind(pts,qs) -if(is.null(ylim)) -ylim=min(-.8,min(qs)) -ylim[2]=max(.8,max(qs)) -if(line){ -plot(pts,qs,xlab=xlab,ylab=ylab,ylim=ylim,type='n') -lines(pts,qs) -} -else -plot(pts,qs,xlab=xlab,ylab=ylab,ylim=ylim) -dimnames(M)=list(NULL,c('Pts','KMS.Effect.Size')) -M -} - -ancNCE.QS.plot<-function(x1,y1,x2,y2,pts=NULL,q=0.1,xout=FALSE,ALL=TRUE,npts=10,line=TRUE, -xlab='X',ylab='QS.Effect',outfun=outpro,...){ -# -# Plot quantile shift measure of effect size -# No control group -# -# q = lower quantile used to determine the points used, -# -# pts=NULL: If ALL=TRUE, 20 points are chosen by this function -# otherwise three points are used. 
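-# Example (illustrative only; x1, y1, x2, y2 are hypothetical covariate and
-# outcome vectors for two independent groups):
-# ancNCE.QS.plot(x1,y1,x2,y2,q=.1,npts=10,xout=TRUE)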
-# -# -# -xy=elimna(cbind(x1,y1)) -x1<-as.matrix(x1) -p=ncol(x1) -if(p>1)stop('Current version allows one covariate only') -p1=p+1 -vals=NA -x1<-xy[,1:p] -y1<-xy[,p1] -x1<-as.matrix(x1) -xy=elimna(cbind(x2,y2)) -x2<-as.matrix(x2) -p=ncol(x2) -if(p>1)stop('Current version allows one covariate only') -p1=p+1 -vals=NA -x2<-xy[,1:p] -y2<-xy[,p1] -x2<-as.matrix(x2) -xorder<-order(x1) -y1<-y1[xorder] -x1<-x1[xorder] -xorder<-order(x2) -y2<-y2[xorder] -x2<-x2[xorder] -n1=length(y1) -n2=length(y2) -n=min(c(n1,n2)) -if(xout){ -m<-cbind(x1,y1) -flag<-outfun(x1,plotit=FALSE,...)$keep -m<-m[flag,] -n1=nrow(m) -x1<-m[,1:p] -y1<-m[,p1] -x1=as.matrix(x1) -m<-cbind(x2,y2) -flag<-outfun(x2,plotit=FALSE,...)$keep -m<-m[flag,] -n2=nrow(m) -n=min(c(n1,n2)) -x2<-m[,1:p] -y2<-m[,p1] -x2=as.matrix(x2) -} -if(!is.null(pts))npts=length(pts) -if(is.null(pts)){ -if(q<=0 || q>=1)stop('Argument q must be greater than 0 and less than 1') -qu=1-q -L1=qest(x1,q) -L2=qest(x2,q) -U1=qest(x1,qu) -U2=qest(x2,qu) -L=max(L1,L2) -U=min(U1,U2) -if(ALL)pts=seq(L,U,length.out=npts) -else{pts=c(L,(L+U)/2,U) -npts=3 -}} -qs=QSanc(x1,y1,x2,y2,pts=pts) -qs=as.vector(matl(qs)) -M=cbind(pts,qs) -if(line){ -plot(pts,qs,xlab=xlab,ylab=ylab,ylim=c(0,1),type='n') -lines(pts,qs) -} -else -plot(pts,qs,xlab=xlab,ylab=ylab,ylim=c(0,1)) -dimnames(M)=list(NULL,c('Pts','QS.Effect.Size')) -M -} - -com2gfun<-function(x,y,est=tmean,tr=.2,alpha=.05,SEED=TRUE,nboot=2000,method=c('Y','PB','CID','BM')){ -# -# -# Y=Yuen -# PB=Percentile bootstrap -# CID= Cliff's -# BM = Brunner--Munzel -# -type=match.arg(method) -switch(type, -Y=yuen(x,y,tr=tr,alpha=alpha), -PB=pb2gen(x,y,est=est,alpha=alpha,SEED=SEED,nboot=nboot), -CID=cidv2(x,y,alpha=alpha), -BM=bmp(x,y,alpha=alpha)) -} - -anc.2gbin<-function(x1,y1,x2,y2,pts=NA,fr1=.8,fr2=.8,npts=10,xlab='X',ylab='Est. Dif',xout=FALSE, -outfun=out,nmin=12,plotit=TRUE){ -# -# Compare probability of success give a value for some covariate. -# A running-interval smoother is used coupled with the KMS method for comparing binomial distributions -# -isub=0 -if(xout){ -flag<-outfun(x1,...)$keep -x1<-x1[flag] -y1<-y1[flag] -flag<-outfun(x2,...)$keep -x2<-x2[flag] -y2<-y2[flag] -} -xorder=order(x1) -y1<-y1[xorder] -x1<-x1[xorder] -xorder<-order(x2) -y2<-y2[xorder] -x2<-x2[xorder] -if(is.na(pts[1])){ -n1<-1 -n2<-1 -vecn<-1 -isub=0 -for(i in 1:length(x1))n1[i]<-length(y1[near(x1,x1[i],fr1)]) -for(i in 1:length(x1))n2[i]<-length(y2[near(x2,x1[i],fr2)]) -for(i in 1:length(x1))vecn[i]<-min(n1[i],n2[i]) -sub<-c(1:length(x1)) -isub[1]<-min(sub[vecn>=nmin]) -isub[2]<-max(sub[vecn>=nmin]) -} -bot=x1[isub[1]] -top=x1[isub[2]] -pts=seq(bot,top,length.out=npts) -output=matrix(NA,nrow=length(pts),ncol=8) -for(i in 1:length(pts)){ -g1<-y1[near(x1,pts[i],fr1)] -g2<-y2[near(x2,pts[i],fr2)] -v=binom2g(x=g1,y=g2) -output[i,1:7]=c(pts[i],v$p1,v$p2,v$est.dif,v$ci[1],v$ci[2],v$p.value) -} -dimnames(output)=list(NULL,c('pts','p1','p2','est.dif','ci.lower','ci.upper','p.value','p.adjusted')) -if(plotit){ -plot(c(pts,pts,pts),c(output[,4],output[,5],output[,6]),type='n',xlab=xlab,ylab=ylab,ylim=c(-1,1)) -points(pts,output[,4]) -points(pts,output[,5],pch='+') -points(pts,output[,6],pch='+') -} -output[,8]=p.adjust(output[,7],method='hoch') -output -} - -ancmg1.power<-function(n,del=.2,alpha=.05,iter=100,SEED=TRUE,ADJ=FALSE){ -# -# n sample sizes, length of n indicates number of groups -#. 
Estimate power with no data -# Simulate assuming standard normal distributions but first group has a mean del -# -# -J=length(n) -x=list() -y=list() -chk=0 -if(SEED)set.seed(2) -for(i in 1:iter){ -for(j in 1:J){ -x[[j]]=rnorm(n[j]) -y[[j]]=rnorm(n[j]) -} -y[[1]]=y[[1]]+del -a=ancmg1(x,y,pr=FALSE) -pv=NA -K=length(a$points) -for(k in 1:K){ -if(!ADJ)pv[k]=min(a$point[[k]][,3]) -else -pv[k]=min(a$point[[k]][,7]) -} -if(min(pv)<=alpha)chk=chk+1 -} -chk/iter -} - - - -oph.dep.comRMSAE<-function(x, y=NULL, tr=0,invalid=4, method='hommel',STOP=TRUE,nboot=1999){ -# -# This function is designed specifically for dealing with -# Prediction Error for Intraocular Lens Power Calculation -# -# Goal: compare the root-mean-square absolute prediction error of J dependent measures. -# Strictly speaking, the squared mean absolute error value is used. -# The estimates reported by the function are the root-mean-squared absolute errors. -# All pairwise comparisons are performed using a bootstrap-t method based on means -# To use a 20% trimmed mean, set tr=.2 - -# x can be a matrix, a data frame or it can have list mode. -# if y is not NULL, the function assumes x is a vector -# and the goal is to compare the variances of the data in x and y. -# -# By default, Hommel's method is used to control the probability of one -# or more Type I errors -# -if(!is.null(y))x=cbind(x,y) -if(is.list(x)){ -n=pool.a.list(lapply(x,length)) -if(var(n)!=0)stop('lengths have different values') -x=matl(x) -} -J=ncol(x) -flag=abs(elimna(x))>invalid -if(sum(flag,na.rm=TRUE)>0){ -nr=c(1:nrow(x)) -if(sum(flag)>1){ -print(paste('The value of argument invalid indicates that any value greater in absolute value than', invalid,' is invalid')) -print('The following rows have invalid values') -} -if(sum(flag)==1){ -print(paste('The value of argument invalid indicates that any value greater in absolute value than', invalid,' is invalid')) -print('The following row has an invalid value') -} -irow=NA -ic=0 -N=nrow(x) -for(i in 1:N){ -iflag=abs(x[i,])>invalid -if(sum(iflag,na.rm=TRUE)>0){ -ic=ic+1 -irow[ic]=i -}} -print(irow) -if(STOP)stop() -} -CC=(J^2-J)/2 -output<-matrix(0,CC,7) -dimnames(output)<-list(NULL,c('Var','Var','RMSAE 1','RMSAE 2','Dif','p.value','Adj.p.value')) -ic=0 -for (j in 1:J){ -for (k in 1:J){ -if (j < k){ -ic=ic+1 -a=ydbt(x[,j]^2,x[,k]^2,tr=tr,nboot=nboot) -output[ic,1]=j -output[ic,2]=k -output[ic,3]=RMSAE(elimna(x[,j])) -output[ic,4]=RMSAE(elimna(x[,k])) -output[ic,5]=output[ic,3]-output[ic,4] -output[ic,6]=a$p.value -}}} -output[,7]=p.adjust(output[,6],method=method) -output -} - -RMSAE<-function(x) sqrt(sum(x^2)/(length(x))) - -oph.indep.comRMSAE<-function(x,y=NULL,method='hoch',invalid=4,STOP=TRUE,tr=0,nboot=1999){ -# -# This function is designed specifically for dealing with -# Prediction Error for Intraocular Lens Power Calculation -# It is assumed that any value less than -4 diopters or greater than 4 diopters -# is invalid. The argument invalid can be used to change this decision rule. -# -# Goal: compare root-mean-square Absolute Error (RMSAE) of J independent measures. -# All pairwise comparisons are performed using a heteroscedastic -# Welch method -# x can be a matrix, a data frame or it can have list mode. -# if y is not NULL, the function assumes x is a vector -# and the goal is to compare the variances of the data in x and y. 
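-# Example (illustrative only; pe1 and pe2 are hypothetical vectors of
-# prediction errors, in diopters, for two independent groups of eyes):
-# oph.indep.comRMSAE(pe1,pe2)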
-# To get an even more robust method using a 20% trimmed mean, set the argument tr=.2 -# -# By default, Hochberg's method is used to control the probability of one -# or more TypeI errors -# -if(!is.null(y))x=list(x,y) -if(is.matrix(x) || is.data.frame(x))x=listm(x) -J=length(x) -for(j in 1:J)x[[j]]=elimna(x[[j]]) -for(j in 1:J){ -flag=abs(x[[j]])>invalid -if(sum(flag,na.rm=TRUE)>0){ -print(paste('The value of argument invalid indicates that any value greater in absolute value than', invalid,' is invalid')) -print(paste('Variable', j, 'has one or more invalid values')) -print('They occur in the following positions') -nr=c(1:length(x[[j]])) -print(nr[flag]) -if(STOP)stop() -} -} -CC=(J^2-J)/2 -output<-matrix(0,CC,7) -dimnames(output)<-list(NULL,c('Var','Var','RMSAE 1','RMSAE 2','Dif','p.value','Adj.p.value')) -ic=0 -for (j in 1:J){ -for (k in 1:J){ -if (j < k){ -ic=ic+1 -a=yuenbt(x[[j]]^2,x[[k]]^2,tr=tr,nboot=nboot) -output[ic,1]=j -output[ic,2]=k -output[ic,3]=RMSAE(x[[j]]) -output[ic,4]=RMSAE(x[[k]]) -output[ic,5]=output[ic,3]-output[ic,4] -output[ic,6]=a$p.value -}}} -output[,7]=p.adjust(output[,6],method=method) -output -} - -oph.dep.comMeanAE<-function(x, y=NULL, tr=0,invalid=4, method='hommel',STOP=TRUE,nboot=1999){ -# -# This function is designed specifically for dealing with -# Prediction Error for Intraocular Lens Power Calculation -# -# Goal: compare the root-mean-square absolute prediction error of J dependent measures. -# Strictly speaking, the squared mean absolute error value is used. -# The estimates reported by the function are the root-mean-squared absolute errors. -# All pairwise comparisons are performed using a bootstrap-t method based on means -# For an even more robust method using a20% trimmed mean, set tr=.2 - -# x can be a matrix, a data frame or it can have list mode. -# if y is not NULL, the function assumes x is a vector -# and the goal is to compare the variances of the data in x and y. 
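-# Example (illustrative only; pe is a hypothetical matrix of prediction
-# errors, in diopters, with one column per formula and one row per eye):
-# oph.dep.comMeanAE(pe,tr=.2)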
-# -# By default, Hommel's method is used to control the probability of one -# or more Type I errors -# -if(!is.null(y))x=cbind(x,y) -if(is.list(x)){ -n=pool.a.list(lapply(x,length)) -if(var(n)!=0)stop('lengths have different values') -x=matl(x) -} -J=ncol(x) -flag=abs(elimna(x))>invalid -if(sum(flag,na.rm=TRUE)>0){ -nr=c(1:nrow(x)) -if(sum(flag)>1){ -print(paste('The value of argument invalid indicates that any value greater in absolute value than', invalid,' is invalid')) -print('The following rows have invalid values') -} -if(sum(flag)==1){ -print(paste('The value of argument invalid indicates that any value greater in absolute value than', invalid,' is invalid')) -print('The following row has an invalid value') -} -irow=NA -ic=0 -N=nrow(x) -for(i in 1:N){ -iflag=abs(x[i,])>invalid -if(sum(iflag,na.rm=TRUE)>0){ -ic=ic+1 -irow[ic]=i -}} -print(irow) -if(STOP)stop() -} -CC=(J^2-J)/2 -output<-matrix(0,CC,7) -dimnames(output)<-list(NULL,c('Var','Var','Mean.AE 1','Mean.AE 2','Dif','p.value','Adj.p.value')) -ic=0 -for (j in 1:J){ -for (k in 1:J){ -if (j < k){ -ic=ic+1 -a=ydbt(abs(x[,j]),abs(x[,k]),tr=tr,nboot=nboot) -output[ic,1]=j -output[ic,2]=k -output[ic,3]=mean(abs(x[,j]),tr=tr,na.rm=TRUE) -output[ic,4]=mean(abs(x[,k]),tr=tr,na.rm=TRUE) -output[ic,5]=output[ic,3]-output[ic,4] -output[ic,6]=a$p.value -}}} -output[,7]=p.adjust(output[,6],method=method) -output -} - -oph.indep.comMeanAE<-function(x,y=NULL,method='hoch',invalid=4,STOP=TRUE,tr=0,nboot=1999){ -# -# This function is designed specifically for dealing with -# Prediction Error for Intraocular Lens Power Calculation -# It is assumed that any value less than -4 diopters or greater than 4 diopters -# is invalid. The argument invalid can be used to change this decision rule. -# -# Goal: compare root-mean-square Absolute Error (RMSAE) of J independent measures. -# All pairwise comparisons are performed using a heteroscedastic -# Welch method -# x can be a matrix, a data frame or it can have list mode. -# if y is not NULL, the function assumes x is a vector -# and the goal is to compare the variances of the data in x and y. 
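-# Example (illustrative only; pe1, pe2 and pe3 are hypothetical vectors of
-# prediction errors for three independent groups, supplied in list mode):
-# oph.indep.comMeanAE(list(pe1,pe2,pe3))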
-# For a more robust method using a 20% trimme mean, set tr=.2 -# -# By default, Hochberg's method is used to control the probability of one -# or more TypeI errors -# -if(!is.null(y))x=list(x,y) -if(is.matrix(x) || is.data.frame(x))x=listm(x) -J=length(x) -for(j in 1:J)x[[j]]=elimna(x[[j]]) -for(j in 1:J){ -flag=abs(x[[j]])>invalid -if(sum(flag,na.rm=TRUE)>0){ -print(paste('The value of argument invalid indicates that any value greater in absolute value than', invalid,' is invalid')) -print(paste('Variable', j, 'has one or more invalid values')) -print('They occur in the following positions') -nr=c(1:length(x[[j]])) -print(nr[flag]) -if(STOP)stop() -} -} -CC=(J^2-J)/2 -output<-matrix(0,CC,7) -dimnames(output)<-list(NULL,c('Var','Var','Mean.AE 1','Mean.AE 2','Dif','p.value','Adj.p.value')) -ic=0 -for (j in 1:J){ -for (k in 1:J){ -if (j < k){ -ic=ic+1 -a=yuenbt(abs(x[[j]]),abs(x[[k]]),tr=tr,nboot=nboot) -output[ic,1]=j -output[ic,2]=k -output[ic,3]=mean(abs(x[[j]]),tr=tr) -output[ic,4]=mean(abs(x[[k]]),tr=tr) -output[ic,5]=output[ic,3]-output[ic,4] -output[ic,6]=a$p.value -}}} -output[,7]=p.adjust(output[,6],method=method) -output -} - -oph.dep.commean<-function(x, y=NULL, tr=0,invalid=4, method='hommel',STOP=TRUE){ -# -# This function is designed specifically for dealing with -# Prediction Error for Intraocular Lens Power Calculation -# It is assumed that any value less than -4 diopters or greater than 4 diopters -# is invalid. The argument invalid can be used to change this decision rule. -# -# Goal: compare the means of J dependent measures. -# All pairwise comparisons are performed using a bootstrap-t method based on means -# To use an even more robust method using a 20% trimmed mean, set tr=.2 -# - -# x can be a matrix, a data frame or it can have list mode. -# if y is not NULL, the function assumes x is a vector -# and the goal is to compare the variances of the data in x and y. -# -# By default, Hommel's method is used to control the probability of one -# or more Type I errors -# -if(!is.null(y))x=cbind(x,y) -if(is.list(x)){ -n=pool.a.list(lapply(x,length)) -if(var(n)!=0)stop('lengths have different values') -x=matl(x) -} -J=ncol(x) -flag=abs(x)>invalid -if(sum(flag,na.rm=TRUE)>0){ -nr=c(1:nrow(x)) -if(sum(flag)>1){ -print(paste('The value of argument invalid indicates that any value greater in absolute value than', invalid,' is invalid')) -print('The following rows have invalid values') -} -if(sum(flag)==1){ -print(paste('The value of argument invalid indicates that any value greater in absolute value than', invalid,' is invalid')) -print('The following row has an invalid value') -} -irow=NA -ic=0 -N=nrow(x) -for(i in 1:N){ -iflag=abs(x[i,])>invalid -if(sum(iflag,na.rm=TRUE)>0){ -ic=ic+1 -irow[ic]=i -}} -print(irow) -if(STOP)stop() -} -CC=(J^2-J)/2 -output<-matrix(0,CC,7) -dimnames(output)<-list(NULL,c('Var','Var','Mean 1','Mean 2','Dif','p.value','Adj.p.value')) -ic=0 -for (j in 1:J){ -for (k in 1:J){ -if (j < k){ -ic=ic+1 -a=ydbt(x[,j],x[,k],tr=tr) -output[ic,1]=j -output[ic,2]=k -output[ic,3]=a$Est.1 -output[ic,4]=a$Est.2 -output[ic,5]=a$Est.1- a$Est.2 -output[ic,6]=a$p.value -}}} -output[,7]=p.adjust(output[,6],method=method) -output -} - -oph.indep.commean<-function(x,y=NULL,method='hommel',invalid=4,STOP=TRUE,tr=0){ -# -# This function is designed specifically for dealing with -# Prediction Error for Intraocular Lens Power Calculation -# It is assumed that any value less than -4 diopters or greater than 4 diopters -# is invalid. 
The argument invalid can be used to change this decision rule. -# -# Goal: compare means of J independent measures. -# All pairwise comparisons are performed using a heteroscedastic -# Welch method -# x can be a matrix, a data frame or it can have list mode. -# if y is not NULL, the function assumes x is a vector -# and the goal is to compare the variances of the data in x and y. -# To use an even more robust method using a 20% trimmed mean, set tr=.2 -# -# By default, Hommel's method is used to control the probability of one -# or more TypeI errors -# -if(!is.null(y))x=list(x,y) -if(is.matrix(x) || is.data.frame(x))x=listm(x) -J=length(x) -for(j in 1:J)x[[j]]=elimna(x[[j]]) -for(j in 1:J){ -flag=abs(elimna(x[[j]]))>invalid -if(sum(flag,na.rm=TRUE)>0){ -print(paste('The value of argument invalid indicates that any value greater in absolute value than', invalid,' is invalid')) -print(paste('Variable', j, 'has one or more invalid values')) -print('They occur in the following positions') -nr=c(1:length(x[[j]])) -print(nr[flag]) -if(STOP)stop() -} -} -CC=(J^2-J)/2 -output<-matrix(0,CC,7) -dimnames(output)<-list(NULL,c('Var','Var','Mean 1','Mean 2','Dif','p.value','Adj.p.value')) -ic=0 -for (j in 1:J){ -for (k in 1:J){ -if (j < k){ -ic=ic+1 -a=yuenbt(x[[j]],x[[k]],tr=tr) -output[ic,1]=j -output[ic,2]=k -output[ic,3:4]=c(a$est.1,a$est.2) -output[ic,5]=output[ic,3]-output[ic,4] -output[ic,6]=a$p.value -}}} -output[,7]=p.adjust(output[,6],method=method) -output -} - - -oph.dep.comMedAE<-function(x, y=NULL, est=median,dif=FALSE, invalid=4, method='hommel',STOP=TRUE,nboot=1999){ -# -# This function is designed specifically for dealing with -# Prediction Error for Intraocular Lens Power Calculation -# -# Goal: compare the median absolute prediction error of J dependent measures. -# -# All pairwise comparisons are performed using a bootstrap-t method based on means -# -# x can be a matrix, a data frame or it can have list mode. -# if y is not NULL, the function assumes x is a vector -# and the goal is to compare the variances of the data in x and y. 
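-# Each pair of columns is compared by applying dmedpb to the absolute
-# prediction errors; the reported estimates are the median absolute errors,
-# and the argument est controls the estimator (median by default).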
-# -# By default, Hommel's method is used to control the probability of one -# or more Type I errors -# -if(!is.null(y))x=cbind(x,y) -if(is.list(x)){ -n=pool.a.list(lapply(x,length)) -if(var(n)!=0)stop('lengths have different values') -x=matl(x) -} -J=ncol(x) -flag=abs(elimna(x))>invalid -if(sum(flag,na.rm=TRUE)>0){ -nr=c(1:nrow(x)) -if(sum(flag)>1){ -print(paste('The value of argument invalid indicates that any value greater in absolute value than', invalid,' is invalid')) -print('The following rows have invalid values') -} -if(sum(flag)==1){ -print(paste('The value of argument invalid indicates that any value greater in absolute value than', invalid,' is invalid')) -print('The following row has an invalid value') -} -irow=NA -ic=0 -N=nrow(x) -for(i in 1:N){ -iflag=abs(x[i,])>invalid -if(sum(iflag,na.rm=TRUE)>0){ -ic=ic+1 -irow[ic]=i -}} -print(irow) -if(STOP)stop() -} -CC=(J^2-J)/2 -output<-matrix(0,CC,7) -dimnames(output)<-list(NULL,c('Var','Var','Med.AE 1','Med.AE 2','Dif','p.value','Adj.p.value')) -ic=0 -for (j in 1:J){ -for (k in 1:J){ -if (j < k){ -ic=ic+1 -a=dmedpb(abs(x[,j]),abs(x[,k]),est=est,dif=dif,nboot=nboot,pr=FALSE,plotit=FALSE) -output[ic,1]=j -output[ic,2]=k -output[ic,3]=est(abs(x[,j]),na.rm=TRUE) -output[ic,4]=est(abs(x[,k]),na.rm=TRUE) -output[ic,5]=output[ic,3]-output[ic,4] -output[ic,6]=a$output[,3] -}}} -output[,7]=p.adjust(output[,6],method=method) -output -} - -oph.indep.commedian<-function(x,y=NULL,method='hommel',invalid=4,STOP=TRUE,SEED=TRUE){ -# -# This function is designed specifically for dealing with -# Prediction Error for Intraocular Lens Power Calculation -# It is assumed that any value less than -4 diopters or greater than 4 diopters -# is invalid. The argument invalid can be used to change this decision rule. -# -# Goal: compare medians of J independent measures. -# All pairwise comparisons are performed using a heteroscedastic -# percentile bootstrap method -# x can be a matrix, a data frame or it can have list mode. -# if y is not NULL, the function assumes x is a vector -# and the goal is to compare the variances of the data in x and y. -# To use an even more robust method using a 20% trimmed mean, set tr=.2 -# -# By default, Hommel's method is used to control the probability of one -# or more TypeI errors -# -if(!is.null(y))x=list(x,y) -if(is.matrix(x) || is.data.frame(x))x=listm(x) -J=length(x) -for(j in 1:J)x[[j]]=elimna(x[[j]]) -for(j in 1:J){ -flag=abs(x[[j]])>invalid -if(sum(flag,na.rm=TRUE)>0){ -print(paste('The value of argument invalid indicates that any value greater in absolute value than', invalid,' is invalid')) -print(paste('Variable', j, 'has one or more invalid values')) -print('They occur in the following positions') -nr=c(1:length(x[[j]])) -print(nr[flag]) -if(STOP)stop() -} -} -CC=(J^2-J)/2 -output<-matrix(0,CC,7) -dimnames(output)<-list(NULL,c('Var','Var','Median 1','Median 2','Dif','p.value','Adj.p.value')) -ic=0 -for (j in 1:J){ -for (k in 1:J){ -if (j < k){ -ic=ic+1 -a=medpb2(x[[j]],x[[k]],SEED=SEED) -output[ic,1]=j -output[ic,2]=k -output[ic,3:4]=c(a$est1,a$est2) -output[ic,5]=output[ic,3]-output[ic,4] -output[ic,6]=a$p.value -}}} -output[,7]=p.adjust(output[,6],method=method) -output -} - -oph.dep.commedian<-function(x, y=NULL,invalid=4, method='hommel',STOP=TRUE,SEED=TRUE){ -# -# This function is designed specifically for dealing with -# Prediction Error for Intraocular Lens Power Calculation -# It is assumed that any value less than -4 diopters or greater than 4 diopters -# is invalid. 
The argument invalid can be used to change this decision rule. -# -# Goal: compare the medians of J dependent measures. -# All pairwise comparisons are performed using a percentile bootstrap method -# -# - -# x can be a matrix, a data frame or it can have list mode. -# if y is not NULL, the function assumes x is a vector -# and the goal is to compare the variances of the data in x and y. -# -# By default, Hommel's method is used to control the probability of one -# or more Type I errors -# -if(!is.null(y))x=cbind(x,y) -if(is.list(x)){ -n=pool.a.list(lapply(x,length)) -if(var(n)!=0)stop('lengths have different values') -x=matl(x) -} -J=ncol(x) -flag=abs(elimna(x))>invalid -if(sum(flag,na.rm=TRUE)>0){ -nr=c(1:nrow(x)) -if(sum(flag)>1){ -print(paste('The value of argument invalid indicates that any value greater in absolute value than', invalid,' is invalid')) -print('The following rows have invalid values') -} -if(sum(flag)==1){ -print(paste('The value of argument invalid indicates that any value greater in absolute value than', invalid,' is invalid')) -print('The following row has an invalid value') -} -irow=NA -ic=0 -N=nrow(x) -for(i in 1:N){ -iflag=abs(x[i,])>invalid -if(sum(iflag,na.rm=TRUE)>0){ -ic=ic+1 -irow[ic]=i -}} -print(irow) -if(STOP)stop() -} -CC=(J^2-J)/2 -output<-matrix(0,CC,7) -dimnames(output)<-list(NULL,c('Var','Var','Median 1','Median 2','Dif','p.value','Adj.p.value')) -ic=0 -for (j in 1:J){ -for (k in 1:J){ -if (j < k){ -ic=ic+1 -a=dmedpb(x[,j],x[,k],pr=FALSE,plotit=FALSE,nboot=2000,SEED=SEED) -output[ic,1]=j -output[ic,2]=k -output[ic,3]=median(x[,j],na.rm=TRUE) -output[ic,4]=median(x[,k],na.rm=TRUE) -output[ic,5]=a$output[1,2] -output[ic,6]=a$output[1,3] -}}} -output[,7]=p.adjust(output[,6],method=method) -output -} - -oph.mcnemar<-function(x,method='holm',invalid=4){ -# -# Astigmatism: compare prediction formulas -# -if(is.null(dim(x)))stop('x should be a matrix or data frame') -x=abs(x) -J=ncol(x) #number of formulas -flag=max(abs(x),na.rm=TRUE)>invalid -if(flag){ -nr=c(1:nrow(x)) -if(sum(flag,na.rm=TRUE)>1){ -print(paste('The value of argument invalid indicates that any value greater in absolute value than', invalid,' is invalid')) -print('The following rows have invalid values') -} -if(sum(flag,na.rm=TRUE)==1){ -print(paste('The value of argument invalid indicates that any value greater in absolute value than', invalid,' is invalid')) -print('The following row has an invalid value') -} -irow=NA -ic=0 -N=nrow(x) -for(i in 1:N){ -iflag=abs(x[i,])>invalid -if(sum(iflag,na.rm=TRUE)>0){ -ic=ic+1 -irow[ic]=i -}} -print(irow) -stop() -} -CC=(J^2-J)/2 -output<-matrix(0,CC,9) -dimnames(output)<-list(NULL,c('D', ' Var', 'N< ' , '%<', 'Var', 'N<', '%< ', -'p.value','Adj.p.value')) -E=list() -TAB=list() -D=seq(.25,2,.25) #D intervals from .25 to 2 -for(L in 1:length(D)){ -ic=0 -for (j in 1:J){ -for (k in 1:J){ -if (j < k){ -ic=ic+1 -a=mat2table(x[,c(j,k)],D[L],D[L]) -n1=sum(x[[j]]<=D[L],na.rm=TRUE) -pn1=mean(x[[j]]<=D[L],na.rm=TRUE) -n2=sum(x[[k]]<=D[L],na.rm=TRUE) -pn2=mean(x[[k]]<=D[L],na.rm=TRUE) -if(sum(is.na(a)>0))print(paste('No data for VAR',j,'VAR',k,'D=',D[L])) -if(sum(is.na(a))==0){ -mct=mcnemar.test(a) -output[ic,1]=D[L] -output[ic,2]=j -output[ic,3]=n1 -output[ic,4]=pn1 -output[ic,5]=k -output[ic,6]=n2 -output[ic,7]=pn2 -output[ic,8]=mct[[3]] -if(a[1,2]==0 &a[2,1]==0)output[ic,8]=1 -}}}} -output[,9]=p.adjust(output[,8],method=method) -E[[L]]=output -} -E -} - -oph.indepintervals<-function(m,method='holm',invalid=4){ -# -# For column of x, compare frequencies using 
the KMS method
-#
-#
-# m: a matrix or data frame of prediction errors, one column per formula
-# For each pair of columns, the frequencies of values less than or equal to
-# 0.25, 0.50, ..., 2 diopters are compared (sample sizes are computed internally)
-#
-#
-E=list()
-ic=0
-m=abs(m)
-J=ncol(m)
-x=m
-flag=abs(elimna(x))>invalid
-if(sum(flag)>0){
-nr=c(1:nrow(x))
-if(sum(flag)>1){
-print(paste('The value of argument invalid indicates that any value greater in absolute value than', invalid,' is invalid'))
-print('The following rows have invalid values')
-}
-if(sum(flag)==1){
-print(paste('The value of argument invalid indicates that any value greater in absolute value than', invalid,' is invalid'))
-print('The following row has an invalid value')
-}
-irow=NA
-ic=0
-N=nrow(x)
-for(i in 1:N){
-iflag=abs(x[i,])>invalid
-if(sum(iflag)>0){
-ic=ic+1
-irow[ic]=i
-}}
-print(irow)
-stop()
-}
-id=matrix(NA,8,2)
-x=matrix(NA,8,2)
-INT=c(0.25,0.50,0.75,1,1.25,1.5,1.75,2)
-dimnames(id)=list(NULL,c('S1','S2'))
-for (j in 1:J){
- for (k in 1:J){
- if (j < k){
- ic=ic+1
-id[,1]=rep(j,8)
-id[,2]=rep(k,8)
-# Next determine frequencies
-S1=elimna(m[,j])
-S2=elimna(m[,k])
-n1=length(S1)
-n2=length(S2)
-for(L in 1:8){
-x[L,1]=sum(S1<=INT[L])
-x[L,2]=sum(S2<=INT[L])
-}
-a=srg1.vs.2(c(n1,n2),x)
-Adj.p.value=p.adjust(a[,3],method=method)
-E[[ic]]=cbind(id,a,Adj.p.value)
- }}}
-E
-}
-
-oph.dep.comMAD<-function(x, y=NULL, tr=0,invalid=4, method='hommel',STOP=TRUE,nboot=1999){
-#
-# This function is designed specifically for dealing with
-# Prediction Error for Intraocular Lens Power Calculation
-#
-# Goal: compare the mean absolute deviation of J dependent measures.
-# The estimates reported by the function are the (trimmed) means of the
-# absolute deviations from each measure's mean.
-# All pairwise comparisons are performed using a bootstrap-t method (ydbt) based on means
-#
-
-# x can be a matrix, a data frame or it can have list mode.
-# if y is not NULL, the function assumes x is a vector
-# and the goal is to compare the mean absolute deviations of the data in x and y.
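-# Illustrative call (hypothetical data): pe is an n-by-J matrix of prediction
-# errors in diopters, one column per formula, rows matched on the same eyes:
-#   pe=matrix(rnorm(150,sd=.4),ncol=3);  oph.dep.comMAD(pe)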
-# -# By default, Hommel's method is used to control the probability of one -# or more Type I errors -# -if(!is.null(y))x=cbind(x,y) -if(is.list(x)){ -n=pool.a.list(lapply(x,length)) -if(var(n)!=0)stop('lengths have different values') -x=matl(x) -} -J=ncol(x) -flag=abs(x)>invalid -if(sum(flag,na.rm=TRUE)>0){ -nr=c(1:nrow(x)) -if(sum(flag)>1){ -print(paste('The value of argument invalid indicates that any value greater in absolute value than', invalid,' is invalid')) -print('The following rows have invalid values') -} -if(sum(flag)==1){ -print(paste('The value of argument invalid indicates that any value greater in absolute value than', invalid,' is invalid')) -print('The following row has an invalid value') -} -irow=NA -ic=0 -N=nrow(x) -for(i in 1:N){ -iflag=abs(x[i,])>invalid -if(sum(iflag,na.rm=TRUE)>0){ -ic=ic+1 -irow[ic]=i -}} -print(irow) -if(STOP)stop() -} -CC=(J^2-J)/2 -output<-matrix(0,CC,7) -dimnames(output)<-list(NULL,c('Var','Var','MAD 1','MAD 2','Dif','p.value','Adj.p.value')) -ic=0 -for(j in 1:J){ -mx=mean(x[,j],na.rm=TRUE) -#x[,j]=x[,j]-mean(x[,j],na.rm=TRUE) -x[,j]=x[,j]-mx -} -for (j in 1:J){ -for (k in 1:J){ -if (j < k){ -ic=ic+1 -a=ydbt(abs(x[,j]),abs(x[,k]),tr=tr,nboot=nboot) -output[ic,1]=j -output[ic,2]=k -output[ic,3]=mean(abs(x[,j]),tr=tr,na.rm=TRUE) -output[ic,4]=mean(abs(x[,k]),tr=tr,na.rm=TRUE) -output[ic,5]=output[ic,3]-output[ic,4] -output[ic,6]=a$p.value -}}} -output[,7]=p.adjust(output[,6],method=method) -output -} - -oph.indep.comMAD<-function(x,y=NULL,method='hoch',invalid=4,STOP=TRUE,tr=0,nboot=1999){ -# -# This function is designed specifically for dealing with -# Prediction Error for Intraocular Lens Power Calculation -# It is assumed that any value less than -4 diopters or greater than 4 diopters -# is invalid. The argument invalid can be used to change this decision rule. -# -# Goal: compare mean absolute deviation of J independent measures. -# All pairwise comparisons are performed using a heteroscedastic -# Welch method -# x can be a matrix, a data frame or it can have list mode. -# if y is not NULL, the function assumes x is a vector -# and the goal is to compare the variances of the data in x and y. 
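-# In essence, for each pair (j,k) the function centers each group at its mean and
-# compares the resulting mean absolute deviations,
-#   mean(abs(x[[j]]-mean(x[[j]])))  versus  mean(abs(x[[k]]-mean(x[[k]]))),
-# (trimmed if tr>0) via the bootstrap-t function yuenbt; a rough sketch of the
-# steps coded below.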
-# For a more robust method using a 20% trimmed mean, set tr=.2
-#
-# By default, Hochberg's method is used to control the probability of one
-# or more Type I errors
-#
-if(!is.null(y))x=list(x,y)
-if(is.matrix(x) || is.data.frame(x))x=listm(x)
-J=length(x)
-for(j in 1:J)x[[j]]=elimna(x[[j]])
-for(j in 1:J){
-flag=abs(x[[j]])>invalid
-if(sum(flag,na.rm=TRUE)>0){
-print(paste('The value of argument invalid indicates that any value greater in absolute value than', invalid,' is invalid'))
-print(paste('Variable', j, 'has one or more invalid values'))
-print('They occur in the following positions')
-nr=c(1:length(x[[j]]))
-print(nr[flag])
-if(STOP)stop()
-}
-}
-CC=(J^2-J)/2
-output<-matrix(0,CC,7)
-dimnames(output)<-list(NULL,c('Var','Var','MAD 1','MAD 2','Dif','p.value','Adj.p.value'))
-ic=0
-for(j in 1:J)x[[j]]=x[[j]]-mean(x[[j]])
-for (j in 1:J){
-for (k in 1:J){
-if (j < k){
-ic=ic+1
-a=yuenbt(abs(x[[j]]),abs(x[[k]]),tr=tr,nboot=nboot)
-output[ic,1]=j
-output[ic,2]=k
-output[ic,3]=mean(abs(x[[j]]),tr=tr)
-output[ic,4]=mean(abs(x[[k]]),tr=tr)
-output[ic,5]=output[ic,3]-output[ic,4]
-output[ic,6]=a$p.value
-}}}
-output[,7]=p.adjust(output[,6],method=method)
-output
-}
-
-oph.indep.comMedAE<-function(x,y=NULL,est=median,method='hommel',invalid=4,STOP=TRUE,nboot=1999){
-#
-# This function is designed specifically for dealing with
-# Prediction Error for Intraocular Lens Power Calculation
-# It is assumed that any value less than -4 diopters or greater than 4 diopters
-# is invalid. The argument invalid can be used to change this decision rule.
-#
-# Goal: compare the median absolute error of J independent measures.
-# All pairwise comparisons are performed using a heteroscedastic method
-# x can be a matrix, a data frame or it can have list mode.
-# if y is not NULL, the function assumes x is a vector
-#
-#
-# By default, Hommel's method is used to control the probability of one
-# or more Type I errors
-#
-if(!is.null(y))x=list(x,y)
-if(is.matrix(x) || is.data.frame(x))x=listm(x)
-J=length(x)
-for(j in 1:J)x[[j]]=elimna(x[[j]])
-for(j in 1:J){
-flag=abs(x[[j]])>invalid
-if(sum(flag,na.rm=TRUE)>0){
-print(paste('The value of argument invalid indicates that any value greater in absolute value than', invalid,' is invalid'))
-print(paste('Variable', j, 'has one or more invalid values'))
-print('They occur in the following positions')
-nr=c(1:length(x[[j]]))
-print(nr[flag])
-if(STOP)stop()
-}
-}
-CC=(J^2-J)/2
-output<-matrix(0,CC,7)
-dimnames(output)<-list(NULL,c('Var','Var','Med.AE 1','Med.AE 2','Dif','p.value','Adj.p.value'))
-ic=0
-for (j in 1:J){
-for (k in 1:J){
-if (j < k){
-ic=ic+1
-a=pb2gen(abs(x[[j]]),abs(x[[k]]),est=est,nboot=nboot)
-output[ic,1]=j
-output[ic,2]=k
-output[ic,3]=est(abs(x[[j]]))
-output[ic,4]=est(abs(x[[k]]))
-output[ic,5]=output[ic,3]-output[ic,4]
-output[ic,6]=a$p.value
-}}}
-output[,7]=p.adjust(output[,6],method=method)
-output
-}
-
-
-corblp.ci<-function(x,y,regfun=tsreg,varfun=pbvar,nboot=100,alpha=.05,outfun=outpro.depth,SEED=TRUE,
-plotit=FALSE,...){
-#
-# Correlation, basically a robust version of explanatory power,
-# based on a robust regression estimator with bad
-# leverage points removed
-#
-if(SEED)set.seed(2)
-xy=elimna(cbind(x,y))
-p1=ncol(xy)
-p=p1-1
-if(p!=1)stop('Only a single independent variable is allowed')
-x=xy[,1]
-y=xy[,2]
-id=reglev.gen(x,y,regfun=regfun,plotit=plotit,outfun=outpro.depth)$keep
-X=x[id]
-Y=y[id]
-v=NA
-n=length(Y)
-bot=varfun(Y)
-for(i in 1:nboot){
-id=sample(n,replace=TRUE)
-e=reg.pred(X[id],Y[id],regfun=regfun)
-top=varfun(e)
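-# top = robust variance (varfun) of the predicted Y values for this bootstrap sample;
-# divided by bot, the robust variance of Y, it gives a bootstrap replicate of
-# explanatory power, whose signed square root (computed next) is the correlation analog.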
-rsq=top/bot -rsq=min(rsq,1) -est=regfun(X[id],Y[id])$coef[2] -rest=sign(est)*sqrt(rsq) -v[i]=rest -} -se=sd(v) -est=corblp.EP(x,y,regfun=regfun,varfun=varfun) -test=est$cor/se -sig<-2*(1-pnorm(abs(test))) -crit=qnorm(1-alpha/2) -ci=est$cor-crit*se -ci=max(ci,-1) -ci[2]=est$cor+crit*se -ci[2]=min(ci[2],1) -list(cor=est$cor,test=test,p.value=sig,ci=ci) -} - -ancovap2.KMS<-function(x1,y1,x2,y2,pts=NULL,BOTH=TRUE,npts=20,profun=prodepth, -xout=FALSE,outfun=outpro){ -# -# Comparing two independent regression lines. -# based on an analog of the KMS measure of effect size -# for the points indicated by pts. -# pts=NULL: three points used that are determined based on the data -# -# profun=prdepth, random projections are used to measure the depth of a point -# =pdepth would use a deterministic method, might have high execution time -# if n is large. -# -# BOTH=TRUE: combine x1 and x2 when picking points, otherwise use x1 -# -x1=as.matrix(x1) -p=ncol(x1) -p1=p+1 -m1=elimna(cbind(x1,y1)) -x1=m1[,1:p] -y1=m1[,p1] -x2=as.matrix(x2) -p=ncol(x2) -p1=p+1 -m2=elimna(cbind(x2,y2)) -x2=m2[,1:p] -y2=m2[,p1] -if(xout){ -m<-cbind(x1,y1) -if(identical(outfun,reglev))flag=outfun(x1,y1,plotit=FALSE)$keep -else -flag<-outfun(x1,plotit=FALSE)$keep -m<-m[flag,] -x1<-m[,1:p] -y1<-m[,p1] -m<-cbind(x2,y2) -if(identical(outfun,reglev))flag=outfun(x2,y2,plotit=FALSE)$keep -else -flag<-outfun(x2,plotit=FALSE)$keep -m<-m[flag,] -x2<-m[,1:p] -y2<-m[,p1] -} -if(is.null(pts)){ -if(!BOTH){ -d=profun(x1) -ior=order(d) -id=seq(1,min(n1),length.out=npts) -id=floor(id) -pts=x1[ior[id],] -} -if(BOTH){ -X=rbind(x1,x2) -d=profun(X) -ior=order(d) -id=seq(1,min(n1),length.out=npts) -id=floor(id) -pts=X[ior[id],] -} -} -pts<-as.matrix(pts) -s1sq=regIQRsd(x1,y1,pts=pts) -s2sq=regIQRsd(x2,y2,pts=pts) -e1=regYhat(x1,y1,xr=pts,regfun=Qreg) -e2=regYhat(x2,y2,xr=pts,regfun=Qreg) -v1=s1sq^2 -v2=s2sq^2 -n1=length(y1) -n2=length(y2) -N=n1+n2 -q=n1/N -top=(1-q)*v1+q*v2 -bot=q*(1-q) -sigsq=top/bot # Quantity in brackets KMS p. 176 eq 21.1 -es=(e1-e2)/sqrt(sigsq) -mat=cbind(pts,es) -lab=NA -for(i in 1:p)lab[i]=paste('X',i) -dimnames(mat)=list(NULL,c(lab,'KMS')) -mat -} - -ancovap2.KMS.SEpb<-function(x1,y1,x2,y2,nboot=100,pts=NULL,SEED=TRUE){ -# -# Estimate standard error -# -if(is.null(pts))stop('No points were specified') -n1=nrow(x1) -n2=nrow(x2) -p=ncol(x1)+1 -npts=nrow(pts) -if(SEED)set.seed(2) -v=matrix(NA,nrow=nboot,ncol=nrow(pts)) -for(i in 1:nboot){ -id1=sample(n1,replace=TRUE) -id2=sample(n2,replace=TRUE) -X1=x1[id1,] -Y1=y1[id1] -X2=x2[id2,] -Y2=y2[id2] -v[i,]=ancovap2.KMS(X1,Y1,X2,Y2,pts=pts)[,p] -} -se=apply(v,2,sd) -se -} - -ancovap2.KMSci<-function(x1,y1,x2,y2,pts=NULL,alpha=.05,nboot=100,SEED=TRUE,npts=20, -SIMPLE=FALSE,PLOT.ADJ=FALSE, -plotit=TRUE,xlab='X1',ylab='X2',BOTH=TRUE,profun=prodepth, -xout=FALSE,outfun=outpro,method='hoch'){ -# -# Two independent groups, have two covariates. -# -# For each specified value for x, stored in pts, compute a heteroscedastic measure of effect -# -# if pts=NULL -# SIMPLE=TRUE: use the quartiles of the marginal distributions of group 1 -# to determine the covariate points used, -# SIMPLE=FALSE -# points are chosen based on the depths of the points, which is computed -# by the R function indicated by the argument profun. -# The default is profun=prodepth. -# To use a random collection of projections, set -# profun=pdepth.depth -# -# npts=20 When SIMPLE=FALSE, means 20 points are selected evenly spaced -# between the deepest point and the -# least deep point. 
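-# Illustrative call (hypothetical data; exactly two covariates are required):
-#   x1=matrix(rnorm(100),ncol=2); y1=rnorm(50)
-#   x2=matrix(rnorm(100),ncol=2); y2=rnorm(50)
-#   ancovap2.KMSci(x1,y1,x2,y2,plotit=FALSE)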
-# -# The function tests the hypothesis that the measure of effect is zero, no effect. -# -# iter=100: number of replications used to estimate the standard error. -# -# BOTH=TRUE and SIMPLE=FALSE: -# combine x1 and x2 when picking points, otherwise use x1 -# -FLAG=FALSE -if(!is.null(pts))FLAG=TRUE -p=ncol(x1) -if(p!=2)stop('Current version is limited to two covariates') -p1=p+1 -xy=elimna(cbind(x1,y1)) -x1=xy[,1:p] -y1=xy[,p1] -n1=nrow(xy) -xy=elimna(cbind(x2,y2)) -n2=nrow(xy) -x2=xy[,1:p] -y2=xy[,p1] -if(xout){ -m<-cbind(x1,y1) -if(identical(outfun,reglev))flag=outfun(x1,y1,plotit=FALSE)$keep -else -flag<-outfun(x1,plotit=FALSE)$keep -m<-m[flag,] -x1<-m[,1:p] -y1<-m[,p1] -m<-cbind(x2,y2) -if(identical(outfun,reglev))flag=outfun(x2,y2,plotit=FALSE)$keep -else -flag<-outfun(x2,plotit=FALSE)$keep -m<-m[flag,] -x2<-m[,1:p] -y2<-m[,p1] -} -n1=length(y1) -n2=length(y2) -n=n1+n2 -if(is.null(pts)&SIMPLE) pts=cbind(qest(x1[,1],c(.25,.5,.75)),qest(x1[,2],c(.25,.5,.75))) - -if(is.null(pts)){ -if(!BOTH){ -d=profun(x1) -ior=order(d) -id=seq(1,n1,length.out=npts) -id=floor(id) -pts=x1[ior[id],] -} -if(BOTH){ -X=rbind(x1,x2) -d=profun(X) -ior=order(d) -id=seq(1,n,length.out=npts) -id=floor(id) -pts=X[ior[id],] -} -} -adj=matrix(c(20, 0.673367, -30, 0.8048804, -40, 0.8452348, -50, 0.8702816, -75, 0.8975298, -100, 0.9231938, -125, 0.9363285, -150, 0.940, -175, 0.9438881, -200, 0.9492541, -250, 0.9546365, -300, 0.9527324),byrow=TRUE,ncol=2) -nmid=(n1+n2)/2 -if(max(n1,n2)>300)b.adj=.975 -else -b.adj=lplot.pred(1/adj[,1],adj[,2],1/nmid)$yhat -npts=nrow(pts) -RES=matrix(NA,nrow=npts,ncol=6) -SE=ancovap2.KMS.SEpb(x1,y1,x2,y2,nboot=nboot,pts=pts,SEED=SEED) -SE=b.adj*SE -RES[,1]=ancovap2.KMS(x1,y1,x2,y2,pts=pts)[,p1] -RES[,3]=RES[,1]-qnorm(1-alpha/2)*SE -RES[,4]=RES[,1]+qnorm(1-alpha/2)*SE -test=RES[,1]/SE -pv=2*(1-pnorm(abs(test))) -RES[,5]=pv -RES[,2]=test -dimnames(RES)=list(NULL,c('Est.','Test.Stat','ci.low','ci.up','p-value','p.adjusted')) -RES[,6]=p.adjust(RES[,5],method=method) -ip=which(RES[,5]<=.05) -sig.output=NULL -sig.points=NULL -if(length(ip)>0){ -sig.output=RES[ip,] -sig.points=pts[ip,] -} -if(FLAG)sig.output=RES -if(plotit){ -plot(x1[,1],x1[,2],xlab=xlab,ylab=ylab,pch='.') #type='n') -if(PLOT.ADJ)ip=which(RES[,6]<=.05) -else ip=which(RES[,5]<=.05) -points(pts[,1],pts[,2],pch='*') -if(length(ip)>0)points(pts[ip,],pch='o') -} -RES -list(pts=pts,output=RES) -} - - - -ancovap2.KMS.plot<-function(x1,y1,x2,y2,pts=NULL,xlab='X1',ylab='X2',zlab='Effect Size', -xout=FALSE,outfun=outpro,SEED=TRUE, theta = 50, phi = 25,REV=FALSE){ -# -# -# Two covariates, plot the KMS measure of effect size -# The function automatically removes leverage points. -# -# The function computes the KMS measure of effect size for the points in -# pts -# and plots the results. if -#. pts=NULL, the function picks the deepest 90% of the pooled -# data in x1 and x2 -# -# REV=FALSE: The plot created by LOESS is impacted by which independent -# variable is first in the matrix -#. 
pts -# To switch which is first, set REV=TRUE -# -xy=elimna(cbind(x1,y1)) -if(ncol(xy)!=3)stop('Only two covariates can be used') -x1=xy[,1:2] -y1=xy[,3] -xy=elimna(cbind(x2,y2)) -x2=xy[,1:2] -y2=xy[,3] -if(xout){ -m<-cbind(x1,y1) -flag<-outfun(x1,plotit=FALSE)$keep -m<-m[flag,] -x1<-m[,1:2] -y1<-m[,3] -m<-cbind(x2,y2) -flag<-outfun(x2,plotit=FALSE)$keep -m<-m[flag,] -x2<-m[,1:2] -y2<-m[,3] -} -if(is.null(pts))pts=rbind(x1,x2) -N=nrow(pts) -e=ancovap2.KMS(x1,y1,x2,y2,pts=pts)[,3] -if(N<25){ -library(scatterplot3d) -scatterplot3d(pts[,1],pts[,2],e,xlab=xlab,ylab=ylab,zlab=zlab) -} -if(N>=25){ -if(!REV)f=lplot(pts,e,xlab=ylab,ylab=xlab,zlab=zlab,ticktype='det',pr=FALSE,theta=theta,phi=phi) -else f=lplot(pts[,c(2,1)],e,xlab=xlab,ylab=ylab,zlab=zlab,ticktype='det',pr=FALSE,theta=theta,phi=phi) -} -list(Number_of_points_used_is=N) -} - - -#Wrap-upp functions - -#Spherical Equivalent Prediction Error Dataset -#Choose this file (SEQ_PE): source(file.choose()) -#for dependent dataset: SEQ_PE(dependent=T) -#for independent dataset: SEQ_PE(dependent=F) - -SEQ_PE<-function(dependent=T){ - -#library("readxl") -#library("xlsx") -library("reshape") -library("tidyverse") -library("viridis") -library("ragg") - -#src<-choose.files(caption="Please specify source R code file: Rallfun-v40") -#src<-file.choose() -#source(src) - - -##filein<-choose.files(caption="Please specify your data input file") -filein<-file.choose() -x=read.table(filein,header=T,sep="\t") - -## dir of the inputfile: -## dir<-dirname(filein) - -#x=read_excel(filein) -#dim(x) -#print(x) - -#x=read.table(file.choose(),header=T,sep="\t") - -f_d<-dependent -max<-as.integer(max(abs(x),na.rm=T))+1 -if(max>7) { -#print("A maxium value is > 6 in your data. Stop running."); -stop("A maxium magnitude is > 6 in your data. 
Stop running.",call.=F);} -cmbn<-names(x) - -## fileout<-file.choose() - - -##dir<-choose.dir(caption="Please specify where to save your results") -dir=dirname(filein) - -box_plot1(x,paste0(dir,"/","PE_boxplot.tiff")) - - -## if dependent is False -if(f_d==F) { -## SD: - -e1<-try({ -name="02.SD" -res<-as.data.frame(oph.ind.comvar(x,invalid=max)) -colnames(res)[1]="Formula_1" -colnames(res)[2]="Formula_2" -res$Formula_1<-cmbn[match(res$Formula_1,c(1:length(cmbn)))] -res$Formula_2<-cmbn[match(res$Formula_2,c(1:length(cmbn)))] -write.table(res,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=F,col.names=NA) -} -) - -name="04.MAD" -res<-as.data.frame(oph.indep.comMAD(x,invalid=max)) -colnames(res)[1]="Formula_1" -colnames(res)[2]="Formula_2" -res$Formula_1<-cmbn[match(res$Formula_1,c(1:length(cmbn)))] -res$Formula_2<-cmbn[match(res$Formula_2,c(1:length(cmbn)))] -write.table(res,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=F,col.names=NA) - -name="06.Intervals" -resList<-oph.indepintervals(x,invalid=max) -resL<-do.call(rbind,resList) -resL1<-as.data.frame(resL) -resL1$D<-row.names(resL) -colnames(resL1)[1]="Formula_1" -colnames(resL1)[2]="Formula_2" -resL1$Formula_1<-cmbn[match(resL1$Formula_1,c(1:length(cmbn)))] -resL1$Formula_2<-cmbn[match(resL1$Formula_2,c(1:length(cmbn)))] -resL1<-resL1[,c(dim(resL1)[2],1:dim(resL1)[2]-1)] -write.table(resL1,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=F,append=F) - -e1<-try({ -name="08.Mean" -res<-as.data.frame(oph.indep.commean(x,invalid=max)) -colnames(res)[1]="Formula_1" -colnames(res)[2]="Formula_2" -res$Formula_1<-cmbn[match(res$Formula_1,c(1:length(cmbn)))] -res$Formula_2<-cmbn[match(res$Formula_2,c(1:length(cmbn)))] -write.table(res,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=F,col.names=NA) -} -) - -name="10.Median" -res<-as.data.frame(oph.indep.commedian(x,invalid=max)) -colnames(res)[1]="Formula_1" -colnames(res)[2]="Formula_2" -res$Formula_1<-cmbn[match(res$Formula_1,c(1:length(cmbn)))] -res$Formula_2<-cmbn[match(res$Formula_2,c(1:length(cmbn)))] -write.table(res,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=F,col.names=NA) - -name="12.MeanAE" -res<-as.data.frame(oph.indep.comMeanAE(x,invalid=max)) -colnames(res)[1]="Formula_1" -colnames(res)[2]="Formula_2" -res$Formula_1<-cmbn[match(res$Formula_1,c(1:length(cmbn)))] -res$Formula_2<-cmbn[match(res$Formula_2,c(1:length(cmbn)))] -write.table(res,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=F,col.names=NA) - -name="14.MedianAE" -res<-as.data.frame(oph.indep.comMedAE(x,invalid=max)) -colnames(res)[1]="Formula_1" -colnames(res)[2]="Formula_2" -res$Formula_1<-cmbn[match(res$Formula_1,c(1:length(cmbn)))] -res$Formula_2<-cmbn[match(res$Formula_2,c(1:length(cmbn)))] -write.table(res,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=F,col.names=NA) - - -#oph.indep.comRMSAE(x,invalid=max) -name="16.RMSAE" -res<-as.data.frame(oph.indep.comRMSAE(x,invalid=max)) -colnames(res)[1]="Formula_1" -colnames(res)[2]="Formula_2" -res$Formula_1<-cmbn[match(res$Formula_1,c(1:length(cmbn)))] -res$Formula_2<-cmbn[match(res$Formula_2,c(1:length(cmbn)))] -write.table(res,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=F,col.names=NA) - -} - -else { - -name="01.SD" -res<-as.data.frame(oph.dep.comvar(x,invalid=max)) -colnames(res)[1]="Formula_1" -colnames(res)[2]="Formula_2" -res$Formula_1<-cmbn[match(res$Formula_1,c(1:length(cmbn)))] 
-res$Formula_2<-cmbn[match(res$Formula_2,c(1:length(cmbn)))] -write.table(res,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=F,col.names=NA) - -name="07.Mean" -res<-as.data.frame(oph.dep.commean(x,invalid=max)) -colnames(res)[1]="Formula_1" -colnames(res)[2]="Formula_2" -res$Formula_1<-cmbn[match(res$Formula_1,c(1:length(cmbn)))] -res$Formula_2<-cmbn[match(res$Formula_2,c(1:length(cmbn)))] -write.table(res,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=F,col.names=NA) - -name="09.Median" -res<-as.data.frame(oph.dep.commedian(x,invalid=max)) -colnames(res)[1]="Formula_1" -colnames(res)[2]="Formula_2" -res$Formula_1<-cmbn[match(res$Formula_1,c(1:length(cmbn)))] -res$Formula_2<-cmbn[match(res$Formula_2,c(1:length(cmbn)))] -write.table(res,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=F,col.names=NA) - -name="11.MeanAE" -res<-as.data.frame(oph.dep.comMeanAE(x,invalid=max)) -colnames(res)[1]="Formula_1" -colnames(res)[2]="Formula_2" -res$Formula_1<-cmbn[match(res$Formula_1,c(1:length(cmbn)))] -res$Formula_2<-cmbn[match(res$Formula_2,c(1:length(cmbn)))] -write.table(res,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=F,col.names=NA) - -name="13.MedianAE" -res<-as.data.frame(oph.dep.comMedAE(x,invalid=max)) -colnames(res)[1]="Formula_1" -colnames(res)[2]="Formula_2" -res$Formula_1<-cmbn[match(res$Formula_1,c(1:length(cmbn)))] -res$Formula_2<-cmbn[match(res$Formula_2,c(1:length(cmbn)))] -write.table(res,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=F,col.names=NA) - -e1<-try({ -name="03.MAD" -res<-as.data.frame(oph.dep.comMAD(x,invalid=max)) -colnames(res)[1]="Formula_1" -colnames(res)[2]="Formula_2" -res$Formula_1<-cmbn[match(res$Formula_1,c(1:length(cmbn)))] -res$Formula_2<-cmbn[match(res$Formula_2,c(1:length(cmbn)))] -write.table(res,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=F,col.names=NA) -} -) - -name="15.RMSAE" -res<-as.data.frame(oph.dep.comRMSAE(x,invalid=max)) -colnames(res)[1]="Formula_1" -colnames(res)[2]="Formula_2" -res$Formula_1<-cmbn[match(res$Formula_1,c(1:length(cmbn)))] -res$Formula_2<-cmbn[match(res$Formula_2,c(1:length(cmbn)))] -write.table(res,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=F,col.names=NA) - -name="05.Intervals" -resList<-oph.mcnemar(x,invalid=max) -resL<-do.call(rbind,resList) -resL<-as.data.frame(resL) -colnames(resL)[2]="Formula_1" -colnames(resL)[5]="Formula_2" -resL$Formula_1<-cmbn[match(resL$Formula_1,c(1:length(cmbn)))] -resL$Formula_2<-cmbn[match(resL$Formula_2,c(1:length(cmbn)))] -write.table(resL,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=F,col.names=NA) - -} -} - -box_plot1<-function(x,fileout){ -library("reshape") -library("tidyverse") -library("viridis") -library("ggplot2") -library("hrbrthemes") - -x1<-melt(x) -#tiff(fileout) -#jpeg -plot<-ggplot(x1,aes(x=variable,y=value,fill=variable))+ - geom_boxplot() + -scale_fill_viridis(discrete = TRUE, alpha=0.6) + -geom_jitter(color="black", size=1.5, alpha=0.9) + -theme_ipsum() + - theme( - legend.position="none", - plot.title = element_text(size=11) - ) + - ggtitle(" Prediction Errors (D)") + - xlab("")+ - ylab("") -ggsave(fileout,plot=plot) - -} - -testplot<-function(x){ -plot(x[,1],x[,2]) -} - - -#Astigmatism Magnitude Dataset: -#Choose this file (Astig_Magnitude): source(file.choose()) -#for dependent dataset: Astig_Magnitude(dependent=T) -#for independent dataset: Astig_Magnitude(dependent=F) - 
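-#The data file selected via file.choose() is assumed to be a tab-delimited text
-#file with a header row and one column of astigmatism prediction-error magnitudes
-#(in diopters) per formula, e.g. columns Formula_A, Formula_B, ... (hypothetical names).
-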
-Astig_Magnitude<-function(dependent=T){ - -#library("readxl") -#library("xlsx") - -#src<-choose.files(caption="Please specify source R code file: Rallfun-v40") -#src<-file.choose() -#source(src) - - -#filein<-choose.files(caption="Please specify your data input file") -filein<-file.choose() -x=read.table(filein,header=T,sep="\t") - -f_d<-dependent -max<-as.integer(max(abs(x),na.rm=T))+1 -if(max>7) { -#print("A maxium value is > 6 in your data. Stop running."); -stop("A maxium value is > 6 in your data. Stop running.",call.=F);} -cmbn<-names(x) - -## fileout<-file.choose() - - -#dir<-choose.dir(caption="Please specify where to save your results") -dir=dirname(filein) - -## if dependent is False -if(f_d==F) { - -e1<-try({ -name="18.Mean" -res<-as.data.frame(oph.astig.indepcom(x,invalid=max)) -colnames(res)[4]="Formula_1" -colnames(res)[5]="Formula_2" -res$Formula_1<-cmbn[match(res$Formula_1,c(1:length(cmbn)))] -res$Formula_2<-cmbn[match(res$Formula_2,c(1:length(cmbn)))] -#write.table(res,file=fileout,sep="\t",quote=F,row.names=F,append=T) -res<-res[,c(1:12)] -write.table(res,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=F,col.names=NA) -} -) - -name="20.Intervals" -resList<-oph.astig.indepintervals(x,invalid=max) -resL<-do.call(rbind,resList) -resL1<-as.data.frame(resL) -resL1$D<-row.names(resL) -colnames(resL1)[1]="Formula_1" -colnames(resL1)[2]="Formula_2" -resL1$Formula_1<-cmbn[match(resL1$Formula_1,c(1:length(cmbn)))] -resL1$Formula_2<-cmbn[match(resL1$Formula_2,c(1:length(cmbn)))] -resL1<-resL1[,c(dim(resL1)[2],1:dim(resL1)[2]-1)] -write.table(resL1,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=F,append=F) -} - -else { - -name="17.Mean" -res<-as.data.frame(oph.astig.depcom(x,invalid=max)) -colnames(res)[1]="Formula_1" -colnames(res)[2]="Formula_2" -res$Formula_1<-cmbn[match(res$Formula_1,c(1:length(cmbn)))] -res$Formula_2<-cmbn[match(res$Formula_2,c(1:length(cmbn)))] -write.table(res,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=F,col.names=NA) - - -name="19.Intervals" -resList<-oph.astig.mcnemar(x,invalid=max) -resL<-do.call(rbind,resList) -resL<-as.data.frame(resL) -colnames(resL)[2]="Formula_1" -colnames(resL)[5]="Formula_2" -resL$Formula_1<-cmbn[match(resL$Formula_1,c(1:length(cmbn)))] -resL$Formula_2<-cmbn[match(resL$Formula_2,c(1:length(cmbn)))] -write.table(resL,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=F,col.names=NA) -} -} - - -#Astigmatism Vector Dataset -#Choose this file (Astig_Vector): source(file.choose()) -#for dependent dataset: Astig_Vector(dependent=T) -#for independent dataset: Astig_Vector(dependent=F) - -Astig_Vector<-function(dependent=T){ - -#library("readxl") -#library("xlsx") - -#src<-choose.files(caption="Please specify source R code file: Rallfun-v40") -#src<-file.choose() -#source(src) - -#filein<-choose.files(caption="Please specify your data input file") -filein<-file.choose() -x=read.table(filein,header=T,sep="\t") - -f_d<-dependent -max<-as.integer(max(abs(x),na.rm=T))+1 -if(max>7) { -#print("A maximum value is > 6 in your data. Stop running."); -stop("A maximum value is > 6 in your data. 
Stop running.",call.=F);} -cmbn<-names(x) -cmbn1<- unlist(strsplit(cmbn,split="\\.")) -cmbn1<-cmbn1[seq_along(cmbn1)%%2 >0] -cmbn1<-cmbn1[seq_along(cmbn1)%%2 >0] -empty<-data.frame() -## fileout<-file.choose() - - -#dir<-choose.dir(caption="Please specify where to save your results") -dir=dirname(filein) - -name="23.DatasetMeanConvexpoly" -res<-oph.astig.datasetconvexpoly.mean(x,plotit=F) -CNT<-as.data.frame(do.call(rbind,res$centers)) -colnames(CNT)<-c("center.X","center.Y","N") -row.names(CNT)<-cmbn1 -write.table(CNT,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=T,col.names=NA) -write.table(empty,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=T,col.names=NA) -Dm<-list() -for(i in 1:length(res$convex.hull.pts)){ -DD<-as.data.frame(res$convex.hull.pts[[i]]); -#Dm<-append(Dm,DD) -Dm[[i]]<-DD -colnames(DD)<-c(paste0(cmbn1[i],".","X"),paste0(cmbn1[i],".","Y")) -write.table(DD,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=T,col.names=NA) -write.table(empty,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=T,col.names=NA) -#Dm<-append(Dm,DD) -} - -name="21.MeanConvexpoly" -name1="Mean Convex Polygon" -res<-oph.astig.meanconvexpoly(x,plotit=F) -P<-as.data.frame(do.call(rbind,res$p.values)) -colnames(P)<-"P.values" -row.names(P)<-cmbn1 -write.table(P,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=T,col.names=NA) -write.table(empty,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=T,col.names=NA) -CNT<-as.data.frame(do.call(rbind,res$centers)) -colnames(CNT)<-c("center.X","center.Y","N") -row.names(CNT)<-cmbn1 -write.table(CNT,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=T,col.names=NA) -write.table(empty,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=T,col.names=NA) -for(i in 1:length(res$conf.region.points)){ -D<-as.data.frame(res$conf.region.points[[i]]); -colnames(D)<-c(paste0(cmbn1[i],".","X"),paste0(cmbn1[i],".","Y")) -write.table(D,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=T,col.names=NA) -write.table(empty,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=T,col.names=NA) -##PDF(paste0(dir,"\\",name,".",cmbn1[i],".PDF")) -ragg::agg_tiff(paste0(dir,"/",name,".",cmbn1[i],".tiff"), width = 6, height = 7, units = "in", res = 300) -plotDAP(x[,c(2*i-1,2*i)],D,CNT[i,],Dm[[i]],paste0(name1," ",cmbn1[i])) -dev.off() -} - -name="24.DatasetMedianConvexpoly" -res<-oph.astig.datasetconvexpoly.median(x,plotit=F) -CNT<-as.data.frame(do.call(rbind,res$centers)) -colnames(CNT)<-c("center.X","center.Y","N") -row.names(CNT)<-cmbn1 -write.table(CNT,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=T,col.names=NA) -write.table(empty,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=T,col.names=NA) -Dm<-list() -for(i in 1:length(res$convex.hull.pts)){ -DD<-as.data.frame(res$convex.hull.pts[[i]]); -#Dm<-append(Dm,DD) -Dm[[i]]<-DD -colnames(DD)<-c(paste0(cmbn1[i],".","X"),paste0(cmbn1[i],".","Y")) -write.table(DD,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=T,col.names=NA) -write.table(empty,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=T,col.names=NA) -#Dm<-append(Dm,DD) -} - -name="22.MedianConvexpoly" -name1="Median Convex Polygon" -res<-oph.astig.medianconvexpoly(x,plotit=F) -P<-as.data.frame(do.call(rbind,res$p.values)) -colnames(P)<-"P.values" -row.names(P)<-cmbn1 
-write.table(P,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=T,col.names=NA) -write.table(empty,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=T,col.names=NA) -CNT<-as.data.frame(do.call(rbind,res$centers)) -colnames(CNT)<-c("center.X","center.Y","N") -row.names(CNT)<-cmbn1 -write.table(CNT,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=T,col.names=NA) -write.table(empty,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=T,col.names=NA) -for(i in 1:length(res$conf.region.points)){ -D<-as.data.frame(res$conf.region.points[[i]]); -colnames(D)<-c(paste0(cmbn1[i],".","X"),paste0(cmbn1[i],".","Y")) -write.table(D,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=T,col.names=NA) -write.table(empty,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=T,col.names=NA) -##PDF(paste0(dir,"/",name,".",cmbn1[i],".PDF")) -ragg::agg_tiff(paste0(dir,"/",name,".",cmbn1[i],".tiff"), width = 6, height = 7, units = "in", res = 300) -plotDAP(x[,c(2*i-1,2*i)],D,CNT[i,],Dm[[i]],paste0(name1," ",cmbn1[i])) -dev.off() -} - -cmbn2<-combn(cmbn1,2,simplify=F) -empty<-data.frame() - -## if dependent is False -if(f_d==F) { - -e1<-try({ -name="26.BivMeans.independent" -res<-oph.astig.indepbivmeans(x) -for(i in 1:length(cmbn1)){ -D<-as.data.frame(res[[i]]) -colnames(D)<-c(cmbn2[[i]][1],cmbn2[[i]][2],"p.value", "p.adjusted") -rownames(D)<-c("X","Y") -write.table(D,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=T,col.names=NA) -write.table(empty,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=T,col.names=NA) -} -} -) - -name="28.Bivmarg.totvars.independent" -res<-oph.astig.indepbivmarg.totvars(x) -for(i in 1:length(cmbn1)){ -D<-as.data.frame(res$results[[i]]) -colnames(D)<-c(cmbn2[[i]][1],cmbn2[[i]][2],"Ratio","p.value", "p.adjusted") -rownames(D)<-c("X","Y") -write.table(D,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=T,col.names=NA) -write.table(empty,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=T,col.names=NA) -} -D1<-as.data.frame(res$results.total) -colnames(D1)<-c("Formula_1","Formula_2","TotalVar_X","TotalVar_Y","SD_X","SD_Y","p.value", "p.adjusted") -D1$Formula_1<-cmbn1[match(D1$Formula_1,paste0("F ",c(1:length(cmbn1))))] -D1$Formula_2<-cmbn1[match(D1$Formula_2,paste0("F ",c(1:length(cmbn1))))] -write.table(D1,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=T,col.names=NA) - -} - -else { - -e1<-try({ -name="25.BivMeans.dependent" -res<-oph.astig.depbivmeans(x) -for(i in 1:length(cmbn1)){ -D<-as.data.frame(res[[i]]) -colnames(D)<-c(cmbn2[[i]][1],cmbn2[[i]][2],"p.value", "p.adjusted") -rownames(D)<-c("X","Y") -write.table(D,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=T,col.names=NA) -write.table(empty,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=T,col.names=NA) -} -} -) - -name="27.Bivmarg.totvars.dependent" -res<-oph.astig.depbivmarg.totvars(x) -for(i in 1:length(cmbn1)){ -D<-as.data.frame(res$results[[i]]) -colnames(D)<-c(cmbn2[[i]][1],cmbn2[[i]][2],"Ratio","p.value", "p.adjusted") -rownames(D)<-c("X","Y") -write.table(D,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=T,col.names=NA) -write.table(empty,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=T,col.names=NA) -} -D1<-as.data.frame(res$results.total) 
-colnames(D1)<-c("Formula_1","Formula_2","TotalVar_X","TotalVar_Y","SD_X","SD_Y","p.value", "p.adjusted") -D1$Formula_1<-cmbn1[match(D1$Formula_1,paste0("F ",c(1:length(cmbn1))))] -D1$Formula_2<-cmbn1[match(D1$Formula_2,paste0("F ",c(1:length(cmbn1))))] -write.table(D1,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=T,col.names=NA) - - -} -} - - -plotDAP<-function(rawData,CRP,center,DataMean,name){ -library("plotrix") -#rawDat<-read.table("rawData.txt",sep="\t",header=T) -#realDat<-read.table("liwData.txt",sep="\t",header=F) -colnames(rawData)<-c("X","Y") -colnames(CRP)<-c("X","Y") -colnames(center)<-c("X","Y") -colnames(DataMean)<-c("X","Y") - -D1<-data.frame(X1=CRP$X-mean(CRP$X),Y1=CRP$Y-mean(CRP$Y),X=CRP$X,Y=CRP$Y) -tmp1<-D1[D1$Y1>0,] -tmp2<-D1[D1$Y1<0,] -Tmp1<-tmp1[order(tmp1$X1,decreasing=T),] -Tmp2<-tmp2[order(tmp2$X1,decreasing=F),] -Tmp<-rbind(Tmp1,Tmp2) -Tmp$a<-atan2(Tmp$Y1,Tmp$X1) -Tmp<-Tmp[order(Tmp$a,decreasing=T),] -## add an ending point as the starting point, so that the circle is complete -newDat<-rbind(Tmp,Tmp[1,]) - -D2<-data.frame(X1=DataMean$X-mean(DataMean$X),Y1=DataMean$Y-mean(DataMean$Y),X=DataMean$X,Y=DataMean$Y) -tmp1<-D2[D2$Y1>0,] -tmp2<-D2[D2$Y1<0,] -Tmp1<-tmp1[order(tmp1$X1,decreasing=T),] -Tmp2<-tmp2[order(tmp2$X1,decreasing=F),] -Tmp<-rbind(Tmp1,Tmp2) -Tmp$a<-atan2(Tmp$Y1,Tmp$X1) -Tmp<-Tmp[order(Tmp$a,decreasing=T),] -## add an ending point as the starting point, so that the circle is complete -newDat2<-rbind(Tmp,Tmp[1,]) - -R<-4 -cos45<-cos(pi/4) -#tiff("DoubleAnglePlot.tiff") -plot(newDat$X,newDat$Y,type="p",xlim=c(-5,5),ylim=c(-5,5),col="blue",pch=4,cex=0,frame.plot=F,axes=FALSE,xlab="",ylab="",asp=1,main=paste(name,"")) -points(rawData$X,rawData$Y,type="p",col="black",pch=19,cex=0.5) -points(newDat2$X,newDat2$Y,type="p",col="purple",pch=8,cex=0) -points(center$X,center$Y,type="p",col="red",pch=15,cex=1) -lines(newDat$X,newDat$Y,col="blue",lwd=1.3) -lines(newDat2$X,newDat2$Y,col="purple",lwd=2.0) -### lines(D[,1],D[,2],type="p",col="red",pch=0,cex=5) - -draw.circle(0,0,1,lty=1,lwd=0.5) -draw.circle(0,0,2,lty=1,lwd=0.5) -draw.circle(0,0,3,lty=1,lwd=0.5) -draw.circle(0,0,4,lty=1,lwd=0.5) -segments(-1*R*cos45,-1*R*cos45,1*R*cos45,R*cos45,lty=1,lwd=0.5) -segments(-1*R*cos45,1*R*cos45,1*R*cos45,-1*R*cos45,lty=1,lwd=0.5) -segments(0,R,0,-1*R,lty=1,lwd=0.5) -segments(R,0,-1*R,0,lty=1,lwd=0.5) - -R1<-R+0.6 -text(R1,0,paste0("0",intToUtf8(176))) -text(R1*cos45,R1*cos45,paste0("22.5",intToUtf8(176))) -text(0, R1,paste0("45",intToUtf8(176))) -text(-1*R1*cos45,R1*cos45,paste0("67.5",intToUtf8(176))) -text(-1*R1,0,paste0("90",intToUtf8(176))) -text(-1*R1*cos45,-1*R1*cos45,paste0("112.5",intToUtf8(176))) -text(0,-1*R1,paste0("135",intToUtf8(176))) -text(R1*cos45,-1*R1*cos45,paste0("157.5",intToUtf8(176))) -#dev.off() - -#legend(x="bottomleft",pch=c(15,4,8),legend=c("Centroid","Mean Convex Polygon", "Dataset Convex Polygon"),col=c("red","blue","purple")) -legend(x="bottomleft",pch=c(15,NA,NA), lty=c(NA,1,1),cex=0.88,legend=c("Centroid","Mean Convex Polygon", "Dataset Convex Polygon"),col=c("red","blue","purple")) -} - -mulquant<-function(x,q=c(1:9)/10,HD=TRUE,type=8){ -# -# Estimate multiple quantiles for the data in vector x -# By default estimate the deciles -# HD=TRUE: use the Harrell-Davis estimate of the qth quantile -# HD=FALSE:use R function quantile -# -x=elimna(x) -nq=length(q) -if(HD){ -xs<-sort(x) -n<-length(x) -vecx<-seq(along=x) -xq<-0 -for (i in 1:nq){ -m1<-(n+1)*q[i] -m2<-(n+1)*(1-q[i]) -wx<-pbeta(vecx/n,m1,m2)-pbeta((vecx-1)/n,m1,m2) 
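-# (so the i-th sorted value receives weight P{(i-1)/n < B <= i/n}, where
-#  B ~ Beta((n+1)q, (n+1)(1-q)); these are the Harrell-Davis weights)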
# W sub i values -xq[i]<-sum(wx*xs) -}} -if(!HD){ -xq=quantile(x,probs=q,type=type) -} -xq -} - -matbin2v<-binmat2v<-function(m,col=c(1,2),int1=c(.5,.5),int2=c(.5,.5),INC=TRUE){ -# -# pull out the rows of the matrix m based on the values in the column -# indicated by the argument -# int1 indicates intervals for first variable -# int2 indicates intervals for second variable -# By default, split at the median for both variables. - -# col indicates the columns of m by which the splits are made. - -# -# Example: binmat(m,c(1,3),c(10,15),c(2:6)) will return all rows such that the -# values in column 1 are between 10 and 15, inclusive. -# values in col 15 are between 2 and 5 -# -if(is.null(m))stop('First argument should be a matrix with two or more columns') -if(ncol(m)==1)stop('First argument should be a matrix with two or more columns') -if(INC){ -flag1=m[,col[1]]>=int1[1] -flag2=m[,col[1]]<=int1[2] -flag3=m[,col[2]]>=int2[1] -flag4=m[,col[2]]<=int2[2] -} -if(!INC){ -flag1=m[,col[1]]>int1[1] -flag2=m[,col[1]]int2[1] -flag4=m[,col[2]]0)points(pts[flag,1],pts[flag,2],pch='o') -} -output -} - - -wmw.ancbsep2.sub<-function(m,pts){ -v=wmw.ancp2(m[[1]],m[[2]],m[[3]],m[[4]],pts=pts) -v -} - -ancovap2.wmw=wmw.ancbsep2 - -ancovap2.wmw.plot<-function(x1,y1,x2,y2,pts=NULL,xlab='X1',ylab='X2',zlab='Effect Size',REV=FALSE, -xout=FALSE,outfun=outpro,SEED=TRUE, theta = 50, phi = 25){ -# -# -# Two covariates, plot the Wilcoxon--Mann--Whitney measure of effect size -# using a smoother -# -# -xy=elimna(cbind(x1,y1)) -if(ncol(xy)!=3)stop('Only two covariates can be used') -x1=xy[,1:2] -y1=xy[,3] -xy=elimna(cbind(x2,y2)) -x2=xy[,1:2] -y2=xy[,3] -if(xout){ -m<-cbind(x1,y1) -flag<-outfun(x1,plotit=FALSE)$keep -m<-m[flag,] -x1<-m[,1:2] -y1<-m[,3] -m<-cbind(x2,y2) -flag<-outfun(x2,plotit=FALSE)$keep -m<-m[flag,] -x2<-m[,1:2] -y2<-m[,3] -} -if(is.null(pts))pts=rbind(x1,x2) -N=nrow(pts) -e=wmw.ancp2(x1,y1,x2,y2,pts=pts) -if(N<25){ -library(scatterplot3d) -scatterplot3d(pts[,1],pts[,2],e,xlab=xlab,ylab=ylab,zlab=zlab) -} -if(N>=25){ -if(!REV)f=lplot(pts,e,xlab=ylab,ylab=xlab,zlab=zlab,ticktype='det',pr=FALSE,theta=theta,phi=phi) -else f=lplot(pts[,c(2,1)],e,xlab=xlab,ylab=ylab,zlab=zlab,ticktype='det',pr=FALSE,theta=theta,phi=phi) -} -list(Number_of_points_used_is=N) -} - - - - -wmw.ancp2<-function(x1,y1,x2,y2,pts=NULL,xout=FALSE,outfun=outpro){ -# -# For the regression lines corresponding to two independent groups -# estimate the conditional WMW effect size for each point in pts -# -# pts=NULL: three points used that are determined based on the data -# -x1=as.matrix(x1) -p=ncol(x1) -p1=p+1 -m1=elimna(cbind(x1,y1)) -x1=m1[,1:p] -y1=m1[,p1] -x2=as.matrix(x2) -p=ncol(x2) -p1=p+1 -m2=elimna(cbind(x2,y2)) -x2=m2[,1:p] -y2=m2[,p1] -if(xout){ -m<-cbind(x1,y1) -if(identical(outfun,reglev))flag=outfun(x1,y1,plotit=FALSE)$keep -else -flag<-outfun(x1,plotit=FALSE)$keep -m<-m[flag,] -x1<-m[,1:p] -y1<-m[,p1] -m<-cbind(x2,y2) -if(identical(outfun,reglev))flag=outfun(x2,y2,plotit=FALSE)$keep -else -flag<-outfun(x2,plotit=FALSE)$keep -m<-m[flag,] -x2<-m[,1:p] -y2<-m[,p1] -} -if(is.null(pts[1])){ -x1<-as.matrix(x1) -pts<-ancdes(x1) -pts=unique(pts) -} -pts<-as.matrix(pts) -if(is.null(pts)){ -x1<-as.matrix(x1) -pts<-ancdes(x1) -pts=unique(pts) -} -e=NA -PV=NA -n1=length(y1) -n2=length(y2) -for(i in 1:nrow(pts)){ -d1=reg.con.dist(x1,y1,pts=pts[i,]) -d2=reg.con.dist(x2,y2,pts=pts[i,]) -p=NA -for(j in 1:99)p[j]=mean(d1[j]<=d2) -e[i]=mean(p) -} -e -} - -hdmq<-function(x,q=.5,tr=FALSE){ -# -# -# Estimate one or more quantiles. 
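-# q can be a vector; hd (or thd when tr=TRUE) is applied to each element.
-# Example: hdmq(x,q=c(.25,.5,.75)) returns the three estimated quartiles.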
-e=NA -nq=length(q) -if(!tr)for(i in 1:nq)e[i]=hd(x,q[i]) -if(tr)for(i in 1:nq)e[i]=thd(x,q[i]) -e -} - -decinter<-function(x,alpha=.05,q=c(1:9)/10,nboot=1000,SEED=TRUE,method='BH'){ -# -# By default, use all deciles when dealing with interactions in a 2-by-2 design. -# The quantiles used can be altered via the argument q -# -if(SEED)set.seed(2) -if(is.matrix(x))x=listm(x) -x=elimna(x) -bv1=matrix(NA,nrow=9,ncol=nboot) -bv2=matrix(NA,nrow=9,ncol=nboot) -bv3=matrix(NA,nrow=9,ncol=nboot) -bv4=matrix(NA,nrow=9,ncol=nboot) -data<-matrix(sample(x[[1]],size=length(x[[1]])*nboot,replace=TRUE),nrow=nboot) -bv1=apply(data,1,hdmq,q=q) -data<-matrix(sample(x[[2]],size=length(x[[2]])*nboot,replace=TRUE),nrow=nboot) -bv2=apply(data,1,hdmq,q=q) -data<-matrix(sample(x[[3]],size=length(x[[3]])*nboot,replace=TRUE),nrow=nboot) -bv3=apply(data,1,hdmq,q=q) -data<-matrix(sample(x[[4]],size=length(x[[4]])*nboot,replace=TRUE),nrow=nboot) -bv4=apply(data,1,hdmq,q=q) -be=bv1-bv2-bv3+bv4 -pv=NA -nq=length(q) -ilow<-round((alpha/2) * nboot) -ihi<-nboot - ilow -ilow<-ilow+1 -vs=sort(be) -cilow=NA -ciup=NA -for(i in 1:nq){ -pv[i]=mean(be[i,]<0) -pv[i]=2*min(pv[i],1-pv[i]) -bes=sort(be[i,]) -cilow[i]=bes[ilow] -ciup[i]=bes[ihi] -} -output=matrix(NA,nrow=nq,ncol=8) -dimnames(output)=list(NULL,c('Quant','Est.Lev 1','Est.Lev 2','Dif','ci.low','ci.up','p-value','p.adj')) -output[,1]=q -e=lapply(x,hdmq,q=q) -est=e[[1]]-e[[2]]-e[[3]]+e[[4]] -output[,2]=e[[1]]-e[[2]] -output[,3]=e[[3]]-e[[4]] -output[,4]=est -output[,5]=cilow -output[,6]=ciup -output[,7]=pv -output[,8]=p.adjust(pv,method=method) -output -} - -ancsm.es<- -function(x1,y1,x2,y2,method='KMS',pts=NA,est=tmean, -fr1=1,fr2=1,nboot=NA,nmin=12,alpha=.05,xout=FALSE, -outfun=outpro,plotit=TRUE,LP=TRUE,xlab='X',ylab='Y',pch1='*',pch2='+',...){ -# -# Compare two independent groups using -# a percentile bootstrap combined with a running interval -# smooth and some robust measure of effect size: -# -#Choices for method: -# 'EP','QS','QStr','AKP','WMW','KMS' -# -# Assume data are in x1 y1 x2 and y2 -# Comparisons are made at the design points contained in the vector -# pts -# -flag.est=FALSE -if(identical(est,onestep))flag.est=TRUE -if(flag.est)LP=FALSE # Get an error when using onestep in conjunction with LP=T -if(identical(est,mom))flag.est=TRUE -xy1=elimna(cbind(x1,y1)) -x1=xy1[,1] -y1=xy1[,2] -xy2=elimna(cbind(x2,y2)) -x2=xy2[,1] -y2=xy2[,2] -if(xout){ -flag<-outfun(x1,...)$keep -x1<-x1[flag] -y1<-y1[flag] -flag<-outfun(x2,...)$keep -x2<-x2[flag] -y2<-y2[flag] -} -npt<-5 -gv1<-vector('list') -if(is.na(pts[1])){ -output=matrix(NA,5,5) -dimnames(output)=list(NULL,c('pts','Effect.Size','ci.low','ci.up','p.value')) -output[,1]=c(1:5) -isub<-c(1:5) # Initialize isub -test<-c(1:5) -xorder<-order(x1) -y1<-y1[xorder] -x1<-x1[xorder] -xorder<-order(x2) -y2<-y2[xorder] -x2<-x2[xorder] -n1<-1 -n2<-1 -vecn<-1 -for(i in 1:length(x1))n1[i]<-length(y1[near(x1,x1[i],fr1)]) -for(i in 1:length(x1))n2[i]<-length(y2[near(x2,x1[i],fr2)]) -for(i in 1:length(x1))vecn[i]<-min(n1[i],n2[i]) -sub<-c(1:length(x1)) -isub[1]<-min(sub[vecn>=nmin]) -isub[5]<-max(sub[vecn>=nmin]) -isub[3]<-floor((isub[1]+isub[5])/2) -isub[2]<-floor((isub[1]+isub[3])/2) -isub[4]<-floor((isub[3]+isub[5])/2) -mat<-matrix(NA,5,3) -dimnames(mat)<-list(NULL,c('X','n1','n2')) -for (i in 1:5){ -j<-i+5 -temp1<-y1[near(x1,x1[isub[i]],fr1)] -temp2<-y2[near(x2,x1[isub[i]],fr2)] -temp1<-temp1[!is.na(temp1)] -temp2<-temp2[!is.na(temp2)] -mat[i,1]<-x1[isub[i]] -mat[i,2]<-length(temp1) -mat[i,3]<-length(temp2) 
-test=ESfun.CI(temp1,temp2,method=method) -if(method=='KMS')output[i,2]=test$effect.size -if(method=='QS')output[i,2]=test$effect.size -if(method=='QStr')output[i,2]=test$effect.size -if(method=='WMW'){ -output[i,2]=test$p.hat -test$ci[1]=test$p.ci[1] -test$ci[2]=test$p.ci[2] -} -if(method=='AKP')output[i,2]=test$akp.effect -if(method=='EP')output[i,2]=test$Effect.Size -output[i,3]=test$ci[1] -output[i,4]=test$ci[2] -if(method!='EP')output[i,5]=test$p.value -}} -# -if(!is.na(pts[1])){ -npt<-length(pts) -output=matrix(NA,npt,5) -output[,1]=c(1:npt) -dimnames(output)=list(NULL,c('pts','Effect.Size','ci.low','ci.up','p.value')) -n1<-1 -n2<-1 -vecn<-1 -for(i in 1:length(pts)){ -n1[i]<-length(y1[near(x1,pts[i],fr1)]) -n2[i]<-length(y2[near(x2,pts[i],fr2)]) -} -mat<-matrix(NA,length(pts),3) -dimnames(mat)<-list(NULL,c('X','n1','n2')) -gv<-vector('list',2*length(pts)) -for (i in 1:length(pts)){ -j<-i+npt -temp1<-y1[near(x1,pts[i],fr1)] -temp2<-y2[near(x2,pts[i],fr2)] -temp1<-temp1[!is.na(temp1)] -temp2<-temp2[!is.na(temp2)] -test=ESfun.CI(temp1,temp2,method=method) -output[i,2]=test$effect.size -output[i,3]=test$ci[1] -output[i,4]=test$ci[2] -mat[i,1]<-pts[i] -if(length(temp1)<=5)paste('Warning, there are',length(temp1),' points corresponding to the design point X=',pts[i]) -if(length(temp2)<=5)paste('Warning, there are',length(temp2),' points corresponding to the design point X=',pts[i]) -mat[i,2]<-length(temp1) -mat[i,3]<-length(temp2) -#gv1[[i]]<-temp1 -#gv1[[j]]<-temp2 -test=ESfun.CI(temp1,temp2,method=method) -output[i,2]=test$effect.size -output[i,3]=test$ci[1] -output[i,4]=test$ci[2] -output[i,5]=test$p.value -} -} -if(plotit){ -runmean2g(x1,y1,x2,y2,fr=fr1,est=est,LP=LP,xlab=xlab,ylab=ylab,pch1=pch1,pch2=pch2,...) -} -list(mat=mat,output=output) -} - -ancsm.es<- -function(x1,y1,x2,y2,ES='KMS',npt=8,est=tmean,method='BH', -fr1=1,fr2=1,nboot=NA,nmin=12,alpha=.05,xout=FALSE,SEED=TRUE, -outfun=outpro,plotit=TRUE,LP=FALSE,xlab='X',ylab='Effect Size',pch1='*',pch2='+',...){ -# -# Compare two independent groups using -# a percentile bootstrap combined with a running interval -# smooth and some robust measure of effect size: -# -# This is done for npt covariate values, default is npt=8 -# -#. FWE is controlled based on the argument method, default is FDR -#. (Bejamini - Hochberg method). -# plotit=TRUE, plot estimates plus confidence intervals not adjusted to -# get simultaneous probability coverage. 
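-# Illustrative call (hypothetical data, with x1, y1, x2, y2 as described below):
-#   ancsm.es(x1,y1,x2,y2,ES='AKP',npt=8)
-# returns (in $output) one row per covariate value: the local effect size, an
-# unadjusted confidence interval, and raw and adjusted p-values.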
-# -# Choices for ES, the measure of effect size: -# 'KMS', 'EP','QS','QStr','AKP','WMW' -# -# Assume data are in x1 y1 x2 and y2 -# -flag.est=FALSE -if(identical(est,onestep))flag.est=TRUE -if(flag.est)LP=FALSE # Get an error when using onestep in conjunction with LP=T -if(identical(est,mom))flag.est=TRUE -xy1=elimna(cbind(x1,y1)) -x1=xy1[,1] -y1=xy1[,2] -xy2=elimna(cbind(x2,y2)) -x2=xy2[,1] -y2=xy2[,2] -if(xout){ -flag<-outfun(x1,...)$keep -x1<-x1[flag] -y1<-y1[flag] -flag<-outfun(x2,...)$keep -x2<-x2[flag] -y2<-y2[flag] -} -# -# -res1=ancova(x1,y1,x2,y2,pr=FALSE,plotit=FALSE,SEED=FALSE)$output -pts=seq(res1[1,1],res1[5,1],length.out=npt) -pts=unique(pts) -npt=length(pts) -output=matrix(NA,npt,6) -output[,1]=c(1:npt) -dimnames(output)=list(NULL,c('pts','Effect.Size','ci.low','ci.up','p.value','p.adj')) -n1<-1 -n2<-1 -vecn<-1 -for(i in 1:length(pts)){ -n1[i]<-length(y1[near(x1,pts[i],fr1)]) -n2[i]<-length(y2[near(x2,pts[i],fr2)]) -} -mat<-matrix(NA,length(pts),3) -dimnames(mat)<-list(NULL,c('X','n1','n2')) -gv<-vector('list',2*length(pts)) -for (i in 1:length(pts)){ -temp1<-y1[near(x1,pts[i],fr1)] -temp2<-y2[near(x2,pts[i],fr2)] -temp1<-temp1[!is.na(temp1)] -temp2<-temp2[!is.na(temp2)] -test=ESfun.CI(temp1,temp2,method=ES,SEED=SEED,alpha=alpha) -output[i,2]=test$effect.size -output[i,3]=test$ci[1] -output[i,4]=test$ci[2] -mat[i,1]<-pts[i] -if(length(temp1)<=5)paste('Warning, there are',length(temp1),' points corresponding to the design point X=',pts[i]) -if(length(temp2)<=5)paste('Warning, there are',length(temp2),' points corresponding to the design point X=',pts[i]) -mat[i,2]<-length(temp1) -mat[i,3]<-length(temp2) -test=ESfun.CI(temp1,temp2,method=ES,alpha=alpha,SEED=SEED) -output[i,2]=test$effect.size -output[i,3]=test$ci[1] -output[i,4]=test$ci[2] -output[i,5]=test$p.value -} -output[,6]=p.adjust(output[,5],method=method) -if(plotit){ -#runmean2g(x1,y1,x2,y2,fr=fr1,est=est,LP=LP,xlab=xlab,ylab=ylab,pch1=pch1,pch2=pch2,...) -plot(c(pts,pts,pts),c(output[,2],output[,3],output[,4]),xlab=xlab,ylab=ylab,type='n') -points(pts,output[,3],pch='+') -points(pts,output[,4],pch='+') -points(pts,output[,2],pch='*') -} -list(mat=mat,output=output) -} - - -class.error.CP<-function(x1=NULL,x2=NULL,train=NULL,g=NULL,method='KNN',nboot=100,EN=TRUE,FAST=TRUE, -AUC=FALSE,SEED=TRUE,...){ -# -#. Requires ROCR pacakage -# -# For a classification methods indicated by the argument -# method -# use cross validation leaving one out. -# -#. Return a confusion matrix -# -# -# The data for the two groups can be entered via the arguments -# x1 and x2 -# or -# store all of the data in the argument train in which case g specifies the group -# -# AUC=TRUE, returns auc. Default is FALSE because conditions can be created where -# Error: $ operator is invalid for atomic vectors -# -# Current choices available: -# KNN: Nearest neighbor using robust depths -# DIS: Points classified based on their depths -# DEP: Uses depths as suggested by Makinde and Fasoranbaku (2018). JAS -# SVM: support vector machine -# RF: Random forest -# NN: neural network -# ADA: ada boost -# PRO: project the points onto a line connecting the centers of the data clouds. -# Then use estimate of the pdf for each group to make a decision about future points. -# LSM: smooth version of logistic regression when sm=TRUE; otherwise use logistic regression. 
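-# Illustrative call (hypothetical data, two groups in two dimensions):
-#   g1=matrix(rnorm(100),ncol=2); g2=matrix(rnorm(100,mean=1),ncol=2)
-#   class.error.CP(x1=g1,x2=g2,method='KNN')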
-# -# Returns confusion matrix -# -# -# method='KNN' is default -# -# nboot=number of samples -# -if(length(method)!=1)stop('Only one method at a time is allowed') -if(SEED)set.seed(2) -if(!is.null(train)){ -if(is.null(g))stop('Argument g, group ids, must be specified') -if(is.matrix(g))if(dim(g)>1)stop('Argument g should be a vector') -flag=g==min(g) -x1=train[flag,] -x2=train[!flag,] -} -if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') -x1=elimna(x1) -x2=elimna(x2) -if(is.null(x1))stop('Something is wrong, no data in x1') -if(is.null(x2))stop('Something is wrong, no data in x2') -if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') -x1=elimna(x1) -x2=elimna(x2) -x1=as.matrix(x1) -x2=as.matrix(x2) -dimnames(x1)=list(NULL,NULL) # can be necessary to eliminate labels to avoid an error in randomForest. -dimnames(x2)=list(NULL,NULL) -n1=nrow(x1) -n2=nrow(x2) -ns1=min(n1,nboot) -ns2=min(n2,nboot) -mn=min(ns1,ns2) -CM=matrix(0,2,2) -isub1=sample(c(1:ns1)) -isub2=sample(c(1:ns2)) -A1=NULL -A2=NULL -ic1=0 -ic2=0 -for(k in 1:mn){ -N1=isub1[k] -N2=isub2[k] -train1=x1[-N1,] -train2=x2[-N2,] -test=rbind(x1[N1,],x2[N2,]) -a=CLASS.fun(x1=train1,x2=train2,test=test,method=method,...) -a1=a[1] -a2=a[2] -A1[k]=a1 -A2[k]=a2 -if(a1==1)CM[1,1]=CM[1,1]+1 #true = 1 pred 1 -else -CM[1,2]=CM[1,2]+1 -if(a2==2)CM[2,2]=CM[2,2]+1 #true =2 and pred 2 -else -CM[2,1]=CM[2,1]+1 -} -FREQ=CM -CM=CM/(2*nboot) -F=matrix(NA,2,3) -dimnames(F)=list(c('True 1','True 2'),c('Pred 1','Pred 2','Sum')) -F[1,1]=FREQ[1,1] -F[1,2]=FREQ[1,2] -F[2,1]=FREQ[2,1] -F[2,2]=FREQ[2,2] -F[1,3]=F[1,1]+F[1,2] -F[2,3]=F[2,1]+F[2,2] -RES=F -RES[1,1]=F[1,1]/(F[1,1]+F[1,2]) -RES[1,2]=F[1,2]/(F[1,1]+F[1,2]) -RES[2,1]=F[2,1]/(F[2,1]+F[2,2]) -RES[2,2]=F[2,2]/(F[2,1]+F[2,2]) -RES[,3]=1 -auroc=NULL -if(AUC){ -library(ROCR) -PRED=c(A1,A2) -LABS=c(rep(1,length(A1)),rep(2,length(A2))) -pred=prediction(PRED,LABS) -perf=performance(pred, "auc") - auroc<- perf@y.values[[1]] - } -list(C.MAT=RES,COUNTS=F,AUC=auroc[[1]]) -} - - -class.uni.error<-function(x,y,xy=NULL,g=NULL){ -# -# For univariate data, estimate prediction error using -# a kernel density estimator. 
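-# (A point is classified into whichever group has the larger estimated density,
-#  computed with the adaptive kernel estimator akerd, at that point.)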
-# Returns conditional estimate of the error rates -# Example: Given that a values is from group 1 erroneously classify it as coming from -# group 2 -# -# Also returns unconditional probabilities -# Example, the probability that a randomly sample subject is in group 1 and -# is classified as being in group 1 -# -# -if(!is.null(xy)){ -xy=fac2list(xy,g) -x=xy[[1]] -y=xy[[2]] -} -x=elimna(x) -y=elimna(y) -n1=length(x) -n2=length(y) -xsort=sort(x) -ysort=sort(y) -n1p=n1+1 -N=n1+n2 -if(is.null(x1))stop('Something is wrong, no data in x1') -if(is.null(x2))stop('Something is wrong, no data in x2') -UC=matrix(NA,2,2) -CP=matrix(NA,2,2) -d1<-akerd(x,pts=xsort,pyhat=TRUE,plotit=FALSE) -d2<-akerd(y,pts=xsort,pyhat=TRUE,plotit=FALSE) -e1=d1>d2 #means predict group 1 for data in group 1 -D1<-akerd(x,pts=ysort,pyhat=TRUE,plotit=FALSE) -D2<-akerd(y,pts=ysort,pyhat=TRUE,plotit=FALSE) -e2=D2>D1 # means predict group 2 for data in group 2 -CP[1,1]=mean(e1==1) -CP[1,2]=1-CP[1,1] -CP[2,2]=mean(e2==0) -CP[2,1]=1-CP[2,2] -# -e3=c(e1,e2) -UC[1,1]=sum(e3[1:n1]==1) -UC[1,2]=sum(e3[1:n1]==0) -UC[2,1]=sum(e3[n1p:N]==0) -UC[2,2]=sum(e3[n1p:N]==1) -UC=UC/sum(UC) -dimnames(CP)=list(c('True 1','True 2'),c('Pred 1','Pred 2')) -dimnames(UC)=list(c('True 1','True 2'),c('Pred 1','Pred 2')) -list(Conditional.prob=CP, Unconditiion.prob=UC,prob.correct.decision=UC[1,1]+UC[2,2]) -} - -hdno<-function(x,q=.5){ -# -# Use hd when .1<1<.9, -# otherwise use no estimator -# -x=elimna(x) -if(q<=.1 || q>=.9)e=qno.est(x,q) -else -e=hd(x,q) -e -} - -IQR2g.W<-function(x,y,nboot=100,alpha=.05,SEED=TRUE){ -# -# Wald-type test for comparing interquartile range of two independent groups. -# -sd1=bootse(x,est=IQRhd,nboot=nboot,SEED=SEED) -sd2=bootse(x,est=IQRhd,nboot=nboot,SEED=SEED) -e1=IQRhd(x) -e2=IQRhd(y) -se=sqrt(sd1^2+sd2^2) -W=(e1-e2)/se -crit=qnorm(1-alpha/2) -ci=(e1-e2)-crit*se -ci[2]=(e1-e2)+crit*se -pv=2*(1-pnorm(abs(W))) -list(Est.1=e1,Est.2=e2,Test.Stat=W,ci=ci,p.value=pv) -} - -IQRhd<-function(x){ -e=hd(x,.75)-hd(x,.25) -e -} - -decJKinter<-function(J,K,x,alpha = 0.05, q = c(1:9)/10, nboot = 1000, SEED = TRUE, - method = "BH"){ -# -# For every relevant interaction, compare multiple quantiles -# -if(is.matrix(x))x=listm(x) -INT=list() -JK=J*K -CO=con2way(J,K)$conAB -for( j in 1:ncol(CO)){ -id=which(CO[,j]!=0) -X=x[id] -INT[[j]]=decinter(X,alpha=alpha,q=q,nboot=nboot,SEED=SEED,method=method) -} -list(interactions=INT,con=CO) -} - -decJKinter<-function(J,K,x,alpha = 0.05, q = c(1:9)/10, nboot = 1000, SEED = TRUE, - method = "BH"){ -# -# For every relevant interaction, compare multiple quantiles -# -if(is.matrix(x))x=listm(x) -INT=list() -JK=J*K -CO=con2way(J,K)$conAB -for( j in 1:ncol(CO)){ -id=which(CO[,j]!=0) -X=x[id] -INT[[j]]=decinter(X,alpha=alpha,q=q,nboot=nboot,SEED=SEED,method=method) -} -list(interactions=INT,con=CO) -} - -con2by2A<-function(J,K){ -# -# For J by K design, for every two rows and two columns, -# create contrast coefficients for main effect for Factor A. -# -JK=J*K -Ja<-(J^2-J)/2 -Ka<-(K^2-K)/2 -conAB<-matrix(0,nrow=JK,ncol=Ka*Ja) -ic<-0 -for(j in 1:J){ -for(jj in 1:J){ -if(j < jj){ -for(k in 1:K){ -for(kk in 1:K){ -if(k4)nboot<-5000 -} -n<-nrow(mat) -connum<-ncol(con) -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -xbars<-apply(mat,2,est,na.rm=NA.RM,...) 
-psidat<-NA -for (ic in 1:connum)psidat[ic]<-sum(con[,ic]*xbars) -psihat<-matrix(0,connum,nboot) -psihatcen<-matrix(0,connum,nboot) -bvec<-matrix(NA,ncol=J,nrow=nboot) -data<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot) -for(ib in 1:nboot){ -bvec[ib,]<-apply(x[data[ib,],],2,est,na.rm=NA.RM,...) -} -# -# Now have an nboot by J matrix of bootstrap values. -# -test<-1 -for (ic in 1:connum){ -psihat[ic,]<-apply(bvec,1,bptdpsi,con[,ic]) -ptemp<-(sum(psihat[ic,]>0)+.5*sum(psihat[ic,]==0))/nboot -test[ic]<-ptemp -test[ic]<-2*min(test[ic],1-test[ic]) -} -ncon<-ncol(con) -if(plotit && ncol(bvec)==2){ -z<-c(0,0) -one<-c(1,1) -plot(rbind(bvec,z,one),xlab=xlab,ylab=ylab,type="n") -points(bvec) -totv<-apply(x,2,est,...) -cmat<-var(bvec) -dis<-mahalanobis(bvec,totv,cmat) -temp.dis<-order(dis) -ic<-round((1-alpha)*nboot) -xx<-bvec[temp.dis[1:ic],] -xord<-order(xx[,1]) -xx<-xx[xord,] -temp<-chull(xx) -lines(xx[temp,]) -lines(xx[c(temp[1],temp[length(temp)]),]) -abline(0,1) -} -ncon<-ncol(con) -output<-matrix(0,connum,6) -dimnames(output)<-list(NULL,c("con.num","psihat","p.value","p.adj","ci.lower","ci.upper")) -tmeans<-apply(mat,2,est,na.rm=NA.RM,...) -psi<-1 -for (ic in 1:ncol(con)){ -output[ic,2]<-sum(con[,ic]*tmeans) -output[ic,1]<-ic -output[ic,3]<-test[ic] -temp<-sort(psihat[ic,]) -icl<-round(alpha*nboot/2) -icu<-nboot-icl -icl=icl+1 -output[ic,5]<-temp[icl] -output[ic,6]<-temp[icu] -} -ids=NA -output[,4]=p.adjust(output[,3],method=method) -num.sig=sum(output[,4]<=alpha) -if(is.na(output[1,3])){ -if(pr)print('Evidently, one or more groups have too many missing values') -} -list(output=output,con=con,num.sig=num.sig) -} - -rmmcpv2<-function(x, y=NULL,con = 0, tr = 0.2, alpha = 0.05,dif=TRUE, -hoch=TRUE,na.rm=TRUE,nmin=5){ -# -# MCP on trimmed means with FWE controlled with Hochberg's method -# hoch=FALSE, will use Rom's method if alpha=.05 or .01 and number of tests is <=10 -# -# Note: confidence intervals are adjusted based on the corresponding critical p-value. 
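-# Illustrative call (hypothetical data): x is an n-by-J matrix of dependent groups;
-#   rmmcpv2(x,tr=.2,dif=TRUE)
-# performs all pairwise comparisons based on 20% trimmed means of the difference scores.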
-# -if(!is.null(y))x=cbind(x,y) -flagcon=FALSE -if(!is.matrix(x))x<-matl(x) -if(!is.matrix(x))stop("Data must be stored in a matrix or in list mode.") -con<-as.matrix(con) -J<-ncol(x) -nval<-nrow(x) -if(sum(con^2!=0))CC<-ncol(con) -if(sum(con^2)==0)CC<-(J^2-J)/2 -ncon<-CC -#if(alpha==.05){ -#dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) -#if(ncon > 10){ -#avec<-.05/c(11:ncon) -#dvec<-c(dvec,avec) -#}} -if(alpha==.01){ -dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) -if(ncon > 10){ -avec<-.01/c(11:ncon) -dvec<-c(dvec,avec) -}} -if(hoch)dvec<-alpha/c(1:ncon) -if(alpha != .05 && alpha != .01)dvec<-alpha/c(1:ncon) -if(sum(con^2)==0){ -flagcon<-TRUE -psihat<-matrix(0,CC,5) -dimnames(psihat)<-list(NULL,c("Group","Group","psihat","ci.lower","ci.upper")) -test<-matrix(NA,CC,6) -dimnames(test)<-list(NULL,c("Group","Group","test","p.value","p.adj","se")) -temp1<-0 -jcom<-0 -for (j in 1:J){ -for (k in 1:J){ -if (j < k){ -jcom<-jcom+1 -y=elimna(x[,c(j,k)]) -if(is.null(dim(y)))y=matrix(c(1,1),nrow=1) -if(nrow(y)<=nmin)print(paste('Skipping group', j, ' and group', k, 'due to small sample size')) -if(nrow(y)>nmin){ -h1<-nrow(y)-2*floor(tr*nrow(y)) -df<-h1-1 -xbar=mean(y[,1],tr=tr) -xbar[2]=mean(y[,2],tr=tr) -q1<-(nrow(y)-1)*winvar(y[,1],tr) -q2<-(nrow(y)-1)*winvar(y[,2],tr) -q3<-(nrow(y)-1)*wincor(y[,1],y[,2],tr)$cov -sejk<-sqrt((q1+q2-2*q3)/(h1*(h1-1))) -if(!dif){ -test[jcom,6]<-sejk -test[jcom,3]<-(xbar[1]-xbar[2])/sejk -temp1[jcom]<-2 * (1 - pt(abs(test[jcom,3]), df)) -test[jcom,4]<-temp1[jcom] -psihat[jcom,1]<-j -psihat[jcom,2]<-k -test[jcom,1]<-j -test[jcom,2]<-k -psihat[jcom,3]<-(xbar[1]-xbar[2]) -} -if(dif){ -dv<-y[,1]-y[,2] -test[jcom,6]<-trimse(dv,tr) -temp<-trimci(dv,alpha=alpha/CC,pr=FALSE,tr=tr) -test[jcom,3]<-temp$test.stat -temp1[jcom]<-temp$p.value -test[jcom,4]<-temp1[jcom] -psihat[jcom,1]<-j -psihat[jcom,2]<-k -test[jcom,1]<-j -test[jcom,2]<-k -psihat[jcom,3]<-mean(dv,tr=tr) -psihat[jcom,4]<-temp$ci[1] -psihat[jcom,5]<-temp$ci[2] -} -}}}} -if(hoch)dvec<-alpha/c(1:ncon) -temp2<-order(0-temp1) -zvec<-dvec[1:ncon] -sigvec<-(test[temp2,4]>=zvec) -sigvec=elimna(sigvec) -if(sum(sigvec)0){ -xbar=apply(x,2,mean,tr=tr) -if(nrow(con)!=ncol(x))warning("The number of groups does not match the number - of contrast coefficients.") -ncon<-ncol(con) -psihat<-matrix(0,ncol(con),4) -dimnames(psihat)<-list(NULL,c("con.num","psihat","ci.lower","ci.upper")) -test<-matrix(0,ncol(con),5) -dimnames(test)<-list(NULL,c("con.num","test","p.value","p.adj","se")) -temp1<-NA -for (d in 1:ncol(con)){ -psihat[d,1]<-d -if(!dif){ -psihat[d,2]<-sum(con[,d]*xbar) -sejk<-0 -for(j in 1:J){ -for(k in 1:J){ -djk<-(nval-1)*wincor(x[,j],x[,k], tr)$cov/(h1*(h1-1)) -sejk<-sejk+con[j,d]*con[k,d]*djk -}} -sejk<-sqrt(sejk) -test[d,1]<-d -test[d,2]<-sum(con[,d]*xbar)/sejk -test[d,5]<-sejk -temp1[d]<-2 * (1 - pt(abs(test[d,2]), df)) -} -if(dif){ -for(j in 1:J){ -if(j==1)dval<-con[j,d]*x[,j] -if(j>1)dval<-dval+con[j,d]*x[,j] -} -temp1[d]<-trimci(dval,tr=tr,pr=FALSE)$p.value -test[d,1]<-d -test[d,2]<-trimci(dval,tr=tr,pr=FALSE)$test.stat -test[d,5]<-trimse(dval,tr=tr) -psihat[d,2]<-mean(dval,tr=tr) -}} -test[,3]<-temp1 -temp2<-order(0-temp1) -zvec<-dvec[1:ncon] -sigvec<-(test[temp2,3]>=zvec) -if(sum(sigvec)top)flag[i]=TRUE -} -outid <- NULL -if(sum(flag) > 0)outid <- vec[flag] #regression outlier -both=c(iout,outid) -blp=duplicated(both) -if(sum(!blp)>0) -blp=unique(both[blp]) -else - blp=NULL -glp=iout -if(length(blp)>0){ -flag=NULL -for(k in 1:length(blp)){ 
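-# For each bad leverage point (a point flagged as both a leverage point and a
-# regression outlier), record its position in iout so that it can be dropped
-# from the list of good leverage points below.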
-flag=c(flag,which(iout==blp[k])) -} -glp=iout[-flag] -keep=vec[-blp] -} -if(plotit){ -plot(x,y,type='n',xlab=xlab,ylab=ylab) -points(x[keep],y[keep],pch='*') -points(x[blp],y[blp],pch='o') -} -list(n=n,n.out=length(iout),res.out.id=outid,keep=keep,good.lev=glp,bad.lev=blp) -} - -corblp.bca.C<-function(x,y,regfun=tsreg,varfun=pbvar,nboot=1000,alpha=.05,outfun=outpro.depth,SEED=TRUE, -plotit=FALSE,...){ -# -# Correlation based on a robust regression estimator with bad -# leverage points removes - -library(bcaboot) -if(SEED)set.seed(2) -xy=elimna(cbind(x,y)) -p1=ncol(xy) -p=p1-1 -if(p!=1)stop('Only a single independent variable is allowed') -x=xy[,1] -y=xy[,2] -n=length(y) -est=corblp(x,y,regfun=regfun,varfun=varfun)$cor -a=bcajack2(xy,1000,corblp.sub,alpha=alpha/2,regfun=regfun,varfun=varfun) -ci=c(a$lims[1,1],a$lims[3,1]) -list(cor=est,ci=ci) -} - -corblp.sub<-function(xy,regfun=tsreg,varfun=pbvar){ -X=xy[,1] -Y=xy[,2] -rest=corblp(X,Y,regfun=regfun,varfun=varfun)$cor -rest -} - - -corblppb<-function(x,y,regfun=tsreg,varfun=pbvar,nboot=1000,alpha=.05,outfun=outpro.depth,SEED=TRUE, -plotit=FALSE,...){ -# -# Correlation based on a robust regression estimator with bad -# leverage points removes -# -if(SEED)set.seed(2) -xy=elimna(cbind(x,y)) -p1=ncol(xy) -p=p1-1 -if(p!=1)stop('Only a single independent variable is allowed') -x=xy[,1] -y=xy[,2] -v=NA -n=length(y) -for(i in 1:nboot){ -id=sample(n,replace=TRUE) -v[i]=corblp(x[id],y[id],regfun=regfun,varfun=varfun)$cor -} -P=mean(v<0) -pv=2*min(P,1-P) -sv=sort(v) -ilow<-round((alpha/2) * nboot) -ihi<-nboot - ilow -ilow<-ilow+1 -ci<-sv[ilow] -ci[2]<-sv[ihi] -est=corblp(x,y,regfun=regfun,varfun=varfun) -list(cor.est=est$cor,p.value=pv,ci=ci) -} - - - reg.slope.only<-function(m,regfun=tsreg,...){ - # - # Assume data are in a matrix with 2 columns, used by BCA method -a=regfun(m[,1],m[,2],...)$coef[2] -a -} - - reg.bca<- -function(x,y,alpha=.05,nboot=1000,regfun=tsreg,SEED=TRUE,...){ -# -# BCA confidence interval for slope of a linear regression model -# -# Method: Bias corrected accelerated bootstrap -# -library(bcaboot) -if(SEED)set.seed(2) -m=elimna(cbind(x,y)) -n=nrow(m) -if(ncol(m)!=2)stop('Only one independent variable is allowed') -e=regfun(m[,1],m[,2])$coef[2] -a=bcajack(m,nboot,reg.slope.only,alpha=alpha/2,verbose=FALSE,regfun=regfun,...) -list(n=n,slope=e,ci.low=a$lims[1,1],ci.upper=a$lims[3,1]) -} - -reghet.blp<-function(x,y,regfun=tsreg,HH=TRUE,...){ - -# Eliminate bad leverage points using a heteroscedastic method -# Then estimate the parameters. -# -xx<-cbind(x,y) -xx<-elimna(xx) -if(ncol(xx)!=2)stop('Current version limited to a single independent variable') -x<-xx[,1] -y=xx[,2] -if(HH)id= outblp.HH(x,y,regfun=regfun,plotit=FALSE)$keep -else id=regcon.out(x,y,plotit=FALSE)$keep -e=regfun(x[id],y[id],...) 
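-# id indexes the points retained after bad leverage points are removed, so e is
-# the regression fit (coefficients, residuals, etc.) based on the cleaned data.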
-e -} - - -reghet.blp.ci<-function(x,y,regfun=tsreg,nboot=999,HH=TRUE, -SEED=TRUE,BCA=FALSE,pr=TRUE,...){ - -# Eliminate bad leverage points using a heteroscedastic method -# Then compute a confidence interval for the slope -# -#Use bias corrected accelerated bootstrap when BCA=TRUE, -# otherwise use a percentile bootstrap -# -xx<-cbind(x,y) -if(ncol(xx)!=2)stop('Current version limited to a single independent variable') -xx<-elimna(xx) -n=nrow(xx) -if(!BCA & n<50) #print('Might be safer to use BCA=TRUE') -if(BCA & pr)print('Note: when BCA=TRUE, only returns a confidence interval for the slope') -x<-xx[,1] -y=xx[,2] -if(HH)id= outblp.HH(x,y,regfun=regfun,plotit=FALSE)$keep -else id=regcon.out(x,y,plotit=FALSE)$keep -if(BCA)e=reg.bca(x[id],y[id],SEED=SEED,regfun=regfun) -else e=regci(x[id],y[id],SEED=SEED,regfun=regfun,nboot=nboot,...) -e -} - - -outblp.HH<-function(x,y,regfun=tsreg,omit.col=NULL,plotit=TRUE,xlab='X',ylab='Y'){ -# -# indicates which points, if any, are bad leverage points -# using a blend of a homoscedastic and heteroscedastic methods. -# -# This approach helps to avoid issues with Type I errors when testing hpotheses -# -# If for example -# omit.col=c(1,3) -# columns 1 and 3 of x are ignored when checking for bad leverage points. -# These columns might be, for example, dummy variables. -# -library(MASS) -xy=elimna(cbind(x,y)) -n=nrow(xy) -x=as.matrix(x) -p=ncol(x) -p1=p+1 -x=xy[,1:p] -y=xy[,p1] -if(p>1){ -if(!is.null(omit.col)) -x=x[,-omit.col] -} -x<-as.matrix(x) -out.id=NULL -temp=reglev.gen(x,y,regfun=regfun,plotit=FALSE) -out.id=temp$bad.lev -temp2=regcon.out(x,y,plotit=FALSE) -vec=keep=c(1:n) -out.id=unique(c(out.id,temp2$bad.lev)) -if(length(out.id)>0)keep=vec[-out.id] -n.out=length(out.id) -if(plotit){ -plot(x,y,type='n',xlab=xlab,ylab=ylab) -points(x[keep],y[keep],pch='*') -points(x[out.id],y[out.id],pch='o') -} -list(n=n,n.out=n.out,bad.lev=out.id,keep=keep) -} - - - -regHH<-function(x,y,regfun=tsreg,SO=FALSE,...){ -# -# -# SO=TRUE, estimate slope only, convenient for some bootstrap methods -# -xy=elimna(cbind(x,y)) -if(ncol(xy)!=2)stop('Current version limited to a single independent variable') -id= outblp.HH(xy[,1],xy[,2])$keep -if(!SO)e=regfun(xy[id,1],xy[id,2],...) -else e=regfun(xy[id,1],xy[id,2])$coef -list(coef=e) -} - - -regciHH<-function(x,y,regfun=tsreg,nboot=599,alpha=.05,SEED=TRUE,pr=TRUE,null.val=NULL, -method='hoch',plotit=FALSE,xlab='Predictor 1',ylab='Predictor 2',WARNS=FALSE,LABELS=FALSE,...){ -# -# Compute a .95 confidence interval for each of the parameters of -# a linear regression equation. The default regression method is -# the Theil-Sen estimator. -# -# Use method HH to eliminate bad leverage points -# -# When using the least squares estimator, and when n<250, use -# lsfitci instead. -# -# The predictor values are assumed to be in the n by p matrix x. -# The default number of bootstrap samples is nboot=599 -# -# regfun can be any R function that returns the coefficients in -# the vector regfun$coef, the first element of which contains the -# estimated intercept, the second element contains the estimated of -# the first predictor, etc. -# -# plotit=TRUE: If there are two predictors, plot 1-alpha confidence region based -# on the bootstrap samples. 
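-# Illustrative call (hypothetical data):
-#   set.seed(2)
-#   x=rnorm(50); y=x+rnorm(50)
-#   regciHH(x,y)  # bootstrap CIs for the intercept and slope after removing bad leverage points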
-# -x<-as.matrix(x) -p1<-ncol(x)+1 -p<-ncol(x) -xy<-cbind(x,y) -xy<-elimna(xy) -x<-xy[,1:p] -y<-xy[,p1] -nrem=length(y) - -flag<-outblp.HH(x,y,plotit=FALSE,...)$keep -xy<-xy[flag,] -x<-xy[,1:p] -y<-xy[,p1] - -estit=regfun(x,y,...)$coef -if(is.null(null.val))null.val=rep(0,p1) -flagF=FALSE -flagF=identical(regfun,tsreg) -if(flagF){if(pr){ -if(sum(duplicated(y)>0))print('Duplicate values detected; tshdreg might have more power than tsreg') -}} -nv=length(y) -x<-as.matrix(x) -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -if(pr)print('Taking bootstrap samples. Please wait.') -data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot) -if(!WARNS)options(warn=-1) -bvec<-apply(data,1,regboot,x,y,regfun,xout=FALSE,...) -options(warn=0) -#Leverage points already removed. -# bvec is a p+1 by nboot matrix. The first row -# contains the bootstrap intercepts, the second row -# contains the bootstrap values for first predictor, etc. -regci<-matrix(0,p1,6) -vlabs='Intercept' -for(j in 2:p1)vlabs[j]=paste('Slope',j-1) -if(LABELS)vlabs[2:p1]=labels(x)[[2]] -dimnames(regci)<-list(vlabs,c('ci.low','ci.up','Estimate','S.E.','p-value','p.adj')) -ilow<-round((alpha/2) * nboot) -ihi<-nboot - ilow -ilow<-ilow+1 -se<-NA -pvec<-NA -for(i in 1:p1){ -bsort<-sort(bvec[i,]) -#pvec[i]<-(sum(bvec[i,]<0)+.5*sum(bvec[i,]==0))/nboot -pvec[i]<-(sum(bvec[i,].5)pvec[i]<-1-pvec[i] -regci[i,1]<-bsort[ilow] -regci[i,2]<-bsort[ihi] -se[i]<-sqrt(var(bvec[i,])) -} -if(p1==3){ -if(plotit){ -plot(bvec[2,],bvec[3,],xlab=xlab,ylab=ylab) -}} -regci[,3]=estit -pvec<-2*pvec -regci[,4]=se -regci[,5]=regci[,6]=pvec -regci[2:p1,6]=p.adjust(pvec[2:p1],method=method) -list(regci=regci,n=nrem,n.keep=nv) -} - -regci.het.blp<-function(x,y,regfun=tsreg,nboot=599,alpha=.05,SEED=TRUE,pr=TRUE,null.val=NULL, -method='hoch',plotit=FALSE,xlab='Predictor 1',ylab='Predictor 2',WARNS=FALSE,LABELS=FALSE,...){ -# -# Compute a .95 confidence interval for each of the parameters of -# a linear regression equation. The default regression method is -# the Theil-Sen estimator. -# -# Use method HH to eliminate bad leverage points -# -# When using the least squares estimator, and when n<250, use -# lsfitci instead. -# -# The predictor values are assumed to be in the n by p matrix x. -# The default number of bootstrap samples is nboot=599 -# -# regfun can be any R function that returns the coefficients in -# the vector regfun$coef, the first element of which contains the -# estimated intercept, the second element contains the estimated of -# the first predictor, etc. -# -# plotit=TRUE: If there are two predictors, plot 1-alpha confidence region based -# on the bootstrap samples. -# -x<-as.matrix(x) -p1<-ncol(x)+1 -p<-ncol(x) -xy<-cbind(x,y) -xy<-elimna(xy) -x<-xy[,1:p] -y<-xy[,p1] -nrem=length(y) - -flag<-regcon.out(x,y,plotit=FALSE,...)$keep -xy<-xy[flag,] -x<-xy[,1:p] -y<-xy[,p1] - -estit=regfun(x,y,...)$coef -if(is.null(null.val))null.val=rep(0,p1) -flagF=FALSE -flagF=identical(regfun,tsreg) -if(flagF){if(pr){ -if(sum(duplicated(y)>0))print('Duplicate values detected; tshdreg might have more power than tsreg') -}} -nv=length(y) -x<-as.matrix(x) -if(SEED)set.seed(2) # set seed of random number generator so that -# results can be duplicated. -if(pr)print('Taking bootstrap samples. Please wait.') -data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot) -if(!WARNS)options(warn=-1) -bvec<-apply(data,1,regboot,x,y,regfun,xout=FALSE,...) 
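-# Each row of data contains the case indices for one bootstrap sample; regboot
-# refits regfun to those cases, so bvec collects the bootstrap estimates of all
-# p+1 coefficients.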
-options(warn=0) -#Leverage points already removed. -# bvec is a p+1 by nboot matrix. The first row -# contains the bootstrap intercepts, the second row -# contains the bootstrap values for first predictor, etc. -regci<-matrix(0,p1,6) -vlabs='Intercept' -for(j in 2:p1)vlabs[j]=paste('Slope',j-1) -if(LABELS)vlabs[2:p1]=labels(x)[[2]] -dimnames(regci)<-list(vlabs,c('ci.low','ci.up','Estimate','S.E.','p-value','p.adj')) -ilow<-round((alpha/2) * nboot) -ihi<-nboot - ilow -ilow<-ilow+1 -se<-NA -pvec<-NA -for(i in 1:p1){ -bsort<-sort(bvec[i,]) -#pvec[i]<-(sum(bvec[i,]<0)+.5*sum(bvec[i,]==0))/nboot -pvec[i]<-(sum(bvec[i,].5)pvec[i]<-1-pvec[i] -regci[i,1]<-bsort[ilow] -regci[i,2]<-bsort[ihi] -se[i]<-sqrt(var(bvec[i,])) -} -if(p1==3){ -if(plotit){ -plot(bvec[2,],bvec[3,],xlab=xlab,ylab=ylab) -}} -regci[,3]=estit -pvec<-2*pvec -regci[,4]=se -regci[,5]=regci[,6]=pvec -regci[2:p1,6]=p.adjust(pvec[2:p1],method=method) -list(regci=regci,n=nrem,n.keep=nv) -} - -reglev.est<-function(x,y,regfun=tsreg,...){ -# -# Use homoscedastic method to remove bad leverage points and estimate parameters using remaining data -# -x<-as.matrix(x) -p1<-ncol(x)+1 -p<-ncol(x) -xy<-cbind(x,y) -xy<-elimna(xy) -x<-xy[,1:p] -y<-xy[,p1] -x=as.matrix(x) -nrem=length(y) -id=reglev.gen(x,y,plotit=FALSE,...)$keep -e=regfun(x[id,],y[id],...)$coef -e -} - -wmw.det<-function(x,y,refp=NULL,plotit=FALSE,xlab='Difference',ylab='Density', -plotfun=kerSORT){ -# -# -# If execution time is an issue when plotting, try plotfun=skerd -# -# -# Compare the tails distribution of x-y based on a specified value -# indicated by the argument -# refp -# That is compare P(x-y< -refp) vs P(x-y > refp) -# -# -if(is.null(refp))stop('No reference point was provided') -x<-x[!is.na(x)] -y<-y[!is.na(y)] -refp=abs(refp) -m<-outer(x,y,FUN='-') -if(refp!=0){ -L=mean(m<=0-refp) -U=mean(m>=refp) -} -else{ -L=mean(m<0) -U=mean(m>0) -} -if(plotit)plotfun(as.vector(m),xlab=xlab,ylab=ylab) -list(L=L,U=U,dif=U-L) -} - -wmw.ref.dif<-function(x,y,q=.25,pts=NULL,nboot=1000,alpha=.05,SEED=TRUE, -plotit=FALSE,xlab='Difference',ylab='Density',estfun=hdmq, -plotfun=kerSORT){ -# -# If pts is specified, the goal is to make inferences about -# P(x-y< -pts)-P(x-y > pts) -# using a percentile bootstrap method -# -# If pts is not specified, and make inferences -# about the 1-q and q quantiles. If X and Y -# have identical distributions, D=X-Y is symmetric about zero and the sum of the -# 1-q and qth quantiles is zero. Should not be used when there are tied values -# -# If QC=FALSE and pts=NULL, -# take pts to be estimate of the q quantile of D. 
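-# Illustrative call (hypothetical data with no tied values):
-#   wmw.ref.dif(rnorm(40),rnorm(50),q=.25)  # inferences about the .25 and .75 quantiles of D=X-Y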
-# -# if pts is not NULL, QC=FALSE is used -# -# Output: -# L=P(x-y< -pts) -# U = P(x-y > pts) -# Est.dif=U-L -# -QC=TRUE -if(!is.null(pts))QC=FALSE -if(is.null(pts)){ -if(sum(q<.5)!=length(q))stop('All q values should be <=.5') -} -if(SEED)set.seed(2) -d=NA -x<-x[!is.na(x)] -y<-y[!is.na(y)] -n1=length(x) -n2=length(y) -if(!QC){ -if(!is.null(pts)){ -e=wmw.det(x,y,refp=pts,plotit=plotit,xlab=xlab,ylab=ylab,plotfun=plotfun) -est=e$dif -L=e$L -U=e$U -} -else{ -pts=qest(outer(x,y,FUN='-'),q=q) -e=wmw.det(x,y,refp=pts,plotit=plotit,xlab=xlab,ylab=ylab,plotfun=plotfun) -est=e$dif -L=e$L -U=e$U -}} - -if(QC){ -d=outer(x,y,FUN='-') -d=as.vector(d) -qv=estfun(d,q=c(q,1-q)) -if(plotit)plotfun(d,xlab=xlab,ylab=ylab) -est=qv[1]+qv[2] -L=qv[1] -U=qv[2] -} -for(i in 1:nboot){ -id1=sample(n1,replace=TRUE) -id2=sample(n2,replace=TRUE) -if(!QC)d[i]=wmw.det(x[id1],y[id2],refp=pts,plotit=FALSE)$dif -else{ -qv=estfun(outer(x[id1],y[id2],FUN='-'),q=c(q,1-q)) -d[i]=qv[1]+qv[2] -}} -crit<-alpha/2 -icl<-round(crit*nboot)+1 -icu<-nboot-icl -dif=sort(d) -ci=dif[icl] -ci[2]=dif[icu] -pv=mean(dif<0)+.5*mean(dif==0) -pv<-2*min(pv,1-pv) -list(L=L,U=U,Est.dif=est,ci=ci,p.value=pv) -} - -wmw.ref.mul<-function(x,y,refp=NULL,pts=NULL,q=seq(.6,.9,.1), center=FALSE, estfun=hdmq, alpha=.05,nboot=1000,SEED=TRUE,method='BH',plotit=FALSE, -xlab='Difference',ylab='Density', -plotfun=kerSORT){ -# -# -# For multiple reference points, refp, -# make inferences about P(x-y< -refp) vs P(x-y > refp) -# refp can be constants chosen by the user. If not specified, -# refp are taken to be the .6(.1).9 estimated quantiles of the distribution of X-Y -# -# pts can be used to indicate specified reference points, refp -# -# To use the Harrell-Davis estimator, set estfun=hdmq -# -if(SEED)set.seed(2) -if(!is.null(pts))refp=pts -x=elimna(x) -y=elimna(y) -if(is.null(refp)){ -m=outer(x,y,FUN='-') -m=as.vector(m) -morig=m - -if(center)m=m-median(m) -refp=estfun(m,q) -} -np=length(refp) -output<-matrix(NA,np,8) -dimnames(output)=list(NULL,c('Pts','P(x-y<-Pts)' ,'P(x-y>Pts)','Dif','ci.low','ci.up','p.value','p.adj')) -for(i in 1:np){ -e=wmw.ref.dif(x,y,pts=refp[i],alpha=alpha,nboot=nboot,SEED=FALSE) -output[i,1:7]=c(refp[i],e$L,e$U,e$Est.dif,e$ci[1],e$ci[2],e$p.value) -} -output[,8]=p.adjust(output[,7],method=method) -if(plotit)plotfun(as.vector(morig),xlab=xlab,ylab=ylab) -output -} - -wmw.QC.mul<-function(x,y,q=seq(.1,.4,.1), estfun=hdmq, alpha=.05,nboot=1000,SEED=TRUE,method='BH',plotit=FALSE, -xlab='Difference',ylab='Density', -plotfun=kerSORT){ -# -# -# For multiple reference quantiles, q>.5, -# make inferences about P(x-y< -refp) vs P(x-y > refp) -#. where refp is the q<.5 quantile -# refp can be constants chosen by the user. If not specified, -# refp are taken to be the .6(.1).9 estimated quantiles of the distribution of X-Y -# -# To use the Harrell-Davis estimator, set estfun=hdmq which is the default -# -if(SEED)set.seed(2) -x=elimna(x) -y=elimna(y) -np=length(q) -output<-matrix(NA,np,8) -dimnames(output)=list(NULL,c('q','q.quant' ,'1-q.quant','Sum','ci.low','ci.up','p.value','p.adj')) -for(i in 1:np){ -e=wmw.QC(x,y,q=q[i],alpha=alpha,nboot=nboot,SEED=FALSE) -output[i,1:7]=c(q[i],e$L,e$U,e$Est.dif,e$ci[1],e$ci[2],e$p.value) -} -output[,8]=p.adjust(output[,7],method=method) -if(plotit)plotfun(as.vector(morig),xlab=xlab,ylab=ylab) -output -} - -tailci<-function(x,y,pts=NULL,q=.25, nboot=1000,estfun=hdmq,alpha=.05,SEED=TRUE){ -# -# If pts is specified, the goal is to compute a confidence interval -# P(x-y< pts). 
If pts is not specified, it is taken to be an estimate of the qth quantile, -# q=.25 is the default. -# -if(SEED)set.seed(2) -x<-x[!is.na(x)] -y<-y[!is.na(y)] -n1=length(x) -n2=length(y) -qe=NA -m=as.vector(outer(x,y,FUN='-')) -if(is.null(pts))e=estfun(m,q) -else e=pts -est=mean(m<=e) -V=NA -for(i in 1:nboot){ -id1=sample(n1,replace=TRUE) -id2=sample(n2,replace=TRUE) -M=as.vector(outer(x[id1],y[id2],FUN='-')) -V[i]=mean(M<=e) -} -crit<-alpha/2 -icl<-round(crit*nboot)+1 -icu<-nboot-icl -a=sort(V) -ci=a[icl] -ci[2]=a[icu] -list(Est=est,ci=ci) -} - -difqci=tailci - - -difqci.mul<-function(x,y,refp=NULL,pts=NULL,q=seq(.1,.4,.1), estfun=hdmq, center=FALSE, alpha=.05,nboot=1000,SEED=TRUE,method='BH',plotit=FALSE, -xlab='Difference',ylab='Density', -plotfun=kerSORT){ -# -# -# For multiple reference points, refp, -# make inferences about P(x-y< refp) -# refp can be constants chosen by the user. If not specified, -# refp are taken to be the .6(.1).9 estimated quantiles of the distribution of X-Y -# -# pts can be used to indicate specified reference points, refp -# -# If refp=NULL, reference points are based on estimates of the q quantiles -# Default is q=.1(.1).4 -# -# Her, use the Harrell-Davis estimator by default, estfun=hdmq -# -if(SEED)set.seed(2) -if(!is.null(pts))refp=pts -x=elimna(x) -y=elimna(y) -FLAG=TRUE -m=outer(x,y,FUN='-') -m=as.vector(m) -morig=m - -if(center)m=m-median(m) -if(is.null(refp)){ -FLAG=FALSE -refp=estfun(m,q) -} -np=length(refp) -if(!FLAG){ -output<-matrix(NA,np,5) -dimnames(output)=list(NULL,c('q','Pts','P(x-y2)stop('Should have only two variables') -v1=pbvar(x[,1]) -v2=pbvar(x[,2]) -v3=pbcor(x[,1],x[,2])$cor -a=v1+v2 -2*v3*sqrt(v1)*sqrt(v2) -e=sqrt(2)*(locfun(x[,1])-locfun(x[,2]))/sqrt(a) -e -} - -rm.marg.OMCI<-function(x,y=NULL,locfun=onestep,nboot=1000,SEED=TRUE,alpha=.05, -null.val=0,MC=FALSE,...){ -# -# Two dependent groups. -# Confidence interval for effect size that takes into account heteroscedasticity as well as the -# association between X and Y based on the marginal distributions, not the -# difference scores. For robust estimators, these two approaches generally give -# different results. -# -library(MASS) -if(!is.null(y))x=cbind(x,y) -x=elimna(x) -if(SEED)set.seed(2) -e=rm.margOM.es(x) -n=nrow(x) -if(!MC){ -v=NA -for(i in 1:nboot){ -id=sample(n,replace=TRUE) -v[i]=rm.margOM.es(x[id,],locfun=locfun) -} -} -if(MC){ -library(parallel) -d=list() -for(j in 1:nboot){ -id=sample(n,replace=TRUE) -d[[j]]=x[id,] -} -v=mclapply(d,rm.margOM.es,locfun=locfun) -v=matl(v) -} - -v=sort(v) -ilow<-round((alpha/2) * nboot) -ihi<-nboot - ilow -ilow<-ilow+1 -ci=v[ilow] -ci[2]=v[ihi] -pv=mean(v0)flag=flag[-flag.out] -x1<-x1[flag] -y1<-y1[flag] -x2<-x2[flag] -y2<-y2[flag] -} -if(is.null(pts)){ -if(QM){ -q1=qest(x1,q) -q2=qest(x2,q) -pts=max(q1,q2) -u1=qest(x1,1-q) -u2=qest(x2,1-q) -up=min(u1,u2) -pts[2]=mean(c(pts[1],up)) -pts[3]=up -} -if(!QM)pts=ancova(x1,y1,x2,y2,pr=FALSE,plotit=FALSE,SEED=FALSE)$output[,1] -} - -# Use a bootstrap estimate of the covariance matrix to get an estimate of -# a marginal measure of dispersion. 
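-# sq1, sq2 and CV below are bootstrap estimates of the variances and covariance
-# of the two predicted values at each covariate point, so SQE estimates the
-# squared standard error of their difference; multiplying by n yields the
-# marginal measure of dispersion used in the denominator of the effect size.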
-n=length(y1) -if(n!=length(y2))stop('Time one sample size not equal to time two sample size') -npts=length(pts) -e1=matrix(NA,Nboot,npts) -e2=matrix(NA,Nboot,npts) -for(i in 1:Nboot){ -flag=sample(n,replace=TRUE) -e1[i,]=regYhat(x1[flag],y1[flag],xr=pts,regfun=regfun) -e2[i,]=regYhat(x2[flag],y2[flag],xr=pts,regfun=regfun) -} -CV=NA -sq1=apply(e1,2,var) -sq2=apply(e2,2,var) -for(j in 1:npts)CV[j]=cov(e1[,j],e2[,j]) -SQE=sq1+sq2-2*CV -bot=n*SQE -es=(regYhat(x1,y1,xr=pts,regfun=regfun)-regYhat(x2,y2,xr=pts,regfun=regfun))/sqrt(bot) -es=sqrt(2)*es -mat=cbind(pts,es) -if(plotit)reg2plot(x1,y1,x2,y2,regfun=regfun,xlab=xlab,ylab=ylab) -dimnames(mat)=list(NULL,c('pts','Effect.size')) -mat -} - -ancovad.ES.SEpb<-function(x1,y1,x2,y2,nboot=100,regfun=tsreg,pts=0,SEED=TRUE){ -# -# Estimate standard error -# -n1=length(x1) -npts=length(pts) -if(SEED)set.seed(2) -v=matrix(NA,nrow=nboot,ncol=npts) -for(i in 1:nboot){ -id1=sample(n1,replace=TRUE) -X1=x1[id1] -Y1=y1[id1] -X2=x2[id1] -Y2=y2[id1] -v[i,]=ancovad.ES(X1,Y1,X2,Y2,regfun=regfun,pts=pts,plotit=FALSE,SEED=FALSE)[,2] -} -se=apply(v,2,sd,na.rm=TRUE) -se -} - - -ancovad.ESci<-function(x1,y1,x2,y2,pts=NULL,regfun=tsreg,alpha=.05,nboot=100,SEED=TRUE, -QM=FALSE,ql=.2, -xout=FALSE,outfun=outpro,xlab='Pts',ylab='Y',method='hoch',plotit=TRUE){ -# -# Two dependent groups. -# -# For each specified value for x, compute a heteroscedastic measure of effect -# So if x=2, rather can compare the groups using some specified measure of location, use a -# measure of effect size that takes into account the conditional measure of dispersion of y given x. -# -# if pts=NULL and -# QM=FALSE: pick covariate points based on default points used by the R funtcion ancovad -# -# ql determines quantiles of x that form the range of points -# pts can be used to specify the points x, if NULL, the function picks three values -# The function tests the hypothesis that the measure of effect is zero, no effect. -# -# iter=100: number of replications used to estimate the standard error. 
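-# Illustrative call (hypothetical dependent-groups data with one covariate):
-#   ancovad.ESci(x1,y1,x2,y2)  # effect sizes, normal-approximation CIs and p-values at the default covariate points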
-# -# -xy=elimna(cbind(x1,y1,x2,y2)) -n=nrow(xy) -if(ncol(xy)!=4)stop('Only one covariate can be used') -x1=xy[,1] -y1=xy[,2] -x2=xy[,3] -y2=xy[,4] -if(xout){ -flag=c(1:n) -flag1=out.methods(x1,y1,regfun=regfun,plotit=FALSE,id=id)$out.id -flag2=out.methods(x2,y2,regfun=regfun,plotit=FALSE,id=id)$out.id -flag.out=unique(c(flag1,flag2)) -if(length(flag.out)>0)flag=flag[-flag.out] -x1<-x1[flag] -y1<-y1[flag] -x2<-x2[flag] -y2<-y2[flag] -} -if(is.null(pts)){ -if(QM){ -qu=1-ql -q1=qest(x1,ql) -q2=qest(x2,ql) -pts=max(q1,q2) -u1=qest(x1,qu) -u2=qest(x2,qu) -up=min(u1,u2) -pts[2]=mean(c(pts[1],up)) -pts[3]=up -} -pts=unique(pts) -if(!QM)pts=ancovad.ES(x1,y1,x2,y2,regfun=regfun,plotit=FALSE,SEED=FALSE)[,1] -} -npts=length(pts) -RES=matrix(NA,nrow=npts,ncol=8) -SE=ancovad.ES.SEpb(x1,y1,x2,y2,regfun=regfun,nboot=nboot,pts=pts,SEED=FALSE) -a=ancovad.ES(x1,y1,x2,y2,regfun=regfun,pts=pts,SEED=FALSE,plotit=plotit,xlab=xlab,ylab=ylab) -RES[,1]=a[,1] -RES[,2]=a[,2] -RES[,3]=SE -RES[,5]=RES[,2]-qnorm(1-alpha/2)*RES[,3] -RES[,6]=RES[,2]+qnorm(1-alpha/2)*RES[,3] -test=RES[,2]/RES[,3] -pv=2*(1-pnorm(abs(test))) -RES[,7]=pv -RES[,4]=test -dimnames(RES)=list(NULL,c('pts','Est.','SE','Test.Stat','ci.low','ci.up','p-value','p.adjusted')) -RES[,8]=p.adjust(RES[,7],method=method) -if(plotit){ -xa=c(pts,pts,pts) -ya=c(RES[,2],RES[,5],RES[,6]) -plot(xa,ya,xlab=xlab,ylab='ES',type='n') -lines(pts,RES[,5],lty=2) -lines(pts,RES[,2]) -lines(pts,RES[,6],lty=2) -} -RES -} - - -tailsci.mul=difqci.mul - -outDETMCD<-function(x,cov.fun=DETMCD,xlab='X',ylab='Y',qval=.975, -crit=NULL,KS=TRUE,plotit=FALSE,...){ -# -# Search for outliers using robust measures of location and scatter, -# which are used to compute robust analogs of Mahalanobis distance. -# -# x is an n by p matrix or a vector of data. -# -# The function returns the values flagged as an outlier plus -# the (row) number where the data point is stored. -# If x is a vector, out.id=4 indicates that the fourth observation -# is an outlier and outval=123 indicates that 123 is the value. -# If x is a matrix, out.id=4 indicates that the fourth row of -# the matrix is an outlier and outval reports the corresponding -# values. -# -# The function also returns the distance of the -# points identified as outliers -# in the variable dis. -# -# For bivariate data, if plotit=TRUE, plot points and circle outliers. -# -# cov.fun determines how the measure of scatter is estimated. -# The default is covDETMCD -# Possible choices are -# cov.mve (the MVE estimate) -# cov.mcd (the MCD estimate) -# covmba2 (the MBA or median ball algorithm) -# rmba (an adjustment of MBA suggested by D. 
Olive) -# cov.roc (Rockes TBS estimator) -# -# plotit=FALSE used to avoid problems when other functions in WRS call -# this function -# -# KS=TRUE: keep the seed that was used -# -if(is.data.frame(x))x=as.matrix(x) -if(is.list(x))stop('Data cannot be stored in list mode') -nrem=nrow(as.matrix(x)) -if(!is.matrix(x)){ -dis<-(x-median(x,na.rm=TRUE))^2/mad(x,na.rm=TRUE)^2 -if(is.null(crit))crit<-sqrt(qchisq(.975,1)) -vec<-c(1:length(x)) -} -if(is.matrix(x)){ -mve<-cov.fun(elimna(x)) -dis<-mahalanobis(x,mve$center,mve$cov) -if(is.null(crit))crit<-sqrt(qchisq(.975,ncol(x))) -vec<-c(1:nrow(x)) -} -dis[is.na(dis)]=0 -dis<-sqrt(dis) -chk<-ifelse(dis>crit,1,0) -id<-vec[chk==1] -keep<-vec[chk==0] -if(is.matrix(x)){ -if(ncol(x)==2 && plotit){ -plot(x[,1],x[,2],xlab=xlab,ylab=ylab,type='n') -flag<-rep(TRUE,nrow(x)) -flag[id]<-FALSE -points(x[flag,1],x[flag,2]) -if(sum(!flag)>0)points(x[!flag,1],x[!flag,2],pch='*') -}} -if(!is.matrix(x))outval<-x[id] -if(is.matrix(x))outval<-x[id,] -n=nrow(as.matrix(x)) -n.out=length(id) -list(n=n,n.out=n.out,out.val=outval,out.id=id,keep=keep,dis=dis,crit=crit) -} - -out.methods<-function(x,y, regfun = tsreg,plotit=FALSE,id,method=c('PRO','PRO.R','BLP','DUM','MCD','BOX')){ -type=match.arg(method) -switch(type, - PRO=outpro(x,plotit=plotit), # projection method - PRO.R=outpro.depth(x), #projection method random, lower execution time vs outpro - BLP=outblp(x,y,regfun=regfun,plotit=FALSE), # regression method - DUM=out.dummy(x,y,outfun=outpro.depth,id=id), # Detect outliers ignoring col indicated by argument id - MCD=outDETMCD(x,plotit=plotit), - BOX=outbox(x)) # Boxplot method using ideal. fourths -} - -hoch2.simp<-function(n,V,cil,tr,alpha=.05,con=NULL){ -# -# -# Hochberg two-stage given n's sd's tr and alpha -# if tr>0, var. should contain winsorized variances -# raw data not provided -# -#V = variances or Winsorized variance if tr>0 -# cil desired length of the confidence intervals -# -# -if(is.matrix(x))x<-listm(x) -J=length(n) -svec=V -tempn=n -tempt<-floor((1-2*tr)*tempn) -A<-sum(1/(tempt-1)) -df<-J/A -if(is.null(con))con=con1way(J) -crit=qtukey(1-alpha,J,df) -avec<-NA -ncon=ncol(con) -for(i in 1:ncon){ -temp<-con[,i] -avec[i]<-sum(temp[temp>0]) -} -dvec<-(cil/(2*crit*avec))^2 -d<-max(dvec) -n.vec<-NA -for(j in 1:J){ -n.vec[j]<-max(tempn[j],floor(svec[j]/d)+1) -print(paste("Need an additional ", n.vec[j]-tempn[j], -" observations for group", j)) -} -} - -mean.pred.ci<-function(M,sd,orig.n,new.n,tr=0,alpha=.05){ -# -# -# Generalization of prediction method in Spence & Stanley -# Advances in Methods and Practices in Psychological Science January-March 2024, Vol. 7, No. 1, -#pp. 
1-13 -# -# M = observed mean or can be a trimmed mean -# tr= amount of trimming -# sd = Winsorized standard deviation, which is the usual standard deviation when t=0 -orig.n=orig.n/(1-2*tr)^2 -new.n=new.n/(1-2*tr)^2 -se=sqrt(sd^2/orig.n+sd^2/new.n) -g=floor(tr*orig.n) -df=orig.n-2*g-1 -crit=qt(1-alpha/2,df) -ci=M-crit*se -ci=c(ci,M+crit*se) -ci -} - - -qcorp1.ci<-function(x,y,q=.5,alpha=.05,nboot=599,SEED=TRUE, xout=TRUE, -method='PRO',regfun=Qreg){ -# -# Confidence interval for a quantile regression measure of association -# -# -if(SEED)set.seed(2) - xy=elimna(cbind(x,y)) -p1=ncol(xy) -if(p1>2)stop('Current version is for a single independent variable only') -#, use the R function corblp.ci') -x=xy[,1] -y=xy[,2] - if(xout){ -x<-as.matrix(x) -flag<-out.methods(x,y,plotit=FALSE,method=method,regfun=regfun)$keep -x<-x[flag,] -y<-y[flag] -n.keep=length(y) -} -p=p1-1 -x=xy[,1:p] -x=as.matrix(x) -y=xy[,p1] - v=NA - n=nrow(xy) - if(n<=40)xout=FALSE # to avoid computational issues when n is small. - for(i in 1:nboot){ - id=sample(n,replace=TRUE) -v[i]=qcorp1(xy[id,1:p],xy[id,p1],q=q,xout=xout)$cor - } - mv=mean(v) - v=sort(v) - ilow<-round((alpha/2) * nboot) -ihi<-nboot - ilow -ilow<-ilow+1 - ci=v[ilow] - ci[2]=v[ihi] - e=qcorp1(x,y,q=q,xout=xout)$cor - pv=mean(v<0)+.5*mean(v==0) - pv=2*min(pv,1-pv) - list(Est.=e,ci=ci,p.value=pv) - } - - - -remove.lab.vec<-function(a){ -# -# Remove labels -# -a=as.matrix(a) -dimnames(a)=list(NULL,NULL) -a -} - -qcmul<-function(x,y,q=.5,nboot=599,alpha=.05,SEED=FALSE,xout=FALSE, -outfun=outpro,method='BH'){ -# -# -# For each independent variable, compute a confidence interval for a -# quantile regression correlation. -# -if(SEED)set.seed(2) - xy=elimna(cbind(x,y)) -p1=ncol(xy) -p=p1-1 -x=xy[,1:p] -x=as.matrix(x) -y=xy[,p1] -n=length(y) -if(xout){ -x<-as.matrix(x) -flag<-outfun(x,plotit=FALSE)$keep -x<-x[flag,] -y<-y[flag] -n=length(y) -xy=cbind(x,y) -} -x=as.matrix(x) -e=matrix(NA,p,5) -dimnames(e)=list(NULL,c('Est.','ci.low','ci.up','p-value','adj.p.value')) -for(j in 1:p)e[j,1]=qcorp1(x[,j],y,q=q)$cor - v=matrix(NA,nboot,p) - n=nrow(xy) - for(j in 1:p){ - for(i in 1:nboot){ - id=sample(n,replace=TRUE) -v[i,j]=qcorp1(xy[id,j],xy[id,p1],q=q)$cor - }} - v=apply(v,2,sort) - ilow<-round((alpha/2) * nboot) -ihi<-nboot - ilow -ilow<-ilow+1 - e[,2]=v[ilow,] - e[,3]=v[ihi,] - pv=NA - for(j in 1:p){ - pv[j]=mean(v[,j]<0) - if(pv[j]>=.5)pv[j]=1-pv[j] - } - e[,4]=2*pv - e[,5]=p.adjust(e[,4],method=method) - list(n=n,results=e) -} - - -qcor.ci<-function(x,y,q=.5,alpha=.05,nboot=599,SEED=TRUE, xout=TRUE, -method='PRO',regfun=Qreg){ -# -# Confidence interval for a quantile regression measure of association derived by Li et al. -# -# -if(SEED)set.seed(2) - xy=elimna(cbind(x,y)) -p1=ncol(xy) -if(p1>2)stop('Current version is for a single independent variable only') -#, use the R function corblp.ci') -x=xy[,1] -y=xy[,2] - if(xout){ -x<-as.matrix(x) -flag<-out.methods(x,y,plotit=FALSE,method=method,regfun=regfun)$keep -x<-x[flag,] -y<-y[flag] -n.keep=length(y) -} -p=p1-1 -x=xy[,1:p] -x=as.matrix(x) -y=xy[,p1] - v=NA - n=nrow(xy) - if(n<=40)xout=FALSE # to avoid computational issues when n is small. 
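-# Percentile bootstrap: resample cases, recompute the quantile correlation for
-# each bootstrap sample, and take the middle 1-alpha portion of the sorted
-# bootstrap values as the confidence interval.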
- for(i in 1:nboot){ - id=sample(n,replace=TRUE) -v[i]=qcor(xy[id,1:p],xy[id,p1],q=q,xout=xout)$cor - } - mv=mean(v) - v=sort(v) - ilow<-round((alpha/2) * nboot) -ihi<-nboot - ilow -ilow<-ilow+1 - ci=v[ilow] - ci[2]=v[ihi] - pv=mean(v<0)+.5*mean(v==0) - pv=2*min(pv,1-pv) - e=qcor(x,y,q=q)$cor # Already did or did not remove leverage points - list(Est.=e,ci=ci,p.value=pv) - } - - qcor.EP<-function(x,y,q=c(.25,.5,.75),alpha=.05,nboot=1000,SEED=TRUE, xout=TRUE, -method='PRO',regfun=Qreg){ -# -# Correlation based on the quantile regression estimator. -# This version is based on explanatory power. -# See Wilcox (2022, section 11.9). -# -nq=length(q) -res=matrix(NA,nq,5) -for(j in 1:nq){ -a=qcor.ep.ci(x,y,q=q[j],alpha=alpha,nboot=nboot,SEED=SEED,xout=xout) -res[j,]=c(q[j],a$Est.,a$ci[1],a$ci[2],a$p.value) -} -dimnames(res)=list(NULL,c('q','Est','ci.low','ci.up','p-value')) -res -} - -qcor.ep.ci<-function(x,y,q=.5,alpha=.05,nboot=599,SEED=TRUE, xout=TRUE, -method='PRO',regfun=Qreg){ -# -# Confidence interval for a quantile regression measure of association -# -# -if(SEED)set.seed(2) - xy=elimna(cbind(x,y)) -p1=ncol(xy) -if(p1>2)stop('Current version is for a single independent variable only') -#, use the R function corblp.ci') - if(xout){ -x<-as.matrix(x) -flag<-out.methods(x,y,plotit=FALSE,method=method,regfun=regfun)$keep -x<-x[flag,] -y<-y[flag] -n.keep=length(y) -} -p=p1-1 -x=xy[,1:p] -x=as.matrix(x) -y=xy[,p1] - v=NA - n=nrow(xy) - if(n<=40)xout=FALSE # to avoid computational issues when n is small. - for(i in 1:nboot){ - id=sample(n,replace=TRUE) -v[i]=qcor.ep(xy[id,1:p],xy[id,p1],q=q,xout=xout)$cor - } - mv=mean(v) - v=sort(v) - ilow<-round((alpha/2) * nboot) -ihi<-nboot - ilow -ilow<-ilow+1 - ci=v[ilow] - ci[2]=v[ihi] - e=qcor.ep(x,y,q=q,xout=xout)$cor - pv=mean(v<0)+.5*mean(v==0) - pv=2*min(pv,1-pv) - list(Est.=e,ci=ci,p.value=pv) - } - - qcor.L<-function(x,y,q=c(.25,.5,.75),alpha=.05,nboot=1000,SEED=TRUE, xout=TRUE, -method='PRO',regfun=Qreg){ -# -# Correlation based on the quantile regression estimator. -# This version is based on the approach used by -# Li, G., Li, Y. \& Tsai, C.-L. (2015). -# Quantile correlations and quantile autoregressive -# modeling. {\em Journal of the American Statistical Association , 110}, 246--261. -# https://doi.org/10.1080/01621459.2014.892007 -# -# -nq=length(q) -res=matrix(NA,nq,5) -for(j in 1:nq){ -a=qcor.ci(x,y,q=q[j],alpha=alpha,nboot=nboot,SEED=SEED,xout=xout) -res[j,]=c(q[j],a$Est.,a$ci[1],a$ci[2],a$p.value) -} -dimnames(res)=list(NULL,c('q','Est','ci.low','ci.up','p-value')) -res -} - -qcor.R<-function(x,y,q=c(.25,.5,.75),alpha=.05,nboot=1000,SEED=TRUE, xout=TRUE, -method='PRO',regfun=Qreg){ -# -# Correlation based on the quantile regression estimator. -# This version is based on the ratio of the loss function for the full model versus the null case -# See Wilcox (2022, section 11.9). -# -nq=length(q) -res=matrix(NA,nq,5) -for(j in 1:nq){ -a=qcorp1.ci(x,y,q=q[j],alpha=alpha,nboot=nboot,SEED=SEED,xout=xout) -res[j,]=c(q[j],a$Est.,a$ci[1],a$ci[2],a$p.value) -} -dimnames(res)=list(NULL,c('q','Est','ci.low','ci.up','p-value')) -res -} - -qcor.EP<-function(x,y,q=c(.25,.5,.75),alpha=.05,nboot=1000,SEED=TRUE, xout=TRUE, -method='PRO',regfun=Qreg){ -# -# Correlation based on the quantile regression estimator. -# This version is based on explanatory power. -# See Wilcox (2022, section 11.9). 
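-# A minimal sketch of what follows: for each value in q, qcor.ep.ci is called
-# and its estimate, confidence interval and p-value are stored as one row of
-# the matrix that is returned.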
-# -nq=length(q) -res=matrix(NA,nq,5) -for(j in 1:nq){ -a=qcor.ep.ci(x,y,q=q[j],alpha=alpha,nboot=nboot,SEED=SEED,xout=xout) -res[j,]=c(q[j],a$Est.,a$ci[1],a$ci[2],a$p.value) -} -dimnames(res)=list(NULL,c('q','Est','ci.low','ci.up','p-value')) -res -} - -qcor.ep<-function(x,y,qest=hd,q=.5,xout=FALSE,method='PRO',regfun=MMreg, -plotit=FALSE,...){ -# -# Compute a measure of the strength of the association in terms of explanatory power and -# based on the quantile regression line -# -X=cbind(x,y) -X=elimna(X) -x<-as.matrix(x) -p=ncol(x) -x=X[,1:p] -p1=p+1 -y=X[,p1] -if(xout){ -x<-as.matrix(x) -flag<-out.methods(x,y,plotit=plotit,method=method,regfun=regfun)$keep -x<-x[flag,] -y<-y[flag] -x<-as.matrix(x) -X=cbind(x,y) -} -est=qreg(x,y,q=q)$coef -pred=reg.pred(x,y,regfun=Qreg,q=q) -top=pbvar(pred) -bot=pbvar(y) -EP=top/bot -if(p==1)ce=sign(est[2])*sqrt(EP) -list(cor=ce,Explanatory.power=EP) -} - -Stein.pairs<-function(x,delta,alpha=.05,power=.8,z.sqrt=NULL,tr=.2,reps=100000,SEED=TRUE){ -# -# All pairwise comparisons among J independent groups -# Using available data, determine how many observations, if any, are needed to get power >= the -# value indicated by the argument -# power, given that the difference between the measures of location is -# delta -# -if(is.matrix(x))x=listm(x) -J=length(x) -ic=0 -ALL=(J^2-J)/2 -output=matrix(NA,ALL,6) -for(j in 1:J){ -for(k in 1:J){ -if(j= the value -# specified by the argument power when the linear contrast is >= the value -# indicated by the argument -# delta -# -# -if(SEED)set.seed(2) -x=elimna(x) -if(is.matrix(x))x=listm(x) -J=length(x) -n=lapply(x,length) -n=list2vec(n) -g=floor(tr*n) -df=n-2*g-1 -sq=lapply(x,winsdN,tr=tr) -sq=list2vec(sq) -sq=sq^2 -top=1-alpha/2 -t2=1-power -if(is.null(z.sqrt)){ -v=sum.T(df,reps=reps,con=con) -ta=qest(v,top) -q=qest(v,t2) -delta=abs(delta) # Same result if not done but this avoids getting negative sqrt(z) -z.sqrt=delta/(ta-q) -} -N=rep(0,J) -for(k in 1:J){ -if(con[k]!=0)N[k]=max(floor(sq[k]/z.sqrt^2),n[k]) -} -list(n=n,N=N,z.sqrt=z.sqrt) -} - - -Stein.LC<-function(x,delta,con,alpha=.05,power=.8,z=NULL,tr=.2,reps=100000,SEED=TRUE){ -# -# For a collection of linear contrast coefficients, -# determine the total number of observations to achieve power >= the value -# specified by the argument power in the linear contrast is >= the value -# indicated by the argument -# delta -# -# -if(SEED)set.seed(2) -if(is.null(ncol(con)))N=Stein.LC1(x,delta=delta,con=con,alpha=alpha,power=power,tr=tr, -reps=reps,SEED=SEED)$N -else{ -J=nrow(con) -NL=ncol(con) -N=matrix(NA,J,NL) -for(k in 1:NL)N[,k]=Stein.LC1(x,delta=delta,con[,k],alpha=alpha,power=power,tr=tr, -reps=reps,SEED=SEED)$N -} -list(con=con,N=N) -} - -Stein2g<-function(x,y=NULL,delta,alpha=.05,power=.8,z.sqrt=NULL,tr=.2,reps=100000,SEED=TRUE){ -# -# For two independent groups, to Stein-type two stage method for determining how many more -# observations, if any, are need to achieve power indicated by the argument -# pow given a difference in location -# delta -# -# if y=NULL -# x can be a matrix with 2 columns or have list mode with length 2 -# -# Or -# x and y can be vector. 
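-# Illustrative call (hypothetical data): Stein2g(rnorm(25),rnorm(25),delta=.5)
-# reports the current sample sizes n and the sample sizes N judged necessary to
-# achieve the specified power for a difference of delta between the trimmed means.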
-# -if(SEED){ -if(is.null(z.sqrt))set.seed(2) -} -if(!is.null(y))x=list(x,y) -x=elimna(x) -if(is.matrix(x))x=listm(x) -J=length(x) -if(J!=2)stop('This function is for two groups only') -n=lapply(x,length) -n=list2vec(n) -g=floor(tr*n) -df=n-2*g-1 -sq=lapply(x,winsdN,tr=tr) -sq=list2vec(sq) -sq=sq^2 -top=1-alpha/2 -t2=1-power -if(is.null(z.sqrt)){ -v=sum.T(df,reps=reps,con=c(1,-1)) -ta=qest(v,top) -q=qest(v,t2) -delta=abs(delta) # Same result if not done but this avoids getting negative sqrt(z) -z.sqrt=delta/(ta-q) -} -N=NA -for(k in 1:2){ -N[k]=max(floor(sq[k]/z.sqrt^2),n[k]) -} -list(n=n,N=N,z.sqrt=z.sqrt) -} - -sum.T<-function(df,reps=100000,con){ -# -# Estimate distribution of T=sum T_j, T_j independent T distribution with degree of stored in -# df -# -# Return dist. -n=df+1 -v=NA -K=length(df) -for(j in 1:reps){ -e=0 -for(k in 1:K)e=e+con[k]*rt(1,df=df[k]) -v[j]=e -} -v -} - - -qest.meth<-function(x,q=.5,method=c('HD','NO','TR','SO')){ - -type=match.arg(method) -switch(type, -HD=hd(x,q=q), -NO=qno.est(x,q=q), -TR=thd(x,q=q), -SO=qest(x,q=q)) -} - -rbin.mul<-function(n,N,prob=.5,p=2){ -# -# p multivariate binomial, correlations are .5 -# -p1=p+1 -np1=n*p1 -z=matrix(rbinom(np1,size=N,prob=prob),ncol=p1) -for(j in 1:p)z[,j]=z[,j]+z[,p1] -z[,1:p] -} - -bwdepth<-function(x,y,fun=prodepth,plotit=FALSE,xlab='V1',ylab='V2'){ -# -# For two independent groups, let X and Y denote multivariate random variables -# This function estimates the extent the distributions overlap using the notion -# of projection distances. In effect, a nonparametric measure of effect size is -# estimated -# For identical distribution, effect size is .5. The more separated the distributions, the -# closer is the effect size to zero. Complete separation means the effect size is equal to zero -# -# -# -x=elimna(x) -y=elimna(y) -x=as.matrix(x) -y=as.matrix(y) -n1=nrow(x) -n2=nrow(y) -if(ncol(x)==1){ -fun=unidepth -x=as.vector(x) -y=as.vector(y) -} -pdyy=fun(y,y) -pdyx=fun(y,x) -pdxx=fun(x,x) -pdxy=fun(x,y) -v1=NA -v2=NA -ic=0 -for(i in 1:n2){ -for(j in 1:n1){ -ic=ic+1 -v1[ic]=pdyy[i]<=pdyx[j] -}} -ic=0 -for(j in 1:n1){ -for(i in 1:n2){ -ic=ic+1 -v2[ic]=pdxx[j]<=pdxy[i] -}} -e1=mean(v1) -e2=mean(v2) -e=(n1*e1+n2*e2)/(n1+n2) -x=as.matrix(x) -y=as.matrix(y) -if(plotit){ -if(ncol(x)==2){ -plot(rbind(x,y),xlab=xlab,ylab=ylab,type='n') -points(x,pch='*') -points(y,pch='o') -}} -list(e=e,e1=e1,e2=e2) -} - -bwdepthMC.ci<-function(x,y,fun=prodepth,nboot=100,alpha=.05,MC=TRUE, -SEED=TRUE,plotit=FALSE,xlab='V1',ylab='V2'){ -# -if(SEED)set.seed(2) - crit=qnorm(1-alpha/2) -if(identical(fun,prodepth))MC=FALSE # get odd error otherwise -x=elimna(x) -y=elimna(y) -x=as.matrix(x) -y=as.matrix(y) -n1=nrow(x) -n2=nrow(y) -est=bwdepth(x,y,plotit=plotit,xlab=xlab,ylab=ylab) -if(MC)library(parallel) -id=list() -for(i in 1:nboot)id[[i]]=c(sample(n1,replace=TRUE),sample(n2,replace=TRUE)) -if(!MC)BE=lapply(id,bwdepth.sub,x,y,n1,n2,fun=fun) -if(MC)BE=mclapply(id,bwdepth.sub,x,y,n1,n2,fun=fun) -E=matl(BE) -se=sd(E) -c1=est$e-crit*se -c1[2]=est$e+crit*se -test=(est$e-.5)/se -pv=2*(1-pnorm(abs(test))) -list(n1=n1,n2=n2,Est=est$e,ci=c1,p.value=pv) -} - -bwdepth.sub<-function(id,x,y,n1,n2,fun){ -n=n1+n2 -np1=n1+1 -e=bwdepth(x[id[1:n1],],y[id[np1:n],],fun=fun)$e -e -} - -bwdepth.perm<-function(x,y,reps=500, -fun=prodepth,alpha=.05,SEED=TRUE){ -# -# Permutation test of F=G, two independent multivariate distributions -# -# -if(SEED)set.seed(2) -x=elimna(x) -y=elimna(y) -x=as.matrix(x) -y=as.matrix(y) -n1=nrow(x) -np1=n1+1 -n2=nrow(y) -n=n1+n2 
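-# d below is the observed depth-based effect size for the original samples; the
-# permutation distribution is formed by repeatedly shuffling the pooled rows and
-# recomputing it, and the alpha/2 and 1-alpha/2 quantiles serve as critical values.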
-d=bwdepth(x,y)$e -xy=rbind(x,y) -print(dim(xy)) -v=NA -for(i in 1:reps){ -ip=sample(n,replace=FALSE) -z=xy[ip,] -v[i]=bwdepth(z[1:n1,],z[np1:n,])$e -} -v=sort(v) -il=round(alpha*reps/2) -iu=reps-il -list(Est=d,Lower.crit=v[il],Upper.crit=v[iu]) -} - -simp.break<-function(x,y,pts){ -# -# Estimate the break point -# -#. Using a method derived by -# Muggeo 2003 -# STATISTICS IN MEDICINE -# Statist. Med. 2003; 22:3055-3071 (DOI: 10.1002/sim.1545) -# - library(segmented) -sati=data.frame(xx=x,yy=y) -M.lm=lm(y~x,data=dati) -a=segmented(ut.lm,psi=pts) -a -} - -reg.break<-function(x,y,int=NULL,xout=TRUE,regfun=tsreg,outfun=outpro,...){ -# -# Estimate the break point of a regression line, where the line bends. -# That is, where the slope suddenly changes. -# Use a. robust analog of the method in -# A Statistical Method for Determining the Breakpoint of Two Lines -# Jones and Molitoris -# Analytical Biochemistry 287-290 (1984) -# -x<-as.matrix(x) -p1<-ncol(x)+1 -p<-ncol(x) -if(p!=1)stop('Current version limited to a single independent variable') -xy<-cbind(x,y) -xy<-elimna(xy) -x<-xy[,1:p] -x=as.vector(x) -y<-xy[,p1] -if(xout){ -m<-cbind(x,y) -if(identical(outfun,outblp))flag=outblp(x,y,plotit=FALSE)$keep -else -flag<-outfun(x,plotit=FALSE,...)$keep -m<-m[flag,] -x<-m[,1:p] -y<-m[,p1] -} -if(is.null(int)){ -low=qest(x,.25) -up=qest(x,.75) -int=seq(low,up,length.out=25) -} -nrem=length(y) -if(!is.null(int)){ -x0=int -iup=length(x0) - -} -v=NA -for(i in 1:iup){ -id=x +#USC Stevens Institute for Innovation +#University of Southern California +#1150 S. Olive Street, Suite 2300 +#Los Angeles, CA 90015, USA +#Tel: +#Fax: +1 213-821-5001 +#Email: a +# +#and cc to: +#accounting@stevens.usc.edu + + +# Last update: +# May, 2024 + + +madsq<-function(x)mad(x)^2 + +listv2mat<-function(x){ +# +# Each x[[]] has a vector of same length, p +# store in a matrix with p columns +# +p=length(x[[1]]) +n=length(x) +m=matrix(NA,nrow=n,ncol=p) +for(i in 1:n)m[i,]=x[[i]] +m +} + + +DqdifMC<-function(x,y=NULL,q=.25,nboot=1000,plotit=TRUE,xlab='Group 1 - Group 2',SEED=TRUE,alpha=.05){ +# +# Compare two dependent groups by comparing the +# q and 1-q quantiles of the difference scores +# +# q should be < .5 +# if the groups do not differ, then the difference scores should be symmetric +# about zero. +# In particular, the sum of q and 1-q quantiles should be zero. +# +# q indicates the quantiles to be compared. By default, the .25 and .75 quantiles are used. 
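+# Illustrative call (hypothetical data): DqdifMC(rnorm(40),rnorm(40))
+# tests whether the .25 and .75 quantiles of the difference scores sum to zero.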
+# +library(parallel) +if(SEED)set.seed(2) +if(q>=.5)stop('q should be less than .5') +if(!is.null(y)){ +xy=elimna(cbind(x,y)) +dif=xy[,1]-xy[,2] +} +if(is.null(y))dif=elimna(x) +x=as.matrix(x) +n=length(dif) +if(plotit)akerd(dif,xlab=xlab) +bvec=NA +data<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot) +data=listm(t(data)) +bvec<-mclapply(data,difQMC_sub,dif,q,mc.preschedule=TRUE) +bvec=matl(bvec) +est1=hd(dif,q=q) +est2=hd(dif,q=1-q) +pv=mean(bvec<0)+.5*mean(bvec==0) +p=2*min(c(pv,1-pv)) +low<-round((alpha/2)*nboot)+1 +up<-nboot-low +sbvec=sort(bvec) +ci=sbvec[low] +ci[2]=sbvec[up] +list(est.q=est1,est.1.minus.q=est2,conf.interval=ci,p.value=p) +} + +winsd<-function(x,tr=.2,na.rm=FALSE){ +val=sqrt(winvar(x,tr=tr,na.rm=na.rm)) +val +} + +winsd05<-function(x,tr=.2,na.rm=FALSE){ +val=sqrt(winvar(x,tr=tr,na.rm=na.rm)) +val +} + + + +difQMC_sub<-function(data,dif,q){ +es=hd(dif[data],q)+hd(dif[data],1-q) +es +} + + +ancGparMC<-function(x1,y1,x2,y2,regfun=tsreg,nboot=100,SEED=TRUE,xout=FALSE,eout=FALSE,outfun=outpro, +STAND=TRUE,plotit=TRUE,xlab="X",ylab="Y",ISO=FALSE,...){ +# +# Test hypothesis that for two independent groups, all regression parameters are equal +# By default the Theil--Sen estimator is used +# +# Strategy: Use bootstrap estimate of standard errors followed by +# Johansen type test statistic. +# +# ISO=TRUE, ignore intercept, test only the slope parameters. +# +x1=as.matrix(x1) +p=ncol(x1) +p1=p+1 +xy=elimna(cbind(x1,y1)) +x1=xy[,1:p] +y1=xy[,p1] +x2=as.matrix(x2) +p=ncol(x2) +p1=p+1 +xy=elimna(cbind(x2,y2)) +x2=xy[,1:p] +y2=xy[,p1] +if(plotit){ +xx1=x1 +yy1=y1 +xx2=x2 +yy2=y2 +if(ncol(as.matrix(x1))==1){ +if(eout){ +flag=outfun(cbind(x1,y1),plotit=FALSE,...)$keep +xx1=x1[flag] +yy1=y1[flag] +flag=outfun(cbind(x2,y2),plotit=FALSE,...)$keep +xx2=x2[flag] +yy2=y2[flag] +} +if(xout){ +flag=outfun(xx1,plotit=FALSE,...)$keep +xx1=x1[flag] +yy1=y1[flag] +flag=outfun(xx2,plotit=FALSE,...)$keep +xx2=x2[flag] +yy2=y2[flag] +} +plot(c(xx1,xx2),c(yy1,yy2),type="n",xlab=xlab,ylab=ylab) +points(xx1,yy1) +points(xx2,yy2,pch="+") +abline(regfun(xx1,yy1,...)$coef) +abline(regfun(xx2,yy2,...)$coef,lty=2) +}} +x=list() +y=list() +x[[1]]=x1 +x[[2]]=x2 +y[[1]]=y1 +y[[2]]=y2 +if(!ISO)output=reg1wayMC(x,y,regfun=regfun,nboot=nboot,xout=xout,outfun=outfun, +SEED=SEED,STAND=STAND,...) +if(ISO)output=reg1wayISOMC(x,y,regfun=regfun,nboot=nboot,xout=xout,outfun=outfun, +SEED=SEED,STAND=STAND,...) +output +} + + + +qcomhdMC<-function(x,y,est=hd,q=c(.1,.25,.5,.75,.9),nboot=4000,plotit=TRUE,SEED=TRUE,xlab="Group 1",ylab="Est.1-Est.2",alpha=.05,ADJ.CI=TRUE){ +# +# Compare quantiles using pb2gen +# via hd estimator. Tied values are allowed. +# +# ADJ.CI=TRUE means that the confidence intervals are adjusted based on the level used by the corresponding +# test statistic. If a test is performed with at the .05/3 level, for example, the confidence returned has +# 1-.05/3 probability coverage. +# +# When comparing lower or upper quartiles, both power and the probability of Type I error +# compare well to other methods that have been derived. +# q: can be used to specify the quantiles to be compared +# q defaults to comparing the .1,.25,.5,.75, and .9 quantiles +# +# Function returns p-values and critical p-values based on Hochberg's method. 
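+# Illustrative call (hypothetical data): qcomhdMC(rnorm(50),rnorm(60),q=c(.25,.5,.75))
+# compares the specified quantiles of two independent groups, with FWE controlled
+# via Hochberg's method.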
+# +library(parallel) +if(SEED)set.seed(2) +print('Can also use the function qcomhd with the argument MC=TRUE') +pv=NULL +output=matrix(NA,nrow=length(q),ncol=10) +dimnames(output)<-list(NULL,c("q","n1","n2","est.1","est.2","est.1_minus_est.2","ci.low","ci.up","p_crit","p-value")) +for(i in 1:length(q)){ +output[i,1]=q[i] +output[i,2]=length(elimna(x)) +output[i,3]=length(elimna(y)) +output[i,4]=hd(x,q=q[i]) +output[i,5]=hd(y,q=q[i]) +output[i,6]=output[i,4]-output[i,5] +temp=qcom.sub(x,y,nboot=nboot,q=q[i],SEED=FALSE,alpha=alpha) +output[i,7]=temp$ci[1] +output[i,8]=temp$ci[2] +output[i,10]=temp$p.value +} +temp=order(output[,10],decreasing=TRUE) +zvec=alpha/c(1:length(q)) +output[temp,9]=zvec +if(ADJ.CI){ +for(i in 1:length(q)){ +temp=pb2genMC(x,y,nboot=nboot,est=est,q=q[i],SEED=FALSE,alpha=output[i,9],pr=FALSE) +output[i,7]=temp$ci[1] +output[i,8]=temp$ci[2] +output[i,10]=temp$p.value +} +temp=order(output[,10],decreasing=TRUE) +} +output <- data.frame(output) +output$signif=rep("YES",nrow(output)) +for(i in 1:nrow(output)){ +if(output[temp[i],10]>output[temp[i],9])output$signif[temp[i]]="NO" +#if(output[temp[i],10]<=output[temp[i],9])break +} +if(plotit){ +xax=rep(output[,4],3) +yax=c(output[,6],output[,7],output[,8]) +plot(xax,yax,xlab=xlab,ylab=ylab,type="n") +points(output[,4],output[,6],pch="*") +lines(output[,4],output[,6]) +points(output[,4],output[,7],pch="+") +points(output[,4],output[,8],pch="+") +} +output +} + +qcom.sub<-function(x,y,q,alpha=.05,nboot=2000,SEED=TRUE){ +# +x<-x[!is.na(x)] # Remove any missing values in x +y<-y[!is.na(y)] # Remove any missing values in y +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +datax<-matrix(sample(x,size=length(x)*nboot,replace=TRUE),nrow=nboot) +datay<-matrix(sample(y,size=length(y)*nboot,replace=TRUE),nrow=nboot) +datax=listm(t(datax)) +datay=listm(t(datay)) +bvecx<-mclapply(datax,hd,q,mc.preschedule=TRUE) +bvecy<-mclapply(datay,hd,q,mc.preschedule=TRUE) +bvecx=as.vector(matl(bvecx)) +bvecy=as.vector(matl(bvecy)) +bvec<-sort(bvecx-bvecy) +low<-round((alpha/2)*nboot)+1 +up<-nboot-low +temp<-sum(bvec<0)/nboot+sum(bvec==0)/(2*nboot) +sig.level<-2*(min(temp,1-temp)) +se<-var(bvec) +list(est.1=hd(x,q),est.2=hd(y,q),ci=c(bvec[low],bvec[up]),p.value=sig.level,sq.se=se,n1=length(x),n2=length(y)) +} + + + +smeanMC<-function(m,cop=6,MM=FALSE,op=1,outfun=outogk,cov.fun=rmba,...){ +# +# m is an n by p matrix +# +# Compute a multivariate skipped measure of location +# +# op=1: +# Eliminate outliers using a projection method +# That is, first determine center of data using: +# if op=1, a multi-core processor is used via the +# package multicore +# +# cop=1 Donoho-Gasko median, +# cop=2 MCD, +# cop=3 marginal medians. +# cop=4 uses MVE center +# cop=5 uses TBS +# cop=6 uses rmba (Olive's median ball algorithm) +# +# For each point +# consider the line between it and the center, +# project all points onto this line, and +# check for outliers using +# +# MM=F, a boxplot rule. +# MM=T, rule based on MAD and median +# +# Repeat this for all points. A point is declared +# an outlier if for any projection it is an outlier +# using a modification of the usual boxplot rule. +# +# op=2 use mgv (function outmgv) method to eliminate outliers +# an outlier if for any projection it is an outlier +# using a modification of the usual boxplot rule. +# +# op=3 use outlier method indicated by outfun +# +# Eliminate any outliers and compute means +# using remaining data. 
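+# Illustrative call (hypothetical bivariate data): smeanMC(matrix(rnorm(100),ncol=2))
+# returns the column means computed after removing points flagged as outliers.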
+# +m<-elimna(m) +if(op==1){ +temp<-outproMC(m,plotit=FALSE,cop=cop,MM=MM)$keep +} +if(op==2)temp<-outmgv(m,plotit=FALSE,cov.fun=cov.fun)$keep +if(op==3)temp<-outfun(m,plotit=FALSE,...)$keep +val<-apply(m[temp,],2,mean) +val +} + + pb2genMC<-function(x,y,alpha=.05,nboot=2000,est=onestep,SEED=TRUE,pr=FALSE,...){ +# +# Compute a bootstrap confidence interval for the +# the difference between any two parameters corresponding to +# independent groups. +# By default, M-estimators are compared. +# Setting est=mean, for example, will result in a percentile +# bootstrap confidence interval for the difference between means. +# Setting est=onestep will compare M-estimators of location. +# The default number of bootstrap samples is nboot=2000 +# +library(parallel) +x<-x[!is.na(x)] # Remove any missing values in x +y<-y[!is.na(y)] # Remove any missing values in y +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +if(pr)print("Taking bootstrap samples. Please wait.") +datax<-matrix(sample(x,size=length(x)*nboot,replace=TRUE),nrow=nboot) +datay<-matrix(sample(y,size=length(y)*nboot,replace=TRUE),nrow=nboot) +# +datax=t(datax) +datay=t(datay) +datax=listm(datax) +datay=listm(datay) +bvecx<-mclapply(datax,est,mc.preschedule=TRUE,...) +bvecy<-mclapply(datay,est,mc.preschedule=TRUE,...) +bvec=sort(matl(bvecx)-matl(bvecy)) +low<-round((alpha/2)*nboot)+1 +up<-nboot-low +temp<-sum(bvec<0)/nboot+sum(bvec==0)/(2*nboot) +sig.level<-2*(min(temp,1-temp)) +se<-var(bvec) +list(est.1=est(x,...),est.2=est(y,...),ci=c(bvec[low],bvec[up]),p.value=sig.level,sq.se=se,n1=length(x),n2=length(y)) +} + +cbmhdMC<-function(x,y,alpha=.05,q=.25,plotit=FALSE,pop=0,fr=.8,rval=15,xlab="",ylab="",nboot=600,SEED=TRUE){ +# +# Compute a confidence interval for the sum of the qth and (1-q)th quantiles +# of the distribution of D=X-Y, where X and Y are two independent random variables. +# The Harrell-Davis estimator is used +# If the distribution of X and Y are identical, then in particular the +# distribution of D=X-Y is symmetric about zero. +# +# plotit=TRUE causes a plot of the difference scores to be created +# pop=0 adaptive kernel density estimate +# pop=1 results in the expected frequency curve. 
+# pop=2 kernel density estimate (Rosenblatt's shifted histogram) +# pop=3 boxplot +# pop=4 stem-and-leaf +# pop=5 histogram +# +library(parallel) +if(SEED)set.seed(2) +if(q>=.5)stop("q should be less than .5") +if(q<=0)stop("q should be greater than 0") +x<-x[!is.na(x)] +y<-y[!is.na(y)] +n1=length(x) +n2=length(y) +m<-outer(x,y,FUN="-") +q2=1-q +est1=hd(m,q) +est2=hd(m,q2) +data1<-matrix(sample(n1,size=n1*nboot,replace=TRUE),nrow=nboot) +data2<-matrix(sample(n2,size=n2*nboot,replace=TRUE),nrow=nboot) +data=cbind(data1,data2) +data=listm(t(data)) +bvec=NA +bvec<-mclapply(data,cbmhd_subMC,x=x,y=y,q=q,q2=q2,n1=n1,n2=n2,mc.preschedule=TRUE) +bvec=list2vec(bvec) +p=mean(bvec>0)+.5*mean(bvec==0) +p=2*min(c(p,1-p)) +sbv=sort(bvec) +ilow<-round((alpha/2) * nboot) +ihi<-nboot - ilow +ilow<-ilow+1 +ci=sbv[ilow] +ci[2]=sbv[ihi] +if(plotit){ +if(pop==1 || pop==0){ +if(length(x)*length(y)>2500){ +print("Product of sample sizes exceeds 2500.") +print("Execution time might be high when using pop=0 or 1") +print("If this is case, might consider changing the argument pop") +print("pop=2 might be better") +}} +MM=as.vector(m) +if(pop==0)akerd(MM,xlab=xlab,ylab=ylab) +if(pop==1)rdplot(MM,fr=fr,xlab=xlab,ylab=ylab) +if(pop==2)kdplot(MM,rval=rval,xlab=xlab,ylab=ylab) +if(pop==3)boxplot(MM) +if(pop==4)stem(MM) +if(pop==5)hist(MM,xlab=xlab) +if(pop==6)skerd(MM) +} +list(q=q,Est1=est1,Est2=est2,sum=est1+est2,ci=ci,p.value=p) +} + +cbmhd_subMC<-function(data,cbmhd_subMC,x,y,q,q2,n1,n2){ +np1=n1+1 +nall=n1+n2 +mb=outer(x[data[1:n1]],y[data[np1:nall]],"-") +est=hd(mb,q)+hd(mb,q2) +est +} + +lintestMC<-function(x,y,regfun=tsreg,nboot=500,alpha=.05,xout=FALSE,outfun=out,...){ +# +# Test the hypothesis that the regression surface is a plane. +# Stute et al. (1998, JASA, 93, 141-149). +# +library(parallel) +set.seed(2) +if(identical(regfun,tshdreg))print('When using tshdreg, be sure to include RES=TRUE') +#if(identical(regfun,Qreg))print('When using Qreg, be sure to include res.vals=TRUE') +x<-as.matrix(x) +d<-ncol(x) +temp<-elimna(cbind(x,y)) +x<-temp[,1:d] +x<-as.matrix(x) +y<-temp[,d+1] +if(xout){ +flag<-outfun(x)$keep +x<-x[flag,] +x<-as.matrix(x) +y<-y[flag] +} +mflag<-matrix(NA,nrow=length(y),ncol=length(y)) +for (j in 1:length(y)){ +for (k in 1:length(y)){ +mflag[j,k]<-(sum(x[j,]<=x[k,])==ncol(x)) +} +} +reg<-regfun(x,y,...) +yhat<-y-reg$residuals +print("Taking bootstrap samples, please wait.") +data<-matrix(runif(length(y)*nboot),nrow=nboot) +data<-sqrt(12)*(data-.5) # standardize the random numbers. +data=listm(t(data)) +rvalb<-mclapply(data,lintests1,yhat,reg$residuals,mflag,x,regfun,mc.preschedule=TRUE,...) +# An n x nboot matrix of R values +rvalb=matl(rvalb) +rvalb<-rvalb/sqrt(length(y)) +dstatb<-apply(abs(rvalb),2,max) +wstatb<-apply(rvalb^2,2,mean) +# compute test statistic +v<-c(rep(1,length(y))) +rval<-lintests1(v,yhat,reg$residuals,mflag,x,regfun,...) +rval<-rval/sqrt(length(y)) +dstat<-max(abs(rval)) +wstat<-mean(rval^2) +ib<-round(nboot*(1-alpha)) +p.value.d<-1-sum(dstat>=dstatb)/nboot +p.value.w<-1-sum(wstat>=wstatb)/nboot +list(dstat=dstat,wstat=wstat,p.value.d=p.value.d,p.value.w=p.value.w) +} + + + lloc<-function(x,est=tmean,...){ +if(is.data.frame(x)){ +x=as.matrix(x) +x=apply(x,2,as.numeric) # earlier versions of R require this command +} +if(!is.list(x))val<-est(x,...) +if(is.list(x))val=lapply(x,est,...) +if(is.matrix(x))val<-apply(x,2,est,...) 
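+# val now holds the location estimate(s): a single value when x is a vector,
+# one value per column when x is a matrix, and one per element when x is a list.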
+val +} + +reg2g.p2plot<-function(x1,y1,x2,y2,xout=FALSE,outfun=outpro,xlab="Var 1",ylab="Var 2",zlab="Var 3",regfun=tsreg,COLOR=TRUE,STAND=TRUE, +tick.marks=TRUE,type="p",pr=TRUE,...){ +# +# Create a 3D plot of points and plot regression surface for two groups. +# +# Assumes that the package scatterplot3d has been installed. +# If not, use the command install.packages("scatterplot3d") +# assuming you are connected to the web. +# +# The regression method used is specified with the argument +# regfun. +# +# type="p", points will be plotted. Use type="n" to get only regression planes plotted +# +x1=as.matrix(x1) +x2=as.matrix(x2) +if(ncol(x1)!=2)stop("Argument x1 must be stored in a matrix with 2 columns.") +if(ncol(x2)!=2)stop("Argument x2 must be stored in a matrix with 2 columns.") +xy1<-elimna(cbind(x1,y1)) +xy2<-elimna(cbind(x2,y2)) +if(xout){ +if(!STAND)flag1=outfun(xy1[,1:2],plotit=FALSE,...)$keep +if(STAND)flag1=outpro(xy1[,1:2],plotit=FALSE,STAND=TRUE,...)$keep +if(!STAND)flag2=outfun(xy2[,1:2],plotit=FALSE,...)$keep +if(STAND)flag2=outpro(xy2[,1:2],plotit=FALSE,STAND=TRUE,...)$keep +xy1=xy1[flag1,] +xy2=xy2[flag2,] +} +x1=xy1[,1:2] +x2=xy2[,1:2] +y1=xy1[,3] +y2=xy2[,3] +library(scatterplot3d) +temp<-scatterplot3d(rbind(xy1,xy2),xlab=xlab,ylab=ylab,zlab=zlab,tick.marks=tick.marks,type=type) +vals1<-regfun(x1,y1,...)$coef +vals2<-regfun(x2,y2,...)$coef +if(COLOR){ +if(pr)print("First group is blue") +temp$plane(vals1,col="blue") +temp$plane(vals2,col="red") +} +if(!COLOR){ +temp$plane(vals1) +temp$plane(vals2) +} +list(coef.group.1=vals1,coef.group.2=vals2) +} + + +regp2plot<-function(x,y,xout=FALSE,outfun=out,xlab="Var 1",ylab="Var 2",zlab="Var 3",regfun=tsreg,COLOR=FALSE,tick.marks=TRUE,...){ +# +# Create a 3D plot of points and plot regression surface. +# based on the regression estimator indicated by +# regfun +# +# Assumes that the package scatterplot3d has been installed. +# If not, use the command install.packages("scatterplot3d") +# assuming you are connected to the web. +# +# The regression method used is specified with the argument +# regfun. +# +# Package scatterplot3d is required. To install it, use the command +# install.packages("scatterplot3d") +# while connected to the web +# +x=as.matrix(x) +if(ncol(x)!=2)stop("Argument x must be stored in a matrix with 2 columns.") +xy<-elimna(cbind(x,y)) +if(xout){ +flag=outfun(xy[,1:2])$keep +xy=xy[flag,] +} +x=xy[,1:2] +y=xy[,3] +library(scatterplot3d) +temp<-scatterplot3d(xy,xlab=xlab,ylab=ylab,zlab=zlab,tick.marks=tick.marks) +vals<-regfun(x,y,...)$coef +if(COLOR)temp$plane(vals,col="blue") +if(!COLOR)temp$plane(vals) +} + + +reg2plot<-function(x1,y1,x2,y2,regfun=tsreg,xlab="X",ylab="Y",xout=FALSE,outfun=outpro,pch1='.',pch2='+',...){ +# +# For convenience +# plot two regression lines corresponding to two groups. +# +xy=elimna(cbind(x1,y1)) +x1=xy[,1] +y1=xy[,2] +xy=elimna(cbind(x2,y2)) +x2=xy[,1] +y2=xy[,2] +if(xout){ +if(identical(outfun,outblp))flag=outblp(x1,y1,plotit=FALSE)$keep +else +flag<-outfun(x1,plotit=FALSE,...)$keep +x1=x1[flag] +y1=y1[flag] +if(identical(outfun,outblp))flag=outblp(x2,y2,plotit=FALSE)$keep +else +flag<-outfun(x2,plotit=FALSE,...)$keep +x2=x2[flag] +y2=y2[flag] +} +plot(c(x1,x2),c(y1,y2),type="n",xlab=xlab,ylab=ylab) +points(x1,y1,pch=pch1) +points(x2,y2,pch=pch2) +abline(regfun(x1,y1,...)$coef) +abline(regfun(x2,y2,...)$coef,lty=2) +} + +ghdist<-function(n,g=0,h=0){ +# +# generate n observations from a g-and-h dist. 
+# +x<-rnorm(n) +if (g>0){ +ghdist<-(exp(g*x)-1)*exp(h*x^2/2)/g +} +if(g==0)ghdist<-x*exp(h*x^2/2) +ghdist +} + + +wincor<-function(x,y=NULL,tr=.2){ +# Compute the Winsorized correlation between x and y. +# +# tr is the amount of Winsorization +# This function also returns the Winsorized covariance +# +# Pairwise deletion of missing values is performed. +# +# x is a vector, or it can be a matrix with two columns when y=NULL +# + +if(!is.null(y[1])){ +m=cbind(x,y) +} +else m=x +m<-elimna(m) +nval=nrow(m) +if(ncol(m)==2){ +a=wincor.sub(m[,1],m[,2],tr=tr) +wcor=a$cor +wcov=a$cov +sig=a$p.value +} +if(ncol(m)>2){ +#if(is.data.frame(m))m=as.matrix(m) +if(!is.matrix(m))stop("The data must be stored in a n by p matrix") +wcor<-matrix(1,ncol(m),ncol(m)) +wcov<-matrix(0,ncol(m),ncol(m)) +siglevel<-matrix(NA,ncol(m),ncol(m)) +for (i in 1:ncol(m)){ +ip<-i +for (j in ip:ncol(m)){ +val<-wincor.sub(m[,i],m[,j],tr) +wcor[i,j]<-val$cor +wcor[j,i]<-wcor[i,j] +if(i==j)wcor[i,j]<-1 +wcov[i,j]<-val$cov +wcov[j,i]<-wcov[i,j] +if(i!=j){ +siglevel[i,j]<-val$p.value +siglevel[j,i]<-siglevel[i,j] +} +}} +sig=siglevel +} +list(n=nval,cor=wcor,cov=wcov,p.value=sig) +} + +wincor.sub<-function(x,y,tr=tr){ +sig<-NA +g<-floor(tr*length(x)) +xvec<-winval(x,tr) +yvec<-winval(y,tr) +wcor<-cor(xvec,yvec) +wcov<-var(xvec,yvec) +if(sum(x==y)!=length(x)){ +test<-wcor*sqrt((length(x)-2)/(1.-wcor^2)) +sig<-2*(1-pt(abs(test),length(x)-2*g-2)) +} +list(cor=wcor,cov=wcov,p.value=sig) +} + +bivar<-function(x){ +# compute biweight midvariance of x +m<-median(x) +u<-abs((x-m)/(9*qnorm(.75)*mad(x))) +av<-ifelse(u<1,1,0) +top<-length(x)*sum(av*(x-m)^2*(1-u^2)^4) +bot<-sum(av*(1-u^2)*(1-5*u^2)) +bi<-top/bot^2 +bi +} + +mjse<-function(x,q=.5,na.rm=FALSE){ +# +# Compute the Maritz-Jarrett estimate of the standard error of +# X sub m, m=[qn+.5] +# The default value for q is .5 +# +if(na.rm)x=elimna(x) +n<-length(x) +m<-floor(q*n+.5) +vec<-seq(along=x) +w<-pbeta(vec/n,m-1,n-m)-pbeta((vec-1)/n,m-1,n-m) # W sub i values +y<-sort(x) +c1<-sum(w*y) +c2<-sum(w*y*y) +mjse<-sqrt(c2-c1^2) +mjse +} + +pbvar<-function(x,beta=.2){ +# Compute the percentage bend midvariance +# +# beta is the bending constant for omega sub N. +# +pbvar=0 +x=elimna(x) +w<-abs(x-median(x)) +w<-sort(w) +m<-floor((1-beta)*length(x)+.5) +omega<-w[m] +if(omega>0){ +y<-(x-median(x))/omega +z<-ifelse(y>1,1,y) +z<-ifelse(z<(-1),-1,z) +pbvar<-length(x)*omega^2*sum(z^2)/(length(x[abs(y)<1]))^2 +} +pbvar +} + +win<-function(x,tr=.2){ +# +# Compute the gamma Winsorized mean for the data in the vector x. +# +# tr is the amount of Winsorization +# +y<-sort(x) +n<-length(x) +ibot<-floor(tr*n)+1 +itop<-length(x)-ibot+1 +xbot<-y[ibot] +xtop<-y[itop] +y<-ifelse(y<=xbot,xbot,y) +y<-ifelse(y>=xtop,xtop,y) +win<-mean(y) +win +} + +hd<-function(x,q=.5,na.rm=TRUE,STAND=NULL,tr=FALSE){ +# +# Compute the Harrell-Davis estimate of the qth quantile +# +# The vector x contains the data, +# and the desired quantile is q +# The default value for q is .5. +# +if(tr)e=thd(x,q=q) +else{ +if(na.rm)x=elimna(x) +n<-length(x) +m1<-(n+1)*q +m2<-(n+1)*(1-q) +vec<-seq(along=x) +w<-pbeta(vec/n,m1,m2)-pbeta((vec-1)/n,m1,m2) # W sub i values +y<-sort(x) +e<-sum(w*y) +} +e +} + +mestse<-function(x,bend=1.28,op=2){ +# +# Estimate the standard error of M-estimator using Huber's Psi +# using estimate of influence function +# +n<-length(x) +mestse<-sqrt(sum((ifmest(x,bend,op=2)^2))/(n*(n-1))) +mestse +} + +omega<-function(x,beta=.1){ +# Compute the estimate of the measure omega as described in +# chapter 3. 
+# The default value is beta=.1 because this function is used to +# compute the percentage bend midvariance. +# +y<-abs(x-median(x)) +y<-sort(y) +m<-floor((1-beta)*length(x)+.5) +omega<-y[m]/qnorm(1-beta/2) # omega is rescaled to equal sigma +# under normality +omega +} + +qse<-function(x,q=.5,op=3){ +# +# Compute the standard error of qth sample quantile estimator +# based on the single order statistic, x sub ([qn+.5]) (See Ch 3) +# +# Store the data in vector +# x, and the desired quantile in q +# The default value for q is .5 +# +# op=1 Use Rosenblatt's shifted histogram +# op=2 Use expected frequency curve +# op=3 Use adaptive kernel density estimator +# +y <- sort(x) +n <- length(x) +iq <- floor(q * n + 0.5) +qest <- y[iq] +fhat<-NA +if(op==1)fhat<-kerden(x,q) +if(op==2)fhat<-rdplot(x,pts=qest,pyhat=TRUE,plotit=FALSE) +if(op==3)fhat<-akerd(x,pts=qest,pyhat=TRUE,plotit=FALSE) +if(is.na(fhat[1]))stop("Something wrong, op should be 1 or 2 or 3") +qse<-1/(2*sqrt(length(x))*fhat) +qse +} + +winval<-function(x,tr=.2){ +# +# Winsorize the data in the vector x. +# tr is the amount of Winsorization which defaults to .2. +# +# This function is used by several other functions that come with this book. +# +y<-sort(x) +n<-length(x) +ibot<-floor(tr*n)+1 +itop<-length(x)-ibot+1 +xbot<-y[ibot] +xtop<-y[itop] +winval<-ifelse(x<=xbot,xbot,x) +winval<-ifelse(winval>=xtop,xtop,winval) +winval +} + +hdseb<-function(x,q=.5,nboot=100,SEED=TRUE){ +# +# Compute bootstrap estimate of the standard error of the +# Harrell-Davis estimator of the qth quantile. +# The default quantile is the median, q=.5 +# The default number of bootstrap samples is nboot=100 +# +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +data<-matrix(sample(x,size=length(x)*nboot,replace=TRUE),nrow=nboot) +bvec<-apply(data,1,hd,q) +hdseb<-sqrt(var(bvec)) +hdseb +} + +mestseb<-function(x,nboot=1000,bend=1.28,SEED=TRUE){ +# +# Compute bootstrap estimate of the standard error of the +# M-estimators with Huber's Psi. +# The default percentage bend is bend=1.28 +# The default number of bootstrap samples is nboot=100 +# +if(SEED)set.seed(1) # set seed of random number generator so that +# results can be duplicated. +data<-matrix(sample(x,size=length(x)*nboot,replace=TRUE),nrow=nboot) +bvec<-apply(data,1,mest,bend=bend) +mestseb<-sqrt(var(bvec)) +mestseb +} + +onestep<-function(x,bend=1.28,na.rm=FALSE,MED=TRUE){ +# +# Compute one-step M-estimator of location using Huber's Psi. +# The default bending constant is 1.28 +# +# MED=TRUE: initial estimate is the median +# Otherwise use modified one-step M-estimator +# +if(na.rm)x<-x[!is.na(x)] +if(MED)init.loc=median(x) +if(!MED)init.loc=mom(x,bend=bend) +y<-(x-init.loc)/mad(x) #mad in splus is madn in the book. +A<-sum(hpsi(y,bend)) +B<-length(x[abs(y)<=bend]) +onestep<-median(x)+mad(x)*A/B +onestep +} + + +trimse<-function(x,tr=.2,na.rm=FALSE){ +# +# Estimate the standard error of the gamma trimmed mean +# The default amount of trimming is tr=.2. +# +if(na.rm)x<-x[!is.na(x)] +trimse<-sqrt(winvar(x,tr))/((1-2*tr)*sqrt(length(x))) +trimse +} + +winvar<-function(x,tr=.2,na.rm=FALSE,STAND=NULL){ +# +# Compute the gamma Winsorized variance for the data in the vector x. +# tr is the amount of Winsorization which defaults to .2. 
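+# A minimal usage sketch (hypothetical data):
+#   set.seed(1); x<-rnorm(25)
+#   winvar(x)       # 20% Winsorized variance
+#   winvar(x,tr=.1) # 10% Winsorization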
+# +remx=x +x<-x[!is.na(x)] +y<-sort(x) +n<-length(x) +ibot<-floor(tr*n)+1 +itop<-length(x)-ibot+1 +xbot<-y[ibot] +xtop<-y[itop] +y<-ifelse(y<=xbot,xbot,y) +y<-ifelse(y>=xtop,xtop,y) +wv<-var(y) +if(!na.rm)if(sum(is.na(remx)>0))wv=NA +wv +} + +mest<-function(x,bend=1.28,na.rm=FALSE){ +# +# Compute M-estimator of location using Huber's Psi. +# The default bending constant is 1.28 +# +if(na.rm)x<-x[!is.na(x)] +if(mad(x)==0)stop("MAD=0. The M-estimator cannot be computed.") +y<-(x-median(x))/mad(x) #mad in splus is madn in the book. +A<-sum(hpsi(y,bend)) +B<-length(x[abs(y)<=bend]) +mest<-median(x)+mad(x)*A/B +repeat{ +y<-(x-mest)/mad(x) +A<-sum(hpsi(y,bend)) +B<-length(x[abs(y)<=bend]) +newmest<-mest+mad(x)*A/B +if(abs(newmest-mest) <.0001)break +mest<-newmest +} +mest +} + + +hpsi<-function(x,bend=1.28){ +# +# Evaluate Huber`s Psi function for each value in the vector x +# The bending constant defaults to 1.28. +# +hpsi<-ifelse(abs(x)<=bend,x,bend*sign(x)) +hpsi +} + +hdci<-function(x,q=.5,alpha=.05,nboot=100,SEED=TRUE,pr=TRUE){ +# +# Compute a 1-alpha confidence for qth quantile using the +# Harrell-Davis estimator in conjunction with the +# bootstrap estimate of the standard error. +# +# The default quantile is .5. +# The default value for alpha is .05. +# +if(alpha!=.05)stop("Use the function qcipb. Generally works well even when alpha is not equal to .05") +x=elimna(x) +if(pr){ +if(sum(duplicated(x)>0))print("Duplicate values detected; use hdpb") +} +se<-hdseb(x,q,nboot,SEED=SEED) +crit<-.5064/(length(x)^(.25))+1.96 +if(q<=.2 || q>=.8){ +if(length(x) <=20)crit<-(-6.23)/length(x)+5.01 +} +if(q<=.1 || q>=.9){ +if(length(x) <=40)crit<-36.2/length(x)+1.31 +} +if(length(x)<=10){ +print("The number of observations is less than 11.") +print("Accurate critical values have not been determined for this case.") +} +low<-hd(x,q)-crit*se +hi<-hd(x,q)+crit*se +list(ci=c(low,hi),crit=crit,se=se) +} + +mestci<-function(x,alpha=.05,nboot=4000,bend=1.28,os=FALSE,pr=TRUE){ +# +# Compute a bootstrap, .95 confidence interval for the +# M-estimator of location based on Huber's Psi. +# The default percentage bend is bend=1.28 +# The default number of bootstrap samples is nboot=4000 +# +# By default, the fully iterated M-estimator is used. To use the +# one-step M-estimator instead, set os=TRUE +# +os<-as.logical(os) +if(pr){ +if(length(x) <=19) +print("The number of observations is less than 20.") +print("This function might fail due to division by zero,") +print("which in turn causes an error in function hpsi") +print("having to do with a missing value.") +} +set.seed(1) # set seed of random number generator so that +# results can be duplicated. +if(pr)print("Taking bootstrap samples. Please wait.") +data<-matrix(sample(x,size=length(x)*nboot,replace=TRUE),nrow=nboot) +if(!os)bvec<-apply(data,1,mest,bend) +if(os)bvec<-apply(data,1,onestep,bend) +bvec<-sort(bvec) +low<-round((alpha/2)*nboot) +up<-nboot-low +low<-low+1 +list(ci=c(bvec[low],bvec[up])) +} + + +sint<-function(x,alpha=.05,pr=FALSE){ +# +# Compute a 1-alpha confidence interval for the median using +# the Hettmansperger-Sheather interpolation method. +# +# The default value for alpha is .05. 
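+# A minimal usage sketch (hypothetical data):
+#   set.seed(1); x<-rexp(30)
+#   sint(x)           # .95 confidence interval for the median
+#   sint(x,alpha=.01) # .99 confidence interval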
+# +x=elimna(x) +if(pr){ +if(sum(duplicated(x)>0))print("Duplicate values detected; hdpb might have more power") +} +k<-qbinom(alpha/2,length(x),.5) +gk<-pbinom(length(x)-k,length(x),.5)-pbinom(k-1,length(x),.5) +if(gk >= 1-alpha){ +gkp1<-pbinom(length(x)-k-1,length(x),.5)-pbinom(k,length(x),.5) +kp<-k+1 +} +if(gk < 1-alpha){ +k<-k-1 +gk<-pbinom(length(x)-k,length(x),.5)-pbinom(k-1,length(x),.5) +gkp1<-pbinom(length(x)-k-1,length(x),.5)-pbinom(k,length(x),.5) +kp<-k+1 +} +xsort<-sort(x) +nmk<-length(x)-k +nmkp<-nmk+1 +ival<-(gk-1+alpha)/(gk-gkp1) +lam<-((length(x)-k)*ival)/(k+(length(x)-2*k)*ival) +low<-lam*xsort[kp]+(1-lam)*xsort[k] +hi<-lam*xsort[nmk]+(1-lam)*xsort[nmkp] +sint<-c(low,hi) +sint +} + + + +b2ci<-function(x,y,alpha=.05,nboot=2000,est=bivar,SEED=TRUE,...){ +# +# Compute a bootstrap confidence interval for the +# the difference between any two parameters corresponding to +# independent groups. +# By default, biweight midvariances are compared. +# Setting est=mean, for example, will result in a percentile +# bootstrap confidence interval for the difference between means. +# The default number of bootstrap samples is nboot=399 +# +x<-x[!is.na(x)] # Remove any missing values in x +y<-y[!is.na(y)] # Remove any missing values in y +e1=est(x) +e2=est(y) +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +#print("Taking bootstrap samples. Please wait.") +datax<-matrix(sample(x,size=length(x)*nboot,replace=TRUE),nrow=nboot) +datay<-matrix(sample(y,size=length(y)*nboot,replace=TRUE),nrow=nboot) +bvecx<-apply(datax,1,est,...) +bvecy<-apply(datay,1,est,...) +bvec<-sort(bvecx-bvecy) +low <- round((alpha/2) * nboot) + 1 +up <- nboot - low +temp <- sum(bvec < 0)/nboot + sum(bvec == 0)/(2 * nboot) +sig.level <- 2 * (min(temp, 1 - temp)) +list(est1=e1,est2=e2,ratio=e1/e2,ci = c(bvec[low], bvec[up]), p.value = sig.level) +} + +ecdf<-function(x,val){ +# compute empirical cdf for data in x evaluated at val +# That is, estimate P(X <= val) +# +ecdf<-length(x[x<=val])/length(x) +ecdf +} + +kswsig<-function(m,n,val){ +# +# Compute significance level of the weighted +# Kolmogorov-Smirnov test statistic +# +# m=sample size of first group +# n=sample size of second group +# val=observed value of test statistic +# +mpn<-m+n +cmat<-matrix(0,m+1,n+1) +umat<-matrix(0,m+1,n+1) +for (i in 1:m-1){ +for (j in 1:n-1)cmat[i+1,j+1]<-abs(i/m-j/n)*sqrt(m*n/((i+j)*(1-(i+j)/mpn))) +} +cmat<-ifelse(cmat<=val,1,0) +for (i in 0:m){ +for (j in 0:n)if(i*j==0)umat[i+1,j+1]<-cmat[i+1,j+1] +else umat[i+1,j+1]<-cmat[i+1,j+1]*(umat[i+1,j]+umat[i,j+1]) +} +term<-lgamma(m+n+1)-lgamma(m+1)-lgamma(n+1) +kswsig<-1.-umat[m+1,n+1]/exp(term) +kswsig +} + + +binomci<-function(x=sum(y),nn=length(y),y=NULL,n=NA,alpha=.05){ +# Compute a 1-alpha confidence interval for p, the probability of +# success for a binomial distribution, using Pratt's method +# +# y is a vector of 1s and 0s. 
+# x is the number of successes observed among n trials +# +if(!is.null(y)){ +y=elimna(y) +nn=length(y) +} +if(nn==1)stop("Something is wrong: number of observations is only 1") +n<-nn +if(x!=n && x!=0){ +z<-qnorm(1-alpha/2) +A<-((x+1)/(n-x))^2 +B<-81*(x+1)*(n-x)-9*n-8 +C<-(0-3)*z*sqrt(9*(x+1)*(n-x)*(9*n+5-z^2)+n+1) +D<-81*(x+1)^2-9*(x+1)*(2+z^2)+1 +E<-1+A*((B+C)/D)^3 +upper<-1/E +A<-(x/(n-x-1))^2 +B<-81*x*(n-x-1)-9*n-8 +C<-3*z*sqrt(9*x*(n-x-1)*(9*n+5-z^2)+n+1) +D<-81*x^2-9*x*(2+z^2)+1 +E<-1+A*((B+C)/D)^3 +lower<-1/E +} +if(x==0){ +lower<-0 +upper<-1-alpha^(1/n) +} +if(x==1){ +upper<-1-(alpha/2)^(1/n) +lower<-1-(1-alpha/2)^(1/n) +} +if(x==n-1){ +lower<-(alpha/2)^(1/n) +upper<-(1-alpha/2)^(1/n) +} +if(x==n){ +lower<-alpha^(1/n) +upper<-1 +} +phat<-x/n +list(phat=phat,ci=c(lower,upper),n=n) +} + + + +kssig<-function(m,n,val){ +# +# Compute significance level of the Kolmogorov-Smirnov test statistic +# m=sample size of first group +# n=sample size of second group +# val=observed value of test statistic +# +cmat<-matrix(0,m+1,n+1) +umat<-matrix(0,m+1,n+1) +for (i in 0:m){ +for (j in 0:n)cmat[i+1,j+1]<-abs(i/m-j/n) +} +cmat<-ifelse(cmat<=val,1e0,0e0) +for (i in 0:m){ +for (j in 0:n)if(i*j==0)umat[i+1,j+1]<-cmat[i+1,j+1] +else umat[i+1,j+1]<-cmat[i+1,j+1]*(umat[i+1,j]+umat[i,j+1]) +} +term<-lgamma(m+n+1)-lgamma(m+1)-lgamma(n+1) +kssig<-1.-umat[m+1,n+1]/exp(term) +kssig=max(0,kssig) +kssig +} + +meemul<-function(x,alpha=.05){ +# +# Perform Mee's method for all pairs of J independent groups. +# The familywise type I error probability is controlled by using +# a critical value from the Studentized maximum modulus distribution. +# +# The data are assumed to be stored in $x$ in list mode. +# Length(x) is assumed to correspond to the total number of groups, J +# It is assumed all groups are independent. +# +# Missing values are automatically removed. +# +# The default value for alpha is .05. Any other value results in using +# alpha=.01. +# +if(!is.list(x))stop("Data must be stored in list mode.") +J<-length(x) +CC<-(J^2-J)/2 +test<-matrix(NA,CC,5) +for(j in 1:J){ +xx<-!is.na(x[[j]]) +val<-x[[j]] +x[[j]]<-val[xx] # Remove missing values +} +dimnames(test)<-list(NULL,c("Group","Group","phat","ci.lower","ci.upper")) +jcom<-0 +crit<-smmcrit(200,CC) +if(alpha!=.05)crit<-smmcrit01(200,CC) +alpha<-1-pnorm(crit) +for (j in 1:J){ +for (k in 1:J){ +if (j < k){ +temp<-mee(x[[j]],x[[k]],alpha) +jcom<-jcom+1 +test[jcom,1]<-j +test[jcom,2]<-k +test[jcom,3]<-temp$phat +test[jcom,4]<-temp$ci[1] +test[jcom,5]<-temp$ci[2] +}}} +list(test=test) +} + +tsub<-function(isub,x,y,tr){ +# +# Compute test statistic for trimmed means +# when comparing dependent groups. +# By default, 20% trimmed means are used. +# isub is a vector of length n, +# a bootstrap sample from the sequence of integers +# 1, 2, 3, ..., n +# +# This function is used by ydbt +# +tsub<-yuend(x[isub],y[isub],tr=tr)$teststat +tsub +} + +deciles<-function(x,HD=TRUE,type=7){ +# +# Estimate the deciles for the data in vector x +# HD=TRUE: use the Harrell-Davis estimate of the qth quantile +# HD=FALSE:use R function quantile +# +x=elimna(x) +if(HD){ +xs<-sort(x) +n<-length(x) +vecx<-seq(along=x) +xq<-0 +for (i in 1:9){ +q<-i/10 +m1<-(n+1)*q +m2<-(n+1)*(1-q) +wx<-pbeta(vecx/n,m1,m2)-pbeta((vecx-1)/n,m1,m2) # W sub i values +xq[i]<-sum(wx*xs) +}} +if(!HD){ +pts=seq(.1,.9,.1) +xq=quantile(x,probs=pts,type=type) +} +xq +} + + +kstiesig<-function(x,y,val){ +# +# Compute significance level of the Kolmogorov-Smirnov test statistic +# for the data in x and y. 
+# This function allows ties among the values.
+# val=observed value of test statistic
+#
+m<-length(x)
+n<-length(y)
+z<-c(x,y)
+z<-sort(z)
+cmat<-matrix(0,m+1,n+1)
+umat<-matrix(0,m+1,n+1)
+for (i in 0:m){
+for (j in 0:n){
+if(abs(i/m-j/n)<=val)cmat[i+1,j+1]<-1e0
+k<-i+j
+if(k > 0 && k<(m+n) && z[k]==z[k+1])cmat[i+1,j+1]<-1e0
+}
+}
+for (i in 0:m){
+for (j in 0:n)if(i*j==0)umat[i+1,j+1]<-cmat[i+1,j+1]
+else umat[i+1,j+1]<-cmat[i+1,j+1]*(umat[i+1,j]+umat[i,j+1])
+}
+term<-lgamma(m+n+1)-lgamma(m+1)-lgamma(n+1)
+kstiesig<-1.-umat[m+1,n+1]/exp(term)
+kstiesig
+}
+
+yuen<-function(x,y,tr=.2,alpha=.05){
+#
+# Perform Yuen's test for trimmed means on the data in x and y.
+# The default amount of trimming is tr=.2
+# Missing values (values stored as NA) are automatically removed.
+#
+# A confidence interval for the trimmed mean of x minus the
+# trimmed mean of y is returned in yuen$ci,
+# and the p-value is returned in yuen$p.value
+#
+if(tr>.25)print("Warning: with tr>.25 type I error control might be poor")
+x<-x[!is.na(x)] # Remove any missing values in x
+y<-y[!is.na(y)] # Remove any missing values in y
+h1<-length(x)-2*floor(tr*length(x))
+h2<-length(y)-2*floor(tr*length(y))
+q1<-(length(x)-1)*winvar(x,tr)/(h1*(h1-1))
+q2<-(length(y)-1)*winvar(y,tr)/(h2*(h2-1))
+df<-(q1+q2)^2/((q1^2/(h1-1))+(q2^2/(h2-1)))
+crit<-qt(1-alpha/2,df)
+dif<-mean(x,tr)-mean(y,tr)
+low<-dif-crit*sqrt(q1+q2)
+up<-dif+crit*sqrt(q1+q2)
+test<-abs(dif/sqrt(q1+q2))
+yuen<-2*(1-pt(test,df))
+list(n1=length(x),n2=length(y),est.1=mean(x,tr),est.2=mean(y,tr),ci=c(low,up),p.value=yuen,dif=dif,se=sqrt(q1+q2),teststat=test,crit=crit,df=df)
+}
+
+shifthd<-function(x,y,nboot=200,plotit=TRUE,plotop=FALSE,SEED=TRUE){
+#
+# Compute confidence intervals for the difference between deciles
+# of two independent groups. The simultaneous probability coverage is .95.
+# The Harrell-Davis estimate of the qth quantile is used.
+# The default number of bootstrap samples is nboot=200
+#
+# The results are stored and returned in a 9 by 3 matrix,
+# the ith row corresponding to the i/10 quantile.
+# The first column is the lower end of the confidence interval.
+# The second column is the upper end.
+# The third column is the estimated difference between the deciles
+# (second group minus first).
+#
+plotit<-as.logical(plotit)
+x<-x[!is.na(x)]
+y<-y[!is.na(y)]
+if(SEED)set.seed(2) # set seed of random number generator so that
+# results can be duplicated.
+crit<-80.1/(min(length(x),length(y)))^2+2.73
+m<-matrix(0,9,3)
+for (i in 1:9){
+q<-i/10
+data<-matrix(sample(x,size=length(x)*nboot,replace=TRUE),nrow=nboot)
+bvec<-apply(data,1,hd,q)
+sex<-var(bvec)
+data<-matrix(sample(y,size=length(y)*nboot,replace=TRUE),nrow=nboot)
+bvec<-apply(data,1,hd,q)
+sey<-var(bvec)
+dif<-hd(y,q)-hd(x,q)
+m[i,3]<-dif
+m[i,1]<-dif-crit*sqrt(sex+sey)
+m[i,2]<-dif+crit*sqrt(sex+sey)
+}
+dimnames(m)<-list(NULL,c("ci.lower","ci.upper","Delta.hat"))
+if(plotit){
+if(plotop){
+xaxis<-c(1:9)/10
+xaxis<-c(xaxis,xaxis)
+}
+if(!plotop)xaxis<-c(deciles(x),deciles(x))
+par(pch="+")
+yaxis<-c(m[,1],m[,2])
+if(!plotop)plot(xaxis,yaxis,ylab="delta",xlab="x (first group)")
+if(plotop)plot(xaxis,yaxis,ylab="delta",xlab="Deciles")
+par(pch="*")
+if(!plotop)points(deciles(x),m[,3])
+if(plotop)points(c(1:9)/10,m[,3])
+}
+m
+}
+
+shiftdhd<-function(x,y,nboot=200,plotit=TRUE,plotop=FALSE,SEED=TRUE,pr=TRUE,xlab='x (first group)',
+ylab='Delta'){
+#
+# Compute confidence intervals for the difference between deciles
+# of two dependent groups. The simultaneous probability coverage is .95.
+# The Harrell-Davis estimate of the qth quantile is used.
+# The default number of bootstrap samples is nboot=200
+#
+# The results are stored and returned in a 9 by 6 matrix,
+# the ith row corresponding to the i/10 quantile.
+# The first two columns contain the estimated deciles of the first and second group.
+# The third column is the estimated difference between the deciles
+# (first group minus second).
+# The fourth and fifth columns are the lower and upper ends of the confidence interval.
+# The sixth column contains the estimated standard error.
+#
+# Pairs with a missing value are automatically removed.
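+# A minimal usage sketch (hypothetical paired data):
+#   set.seed(1); x<-rnorm(40); y<-x+rnorm(40)
+#   shiftdhd(x,y) # simultaneous .95 CIs for the nine decile differences (first minus second)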
+# +if(pr){ +print("NOTE: if the goal is to use an alpha value different from .05") +print("use the function qcomdhd or qdec2ci") +} +xy=elimna(cbind(x,y)) +x=xy[,1] +y=xy[,2] +plotit<-as.logical(plotit) +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +crit<-37/length(x)^(1.4)+2.75 +if(pr)print("The approximate .05 critical value is") +if(pr)print(crit) +m<-matrix(0,9,6) +if(pr)print("Taking Bootstrap Samples. Please wait.") +data<-matrix(sample(length(x),size=length(x)*nboot,replace=TRUE),nrow=nboot) +xmat<-matrix(x[data],nrow=nboot,ncol=length(x)) +ymat<-matrix(y[data],nrow=nboot,ncol=length(x)) +for (i in 1:9){ +q<-i/10 +bvec<-apply(xmat,1,hd,q)-apply(ymat,1,hd,q) +se<-sqrt(var(bvec)) +dif<-hd(x,q)-hd(y,q) +m[i,1]=hd(x,q) +m[i,2]=hd(y,q) +m[i,3]<-dif +m[i,4]<-dif-crit*se +m[i,5]<-dif+crit*se +m[i,6]<-se +} +dimnames(m)<-list(NULL,c('est.1','est.2','est.dif','ci.lower','ci.upper','se')) +if(plotit){ +if(plotop){ +xaxis<-c(1:9)/10 +xaxis<-c(xaxis,xaxis) +} +if(!plotop)xaxis<-c(deciles(x),deciles(x)) +par(pch="+") +#yaxis<-c(m[,1],m[,2]) +yaxis<-c(m[,4],m[,5]) +if(!plotop)plot(xaxis,yaxis,ylab=ylab,xlab=xlab) +if(plotop)plot(xaxis,yaxis,ylab="delta",xlab="Deciles") +par(pch="*") +if(!plotop)points(deciles(x),m[,3]) +if(plotop)points(c(1:9)/10,m[,3]) +} +m +} + + +smmcrit<-function(nuhat,C){ +# +# Determine the .95 quantile of the C-variate Studentized maximum +# modulus distribution using linear interpolation on inverse +# degrees of freedom +# If C=1, this function returns the .975 quantile of Student's t +# distribution. +# +if(C-round(C)!=0)stop("The number of contrasts, C, must be an integer") +if(C>=29)stop("C must be less than or equal to 28") +if(C<=0)stop("C must be greater than or equal to 1") +if(nuhat<2)stop("The degrees of freedom must be greater than or equal to 2") +if(C==1)smmcrit<-qt(.975,nuhat) +if(C>=2){ +C<-C-1 +m1<-matrix(0,20,27) +m1[1,]<-c(5.57,6.34,6.89,7.31,7.65,7.93,8.17,8.83,8.57, +8.74,8.89,9.03,9.16,9.28,9.39,9.49,9.59, 9.68, +9.77,9.85,9.92,10.00,10.07,10.13,10.20,10.26,10.32) +m1[2,]<-c(3.96,4.43,4.76,5.02,5.23,5.41,5.56,5.69,5.81, +5.92,6.01,6.10,6.18,6.26,6.33,6.39,6.45,6.51, +6.57,6.62,6.67,6.71,6.76,6.80,6.84,6.88, 6.92) +m1[3,]<-c(3.38,3.74,4.01,4.20,4.37,4.50,4.62,4.72,4.82, +4.89,4.97,5.04,5.11,5.17,5.22,5.27,5.32, 5.37, +5.41,5.45,5.49,5.52,5.56,5.59,5.63,5.66,5.69) +m1[4,]<-c(3.09,3.39,3.62,3.79,3.93,4.04,4.14,4.23,4.31, +4.38,4.45,4.51,4.56,4.61,4.66,4.70,4.74,4.78, +4.82,4.85,4.89,4.92,4.95,4.98,5.00,5.03,5.06) +m1[5,]<-c(2.92,3.19,3.39,3.54,3.66,3.77,3.86,3.94,4.01, +4.07,4.13,4.18,4.23,4.28,4.32,4.36,4.39,4.43, +4.46,4.49,4.52,4.55,4.58,4.60,4.63,4.65,4.68) +m1[6,]<-c(2.80,3.06,3.24,3.38,3.49,3.59,3.67,3.74,3.80, +3.86,3.92,3.96,4.01,4.05,4.09,4.13,4.16,4.19, +4.22,4.25,4.28,4.31,4.33,4.35,4.38,4.39,4.42) +m1[7,]<-c(2.72,2.96,3.13,3.26,3.36,3.45,3.53,3.60,3.66, +3.71,3.76,3.81,3.85,3.89,3.93,3.96,3.99, 4.02, +4.05,4.08,4.10,4.13,4.15,4.18,4.19,4.22,4.24) +m1[8,]<-c(2.66,2.89,3.05,3.17,3.27,3.36,3.43,3.49,3.55, +3.60,3.65,3.69,3.73,3.77,3.80,3.84,3.87,3.89, +3.92,3.95,3.97,3.99,4.02,4.04,4.06,4.08,4.09) +m1[9,]<-c(2.61,2.83,2.98,3.10,3.19,3.28,3.35,3.41,3.47, +3.52,3.56,3.60,3.64,3.68,3.71,3.74,3.77,3.79, +3.82,3.85,3.87,3.89,3.91,3.94,3.95, 3.97,3.99) +m1[10,]<-c(2.57,2.78,2.93,3.05,3.14,3.22,3.29,3.35,3.40, +3.45,3.49,3.53,3.57,3.60,3.63,3.66,3.69,3.72, +3.74,3.77,3.79,3.81,3.83,3.85,3.87,3.89,3.91) +m1[11,]<-c(2.54,2.75,2.89,3.01,3.09,3.17,3.24,3.29,3.35, 
+3.39,3.43,3.47,3.51,3.54,3.57,3.60,3.63,3.65, +3.68,3.70,3.72,3.74,3.76,3.78,3.80,3.82,3.83) +m1[12,]<-c(2.49,2.69,2.83,2.94,3.02,3.09,3.16,3.21,3.26, +3.30,3.34,3.38,3.41,3.45,3.48,3.50,3.53,3.55, +3.58,3.59,3.62,3.64,3.66,3.68,3.69,3.71,3.73) +m1[13,]<-c(2.46,2.65,2.78,2.89,2.97,3.04,3.09,3.15,3.19, +3.24,3.28,3.31,3.35,3.38,3.40,3.43,3.46,3.48, +3.50,3.52,3.54,3.56,3.58,3.59,3.61,3.63,3.64) +m1[14,]<-c(2.43,2.62,2.75,2.85,2.93,2.99,3.05,3.11,3.15, +3.19,3.23,3.26,3.29,3.32,3.35,3.38,3.40,3.42, +3.44,3.46,3.48,3.50,3.52,3.54,3.55,3.57,3.58) +m1[15,]<-c(2.41,2.59,2.72,2.82,2.89,2.96,3.02,3.07,3.11, +3.15,3.19,3.22,3.25,3.28,3.31,3.33,3.36,3.38, +3.39,3.42,3.44,3.46,3.47,3.49,3.50,3.52,3.53) +m1[16,]<-c(2.38,2.56,2.68,2.77,2.85,2.91,2.97,3.02,3.06, +3.09,3.13,3.16,3.19,3.22,3.25,3.27,3.29,3.31, +3.33,3.35,3.37,3.39,3.40,3.42,3.43,3.45,3.46) +m1[17,]<-c(2.35,2.52,2.64,2.73,2.80,2.87,2.92,2.96,3.01, +3.04,3.07,3.11,3.13,3.16,3.18,3.21,3.23,3.25, +3.27,3.29,3.30,3.32,3.33,3.35,3.36,3.37,3.39) +m1[18,]<-c(2.32,2.49,2.60,2.69,2.76,2.82,2.87,2.91,2.95, +2.99,3.02,3.05,3.08,3.09,3.12,3.14,3.17, 3.18, +3.20,3.22,3.24,3.25,3.27,3.28,3.29,3.31,3.32) +m1[19,]<-c(2.29,2.45,2.56,2.65,2.72,2.77,2.82,2.86,2.90, +2.93,2.96,2.99,3.02,3.04,3.06,3.08,3.10, 3.12, +3.14,3.16,3.17,3.19,3.20,3.21,3.23,3.24,3.25) +m1[20,]<-c(2.24,2.39,2.49,2.57,2.63,2.68,2.73,2.77,2.79, +2.83,2.86,2.88,2.91,2.93,2.95,2.97,2.98, 3.01, +3.02,3.03,3.04,3.06,3.07,3.08,3.09,3.11,3.12) +if(nuhat>=200)smmcrit<-m1[20,C] +if(nuhat<200){ +nu<-c(2,3,4,5,6,7,8,9,10,11,12,14,16,18,20,24,30,40,60,200) +temp<-abs(nu-nuhat) +find<-order(temp) +if(temp[find[1]]==0)smmcrit<-m1[find[1],C] +if(temp[find[1]]!=0){ +if(nuhat>nu[find[1]]){ +smmcrit<-m1[find[1],C]- +(1/nu[find[1]]-1/nuhat)*(m1[find[1],C]-m1[find[1]+1,C])/ +(1/nu[find[1]]-1/nu[find[1]+1]) +} +if(nuhat0)J<-length(grp) +nval<-1 +nrat<-1 +nmax<-0 +rbar<-1 +mrbar<-0 +for (j in grp){ +temp<-x[[j]] +temp<-temp[!is.na(temp)] #Missing values are removed. 
+nrat[j]<-(length(temp)-1)/length(temp) +nval[j]<-length(temp) +if(j==grp[1])xall<-temp +if(j!=grp[1])xall<-c(xall,temp) +if(length(temp)>nmax)nmax<-length(temp) +} +pv<-array(NA,c(J,nmax,J)) +tv<-matrix(NA,J,nmax) +rv<-matrix(0,J,nmax) +for (i in 1:J){ +data<-x[[i]] +data<-data[!is.na(data)] +for (j in 1:length(data)){ +tempr<-data[j]-xall +rv[i,j]<-length(tempr[tempr>=0]) +for (l in 1:J){ +templ<-x[[l]] +templ<-templ[!is.na(templ)] +temp<-data[j]-templ +pv[i,j,l]<-length(temp[temp>=0]) +} +tv[i,j]<-sum(pv[i,j,])-pv[i,j,i] +} +rbar[i]<-sum(rv[i,])/nval[i] +mrbar<-mrbar+sum(rv[i,]) +} +amat<-matrix(0,J,J) +for(i in 1:J){ +temptv<-tv[i,] +temptv<-temptv[!is.na(temptv)] +amat[i,i]<-(length(temptv)-1)*var(temptv) +for (l in 1:J){ +tempp<-pv[l,,i] +tempp<-tempp[!is.na(tempp)] +if(l!=i){ +amat[i,i]<-amat[i,i]+(length(tempp)-1)*var(tempp) +}} +for (j in 1:J){ +if(j>i){ +for (l in 1:J){ +temp1<-pv[l,,i] +temp2<-pv[l,,j] +temp1<-temp1[!is.na(temp1)] +temp2<-temp2[!is.na(temp2)] +#if(i!=l && l!=j)amat[i,j]<-(length(temp1)-1)*var(temp1,temp2) +if(i!=l && l!=j)amat[i,j]<-amat[i,j]+(length(temp1)-1)*var(temp1,temp2) +} +temp1<-pv[i,,j] +temp2<-tv[i,] +temp1<-temp1[!is.na(temp1)] +temp2<-temp2[!is.na(temp2)] +amat[i,j]<-amat[i,j]-(length(temp1)-1)*var(temp1,temp2) +temp1<-pv[j,,i] +temp2<-tv[j,] +temp1<-temp1[!is.na(temp1)] +temp2<-temp2[!is.na(temp2)] +amat[i,j]<-amat[i,j]-(length(temp1)-1)*var(temp1,temp2) +} +amat[j,i]<-amat[i,j] +}} +N<-sum(nval) +amat<-amat/N^3 +amati<-ginv(amat) +uvec<-1 +mrbar<-mrbar/N +for (i in 1:J)uvec[i]<-nval[i]*(rbar[i]-mrbar)/(N*(N+1)) +testv<-N*prod(nrat)*uvec%*%amati%*%uvec +test<-testv[1,1] +df<-J-1 +siglevel<-1-pchisq(test,df) +list(test=test,p.value=siglevel,df=df) +} + +apanova<-function(data,grp=0){ +# +# Perform Agresti-Pendergast rank test for J dependent groups +# The data are assumed to be stored in an n by J matrix or +# in list mode. In the latter case, length(data)=J. +# +if(is.list(data)){ +x<-matrix(0,length(data[[1]]),length(data)) +for (j in 1:length(data))x[,j]<-data[[j]] +} +if(is.matrix(data))x<-data +if(sum(grp==0))grp<-c(1:ncol(x)) +x<-x[,grp] +J<-ncol(x) +n<-nrow(x) +if(n<=20)print("With n<=20, suggest using bprm") +rm<-matrix(rank(x),n,J) +rv<-apply(rm,2,mean) +sm<-(n-1)*winall(rm,tr=0)$cov/(n-J+1) +jm1<-J-1 +cv<-diag(1,jm1,J) +for (i in 2:J){ +k<-i-1 +cv[k,i]<--1 +} +cr<-cv%*%rv +ftest<-n*t(cr)%*%solve(cv%*%sm%*%t(cv))%*%cr/(J-1) +df1<-J-1 +df2<-(J-1)*(n-1) +siglevel<-1-pf(ftest,df1,df2) +list(FTEST=ftest,df1=df1,df2=df2,p.value=siglevel) +} +box1way<-function(x,tr=.2,grp=c(1:length(x))){ +# +# A heteroscedastic one-way ANOVA for trimmed means +# using a generalization of Box's method. +# +# The data are assumed to be stored in $x$ in list mode. +# Length(x) is assumed to correspond to the total number of groups. +# By default, the null hypothesis is that all groups have a common mean. +# To compare a subset of the groups, use grp to indicate which +# groups are to be compared. For example, if you type the +# command grp<-c(1,3,4), and then execute this function, groups +# 1, 3, and 4 will be compared with the remaining groups ignored. +# +# Missing values are automatically removed. 
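+# A minimal usage sketch (hypothetical data stored in list mode):
+#   set.seed(1); x<-list(rnorm(20),rnorm(20,.5),rnorm(20,1))
+#   box1way(x)       # compare the 20% trimmed means of the three groups
+#   box1way(x,tr=.1) # use 10% trimming instead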
+# +J<-length(grp) # The number of groups to be compared +print("The number of groups to be compared is") +print(J) +h<-vector("numeric",J) +w<-vector("numeric",J) +xbar<-vector("numeric",J) +svec<-vector("numeric",J) +for(j in 1:J){ +xx<-!is.na(x[[j]]) +val<-x[[j]] +x[[j]]<-val[xx] # Remove missing values +h[j]<-length(x[[grp[j]]])-2*floor(tr*length(x[[grp[j]]])) + # h is the number of observations in the jth group after trimming. +svec[j]<-((length(x[[grp[j]]])-1)*winvar(x[[grp[j]]],tr))/(h[j]-1) +xbar[j]<-mean(x[[grp[j]]],tr) +} +xtil<-sum(h*xbar)/sum(h) +fval<-h/sum(h) +TEST<-sum(h*(xbar-xtil)^2)/sum((1-fval)*svec) +nu1<-sum((1-fval)*svec) +nu1<-nu1^2/((sum(svec*fval))^2+sum(svec^2*(1-2*fval))) +nu2<-(sum((1-fval)*svec))^2/sum(svec^2*(1-fval)^2/(h-1)) +sig<-1-pf(TEST,nu1,nu2) +list(TEST=TEST,nu1=nu1,nu2=nu2,p.value=sig) +} + + + +pairdepb<-function(x,tr=.2,alpha=.05,grp=0,nboot=599){ +# +# Using the percentile t bootstrap method, +# compute a .95 confidence interval for all pairwise differences between +# the trimmed means of dependent groups. +# By default, 20% trimming is used with B=599 bootstrap samples. +# +# x can be an n by J matrix or it can have list mode +# +if(is.data.frame(x)) x <- as.matrix(x) +if(!is.list(x) && !is.matrix(x))stop("Data must be stored in a matrix or in list mode.") +if(is.list(x)){ +if(sum(grp)==0)grp<-c(1:length(x)) +# put the data in an n by J matrix +mat<-matrix(0,length(x[[1]]),length(grp)) +for (j in 1:length(grp))mat[,j]<-x[[grp[j]]] +} +if(is.matrix(x)){ +if(sum(grp)==0)grp<-c(1:ncol(x)) +mat<-x[,grp] +} +if(sum(is.na(mat)>=1))stop("Missing values are not allowed.") +J<-ncol(mat) +connum<-(J^2-J)/2 +bvec<-matrix(0,connum,nboot) +set.seed(2) # set seed of random number generator so that +# results can be duplicated. +#print("Taking bootstrap samples. Please wait.") +data<-matrix(sample(nrow(mat),size=nrow(mat)*nboot,replace=TRUE),nrow=nboot) +xcen<-matrix(0,nrow(mat),ncol(mat)) +for (j in 1:J)xcen[,j]<-mat[,j]-mean(mat[,j],tr) #Center data +it<-0 +for (j in 1:J){ +for (k in 1:J){ +if(j=2)kron<-rbind(kron,m3) +} +kron +} + +rmanova<-function(x,tr=.2,grp=c(1:length(x))){ +# +# A heteroscedastic one-way repeated measures ANOVA for trimmed means. +# +# The data are assumed to be stored in $x$ which can +# be either an n by J matrix, or an R variable having list mode. +# If the data are stored in list mode, +# length(x) is assumed to correspond to the total number of groups. +# By default, the null hypothesis is that all group have a common mean. +# To compare a subset of the groups, use grp to indicate which +# groups are to be compared. For example, if you type the +# command grp<-c(1,3,4), and then execute this function, groups +# 1, 3, and 4 will be compared with the remaining groups ignored. +# +if(!is.list(x) && !is.matrix(x))stop("Data must be stored in a matrix or in list mode.") +if(is.list(x)){ +J<-length(grp) # The number of groups to be compared +#print("The number of groups to be compared is") +#print(J) +m1<-matrix(x[[grp[1]]],length(x[[grp[1]]]),1) +for(i in 2:J){ # Put the data into an n by J matrix +m2<-matrix(x[[grp[i]]],length(x[[i]]),1) +m1<-cbind(m1,m2) +} +} +if(is.matrix(x)){ +if(length(grp)=ncol(x))m1<-as.matrix(x) +J<-ncol(x) +#print("The number of groups to be compared is") +#print(J) +} +# +# Raw data are now in the matrix m1 +# +m2<-matrix(0,nrow(m1),ncol(m1)) +xvec<-1 +g<-floor(tr*nrow(m1)) #2g is the number of observations trimmed. 
+for(j in 1:ncol(m1)){ # Putting Winsorized values in m2 +m2[,j]<-winval(m1[,j],tr) +xvec[j]<-mean(m1[,j],tr) +} +xbar<-mean(xvec) +qc<-(nrow(m1)-2*g)*sum((xvec-xbar)^2) +m3<-matrix(0,nrow(m1),ncol(m1)) +m3<-sweep(m2,1,apply(m2,1,mean)) # Sweep out rows +m3<-sweep(m3,2,apply(m2,2,mean)) # Sweep out columns +m3<-m3+mean(m2) # Grand Winsorized mean swept in +qe<-sum(m3^2) +test<-(qc/(qe/(nrow(m1)-2*g-1))) +# +# Next, estimate the adjusted degrees of freedom +# +v<-winall(m1,tr=tr)$cov +vbar<-mean(v) +vbard<-mean(diag(v)) +vbarj<-1 +for(j in 1:J){ +vbarj[j]<-mean(v[j,]) +} +A<-J*J*(vbard-vbar)^2/(J-1) +B<-sum(v*v)-2*J*sum(vbarj^2)+J*J*vbar^2 +ehat<-A/B +etil<-(nrow(m2)*(J-1)*ehat-2)/((J-1)*(nrow(m2)-1-(J-1)*ehat)) +etil<-min(1.,etil) +df1<-(J-1)*etil +df2<-(J-1)*etil*(nrow(m2)-2*g-1) +siglevel<-1-pf(test,df1,df2) +list(num.groups=J,test=test,df=c(df1,df2),p.value=siglevel,tmeans=xvec,ehat=ehat,etil=etil) +} + + + +trimpartt<-function(x,con){ +# +# This function is used by other functions described in chapter 6. +# +trimpartt<-sum(con*x) +trimpartt +} + +bptdmean<-function(isub,x,tr){ +# +# Compute trimmed means +# when comparing dependent groups. +# By default, 20% trimmed means are used. +# isub is a vector of length n, +# a bootstrap sample from the sequence of integers +# 1, 2, 3, ..., n +# +# This function is used by bptd. +# +bptdmean<-mean(x[isub],tr) +bptdmean +} + + +bptdpsi<-function(x,con){ +# Used by bptd to compute bootstrap psihat values +# +bptdpsi<-sum(con*x) +bptdpsi +} +bptdsub<-function(isub,x,tr,con){ +# +# Compute test statistic for trimmed means +# when comparing dependent groups. +# By default, 20% trimmed means are used. +# isub is a vector of length n, +# a bootstrap sample from the sequence of integers +# 1, 2, 3, ..., n +# con is a J by c matrix. The cth column contains +# a vector of contrast coefficients. +# +# This function is used by bptd. +# +h1 <- nrow(x) - 2 * floor(tr * nrow(x)) +se<-0 +for(j in 1:ncol(x)){ +for(k in 1:ncol(x)){ +djk<-(nrow(x) - 1) * wincor(x[isub,j],x[isub,k], tr)$cov +se<-se+con[j]*con[k]*djk +} +} +se/(h1*(h1-1)) +} + +selby2<-function(m,grpc,coln=NA){ +# Create categories according to the grpc[1] and grpc[2] columns +# of the matrix m. The function puts the values in column coln into +# a vector having list mode. 
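+# For example, selby2(m,grpc=c(1,2),coln=4) (hypothetical column numbers) returns, in list mode,
+# the values in column 4 split into groups defined by the combinations of columns 1 and 2.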
+# +if(is.na(coln))stop("The argument coln is not specified") +if(length(grpc)>4)stop("The argument grpc must have length less than or equal to 4") +x<-vector("list") +ic<-0 +if(length(grpc)==2){ +cat1<-selby(m,grpc[1],coln)$grpn +cat2<-selby(m,grpc[2],coln)$grpn +for (i1 in 1:length(cat1)){ +for (i2 in 1:length(cat2)){ +temp<-NA +it<-0 +for (i in 1:nrow(m)){ +if(sum(m[i,c(grpc[1],grpc[2])]==c(cat1[i1],cat2[i2]))==2){ +it<-it+1 +temp[it]<-m[i,coln] +} +} +if(!is.na(temp[1])){ +ic<-ic+1 +x[[ic]]<-temp +if(ic==1)grpn<-matrix(c(cat1[i1],cat2[i2]),1,2) +if(ic>1)grpn<-rbind(grpn,c(cat1[i1],cat2[i2])) +} +}} +} +if(length(grpc)==3){ +cat1<-selby(m,grpc[1],coln)$grpn +cat2<-selby(m,grpc[2],coln)$grpn +cat3<-selby(m,grpc[3],coln)$grpn +x<-vector("list") +ic<-0 +for (i1 in 1:length(cat1)){ +for (i2 in 1:length(cat2)){ +for (i3 in 1:length(cat3)){ +temp<-NA +it<-0 +for (i in 1:nrow(m)){ +if(sum(m[i,c(grpc[1],grpc[2],grpc[3])]==c(cat1[i1],cat2[i2],cat3[i3]))==3){ +it<-it+1 +temp[it]<-m[i,coln] +}} +if(!is.na(temp[1])){ +ic<-ic+1 +x[[ic]]<-temp +if(ic==1)grpn<-matrix(c(cat1[i1],cat2[i2],cat3[i3]),1,3) +if(ic>1)grpn<-rbind(grpn,c(cat1[i1],cat2[i2],cat3[i3])) +}}}} +} +if(length(grpc)==4){ +cat1<-selby(m,grpc[1],coln)$grpn +cat2<-selby(m,grpc[2],coln)$grpn +cat3<-selby(m,grpc[3],coln)$grpn +cat4<-selby(m,grpc[4],coln)$grpn +x<-vector("list") +ic<-0 +for (i1 in 1:length(cat1)){ +for (i2 in 1:length(cat2)){ +for (i3 in 1:length(cat3)){ +for (i4 in 1:length(cat4)){ +temp<-NA +it<-0 +for (i in 1:nrow(m)){ +if(sum(m[i,c(grpc[1],grpc[2],grpc[3],grpc[4])]==c(cat1[i1],cat2[i2],cat3[i3],cat4[i4]))==4){ +it<-it+1 +temp[it]<-m[i,coln] +}} +if(!is.na(temp[1])){ +ic<-ic+1 +x[[ic]]<-temp +if(ic==1)grpn<-matrix(c(cat1[i1],cat2[i2],cat3[i3],cat4[i4]),1,4) +if(ic>1)grpn<-rbind(grpn,c(cat1[i1],cat2[i2],cat3[i3],cat4[i4])) +}}}}} +} +list(x=x,grpn=grpn) +} + + +lindmsub<-function(isub,x,est,...){ +# +# isub is a vector of length n containing integers between +# randomly sampled with replacement from 1,...,n. +# +# Used by lindm to convert an n by B matrix of bootstrap values, +# randomly sampled from 1, ..., n, with replacement, to a +# J by B matrix of measures of location. +# +# +lindmsub<-est(x[isub],...) +lindmsub +} +lindm<-function(x,con=0,est=onestep,grp=0,alpha=.05,nboot=999,...){ +# +# Compute a 1-alpha confidence interval for a set of d linear contrasts +# involving M-estimators associated with the marginal distributions +# using a bootstrap method. +# Dependent groups are assumed. +# +# The data are assumed to be stored in x in list mode. Thus, +# x[[1]] contains the data for the first group, x[[2]] the data +# for the second group, etc. Length(x)=the number of groups = J, say. +# +# con is a J by d matrix containing the contrast coefficents of interest. +# If unspecified, all pairwise comparisons are performed. +# For example, con[,1]=c(1,1,-1,-1,0,0) and con[,2]=c(,1,-1,0,0,1,-1) +# will test two contrasts: (1) the sum of the first two trimmed means is +# equal to the sum of the second two, and (2) the difference between +# the first two is equal to the difference between the trimmed means of +# groups 5 and 6. +# +# The default number of bootstrap samples is nboot=399 +# +# This function uses the function trimpartt written for this +# book. 
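+# A minimal usage sketch (four dependent groups, hypothetical data in list mode):
+#   set.seed(1); x<-list(rnorm(30),rnorm(30),rnorm(30),rnorm(30))
+#   lindm(x)            # all pairwise comparisons based on one-step M-estimators
+#   lindm(x,est=median) # compare medians instead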
+# +# +# +if(!is.list(x) && !is.matrix(x))stop("Data must be stored in a matrix or in list mode.") +if(is.list(x)){ +if(sum(grp)==0)grp<-c(1:length(x)) +# put the data in an n by J matrix +mat<-matrix(0,length(x[[1]]),length(grp)) +for (j in 1:length(grp))mat[,j]<-x[[grp[j]]] +} +if(is.matrix(x)){ +if(sum(grp)==0)grp<-c(1:ncol(x)) +mat<-x[,grp] +} +mat<-elimna(mat) +J<-ncol(mat) +Jm<-J-1 +d<-(J^2-J)/2 +if(sum(con^2)==0){ +con<-matrix(0,J,d) +id<-0 +for (j in 1:Jm){ +jp<-j+1 +for (k in jp:J){ +id<-id+1 +con[j,id]<-1 +con[k,id]<-0-1 +}}} +if(nrow(con)!=ncol(mat))stop("The number of groups does not match the number of contrast coefficients.") +m1<-matrix(0,J,nboot) +m2<-1 # Initialize m2 +mval<-1 +set.seed(2) # set seed of random number generator so that +# results can be duplicated. +print("Taking bootstrap samples. Please wait.") +data<-matrix(sample(nrow(mat),size=nrow(mat)*nboot,replace=TRUE),nrow=nboot) +# data is B by n matrix +xcen<-matrix(0,nrow(mat),ncol(mat)) #An n by J matrix +for (j in 1:J){xcen[,j]<-mat[,j]-est(mat[,j],...) #Center data +mval[j]<-est(mat[,j],...) +} +for (j in 1:J)m1[j,]<-apply(data,1,lindmsub,xcen[,j],est,...) # A J by nboot matrix. +m2<-var(t(m1)) # A J by J covariance matrix corresponding to the nboot values. +boot<-matrix(0,ncol(con),nboot) +bot<-1 +for (d in 1:ncol(con)){ +top<-apply(m1,2,trimpartt,con[,d]) +# A vector of length nboot containing psi hat values +consq<-con[,d]^2 +bot[d]<-trimpartt(diag(m2),consq) +for (j1 in 1:J){ +for (j2 in 1:J){ +if(j1=29)stop("C must be less than or equal to 28") +if(C<=0)stop("C must be greater than or equal to 1") +if(nuhat<2)stop("The degrees of freedom must be greater than or equal to 2") +if(C==1)smmcrit01<-qt(.995,nuhat) +if(C>=2){ +C<-C-1 +m1<-matrix(0,20,27) +m1[1,]<-c(12.73,14.44,15.65,16.59,17.35,17.99,18.53,19.01,19.43, +19.81,20.15,20.46,20.75,20.99,20.99,20.99,20.99,20.99, +22.11,22.29,22.46,22.63,22.78,22.93,23.08,23.21,23.35) +m1[2,]<-c(7.13,7.91,8.48,8.92,9.28,9.58,9.84,10.06,10.27, +10.45,10.61,10.76,10.90,11.03,11.15,11.26,11.37,11.47, +11.56,11.65,11.74,11.82,11.89,11.97,12.07,12.11,12.17) +m1[3,]<-c(5.46,5.99,6.36,6.66,6.89,7.09,7.27,7.43,7.57, +7.69,7.80,7.91,8.01,8.09,8.17,8.25,8.32,8.39, +8.45,8.51,8.57,8.63,8.68,8.73,8.78,8.83,8.87) +m1[4,]<-c(4.70,5.11,5.39,5.63,5.81,5.97,6.11,6.23,6.33, +6.43,6.52,6.59,6.67,6.74,6.81,6.87,6.93,6.98, +7.03,7.08,7.13,7.17,7.21,7.25,7.29,7.33,7.36) +m1[5,]<-c(4.27,4.61,4.85,5.05,5.20,5.33,5.45,5.55,5.64, +5.72,5.79,5.86,5.93,5.99,6.04,6.09,6.14,6.18, +6.23,6.27,6.31,6.34,6.38,6.41,6.45,6.48,6.51) +m1[6,]<-c(3.99,4.29,4.51,4.68,4.81,4.93,5.03,5.12,5.19, +5.27,5.33,5.39,5.45,5.50,5.55,5.59,5.64,5.68, +5.72,5.75,5.79,5.82,5.85,5.88,5.91,5.94,5.96) +m1[7,]<-c(3.81,4.08,4.27,4.42,4.55,4.65,4.74,4.82,4.89, +4.96,5.02,5.07,5.12,5.17,5.21,5.25,5.29, 5.33, +5.36,5.39,5.43,5.45,5.48,5.51,5.54,5.56,5.59) +m1[8,]<-c(3.67,3.92,4.10,4.24,4.35,4.45,4.53,4.61,4.67, +4.73,4.79,4.84,4.88,4.92,4.96,5.01,5.04,5.07, +5.10,5.13,5.16,5.19,5.21,5.24,5.26,5.29,5.31) +m1[9,]<-c(3.57,3.80,3.97,4.09,4.20,4.29,4.37,4.44,4.50, +4.56,4.61,4.66,4.69,4.74,4.78,4.81,4.84,4.88, +4.91,4.93,4.96,4.99,5.01,5.03,5.06,5.08,5.09) +m1[10,]<-c(3.48,3.71,3.87,3.99,4.09,4.17,4.25,4.31,4.37, +4.42,4.47,4.51,4.55,4.59,4.63,4.66,4.69,4.72, +4.75,4.78,4.80,4.83,4.85,4.87,4.89,4.91,4.93) +m1[11,]<-c(3.42,3.63,3.78,3.89,.99,4.08,4.15,4.21,4.26, +4.31,4.36,4.40,4.44,4.48,4.51,4.54,4.57,4.59, +4.62,4.65,4.67,4.69,4.72,4.74,4.76,4.78,4.79) +m1[12,]<-c(3.32,3.52,3.66,3.77,3.85,3.93,3.99,.05,4.10, 
+4.15,4.19,4.23,4.26,4.29,4.33,4.36,4.39,4.41, +4.44,4.46,4.48,4.50,4.52,4.54,4.56,4.58,4.59) +m1[13,]<-c(3.25,3.43,3.57,3.67,3.75,3.82,3.88,3.94,3.99, +4.03,4.07,4.11,4.14,4.17,4.19,4.23,4.25,4.28, +4.29,4.32,4.34,4.36,4.38,4.39,4.42,4.43,4.45) +m1[14,]<-c(3.19,3.37,3.49,3.59,3.68,3.74,3.80,3.85,3.89, +3.94,3.98,4.01,4.04,4.07,4.10,4.13,4.15,4.18, +4.19,4.22,4.24,4.26,4.28,4.29,4.31,4.33,4.34) +m1[15,]<-c(3.15,3.32,3.45,3.54,3.62,3.68,3.74,3.79,3.83, +3.87,3.91,3.94,3.97,3.99,4.03,4.05,4.07,4.09, +4.12,4.14,4.16,4.17,4.19,4.21,4.22,4.24,4.25) +m1[16,]<-c(3.09,3.25,3.37,3.46,3.53,3.59,3.64,3.69,3.73, +3.77,3.80,3.83,3.86,3.89,3.91,3.94,3.96,3.98, +4.00,4.02,4.04,4.05,4.07,4.09,4.10,4.12,4.13) +m1[17,]<-c(3.03,3.18,3.29,3.38,3.45,3.50,3.55,3.59,3.64, +3.67,3.70,3.73,3.76,3.78,3.81,3.83,3.85,3.87, +3.89,3.91,3.92,3.94,3.95,3.97,3.98,4.00,4.01) +m1[18,]<-c(2.97,3.12,3.22,3.30,3.37,3.42,3.47,3.51,3.55, +3.58,3.61,3.64,3.66,3.68,3.71,3.73,3.75,3.76, +3.78,3.80,3.81,3.83,3.84,3.85,3.87,3.88,3.89) +m1[19,]<-c(2.91,3.06,3.15,3.23,3.29,3.34,3.38,3.42,3.46, +3.49,3.51,3.54,3.56,3.59,3.61,3.63,3.64,3.66, +3.68,3.69,3.71,3.72,3.73,3.75,3.76,3.77,3.78) +m1[20,]<-c(2.81,2.93,3.02,3.09,3.14,3.19,3.23,3.26,3.29, +3.32,3.34,3.36,3.38,3.40,.42,.44,3.45,3.47, +3.48,3.49,3.50,3.52,3.53,3.54,3.55,3.56,3.57) +if(nuhat>=200)smmcrit01<-m1[20,C] +if(nuhat<200){ +nu<-c(2,3,4,5,6,7,8,9,10,11,12,14,16,18,20,24,30,40,60,200) +temp<-abs(nu-nuhat) +find<-order(temp) +if(temp[find[1]]==0)smmcrit01<-m1[find[1],C] +if(temp[find[1]]!=0){ +if(nuhat>nu[find[1]]){ +smmcrit01<-m1[find[1],C]- +(1/nu[find[1]]-1/nuhat)*(m1[find[1],C]-m1[find[1]+1,C])/ +(1/nu[find[1]]-1/nu[find[1]+1]) +} +if(nuhat=1))ikeep[i]<-0 +e<-m[ikeep[ikeep>=1],] +} +e +} + +pball<-function(m,beta=.2){ +# +# Compute the percentage bend correlation matrix for the +# data in the n by p matrix m. +# +# This function also returns the two-sided significance level +# for all pairs of variables, plus a test of zero correlations +# among all pairs. (See chapter 6 for details.) +# +if(!is.matrix(m))stop("Data must be stored in an n by p matrix") +pbcorm<-matrix(0,ncol(m),ncol(m)) +temp<-matrix(1,ncol(m),ncol(m)) +siglevel<-matrix(NA,ncol(m),ncol(m)) +cmat<-matrix(0,ncol(m),ncol(m)) +for (i in 1:ncol(m)){ +ip1<-i +for (j in ip1:ncol(m)){ +if(i1]) +sx<-ifelse(psi<(-1),0,x) +sx<-ifelse(psi>1,0,sx) +pbos<-(sum(sx)+omhatx*(i2-i1))/(length(x)-i1-i2) +pbos +} + + +tauall<-function(m){ +# +# Compute Kendall's tau for the +# data in the n-by-p matrix m. +# +# This function also returns the two-sided significance level +# for all pairs of variables, plus a test of zero correlations +# among all pairs. (See chapter 6 for details.) +# +if(!is.matrix(m))stop("Data must be stored in an n by p matrix") +taum<-matrix(0,ncol(m),ncol(m)) +siglevel<-matrix(NA,ncol(m),ncol(m)) +for (i in 1:ncol(m)){ +ip1<-i +for (j in ip1:ncol(m)){ +if(i=length(xv)/2)warning("More than half of the w values equal zero") +sumw<-sum(w[ee=.0001) +paste("failed to converge in",iter,"iterations") +list(coef=c(b0,slope),residuals=res) +} + +chreg<-function(x,y,bend=1.345,SEED=TRUE,xout=FALSE,outfun=outpro,pr=TRUE,...){ +# +# Compute Coakley Hettmansperger robust regression estimators +# JASA, 1993, 88, 872-880 +# +# x is a n by p matrix containing the predictor values. +# +# No missing values are allowed +# +# Comments in this function follow the notation used +# by Coakley and Hettmansperger +# +library(MASS) +# with old version of R, need library(lqs) when using ltsreg +# as the initial estimate. 
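+# A minimal usage sketch (hypothetical data; assumes the packages this function
+# relies on, e.g. MASS and one providing ltsReg, are installed):
+#   set.seed(1); x<-rnorm(50); y<-2*x+rnorm(50)
+#   chreg(x,y)$coef # intercept and slope estimates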
+# +if(pr)print('If using chreg with a bootstrap method, use chregF instead') +if(SEED)set.seed(12) # Set seed so that results are always duplicated. +x<-as.matrix(x) +p<-ncol(x) +m<-elimna(cbind(x,y)) +x<-m[,1:p] +p1<-p+1 +y<-m[,p1] +if(xout){ +x<-as.matrix(x) +if(identical(outfun,outblp))flag=outblp(x,y,plotit=FALSE)$keep +else +flag<-outfun(x,plotit=plotit,...)$keep +x<-x[flag,] +y<-y[flag] +x<-as.matrix(x) +} +x<-as.matrix(x) +cutoff<-bend +mve<-vector("list") +if(ncol(x)==1){ +mve$center<-median(x) +mve$cov<-mad(x)^2 +} +if(ncol(x)>=2)mve<-cov.mve(x) # compute minimum volume ellipsoid measures of + # location and scale and store in mve. +reg0<-ltsReg(x,y) # compute initial regression est using least trimmed + # squares. +# Next, compute the rob-md2(i) values and store in rob +rob<-1 # Initialize vector rob +mx<-mve$center +rob<-mahalanobis(x,mx,mve$cov) +k21<-qchisq(.95,p) +c62<-k21/rob +vecone<-c(rep(1,length(y))) # Initialize vector vecone to 1 +c30<-pmin(vecone,c62) # mallows weights put in c30 +k81<-median(abs(reg0$residuals)) # median of absolute residuals +k72<-1.4826*(1+(5/(length(y)-p-1)))*k81 # lms scale +c60<-reg0$residuals/(k72*c30) # standardized residuals +# compute psi and store in c27 +cvec<-c(rep(cutoff,length(y))) # Initialize vector cvec to cutoff +c27<-pmin(cvec,c60) +c27<-pmax(-1*cutoff,c27) #c27 contains psi values +# +# compute B matrix and put in c66. +# Also, transform B so that i th diag elem = 0 if c27[i] is +# between -cutoff and cutoff, 1 otherwise. +# +c66<-ifelse(abs(c27)<=bend,1,0) # Have derivative of psi in c66 +m1<-cbind(1,x) # X matrix with col of 1's added +m2<-t(m1) #X transpose +m5<-diag(c30) # matrix W, diagonal contains weights +m4<-diag(c66) # B matrix +m6<-m4%*%m1 # BX +m7<-m2%*%m6 # X'BX (nD=X'BX) +m8<-solve(m7) #m8 = (X'-B-X)inverse +m9<-m8%*%m2 #m9=X prime-B-X inverse X' +m9<-m9%*%m5 # m9=X prime-B-X inverse X'W +m10<-m9%*%c27 +c20<-m10*k72 +c21<-reg0$coef+c20 #update initial estimate of parameters. +res<-y-m1%*%c21 +list(coef=t(c21),residuals=res) +} + +regboot<-function(isub,x,y,regfun,...){ +# +# Perform regression using x[isub] to predict y[isub] +# isub is a vector of length n, +# a bootstrap sample from the sequence of integers +# 1, 2, 3, ..., n +# +# This function is used by other functions when computing +# bootstrap estimates. +# +# regfun is some regression method already stored in R +# It is assumed that regfun$coef contains the intercept and slope +# estimates produced by regfun. The regression methods written for +# this book, plus regression functions in R, have this property. +# +# x is assumed to be a matrix containing values of the predictors. +# +xmat<-matrix(x[isub,],nrow(x),ncol(x)) +vals<-regfun(xmat,y[isub],...)$coef +vals +} + + +bmreg<-function(x,y,iter=20,bend=2*sqrt((ncol(x)+1)/nrow(x)),xout=FALSE,outfun=outpro,...){ +# compute a bounded M regression using Huber Psi and Schweppe weights. +# The predictors are assumed to be stored in the n by p matrix x. 
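+# A minimal usage sketch (hypothetical data):
+#   set.seed(1); x<-matrix(rnorm(100),ncol=2); y<-x[,1]+rnorm(50)
+#   bmreg(x,y)$coef # intercept and two slope estimates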
+# +x<-as.matrix(x) +p1<-ncol(x)+1 +p<-ncol(x) +xy<-cbind(x,y) +xy<-elimna(xy) +x<-xy[,1:p] +y<-xy[,p1] +if(xout){ +m<-cbind(x,y) +flag<-outfun(x,plotit=FALSE,...)$keep +m<-m[flag,] +x<-m[,1:p] +y<-m[,p1] +} +x<-as.matrix(x) +init<-lsfit(x,y) +resid<-init$residuals +x1<-cbind(1,x) +nu<-sqrt(1-hat(x1)) +low<-ncol(x)+1 +for(it in 1:iter){ +ev<-sort(abs(resid)) +scale<-median(ev[c(low:length(y))])/qnorm(.75) +rov<-(resid/scale)/nu +psi<-ifelse(abs(rov)<=bend,rov,bend*sign(rov)) # Huber Psi +wt<-nu*psi/(resid/scale) +new<-lsfit(x,y,wt) +if(max(abs(new$coef-init$coef))<.0001)break +init$coef<-new$coef +resid<-new$residuals +} +resid<-y-x1%*%new$coef +if(max(abs(new$coef-init$coef))>=.0001) +paste("failed to converge in",iter,"steps") +list(coef=new$coef,residuals=resid,w=wt) +} + + +reglev<-function(x,y,plotit=TRUE,SEED=TRUE,DIS=FALSE){ +# +# Search for good and bad leverage points using the +# Rousseuw and van Zomeren method. +# +# x is an n by p matrix +# +# The function returns the number of the rows in x that are identified +# as outliers. (The row numbers are stored in outliers.) +# It also returns the distance of the points identified as outliers +# in the variable dis. +# +library(MASS) +xy=elimna(cbind(x,y)) +x=as.matrix(x) +p=ncol(x) +p1=p+1 +x=xy[,1:p] +y=xy[,p1] +plotit<-as.logical(plotit) +if(SEED)set.seed(12) +x<-as.matrix(x) +res<-lmsreg(x,y)$resid +sighat<-sqrt(median(res^2)) +sighat<-1.4826*(1+(5/(length(y)-ncol(x)-1)))*sighat +stanres<-res/sighat +if(ncol(x)>=2)mve<-cov.mve(x) +if(ncol(x)==1){ +mve<-vector("list") +mve$center<-median(x) +mve$cov<-mad(x)^2 +} +dis<-mahalanobis(x,mve$center,mve$cov) +dis<-sqrt(dis) +crit<-sqrt(qchisq(.975,ncol(x))) +chk<-ifelse(dis>crit,1,0) +vec<-c(1:nrow(x)) +id<-vec[chk==1] +chkreg<-ifelse(abs(stanres)>2.5,1,0) +idreg<-vec[chkreg==1] +if(plotit){ +plot(dis,stanres,xlab="Robust distances",ylab="standardized residuals") +abline(-2.5,0) +abline(2.5,0) +abline(v=crit) +} +all=c(id,idreg) +ID=duplicated(all) +blp=all[ID] +vec=c(1:length(y)) +nkeep=vec +if(length(blp)>0)nkeep=vec[-blp] +if(!DIS)dis=NULL +list(levpoints=id,regout=idreg,bad.lev.points=blp,keep=nkeep,dis=dis,stanres=stanres,crit=crit) +} + +winreg<-function(x,y,iter=20,tr=.2,xout=FALSE,outfun=outpro,...){ +# +# Compute a Winsorized regression estimator +# The predictors are assumed to be stored in the n by p matrix x. +# +x<-as.matrix(x) +p1<-ncol(x)+1 +p<-ncol(x) +xy<-cbind(x,y) +xy<-elimna(xy) +x<-xy[,1:p] +y<-xy[,p1] +if(xout){ +m<-cbind(x,y) +flag<-outfun(x,plotit=FALSE,...)$keep +m<-m[flag,] +x<-m[,1:p] +y<-m[,p1] +} +x=as.matrix(x) +ma<-matrix(0,ncol(x),1) +m<-matrix(0,ncol(x),ncol(x)) +mvals<-apply(x,2,win,tr) +for (i in 1:ncol(x)){ +ma[i,1]<-wincor(x[,i],y,tr=tr)$cov +for (j in 1:ncol(x))m[i,j]<-wincor(x[,i],x[,j],tr=tr)$cov +} +slope<-solve(m,ma) +b0<-win(y,tr)-sum(slope%*%mvals) +for(it in 1:iter){ +res<-y-x%*%slope-b0 +for (i in 1:ncol(x))ma[i,1]<-wincor(x[,i],res,tr=tr)$cov +slopeadd<-solve(m,ma) +b0add<-win(res,tr)-sum(slopeadd%*%mvals) +if(max(abs(slopeadd),abs(b0add)) <.0001)break +slope<-slope+slopeadd +b0<-b0+b0add +} +if(max(abs(slopeadd),abs(b0add)) >=.0001) +paste("failed to converge in",iter,"iterations") +list(coef=c(b0,slope),resid=res) +} + + +anctgen<-function(x1,y1,x2,y2,pts,fr1=1,fr2=1,tr=.2){ +# +# Compare two independent groups using the ancova method +# in chapter 9. No assumption is made about the form of the regression +# lines--a running interval smoother is used. 
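+# A minimal usage sketch (hypothetical data; comparisons are made at the covariate
+# values supplied in pts):
+#   set.seed(1); x1<-runif(50); y1<-x1+rnorm(50); x2<-runif(50); y2<-rnorm(50)
+#   anctgen(x1,y1,x2,y2,pts=c(.25,.5,.75))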
+# +# Assume data are in x1 y1 x2 and y2 +# Comparisons are made at the design points contained in the vector +# pts +# +# Comparisons can be made using at most 28 design points, otherwise +# a critical value for controlling the experimentwise type I error cannot +# be computed. +# +if(length(pts)>=29)stop("At most 28 points can be compared") +n1<-1 +n2<-1 +vecn<-1 +for(i in 1:length(pts)){ +n1[i]<-length(y1[near(x1,pts[i],fr1)]) +n2[i]<-length(y2[near(x2,pts[i],fr2)]) +} +mat<-matrix(NA,length(pts),8) +dimnames(mat)<-list(NULL,c("X","n1","n2","DIF","TEST","se","ci.low","ci.hi")) +for (i in 1:length(pts)){ +g1<-y1[near(x1,pts[i],fr1)] +g2<-y2[near(x2,pts[i],fr2)] +g1<-g1[!is.na(g1)] +g2<-g2[!is.na(g2)] +test<-yuen(g1,g2,tr=tr) +mat[i,1]<-pts[i] +mat[i,2]<-length(g1) +mat[i,3]<-length(g2) +mat[i,4]<-test$dif +mat[i,5]<-test$teststat +mat[i,6]<-test$se +if(length(pts)>=2)critv<-smmcrit(test$df,length(pts)) +if(length(pts)==1)critv<-qt(.975,test$df) +cilow<-test$dif-critv*test$se +cihi<-test$dif+critv*test$se +mat[i,7]<-cilow +mat[i,8]<-cihi +} +list(output=mat,crit=critv) +} + +near<-function(x,pt,fr=1){ +# determine which values in x are near pt +# based on fr * mad +if(!is.vector(x))stop('x should be a vector') +m<-mad(x) +if(m==0){ +temp<-idealf(x) +m<-(temp$qu-temp$ql)/(qnorm(.75)-qnorm(.25)) +} +if(m==0)m<-sqrt(winvar(x)/.4129) +if(m==0)stop("All measures of dispersion are equal to 0") +dis<-abs(x-pt) +dflag<-dis <= fr*m +dflag +} + +regpres1<-function(isub,x,y,regfun,mval){ +# +# Perform regression using x[isub] to predict y[isub] +# isub is a vector of length n, +# a bootstrap sample from the sequence of integers +# 1, 2, 3, ..., n +# +# This function is used by other functions when computing +# bootstrap estimates. +# +# regfun is some regression method already stored in R +# It is assumed that regfun$coef contains the intercept and slope +# estimates produced by regfun. The regression methods written for +# this book, plus regression functions in R, have this property. +# +# x is assumed to be a matrix containing values of the predictors. +# +xmat<-matrix(x[isub,],mval,ncol(x)) +regboot<-regfun(xmat,y[isub]) +regboot<-regboot$coef +regboot +} + +runhat<-function(x,y,pts=x,est=tmean,fr=1,nmin=1,...){ +# +# running interval smoother that can be used with any measure +# of location or scale. By default, a 20% trimmed mean is used. +# This function computes an estimate of y for each x value stored in pts +# +# fr controls amount of smoothing +rmd<-rep(NA,length(pts)) +for(i in 1:length(pts)){ +val<-y[near(x,pts[i],fr)] +if(length(val)>=nmin)rmd[i]<-est(val,...) +} +rmd +} + +sqfun<-function(y,na.rm=FALSE){ +# +sqfun<-sum(y^2,na.rm=na.rm) +sqfun +} + +absfun<-function(y,na.rm=FALSE){ +absfun<-sum(abs(y),na.rm=na.rm) +absfun +} + +ancbootg<-function(x1,y1,x2,y2,pts,fr1=1,fr2=1,tr=.2,nboot=599){ +# +# Compare two independent groups using the ancova method +# in chapter 9. No assumption is made about the form of the regression +# lines--a running interval smoother is used. 
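+# A minimal usage sketch (hypothetical data; nboot bootstrap samples are used to
+# determine the critical value):
+#   set.seed(1); x1<-runif(60); y1<-x1+rnorm(60); x2<-runif(60); y2<-rnorm(60)
+#   ancbootg(x1,y1,x2,y2,pts=c(.25,.5,.75),nboot=599)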
+# +# Assume data are in x1 y1 x2 and y2 +# Comparisons are made at the design points contained in the vector +# pts +# +m1=elimna(cbind(x1,y1)) +x1=m1[,1] +y1=m1[,2] +m1=elimna(cbind(x2,y2)) +x2=m1[,1] +y2=m1[,2] +n1<-1 +n2<-1 +vecn<-1 +for(i in 1:length(pts)){ +n1[i]<-length(y1[near(x1,pts[i],fr1)]) +n2[i]<-length(y2[near(x2,pts[i],fr2)]) +} +mat<-matrix(NA,length(pts),8) +dimnames(mat)<-list(NULL,c("X","n1","n2","DIF","TEST","se","ci.low","ci.hi")) +gv<-vector("list",2*length(pts)) +for (i in 1:length(pts)){ +g1<-y1[near(x1,pts[i],fr1)] +g2<-y2[near(x2,pts[i],fr2)] +g1<-g1[!is.na(g1)] +g2<-g2[!is.na(g2)] +j<-i+length(pts) +gv[[i]]<-g1 +gv[[j]]<-g2 +} +I1<-diag(length(pts)) +I2<-0-I1 +con<-rbind(I1,I2) +test<-linconb(gv,con=con,tr=tr,nboot=nboot) +mat[,1]<-pts +mat[,2]<-n1 +mat[,3]<-n2 +mat[,4]<-test$psihat[,2] +mat[,5]<-test$test[,2] +mat[,6]<-test$test[,3] +mat[,7]<-test$psihat[,3] +mat[,8]<-test$psihat[,4] +list(output=mat,crit=test$crit) +} + +errfun<-function(yhat,y,error=sqfun){ +# +# Compute error terms for regpre +# +# yhat is an n by nboot matrix +# y is n by 1. +# +ymat<-matrix(y,nrow(yhat),ncol(yhat)) +blob<-yhat-ymat +errfun<-error(blob) +errfun +} + +near3d<-function(x,pt,fr=.8,m){ +# determine which values in x are near pt +# based on fr * cov.mve +# +# x is assumed to be an n by p matrix +# pt is a vector of length p (a point in p-space). +# m is cov.mve(x) computed by runm3d +# +library(MASS) +if(!is.matrix(x))stop("Data are not stored in a matrix.") +dis<-sqrt(mahalanobis(x,pt,m$cov)) +dflag<-dis < fr +dflag +} + +run3hat<-function(x,y,pts,fr=.8,tr=.2){ +# +# Compute y hat for each row of data in the matrix pts +# using a running interval method +# +# fr controls amount of smoothing +# tr is the amount of trimming +# x is an n by p matrix of predictors. +# pts is an m by p matrix, m>=1. +# +library(MASS) +set.seed(12) +if(!is.matrix(x))stop("Predictors are not stored in a matrix.") +if(!is.matrix(pts))stop("The third argument, pts, must be a matrix.") +m<-cov.mcd(x) +rmd<-1 # Initialize rmd +nval<-1 +for(i in 1:nrow(pts)){ +rmd[i]<-mean(y[near3d(x,pts[i,],fr,m)],tr) +nval[i]<-length(y[near3d(x,pts[i,],fr,m)]) +} +list(rmd=rmd,nval=nval) +} + + +idb<-function(x,n){ +# +# Determine whether a sequence of integers contains a 1, 2, ..., n. +# Return idb[i]=1 if the value i is in x; 0 otherwise. +# This function is used by regpre +# +m1<-matrix(0,n,n) +m1<-outer(c(1:n),x,"-") +m1<-ifelse(m1==0,1,0) +idb<-apply(m1,1,sum) +idb<-ifelse(idb>=1,0,1) +idb +} + +hratio<-function(x,y,regfun=bmreg){ +# +# Compute a p by p matrix of half-slope ratios +# +# regfun can be any R function that returns the coefficients in +# the vector regfun$coef, the first element of which contains the +# estimated intercept, the second element contains the estimate of +# the first predictor, etc. +# +# OUTPUT: +#The first row reports the half-slope +#ratios when the data are divided into two groups using the first predictor. +#The first column is the half-slope ratio for the first predictor, the +#second column is the half-slope ratio for the second predictor, and so forth. +#The second row contains the half-slope ratios when the data are divided +#into two groups using the second predictor, and so on. 
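+# Half-slope ratios close to 1 are consistent with a linear association;
+# ratios substantially different from 1 suggest curvature in how the
+# corresponding predictor is related to y.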
+# +x<-as.matrix(x) +xmat<-matrix(0,nrow(x),ncol(x)) +mval<-floor(length(y)/2) +mr<-length(y)-mval +xmatl<-matrix(0,mval,ncol(x)) +xmatr<-matrix(0,mr,ncol(x)) +hmat<-matrix(NA,ncol(x),ncol(x)) +isub<-c(1:length(y)) +ksub<-c(1:ncol(x))+1 +for (k in 1:ncol(x)){ +xord<-order(x[,k]) +yord<-y[xord] +yl<-yord[isub<=mval] +yr<-yord[isub>mval] +for (j in 1:ncol(x)){ +xmat[,j]<-x[xord,j] +xmatl[,j]<-xmat[isub<=mval,j] +xmatr[,j]<-xmat[isub>mval,j] +} +coefl<-regfun(xmatl,yl)$coef +coefr<-regfun(xmatr,yr)$coef +hmat[k,]<-coefr[ksub[ksub>=2]]/coefl[ksub[ksub>=2]] +} +hmat +} + + + +rung3d<-function(x,y,est=onestep,fr=1,plotit=TRUE,theta=50,phi=25,pyhat=FALSE,LP=FALSE, +expand=.5,scale=FALSE,zscale=TRUE, +nmin=0,xout=FALSE,eout=FALSE,outfun=out,SEED=TRUE,STAND=TRUE, +xlab="X",ylab="Y",zlab="",pr=TRUE,duplicate="error",ticktype="simple",...){ +# +# running mean using interval method +# + +# fr (the span) controls amount of smoothing +# est is the measure of location. +# (Goal is to determine est(y) given x.) +# x is an n by p matrix of predictors. +# +# pyhat=T, predicted values are returned. +# +library(MASS) +library(akima) +if(SEED)set.seed(12) # set seed for cov.mve +if(eout && xout)stop("Not allowed to have eout=xout=TRUE") +if(!is.matrix(x))stop("Data are not stored in a matrix.") +if(nrow(x) != length(y))stop("Number of rows in x does not match length of y") +temp<-cbind(x,y) +p<-ncol(x) +p1<-p+1 +temp<-elimna(temp) # Eliminate any rows with missing values. +if(eout){ +keepit<-outfun(temp,plotit=FALSE)$keep +x<-x[keepit,] +y<-y[keepit] +} +if(xout){ +keepit<-outfun(x,plotit=FALSE,STAND=STAND,...)$keep +x<-x[keepit,] +y<-y[keepit] +} +if(zscale){ +for(j in 1:p1){ +temp[,j]<-(temp[,j]-median(temp[,j]))/mad(temp[,j]) +}} +x<-temp[,1:p] +y<-temp[,p1] +m<-cov.mve(x) +iout<-c(1:nrow(x)) +rmd<-1 # Initialize rmd +nval<-1 +for(i in 1:nrow(x))rmd[i]<-est(y[near3d(x,x[i,],fr,m)],...) +for(i in 1:nrow(x))nval[i]<-length(y[near3d(x,x[i,],fr,m)]) +if(ncol(x)==2){ +if(plotit){ +if(pr){ +if(!scale)print("With dependence, suggest using scale=TRUE") +} +fitr<-rmd[nval>nmin] +y<-y[nval>nmin] +x<-x[nval>nmin,] +iout<-c(1:length(fitr)) +nm1<-length(fitr)-1 +for(i in 1:nm1){ +ip1<-i+1 +for(k in ip1:length(fitr))if(sum(x[i,]==x[k,])==2)iout[k]<-0 +} +fitr<-fitr[iout>=1] # Eliminate duplicate points in the x-y plane +# This is necessary when doing three dimensional plots +# with the R function interp +if(LP)fitr=lplot(x[iout>=1,],fitr,pyhat=TRUE,pr=FALSE,plotit=FALSE)$yhat +mkeep<-x[iout>=1,] +fit<-interp(mkeep[,1],mkeep[,2],fitr,duplicate=duplicate) +persp(fit,theta=theta,phi=phi,expand=expand, +scale=scale,xlab=xlab,ylab=ylab,zlab=zlab,ticktype=ticktype) +}} +if(pyhat)last<-rmd +if(!pyhat)last <- "Done" + last +} + +mbmreg<-function(x,y,iter=20,bend=2*sqrt(ncol(x)+1)/nrow(x),xout=FALSE,outfun=outpro,...){ +# +# Compute a bounded M regression estimator using +# Huber Psi and Schweppe weights with +# regression outliers getting a weight of zero. +# +# This is the modified M-regression estimator in Chapter 8 +# +# The predictors are assumed to be stored in the n by p matrix x. 
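+# Outline of the computation below: regression outliers are flagged from the
+# LMS residuals (|standardized residual| > 2.5 gets weight zero), and the fit
+# is then refined by iteratively reweighted least squares using the Huber Psi
+# function with leverage-based (Schweppe) weights sqrt(1 - h_ii).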
+# +x<-as.matrix(x) +p1<-ncol(x)+1 +p<-ncol(x) +xy<-cbind(x,y) +xy<-elimna(xy) +x<-xy[,1:p] +y<-xy[,p1] +if(xout){ +m<-cbind(x,y) +flag<-outfun(x,plotit=FALSE,...)$keep +m<-m[flag,] +x<-m[,1:p] +y<-m[,p1] +} +x<-as.matrix(x) +if(is.matrix(y)){ +if(ncol(y)==1)y=as.vector(y) +} +x1<-cbind(1,x) +library(MASS) +reslms<-lmsreg(x,y)$resid +sighat<-sqrt(median(reslms^2)) +sighat<-1.4826*(1+(5/(length(y)-ncol(x)-1)))*sighat +if(sighat==0)warning("The estimated measure of scale, based on the residuals using lms regression, is zero") +temp<-ifelse(sighat*reslms>0,abs(reslms)/sighat,0*reslms) +wt<-ifelse(temp<=2.5,1,0) +init<-lsfit(x,y,wt) +resid<-init$residuals +nu<-sqrt(1-hat(x1)) +low<-ncol(x)+1 +for(it in 1:iter){ +ev<-sort(abs(resid)) +scale<-median(ev[c(low:length(y))])/qnorm(.75) +rov<-(resid/scale)/nu +psi<-ifelse(abs(rov)<=bend,rov,bend*sign(rov)) # Huber Psi +wt<-nu*psi/(resid/scale) +wt<-ifelse(temp<=2.5,wt,0) +new<-lsfit(x,y,wt) +if(abs(max(new$coef-init$coef)<.0001))break +init$coef<-new$coef +resid<-new$residuals +} +resid<-y-x1%*%new$coef +if(abs(max(new$coef-init$coef)>=.0001)) +paste("failed to converge in",iter,"steps") +list(coef=new$coef,residuals=resid,w=wt) +} + +rankisub<-function(x,y){ +# +# compute phat and an estimate of its variance +# +x<-x[!is.na(x)] # Remove missing values from x +y<-y[!is.na(y)] # Remove missing values from y +u<-outer(x,y,FUN="<") +p1<-0 +p2<-0 +for (j in 1:length(y)){ +temp<-outer(u[,j],u[,j]) +p1<-p1+sum(temp)-sum(u[,j]*u[,j]) +} +for (i in 1: length(x)){ +temp<-outer(u[i,],u[i,]) +p2<-p2+sum(temp)-sum(u[i,]*u[i,]) +} +p<-sum(u)/(length(x)*length(y)) +pad<-p +if(p==0)pad<-.5/(length(x)*length(y)) +if(p==1)pad<-(1-.5)/(length(x)*length(y)) +p1<-p1/(length(x)*length(y)*(length(x)-1)) +p2<-p2/(length(x)*length(y)*(length(y)-1)) +var<-pad*(1.-pad)*(((length(x)-1)*(p1-p^2)/(pad*(1-pad))+1)/(1-1/length(y))+ +((length(y)-1)*(p2-p^2)/(pad*(1-pad))+1)/(1-1/length(x))) +var<-var/(length(x)*length(y)) +list(phat=p,sqse=var) +} + +pbcor<-function(x,y,beta=.2){ +# Compute the percentage bend correlation between x and y. +# +# beta is the bending constant for omega sub N. +# +if(length(x)!=length(y))stop("The vectors do not have equal lengths") +m1=cbind(x,y) +m1<-elimna(m1) +nval=nrow(m1) +x<-m1[,1] +y<-m1[,2] +# Have eliminated missing values +temp<-sort(abs(x-median(x))) +omhatx<-temp[floor((1-beta)*length(x))] +temp<-sort(abs(y-median(y))) +omhaty<-temp[floor((1-beta)*length(y))] +a<-(x-pbos(x,beta))/omhatx +b<-(y-pbos(y,beta))/omhaty +a<-ifelse(a<=-1,-1,a) +a<-ifelse(a>=1,1,a) +b<-ifelse(b<=-1,-1,b) +b<-ifelse(b>=1,1,b) +pbcor<-sum(a*b)/sqrt(sum(a^2)*sum(b^2)) +test<-pbcor*sqrt((length(x) - 2)/(1 - pbcor^2)) +sig<-2*(1 - pt(abs(test),length(x)-2)) +list(cor=pbcor,test=test,p.value=sig,n=nval) +} + +rmanovab<-function(x,tr=.2,alpha=.05,grp=0,nboot=599){ +# +# A bootstrap-t for comparing the trimmed means of dependent groups. +# By default, 20% trimming is used with B=599 bootstrap samples. +# +# The optional argument grp is used to select a subset of the groups +# and exclude the rest. 
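+# Illustrative call (m is a hypothetical n by J matrix of repeated measures):
+#   rmanovab(m, tr = .2, nboot = 599)
+# The p-value is the proportion of bootstrap test statistics, computed from
+# data centered at the trimmed means, that are at least as large as the
+# observed rmanova test statistic.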
+# +# x can be an n by J matrix or it can have list mode +# +if(is.data.frame(x))x=as.matrix(x) +if(!is.list(x) && !is.matrix(x))stop("Data must be stored in a matrix or in list mode.") +if(is.list(x))x=matl(x) +#{ +#if(sum(grp)==0)grp<-c(1:length(x)) +# put the data in an n by J matrix +#mat<-matrix(0,length(x[[1]]),length(grp)) +#for (j in 1:length(grp))mat[,j]<-x[[grp[j]]] +#} +if(is.matrix(x)){ +if(sum(grp)==0)grp<-c(1:ncol(x)) +mat<-x[,grp] +} +mat=elimna(mat) +J<-ncol(mat) +connum<-(J^2-J)/2 +bvec<-matrix(0,connum,nboot) +set.seed(2) # set seed of random number generator so that +# results can be duplicated. +print("Taking bootstrap samples. Please wait.") +data<-matrix(sample(nrow(mat),size=nrow(mat)*nboot,replace=TRUE),nrow=nboot) +xcen<-matrix(0,nrow(mat),ncol(mat)) +for (j in 1:J)xcen[,j]<-mat[,j]-mean(mat[,j],tr) #Center data +bvec<-apply(data,1,tsubrmanovab,xcen,tr) +# bvec is vector of nboot bootstrap test statistics. +icrit<-round((1-alpha)*nboot) +bvec<-sort(bvec) +crit<-bvec[icrit] +test<-rmanova(mat,tr,grp)$test +pv=mean(test<=bvec) +list(teststat=test,crit=crit,p.value=pv) +} + + +tsubrmanovab<-function(isub,x,tr){ +# +# Compute test statistic for trimmed means +# when comparing dependent groups. +# By default, 20% trimmed means are used. +# isub is a vector of length n, +# a bootstrap sample from the sequence of integers +# 1, 2, 3, ..., n +# +# This function is used by rmanovab +# +tsub<-rmanovab1(x[isub,],tr=tr)$test +tsub +} + + + + + +rmanovab1<-function(x,tr=.2,grp=c(1:length(x))){ +# +# A heteroscedastic one-way repeated measures ANOVA for trimmed means. +# +# The data are assumed to be stored in $x$ which can +# be either an n by J matrix, or an R variable having list mode. +# If the data are stored in list mode, +# length(x) is assumed to correspond to the total number of groups. +# By default, the null hypothesis is that all group have a common mean. +# To compare a subset of the groups, use grp to indicate which +# groups are to be compared. For example, if you type the +# command grp<-c(1,3,4), and then execute this function, groups +# 1, 3, and 4 will be compared with the remaining groups ignored. +# +if(!is.list(x) && !is.matrix(x))stop("Data must be stored in a matrix or in list mode.") +if(is.list(x)){ +J<-length(grp) # The number of groups to be compared +m1<-matrix(x[[grp[1]]],length(x[[grp[1]]]),1) +for(i in 2:J){ # Put the data into an n by J matrix +m2<-matrix(x[[grp[i]]],length(x[[i]]),1) +m1<-cbind(m1,m2) +} +} +if(is.matrix(x)){ +if(length(grp)=ncol(x))m1<-as.matrix(x) +J<-ncol(x) +} +# +# Raw data are now in the matrix m1 +# +m2<-matrix(0,nrow(m1),ncol(m1)) +xvec<-1 +g<-floor(tr*nrow(m1)) #2g is the number of observations trimmed. 
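+# The test statistic computed next is the ratio of the between-group sum of
+# squares of the trimmed means to a within-subject Winsorized error term; the
+# degrees of freedom are adjusted by an estimated sphericity correction
+# (ehat/etil) derived from the Winsorized covariance matrix.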
+for(j in 1:ncol(m1)){ # Putting Winsorized values in m2 +m2[,j]<-winval(m1[,j],tr) +xvec[j]<-mean(m1[,j],tr) +} +xbar<-mean(xvec) +qc<-(nrow(m1)-2*g)*sum((xvec-xbar)^2) +m3<-matrix(0,nrow(m1),ncol(m1)) +m3<-sweep(m2,1,apply(m2,1,mean)) # Sweep out rows +m3<-sweep(m3,2,apply(m2,2,mean)) # Sweep out columns +m3<-m3+mean(m2) # Grand Winsorized mean swept in +qe<-sum(m3^2) +test<-(qc/(qe/(nrow(m1)-2*g-1))) +# +# Next, estimate the adjusted degrees of freedom +# +v<-winall(m1)$cov +vbar<-mean(v) +vbard<-mean(diag(v)) +vbarj<-1 +for(j in 1:J){ +vbarj[j]<-mean(v[j,]) +} +A<-J*J*(vbard-vbar)^2/(J-1) +B<-sum(v*v)-2*J*sum(vbarj^2)+J*J*vbar^2 +ehat<-A/B +etil<-(nrow(m2)*(J-1)*ehat-2)/((J-1)*(nrow(m2)-1-(J-1)*ehat)) +etil<-min(1.,etil) +df1<-(J-1)*etil +df2<-(J-1)*etil*(nrow(m2)-2*g-1) +siglevel<-1-pf(test,df1,df2) +list(test=test,df=c(df1,df2),p.value=siglevel,tmeans=xvec,ehat=ehat,etil=etil) +} + + + + +mee<-function(x,y,alpha=.05){ +# +# For two independent groups, compute a 1-\alpha confidence interval +# for p=P(X 0){print("Warning: Tied values detected") +print("so even if distributions are identical,") +print("P(X 0) +print("Tied values detected. Interchanging columns might give different results. That is, comparing rows based on P(XY)") +ck<-(K^2-K)/2 +cj<-(J^2-J)/2 +tc<-ck*cj +if(tc>28){ +print("Warning: The number of contrasts exceeds 28.") +print("The critical value being used is based on 28 contrasts") +tc<-28 +} +idmat<-matrix(NA,nrow=tc,ncol=8) +dimnames(idmat)<-list(NULL,c("row","row","col","col","ci.lower","ci.upper","estimate","test.stat")) +crit<-smmcrit(300,tc) +if(alpha != .05){ +crit<-smmcrit01(300,tc) +if(alpha != .01){print("Warning: Only alpha = .05 and .01 are allowed,") +print("alpha = .01 is being assumed.") +} +} +phatsqse<-0 +phat<-0 +allit<-0 +jcount<-0-K +it<-0 +for(j in 1:J){ +for(jj in 1:J){ +if(j < jj){ +for(k in 1:K){ +for(kk in 1:K){ +if(k < kk){ +it<-it+1 +idmat[it,1:4]<-c(j,jj,k,kk) +}}}}} +jcount<-jcount+K +for(k in 1:K){ +for(kk in 1:K){ +if(k < kk){ +allit<-allit+1 +xx<-x[[grp[k+jcount]]] +yy<-x[[grp[kk+jcount]]] +temp<-rankisub(xx,yy) +phat[allit]<-temp$phat +phatsqse[allit]<-temp$sqse +}}}} +# +# Compute the contrast matrix. Each row contains a 1, -1 and the rest 0 +# That is, all pairwise comparisons among K groups. +# +con<-matrix(0,cj,J) +id<-0 +Jm<-J-1 +for (j in 1:Jm){ +jp<-j+1 +for (k in jp:J){ +id<-id+1 +con[id,j]<-1 +con[id,k]<-0-1 +}} +IK<-diag(ck) +B<-kron(con,IK) +ntest<-ck*(J^2-J)/2 +test<-0 +civecl<-0 +civecu<-0 +for (itest in 1:ntest){ +temp1<-sum(B[itest,]*phat) +idmat[itest,7]<-temp1 +idmat[itest,8]<-temp1/sqrt(sum(B[itest,]^2*phatsqse)) +idmat[itest,5]<-temp1-crit*sqrt(sum(B[itest,]^2*phatsqse)) +idmat[itest,6]<-temp1+crit*sqrt(sum(B[itest,]^2*phatsqse)) +} +nsig<-sum((abs(idmat[,8])>crit)) +list(phat=phat,ci=idmat,crit=crit,nsig=nsig) +} + + +regts1<-function(vstar,yhat,res,mflag,x,tr){ +ystar<-yhat+res*vstar +bres<-ystar-mean(ystar,tr) +rval<-0 +for (i in 1:nrow(x)){ +rval[i]<-sum(bres[mflag[,i]]) +} +rval +} + +bptd<-function(x,tr=.2,alpha=.05,con=0,nboot=599){ +# +# Using the percentile t bootstrap method, +# compute a .95 confidence interval for all linear contasts +# specified by con, a J by C matrix, where C is the number of +# contrasts to be tested, and the columns of con are the +# contrast coefficients. +# +# If con is not specified, all pairwise comparisons are performed. +# +# The trimmed means of dependent groups are being compared. +# By default, 20% trimming is used with B=599 bootstrap samples. 
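+# Illustrative contrast matrix for J = 3 dependent groups (columns are the
+# contrasts; here group 1 vs. group 2 and group 1 vs. group 3):
+#   con <- matrix(c(1, -1, 0,  1, 0, -1), nrow = 3)
+#   bptd(x, con = con)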
+# +# x can be an n by J matrix or it can have list mode +# +if(is.data.frame(x))x=as.matrix(x) +if(!is.list(x) && !is.matrix(x))stop("Data must be stored in a matrix or in list mode.") +if(is.list(x)){ +if(is.matrix(con)){ +if(length(x)!=nrow(con))stop("The number of rows in con is not equal to the number of groups.") + }} +if(is.list(x)){ +# put the data in an n by J matrix +mat<-matrix(0,length(x[[1]]),length(x)) +for (j in 1:length(x))mat[,j]<-x[[j]] +} +if(is.matrix(x))mat=x +J<-ncol(mat) +Jm<-J-1 +if(sum(con^2)==0){ +d<-(J^2-J)/2 +con<-matrix(0,J,d) +id<-0 +for (j in 1:Jm){ +jp<-j+1 +for (k in jp:J){ +id<-id+1 +con[j,id]<-1 +con[k,id]<-0-1 +}}} +if(is.matrix(x)){ +if(ncol(x)!=nrow(con))stop("The number of rows in con is not equal to the number of groups.") +mat<-x +} +if(sum(is.na(mat)>=1))stop("Missing values are not allowed.") +J<-ncol(mat) +connum<-ncol(con) +bvec<-matrix(0,connum,nboot) +set.seed(2) # set seed of random number generator so that +# results can be duplicated. +# data is an nboot by n matrix +xcen<-matrix(0,nrow(mat),ncol(mat)) #An n by J matrix +xbars<-matrix(0,nboot,ncol(mat)) +psihat<-matrix(0,connum,nboot) +print("Taking bootstrap samples. Please wait.") +data<-matrix(sample(nrow(xcen),size=nrow(mat)*nboot,replace=TRUE),nrow=nboot) +for (j in 1:J){ +xcen[,j]<-mat[,j]-mean(mat[,j],tr) #Center data +xbars[,j]<-apply(data,1,bptdmean,xcen[,j],tr) +} +for (ic in 1:connum){ +paste("Working on contrast number",ic) +bvec[ic,]<-apply(data,1,bptdsub,xcen,tr,con[,ic]) +# bvec is a connum by nboot matrix containing the bootstrap sq standard error +psihat[ic,]<-apply(xbars,1,bptdpsi,con[,ic]) +} +bvec<-psihat/sqrt(bvec) #bvec now contains bootstrap test statistics +bvec<-abs(bvec) #Doing two-sided confidence intervals +icrit<-round((1-alpha)*nboot) +critvec<-apply(bvec,2,max) +critvec<-sort(critvec) +crit<-critvec[icrit] +psihat<-matrix(0,connum,4) +dimnames(psihat)<-list(NULL,c("con.num","psihat","ci.lower","ci.upper")) +test<-matrix(NA,connum,3) +dimnames(test)<-list(NULL,c("con.num","test","se")) +isub<-c(1:nrow(mat)) +tmeans<-apply(mat,2,mean,trim=tr) +sqse<-1 +psi<-1 +for (ic in 1:ncol(con)){ +sqse[ic]<-bptdsub(isub,mat,tr,con[,ic]) +psi[ic]<-sum(con[,ic]*tmeans) +psihat[ic,1]<-ic +psihat[ic,2]<-psi[ic] +psihat[ic,3]<-psi[ic]-crit*sqrt(sqse[ic]) +psihat[ic,4]<-psi[ic]+crit*sqrt(sqse[ic]) +test[ic,1]<-ic +test[ic,2]<-psi[ic]/sqrt(sqse[ic]) +test[ic,3]<-sqrt(sqse[ic]) +} +list(test=test,psihat=psihat,crit=crit,con=con) +} + +twomanbt<-function(x,y,tr=.2,alpha=.05,nboot=599){ +# +# Two-sample Behrens-Fisher problem. +# +# For each of two independent groups, +# have p measures for each subject. The goal is to compare the +# trimmed means of the first measure, the trimmed means for the second +# and so on. So there are a total of p comparisons between the two +# groups, one for each measure. +# +# The percentile t bootstrap method is used to +# compute a .95 confidence interval. +# +# By default, 20% trimming is used with B=599 bootstrap samples. +# +# x contains the data for the first group; it +# can be an n by J matrix or it can have list mode. +# y contains the data for the second group. 
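+# Illustrative call: twomanbt(x, y), with x an n1-by-p and y an n2-by-p matrix,
+# compares the trimmed means of the two groups on each of the p measures; a
+# max-type bootstrap-t statistic is used so that the familywise error rate
+# over the p comparisons is approximately alpha.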
+# +if(!is.list(x) && !is.matrix(x))stop("Data must be stored in a matrix or in list mode.") +if(!is.list(y) && !is.matrix(y))stop("Data must be stored in a matrix or in list mode.") +if(is.list(x)){ +# put the data in an n by p matrix +matx<-matrix(0,length(x[[1]]),length(x)) +for (j in 1:length(x))matx[,j]<-x[[j]] +} +if(is.list(y)){ +# put the data in an n by p matrix +maty<-matrix(0,length(y[[1]]),length(y)) +for (j in 1:length(y))maty[,j]<-y[[j]] +} +if(is.matrix(x)){ +matx<-x +} +if(is.matrix(y)){ +maty<-y +} +if(ncol(matx)!=ncol(maty))stop("The number of variables for group one is not equal to the number for group 2") +if(sum(is.na(mat)>=1))stop("Missing values are not allowed.") +J<-ncol(mat) +connum<-ncol(matx) +bvec<-matrix(0,connum,nboot) +set.seed(2) # set seed of random number generator so that +# results can be duplicated. +xcen<-matrix(0,nrow(matx),ncol(matx)) +ycen<-matrix(0,nrow(maty),ncol(maty)) +for (j in 1:connum)xcen[,j]<-matx[,j]-mean(matx[,j],tr) #Center data +for (j in 1:connum)ycen[,j]<-maty[,j]-mean(maty[,j],tr) #Center data +print("Taking bootstrap samples. Please wait.") +bootx<-sample(nrow(matx),size=nrow(matx)*nboot,replace=TRUE) +booty<-sample(nrow(maty),size=nrow(maty)*nboot,replace=TRUE) +matval<-matrix(0,nrow=nboot,ncol=connum) +for (j in 1:connum){ +datax<-matrix(xcen[bootx,j],ncol=nrow(matx)) +datay<-matrix(ycen[booty,j],ncol=nrow(maty)) +paste("Working on variable", j) +top<- apply(datax, 1., mean, tr) - apply(datay, 1., mean, tr) +botx <- apply(datax, 1., trimse, tr) +boty <- apply(datay, 1., trimse, tr) +matval[,j]<-abs(top)/sqrt(botx^2. + boty^2.) +} +bvec<-apply(matval,1,max) +icrit<-round((1-alpha)*nboot) +bvec<-sort(bvec) +crit<-bvec[icrit] +psihat<-matrix(0,ncol=4,nrow=connum) +dimnames(psihat)<-list(NULL,c("con.num","psihat","ci.lower","ci.upper")) +test<-matrix(0,ncol=3,nrow=connum) +dimnames(test)<-list(NULL,c("con.num","test","se")) +for(j in 1:ncol(matx)){ +temp<-yuen(matx[,j],maty[,j],tr=tr) +test[j,1]<-j +test[j,2]<-abs(temp$test) +test[j,3]<-temp$se +psihat[j,1]<-j +psihat[j,2]<-mean(matx[,j],tr)-mean(maty[,j]) +psihat[j,3]<-mean(matx[,j],tr)-mean(maty[,j])-crit*temp$se +psihat[j,4]<-mean(matx[,j],tr)-mean(maty[,j])+crit*temp$se +} +list(psihat=psihat,teststat=test,critical.value=crit) +} + + + +bootdep<-function(x,tr=.2,nboot=500){ +# +# x is a matrix (n by p) or has list mode +# Goal: Obtain boostrap samples and compute +# the trimmed each for each of the p variables. +# Return the bootstrap means in a matrix +# +# tr is the amount of trimming +# nboot is the number of bootstrap samples +# +if(is.matrix(x))m1<-x +if(is.list(x)){ +# put the data into a matrix +m1<-matrix(NA,ncol=length(x)) +for(j in 1:length(x))m1[,j]<-x[[j]] +} +data<-matrix(sample(nrow(m1),size=nrow(m1)*nboot,replace=TRUE),nrow=nboot) +bvec<-matrix(NA,ncol=ncol(m1),nrow=nboot) +for(j in 1:ncol(m1)){ +temp<-m1[,j] +bvec[,j]<-apply(data, 1., bootdepsub,temp,tr) +} +# return a nboot by p matrix of bootstrap trimmed means. +bvec +} + +bootdepsub<-function(isub,x,tr){ +tsub<-mean(x[isub],tr) +tsub +} +corb<-function(x,y,corfun=pbcor,nboot=599,alpha=.05,plotit=FALSE,xlab='X',ylab='Y',SEED=TRUE,...){ +# +# Compute a 1-alpha confidence interval for a correlation. +# The default correlation is the percentage bend. +# +# The function corfun is any R function that returns a +# correlation coefficient in corfun$cor. The functions pbcor and +# wincor follow this convention. +# +# When using Pearson's correlation, and when n<250, use +# lsfitci instead. 
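+# Illustrative calls: corb(x, y) gives a bootstrap CI for the percentage bend
+# correlation; corb(x, y, corfun = spear) does the same for Spearman's rho,
+# since spear also returns its estimate in $cor.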
+# +# The default number of bootstrap samples is nboot=599 +# +m1=cbind(x,y) +m1<-elimna(m1) # Eliminate rows with missing values +nval=nrow(m1) +x<-m1[,1] +y<-m1[,2] +est<-corfun(x,y,...)$cor +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot) +bvec<-apply(data,1,corbsub,x,y,corfun,...) # A 1 by nboot matrix. +ihi<-floor((1-alpha/2)*nboot+.5) +ilow<-floor((alpha/2)*nboot+.5) +bsort<-sort(bvec) +corci<-1 +corci[1]<-bsort[ilow] +corci[2]<-bsort[ihi] +phat <- sum(bvec < 0)/nboot +sig <- 2 * min(phat, 1 - phat) +if(plotit)outpro(cbind(x,y),xlab=xlab,ylab=ylab,plotit=TRUE) +list(cor.ci=corci,p.value=sig,cor.est=est) +} + +corbsub<-function(isub,x,y,corfun,...){ +# +# Compute correlation for x[isub] and y[isub] +# isub is a vector of length n, +# a bootstrap sample from the sequence of integers +# 1, 2, 3, ..., n +# +# This function is used by other functions when computing +# bootstrap estimates. +# +# corfun is some correlation function already stored in R +# +corbsub<-corfun(x[isub],y[isub],...)$cor +corbsub +} + + + +depreg<-function(x,y,xout=FALSE,outfun=out,...){ +# +# Compute the depth regression estimator. +# Only a single predictor is allowed in this version +# Perhaps use instead +# +if(is.matrix(x)){ +if(ncol(x)>=2)stop("Only a single predicor is allowed") +x<-as.vector(x) +} +xy=cbind(x,y) +xy=elimna(xy) +if(xout){ +flag<-outfun(xy[,1],plotit=FALSE,...)$keep +xy<-xy[flag,] +} +x=xy[,1] +y=xy[,2] +ord<-order(x) +xs<-x[ord] +ys<-y[ord] +vec1<-outer(ys,ys,"-") +vec2<-outer(xs,xs,"-") +v1<-vec1[vec2>0] +v2<-vec2[vec2>0] +slope<-v1/v2 +vec3<-outer(ys,ys,"+") +vec4<-outer(xs,xs,"+") +v3<-vec3[vec2>0] +v4<-vec4[vec2>0] +deep<-NA +inter<-v3/2-slope*v4/2 +temp<-matrix(c(inter,slope),ncol=2) +deep<-apply(temp,1,rdepth.orig,x,y) +best<-max(deep) +coef<-NA +coef[2]<-mean(slope[deep==best]) +coef[1]<-mean(inter[deep==best]) +res<-y-coef[2]*x-coef[1] +list(coef=coef,residuals=res) +} + +tsgreg<-function(x,y,tries=(length(y)^2-length(y))/2){ +# +# +x<-as.matrix(x) +if(nrow(x)!=length(y))stop("Length of y must match the number of rows of x") +# eliminate any rows with missing values. +m1<-cbind(x,y) +m1<-elimna(m1) +x<-m1[,1:ncol(x)] +y<-m1[,ncol(x)+1] +set.seed(2) +data<-matrix(NA,ncol=ncol(x)+1,nrow=tries) +for(i in 1:tries){ +data[i,]<-sample(length(y),size=ncol(x)+1,replace=FALSE) +} +bvec <- apply(data, 1,tsgregs1,x,y) +coef<-0 +numzero<-0 +loc<-0 +for (i in 1:ncol(x)){ +ip<-i+1 +temp<-bvec[ip,] +loc[i]<-median(x[,i]) +coef[i+1]<-median(temp[temp!=0]) +numzero[i]<-length(temp[temp==0]) +} +ip<-ncol(x)+1 +coef[1]<-median(y)-sum(coef[2:ip]*loc) +res<-y-x %*% coef[2:ip] - coef[1] +list(coef=coef,residuals=res,numzero=numzero) +} +tsgregs1<-function(isub,x,y){ +# +# This function is used by tsgreg +# +# Perform regression using x[isub,] to predict y[isub] +# isub is a vector of length nsub, determined by tsgreg +# +tsgregs1<-lsfit(x[isub,],y[isub])$coef +} + +lts1reg<-function(x,y,tr=.2,h=NA){ +# +# Compute the least trimmed squares regression estimator. 
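+# The criterion minimized is the sum of the h smallest squared residuals,
+# where h defaults to n - floor(tr*n); the candidate fits are the lines
+# through each pair of points, the same candidates used by the Theil--Sen
+# approach.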
+# Only a single predictor is allowed in this version +# +if(is.na(h))h<-length(x)-floor(tr * length(x)) +ord<-order(x) +xs<-x[ord] +ys<-y[ord] +vec1<-outer(ys,ys,"-") +vec2<-outer(xs,xs,"-") +v1<-vec1[vec2>0] +v2<-vec2[vec2>0] +slope<-v1/v2 +vec3<-outer(ys,ys,"+") +vec4<-outer(xs,xs,"+") +v3<-vec3[vec2>0] +v4<-vec4[vec2>0] +val<-NA +inter<-v3/2-slope*v4/2 +for(i in 1:length(slope)){ +#risk<-(y[vec2>0]-slope[i]*x[vec2>0]-inter[i])^2 +risk<-(y-slope[i]*x-inter[i])^2 +risk<-sort(risk) +val[i]<-sum(risk[1:h]) +} +best<-min(val) +coef<-NA +coef[2]<-mean(slope[val==best]) +coef[1]<-mean(inter[val==best]) +res<-y-coef[2]*x-coef[1] +list(coef=coef,residuals=res) +} + +man2pb<-function(x,y,alpha=.05,nboot=NA,crit=NA,SEED=TRUE){ +# +# Two-sample Behrens-Fisher problem. +# +# For each of two independent groups, +# have P measures for each subject. The goal is to compare the 20% +# trimmed means of the first group to the trimmed means for the second; +# this is done for each of the P measures. +# +# The percentile bootstrap method is used to +# compute a .95, or .975, or .99 confidence interval. +# +# Only 20% trimming is allowed. +# +# x contains the data for the first group; it +# can be an n by J matrix or it can have list mode. +# y contains the data for the second group. +# +# Vectors with missing values are eliminated from the analysis. +# +if(!is.list(x) && !is.matrix(x))stop("Data must be stored in a matrix or in list mode.") +if(!is.list(y) && !is.matrix(y))stop("Data must be stored in a matrix or in list mode.") +if(is.list(x)){ +# put the data in an n by p matrix +matx<-matrix(0,length(x[[1]]),length(x)) +for (j in 1:length(x))matx[,j]<-x[[j]] +} +if(is.list(y)){ +# put the data in an n by p matrix +maty<-matrix(0,length(y[[1]]),length(y)) +for (j in 1:length(y))maty[,j]<-y[[j]] +} +if(is.matrix(x)){ +matx<-x +} +if(is.matrix(y)){ +maty<-y +} +if(ncol(matx)!=ncol(maty))stop("The number of variables for group 1 is not equal to the number for group 2") +if(sum(is.na(matx)>=1))matx<-elimna(matx) +if(sum(is.na(maty)>=1))maty<-elimna(maty) +J<-ncol(matx) +connum<-ncol(matx) +if(is.na(nboot)){ +if(ncol(matx)<=4)nboot<-2000 +if(ncol(matx)>4)nboot<-5000 +} +# +# Determine critical value +# +if(ncol(matx)==2){ +if(alpha==.05)crit<-.0125 +if(alpha==.025)crit<-.0060 +if(alpha==.01)crit<-.0015 +} +if(ncol(matx)==3){ +if(alpha==.05)crit<-.007 +if(alpha==.025)crit<-.003 +if(alpha==.01)crit<-.001 +} +if(ncol(matx)==4){ +if(alpha==.05)crit<-.0055 +if(alpha==.025)crit<-.0020 +if(alpha==.01)crit<-.0005 +} +if(ncol(matx)==5){ +if(alpha==.05)crit<-.0044 +if(alpha==.025)crit<-.0016 +if(alpha==.01)crit<-.0005 +} +if(ncol(matx)==6){ +if(alpha==.05)crit<-.0038 +if(alpha==.025)crit<-.0018 +if(alpha==.01)crit<-.0004 +} +if(ncol(matx)==7){ +if(alpha==.05)crit<-.0028 +if(alpha==.025)crit<-.0010 +if(alpha==.01)crit<-.0002 +} +if(ncol(matx)==8){ +if(alpha==.05)crit<-.0026 +if(alpha==.025)crit<-.001 +if(alpha==.01)crit<-.0002 +} +if(ncol(matx)>8){ +# Use an approximation of the critical value +if(alpha==.025)warning("Can't determine a critical value when alpha=.025 and the number of groups exceeds 8.") +nmin<-min(nrow(matx),nrow(maty)) +if(alpha==.05){ +if(nmin<100)wval<-smmcrit(60,ncol(matx)) +if(nmin>=100)wval<-smmcrit(300,ncol(matx)) +wval<-0-wval +crit<-pnorm(wval) +} +if(alpha==.01){ +if(nmin<100)wval<-smmcrit01(60,ncol(matx)) +if(nmin>=100)wval<-smmcrit01(300,ncol(matx)) +wval<-0-wval +crit<-pnorm(wval) +} +} +if(is.na(crit))warning("Critical values can be determined for alpha=.05, .025 and .01 only") 
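+# crit is the per-comparison significance level for the percentile bootstrap;
+# the tabled values above are intended to make the familywise Type I error
+# probability approximately alpha. icl and icu, computed next, are the
+# corresponding order statistics of the sorted bootstrap differences.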
+icl<-ceiling(crit*nboot) +icu<-ceiling((1-crit)*nboot) +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +print("Taking bootstrap samples. Please wait.") +bootx<-bootdep(matx,tr=.2,nboot) +booty<-bootdep(maty,tr=.2,nboot) + # + # Now have an nboot by J matrix of bootstrap values. + # +test<-1 +for (j in 1:connum){ +test[j]<-sum(bootx[,j].5)test[j]<-1-test[j] +} +output <- matrix(0, connum, 5) + dimnames(output) <- list(NULL, c("variable #", "psihat", "p.value", + "ci.lower", "ci.upper")) + tmeanx <- apply(matx, 2, mean, trim = .2) + tmeany <- apply(maty, 2, mean, trim = .2) + psi <- 1 + for(ic in 1:connum) { + output[ic, 2] <- tmeanx[ic]-tmeany[ic] + output[ic, 1] <- ic + output[ic, 3] <- test[ic] + temp <- sort(bootx[,ic]-booty[,ic]) +#print(length(temp)) + output[ic, 4] <- temp[icl] + output[ic, 5] <- temp[icu] + } + list(output = output, crit.p.value = crit) +} + + +qhatds1<-function(isubx,x,y){ +# +# function used by qhat when working on bootstrap estimates. +# +xx<-x[isubx] +yy<-y[isubx] +group<-disker(xx,yy,x,op=2)$zhat +group +} +qhatd<-function(x,y,nboot=50){ +# +# Estimate Q, a nonparametric measure of effect size, using +# the .632 method of estimating prediction error. +# (See Efron and Tibshirani, 1993, pp. 252--254) +# +# The default number of bootstrap samples is nboot=50 +# +# This function is for dependent groups. For independent groups, use +# qhati +# +set.seed(2) # set seed of random number generator so that +# results can be duplicated. +print("Taking bootstrap samples. Please wait.") +data<-matrix(sample(length(x),size=length(x)*nboot,replace=TRUE),nrow=nboot) +# data is an nboot by n matrix containing subscripts for bootstrap sample +bid<-apply(data,1,idb,length(x)) +# bid is a n by nboot matrix. If the jth bootstrap sample from +# 1, ..., n contains the value i, bid[i,j]=0; otherwise bid[i,j]=1 +yhat<-apply(data,1,qhatds1,x,y) +bi<-apply(bid,1,sum) # B sub i in notation of Efron and Tibshirani, p. 253 +temp<-(bid*yhat) +diff<-apply(temp,1,sum) +temp<-diff/bi +ep0<-sum(temp[!is.na(temp)])/length(y) +aperror<-disker(x,y)$phat # apparent error +regpre<-.368*aperror+.632*ep0 +list(app.error=aperror,qhat.632=regpre) +} + + +winmean<-function(x,tr=.2,na.rm=TRUE){ +if(na.rm)x=elimna(x) +winmean<-mean(winval(x,tr)) +winmean +} + + +kerden<-function(x,q=.5,xval=0){ +# Compute the kernel density estimator of the +# probability density function evaluated at the qth quantile. +# +# x contains vector of observations +# q is the quantile of interest, the default is the median. +# If you want to evaluate f hat at xval rather than at the +# q th quantile, set q=0 and xval to desired value. 
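+# A minimal sketch of the estimator implemented below: with a uniform kernel
+# of half-width h = 1.2*(ideal-fourths IQR)/n^(1/5), the density at the qth
+# sample quantile is estimated by (A - B)/(2*n*h), where A and B count the
+# observations at or below qhat + h and below qhat - h, respectively.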
+# +y<-sort(x) +n<-length(x) +temp<-idealf(x) +h<-1.2*(temp$qu-temp$ql)/n^(.2) +iq<-floor(q*n+.5) +qhat<-y[iq] +if (q==0) qhat<-xval +xph<-qhat+h +A<-length(y[y<=xph]) +xmh<-qhat-h +B<-length(y[y0 & !is.na(l)])+length(u[u<0 & !is.na(u)]) +qhat<-c(1:length(x))/length(x) +m<-matrix(c(qhat,l,u),length(x),3) +dimnames(m)<-list(NULL,c('qhat','lower','upper')) +if(plotit){ +temp2 <- m[, 2] +temp2 <- temp2[!is.na(temp2)] +xsort<-sort(x) +ysort<-sort(y) +del<-0 +for (i in 1:length(x)){ +ival<-round(length(y)*i/length(x)) +if(ival<=0)ival<-1 +if(ival>length(y))ival<-length(y) +del[i]<-ysort[ival]-xsort[i] +} +xaxis<-c(xsort,xsort,xsort) +yaxis<-c(del,m[,2],m[,3]) +plot(xaxis,yaxis,type='n',ylab='delta',xlab='x (first group)') +lines(xsort,del) +lines(xsort,m[,2],lty=2) +lines(xsort,m[,3],lty=2) +temp <- summary(x) + text(temp[3], min(temp2), '+') + text(temp[2], min(temp2), 'o') + text(temp[5], min(temp2), 'o') +} +list(m=m,crit=crit,numsig=num,prob.coverage=1-kswsig(n1,n2,crit)) +} + + +runcor<-function(x,y,z,fr=1,corflag=FALSE,corfun=pbcor,plotit=TRUE,rhat=FALSE){ +# +# Estimate how the correlation between x and y varies with z +# +# running correlation using interval method +# +# fr controls amount of smoothing +# +# corfun is the correlation to be used. It is assumed that +# corfun is an R function that returns a correlation coefficient +# in corfun$cor +# +# To use Pearsons correlation, set corflag=T +# +temp<-cbind(x,y,z) # Eliminate any rows with missing values +temp<-elimna(temp) +x<-temp[,1] +y<-temp[,2] +z<-temp[,3] +plotit<-as.logical(plotit) +rmd<-NA +if(!corflag){ +for(i in 1:length(x)){ +flag<-near(z,z[i],fr) +if(sum(flag)>2)rmd[i]<-corfun(x[flag],y[flag])$cor +}} +if(corflag){ +for(i in 1:length(x)){ +flag<-near(z,z[i],fr) +if(sum(flag)>2)rmd[i]<-cor(x[flag],y[flag]) +}} +if(plotit){ +plot(c(max(z),min(z),z),c(1,-1,rmd),xlab="Modifier",ylab="Correlation",type="n") +sz<-sort(z) +zorder<-order(z) +sysm<-rmd[zorder] +lines(sz,sysm) +} +if(!rhat)rmd<-"Done" +rmd +} + + +pcorb<-function(x,y,SEED=TRUE){ +# Compute a .95 confidence interval for Pearson's correlation coefficient. +# +# This function uses an adjusted percentile bootstrap method that +# gives good results when the error term is heteroscedastic. +# +nboot<-599 #Number of bootstrap samples +xy<-elimna(cbind(x,y)) +x<-xy[,1] +y<-xy[,2] +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +#print("Taking bootstrap samples; please wait") +data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot) +bvec<-apply(data,1,pcorbsub,x,y) # A 1 by nboot matrix. +ilow<-15 +ihi<-584 +if(length(y) < 250){ +ilow<-14 +ihi<-585 +} +if(length(y) < 180){ +ilow<-11 +ihi<-588 +} +if(length(y) < 80){ +ilow<-8 +ihi<-592 +} +if(length(y) < 40){ +ilow<-7 +ihi<-593 +} +bsort<-sort(bvec) +r<-cor(x,y) +ci<-c(bsort[ilow],bsort[ihi]) +list(r=r,ci=ci) +} + + +twobici<-function(r1=sum(elimna(x)),n1=length(elimna(x)),r2=sum(elimna(y)),n2=length(elimna(y)), +x=NA,y=NA,alpha=.05){ +# +# Compute confidence interval for p1-p2, +# the difference between probabilities of +# success for a two binomials using Beal's method. 
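+# Illustrative call with hypothetical counts:
+#   twobici(r1 = 15, n1 = 40, r2 = 8, n2 = 35)
+# returns Beal's confidence interval for p1 - p2 along with the two estimated
+# probabilities of success.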
+# +# r is number of successes +# n is sample size +# if x contains data, r1 is taken to be the +# number of 1s in x and n1 is length(x) +# +if(length(r1)>1)stop("r1 must be a single number, not a vector") +if(length(n1)>1)stop("n1 must be a single number, not a vector") +if(length(r2)>1)stop("r2 must be a single number, not a vector") +if(!is.na(sum(r1)) || !is.na(sum(n1)) || !is.na(sum(r2)) || !is.na(sum(n2))){ +if(r1<0 || n1<0)stop("Both r1 and n1 must be greater than 0") +if(r1 > n1)stop("r1 can't be greater than n1") +if(r2<0 || n2<0)stop("Both r2 and n2 must be greater than 0") +if(r2 > n2)stop("r2 can't be greater than n2") +} +if(!is.na(sum(x))){ +r1<-sum(x) +n1<-length(x) +} +if(!is.na(sum(y))){ +r2<-sum(y) +n2<-length(y) +} +a<-(r1/n1)+(r2/n2) +b<-(r1/n1)-(r2/n2) +u<-.25*((1/n1)+(1/n2)) +v<-.25*((1/n1)-(1/n2)) +V<-u*((2-a)*a-b^2)+2*v*(1-a)*b +crit<-qchisq(1-alpha/2,1) +A<-sqrt(crit*(V+crit*u^2*(2-a)*a+crit*v^2*(1-a)^2)) +B<-(b+crit*v*(1-a))/(1+crit*u) +ci<-NA +ci[1]<-B-A/(1+crit*u) +ci[2]<-B+A/(1+crit*u) +p1<-r1/n1 +p2<-r2/n2 +list(ci=ci,p1=p1,p2=p2) +} + +runmean<-function(x,y,fr=1,tr=.2,pyhat=FALSE,eout=FALSE,outfun=out,plotit=TRUE,xout=FALSE, +xlab="x",ylab="y"){ +# +# running mean using interval method +# +# fr controls amount of smoothing +# tr is the amount of trimming +# +# Missing values are automatically removed. +# +if(eout && xout)xout<-FALSE +temp<-cbind(x,y) +temp<-elimna(temp) # Eliminate any rows with missing values +if(eout){ +flag<-outfun(temp,plotit=FALSE)$keep +temp<-temp[flag,] +} +if(xout){ +flag<-outfun(x,plotit=FALSE)$keep +temp<-temp[flag,] +} +x<-temp[,1] +y<-temp[,2] +pyhat<-as.logical(pyhat) +rmd<-c(1:length(x)) +for(i in 1:length(x))rmd[i]<-mean(y[near(x,x[i],fr)],tr) +if(pyhat)return(rmd) +if(plotit){ +plot(x,y,xlab=xlab,ylab=ylab) +sx<-sort(x) +xorder<-order(x) +sysm<-rmd[xorder] +tempx<-(!duplicated(sx)) +lines(sx[tempx], sysm[tempx]) +}} + +pcorbsub<-function(isub, x, y) +{ + # + # Compute Pearson's correlation using x[isub] and y[isub] + # isub is a vector of length n, + # a bootstrap sample from the sequence of integers + # 1, 2, 3, ..., n + # + pcorbsub<-cor(x[isub],y[isub]) + pcorbsub +} + +pow1<-function(n,Del,alpha){ +# +# Determine power of Student's T in the +# one-sided, one-sample case where +# +# n=sample size +# Del=(mu0-mu1)/sigma +# alpha=Type I error probability +# mu0 is hypothesized value +# mu1 is some non-null value for the mean. +# +Del<-abs(Del) +if(alpha<=0 || alpha>=1)stop("alpha must be between 0 and 1") +K11<-1-alpha +K5<-sqrt(n)*Del +# Next, use the Kraemer-Paik (1979, Technometrics, 21, 357-360) +# approximation of the noncentral T. +K6<-n-1 +K14<-qt(K11,K6) +K7<-K14*sqrt(1+K5*K5/K6) +K8<-K5*sqrt(1+K14*K14/K6) +K9<-K7-K8 +pow1<-1-pt(K9,K6) +pow1 +} + +stein1<-function(x,del,alpha=.05,pow=.8,oneside=FALSE,n=NULL,VAR=NULL){ +# +# Performs Stein's method on the data in x. +# In the event additional observations are required +# and can be obtained, use the R function stein2. 
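+# Illustrative call: stein1(x, del = .5) returns the total sample size N
+# needed so that Student's t has power .8 (the default) to detect a shift of
+# del; if N exceeds length(x), collect N - length(x) additional observations
+# and analyze both stages with stein2.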
+# +del<-abs(del) +if(is.null(n))n<-length(x) +if(is.null(VAR))VAR=var(x) +df<-n-1 +if(!oneside)alpha<-alpha/2 +d<-(del/(qt(pow,df)-qt(alpha,df)))^2 +N<-max(c(n,floor(VAR/d)+1)) +N +} + +stein2<-function(x1,x2,mu0=0,alpha=.05){ +# +# Do second stage of Stein's method +# x1 contains first stage data +# x2 contains first stage data +# mu0 is the hypothesized value +# +n<-length(x1) +df<-n-1 +N<-n+length(x2) +test<-sqrt(N)*(mean(c(x1,x2))-mu0)/sqrt(var(x1)) +crit <- qt(1 - alpha/2, df) +low<- mean(c(x1,x2))-crit*sqrt(var(x1)) +up<- mean(c(x1,x2))+crit*sqrt(var(x1)) +sig<-2*(1-pt(test,df)) +list(ci = c(low, up), siglevel =sig,mean=mean(c(x1,x2)), +teststat = test, crit = crit, df = df) +} + + +ci2bin<-function(r1=sum(x),n1=length(x),r2=sum(y),n2=length(y),x=NA,y=NA,alpha=0.05){ +# +# Compute a confidence interval for the +# difference between probability of success +# for two independent binomials +# +# r1=number of successes in group 1 +# n1=number of observations in group 1 +# +cr<-qchisq(1-alpha,1) +p1<-r1/n1 +p2<-r2/n2 +a<-p1+p2 +b<-p1-p2 +u<-.25*(1/n1+1/n2) +v<-.25*(1/n1-1/n2) +V<-u*((2-a)*a-b^2)+2*v*(1-a)*b +A<-sqrt(cr*(V+cr*u^2*(2-a)*a+cr*v^2*(1-a)^2)) +B<-(b+cr*v*(1-a))/(1+cr*u) +ci<-NA +ci[1]<-B-A/(1+cr*u) +ci[2]<-B+A/(1+cr*u) +list(ci=ci) +} +powt1est<-function(x,delta=0,ci=FALSE,nboot=800){ +# +# Estimate power for a given value of delta +# +# Only 20% trimming is allowed. +# +temp1<-powest(x,rep(0,5),delta,se=trimse(x)) +if(ci){ +set.seed(2) +pboot<-NA +datay<-rep(0,5) +print("Taking bootstrap samples. Please wait.") +datax <- matrix(sample(x, size = length(x) * nboot, replace = TRUE + ), nrow = nboot) +for(i in 1:nboot) { +se <- trimse(datax[i, ]) +pboot[i] <- powest(x, rep(0,5), delta, se) +} +temp <- sort(pboot) +} +ll<-floor(0.05 * nboot + 0.5) +list(est.power=temp1,ci=temp[ll]) +} + +powt1an<-function(x,ci=FALSE,plotit=TRUE,nboot=800){ +# +# Do a power analysis for the one-sample case with 20% trimmed +# mean and when the percentile bootstrap is to be used to test +# hypoltheses. +# +x<-x[!is.na(x)] +lp<-NA +se<-trimse(x) +gval<-NA +dv<-seq(0,3.5*se,length=15) +for(i in 1:length(dv)){ +gval[i]<-powest(x,rep(0,5),dv[i],se) +} +if(!ci){ +if(plotit){ +plot(dv,gval,type="n",xlab="delta",ylab="power") +lines(dv,gval) +}} +if(ci){ +set.seed(2) +print("Taking bootstrap samples. Please wait.") +datax <- matrix(sample(x, size = length(x) * nboot, replace = TRUE), + nrow = nboot) +pboot<-matrix(NA,nrow=nboot,ncol=length(dv)) +for(i in 1:nboot){ +se<-trimse(datax[i,]) +for(j in 1:length(dv)){ +pboot[i,j]<-powest(x,rep(0,5),dv[j],se) +}} +ll<-floor(.05*nboot+.5) +for(i in 1:15){ +temp<-sort(pboot[,i]) +lp[i]<-temp[ll] +} +plot(c(dv,dv),c(gval,lp),type="n",xlab="delta",ylab="power") +lines(dv,gval) +lines(dv,lp,lty=2) +} +list(delta=dv,power=gval,lowp=lp) +} + +trimpb2<-function(x,y,tr=.2,alpha=.05,nboot=2000,WIN=FALSE,win=.1,plotit=FALSE,op=4, +SEED=TRUE){ +# +# Compute a 1-alpha confidence interval for +# the difference between two 20% trimmed means. +# Independent groups are assumed. +# +# The default number of bootstrap samples is nboot=2000 +# +# tr is the amount of trimming +# +# win is the amount of Winsorizing before bootstrapping +# when WIN=T. +# +# Missing values are automatically removed. 
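+# Illustrative call: trimpb2(x, y) returns a .95 percentile bootstrap CI and
+# p-value for the difference between the 20% trimmed means; with WIN=TRUE each
+# group is Winsorized by the amount win before the bootstrap is applied.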
+# +x<-x[!is.na(x)] +y<-y[!is.na(y)] +if(WIN){ +if(win>tr)stop("Cannot Winsorize more than you trim") +if(tr < .2){print("When Winsorizing, the amount of trimming") +print("should be at least .2") +} +if(min(c(length(x),length(y))) < 15){ +print ("Warning: Winsorizing with sample sizes less than 15") +print("can result in poor control over the probability of a Type I error") +} +x<-winval(x,win) +y<-winval(y,win) +} +xx<-list() +xx[[1]]<-x +xx[[2]]<-y +e1=mean(xx[[1]],tr=tr) +e2=mean(xx[[2]],tr=tr) +#est.dif<-tmean(xx[[1]],tr=tr)-tmean(xx[[2]],tr=tr) +est.dif=e1-e2 +crit<-alpha/2 +temp<-round(crit*nboot) +icl<-temp+1 +icu<-nboot-temp +bvec<-matrix(NA,nrow=2,ncol=nboot) +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +for(j in 1:2){ +data<-matrix(sample(xx[[j]],size=length(xx[[j]])*nboot,replace=TRUE),nrow=nboot) +bvec[j,]<-apply(data,1,mean,tr) # Bootstrapped trimmed means for jth group +} +top<-bvec[1,]-bvec[2,] +test<-sum(top<0)/nboot+.5*sum(top==0)/nboot +if(test > .5)test<-1-test +top<-sort(top) +ci<-NA +ci[1]<-top[icl] +ci[2]<-top[icu] +if(plotit)g2plot(bvec[1,],bvec[2,],op=op) +list(Est1=e1,Est2=e2,p.value=2*test,ci=ci,est.dif=est.dif) +} + +twolsreg<-function(x1,y1,x2,y2){ +# +# Compute a .95 confidence interval for +# the difference between two regression slopes, +# estimated via least squares and +# corresponding to two independent groups. +# +# This function uses an adjusted percentile bootstrap method that +# gives good results when the error term is heteroscedastic. +# +# WARNING: If the number of boostrap samples is altered, it is +# unknown how to adjust the confidence interval when n1+n2 < 250. +# +nboot<-599 #Number of bootstrap samples +set.seed(2) # set seed of random number generator so that +# results can be duplicated. +#print("Taking bootstrap samples; please wait") +xy=elimna(cbind(x1,y1)) +if(ncol(xy)>2)stop("This function only allows one covariate") +x1=xy[,1] +y1=xy[,2] +xy=elimna(cbind(x2,y2)) +x2=xy[,1] +y2=xy[,2] +data1<-matrix(sample(length(y1),size=length(y1)*nboot,replace=TRUE),nrow=nboot) +bvec1<-apply(data1,1,twolsregsub,x1,y1) # A 1 by nboot matrix. +data2<-matrix(sample(length(y2),size=length(y2)*nboot,replace=TRUE),nrow=nboot) +bvec2<-apply(data2,1,twolsregsub,x2,y2) # A 1 by nboot matrix. +bvec<-bvec1-bvec2 +ilow<-15 +ihi<-584 +if(length(y1)+length(y2) < 250){ +ilow<-14 +ihi<-585 +} +if(length(y1)+length(y2) < 180){ +ilow<-11 +ihi<-588 +} +if(length(y1)+length(y2) < 80){ +ilow<-8 +ihi<-592 +} +if(length(y1)+length(y2) < 40){ +ilow<-7 +ihi<-593 +} +bsort<-sort(bvec) +b1<-lsfit(x1,y1)$coef[2] +b2<-lsfit(x2,y2)$coef[2] +ci<-c(bsort[ilow],bsort[ihi]) +list(b1=b1,b2=b2,ci=ci) +} + +twolsregsub<-function(isub, x, y) +{ + # + # Compute least squares estimate of the + # slope using x[isub] and y[isub] + # isub is a vector of length n, + # a bootstrap sample from the sequence of integers + # 1, 2, 3, ..., n + # + twolsregsub<-lsfit(x[isub],y[isub])$coef[2] + twolsregsub +} +bdanova1<-function(x,alpha=.05,power=.9,delta=NA){ +# +# Do the first stage of a Bishop-Dudewicz ANOVA method. +# That is, based on the data in x +# determine N_j, the number of observations needed +# in the jth group to achieve power 1-beta. 
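+# Illustrative call with a hypothetical value of delta: bdanova1(x, delta = 4)
+# returns, for each of the J groups, the total number of observations N[j]
+# required in the second stage of the Bishop-Dudewicz procedure.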
+# +# The argument x is assumed to have list mode or the +# data is assumed to be stored in an n by J matrix +# +if(is.na(delta))stop("A value for delta was not specified") +if(!is.list(x)){ +if(!is.matrix(x))stop("Data must be stored in matrix or in list mode") +} +y<-x +if(is.list(y))y=matl(y) +x<-list() +for(j in 1:ncol(y))x[[j]]<-elimna(y[,j]) +nvec<-NA +svec<-NA +J<-length(x) +for(j in 1:length(x)){ +nvec[j]<-length(x[[j]]) +svec[j]<-var(x[[j]]) +} +nu<-nvec-1 +nu1<-sum(1/(nu-2)) +nu1<-J/nu1+2 +A<-(J-1)*nu1/(nu1-2) +B<-(nu1^2/J)*(J-1)/(nu1-2) +C<-3*(J-1)/(nu1-4) +D<-(J^2-2*J+3)/(nu1-2) +E<-B*(C+D) +M<-(4*E-2*A^2)/(E-A^2-2*A) +L<-A*(M-2)/M +f<-qf(1-alpha,L,M) +crit<-L*f +b<-(nu1-2)*crit/nu1 +zz<-qnorm(power) +A<-.5*(sqrt(2)*zz+sqrt(2*zz^2+4*(2*b-J+2))) +B<-A^2-b +d<-((nu1-2)/nu1)*delta/B +N<-NA +for(j in 1:length(x)){ +N[j]<-max(c(nvec[j]+1,floor(svec[j]/d)+1)) +} +list(N=N,d=d,crit=crit) +} + + +comvar2<-function(x,y,nboot=1000,SEED=TRUE){ +# +# Compare the variances of two independent groups. +# +x<-x[!is.na(x)] # Remove missing values in x +y<-y[!is.na(y)] # Remove missing values in y +# set seed of random number generator so that +# results can be duplicated. +est1=var(x) +est2=var(y) +sig<-est1-est2 +if(SEED)set.seed(2) +nmin<-min(length(x),length(y)) +datax<-matrix(sample(x,size=nmin*nboot,replace=TRUE),nrow=nboot) +datay<-matrix(sample(y,size=nmin*nboot,replace=TRUE),nrow=nboot) +v1<-apply(datax,1,FUN=var) +v2<-apply(datay,1,FUN=var) +boot<-v1-v2 +boot<-sort(boot) + ilow <- 15 + ihi <- 584 + if(nmin < 250) { + ilow <- 13 + ihi <- 586 + } + if(nmin < 180) { + ilow <- 10 + ihi <- 589 + } + if(nmin < 80) { + ilow <- 7 + ihi <- 592 + } + if(nmin < 40) { + ilow <- 6 + ihi <- 593 + } +ilow<-round((ilow/599)*nboot) +ihi<-round((ihi/599)*nboot) +ci<-c(boot[ilow+1],boot[ihi]) +list(n=c(length(x),length(y)),ci=ci,est.1=est1,est.2=est2,vardif=sig,ratio=est1/est2) +} + + +regi<-function(x,y,z,pt=median(z),fr=.8,est=onestep,regfun=tsreg,testit=FALSE,...){ +# +# split the data according to whether z is < or > pt, then +# use runmean2g to plot a smooth of the regression +# lines corresponding to these two groups. +# +m<-cbind(x,y,z) +m<-elimna(m) +x<-m[,1] +y<-m[,2] +z<-m[,3] +flag<-(z=nmin]) +isub[5]<-max(sub[vecn>=nmin]) +isub[3]<-floor((isub[1]+isub[5])/2) +isub[2]<-floor((isub[1]+isub[3])/2) +isub[4]<-floor((isub[3]+isub[5])/2) +mat<-matrix(NA,5,3) +dimnames(mat)<-list(NULL,c("X","n1","n2")) +for (i in 1:5){ +j<-i+5 +temp1<-y1[near(x1,x1[isub[i]],fr1)] +temp2<-y2[near(x2,x1[isub[i]],fr2)] +temp1<-temp1[!is.na(temp1)] +temp2<-temp2[!is.na(temp2)] +mat[i,1]<-x1[isub[i]] +mat[i,2]<-length(temp1) +mat[i,3]<-length(temp2) +gv1[[i]]<-temp1 +gv1[[j]]<-temp2 +} +I1<-diag(npt) +I2<-0-I1 +con<-rbind(I1,I2) +if(flag.est)test<-pbmcp(gv1,alpha=alpha,nboot=nboot,est=est,con=con,...) +if(!flag.est)test<-linconpb(gv1,alpha=alpha,nboot=nboot,est=est,con=con,...) 
+} +# +if(!is.na(pts[1])){ +npt<-length(pts) +n1<-1 +n2<-1 +vecn<-1 +for(i in 1:length(pts)){ +n1[i]<-length(y1[near(x1,pts[i],fr1)]) +n2[i]<-length(y2[near(x2,pts[i],fr2)]) +} +mat<-matrix(NA,length(pts),3) +dimnames(mat)<-list(NULL,c("X","n1","n2")) +gv<-vector("list",2*length(pts)) +for (i in 1:length(pts)){ +j<-i+npt +temp1<-y1[near(x1,pts[i],fr1)] +temp2<-y2[near(x2,pts[i],fr2)] +temp1<-temp1[!is.na(temp1)] +temp2<-temp2[!is.na(temp2)] +mat[i,1]<-pts[i] +if(length(temp1)<=5)paste("Warning, there are",length(temp1)," points corresponding to the design point X=",pts[i]) +if(length(temp2)<=5)paste("Warning, there are",length(temp2)," points corresponding to the design point X=",pts[i]) +mat[i,2]<-length(temp1) +mat[i,3]<-length(temp2) +gv1[[i]]<-temp1 +gv1[[j]]<-temp2 +} +I1<-diag(npt) +I2<-0-I1 +con<-rbind(I1,I2) +if(flag.est)test<-pbmcp(gv1,alpha=alpha,nboot=nboot,est=est,con=con,...) +if(!flag.est)test<-linconpb(gv1,alpha=alpha,nboot=nboot,est=est,con=con,...) +} +if(plotit){ +runmean2g(x1,y1,x2,y2,fr=fr1,est=est,LP=LP,xlab=xlab,ylab=ylab,pch1=pch1,pch2=pch2,...) +} +list(mat=mat,output=test$output,con=test$con,num.sig=test$num.sig) +} + +ancboot<-function(x1,y1,x2,y2,fr1=1,fr2=1,tr=.2,nboot=599,pts=NA,plotit=TRUE,xout=FALSE,outfun=outpro,...){ +# +# Compare two independent groups using the ancova method +# in chapter 12 of Wilcox, 2017, Intro to Robust Estimation and Hypothesis Testing. +# No assumption is made about the form of the regression +# lines--a running interval smoother is used. +# Confidence intervals are computed using a bootstrap-t bootstrap +# method. Comparisons are made at five empirically chosen design points. +# +# Assume data are in x1 y1 x2 and y2 +# +if(is.na(pts[1])){ +isub<-c(1:5) # Initialize isub +test<-c(1:5) +m1=elimna(cbind(x1,y1)) +x1=m1[,1] +y1=m1[,2] +m1=elimna(cbind(x2,y2)) +x2=m1[,1] +y2=m1[,2] +xorder<-order(x1) +y1<-y1[xorder] +x1<-x1[xorder] +xorder<-order(x2) +y2<-y2[xorder] +x2<-x2[xorder] +n1<-1 +n2<-1 +vecn<-1 +for(i in 1:length(x1))n1[i]<-length(y1[near(x1,x1[i],fr1)]) +for(i in 1:length(x1))n2[i]<-length(y2[near(x2,x1[i],fr2)]) +for(i in 1:length(x1))vecn[i]<-min(n1[i],n2[i]) +sub<-c(1:length(x1)) +isub[1]<-min(sub[vecn>=12]) +isub[5]<-max(sub[vecn>=12]) +isub[3]<-floor((isub[1]+isub[5])/2) +isub[2]<-floor((isub[1]+isub[3])/2) +isub[4]<-floor((isub[3]+isub[5])/2) +mat<-matrix(NA,5,8) +dimnames(mat)<-list(NULL,c("X","n1","n2","DIF","TEST","ci.low","ci.hi", +"p.value")) +gv1<-vector("list") +for (i in 1:5){ +j<-i+5 +temp1<-y1[near(x1,x1[isub[i]],fr1)] +temp2<-y2[near(x2,x1[isub[i]],fr2)] +temp1<-temp1[!is.na(temp1)] +temp2<-temp2[!is.na(temp2)] +mat[i,2]<-length(temp1) +mat[i,3]<-length(temp2) +gv1[[i]]<-temp1 +gv1[[j]]<-temp2 +} +I1<-diag(5) +I2<-0-I1 +con<-rbind(I1,I2) +test<-linconb(gv1,con=con,tr=tr,nboot=nboot) +for(i in 1:5){ +mat[i,1]<-x1[isub[i]] +} +mat[,4]<-test$psihat[,2] +mat[,5]<-test$test[,2] +mat[,6]<-test$psihat[,3] +mat[,7]<-test$psihat[,4] +mat[,8]<-test$test[,4] +} +if(!is.na(pts[1])){ +n1<-1 +n2<-1 +vecn<-1 +for(i in 1:length(pts)){ +n1[i]<-length(y1[near(x1,pts[i],fr1)]) +n2[i]<-length(y2[near(x2,pts[i],fr2)]) +if(n1[i]<=5)paste("Warning, there are",n1[i]," points corresponding to the design point X=",pts[i]) +if(n2[i]<=5)paste("Warning, there are",n2[i]," points corresponding to the design point X=",pts[i]) +} +mat<-matrix(NA,length(pts),9) +dimnames(mat)<-list(NULL,c("X","n1","n2","DIF","TEST","se","ci.low","ci.hi", +"p.value")) +gv<-vector("list",2*length(pts)) +for (i in 1:length(pts)){ +g1<-y1[near(x1,pts[i],fr1)] 
+g2<-y2[near(x2,pts[i],fr2)] +g1<-g1[!is.na(g1)] +g2<-g2[!is.na(g2)] +j<-i+length(pts) +gv[[i]]<-g1 +gv[[j]]<-g2 +} +I1<-diag(length(pts)) +I2<-0-I1 +con<-rbind(I1,I2) +test<-linconb(gv,con=con,tr=tr,nboot=nboot) +mat[,1]<-pts +mat[,2]<-n1 +mat[,3]<-n2 +mat[,4]<-test$psihat[,2] +mat[,5]<-test$test[,2] +mat[,6]<-test$test[,3] +mat[,7]<-test$psihat[,3] +mat[,8]<-test$psihat[,4] +mat[,9]<-test$test[,4] +} +if(plotit){ +if(xout){ +flag<-outfun(x1,...)$keep +x1<-x1[flag] +y1<-y1[flag] +flag<-outfun(x2,...)$keep +x2<-x2[flag] +y2<-y2[flag] +} +runmean2g(x1,y1,x2,y2,fr=fr1,est=mean,tr=tr) +} +list(output=mat,crit=test$crit) +} + +spear<-function(x,y=NULL){ +# Compute Spearman's rho +# +if(!is.null(y[1])){ +m=elimna(cbind(x,y)) +n=nrow(m) +x=m[,1] +y=m[,2] +corv<-cor(rank(x),rank(y)) +} +if(is.null(y[1])){ +x=elimna(x) +n=nrow(x) +m<-apply(x,2,rank) +corv<-cor(m) +} +test <-corv * sqrt((n - 2)/(1. - corv^2)) +sig <- 2 * (1 - pt(abs(test), length(x) - 2)) +if(is.null(y[1]))sig<-matrix(sig,ncol=sqrt(length(sig))) +list(cor=corv,p.value = sig) +} + + +linchk<-function(x,y,sp,pv=1,regfun=tsreg,plotit=TRUE,nboot=599,alpha=.05,pr=TRUE,xout=FALSE){ +# +# Split the data into two groups according to whether +# predictor variable pv has a value less than sp. +# Then test the hypothesis that slope coefficients, +# based on the regression method regfun, are equal. +# +x<-as.matrix(x) +if(pr)print(paste("Splitting data using predictor", pv)) +xx<-x[,pv] +flag<-(xx<=sp) +temp<-reg2ci(x[flag,],y[flag],x[!flag,],y[!flag],regfun=regfun,plotit=plotit,nboot=nboot,alpha=alpha,xout=xout) +temp +} + +trimci<-function(x,tr=.2,alpha=.05,null.value=0,pr=TRUE,nullval=NULL){ +# +# Compute a 1-alpha confidence interval for the trimmed mean +# +# The default amount of trimming is tr=.2 +# +if(pr){ +print("The p-value returned by this function is based on the") +print("null value specified by the argument null.value, which defaults to 0") +print('To get a measure of effect size using a Winsorized measure of scale, use trimciv2') +} +if(!is.null(nullval))null.value=nullval +x<-elimna(x) +se<-sqrt(winvar(x,tr))/((1-2*tr)*sqrt(length(x))) +trimci<-vector(mode="numeric",length=2) +df<-length(x)-2*floor(tr*length(x))-1 +trimci[1]<-mean(x,tr)-qt(1-alpha/2,df)*se +trimci[2]<-mean(x,tr)+qt(1-alpha/2,df)*se +test<-(mean(x,tr)-null.value)/se +sig<-2*(1-pt(abs(test),df)) +list(estimate=mean(x,tr),ci=trimci,test.stat=test,se=se,p.value=sig,n=length(x)) +} + +trimciv2<-function(x,tr=.2,alpha=.05,null.value=0,pr=TRUE){ +# +# Compute a 1-alpha confidence interval for the trimmed mean +# Same as trimci, only a standardized measure of effect size is reported: +# the difference between the trimmed mean and hypothesized value divided by +# the Winsorized standard deviation, rescaled to estimate the standard deviation +# when sampling from a normal distribution. 
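+# With the default tr=.2 the rescaling constant computed below is about .642,
+# the Winsorized standard deviation of a standard normal distribution, so the
+# reported Effect.Size is approximately .642*(trimmed mean - null.value)
+# divided by the Winsorized standard deviation.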
+# +# The default amount of trimming is tr=.2 +# +library(MASS) +x<-elimna(x) +se<-sqrt(winvar(x,tr))/((1-2*tr)*sqrt(length(x))) +trimci<-vector(mode="numeric",length=2) +df<-length(x)-2*floor(tr*length(x))-1 +trimci[1]<-mean(x,tr)-qt(1-alpha/2,df)*se +trimci[2]<-mean(x,tr)+qt(1-alpha/2,df)*se +test<-(mean(x,tr)-null.value)/se +sig<-2*(1-pt(abs(test),df)) +if(tr==0)term=1 +if(tr>0)term=sqrt(area(dnormvar,qnorm(tr),qnorm(1-tr))+2*(qnorm(tr)^2)*tr) +epow=(mean(x,tr)-null.value)*term/sqrt(winvar(x,tr=tr,na.rm=TRUE)) +list(ci=trimci,estimate=mean(x,tr),test.stat=test,se=se,p.value=sig,n=length(x),Effect.Size=epow) +} + +trimciQS<-function(x,tr=.2,alpha=.05,null.value=0,pr=TRUE,nullval=NULL){ +# +# Compute a 1-alpha confidence interval for the trimmed mean +# Same as trimci plus quantile shift measure of effect size. +# +# The default amount of trimming is tr=.2 +# +if(pr){ +print("The p-value returned by this function is based on the") +print("null value specified by the argument null.value, which defaults to 0") +print('To get a measure of effect size using a Winsorized measure of scale, use trimciv2') +} +if(!is.null(nullval))null.value=nullval +x<-elimna(x) +se<-sqrt(winvar(x,tr))/((1-2*tr)*sqrt(length(x))) +trimci<-vector(mode="numeric",length=2) +df<-length(x)-2*floor(tr*length(x))-1 +trimci[1]<-mean(x,tr)-qt(1-alpha/2,df)*se +trimci[2]<-mean(x,tr)+qt(1-alpha/2,df)*se +test<-(mean(x,tr)-null.value)/se +sig<-2*(1-pt(abs(test),df)) +QS=depQS(x,locfun=tmean,tr=tr)$Q.effect +list(ci=trimci,estimate=mean(x,tr),test.stat=test,se=se,p.value=sig,n=length(x),Q.effect=QS) +} + + +msmed<-function(x,y=NA,con=0,alpha=.05){ +# +# Test a set of linear contrasts using Medians +# +# The data are assumed to be stored in $x$ in a matrix or in list mode. +# Length(x) is assumed to correspond to the total number of groups, J +# It is assumed all groups are independent. +# +# con is a J by d matrix containing the contrast coefficients that are used. +# If con is not specified, all pairwise comparisons are made. +# +# Missing values are automatically removed. +# +if(!is.na(y[1])){ +xx<-list() +xx[[1]]<-x +xx[[2]]<-y +if(is.matrix(x) || is.list(x))stop("When y is speficied, x should not have list mode or be a matrix") +x<-xx +} +if(is.matrix(x))x<-listm(x) +if(!is.list(x))stop("Data must be stored in a matrix or in list mode.") +con<-as.matrix(con) +J<-length(x) +h<-vector("numeric",J) +w<-vector("numeric",J) +xbar<-vector("numeric",J) +for(j in 1:J){ +xx<-!is.na(x[[j]]) +val<-x[[j]] +if(sum(duplicated(val)>0)){ +print(paste("Warning: Group",j, "has tied values. Might want to used medpb")) +} +x[[j]]<-val[xx] # Remove missing values +xbar[j]<-median(x[[j]]) +w[j]<-msmedse(x[[j]])^2 # Squared standard error. 
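+# Note: when con is not supplied, all pairwise comparisons of the medians are
+# performed, with a Studentized maximum modulus critical value (qsmm, 500 df)
+# used to control the familywise error rate.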
+} +if(sum(con^2!=0))CC<-ncol(con) +if(sum(con^2)==0){ +CC<-(J^2-J)/2 +psihat<-matrix(0,CC,5) +dimnames(psihat)<-list(NULL,c("Group","Group","psihat","ci.lower","ci.upper")) +test<-matrix(NA,CC,7) +dimnames(test)<-list(NULL,c("Group","Group","test","crit","se","p.value",'p.adjusted')) +jcom<-0 +for (j in 1:J){ +for (k in 1:J){ +if (j < k){ +jcom<-jcom+1 +test[jcom,3]<-abs(xbar[j]-xbar[k])/sqrt(w[j]+w[k]) +test[jcom,6]<-2*(1-pt(test[jcom,3],999)) +test[jcom,7]=1-psmm(abs(test[jcom,3]),CC,500) +sejk<-sqrt(w[j]+w[k]) +test[jcom,5]<-sejk +psihat[jcom,1]<-j +psihat[jcom,2]<-k +test[jcom,1]<-j +test[jcom,2]<-k +psihat[jcom,3]<-(xbar[j]-xbar[k]) +crit<-NA +if(CC==1)crit<-qnorm(1-alpha/2) +if(CC>1){ +crit=qsmm(1-alpha,CC,500) +} +test[jcom,4]<-crit +psihat[jcom,4]<-psihat[jcom,3]-crit*test[jcom,5] +psihat[jcom,5]<-psihat[jcom,3]+crit*test[jcom,5] +}}}} +if(sum(con^2)>0){ +if(nrow(con)!=length(x))warning("The number of groups does not match the number of contrast coefficients.") +psihat<-matrix(0,ncol(con),4) +dimnames(psihat)<-list(NULL,c("con.num","psihat","ci.lower","ci.upper")) +test<-matrix(0,ncol(con),6) +dimnames(test)<-list(NULL,c("con.num","test","crit","se","p.value",'p.adjusted')) +for (d in 1:ncol(con)){ +psihat[d,1]<-d +psihat[d,2]<-sum(con[,d]*xbar) +sejk<-sqrt(sum(con[,d]^2*w)) +test[d,1]<-d +test[d,2]<-sum(con[,d]*xbar)/sejk +test[d,5]<-2*(1-pt(abs(test[d,2]),999)) +test[d,6]=1-psmm(abs(test[d,2]),ncol(con),500) +crit<-NA +if(CC==1)crit<-qnorm(1-alpha/2) +if(CC>1)crit=qsmm(1-alpha,CC,500) +test[d,3]<-crit +test[d,4]<-sejk +psihat[d,3]<-psihat[d,2]-crit*sejk +psihat[d,4]<-psihat[d,2]+crit*sejk +}} +list(test=test,psihat=psihat) +} +selby<-function(m,grpc,coln){ +# +# +# A commmon situation is to have data stored in an n by p matrix where +# one or more of the columns are group identification numbers. +# This function groups all values in column coln according to the +# group numbers in column grpc and stores the results in list mode. +# +# More than one column of data can sorted +# +# grpc indicates the column of the matrix containing group id number +# +if(is.null(dim(m)))stop("Data must be stored in a matrix or data frame") +if(is.na(grpc[1]))stop("The argument grpc is not specified") +if(is.na(coln[1]))stop("The argument coln is not specified") +if(length(grpc)!=1)stop("The argument grpc must have length 1") +x<-vector("list") +grpn<-sort(unique(m[,grpc])) +it<-0 +for (ig in 1:length(grpn)){ +for (ic in 1:length(coln)){ +it<-it+1 +flag<-(m[,grpc]==grpn[ig]) +x[[it]]<-m[flag,coln[ic]] +}} +list(x=x,grpn=grpn) +} + + +med2way<-function(J,K,x,grp=c(1:p),p=J*K, ADJ.P.VALUE=TRUE, iter=5000,SEED=TRUE){ +# +# Perform a J by K (two-way) anova on medians where +# all jk groups are independent. +# +# The argument x is assumed to contain the raw +# data stored in list mode. +# If grp is unspecified, it is assumed x[[1]] contains the data +# for the first level of both factors: level 1,1. +# x[[2]] is assumed to contain the data for level 1 of the +# first factor and level 2 of the second factor: level 1,2 +# x[[j+1]] is the data for level 2,1, etc. +# If the data are in wrong order, grp can be used to rearrange the +# groups. For example, for a two by two design, grp<-c(2,4,3,1) +# indicates that the second group corresponds to level 1,1; +# group 4 corresponds to level 1,2; group 3 is level 2,1; +# and group 1 is level 2,2. +# +# It is assumed that the input variable x has length JK, the total number of +# groups being tested. If not, a warning message is printed. 
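+# Illustrative call for a hypothetical 2-by-3 design: med2way(2, 3, x), where
+# x[[1]],...,x[[6]] hold the six groups in the order (1,1),(1,2),(1,3),(2,1),
+# (2,2),(2,3). With ADJ.P.VALUE=TRUE (the default) the p-values are taken from
+# a simulated null distribution generated by med2way.crit.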
+# +if(L.ties(x))print("There are tied values, suggest using the function m2way instead") +if(is.matrix(x))x<-listm(x) +if(!is.list(x))stop("Data are not stored in a matrix or in list mode") +if(p!=length(x)){ +print("Warning: The number of groups in your data is not equal to JK") +} +xbar<-0 +h<-0 +d<-0 +R<-0 +W<-0 +d<-0 +r<-0 +w<-0 +nuhat<-0 +omegahat<-0 +DROW<-0 +DCOL<-0 +xtil<-matrix(0,J,K) +aval<-matrix(0,J,K) +for (j in 1:p){ +xbar[j]<-median(x[[grp[j]]]) +h[j]<-length(x[[grp[j]]]) +d[j]<-msmedse(x[[grp[j]]])^2 +} +d<-matrix(d,J,K,byrow=TRUE) +xbar<-matrix(xbar,J,K,byrow=TRUE) +h<-matrix(h,J,K,byrow=TRUE) +for(j in 1:J){ +R[j]<-sum(xbar[j,]) +nuhat[j]<-(sum(d[j,]))^2/sum(d[j,]^2/(h[j,]-1)) +r[j]<-1/sum(d[j,]) +DROW[j]<-sum(1/d[j,]) +} +for(k in 1:K){ +W[k]<-sum(xbar[,k]) +omegahat[k]<-(sum(d[,k]))^2/sum(d[,k]^2/(h[,k]-1)) +w[k]<-1/sum(d[,k]) +DCOL[k]<-sum(1/d[,k]) +} +D<-1/d +for(j in 1:J){ +for(k in 1:K){ +xtil[j,k]<-sum(D[,k]*xbar[,k]/DCOL[k])+sum(D[j,]*xbar[j,]/DROW[j])- +sum(D*xbar/sum(D)) +aval[j,k]<-(1-D[j,k]*(1/sum(D[j,])+1/sum(D[,k])-1/sum(D)))^2/(h[j,k]-3) +} +} +Rhat<-sum(r*R)/sum(r) +What<-sum(w*W)/sum(w) +Ba<-sum((1-r/sum(r))^2/nuhat) +Bb<-sum((1-w/sum(w))^2/omegahat) +Va<-sum(r*(R-Rhat)^2)/((J-1)*(1+2*(J-2)*Ba/(J^2-1))) +Vb<-sum(w*(W-What)^2)/((K-1)*(1+2*(K-2)*Bb/(K^2-1))) +sig.A<-1-pf(Va,J-1,9999999) +sig.B<-1-pf(Vb,K-1,9999999) +# Next, do test for interactions +Vab<-sum(D*(xbar-xtil)^2) +dfinter<-(J-1)*(K-1) +sig.AB<-1-pchisq(Vab,dfinter) +if(ADJ.P.VALUE){ +a=med2way.crit(J,K,h,iter=iter,SEED=SEED) +sig.A=mean(Va<=a$A.dist) +sig.B=mean(Vb<=a$B.dist) +sig.AB=mean(Vab<=a$AB.dist) +} +list(test.A=Va,p.val.A=sig.A,test.B=Vb,p.val.B=sig.B,test.AB=Vab,p.val.AB=sig.AB) +} + + +med2way.sub<-function(J,K,x,grp=c(1:p),p=J*K){ +# +# Perform a J by K (two-way) anova on medians where +# all jk groups are independent. +# +# The argument x is assumed to contain the raw +# data stored in list mode. +# If grp is unspecified, it is assumed x[[1]] contains the data +# for the first level of both factors: level 1,1. +# x[[2]] is assumed to contain the data for level 1 of the +# first factor and level 2 of the second factor: level 1,2 +# x[[j+1]] is the data for level 2,1, etc. +# If the data are in wrong order, grp can be used to rearrange the +# groups. For example, for a two by two design, grp<-c(2,4,3,1) +# indicates that the second group corresponds to level 1,1; +# group 4 corresponds to level 1,2; group 3 is level 2,1; +# and group 1 is level 2,2. +# +# It is assumed that the input variable x has length JK, the total number of +# groups being tested. If not, a warning message is printed. 
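+#
+# (med2way.sub is the computational core used by med2way.crit when simulating the
+# null distribution; it performs the same calculation as med2way but omits the tie
+# check and the adjusted p-values.)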
+# +if(is.matrix(x))x<-listm(x) +if(!is.list(x))stop('Data are not stored in a matrix or in list mode') +if(p!=length(x)){ +print('Warning: The number of groups in your data is not equal to JK') +} +xbar<-0 +h<-0 +d<-0 +R<-0 +W<-0 +d<-0 +r<-0 +w<-0 +nuhat<-0 +omegahat<-0 +DROW<-0 +DCOL<-0 +xtil<-matrix(0,J,K) +aval<-matrix(0,J,K) +for (j in 1:p){ +xbar[j]<-median(x[[grp[j]]]) +h[j]<-length(x[[grp[j]]]) +d[j]<-msmedse(x[[grp[j]]])^2 +} +d<-matrix(d,J,K,byrow=TRUE) +xbar<-matrix(xbar,J,K,byrow=TRUE) +h<-matrix(h,J,K,byrow=TRUE) +for(j in 1:J){ +R[j]<-sum(xbar[j,]) +nuhat[j]<-(sum(d[j,]))^2/sum(d[j,]^2/(h[j,]-1)) +r[j]<-1/sum(d[j,]) +DROW[j]<-sum(1/d[j,]) +} +for(k in 1:K){ +W[k]<-sum(xbar[,k]) +omegahat[k]<-(sum(d[,k]))^2/sum(d[,k]^2/(h[,k]-1)) +w[k]<-1/sum(d[,k]) +DCOL[k]<-sum(1/d[,k]) +} +D<-1/d +for(j in 1:J){ +for(k in 1:K){ +xtil[j,k]<-sum(D[,k]*xbar[,k]/DCOL[k])+sum(D[j,]*xbar[j,]/DROW[j])- +sum(D*xbar/sum(D)) +aval[j,k]<-(1-D[j,k]*(1/sum(D[j,])+1/sum(D[,k])-1/sum(D)))^2/(h[j,k]-3) +} +} +Rhat<-sum(r*R)/sum(r) +What<-sum(w*W)/sum(w) +Ba<-sum((1-r/sum(r))^2/nuhat) +Bb<-sum((1-w/sum(w))^2/omegahat) +Va<-sum(r*(R-Rhat)^2)/((J-1)*(1+2*(J-2)*Ba/(J^2-1))) +Vb<-sum(w*(W-What)^2)/((K-1)*(1+2*(K-2)*Bb/(K^2-1))) +sig.A<-1-pf(Va,J-1,9999999) +sig.B<-1-pf(Vb,K-1,9999999) +# Next, do test for interactions +Vab<-sum(D*(xbar-xtil)^2) +dfinter<-(J-1)*(K-1) +sig.AB<-1-pchisq(Vab,dfinter) +list(test.A=Va,p.val.A=sig.A,test.B=Vb,p.val.B=sig.B,test.AB=Vab,p.val.AB=sig.AB) +} + +L.ties<-function(x){ +# +# x is assumed to have list mode +# +# Goal: determine whether there are any tied values +# +a=FALSE +if(is.matrix(x))x=listm(x) +if(!is.list(x))stop('x should be a matrix or have list mode') +x=elimna(x) +J=length(x) +for(j in 1:J){ +u=unique(x[[j]]) +if(length(u)!=length(x[[j]]))a=TRUE +} +a +} + +med2way.crit<-function(J,K,n,iter,SEED=TRUE){ +# +# Estimate the null distribution for med2way +# +x=list() +p=J*K +A.dist=NA +B.dist=NA +AB.dist=NA +for(i in 1:iter){ +for(j in 1:p)x[[j]]=rmul(n[j]) +a=med2way.sub(J,K,x) +A.dist[i]=a$test.A +B.dist[i]=a$test.B +AB.dist[i]=a$test.AB +} +list(A.dist=A.dist,B.dist=B.dist,AB.dist=AB.dist) +} + + + +idealf<-function(x,na.rm=FALSE){ +# +# Compute the ideal fourths for data in x +# +if(na.rm)x<-x[!is.na(x)] +j<-floor(length(x)/4 + 5/12) +y<-sort(x) +g<-(length(x)/4)-j+(5/12) +ql<-(1-g)*y[j]+g*y[j+1] +k<-length(x)-j+1 +qu<-(1-g)*y[k]+g*y[k-1] +list(ql=ql,qu=qu) +} + +lintests1<-function(vstar,yhat,res,mflag,x,regfun,...){ +ystar<-yhat+res*vstar +bres<-regfun(x,ystar,...)$residuals +rval<-0 +for (i in 1:nrow(x)){ +rval[i]<-sum(bres[mflag[,i]]) +} +rval +} + + + +#Note: rdepth in library(mrfDepth) eliminates access to rdepth below and it handles p>1 Ind. Var. + + +rdepth.orig<-function(d, x, y, sortx = TRUE) +{ +########################################################################## +# This function computes the regression depth of a line with coordinates d +# relative to the bivariate data set (x,y). +# The first component of the vector d indicates the intercept of the line, +# the second component is the slope. +# +# Input : d : vector with two components +# x,y : vectors of equal length (data set) +# sortx : logical, to set to F if the data set (x,y) is +# already sorted by its x-coordinates +# +# Reference: +# Rousseeuw, P.J. and Hubert, M. (1996), +# Regression Depth, Technical report, University of Antwerp +# submitted for publication. 
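+#
+# Example (hypothetical data): rdepth.orig(c(0,1), x, y) returns the regression
+# depth of the line with intercept 0 and slope 1 relative to the points (x,y).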
+########################################################################## + if(!is.vector(x) || !is.vector(y)) stop("x and y should be vectors") + n <- length(x) + if(n < 2) + stop("you need at least two observations") + xy <- cbind(x, y) + b <- d[1] + a <- d[2] + if(sortx) + xy <- xy[order(xy[, 1], xy[, 2]), ] + res <- xy[, 2] - a * xy[, 1] - b + res[abs(res) < 9.9999999999999995e-08] <- 0 + posres <- res >= 0 + negres <- res <= 0 + lplus <- cumsum(posres) + rplus <- lplus[n] - lplus + lmin <- cumsum(negres) + rmin <- lmin[n] - lmin + depth <- pmin(lplus + rmin, rplus + lmin) + min(depth) +} + +permg<-function(x,y,alpha=.05,est=mean,nboot=1000){ +# +# Do a two-sample permutation test based on means or any +# other measure of location or scale indicated by the +# argument est. +# +# The default number of permutations is nboot=1000 +# +x<-x[!is.na(x)] +y<-y[!is.na(y)] +xx<-c(x,y) +dif<-est(x)-est(y) +vec<-c(1:length(xx)) +v1<-length(x)+1 +difb<-NA +temp2<-NA +for(i in 1:nboot){ +data <- sample(xx, size = length(xx), replace = FALSE) +temp1<-est(data[c(1:length(x))]) +temp2<-est(data[c(v1:length(xx))]) +difb[i]<-temp1-temp2 +} +difb<-sort(difb) +icl<-floor((alpha/2)*nboot+.5) +icu<-floor((1-alpha/2)*nboot+.5) +reject<-"no" +if(dif>=difb[icu] || dif <=difb[icl])reject<-"yes" +list(dif=dif,lower=difb[icl],upper=difb[icu],reject=reject) +} + + +pb2gen<-function(x,y,alpha=.05,nboot=2000,est=onestep,SEED=TRUE,pr=FALSE,...){ +# +# Compute a bootstrap confidence interval for the +# the difference between any two parameters corresponding to +# independent groups. +# By default, M-estimators are compared. +# Setting est=mean, for example, will result in a percentile +# bootstrap confidence interval for the difference between means. +# Setting est=onestep will compare M-estimators of location. +# The default number of bootstrap samples is nboot=2000 +# +x<-x[!is.na(x)] # Remove any missing values in x +y<-y[!is.na(y)] # Remove any missing values in y +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +datax<-matrix(sample(x,size=length(x)*nboot,replace=TRUE),nrow=nboot) +datay<-matrix(sample(y,size=length(y)*nboot,replace=TRUE),nrow=nboot) +bvecx<-apply(datax,1,est,...) +bvecy<-apply(datay,1,est,...) +bvec<-sort(bvecx-bvecy) +low<-round((alpha/2)*nboot)+1 +up<-nboot-low +temp<-sum(bvec<0)/nboot+sum(bvec==0)/(2*nboot) +sig.level<-2*(min(temp,1-temp)) +se<-var(bvec) +list(est.1=est(x,...),est.2=est(y,...),est.dif=est(x,...)-est(y,...),ci=c(bvec[low],bvec[up]),p.value=sig.level,sq.se=se,n1=length(x),n2=length(y)) +} + + + + +tmean<-function(x,tr=.2,na.rm=FALSE,STAND=NULL){ +if(na.rm)x<-x[!is.na(x)] +val<-mean(x,tr) +val +} + +depth<-function(U,V,m){ +# +# Compute the halfspace depth of the point (u,v) for the pairs of points +# in the n by 2 matrix m. 
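+#
+# Example (hypothetical data): depth(0, 0, cbind(rnorm(50), rnorm(50))) returns the
+# halfspace depth of the point (0,0) relative to the 50 bivariate points in m.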
+# +X<-m[,1] +Y<-m[,2] +FV<-NA +NUMS<-0 +NUMH<-0 +SDEP<-0.0 +HDEP<-0.0 +N<-length(X) +P<-acos(-1) +P2<-P*2.0 +EPS<-0.000001 +ALPHA<-NA +NT<-0 +for(i in 1:nrow(m)){ + DV<-sqrt(((X[i]-U)*(X[i]-U)+(Y[i]-V)*(Y[i]-V))) + if (DV <= EPS){ + NT<-NT+1 + } + else{ + XU<-(X[i]-U)/DV + YU<-(Y[i]-V)/DV + if (abs(XU) > abs(YU)){ + if (X[i] >= U){ + ALPHA[i-NT]<-asin(YU) + if(ALPHA[i-NT] < 0.0) + ALPHA[i-NT]<-P2+ALPHA[i-NT] + } + else{ + ALPHA[i-NT]<-P-asin(YU) + } + } + else{ + if (Y[i] >= V) + ALPHA[i-NT]<-acos(XU) + else + ALPHA[i-NT]<-P2-acos(XU) + } + if (ALPHA[i-NT] >= P2-EPS) ALPHA[i-NT]<-0.0 + } +} +NN<-N-NT +if(NN<=1){ +NUMS<-NUMS+depths1(NT,1)*depths1(NN,2)+depths1(NT,2)*depths1(NN,1)+ +depths1(NT,3) + if(N >= 3)SDEP<-(NUMS+0.0)/(depths1(N,3)+0.0) + NUMH<-NUMH+NT + HDEP<-(NUMH+0.0)/(N+0.0) + return(HDEP) +} +ALPHA<-sort(ALPHA[1:NN]) +ANGLE<-ALPHA[1]-ALPHA[NN]+P2 +for(i in 2:NN){ +ANGLE<-max(c(ANGLE,ALPHA[i]-ALPHA[i-1])) + } +if(ANGLE > (P+EPS)){ +NUMS<-NUMS+depths1(NT,1)*depths1(NN,2)+depths1(NT,2)*depths1(NN,1)+ +depths1(NT,3) + if(N >= 3)SDEP<-(NUMS+0.0)/(depths1(N,3)+0.0) + NUMH<-NUMH+NT + HDEP<-(NUMH+0.0)/(N+0.0) + return(HDEP) + } +ANGLE<-ALPHA[1] +NU<-0 +for (i in 1:NN){ +ALPHA[i]<-ALPHA[i]-ANGLE +if(ALPHA[i]<(P-EPS))NU<-NU+1 + } +if(NU >= NN){ +NUMS<-NUMS+depths1(NT,1)*depths1(NN,2)+depths1(NT,2)*depths1(NN,1)+ +depths1(NT,3) + if(N >= 3)SDEP<-(NUMS+0.0)/(depths1(N,3)+0.0) + NUMH<-NUMH+NT + HDEP<-(NUMH+0.0)/(N+0.0) + return(HDEP) +} +# +# Mergesort the alpha with their antipodal angles beta, +# and at the same time update I, F(I), and NBAD. +# +JA<-1 +JB<-1 + ALPHK<-ALPHA[1] + BETAK<-ALPHA[NU+1]-P + NN2<-NN*2 + NBAD<-0 + I<-NU + NF<-NN +for(J in 1:NN2){ + ADD<-ALPHK+EPS + if (ADD < BETAK){ + NF<-NF+1 + if(JA < NN){ + JA<-JA+1 + ALPHK<-ALPHA[JA] + } + else + ALPHK<-P2+1.0 + } + else{ + I<-I+1 + NN1<-NN+1 + if(I==NN1){ + I<-1 + NF<-NF-NN + } + FV[I]<-NF + NFI<-NF-I + NBAD<-NBAD+depths1(NFI,2) + if(JB < NN){ + JB<-JB+1 + if(JB+NU <= NN) + BETAK<-ALPHA[JB+NU]-P + else + BETAK<-ALPHA[JB+NU-NN]+P + } + else + BETAK<-P2+1.0 + } +} +NUMS<-depths1(NN,3)-NBAD +# +# Computation of NUMH for halfspace depth. +# + GI<-0 + JA<-1 + ANGLE<-ALPHA[1] + dif<-NN-FV[1] + NUMH<-min(FV[1],dif) +for(I in 2:NN){ + AEPS<-ANGLE+EPS + if(ALPHA[I] <= AEPS){ + JA<-JA+1 + } + else{ + GI<-GI+JA + JA<-1 + ANGLE<-ALPHA[I] + } + KI<-FV[I]-GI + NNKI<-NN-KI + NUMH<-min(c(NUMH,min(c(KI,NNKI)))) + } +NUMS<-NUMS+depths1(NT,1)*depths1(NN,2)+depths1(NT,2)*depths1(NN,1)+ +depths1(NT,3) + if(N >= 3)SDEP<-(NUMS+0.0)/(depths1(N,3)+0.0) + NUMH<-NUMH+NT + HDEP<-(NUMH+0.0)/(N+0.0) + HDEP +} + +rtdep<-function(pts,m,nsamp=100,SEED=NA){ +# +# Determine Tukey depth by randomly sampling +# p-1 points from m (which has p columns), +# combine this with pt, fit a plane, check +# the residuals, and repeat many times. +# Count how many positive residuals +# there are, say pr, how many negative residuals, nr. +# The approximate depth is min (pr,nr) over all samples. 
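+#
+# Example (hypothetical data): with m an n-by-3 matrix, rtdep(c(0,0,0), m, nsamp=200)
+# approximates the Tukey (halfspace) depth of the origin relative to the rows of m.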
+# +set.seed(2) +if(!is.na(SEED))set.seed(SEED) +if(!is.matrix(m))stop("Second argument is not a matrix") +if(ncol(m)==2)tdep<-depth(pts[1],pts[2],m) +if(ncol(m)>2){ +n<-nrow(m) +pts<-matrix(pts,ncol=ncol(m)) +mold<-m +p<-ncol(m) +pm1<-p-1 +mdup<-matrix(rep(pts,nrow(m)),ncol=ncol(m),byrow=TRUE) +dif<-abs(m-mdup) +chk<-apply(dif,1,sum) +flag<-(chk!=0) +m<-m[flag,] +m<-as.matrix(m) +dmin<-sum(chk==0) +m3<-rbind(m,pts) +tdep<-nrow(m)+1 +for(i in 1:nsamp){ +mat<-sample(nrow(m),pm1,T) +if(p>2)x<-rbind(m[mat,2:p],pts[,2:p]) +y<-c(m[mat,1],pts[1]) +if(prod(eigen(var(x))$values) >10^{-8}){ +#print(prod(eigen(var(x))$values)) +temp<-qr(x) +if(temp$rank[1]==ncol(x)){ +temp<-lsfit(x,y)$coef +m2<-cbind(rep(1,nrow(m3)),m3[,2:p]) +res<-m3[,1]-temp%*%t(m2) +p1<-sum((res>0)) +p2<-sum((res<0)) +tdep<-min(c(tdep,p1,p2)) +if(tdep EPS) + { + NSIN <- NSIN + 1 + foundSingular <- T + if (PRINT) + paste( "ERROR: No Eigenvalue = 0 for sample", NRAN) + next + } + + # ------------------------------------------ + # Need to test for singularity + # ------------------------------------------ + if (Eval[NP-1] <= EPS) + { + NSIN <- NSIN + 1 + } + + # ------------------------------------------ + # Projecting all pints on line through + # theta with direction given by the eigen + # vector of the smallest eigenvalue, i.e., + # the direction orthogonal on the hyperplane + # given by the NP-subset. + # Compute the one-dimensional halfspace depth + # of theta on this line. + # ------------------------------------------ + # in Splus the smallest eigenvalue is the + # last one and corresponding vector is the + # last one, hence Eval[NP] is the smallest + # and Evec[,NP] is the corresponding vector + # ------------------------------------------ + eigenVec <- Evec[,NP] + NT <- sum( ifelse( eigenVec <= EPS, 1, 0 ) ) + KT <- sum( ifelse( eigenVec > EPS, PNT * eigenVec, 0 ) ) + if (NT == NP) + { + NSIN <- NSIN + 1 + foundSingular <- T + if (PRINT) + paste( " ERROR: Eigenvector = 0 for sample", NRAN ) + if (foundSingular) next # Do next Sample + } + K <- X %*% eigenVec + K <- K - KT + NUMH <- sum( ifelse( K > EPS, 1, 0 ) ) + NT <- sum( ifelse( abs(K) <= EPS, 1, 0 ) ) + # ------------------------------------------- + # If all projections collapse with theta, + # return to reduce the dimension + # ------------------------------------------- + if (NT == N) + { + NSIN <- -1 + return( list( NDEP=NDEP, NSIN=NSIN, EVEC=Evec ) ) # Will need +#Eigen Vector matrix to reduce dimension + } + + # ------------------------------------------- + # Update halfspace depth + # ------------------------------------------- + NDEP <- min( NDEP, min( NUMH+NT,N-NUMH ) ) + } + + return( list( NDEP=NDEP, NSIN=NSIN, EVEC=Evec ) ) + } + + #================================================ + Reduce <- function( X, PNT, Evec ) + { + Det <- det(Evec) + if (Det==0) + { + return( list( X=X, PNT=PNT, DET=Det ) ) + } + NP <- ncol(X) + + # --------------------------------------- + # Compute (NP-1)-dimentional coordinates + # for all points and theta + # --------------------------------------- + RedEvec <- matrix(Evec[,1:(NP-1)],nrow=NP,ncol=(NP-1)) # Reducing + # dimension by removing the last dimension with 0 variance. 
+ PNT <- PNT %*% RedEvec + X <- X %*% RedEvec + if (!is.matrix(X)) X <- matrix(X,ncol=(NP-1)) + return( list( X=X, PNT=PNT, DET=Det ) ) + } + +# +# PROGRAM BEGINS +# + if (!is.na(SEED)) set.seed( SEED ) + # --------------------------------------- + # Initialize Number of singular samples + # --------------------------------------- + Nsin <- 0 + + X <- as.matrix( X ) + N <- nrow( X ) + NP <- ncol( X ) + +if (length(PNT) != NP){print("Length of 'PNT' has to equal to") +stop("number of columns in X !!! " ) +} + + # --------------------------------------- + # Handle special case where N=1 + # --------------------------------------- + if (N==1) + { + NDEP <- ifelse( abs(X[1,]-PNT) > EPS, 0, 1 ) # if any dimension +# different from point PNT, NDEP=0, else = 1 + NDEP <- min( NDEP ) + DEPTH <- NDEP/ N + return( DEPTH ) + } + + # --------------------------------------- + # Handle special case where NP=1 + # --------------------------------------- + repeat #+++++++++++++++++++++++++++++++++ + { + # In this case depth is equal to number of points <= to T + if (NP==1) + { + MORE <- sum( ifelse( X[,1] >= (PNT-EPS), 1, 0 ) ) + LESS <- sum( ifelse( X[,1] <= (PNT+EPS), 1, 0 ) ) + NDEP <- min( LESS, MORE ) + DEPTH <- NDEP / N + return( DEPTH ) + } + + # --------------------------------------- + # General Case, call function DEP + # --------------------------------------- + if (N > NP) + { + RES <- DEP( X=X, PNT=PNT, NDIR=NDIR, EPS=EPS, PRINT=PRINT ) + NDEP <- RES$NDEP + NSIN <- RES$NSIN + EVEC <- RES$EVEC + } + else + { + NSIN <- -1 # Needs to reduce dimensions + EVEC <- eigen( var( X ) )[[2]] # Getting eigenvector + } + + # --------------------------------------- + # If all points and theta are identified + # as lying on the same hyperplane, reduce + # the dimension of the data set by projection + # on that hyperplane, and compute the depth + # on the reduced data set + # --------------------------------------- + if (NSIN == -1) + { + NSIN <- 0 + if (PRINT) print( " Direction with zero variance detected" ) + RED <- Reduce( X=X, PNT=PNT, Evec=EVEC ) + X <- RED$X + PNT <- RED$PNT + Det <- RED$DET + if (Det==0) + { +print("\n\n\t DIMENSION REDUCTION TERMINATED\n\t EIGENVECTORS ARE NOT") +stop("INDEPENDENT\n\n" ) + } + NP <- ncol(X) + if (PRINT) paste(" Dimension reduced to", NP ) + } + else + { + break # No need to reduce dimension of X and hence no need to +#return, breaks 'repeat' loop + } + } # End repeat+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + + + DEPTH <- NDEP / N + return( DEPTH ) +} + + + +depths1<-function(m,j){ +if(m < j)depths1<-0 +else{ +if(j==1)depths1<-m +if(j==2)depths1<-(m*(m-1))/2 +if(j==3)depths1<-(m*(m-1)*(m-2))/6 +} +depths1 +} + +outbox<-function(x,mbox=FALSE,gval=NA,plotit=FALSE,STAND=FALSE){ +# +# This function detects outliers using the +# boxplot rule, but unlike the R function boxplot, +# the ideal fourths are used to estimate the quartiles. +# +# Setting mbox=TRUE results in using the modification +# of the boxplot rule suggested by Carling (2000). 
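+#
+# Example (hypothetical data): outbox(x)$out.id gives the indices of the values
+# declared outliers by the boxplot rule based on the ideal fourths;
+# outbox(x, mbox=TRUE) uses Carling's modification instead.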
+# +x<-x[!is.na(x)] # Remove missing values +if(plotit)boxplot(x) +n<-length(x) +temp<-idealf(x) +if(mbox){ +if(is.na(gval))gval<-(17.63*n-23.64)/(7.74*n-3.71) +cl<-median(x)-gval*(temp$qu-temp$ql) +cu<-median(x)+gval*(temp$qu-temp$ql) +} +if(!mbox){ +if(is.na(gval))gval<-1.5 +cl<-temp$ql-gval*(temp$qu-temp$ql) +cu<-temp$qu+gval*(temp$qu-temp$ql) +} +flag<-NA +outid<-NA +vec<-c(1:n) +for(i in 1:n){ +flag[i]<-(x[i]< cl || x[i]> cu) +} +if(sum(flag)==0)outid<-NULL +if(sum(flag)>0)outid<-vec[flag] +keep<-vec[!flag] +outval<-x[flag] +n.out=sum(length(outid)) +list(out.val=outval,out.id=outid,keep=keep,n=n,n.out=n.out,cl=cl,cu=cu) +} + +mscov<-function(m,STAND=TRUE){ +# +# m is an n by p matrix +# +# Compute a skipped covariance matrix +# +# Eliminate outliers using a projection method +# That is, compute Donoho-Gasko median, for each point +# consider the line between it and the median, +# project all points onto this line, and +# check for outliers using a boxplot rule. +# Repeat this for all points. A point is declared +# an outlier if for any projection it is an outlier +# using a modification of the usual boxplot rule. +# +# Eliminate any outliers and compute covariances +# using remaining data. +# +m<-elimna(m) +temp<-outpro(m,plotit=FALSE,STAND=STAND)$keep +mcor<-var(m[temp,]) +mcor +} + +runm3d<-function(x,y,theta=50,phi=25,fr=.8,tr=.2,plotit=TRUE,pyhat=FALSE,nmin=0, +expand=.5,scale=FALSE,zscale=FALSE,xout=FALSE,outfun=out,eout=FALSE,xlab="X",ylab="Y",zlab="", +pr=TRUE,SEED=TRUE,ticktype="simple"){ +# +# running mean using interval method +# +# fr controls amount of smoothing +# tr is the amount of trimming +# x is an n by p matrix of predictors. +# +# Rows of data with missing values are automatically removed. +# +# When plotting, theta and phi can be used to change +# the angle at which the plot is viewed. +# +# theta is the azimuthal direction and phi the colatitude +# expand controls relative length of z-axis +# +library(MASS) +library(akima) +if(plotit){ +if(pr){ +print("Note: when there is independence, scale=F is probably best") +print("When there is dependence, scale=T is probably best") +}} +if(!is.matrix(x))stop("x should be a matrix") +if(nrow(x) != length(y))stop("number of rows of x should equal length of y") +temp<-cbind(x,y) +p<-ncol(x) +p1<-p+1 +temp<-elimna(temp) # Eliminate any rows with missing values. 
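+# (Next, if xout=TRUE, rows whose predictor values are flagged as outliers by outfun
+# are removed before the running mean is computed.)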
+if(xout){ +keepit<-rep(TRUE,nrow(x)) +flag<-outfun(x,plotit=FALSE)$out.id +keepit[flag]<-F +x<-x[keepit,] +y<-y[keepit] +} +if(zscale){ +for(j in 1:p1){ +temp[,j]<-(temp[,j]-median(temp[,j]))/mad(temp[,j]) +}} +x<-temp[,1:p] +y<-temp[,p1] +pyhat<-as.logical(pyhat) +plotit<-as.logical(plotit) +if(SEED)set.seed(12) +m<-cov.mve(x) +iout<-c(1:nrow(x)) +rmd<-1 # Initialize rmd +nval<-1 +for(i in 1:nrow(x))rmd[i]<-mean(y[near3d(x,x[i,],fr,m)],tr) +for(i in 1:nrow(x))nval[i]<-length(y[near3d(x,x[i,],fr,m)]) +if(plotit){ +if(ncol(x)!=2)stop("When plotting, x must be an n by 2 matrix") +fitr<-rmd[nval>nmin] +y<-y[nval>nmin] +x<-x[nval>nmin,] +iout<-c(1:length(fitr)) +nm1<-length(fitr)-1 +for(i in 1:nm1){ +ip1<-i+1 +for(k in ip1:length(fitr))if(sum(x[i,]==x[k,])==2)iout[k]<-0 +} +fitr<-fitr[iout>=1] # Eliminate duplicate points in the x-y plane +# This is necessary when doing three dimensional plots +# with the R function interp +mkeep<-x[iout>=1,] +fit<-interp(mkeep[,1],mkeep[,2],fitr) +persp(fit,theta=theta,phi=phi,xlab=xlab,ylab=ylab,zlab=zlab,expand=expand, +scale=scale,ticktype=ticktype) +} +last<-"Done" +if(pyhat)last<-rmd +last +} + +rdplot<-function(x,fr=NA,plotit=TRUE,theta=50,phi=25,expand=.5,pyhat=FALSE,pts=NA, +xlab="X",ylab="",ticktype="simple"){ +# +# Expected frequency curve +# +# fr controls amount of smoothing +# theta is the azimuthal direction and phi the colatitude +# +plotit<-as.logical(plotit) +x<-elimna(x) +x<-as.matrix(x) +rmd<-NA +if(ncol(x)==1){ +x=as.vector(x) +if(is.na(fr))fr<-.8 +if(is.na(pts[1]))pts<-x +for(i in 1:length(pts)){ +rmd[i]<-sum(near(x,pts[i],fr)) +} +if(mad(x)!=0)rmd<-rmd/(2*fr*mad(x)) +rmd<-rmd/length(x) +if(plotit){ +plot(pts,rmd,type="n",ylab=ylab,xlab=xlab) +sx<-sort(pts) +xorder<-order(pts) +sysm<-rmd[xorder] +lines(sx,sysm) +}} +x<-as.matrix(x) +if(ncol(x)>1){ +library(MASS) +if(is.na(fr))fr<-.6 +m<-covmve(x) +for(i in 1:nrow(x)){ +rmd[i]<-sum(near3d(x,x[i,],fr,m)) +} +rmd<-rmd/nrow(x) +if(plotit && ncol(x)==2){ +library(akima) +fitr<-rmd +iout<-c(1:length(fitr)) +nm1<-length(fitr)-1 +for(i in 1:nm1){ +ip1<-i+1 +for(k in ip1:length(fitr))if(sum(x[i,]==x[k,])==2)iout[k]<-0 +} +fitr<-fitr[iout>=1] +mkeep<-x[iout>=1,] +fit<-interp(mkeep[,1],mkeep[,2],fitr) +persp(fit,theta=theta,phi=phi,expand=expand,xlab="Var 1",ylab="Var 2",zlab="", +ticktype=ticktype) +} +} +if(pyhat)last<-rmd +if(!pyhat)last<-"Done" +last +} + + rimul<-function(J,K,x,alpha=.05,p=J*K,grp=c(1:p),plotit=TRUE,op=4){ +# +# Rank-based multiple comparisons for all interactions +# in J by K design. The method is based on an +# extension of Cliff's heteroscedastic technique for +# handling tied values and the Patel-Hoel definition of no interaction. +# +# The familywise type I error probability is controlled by using +# a critical value from the Studentized maximum modulus distribution. +# +# It is assumed all groups are independent. +# +# Missing values are automatically removed. +# +# The default value for alpha is .05. Any other value results in using +# alpha=.01. +# +# Argument grp can be used to rearrange the order of the data. 
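+#
+# Example (hypothetical data): for a 2-by-2 design with the four groups stored in
+# list mode in z, rimul(2,2,z) tests the single Patel-Hoel interaction contrast.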
+# + df=Inf +if(is.matrix(x))x<-listm(x) +if(!is.list(x))stop("Data must be stored in a matrix or in list mode.") +CCJ<-(J^2-J)/2 +CCK<-(K^2-K)/2 +CC<-CCJ*CCK +test<-matrix(NA,CC,8) +test.p<-matrix(NA,CC,7) +for(j in 1:J){ +xx<-!is.na(x[[j]]) +val<-x[[j]] +x[[j]]<-val[xx] # Remove missing values +} +mat<-matrix(grp,ncol=K,byrow=TRUE) +dimnames(test)<-list(NULL,c("Factor A","Factor A","Factor B","Factor B","delta","ci.lower","ci.upper","p.value")) +jcom<-0 +crit=qsmm(1-alpha,CC,df) +#if(alpha!=.05)crit<-smmcrit01(200,CC) +alpha<-1-pnorm(crit) +for (j in 1:J){ +for (jj in 1:J){ +if (j < jj){ +for (k in 1:K){ +for (kk in 1:K){ +if (k < kk){ +jcom<-jcom+1 +test[jcom,1]<-j +test[jcom,2]<-jj +test[jcom,3]<-k +test[jcom,4]<-kk +temp1<-cid(x[[mat[j,k]]],x[[mat[j,kk]]],plotit=FALSE) +temp2<-cid(x[[mat[jj,k]]],x[[mat[jj,kk]]],plotit=FALSE) +delta<-temp2$d-temp1$d +sqse<-temp1$sqse.d+temp2$sqse.d +test[jcom,5]<-delta/2 +test[jcom,6]<-delta/2-crit*sqrt(sqse/4) +test[jcom,7]<-delta/2+crit*sqrt(sqse/4) +test[jcom,8]=2*(1-pnorm(abs((delta/2)/sqrt(sqse/4)))) +}}}}}} +if(J==2 & K==2){ +if(plotit){ +m1<-outer(x[[1]],x[[2]],FUN="-") +m2<-outer(x[[3]],x[[4]],FUN="-") +m1<-as.vector(m1) +m2<-as.vector(m2) +g2plot(m1,m2,op=op) +}} +list(test=test) +} + +ifmest<-function(x,bend=1.28,op=2){ +# +# Estimate the influence function of an M-estimator, using +# Huber's Psi, evaluated at x. +# +# Data are in the vector x, bend is the percentage bend +# +# op=2, use adaptive kernel estimator +# otherwise use Rosenblatt's shifted histogram +# +tt<-mest(x,bend) # Store M-estimate in tt +s<-mad(x)*qnorm(.75) +if(op==2){ +val<-akerd(x,pts=tt,plotit=FALSE,pyhat=TRUE) +val1<-akerd(x,pts=tt-s,plotit=FALSE,pyhat=TRUE) +val2<-akerd(x,pts=tt+s,plotit=FALSE,pyhat=TRUE) +} +if(op!=2){ +val<-kerden(x,0,tt) +val1<-kerden(x,0,tt-s) +val2<-kerden(x,0,tt+s) +} +ifmad<-sign(abs(x-tt)-s)-(val2-val1)*sign(x-tt)/val +ifmad<-ifmad/(2*.6745*(val2+val1)) +y<-(x-tt)/mad(x) +n<-length(x) +b<-sum(y[abs(y)<=bend])/n +a<-hpsi(y,bend)*mad(x)-ifmad*b +ifmest<-a/(length(y[abs(y)<=bend])/n) +ifmest +} + +qmjci<-function(x,q=.5,alpha=.05,op=1,pr=TRUE){ +# +# Compute a 1-alpha confidence for qth quantile using the +# Maritz-Jarrett estimate of the standard error. +# +# The default quantile is .5. +# The default value for alpha is .05. +# +x=elimna(x) +if(pr){ +if(sum(duplicated(x)>0))print("Duplicate values detected; use hdpb") +} +if(q <= 0 || q>=1)stop("q must be between 0 and 1") +y<-sort(x) +m<-floor(q*length(x)+.5) +crit<-qnorm(1-alpha/2) +qmjci<-vector(mode="numeric",2) +se<-NA +if(op==1)se<-mjse(x) +if(op==2){ +if(q!=.5)stop("op=2 works only with q=.5") +se<-msmedse(x) +} +if(op==3)se<-qse(x,q) +if(is.na(se))stop("Something is wrong, op should be 1, 2 or 3") +qmjci[1]<-y[m]-crit*se +qmjci[2]<-y[m]+crit*se +qmjci +} + + +bootdpci<-function(x,y,est=onestep,nboot=NA,alpha=.05,plotit=FALSE,dif=TRUE,BA=FALSE,SR=TRUE,...){ +# +# Use percentile bootstrap method, +# compute a .95 confidence interval for the difference between +# a measure of location or scale +# when comparing two dependent groups. +# By default, a one-step M-estimator (with Huber's psi) is used. +# If, for example, it is desired to use a fully iterated +# M-estimator, use fun=mest when calling this function. 
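+# (The measure of location is supplied via the argument est; for example, est=mest
+# gives a fully iterated M-estimator and est=tmean a trimmed mean, with extra
+# arguments such as tr passed through the ... argument.)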
+# +okay=FALSE +if(identical(est,onestep))okay=TRUE +if(identical(est,mom))okay=TRUE +if(!okay)SR=FALSE +output<-rmmcppb(x,y,est=est,nboot=nboot,alpha=alpha,SR=SR, +plotit=plotit,dif=dif,BA=BA,...)$output +list(output=output) +} + + +relfun<-function(xv,yv,C=36,epsilon=.0001,plotit=TRUE,pch='*',xlab='X',ylab='Y'){ +# Compute the measures of location, scale and correlation used in the +# bivariate boxplot of Goldberg and Iglewicz, +# Technometrics, 1992, 34, 307-320. +# +# The code in relplot plots the boxplot. +# +# This code assumes the data are in xv and yv +# +# This code uses the function biloc, stored in the file biloc.b7 and +# bivar stored in bivar.b7 +# +plotit<-as.logical(plotit) +# +# Do pairwise elimination of missing values +# +temp<-matrix(c(xv,yv),ncol=2) +temp<-elimna(temp) +xv<-temp[,1] +yv<-temp[,2] +tx<-biloc(xv) +ty<-biloc(yv) +sx<-sqrt(bivar(xv)) +sy<-sqrt(bivar(yv)) +z1<-(xv-tx)/sx+(yv-ty)/sy +z2<-(xv-tx)/sx-(yv-ty)/sy +ee<-((z1-biloc(z1))/sqrt(bivar(z1)))^2+ +((z2-biloc(z2))/sqrt(bivar(z2)))^2 +w<-(1-ee/C)^2 +if(length(w[w==0])>=length(xv)/2)warning("More than half of the w values equal zero") +sumw<-sum(w[ee1, a standard percentile bootstrap method is used +# with FWE (the probability of at least one type I error) +# controlled via the Bonferroni inequality. +# +# The predictor values are assumed to be in the n by p matrix x. +# The default number of bootstrap samples is nboot=599 +# +# SEED=T causes the seed of the random number generator to be set to 2, +# otherwise the seed is not set. +# +# Warning: probability coverage has been studied only when alpha=.05 +# +x<-as.matrix(x) +p<-ncol(x) +pp<-p+1 +temp<-elimna(cbind(x,y)) # Remove any missing values. +x<-temp[,1:p] +y<-temp[,p+1] +if(xout){ +m<-cbind(x,y) +if(identical(outfun,outblp))flag=outblp(x,y,plotit=FALSE)$keep +else +flag<-outfun(x,plotit=FALSE)$keep +m<-m[flag,] +x<-m[,1:p] +y<-m[,pp] +} +x<-as.matrix(x) +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +print("Taking bootstrap samples; please wait") +data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot) +bvec<-apply(data,1,regboot,x,y,lsfit) # A p+1 by n matrix. The first row +# contains the bootstrap intercepts, the second row +# contains the bootstrap values for first predictor, etc. 
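+# (For p=1, the percentile cutoffs used below are adjusted according to the sample
+# size, which is why probability coverage has been studied only for alpha=.05; for
+# p>1, ordinary percentile cutoffs are used and the critical level is alpha/p, a
+# Bonferroni adjustment.)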
+if(p==1){ +if(alpha != .05){print("Resetting alpha to .05") +print("With p=1, unknown how to adjust confidence interval") +print("when alpha is not equal to .05.") +} +ilow<-15 +ihi<-584 +if(length(y) < 250){ +ilow<-13 +ihi<-586 +} +if(length(y) < 180){ +ilow<-10 +ihi<-589 +} +if(length(y) < 80){ +ilow<-7 +ihi<-592 +} +if(length(y) < 40){ +ilow<-6 +ihi<-593 +} +ilow<-round((ilow/599)*nboot) +ihi<-round((ihi/599)*nboot) +} +if(p>1){ +ilow<-round(alpha*nboot/2)+1 +ihi<-nboot-ilow +} +lsfitci<-matrix(0,ncol(x),2) +for(i in 1:ncol(x)){ +ip<-i+1 +bsort<-sort(bvec[ip,]) +lsfitci[i,1]<-bsort[ilow+1] +lsfitci[i,2]<-bsort[ihi] +} +bsort<-sort(bvec[1,]) +interceptci<-c(bsort[15],bsort[584]) +crit.level<-NA +pmat<-NA +if(p>1){ +crit.level<-alpha/p +pmat<-matrix(NA,nrow=p,ncol=2) +dimnames(pmat) <- list(NULL, c("Slope","p-value")) +for(pv in 1:p){ +pmat[pv,1]<-pv +pp<-pv+1 +pmat[pv,2]<-(sum(bvec[pp,]<0)+.5*sum(bvec[pp,]==0))/nboot +temp3<-1-pmat[pv,2] +pmat[pv,2]<-2*min(pmat[pv,2],temp3) +}} +list(intercept.ci=interceptci,slope.ci=lsfitci,crit.level=crit.level, +p.values=pmat) +} + +wmve<-function(m,SEED=TRUE){ +# +# Compute skipped measure of location and scatter +# using MVE method +# +if(is.matrix(m))n<-nrow(m) +if(is.vector(m))n<-length(m) +flag<-rep(TRUE,n) +vec<-out(m,plotit=FALSE,SEED=SEED)$out.id +flag[vec]<-FALSE +if(is.vector(m)){ +center<-mean(m[flag]) +scatter<-var(m[flag]) +} +if(is.matrix(m)){ +center<-apply(m[flag,],2,mean) +scatter<-var(m[flag,]) +} +list(center=center,cov=scatter) +} + +wmw<-function(x,y){ +# +# Do Mann-Whitney test +# Return the usual p-value followed by adjusted +# p-value using Hodges, Ramsey and Wechsler (1990) method +# (See Wilcox, 2003, p. 559.) +# +x=elimna(x) +y=elimna(y) +m<-length(x) +n<-length(y) +com<-rank(c(x,y)) +xp1<-length(x)+1 +x<-com[1:length(x)] +y<-com[xp1:length(com)] +u<-sum(y)-n*(n+1)/2 +sigsq<-m*n*(n+m+1)/12 +yv<-(u+.5-m*n/2)/sqrt(sigsq) +kv<-20*m*n*(m+n+1)/(m^2+n^2+n*m+m+n) +S<-yv^2 +T1<-S-3 +T2<-(155*S^2-416*S-195)/42 +cv<-1+T1/kv+T2/kv^2 +sighrw<-2*(1-pnorm(abs(cv*yv))) +z<-(u-(.5*m*n))/sqrt(sigsq) +sig<-2*(1-pnorm(abs(z))) +list(p.value=sig,adj.p.value=sighrw,p.hat=u/(n*m)) +} + +lsfitNci<-function(x,y,alpha=.05){ +# +# Compute confidence interval for least squares +# regression using heteroscedastic method +# recommended by Long and Ervin (2000). +# +x<-as.matrix(x) +if(nrow(x) != length(y))stop("Length of y does not match number of x values") +m<-cbind(x,y) +m<-elimna(m) +y<-m[,ncol(x)+1] +temp<-lsfit(x,y) +x<-cbind(rep(1,nrow(x)),m[,1:ncol(x)]) +xtx<-solve(t(x)%*%x) +h<-diag(x%*%xtx%*%t(x)) +hc3<-xtx%*%t(x)%*%diag(temp$res^2/(1-h)^2)%*%x%*%xtx +df<-nrow(x)-ncol(x) +crit<-qt(1-alpha/2,df) +al<-ncol(x) +ci<-matrix(NA,nrow=al,ncol=3) +for(j in 1:al){ +ci[j,1]<-j +ci[j,2]<-temp$coef[j]-crit*sqrt(hc3[j,j]) +ci[j,3]<-temp$coef[j]+crit*sqrt(hc3[j,j]) +} +print("Confidence intervals for intercept followed by slopes:") +list(ci=ci,stand.errors=sqrt(diag(hc3))) +} + + + +pow2an<-function(x,y,ci=FALSE,plotit=TRUE,nboot=800){ +# +# Do a power analysis when comparing the 20% trimmed +# means of two independent groups with the percentile +# bootstrap method. +# +# +x<-x[!is.na(x)] +y<-y[!is.na(y)] +lp<-NA +se<-yuen(x,y)$se +gval<-NA +dv<-seq(0,3.5*se,length=15) +for(i in 1:length(dv)){ +gval[i]<-powest(x,y,dv[i],se) +} +if(!ci){ +if(plotit){ +plot(dv,gval,type="n",xlab="delta",ylab="power") +lines(dv,gval) +}} +if(ci){ +print("Taking bootstrap samples. 
Please wait.") +datax <- matrix(sample(x, size = length(x) * nboot, replace = TRUE), + nrow = nboot) +datay <- matrix(sample(y, size = length(y) * nboot, replace = TRUE), + nrow = nboot) +pboot<-matrix(NA,ncol=15,nrow=nboot) +for(i in 1:nboot){ +se<-yuen(datax[i,],datay[i,])$se +for(j in 1:length(dv)){ +pboot[i,j]<-powest(x,y,dv[j],se) +}} +ll<-floor(.05*nboot+.5) +for(i in 1:15){ +temp<-sort(pboot[,i]) +lp[i]<-temp[ll] +} +plot(c(dv,dv),c(gval,lp),type="n",xlab="delta",ylab="power") +lines(dv,gval) +lines(dv,lp,lty=2) +} +list(delta=dv,power=gval,lowp=lp) +} +powest<-function(x=NA,y=NA,delta=0,se=NA,wv1=NA,wv2=NA,n1=NA,n2=NA){ +# +# wv1 = Winsorized variance for group 1 +# wv2 = Winsorized variance for group 2 +# +# Only 20% trimming is allowed. +# +tr<-.2 +if(is.na(se)){ +if(is.na(wv1)){ +h1 <- length(x) - 2 * floor(tr * length(x)) +h2 <- length(y) - 2 * floor(tr * length(y)) +q1 <- ((length(x) - 1) * winvar(x, tr))/(h1 * (h1 - 1)) +q2 <- ((length(y) - 1) * winvar(y, tr))/(h2 * (h2 - 1)) +} +if(!is.na(wv1)){ +if(is.na(n1))stop("Need to specify sample size for group 1") +if(is.na(n2))stop("Need to specify sample size for group 2") +h1<-n1-2*floor(tr*n1) +h2<-n2-2*floor(tr*n2) +q1<-(n1-1)*wv1/(h1*(h1-1)) +q2<-(n2-1)*wv2/(h2*(h2-1)) +} +se<-sqrt(q1+q2) +} +ygam<-sqrt(2*.01155)*c(0:35)/8 +pow<-c(500.0,540.0,607.0, 706.0, 804.0,981.0,1176.0,1402.0,1681.0, 2008.0, + 2353.0, 2769.0, 3191.0, 3646.0, 4124.0, 4617.0, 5101.0, 5630.0, + 6117.0, 6602.0, 7058.0, 7459.0, 7812.0, 8150.0, 8479.0, 8743.0, + 8984.0, 9168.0, 9332.0, 9490.0, 9607.0, 9700.0, 9782.0, 9839.0, + 9868.0)/10000 +flag<-(delta==0 & se==0) +if(flag)powest<-.05 +else{ +chk<-floor(8*delta/se)+1 +chk1<-chk+1 +gval<-delta/se +d1<-(gval-(chk-1)/8)*8 +if(chk > length(pow))powest<-1 +if(chk == length(pow))pow[chk1]<-1 +if(chk <= length(pow)) +powest<-pow[chk]+d1*(pow[chk1]-pow[chk]) +} +powest +} + +twopcor<-function(x1,y1,x2,y2,SEED=TRUE){ +# +# Compute a .95 confidence interval for +# the difference between two Pearson +# correlations corresponding to two independent +# goups. +# +# This function uses an adjusted percentile bootstrap method that +# gives good results when the error term is heteroscedastic. +# +# WARNING: If the number of bootstrap samples is altered, it is +# unknown how to adjust the confidence interval when n1+n2 < 250. +# +nboot<-599 #Number of bootstrap samples +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +X<-elimna(cbind(x1,y1)) +x1<-X[,1] +y1<-X[,2] +X<-elimna(cbind(x2,y2)) +x2<-X[,1] +y2<-X[,2] +data1<-matrix(sample(length(y1),size=length(y1)*nboot,replace=TRUE),nrow=nboot) +bvec1<-apply(data1,1,pcorbsub,x1,y1) # A 1 by nboot matrix. +data2<-matrix(sample(length(y2),size=length(y2)*nboot,replace=TRUE),nrow=nboot) +bvec2<-apply(data2,1,pcorbsub,x2,y2) # A 1 by nboot matrix. 
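+# (The bootstrap differences r1*-r2* are formed next; the percentile cutoffs are
+# adjusted according to the combined sample size, as noted in the WARNING above.)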
+bvec<-bvec1-bvec2 +ilow<-15 +ihi<-584 +if(length(y1)+length(y2) < 250){ +ilow<-14 +ihi<-585 +} +if(length(y1)+length(y2) < 180){ +ilow<-11 +ihi<-588 +} +if(length(y1)+length(y2) < 80){ +ilow<-8 +ihi<-592 +} +if(length(y1)+length(y2) < 40){ +ilow<-7 +ihi<-593 +} +bsort<-sort(bvec) +r1<-cor(x1,y1) +r2<-cor(x2,y2) +ci<-c(bsort[ilow],bsort[ihi]) +list(r1=r1,r2=r2,ci=ci) +} + + +tworhobt<-function(X1,Y1,X2,Y2,alpha=.05,nboot=499,SEED=TRUE){ +# +# compare two independent correlations using a bootstrap-t method in conjunction with the HC4 estimator +# +if(SEED)set.seed(2) +r1=cor(X1,Y1) +r2=cor(X2,Y2) +n1=length(X1) +n2=length(X2) +v=NA +Nboot=nboot+1 +for(i in 1:Nboot){ +if(i<=nboot){ +id1=sample(n1,n1,replace=TRUE) +id2=sample(n2,n2,replace=TRUE) +} +if(i==Nboot){ +id1=c(1:n1) +id2=c(1:n2) +} +x1=X1[id1] +y1=Y1[id1] +x2=X2[id2] +y2=Y2[id2] +x1=(x1-mean(x1))/sd(x1) +y1=(y1-mean(y1))/sd(y1) +x2=(x2-mean(x2))/sd(x2) +y2=(y2-mean(y2))/sd(y2) +temp1=olshc4(x1,y1) +temp2=olshc4(x2,y2) +if(i<=nboot)v[i]=(temp1$ci[2,2]-r1-temp2$ci[2,2]+r2)/sqrt(temp1$ci[2,6]^2+temp2$ci[2,6]^2) +if(i==Nboot)v[i]=(temp1$ci[2,2]-temp2$ci[2,2])/sqrt(temp1$ci[2,6]^2+temp2$ci[2,6]^2) +} +ibot<-round(alpha*nboot/2) +itop<-nboot-ibot+1 +ibot=ibot+1 #adjusted so that p-value and confidence interval give consistent results. +vs=sort(v[1:nboot]) +crit=c(vs[ibot],vs[itop]) +test=v[Nboot] +if(test<0)G=mean(test>v[1:nboot]) +if(test>=0)G=mean(test1)pv=1 +if(pv<0)pv=0 +list(test=test,crit.val=crit,p.value=pv) +} + + + +indtall<-function(x,y=NULL,tr=0,nboot=500,SEED=TRUE){ +# +# Test the hypothesis of independence for +# 1. all pairs of variables in matrix x, if y=NA, or +# 2. between each variable stored in the matrix x and y. +# This is done by repeated calls to indt +# +x<-as.matrix(x) +# First, eliminate any rows of data with missing values. +if(!is.null(y[1])){ +temp <- cbind(x, y) + temp <- elimna(temp) + pval<-ncol(temp)-1 + x <- temp[,1:pval] + y <- temp[, pval+1] +} +x<-as.matrix(x) +if(is.null(y[1])){ +ntest<-(ncol(x)^2-ncol(x))/2 +if(ntest==0)stop("Something is wrong. Does x have only one column?") +output<-matrix(NA,nrow=ntest,ncol=4) +dimnames(output)<-list(NULL,c("VAR","VAR","Test Stat.","p-value")) +x<-elimna(x) +ic<-0 +for (j in 1:ncol(x)){ +for (jj in 1:ncol(x)){ +if(jyhat)/length(x) +zhat<-NA +if(!is.na(z[1])){ +# +# Make decisions for the data in z, +# set zhat=1 if decide it came from +# group 1. +# +zxhat<-0 +zyhat<-0 +zhat<-0 +if(op==2){ +zxhat<-akerd(x,pts=z,pyhat=TRUE,plotit=FALSE) +zyhat<-akerd(y,pts=z,pyhat=TRUE,plotit=FALSE) +} +for(i in 1:length(z)){ +if(op==1){ +zxhat[i]<-kerden(x,0,z[i]) +zyhat[i]<-kerden(y,0,z[i]) +} +zhat[i]<-1 +if(is.na(zxhat[i]) || is.na(zyhat[i])){ +# Missing values, +# data can't be used to make a decision, +# so make a random decision about whether a value +# came from first group. 
+arb<-runif(1) +zhat[i]<-1 +if(arb < .5)zhat[i]<-0 +} +else +if(zxhat[i]=2){ +library(akima) +if(ncol(x)==2 & !scale){ +if(pr){ +print("scale=FALSE is specified.") +print("If there is dependence, might use scale=TRUE") +print("To get a p-value, based on the measure of the") +print("strength of association based on this function,") +print("use the function lplotPV") +}} +x<-m[,1:d] +y<-m[,d+1] +if(eout & xout)stop("Can't have both eout and xout = FALSE") +if(eout){ +flag<-outfun(m,plotit=FALSE,...)$keep +m<-m[flag,] +n.keep=nrow(m) +} +if(xout){ +if(identical(outfun,outblp))flag=outblp(x,y,plotit=FALSE)$keep +else +flag<-outfun(x,plotit=FALSE,...)$keep +m<-m[flag,] +n.keep=nrow(m) +} +x<-m[,1:d] +y<-m[,d+1] +if(d==2)fitr<-fitted(loess(y~x[,1]*x[,2],span=span,family=family)) +if(d==3)fitr<-fitted(loess(y~x[,1]*x[,2]*x[,3],span=span,family=family)) +if(d==4)fitr<-fitted(loess(y~x[,1]*x[,2]*x[,3]*x[,4],span=span,family=family)) +if(d>4)stop("Can have at most four predictors") +last<-fitr +if(d==2 && plotit){ +iout<-c(1:length(fitr)) +nm1<-length(fitr)-1 +for(i in 1:nm1){ +ip1<-i+1 +for(k in ip1:length(fitr))if(sum(x[i,]==x[k,])==2)iout[k]<-0 +} +fitr<-fitr[iout>=1] # Eliminate duplicate points in the x-y plane +# This is necessary when doing three dimensional plots +# with the R function interp +mkeep<-x[iout>=1,] +fitr<-interp(mkeep[,1],mkeep[,2],fitr,duplicate=duplicate) +if(!ZLIM)persp(fitr,theta=theta,phi=phi,xlab=xlab,ylab=ylab,zlab=zlab,expand=expand, +scale=scale,ticktype=ticktype) +if(ZLIM)persp(fitr,theta=theta,phi=phi,xlab=xlab,ylab=ylab,zlab=zlab,expand=expand, +scale=scale,ticktype=ticktype,zlim=c(0,1)) #used by logreg.plot +}} +if(d==1){ +m<-elimna(cbind(x,y)) +x<-m[,1:d] +y<-m[,d+1] +if(eout && xout)stop("Cannot have both eout and xout = T") +if(eout){ +flag<-outfun(m,plotit=FALSE,...)$keep +m<-m[flag,] +n.keep=nrow(m) +} +if(xout){ +if(identical(outfun,outblp))flag=outblp(x,y,plotit=FALSE)$keep +else +flag<-outfun(x,plotit=FALSE,...)$keep +m<-m[flag,] +n.keep=nrow(m) +} +x<-m[,1:d] +y<-m[,d+1] +if(plotit){ +plot(x,y,xlab=xlab,ylab=ylab,pch=pc,frame=frame) +lines(lowess(x,y,f=low.span)) +} +tempxy<-lowess(x,y,f=low.span) +yyy<-tempxy$y +xxx<-tempxy$x +last<-yyy +chkit<-sum(duplicated(x)) +if(chkit>0){ +last<-rep(1,length(y)) +for(j in 1:length(yyy)){ +for(i in 1:length(y)){ +if(x[i]==xxx[j])last[i]<-yyy[j] +}} +} +} +if(!STR)E.power=NA +if(STR){ +E.power<-1 +if(!cor.op)E.power<-varfun(last[!is.na(last)])/varfun(y) +if(cor.op || E.power>=1){ +if(d==1){ +xord<-order(x) +E.power<-cor.fun(last,y[xord])$cor^2 +} +if(d>1)E.power<-cor.fun(last,y)$cor^2 +} +E.power=as.numeric(E.power) +} +if(!pyhat)last <- NULL +list(Strength.Assoc=sqrt(E.power),Explanatory.power=E.power,yhat.values=last,n=n.orig, +n.keep=n.keep) +} +qci<-function(x,q=.5,alpha=.05,op=3){ +# +# Compute a confidence interval for qth quantile +# using an estimate of standard error based on +# adaptive kernel density estimator. +# The qth quantile is estimated with a single order statistic. +# +# For argument op, see the function qse. +# +if(sum(duplicated(x)>0))stop("Duplicate values detected; use hdpb") +n<-length(x) +xsort<-sort(x) +iq <- floor(q * n + 0.5) +qest<-xsort[iq] +se<-qse(x,q,op=op) +crit<-qnorm(1-alpha/2) +ci.low<-qest-crit*se +ci.up<-qest+crit*se +list(ci.low=ci.low,ci.up=ci.up,q.est=qest) +} +qint<-function(x,q=.5,alpha=.05,pr=FALSE){ +# +# Compute a 1-alpha confidence interval for the qth quantile +# The function returns the exact probability coverage. 
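+#
+# Example (hypothetical data): qint(x, q=.75) returns a distribution-free confidence
+# interval for the .75 quantile based on two order statistics, along with the exact
+# probability coverage computed from the binomial distribution.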
+# +if(pr){ +if(sum(duplicated(x)>0))print("Duplicate values detected; use hdpb") +} +n<-length(x) +ii<-floor(q*n+.5) +jj<-ii+1 +if(ii<=0)stop("Cannot compute a confidence interval for this q") +if(jj>n)stop("Cannot compute a confidence interval for this q") +jjm<-jj-1 +iim<-ii-1 +cicov<-pbinom(jjm,n,q)-pbinom(iim,n,q) +while(cicov<1-alpha){ +iim<-max(iim-1,0) +jjm<-min(jjm+1,n) +if(iim==0 && jjm==n)break +cicov<-pbinom(jjm,n,q)-pbinom(iim,n,q) +} +xsort<-sort(x) +low<-xsort[iim+1] +hi<-xsort[jjm+1] +if(cicov<1-alpha){ +if(pr)print("Warning: Desired probability coverage could not be achieved") +} +list(ci.low=low,ci.up=hi,ci.coverage=cicov) +} + + +qest<-function(x,q=.5,na.rm=TRUE){ +# +# Compute an estimate of qth quantile +# using a single order statistic +# +if(na.rm)x<-elimna(x) +if(q<=0 || q>=1)stop("q must be > 0 and < 1") +n<-length(x) +xsort<-sort(x) +iq <- floor(q * n + 0.5) +qest<-NA +if(iq>0 || iq<=n)qest<-xsort[iq] +qest +} +taureg<-function(m,y,corfun=tau,...){ +# +# Compute Kendall's tau between y and each of the +# p variables stored in the n by p matrix m. +# +# Alternative measures of correlation can be used via the +# argument corfun. The only requirement is that the function +# corfun returns the correlation in corfun$cor and the p-value +# in corfun$p.value. +# +# This function also returns the two-sided significance level +# for all pairs of variables, plus a test of zero correlations +# among all pairs. (See chapter 9 of Wilcox, 2005, for details.) +# +m<-as.matrix(m) +tauvec<-NA +siglevel<-NA +for (i in 1:ncol(m)){ +pbc<-corfun(m[,i],y,...) +tauvec[i]<-pbc$cor +siglevel[i]<-pbc$p.value +} +list(cor=tauvec,p.value=siglevel) +} + +cor2M=taureg + +correg.sub<-function(X,theta,corfun=tau){ +np<-ncol(X) +p<-np-1 +x<-X[,1:p] +y<-X[,np] +temp<-t(t(x)*theta) +yhat<-apply(temp,1,sum) +yhat<-yhat +res<-y-yhat +val<-sum(abs(taureg(x,res,corfun=corfun)$cor)) +val +} +correg<-function(x,y,corfun=tau,loc.fun=median){ +# +# A generalization of the Theil-Sen estimator +# Rather than use Kendall's tau, can use an alternative +# correlation via the argument corfun. +# loc.fun determines how the intercept is computed; +# +# The Nelder-Mead method is used rather than +# Gauss-Seidel. +# +# +X<-cbind(x,y) +X<-elimna(X) +np<-ncol(X) +N<-np-1 +temp<-tsreg(x,y)$coef +START<-temp[2:np] +temp<-nelderv2(X,N,FN=correg.sub,START=START,corfun=corfun) +x <- as.matrix(x) +alpha <- loc.fun(y - x %*% temp) +coef <- c(alpha,temp) +res <- y - x %*% temp - alpha +list(coef = coef, residuals = res) +} +rmulnorm<-function(n,p,cmat,SEED=FALSE){ +# +# Generate data from a multivariate normal +# n= sample size +# p= number of variables +# cmat is the covariance (or correlation) matrix +# +# Method (e.g. Browne, M. W. (1968) A comparison of factor analytic +# techniques. Psychometrika, 33, 267-334. +# Let U'U=R be the Cholesky decomposition of R. Generate independent data +# from some dist yielding X. Then XU has population correlation matrix R +# +if(SEED)set.seed(2) +y<-matrix(rnorm(n*p),ncol=p) +rval<-matsqrt(cmat) +y<-t(rval%*%t(y)) +y +} + + matsqrt <- function(x) { + xev1<-NA + xe <- eigen(x) + xe1 <- xe$values + if(all(xe1 >= 0)) { + xev1 <- diag(sqrt(xe1)) + } +if(is.na(xev1[1]))stop("The matrix has negative eigenvalues") + xval1 <- cbind(xe$vectors) + xval1i <- solve(xval1) + y <- xval1 %*% xev1 %*% xval1i +y + } + + +ghmul<-function(n,g=0,h=0,p=2,cmat=diag(rep(1,p)),SEED=FALSE){ +# +# generate n observations from a p-variate dist +# based on the g and h dist. 
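+#
+# Example: ghmul(100, g=.5, h=.1, p=2) generates 100 bivariate observations from a
+# g-and-h distribution (g controls skewness, h controls tail heaviness), using the
+# identity correlation matrix by default.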
+# +# cmat is the correlation matrix +# +x<-rmulnorm(n,p,cmat,SEED=SEED) +for(j in 1:p){ +if (g>0){ +x[,j]<-(exp(g*x[,j])-1)*exp(h*x[,j]^2/2)/g +} +if(g==0)x[,j]<-x[,j]*exp(h*x[,j]^2/2) +} +x +} + +yhall<-function(x,y,tr=.2,alpha=.05){ +# +# Perform Yuen's test for trimmed means on the data in x and y +# in conjunction with Hall's transformation. +# The default amount of trimming is 20% +# Missing values (values stored as NA) are automatically removed. +# +# A confidence interval for the trimmed mean of x minus the +# the trimmed mean of y is computed and returned in yuen$ci. +# +x<-x[!is.na(x)] # Remove any missing values in x +y<-y[!is.na(y)] # Remove any missing values in y +winx<-winval(x,tr=tr) +winy<-winval(y,tr=tr) +m3x<-sum((winx-mean(winx))^3)/length(x) +m3y<-sum((winy-mean(winy))^3)/length(y) +h1<-length(x)-2*floor(tr*length(x)) +h2<-length(y)-2*floor(tr*length(y)) +mwx<-length(x)*m3x/h1 +mwy<-length(y)*m3y/h2 +q1<-(length(x)-1)*winvar(x,tr)/(h1*(h1-1)) +q2<-(length(y)-1)*winvar(y,tr)/(h2*(h2-1)) +sigtil<-q1+q2 +mtil<-(mwx/h1^2)-(mwy/h2^2) +dif<-mean(x,tr)-mean(y,tr) +thall<-dif+mtil/(6*sigtil)+mtil*dif^2/(3*sigtil^2)+mtil^2*dif^3/(27*sigtil^4) +thall<-thall/sqrt(sigtil) +nhat<-mtil/sigtil^1.5 +list(test.stat=thall,nu.tilda=nhat,sig.tilda=sqrt(sigtil)) +} + +linconm<-function(x,con=0,est=onestep,alpha=.05,nboot=500,pr=TRUE,...){ +# +# Compute a 1-alpha confidence interval for a set of d linear contrasts +# involving M-estimators using a bootstrap method. (See Chapter 6.) +# Independent groups are assumed. +# +# The data are assumed to be stored in x in list mode. Thus, +# x[[1]] contains the data for the first group, x[[2]] the data +# for the second group, etc. Length(x)=the number of groups = J, say. +# +# con is a J by d matrix containing the contrast coefficents of interest. +# If unspecified, all pairwise comparisons are performed. +# For example, con[,1]=c(1,1,-1,-1,0,0) and con[,2]=c(,1,-1,0,0,1,-1) +# will test two contrasts: (1) the sum of the first two measures of location is +# equal to the sum of the second two, and (2) the difference between +# the first two is equal to the difference between the measure of location for +# groups 5 and 6. +# +# The default number of bootstrap samples is nboot=399 +# +# This function uses the function trimpartt written for this +# book. +# +# +# +# +if(pr){ +print("Note: confidence intervals are adjusted to control FWE") +print("But p-values are not adjusted to control FWE") +} +if(is.matrix(x))x<-listm(x) +con<-as.matrix(con) +if(!is.list(x))stop("Data must be stored in list mode.") +J<-length(x) +Jm<-J-1 +d<-(J^2-J)/2 +if(sum(con^2)==0){ +con<-matrix(0,J,d) +id<-0 +for (j in 1:Jm){ +jp<-j+1 +for (k in jp:J){ +id<-id+1 +con[j,id]<-1 +con[k,id]<-0-1 +}}} +if(nrow(con)!=length(x))stop("The number of groups does not match the number of contrast coefficients.") +m1<-matrix(0,J,nboot) +m2<-1 # Initialize m2 +mval<-1 +set.seed(2) # set seed of random number generator so that +# results can be duplicated. +for(j in 1:J){ +mval[j]<-est(x[[j]],...) +xcen<-x[[j]]-est(x[[j]],...) +data<-matrix(sample(xcen,size=length(x[[j]])*nboot,replace=TRUE),nrow=nboot) +m1[j,]<-apply(data,1,est,...) # A J by nboot matrix. 
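+# (m2[j], computed next, is the bootstrap estimate of the squared standard error of
+# the location estimator for group j; it is used below to studentize the contrasts.)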
+m2[j]<-var(m1[j,]) +} +boot<-matrix(0,ncol(con),nboot) +bot<-1 +for (d in 1:ncol(con)){ +top<-apply(m1,2,trimpartt,con[,d]) +# A vector of length nboot containing psi hat values +consq<-con[,d]^2 +bot[d]<-trimpartt(m2,consq) +boot[d,]<-abs(top)/sqrt(bot[d]) +} +testb<-apply(boot,2,max) +ic<-floor((1-alpha)*nboot) +testb<-sort(testb) +psihat<-matrix(0,ncol(con),6) +dimnames(psihat)<-list(NULL,c("con.num","psihat","ci.lower","ci.upper","se","p.value")) +for (d in 1:ncol(con)){ +psihat[d,1]<-d +psihat[d,2]<-trimpartt(mval,con[,d]) +psihat[d,3]<-psihat[d,2]-testb[ic]*sqrt(bot[d]) +psihat[d,4]<-psihat[d,2]+testb[ic]*sqrt(bot[d]) +psihat[d,5]<-sqrt(bot[d]) +pval<-mean((boot[d,]1)fval<-akerdmul(xx,pts=pts,hval=hval,aval=aval,fr=fr,pr=pyhat, +plotit=plotit,theta=theta,phi=phi,expand=expand,scale=scale,ticktype=ticktype) +plotit<-F +} +if(is.matrix(xx) && ncol(xx)==1)xx<-xx[,1] +if(!is.matrix(xx)){ +x<-sort(xx) +if(op==1){ +m<-mad(x) +if(m==0){ +temp<-idealf(x) +m<-(temp$qu-temp$ql)/(qnorm(.75)-qnorm(.25)) +} +if(m==0)m<-sqrt(winvar(x)/.4129) +if(m==0)stop("All measures of dispersion are equal to 0") +fhat <- rdplot(x,pyhat=TRUE,plotit=FALSE,fr=fr) +if(m>0)fhat<-fhat/(2*fr*m) +} +if(op==2){ +init<-density(xx) +fhat <- init$y +x<-init$x +} +n<-length(x) +if(is.na(hval)){ +sig<-sqrt(var(x)) +temp<-idealf(x) +iqr<-(temp$qu-temp$ql)/1.34 +A<-min(c(sig,iqr)) +if(A==0)A<-sqrt(winvar(x))/.64 +hval<-1.06*A/length(x)^(.2) +# See Silverman, 1986, pp. 47-48 +} +gm<-exp(mean(log(fhat[fhat>0]))) +alam<-(fhat/gm)^(0-aval) +dhat<-NA +if(is.na(pts[1]))pts<-x +pts<-sort(pts) +for(j in 1:length(pts)){ +temp<-(pts[j]-x)/(hval*alam) +epan<-ifelse(abs(temp)yq) +B<-mean(flag1*flag2) +flag1<-(x>xq) +flag2<-(y<=yq) +C1<-mean(flag1*flag2) +flag1<-(x>xq) +flag2<-(y>yq) +D1<-mean(flag1*flag2) +fx<-akerd(x,pts=xq,plotit=FALSE,pyhat=TRUE) +fy<-akerd(y,pts=yq,plotit=FALSE,pyhat=TRUE) +v1<-(q-1)^2*A +v2<-(q-1)*q*B +v3<-(q-1)*q*C1 +v4<-q*q*D1 +temp<-0-2*(v1+v2+v3+v4)/(fx*fy)+q*(1-q)/fx^2+q*(1-q)/fy^2 +val<-sqrt(temp/n) +val +} + +akerdmul<-function(x,pts=NA,hval=NA,aval=.5,fr=.8,pr=FALSE,plotit=TRUE,theta=50, +phi=25,expand=.5,scale=FALSE,xlab="X",ylab="Y",zlab="",ticktype="simple"){ +# +# Compute adaptive kernel density estimate +# for multivariate data +# (See Silverman, 1986) +# +# Use expected frequency as initial estimate of the density +# +# hval is the span used by the kernel density estimator +# fr is the span used by the expected frequency curve +# pr=T, returns density estimates at pts +# ticktype="detailed" will create ticks as done in two-dimensional plot +# +library(MASS) +library(akima) +if(is.na(pts[1]))pts<-x +if(ncol(x)!=ncol(pts))stop("Number of columns for x and pts do not match") +if(!is.matrix(x))stop("Data should be stored in a matrix") +fhat <- rdplot(x,pyhat=TRUE,plotit=FALSE,fr=fr) +n<-nrow(x) +d<-ncol(x) +pi<-gamma(.5)^2 +cd<-c(2,pi) +if(d==2)A<-1.77 +if(d==3)A<-2.78 +if(d>2){ +for(j in 3:d)cd[j]<-2*pi*cd[j-2]/n # p. 76 +} +if(d>3)A<-(8*d*(d+2)*(d+4)*(2*sqrt(pi))^d)/((2*d+1)*cd[d]) # p. 87 +if(is.na(hval))hval<-A*(1/n)^(1/(d+4)) # Silverman, p. 86 +svec<-NA +for(j in 1:d){ +sig<-sqrt(var(x[,j])) +temp<-idealf(x[,j]) +iqr<-(temp$qu-temp$ql)/1.34 +A<-min(c(sig,iqr)) +x[,j]<-x[,j]/A +svec[j]<-A +} +hval<-hval*sqrt(mean(svec^2)) # Silverman, p. 87 +# Now do adaptive; see Silverman, 1986, p. 
101 +gm<-exp(mean(log(fhat[fhat>0]))) +alam<-(fhat/gm)^(0-aval) +dhat<-NA +nn<-nrow(pts) +for(j in 1:nn){ +temp1<-t(t(x)-pts[j,])/(hval*alam) +temp1<-temp1^2 +temp1<-apply(temp1,1,FUN="sum") +temp<-.5*(d+2)*(1-temp1)/cd[d] +epan<-ifelse(temp1<1,temp,0) # Epanechnikov kernel, p. 76 +dhat[j]<-mean(epan/(alam*hval)^d) +} +if(plotit && d==2){ +fitr<-dhat +iout<-c(1:length(fitr)) +nm1<-length(fitr)-1 +for(i in 1:nm1){ +ip1<-i+1 +for(k in ip1:length(fitr))if(sum(x[i,]==x[k,])==2)iout[k]<-0 +} +fitr<-fitr[iout>=1] +mkeep<-x[iout>=1,] +fit<-interp(mkeep[,1],mkeep[,2],fitr) +persp(fit,theta=theta,phi=phi,xlab=xlab,ylab=ylab,zlab=zlab,expand=expand, +scale=scale,ticktype=ticktype) +} +m<-"Done" +if(pr)m<-dhat +m +} +cov2med<-function(x,y=NA,q=.5){ +# +# Estimate the covariance between two dependent +# order statistics +# By default, q=.5 meaning that an estimate of +# of covariance is made when a single order statistic +# is used to estimate the median. +# y=NA, function returns squared standard error. +# +if(is.na(y[1]))val<-qse(x,q=q,op=3)^2 +if(!is.na(y[1])){ +if(sum((x-y)^2)==0)val<-qse(x,q=q,op=3)^2 +if(sum((x-y)^2)>0){ +n<-length(x) +m<-floor(q*n+.5) +yord<-sort(y) +flag<-(y<=yord[m]) +xord<-sort(x) +xq<-xord[m] +yord<-sort(y) +yq<-yord[m] +flag1<-(x<=xq) +flag2<-(y<=yq) +A<-mean(flag1*flag2) +flag1<-(x<=xq) +flag2<-(y>yq) +B<-mean(flag1*flag2) +flag1<-(x>xq) +flag2<-(y<=yq) +C1<-mean(flag1*flag2) +flag1<-(x>xq) +flag2<-(y>yq) +D1<-mean(flag1*flag2) +fx<-akerd(x,pts=xq,plotit=FALSE,pyhat=TRUE) +fy<-akerd(y,pts=yq,plotit=FALSE,pyhat=TRUE) +v1<-(q-1)^2*A +v2<-(q-1)*q*B +v3<-(q-1)*q*C1 +v4<-q*q*D1 +val<-((v1+v2+v3+v4)/(fx*fy))/n +}} +val +} + + +covmmed<-function(x,p=length(x),grp=c(1:p),q=.5){ +# +# Estimate the covariance matrix for the sample medians +# based on a SINGLE order statistic, using +# the data in the R variable x. +# (x[[1]] contains the data for group 1, x[[2]] the data for group 2, etc.) +# The function returns a p by p matrix of covariances, the diagonal +# elements being equal to the squared standard error of the sample +# trimmed means, where p is the number of groups to be included. +# By default, all the groups in x are used, but a subset of +# the groups can be used via grp. For example, if +# the goal is to estimate the covariances between the medians +# for groups 1, 2, and 5, use the command grp<-c(1,2,5) +# before calling this function. +# +# Missing values (values stored as NA) are not allowed. +# +if(is.matrix(x))x<-listm(x) +if(!is.list(x))stop("The data are not stored in a matrix or list mode.") +p<-length(grp) +pm1<-p-1 +for (i in 1:pm1){ +ip<-i+1 +if(length(x[[grp[ip]]])!=length(x[[grp[i]]]))stop("The number of observations in each group must be equal") +} +n<-length(x[[grp[1]]]) +covest<-matrix(0,p,p) +for(j in 1:p){ +for(k in 1:p){ +if(j==k)covest[j,j]<-cov2med(x[[grp[j]]],q=q) +if(jnullval)p.value=min(p.value,alpha) #very remote chance ci and p.value differ. Force them to agree. 
+if(ybt[2]tval[1:nboot]) +if(test>=0)G=mean(test=20 +# +if(!is.na(y[1]))x<-cbind(x,y) +if(!is.matrix(x))stop("Something is wrong, with x or y") +x<-elimna(x) +y<-x[,2] +x<-x[,1] +n<-length(y) +df<-n-1 +if(is.na(se.val[1])){ +if(!bop)se.val<-sedm(x,y,q=q) +if(bop)se.val<-bootdse(x,y,est=qest,q=q,pr=FALSE,nboot=nboot) +} +test<-(qest(x,q)-qest(y,q))/se.val +sig.level<-2*(1-pt(abs(test),df)) +list(test.stat=test,p.value=sig.level,se=se.val) +} + +lincdm<-function(x,con=0,alpha=.05,q=.5,mop=FALSE,nboot=100,SEED=TRUE){ +# +# A heteroscedastic test of d linear contrasts among +# dependent groups using medians. +# +# The data are assumed to be stored in $x$ in list mode. +# Length(x) is assumed to correspond to the total number of groups, J +# It is assumed all groups are independent. +# +# con is a J by d matrix containing the contrast coefficients that are used. +# If con is not specified, all pairwise comparisons are made. +# +# q is the quantile used to compare groups. +# con contains contrast coefficients, +# con=0 means all pairwise comparisons are used +# mop=F, use single order statistic +# mop=T, use usual sample median, even if q is not equal to .5 +# in conjunction with a bootstrap estimate of covariances among +# the medians using +# nboot samples. +# +# Missing values are automatically removed. +# +# +if(mop && SEED)set.seed(2) +if(is.list(x)){ +x<-matl(x) +x<-elimna(x) +} +if(is.matrix(x))x<-listm(x) +if(!is.list(x))stop("Data must be stored in a matrix or in list mode.") +con<-as.matrix(con) +J<-length(x) +h<-length(x[[1]]) +w<-vector("numeric",J) +xbar<-vector("numeric",J) +for(j in 1:J){ +if(!mop)xbar[j]<-qest(x[[j]],q=q) +if(mop)xbar[j]<-median(x[[j]]) +} +if(sum(con^2)==0){ +temp<-qdmcp(x,alpha=alpha,q=q,pr=FALSE) +test<-temp$test +psihat<-temp$psihat +num.sig<-temp$num.sig +} +if(sum(con^2)>0){ +ncon<-ncol(con) +if(alpha==.05){ +dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) +if(ncon > 10){ +avec<-.05/c(11:ncon) +dvec<-c(dvec,avec) +}} +if(alpha==.01){ +dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) +if(ncon > 10){ +avec<-.01/c(11:ncon) +dvec<-c(dvec,avec) +}} +if(alpha != .05 && alpha != .01)dvec<-alpha/c(1:ncon) +if(nrow(con)!=length(x)){ +stop("The number of groups does not match the number of contrast coefficients.") +} +psihat<-matrix(0,ncol(con),4) +dimnames(psihat)<-list(NULL,c("con.num","psihat","ci.lower","ci.upper")) +test<-matrix(0,ncol(con),5) +dimnames(test)<-list(NULL,c("con.num","test","p.value","crit.p.value","se")) +df<-length(x[[1]])-1 +if(!mop)w<-covmmed(x,q=q) +if(mop)w<-bootcov(x,nboot=nboot,pr=FALSE) +for (d in 1:ncol(con)){ +psihat[d,1]<-d +psihat[d,2]<-sum(con[,d]*xbar) +cvec<-as.matrix(con[,d]) +sejk<-sqrt(t(cvec)%*%w%*%cvec) +test[d,1]<-d +test[d,2]<-sum(con[,d]*xbar)/sejk +test[d,3]<-2*(1-pt(abs(test[d,2]),df)) +test[d,5]<-sejk +} +temp1<-test[,3] +temp2<-order(0-temp1) +zvec<-dvec[1:ncon] +test[temp2,4]<-zvec +psihat[,3]<-psihat[,2]-qt(1-test[,4]/2,df)*test[,5] +psihat[,4]<-psihat[,2]+qt(1-test[,4]/2,df)*test[,5] +num.sig<-sum(test[,3]<=test[,4]) +} +list(test=test,psihat=psihat,num.sig=num.sig) +} +mwwmcp<-function(J,K,x,grp=c(1:p),p=J*K,q=.5,bop=FALSE,alpha=.05,nboot=100, +SEED=TRUE){ +# +# For a J by K anova using quantiles with +# repeated measures on both factors, +# Perform all multiple comparisons for main effects +# and interactions. 
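+#
+# Example (hypothetical data): mwwmcp(2, 3, x) performs all multiple comparisons for
+# main effects and interactions based on medians (q=.5 by default) in a 2-by-3
+# design with repeated measures on both factors.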
+# +# q=.5 by default meaning medians are compared +# bop=F means bootstrap option not used; +# with bop=T, function uses usual medians rather +# rather than a single order statistic to estimate median +# in conjunction with a bootstrap estimate of covariances +# among sample medians. +# +# The R variable data is assumed to contain the raw +# data stored in a matrix or in list mode. +# When in list mode data[[1]] contains the data +# for the first level of both factors: level 1,1. +# data[[2]] is assumed to contain the data for level 1 of the +# first factor and level 2 of the second: level 1,2 +# data[[K]] is the data for level 1,K +# data[[K+1]] is the data for level 2,1, data[2K] is level 2,K, etc. +# +# It is assumed that data has length JK, the total number of +# groups being tested, but a subset of the data can be analyzed +# using grp +# +Qa<-NA +Qab<-NA +if(is.data.frame(x))x=as.matrix(x) +if(is.list(x))x<-elimna(matl(x)) +if(is.matrix(x))x<-elimna(x) +data<-x +if(is.matrix(data))data<-listm(data) +if(!is.list(data))stop("Data are not stored in list mode or a matrix") +if(p!=length(data)){ +print("The total number of groups, based on the specified levels, is") +print(p) +print("The number of groups stored in x is") +print(length(data)) +print("Warning: These two values are not equal") +} +if(p!=length(grp))stop("Apparently a subset of the groups was specified that does not match the total number of groups indicated by the values for J and K.") +tmeans<-0 + # Create the three contrast matrices + # +Ja<-(J^2-J)/2 +Ka<-(K^2-K)/2 +JK<-J*K +conA<-matrix(0,nrow=JK,ncol=Ja) +ic<-0 +for(j in 1:J){ +for(jj in 1:J){ +if(j < jj){ +ic<-ic+1 +mat<-matrix(0,nrow=J,ncol=K) +mat[j,]<-1 +mat[jj,]<-0-1 +conA[,ic]<-t(mat) +}}} +conB<-matrix(0,nrow=JK,ncol=Ka) +ic<-0 +for(k in 1:K){ +for(kk in 1:K){ +if(k0){ +ncon<-ncol(con) +if(alpha==.05){ +dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) +if(ncon > 10){ +avec<-.05/c(11:ncon) +dvec<-c(dvec,avec) +}} +if(alpha==.01){ +dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) +if(ncon > 10){ +avec<-.01/c(11:ncon) +dvec<-c(dvec,avec) +}} +if(alpha != .05 && alpha != .01)dvec<-alpha/c(1:ncon) +if(nrow(con)!=length(x)){ +stop("The number of groups does not match the number of contrast coefficients.") +} +psihat<-matrix(0,ncol(con),4) +dimnames(psihat)<-list(NULL,c("con.num","psihat","ci.lower","ci.upper")) +test<-matrix(0,ncol(con),5) +dimnames(test)<-list(NULL,c("con.num","test","p.value","crit.p.value","se")) +df<-length(x[[1]])-1 +w<-covmtrim(x,tr=tr) +for (d in 1:ncol(con)){ +psihat[d,1]<-d +psihat[d,2]<-sum(con[,d]*xbar) +cvec<-as.matrix(con[,d]) +sejk<-sqrt(t(cvec)%*%w%*%cvec) +test[d,1]<-d +test[d,2]<-sum(con[,d]*xbar)/sejk +test[d,3]<-2*(1-pt(abs(test[d,2]),df)) +test[d,5]<-sejk +} +temp1<-test[,3] +temp2<-order(0-temp1) +zvec<-dvec[1:ncon] +test[temp2,4]<-zvec +psihat[,3]<-psihat[,2]-qt(1-test[,4]/2,df)*test[,5] +psihat[,4]<-psihat[,2]+qt(1-test[,4]/2,df)*test[,5] +num.sig<-sum(test[,3]<=test[,4]) +} +list(test=test,psihat=psihat,num.sig=num.sig) +} + +sintv2<-function(x,y=NULL,alpha=.05,nullval=0,null.value=NULL,pr=TRUE){ +# +# Compute a 1-alpha confidence interval for the median using +# the Hettmansperger-Sheather interpolation method. +# (See section 4.5.2.) +# +# The default value for alpha is .05. +# +# If y is not null, the function uses x-y, as might be done when comparing dependent variables. 
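+# Example (hypothetical data): sintv2(x) returns a .95 confidence interval and p-value
+# for the median of x; sintv2(x,y) does the same for the paired differences x-y.
+# The argument nullval (or null.value) sets the hypothesized value used for the p-value.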
+# +if(!is.null(y))x=x-y +x=elimna(x) +if(!is.null(null.value))nullval=null.value +if(pr){ +if(sum(duplicated(x)>0))print("Duplicate values detected; hdpb might have more power") +} +ci<-sint(x,alpha=alpha,pr=FALSE) +alph<-c(1:99)/100 +for(i in 1:99){ +irem<-i +chkit<-sint(x,alpha=alph[i],pr=FALSE) +if(chkit[1]>nullval || chkit[2]nullval || chkit[2]nullval || chkit[2]nullval || chkit[2] 10){ +avec<-.05/c(11:ncon) +dvec<-c(dvec,avec) +}} +if(alpha==.01){ +dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) +if(ncon > 10){ +avec<-.01/c(11:ncon) +dvec<-c(dvec,avec) +}} +if(alpha != .05 && alpha != .01)dvec<-alpha/c(1:ncon) +psihat<-matrix(0,CC,5) +dimnames(psihat)<-list(NULL,c("Group","Group","psihat","ci.lower","ci.upper")) +test<-matrix(NA,CC,6) +dimnames(test)<-list(NULL,c("Group","Group","test","p-value","p.crit","se")) +if(bop)se.val<-bootdse(x,nboot=nboot,pr=pr) +temp1<-0 +jcom<-0 +for (j in 1:J){ +for (k in 1:J){ +if (j < k){ +jcom<-jcom+1 +if(!bop)temp<-qdtest(x[,j],x[,k],q=q,bop=bop) +if(bop)temp<-qdtest(x[,j],x[,k],se.val=se.val[jcom]) +sejk<-temp$se +test[jcom,6]<-sejk +test[jcom,3]<-temp$test.stat +test[jcom,4]<-temp$p.value +if(length(x[,j])<20)test[jcom,4]<-mrm1way(x[,c(j,k)],q=q,SEED=SEED)$p.value +psihat[jcom,1]<-j +psihat[jcom,2]<-k +test[jcom,1]<-j +test[jcom,2]<-k +psihat[jcom,3]<-(xbar[j]-xbar[k]) +}}} +temp1<-test[,4] +temp2<-order(0-temp1) +zvec<-dvec[1:ncon] +test[temp2,5]<-zvec +psihat[,4]<-psihat[,3]-qt(1-test[,5]/2,df)*test[,6] +psihat[,5]<-psihat[,3]+qt(1-test[,5]/2,df)*test[,6] +num.sig<-sum(test[,4]<=test[,5]) +list(test=test,psihat=psihat,num.sig=num.sig) +} + + + +qdmcpdif<-function(x, con = 0,alpha = 0.05){ +# +# MCP with medians on difference scores +# FWE controlled with Rom's method +# +if(is.data.frame(x))x=as.matrix(x) +if(!is.matrix(x))x<-matl(x) +if(!is.matrix(x))stop("Data must be stored in a matrix or in list mode.") +con<-as.matrix(con) +J<-ncol(x) +xbar<-vector("numeric",J) +x<-elimna(x) # Remove missing values +nval<-nrow(x) +h1<-nrow(x) +df<-h1-1 +if(sum(con^2!=0))CC<-ncol(con) +if(sum(con^2)==0)CC<-(J^2-J)/2 +ncon<-CC +if(alpha==.05){ +dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) +if(ncon > 10){ +avec<-.05/c(11:ncon) +dvec<-c(dvec,avec) +}} +if(alpha==.01){ +dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) +if(ncon > 10){ +avec<-.01/c(11:ncon) +dvec<-c(dvec,avec) +}} +if(alpha != .05 && alpha != .01)dvec<-alpha/c(1:ncon) +if(sum(con^2)==0){ +psihat<-matrix(0,CC,5) +dimnames(psihat)<-list(NULL,c("Group","Group","psihat","ci.lower","ci.upper")) +test<-matrix(NA,CC,5) +dimnames(test)<-list(NULL,c("Group","Group","p-value","p.crit","se")) +temp1<-0 +jcom<-0 +for (j in 1:J){ +for (k in 1:J){ +if (j < k){ +jcom<-jcom+1 +dv<-x[,j]-x[,k] +test[jcom,5]<-msmedse(dv) +temp<-sintv2(dv,alpha=alpha/CC) +temp1[jcom]<-temp$p.value +test[jcom,3]<-temp$p.value +psihat[jcom,1]<-j +psihat[jcom,2]<-k +test[jcom,1]<-j +test[jcom,2]<-k +psihat[jcom,3]<-median(dv) +psihat[jcom,4]<-temp$ci.low +psihat[jcom,5]<-temp$ci.up +}}} +temp2<-order(0-temp1) +zvec<-dvec[1:ncon] +sigvec<-(test[temp2]>=zvec) +if(sum(sigvec)0){ +if(nrow(con)!=ncol(x))print("WARNING: The number of groups does not match the number of contrast coefficients.") +ncon<-ncol(con) +psihat<-matrix(0,ncol(con),4) +dimnames(psihat)<-list(NULL,c("con.num","psihat","ci.lower","ci.upper")) +test<-matrix(0,ncol(con),4) +dimnames(test)<-list(NULL,c("con.num","sig","crit.sig","se")) +temp1<-NA +for (d in 1:ncol(con)){ +psihat[d,1]<-d +for(j in 
1:J){ +if(j==1)dval<-con[j,d]*x[,j] +if(j>1)dval<-dval+con[j,d]*x[,j] +} +temp3<-sintv2(dval) +temp1[d]<-temp3$p.value +test[d,1]<-d +test[d,4]<-msmedse(dval) +psihat[d,2]<-median(dval) +psihat[d,3]<-temp3$ci.low +psihat[d,4]<-temp3$ci.up +} +test[,2]<-temp1 +temp2<-order(0-temp1) +zvec<-dvec[1:ncon] +print(c(ncon,zvec)) +sigvec<-(test[temp2,2]>=zvec) +if(sum(sigvec)0)+ sum(psihat[,5]<0) +if(sum(con^2)>0)num.sig<-sum(psihat[,3]>0)+ sum(psihat[,4]<0) +list(test=test,psihat=psihat,con=con,num.sig=num.sig) +} + + + +l2dci<-function(x,y,est=median,alpha=.05,nboot=2000,SEED=TRUE,pr=TRUE,...){ +# +# Compute a bootstrap confidence interval for a +# measure of location associated with +# the distribution of x-y, where x and y are possibly dependent. +# est indicates which measure of location will be used +# +# Function returns confidence interval, p-value and estimate +# of square standard error of the estimator used. +# +x<-x[!is.na(x)] # Remove any missing values in x +y<-y[!is.na(y)] # Remove any missing values in y +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +if(pr)print("Taking bootstrap samples. Please wait.") +datax<-matrix(sample(x,size=length(x)*nboot,replace=TRUE),nrow=nboot) +datay<-matrix(sample(y,size=length(y)*nboot,replace=TRUE),nrow=nboot) +bvec<-NA +for(i in 1:nboot)bvec[i]<-loc2dif(datax[i,],datay[i,],est=est) +bvec<-sort(bvec) +low<-round((alpha/2)*nboot)+1 +up<-nboot-low +temp<-sum(bvec<0)/nboot+sum(bvec==0)/(2*nboot) +sig.level<-2*(min(temp,1-temp)) +se<-var(bvec) +list(ci=c(bvec[low],bvec[up]),p.value=sig.level,sq.se=se) +} + + +qdec2ci<-function(x,y=NA,nboot=500,alpha=.05,pr=FALSE,SEED=TRUE,plotit=TRUE){ +# +# Compare the deciles of two dependent groups +# with quantiles estimated with a single order statistic +# +# x: can be a matrix with two columns in which case +# y is ignored. +# +if(SEED)set.seed(2) +if(is.na(y[1])){ +y<-x[,2] +x<-x[,1] +} +xy=elimna(cbind(x,y)) +x=xy[,1] +y=xy[,2] +if(sum(duplicated(x))>0)stop('Tied values detected, use Dqcomhd') +if(sum(duplicated(y))>0)stop('Tied values detected, use Dqcomhd') +bvec<-matrix(NA,nrow=nboot,ncol=9) +if(pr)print("Taking bootstrap samples. Please Wait.") +data<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot) +for(i in 1:nboot)bvec[i,]<-qdec(x[data[i,]])-qdec(y[data[i,]]) +pval<-NA +m<-matrix(0,9,5) +dimnames(m)<-list(NULL,c("lower","upper","Delta.hat","p.values",'p.crit')) +crit <- alpha/2 +icl <- round(crit * nboot) + 1 +icu <- nboot - icl +for(i in 1:9){ +pval[i]<-(sum(bvec[,i]<0)+.5*sum(bvec[,i]==0))/nboot +pval[i]<-2*min(pval[i],1-pval[i]) +temp<-sort(bvec[,i]) +m[i,1]<-temp[icl] +m[i,2]<-temp[icu] +} +m[,3]<-qdec(x)-qdec(y) +m[,4]<-pval +temp=order(pval,decreasing=TRUE) +zvec=alpha/c(1:9) +m[temp,5]=zvec +if(plotit){ +xaxis<-c(qdec(x),qdec(x)) +par(pch="+") +yaxis<-c(m[,1],m[,2]) +plot(xaxis,yaxis,ylab="delta",xlab="x (first group)") +par(pch="*") +points(qdec(x),m[,3]) +} +m +} + + + + +ancovam<-function(x1,y1,x2,y2,fr1=1,fr2=1,alpha=.05,plotit=TRUE,pts=NA,sm=FALSE, +pr=TRUE){ +# +# Compare two independent groups using an ancova method +# No parametric assumption is made about the form of +# the regression lines--a running interval smoother is used. 
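+# A minimal sketch (x1, y1, x2, y2 are hypothetical data vectors):
+# ancovam(x1,y1,x2,y2) picks five design points where both groups have at least
+# 12 nearby observations and compares the medians of y at each point;
+# ancovam(x1,y1,x2,y2,pts=c(20,30,40)) compares the groups at the specified
+# covariate values instead (at most 28 points can be used).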
+# +# This function is designed specifically for +# MEDIANS +# +# Assume data are in x1 y1 x2 and y2 +# +if(pr){ +print("NOTE: Confidence intervals are adjusted to control the probability") +print("of at least one Type I error.") +print("But p-values are not") +} +if(is.na(pts[1])){ +npt<-5 +isub<-c(1:5) # Initialize isub +test<-c(1:5) +xorder<-order(x1) +y1<-y1[xorder] +x1<-x1[xorder] +xorder<-order(x2) +y2<-y2[xorder] +x2<-x2[xorder] +n1<-1 +n2<-1 +vecn<-1 +for(i in 1:length(x1))n1[i]<-length(y1[near(x1,x1[i],fr1)]) +for(i in 1:length(x1))n2[i]<-length(y2[near(x2,x1[i],fr2)]) +for(i in 1:length(x1))vecn[i]<-min(n1[i],n2[i]) +sub<-c(1:length(x1)) +isub[1]<-min(sub[vecn>=12]) +isub[5]<-max(sub[vecn>=12]) +isub[3]<-floor((isub[1]+isub[5])/2) +isub[2]<-floor((isub[1]+isub[3])/2) +isub[4]<-floor((isub[3]+isub[5])/2) +mat<-matrix(NA,5,9) +dimnames(mat)<-list(NULL,c("X","n1","n2","DIF","TEST","se","ci.low","ci.hi","p.value")) +critv<-NA +critv=qsmm(1-alpha,5,500) +for (i in 1:5){ +g1<-y1[near(x1,x1[isub[i]],fr1)] +g2<-y2[near(x2,x1[isub[i]],fr2)] +g1<-g1[!is.na(g1)] +g2<-g2[!is.na(g2)] +test<-msmed(g1,g2) +mat[i,1]<-x1[isub[i]] +mat[i,2]<-length(g1) +mat[i,3]<-length(g2) +mat[i,4]<-median(g1)-median(g2) +mat[i,5]<-test$test[3] +mat[i,6]<-test$test[5] +cilow<-mat[i,4]-critv*mat[i,6] +cihi<-mat[i,4]+critv*mat[i,6] +mat[i,7]<-cilow +mat[i,8]<-cihi +mat[i,9]<-test$test[6] +}} +if(!is.na(pts[1])){ +if(length(pts)>=29)stop("At most 28 points can be compared") +n1<-1 +n2<-1 +vecn<-1 +for(i in 1:length(pts)){ +n1[i]<-length(y1[near(x1,pts[i],fr1)]) +n2[i]<-length(y2[near(x2,pts[i],fr2)]) +} +mat<-matrix(NA,length(pts),9) +dimnames(mat)<-list(NULL,c("X","n1","n2","DIF","TEST","se","ci.low","ci.hi", +"p.value")) +critv<-NA +if(length(pts)>=2){ +#if(alpha==.05)critv<-smmcrit(500,length(pts)) +#if(alpha==.01)critv<-smmcrit01(500,length(pts)) +#if(is.na(critv))critv<-smmval(rep(999,length(pts)),alpha=alpha) +critv=qsmm(1-alpha,length(pts),500) +} +if(length(pts)==1)critv<-qnorm(1-alpha/2) +for (i in 1:length(pts)){ +g1<-y1[near(x1,pts[i],fr1)] +g2<-y2[near(x2,pts[i],fr2)] +g1<-g1[!is.na(g1)] +g2<-g2[!is.na(g2)] +test<-msmed(g1,g2) +mat[i,1]<-pts[i] +mat[i,2]<-length(g1) +mat[i,3]<-length(g2) +if(length(g1)<=5)print(paste("Warning, there are",length(g1)," points corresponding to the design point X=",pts[i])) +if(length(g2)<=5)print(paste("Warning, there are",length(g2)," points corresponding to the design point X=",pts[i])) +mat[i,4]<-median(g1)-median(g2) +mat[i,5]<-test$test[3] +mat[i,6]<-test$test[5] +cilow<-mat[i,4]-critv*mat[i,6] +cihi<-mat[i,4]+critv*mat[i,6] +mat[i,7]<-cilow +mat[i,8]<-cihi +mat[i,9]<-test$test[6] +}} +if(plotit) +runmean2g(x1,y1,x2,y2,fr=fr1,est=median,sm=sm) +list(output=mat,crit=critv) +} + + +modgen<-function(p,adz=FALSE){ +# +# Used by regpre to generate all models +# p=number of predictors +# adz=T, will add the model where only a measure +# of location is used. 
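+# Example: modgen(3) returns a list containing the 7 non-empty predictor subsets
+# 1; 2; 3; (1,2); (1,3); (2,3); (1,2,3). With adz=TRUE an eighth entry, 0,
+# is appended, representing the model that uses a measure of location only.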
+# +# +model<-list() +if(p>5)stop("Current version is limited to 5 predictors") +if(p==1)model[[1]]<-1 +if(p==2){ +model[[1]]<-1 +model[[2]]<-2 +model[[3]]<-c(1,2) +} +if(p==3){ +for(i in 1:3)model[[i]]<-i +model[[4]]<-c(1,2) +model[[5]]<-c(1,3) +model[[6]]<-c(2,3) +model[[7]]<-c(1,2,3) +} +if(p==4){ +for(i in 1:4)model[[i]]<-i +model[[5]]<-c(1,2) +model[[6]]<-c(1,3) +model[[7]]<-c(1,4) +model[[8]]<-c(2,3) +model[[9]]<-c(2,4) +model[[10]]<-c(3,4) +model[[11]]<-c(1,2,3) +model[[12]]<-c(1,2,4) +model[[13]]<-c(1,3,4) +model[[14]]<-c(2,3,4) +model[[15]]<-c(1,2,3,4) +} +if(p==5){ +for(i in 1:5)model[[i]]<-i +model[[6]]<-c(1,2) +model[[7]]<-c(1,3) +model[[8]]<-c(1,4) +model[[9]]<-c(1,5) +model[[10]]<-c(2,3) +model[[11]]<-c(2,4) +model[[12]]<-c(2,5) +model[[13]]<-c(3,4) +model[[14]]<-c(3,5) +model[[15]]<-c(4,5) +model[[16]]<-c(1,2,3) +model[[17]]<-c(1,2,4) +model[[18]]<-c(1,2,5) +model[[19]]<-c(1,3,4) +model[[20]]<-c(1,3,5) +model[[21]]<-c(1,4,5) +model[[22]]<-c(2,3,4) +model[[23]]<-c(2,3,5) +model[[24]]<-c(2,4,5) +model[[25]]<-c(3,4,5) +model[[26]]<-c(1,2,3,4) +model[[27]]<-c(1,2,3,5) +model[[28]]<-c(1,2,4,5) +model[[29]]<-c(1,3,4,5) +model[[30]]<-c(2,3,4,5) +model[[31]]<-c(1,2,3,4,5) +} +if(adz){ +ic<-length(model)+1 +model[[ic]]<-0 +} +model +} + + + +locpre<-function(y,est=mean,error=sqfun,nboot=100,SEED=TRUE,pr=TRUE,mval=round(5*log(length(y)))){ +# +# Estimate the prediction error using a measure of location +# given by the argument +# est +# +# The .632 method is used. +# (See Efron and Tibshirani, 1993, pp. 252--254) +# +# Prediction error is the expected value of the function error. +# The argument error defaults to squared error. +# +# est can be any R function that returns a measure of location +# +# The default value for mval, the number of observations to resample +# for each of the B bootstrap samples is based on results by +# Shao (JASA, 1996, 655-665). (Resampling n vectors of observations +# model selection may not lead to the correct model as n->infinity. +# +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +data<-matrix(sample(length(y),size=mval*nboot,replace=TRUE),nrow=nboot) +bid<-apply(data,1,idb,length(y)) +# bid is an n by nboot matrix. If the jth bootstrap sample from +# 1, ..., mval contains the value i, bid[i,j]=0; otherwise bid[i,j]=1 +# +yhat<-apply(data,1,locpres1,y,est=est) +# yhat is nboot vector +# containing the bootstrap estimates +# +yhat<-matrix(yhat,nrow=length(y),ncol=nboot) # convert to n x nboot matrix +bi<-apply(bid,1,sum) # B sub i in notation of Efron and Tibshirani, p. 
253 +temp<-(bid*(yhat-y)) +diff<-apply(temp,1,error) +ep0<-sum(diff/bi)/length(y) +aperror<-error(y-est(y))/length(y) # apparent error +val<-.368*aperror+.632*ep0 +val +} + + +locpres1<-function(isub,x,est){ +# +# Compute a measure of location x[isub] +# isub is a vector of length mval, +# a bootstrap sample from the sequence of integers +# 1, 2, 3, ..., n +# +# mval is the sample size +# of the bootstrap sample, where mval1){ +#if(alpha==.05)crit<-smmcrit(500,CC) +#if(alpha==.01)crit<-smmcrit01(500,CC) +#if(is.na(crit))warning("Can only be used with alpha=.05 or .01") +crit=qsmm(1-alpha,CC,500) +} +test[jcom,4]<-crit +psihat[jcom,4]<-psihat[jcom,3]-crit*test[jcom,5] +psihat[jcom,5]<-psihat[jcom,3]+crit*test[jcom,5] +}}}} +if(sum(con^2)>0){ +if(nrow(con)!=length(x))warning("The number of groups does not match the number of contrast coefficients.") +psihat<-matrix(0,ncol(con),4) +dimnames(psihat)<-list(NULL,c("con.num","psihat","ci.lower","ci.upper")) +test<-matrix(0,ncol(con),5) +dimnames(test)<-list(NULL,c("con.num","test","crit","se","df")) +for (d in 1:ncol(con)){ +psihat[d,1]<-d +psihat[d,2]<-sum(con[,d]*xbar) +sejk<-sqrt(sum(con[,d]^2*w)) +test[d,1]<-d +test[d,2]<-sum(con[,d]*xbar)/sejk +crit<-NA +if(CC==1)crit<-qnorm(1-alpha/2) +#if(alpha==.05)crit<-smmcrit(500,ncol(con)) +#if(alpha==.01)crit<-smmcrit01(500,ncol(con)) +crit=qsmm(1-alpha,ncol(con),500) +test[d,3]<-crit +test[d,4]<-sejk +psihat[d,3]<-psihat[d,2]-crit*sejk +psihat[d,4]<-psihat[d,2]+crit*sejk +}} +list(test=test,psihat=psihat) +} + + + +bpmedse<-function(x){ +# +# compute standard error of the median using method +# recommended by Price and Bonett (2001) +# +y<-sort(x) +n<-length(x) +av<-round((n+1)/2-sqrt(n)) +if(av==0)av<-1 +avm<-av-1 +astar<-pbinom(avm,n,.5) #alpha*/2 +zval<-qnorm(1-astar) +top<-n-av+1 +sqse<-((y[top]-y[av])/(2*zval))^2 # The sq. standard error +se<-sqrt(sqse) +se +} +exmed<-function(x,y=NA,con=0,alpha=.05,iter=1000,se.fun=bpmedse,SEED=TRUE){ +# +# Test a set of linear contrasts using medians +# +# Get exact control over type I errors under normality, provided +# iter is sufficietly large. +# iter determines number of replications used in a simulation +# to determine critical value. +# +# se.fun indicates method used to estimate standard errors. +# default is the method used by Bonett and Price (2002) +# To use the McKean-Shrader method, +# set se.fun=msmedse +# +# The data are assumed to be stored in $x$ in a matrix or in list mode. +# Length(x) is assumed to correspond to the total number of groups, J +# It is assumed all groups are independent. +# +# con is a J by d matrix containing the contrast coefficients that are used. +# If con is not specified, all pairwise comparisons are made. +# +# Missing values are automatically removed. +# +# Function returns the critical value used so that FWE=alpha +# (under the column crit) +# p-values are determined for each test but are not adjusted so +# that FWE=alpha. +# The confidence intervals are adjusted so that the simultaneous +# probability coverage is 1-alpha. 
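+# A brief usage sketch (hypothetical data): exmed(x,y) compares the medians of two
+# independent groups; for J groups stored as the columns of a matrix m, exmed(m)
+# performs all pairwise comparisons, and se.fun=msmedse switches to the
+# McKean-Shrader estimate of the standard error.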
+# +if(!is.na(y[1])){ +xx<-list() +xx[[1]]<-x +xx[[2]]<-y +if(is.matrix(x) || is.list(x))stop("When y is speficied, x should not have list mode or be a matrix") +x<-xx +} +if(is.data.frame(x))x=as.matrix(x) +if(is.matrix(x))x<-listm(x) +if(!is.list(x))stop("Data must be stored in a matrix or in list mode.") +con<-as.matrix(con) +J<-length(x) +h<-vector("numeric",J) +w<-vector("numeric",J) +nval<-vector("numeric",J) +xbar<-vector("numeric",J) +for(j in 1:J){ +xx<-!is.na(x[[j]]) +val<-x[[j]] +x[[j]]<-val[xx] # Remove missing values +xbar[j]<-median(x[[j]]) +nval[j]<-length(x[[j]]) +# w[j]<-msmedse(x[[j]])^2 + w[j]<-se.fun(x[[j]])^2 +} +if(sum(con^2!=0))CC<-ncol(con) +if(sum(con^2)==0){ +CC<-(J^2-J)/2 +psihat<-matrix(0,CC,5) +dimnames(psihat)<-list(NULL,c("Group","Group","psihat","ci.lower","ci.upper")) +test<-matrix(NA,CC,6) +dimnames(test)<-list(NULL,c("Group","Group","test","crit","se","p.value")) +jcom<-0 +for (j in 1:J){ +for (k in 1:J){ +if (j < k){ +jcom<-jcom+1 +test[jcom,3]<-abs(xbar[j]-xbar[k])/sqrt(w[j]+w[k]) +# Next determine p-value for each individual test +temp<-msmedsub(c(nval[j],nval[k]),se.fun=se.fun,SEED=SEED,iter=iter) +test[jcom,6]<-sum((test[jcom,3]<=temp))/iter +sejk<-sqrt(w[j]+w[k]) +test[jcom,5]<-sejk +psihat[jcom,1]<-j +psihat[jcom,2]<-k +test[jcom,1]<-j +test[jcom,2]<-k +psihat[jcom,3]<-(xbar[j]-xbar[k]) +# Determine critical value for controlling FWE +temp<-msmedsub(nval,se.fun=se.fun,SEED=SEED,iter=iter) +ic<-round((1-alpha)*iter) +crit<-temp[ic] +test[jcom,4]<-crit +psihat[jcom,4]<-psihat[jcom,3]-crit*test[jcom,5] +psihat[jcom,5]<-psihat[jcom,3]+crit*test[jcom,5] +}}}} +if(sum(con^2)>0){ +if(nrow(con)!=length(x))warning("The number of groups does not match the number of contrast coefficients.") +psihat<-matrix(0,ncol(con),4) +dimnames(psihat)<-list(NULL,c("con.num","psihat","ci.lower","ci.upper")) +test<-matrix(0,ncol(con),5) +dimnames(test)<-list(NULL,c("con.num","test","crit","se","p.value")) +# Determine critical value that controls FWE +temp<-msmedsub(nval,con=con,se.fun=se.fun,SEED=SEED,iter=iter) +ic<-round((1-alpha)*iter) +crit<-temp[ic] +for (d in 1:ncol(con)){ +flag<-(con[,d]==0) +nvec<-nval[!flag] +psihat[d,1]<-d +psihat[d,2]<-sum(con[,d]*xbar) +sejk<-sqrt(sum(con[,d]^2*w)) +test[d,1]<-d +test[d,2]<-sum(con[,d]*xbar)/sejk +# Determine p-value for individual (dth) test +temp<-msmedsub(nvec,iter=iter,se.fun=se.fun,SEED=SEED) +test[d,3]<-crit +test[d,4]<-sejk +test[d,5]<-sum(abs((test[d,2])<=temp))/iter +psihat[d,3]<-psihat[d,2]-crit*sejk +psihat[d,4]<-psihat[d,2]+crit*sejk +}} +list(test=test,psihat=psihat) +} +msmedsub<-function(n,con=0,alpha=.05,se.fun=bpmedse,iter=1000,SEED=TRUE){ +# +# Determine a Studentized critical value, assuming normality +# and homoscedasticity, for the function msmedv2 +# +# Goal: Test a set of linear contrasts using medians +# +# The data are assumed to be stored in $x$ in a matrix or in list mode. +# Length(x) is assumed to correspond to the total number of groups, J +# It is assumed all groups are independent. +# +# con is a J by d matrix containing the contrast coefficients that are used. +# If con is not specified, all pairwise comparisons are made. 
+# +if(SEED)set.seed(2) +con<-as.matrix(con) +J<-length(n) +h<-vector("numeric",J) +w<-vector("numeric",J) +xbar<-vector("numeric",J) +x<-list() +test<-NA +testmax<-NA +for (it in 1:iter){ +for(j in 1:J){ +x[[j]]<-rnorm(n[j]) +xbar[j]<-median(x[[j]]) + w[j]<-se.fun(x[[j]])^2 +} +if(sum(con^2!=0))CC<-ncol(con) +if(sum(con^2)==0){ +CC<-(J^2-J)/2 +jcom<-0 +for (j in 1:J){ +for (k in 1:J){ +if (j < k){ +jcom<-jcom+1 +test[jcom]<-abs(xbar[j]-xbar[k])/sqrt(w[j]+w[k]) +}}}} +if(sum(con^2)>0){ +for (d in 1:ncol(con)){ +sejk<-sqrt(sum(con[,d]^2*w)) +test[d]<-sum(con[,d]*xbar)/sejk +}} +testmax[it]<-max(abs(test)) +} +testmax<-sort(testmax) +testmax +} +cnorm<-function(n,epsilon=.1,k=10){ +# +# generate n observations from a contaminated normal +# distribution +# probability 1-epsilon from a standard normal +# probability epsilon from normal with mean 0 and standard deviation k +# +if(epsilon>1)stop("epsilon must be less than or equal to 1") +if(epsilon<0)stop("epsilon must be greater than or equal to 0") +if(k<=0)stop("k must be greater than 0") +val<-rnorm(n) +uval<-runif(n) +flag<-(uval<=1-epsilon) +val[!flag]<-k*val[!flag] +val +} +twwmcp<-function(J,K,x,grp=c(1:p),p=J*K,tr=.2,alpha=.05,dif=FALSE){ +# +# For a J by K anova using quantiles with +# repeated measures on both factors, +# Perform all multiple comparisons for main effects +# and interactions. +# +# tr=.2. default trimming +# bop=F means bootstrap option not used; +# with bop=T, function uses usual medians rather +# rather than a single order statistic to estimate median +# in conjunction with bootstrap estimate of covariances +# among the sample medians. +# +# The R variable data is assumed to contain the raw +# data stored in a matrix or in list mode. +# When in list mode data[[1]] contains the data +# for the first level of both factors: level 1,1. +# data[[2]] is assumed to contain the data for level 1 of the +# first factor and level 2 of the second: level 1,2 +# data[[K]] is the data for level 1,K +# data[[K+1]] is the data for level 2,1, data[2K] is level 2,K, etc. +# +# It is assumed that data has length JK, the total number of +# groups being tested, but a subset of the data can be analyzed +# using grp +# +Qa<-NA +Qab<-NA +if(is.list(x))x<-elimna(matl(x)) +if(is.data.frame(x))x=as.matrix(x) +if(is.matrix(x))x<-elimna(x) +data<-x +if(is.matrix(data))data<-listm(data) +if(!is.list(data))stop("Data are not stored in list mode or a matrix") +if(p!=length(data)){ +print("The total number of groups, based on the specified levels, is") +print(p) +print("The number of groups stored in x is") +print(length(data)) +print("Warning: These two values are not equal") +} +if(p!=length(grp))stop("Apparently a subset of the groups was specified that does not match the total number of groups indicated by the values for J and K.") +tmeans<-0 +temp<-con2way(J,K) # contrasts matrices stored in temp +Qa<-rmmcp(x,con=temp$conA,alpha=alpha,dif=dif,tr=tr) +# Do test for factor B +Qb<-rmmcp(x,con=temp$conB,alpha=alpha,dif=dif,tr=tr) +# Do test for factor A by B interaction +Qab<-rmmcp(x,con=temp$conAB,alpha=alpha,dif=dif,tr=tr) +list(Qa=Qa,Qb=Qb,Qab=Qab) +} + +medpb.old<-function(x,alpha=.05,nboot=NA,grp=NA,est=median,con=0,bhop=FALSE, +SEED=TRUE,...){ +# +# Multiple comparisons for J independent groups using medians. +# +# A percentile bootstrap method is used. FWE is controlled with Rom's method. +# +# The data are assumed to be stored in x +# which either has list mode or is a matrix. 
In the first case +# x[[1]] contains the data for the first group, x[[2]] the data +# for the second group, etc. Length(x)=the number of groups = J. +# If stored in a matrix, the columns of the matrix correspond +# to groups. +# +# est is the measure of location and defaults to the median +# ... can be used to set optional arguments associated with est +# +# The argument grp can be used to analyze a subset of the groups +# Example: grp=c(1,3,5) would compare groups 1, 3 and 5. +# +# +# con can be used to specify linear contrasts; see the function lincon +# +# Missing values are allowed. +# +con<-as.matrix(con) +if(is.data.frame(x))x=as.matrix(x) +if(is.matrix(x))x<-listm(x) +if(!is.list(x))stop("Data must be stored in list mode or in matrix mode.") +if(!is.na(sum(grp))){ # Only analyze specified groups. +xx<-list() +for(i in 1:length(grp))xx[[i]]<-x[[grp[i]]] +x<-xx +} +J<-length(x) +tempn<-0 +mvec<-NA +for(j in 1:J){ +temp<-x[[j]] +temp<-temp[!is.na(temp)] # Remove missing values. +tempn[j]<-length(temp) +x[[j]]<-temp +mvec[j]<-est(temp,...) +} +Jm<-J-1 +# +# Determine contrast matrix +# +if(sum(con^2)==0){ +ncon<-(J^2-J)/2 +con<-matrix(0,J,ncon) +id<-0 +for (j in 1:Jm){ +jp<-j+1 +for (k in jp:J){ +id<-id+1 +con[j,id]<-1 +con[k,id]<-0-1 +}}} +ncon<-ncol(con) +dvec<-alpha/c(1:ncon) +if(nrow(con)!=J)stop("Something is wrong with con; the number of rows does not match the number of groups.") +# Determine nboot if a value was not specified +if(is.na(nboot)){ +nboot<-5000 +if(J <= 8)nboot<-4000 +if(J <= 3)nboot<-2000 +} +# Determine critical values +if(!bhop){ +if(alpha==.05){ +dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) +if(ncon > 10){ +avec<-.05/c(11:ncon) +dvec<-c(dvec,avec) +}} +if(alpha==.01){ +dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) +if(ncon > 10){ +avec<-.01/c(11:ncon) +dvec<-c(dvec,avec) +}} +if(alpha != .05 && alpha != .01){ +dvec<-alpha/c(1:ncon) +} +} +if(bhop)dvec<-(ncon-c(1:ncon)+1)*alpha/ncon +bvec<-matrix(NA,nrow=J,ncol=nboot) +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +#print("Taking bootstrap samples. Please wait.") +for(j in 1:J){ +#print(paste("Working on group ",j)) +data<-matrix(sample(x[[j]],size=length(x[[j]])*nboot,replace=TRUE),nrow=nboot) +bvec[j,]<-apply(data,1,est,...) # Bootstrapped values for jth group +} +test<-NA +bcon<-t(con)%*%bvec #ncon by nboot matrix +tvec<-t(con)%*%mvec +for (d in 1:ncon){ +tv<-sum(bcon[d,]==0)/nboot +test[d]<-sum(bcon[d,]>0)/nboot+.5*tv +if(test[d]> .5)test[d]<-1-test[d] +} +test<-2*test +output<-matrix(0,ncon,6) +dimnames(output)<-list(NULL,c("con.num","psihat","p.value","p.crit","ci.lower","ci.upper")) +temp2<-order(0-test) +zvec<-dvec[1:ncon] +sigvec<-(test[temp2]>=zvec) +output[temp2,4]<-zvec +icl<-round(dvec[ncon]*nboot/2)+1 +icu<-nboot-icl-1 +for (ic in 1:ncol(con)){ +output[ic,2]<-tvec[ic,] +output[ic,1]<-ic +output[ic,3]<-test[ic] +temp<-sort(bcon[ic,]) +output[ic,5]<-temp[icl] +output[ic,6]<-temp[icu] +} +num.sig<-sum(output[,3]<=output[,4]) +list(output=output,con=con,num.sig=num.sig) +} + +medpb<-function(x,alpha=.05,nboot=NA,grp=NA,est=median,con=0,bhop=FALSE,method='hoch', +SEED=TRUE,...){ +# +# Multiple comparisons for J independent groups using medians. +# +# A percentile bootstrap method. +# FWE controlled via argument method +# method =hoch Hochberg;s method is used by default +# +# The data are assumed to be stored in x +# which either has list mode or is a matrix. 
In the first case +# x[[1]] contains the data for the first group, x[[2]] the data +# for the second group, etc. Length(x)=the number of groups = J. +# If stored in a matrix, the columns of the matrix correspond +# to groups. +# +# est is the measure of location and defaults to the median +# ... can be used to set optional arguments associated with est +# +# The argument grp can be used to analyze a subset of the groups +# Example: grp=c(1,3,5) would compare groups 1, 3 and 5. +# +# +# con can be used to specify linear contrasts; see the function lincon +# +# Missing values are allowed. +# +con<-as.matrix(con) +if(is.data.frame(x))x=as.matrix(x) +if(is.matrix(x))x<-listm(x) +if(!is.list(x))stop('Data must be stored in list mode or in matrix mode.') +if(!is.na(sum(grp))){ # Only analyze specified groups. +xx<-list() +for(i in 1:length(grp))xx[[i]]<-x[[grp[i]]] +x<-xx +} +J<-length(x) +tempn<-0 +mvec<-NA +for(j in 1:J){ +temp<-x[[j]] +temp<-temp[!is.na(temp)] # Remove missing values. +tempn[j]<-length(temp) +x[[j]]<-temp +mvec[j]<-est(temp,...) +} +Jm<-J-1 +# +# Determine contrast matrix +# +if(sum(con^2)==0){ +ncon<-(J^2-J)/2 +con<-matrix(0,J,ncon) +id<-0 +for (j in 1:Jm){ +jp<-j+1 +for (k in jp:J){ +id<-id+1 +con[j,id]<-1 +con[k,id]<-0-1 +}}} +ncon<-ncol(con) +dvec<-alpha/c(1:ncon) +if(nrow(con)!=J)stop('Something is wrong with con; the number of rows does not match the number of groups.') +# Determine nboot if a value was not specified +if(is.na(nboot)){ +nboot<-5000 +if(J <= 8)nboot<-4000 +if(J <= 3)nboot<-2000 +} +# Determine critical values +if(!bhop){ +if(alpha==.05){ +dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) +if(ncon > 10){ +avec<-.05/c(11:ncon) +dvec<-c(dvec,avec) +}} +if(alpha==.01){ +dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) +if(ncon > 10){ +avec<-.01/c(11:ncon) +dvec<-c(dvec,avec) +}} +if(alpha != .05 && alpha != .01){ +dvec<-alpha/c(1:ncon) +} +} +if(bhop)dvec<-(ncon-c(1:ncon)+1)*alpha/ncon +bvec<-matrix(NA,nrow=J,ncol=nboot) +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +for(j in 1:J){ +data<-matrix(sample(x[[j]],size=length(x[[j]])*nboot,replace=TRUE),nrow=nboot) +bvec[j,]<-apply(data,1,est,...) # Bootstrapped values for jth group +} +test<-NA +bcon<-t(con)%*%bvec #ncon by nboot matrix +tvec<-t(con)%*%mvec +for (d in 1:ncon){ +tv<-sum(bcon[d,]==0)/nboot +test[d]<-sum(bcon[d,]>0)/nboot+.5*tv +if(test[d]> .5)test[d]<-1-test[d] +} +test<-2*test +output<-matrix(0,ncon,7) +dimnames(output)<-list(NULL,c('con.num','psihat','p.value','p.crit','ci.lower','ci.upper','adj.p.value')) +temp2<-order(0-test) +zvec<-dvec[1:ncon] +sigvec<-(test[temp2]>=zvec) +output[temp2,4]<-zvec +icl<-round(dvec[ncon]*nboot/2)+1 +icu<-nboot-icl-1 +for (ic in 1:ncol(con)){ +output[ic,2]<-tvec[ic,] +output[ic,1]<-ic +output[ic,3]<-test[ic] +temp<-sort(bcon[ic,]) +output[ic,5]<-temp[icl] +output[ic,6]<-temp[icu] +} +num.sig<-sum(output[,3]<=output[,4]) +output[,7]=p.adjust(output[,3],method=method) + +list(output=output,con=con,num.sig=num.sig) +} + + +medmcp=medpb + +rbbinom<-function(n,nbin,r,s){ +# +# Generate n values from a beta-binomial, +# r and s are the parameters of the beta distribution. 
+# nbin is for the binomial distribution, +# Example: nbin=10 means the sample space=c(0:10) +# +x<-NA +for(i in 1:n){ +pval<-rbeta(1,r,s) +x[i]<-rbinom(1,nbin,pval) +} +x +} + +rbeta.binomial=rbbinom + +med2g<-function(x,y,alpha=.05,nboot=2000,SEED=TRUE,...){ +# +# Compare medians of two independent groups using percentile bootstrap +# +# Missing values are allowed. +# +x<-elimna(x) +y<-elimna(y) +mvec<-NA +mvec[1]<-median(x) +mvec[2]<-median(y) +bvec<-NA +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +datax<-matrix(sample(x,size=length(x)*nboot,replace=TRUE),nrow=nboot) +datay<-matrix(sample(y,size=length(x)*nboot,replace=TRUE),nrow=nboot) +bvec1<-apply(datax,1,median) # Bootstrapped values for jth group +bvec2<-apply(datay,1,median) # Bootstrapped values for jth group +test<-sum((bvec1>bvec2))/nboot +tv<-sum(bvec1==bvec2)/nboot +test<-test+.5*tv +if(test> .5)test<-1-test +test<-2*test +dvec<-sort(bvec1-bvec2) +icl<-round(alpha*nboot/2)+1 +icu<-nboot-icl-1 +cilow<-dvec[icl] +ciup<-dvec[icu] +list(p.value=test,est.1=mvec[1],est.2=mvec[2],est.dif=mvec[1]-mvec[2],ci.low=cilow,ci.up=ciup) +} + + +twobinom<-function(r1=sum(elimna(x)),n1=length(elimna(x)),r2=sum(elimna(y)),n2=length(elimna(y)),x=NA,y=NA,alpha=.05){ +# +# Test the hypothesis that two independent binomials have equal +# probability of success using the Storer--Kim method. +# +# r1=number of successes in group 1 +# n1=number of observations in group 1 +# +n1p<-n1+1 +n2p<-n2+1 +n1m<-n1-1 +n2m<-n2-1 +chk<-abs(r1/n1-r2/n2) +x<-c(0:n1)/n1 +y<-c(0:n2)/n2 +phat<-(r1+r2)/(n1+n2) +m1<-outer(x,y,"-") +m2<-matrix(1,n1p,n2p) +flag<-(abs(m1)>=chk) +m3<-m2*flag +b1<-1 +b2<-1 +xv<-c(1:n1) +yv<-c(1:n2) +xv1<-n1-xv+1 +yv1<-n2-yv+1 +dis1<-c(1,pbeta(phat,xv,xv1)) +dis2<-c(1,pbeta(phat,yv,yv1)) +pd1<-NA +pd2<-NA +for(i in 1:n1)pd1[i]<-dis1[i]-dis1[i+1] +for(i in 1:n2)pd2[i]<-dis2[i]-dis2[i+1] +pd1[n1p]<-phat^n1 +pd2[n2p]<-phat^n2 +m4<-outer(pd1,pd2,"*") +test<-sum(m3*m4) +list(p.value=test,p1=r1/n1,p2=r2/n2,est.dif=r1/n1-r2/n2) +} + +lband.fun<-function(x,y,crit){ +# +# function used to determine probability of type I error given crit +# +pi<-gamma(.5)^2 +xr<-rank(x) +yr<-rank(y) +temp<-apply(cbind(xr,yr),1,max) +n<-length(x) +fj<-NA +for(i in 1:n)fj[i]<-sum(temp==i) +v1<-NA +for(j in 1:n)v1[j]<-(j-sum(fj[1:j]))/n +psi<-rep(0,n) +for(j in 1:n){ +if(v1[j]>0)psi[j]<-crit*exp(0-crit^2/(2*v1[j]))/sqrt(2*pi*v1[j]^3) +} +res<-mean(fj*psi) +res +} + +lband.fun2<-function(m,crit,alpha=.05){ +x<-m[,1] +y<-m[,2] +val<-abs(alpha-lband.fun(x,y,crit)) +val +} +qdec<-function(x){ +# +# compute deciles using single order statistics +# (function deciles uses Harrell-Davis estimator) +# +vals<-NA +for(i in 1:9){ +vals[i]<-qest(x,i/10) +} +vals +} +m2way<-function(J,K,x,est=hd,alpha=.05,nboot=600,SEED=TRUE,grp=NA,pr=FALSE,...){ +# +# Two-way ANOVA based on forming averages +# +# By default +# est=hd meaning that medians are used with the Harrell-Davis estimator. +# +# The data are assumed to be stored in x in list mode or in a matrix. +# If grp is unspecified, it is assumed x[[1]] contains the data +# for the first level of both factors: level 1,1. +# x[[2]] is assumed to contain the data for level 1 of the +# first factor and level 2 of the second factor: level 1,2 +# x[[j+1]] is the data for level 2,1, etc. +# If the data are in wrong order, grp can be used to rearrange the +# groups. 
For example, for a two by two design, grp<-c(2,4,3,1) +# indicates that the second group corresponds to level 1,1; +# group 4 corresponds to level 1,2; group 3 is level 2,1; +# and group 1 is level 2,2. +# +# Missing values are automatically removed. +# +JK<-J*K +if(is.data.frame(x))x=as.matrix(x) +xcen<-list() + if(is.matrix(x)) + x <- listm(x) + if(!is.list(x)) + stop("Data must be stored in list mode or a matrix.") + if(!is.na(grp[1])) { + yy <- x + for(j in 1:length(grp)) + x[[j]] <- yy[[grp[j]]] + } +for(j in 1:JK){ +temp<-x[[j]] +temp<-temp[!is.na(temp)] # Remove missing values. +x[[j]]<-temp +} +xx<-list() +mloc<-NA +for(i in 1:JK){ +xx[[i]]<-x[[i]] +mloc[i]<-est(xx[[i]],...) +xcen[[i]]<-xx[[i]]-mloc[i] +} +x<-xx +mat<-matrix(mloc,nrow=J,ncol=K,byrow=TRUE) +leva<-apply(mat,1,mean) # J averages over columns +levb<-apply(mat,2,mean) +gm<-mean(levb) +testa<-sum((leva-mean(leva))^2) +testb<-sum((levb-mean(levb))^2) +testab<-NA +tempab<-matrix(NA,nrow=J,ncol=K) +for(j in 1:J){ +for(k in 1:K){ +tempab[j,k]<-mat[j,k]-leva[j]-levb[k]+gm +}} +testab<-sum(tempab^2) +bvec<-matrix(NA,nrow=JK,ncol=nboot) +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +if(pr)print("Taking bootstrap samples. Please wait.") +for(j in 1:JK){ +if(pr)print(paste("Working on group ",j)) +data<-matrix(sample(xcen[[j]],size=length(xcen[[j]])*nboot,replace=TRUE), +nrow=nboot) +bvec[j,]<-apply(data,1,est,...) # JK by nboot matrix, jth row contains +# bootstrapped estimates for jth group +} +boota<-NA +bootb<-NA +bootab<-NA +for(i in 1:nboot){ +mat<-matrix(bvec[,i],nrow=J,ncol=K,byrow=TRUE) +leva<-apply(mat,1,mean) # J averages over columns +levb<-apply(mat,2,mean) +gm<-mean(mat) +boota[i]<-sum((leva-mean(leva))^2) +bootb[i]<-sum((levb-mean(levb))^2) +for(j in 1:J){ +for(k in 1:K){ +tempab[j,k]<-mat[j,k]-leva[j]-levb[k]+gm +}} +bootab[i]<-sum(tempab^2)} +pvala<-1-sum(testa>=boota)/nboot +pvalb<-1-sum(testb>=bootb)/nboot +pvalab<-1-sum(testab>=bootab)/nboot +list(p.value.A=pvala,p.value.B=pvalb,p.value.AB=pvalab, +test.A=testa,test.B=testb, +test.AB=testab,est.loc=matrix(mloc,nrow=J,ncol=K,byrow=TRUE)) +} + + + +b1way<-function(x,est=onestep,nboot=599,SEED=TRUE,...){ +# +# Test the hypothesis that J measures of location are equal +# using the percentile bootstrap method. +# By default, M-estimators are compared using 599 bootstrap samples. +# +# The data are assumed to be stored in x in list mode. Thus, +# x[[1]] contains the data for the first group, x[[2]] the data +# for the second group, etc. Length(x)=the number of groups = J, say. +# +# +if(is.data.frame(x))x=as.matrix(x) +if(is.matrix(x))x<-listm(x) +if(!is.list(x))stop("Data must be stored in list mode or a matrix.") +J<-length(x) +for(j in 1:J)x[[j]]=elimna(x[[j]]) +nval<-vector("numeric",length(x)) +gest<-vector("numeric",length(x)) +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +bvec<-matrix(0,J,nboot) +#print("Taking bootstrap samples. Please wait.") +for(j in 1:J){ +#print(paste("Working on group ",j)) +nval[j]<-length(x[[j]]) +gest[j]<-est(x[[j]]) +xcen<-x[[j]]-est(x[[j]],...) +data<-matrix(sample(xcen,size=length(x[[j]])*nboot,replace=TRUE),nrow=nboot) +bvec[j,]<-apply(data,1,est,...) # A J by nboot matrix +# containing the bootstrap values of est. 
+} +teststat<-wsumsq(gest,nval) +testb<-apply(bvec,2,wsumsq,nval) +p.value<-1 - sum(teststat >= testb, na.rm = TRUE)/nboot +teststat<-wsumsq(gest,nval) +if(teststat == 0)p.value <- 1 +list(teststat=teststat,p.value=p.value) +} + + +lintest<-function(x,y,regfun=tsreg,nboot=500,alpha=.05,xout=FALSE,SEED=TRUE, +outfun=out,...){ +# +# Test the hypothesis that the regression surface is a plane. +# Stute et al. (1998, JASA, 93, 141-149). +# +if(SEED)set.seed(2) +#if(identical(regfun,Qreg))print('When using Qreg, be sure to include res.vals=TRUE') +#if(identical(regfun,tshdreg))print('When using tshdreg, be sure to include RES=TRUE') +#if(identical(regfun,MMreg))print('When using MMreg, be sure to include RES=TRUE') # no longer necessary +x<-as.matrix(x) +d<-ncol(x) +temp<-elimna(cbind(x,y)) +x<-temp[,1:d] +x<-as.matrix(x) +y<-temp[,d+1] +if(xout){ +flag<-outfun(x,...)$keep +x<-x[flag,] +x<-as.matrix(x) +y<-y[flag] +} +mflag<-matrix(NA,nrow=length(y),ncol=length(y)) +for (j in 1:length(y)){ +for (k in 1:length(y)){ +mflag[j,k]<-(sum(x[j,]<=x[k,])==ncol(x)) +} +} +reg<-regfun(x,y,...) +yhat<-y-reg$residuals +#print("Taking bootstrap samples, please wait.") +data<-matrix(runif(length(y)*nboot),nrow=nboot) +data<-sqrt(12)*(data-.5) # standardize the random numbers. +rvalb<-apply(data,1,lintests1,yhat,reg$residuals,mflag,x,regfun,...) +# An n x nboot matrix of R values +rvalb<-rvalb/sqrt(length(y)) +dstatb<-apply(abs(rvalb),2,max) +wstatb<-apply(rvalb^2,2,mean) +# compute test statistic +v<-c(rep(1,length(y))) +rval<-lintests1(v,yhat,reg$residuals,mflag,x,regfun,...) +rval<-rval/sqrt(length(y)) +dstat<-max(abs(rval)) +wstat<-mean(rval^2) +ib<-round(nboot*(1-alpha)) +p.value.d<-1-sum(dstat>=dstatb)/nboot +p.value.w<-1-sum(wstat>=wstatb)/nboot +list(dstat=dstat,wstat=wstat,p.value.d=p.value.d,p.value.w=p.value.w) +} + + +tauloc<-function(x,cval=4.5){ +# +# Compute the tau measure of location as described in +# Yohai and Zamar (JASA, 83, 406-413). +# +x<-elimna(x) +s<-qnorm(.75)*mad(x) +y<-(x-median(x))/s +W<-(1-(y/cval)^2)^2 +flag<-(abs(W)>cval) +W[flag]<-0 +val<-sum(W*x)/sum(W) +val +} + +tauvar<-function(x,cval=3){ +# +# Compute the tau measure of scale as described in +# Yohai and Zamar (JASA, 1988, 83, 406-413). +# The computational method is described in Maronna and Zamar +# (Technometrics, 2002, 44, 307-317) +# see p. 310 +# +x<-elimna(x) +s<-qnorm(.75)*mad(x) +y<-(x-tauloc(x))/s +cvec<-rep(cval,length(x)) +W<-apply(cbind(y^2,cvec^2),1,FUN="min") +val<-s^2*sum(W)/length(x) +val +} + +gkcor<-function(x,y,varfun=tauvar,ccov=FALSE,...){ +# +# Compute a correlation coefficient using the Gnanadesikan-Ketterning +# estimator. +# ccov=T, computes covariance instead. +# (cf. Marrona & Zomar, 2002, Technometrics +# +val<-.25*(varfun(x+y,...)-varfun(x-y,...)) +if(!ccov)val<-val/(sqrt(varfun(x,...))*sqrt(varfun(y,...))) +val +} +covroc<-function(x){ +# +# compute Rocke's TBS covariance matrix +# + library(robust) +temp<-covRob(x,estim="M") +val<-temp[2]$cov +val +} +indt<-function(x,y,nboot=500,flag=1,SEED=TRUE,pr=TRUE){ +# +# Test the hypothesis of independence between x and y by +# testing the hypothesis that the regression surface is a horizontal plane. +# Stute et al. (1998, JASA, 93, 141-149). +# +# flag=1 gives Kolmogorov-Smirnov test statistic +# flag=2 gives the Cramer-von Mises test statistic +# flag=3 causes both test statistics to be reported. +# +# tr=0 results in the Cramer-von Mises test statistic when flag=2 +# With tr>0, a trimmed version of the test statistic is used. 
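+# Example (hypothetical x and y): indt(x,y) reports the Kolmogorov-Smirnov type
+# statistic by default; indt(x,y,flag=2) uses the Cramer-von Mises version,
+# and flag=3 reports both test statistics.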
+# +# Modified Dec 2005. +# +tr=0 +#if(tr<0)stop("Amount trimmed must be > 0") +#if(tr>.5)stop("Amount trimmed must be <=.5") +if(SEED)set.seed(2) +x<-as.matrix(x) +# First, eliminate any rows of data with missing values. +temp <- cbind(x, y) + temp <- elimna(temp) + pval<-ncol(temp)-1 + x <- temp[,1:pval] + y <- temp[, pval+1] +x<-as.matrix(x) +mflag<-matrix(NA,nrow=length(y),ncol=length(y)) +for (j in 1:length(y)){ +for (k in 1:length(y)){ +mflag[j,k]<-(sum(x[j,]<=x[k,])==ncol(x)) +} +} +# ith row of mflag indicates which rows of the matrix x are less +# than or equal to ith row of x +# +yhat<-mean(y) +res<-y-yhat +if(pr)print("Taking bootstrap sample, please wait.") +data<-matrix(runif(length(y)*nboot),nrow=nboot)# +data<-(data-.5)*sqrt(12) # standardize the random numbers. +rvalb<-apply(data,1,regts1,yhat,res,mflag,x,tr) +# An n x nboot matrix of R values +rvalb<-rvalb/sqrt(length(y)) +dstatb<-apply(abs(rvalb),2,max) +wstatb<-apply(rvalb^2,2,mean,tr=tr) +v<-c(rep(1,length(y))) +rval<-regts1(v,yhat,res,mflag,x,tr=0) +rval<-rval/sqrt(length(y)) +dstat<-NA +wstat<-NA +critd<-NA +critw<-NA +p.vald<-NA +p.valw<-NA +if(flag==1 || flag==3){ +dstat<-max(abs(rval)) +p.vald<-1-sum(dstat>=dstatb)/nboot +} +if(flag==2 || flag==3){ +wstat<-mean(rval^2,tr=tr) +p.valw<-1-sum(wstat>=wstatb)/nboot +} +list(dstat=dstat,wstat=wstat,p.value.d=p.vald,p.value.w=p.valw) +} + + +taulc<-function(x,mu.too=FALSE){ +# +val<-tauvar(x) +if(mu.too){ +val[2]<-val +val[1]<-tauloc(x) +} +val +} + + +trimww.sub<-function(cmat,vmean,vsqse,h,J,K){ +# +# This function is used by trimww +# +# The function performs a variation of Johansen's test of C mu = 0 for +# a within by within design +# C is a k by p matrix of rank k and mu is a p by 1 matrix of +# of unknown medians. +# The argument cmat contains the matrix C. +# vmean is a vector of length p containing the p medians +# vsqe is matrix containing the +# estimated covariances among the medians +# h is the sample size +# +p<-J*K +yvec<-matrix(vmean,length(vmean),1) +test<-cmat%*%vsqse%*%t(cmat) +invc<-solve(test) +test<-t(yvec)%*%t(cmat)%*%invc%*%cmat%*%yvec +temp<-0 +mtem<-vsqse%*%t(cmat)%*%invc%*%cmat +temp<-(sum(diag(mtem%*%mtem))+(sum(diag(mtem)))^2)/(h-1) +A<-.5*sum(temp) +cval<-nrow(cmat)+2*A-6*A/(nrow(cmat)+2) +test<-test/cval +test +} + + + +trimww<-function(J,K,x,grp=c(1:p),p=J*K,tr=.2){ +# +# Perform a J by K anova using trimmed means with +# repeated measures on both factors. +# +# tr=.2 is default trimming +# +# The R variable data is assumed to contain the raw +# data stored in list mode. data[[1]] contains the data +# for the first level of both factors: level 1,1. +# data[[2]] is assumed to contain the data for level 1 of the +# first factor and level 2 of the second: level 1,2 +# data[[K]] is the data for level 1,K +# data[[K+1]] is the data for level 2,1, data[2K] is level 2,K, etc. 
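+# For example, with J=2 and K=3 the six groups are ordered
+# (1,1),(1,2),(1,3),(2,1),(2,2),(2,3), so trimww(2,3,m) analyzes a (hypothetical)
+# matrix m whose six columns follow that order; tr=0 yields the untrimmed means.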
+# +# It is assumed that data has length JK, the total number of +# groups being tested, but a subset of the data can be analyzed +# using grp +# +if(is.list(x))x<-elimna(matl(x)) +if(is.matrix(x))x<-elimna(x) +data<-x +if(is.matrix(data))data<-listm(data) +if(!is.list(data))stop("Data are not stored in list mode or a matrix") +if(p!=length(data)){ +print("The total number of groups, based on the specified levels, is") +print(p) +print("The number of groups stored in x is") +print(length(data)) +print("Warning: These two values are not equal") +} +if(p!=length(grp))stop("Apparently a subset of the groups was specified that does not match the total number of groups indicated by the values for J and K.") +tmeans<-0 +h<-length(data[[grp[1]]]) +v<-matrix(0,p,p) +for (i in 1:p)tmeans[i]<-mean(data[[grp[i]]],tr=tr,na.rm=TRUE) +v<-covmtrim(data,tr=tr) +ij<-matrix(c(rep(1,J)),1,J) +ik<-matrix(c(rep(1,K)),1,K) +jm1<-J-1 +cj<-diag(1,jm1,J) +for (i in 1:jm1)cj[i,i+1]<-0-1 +km1<-K-1 +ck<-diag(1,km1,K) +for (i in 1:km1)ck[i,i+1]<-0-1 +# Do test for factor A +cmat<-kron(cj,ik) # Contrast matrix for factor A +#Qa<-johansp(cmat,tmeans,v,h,J,K) +Qa<-trimww.sub(cmat,tmeans,v,h,J,K) +#Qa.siglevel<-1-pf(Qa$teststat,J-1,999) +Qa.siglevel<-1-pf(Qa,J-1,999) +# Do test for factor B +cmat<-kron(ij,ck) # Contrast matrix for factor B +#Qb<-johansp(cmat,tmeans,v,h,J,K) +Qb<-trimww.sub(cmat,tmeans,v,h,J,K) +Qb.siglevel<-1-pf(Qb,K-1,999) +# Do test for factor A by B interaction +cmat<-kron(cj,ck) # Contrast matrix for factor A by B +#Qab<-johansp(cmat,tmeans,v,h,J,K) +Qab<-trimww.sub(cmat,tmeans,v,h,J,K) +Qab.siglevel<-1-pf(Qab,(J-1)*(K-1),999) +list(Qa=Qa,Qa.siglevel=Qa.siglevel, +Qb=Qb,Qb.siglevel=Qb.siglevel, +Qab=Qab,Qab.siglevel=Qab.siglevel) +} + + +msmedci<-function(x,alpha=.05,nullval=0){ +# +# Confidence interval for the median +# +se<-msmedse(x) +est<-median(x) +ci.low<-est-qnorm(1-alpha/2)*se +ci.hi<-est+qnorm(1-alpha/2)*se +test<-(est-nullval)/se +p.value<-2*(1-pnorm(abs(test))) +list(test=test,ci.low=ci.low,ci.hi=ci.hi,p.value=p.value,median=est) +} +medcipb<-function(x,alpha=.05,null.val=NA,nboot=500,SEED=TRUE,...){ +# +# Bootstrap confidence interval for the median of single variable. +# The usual sample median is used. hdpb uses the Harrell--Davis estimator +# Missing values are allowed. +# +x<-elimna(x) +est=median(x) +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +#print("Taking bootstrap samples. Please wait.") +data<-matrix(sample(x,size=length(x)*nboot,replace=TRUE),nrow=nboot) +bvec<-apply(data,1,median) # Bootstrapped values +test<-NULL +if(!is.na(null.val)){ +tv<-sum(bvec==null.val)/nboot +test<-sum(bvec>null.val)/nboot+.5*tv +if(test> .5)test<-1-test +test<-2*test +} +bvec<-sort(bvec) +icl<-round(alpha*nboot/2)+1 +icu<-nboot-icl-1 +cilow<-bvec[icl] +ciup<-bvec[icu] +list(Est.=est,ci.low=cilow,ci.up=ciup,p.value=test) +} + +regtest<-function(x,y,regfun=tsreg,nboot=600,alpha=.05,plotit=TRUE, +grp=c(1:ncol(x)),nullvec=c(rep(0,length(grp))),xout=FALSE,outfun=outpro,SEED=TRUE,pr=TRUE,...){ +# +# Test the hypothesis that q of the p predictors are equal to +# some specified constants. By default, the hypothesis is that all +# p predictors have a coefficient equal to zero. +# The method is based on a confidence ellipsoid. +# The critical value is determined with the percentile bootstrap method +# in conjunction with Mahalanobis distance. 
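+# A minimal sketch (hypothetical n-by-3 predictor matrix x and response y):
+# regtest(x,y) tests that all three slopes are zero using the Theil-Sen estimator;
+# regtest(x,y,grp=c(1,3),nullvec=c(0,0)) restricts the test to the first and third slopes.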
+# +x<-as.matrix(x) +p1<-ncol(x)+1 +p<-ncol(x) +xy<-cbind(x,y) +xy<-elimna(xy) +x<-xy[,1:p] +y<-xy[,p1] +if(xout){ +if(pr)print("Default for outfun is now outpro") +m<-cbind(x,y) +if(identical(outfun,outblp))flag=outblp(x,y,regfun=regfun,plotit=FALSE)$keep +else +flag<-outfun(x,plotit=FALSE,...)$keep +m<-m[flag,] +x<-m[,1:p] +y<-m[,p1] +} +x<-as.matrix(x) +if(length(grp)!=length(nullvec))stop("The arguments grp and nullvec must have the same length.") +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +if(pr)print("Taking bootstrap samples. Please wait.") +data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot) +bvec<-apply(data,1,regboot,x,y,regfun) # A p+1 by nboot matrix. The first row +# contains the bootstrap intercepts, the second row +# contains the bootstrap values for first predictor, etc. +grp<-grp+1 #Ignore the intercept. +est<-regfun(x,y)$coef +estsub<-est[grp] +bsub<-t(bvec[grp,]) +if(length(grp)==1){ +m1<-sum((bvec[grp,]-est)^2)/(length(y)-1) +dis<-(bsub-estsub)^2/m1 +} +if(length(grp)>1){ +mvec<-apply(bsub,2,FUN=mean) +m1<-var(t(t(bsub)-mvec+estsub)) +dis<-mahalanobis(bsub,estsub,m1) +} +dis2<-order(dis) +dis<-sort(dis) +critn<-floor((1-alpha)*nboot) +crit<-dis[critn] +test<-mahalanobis(t(estsub),nullvec,m1) +sig.level<-1-sum(test>dis)/nboot +if(length(grp)==2 && plotit){ +plot(bsub,xlab="Parameter 1",ylab="Parameter 2") +points(nullvec[1],nullvec[2],pch=0) +xx<-bsub[dis2[1:critn],] +xord<-order(xx[,1]) +xx<-xx[xord,] +temp<-chull(xx) +lines(xx[temp,]) +lines(xx[c(temp[1],temp[length(temp)]),]) +} +list(test=test,crit=crit,p.value=sig.level,nullvec=nullvec,est=estsub,n=length(y)) +} + +reg2ci<-function(x,y,x1,y1,regfun=tsreg,nboot=599,alpha=.05,plotit=TRUE,SEED=TRUE, +xout=FALSE,outfun=outpro,xlab="X",ylab="Y",pr=FALSE,...){ +# +# Compute a .95 confidence interval for the difference between the +# the intercepts and slopes corresponding to two independent groups. +# The default regression method is Theil-Sen. +# +# The predictor values for the first group are +# assumed to be in the n by p matrix x. +# The predictors for the second group are in x1 +# +# The default number of bootstrap samples is nboot=599 +# +# regfun can be any R function that returns the coefficients in +# the vector regfun$coef, the first element of which contains the +# estimated intercept, the second element contains the estimate of +# the first predictor, etc. +# +x<-as.matrix(x) +xx<-cbind(x,y) +xx<-elimna(xx) +x<-xx[,1:ncol(x)] +x<-as.matrix(x) +y<-xx[,ncol(x)+1] +x1<-as.matrix(x1) +xx1<-cbind(x1,y1) +xx1<-elimna(xx1) +x1<-xx1[,1:ncol(x1)] +x1<-as.matrix(x1) +y1<-xx1[,ncol(x1)+1] +x=as.matrix(x) +x1=as.matrix(x1) +if(xout){ +if(pr)print("outfun now defaults to outpro rather than out") +if(identical(outfun,outblp)){ +flag1=outblp(x,y,plotit=FALSE)$keep +flag2=outblp(x1,y2,plotit=FALSE)$keep +} +if(!identical(outfun,outblp)){ +flag1=outfun(x,plotit=FALSE)$keep +flag2=outfun(x1,plotit=FALSE)$keep +} +x=x[flag1,] +y=y[flag1] +x1=x1[flag2,] +y1=y1[flag2] +} +n=length(y) +n[2]=length(y1) +x<-as.matrix(x) +x1<-as.matrix(x1) +est1=regfun(x,y,...)$coef +est2=regfun(x1,y1,...)$coef +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot) +bvec<-apply(data,1,regboot,x,y,regfun,xout=FALSE,...) # A p+1 by nboot matrix. 
The first row +# contains the bootstrap intercepts, the second row +# contains the bootstrap values for first predictor, etc. +data<-matrix(sample(length(y1),size=length(y1)*nboot,replace=TRUE),nrow=nboot) +bvec1<-apply(data,1,regboot,x1,y1,regfun,xout=FALSE,...) +bvec<-bvec-bvec1 +p1<-ncol(x)+1 +regci<-matrix(0,p1,6) +dimnames(regci)<-list(NULL, +c("Parameter","ci.lower","ci.upper","p.value","Group 1","Group 2")) +ilow<-round((alpha/2)*nboot)+1 +ihi<-nboot-(ilow-1) +for(i in 1:p1){ +temp<-sum(bvec[i,]<0)/nboot+sum(bvec[i,]==0)/(2*nboot) +regci[i,4]<-2*min(temp,1-temp) +bsort<-sort(bvec[i,]) +regci[i,2]<-bsort[ilow] +regci[i,3]<-bsort[ihi] +regci[,1]<-c(0:ncol(x)) +} +regci[,5]=est1 +regci[,6]=est2 +if(ncol(x)==1 && plotit){ +plot(c(x,x1),c(y,y1),type="n",xlab=xlab,ylab=ylab) +points(x,y) +points(x1,y1,pch="+") +abline(regfun(x,y,...)$coef) +abline(regfun(x1,y1,...)$coef,lty=2) +} +list(n=n,output=regci) +} + + +anova1<-function(x){ +# +# conventional one-way anova +# +if(is.matrix(x) || is.data.frame(x))x<-listm(x) +x=elimna(x) +A<-0 +B<-0 +C<-0 +N<-0 +for(j in 1:length(x)){ +N<-N+length(x[[j]]) +A<-A+sum(x[[j]]^2) +B<-B+sum(x[[j]]) +C<-C+(sum(x[[j]]))^2/length(x[[j]]) +} +SST<-A-B^2/N +SSBG<-C-B^2/N +SSWG<-A-C +nu1<-length(x)-1 +nu2<-N-length(x) +MSBG<-SSBG/nu1 +MSWG<-SSWG/nu2 +FVAL<-MSBG/MSWG +pvalue<-1-pf(FVAL,nu1,nu2) +list(F.test=FVAL,p.value=pvalue,df1=nu1,df2=nu2,MSBG=MSBG,MSWG=MSWG) +} +twodcor8<-function(x,y){ +# +# Compute a .95 confidence interval for +# the difference between two dependent +# correlations corresponding to two independent +# goups. +# +# +# x is a matrix with two columns, +# y is a vector +# Goal: test equality of Pearson correlation for x1, y versus x2, y. +# +# For general use, twodcor10 is probably better, +# which calls this function and estimates an adjusted p-value. +# +X<-elimna(cbind(x,y)) +Z1<-(X[,1]-mean(X[,1]))/sqrt(var(X[,1])) +Z2<-(X[,2]-mean(X[,2]))/sqrt(var(X[,2])) +temp<-cor.test(Z1-Z2,X[,3]) +temp<-temp[3]$p.value +list(p.value=temp) +} + +twodcor10<-function(x,y,nboot=500,SEED=TRUE,alpha=.05){ +# +# Compute a .95 confidence interval for +# the difference between two dependent +# correlations corresponding to two independent +# goups. +# +# x is a matrix with two columns, +# y is a vector +# Goal: test equality of Pearson correlation for x1, y versus x2, y. +# +# This function uses an adjusted p-value, the adjustment +# being made assuming normality. +# +# nboot indicates how many samples from a normal distribution +# are used to approximate the adjustment. +# +# Simulations suggest that this fucntion +# continues to work well under non-normality. 
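+# Example (hypothetical data): with x an n-by-2 matrix and y a vector,
+# twodcor10(x,y) returns the adjusted p-value for testing that
+# cor(x[,1],y) and cor(x[,2],y) are equal.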
+# +if(SEED)set.seed(2) +X<-elimna(cbind(x,y)) +if(ncol(X)!=3)stop("x should be a matrix with two columns") +n<-nrow(X) +cval<-cor(X) +nval<-(cval[1,3]+cval[2,3])/2 +cmat<-bdiag(1,3,nval) +cmat[1,2]<-nval +cmat[2,1]<-nval +pval<-NA +for(i in 1:nboot){ +d<-rmul(n,p=3,cmat=cmat) +pval[i]<-twodcor8(d[,1:2],d[,3])$p.value +} +pval<-sort(pval) +iv<-round(alpha*nboot) +est.p<-pval[iv] +adp<-alpha/est.p +test<-twodcor8(X[,1:2],X[,3])$p.value +p.value<-test*adp +if(p.value>1)p.value<-1 +list(p.value=p.value) +} + +matsplit<-function(m,coln=NULL){ +# +# Column coln of matrix m is assumed to have a binary variable +# This functions removes rows with missing values +# and then splits m into two matrices based on the values +# in column coln +# +if(is.null(coln))stop("specify coln") +x<-m[,coln] +val<-unique(x) +if(length(val)>2)stop("More than two values detected in specified column") +flag<-(x==val[1]) +m1<-m[flag,] +m2<-m[!flag,] +list(m1=m1,m2=m2) +} +tkmcp<-function(x,alpha=.05,ind.pval=TRUE){ +# +# conventional Tukey-Kramer multiple comparison procedure +# for all pairiwise comparisons. +# +# ind.pval=T, computes p-value for each individual test +# ind.pval=F computes p-value based on controlling the +# familywise error rate. (The alpha level at which the +# Tukey-Kramer test would reject.) +# +if(is.matrix(x))x<-listm(x) +J<-length(x) +A<-0 +B<-0 +C<-0 +N<-0 +for(j in 1:J){ +N<-N+length(x[[j]]) +A<-A+sum(x[[j]]^2) +B<-B+sum(x[[j]]) +C<-C+(sum(x[[j]]))^2/length(x[[j]]) +} +SST<-A-B^2/N +SSBG<-C-B^2/N +SSWG<-A-C +nu1<-length(x)-1 +nu2<-N-length(x) +MSBG<-SSBG/nu1 +MSWG<-SSWG/nu2 +numcom<-length(x)*(length(x)-1)/2 +output<-matrix(nrow=numcom,ncol=7) +dimnames(output)<-list(NULL,c("Group","Group","t.test","est.difference", +"ci.lower","ci.upper","p.value")) +ic<-0 +for (j in 1:J){ +for (k in 1:J){ +if (j < k){ +ic<-ic+1 +output[ic,1]<-j +output[ic,2]<-k +dif<-mean(x[[j]])-mean(x[[k]]) +output[ic,3]<-abs(dif)/sqrt(MSWG*(1/length(x[[j]])+1/length(x[[k]]))/2) +output[ic,4]<-dif +crit<-qtukey(1-alpha,length(x),nu2) +output[ic,5]<-dif-crit*sqrt(MSWG*(1/length(x[[j]])+1/length(x[[k]]))/2) +output[ic,6]<-dif+crit*sqrt(MSWG*(1/length(x[[j]])+1/length(x[[k]]))/2) +if(!ind.pval)output[ic,7]<-1-ptukey(output[ic,3],length(x),nu2) +if(ind.pval)output[ic,7]<-2*(1-pt(output[ic,3],nu2)) +}}} +output +} + +lstest4<-function(vstar,yhat,res,x){ +ystar <- yhat + res * vstar +p<-ncol(x) +pp<-p+1 +vals<-t(as.matrix(lsfit(x,ystar)$coef[2:pp])) +sa<-lsfitNci4(x, ystar)$cov[-1, -1] +sai<-solve(sa) +test<-(vals)%*%sai%*%t(vals) +test<-test[1,1] +test +} +twodcor10<-function(x,y,nboot=500,SEED=TRUE,alpha=.05){ +# +# Compute a .95 confidence interval for +# the difference between two dependent +# correlations corresponding to two independent +# goups. +# +# x is a matrix with two columns, +# y is a vector +# Goal: test equality of Pearson correlation for x1, y versus x2, y. +# +# This function uses an adjusted p-value, the adjustment +# being made assuming normality. +# +# nboot indicates how many samples from a normal distribution +# are used to approximate the adjustment. +# +# Simulations suggest that this fucntion +# continues to work well under non-normality. 
+# +if(SEED)set.seed(2) +X<-elimna(cbind(x,y)) +if(ncol(X)!=3)stop("x should be a matrix with two columns") +n<-nrow(X) +cval<-cor(X) +nval<-(cval[1,3]+cval[2,3])/2 +cmat<-bdiag(1,3,nval) +cmat[1,2]<-nval +cmat[2,1]<-nval +pval<-NA +for(i in 1:nboot){ +d<-rmul(n,p=3,cmat=cmat) +pval[i]<-twodcor8(d[,1:2],d[,3])$p.value +} +pval<-sort(pval) +iv<-round(alpha*nboot) +est.p<-pval[iv] +adp<-alpha/est.p +test<-twodcor8(X[,1:2],X[,3])$p.value +p.value<-test*adp +if(p.value>1)p.value<-1 +list(p.value=p.value) +} + +twodcor8<-function(x,y){ +# +# Compute a .95 confidence interval for +# the difference between two dependent +# correlations corresponding to two independent +# goups. +# +# +# x is a matrix with two columns, +# y is a vector +# Goal: test equality of Pearson correlation for x1, y versus x2, y. +# +# For general use, twodcor10 is probably better, +# which calls this function and estimates an adjusted p-value. +# +X<-elimna(cbind(x,y)) +Z1<-(X[,1]-mean(X[,1]))/sqrt(var(X[,1])) +Z2<-(X[,2]-mean(X[,2]))/sqrt(var(X[,2])) +temp<-cor.test(Z1-Z2,X[,3]) +temp<-temp[3]$p.value +list(p.value=temp) +} + +lsfitNci4<-function(x,y,alpha=.05){ +# +# Compute confidence for least squares +# regression using heteroscedastic method +# recommended by Cribari-Neto (2004). +# +x<-as.matrix(x) +if(nrow(x) != length(y))stop("Length of y does not match number of x values") +m<-cbind(x,y) +m<-elimna(m) +y<-m[,ncol(x)+1] +temp<-lsfit(x,y) +x<-cbind(rep(1,nrow(x)),m[,1:ncol(x)]) +xtx<-solve(t(x)%*%x) +h<-diag(x%*%xtx%*%t(x)) +n<-length(h) +d<-(n*h)/sum(h) +for(i in 1:length(d)){ + d[i]<-min(4, d[i]) +} +hc4<-xtx%*%t(x)%*%diag(temp$res^2/(1-h)^d)%*%x%*%xtx +df<-nrow(x)-ncol(x) +crit<-qt(1-alpha/2,df) +al<-ncol(x) +ci<-matrix(NA,nrow=al,ncol=3) +for(j in 1:al){ +ci[j,1]<-j +ci[j,2]<-temp$coef[j]-crit*sqrt(hc4[j,j]) +ci[j,3]<-temp$coef[j]+crit*sqrt(hc4[j,j]) +} +list(ci=ci,stand.errors=sqrt(diag(hc4)), cov=hc4) +} + + +hc4qtest<-function(x,y,k,nboot=500,SEED=TRUE){ +# +# Test the hypothesis that a OLS slope is zero using HC4 wild bootstrap using quasi-t test. +# k is the index of coefficient being tested +# +if(SEED)set.seed(2) +x<-as.matrix(x) +# First, eliminate any rows of data with missing values. +temp <- cbind(x, y) + temp <- elimna(temp) + pval<-ncol(temp)-1 + x <- temp[,1:pval] + y <- temp[, pval+1] +x<-as.matrix(x) +p<-ncol(x) +pp<-p+1 +temp<-lsfit(x,y) +yhat<-mean(y) +res<-y-yhat +s<-lsfitNci4(x, y)$cov[-1, -1] +s<-as.matrix(s) +si<-s[k,k] +b<-temp$coef[2:pp] +qtest<-b[k]/sqrt(si) +data<-matrix(runif(length(y)*nboot),nrow=nboot) +data<-(data-.5)*sqrt(12) # standardize the random numbers. +rvalb<-apply(data,1,lsqtest4,yhat,res,x, k) +sum<-sum(abs(rvalb)>= abs(qtest[1])) +p.val<-sum/nboot +list(p.value=p.val) +} + +lsqtest4<-function(vstar,yhat,res,x, k){ +ystar <- yhat + res * vstar +p<-ncol(x) +pp<-p+1 +vals<-lsfit(x,ystar)$coef[2:pp] +sa<-lsfitNci4(x, ystar)$cov[-1, -1] +sa<-as.matrix(sa) +sai<-sa[k,k] +test<-vals[k]/sqrt(sai) +test +} +mrm1way<-function(x,q=.5,grp=NA,bop=FALSE,SEED=TRUE,mop=FALSE){ +# Perform a within groups one-way ANOVA using medians +# +# If grp specified, do analysis on only the groups in grp. +# Example: grp=(c(1,4)), compare groups 1 and 4 only. 
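+# Illustration (hypothetical data, a sketch only): if 'scores' is an n-by-K
+# matrix whose kth column holds the measures for condition k, then
+#   mrm1way(x = scores, q = .5)
+# tests the hypothesis that the K dependent groups have equal medians.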
+# +# bop=F, use non-bootstrap estimate of covariance matrix +# bop=T, use bootstrap +# +# mop=T, use usual median, otherwise use single order statistic +# +if(is.data.frame(x))x=as.matrix(x) +if(SEED)set.seed(2) +if(is.matrix(x))x<-listm(x) +K<-length(x) # Number of groups +p<-K +if(is.na(grp[1]))grp<-c(1:p) +x<-x[grp] +if(!is.list(x))stop("Data are not stored in list mode or a matrix") +tmeans<-0 +n<-length(x[[1]]) +v<-matrix(0,p,p) +if(!mop){ +for (i in 1:p)tmeans[i]<-qest(x[[i]],q=q) +if(!bop)v<-covmmed(x,q=q) +if(bop)v<-bootcov(x,pr=FALSE,est=qest,q=q) +} +if(mop){ +tmeans[i]<-median(x[[i]]) +v<-bootcov(x,pr=FALSE) +} +km1<-K-1 +ck<-diag(1,km1,K) +for (i in 1:km1)ck[i,i+1]<-0-1 +Qb<-johansp(ck,tmeans,v,n,1,K) +p.value<-Qb$p.value +if(n>=20)p.value<-1-pf(Qb$teststat,K-1,999) +list(test.stat=Qb$teststat,p.value=p.value) +} + +rmul<-function(n,p=2,cmat=NULL,rho=0, +mar.fun=ghdist,OP=FALSE,g=0,h=0,...){ +# +# generate n observations from a p-variate dist +# By default, use normal distributions. +# +# Can generate data form a g-and-h distribution via the arguments +# g and h +# +# To adjust rho so that Pearson = remains equal to rho after transforming, use rngh +# +# Example rmul(30,p=4,rho=.3,g=.5,h=.2) will +# generate 30 vectors from a 4-variate distribution where the marginals +# have a g-and-h distribution with g=.5 and h=.2. +# +# This function is similar to ghmul, only here, generate the marginal values +# and then transform the data to have correlation matrix cmat +# +# cmat: if specified, is the correlation matrix that is used to generate data +# +# If not specified, data are generated with a common correlation +# rho +# +#OP= TRUE: +# Method (e.g. Browne, M. W. (1968) A comparison of factor analytic +# techniques. Psychometrika, 33, 267-334. +# Let U'U=R be the Cholesky decomposition of R. Generate independent data +# from some dist yielding X. Then XU has population correlation matrix R +# +# OP=FALSE, use mvrnorm to generate data then transform marginals to g-and-h distribution. +# +if(!is.null(cmat)){ +if(ncol(cmat)!=p)stop('cmat: number of columns must equal the value in the argument p') +} +if(abs(rho)>1)stop('rho must be between -1 and 1') +if(is.null(cmat)){ +cmat<-matrix(rho,p,p) +diag(cmat)<-1 +} +if(OP){ +np<-n*p +if(identical(mar.fun,ghdist))x<-matrix(mar.fun(np,g=g,h=h),nrow=n,ncol=p) +else x<-matrix(mar.fun(np,...),nrow=n,ncol=p) +rmat<-matsqrt(cmat) +x<-x%*%rmat +} +if(!OP){ +library(MASS) +x=mvrnorm(n,rep(0,p),cmat) +if(g==0)x=x*exp(h*x^2/2) +if(g>0)x=(exp(g*x)-1)*exp(h*x^2/2)/g +} +x +} + + +L1medcen <- function(X, tol = 1e-08, maxit = 200, m.init = apply(X, 2, median), + trace = FALSE) +{ + ## L1MEDIAN calculates the multivariate L1 median + ## I/O: mX=L1median(X,tol); + ## + ## X : the data matrix + ## tol: the convergence criterium: + ## the iterative process stops when ||m_k - m_{k+1}|| < tol. + ## maxit: maximum number of iterations + ## init.m: starting value for m; typically coordinatewise median + ## + ## Ref: Hossjer and Croux (1995) + ## "Generalizing Univariate Signed Rank Statistics for Testing + ## and Estimating a Multivariate Location Parameter"; + ## Non-parametric Statistics, 4, 293-308. + ## + ## Implemented by Kristel Joossens + ## Many thanks to Martin Maechler for improving the program! 
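+  ## Illustration (hypothetical data, a sketch only): for an n-by-p numeric
+  ## matrix 'dat',
+  ##   L1medcen(dat)$center
+  ## returns the L1 (spatial) median, the point minimizing the sum of
+  ## Euclidean distances to the rows of 'dat'.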
+ + ## slightly faster version of 'sweep(x, 2, m)': + centr <- function(X,m) X - rep(m, each = n) + ## computes objective function in m based on X and a: + mrobj <- function(X,m) sum(sqrt(rowSums(centr(X,m)^2))) + d <- dim(X); n <- d[1]; p <- d[2] + m <- m.init + if(!is.numeric(m) || length(m) != p) + stop("'m.init' must be numeric of length p =", p) + k <- 1 + if(trace) nstps <- 0 + while (k <= maxit) { + mold <- m + obj.old <- if(k == 1) mrobj(X,mold) else obj + X. <- centr(X, m) + Xnorms <- sqrt(rowSums(X. ^ 2)) + inorms <- order(Xnorms) + dx <- Xnorms[inorms] # smallest first, i.e., 0's if there are + X <- X [inorms,] + X. <- X.[inorms,] + ## using 1/x weighting {MM: should this be generalized?} + w <- ## (0 norm -> 0 weight) : + if (all(dn0 <- dx != 0)) 1/dx + else c(rep.int(0, length(dx)- sum(dn0)), 1/dx[dn0]) + delta <- colSums(X. * rep(w,p)) / sum(w) + nd <- sqrt(sum(delta^2)) + + maxhalf <- if (nd < tol) 0 else ceiling(log2(nd/tol)) + m <- mold + delta # computation of a new estimate + ## If step 'delta' is too far, we try halving the stepsize + nstep <- 0 + while ((obj <- mrobj(X, m)) >= obj.old && nstep <= maxhalf) { + nstep <- nstep+1 + m <- mold + delta/(2^nstep) + } + if(trace) { + if(trace >= 2) + cat(sprintf("k=%3d obj=%19.12g m=(",k,obj), + paste(formatC(m),collapse=","), + ")", if(nstep) sprintf(" nstep=%2d halvings",nstep) else "", + "\n", sep="") + nstps[k] <- nstep + } + if (nstep > maxhalf) { ## step halving failed; keep old + m <- mold + ## warning("step halving failed in ", maxhalf, " steps") + break + } + k <- k+1 + } + if (k > maxit) warning("iterations did not converge in ", maxit, " steps") + if(trace == 1) + cat("needed", k, "iterations with a total of", + sum(nstps), "stepsize halvings\n") +# return(m) +list(center=m) +} +spatcen<-function(x){ +# +# compute spatial median +# x is an n by p matrix +# +if(!is.matrix(x))stop("x must be a matrix") +x<-elimna(x) +START<-apply(x,2,median) +val=optim(START,spat.sub,x=x,method='BFGS')$par +list(center=val) +} +olswbtest<-function(x,y,nboot=500,SEED=TRUE,RAD=TRUE,alpha=.05){ +# +# Compute confidence intervals for all OLS slopes +# using HC4 wild bootstrap and Wald test. +# +# This function calls the functions +# olshc4 and +# lstest4 +# +if(SEED)set.seed(2) +x<-as.matrix(x) +# First, eliminate any rows of data with missing values. +temp <- cbind(x, y) + temp <- elimna(temp) + pval<-ncol(temp)-1 + x <- temp[,1:pval] + y <- temp[, pval+1] +x<-as.matrix(x) +p<-ncol(x) +pp<-p+1 +temp<-lsfit(x,y) +yhat<-mean(y) +res<-y-yhat +s<-olshc4(x, y)$cov[-1, -1] +si<-solve(s) +b<-temp$coef[2:pp] +test=abs(b)*sqrt(diag(si)) +if(RAD)data<-matrix(ifelse(rbinom(length(y)*nboot,1,0.5)==1,-1,1),nrow=nboot) +if(!RAD){ +data<-matrix(runif(length(y)*nboot),nrow=nboot) +data<-(data-.5)*sqrt(12) # standardize the random numbers. 
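+# Note on the wild-bootstrap weights: with RAD=TRUE the weights are
+# Rademacher (+1 or -1, each with probability .5); with RAD=FALSE they are
+# uniform deviates rescaled to have mean 0 and variance 1. Both choices give
+# weights with the first two moments the wild bootstrap requires.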
+} +rvalb<-apply(data,1,olswbtest.sub,yhat,res,x) #a p by nboot matrix +rvalb=abs(rvalb) +ic=round((1-alpha)*nboot) +if(p==1)rvalb=t(as.matrix(rvalb)) +temp=apply(rvalb,1,sort) # nboot by p matrix +pvals=NA +for(j in 1:p)pvals[j]=mean((rvalb[j,]>=test[j])) +cr=temp[ic,] +ci=b-cr/diag(sqrt(si)) #dividing because si is reciprocal of sq se +ci=cbind(ci,b+cr/diag(sqrt(si))) +ci=cbind(b,ci) +ci=cbind(c(1:nrow(ci)),ci,test,pvals) +dimnames(ci)<- +list(NULL,c("Slope_No.","Slope_est","Lower.ci","Upper.ci","Test.Stat","p.value")) +ci +} +olswbtest.sub<-function(vstar,yhat,res,x){ +ystar <- yhat + res * vstar +p<-ncol(x) +pp<-p+1 +vals<-t(as.matrix(lsfit(x,ystar)$coef[2:pp])) +sa<-olshc4(x, ystar)$cov[-1, -1] +sai<-solve(sa) +test<-vals*sqrt(diag(sai)) +test +} + + + +regpre<-function(x,y,regfun=lsfit,error=absfun,nboot=100,adz=TRUE, +mval=round(5*log(length(y))),model=NULL,locfun=mean,pr=FALSE, +xout=FALSE,outfun=out,STAND=TRUE, +plotit=TRUE,xlab="Model Number",ylab="Prediction Error",SEED=TRUE,...){ +# +# Estimate prediction error using the regression method +# regfun. The .632 method is used. +# (See Efron and Tibshirani, 1993, pp. 252--254) +# +# The predictor values are assumed to be in the n-by-p matrix x. +# The default number of bootstrap samples is nboot=100 +# +# Prediction error is the expected value of the function error. +# The argument error defaults to squared error. +# +# regfun can be any R function that returns the coefficients in +# the vector regfun$coef, the first element of which contains the +# estimated intercept, the second element contains the estimate of +# the first predictor, etc. +# +# The default value for mval, the number of observations to resample +# for each of the B bootstrap samples is based on results by +# Shao (JASA, 1996, 655-665). (Resampling n vectors of observations +# model selection may not lead to the correct model as n->infinity. +# +# The argument model should have list mode, model[[1]] indicates +# which predictors are used in the first model. For example, storing +# 1,4 in model[[1]] means predictors 1 and 4 are being considered. +# If model is not specified, and number of predictors is at most 5, +# then all models are considered. +# +# If adz=T, added to the models to be considered is where +# all regression slopes are zero. That is, use measure of location only +# corresponding to +# locfun. +# +if(pr){ +print("By default, least squares regression is used, ") +print("But from Wilcox, R. R. 2008, Journal of Applied Statistics, 35, 1-8") +print("Setting regfun=tsreg appears to be a better choice for general use.") +print("That is, replace least squares with the Theil-Sen estimator") +print("Note: Default for the argument error is now absfun") +print(" meaning absolute error is used") +print("To use squared error, set error=sqfun") +} +x<-as.matrix(x) +d<-ncol(x) +p1<-d+1 +temp<-elimna(cbind(x,y)) +x<-temp[,1:d] +y<-temp[,d+1] +x<-as.matrix(x) +if(xout){ +x<-as.matrix(x) +if(!STAND)flag<-outfun(x,plotit=FALSE,...)$keep +if(STAND)flag<-outpro(x,STAND=TRUE,plotit=FALSE)$keep +x<-x[flag,] +y<-y[flag] +x<-as.matrix(x) +} +if(is.null(model)){ +if(d<=5)model<-modgen(d,adz=adz) +if(d>5)model[[1]]<-c(1:ncol(x)) +} +mout<-matrix(NA,length(model),5,dimnames=list(NULL,c("apparent.error", +"boot.est","err.632","var.used","rank"))) +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. 
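+# The .632 estimate computed below combines the apparent error with the
+# bootstrap leave-out error ep0 as .368*apparent + .632*ep0, following
+# Efron and Tibshirani (1993).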
+data<-matrix(sample(length(y),size=mval*nboot,replace=TRUE),nrow=nboot) +bid<-apply(data,1,idb,length(y)) +# bid is an n by nboot matrix. If the jth bootstrap sample from +# 1, ..., mval contains the value i, bid[i,j]=0; otherwise bid[i,j]=1 +for (imod in 1:length(model)){ +nmod=length(model[[imod]])-1 +temp=c(nmod:0) +mout[imod,4]=sum(model[[imod]]*10^temp) +if(sum(model[[imod]]==0)!=1){ +xx<-x[,model[[imod]]] +xx<-as.matrix(xx) +if(sum(model[[imod]]==0)!=1)bvec<-apply(data,1,regpres1,xx,y,regfun,mval,...) +# bvec is a p+1 by nboot matrix. The first row +# contains the bootstrap intercepts, the second row +# contains the bootstrap values for first predictor, etc. +if(sum(model[[imod]]==0)!=1)yhat<-cbind(1,xx)%*%bvec +if(sum(model[[imod]]==0)==1){ +bvec0<-matrix(0,nrow=p1,ncol=nboot) +for(it in 1:nboot){ +bvec0[1,it]<-locfun(y[data[it,]]) +} +yhat<-cbind(1,x)%*%bvec0 +} +# yhat is n by nboot matrix of predicted values based on + # bootstrap regressions. +bi<-apply(bid,1,sum) # B sub i in notation of Efron and Tibshirani, p. 253 +temp<-(bid*(yhat-y)) +diff<-apply(temp,1,error) +ep0<-sum(diff/bi)/length(y) +aperror<-error(regfun(xx,y,...)$resid)/length(y) # apparent error +regpre<-.368*aperror+.632*ep0 +mout[imod,1]<-aperror +mout[imod,3]<-regpre +temp<-yhat-y +diff<-apply(temp,1,error) +mout[imod,2]<-sum(diff)/(nboot*length(y)) +} +if(sum(model[[imod]]==0)==1){ +mout[imod,3]<-locpre(y,error=error,est=locfun,SEED=SEED,mval=mval) +}} +mout[,5]=rank(mout[,3]) +if(plotit)plot(c(1:nrow(mout)),mout[,3],xlab=xlab,ylab=ylab) +list(estimates=mout) +} +push<-function(mat){ +# +# For every column of mat, move entry down 1 +# +matn<-matrix(NA,nrow=nrow(mat),ncol=ncol(mat)) +Jm<-nrow(mat)-1 +for (k in 1:ncol(mat)){ +temp<-mat[,k] +vec<-0 +vec[2:nrow(mat)]<-temp[1:Jm] +matn[,k]<-vec +} +matn +} + +ancova<-function(x1,y1,x2,y2,fr1=1,fr2=1,tr=.2,alpha=.05,plotit=TRUE,pts=NA,sm=FALSE,method="EP",SEED=TRUE, +pr=TRUE,xout=FALSE,outfun=out,LP=FALSE,SCAT=TRUE,xlab='X',ylab='Y',pch1='*',pch2='+', +skip.crit=FALSE,nmin=12,crit.val=1.09,...){ +# +# Compare two independent groups using the ancova method with a single covariate +# No parametric assumption is made about the form of +# the regression lines--a running interval smoother is used. +# +# Assume data are in x1 y1 x2 and y2 +# +# sm=TRUE will create smooths using bootstrap bagging. +# pts can be used to specify the design points where the regression lines +# are to be compared. 
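+# Illustration (hypothetical data, a sketch only): with covariate/outcome
+# pairs (x1, y1) and (x2, y2) for two independent groups, a basic call is
+#   ancova(x1, y1, x2, y2, fr1 = 1, fr2 = 1, tr = .2)
+# which compares the groups at five automatically chosen design points,
+# or at the points supplied via the argument pts.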
+# +# Argument method indicates which measure of effect size will be used +# EP: explanatory measure of effect size (default) +# QS: quantile shift measure of effect size +# AKP: trimmed mean Winsorized variance analog of Cohen's d +# WMW: P(X1)stop('One covariate only is allowed with this function') +if(length(x1)!=length(y1))stop("x1 and y1 have different lengths") +if(length(x2)!=length(y2))stop("x2 and y2 have different lengths") +xy=elimna(cbind(x1,y1)) +x1=xy[,1] +y1=xy[,2] +xy=elimna(cbind(x2,y2)) +x2=xy[,1] +y2=xy[,2] +if(pr){ +print("NOTE: Confidence intervals are adjusted to control the probability") +print("of at least one Type I error.") +#print("But p-values are not") +print('Effect size is based on the argument method, default is explanatory measure of effect size') +print('Other options: QS, quantile shift; AKP, robust analog of Cohen d; WMW, P(X=nmin]) +isub[5]<-max(sub[vecn>=nmin]) +isub[3]<-floor((isub[1]+isub[5])/2) +isub[2]<-floor((isub[1]+isub[3])/2) +isub[4]<-floor((isub[3]+isub[5])/2) +mat<-matrix(NA,5,14) +dimnames(mat)<-list(NULL,c("X","n1","n2","Est1","Est2","DIF","TEST","se","ci.low","ci.hi","p.value","crit.val","Effect.Size",'p.adjusted')) +for (i in 1:5){ +g1<-y1[near(x1,x1[isub[i]],fr1)] +g2<-y2[near(x2,x1[isub[i]],fr2)] +g1<-g1[!is.na(g1)] +g2<-g2[!is.na(g2)] +test<-yuen(g1,g2,tr=tr) +mat[i,1]<-x1[isub[i]] +mat[i,2]<-length(g1) +mat[i,3]<-length(g2) +mat[i,4]<-test$est.1 +mat[i,5]<-test$est.2 +mat[i,6]<-test$dif +mat[i,7]<-test$teststat +mat[i,8]<-test$se +mat[i,13]=ESfun(g1,g2,method=method,pr=FALSE,SEED=SEED) +mat[i,14]=1-psmm(abs(test$teststat),5,test$df) +if(skip.crit)critv=crit.val +#if(!skip.crit){ +critv<-NA +#if(alpha==.05)critv<-smmcrit(test$df,5) +#if(alpha==.01)critv<-smmcrit01(test$df,5) +#if(is.na(critv))critv<-smmval(test$df,5,alpha=alpha) +critv=qsmm(1-alpha,5,test$df) +mat[i,12]<-critv +#} +cilow<-test$dif-critv*test$se +cihi<-test$dif+critv*test$se +mat[i,9]<-cilow +mat[i,10]<-cihi +mat[i,11]<-test$p.value +}} +if(!is.na(pts[1])){ +#if(!skip.crit){ +#if(length(pts)>=29)stop("At most 28 points can be compared") +#} +n1<-1 +n2<-1 +vecn<-1 +for(i in 1:length(pts)){ +n1[i]<-length(y1[near(x1,pts[i],fr1)]) +n2[i]<-length(y2[near(x2,pts[i],fr2)]) +} +mat<-matrix(NA,length(pts),14) +dimnames(mat)<-list(NULL,c("X","n1","n2","Est1","Est2","DIF","TEST","se","ci.low","ci.hi", +"p.value","crit.val","Effect.Size",'p.adjusted')) +for (i in 1:length(pts)){ +g1<-y1[near(x1,pts[i],fr1)] +g2<-y2[near(x2,pts[i],fr2)] +g1<-g1[!is.na(g1)] +g2<-g2[!is.na(g2)] +test<-yuen(g1,g2,tr=tr) +mat[i,1]<-pts[i] +mat[i,2]<-length(g1) +mat[i,3]<-length(g2) +if(length(g1)<=5)print(paste("Warning, there are",length(g1)," points corresponding to the design point X=",pts[i])) +if(length(g2)<=5)print(paste("Warning, there are",length(g2)," points corresponding to the design point X=",pts[i])) +mat[i,4]<-test$est.1 +mat[i,5]<-test$est.2 +mat[i,6]<-test$dif +mat[i,7]<-test$teststat +mat[i,8]<-test$se +mat[i,13]=ESfun(g1,g2,method=method,pr=FALSE,SEED=SEED) +mat[i,14]=1-psmm(abs(test$teststat),length(pts),test$df) +if(skip.crit)critv=crit.val +if(!skip.crit){ +if(length(pts)>=2)critv=qsmm(1-alpha,length(pts),test$df) #smmcrit(test$df,length(pts)) +if(length(pts)==1)critv<-qt(.975,test$df) +} +cilow<-test$dif-critv*test$se +cihi<-test$dif+critv*test$se +mat[i,9]<-cilow +mat[i,10]<-cihi +mat[i,11]<-test$p.value +mat[i,12]<-critv +}} +if(plotit){ +runmean2g(x1,y1,x2,y2,fr=fr1,est=tmean,tr=tr,sm=sm,xout=FALSE,LP=LP, +SCAT=SCAT,xlab=xlab,ylab=ylab,pch1=pch1,pch2=pch2,...) 
+} +list(output=mat) +} +miss2na<-function(m,na.val=NULL){ +# +# Convert any missing value, indicatd by na.val, +# to NA. +# +# Example, if 999 is missing value, use miss2na(m,999) +# +if(is.null(na.val))stop("Specify a missing value") +if(is.vector(m)){ +if(!is.list(m)){ +flag=(m==na.val) +m[flag]=NA +}} +if(is.matrix(m)){ +for(j in 1:ncol(m)){ +x=m[,j] +flag=(x==na.val) +x[flag]=NA +m[,j]=x +}} +if(is.list(m)){ +for(j in 1:length(m)){ +x=m[[j]] +flag=(x==na.val) +x[flag]=NA +m[[j]]=x +}} +m +} + +plotCI <- function (x, y = NULL, uiw=NULL, liw = uiw, aui=NULL, ali=aui, + err="y", ylim=NULL, sfrac = 0.01, gap=0, add=FALSE, + col=par("col"), lwd=par("lwd"), slty=par("lty"), xlab=NULL, + ylab=NULL, ...) { +## originally from Bill Venables, R-list + if (is.list(x)) { + y <- x$y + x <- x$x + } + if (is.null(y)) { + if (is.null(x)) + stop("both x and y NULL") + y <- as.numeric(x) + x <- seq(along = x) + } + if (missing(xlab)) xlab <- deparse(substitute(x)) + if (missing(ylab)) ylab <- deparse(substitute(y)) + if (missing(uiw)) { ## absolute limits + ui <- aui + li <- ali + } + else { ## relative limits + if (err=="y") z <- y else z <- x + if(is.null(uiw))stop("Argument uiw, the width of the interval, must be specified") + ui <- z + uiw + li <- z - liw + } + if (is.null(ylim)) ylim <- range(c(y, ui, li), na.rm=TRUE) + if (add) { + points(x, y, col=col, lwd=lwd, ...) + } else { + plot(x, y, ylim = ylim, col=col, lwd=lwd, xlab=xlab, ylab=ylab, ...) + } + if (gap==TRUE) gap <- 0.01 ## default gap size + ul <- c(li, ui) + if (err=="y") { + gap <- rep(gap,length(x))*diff(par("usr")[3:4]) # smidge <- diff(par("usr")[1:2]) * sfrac + smidge <- par("fin")[1] * sfrac +# segments(x , li, x, pmax(y-gap,li), col=col, lwd=lwd, lty=slty) +# segments(x , ui, x, pmin(y+gap,ui), col=col, lwd=lwd, lty=slty) + arrows(x , li, x, pmax(y-gap,li), col=col, lwd=lwd, lty=slty, angle=90, length=smidge, code=1) + arrows(x , ui, x, pmin(y+gap,ui), col=col, lwd=lwd, lty=slty, angle=90, length=smidge, code=1) + ## horizontal segments +# x2 <- c(x, x) +# segments(x2 - smidge, ul, x2 + smidge, ul, col=col, lwd=lwd) + } + else if (err=="x") { + gap <- rep(gap,length(x))*diff(par("usr")[1:2]) + smidge <- par("fin")[2] * sfrac +# smidge <- diff(par("usr")[3:4]) * sfrac + arrows(li, y, pmax(x-gap,li), y, col=col, lwd=lwd, lty=slty, angle=90, length=smidge, code=1) + arrows(ui, y, pmin(x+gap,ui), y, col=col, lwd=lwd, lty=slty, angle=90, length=smidge, code=1) + ## vertical segments +# y2 <- c(y, y) +# segments(ul, y2 - smidge, ul, y2 + smidge, col=col, lwd=lwd) + } + invisible(list(x = x, y = y)) +} +bdanova2<-function(x1,x2=NULL,alpha=.05,power=.9,delta){ +# +# Do the second stage of the Bishop-Duewicz ANOVA +# +if(is.null(x2[1])){ +stage1=bdanova1(x1,alpha=alpha,power=power,delta=delta) +return(list(N=stage1$N,d=stage1$d,crit=stage1$crit)) +} +if(!is.null(x2[1])){ +if(is.na(delta))stop("A value for delta was not specified") +if(!is.list(x1)){ +if(!is.matrix(x1))stop("Data must be stored in a matrix or in list mode") +y<-x1 +x1<-list() +for(j in 1:ncol(y))x1[[j]]<-y[,j] +} +if(is.na(delta))stop("A value for delta was not specified") +if(!is.list(x2)){ +if(!is.matrix(x2))stop("Data must be stored in matrix or in list mode") +y<-x2 +x2<-list() +for(j in 1:ncol(y))x2[[j]]<-y[,j] +} +if(length(x1)!=length(x2))stop("Length of x1 does not match the length of x2") +TT<-NA +U<-NA +J<-length(x1) +nvec<-NA +nvec2<-NA +svec<-NA +for(j in 1:length(x1)){ +nvec[j]<-length(x1[[j]]) +nvec2[j]<-length(x2[[j]]) +svec[j]<-var(x1[[j]]) 
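+# Within this loop, svec records the first-stage sample variance for group j,
+# while TT and U (below) accumulate the first- and second-stage sums for
+# group j.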
+TT[j]<-sum(x1[[j]]) +U[j]<-sum(x2[[j]]) +} +temp<-bdanova1(x1,alpha=alpha,power=power,delta=delta) +need<-temp$N-nvec +#for(j in 1:length(x1))print(c(nvec2[j],need[j])) +for(j in 1:length(x1))if(nvec2[j]=dv[1:nboot])/nboot +if(op==4)print(sig.level) + +list(p.value=sig.level,output=output) +} + +rm2mcp<-function(J,K,x,est=tmean,alpha=.05,grp=NA,dif=TRUE,nboot=NA, +plotit=FALSE,BA=FALSE,hoch=FALSE,...){ +# +# This function performs multiple comparisons for +# dependent groups in a within by within designs. +# It creates the linear contrasts and calls rmmcppb +# assuming that main effects and interactions for a +# two-way design are to be tested. +# + # The data are assumed to be stored in x in list mode or in a matrix. + # If grp is unspecified, it is assumed x[[1]] contains the data + # for the first level of both factors: level 1,1. + # x[[2]] is assumed to contain the data for level 1 of the + # first factor and level 2 of the second factor: level 1,2 + # x[[j+1]] is the data for level 2,1, etc. + # If the data are in wrong order, grp can be used to rearrange the + # groups. For example, for a two by two design, grp<-c(2,4,3,1) + # indicates that the second group corresponds to level 1,1; + # group 4 corresponds to level 1,2; group 3 is level 2,1; + # and group 1 is level 2,2. + # + # Missing values are automatically removed. + # +if(is.data.frame(x))x=as.matrix(x) + JK <- J * K + if(is.matrix(x)) + x <- listm(x) + if(!is.na(grp[1])) { + yy <- x + for(j in 1:length(grp)) + x[[j]] <- yy[[grp[j]]] + } + if(!is.list(x)) + stop("Data must be stored in list mode or a matrix.") + for(j in 1:JK) { + xx <- x[[j]] + # xx[[j]] <- xx[!is.na(xx)] + x[[j]] <- xx[!is.na(xx)] + } + # + # Create the three contrast matrices + # +temp<-con2way(J,K) +conA<-temp$conA +conB<-temp$conB +conAB<-temp$conAB + ncon <- max(nrow(conA), nrow(conB), nrow(conAB)) +FacA<-rmmcppb(x,con=conA,est=est,plotit=plotit,dif=dif,grp=grp, +nboot=nboot,BA=TRUE,hoch=FALSE,...) +FacB<-rmmcppb(x,con=conB,est=est,plotit=plotit,dif=dif,grp=grp, +nboot=nboot,BA=TRUE,hoch=FALSE,...) +FacAB<-rmmcppb(x,con=conAB,est=est,plotit=plotit,dif=dif,grp=grp, +nboot=nboot,BA=TRUE,hoch=FALSE,...) +list(Factor.A=FacA,Factor.B=FacB,Factor.AB=FacAB) + +} + +acbinomci<-function(x=sum(y),nn=length(y),y=NULL,n=NA,alpha=.05){ +# +# Compute a 1-alpha confidence interval for p, the probability of +# success for a binomial distribution, using a generalization of the +# Agresti-Coull method that was studied by Brown, Cai DasGupta +# (Annals of Statistics, 2002, 30, 160-201.) +# +# y is a vector of 1s and 0s. +# x is number of successes. +# +if(!is.null(y[1])){ +y=elimna(y) +nn=length(y) +} +if(nn==1)stop("Something is wrong: number of observations is only 1") +n<-nn +cr=qnorm(1-alpha/2) +ntil=n+cr^2 +ptil=(x+cr^2/2)/ntil +if(x!=n && x!=0){ +lower=ptil-cr*sqrt(ptil*(1-ptil)/ntil) +upper=ptil+cr*sqrt(ptil*(1-ptil)/ntil) +} +if(x==0){ #Use Clopper-Pearson +lower<-0 +upper<-1-alpha^(1/n) +} +if(x==1){ +upper<-1-(alpha/2)^(1/n) +lower<-1-(1-alpha/2)^(1/n) +} +if(x==n-1){ +lower<-(alpha/2)^(1/n) +upper<-(1-alpha/2)^(1/n) +} +if(x==n){ +lower<-alpha^(1/n) +upper<-1 +} +phat<-x/n +list(phat=phat,se=sqrt(ptil*(1-ptil)/ntil),ci=c(lower,upper),n=n) +} + +covmtrim<-function(x,tr=.2,p=length(x),grp=c(1:p)){ +# +# Estimate the covariance matrix for the sample trimmed means corresponding +# to the data in the R variable x, +# which is assumed to be stored in list mode or a matrix. +# (x[[1]] contains the data for group 1, x[[2]] the data for group 2, etc.) 
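+# Illustration (hypothetical data, a sketch only): for an n-by-J matrix 'dat'
+# whose columns are J dependent groups with equal sample sizes,
+#   covmtrim(dat, tr = .2)
+# returns the estimated covariance matrix of the J sample trimmed means.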
+# The function returns a p by p matrix of covariances, the diagonal +# elements being equal to the squared standard error of the sample +# trimmed means, where p is the number of groups to be included. +# By default, all the groups in x are used, but a subset of +# the groups can be used via grp. For example, if +# the goal is to estimate the covariances between the sample trimmed +# means for groups 1, 2, and 5, use the command grp<-c(1,2,5) +# before calling this function. +# +# The default amount of trimming is 20% +# +# Missing values (values stored as NA) are not allowed. +# +# This function uses winvar from chapter 2. +# +if(is.list(x))x=matl(x) +x=elimna(x) +x=listm(x) +if(!is.list(x))stop("The data are not stored in list mode or a matrix.") +p<-length(grp) +pm1<-p-1 +for (i in 1:pm1){ +ip<-i+1 +if(length(x[[grp[ip]]])!=length(x[[grp[i]]]))stop("The number of observations in each group must be equal") +} +n<-length(x[[grp[1]]]) +h<-length(x[[grp[1]]])-2*floor(tr*length(x[[grp[1]]])) +covest<-matrix(0,p,p) +covest[1,1]<-(n-1)*winvar(x[[grp[1]]],tr)/(h*(h-1)) +for (j in 2:p){ +jk<-j-1 +covest[j,j]<-(n-1)*winvar(x[[grp[j]]],tr)/(h*(h-1)) +for (k in 1:jk){ +covest[j,k]<-(n-1)*wincor(x[[grp[j]]],x[[grp[k]]],tr)$cov/(h*(h-1)) +covest[k,j]<-covest[j,k] +} +} +covmtrim<-covest +covmtrim +} +bwwcovm<-function(J,K,L,x,tr=.2){ +# +# compute covariance matrix for a between by within by within design +# +p=J*K*L +idep=K*L +mat=matrix(0,nrow=p,ncol=p) +id=c(1:idep) +for(j in 1:J){ +mat[id,id]=covmtrim(x[id],tr=tr) +id=id+idep +} +mat +} +bwwmatna<-function(J,K,L,x){ +# +# data are assumed to be stored in a matrix +# for a between by within by within (three-way) anova, +# for the last two factors, eliminate any missing values +# and then store the data in list mode. +# +if(is.data.frame(x))x=as.matrix(x) +y=list() +ad=K*L +ilow=1 +iup=ad +ic=0 +for(j in 1:J){ +z=x[,ilow:iup] +d=elimna(z) +im=0 +for(k in 1:K){ +for(l in 1:L){ +ic=ic+1 +im=im+1 +y[[ic]]=d[,im] +}} +ilow=ilow+ad +iup=iup+ad +} +y +} +bwwna<-function(J,K,L,x){ +# +# data are assumed to be stored in list mode +# for a between by within by within (three-way) anova, +# for the last two factors, eliminate any missing values. +# +if(is.data.frame(x))x=as.matrix(x) +y=list() +ad=K*L +ilow=1 +iup=ad +ic=0 +for(j in 1:J){ +z=x[ilow:iup] +d=elimna(matl(z)) +#print(d) +im=0 +for(k in 1:K){ +for(l in 1:L){ +ic=ic+1 +im=im+1 +y[[ic]]=d[,im] +}} +ilow=ilow+ad +iup=iup+ad +} +y +} +bwwtrim<-function(J,K,L,data,tr=.2,grp=c(1:p),alpha=.05,p=J*K*L){ +# Perform a between by within by within (three-way) anova +# on trimmed means where +# +# J independent groups, KL dependent groups +# +# The variable data is assumed to contain the raw +# data stored in list mode. data[[1]] contains the data +# for the first level of all three factors: level 1,1,1. +# data][2]] is assumed to contain the data for level 1 of the +# first two factors and level 2 of the third factor: level 1,1,2 +# data[[L]] is the data for level 1,1,L +# data[[L+1]] is the data for level 1,2,1. data[[2L]] is level 1,2,L. +# data[[KL+1]] is level 2,1,1, etc. +# +# The default amount of trimming is tr=.2 +# +# It is assumed that data has length JKL, the total number of +# groups being tested. 
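+# Illustration (hypothetical data, a sketch only): for a 2-by-3-by-4 design
+# with the JKL = 24 groups stored in list mode (or as the columns of a
+# matrix) in the order described above,
+#   bwwtrim(J = 2, K = 3, L = 4, data = dat, tr = .2)
+# returns test statistics and p-values for all main effects and interactions.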
+# +if(is.data.frame(data))data=as.matrix(data) +if(is.list(data))data=bwwna(J,K,L,data) # remove missing values +if(is.matrix(data))data=bwwmatna(J,K,L,data) #remove missing values +# and convert to list mode +if(!is.list(data))stop("The data are not stored in list mode or a matrix") +if(p!=length(data)){ +print("The total number of groups, based on the specified levels, is") +print(p) +print("The number of groups in data is") +print(length(data)) +print("Warning: These two values are not equal") +} +tmeans<-0 +h<-0 +v<-0 +for (i in 1:p){ +tmeans[i]<-mean(data[[grp[i]]],tr) +h[i]<-length(data[[grp[i]]])-2*floor(tr*length(data[[grp[i]]])) +# h is the effective sample size +} +v=bwwcovm(J,K,L,data,tr=tr) +ij<-matrix(c(rep(1,J)),1,J) +ik<-matrix(c(rep(1,K)),1,K) +il<-matrix(c(rep(1,L)),1,L) +jm1<-J-1 +cj<-diag(1,jm1,J) +cj<-diag(1,jm1,J) +for (i in 1:jm1)cj[i,i+1]<-0-1 +km1<-K-1 +ck<-diag(1,km1,K) +for (i in 1:km1)ck[i,i+1]<-0-1 +lm1<-L-1 +cl<-diag(1,lm1,L) +for (i in 1:lm1)cl[i,i+1]<-0-1 +# Do test for factor A +cmat<-kron(cj,kron(ik,il)) # Contrast matrix for factor A +Qa=bwwtrim.sub(cmat, tmeans, v, h,p) +Qa.siglevel <- 1 - pf(Qa, J - 1, 999) +# Do test for factor B +cmat<-kron(ij,kron(ck,il)) # Contrast matrix for factor B +Qb=bwwtrim.sub(cmat, tmeans, v, h,p) + Qb.siglevel <- 1 - pf(Qb, K - 1, 999) +# Do test for factor C +cmat<-kron(ij,kron(ik,cl)) # Contrast matrix for factor C +Qc<-bwwtrim.sub(cmat, tmeans, v, h,p) +Qc.siglevel <- 1 - pf(Qc, L - 1, 999) +# Do test for factor A by B interaction +cmat<-kron(cj,kron(ck,il)) # Contrast matrix for factor A by B +Qab<-bwwtrim.sub(cmat, tmeans, v, h,p) +Qab.siglevel <- 1 - pf(Qab, (J - 1) * (K - 1), 999) +# Do test for factor A by C interaction +cmat<-kron(cj,kron(ik,cl)) # Contrast matrix for factor A by C +Qac<-bwwtrim.sub(cmat, tmeans, v, h,p) +Qac.siglevel <- 1 - pf(Qac, (J - 1) * (L - 1), 999) +# Do test for factor B by C interaction +cmat<-kron(ij,kron(ck,cl)) # Contrast matrix for factor B by C +Qbc<-bwwtrim.sub(cmat, tmeans, v, h,p) +Qbc.siglevel <- 1 - pf(Qbc, (K - 1) * (L - 1), 999) +# Do test for factor A by B by C interaction +cmat<-kron(cj,kron(ck,cl)) # Contrast matrix for factor A by B by C +Qabc<-bwwtrim.sub(cmat, tmeans, v, h,p) +Qabc.siglevel <-1-pf(Qabc,(J-1)*(K-1)*(L-1), 999) +list(Qa=Qa,Qa.p.value=Qa.siglevel,Qb=Qb,Qb.p.value=Qb.siglevel, +Qc=Qc,Qc.p.value=Qc.siglevel,Qab=Qab,Qab.p.value=Qab.siglevel, +Qac=Qac,Qac.p.value=Qac.siglevel,Qbc=Qbc,Qbc.p.value=Qbc.siglevel, +Qabc=Qabc,Qabc.p.value=Qabc.siglevel) +} + + +bbwcovm<-function(J,K,L,x,tr=.2){ +# +# compute covariance matrix for a between by between by within design +# +p=J*K*L +idep=L +mat=matrix(0,nrow=p,ncol=p) +id=c(1:idep) +for(j in 1:J){ +for(k in 1:K){ +mat[id,id]=covmtrim(x[id],tr=tr) +id=id+idep +}} +mat +} +bbwmatna<-function(J,K,L,x){ +# +# data are assumed to be stored in a matrix +# for a between by within by within (three-way) anova. +# For the last factor, eliminate any missing values +# and then store the data in list mode. +# +y=list() +ad=L +ilow=1 +iup=ad +ic=0 +for(j in 1:J){ +for(k in 1:K){ +z=x[,ilow:iup] +d=elimna(z) +im=0 +for(l in 1:L){ +ic=ic+1 +im=im+1 +y[[ic]]=d[,im] +} +ilow=ilow+ad +iup=iup+ad +}} +y +} +bbwna<-function(J,K,L,x){ +# +# x: data are assumed to be stored in list mode +# for a between by within by within (three-way) anova. +# For the last factor, eliminate any missing values. 
+# +y=list() +ad=L +ilow=1 +iup=ad +ic=0 +for(j in 1:J){ +for(k in 1:K){ +z=x[ilow:iup] +d=as.matrix(elimna(matl(z))) +im=0 +ilow=ilow+ad +iup=iup+ad +for(l in 1:L){ +ic=ic+1 +im=im+1 +y[[ic]]=d[,im] +}} +} +y +} +bbwtrim<-function(J,K,L,data,tr=.2,alpha=.05,p=J*K*L){ +# Perform a between-between-within (three-way) anova on trimmed means where +# +# JK independent groups, L dependent groups +# +# The variable data is assumed to contain the raw +# data stored in list mode. data[[1]] contains the data +# for the first level of all three factors: level 1,1,1. +# data][2]] is assumed to contain the data for level 1 of the +# first two factors and level 2 of the third factor: level 1,1,2 +# data[[L]] is the data for level 1,1,L +# data[[L+1]] is the data for level 1,2,1. data[[2L]] is level 1,2,L. +# data[[KL+1]] is level 2,1,1, etc. +# +# The default amount of trimming is tr=.2 +# +# It is assumed that data has length JKL, the total number of +# groups being tested. +# +if(is.data.frame(data)) data <- as.matrix(data) +if(is.list(data))data=bbwna(J,K,L,data) +if(is.matrix(data))data=bbwmatna(J,K,L,data) +grp=c(1:p) +data=bbwna(J,K,L,data) +if(!is.list(data))stop("Data are not stored in list mode") +if(p!=length(data)){ +print("The total number of groups, based on the specified levels, is") +print(p) +print("The number of groups is") +print(length(data)) +print("Warning: These two values are not equal") +} +tmeans<-0 +h<-0 +v<-0 +for (i in 1:p){ +tmeans[i]<-mean(data[[grp[i]]],tr) +h[i]<-length(data[[grp[i]]])-2*floor(tr*length(data[[grp[i]]])) +# h is the effective sample size +} +v=bbwcovm(J,K,L,data,tr=tr) +ij<-matrix(c(rep(1,J)),1,J) +ik<-matrix(c(rep(1,K)),1,K) +il<-matrix(c(rep(1,L)),1,L) +jm1<-J-1 +cj<-diag(1,jm1,J) +cj<-diag(1,jm1,J) +for (i in 1:jm1)cj[i,i+1]<-0-1 +km1<-K-1 +ck<-diag(1,km1,K) +for (i in 1:km1)ck[i,i+1]<-0-1 +lm1<-L-1 +cl<-diag(1,lm1,L) +for (i in 1:lm1)cl[i,i+1]<-0-1 +# Do test for factor A +cmat<-kron(cj,kron(ik,il)) # Contrast matrix for factor A +Qa=bwwtrim.sub(cmat, tmeans, v, h,p) +Qa.siglevel <- 1 - pf(Qa, J - 1, 999) +# Do test for factor B +cmat<-kron(ij,kron(ck,il)) # Contrast matrix for factor B +Qb=bwwtrim.sub(cmat, tmeans, v, h,p) + Qb.siglevel <- 1 - pf(Qb, K - 1, 999) +# Do test for factor C +cmat<-kron(ij,kron(ik,cl)) # Contrast matrix for factor C +Qc<-bwwtrim.sub(cmat, tmeans, v, h,p) +Qc.siglevel <- 1 - pf(Qc, L - 1, 999) +# Do test for factor A by B interaction +cmat<-kron(cj,kron(ck,il)) # Contrast matrix for factor A by B +Qab<-bwwtrim.sub(cmat, tmeans, v, h,p) +Qab.siglevel <- 1 - pf(Qab, (J - 1) * (K - 1), 999) +# Do test for factor A by C interaction +cmat<-kron(cj,kron(ik,cl)) # Contrast matrix for factor A by C +Qac<-bwwtrim.sub(cmat, tmeans, v, h,p) +Qac.siglevel <- 1 - pf(Qac, (J - 1) * (L - 1), 999) +# Do test for factor B by C interaction +cmat<-kron(ij,kron(ck,cl)) # Contrast matrix for factor B by C +Qbc<-bwwtrim.sub(cmat, tmeans, v, h,p) +Qbc.siglevel <- 1 - pf(Qbc, (K - 1) * (L - 1), 999) +# Do test for factor A by B by C interaction +cmat<-kron(cj,kron(ck,cl)) # Contrast matrix for factor A by B by C +Qabc<-bwwtrim.sub(cmat, tmeans, v, h,p) +Qabc.siglevel <-1-pf(Qabc,(J-1)*(K-1)*(L-1), 999) +list(Qa=Qa,Qa.p.value=Qa.siglevel,Qb=Qb,Qb.p.value=Qb.siglevel, +Qc=Qc,Qc.p.value=Qc.siglevel,Qab=Qab,Qab.p.value=Qab.siglevel, +Qac=Qac,Qac.p.value=Qac.siglevel,Qbc=Qbc,Qbc.p.value=Qbc.siglevel, +Qabc=Qabc,Qabc.p.value=Qabc.siglevel) +} + + +bwwtrim.sub<-function(cmat,vmean,vsqse,h,p){ +# +# The function computes variation of Johansen's test 
statistic +# used to test the hypothesis C mu = 0 where +# C is a k by p matrix of rank k and mu is a p by 1 matrix of +# of unknown trimmed means. +# The argument cmat contains the matrix C. +# vmean is a vector of length p containing the p trimmed means +# vsqe is matrix containing the +# estimated covariances among the trimmed means +# h is the sample size +# +yvec<-matrix(vmean,length(vmean),1) +test<-cmat%*%vsqse%*%t(cmat) +invc<-solve(test) +test<-t(yvec)%*%t(cmat)%*%invc%*%cmat%*%yvec +temp<-0 +mtem<-vsqse%*%t(cmat)%*%invc%*%cmat +temp<-(sum(diag(mtem%*%mtem))+(sum(diag(mtem)))^2)/(h-1) +A<-.5*sum(temp) +cval<-nrow(cmat)+2*A-6*A/(nrow(cmat)+2) +test<-test/cval +test +} + +ghmean<-function(g,h){ +# +#Compute the mean and variance of a g-and-h distribution +# +val=0 +if(h==0){ +if(g>0){ +val=(exp(g^2/2)-1)/g +val2=(1-2*exp(g^2/2)+exp(2*g^2))/g^2 +val2=val2-val^2 +}} +if(g>0 & h!=0){ +if(h<1) +val=(exp(g^2/(2*(1-h)))-1)/(g*sqrt(1-h)) +val2=NA +if(h>0){ +if(h<.5) +val2=(exp(2*g^2/(1-2*h))-2*exp(g^2/(2*(1-2*h)))+1)/(g^2*sqrt(1-2*h))- +(exp(g^2/(2*(1-h)))-1)^2/(g^2*(1-h)) +}} +if(g==0){ +val=0 +val2=1/(1-2*h)^1.5 #Headrick et al. (2008) +} +list(mean=val,variance=val2) +} + +gskew<-function(g){ +# +# skew and kurtosis of a g-and-h distribution when h=0 +# +# +v1=sqrt(3*exp(2*g^2)+exp(3*g^2)-4) +v2=3*exp(2*g^2)+2*exp(3*g^2)+exp(4*g^2)-3 #Headrick has -6 not -3, but based on n=1000000, -3 works +list(skew=v1,kurtosis=v2) +} + + +skew<-function(x){ +# +# Compute skew and kurtosis +# +x=elimna(x) +m1<-mean(x) +m2<-var(x) +m3<-sum((x-m1)^3)/length(x) +m4<-sum((x-m1)^4)/length(x) +sk<-m3/m2^1.5 +ku<-m4/m2^2 +list(skew=sk,kurtosis=ku) +} + +t3pval<-function(cmat,tmeans,v,h){ +alph<-c(1:99)/100 +for(i in 1:99){ +irem<-i +chkit<-johan(cmat,tmeans,v,h,alph[i]) +if(chkit$teststat>chkit$crit)break +} +p.value <- irem/100 + if(p.value <= 0.1) { + iup <- (irem + 1)/100 + alph <- seq(0.001, iup, 0.001) + for(i in 1:length(alph)) { + p.value <- alph[i] + chkit<-johan(cmat,tmeans,v,h,alph[i]) +if(chkit$teststat>chkit$crit)break + } + } + if(p.value <= 0.001) { + alph <- seq(0.0001, 0.001, 0.0001) + for(i in 1:length(alph)) { + p.value <- alph[i] +chkit<-johan(cmat,tmeans,v,h,alph[i]) +if(chkit$teststat>chkit$crit)break + } + } +p.value +} + +t1way<-function(x,tr=.2,grp=NA,MAT=FALSE,lev.col=1,var.col=2,IV=NULL,pr=TRUE){ +# +# A heteroscedastic one-way ANOVA for trimmed means +# using a generalization of Welch's method. +# +# The data are assumed to be stored in $x$ in a matrix or in list mode. +# +# MAT=F, if x is a matrix, columns correspond to groups. +# if MAT=T, assumes argument +# lev.col +# indicates which column of x denotes the groups. And +# var.col indicates the column where the data are stored. +# +# if x has list mode: +# length(x) is assumed to correspond to the total number of groups. +# By default, the null hypothesis is that all groups have a common mean. +# To compare a subset of the groups, use grp to indicate which +# groups are to be compared. For example, if you type the +# command grp<-c(1,3,4), and then execute this function, groups +# 1, 3, and 4 will be compared with the remaining groups ignored. +# +# IV, if specified, taken to be the independent variable +# That is, the group id values +# and x is assumed to be a vector containing all of the data +# +# Missing values are automatically removed. 
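+# Illustration (hypothetical data, a sketch only): if 'g1', 'g2' and 'g3' are
+# vectors for three independent groups,
+#   t1way(list(g1, g2, g3), tr = .2)
+# performs the heteroscedastic ANOVA on 20% trimmed means. Alternatively,
+#   t1way(x = dv, IV = group.id)
+# can be used when 'dv' holds all the data and 'group.id' holds the group labels.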
+# +if(is.data.frame(x))x=as.matrix(x) +if(tr==.5)print("Warning: Comparing medians should not be done with this function") +if(!is.null(IV[1])){ +if(pr)print("Assuming x is a vector containing all of the data, the dependent variable") +xi=elimna(cbind(x,IV)) +x=fac2list(xi[,1],xi[,2]) +} +if(MAT){ +if(!is.matrix(x))stop("With MAT=T, data must be stored in a matrix") +if(length(lev.col)!=1)stop("Argument lev.col should have 1 value") +temp=selby(x,lev.col,var.col) +x=temp$x +grp2=rank(temp$grpn) +x=x[grp2] +} +if(is.matrix(x))x<-listm(x) +#nv=lapply(x,length) +if(is.na(sum(grp[1])))grp<-c(1:length(x)) +if(!is.list(x))stop("Data are not stored in a matrix or in list mode.") +J<-length(grp) +h<-vector("numeric",J) +w<-vector("numeric",J) +xbar<-vector("numeric",J) +nv=NA +for(j in 1:J){ +xx<-!is.na(x[[j]]) +val<-x[[j]] +x[[j]]<-val[xx] # Remove missing values +nv[j]=length(x[[j]]) +h[j]<-length(x[[grp[j]]])-2*floor(tr*length(x[[grp[j]]])) + # h is the number of observations in the jth group after trimming. +if(winvar(x[[grp[j]]],tr)==0)print(paste('The Winsorized variance is zero for group',j)) +w[j]<-h[j]*(h[j]-1)/((length(x[[grp[j]]])-1)*winvar(x[[grp[j]]],tr)) +xbar[j]<-mean(x[[grp[j]]],tr) +} +u<-sum(w) +xtil<-sum(w*xbar)/u +A<-sum(w*(xbar-xtil)^2)/(J-1) +B<-2*(J-2)*sum((1-w/u)^2/(h-1))/(J^2-1) +TEST<-A/(B+1) +nu1<-J-1 +nu2<-1./(3*sum((1-w/u)^2/(h-1))/(J^2-1)) +sig<-1-pf(TEST,nu1,nu2) +list(TEST=TEST,nu1=nu1,nu2=nu2,n=nv,p.value=sig) +} + +t3wayv2<-function(J,K,L,x,tr=.2,grp=c(1:p),alpha=.05,p=J*K*L,MAT=FALSE, +lev.col=c(1:3),var.col=4,pr=TRUE){ +# Perform a J by K by L (three-way) anova on trimmed means where +# all JKL groups are independent. +# +# Same as t3way, only computes p-values +# +# if MAT=F (default) +# The R variable data is assumed to contain the raw +# data stored in list mode. data[[1]] contains the data +# for the first level of all three factors: level 1,1,1. +# data][2]] is assumed to contain the data for level 1 of the +# first two factors and level 2 of the third factor: level 1,1,2 +# data[[L]] is the data for level 1,1,L +# data[[L+1]] is the data for level 1,2,1. data[[2L]] is level 1,2,L. +# data[[KL+1]] is level 2,1,1, etc. +# +# MAT=T, assumes data are stored in matrix with 3 columns indicating +# levels of the three factors. +# That is, this function calls selby2 for you. +# +# The default amount of trimming is tr=.2 +# +# It is assumed that data has length JKL, the total number of +# groups being tested. 
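+# Illustration (hypothetical data, a sketch only): for a 2-by-2-by-3 design
+# with all 12 independent groups stored in list mode in the order described
+# above,
+#   t3wayv2(J = 2, K = 2, L = 3, x = dat, tr = .2)
+# returns test statistics, critical values and p-values for each effect.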
+# +if(is.data.frame(x))x=as.matrix(x) +data=x #Yes, odd code +if(MAT){ +if(!is.matrix(data))stop("With MAT=T, data must be a matrix") +if(length(lev.col)!=3)stop("Argument lev.col should have 3 values") +temp=selby2(data,lev.col,var.col) +lev1=length(unique(temp$grpn[,1])) +lev2=length(unique(temp$grpn[,2])) +lev3=length(unique(temp$grpn[,3])) +gv=apply(temp$grpn,2,rank) +gvad=100*gv[,1]+10*gv[,2]+gv[,3] +grp=rank(gvad) +if(pr){ +print(paste("Factor 1 has", lev1, "levels")) +print(paste("Factor 2 has", lev2, "levels")) +print(paste("Factor 3 has", lev3, "levels")) +} +if(J!=lev1)warning("J is being reset to the number of levels found") +if(K!=lev2)warning("K is being reset to the number of levels found") +if(L!=lev3)warning("K is being reset to the number of levels found") +J=lev1 +K=lev2 +L=lev2 +data=temp$x +} +if(is.matrix(data))data=listm(data) +if(!is.list(data))stop("Data is not stored in list mode") +if(p!=length(data)){ +print("The total number of groups, based on the specified levels, is") +print(p) +print("The number of groups in data is") +print(length(data)) +print("Warning: These two values are not equal") +} +tmeans<-0 +h<-0 +v<-0 +for (i in 1:p){ +tmeans[i]<-mean(data[[grp[i]]],tr) +h[i]<-length(data[[grp[i]]])-2*floor(tr*length(data[[grp[i]]])) +# h is the effective sample size +v[i]<-(length(data[[grp[i]]])-1)*winvar(data[[grp[i]]],tr)/(h[i]*(h[i]-1)) +# v contains the squared standard errors +} +v<-diag(v,p,p) # Put squared standard errors in a diag matrix. +ij<-matrix(c(rep(1,J)),1,J) +ik<-matrix(c(rep(1,K)),1,K) +il<-matrix(c(rep(1,L)),1,L) +jm1<-J-1 +cj<-diag(1,jm1,J) +for (i in 1:jm1)cj[i,i+1]<-0-1 +km1<-K-1 +ck<-diag(1,km1,K) +for (i in 1:km1)ck[i,i+1]<-0-1 +lm1<-L-1 +cl<-diag(1,lm1,L) +for (i in 1:lm1)cl[i,i+1]<-0-1 +# Do test for factor A +cmat<-kron(cj,kron(ik,il)) # Contrast matrix for factor A +Qa <- johan(cmat, tmeans, v, h, alpha) +Qa.pv=t3pval(cmat, tmeans, v, h) +# Do test for factor B +cmat<-kron(ij,kron(ck,il)) # Contrast matrix for factor B +Qb<-johan(cmat,tmeans,v,h,alpha) +Qb.pv=t3pval(cmat, tmeans, v, h) +# Do test for factor C +cmat<-kron(ij,kron(ik,cl)) # Contrast matrix for factor C +Qc<-johan(cmat,tmeans,v,h,alpha) +Qc.pv=t3pval(cmat, tmeans, v, h) +# Do test for factor A by B interaction +cmat<-kron(cj,kron(ck,il)) # Contrast matrix for factor A by B +Qab<-johan(cmat,tmeans,v,h,alpha) +Qab.pv=t3pval(cmat, tmeans, v, h) +# Do test for factor A by C interaction +cmat<-kron(cj,kron(ik,cl)) # Contrast matrix for factor A by C +Qac<-johan(cmat,tmeans,v,h,alpha) +Qac.pv=t3pval(cmat, tmeans, v, h) +# Do test for factor B by C interaction +cmat<-kron(ij,kron(ck,cl)) # Contrast matrix for factor B by C +Qbc<-johan(cmat,tmeans,v,h,alpha) +Qbc.pv=t3pval(cmat, tmeans, v, h) +# Do test for factor A by B by C interaction +cmat<-kron(cj,kron(ck,cl)) # Contrast matrix for factor A by B by C +Qabc<-johan(cmat,tmeans,v,h,alpha) +Qabc.pv=t3pval(cmat, tmeans, v, h) +list(Qa=Qa$teststat,Qa.crit=Qa$crit,Qa.p.value=Qa.pv, +Qb=Qb$teststat,Qb.crit=Qb$crit,Qb.p.value=Qb.pv, +Qc=Qc$teststat,Qc.crit=Qc$crit,Qc.p.value=Qc.pv, +Qab=Qab$teststat,Qab.crit=Qab$crit,Qab.p.value=Qab.pv, +Qac=Qac$teststat,Qac.crit=Qac$crit,Qac.p.value=Qac.pv, +Qbc=Qbc$teststat,Qbc.crit=Qbc$crit,Qbc.p.value=Qbc.pv, +Qabc=Qabc$teststat,Qabc.crit=Qabc$crit,Qabc.p.value=Qabc.pv) +} + + +olshc4<-function(x,y,alpha=.05,CN=FALSE, +xout=FALSE,outfun=outpro,HC3=FALSE,plotit=FALSE,xlab = "X", ylab = "Y", zlab = "Z",...){ +# +# Compute confidence intervals via least squares +# regression using heteroscedastic 
method +# recommended by Cribari-Neto (2004). +# CN=F, degrees of freedom are n-p +# CN=T degrees of freedom are infinite, as done by Cribari-Neto (2004) +# All indications are that CN=F is best for general use. +# +# HC3=TRUE, will replace the HC4 estimator with the HC3 estimator. +# +x<-as.matrix(x) +pnum=ncol(x) +if(nrow(x) != length(y))stop("Length of y does not match number of x values") +m<-cbind(x,y) +m<-elimna(m) +y<-m[,ncol(x)+1] +x=m[,1:ncol(x)] +n=length(y) +nrem=n +n.keep=length(y) +x<-as.matrix(x) +if(xout){ +if(identical(outfun,outblp))flag=outblp(x,y,plotit=FALSE)$keep +else +flag<-outfun(x,plotit=FALSE,...)$keep +x<-as.matrix(x) +x<-x[flag,] +y<-y[flag] +n.keep=length(y) +x<-as.matrix(x) +} +temp<-lsfit(x,y) +rsq=Rsq.ols(x,y) +x<-cbind(rep(1,nrow(x)),x) +xtx<-solve(t(x)%*%x) +h<-diag(x%*%xtx%*%t(x)) +n<-length(h) +d<-(n*h)/sum(h) +for(i in 1:length(d)){ + d[i]<-min(4, d[i]) +} +if(HC3)d=2 +hc4<-xtx%*%t(x)%*%diag(temp$res^2/(1-h)^d)%*%x%*%xtx +df<-nrow(x)-ncol(x) +crit<-qt(1-alpha/2,df) +if(CN)crit=qnorm(1-alpha/2) +al<-ncol(x) +p=al-1 +ci<-matrix(NA,nrow=al,ncol=6) +lab.out=rep("Slope",p) +dimnames(ci)<-list(c("(Intercept)",lab.out),c("Coef.","Estimates", +"ci.lower","ci.upper","p-value","Std.Error")) +for(j in 1:al){ +ci[j,1]<-j-1 +ci[j,2]<-temp$coef[j] +ci[j,3]<-temp$coef[j]-crit*sqrt(hc4[j,j]) +ci[j,4]<-temp$coef[j]+crit*sqrt(hc4[j,j]) +test<-temp$coef[j]/sqrt(hc4[j,j]) +names(test)=NULL +ci[j,5]<-2*(1-pt(abs(test),df)) +if(CN)ci[j,5]<-2*(1-pnorm(abs(test),df)) +} +ci[,6]=sqrt(diag(hc4)) +if(plotit){ +if(pnum==1){ +plot(x[,-1],y,xlab=xlab,ylab=ylab) +abline(ci[,2]) +} +if(pnum==2){ +regp2plot(x[,-1],y,regfun=ols,xlab=xlab,ylab=ylab,zlab=zlab) +}} +list(n=nrem,n.keep=n.keep,ci=ci, cov=hc4, test.stat=test,R.squared=rsq) +} + +olsci<-olshc4 + +hc4test<-function(x,y,pval=c(1:ncol(x)),xout=FALSE,outfun=outpro,pr=TRUE,plotit=FALSE,xlab="X",ylab="Y",...){ +# +# Perform omnibus test using OLS and HC4 estimator +# That is, test the hypothesis that all of the slope parameters +# are equal to 0 in a manner that allows heteroscedasticity. +# +# recommended by Cribari-Neto (2004). 
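+# Illustration (hypothetical data, a sketch only): with a predictor matrix
+# 'xmat' and outcome vector 'yvar',
+#   hc4test(x = xmat, y = yvar)
+# tests H0: all slope parameters are zero, allowing heteroscedasticity via
+# the HC4 estimator.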
+# Seems to work well with p=1 but can be unsatisfactory wit p>4 predictors, +# Unknown how large n must be when p>1 +# +x<-as.matrix(x) +if(ncol(x)>1 && pr)print("WARNING: more than 1 predictor, olstest might be better") +if(nrow(x) != length(y))stop("Length of y does not match number of x values") +m<-cbind(x,y) +m<-elimna(m) +p=ncol(x) +p1=p+1 +y<-m[,p1] +x=m[,1:p] +nrem=length(y) +n=length(y) +n.keep=n +x<-as.matrix(x) +if(xout){ +if(identical(outfun,outblp))flag=outblp(x,y,plotit=FALSE)$keep +else +flag<-outfun(x,...)$keep +x<-as.matrix(x) +x<-x[flag,] +y<-y[flag] +n.keep=length(y) +x<-as.matrix(x) +} +n=n.keep +pvalp1<-pval+1 +temp<-lsfit(x,y) # unrestricted +if(plotit){ +if(p==1){ +plot(x[,1],y,xlab=xlab,ylab=ylab) +abline(temp$coef) +}} +x<-cbind(rep(1,nrow(x)),x) +hval<-x%*%solve(t(x)%*%x)%*%t(x) +hval<-diag(hval) +hbar<-mean(hval) +delt<-cbind(rep(4,n),hval/hbar) +delt<-apply(delt,1,min) +aval<-(1-hval)^(0-delt) +x2<-x[,pvalp1] +pval<-0-pvalp1 +x1<-x[,pval] +df<-length(pval) +x1<-as.matrix(x1) +imat<-diag(1,n) +M1<-imat-x1%*%solve(t(x1)%*%x1)%*%t(x1) +M<-imat-x%*%solve(t(x)%*%x)%*%t(x) +uval<-as.vector(M%*%y) +R2<-M1%*%x2 +rtr<-solve(t(R2)%*%R2) +temp2<-aval*uval^2 +S<-diag(aval*uval^2) +V<-n*rtr%*%t(R2)%*%S%*%R2%*%rtr +nvec<-as.matrix(temp$coef[pvalp1]) +test<-n*t(nvec)%*%solve(V)%*%nvec +test<-test[1,1] +p.value<-1-pchisq(test,df) +list(n=nrem,n.keep=n.keep,test=test,p.value=p.value,coef=temp$coef) +} + + + +standm<-function(x,locfun=lloc,est=mean,scat=var,...){ +# standardize a matrix x +# +x=elimna(x) +x=as.matrix(x) +m1=lloc(x,est=est) +v1=apply(x,2,scat) +p=ncol(x) +for(j in 1:p)x[,j]=(x[,j]-m1[j])/sqrt(v1[j]) +x +} + +t2way<-function(J,K,x,tr=.2,grp=c(1:p),p=J*K,MAT=FALSE, +lev.col=c(1:2),var.col=3,pr=TRUE,IV1=NULL,IV2=NULL){ +# Perform a J by K (two-way) ANOVA on trimmed means where +# all groups are independent. +# +# The R variable x is assumed to contain the raw +# data stored in list mode, or a matrix with columns +# corresponding to groups. If stored in list mode, x[[1]] contains the data +# for the first level of both factors: level 1,1,. +# x[[2]] is assumed to contain the data for level 1 of the +# first factor and level 2 of the second factor: level 1,2 +# +# The default amount of trimming is tr=.2 +# +# It is assumed that x has length JK, the total number of +# groups being tested. +# +# MAT=T, assumes x are stored in matrix with 3 columns +# with two of the columns indicated by the argument +# lev.col +# specifying the columns of x containing the values of the +# levels of the two factors. +# The outcome variable is in column +# var.col +# which defaults to column 3 +# That is, this function calls selby2 for you. 
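+# Illustration (hypothetical data, a sketch only): for a 2-by-3 design with
+# the six independent groups stored as the columns of a matrix 'dat' (or in
+# list mode in the order described above),
+#   t2way(J = 2, K = 3, x = dat, tr = .2)
+# returns test statistics and p-values for both main effects and the interaction.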
+# +# IV1 and IV2: if specified, taken to be the independent variable +# That is, the group id values +# and x is assumed to be a vector containing all of the data +# EXAMPLE: t2way(x=data,IV1=iv1,IV2=iv2) +# would do a two-way ANOVA based on group id's in iv1 and iv2 and +# dependent variable data +# +if(is.data.frame(x))data=as.matrix(x) +if(tr==.5){ +print("For medians, use med2way if there are no ties") +print("With ties, use linear contrasts in conjunction with medpb") +stop("") +} +if(MAT){ +if(!is.matrix(x))stop("With MAT=T, data must be a matrix") +if(length(lev.col)!=2)stop("Argument lev.col should have 3 values") +temp=selby2(x,lev.col,var.col) +lev1=length(unique(temp$grpn[,1])) +lev2=length(unique(temp$grpn[,2])) +gv=apply(temp$grpn,2,rank) +gvad=10*gv[,1]+gv[,2] +grp=rank(gvad) +if(pr){ +print(paste("Factor 1 has", lev1, "levels")) +print(paste("Factor 2 has", lev2, "levels")) +} +if(J!=lev1)warning("J is being reset to the number of levels found") +if(K!=lev2)warning("K is being reset to the number of levels found") +J=lev1 +K=lev2 +x=temp$x +} +if(!is.null(IV1[1])){ +if(is.null(IV2[1]))stop("IV2 is NULL") +if(pr)print("Assuming data is a vector containing all of the data; the dependent variable") +xi=elimna(cbind(x,IV1,IV2)) +J=length(unique(xi[,2])) +K=length(unique(xi[,3])) +x=fac2list(xi[,1],xi[,2:3]) +} +if(is.matrix(x))x=listm(x) +if(!is.list(x))stop("Data are not stored in list mode") +if(p!=length(x)){ +print("The total number of groups, based on the specified levels, is") +print(p) +print("The number of groups is") +print(length(x)) +print("Warning: These two values are not equal") +} +tmeans<-0 +h<-0 +v<-0 +for (i in 1:p){ +x[[grp[i]]]=elimna(x[[grp[i]]]) +tmeans[i]<-mean(x[[grp[i]]],tr) +h[i]<-length(x[[grp[i]]])-2*floor(tr*length(x[[grp[i]]])) +# h is the effective sample size +if(winvar(x[[grp[i]]],tr)==0)print(paste('The Winsorized variance is zero for group',i)) +v[i]<-(length(x[[grp[i]]])-1)*winvar(x[[grp[i]]],tr)/(h[i]*(h[i]-1)) +# v contains the squared standard errors +} +v<-diag(v,p,p) # Put squared standard errors in a diag matrix. 
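+# Below, Kronecker products of the contrast matrices cj, ck with the unit row
+# vectors ij, ik give the contrast matrix for each effect; the p-value for an
+# effect is taken as the smallest alpha in .001,...,.999 for which the
+# Johansen test statistic exceeds its critical value.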
+ij<-matrix(c(rep(1,J)),1,J) +ik<-matrix(c(rep(1,K)),1,K) +jm1<-J-1 +cj<-diag(1,jm1,J) +for (i in 1:jm1)cj[i,i+1]<-0-1 +km1<-K-1 +ck<-diag(1,km1,K) +for (i in 1:km1)ck[i,i+1]<-0-1 +# Do test for factor A +cmat<-kron(cj,ik) # Contrast matrix for factor A +alval<-c(1:999)/1000 +for(i in 1:999){ +irem<-i +Qa<-johan(cmat,tmeans,v,h,alval[i]) +if(i==1)dfA=Qa$df +if(Qa$teststat>Qa$crit)break +} +A.p.value=irem/1000 +# Do test for factor B +cmat<-kron(ij,ck) # Contrast matrix for factor B +for(i in 1:999){ +irem<-i +Qb<-johan(cmat,tmeans,v,h,alval[i]) +if(i==1)dfB=Qb$df +if(Qb$teststat>Qb$crit)break +} +B.p.value=irem/1000 +# Do test for factor A by B interaction +cmat<-kron(cj,ck) # Contrast matrix for factor A by B +for(i in 1:999){ +irem<-i +Qab<-johan(cmat,tmeans,v,h,alval[i]) +if(i==1)dfAB=Qab$df +if(Qab$teststat>Qab$crit)break +} +AB.p.value=irem/1000 +tmeans=matrix(tmeans,J,K,byrow=TRUE) +list(Qa=Qa$teststat,A.p.value=A.p.value, df.A=dfA, +Qb=Qb$teststat,B.p.value=B.p.value,df.B=dfB, +Qab=Qab$teststat,AB.p.value=AB.p.value,df.AB=dfAB,means=tmeans) +} + +mcskew <- function(z) +{ + n=length(z) + y1=0 + y2=0 + left=0 + right=0 + q=0 + p=0 + eps=0.0000000000001 + z=-z + xmed=pull(z,n,floor(n/2)+1) + if (n%%2 == 0) + { + xmed=(xmed+pull(z,n,floor(n/2)))/2 + } + z=z-xmed + y=-sort(z) + y1=y[y>-eps] + y2=y[y<=eps] + h1=length(y1) + h2=length(y2) + left[1:h2]=1 + right[1:h2]=h1 + nl=0 + nr=h1*h2 + knew=floor(nr/2)+1 + IsFound=0 + while ((nr-nl>n) & (IsFound==0)) + { + weight=0 + work=0 + j=1 + for (i in 1:h2) + { + if (left[i]<=right[i]) + { + weight[j]=right[i]-left[i]+1 + k=left[i]+floor(weight[j]/2) + work[j]=calwork(y1[k],y2[i],k,i,h1+1,eps) + j=j+1 + } + } + trial=whimed(work,weight,j-1) + j=1 + for (i in h2:1) + { + while ((j<=h1)&(calwork(y1[min(j,h1)],y2[i],j,i,h1+1,eps)>trial)) + { + j=j+1 + } + p[i]=j-1 + } + j=h1 + for (i in 1:h2) + { + while ((j>=1)&(calwork(y1[max(j,1)],y2[i],j,i,h1+1,eps)sumq) + { + left[1:h2]=q[1:h2] + nl=sumq + } + else + { + medc=trial + IsFound=1 + } + } + } + if (IsFound==0) + {work=0 + j=1 + for (i in 1:h2) + { + if (left[i]<=right[i]) + { + for (jj in left[i]:right[i]) + { + work[j]=0-calwork(y1[jj],y2[i],jj,i,h1+1,eps) + j=j+1 + } + } + } + medc=0-pull(work,j-1,knew-nl) + } + medc +} + +pull <- function(a,n,k) +{ + b=0 + b=a + l=1 + lr=n + while (lax) + { + j=j-1 + } + if (jnc<=j) + { + buffer=b[jnc] + b[jnc]=b[j] + b[j]=buffer + jnc=jnc+1 + j=j-1 + } + } + if (jtrial,rep(F,n-nn))]) + wmid=sum(iw[c(a[1:nn]==trial,rep(F,n-nn))]) + + if ((2*wrest+2*wleft)>wtotal) + { + i=c(a[1:nn]wtotal) + { + whmed=trial + IsFound=1 + } + else + { + i=c(a[1:nn]>trial,rep(F,n-nn)) + acand=a[i] + iwcand=iw[i] + nn=length(acand) +# nn_kcand_length(acand) + wrest=wrest+wleft+wmid + } + } + a[1:nn]=acand[1:nn] + iw[1:nn]=iwcand[1:nn] + } + whmed +} + +calwork <- function(a,b,ai,bi,ab,eps) +{ + if (abs(a-b) < 2.0*eps) + { + if (ai+bi==ab) + { + cwork=0 + } + else + { + if (ai+bi (n-p)/(2*n) ) r <- (n-p)/(2*n)} +# maximum achievable breakdown +# +# if rejection is not achievable, use c1=0 and best rejection +# + limvec <- rejpt.bt.lim(p,r) + if (1-limvec[2] <= alpha) + { + c1 <- 0 + M <- sqrt(qchisq(1-alpha,p)) + } + else + { + c1.plus.M <- sqrt(qchisq(1-alpha,p)) + M <- sqrt(p) + c1 <- c1.plus.M - M + iter <- 1 + crit <- 100 + eps <- 1e-5 + while ((crit > eps)&(iter<100)) + { + deps <- 1e-4 + M.old <- M + c1.old <- c1 + er <- erho.bt(p,c1,M) + fc <- er - r*(M^2/2+c1*(5*c1+16*M)/30) + fcc1 <- (erho.bt(p,c1+deps,M)-er)/deps + fcM <- (erho.bt(p,c1,M+deps)-er)/deps + fcp <- fcM - fcc1 - 
r*(M-(5*c1+16*M)/30+c1*9/30) + M <- M - fc/fcp + if (M >= c1.plus.M ){M <- (M.old + c1.plus.M)/2} + c1 <- c1.plus.M - M +# if (M-c1 < 0) M <- c1.old+(M.old-c1.old)/2 + crit <- abs(fc) + iter <- iter+1 + } + } +list(c1=c1,M=M,r1=r) +} +erho.bt.lim <- function(p,c1) +# expectation of rho(d) under chi-squared p + return(chi.int(p,2,c1)+c1^2*chi.int2(p,0,c1)) +erho.bt.lim.p <- function(p,c1) +# derivative of erho.bt.lim wrt c1 + return(chi.int.p(p,2,c1)+c1^2*chi.int2.p(p,0,c1)+2*c1*chi.int2(p,0,c1)) + + +rejpt.bt.lim <- function(p,r){ +# find p-value of translated biweight limit c +# that gives a specified breakdown + c1 <- 2*p + iter <- 1 + crit <- 100 + eps <- 1e-5 + while ((crit > eps)&(iter<100)) + { + c1.old <- c1 + fc <- erho.bt.lim(p,c1) - c1^2*r + fcp <- erho.bt.lim.p(p,c1) - 2*c1*r + c1 <- c1 - fc/fcp + if (c1 < 0) c1 <- c1.old/2 + crit <- abs(fc) + iter <- iter+1 + } + return(c(c1,pchisq(c1^2,p),log10(1-pchisq(c1^2,p)))) +} +chi.int.p <- function(p,a,c1) + return( exp(lgamma((p+a)/2)-lgamma(p/2))*2^{a/2}*dchisq(c1^2,p+a)*2*c1 ) +chi.int2.p <- function(p,a,c1) + return( -exp(lgamma((p+a)/2)-lgamma(p/2))*2^{a/2}*dchisq(c1^2,p+a)*2*c1 ) +ksolve.bt <- function(d,p,c1,M,b0){ +# find a constant k which satisfies the s-estimation constraint +# for modified biweight + k <- 1 + iter <- 1 + crit <- 100 + eps <- 1e-5 + while ((crit > eps)&(iter<100)) + { + k.old <- k + fk <- mean(rho.bt(d/k,c1,M))-b0 + fkp <- -mean(psi.bt(d/k,c1,M)*d/k^2) + k <- k - fk/fkp + if (k < k.old/2) k <- k.old/2 + if (k > k.old*1.5) k <- k.old*1.5 + crit <- abs(fk) +# print(c(iter,k.old,crit)) + iter <- iter+1 + } +# print(c(iter,k,crit)) + return(k) +} +rho.bt <- function(x,c1,M) +{ + x1 <- (x-M)/c1 + ivec1 <- (x1 < 0) + ivec2 <- (x1 > 1) + return(ivec1*(x^2/2) + +ivec2*(M^2/2+c1*(5*c1+16*M)/30) + +(1-ivec1-ivec2)*(M^2/2-M^2*(M^4-5*M^2*c1^2+15*c1^4)/(30*c1^4) + +(1/2+M^4/(2*c1^4)-M^2/c1^2)*x^2 + +(4*M/(3*c1^2)-4*M^3/(3*c1^4))*x^3 + +(3*M^2/(2*c1^4)-1/(2*c1^2))*x^4 + -4*M*x^5/(5*c1^4)+x^6/(6*c1^4))) +} +psi.bt <- function(x,c1,M) +{ + x1 <- (x-M)/c1 + ivec1 <- (x1 < 0) + ivec2 <- (x1 > 1) + return(ivec1*x+(1-ivec1-ivec2)*x*(1-x1^2)^2) +} +psip.bt <- function(x,c1,M) +{ + x1 <- (x-M)/c1 + ivec1 <- (x1 < 0) + ivec2 <- (x1 > 1) + return(ivec1+(1-ivec1-ivec2)*((1-x1^2)^2+4*x*x1*(1-x1^2)/c1)) +} +wt.bt <- function(x,c1,M) +{ + x1 <- (x-M)/c1 + ivec1 <- (x1 < 0) + ivec2 <- (x1 > 1) + return(ivec1+(1-ivec1-ivec2)*(1-x1^2)^2) +} +v.bt <- function(x,c1,M) return(x*psi.bt(x,c1,M)) + +rung3dlchk<-function(x,y,est=onestep,regfun=tsreg,beta=.2,plotit=FALSE,nmin=0, +fr=NA,...){ +# +# running mean using interval method +# Same as runm3d, but empirically determine the span, f, +# by maximizing the percentage bend correlation using the +# leave-three-out method. +# +# x is an n by p matrix of predictors. +# +# fr controls amount of smoothing and is determined by this function. +# If fr is missing, function first considers fr=.8(.05)1.2. If +# measure of scale of residuals is mininmized for fr=.8, then consider +# fr=.2(.05).75. +# +# +if(!is.matrix(x))stop("Data are not stored in a matrix.") +plotit<-as.logical(plotit) +chkcor<-1 +frtry<-c(.7,.75,.8,.85,.9,.95,1.,1.05,1.1,1.15,1.2) +if(!is.na(fr[1]))frtry<-fr +chkit<-0 +for (it in 1:length(frtry)){ +fr<-frtry[it] +rmd<-runm3ds1(x,y,fr,tr,FALSE,nmin) # Using leave-three-out method. 
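+# For each candidate span fr, the leave-three-out smooth is computed and the
+# percentage bend variance of the residuals is stored in chkcor; the span
+# minimizing this measure of scale is used for the final fit.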
+xm<-y[!is.na(rmd)] +rmd<-rmd[!is.na(rmd)] +dif<-xm-rmd +chkcor[it]<-pbvar(dif,beta) +} +if(sum(is.na(chkcor))== length(chkcor)) +{stop("A value for the span cannot be determined with these data.")} +tempc<-sort(chkcor) +chkcor[is.na(chkcor)]<-tempc[length(tempc)] +temp<-order(chkcor) +fr1<-frtry[temp[1]] +fr2<-fr1 +val1<-min(chkcor) +chkcor2<-0 +if(is.na(fr)){ +if(temp[1] == 1){ +frtry<-c(.2,.25,.3,.35,.4,.45,.5,.55,.6,.65,.7,.75) +for (it in 1:length(frtry)){ +fr<-frtry[it] +rmd<-runm3ds1(x,y,fr,tr,FALSE,nmin) +xm<-y[!is.na(rmd)] +rmd<-rmd[!is.na(rmd)] +dif<-xm-rmd +chkcor2[it]<-pbvar(dif,beta) +} +tempc<-sort(chkcor2) +chkcor2[is.na(chkcor2)]<-tempc[length(tempc)] +print(chkcor2) +temp2<-order(chkcor2) +fr2<-frtry[temp2[1]] +} +} +sortc<-sort(chkcor2) +chkcor2[is.na(chkcor2)]<-sortc[length(sortc)] +val2<-min(chkcor2) +fr<-fr1 +if(val2 < val1)fr<-fr2 +rmd<-runm3d(x,y,fr=fr,tr,plotit=FALSE,nmin,pyhat=TRUE,pr=FALSE) +xm<-y[!is.na(rmd)] +rmd<-rmd[!is.na(rmd)] +etasq<-pbcor(rmd,xm)$cor^2 +# Next, fit regression line +temp<-y-regfun(x,y)$res +pbc<-pbcor(temp,y)$cor^2 +temp<-(etasq-pbc)/(1-pbc) +list(gamma.L=temp,pbcorsq=pbc,etasq=etasq,fr=fr,rmd=rmd,yused=xm,varval=chkcor) +} + +near3dl1<-function(x,pt,fr=1,m){ +dis<-mahalanobis(x,pt,m$cov) +sdis<-sqrt(sort(dis)) +dflag<-(dis < fr & dis > sdis[3]) +dflag +} + +listm<-function(x){ +# +# Store the data in a matrix or data frame in a new +# R variable having list mode. +# Col 1 will be stored in y[[1]], col 2 in y[[2]], and so on. +# +if(is.null(dim(x)))stop("The argument x must be a matrix or data frame") +y<-list() +for(j in 1:ncol(x))y[[j]]<-x[,j] +y +} + +m2l=listm + +matrix2list=listm + +pbanova<-function(x,tr=.2,alpha=.05,nboot=NA,grp=NA,WIN=FALSE,win=.1){ +# +# Test the hypothesis that J independent groups have +# equal trimmed means using the percentile bootstrap method. +# +# The data are assumed to be stored in x +# which either has list mode or is a matrix. In the first case +# x[[1]] contains the data for the first group, x[[2]] the data +# for the second group, etc. Length(x)=the number of groups = J. +# If stored in a matrix, the columns of the matrix correspond +# to groups. +# +# tr is the amount of trimming +# +# The argument grp can be used to analyze a subset of the groups +# Example: grp=c(1,3,5) would compare groups 1, 3 and 5. +# +# WIN=T means data are Winsorized before taking bootstraps by the +# amount win. +# +# Missing values are allowed. +# +if(is.matrix(x))x<-listm(x) +if(!is.list(x))stop("Data must be stored in list mode or in matrix mode.") +if(!is.na(sum(grp))){ # Only analyze specified groups. +xx<-list() +for(i in 1:length(grp))xx[[i]]<-x[[grp[1]]] +x<-xx +} +J<-length(x) +tempn<-0 +for(j in 1:J){ +temp<-x[[j]] +temp<-temp[!is.na(temp)] # Remove missing values. 
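+# Record the sample size of group j after removing missing values.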
+tempn[j]<-length(temp) +x[[j]]<-temp +} +Jm<-J-1 +if(WIN){ +if(tr < .2){print("Warning: When Winsorizing,") +print("the amount of trimming should be at least.2") +} +if(win > tr)stop("Amount of Winsorizing must be <= amount of trimming") +if(min(tempn) < 15){ +print("Warning: Winsorizing with sample sizes less than 15") +print("can result in poor control over the probability of a Type I error") +} +for (j in 1:J){ +x[[j]]<-winval(x[[j]],win) +} +} +con<-matrix(0,J,J-1) +for (j in 1:Jm){ +jp<-j+1 +con[j,j]<-1 +con[jp,j]<-0-1 +} +# Determine nboot if a value was not specified +if(is.na(nboot)){ +nboot<-5000 +if(J <= 8)nboot<-4000 +if(J <= 3)nboot<-2000 +} +# Determine critical values +if(alpha==.05){ +dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) +if(Jm > 10){ +avec<-.05/c(11:Jm) +dvec<-c(dvec,avec) +}} +if(alpha==.01){ +dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) +if(Jm > 10){ +avec<-.01/c(11:Jm) +dvec<-c(dvec,avec) +}} +if(alpha != .05 && alpha != .01)dvec<-alpha/c(1:Jm) +bvec<-matrix(NA,nrow=J,ncol=nboot) +set.seed(2) # set seed of random number generator so that +# results can be duplicated. +print("Taking bootstrap samples. Please wait.") +for(j in 1:J){ +paste("Working on group ",j) +data<-matrix(sample(x[[j]],size=length(x[[j]])*nboot,replace=TRUE),nrow=nboot) +bvec[j,]<-apply(data,1,mean,tr) # Bootstrapped trimmed means for jth group +} +test<-NA +for (d in 1:Jm){ +dp<-d+1 +test[d]<-sum(bvec[d,]>bvec[dp,])/nboot +if(test[d]> .5)test[d]<-1-test[d] +} +test<-(0-1)*sort(-2*test) +sig<-sum((test0)print("Significant result obtained: Reject") +if(sig==0)print("No significant result obtained: Fail to reject") +list(test.vec=test,crit.vec=dvec[1:Jm]) +} + +pbanovag<-function(x,alpha=.05,nboot=NA,grp=NA,est=onestep,...){ +# +# Test the hypothesis that J independent groups have +# equal measures of location using the percentile bootstrap method. +# (Robust measures of scale can be compared as well.) +# +# The data are assumed to be stored in x +# which either has list mode or is a matrix. In the first case +# x[[1]] contains the data for the first group, x[[2]] the data +# for the second group, etc. Length(x)=the number of groups = J. +# If stored in a matrix, the columns of the matrix correspond +# to groups. +# +# est is the measure of location and defaults to a M-estimator +# ... can be used to set optional arguments associated with est +# +# The argument grp can be used to analyze a subset of the groups +# Example: grp=c(1,3,5) would compare groups 1, 3 and 5. +# +# Missing values are allowed. +# +con<-as.matrix(con) +if(is.matrix(x))x<-listm(x) +if(!is.list(x))stop("Data must be stored in list mode or in matrix mode.") +if(!is.na(sum(grp))){ +# Only analyze specified groups. +xx<-list() +for(i in 1:length(grp))xx[[i]]<-x[[grp[1]]] +x<-xx +} +J<-length(x) +tempn<-0 +for(j in 1:J){ +temp<-x[[j]] +temp<-temp[!is.na(temp)] # Remove missing values. 
+tempn[j]<-length(temp) +x[[j]]<-temp +} +Jm<-J-1 +icl<-ceiling(crit*nboot) +icu<-ceiling((1-crit)*nboot) +con<-matrix(0,J,J-1) +for (j in 1:Jm){ +jp<-j+1 +con[j,j]<-1 +con[jp,j]<-0-1 +} +# Determine nboot if a value was not specified +if(is.na(nboot)){ +nboot<-5000 +if(J <= 8)nboot<-4000 +if(J <= 3)nboot<-2000 +} +# Determine critical values +if(alpha==.05){ +dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) +if(Jm > 10){ +avec<-.05/c(11:Jm) +dvec<-c(dvec,avec) +}} +if(alpha==.01){ +dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) +if(Jm > 10){ +avec<-.01/c(11:Jm) +dvec<-c(dvec,avec) +}} +if(alpha != .05 && alpha != .01)dvec<-alpha/c(1:Jm) +bvec<-matrix(NA,nrow=J,ncol=nboot) +set.seed(2) # set seed of random number generator so that +# results can be duplicated. +print("Taking bootstrap samples. Please wait.") +for(j in 1:J){ +paste("Working on group ",j) +data<-matrix(sample(x[[j]],size=length(x[[j]])*nboot,replace=TRUE),nrow=nboot) +bvec[j,]<-apply(data,1,est,...) # Bootstrapped trimmed means for jth group +} +test<-NA +for (d in 1:Jm){ +dp<-d+1 +test[d]<-sum(bvec[d,]>bvec[dp,])/nboot +if(test[d]> .5)test[d]<-1-test[d] +} +test<-(0-1)*sort(-2*test) +sig<-sum((test0)print("Significant result obtained: Reject") +if(sig==0)print("No significant result obtained: Fail to reject") +list(test.vec=test,crit.vec=dvec[1:Jm]) +} +bootse<-function(x,nboot=1000,est=median,SEED=TRUE,...){ +# +# Compute bootstrap estimate of the standard error of the +# estimator est +# The default number of bootstrap samples is nboot=100 +# +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +data<-matrix(sample(x,size=length(x)*nboot,replace=TRUE),nrow=nboot) +bvec<-apply(data,1,est,...) +bootse<-sqrt(var(bvec)) +bootse +} + + +rananova<-function(x,tr=.2,grp=NA){ +# +# A heteroscedastic one-way random effects ANOVA for trimmed means. +# +# The data are assumed to be stored in a matrix on in list mode. +# If in list mode, +# Length(x) is assumed to correspond to the total number of groups. +# If the data are stored in a matrix, groups correspond to columns. +# By default, the null hypothesis is that all group have a common mean. +# To compare a subset of the groups, use grp to indicate which +# groups are to be compared. For example, if you type the +# command grp<-c(1,3,4), and then execute this function, groups +# 1, 3, and 4 will be compared with the remaining groups ignored. +# +if(is.matrix(x))x<-listm(x) +if(is.na(grp[1]))grp<-c(1:length(x)) +if(!is.list(x))stop("Data are not stored in a matrix or in list mode") +J<-length(grp) # The number of groups to be compared +#if(pr)print("The number of groups to be compared is") +#print(J) +h<-1 +xbar<-1 +ybar<-1 +wvar<-1 +ell<-0 +for(j in 1:J){ +ell[j]<-length(x[[grp[j]]])/(length(x[[grp[j]]])+1) +h[j]<-length(x[[grp[j]]])-2*floor(tr*length(x[[grp[j]]])) + # h is the number of observations in the jth group after trimming. 
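+# ybar, xbar and wvar will hold the Winsorized mean, trimmed mean and
+# Winsorized variance of group j, respectively.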
+ybar[j]<-winmean(x[[grp[j]]],tr) +xbar[j]<-mean(x[[grp[j]]],tr) +wvar[j]<-winvar(x[[grp[j]]],tr) +} +q<-NA +bsst<-var(xbar) +for (j in 1:J)q[j]<-(length(x[[grp[j]]]-1)-1)*wvar[j]/(h[j]*(h[j]-1)) +wssw<-mean(q) +D<-bsst/wssw +g<-q/J +nu1<-((J-1)*sum(q))^2/((sum(q))^2+(J-2)*J*sum(q^2)) +nu2<-(sum(J*q))^2/sum((J*q)^2/(h-1)) +sig<-1-pf(D,nu1,nu2) +# Next, estimate the Winsorized intraclass correlation +sighat<-mean(ell*(ybar-(sum(ell*ybar)/sum(ell)))^2) +rho<-sighat/(sighat+winmean(wvar,tr)) +list(teststat=D,df=c(nu1,nu2),p.value=sig,rho=rho,num.groups=J) +} + + +linpbg<-function(x,con=0,alpha=.05,nboot=NA,est=mest,...){ +# +# Compute a 1-alpha confidence interval +# for a set of d linear contrasts +# involving trimmed means using the percentile bootstrap method. +# Independent groups are assumed. +# +# The data are assumed to be stored in x in list mode or in a matrix. +# Thus, +# x[[1]] contains the data for the first group, x[[2]] the data +# for the second group, etc. +# If x has list mode, length(x)=the number of groups = J, say. +# +# Missing values are automatically removed. +# +# con is a J by d matrix containing the +# contrast coefficents of interest. +# If unspecified, all pairwise comparisons are performed. +# For example, con[,1]=c(1,1,-1,-1,0,0) +# and con[,2]=c(,1,-1,0,0,1,-1) +# will test two contrasts: (1) the sum of the first +# two trimmed means is +# equal to the sum of the second two, +# and (2) the difference between +# the first two is equal to the difference +# between the trimmed means of +# groups 5 and 6. +# +# +con<-as.matrix(con) +if(is.matrix(x))x<-listm(x) +if(!is.list(x))stop("Data must be stored in a matrix or in list mode.") +J<-length(x) +for(j in 1:J){ +xx<-x[[j]] +x[[j]]<-xx[!is.na(xx)] # Remove any missing values. +} +Jm<-J-1 +d<-(J^2-J)/2 +if(sum(con^2)==0){ +con<-matrix(0,J,d) +id<-0 +for (j in 1:Jm){ +jp<-j+1 +for (k in jp:J){ +id<-id+1 +con[j,id]<-1 #If con not specified do all pairwise comparisons +con[k,id]<-0-1 +}}} +if(nrow(con)!=length(x)){ +stop("The number of groups does not match the number of contrast coefficients.") +} +if(is.na(nboot)){ +nboot<-5000 +if(ncol(con)<=4)nboot<-2000 +} +m1<-matrix(0,nrow=J,ncol=nboot) +set.seed(2) # set seed of random number generator so that +# results can be duplicated. +print("Taking bootstrap samples. Please wait.") +for(j in 1:J){ +paste("Working on group ",j) +data<-matrix(sample(x[[j]],size=length(x[[j]])*nboot,replace=TRUE),nrow=nboot) +m1[j,]<-apply(data,1,est,...) 
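+# Row j of m1 now holds nboot bootstrap values of est for group j.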
+} +testb<-NA +boot<-matrix(0,ncol(con),nboot) +testvec<-NA +for (d in 1:ncol(con)){ +boot[d,]<-apply(m1,2,trimpartt,con[,d]) +# A vector of length nboot containing psi hat values +# and corresponding to the dth linear contrast +testb[d]<-sum((boot[d,]>0))/nboot +testvec[d]<-min(testb[d],1-testb[d]) +} +# +# Determine critical value +# +dd<-ncol(con) +if(alpha==.05){ +if(dd==1)crit<-alpha/2 +if(dd==2)crit<-.014 +if(dd==3)crit<-.0085 +if(dd==4)crit<-.007 +if(dd==5)crit<-.006 +if(dd==6)crit<-.0045 +if(dd==10)crit<-.0023 +if(dd==15)crit<-.0016 +} +else{ +crit<-alpha/(2*dd) +} +icl<-round(crit*nboot) +icu<-round((1-crit)*nboot) +psihat<-matrix(0,ncol(con),4) +test<-matrix(0,ncol(con),3) +dimnames(psihat)<-list(NULL,c("con.num","psihat","ci.lower","ci.upper")) +dimnames(test)<-list(NULL,c("con.num","test","crit.val")) +for (d in 1:ncol(con)){ +test[d,1]<-d +psihat[d,1]<-d +testit<-lincon(x,con[,d],tr) +test[d,2]<-testvec[d] +temp<-sort(boot[d,]) +psihat[d,3]<-temp[icl] +psihat[d,4]<-temp[icu] +psihat[d,2]<-testit$psihat[1,2] +test[d,3]<-crit +} +list(psihat=psihat,test=test,con=con) +} + + + + +lintpb<-function(x,con=0,tr=.2,alpha=.05,nboot=NA){ +# +# Compute a 1-alpha confidence interval +# for a set of d linear contrasts +# involving trimmed means using the percentile bootstrap method. +# Independent groups are assumed. +# +# The data are assumed to be stored in x in list mode or in a matrix. +# Thus, +# x[[1]] contains the data for the first group, x[[2]] the data +# for the second group, etc. +# If x has list mode, length(x)=the number of groups = J, say. +# +# Missing values are automatically removed. +# +# con is a J by d matrix containing the +# contrast coefficents of interest. +# If unspecified, all pairwise comparisons are performed. +# For example, con[,1]=c(1,1,-1,-1,0,0) +# and con[,2]=c(,1,-1,0,0,1,-1) +# will test two contrasts: (1) the sum of the first +# two trimmed means is +# equal to the sum of the second two, +# and (2) the difference between +# the first two is equal to the difference +# between the trimmed means of +# groups 5 and 6. +# +# +con<-as.matrix(con) +if(is.matrix(x))x<-listm(x) +if(!is.list(x))stop("Data must be stored in a matrix or in list mode.") +J<-length(x) +for(j in 1:J){ +xx<-x[[j]] +xx[[j]]<-xx[!is.na(xx)] # Remove any missing values. +} +Jm<-J-1 +d<-(J^2-J)/2 +if(sum(con^2)==0){ +con<-matrix(0,J,d) +id<-0 +for (j in 1:Jm){ +jp<-j+1 +for (k in jp:J){ +id<-id+1 +con[j,id]<-1 #If con not specified do all pairwise comparisons +con[k,id]<-0-1 +}}} +if(nrow(con)!=length(x)){ +stop("The number of groups does not match the number of contrast coefficients.") +} +if(is.na(nboot)){ +nboot<-5000 +if(ncol(con)<=4)nboot<-2000 +} +m1<-matrix(0,nrow=J,ncol=nboot) +set.seed(2) # set seed of random number generator so that +# results can be duplicated. +print("Taking bootstrap samples. 
Please wait.") +for(j in 1:J){ +paste("Working on group ",j) +data<-matrix(sample(x[[j]],size=length(x[[j]])*nboot,replace=TRUE),nrow=nboot) +m1[j,]<-apply(data,1,mean,tr) +} +testb<-NA +boot<-matrix(0,ncol(con),nboot) +testvec<-NA +for (d in 1:ncol(con)){ +boot[d,]<-apply(m1,2,trimpartt,con[,d]) +# A vector of length nboot containing psi hat values +# and corresponding to the dth linear contrast +testb[d]<-sum((boot[d,]>0))/nboot +testvec[d]<-min(testb[d],1-testb[d]) +} +# +# Determine critical value +# +dd<-ncol(con) +if(alpha==.05){ +if(dd==1)crit<-alpha/2 +if(dd==2)crit<-.014 +if(dd==3)crit<-.0085 +if(dd==4)crit<-.007 +if(dd==5)crit<-.006 +if(dd==6)crit<-.0045 +if(dd==10)crit<-.0023 +if(dd==15)crit<-.0016 +} +else{ +crit<-alpha/(2*dd) +} +icl<-round(crit*nboot) +icu<-round((1-crit)*nboot) +psihat<-matrix(0,ncol(con),4) +test<-matrix(0,ncol(con),3) +dimnames(psihat)<-list(NULL,c("con.num","psihat","ci.lower","ci.upper")) +dimnames(test)<-list(NULL,c("con.num","test","crit.val")) +for (d in 1:ncol(con)){ +test[d,1]<-d +psihat[d,1]<-d +testit<-lincon(x,con[,d],tr) +test[d,2]<-testvec[d] +temp<-sort(boot[d,]) +psihat[d,3]<-temp[icl] +psihat[d,4]<-temp[icu] +psihat[d,2]<-testit$psihat[1,2] +test[d,3]<-crit +} +list(psihat=psihat,test=test,con=con) +} + +t2waypbg<-function(J,K,x,alpha=.05,nboot=NA,grp=NA,est=onestep,...){ +# +# Two-way ANOVA for independent groups based on +# robust measures of location +# and a percentile bootstrap method. + +# The data are assumed to be stored in x in list mode or in a matrix. + # If grp is unspecified, it is assumed x[[1]] contains the data + # for the first level of both factors: level 1,1. + # x[[2]] is assumed to contain the data for level 1 of the + # first factor and level 2 of the second factor: level 1,2 + # x[[j+1]] is the data for level 2,1, etc. + # If the data are in wrong order, grp can be used to rearrange the + # groups. For example, for a two by two design, grp<-c(2,4,3,1) + # indicates that the second group corresponds to level 1,1; + # group 4 corresponds to level 1,2; group 3 is level 2,1; + # and group 1 is level 2,2. +# +# Missing values are automatically removed. +# +if(is.data.frame(x))x=as.matrix(x) +JK<-J*K +if(is.matrix(x))x<-listm(x) +if(!is.na(grp)){ +yy<-x +for(j in 1:length(grp)) +x[[j]]<-yy[[grp[j]]] +} +if(!is.list(x))stop("Data must be stored in list mode or a matrix.") +for(j in 1:JK){ +xx<-x[[j]] +x[[j]]<-xx[!is.na(xx)] # Remove any missing values. +} +# +# Create the three contrast matrices +# + ij <- matrix(c(rep(1, J)), 1, J) + ik <- matrix(c(rep(1, K)), 1, K) + jm1 <- J - 1 + cj <- diag(1, jm1, J) + for(i in 1:jm1) + cj[i, i + 1] <- 0 - 1 + km1 <- K - 1 + ck <- diag(1, km1, K) + for(i in 1:km1) + ck[i, i + 1] <- 0 - 1 +conA<-t(kron(cj,ik)) +conB<-t(kron(ij,ck)) +conAB<-t(kron(cj,ck)) +ncon<-max(nrow(conA),nrow(conB),nrow(conAB)) +if(JK!=length(x)){ +print("Warning: The number of groups does not match") +print("the number of contrast coefficients.") +} +if(is.na(nboot)){ +nboot<-5000 +if(ncon<=4)nboot<-2000 +} +m1<-matrix(0,nrow=JK,ncol=nboot) +set.seed(2) # set seed of random number generator so that +# results can be duplicated. +print("Taking bootstrap samples. Please wait.") +for(j in 1:JK){ +paste("Working on group ",j) +data<-matrix(sample(x[[j]],size=length(x[[j]])*nboot,replace=TRUE),nrow=nboot) +m1[j,]<-apply(data,1,est,...) 
+} +bootA<-matrix(0,ncol(conA),nboot) +bootB<-matrix(0,ncol(conB),nboot) +bootAB<-matrix(0,ncol(conAB),nboot) +testA<-NA +testB<-NA +testAB<-NA +testvecA<-NA +testvecB<-NA +testvecAB<-NA +for (d in 1:ncol(conA)){ +bootA[d,]<-apply(m1,2,trimpartt,conA[,d]) +# A vector of length nboot containing psi hat values +# corresponding to the dth linear contrast +testA[d]<-sum((bootA[d,]>0))/nboot +testA[d]<-min(testA[d],1-testA[d]) +} +for (d in 1:ncol(conB)){ +bootB[d,]<-apply(m1,2,trimpartt,conB[,d]) +# A vector of length nboot containing psi hat values +# corresponding to the dth linear contrast +testB[d]<-sum((bootB[d,]>0))/nboot +testB[d]<-min(testB[d],1-testB[d]) +} +for (d in 1:ncol(conAB)){ +bootAB[d,]<-apply(m1,2,trimpartt,conAB[,d]) +# A vector of length nboot containing psi hat values +# corresponding to the dth linear contrast +testAB[d]<-sum((bootAB[d,]>0))/nboot +testAB[d]<-min(testAB[d],1-testAB[d]) +} +# +# Determine critical value +# +Jm<-J-1 +Km<-K-1 +JKm<-(J-1)*(K-1) +dvecA <- alpha/c(1:Jm) +dvecB <- alpha/c(1:Km) +dvecAB <- alpha/c(1:JKm) +testA<-(0 - 1) * sort(-2 * testA) +testB<-(0 - 1) * sort(-2 * testB) +testAB<-(0 - 1) * sort(-2 * testAB) +sig <- sum((testA < dvecA[1:Jm])) +if(sig > 0) +print("Significant result obtained for Factor A: Reject") +if(sig == 0) +print("No significant result Factor A: Fail to reject") +sig <- sum((testB < dvecB[1:Km])) +if(sig > 0) +print("Significant result obtained for Factor B: Reject") +if(sig == 0) +print("No significant result Factor B: Fail to reject") +sig <- sum((testAB < dvecAB[1:JKm])) +if(sig > 0) +print("Significant Interaction: Reject") +if(sig == 0) +print("No significant Interaction: Fail to reject") +list(testA=testA,crit.vecA=dvecA,testB=testB,crit.vecB=dvecB,testAB=testAB,crit.vecAB=dvecAB) +} + +regout<-function(x,y,regest=stsreg,plotit=TRUE,mbox=TRUE){ +# +# Check for regression outliers by fitting a +# a line to data using regest and then applying +# a boxplot rule to the residuals. +# mbox=T uses Carling's method +# mbox=F uses ideal fourths with conventional boxplot rules. +# +chk<-regest(x,y) +flag<-outbox(chk$residuals,mbox=mbox)$out.id +if(plotit){ +plot(x,y) +points(x[flag],y[flag],pch="o") +abline(chk$coef) +} +list(out.id=flag) +} + +stsregp1<-function(x,y,sc=pbvar,xout=FALSE,outfun=out,...){ +# +# Compute the S-type modification of +# the Theil-Sen regression estimator. +# Only a single predictor is allowed in this version +# +xy=elimna(cbind(x,y)) +p=ncol(as.matrix(x)) +if(p!=1)stop("Current version is limited to one predictor") +p1=p+1 +x=xy[,1:p] +y=xy[,p1] +x<-as.matrix(x) +if(xout){ +x<-as.matrix(x) +flag<-outfun(x,plotit=plotit,...)$keep +x<-x[flag,] +y<-y[flag] +x<-as.matrix(x) +} +ord<-order(x) +xs<-x[ord] +ys<-y[ord] +vec1<-outer(ys,ys,"-") +vec2<-outer(xs,xs,"-") +v1<-vec1[vec2>0] +v2<-vec2[vec2>0] +slope<-v1/v2 +allvar<-NA +for(i in 1:length(slope))allvar[i]<-sc(y-slope[i]*x,...) 
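+# Select the slope that minimizes the robust measure of scale (sc) applied to
+# the residuals; the intercept is then median(y)-slope*median(x).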
+temp<-order(allvar) +coef<-0 +coef[2]<-slope[temp[1]] +coef[1]<-median(y)-coef[2]*median(x) +res<-y-coef[2]*x-coef[1] +list(coef=coef,residuals=res) +} + +stsreg<-function(x,y,xout=FALSE,outfun=outpro,iter=10,sc=pbvar,varfun=pbvar, +corfun=pbcor,plotit=FALSE,...){ +# +# Compute Theil-Sen regression estimator +# +# Use Gauss-Seidel algorithm +# when there is more than one predictor +# +# +x<-as.matrix(x) +xx<-cbind(x,y) +xx<-elimna(xx) +x<-xx[,1:ncol(x)] +x<-as.matrix(x) +y<-xx[,ncol(x)+1] +temp<-NA +x<-as.matrix(x) +if(xout){ +x<-as.matrix(x) +if(identical(outfun,outblp))flag=outblp(x,y,plotit=FALSE)$keep +else +flag<-outfun(x,plotit=plotit,...)$keep +x<-x[flag,] +y<-y[flag] +x<-as.matrix(x) +} +if(ncol(x)==1){ +temp1<-stsregp1(x,y,sc=sc) +coef<-temp1$coef +res<-temp1$res +} +if(ncol(x)>1){ +for(p in 1:ncol(x)){ +temp[p]<-tsp1reg(x[,p],y)$coef[2] +} +res<-y-x%*%temp +alpha<-median(res) +r<-matrix(NA,ncol=ncol(x),nrow=nrow(x)) +tempold<-temp +for(it in 1:iter){ +for(p in 1:ncol(x)){ +r[,p]<-y-x%*%temp-alpha+temp[p]*x[,p] +temp[p]<-stsregp1(x[,p],r[,p],sc=sc)$coef[2] +} +alpha<-median(y-x%*%temp) +tempold<-temp +} +coef<-c(alpha,temp) +res<-y-x%*%temp-alpha +} +yhat<-y-res +stre=NULL +e.pow<-varfun(yhat)/varfun(y) +if(!is.na(e.pow)){ +if(e.pow>=1)e.pow<-corfun(yhat,y)$cor^2 +e.pow=as.numeric(e.pow) +stre=sqrt(e.pow) +} +list(coef=coef,residuals=res,Strength.Assoc=stre,Explanatory.Power=e.pow) +} + + + +yuend<-function(x,y,tr=.2,alpha=.05){ +# +# Compare the trimmed means of two dependent random variables +# using the data in x and y. +# The default amount of trimming is 20% +# +# Any pair with a missing value is eliminated +# The function rm2miss allows missing values. +# +# A confidence interval for the trimmed mean of x minus the +# the trimmed mean of y is computed and returned in yuend$ci. +# The significance level is returned in yuend$p.value +# +# For inferences based on difference scores, use trimci +# +if(length(x)!=length(y))stop("The number of observations must be equal") +m<-cbind(x,y) +m<-elimna(m) +x<-m[,1] +y<-m[,2] +h1<-length(x)-2*floor(tr*length(x)) +q1<-(length(x)-1)*winvar(x,tr) +q2<-(length(y)-1)*winvar(y,tr) +q3<-(length(x)-1)*wincor(x,y,tr)$cov +df<-h1-1 +se<-sqrt((q1+q2-2*q3)/(h1*(h1-1))) +crit<-qt(1-alpha/2,df) +dif<-mean(x,tr)-mean(y,tr) +low<-dif-crit*se +up<-dif+crit*se +test<-dif/se +yuend<-2*(1-pt(abs(test),df)) +list(ci=c(low,up),p.value=yuend,est1=mean(x,tr),est2=mean(y,tr),dif=dif,se=se,teststat=test,n=length(x),df=df) +} + + +rmmcppbtm<-function(x,alpha=.05,con=0,tr=.2,grp=NA,nboot=NA){ +# +# Using the percentile bootstrap method, +# compute a .95 confidence interval for all linear contasts +# specified by con, a J by C matrix, where C is the number of +# contrasts to be tested, and the columns of con are the +# contrast coefficients. +# +# The trimmed means of dependent groups are being compared. +# By default, 20% trimming is used. +# +# nboot is the bootstrap sample size. If not specified, a value will +# be chosen depending on the number of contrasts there are. +# +# x can be an n by J matrix or it can have list mode +# +# For alpha=.05, some critical values have been +# determined via simulations and are used by this function; +# otherwise an approximation is used. 
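+#
+# Intended usage (sketch, not run), with dat a hypothetical n by 3 matrix of
+# dependent groups:
+#   rmmcppbtm(dat)                                # all pairwise contrasts, 20% trimming
+#   conmat <- matrix(c(1,-1,0, 0,1,-1), nrow=3)   # columns are contrast coefficients
+#   rmmcppbtm(dat, con=conmat)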
+# +if(!is.list(x) && !is.matrix(x))stop("Data must be stored in a matrix or in list mode.") +if(is.list(x)){ +if(is.matrix(con)){ +if(length(x)!=nrow(con))stop("The number of rows in con is not equal to the number of groups.") +}} +if(is.list(x)){ +# put the data in an n by J matrix +mat<-matrix(0,length(x[[1]]),length(x)) +for (j in 1:length(x))mat[,j]<-x[[j]] +} +if(is.matrix(x) && is.matrix(con)){ +if(ncol(x)!=nrow(con))stop("The number of rows in con is not equal to the number of groups.") +mat<-x +} +if(is.matrix(x))mat<-x +if(!is.na(sum(grp)))mat<-mat[,grp] +mat<-elimna(mat) # Remove rows with missing values. +J<-ncol(mat) +Jm<-J-1 +if(sum(con^2)==0){ +d<-(J^2-J)/2 +con<-matrix(0,J,d) +id<-0 +for (j in 1:Jm){ +jp<-j+1 +for (k in jp:J){ +id<-id+1 +con[j,id]<-1 +con[k,id]<-0-1 +}}} +d<-ncol(con) +if(is.na(crit) && tr != .2){ +print("A critical value must be specified when") +stop("the amount of trimming differs from .2") +} +if(is.na(nboot)){ +if(d<=3)nboot<-1000 +if(d==6)nboot<-2000 +if(d==10)nboot<-4000 +if(d==15)nboot<-8000 +if(d==21)nboot<-8000 +if(d==28)nboot<-10000 +} +n<-nrow(mat) +crit<-NA +if(alpha==.05){ +if(d==1)crit<-alpha/2 +if(d==3){ +crit<-.004 +if(n>=15)crit<-.006 +if(n>=30)crit<-.007 +if(n>=40)crit<-.008 +if(n>=100)crit<-.009 +} +if(d==6){ +crit<-.001 +if(n>=15)crit<-.002 +if(n>=20)crit<-.0025 +if(n>=30)crit<-.0035 +if(n>=40)crit<-.004 +if(n>=60)crit<-.0045 +} +if(d==10){ +crit<-.00025 +if(n>=15)crit<-.00125 +if(n>=20)crit<-.0025 +} +if(d==15){ +crit<-.0005 +if(n>=20)crit<-.0010 +if(n>=30)crit<-.0011 +if(n>=40)crit<-.0016 +if(n>=100)crit<-.0019 +} +if(d==21){ +crit<-.00025 +if(n>=20)crit<-.00037 +if(n>=30)crit<-.00075 +if(n>=40)crit<-.00087 +if(n>=60)crit<-.00115 +if(n>=100)crit<-.00125 +} +if(d==28){ +crit<-.0004 +if(n>=30)crit<-.0006 +if(n>=60)crit<-.0008 +if(n>=100)crit<-.001 +} +} +if(is.na(crit)){ +crit<-alpha/(2*d) +if(n<20)crit<-crit/2 +if(n<=10)crit<-crit/2 +} +icl<-ceiling(crit*nboot)+1 +icu<-ceiling((1-crit)*nboot) +connum<-ncol(con) +set.seed(2) # set seed of random number generator so that +# results can be duplicated. +# data is an nboot by n matrix +xbars<-matrix(0,nboot,ncol(mat)) +psihat<-matrix(0,connum,nboot) +print("Taking bootstrap samples. Please wait.") +bvec<-bootdep(mat,tr,nboot) +# +# Now have an nboot by J matrix of bootstrap values. +# +test<-1 +for (ic in 1:connum){ +psihat[ic,]<-apply(bvec,1,bptdpsi,con[,ic]) +test[ic]<-sum((psihat[ic,]>0))/nboot +test[ic]<-min(test[ic],1-test[ic]) +} +print("Reminder: Test statistic must be less than critical value in order to reject.") +output<-matrix(0,connum,5) +dimnames(output)<-list(NULL,c("con.num","psihat","test","ci.lower","ci.upper")) +tmeans<-apply(mat,2,mean,trim=tr) +psi<-1 +for (ic in 1:ncol(con)){ +output[ic,2]<-sum(con[,ic]*tmeans) +output[ic,1]<-ic +output[ic,3]<-test[ic] +temp<-sort(psihat[ic,]) +output[ic,4]<-temp[icl] +output[ic,5]<-temp[icu] +} +list(output=output,crit=crit,con=con) +} + +mcppb20<-function(x,crit=NA,con=0,tr=.2,alpha=.05,nboot=2000,grp=NA,WIN=FALSE, +win=.1){ +# +# Compute a 1-alpha confidence interval for a set of d linear contrasts +# involving trimmed means using the percentile bootstrap method. +# Independent groups are assumed. +# +# The data are assumed to be stored in x in list mode. Thus, +# x[[1]] contains the data for the first group, x[[2]] the data +# for the second group, etc. Length(x)=the number of groups = J, say. +# +# By default, all pairwise comparisons are performed, but contrasts +# can be specified with the argument con. 
+# The columns of con indicate the contrast coefficients. +# Con should have J rows, J=number of groups. +# For example, con[,1]=c(1,1,-1,-1,0,0) and con[,2]=c(,1,-1,0,0,1,-1) +# will test two contrasts: (1) the sum of the first two trimmed means is +# equal to the sum of the second two, and (2) the difference between +# the first two is equal to the difference between the trimmed means of +# groups 5 and 6. +# +# The default number of bootstrap samples is nboot=2000 +# +# +con<-as.matrix(con) +if(is.matrix(x)){ +xx<-list() +for(i in 1:ncol(x)){ +xx[[i]]<-x[,i] +} +x<-xx +} +if(!is.list(x))stop("Data must be stored in list mode or in matrix mode.") +if(!is.na(sum(grp))){ # Only analyze specified groups. +xx<-list() +for(i in 1:length(grp))xx[[i]]<-x[[grp[1]]] +x<-xx +} +J<-length(x) +tempn<-0 +for(j in 1:J){ +temp<-x[[j]] +temp<-temp[!is.na(temp)] # Remove missing values. +tempn[j]<-length(temp) +x[[j]]<-temp +} +Jm<-J-1 +d<-ifelse(sum(con^2)==0,(J^2-J)/2,ncol(con)) +if(is.na(crit) && tr != .2){ +print("A critical value must be specified when") +stop("the amount of trimming differs from .2") +} +if(WIN){ +if(tr < .2){ +print("Warning: When Winsorizing, the amount") +print("of trimming should be at least .2") +} +if(win > tr)stop("Amount of Winsorizing must <= amount of trimming") +if(min(tempn) < 15){ +print("Warning: Winsorizing with sample sizes") +print("less than 15 can result in poor control") +print("over the probability of a Type I error") +} +for (j in 1:J){ +x[[j]]<-winval(x[[j]],win) +} +} +if(is.na(crit)){ +if(d==1)crit<-alpha/2 +if(d==2 && alpha==.05 && nboot==1000)crit<-.014 +if(d==2 && alpha==.05 && nboot==2000)crit<-.014 +if(d==3 && alpha==.05 && nboot==1000)crit<-.009 +if(d==3 && alpha==.05 && nboot==2000)crit<-.0085 +if(d==3 && alpha==.025 && nboot==1000)crit<-.004 +if(d==3 && alpha==.025 && nboot==2000)crit<-.004 +if(d==3 && alpha==.01 && nboot==1000)crit<-.001 +if(d==3 && alpha==.01 && nboot==2000)crit<-.001 +if(d==4 && alpha==.05 && nboot==2000)crit<-.007 +if(d==5 && alpha==.05 && nboot==2000)crit<-.006 +if(d==6 && alpha==.05 && nboot==1000)crit<-.004 +if(d==6 && alpha==.05 && nboot==2000)crit<-.0045 +if(d==6 && alpha==.025 && nboot==1000)crit<-.002 +if(d==6 && alpha==.025 && nboot==2000)crit<-.0015 +if(d==6 && alpha==.01 && nboot==2000)crit<-.0005 +if(d==10 && alpha==.05 && nboot<=2000)crit<-.002 +if(d==10 && alpha==.05 && nboot==3000)crit<-.0023 +if(d==10 && alpha==.025 && nboot<=2000)crit<-.0005 +if(d==10 && alpha==.025 && nboot==3000)crit<-.001 +if(d==15 && alpha==.05 && nboot==2000)crit<-.0016 +if(d==15 && alpha==.025 && nboot==2000)crit<-.0005 +if(d==15 && alpha==.05 && nboot==5000)crit<-.0026 +if(d==15 && alpha==.025 && nboot==5000)crit<-.0006 +} +if(is.na(crit) && alpha==.05)crit<-0.0268660714*(1/d)-0.0003321429 +if(is.na(crit))crit<-alpha/(2*d) +if(d> 10 && nboot <5000){ +print("Warning: Suggest using nboot=5000") +print("when the number of contrasts exceeds 10.") +} +icl<-round(crit*nboot)+1 +icu<-round((1-crit)*nboot) +if(sum(con^2)==0){ +con<-matrix(0,J,d) +id<-0 +for (j in 1:Jm){ +jp<-j+1 +for (k in jp:J){ +id<-id+1 +con[j,id]<-1 +con[k,id]<-0-1 +}}} +psihat<-matrix(0,ncol(con),6) +dimnames(psihat)<-list(NULL,c("con.num","psihat","se","ci.lower", +"ci.upper","p-value")) +if(nrow(con)!=length(x)){ +print("The number of groups does not match") +stop("the number of contrast coefficients.") +} +bvec<-matrix(NA,nrow=J,ncol=nboot) +set.seed(2) # set seed of random number generator so that +# results can be duplicated. +print("Taking bootstrap samples. 
Please wait.") +for(j in 1:J){ +paste("Working on group ",j) +data<-matrix(sample(x[[j]],size=length(x[[j]])*nboot,replace=TRUE),nrow=nboot) +bvec[j,]<-apply(data,1,mean,tr) # Bootstrapped trimmed means for jth group +} +test<-NA +for (d in 1:ncol(con)){ +top<-0 +for (i in 1:J){ +top<-top+con[i,d]*bvec[i,] +} +test[d]<-(sum(top>0)+.5*sum(top==0))/nboot +test[d]<-min(test[d],1-test[d]) +top<-sort(top) +psihat[d,4]<-top[icl] +psihat[d,5]<-top[icu] +} +for (d in 1:ncol(con)){ +psihat[d,1]<-d +testit<-lincon(x,con[,d],tr,pr=FALSE) +psihat[d,6]<-2*test[d] +psihat[d,2]<-testit$psihat[1,2] +psihat[d,3]<-testit$test[1,4] +} +list(psihat=psihat,crit.p.value=2*crit,con=con) +} + +comvar2d<-function(x,y,SEED=TRUE){ +# +# Compare the variances of two dependent groups. +# +nboot<-599 +m<-cbind(x,y) +m<-elimna(m) # Remove missing values +U<-m[,1]-m[,2] +V<-m[,1]+m[,2] +ci<-pcorb(U,V,SEED=SEED)$ci +list(n=nrow(m),ci=ci) +} +mom<-function(x,bend=2.24,na.rm=TRUE){ +# +# Compute MOM-estimator of location. +# The default bending constant is 2.24 +# +if(na.rm)x<-x[!is.na(x)] #Remove missing values +flag1<-(x>median(x)+bend*mad(x)) +flag2<-(xnull.value)+.5*mean(bvec==null.value) +pv=2*min(c(pv,1-pv)) +list(ci=c(bvec[low],bvec[up]),p.value=pv,est.mom=est) +} + + +rmanogsub<-function(isub,x,est=onestep,...){ +tsub <- est(x[isub],...) +tsub +} + +bd1way1<-function(isub,xcen,est,misran,...){ +# +# Compute test statistic for bd1way +# +# isub is a vector of length n, +# a bootstrap sample from the sequence of integers +# 1, 2, 3, ..., n +# +# xcen is an n by J matrix containing the input data +# +val<-vector("numeric") +for (j in 1:ncol(xcen))val[j]<-est(xcen[isub,j],na.rm=misran,...) +bd1way1<-(length(val)-1)*var(val) +bd1way1 +} + + +bicovm<-function(x){ +# +# compute a biweight midcovariance matrix for the vectors of +# observations in x, where x is assumed to have list mode, or +# x is an n by p matrix +# +if(is.matrix(x)){ +mcov<-matrix(0,ncol(x),ncol(x)) +mcor<-matrix(0,ncol(x),ncol(x)) +for (i in 1:ncol(x)){ +for (j in 1:ncol(x))mcov[i,j]<-bicov(x[,i],x[,j]) +} +} +if(is.list(x)){ +mcov<-matrix(0,length(x),length(x)) +mcor<-matrix(0,length(x),length(x)) +for (i in 1:length(x)){ +for (j in 1:length(x))mcov[i,j]<-bicov(x[[i]],x[[j]]) +} +} +for (i in 1:ncol(mcov)){ +for (j in 1:ncol(mcov))mcor[i,j]<-mcov[i,j]/sqrt(mcov[i,i]*mcov[j,j]) +} +list(mcov=mcov,mcor=mcor) +} + +bicovM<-function(x){ +M=bicovm(x)$mcov +M +} + +apdis<-function(m,est=sum,...){ +# +# For bivariate data, +# compute distance between each pair +# of points and measure depth of a point +# in terms of its distance to all +# other points +# +# m is an n by 2 matrix +# (In this version, ncol(m)=2 only, for general +# case, use apgdis +# +m<-elimna(m) # eliminate any missing values +disx<-outer(m[,1],m[,1],"-") +disy<-outer(m[,2],m[,2],"-") +temp<-sqrt(disx^2+disy^2) +dis<-apply(temp,1,est,...) +dis +temp2<-order(dis) +center<-m[temp2[1],] +list(center=center,distance=dis) +} + +onesampb<-function(x,est=onestep,alpha=.05,nboot=2000,SEED=TRUE,nv=0,null.value=NULL,...){ +# +# Compute a bootstrap, .95 confidence interval for the +# measure of location corresponding to the argument est. +# By default, a one-step +# M-estimator of location based on Huber's Psi is used. +# The default number of bootstrap samples is nboot=500 +# +# nv=null value when computing a p-value +# +if(!is.null(null.value))nv=null.value +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +#print("Taking bootstrap samples. 
Please wait.") +x=elimna(x) +data<-matrix(sample(x,size=length(x)*nboot,replace=TRUE),nrow=nboot) +bvec<-apply(data,1,est,...) +bvec<-sort(bvec) +low<-round((alpha/2)*nboot) +up<-nboot-low +low<-low+1 +pv=mean(bvec>nv)+.5*mean(bvec==nv) +pv=2*min(c(pv,1-pv)) +estimate=est(x,...) +list(ci=c(bvec[low],bvec[up]),n=length(x),estimate=estimate,p.value=pv) +} + + +pdep<-function(x,y,alpha=.05){ +# +# For two dependent variables, x and y, +# estimate p=P(X.5)pvec[i]<-1-pvec[i] +regci[i,1]<-bsort[ilow] +regci[i,2]<-bsort[ihi] +se[i]<-sqrt(var(bvec[i,])) +} +pvec<-2*pvec +list(regci=regci,p.value=pvec,se=se) +} + + +pbcan<-function(x,nboot=1000,grp=NA,est=onestep,...){ +# +# Test the hypothesis that J independent groups have +# equal measures of location using the percentile bootstrap method. +# in conjunction with a partially centering technique. +# +# The data are assumed to be stored in x +# which either has list mode or is a matrix. In the first case +# x[[1]] contains the data for the first group, x[[2]] the data +# for the second group, etc. Length(x)=the number of groups = J. +# If stored in a matrix, the columns of the matrix correspond +# to groups. +# +# est is the measure of location and defaults to an M-estimator +# ... can be used to set optional arguments associated with est +# +# The argument grp can be used to analyze a subset of the groups +# Example: grp=c(1,3,5) would compare groups 1, 3 and 5. +# +# Missing values are allowed. +# +if(is.matrix(x))x<-listm(x) +if(!is.list(x))stop("Data must be stored in list mode or in matrix mode.") +if(!is.na(sum(grp))){ # Only analyze specified groups. +xx<-list() +for(i in 1:length(grp))xx[[i]]<-x[[grp[1]]] +x<-xx +} +J<-length(x) +tempn<-0 +vecm<-0 +for(j in 1:J){ +temp<-x[[j]] +temp<-temp[!is.na(temp)] # Remove missing values. +tempn[j]<-length(temp) +x[[j]]<-temp +vecm[j]<-est(x[[j]],...) +} +xcen<-list() +flag<-rep(TRUE,J) +for(j in 1:J){ +flag[j]<-FALSE +temp<-mean(vecm[flag]) +xcen[[j]]<-x[[j]]-temp +flag[j]<-T +} +icrit<-round((1-alpha)*nboot) +bvec<-matrix(NA,nrow=J,ncol=nboot) +set.seed(2) # set seed of random number generator so that +# results can be duplicated. +print("Taking bootstrap samples. Please wait.") +for(j in 1:J){ +paste("Working on group ",j) +data<-matrix(sample(xcen[[j]],size=length(x[[j]])*nboot,replace=TRUE),nrow=nboot) +bvec[j,]<-apply(data,1,est,...) # Bootstrapped values for jth group +} +vvec<-NA +for(j in 1:J){ +vvec[j]<-sum((bvec[j,]-vecm[j])^2)/(nboot-1) +} +dis<-NA +for(i in 1:nboot){ +dis[i]<-sum((bvec[,i]-vecm)^2/vvec) +} +tvec<-sum((0-vecm)^2/vvec) +dis<-sort(dis) +print(tvec) +print(dis[icrit]) +print(vecm) +sig<-1-sum((tvec>=dis))/nboot +list(p.value=sig) +} + +rmaseq<-function(x,est=onestep,alpha=.05,grp=NA,nboot=NA,...){ +# +# Using the percentile bootstrap method, +# test hypothesis that all marginal distributions +# among J dependent groups +# have a common measure of location. +# This is done by using a sequentially rejective method +# of J-1 pairs of groups. +# That is, compare group 1 to group 2, group 2 to group 3, etc. +# +# By default, onestep M-estimator is used. +# +# nboot is the bootstrap sample size. If not specified, a value will +# be chosen depending on the number of groups +# +# x can be an n by J matrix or it can have list mode +# grp can be used to specify a subset of the groups for analysis +# +# the argument ... can be used to specify options associated +# with the argument est. 
+# +if(!is.list(x) && !is.matrix(x)){ +stop("Data must be stored in a matrix or in list mode.") +} +if(is.list(x)){ +# put the data in an n by J matrix +mat<-matrix(0,length(x[[1]]),length(x)) +for (j in 1:length(x))mat[,j]<-x[[j]] +} +if(is.matrix(x))mat<-x +mat<-elimna(mat) # Remove rows with missing values. +J<-ncol(mat) +Jm<-J-1 +con<-matrix(0,ncol=Jm,nrow=J) +for(j in 1:Jm){ +jp<-j+1 +for(k in j:jp){ +con[j,j]<-1 +con[jp,j]<-0-1 +}} +rmmcp(x,est=est,alpha=alpha,con=con,nboot=nboot,...) +} + +rmanog<-function(x,alpha=.05,est=onestep,grp=NA,nboot=NA,...){ +# +# Using the percentile bootstrap method, +# test the hypothesis that all differences among J +# dependent groups have a +# measure of location equal to zero. +# That is, if +# Dij is the difference between ith observations +# in groups j and j+1, +# and Dij has measure of location muj +# the goal is to test +# H0: mu1=mu2=...=0 +# +# By default, an M-estimator is used. +# +# nboot is the bootstrap sample size. If not specified, a value will +# be chosen depending on the number of groups +# +# x can be an n by J matrix or it can have list mode +# grp can be used to specify a subset of the groups for analysis +# +# the argument ... can be used to specify options associated +# with the argument est. +# +if(!is.list(x) && !is.matrix(x))stop("Data must be stored in a matrix or in list mode.") +if(is.list(x)){ +# put the data in an n by J matrix +mat<-matrix(0,length(x[[1]]),length(x)) +for (j in 1:length(x))mat[,j]<-x[[j]] +} +if(is.matrix(x))mat<-x +mat<-elimna(mat) # Remove rows with missing values. +J<-ncol(mat) +Jm<-J-1 +jp<-0 +dif<-matrix(NA,nrow=nrow(mat),ncol=Jm) +for(j in 1:Jm){ +jp<-j+1 +dif[,j]<-mat[,j]-mat[,jp] +} +if(is.na(nboot)){ +nboot<-5000 +if(Jm <= 4)nboot<-1000 +} +print("Taking bootstrap samples. Please wait.") +data <- matrix(sample(nrow(mat), size = nrow(mat) * nboot, replace = T), + nrow = nboot) +bvec <- matrix(NA, ncol = ncol(dif), nrow = nboot) + for(j in 1:ncol(dif)) { + temp <- dif[, j] + bvec[, j] <- apply(data, 1., rmanogsub, temp, est) + } #bvec is an nboot by Jm matrix +testvec<-NA +for(j in 1:Jm){ +testvec[j]<-sum(bvec[,j]>0)/nboot +if(testvec[j] > .5)testvec[j]<-1-testvec[j] +} +critvec<-alpha/c(1:Jm) +#testvec<-2*testvec[order(-1*testvec)] +test<-2*testvec +test.sort<-order(-1*test) +chk<-sum((test.sort <= critvec)) +if(chk > 0)print("Significant difference found") +output<-matrix(0,Jm,6) +dimnames(output)<-list(NULL,c("con.num","psihat","sig","crit.sig","ci.lower","ci.upper")) +tmeans<-apply(dif,2,est,...) 
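+# tmeans holds the estimated location of each column of difference scores.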
+psi<-1 +output[,2]<-tmeans +for (ic in 1:Jm){ +output[ic,1]<-ic +output[ic,3]<-test[ic] +crit<-critvec[ic] +output[test.sort[ic],4]<-crit +} +for(ic in 1:Jm){ +icrit<-output[ic,4] +icl<-round(icrit*nboot/2)+1 +icu<-round((1-icrit/2)*nboot) +temp<-sort(bvec[,ic]) +output[ic,5]<-temp[icl] +output[ic,6]<-temp[icu] +} +list(output=output) +} + +ecor<-function(x,y,pcor=FALSE,regfun=tsreg,corfun=pbcor,outkeep=FALSE,outfun=outmgvf){ +# +# Estimate the explanatory correlation between x and y +# +# It is assumed that x is a vector or a matrix having one column only +xx<-elimna(cbind(x,y)) # Remove rows with missing values +x<-xx[,1] +y<-xx[,2] +x<-as.matrix(x) +if(ncol(x) > 1)stop("x must be a vector or matrix with one column") +flag<-rep(TRUE,nrow(x)) +if(!outkeep){ +temp<-outfun(cbind(x,y))$out.id +flag[temp]<-FALSE +} +coef<-regfun(x,y)$coef +ip<-ncol(x)+1 +yhat<-x %*% coef[2:ip] + coef[1] +if(pcor)epow2<-cor(yhat[flag],y[flag])^2 +if(!pcor)epow2<-corfun(yhat[flag],y[flag])$cor^2 +ecor<-sqrt(epow2)*sign(coef[2]) +ecor +} +ocor<-function(x,y,corfun=pbcor,outfun=outmgvf,pcor=FALSE,plotit=FALSE){ +# +# Compute a correlation when outliers are ignored. +# +xx<-elimna(cbind(x,y)) # Remove rows with missing values +x<-xx[,1] +y<-xx[,2] +flag<-rep(TRUE,length(x)) +temp<-outfun(cbind(x,y),plotit=plotit)$out.id +flag[temp]<-FALSE +if(pcor)ocor<-cor(x[flag],y[flag]) +if(!pcor)ocor<-corfun(x[flag],y[flag])$cor +list(cor=ocor) +} + + +rmdzero<-function(x,est=mom,grp=NA,nboot=500,SEED=TRUE,...){ +# +# Do ANOVA on dependent groups +# using # depth of zero among bootstrap values +# based on difference scores. +# +# The data are assumed to be stored in x in list mode +# or in a matrix. In the first case +# x[[1]] contains the data for the first group, x[[2]] the data +# for the second group, etc. Length(x)=the number of groups = J. +# If stored in a matrix, columns correspond to groups. +# +# grp is used to specify some subset of the groups, if desired. +# By default, all J groups are used. +# +# The default number of bootstrap samples is nboot=500 +# +if(!is.list(x) && !is.matrix(x))stop("Data must be stored in a matrix or in list mode.") +if(is.list(x)){ +# put the data in an n by J matrix +mat<-matrix(0,length(x[[1]]),length(x)) +for (j in 1:length(x))mat[,j]<-x[[j]] +} +if(is.matrix(x))mat<-x +if(!is.na(grp[1])){ +mat<-mat[,grp] +} +mat<-elimna(mat) # Remove rows with missing values. +J<-ncol(mat) +jp<-0 +Jall<-(J^2-J)/2 +dif<-matrix(NA,nrow=nrow(mat),ncol=Jall) +ic<-0 +for(j in 1:J){ +for(k in 1:J){ +if(jcrit,1,0) +id<-vec[chk==1] +keep<-vec[chk==0] +x<-as.matrix(x) +if(plotit && ncol(x)==2){ +plot(x[,1],x[,2],xlab="X",ylab="Y",type="n") +flag<-rep(TRUE,nrow(x)) +flag[id]<-FALSE +points(x[flag,1],x[flag,2]) +if(sum(chk)!=0)points(x[!flag,1],x[!flag,2],pch=outsym) +} +if(SEED) { + assign(x='.Random.seed', value=oldSeed, envir=.GlobalEnv) +} +list(out.id=id,keep.id=keep,dis=dis,crit=crit) +} + +rundis<-function(x,y,est=onestep,plotit=TRUE,pyhat=FALSE,...){ +# +# Do a smooth where x is discrete with a +# relatively small number of values. +# +temp<-sort(unique(x)) +yhat<-NA +for(i in 1:length(temp)){ +flag<-(temp[i]==x) +yhat[i]<-est(y[flag],...) +} +plot(x,y) +lines(temp,yhat) +output<-"Done" +if(pyhat)output<-yhat +output +} + +bdm<-function(x,grp=NA){ +# +# Perform the Brunner, Dette, Munk rank-based ANOVA +# (JASA, 1997, 92, 1494--1502) +# +# x can be a matrix with columns corresponding to groups +# or it can have list mode. 
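+#
+# Example (sketch, not run), with x a hypothetical matrix whose columns are
+# independent groups:
+#   bdm(x)   # returns the BDM F statistic, degrees of freedom,
+#            # relative effects (q.hat) and p-value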
+# +if(is.matrix(x))x<-listm(x) +J<-length(x) +xx<-list() +if(is.na(grp[1]))grp<-c(1:J) +for(j in 1:J)xx[[j]]<-x[[grp[j]]] +Ja<-matrix(1,J,J) +Ia<-diag(1,J) +Pa<-Ia-Ja/J +cona<-Pa +outA<-bdms1(xx,cona) +outA +} +cori<-function(x,y,z,pt=median(z),fr=.8,est=onestep,corfun=pbcor,testit=FALSE, +nboot=599,sm=FALSE,xlab="X",ylab="Y",...){ +# +# Split the data according to whether z is < or > pt, then +# use runmean2g to plot a smooth of the regression +# lines corresponding to these two groups. +# +# If testit=T, the hypothesis of equal correlations is tested using the +# the R function twocor +# +m<-cbind(x,y,z) +m<-elimna(m) +x<-m[,1] +y<-m[,2] +z<-m[,3] +flag<-(z0] +v2<-vec2[vec2>0] +slope<-v1/v2 +tmin<-wrregfun(slope[1],x,y) +ikeep<-1 +for(i in 2:length(slope)){ +tryit<-wrregfun(slope[i],x,y) +if(tryit1){ +for(p in 1:ncol(x)){ +temp[p]<-wsp1reg(x[,p],y)$coef[2] +} +res<-y-x%*%temp +alpha<-median(res) +r<-matrix(NA,ncol=ncol(x),nrow=nrow(x)) +tempold<-temp +for(it in 1:iter){ +for(p in 1:ncol(x)){ +r[,p]<-y-x%*%temp-alpha+temp[p]*x[,p] +temp[p]<-wsp1reg(x[,p],r[,p],plotit=FALSE)$coef[2] +} +alpha<-median(y-x%*%temp) +if(max(abs(tempold-temp))<.0001)break +tempold<-temp +} +coef<-c(alpha,temp) +res<-y-x%*%temp-alpha +} +list(coef=coef,residuals=res) +} + +mgvar<-function(m,se=FALSE,op=0,cov.fun=covmve,SEED=TRUE){ +# +# Find the center of a scatterplot, add point that +# increases the generalized variance by smallest amount +# continue for all points +# return the generalized variance +# values corresponding to each point. +# The central values and point(s) closest to it get NA +# +# op=0 find central points using pairwise differences +# op!=0 find central points using measure of location +# used by cov.fun +# +# choices for cov.fun include +# covmve +# covmcd +# tbs (Rocke's measures of location +# rmba (Olive's median ball algorithm) +# +if(op==0)temp<-apgdis(m,se=se)$distance +if(op!=0)temp<-out(m,cov.fun=cov.fun,plotit=FALSE,SEED=SEED)$dis +flag<-(temp!=min(temp)) +temp2<-temp +temp2[!flag]<-max(temp) +flag2<-(temp2!=min(temp2)) +flag[!flag2]<-F +varvec<-NA +while(sum(flag)>0){ +ic<-0 +chk<-NA +remi<-NA +for(i in 1:nrow(m)){ +if(flag[i]){ +ic<-ic+1 +chk[ic]<-gvar(rbind(m[!flag,],m[i,])) +remi[ic]<-i +}} +sor<-order(chk) +k<-remi[sor[1]] +varvec[k]<-chk[sor[1]] +flag[k]<-F +} +varvec +} + +outmgv<-function(x,y=NULL,plotit=TRUE,outfun=outbox,se=TRUE,op=1,ndir=1000, +cov.fun=rmba,xlab="X",ylab="Y",SEED=TRUE,STAND=FALSE,...){ +# +# Check for outliers using mgv method +# +# NOTE: if columns of the input matrix are reordered, this can +# have an effect on the results due to rounding error when calling +# the R function eigen. +# +# (Argument STAND is included simply to avoid programming issues when outmgv is called by other functions.) +# +if(is.null(y[1]))m<-x +if(!is.null(y[1]))m<-cbind(x,y) +m=elimna(m) +m=as.matrix(m) +nv=nrow(m) +temp<-mgvar(m,se=se,op=op,cov.fun=cov.fun,SEED=SEED) +temp[is.na(temp)]<-0 +if(ncol(m)==1){ +temp2=outpro(m) +nout=temp2$n.out +keep=temp2$keep +temp2=temp2$out.id +} +if(ncol(m)>1){ +if(ncol(m)==2)temp2<-outfun(temp,...) 
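+# With more than two variables, outliers are flagged by applying a boxplot
+# rule to the MGV depths with an adjusted critical value.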
+if(ncol(m)>2){ +temp2<-outbox(temp,mbox=TRUE,gval=sqrt(qchisq(.975,ncol(m)))) +} +if(plotit && ncol(m)==2){ +x<-m[,1] +y<-m[,2] +plot(x,y,type="n",xlab=xlab,ylab=ylab) +points(x[temp2$keep],y[temp2$keep],pch="*") +if(!is.null(temp2$out.id))points(x[temp2$out.id],y[temp2$out.id],pch="o") + +d=prodepth(m,ndir=ndir,SEED=SEED) +dis=1/d +id.cen=which(d==max(d)) +if(length(id.cen)==1)center=m[id.cen,] +else +center=apply(m[id.cen,],2,mean) +points(center[1],center[2],pch="+") +flag=which(d>=median(d)) +xx<-m[flag,] +xord<-order(xx[,1]) +xx<-xx[xord,] +temp<-chull(xx) +lines(xx[temp,]) +lines(xx[c(temp[1],temp[length(temp)]),]) +} +nout=0 +if(!is.na(temp2[1]))nout=length(temp2$out.id) +} +list(n=nv,n.out=nout,out.id=temp2$out.id,keep=temp2$keep) +} + +outmgvf<-function(x,y=NA,plotit=TRUE,outfun=outbox,se=TRUE,ndir=1000,SEED=TRUE,...){ +# +# Check for outliers using inward mgv method +# This method is faster than outmgv. +# +if(is.na(y[1]))m<-x +if(!is.na(y[1]))m<-cbind(x,y) +m<-elimna(m) # eliminate any rows with missing values +if(se){ +for(i in 1:ncol(m))m[,i]<-(m[,i]-median(m[,i]))/mad(m[,i]) +} +iflag<-rep(TRUE,nrow(m)) +dval<-0 +for(i in 1:nrow(m)){ +dval[i]<-gvar(m[-i,]) +} +temp2<-outfun(dval,...) +if(plotit && ncol(m)==2){ +flag=which(dval<=median(dval)) +x<-m[,1] +y<-m[,2] +plot(x,y,type="n",xlab="X",ylab="Y") +points(x[temp2$keep],y[temp2$keep],pch='*') +d=prodepth(m,ndir=ndir,SEED=SEED) +dis=1/d +id.cen=which(d==max(d)) +center=apply(m[id,],2,mean) +points(center[1],center[2],pch="+") +flag=which(d>=median(d)) +xx<-m[flag,] +xord<-order(xx[,1]) +xx<-xx[xord,] +temp<-chull(xx) +lines(xx[temp,]) +lines(xx[c(temp[1],temp[length(temp)]),]) +if(!is.null(temp2$out.id))points(x[temp2$out.id],y[temp2$out.id],pch="o") +} +list(n=temp2$n,out.id=temp2$out.id,keep=temp2$keep,out.val=m[temp2$out.id,],depth.values=dval) +} + +epow<-function(x,y,pcor=FALSE,regfun=tsreg,corfun=pbcor,outkeep=FALSE,outfun=outmgvf,varfun=pbvar,op=TRUE){ +# +# Estimate the explanatory power between x and y +# +xx<-elimna(cbind(x,y)) +pval<-1 +if(is.matrix(x))pval<-ncol(x) +pp<-pval+1 +x<-xx[,1:pval] +y<-xx[,pp] +x<-as.matrix(x) +flag<-rep(TRUE,nrow(x)) +temp<-regfun(x,y) +ip<-ncol(x)+1 +yhat<-y-temp$res +if(!outkeep){ +temp<-outfun(cbind(x,y),plotit=FALSE)$out.id +flag[temp]<-FALSE +} +epow1<-varfun(yhat[flag])/varfun(y[flag]) +if(pcor)epow2<-cor(yhat[flag],y[flag])^2 +if(!pcor)epow2<-corfun(yhat[flag],y[flag])$cor^2 +if(op)est<-epow2 +if(!op)est<-epow1 +est +} + +cmanova<-function(J,K,x,grp=c(1:JK),JK=J*K){ +# +# Perform the Choi and Marden +# multivariate one-way rank-based ANOVA +# (Choi and Marden, JASA, 1997, 92, 1581-1590. +# +# x can be a matrix with columns corresponding to groups +# or it can have list mode. 
+# +# Have a J by K design with J independent levels and K dependent +# measures +# +# +x=elimna(x) +if(is.matrix(x))x<-listm(x) +xx<-list() +nvec<-NA +jk<-0 +for(j in 1:J){ +for(k in 1:K){ +jk<-jk+1 +xx[[jk]]<-x[[grp[jk]]] +if(k==1)nvec[j]<-length(xx[[jk]]) +}} +N<-sum(nvec) +RVALL<-matrix(0,nrow=N,K) +x<-xx +jk<-0 +rmean<-matrix(NA,nrow=J,ncol=K) +for(j in 1:J){ +RV<-matrix(0,nrow=nvec[j],ncol=K) +jk<-jk+1 +temp1<-matrix(x[[jk]],ncol=1) +for(k in 2:K){ +jk<-jk+1 +temp1<-cbind(temp1,x[[jk]]) +} +X<-temp1 +if(j==1)XALL<-X +if(j>1)XALL<-rbind(XALL,X) +n<-nvec[j] +for(i in 1:n){ +for (ii in 1:n){ +temp3<-sqrt(sum((X[i,]-X[ii,])^2)) +if(temp3 != 0)RV[i,]<-RV[i,]+(X[i,]-X[ii,])/temp3 +} +RV[i,]<-RV[i,]/nvec[j] +if(j==1 && i==1)sighat<-RV[i,]%*%t(RV[i,]) +if(j>1 || i>1)sighat<-sighat+RV[i,]%*%t(RV[i,]) +} +} +# Assign ranks to pooled data and compute R bar for each group +for(i in 1:N){ +for (ii in 1:N){ +temp3<-sqrt(sum((XALL[i,]-XALL[ii,])^2)) +if(temp3 != 0)RVALL[i,]<-RVALL[i,]+(XALL[i,]-XALL[ii,])/temp3 +} +RVALL[i,]<-RVALL[i,]/N +} +bot<-1-nvec[1] +top<-0 +for(j in 1:J){ +bot<-bot+nvec[j] +top<-top+nvec[j] +flag<-c(bot:top) +rmean[j,]<-apply(RVALL[flag,],2,mean) +} +sighat<-sighat/(N-J) +shatinv<-solve(sighat) +KW<-0 +for(j in 1:J){ +KW<-KW+nvec[j]*t(rmean[j,])%*%shatinv%*%rmean[j,] +} +df<-K*(J-1) +sig.level<-1-pchisq(KW,df) +list(test.stat=KW[1,1],df=df,p.value=sig.level) +} + + +signt<-function(x,y=NULL,dif=NULL,alpha=.05,method='AC',AUTO=TRUE,PVSD=FALSE){ +# +# Do a sign test on data in x and y +# If y=NA, assume x is a matrix with +# two columns or has list mode. +# +# Returns n, the original sample size +# N, number of paired observations that are not equal to one another. +# phat, an estimate of p, the probability that xnullval || chkit[2]nullval || chkit[2]nullval || chkit[2] 28)qval<-2.383904*connum^.1-.202 +aval<-4*(1-pnorm(qval)) +if(J==2 && K==2)aval<-.05 +if(J==5 && K==2)aval<-2*(1-pnorm(qval)) +if(J==3 && K==2)aval<-3*(1-pnorm(qval)) +if(J==4 && K==2)aval<-3*(1-pnorm(qval)) +if(J==2 && K==3)aval<-3*(1-pnorm(qval)) +for (j in 1:J){ +for (jj in 1:J){ +if(j=80, hochberg's method is used. +# +if(!is.null(y[1]))x<-cbind(x,y) +if(!is.list(x) && !is.matrix(x))stop("Data must be stored in a matrix or in list mode.") +if(is.list(x)){ +if(is.matrix(con)){ +if(length(x)!=nrow(con))stop("The number of rows in con is not equal to the number of groups.") +}} +if(is.list(x)){ +# put the data in an n by J matrix +mat<-matl(x) +} +if(is.matrix(x) && is.matrix(con)){ +if(ncol(x)!=nrow(con))stop("The number of rows in con is not equal to the number of groups.") +mat<-x +} +if(is.matrix(x))mat<-x +if(!is.na(sum(grp)))mat<-mat[,grp] +x<-mat +mat<-elimna(mat) # Remove rows with missing values. +x<-mat +J<-ncol(mat) +n=nrow(mat) +if(n>=80)hoch=TRUE +Jm<-J-1 +if(sum(con^2)==0){ +d<-(J^2-J)/2 +con<-matrix(0,J,d) +id<-0 +for (j in 1:Jm){ +jp<-j+1 +for (k in jp:J){ +id<-id+1 +con[j,id]<-1 +con[k,id]<-0-1 +}}} +d<-ncol(con) +if(is.na(nboot)){ +nboot<-5000 +if(d<=10)nboot<-3000 +if(d<=6)nboot<-2000 +if(d<=4)nboot<-1000 +} +n<-nrow(mat) +crit.vec<-alpha/c(1:d) +connum<-ncol(con) +# Create set of differences based on contrast coefficients +xx<-x%*%con +xx<-as.matrix(xx) +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +psihat<-matrix(0,connum,nboot) +bvec<-matrix(NA,ncol=connum,nrow=nboot) +data<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot) +# data is an nboot by n matrix +if(ncol(xx)==1){ +for(ib in 1:nboot)psihat[1,ib]<-est(xx[data[ib,]],...) 
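+# Single-contrast case: each bootstrap replication yields one estimate of the
+# contrast based on the resampled contrast (difference) scores.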
+} +if(ncol(xx)>1){ +for(ib in 1:nboot)psihat[,ib]<-apply(elimna(xx[data[ib,],]),2,est,...) +} +# +# Now have an nboot by connum matrix of bootstrap values. +# +test<-1 +icl<-round(alpha*nboot/2)+1 +icu<-nboot-icl-1 +cimat=matrix(NA,nrow=connum,ncol=2) +for (ic in 1:connum){ +test[ic]<-(sum(psihat[ic,]>0)+.5*sum(psihat[ic,]==0))/nboot +test[ic]<-min(test[ic],1-test[ic]) +temp=sort(psihat[ic,]) +cimat[ic,1]=temp[icl] +cimat[ic,2]=temp[icu] +} +test<-2*test +ncon<-ncol(con) +if(alpha==.05){ +dvec<-c(.025,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) +if(ncon > 10){ +avec<-.05/c(11:ncon) +dvec<-c(dvec,avec) +}} +if(alpha==.01){ +dvec<-c(.005,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) +if(ncon > 10){ +avec<-.01/c(11:ncon) +dvec<-c(dvec,avec) +}} +if(alpha != .05 && alpha != .01){ +dvec<-alpha/c(1:ncon) +dvec[2]<-alpha/2 +} +if(hoch)dvec<-alpha/(2*c(1:ncon)) +dvec<-2*dvec +if(plotit && connum==1){ +plot(c(psihat[1,],0),xlab="",ylab="Est. Difference") +points(psihat[1,]) +abline(0,0) +} +temp2<-order(0-test) +ncon<-ncol(con) +zvec<-dvec[1:ncon] +sigvec<-(test[temp2]>=zvec) +output<-matrix(0,connum,6) +dimnames(output)<-list(NULL,c("con.num","psihat","p.value","p.crit","ci.lower","ci.upper")) +tmeans<-apply(xx,2,est,...) +psi<-1 +output[temp2,4]<-zvec +for (ic in 1:ncol(con)){ +output[ic,2]<-tmeans[ic] +output[ic,1]<-ic +output[ic,3]<-test[ic] +output[ic,5:6]<-cimat[ic,] +} +num.sig<-sum(output[,3]<=output[,4]) +list(output=output,con=con,num.sig=num.sig) +} + + +bdms1<-function(x,con){ +# This function is used by bdm +# +# Pool all data and rank +pool<-x[[1]] +JK<-length(x) +for (j in 2:JK)pool<-c(pool,x[[j]]) +N<-length(pool) +rval<-rank(pool) +rvec<-list() +up<-length(x[[1]]) +rvec[[1]]<-rval[1:up] +rbar<-mean(rvec[[1]]) +nvec<-length(rvec[[1]]) +for(j in 2:JK){ +down<-up+1 +up<-down+length(x[[j]])-1 +rvec[[j]]<-rval[down:up] +nvec[j]<-length(rvec[[j]]) +rbar[j]<-mean(rvec[[j]]) +} +phat<-(rbar-.5)/N +phat<-as.matrix(phat) +svec<-NA +for(j in 1:JK)svec[j]<-sum((rvec[[j]]-rbar[j])^2)/(nvec[j]-1) +svec<-svec/N^2 +VN<-N*diag(svec/nvec) +top<-con[1,1]*sum(diag(VN)) +Ftest<-N*(t(phat)%*%con%*%phat)/top +nu1<-con[1,1]^2*sum(diag(VN))^2/sum(diag(con%*%VN%*%con%*%VN)) +lam<-diag(1/(nvec-1)) +nu2<-sum(diag(VN))^2/sum(diag(VN%*%VN%*%lam)) +sig<-1-pf(Ftest,nu1,nu2) +list(F=Ftest,nu1=nu1,nu2=nu2,q.hat=phat,p.value=sig) +} + +r1mcp<-function(x,alpha=.05,bhop=FALSE){ +# +# Do all pairwise comparisons using a modification of +# the Brunner, Dette and Munk (1997) rank-based method. +# FWE is controlled using Rom's technique. +# +# Setting bhop=T, FWE is controlled using the +# Benjamini-Hochberg Method. +# +# The data are assumed to be stored in x in list mode or in a matrix. +# +# Missing values are automatically removed. 
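+#
+# Example (sketch, not run), with x a hypothetical matrix whose columns are
+# independent groups:
+#   r1mcp(x)             # all pairwise comparisons, FWE controlled via Rom's method
+#   r1mcp(x, bhop=TRUE)  # use the Benjamini-Hochberg method instead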
+# + if(is.matrix(x))x <- listm(x) + if(!is.list(x)) + stop("Data must be stored in list mode or a matrix.") +J<-length(x) + for(j in 1:J) { + xx <- x[[j]] + x[[j]] <- xx[!is.na(xx)] # Remove missing values + } +# +CC<-(J^2-J)/2 +# Determine critical values +ncon<-CC +if(!bhop){ +if(alpha==.05){ +dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) +if(ncon > 10){ +avec<-.05/c(11:ncon) +dvec<-c(dvec,avec) +}} +if(alpha==.01){ +dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) +if(ncon > 10){ +avec<-.01/c(11:ncon) +dvec<-c(dvec,avec) +}} +if(alpha != .05 && alpha != .01)dvec<-alpha/c(1:ncon) +} +if(bhop)dvec<-(ncon-c(1:ncon)+1)*alpha/ncon +output<-matrix(0,CC,5) +dimnames(output)<-list(NULL,c("Level","Level","test.stat","p.value","p.crit")) +ic<-0 +for(j in 1:J){ +for(jj in 1:J){ +if(j < jj){ +ic<-ic+1 +output[ic,1]<-j +output[ic,2]<-jj +temp<-bdm(x[c(j,jj)]) +output[ic,3]<-temp$F +output[ic,4]<-temp$p.value +}}} +temp2<-order(0-output[,4]) +output[temp2,5]<-dvec[1:length(temp2)] +list(output=output) +} + + +tamhane<-function(x,x2=NA,cil=NA,crit=NA){ +# +# First stage of Tamhane's method +# +# x contains first stage data +# x2 contains second stage data +# +# cil is the desired length of the confidence intervals. +# That is, cil is the distance between the upper and lower +# ends of the confidence intervals. +# +if(is.matrix(x))x<-listm(x) +if(!is.list(x))stop("Data must be stored in list mode or in matrix mode.") +J<-length(x) +tempn<-0 +svec<-NA +for(j in 1:J){ +temp<-x[[j]] +temp<-temp[!is.na(temp)] # Remove missing values. +tempn[j]<-length(temp) +x[[j]]<-temp +svec[j]<-var(temp) +} +A<-sum(1/(tempn-1)) +df<-J/A +paste("The degrees of freedom are:",df) +if(is.na(crit))stop("Enter a critical value and reexecute this function") +if(is.na(cil))stop("To proceed, you must specify the length of the confidence intervals.") +d<-(cil/(2*crit))^2 +n.vec<-NA +for(j in 1:J){ +n.vec[j]<-max(tempn[j]+1,floor(svec[j]/d)+1) +} +ci.mat<-NA +if(!is.na(x2[1])){ +if(is.matrix(x2))x2<-listm(x2) +if(!is.list(x2))stop("Data must be stored in list mode or in matrix mode.") +TT<-NA +U<-NA +J<-length(x) +nvec2<-NA +for(j in 1:length(x)){ +nvec2[j]<-length(x2[[j]]) +if(nvec2[j] 10){ +avec<-.05/c(11:ncon) +dvec<-c(dvec,avec) +}} +if(alpha==.01){ +dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) +if(ncon > 10){ +avec<-.01/c(11:ncon) +dvec<-c(dvec,avec) +}} +if(alpha != .05 && alpha != .01)dvec<-alpha/c(1:ncon) +} +if(bhop)dvec<-(ncon-c(1:ncon)+1)*alpha/ncon +Fac.A<-matrix(0,CC,5) +dimnames(Fac.A)<-list(NULL,c("Level","Level","test.stat","p.value","p.crit")) +mat<-matrix(c(1:JK),ncol=K,byrow=TRUE) +ic<-0 +for(j in 1:J){ +for(jj in 1:J){ +if(j < jj){ +ic<-ic+1 +Fac.A[ic,1]<-j +Fac.A[ic,2]<-jj +temp<-bdm2way(2,K,x[c(mat[j,],mat[jj,])]) +#Fac.A[ic,3]<-temp$outputA$F +#Fac.A[ic,4]<-temp$outputA$sig +Fac.A[ic,3]<-temp$A.F +Fac.A[ic,4]<-temp$p.valueA +}}} +temp2<-order(0-Fac.A[,4]) +Fac.A[temp2,5]<-dvec[1:length(temp2)] +CCB<-(K^2-K)/2 +ic<-0 +Fac.B<-matrix(0,CCB,5) +dimnames(Fac.B)<-list(NULL,c("Level","Level","test.stat","p.value","p.crit")) +for(k in 1:K){ +for(kk in 1:K){ +if(k1){ +for(k in 2:K){ +con1<-push(con1) +con<-cbind(con,con1) +}}} +d<-ncol(con) +if(is.na(nboot)){ +if(d<=4)nboot<-1000 +if(d>4)nboot<-5000 +} +# +# Now take bootstrap samples from jth level +# of Factor A and average K corresponding estimates +# of location. +# +bloc<-matrix(NA,nrow=J,ncol=nboot) +print("Taking bootstrap samples. 
Please wait.") +mvec<-NA +ik<-0 +for(j in 1:J){ +paste("Working on level ",j," of Factor A") +x<-matrix(NA,nrow=nvec[j],ncol=K) +# +for(k in 1:K){ +ik<-ik+1 +x[,k]<-xx[[ik]] +if(!avg)mvec[ik]<-est(xx[[ik]],...) +} +tempv<-apply(x,2,est,...) +data<-matrix(sample(nvec[j],size=nvec[j]*nboot,replace=TRUE),nrow=nboot) +bvec<-matrix(NA,ncol=K,nrow=nboot) +mat<-listm(x) +for(k in 1:K){ +temp<-x[,k] +bvec[,k]<-apply(data,1,rmanogsub,temp,est,...) # An nboot by K matrix +} +if(avg){ +mvec[j]<-mean(tempv) +bloc[j,]<-apply(bvec,1,mean) +} +if(!avg){ +if(j==1)bloc<-bvec +if(j>1)bloc<-cbind(bloc,bvec) +} +} +if(avg)bloc<-t(bloc) +connum<-d +psihat<-matrix(0,connum,nboot) +test<-1 +for (ic in 1:connum){ +psihat[ic,]<-apply(bloc,1,bptdpsi,con[,ic]) +#test[ic]<-sum((psihat[ic,]>0))/nboot +test[ic]<-(sum(psihat[ic,]>0)+.5*sum(psihat[ic,]==0))/nboot +test[ic]<-min(test[ic],1-test[ic]) +} +ncon<-ncol(con) +if(alpha==.05){ +dvec<-c(.025,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) +if(ncon > 10){ +avec<-.05/c(11:ncon) +dvec<-c(dvec,avec) +}} +if(alpha==.01){ +dvec<-c(.005,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) +if(ncon > 10){ +avec<-.01/c(11:ncon) +dvec<-c(dvec,avec) +}} +if(alpha != .05 && alpha != .01){ +dvec<-alpha/c(1:ncon) +dvec[1]<-alpha/2 +} +temp2<-order(0-test) +ncon<-ncol(con) +zvec<-dvec[1:ncon] +sigvec<-(test[temp2]>=zvec) +output<-matrix(0,connum,6) +dimnames(output)<-list(NULL,c("con.num","psihat","p.value","p.sig","ci.lower","ci.upper")) +tmeans<-mvec +psi<-1 +output[temp2,4]<-zvec +for (ic in 1:ncol(con)){ +output[ic,2]<-sum(con[,ic]*tmeans) +output[ic,1]<-ic +output[ic,3]<-test[ic] +temp<-sort(psihat[ic,]) +temp3<-round(output[ic,4]*nboot)+1 +icl<-round(dvec[ncon]*nboot)+1 +icu<-nboot-(icl-1) +output[ic,5]<-temp[icl] +output[ic,6]<-temp[icu] +} +output[,3]<-2*output[,3] +output[,4]<-2*output[,4] +num.sig<-sum(output[,3]<=output[,4]) +list(output=output,con=con,num.sig=num.sig) +} + +spmcpi<-function(J,K,x,est=tmean,JK=J*K,grp=c(1:JK),alpha=.05,nboot=NA, +SEED=TRUE,pr=TRUE,SR=FALSE,...){ +# +# Multiple comparisons for interactions +# in a split-plot design. +# The analysis is done by taking difference scores +# among all pairs of dependent groups and +# determining which of +# these differences differ across levels of Factor A. +# +# The R variable x is assumed to contain the raw +# data stored in list mode or in a matrix. +# If in list mode, x[[1]] contains the data +# for the first level of both factors: level 1,1. +# x[[2]] is assumed to contain the data for level 1 of the +# first factor and level 2 of the second: level 1,2 +# x[[K]] is the data for level 1,K +# x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc. +# +# If the data are in a matrix, column 1 is assumed to +# correspond to x[[1]], column 2 to x[[2]], etc. +# +# When in list mode x is assumed to have length JK, the total number of +# groups being tested, but a subset of the data can be analyzed +# using grp +# + if(is.matrix(x)) { + y <- list() + for(j in 1:ncol(x)) + y[[j]] <- x[, j] + x <- y +} +if(pr)print("As of Sept. 2005, est defaults to tmean") +JK<-J*K +if(JK!=length(x)){ +print("Something is wrong.") +paste(" Expected ",JK," groups but x contains ", length(x), "groups instead.") +stop() +} +MJ<-(J^2-J)/2 +MK<-(K^2-K)/2 +JMK<-J*MK +Jm<-J-1 +data<-list() +for(j in 1:length(x)){ +data[[j]]<-x[[grp[j]]] # Now have the groups in proper order. 
+} +x<-data +jp<-1-K +kv<-0 +kv2<-0 +for(j in 1:J){ +jp<-jp+K +xmat<-matrix(NA,ncol=K,nrow=length(x[[jp]])) +for(k in 1:K){ +kv<-kv+1 +xmat[,k]<-x[[kv]] +} +xmat<-elimna(xmat) +for(k in 1:K){ +kv2<-kv2+1 +x[[kv2]]<-xmat[,k] +}} +xx<-x +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +# Next determine the n_j values +nvec<-NA +jp<-1-K +for(j in 1:J){ +jp<-jp+K +nvec[j]<-length(x[[jp]]) +} +# +MJMK<-MJ*MK +con<-matrix(0,nrow=JMK,ncol=MJMK) +cont<-matrix(0,nrow=J,ncol=MJ) +ic<-0 +for(j in 1:J){ +for(jj in 1:J){ +if(j1){ +for(k in 2:MK){ +con1<-push(con1) +con<-cbind(con,con1) +}} +d<-ncol(con) +if(is.na(nboot)){ +if(d<=4)nboot<-1000 +if(d>4)nboot<-5000 +} +connum<-d +psihat<-matrix(0,connum,nboot) +# +# Now take bootstrap samples from jth level +# of Factor A and average K corresponding estimates +# of location. +# +bloc<-matrix(NA,ncol=J,nrow=nboot) +print("Taking bootstrap samples. Please wait.") +mvec<-NA +it<-0 +for(j in 1:J){ +paste("Working on level ",j," of Factor A") +x<-matrix(NA,nrow=nvec[j],ncol=MK) +# +im<-0 +for(k in 1:K){ +for(kk in 1:K){ +if(k1)bloc<-cbind(bloc,bvec) +} +test<-1 +for (ic in 1:connum){ +psihat[ic,]<-apply(bloc,1,bptdpsi,con[,ic]) +#test[ic]<-sum((psihat[ic,]>0))/nboot +test[ic]<-(sum(psihat[ic,]>0)+.5*sum(psihat[ic,]==0))/nboot +test[ic]<-min(test[ic],1-test[ic]) +} +ncon<-ncol(con) +dvec<-alpha/c(1:ncon) +if(SR){ +okay=FALSE +if(identical(est,onestep))okay=TRUE +if(identical(est,mom))okay=TRUE +if(!okay)stop('For estimators other than onestep and mom, use SR=FALSE') +if(alpha==.05){ +dvec<-c(.025,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) +if(ncon > 10){ +avec<-.05/c(11:ncon) +dvec<-c(dvec,avec) +}} +if(alpha==.01){ +dvec<-c(.005,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) +if(ncon > 10){ +avec<-.01/c(11:ncon) +dvec<-c(dvec,avec) +}} +if(alpha != .05 && alpha != .01){ +dvec<-alpha/c(1:ncon) +dvec[1]<-alpha/2 +}} +temp2<-order(0-test) +ncon<-ncol(con) +zvec<-dvec[1:ncon] +sigvec<-(test[temp2]>=zvec) +output<-matrix(0,connum,6) +dimnames(output)<-list(NULL,c("con.num","psihat","p.value","p.crit","ci.lower","ci.upper")) +tmeans<-mvec +psi<-1 +for (ic in 1:ncol(con)){ +output[ic,2]<-sum(con[,ic]*tmeans) +output[ic,1]<-ic +output[ic,3]<-test[ic] +output[temp2,4]<-zvec +temp<-sort(psihat[ic,]) +icl<-round(dvec[ncon]*nboot)+1 +icu<-nboot-(icl-1) +output[ic,5]<-temp[icl] +output[ic,6]<-temp[icu] +} +output[,3]<-2*output[,3] +if(SR)output[,4]<-2*output[,4] +num.sig<-sum(output[,3]<=output[,4]) +list(output=output,con=con,num.sig=num.sig) +} + +sppbb<-function(J,K,x,est=tmean,JK=J*K,grp=c(1:JK),nboot=500,SEED=TRUE,pr=TRUE,...){ +# +# A percentile bootstrap for main effects +# among dependent groups in a split-plot design +# The analysis is done based on all pairs +# of difference scores. The null hypothesis is that +# all such differences have a typical value of zero. +# +# The R variable x is assumed to contain the raw +# data stored in list mode or in a matrix. +# If in list mode, x[[1]] contains the data +# for the first level of both factors: level 1,1. +# x[[2]] is assumed to contain the data for level 1 of the +# first factor and level 2 of the second: level 1,2 +# x[[K]] is the data for level 1,K +# x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc. +# +# If the data are in a matrix, column 1 is assumed to +# correspond to x[[1]], column 2 to x[[2]], etc. 
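+# (Illustration of this layout: with J=2 and K=3, x[[1]], x[[2]], x[[3]] hold the
+# data for cells (1,1), (1,2), (1,3) and x[[4]], x[[5]], x[[6]] hold cells (2,1),
+# (2,2), (2,3); in matrix form these are columns 1 through 6, so a call could be
+# sppbb(2, 3, dat) for a six-column matrix dat of hypothetical data.)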
+# +# When in list mode x is assumed to have length JK, the total number of +# groups being tested, but a subset of the data can be analyzed +# using grp +# +if(pr)print('As of Oct, 2014, the argument est defaults to tmean') + if(is.matrix(x)) { + y <- list() + for(j in 1:ncol(x)) + y[[j]] <- x[, j] + x <- y +} + +JK<-J*K +data<-list() +for(j in 1:length(x)){ +data[[j]]<-x[[grp[j]]] # Now have the groups in proper order. +} +x<-data +jp<-1-K +kv<-0 +kv2<-0 +for(j in 1:J){ +jp<-jp+K +xmat<-matrix(NA,ncol=K,nrow=length(x[[jp]])) +for(k in 1:K){ +kv<-kv+1 +xmat[,k]<-x[[kv]] +} +xmat<-elimna(xmat) +for(k in 1:K){ +kv2<-kv2+1 +x[[kv2]]<-xmat[,k] +}} +xx<-x +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +# Next determine the n_j values +nvec<-NA +jp<-1-K +for(j in 1:J){ +jp<-jp+K +nvec[j]<-length(x[[jp]]) +} +# +# Now stack the data in an N by K matrix +# +x<-matrix(NA,nrow=nvec[1],ncol=K) +# +for(k in 1:K)x[,k]<-xx[[k]] +kc<-K +for(j in 2:J){ +temp<-matrix(NA,nrow=nvec[j],ncol=K) +for(k in 1:K){ +kc<-kc+1 +temp[,k]<-xx[[kc]] +} +x<-rbind(x,temp) +} +# Now call function rmdzero to do the analysis +temp<-rmdzero(x,est=est,nboot=nboot,...) +list(p.value=temp$p.value,center=temp$center) +} + + +spmcpb<-function(J,K,x,est=tmean,JK=J*K,grp=c(1:JK),dif=TRUE,alpha=.05,SEED=TRUE, +nboot=NA,...){ +# +# A percentile bootstrap for all pairwise +# multiple comparisons +# among dependent groups in a split-plot design +# +# Levels of A are ignored. +# +# If dif=T, the analysis is done based on all pairs +# of difference scores. +# Otherwise, marginal measures of location are used. +# +# The R variable x is assumed to contain the raw +# data stored in list mode or in a matrix. +# If in list mode, x[[1]] contains the data +# for the first level of both factors: level 1,1. +# x[[2]] is assumed to contain the data for level 1 of the +# first factor and level 2 of the second: level 1,2 +# x[[K]] is the data for level 1,K +# x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc. +# +# If the data are in a matrix, column 1 is assumed to +# correspond to x[[1]], column 2 to x[[2]], etc. +# +# When in list mode x is assumed to have length JK, the total number of +# groups being tested, but a subset of the data can be analyzed +# using grp +# + if(is.matrix(x) || is.data.frame(x))x=listm(x) +JK<-J*K +data<-list() +for(j in 1:length(x)){ +data[[j]]<-x[[grp[j]]] # Now have the groups in proper order. +} +x=data +x=pool.fun(J,K,x) +temp<-rmmcppb(x,est=est,nboot=nboot,dif=dif,alpha=alpha,plotit=FALSE,SEED=SEED,...) +list(output=temp$output,con=temp$con,num.sig=temp$num.sig) +} + + + +bwamcp<-function(J,K,x,tr=.2,JK=J*K,grp=c(1:JK),alpha=.05,op=TRUE){ +# +# All pairwise comparisons among levels of Factor A +# in a split-plot design using trimmed means. +# +# Data among dependent groups are pooled for each level +# of Factor A. +# Then this function calls lincon. +# +# The R variable x is assumed to contain the raw +# data stored in list mode or in a matrix. +# If in list mode, x[[1]] contains the data +# for the first level of both factors: level 1,1. +# x[[2]] is assumed to contain the data for level 1 of the +# first factor and level 2 of the second: level 1,2 +# x[[K]] is the data for level 1,K +# x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc. +# +# If the data are in a matrix, column 1 is assumed to +# correspond to x[[1]], column 2 to x[[2]], etc. 
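+#
+# Illustrative example (hypothetical data; lincon and the other supporting
+# functions in this file must be sourced):
+#   set.seed(1)
+#   dat <- matrix(rnorm(120), ncol=6)     # J=2 levels of Factor A, K=3 of Factor B
+#   bwamcp(J=2, K=3, dat, tr=.2)          # compare levels of A at each level of B
+#   bwamcp(J=2, K=3, dat, op=FALSE)       # pool over Factor B first, then compare A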
+# +# When in list mode x is assumed to have length JK, the total number of +# groups being tested, but a subset of the data can be analyzed +# using grp +# + if(is.matrix(x)) { + y <- list() + for(j in 1:ncol(x)) + y[[j]] <- x[, j] + x <- y +} + +JK<-J*K +if(!op){ +data<-list() +for(j in 1:length(x)){ +data[[j]]<-x[[grp[j]]] # Now have the groups in proper order. +} +x<-data +data<-list() +jp<-1-K +kv<-0 +for(j in 1:J){ +jp<-jp+K +for(k in 1:K){ +kv<-kv+1 +if(k==1)temp<-x[[jp]] +if(k>1)temp<-c(temp,x[[kv]]) +} +data[[j]]<-temp +} +print("Group numbers refer to levels of Factor A") +temp<-lincon(data,tr=tr,alpha=alpha) +} +if(op){ +MJK<-K*(J^2-J)/2 # NUMBER OF COMPARISONS +JK<-J*K +MJ<-(J^2-J)/2 +cont<-matrix(0,nrow=J,ncol=MJ) +ic<-0 +for(j in 1:J){ +for(jj in 1:J){ +if(j1){ +for(k in 2:K){ +con1<-push(con1) +con<-cbind(con,con1) +}} +print("Contrast Matrix Used:") +print(con) +temp<-lincon(x,con=con,tr=tr,alpha=alpha) +} +temp +} + +pcor<-function(x,y=NA){ +if(!is.na(y[1]))temp<-wincor(x,y,tr=0) +if(is.na(y[1]))temp<-winall(x,tr=0) +list(cor=temp$cor,p.value=temp$p.value) +} + +apgdis<-function(m,est=sum,se=TRUE,...){ +# +# For multivariate data, +# compute distance between each pair +# of points and measure depth of a point +# in terms of its distance to all +# other points +# +# Using se=T ensures that ordering of distance +# will not change with a change in scale. +# +# m is an n by p matrix +# +m<-elimna(m) # eliminate any missing values +temp<-0 +if(se){ +for(j in 1:ncol(m))m[,j]<-(m[,j]-median(m[,j]))/mad(m[,j]) +} +for(j in 1:ncol(m)){ +disx<-outer(m[,j],m[,j],"-") +temp<-temp+disx^2 +} +temp<-sqrt(temp) +dis<-apply(temp,1,est,...) +temp2<-order(dis) +center<-m[temp2[1],] +list(center=center,distance=dis) +} + + +rd2plot<-function(x,y,fr=.8,xlab="",ylab=""){ +# +# Expected frequency curve +# for two groups. +# +# fr controls amount of smoothing +x<-elimna(x) +y<-elimna(y) +rmdx<-NA +rmdy<-NA +for(i in 1:length(x)){ +rmdx[i]<-sum(near(x,x[i],fr)) +} +for(i in 1:length(y)){ +rmdy[i]<-sum(near(y,y[i],fr)) +} +rmdx<-rmdx/length(x) +rmdy<-rmdy/length(y) +plot(c(x,y),c(rmdx,rmdy),type="n",ylab=ylab,xlab=xlab) +sx<-sort(x) +xorder<-order(x) +sysm<-rmdx[xorder] +lines(sx,sysm) +sy<-sort(y) +yorder<-order(y) +sysm<-rmdy[yorder] +lines(sy,sysm,lty=2) +} + +depth2<-function(x,pts=NA,plotit=TRUE,xlab="VAR 1",ylab="VAR 2"){ +# +# Compute exact depths for bivariate data +if(ncol(x)!=2)stop("x must be a matrix with 2 columns") +x<-elimna(x) +if(is.na(pts[1]))pts<-x +if(ncol(pts)!=2)stop("Argument pts must be stored as a matrix with 2 columns") +pts<-as.matrix(pts) +ndepth<-NA +for(i in 1:nrow(pts)){ +ndepth[i]<-depth(pts[i,1],pts[i,2],x) +} +if(plotit){ +m<-x +plot(m,xlab=xlab,ylab=ylab) +flag<-(ndepth==max(ndepth)) +if(sum(flag)==1)center<-m[flag,] +if(sum(flag)>1)center<-apply(m[flag,],2,mean) +points(center[1],center[2],pch="+") +temp<-ndepth +flag<-(temp>=median(temp)) +xx<-x[flag,] +xord<-order(xx[,1]) +xx<-xx[xord,] +temp<-chull(xx) +xord<-order(xx[,1]) +xx<-xx[xord,] +temp<-chull(xx) +lines(xx[temp,]) +lines(xx[c(temp[1],temp[length(temp)]),]) +} +ndepth +} + +fdepth<-function(m,pts=NA,plotit=TRUE,cop=3,center=NA,xlab="VAR 1", +ylab="VAR 2"){ +# +# Determine depth of points in pts, relative to +# points in m. If pts is not specified, +# depth of all points in m are determined. +# +# m and pts can be vectors or matrices with +# p columns (the number of variables). 
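+#
+# (Illustrative calls with hypothetical data, where m is an n by 2 matrix:
+#    fdepth(cbind(rnorm(30), rnorm(30)))       # one depth per row of the data matrix
+#    fdepth(m, pts=rbind(c(0,0), c(1,1)))      # depths of the rows of pts, relative to m)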
+# +# Determine center, for each point, draw a line +# connecting it with center, project points onto this line +# and determine depth of the projected points. +# The final depth of a point is its minimum depth +# among all projections. +# +# plotit=TRUE creates a scatterplot when working with +# bivariate data and pts=NA +# +# There are three options for computing the center of the +# cloud of points when computing projections, assuming center=NA: +# +# cop=2 uses MCD center +# cop=3 uses median of the marginal distributions. +# cop=4 uses MVE center +# +# If a value for center is passed to this function, +# this value is used to determine depths. +# +# When plotting, +# center is marked with a cross, +. +# +library(MASS) +if(cop!=2 && cop!=3 && cop!=4)stop("Only cop=2, 3 or 4 is allowed") +if(is.list(m))stop("Store data in a matrix; might use function listm") +m<-as.matrix(m) +pts<-as.matrix(pts) +if(!is.na(pts[1]))remm<-m +nm<-nrow(m) +nm1<-nm+1 +if(!is.na(pts[1])){ +if(ncol(m)!=ncol(pts))stop("Number of columns of m is not equal to number of columns for pts") +} +m<-elimna(m) # Remove missing values +m<-as.matrix(m) +if(ncol(m)==1)dep<-unidepth(as.vector(m[,1]),pts=pts) +if(ncol(m)>1){ +if(is.na(center[1])){ +if(cop==2){ +center<-cov.mcd(m)$center +} +if(cop==4){ +center<-cov.mve(m)$center +} +if(cop==3){ +center<-apply(m,2,median) +}} +if(is.na(pts[1])){ +mdep <- matrix(NA,nrow=nrow(m),ncol=nrow(m)) +} +if(!is.na(pts[1])){ +mdep <- matrix(NA,nrow=nrow(m),ncol=nrow(pts)) +} +for (i in 1:nrow(m)){ +B<-m[i,]-center +dis<-NA +BB<-B^2 +bot<-sum(BB) +if(bot!=0){ +if(is.na(pts[1])){ +for (j in 1:nrow(m)){ +A<-m[j,]-center +temp<-sum(A*B)*B/bot +dis[j]<-sign(sum(A*B))*sqrt(sum(temp^2)) +}} +if(!is.na(pts[1])){ +m<-rbind(remm,pts) +for (j in 1:nrow(m)){ +A<-m[j,]-center +temp<-sum(A*B)*B/bot +dis[j]<-sign(sum(A*B))*sqrt(sum(temp^2)) +}} +# +# For ith projection, store depths of +# points in mdep[i,] +# +if(is.na(pts[1]))mdep[i,]<-unidepth(dis) +if(!is.na(pts[1])){ +mdep[i,]<-unidepth(dis[1:nm],dis[nm1:nrow(m)]) +}} +if(bot==0)mdep[i,]<-rep(0,ncol(mdep)) +} +dep<-apply(mdep,2,min) +if(ncol(m)==2 && is.na(pts[1])){ +flag<-chull(m) +dep[flag]<-min(dep) +} +} +if(ncol(m)==2){ +if(is.na(pts[1]) && plotit){ +plot(m,xlab=xlab,ylab=ylab) +points(center[1],center[2],pch="+") +x<-m +temp<-dep +flag<-(temp>=median(temp)) +xx<-x[flag,] +xord<-order(xx[,1]) +xx<-xx[xord,] +temp<-chull(xx) +xord<-order(xx[,1]) +xx<-xx[xord,] +temp<-chull(xx) +lines(xx[temp,]) +lines(xx[c(temp[1],temp[length(temp)]),]) +}} +dep<-round(dep*nrow(m))/nrow(m) +dep +} + +unidepth<-function(x,pts=NA){ +# +# Determine depth of points in the vector x +# +if(!is.vector(x))stop("x should be a vector") +if(is.na(pts[1]))pts<-x +pup<-apply(outer(pts,x,FUN="<="),1,sum)/length(x) +pdown<-apply(outer(pts,x,FUN="<"),1,sum)/length(x) +pdown<-1-pdown +m<-matrix(c(pup,pdown),nrow=2,byrow=TRUE) +dep<-apply(m,2,min) +dep +} + +opreg<-function(x,y,regfun=tsreg,cop=3,MC=FALSE,varfun=pbvar,corfun=pbcor,STAND=TRUE,xout=FALSE){ +# +# Do regression on points not labled outliers +# using projection-type outlier detection method +# +# Note: argument xout is not relevant here, but is included to avoid conflicts when using regci. 
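+#
+# Illustrative example (hypothetical data; relies on outpro, tsreg, pbvar and
+# pbcor defined elsewhere in this file):
+#   set.seed(1)
+#   x <- rnorm(50)
+#   y <- 1 + 2*x + rnorm(50)
+#   opreg(x, y)$coef    # Theil-Sen fit after projection-based outliers are removed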
+# +if(MC)library(parallel) +x<-as.matrix(x) +m<-cbind(x,y) +m<-elimna(m) # eliminate any rows with missing data +if(!MC)ivec<-outpro(m,plotit=FALSE,cop=cop,STAND=STAND)$keep +if(MC)ivec<-outproMC(m,plotit=FALSE,cop=cop,STAND=STAND)$keep +np1<-ncol(x)+1 +coef<-regfun(m[ivec,1:ncol(x)],m[ivec,np1])$coef +vec<-rep(1,length(y)) +residuals<-y-cbind(vec,x)%*%coef +stre=NULL +yhat<-y-residuals +e.pow<-varfun(yhat)/varfun(y) +if(!is.na(e.pow)){ +if(e.pow>=1)e.pow<-corfun(yhat,y)$cor^2 +stre=sqrt(e.pow) +} +list(coef=coef,residuals=residuals,Strength.Assoc=stre,Explanatory.Power=e.pow) +} + +mgvdep<-function(m,se=FALSE){ +# +# Find the center of a scatterplot, add point that +# increases the generalized variance by smallest amount +# continue for all points +# return the MGV depths. +# +# Essentially the same as mgvar which +# determine MGV distances, only here, +# follow convention that deepest points +# have the largest numerical value. Here +# depth of the deepest values equal one. +# +temp<-apgdis(m,se=se)$distance +icen<-ncol(m) +temp3<-order(temp) +chkit<-sum(duplicated(temp[temp3[1:icen]])) +icen<-icen+chkit +flag<-rep(TRUE,length(temp)) +flag[temp3[1:icen]]<-FALSE +# set duplicated central values to F +varvec<-0 +varvec[!flag]<-NA +while(sum(flag)>0){ +ic<-0 +chk<-NA +remi<-NA +for(i in 1:nrow(m)){ +if(flag[i]){ +ic<-ic+1 +chk[ic]<-gvar(rbind(m[!flag,],m[i,])) +remi[ic]<-i +}} +sor<-order(chk) +k<-remi[sor[1]] +varvec[k]<-chk[sor[1]] +flag[k]<-F +} +varvec[is.na(varvec)]<-0 +varvec<-1/(1+varvec) +varvec +} + + +fdepthv2<-function(m,pts=NA,plotit=TRUE){ +# +# Determine depth of points in pts relative to +# points in m +# +# Draw a line between each pair of distinct points +# and determine depth of the projected points. +# The final depth of a point is its minimum depth +# among all projections. +# +# This function is slower than fdepth and requires +# space for a nc by nc matrix, nc=(n^2-n)/2. +# But it allows +# data to have a singular covariance matrix +# and it provides a more accurate approximation of +# halfspace depth. +# +# plotit=TRUE creates a scatterplot when working with +# bivariate data and pts=NA +# +# When plotting, +# center is marked with a cross, +. 
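+#
+# Illustrative example (hypothetical data):
+#   set.seed(1)
+#   m <- cbind(rnorm(25), rnorm(25))
+#   fdepthv2(m, plotit=FALSE)   # depths based on projections over all pairs of points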
+# +m<-elimna(m) # Remove missing values +if(!is.na(pts[1]))remm<-m +if(!is.matrix(m))dep<-unidepth(m) +if(is.matrix(m)){ +nm<-nrow(m) +nt<-nm +nm1<-nm+1 +if(!is.na(pts[1])){ +if(ncol(m)!=ncol(pts))stop("Number of columns of m is not equal to number of columns for pts") +nt<-nm+nrow(pts) +}} +if(ncol(m)==1)depth<-unidepth(m) +if(ncol(m)>1){ +m<-elimna(m) # Remove missing values +nc<-(nrow(m)^2-nrow(m))/2 +if(is.na(pts[1]))mdep <- matrix(0,nrow=nc,ncol=nrow(m)) +if(!is.na(pts[1])){ +mdep <- matrix(0,nrow=nc,ncol=nrow(pts)) +} +ic<-0 +for (iall in 1:nm){ +for (i in 1:nm){ +if(iall < i){ +ic<-ic+1 +B<-m[i,]-m[iall,] +dis<-NA +BB<-B^2 +bot<-sum(BB) +if(bot!=0){ +if(is.na(pts[1])){ +for (j in 1:nrow(m)){ +A<-m[j,]-m[iall,] +temp<-sum(A*B)*B/bot +dis[j]<-sign(sum(A*B))*sqrt(sum(temp^2)) +}} +if(!is.na(pts[1])){ +m<-rbind(remm,pts) +for (j in 1:nrow(m)){ +A<-m[j,]-m[iall,] +temp<-sum(A*B)*B/bot +dis[j]<-sign(sum(A*B))*sqrt(sum(temp^2)) +}} +# +# For ic_th projection, store depths of +# points in mdep[ic,] +# +if(is.na(pts[1]))mdep[ic,]<-unidepth(dis) +if(!is.na(pts[1])){ +mdep[ic,]<-unidepth(dis[1:nm],dis[nm1:nrow(m)]) +}} +if(bot==0)mdep[ic,]<-rep(0,ncol(mdep)) +}}} +dep<-apply(mdep,2,min) +} +if(ncol(m)==2 &&is.na(pts[1])){ +flag<-chull(m) +dep[flag]<-min(dep) +} +if(ncol(m)==2){ +if(is.na(pts[1]) && plotit){ +plot(m) +x<-m +temp<-dep +flag<-(temp>=median(temp)) +xx<-x[flag,] +xord<-order(xx[,1]) +xx<-xx[xord,] +temp<-chull(xx) +xord<-order(xx[,1]) +xx<-xx[xord,] +temp<-chull(xx) +lines(xx[temp,]) +lines(xx[c(temp[1],temp[length(temp)]),]) +}} +dep +} + +g2plot<-function(x1,x2,op=4,rval=15,fr=.8,aval=.5,xlab="X",ylab=""){ +# +# plot estimates of the density functions for two groups. +# +# op=1: Use Rosenblatt shifted histogram +# +# op=2: +# Use kernel density estimate +# Using the built-in S+ function density, +# +# op=3: Use expected frequency curve. 
+# +# op=4: Use adaptive kernel estimator +# +x1<-elimna(x1) +x2<-elimna(x2) +if(op==3){ +rd2plot(x1,x2,fr=fr,xlab=xlab,ylab=ylab) +print("Might consider using op=4 if graph is ragged") +} +if(op==2){ +tempx<-density(x1,na.rm=TRUE,kernel="epanechnikov") +tempy<-density(x2,na.rm=TRUE,kernel="epanechnikov") +plot(c(tempx$x,tempy$x),c(tempx$y,tempy$y),type="n",xlab=xlab,ylab=ylab) +lines(tempx$x,tempx$y) +lines(tempy$x,tempy$y,lty=2) +} +if(op==1){ + y1 <- sort(x1) + z1 <- 1 + z2 <- 1 + par(yaxt = "n") + temp <- floor(0.01 * length(x1)) + if(temp == 0) + temp <- 5 + ibot <- y1[temp] + itop <- y1[floor(0.99 * length(x1))] + xaxis1 <- seq(ibot, itop, length = rval) + for(i in 1:rval) + z1[i] <- kerden(x1, 0, xaxis1[i]) + y2 <- sort(x2) + temp <- floor(0.01 * length(x2)) + if(temp == 0) + temp <- 5 + ibot <- y2[temp] + itop <- y2[floor(0.99 * length(x2))] + xaxis2 <- seq(ibot, itop, length = rval) + for(i in 1:rval) + z2[i] <- kerden(x2, 0, xaxis2[i]) +plot(c(xaxis1,xaxis2),c(z1,z2), xlab =xlab, ylab =ylab, type = "n") +lines(xaxis1,z1) +lines(xaxis2,z2,lty=2) +} +if(op==4){ +x1<-sort(x1) +x2<-sort(x2) +z1<-akerd(x1,aval=aval,fr=fr,pyhat=TRUE,plotit=FALSE) +z2<-akerd(x2,aval=aval,fr=fr,pyhat=TRUE,plotit=FALSE) +plot(c(x1,x2),c(z1,z2), xlab =xlab, ylab =ylab, type = "n") +lines(x1,z1) +lines(x2,z2,lty=2) +} +} + +mulwmw<-function(m1,m2,plotit=TRUE,cop=3,alpha=.05,nboot=1000,pop=4,fr=.8,pr=FALSE,SEED=TRUE,tr=.5,NC=TRUE){ +# +# +# Determine center correpsonding to two +# independent groups, project all points onto line +# connecting the centers, +# then based on the projected distances, +# estimate p=probability that a randomly sampled +# point from group 1 is less than a point from group 2 +# based on the projected distances. +# +# plotit=TRUE creates a plot of the projected data +# pop=1 plot two dotplots based on projected distances +# pop=2 boxplots +# pop=3 expected frequency curve. +# pop=4 adaptive kernel density +# +# There are three options for computing the center of the +# cloud of points when computing projections: +# cop=1 uses Donoho-Gasko median +# cop=2 uses MCD center +# cop=3 uses median of the marginal distributions. +# +# When using cop=2 or 3, default critical value for outliers +# is square root of the .975 quantile of a +# chi-squared distribution with p degrees +# of freedom. +# +# NC=F: critical values not computed +# +# Donoho-Gasko (Tukey) median is marked with a cross, +. 
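+#
+# Illustrative example (hypothetical bivariate data; the helper functions used
+# internally, e.g. akerd and depth, must be sourced):
+#   set.seed(1)
+#   m1 <- cbind(rnorm(30), rnorm(30))
+#   m2 <- cbind(rnorm(30, .5), rnorm(30, .5))
+#   mulwmw(m1, m2, plotit=FALSE)   # estimates P(projected group 1 value < projected group 2 value)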
+# +if(is.null(dim(m1))||dim(m1)[2]<2){print("Data are assumed to be stored in") +print(" a matrix or data frame having two or more columns.") +stop(" For univariate data, use the function outbox or out") +} +m1<-elimna(m1) # Remove missing values +m2<-elimna(m2) +n1=nrow(m1) +n2=nrow(m2) +if(cop==1){ +if(ncol(m1)>2){ +center1<-dmean(m1,tr=.5) +center2<-dmean(m2,tr=.5) +} +if(ncol(m1)==2){ +tempd<-NA +for(i in 1:nrow(m1)) +tempd[i]<-depth(m1[i,1],m1[i,2],m1) +mdep<-max(tempd) +flag<-(tempd==mdep) +if(sum(flag)==1)center1<-m1[flag,] +if(sum(flag)>1)center1<-apply(m1[flag,],2,mean) +for(i in 1:nrow(m2)) +tempd[i]<-depth(m2[i,1],m2[i,2],m2) +mdep<-max(tempd) +flag<-(tempd==mdep) +if(sum(flag)==1)center2<-m2[flag,] +if(sum(flag)>1)center2<-apply(m2[flag,],2,mean) +}} +if(cop==2){ +center1<-cov.mcd(m1)$center +center2<-cov.mcd(m2)$center +} +if(cop==3){ +center1<-apply(m1,2,mean,tr=tr) +center2<-apply(m2,2,mean,tr=tr) +} +if(cop==4){ +center1<-smean(m1) +center2<-smean(m2) +} +center<-(center1+center2)/2 +B<-center1-center2 +if(sum(center1^2)2){ +center1<-dmean(m1,tr=.5) +center2<-dmean(m2,tr=.5) +} +if(ncol(m1)==2){ +tempd<-NA +for(i in 1:nrow(m1)) +tempd[i]<-depth(m1[i,1],m1[i,2],m1) +mdep<-max(tempd) +flag<-(tempd==mdep) +if(sum(flag)==1)center1<-m1[flag,] +if(sum(flag)>1)center1<-apply(m1[flag,],2,mean) +for(i in 1:nrow(m2)) +tempd[i]<-depth(m2[i,1],m2[i,2],m2) +mdep<-max(tempd) +flag<-(tempd==mdep) +if(sum(flag)==1)center2<-m2[flag,] +if(sum(flag)>1)center2<-apply(m2[flag,],2,mean) +}} +if(cop==2){ +center1<-cov.mcd(m1)$center +center2<-cov.mcd(m2)$center +} +if(cop==3){ +center1<-apply(m1,2,median) +center2<-apply(m2,2,median) +} +center<-(center1+center2)/2 +B<-center1-center2 +if(sum(center1^2)>sum(center2^2))B<-(0-1)*B +BB<-B^2 +bot<-sum(BB) +disx<-NA +disy<-NA +if(bot!=0){ +for (j in 1:nrow(m1)){ +AX<-m1[j,]-center +tempx<-sum(AX*B)*B/bot +disx[j]<-sign(sum(AX*B))*sqrt(sum(tempx^2)) +} +for (j in 1:nrow(m2)){ +AY<-m2[j,]-center +tempy<-sum(AY*B)*B/bot +disy[j]<-sign(sum(AY*B))*sqrt(sum(tempy^2)) +}} +m<-outer(disx,disy,FUN="-") +m<-sign(m) +val[it]<-(1-mean(m))/2 +if(bot==0)val[it]<-.5 +if(pr)print(paste("Iteration ",it," of ",iter," complete")) +} +val<-sort(val) +low<-round(alpha*iter/2)+1 +up<-iter-low +crit<-NA +crit[1]<-val[low] +crit[2]<-val[up] +crit +} + + +dmean<-function(m,tr=.2,dop=1,cop=2){ +# +# Compute multivariate measure of location +# using Donoho-Gasko method. +# +# dop=1, use fdepth to compute depths +# dop=2, use fdepthv2 to compute depths +# +# cop=1, Tukey median; can't be used here. 
+# cop=2, use MCD in fdepth +# cop=3, use marginal medians in fdepth +# cop=4, use MVE in fdepth +# +if(is.list(m))m<-matl(m) +if(!is.matrix(m))stop("Data must be stored in a matrix or in list mode.") +if(ncol(m)==1){ +if(tr==.5)val<-median(m) +if(tr>.5)stop("Amount of trimming must be at most .5") +if(tr<.5)val<-mean(m,tr) +} +if(ncol(m)>1){ +temp<-NA +if(ncol(m)!=2){ +# Use approximate depth +if(dop==1)temp<-fdepth(m,plotit=FALSE,cop=cop) +if(dop==2)temp<-fdepthv2(m) +} +# Use exact depth if ncol=2 +if(ncol(m)==2){ +for(i in 1:nrow(m)) +temp[i]<-depth(m[i,1],m[i,2],m) +} +mdep<-max(temp) +flag<-(temp==mdep) +if(tr==.5){ +if(sum(flag)==1)val<-m[flag,] +if(sum(flag)>1)val<-apply(m[flag,],2,mean) +} +if(tr<.5){ +flag2<-(temp>=tr) +if(sum(flag2)==0 && sum(flag)>1)val<-apply(as.matrix(m[flag,]),2,mean) +if(sum(flag2)==0 && sum(flag)==1)val=m[flag,] +if(sum(flag2)==1)val<-m[flag2,] +if(sum(flag2)>1)val<-apply(m[flag2,],2,mean) +}} +val +} + +lsqs2<-function(x,y,MD=FALSE,tr=.05,plotit=TRUE){ +# cf Liu and Singh, JASA 1993, 252-260 +# +if(is.list(x))x<-matl(x) +if(is.list(y))y<-matl(y) +disyx<-NA # depth of y in x +disxy<-NA # depth of x in y +if(!is.matrix(x) && !is.matrix(y)){ +x<-x[!is.na(x)] +y<-y[!is.na(y)] +# +tempxx<-NA +for(i in 1:length(x)){ +tempxx[i]<-sum(x[i]<=x)/length(x) +if(tempxx[i]>.5)tempxx[i]<-1-tempxx[i] +} +for(i in 1:length(x)){ +temp<-sum(x[i]<=y)/length(y) +if(temp>.5)temp<-1-temp +disxy[i]<-mean(temp>tempxx) +} +tempyy<-NA +for(i in 1:length(y)){ +tempyy[i]<-sum(y[i]<=y)/length(y) +if(tempyy[i]>.5)tempyy[i]<-1-tempyy[i] +} +for(i in 1:length(y)){ +temp<-sum(y[i]<=x)/length(x) +if(temp>.5)temp<-1-temp # depth of y_i in x +disyx[i]<-mean(temp>tempyy) +} +qhatxy<-mean(disyx) +qhatyx<-mean(disxy) +qhat<-(qhatxy+qhatyx)/2 +} +if(is.matrix(x) && is.matrix(x)){ +if(!MD){ +if(ncol(x)!=2 || ncol(y)!=2){ +# Use approximate depth +tempyy<-fdepth(y) +temp<-fdepth(y,x) +for(i in 1:nrow(x)){ +disxy[i]<-mean(temp[i]>tempyy) +} +tempxx<-NA +tempxx<-fdepth(x) +temp<-fdepth(x,pts=y) +for(i in 1:nrow(y)){ +disyx[i]<-mean(temp[i]>tempxx) +}} +if(ncol(x)==2 && ncol(y)==2){ +if(plotit){ +plot(rbind(x,y),type="n",xlab="Var 1",ylab="VAR 2") +points(x) +points(y,pch="o") +temp<-NA +for(i in 1:nrow(x)){ +temp[i]<-depth(x[i,1],x[i,2],x) +} +flag<-(temp>=median(temp)) +xx<-x[flag,] +xord<-order(xx[,1]) +xx<-xx[xord,] +temp<-chull(xx) +xord<-order(xx[,1]) +xx<-xx[xord,] +temp<-chull(xx) +lines(xx[temp,]) +lines(xx[c(temp[1],temp[length(temp)]),]) +temp<-NA +for(i in 1:nrow(y)){ +temp[i]<-depth(y[i,1],y[i,2],y) +} +flag<-(temp>=median(temp)) +xx<-y[flag,] +xord<-order(xx[,1]) +xx<-xx[xord,] +temp<-chull(xx) +flag<-(temp>=median(temp)) +xord<-order(xx[,1]) +xx<-xx[xord,] +temp<-chull(xx) +lines(xx[temp,],lty=2) +lines(xx[c(temp[1],temp[length(temp)]),],lty=2) +} +tempyy<-NA +for(i in 1:nrow(y))tempyy[i]<-depth(y[i,1],y[i,2],y) +for(i in 1:nrow(x)){ +temp<-depth(x[i,1],x[i,2],y) +disxy[i]<-mean(temp>tempyy) +} +tempxx<-NA +for(i in 1:nrow(x))tempxx[i]<-depth(x[i,1],x[i,2],x) +for(i in 1:nrow(y)){ +temp<-depth(y[i,1],y[i,2],x) +disyx[i]<-mean(temp>tempxx) +} +}} +if(MD){ +mx<-apply(x,2,median) +my<-apply(y,2,median) +vx<-apply(x,2,winval,tr=tr)-apply(x,2,mean,trim=tr)+mx +vx<-var(vx) +vy<-apply(y,2,winval,tr=tr)-apply(y,2,mean,trim=tr)+my +vy<-var(vy) +tempxx<-1/(1+mahalanobis(x,mx,vx)) +tempyx<-1/(1+mahalanobis(y,mx,vx)) +for(i in 1:nrow(y)){ +disyx[i]<-mean(tempyx[i]>tempxx) +} +tempyy<-1/(1+mahalanobis(y,my,vy)) +tempxy<-1/(1+mahalanobis(x,my,vy)) +for(i in 1:nrow(x)){ 
+disxy[i]<-mean(tempxy[i]>tempyy) +} +} +qhatxy<-sum(disxy) +qhatyx<-sum(disyx) +qhat<-(qhatxy+qhatyx)/(length(disxy)+length(disyx)) +} +qhatyx<-mean(disyx) +qhatxy<-mean(disxy) +list(qhatxy,qhatyx,qhat) +} + +depthg2<-function(x,y,alpha=.05,nboot=500,MD=FALSE,plotit=TRUE,op=FALSE,fast=FALSE,SEED=TRUE, +xlab="VAR 1",ylab="VAR 2"){ +# +# Compare two independent groups based on p measures +# for each group. +# +# The method is based on Tukey's depth if MD=F; +# otherwise the Mahalanobis depth is used. +# If p>2, then Mahalanobis depth is used automatically +# +# The method is designed to be sensitive to differences in scale +# +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +x=elimna(x) +y=elimna(y) +x=as.matrix(x) +y=as.matrix(y) +if(is.matrix(x) && is.matrix(y)){ # YES, code is odd. +nv1<-nrow(x) +nv2<-nrow(y) +if(ncol(x)!=ncol(y))stop("Number of columns of x is not equal to number for y") +if(ncol(x) >2)MD<-T +if(ncol(x)==2 && plotit){ +plot(rbind(x,y),type="n",xlab=xlab,ylab=ylab) +points(x,pch="*") +points(y,pch="o") +temp<-NA +for(i in 1:nrow(x)){ +temp[i]<-depth(x[i,1],x[i,2],x) +} +flag<-(temp>=median(temp)) +xx<-x[flag,] +xord<-order(xx[,1]) +xx<-xx[xord,] +temp<-chull(xx) +xord<-order(xx[,1]) +xx<-xx[xord,] +temp<-chull(xx) +lines(xx[temp,]) +lines(xx[c(temp[1],temp[length(temp)]),]) +temp<-NA +for(i in 1:nrow(y)){ +temp[i]<-depth(y[i,1],y[i,2],y) +} +flag<-(temp>=median(temp)) +xx<-y[flag,] +xord<-order(xx[,1]) +xx<-xx[xord,] +temp<-chull(xx) +flag<-(temp>=median(temp)) +xord<-order(xx[,1]) +xx<-xx[xord,] +temp<-chull(xx) +lines(xx[temp,],lty=2) +lines(xx[c(temp[1],temp[length(temp)]),],lty=2) +} +print("Taking bootstrap samples. Please wait.") +data1<-matrix(sample(nv1,size=nv1*nboot,replace=TRUE),nrow=nboot) +data2<-matrix(sample(nv2,size=nv2*nboot,replace=TRUE),nrow=nboot) +qhatd<-NA +dhatb<-NA +for(ib in 1:nboot){ +if(op)print(paste("Bootstrap sample ",ib," of ",nboot, "is complete.")) +if(!fast)temp<-lsqs2(x[data1[ib,],],y[data2[ib,],],plotit=FALSE,MD=MD) +if(fast)temp<-lsqs2.for(x[data1[ib,],],y[data2[ib,],],plotit=FALSE,MD=MD) +qhatd[ib]<-temp[[1]]-temp[[2]] +} +temp<-sort(qhatd) +lv<-round(alpha*nboot/2) +uv<-nboot-lv +difci<-c(temp[lv+1],temp[uv]) +} +# +if(!is.matrix(x) && !is.matrix(y)){ +nv1<-length(x) +nv2<-length(y) +print("Taking bootstrap samples. Please wait.") +data1<-matrix(sample(nv1,size=nv1*nboot,replace=TRUE),nrow=nboot) +data2<-matrix(sample(nv2,size=nv2*nboot,replace=TRUE),nrow=nboot) +qhatd<-NA +dhatb<-NA +for(ib in 1:nboot){ +if(!fast)temp<-lsqs2(x[data1[ib,]],y[data2[ib,]],plotit=FALSE,MD=MD) +if(fast)temp<-lsqs2.for(x[data1[ib,]],y[data2[ib,]],plotit=FALSE,MD=MD) +qhatd[ib]<-temp[[1]]-temp[[2]] +dhatb[ib]<-(temp[[1]]+temp[[2]])/2 +}} +temp<-sort(qhatd) +temp2<-sort(dhatb) +lv<-round(alpha*nboot/2) +uv<-nboot-lv +difci<-c(temp[lv+1],temp[uv]) +list(difci=difci) +} + +hochberg<- +function(x,x2=NA,cil=NA,con=0,tr=.2,alpha=.05){ +# +# A generalization of Hochberg's two-stage method +# method to trimmed mean# +# +# THIS FUNCTION WAS UPDATED FEB., 2024. IT NOW HAS A MORE CONVENIENT AND +# SLIGHTLY MORE ACCURATE METHOD FOR +# COMPUTING THE CRITICAL VALUE; NO NEED TO USE TABLES AS BEFORE. +# +# x contains first stage data +# x2 contains second stage data +# +# cil is the desired length of the confidence intervals. +# That is, cil is the distance between the upper and lower +# ends of the confidence intervals. 
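+#
+# Illustrative two-stage workflow (hypothetical data; winvar, listm and the other
+# supporting functions in this file must be sourced):
+#   set.seed(1)
+#   stage1 <- matrix(rnorm(60), ncol=3)    # first-stage data, three groups
+#   hochberg(stage1, cil=1)                # reports the additional n needed per group
+#   # after the second-stage data, say stage2, have been collected:
+#   # hochberg(stage1, x2=stage2, cil=1)   # returns ci.mat for all pairwise contrasts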
+# +x3<-x2 +if(is.matrix(x))x<-listm(x) +if(!is.list(x))stop("Data must be stored in list mode or in matrix mode.") +J<-length(x) +tempn<-0 +svec<-NA +for(j in 1:J){ +temp<-x[[j]] +temp<-temp[!is.na(temp)] # Remove missing values. +tempn[j]<-length(temp) +x[[j]]<-temp +svec[j]<-winvar(temp,tr=tr)/(1-2*tr)^2 +} +tempt<-floor((1-2*tr)*tempn) +A<-sum(1/(tempt-1)) +df<-J/A +if(!is.list(x2) && !is.matrix(x2)){ +x2<-list() +for(j in 1:J)x2[[j]]<-NA +} +if(is.na(cil))stop("To proceed, you must specify the maximum length of the confidence intervals.") +#crit<-trange(tempn-1,alpha=alpha,iter=iter,SEED=SEED) #OLD CODE +crit=qtukey(1-alpha,J,df) +# +if(con[1] == 0){ + Jm<-J-1 + ncon <- (J^2 - J)/2 + con <- matrix(0, J, ncon) + id <- 0 + for(j in 1:Jm) { + jp <- j + 1 + for(k in jp:J) { + id <- id + 1 + con[j, id] <- 1 + con[k, id] <- 0 - 1 + } + } + } + ncon <- ncol(con) +avec<-NA +for(i in 1:ncon){ +temp<-con[,i] +avec[i]<-sum(temp[temp>0]) +} +dvec<-(cil/(2*crit*avec))^2 +d<-max(dvec) +n.vec<-NA +for(j in 1:J){ +n.vec[j]<-max(tempn[j],floor(svec[j]/d)+1) +print(paste("Need an additional ", n.vec[j]-tempn[j], +" observations for group", j)) +} +# +# Do second stage if data are supplied +# +ci.mat=NULL +if(!is.na(x2[1])){ +if(is.matrix(x2))x2<-listm(x2) +temp2<-n.vec-tempn +#if(!is.list(x3) && !is.matrix(x3) && sum(temp2)>0)stop("No second stage data supplied; this function is terminating") +if(length(x) != length(x2))warning("Number of groups in first stage data does not match the number in the second stage.") +ci.mat<-NA +if(!is.na(x2[1]) || sum(temp2)==0){ +xtil<-NA +nvec2<-NA +for(j in 1:J){ +nvec2[j]<-0 +temp<-x2[[j]] +if(!is.na(temp[1]))nvec2[j]<-length(x2[[j]]) +if(nvec2[j] 0]) +C<-0-sum(bvec[bvec<0]) +D<-max(A,C) +ci.mat[ic,2]<-sum(con[,ic]*xtil)-crit*D +ci.mat[ic,3]<-sum(con[,ic]*xtil)+crit*D +}}} +list(ci.mat=ci.mat,con=con) +} + +trange<-function(dfvec,iter=10000,alpha=.05,SEED=TRUE){ +if(SEED)set.seed(1) +dfv<-length(dfvec)/sum(1/dfvec) +vals<-NA +tvals<-NA +J<-length(dfvec) +for(i in 1:iter){ +for(j in 1:J){ +tvals[j]<-rt(1,dfvec[j]) +} +vals[i]<-max(tvals)-min(tvals) +} +vals<-sort(vals) +ival<-round((1-alpha)*iter) +qval<-vals[ival] +qval +} + + +lsqs3<-function(x,y,plotit=TRUE,cop=2,ap.dep=FALSE,v2=FALSE,pv=FALSE,SEED=TRUE,nboot=1000,ypch="o",xpch="+"){ +# +# Compute the typical depth of x in y, +# Compute the typical depth of y in x, +# use the maximum of the two typical depths +# as a test statistic. +# This method is designed to be sensitive to +# shifts in location. +# +# Use Tukey's depth; bivariate case only. 
+# +# cop=2 use MCD location estimator when +# computing depth with function fdepth +# cop=3 uses medians +# cop=3 uses MVE +# +# xpch="+" means when plotting the data, data from the first +# group are indicated by a + +# ypch="o" are data from the second group +# +if(is.list(x))x<-matl(x) +if(is.list(y))y<-matl(y) +x<-elimna(x) +y<-elimna(y) +x<-as.matrix(x) +y<-as.matrix(y) +nx=nrow(x) +ny=nrow(y) +if(ncol(x) != ncol(y))stop("Number of variables not equal") +disyx<-NA # depth of y in x +disxy<-NA # depth of x in y +# +if(ncol(x)==2){ +if(plotit){ +plot(rbind(x,y),type="n",xlab="VAR 1",ylab="VAR 2") +points(x,pch=xpch) +points(y,pch=ypch) +if(nrow(x)>50){ +if(!ap.dep){ +print("If execution time is high, might use ap.dep=FALSE") +} +if(!ap.dep)temp<-depth2(x,plotit=FALSE) +if(ap.dep)temp<-fdepth(x,plotit=FALSE,cop=cop) +} +if(!ap.dep)temp<-depth2(x,plotit=FALSE) +if(ap.dep)temp<-fdepth(x,plotit=FALSE,cop=cop) +flag<-(temp>=median(temp)) +xx<-x[flag,] +xord<-order(xx[,1]) +xx<-xx[xord,] +temp<-chull(xx) +xord<-order(xx[,1]) +xx<-xx[xord,] +temp<-chull(xx) +lines(xx[temp,]) +lines(xx[c(temp[1],temp[length(temp)]),]) +if(ap.dep)temp<-fdepth(y,plotit=FALSE,cop=cop) +if(!ap.dep)temp<-depth2(y,plotit=FALSE) +if(!ap.dep)temp<-depth2(y,plotit=FALSE) +if(!ap.dep)temp<-fdepth(y,plotit=FALSE) +flag<-(temp>=median(temp)) +xx<-y[flag,] +xord<-order(xx[,1]) +xx<-xx[xord,] +temp<-chull(xx) +flag<-(temp>=median(temp)) +xord<-order(xx[,1]) +xx<-xx[xord,] +temp<-chull(xx) +lines(xx[temp,],lty=2) +lines(xx[c(temp[1],temp[length(temp)]),],lty=2) +} +tempyx<-NA +tempxy<-NA +if(ap.dep)tempyx<-fdepth(x,y,plotit=FALSE,cop=cop) +if(!ap.dep)tempyx<-depth2(x,y,plotit=FALSE) +if(ap.dep)tempxy<-fdepth(y,x,plotit=FALSE,cop=cop) +if(!ap.dep)tempxy<-depth2(y,x,plotit=FALSE) +} +if(ncol(x)==1){ +tempyx<-unidepth(as.vector(x),as.vector(y)) +tempxy<-unidepth(as.vector(y),as.vector(x)) +} +if(ncol(x)>2){ +if(!v2){ +tempxy<-fdepth(y,x,plotit=FALSE,cop=cop) +tempyx<-fdepth(x,y,plotit=FALSE,cop=cop) +} +if(v2){ +tempxy<-fdepthv2(y,x,plotit=FALSE) +tempyx<-fdepthv2(x,y,plotit=FALSE) +}} +qhatxy<-mean(tempxy) +qhatyx<-mean(tempyx) +qhat<-max(c(qhatxy,qhatyx)) +n1<-nrow(x) +n2<-nrow(y) +nv<-(3*min(c(n1,n2))+max(c(n1,n2)))/4 +if(ncol(x)==1)crit<-.2536-.4578/sqrt(nv) +if(ncol(x)==2)crit<-.1569-.3/sqrt(nv) +if(ncol(x)==3)crit<-.0861-.269/sqrt(nv) +if(ncol(x)==4)crit<-.054-.1568/sqrt(nv) +if(ncol(x)==5)crit<-.0367-.0968/sqrt(nv) +if(ncol(x)==6)crit<-.0262-.0565/sqrt(nv) +if(ncol(x)==7)crit<-.0174-.0916/sqrt(nv) +if(ncol(x)>7)crit<-.013 +rej<-"Fail to reject" +if(qhat<=crit)rej<-"Reject" +testv=NULL +pval=NULL +if(pv){ +if(SEED)set.seed(2) +rej="NULL" +for(i in 1:nboot)testv[i]=lsqs3.sub(rmul(n1,ncol(x)),rmul(n2,ncol(x)),cop=cop,ap.dep=ap.dep,v2=v2,)$test +pval=mean(qhat>=testv) +} +list(n1=nx,n2=ny,avg.depth.of.x.in.y=qhatxy,avg.depth.of.y.in.x=qhatyx,test=qhat,crit=crit,Decision=rej,p.value=pval) +} + +# The next function is used to compute p-values for lsqs3; it avoids lsqs3 calling itself. + +lsqs3.sub<-function(x,y,plotit=FALSE,cop=2,ap.dep=FALSE,v2=FALSE,pv=FALSE,SEED=TRUE,nboot=1000,ypch="o",xpch="+"){ +# +# Compute the typical depth of x in y, +# Compute the typical depth of y in x, +# use the maximum of the two typical depths +# as a test statistic. +# This method is designed to be sensitive to +# shifts in location. +# +# Use Tukey's depth; bivariate case only. 
+# +# cop=2 use MCD location estimator when +# computing depth with function fdepth +# cop=3 uses medians +# cop=3 uses MVE +# +# xpch="+" means when plotting the data, data from the first +# group are indicated by a + +# ypch="o" are data from the second group +# +if(is.list(x))x<-matl(x) +if(is.list(y))y<-matl(y) +x<-elimna(x) +y<-elimna(y) +x<-as.matrix(x) +y<-as.matrix(y) +nx=nrow(x) +ny=nrow(y) +if(ncol(x) != ncol(y))stop("Number of variables not equal") +disyx<-NA # depth of y in x +disxy<-NA # depth of x in y +# +if(ncol(x)==2){ +if(plotit){ +plot(rbind(x,y),type="n",xlab="VAR 1",ylab="VAR 2") +points(x,pch=xpch) +points(y,pch=ypch) +if(nrow(x)>50){ +if(!ap.dep){ +print("If execution time is high, might use ap.dep=FALSE") +} +if(!ap.dep)temp<-depth2(x,plotit=FALSE) +if(ap.dep)temp<-fdepth(x,plotit=FALSE,cop=cop) +} +if(!ap.dep)temp<-depth2(x,plotit=FALSE) +if(ap.dep)temp<-fdepth(x,plotit=FALSE,cop=cop) +flag<-(temp>=median(temp)) +xx<-x[flag,] +xord<-order(xx[,1]) +xx<-xx[xord,] +temp<-chull(xx) +xord<-order(xx[,1]) +xx<-xx[xord,] +temp<-chull(xx) +lines(xx[temp,]) +lines(xx[c(temp[1],temp[length(temp)]),]) +if(ap.dep)temp<-fdepth(y,plotit=FALSE,cop=cop) +if(!ap.dep)temp<-depth2(y,plotit=FALSE) +if(!ap.dep)temp<-depth2(y,plotit=FALSE) +if(!ap.dep)temp<-fdepth(y,plotit=FALSE) +flag<-(temp>=median(temp)) +xx<-y[flag,] +xord<-order(xx[,1]) +xx<-xx[xord,] +temp<-chull(xx) +flag<-(temp>=median(temp)) +xord<-order(xx[,1]) +xx<-xx[xord,] +temp<-chull(xx) +lines(xx[temp,],lty=2) +lines(xx[c(temp[1],temp[length(temp)]),],lty=2) +} +tempyx<-NA +tempxy<-NA +if(ap.dep)tempyx<-fdepth(x,y,plotit=FALSE,cop=cop) +if(!ap.dep)tempyx<-depth2(x,y,plotit=FALSE) +if(ap.dep)tempxy<-fdepth(y,x,plotit=FALSE,cop=cop) +tempxy<-depth2(y,x,plotit=FALSE) +} +if(ncol(x)==1){ +tempyx<-unidepth(as.vector(x),as.vector(y)) +tempxy<-unidepth(as.vector(y),as.vector(x)) +} +if(ncol(x)>2){ +if(!v2){ +tempxy<-fdepth(y,x,plotit=FALSE,cop=cop) +tempyx<-fdepth(x,y,plotit=FALSE,cop=cop) +} +if(v2){ +tempxy<-fdepthv2(y,x,plotit=FALSE) +tempyx<-fdepthv2(x,y,plotit=FALSE) +}} +qhatxy<-mean(tempxy) +qhatyx<-mean(tempyx) +qhat<-max(c(qhatxy,qhatyx)) +n1<-nrow(x) +n2<-nrow(y) +nv<-(3*min(c(n1,n2))+max(c(n1,n2)))/4 +if(ncol(x)==1)crit<-.2536-.4578/sqrt(nv) +if(ncol(x)==2)crit<-.1569-.3/sqrt(nv) +if(ncol(x)==3)crit<-.0861-.269/sqrt(nv) +if(ncol(x)==4)crit<-.054-.1568/sqrt(nv) +if(ncol(x)==5)crit<-.0367-.0968/sqrt(nv) +if(ncol(x)==6)crit<-.0262-.0565/sqrt(nv) +if(ncol(x)==7)crit<-.0174-.0916/sqrt(nv) +if(ncol(x)>7)crit<-.013 +rej<-"Fail to reject" +if(qhat<=crit)rej<-"Reject" +testv=NULL +pval=NULL +if(pv){ +if(SEED)set.seed(2) +rej="NULL" +for(i in 1:nboot)testv[i]=lsqs3.sub(rmul(n1,ncol(x)),rmul(n2,ncol(x)),cop=cop,ap.dep=ap.dep,v2=v2,)$test +pval=mean(qhat>=testv) +} +list(n1=nx,n2=ny,avg.depth.of.x.in.y=qhatxy,avg.depth.of.y.in.x=qhatyx,test=qhat,crit=crit,Decision=rej,p.value=pval) +} + + + + +kercon<-function(x,y,pyhat=FALSE,cval=NA,plotit=TRUE,eout=FALSE,xout=FALSE, +outfun=out,iran=.05,xlab="X",ylab="Y",pch='.'){ +# +# Compute conditional local weighted regression with Epanechnikov kernel +# +# cf. Fan, Annals of Statistics, 1993, 21, 196-217. 
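+#
+# Illustrative example (hypothetical data; x must have exactly two columns, and
+# idealf plus the outlier functions in this file are assumed to be sourced):
+#   set.seed(1)
+#   x <- cbind(rnorm(100), rnorm(100))
+#   y <- x[,1] + x[,1]*x[,2] + rnorm(100)
+#   kercon(x, y)   # smooths of y on x[,1], conditional on x[,2] at its quartiles and median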
+# +d<-ncol(x) +if(d!=2)stop("Argument x should have two columns only") +np1<-d+1 +m<-elimna(cbind(x,y)) +x<-m[,1:d] +y<-m[,np1] +yhat1<-NA +if(eout && xout)stop("Can't have both eout and xout=F") +if(eout){ +flag<-outfun(m)$keep +m<-m[flag,] +} +if(xout){ +flag<-outfun(x)$keep +m<-m[flag,] +} +x<-m[,1:d] +y<-m[,np1] +if(is.na(cval[1])){temp<-idealf(x[,2]) +cval<-c(temp$ql,median(x[,2]),temp$qu) +} +xrem<-x +x2<-x[,2] +n<-nrow(x) +sig<-sqrt(var(x2)) +temp<-idealf(x2) +iqr<-(temp$qu-temp$ql)/1.34 +A1<-min(c(sig,iqr)) +A<-1.77 +hval<-A*(1/n)^(1/6) # Silverman, 1986, p. 86 +svec<-NA +for(j in 1:d){ +sig<-sqrt(var(x[,j])) +temp<-idealf(x[,j]) +iqr<-(temp$qu-temp$ql)/1.34 +A<-min(c(sig,iqr)) +svec[j]<-A +x[,j]<-x[,j]/A +} +hval<-hval*sqrt(mean(svec^2)) +ilow<-round(iran*length(y)) +iup<-round((1-iran)*length(y)) +for(il in 1:length(cval)){ +temp4<-NA +for(j in 1:nrow(x)){ +temp4[j]<-((x2[j]-cval[il])/A1)^2 +} +yhat<-NA +epan1<-ifelse(temp4<1,.75*(1-temp4),0) # Epanechnikov kernel for x2 +for(j in 1:n){ +yhat[j]<-NA +temp1<-cbind(x[,1]-x[j,1],x[,2]-cval[il]/A)/hval +temp1<-temp1^2 +temp1<-apply(temp1,1,FUN="sum") +temp<-.5*(d+2)*(1-temp1)/gamma(.5)^2 +epan<-ifelse(temp1<1,temp,0) # Epanechnikov kernel, for both x1 and x2 +if(epan1[j]>0)epan[j]<-epan[j]/epan1[j] +if(epan1[j]==0)epan[j]<-0 +chkit<-sum(epan!=0) +if(chkit >= np1){ +vals<-lsfit(x[,1],y,wt=epan)$coef +yhat[j]<-x[j,1]*vals[2]+vals[1] +}} +if(plotit){ +xorder<-order(xrem[,1]) +if(il==1)plot(xrem[,1],y,xlab=xlab,ylab=ylab,pch=pch) +lines(xrem[xorder[ilow:iup],1],yhat[xorder[ilow:iup]],lty=il) +}} +m<-"Done" +if(pyhat)m<-yhat +m +} + +mscor<-function(m,corfun=spear,cop=3,MM=FALSE,gval=NA,ap=TRUE,pw=TRUE,STAND=TRUE, +outfun=outpro,alpha=.05){ +# +# m is an n by p matrix +# +# Compute a skipped correlation matrix +# +# corfun indicates the correlation to be used +# corfun=pcor uses Pearson's correlation +# corfun=spear uses Spearman's correlation +# +# When calling outpro, +# STAND=T means marginals are first standardized. +# This function returns the p by p matrix of correlations +# +# Method: Eliminate outliers using a projection technique. +# That is, compute Donoho-Gasko median, for each point +# consider the line between it and the median, +# project all points onto this line, and +# check for outliers using a boxplot rule. +# Repeat this for all points. A point is declared +# an outlier if for any projection it is an outlier +# using a modification of the usual boxplot rule. +# +# cop determines how center of the scatterplot is +# estimated; see the function outpro. +# cop=l Donoho-Gasko halfspace median +# cop=2 MCD measure of location +# cop=3 marginal medians +# cop=4 MVE measure of location +# +# Eliminate any outliers and compute +# correlations using remaining data. +# +# gval is critical value for determining whether a point +# is an outlier. It is determined automatically if not specified, +# assuming that Spearman's correlation is used. Critical +# values when using some other correlation have not been +# determined. +# +# Hypothesis of zero correlations tested with FWE=.05 +# +# AGRUMENTS: +# MM; see function outpro +# ap=T all pairwise comparisons are tested +# ap=F first variable is tested versus all others +# (for a total of p-1 tests). +# pw=T, print message about high execution time +# pw=F, suppress the message. 
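+#
+# Illustrative example (hypothetical data; outpro and spear from this file are
+# assumed to be sourced):
+#   set.seed(1)
+#   m <- cbind(rnorm(40), rnorm(40), rnorm(40))
+#   mscor(m)   # skipped Spearman correlations, test statistics and the .05 FWE critical value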
+# +if(alpha!=.05)stop('For alpha other than .05, use mscorpb or mscorpbMC') +m<-elimna(m) +p<-ncol(m) +pm<-p-1 +n<-nrow(m) +if(p<2)stop("Something wrong; number of variables is < 2") +if(pw && cop==1){ +print("If execution time is too high,") +print("use cop=2 or 4 rather than 1") +} +if(ap){ +inter<-c(2.374,2.780,3.030,3.208,3.372,3.502,3.722,3.825,3.943) +slope<-c(5.333,8.8,25.67,32.83,51.53,75.02,111.34,123.16,126.72) +expo<-c(-1,-1,-1.2,-1.2,-1.3,-1.4,-1.5,-1.5,-1.5) +if(p>10){ +qvec<-NA +for(i in 1:9)qvec[i]<-inter[i]+slope[i]*n^expo[i] +pval<-c(2:10) +temp<-lsfit(pval,qvec)$coef +} +} +if(!ap){ +inter<-c(2.374,2.54,2.666,2.92,2.999,3.097,3.414,3.286,3.258) +slope<-c(5.333,8.811,14.89,20.59,51.01,52.15,58.498,64.934,59.127) +expo<-c(-1,-1,-1.2,-1.2,-1.5,-1.5,-1.5,-1.5,-1.5) +if(p>10){ +qvec<-NA +for(i in 1:9)qvec[i]<-inter[i]+slope[i]*n^expo[i] +pval<-c(1:9) +temp<-lsfit(pval,qvec)$coef +} +} +if(p<=10)crit<-inter[pm]+slope[pm]*n^expo[pm] +if(p>10)crit<-temp[2]*p+temp[1] +if(cop!=1 && is.na(gval))gval<-sqrt(qchisq(.975,ncol(m))) +temp<-outfun(m,plotit=FALSE,MM=MM,gval=gval,cop=cop,STAND=STAND)$keep +mcor<-corfun(m[temp,])$cor +test<-abs(mcor*sqrt((nrow(m)-2)/(1-mcor^2))) +diag(test) <- NA +if(!ap){ +test<-as.matrix(test[1,]) +} +list(cor=mcor,crit.val=crit,test.stat=test) +} + +dfried<-function(m,plotit=TRUE,pop=0,fr=.8,v2=FALSE,op=FALSE){ +# +# Compare dependent groups using halfspace depth of +# 0 relative to distribution of differences. +# +# When plotting differences scores: +# pop=1 Plot expected frequency curve +# pop=2 kernel density estimate +# pop=3 S+ kernel density estimate +# pop=4 boxplot +# +if(is.list(m))m<-matl(m) +if(!is.matrix(m))stop("m should be a matrix having at least 2 columns.") +m<-elimna(m) +library(MASS) +K<-ncol(m) +n<-nrow(m) +if(n<=10 && !op)print("With n<=10, might want to use op=T") +J<-(K^2-K)/2 +dcen<-cov.mcd(m)$center +center<-NA +pval<-matrix(NA,ncol=J,nrow=nrow(m)) +zvec<-rep(0,J) +ic<-0 +for(k in 1:K){ +for(kk in 1:K){ +if(k1)temp<-fdepth(pval0,center=center) +} +if(v2){ +if(ncol(pval)>1)temp<-fdepthv2(pval0) +} +big.dep<-max(temp) +if(op){ +v3<-dmean(pval,tr=.5,dop=2) +v3<-t(as.matrix(v3)) +big.dep<-max(max(temp),fdepthv2(pval0,v3)) +} +phat<-temp[nrow(m)+1]/big.dep +# Determine critical value +if(K==2)crit<-0.95-1.46/n^.5 +if(K==3)crit<-1.00-1.71/n^.5 +if(K==4)crit<-1.06-1.77/n^.5 +if(K==5)crit<-1.11-1.76/n^.5 +if(K==6)crit<-1.41-1.62/n^.3 +if(K==7)crit<-1.49-1.71/n^.3 +if(K>=8)crit<-1.39-1.38/n^.3 +crit<-min(c(crit,1)) +if(plotit && ncol(pval)==1){ +if(pop==0)akerd(pval,fr=fr) +if(pop==1)rdplot(pval,fr=fr) +if(pop==2)kdplot(pval) +if(pop==3)skerd(pval) +if(pop==4)boxplot(pval) +} +list(phat=phat,crit.val=crit) +} + +wrregfun<-function(slope,x=x,y=y){ +x<-as.matrix(x) +res<-y-x%*%slope +v1<-rank(res) +v2<-sqrt(12)*(v1/(length(y)+1)-.5) +wrregfun<-sum(v2*res) +wrregfun +} + +spat.sub<-function(x,theta){ +xx<-x +for(i in 1:ncol(x))xx[,i]<-x[,i]-theta[i] +xx<-xx^2 +temp<-sqrt(apply(xx,1,sum)) +val<-mean(temp) +val +} +spat<-function(x){ +# +# compute spatial median +# x is an n by p matrix +# +if(!is.matrix(x))stop("x must be a matrix") +x<-elimna(x) +START<-apply(x,2,median) +val=optim(START,spat.sub,x=x,method='BFGS')$par +val +} + +rungen<-function(x,y,est=onestep,fr=1,plotit=TRUE,scat=TRUE,pyhat=FALSE, +eout=FALSE,xout=FALSE,xlab="x",ylab="y",outfun=out,LP=TRUE,pch='.',...){ +# +# running interval smoother that can be used with any measure +# of location or scale. By default, an M-estimator is used. 
+# +# LP=TRUE, the plot is further smoothed via lows +# +# fr controls amount of smoothing +plotit<-as.logical(plotit) +scat<-as.logical(scat) +m<-cbind(x,y) +m<-elimna(m) +if(eout && xout)stop("Not allowed to have eout=xout=T") +if(eout){ +flag<-outfun(m,plotit=FALSE)$keep +m<-m[flag,] +} +if(xout){ +flag<-outfun(m[,1])$keep +m<-m[flag,] +} +x=m[,1] +y=m[,2] +rmd<-c(1:length(x)) +for(i in 1:length(x))rmd[i]<-est(y[near(x,x[i],fr)],...) +if(LP){ +ord=order(x) +x=x[ord] +rmd=rmd[ord] +y=y[ord] +rmd=lplot(x,rmd,plotit=FALSE,pyhat=TRUE,pr=FALSE,STR=FALSE)$yhat +} +if(plotit){ +if(scat){ +plot(c(x,x),c(y,rmd),xlab=xlab,ylab=ylab,type="n") +points(x,y,pch=pch) +} +if(!scat)plot(c(x,x),c(y,rmd),type="n",ylab=ylab,xlab=xlab) +points(x,rmd,type="n") +sx<-sort(x) +xorder<-order(x) +sysm<-rmd[xorder] +lines(sx,sysm) +} +if(pyhat)output<-rmd +if(!pyhat)output<-"Done" +list(output=output) +} + + +#adpchk<-function(x,y,adfun=adrun,gfun=runm3d,xlab="First Fit", +adpchk<-function(x,y,adfun=adrun,gfun=rplot,xlab="First Fit", +ylab="Second Fit",...){ +# +# Compare adfun, usually an additive fit, to fit +# based on gfun. +# +fit1<-adfun(x,y,pyhat=TRUE,plotit=FALSE) +if(is.list(fit1))fit1=fit1$yhat +fit2<-gfun(x,y,pyhat=TRUE,plotit=FALSE)$yhat +if(is.list(fit2))fit2=fit2$yhat +plot(fit1,fit2,xlab=xlab,ylab=ylab) +abline(0,1) +} + + + +riplot<-function(x,y,adfun=adrun,plotfun=lplot,eout=FALSE,xout=TRUE,scale=FALSE){ +# +# Plot used to investigate regression interaction +# (the extent a generalized additive model does not fit data). +# Compute additive fit, plot residuals +# versus x, an n by 2 matrix. +# +if(!is.matrix(x))stop(" x must be a matrix") +if(ncol(x)!=2)stop(" x must have two columns only") +yhat<-adfun(x,y,pyhat=TRUE,eout=eout,xout=xout,plotit=FALSE) +plotfun(x,y-yhat,eout=eout,xout=xout,scale=scale) +} + +adtestv2<-function(x,y,est=tmean,nboot=500,alpha=.05,fr=NA,xout=TRUE,outfun=outpro,com.pval=FALSE,SEED=TRUE,qval=.5,...){ +# +# For two predictors, test the hypothesis that the regression model is additive. That is, there is no interaction. +# In essence, for the model Y=g_1(X_1)+g_2(X_2)+g_3(X_1X_2), test H_0: g_3(X_1X_2)=0 +# +# The method fits an additive model using running interval smoother and the backfitting +# algorithm and then tests the hypothesis that the median of X_1X_2, given the residuals, +# is a straight horizontal line. +# +if(ncol(x)!=2)stop("There should be two predictors") +temp<-cbind(x,y) +p<-ncol(x) +p1<-p+1 +temp<-elimna(temp) +x<-temp[,1:p] +x<-as.matrix(x) +y<-temp[,p1] +if(xout){ +keepit<-rep(TRUE,nrow(x)) +flag<-outfun(x,plotit=FALSE,...)$out.id +keepit[flag]<-FALSE +x<-x[keepit,] +y<-y[keepit] +} +if(alpha<.05 && nboot<=100)warning("You used alpha<.05 and nboot<=100") +if(is.na(fr)){ +fr<-.8 +if(ncol(x)==2){ +nval<-c(20,30,50,80,100,200,300,400) +fval<-c(0.40,0.36,0.3,0.25,0.23,.12,.08,.015) +if(length(y)<=400)fr<-approx(nval,fval,length(y))$y +if(length(y)>400)fr<-.01 +} +} +if(SEED)set.seed(2) +x<-as.matrix(x) +mflag<-matrix(NA,nrow=length(y),ncol=length(y)) +for (j in 1:length(y)){ +for (k in 1:length(y)){ +mflag[j,k]<-(sum(x[j,]<=x[k,])==ncol(x)) +} +} +yhat<-adrun(x,y,est=est,plotit=FALSE,fr=fr,pyhat=TRUE) +regres<-y-yhat +test2=medind(regres,x[,1]*x[,2],qval=qval,nboot=nboot,com.pval=com.pval,SEED=SEED,alpha=alpha, +pr=TRUE,xout=xout,outfun=outfun,...) 
+test2 +} + + +adtests1<-function(vstar,yhat,res,mflag,x,fr){ +ystar<-yhat+res*vstar +bres<-adrun(x,ystar,fr=fr,pyhat=TRUE,plotit=FALSE) +bres<-ystar-bres +rval<-0 +for (i in 1:nrow(x)){ +rval[i]<-sum(bres[mflag[,i]]) +} +rval +} +runsm2g<-function(x1,y1,x2,val=median(x2),est=tmean,sm=FALSE,fr=.8,xlab="X", +ylab="Y",...){ +# +# Plot of running interval smoother for two groups +# Groups are defined according to whether x2=1. +# +if(!is.matrix(x))stop("Predictors are not stored in a matrix.") +if(!is.matrix(pts))stop("The third argument, pts, must be a matrix.") +library(MASS) +if(DET)m=DETMCD(x) +else m<-cov.mve(x) +rmd<-1 # Initialize rmd +nval<-1 +for(i in 1:nrow(pts)){ +rmd[i]<-est(y[near3d(x,pts[i,],fr,m)],...) +nval[i]<-length(y[near3d(x,pts[i,],fr,m)]) +} +list(rmd=rmd,nval=nval) +} + + +lta.sub<-function(X,theta,h){ +np<-ncol(X) +p<-np-1 +x<-X[,1:p] +y<-X[,np] +temp<-t(t(x)*theta[2:np]) +yhat<-apply(temp,1,sum)+theta[1] +res<-abs(y-yhat) +res<-sort(res) +val<-sum(res[1:h]) +val +} + ltareg<-function(x, y, tr = 0.2, h = NA,op=2) +{ + # + # Compute the least trimmed absolute value regression estimator. + # The default amount of trimming is .2 +# op=1, use ltsreg as initial estimate +# op!=1, use tsreg +# +# If h is specfied, use h smallest residuals, and ignore tr +# +x<-as.matrix(x) +library(MASS) +if(is.na(h)) h <- length(y) - floor(tr * length(y)) +X<-cbind(x,y) +X<-elimna(X) +np<-ncol(X) +p<-np-1 +x<-X[,1:p] +x<-as.matrix(x) +y<-X[,np] +if(op==1)temp<-ltsreg(x,y)$coef +if(op!=1)temp<-tsreg(x,y)$coef +START<-temp +coef<-nelderv2(X,np,FN=lta.sub,START=START,h=h) + res <- y - x%*%coef[2:np] - coef[1] + list(coef = coef, residuals = res) +} + + + +nelderv2<-function(x,N,FN,START=c(rep(1,N)),STEP=c(rep(1,N)),REQMIN=.0001, +XMIN=c(rep(0,N)),XSEC=c(rep(0,N)),...){ +# NELDER-MEAD method for minimzing a function +# +# TAKEN FROM OLSSON, J QUALITY TECHNOLOGY, 1974, 6, 56. +# +# x= n by p matrix containing data; it is used by +# function to be minimized. +# N= number of parameters +# +# FN=the function to be minimized +# FORM: FN(x,theta), theta is vector containing +# values for N parameters. +# +# START = starting values. +# STEP=initial step. +# This function returns the N values for theta that minimize FN +# + ICOUNT<-500 + NN<-N+1 + P<-matrix(NA,nrow=N,ncol=NN) + P[,NN]<-START + PBAR<-NA + RCOEFF<-1 + ECOEFF<-2 + CCOEFF<-.5 + KCOUNT<-ICOUNT + ICOUNT<-0 + DABIT<-2.04067e-35 + BIGNUM<-1.e38 + KONVGE<-5 + XN<-N + DN<-N + Y<-rep(0,NN) + Y[NN]<-FN(x,START,...) + ICOUNT<-ICOUNT+1 + for(J in 1:N){ + DCHK<-START[J] + START[J]<-DCHK+STEP[J] + for(I in 1:N){ + P[I,J]<-START[I] +} + Y[J]<-FN(x,START,...) + ICOUNT<-ICOUNT+1 + START[J]<-DCHK +} + I1000<-TRUE + while(I1000){ + YLO<-Y[1] + YNEWLO<-YLO + ILO<-1 + IHI<-1 + for(I in 2:NN){ + if(Y[I] < YLO){ + YLO<-Y[I] + ILO<-I} + if(Y[I] > YNEWLO){ + YNEWLO<-Y[I] + IHI<-I} +} + DCHK<-(YNEWLO+DABIT)/(YLO+DABIT)-1 + if(abs(DCHK) < REQMIN){ + I1000<-FALSE + next +} + KONVGE<-KONVGE-1 + if(KONVGE == 0){ + KONVGE<-5 + for(I in 1:N){ + COORD1<-P[I,1] + COORD2<-COORD1 + for(J in 2:NN){ + if(P[I,J] < COORD1)COORD1<-P[I,J] + if(P[I,J] > COORD2)COORD2<-P[I,J] +} # 2010 CONTINUE + DCHK<-(COORD2+DABIT)/(COORD1+DABIT)-1 + if(abs(DCHK) > REQMIN)break +} +} + if(ICOUNT >= KCOUNT){ + I1000<-F + next +} + for(I in 1:N){ + Z<-0.0 + Z<-sum(P[I,1:NN]) # 6 + Z<-Z-P[I,IHI] + PBAR[I]<-Z/DN +} + PSTAR<-(1.+RCOEFF)*PBAR-RCOEFF*P[,IHI] + YSTAR<-FN(x,PSTAR,...) 
+ ICOUNT<-ICOUNT+1 + if(YSTAR < YLO && ICOUNT >= KCOUNT){ + P[,IHI]<-PSTAR + Y[IHI]<-YSTAR + next +} + IFLAG<-TRUE + if(YSTAR < YLO){ + P2STAR<-ECOEFF*PSTAR+(1-ECOEFF)*PBAR + Y2STAR<-FN(x,P2STAR,...) + ICOUNT<-ICOUNT+1 + if(Y2STAR >= YSTAR){ + P[,IHI]<-PSTAR + Y[IHI]<-YSTAR + next #In essence, go to 19 which goes to 1000 +} + IFLAG<-TRUE + while(YSTAR < Y[IHI]){ + P[,IHI]<-P2STAR + Y[IHI]<-Y2STAR + IFLAG<-FALSE + break + L<-sum(Y[1:NN] > YSTAR) + if(L > 1){ + P[,IHI]<-PSTAR + Y[IHI]<-YSTAR + IFLAG<-TRUE + break +} + if(L > 1)break # go to 19 + if(L != 0){ + P[1:N,IHI]<-PSTAR[1:N] + Y[IHI]<-YSTAR +} +I1000<-FALSE +break + if(ICOUNT >= KCOUNT){ + I1000<-FALSE + next +} + P2STAR[1:N]<-CCOEFF*P[1:N,IHI]+(1-CCOEFF)*PBAR[1:N] + Y2STAR<-FN(x,P2STAR,...) + ICOUNT<-ICOUNT+1 +} # END WHILE +} +if(IFLAG){ +for(J in 1:NN){ +P[,J]=(P[,J]+P[,ILO])*.5 + XMIN<-P[,J] + Y[J]<-FN(x,XMIN,...) +} + ICOUNT<-ICOUNT+NN + if(ICOUNT < KCOUNT)next + I1000<-F +next +} + P[1:N,IHI]<-PSTAR[1:N] + Y[IHI]<-YSTAR +} + for(J in 1:NN){ + XMIN[1:N]<-P[1:N,J] +} + Y[J]<-FN(x,XMIN,...) + YNEWLO<-BIGNUM + for(J in 1:NN){ + if (Y[J] < YNEWLO){ + YNEWLO<-Y[J] + IBEST<-J +}} + Y[IBEST]<-BIGNUM + YSEC<-BIGNUM +for(J in 1:NN){ +if(Y[J] < YSEC){ + YSEC<-Y[J] + ISEC<-J +}} + XMIN[1:N]<-P[1:N,IBEST] + XSEC[1:N]<-P[1:N,ISEC] +XMIN +} + + + + +nelder<-function(x,N,FN,START=c(rep(1,N)),STEP=c(rep(1,N)), +XMIN=c(rep(0,N)),XSEC=c(rep(0,N))){ +# NELDER-MEAD method for minimzing a function +# +# TAKEN FROM OLSSON, J QUALITY TECHNOLOGY, 1974, 6, 56. +# +# x= n by p matrix containing data; it is used by +# function to be minimized. +# N= number of parameters +# +# FN=the function to be minimized +# FORM: FN(x,theta), theta is vector containing +# values for N parameters. +# +# START = starting values. +# STEP=initial step. 
+# This function returns the N values for theta that minimize FN +# + ICOUNT<-500 + REQMIN<-.0000001 + NN<-N+1 + P<-matrix(NA,nrow=N,ncol=NN) + P[,NN]<-START + PBAR<-NA + RCOEFF<-1 + ECOEFF<-2 + CCOEFF<-.5 + KCOUNT<-ICOUNT + ICOUNT<-0 + DABIT<-2.04067e-35 + BIGNUM<-1.e38 + KONVGE<-5 + XN<-N + DN<-N + Y<-rep(0,NN) + Y[NN]<-FN(x,START) + ICOUNT<-ICOUNT+1 + for(J in 1:N){ + DCHK<-START[J] + START[J]<-DCHK+STEP[J] + for(I in 1:N){ + P[I,J]<-START[I] +} + Y[J]<-FN(x,START) + ICOUNT<-ICOUNT+1 + START[J]<-DCHK +} + I1000<-T + while(I1000){ + YLO<-Y[1] + YNEWLO<-YLO + ILO<-1 + IHI<-1 + for(I in 2:NN){ + if(Y[I] < YLO){ + YLO<-Y[I] + ILO<-I} + if(Y[I] > YNEWLO){ + YNEWLO<-Y[I] + IHI<-I} +} + DCHK<-(YNEWLO+DABIT)/(YLO+DABIT)-1 + if(abs(DCHK) < REQMIN){ + I1000<-F + next +} + KONVGE<-KONVGE-1 + if(KONVGE == 0){ + KONVGE<-5 + for(I in 1:N){ + COORD1<-P[I,1] + COORD2<-COORD1 + for(J in 2:NN){ + if(P[I,J] < COORD1)COORD1<-P[I,J] + if(P[I,J] > COORD2)COORD2<-P[I,J] +} # 2010 CONTINUE + DCHK<-(COORD2+DABIT)/(COORD1+DABIT)-1 + if(abs(DCHK) > REQMIN)break +} +} + if(ICOUNT >= KCOUNT){ + I1000<-F + next +} + for(I in 1:N){ + Z<-0.0 + Z<-sum(P[I,1:NN]) # 6 + Z<-Z-P[I,IHI] + PBAR[I]<-Z/DN +} + PSTAR<-(1.+RCOEFF)*PBAR-RCOEFF*P[,IHI] + YSTAR<-FN(x,PSTAR) + ICOUNT<-ICOUNT+1 + if(YSTAR < YLO && ICOUNT >= KCOUNT){ + P[,IHI]<-PSTAR + Y[IHI]<-YSTAR + next +} + IFLAG<-T + if(YSTAR < YLO){ + P2STAR<-ECOEFF*PSTAR+(1-ECOEFF)*PBAR + Y2STAR<-FN(x,P2STAR) + ICOUNT<-ICOUNT+1 + if(Y2STAR >= YSTAR){ + P[,IHI]<-PSTAR + Y[IHI]<-YSTAR + next #In essence, go to 19 which goes to 1000 +} + IFLAG<-T + while(YSTAR < Y[IHI]){ + P[,IHI]<-P2STAR + Y[IHI]<-Y2STAR + IFLAG<-F + break + L<-sum(Y[1:NN] > YSTAR) + if(L > 1){ + P[,IHI]<-PSTAR + Y[IHI]<-YSTAR + IFLAG<-T + break +} + if(L > 1)break # go to 19 + if(L != 0){ + P[1:N,IHI]<-PSTAR[1:N] + Y[IHI]<-YSTAR +} +I1000<-F +break + if(ICOUNT >= KCOUNT){ + I1000<-F + next +} + P2STAR[1:N]<-CCOEFF*P[1:N,IHI]+(1-CCOEFF)*PBAR[1:N] + Y2STAR<-FN(x,P2STAR) + ICOUNT<-ICOUNT+1 +} # END WHILE +} +if(IFLAG){ +for(J in 1:NN){ +P[,J]<-(P[,J]+P[,ILO])*.5 + XMIN<-P[,J] + Y[J]<-FN(x,XMIN) +} + ICOUNT<-ICOUNT+NN + if(ICOUNT < KCOUNT)next + I1000<-F +next +} + P[1:N,IHI]<-PSTAR[1:N] + Y[IHI]<-YSTAR +} + for(J in 1:NN){ + XMIN[1:N]<-P[1:N,J] +} + Y[J]<-FN(x,XMIN) + YNEWLO<-BIGNUM + for(J in 1:NN){ + if (Y[J] < YNEWLO){ + YNEWLO<-Y[J] + IBEST<-J +}} + Y[IBEST]<-BIGNUM + YSEC<-BIGNUM +for(J in 1:NN){ +if(Y[J] < YSEC){ + YSEC<-Y[J] + ISEC<-J +}} + XMIN[1:N]<-P[1:N,IBEST] + XSEC[1:N]<-P[1:N,ISEC] +XMIN +} + + +stein1.tr<-function(x,del,alpha=.05,pow=.8,tr=.2){ +# +# Extension of Stein's method when performing all pairwise +# comparisons among J dependent groups. +# +# If x represents a single group, one-sample analysis is performed. 
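+#
+# del is the smallest difference judged to be important; alpha and pow are the
+# desired Type I error probability and power. A minimal usage sketch (m is a
+# hypothetical n-by-J matrix of first-stage data):
+#   stein1.tr(m,del=.5)   # judged required sample sizes for the comparisons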
+# +if(tr < 0 || tr >=.5)stop("Argument tr must be between 0 and .5") +if(is.matrix(x))m<-x +if(is.list(x))m<-matl(x) +if(!is.matrix(x) && !is.list(x))m<-matrix(x,ncol=1) +m<-elimna(m) +m<-as.matrix(m) +ntest<-1 +n<-nrow(m) +J<-ncol(m) +if(ncol(m) > 1)ntest<-(J^2-J)/2 +g<-floor(tr*nrow(m)) +df<-n-2*g-1 +t1<-qt(pow,df) +t2<-qt(alpha/(2*ntest),df) +dv<-(del/(t1-t2))^2 +nvec<-NA +if(ntest > 1){ +ic<-0 +for (j in 1:ncol(m)){ +for (jj in 1:ncol(m)){ +if(j=.5)stop("Argument tr must be between 0 and .5") +if(is.matrix(x))m<-x +if(is.list(x))m<-matl(x) +if(is.list(y))y<-matl(y) +if(!is.matrix(x) && !is.list(x))m<-matrix(x,ncol=1) +if(!is.matrix(y) && !is.list(y))y<-matrix(y,ncol=1) +m<-elimna(m) +m<-as.matrix(m) +g<-floor(tr*nrow(m)) +df<-nrow(m)-2*g-1 +m<-rbind(m,y) +ic<-0 +ntest<-(ncol(m)^2-ncol(m))/2 +if(ntest==0)ntest<-1 +test<-matrix(NA,ncol=3,nrow=ntest) +for (j in 1:ncol(m)){ +for (jj in 1:ncol(m)){ +if(j nmin +# atr is amount of trimming when averaging over the bagged +# values +# est is the measure of location to be estimated +# est=tmean means estimate 20% trimmed mean of y given x +# +if(SEED)set.seed(2) +temp<-cbind(x,y) +if(ncol(temp)>2)stop("Use run3bo with more than 1 predictor") +temp<-elimna(temp) # Eliminate any rows with missing values +if(eout && xout)stop("Not allowed to have eout=xout=T") +if(eout){ +flag<-outfun(temp,plotit=FALSE)$keep +temp<-temp[flag,] +} +if(xout){ +flag<-outfun(x,plotit=FALSE)$keep +temp<-temp[flag,] +} +x<-temp[,1] +y<-temp[,2] +pts<-as.matrix(pts) +mat<-matrix(NA,nrow=nboot,ncol=nrow(pts)) +vals<-NA +for(it in 1:nboot){ +idat<-sample(c(1:length(y)),replace=TRUE) +xx<-temp[idat,1] +yy<-temp[idat,2] +mat[it,]<-runhat(xx,yy,pts=pts,est=est,fr=fr,...) +} +rmd<-apply(mat,2,mean,na.rm=RNA,tr=atr) +if(plotit){ +if(scat){ +plot(c(x,x),c(y,rmd),xlab=xlab,ylab=ylab,type="n") +points(x,y,pch=pch) +} +if(!scat)plot(c(x,x),c(y,rmd),type="n",xlab=xlab,ylab=ylab) +points(x, rmd, type = "n") +sx <- sort(x) +xorder <- order(x) +sysm <- rmd[xorder] +lines(sx, sysm) +} +output="Done" +if(pyhat)output<-rmd +output +} + + +run3bo<-function(x,y,fr=1,est=tmean,theta = 50, phi = 25,nmin=0, +pyhat=FALSE,eout=FALSE,outfun=out,plotit=TRUE,xout=FALSE,nboot=40,SEED=TRUE,STAND=TRUE, +expand=.5,scale=FALSE,xlab="X",ylab="Y",zlab="",ticktype="simple",...){ +# +# running mean using interval method +# +# fr controls amount of smoothing +# tr is the amount of trimming +# +# Missing values are automatically removed. +# +library(MASS) +library(akima) +if(SEED)set.seed(2) +temp<-cbind(x,y) +x<-as.matrix(x) +p<-ncol(x) +p1<-p+1 +if(p>2)plotit<-FALSE +temp<-elimna(temp) # Eliminate any rows with missing values. 
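+# (The bootstrap bagging loop below resamples the complete cases, fits the
+# running-interval smooth to each bootstrap sample via rung3hat, and then
+# averages the fitted values at the observed design points.)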
+x<-temp[,1:p] +x<-as.matrix(x) +y<-temp[,p1] +if(xout){ +keepit<-rep(TRUE,nrow(x)) +flag<-outfun(x,plotit=FALSE,STAND=STAND,...)$out.id +keepit[flag]<-FALSE +x<-x[keepit,] +y<-y[keepit] +} +mat<-matrix(NA,nrow=nboot,ncol=length(y)) +vals<-NA +for(it in 1:nboot){ +idat<-sample(c(1:length(y)),replace=TRUE) +xx<-temp[idat,1:p] +yy<-temp[idat,p1] +tmy<-rung3hat(xx,yy,pts=x,est=est,fr=fr,...)$rmd +mat[it,]<-tmy +} +rmd<-apply(mat,2,mean,na.rm=TRUE) +flag<-!is.na(rmd) +rmd<-elimna(rmd) +x<-x[flag,] +y<-y[flag] +nval<-NA +m<-cov.mve(x) +for(i in 1:nrow(x))nval[i]<-length(y[near3d(x,x[i,],fr,m)]) +if(plotit && ncol(x)==2){ +#if(ncol(x)!=2)stop("When plotting, x must be an n by 2 matrix") +fitr<-rmd[nval>nmin] +y<-y[nval>nmin] +x<-x[nval>nmin,] +iout<-c(1:length(fitr)) +nm1<-length(fitr)-1 +for(i in 1:nm1){ +ip1<-i+1 +for(k in ip1:length(fitr))if(sum(x[i,]==x[k,])==2)iout[k]<-0 +} +fitr<-fitr[iout>=1] # Eliminate duplicate points in the x-y plane +# This is necessary when doing three dimensional plots +# with the R function interp +mkeep<-x[iout>=1,] +fit<-interp(mkeep[,1],mkeep[,2],fitr) +persp(fit,theta=theta,phi=phi,xlab=xlab,ylab=ylab,zlab=zlab,expand=expand, +scale=scale,ticktype=ticktype) +} +last<-"Done" +if(pyhat)last<-rmd +list(output=last) +} + + +ancom<-function(x1,y1,x2,y2,dchk=FALSE,plotit=TRUE,plotfun=rplot,nboot=500, +alpha=.05,SEED=TRUE,PARTEST=FALSE,tr=0,...){ +# +# Omnibus ANCOVA +# tr=0 is recommended for general use. tr>0 might result in +# poor control over the probability of a Type I error. +# PARTEST=T will test the hypothesis of parallel regression lines. +# +# Setting plotfun=rplotsm will smooth the plots via bagging +# +# dchk=T, points in design space with a halfspace of zero are eliminated +# +# PARTEST=F tests hypothesis that regression surface is a horizontal +# plane through the origin +# PARTEST=T tests the hypothesis that the two regression surfaces +# are parallel. +# +flag1<-rep(TRUE,length(y1)) +flag2<-rep(TRUE,length(y2)) +if(dchk){ +dep1<-fdepth(x2,x1) # depth of points in x1 relative to x2 +dep2<-fdepth(x1,x2) +flag1<-(dep1>0) +flag2<-(dep2>0) +} +n1<-sum(flag1) +n2<-sum(flag2) +n<-n1+n2 +y<-c(n2*y1[flag1]/n,0-n1*y2[flag2]/n) +x1<-as.matrix(x1) +x1<-x1[flag1,] +x2<-as.matrix(x2) +x2<-x2[flag2,] +x1<-as.matrix(x1) +x2<-as.matrix(x2) +x<-rbind(x1,x2) +if(plotit){ +if(ncol(x)<=2)plotfun(x,y,...) +} +if(PARTEST)output<-indt(x,y,nboot=nboot,SEED=SEED) +if(!PARTEST)output<-indt0(x,y,nboot=nboot,alpha=alpha,SEED=SEED) +list(dstat=output$dstat,critd=output$critd) +} +indt0<-function(x,y,nboot=500,alpha=.05,flag=1,SEED=TRUE){ +# +# Test the hypothesis that the regression plane +# between x and y is a flat horizontal plane with intercept 0 +# The method is based on results in +# Stute et al. (1998, JASA, 93, 141-149). +# +# flag=1 gives Kolmogorov-Smirnov test statistic +# flag=2 gives the Cramer-von Mises test statistic +# flag=3 causes both test statistics to be reported. +# +if(SEED)set.seed(2) +x<-as.matrix(x) +# First, eliminate any rows of data with missing values. 
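+# (After missing values are removed, a wild bootstrap is used below: the
+# residuals are multiplied by mean-zero, variance-one random weights, in the
+# spirit of Stute et al., 1998.)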
+temp <- cbind(x, y) + temp <- elimna(temp) + pval<-ncol(temp)-1 + x <- temp[,1:pval] + y <- temp[, pval+1] +x<-as.matrix(x) +mflag<-matrix(NA,nrow=length(y),ncol=length(y)) +for (j in 1:length(y)){ +for (k in 1:length(y)){ +mflag[j,k]<-(sum(x[j,]<=x[k,])==ncol(x)) +} +} +# ith row of mflag indicates which rows of the matrix x are less +# than or equal to ith row of x +# +yhat<-0 +res<-y-yhat +print("Taking bootstrap sample, please wait.") +data<-matrix(runif(length(y)*nboot),nrow=nboot) +data<-(data-.5)*sqrt(12) # standardize the random numbers. +rvalb<-apply(data,1,indt0sub,yhat,res,mflag,x,tr) +# An n x nboot matrix of R values +rvalb<-rvalb/sqrt(length(y)) +dstatb<-apply(abs(rvalb),2,max) +wstatb<-apply(rvalb^2,2,mean) +mstatb<-apply(abs(rvalb),2,median) +dstatb<-sort(dstatb) +wstatb<-sort(wstatb) +mstatb<-sort(mstatb) +# compute test statistic +v<-c(rep(1,length(y))) +rval<-indt0sub(v,yhat,res,mflag,x,tr) +rval<-rval/sqrt(length(y)) +dstat<-NA +wstat<-NA +critd<-NA +critw<-NA +ib<-round(nboot*(1-alpha)) +if(flag==1 || flag==3){ +dstat<-max(abs(rval)) +critd<-dstatb[ib] +} +if(flag==2 || flag==3){ +wstat<-mean(rval^2) +critw<-wstatb[ib] +} +list(dstat=dstat,wstat=wstat,critd=critd,critw=critw) +} + + +indt0sub<-function(vstar,yhat,res,mflag,x,tr){ +bres<-res*vstar +rval<-0 +for (i in 1:nrow(x)){ +rval[i]<-sum(bres[mflag[,i]]) +} +rval +} + +smeancr<-function(m,nullv=rep(0,ncol(m)),cop=3,MM=FALSE,SEED=TRUE,FAST=FALSE, +nboot=500,plotit=TRUE,xlab="VAR 1",ylab="VAR 2",STAND=TRUE){ +# +# m is an n by p matrix +# +# Test hypothesis that multivariate skipped estimators +# are all equal to the null value, which defaults to zero. +# The level of the test is .05. +# +# Eliminate outliers using a projection method +# That is, determine center of data using: +# +# cop=1 Donoho-Gasko median, +# cop=2 MCD, +# cop=3 marginal medians. +# cop=4 MVE +# +# For each point +# consider the line between it and the center +# project all points onto this line, and +# check for outliers using +# +# MM=F, a boxplot rule. +# MM=T, rule based on MAD and median +# +# Repeat this for all points. A point is declared +# an outlier if for any projection it is an outlier +# using a modification of the usual boxplot rule. +# +# Eliminate any outliers and compute means +# using remaining data. +# +if(SEED)set.seed(2) +#if(!is.na(SEED))set.seed(SEED) +m<-elimna(m) +n<-nrow(m) +crit.level<-.05 +if(n<=120)crit.level<-.045 +if(n<=80)crit.level<-.04 +if(n<=60)crit.level<-.035 +if(n<=40)crit.level<-.03 +if(n<=30)crit.level<-.025 +if(n<=20)crit.level<-.02 +data<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot) +val<-matrix(NA,ncol=ncol(m),nrow=nboot) +for(j in 1: nboot){ +mm<-m[data[j,],] +if(FAST)temp<-outpro.depth(mm,plotit=FALSE,SEED=FALSE)$keep +if(!FAST)temp<-outpro(mm,plotit=FALSE,cop=cop,STAND=STAND)$keep +val[j,]<-apply(mm[temp,],2,mean) +} +temp<-pdis(rbind(val,nullv)) +sig.level<-sum(temp[nboot+1]1){ +if(ncol(x)==2 && !scale){ +if(pr){ +print("scale=F is specified.") +print("If there is dependence, use scale=T") +}} +if(ncol(x)>2)plotit<-F +val<-run3bo(x,y,est=est,fr=fr,nmin=nmin,plotit=plotit,pyhat=TRUE,phi=phi, +theta=theta,xlab=xlab,ylab=ylab,ticktype=ticktype,STAND=STAND, +SEED=SEED,expand=expand,scale=scale,nboot=nboot,...) 
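+# (run3bo returns a list; its $output component contains the bagged fitted
+# values, which are used below to estimate explanatory power.)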
+val<-val$output +} +E.power<-varfun(val[!is.na(val)])/varfun(y) +if(!pyhat)val <- NULL +E.power=as.numeric(E.power) +list(Strength.Assoc=sqrt(E.power),Explanatory.Power = E.power, yhat = val) +} + +zdepth<-function(m,pts=m,zloc=median,zscale=mad){ +# +# Compute depth of points as in Zuo, Annals, 2003 +# +if(!is.matrix(m))stop("argument m should be a matrix") +if(!is.matrix(pts))stop("argument pts should be a matrix") +if(ncol(m)!=ncol(pts))stop("Number of columns for m and pts are not equal") +np<-ncol(m) +val<-NA +for(i in 1:nrow(pts)){ +pval<-pts[i,] +START<-rep(1,np)/sqrt(np) +temp<-nelderv2(m,np,FN=zdepth.sub,START=START,zloc=zloc,zscale=zscale,pts=pval) +temp<-temp/sqrt(sum(temp^2)) +y<-t(t(m)*temp) +y<-apply(y,1,sum) +ppro<-sum(pval*temp) +val[i]<-abs(ppro-zloc(y))/zscale(y) +} +val +} + +zdepth.sub<-function(x,theta,zloc=median,zscale=mad,pts=NA){ +theta<-theta/sqrt(sum(theta^2)) +temp<-t(t(x)*theta) +ppro<-sum(t(t(pts)*theta)) +yhat<-apply(temp,1,sum) +val<-0-abs(ppro-zloc(yhat))/zscale(yhat) +val +} + +zdist=zdepth + +opregpb<-function(x,y,nboot=1000,alpha=.05,om=TRUE,ADJ=TRUE,SEED=TRUE, +nullvec=rep(0,ncol(x)+1),plotit=TRUE,opdis=2,gval=sqrt(qchisq(.95,ncol(x)+1))){ +# +# generate bootstrap estimates +# use projection-type outlier detection method followed by +# TS regression. +# +# om=T and ncol(x)>1, means an omnibus test is performed, +# otherwise only individual tests of parameters are performed. +# +# opdis=2, means that Mahalanobis distance is used +# opdis=1, means projection-type distance is used +# +# gval is critical value for projection-type outlier detection +# method +# +# ADJ=T, Adjust p-values as described in Section 11.1.5 of the text. +# +if(SEED)set.seed(2) +x<-as.matrix(x) +m<-cbind(x,y) +p1<-ncol(x)+1 +m<-elimna(m) # eliminate any rows with missing data +x<-m[,1:ncol(x)] +x<-as.matrix(x) +y<-m[,p1] +if(nrow(x)!=length(y))stop("Sample size of x differs from sample size of y") +if(!is.matrix(x))stop("Data should be stored in a matrix") +print("Taking bootstrap samples. Please wait.") +data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot) +bvec<-apply(data,1,regboot,x,y,regfun=opreg) +# bvec is a p+1 by nboot matrix. The first row +# contains the bootstrap intercepts, the second row +# contains the bootstrap values for first predictor, etc. 
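+# (bvec is transposed below so that each column corresponds to one regression
+# parameter and each row to one bootstrap sample.)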
+# using Hochberg method +bvec<-t(bvec) +dvec<-alpha/(c(1:ncol(x))) +test<-NA +icl0<-round(alpha*nboot/2) +icl<-round(alpha*nboot/(2*ncol(x))) +icu0<-nboot-icl0 +icu<-nboot-icl +output<-matrix(0,p1,6) +vlabs="Intercept" +for(j in 2:p1)vlabs[j]=paste("Slope",j-1) +dimnames(output)<-list(vlabs,c("Param.","p.value","p.crit", +"ci.lower","ci.upper","s.e.")) +pval<-NA +for(i in 1:p1){ +output[i,1]<-i-1 +se.val<-var(bvec[,i]) +temp<-sort(bvec[,i]) +output[i,6]<-sqrt(se.val) +if(i==1){ +output[i,4]<-temp[icl0+1] +output[i,5]<-temp[icu0] +} +if(i>1){ +output[i,4]<-temp[icl+1] +output[i,5]<-temp[icu] +} +pval[i]<-sum((temp>nullvec[i]))/length(temp) +if(pval[i]>.5)pval[i]<-1-pval[i] +} +fac<-2 +if(ADJ){ +# Adjust p-value if n<60 +nval<-length(y) +if(nval<20)nval<-20 +if(nval>60)nval<-60 +fac<-2-(60-nval)/40 +} +pval[1]<-2*pval[1] +pval[2:p1]<-fac*pval[2:p1] +output[,2]<-pval +temp2<-order(0-pval[2:p1]) +zvec<-dvec[1:ncol(x)] +sigvec<-(test[temp2]>=zvec) +output[temp2+1,3]<-zvec +output[1,3]<-NA +output[,2]<-pval +om.pval<-NA +temp<-opreg(x,y)$coef +if(om && ncol(x)>1){ +temp2<-rbind(bvec[,2:p1],nullvec[2:p1]) +if(opdis==1)dis<-pdis(temp2,center=temp[2:p1]) +if(opdis==2){ +cmat<-var(bvec[,2:p1]-apply(bvec[,2:p1],2,mean)+temp[2:p1]) +dis<-mahalanobis(temp2,temp[2:p1],cmat) +} +om.pval<-sum((dis[nboot+1]<=dis[1:nboot]))/nboot +} +# do adjusted p-value +nval<-length(y) +if(nval<20)nval<-20 +if(nval>60)nval<-60 +adj.pval<-om.pval/2+(om.pval-om.pval/2)*(nval-20)/40 +if(ncol(x)==2 && plotit){ +plot(bvec[,2],bvec[,3],xlab="Slope 1",ylab="Slope 2") +temp.dis<-order(dis[1:nboot]) +ic<-round((1-alpha)*nboot) +xx<-bvec[temp.dis[1:ic],2:3] +xord<-order(xx[,1]) +xx<-xx[xord,] +temp<-chull(xx) +lines(xx[temp,]) +lines(xx[c(temp[1],temp[length(temp)]),]) +} +list(output=output,om.pval=om.pval,adj.om.pval=adj.pval) +} + + +kslope<-function(x,y,pyhat=FALSE,pts=x){ +# +# Estimate slope at points in pts using kernel method +# +# See Doksum et al. 1994, JASA, 89, 571- +# +m<-elimna(cbind(x,y)) +x<-m[,1] +y<-m[,2] +n<-length(y) +sig<-sqrt(var(x)) +temp<-idealf(x) +iqr<-(temp$qu-temp$ql)/1.34 +A<-min(c(sig,iqr)) +yhat<-NA +vval<-NA +vals<-NA +rhosq<-NA +for(k in 1:n){ +temp1<-NA +for(j in 1:n){ +temp1[j]<-((x[j]-x[k])/A)^2 +} +epan<-ifelse(temp1<1,.75*(1-temp1),0) # Epanechnikov kernel, p. 76 +chkit<-sum(epan!=0) +if(chkit >= 2){ +temp4<-lsfit(x,y,wt=epan) +vals[k]<-temp4$coef[2] +}} +vals +} + +nearl<-function(x,pt,fr=1){ +# determine which values in x are near and less than pt +# based on fr * mad +m<-mad(x) +if(m==0){ +temp<-idealf(x) +m<-(temp$qu-temp$ql)/(qnorm(.75)-qnorm(.25)) +} +if(m==0)m<-sqrt(winvar(x)/.4129) +if(m==0)stop("All measures of dispersion are equal to 0") +dis<-abs(x-pt) +dflag<-dis <= fr*m +flag2<-(xpt) +dflag<-dflag*flag2 +dflag +} +mgvmean<-function(m,op=0,outfun=outbox,se=TRUE){ +# +# m is an n by p matrix +# +# Compute a multivariate skipped measure of location +# using the MGV method +# +# Eliminate outliers using MGV method +# +# op=0 pairwise distances of points +# op=1 MVE distances +# op=2 MCD distances +# +# outfun indicates outlier rule to be applied to +# the MGV distances. +# By default, use boxplot rule +# +# Eliminate any outliers and compute means +# using remaining data. 
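+#
+# A minimal usage sketch (m is a hypothetical n-by-p numeric matrix):
+#   mgvmean(m)        # MGV-skipped mean based on pairwise distances (op=0)
+#   mgvmean(m,op=1)   # use MVE distances instead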
+# +m<-elimna(m) +temp<-outmgv(m,op=op,plotit=FALSE)$keep +val<-apply(m[temp,],2,mean) +val +} + +smgvcr<-function(m,nullv=rep(0,ncol(m)),SEED=TRUE,op=0, +nboot=500,plotit=TRUE){ +# +# m is an n by p matrix +# +# Test hypothesis that estimand of the MGV estimator +# is equal to the null value, which defaults to zero vector. +# The level of the test is .05. +# +# Argument op: See function outmgv +# +if(SEED)set.seed(2) +m<-elimna(m) +n<-nrow(m) +crit.level<-.05 +if(n<=120)crit.level<-.045 +if(n<=80)crit.level<-.04 +if(n<=60)crit.level<-.035 +if(n<=40)crit.level<-.03 +if(n<=30)crit.level<-.025 +if(n<=20)crit.level<-.02 +data<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot) +val<-matrix(NA,ncol=ncol(m),nrow=nboot) +for(j in 1: nboot){ +mm<-m[data[j,],] +temp<-outmgv(mm,plotit=FALSE,op=op)$keep +val[j,]<-apply(mm[temp,],2,mean) +} +temp<-mgvar(rbind(val,nullv),op=op) +flag2<-is.na(temp) +if(sum(flag2)>0)temp[flag2]<-0 +sig.level<-sum(temp[nboot+1]0)pts<-seq(min(x),max(x),length=np) +if(np==0)pts<-x +} +pts<-sort(pts) +for(i in 1:length(pts)){ +yhat[i]<-NA +for(j in 1:length(x)){ +temp[j]<-((x[j]-pts[i])/A)^2 +} +epan<-ifelse(temp<1,.75*(1-temp),0) +chkit<-sum(epan!=0) +if(chkit > 1){ +vals<-lsfit(x,y,wt=epan)$coef +yhat[i]<-vals[2]*pts[i]+vals[1] +} +} +if(plotit){ +plot(x,y,xlab=xlab,ylab=ylab,pch=pch) +if(np>0){ +ilow<-round(.1*np) +iup<-round(.9*np) +} +if(np==0){ +ilow<-1 +iup<-length(pts) +} +lines(pts[ilow:iup],yhat[ilow:iup]) +} +m<-"Done" +if(pyhat)m<-yhat +m +} + +qreg.sub<-function(X,theta,qval=.5){ +np<-ncol(X) +p<-np-1 +x<-X[,1:p] +y<-X[,np] +temp<-t(t(x)*theta[2:np]) +yhat<-apply(temp,1,sum)+theta[1] +res<-y-yhat +flag<-(res<=0) +rval<-(qval-flag)*res +val<-sum(rval) +val +} + +rmmcppb<-function(x,y=NULL,alpha=.05, +con=0,est=onestep,plotit=FALSE,dif=TRUE,grp=NA,nboot=NA,BA=FALSE,hoch=FALSE,xlab="Group 1",ylab="Group 2",pr=TRUE,SEED=TRUE,SR=FALSE,...){ +# +# Use a percentile bootstrap method to compare dependent groups. +# By default, +# compute a .95 confidence interval for all linear contrasts +# specified by con, a J-by-C matrix, where C is the number of +# contrasts to be tested, and the columns of con are the +# contrast coefficients. +# If con is not specified, all pairwise comparisons are done. +# +# If est=onestep or mom, method SR (see my book on robust methods) +# is used to control the probability of at least one Type I error. +# +# Otherwise, Hochberg is used. +# +# dif=T indicates that difference scores are to be used +# dif=F indicates that measure of location associated with +# marginal distributions are used instead. +# +# nboot is the bootstrap sample size. If not specified, a value will +# be chosen depending on the number of contrasts there are. +# +# x can be an n by J matrix or it can have list mode +# for two groups, data for second group can be put in y +# otherwise, assume x is a matrix (n by J) or has list mode. +# +# A sequentially rejective method is used to control alpha using method SR. +# +# Argument BA: When using dif=F, BA=T uses a correction term +# when computing a p-value. 
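+#
+# A minimal usage sketch (x is a hypothetical n-by-J matrix of dependent
+# groups; all pairwise comparisons based on 20% trimmed means of the
+# difference scores):
+#   rmmcppb(x,est=tmean,dif=TRUE)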
+# +if(hoch)SR=FALSE #Assume Hochberg if hoch=TRUE even if SR=TRUE +if(SR){ +okay=FALSE +if(identical(est,onestep))okay=TRUE +if(identical(est,mom))okay=TRUE +SR=okay # 'Only use method SR (argument SR=TRUE) when est=onestep or mom +} +if(dif){ +if(pr){print("dif=TRUE, so analysis is done on difference scores.") +print(" Each confidence interval has probability coverage 1-alpha.") +print("Also note that a sequentially rejective method is being used") +} +temp<-rmmcppbd(x,y=y,alpha=alpha,con=con,est,plotit=plotit,grp=grp,nboot=nboot, SEED=SEED, +hoch=TRUE,...) +output<-temp$output +con<-temp$con +} +if(!dif){ +if(pr){ +print("dif=FALSE, so analysis is done on marginal distributions") +if(!BA){ +if(identical(est,onestep))print("With M-estimator or MOM, suggest using BA=TRUE and hoch=TRUE") +if(identical(est,mom))print("With M-estimator or MOM, suggest using BA=TRUE and hoch=TRUE") +}} +if(!is.null(y[1]))x<-cbind(x,y) +if(!is.list(x) && !is.matrix(x))stop("Data must be stored in a matrix or in list mode.") +if(is.list(x)){ +if(is.matrix(con)){ +if(length(x)!=nrow(con))stop("The number of rows in con is not equal to the number of groups.") +}} +if(is.list(x)){ +# put the data in an n by J matrix +mat<-matl(x) +} +if(is.matrix(x) && is.matrix(con)){ +if(ncol(x)!=nrow(con))stop("The number of rows in con is not equal to the number of groups.") +mat<-x +} +if(is.matrix(x))mat<-x +if(!is.na(sum(grp)))mat<-mat[,grp] +mat<-elimna(mat) # Remove rows with missing values. +x<-mat +J<-ncol(mat) +xcen<-x +for(j in 1:J)xcen[,j]<-x[,j]-est(x[,j],...) +Jm<-J-1 +if(sum(con^2)==0){ +d<-(J^2-J)/2 +con<-matrix(0,J,d) +id<-0 +for (j in 1:Jm){ +jp<-j+1 +for (k in jp:J){ +id<-id+1 +con[j,id]<-1 +con[k,id]<-0-1 +}}} +d<-ncol(con) +if(is.na(nboot)){ +if(d<=4)nboot<-1000 +if(d>4)nboot<-5000 +} +n<-nrow(mat) +crit.vec<-alpha/c(1:d) +connum<-ncol(con) +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +xbars<-apply(mat,2,est,...) +psidat<-NA +for (ic in 1:connum)psidat[ic]<-sum(con[,ic]*xbars) +psihat<-matrix(0,connum,nboot) +psihatcen<-matrix(0,connum,nboot) +bvec<-matrix(NA,ncol=J,nrow=nboot) +bveccen<-matrix(NA,ncol=J,nrow=nboot) +data<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot) +for(ib in 1:nboot){ +bvec[ib,]<-apply(x[data[ib,],],2,est,...) +bveccen[ib,]<-apply(xcen[data[ib,],],2,est,...) +} +# +# Now have an nboot by J matrix of bootstrap values. 
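+# (For each contrast, the proportion of bootstrap psi-hat values greater than
+# zero is computed below, with ties at zero counted as one half; the p-value
+# is twice the smaller of that proportion and its complement.)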
+# +test<-1 +bias<-NA +for (ic in 1:connum){ +psihat[ic,]<-apply(bvec,1,bptdpsi,con[,ic]) +psihatcen[ic,]<-apply(bveccen,1,bptdpsi,con[,ic]) +bias[ic]<-sum((psihatcen[ic,]>0))/nboot-.5 +ptemp<-(sum(psihat[ic,]>0)+.5*sum(psihat[ic,]==0))/nboot +if(BA)test[ic]<-ptemp-.1*bias[ic] +if(!BA)test[ic]<-ptemp +test[ic]<-min(test[ic],1-test[ic]) +test[ic]<-max(test[ic],0) # bias corrected might be less than zero +} +test<-2*test +ncon<-ncol(con) +dvec<-alpha/c(1:ncon) # Assume Hochberg unless specified otherwise +if(SR){ +if(alpha==.05){ +dvec<-c(.025,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) +dvecba<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) +if(ncon > 10){ +avec<-.05/c(11:ncon) +dvec<-c(dvec,avec) +}} +if(alpha==.01){ +dvec<-c(.005,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) +dvecba<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) +if(ncon > 10){ +avec<-.01/c(11:ncon) +dvec<-c(dvec,avec) +}} +if(alpha != .05 && alpha != .01){ +dvec<-alpha/c(1:ncon) +dvecba<-dvec +dvec[2]<-alpha +}} +if(hoch)dvec<-alpha/c(1:ncon) +dvecba<-dvec +if(plotit && ncol(bvec)==2){ +z<-c(0,0) +one<-c(1,1) +plot(rbind(bvec,z,one),xlab=xlab,ylab=ylab,type="n") +points(bvec) +totv<-apply(x,2,est,...) +cmat<-var(bvec) +dis<-mahalanobis(bvec,totv,cmat) +temp.dis<-order(dis) +ic<-round((1-alpha)*nboot) +xx<-bvec[temp.dis[1:ic],] +xord<-order(xx[,1]) +xx<-xx[xord,] +temp<-chull(xx) +lines(xx[temp,]) +lines(xx[c(temp[1],temp[length(temp)]),]) +abline(0,1) +} +temp2<-order(0-test) +ncon<-ncol(con) +zvec<-dvec[1:ncon] +if(BA)zvec<-dvecba[1:ncon] +sigvec<-(test[temp2]>=zvec) +output<-matrix(0,connum,6) +dimnames(output)<-list(NULL,c("con.num","psihat","p.value","p.crit","ci.lower","ci.upper")) +tmeans<-apply(mat,2,est,...) +psi<-1 +output[temp2,4]<-zvec +for (ic in 1:ncol(con)){ +output[ic,2]<-sum(con[,ic]*tmeans) +output[ic,1]<-ic +output[ic,3]<-test[ic] +temp<-sort(psihat[ic,]) +#icl<-round(output[ic,4]*nboot/2)+1 # This adjustment causes confusion; it's not based on Hochberg +icl<-round(alpha*nboot/2)+1 +icu<-nboot-(icl-1) +output[ic,5]<-temp[icl] +output[ic,6]<-temp[icu] +} +} +ids=NA +num.sig=nrow(output) +ior=order(output[,3],decreasing=TRUE) +for(j in 1:nrow(output)){ +if(output[ior[j],3]<=output[ior[j],4])break +else num.sig=num.sig-1 +} +list(output=output,con=con,num.sig=num.sig) +} + +linconb<-function(x,con=0,tr=.2,alpha=.05,nboot=599,pr=FALSE,SEED=TRUE,method='holm'){ +# +# Compute a 1-alpha confidence interval for a set of d linear contrasts +# involving trimmed means using the bootstrap-t bootstrap method. +# Independent groups are assumed. +# +# The data are assumed to be stored in x in list mode. Thus, +# x[[1]] contains the data for the first group, x[[2]] the data +# for the second group, etc. Length(x)=the number of groups = J, say. +# +# Missing values are automatically removed. +# +# con is a J by d matrix containing the contrast coefficents of interest. +# If unspecified, all pairwise comparisons are performed. +# For example, con[,1]=c(1,1,-1,-1,0,0) and con[,2]=c(,1,-1,0,0,1,-1) +# will test two contrasts: (1) the sum of the first two trimmed means is +# equal to the sum of the second two, and (2) the difference between +# the first two is equal to the difference between the trimmed means of +# groups 5 and 6. +# +# The default number of bootstrap samples is nboot=599 +# +# This function uses functions trimparts and trimpartt written for this +# book. 
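+#
+# A minimal usage sketch (xlist is a hypothetical list with one vector of
+# observations per group; all pairwise contrasts, 20% trimming):
+#   linconb(xlist,tr=.2,nboot=599)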
+# +# +# +# +if(is.data.frame(x))x=as.matrix(x) +con<-as.matrix(con) +if(is.matrix(x))x<-listm(x) +if(!is.list(x))stop('Data must be stored in a matrix or in list mode.') +J<-length(x) +for(j in 1:J){ +xx<-x[[j]] +x[[j]]<-xx[!is.na(xx)] # Remove any missing values. +} +Jm<-J-1 +d<-(J^2-J)/2 +if(sum(con^2)==0){ +con<-matrix(0,J,d) +id<-0 +for (j in 1:Jm){ +jp<-j+1 +for (k in jp:J){ +id<-id+1 +con[j,id]<-1 +con[k,id]<-0-1 +}}} +if(nrow(con)!=length(x))stop('The number of groups does not match the number of contrast coefficients.') +bvec<-array(0,c(J,2,nboot)) +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +if(pr)print('Taking bootstrap samples. Please wait.') +nsam=matl(lapply(x,length)) +for(j in 1:J){ +paste('Working on group ',j) +xcen<-x[[j]]-mean(x[[j]],tr) +data<-matrix(sample(xcen,size=length(x[[j]])*nboot,replace=TRUE),nrow=nboot) +bvec[j,,]<-apply(data,1,trimparts,tr) # A 2 by nboot matrix. The first row +# contains the bootstrap trimmed means, the second row +# contains the bootstrap squared standard errors. +} +m1<-bvec[,1,] # J by nboot matrix containing the bootstrap trimmed means +m2<-bvec[,2,] # J by nboot matrix containing the bootstrap sq. se. +boot<-matrix(0,ncol(con),nboot) +for (d in 1:ncol(con)){ +top<-apply(m1,2,trimpartt,con[,d]) +# A vector of length nboot containing psi hat values +consq<-con[,d]^2 +bot<-apply(m2,2,trimpartt,consq) +boot[d,]<-abs(top)/sqrt(bot) +} +testb<-apply(boot,2,max) +ic<-floor((1-alpha)*nboot) +testb<-sort(testb) +psihat<-matrix(0,ncol(con),4) +test<-matrix(0,ncol(con),5) +dimnames(psihat)<-list(NULL,c('con.num','psihat','ci.lower','ci.upper')) +dimnames(test)<-list(NULL,c('con.num','test','se','p.value','p.adjusted')) +for (d in 1:ncol(con)){ +test[d,1]<-d +psihat[d,1]<-d +testit<-lincon(x,con[,d],tr,pr=FALSE) +test[d,2]<-testit$test[1,2] +pval<-mean((abs(testit$test[1,2])1){ +if(STAND){ +x=standm(x) +m1=apply(x,1,mean) +v=apply(x,1,sd) +for(j in 1:ncol(x))pts[,j]=(pts[,j]-m1[j])/v[j] +}} +outmat<-matrix(NA,ncol=nrow(x),nrow=nrow(pts)) +for(i in 1:nrow(pts)){ +center<-pts[i,] +if(!MC)blob<-pdis(x,center=center,MM=MM) +if(MC)blob<-pdisMC(x,center=center,MM=MM) +# +# Note: distances already divided by +# interquartile range +# +# Determine which points in m are close to pts +flag2<-(blob < fr) +outmat[i,]<-flag2 +} +# Return matrix, ith row indicates which points +# in x are close to pts[i,] +# +outmat +} + +adtestl<-function(x,y,est=tmean,nboot=100,alpha=.05,fr=NA,SEED=TRUE,...){ +# +# Test the hypothesis that the regression model is additive. +# Use a variation of Stute et al. (1998, JASA, 93, 141-149). 
+# method, and running interval version of the backfitting +# algorithm +# +if(!is.matrix(x))stop("X values should be stored in a matrix") +if(ncol(x)==1)stop("There should be two or more predictors") +temp<-cbind(x,y) +p<-ncol(x) +p1<-p+1 +temp<-elimna(temp) +x<-temp[,1:p] +x<-as.matrix(x) +y<-temp[,p1] +if(alpha<.05 && nboot<=100)warning("You used alpha<.05 and nboot<=100") +if(is.na(fr)){ +fr<-.8 +if(ncol(x)==2){ +nval<-c(20,30,50,80,150) +fval<-c(0.40,0.36,0.18,0.15,0.09) +if(length(y)<=150)fr<-approx(nval,fval,length(y))$y +if(length(y)>150)fr<-.09 +} +} +if(SEED)set.seed(2) +x<-as.matrix(x) +mflag<-matrix(NA,nrow=length(y),ncol=length(y)) +for (j in 1:length(y)){ +for (k in 1:length(y)){ +mflag[j,k]<-(sum(x[j,]<=x[k,])==ncol(x)) +} +} +yhat<-adrunl(x,y,plotit=FALSE,fr=fr,pyhat=TRUE) +regres<-y-yhat +print("Taking bootstrap sample, please wait.") +data<-matrix(runif(length(y)*nboot),nrow=nboot) +data<-sqrt(12)*(data-.5) # standardize the random numbers. +rvalb<-apply(data,1,adtestls1,yhat,regres,mflag,x,fr) +# An n x nboot matrix of R values +rvalb<-rvalb/sqrt(length(y)) +dstatb<-apply(abs(rvalb),2,max) +wstatb<-apply(rvalb^2,2,mean) +dstatb<-sort(dstatb) +wstatb<-sort(wstatb) +# compute test statistic +v<-c(rep(1,length(y))) +rval<-adtestls1(v,yhat,regres,mflag,x,fr) +rval<-rval/sqrt(length(y)) +dstat<-max(abs(rval)) +wstat<-mean(rval^2) +ib<-round(nboot*(1-alpha)) +critd<-dstatb[ib] +critw<-wstatb[ib] +list(dstat=dstat,wstat=wstat,critd=critd,critw=critw) +} + + +adtestls1<-function(vstar,yhat,res,mflag,x,fr){ +ystar<-yhat+res*vstar +bres<-adrunl(x,ystar,fr=fr,pyhat=TRUE,plotit=FALSE) +bres<-ystar-bres +rval<-0 +for (i in 1:nrow(x)){ +rval[i]<-sum(bres[mflag[,i]]) +} +rval +} +adcom<-function(x,y,est=mean,tr=0,nboot=600,alpha=.05,fr=NA, +jv=NA,SEED=TRUE,...){ +# +# Test the hypothesis that component +# jv +# is zero. That is, in a generalized additive model, test +# H_0: f_jv(X_jv) = 0. +# Use a variation of Stute et al. (1998, JASA, 93, 141-149). +# method, and running interval version of the backfitting +# algorithm +# +# if jv=NA, all components are tested. 
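+#
+# A minimal usage sketch (x is a hypothetical n-by-2 matrix of predictors):
+#   adcom(x,y)        # test each additive component in turn
+#   adcom(x,y,jv=1)   # test only the first component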
+# +# Current version allows only 0 or 20% trimming +# +x=as.matrix(x) +if(!is.matrix(x))stop("X values should be stored in a matrix") +if(ncol(x)==1)stop("There should be two or more predictors") +temp<-cbind(x,y) +p<-ncol(x) +p1<-p+1 +temp<-elimna(temp) +x<-temp[,1:p] +x<-as.matrix(x) +y<-temp[,p1] +if(is.na(fr)){ +if(tr==.2){ +nval<-c(20,40,60,80,120,160) +fval<-c(1.2,1,.85,.75,.65,.65) +if(length(y)<=160)fr<-approx(nval,fval,length(y))$y +if(length(y)>160)fr<-.65 +} +if(tr==0){ +nval<-c(20,40,60,80,120,160) +fval<-c(.8,.7,.55,.5,.5,.5) +if(length(y)<=160)fr<-approx(nval,fval,length(y))$y +if(length(y)>160)fr<-.6 +} +} +if(is.na(fr))stop("Span can be deteremined only for 0 or .2 trimming") +if(SEED)set.seed(2) +x<-as.matrix(x) +mflag<-matrix(NA,nrow=length(y),ncol=length(y)) +for (j in 1:length(y)){ +for (k in 1:length(y)){ +mflag[j,k]<-(sum(x[j,]<=x[k,])==ncol(x)) +} +} +if(!is.na(jv))prval<-jv +if(is.na(jv))prval<-c(1:ncol(x)) +c.sum<-matrix(NA,nrow=length(prval),ncol=2) +dimnames(c.sum)<-list(NULL,c("d.stat","p.value")) +for(ip in 1:length(prval)){ +flag<-rep(TRUE,ncol(x)) +flag[prval[ip]]<-FALSE +yhat<-adrun(x[,flag],y,plotit=FALSE,fr=fr,pyhat=TRUE) +regres<-y-yhat +temp<-indt(x[,!flag],regres) +c.sum[ip,1]<-temp$dstat +c.sum[ip,2]<-temp$p.value.d +} +list(results=c.sum) +} + +logadr<-function(x,y,est=mean,iter=10,pyhat=FALSE,plotit=TRUE,fr=.8,xout=FALSE,eout=xout, +outfun=out,theta=50,phi=25,expand=.5,STAND=TRUE,ticktype="simple",scale=FALSE,...){ +# +# additive model based on a variation of Copas' (1983) smooth +# for binary outcomes. +# (Use backfitting algorithm.) +# +m<-elimna(cbind(x,y)) +x<-as.matrix(x) +p<-ncol(x) +p1<-p+1 +y<-m[,p1] +x<-m[,1:p] +x<-as.matrix(x) +if(STAND){ +for (ip in 1:p)x[,ip]<-(x[,ip]-mean(x[,ip]))/sqrt(var(x[,ip])) +} +if(xout){ +keepit<-rep(TRUE,nrow(x)) +flag<-outfun(x,plotit=FALSE)$out.id +keepit[flag]<-FALSE +x<-x[keepit,] +y<-y[keepit] +} +x<-as.matrix(x) +if(p==1)val<-logrsm(x[,1],y,pyhat=TRUE,plotit=plotit,fr=fr,...)$output +if(p>1){ +np<-p+1 +x<-m[,1:p] +y<-m[,np] +fhat<-matrix(NA,ncol=p,nrow=length(y)) +fhat.old<-matrix(NA,ncol=p,nrow=length(y)) +res<-matrix(NA,ncol=np,nrow=length(y)) +dif<-1 +for(i in 1:p) +fhat.old[,i]<-logrsm(x[,i],y,pyhat=TRUE,plotit=FALSE,fr=fr)$output +eval<-NA +for(it in 1:iter){ +for(ip in 1:p){ +res[,ip]<-y +for(ip2 in 1:p){ +if(ip2 != ip)res[,ip]<-res[,ip]-fhat.old[,ip2] +} +fhat[,ip]=logrsm(x[,ip],y,pyhat=TRUE,plotit=FALSE,fr=fr)$output +} +eval[it]<-sum(abs(fhat/sqrt(sum(fhat^2))-fhat.old/sqrt(sum(fhat.old^2)))) +if(it > 1){ +itm<-it-1 +dif<-abs(eval[it]-eval[itm]) +} +fhat.old<-fhat +if(dif<.01)break +} +#print(fhat) +val<-apply(fhat,1,sum) +aval<-est(y-val,...) 
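+# (aval recenters the additive fit; the recentered fitted probabilities are
+# then truncated to the interval [0,1] below.)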
+val<-val+aval +flag=(val<0) +val[flag]=0 +flag=(val>1) +val[flag]=1 +if(plotit && p==2){ +fitr<-val +iout<-c(1:length(fitr)) +nm1<-length(fitr)-1 +for(i in 1:nm1){ +ip1<-i+1 +for(k in ip1:length(fitr))if(sum(x[i,]==x[k,])==2)iout[k]<-0 +} +fitr<-fitr[iout>=1] # Eliminate duplicate points in the x-y plane +# This is necessary when doing three dimensional plots +# with the R function interp +mkeep<-x[iout>=1,] +fitr<-interp(mkeep[,1],mkeep[,2],fitr) +persp(fitr,theta=theta,phi=phi,expand=expand,xlab="x1",ylab="x2",zlab="", +scale=scale,ticktype=ticktype) +}} +if(!pyhat)val<-"Done" +val +} + +qhomtsub<-function(isub,x,y,qval){ +# +# Perform quantile regression using x[isub] to predict y[isub] +# isub is a vector of length n, +# a bootstrap sample from the sequence of integers +# 1, 2, 3, ..., n +# +# This function is used by other functions when computing +# bootstrap estimates. +# +# regfun is some regression method already stored in R +# It is assumed that regfun$coef contains the intercept and slope +# estimates produced by regfun. The regression methods written for +# this book, plus regression functions in R, have this property. +# +# x is assumed to be a matrix containing values of the predictors. +# +xmat<-matrix(x[isub,],nrow(x),ncol(x)) +temp<-qplotreg(xmat,y[isub],qval=qval,plotit=FALSE) +regboot<-temp[1,2]-temp[2,2] +regboot +} + +qplotreg<-function(x, y,qval=c(.2,.8),q=NULL,plotit=TRUE,xlab="X",ylab="Y",xout=FALSE, +outfun=outpro,pch='*',...){ +# +# Compute the quantile regression line for each of the +# quantiles indicated by qval. +# plotit=TRUE, plot the results. +# +if(!is.null(q))qval=q +xy=elimna(cbind(x,y)) +if(ncol(xy)>2)stop("Only One Predictor Allowed") +x=xy[,1] +y=xy[,2] +if(xout){ +x<-as.matrix(x) +flag<-outfun(x,...)$keep +x<-x[flag,] +y<-y[flag] +} +n<-length(qval) +coef<-matrix(NA,ncol=2,nrow=n) +x<-as.matrix(x) +if(ncol(x)>1)stop("This version allows one predictor only.") +if(plotit)plot(x,y,xlab=xlab,ylab=ylab,pch=pch) +for(it in 1:n){ +coef[it,]<-qreg(x,y,qval=qval[it],pr=FALSE)$coef +dimnames(coef)=list(NULL,c("Inter.","Slope")) +if(plotit)abline(coef[it,1],coef[it,2]) +} +coef +} + + +ancmpbpb<-function(x1,y1,x2,y2,fr1=1,fr2=1,alpha=.05,pts=NA,est=tmean,nboot=NA, +bhop=FALSE,SEED=TRUE,...){ +print("This function has been eliminated. Please use ancmppb instead.") +} + + +qsm<-function(x,y,qval=c(.2,.5,.8),fr=.8,plotit=TRUE,scat=TRUE,pyhat=FALSE,eout=FALSE,xout=FALSE,outfun=out,op=TRUE,LP=TRUE,tr=FALSE, +xlab='X',ylab='Y',pch='.'){ +# +# running interval smoother for the quantiles stored in +# qval +# +# fr controls amount of smoothing +# op=T, use Harrell-Davis estimator +# op=F, use single order statistic +# +# LP=TRUE: The initial smooth is smoothed again using LOESS +# +plotit<-as.logical(plotit) +scat<-as.logical(scat) +m<-cbind(x,y) +if(ncol(m)!=2)stop("Must have exactly one predictor. 
For more than one, use qhdsm.") +m<-elimna(m) +x<-m[,1] +y<-m[,2] +if(eout && xout)stop("Not allowed to have eout=xout=T") +if(eout){ +flag<-outfun(m,plotit=FALSE)$keep +m<-m[flag,] +} +if(xout){ +flag<-outfun(x)$keep +m<-m[flag,] +} +x<-m[,1] +y<-m[,2] +rmd<-c(1:length(x)) +if(pyhat)outval<-matrix(NA,ncol=length(qval),nrow=length(x)) +if(scat)plot(x,y,xlab=xlab,ylab=ylab,pch=pch) +if(!scat)plot(x,y,type="n",xlab=xlab,ylab=ylab) +for(it in 1:length(qval)){ +if(!op)for(i in 1:length(x))rmd[i]<-qest(y[near(x,x[i],fr)],q=qval[it]) +if(op)for(i in 1:length(x))rmd[i]<-hd(y[near(x,x[i],fr)],q=qval[it],tr=tr) +if(pyhat)outval[,it]<-rmd +points(x,rmd,type="n") +sx<-sort(x) +xorder<-order(x) +sysm<-rmd[xorder] +if(LP)sysm=lplot(sx,sysm,pyhat=TRUE,plotit=FALSE,pr=FALSE)$yhat.values +lines(sx,sysm) +} +if(pyhat)output<-outval +if(!pyhat)output<-"Done" +list(output=output) +} +locvar<-function(x,y,pyhat=FALSE,pts=x,plotit=TRUE){ +# +# For each x, estimate VAR(y|x) +# with the method used by Bjerve and Doksum +# i.e., use Fan's kernel regression method. +# +yhat<-locreg(x,y,pyhat=TRUE,plotit=FALSE,pts=x) +val<-locreg(x,(y-yhat)^2,pyhat=pyhat,pts=pts,plotit=plotit) +val +} + +smmval<-function(dfvec,iter=10000,alpha=.05,SEED=TRUE){ +# +# Determine the upper 1-alpha quantile of the maximum of +# K independent Student's T random variables. +# dfvec is a vector of length K containing the degrees of freedom +# +# So this distribution is similar to a Studentized maximum modulus distribution but +# the T statistics are not based on an estimate of an assumed common variance. +# +if(SEED)set.seed(1) +vals<-NA +tvals<-NA +J<-length(dfvec) +for(i in 1:iter){ +for(j in 1:J){ +tvals[j]<-rt(1,dfvec[j]) +} +vals[i]<-max(abs(tvals)) +} +vals<-sort(vals) +ival<-round((1-alpha)*iter) +qval<-vals[ival] +qval +} + + +bwmedimcp<-function(J,K,x,JK=J*K,grp=c(1:JK),alpha=.05){ +# +# Multiple comparisons for interactions +# in a split-plot design. +# The analysis is done by taking difference scores +# among all pairs of dependent groups and +# determining which of +# these differences differ across levels of Factor A +# using trimmed means. +# +# For MOM or M-estimators, use spmcpi which uses a bootstrap method +# +# The R variable x is assumed to contain the raw +# data stored in list mode or in a matrix. +# If in list mode, x[[1]] contains the data +# for the first level of both factors: level 1,1. +# x[[2]] is assumed to contain the data for level 1 of the +# first factor and level 2 of the second: level 1,2 +# x[[K]] is the data for level 1,K +# x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc. +# +# If the data are in a matrix, column 1 is assumed to +# correspond to x[[1]], column 2 to x[[2]], etc. +# +# When in list mode x is assumed to have length JK, the total number of +# groups being tested, but a subset of the data can be analyzed +# using grp +# + if(is.matrix(x)) { + y <- list() + for(j in 1:ncol(x)) + y[[j]] <- x[, j] + x <- y +} + +JK<-J*K +if(JK!=length(x))stop("Something is wrong. Expected ",JK," groups but x contains ", length(x), "groups instead.") +MJ<-(J^2-J)/2 +MK<-(K^2-K)/2 +JMK<-J*MK +MJMK<-MJ*MK +Jm<-J-1 +data<-list() +for(j in 1:length(x)){ +data[[j]]<-x[[grp[j]]] # Now have the groups in proper order. 
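+# (With the groups in the order implied by grp, difference scores among the K
+# dependent groups are formed within each level of Factor A and compared
+# across the levels of A below.)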
+} +x<-data +output<-matrix(0,MJMK,7) +dimnames(output)<-list(NULL,c("A","A","B","B","psihat","sig","crit.sig")) +jp<-1-K +kv<-0 +kv2<-0 +test<-NA +for(j in 1:J){ +jp<-jp+K +xmat<-matrix(NA,ncol=K,nrow=length(x[[jp]])) +for(k in 1:K){ +kv<-kv+1 +xmat[,k]<-x[[kv]] +} +xmat<-elimna(xmat) +for(k in 1:K){ +kv2<-kv2+1 +x[[kv2]]<-xmat[,k] +}} +m<-matrix(c(1:JK),J,K,byrow=TRUE) +ic<-0 +for(j in 1:J){ +for(jj in 1:J){ +if(j 10){ +avec<-.05/c(11:ncon) +dvec<-c(dvec,avec) +}} +if(alpha==.01){ +dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) +if(ncon > 10){ +avec<-.01/c(11:ncon) +dvec<-c(dvec,avec) +}} +if(alpha != .05 && alpha != .01){ +dvec<-alpha/c(1:ncon) +} +temp2<-order(0-test) +zvec<-dvec[1:ncon] +sigvec<-(test[temp2]>=zvec) +for (ic in 1:ncol(con)){ +output[temp2,7]<-zvec +} +output +} + + + + +bwmedbmcp<-function(J,K,x,JK=J*K,grp=c(1:JK),con=0,alpha=.05,dif=FALSE,pool=FALSE,bop=FALSE,nboot=100,SEED=TRUE){ +# +# All pairwise comparisons among levels of Factor B +# in a split-plot design using trimmed means. +# +# Data are pooled for each level +# of Factor B. +# bop=T, use bootstrap estimates of standard errors. +# FWE controlled with Rom's method +# +# The R variable x is assumed to contain the raw +# data stored in list mode or in a matrix. +# If in list mode, x[[1]] contains the data +# for the first level of both factors: level 1,1. +# x[[2]] is assumed to contain the data for level 1 of the +# first factor and level 2 of the second: level 1,2 +# x[[K]] is the data for level 1,K +# x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc. +# +# If the data are in a matrix, column 1 is assumed to +# correspond to x[[1]], column 2 to x[[2]], etc. +# +# When in list mode x is assumed to have length JK, the total number of +# groups being tested, but a subset of the data can be analyzed +# using grp +# + if(is.matrix(x)) { + y <- list() + for(j in 1:ncol(x)) + y[[j]] <- x[, j] + x <- y +} +JK<-J*K +data<-list() +for(j in 1:length(x)){ +data[[j]]<-x[[grp[j]]] # Now have the groups in proper order. +} +x<-data +if(pool){ +data<-list() +m1<-matrix(c(1:JK),J,K,byrow=TRUE) +for(k in 1:K){ +for(j in 1:J){ +flag<-m1[j,k] +if(j==1)temp<-x[[flag]] +if(j>1){ +temp<-c(temp,x[[flag]]) +}} +data[[k]]<-temp +} +print("Group numbers refer to levels of Factor B") +if(!dif)temp<-lincdm(data,con=con,alpha=alpha,nboot=nboot,mop=bop) +if(dif)temp<-qdmcpdif(data,con=con,alpha=alpha) +return(temp) +} +if(!pool){ +mat<-matrix(c(1:JK),ncol=K,byrow=TRUE) +for(j in 1:J){ +data<-list() +ic<-0 +for(k in 1:K){ +ic<-ic+1 +data[[ic]]<-x[[mat[j,k]]] +} +print(paste("For level ", j, " of Factor A:")) +if(!dif)temp<-lincdm(data,con=con,alpha=alpha,nboot=nboot,mop=bop) +if(dif)temp<-qdmcpdif(data,con=con,alpha=alpha) +print(temp$test) +print(temp$psihat) +}} +} + +gamplot<-function(x,y,sop=TRUE,pyhat=FALSE,eout=FALSE,xout=FALSE,outfun=out,plotit=TRUE, +xlab="X",ylab="",zlab="",theta=50,phi=25,expand=.5,scale=TRUE,ticktype="simple"){ +# +# Plot regression surface using generalized additive model +# +# sop=F, use usual linear model y~x1+x2... 
+# sop=T, use splines +# +library(akima) +library(mgcv) +x<-as.matrix(x) +np<-ncol(x) +np1<-np+1 +if(ncol(x)>4)stop("x should have at most four columns of data") +m<-elimna(cbind(x,y)) +x<-m[,1:np] +x<-as.matrix(x) +y<-m[,np1] +if(xout && eout)stop("Can't have xout=eout=T") +if(eout){ +flag<-outfun(m)$keep +m<-m[flag,] +} +if(xout){ +flag<-outfun(x,plotit=FALSE)$keep +m<-m[flag,] +} +x<-m[,1:np] +x<-as.matrix(x) +y<-m[,np1] +if(!sop){ +if(ncol(x)==1)fitr<-fitted(gam(y~x[,1])) +if(ncol(x)==2)fitr<-fitted(gam(y~x[,1]+x[,2])) +if(ncol(x)==3)fitr<-fitted(gam(y~x[,1]+x[,2]+x[,3])) +if(ncol(x)==4)fitr<-fitted(gam(y~x[,1]+x[,2]+x[,3]+x[,4])) +} +if(sop){ +if(ncol(x)==1)fitr<-fitted(gam(y~s(x[,1]))) +if(ncol(x)==2)fitr<-fitted(gam(y~s(x[,1])+s(x[,2]))) +if(ncol(x)==3)fitr<-fitted(gam(y~s(x[,1])+s(x[,2])+s(x[,3]))) +if(ncol(x)==4)fitr<-fitted(gam(y~s(x[,1])+s(x[,2])+s(x[,3])+s(x[,4]))) +} +last<-fitr +if(plotit){ +if(ncol(x)==1){ +plot(x,fitr,xlab=xlab,ylab=ylab) +} +if(ncol(x)==2){ +iout<-c(1:length(fitr)) +nm1<-length(fitr)-1 +for(i in 1:nm1){ +ip1<-i+1 +for(k in ip1:length(fitr))if(sum(x[i,]==x[k,])==2)iout[k]<-0 +} +fitr<-fitr[iout>=1] # Eliminate duplicate points in the x-y plane +# This is necessary when doing three dimensional plots +# with the R function interp +mkeep<-x[iout>=1,] +fitr<-interp(mkeep[,1],mkeep[,2],fitr) +persp(fitr,theta=theta,phi=phi,expand=expand,xlab=xlab,ylab=ylab,zlab=zlab, +scale=scale,ticktype=ticktype) +} +} +if(!pyhat)last <- "Done" +last +} + +rgvar<-function(x,est=covmcd,...){ +# +# compute a robust generalized variance +# +# choices for est are: +# var +# covmcd +# covmve +# skipcov with MM=F (boxplot) MM=T (MAD-MEDIAN), op=1 (MGV method) +# op=2 (projection method for outliers) +# covroc (S+ only as of Dec, 2005) +# Rocke's measure of scatter, this requires that the command +# library(robust) has been executed. +# +library(MASS) +val<-prod(eigen(est(x,...))$values) +val +} +rgvarseb<-function(x,nboot=100,est=skipcov,SEED=TRUE,...){ +# +n<-nrow(x) +val<-NA +for(i in 1:nboot){ +data<-sample(n,n,replace=TRUE) +val[i]<-rgvar(x[data,],est=est,...) +} +se<-sqrt(var(val)) +se +} +covmve<-function(x){ +library(MASS) +oldSeed <- .Random.seed +val<-cov.mve(x) +assign(x='.Random.seed', value=oldSeed, envir=.GlobalEnv) +list(center=val$center,cov=val$cov) +} + +mvecov<-function(x){ +library(MASS) +val<-cov.mve(x) +val$cov +} + + +rgvar2g<-function(x,y,nboot=100,est=covmcd,alpha=.05,cop=3,op=2,SEED=TRUE,...){ +# +# Two independent groups. +# Test hypothesis of equal generalized variances. +# +# Choices for est include: +# var +# covmcd +# covmve +# skipcov with MM=F (boxplot) MM=T (MAD-MEDIAN), op=1 (MGV method) +# op=2 (projection method for outliers) +# covroc Rocke's measure of scatter, this requires that the command +# library(robust) has been executed. +# +if(SEED)set.seed(2) +se1<-rgvarseb(x,nboot=nboot,est=est,SEED=SEED,...) +se2<-rgvarseb(y,nboot=nboot,est=est,SEED=SEED,...) +dif<-rgvar(x,est=est,...)-rgvar(y,est=est,...) 
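+# (The difference between the two generalized variances is studentized below
+# using the bootstrap standard errors se1 and se2.)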
+test.stat<-dif/sqrt(se1^2+se2^2) +test.stat +} + +covmcd<-function(x,nsamp="sample"){ +# +# nsamp="best" is the default used by R, +# meaning that the number of samples is chosen so that +# exhaustive enumeration is done up to 5000 samples +# nsamp="sample" the number of samples +# is min(5*p, 3000) +# +library(MASS) +oldSeed <- .Random.seed +val<-cov.mcd(x,nsamp=nsamp) +assign(x='.Random.seed', value=oldSeed, envir=.GlobalEnv) +list(center=val$center,cov=val$cov) +} + + +mcdcov<-function(x,nsamp="sample"){ +# +# nsamp="best" is the default used by R, +# meaning that the number of samples is chosen so that +# exhaustive enumeration is done up to 5000 samples +# nsamp="sample" the number of samples +# is min(5*p, 3000) +# +#library(lqs) +library(MASS) +oldSeed <- .Random.seed +val<-cov.mcd(x,nsamp=nsamp) + assign(x='.Random.seed', value=oldSeed, envir=.GlobalEnv) +val$cov +} + +ancdes<-function(x,depfun=fdepth,DH=FALSE,FRAC=.5,...){ +# +# Choose points for design of an ANCOVA +# x is the n by p matrix m. +# +# FRAC some value between 0 and 1. +# +# FRAC is the fraction of the least deep points that will not be returned when +# DH=TRUE +# That is, return 1-FRAC deepest points. +# For example, FRAC=.2 means that the deepest 80% of the +# data will be returned. +# +# DH=F, return deepest point and those points on the +# .5 depth contour +# +if(is.data.frame(x))x=as.matrix(x) +if(!is.matrix(x))stop("x must be a matrix or a data frame") +temp<-depfun(x,plotit=FALSE,...) +temp2<-order(temp) +if(!DH){ +val<-matrix(x[temp2[length(temp)],],ncol=ncol(x)) +nmid<-round(length(temp)/2) +id2<-(temp[temp2[nmid]]==temp) +val2<-matrix(x[id2,],ncol=ncol(x)) +if(!is.matrix(val2))val2<-t(as.matrix(val2)) +val<-rbind(val,val2) +} +if(DH){ +bot=round(length(temp)*FRAC) +val=matrix(x[temp2[bot:length(temp)],],ncol=ncol(x)) +} +val=elimna(val) +val +} + + +stacklist<-function(x){ +# +# Assumes x has list mode with each entry a +# matrix having p columns. +# +# Goal: stack the data into a matrix having p columns. +# +p<-ncol(x[[1]]) +xx<-as.matrix(x[[1]]) +for(j in 2:length(x)){ +temp<-as.matrix(x[[j]]) +xx<-rbind(xx,temp) +} +xx +} + +smvar<-function(x,y,fr=.6,xout=TRUE,eout=FALSE,xlab="X",ylab="VAR(Y|X)",pyhat=FALSE,plotit=TRUE,nboot=40, +RNA=FALSE,SEED=TRUE){ +# +# Estimate VAR(Y|X) using bagged version of running interval method +# +# xout=T eliminates all points for which x is an outlier. +# eout=F eliminates all points for which (x,y) is an outlier. +# +# pyhat=T will return estimate for each x. +# +# RNA=T removes missing values when applying smooth +# with RNA=F, might get NA for some pyhat values. +# +# plotit=TRUE, scatterplot of points x versus square of +# predicted y minus y +# stemming from a smooth. Then plots a line indicating +# var(y|x) using bagged smooth +# +temp <- cbind(x, y) +temp <- elimna(temp) +x <- temp[, 1] +y <- temp[, 2] +yhat<-lplot(x, y, pyhat = TRUE, plotit = FALSE)$yhat.values +yvar<-(y-yhat)^2 +estvar<-runmbo(x,y,est=var,pyhat=TRUE,fr=fr,plotit=FALSE,RNA=RNA,nboot=nboot) +if(plotit){ +plot(c(x,x),c(yvar,estvar),type="n",xlab=xlab,ylab=ylab) +points(x,yvar) +sx<-sort(x) +xorder<-order(x) +sysm<-estvar[xorder] +lines(sx,sysm) +} +output <- "Done" +if(pyhat)output <- estvar +output +} +locvarsm<-function(x,y,pyhat=FALSE,pts=x,plotit=TRUE,nboot=40,RNA=TRUE,xlab="X", +ylab="VAR(Y|X)",op=2,xout=TRUE,eout=FALSE,pr=TRUE,fr=.6,scat=TRUE,outfun=out,SEED=TRUE){ +# +# For each x, estimate VAR(y|x) using bootstrap bagging. +# with +# op=1 uses Fan's kernel method plus bootstrap bagging. 
+# op=2 uses running interval smoother plus bootstrap bagging +# +# xout=T eliminates points where there are outliers among x values +# this option applies only when using op=2 and when using +# running interval smoother. +# eout=T eliminates outliers among cloud of all data. +# +if(SEED)set.seed(2) +temp<-cbind(x,y) +temp<-elimna(temp) +x<-temp[,1] +y<-temp[,2] +if(op==2){ +if(pr){ +print("Running interval method plus bagging has been chosen") +print("op=1 will use Fan's method plus bagging") +}} +if(op==1){ +if(pr){ +print("Fan's method plus bagging has been chosen (cf. Bjerve and Doksum)") +print("op=2 will use running interval plus bagging") +} +mat <- matrix(NA, nrow = nboot, ncol = nrow(temp)) +for(it in 1:nboot) { +idat <- sample(c(1:length(y)), replace = T) +xx <- temp[idat, 1] +yy <- temp[idat, 2] +mat[it, ] <- locvar(xx,yy,pts=x,pyhat=TRUE,plotit=FALSE) +} +rmd<-apply(mat,2,mean) + if(plotit) { +plot(c(x, x), c(y, rmd), type = "n", xlab = xlab, ylab= ylab) +sx <- sort(x) +xorder <- order(x) +sysm <- rmd[xorder] +lines(sx, sysm) +} + +output<-"Done" +if(pyhat)output <- rmd +} +if(op==2){ +output<-runmbo(x,y,fr=fr,est=var,xlab=xlab,ylab=ylab,pyhat=pyhat,eout=eout, +xout=xout,RNA=RNA,plotit=plotit,scat=scat,nboot=nboot,outfun=outfun,SEED=SEED) +} +output +} + +mcp2atm<-function(J,K,x,tr=.2,alpha=.05,grp=NA,op=FALSE,pr=TRUE){ +# +# Test all linear contrasts associated with +# main effects for Factor A and B and all interactions based on trimmed means +# By default, +# tr=.2, meaning 20% trimming is used. +# +# bbmcpEP has an option for pooling over the levels of the factors. +# + # The data are assumed to be stored in x in list mode or in a matrix. + # If grp is unspecified, it is assumed x[[1]] contains the data + # for the first level of both factors: level 1,1. + # x[[2]] is assumed to contain the data for level 1 of the + # first factor and level 2 of the second factor: level 1,2 + # x[[j+1]] is the data for level 2,1, etc. + # If the data are in wrong order, grp can be used to rearrange the + # groups. For example, for a two by two design, grp<-c(2,4,3,1) + # indicates that the second group corresponds to level 1,1; + # group 4 corresponds to level 1,2; group 3 is level 2,1; + # and group 1 is level 2,2. + # + # Missing values are automatically removed. + # + JK <- J * K + if(is.matrix(x)) + x <- listm(x) + if(!is.na(grp[1])) { + yy <- x + x<-list() + for(j in 1:length(grp)) + x[[j]] <- yy[[grp[j]]] + } + if(!is.list(x)) + stop("Data must be stored in list mode or a matrix.") + for(j in 1:JK) { + xx <- x[[j]] + x[[j]] <- xx[!is.na(xx)] # Remove missing values + } + # + + if(JK != length(x)) + warning("The number of groups does not match the number of contrast coefficients.") +for(j in 1:JK){ +temp<-x[[j]] +temp<-temp[!is.na(temp)] # Remove missing values. 
+x[[j]]<-temp +} + # Create the three contrast matrices +temp<-con2way(J,K) +conA<-temp$conA +conB<-temp$conB +conAB<-temp$conAB +if(!op){ +Factor.A<-lincon(x,con=conA,tr=tr,alpha=alpha,pr=pr) +Factor.B<-lincon(x,con=conB,tr=tr,alpha=alpha,pr=FALSE) +Factor.AB<-lincon(x,con=conAB,tr=tr,alpha=alpha,pr=FALSE) +} +All.Tests<-NA +if(op){ +Factor.A<-NA +Factor.B<-NA +Factor.AB<-NA +con<-cbind(conA,conB,conAB) +All.Tests<-lincon(x,con=con,tr=tr,alpha=alpha) +} +list(Factor.A=Factor.A,Factor.B=Factor.B,Factor.AB=Factor.AB,All.Tests=All.Tests,conA=conA,conB=conB,conAB=conAB) +} + +bbmcp=mcp2atm + +mdifloc<-function(x,y,est=tukmed,...){ +# +# Compute multivariate measure of location associated +# with the distribution of x-y +# +# By default, use Tukey's median. +# +x<-as.matrix(x) +y<-as.matrix(y) +FLAG<-F +if(ncol(x)!=ncol(y))stop("x and y should have the same number of columns") +if(ncol(x)==1 && ncol(y)==1)FLAG<-T +if(FLAG)val<-loc2dif(x,y,est=est,...) +if(!FLAG){ +J<-(ncol(x)^2-ncol(x))/2 +mat<-matrix(NA,ncol=ncol(x),nrow=nrow(x)*nrow(y)) +for(j in 1:ncol(x))mat[,j]<-as.vector(outer(x[,j], y[,j], FUN = "-")) +val<-est(mat,...) +} +val +} +mdiflcr<-function(m1,m2,tr=.5,nullv=rep(0,ncol(m1)),plotit=TRUE, +SEED=TRUE,pop=1,fr=.8,nboot=600){ +# +# For two independent groups, let D=X-Y. +# Let theta_D be median of marginal distributions +# Goal: Test theta_D=0 +# +# This is a multivariate analog of Wilcoxon-Mann-Whitney method +# Only alpha=.05 can be used. +# +# When plotting: +# pop=1 Use scatterplot +# pop=2 Use expected frequency curve. +# pop=3 Use adaptive kernel density +# +if(!is.matrix(m1))stop("m1 is not a matrix") +if(!is.matrix(m2))stop("m2 is not a matrix") +if(ncol(m1)!=ncol(m2))stop("number of columns for m1 and m2 are not equal") +n1<-nrow(m1) +n2<-nrow(m2) +if(SEED)set.seed(2) +data1 <- matrix(sample(n1, size = n1 * nboot, replace = T), nrow = nboot) +data2 <- matrix(sample(n2, size = n2 * nboot, replace = T), nrow = nboot) +bcon <- matrix(NA, ncol = ncol(m1), nrow = nboot) +for(j in 1:nboot)bcon[j,]<-mdifloc(m1[data1[j,],],m2[data2[j,],],est=lloc,tr=tr) +tvec<-mdifloc(m1,m2,est=lloc,tr=tr) +tempcen <- apply(bcon, 1, mean) +smat <- var(bcon - tempcen + tvec) +temp <- bcon - tempcen + tvec +bcon <- rbind(bcon, nullv) +dv <- mahalanobis(bcon, tvec, smat) +bplus <- nboot + 1 +sig.level <- 1 - sum(dv[bplus] >= dv[1:nboot])/nboot +if(plotit && ncol(m1)==2){ +if(pop==2)rdplot(mdif,fr=fr) +if(pop==1){ +plot(mdif[,1],mdif[,2],xlab="VAR 1",ylab="VAR 2",type="n") +points(mdif[,1],mdif[,2],pch=".") +points(center[1],center[2],pch="o") +points(0,0,pch="+") +} +if(pop==3)akerdmul(mdif,fr=fr) +} +list(p.value=sig.level,center=tvec) +} + +mwmw<-function(m1,m2,cop=5,pr=TRUE,plotit=TRUE,pop=1,fr=.8,op=1,dop=1){ +# +# Compute measure of effect size, p, +# a multivariate analog of Wilcoxon-Mann-Whitney p +# +# When plotting: +# pop=1 Use scatterplot +# pop=2 Use expected frequency curve. 
+# pop=3 Use adaptive kernel density +# +# dop=1, use method A1 approximation of halfspace depth +# dop=2, use method A2 approximation of halfspace depth +# +# cop determines how center of data is determined when +# approximating halfspace depth +# cop=1, Halfspace median +# cop=2, MCD +# cop=3, marginal medians +# cop=4, MVE +# cop=5, skipped mean +# +library(akima) +if(is.null(dim(m1)))stop("m1 is not a matrix or data frame") +if(is.null(dim(m2)))stop("m2 is not a matrix or data frame") +if(ncol(m1)!=ncol(m2))stop("number of columns for m1 and m2 are not equal") +if(ncol(m1)==1)stop("Use R function cid or bmp") +nn<-min(c(nrow(m1),nrow(m2))) +mdif<-matrix(as.vector(outer(m1[,1],m2[,1],"-")),ncol=1) +for(j in 2:ncol(m1)){ +mdif<-cbind(mdif,matrix(as.vector(outer(m1[,j],m2[,j],"-")),ncol=1)) +} +if(op==1){ +if(ncol(m1)==2)temp2<-depth2(rbind(mdif,c(rep(0,ncol(m1))))) +#if(ncol(m1)==3)temp2<-depth3(rbind(mdif,c(rep(0,ncol(m1))))) +if(ncol(m1)>2){ +if(cop==1)center<-dmean(mdif,tr=.5,dop=dop) +if(cop==2)center<-cov.mcd(mdif)$center +if(cop==3)center<-apply(mdif,2,median) +if(cop==4)center<-cov.mve(mdif)$center +if(cop==5)center<-smean(mdif) +temp2<-fdepth(rbind(mdif,c(rep(0,ncol(m1))))) +}} +if(op==2){ +temp2<-pdis(rbind(mdif,c(rep(0,ncol(m1))))) +temp2<-1/(temp2+1) +} +center<-dmean(mdif,tr=.5,dop=dop) +phat<-temp2[nrow(mdif)+1]/max(temp2) +# phat is relative depth of zero vector +# Determine critical value +crit<-NA +alpha<-c(.1,.05,.025,.01) +crit[1]<-1-1.6338/sqrt(nn) +crit[2]<-1-1.8556/sqrt(nn) +crit[3]<-1-2.0215/sqrt(nn) +crit[4]<-1-2.1668/sqrt(nn) +if(pr){ +print("For alpha=.1,.05,.025,.01, the correspoding critical values are") +print(crit) +print("Reject if phat is less than or equal to the critical value") +} +if(plotit && ncol(m1)==2){ +if(pop==2)rdplot(mdif,fr=fr) +if(pop==1){ +plot(mdif[,1],mdif[,2],xlab="VAR 1",ylab="VAR 2",type="n") +points(mdif[,1],mdif[,2],pch=".") +points(center[1],center[2],pch="o") +points(0,0,pch="+") +} +if(pop==3)akerdmul(mdif,fr=fr) +} +list(phat=phat,center=center,crit.val=crit) +} + +qreg<-function(x, y,qval=.5, q=NULL,pr=FALSE,xout=FALSE, outfun=outpro,plotit=FALSE,xlab="X",ylab="Y",op=1,v2=TRUE,method='br',WARN=FALSE,...) +{ +# +# Compute the quantile regression line. That is, the goal is to +# determine the qth (qval) quantile of Y given X using the +# the Koenker-Bassett approach. +# +# v2=T, uses the function rq in the R library quantreg +# v2=F, uses an older and slower version +# op=1 has to do with the old version. +# +# method=scad, see Wu and Liu (2009). VARIABLE SELECTION IN QUANTILE REGRESSION, Statistica Sinica 19, 801-817. 
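+#
+# A minimal usage sketch (x and y are hypothetical numeric vectors):
+#   qreg(x,y,qval=.75)   # 0.75 quantile regression line via quantreg::rq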
+# +if(!is.null(q))qval=q +x<-as.matrix(x) +X<-cbind(x,y) +X<-elimna(X) +np<-ncol(X) +p<-np-1 +x<-X[,1:p] +x<-as.matrix(x) +y<-X[,np] +if(xout){ +x<-as.matrix(x) +flag<-outfun(x,plotit=FALSE,...)$keep +x<-x[flag,] +y<-y[flag] +x<-as.matrix(x) +} +if(!v2){ +temp<-ltareg(x,y,0,op=op) +if(qval==.5){ +coef<-temp$coef +res<-temp$res +} +if(qval!=.5){ +START<-temp$coef +coef<-nelderv2(X,np,FN=qreg.sub,START=START,qval=qval) +}} +if(v2){ +library(quantreg) +x<-as.matrix(x) +if(!WARN)options(warn=-1) +temp<-rq(y~x,tau=qval,method=method) +coef<-temp[1]$coefficients +if(!WARN)options(warn=0) +} +if(ncol(x)==1){ +if(plotit){ +plot(x,y,xlab=xlab,ylab=ylab) +abline(coef) +}} +res <- y - x%*%coef[2:np] - coef[1] +list(coef = coef, residuals = res) +} + + + +qindbt.sub<-function(isub,x,y,qval){ +# +# Perform regression using x[isub] to predict y[isub] +# isub is a vector of length n, +# a bootstrap sample from the sequence of integers +# 1, 2, 3, ..., n +# +# This function is used by other functions when computing +# bootstrap estimates. +# +# regfun is some regression method already stored in R +# It is assumed that regfun$coef contains the intercept and slope +# estimates produced by regfun. The regression methods written for +# this book, plus regression functions in R, have this property. +# +# x is assumed to be a matrix containing values of the predictors. +# +xmat<-matrix(x[isub,],nrow(x),ncol(x)) +regboot<-NA +for(i in 1:length(qval)){ +regboot[i]<-qreg(xmat,y[isub],qval[i])$coef[2] +} +regboot +} + + + + + +runmq<-function(x,y,HD=FALSE,qval=c(.2,.5,.8),xlab="X",ylab="Y",fr=1, +sm=FALSE,nboot=40,SEED=TRUE,eout=FALSE,xout=FALSE,...){ +# +# Plot of running interval smoother based on specified quantiles in +# qval +# +# fr controls amount of smoothing +# tr is the amount of trimming +# +# Missing values are automatically removed. +# +rmd1<-NA +xx<-cbind(x,y) +p<-ncol(xx)-1 +xx<-elimna(xx) +x<-xx[,1:p] +y<-xx[,ncol(xx)] +plot(x,y,xlab=xlab,ylab=ylab) +sx1<-sort(x) +xorder1<-order(x) +for(it in 1:length(qval)){ +if(!sm){ +if(!HD)temp<-rungen(x,y,est=qest,fr=fr,pyhat=TRUE,plotit=FALSE,q=qval[it]) +if(HD)temp<-rungen(x,y,est=hd,fr=fr,pyhat=TRUE,plotit=FALSE,q=qval[it]) +rmd1<-temp[1]$output +sysm1<-rmd1[xorder1] +lines(sx1,sysm1) +} +if(sm){ +if(!HD)temp<-runmbo(x,y,est=qest,fr=fr,pyhat=TRUE,plotit=FALSE,SEED=SEED, +nboot=nboot,eout=FALSE,xout=FALSE,q=qval[it]) +if(HD)temp<-runmbo(x,y,est=hd,fr=fr,pyhat=TRUE,plotit=FALSE,SEED=SEED, +nboot=nboot,eout=FALSE,xout=FALSE,q=qval[it]) +rmd1<-temp +sysm1<-rmd1[xorder1] +lines(sx1,sysm1) +} +}} + + +ritest<-function(x,y,adfun=adrun,plotfun=lplot,eout=FALSE,xout=TRUE,plotit=TRUE,flag=3, +nboot=500,alpha=.05,tr=.2,...){ +# +# There are two methods for testing for regression interactions +# using robust smooths. +# The first, performed by this function, fits an additive model +# and test the hypothesis that the residuals, given x, is a +# horizontal plane. +# +# The second, which is done by function adtest, tests the hypothesis +# that a generalized additive model fits the data. +# +# Plot used to investigate regression interaction +# (the extent a generalized additive model does not fit data). +# Compute additive fit, plot residuals +# versus x, an n by 2 matrix. 
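+#
+# Illustrative call (hypothetical data; a minimal sketch assuming the smoothers used here,
+# e.g. adrun, indt and lplot, are available from this file):
+#   x <- matrix(rnorm(100), ncol=2); y <- x[,1] + x[,2] + rnorm(50)
+#   ritest(x, y)  # returns the test statistics and p-values computed by indt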
+# +if(!is.matrix(x))stop(" x must be a matrix") +if(ncol(x)!=2)stop(" x must have two columns only") +yhat<-adfun(x,y,pyhat=TRUE,eout=eout,xout=xout,plotit=FALSE) +res<-y-yhat +output<-indt(x,res,flag=flag,nboot=nboot) +if(plotit)plotfun(x,y-yhat,eout=eout,xout=xout,expand = 0.5,scale=FALSE,xlab="X", +ylab="Y",zlab="",theta=50,phi=25,...) +output +} + +gvar2g<-function(x,y,nboot=100,DF=TRUE,eop=1,est=skipcov, +alpha=.05,cop=3,op=1,MM=FALSE,SEED=TRUE,pr=FALSE,fast=FALSE,...){ +# +# Two independent groups. +# Test hypothesis of equal generalized variances. +# +# DF=T, means skipcov with MM=F is used. +# +# That is, W-estimator based on a projection outlier detection method +# and Carling's method applied to projections. +# if equal sample sizes, adjusted critical value is used where appopriate +# +# DF=F +# no adjusted critical value is used and any robust measure of +# scatter can be used. +# +# Choices for est include: +# var +# covmcd +# covmve +# skipcov with MM=F (boxplot) MM=T (MAD-MEDIAN), op=1 (MGV method) +# op=2 (projection method for outliers) +# covroc Rocke's measure of scatter, +# +# op, cop and eop, see skipcov +# adjusted critical level should be used with +# skipcov and alpha=.05 only. +# fast=T, will use skipcov.for if it is available. +# +# Function returns ratio of first estimate divided by second estimate +# +if(SEED)set.seed(2) +#if(!is.matrix(x))stop("x should be a matrix with ncol>1") +if(is.null(dim(x)))stop("x should be a matrix or data frame with ncol>1") +if(is.null(dim(y)))stop("y should be a matrix or data frame with ncol>1") +#if(!is.matrix(y))stop("y should be a matrix with ncol>1") +if(ncol(x)==1 || ncol(y)==1)stop("Only multivariate data are allowed") +n1<-nrow(x) +n2<-nrow(y) +adalpha<-NA +if(DF){ +if(n1==n2 && alpha==.05){ +p1<-ncol(x) +if(p1==2){ +if(n1>=20)adalpha<-1.36/n1+.05 +} +if(p1==3){ +if(n1>=20)adalpha<-1.44/n+.05 +} +if(p1==4){ +if(n1>=40)adalpha<-2.47/n1+.05 +} +if(p1==5){ +if(n1>=40)adalpha<-3.43/n+.05 +} +if(p1==6){ +if(n1>=60)adalpha<-4.01/n1+.05 +}}} +val<-NA +for(j in 1:nboot) { + data1 <- sample(n1, size = n1, replace = T) + data2 <- sample(n2, size = n2, replace = T) +if(!DF){ +val[j]<-rgvar(as.matrix(x[data1,]),est=est,...)- +rgvar(as.matrix(y[data2,]),est=est,...) +} +if(DF){val[j]<- +if(!fast){ +rgvar(as.matrix(x[data1,]),est=skipcov,op=op,outpro.cop=cop,MM=MM,...)- +rgvar(as.matrix(y[data2,]),est=skipcov,op=op,outpro.cop=cop,MM=MM,...) +} +if(fast){ +rgvar(as.matrix(x[data1,]),est=skipcov.for,op=op,outpro.cop=cop,MM=MM,...)- +rgvar(as.matrix(y[data2,]),est=skipcov.for,op=op,outpro.cop=cop,MM=MM,...) 
+} +if(pr)print(c(j,val[j])) +}} +p.value<-sum(val<0)/nboot +p.value<-2*min(p.value,1-p.value) +est1=rgvar(x,est=est) +est2=rgvar(y,est=est) +list(p.value=p.value,adjusted.crit.level=adalpha,ratio.of.estimates=est1/est2,n1=n1,n2=n2) +} + +grit<-function(x,y,itest=1,sm.fun=rplot,nboot=500,alpha=.05,SEED=TRUE, +fr=1,plot.fun=rplot,plotit=TRUE,...){ +# +# Fit a running interval smoother using projection distances +# excluding the predictor variable itest +# itest=1 by default, meaning that the goal is to test +# the hypothesis that the first variable does not contribute +# to the regression model +# +# Method fits a smooth using x_1, ..., x_p, excluding variabe itest +# Then x_itest and the resulting residuals are passed to indt +# Alternative choices for smooth include +# sm.fun=lplot, and if p>2, runpd +# +if(!is.matrix(x))stop("Should have two or more predictors stored in a matrix") +p<-ncol(x) +pp<-p+1 +x<-elimna(cbind(x,y)) +y<-x[,pp] +x<-x[,1:p] +flag<-rep(TRUE,ncol(x)) +flag[itest]<-FALSE +temp<-sm.fun(x[,flag],y,plotit=FALSE,pyhat=TRUE,fr=fr) +res<-y-temp +test.it<-indt(x[,itest],res) +if(plotit)plot.fun(x[,itest],res,...) +test.it +} +stackit<-function(x,jval){ +# +# Take a matrix having p columns and convert +# it to a matrix having jval columns and np/jval rows +# So take first jval columns, and rbind this with +# next jval columns, etc. +# +x<-as.matrix(x) +chkit<-ncol(x)%%jval +if(chkit!=0)stop("ncol(x) is not a multiple of jval") +xval<-x[,1:jval] +xval<-as.matrix(xval) +iloop<-ncol(x)/jval-1 +il<-1 +iu<-jval +for(i in 1:iloop){ +il<-il+jval +iu<-iu+jval +temp<-x[,il:iu] +temp<-as.matrix(temp) +xval<-rbind(xval,temp) +} +xval +} +ancmg<-function(x,y,pool=TRUE,jcen=1,fr=1,depfun=fdepth,nmin=8,op=3,tr=.2,pts=NULL, +SEED=TRUE,pr=TRUE,cop=3,con=0,nboot=NA,alpha=.05,bhop=FALSE){ +# +# ANCOVA +# for two or more groups based on trimmed means or medians +# Two or more covariates is assumed. +# +# op=1 use omnibus test for trimmed means, with trimming given by tr +# op=2 use omnibus test for medians. +# (Not recommended when there are tied values, use op=4) +# op=3 multiple comparisons using trimming and percentile bootstrap. +# This method seems best for general use. +# op=4 multiple comparisons using medians and percentile bootstrap +# +# y is matrix with J columns, so have J groups. +# or y can have list mode with length J +# +# x is a matrix with Jp columns, so first p columns +# correspond to the p covariates in the first group, etc. +# Or, +# x can have list mode with length J and each component +# being a matrix with p columns. +# So if covariates for group 1 are in the matrix m1 +# x[[1]]<-m1 will store them in x, x having list mode +# +# nmin is the minimum sample size allowed for any group +# when testing hypotheses. 
+# If a design point results in a sample size ncol(x))stop("jcen has an invalid value") +xcen<-x[,js:jcenp] +} +if(is.list(x))xcen<-x[[jcen]] +if(pool){ +if(is.matrix(x))xval<-stackit(x,pval) +if(is.list(x))xval<-stacklist(x) +mval<-cov.mve(xval) +if(is.null(pts))pts<-ancdes(xval,depfun=depfun,cop=cop) +} +if(!pool){ +if(is.null(pts))pts<-ancdes(xcen,depfun=depfun,cop=cop) +mval<-cov.mve(xcen) +} +npts=1 +if(is.matrix(pts))npts=nrow(pts) +nval<-matrix(NA,ncol=J,nrow=npts) +icl<-0-pval+1 +icu<-0 +for(j in 1:J){ +icl<-icl+pval +icu<-icu+pval +for(i in 1:nrow(pts)){ +if(is.matrix(x) && is.matrix(y)){ +nval[i,j]<-length(y[near3d(x[,icl:icu],pts[i,],fr,mval),j]) +} +if(is.matrix(x) && is.list(y)){ +tempy<-y[[j]] +nval[i,j]<-length(tempy[near3d(x[,icl:icu],pts[i,],fr,mval)]) +} +if(is.list(x) && is.matrix(y)){ +xm<-as.matrix(x[[j]]) +nval[i,j]<-length(y[near3d(xm,pts[i,],fr,mval),j]) +} +if(is.list(x) && is.list(y)){ +tempy<-y[[j]] +xm<-as.matrix(x[[j]]) +nval[i,j]<-length(tempy[near3d(xm,pts[i,],fr,mval)]) +} +# +}} +flag<-rep(TRUE,nrow(pts)) +for(i in 1:npts){ +if(min(nval[i,])=nmin && sum(flagr)>=nmin){ +yl<-est(y[flagl],...) +yr<-est(y[flagr],...) +xl<-est(x[flagl],...) +xr<-est(x[flagr],...) +vals[i]<-(yr-yl)/(xr-xl) +}} +if(plotit){ +plot(c(x,x[1],x[2]),c(vals,-5,5),xlab=xlab,ylab=ylab) +xord<-order(x) +lines(x[xord],vals[xord]) +} +vals +} + + +rslopesm<-function(x,y,fr=1,est=tmean,nmin=10,pts=x,plotit=FALSE,xlab="X", +ylab="Y",SEED=TRUE,nboot=40,xout=FALSE,RNA=TRUE,atr=.2,scat=TRUE,pyhat=TRUE,...){ +# +# For a regression line predicting Y given X +# Estimate slope at points in pts with bagging +# followed by a smooth. +# +# pyhat=T, returns estimated slopes corresponding to the sorted +# x values. +# fr controls amount of smoothing +# atr controls the amount of trimming. +# +# OUTPUT: by default, the estimated slopes at +# X_1<=X_2<=...<=X_n +# That is, for the x values written in ascending order, the +# slope is estimated for each value. If the slope is not considered +# estimable, the estimate is set to NA. +# +# pts is used if the goal is to estimate the slope for some +# other collection of points. +# +# nmin controls how many points close to x are required when +# deciding that the slope is estimable. +# plotit=TRUE will plot the estimates. +# +# The plotted points are the estimates using rslope and +# the solid line gives the estimated values reported by this function +# +# Missing values are automatically removed. +# +if(SEED) set.seed(2) +temp<-cbind(x,y) +if(ncol(temp)!=2)stop("One predictor only is allowed") +temp<-elimna(temp) # Eliminate any rows with missing values +if(xout) { + flag <- outfun(temp[, 1], plotit = FALSE)$keep + temp <- temp[flag, ] +x<-temp[,1] +y<-temp[,2] +} +flag<-order(x) +x<-x[flag] +y<-y[flag] +mat<-matrix(NA,nrow=nboot,ncol=length(pts)) +vals<-NA + for(it in 1:nboot) { + idat <- sample(c(1:length(y)), replace = T) + xx <- temp[idat, 1] + yy <- temp[idat, 2] +# mat[it, ] <- runhat(xx, yy, pts = x, est = est, fr = fr, ...) +mat[it,]<-rslope(xx,yy,fr=fr,est=est,nmin=nmin,pts=x,plotit=FALSE) + } +rmd<-apply(mat,2,mean,na.rm=RNA,tr=atr) +flag<-is.na(rmd) +rmdsm<-lplot(x,rmd,pyhat=TRUE,plotit=plotit) +output<-"Done" +if(pyhat){ +temp<-rep(NA,length(x)) +temp[!flag]<-rmdsm$yhat.values +output<-temp +} +output +} + +m1way<-function(x,est=hd,nboot=599,SEED=TRUE,...){ +# +# Test the hypothesis that J measures of location are equal +# using the percentile bootstrap method. +# By default, medians are compared using 599 bootstrap samples. 
+# and the Harrell-Davis Estimator. To use the usual sample median, set +# est=median +# +# The data are assumed to be stored in x in list mode. Thus, +# x[[1]] contains the data for the first group, x[[2]] the data +# for the second group, etc. Length(x)=the number of groups = J, say. +# +# +if(is.matrix(x))x<-listm(x) +if(!is.list(x))stop("Data must be stored in list mode or a matrix.") +J<-length(x) +nval<-vector("numeric",length(x)) +gest<-vector("numeric",length(x)) +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +bvec<-matrix(0,J,nboot) +print("Taking bootstrap samples. Please wait.") +for(j in 1:J){ +print(paste("Working on group ",j)) +nval[j]<-length(x[[j]]) +gest[j]<-est(x[[j]]) +xcen<-x[[j]]-est(x[[j]],...) +data<-matrix(sample(xcen,size=length(x[[j]])*nboot,replace=TRUE),nrow=nboot) +bvec[j,]<-apply(data,1,est,...) # A J by nboot matrix +# containing the bootstrap values of est. +} +teststat<-wsumsq(gest,nval) +testb<-apply(bvec,2,wsumsq,nval) +p.value<-1 - sum(teststat >= testb)/nboot +teststat<-wsumsq(gest,nval) +list(teststat=teststat,p.value=p.value) +} + +oancpb<-function(x1,y1,x2,y2,est=tmean,tr=.2,pts=NA,fr1=1,fr2=1,nboot=600, +alpha=.05,plotit=TRUE,SEED=TRUE,PRO=FALSE,...){ +# +# Compare two independent groups using an ancova method +# with a percentile bootstrap combined with a running interval +# smooth. +# +# CURRENTLY SEEMS THAT THE R FUNCTION ancGLOB is better. +# +# This function performs an omnibus test using data corresponding +# to K design points specified by the argument pts. If +# pts=NA, K=5 points are chosen for you (see Introduction to Robust +# Estimation and Hypothesis Testing.) +# Null hypothesis is that conditional distribution of Y, given X for first +# group, minus the conditional distribution of Y, given X for second +# group is equal to zero. +# The strategy is to choose K specific X values +# and then test the hypothesis that all K differences are zero. +# +# If you want to choose specific X values, Use the argument +# pts +# Example: pts=c(1,3,5) will use X=1, 3 and 5. +# +# For multiple comparisons using these J points, use ancpb +# +# Assume data are in x1 y1 x2 and y2 +# +# PRO=F, means Mahalanobis distance is used. +# PRO=T, projection distance is used. +# +# fr1 and fr2 are the spans used to fit a smooth to the data. +# +stop('USE ancGLOB') +# +# +gv1<-vector("list") +if(is.na(pts[1])){ +isub<-c(1:5) # Initialize isub +test<-c(1:5) +xorder<-order(x1) +y1<-y1[xorder] +x1<-x1[xorder] +xorder<-order(x2) +y2<-y2[xorder] +x2<-x2[xorder] +n1<-1 +n2<-1 +vecn<-1 +for(i in 1:length(x1))n1[i]<-length(y1[near(x1,x1[i],fr1)]) +for(i in 1:length(x1))n2[i]<-length(y2[near(x2,x1[i],fr2)]) +for(i in 1:length(x1))vecn[i]<-min(n1[i],n2[i]) +sub<-c(1:length(x1)) +isub[1]<-min(sub[vecn>=12]) +isub[5]<-max(sub[vecn>=12]) +isub[3]<-floor((isub[1]+isub[5])/2) +isub[2]<-floor((isub[1]+isub[3])/2) +isub[4]<-floor((isub[3]+isub[5])/2) +for (i in 1:5){ +j<-i+5 +temp1<-y1[near(x1,x1[isub[i]],fr1)] +temp2<-y2[near(x2,x1[isub[i]],fr2)] +temp1<-temp1[!is.na(temp1)] +temp2<-temp2[!is.na(temp2)] +gv1[[i]]<-temp1 +gv1[[j]]<-temp2 +} +# +loc<-NA +if(SEED)set.seed(2) +bvec<-matrix(NA,nrow=nboot,ncol=5) +for(j in 1:5){ +k<-j+5 +loc[j]<-est(gv1[[j]])-est(gv1[[k]]) +xx<-matrix(sample(gv1[[j]],size=length(gv1[[j]])*nboot,replace=TRUE), +nrow=nboot) +yy<-matrix(sample(gv1[[k]],size=length(gv1[[k]])*nboot,replace=TRUE), +nrow=nboot) +bvec[,j]<-apply(xx,1,FUN=est,...)-apply(yy,1,FUN=est,...) 
+} +nullv<-rep(0,5) +if(!PRO){ +mvec<-apply(bvec,2,FUN=mean) +m1<-var(t(t(bvec)-mvec+loc)) +temp<-mahalanobis(rbind(bvec,nullv),loc,m1) +} +if(PRO){ +temp<-pdis(rbind(bvec,nullv)) +} +sig.level<-sum(temp[nboot+1]nullval || chkit[2]nullval || chkit[2]150)fr<-7.57/length(y)+.05 +} +} +if(SEED)set.seed(2) +x<-as.matrix(x) +mflag<-matrix(NA,nrow=length(y),ncol=length(y)) +for (j in 1:length(y)){ +for (k in 1:length(y)){ +mflag[j,k]<-(sum(x[j,]<=x[k,])==ncol(x)) +} +} +yhat<-adrun(x,y,est=est,plotit=FALSE,fr=fr,pyhat=TRUE) +regres<-y-yhat +print("Taking bootstrap samples, please wait.") +data<-matrix(runif(length(y)*nboot),nrow=nboot) +data<-sqrt(12)*(data-.5) # standardize the random numbers. +rvalb<-apply(data,1,adtests1,yhat,regres,mflag,x,fr) +# An n x nboot matrix of R values +rvalb<-rvalb/sqrt(length(y)) +dstatb<-apply(abs(rvalb),2,max) +wstatb<-apply(rvalb^2,2,mean) +v<-c(rep(1,length(y))) +rval<-adtests1(v,yhat,regres,mflag,x,fr) +rval<-rval/sqrt(length(y)) +dstat<-max(abs(rval)) +wstat<-mean(rval^2) +p.value.d<-1-sum(dstat>=dstatb)/nboot +p.value.w<-1-sum(wstat>=wstatb)/nboot +list(dstat=dstat,wstat=wstat,p.value.d=p.value.d,p.value.w=p.value.w) +} + +rhom<-function(x,y,op=1,op2=FALSE,tr=.2,plotit=TRUE,xlab="NA",ylab="NA",zlab="ABS(res)", +est=median,sm=FALSE,SEED=TRUE,xout=FALSE,outfun=outpro,...){ +# For regression model, Y=m(X)+s(X)e, +# where s(X) models heteroscedasticity, and e has median 0, +# test hypothesis s(X)=1 for any X +# +# For p>1, method tests for each p whether residuals and x_j +# have a horizontal regression line. +# +# op2=F, tests for homogeneity using running interval smoother +# op2=T, test of independence based on Y-M(Y), M(Y) some measure +# of location given by argument est. +# In general, op2=T should NOT be used when the goal is to test +# the hypothesis of a homoscedastic error term. +# +# op=1 test using regression method (function regci) +# op=2 test using Winsorized correlation +# tr is amount of Winsorizing. A heteroscedastic bootstrap method is used. wincor is not asymptotically correct. +# op=3 test using a wild boostrap method +# +x<-as.matrix(x) +p<-ncol(x) +pp<-p+1 +xy<-elimna(cbind(x,y)) +x<-xy[,1:p] +y<-xy[,pp] +x<-as.matrix(x) +if(xout){ +m<-cbind(x,y) +flag<-outfun(x,plotit=FALSE,...)$keep +m<-m[flag,] +x<-m[,1:p] +y<-m[,pp] +x<-as.matrix(x) +} +output<-NA +if(ncol(x)==1){ +if(!op2)res<-y-runhat(x[,1],y,est=est,pts=x) +if(op2)res<-y-est(y) +if(op==1)output<-regci(x,abs(res),SEED=SEED,pr=FALSE)$regci[2,5] +if(op==2)output<-wincorci(x,abs(res),tr=tr,SEED=SEED)$p.value +if(op==3)output<-indt(x,abs(res),SEED=SEED)$p.value.d +} +if(ncol(x)>1){ +pv<-ncol(x)+1 +if(!op2)res<-y-rung3hat(x,y,est=est,pts=x)$rmd +if(op2)res<-y-est(y) +if(op==1)output<-regci(x,abs(res),pr=FALSE)$regci[2:pv,5] +if(op==2)output<-winall(cbind(x,abs(res)),tr=tr)$p.values[1:ncol(x),pv] +if(op==3)output<-indt(x,abs(res),SEED=SEED)$p.value.d +} +if(plotit){ +if(ncol(x)==1){ +if(xlab=='NA')xlab="X" +if(ylab=='NA')ylab="ABS(res)" +if(!sm)rungen(x,abs(res),est=est,xlab=xlab,ylab=ylab) +if(sm)runmbo(x,abs(res),est=est,xlab=xlab,ylab=ylab) +} +if(ncol(x)==2){ +if(xlab=='NA')xlab="X1" +if(ylab=='NA')ylab="X2" +if(sm)rung3d(x,abs(res),est=est,xlab=xlab,ylab=ylab,zlab=zlab) +if(!sm)run3bo(x,abs(res),est=est,xlab=xlab,ylab=ylab,zlab=zlab) +}} +list(p.value=output) +} + +gk.sigmamu <- function(x, c1 = 4.5, c2 = 3.0, mu.too = FALSE, ...) 
+{ + n <- length(x) + + medx <- median(x) + sigma0 <- median(abs(x - medx)) +w <- abs(x - medx) / sigma0 +w <- ifelse(w<=c1,(1.0 - (w / c1)^2)^2,0) + mu <- sum(x * w) / sum(w) + + x <- (x - mu) / sigma0 + rho <- x^2 + rho[rho > c2^2] <- c2^2 + sigma2 <- sigma0^2 / n * sum(rho) + + if(mu.too) + c(mu, sqrt(sigma2)) + else + sqrt(sigma2) +} + +gk <- function(x, y, ...) +{ + ((gk.sigmamu(x + y, ...))^2 - (gk.sigmamu(x - y, ...))^2) / 4.0 +} + +hard.rejection <- function(distances, p, beta = 0.9, ...) +{ + d0 <- qchisq(beta, p) * median(distances) / qchisq(0.5, p) + weights <- double(length(distances)) + weights[distances <= d0] <- 1.0 + weights +} +# +# +# + +gkcov<-function(x,y,gk.sigmamu=taulc,...){ +# +# Compute robust covariance using the Gnanadesikan-Kettenring +# estimator. +# (cf. Marrona & Zomar, 2002, Technometrics +# +val<-.25*(gk.sigmamu(x+y,...)-gk.sigmamu(x-y,...)) +val +} +covogk<-function(x,sigmamu=taulc,v=gkcov,n.iter=5,beta=.9,...){ +# +# Compute robust (weighted) covariance matrix in Maronna and Zamar +# (2002, Technometrics, eq. 7). +# +# x is an n by p matrix +# n.iter number of iterations. 1 seems to be best +# sigmamu is any user supplied function having the form +# sigmamu(x,mu.too=F) and which computes a robust measure of +# of dispersion if mu.too=F. If mu.too=T, it returns +# a robust measure of location as well. +# v is any robust covariance +# +if(!is.matrix(x))stop("x should be a matrix") +x<-elimna(x) # remove any rows with missing data +temp<-ogk.pairwise(x,sigmamu=sigmamu,v=v,n.iter=n.iter,beta=beta,...)$wcovmat +temp +} +ogk<-function(x,sigmamu=taulc,v=gkcov,n.iter=1,beta=.9,...){ +# +# Compute robust (weighted) covariance matrix in Maronna and Zamar +# (2002, Technometrics, eq. 7). +# +# x is an n by p matrix +# n.iter number of iterations. 1 seems to be best +# sigmamu is any user supplied function having the form +# sigmamu(x,mu.too=F) and which computes a robust measure of +# of dispersion if mu.too=F. If mu.too=T, it returns +# a robust measure of location as well. +# v is any robust covariance +# +if(!is.matrix(x))stop("x should be a matrix") +x<-elimna(x) # remove any rows with missing data +temp<-ogk.pairwise(x,sigmamu=sigmamu,v=v,n.iter=n.iter,beta=beta,...) +list(center=temp$wcenter,cov=temp$wcovmat) +} + +ogk.pairwise <- function(X,n.iter=1,sigmamu=taulc,v=gkcov,beta=.9,...) +#weight.fn=hard.rejection,beta=.9,...) +{ +# Downloaded (and modified slightly) from www.stats.ox.ac.uk/~konis/pairwise.q +# Corrections noted by V. Todorov have been incorporated +# + data.name <- deparse(substitute(X)) + X <- as.matrix(X) + n <- dim(X)[1] + p <- dim(X)[2] + Z <- X + U <- diag(p) + A <- list() + # Iteration loop. + for(iter in 1:n.iter) { + # Compute the vector of standard deviations d and + # the correlation matrix U. + d <- apply(Z, 2, sigmamu, ...) + Z <- sweep(Z, 2, d, '/') + + for(i in 1:(p - 1)) { + for(j in (i + 1):p) { + U[j, i] <- U[i, j] <- v(Z[ , i], Z[ , j], ...) + } + } + + # Compute the eigenvectors of U and store them in + # the columns of E. + + E <- eigen(U, symmetric = TRUE)$vectors + + # Compute A, there is one A for each iteration. + + A[[iter]] <- d * E + + # Project the data onto the eigenvectors. + + Z <- Z %*% E + } + + # End of orthogonalization iterations. + + # Compute the robust location and scale estimates for + # the transformed data. + +# sqrt.gamma <- apply(Z, 2, sigmamu, mu.too = TRUE, ...) 
+ sqrt.gamma <- apply(Z, 2, sigmamu, mu.too = TRUE) + center <- sqrt.gamma[1, ] + sqrt.gamma <- sqrt.gamma[2, ] + + # Compute the mahalanobis distances. + + Z <- sweep(Z, 2, center) + Z <- sweep(Z, 2, sqrt.gamma, '/') + distances <- rowSums(Z^2) + + # From the inside out compute the robust location and + # covariance matrix estimates. See equation (5). + + covmat <- diag(sqrt.gamma^2) + + for(iter in seq(n.iter, 1, -1)) { + covmat <- A[[iter]] %*% covmat %*% t(A[[iter]]) + center <- A[[iter]] %*% center + } + + center <- as.vector(center) + + # Compute the reweighted estimate. First, compute the + # weights using the user specified weight function. + + #weights <- weight.fn(distances, p, ...) +weights <- hard.rejection(distances, p, beta=beta,...) + sweights <- sum(weights) + + # Then compute the weighted location and covariance + # matrix estimates. + + wcenter <- colSums(sweep(X, 1, weights, '*')) / sweights + + Z <- sweep(X, 2, wcenter) + Z <- sweep(Z, 1, sqrt(weights), '*') + wcovmat <- (t(Z) %*% Z) / sweights; + + list(center = center, + covmat = covmat, + wcenter = wcenter, + wcovmat = wcovmat, + distances = distances, + sigmamu = deparse(substitute(sigmamu)), + v = deparse(substitute(v)), + data.name = data.name, + data = X) +} + + +gk.sigmamu <- function(x, c1 = 4.5, c2 = 3.0, mu.too = FALSE, ...) +{ + n <- length(x) + + medx <- median(x) + sigma0 <- median(abs(x - medx)) +# w <- (x - medx) / sigma0 +# w <- (1.0 - (w / c1)^2)^2 + #w[w < 0.0] <- 0.0 +w <- abs(x - medx) / sigma0 +w <- ifelse(w<=c1,(1.0 - (w / c1)^2)^2,0) + mu <- sum(x * w) / sum(w) + + x <- (x - mu) / sigma0 + rho <- x^2 + rho[rho > c2^2] <- c2^2 + sigma2 <- sigma0^2 / n * sum(rho) + + if(mu.too) + c(mu, sqrt(sigma2)) + else + sqrt(sigma2) +} + +gk <- function(x, y, ...) +{ + ((gk.sigmamu(x + y, ...))^2 - (gk.sigmamu(x - y, ...))^2) / 4.0 +} + +hard.rejection <- function(distances, p, beta = 0.9, ...) +{ + d0 <- qchisq(beta, p) * median(distances) / qchisq(0.5, p) + weights <- double(length(distances)) + weights[distances <= d0] <- 1.0 + weights +} + +outogk<-function(x,sigmamu=taulc,v=gkcov,op=TRUE,SEED=FALSE, +beta=max(c(.95,min(c(.99,1/nrow(x)+.94)))),n.iter=1,plotit=TRUE,...){ +# +# Use the ogk estimator to +# determine which points are outliers +# +# op=T uses robust Mahalanobis distance based on +# the OGK estimator with beta adjusted so that +# the outside rate per observation is approximately .05 +# under normality. +# op=F returns the outliers based on the distances used +# by the OGK estimator +# (Currently, op=T seems best for detecting outliers.) +# +if(!is.matrix(x))stop("x should be a matrix") +x<-elimna(x) +if(!op){ +temp<-ogk.pairwise(x,sigmamu=sigmamu,v=v,beta=beta,n.iter=n.iter,...) +vals<-hard.rejection(temp$distances,p=ncol(x),beta=beta,...) +flag<-(vals==1) +vals<-c(1:nrow(x)) +outid<-vals[!flag] +keep<-vals[flag] +if(is.matrix(x)){ +if(ncol(x)==2 && plotit){ +plot(x[,1],x[,2],xlab="X", ylab="Y",type="n") +points(x[flag,1],x[flag,2]) +if(sum(!flag)>0)points(x[!flag,1],x[!flag,2],pch="o") +}}} +if(op){ +temp<-out(x,cov.fun=ogk,beta=beta,plotit=plotit,SEED=SEED) +outid<-temp$out.id +keep<-temp$keep +} +list(out.id=outid,keep=keep,distances=temp$dis) +} + +splot<-function(x,op=TRUE,VL=FALSE,xlab="X",ylab="Rel. Freq.",frame.plot=TRUE,plotit=TRUE){ +# +# Frequency plot +# +# For each unique value in x, +# the relatively frequency is determined and plotted. +# +# op=TRUE a line connecting the relative frequencies is drawn if VL=FALSE. 
+# VL=TRUE, a vertical line is drawn for each unique value in x; +# the height of the line indicates the relative frequency. +# +# op=FALSE. No lines are drawn +# +# The function returns the sample size as well as the frequencies +# associated with each unique value stored in x. +# +x<-x[!is.na(x)] +temp<-sort(unique(x)) +freq<-NA +for(i in 1:length(temp)){ +freq[i]<-sum(x==temp[i]) +} +rmfreq=freq +nval=sum(freq) +freq<-freq/length(x) +tfreq<-freq +tfreq[1]<-0 +tfreq[2]<-max(freq) +if(plotit){ +plot(temp,tfreq,xlab=xlab,ylab=ylab,type="n",frame.plot=frame.plot) +points(temp,freq,pch="*") +if(op) +if(!VL)lines(temp,freq) +if(VL){ +for(i in 1:length(temp))lines(c(temp[i],temp[i]),c(0,freq[i])) +}} +den=sum(rmfreq) +list(obs.values=temp,n=nval,frequencies=rmfreq,rel.freq=rmfreq/den) +} + +outcov<-function(x,y=NA,outfun=outogk,plotit=FALSE){ +# +# Remove outliers and compute covariances +# +if(!is.na(y[1]))x<-cbind(x,y) +keep<-outfun(x,plotit=plotit)$keep +val<-var(x[keep,]) +if(ncol(val)==2)val<-val[1,2] +list(cov=val) +} + +covout<-function(x,y=NA,outfun=outogk,plotit=FALSE){ +# +# Remove outliers and compute covariances +# +if(!is.na(y[1]))x<-cbind(x,y) +keep<-outfun(x,plotit=plotit)$keep +val<-var(x[keep,]) +if(ncol(val)==2)val<-val[1,2] +val +} + +tbscor<-function(x,y=NA){ +# +# Compute a correlation coefficient using the TBS measure of scatter +# +if(!is.na(y[1]))x<-cbind(x,y) +if(!is.matrix(x))stop("x should be a matrix") +x<-elimna(x) +n<-nrow(x) +p<-ncol(x) +temp<-tbs(x)$cov +val<-matrix(NA,p,p) +for(j in 1:p){ +for(k in 1:p){ +val[j,k]<-temp[k,j]/sqrt(temp[k,k]*temp[j,j]) +}} +test<-abs(val*sqrt((n-2)/(1-val^2))) +if(p==2){ +val<-val[1,2] +p.value<-c("Greater than .1") +crit<-20.20/n+1.89 +if(test>=crit)p.value<-c("Less than .1") +crit<-30.41/n+2.21 +if(test>=crit)p.value<-c("Less than .05") +crit<-39.72/n+2.5 +if(test>=crit)p.value<-c("Less than .025") +crit<-58.55/n+2.80 +if(test>=crit)p.value<-c("Less than .01") +} +list(cor=val,test.stat=test,p.value=p.value) +} + +skiptbs<-function(x,y=NA,plotit=FALSE){ +# +# Remove outliers and compute correlations +# +if(!is.na(y[1]))x<-cbind(x,y) +x<-elimna(x) +n<-nrow(x) +keep<-outtbs(x,plotit=plotit)$keep +val<-cor(x[keep,]) +p.value<-NA +test<-NA +crit.05<-30.41/n+2.21 +vat<-val +diag(vat)<-0 +test<-abs(vat*sqrt((n-2)/(1-vat^2))) +diag(test)<-NA +if(ncol(val)==2){ +p.value<-c("Greater than .1") +val<-val[1,2] +test<-abs(val*sqrt((n-2)/(1-val^2))) +p.value<-c("Greater than .1") +crit<-20.20/n+1.89 +if(test>=crit)p.value<-c("Less than .1") +crit<-30.41/n+2.21 +if(test>=crit)p.value<-c("Less than .05") +crit<-39.72/n+2.5 +if(test>=crit)p.value<-c("Less than .025") +crit<-58.55/n+2.80 +if(test>=crit)p.value<-c("Less than .01") +} +list(cor=val,test.stat=test,p.value=p.value,crit.05=crit.05) +} +skipogk<-function(x,y=NA,plotit=FALSE){ +# +# Remove outliers and compute correlations +# +if(!is.na(y[1]))x<-cbind(x,y) +x<-elimna(x) +n<-nrow(x) +keep<-outogk(x,plotit=plotit)$keep +val<-cor(x[keep,]) +p.value<-NA +test<-NA +crit.05<-15.49/n+2.68 +vat<-val +diag(vat)<-0 +test<-abs(vat*sqrt((n-2)/(1-vat^2))) +diag(test)<-NA +if(ncol(val)==2){ +p.value<-c("Greater than .1") +val<-val[1,2] +test<-abs(val*sqrt((n-2)/(1-val^2))) +crit<-4.8/n+2.72 +if(test>=crit)p.value<-c("Less than .1") +crit<-15.49/n+2.68 +if(test>=crit)p.value<-c("Less than .05") +crit<-14.22/n+3.26 +if(test>=crit)p.value<-c("Less than .025") +crit<-24.83/n+3.74 +if(test>=crit)p.value<-c("Less than .01") +} +list(cor=val,test.stat=test,p.value=p.value,crit.05=crit.05) +} + 
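+# Illustrative use of the OGK-based outlier and correlation functions above
+# (hypothetical data; a minimal sketch assuming this file is sourced and MASS is available):
+#   set.seed(1)
+#   z <- cbind(rnorm(50), rnorm(50))
+#   outogk(z, plotit=FALSE)$out.id  # rows flagged as outliers
+#   skipogk(z, plotit=FALSE)$cor    # correlation after removing flagged rows
+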
+rqfit<-function(x,y,qval=0.5,alpha=0.05,xout=FALSE,outfun=outpro,res=FALSE,method='br',...){ +# +# Do a quantile regression fit +# +if(alpha!=.05)stop("This function only allows alpha=0.05. Use qregci") +library(quantreg) +xx<-cbind(x,y) +p<-ncol(xx)-1 +xx<-elimna(xx) +x<-xx[,1:p] +y<-xx[,ncol(xx)] +x=as.matrix(x) +if(xout){ +flag<-outfun(x,...)$keep +x<-x[flag,] +y<-y[flag] +} +residuals<-NA +if(res)residuals<-rq(y~x)$residuals +temp<-summary(rq(y~x,tau=qval,alpha=alpha,method=method)) +temp0<-temp[[4]] +if(is.matrix(temp[[3]]))temp0<-temp[[3]] #Newer R version +temp<-temp0 +coef<-temp[,1] +ci<-temp[,2:3] +list(coef=coef,ci=ci,residuals=residuals) +} +rqtest.sub<-function(isub,x,y,qval=.5){ +# +# Perform regression using x[isub] to predict y[isub] +# isub is a vector of length n, +# a bootstrap sample from the sequence of integers +# 1, 2, 3, ..., n +# +# This function is used by other functions when computing +# bootstrap estimates. +# +# x is assumed to be a matrix containing values of the predictors. +# +xmat<-matrix(x[isub,],nrow(x),ncol(x)) +#regboot<-rqfit(xmat,y[isub],qval=qval)$coef +regboot<-qreg(xmat,y[isub],qval=qval)$coef +regboot +} + + + +erho.bt <- function(p,c1,M) +# expectation of rho(d) under chi-squared p + return(chi.int(p,2,M)/2 + +(M^2/2+c1*(5*c1+16*M)/30)*chi.int2(p,0,M+c1) + +(M^2/2-M^2*(M^4-5*M^2*c1^2+15*c1^4)/(30*c1^4))*( +chi.int(p,0,M+c1)-chi.int(p,0,M)) + +(1/2+M^4/(2*c1^4)-M^2/c1^2)*(chi.int(p,2,M+c1)-chi.int(p,2,M)) + +(4*M/(3*c1^2)-4*M^3/(3*c1^4))*(chi.int(p,3,M+c1)-chi.int(p,3,M)) + +(3*M^2/(2*c1^4)-1/(2*c1^2))*(chi.int(p,4,M+c1)-chi.int(p,4,M)) + -(4*M/(5*c1^4))*(chi.int(p,5,M+c1)-chi.int(p,5,M)) + +(1/(6*c1^4))*(chi.int(p,6,M+c1)-chi.int(p,6,M))) +chi.int <- function(p,a,c1) +# partial expectation d in (0,c1) of d^a under chi-squared p + return( exp(lgamma((p+a)/2)-lgamma(p/2))*2^{a/2}*pchisq(c1^2,p+a) ) +chi.int2 <- function(p,a,c1) +# partial expectation d in (c1,\infty) of d^a under chi-squared p + return( exp(lgamma((p+a)/2)-lgamma(p/2))*2^{a/2}*(1-pchisq(c1^2,p+a))) +cgen.bt <- function(n,p,r,alpha,asymp=FALSE){ +# find constants c1 and M that gives a specified breakdown r +# and rejection point alpha +if (asymp == FALSE){if (r > (n-p)/(2*n) ) r <- (n-p)/(2*n)} +# maximum achievable breakdown +# +# if rejection is not achievable, use c1=0 and best rejection +# + limvec <- rejpt.bt.lim(p,r) + if (1-limvec[2] <= alpha) + { + c1 <- 0 + M <- sqrt(qchisq(1-alpha,p)) + } + else + { + c1.plus.M <- sqrt(qchisq(1-alpha,p)) + M <- sqrt(p) + c1 <- c1.plus.M - M + iter <- 1 + crit <- 100 + eps <- 1e-5 + while ((crit > eps)&(iter<100)) + { + deps <- 1e-4 + M.old <- M + c1.old <- c1 + er <- erho.bt(p,c1,M) + fc <- er - r*(M^2/2+c1*(5*c1+16*M)/30) + fcc1 <- (erho.bt(p,c1+deps,M)-er)/deps + fcM <- (erho.bt(p,c1,M+deps)-er)/deps + fcp <- fcM - fcc1 - r*(M-(5*c1+16*M)/30+c1*9/30) + M <- M - fc/fcp + if (M >= c1.plus.M ){M <- (M.old + c1.plus.M)/2} + c1 <- c1.plus.M - M +# if (M-c1 < 0) M <- c1.old+(M.old-c1.old)/2 + crit <- abs(fc) + iter <- iter+1 + } + } +list(c1=c1,M=M,r1=r) +} +erho.bt.lim <- function(p,c1) +# expectation of rho(d) under chi-squared p + return(chi.int(p,2,c1)+c1^2*chi.int2(p,0,c1)) +erho.bt.lim.p <- function(p,c1) +# derivative of erho.bt.lim wrt c1 + return(chi.int.p(p,2,c1)+c1^2*chi.int2.p(p,0,c1)+2*c1*chi.int2(p,0,c1)) + + +rejpt.bt.lim <- function(p,r){ +# find p-value of translated biweight limit c +# that gives a specified breakdown + c1 <- 2*p + iter <- 1 + crit <- 100 + eps <- 1e-5 + while ((crit > eps)&(iter<100)) + { + c1.old <- c1 + 
fc <- erho.bt.lim(p,c1) - c1^2*r + fcp <- erho.bt.lim.p(p,c1) - 2*c1*r + c1 <- c1 - fc/fcp + if (c1 < 0) c1 <- c1.old/2 + crit <- abs(fc) + iter <- iter+1 + } + return(c(c1,pchisq(c1^2,p),log10(1-pchisq(c1^2,p)))) +} +chi.int.p <- function(p,a,c1) + return( exp(lgamma((p+a)/2)-lgamma(p/2))*2^{a/2}*dchisq(c1^2,p+a)*2*c1 ) +chi.int2.p <- function(p,a,c1) + return( -exp(lgamma((p+a)/2)-lgamma(p/2))*2^{a/2}*dchisq(c1^2,p+a)*2*c1 ) +ksolve.bt <- function(d,p,c1,M,b0){ +# find a constant k which satisfies the s-estimation constraint +# for modified biweight + k <- 1 + iter <- 1 + crit <- 100 + eps <- 1e-5 + while ((crit > eps)&(iter<100)) + { + k.old <- k + fk <- mean(rho.bt(d/k,c1,M))-b0 + fkp <- -mean(psi.bt(d/k,c1,M)*d/k^2) + k <- k - fk/fkp + if (k < k.old/2) k <- k.old/2 + if (k > k.old*1.5) k <- k.old*1.5 + crit <- abs(fk) + iter <- iter+1 + } + return(k) +} +rho.bt <- function(x,c1,M) +{ + x1 <- (x-M)/c1 + ivec1 <- (x1 < 0) + ivec2 <- (x1 > 1) + return(ivec1*(x^2/2) + +ivec2*(M^2/2+c1*(5*c1+16*M)/30) + +(1-ivec1-ivec2)*(M^2/2-M^2*(M^4-5*M^2*c1^2+15*c1^4)/(30*c1^4) + +(1/2+M^4/(2*c1^4)-M^2/c1^2)*x^2 + +(4*M/(3*c1^2)-4*M^3/(3*c1^4))*x^3 + +(3*M^2/(2*c1^4)-1/(2*c1^2))*x^4 + -4*M*x^5/(5*c1^4)+x^6/(6*c1^4))) +} +psi.bt <- function(x,c1,M) +{ + x1 <- (x-M)/c1 + ivec1 <- (x1 < 0) + ivec2 <- (x1 > 1) + return(ivec1*x+(1-ivec1-ivec2)*x*(1-x1^2)^2) +} +psip.bt <- function(x,c1,M) +{ + x1 <- (x-M)/c1 + ivec1 <- (x1 < 0) + ivec2 <- (x1 > 1) + return(ivec1+(1-ivec1-ivec2)*((1-x1^2)^2+4*x*x1*(1-x1^2)/c1)) +} +wt.bt <- function(x,c1,M) +{ + x1 <- (x-M)/c1 + ivec1 <- (x1 < 0) + ivec2 <- (x1 > 1) + return(ivec1+(1-ivec1-ivec2)*(1-x1^2)^2) +} +v.bt <- function(x,c1,M) return(x*psi.bt(x,c1,M)) + + +olstests1<-function(vstar,yhat,res,x){ +ystar <- yhat + res * vstar +p<-ncol(x) +pp<-p+1 +vals<-lsfit(x,ystar)$coef[2:pp] +test<-sum(vals^2) +test +} +kerreg<-function(x,y,pyhat=FALSE,pts=NA,plotit=TRUE,theta=50,phi=25,expand=.5, +scale=FALSE,zscale=FALSE,eout=FALSE,xout=FALSE,outfun=out,np=100,xlab="X",ylab="Y",zlab="Z", +varfun=pbvar,e.pow=TRUE,pr=TRUE,ticktype="simple",pch='.',...){ +# +# Compute local weighted regression with Epanechnikov kernel +# +# See Fan, Annals of Statistics, 1993, 21, 196-217. +# cf. Bjerve and Doksum, Annals of Statistics, 1993, 21, 890-902 +# +# With a single predictor, this function calls locreg +# See locreg for information about np and plotting +# +library(akima) +x<-as.matrix(x) +xx<-cbind(x,y) +xx<-elimna(xx) +x<-xx[,1:ncol(x)] +x<-as.matrix(x) +y<-xx[,ncol(x)+1] +d<-ncol(x) +np1<-d+1 +m<-elimna(cbind(x,y)) +if(xout && eout)stop("Can't have eout=xout=T") +if(eout){ +flag<-outfun(m,plotit=FALSE,...)$keep +m<-m[flag,] +} +if(xout){ +flag<-outfun(x,plotit=FALSE,...)$keep +m<-m[flag,] +} +if(zscale){ +for(j in 1:np1){ +m[,j]<-(m[,j]-median(m[,j]))/mad(m[,j]) +}} +x<-m[,1:d] +x<-as.matrix(x) +y<-m[,np1] +n<-nrow(x) +if(d>1){ +xrem<-x +pi<-gamma(.5)^2 +cd<-c(2,pi) +if(d==2)A<-1.77 +if(d==3)A<-2.78 +if(d>2){ +for(j in 3:d)cd[j]<-2*pi*cd[j-2]/j # p. 76 +} +if(d>3)A<-(8*d*(d+2)*(d+4)*(2*sqrt(pi))^d)/((2*d+1)*cd[d]) # p. 87 +hval<-A*(1/n)^(1/(d+4)) # p. 86 +for(j in 1:d){ +sig<-sqrt(var(x[,j])) +temp<-idealf(x[,j]) +iqr<-(temp$qu-temp$ql)/1.34 +A<-min(c(sig,iqr)) +x[,j]<-x[,j]/A +} +xx<-cbind(rep(1,nrow(x)),x) +yhat<-NA +for(j in 1:n){ +yhat[j]<-NA +temp1<-t(t(x)-x[j,])/(hval) +temp1<-temp1^2 +temp1<-apply(temp1,1,FUN="sum") +temp<-.5*(d+2)*(1-temp1)/cd[d] +epan<-ifelse(temp1<1,temp,0) # Epanechnikov kernel, p. 
76 +chkit<-sum(epan!=0) +if(chkit >= np1){ +vals<-lsfit(x,y,wt=epan)$coef +yhat[j]<-xx[j,]%*%vals +}} +if(plotit && d==2){ +if(pr){ +if(!scale){ +print("scale=F is specified") +print("If there is dependence, might use scale=T") +}} +m<-elimna(cbind(xrem,yhat)) +xrem<-m[,1:d] +yhat<-m[,np1] +fitr<-yhat +iout<-c(1:length(fitr)) +nm1<-length(fitr)-1 +for(i in 1:nm1){ +ip1<-i+1 +for(k in ip1:length(fitr))if(sum(xrem[i,]==xrem[k,])==2)iout[k]<-0 +} +fitr<-fitr[iout>=1] +mkeep<-xrem[iout>=1,] +fit<-interp(mkeep[,1],mkeep[,2],fitr) +persp(fit,theta=theta,phi=phi,expand=expand,xlab=xlab,ylab=ylab,zlab=zlab, +scale=scale,ticktype=ticktype) +}} +if(d==1){ +yhat<-locreg(x[,1],y,pyhat=TRUE,np=np,plotit=plotit,pts=pts, +xlab=xlab,ylab=ylab,pch=pch) +yhat2<-locreg(x[,1],y,pyhat=TRUE,np=0,plotit=FALSE) +} +if(d>1)yhat2<-yhat +m<-NULL +#E.pow<-varfun(yhat2[!is.na(yhat2)])/varfun(y) +# Estimate of explanatory power performs poorly. +if(pyhat)m<-yhat +#list(Strength.Assoc=sqrt(E.pow),Explanatory.Power=E.pow,yhat=m) +m +} + + +attract<-function(X, Y, k = 5) +{ +# Works in Splus but not in R. +# For simple linear regression: plots k elemental starts and +# their domains of attraction. Calls conc2. + l1coef <- l1fit(X, Y)$coef + X <- as.matrix(X) + nr <- dim(X)[1] + nc <- dim(X)[2] + 1 + J <- 1:nc + dom <- matrix(nrow = k, ncol = nc) + par(mfrow = c(1, 2)) + plot(X, Y) + title("a) 5 Elemental Starts") + for(i in 1:k) { +## get J + J <- sample(nr, nc) ## get bJ, the elem fit + if(abs(X[J[1]] - X[J[2]]) < 1/100000000) { + slope <- 0 + } + else { + slope <- (Y[J[1]] - Y[J[2]])/(X[J[1]] - X[J[2]]) + } + int <- Y[J[1]] - slope * X[J[1]] + fit <- c(int, slope) + yhat <- X %*% fit[2:nc] + fit[1] + lines(X, yhat) + ## get the domain of attraction for LTA concentration + dom[i, ] <- conc2(X, Y, start = fit)$coef + } + plot(X, Y) + for(i in 1:k) { + fit <- dom[i, ] + yhat <- X %*% fit[2:nc] + fit[1] + lines(X, yhat) + } + title("b) The Corresponding Attractors") +} + +bg2ci<-function(x, alpha = 0.05) +{ +#gets BGse with middle n^0.8 cases for sample median and +#the corresponding robust 100 (1-alpha)% CI. This is optimal +#for estimating the SE but is not resistant. 
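+#
+# Example (hypothetical sample; a minimal sketch):
+#   set.seed(1); bg2ci(rnorm(40))$int  # robust confidence interval for the median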
+ n <- length(x) + up <- 1 - alpha/2 + med <- median(x) + ln <- max(1,floor(n/2) - ceiling(0.5 * n^0.8)) + un <- n - ln + rdf <- un - ln - 1 + cut <- qt(up, rdf) + d <- sort(x) + se2 <- (d[un] - d[ln])/(2 * n^0.3) + rval <- cut * se2 + rlo2 <- med - rval + rhi2 <- med + rval + #got low and high endpoints of robust CI + list(int = c(rlo2, rhi2), med = med, se2 = se2) +} + +cav<-function(alpha = 0.01, k = 5) +{ +#gets n(asy var) for the alpha trimmed mean +#and T_(A,n)(k) if errors are Cauchy(0,1) + z <- tan(pi * (alpha - 0.5)) + val <- (z - atan(z))/((1 - 2 * alpha) * atan(z)) + ntmav <- val + (2 * alpha * (tan(pi * (alpha - 0.5)))^2)/(1 - 2 * alpha + )^2 + zj <- k + alphaj <- 0.5 + atan( - k)/pi + alphaj <- ceiling(100 * alphaj)/100 + zj <- tan(pi * (alphaj - 0.5)) + val <- (zj - atan(zj))/((1 - 2 * alphaj) * atan(zj)) + natmav <- val + (2 * alphaj * (tan(pi * (alphaj - 0.5)))^2)/(1 - 2 * + alphaj)^2 + return(ntmav, natmav) +} + +cci<-function(x, alpha = 0.05) +{ +#gets classical 100 (1-alpha)% CI +#defaults are alpha = .05 + n <- length(x) + up <- 1 - alpha/2 + mn <- mean(x) + v <- var(x) + se <- sqrt(v/n) + val <- qt(up, n - 1) * se + lo <- mn - val + hi <- mn + val + list(int = c(lo, hi), mean = mn, se = se) +} + +cgci<-function(x, alpha = 0.05, ks = 3.5) +{ +#gets T_S,n with a coarse grid +# and the corresponding robust 100 (1-alpha)% CI + n <- length(x) + up <- 1 - alpha/2 + med <- median(x) + madd <- mad(x, constant = 1) + d <- sort(x) ##get robust T_S,n CI + lo <- sum(x < (med - ks * madd)) + hi <- sum(x > (med + ks * madd)) + tp <- max(hi, lo)/n + if(tp == 0) + tp <- 0 + if(tp > 0 && tp <= 0.01) + tp <- 0.01 + if(tp > 0.01 && tp <= 0.1) + tp <- 0.1 + if(tp > 0.1 && tp <= 0.25) + tp <- 0.25 + if(tp > 0.25 && tp <= 0.4) + tp <- 0.4 + if(tp > 0.4) + tp <- 0.49 + tstmn <- mean(x, trim = tp) + #have obtained the two stage trimmed mean + ln <- floor(n * tp) + un <- n - ln + if(ln > 0) { + d[1:ln] <- d[(ln + 1)] + d[(un + 1):n] <- d[un] + } + den <- ((un - ln)/n)^2 + swv <- var(d)/den + #got the scaled Winsorized variance + rdf <- un - ln - 1 + rval <- qt(up, rdf) * sqrt(swv/n) + tslo <- tstmn - rval + tshi <- tstmn + rval + ##got low and high endpoints of robust T_S,n CI + list(int = c(tslo, tshi), tp = tp) +} + + +cltv<- +function(gam = 0.5) +{ +# Gets asy var for lts(h) and lta(h)at Cauchy C(0,1) +# where h/n -> gam. 
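+#
+# (Note: the multi-argument return() used below is an S-PLUS idiom; under R,
+#  something like return(c(ltsv=ltsv, ltav=ltav)) is needed for the call to succeed.)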
+ k <- tan((pi * gam)/2) + num <- 2 * k - pi * gam + den <- pi * (gam - (2 * k)/(pi * (1 + k^2)))^2 + ltsv <- num/den + num <- gam + den <- 4 * (1/pi - 1/(pi * (1 + k^2)))^2 + ltav <- num/den + return(ltsv, ltav) +} + +cmba2<- +function(x, csteps = 5, ii = 1) +{ +# gets the covmba estimator using 98, 95, 90, 80, 70, 60 and 50% trimming + n <- dim(x)[1] + p <- dim(x)[2] + mds <- matrix(nrow = n, ncol = 8, 0) ##get the DGK estimator + covs <- var(x) + mns <- apply(x, 2, mean) + cmd <- sqrt(mahalanobis(x, mns, covs)) ## concentrate + for(i in 1:csteps) { + md2 <- mahalanobis(x, mns, covs) + medd2 <- median(md2) + mns <- apply(x[md2 <= medd2, ], 2, mean) + covs <- var(x[md2 <= medd2, ]) + } + mds[, 8] <- sqrt(mahalanobis(x, mns, covs)) + covb <- covs + mnb <- mns ##get the square root of det(covb) + critb <- prod(diag(chol(covb))) ##get the resistant estimator + covv <- diag(p) + med <- apply(x, 2, median) + md2 <- mahalanobis(x, center = med, covv) + smd2 <- sort(md2) + val <- p + 3 + tem <- 1:7 + tem[1] <- smd2[val + floor(0.02 * n)] + tem[2] <- smd2[val + floor(0.05 * n)] + tem[3] <- smd2[val + floor(0.1 * n)] + tem[4] <- smd2[val + floor(0.2 * n)] + tem[5] <- smd2[val + floor(0.3 * n)] + tem[6] <- smd2[val + floor(0.4 * n)] + tem[7] <- median(md2) + medd2 <- tem[7] + for(j in ii:7) { +## get the start + val2 <- tem[j] + mns <- apply(x[md2 <= val2, ], 2, mean) + covs <- var(x[md2 <= val2, ]) ## concentrate + for(i in 1:csteps) { + md2 <- mahalanobis(x, mns, covs) + medd2 <- median(md2) + mns <- apply(x[md2 <= medd2, ], 2, mean) + covs <- var(x[md2 <= medd2, ]) + } + mds[, j] <- sqrt(mahalanobis(x, mns, covs)) + plot(cmd, mds[, j]) + identify(cmd, mds[, j]) + crit <- prod(diag(chol(covs))) + if(crit < critb) { + critb <- crit + covb <- covs + mnb <- mns + } + } + pairs(mds) ##scale for better performance at MVN + rd2 <- mahalanobis(x, mnb, covb) + const <- median(rd2)/(qchisq(0.5, p)) + covb <- const * covb + list(center = mnb, cov = covb, mds = mds) +} + +conc2<- +function(x, y, start = l1fit(x, y)$coef) +{ #Finds that LTA attractor of the start. + nc <- dim(x)[2] + 1 + res <- y - (x %*% start[2:nc] + start[1]) + ares <- abs(res) + cov <- ceiling(length(y)/2) + m <- sort(ares, partial = cov)[cov] + old <- sum(ares[ares <= m]) + new <- old - 1 + ct <- 0 + while(new < old) { + ct <- ct + 1 + start <- l1fit(x[ares <= m, ], y[ares <= + m])$coef + res <- y - (x %*% start[2:nc] + start[1 + ]) + ares <- abs(res) + m <- sort(ares, partial = cov)[cov] + new <- sum(ares[ares <= m]) #print(old) + if(new < old) { + old <- new + new <- new - 1 + } + } + list(coef = start, ct = ct) +} + +concmv<- +function(n = 100, csteps = 5, gam = 0.4, outliers = TRUE, start = 2) +{ +#Shows how concentration works when p = 2. 
+# Use start = 1 for DGK, start = 2 for MBA sphere, start = 3 for MBA MAD + p <- 2 #A <- cbind(c(1, 0.9), c(0.9, 1)) + x <- matrix(rnorm(n * p), ncol = p, nrow = n) #A <- diag(sqrt(1:p)) +#if(outliers == T) { +# val <- floor(gam * n) +# tem <- 10 + 0 * 1:p +# x[1:val, ] <- x[1:val, ] + tem +#} +#x <- x %*% A + A <- cbind(c(1, 0.4), c(0.4, 1)) + B <- cbind(c(0.5, 0), c(0, 0.5)) + if(outliers == T) { + val <- floor(gam * n) + x[(val + 1):n, ] <- x[(val + 1):n, ] %*% A + x[1:val, ] <- x[1:val, ] %*% B + x[1:val, 1] <- x[1:val, 1] + 0 + x[1:val, 2] <- x[1:val, 2] + 6 + } + else { + x <- x %*% A + } + if(start == 1) { + covs <- var(x) + mns <- apply(x, 2, mean) + } + if(start == 2) { + covv <- diag(p) + med <- apply(x, 2, median) + md2 <- mahalanobis(x, center = med, covv) + medd2 <- median(md2) ## get the start + mns <- apply(x[md2 <= medd2, ], 2, mean) + covs <- var(x[md2 <= medd2, ]) + } + if(start >= 2) { + tem <- apply(x, 2, mad)^2 + covv <- diag(tem) + med <- apply(x, 2, median) + md2 <- mahalanobis(x, center = med, covv) + medd2 <- median(md2) ## get the start + mns <- apply(x[md2 <= medd2, ], 2, mean) + covs <- var(x[md2 <= medd2, ]) + } +## concentrate + for(i in 1:csteps) { + md2 <- mahalanobis(x, mns, covs) + medd2 <- median(md2) + mns <- apply(x[md2 <= medd2, ], 2, mean) + covs <- var(x[md2 <= medd2, ]) + plot(x[, 1], x[, 2]) + points(x[md2 <= medd2, 1], x[md2 <= medd2, 2], pch = 15) + identify(x[, 1], x[, 2]) + } +} + +concsim<- +function(n = 100, p = 2, steps = 5, gam = 0.4, runs = 20) +{ +# This Splus function is used to determine when the DD +# plot separates outliers from non-outliers for various starts. + A <- sqrt(diag(1:p)) + mbact <- 0 + fmcdct <- 0 + mbct <- 0 + madct <- 0 + dgkct <- 0 + for(i in 1:runs) { + x <- matrix(rnorm(n * p), ncol = p, nrow = n) + ## outliers have mean (10, 10 sqrt(2), ..., 10 sqrt(p))^T + val <- floor(gam * n) + tem <- 10 + 0 * 1:p + x[1:val, ] <- x[1:val, ] + tem + x <- x %*% A #MBA + out <- covmba(x, csteps = steps) + center <- out$center + cov <- out$cov + rd2 <- mahalanobis(x, center, cov) + if(min(rd2[1:val]) > max(rd2[(val + 1):n])) +mbact <- mbact + 1 + #DGK + covs <- var(x) + mns <- apply(x, 2, mean) ## concentrate + for(i in 1:steps) { + md2 <- mahalanobis(x, mns, covs) + medd2 <- median(md2) + mns <- apply(x[md2 <= medd2, ], 2, mean) + covs <- var(x[md2 <= medd2, ]) + } + rd2 <- mahalanobis(x, mns, covs) + if(min(rd2[1:val]) > max(rd2[(val + 1):n])) dgkct <- dgkct + 1 + #Median Ball start + covv <- diag(p) + med <- apply(x, 2, median) + md2 <- mahalanobis(x, center = med, covv) + medd2 <- median(md2) ## get the start + mns <- apply(x[md2 <= medd2, ], 2, mean) + covs <- var(x[md2 <= medd2, ]) ## concentrate + for(i in 1:steps) { + md2 <- mahalanobis(x, mns, covs) + medd2 <- median(md2) + mns <- apply(x[md2 <= medd2, ], 2, mean) + covs <- var(x[md2 <= medd2, ]) + } + rd2 <- mahalanobis(x, mns, covs) + if(min(rd2[1:val]) > max(rd2[(val + 1):n])) mbct <- mbct + 1 + #MAD start + tem <- apply(x, 2, mad)^2 + covv <- diag(tem) + md2 <- mahalanobis(x, center = med, covv) + medd2 <- median(md2) ## get the start + mns <- apply(x[md2 <= medd2, ], 2, mean) + covs <- var(x[md2 <= medd2, ]) ## concentrate + for(i in 1:steps) { + md2 <- mahalanobis(x, mns, covs) + medd2 <- median(md2) + mns <- apply(x[md2 <= medd2, ], 2, mean) + covs <- var(x[md2 <= medd2, ]) + } + rd2 <- mahalanobis(x, mns, covs) + if(min(rd2[1:val]) > max(rd2[(val + 1):n])) madct <- madct + 1 + #FMCD + out <- cov.mcd(x) + center <- out$center + cov <- out$cov + rd2 <- mahalanobis(x, 
center, cov) + if(min(rd2[1:val]) > max(rd2[(val + 1):n])) + fmcdct <- fmcdct + 1 + } + list(mbact = mbact, fmcdct = fmcdct, dgkct = dgkct, mbct = mbct, madct + = madct) +} + +corrsim<- +function(n = 100, p = 3, eps = 0.4, nruns = 100, type = 1) +{ +#For R, first type "library(lqs)" before using this function +# This function generates 100 n by p matrices x. +# The output is the 100 sample correlations between the MDi and RDi +# RDi uses covmba for type = 1, rmba for type = 2, cov.mcd for type = 3 +# mahalanobis gives squared Maha distances + corrs <- 1:nruns + for(i in 1:nruns) { + wt <- 0 * (1:n) + x <- matrix(rnorm(n * p), ncol = p, nrow = n) + #The following 3 commands make x elliptically contoured. +#zu <- runif(n) +#x[zu < eps,] <- x[zu < eps,]*5 +#x <- x^2 +# To make marginals of x lognormal, use +#x <- exp(x) + center <- apply(x, 2, mean) + cov <- var(x) + md2 <- mahalanobis(x, center, cov) + if(type == 1) { + out <- covmba(x) + } + if(type == 2) { + out <- rmba(x) + } + if(type == 3) { + out <- cov.mcd(x) + } + center <- out$center + cov <- out$cov + rd2 <- mahalanobis(x, center, cov) + # need square roots for the usual distances + md <- sqrt(md2) + rd <- sqrt(rd2) + const <- sqrt(qchisq(0.5, p))/median(rd) + rd <- const * rd + # wt[rd < sqrt(qchisq(0.975, p))] <- 1 +# corrs[i] <- cor(md[wt > 0], rd[wt > 0])} + corrs[i] <- cor(md, rd) + } + cmean <- mean(corrs) + cmin <- min(corrs) + clt95 <- sum(corrs < 0.95) + clt80 <- sum(corrs < 0.8) + list(cmean = cmean, cmin = cmin, clt95 = clt95, clt80 = clt80, + corrs = corrs) +} + + +covdgk<- +function(x, csteps = 10) +{ +#computes the scaled DGK multivariate estimator + p <- dim(x)[2] + covs <- var(x) + mns <- apply(x, 2, mean) ## concentrate + for(i in 1:csteps) { + md2 <- mahalanobis(x, mns, covs) + medd2 <- median(md2) + mns <- apply(x[md2 <= medd2, ], 2, + mean) + covs <- var(x[md2 <= medd2, ]) + } +##scale for consistency at MVN + rd2 <- mahalanobis(x, mns, covs) + const <- median(rd2)/(qchisq(0.5, p)) + covs <- const * covs + list(center = mns, cov = covs) +} + +covmba <- function(x, csteps = 5) +{ # gets the MBA estimator + zx <- x + x <- as.matrix(x) + p <- dim(x)[2] + ##get the DGK estimator + covs <- var(x) + mns <- apply(x, 2, mean) ## concentrate + for(i in 1:csteps) { + md2 <- mahalanobis(x, mns, covs) + medd2 <- median(md2) + if(p > 1){ + mns <- apply(x[md2 <= medd2, ], 2, + mean) + covs <- var(x[md2 <= medd2, ]) + } + if(p == 1){ + mns <- mean(x[md2 <= medd2]) + covs <- var(x[md2 <= medd2]) + } + } + covb <- covs + mnb <- mns ##get the square root of det(covb) + critb <- prod(diag(chol(covb))) + ##get the resistant estimator + covv <- diag(p) + med <- apply(x, 2, median) + md2 <- mahalanobis(x, center = med, covv) + medd2 <- median(md2) ## get the start + if(p > 1){ + mns <- apply(x[md2 <= medd2, ], 2, mean) + covs <- var(x[md2 <= medd2, ]) + } + if(p == 1){ + mns <- mean(zx[md2 <= medd2]) + covs <- var(zx[md2 <= medd2]) + } + ## concentrate + for(i in 1:csteps) { + md2 <- mahalanobis(x, mns, covs) + medd2 <- median(md2) + if(p > 1){ + mns <- apply(x[md2 <= medd2, ], 2, + mean) + covs <- var(x[md2 <= medd2, ]) + } + if(p == 1){ + mns <- mean(zx[md2 <= medd2]) + covs <- var(zx[md2 <= medd2]) + } + } + crit <- prod(diag(chol(covs))) + if(crit < critb) { + critb <- crit + covb <- covs + mnb <- mns + } +##scale for better performance at MVN + rd2 <- mahalanobis(x, mnb, covb) + const <- median(rd2)/(qchisq(0.5, p)) + covb <- const * covb + list(center = mnb, cov = covb) +} + +covmba2<- +function(x, csteps = 5) +{ # gets the MBA 
estimator, use covmba2 instead of covmba if p > 1 + p <- dim(x)[2] + ##get the DGK estimator + covs <- var(x) + mns <- apply(x, 2, mean) ## concentrate + for(i in 1:csteps) { + md2 <- mahalanobis(x, mns, covs) + medd2 <- median(md2) + mns <- apply(x[md2 <= medd2, ], 2, + mean) + covs <- var(x[md2 <= medd2, ]) + } + covb <- covs + mnb <- mns ##get the square root of det(covb) + critb <- prod(diag(chol(covb))) + ##get the resistant estimator + covv <- diag(p) + med <- apply(x, 2, median) + md2 <- mahalanobis(x, center = med, covv) + medd2 <- median(md2) ## get the start + mns <- apply(x[md2 <= medd2, ], 2, mean) + covs <- var(x[md2 <= medd2, ]) ## concentrate + for(i in 1:csteps) { + md2 <- mahalanobis(x, mns, covs) + medd2 <- median(md2) + mns <- apply(x[md2 <= medd2, ], 2, + mean) + covs <- var(x[md2 <= medd2, ]) + } + crit <- prod(diag(chol(covs))) + if(crit < critb) { + critb <- crit + covb <- covs + mnb <- mns + } +##scale for better performance at MVN + rd2 <- mahalanobis(x, mnb, covb) + const <- median(rd2)/(qchisq(0.5, p)) + covb <- const * covb + list(center = mnb, cov = covb) +} + +covsim2<- +function(n=100, p = 2, steps = 5, gam = 0.4, runs = 20) +{ +# This Splus function is used to determine when the DD +# plot separates outliers from non-outliers. + A <- sqrt(diag(1:p)) + mbact <- 0 + for(i in 1:runs) { + x <- matrix(rnorm(n * p), ncol = p, nrow = n) + ## outliers have mean (10, 10 sqrt(2), ..., 10 sqrt(p))^T + val <- floor(gam * n) + tem <- 10 + 0 * 1:p + x[1:val, ] <- x[1:val, ] + tem + x <- x %*% A + out <- covmba(x, csteps = steps) + center <- out$center + cov <- out$cov + rd2 <- mahalanobis(x, center, cov) + if(min(rd2[1:val]) > max(rd2[(val + 1):n])) + mbact <- mbact + 1 + } + list(mbact = mbact) +} + +ctrviews<- +function(x, Y, ii = 1) +{ +# Uses classical distances instead of robust distances. +# Trimmed views for 90, 80, ... 0 percent +# trimming. Allows visualization of m +# and crude estimatation of c beta in models +# of the form y = m(x^T beta) + e. +# Workstation: activate a graphics +# device with command "X11()" or "motif()." +# R needs command "library(lqs)." +# Advance the view with the right mouse button. +# In R, highight "stop." + x <- as.matrix(x) + center <- apply(x, 2, mean) + cov <- var(x) + rd2 <- mahalanobis(x, center, cov) + labs <- c("90%", "80%", "70%", "60%", "50%", "40%", "30%", "20%", "10%", + "0%") + tem <- seq(0.1, 1, 0.1) + for(i in ii:10) { + val <- quantile(rd2, tem[i]) + bhat <- lsfit(x[rd2 <= val, ], Y[rd2 <= val])$coef + ESP <- x %*% bhat[-1] + plot(ESP, Y) + title(labs[i]) + identify(ESP, Y) + print(bhat) + } +} + +ddcomp<- +function(x, steps = 5) +{ +# Makes 4 DD plots using the FMCD and MBA estimators. +# Click left mouse button to identify points. +# Click right mouse button to end the function. +# Unix systems turn on graphics device eg enter +# command "X11()" or "motif()" before using. +# R users need to type "library(lqs)" before using. 
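+# (In current versions of R, cov.mcd and the other lqs functions are supplied by
+#  the MASS package, so library(MASS) is the modern equivalent of library(lqs).)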
+ p <- dim(x)[2] + par(mfrow = c(2, 2)) + center <- apply(x, 2, mean) + cov <- var(x) + md2 <- mahalanobis(x, center, cov) + # MD is the classical and RD the robust distance + MD <- sqrt(md2) #DGK start + md2 <- mahalanobis(x, center, cov) + medd2 <- median(md2) ## get the start + mns <- center + covs <- cov ## concentrate + for(i in 1:steps) { + md2 <- mahalanobis(x, mns, covs) + medd2 <- median(md2) + mns <- apply(x[md2 <= medd2, ], 2, mean) + covs <- var(x[md2 <= medd2, ]) + } + rd2 <- mahalanobis(x, mns, covs) + rd <- sqrt(rd2) #Scale the RD so the plot follows the 0-1 line +#if the data is multivariate normal. + const <- sqrt(qchisq(0.5, p))/median(rd) + RDdgk <- const * rd + plot(MD, RDdgk) + abline(0, 1) + identify(MD, RDdgk) + title("DGK DD Plot") #MBA + out <- covmba(x) + center <- out$center + cov <- out$cov + rd2 <- mahalanobis(x, center, cov) + rd <- sqrt(rd2) #Scale the RD so the plot follows the identity line +#if the data is multivariate normal. + const <- sqrt(qchisq(0.5, p))/median(rd) + RDm <- const * rd + plot(MD, RDm) + abline(0, 1) + identify(MD, RDm) + title("MBA DD Plot") #FMCD + out <- cov.mcd(x) + center <- out$center + cov <- out$cov + rd2 <- mahalanobis(x, center, cov) + rd <- sqrt(rd2) #Scale the RD so the plot follows the 0-1 line +#if the data is multivariate normal. + const <- sqrt(qchisq(0.5, p))/median(rd) + RDf <- const * rd + plot(MD, RDf) + abline(0, 1) + identify(MD, RDf) + title("FMCD DD Plot") #Median Ball start + covv <- diag(p) + med <- apply(x, 2, median) + md2 <- mahalanobis(x, center = med, covv) + medd2 <- median(md2) ## get the start + mns <- apply(x[md2 <= medd2, ], 2, mean) + covs <- var(x[md2 <= medd2, ]) ## concentrate + for(i in 1:steps) { + md2 <- mahalanobis(x, mns, covs) + medd2 <- median(md2) + mns <- apply(x[md2 <= medd2, ], 2, mean) + covs <- var(x[md2 <= medd2, ]) + } + rd2 <- mahalanobis(x, mns, covs) + rd <- sqrt(rd2) #Scale the RD so the plot follows the 0-1 line +#if the data is multivariate normal. + const <- sqrt(qchisq(0.5, p))/median(rd) + RDmb <- const * rd + plot(MD, RDmb) + abline(0, 1) + identify(MD, RDmb) + title("Med Ball DD Plot") +} + +ddmv<- +function(n = 100, p = 2, steps = 5, gam = 0.4, + outtype = 2, est = 1) +{ +# This Splus function is used to determine when the DD +# plot separates outliers from non-outliers for various starts. +# Workstation needs to activate a graphics +# device with the command "X11()" or "motif()." +# Advance the view with the right mouse button. 
+## est = 1 for DGK, 2 for median ball, 3 for MAD + A <- sqrt(diag(1:p)) + x <- matrix(rnorm(n * p), ncol = p, nrow + = n) + val <- floor(gam * n) + tem <- 10 + 0 * 1:p + x[1:val, ] <- x[1:val, ] + tem + #if outtype = 1, outliers are Np(10 1, Ip) nonoutliers Np(0,Ip) + if(outtype == 2) x <- x %*% A + ## outliers have mean (10, 10 sqrt(2), ..., 10 sqrt(p))^T +## get the start + if(est == 1) { +#DGK classical start + covs <- var(x) + mns <- apply(x, 2, mean) + } + if(est == 2) { +#Median Ball high breakdown start + covv <- diag(p) + med <- apply(x, 2, median) + md2 <- mahalanobis(x, center = + med, covv) + medd2 <- median(md2) + ## get the start + mns <- apply(x[md2 <= medd2, ], + 2, mean) + covs <- var(x[md2 <= medd2, ]) + } + if(est == 3) { +#MAD high breakdown start + tem <- apply(x, 2, mad)^2 + covv <- diag(tem) + med <- apply(x, 2, median) + md2 <- mahalanobis(x, center = + med, covv) + medd2 <- median(md2) + ## get the start + mns <- apply(x[md2 <= medd2, ], + 2, mean) + covs <- var(x[md2 <= medd2, ]) + } +## concentrate and plot, highlighting outliers + MD <- sqrt(mahalanobis(x, mns, covs)) + for(i in 1:steps) { + md <- sqrt(mahalanobis(x, mns, + covs)) + medd <- median(md) + mns <- apply(x[md <= medd, ], 2, + mean) + covs <- var(x[md <= medd, ]) + rd <- sqrt(mahalanobis(x, mns, + covs)) + plot(MD, rd) + points(MD[1:val], rd[1:val], pch + = 15) + identify(MD, rd) + } +} + + +ddplot<- +function(x) +{ +# Makes a DD plot. cov.mcd is used for the RDi. +# Click left mouse button to identify points. +# Click right mouse button to end the function. +# Unix systems turn on graphics device eg enter +# command "X11()" or "motif()" before using. +# R users need to type "library(lqs)" before using. + p <- dim(x)[2] + center <- apply(x, 2, mean) + cov <- var(x) + md2 <- mahalanobis(x, center, cov) + out <- cov.mcd(x) # or use out <- cov.mve(x) + center <- out$center + cov <- out$cov + rd2 <- mahalanobis(x, center, cov) + # md is the classical and rd the robust distance + MD <- sqrt(md2) + rd <- sqrt(rd2) + #Scale the RD so the plot follows the 0-1 line +#if the data is multivariate normal. + const <- sqrt(qchisq(0.5, p))/median(rd) + RD <- const * rd + plot(MD, RD) + abline(0, 1) + identify(MD, RD) # list(MD = MD, RD = RD) +} + + +ddsim<- +function(n = 100, p = 3, eps = 0.4, type = 1) +{ +# R: type "library(lqs)" before using if type = 3. +# Rapidly plots 20 DD plots in a row. +# Unix: type "X11()" or "motif()" to +# turn on a graphics device. +# RDi uses covmba for type = 1, rmba for type = 2, cov.mcd for type = 3 + med <- 1:20 + for(i in 1:20) { + x <- matrix(rnorm(n * p), ncol = p, nrow = n) + ## For elliptically contoured data, use: +#zu <- runif(n) +#x[zu < eps,] <- x[zu < eps,]*5 +#x <- x^2 +##For lognormal marginals, add: +#x <- exp(x) + center <- apply(x, 2, mean) + cov <- var(x) + md2 <- mahalanobis(x, center, cov) + if(type == 1) { + out <- covmba(x) + } + if(type == 2) { + out <- rmba(x) + } + if(type == 3) { + out <- cov.mcd(x) + } + center <- out$center + cov <- out$cov + rd2 <- mahalanobis(x, center, cov) + md <- sqrt(md2) + rd <- sqrt(rd2) #Scale the RDi so plot follows 0-1 line +#if the data is multivariate normal. 
+ const <- sqrt(qchisq(0.5, p))/median(rd) + rd <- const * rd + plot(md, rd) + abline(0, 1) + med[i] <- median(md) #The following command can be inserted +#to slow down the plots "identify(md,rd)" + } + list(med = med) +} + + +deav<- +function(alpha = 0.01, k = 5) +{ +#gets n(asy var) for the alpha trimmed mean +#and T_(A,n)(k) if errors are DE(0,1) + z <- - log(2 * alpha) + num <- 2 - (2 + 2 * z + z^2) * exp( - z) + den <- (1 - exp( - z)) * (1 - 2 * alpha) + val1 <- num/den + num <- 2 * alpha * z^2 + den <- (1 - 2 * alpha)^2 + ntmav <- val1 + num/den + zj <- k * log(2) + alphaj <- 0.5 * exp( - zj) + alphaj <- ceiling(100 * alphaj)/100 + zj <- - log(2 * alphaj) + num <- 2 - (2 + 2 * zj + zj^2) * exp( - zj) + den <- (1 - exp( - zj)) * (1 - 2 * alphaj) + val1 <- num/den + num <- 2 * alphaj * zj^2 + den <- (1 - 2 * alphaj)^2 + natmav <- val1 + num/den + return(ntmav, natmav) +} + + +deltv<- +function(gam = 0.5) +{ +# Gets asy var for lts(h) and lta(h) at standard double exp +# where h/n -> gam. + k <- -1 * log(1 - gam) + num <- 2 - (2 + 2 * k + k^2) * exp( - k) + den <- (gam - k * exp( - k))^2 + ltsv <- num/den + ltav <- 1/gam + return(ltsv, ltav) +} + +diagplot<- +function(x, Y) +{ +# Scatterplot matrix of OLS diagnostics. +# Workstation need to activate a graphics +# device with command "X11()" or "motif()." + n <- length(Y) + rmat <- matrix(nrow = n, ncol = 7) + out <- lsfit(x, Y) + tem <- ls.diag(out) + rmat[, 1] <- tem$cooks + rmat[, 2] <- tem$hat + rmat[, 3] <- tem$std.res + rmat[, 4] <- tem$stud.res + rmat[, 5] <- tem$dfits + rmat[, 6] <- Y - out$resid + rmat[, 7] <- Y + pairs(rmat, labels = c("Cook's CD", "leverages", "stand resid", + "stud resid", "DFFITS", "YHAT", "Y")) +} + +ellipse <- function(x, center = apply(x, 2, mean), cov = var(x), alph = 0.95) +{# Makes a covering interval. The x should have 2 columns. + mu1 <- center[1] + mu2 <- center[2] + w <- solve(cov) + w11 <- w[1, 1] + w12 <- w[1, 2] + w22 <- w[2, 2] + tem <- x[, 2] - mu2 + y2 <- seq(min(tem), max(tem), length = 100) + xc <- qchisq(alph, 2) + el <- matrix(0, 2, 2) + ind <- 0 + for(i in 1:100) { + j1 <- (y2[i] * w12)^2 + j2 <- w11 * ((y2[i])^2 * w22 - xc) + # print(i) +# print(j1 - j2) + if((j1 - j2) >= 0) { + ind <- ind + 2 + tem <- (y2[i] * w12)^2 + tem <- tem - w11 * ((y2[i])^2 * + w22 - xc) + tem <- sqrt(tem) + term <- ( - y2[i] * w12 + tem)/ + w11 + el <- rbind(el, c((term + mu1), ( + y2[i] + mu2))) + term <- ( - y2[i] * w12 - tem)/ + w11 + el <- rbind(el, c((term + mu1), ( + y2[i] + mu2))) + } + } + el <- el[3:ind, ] + nn <- dim(x)[1] + if((ind - 2) > nn) { + tem <- sample((ind - 2), nn) + el <- el[tem, ] + } + xt <- cbind(x[, 1], el[, 1]) + yt <- cbind(x[, 2], el[, 2]) + matplot(xt, yt) +} + +essp<- +function(x, Y, M = 50) +{ +# Trimmed view or ESSP for M percent +# trimming. Allows visualization of g +# and crude estimation of c beta in models +# of the form y = g(x^T beta,e). +# Workstation need to activate a graphics +# device with command "X11()" or "motif()." +# R needs command "library(lqs)." +# Click on the right mouse button to finish. +# In R, highlight "stop." 
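+# A hypothetical usage sketch (not from the original source); assumes cov.mcd
+# is available (in current R it is provided by MASS rather than lqs):
+#   x <- matrix(rnorm(200), ncol = 2)
+#   y <- as.vector((x %*% c(1, 1))^2) + rnorm(100)
+#   essp(x, y, M = 50)   # plot y against the ESP from the 50% trimmed OLS fit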
+ x <- as.matrix(x) + tval <- M/100 + out <- cov.mcd(x) + center <- out$center + cov <- out$cov + rd2 <- mahalanobis(x, center, cov) + val <- quantile(rd2, (1 - tval)) + bhat <- lsfit(x[rd2 <= val, ], Y[rd2 <= val])$ + coef + ESP <- x %*% bhat[-1] + plot(ESP, Y) + identify(ESP, Y) + return(bhat[-1]) +} + +ffL<- +function(x, y) +{ +# for unix, use X11() to turn on the graphics device before using this function +# this function makes a FF lambda plot where the competing models are Y^L + n <- length(y) + rmat <- matrix(nrow = n, ncol = 5) + rmat[, 1] <- y - lsfit(x, y)$resid + ytem <- (y^(0.5) - 1)/0.5 + rmat[, 2] <- ytem - lsfit(x, ytem)$resid + rmat[, 3] <- log(y) - lsfit(x, log(y))$resid + ytem <- (y^(-0.5) - 1)/-0.5 + rmat[, 4] <- ytem - lsfit(x, ytem)$resid + ytem <- (y^(-1) - 1)/-1 + rmat[, 5] <- ytem - lsfit(x, ytem)$resid + pairs(rmat, labels = c("YHAT", "YHAT^(0.5)", "YHAT^(0)", "YHAT^(-0.5)", + "YHAT^(-1)")) + min(cor(rmat)) +} + +fflynx<-function(){ +# R users need to type library(ts) and data(lynx) +Y <- log10(lynx) +FAR2 <- 1:114 +FAR11 <- 1:114 +FAR12 <- 1:114 +SETAR272 <- 1:114 +SETAR252 <- 1:114 +for(i in 3:114){ +FAR2[i ] <- 1.05 + 1.41*Y[i-1] -0.77*Y[i-2]} +for(i in 12:114){ +FAR11[i ] <- 1.13*Y[i-1] -0.51*Y[i-2] + .23*Y[i-3] -0.29*Y[i-4] + + .14*Y[i-5] -0.14*Y[i-6] + 0.08*Y[i-7] -0.04*Y[i-8] + + .13*Y[i-9] + 0.19*Y[i-10] - .31*Y[i-11] } +for(i in 13:114){ +FAR12[i ] <- 1.123 + 1.084*Y[i-1] -0.477*Y[i-2] + .265*Y[i-3] -0.218*Y[i-4] + + .180*Y[i-9] - .224*Y[i-12] } +for(i in 13:114){ +if( Y[i-2] <= 3.116){ +SETAR272[i ] <- 0.546 + 1.032*Y[i-1] -0.173*Y[i-2] + .171*Y[i-3] -0.431*Y[i-4] + + .332*Y[i-5] - .284*Y[i-6] + .210*Y[i-7]} +else {SETAR272[i ] <- 2.632 + 1.492*Y[i-1] -1.324*Y[i-2]} +} +for(i in 13:114){ +if( Y[i-2] <= 3.05){ +SETAR252[i ] <- 0.768 + 1.064*Y[i-1] -0.200*Y[i-2] + .164*Y[i-3] -0.428*Y[i-4] + + .181*Y[i-5] } +else {SETAR252[i ] <- 2.254 + 1.474*Y[i-1] -1.202*Y[i-2]} +} +x <- cbind(Y,FAR2,FAR11,FAR12,SETAR272,SETAR252) +x <- x[13:114,] +print(cor(x)) +pairs(x) +} + + +ffplot<- +function(x, y, nsamps = 7) +{ +# For Unix, use X11() to turn on the graphics device before +# using this function. For R, first type library(lqs). +# Makes an FF plot with several resistant estimators. +# Need the program mbareg.. + n <- length(y) + rmat <- matrix(nrow = n, ncol = 6) + lsfit <- y - lsfit(x, y)$residuals + print("got OLS") + l1fit <- y - l1fit(x, y)$residuals + print("got L1") + almsfit <- y - lmsreg(x, y)$resid + print("got ALMS") + altsfit <- y - ltsreg(x, y)$residuals + print("got ALTS") + mbacoef <- mbareg(x, y, nsamp = nsamps)$coef + MBAFIT <- mbacoef[1] + x %*% mbacoef[-1] + print("got MBA") + rmat[, 1] <- y + rmat[, 2] <- lsfit + rmat[, 3] <- l1fit + rmat[, 4] <- almsfit + rmat[, 5] <- altsfit + rmat[, 6] <- MBAFIT + pairs(rmat, labels = c("Y", "OLS Fit", "L1 Fit", "ALMS Fit", + "ALTS Fit", "MBAREG Fit")) +} + +ffplot2<- +function(x, y, nsamps = 7) +{ +# For Unix, use X11() to turn on the graphics device before +# using this function. For R, first type library(lqs). +# Makes an FF plot with several resistiant estimators. +# Need the program mbareg. 
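+# Hypothetical usage (assumes lmsreg/ltsreg are available, e.g. via MASS,
+# and that mbareg has been sourced):
+#   ffplot2(x, y)   # scatterplot matrix of Y and the OLS, ALMS, ALTS and MBA fits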
+ n <- length(y) + rmat <- matrix(nrow = n, ncol = 5) + lsfit <- y - lsfit(x, y)$residuals + print("got OLS") + almsfit <- y - lmsreg(x, y)$resid + print("got ALMS") + altsfit <- y - ltsreg(x, y)$residuals + print("got ALTS") + mbacoef <- mbareg(x, y, nsamp = nsamps)$coef + MBAFIT <- mbacoef[1] + x %*% mbacoef[-1] + print("got MBA") + rmat[, 1] <- y + rmat[, 2] <- lsfit + rmat[, 3] <- almsfit + rmat[, 4] <- altsfit + rmat[, 5] <- MBAFIT + pairs(rmat, labels = c("Y", "OLS Fit", "ALMS Fit", "ALTS Fit", "MBAREG Fit")) +} + +fysim<-function( runs = 20) +{ +# 20 FY plots for simulated AR(2) time series data +fycorr <- 1:runs +for(i in 1: runs){ +Y <- ardata()$arts +out <- ar.yw(Y) +Yts <- Y[10:200] +FIT <- Yts - out$resid[10:200] +plot(FIT,Yts) +abline(0,1) +fycorr[i] <- cor(FIT,Yts) +} +list(fycorr=fycorr) +} + +gamper<- +function(h, k=500) +{ + n <- 10000 + c <- 5000 + gam0 <- min((n - c)/n, (1 - (1 - 0.2^(1/k))^(1/ + h))) * 100 + print(gam0) +} + +gamper2<- +function(p, k = 500) +{ +##estimates the amount of contamination fmcd can tolerate + n <- 10000 + c <- 5000 + h <- p + 1 + gam0 <- min((n - c)/n, (1 - (1 - 0.2^(1/k))^(1/h))) * 100 + print(gam0) +} + + +llrdata <- function(n = 100, q=5) +{ +# Generates data for loglinear regression. +# + y <- 0 * 1:n + beta <- 0 * 1:q + beta[1:3] <- 1 + alpha <- -2.5 + x <- matrix(rnorm(n * q), nrow = n, + ncol = q) + x <- 0.5*x + 1 + SP <- alpha + x%*%beta + y <- rpois(n,lambda=exp(SP)) + list(x=x,y=y) +} + +llressp <- function(x,y) +{ +# Makes the ESSP for loglinear regression. +# Workstation: need to activate a graphics +# device with command "X11()" or "motif()." +# +# If q is changed, change the formula in the glm statement. + q <- 5 +# change formula to x[,1]+ ... + x[,q] with q + out <- glm(y ~ x[, 1] + x[, 2] + x[, 3] + + x[, 4] + x[,5], family = poisson) + ESP <- x %*% out$coef[-1] + out$coef[1] + Y <- y + plot(ESP,Y) + abline(mean(y),0) + fit <- y + fit <- exp(ESP) + indx <- sort.list(ESP) + lines(ESP[indx],fit[indx]) + lines(lowess(ESP,y),type="s") + } + +llrplot<- +function(x, y) +{ +# Makes ESSP, the weighted forward response and residual plots for loglinear regression. +# +# If q is changed, change the formula in the glm statement. + q <- 5 # change formula to x[,1]+ ... + x[,q] with q + out <- glm(y ~ x[, 1] + x[, 2] + x[, 3] + x[, 4] + x[, 5], family = + poisson) + ESP <- x %*% out$coef[-1] + out$coef[1] + Y <- y + par(mfrow = c(2, 2)) + plot(ESP, Y) + abline(mean(y), 0) + Ehat <- exp(ESP) + indx <- sort.list(ESP) + lines(ESP[indx], Ehat[indx]) + lines(lowess(ESP, y), type = "s") + title("a) ESSP") + Vhat <- (y - Ehat)^2 + plot(Ehat, Vhat) + abline(0, 1) + #abline(lsfit(Ehat, Vhat)$coef) + title("b)") + Z <- y + Z[y < 1] <- Z[y < 1] + 0.5 + MWRES <- sqrt(Z) * (log(Z) - x %*% out$coef[-1] - out$coef[1]) + MWFIT <- sqrt(Z) * log(Z) - MWRES + plot(MWFIT, sqrt(Z) * log(Z)) + abline(0, 1) + #abline(lsfit(MWFIT, sqrt(Z) * log(Z))$coef) + title("c) WFRP Based on MLE") + plot(MWFIT, MWRES) + title("d) WRP Based on MLE") +} + +llrsim<- +function(n = 100, nruns = 1, type = 1) +{ +# Runs llrpot 10 times on simulated LLR. +# Type = 1 for Poisson data, Type = 2 for negative binomial data +# Calls llrdata, oddata, llrplot. + q <- 5 + for(i in 1:nruns) { + if(type == 1) + out <- llrdata(n, q) + else out <- oddata(n, q) + x <- out$x + y <- out$y + llrplot(x, y) #identify(MWFIT, MWRES) + } +} + +llrwtfrp <- function(x,y) +{ +# Makes the weighted forward response and residual plots for loglinear regression. 
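+# (Zero counts are shifted to 0.5 below so that log(Z) is defined; panels
+# a) and b) use the weighted least squares fit of log(Z) on x with weights Z,
+# while panels c) and d) use the Poisson MLE from glm.)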
+# Workstation: need to activate a graphics +# device with command "X11()" or "motif()." + +# +# If q is changed, change the formula in the glm statement. + q <- 5 +# change formula to x[,1]+ ... + x[,q] with q + out <- glm(y ~ x[, 1] + x[, 2] + x[, 3] + + x[, 4] + x[,5], family = poisson) + ESP <- x %*% out$coef[-1] + out$coef[1] + Z <- y + Z[y<1] <- Z[y<1] + 0.5 + out2<-lsfit(x,y=log(Z),wt=Z) + #WRES <- sqrt(Z)*(log(Z) - x%*%out2$coef[-1] - out2$coef[1]) + WRES <- out2$res + WFIT <- sqrt(Z)*log(Z) - WRES + MWRES <- sqrt(Z)*(log(Z) - x%*%out$coef[-1] - out$coef[1]) + MWFIT <- sqrt(Z)*log(Z) - MWRES + par(mfrow=c(2,2)) + plot(WFIT,sqrt(Z)*log(Z)) + abline(0,1) + title("a) Weighted Forward Response Plot") + plot(WFIT,WRES) + title("b) Weighted Residual Plot") + plot(MWFIT,sqrt(Z)*log(Z)) + abline(0,1) + title("c) WFRP Based on MLE") + plot(MWFIT,MWRES) + title("d) WRP Based on MLE") + } + +lmsviews<- +function(x, Y, ii = 1) +{ +# Trimmed views using lmsreg for 90, 80, ... 0 percent +# trimming. Allows visualization of m +# and crudely estimation of c beta in models +# of the form y = m(x^T beta) + e. +# Workstation: activate a graphics device +# with commands "X11()" or "motif()." +# R needs command "library(lqs)." +# Advance the view with the right mouse button and +# in R, highight "stop." + x <- as.matrix(x) + out <- cov.mcd(x) + center <- out$center + cov <- out$cov + rd2 <- mahalanobis(x, center, cov) + labs <- c("90%", "80%", "70%", "60%", "50%", "40%", "30%", "20%", "10%", + "0%") + tem <- seq(0.1, 1, 0.1) + for(i in ii:10) { + val <- quantile(rd2, tem[i]) + b <- lmsreg(x[rd2 <= val, ], Y[rd2 <= val])$coef + ESP <- x %*% b[-1] + plot(ESP, Y) + title(labs[i]) + identify(ESP, Y) + print(b) + } +} + +lrdata <- function(n = 200, type = 3) +{ +# Generates data for logistic regression. +# If X|y=1 ~ N(mu_1,I) and X|Y=0 ~ N(0,I) then beta = mu_1 and alpha = -0.5 ||mu_1||^2. +# +# If q is changed, change the formula in the glm statement. + q <- 5 + y <- 0 * 1:n + y[(n/2 + 1):n] <- y[(n/2 + 1):n] + 1 + beta <- 0 * 1:q + if(type == 1) { + beta[1] <- 1 + alpha <- -0.5 + } + if(type == 2) { + beta <- beta + 1 + alpha <- -q/2 + } + if(type == 3) { + beta[1:3] <- 1 + alpha <- -1.5 + } + x <- matrix(rnorm(n * q), nrow = n, + ncol = q) + if(type == 1) { + x[(n/2 + 1):n, 1] <- x[(n/2 + 1 + ):n, 1] + 1 + } + if(type == 2) { + x[(n/2 + 1):n, ] <- x[(n/2 + 1 + ):n, ] + 1 + } + if(type == 3) { + x[(n/2 + 1):n, 1:3 ] <- x[(n/2 + 1 + ):n, 1:3 ] + 1 + } + #X|y=0 ~ N(0, I) and X|y=1 ~ N(beta,I) + # change formula to x[,1]+ ... + x[,q] with q + out <- glm(y ~ x[, 1] + x[, 2] + x[, 3] + + x[, 4] + x[,5], family = binomial) + list(alpha = alpha, beta = beta, lrcoef = out$coef,x=x,y=y) +} + +lressp <- function(x,y,slices=10) +{ +# Makes the ESSP for logistic regression. +# If X|y=1 ~ N(mu_1,I) and X|Y=0 ~ N(0,I) then beta = mu_1 and alpha = ||mu_1||^2. +# Workstation need to activate a graphics +# device with command "X11()" or "motif()." +# R needs command "library(lqs)." +# Advance the view with the right mouse button. +# In R, highlight "stop." +# +# If q is changed, change the formula in the glm statement. + q <- 5 +# change formula to x[,1]+ ... 
+ x[,q] with q + out <- glm(y ~ x[, 1] + x[, 2] + x[, 3] + + x[, 4] + x[,5], family = binomial) + ESP <- x %*% out$coef[-1] + out$coef[1] + Y <- y + plot(ESP,Y) + abline(mean(y),0) + fit <- y + fit <- exp(ESP)/(1 + exp(ESP)) + # lines(sort(ESP),sort(fit)) + indx <- sort.list(ESP) + lines(ESP[indx],fit[indx]) + fit2 <- fit + n <- length(y) + val <- as.integer(n/slices) + for(i in 1: (slices-1)){ + fit2[((i-1)*val+1):(i*val)] <- mean(y[indx[((i-1)*val+1):(i*val)]]) + } + fit2[((slices-1)*val+1):n] <- mean(y[indx[((slices-1)*val+1):n]]) +# fit2 is already sorted in order corresponding to indx + lines(ESP[indx],fit2) +#list(fit2=fit2,n=n,slices=slices,val=val) + } + + +lsviews<- +function(x, Y, ii = 1) +{ +# This function is the same as tvreg except that the untrimmed +# cases are highlighted. It compares the LS fits for 90, 80, +# ..., 0 percent trimming. Used to visualize g if y = g(beta^T x,e). +# Workstation: activate a graphics +# device with command "X11()" or "motif()." +# R needs command "library(lqs)." +# Advance the view with the right mouse button. +# In R, highlight ``stop." + x <- as.matrix(x) + out <- cov.mcd(x) + center <- out$center + cov <- out$cov + rd2 <- mahalanobis(x, center, cov) + labs <- c("90%", "80%", "70%", "60%", "50%", "40%", "30%", "20%", "10%", + "0%") + tem <- seq(0.1, 1, 0.1) + for(i in ii:10) { + val <- quantile(rd2, tem[i]) + bhat <- lsfit(x[rd2 <= val, ], Y[rd2 <= val])$coef + ESP <- bhat[1] + x %*% bhat[-1] + plot(ESP, Y) + points(ESP[rd2 <= val], Y[rd2 <= val], pch = 15, cex = 1.4) + abline(0, 1) + title(labs[i]) + identify(ESP, Y) + print(bhat) + } +} + +maha<- +function(x) +{ +# Generates the classical mahalanobis distances. + center <- apply(x, 2, mean) + cov <- var(x) + return(sqrt(mahalanobis(x, center, cov))) +} + +mbalata<- +function(x, y, k=6, nsamp = 7) +{ +#gets the median ball fit with 7 centers, med resid crit, 7 ball sizes + x <- as.matrix(x) + n <- dim(x)[1] + q <- dim(x)[2] + # q + 1 is number of predictors including intercept + vals <- c(q + 3 + floor(n/100), q + 3 + floor(n/40), q + 3 + + floor(n/20), q + 3 + floor(n/10), q + 3 + floor(n/5), q + + 3 + floor(n/3), q + 3 + floor(n/2)) + covv <- diag(q) + centers <- sample(n, nsamp) + temp <- lsfit(x, y) + mbaf <- temp$coef ## get LATA criterion + res <- temp$residuals + crit <- k^2*median(res^2) + cn <- sum(res^2 <= crit) + absres <- sort(abs(res)) + critf <- sum(absres[1:cn]) ## + for(i in 1:nsamp) { + md2 <- mahalanobis(x, center = x[centers[i], ], covv) + smd2 <- sort(md2) + for(j in 1:7) { + temp <- lsfit(x[md2 <= smd2[vals[j]], ], y[md2 <= + smd2[vals[j]]]) + #Use OLS on rows with md2 <= cutoff = smd2[vals[j]] + res <- y - temp$coef[1] - x %*% temp$coef[-1] + ## get LATA criterion + crit <- k^2*median(res^2) + cn <- sum(res^2 <= crit) + absres <- sort(abs(res)) + crit <- sum(absres[1:cn]) ## + if(crit < critf) { + critf <- crit + mbaf <- temp$coef + } + } + } + list(coef = mbaf, critf = critf) +} + +mbamv<- +function(x, y, nsamp = 7) +{ +# This function is for simple linear regression. The +# highlighted boxes get weight 1. Click on right +# mouse button to advance plot. Only uses 50% trimming. 
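+# Hypothetical usage (not from the original source):
+#   mbamv(x, y)   # for each of nsamp random centers, highlights the half of
+#                 # the cases closest to that center and adds their OLS line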
+ x <- as.matrix(x) + n <- dim(x)[1] + q <- dim(x)[2] + covv <- diag(q) + centers <- sample(n, nsamp) + for(i in 1:nsamp) { + md2 <- mahalanobis(x, center = x[centers[i], ], covv) + med <- median(md2) + plot(x, y) + points(x[md2 < med], y[md2 < med], pch = 15) + abline(lsfit(x[md2 < med],y[md2 < med])) + identify(x, y) + } +} + +mbamv2<- +function(x, Y, nsamp = 7) +{ +# This function is for multiple linear regression. The +# highlighted boxes get weight 1. Click on right +# mouse button to advance plot. Only uses 50% trimming. + x <- as.matrix(x) + n <- dim(x)[1] + q <- dim(x)[2] + covv <- diag(q) + centers <- sample(n, nsamp) + for(i in 1:nsamp) { + md2 <- mahalanobis(x, center = x[centers[i], ], covv) + med <- median(md2) + if(q ==1){out <- lsfit(x[md2 < med],Y[md2 < med])} + else{out <- lsfit(x[md2 < med,],Y[md2 < med])} + FIT <- out$coef[1] + x%*%out$coef[-1] + RES <- Y - FIT + par(mfrow=c(2,1)) + plot(FIT,Y) + points(FIT[md2 < med], Y[md2 < med], pch = 15) + abline(0,1) + identify(FIT, Y) + plot(FIT,RES) + points(FIT[md2 < med], RES[md2 < med], pch = 15) + abline(0,0) + identify(FIT, RES) + } +} + +mbareg<- +function(x, y, nsamp = 7) +{ +#gets the mbareg fit with 7 centers, med resid crit, 7 ball sizes + x <- as.matrix(x) + n <- dim(x)[1] + q <- dim(x)[2] # q + 1 is number of predictors including intercept + vals <- c(q + 3 + floor(n/100), q + 3 + floor(n/40), q + 3 + floor(n/20 + ), q + 3 + floor(n/10), q + 3 + floor(n/5), q + 3 + floor(n/3), + q + 3 + floor(n/2)) + covv <- diag(q) + centers <- sample(n, nsamp) + temp <- lsfit(x, y) + mbaf <- temp$coef + critf <- median(temp$residuals^2) + for(i in 1:nsamp) { + md2 <- mahalanobis(x, center = x[centers[i], ], covv) + smd2 <- sort(md2) + for(j in 1:7) { + temp <- lsfit(x[md2 <= smd2[vals[j]], ], y[md2 <= smd2[ + vals[j]]]) + #Use OLS on rows with md2 <= cutoff = smd2[vals[j]] + res <- y - temp$coef[1] - x %*% temp$coef[-1] + crit <- median(res^2) + if(crit < critf) { + critf <- crit + mbaf <- temp$coef + } + } + } + list(coef = mbaf, critf = critf) +} + +med2ci<- +function(x, cc = 4, alpha = 0.05) +{ +#gets ~ 50% trimmed mean se for sample median and the corresponding robust 100 (1-alpha)% CI +#defaults are alpha = .05, cc = 5 may be better than the default + up <- 1 - alpha/2 + n <- length(x) + med <- median(x) + ln <- floor(n/2) - ceiling(sqrt(n/cc)) + un <- n - ln + low <- ln + 1 + d <- sort(x) + if(ln > 0) { + d[1:ln] <- d[(low)] + d[(un + 1):n] <- d[un] + } + den <- ((un - ln)/n)^2 + swv <- var(d)/den + #got the scaled Winsorized variance + rdf <- un - low + rval <- qt(up, rdf) * sqrt(swv/n) + rlo <- med - rval + rhi <- med + rval + list(int = c(rlo, rhi), med = med, swv = swv) +} + +medci<- +function(x, alpha = 0.05) +{ +#gets Bloch and Gastwirth SE for sample median and the corresponding resistant 100 (1-alpha)% CI +#defaults are alpha = .05 + n <- length(x) + up <- 1 - alpha/2 + med <- median(x) + ln <- floor(n/2) - ceiling(sqrt(n/4)) + un <- n - ln + d <- sort(x) + rdf <- un - ln - 1 + cut <- qt(up, rdf) + sebg <- 0.5 * (d[un] - d[ln + 1]) + rval <- cut * sebg + rlo <- med - rval + rhi <- med + rval + list(int = c(rlo, rhi), med = med, sebg = sebg) +} +MLRplot<-function(x, Y) +{ +# Forward response plot and residual plot. +# R needs command "library(lqs)" if a robust estimator replaces lsfit. +# Advance the view with the right mouse button. 
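+# Cases with Cook's distance greater than min(0.5, 2p/n), where p = ncol(x) + 1,
+# are highlighted (pch = 15) in both the forward response and residual plots.
+# Hypothetical usage: MLRplot(x, y) with x an n by q design matrix (no intercept column).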
+ x <- as.matrix(x) + out <- lsfit(x, Y) + cook <- ls.diag(out)$cooks + n <- dim(x)[1] + p <- dim(x)[2] + 1 + tem <- cook > min(0.5, (2 * p)/n) + bhat <- out$coef + FIT <- bhat[1] + x %*% bhat[-1] + par(mfrow = c(2, 1)) + plot(FIT, Y) + abline(0, 1) + points(FIT[tem], Y[tem], pch = 15) + identify(FIT, Y) + title("Forward Response Plot") + RES <- Y - FIT + plot(FIT, RES) + points(FIT[tem], RES[tem], pch = 15) + identify(FIT, RES) + title("Residual Plot") +} + +mlrplot2 <- function(x, Y) +{ +# Forward response plot and residual plot for two mbareg estimators. +# Workstation need to activate a graphics +# device with command "X11()" or "motif()." +# R needs command "library(lqs)" if a robust estimator replaces lsfit. +# Advance the view with the right mouse button. + x <- as.matrix(x) + out <- mbareg(x, Y) + bhat <- out$coef + FIT <- bhat[1] + x %*% bhat[-1] + par(mfrow = c(2, 2)) + plot(FIT, Y) + abline(0, 1) + identify(FIT, Y) + title("MBA Forward Response Plot") + RES <- Y - FIT + plot(FIT, RES) + identify(FIT, RES) + title("MBA Residual Plot") +# + out <- mbalata(x, Y) + bhat <- out$coef + FIT <- bhat[1] + x %*% bhat[-1] + plot(FIT, Y) + abline(0, 1) + identify(FIT, Y) + title("MBALATA Forward Response Plot") + RES <- Y - FIT + plot(FIT, RES) + identify(FIT, RES) + title("MBALATA Residual Plot") +} + + +mplot<- +function(x) +{ +# Makes a DD plot only using the MDi, the RDi are not used. + p <- dim(x)[2] + center <- apply(x, 2, mean) + cov <- var(x) + md2 <- mahalanobis(x, center, cov) + md <- sqrt(md2) + rd <- md + const <- sqrt(qchisq(0.5, p))/median(rd) + rd <- const * rd + plot(md, rd) + abline(0, 1) + identify(md, rd) +} + +nav<- +function(alpha = 0.01, k = 5) +{ +#gets n(asy var) for the alpha trimmed mean +#and T_(A,n)(k) if errors are N(0,1) + z <- - qnorm(alpha) + den <- 1 - (2 * z * dnorm(z))/(2 * pnorm(z) - 1 + ) + val <- den/(1 - 2 * alpha) + ntmav <- val + (2 * alpha * z^2)/(1 - 2 * alpha + )^2 + zj <- k * qnorm(0.75) + alphaj <- pnorm( - zj) + alphaj <- ceiling(100 * alphaj)/100 + zj <- - qnorm(alphaj) + den <- 1 - (2 * zj * dnorm(zj))/(2 * pnorm(zj) - + 1) + val <- den/(1 - 2 * alphaj) + natmav <- val + (2 * alphaj * zj^2)/(1 - 2 * + alphaj)^2 + return(ntmav, natmav) +} + +nltv<- +function(gam = 0.5) +{ +# Gets asy var for lts(h) and lta(h) at standard normal +# where h/n -> gam. + k <- qnorm(0.5 + gam/2) + den <- gam - 2 * k * dnorm(k) + ltsv <- 1/den + tem <- (1 - exp( - (k^2)/2))^2 + ltav <- (2 * pi * gam)/(4 * tem) + return(ltsv, ltav) +} + +oddata<- +function(n = 100, q = 5, theta = 1) +{ +# Generates overdispersion (negative binomial) data for loglinear regression. +# + y <- 1:n + pr <- 1/(1 + theta) + beta <- 0 * 1:q + beta[1:3] <- 1 + alpha <- -2.5 + x <- matrix(rnorm(n * q), nrow = n, ncol = q) + x <- 0.5 * x + 1 + SP <- alpha + x %*% beta + y <- rnbinom(n, size = ceiling(exp(SP)), pr) + list(x = x, y = y) +} + +pifclean<- +function(k, gam) +{ + p <- floor(log(3/k)/log(1 - gam)) + list(p = p) +} + +piplot<-function(x, y, alpha = 0.05) +{ +# Makes an FY plot with prediction limits added. 
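+# The plotted limits are the semiparametric PIs also used in pisim: for case i,
+#   FIT[i] + corfac * sqrt(1 + h_i) * q_{alpha/2}  and
+#   FIT[i] + corfac * sqrt(1 + h_i) * q_{1-alpha/2},
+# where the q's are quantiles of the OLS residuals, h_i is the leverage and
+# corfac = (1 + 15/n) * sqrt(n/(n - p)) with p = ncol(x) + 1.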
+ x <- as.matrix(x) + p <- dim(x)[2] + 1 + n <- length(y) + up <- 1:n + low <- up + out <- lsfit(x, y) + tem <- ls.diag(out) + lev <- tem$hat + res <- out$residuals + FIT <- y - res + Y <- y + corfac <- (1 + 15/n)*sqrt(n/(n - p)) + val2 <- quantile(res, c(alpha/2, 1 - alpha/2)) + #get lower and upper PI limits for each case + for(i in 1:n) { + val <- sqrt(1 + lev[i]) + val3 <- as.single(corfac * val2[1] * val) + val4 <- as.single(corfac * val2[2] * val) + up[i] <- FIT[i] + val4 + low[i] <- FIT[i] + val3 + } + zy <- c(min(low), Y, max(up)) + zx <- c(min(FIT), FIT, max(FIT)) + #change labels so plot labels are good + ff <- FIT + yy <- Y + Y <- zy + FIT <- zx + plot(FIT, Y, type = "n") + points(ff, yy) + abline(0, 1) + points(ff, up, pch = 17) + points(ff, low, pch = 17) +} + +pisim<-function(n = 100, q = 7, nruns = 100, alpha = 0.05, eps = 0.1, shift = 9, type + = 1) +{ +# compares new and classical PIs for multiple linear regression +# if type = 1 for N(0,1) errors, 2 for t3 errors, 3 for exp(1) - 1 errors +# 4 for uniform(-1,1) errors, 5 for (1-eps) N(0,1) + eps N(0,(1+shift)^2) errors +# constant = 1 so there are p = q+1 coefficients + b <- 0 * 1:q + 1 + cpicov <- 0 + npicov <- 0 + acpicov <- 0 + opicov <- 0 + val3 <- 1:nruns + val4 <- val3 + val5 <- val3 + pilen <- matrix(0, nrow = nruns, ncol = 4) + coef <- matrix(0, nrow = nruns, ncol = q + 1) + corfac <- (1 + 15/n) * sqrt(n/(n - q - 1)) + corfac2 <- sqrt(n/(n - q - 1)) + for(i in 1:nruns) { + x <- matrix(rnorm(n * q), nrow = n, ncol = q) + if(type == 1) { + y <- 1 + x %*% b + rnorm(n) + xf <- rnorm(q) + yf <- 1 + xf %*% b + rnorm(1) + } + if(type == 2) { + y <- 1 + x %*% b + rt(n, df = 3) + xf <- rnorm(q) + yf <- 1 + xf %*% b + rt(1, df = 3) + } + if(type == 3) { + y <- 1 + x %*% b + rexp(n) - 1 + xf <- rnorm(q) + yf <- 1 + xf %*% b + rexp(1) - 1 + } + if(type == 4) { + y <- 1 + x %*% b + runif(n, min = -1, max = 1) + xf <- rnorm(q) + yf <- 1 + xf %*% b + runif(1, min = -1, max = 1) + } + if(type == 5) { + err <- rnorm(n, sd = 1 + rbinom(n, 1, eps) * shift) + y <- 1 + x %*% b + err + xf <- rnorm(q) + yf <- 1 + xf %*% b + rnorm(1, sd = 1 + rbinom(1, 1, eps + ) * shift) + } + out <- lsfit(x, y) + fres <- out$resid + coef[i, ] <- out$coef + yfhat <- out$coef[1] + xf %*% out$coef[-1] + w <- cbind(1, x) + xtxinv <- solve(t(w) %*% w) + xf <- c(1, xf) + hf <- xf %*% xtxinv + hf <- hf %*% xf + val <- sqrt(1 + hf) #get classical PI + mse <- sum(fres^2)/(n - q - 1) + val2 <- qt(1 - alpha/2, n - q - 1) * sqrt(mse) * val + up <- yfhat + val2 + low <- yfhat - val2 + pilen[i, 1] <- up - low + if(low < yf && up > yf) cpicov <- cpicov + 1 + #get semiparametric PI + val2 <- quantile(fres, c(alpha/2, 1 - alpha/2)) + val3[i] <- as.single(corfac * val2[1] * val) + val4[i] <- as.single(corfac * val2[2] * val) + up <- yfhat + val4[i] + low <- yfhat + val3[i] + pilen[i, 2] <- up - low + if(low < yf && up > yf) npicov <- npicov + 1 + # asymptotically conservative PI + val6 <- corfac2 * max(abs(val2)) + val5[i] <- val6 * val + up <- yfhat + val5[i] + low <- yfhat - val5[i] + pilen[i, 3] <- up - low + if(low < yf && up > yf) acpicov <- acpicov + 1 + # asymptotically optimal PI + sres <- sort(fres) + cc <- ceiling(n * (1 - alpha)) + rup <- sres[cc] + rlow <- sres[1] + olen <- rup - rlow + if(cc < n) { + for(j in (cc + 1):n) { + zlen <- sres[j] - sres[j - cc + 1] + if(zlen < olen) { + olen <- zlen + rup <- sres[j] + rlow <- sres[j - cc + 1] + } + } + } + up <- yfhat + corfac * val * rup + low <- yfhat + corfac * val * rlow + pilen[i, 4] <- up - low + if(low < yf 
&& up > yf) + opicov <- opicov + 1 + } + pimnlen <- apply(pilen, 2, mean) + mnbhat <- apply(coef, 2, mean) + lcut <- mean(val3) + hcut <- mean(val4) + accut <- mean(val5) + cpicov <- cpicov/nruns + npicov <- npicov/nruns + acpicov <- acpicov/nruns + opicov <- opicov/nruns + list(mnbhat = mnbhat, pimenlen = pimnlen, cpicov = cpicov, npicov = + npicov, acpicov = acpicov, opicov = opicov, lcut = lcut, hcut + = hcut, accut = accut) +} + +ratmn<- +function(x, k1 = 6, k2 = 6) +{ +#robust 2 stage asymmetically trimmed mean + madd <- mad(x, constant = 1) + med <- median(x) + LM <- sum(x < (med - k1 * madd)) + nmUM <- sum(x > (med + k2 * madd)) + n <- length(x) + # ll (hh) is the percentage to be trimmed to the left (right) + ll <- ceiling((100 * LM)/n) + hh <- ceiling((100 * (nmUM))/n) + tem <- sort(x) + ln <- floor((ll * n)/100) + un <- floor((n * (100 - hh))/100) + low <- ln + 1 + val1 <- tem[low] + val2 <- tem[un] + rtmn <- mean(x[(x >= val1) & (x <= val2)]) + trmn +} + +rmaha<- +function(x) +{ +# Produces robust Mahalanobis distances (scaled for normal data). + p <- dim(x)[2] + out <- cov.mcd(x) + center <- out$center + cov <- out$cov + rd <- mahalanobis(x, center, cov) + const <- sqrt(qchisq(0.5, p))/median(rd) + return(const * sqrt(rd)) +} + +robci <- function(x, alpha = 0.05, trmp = 0.25, ka = 6, ks = 3.5 + ) +{ +#Gets several robust 100 (1-alpha)% CI's for data x. +#defaults are alpha = .05 + n <- length(x) + up <- 1 - alpha/2 + med <- median(x) + madd <- mad(x, constant = 1) + d <- sort(x) + dtem <- d ## get the CI for T_A, + LM <- sum(x < (med - ka * madd)) + nmUM <- sum(x > (med + ka * madd)) + # ll (hh) is the percentage to be trimmed to the left (right) + ll <- ceiling((100 * LM)/n) + hh <- ceiling((100 * (nmUM))/n) + ln <- floor((ll * n)/100) + un <- floor((n * (100 - hh))/100) + low <- ln + 1 + val1 <- dtem[low] + val2 <- dtem[un] + tstmn <- mean(x[(x >= val1) & (x <= val2)]) + #have obtained the two stage asymmetrically trimmed mean + if(ln > 0) { + d[1:ln] <- d[low] + } + if(un < n) { + d[(un + 1):n] <- d[un] + } + den <- ((un - ln)/n)^2 + swv <- var(d)/den + #got the scaled Winsorized variance + rdf <- un - low + rval <- qt(up, rdf) * sqrt(swv/n) + talo <- tstmn - rval + tahi <- tstmn + rval + ##got low and high endpoints of robust T_A,n CI +##get robust T_S,n CI + d <- dtem + lo <- sum(x < (med - ks * madd)) + hi <- sum(x > (med + ks * madd)) + low <- ceiling((100 * lo)/n) + high <- ceiling((100 * hi)/n) + tp <- min(max(low, high)/100, 0.5) + tstmn <- mean(x, trim = tp) + #have obtained the two stage symetrically trimmed mean + ln <- floor(n * tp) + un <- n - ln + if(ln > 0) { + d[1:ln] <- d[(ln + 1)] + } + if(un < n) { + d[(un + 1):n] <- d[un] + } + den <- ((un - ln)/n)^2 + swv <- var(d)/den + #got the scaled Winsorized variance + rdf <- un - ln - 1 + rval <- qt(up, rdf) * sqrt(swv/n) + tslo <- tstmn - rval + tshi <- tstmn + rval + ##got low and high endpoints of robust T_S,n CI +##get median CI that uses a scaled Winsorized variance + d <- dtem + lnbg <- floor(n/2) - ceiling(sqrt(n/4)) + unbg <- n - lnbg + lowbg <- lnbg + 1 + if(lnbg > 0) { + d[1:lnbg] <- d[(lowbg)] + } + if(unbg < n) { + d[(unbg + 1):n] <- d[unbg] + } + den <- ((unbg - lnbg)/n)^2 + swv <- var(d)/den + #got the scaled Winsorized variance + rdf <- unbg - lnbg - 1 + cut <- qt(up, rdf) + rval <- cut * sqrt(swv/n) + rlo <- med - rval + rhi <- med + rval + ##got median CI that uses a scaled Winsorized variance +##get BG CI + se2 <- 0.5 * (d[unbg] - d[lowbg]) + rval <- cut * se2 + rlo2 <- med - rval + rhi2 <- med + 
rval + #got low and high endpoints of BG CI +## get classical CI + mn <- mean(x) + v <- var(x) + se <- sqrt(v/n) + val <- qt(up, n - 1) * se + lo <- mn - val + hi <- mn + val ##got classical CI endpoints +## get trimmed mean CI + d <- dtem + ln <- floor(n * trmp) + un <- n - ln + trmn <- mean(x, trim = trmp) + if(ln > 0) { + d[1:ln] <- d[(ln + 1)] + } + if(un < n) { + d[(un + 1):n] <- d[un] + } + den <- ((un - ln)/n)^2 + swv <- var(d)/den + #got the scaled Winsorized variance + rdf <- un - ln - 1 + rval <- qt(up, rdf) * sqrt(swv/n) + trlo <- trmn - rval + trhi <- trmn + rval + ##got trimmed mean CI endpoints + list(tint = c(lo, hi), taint = c(talo, tahi), + tsint = c(tslo, tshi), bgint = c(rlo2, + rhi2), mint = c(rlo, rhi), trint = c( + trlo, trhi)) +} + + +rrplot<- +function(x, y, nsamps = 7) +{ +# Makes an RR plot. Needs the mbareg function. + n <- length(y) + rmat <- matrix(nrow = n, ncol = 5) + lsres <- lsfit(x, y)$residuals + print("got OLS") + l1res <- l1fit(x, y)$residuals + print("got L1") + almsres <- lmsreg(x, y)$resid + print("got ALMS") + altsres <- ltsreg(x, y)$residuals + print("got ALTS") + out <- mba$coef + mbacoef <- mbareg(x, y, nsamp = nsamps)$coef + MBARES <- y - mbacoef[1] - x %*% mbacoef[-1] + print("got MBA") + rmat[, 1] <- lsres + rmat[, 2] <- l1res + rmat[, 3] <- almsres + rmat[, 4] <- altsres + rmat[, 5] <- MBARES + pairs(rmat, labels = c("OLS residuals", + "L1 residuals", "ALMS residuals", + "ALTS residuals", "MBA residuals")) +} + +rrplot2<- +function(x, y, nsamps = 7) +{ +# Makes an RR plot. Needs the mbareg function. + n <- length(y) + rmat <- matrix(nrow = n, ncol = 4) + lsres <- lsfit(x, y)$residuals + print("got OLS") + almsres <- lmsreg(x, y)$resid + print("got ALMS") + altsres <- ltsreg(x, y)$residuals + print("got ALTS") + out <- mba$coef + mbacoef <- mbareg(x, y, nsamp = nsamps)$coef + MBARES <- y - mbacoef[1] - x %*% mbacoef[-1] + print("got MBA") + rmat[, 1] <- lsres + rmat[, 2] <- almsres + rmat[, 3] <- altsres + rmat[, 4] <- MBARES + pairs(rmat, labels = c("OLS residuals", + "ALMS residuals", + "ALTS residuals", "MBA residuals")) +} + +rstmn<- +function(x, k1 = 5, k2=5) +{ +#robust symmetically trimmed 2 stage mean +#truncates too many cases when the contamination is asymmetric + madd <- mad(x, constant = 1) + med <- median(x) + LM <- sum(x < (med - k1 * madd)) + nmUM <- sum(x > (med + k2 * madd)) + n <- length(x) #ll (hh) is the percentage trimmed to the left (right) +# tp is the trimming proportion + ll <- ceiling((100 * LM)/n) + hh <- ceiling((100 * nmUM)/n) + tp <- min(max(ll, hh)/100, 0.5) + mean(x, trim = tp) +} + +sir<- +function(x, y, h) +{ +# Obtained from STATLIB. Contributed by Thomas Koetter. +# Calculates the effective dimension-reduction (e.d.r.) +# directions by Sliced Inverse Regression (K.C. Li 1991, JASA 86, 316-327) +# +# Input: x n x p matrix, explanatory variable +# y n x 1 vector, dependent variable +# h scalar: if h >= 2 number of slices +# if h <= -2 number of elements within a slice +# 0 < h < 1 width of a slice: h = slicewidth / +# range +# +# Output: list(edr, evalues) +# edr p x p matrix, estimates for the e.d.r. directions +# evalues p x 1 vector, the eigenvalues to the directions +# +# written by Thomas Koetter (thomas@wiwi.hu-berlin.de) 1995 +# last modification: 7/18/95 +# based on the implementation in XploRe +# a full description of the XploRe program can be found in (chapter 11) +# 'XploRe: An interactive statistical computing environment', +# W. Haerdle, S. Klinke, B.A. 
Turlach, Springer, 1995 +# +# This software can be freely used for non-commercial purposes and freely +# distributed. +#+-----------------------------------------------------------------------------+ +#| Thomas Koetter | +#| Institut fuer Statistik und Oekonometrie | +#| Fakultaet Wirtschaftswissenschaften | +#| Humboldt-Universitaet zu Berlin, 10178 Berlin, GERMANY | +#+-----------------------------------------------------------------------------+ +#| Tel. voice: +49 30 2468-321 | +#| Tel. FAX: +49 30 2468-249 | +#| E-mail: thomas@wiwi.hu-berlin.de | +#+-----------------------------------------------------------------------------+ + n <- nrow(x) + ndim <- ncol(x) + if(n != length(c(y))) { + stop("length of y doesn't match to number of rows of x !!") + } + if( - h > n) { + stop("Number of elements within slices can't exceed number of data !!" + ) + } +# stanardize the x variable to z (mean 0 and cov I) + xb <- apply(x, 2, mean) + si2 <- solve(chol(var(x))) + xt <- (x - matrix(xb, nrow(x), ncol(x), byrow = T)) %*% si2 + # sort the data regarding y. x values are now packed into slices + ord1 <- order(y) + data <- cbind(y[ord1], xt[ord1, ]) # determine slicing strategy + if(h <= -2) { +# abs(h) is number of elements per slice + h <- abs(h) + ns <- floor(n/h) + condit <- 1:n + choice <- (1:ns) * h + # if there are observations left, add them to the first and last slice + if(h * ns != n) { + hk <- floor((n - h * ns)/2) + choice <- choice + hk + choice[ns] <- n # to aviod numerical problems + } + } + else if(h >= 2) { +# h is number of slices + ns <- h + slwidth <- (data[n, 1] - data[1, 1])/ns + slend <- seq(data[1, 1] + slwidth, length = ns, by = slwidth) + slend[ns] <- data[n, 1] + condit <- c(data[, 1]) + choice <- slend + } + else if((0 < h) && (h < 1)) { +# h is widht of a slice divides by the range of y + ns <- floor(1/h) + slwidth <- (data[n, 1] - data[1, 1]) * h + slend <- seq(data[1, 1] + slwidth, length = ns, by = slwidth) + slend[ns] <- data[n, 1] # to aviod numerical problems + condit <- c(data[, 1]) + choice <- slend + } + else stop("values of third parameter not valid") + v <- matrix(0, ndim, ndim) # estimate for Cov(E[z|y]) + ind <- rep(TRUE, n) # index for already sliced elements + ndim <- ndim + 1 + j <- 1 # loop counter + while(j <= ns) { + sborder <- (condit <= choice[j]) & ind # index of slice j + if(any(sborder)) { +# are there elements in slice j ? + ind <- ind - sborder + xslice <- data[sborder, 2:ndim] + if(sum(sborder) == 1) { +# xslice is a vector ! + xmean <- xslice + v <- v + outer(xmean, xmean, "*") + } + else { + xmean <- apply(xslice, 2, mean) + v <- v + outer(xmean, xmean, "*") * nrow(xslice + ) + } + } + j <- j + 1 + } + if(any(ind)) { + print("Error: elements unused !!") + print(ind) + } + v <- (v + t(v))/(2 * n) # to prevent numerical errors (v is symmetric) + eig <- eigen(v) + b <- si2 %*% eig$vectors # estimates for e.d.r. directions + data <- sqrt(apply(b * b, 2, sum)) + b <- t(b)/data + return(list(edr = t(b), evalues = eig$values)) +} + +sirviews<- +function(x, Y, ii = 1) +{ +# Uses the function "sir" from STATLIB. +# Trimmed views for 90, 80, ... 0 percent +# trimming. Allows visualization of m +# and crude estimation of c beta in models +# of the form y = m(x^T beta) + e. +# beta is obtained from SIR. +# Workstation need to activate a graphics +# device with command "X11()" or "motif()." +# R needs command "library(lqs)." +# Advance the view with the right mouse button. +# In R, highlight "stop." 
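+# Hypothetical usage (requires the sir function above and cov.mcd):
+#   sirviews(x, y)   # uses h = ncol(x) + 7 slices; at each trimming level the
+#                    # first e.d.r. direction b is estimated and Y is plotted
+#                    # against ESP = x %*% b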
+ x <- as.matrix(x) + q <- dim(x)[2] + out <- cov.mcd(x) # or use out <- cov.mve(x) + center <- out$center + cov <- out$cov + rd2 <- mahalanobis(x, center, cov) + labs <- c("90%", "80%", "70%", "60%", "50%", "40%", "30%", "20%", "10%", + "0%") + tem <- seq(0.1, 1, 0.1) + h <- q + 7 + for(i in ii:10) { + val <- quantile(rd2, tem[i]) + b <- sir(x[rd2 <= val, ], Y[rd2 <= val], h)$edr[, 1] + ESP <- x %*% b + plot(ESP, Y) + title(labs[i]) + identify(ESP, Y) + print(b) + } +} + +stmci<- +function(x, alpha = 0.05, ks = 3.5) +{ +#gets se for sample median and the corresponding robust 100 (1-alpha)% CI +#defaults are alpha = .05 + n <- length(x) + up <- 1 - alpha/2 + med <- median(x) + madd <- mad(x, constant = 1) + lo <- sum(x < (med - ks * madd)) + hi <- sum(x > (med + ks * madd)) + low <- ceiling((100 * lo)/n) + high <- ceiling((100 * hi)/n) + tp <- min(max(low, high)/100, 0.5) + tstmn <- mean(x, trim = tp) + #have obtained the two stage symetrically trimmed mean + ln <- floor(n * tp) + un <- n - ln + d <- sort(x) + if(ln > 0) { + d[1:ln] <- d[(ln + 1)] + d[(un + 1):n] <- d[un] + } + den <- ((un - ln)/n)^2 + swv <- var(d)/den + #got the scaled Winsorized variance + rdf <- un - ln - 1 + rval <- qt(up, rdf) * sqrt(swv/n) + tslo <- tstmn - rval + tshi <- tstmn + rval + list(int = c(tslo, tshi), tp = tp) +} + +symviews<- +function(x, Y) +{ +# Makes trimmed views for 90, 80, ..., 0 +# percent trimming and sometimes works even if m +# is symmetric about E(x^t beta) where +# y = m(x^T beta ) + e. +# For work stations, activate a graphics +# device with command "X11()" or "motif()." +# For R, use "library(lqs)." +# Use the rightmost mouse button to advance +# the view. In R, highlight ``stop." + x <- as.matrix(x) + tem <- seq(0.1, 1, 0.1) + bols <- lsfit(x, Y)$coef + fit <- x %*% bols[-1] + temx <- x[fit > median(fit), ] + temy <- Y[fit > median(fit)] + out <- cov.mcd(temx) # or use out <- cov.mve(temx) + center <- out$center + cov <- out$cov + rd2 <- mahalanobis(temx, center, cov) + for(i in 1:10) { + val <- quantile(rd2, tem[i]) + bhat <- lsfit(temx[rd2 <= val, ], temy[rd2 <= val])$coef + ESP <- x %*% bhat[-1] + plot(ESP, Y) + identify(ESP, Y) + print(bhat) + } +} + + +tmci<- +function(x, alpha = 0.05, tp = 0.25) +{ +#gets se for the tp trimmed mean and the corresponding robust 100 (1-alpha)% CI +#defaults are alpha = .05 + n <- length(x) + up <- 1 - alpha/2 + tmn <- mean(x, trim = tp) + ln <- floor(n * tp) + un <- n - ln + d <- sort(x) + if(ln > 0) { + d[1:ln] <- d[(ln + 1)] + d[(un + 1):n] <- d[un] + } + den <- ((un - ln)/n)^2 + swv <- var(d)/den + #got the scaled Winsorized variance + rdf <- un - ln - 1 + rval <- qt(up, rdf) * sqrt(swv/n) + tmlo <- tmn - rval + tmhi <- tmn + rval + list(int = c(tmlo, tmhi), tp = tp) +} + +Tplt<- +function(x, y) +{ +# For Unix, use X11() to turn on the graphics device before using this function. +# This function plots y^L vs OLS fit. If plot is linear for L, use y^L instead of y. +# This is a graphical method for a response transform. 
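+# The candidate transformations are the modified power family
+# y^(L) = (y^L - 1)/L for L = -1, -2/3, -1/2, -1/3, -1/4, 1/4, 1/3, 1/2, 2/3,
+# together with log(y) for L = 0 and y itself for L = 1; choose the L whose
+# plot against the OLS fit looks most linear.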
+ olsfit <- y - lsfit(x, y)$resid + lam <- c(-1, -2/3, -1/2, -1/3, -1/4, 0, 1/4, 1/ + 3, 1/2, 2/3, 1) + xl <- c("Y**(-1)", "Y**(-2/3)", "Y**(-0.5)", + "Y**(-1/3)", "Y**(-1/4)", "LOG(Y)", + "Y**(1/4)", "Y**(1/3)", "Y**(1/2)", + "Y**(2/3)", "Y") + for(i in 1:length(lam)) { + if(lam[i] == 0) + ytem <- log(y) + else if(lam[i] == 1) + ytem <- y + else ytem <- (y^lam[i] - 1)/lam[i] + plot(olsfit, ytem, xlab = "YHAT", ylab + = xl[i]) + abline(lsfit(olsfit, ytem)$coef) + identify(olsfit, ytem) + } +} + +trviews<- +function(x, Y, ii = 1) +{ +# Trimmed views for 90, 80, ... 0 percent +# trimming. Increase ii if 90% trimming is too harsh. +# Allows visualization of m and crudely estimation of +# c beta in models of the form y = m(x^T beta) + e. +# Workstation: activate a graphics device +# with commands "X11()" or "motif()." +# R needs command "library(lqs)." +# Advance the view with the right mouse button and +# in R, highight "stop." + x <- as.matrix(x) + out <- cov.mcd(x) + center <- out$center + cov <- out$cov + rd2 <- mahalanobis(x, center, cov) + labs <- c("90%", "80%", "70%", "60%", "50%", "40%", "30%", + "20%","10%","0%") + tem <- seq(0.1, 1, 0.1) + for(i in ii:10) { + val <- quantile(rd2, tem[i]) + b <- lsfit(x[rd2 <= val, ], Y[rd2 <= val])$coef + ESP <- x %*% b[-1] + plot(ESP, Y) + title(labs[i]) + identify(ESP, Y) + print(b) + } +} + +tvreg<- +function(x, Y, ii = 1) +{ +# Trimmed views (TV) regression for 90, 80, ..., 0 percent +# trimming. Increase ii if 90% trimming is too harsh. +# Workstation: activate a graphics device +# with commands "X11()" or "motif()." +# R needs command "library(lqs)." +# Advance the view with the right mouse button and +# in R, highight "stop." + x <- as.matrix(x) + out <- cov.mcd(x) + center <- out$center + cov <- out$cov + rd2 <- mahalanobis(x, center, cov) + labs <- c("90%", "80%", "70%", "60%", "50%", + "40%", "30%", "20%", "10%", "0%") + tem <- seq(0.1, 1, 0.1) + for(i in ii:10) { + val <- quantile(rd2, tem[i]) + b <- lsfit(x[rd2 <= val, ], Y[rd2 <= + val])$coef + FIT <- x %*% b[-1] + b[1] + plot(FIT, Y) + abline(0, 1) + title(labs[i]) + identify(FIT, Y) + print(b) + } +} + +tvreg2<- +function(X, Y, M = 0) +{ +# Trimmed views regression for M percent trimming. +# Workstation: activate a graphics device +# with commands "X11()" or "motif()." +# R needs command "library(lqs)." + X <- as.matrix(X) + out <- cov.mcd(X) + center <- out$center + cov <- out$cov + rd2 <- mahalanobis(X, center, cov) + tem <- (100 - M)/100 + val <- quantile(rd2, tem) + b <- lsfit(X[rd2 <= val, ], Y[rd2 <= val])$coef + FIT <- X %*% b[-1] + b[1] + plot(FIT, Y) + abline(0, 1) + identify(FIT, Y) + list(coef = b) +} + + +wddplot<- +function(x) +{# Shows the southwest corner of the DD plot. + n <- dim(x)[1] + wt <- 0 * (1:n) + p <- dim(x)[2] + center <- apply(x, 2, mean) + cov <- var(x) + md2 <- mahalanobis(x, center, cov) + out <- cov.mcd(x) + center <- out$center + cov <- out$cov + rd2 <- mahalanobis(x, center, cov) + md <- sqrt(md2) + rd <- sqrt(rd2) + const <- sqrt(qchisq(0.5, p))/median(rd) + rd <- const * rd + wt[rd < sqrt(qchisq(0.975, p))] <- 1 + MD <- md[wt > 0] + RD <- rd[wt > 0] + plot(MD, RD) +} + +skipcov<-function(m,cop=6,MM=FALSE,op=1,mgv.op=0,outpro.cop=3,STAND=TRUE){ +# +# m is an n by p matrix +# +# Compute skipped covariance matrix +# +# op=1: +# Eliminate outliers using a projection method +# That is, first determine center of data using: +# +# cop=1 Donoho-Gasko median, +# cop=2 MCD, +# cop=3 marginal medians. 
+# cop=4 uses MVE center +# cop=5 uses TBS +# cop=6 uses rmba (Olive's median ball algorithm) +# +# For each point +# consider the line between it and the center, +# project all points onto this line, and +# check for outliers using +# +# MM=F, a boxplot rule. +# MM=T, rule based on MAD and median +# +# Repeat this for all points. A point is declared +# an outlier if for any projection it is an outlier +# +# op=2 use mgv (function outmgv) method to eliminate outliers +# +# Eliminate any outliers and compute means +# using remaining data. +# mgv.op=0, mgv uses all pairwise distances to determine center of the data +# mgv.op=1 uses MVE +# mgv.op=2 uses MCD +# +temp<-NA +m<-elimna(m) +m<-as.matrix(m) +if(op==2)temp<-outmgv(m,plotit=FALSE,op=mgv.op)$keep +if(op==1)temp<-outpro(m,plotit=FALSE,MM=MM,cop=outpro.cop,STAND=STAND,pr=FALSE)$keep +val<-var(m[temp,]) +val +} + +hc4wtest<-function(x,y,nboot=500,SEED=TRUE,RAD=TRUE,xout=FALSE,outfun=outpro,...){ +# +# Test the hypothesis that all OLS slopes are zero +# using HC4 wild bootstrap using wald test. +# +# This function calls the functions +# olshc4 and +# lstest4 +# +if(SEED)set.seed(2) +x<-as.matrix(x) +# First, eliminate any rows of data with missing values. +m<-elimna(cbind(x,y)) +x<-as.matrix(x) +p<-ncol(x) +pp<-p+1 +x<-m[,1:p] +y<-m[,pp] +if(xout){ +flag<-outfun(x,...)$keep +x<-as.matrix(x) +x<-x[flag,] +y<-y[flag] +x<-as.matrix(x) +} +x<-as.matrix(x) +p<-ncol(x) +pp<-p+1 +temp<-lsfit(x,y) +Rsq=ols(x,y)$R.squared +yhat<-mean(y) +res<-y-yhat +s<-olshc4(x, y)$cov[-1, -1] +si<-solve(s) +b<-temp$coef[2:pp] +wtest<-t(b)%*%si%*%b +if(RAD)data<-matrix(ifelse(rbinom(length(y)*nboot,1,0.5)==1,-1,1),nrow=nboot) +if(!RAD){ +data<-matrix(runif(length(y)*nboot),nrow=nboot) +data<-(data-.5)*sqrt(12) # standardize the random numbers. +} +rvalb<-apply(data,1,lstest4,yhat,res,x) +sum<-sum(rvalb>= wtest[1,1]) +p.val<-sum/nboot +list(p.value=p.val,R.squared=Rsq) +} +lscale<-function(x,m,q) +{ +# +# Compute the L-scale as used by Marrona +# Technometrics, 2005, 47, 264-273 +# +# so it is assumed that values in x have been centered +# (a measure of location has been subtracted from each value) +# and the results squared. +# +# q is defined in Marrona. For principal components, want to reduce +# to p dimensional data, q=ncol(x)-p +# +hval<-floor((length(x)+m-q+2)/2) +flag<-(x<0) +if(sum(flag)>0)stop("For lscale, all values must be nonnegative") +x<-sort(x) +val<-sum(x[1:hval]) +val +} +ortho<-function(x){ +# Orthnormalize x +# +y<-qr(x) +y<-qr.Q(y) +y +} + +Mpca<-function(x,N1=3,N2=2,tol=.001,N2p=10,Nran=50, +Nkeep=10,SEED=TRUE,op.pro=.1,SCORES=FALSE,pval=NULL){ +# +# Robust PCA using Marrona's method (2005, Technometrics) +# +# x is an N by m matrix containing data +# N1, N2, N2p, Nran and Nkeep indicate how many +# iterations are used in the various portions of the +# Marrona robust PCA; see Marrona's paper. +# +# op.pro is the maximum proportion of unexplained +# variance that is desired. If pval is not specified, will +# add variables until this proportion is less than op.pro. +# +# pval, if specified, will use p=pval of the m variables only and report +# the proportion of unexplained variance. +# The weighted covariance matrix is returned as well. +# +# SCORES=T, scores are reported and return based on the number of +# variables indicated by pval. pval must be specified. +# +# pval not specified, computes proportion of unexplained variance +# using p=1, 2 ... 
variables; results returned in +# +scores<-NULL +wt.cov<-NULL +x<-elimna(x) +if(SEED)set.seed(2) +m<-ncol(x) +n<-nrow(x) +bot<-marpca(x,p=0,N1=N1,N2=N2,tol=tol,N2p=N2p,Nran=Nran,Nkeep=Nkeep,SEED=SEED) +bot<-bot$var.op +mn1<-m-1 +rat<-1 +it<-0 +ratval<-NULL +if(is.null(pval)){ +ratval<-matrix(nrow=mn1,ncol=2) +dimnames(ratval)<-list(NULL,c("p","pro.unex.var")) +ratval[,1]<-c(1:mn1) +for(it in 1:mn1){ +if(rat>op.pro){ +temp<-marpca(x,p=it,N1=N1,N2=N2,tol=tol,N2p=N2p,Nran=Nran,Nkeep=Nkeep, +SEED=SEED) +rat<-temp$var.op/bot +ratval[it,2]<-rat +}}} +if(!is.null(pval)){ +if(pval>=m)stop("This method assumes pval0))cor.b=rcovb/temp + list(center = rmnb, cov = rcovb, cor=cor.b) +} +tbscov <- function(x,eps=1e-3,maxiter=20,r=.45,alpha=.05){ +# Rocke's contrained s-estimator +# returns covariance matrix only. For both locatiion and scatter, use tbs +# +# r=.45 is the breakdown point +# alpha=.05 is the asymptotic rejection probability. +# +if(!is.matrix(x))stop("x should be a matrix with two or more columns") +x<-elimna(x) +library(MASS) +temp<-cov.mve(x) +t1<-temp$center +s<-temp$cov + n <- nrow(x) + p <- ncol(x) +if(p==1)stop("x should be a matrix with two or more columns") +c1M<-cgen.bt(n,p,r,alpha,asymp=FALSE) +c1<-c1M$c1 +if(c1==0)c1<-.001 #Otherwise get division by zero +M<-c1M$M + b0 <- erho.bt(p,c1,M) + crit <- 100 + iter <- 1 + w1d <- rep(1,n) + w2d <- w1d + while ((crit > eps)&(iter <= maxiter)) + { + t.old <- t1 + s.old <- s + wt.old <- w1d + v.old <- w2d + d2 <- mahalanobis(x,center=t1,cov=s) + d <- sqrt(d2) + k <- ksolve.bt(d,p,c1,M,b0) + d <- d/k + w1d <- wt.bt(d,c1,M) + w2d <- v.bt(d,c1,M) + t1 <- (w1d %*% x)/sum(w1d) + s <- s*0 + for (i in 1:n) + { + xc <- as.vector(x[i,]-t1) + s <- s + as.numeric(w1d[i])*(xc %o% xc) + } + s <- p*s/sum(w2d) + mnorm <- sqrt(as.vector(t.old) %*% as.vector(t.old)) + snorm <- eigen(s.old)$values[1] + crit1 <- max(abs(t1 - t.old)) +# crit <- max(crit1,crit2) + crit <- max(abs(w1d-wt.old))/max(w1d) + iter <- iter+1 + } +# mnorm <- sqrt(as.vector(t1) %*% as.vector(t1)) +# snorm <- eigen(s)$values[1] +# return(list(t1=t1,s=s)) +s +} +erho.bt <- function(p,c1,M) +# expectation of rho(d) under chi-squared p + return(chi.int(p,2,M)/2 + +(M^2/2+c1*(5*c1+16*M)/30)*chi.int2(p,0,M+c1) + +(M^2/2-M^2*(M^4-5*M^2*c1^2+15*c1^4)/(30*c1^4))*( +chi.int(p,0,M+c1)-chi.int(p,0,M)) + +(1/2+M^4/(2*c1^4)-M^2/c1^2)*(chi.int(p,2,M+c1)-chi.int(p,2,M)) + +(4*M/(3*c1^2)-4*M^3/(3*c1^4))*(chi.int(p,3,M+c1)-chi.int(p,3,M)) + +(3*M^2/(2*c1^4)-1/(2*c1^2))*(chi.int(p,4,M+c1)-chi.int(p,4,M)) + -(4*M/(5*c1^4))*(chi.int(p,5,M+c1)-chi.int(p,5,M)) + +(1/(6*c1^4))*(chi.int(p,6,M+c1)-chi.int(p,6,M))) +chi.int <- function(p,a,c1) +# partial expectation d in (0,c1) of d^a under chi-squared p + return( exp(lgamma((p+a)/2)-lgamma(p/2))*2^{a/2}*pchisq(c1^2,p+a) ) +chi.int2 <- function(p,a,c1) +# partial expectation d in (c1,\infty) of d^a under chi-squared p + return( exp(lgamma((p+a)/2)-lgamma(p/2))*2^{a/2}*(1-pchisq(c1^2,p+a))) +cgen.bt <- function(n,p,r,alpha,asymp=FALSE){ +# find constants c1 and M that gives a specified breakdown r +# and rejection point alpha +if (asymp == FALSE){if (r > (n-p)/(2*n) ) r <- (n-p)/(2*n)} +# maximum achievable breakdown +# +# if rejection is not achievable, use c1=0 and best rejection +# + limvec <- rejpt.bt.lim(p,r) + if (1-limvec[2] <= alpha) + { + c1 <- 0 + M <- sqrt(qchisq(1-alpha,p)) + } + else + { + c1.plus.M <- sqrt(qchisq(1-alpha,p)) + M <- sqrt(p) + c1 <- c1.plus.M - M + iter <- 1 + crit <- 100 + eps <- 1e-5 + while ((crit > eps)&(iter<100)) + { + deps <- 
1e-4 + M.old <- M + c1.old <- c1 + er <- erho.bt(p,c1,M) + fc <- er - r*(M^2/2+c1*(5*c1+16*M)/30) + fcc1 <- (erho.bt(p,c1+deps,M)-er)/deps + fcM <- (erho.bt(p,c1,M+deps)-er)/deps + fcp <- fcM - fcc1 - r*(M-(5*c1+16*M)/30+c1*9/30) + M <- M - fc/fcp + if (M >= c1.plus.M ){M <- (M.old + c1.plus.M)/2} + c1 <- c1.plus.M - M +# if (M-c1 < 0) M <- c1.old+(M.old-c1.old)/2 + crit <- abs(fc) + iter <- iter+1 + } + } +list(c1=c1,M=M,r1=r) +} +erho.bt.lim <- function(p,c1) +# expectation of rho(d) under chi-squared p + return(chi.int(p,2,c1)+c1^2*chi.int2(p,0,c1)) +erho.bt.lim.p <- function(p,c1) +# derivative of erho.bt.lim wrt c1 + return(chi.int.p(p,2,c1)+c1^2*chi.int2.p(p,0,c1)+2*c1*chi.int2(p,0,c1)) + + +rejpt.bt.lim <- function(p,r){ +# find p-value of translated biweight limit c +# that gives a specified breakdown + c1 <- 2*p + iter <- 1 + crit <- 100 + eps <- 1e-5 + while ((crit > eps)&(iter<100)) + { + c1.old <- c1 + fc <- erho.bt.lim(p,c1) - c1^2*r + fcp <- erho.bt.lim.p(p,c1) - 2*c1*r + c1 <- c1 - fc/fcp + if (c1 < 0) c1 <- c1.old/2 + crit <- abs(fc) + iter <- iter+1 + } + return(c(c1,pchisq(c1^2,p),log10(1-pchisq(c1^2,p)))) +} +chi.int.p <- function(p,a,c1) + return( exp(lgamma((p+a)/2)-lgamma(p/2))*2^{a/2}*dchisq(c1^2,p+a)*2*c1 ) +chi.int2.p <- function(p,a,c1) + return( -exp(lgamma((p+a)/2)-lgamma(p/2))*2^{a/2}*dchisq(c1^2,p+a)*2*c1 ) +ksolve.bt <- function(d,p,c1,M,b0){ +# find a constant k which satisfies the s-estimation constraint +# for modified biweight + k <- 1 + iter <- 1 + crit <- 100 + eps <- 1e-5 + while ((crit > eps)&(iter<100)) + { + k.old <- k + fk <- mean(rho.bt(d/k,c1,M))-b0 + fkp <- -mean(psi.bt(d/k,c1,M)*d/k^2) + k <- k - fk/fkp + if (k < k.old/2) k <- k.old/2 + if (k > k.old*1.5) k <- k.old*1.5 + crit <- abs(fk) + iter <- iter+1 + } + return(k) +} +rho.bt <- function(x,c1,M) +{ + x1 <- (x-M)/c1 + ivec1 <- (x1 < 0) + ivec2 <- (x1 > 1) + return(ivec1*(x^2/2) + +ivec2*(M^2/2+c1*(5*c1+16*M)/30) + +(1-ivec1-ivec2)*(M^2/2-M^2*(M^4-5*M^2*c1^2+15*c1^4)/(30*c1^4) + +(1/2+M^4/(2*c1^4)-M^2/c1^2)*x^2 + +(4*M/(3*c1^2)-4*M^3/(3*c1^4))*x^3 + +(3*M^2/(2*c1^4)-1/(2*c1^2))*x^4 + -4*M*x^5/(5*c1^4)+x^6/(6*c1^4))) +} +psi.bt <- function(x,c1,M) +{ + x1 <- (x-M)/c1 + ivec1 <- (x1 < 0) + ivec2 <- (x1 > 1) + return(ivec1*x+(1-ivec1-ivec2)*x*(1-x1^2)^2) +} +psip.bt <- function(x,c1,M) +{ + x1 <- (x-M)/c1 + ivec1 <- (x1 < 0) + ivec2 <- (x1 > 1) + return(ivec1+(1-ivec1-ivec2)*((1-x1^2)^2+4*x*x1*(1-x1^2)/c1)) +} +wt.bt <- function(x,c1,M) +{ + x1 <- (x-M)/c1 + ivec1 <- (x1 < 0) + ivec2 <- (x1 > 1) + return(ivec1+(1-ivec1-ivec2)*(1-x1^2)^2) +} +v.bt <- function(x,c1,M) return(x*psi.bt(x,c1,M)) + +gvarg<-function(m,var.fun=cov.mba,...){ +# +# Compute the generalized variance of a matrix m +# It is assumed that var.fun returns a covariance matrix only +# +# (Some functions return a covariance matrix in list mode: $cov +# These functions do not work here.) +# +# other possible choices for var.fun: +# skipcov +# tbscov +# covout +# covogk +# mgvcov +# mvecov +# mcdcov +# +m<-elimna(m) +m<-as.matrix(m) +temp<-var.fun(m,...) 
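+# The generalized variance is the determinant of the scatter matrix,
+# computed here as the product of its eigenvalues.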
+gvar<-prod(eigen(temp)$values) +gvar +} +marpca<-function(x,p=ncol(x)-1,N1=3,N2=2,tol=.001,N2p=10,Nran=50, +Nkeep=10,SEED=TRUE,LSCALE=TRUE,SCORES=FALSE){ +# +# Marrona (2005, Technometrics, 47, 264-273) robust PCA +# +# x is an n by m matrix, pNran)stop("Must have Nkeep<=Nran") +if(SEED)set.seed(2) +n<-nrow(x) +m<-ncol(x) +q<-m-p +if(q<0)stop("p should have value between 0 and ncol(x)") +if(q>0){ +bkeep<-array(dim=c(q,m,Nran)) +akeep<-matrix(nrow=Nran,ncol=q) +sig.val<-NA +for(it in 1:Nran){ +temp<-marpca.sub(x,p,N1=N1,N2=N2,tol=tol,LSCALE=LSCALE) +bkeep[,,it]<-temp$B +akeep[it,]<-temp$a +sig.val[it]<-temp$var.op +} +ord<-order(sig.val) +bkeep2<-array(dim=c(q,m,Nkeep)) +cmatkeep<-array(dim=c(m,m,Nkeep)) +akeep2<-matrix(nrow=Nkeep,ncol=q) +sig.val2<-NA +for(it in 1:Nkeep){ +temp<-marpca.sub(x,p,N1=0,N2=N2p,tol=tol,B=bkeep[,,ord[it]],a=akeep[ord[it],], +LSCALE=LSCALE) +bkeep2[,,it]<-temp$B +akeep2[it,]<-temp$a +sig.val2[it]<-temp$var.op +cmatkeep[,,it]<-temp$wt.cov +} +ord<-order(sig.val2) +B<-bkeep2[,,ord[1]] +a<-akeep2[ord[1],] +var.op<-sig.val2[ord[1]] +Cmat<-cmatkeep[,,ord[1]] +} +wt.mu<-NULL +if(q==0){ +output<-marpca.sub(x,0,LSCALE=LSCALE) +B<-output$B +a<-output$a +var.op<-output$var.op +wt.mu<-output$mu +Cmat<-output$wt.cov +} +scores<-NULL +if(SCORES){ +ev<-eigen(Cmat) +ord.val<-order(ev$values) +mn1<-m-p+1 +wt.mu<-marpca.sub(x,p=p)$mu +Bp<-ev$vectors[,ord.val[mn1:m]] #m by m +xmmu<-x +for(j in 1:m)xmmu[,j]<-x[,j]-wt.mu[j] +scores<-matrix(ncol=p,nrow=n) +for(i in 1:n)scores[i,]<-t(Bp)%*%as.matrix(xmmu[i,]) +} +list(B=B,a=a,var.op=var.op,wt.cov=Cmat,wt.mu=wt.mu,scores=scores) +} + + + +marpca.sub<-function(x,p=ncol(x)-1,N1=3,N2=2,tol=.001,B=NULL,a=NULL, +LSCALE=TRUE){ +# +# Marrona (2005, Technometrics, 47, 264-273) robust PCA +# +# Note: setting +# p=0 causes B to be the identity matrix, which is used in the case +# p=ncol(x) to estimate proportion of unexplained variance. +# +wt.cov<-NULL +if(!is.null(B)){ +B<-as.matrix(B) +if(ncol(B)==1)B<-t(B) +} +n<-nrow(x) +m<-ncol(x) +q<-m-p +if(q<0)stop("p and q should have values between 1 and ncol(x)") +hval<-floor((n + m - q + 2)/2) +DEL<-Inf +sig0<-Inf +if(is.null(B)){ +if(p>0 && ptol){ +r<-NA +for(i in 1:n)r[i]<-sum(Bx[i,]-a)^2 +if(LSCALE)sig<-lscale(r,m,q) +if(!LSCALE){ +delta<-delta<-(n-m+q-1)/(2*n) +sig<-mscale(r,delta) +} +DEL<-1-sig/sig0 +sig0<-sig +ord.r<-order(r) +w<-rep(0,n) +w[ord.r[1:hval]]<-1 +xx<-x +for(i in 1:n)xx[i,]<-x[i,]*w[i] +mu<-apply(xx,2,FUN="sum")/sum(w) #m by 1 locations +Cmat<-matrix(0,nrow=m,ncol=m) +for(i in 1:n){ +temp<-w[i]*as.matrix(x[i,]-mu)%*%t(as.matrix(x[i,]-mu)) +Cmat<-Cmat+temp +} +wt.cov<-Cmat/sum(w) +if(it>N1){ +temp<-eigen(wt.cov) +ord.eig<-order(temp$values) +for(iq in 1:q)B[iq,]<-temp$vectors[,ord.eig[iq]] +} +a<-B%*%mu +it<-it+1 +} +list(B=B,a=a,var.op=sig,mu=mu,wt.cov=wt.cov) +} + +qregsm<-function(x, y,est=hd,qval=.5,sm=TRUE,plotit=TRUE,pyhat=FALSE,fr=0.8,nboot=40,xlab="X", +ylab="Y",xout=FALSE,outfun=outpro,STAND=TRUE,...) +{ +# +# Do a smooth of x versus the quantiles of y +# +# qval indicates quantiles of interest. +# Example: qval=c(.2,.8) will create two smooths, one for the +# .2 quantile and the other for the .8 quantile. +# +# est can be any quantile estimator having the argument qval, indicating +# the quantile to be used. +# +# est = hd uses Harrel Davis estimator, +# est = qest uses a single order statistic. +# +# sm=T, bagging will be used. 
+# pyhat=T returns the estimates +# +chk=FALSE +if(identical(est,hd))chk=TRUE +#if(identical(est,qest))chk=TRUE +if(!chk)stop('For current version, argument est must be hd') +x<-as.matrix(x) +X<-cbind(x,y) +X<-elimna(X) +np<-ncol(X) +p<-np-1 +x<-X[,1:p] +x<-as.matrix(x) +y<-X[,np] +if(xout){ +m<-cbind(x,y) +flag<-outfun(x,plotit=FALSE)$keep +m<-m[flag,] +x<-m[,1:p] +y<-m[,np] +} +vals<-matrix(NA,ncol=length(y),nrow=length(qval)) +for(i in 1:length(qval)){ +if(sm)vals[i,]<-rplotsm(x,y,est=est,q=qval[i],pyhat=TRUE,plotit=FALSE,fr=fr,nboot=nboot, +na.rm=FALSE,STAND=STAND)$yhat +#if(!sm)vals[i,]<-rungen(x,y,est=est,q=qval[i],pyhat=TRUE,plotit=FALSE,fr=fr,na.rm=FALSE)$output +if(!sm)vals[i,]<-rplot(x,y,est=est,q=qval[i],pyhat=TRUE,plotit=FALSE,fr=fr,na.rm=FALSE)$yhat +} +if(p==1){ +if(plotit){ +plot(x,y,xlab=xlab,ylab=ylab) +for(i in 1:length(qval)){ +sx <- sort(x) +xorder <- order(x) +sysm <- vals[i,] +#lines(sx, sysm) +lines(sx, sysm[xorder]) +}}} +output <- "Done" +if(pyhat)output <- vals +output +} + +L1median <- function(X, tol = 1e-08, maxit = 200, m.init = apply(X, 2, median), + trace = FALSE) +{ + ## L1MEDIAN calculates the multivariate L1 median + ## I/O: mX=L1median(X,tol); + ## + ## X : the data matrix + ## tol: the convergence criterium: + ## the iterative process stops when ||m_k - m_{k+1}|| < tol. + ## maxit: maximum number of iterations + ## init.m: starting value for m; typically coordinatewise median + ## + ## Ref: Hossjer and Croux (1995) + ## "Generalizing Univariate Signed Rank Statistics for Testing + ## and Estimating a Multivariate Location Parameter"; + ## Non-parametric Statistics, 4, 293-308. + ## + ## Implemented by Kristel Joossens + ## Many thanks to Martin Maechler for improving the program! + + ## slightly faster version of 'sweep(x, 2, m)': + centr <- function(X,m) X - rep(m, each = n) + ## computes objective function in m based on X and a: + mrobj <- function(X,m) sum(sqrt(rowSums(centr(X,m)^2))) + + d <- dim(X); n <- d[1]; p <- d[2] + m <- m.init + if(!is.numeric(m) || length(m) != p) + stop("'m.init' must be numeric of length p =", p) + k <- 1 + if(trace) nstps <- 0 + while (k <= maxit) { + mold <- m + obj.old <- if(k == 1) mrobj(X,mold) else obj + X. <- centr(X, m) + Xnorms <- sqrt(rowSums(X. ^ 2)) + inorms <- order(Xnorms) + dx <- Xnorms[inorms] # smallest first, i.e., 0's if there are + X <- X [inorms,] + X. <- X.[inorms,] + ## using 1/x weighting {MM: should this be generalized?} + w <- ## (0 norm -> 0 weight) : + if (all(dn0 <- dx != 0)) 1/dx + else c(rep.int(0, length(dx)- sum(dn0)), 1/dx[dn0]) + delta <- colSums(X. 
* rep(w,p)) / sum(w) + nd <- sqrt(sum(delta^2)) + + maxhalf <- if (nd < tol) 0 else ceiling(log2(nd/tol)) + m <- mold + delta # computation of a new estimate + ## If step 'delta' is too far, we try halving the stepsize + nstep <- 0 + while ((obj <- mrobj(X, m)) >= obj.old && nstep <= maxhalf) { + nstep <- nstep+1 + m <- mold + delta/(2^nstep) + } + if(trace) { + if(trace >= 2) + cat(sprintf("k=%3d obj=%19.12g m=(",k,obj), + paste(formatC(m),collapse=","), + ")", if(nstep) sprintf(" nstep=%2d halvings",nstep) else "", + "\n", sep="") + nstps[k] <- nstep + } + if (nstep > maxhalf) { ## step halving failed; keep old + m <- mold + ## warning("step halving failed in ", maxhalf, " steps") + break + } + k <- k+1 + } + if (k > maxit) warning("iterations did not converge in ", maxit, " steps") + if(trace == 1) + cat("needed", k, "iterations with a total of", + sum(nstps), "stepsize halvings\n") + return(m) +} +llocv2<-function(x,est=median,...){ +if(!is.list(x))val<-est(x,...) +if(is.list(x)){ +val<-NA +for(i in 1:length(x))val[i]<-est(x[[i]],...) +} +if(is.matrix(x))val<-apply(x,2,est,...) +list(center=val) +} +mcppb<-function(x,crit=NA,con=0,tr=.2,alpha=.05,nboot=2000,grp=NA,WIN=FALSE, +win=.1){ +# +# Compute a 1-alpha confidence interval for a set of d linear contrasts +# involving trimmed means using the percentile bootstrap method. +# Independent groups are assumed. +# +# The data are assumed to be stored in x in list mode. Thus, +# x[[1]] contains the data for the first group, x[[2]] the data +# for the second group, etc. Length(x)=the number of groups = J, say. +# +# Or the data can be stored in a matrix with J columns +# +# By default, all pairwise comparisons are performed, but contrasts +# can be specified with the argument con. +# The columns of con indicate the contrast coefficients. +# Con should have J rows, J=number of groups. +# For example, con[,1]=c(1,1,-1,-1,0,0) and con[,2]=c(,1,-1,0,0,1,-1) +# will test two contrasts: (1) the sum of the first two trimmed means is +# equal to the sum of the second two, and (2) the difference between +# the first two is equal to the difference between the trimmed means of +# groups 5 and 6. +# +# The default number of bootstrap samples is nboot=2000 +# +# +con<-as.matrix(con) +if(is.matrix(x)){ +xx<-list() +for(i in 1:ncol(x)){ +xx[[i]]<-x[,i] +} +x<-xx +} +if(!is.list(x))stop("Data must be stored in list mode or in matrix mode.") +if(!is.na(sum(grp))){ # Only analyze specified groups. +xx<-list() +for(i in 1:length(grp))xx[[i]]<-x[[grp[1]]] +x<-xx +} +J<-length(x) +tempn<-0 +for(j in 1:J){ +temp<-x[[j]] +temp<-temp[!is.na(temp)] # Remove missing values. 
+tempn[j]<-length(temp) +x[[j]]<-temp +} +Jm<-J-1 +d<-ifelse(sum(con^2)==0,(J^2-J)/2,ncol(con)) +if(is.na(crit) && tr != .2)stop("A critical value must be specified when +the amount of trimming differs from .2") +if(WIN){ +if(tr < .2)warning("When Winsorizing, the amount of trimming should be at least +.2") +if(win > tr)stop("Amount of Winsorizing must <= amount of trimming") +if(min(tempn) < 15){warning("Winsorizing with sample sizes less than 15 can") +warning(" result in poor control over the probability of a Type I error") +} +for (j in 1:J){ +x[[j]]<-winval(x[[j]],win) +} +} +if(is.na(crit)){ +if(d==1)crit<-alpha/2 +if(d==2 && alpha==.05 && nboot==1000)crit<-.014 +if(d==2 && alpha==.05 && nboot==2000)crit<-.014 +if(d==3 && alpha==.05 && nboot==1000)crit<-.009 +if(d==3 && alpha==.05 && nboot==2000)crit<-.0085 +if(d==3 && alpha==.025 && nboot==1000)crit<-.004 +if(d==3 && alpha==.025 && nboot==2000)crit<-.004 +if(d==3 && alpha==.01 && nboot==1000)crit<-.001 +if(d==3 && alpha==.01 && nboot==2000)crit<-.001 +if(d==4 && alpha==.05 && nboot==2000)crit<-.007 +if(d==5 && alpha==.05 && nboot==2000)crit<-.006 +if(d==6 && alpha==.05 && nboot==1000)crit<-.004 +if(d==6 && alpha==.05 && nboot==2000)crit<-.0045 +if(d==6 && alpha==.025 && nboot==1000)crit<-.002 +if(d==6 && alpha==.025 && nboot==2000)crit<-.0015 +if(d==6 && alpha==.01 && nboot==2000)crit<-.0005 +if(d==10 && alpha==.05 && nboot<=2000)crit<-.002 +if(d==10 && alpha==.05 && nboot==3000)crit<-.0023 +if(d==10 && alpha==.025 && nboot<=2000)crit<-.0005 +if(d==10 && alpha==.025 && nboot==3000)crit<-.001 +if(d==15 && alpha==.05 && nboot==2000)crit<-.0016 +if(d==15 && alpha==.025 && nboot==2000)crit<-.0005 +if(d==15 && alpha==.05 && nboot==5000)crit<-.0026 +if(d==15 && alpha==.025 && nboot==5000)crit<-.0006 +} +if(is.na(crit) && alpha==.05)crit<-0.0268660714*(1/d)-0.0003321429 +if(is.na(crit))crit<-alpha/(2*d) +if(d> 10 && nboot <5000)warning("Suggest using nboot=5000 when the number +of contrasts exceeds 10.") +icl<-round(crit*nboot)+1 +icu<-round((1-crit)*nboot) +if(sum(con^2)==0){ +con<-matrix(0,J,d) +id<-0 +for (j in 1:Jm){ +jp<-j+1 +for (k in jp:J){ +id<-id+1 +con[j,id]<-1 +con[k,id]<-0-1 +}}} +psihat<-matrix(0,ncol(con),6) +dimnames(psihat)<-list(NULL,c("con.num","psihat","se","ci.lower", +"ci.upper","p.value")) +if(nrow(con)!=length(x))stop("The number of groups does not match the number + of contrast coefficients.") +bvec<-matrix(NA,nrow=J,ncol=nboot) +set.seed(2) # set seed of random number generator so that +# results can be duplicated. +for(j in 1:J){ +data<-matrix(sample(x[[j]],size=length(x[[j]])*nboot,replace=TRUE),nrow=nboot) +bvec[j,]<-apply(data,1,mean,tr) # Bootstrapped trimmed means for jth group +} +test<-NA +for (d in 1:ncol(con)){ +top<-0 +for (i in 1:J){ +top<-top+con[i,d]*bvec[i,] +} +test[d]<-sum((top>0))/nboot +test[d]<-min(test[d],1-test[d]) +top<-sort(top) +psihat[d,4]<-top[icl] +psihat[d,5]<-top[icu] +} +for (d in 1:ncol(con)){ +psihat[d,1]<-d +testit<-lincon(x,con[,d],tr,pr=FALSE) +psihat[d,6]<-test[d] +psihat[d,2]<-testit$psihat[1,2] +psihat[d,3]<-testit$test[1,4] +} +print("Reminder: To control FWE, reject if the p-value is less than") +print("the crit.p.value listed in the output.") +list(psihat=psihat,crit.p.value=crit,con=con) +} + +llocv2<-function(x,est=median,...){ +if(!is.list(x))val<-est(x,...) +if(is.list(x)){ +val<-NA +for(i in 1:length(x))val[i]<-est(x[[i]],...) +} +if(is.matrix(x))val<-apply(x,2,est,...) 
+list(center=val) +} +NMpca<-function(x,B,...){ +# +# Robust PCA using orthogonal matrices and +# robust generalized variance method +# This function is used by Ppca +# +n<-x[1] +m<-x[2] +p=x[3] +x=matrix(x[4:length(x)],ncol=m) +B=matrix(B,ncol=m) +vals<-NA +z<-matrix(nrow=n,ncol=p) +B <- t(ortho(t(B))) # so rows are orthogonal +for(i in 1:n)z[i,]<-B%*%as.matrix(x[i,]) +vals<-0-gvarg(z) +vals +} + +ancbbpb<-function(x1,y1,x2,y2,fr1=1,est=tmean,fr2=1,nboot=200,pts=NA,plotit=TRUE,SCAT=TRUE, +pch1='+',pch2='o', +SEED=TRUE,alpha=.05,RNA=TRUE,sm=FALSE,LP=TRUE,xout=FALSE,outfun=outpro,...){ +# +# Compare two independent groups using an ancova method. +# A running-interval smooth is used to estimate the regression lines and is +# based in part on bootstrap bagging. +# +# This function is limited to two groups and one covariate. +# +# No assumption is made about the parametric form of the regression +# lines. +# Confidence intervals are computed using a percentile bootstrap +# method. Comparisons are made at five empirically chosen design points when +# pts=NA. To compare groups at specified x values, use pts. +# Example: pts=c(60,70,80) will compare groups at the three design points +# 60, 70 and 80. +# +# xout=F, when plotting, keep leverage points +# sm=F, when plotting, do not use bootstrap bagging +# +# Assume data are in x1 y1 x2 and y2 +# +# fr1 and fr2 are the spans used by the smooth. +# +# SCAT=FALSE will suppress the scatterplot when plotting the regression lines. +# +# RNA=F, when computing bagged estimate, NA values are not removed +# resulting in no estimate of Y at the specified design point, +# RNA=T, missing values are removed and the remaining values are used. +# +xy=elimna(cbind(x1,y1)) +x1=xy[,1] +y1=xy[,2] +xy=elimna(cbind(x2,y2)) +x2=xy[,1] +y2=xy[,2] +# +if(xout){ +flag<-outfun(x1,...)$keep +x1<-x1[flag] +y1<-y1[flag] +flag<-outfun(x2,...)$keep +x2<-x2[flag] +y2<-y2[flag] +} +if(SEED)set.seed(2) +flag=TRUE +if(is.na(pts[1])){ +flag=FALSE +isub<-c(1:5) # Initialize isub +test<-c(1:5) +xorder<-order(x1) +y1<-y1[xorder] +x1<-x1[xorder] +xorder<-order(x2) +y2<-y2[xorder] +x2<-x2[xorder] +n1<-1 +n2<-1 +vecn<-1 +for(i in 1:length(x1))n1[i]<-length(y1[near(x1,x1[i],fr1)]) +for(i in 1:length(x1))n2[i]<-length(y2[near(x2,x1[i],fr2)]) +for(i in 1:length(x1))vecn[i]<-min(n1[i],n2[i]) +sub<-c(1:length(x1)) +isub[1]<-min(sub[vecn>=12]) +isub[5]<-max(sub[vecn>=12]) +isub[3]<-floor((isub[1]+isub[5])/2) +isub[2]<-floor((isub[1]+isub[3])/2) +isub[4]<-floor((isub[3]+isub[5])/2) +mat<-matrix(NA,5,8) +dimnames(mat)<-list(NULL,c("X","n1","n2","DIF","ci.low","ci.hi","p.value","p.crit")) +gv1<-vector("list") +for (i in 1:5){ +j<-i+5 +temp1<-y1[near(x1,x1[isub[i]],fr1)] +temp2<-y2[near(x2,x1[isub[i]],fr2)] +temp1<-temp1[!is.na(temp1)] +temp2<-temp2[!is.na(temp2)] +mat[i,1]<-x1[isub[i]] +mat[i,2]<-length(temp1) +mat[i,3]<-length(temp2) +mat[,4]<-runmbo(x1,y1,pts=x1[isub],pyhat=TRUE,plotit=FALSE,SEED=FALSE,est=est,RNA=RNA)- +runmbo(x2,y2,pts=x1[isub],pyhat=TRUE,plotit=FALSE,SEED=FALSE,est=est,RNA=RNA) +gv1[[i]]<-temp1 +gv1[[j]]<-temp2 +} +I1<-diag(5) +I2<-0-I1 +con<-rbind(I1,I2) +estmat1<-matrix(nrow=nboot,ncol=length(isub)) +estmat2<-matrix(nrow=nboot,ncol=length(isub)) +data1<-matrix(sample(length(y1),size=length(y1)*nboot,replace=TRUE),nrow=nboot) +data2<-matrix(sample(length(y2),size=length(y2)*nboot,replace=TRUE),nrow=nboot) +# +for(ib in 1:nboot){ +estmat1[ib,]=runmbo(x1[data1[ib,]],y1[data1[ib,]],pts=x1[isub], +pyhat=TRUE,plotit=FALSE,SEED=FALSE,est=est,...) 
+estmat2[ib,]=runmbo(x2[data2[ib,]],y2[data2[ib,]],pts=x1[isub], +pyhat=TRUE,plotit=FALSE,SEED=FALSE,est=est,...) +} +dif<-(estmat1 maxhalf) { ## step halving failed; keep old + m <- mold + ## warning("step halving failed in ", maxhalf, " steps") + break + } + k <- k+1 + } + if (k > maxit) warning("iterations did not converge in ", maxit, " steps") + if(trace == 1) + cat("needed", k, "iterations with a total of", + sum(nstps), "stepsize halvings\n") +# return(m) +list(center=m) +} + +matl<-function(x){ +# +# take data in list mode and store it in a matrix +# +J=length(x) +nval=NA +for(j in 1:J)nval[j]=length(x[[j]]) +temp<-matrix(NA,ncol=J,nrow=max(nval)) +for(j in 1:J)temp[1:nval[j],j]<-x[[j]] +temp +} + +list2mat=matl + +list2vec<-function(x){ +if(!is.list(x))stop("x should have list mode") +res=as.vector(matl(x)) +res +} + + +list2matrix<-function(x){ +# +# take data in list mode and store it in a matrix +# +J=length(x) +nval=NA +for(j in 1:J)nval[j]=length(x[[j]]) +temp<-matrix(NA,ncol=J,nrow=max(nval)) +for(j in 1:J)temp[1:nval[j],j]<-x[[j]] +temp +} +Aband<-function(x,alpha=.05,plotit=TRUE,sm=TRUE,SEED=TRUE,nboot=500,grp=c(1:4), +xlab="X (First Factor)",ylab="Delta",crit=NA,print.all=FALSE,plot.op=FALSE){ +# +# Apply the shift function when analyzing main effect in a +# 2 by 2 design. +# +# For variables x1, x2, x3 and x4, +# In effect, this function applies a shift function to the distributions +# d1=(x1+x2)/2 and d2=(x3+x4)/2 +# That is, focus on first factor. +# For second factor, use Bband. +# +# grp indicates the groups to be compared. By default grp=c(1,2,3,4) +# meaning that the first level of factor A consists of groups 1 and 2 +# and the 2nd level of factor A consists of groups 3 and 4. +# (So level 1 of factor B consists of groups 1 and 3 +# +# print.all=F, +# returns number sig, meaning number of confidence intervals that do not +# contain zero, +# the critical value used as well as the KS test statistics. +# print.all=T reports all confidence intervals, the number of which can +# be large. +# +if(!is.list(x) && !is.matrix(x))stop("store data in list mode or a matrix") +if(SEED)set.seed(2) +if(is.matrix(x))x<-listm(x) +for(j in 1:length(x))x[[j]]=elimna(x[[j]])/2 +if(length(grp)<4)stop("There must be at least 4 groups") +if(length(x)!=4)stop("The argument grp must have 4 values") +x<-x[grp] +n<-c(length(x[[1]]),length(x[[2]]),length(x[[3]]),length(x[[4]])) +# Approximate the critical value +# +vals<-NA +y<-list() +if(is.na(crit)){ +print("Approximating critical value. Please wait.") +for(i in 1:nboot){ +for(j in 1:4) +y[[j]]<-rnorm(n[j]) +temp<-ks.test(outer(y[[1]],y[[2]],FUN="+"),outer(y[[3]],y[[4]],FUN="+")) +vals[i]<-temp[1]$statistic +} +vals<-sort(vals) +ic<-(1-alpha)*nboot +crit<-vals[ic] +} +if(plot.op){ +plotit<-F +g2plot(v1,v2) +} +output<-sband(outer(x[[1]],x[[2]],FUN="+"),outer(x[[3]],x[[4]],FUN="+"), +plotit=plotit,crit=crit,flag=FALSE,sm=sm,xlab=xlab,ylab=ylab) +if(!print.all){ +numsig<-output$numsig +ks.test.stat<-ks.test(outer(x[[1]],x[[2]],FUN="+"), +outer(x[[3]],x[[4]],FUN="+"))$statistic +output<-matrix(c(numsig,crit,ks.test.stat),ncol=1) +dimnames(output)<-list(c("number sig","critical value","KS test statistics"), +NULL) +} +output +} + +Bband<-function(x,alpha=.05,plotit=TRUE,sm=TRUE,SEED=TRUE,nboot=500,grp=c(1:4), +xlab="X (First Level)",ylab="Delta",crit=NA,print.all=FALSE,plot.op=FALSE){ +# +# Apply the shift function when analyzing main effect in a +# 2 by 2 design. 
+# +# For variables x1, x2, x3 and x4, +# In effect, this function applies a shift function to the distributions +# d1=(x1+x3)/2 and d2=(x2+x4)/2. +# That is, focus on main effects of Factor B. +# +# grp indicates the groups to be compared. By default grp=c(1,2,3,4) +# meaning that the first level of factor A consists of groups 1 and 2 +# and the 2nd level of factor A consists of groups 3 and 4. +# (So level 1 of factor B consists of groups 1 and 3 +# +# print.all=F, +# returns number sig, meaning number of confidence intervals that do not +# contain zero, +# the critical value used as well as the KS test statistics. +# print.all=T reports all confidence intervals, the number of which can +# be large. +# +if(!is.list(x) && !is.matrix(x))stop("store data in list mode or a matrix") +if(SEED)set.seed(2) +if(is.matrix(x))x<-listm(x) +for(j in 1:length(x))x[[j]]=elimna(x[[j]])/2 +if(length(x)<4)stop("There must be at least 4 groups") +if(length(grp)!=4)stop("The argument grp must have 4 values") +x<-x[grp] +grp=c(1,3,2,4) +x<-x[grp] # Arrange groups for main effects on factor B +n<-c(length(x[[1]]),length(x[[2]]),length(x[[3]]),length(x[[4]])) +# Approximate the critical value +# +vals<-NA +y<-list() +if(is.na(crit)){ +print("Approximating critical value. Please wait.") +for(i in 1:nboot){ +for(j in 1:4) +y[[j]]<-rnorm(n[j]) +temp<-ks.test(outer(y[[1]],y[[2]],FUN="+"),outer(y[[3]],y[[4]],FUN="+")) +vals[i]<-temp[1]$statistic +} +vals<-sort(vals) +ic<-(1-alpha)*nboot +crit<-vals[ic] +} +if(plot.op){ +plotit<-F +g2plot(v1,v2) +} +output<-sband(outer(x[[1]],x[[2]],FUN="+"),outer(x[[3]],x[[4]],FUN="+"), +plotit=plotit,crit=crit,flag=FALSE,sm=sm,xlab=xlab,ylab=ylab) +if(!print.all){ +numsig<-output$numsig +ks.test.stat<-ks.test(outer(x[[1]],x[[2]],FUN="+"), +outer(x[[3]],x[[4]],FUN="+"))$statistic +output<-matrix(c(numsig,crit,ks.test.stat),ncol=1) +dimnames(output)<-list(c("number sig","critical value","KS test statistics"), +NULL) +} +output +} + +iband<-function(x,alpha=.05,q = c(0.1, 0.25, 0.5, 0.75, 0.9), method='BH', SW=FALSE, plotit=FALSE,SEED=TRUE,nboot=500,grp=c(1:4), +xlab='X'){ +# +# 2 by 2 design. 
+# +# For variables x1, x2, x3 and x4, +# This function compares the quantiles of the distributions +# d1=x1-x2 and d2=x3-x4 +# +# SW=TRUE: switch rows and columns +# +if(SEED)set.seed(2) +if(is.matrix(x) || is.data.frame(x))x<-listm(x) +if(length(x)!=4)stop('Should be exactly 4 groups') +for(j in 1:length(x))x[[j]]=elimna(x[[j]]) +if(SW)x=x[c(1,3,2,4)] +n<-c(length(x[[1]]),length(x[[2]]),length(x[[3]]),length(x[[4]])) +nq=length(q) +output=matrix(NA,nrow=length(q),ncol=8) +dimnames(output)=list(NULL,c('Quant','Est.Lev 1','Est.Lev 2','Dif','ci.low','ci.up','p-value','p.adj')) +output[,1]=q +for(j in 1:nq)output[j,2]=hd(outer(x[[1]],x[[2]],FUN='-'),q=q[j]) +for(j in 1:nq)output[j,3]=hd(outer(x[[3]],x[[4]],FUN='-'),q=q[j]) +output[,4]=output[,2]-output[,3] +e=lapply(q,iband.sub,x=x,nboot=nboot) +for(j in 1:nq)output[j,5]=e[[j]]$ci[1] +for(j in 1:nq)output[j,6]=e[[j]]$ci[2] +for(j in 1:nq)output[j,7]=e[[j]]$p.value +output[,8]=p.adjust(output[,7],method=method) +if(plotit){ +g2plot(outer(x[[1]],x[[2]],FUN='-'),outer(x[[3]],x[[4]],FUN='-'),xlab=xlab) + +} +output +} + +iband.sub<-function(q,x,nboot=500,alpha=.05,SEED=FALSE){ +# +# +# +if(SEED)set.seed(2) +if(is.matrix(x))x<-listm(x) +if(length(x)!=4)stop('There must be 4 groups') +for(j in 1:length(x))x[[j]]=elimna(x[[j]]) +v1=NA +v2=NA +B=list() +for(i in 1:nboot){ +for(j in 1:4)B[[j]]=sample(x[[j]],replace=TRUE) +v1[i]=hd(outer(B[[1]],B[[2]],FUN='-'),q=q) +v2[i]=hd(outer(B[[3]],B[[4]],FUN='-'),q=q) +} +p=mean(v10 & !is.na(l)])+length(u[u<0 & !is.na(u)]) +qhat<-c(1:length(x))/length(x) +m<-matrix(c(qhat,l,u),length(x),3) +dimnames(m)<-list(NULL,c("qhat","lower","upper")) +xsort<-sort(x) +ysort<-sort(y) +del<-0 +for (i in 1:length(x)){ +ival<-round(length(y)*i/length(x)) +if(ival<=0)ival<-1 +if(ival>length(y))ival<-length(y) +del[i]<-ysort[ival]-xsort[i] +} +if(iloop==1){ +allx<-c(xsort,xsort,xsort) +ally<-c(del,m[,2],m[,3]) +} +if(iloop==2){ +allx<-c(allx,xsort,xsort,xsort) +ally<-c(ally,del,m[,2],m[,3]) +plot(allx,ally,type="n",ylab=ylab,xlab=xlab) +} +ik<-rep(F,length(xsort)) +if(sm){ +if(op==1){ +ik<-duplicated(xsort) +del<-lowess(xsort,del)$y +} +if(op!=1)del<-runmean(xsort,del,pyhat=TRUE) +} +if(iloop==1){ +xsort1=xsort[!ik] +del1=del[!ik] +} +if(iloop==2){ +lines(xsort1,del1,lty=iloop) +lines(xsort[!ik],del[!ik],lty=iloop) +}} +done="Done" +done +} + + +scor<-function(x,y=NULL,corfun=pcor,gval=NA,plotit=FALSE,op=TRUE,MM=FALSE,cop=3,xlab='VAR 1', +ylab='VAR 2',STAND=TRUE,pr=TRUE,SEED=TRUE,MC=FALSE,RAN=FALSE){ +# +# Compute a skipped correlation coefficient. +# +# Eliminate outliers using a projection method +# That is, compute Donoho-Gasko median, for each point +# consider the line between it and the median, +# project all points onto this line, and +# check for outliers using a boxplot rule. +# Repeat this for all points. A point is declared +# an outlier if for any projection it is an outlier +# using a modification of the usual boxplot rule. +# +# For information about the argument cop, see the function +# outpro. +# +# Eliminate any outliers and compute correlation using +# remaining data. +# +# MC=TRUE, the multicore version of outpro is used +# +# corfun=pcor means Pearson's correlation is used. +# corfun=spear means Spearman's correlation is used. +# corfun=tau means Kendall tau is used. +# +#. 
RAN=TRUE uses random projections instead, which results in faster execution time +# +if(SEED){ +oldSeed <- .Random.seed +set.seed(12) # So when using MVE or MCD, get consistent results +} +if(identical(corfun,wincor))corfun=winall +if(is.null(y[1]))m<-x +if(!is.null(y[1]))m<-cbind(x,y) +m<-elimna(m) +if(!RAN){ +if(!MC)temp<-outpro(m,gval=gval,plotit=plotit,op=op,cop=cop,MM=MM, +xlab=xlab,ylab=ylab,STAND=STAND,pr=pr)$keep +if(MC)temp<-outproMC(m,gval=gval,plotit=plotit,op=op,cop=cop,MM=MM, +xlab=xlab,ylab=ylab,STAND=STAND,pr=pr)$keep +} +if(RAN)temp=outpro.depth(m,MM=MM,plotit=plotit)$keep +tcor<-corfun(m[temp,])$cor +if(!is.null(dim((tcor))))tcor<-tcor[1,2] +test<-abs(tcor*sqrt((nrow(m)-2)/(1-tcor**2))) +if(ncol(m)!=2)diag(test)<-NA +crit<-6.947/nrow(m)+2.3197 +if(SEED) { + assign(x='.Random.seed', value=oldSeed, envir=.GlobalEnv) +} +list(cor=tcor,test.stat=test,crit.05=crit) +} + + +cov.mba<-function(x,COR=FALSE){ +val<-covmba2(x)$cov +if(COR){ +val=val/outer(sqrt(diag(val)),sqrt(diag(val))) +} +val +} +qregci<-function(x,y,nboot=100,alpha=.05,qval=.5,q=NULL,SEED=TRUE,pr=TRUE,xout=FALSE,outfun=outpro,...){ +# +# Test the hypothesis that the quantile regression slopes are zero. +# +# qval=.5 i.e, default is to +# use the .5 quantile regression line only. +# +# Suggest only using quantiles between +# .2 and .8. If using both .2 and .8 quantiles, or +# the .2, .5 and .8 quantile regression lines. +# FWE is controlled for alpha=.1, .05, .025 and .01. +# +if(!is.null(q))qval=q +xx<-elimna(cbind(x,y)) +np<-ncol(xx) +p<-np-1 +y<-xx[,np] +x<-xx[,1:p] +x<-as.matrix(x) +if(xout){ +if(pr)print("Default for argument outfun is now outpro") +x<-as.matrix(x) +flag<-outfun(x,plotit=FALSE,...)$keep +x<-x[flag,] +y<-y[flag] +} +x<-as.matrix(x) +n<-length(y) +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +#if(pr)print("Taking bootstrap samples. Please wait.") +data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot) +# determine critical value. 
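+# The constants below are approximations to the critical values, expressed
+# as a function of the sample size n; they are available only for
+# alpha=.1, .05, .025 and .01 (otherwise crit.fwe remains NA and a
+# message is printed below).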
+crit<-NA +if(alpha==.1)crit<-1.645-1.19/sqrt(n) +if(alpha==.05)crit<-1.96-1.37/sqrt(n) +if(alpha==.025)crit<-2.24-1.18/sqrt(n) +if(alpha==.01)crit<-2.58-1.69/sqrt(n) +crit.fwe<-crit +if(length(qval)==2 || p==2){ +if(alpha==.1)crit.fwe<-1.98-1.13/sqrt(n) +if(alpha==.05)crit.fwe<-2.37-1.56/sqrt(n) +if(alpha==.025)crit.fwe<-2.60-1.04/sqrt(n) +if(alpha==.01)crit.fwe<-3.02-1.35/sqrt(n) +} +if(length(qval)==3 || p==3){ +if(alpha==.1)crit.fwe<-2.145-1.31/sqrt(n) +if(alpha==.05)crit.fwe<-2.49-1.49/sqrt(n) +if(alpha==.025)crit.fwe<-2.86-1.52/sqrt(n) +if(alpha==.01)crit.fwe<-3.42-1.85/sqrt(n) +} +if(is.na(crit.fwe)){ +print("Could not determine a critical value") +print("Only alpha=.1, .05, .025 and .01 are allowed") +} +if(p==1){ +bvec<-apply(data,1,qindbt.sub,x,y,qval=qval) +estsub<-NA +for(i in 1:length(qval)){ +estsub[i]<-qreg(x,y,qval[i])$coef[2] +} +if(is.matrix(bvec))se.val<-sqrt(apply(bvec,1,FUN=var)) +if(!is.matrix(bvec))se.val<-sqrt(var(bvec)) +test<-abs(estsub)/se.val +ci.mat<-matrix(nrow=length(qval),ncol=3) +dimnames(ci.mat)<-list(NULL,c("Quantile","ci.lower","ci.upper")) +ci.mat[,1]<-qval +ci.mat[,2]<-estsub-crit*se.val +ci.mat[,3]<-estsub+crit*se.val +} +if(p>1){ +if(length(qval)>1){ +print("With p>1 predictors,only the first qval value is used") +} +bvec<-apply(data,1,regboot,x,y,regfun=qreg,qval=qval[1]) +se.val<-sqrt(apply(bvec,1,FUN=var)) +estsub<-qreg(x,y,qval=qval[1])$coef +test<-abs(estsub)/se.val +ci.mat<-matrix(nrow=np,ncol=3) +dimnames(ci.mat)<-list(NULL,c("Predictor","ci.lower","ci.upper")) +ci.mat[,1]<-c(0:p) +ci.mat[,2]<-estsub-crit*se.val +ci.mat[,3]<-estsub+crit*se.val +} +list(test=test,se.val=se.val,crit.val=crit,crit.fwe=crit.fwe, +slope.est=estsub,ci=ci.mat) +} + + + + +covmba2<-function(x, csteps = 5) +{ +# Perform the median ball algorithm. +# +# It returns a measure of location and scatter for the +# multivariate data in x, which is assumed to have +# p>-2 column and n rows. 
+# +# This code is based on a very slight modificatiion of code originally +# written by David Olive +# +x<-as.matrix(x) +if(!is.matrix(x))stop("x should be a matrix") + p <- dim(x)[2] +#if(p==1)stop("x should be a matrix with two or more columns of variables") + ##get the DGK estimator + covs <- var(x) + mns <- apply(x, 2, mean) ## concentrate + for(i in 1:csteps) { + md2 <- mahalanobis(x, mns, covs) + medd2 <- median(md2) +# mns <- apply(x[md2 <= medd2, ], 2, + mns <- apply(as.matrix(x[md2 <= medd2, ]), 2, + mean) + covs <- var(x[md2 <= medd2, ]) + } + covb <- covs + mnb <- mns ##get the square root of det(covb) + critb <- prod(diag(chol(covb))) + ##get the resistant estimator + covv <- diag(p) + med <- apply(x, 2, median) + md2 <- mahalanobis(x, center = med, covv) + medd2 <- median(md2) ## get the start +# mns <- apply(x[md2 <= medd2, ], 2, mean) + mns <- apply(as.matrix(x[md2 <= medd2, ]), 2, mean) + covs <- var(x[md2 <= medd2, ]) ## concentrate + for(i in 1:csteps) { + md2 <- mahalanobis(x, mns, covs) + medd2 <- median(md2) + # mns <- apply(x[md2 <= medd2, ], 2,mean) + mns <- apply(as.matrix(x[md2 <= medd2, ]), 2, mean) + covs <- var(x[md2 <= medd2, ]) + } + crit <- prod(diag(chol(covs))) + if(crit < critb) { + critb <- crit + covb <- covs + mnb <- mns + } +##scale for better performance at MVN + rd2 <- mahalanobis(x, mnb, covb) + const <- median(rd2)/(qchisq(0.5, p)) + covb <- const * covb + list(center = mnb, cov = covb) +} + + +rmmcp<-function(x, y=NULL,con = 0, tr = 0.2, alpha = 0.05,dif=TRUE,hoch=TRUE,na.rm=TRUE){ +# +# MCP on trimmed means with FWE controlled with Hochberg's method +# hoch=FALSE, will use Rom's method if alpha=.05 or .01 and number of tests is <=10 +# +# Note: confidence intervals are adjusted based on the corresponding critical p-value. 
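+#
+# Example (illustrative): with m an n by J matrix of dependent measures,
+# rmmcp(m) performs all pairwise comparisons based on 20% trimmed means of
+# the difference scores; rmmcp(m,dif=FALSE) compares the marginal trimmed
+# means instead, and specific linear contrasts can be supplied via con.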
+# +if(!is.null(y))x=cbind(x,y) +flagcon=FALSE +if(!is.matrix(x))x<-matl(x) +if(!is.matrix(x))stop("Data must be stored in a matrix or in list mode.") +con<-as.matrix(con) +J<-ncol(x) +xbar<-vector("numeric",J) +x<-elimna(x) # Remove missing values +nval<-nrow(x) +h1<-nrow(x)-2*floor(tr*nrow(x)) +df<-h1-1 +for(j in 1: J)xbar[j]<-mean(x[,j],tr) +if(sum(con^2!=0))CC<-ncol(con) +if(sum(con^2)==0)CC<-(J^2-J)/2 +ncon<-CC +if(alpha==.05){ +dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) +if(ncon > 10){ +avec<-.05/c(11:ncon) +dvec<-c(dvec,avec) +}} +if(alpha==.01){ +dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) +if(ncon > 10){ +avec<-.01/c(11:ncon) +dvec<-c(dvec,avec) +}} +if(hoch)dvec<-alpha/c(1:ncon) +if(alpha != .05 && alpha != .01)dvec<-alpha/c(1:ncon) +if(sum(con^2)==0){ +flagcon<-TRUE +psihat<-matrix(0,CC,5) +dimnames(psihat)<-list(NULL,c("Group","Group","psihat","ci.lower","ci.upper")) +test<-matrix(NA,CC,6) +dimnames(test)<-list(NULL,c("Group","Group","test","p.value","p.crit","se")) +temp1<-0 +jcom<-0 +for (j in 1:J){ +for (k in 1:J){ +if (j < k){ +jcom<-jcom+1 +q1<-(nrow(x)-1)*winvar(x[,j],tr) +q2<-(nrow(x)-1)*winvar(x[,k],tr) +q3<-(nrow(x)-1)*wincor(x[,j],x[,k],tr)$cov +sejk<-sqrt((q1+q2-2*q3)/(h1*(h1-1))) +if(!dif){ +test[jcom,6]<-sejk +test[jcom,3]<-(xbar[j]-xbar[k])/sejk +temp1[jcom]<-2 * (1 - pt(abs(test[jcom,3]), df)) +test[jcom,4]<-temp1[jcom] +psihat[jcom,1]<-j +psihat[jcom,2]<-k +test[jcom,1]<-j +test[jcom,2]<-k +psihat[jcom,3]<-(xbar[j]-xbar[k]) +} +if(dif){ +dv<-x[,j]-x[,k] +test[jcom,6]<-trimse(dv,tr) +temp<-trimci(dv,alpha=alpha/CC,pr=FALSE,tr=tr) +test[jcom,3]<-temp$test.stat +temp1[jcom]<-temp$p.value +test[jcom,4]<-temp1[jcom] +psihat[jcom,1]<-j +psihat[jcom,2]<-k +test[jcom,1]<-j +test[jcom,2]<-k +psihat[jcom,3]<-mean(dv,tr=tr) +psihat[jcom,4]<-temp$ci[1] +psihat[jcom,5]<-temp$ci[2] +} +}}} +if(hoch)dvec<-alpha/c(1:ncon) +temp2<-order(0-temp1) +zvec<-dvec[1:ncon] +sigvec<-(test[temp2,4]>=zvec) +if(sum(sigvec)0){ +if(nrow(con)!=ncol(x))warning("The number of groups does not match the number + of contrast coefficients.") +ncon<-ncol(con) +psihat<-matrix(0,ncol(con),4) +dimnames(psihat)<-list(NULL,c("con.num","psihat","ci.lower","ci.upper")) +test<-matrix(0,ncol(con),5) +dimnames(test)<-list(NULL,c("con.num","test","p.value","p.crit","se")) +temp1<-NA +for (d in 1:ncol(con)){ +psihat[d,1]<-d +if(!dif){ +psihat[d,2]<-sum(con[,d]*xbar) +sejk<-0 +for(j in 1:J){ +for(k in 1:J){ +djk<-(nval-1)*wincor(x[,j],x[,k], tr)$cov/(h1*(h1-1)) +sejk<-sejk+con[j,d]*con[k,d]*djk +}} +sejk<-sqrt(sejk) +test[d,1]<-d +test[d,2]<-sum(con[,d]*xbar)/sejk +test[d,5]<-sejk +temp1[d]<-2 * (1 - pt(abs(test[d,2]), df)) +} +if(dif){ +for(j in 1:J){ +if(j==1)dval<-con[j,d]*x[,j] +if(j>1)dval<-dval+con[j,d]*x[,j] +} +temp1[d]<-trimci(dval,tr=tr,pr=FALSE)$p.value +test[d,1]<-d +test[d,2]<-trimci(dval,tr=tr,pr=FALSE)$test.stat +test[d,5]<-trimse(dval,tr=tr) +psihat[d,2]<-mean(dval,tr=tr) +}} +test[,3]<-temp1 +temp2<-order(0-temp1) +zvec<-dvec[1:ncon] +sigvec<-(test[temp2,3]>=zvec) +if(sum(sigvec)=K) +i0<-sum(flag) +il<-length(y)-i0+1 +res.sort<-sort(res.scale) +if(i0>0){ +dval<-pnorm(res.sort[il:length(y)])-c(il:length(y))/length(y) +} +if(i0<=0)dval<-0 +dval<-max(dval) +ndval<-floor(length(y)*dval) +if(ndval<0)ndval<-0 +iup<-length(y)-ndval +rord<-order(res.scale) +flag<-rord[1:iup] +x=as.matrix(x) +temp<-lsfit(x[flag,],y[flag]) +list(coef=temp$coef,res=temp$residual) +} + + + +bwrmcp<-function(J,K,x,grp=NA,alpha=.05,bhop=TRUE){ +# +# Do all pairwise 
comparisons of +# main effects for Factor A and B and all interactions +# using a rank-based method that tests for equal distributions. +# +# A between by within subjects design is assumed. +# Levels of Factor A are assumed to be independent and +# levels of Factor B are dependent. +# +# The data are assumed to be stored in x in list mode or in a matrix. +# If grp is unspecified, it is assumed x[[1]] contains the data +# for the first level of both factors: level 1,1. +# x[[2]] is assumed to contain the data for level 1 of the +# first factor and level 2 of the second factor: level 1,2 +# x[[j+1]] is the data for level 2,1, etc. +# If the data are in wrong order, grp can be used to rearrange the +# groups. For example, for a two by two design, grp<-c(2,4,3,1) +# indicates that the second group corresponds to level 1,1; +# group 4 corresponds to level 1,2; group 3 is level 2,1; +# and group 1 is level 2,2. +# +# Missing values are automatically removed. +# + if(is.list(x))xrem=matl(x) + JK <- J * K + if(is.matrix(x)){ + xrem=x + x <- listm(x) +} + + if(!is.na(grp[1])) { + yy <- x + x<-list() + for(j in 1:length(grp)) + x[[j]] <- yy[[grp[j]]] + } + if(!is.list(x)) + stop("Data must be stored in list mode or a matrix.") +# for(j in 1:JK) { +# xx <- x[[j]] +# x[[j]] <- xx[!is.na(xx)] # Remove missing values +# } + # +if(JK != length(x))warning("The number of groups does not match the number of contrast coefficients.") +for(j in 1:JK){ +temp<-x[[j]] +temp<-temp[!is.na(temp)] # Remove missing values. +x[[j]]<-temp +} +# +CC<-(J^2-J)/2 +# Determine critical values +ncon<-CC*(K^2-K)/2 +if(!bhop){ +if(alpha==.05){ +dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) +if(ncon > 10){ +avec<-.05/c(11:ncon) +dvec<-c(dvec,avec) +}} +if(alpha==.01){ +dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) +if(ncon > 10){ +avec<-.01/c(11:ncon) +dvec<-c(dvec,avec) +}} +if(alpha != .05 && alpha != .01)dvec<-alpha/c(1:ncon) +} +if(bhop)dvec<-(ncon-c(1:ncon)+1)*alpha/ncon +Fac.A<-matrix(0,CC,5) +dimnames(Fac.A)<-list(NULL,c("Level","Level","test.stat","p-value","sig.crit")) +mat<-matrix(c(1:JK),ncol=K,byrow=TRUE) +ic<-0 +for(j in 1:J){ +for(jj in 1:J){ +if(j < jj){ +ic<-ic+1 +Fac.A[ic,1]<-j +Fac.A[ic,2]<-jj +datsub=xrem[,c(mat[j,],mat[jj,])] +datsub=elimna(datsub) +#temp<-bwrank(2,K,elimna(x[,c(mat[j,],mat[jj,])])) +temp<-bwrank(2,K,datsub) +Fac.A[ic,3]<-temp$test.A +Fac.A[ic,4]<-temp$p.value.A +}}} +temp2<-order(0-Fac.A[,4]) +Fac.A[temp2,5]<-dvec[1:length(temp2)] +CCB<-(K^2-K)/2 +ic<-0 +Fac.B<-matrix(0,CCB,5) +dimnames(Fac.B)<-list(NULL,c("Level","Level","test.stat","p-value","sig.crit")) +for(k in 1:K){ +for(kk in 1:K){ +if(k=12]) +isub[5]<-max(sub[vecn>=12]) +isub[3]<-floor((isub[1]+isub[5])/2) +isub[2]<-floor((isub[1]+isub[3])/2) +isub[4]<-floor((isub[3]+isub[5])/2) +mat<-matrix(NA,5,7) +dimnames(mat)<-list(NULL,c("X","n1","n2","DIF","ci.low","ci.hi","p.value")) +gv1<-vector("list") +for (i in 1:5){ +j<-i+5 +temp1<-y1[near(x1,x1[isub[i]],fr1)] +temp2<-y2[near(x2,x1[isub[i]],fr2)] +temp1<-temp1[!is.na(temp1)] +temp2<-temp2[!is.na(temp2)] +mat[i,1]<-x1[isub[i]] +mat[i,2]<-length(temp1) +mat[i,3]<-length(temp2) +mat[,4]<-runmbo(x1,y1,pts=x1[isub],pyhat=TRUE,plotit=FALSE,SEED=FALSE,est=tmean)- +runmbo(x2,y2,pts=x1[isub],pyhat=TRUE,plotit=FALSE,SEED=FALSE,est=median) +gv1[[i]]<-temp1 +gv1[[j]]<-temp2 +} +I1<-diag(5) +I2<-0-I1 +con<-rbind(I1,I2) +estmat1<-matrix(nrow=nboot,ncol=length(isub)) +estmat2<-matrix(nrow=nboot,ncol=length(isub)) 
+data1<-matrix(sample(length(y1),size=length(y1)*nboot,replace=TRUE),nrow=nboot) +data2<-matrix(sample(length(y2),size=length(y2)*nboot,replace=TRUE),nrow=nboot) +# +for(ib in 1:nboot){ +estmat1[ib,]=runmbo(x1[data1[ib,]],y1[data1[ib,]],pts=x1[isub], +pyhat=TRUE,plotit=FALSE,SEED=FALSE,est=median) +estmat2[ib,]=runmbo(x2[data2[ib,]],y2[data2[ib,]],pts=x1[isub], +pyhat=TRUE,plotit=FALSE,SEED=FALSE,est=median) +} +dif<-(estmat1=.5)stop("Amount of trimming must be less than .5") +if(is.list(m))m<-matl(m) +if(!is.matrix(m))stop("Data must be stored in a matrix or in list mode.") +if(ncol(m)==1){ +if(tr<.5)val<-mean(m,tr) +} +if(ncol(m)>1){ +temp<-NA +if(ncol(m)!=2){ +# Use approximate depth +if(dop==1)temp<-fdepth(m,plotit=FALSE,cop=cop) +if(dop==2)temp<-fdepthv2(m) +} +# Use exact depth if ncol=2 +if(ncol(m)==2){ +for(i in 1:nrow(m)) +temp[i]<-depth(m[i,1],m[i,2],m) +}} +mdep<-max(temp) +flag<-(temp==mdep) +flag2<-(temp>=tr) +if(sum(flag2)==0)stop("Trimmed all of the data") +if(sum(flag2)==1){ +if(pr)print("Warning: Trimmed all but one point") +val<-0 +} +if(sum(flag2)>1)val<-var(m[flag2,]) +val +} + +medr<-function(x,est=median,alpha=.05,nboot=500,grp=NA,op=1,MM=FALSE,cop=3,pr=TRUE, +SEED=TRUE,...){ +# +# Test the hypothesis that the distribution for each pairwise +# difference has a measure of location = 0 +# By default, the median estimator is used +# +# Independent groups are assumed. +# +# The data are assumed to be stored in x in list mode or in a matrix. +# If stored in list mode, +# x[[1]] contains the data for the first group, x[[2]] the data +# for the second group, etc. Length(x)=the number of groups = J, say. +# If stored in a matrix, columns correspond to groups. +# +# By default, all pairwise differences are used, but contrasts +# can be specified with the argument con. +# The columns of con indicate the contrast coefficients. +# Con should have J rows, J=number of groups. +# For example, con[,1]=c(1,1,-1,-1,0,0) and con[,2]=c(,1,-1,0,0,1,-1) +# will test two contrasts: (1) the sum of the first +# two measures of location is +# equal to the sum of the second two, and (2) the difference between +# the first two is equal to the difference between the +# measures of location for groups 5 and 6. +# +# The default number of bootstrap samples is nboot=500 +# +# op controls how depth is measured +# op=1, Mahalanobis +# op=2, Mahalanobis based on MCD covariance matrix +# op=3, Projection distance +# op=4, Projection distance using FORTRAN version +# +# for arguments MM and cop, see pdis. +# +if(is.matrix(x)){ +xx<-list() +for(i in 1:ncol(x)){ +xx[[i]]<-x[,i] +} +x<-xx +} +if(!is.list(x))stop("Data must be stored in list mode or in matrix mode.") +if(!is.na(grp)){ # Only analyze specified groups. +xx<-list() +for(i in 1:length(grp))xx[[i]]<-x[[grp[1]]] +x<-xx +} +J<-length(x) +mvec<-NA +for(j in 1:J){ +temp<-x[[j]] +temp<-temp[!is.na(temp)] # Remove missing values. +x[[j]]<-temp +mvec[j]<-est(temp,...) +} +Jm<-J-1 +d<-(J^2-J)/2 +data<-list() +bvec<-matrix(NA,ncol=d,nrow=nboot) +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +if(pr)print("Taking bootstrap samples. 
Please wait.") +for(it in 1:nboot){ +for(j in 1:J)data[[j]]<-sample(x[[j]],size=length(x[[j]]),replace=TRUE) +dval<-0 +for(j in 1:J){ +for(k in 1:J){ +if(j=dv[1:nboot])/nboot +if(op==4)print(sig.level) +list(sig.level=sig.level,output=output) +} + +medind<-function(x,y,qval=.5,nboot=1000,com.pval=FALSE,SEED=TRUE,alpha=.05,pr=TRUE, +xout=FALSE,outfun=out,...){ +# +# Test the hypothesis that the regression surface is a flat +# horizontal plane. +# The method is based on a modification of a method derived by +# He and Zhu 2003, JASA, 98, 1013-1022. +# Here, resampling is avoided using approximate critical values if +# com.pval=F +# +# critical values are available for 10<=n<=400, p=1,...,8 and +# quantiles +# qval=.25,.5, .75. +# +# To get a p-value, via simulations, set com.pval=T +# nboot is number of simulations used to determine the p-value. +# +if(pr){ +if(!com.pval)print("To get a p-value, set com.pval=T") +print("Reject if the test statistic exceeds the critical value") +if(length(y)>400)print("If n>400, current version requires com.pval=TRUE, resulting in high execution time") +} +#store.it=F +x<-as.matrix(x) +p<-ncol(x) +pp1<-p+1 +p.val<-NULL +crit.val<-NULL +yx<-elimna(cbind(y,x)) #Eliminate missing values. +y<-yx[,1] +x<-yx[,2:pp1] +x<-as.matrix(x) +if(xout){ +flag<-outfun(x,...)$keep +x<-x[flag,] +y<-y[flag] +} +n<-length(y) +if(n>400)com.pval=T +if(qval==.5){ +resmat1=matrix(c( 0.0339384580, 0.044080032, 0.050923441, 0.064172557, + 0.0153224731, 0.021007108, 0.027687963, 0.032785044, + 0.0106482053, 0.014777728, 0.018249546, 0.023638611, + 0.0066190573, 0.009078091, 0.011690825, 0.014543009, + 0.0031558563, 0.004374515, 0.005519069, 0.007212951, + 0.0015448987, 0.002231473, 0.002748314, 0.003725916, + 0.0007724197, 0.001021767, 0.001370776, 0.001818037),ncol=4,nrow=7,byrow=TRUE) +resmat2=matrix(c( + 0.052847794, 0.061918744, 0.071346969, 0.079163419, + 0.021103277, 0.027198076, 0.031926052, 0.035083610, + 0.013720585, 0.018454145, 0.022177381, 0.026051716, + 0.008389969, 0.010590374, 0.012169233, 0.015346065, + 0.004261627, 0.005514060, 0.007132021, 0.008416836, + 0.001894753, 0.002416311, 0.003085230, 0.003924706, + 0.001045346, 0.001347837, 0.001579373, 0.001864344),ncol=4,nrow=7,byrow=TRUE) +resmat3=matrix(c( +0.071555715, 0.082937665, 0.089554679, 0.097538044, +0.031060795, 0.035798539, 0.043862556, 0.053712151, +0.019503635, 0.023776479, 0.027180121, 0.030991367, +0.011030001, 0.013419347, 0.015557409, 0.017979524, +0.005634478, 0.006804788, 0.007878358, 0.008807657, +0.002552182, 0.003603778, 0.004275965, 0.005021989, +0.001251044, 0.001531919, 0.001800608, 0.002037870),ncol=4,nrow=7,byrow=TRUE) +resmat4=matrix(c( +0.093267532, 0.101584002, 0.108733965, 0.118340448, +0.038677863, 0.045519806, 0.051402903, 0.060097046, +0.024205231, 0.029360145, 0.034267265, 0.039381482, +0.013739157, 0.015856343, 0.018065898, 0.019956084, +0.006467562, 0.007781030, 0.009037972, 0.010127143, +0.003197162, 0.003933525, 0.004656625, 0.005929469, +0.001652690, 0.001926060, 0.002363874, 0.002657071),ncol=4,nrow=7,byrow=TRUE) +resmat5=matrix(c( +0.117216934, 0.124714114, 0.129458602, 0.136456163, +0.048838630, 0.055608712, 0.060580045, 0.067943676, +0.030594644, 0.035003872, 0.040433885, 0.047648696, +0.016940240, 0.019527491, 0.022047442, 0.025313443, +0.008053039, 0.009778574, 0.011490394, 0.013383628, +0.003760567, 0.004376294, 0.005097890, 0.005866240, +0.001894616, 0.002253522, 0.002612405, 0.002938808),ncol=4,nrow=7,byrow=TRUE) +resmat6=matrix(c( +0.136961531, 0.144120225, 0.149003907, 
0.152667432, +0.055909481, 0.062627211, 0.069978086, 0.081189957, +0.034634825, 0.040740587, 0.044161376, 0.047722045, +0.020165417, 0.023074738, 0.025881208, 0.028479913, +0.009436297, 0.011246968, 0.013220963, 0.015100546, +0.004644596, 0.005334418, 0.006040595, 0.007237195, +0.002277590, 0.002635712, 0.002997398, 0.003669488),ncol=4,nrow=7,byrow=TRUE) +resmat7=matrix(c( + 0.156184672, 0.163226643, 0.171754686, 0.177142753, + 0.070117003, 0.077052773, 0.082728047, 0.090410797, + 0.041774517, 0.047379662, 0.053101833, 0.057674454, + 0.023384451, 0.026014421, 0.029609042, 0.032619018, + 0.010856382, 0.012567043, 0.013747870, 0.016257014, + 0.005164004, 0.006131755, 0.006868101, 0.008351046, + 0.002537642, 0.003044154, 0.003623654, 0.003974469),ncol=4,nrow=7,byrow=TRUE) +resmat8=matrix(c( +0.178399742, 0.180006714, 0.193799396, 0.199585892, +0.078032767, 0.085624186, 0.091511226, 0.102491785, +0.045997886, 0.052181615, 0.057362163, 0.062630424, +0.025895739, 0.029733034, 0.033764463, 0.037873655, +0.012195876, 0.013663248, 0.015487587, 0.017717864, +0.005892418, 0.006876488, 0.007893475, 0.008520783, +0.002839731, 0.003243909, 0.003738571, 0.004124057),ncol=4,nrow=7,byrow=TRUE) +crit5=array(cbind(resmat1,resmat2,resmat3,resmat4,resmat5,resmat6,resmat7, +resmat8),c(7,4,8)) +flag=TRUE +crit.val=NULL +if(p > 8)flag=FALSE +if(n<10 || n>=400)flag=FALSE +aval<-c(.1,.05,.025,.01) +aokay<-duplicated(c(alpha,aval)) +if(sum(aokay)==0)flag=FALSE +if(flag){ +nalpha=c(0:4) +asel=c(0,aval) +ialpha=nalpha[aokay] +critit=crit5[,ialpha,p] +nvec<-c(10,20,30,50,100,200,400) +nval<-duplicated(c(n,nvec)) +nval<-nval[2:8] +if(sum(nval)>0)crit.val<-critit[nval] +loc<-rank(c(n,nvec)) +xx<-c(1/nvec[loc[1]-1]^1.5,1/nvec[loc[1]]^1.5) +yy<-c(critit[loc[1]-1],critit[loc[1]]) +icoef<-tsp1reg(xx,yy)$coef +crit.val<-icoef[1]+icoef[2]/n^1.5 +}} +mqval<-min(c(qval,1-qval)) +if(mqval==.25){ +resmat1=matrix(c( + 0.029933486, 0.0395983678, 0.054087714, 0.062961453, + 0.011122294, 0.0149893431, 0.018154062, 0.022685244, + 0.009207200, 0.0113020766, 0.014872309, 0.019930730, + 0.004824185, 0.0070402246, 0.010356886, 0.013176896, + 0.002370379, 0.0033146605, 0.004428004, 0.005122988, + 0.001106460, 0.0016110185, 0.001984450, 0.002650256, + 0.000516646, 0.0006796144, 0.000868751, 0.001202042),ncol=4,nrow=7,byrow=TRUE) +resmat2=matrix(c( +0.0448417783, 0.0602598211, 0.066001091, 0.087040667, +0.0173410522, 0.0224713157, 0.027370822, 0.033435727, +0.0121205549, 0.0150409465, 0.018938516, 0.022643559, +0.0064894201, 0.0084611518, 0.010700320, 0.013232000, +0.0029734778, 0.0040641310, 0.004911086, 0.005769038, +0.0015149104, 0.0020584993, 0.002582982, 0.003114029, +0.0007984207, 0.0009929547, 0.001182739, 0.001398774),ncol=4,nrow=7,byrow=TRUE) +resmat3=matrix(c( +0.0636530860, 0.072974943, 0.083840562, 0.097222407, +0.0216586978, 0.027436566, 0.031875356, 0.036830302, +0.0152898678, 0.018964066, 0.021728817, 0.028959751, +0.0083568493, 0.010071525, 0.012712862, 0.015254576, +0.0039033578, 0.004764140, 0.005577071, 0.006660322, +0.0019139215, 0.002343152, 0.002833612, 0.003465269, +0.0009598105, 0.001146689, 0.001355930, 0.001547572),ncol=4,nrow=7,byrow=TRUE) +resmat4=matrix(c( + 0.085071252, 0.095947936, 0.104197413, 0.118449765, + 0.029503024, 0.034198704, 0.039543410, 0.045043759, + 0.019203266, 0.022768842, 0.026886843, 0.033481535, + 0.011440493, 0.013555017, 0.016138970, 0.018297815, + 0.004863139, 0.005756305, 0.007385239, 0.009114958, + 0.002635144, 0.003111160, 0.003769051, 0.004215897, + 0.001188837, 0.001435179, 0.001727871, 
0.001956372),ncol=4,nrow=7,byrow=TRUE) +resmat5=matrix(c( +0.102893512, 0.114258558, 0.122545016, 0.130222265, +0.036733497, 0.042504996, 0.048663576, 0.055456582, +0.024192946, 0.028805967, 0.032924489, 0.038209545, +0.012663224, 0.014635216, 0.017275594, 0.019736410, +0.006105572, 0.007310803, 0.008960242, 0.009745320, +0.003067163, 0.003614637, 0.003997615, 0.004812373, +0.001441008, 0.001732819, 0.002078651, 0.002307551),ncol=4,nrow=7,byrow=TRUE) +resmat6=matrix(c( +0.117642769, 0.126566104, 0.133106804, 0.142280074, +0.044309420, 0.049731991, 0.053912739, 0.060512997, +0.028607224, 0.033826020, 0.038616476, 0.043546500, +0.015445120, 0.017557181, 0.020040720, 0.022747707, +0.007334749, 0.008406468, 0.009392098, 0.010919651, +0.003352200, 0.003814582, 0.004380562, 0.005252154, +0.001703698, 0.002001713, 0.002338651, 0.002772864),ncol=4,nrow=7,byrow=TRUE) +resmat7=matrix(c( +0.106573121, 0.113058950, 0.117388191, 0.121286795, +0.052170054, 0.058363322, 0.064733684, 0.069749344, +0.030696897, 0.035506926, 0.039265698, 0.044437674, +0.016737307, 0.019605734, 0.021253610, 0.022922988, +0.007767232, 0.009231789, 0.010340874, 0.011471110, +0.003998261, 0.004590177, 0.005506926, 0.006217415, +0.001903372, 0.002174748, 0.002519055, 0.002858655),ncol=4,nrow=7,byrow=TRUE) +resmat8=matrix(c( + 0.119571179, 0.126977461, 0.130120853, 0.133258294, + 0.059499563, 0.067185338, 0.071283297, 0.079430577, + 0.034310968, 0.039827130, 0.044451690, 0.048512464, + 0.018599530, 0.021093909, 0.023273085, 0.027471116, + 0.009135712, 0.010901687, 0.012288682, 0.013729545, + 0.004382249, 0.005191810, 0.005598429, 0.006484433, + 0.002196973, 0.002525918, 0.002818550, 0.003242426),ncol=4,nrow=7,byrow=TRUE) +crit5=array(cbind(resmat1,resmat2,resmat3,resmat4,resmat5,resmat6,resmat7, +resmat8),c(7,4,8)) +flag=TRUE +crit.val=NULL +if(p > 8)flag=FALSE +if(n<10 || n>=400)flag=FALSE +aval<-c(.1,.05,.025,.01) +aokay<-duplicated(c(alpha,aval)) +if(sum(aokay)==0)flag=FALSE +if(flag){ +nalpha=c(0:4) +asel=c(0,aval) +ialpha=nalpha[aokay] +critit=crit5[,ialpha,p] +nvec<-c(10,20,30,50,100,200,400) +nval<-duplicated(c(n,nvec)) +nval<-nval[2:8] +if(sum(nval)>0)crit.val<-critit[nval,p] +loc<-rank(c(n,nvec)) +xx<-c(1/nvec[loc[1]-1]^1.5,1/nvec[loc[1]]^1.5) +yy<-c(critit[loc[1]-1],critit[loc[1]]) +icoef<-tsp1reg(xx,yy)$coef +crit.val<-icoef[1]+icoef[2]/n^1.5 +}} +if(is.null(crit.val))com.pval=TRUE +# no critical value found, so a p-value will be computed +# the code for checking the file medind.crit, which appears +# next, is not working yet. 
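+# Compute the He-Zhu type test statistic: the design (with intercept) is
+# orthonormalized, partial sums of psi(residual) times the design vectors
+# are formed, and the largest eigenvalue of their average outer product
+# is used as the test statistic.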
+gdot<-cbind(rep(1,n),x) +gdot<-ortho(gdot) +x<-gdot[,2:pp1] +x<-as.matrix(x) +coef<-NULL +if(qval==.5)coef<-median(y) +if(qval==.25)coef<-idealf(y)$ql +if(qval==.75)coef<-idealf(y)$qu +if(is.null(coef))coef<-qest(y,q=qval) +res<-y-coef +psi<-NA +psi<-ifelse(res>0,qval,qval-1) +rnmat<-matrix(0,nrow=n,ncol=pp1) +ran.mat<-apply(x,2,rank) +flagvec<-apply(ran.mat,1,max) +for(j in 1:n){ +flag<-ifelse(flagvec<=flagvec[j],TRUE,FALSE) +flag<-as.numeric(flag) +rnmat[j,]<-apply(flag*psi*gdot,2,sum) +} +rnmat<-rnmat/sqrt(n) +temp<-matrix(0,pp1,pp1) +for(i in 1:n)temp<-temp+rnmat[i,]%*%t(rnmat[i,]) +temp<-temp/n +test<-max(eigen(temp)$values) +if(com.pval){ +if(SEED)set.seed(2) +p.val<-0 +rem<-0 +for(i in 1:nboot){ +yboot<-rnorm(n) +if(p==1)xboot<-rnorm(n) +if(p>1)xboot<-rmul(n,p=p) +temp3<-medindsub(x,yboot,qval=qval) +if(test>=temp3)p.val<-p.val+1 +rem[i]<-temp3 +} +ic10<-round(.9*nboot) +ic05<-round(.95*nboot) +ic025<-round(.975*nboot) +ic001<-round(.99*nboot) +rem<-sort(rem) +p.val<-1-p.val/nboot +# now remember the critical values by storing them in "medind.crit" +#if(store.it) +#write(c(n,p,qval,rem[ic10],rem[ic05],rem[ic025],rem[ic001]),"medind.crit", +#append=T,ncolumns=7) +print("The .1, .05, .025 and .001 critical values are:") +print(c(rem[ic10],rem[ic05],rem[ic025],rem[ic001])) +crit.val<-rem[ic05] +} +names(crit.val)="" +Decision="Fail To Reject" +if(test>=crit.val)Decision="Reject" +list(test.stat=test,crit.value=crit.val,p.value=p.val,Decision=Decision) +} + + +medindsub<-function(x,y,qval=.5){ +# +x<-as.matrix(x) +n<-length(y) +p<-ncol(x) +pp1<-p+1 +tvec<-c(qval,0-qval,1-qval,qval-1) +pval<-c((1-qval)/2,(1-qval)/2,qval/2,qval/2) +gdot<-cbind(rep(1,n),x) +gdot<-ortho(gdot) +x<-gdot[,2:pp1] +x<-as.matrix(x) +if(qval==.5)coef<-median(y) +if(qval!=.5)coef<-qest(y) +res<-y-coef +psi<-NA +psi<-ifelse(res>0,qval,qval-1) +rnmat<-matrix(0,nrow=n,ncol=pp1) +ran.mat<-apply(x,2,rank) +flagvec<-apply(ran.mat,1,max) +for(j in 1:n){ +flag<-ifelse(flagvec>=flagvec[j],T,F) +rnmat[j,]<-apply(flag*psi*gdot,2,sum) +} +rnmat<-rnmat/sqrt(n) +temp<-matrix(0,pp1,pp1) +for(i in 1:n)temp<-temp+rnmat[i,]%*%t(rnmat[i,]) +temp<-temp/n +test<-max(eigen(temp)$values) +test +} +linplot<-function(x,con=0,plotfun=akerd,nboot=800,plotit=TRUE,pyhat=FALSE,...){ +# +# plot distribtion of the linear contrast +# c_1X_2+c_2X_2+... +# +# con contains contrast coefficients. If not specified, +# con<-c(1,1,...,1) +# +if(is.matrix(x))x<-listm(x) +if(!is.list(x))stop("Data must be stored in list mode or in matrix mode.") +J<-length(x) +tempn<-0 +mvec<-NA +for(j in 1:J){ +temp<-x[[j]] +temp<-temp[!is.na(temp)] # Remove missing values. +tempn[j]<-length(temp) +x[[j]]<-temp +} +Jm<-J-1 +# +# Determine contrast matrix +# If not specified, assume distribution of the sum is to be plotted +# +if(sum(con^2)==0)con<-matrix(1,J,1) +bvec<-matrix(NA,nrow=J,ncol=nboot) +for(j in 1:J){ +data<-matrix(sample(x[[j]],size=nboot,replace=TRUE),nrow=nboot) +bvec[j,]<-data +} +bcon<-t(con)%*%bvec #ncon by nboot matrix +bcon<-as.vector(bcon) +dval<-plotfun(bcon,pyhat=pyhat,...) +dval +} +lin2plot<-function(x,con,op=4,nboot=800,plotit=TRUE,pyhat=FALSE){ +# +# plot two distribtions. +# The first is the distribtion of the linear contrast +# c_1X_2+c_2X_2+... c_i>0 +# and the second is the distribution of c_1X_2+c_2X_2+... c_i<0 +# +# con contains contrast coefficients. If not specified, +# function terminates. 
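+#
+# Example (illustrative): for four groups stored in x,
+# lin2plot(x,con=c(1,1,-1,-1)) plots the bootstrap distribution of the
+# positive-coefficient part of the contrast and that of the
+# negative-coefficient part.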
+# +# +if(is.matrix(x))x<-listm(x) +if(!is.list(x))stop("Data must be stored in list mode or in matrix mode.") +J<-length(x) +if(J != length(con)){ +stop("Number of contrast coefficients must equal the number of groups") +} +for(j in 1:J){ +temp<-x[[j]] +temp<-temp[!is.na(temp)] # Remove missing values. +x[[j]]<-temp +} +# +# Determine contrast matrix for positive contrast coefficients +# +flag<-(con<0) +con1<-con +con1[flag]<-0 +# Determine contrast matrix for negative contrast coefficients +flag<-(con>0) +con2<-con +con2[flag]<-0 +bvec<-matrix(NA,nrow=J,ncol=nboot) +for(j in 1:J){ +data<-matrix(sample(x[[j]],size=nboot,replace=TRUE),nrow=nboot) +bvec[j,]<-data +} +bcon1<-t(con1)%*%bvec +bcon2<-t(con2)%*%bvec +bcon1<-as.vector(bcon1) +bcon2<-as.vector(bcon2) +fval<-g2plot(bcon1,bcon2,op=op,rval=15,fr=0.8,aval=0.5,xlab="X",ylab="") +fval +} +adrunl<-function(x,y,est=tmean,iter=10,pyhat=FALSE,plotit=TRUE,fr=.8, +xlab="x1",ylab="x2",zlab="",theta=50,phi=25,expand=.5,scale=FALSE, +zscale=TRUE,xout=FALSE,outfun=out,ticktype="simple",...){ +# +# additive model based on running interval smoother +# and backfitting algorithm +# +m<-elimna(cbind(x,y)) +x<-as.matrix(x) +p<-ncol(x) +if(p==1)val<-lplot(x[,1],y,pyhat=TRUE,plotit=plotit,span=fr,pr=FALSE)$yhat.values +if(p>1){ +library(MASS) +library(akima) +np<-p+1 +x<-m[,1:p] +y<-m[,np] +fhat<-matrix(NA,ncol=p,nrow=length(y)) +fhat.old<-matrix(NA,ncol=p,nrow=length(y)) +res<-matrix(NA,ncol=np,nrow=length(y)) +dif<-1 +for(i in 1:p) +fhat.old[,i]<-lplot(x[,i],y,pyhat=TRUE,plotit=FALSE,span=fr,pr=FALSE)$yhat.values +eval<-NA +for(it in 1:iter){ +for(ip in 1:p){ +res[,ip]<-y +for(ip2 in 1:p){ +if(ip2 != ip)res[,ip]<-res[,ip]-fhat.old[,ip2] +} +fhat[,ip]<-lplot(x[,ip],res[,ip],pyhat=TRUE,plotit=FALSE,span=fr,pr=FALSE)$yhat.values +} +eval[it]<-sum(abs(fhat/sqrt(sum(fhat^2))-fhat.old/sqrt(sum(fhat.old^2)))) +if(it > 1){ +itm<-it-1 +dif<-abs(eval[it]-eval[itm]) +} +fhat.old<-fhat +if(dif<.01)break +} +val<-apply(fhat,1,sum) +aval<-est(y-val,...) +val<-val+aval +if(plotit && p==2){ +fitr<-val +iout<-c(1:length(fitr)) +nm1<-length(fitr)-1 +for(i in 1:nm1){ +ip1<-i+1 +for(k in ip1:length(fitr))if(sum(x[i,]==x[k,])==2)iout[k]<-0 +} +fitr<-fitr[iout>=1] # Eliminate duplicate points in the x-y plane +# This is necessary when doing three dimensional plots +# with the R function interp +mkeep<-x[iout>=1,] +fitr<-interp(mkeep[,1],mkeep[,2],fitr) +persp(fitr,theta=theta,phi=phi,xlab=xlab,ylab=ylab,zlab=zlab,expand=expand, +scale=scale,ticktype=ticktype) +}} +if(!pyhat)val<-"Done" +val +} + + + + +Rpca<-function(x,p=ncol(x)-1,locfun=llocv2,loc.val=NULL,iter=100,SCORES=FALSE, +gvar.fun=cov.mba,SEED=TRUE,...){ +# +# Robust PCA using random orthogonal matrices and +# robust generalized variance method +# +# locfun, by default, use the marginal medians +# alternatives are mcd, tauloc, spat,... 
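+#
+# Example (illustrative): z=Rpca(x,p=2,SCORES=TRUE) searches iter random
+# 2 by ncol(x) orthogonal projections, returns the projection maximizing the
+# robust generalized variance in z$B, the corresponding generalized variance
+# in z$gen.var, and the projected scores in z$scores.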
+# +if(SEED)set.seed(2) +x<-elimna(x) +n<-nrow(x) +m<-ncol(x) +if(is.null(loc.val))info<-locfun(x,...)$center +if(!is.null(loc.val))info<-loc.val +for(i in 1:n)x[i,]<-x[i,]-info +vals<-NA +z<-matrix(nrow=n,ncol=p) +bval<-array(NA,c(p,m,iter)) +for(it in 1:iter){ +B<-matrix(runif(p*m),nrow=p,ncol=m) +B <- t(ortho(t(B))) # so rows are orthogonal +bval[,,it]<-B +for(i in 1:n)z[i,]<-B%*%as.matrix(x[i,]) +#vals[it]<-gvar(z) +vals[it]<-gvarg(z,var.fun=gvar.fun) +} +iord<-order(vals) +Bop<-0-bval[,,iord[iter]] +zval<-NULL +if(SCORES){ +for(i in 1:n)z[i,]<-Bop%*%as.matrix(x[i,]) +zval<-z +} +list(B=Bop,gen.var=vals[iord[iter]],scores=zval) +} + +Rsq.ols<-function(x,y){ +res=lsfit(x,y)$residuals +yhat=y-res +rsq=var(yhat)/var(y) +rsq +} + +ols<-function(x,y,xout=FALSE,outfun=outpro,alpha=.05,plotit=FALSE,xlab='X',ylab='Y',zlab='Z',RES=TRUE,...){ +# +# Performs OLS regression calling built-in R function. +# +# xout=T will eliminate any leverage points (outliers among x values) +# if one predictor, +# plotit=TRUE will plot the points and the regression line +# +m<-elimna(cbind(x,y)) +n=nrow(m) +n.keep=n +x<-as.matrix(x) +p<-ncol(x) +pp<-p+1 +x<-m[,1:p] +y<-m[,pp] +if(xout){ +m<-cbind(x,y) +flag<-outfun(x,plotit=FALSE,...)$keep +m<-m[flag,] +x<-m[,1:p] +y<-m[,pp] +n.keep=length(y) +} +x<-as.matrix(x) +temp<-summary(lm(y~x)) +coef<-temp[4]$coefficients +CI=matrix(NA,nrow(coef),ncol=2) +CI[,1]=coef[,1]-qt(1-alpha/2,temp[10]$fstatistic[3])*coef[,2] +CI[,2]=coef[,1]+qt(1-alpha/2,temp[10]$fstatistic[3])*coef[,2] +dimnames(CI)=list(NULL,c("low.ci","up.ci")) +coef=cbind(coef,CI) +if(plotit){ +if(p==1){ +plot(x,y,xlab=xlab,ylab=ylab) +abline(coef[,1]) +} +if(p==2){ +regp2plot(x,y,regfun=ols,xlab=xlab,ylab=ylab,zlab=zlab) +}} +Ftest<-temp[10]$fstatistic +Ftest.p.value<-1-pf(Ftest[1],Ftest[2],Ftest[3]) +Rval=Rsq.ols(x,y) +res=NULL +if(RES)res=y-x%*%coef[2:pp,1]-coef[1,1] +list(n=n,n.keep=n.keep,summary=coef,coef=coef[,1],F.test=temp[10]$fstatistic[1],Ftest.p.value=Ftest.p.value, +F.test.degrees.of.freedom=temp[10]$fstatistic[2:3],R.squared=Rval,residuals=as.vector(res)) +} + +olstest<-function(x,y,nboot=500,SEED=TRUE,RAD=TRUE,xout=FALSE,outfun=outpro,...){ +# +# Test the hypothesis that all OLS slopes are zero. +# Heteroscedasticity is allowed. +# +# RAD=T: use Rademacher function to generate wild bootstrap values. +# RAD=F, use standardized uniform distribution. +# +if(SEED)set.seed(2) +m<-elimna(cbind(x,y)) +x<-as.matrix(x) +p<-ncol(x) +pp<-p+1 +x<-m[,1:p] +y<-m[,pp] +if(xout){ +m<-cbind(x,y) +if(identical(outfun,outblp))flag=outblp(x,y,plotit=FALSE)$keep +else +flag<-outfun(x,plotit=FALSE,...)$keep +m<-m[flag,] +x<-m[,1:p] +y<-m[,pp] +} +x<-as.matrix(x) +temp<-lsfit(x,y) +yhat<-mean(y) +res<-y-yhat +test<-sum(temp$coef[2:pp]^2) +if(RAD)data<-matrix(ifelse(rbinom(length(y)*nboot,1,0.5)==1,-1,1),nrow=nboot) +if(!RAD){ +data<-matrix(runif(length(y)*nboot),nrow=nboot)# +data<-(data-.5)*sqrt(12) # standardize the random numbers. +} +rvalb<-apply(data,1,olstests1,yhat,res,x) +p.val<-sum(rvalb>=test)/nboot +list(p.value=p.val) +} + +qrchkv2<-function(x,y,qval=.5,...){ +# +# Test of a linear fit based on quantile regression +# The method stems from He and Zhu 2003, JASA, 98, 1013-1022. +# Here, resampling is avoided using approximate critical values if +# com.pval=F +# +# To get a p-value, via simulations, set com.pval=T +# nboot is number of simulations used to determine p-value. 
+# Execution time can be quite high +# +# This function quickly determines .1, .05, .025 and .01 +# critical values for n<=400 and p<=6 (p= number of predictors) +# and when dealing with the .5 quantile. +# Otherwise, critical values are determined via simulations, which +# can have high execution time. +# +# But, once critical values are determined for a given n, p and +# quantile qval, the function will remember these values and use them +# in the future. They are stored in a file called qrchk.crit +# Currently, however, when you source the Rallfun files, these values +# will be lost. You might save the file qrchk.crit in another file, +# source Rallfun, then copy the save file back to qrchk.crit +# +x=as.matrix(x) +p<-ncol(x) +pp1<-p+1 +yx<-elimna(cbind(y,x)) #Eliminate missing values. +y<-yx[,1] +x<-yx[,2:pp1] +store.it=F +x<-as.matrix(x) +p.val<-NULL +crit.val<-NULL +x<-as.matrix(x) +# shift the marginal x values so that the test statistic is +# invariant under changes in location +n<-length(y) +x=standm(x) +gdot<-cbind(rep(1,n),x) +gdot<-ortho(gdot) +x<-gdot[,2:pp1] +x<-as.matrix(x) +temp<-rqfit(x,y,qval=qval,res=TRUE) +coef<-temp$coef +psi<-NA +psi<-ifelse(temp$residuals>0,qval,qval-1) +rnmat<-matrix(0,nrow=n,ncol=pp1) +ran.mat<-apply(x,2,rank) +flagvec<-apply(ran.mat,1,max) +for(j in 1:n){ +flag<-ifelse(flagvec<=flagvec[j],TRUE,FALSE) +flag<-as.numeric(flag) +rnmat[j,]<-apply(flag*psi*gdot,2,sum) +} +rnmat<-rnmat/sqrt(n) +temp<-matrix(0,pp1,pp1) +for(i in 1:n)temp<-temp+rnmat[i,]%*%t(rnmat[i,]) +temp<-temp/n +test<-max(eigen(temp)$values) +test +} + +sm2str<-function(xx,y,iv=c(1,2),nboot=100,SEED=TRUE,xout=FALSE,outfun=outpro, +STAND=TRUE,...){ +# +# Compare robust measures of association of two predictors +# based on a smooth +# +if(!is.matrix(xx))stop("x should be a matrix with 2 or more columns") +if(ncol(xx)<2)stop("x should be a matrix with 2 or more columns") +val1=NA +val2=NA +x=xx[,iv] +xy=elimna(cbind(x,y)) +x=xy[,1:2] +y=xy[,3] +if(xout){ +x<-as.matrix(x) +flag<-outfun(x,...)$keep +x<-x[flag,] +y<-y[flag] +x<-as.matrix(x) +} +if(SEED)set.seed(2) +data1<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot) +data2<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot) +bvec1=apply(data1,1,sm2str.sub,x[,1],y) # 2 by nboot matrix +bvec2=apply(data2,1,sm2str.sub,x[,2],y) # 2 by nboot matrix +bvecd=bvec1-bvec2 +pv=akerdcdf(bvecd,pts=0) +vcor=cor(x,method="kendall") +pv=2*min(c(pv,1-pv)) +p.crit=.25*abs(vcor[1,2])+.05+(100-length(y))/10000 +p.crit=max(c(.05,p.crit)) +list(p.value=pv,p.crit=p.crit) +} + +sm2str.sub<-function(isub,x,y){ +xmat<-x[isub] +val1<-lplot(xmat,y[isub],plotit=FALSE)$Explanatory.power +val1 +} + +akerdcdf<-function(xx,hval=NA,aval=.5,op=1,fr=.8,pyhat=TRUE,pts=0,plotit=FALSE, +xlab="",ylab=""){ +# +# Compute cumulative adaptive kernel density estimate +# for univariate data +# (See Silverman, 1986) +# By default (univiate case) determine P(X<=pts), +# pts=0 by default. 
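+# Example (illustrative): akerdcdf(x,pts=c(0,1)) estimates P(X<=0) and
+# P(X<=1) for the data in x.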
+# +# op=1 Use expected frequency as initial estimate of the density +# op=2 Univariate case only +# Use normal kernel to get initial estimate of the density +# +fval<-"Done" +if(is.matrix(xx)){ +if(ncol(xx)>1)fval<-akerdmul(xx,pts=pts,hval=hval,aval=aval,fr=fr,pr=pyhat,plotit=plotit) +plotit<-F +} +if(is.matrix(xx) && ncol(xx)==1)xx<-xx[,1] +if(!is.matrix(xx)){ +x<-sort(xx) +if(op==1){ +m<-mad(x) +if(m==0){ +temp<-idealf(x) +m<-(temp$qu-temp$ql)/(qnorm(.75)-qnorm(.25)) +} +if(m==0)m<-sqrt(winvar(x)/.4129) +if(m==0)stop("All measures of dispersion are equal to 0") +fhat <- rdplot(x,pyhat=TRUE,plotit=FALSE,fr=fr) +if(m>0)fhat<-fhat/(2*fr*m) +} +if(op==2){ +init<-density(xx) +fhat <- init$y +x<-init$x +} +n<-length(x) +if(is.na(hval)){ +sig<-sqrt(var(x)) +temp<-idealf(x) +iqr<-(temp$qu-temp$ql)/1.34 +A<-min(c(sig,iqr)) +if(A==0)A<-sqrt(winvar(x))/.64 +hval<-1.06*A/length(x)^(.2) +# See Silverman, 1986, pp. 47-48 +} +gm<-exp(mean(log(fhat[fhat>0]))) +alam<-(fhat/gm)^(0-aval) +dhat<-NA +if(is.na(pts[1]))pts<-x +pts<-sort(pts) +for(j in 1:length(pts)){ +temp<-(pts[j]-x)/(hval*alam) +sq5=0-sqrt(5) +epan=.75*(temp-.2*temp^3/3)/sqrt(5)-.75*(sq5-.2*sq5^3/3)/sqrt(5) +flag=(temp>=sqrt(5)) +epan[flag]=1 +flag=(temp= 0 + negres <- res <= 0 + lplus <- cumsum(posres) + rplus <- lplus[n] - lplus + lmin <- cumsum(negres) + rmin <- lmin[n] - lmin + depth <- pmin(lplus + rmin, rplus + lmin) + min(depth) +} +depthcom<-function(x1,y1,x2,y2,est=tmean,fr=1){ +temp1=depthcomsub(x1,y1,x2,y2,est=est,fr=fr) +temp2=depthcomsub(x2,y2,x1,y1,est=est,fr=fr) +dep=max(c(abs(temp1$dep1-temp1$dep2),abs(temp2$dep1-temp2$dep2))) +dep +} +depthcomsub<-function(x1,y1,x2,y2,est=tmean,fr=1){ +x1=(x1-median(x1))/mad(x1) +x2=(x2-median(x2))/mad(x2) +yh1=runhat(x1,y1,est=tmean,fr=fr) +yh2=runhat(x2,y2,pts=x1,est=tmean,fr=fr) +flag=is.na(yh2) +res1=y1-yh1 +res2=y1[!flag]-yh2[!flag] +dep1=resdepth(x1,res1) +dep2=resdepth(x1[!flag],res2) +list(dep1=dep1,dep2=dep2) +} + +ancsm<-function(x1,y1,x2,y2,crit.mat=NULL,nboot=200,SEED=TRUE,REP.CRIT=FALSE,LP=TRUE, +est=tmean,fr=NULL,plotit=TRUE,sm=FALSE,xout=FALSE,outfun=out,xlab="X",ylab="Y",...){ +# +# Compare two nonparametric +# regression lines corresponding to two independent groups +# using the depths of smooths. +# One covariate only is allowed. +# +# A running interval smoother is used. +# +# sm=T will create smooths using bootstrap bagging. +# +if(ncol(as.matrix(x1))>1)stop("One covariate only is allowed") +if(xout){ +flag1=outfun(x1,...)$keep +flag2=outfun(x2,...)$keep +x1=x1[flag1] +y1=y1[flag1] +x2=x2[flag2] +y2=y2[flag2] +} +xy=elimna(cbind(x1,y1)) +x1=xy[,1] +xord=order(x1) +x1=x1[xord] +y1=xy[xord,2] +xy=elimna(cbind(x2,y2)) +x2=xy[,1] +xord=order(x2) +x2=x2[xord] +y2=xy[xord,2] +n1=length(y1) +n2=length(y2) +if(is.null(fr)){ +fr=1 +if(min(n1,n2)>150)fr=.2 +if(max(n1,n2)<35)fr=.5 +} +if(SEED)set.seed(2) +if(is.null(crit.mat[1])){ +crit.val=NA +yall=c(y1,y2) +xall=c(x1,x2) +nn=n1+n2 +il=n1+1 +for(i in 1:nboot){ +data=sample(nn,nn,TRUE) +yy1=yall[data[1:n1]] +yy2=yall[data[il:nn]] +xx1=xall[data[1:n1]] +xx2=xall[data[il:nn]] +crit.mat[i]=depthcom(xx1,yy1,xx2,yy2,est=est,fr=fr) +}} +if(plotit)runmean2g(x1,y1,x2,y2,fr=fr,est=est,sm=sm,xlab=xlab,ylab=ylab,LP=LP,...) 
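+# Compare the depth-based discrepancy between the two smooths (depthcom)
+# for the observed data with the bootstrap values in crit.mat to obtain
+# a p-value.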
+dep=depthcom(x1,y1,x2,y2,est=est,fr=fr) +n=min(n1,n2) +pv=1-mean(crit.mat=crit)p.value<-c("Less than .1") +crit<-15.49/n+2.68 +if(test>=crit)p.value<-c("Less than .05") +crit<-14.22/n+3.26 +if(test>=crit)p.value<-c("Less than .025") +crit<-24.83/n+3.74 +if(test>=crit)p.value<-c("Less than .01") +p.values[ic,3]=p.value +}}} +list(cor=val,test.results=info,p.values=p.values) +} + + +resdepth.sub<-function(x,res) +{ +########################################################################## +# This function computes the regression depth of a regression line based +# on its residuals. The fit could be, for example, a nonparmatric +# regression or smooth. +# +# The algorithm is based on a simple modification of +# +# Rousseeuw, P.J. and Hubert, M. (1996), +# Regression Depth, Technical report, University of Antwerp +# +########################################################################## + if(!is.vector(x)) stop("x should be vectors") + n <- length(x) + if(n < 2) + stop("you need at least two observations") +flag=is.na(res) +x=x[!flag] +res[!flag] +xord=order(x) +x=x[xord] +res=res[xord] + posres <- res >= 0 + negres <- res <= 0 + lplus <- cumsum(posres) + rplus <- lplus[n] - lplus + lmin <- cumsum(negres) + rmin <- lmin[n] - lmin + depth <- pmin(lplus + rmin, rplus + lmin) + min(depth) +} + +tbs<- function(x,eps=1e-3,maxiter=20,r=.45,alpha=.05,init.est=OGK){ +# Rocke's contrained s-estimator +# +# r=.45 is the breakdown point +# alpha=.05 is the asymptotic rejection probability. +# +library(MASS) +x<-elimna(x) +x=as.matrix(x) + n <- nrow(x) + p <- ncol(x) +LIST=FALSE +if(p==1){ +LIST=T +p=2 +x=cbind(x,rnorm(nrow(x))) +# Yes, this code is odd, but for moment easiest way of handling p=1 +} +temp<-init.est(x) +# very poor outside rate per obs under normality. +t1<-temp$center +s<-temp$cov +c1M<-cgen.bt(n,p,r,alpha,asymp=FALSE) +c1<-c1M$c1 +if(c1==0)c1<-.001 #Otherwise get division by zero +M<-c1M$M + b0 <- erho.bt(p,c1,M) + crit <- 100 + iter <- 1 + w1d <- rep(1,n) + w2d <- w1d + while ((crit > eps)&(iter <= maxiter)) + { + t.old <- t1 + s.old <- s + wt.old <- w1d + v.old <- w2d + d2 <- mahalanobis(x,center=t1,cov=s) + d <- sqrt(d2) + k <- ksolve.bt(d,p,c1,M,b0) + d <- d/k + w1d <- wt.bt(d,c1,M) + w2d <- v.bt(d,c1,M) + t1 <- (w1d %*% x)/sum(w1d) + s <- s*0 + for (i in 1:n) + { + xc <- as.vector(x[i,]-t1) + s <- s + as.numeric(w1d[i])*(xc %o% xc) + } + s <- p*s/sum(w2d) + mnorm <- sqrt(as.vector(t.old) %*% as.vector(t.old)) + snorm <- eigen(s.old)$values[1] + crit1 <- max(abs(t1 - t.old)) +# crit <- max(crit1,crit2) + crit <- max(abs(w1d-wt.old))/max(w1d) + iter <- iter+1 + } +if(LIST){ +v1=t1[1] +v2=s[1,1] +return(list(center=v1,var=v2)) +} +if(!LIST)return(list(center=t1,cov=s)) +} + +pcorhc4sub<-function(x,y,CN=FALSE){ +# +# Compute a .95 confidence interval for Pearson's correlation coefficient. +# using the HC4 method +# +# CN=T degrees of freedom are infinite, as done by Cribari-Neto (2004) +# CN=F degrees of freedom are n-p +# +xy<-elimna(cbind(x,y)) +x<-xy[,1] +y<-xy[,2] +z1=(x-mean(x))/sqrt(var(x)) +z2=(y-mean(y))/sqrt(var(y)) +ans=olshc4sub(z1,z2,CN=CN) +ci=ans$ci[2,3:4] +ci +} + +TWOpNOV<-function(x,y,HC4=FALSE,alpha=.05){ +# +# Compute a .95 confidence interval +# for the difference between two dependent Pearson correlations, +# non-overlapping case. +# +# Both x and y are assumed to be matrices with two columns. +# The function compares the correlation between x[,1] and x[,2] +# to the correlation between y[,1] and y[,2]. +# +# For simulation results, see Wilcox (2009). 
+# COMPARING PEARSON CORRELATIONS: DEALING WITH +# HETEROSCEDASTICITY AND NON-NORMALITY, Communications in Statistics--Simulations +# and Computations, 38, 2220-2234. +# +# +if(!HC4 && alpha!=.05)stop('For alpha not equal to .05, must use HC4=TRUE') +#if(!is.matrix(x))stop("x should be a matrix") +#if(!is.matrix(y))stop("y should be a matrix") +if(ncol(x)!=2)stop("x should be a matrix or data a frame with 2 columns") +if(ncol(y)!=2)stop("y should be a matrix or a data frame with 2 columns") +xy=elimna(cbind(x,y)) +x1=xy[,1] +x2=xy[,2] +y1=xy[,3] +y2=xy[,4] +r12=cor(x1,x2) +r13=cor(x1,y1) +r14=cor(x1,y2) +r23=cor(x2,y1) +r24=cor(x2,y2) +r34=cor(y1,y2) +term1=.5*r12*r34*(r13^2+r14^2+r23^2+r24^2) +term2=r12*r13*r14+r12*r23*r24+r13*r23*r34+r14*r24*r34 +corhat=(term1+r13*r24+r14*r23-term2)/((1-r12^2)*(1-r34^2)) +if(!HC4)temp=pcorbv4(x1,x2,SEED=FALSE) +if(HC4)temp=pcorhc4(x1,x2,alpha=alpha) +ci12=temp$ci[1] +ci12[2]=temp$ci[2] +if(!HC4)temp=pcorbv4(y1,y2,SEED=FALSE) +if(HC4)temp=pcorhc4(y1,y2,alpha=alpha) +ci34=temp$ci[1] +ci34[2]=temp$ci[2] +terml=2*corhat*(r12-ci12[1])*(ci34[2]-r34) +termu=2*corhat*(ci12[2]-r12)*(r34-ci34[1]) +L=r12-r34-sqrt((r12-ci12[1])^2+(ci34[2]-r34)^2-terml) +U=r12-r34+sqrt((r12-ci12[2])^2+(ci34[1]-r34)^2-termu) +if(ZCI){ +if(is.na(L) || is.na(U))L=U=0 +} +list(est.1=r12,est.2=r34,ci.lower=L,ci.upper=U) +} + +TWOpov<-function(x,y,alpha=.05,CN=FALSE,BOOT=TRUE, nboot=499,SEED=TRUE,ZCI=FALSE){ +# +# Comparing two dependent correlations: Overlapping case +# +# x is assumed to be a matrix with 2 columns +# +# Compare correlation of x[,1] with y to x[,2] with y +# +# returns a confidence stored in +# ci +# +if(ncol(x)!=2)stop('x should be a matrix with two columns') +x1y=elimna(cbind(x[,1],y)) +x2y=elimna(cbind(x[,2],y)) +xx=elimna(x) +r12=cor(x1y[,1],x1y[,2]) +r13=cor(x2y[,1],x2y[,2]) +r23=cor(xx[,1],xx[,2]) +if(!BOOT){ +ci12=pcorhc4(x1y[,1],x1y[,2],alpha=alpha,CN=CN)$ci +ci13=pcorhc4(x2y[,1],x2y[,2],alpha=alpha,CN=CN)$ci +} +if(BOOT){ +ci12=rhohc4bt(x1y[,1],x1y[,2],alpha=alpha,SEED=SEED,nboot=nboot)$ci +ci13=rhohc4bt(x2y[,1],x2y[,2],alpha=alpha,SEED=SEED,nboot=nboot)$ci +} +corhat=((r23-.5*r12*r13)*(1-r12^2-r13^2-r23^2)+r23^3)/((1-r12^2)*(1-r13^2)) +term1=2*corhat*(r12-ci12[1])*(ci13[2]-r13) +term2=2*corhat*(r12-ci12[2])*(ci13[1]-r13) +L=r12-r13-sqrt((r12-ci12[1])^2+(ci13[2]-r13)^2-term1) +U=r12-r13+sqrt((r12-ci12[2])^2+(ci13[1]-r13)^2-term2) +if(ZCI){ +if(is.na(L) || is.na(U))L=U=0 +} +list(est.rho1=r12,est.rho2=r13,dif=r12-r13,ci=c(L,U)) +} + + + + +sm2strv7<-function(xx,y,iv=c(1,2),nboot=100,SEED=TRUE,xout=FALSE,outfun=outpro, +STAND=TRUE,...){ +# +# Compare robust measures of association of two predictors +# based on a smooth +# +# x is a matrix with two columns +# robust explanatory of x[,1] with y is compared to x[,2] with y. +# xout=T eliminates any leverage points found with outfun, which +# defaults to outpro, a projecion method for detecting outliers. +# +# iv: indicates the two columns of x that will be used. By default, col 1 and 2 are used. 
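+#
+# A minimal usage sketch (simulated data, illustrative values only):
+#   set.seed(1)
+#   x=matrix(rnorm(200),ncol=2)
+#   y=x[,1]+rnorm(100)
+#   sm2strv7(x,y)   # returns p.value and p.crit; compare p.value to p.crit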
+# +if(!is.matrix(xx))stop("x should be a matrix with 2 or more columns") +if(ncol(xx)<2)stop("x should be a matrix with 2 or more columns") +val1=NA +val2=NA +x=xx[,iv] +xy=elimna(cbind(x,y)) +x=xy[,1:2] +y=xy[,3] +if(xout){ +x<-as.matrix(x) +flag<-outfun(x,...)$keep +x<-x[flag,] +y<-y[flag] +x<-as.matrix(x) +} +if(SEED)set.seed(2) +data1<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot) +data2<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot) +bvec1=apply(data1,1,sm2str.sub,x[,1],y) # 2 by nboot matrix +bvec2=apply(data2,1,sm2str.sub,x[,2],y) # 2 by nboot matrix +bvecd=bvec1-bvec2 +pv=akerdcdf(bvecd,pts=0) +vcor=cor(x,method="kendall") +pv=2*min(c(pv,1-pv)) +p.crit=.25*abs(vcor[1,2])+.05+(100-length(y))/10000 +p.crit=max(c(.05,p.crit)) +list(p.value=pv,p.crit=p.crit) +} + +pcorhc4<-function(x,y,alpha=.05,CN=FALSE,HC3=FALSE){ +# +# Compute a .95 confidence interval for Pearson's correlation coefficient. +# using the HC4 method +# +# CN=F, degrees of freedom are n-p; seems better for general use. +# CN=T degrees of freedom are infinite, as done by Cribari-Neto (2004) +# +print('Can return meaningless confidence interval due to outliers') +xy<-elimna(cbind(x,y)) +x<-xy[,1] +y<-xy[,2] +z1=(x-mean(x))/sqrt(var(x)) +z2=(y-mean(y))/sqrt(var(y)) +ans=olshc4(z1,z2,alpha=alpha,CN=CN,HC3=HC3) +list(r=ans$ci[2,2],ci=ans$ci[2,3:4],p.value=ans$ci[2,5],test.stat=ans$test.stat) +} +regpreS<-function(x,y,regfun=lsfit,error=absfun,nboot=100, +mval=round(5*log(length(y))),locfun=mean,pr=TRUE, +xout=FALSE,outfun=out, +plotit=TRUE,xlab="Model Number",ylab="Prediction Error",SEED=TRUE,...){ +# +# Stepwise selection of predictors based on +# estimates of prediction error using the regression method +# regfun, +# which defaults to least squares. Prediction error +# is estimated with .632 method. +# (See Efron and Tibshirani, 1993, pp. 252--254) +# +# The predictor values are assumed to be in the n by p matrix x. +# The default number of bootstrap samples is nboot=100 +# +# Prediction error is the expected value of the function error. +# The argument error defaults to absolute error. To use +# squared error, set error=sqfun. +# +# regfun can be any R function that returns the coefficients in +# the vector regfun$coef, the first element of which contains the +# estimated intercept, the second element contains the estimate of +# the first predictor, etc. +# +# The default value for mval, the number of observations to resample +# for each of the B bootstrap samples is based on results by +# Shao (JASA, 1996, 655-665). (Resampling n vectors of observations, +# model selection may not lead to the correct model as n->infinity. 
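+#
+# A brief usage sketch (three simulated predictors, illustrative values only):
+#   set.seed(1)
+#   x=matrix(rnorm(300),ncol=3)
+#   y=x[,1]+.5*x[,2]+rnorm(100)
+#   regpreS(x,y)   # column indices of x, ordered by the stepwise selection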
+# +if(SEED)set.seed(2) +q=ncol(x) +qm1=q-1 +x<-as.matrix(x) +d<-ncol(x) +p1<-d+1 +temp<-elimna(cbind(x,y)) +x<-temp[,1:d] +y<-temp[,d+1] +x<-as.matrix(x) +if(xout){ +x<-as.matrix(x) +flag<-outfun(x,SEED=FALSE,...)$keep +x<-x[flag,] +y<-y[flag] +x<-as.matrix(x) +} +adit=NULL +pval=c(1:ncol(x)) +#pval=c(1:q) +allp=pval +for(ip in 1:qm1){ +model=list() +for(j in 1:length(pval))model[[j]]=c(adit,pval[j]) +temp=regpre(x,y,model=model,pr=FALSE,plotit=FALSE,adz=FALSE,regfun=regfun, +SEED=SEED)$estimates +pbest=order(temp[,5]) +adit=model[[pbest[1]]] +pval=allp[-adit] +} +output=model[[pbest[1]]] +output=c(output,allp[-output]) +output +} + +akp.effect<-function(x,y,EQVAR=TRUE,tr=.2){ +# +# Computes the robust effect size suggested by +#Algina, Keselman, Penfield Psych Methods, 2005, 317-328 +library(MASS) +x<-elimna(x) +y<-elimna(y) +n1<-length(x) +n2<-length(y) +s1sq=winvar(x,tr=tr) +s2sq=winvar(y,tr=tr) +spsq<-(n1-1)*s1sq+(n2-1)*s2sq +sp<-sqrt(spsq/(n1+n2-2)) +cterm=1 +if(tr>0)cterm=area(dnormvar,qnorm(tr),qnorm(1-tr))+2*(qnorm(tr)^2)*tr +cterm=sqrt(cterm) +if(EQVAR)dval<-cterm*(tmean(x,tr)-tmean(y,tr))/sp +if(!EQVAR){ +dval<-cterm*(tmean(x,tr)-tmean(y,tr))/sqrt(s1sq) +dval[2]=cterm*(tmean(x,tr)-tmean(y,tr))/sqrt(s2sq) +} +dval +} + +akp.effect.ci<-function(x,y,alpha=.05,tr=.2,nboot=1000,SEED=TRUE,null.val=0){ +# +# Computes the robust effect size for two-sample case using +# Algina, Keselman, Penfield Pcyh Methods, 2005, 317-328 +# +# +if(SEED)set.seed(2) +x=elimna(x) +y=elimna(y) +n1=length(x) +n2=length(y) +be.f=NA +for(i in 1:nboot){ +X=sample(x,n1,replace=TRUE) +Y=sample(y,n2,replace=TRUE) +be.f[i]=akp.effect(X,Y,tr=tr) +} +L=alpha*nboot/2 +U=nboot-L +be.f=sort(be.f) +ci=be.f[L+1] +ci[2]=be.f[U] +est=akp.effect(x,y,tr=tr) +pv=mean(be.f0)cterm=area(dnormvar,qnorm(tr),qnorm(1-tr))+2*(qnorm(tr)^2)*tr +cterm=sqrt(cterm) +del=cterm*d1 #rescale for a normal distribution. +list(effect.size=del,Cohen.d.equiv=2*del) +} + + + + +wwwtrim<-function(J,K,L,data,tr=.2,grp=c(1:p),alpha=.05,p=J*K*L){ +# Perform a within by within by within (three-way) anova on trimmed means. +# +# That is, there are three factors with a total of JKL dependent groups. +# +# The argument data is assumed to contain the raw +# data stored in list mode. data[[1]] contains the data +# for the first level of all three factors: level 1,1,1. +# data][2]] is assumed to contain the data for level 1 of the +# first two factors and level 2 of the third factor: level 1,1,2 +# data[[L]] is the data for level 1,1,L +# data[[L+1]] is the data for level 1,2,1. data[[2L]] is level 1,2,L. +# data[[KL+1]] is level 2,1,1, etc. +# +# The default amount of trimming is tr=.2 +# +# It is assumed that data has length JKL, the total number of +# groups being tested. 
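+#
+# A minimal usage sketch (a 2 by 2 by 2 within design, illustrative values only):
+#   set.seed(1)
+#   x=matrix(rnorm(160),ncol=8)   # 8 = J*K*L dependent measures, n = 20
+#   wwwtrim(2,2,2,x)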
+# +if(is.data.frame(data))data=as.matrix(data) +if(is.list(data))data=listm(elimna(matl(data))) +if(is.matrix(data))data=listm(elimna(data)) +if(!is.list(data))stop("Data are not stored in list mode or a matrix") +if(p!=length(data)){ +print("The total number of groups, based on the specified levels, is") +print(p) +print("The number of groups in data is") +print(length(data)) +print("Warning: These two values are not equal") +} +tmeans<-0 +h<-0 +v<-0 +for (i in 1:p){ +tmeans[i]<-mean(data[[grp[i]]],tr) +h[i]<-length(data[[grp[i]]])-2*floor(tr*length(data[[grp[i]]])) +# h is the effective sample size +} +v=covmtrim(data,tr=tr) +ij<-matrix(c(rep(1,J)),1,J) +ik<-matrix(c(rep(1,K)),1,K) +il<-matrix(c(rep(1,L)),1,L) +jm1<-J-1 +cj<-diag(1,jm1,J) +cj<-diag(1,jm1,J) +for (i in 1:jm1)cj[i,i+1]<-0-1 +km1<-K-1 +ck<-diag(1,km1,K) +for (i in 1:km1)ck[i,i+1]<-0-1 +lm1<-L-1 +cl<-diag(1,lm1,L) +for (i in 1:lm1)cl[i,i+1]<-0-1 +# Do test for factor A +cmat<-kron(cj,kron(ik,il)) # Contrast matrix for factor A +Qa=bwwtrim.sub(cmat, tmeans, v, h,p) +Qa.siglevel <- 1 - pf(Qa, J - 1, 999) +# Do test for factor B +cmat<-kron(ij,kron(ck,il)) # Contrast matrix for factor B +Qb=bwwtrim.sub(cmat, tmeans, v, h,p) + Qb.siglevel <- 1 - pf(Qb, K - 1, 999) +# Do test for factor C +cmat<-kron(ij,kron(ik,cl)) # Contrast matrix for factor C +Qc<-bwwtrim.sub(cmat, tmeans, v, h,p) +Qc.siglevel <- 1 - pf(Qc, L - 1, 999) +# Do test for factor A by B interaction +cmat<-kron(cj,kron(ck,il)) # Contrast matrix for factor A by B +Qab<-bwwtrim.sub(cmat, tmeans, v, h,p) +Qab.siglevel <- 1 - pf(Qab, (J - 1) * (K - 1), 999) +# Do test for factor A by C interaction +cmat<-kron(cj,kron(ik,cl)) # Contrast matrix for factor A by C +Qac<-bwwtrim.sub(cmat, tmeans, v, h,p) +Qac.siglevel <- 1 - pf(Qac, (J - 1) * (L - 1), 999) +# Do test for factor B by C interaction +cmat<-kron(ij,kron(ck,cl)) # Contrast matrix for factor B by C +Qbc<-bwwtrim.sub(cmat, tmeans, v, h,p) +Qbc.siglevel <- 1 - pf(Qbc, (K - 1) * (L - 1), 999) +# Do test for factor A by B by C interaction +cmat<-kron(cj,kron(ck,cl)) # Contrast matrix for factor A by B by C +Qabc<-bwwtrim.sub(cmat, tmeans, v, h,p) +Qabc.siglevel <-1-pf(Qabc,(J-1)*(K-1)*(L-1), 999) +list(Qa=Qa,Qa.p.value=Qa.siglevel,Qb=Qb,Qb.p.value=Qb.siglevel, +Qc=Qc,Qc.p.value=Qc.siglevel,Qab=Qab,Qab.p.value=Qab.siglevel, +Qac=Qac,Qac.p.value=Qac.siglevel,Qbc=Qbc,Qbc.p.value=Qbc.siglevel, +Qabc=Qabc,Qabc.p.value=Qabc.siglevel) +} + + +ltsR<-function(x,y,RES=FALSE,varfun=pbvar,corfun=pbcor){ +# +library(MASS) +xy=elimna(cbind(x,y)) +p1=ncol(xy) +p=p1-1 +x=xy[,1:p] +y=xy[,p1] +temp=ltsreg(x,y)$coef +x=as.matrix(x) +p=ncol(x)+1 +res<-y-x%*%temp[2:p]-temp[1] +yhat<-y-res +if(!RES)res=NULL +e.pow<-varfun(yhat)/varfun(y) +if(is.na(e.pow))e.pow<-1 +if(e.pow>=1)e.pow<-corfun(yhat,y)$cor^2 +list(coef=temp,residuals=res,Explanatory.Power=e.pow, +Strength.Assoc=sqrt(e.pow)) +} + +standmar<-function(x,locfun=lloc,est=mean,scat=var,...){ +# standardize a matrix x +# +x=as.matrix(x) +m1=apply(x,2,est,na.rm=TRUE) +v1=apply(x,2,scat,na.rm=TRUE) +p=ncol(x) +for(j in 1:p)x[,j]=(x[,j]-m1[j])/sqrt(v1[j]) +x +} + +qsmcobs<-function(x,y,qval=.5,xlab="X",ylab="Y",FIT=TRUE,pc=".",plotit=TRUE, +xout=FALSE,outfun=out,q=NULL,lambda=0,...){ +# +# Plots smooths of quantile regression lines using R package cobs +# +# qval is the quantile +# qsmcobs(x,y,qval=c(.2,.5,.8)) will plot three smooths corresponding to +# the .2, .5 and .8 quantile regression lines. 
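+#
+# For example (assumes the R package cobs is installed; illustrative data only):
+#   set.seed(1)
+#   x=rnorm(100); y=x+rnorm(100)
+#   qsmcobs(x,y,qval=c(.25,.75))   # plots the .25 and .75 quantile smooths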
+# +# FIT=T, uses the values returned by predict +# FIT=F, determines predicted Y for each X and plots the results +library(cobs) +if(!is.null(q))qval=q +x=as.matrix(x) +if(xout){ +flag<-outfun(x,...)$keep +x<-x[flag,] +y<-y[flag] +} +yhat=NULL +res=NULL +if(plotit)plot(x,y,xlab=xlab,ylab=ylab,pch=pc) +if(FIT){ +for(j in 1:length(qval)){ +if(plotit)lines(predict(cobs(x,y,tau=qval[j],lambda=lambda,print.mesg=FALSE,print.warn=FALSE))) +}} +if(!FIT){ +for(j in 1:length(qval)){ +temp=cobs(x,y,tau=qval[j],print.mesg=FALSE,print.warn=FALSE,lambda=lambda) +xord=order(x) +if(plotit)lines(x[xord],temp$fitted[xord]) +} +if(length(qval)==1){ +yhat=temp$fitted +#res=y-yhat + # yhat is only for the unique x values. If x has,say, +# three tied values = 6, then +# yhat contains only one predicted value for x=6, not three yhat values +# all equal to the predicted value at x=6 +} +} +list(yhat=yhat) +} + + +Qdepthcom<-function(x1,y1,x2,y2,qval){ +temp1=Qdepthcomsub(x1,y1,x2,y2,qval) +temp2=Qdepthcomsub(x2,y2,x1,y1,qval) +dep=max(c(abs(temp1$dep1-temp1$dep2),abs(temp2$dep1-temp2$dep2))) +dep +} +Qdepthcomsub<-function(x1,y1,x2,y2,qval){ +x1=(x1-median(x1))/mad(x1) +x2=(x2-median(x2))/mad(x2) +yh1=qsmcobs(x1,y1,FIT=FALSE,qval=qval,plotit=FALSE)$yhat +temp2=cobs(x2,y2,print.mesg=FALSE,print.warn=FALSE,tau=qval) +yh2=predict(temp2,z=x1) +yh2=yh2[,2] +flag=is.na(yh2) +res1=y1-yh1 +res2=y1[!flag]-yh2[!flag] +dep1=resdepth(x1,res1) +dep2=resdepth(x1[!flag],res2) +list(dep1=dep1,dep2=dep2) +} + + +mulgreg<-function(x,y,cov.fun=rmba){ +# +# Do Multivariate regression in Rousseeuw, Van Aelst, Van Driessen Agullo +# (2004) Technometrics, 46, 293-305 +# +# (y can be multivariate) +# +library(MASS) +if(!is.matrix(y))stop("y is not a matrix") +X<-cbind(x,y) +X<-elimna(X) +qy<-ncol(y) +qx<-ncol(x) +qxp1<-qx+1 +tqyqx<-qy+qx +y<-X[,qxp1:tqyqx] +# compute initial estimate of slopes and intercept: +locscat<-cov.fun(X) +sig<-locscat$cov +mu<-locscat$center +sigxx<-sig[1:qx,1:qx] +sigxy<-sig[1:qx,qxp1:tqyqx] +sigyy<-sig[qxp1:tqyqx,qxp1:tqyqx] +Bhat<-solve(sigxx)%*%sigxy +sige<-sigyy-t(Bhat)%*%sigxx%*%Bhat +sige.inv<-solve(sige) +Ahat<-t(mu[qxp1:tqyqx]-t(Bhat)%*%mu[1:qx]) +resL<-matrix(nrow=nrow(X),ncol=qy) +for(i in 1:nrow(X))resL[i,]<-y[i,]-t(Bhat)%*%X[i,1:qx] +for(j in 1:qy)resL[,j]<-resL[,j]-Ahat[j] +list(coef=rbind(Ahat,Bhat),residuals=resL) +} + +tsp1reg<-function(x,y,plotit=FALSE,HD=FALSE,OPT=TRUE,tr=FALSE){ +# +# Compute the Theil-Sen regression estimator. 
+# Only a single predictor is allowed in this version +# +# OPT=TRUE, compute the intercept using median(y)-beta_1median(X) +# OPT=FALSE compute the intercept using median of y-beta_1X +# +temp<-matrix(c(x,y),ncol=2) +temp<-elimna(temp) # Remove any pairs with missing values +x<-temp[,1] +y<-temp[,2] +ord<-order(x) +xs<-x[ord] +ys<-y[ord] +vec1<-outer(ys,ys,"-") +vec2<-outer(xs,xs,"-") +v1<-vec1[vec2>0] +v2<-vec2[vec2>0] +if(!HD)slope<-median(v1/v2,na.rm=TRUE) +if(HD)slope<-hd(v1/v2,na.rm=TRUE,tr=tr) +if(OPT){ +if(!HD)coef<-median(y,na.rm=TRUE)-slope*median(x,na.rm=TRUE) +if(HD)coef<-hd(y,na.rm=TRUE)-slope*hd(x,na.rm=TRUE,tr=tr) +} +if(!OPT){ +if(!HD)coef<-median(y-slope*x,na.rm=TRUE) +if(HD)coef<-hd(y-slope*x,na.rm=TRUE,tr=tr) +} +names(coef)<-"Intercept" +coef<-c(coef,slope) +if(plotit){ +plot(x,y,xlab="X",ylab="Y") +abline(coef) +} +res<-y-slope*x-coef[1] +list(coef=coef,residuals=res) +} + +gplot<-function(x,xlab="Group",ylab="",xnum=FALSE){ +if(is.matrix(x))x<-listm(x) +if(!xnum)par(xaxt="n") +mval<-NA +vals<-x[[1]] +gval<-rep(1,length(x[[1]])) +for(j in 2:length(x)){ +vals<-c(vals,x[[j]]) +gval<-c(gval,rep(j,length(x[[j]]))) +} +plot(gval,vals,xlab=xlab,ylab=ylab) +} + +trimpb<-function(x,y=NULL,tr=.2,alpha=.05,nboot=2000,WIN=FALSE,win=.1, +plotit=FALSE,pop=1,null.value=0,pr=TRUE,xlab="X",fr=NA,SEED=TRUE){ +# +# Compute a 1-alpha confidence interval for +# a trimmed mean. +# +# The default number of bootstrap samples is nboot=2000 +# +# win is the amount of Winsorizing before bootstrapping +# when WIN=T. +# +# Missing values are automatically removed. +# +# nv is null value. That test hypothesis trimmed mean equals nv +# +# plotit=TRUE gives a plot of the bootstrap values +# pop=1 results in the expected frequency curve. +# pop=2 kernel density estimate +# pop=3 boxplot +# pop=4 stem-and-leaf +# pop=5 histogram +# pop=6 adaptive kernel density estimate. +# +# fr controls the amount of smoothing when plotting the bootstrap values +# via the function rdplot. fr=NA means the function will use fr=.8 +# (When plotting bivariate data, rdplot uses fr=.6 by default.) +# +# If y is not null, the function uses x-y; so can be used for two dependent variables. +# +if(pr){ +print("The p-value returned by this function is based on the") +print("null value specified by the argument null.value, which defaults to 0") +} +if(!is.null(y))x=x-y +x<-x[!is.na(x)] +if(WIN){ +if(win > tr)stop("The amount of Winsorizing must be <= to the amount of trimming") +x<-winval(x,win) +} +crit<-alpha/2 +icl<-round(crit*nboot)+1 +icu<-nboot-icl +bvec<-NA +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +data<-matrix(sample(x,size=length(x)*nboot,replace=TRUE),nrow=nboot) +bvec<-apply(data,1,mean,tr) # Bootstrapped trimmed means +bvec<-sort(bvec) +#p.value<-sum(bvec9, this adjustment can be crucial +# +m=elimna(m) +m=as.matrix(m) +n=nrow(m) +if(SEED)set.seed(2) +z=array(rmul(n*iter*ncol(m)),c(iter,n,ncol(m))) +newq=0 +gtry=NA +for(itry in 1:ip){ +newq=newq+9/10^itry +gtry[itry]=newq +} +gtry=c(.95,.975,gtry[-1]) +if(pr)print("Computing adjustment") +for(itry in 1:ip){ +val=NA +for(i in 1:iter){ +temp=outpro(z[i,,],gval = sqrt(qchisq(gtry[itry],ncol(m))), +center=center,plotit=FALSE,op=op,MM=MM,cop=cop,STAND=STAND)$out.id +val[i]=length(temp) +} +erate=mean(val)/n +if(erate=zvec) +output<-matrix(0,connum,6) +dimnames(output)<-list(NULL,c("con.num","psihat","p.value", +"crit.sig","ci.lower","ci.upper")) +tmeans<-apply(x,2,est,na.rm=TRUE,...) 
+psi<-1 +output[temp2,4]<-zvec +for (ic in 1:ncol(con)){ +output[ic,2]<-sum(con[,ic]*tmeans) +output[ic,1]<-ic +output[ic,3]<-test[ic] +temp<-sort(psihat[ic,]) +icl<-round(output[ic,4]*nboot/2)+1 +icu<-nboot-(icl-1) +output[ic,5]<-temp[icl] +output[ic,6]<-temp[icu] +} +if(!flag.con){ +} +if(flag.con){ +CC=(J^2-J)/2 +test<-matrix(NA,CC,7) +dimnames(test)<-list(NULL,c("Group","Group","psi.hat","p.value","p.crit", +"ci.low","ci.upper")) +jcom<-0 +for (j in 1:J){ +for (k in 1:J){ +if (j < k){ +jcom<-jcom+1 +test[jcom,1]=j +test[jcom,2]=k +test[jcom,3:5]=output[jcom,2:4] +test[jcom,6:7]=output[jcom,5:6] +con=NULL +}}}} +if(!flag.con)test=output +#num.sig<-sum(output[,4]<=output[,5]) +if(flag.con)num.sig<-sum(test[,4]<=test[,5]) +if(!flag.con)num.sig<-sum(test[,3]<=test[,4]) +list(output=test,con=con,num.sig=num.sig) +} + + + +mulrank<-function(J,K,x,grp=c(1:p),p=J*K){ +# +# Perform the Munzel and Brunner +# multivariate one-way rank-based ANOVA +# (Munzel and Brunner, Biometrical J., 2000, 42, 837--854 +# +# x can be a matrix with columns corresponding to groups +# +# Have a J by K design with J independent levels and K dependent +# measures +# +# or it can have list mode. +# +newx=list() +GV=matrix(c(1:p),ncol=K,byrow=TRUE) +if(is.list(x)){ +temp=NA +jk=0 +for(j in 1:J){ +temp=elimna(matl(x[GV[j,]])) +for(k in 1:K){ +jk=jk+1 +newx[[jk]]=temp[,k] +}} +x=NA +x=newx +} +if(is.matrix(x)){ +x=elimna(x) +x<-listm(x) +} +xx<-list() +nvec<-NA +for(j in 1:p){ +xx[[j]]<-x[[grp[j]]] +nvec[j]<-length(xx[[j]]) +} +Nrow=nvec[GV[,1]] +v<-matrix(0,p,p) +Ja<-matrix(1,J,J) +Ia<-diag(1,J) +Pa<-Ia-Ja/J +Jb<-matrix(1,K,K) +Ib<-diag(1,K) +Pb<-Ib-Jb/K +cona<-kron(Pa,Ib) +xr<-list() +N<-0 +jj=0 +for(k in 1:K){ +temp<-x[[k]] +jk<-k +for (j in 2:J){ +jj=jj+1 +jk<-jk+K +temp<-c(temp,x[[jk]]) +} +N<-length(temp) +pr<-rank(temp) +xr[[k]]<-pr[1:nvec[k]] #Put ranks of pooled data for first +# variable in xr +top<-nvec[k] +jk<-k +bot<-1 +for (j in 2:J){ +jk<-jk+K +bot<-bot+nvec[jk] +top<-top+nvec[jk] +xr[[jk]]<-pr[bot:top] # Put midranks in xr +}} +phat<-NA +botk<-0 +for(j in 1:J){ +for(k in 1:K){ +botk<-botk+1 +phat[botk]<-(mean(xr[[botk]])-.5)/N +}} +klow<-1-K +kup<-0 +for(j in 1:J){ +klow<-klow+K +kup<-kup+K +sel<-c(klow:kup) +v[sel,sel]<-covmtrim(xr[klow:kup],tr=0)/N +} +qhat<-matrix(phat,J,K,byrow=TRUE) +test<-N*t(phat)%*%cona%*%phat/sum(diag(cona%*%v)) +nu1<-sum(diag(cona%*%v))^2/sum(diag(cona%*%v%*%cona%*%v)) +sig.level<-1-pf(test,nu1,1000000) +list(test.stat=test[1,1],nu1=nu1,p.value=sig.level,N=N,q.hat=qhat) +} + + +lincon.old<-function(x,con=0,tr=.2,alpha=.05,pr=TRUE,crit=NA,SEED=TRUE,KB=FALSE){ +# +# A heteroscedastic test of d linear contrasts using trimmed means. +# +# The data are assumed to be stored in $x$ in list mode, a matrix +# or a data frame. If in list mode, +# length(x) is assumed to correspond to the total number of groups. +# It is assumed all groups are independent. +# +# con is a J by d matrix containing the contrast coefficients that are used. +# If con is not specified, all pairwise comparisons are made. +# +# Missing values are automatically removed. 
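+#
+# A short usage sketch (three simulated groups, all pairwise comparisons;
+# illustrative values only):
+#   set.seed(1)
+#   x=list(rnorm(20),rnorm(20,.5),rnorm(20,1))
+#   lincon.old(x,tr=.2)   # test statistics in $test, estimates and CIs in $psihat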
+# +# To apply the Kaiser-Bowden method, use the function kbcon +# +if(tr==.5)stop("Use the R function medpb to compare medians") +if(is.data.frame(x))x=as.matrix(x) +if(KB)stop("Use the function kbcon") +flag<-TRUE +if(alpha!= .05 && alpha!=.01)flag<-FALSE +if(is.matrix(x))x<-listm(x) +if(!is.list(x))stop("Data must be stored in a matrix or in list mode.") +con<-as.matrix(con) +J<-length(x) +sam=NA +h<-vector("numeric",J) +w<-vector("numeric",J) +xbar<-vector("numeric",J) +for(j in 1:J){ +xx<-!is.na(x[[j]]) +val<-x[[j]] +x[[j]]<-val[xx] # Remove missing values +sam[j]=length(x[[j]]) +h[j]<-length(x[[j]])-2*floor(tr*length(x[[j]])) + # h is the number of observations in the jth group after trimming. +w[j]<-((length(x[[j]])-1)*winvar(x[[j]],tr))/(h[j]*(h[j]-1)) +xbar[j]<-mean(x[[j]],tr) +} +if(sum(con^2)==0){ +CC<-(J^2-J)/2 +if(CC>28)print("For faster execution time but less power, use kbcon") +psihat<-matrix(0,CC,8) +dimnames(psihat)<-list(NULL,c("Group","Group","psihat","ci.lower","ci.upper", +"p.value","Est.1","Est.2")) +test<-matrix(NA,CC,6) +dimnames(test)<-list(NULL,c("Group","Group","test","crit","se","df")) +jcom<-0 +for (j in 1:J){ +for (k in 1:J){ +if (j < k){ +jcom<-jcom+1 +test[jcom,3]<-abs(xbar[j]-xbar[k])/sqrt(w[j]+w[k]) +sejk<-sqrt(w[j]+w[k]) +test[jcom,5]<-sejk +psihat[jcom,1]<-j +psihat[jcom,2]<-k +test[jcom,1]<-j +test[jcom,2]<-k +psihat[jcom,3]<-(xbar[j]-xbar[k]) +df<-(w[j]+w[k])^2/(w[j]^2/(h[j]-1)+w[k]^2/(h[k]-1)) +test[jcom,6]<-df +psihat[jcom,6]<-2*(1-pt(test[jcom,3],df)) +psihat[jcom,7]=xbar[j] +psihat[jcom,8]=xbar[k] +if(!KB){ +if(CC>28)flag=FALSE +if(flag){ +if(alpha==.05)crit<-smmcrit(df,CC) +if(alpha==.01)crit<-smmcrit01(df,CC) +} +if(!flag || CC>28)crit<-smmvalv2(dfvec=rep(df,CC),alpha=alpha,SEED=SEED) +} +if(KB)crit<-sqrt((J-1)*(1+(J-2)/df)*qf(1-alpha,J-1,df)) +test[jcom,4]<-crit +psihat[jcom,4]<-(xbar[j]-xbar[k])-crit*sejk +psihat[jcom,5]<-(xbar[j]-xbar[k])+crit*sejk +}}}} +if(sum(con^2)>0){ +if(nrow(con)!=length(x)){ +stop("The number of groups does not match the number of contrast coefficients.") +} +psihat<-matrix(0,ncol(con),5) +dimnames(psihat)<-list(NULL,c("con.num","psihat","ci.lower","ci.upper", +"p.value")) +test<-matrix(0,ncol(con),5) +dimnames(test)<-list(NULL,c("con.num","test","crit","se","df")) +df<-0 +for (d in 1:ncol(con)){ +psihat[d,1]<-d +psihat[d,2]<-sum(con[,d]*xbar) +sejk<-sqrt(sum(con[,d]^2*w)) +test[d,1]<-d +test[d,2]<-sum(con[,d]*xbar)/sejk +df<-(sum(con[,d]^2*w))^2/sum(con[,d]^4*w^2/(h-1)) +if(flag){ +if(alpha==.05)crit<-smmcrit(df,ncol(con)) +if(alpha==.01)crit<-smmcrit01(df,ncol(con)) +} +if(!flag)crit<-smmvalv2(dfvec=rep(df,ncol(con)),alpha=alpha,SEED=SEED) +test[d,3]<-crit +test[d,4]<-sejk +test[d,5]<-df +psihat[d,3]<-psihat[d,2]-crit*sejk +psihat[d,4]<-psihat[d,2]+crit*sejk +psihat[d,5]<-2*(1-pt(abs(test[d,2]),df)) +} +} +if(pr){ +print("Note: confidence intervals are adjusted to control FWE") +print("But p-values are not adjusted to control FWE") +print('Adjusted p-values can be computed with the R function p.adjust') +} +list(n=sam,test=test,psihat=psihat) +} + +lincon.pool<-function(x,con=0,tr=.2,alpha=.05,POOL=FALSE){ +# +# Same as lincon but with a pooling option that is used when +# dealing with main effects in a two-way and three-way designs +# +# See, for example, the function twowayA.poolB +# + +if(tr==.5)stop('Use the R function medpb to compare medians') +if(is.data.frame(x))x=as.matrix(x) +if(is.matrix(x))x<-listm(x) +if(!is.list(x))stop('Data must be stored in a matrix or in list mode.') +if(sum(con^2)>0){ +if(POOL){ 
+ic=0 +y=list() +nc=ncol(con) +nc2=nc*2 +Ncon=matrix(0,nrow=nc2,ncol=nc) +for(k in 1:nc){ +id1=which(con[,k]==1) +id2=which(con[,k]==-1) +ic=ic+1 +print(ic) +Ncon[ic,k]=1 +y[[ic]]=pool.a.list(x[id1]) +ic=ic+1 +Ncon[ic,k]=-1 +y[[ic]]=pool.a.list(x[id2]) +} +res=lincon(y,con=Ncon,tr=tr) +print(Ncon) +}} +if(!POOL)res=lincon(x,con=con,tr=tr,alpha=alpha) +res +} + + + +poireg<-function(x,y,xout=FALSE,outfun=outpro,plotit=FALSE,xlab="X",ylab="Y", +varfun=var,YHAT=FALSE,STAND=TRUE,...){ +# +# Perform Poisson regression. +# The predictors are assumed to be stored in the n by p matrix x. +# The y values are typically count data (integers). +# +# xout=T will remove outliers from among the x values and then fit +# the regression line. +# Default: +# One predictor, a mad-median rule is used. +# With more than one, projection method is used. +# +# outfun=out will use MVE method +# +xy=elimna(cbind(x,y)) +x<-as.matrix(x) +x=xy[,1:ncol(x)] +y=xy[,ncol(xy)] +x<-as.matrix(x) +if(xout){ +flag<-outfun(x,...)$keep +x<-x[flag,] +y<-y[flag] +x<-as.matrix(x) +} +temp=glm(formula=y~x,family=poisson) +init=summary(temp) +yhat=temp$coef[1] +for(j in 1:ncol(x)){ +j1=j+1 +yhat=yhat+temp$coef[j1]*x[,j] +} +yhat=exp(yhat) +if(plotit){ +x=as.matrix(x) +if(ncol(x)>1)stop("Cannot plot with more than one predictor") +plot(x,y,xlab=xlab,ylab=ylab) +#points(x,yhat,pch=".") +xord=order(x) +lines(x[xord],yhat[xord]) +init$coef +} +ex=varfun(yhat)/varfun(y) +str=sqrt(ex) +hatv=NULL +if(YHAT)hatv=yhat +list(results=init,Explanatory.Power=ex,Strength.Assoc=str,yhat=hatv) +} + + +smcorcom<-function(x1,y1,x2,y2,nboot=200,pts=NA,plotit=TRUE, +SEED=TRUE,varfun=pbvar,xout=TRUE,outfun=out,...){ +# +# Compare strength of association of pairs of variables associated with +# two independent group. +# The strength of the association is based on Cleveland's LOWESS +# smoother coupled with a robust analog of explanatory power. +# +# The method generalizes the goal of compared the usual +# coefficient of determination associated with two independent groups. +# +# Assume data are in x1 y1 x2 and y2 +# +# Reject at the .05 level if the reported p-value is less than or +# equal to p.crit, which is returned by the function. 
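+#
+# A minimal usage sketch (two simulated groups, illustrative values only):
+#   set.seed(1)
+#   x1=rnorm(50); y1=x1+rnorm(50)
+#   x2=rnorm(50); y2=.5*x2+rnorm(50)
+#   smcorcom(x1,y1,x2,y2)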
+# +if(xout){ +flag<-outfun(x1,...)$keep +x1<-x1[flag] +y1<-y1[flag] +} +m<-elimna(cbind(x2,y2)) +x2<-m[,1] +y2<-m[,2] +if(xout){ +flag<-outfun(x2,...)$keep +x2<-x2[flag] +y2<-y2[flag] +} +if(SEED)set.seed(2) +estmat1=NA +estmat2=NA +data1<-matrix(sample(length(y1),size=length(y1)*nboot,replace=TRUE),nrow=nboot) +data2<-matrix(sample(length(y2),size=length(y2)*nboot,replace=TRUE),nrow=nboot) +# +for(ib in 1:nboot){ +estmat1[ib]=lplot(x1[data1[ib,]],y1[data1[ib,]],plotit=FALSE, +varfun=varfun)$Explanatory.power +estmat2[ib]=lplot(x2[data2[ib,]],y2[data2[ib,]], +varfun=varfun,plotit=FALSE)$Explanatory.power +} +dif<-(estmat11){ +for(p in 1:ncol(x)){ +temp[p]<-tsp1reg(x[,p],y,OPT=OPT,tr=tr)$coef[2] +} +res<-y-x%*%temp +if(!HD)alpha<-median(res) +if(HD)alpha<-hd(res,tr=tr) +r<-matrix(NA,ncol=ncol(x),nrow=nrow(x)) +tempold<-temp +for(it in 1:iter){ +for(p in 1:ncol(x)){ +r[,p]<-y-x%*%temp-alpha+temp[p]*x[,p] +temp[p]<-tsp1reg(x[,p],r[,p],plotit=FALSE,OPT=OPT,tr=tr)$coef[2] +} +if(!HD)alpha<-median(y-x%*%temp) +if(HD)alpha<-hd(y-x%*%temp,tr=tr) +tempold<-temp +} +coef<-c(alpha,temp) +res<-y-x%*%temp-alpha +} +yhat<-y-res +stre=e.pow=NULL +if(do.stre){ +temp=varfun(y) +if(temp==0){ +if(WARN)print("Warning: When computing strength of association, measure of variation=0") +} +e.pow=NULL +if(temp>0){ +e.pow<-varfun(yhat)/varfun(y) +if(!is.na(e.pow)){ +if(e.pow>=1)e.pow<-corfun(yhat,y)$cor^2 +e.pow=as.numeric(e.pow) +stre=sqrt(e.pow) +}}} +if(plotit){ +if(ncol(x)==1){ +plot(x,y,xlab=xlab,ylab=ylab) +abline(coef) +}} +list(n=n,n.keep=n.keep, +coef=coef,residuals=res,Strength.Assoc=stre,Explanatory.Power=e.pow) +} + +lplotv2<-function(x,y,span=.75,pyhat=FALSE,eout=FALSE,xout=FALSE,outfun=out,plotit=TRUE, +expand=.5,low.span=2/3,varfun=pbvar,cor.op=FALSE,cor.fun=pbcor,ADJ=FALSE,nboot=20, +scale=TRUE,xlab="X",ylab="Y",zlab="",theta=50,phi=25,family="gaussian", +duplicate="error",pr=TRUE,SEED=TRUE,ticktype="simple"){ +# +# Plot regression surface using LOESS +# +# low.span is the span when lowess is used and there is one predictor +# span is the span when loess is used with two or more predictors +# pyhat=T will return Y hat values +# eout=T will eliminate outliers +# xout=T will eliminate points where X is an outliers +# family="gaussian"; see the description of the built-in function loess +# +# duplicate="error" +# In some situations where duplicate values occur, when plotting with +# two predictors, it is necessary to set duplicate="strip" +# +st.adj=NULL +e.adj=NULL +if(ADJ){ +if(SEED)set.seed(2) +} +si=1 +library(stats) +x<-as.matrix(x) +if(!is.matrix(x))stop("x is not a matrix") +d<-ncol(x) +if(d>=2){ +library(akima) +if(ncol(x)==2 && !scale){ +if(pr){ +print("scale=F is specified.") +print("If there is dependence, might use scale=T") +}} +m<-elimna(cbind(x,y)) +x<-m[,1:d] +y<-m[,d+1] +if(eout && xout)stop("Can't have both eout and xout = F") +if(eout){ +flag<-outfun(m,plotit=FALSE)$keep +m<-m[flag,] +} +if(xout){ +flag<-outfun(x,plotit=FALSE)$keep +m<-m[flag,] +} +x<-m[,1:d] +y<-m[,d+1] +if(d==2)fitr<-fitted(loess(y~x[,1]*x[,2],span=span,family=family)) +if(d==3)fitr<-fitted(loess(y~x[,1]*x[,2]*x[,3],span=span,family=family)) +if(d==4)fitr<-fitted(loess(y~x[,1]*x[,2]*x[,3]*x[,4],span=span,family=family)) +if(d>4)stop("Can have at most four predictors") +last<-fitr +if(d==2 && plotit){ +iout<-c(1:length(fitr)) +nm1<-length(fitr)-1 +for(i in 1:nm1){ +ip1<-i+1 +for(k in ip1:length(fitr))if(sum(x[i,]==x[k,])==2)iout[k]<-0 +} +fitr<-fitr[iout>=1] # Eliminate duplicate points in the x-y plane +# This is 
necessary when doing three dimensional plots +# with the R function interp +mkeep<-x[iout>=1,] +fitr<-interp(mkeep[,1],mkeep[,2],fitr,duplicate=duplicate) +persp(fitr,theta=theta,phi=phi,xlab=xlab,ylab=ylab,zlab=zlab,expand=expand, +scale=scale,ticktype=ticktype) +}} +if(d==1){ +m<-elimna(cbind(x,y)) +x<-m[,1:d] +y<-m[,d+1] +if(eout && xout)stop("Can't have both eout and xout = F") +if(eout){ +flag<-outfun(m)$keep +m<-m[flag,] +} +if(xout){ +flag<-outfun(x)$keep +m<-m[flag,] +} +x<-m[,1:d] +y<-m[,d+1] +if(plotit){ +plot(x,y,xlab=xlab,ylab=ylab) +lines(lowess(x,y,f=low.span)) +} +yyy<-lowess(x,y)$y +xxx<-lowess(x,y)$x +if(d==1){ +ordx=order(xxx) +yord=yyy[ordx] +flag=NA +for (i in 2:length(yyy))flag[i-1]=sign(yord[i]-yord[i-1]) +if(sum(flag)<0)si=-1 +} +last<-yyy +chkit<-sum(duplicated(x)) +if(chkit>0){ +last<-rep(1,length(y)) +for(j in 1:length(yyy)){ +for(i in 1:length(y)){ +if(x[i]==xxx[j])last[i]<-yyy[j] +}} +} +} +E.power<-1 +if(!cor.op)E.power<-varfun(last[!is.na(last)])/varfun(y) +if(cor.op || E.power>=1){ +if(d==1){ +xord<-order(x) +E.power<-cor.fun(last,y[xord])$cor^2 +} +if(d>1)E.power<-cor.fun(last,y)$cor^2 +} +if(ADJ){ +x=as.matrix(x) +val=NA +n=length(y) +data1<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot) +data2<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot) +for(i in 1:nboot){ +temp=lplot.sub(x[data1[i,],],y[data2[i,]],plotit=FALSE,pr=FALSE) +val[i]=temp$Explanatory.power +} +vindt=median(val) +v2indt=median(sqrt(val)) +st.adj=(sqrt(E.power)-max(c(0,v2indt)))/(1-max(c(0,v2indt))) +e.adj=(E.power-max(c(0,vindt)))/(1-max(c(0,vindt))) +st.adj=max(c(0,st.adj)) +e.adj=max(c(0,e.adj)) +} +if(!pyhat)last <- NULL +list(Strength.Assoc=si*sqrt(E.power),Explanatory.power=E.power, +Strength.Adj=st.adj,Explanatory.Adj=e.adj,yhat.values=last) +} +yuendna<-function(x,y=NULL,tr=.2,alpha=.05){ +# +# Compare the trimmed means of two dependent random variables +# using the data in x and y. +# The default amount of trimming is 20% +# +# If y is not supplied, this function assumes x is a matrix with 2 columns. +# +# pairs of observations, for which one value is missing, are NOT deleted. +# Marginal trimmed means are compared +# using all available data. +# +if(is.null(y)){ +if(!is.matrix(x))stop("y is null and x is not a matrix") +y=x[,2] +x=x[,1] +} +if(length(x)!=length(y))stop("The number of observations must be equal") +m<-cbind(x,y) +# first eliminate any rows with both values missing. 
+flag=(apply(is.na(m),1,sum)==2) +m=m[!flag,] +x<-m[,1] +y<-m[,2] +flagx=is.na(y) # Indicates observed x values for which y is missing +flagy=is.na(x) # Indicates the y values for which x is missing +m<-elimna(m) # m has data where both values are available--no missing values +n=nrow(m) +n1=sum(flagx) # number of x values for which y is missing +n2=sum(flagy) +h=n-2*floor(tr*n) +h1=n1-2*floor(tr*n1) +h2=n2-2*floor(tr*n2) +xbarn=mean(x,tr=tr,na.rm=TRUE) +xbarn1=0 +if(h1>0)xbarn1=mean(x[flagx],tr=tr) +ybarn=mean(y[!flagy],tr=tr,na.rm=TRUE) +ybarn1=0 +if(h2>0)ybarn1=mean(y[flagy],tr=tr) +lam1=h/(h+h1) +lam2=h/(h+h2) +est=lam1*xbarn-lam2*ybarn+(1-lam1)*xbarn1-(1-lam2)*ybarn1 +sex=trimse(elimna(x),tr=tr) +sey=trimse(elimna(y),tr=tr) +q1<-(n-1)*winvar(m[,1],tr) +q2<-(n-1)*winvar(m[,2],tr) +q3<-(n-1)*wincor(m[,1],m[,2],tr)$cov +sen=sqrt((lam1^2*q1+lam2^2*q2-2*lam1*lam2*q3)/(h*(h-1))) +SE=sqrt(sen^2+(1-lam1)^2*sex^2+(1-lam2)^2*sey^2) +test=est/SE +list(estimate=est,test=test,se=SE) +} + +rm2miss<-function(x,y=NULL,tr=0,nboot=1000,alpha=.05,SEED=TRUE){ +# +# Compare the marginal trimmed means of two dependent groups +# using a bootstrap t method that allows missing values +# +# If y is not supplied, this function assumes x is a matrix with 2 columns. +# +# NOTE: This function can fail if there are too many missing values +# get the error: incorrect number of dimensions +# +# +if(SEED)set.seed(2) +if(is.null(y)){ +if(!is.matrix(x))stop("y is null and x is not a matrix") +} +if(!is.null(y))x=cbind(x,y) +if(ncol(x)!=2) +print("warning: x has more than one column; columns 1 and 2 are used") +n=nrow(x) +test=yuendna(x,tr=tr) +cen=x +cen[,1]=cen[,1]-mean(x[,1],na.rm=TRUE,tr=tr) +cen[,2]=cen[,2]-mean(x[,2],na.rm=TRUE,tr=tr) +data=matrix(sample(n,n*nboot,replace=TRUE),ncol=nboot) +tval=apply(data,2,FUN=rm2miss.sub,x=cen,tr=tr) +tval=sort(abs(tval)) +icrit<-floor((1-alpha)*nboot+.5) +ci=test$est-tval[icrit]*test$se +ci[2]=test$est+tval[icrit]*test$se +pv=mean(abs(test$test)<=abs(tval)) +list(est.dif=test$est,ci=ci,p.value=pv) +} +rm2miss.sub<-function(data,x,tr){ +n=nrow(x) +m=x[data,] +ans=yuendna(m,tr=tr)$test +ans +} +ydbt<-function(x,y,tr=.2,alpha=.05,nboot=599,side=TRUE,plotit=FALSE,op=1,SEED=TRUE){ +# +# Using the bootstrap-t method, +# compute a .95 confidence interval for the difference between +# the marginal trimmed means of paired data. +# By default, 20% trimming is used with B=599 bootstrap samples. +# +# side=F returns equal-tailed ci +# side=T returns symmetric ci. +# +side<-as.logical(side) +if(length(x)!=length(y))stop("Must have equal sample sizes.") +m<-cbind(x,y) +m<-elimna(m) +x<-m[,1] +y<-m[,2] +if(sum(c(!is.na(x),!is.na(y)))!=(length(x)+length(y)))stop("Missing values are not allowed.") +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +#print("Taking bootstrap samples. Please wait.") +data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot) +e1=mean(x,tr) +e2=mean(y,tr) +xcen<-x-mean(x,tr) +ycen<-y-mean(y,tr) +bvec<-apply(data,1,tsub,xcen,ycen,tr) +# bvec is a 1 by nboot matrix containing the bootstrap test statistics. 
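+# Because x and y were centered at their trimmed means, bvec approximates the
+# null distribution of the paired trimmed-mean test statistic; its quantiles are
+# used below, together with the standard error from yuend, to form the CI.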
+dotest=yuend(x,y,tr=tr) +estse<-dotest$se +p.value=NULL +dif<-mean(x,tr)-mean(y,tr) +if(!side){ +ilow<-round((alpha/2)*nboot) +ihi<-nboot-ilow +bsort<-sort(bvec) +ci<-0 +ci[1]<-dif-bsort[ihi]*estse +ci[2]<-dif-bsort[ilow+1]*estse +} +if(side){ +bsort<-sort(abs(bvec)) +ic<-round((1-alpha)*nboot) +ci<-0 +ci[1]<-dif-bsort[ic]*estse +ci[2]<-dif+bsort[ic]*estse +p.value<-(sum(abs(dotest$teststat)<=abs(bvec)))/nboot +} +if(plotit){ +if(op==1)akerd(bsort) +if(op==2)rdplot(bsort) +if(op==3)boxplot(bsort) +} +list(ci=ci,Est.1=e1,Est.2=e2,dif=dif,p.value=p.value) +} + + +rmrvar<-function(x,y=NA,alpha=.05,con=0,est=pbvar,plotit=FALSE,grp=NA, +hoch=TRUE,nboot=NA,xlab="Group 1",ylab="Group 2",pr=TRUE,SEED=TRUE,...){ +# +# Use a percentile bootstrap method to compare dependent groups. +# based on some robust measure of variation indicated by the argument +# est +# By default, est=pbvar, the percentage bend midvariance. +# +# The function computes a .95 confidence interval for all linear contrasts +# specified by con, a J by C matrix, where C is the number of +# contrasts to be tested, and the columns of con are the +# contrast coefficients. +# If con is not specified, all pairwise comparisons are done. +# +# nboot is the bootstrap sample size. If not specified, a value will +# be chosen depending on the number of contrasts there are. +# +# x can be an n by J matrix or it can have list mode +# for two groups, data for second group can be put in y +# otherwise, assume x is a matrix (n by J) or has list mode. +# +# Hochberg's sequentially rejective method is used to control alpha. +# +if(!is.na(y[1]))x=cbind(x,y) +if(is.list(x)){ +# put the data in an n by J matrix +mat<-matl(x) +} +if(is.matrix(x) && is.matrix(con)){ +if(ncol(x)!=nrow(con))stop("The number of rows in con is not equal to the +number of groups.") +mat<-x +} +if(is.matrix(x))mat<-x +if(!is.na(sum(grp)))mat<-mat[,grp] +mat<-elimna(mat) # Remove rows with missing values. +x<-mat +J<-ncol(mat) +Jm<-J-1 +if(sum(con^2)==0){ +d<-(J^2-J)/2 +con<-matrix(0,J,d) +id<-0 +for (j in 1:Jm){ +jp<-j+1 +for (k in jp:J){ +id<-id+1 +con[j,id]<-1 +con[k,id]<-0-1 +}}} +d<-ncol(con) +if(is.na(nboot)){ +if(d<=4)nboot<-1000 +if(d>4)nboot<-5000 +} +n<-nrow(mat) +crit.vec<-alpha/c(1:d) +connum<-ncol(con) +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +xbars<-apply(mat,2,est) +psidat<-NA +for (ic in 1:connum)psidat[ic]<-sum(con[,ic]*xbars) +psihat<-matrix(0,connum,nboot) +bvec<-matrix(NA,ncol=J,nrow=nboot) +print("Taking bootstrap samples. Please wait.") +data<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot) +for(ib in 1:nboot){ +bvec[ib,]<-apply(x[data[ib,],],2,est,...) +} +# +# Now have an nboot by J matrix of bootstrap values. 
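+# Each linear contrast is applied to every bootstrap row (via bptdpsi), and the
+# proportion of bootstrap contrast values above or below zero yields the p-value.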
+# +test<-1 +bias<-NA +for (ic in 1:connum){ +psihat[ic,]<-apply(bvec,1,bptdpsi,con[,ic]) +test[ic]<-sum((psihat[ic,]>0))/nboot +test[ic]<-min(test[ic],1-test[ic]) +} +test<-2*test +ncon<-ncol(con) +if(alpha==.05){ +dvec<-c(.025,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) +dvecba<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) +if(ncon > 10){ +avec<-.05/c(11:ncon) +dvec<-c(dvec,avec) +}} +if(alpha==.01){ +dvec<-c(.005,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) +dvecba<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) +if(ncon > 10){ +avec<-.01/c(11:ncon) +dvec<-c(dvec,avec) +}} +#if(hoch)dvec<-alpha/(2* c(1:ncon)) +#dvec<-2*dvec +if(alpha != .05 && alpha != .01){ +dvec<-alpha/c(1:ncon) +dvecba<-dvec +dvec[1]<-alpha/2 +} +if(hoch)dvec<-alpha/(c(1:ncon)) +if(plotit && ncol(bvec)==2){ +z<-c(0,0) +one<-c(1,1) +plot(rbind(bvec,z,one),xlab=xlab,ylab=ylab,type="n") +points(bvec) +totv<-apply(x,2,est,...) +cmat<-var(bvec) +dis<-mahalanobis(bvec,totv,cmat) +temp.dis<-order(dis) +ic<-round((1-alpha)*nboot) +xx<-bvec[temp.dis[1:ic],] +xord<-order(xx[,1]) +xx<-xx[xord,] +temp<-chull(xx) +lines(xx[temp,]) +lines(xx[c(temp[1],temp[length(temp)]),]) +abline(0,1) +} +temp2<-order(0-test) +ncon<-ncol(con) +zvec<-dvec[1:ncon] +sigvec<-(test[temp2]>=zvec) +output<-matrix(0,connum,6) +dimnames(output)<-list(NULL,c("con.num","est.var","p.value","crit.p.value", +"ci.lower","ci.upper")) +tmeans<-apply(mat,2,est,...) +psi<-1 +for (ic in 1:ncol(con)){ +output[ic,2]<-sum(con[,ic]*tmeans) +output[ic,1]<-ic +output[ic,3]<-test[ic] +output[temp2,4]<-zvec +temp<-sort(psihat[ic,]) +icl<-round(output[ic,4]*nboot/2)+1 +icu<-nboot-(icl-1) +output[ic,5]<-temp[icl] +output[ic,6]<-temp[icu] +} +num.sig<-sum(output[,3]<=output[,4]) +list(output=output,con=con,num.sig=num.sig) +} + +bprm<-function(x,y=NULL,grp=NA){ +# +# Perform Brunner-Puri within groups rank-based ANOVA +# +# x can be a matrix with columns corresponding to groups +# or it can have list mode. +# +# For computational details, see Brunner, B., Domhof, S. and Langer, F. (2002, +# section 7.2.2, Nonparametric Analysis of Longitudinal Data in +# Factorial Designs) +# +if(is.list(x))x<-matl(x) +if(!is.null(y[1]))x=cbind(x,y) +x<-elimna(x) +if(is.na(grp[1]))grp <- c(1:ncol(x)) +if(!is.matrix(x))stop("Data are not stored in a matrix or in list mode.") +K<-length(grp) # The number of groups. +Jb<-matrix(1,K,K) +Ib<-diag(1,K) +Pb<-Ib-Jb/K +y<-matrix(rank(x),ncol=ncol(x)) #ranks of pooled data +ybar<-apply(y,2,mean) # average of ranks +N<-ncol(x)*nrow(x) +vhat<-var(y)/N^2 +test<-nrow(x)*sum((ybar-(N+1)/2)^2)/N^2 +trval<-sum(diag(Pb%*%vhat)) +test<-test/trval # See Brunner, Domhof and Langer, p. 98, eq. 7.12 +nu1<-trval^2/sum(diag(Pb%*%vhat%*%Pb%*%vhat)) +sig.level<-1-pf(test,nu1,1000000) +list(test.stat=test,nu1=nu1,p.value=sig.level) +} + + + +effectg.sub<-function(x,y,locfun=tmean,varfun=winvarN,...){ +# +# Compute a robust-heteroscedastic measure of effect size +# based on the measure of location indicated by the argument +# locfun, and the measure of scatter indicated by +# varfun. +# +# This subfunction is for the equal sample size case and is called by +# effectg when sample sizes are not equal. +# +# varfun defaults to winvarN, the Winsorized variance rescaled so that +# it estimates the population variance under normality. +# +library(MASS) +x<-x[!is.na(x)] # Remove any missing values in x +y<-y[!is.na(y)] # Remove any missing values in y +m1=locfun(x,...) +m2=locfun(y,...) 
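+# The effect size is the variance of the two location estimates (between-group
+# variation) divided by a robust measure of variation for the pooled data.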
+top=var(c(m1,m2)) +pts=c(x,y) +# +bot=varfun(pts,...) +# +e.pow=top/bot +list(Var.Explained=e.pow,Effect.Size=sqrt(e.pow)) +} + + +effectg<-function(x,y,locfun=tmean,varfun=winvarN,nboot=100,SEED=TRUE,...){ +# +# Compute a robust heteroscedastic measure of effect size +# (explanatory power) based on the measures of location and scale +# indicated by the arguments locfun and varfun, respectively +# +library(MASS) +if(SEED)set.seed(2) +x<-x[!is.na(x)] # Remove any missing values in x +y<-y[!is.na(y)] # Remove any missing values in y +n1=length(x) +n2=length(y) +if(n1==n2){ +temp=effectg.sub(x,y,locfun=locfun,varfun=varfun,...) +e.pow=temp$Var.Explained +} +if(n1!=n2){ +N=min(c(n1,n2)) +vals=0 +for(i in 1:nboot)vals[i]=effectg.sub(sample(x,N),sample(y,N), +locfun=locfun,varfun=varfun,...)$Var.Explained +e.pow=mean(vals) +} +list(Explanatory.power=e.pow,Effect.Size=sqrt(e.pow)) +} + + +winvarN<-function(x,tr=.2){ +# +# rescale the Winsorized variance so that it equals one for the standard +# normal distribution +# +x=elimna(x) +library(MASS) +cterm=NULL +if(tr==0)cterm=1 +if(tr==0.1)cterm=0.6786546 +if(tr==0.2)cterm=0.4120867 +if(is.null(cterm))cterm=area(dnormvar,qnorm(tr),qnorm(1-tr))+2*(qnorm(tr)^2)*tr +bot=winvar(x,tr=tr)/cterm +bot +} +covloc<-function(x){ +# +# Return mean and covarinace matrix +# +loc=apply(x,2,mean) +mcov=cov(x) +list(center=loc,cov=mcov) +} +g2plotdifxy<-function(x,y,xlab="Difference",ylab=""){ +# +# Plot an estimate of the distribution of X-Y +# +x<-x[!is.na(x)] +y<-y[!is.na(y)] +m<-as.vector(outer(x,y,FUN="-")) +akerd(m,xlab=xlab,ylab=ylab) +} +sumplot2g<-function(x,y=NULL,xlab="X",ylab="",eblabx="Groups",eblaby="",nse=2){ +# +# create four plots useful when comparing two groups +# 1. error bars +# 2. boxplots +# 3. kernel density estimates +# 4 shift function +# +if(!is.null(y)){ +xy=list() +xy[[1]]=x +xy[[2]]=y +} +if(is.null(y)){ +if(is.matrix(x))xy=matl(x) +} +par(mfrow=c(2,2)) +par(oma=c(4,0,0,0)) +ebarplot(xy,xlab=eblabx,ylab=eblaby,nse=nse) +boxplot(xy) +g2plot(xy[[1]],xy[[2]]) +sband(xy[[1]],xy[[2]]) +par(mfrow=c(1,1)) +} + +yuenv2<-function(x,y=NULL,tr=.2,alpha=.05,plotit=FALSE,plotfun=splot,op=TRUE, VL=TRUE,cor.op=FALSE, loc.fun=median, +xlab="Groups",ylab="",PB=FALSE,nboot=100, SEED=TRUE){ +# +# Perform Yuen's test for trimmed means on the data in x and y. +# The default amount of trimming is 20% +# Missing values (values stored as NA) are automatically removed. +# +# A confidence interval for the trimmed mean of x minus the +# the trimmed mean of y is computed and returned in yuen$ci. +# The significance level is returned in yuen$p.value +# +# For an omnibus test with more than two independent groups, +# use t1way. +# +# Unlike the function yuen, a robust heteroscedastic measure +# of effect size is returned. +# PB=FALSE means that a Winsorized variation of prediction error is used to measure effect size. +# PB=TRUE: A percentage bend measure of variation is used instead. 
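+#
+# A brief usage sketch (two simulated independent groups, illustrative values only):
+#   set.seed(1)
+#   g1=rnorm(40); g2=rnorm(40,.5)
+#   yuenv2(g1,g2)   # CI, p-value and the explanatory-power effect size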
+# +if(tr==.5)stop("Use medpb to compare medians.") +if(tr>.5)stop("Can't have tr>.5") +if(is.null(y)){ +if(is.matrix(x) || is.data.frame(x)){ +y=x[,2] +x=x[,1] +} +if(is.list(x)){ +y=x[[2]] +x=x[[1]] +} +} +library(MASS) +x<-x[!is.na(x)] # Remove any missing values in x +y<-y[!is.na(y)] # Remove any missing values in y +n1=length(x) +n2=length(y) +h1<-length(x)-2*floor(tr*length(x)) +h2<-length(y)-2*floor(tr*length(y)) +q1<-(length(x)-1)*winvar(x,tr)/(h1*(h1-1)) +q2<-(length(y)-1)*winvar(y,tr)/(h2*(h2-1)) +df<-(q1+q2)^2/((q1^2/(h1-1))+(q2^2/(h2-1))) +crit<-qt(1-alpha/2,df) +m1=mean(x,tr) +m2=mean(y,tr) +mbar=(m1+m2)/2 +dif=m1-m2 +low<-dif-crit*sqrt(q1+q2) +up<-dif+crit*sqrt(q1+q2) +test<-abs(dif/sqrt(q1+q2)) +yuen<-2*(1-pt(test,df)) +xx=c(rep(1,length(x)),rep(2,length(y))) +if(h1==h2){ +pts=c(x,y) +top=var(c(m1,m2)) +# +if(!PB){ +if(tr==0)cterm=1 +if(tr>0)cterm=area(dnormvar,qnorm(tr),qnorm(1-tr))+2*(qnorm(tr)^2)*tr +bot=winvar(pts,tr=tr)/cterm +e.pow=top/bot +if(!is.na(e.pow)){ +if(e.pow>1){ +x0=c(rep(1,length(x)),rep(2,length(y))) +y0=c(x,y) +e.pow=wincor(x0,y0,tr=tr)$cor^2 +} +} +} +# +if(PB){ +bot=pbvar(pts) +e.pow=top/bot +} +# +} +if(n1!=n2){ +N=min(c(n1,n2)) +vals=0 +if(SEED)set.seed(2) +for(i in 1:nboot)vals[i]=yuen.effect(sample(x,N),sample(y,N),tr=tr)$Var.Explained +e.pow=loc.fun(vals) +} +if(plotit){ +plot(xx,pts,xlab=xlab,ylab=ylab) +if(op) +points(c(1,2),c(m1,m2)) +if(VL)lines(c(1,2),c(m1,m2)) +} +list(ci=c(low,up),n1=n1,n2=n2, +p.value=yuen,dif=dif,se=sqrt(q1+q2),teststat=test, +crit=crit,df=df,Var.Explained=e.pow,Effect.Size=sqrt(e.pow)) +} + +yuen.effect.ci<-function(x,y,SEED=TRUE,nboot=400,tr=.2,alpha=.05){ +# +# Compute a 1-alpha confidence interval +# for a robust, heteroscedastic measure of effect size +# The absolute value of the measure of effect size is used. +# +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +x=elimna(x) +y=elimna(y) +bvec=0 +datax<-matrix(sample(x,size=length(x)*nboot,replace=TRUE),nrow=nboot) +datay<-matrix(sample(y,size=length(x)*nboot,replace=TRUE),nrow=nboot) +for(i in 1:nboot){ +bvec[i]=yuenv2(datax[i,],datay[i,],tr=tr,SEED=FALSE)$Effect.Size +} +bvec<-sort(abs(bvec)) +crit<-alpha/2 +icl<-round(crit*nboot)+1 +icu<-nboot-icl +ci<-NA +ci[1]<-bvec[icl] +pchk=yuen(x,y,tr=tr)$p.value +if(pchk>alpha)ci[1]=0 +ci[2]<-bvec[icu] +if(ci[1]<0)ci[1]=0 +es=abs(yuenv2(x,y,tr=tr)$Effect.Size) +list(CI=ci,Effect.Size=es) +} + +interplot<-function(J,K,x,locfun=mean,locvec=NULL,na.rm=TRUE, +g1lev=NULL,g2lev=NULL,type = c("l", + "p", "b"), xlab = "Fac 1", ylab = "means",trace.label="Fac 2",...){ +if(is.null(locvec))locvec=lloc(x,est=locfun,na.rm=na.rm) +if(is.list(locvec))locvec=as.vector(matl(locvec)) +if(is.null(g1lev[1])){ +g1=c(rep(1,K)) +for(j in 2:J)g1=c(g1,rep(j,K)) +} +if(!is.null(g1lev)){ +g1=c(rep(g1lev[1],K)) +for(j in 2:J)g1=c(g1,rep(g1lev[j],K)) +} +g1=as.factor(g1) +if(is.null(g2lev[1]))g2=as.factor(rep(c(1:K),J)) +if(!is.null(g2lev[1]))g2=as.factor(rep(g2lev,J)) +g2=as.factor(g2) +interaction.plot(g1,g2,locvec, xlab = xlab, ylab = ylab, +trace.label=trace.label) +} + + + +pbad2way<-function(J,K,x,est=tmean,conall=TRUE,alpha=.05,nboot=2000,grp=NA, +op=FALSE,pro.dis=TRUE,MM=FALSE,pr=TRUE,...){ +# +# This function is like the function pbadepth, +# only it is assumed that main effects and interactions for a +# two-way design are to be tested. +# + # The data are assumed to be stored in x in list mode or in a matrix. 
+ # If grp is unspecified, it is assumed x[[1]] contains the data + # for the first level of both factors: level 1,1. + # x[[2]] is assumed to contain the data for level 1 of the + # first factor and level 2 of the second factor: level 1,2 + # x[[j+1]] is the data for level 2,1, etc. + # If the data are in wrong order, grp can be used to rearrange the + # groups. For example, for a two by two design, grp<-c(2,4,3,1) + # indicates that the second group corresponds to level 1,1; + # group 4 corresponds to level 1,2; group 3 is level 2,1; + # and group 1 is level 2,2. + # + # Missing values are automatically removed. + # + if(pr){ + print('As of June, 2022, the default measure of location is tmean, a 20% trimmed mean') + print('The default for measuring depth is a projection method rather than Mahalanobis distance') + } + JK <- J * K + if(is.matrix(x)) + x <- listm(x) + if(!is.na(grp[1])) { + yy <- x + for(j in 1:length(grp)) + x[[j]] <- yy[[grp[j]]] + } + if(!is.list(x)) + stop("Data must be stored in list mode or a matrix.") + for(j in 1:JK) { + xx <- x[[j]] + x[[j]] <- xx[!is.na(xx)] + } + # + # Create the three contrast matrices + # + if(!conall){ + ij <- matrix(c(rep(1, J)), 1, J) + ik <- matrix(c(rep(1, K)), 1, K) + jm1 <- J - 1 + cj <- diag(1, jm1, J) + for(i in 1:jm1) + cj[i, i + 1] <- 0 - 1 + km1 <- K - 1 + ck <- diag(1, km1, K) + for(i in 1:km1) + ck[i, i + 1] <- 0 - 1 + conA <- t(kron(cj, ik)) + conB <- t(kron(ij, ck)) + conAB <- t(kron(cj, ck)) + conAB <- t(kron(abs(cj), ck)) +} +if(conall){ +temp<-con2way(J,K) +conA<-temp$conA +conB<-temp$conB +conAB<-temp$conAB +} + ncon <- max(nrow(conA), nrow(conB), nrow(conAB)) + if(JK != length(x)) + warning("The number of groups does not match the number of contrast coefficients.") +if(!is.na(grp[1])){ # Only analyze specified groups. +xx<-list() +for(i in 1:length(grp))xx[[i]]<-x[[grp[i]]] +x<-xx +} +mvec<-NA +for(j in 1:JK){ +temp<-x[[j]] +temp<-temp[!is.na(temp)] # Remove missing values. +x[[j]]<-temp +mvec[j]<-est(temp,...) +} +bvec<-matrix(NA,nrow=JK,ncol=nboot) +set.seed(2) # set seed of random number generator so that +# results can be duplicated. +#print("Taking bootstrap samples. Please wait.") +for(j in 1:JK){ +data<-matrix(sample(x[[j]],size=length(x[[j]])*nboot,replace=TRUE),nrow=nboot) +bvec[j,]<-apply(data,1,est,...) 
# J by nboot matrix, jth row contains +# bootstrapped estimates for jth group +} +bconA<-t(conA)%*%bvec #C by nboot matrix +tvecA<-t(conA)%*%mvec +tvecA<-tvecA[,1] +tempcenA<-apply(bconA,1,mean) +veczA<-rep(0,ncol(conA)) +bconA<-t(bconA) +smatA<-var(bconA-tempcenA+tvecA) +bconA<-rbind(bconA,veczA) +if(!pro.dis){ +if(!op)dv<-mahalanobis(bconA,tvecA,smatA) +if(op){ +dv<-out(bconA)$dis +}} +if(pro.dis)dv=pdis(bconA,MM=MM) +bplus<-nboot+1 +sig.levelA<-1-sum(dv[bplus]>=dv[1:nboot])/nboot +bconB<-t(conB)%*%bvec #C by nboot matrix +tvecB<-t(conB)%*%mvec +tvecB<-tvecB[,1] +tempcenB<-apply(bconB,1,mean) +veczB<-rep(0,ncol(conB)) +bconB<-t(bconB) +smatB<-var(bconB-tempcenB+tvecB) +bconB<-rbind(bconB,veczB) +if(!pro.dis){ +if(!op)dv<-mahalanobis(bconB,tvecB,smatB) +if(op){ +dv<-out(bconA)$dis +}} +if(pro.dis)dv=pdis(bconB,MM=MM) +sig.levelB<-1-sum(dv[bplus]>=dv[1:nboot])/nboot +bconAB<-t(conAB)%*%bvec #C by nboot matrix +tvecAB<-t(conAB)%*%mvec +tvecAB<-tvecAB[,1] +tempcenAB<-apply(bconAB,1,mean) +veczAB<-rep(0,ncol(conAB)) +bconAB<-t(bconAB) +smatAB<-var(bconAB-tempcenAB+tvecAB) +bconAB<-rbind(bconAB,veczAB) +if(!pro.dis){ +if(!op)dv<-mahalanobis(bconAB,tvecAB,smatAB) +if(op){ +dv<-out(bconAB)$dis +}} +if(pro.dis)dv=pdis(bconAB,MM=MM) +sig.levelAB<-1-sum(dv[bplus]>=dv[1:nboot])/nboot +list(sig.levelA=sig.levelA,sig.levelB=sig.levelB,sig.levelAB=sig.levelAB,conA=conA,conB=conB,conAB=conAB) + +} + + + + +t2way.no.p<-function(J,K,x,tr=.2,grp=c(1:p),alpha=.05,p=J*K){ +# Perform a J by K (two-way) anova on trimmed means where +# all jk groups are independent. +# +# The R variable x is assumed to contain the raw +# data stored in list mode. +# If grp is unspecified, it is assumed x[[1]] contains the data +# for the first level of both factors: level 1,1. +# x[[2]] is assumed to contain the data for level 1 of the +# first factor and level 2 of the second factor: level 1,2 +# x[[j+1]] is the data for level 2,1, etc. +# If the data are in wrong order, grp can be used to rearrange the +# groups. For example, for a two by two design, grp<-c(2,4,3,1) +# indicates that the second group corresponds to level 1,1; +# group 4 corresponds to level 1,2; group 3 is level 2,1; +# and group 1 is level 2,2. +# +# The default amount of trimming is tr=.2 +# +# It is assumed that the input variable x has length JK, the total number of +# groups being tested. If not, a warning message is printed. 
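+#
+# This version is called by t2waybt to get the test statistics (Qa, Qb, Qab)
+# from which bootstrap-t p-values are computed.
+# A minimal usage sketch (a 2 by 3 design, illustrative values only):
+#   set.seed(1)
+#   x=lapply(1:6,function(i)rnorm(20))
+#   t2way.no.p(2,3,x)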
+# +if(is.data.frame(x))x=as.matrix(x) +if(is.matrix(x))x<-listm(x) +if(!is.list(x))stop("Data are not stored in a matrix or in list mode") +if(p!=length(x)){ +print("Warning: The number of groups in your data is not equal to JK") +} +for(j in 1:p)x[[j]]<-elimna(x[[j]]) +xbar<-0 +h<-0 +d<-0 +R<-0 +W<-0 +d<-0 +r<-0 +w<-0 +nuhat<-0 +omegahat<-0 +DROW<-0 +DCOL<-0 +xtil<-matrix(0,J,K) +aval<-matrix(0,J,K) +for (j in 1:p){ +xbar[j]<-mean(x[[grp[j]]],tr) +h[j]<-length(x[[grp[j]]])-2*floor(tr*length(x[[grp[j]]])) +d[j]<-(length(x[[grp[j]]])-1)*winvar(x[[grp[j]]],tr)/(h[j]*(h[j]-1)) +} +d<-matrix(d,J,K,byrow=TRUE) +xbar<-matrix(xbar,J,K,byrow=TRUE) +h<-matrix(h,J,K,byrow=TRUE) +for(j in 1:J){ +R[j]<-sum(xbar[j,]) +nuhat[j]<-(sum(d[j,]))^2/sum(d[j,]^2/(h[j,]-1)) +r[j]<-1/sum(d[j,]) +DROW[j]<-sum(1/d[j,]) +} +for(k in 1:K){ +W[k]<-sum(xbar[,k]) +omegahat[k]<-(sum(d[,k]))^2/sum(d[,k]^2/(h[,k]-1)) +w[k]<-1/sum(d[,k]) +DCOL[k]<-sum(1/d[,k]) +} +D<-1/d +for(j in 1:J){ +for(k in 1:K){ +xtil[j,k]<-sum(D[,k]*xbar[,k]/DCOL[k])+sum(D[j,]*xbar[j,]/DROW[j])- +sum(D*xbar/sum(D)) +aval[j,k]<-(1-D[j,k]*(1/sum(D[j,])+1/sum(D[,k])-1/sum(D)))^2/(h[j,k]-3) +} +} +Rhat<-sum(r*R)/sum(r) +What<-sum(w*W)/sum(w) +Ba<-sum((1-r/sum(r))^2/nuhat) +Bb<-sum((1-w/sum(w))^2/omegahat) +Va<-sum(r*(R-Rhat)^2)/((J-1)*(1+2*(J-2)*Ba/(J^2-1))) +Vb<-sum(w*(W-What)^2)/((K-1)*(1+2*(K-2)*Bb/(K^2-1))) +nu2<-(J^2-1)/(3*Ba) +sig.A<-1-pf(Va,J-1,nu2) +nu2<-(K^2-1)/(3*Bb) +sig.B<-1-pf(Vb,K-1,nu2) +# Next, do test for interactions +Vab<-sum(D*(xbar-xtil)^2) +dfinter<-(J-1)*(K-1) +crit<-qchisq(1-alpha,dfinter) +hc<-(crit/(2*dfinter))*(1+(3*crit)/(dfinter+2))*sum(aval) +adcrit<-crit+hc +list(Qa=Va,sig.A=sig.A,Qb=Vb,sig.B=sig.B,Qab=Vab,critinter=adcrit) +} + + +t2waybt<-function(J,K,x,tr=.2,grp=c(1:p),p=J*K,nboot=599,SEED=TRUE){ +# +# Two-way ANOVA based on trimmed means and a bootstrap-t method +# +# The data are assumed to be stored as described in the function t2way +# +# The default number of bootstrap samples is nboot=599 +# +if(is.data.frame(x))x=as.matrix(x) +if(is.matrix(x))x<-listm(x) +if(!is.list(x))stop("Data must be stored in a matrix or in list mode.") +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +# compute test statistics: +tests=t2way.no.p(J=J,K=K,x,tr=tr,grp=grp) +TA=NULL +TB=NULL +TAB=NULL +data=list() +xcen=list() +for(j in 1:length(x))xcen[[j]]<-x[[j]]-mean(x[[j]],tr) +print("Taking bootstrap samples. Please wait.") +for(b in 1:nboot){ +for(j in 1:length(x))data[[j]]<-sample(xcen[[j]],size=length(x[[j]]),replace=TRUE) +bt=t2way.no.p(J,K,data,tr=tr,grp=grp) +TA[b]=bt$Qa +TB[b]=bt$Qb +TAB[b]=bt$Qab +} +pA<-sum(tests$Qa<=TA)/nboot +pB<-sum(tests$Qb<=TB)/nboot +pAB<-sum(tests$Qab<=TAB)/nboot +list(A.p.value=pA,B.p.value=pB,AB.p.value=pAB) +} + + +t3way<-function(J,K,L,x,tr=.2,grp=c(1:p),alpha=.05,p=J*K*L,MAT=FALSE, +lev.col=c(1:3),var.col=4,pr=TRUE,IV1=NULL,IV2=NULL,IV3=NULL){ +# Perform a J by K by L (three-way) anova on trimmed means where +# all JKL groups are independent. +# +# The R variable data is assumed to contain the raw +# data stored in list mode. data[[1]] contains the data +# for the first level of all three factors: level 1,1,1. +# data][2]] is assumed to contain the data for level 1 of the +# first two factors and level 2 of the third factor: level 1,1,2 +# data[[L]] is the data for level 1,1,L +# data[[L+1]] is the data for level 1,2,1. data[[2L]] is level 1,2,L. +# data[[KL+1]] is level 2,1,1, etc. 
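+# For example, with J=K=L=2 the eight groups are ordered as follows:
+# data[[1]] is level 1,1,1; data[[2]] is level 1,1,2; data[[3]] is level 1,2,1;
+# data[[4]] is level 1,2,2; data[[5]] is level 2,1,1; ...; data[[8]] is level 2,2,2.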
+# +# The default amount of trimming is tr=.2 +# +# It is assumed that data has length JKL, the total number of +# groups being tested. +# +# MAT=T, assumes data are stored in matrix with 3 columns indicating +# levels of the three factors. +# That is, this function calls selby2 for you. +# +if(is.data.frame(x))x=as.matrix(x) +if(!is.null(IV1[1])){ +if(is.null(IV2[1]))stop("IV2 is NULL") +if(is.null(IV3[1]))stop("IV3 is NULL") +if(pr)print("Assuming x is a vector containing all of the data; the dependent variable") +xi=elimna(cbind(x,IV1,IV2,IV3)) +x=fac2list(xi[,1],xi[,2:4]) +J=length(unique(IV1)) +K=length(unique(IV2)) +L=length(unique(IV3)) +p=J*K*L +} +data=x +if(MAT){ +if(!is.matrix(data))stop("With MAT=T, data must be a matrix") +if(length(lev.col)!=3)stop("Argument lev.col should have 3 values") +temp=selby2(data,lev.col,var.col) +lev1=length(unique(temp$grpn[,1])) +lev2=length(unique(temp$grpn[,2])) +lev3=length(unique(temp$grpn[,3])) +gv=apply(temp$grpn,2,rank) +gvad=100*gv[,1]+10*gv[,2]+gv[,3] +grp=rank(gvad) +if(pr){ +print(paste("Factor 1 has", lev1, "levels")) +print(paste("Factor 2 has", lev2, "levels")) +print(paste("Factor 3 has", lev3, "levels")) +} +if(J!=lev1)warning("J is being reset to the number of levels found") +if(K!=lev2)warning("K is being reset to the number of levels found") +if(L!=lev3)warning("K is being reset to the number of levels found") +J=lev1 +K=lev2 +L=lev3 +data=temp$x +} +if(is.matrix(data))data=listm(data) +if(!is.list(data))stop("Data are not stored in list mode") +if(p!=length(data)){ +print("The total number of groups, based on the specified levels, is") +print(p) +print("The number of groups in data is") +print(length(data)) +print("Warning: These two values are not equal") +} +tmeans<-0 +h<-0 +v<-0 +for (i in 1:p){ +tmeans[i]<-mean(data[[grp[i]]],tr) +h[i]<-length(data[[grp[i]]])-2*floor(tr*length(data[[grp[i]]])) +# h is the effective sample size +v[i]<-(length(data[[grp[i]]])-1)*winvar(data[[grp[i]]],tr)/(h[i]*(h[i]-1)) +# v contains the squared standard errors +} +v<-diag(v,p,p) # Put squared standard errors in a diag matrix. 
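+# Build the contrast matrices from Kronecker products: cj, ck and cl hold the
+# J-1, K-1 and L-1 adjacent-level contrasts for each factor, while ij, ik and il
+# are row vectors of ones that average over the levels of a factor. For example,
+# the contrast matrix for Factor A is kron(cj, kron(ik, il)).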
+ij<-matrix(c(rep(1,J)),1,J) +ik<-matrix(c(rep(1,K)),1,K) +il<-matrix(c(rep(1,L)),1,L) +jm1<-J-1 +cj<-diag(1,jm1,J) +for (i in 1:jm1)cj[i,i+1]<-0-1 +km1<-K-1 +ck<-diag(1,km1,K) +for (i in 1:km1)ck[i,i+1]<-0-1 +lm1<-L-1 +cl<-diag(1,lm1,L) +for (i in 1:lm1)cl[i,i+1]<-0-1 +alval<-c(1:999)/1000 +# Do test for factor A +cmat<-kron(cj,kron(ik,il)) # Contrast matrix for factor A +Qa<-johan(cmat,tmeans,v,h,alpha) +A.p.value=t3pval(cmat,tmeans,v,h) +# Do test for factor B +cmat<-kron(ij,kron(ck,il)) # Contrast matrix for factor B +Qb<-johan(cmat,tmeans,v,h,alpha) +B.p.value=t3pval(cmat,tmeans,v,h) +# Do test for factor C +cmat<-kron(ij,kron(ik,cl)) # Contrast matrix for factor C +#Qc<-johan(cmat,tmeans,v,h,alpha) +for(i in 1:999){ +irem<-i +Qc<-johan(cmat,tmeans,v,h,alval[i]) +if(Qc$teststat>Qc$crit)break +} +C.p.value=irem/1000 +# Do test for factor A by B interaction +cmat<-kron(cj,kron(ck,il)) # Contrast matrix for factor A by B +for(i in 1:999){ +irem<-i +Qab<-johan(cmat,tmeans,v,h,alval[i]) +if(Qab$teststat>Qab$crit)break +} +AB.p.value=irem/1000 +# Do test for factor A by C interaction +cmat<-kron(cj,kron(ik,cl)) # Contrast matrix for factor A by C +for(i in 1:999){ +irem<-i +Qac<-johan(cmat,tmeans,v,h,alval[i]) +if(Qac$teststat>Qac$crit)break +} +AC.p.value=irem/1000 +#Qac<-johan(cmat,tmeans,v,h,alpha) +# Do test for factor B by C interaction +cmat<-kron(ij,kron(ck,cl)) # Contrast matrix for factor B by C +#Qbc<-johan(cmat,tmeans,v,h,alpha) +for(i in 1:999){ +irem<-i +Qbc<-johan(cmat,tmeans,v,h,alval[i]) +if(Qbc$teststat>Qbc$crit)break +} +BC.p.value=irem/1000 +# Do test for factor A by B by C interaction +cmat<-kron(cj,kron(ck,cl)) # Contrast matrix for factor A by B by C +#Qabc<-johan(cmat,tmeans,v,h,alpha) +for(i in 1:999){ +irem<-i +Qabc<-johan(cmat,tmeans,v,h,alval[i]) +if(Qabc$teststat>Qabc$crit)break +} +ABC.p.value=irem/1000 +list(Qa=Qa$teststat,Qa.crit=Qa$crit,A.p.value=A.p.value, +Qb=Qb$teststat,Qb.crit=Qb$crit, +B.p.value=B.p.value, +Qc=Qc$teststat,Qc.crit=Qc$crit,C.p.value=C.p.value, +Qab=Qab$teststat,Qab.crit=Qab$crit, +AB.p.value=AB.p.value, +Qac=Qac$teststat,Qac.crit=Qac$crit,AC.p.value=AC.p.value, +Qbc=Qbc$teststat,Qbc.crit=Qbc$crit, +BC.p.value=BC.p.value, +Qabc=Qabc$teststat,Qabc.crit=Qabc$crit,ABC.p.value=ABC.p.value) +} + +regciMC<- +function(x,y,regfun=tsreg,nboot=599,alpha=.05,plotit=FALSE,pr=FALSE, +null.val=NULL,method='hoch', +xlab='Predictor 1',ylab='Predictor 2',xout=FALSE,outfun=outpro,SEED=TRUE,...){ +# +# Compute a .95 confidence interval for each of the parameters of +# a linear regression equation. The default regression method is +# Theil-Sen estimator. +# +# When using the least squares estimator, and when n<250, use +# lsfitci instead. +# +# Same as the function regci, only a multi-core processor is used. +# +# The predictor values are assumed to be in the n by p matrix x. +# The default number of bootstrap samples is nboot=599 +# +# regfun can be any R function that returns the coefficients in +# the vector regfun$coef, the first element of which contains the +# estimated intercept, the second element contains the estimated of +# the first predictor, etc. +# +# plotit=TRUE: If there are two predictors, plot 1-alpha confidence region based +# on the bootstrap samples. 
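+#
+# A minimal usage sketch (hypothetical data, shown for illustration only):
+#   x <- matrix(rnorm(100), ncol = 2)           # n = 50, two predictors
+#   y <- x[, 1] + rnorm(50)
+#   regciMC(x, y, regfun = tsreg, nboot = 599)  # CIs for the intercept and slopes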
+# +library(parallel) +x<-as.matrix(x) +p1<-ncol(x)+1 +p<-ncol(x) +xy<-cbind(x,y) +xy<-elimna(xy) +x<-xy[,1:p] +y<-xy[,p1] +nrem=length(y) +if(xout){ +if(pr)print('Default for argument outfun is now outpro') +m<-cbind(x,y) +if(identical(outfun,outblp))flag=outblp(x,y,plotit=FALSE)$keep +else +flag<-outfun(x,plotit=FALSE,...)$keep +m<-m[flag,] +x<-m[,1:p] +y<-m[,p1] +} +estit=regfun(x,y,...)$coef +if(is.null(null.val))null.val=rep(0,p1) +flagF=FALSE +flagF=identical(regfun,tsreg) +if(flagF){ +if(pr){ +if(sum(duplicated(y)>0))print('Duplicate values detected; tshdreg might have more power than tsreg') +}} +x=as.matrix(x) +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot) +data=listm(t(data)) +bvec<-mclapply(data,regbootMC,x,y,regfun,mc.preschedule=TRUE,xout=FALSE,...) +bvec=matl(bvec) +# bvec is a p+1 by nboot matrix. The first row +# contains the bootstrap intercepts, the second row +# contains the bootstrap values for first predictor, etc. +p1<-ncol(x)+1 +regci<-matrix(0,p1,6) +vlabs='Intercept' +for(j in 2:p1)vlabs[j]=paste('Slope',j-1) +dimnames(regci)<-list(vlabs,c('ci.low','ci.up','Estimate','S.E.','p-value','p.adj')) +ilow<-round((alpha/2) * nboot) +ihi<-nboot - ilow +ilow<-ilow+1 +se<-NA +sig.level<-NA +for(i in 1:p1){ +#temp=(sum(bvec[i,]<0)+.5*sum(bvec[i,]==0))/nboot +temp=(sum(bvec[i,]1){ +if(is.na(center[1])){ +if(cop==1)center<-dmean(m,tr=.5,dop=dop) +if(cop==2)center<-cov.mcd(m,print=FALSE)$center +if(cop==3)center<-apply(m,2,median) +if(cop==4)center<-cov.mve(m,print=FALSE)$center +if(cop==5)center<-smean(m) +} +cenmat=matrix(rep(center,nrow(m)),ncol=ncol(m),byrow=TRUE) +Amat=m-cenmat +B=listm(t(Amat)) # so rows are now in B[[1]]...B[[n]] +dis=mclapply(B,outproMC.sub,Amat,mc.preschedule=TRUE) +if(!MM){ +dmat<-mclapply(dis,IQRstand,mc.preschedule=TRUE) +} +if(MM)dmat<-mclapply(dis,MADstand,mc.preschedule=TRUE) +pdis<-apply(matl(dmat),1,max,na.rm=TRUE) +} +pdis +} +IQRstand<-function(x){ +vals=idealf(x) +res=x/(vals$qu-vals$ql) +res +} +MADstand<-function(x){ +val=x/mad(x) +val +} +regtestMC<-function(x,y,regfun=tsreg,nboot=600,alpha=.05,plotit=TRUE, +grp=c(1:ncol(x)),nullvec=c(rep(0,length(grp))),xout=FALSE,outfun=outpro,SEED=TRUE,pr=TRUE,...){ +# +# Test the hypothesis that q of the p predictors are equal to +# some specified constants. By default, the hypothesis is that all +# p predictors have a coefficient equal to zero. +# The method is based on a confidence ellipsoid. +# The critical value is determined with the percentile bootstrap method +# in conjunction with Mahalanobis distance. +# +library(parallel) +x<-as.matrix(x) +p1<-ncol(x)+1 +p<-ncol(x) +xy<-cbind(x,y) +xy<-elimna(xy) +x<-xy[,1:p] +y<-xy[,p1] +if(xout){ +if(pr)print("Default for outfun is now outpro") +m<-cbind(x,y) +if(identical(outfun,outblp))flag=outblp(x,y,plotit=FALSE)$keep +else +flag<-outfun(x,plotit=FALSE,...)$keep +m<-m[flag,] +x<-m[,1:p] +y<-m[,p1] +} +x<-as.matrix(x) +if(length(grp)!=length(nullvec))stop("The arguments grp and nullvec must have the same length.") +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot) +data=listm(t(data)) +# bvec<-apply(data,1,regboot,x,y,regfun) # A p+1 by nboot matrix. The first row +# contains the bootstrap intercepts, the second row +# contains the bootstrap values for first predictor, etc. 
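+# The bootstrap estimates are computed in parallel with mclapply; each element
+# of data is a vector of resampled row indices that is passed to regbootMC.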
+bvec=mclapply(data,regbootMC,x,y,regfun,mc.preschedule=TRUE) # list mode bvec[[1]] +# contains estimate from first bootstrap sample, etc. +bvec=matl(bvec) +grp<-grp+1 +est<-regfun(x,y)$coef +estsub<-est[grp] +bsub<-t(bvec[grp,]) +if(length(grp)==1){ +m1<-sum((bvec[grp,]-est)^2)/(length(y)-1) +dis<-(bsub-estsub)^2/m1 +} +if(length(grp)>1){ +mvec<-apply(bsub,2,FUN=mean) +m1<-var(t(t(bsub)-mvec+estsub)) +dis<-mahalanobis(bsub,estsub,m1) +} +dis2<-order(dis) +dis<-sort(dis) +critn<-floor((1-alpha)*nboot) +crit<-dis[critn] +test<-mahalanobis(t(estsub),nullvec,m1) +sig.level<-1-sum(test>dis)/nboot +if(length(grp)==2 && plotit){ +plot(bsub,xlab="Parameter 1",ylab="Parameter 2") +points(nullvec[1],nullvec[2],pch=0) +xx<-bsub[dis2[1:critn],] +xord<-order(xx[,1]) +xx<-xx[xord,] +temp<-chull(xx) +lines(xx[temp,]) +lines(xx[c(temp[1],temp[length(temp)]),]) +} +list(test=test,crit=crit,p.value=sig.level,nullvec=nullvec,est=estsub) +} + +pbadepth<-function(x,est=onestep,con=0,alpha=.05,nboot=2000,grp=NA,op=3,allp=TRUE, +MM=FALSE,MC=FALSE,cop=3,SEED=TRUE,na.rm=FALSE,...){ +# +# Test the hypothesis that C linear contrasts all have a value of zero. +# By default, an M-estimator is used +# +# Independent groups are assumed. +# +# The data are assumed to be stored in x in list mode or in a matrix. +# If stored in list mode, +# x[[1]] contains the data for the first group, x[[2]] the data +# for the second group, etc. Length(x)=the number of groups = J, say. +# If stored in a matrix, columns correspond to groups. +# +# By default, all pairwise differences are used, but contrasts +# can be specified with the argument con. +# The columns of con indicate the contrast coefficients. +# Con should have J rows, J=number of groups. +# For example, con[,1]=c(1,1,-1,-1,0,0) and con[,2]=c(,1,-1,0,0,1,-1) +# will test two contrasts: (1) the sum of the first +# two measures of location is +# equal to the sum of the second two, and (2) the difference between +# the first two is equal to the difference between the +# measures of location for groups 5 and 6. +# +# The default number of bootstrap samples is nboot=2000 +# +# op controls how depth is measured +# op=1, Mahalanobis +# op=2, Mahalanobis based on MCD covariance matrix +# op=3, Projection distance +# +# MC=TRUE, use a multicore processor when op=3 +# +# for arguments MM and cop, see pdis. +# +con<-as.matrix(con) +if(is.matrix(x) || is.data.frame(x))x=listm(x) +if(!is.list(x))stop("Data must be stored in list mode or in matrix mode.") +if(!is.na(grp)){ # Only analyze specified groups. +xx<-list() +for(i in 1:length(grp))xx[[i]]<-x[[grp[i]]] +x<-xx +} +J<-length(x) +mvec<-NA +nvec=NA +for(j in 1:J){ +temp<-x[[j]] +if(na.rm)temp<-temp[!is.na(temp)] # Remove missing values. +x[[j]]<-temp +mvec[j]<-est(temp,...) +nvec[j]=length(temp) +} +Jm<-J-1 +d<-ifelse(con==0,(J^2-J)/2,ncol(con)) +if(sum(con^2)==0){ +if(allp){ +con<-matrix(0,J,d) +id<-0 +for (j in 1:Jm){ +jp<-j+1 +for (k in jp:J){ +id<-id+1 +con[j,id]<-1 +con[k,id]<-0-1 +}}} +if(!allp){ +con<-matrix(0,J,Jm) +for (j in 1:Jm){ +jp<-j+1 +con[j,j]<-1 +con[jp,j]<-0-1 +}}} +bvec<-matrix(NA,nrow=J,ncol=nboot) +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +#print("Taking bootstrap samples. Please wait.") +for(j in 1:J){ +#print(paste("Working on group ",j)) +data<-matrix(sample(x[[j]],size=length(x[[j]])*nboot,replace=TRUE),nrow=nboot) +bvec[j,]<-apply(data,1,est,na.rm=na.rm,...) 
# J by nboot matrix, jth row contains +# bootstrapped estimates for jth group +} +chkna=sum(is.na(bvec)) +if(chkna>0){ +print("Bootstrap estimates of location could not be computed") +print("This can occur when using an M-estimator") +print("Might try est=tmean") +} +bcon<-t(con)%*%bvec #C by nboot matrix +tvec<-t(con)%*%mvec +tvec<-tvec[,1] +tempcen<-apply(bcon,1,mean) +vecz<-rep(0,ncol(con)) +bcon<-t(bcon) +smat<-var(bcon-tempcen+tvec) +temp<-bcon-tempcen+tvec +bcon<-rbind(bcon,vecz) +if(op==1)dv<-mahalanobis(bcon,tvec,smat) +if(op==2){ +smat<-cov.mcd(temp)$cov +dv<-mahalanobis(bcon,tvec,smat) +} +if(op==3){ +#print("Computing p-value. Might take a while with op=3") +if(!MC)dv<-pdis(bcon,MM=MM,cop=cop) +if(MC)dv<-pdisMC(bcon,MM=MM,cop=cop) +} +bplus<-nboot+1 +sig.level<-1-sum(dv[bplus]>=dv[1:nboot])/nboot +list(p.value=sig.level,psihat=tvec,con=con,n=nvec) +} + +outproMC.sub<-function(B,Amat){ +dis<-NA +bot<-sum(B^2) +Bmat=matrix(rep(B,nrow(Amat)),ncol=ncol(Amat),byrow=TRUE) +temp<-apply(Bmat*Amat,1,sum) +temp=matrix(rep(temp,ncol(Amat)),ncol=ncol(Amat)) +temp=temp*Bmat/bot +temp=temp^2 +dis=apply(temp,1,sum) +dis<-sqrt(dis) +flag=(dis==Inf) +dis[flag]=NA +dis +} +outproMC.sub2<-function(dis,MM,gval){ +temp<-idealf(dis) +if(!MM)cu<-median(dis)+gval*(temp$qu-temp$ql) +if(MM)cu<-median(dis)+gval*mad(dis) +outid<-NA +temp2<-(dis> cu) +flag<-rep(0,length(dis)) +flag[temp2]<-1 +flag +} +bdm2way<-function(J,K,x,grp=c(1:p),p=J*K){ +# +# Perform the Brunner, Dette, Munk rank-based ANOVA +# (JASA, 1997, 92, 1494--1502) +# for a J by K independent groups design. +# +# x can be a matrix with columns corresponding to groups +# or it can have list mode. +# +if(is.matrix(x))x<-listm(x) +xx<-list() +for(j in 1:p)xx[[j]]<-x[[grp[j]]] +Ja<-matrix(1,J,J) +Ia<-diag(1,J) +Pa<-Ia-Ja/J +Jb<-matrix(1,K,K) +Ib<-diag(1,K) +Pb<-Ib-Jb/K +cona<-kron(Pa,Jb/K) +conb<-kron(Ja/J,Pb) +conab<-kron(Pa,Pb) +outA<-bdms1(xx,cona) +releff=matrix(outA$q.hat,nrow=J,ncol=K,byrow=TRUE) +outB<-bdms1(xx,conb) +outAB<-bdms1(xx,conab) +# Could report degrees of freedom, but they are meaningless in terms of understanding the data. +list(p.valueA=outA$p.value,p.valueB=outB$p.value, p.valueAB=outAB$p.value, +Relative.Effects=releff,A.F=outA$F,B.F=outB$F,AB.F=outAB$F) +} +mregdepth<-function(X,RES){ +X=as.matrix(X) +XRES=elimna(cbind(X,RES)) +p=ncol(X) +p1=p+1 +vals=NA +for(j in 1:p)vals[j]=resdepth(XRES[,j],XRES[,p1]) +mdepthappr=min(vals) +mdepthappr +} + + +lband<-function(x,y=NULL,alpha=.05,plotit=TRUE,sm=TRUE,op=1,ylab='delta',CI=TRUE, +xlab='x (first group)'){ +# +# Compute a confidence band for the shift function. +# Assuming two dependent groups are being compared +# +# See Lombard (2005, Technometrics, 47, 364-369) +# +# if y=NA, assume x is a matrix with two columns or it has list mode +# +# If plotit=TRUE, a plot of the shift function is created, assuming that +# the graphics window has already been activated. +# +# sm=T, plot of shift function is smoothed using: +# expected frequency curve if op!=1 +# otherwise use S+ function lowess is used. +# +# This function removes all missing observations. +# +# When plotting, the median of x is marked with a + and the two +# quartiles are marked with o. 
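+#
+# A minimal usage sketch (hypothetical paired data, shown for illustration only):
+#   before <- rnorm(30)
+#   after <- before + rnorm(30, mean = .3)
+#   lband(before, after)   # confidence band for the shift function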
+# +if(!is.null(y[1]))x<-cbind(x,y) +if(is.list(x))x=matl(x) +if(ncol(x)!=2)stop('Should have two groups only') +m<-elimna(x) +y<-m[,2] +x<-m[,1] +n<-length(x) +crit<-nelderv2(m,1,lband.fun2,alpha=alpha) +plotit<-as.logical(plotit) +xsort<-sort(x) +ysort<-sort(y) +l<-0 +u<-0 +ysort[0]<-NA +ysort[n+1]<-NA +lsub<-c(1:n)-floor(sqrt(2*n)*crit) +usub<-c(1:n)+floor(sqrt(2*n)*crit) +for(ivec in 1:n){ +isub<-max(0,lsub[ivec]) +l[ivec]<-NA +if(isub>0)l[ivec]<-ysort[isub]-xsort[ivec] +isub<-min(n+1,usub[ivec]) +u[ivec]<-NA +if(isub <= n)u[ivec]<-ysort[isub]-xsort[ivec] +} +num<-length(l[l>0 & !is.na(l)])+length(u[u<0 & !is.na(u)]) +qhat<-c(1:n)/n +m<-cbind(qhat,l,u) +dimnames(m)<-list(NULL,c('qhat','lower','upper')) +if(plotit){ +xsort<-sort(x) +ysort<-sort(y) +del<-0 +for (i in 1:n)del[i]<-ysort[i]-xsort[i] +xaxis<-c(xsort,xsort) +yaxis<-c(m[,1],m[,2]) +allx<-c(xsort,xsort,xsort) +ally<-c(del,m[,2],m[,3]) +temp2<-m[,2] +temp2<-temp2[!is.na(temp2)] +plot(allx,ally,type='n',ylab=ylab,xlab=xlab) +ik<-rep(F,length(xsort)) +if(sm){ +if(op==1){ +ik<-duplicated(xsort) +del<-lowess(xsort,del)$y +} +if(op!=1)del<-runmean(xsort,del,pyhat=TRUE) +} +lines(xsort[!ik],del[!ik]) +lines(xsort,m[,2],lty=2) +lines(xsort,m[,3],lty=2) +temp<-summary(x) +text(temp[3],min(temp2),'+') +text(temp[2],min(temp2),'o') +text(temp[5],min(temp2),'o') +} +id.sig.greater=NULL +id.sig.less.than=NULL +num<-length(l[l>0 & !is.na(l)])+length(u[u<0 & !is.na(u)]) +id.sig.greater=which(l>0) +id.sig.less.than=which(u<0) + +flag=is.na(m[,2]) +m[flag,2]=-Inf +flag=is.na(m[,3]) +m[flag,3]=Inf +q.greater=NULL +if(length(id.sig.greater)>0)q.greater=m[id.sig.greater,1] +q.less=NULL +if(length(id.sig.less.than)>0)q.less=m[id.sig.less.than,1] +if(!CI)m=NULL +list(m=m,crit=crit,numsig=num,q.sig.greater=q.greater,q.sig.less=q.less) +} + +cov.ogk<-function(x,y=NA,n.iter=1,sigmamu=taulc,v=gkcov,beta=.9,...){ +# +# Compute robust (weighted) covariance matrix in Maronna and Zamar +# (2002, Technometrics, eq. 7). +# +# n.iter number of iterations. 1 seems to be best +# sigmamu computes a robust measure of location and scale for +# data stored in a single vector. +# v robust correlation coefficient +# estloc, a robust measure of location +# +if(!is.na(y[1]))x<-cbind(x,y) +if(!is.matrix(x))stop("x should be a matrix") +x<-elimna(x) +n<-nrow(x) +p<-ncol(x) +val<-matrix(NA,p,p) +temp<-ogk(x,sigmamu=sigmamu,v=v,n.iter=n.iter,beta=beta,...)$cov +temp +} +pbmcp<-function(x,alpha=.05,nboot=NA,grp=NA,est=onestep,con=0,bhop=FALSE, +SEED=TRUE,...){ +# +# Multiple comparisons for J independent groups. +# +# The data are assumed to be stored in x +# which either has list mode or is a matrix. In the first case +# x[[1]] contains the data for the first group, x[[2]] the data +# for the second group, etc. Length(x)=the number of groups = J. +# If stored in a matrix, the columns of the matrix correspond +# to groups. +# +# est is the measure of location and defaults to an M-estimator +# ... can be used to set optional arguments associated with est +# +# The argument grp can be used to analyze a subset of the groups +# Example: grp=c(1,3,5) would compare groups 1, 3 and 5. +# +# Missing values are allowed. +# +okay=FALSE +if(identical(est,onestep))okay=TRUE +if(identical(est,mom))okay=TRUE +if(!okay)stop('For estimators other than onestep and mom, use linconpb') +con<-as.matrix(con) +if(is.matrix(x))x<-listm(x) +if(!is.list(x))stop("Data must be stored in list mode or in matrix mode.") +if(!is.na(sum(grp))){ # Only analyze specified groups. 
+xx<-list() +for(i in 1:length(grp))xx[[i]]<-x[[grp[i]]] +x<-xx +} +J<-length(x) +tempn<-0 +mvec<-NA +for(j in 1:J){ +temp<-x[[j]] +temp<-temp[!is.na(temp)] # Remove missing values. +tempn[j]<-length(temp) +x[[j]]<-temp +mvec[j]<-est(temp,...) +} +nmax=max(tempn) +Jm<-J-1 +# +# Determine contrast matrix +# +if(sum(con^2)==0){ +ncon<-(J^2-J)/2 +con<-matrix(0,J,ncon) +id<-0 +for (j in 1:Jm){ +jp<-j+1 +for (k in jp:J){ +id<-id+1 +con[j,id]<-1 +con[k,id]<-0-1 +}}} +ncon<-ncol(con) +if(nrow(con)!=J){ +stop("Something is wrong with con; the number of rows does not match the number of groups.") +} +# Determine nboot if a value was not specified +if(is.na(nboot)){ +nboot<-5000 +if(J <= 8)nboot<-4000 +if(J <= 3)nboot<-2000 +} +# Determine critical values +if(!bhop){ +if(!identical(est,onestep))print('When est is not equal to onestep, suggest using bhop=TRUE') +if(alpha==.05){ +dvec<-c(.025,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) +if(ncon > 10){ +avec<-.05/c(11:ncon) +dvec<-c(dvec,avec) +}} +if(alpha==.01){ +dvec<-c(.005,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) +if(nmax>=100)dvec[1]=.01 +if(ncon > 10){ +avec<-.01/c(11:ncon) +dvec<-c(dvec,avec) +}} +if(alpha != .05 && alpha != .01){ +dvec<-alpha/c(1:ncon) +dvec[1]<-alpha/2 +} +dvec<-2*dvec +} +if(nmax>80){ +if(alpha==.05){ +dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) +if(ncon > 10){ +avec<-.05/c(11:ncon) +dvec<-c(dvec,avec) +}} +if(alpha==.01){ +dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) +if(ncon > 10){ +avec<-.01/c(11:ncon) +dvec<-c(dvec,avec) +}} +if(alpha != .05 && alpha != .01){ +dvec<-alpha/c(1:ncon) +} +} +if(bhop)dvec<-(ncon-c(1:ncon)+1)*alpha/ncon +bvec<-matrix(NA,nrow=J,ncol=nboot) +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +#print("Taking bootstrap samples. Please wait.") +for(j in 1:J){ +#paste("Working on group ",j) +data<-matrix(sample(x[[j]],size=length(x[[j]])*nboot,replace=TRUE),nrow=nboot) +bvec[j,]<-apply(data,1,est,...) # Bootstrapped values for jth group +} +chkna=sum(is.na(bvec)) +if(chkna>0){ +print("Bootstrap estimates of location could not be computed") +print("This can occur when using an M-estimator") +print("Might try est=tmean") +} +test<-NA +bcon<-t(con)%*%bvec #ncon by nboot matrix +tvec<-t(con)%*%mvec +for (d in 1:ncon){ +test[d]<-(sum(bcon[d,]>0)+.5*sum(bcon[d,]==0))/nboot +if(test[d]> .5)test[d]<-1-test[d] +} +test<-2*test +output<-matrix(0,ncon,6) +dimnames(output)<-list(NULL,c("con.num","psihat","p.value","p.crit", +"ci.lower","ci.upper")) +temp2<-order(0-test) +zvec<-dvec[1:ncon] +sigvec<-(test[temp2]>=zvec) +output[temp2,4]<-zvec +icl<-round(dvec[ncon]*nboot/2)+1 +icu<-nboot-icl-1 +for (ic in 1:ncol(con)){ +output[ic,2]<-tvec[ic,] +output[ic,1]<-ic +output[ic,3]<-test[ic] +temp<-sort(bcon[ic,]) +output[ic,5]<-temp[icl] +output[ic,6]<-temp[icu] +} +num.sig<-sum(output[,3]<=output[,4]) +list(output=output,con=con,num.sig=num.sig) +} + +pbmcpSR=pbmcp + +bmpmul<-function(x,alpha=.05){ +# +# Perform Brunner-Munzel method for all pairs of J independent groups. +# +# The familywise type I error probability is controlled by using +# a critical value from the Studentized maximum modulus distribution. +# +# The data are assumed to be stored in $x$ in list mode +# or in a matrix having J columns. +# +# Missing values are automatically removed. +# +# The default value for alpha is .05. Any other value results in using +# alpha=.01. 
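+#
+# A minimal usage sketch (hypothetical data for three independent groups):
+#   x <- list(rnorm(20), rnorm(25), rnorm(22))
+#   bmpmul(x)   # all pairwise Brunner-Munzel comparisons with FWE control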
+# +if(is.matrix(x))x<-listm(x) +if(!is.list(x))stop("Data must be stored in a matrix or in list mode.") +J<-length(x) +CC<-(J^2-J)/2 +test<-matrix(NA,CC,7) +for(j in 1:J){ +xx<-!is.na(x[[j]]) +val<-x[[j]] +x[[j]]<-val[xx] # Remove missing values +} +dimnames(test)<-list(NULL,c("Group","Group","P.hat","ci.lower","ci.upper","df","p.value")) +jcom<-0 +for (j in 1:J){ +for (k in 1:J){ +if (j < k){ +temp<-bmp(x[[j]],x[[k]],alpha) +crit<-0-smmcrit(temp$df,CC) +if(alpha!=.05)crit<-0-smmcrit01(temp$df,CC) +temp<-bmp(x[[j]],x[[k]],crit=crit) +jcom<-jcom+1 +test[jcom,1]<-j +test[jcom,2]<-k +test[jcom,3]<-temp$phat +test[jcom,4]<-temp$ci.p[1] +test[jcom,5]<-temp$ci.p[2] +test[jcom,6]<-temp$df +test[jcom,7]<-temp$p.value +}}} +list(test=test) +} +outproadMC<-function(m,center=NA,plotit=TRUE,op=TRUE,MM=TRUE,cop=3, +xlab="VAR 1",ylab="VAR 2",rate=.05,iter=100,ip=6,pr=TRUE,SEED=TRUE){ +# +# Adjusts the critical value, gval used by outpro, +# so that the outside rate per observation, under normality +# is approximatley equal to the value given by the argument +# rate, which defaults to .05. +# That is, expected proportion of points declared outliers under normality +# is intended to be rate=.05 +# +# When dealing with p-variate data, p>9, this adjustment can be crucial +# +library(parallel) +m=elimna(m) +m=as.matrix(m) +n=nrow(m) +z=array(rmul(n*iter*ncol(m)),c(iter,n,ncol(m))) +newq=0 +gtry=NA +for(itry in 1:ip){ +newq=newq+9/10^itry +gtry[itry]=newq +} +gtry=c(.95,.975,gtry[-1]) +if(pr)print("Computing adjustment") +val=NA +if(SEED)set.seed(2) +for(itry in 1:ip){ +for(i in 1:iter){ +temp=outproMC(z[i,,],gval = sqrt(qchisq(gtry[itry],ncol(m))), +center=center,plotit=FALSE,op=op,MM=MM,cop=cop)$out.id +val[i]=length(temp) +} +erate=mean(val)/n +if(erate0){ +if(nrow(con)!=length(x)){ +stop("The number of groups does not match the number of contrast coefficients.") +} +v1=nrow(con)-1 +psihat<-matrix(0,ncol(con),5) +dimnames(psihat)<-list(NULL,c("con.num","psihat","ci.lower","ci.upper", +"p.value")) +test<-matrix(0,ncol(con),5) +dimnames(test)<-list(NULL,c("con.num","test","crit","se","df")) +df<-0 +L=nrow(con) +for (d in 1:ncol(con)){ +psihat[d,1]<-d +psihat[d,2]<-sum(con[,d]*xbar) +sejk<-sqrt(sum(con[,d]^2*w)) +test[d,1]<-d +df<-(sum(con[,d]^2*w))^2/sum(con[,d]^4*w^2/(h-1)) +A=(L-1)*(1+(L-2)/df) +test[d,2]<-(sum(con[,d]*xbar)/(sqrt(A)*sejk))^2 +crit=qf(1-alpha,v1,df) +test[d,3]<-crit +test[d,4]<-sejk +test[d,5]<-df +psihat[d,3]<-psihat[d,2]-sqrt(crit*A)*sejk +psihat[d,4]<-psihat[d,2]+sqrt(crit*A)*sejk +psihat[d,5]<-1-pf(test[d,2],v1,df) +}} +# +if(pr){ +print("Note: confidence intervals are adjusted to control FWE") +print("But p-values are not adjusted to control FWE") +} +list(test=test,psihat=psihat) +} +smmvalv2<-function(dfvec,iter=20000,alpha=.05,SEED=TRUE){ +# +if(SEED)set.seed(1) +vals<-NA +tvals<-NA +J<-length(dfvec) +z=matrix(nrow=iter,ncol=J) +for(j in 1: J)z[,j]=abs(rt(iter,dfvec[j])) +vals=apply(z,1,max) +vals<-sort(vals) +ival<-round((1-alpha)*iter) +qval<-vals[ival] +qval +} +bwtrim<-function(J,K,data,tr=.2,grp=c(1:p),p=J*K,MAT=FALSE,grpc=1,coln=c(2:3)){ +# Perform a J-by-K anova on trimmed means with +# repeated measures on the second factor. That is, a split-plot design +# is assumed, with the first factor consisting of independent groups. +# +# If the data are stored in a matrix or data frame, it is converted to list mode. +# Once in list mode, +# data[[1]] contains the data +# for the first level of both factors: level 1,1. 
+# data[[2]] is assumed to contain the data for level 1 of the +# first factor and level 2 of the second: level 1,2 +# data[[K]] is the data for level 1,K +# data[[K+1]] is the data for level 2,1, data[2K] is level 2,K, etc. +# +# The default amount of trimming is tr=.2 +# +# It is assumed that data has length JK, the total number of +# groups being tested, but a subset of the data can be analyzed +# using grp +# +# If the between groups are denoted by groups numbers stored in a column +# of dat, you can set MAT=T, which will store the data in the format +# expected by this function +# +# Example, grpc=1 means group id numbers are in col 1. +# coln=c(3:6) means the within variables are stored in col 3-6. +# +# Or you can use the function selbybw to sort the data. +# +if(is.data.frame(data))data=as.matrix(data) +if(MAT) +data=selbybw(data,grpc=grpc,coln=coln)$x +x<-data + if(is.matrix(x) || is.data.frame(x)) { + y <- list() + for(j in 1:ncol(x)) + y[[j]] <- x[, j] + data <- y + } +if(!is.list(data))stop("Data are not stored in list mode or a matrix") +if(p!=length(data)){ +print("The total number of groups, based on the specified levels, is") +print(p) +print("The number of groups in data is") +print(length(data)) +print("Warning: These two values are not equal") +} +if(p!=length(grp))stop("Apparently a subset of the groups was specified that does not match the total number of groups indicated by the values for J and K.") +tmeans<-0 +h<-0 +v<-matrix(0,p,p) +klow<-1-K +kup<-0 +for (i in 1:p)tmeans[i]<-mean(data[[grp[i]]],tr,na.rm=TRUE) +for (j in 1:J){ +h[j]<-length(data[[grp[j]]])-2*floor(tr*length(data[[grp[j]]])) +# h is the effective sample size for the jth level of factor A +# Use covmtrim to determine blocks of squared standard errors and +# covariances. +klow<-klow+K +kup<-kup+K +sel<-c(klow:kup) +v[sel,sel]<-covmtrim(data[grp[klow:kup]],tr) +} +ij<-matrix(c(rep(1,J)),1,J) +ik<-matrix(c(rep(1,K)),1,K) +jm1<-J-1 +cj<-diag(1,jm1,J) +for (i in 1:jm1)cj[i,i+1]<-0-1 +km1<-K-1 +ck<-diag(1,km1,K) +for (i in 1:km1)ck[i,i+1]<-0-1 +# Do test for factor A +cmat<-kron(cj,ik) # Contrast matrix for factor A +Qa<-johansp(cmat,tmeans,v,h,J,K) +# Do test for factor B +cmat<-kron(ij,ck) # Contrast matrix for factor B +Qb<-johansp(cmat,tmeans,v,h,J,K) +# Do test for factor A by B interaction +cmat<-kron(cj,ck) # Contrast matrix for factor A by B +Qab<-johansp(cmat,tmeans,v,h,J,K) +list(Qa=Qa$teststat,Qa.p.value=Qa$p.value, +Qb=Qb$teststat,Qb.p.value=Qb$p.value, +Qab=Qab$teststat,Qab.p.value=Qab$p.value) +} + + +rmmest<-function(x,y=NA,alpha=.05,con=0,est=onestep,plotit=TRUE,dif=FALSE,grp=NA, +hoch=FALSE,nboot=NA,BA=TRUE,xlab="Group 1",ylab="Group 2",pr=TRUE,...){ +# +# Use a percentile bootstrap method to compare dependent groups. +# By default, +# compute a .95 confidence interval for all linear contasts +# specified by con, a J by C matrix, where C is the number of +# contrasts to be tested, and the columns of con are the +# contrast coefficients. +# If con is not specified, all pairwise comparisons are done. +# +# By default, a one-step M-estimator is used +# and a sequentially rejective method +# is used to control the probability of at least one Type I error. +# +# dif=T indicates that difference scores are to be used +# dif=F indicates that measure of location associated with +# marginal distributions are used instead. +# +# nboot is the bootstrap sample size. If not specified, a value will +# be chosen depending on the number of contrasts there are. 
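+#
+# A minimal usage sketch (hypothetical data for three dependent groups):
+#   x <- matrix(rnorm(90), ncol = 3)
+#   rmmest(x, dif = TRUE)   # all pairwise comparisons via difference scores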
+# +# x can be an n by J matrix or it can have list mode +# for two groups, data for second group can be put in y +# otherwise, assume x is a matrix (n by J) or has list mode. +# +# A sequentially rejective method is used to control alpha. +# +# Argument BA: When using dif=F, BA=T uses a correction term +# that is recommended when using MOM. +# +if(dif){ +if(pr)print("dif=T, so analysis is done on difference scores") +temp<-rmmcppbd(x,y=y,alpha=.05,con=con,est,plotit=plotit,grp=grp, +nboot=nboot,hoch=hoch,...) +output<-temp$output +con<-temp$con +} +if(!dif){ +if(pr)print("dif=F, so analysis is done on marginal distributions") +if(!is.na(y[1]))x<-cbind(x,y) +if(!is.list(x) && !is.matrix(x))stop("Data must be stored in a matrix or +in list mode.") +if(is.list(x)){ +if(is.matrix(con)){ +if(length(x)!=nrow(con))stop("The number of rows in con is not equal to the +number of groups.") +}} +if(is.list(x)){ +# put the data in an n by J matrix +mat<-matl(x) +} +if(is.matrix(x) && is.matrix(con)){ +if(ncol(x)!=nrow(con))stop("The number of rows in con is not equal to the +number of groups.") +mat<-x +} +n=nrow(x) +if(is.matrix(x))mat<-x +if(!is.na(sum(grp)))mat<-mat[,grp] +mat<-elimna(mat) # Remove rows with missing values. +x<-mat +J<-ncol(mat) +xcen<-x +for(j in 1:J)xcen[,j]<-x[,j]-est(x[,j]) +Jm<-J-1 +if(sum(con^2)==0){ +d<-(J^2-J)/2 +con<-matrix(0,J,d) +id<-0 +for (j in 1:Jm){ +jp<-j+1 +for (k in jp:J){ +id<-id+1 +con[j,id]<-1 +con[k,id]<-0-1 +}}} +d<-ncol(con) +if(is.na(nboot)){ +if(d<=4)nboot<-1000 +if(d>4)nboot<-5000 +} +n<-nrow(mat) +crit.vec<-alpha/c(1:d) +connum<-ncol(con) +set.seed(2) # set seed of random number generator so that +# results can be duplicated. +xbars<-apply(mat,2,est) +psidat<-NA +for (ic in 1:connum)psidat[ic]<-sum(con[,ic]*xbars) +psihat<-matrix(0,connum,nboot) +psihatcen<-matrix(0,connum,nboot) +bvec<-matrix(NA,ncol=J,nrow=nboot) +bveccen<-matrix(NA,ncol=J,nrow=nboot) +print("Taking bootstrap samples. Please wait.") +data<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot) +for(ib in 1:nboot){ +bvec[ib,]<-apply(x[data[ib,],],2,est,...) +bveccen[ib,]<-apply(xcen[data[ib,],],2,est,...) +} +# +# Now have an nboot by J matrix of bootstrap values. +# +test<-1 +bias<-NA +for (ic in 1:connum){ +psihat[ic,]<-apply(bvec,1,bptdpsi,con[,ic]) +psihatcen[ic,]<-apply(bveccen,1,bptdpsi,con[,ic]) +bias[ic]<-sum((psihatcen[ic,]>0))/nboot-.5 +if(BA){ +test[ic]<-sum((psihat[ic,]>0))/nboot-.1*bias[ic] +if(test[ic]<0)test[ic]<-0 +} +if(!BA)test[ic]<-sum((psihat[ic,]>0))/nboot +test[ic]<-min(test[ic],1-test[ic]) +} +test<-2*test +ncon<-ncol(con) +if(alpha==.05){ +dvec<-c(.025,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) +dvecba<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) +if(ncon > 10){ +avec<-.05/c(11:ncon) +dvec<-c(dvec,avec) +}} +if(alpha==.01){ +dvec<-c(.005,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) +dvecba<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) +if(ncon > 10){ +avec<-.01/c(11:ncon) +dvec<-c(dvec,avec) +}} +if(alpha != .05 && alpha != .01){ +dvec<-alpha/c(1:ncon) +dvecba<-dvec +dvec[1]<-alpha/2 +} +if(n>=80)hoch=T +if(hoch)dvec<-alpha/(c(1:ncon)) +if(plotit && ncol(bvec)==2){ +z<-c(0,0) +one<-c(1,1) +plot(rbind(bvec,z,one),xlab=xlab,ylab=ylab,type="n") +points(bvec) +totv<-apply(x,2,est,...) 
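+# Use the Mahalanobis distances of the bootstrap estimates from the observed
+# estimates to plot an approximate 1-alpha confidence region (its convex hull).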
+cmat<-var(bvec) +dis<-mahalanobis(bvec,totv,cmat) +temp.dis<-order(dis) +ic<-round((1-alpha)*nboot) +xx<-bvec[temp.dis[1:ic],] +xord<-order(xx[,1]) +xx<-xx[xord,] +temp<-chull(xx) +lines(xx[temp,]) +lines(xx[c(temp[1],temp[length(temp)]),]) +abline(0,1) +} +temp2<-order(0-test) +ncon<-ncol(con) +zvec<-dvec[1:ncon] +if(BA)zvec<-dvecba[1:ncon] +sigvec<-(test[temp2]>=zvec) +output<-matrix(0,connum,6) +dimnames(output)<-list(NULL,c("con.num","psihat","sig.level","crit.sig", +"ci.lower","ci.upper")) +tmeans<-apply(mat,2,est,...) +psi<-1 +for (ic in 1:ncol(con)){ +output[ic,2]<-sum(con[,ic]*tmeans) +output[ic,1]<-ic +output[ic,3]<-test[ic] +output[temp2,4]<-zvec +temp<-sort(psihat[ic,]) +icl<-round(output[ic,4]*nboot/2)+1 +icu<-nboot-(icl-1) +output[ic,5]<-temp[icl] +output[ic,6]<-temp[icu] +} +} +num.sig<-sum(output[,3]<=output[,4]) +list(output=output,con=con,num.sig=num.sig) +} + +lindep<-function(x,con,cmat,alpha=.05,tr=.2){ +# +# Compute a test statistic based on the +# linear contrast coefficients in con and the covariance matrix +# cmat. +# +# The data are assumed to be stored in x in list mode +# or a matrix with columns correpsonding to groups. +# +# con is a J by d matrix containing the contrast coefficients that are used. +# d=number of linear contrasts +# +# +if(is.matrix(x))x<-listm(x) +if(!is.list(x))stop("Data must be stored in a matrix or in list mode.") +con<-as.matrix(con) +J<-length(x) +w<-vector("numeric",J) +xbar<-vector("numeric",J) +for(j in 1:J){ +xbar[j]<-mean(x[[j]],tr=tr,na.rm=TRUE) +} +ncon<-ncol(con) +psihat<-matrix(0,ncol(con),4) +dimnames(psihat)<-list(NULL,c("con.num","psihat","se","test")) +w<-cmat +for (d in 1:ncol(con)){ +psihat[d,1]<-d +psihat[d,2]<-sum(con[,d]*xbar) +cvec<-as.matrix(con[,d]) +sejk<-sqrt(t(cvec)%*%w%*%cvec) +psihat[d,3]<-sejk +psihat[d,4]<-psihat[d,2]/sejk +} +list(test.stat=psihat) +} + +bwmcp<-function(J, K, x, tr = 0.2, JK = J * K, con = 0, + alpha = 0.05, grp =c(1:JK), nboot = 599, method='hoch',SEED = TRUE, ...) +{ + # + # A bootstrap-t for multiple comparisons among + # for all main effects and interactions. + # The analysis is done by generating bootstrap samples and + # using an appropriate linear contrast. + # + # The R variable x is assumed to contain the raw + # data stored in list mode or in a matrix. + # If in list mode, x[[1]] contains the data + # for the first level of both factors: level 1,1. + # x[[2]] is assumed to contain the data for level 1 of the + # first factor and level 2 of the second: level 1,2 + # x[[K]] is the data for level 1,K + # x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc. + # + # If the data are in a matrix, column 1 is assumed to + # correspond to x[[1]], column 2 to x[[2]], etc. + # + # When in list mode x is assumed to have length JK, the total number + # groups being tested, but a subset of the data can be analyzed + # using grp + # + if(is.matrix(x)) { + y <- list() + for(j in 1:ncol(x)) + y[[j]] <- x[, j] +x=y +} + + conM = con2way(J, K) + p <- J * K + v <- matrix(0, p, p) + data <- list() +xx=list() + for(j in 1:length(x)) { + data[[j]] <- x[[grp[j]]] +xx[[j]]=x[[grp[j]]] # save input data + # Now have the groups in proper order. 
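+ # Center each group at its trimmed mean so that bootstrap samples are
+ # generated with the null hypothesis true (bootstrap-t critical values).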
+ data[[j]] = data[[j]] - mean(data[[j]], tr = tr,na.rm=TRUE) #centered data for bootstrapping + } +ilow=1-K +iup=0 +for(j in 1:J){ +ilow <- ilow + K + iup = iup + K +sel <- c(ilow:iup) +xx[sel]=listm(elimna(matl(xx[sel]))) + v[sel, sel] <- covmtrim(xx[sel], tr) + } +A=lindep(xx,conM$conA,cmat=v,tr=tr)$test.stat +B=lindep(xx,conM$conB,cmat=v,tr=tr)$test.stat +AB=lindep(xx,conM$conAB,cmat=v,tr=tr)$test.stat + x <- data + jp <- 1 - K + kv <- 0 + if(SEED) + set.seed(2) + # set seed of random number generator so that + # results can be duplicated. + # Next determine the n_j values + nvec <- NA + testA = NA + testB = NA + testAB = NA + bsam = list() + bdat = list() +aboot=matrix(NA,nrow=nboot,ncol=ncol(conM$conA)) +bboot=matrix(NA,nrow=nboot,ncol=ncol(conM$conB)) +abboot=matrix(NA,nrow=nboot,ncol=ncol(conM$conAB)) +# for(j in 1:J) +# nvec[j] = length(x[[j]]) + for(ib in 1:nboot) { + ilow <- 1 - K + iup = 0 + for(j in 1:J) { + ilow <- ilow + K + iup = iup + K +nv=length(xx[[ilow]]) +bdat[[j]] = sample(nv, size = nv, replace =TRUE) +for(k in ilow:iup){ +# bsam[[k]] = xx[[k]][bdat[[j]]] +bsam[[k]] = data[[k]][bdat[[j]]] # Use centered data to determine critical value. +} + } +ilow=0-K +iup=0 +for(j in 1:J){ +ilow <- ilow + K + iup = iup + K +sel <- c(ilow:iup) + v[sel, sel] <- covmtrim(bsam[sel], tr) + } + +temp=abs(lindep(bsam,conM$conA, cmat=v,tr=tr)$test.stat[,4]) +aboot[ib,]=temp +testA[ib] = max(temp) +temp=abs(lindep(bsam,conM$conB,cmat=v,tr=tr)$test.stat[,4]) +bboot[ib,]=temp +testB[ib]= max(temp) +temp=abs(lindep(bsam,conM$conAB,cmat=v,tr=tr)$test.stat[,4]) +testAB[ib] = max(temp) +abboot[ib,]=temp + } +pbA=NA +pbB=NA +pbAB=NA +for(j in 1:ncol(aboot))pbA[j]=mean((abs(A[j,4])length(x))stop('JKL is less than the Number of groups') +JK=J*K +KL=K*L + v <- matrix(0, p, p) + data <- list() +xx=list() + for(j in 1:length(x)) { + data[[j]] <- x[[grp[j]]] +xx[[j]]=x[[grp[j]]] # save input data + # Now have the groups in proper order. + data[[j]] = data[[j]] - mean(data[[j]], tr = tr) + } +ilow=1-KL +iup=0 +for(j in 1:J){ +ilow <- ilow + KL + iup = iup + KL +sel <- c(ilow:iup) +xx[sel]=listm(elimna(matl(xx[sel]))) + v[sel, sel] <- covmtrim(xx[sel], tr) + } +A=lindep(xx,conM$conA,cmat=v,tr=tr)$test.stat +B=lindep(xx,conM$conB,cmat=v,tr=tr)$test.stat +C=lindep(xx,conM$conC,cmat=v,tr=tr)$test.stat +AB=lindep(xx,conM$conAB,cmat=v,tr=tr)$test.stat +AC=lindep(xx,conM$conAC,cmat=v,tr=tr)$test.stat +BC=lindep(xx,conM$conBC,cmat=v,tr=tr)$test.stat +ABC=lindep(xx,conM$conABC,cmat=v,tr=tr)$test.stat + x <- data + if(SEED) + set.seed(2) + # set seed of random number generator so that + # results can be duplicated. 
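+ # Allocate storage for the bootstrap test statistics: testA, testB, ... hold the
+ # maximum absolute statistic for each bootstrap sample; aboot, bboot, ... hold
+ # the per-contrast bootstrap statistics.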
+ testA = NA + testB = NA +testC=NA + testAB = NA + testAC = NA + testBC = NA + testABC = NA + bsam = list() + bdat = list() +aboot=matrix(NA,nrow=nboot,ncol=ncol(conM$conA)) +bboot=matrix(NA,nrow=nboot,ncol=ncol(conM$conB)) +cboot=matrix(NA,nrow=nboot,ncol=ncol(conM$conC)) +abboot=matrix(NA,nrow=nboot,ncol=ncol(conM$conAB)) +acboot=matrix(NA,nrow=nboot,ncol=ncol(conM$conAC)) +bcboot=matrix(NA,nrow=nboot,ncol=ncol(conM$conBC)) +abcboot=matrix(NA,nrow=nboot,ncol=ncol(conM$conABC)) + for(ib in 1:nboot) { + ilow <- 1 - KL + iup = 0 + for(j in 1:J) { + ilow <- ilow + KL + iup = iup + KL +nv=length(x[[ilow]]) + bdat[[j]] = sample(nv, size = nv, replace =TRUE) +for(k in ilow:iup){ + bsam[[k]] = x[[k]][bdat[[j]]] +} + } +ilow=1-KL +iup=0 +for(j in 1:J){ +ilow <- ilow + KL + iup = iup + KL +sel <- c(ilow:iup) + v[sel, sel] <- covmtrim(bsam[sel], tr) + } +temp=abs(lindep(bsam,conM$conA, cmat=v,tr=tr)$test.stat[,4]) +aboot[ib,]=temp +testA[ib] = max(temp) +temp=abs(lindep(bsam,conM$conB,cmat=v,tr=tr)$test.stat[,4]) +bboot[ib,]=temp +testB[ib]= max(temp) + +temp=abs(lindep(bsam,conM$conC,cmat=v,tr=tr)$test.stat[,4]) +cboot[ib,]=temp +testC[ib]= max(temp) + +temp=abs(lindep(bsam,conM$conAC,cmat=v,tr=tr)$test.stat[,4]) +acboot[ib,]=temp +testAC[ib]= max(temp) + +temp=abs(lindep(bsam,conM$conBC,cmat=v,tr=tr)$test.stat[,4]) +bcboot[ib,]=temp +testBC[ib]= max(temp) + +temp=abs(lindep(bsam,conM$conAB,cmat=v,tr=tr)$test.stat[,4]) +testAB[ib] = max(temp) +abboot[ib,]=temp + +temp=abs(lindep(bsam,conM$conABC,cmat=v,tr=tr)$test.stat[,4]) +abcboot[ib,]=temp +testABC[ib]= max(temp) + + } +pbA=NA +pbB=NA +pbC=NA +pbAB=NA +pbAC=NA +pbBC=NA +pbABC=NA +for(j in 1:ncol(aboot))pbA[j]=mean((abs(A[j,4])length(x))stop("JKL is less than the Number of groups") +JK=J*K + v <- matrix(0, p, p) + data <- list() +xx=list() + for(j in 1:length(x)) { + data[[j]] <- x[[grp[j]]] +xx[[j]]=x[[grp[j]]] # save input data + # Now have the groups in proper order. + data[[j]] = data[[j]] - mean(data[[j]], tr = tr) + } +ilow=1-L +iup=0 +for(j in 1:JK){ +ilow <- ilow + L + iup = iup + L +sel <- c(ilow:iup) +xx[sel]=listm(elimna(matl(xx[sel]))) + v[sel, sel] <- covmtrim(xx[sel], tr) + } +A=lindep(xx,conM$conA,cmat=v,tr=tr)$test.stat +B=lindep(xx,conM$conB,cmat=v,tr=tr)$test.stat +C=lindep(xx,conM$conC,cmat=v,tr=tr)$test.stat +AB=lindep(xx,conM$conAB,cmat=v,tr=tr)$test.stat +AC=lindep(xx,conM$conAC,cmat=v,tr=tr)$test.stat +BC=lindep(xx,conM$conBC,cmat=v,tr=tr)$test.stat +ABC=lindep(xx,conM$conABC,cmat=v,tr=tr)$test.stat + x <- data + jp <- 1 - K + kv <- 0 + if(SEED) + set.seed(2) + # set seed of random number generator so that + # results can be duplicated. 
+ testA = NA + testB = NA +testC=NA + testAB = NA + testAC = NA + testBC = NA + testABC = NA + bsam = list() + bdat = list() +aboot=matrix(NA,nrow=nboot,ncol=ncol(conM$conA)) +bboot=matrix(NA,nrow=nboot,ncol=ncol(conM$conB)) +cboot=matrix(NA,nrow=nboot,ncol=ncol(conM$conC)) +abboot=matrix(NA,nrow=nboot,ncol=ncol(conM$conAB)) +acboot=matrix(NA,nrow=nboot,ncol=ncol(conM$conAC)) +bcboot=matrix(NA,nrow=nboot,ncol=ncol(conM$conBC)) +abcboot=matrix(NA,nrow=nboot,ncol=ncol(conM$conABC)) +# for(j in 1:JK) +# nvec[j] = length(x[[j]]) + for(ib in 1:nboot) { + ilow <- 1 - L + iup = 0 + for(j in 1:JK) { + ilow <- ilow + L + iup = iup + L +nv=length(x[[ilow]]) + bdat[[j]] = sample(nv, size = nv, replace =TRUE) +for(k in ilow:iup){ + bsam[[k]] = x[[k]][bdat[[j]]] +} + } +ilow=1-L +iup=0 +for(j in 1:JK){ +ilow <- ilow + L + iup = iup + L +sel <- c(ilow:iup) + v[sel, sel] <- covmtrim(bsam[sel], tr) + } +temp=abs(lindep(bsam,conM$conA, cmat=v,tr=tr)$test.stat[,4]) +aboot[ib,]=temp +testA[ib] = max(temp) +temp=abs(lindep(bsam,conM$conB,cmat=v,tr=tr)$test.stat[,4]) +bboot[ib,]=temp +testB[ib]= max(temp) + +temp=abs(lindep(bsam,conM$conC,cmat=v,tr=tr)$test.stat[,4]) +cboot[ib,]=temp +testC[ib]= max(temp) + +temp=abs(lindep(bsam,conM$conAC,cmat=v,tr=tr)$test.stat[,4]) +acboot[ib,]=temp +testAC[ib]= max(temp) + +temp=abs(lindep(bsam,conM$conBC,cmat=v,tr=tr)$test.stat[,4]) +bcboot[ib,]=temp +testBC[ib]= max(temp) + +temp=abs(lindep(bsam,conM$conAB,cmat=v,tr=tr)$test.stat[,4]) +testAB[ib] = max(temp) +abboot[ib,]=temp + +temp=abs(lindep(bsam,conM$conABC,cmat=v,tr=tr)$test.stat[,4]) +abcboot[ib,]=temp +testABC[ib]= max(temp) + + } +pbA=NA +pbB=NA +pbC=NA +pbAB=NA +pbAC=NA +pbBC=NA +pbABC=NA +for(j in 1:ncol(aboot))pbA[j]=mean((abs(A[j,4])crit,1,0) +id<-vec[chk==1] +keep<-vec[chk==0] +if(is.matrix(x)){ +if(ncol(x)==2 && plotit){ +plot(x[,1],x[,2],xlab=xlab,ylab=ylab,type="n") +flag<-rep(TRUE,nrow(x)) +flag[id]<-FALSE +points(x[flag,1],x[flag,2]) +if(sum(!flag)>0)points(x[!flag,1],x[!flag,2],pch="*") +}} +if(!is.matrix(x))outval<-x[id] +if(is.matrix(x))outval<-x[id,] +n=nrow(as.matrix(x)) +n.out=length(id) +assign(x='.Random.seed', value=oldSeed, envir=.GlobalEnv) +list(n=n,n.out=n.out,out.val=outval,out.id=id,keep=keep,dis=dis,crit=crit) +} + +lintestMC<-function(x,y,regfun=tsreg,nboot=500,alpha=.05,xout=FALSE,outfun=out,...){ +# +# Test the hypothesis that the regression surface is a plane. +# Stute et al. (1998, JASA, 93, 141-149). +# +if(identical(regfun,Qreg))print('When using Qreg, be sure to include res.vals=TRUE') +if(identical(regfun,tshdreg))print('When using tshdreg, be sure to include RES=TRUE') +if(identical(regfun,MMreg))print('When using MMreg, be sure to include RES=TRUE') +library(parallel) +set.seed(2) +x<-as.matrix(x) +d<-ncol(x) +temp<-elimna(cbind(x,y)) +x<-temp[,1:d] +x<-as.matrix(x) +y<-temp[,d+1] +if(xout){ +flag<-outfun(x,...)$keep +x<-x[flag,] +x<-as.matrix(x) +y<-y[flag] +} +mflag<-matrix(NA,nrow=length(y),ncol=length(y)) +for (j in 1:length(y)){ +for (k in 1:length(y)){ +mflag[j,k]<-(sum(x[j,]<=x[k,])==ncol(x)) +} +} +reg<-regfun(x,y,...) +yhat<-y-reg$residuals +#print("Taking bootstrap sample, please wait.") +data<-matrix(runif(length(y)*nboot),nrow=nboot) +data<-sqrt(12)*(data-.5) # standardize the random numbers. +data=listm(t(data)) +#rvalb<-apply(data,1,lintests1,yhat,reg$residuals,mflag,x,regfun,...) +rvalb<-mclapply(data,lintests1,yhat,reg$residuals,mflag,x,regfun,mc.preschedule=TRUE,...) 
+# An n x nboot matrix of R values +rvalb=matl(rvalb) +rvalb<-rvalb/sqrt(length(y)) +dstatb<-apply(abs(rvalb),2,max) +wstatb<-apply(rvalb^2,2,mean) +# compute test statistic +v<-c(rep(1,length(y))) +rval<-lintests1(v,yhat,reg$residuals,mflag,x,regfun,...) +rval<-rval/sqrt(length(y)) +dstat<-max(abs(rval)) +wstat<-mean(rval^2) +ib<-round(nboot*(1-alpha)) +p.value.d<-1-sum(dstat>=dstatb)/nboot +p.value.w<-1-sum(wstat>=wstatb)/nboot +#critw<-wstatb[ib] +list(dstat=dstat,wstat=wstat,p.value.d=p.value.d,p.value.w=p.value.w) +} + + +yuen.effect<-function(x,y,tr=.2,alpha=.05,plotit=FALSE, +plotfun=splot,op=TRUE,VL=TRUE,cor.op=FALSE, +xlab="Groups",ylab="",PB=FALSE){ +# +# Same as yuen, only it computes explanatory power and the related +# measure of effect size. Only use this with n1=n2. Called by yuenv2 +# which allows n1!=n2. +# +# +# Perform Yuen's test for trimmed means on the data in x and y. +# The default amount of trimming is 20% +# Missing values (values stored as NA) are automatically removed. +# +# A confidence interval for the trimmed mean of x minus the +# the trimmed mean of y is computed and returned in yuen$ci. +# The p-valueis returned in yuen$p.value +# +# For an omnibus test with more than two independent groups, +# use t1way. +# This function uses winvar from chapter 2. +# +if(tr==.5)stop("Use medpb to compare medians.") +if(tr>.5)stop("Can't have tr>.5") +library(MASS) +x<-x[!is.na(x)] # Remove any missing values in x +y<-y[!is.na(y)] # Remove any missing values in y +h1<-length(x)-2*floor(tr*length(x)) +h2<-length(y)-2*floor(tr*length(y)) +q1<-(length(x)-1)*winvar(x,tr)/(h1*(h1-1)) +q2<-(length(y)-1)*winvar(y,tr)/(h2*(h2-1)) +df<-(q1+q2)^2/((q1^2/(h1-1))+(q2^2/(h2-1))) +crit<-qt(1-alpha/2,df) +m1=mean(x,tr) +m2=mean(y,tr) +mbar=(m1+m2)/2 +dif=m1-m2 +low<-dif-crit*sqrt(q1+q2) +up<-dif+crit*sqrt(q1+q2) +test<-abs(dif/sqrt(q1+q2)) +yuen<-2*(1-pt(test,df)) +xx=c(rep(1,length(x)),rep(2,length(y))) +pts=c(x,y) +top=var(c(m1,m2)) +# +if(!PB){ +if(tr==0)cterm=1 +if(tr>0)cterm=area(dnormvar,qnorm(tr),qnorm(1-tr))+2*(qnorm(tr)^2)*tr +bot=winvar(pts,tr=tr)/cterm +} +if(PB)bot=pbvar(pts)/1.06 +# +e.pow=top/bot +if(e.pow>1){ +x0=c(rep(1,length(x)),rep(2,length(y))) +y0=c(x,y) +e.pow=wincor(x0,y0,tr=tr)$cor^2 +} +if(plotit){ +plot(xx,pts,xlab=xlab,ylab=ylab) +if(op) +points(c(1,2),c(m1,m2)) +if(VL)lines(c(1,2),c(m1,m2)) +} +list(ci=c(low,up),p.value=yuen,dif=dif,se=sqrt(q1+q2),teststat=test, +crit=crit,df=df,Var.Explained=e.pow,Effect.Size=sqrt(e.pow)) +} + + bbbmcppb.sub<-function(J, K,L, x, est=tmean, JKL = J * K*L, con = 0, + alpha = 0.05, grp =c(1:JKL), nboot = 500, bhop=FALSE,SEED = TRUE, ...){ +# +# between-by-between-by-between design +# + # + # A percentile bootstrap for + # multiple comparisons for all main effects and interactions + # The analysis is done by generating bootstrap samples and + # using and appropriate linear contrast. + # + # The R variable x is assumed to contain the raw + # data stored in list mode or in a matrix. + # If in list mode, x[[1]] contains the data + # for the first level of both factors: level 1,1. + # x[[2]] is assumed to contain the data for level 1 of the + # first factor and level 2 of the second: level 1,2 + # x[[K]] is the data for level 1,K + # x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc. + # +# + # If the data are in a matrix, column 1 is assumed to + # correspond to x[[1]], column 2 to x[[2]], etc. 
+ # + # When in list mode x is assumed to have length JKL, the total number + # groups being tested, but a subset of the data can be analyzed + # using grp + # + if(is.matrix(x)) { + y <- list() + for(j in 1:ncol(x)) + y[[j]] <- x[, j] +x=y +} +ncon=ncol(con) + p <- J*K*L +JKL=p +if(p>length(x))stop('JKL is less than the Number of groups') +JK=J*K +KL=K*L + data <- list() +xx=list() + for(j in 1:length(x)) { +xx[[j]]=x[[grp[j]]] # save input data +# # Now have the groups in proper order. + } +for(j in 1:p){ +xx[j]=elimna(xx[j]) +} + crit=alpha/2 + icl<-round(crit*nboot)+1 +icu<-nboot-icl + if(SEED) + set.seed(2) + # set seed of random number generator so that + # results can be duplicated. + # Next determine the n_j values + testA = NA + bsam = list() + bdat = list() +aboot=matrix(NA,nrow=nboot,ncol=ncol(con)) +tvec=NA +tvec=linhat(x,con,est=est,...) + for(ib in 1:nboot) { + for(j in 1:JKL) { +nv=length(x[[j]]) +bdat[[j]] = sample(nv, size = nv, replace =TRUE) +bsam[[j]] = x[[j]][bdat[[j]]] +} +aboot[ib,]=linhat(bsam,con=con,est=est,...) +} +pbA=NA +for(j in 1:ncol(aboot)){ +pbA[j]=mean(aboot[,j]>0) +pbA[j]=2*min(c(pbA[j],1-pbA[j])) +} +outputA<-matrix(0,ncol(con),6) +dimnames(outputA)<-list(NULL,c('con.num','psihat','p.value','p.adjust', +'ci.lower','ci.upper')) +test=pbA +outputA[,2]<-tvec +for (ic in 1:ncol(con)){ +outputA[ic,1]<-ic +outputA[ic,3]<-test[ic] +temp<-sort(aboot[,ic]) +outputA[ic,5]<-temp[icl] +outputA[ic,6]<-temp[icu] +} +outputA[,4]=p.adjust(outputA[,3],method='hoch') +outputA +} + +bbbmcppb<-function(J, K,L, x, est=tmean,JKL = J * K*L, + alpha = 0.05, grp =c(1:JKL), nboot = 2000, bhop=FALSE,SEED = TRUE,...) +{ +# +# BETWEEN-BETWEEN-BETWEEN DESIGN +# + # A percentile bootstrap for multiple comparisons among + # multiple comparisons for all main effects and interactions + # The analysis is done by generating bootstrap samples and + # using an appropriate linear contrast. + # + # The R variable x is assumed to contain the raw + # data stored in list mode or in a matrix. + # If in list mode, x[[1]] contains the data + # for the first level of both factors: level 1,1. + # x[[2]] is assumed to contain the data for level 1 of the + # first factor and level 2 of the second: level 1,2 + # x[[K]] is the data for level 1,K + # x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc. + # + # If the data are in a matrix, column 1 is assumed to + # correspond to x[[1]], column 2 to x[[2]], etc. + # + # When in list mode x is assumed to have length JK, the total number + # groups being tested, but a subset of the data can be analyzed + # using grp + # +con=con3way(J,K,L) +A=bbbmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conA, + alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...) +B=bbbmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conB, + alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...) +C=bbbmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conC, + alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...) +AB=bbbmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conAB, + alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...) +AC=bbbmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conAC, + alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...) +BC=bbbmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conBC, + alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...) +ABC=bbbmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conABC, + alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...) 
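+# Each component below is the output matrix for one family of contrasts
+# (main effects A, B, C and all interactions), with Hochberg-adjusted p-values.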
+list(Fac.A=A,Fac.B=B,Fac.C=C,Fac.AB=AB,Fac.AC=AC,Fac.BC=BC,Fac.ABC=ABC) +} + +linhat<-function(x,con,est=tmean,...){ +# +# estimate all linear contrasts specified by con +# +psihat=0 +xbar=llocv2(x,est=est,...)$center +for(i in 1:ncol(con))psihat[i]=sum(con[,i]*xbar) +psihat +} + +bbwmcppb<-function(J, K,L, x, est=tmean,JKL = J * K*L, + alpha = 0.05, grp =c(1:JKL), nboot = 2000, bhop=FALSE,SEED = TRUE,...) +{ +# +# BETWEEN-BETWEEN-WITHIN DESIGN +# + # A percentile bootstrap for multiple comparisons + # for all main effects and interactions + # The analysis is done by generating bootstrap samples and + # using an appropriate linear contrast. + # + # The R variable x is assumed to contain the raw + # data stored in list mode or in a matrix. + # If in list mode, x[[1]] contains the data + # for the first level of all three factors: level 1,1,1. + # x[[2]] is assumed to contain the data for level 1 of the + # first two factors and level 3 of the third: level 1,1,2 + # x[[K]] is the data for level 1,1,K + # x[[K+1]] is the data for level 1,2,1, x[[2K]] is level 1,2,K, etc. + # + # If the data are in a matrix, column 1 is assumed to + # correspond to x[[1]], column 2 to x[[2]], etc. + # + # When in list mode x is assumed to have length JKL, the total number + # groups being tested, but a subset of the data can be analyzed + # using grp + # +con=con3way(J,K,L) +A=bbwmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conA, + alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...) +B=bbwmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conB, + alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...) +C=bbwmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conC, + alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...) +AB=bbwmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conAB, + alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...) +AC=bbwmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conAC, + alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...) +BC=bbwmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conBC, + alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...) +ABC=bbwmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conABC, + alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...) +list(Fac.A=A,Fac.B=B,Fac.C=C,Fac.AB=AB,Fac.AC=AC,Fac.BC=BC,Fac.ABC=ABC) +} + +bbwmcppb.sub<-function(J, K,L, x, est=tmean, JKL = J * K*L, con = 0, + alpha = 0.05, grp =c(1:JKL), nboot = 500, bhop=FALSE,SEED = TRUE, ...){ + # + # A percentile bootstrap for multiple comparisons among + # multiple comparisons for all main effects and interactions + # The analysis is done by generating bootstrap samples and + # using an appropriate linear contrast. + # + # The R variable x is assumed to contain the raw + # data stored in list mode or in a matrix. + # If in list mode, x[[1]] contains the data + # for the first level of both factors: level 1,1,1. + # x[[2]] is assumed to contain the data for levels 1,1,2, etc. + # +# +# JK independent groups, L dependent groups +# + + # If the data are in a matrix, column 1 is assumed to + # correspond to x[[1]], column 2 to x[[2]], etc. 
+ # + # When in list mode x is assumed to have length JKL, the total number + # groups being tested, but a subset of the data can be analyzed + # using grp + # + if(is.matrix(x)) { + y <- list() + for(j in 1:ncol(x)) + y[[j]] <- x[, j] +x=y +} +ncon=ncol(con) + p <- J*K*L +if(p>length(x))stop("JKL is less than the Number of groups") +JK=J*K +KL=K*L + data <- list() +xx=list() + for(j in 1:length(x)) { +xx[[j]]=x[[grp[j]]] # save input data +# # Now have the groups in proper order. + } +ilow=1-L +iup=0 +for(j in 1:JK){ +ilow <- ilow + L + iup = iup + L +sel <- c(ilow:iup) +xx[sel]=listm(elimna(matl(xx[sel]))) +} + + jp <- 1 - L + kv <- 0 + if(SEED) + set.seed(2) + # set seed of random number generator so that + # results can be duplicated. + # Next determine the n_j values + testA = NA + bsam = list() + bdat = list() +aboot=matrix(NA,nrow=nboot,ncol=ncol(con)) +tvec=NA +x=xx +tvec=linhat(x,con,est=est,...) + for(ib in 1:nboot) { + ilow <- 1 - L + iup = 0 + for(j in 1:JK) { + ilow <- ilow + L + iup = iup + L +nv=length(x[[ilow]]) +bdat[[j]] = sample(nv, size = nv, replace =TRUE) +for(k in ilow:iup){ + bsam[[k]] = x[[k]][bdat[[j]]] +} +} +ilow=0-L +iup=0 +aboot[ib,]=linhat(bsam,con=con,est=est,...) +} +pbA=NA +for(j in 1:ncol(aboot)){ +pbA[j]=mean(aboot[,j]>0) +pbA[j]=2*min(c(pbA[j],1-pbA[j])) +} +# Determine critical values +if(!bhop)dvec=alpha/c(1:ncol(con)) +if(bhop)dvec<-(ncol(con)-c(1:ncol(con))+1)*alpha/ncol(con) +outputA<-matrix(0,ncol(con),6) +dimnames(outputA)<-list(NULL,c("con.num","psihat","p.value","p.crit", +"ci.lower","ci.upper")) +test=pbA +temp2<-order(0-test) +zvec<-dvec[1:ncon] +sigvec<-(test[temp2]>=zvec) +outputA[temp2,4]<-zvec +icl<-round(dvec[ncon]*nboot/2)+1 +icu<-nboot-icl-1 +outputA[,2]<-tvec +for (ic in 1:ncol(con)){ +outputA[ic,1]<-ic +outputA[ic,3]<-test[ic] +temp<-sort(aboot[,ic]) +outputA[ic,5]<-temp[icl] +outputA[ic,6]<-temp[icu] +} +outputA +} + +bwwmcppb.sub<-function(J, K,L, x, est=tmean, JKL = J * K*L, con = 0, + alpha = 0.05, grp =c(1:JKL), nboot = 500, bhop=FALSE,SEED = TRUE, ...){ + # + # A percentile bootstrap for multiple comparisons + # for all main effects and interactions. + # The analysis is done by generating bootstrap samples and + # using an appropriate linear contrast. + # + # The R variable x is assumed to contain the raw + # data stored in list mode or in a matrix. + # If in list mode, x[[1]] contains the data + # for the first level of both factors: level 1,1. + # x[[2]] is assumed to contain the data for level 1 of the + # first factor and level 2 of the second: level 1,2 + # x[[K]] is the data for level 1,K + # x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc. + # +# +# J independent groups, KL dependent groups +# + + # If the data are in a matrix, column 1 is assumed to + # correspond to x[[1]], column 2 to x[[2]], etc. + # + # When in list mode x is assumed to have length JK, the total number + # groups being tested, but a subset of the data can be analyzed + # using grp + # + if(is.matrix(x)) { + y <- list() + for(j in 1:ncol(x)) + y[[j]] <- x[, j] +x=y +} +# nvec <- NA +#for(j in 1:length(x))nvec[j]=length(x[[j]]) +ncon=ncol(con) + p <- J*K*L +if(p>length(x))stop("JKL is less than the Number of groups") +JK=J*K +KL=K*L +# v <- matrix(0, p, p) + data <- list() +xx=list() + for(j in 1:length(x)) { +# data[[j]] <- x[[grp[j]]] +xx[[j]]=x[[grp[j]]] # save input data +# # Now have the groups in proper order. 
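+# Once the groups are in the order given by grp, the KL dependent measures within
+# each of the J independent groups are combined below (matl/elimna/listm) so that
+# missing values are removed casewise, keeping the repeated measures aligned.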
+ } +ilow=1-KL +iup=0 +for(j in 1:J){ +ilow <- ilow + KL + iup = iup + KL +sel <- c(ilow:iup) +xx[sel]=listm(elimna(matl(xx[sel]))) +} + + jp <- 1 - KL + kv <- 0 + if(SEED) + set.seed(2) + # set seed of random number generator so that + # results can be duplicated. + # Next determine the n_j values + testA = NA + bsam = list() + bdat = list() +aboot=matrix(NA,nrow=nboot,ncol=ncol(con)) +tvec=NA +x=xx +tvec=linhat(x,con,est=est,...) + for(ib in 1:nboot) { + ilow <- 1 - KL + iup = 0 + for(j in 1:J) { + ilow <- ilow + KL + iup = iup + KL +nv=length(x[[ilow]]) +bdat[[j]] = sample(nv, size = nv, replace =TRUE) +for(k in ilow:iup){ + bsam[[k]] = x[[k]][bdat[[j]]] +} +} +ilow=1-KL +iup=0 +aboot[ib,]=linhat(bsam,con=con,est=est,...) +} +pbA=NA +for(j in 1:ncol(aboot)){ +pbA[j]=mean(aboot[,j]>0) +pbA[j]=2*min(c(pbA[j],1-pbA[j])) +} +# Determine critical values +if(!bhop){ +if(alpha==.05){ +dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) +if(ncol(con) > 10){ +avec<-.05/c(11:(ncol(con))) +dvec<-c(dvec,avec) +}} +if(alpha==.01){ +dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) +if(con > 10){ +avec<-.01/c(11:ncol(con)) +dvec<-c(dvec,avec) +}} +if(alpha != .05 && alpha != .01){ +dvec<-alpha/c(1:ncol(con)) +} +} +if(bhop)dvec<-(ncol(con)-c(1:ncol(con))+1)*alpha/ncol(con) +outputA<-matrix(0,ncol(con),6) +dimnames(outputA)<-list(NULL,c("con.num","psihat","p.value","p.crit", +"ci.lower","ci.upper")) +test=pbA +temp2<-order(0-test) +zvec<-dvec[1:ncon] +sigvec<-(test[temp2]>=zvec) +outputA[temp2,4]<-zvec +icl<-round(dvec[ncon]*nboot/2)+1 +icu<-nboot-icl-1 +outputA[,2]<-tvec +for (ic in 1:ncol(con)){ +outputA[ic,1]<-ic +outputA[ic,3]<-test[ic] +temp<-sort(aboot[,ic]) +outputA[ic,5]<-temp[icl] +outputA[ic,6]<-temp[icu] +} +outputA +} + +wwwmcppb.sub<-function(J, K,L, x, est=tmean, JKL = J * K*L, con = 0, + alpha = 0.05, grp =c(1:JKL), nboot = 500, bhop=FALSE,SEED = TRUE, ...){ + # + # A percentile bootstrap for multiple comparisons among + # multiple comparisons for all main effects and interactions + # The analysis is done by generating bootstrap samples and + # using an appropriate linear contrast. + # + # The R variable x is assumed to contain the raw + # data stored in list mode or in a matrix. + # If in list mode, x[[1]] contains the data + # for the first level of both factors: level 1,1. + # x[[2]] is assumed to contain the data for level 1 of the + # first factor and level 2 of the second: level 1,2 + # x[[K]] is the data for level 1,K + # x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc. + # +# +# within-by-within-by-within design +# +# JKL dependent groups +# + + # If the data are in a matrix, column 1 is assumed to + # correspond to x[[1]], column 2 to x[[2]], etc. + # + # When in list mode x is assumed to have length JK, the total number + # groups being tested, but a subset of the data can be analyzed + # using grp + # + if(is.matrix(x)) { + y <- list() + for(j in 1:ncol(x)) + y[[j]] <- x[, j] +x=y +} +# nvec <- NA +#for(j in 1:length(x))nvec[j]=length(x[[j]]) +ncon=ncol(con) + p <- J*K*L +JKL=p +if(p>length(x))stop("JKL is less than the Number of groups") +JK=J*K +KL=K*L +# v <- matrix(0, p, p) + data <- list() +xx=list() + for(j in 1:length(x)) { +# data[[j]] <- x[[grp[j]]] +xx[[j]]=x[[grp[j]]] # save input data +# # Now have the groups in proper order. + } + if(SEED) + set.seed(2) + # set seed of random number generator so that + # results can be duplicated. 
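+# In this within-by-within-by-within design all JKL groups contain the same
+# subjects, so each bootstrap iteration below draws a single set of row indices
+# and applies it to every group, preserving the dependence among the measures.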
+ # Next determine the n_j values + testA = NA + bsam = list() + bdat = list() +aboot=matrix(NA,nrow=nboot,ncol=ncol(con)) +tvec=NA +x=xx +tvec=linhat(x,con,est=est,...) +nv=length(x[[1]]) + for(ib in 1:nboot) { +bdat[[j]] = sample(nv, size = nv, replace =TRUE) +for(k in 1:JKL) bsam[[k]] = x[[k]][bdat[[j]]] +aboot[ib,]=linhat(bsam,con=con,est=est,...) +} +pbA=NA +for(j in 1:ncol(aboot)){ +pbA[j]=mean(aboot[,j]>0) +pbA[j]=2*min(c(pbA[j],1-pbA[j])) +} +# Determine critical values +if(!bhop){ +if(alpha==.05){ +dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) +if(ncol(con) > 10){ +avec<-.05/c(11:(ncol(con))) +dvec<-c(dvec,avec) +}} +if(alpha==.01){ +dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) +if(con > 10){ +avec<-.01/c(11:ncol(con)) +dvec<-c(dvec,avec) +}} +if(alpha != .05 && alpha != .01){ +dvec<-alpha/c(1:ncol(con)) +} +} +if(bhop)dvec<-(ncol(con)-c(1:ncol(con))+1)*alpha/ncol(con) +outputA<-matrix(0,ncol(con),6) +dimnames(outputA)<-list(NULL,c("con.num","psihat","p.value","p.crit", +"ci.lower","ci.upper")) +test=pbA +temp2<-order(0-test) +zvec<-dvec[1:ncon] +sigvec<-(test[temp2]>=zvec) +outputA[temp2,4]<-zvec +icl<-round(dvec[ncon]*nboot/2)+1 +icu<-nboot-icl-1 +outputA[,2]<-tvec +for (ic in 1:ncol(con)){ +outputA[ic,1]<-ic +outputA[ic,3]<-test[ic] +temp<-sort(aboot[,ic]) +outputA[ic,5]<-temp[icl] +outputA[ic,6]<-temp[icu] +} +outputA +} + +wwwmcppb.OLD<-function(J, K,L, x, est=tmean,JKL = J * K*L, + alpha = 0.05, grp =c(1:JKL), nboot = 2000, bhop=FALSE,SEED = TRUE,...) +{ + # + # A percentile bootstrap for + # multiple comparisons for all main effects and interactions + # The analysis is done by generating bootstrap samples and + # using an appropriate linear contrast. + # + # The R variable x is assumed to contain the raw + # data stored in list mode or in a matrix. + # If in list mode, x[[1]] contains the data + # for the first level of both factors: level 1,1. + # x[[2]] is assumed to contain the data for level 1 of the + # first factor and level 2 of the second: level 1,2 + # x[[K]] is the data for level 1,K + # x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc. + # + # If the data are in a matrix, column 1 is assumed to + # correspond to x[[1]], column 2 to x[[2]], etc. + # + # When in list mode x is assumed to have length JK, the total number + # groups being tested, but a subset of the data can be analyzed + # using grp + # +con=con3way(J,K,L) +A=wwwmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conA, + alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...) +B=wwwmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conB, + alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...) +C=wwwmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conC, + alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...) +AB=wwwmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conAB, + alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...) +AC=wwwmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conAC, + alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...) +BC=wwwmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conBC, + alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...) +ABC=wwwmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conABC, + alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...) 
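+# As in the other three-way wrappers, each call above evaluates the family of
+# linear contrasts (A, B, C, AB, AC, BC, ABC) generated by con3way(J,K,L).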
+list(Fac.A=A,Fac.B=B,Fac.C=C,Fac.AB=AB,Fac.AC=AC,Fac.BC=BC,Fac.ABC=ABC) +} + +wwwmcppb<-function(J,K,L,x, alpha = 0.05, con = 0,est=tmean, plotit = FALSE, + dif = TRUE, grp = NA, nboot = NA, BA = TRUE, hoch = TRUE, xlab = "Group 1", + ylab = "Group 2", pr = TRUE, SEED = TRUE,...){ +# +# Do all multiple comparisons for a within-by-within-by-within design. +# using a percentile bootstrap method and trimmed means +# +if(pr){ +print('This new version includes the option to use difference scores and defaults to dif=TRUE') +print('Number of bootstrap samples differs from the old version') +print('To use the old version, use wwwmcppb.OLD') +} +conM=con3way(J,K,L) +A=rmmcppb(x,con=conM$conA,alpha=alpha,dif=dif,plotit=plotit,est=est, +nboot=nboot,BA=BA,hoch=hoch,SEED=SEED,xlab=xlab,ylab=ylab,pr=pr,...) +B=rmmcppb(x,con=conM$conB,alpha=alpha,dif=dif, +plotit=plotit,est=est,nboot=nboot,BA=BA,hoch=hoch, +SEED=SEED,xlab=xlab,ylab=ylab,pr=FALSE,...) +C=rmmcppb(x,con=conM$conC,alpha=alpha,dif=dif, +plotit=plotit,est=est,nboot=nboot,BA=BA,hoch=hoch, +SEED=SEED,xlab=xlab,ylab=ylab,pr=FALSE,...) +AB=rmmcppb(x,con=conM$conAB,alpha=alpha,dif=dif,plotit=plotit,est=est, +nboot=nboot,BA=BA,hoch=hoch,SEED=SEED,xlab=xlab,ylab=ylab,pr=FALSE,...) +AC=rmmcppb(x,con=conM$conAC,alpha=alpha,dif=dif,plotit=plotit,est=est, +nboot=nboot,BA=BA,hoch=hoch,SEED=SEED,xlab=xlab,ylab=ylab,pr=FALSE,...) +BC=rmmcppb(x,con=conM$conBC,alpha=alpha,dif=dif,plotit=plotit,est=est, +nboot=nboot,BA=BA,hoch=hoch,SEED=SEED,xlab=xlab,ylab=ylab,pr=FALSE,...) +ABC=rmmcppb(x,con=conM$conABC,alpha=alpha,dif=dif,plotit=plotit,est=est, +nboot=nboot,BA=BA,hoch=hoch,SEED=SEED,xlab=xlab,ylab=ylab,pr=FALSE,...) +list(Factor_A=A,Factor_B=B,Factor_AB=AB,Factor_AC=AC,Factor_BC=BC,Factor_ABC=ABC) +} + + + +bwwmcppb<-function(J, K,L, x, est=tmean,JKL = J * K*L, + alpha = 0.05, grp =c(1:JKL), nboot = 2000, bhop=FALSE,SEED = TRUE,...) +{ + # + # A percentile bootstrap for multiple comparisons + # for all main effects and interactions + # The analysis is done by generating bootstrap samples and + # using an appropriate linear contrast. + # + # The R variable x is assumed to contain the raw + # data stored in list mode or in a matrix. + # If in list mode, x[[1]] contains the data + # for the first level of both factors: level 1,1. + # x[[2]] is assumed to contain the data for level 1 of the + # first factor and level 2 of the second: level 1,2 + # x[[K]] is the data for level 1,K + # x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc. + # + # If the data are in a matrix, column 1 is assumed to + # correspond to x[[1]], column 2 to x[[2]], etc. + # + # When in list mode x is assumed to have length JK, the total number + # groups being tested, but a subset of the data can be analyzed + # using grp + # +con=con3way(J,K,L) +A=bwwmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conA, + alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...) +B=bwwmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conB, + alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...) +C=bwwmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conC, + alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...) +AB=bwwmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conAB, + alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...) +AC=bwwmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conAC, + alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...) +BC=bwwmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conBC, + alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...) 
+ABC=bwwmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conABC, + alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...) +list(Fac.A=A,Fac.B=B,Fac.C=C,Fac.AB=AB,Fac.AC=AC,Fac.BC=BC,Fac.ABC=ABC) +} + + +cjMAT<-function(J){ +L=(J^2-J)/2 +cj=matrix(0,nrow=L,ncol=J) +ic=0 +for(j in 1:J){ +for(k in 1:J){ +if(j0)e.pow=top/bot +if(bot==0)e.pow=1 +if(e.pow>=1){ +v1=NULL +v2=NULL +for(j in 1:J){ +v1=c(v1,rep(xbar[j],length(x[[j]]))) +v2=c(v2,x[[j]]) +} +e.pow=wincor(v1,v2,tr=tr)$cor^2 +} +list(TEST=TEST,nu1=nu1,nu2=nu2,p.value=sig,Var.Explained=e.pow, +Effect.Size=sqrt(e.pow)) +} + +snmreg<-function(x,y,SEED=TRUE,xout=FALSE,outfun=outpro,initreg=MMreg,...){ +# +# Compute regression S-estimator via Nelder-Mead method +# The measure of scale is taken to be the percentage bend midvariance +# +if(xout){ +x<-as.matrix(x) +flag<-outfun(x,plotit=plotit,...)$keep +x<-x[flag,] +y<-y[flag] +x<-as.matrix(x) +} +x <- as.matrix(x) +X<-cbind(x,y) +X<-elimna(X) +np<-ncol(X) +npm1=np-1 +x=X[,1:npm1] +x=as.matrix(x) +y=X[,np] +N<-np-1 +#temp<-initreg(x,y,SEED=SEED)$coef +temp<-initreg(x,y)$coef +START<-temp[2:np] +temp<-nelder(X,N,FN=snmreg.sub,START=START) +alpha <- median(y - x %*% temp) +coef <- c(alpha,temp) +res <- y - x %*% temp - alpha +list(coef = coef, residuals = res) +} +snmregv2<-function(x,y,SEED=TRUE){ +# +# Compute regression S-estimator +# remove points for which residuals are outliers +# then recompute the estimated slopes and intercept +# +res=snmreg(x,y,SEED=SEED)$residuals +chk<-abs(res-median(res))/mad(res) +x=as.matrix(x) +xx<-x[chk<=2,] +yy<-y[chk<=2] +temp<-snmreg(xx,yy,SEED=SEED) +list(coef=temp$coef,residuals=temp$residuals) +} + + +larsR<-function(x,y,type="lasso"){ +library(lars) +p=ncol(x) +p1=p+1 +xy=elimna(cbind(x,y)) +result=lars(xy[,1:p],xy[,p1],type=type) +result +} + +regvarp<-function(x,y,p=1,locfun=lloc,scat=var,est=mean,cov.fun=cov.mba){ +# +# Measure the importance of each of p variables in a regression +# problem, p>1 +# +xy=cbind(x,y) +xy<-elimna(xy) +m<-ncol(x) +x=xy[,1:m] +n<-nrow(x) +m1=m+1 +y=xy[,m1] +x=standm(x,locfun=locfun,est=est,scat=scat) +vals=NA +if(p==1)for(j in 1:m){ +vals[j]=gvarg(cbind(y,x[,j]),cov.fun) +} +if(p>1){ +temp=modgen(m) +ic=0 +for(j in 1:length(temp)){ +if(length(temp[[j]])==p){ +ic=ic+1 +vals[ic]=gvarg(cbind(y,x[,temp[[j]]]),cov.fun) +z=cbind(y,x[,temp[[j]]]) +}}} +vals +} + +bwmcppb<-function(J, K, x, est=tmean,JK = J * K, + alpha = 0.05, grp =c(1:JK), nboot = 500, bhop=TRUE,SEED = TRUE,...) +{ + # + # A percentile bootstrap for multiple comparisons + # for all main effects and interactions + # The analysis is done by generating bootstrap samples and + # using an appropriate linear contrast. + # + # The R variable x is assumed to contain the raw + # data stored in list mode or in a matrix. + # If in list mode, x[[1]] contains the data + # for the first level of both factors: level 1,1. + # x[[2]] is assumed to contain the data for level 1 of the + # first factor and level 2 of the second: level 1,2 + # x[[K]] is the data for level 1,K + # x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc. + # + # If the data are in a matrix, column 1 is assumed to + # correspond to x[[1]], column 2 to x[[2]], etc. + # + # When in list mode x is assumed to have length JK, the total number + # groups being tested, but a subset of the data can be analyzed + # using grp + # + # bhop=TRUE, use Benjaminin--Hochberg. When using a one-step M-estimator + # and the sample sizes are small, say less than 80, bhop=TRUE is a bit better. 
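+ # Illustrative call (hypothetical data, not part of the original comments):
+ # for a 3 (between) by 2 (within) design, x has 6 groups ordered
+ # (1,1),(1,2),(2,1),(2,2),(3,1),(3,2), and
+ #   bwmcppb(3, 2, x, est = tmean, nboot = 500)
+ # returns the contrasts for Factor A, Factor B and the A-by-B interaction.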
+ # +con=con2way(J,K) +A=bwmcppb.sub(J=J, K=K, x, est=est,JK = J * K,con=con$conA, + alpha = alpha, grp =c(1:JK), nboot = nboot, bhop=bhop,SEED = SEED,...) +B=bwmcppb.sub(J=J, K=K, x, est=est,JK = J * K,con=con$conB, + alpha = alpha, grp =c(1:JK), nboot = nboot, bhop=bhop,SEED = SEED,...) +AB=bwmcppb.sub(J=J, K=K, x, est=est,JK = J * K,con=con$conAB, + alpha = alpha, grp =c(1:JK), nboot = nboot, bhop=bhop,SEED = SEED,...) +list(Fac.A=A,Fac.B=B,Fac.AB=AB) +} + +bwmcppb.sub<-function(J, K, x, est=tmean, JK = J * K, con = 0,method='hoch', + alpha = 0.05, grp =c(1:JK), nboot = 500, bhop=TRUE,SEED = TRUE, ...){ + # + # A percentile bootstrap for multiple comparisons + # for all main effects and interactions + # The analysis is done by generating bootstrap samples and + # using an appropriate linear contrast. + # + # The R variable x is assumed to contain the raw + # data stored in list mode or in a matrix. + # If in list mode, x[[1]] contains the data + # for the first level of both factors: level 1,1. + # x[[2]] is assumed to contain the data for level 1 of the + # first factor and level 2 of the second: level 1,2 + # x[[K]] is the data for level 1,K + # x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc. + # + # If the data are in a matrix, column 1 is assumed to + # correspond to x[[1]], column 2 to x[[2]], etc. + # + # When in list mode x is assumed to have length JK, the total number + # groups being tested, but a subset of the data can be analyzed + # using grp + # + if(is.matrix(x)) { + y <- list() + for(j in 1:ncol(x)) + y[[j]] <- x[, j] +x=y +} + nvec <- NA +for(j in 1:length(x))nvec[j]=length(x[[j]]) +nmax=max(nvec) +ncon=ncol(con) + p <- J * K + v <- matrix(0, p, p) + data <- list() +xx=list() + for(j in 1:length(x)) { +# data[[j]] <- x[[grp[j]]] +xx[[j]]=x[[grp[j]]] # save input data +# # Now have the groups in proper order. +# data[[j]] = data[[j]] - mean(data[[j]], tr = tr) + } +ilow=1-K +iup=0 +for(j in 1:J){ +ilow <- ilow + K + iup = iup + K +sel <- c(ilow:iup) +xx[sel]=listm(elimna(matl(xx[sel]))) + } + jp <- 1 - K + kv <- 0 + if(SEED) + set.seed(2) + # set seed of random number generator so that + # results can be duplicated. + # Next determine the n_j values + testA = NA + bsam = list() + bdat = list() +aboot=matrix(NA,nrow=nboot,ncol=ncol(con)) +tvec=NA +x=xx +tvec=linhat(x,con,est=est,...) + for(ib in 1:nboot) { + ilow <- 1 - K + iup = 0 + for(j in 1:J) { + ilow <- ilow + K + iup = iup + K +nv=length(x[[ilow]]) +bdat[[j]] = sample(nv, size = nv, replace =TRUE) +for(k in ilow:iup){ + bsam[[k]] = x[[k]][bdat[[j]]] +} +} +ilow=1-K +iup=0 +aboot[ib,]=linhat(bsam,con=con,est=est,...) 
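+# For each bootstrap iteration, one set of row indices is drawn per independent
+# group j and applied to all K dependent measures in that group (columns
+# ilow:iup); the linear contrasts are then evaluated on the resampled data.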
+} +pbA=NA +for(j in 1:ncol(aboot)){ +pbA[j]=mean(aboot[,j]>0) +pbA[j]=2*min(c(pbA[j],1-pbA[j])) +} +# Determine critical values +if(!bhop)dvec=alpha/c(1:ncol(con)) +if(bhop)dvec<-(ncol(con)-c(1:ncol(con))+1)*alpha/ncol(con) +outputA<-matrix(0,ncol(con),6) +dimnames(outputA)<-list(NULL,c("con.num","psihat","p.value","adj.p.value", +"ci.lower","ci.upper")) +test=pbA +temp2<-order(0-test) +zvec<-dvec[1:ncon] +sigvec<-(test[temp2]>=zvec) +outputA[temp2,4]<-zvec +icl<-round(dvec[ncon]*nboot/2)+1 +icu<-nboot-icl-1 +outputA[,2]<-tvec +for (ic in 1:ncol(con)){ +outputA[ic,1]<-ic +outputA[ic,3]<-test[ic] +temp<-sort(aboot[,ic]) +outputA[ic,5]<-temp[icl] +outputA[ic,6]<-temp[icu] +} +outputA[,4]=p.adjust(outputA[,3],method=method) +outputA +} + +D.akp.effect<-function(x,y=NULL,null.value=0,tr=.2){ +# +# Computes the robust effect size for one-sample case using +# a simple modification of +# Algina, Keselman, Penfield Pcyh Methods, 2005, 317-328 +# +# When comparing two dependent groups, data for the second group can be stored in +# the second argument y. The function then computes the difference scores x-y +# +library(MASS) +if(!is.null(y))x=x-y +x<-elimna(x) +s1sq=winvar(x,tr=tr) +cterm=1 +if(tr>0)cterm=area(dnormvar,qnorm(tr),qnorm(1-tr))+2*(qnorm(tr)^2)*tr +cterm=sqrt(cterm) +dval<-cterm*(tmean(x,tr=tr)-null.value)/sqrt(s1sq) +dval +} + + +smean2v2<-function(m1,m2,nullv=rep(0,ncol(m1)),cop=3,MM=FALSE,SEED=TRUE, +nboot=500,plotit=TRUE,MC=FALSE,STAND=TRUE){ +# +# m is an n by p matrix +# +# For two independent groups, +# test hypothesis that multivariate skipped estimators +# are all equal. +# +# The level of the test is .05. +# +# Skipped estimator is used, i.e., +# eliminate outliers using a projection method. +# That is, determine center of data using: +# +# cop=1 Donoho-Gasko median, +# cop=2 MCD, +# cop=3 marginal medians. +# cop=4 MVE +# +# For each point +# consider the line between it and the center, +# project all points onto this line, and +# check for outliers using +# +# MM=F, a boxplot rule. +# MM=T, rule based on MAD and median +# +# Repeat this for all points. A point is declared +# an outlier if for any projection is an outlier +# using a modification of the usual boxplot rule. +# +# Eliminate any outliers and compute means +# using remaining data. 
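+# The null vector (by default all zeros) is then compared with the bootstrap
+# differences between the two skipped means via projection distances (pdis),
+# and the decision is made at a critical level that is lowered for small n.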
+# +if(ncol(m1) != ncol(m2)){ +stop("Number of variables in group 1 does not equal the number in group 2.") +} +if(SEED)set.seed(2) +m1<-elimna(m1) +m2<-elimna(m2) +n1<-nrow(m1) +n2<-nrow(m2) +n<-min(c(n1,n2)) +crit.level<-.05 +if(n<=120)crit.level<-.045 +if(n<=80)crit.level<-.04 +if(n<=60)crit.level<-.035 +if(n<=40)crit.level<-.03 +if(n<=30)crit.level<-.025 +if(n<=20)crit.level<-.02 +val<-matrix(NA,ncol=ncol(m1),nrow=nboot) +est1=smean(m1) +est2=smean(m2) +#est=smean(m1)-smean(m2) +est=est1-est2 +for(j in 1: nboot){ +data1<-sample(n1,size=n1,replace=TRUE) +data2<-sample(n2,size=n2,replace=TRUE) +mm1<-m1[data1,] +temp<-outpro(mm1,plotit=FALSE,cop=cop,STAND=STAND)$keep +v1<-apply(mm1[temp,],2,mean) +mm2<-m2[data2,] +temp<-outpro(mm2,plotit=FALSE,cop=cop,STAND=STAND)$keep +v2<-apply(mm2[temp,],2,mean) +val[j,]<-v1-v2 +} +if(!MC)temp<-pdis(rbind(val,nullv)) +if(MC)temp<-pdisMC(rbind(val,nullv)) +sig.level<-sum(temp[nboot+1]2){ +center1<-dmean(m1,tr=.5) +center2<-dmean(m2,tr=.5) +} +if(ncol(m1)==2){ +tempd<-NA +for(i in 1:nrow(m1)) +tempd[i]<-depth(m1[i,1],m1[i,2],m1) +mdep<-max(tempd) +flag<-(tempd==mdep) +if(sum(flag)==1)center1<-m1[flag,] +if(sum(flag)>1)center1<-apply(m1[flag,],2,mean) +for(i in 1:nrow(m2)) +tempd[i]<-depth(m2[i,1],m2[i,2],m2) +mdep<-max(tempd) +flag<-(tempd==mdep) +if(sum(flag)==1)center2<-m2[flag,] +if(sum(flag)>1)center2<-apply(m2[flag,],2,mean) +}} +if(cop==2){ +center1<-cov.mcd(m1)$center +center2<-cov.mcd(m2)$center +} +if(cop==3){ +center1<-apply(m1,2,median) +center2<-apply(m2,2,median) +} +if(cop==4){ +center1<-smean(m1) +center2<-smean(m2) +} +center<-(center1+center2)/2 +B<-center1-center2 +if(sum(center1^2)=crit05)regci[ic,4]<-1 +}}} +regci=data.frame(regci) +flag=(regci[,4]==0) +regci[flag,4]="fail to reject" +regci[!flag,4]="reject" +list(crit.value=crit05,est=est,results=regci) +} + + +mopreg<-function(x,y,regfun=tsreg,cop=3,KEEP=TRUE,MC=FALSE,STAND=TRUE){ +# +# Do multiple (outcomes) regression on points not labled outliers +# using projection-type outlier detection method +# Arg=regfun determines regression method; +# by default, Theil-Sen is used. +# +# KEEP=F, outliers will be eliminated +# KEEP=T, outliers are not eliminated +# cop: see function outpro +library(MASS) +if(MC)library(parallel) +x<-as.matrix(x) +y<-as.matrix(y) +px<-ncol(x) +py<-ncol(y) +m<-cbind(x,y) +m<-elimna(m) # eliminate any rows with missing data +if(KEEP)ivec<-c(1:nrow(x)) +if(!KEEP){ +if(!MC)ivec<-outpro(m,plotit=FALSE,cop=cop,STAND=STAND)$keep +if(MC)ivec<-outproMC(m,plotit=FALSE,cop=cop,STAND=STAND)$keep +} +np1<-ncol(x)+1 +vec<-rep(1,nrow(m)) +pxpy<-px+py +coef<-matrix(ncol=py,nrow=np1) +res<-matrix(ncol=py,nrow=nrow(m)) +for(i in 1:py){ +pv<-px+i +coef[,i]<-regfun(m[ivec,1:ncol(x)],m[ivec,pv])$coef +vec<-as.matrix(vec) +res[,i]<-m[,pv]-cbind(vec,m[,1:ncol(x)])%*%coef[,i] +} +list(coef=coef,residuals=res) +} +robpcaS<-function(x,pval=ncol(x),SCORES=FALSE,STAND=TRUE,est=tmean,varfun=winvar,SEED=TRUE){ +# +# An abbreviated form of robpca. +# +# compute eigen values to determine proportion of scatter. +# Goal is to see how many components are needed +# +# pval indicates the number of principal components. +# +x=elimna(x) +if(STAND)x=standm(x,est=est,scat=varfun) +v=robpca(x,pval=pval,pr=FALSE,plotit=FALSE,SEED=SEED) +cumsum(v$L/sum(v$L)) +val=matrix(NA,ncol=length(v$L),nrow=4) +scores=NULL +if(SCORES)scores=v$T +dimnames(val)=list(c("Number of Comp.","Robust Stand Dev","Proportion Robust var","Cum. 
Proportion"), +NULL) +val[1,]=c(1:length(v$L)) +val[2,]=sqrt(v$L) +val[3,]=v$L/sum(v$L) +val[4,]=cumsum(v$L/sum(v$L)) +list(summary=val,scores=scores) +} + + +Ppca<-function(x,p=ncol(x)-1,locfun=L1medcen,loc.val=NULL,SCORES=FALSE, +gvar.fun=cov.mba,pr=TRUE,SEED=TRUE,gcov=rmba,SCALE=TRUE,...){ +# +# Robust PCA aimed at finding scores that maximize a +# robust generalized variance given the goal of reducing data from +# m dimensions to +# p, which defaults to m-1 +# +# locfun, location used to center design space. +# by default, use the spatial median +# alternatives are mcd, tauloc, ... +# +# # data are centered based on measure of location indicated by +# locfun: default is spatial median. +# +# SCALE=T means the marginal distributions are rescaled using the +# measure and scatter indicated by +# gcov, which defaults to median ball measure of location and variance +# +# Output: the projection matrix. If +# SCORES=T, the projected scores are returned. +# +x=as.matrix(x) +x<-elimna(x) +n<-nrow(x) +m<-ncol(x) +xdat=c(n,m,p,as.vector(x)) +if(!SCALE){ +if(is.null(loc.val))info<-locfun(x,...)$center +if(!is.null(loc.val))info<-loc.val +for(i in 1:n)x[i,]<-x[i,]-info +} +if(SCALE){ +ms=gcov(x) +for(i in 1:n)x[i,]<-x[i,]-ms$center +for(j in 1:m)x[,j]<-x[,j]/sqrt(ms$cov[j,j]) +} +vals<-NA +z<-matrix(nrow=n,ncol=p) +np=p*m +B=robpca(x,pval=p,plotit=FALSE,pr=pr,SEED=SEED,scree=FALSE)$P +B=t(B) +Bs=nelderv2(xdat,np,NMpca,START=B) +Bop=matrix(Bs,nrow=p,ncol=m) +Bop=t(ortho(t(Bop))) +z<-matrix(nrow=n,ncol=p) +zval<-NULL +for(i in 1:n)z[i,]<-Bop%*%as.matrix(x[i,]) +if(SCORES)zval<-z +val=gvarg(z) +list(B=Bop,gen.sd=sqrt(val),scores=zval) +} +Ppca.sum.sub<-function(j,x,SCALE=TRUE){ +# +res=Ppca(x,p=j,pr=FALSE,SCALE=SCALE)$gen.sd +res +} +Ppca.summary<-function(x,MC=FALSE,SCALE=TRUE,p=NULL){ +# +# x is assumed to be a matrix with p columns +# Using robust principal components (Ppca) +# compute generalized variance for each dimension reduction +# from 1 to p. +# +# report values plus proportion relative to largest value found +# +x=as.matrix(x) +if(!is.matrix(x))stop("x should be a matrix") +x=elimna(x) +gv=NA +if(is.null(p))p=ncol(x) +if(!MC)for(j in 1:p)gv[j]=Ppca(x,p=j,pr=FALSE,SCALE=SCALE)$gen.sd +if(MC){ +library(parallel) +y=list() +for(j in 1:p)y[[j]]=j +gv=mclapply(y,Ppca.sum.sub,x,SCALE=SCALE,mc.preschedule=TRUE) +gv=as.vector(matl(gv)) +} +res=matrix(NA,nrow=3,ncol=p) +res[1,]=c(1:p) +res[2,]=gv +res[3,]=gv/max(gv) +dimnames(res)=list(c("Num. 
of Comp.","Gen.Stand.Dev","Relative Size"),NULL) +list(summary=res) +} + +mdepreg.coef<-function(x,y,xout=FALSE,outfun=out,...){ +# +# multiple depth regression +# +X<-cbind(x,y) +X<-elimna(X) +p1=ncol(X) +p=p1-1 +if(xout){ +flag<-outfun(X[,1:p],plotit=FALSE,...)$keep +X<-X[flag,] +} +library(mrfDepth) +a=rdepthmedian(X)$deep +list(coef=a) +} + +mdepreg<-function(x,y,xout=FALSE,outfun=out,RES=FALSE,...){ +# +# multiple depth regression +# +X<-cbind(x,y) +X<-elimna(X) +n=nrow(X) +p1=ncol(X) +p=p1-1 +if(xout){ +flag<-outfun(X[,1:p],plotit=FALSE,...)$keep +X<-X[flag,] +} +n.keep=nrow(X) +library(mrfDepth) +a=rdepthmedian(X)$deepest +res=NA +if(RES)res=X[,p1]-X[,1:p]%*%a[2:p1]-a[1] +list(n=n,n.keep=n.keep,coef=a,residuals=res) +} + +# OLD CODE use rdepthmedian in package mdrDepth in new version: +mdepreg.orig<-function(x,y,xout=FALSE,outfun=outpro){ +# +# multiple depth regression +# +X<-cbind(x,y) +X<-elimna(X) +np=n.keep=ncol(X) +p=np-1 +if(xout){ +id=outfun(X[,1:p],plotit=FALSE)$keep +X=X[id,] +n.keep=nrow(X) +} +if(np==2){ +temp=depreg(X[,1],X[,2]) +coef=temp$coef +res=temp$residuals +} +if(np>2){ +N<-np-1 +x=X[,1:N] +y=X[,np] +START<-tsreg(x,y)$coef +coef<-nelderv2(X,np,FN=mdepreg.sub,START=START) +x <- as.matrix(x) +res <- y - x %*% coef[2:np] - coef[1] +} +list(n=n,n.keep=n.keep,coef = coef, residuals = res) +} + + + +l2plot<-function(x1,y1,x2,y2,f=2/3,SCAT=TRUE,xlab="x",ylab="y",pch='*', +eout=FALSE,xout=FALSE,...){ +# +# Plot LOESS smoother for two groups +# +# f is the span used by loess +# SCAT=F, scatterplot not created, just the regression lines +# Missing values are automatically removed. +# +m<-elimna(cbind(x1,y1)) +x1<-m[,1] +y1<-m[,2] +m<-elimna(cbind(x2,y2)) +x2<-m[,1] +y2<-m[,2] +plot(c(x1,x2),c(y1,y2),xlab=xlab,ylab=ylab,pch=pch) +lines(lowess(x1,y1,f=f)) +lines(lowess(x2,y2,f=f)) +} + +contab<-function(dat,alpha=.05){ +# dat is a 2-by-2 contingency table (matrix) +# Goal: compare the marginal probability of the first variable (e.g. Time 1) +# to the marginal probability of the second variable (e.g. Time 2). +# Issue: do the probabilities change from time 1 to time 2. +# +phat=dat +n=sum(phat) +phat=phat/n +p1.=phat[1,1]+phat[1,2] +p.1=phat[1,1]+phat[2,1] +del=p1.-p.1 +sigsq=p1.*(1-p1.)+p.1*(1-p.1)-2*(phat[1,1]*phat[2,2]-phat[1,2]*phat[2,1]) +sig=sqrt(sigsq/n) +test=abs(del)/sig +pv=2*(1-pnorm(test)) +ci=del-qnorm(1-alpha/2)*sig +ci[2]=del+qnorm(1-alpha/2)*sig +list(s.e.=sig,delta=del,CI=ci,p.value=pv) +} + + +Ckappa<-function (x,fleiss=FALSE,w = NULL){ +# +# compute Cohen's kappa +# if fleiss=T, compute weighted kappa with Fleiss weights if w=NULL +# if fleiss=F, w=.5^|i-j| is used. +# if argument w contains weights, they are used +# +if(!is.matrix(x))stop("x should be a square matrix") +if(ncol(x)!=nrow(x))stop("x should be a square matrix") + p <- dim(x)[2] + x <- as.matrix(x) + tot <- sum(x) + x <- x/tot + rs <- rowSums(x) + cs <- colSums(x) + prob <- rs %*% t(cs) + po <- sum(diag(x)) + pc <- sum(diag(prob)) + kappa <- (po - pc)/(1 - pc) + if (is.null(w)) { +v=outer(c(1:p),c(1:p),"-") +w=outer(c(1:p),c(1:p),"-") +if(fleiss)w=1-w^2/(p-1)^2 +if(!fleiss)w=.5^abs(w) +} + weighted.prob <- w * prob + weighted.obser <- w * x + wpo <- sum(weighted.obser) + wpc <- sum(weighted.prob) + wkappa <- (wpo - wpc)/(1 - wpc) + return(list(kappa = kappa, weighted.kappa = wkappa)) +} +ODDSR.CI<-function(x,y=NULL,alpha=.05){ +# +# Compute confidence interval of the odds ratio. 
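+# Illustrative call (hypothetical counts, input format described below):
+#   tab <- matrix(c(30, 10, 15, 25), nrow = 2, byrow = TRUE)
+#   ODDSR.CI(tab)   # odds ratio with a Wald-type CI obtained via logistic regression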
+# +# x is either a two-by-two contingency table or a +# vector of 0's and 1's, in which case +# y is also a vector of 0's and 1's +# +# if x is a 2-by-2 matrix, assume col 1 is X=1, col 2 is X=0 +# row 1 is Y=1 and row 2 is Y=0. +# +if(is.matrix(x)){ +if(ncol(x)!=2)stop("If x is a matrix, should have 2 columns") +if(nrow(x)!=2)stop("If x is a matrix, should have 2 rows") +n=sum(x) +x1=rep(1,x[1,1]) +y1=rep(1,x[1,1]) +x2=rep(0,x[1,2]) +y2=rep(1,x[1,2]) +x3=rep(1,x[2,1]) +y3=rep(0,x[2,1]) +x4=rep(0,x[2,2]) +y4=rep(0,x[2,2]) +x=c(x1,x2,x3,x4) +y=c(y1,y2,y3,y4) +} +temp=logreg(x,y) +z=qnorm(1-alpha/2) +ci=c(exp(temp[2,1]-z*temp[2,2]),exp(temp[2,1]+z*temp[2,2])) +list(odds.ratio=exp(temp[2,1]),ci=ci) +} + +smean<-function(m,cop=3,MM=FALSE,op=1,outfun=outogk,cov.fun=rmba,MC=FALSE,STAND=TRUE,...){ +# +# m is an n by p matrix +# +# Compute a multivariate skipped measure of location +# +# op=1: +# Eliminate outliers using a projection method +# If in addition, MC=T, a multicore processor is used +# assuming your computer has multiple cores and the package +# multicore has been installed. +# +# cop=1 Donoho-Gasko median, +# cop=2 MCD, +# cop=3 marginal medians. +# cop=4 uses MVE center +# cop=5 uses TBS +# cop=6 uses rmba (Olive's median ball algorithm) +# +# For each point +# consider the line between it and the center, +# project all points onto this line, and +# check for outliers using +# +# MM=FALSE, a boxplot rule. +# MM=TRUE, rule based on MAD and median +# +# Repeat this for all points. A point is declared +# an outlier if for any projection it is an outlier +# using a modification of the usual boxplot rule. +# +# op=2 use mgv (function outmgv) method to eliminate outliers +# an outlier if for any projection it is an outlier +# using a modification of the usual boxplot rule. +# +# op=3 use outlier method indicated by outfun +# +# Eliminate any outliers and compute means +# using remaining data. +# +m<-elimna(m) +m=as.matrix(m) +if(nrow(m)<14)op=2 +if(op==1){ +if(!MC)temp<-outpro(m,plotit=FALSE,cop=cop,MM=MM,STAND=STAND)$keep +if(MC)temp<-outproMC(m,plotit=FALSE,cop=cop,MM=MM,STAND=STAND)$keep +} +if(op==2)temp<-outmgv(m,plotit=FALSE,cov.fun=cov.fun)$keep +if(op==3)temp<-outfun(m,plotit=FALSE,...)$keep +val<-apply(m[temp,],2,mean) +val +} + +smeancrv2<-function(m,nullv=rep(0,ncol(m)),cop=3,MM=FALSE,SEED=TRUE, +nboot=500,plotit=TRUE,MC=FALSE,xlab="VAR 1",ylab="VAR 2",STAND=TRUE){ +# +# m is an n by p matrix +# +# Test hypothesis that multivariate skipped estimators +# are all equal to the null value, which defaults to zero. +# The level of the test is .05. +# +# Eliminate outliers using a projection method +# That is, determine center of data using: +# +# cop=1 Donoho-Gasko median, +# cop=2 MCD, +# cop=3 marginal medians. +# cop=4 MVE +# +# For each point +# consider the line between it and the center +# project all points onto this line, and +# check for outliers using +# +# MM=F, a boxplot rule. +# MM=T, rule based on MAD and median +# +# Repeat this for all points. A point is declared +# an outlier if for any projection it is an outlier +# using a modification of the usual boxplot rule. +# +# Eliminate any outliers and compute means +# using remaining data. 
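+# As in smean2v2, the null vector nullv is compared with the bootstrap skipped
+# means via projection distances (pdis or pdisMC), and the critical level is
+# reduced for small sample sizes.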
+# +if(SEED)set.seed(2) +m<-elimna(m) +n<-nrow(m) +est=smean(m,MC=MC,cop=cop,STAND=STAND) +crit.level<-.05 +if(n<=120)crit.level<-.045 +if(n<=80)crit.level<-.04 +if(n<=60)crit.level<-.035 +if(n<=40)crit.level<-.03 +if(n<=30)crit.level<-.025 +if(n<=20)crit.level<-.02 +data<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot) +val<-matrix(NA,ncol=ncol(m),nrow=nboot) +for(j in 1: nboot){ +mm<-m[data[j,],] +val[j,]<-smean(mm,MC=MC,cop=cop,STAND=STAND) +} +if(!MC)temp<-pdis(rbind(val,nullv),center=est) +if(MC)temp<-pdisMC(rbind(val,nullv),center=est) +sig.level<-sum(temp[nboot+1] kmax) { + warning("Attention robpca: The number of principal components k = ", k, " is larger then kmax = ", kmax, "; k is set to ", kmax,".") + k <- kmax + } + if(!missing(h) & !missing(alpha)) { + stop("Error in robpca: Both inputarguments alpha and h are provided. Only one is required.") + } + if(missing(h) & missing(alpha)) { + h <- min(floor(2*floor((n+kmax+1)/2)-n+2*(n-floor((n+kmax+1)/2))*alpha),n) + } + if(!missing(h) & missing(alpha)) { + alpha <- h/n + if(k==0) { + if(h < floor((n+kmax+1)/2)) { + h <- floor((n+kmax+1)/2) + alpha <- h/n + warning("Attention robpca: h should be larger than (n+kmax+1)/2. It is set to its minimum value ", h, ".") + } + } + else { + if(h < floor((n+k+1)/2)) { + h <- floor((n+k+1)/2) + alpha <- h/n + warning("Attention robpca: h should be larger than (n+k+1)/2. It is set to its minimum value ", h, ".") + } + } + if(h > n) { + alpha <- 0.75 + if(k==0) { + h <- floor(2*floor((n+kmax+1)/2)-n+2*(n-floor((n+kmax+1)/2))*alpha) + } + else { + h <- floor(2*floor((n+k+1)/2)-n+2*(n-floor((n+k+1)/2))*alpha) + } + warning("Attention robpca: h should be smaller than n = ", n, ". It is set to its default value ", h, ".") + } + } + if(missing(h) & !missing(alpha)) { + if(alpha < 0.5) { + alpha <- 0.5 + warning("Attention robpca: Alpha should be larger then 0.5. It is set to 0.5.") + } + if(alpha >= 1) { + alpha <- 0.75 + warning("Attention robpca: Alpha should be smaller then 1. It is set to its default value 0.75.") + + + } + if(k==0) { + h <- floor(2*floor((n+kmax+1)/2)-n+2*(n-floor((n+kmax+1)/2))*alpha) + } + else { + h <- floor(2*floor((n+k+1)/2)-n+2*(n-floor((n+k+1)/2))*alpha) + } + } + labsd <- floor(max(0,min(labsd,n))) + labod <- floor(max(0,min(labod,n))) + + out <- list() + + Xa <- X.svd$scores + center <- X.svd$centerofX + rot <- X.svd$loadings + p1 <- ncol(Xa) + if( (p1 <= min(floor(n/5), kmax)) & (mcd == 1 ) ) { + if(k != 0) { + k <- min(k, p1) + } + else { + k <- p1 +# cat("Message from robpca: The number of principal +# components is defined by the algorithm. It is set to ", k,".\n", sep="") + } + if(h < floor((nrow(Xa) + ncol(Xa) +1)/2)) { + h <- floor((nrow(Xa) + ncol(Xa) +1)/2) + cat("Message from robpca: The number of non-outlying observations h is set to ", h," in order to make the mcd algorithm function.\n", sep="") + } +# Xa.mcd <- cov.mcd(as.data.frame(Xa), quan=h, print=FALSE) +Xa.mcd <- cov.mcd(as.data.frame(Xa), quan=h) # R version +#print(Xa.mcd$method) +#if(length(grep("equation", Xa.mcd$method)) == 1) { +# print(Xa.mcd$method) +# stop("The ROBPCA algorithm can not deal with this +# result from the FAST-MCD algorithm. 
The algorithm is aborted.") +# } +#print("OUT") + Xa.mcd.svd <- svd(Xa.mcd$cov) + scores <- (Xa - matrix(data=rep(Xa.mcd$center, times=nrow(Xa)), nrow=nrow(Xa), ncol=ncol(Xa), byrow=TRUE)) %*% Xa.mcd.svd$u + out$M <- center + as.vector(Xa.mcd$center %*% t(rot)) + out$L <- Xa.mcd.svd$d[1:k] +# +if(scree){ +pv=out$L +cs=pv/sum(pv) +cm=cumsum(cs) +plot(rep(c(1:ncol(x)),2),c(cs,cm),type="n",xlab=xlab,ylab=ylab) +points(c(1:ncol(x)),cs,pch="*") +lines(c(1:ncol(x)),cs,lty=1) +points(c(1:ncol(x)),cm,pch=".") +lines(c(1:ncol(x)),cm,lty=2) +} + + out$P <- X.svd$loadings %*% Xa.mcd.svd$u[,1:k] + out$T <- as.matrix(scores[,1:k]) + if(is.list(dimnames(data))) { + dimnames(out$T)[[1]] <- dimnames(data)[[1]] + } + out$h <- h + out$k <- k + out$alpha <- alpha + } + else { + directions <- choose(n,2) + ndirect <- min(250, directions) + all <- (ndirect == directions) + seed <- 0 + B <- extradir(Xa, ndirect, seed, all) + Bnorm <- vector(mode="numeric", length=nrow(B)) + Bnorm<-apply(B,1,vecnorm) + Bnormr <- Bnorm[Bnorm > 1.E-12] + B <- B[Bnorm > 1.E-12,] + A <- diag(1/Bnormr) %*% B + Y <- Xa %*% t(A) + Z <- matrix(data=0, nrow=n, ncol=length(Bnormr)) + for(i in 1:ncol(Z)) { + univ <- unimcd(Y[,i],quan = h) + if(univ$smcd < 1.E-12) { + r2 <- qr(data[univ$weights==1,])$rank + if(r2 == 1) { + stop("Error in robpca: At least ", sum(univ$weights), " observations are identical.") + } + } + else { + Z[,i] <- abs(Y[,i] - univ$tmcd) / univ$smcd + } + } + H0 <- order(apply(Z, 1, max)) + + Xh <- Xa[H0[1:h],] + Xh.svd <- classSVD(Xh) + + kmax <- min(Xh.svd$rank, kmax) + if( (k == 0) & (plots == 0) ) { + test <- which((Xh.svd$eigenvalues/Xh.svd$eigenvalues[1]) <= 1.E-3) + if(length(test) != 0) { + k <- min(min(Xh.svd$rank, test[1]), kmax) + } + else { + k <- min(Xh.svd$rank, kmax) + } + cumulative <- cumsum(Xh.svd$eigenvalues[1:k]) / sum(Xh.svd$eigenvalues) + if(cumulative[k] > 0.8) { + k <- which(cumulative >= 0.8)[1] + } + cat("Message from robpca: The number of principal components is set by the algorithm. 
It is set to ", k, ".\n", sep="") + } + else { + if( (k==0) & (plots != 0) ) { + loc <- 1:kmax + plot(loc, Xh.svd$eigenvalues[1:kmax], type='b', axes= FALSE, xlab="Component", ylab="Eigenvalue") + axis(2) + axis(1, at=loc) + cumv <- cumsum(Xh.svd$eigenvalues)/sum(Xh.svd$eigenvalues) + text(loc, Xh.svd$eigenvalues[1:kmax] + par("cxy")[2], as.character(signif(cumv[1:kmax], 2))) + box <- dialogbox(title="ROBPCA", controls=list(),buttons = c("OK")) + box <- dialogbox.add.control(box, where=1, statictext.control(paste("How many principal components would you like to retain?\nMaximum = ", kmax, sep=""), size=c(200,20))) + box <- dialogbox.add.control(box, where=2, editfield.control(label="Your choice:", size=c(30,10))) + input <- as.integer(dialogbox.display(box)$values$"Your choice:") + k <- max(min(min(Xh.svd$rank, input), kmax), 1) + } + else { + k <- min(min(Xh.svd$rank, k), kmax) + } + } + if(k!=X.svd$rank){ + XRc <- Xa-matrix(data=rep(Xh.svd$centerofX, times=nrow(Xa)), nrow=nrow(Xa), ncol=ncol(Xa), byrow=TRUE) + Xtilde <- XRc%*%Xh.svd$loadings[,1:k]%*%t(Xh.svd$loadings[,1:k]) + Rdiff <- XRc-Xtilde + odh <- apply(Rdiff,1,vecnorm) + ms <- unimcd(odh^(2/3),h) + cutoffodh <- sqrt(qnorm(0.975,ms$tmcd,ms$smcd)^3) + indexset <- (odh<=cutoffodh) + Xh.svd <- classSVD(Xa[indexset,]) + kmax <- min(Xh.svd$rank, kmax) + } + + center <- center + Xh.svd$centerofX %*% t(rot) + rot <- rot %*% Xh.svd$loadings + Xstar<- (Xa - matrix(data=rep(Xh.svd$centerofX, times=nrow(Xa)), nrow=nrow(Xa), ncol=ncol(Xa), byrow=TRUE)) %*% Xh.svd$loadings + Xstar <- as.matrix(Xstar[,1:k]) + rot <- as.matrix(rot[,1:k]) + mah <- mahalanobis(Xstar, center=rep(0, ncol(Xstar)), cov=diag(Xh.svd$eigenvalues[1:k], nrow=k)) + oldobj <- prod(Xh.svd$eigenvalues[1:k]) + niter <- 100 + for(j in 1:niter) { + mah.order <- order(mah) + Xh <- as.matrix(Xstar[mah.order[1:h],]) + Xh.svd <- classSVD(Xh) + obj <- prod(Xh.svd$eigenvalues) + Xstar <- (Xstar - matrix(data=rep(Xh.svd$centerofX, times=nrow(Xstar)), nrow=nrow(Xstar), ncol=ncol(Xstar), byrow=TRUE)) %*% Xh.svd$loadings + center <- center + Xh.svd$centerofX %*% t(rot) + rot <- rot %*% Xh.svd$loadings + mah <- mahalanobis(Xstar, center=rep(0, ncol(Xstar)), cov=diag(x=Xh.svd$eigenvalues, nrow=length(Xh.svd$eigenvalues))) + if( (Xh.svd$rank == k) & ( abs(oldobj - obj) < 1.E-12) ) { + break + } + else { + oldobj <- obj + if(Xh.svd$rank < k) { + j <- 1 + k <- Xh.svd$rank + } + } + } +Xstar.mcd <- cov.mcd(as.data.frame(Xstar), quan=h) # R version +# if(Xstar.mcd$raw.objective < obj) { + covf <- Xstar.mcd$cov + centerf <- Xstar.mcd$center +# } +# else { +# consistencyfactor <- median(mah)/qchisq(0.5,k) +# mah <- mah/consistencyfactor +# weights <- ifelse(mah <= qchisq(0.975, k), T, F) +# noMCD <- weightmecov(Xstar, weights, n, k) +# centerf <- noMCD$center +# covf <- noMCD$cov +# } + + covf.eigen <- eigen(covf) + covf.eigen.values.sort <- greatsort(covf.eigen$values) + P6 <- covf.eigen$vectors + P6 <- covf.eigen$vectors[,covf.eigen.values.sort$index] + +out$T <- (Xstar - matrix(data=rep(centerf, times=n), nrow=n, ncol=ncol(Xstar), byrow=TRUE)) %*% covf.eigen$vectors[,covf.eigen.values.sort$index] + + if(is.list(dimnames(data))) { + dimnames(out$T)[[1]] <- dimnames(data)[[1]] + } + out$P <- rot %*% covf.eigen$vectors[,covf.eigen.values.sort$index] + out$M <- as.vector(center + centerf %*% t(rot)) + out$L <- as.vector(covf.eigen$values) + out$k <- k + out$h <- h + + out$alpha <- alpha + } + oldClass(out) <- "robpca" + out <- CompRobustDist(data, X.svd$rank, out, classic) + if(classic == 1) { + out 
<- CompClassicDist(X.svd, out) + } + if(plots == 1) { + plot(out, classic, labod=labod, labsd=labsd) + } + return(out) +} +"greatsort"<-function(vec){ + x <- vec * (-1) + index <- order(x) + return(list(sortedvector=rev(sort(vec)), index=index)) +} +"classSVD"<-function(x){ + if(!is.matrix(x)) { + stop("The function classSVD requires input of type 'matrix'.") + } + n <- nrow(x) + p <- ncol(x) + if(n == 1) { + stop("The sample size is 1. No singular value decomposition can be performed.") + } + if(p < 5) { + tolerance <- 1E-12 + } + else { + if(p <= 8) { + tolerance <- 1E-14 + } + else { + tolerance <- 1E-16 + } + } + centerofX <- apply(x, 2, mean) + Xcentered <- scale(x, center=TRUE, scale=FALSE) + XcenteredSVD <- svd(Xcentered/sqrt(n-1)) + rank <- sum(XcenteredSVD$d > tolerance) + eigenvalues <- (XcenteredSVD$d[1:rank])^2 + loadings <- XcenteredSVD$v[,1:rank] + scores <- Xcentered %*% loadings + return(list(loadings=as.matrix(loadings), scores=as.matrix(scores), eigenvalues=as.vector(eigenvalues), rank=rank, + Xcentered=as.matrix(Xcentered), centerofX=as.vector(centerofX))) +} +"kernelEVD"<-function(x){ + if(!is.matrix(x)) { + stop("The function kernelEVD requires input of type 'matrix'.") + } + n <- nrow(x) + p <- ncol(x) + if(n > p) { + return(classSVD(x)) + } + else { + centerofX <- apply(x, 2, mean) + Xcentered <- scale(x, center=TRUE, scale=FALSE) + if(n == 1) { + stop("The sample size is 1. No singular value decomposition can be performed.") + } + eigen <- eigen(Xcentered %*% t(Xcentered)/(n-1)) + eigen.descending <- greatsort(eigen$values) + loadings <- eigen$vectors[,eigen.descending$index] + tolerance <- n * max(eigen$values) * .Machine$double.eps + rank <- sum(eigen.descending$sortedvector > tolerance) + eigenvalues <- eigen.descending$sortedvector[1:rank] + loadings <- t((Xcentered/sqrt(n-1))) %*% loadings[,1:rank] %*% diag(1/sqrt(eigenvalues), nrow=length(eigenvalues), ncol=length(eigenvalues)) + scores <- Xcentered %*% loadings + return(list(loadings=as.matrix(loadings), scores=as.matrix(scores), eigenvalues=as.vector(eigenvalues), rank=rank, + Xcentered=as.matrix(Xcentered), centerofX=as.vector(centerofX))) + } +} +"extradir"<-function(data, ndirect, seed=0, all=TRUE){ + n <- nrow(data) + p <- ncol(data) + B2 <- matrix(data=0, nrow = ndirect, ncol = p) + rowindex <- 1 + i <- 1 + if(all == T) { + while( (i < n) & (rowindex <= ndirect) ) { + j <- i + 1 + while( (j <= n) & (rowindex <= ndirect) ) { + B2[rowindex,] <- data[i,] - data[j,] + j <- j + 1 + rowindex <- rowindex + 1 + } + i <- i + 1 + } + } + else { + while(rowindex <= ndirect) { + sseed<-randomset(n,2,seed) + seed<-sseed$seed + B2[rowindex,] <- data[sseed$ranset[1],] - data[sseed$ranset[2],] + rowindex <- rowindex + 1 + } + } + return(B2) +} +"randomset"<-function(tot,nel,seed){ +out<-list() +for(j in 1:nel){ + randseed<-uniran(seed) + seed<-randseed$seed + num<-floor(randseed$random*tot)+1 + if(j > 1){ + while(any(out$ranset==num)){ + randseed<-uniran(seed) + seed<-randseed$seed + num<-floor(randseed$random*tot)+1 + + } + } + out$ranset[j]<-num + } + out$seed<-seed + return(out) +} +"uniran"<-function(seed = 0){ + out <- list() + seed<-floor(seed*5761)+999 + quot<-floor(seed/65536) + out$seed<-floor(seed)-floor(quot*65536) + out$random<-out$seed/65536 + return(out) +} +"unimcd"<-function(y,quan){ + out<-list() + ncas<-length(y) + len<-ncas-quan+1 + if(len==1){ + out$tmcd<-mean(y) + out$smcd<-sqrt(var(y)) + } + else { + ay<-c() + I<-order(y) + y<-y[I] + ay[1]<-sum(y[1:quan]) + for(samp in 2:len){ + 
ay[samp]<-ay[samp-1]-y[samp-1]+y[samp+quan-1] + } + ay2<-ay^2/quan + sq<-c() + sq[1]<-sum(y[1:quan]^2)-ay2[1] + for(samp in 2:len){ + sq[samp]<-sq[samp-1]-y[samp-1]^2+y[samp+quan-1]^2-ay2[samp]+ay2[samp-1] + } + sqmin<-min(sq) + Isq<-order(sq) + ndup<-sum(sq == sqmin) + ii<-Isq[1:ndup] + slutn<-c() + slutn[1:ndup]<-ay[ii] + initmean<-slutn[floor((ndup+1)/2)]/quan + initcov<-sqmin/(quan-1) + res<-(y-initmean)^2/initcov + sortres<-sort(res) + factor<-sortres[quan]/qchisq(quan/ncas,1) + initcov<-factor*initcov + res<-(y-initmean)^2/initcov + quantile<-qchisq(0.975,1) + out$weights<-(res9){ +if(pr)print("With more than 9 variables, might want to use ADJ=T") +} +if(!ADJ)flag<-outpro(x,cop=cop,STAND=STAND,plotit=FALSE)$keep +if(ADJ)flag<-outproad(x,cop=cop,SEED=SEED,STAND=STAND)$results$keep +remx<-x +temp2<-princomp(remx) +x<-x[flag,] +loc<-apply(x,2,mean) +temp<-princomp(x,cor=cor,scores=TRUE) +if(scree){ +z=temp$sdev +pv=z^2 +cs=pv/sum(pv) +cm=cumsum(cs) +plot(rep(c(1:ncol(x)),2),c(cs,cm),type="n",xlab=xlab,ylab=ylab) +points(c(1:ncol(x)),cs,pch="*") +lines(c(1:ncol(x)),cs,lty=1) +points(c(1:ncol(x)),cm,pch=".") +lines(c(1:ncol(x)),cm,lty=2) +} +if(!SCORES)temp<-summary(temp,loadings=loadings) +if(SCORES){ +if(is.null(pval)) +stop("When computing scores, specify pval, number of components") +if (!ALL)temp<-temp$scores[,1:pval] +if(ALL){ +temp<-summary(temp,loadings=TRUE) +B<-temp[2]$loadings[1:m,1:m] # Use robust loadings + z<-remx +for(i in 1:nrow(z))z[i,]<-z[i,]-loc +temp<-t(B)%*%t(z) +temp<-t(temp) +temp<-temp[,1:pval] +}} +temp +} + +mcp2a<-function(J,K,x,est=mom,con=NULL,alpha=.05,nboot=NA,grp=NA,...){ +# +# Do all pairwise comparisons of +# main effects for Factor A and B and all interactions +# + # The data are assumed to be stored in x + # in list mode or in a matrix. + # If grp is unspecified, it is assumed x[[1]] contains the data + # for the first level of both factors: level 1,1. + # x[[2]] is assumed to contain the data for level 1 of the + # first factor and level 2 of the second factor: level 1,2 + # x[[j+1]] is the data for level 2,1, etc. + # If the data are in wrong order, grp can be used to rearrange the + # groups. For example, for a two by two design, grp<-c(2,4,3,1) + # indicates that the second group corresponds to level 1,1; + # group 4 corresponds to level 1,2; group 3 is level 2,1; + # and group 1 is level 2,2. + # + # Missing values are automatically removed. + # + if(identical(est,median))print('Warning: med2mcp is a better when using the usual sample median') + JK <- J * K + if(is.matrix(x)) + x <- listm(x) + if(!is.na(grp)) { + yy <- x + for(j in 1:length(grp)) + x[[j]] <- yy[[grp[j]]] + } + if(!is.list(x)) + stop("Data must be stored in list mode or a matrix.") +mvec<-NA + tempn=0 + for(j in 1:JK) { + xx <- x[[j]] + x[[j]] <- xx[!is.na(xx)] + mvec[j]<-est(x[[j]],...) +tempn[j]=length(x[[j]]) + } +nmax=max(tempn) + # + # Create the three contrast matrices + # + if(JK != length(x)) + warning("The number of groups does not match the number of contrast coefficients.") +set.seed(2) # set seed of random number generator so that +# results can be duplicated. +# Determine nboot if a value was not specified +if(is.na(nboot)){ +nboot<-5000 +if(J <= 8)nboot<-4000 +if(J <= 3)nboot<-2000 +} +bvec<-matrix(NA,nrow=JK,ncol=nboot) +for(j in 1:JK){ +data<-matrix(sample(x[[j]],size=length(x[[j]])*nboot,replace=TRUE),nrow=nboot) +bvec[j,]<-apply(data,1,est,...) 
# J by nboot matrix, jth row contains +# bootstrapped estimates for jth group +} +outvec<-list() +if(!is.null(con))stop('Use linconm when specifying the linear contrast coefficients') +temp3<-con2way(J,K) +for(jj in 1:3){ +con<-temp3[[jj]] +con<-as.matrix(con) +ncon<-ncol(con) +# Determine critical values +if(alpha==.05){ +dvec<-c(.025,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) +if(ncon > 10){ +avec<-.05/c(11:ncon) +dvec<-c(dvec,avec) +}} +if(alpha==.01){ +dvec<-c(.005,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) +if(ncon > 10){ +avec<-.01/c(11:ncon) +dvec<-c(dvec,avec) +}} +if(nmax>80){ +if(alpha==.05){ +dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) +if(ncon > 10){ +avec<-.05/c(11:ncon) +dvec<-c(dvec,avec) +}} +if(alpha==.01){ +dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) +if(ncon > 10){ +avec<-.01/c(11:ncon) +dvec<-c(dvec,avec) +}} +if(alpha != .05 && alpha != .01){ +dvec<-alpha/c(1:ncon) +}} +if(alpha != .05 && alpha != .01)dvec<-alpha/c(1:ncon) +test<-NA +bcon<-t(con)%*%bvec #ncon by nboot matrix +tvec<-t(con)%*%mvec +for (d in 1:ncon){ +test[d]<-sum(bcon[d,]>0)/nboot +if(test[d]> .5)test[d]<-1-test[d] +} +output<-matrix(0,ncon,6) +dimnames(output)<-list(NULL,c("con.num","psihat","sig.test","sig.crit","ci.lower","ci.upper")) +temp2<-order(0-test) +zvec<-dvec[1:ncon] +sigvec<-(test[temp2]>=zvec) +if(sum(sigvec)0){ +print("Some bootstrap estimates of the test statistic could not be computed") +print("Effective number of bootstrap samples was") +print(sum(!is.na(testb))) +} +test<-t1wayv2(x,tr=tr,grp=grp) +pval<-mean(test$TEST<=testb,na.rm=TRUE) +list(test=test$TEST,p.value=pval,Var.Explained=test$Var.Explained,Effect.Size=test$Effect.Size) +} + +cidM<-function(x,nboot=1000,alpha=.05,MC=FALSE,SEED=TRUE,g=NULL,dp=NULL){ +# +# Variation of Cliff method based on median of X-Y +# i.e., use p=P(XY)","p.hat")) +dvec<-alpha/c(1:CC) +for(j in 1:J){ +for(k in 1:J){ +if(j0)+.5*mean(MAT[,jcom]==0) +pvec[jcom]=2*min(c(p.value,1-p.value)) +if(is.na(pvec[jcom]))pvec=1 +test[jcom,1]<-j +test[jcom,2]<-k +test[jcom,3]<-pvec[jcom] +test[jcom,5:7]<-cid(x[[j]],x[[k]])$summary.dvals +test[jcom,8]<-test[jcom,5]+.5*test[jcom,6] +}}} +temp2<-order(0-test[,3]) +test[temp2,4]=dvec +list(test=test) +} + +msmedse<-function(x){ +# +# Compute standard error of the median using method +# recommended by McKean and Shrader (1984). +# +x=elimna(x) +chk=sum(duplicated(x)) +if(chk>0){ +print("WARNING: tied values detected.") +print("Estimate of standard error might be highly inaccurate, even with n large") +} +y<-sort(x) +n<-length(x) +av<-round((n+1)/2-qnorm(.995)*sqrt(n/4)) +if(av==0)av<-1 +top<-n-av+1 +sqse<-((y[top]-y[av])/(2*qnorm(.995)))^2 +sqse<-sqrt(sqse) +sqse +} + + + + +t1waybtv2<-function(x,tr=.2,grp=NA,g=NULL,dp=NULL,nboot=599,SEED=TRUE){ +# +# Test the hypothesis of equal trimmed mdeans, corresponding to J independent +# groups, using a bootstrap-t method. +# +# The data are assumed to be stored in x in list mode +# or in a matrix. In the first case +# x[[1]] contains the data for the first group, x[[2]] the data +# for the second group, etc. Length(x)=the number of groups = J. +# If stored in a matrix, columns correspond to groups. +# +# grp is used to specify some subset of the groups, if desired. +# By default, all J groups are used. 
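+# Bootstrap-t strategy: each group is first centered at its trimmed mean so the
+# null hypothesis holds in the bootstrap world; the test statistic is recomputed
+# for each bootstrap sample, and the p-value is the proportion of bootstrap
+# statistics that are at least as large as the statistic for the original data.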
+# g=NULL, x is assumed to be a matrix or have list mode +# +# if g is specifed, it is assumed that column g of x is +# a factor variable and that the dependent variable of interest is in column +# dp of x, which can be a matrix or data frame. +# +# The default number of bootstrap samples is nboot=599 +# +if(!is.null(g)){ +if(is.null(dp))stop("Specify a value for dp, the column containing the data") +x=fac2list(x[,dp],x[,g]) +} +if(is.matrix(x))x<-listm(x) +if(!is.list(x))stop("Data must be stored in a matrix or in list mode.") +if(is.na(grp[1]))grp<-c(1:length(x)) +J<-length(grp) +nval=NA +x=lapply(x,elimna) +nval=lapply(x,length) +xbar=lapply(x,mean,tr=tr) +bvec<-array(0,c(J,2,nboot)) +hval<-vector("numeric",J) +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +print("Taking bootstrap samples. Please wait.") +for(j in 1:J){ +hval[j]<-length(x[[grp[j]]])-2*floor(tr*length(x[[grp[j]]])) + # hval is the number of observations in the jth group after trimming. +print(paste("Working on group ",grp[j])) +xcen<-x[[grp[j]]]-mean(x[[grp[j]]],tr) +data<-matrix(sample(xcen,size=length(x[[grp[j]]])*nboot,replace=TRUE),nrow=nboot) +bvec[j,,]<-apply(data,1,trimparts,tr) # A 2 by nboot matrix. The first row +# contains the bootstrap trimmed means, the second row +# contains the bootstrap squared standard errors. +} +m1<-bvec[,1,] # J by nboot matrix containing the bootstrap trimmed means +m2<-bvec[,2,] # J by nboot matrix containing the bootstrap sq standard errors +wvec<-1/m2 # J by nboot matrix of w values +uval<-apply(wvec,2,sum) # Vector having length nboot +blob<-wvec*m1 +xtil<-apply(blob,2,sum)/uval # nboot vector of xtil values +blob1<-matrix(0,J,nboot) +for (j in 1:J)blob1[j,]<-wvec[j,]*(m1[j,]-xtil)^2 +avec<-apply(blob1,2,sum)/(length(x)-1) +blob2<-(1-wvec/uval)^2/(hval-1) +cvec<-apply(blob2,2,sum) +cvec<-2*(length(x)-2)*cvec/(length(x)^2-1) +testb<-avec/(cvec+1) +# A vector of length nboot containing bootstrap test values +ct<-sum(is.na(testb)) +if(ct>0)print("Some bootstrap estimates of the test statistic could not be computed") +test<-t1way(x,tr=tr,grp=grp) +pval<-sum(test$TEST<=testb)/nboot +# +# Determine explanatory effect size +# +e.pow=t1wayv2(x)$Var.Explained +list(test=test$TEST,p.value=pval,Explanatory.Power=e.pow, +Effect.Size=sqrt(e.pow)) +} + + + + +t2wayv2<-function(J,K,data,tr=.2,grp=c(1:p),p=J*K,g=NULL,dp=NULL,pr=TRUE){ +# Perform a J by K (two-way) anova on trimmed means where +# all groups are independent. +# +# The R variable data is assumed to contain the raw +# data stored in list mode, or a matrix with columns +# corresponding to groups. If stored in list mode, data[[1]] contains the data +# for the first level of all three factors: level 1,1,. +# data[[2]] is assumed to contain the data for level 1 of the +# first factor and level 2 of the second factor: level 1,2 +# +# The default amount of trimming is tr=.2 +# +# It is assumed that data has length JK, the total number of +# groups being tested. +# +# g=NULL, x is assumed to be a matrix or have list mode +# +# if g is specifed, it is assumed that column g of x is +# a factor variable and that the dependent variable of interest is in column +# dp of x, which can be a matrix or data frame. 
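+# P-values are obtained by searching alpha = .001, .002, ..., .999 for the
+# smallest level at which the Johansen-type test statistic exceeds its critical
+# value; this search is done separately for Factor A, Factor B and A-by-B.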
+# +if(!is.null(g[1])){ +if(length(g)!=2)stop("Argument g should have two values") +if(is.null(dp[1])) +stop("Specify a value for dp, the column containing the data") +data=fac2list(data[,dp],data[,g]) +} +if(is.matrix(data))data=listm(data) +if(!is.list(data))stop("Data are not stored in list mode") +if(p!=length(data)){ +print("The total number of groups, based on the specified levels, is") +print(p) +print("The number of groups in data is") +print(length(data)) +print("Warning: These two values are not equal") +} +tmeans<-0 +h<-0 +v<-0 +for (i in 1:p){ +data[[grp[i]]]=elimna(data[[grp[i]]]) +tmeans[i]<-mean(data[[grp[i]]],tr) +h[i]<-length(data[[grp[i]]])-2*floor(tr*length(data[[grp[i]]])) +# h is the effective sample size + if(winvar(data[[grp[i]]],tr)==0)print(paste('The Winsorized variance is zero for group',i)) +v[i]<-(length(data[[grp[i]]])-1)*winvar(data[[grp[i]]],tr)/(h[i]*(h[i]-1)) +# v contains the squared standard errors +} +v<-diag(v,p,p) # Put squared standard errors in a diag matrix. +ij<-matrix(c(rep(1,J)),1,J) +ik<-matrix(c(rep(1,K)),1,K) +jm1<-J-1 +cj<-diag(1,jm1,J) +for (i in 1:jm1)cj[i,i+1]<-0-1 +km1<-K-1 +ck<-diag(1,km1,K) +for (i in 1:km1)ck[i,i+1]<-0-1 +# Do test for factor A +#cmat<-kron(cj,kron(ik,il)) # Contrast matrix for factor A +cmat<-kron(cj,ik) # Contrast matrix for factor A +alval<-c(1:999)/1000 +for(i in 1:999){ +irem<-i +Qa<-johan(cmat,tmeans,v,h,alval[i]) +if(Qa$teststat>Qa$crit)break +} +A.p.value=irem/1000 +# Do test for factor B +cmat<-kron(ij,ck) # Contrast matrix for factor B +for(i in 1:999){ +irem<-i +Qb<-johan(cmat,tmeans,v,h,alval[i]) +if(Qb$teststat>Qb$crit)break +} +B.p.value=irem/1000 +# Do test for factor A by B interaction +cmat<-kron(cj,ck) # Contrast matrix for factor A by B +for(i in 1:999){ +irem<-i +Qab<-johan(cmat,tmeans,v,h,alval[i]) +if(Qab$teststat>Qab$crit)break +} +AB.p.value=irem/1000 +tmeans=matrix(tmeans,J,K,byrow=TRUE) +list(Qa=Qa$teststat,A.p.value=A.p.value, +Qb=Qb$teststat,B.p.value=B.p.value, +Qab=Qab$teststat,AB.p.value=AB.p.value,means=tmeans) +} + + + +lpindt<-function(x,y,nboot=500,xout=FALSE,outfun=out){ +# +# Test the hypothesis of no association based on the fit obtained +# from lplot (Cleveland's LOESS) +# +m<-elimna(cbind(x,y)) +x<-as.matrix(x) +p<-ncol(x) +pp<-p+1 +x<-m[,1:p] +y<-m[,pp] +if(xout){ +m<-cbind(x,y) +flag<-outfun(x,plotit=FALSE)$keep +m<-m[flag,] +x<-m[,1:p] +y<-m[,pp] +} +n=length(y) +data1<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot) +data2<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot) +val=NA +x=as.matrix(x) +for(i in 1:nboot){ +val[i]=lplot(x[data1[i,],],y[data2[i,]],plotit=FALSE,pr=FALSE)$Strength.Assoc +} +val=sort(val) +est=lplot(x,y,plotit=FALSE,pr=FALSE)$Strength.Assoc +p.value=mean((est4)stop("x should have at most four columns of data") +m<-elimna(cbind(x,y)) +if(xout && eout)stop("Can't have xout=eout=T") +if(eout){ +flag<-outfun(m)$keep +m<-m[flag,] +} +if(xout){ +flag<-outfun(x,plotit=FALSE)$keep +m<-m[flag,] +} +x<-m[,1:np] +x=as.matrix(x) +y<-m[,np1] +if(!sop){ +if(ncol(x)==1)fitr<-fitted(gam(y~x[,1])) +if(ncol(x)==2)fitr<-fitted(gam(y~x[,1]+x[,2])) +if(ncol(x)==3)fitr<-fitted(gam(y~x[,1]+x[,2]+x[,3])) +if(ncol(x)==4)fitr<-fitted(gam(y~x[,1]+x[,2]+x[,3]+x[,4])) +} +if(sop){ +if(ncol(x)==1)fitr<-fitted(gam(y~s(x[,1]))) +if(ncol(x)==2)fitr<-fitted(gam(y~s(x[,1])+s(x[,2]))) +if(ncol(x)==3)fitr<-fitted(gam(y~s(x[,1])+s(x[,2])+s(x[,3]))) +if(ncol(x)==4)fitr<-fitted(gam(y~s(x[,1])+s(x[,2])+s(x[,3])+s(x[,4]))) +} +last<-fitr +if(plotit){ +if(ncol(x)==1){ 
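+# One predictor: plot the fitted values returned by gam against x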
+plot(x,fitr,xlab=xlab,ylab=ylab) +} +if(ncol(x)==2){ +iout<-c(1:length(fitr)) +nm1<-length(fitr)-1 +for(i in 1:nm1){ +ip1<-i+1 +for(k in ip1:length(fitr))if(sum(x[i,]==x[k,])==2)iout[k]<-0 +} +fitr<-fitr[iout>=1] # Eliminate duplicate points in the x-y plane +# This is necessary when doing three dimensional plots +# with the S-PLUS function interp +mkeep<-x[iout>=1,] +fitr<-interp(mkeep[,1],mkeep[,2],fitr) +persp(fitr,theta=theta,phi=phi,expand=expand,xlab="x1",ylab="x2",zlab="", +scale=scale,ticktype=ticktype) +} +} +top=varfun(last) +ep=top/varfun(y) +if(ep>=1)ep=cor.fun(last,y)$cor^2 +eta=sqrt(ep) +st.adj=NULL +e.adj=NULL +if(ADJ){ +x=as.matrix(x) +val=NA +n=length(y) +data1<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot) +data2<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot) +for(i in 1:nboot){ +temp=gamplotv2.sub(x[data1[i,],],y[data2[i,]],plotit=FALSE) +val[i]=temp$Explanatory.power +} +vindt=median(val) +v2indt=median(sqrt(val)) +st.adj=(sqrt(ep)-max(c(0,v2indt)))/(1-max(c(0,v2indt))) +e.adj=(ep-max(c(0,vindt)))/(1-max(c(0,vindt))) +st.adj=max(c(0,st.adj)) +e.adj=max(c(0,e.adj)) +} +eta=as.matrix(eta) +ep=as.matrix(ep) +dimnames(eta)=NULL +dimnames(ep)=NULL +eta=eta[1] +ep=ep[1] +list(Strength.Assoc=eta,Explanatory.power=ep, +Strength.Adj=st.adj,Explanatory.Adj=e.adj) +} + +cidmul<-function(x,alpha=.05,g=NULL,dp=NULL,pr=TRUE){ +# +# Perform Cliff's method for all pairs of J independent groups. +# Unlike the function meemul, ties are allowed. +# The familywise type I error probability is controlled by using +# a critical value from the Studentized maximum modulus distribution. +# +# The data are assumed to be stored in $x$ in list mode. +# Length(x) is assumed to correspond to the total number of groups, J. +# It is assumed all groups are independent. +# +# Missing values are automatically removed. +# +# The default value for alpha is .05. Any other value results in using +# alpha=.01. +# +if(pr)print('cidmulv2 might provide better power') +if(!is.null(g)){ +if(is.null(dp))stop("Specify a value for dp, the column containing the data") +x=fac2list(x[,dp],x[,g]) +} +if(is.matrix(x) || is.data.frame(x))x<-listm(x) +if(!is.list(x))stop("Data must be stored in a matrix or in list mode.") +J<-length(x) +CC<-(J^2-J)/2 +test<-matrix(NA,CC,7) +for(j in 1:J){ +xx<-!is.na(x[[j]]) +val<-x[[j]] +x[[j]]<-val[xx] # Remove missing values +} +dimnames(test)<-list(NULL,c("Group","Group","d","ci.lower","ci.upper", +"p.hat","p-value")) +jcom<-0 +crit<-smmcrit(200,CC) +if(alpha!=.05)crit<-smmcrit01(200,CC) +alpha<-1-pnorm(crit) +n=matl(lapply(x,length)) +for (j in 1:J){ +for (k in 1:J){ +if (j < k){ +temp<-cid(x[[j]],x[[k]],alpha,plotit=FALSE) +temp2<-cidv2(x[[j]],x[[k]],alpha,plotit=FALSE) +jcom<-jcom+1 +test[jcom,1]<-j +test[jcom,2]<-k +test[jcom,3]<-temp$d +test[jcom,4]<-temp$cl +test[jcom,5]<-temp$cu +test[jcom,6]<-temp$phat +test[jcom,7]<-temp2$p.value +}}} +list(n=n,test=test) +} + + cidmulv2<-function(x,alpha=.05,g=NULL,dp=NULL,CI.FWE=FALSE){ +# +# Perform Cliff's method for all pairs of J independent groups. +# The familywise type I error probability is controlled via +# Hochberg's method. +# +# The data are assumed to be stored in $x$ in list mode or in a +# matrix with J columns, columns corresponding to groups. +# +# It is assumed all groups are independent. +# +# Missing values are automatically removed. 
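+# A minimal usage sketch (illustrative data): with the groups stored in the
+# columns of a matrix m,
+#   cidmulv2(m)
+# performs Cliff's method for each pair of groups; the column p.crit contains
+# the Hochberg critical p-values used to control FWE.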
+# +# g=NULL, x is assumed to be a matrix or have list mode +# if g is specified, it is assumed that column g of x is +# a factor variable and that the dependent variable of interest is in column +# dp of x, which can be a matrix or data frame. +# +if(!is.null(g)){ +if(is.null(dp))stop("Specify a value for dp, the column containing the data") +x=fac2list(x[,dp],x[,g]) +} +if(is.data.frame(x))x=as.matrix(x) +if(is.matrix(x))x<-listm(x) +if(!is.list(x))stop("Data must be stored in a matrix or in list mode.") +J<-length(x) +CC<-(J^2-J)/2 +test<-matrix(NA,CC,7) +c.sum=matrix(NA,CC,5) +for(j in 1:J){ +xx<-!is.na(x[[j]]) +val<-x[[j]] +x[[j]]<-val[xx] # Remove missing values +} +dimnames(test)<-list(NULL,c("Group","Group","p.hat","p.ci.lower", +"p.ci.uppper","p-value","p.crit")) +dvec<-alpha/c(1:CC) +dimnames(c.sum)<-list(NULL,c("Group","Group","P(XY)")) +jcom<-0 +n=matl(lapply(x,length)) +for (j in 1:J){ +for (k in 1:J){ +if (j < k){ +temp<-cidv2(x[[j]],x[[k]],alpha,plotit=FALSE) +jcom<-jcom+1 +test[jcom,1]<-j +test[jcom,2]<-k +c.sum[jcom,1]<-j +c.sum[jcom,2]<-k +c.sum[jcom,3:5]=cid(x[[j]],x[[k]])$summary.dvals +test[jcom,3]<-temp$p.hat +test[jcom,4]<-temp$p.ci[1] +test[jcom,5]<-temp$p.ci[2] +test[jcom,6]<-temp$p.value +}}} +temp2<-order(0-test[,6]) +test[temp2,7]=dvec +if(CI.FWE){ +jcom<-0 +for (j in 1:J){ +for (k in 1:J){ +if (j < k){ +jcom<-jcom+1 +temp<-cidv2(x[[j]],x[[k]],alpha=test[jcom,7],plotit=FALSE) +test[jcom,4]<-temp$p.ci[1] +test[jcom,5]<-temp$p.ci[2] +}}}} +list(n=n,test=test,summary.dvals=c.sum) +} + +cidmcp=cidmulv2 + +fac2list<-function(x,g,pr=TRUE){ +# +# data are stored in x +# information about the level of the value in x is stored in g, +# which can be a matrix with up to 4 columns +# +# sort the data in x into groups based on values in g. +# store results in list mode. +# +# Example: fac2list(m[,2],m[,4]) would sort the values +# in column 2 of m according to the values in column 4 of m +# +g=as.data.frame(g) +ng=ncol(g)+1 +xg=cbind(x,g) +xg=elimna(xg) +x=xg[,1] +x=as.matrix(x) +g=xg[,2:ng] +g=as.data.frame(g) +L=ncol(g) +g=listm(g) +for(j in 1:L)g[[j]]=as.factor(g[[j]]) +g=matl(g) +Lp1=L+1 +if(L>4)stop("Can have at most 4 factors") +if(L==1){ +res=selby(cbind(x,g),2,1) +group.id=res$grpn +res=res$x +} +if(L>1){ +res=selby2(cbind(x,g),c(2:Lp1),1) +group.id=res$grpn +res=res$x +} +if(pr) +{print("Group Levels:") +print(group.id) +} +res=lapply(res,as.numeric) +res +} + +MMreg<-function(x,y,RES=TRUE,xout=FALSE,outfun=outpro,STAND=TRUE,varfun=pbvar,corfun=pbcor,WARN=FALSE,...){ +# +# Compute MM regression estimate derived by Yohai (1987) +# simply by calling the R function lmrob +# This function will remove leverage points when +# xout=T +# using the outlier detection method indicated by +# outfun, which defaults to the projection method. 
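+# A minimal usage sketch (illustrative data):
+#   MMreg(x,y,xout=TRUE)
+# removes leverage points flagged by outpro and then computes the MM estimate
+# via lmrob; Strength.Assoc is the square root of the estimated explanatory
+# power, varfun(yhat)/varfun(y).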
+# +library('robustbase') +x<-as.matrix(x) +xx<-cbind(x,y) +xx<-elimna(xx) +x<-xx[,1:ncol(x)] +x<-as.matrix(x) +y<-xx[,ncol(x)+1] +temp<-NA +x<-as.matrix(x) +if(xout){ +x<-as.matrix(x) +if(identical(outfun,outblp))flag=outblp(x,y,plotit=FALSE)$keep +else flag<-outpro(x,STAND=STAND,plotit=FALSE,...)$keep +x<-x[flag,] +y<-y[flag] +x<-as.matrix(x) +} +if(!WARN)options(warn=-1) +temp=lmrob(y~x) +if(!WARN)options(warn=0) +coef=temp$coefficients +p1=ncol(x)+1 +res<-y-x%*%coef[2:p1]-coef[1] +yhat<-y-res +stre=NULL +e.pow<-varfun(yhat)/varfun(y) +if(!is.na(e.pow)){ +if(e.pow>=1)e.pow<-corfun(yhat,y)$cor^2 +e.pow=as.numeric(e.pow) +stre=sqrt(e.pow) +} +if(!RES)res=NULL +list(coef=coef,residuals=res,Strength.Assoc=stre) +} + +ks<-function(x,y,w=FALSE,sig=TRUE,alpha=.05){ +# Compute the Kolmogorov-Smirnov test statistic +# +# w=T computes the weighted version instead. +# +# sig=T indicates that the exact level is to be computed. +# If there are ties, the reported Type I error probability is exact when +# using the unweighted test, but for the weighted test the reported +# level is too high. +# +# This function uses the functions ecdf, kstiesig, kssig and kswsig +# +# This function returns the value of the test statistic, the approximate .05 +# critical value, and the exact level if sig=T. +# +# Missing values are automatically removed +# +x<-x[!is.na(x)] +y<-y[!is.na(y)] +n1 <- length(x) +n2 <- length(y) +w<-as.logical(w) +sig<-as.logical(sig) +tie<-logical(1) +siglevel<-NA +z<-sort(c(x,y)) # Pool and sort the observations +tie=FALSE +chk=sum(duplicated(x,y)) +if(chk>0)tie=TRUE +v<-1 # Initializes v +for (i in 1:length(z))v[i]<-abs(ecdf(x,z[i])-ecdf(y,z[i])) +ks<-max(v) +if(!tie)crit=ks.crit(n1=n1,n2=n2,alpha=alpha) +else crit=ksties.crit(x,y,alpha=alpha) +if(!w && sig && !tie)siglevel<-kssig(length(x),length(y),ks) +if(!w && sig && tie)siglevel<-kstiesig(x,y,ks) +if(w){ +crit=ksw.crit(length(x),length(y),alpha=alpha) +for (i in 1:length(z)){ +temp<-(length(x)*ecdf(x,z[i])+length(y)*ecdf(y,z[i]))/length(z) +temp<-temp*(1.-temp) +v[i]<-v[i]/sqrt(temp) +} +v<-v[!is.na(v)] +ks<-max(v)*sqrt(length(x)*length(y)/length(z)) +if(sig)siglevel<-kswsig(length(x),length(y),ks) +if(tie && sig) +warning(paste("Ties were detected. The reported significance level of the +weighted Kolmogorov-Smirnov test statistic is not exact.")) +} +list(test=ks,critval=crit,p.value=siglevel) +} + +ks.crit<-function(n1,n2,alpha=.05){ +# +# Compute a critical value so that probability coverage is approximately +# 1-alpha +# +START=sqrt(0-log(alpha/2)*(n1+n2)/(2*n1*n2)) +crit=optim(START,ks.sub,n1=n1,n2=n2,alpha=alpha,lower=.001,upper=.86,method='Brent')$par +crit +} + +ks.sub<-function(crit,n1,n2,alpha){ +v=kssig(n1,n2,crit) +dif=abs(alpha-v) +dif +} + + +ksw.crit<-function(n1,n2,alpha=.05){ +# +# Compute a critical value so that probability coverage is +# >= 1-alpha while being close as possible to 1-alpha +# +if(alpha>.1)stop('The function assumes alpha is at least .1') +crit=2.4 +del=.05 +pc=.12 +while(pc>alpha){ +crit=crit+.05 +pc=kswsig(n1,n2,crit) +} +crit +} + + +bbw2list<-function(x,grp.col,lev.col,pr=TRUE){ +# +# for a between-by-between-by-within design +# grp.col indicates the columns where values of the levels of between factor +# are stored. +# lev.col indicates the columns where repeated measures are contained. 
+# If, for example, there are data for three times, stored in columns +# 6, 8 and 11, set +# lev.col=c(6,8,11) +# +# Example: Have a 3 x 4 x 2 design +# values in columns 2 and 4 indicate the +# levels of the two between factors. +# column 3 contains time 1 data, +# column 7 contains time 2 data +# bbw2list(x,(c(2,4),c(3,7)) will store data in list mode that can be +# used by bbwtrim and related functions +# +res=selbybbw(x,grp.col,lev.col,pr=pr) +res +} + + +selbybbw<-function(m,grpc,coln,pr=TRUE){ +# +# For a between by-between-by-within design, +# a commmon situation is to have data stored in an n by p matrix where +# two column indicate a group identification numbers (levels) +# for the between factors, +# and two or more other columns contain the within group results. +# +# This function is used by bbw2list to store the data in list mode so +# that the R function bbwtrim can be use. +# +# m is a matrix containing the data. One column contains group +# identification values +# and two or more other columns contain repeated measures. +# +# This function groups all values in the columns +# indicated by coln according to the +# group numbers in column grpc and stores the results in list mode. +# +# So if grpc[1] has J values, grpc[2] has K values, +# and coln indicates L columns, +# this function returns the data stored in list mode have length JKL +# +# Example: +# y<-selbybbw(blob,c(2,3),c(7,9,11))$x +# will look for group numbers in col 2 and 3 of the matrix blob, +# which indicate levels for the between factors, +# and it assumes that times 1, 2 and 3 are stored in col 7, 9, and 11. +# +# Result: the data will now be stored in y having list mode. +# +#if(!is.matrix(m))stop("Data must be stored in a matrix") +if(is.na(grpc[1]))stop("The argument grpc is not specified") +if(is.na(coln[1]))stop("The argument coln is not specified") +if(length(grpc)!=2)stop("The argument grpc must have length 2") +mm=m +m<-as.data.frame(elimna(mm)) +x<-list() +grp1<-sort(unique(m[,grpc[1]])) +grp2<-sort(unique(m[,grpc[2]])) +if(pr){ +print("Levels for first factor:") +print(grp1) +print("Levels for second factor:") +print(grp2) +} +J<-length(grp1) +K<-length(grp2) +L<-length(coln) +JKL<-J*K*L +itt<-0 +it=0 +mm=as.matrix(m[,coln]) +gmat=matrix(NA,ncol=2,nrow=J*K) +for (ig1 in 1:length(grp1)){ +for (ig2 in 1:length(grp2)){ +itt=itt+1 +gmat[itt,]=c(grp1[ig1],grp2[ig2]) +for (ic in 1:length(coln)){ +it<-it+1 +flag<-(m[,grpc[1]]==grp1[ig1])*(m[,grpc[2]]==grp2[ig2]) +flag=as.logical(flag) +x[[it]]<-as.numeric(mm[flag,ic]) +}}} +x +} + +selbybw<-function(m,grpc,coln){ +# +# For a between by within design, +# a commmon situation is to have data stored in an n by p matrix where +# a column is a group identification number +# and the remaining columns are the within group results. +# +# m is a matrix containing the data. One column contains group +# identification values +# and two or more other columns contain repeated measures. +# +# This function groups all values in the columns +# indicated by coln according to the +# group numbers in column grpc and stores the results in list mode. +# +# So if grpc has J values, and coln indicates K columns, +# this function returns the data stored in list mode have length JK +# +# Example: y<-selbybw(blob,3,c(4,6,7))$x +# will look for group numbers in col 3 of the matrix blob, +# and it assumes within +# group data are stored in col 4, 6 and 7. 
+# Result: the data will now be stored in y having list mode +# + +#if(!is.matrix(m))stop("Data must be stored in a matrix") +if(is.na(grpc[1]))stop("The argument grpc is not specified") +if(is.na(coln[1]))stop("The argument coln is not specified") +if(length(grpc)!=1)stop("The argument grpc must have length 1") +x<-list() +m=m[,c(grpc,coln)] +m<-as.data.frame(elimna(m)) +grpn<-sort(unique(m[,1])) +J<-length(grpn) +K<-length(coln) +JK<-J*K +it<-0 +mm=as.data.frame(m[,2:ncol(m)]) +for (ig in 1:length(grpn)){ +for (ic in 1:length(coln)){ +it<-it+1 +flag<-(m[,1]==grpn[ig]) +x[[it]]<-as.numeric(mm[flag,ic]) +}} +list(x=x,grpn=grpn) +} + +bw2list<-function(x,between.col,within.col,grp.col=between.col,lev.col=within.col,pr=TRUE){ +# +# for a between by within design +# grp.col is column indicating levels of between factor. +# lev.col indicates the columns where repeated measures are contained +# +# Example: column 2 contains information on levels of between factor +# have a 3 by 2 design, column 3 contains time 1 data, +# column 7 contains time 2 +# bw2list(x,2,c(3,7)) will store data in list mode that can be +# used by rmanova and related functions +# +res=selbybw(x,grp.col,lev.col) +if(pr){ +print("Levels for between factor:") +print(unique(x[,grp.col])) +} +res$x +} + + +rmc2list<-function(x,grp.col,lev.col,pr=TRUE){ +# +# for a between by within design +# grp.col is column indicating levels of between factor. +# lev.col indicates the columns where repeated measures are contained +# +# Example: column 2 contains information on levels of between factor +# have a 3 by 2 design, column 3 contains time 1 data, +# column 7 contains time 2 +# rmc2list(x,2,c(3,7)) will store data in list mode that can be +# bw2list(x,2,c(3,7)) also can be used. +# used by rmanova and related functions +# +res=selbybw(x,grp.col,lev.col) +if(pr){ +print("Levels for between factor:") +print(unique(x[,grp.col])) +} +res$x +} + + +wlogregci<-function(x,y,nboot=400,alpha=.05,SEED=TRUE,MC=FALSE, +xlab="Predictor 1",ylab="Predictor 2",xout=FALSE,outfun=out,...){ +# +# Compute a confidence interval for each of the parameters of +# a log linear model based on a robust estimator +# +# The predictor values are assumed to be in the n by p matrix x. +# + +if(MC)library(parallel) +x<-as.matrix(x) +p1<-ncol(x)+1 +p<-ncol(x) +xy<-cbind(x,y) +xy<-elimna(xy) +x<-xy[,1:p] +y<-xy[,p1] +if(xout){ +m<-cbind(x,y) +flag<-outfun(x,plotit=FALSE)$keep +m<-m[flag,] +x<-m[,1:p] +y<-m[,p1] +} +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +n=length(y) +data<-matrix(sample(n,size=length(y)*nboot,replace=TRUE),nrow=n,ncol=nboot) +data=listm(data) +if(MC)bvec<-mclapply(data,wlogreg.sub,x,y,mc.preschedule=TRUE) +if(!MC)bvec<-lapply(data,wlogreg.sub,x,y) +bvec=matl(bvec) +# +# bvec is a p+1 by nboot matrix. The first row +# contains the bootstrap intercepts, the second row +# contains the bootstrap values for first predictor, etc. 
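+# For each row of bvec, the code below forms a percentile bootstrap confidence
+# interval and a p-value based on the proportion of bootstrap estimates that
+# are less than zero.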
+x=as.matrix(x) +p1<-ncol(x)+1 +regci<-matrix(0,p1,3) +VAL<-c("intercept",rep("X",ncol(x))) +dimnames(regci)<-list(VAL,c("Est.","ci.low","ci.up")) +se<-NA +sig.level<-NA +for(i in 1:p1){ +bna=elimna(bvec[i,]) +nbn=length(bna) +ilow<-round((alpha/2) * nbn) +ihi<-nbn - ilow +ilow<-ilow+1 +temp<-mean(bna<0) +sig.level[i]<-2*(min(temp,1-temp)) +bna<-sort(bna) +regci[i,2]<-bna[ilow] +regci[i,3]<-bna[ihi] +se[i]<-sqrt(var(elimna(bvec[i,]))) +} +regci[,1]=wlogreg(x,y)$coef +list(conf.interval=regci,p.values=sig.level,se=se) +} +wlogreg.sub<-function(data,x,y){ +x=as.matrix(x) +vals=wlogreg(x[data,],y[data])$coef +} + + + +# original version of logreg.plot is stored in logreg_plot_orig_chk.tex + + +logreg.plot<-function(x,y,MLE=TRUE,ROB=FALSE,xlab=NULL,ylab=NULL,zlab='P(Z=1)',xout=FALSE,outfun=outpro, +theta=50,phi=25,duplicate="error",LP=TRUE,Lspan=.75,pyhat=FALSE,LABELS=FALSE, +WARN=FALSE,BY=TRUE, +expand=.5,scale=TRUE,fr=2,ticktype="simple",pr=TRUE,...){ +# +# For one predictor, plot logistic regression line +# +# if x is a matrix with more than one column, plot is based on data in +# in column 1. +# +# MLE=T, will plot usual maximum likelihood estimate using a solid line +# ROB=T, will plot robust estimate, which is indicated by a +# dashed line. +# +library(robustbase) +xy=cbind(x,y) +xy=elimna(xy) +p1=ncol(xy) +if(p1>3)stop('Only one or two independent variables can be used') +if(!xout){ +if(pr)print('Suggest also looking at result using xout=TRUE') +} +p=p1-1 +x=xy[,1:p] +x=as.matrix(x) +y=xy[,p1] +if(xout){ +m<-cbind(x,y) +flag<-outfun(x,plotit=FALSE,...)$keep +m<-m[flag,] +x<-m[,1:p] +y<-m[,p1] +} +if(p==1){ +if(is.null(ylab))ylab='P(Y=1|X)' +if(is.matrix(x))x=x[,1] +xord=order(x) +xx=x[xord] +yy=y[xord] +est1=logreg(xx,yy)[1:2,1] +if(is.null(xlab))v='X' +if(is.null(ylab))ylab='P(Y=1|X)' +if(LABELS)v=labels(x)[[2]] +if(MLE){ +plot(xx,yy,xlab=v[1],ylab=ylab) +phat=logreg.pred(xx,yy,xx) +lines(xx,phat) +} +if(ROB){ +if(!WARN)options(warn=-1) +if(!BY)est2=wlogreg(xx,yy)$coef[1:2] +if(BY)est2=BYlogreg(xx,yy)$coef[1:2] +phat2=exp(est2[1]+est2[2]*xx)/(1+exp(est2[1]+est2[2]*xx)) +lines(xx,phat2,lty=2) +phat=cbind(xx,phat2) +dimnames(phat)=list(NULL,c(v,'Y.hat')) +if(!WARN)options(warn=0) +} +} +if(p==2){ +library(akima) +fitr=logreg.pred(x,y,x) +if(is.null(xlab))v='X' +if(is.null(ylab))v[2]='Y' +if(LABELS)v=labels(x)[[2]] +if(LP)lplot(x,fitr,xlab=v[1],ylab=v[2],zlab=xlab,z=zlab,ticktype=ticktype,theta=theta,phi=phi,pr=FALSE) +phat=cbind(x,fitr) +dimnames(phat)=list(NULL,c(v,'Y.hat')) +} +if(!pyhat)phat<-"Done" +phat +} + + +logreg.P.ci<-function(x,y,alpha=.05,plotit=TRUE, +xlab='X',ylab='P(Y=1|X)',xout=FALSE,outfun=outpro,...){ +# +# Assuming the logistic regression model provides an adequate fit, +# compute a confidence interval for P(Y=1|X) for each value stored in x. 
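+# A minimal usage sketch (illustrative data, single predictor):
+#   logreg.P.ci(x,y)
+# fits the usual logistic model via glm, plots P(Y=1|X) with pointwise
+# confidence bands, and returns the estimates and confidence limits in output.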
+# +xx<-elimna(cbind(x,y)) +x<-xx[,1] +y<-xx[,2] +if(xout){ +m<-cbind(x,y) +flag<-outfun(x,plotit=FALSE,...)$keep +m<-m[flag,] +x<-m[,1] +y<-m[,2] +} +if(length(unique(y))>2)stop('y should be binary') +# Next convert y to 0 and 1s +n=length(y) +yy=rep(0,n) +y=as.vector(y) +flag=y==max(y) +yy[flag]=1 +y=yy + +xord=order(x) +x=x[xord] +y=y[xord] +mod1 = glm(y ~ x, family=binomial(link='logit')) +v=predict(mod1,se.fit=TRUE) +top=v$fit+qnorm(1-alpha/2)*v$se.fit +bot=v$fit-qnorm(1-alpha/2)*v$se.fit +p=exp(v$fit)/(1+exp(v$fit)) +top=exp(top)/(1+exp(top)) +bot=exp(bot)/(1+exp(bot)) +est=cbind(x,p,bot,top) +dimnames(est)=list(NULL,c('X','est.p','ci.low','ci,up')) +if(plotit){ +plot(c(x,x,x),c(top,bot,p),ylim=c(0,1),type='n',xlab=xlab,ylab=ylab) +lines(x,p) +lines(x,bot,lty=2) +lines(x,top,lty=2) +} +list(Strength.Assoc=sd(p)/sd(y),output=est) +} + + +medpb2<-function(x,y=NULL,alpha=.05,nboot=2000,SEED=TRUE){ +# +# Compare 2 independent groups using medians. +# +# A percentile bootstrap method is used, which performs well when +# there are tied values. +# +# The data are assumed to be stored in x and y. If y=NULL, x is assumed to have two columns. +# +# Missing values are automatically removed. +# +if(is.null(y)){ +if(is.matrix(x) || is.data.frame(x)){ +y=x[,2] +x=x[,1] +} +if(is.list(x)){ +y=x[[2]] +x=x[[1]] +} +} +x=elimna(x) +y=elimna(y) +xx<-list() +xx[[1]]<-x +xx[[2]]<-y +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +est1=median(xx[[1]]) +est2=median(xx[[2]]) +est.dif<-median(xx[[1]])-median(xx[[2]]) +crit<-alpha/2 +temp<-round(crit*nboot) +icl<-temp+1 +icu<-nboot-temp +bvec<-matrix(NA,nrow=2,ncol=nboot) +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +for(j in 1:2){ +data<-matrix(sample(xx[[j]],size=length(xx[[j]])*nboot,replace=TRUE),nrow=nboot) +bvec[j,]<-apply(data,1,median) # Bootstrapped medians for jth group +} +top<-bvec[1,]-bvec[2,] +test<-sum(top<0)/nboot+.5*sum(top==0)/nboot +if(test > .5)test<-1-test +top<-sort(top) +ci<-NA +ci[1]<-top[icl] +ci[2]<-top[icu] +list(n1=length(x),n2=length(y),p.value=2*test,ci=ci,est1=est1,est2=est2, +est.dif=est.dif) +} +m2ci<-function(x,y,alpha=.05,nboot=1000,bend=1.28,os=FALSE){ +# +# Compute a bootstrap, .95 confidence interval for the +# the difference between two independent +# M-estimator of location based on Huber's Psi. +# The default percentage bend is bend=1.28 +# The default number of bootstrap samples is nboot=399 +# +# By default, the fully iterated M-estimator is used. To use the +# one-step M-estimator instead, set os=T +# +os<-as.logical(os) +x<-x[!is.na(x)] # Remove any missing values in x +y<-y[!is.na(y)] # Remove any missing values in y +if(length(x)<=19 || length(y)<=19) +warning(paste("The number of observations in at least one group +is less than 20. This function might fail due to division by zero, +which in turn causes an error in function hpsi having to do with +a missing value.")) +set.seed(2) # set seed of random number generator so that +# results can be duplicated. +print("Taking bootstrap samples. 
Please wait.") +datax<-matrix(sample(x,size=length(x)*nboot,replace=TRUE),nrow=nboot) +datay<-matrix(sample(y,size=length(y)*nboot,replace=TRUE),nrow=nboot) +if(!os){ +bvecx<-apply(datax,1,mest,bend) +bvecy<-apply(datay,1,mest,bend) +} +if(os){ +bvecx<-apply(datax,1,onestep,bend) +bvecy<-apply(datay,1,onestep,bend) +} +bvec<-sort(bvecx-bvecy) +test<-sum(bvec<0)/nboot+.5*sum(bvec==0)/nboot +pv=2*min(c(test,1-test)) +low<-round((alpha/2)*nboot) +up<-round((1-alpha/2)*nboot) +se<-sqrt(var(bvec)) +list(ci=c(bvec[low],bvec[up]),se=se,p.value=pv) +} + +qsplit<-function(x,y,split.val=NULL){ +# +# x assumed to be a matrix or data frame +# +# IF split.val=NULL, +# +# split the data in x into 3 groups: +# those for which y <= lower quartile +# those between lower and upper quartile +# those >= upper quartile +# +# IF split.val CONTAINS TWO VALUES, SPLIT THE DATA ACCORDING TO +# THE VALUES SPECIFIED. +# +if(!is.data.frame(x))x=as.matrix(x) +if(is.null(split.val)){ +v=idealf(y) +flag1=(y<=v$ql) +flag2=(y>=v$qu) +} +if(!is.null(split.val)){ +flag1=(y<=split.val[1]) +flag2=(y>=split.val[2]) +} +flag3=as.logical(as.numeric(!flag1)*as.numeric(!flag2)) +d1=x[flag1,] +d2=x[flag2,] +d3=x[flag3,] +list(lower=d1,middle=d3,upper=d2) +} +cohen2xi<-function(delta){ +xi=sqrt((2*delta^2)/(4+delta^2)) +xi +} +xi2cohen<-function(xi){ +delta=sqrt((4*xi^2)/(2-xi^2)) +delta +} + +cid<-function(x,y,alpha=.05,plotit=FALSE,pop=0,fr=.8,rval=15,xlab="",ylab=""){ +# +# For two independent groups, +# compute a confidence interval for P(XY)-P(X10^6)stop('Use bmp with a large sample size. If using rimul, use ribmp instead') +m<-outer(x,y,FUN="-") +msave<-m +m<-sign(m) +d<-mean(m) +phat<-(1-d)/2 +flag=TRUE +if(phat==0 || phat==1)flag=FALSE +q0<-sum(msave==0)/length(msave) +qxly<-sum(msave<0)/length(msave) +qxgy<-sum(msave>0)/length(msave) +c.sum<-matrix(c(qxly,q0,qxgy),nrow=1,ncol=3) +dimnames(c.sum)<-list(NULL,c("P(XY)")) +if(flag){ +sigdih<-sum((m-d)^2)/(length(x)*length(y)-1) +di<-NA +for (i in 1:length(x))di[i]<-sum(x[i]>y)/length(y)-sum(x[i]x)/length(x)-sum(y[i]2500){ +print("Product of sample sizes exceeds 2500.") +print("Execution time might be high when using pop=0 or 1") +print("If this is case, might consider changing the argument pop") +}} +if(pop==0)akerd(as.vector(msave),xlab=xlab,ylab=ylab) +if(pop==1)rdplot(as.vector(msave),fr=fr,xlab=xlab,ylab=ylab) +if(pop==2)kdplot(as.vector(msave),rval=rval,xlab=xlab,ylab=ylab) +if(pop==3)boxplot(as.vector(msave)) +if(pop==4)stem(as.vector(msave)) +if(pop==5)hist(as.vector(msave),xlab=xlab) +if(pop==6)skerd(as.vector(msave)) +} +if(flag)pci=c((1-cu)/2,(1-cl)/2) +if(!flag){ +pci=bci$ci +cl=1-2*pci[2] +cu=1-2*pci[1] +} +list(n1=length(x),n2=length(y),cl=cl,cu=cu,d=d,sqse.d=sh,phat=phat,summary.dvals=c.sum,ci.p=pci) +} + + +cidv2<-function(x,y,alpha=.05,plotit=FALSE,pop=0,fr=.8,rval=15,xlab='',ylab=''){ +# +# p-value for Cliff's analog of WMW test +# +# To compare the lower and upper quantiles of the distribution of D=X-Y, +# use cbmhd. 
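+# A minimal usage sketch (illustrative data):
+#   cidv2(x,y)
+# returns the estimate p.hat of P(X<Y)+.5P(X=Y), the confidence interval
+# p.ci, and a p-value for Cliff's method.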
+# +if(length(x)*length(y)>10^6)stop('Use bmp with a large sample size.') +nullval<-0 +ci<-cid(x,y,alpha=alpha,plotit=plotit,pop=pop,fr=fr,rval=rval) +FLAG=TRUE +if(ci$phat==0 || ci$phat==1)FLAG=FALSE +if(FLAG){ +alph<-c(1:99)/100 +for(i in 1:99){ +irem<-i +chkit<-cid(x,y,alpha=alph[i],plotit=FALSE) +if(chkit[[3]]>nullval || chkit[[4]]nullval || chkit[[4]]nullval || chkit[[4]]Y)')) +if(!flag){ +nm=max(c(length(x),length(y))) +if(phat==1)A=binomcipv(nm,nm,alpha=alpha) +if(phat==0)A=binomcipv(0,nm,alpha=alpha) +ci.p=A$ci +sig=A$p.value +} + +if(plotit){ +msave<-outer(x,y,FUN='-') +if(pop==0){ +if(length(x)*length(y)>2500){ +print('Product of sample sizes exceeds 2500.') +print('Execution time might be high when plotting and when using pop=1') +print('If this is case, might consider changing the argument pop or using plotit=F') +} +akerd(as.vector(msave),fr=fr) +} +if(pop==1)rdplot(as.vector(msave),fr=fr,xlab=xlab,ylab=ylab) +if(pop==2)kdplot(as.vector(msave),rval=rval,xlab=xlab,ylab=ylab) +if(pop==3)boxplot(as.vector(msave)) +if(pop==4)stem(as.vector(msave)) +if(pop==5)hist(as.vector(msave)) +if(pop==6)skerd(as.vector(msave),xlab=xlab,ylab=ylab) +} +list(n1=n1,n2=n2,test.stat=bmtest,phat=phat,dhat=dhat,s.e.=se/N,p.value=sig,ci.p=ci.p,df=df,summary.dval=dval) +} + +ribmp<-function(J,K,x,alpha=.05,p=J*K,grp=c(1:p),plotit=TRUE,op=4){ +# +# Rank-based multiple comparisons for all interactions +# in J by K design. The method is based on an +# extension of Cliff's heteroscedastic technique for +# handling tied values and the Patel-Hoel definition of no interaction. +# +# The familywise type I error probability is controlled by using +# a critical value from the Studentized maximum modulus distribution. +# +# It is assumed all groups are independent. +# +# Missing values are automatically removed. +# +# The default value for alpha is .05. Any other value results in using +# alpha=.01. +# +# Argument grp can be used to rearrange the order of the data. 
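+# A minimal usage sketch (illustrative data): for a 2-by-2 design with the
+# four groups stored in the columns of a matrix m, ordered 1,1 1,2 2,1 2,2,
+#   ribmp(2,2,m)
+# tests the Patel-Hoel interaction, controlling FWE via the Studentized
+# maximum modulus distribution.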
+# +if(is.matrix(x))x<-listm(x) +if(!is.list(x))stop("Data must be stored in a matrix or in list mode.") +CCJ<-(J^2-J)/2 +CCK<-(K^2-K)/2 +CC<-CCJ*CCK +test<-matrix(NA,CC,8) +test.p<-matrix(NA,CC,7) +for(j in 1:J){ +xx<-!is.na(x[[j]]) +val<-x[[j]] +x[[j]]<-val[xx] # Remove missing values +} +mat<-matrix(grp,ncol=K,byrow=TRUE) +dimnames(test)<-list(NULL,c("Factor A","Factor A","Factor B","Factor B","delta","ci.lower","ci.upper","p.value")) +jcom<-0 +crit<-smmcrit(200,CC) +if(alpha!=.05)crit<-smmcrit01(200,CC) +alpha<-1-pnorm(crit) +for (j in 1:J){ +for (jj in 1:J){ +if (j < jj){ +for (k in 1:K){ +for (kk in 1:K){ +if (k < kk){ +jcom<-jcom+1 +test[jcom,1]<-j +test[jcom,2]<-jj +test[jcom,3]<-k +test[jcom,4]<-kk +temp1<-bmp(x[[mat[j,k]]],x[[mat[j,kk]]],plotit=FALSE,alpha=alpha) +temp2<-bmp(x[[mat[jj,k]]],x[[mat[jj,kk]]],plotit=FALSE,alpha=alpha) +delta<-temp1$phat-temp2$phat +sqse<-temp1$s.e.^2.+temp2$s.e.^2 +test[jcom,5]<-delta +test[jcom,6]<-delta-crit*sqrt(sqse) +test[jcom,7]<-delta+crit*sqrt(sqse) +test[jcom,8]=2*(1-pnorm(abs((delta)/sqrt(sqse)))) +}}}}}} +list(test=test) +} + +adjboxout<-function(x){ +# +# determine outliers using adjusted boxplot rule based on the +# medcouple +# +x=elimna(x) +n=length(x) +MC=mcskew(x) +val=idealf(x) +iqr=val$qu-val$ql +if(MC>=0){ +bot=val$ql-1.5*exp(0-4*MC)*iqr +top=val$qu+1.5*exp(3*MC)*iqr +} +if(MC<0){ +bot=val$ql-1.5*exp(0-3*MC)*iqr +top=val$qu+1.5*exp(4*MC)*iqr +} +flag=rep(F,length(x)) +fl=(xtop) +flag[fl]=T +flag[fu]=T +vec<-c(1:n) +outid<-NULL +if(sum(flag)>0)outid<-vec[flag] +keep<-vec[!flag] +outval<-x[flag] +keep=x[!flag] +list(out.val=outval,out.id=outid,keep=keep,cl=bot,cu=top) +} + +Mreglde.sub<-function(x,B){ +n=x[1] +ncx=x[2] +ncy=x[3] +nxx=n*ncx +nyy=n*ncy +ncx1=ncx+1 +B=matrix(B,nrow=ncx1,ncol=ncy) +iu=nxx+3 +xm=matrix(x[4:iu],ncol=ncx) +il=iu+1 +ym=matrix(x[il:length(x)],ncol=ncy) +ainit=B[1:ncy] +il=ncy+1 +Binit=matrix(B[il:length(B)],nrow=ncx,ncol=ncy) +yhat=matrix(0,nrow=n,ncol=ncy) +for(i in 1:n){ +z=as.matrix(xm[i,]) +yhat[i,]=t(Binit)%*%z +} +yhat=t(t(yhat)+ainit) +res=ym-yhat +res=sum(sqrt(apply(res^2,1,sum))) +res +} + +pbtrmcp<-function(x,alpha=.05,nboot=NA,grp=NA,con=0,bhop=FALSE,tr=.2,SEED=TRUE){ +# +# Multiple comparisons for J independent groups based on trimmed means. +# using a percentile bootstrap method +# +# The data are assumed to be stored in x +# which either has list mode or is a matrix. In the first case +# x[[1]] contains the data for the first group, x[[2]] the data +# for the second group, etc. Length(x)=the number of groups = J. +# If stored in a matrix, the columns of the matrix correspond +# to groups. +# + +# The argument grp can be used to analyze a subset of the groups +# Example: grp=c(1,3,5) would compare groups 1, 3 and 5. +# +# Missing values are allowed. +# +stop('Old function for trimmed means. Use bmcppb. (The function tmcppb gives the same results as bmcppb)') +con<-as.matrix(con) +if(is.matrix(x))x<-listm(x) +if(!is.list(x))stop("Data must be stored in list mode or in matrix mode.") +if(!is.na(sum(grp))){ # Only analyze specified groups. +xx<-list() +for(i in 1:length(grp))xx[[i]]<-x[[grp[i]]] +x<-xx +} +J<-length(x) +tempn<-0 +mvec<-NA +for(j in 1:J){ +temp<-x[[j]] +temp<-temp[!is.na(temp)] # Remove missing values. 
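+# Record the sample size and the trimmed mean for the jth group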
+tempn[j]<-length(temp) +x[[j]]<-temp +mvec[j]<-tmean(temp,tr=tr) +} +nmax=max(tempn) +Jm<-J-1 +# +# Determine contrast matrix +# +if(sum(con^2)==0){ +ncon<-(J^2-J)/2 +con<-matrix(0,J,ncon) +id<-0 +for (j in 1:Jm){ +jp<-j+1 +for (k in jp:J){ +id<-id+1 +con[j,id]<-1 +con[k,id]<-0-1 +}}} +ncon<-ncol(con) +if(nrow(con)!=J)stop("Something is wrong with con; the number of rows does not match the number of groups.") +# Determine nboot if a value was not specified +if(is.na(nboot)){ +nboot<-5000 +if(J <= 8)nboot<-4000 +if(J <= 3)nboot<-2000 +} +# Determine critical values +if(!bhop){ +if(alpha==.05){ +dvec<-c(.025,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) +if(ncon > 10){ +avec<-.05/c(11:ncon) +dvec<-c(dvec,avec) +}} +if(alpha==.01){ +dvec<-c(.005,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) +if(ncon > 10){ +avec<-.01/c(11:ncon) +dvec<-c(dvec,avec) +}} +if(alpha != .05 && alpha != .01){ +dvec<-alpha/c(1:ncon) +dvec[1]<-alpha/2 +} +dvec<-2*dvec +} +if(bhop)dvec<-(ncon-c(1:ncon)+1)*alpha/ncon +bvec<-matrix(NA,nrow=J,ncol=nboot) +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +for(j in 1:J){ +data<-matrix(sample(x[[j]],size=length(x[[j]])*nboot,replace=TRUE),nrow=nboot) +bvec[j,]<-apply(data,1,tmean,tr=tr) # Bootstrapped values for jth group +} +test<-NA +bcon<-t(con)%*%bvec #ncon by nboot matrix +tvec<-t(con)%*%mvec +for (d in 1:ncon){ +test[d]<-sum(bcon[d,]>0)/nboot +if(test[d]> .5)test[d]<-1-test[d] +} +test<-2*test +output<-matrix(0,ncon,6) +dimnames(output)<-list(NULL,c("con.num","psihat","sig.test","sig.crit","ci.lower","ci.upper")) +temp2<-order(0-test) +zvec<-dvec[1:ncon] +sigvec<-(test[temp2]>=zvec) +output[temp2,4]<-zvec +icl<-round(dvec[ncon]*nboot/2)+1 +icu<-nboot-icl-1 +for (ic in 1:ncol(con)){ +output[ic,2]<-tvec[ic,] +output[ic,1]<-ic +output[ic,3]<-test[ic] +temp<-sort(bcon[ic,]) +output[ic,5]<-temp[icl] +output[ic,6]<-temp[icu] +} +num.sig<-sum(output[,3]<=output[,4]) +list(output=output,con=con,num.sig=num.sig) +} +mcp3atm<-function(J,K,L, x,tr=.2,con=0,alpha=.05,grp=NA,op=FALSE,pr=TRUE){ +# +# Do all pairwise comparisons of +# main effects for Factor A and B and C and all interactions +# based on trimmed means +# + # The data are assumed to be stored in x in list mode or in a matrix. + # If grp is unspecified, it is assumed x[[1]] contains the data + # for the first level of both factors: level 1,1. + # x[[2]] is assumed to contain the data for level 1 of the + # first factor and level 2 of the second factor: level 1,2 + # x[[j+1]] is the data for level 2,1, etc. + # If the data are in wrong order, grp can be used to rearrange the + # groups. For example, for a two by two design, grp<-c(2,4,3,1) + # indicates that the second group corresponds to level 1,1; + # group 4 corresponds to level 1,2; group 3 is level 2,1; + # and group 1 is level 2,2. + # + # Missing values are automatically removed. + # +if(is.data.frame(x))x=as.matrix(x) + JKL <- J*K*L + if(is.matrix(x)) + x <- listm(x) + if(!is.na(grp[1])) { + yy <- x + x<-list() + for(j in 1:length(grp)) + x[[j]] <- yy[[grp[j]]] + } + if(!is.list(x)) + stop("Data must be stored in list mode or a matrix.") + for(j in 1:JKL) { + xx <- x[[j]] + x[[j]] <- xx[!is.na(xx)] # Remove missing values + } + # + + if(JKL != length(x)) + warning("The number of groups does not match the number of contrast coefficients.") +for(j in 1:JKL){ +temp<-x[[j]] +temp<-temp[!is.na(temp)] # Remove missing values. 
+x[[j]]<-temp +} + # Create the three contrast matrices +temp<-con3way(J,K,L) +conA<-temp$conA +conB<-temp$conB +conC<-temp$conC +conAB<-temp$conAB +conAC<-temp$conAC +conBC<-temp$conBC +conABC<-temp$conABC +if(!op){ +Factor.A<-lincon(x,con=conA,tr=tr,alpha=alpha,pr=pr) +Factor.B<-lincon(x,con=conB,tr=tr,alpha=alpha,pr=pr) +Factor.C<-lincon(x,con=conC,tr=tr,alpha=alpha,pr=pr) +Factor.AB<-lincon(x,con=conAB,tr=tr,alpha=alpha,pr=pr) +Factor.AC<-lincon(x,con=conAC,tr=tr,alpha=alpha,pr=pr) +Factor.BC<-lincon(x,con=conBC,tr=tr,alpha=alpha,pr=pr) +Factor.ABC<-lincon(x,con=conABC,tr=tr,alpha=alpha,pr=pr) +} +All.Tests<-NA +if(op){ +Factor.A<-NA +Factor.B<-NA +Factor.C<-NA +Factor.AB<-NA +Factor.AC<-NA +Factor.BC<-NA +Factor.ABC<-NA +con<-cbind(conA,conB,conB,conAB,conAC,conBC,conABC) +All.Tests<-lincon(x,con=con,tr=tr,alpha=alpha,,pr=pr) +} +list(Factor.A=Factor.A,Factor.B=Factor.B,Factor.C=Factor.C, +Factor.AB=Factor.AB,Factor.AC=Factor.AC,Factor.BC=Factor.BC, +Factor.ABC=Factor.ABC,All.Tests=All.Tests,conA=conA,conB=conB,conC=conC, +conAB=conAB,conAC=conAC,conBC=conBC,conABC=conABC) +} + +bbbmcp=mcp3atm + +mcp3med<-function(J,K,L, x,tr=.2,con=0,alpha=.05,grp=NA,op=FALSE){ +# +# Do all pairwise comparisons of +# main effects for Factor A and B and C and all interactions +# based on trimmed means +# + # The data are assumed to be stored in x in list mode or in a matrix. + # If grp is unspecified, it is assumed x[[1]] contains the data + # for the first level of both factors: level 1,1. + # x[[2]] is assumed to contain the data for level 1 of the + # first factor and level 2 of the second factor: level 1,2 + # x[[j+1]] is the data for level 2,1, etc. + # If the data are in wrong order, grp can be used to rearrange the + # groups. For example, for a two by two design, grp<-c(2,4,3,1) + # indicates that the second group corresponds to level 1,1; + # group 4 corresponds to level 1,2; group 3 is level 2,1; + # and group 1 is level 2,2. + # + # Missing values are automatically removed. + # +if(is.data.frame(x))x=as.matrix(x) + JKL <- J*K*L + if(is.matrix(x)) + x <- listm(x) + if(!is.na(grp[1])) { + yy <- x + x<-list() + for(j in 1:length(grp)) + x[[j]] <- yy[[grp[j]]] + } + if(!is.list(x)) + stop("Data must be stored in list mode or a matrix.") + for(j in 1:JKL) { + xx <- x[[j]] + x[[j]] <- xx[!is.na(xx)] # Remove missing values + } + # + + if(JKL != length(x)) + warning("The number of groups does not match the number of contrast coefficients.") +for(j in 1:JKL){ +temp<-x[[j]] +temp<-temp[!is.na(temp)] # Remove missing values. 
+x[[j]]<-temp +} + # Create the three contrast matrices +temp<-con3way(J,K,L) +conA<-temp$conA +conB<-temp$conB +conC<-temp$conC +conAB<-temp$conAB +conAC<-temp$conAC +conBC<-temp$conBC +conABC<-temp$conABC +if(!op){ +Factor.A<-msmed(x,con=conA,alpha=alpha) +Factor.B<-msmed(x,con=conB,alpha=alpha) +Factor.C<-msmed(x,con=conC,alpha=alpha) +Factor.AB<-msmed(x,con=conAB,alpha=alpha) +Factor.AC<-msmed(x,con=conAC,alpha=alpha) +Factor.BC<-msmed(x,con=conBC,alpha=alpha) +Factor.ABC<-msmed(x,con=conABC,alpha=alpha) +} +All.Tests<-NA +if(op){ +Factor.A<-NA +Factor.B<-NA +Factor.C<-NA +Factor.AB<-NA +Factor.AC<-NA +Factor.BC<-NA +Factor.ABC<-NA +con<-cbind(conA,conB,conB,conAB,conAC,conBC,conABC) +All.Tests<-msmed(x,con=con,alpha=alpha) +} +list(Factor.A=Factor.A,Factor.B=Factor.B,Factor.C=Factor.C, +Factor.AB=Factor.AB,Factor.AC=Factor.AC,Factor.BC=Factor.BC, +Factor.ABC=Factor.ABC,All.Tests=All.Tests,conA=conA,conB=conB,conC=conC, +conAB=conAB,conAC=conAC,conBC=conBC,conABC=conABC) +} + +bbtrim<-function(J,K,x,grp=c(1:p),p=J*K,tr=.2,nboot=600,alpha=.05,pr=FALSE){ +# +# Perform a J by K anova using trimmed means with +# for independent groups using a bootstrap-t method +# +# tr=.2 is default trimming +# +# +# The R variable x is assumed to contain the raw +# data stored in list mode or a matrix with JK columns. +# If in list mode, x[[1]] contains the data +# for the first level of both factors: level 1,1. +# data[[2]] is assumed to contain the data for level 1 of the +# first factor and level 2 of the second: level 1,2 +# x[[K]] is the data for level 1,K +# x[[K+1]] is the data for level 2,1, x[2K] is level 2,K, etc. +# +# It is assumed that data has length JK, the total number of +# groups being tested, but a subset of the data can be analyzed +# using grp +# +if(is.list(x))x<-elimna(matl(x)) +if(is.matrix(x))x<-elimna(x) +data<-x +if(is.matrix(data))data<-listm(data) +if(!is.list(data))stop("Data are not stored in list mode or a matrix") +if(p!=length(data)){ +print("The total number of groups, based on the specified levels, is") +print(p) +print("The number of groups stored in x is") +print(length(data)) +print("Warning: These two values are not equal") +} +if(p!=length(grp))stop("Apparently a subset of the groups was specified that does not match the total number of groups indicated by the values for J and K.") +temp=con2way(J,K) +conA<-temp$conA +conB<-temp$conB +conAB<-temp$conAB +Factor.A<-linconb(x,con=conA,tr=tr,alpha=alpha,nboot=nboot,pr=pr) +Factor.B<-linconb(x,con=conB,tr=tr,alpha=alpha,nboot=nboot,pr=pr) +Factor.AB<-linconb(x,con=conAB,tr=tr,alpha=alpha,nboot=nboot,pr=pr) +list(Factor.A=Factor.A,Factor.B=Factor.B,Factor.AB=Factor.AB,pr=pr) +} + +bbbtrim<-function(J,K,L,data,tr=.2,grp=c(1:p),alpha=.05,p=J*K*L,nboot=600,pr=FALSE){ +# +# Perform three-way anova, independent groups, based on trimmed means +# +# That is, there are three factors with a total of JKL independent groups. +# +# A bootstrap-t method is used to perform multiple comparisons +# The variable data is assumed to contain the raw +# data stored in list mode. data[[1]] contains the data +# for the first level of all three factors: level 1,1,1. +# data[[2]] is assumed to contain the data for level 1 of the +# first two factors and level 2 of the third factor: level 1,1,2 +# data[[L]] is the data for level 1,1,L +# data[[L+1]] is the data for level 1,2,1. data[[2L]] is level 1,2,L. +# data[[KL+1]] is level 2,1,1, etc. 
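+# A minimal usage sketch (illustrative data): for a 2-by-2-by-3 design with
+# the twelve groups stored in the columns of a matrix m,
+#   bbbtrim(2,2,3,m)
+# performs bootstrap-t multiple comparisons (via linconb) for all main effects
+# and all interactions.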
+# +# The default amount of trimming is tr=.2 +# +# It is assumed that data has length JKL, the total number of +# groups being tested. +# +if(is.list(data))data=listm(elimna(matl(data))) +if(is.matrix(data))data=listm(elimna(data)) +if(!is.list(data))stop("Data are not stored in list mode or a matrix") +if(p!=length(data)){ +print("The total number of groups, based on the specified levels, is") +print(p) +print("The number of groups in data is") +print(length(data)) +print("Warning: These two values are not equal") +} +x=data +temp=con3way(J,K,L) +conA<-temp$conA +conB<-temp$conB +conC<-temp$conC +conAB<-temp$conAB +conAC<-temp$conAC +conBC<-temp$conBC +conABC=temp$conABC +Factor.A<-linconb(x,con=conA,tr=tr,alpha=alpha,nboot=nboot,pr=pr) +Factor.B<-linconb(x,con=conB,tr=tr,alpha=alpha,nboot=nboot,pr=pr) +Factor.C<-linconb(x,con=conC,tr=tr,alpha=alpha,nboot=nboot,pr=pr) +Factor.AB<-linconb(x,con=conAB,tr=tr,alpha=alpha,nboot=nboot,pr=pr) +Factor.AC<-linconb(x,con=conAC,tr=tr,alpha=alpha,nboot=nboot,pr=pr) +Factor.BC<-linconb(x,con=conBC,tr=tr,alpha=alpha,nboot=nboot,pr=pr) +Factor.ABC<-linconb(x,con=conABC,tr=tr,alpha=alpha,nboot=nboot,pr=pr) +list(Factor.A=Factor.A,Factor.B=Factor.B,Factor.C=Factor.C, +Factor.AB=Factor.AB,Factor.AC=Factor.AC,Factor.BC=Factor.BC, +Factor.ABC=Factor.ABC,pr=pr) +} + + +pb2trmcp<-function(J,K,x,grp=c(1:p),p=J*K,tr=.2,nboot=NA,alpha=.05,SEED=TRUE,pr=TRUE, +bhop=FALSE){ +# +# Perform a J by K anova using trimmed means with +# for two independent groups using a bootstrap-t method +# +# tr=.2 is default trimming +# +# +# The R variable data is assumed to contain the raw +# data stored in list mode. data[[1]] contains the data +# for the first level of both factors: level 1,1. +# data[[2]] is assumed to contain the data for level 1 of the +# first factor and level 2 of the second: level 1,2 +# data[[K]] is the data for level 1,K +# data[[K+1]] is the data for level 2,1, data[2K] is level 2,K, etc. +# +# It is assumed that data has length JK, the total number of +# groups being tested, but a subset of the data can be analyzed +# using grp +# +if(SEED)set.seed(2) +if(is.list(x))x<-elimna(matl(x)) +if(is.matrix(x))x<-elimna(x) +data<-x +if(is.matrix(data))data<-listm(data) +if(!is.list(data))stop("Data are not stored in list mode or a matrix") +if(p!=length(data)){ +print("The total number of groups, based on the specified levels, is") +print(p) +print("The number of groups stored in x is") +print(length(data)) +print("Warning: These two values are not equal") +} +if(p!=length(grp))stop("Apparently a subset of the groups was specified that does not match the total number of groups indicated by the values for J and K.") +temp=con2way(J,K) +conA<-temp$conA +conB<-temp$conB +conAB<-temp$conAB +if(pr)print("Taking bootstrap samples") +Factor.A<-pbtrmcp(x,con=conA,tr=tr,alpha=alpha,nboot=nboot,bhop=bhop,SEED=FALSE) +Factor.B<-pbtrmcp(x,con=conB,tr=tr,alpha=alpha,nboot=nboot,bhop=bhop,SEED=FALSE) +Factor.AB<-pbtrmcp(x,con=conAB,tr=tr,alpha=alpha,nboot=nboot,bhop=bhop,SEED=FALSE) +list(Factor.A=Factor.A,Factor.B=Factor.B,Factor.AB=Factor.AB,bhop=bhop,SEED=FALSE) +} + + + +pb3trmcp<-function(J,K,L,data,tr=.2,grp=c(1:p),alpha=.05,p=J*K*L,nboot=NA, +SEED=TRUE,bhop=FALSE){ +# +# Multiple comparisons for a three-way anova, independent groups, +# based on trimmed means +# +# That is, there are three factors with a total of JKL independent groups. 
+# +# A percentile bootstrap method is used to perform multiple comparisons +# The variable data is assumed to contain the raw +# data stored in list mode. data[[1]] contains the data +# for the first level of all three factors: level 1,1,1. +# data][2]] is assumed to contain the data for level 1 of the +# first two factors and level 2 of the third factor: level 1,1,2 +# data[[L]] is the data for level 1,1,L +# data[[L+1]] is the data for level 1,2,1. data[[2L]] is level 1,2,L. +# data[[KL+1]] is level 2,1,1, etc. +# +# The default amount of trimming is tr=.2 +# +# It is assumed that data has length JKL, the total number of +# groups being tested. +# +if(SEED)set.seed(2) +if(is.list(data))data=listm(elimna(matl(data))) +if(is.matrix(data))data=listm(elimna(data)) +if(!is.list(data))stop("Data are not stored in list mode or a matrix") +if(p!=length(data)){ +print("The total number of groups, based on the specified levels, is") +print(p) +print("The number of groups in data is") +print(length(data)) +print("Warning: These two values are not equal") +} +temp=con3way(J,K,L) +conA<-temp$conA +conB<-temp$conB +conC<-temp$conC +conAB<-temp$conAB +conAC<-temp$conAC +conBC<-temp$conBC +conABC=temp$conABC +Factor.A<-pbtrmcp(x,con=conA,tr=tr,alpha=alpha,nboot=nboot,bhop=bhop) +Factor.B<-pbtrmcp(x,con=conB,tr=tr,alpha=alpha,nboot=nboot,bhop=bhop) +Factor.C<-pbtrmcp(x,con=conC,tr=tr,alpha=alpha,nboot=nboot,bhop=bhop) +Factor.AB<-pbtrmcp(x,con=conAB,tr=tr,alpha=alpha,nboot=nboot,bhop=bhop) +Factor.AC<-pbtrmcp(x,con=conAC,tr=tr,alpha=alpha,nboot=nboot,bhop=bhop) +Factor.BC<-pbtrmcp(x,con=conBC,tr=tr,alpha=alpha,nboot=nboot,bhop=bhop) +Factor.ABC<-pbtrmcp(x,con=conABC,tr=tr,alpha=alpha,nboot=nboot,bhop=bhop) +list(Factor.A=Factor.A,Factor.B=Factor.B,Factor.C=Factor.C, +Factor.AB=Factor.AB,Factor.AC=Factor.AC,Factor.BC=Factor.BC, +Factor.ABC=Factor.ABC) +} + + +med2mcp<-function(J,K,x,grp=c(1:p),p=J*K,nboot=NA,alpha=.05,SEED=TRUE,pr=TRUE, +bhop=FALSE){ +# +# Perform multiple comparisons for J by K anova using medians with +# using a percentile bootstrap method +# +# +# The R variable data is assumed to contain the raw +# data stored in a matrix or in list mode. +# If stored in list mode, data[[1]] contains the data +# for the first level of both factors: level 1,1. +# data[[2]] is assumed to contain the data for level 1 of the +# first factor and level 2 of the second: level 1,2 +# data[[K]] is the data for level 1,K +# data[[K+1]] is the data for level 2,1, data[2K] is level 2,K, etc. 
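+# A minimal usage sketch (illustrative data): for a 2-by-2 design with the
+# four groups stored in the columns of a matrix m,
+#   med2mcp(2,2,m)
+# performs all relevant multiple comparisons based on medians using medpb.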
+# +# It is assumed that data has length JK, the total number of +# groups being tested, but a subset of the data can be analyzed +# using grp +# +if(SEED)set.seed(2) +if(is.list(x))x<-elimna(matl(x)) +if(is.matrix(x))x<-elimna(x) +data<-x +if(is.matrix(data))data<-listm(data) +if(!is.list(data))stop("Data are not stored in list mode or a matrix") +if(p!=length(data)){ +print("The total number of groups, based on the specified levels, is") +print(p) +print("The number of groups stored in x is") +print(length(data)) +print("Warning: These two values are not equal") +} +if(p!=length(grp))stop("Apparently a subset of the groups was specified that does not match the total number of groups indicated by the values for J and K.") +temp=con2way(J,K) +conA<-temp$conA +conB<-temp$conB +conAB<-temp$conAB +if(pr)print("Taking bootstrap samples") +Factor.A<-medpb(x,con=conA,alpha=alpha,nboot=nboot,bhop=bhop,SEED=FALSE) +Factor.B<-medpb(x,con=conB,alpha=alpha,nboot=nboot,bhop=bhop,SEED=FALSE) +Factor.AB<-medpb(x,con=conAB,alpha=alpha,nboot=nboot,bhop=bhop,SEED=FALSE) +list(Factor.A=Factor.A,Factor.B=Factor.B,Factor.AB=Factor.AB,bhop=bhop,SEED=FALSE) +} + + + + med3mcp<-function(J,K,L,x,grp=c(1:p),alpha=.05,p=J*K*L,nboot=NA, +SEED=TRUE,bhop=FALSE){ +# +# Multiple comparisons for a three-way anova, independent groups, +# based on medians using a percentile bootstrap method +# +# That is, there are three factors with a total of JKL independent groups. +# +# The variable x is assumed to contain the raw +# x stored in a matrix or in list mode. +# If in list mode, x[[1]] contains the x +# for the first level of all three factors: level 1,1,1. +# x][2]] is assumed to contain the x for level 1 of the +# first two factors and level 2 of the third factor: level 1,1,2 +# x[[L]] is the x for level 1,1,L +# x[[L+1]] is the x for level 1,2,1. x[[2L]] is level 1,2,L. +# x[[KL+1]] is level 2,1,1, etc. +# +# It is assumed that x has length JKL, the total number of +# groups being tested. +# +if(SEED)set.seed(2) +if(is.list(x))x=listm(elimna(matl(x))) +if(is.matrix(x))x=listm(elimna(x)) +if(!is.list(x))stop("x are not stored in list mode or a matrix") +if(p!=length(x)){ +print("The total number of groups, based on the specified levels, is") +print(p) +print("The number of groups in x is") +print(length(x)) +print("Warning: These two values are not equal") +} +temp=con3way(J,K,L) +conA<-temp$conA +conB<-temp$conB +conC<-temp$conC +conAB<-temp$conAB +conAC<-temp$conAC +conBC<-temp$conBC +conABC=temp$conABC +Factor.A<-medpb(x,con=conA,alpha=alpha,nboot=nboot,bhop=bhop) +Factor.B<-medpb(x,con=conB,alpha=alpha,nboot=nboot,bhop=bhop) +Factor.C<-medpb(x,con=conC,alpha=alpha,nboot=nboot,bhop=bhop) +Factor.AB<-medpb(x,con=conAB,alpha=alpha,nboot=nboot,bhop=bhop) +Factor.AC<-medpb(x,con=conAC,alpha=alpha,nboot=nboot,bhop=bhop) +Factor.BC<-medpb(x,con=conBC,alpha=alpha,nboot=nboot,bhop=bhop) +Factor.ABC<-medpb(x,con=conABC,alpha=alpha,nboot=nboot,bhop=bhop) +list(Factor.A=Factor.A,Factor.B=Factor.B,Factor.C=Factor.C, +Factor.AB=Factor.AB,Factor.AC=Factor.AC,Factor.BC=Factor.BC, +Factor.ABC=Factor.ABC) +} + +wmwaov<-function(x,est=median,nboot=500,MC=FALSE,SEED=TRUE,MM=FALSE){ +# +# Extension of WMW to J groups +# i.e., use p=P(Xdv[1:nboot])/nboot-.5*sum(dv[bplus]==dv[1:nboot])/nboot +p.value +} + + +wincov<-function(m,tr=.2){ +m=winall(m,tr=tr)$cov +m +} + +mgvreg<-function(x,y,regfun=tsreg,cov.fun=rmba,se=TRUE,varfun=pbvar,corfun=pbcor, +SEED=TRUE){ +# +# Do regression on points not labled outliers +# by the MGV method. 
+# (This function replaces an older version of mgvreg as of 11/6/06) +# +# SEED=T so that results from outmgv are always duplicated using the same data +# +# In contrast to the old version, +# when calling outmgv, center of data is determined via +# the measure of location corresponding to cov.fun, which defaults +# to the median ball algorithm (MBA) +# +x=as.matrix(x) +m<-cbind(x,y) +m<-elimna(m) # eliminate any rows with missing data +ivec<-outmgv(m,plotit=FALSE,cov.fun=cov.fun,SEED=SEED)$keep +np1<-ncol(x)+1 +y=m[ivec,np1] +x=m[ivec,1:ncol(x)] +coef<-regfun(x,y)$coef +vec<-rep(1,length(y)) +residuals<-y-cbind(vec,x)%*%coef +stre=NULL +yhat<-y-residuals +e.pow<-varfun(yhat)/varfun(y) +if(!is.na(e.pow)){ +if(e.pow>=1)e.pow<-corfun(yhat,y)$cor^2 +stre=sqrt(e.pow) +} +list(coef=coef,residuals=residuals,Strength.Assoc=stre,Explanatory.Power=e.pow) +} +opregpbMC<-function(x,y,nboot=1000,alpha=.05,om=TRUE,ADJ=TRUE,cop=3,SEED=TRUE, +nullvec=rep(0,ncol(x)+1),plotit=TRUE,opdis=2,gval=sqrt(qchisq(.95,ncol(x)+1))){ +# +# Same as opregpb, only this function takes advantage of a multi-core +# processor assuming one is availabe and that the R package +# multicore has been installed. +# +# generate bootstrap estimates +# use projection-type outlier detection method followed by +# TS regression. +# +# om=T and ncol(x)>1, means an omnibus test is performed, +# otherwise only individual tests of parameters are performed. +# +# opdis=2, means that Mahalanobis distance is used +# opdis=1, means projection-type distance is used +# +# gval is critical value for projection-type outlier detection +# method +# +# ADJ=T, Adjust p-values as described in Section 11.1.5 of the text. +# +if(SEED)set.seed(2) +library(parallel) +x<-as.matrix(x) +m<-cbind(x,y) +p1<-ncol(x)+1 +m<-elimna(m) # eliminate any rows with missing data +x<-m[,1:ncol(x)] +x<-as.matrix(x) +y<-m[,p1] +if(nrow(x)!=length(y))stop("Sample size of x differs from sample size of y") +if(!is.matrix(x))stop("Data should be stored in a matrix") +print("Taking bootstrap samples. Please wait.") +data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot) +bvec<-apply(data,1,regboot,x,y,regfun=opregMC) +# bvec is a p+1 by nboot matrix. The first row +# contains the bootstrap intercepts, the second row +# contains the bootstrap values for first predictor, etc. 
+# using Hochberg method +bvec<-t(bvec) +dvec<-alpha/(c(1:ncol(x))) +test<-NA +icl0<-round(alpha*nboot/2) +icl<-round(alpha*nboot/(2*ncol(x))) +icu0<-nboot-icl0 +icu<-nboot-icl +output<-matrix(0,p1,6) +dimnames(output)<-list(NULL,c("Param.","p.value","crit.p.value", +"ci.lower","ci.upper","s.e.")) +pval<-NA +for(i in 1:p1){ +output[i,1]<-i-1 +se.val<-var(bvec[,i]) +temp<-sort(bvec[,i]) +output[i,6]<-sqrt(se.val) +if(i==1){ +output[i,4]<-temp[icl0+1] +output[i,5]<-temp[icu0] +} +if(i>1){ +output[i,4]<-temp[icl+1] +output[i,5]<-temp[icu] +} +pval[i]<-sum((temp>nullvec[i]))/length(temp) +if(pval[i]>.5)pval[i]<-1-pval[i] +} +fac<-2 +if(ADJ){ +# Adjust p-value if n<60 +nval<-length(y) +if(nval<20)nval<-20 +if(nval>60)nval<-60 +fac<-2-(60-nval)/40 +} +pval[1]<-2*pval[1] +pval[2:p1]<-fac*pval[2:p1] +output[,2]<-pval +temp2<-order(0-pval[2:p1]) +zvec<-dvec[1:ncol(x)] +sigvec<-(test[temp2]>=zvec) +output[temp2+1,3]<-zvec +output[1,3]<-NA +output[,2]<-pval +om.pval<-NA +temp<-opregMC(x,y)$coef +if(om && ncol(x)>1){ +temp2<-rbind(bvec[,2:p1],nullvec[2:p1]) +if(opdis==1)dis<-pdisMC(temp2,center=temp[2:p1]) +if(opdis==2){ +cmat<-var(bvec[,2:p1]-apply(bvec[,2:p1],2,mean)+temp[2:p1]) +dis<-mahalanobis(temp2,temp[2:p1],cmat) +} +om.pval<-sum((dis[nboot+1]<=dis[1:nboot]))/nboot +} +# do adjusted p-value +nval<-length(y) +if(nval<20)nval<-20 +if(nval>60)nval<-60 +adj.pval<-om.pval/2+(om.pval-om.pval/2)*(nval-20)/40 +if(ncol(x)==2 && plotit){ +plot(bvec[,2],bvec[,3],xlab="Slope 1",ylab="Slope 2") +temp.dis<-order(dis[1:nboot]) +ic<-round((1-alpha)*nboot) +xx<-bvec[temp.dis[1:ic],2:3] +xord<-order(xx[,1]) +xx<-xx[xord,] +temp<-chull(xx) +lines(xx[temp,]) +lines(xx[c(temp[1],temp[length(temp)]),]) +} +list(output=output,om.pval=om.pval,adj.om.pval=adj.pval) +} + + +opregMC<-function(x,y,regfun=tsreg,cop=3,fast=FALSE,pr=TRUE,prres=FALSE,STAND=TRUE,xout=FALSE){ +# +# Do regression on points not labled outliers +# using projection-type outlier detection method +# +# Note: argument xout is not relevant here, but is included to avoid conflicts when using regci. +# +library(parallel) +x<-as.matrix(x) +m<-cbind(x,y) +m<-elimna(m) # eliminate any rows with missing data +ivec<-outproMC(m,plotit=FALSE,cop=cop,fast=FALSE,pr=FALSE,STAND=STAND)$keep +np1<-ncol(x)+1 +coef<-regfun(m[ivec,1:ncol(x)],m[ivec,np1])$coef +vec<-rep(1,length(y)) +residuals<-y-cbind(vec,x)%*%coef +if(fast && pr){ +print("Intercept, followed by slopes:") +print(coef) +if(prres){ +print("Residuals:") +print(residuals) +}} +list(coef=coef,residuals=residuals) +} +twocor<-function(x1,y1,x2,y2,corfun=pbcor,nboot=599,alpha=.05,SEED=TRUE,...){ +# +# Compute a .95 confidence interval for the +# difference between two correlation coefficients +# corresponding to two independent groups. +# +# the function corfun is any R function that returns a +# correlation coefficient in corfun$cor. The functions pbcor and +# wincor follow this convention. +# +# For Pearson's correlation, use +# the function twopcor instead. +# +# The default number of bootstrap samples is nboot=599 +# +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +#print("Taking bootstrap samples. Please wait.") +data1<-matrix(sample(length(y1),size=length(y1)*nboot,replace=TRUE),nrow=nboot) +bvec1<-apply(data1,1,corbsub,x1,y1,corfun,...) # A 1 by nboot matrix. +data2<-matrix(sample(length(y2),size=length(y2)*nboot,replace=TRUE),nrow=nboot) +bvec2<-apply(data2,1,corbsub,x2,y2,corfun,...) # A 1 by nboot matrix. 
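+# The bootstrap distribution of the difference between the two correlations;
+# a percentile confidence interval and p-value are computed from it below.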
+bvec<-bvec1-bvec2 +bsort<-sort(bvec) +term<-alpha/2 +ilow<-round((alpha/2) * nboot) +ihi<-nboot - ilow +ilow<-ilow+1 +corci<-1 +corci[1]<-bsort[ilow] +corci[2]<-bsort[ihi] +pv<-(sum(bvec<0)+.5*sum(bvec==0))/nboot +pv=2*min(c(pv,1-pv)) +r1<-corfun(x1,y1)$cor +r2<-corfun(x2,y2)$cor +reject<-"NO" +if(corci[1]>0 || corci[2]<0)reject="YES" +list(r1=r1,r2=r2,ci.dif=corci,p.value=pv) +} + + +rm3mcp<-function(J,K,L, x,tr=.2,alpha=.05,dif=TRUE,op=FALSE,grp=NA){ +# +# MULTIPLE COMPARISONS FOR A 3-WAY within-by-within-by within ANOVA +# Do all multiple comparisons associated with +# main effects for Factor A and B and C and all interactions +# based on trimmed means +# + # The data are assumed to be stored in x in list mode or in a matrix. + # If grp is unspecified, it is assumed x[[1]] contains the data + # for the first level of both factors: level 1,1. + # x[[2]] is assumed to contain the data for level 1 of the + # first factor and level 2 of the second factor: level 1,2 + # x[[j+1]] is the data for level 2,1, etc. + # If the data are in wrong order, grp can be used to rearrange the + # groups. For example, for a two by two design, grp<-c(2,4,3,1) + # indicates that the second group corresponds to level 1,1; + # group 4 corresponds to level 1,2; group 3 is level 2,1; + # and group 1 is level 2,2. + # + # Missing values are automatically removed. + # +if(is.data.frame(x))x=as.matrix(x) + JKL <- J*K*L + if(is.matrix(x)) + x <- listm(x) + if(!is.na(grp[1])) { + yy <- x + x<-list() + for(j in 1:length(grp)) + x[[j]] <- yy[[grp[j]]] + } + if(!is.list(x)) + stop("Data must be stored in list mode or a matrix.") + for(j in 1:JKL) { + xx <- x[[j]] + x[[j]] <- xx[!is.na(xx)] # Remove missing values + } + # + + if(JKL != length(x)) + warning("The number of groups does not match the number of contrast coefficients.") +for(j in 1:JKL){ +temp<-x[[j]] +temp<-temp[!is.na(temp)] # Remove missing values. +x[[j]]<-temp +} + # Create the three contrast matrices +temp<-con3way(J,K,L) +conA<-temp$conA +conB<-temp$conB +conC<-temp$conC +conAB<-temp$conAB +conAC<-temp$conAC +conBC<-temp$conBC +conABC<-temp$conABC +Factor.A<-rmmcp(x,con=conA,tr=tr,alpha=alpha,dif=dif) +Factor.B<-rmmcp(x,con=conB,tr=tr,alpha=alpha,dif=dif) +Factor.C<-rmmcp(x,con=conC,tr=tr,alpha=alpha,dif=dif) +Factor.AB<-rmmcp(x,con=conAB,tr=tr,alpha=alpha,dif=dif) +Factor.AC<-rmmcp(x,con=conAC,tr=tr,alpha=alpha,dif=dif) +Factor.BC<-rmmcp(x,con=conBC,tr=tr,alpha=alpha,dif=dif) +Factor.ABC<-rmmcp(x,con=conABC,tr=tr,alpha=alpha,dif=dif) +list(Factor.A=Factor.A,Factor.B=Factor.B,Factor.C=Factor.C, +Factor.AB=Factor.AB,Factor.AC=Factor.AC,Factor.BC=Factor.BC, +Factor.ABC=Factor.ABC,conA=conA,conB=conB,conC=conC, +conAB=conAB,conAC=conAC,conBC=conBC,conABC=conABC) +} + +wwwmcp=rm3mcp + +tmcppb<-function(x,alpha=.05,nboot=NA,grp=NA,est=tmean,con=0,bhop=FALSE,SEED=TRUE,...){ +# +# Multiple comparisons for J independent groups using trimmed means +# +# A percentile bootstrap method with Rom's method is used. +# +# The data are assumed to be stored in x +# which either has list mode or is a matrix. In the first case +# x[[1]] contains the data for the first group, x[[2]] the data +# for the second group, etc. Length(x)=the number of groups = J. +# If stored in a matrix, the columns of the matrix correspond +# to groups. +# +# est is the measure of location and defaults to the median +# ... 
can be used to set optional arguments associated with est +# +# The argument grp can be used to analyze a subset of the groups +# Example: grp=c(1,3,5) would compare groups 1, 3 and 5. +# +# Missing values are allowed. +# +con<-as.matrix(con) +if(is.matrix(x))x<-listm(x) +if(!is.list(x))stop("Data must be stored in list mode or in matrix mode.") +if(!is.na(sum(grp))){ # Only analyze specified groups. +xx<-list() +for(i in 1:length(grp))xx[[i]]<-x[[grp[i]]] +x<-xx +} +J<-length(x) +tempn<-0 +mvec<-NA +for(j in 1:J){ +temp<-x[[j]] +temp<-temp[!is.na(temp)] # Remove missing values. +tempn[j]<-length(temp) +x[[j]]<-temp +mvec[j]<-est(temp,...) +} +Jm<-J-1 +# +# Determine contrast matrix +# +if(sum(con^2)==0){ +ncon<-(J^2-J)/2 +con<-matrix(0,J,ncon) +id<-0 +for (j in 1:Jm){ +jp<-j+1 +for (k in jp:J){ +id<-id+1 +con[j,id]<-1 +con[k,id]<-0-1 +}}} +ncon<-ncol(con) +if(nrow(con)!=J)stop("Something is wrong with con; the number of rows does not match the number of groups.") +# Determine nboot if a value was not specified +if(is.na(nboot)){ +nboot<-5000 +if(J <= 8)nboot<-4000 +if(J <= 3)nboot<-2000 +} +# Determine critical values +if(!bhop){ +if(alpha==.05){ +dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) +if(ncon > 10){ +avec<-.05/c(11:ncon) +dvec<-c(dvec,avec) +}} +if(alpha==.01){ +dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) +if(ncon > 10){ +avec<-.01/c(11:ncon) +dvec<-c(dvec,avec) +}} +if(alpha != .05 && alpha != .01){ +dvec<-alpha/c(1:ncon) +} +} +if(bhop)dvec<-(ncon-c(1:ncon)+1)*alpha/ncon +bvec<-matrix(NA,nrow=J,ncol=nboot) +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +#print("Taking bootstrap samples. Please wait.") +for(j in 1:J){ +#print(paste("Working on group ",j)) +data<-matrix(sample(x[[j]],size=length(x[[j]])*nboot,replace=TRUE),nrow=nboot) +bvec[j,]<-apply(data,1,est,...) # Bootstrapped values for jth group +} +test<-NA +bcon<-t(con)%*%bvec #ncon by nboot matrix +tvec<-t(con)%*%mvec +for (d in 1:ncon){ +tv<-sum(bcon[d,]==0)/nboot +test[d]<-sum(bcon[d,]>0)/nboot+.5*tv +if(test[d]> .5)test[d]<-1-test[d] +} +test<-2*test +output<-matrix(0,ncon,6) +dimnames(output)<-list(NULL,c("con.num","psihat","p.value","p.crit","ci.lower","ci.upper")) +temp2<-order(0-test) +zvec<-dvec[1:ncon] +sigvec<-(test[temp2]>=zvec) +output[temp2,4]<-zvec +icl<-round(dvec[ncon]*nboot/2)+1 +icu<-nboot-icl-1 +for (ic in 1:ncol(con)){ +output[ic,2]<-tvec[ic,] +output[ic,1]<-ic +output[ic,3]<-test[ic] +temp<-sort(bcon[ic,]) +output[ic,5]<-temp[icl] +output[ic,6]<-temp[icu] +} +num.sig<-sum(output[,3]<=output[,4]) +list(output=output,con=con,num.sig=num.sig) +} + +#linconpb=tmcppb + +bbmcppb<-function(J, K, x, est=tmean,JK = J*K, + alpha = 0.05, grp =c(1:JK), nboot = 2000, bhop=FALSE,SEED = TRUE,...) +{ +# +# BETWEEN-BY-BETWEEN DESIGN +# + # A percentile bootstrap for multiple comparisons + # for all main effects and interactions + # The analysis is done by generating bootstrap samples and + # using an appropriate linear contrast. + # + # The R variable x is assumed to contain the raw + # data stored in list mode or in a matrix. + # If in list mode, x[[1]] contains the data + # for the first level of both factors: level 1,1. + # x[[2]] is assumed to contain the data for level 1 of the + # first factor and level 2 of the second: level 1,2 + # x[[K]] is the data for level 1,K + # x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc. 
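+ # For instance, with J=2 and K=3: x[[1]], x[[2]], x[[3]] hold level 1 of Factor A at
+ # levels 1, 2, 3 of Factor B, and x[[4]], x[[5]], x[[6]] hold level 2 of Factor A at
+ # levels 1, 2, 3 of Factor B.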
+ # + # If the data are in a matrix, column 1 is assumed to + # correspond to x[[1]], column 2 to x[[2]], etc. + # + # When in list mode x is assumed to have length JK, the total number + # groups being tested, but a subset of the data can be analyzed + # using grp + # + +if(!is.null(dim(x)))x= listm(x) +x=elimna(x) +n=lapply(x,length) +con=con2way(J,K) +A=bbmcppb.sub(J=J, K=K, x, est=est,con=con$conA, + alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...) +B=bbmcppb.sub(J=J, K=K, x, est=est,con=con$conB, + alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...) +AB=bbmcppb.sub(J=J, K=K, x, est=est,con=con$conAB, + alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...) +list(n=n,Fac.A=A,Fac.B=B,Fac.AB=AB) +} + + bbmcppb.sub<-function(J, K, x, est=tmean, JK = J*K, con = 0, + alpha = 0.05, grp =c(1:JK), nboot = 500, bhop=FALSE,SEED = TRUE, ...){ +# +# between-by-between design +# + # + # A percentile bootstrap for multiple comparisons among + # all main effects and interactions + # The analysis is done by generating bootstrap samples and + # using an appropriate linear contrast. + # + # The R variable x is assumed to contain the raw + # data stored in list mode or in a matrix. + # If in list mode, x[[1]] contains the data + # for the first level of both factors: level 1,1. + # x[[2]] is assumed to contain the data for level 1 of the + # first factor and level 2 of the second: level 1,2 + # x[[K]] is the data for level 1,K + # x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc. + # +# +# JK independent groups +# + + # If the data are in a matrix, column 1 is assumed to + # correspond to x[[1]], column 2 to x[[2]], etc. + # + # When in list mode x is assumed to have length JK, the total number + # groups being tested, but a subset of the data can be analyzed + # using grp + # + if(is.matrix(x)) { + y <- list() + for(j in 1:ncol(x)) + y[[j]] <- x[, j] +x=y +} +ncon=ncol(con) + p <- J*K +JK=p +if(p>length(x))stop('JK is less than the Number of groups') +JK=J*K + data <- list() +xx=list() + for(j in 1:length(x)) { +xx[[j]]=x[[grp[j]]] # save input data +# # Now have the groups in proper order. + } +for(j in 1:p){ +xx[[j]]=elimna(xx[[j]]) +} +x=xx + crit=alpha/2 + icl<-round(crit*nboot)+1 +icu<-nboot-icl + if(SEED) + set.seed(2) + # set seed of random number generator so that + # results can be duplicated. + # Next determine the n_j values + testA = NA + bsam = list() + bdat = list() + aboot=matrix(NA,nrow=nboot,ncol=ncol(con)) +tvec=NA +tvec=linhat(x,con,est=est,...) + for(ib in 1:nboot) { + for(j in 1:JK) { +nv=length(x[[j]]) +bdat[[j]] = sample(nv, size = nv, replace =TRUE) +bsam[[j]] = x[[j]][bdat[[j]]] +} +aboot[ib,]=linhat(bsam,con=con,est=est,...) 
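+# aboot[ib,] holds the estimated linear contrasts for the ib-th bootstrap sample, so
+# after the loop column d of aboot is the bootstrap distribution of the d-th contrast.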
+} +pbA=NA +for(j in 1:ncol(aboot)){ +pbA[j]=mean(aboot[,j]>0) +pbA[j]=2*min(c(pbA[j],1-pbA[j])) +} +# Determine critical values +outputA<-matrix(0,ncol(con),6) +dimnames(outputA)<-list(NULL,c('con.num','psihat','p.value','p.adjust', +'ci.lower','ci.upper')) +test=pbA +temp2<-sort(test) #order(0-test) +outputA[,2]<-tvec +for (ic in 1:ncol(con)){ +outputA[ic,1]<-ic +outputA[ic,3]<-test[ic] +temp<-sort(aboot[,ic]) +outputA[ic,5]<-temp[icl] +outputA[ic,6]<-temp[icu] +} +outputA[,4]=p.adjust(outputA[,3],method='hoch') +outputA +} + +t2waypb<-bbmcppb + +ols.plot.inter<-function(x,y, pyhat = FALSE, eout = FALSE, xout = FALSE, outfun = out, + plotit = TRUE, expand = 0.5, scale = TRUE, xlab = "X", + ylab = "Y", zlab = "", theta = 50, phi = 25, family = "gaussian", + duplicate = "error",ticktype="simple",...){ +# +# Plot regression surface based on the classic interaction model: +# usual product term +# +# x is assumed to be a matrix with two columns (two predictors) +library(akima) +x<-as.matrix(x) +xx<-cbind(x,y) +xx<-elimna(xx) +x<-xx[,1:ncol(x)] +x<-as.matrix(x) +y<-xx[,ncol(x)+1] +if(ncol(x)!=2)stop("x should have two columns") +if(xout){ +m<-cbind(x,y) +flag<-outfun(x,plotit=FALSE,...)$keep +m<-m[flag,] +x<-m[,1:2] +y<-m[,3] +} +xx=cbind(x,x[,1]*x[,2]) +temp=lsfit(xx,y) +fitr=y-temp$residuals +iout<-c(1:length(fitr)) +nm1<-length(fitr)-1 +for(i in 1:nm1){ +ip1<-i+1 +for(k in ip1:length(fitr))if(sum(x[i,]==x[k,])==2)iout[k]<-0 +} +fitr<-fitr[iout>=1] # Eliminate duplicate points in the x-y plane +# This is necessary when doing three dimensional plots +# with the R function interp +mkeep<-x[iout>=1,] +fit<-interp(mkeep[,1],mkeep[,2],fitr,duplicate=duplicate) +persp(fit,theta=theta,phi=phi,expand=expand, +scale=scale,xlab=xlab,ylab=ylab,zlab=zlab,ticktype=ticktype) +} + +gamplotINT<-function(x,y,pyhat=FALSE,plotit=TRUE,theta=50,phi=25,expand=.5,xout=FALSE, +SCALE=FALSE,zscale=TRUE,eout=FALSE,outfun=out,ticktype="simple",xlab = "X", ylab = "Y", zlab = "",...){ +# +# Plot regression surface, assuming two predictors in +# n by 2 matrix x using gam (generalized additive model) +# Same as gamplot, only a product term is included. 
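+# The fitted surface is based on mgcv::gam with the model y ~ s(x1) + s(x2) + s(x1,x2),
+# i.e., smooth main effects plus a smooth interaction term.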
+# +if(eout && xout)stop("Not allowed to have eout=xout=T") +x<-as.matrix(x) +if(ncol(x)!=2)stop("x must be an n by 2 matrix") +library(akima) +library(mgcv) +np=ncol(x) +np1=np+1 +m<-elimna(cbind(x,y)) +x<-m[,1:np] +x<-as.matrix(x) +y<-m[,np1] +if(xout){ +flag<-outfun(x,...)$keep +m<-m[flag,] +} +if(eout){ +flag<-outfun(m,...)$keep +m<-m[flag,] +} +x1<-m[,1] +x2<-m[,2] +y<-m[,3] +xrem<-m[,1:2] +n<-nrow(x) +fitr<-fitted(gam(y~s(x1)+s(x2)+s(x1,x2))) +allfit<-fitr +if(plotit){ +iout<-c(1:length(fitr)) +nm1<-length(fitr)-1 +for(i in 1:nm1){ +ip1<-i+1 +for(k in ip1:length(fitr))if(sum(xrem[i,]==xrem[k,])==2)iout[k]<-0 +} +fitr<-fitr[iout>=1] +mkeep<-xrem[iout>=1,] +fit<-interp(mkeep[,1],mkeep[,2],fitr) +persp(fit,theta=theta,phi=phi,expand=expand,xlab=xlab,ylab=ylab,zlab=zlab, +scale=scale,ticktype=ticktype) +} +m<-"Done" +if(pyhat)m<-allfit +m +} + + + +reg.plot.inter<-function(x,y, regfun=tsreg, + pyhat = FALSE, eout = FALSE, xout = FALSE, outfun = out, + plotit = TRUE, expand = 0.5, scale = TRUE, xlab = "X", + ylab = "Y", zlab = "", theta = 50, phi = 25, family = "gaussian", + duplicate = "error",ticktype="simple",...){ +# +# Plot regression surface based on the classic interaction model: +# usual product term +# +# x is assumed to be a matrix with two columns (two predictors) +library(akima) +x<-as.matrix(x) +xx<-cbind(x,y) +xx<-elimna(xx) +x<-xx[,1:ncol(x)] +x<-as.matrix(x) +y<-xx[,ncol(x)+1] +if(xout){ +p=ncol(x) +p1=p+1 +m<-cbind(x,y) +if(identical(outfun,outblp))flag=outblp(x,y,plotit=FALSE)$keep +else +flag<-outfun(x,plotit=FALSE,...)$keep +m<-m[flag,] +x<-m[,1:p] +y<-m[,p1] +} + +if(!scale)print("scale=F. If there is an association, try scale=T") +if(ncol(x)!=2)stop("x should have two columns") +xx=cbind(x,x[,1]*x[,2]) +temp=regfun(xx,y) +fitr=y-temp$residuals +iout<-c(1:length(fitr)) +nm1<-length(fitr)-1 +for(i in 1:nm1){ +ip1<-i+1 +for(k in ip1:length(fitr))if(sum(x[i,]==x[k,])==2)iout[k]<-0 +} +fitr<-fitr[iout>=1] # Eliminate duplicate points in the x-y plane +# This is necessary when doing three dimensional plots +# with the R function interp +mkeep<-x[iout>=1,] +fit<-interp(mkeep[,1],mkeep[,2],fitr,duplicate=duplicate) +persp(fit,theta=theta,phi=phi,expand=expand, +scale=scale,xlab=xlab,ylab=ylab,zlab=zlab,ticktype=ticktype) +} + +bwrank<-function(J,K,x,grp=c(1:p),p=J*K){ +# +# Between by within rank-based ANOVA +# That is, have a J by K design with J independent levels and K dependent +# measures +# +# x can be a matrix with columns corresponding to groups +# or it can have list mode. +# +# +if(is.data.frame(x))data=as.matrix(x) +if(is.matrix(x))x<-listm(x) +x=x[grp] +xx<-list() +nvec<-NA +alldat<-NA +klow<-1-K +kup<-0 +iall=0 +for (j in 1:J){ +klow<-klow+K +kup<-kup+K +mtemp=elimna(matl(x[klow:kup])) +for(k in 1:K){ +iall=iall+1 +xx[[iall]]=mtemp[,k] +}} +for(j in 1:p){ +alldat<-c(alldat,xx[[j]]) +nvec[j]<-length(xx[[j]]) +} +# +# Check sample sizes +# +nmat<-matrix(nvec,J,K,byrow=TRUE) +for(j in 1:J){ +if(var(nmat[j,]) !=0){ +warning("Number of observations among dependent groups for level",paste(j)," of Factor A are unequal") +print("Matrix of sample sizes:") +print(nmat) +}} +if(sum(is.na(alldat[2:length(alldat)])>0))stop("Missing values not allowed") +rval<-rank(alldat[2:length(alldat)]) +rdd<-mean(rval) # R bar ... 
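+# Next, partition the pooled ranks back into the p=J*K groups: xr[[j]] holds the ranks
+# of the observations from group j, using the group sample sizes stored in nvec.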
+xr<-list() +il<-1-nvec[1] +iu<-0 +for(j in 1:p){ +il<-il+nvec[j] +iu<-iu+nvec[j] +xr[[j]]<-rval[il:iu] +} +v<-matrix(0,p,p) +Ja<-matrix(1,J,J) +Ia<-diag(1,J) +Pa<-Ia-Ja/J +Jb<-matrix(1,K,K) +Ib<-diag(1,K) +Pb<-Ib-Jb/K +cona<-kron(Pa,Ib) +conb<-kron(Ia,Pb) +conab<-kron(Pa,Pb) +for(k in 1:K){ +temp<-x[[k]] +bigm<-matrix(temp,ncol=1) +jk<-k +for (j in 2:J){ +jk<-jk+K +tempc<-matrix(x[[jk]],ncol=1) +bigm<-rbind(bigm,tempc) +temp<-c(temp,x[[jk]]) +}} +N<-length(temp) +rbbd<-NA +for(k in 1:K){ +bigm<-xr[[k]] +jk<-k +for (j in 2:J){ +jk<-jk+K +bigm<-c(bigm,xr[[jk]]) +}} +rbjk<-matrix(NA,nrow=J,ncol=K) #R_.jk +ic<-0 +for (j in 1:J){ +for(k in 1:K){ +ic<-ic+1 +rbjk[j,k]<-mean(xr[[ic]]) # R bar_.jk +}} +for(k in 1:K)rbbd[k]<-mean(rbjk[,k]) +rbj<-1 # R_.j. +sigv<-0 +njsam<-0 # n_j +icc<-1-K +ivec<-c(1:K)-K +for (j in 1:J){ +icc<-icc+K +ivec<-ivec+K +temp<-xr[ivec[1]:ivec[K]] +temp<-matl(temp) +tempv<-apply(temp,1,mean) +njsam[j]<-nvec[icc] +rbj[j]<-mean(rbjk[j,]) +sigv[j]<-var(tempv) # var of R bar_ij. +} +nv<-sum(njsam) +phat<-(rbjk-.5)/(nv*K) +sv2<-sum(sigv/njsam) +uv<-sum((sigv/njsam)^2) +dv<-sum((sigv/njsam)^2/(njsam-1)) +testA<-J*var(rbj)/sv2 +klow<-1-K +kup<-0 +sv<-matrix(0,nrow=K,ncol=K) +rvk<-NA +for(j in 1:J){ +klow<-klow+K +kup<-kup+K +sel<-c(klow:kup) +m<-matl(xr[klow:kup]) +m<-elimna(m) +xx<-listm(m) +xx<-listm(m) +vsub<-nv*var(m)/(nv*K*nv*K*njsam[j]) +v[sel,sel]<-vsub +sv<-sv+vsub +} +sv<-sv/J^2 +testB<-nv/(nv*K*nv*K*sum(diag(Pb%*%sv)))*sum((rbbd-mean(rbbd))^2) +testAB<-0 +for (j in 1:J){ +for (k in 1:K){ +testAB<-testAB+(rbjk[j,k]-rbj[j]-rbbd[k]+rdd)^2 +}} +testAB<-nv*testAB/(nv*K*nv*K*sum(diag(conab%*%v))) +nu1B<-(sum(diag(Pb%*%sv)))^2/sum((diag(Pb%*%sv%*%Pb%*%sv))) +nu1A<-(J-1)^2/(1+J*(J-2)*uv/sv2^2) +nu2A<-sv2^2/dv +nu1AB<-(sum(diag(conab%*%v)))^2/sum(diag(conab%*%v%*%conab%*%v)) +sig.A<-1-pf(testA,nu1A,nu2A) +sig.B<-1-pf(testB,nu1B,1000000) +sig.AB<-1-pf(testAB,nu1AB,1000000) +list(test.A=testA,p.value.A=sig.A,test.B=testB,p.value.B=sig.B,test.AB=testAB, +p.value.AB=sig.AB,avg.ranks=rbjk,rel.effects=phat) +} + + + +rqtest<-function(x,y,qval=.5,nboot=200,alpha=.05,SEED=TRUE,xout=FALSE,outfun=outpro,...){ +# +# Omnibus test when using a quantile regression estimator +# +x<-as.matrix(x) +if(xout){ +x<-as.matrix(x) +flag<-outfun(x,...)$keep +x<-x[flag,] +y<-y[flag] +} +x<-as.matrix(x) +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +print("Taking bootstrap samples. Please wait.") +data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot) +bvec<-apply(data,1,rqtest.sub,x,y,qval=qval) +# bvec is a p+1 by nboot matrix. The first row +# contains the bootstrap intercepts, the second row +# contains the bootstrap values for first predictor, etc. +p<-ncol(x) +if(p==1)stop("Use qregci when p=1") +n<-length(y) +np<-p+1 +bvec<-t(bvec) +semat<-var(bvec[,2:np]) + +#temp<-rqfit(x,y,qval=qval)$coef[2:np] +temp<-qreg(x,y,qval=qval)$coef[2:np] +temp<-as.matrix(temp) +test<-t(temp)%*%solve(semat)%*%temp +test<-test*(n-p)/((n-1)*p) +p.value<-1-pf(test,p,n-p) +# Determine adjusted critical level, if possible. 
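+# The adjustment below is a small-sample (n<=60) correction based on constants b0 and b1
+# that depend on p and on alpha (.1, .05, .025 or .01); the reported adjusted.alpha is
+# never smaller than the nominal alpha, and when n>60 no adjustment is made.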
+adjusted.alpha=NULL +b1=NULL +if(n<=60){ +if(alpha==.1){ +if(p==2){ +b1<-0-0.001965 +b0<-.2179 +} +if(p==3){ +b1<-0-.003 +b0<-.2814 +} +if(p==4){ +b1<-0-.0058 +b0<-.4478 +} +if(p==5){ +b1<-0-.00896 +b0<-.6373 +} +if(p>=6){ +b1<-0-.0112 +b0<-.7699 +}} +if(alpha==.05){ +if(p==2){ +b1<-0-0.001173 +b0<-.1203 +} +if(p==3){ +b1<-0-.00223 +b0<-.184 +} +if(p==4){ +b1<-0-.00476 +b0<-.3356 +} +if(p==5){ +b1<-0-.0063 +b0<-.425 +} +if(p==6){ +b1<-0-.00858 +b0<-.5648 +}} +if(alpha==.025){ +if(p==2){ +b1<-0-0.00056 +b0<-.05875 +} +if(p==3){ +b1<-0-.00149 +b0<-.1143 +} +if(p==4){ +b1<-0-.00396 +b0<-.2624 +} +if(p==5){ +b1<-0-.00474 +b0<-.3097 +} +if(p==6){ +b1<-0-.0064 +b0<-.4111 +}} +if(alpha==.01){ +if(p==2){ +b1<-0-0.00055 +b0<-.043 +} +if(p==3){ +b1<-0-.00044 +b0<-.0364 +} +if(p==4){ +b1<-0-.0024 +b0<-.1546 +} +if(p==5){ +b1<-0-.00248 +b0<-.159 +} +if(p==6){ +b1<-0-.00439 +b0<-.2734 +}} +if(!is.null(b1))adjusted.alpha<-b1*n+b0 +adjusted.alpha<-max(alpha,adjusted.alpha) +} +list(test.stat=test,p.value=p.value,adjusted.alpha=adjusted.alpha) +} + + +runpd<-function(x,y,pts=x,est=tmean,fr=.8,plotit=TRUE,pyhat=FALSE,nmin=0,scale=TRUE, +expand=.5,xout=FALSE,outfun=out,pr=TRUE,xlab="X1",ylab="X2",zlab="",LP=TRUE, +theta=50,phi=25,duplicate="error",MC=FALSE,ticktype="simple",...){ +# +# running mean using interval method +# Distances from a point are determined using a projection method +# see function pdclose +# +# fr controls amount of smoothing +# tr is the amount of trimming +# x is an n by p matrix of predictors. +# +if(is.list(x))stop("Data should not stored be stored in list mode") +x<-as.matrix(x) +pval<-ncol(x) +xx<-cbind(x,y) +xx<-elimna(xx) +x<-xx[,1:pval] +x<-as.matrix(x) +y<-xx[,pval+1] +if(xout){ +keepit<-outfun(x,plotit=FALSE)$keep +x<-x[keepit,] +y<-y[keepit] +} +plotit<-as.logical(plotit) +iout<-c(1:nrow(x)) +rmd<-1 # Initialize rmd +nval<-1 +nmat<-pdclose(x,pts,fr=fr,MC=MC) +for(i in 1:nrow(pts))rmd[i]<-est(y[nmat[i,]],...) +for(i in 1:nrow(pts))nval[i]<-sum(nmat[i,]) +if(ncol(x)==2){ +if(plotit){ +library(akima) +fitr<-rmd[nval>nmin] +y<-y[nval>nmin] +x<-x[nval>nmin,] +iout<-c(1:length(fitr)) +nm1<-length(fitr)-1 +for(i in 1:nm1){ +ip1<-i+1 +for(k in ip1:length(fitr))if(sum(x[i,]==x[k,])==2)iout[k]<-0 +} +if(plotit){ +if(pr){ +if(!scale)print("With dependence, suggest using scale=T") +} +fitr<-rmd[nval>nmin] +iout<-c(1:length(fitr)) +nm1<-length(fitr)-1 +for(i in 1:nm1){ +ip1<-i+1 +for(k in ip1:length(fitr))if(sum(x[i,]==x[k,])==2)iout[k]<-0 +} +fitr<-fitr[iout>=1] # Eliminate duplicate points in the x-y plane +# This is necessary when doing three dimensional plots +# with the R function interp +mkeep<-x[iout>=1,] +if(LP)fitr=lplot(x[iout>=1,],fitr,pyhat=TRUE,pr=FALSE,plotit=FALSE)$yhat +fit<-interp(mkeep[,1],mkeep[,2],fitr,duplicate=duplicate) +persp(fit,theta=theta,phi=phi,expand=expand, +scale=scale,xlab=xlab,ylab=ylab,zlab=zlab,ticktype=ticktype) +}}} +if(pyhat)last<-rmd +if(!pyhat)last <- "Done" + last +} + +sppbi<-function(J,K,x,est=tmean,JK=J*K,grp=c(1:JK),nboot=500,SEED=TRUE,pr=TRUE,...){ +# +# A percentile bootstrap for interactions +# in a split-plot design. +# The analysis is done by taking difference scores +# among all pairs of dependent groups and seeing whether +# these differences differ across levels of Factor A. +# +# The R variable x is assumed to contain the raw +# data stored in list mode or in a matrix. +# If in list mode, x[[1]] contains the data +# for the first level of both factors: level 1,1. 
+# x[[2]] is assumed to contain the data for level 1 of the +# first factor and level 2 of the second: level 1,2 +# x[[K]] is the data for level 1,K +# x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc. +# +# If the data are in a matrix, column 1 is assumed to +# correspond to x[[1]], column 2 to x[[2]], etc. +# +# +# When in list mode x is assumed to have length JK, the total number of +# groups being tested, but a subset of the data can be analyzed +# using grp +# +if(pr)print('As of Oct. 2014, argument est defaults to tmean') +library(MASS) + if(is.matrix(x)) { + y <- list() + for(j in 1:ncol(x)) + y[[j]] <- x[, j] + x <- y +} + +JK<-J*K +MJ<-(J^2-J)/2 +MK<-(K^2-K)/2 +JMK<-J*MK +Jm<-J-1 +data<-list() +for(j in 1:length(x)){ +data[[j]]<-x[[grp[j]]] # Now have the groups in proper order. +} +x<-data +jp<-1-K +kv<-0 +kv2<-0 +for(j in 1:J){ +jp<-jp+K +xmat<-matrix(NA,ncol=K,nrow=length(x[[jp]])) +for(k in 1:K){ +kv<-kv+1 +xmat[,k]<-x[[kv]] +} +xmat<-elimna(xmat) +for(k in 1:K){ +kv2<-kv2+1 +x[[kv2]]<-xmat[,k] +}} +xx<-x +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +# Next determine the n_j values +nvec<-NA +jp<-1-K +for(j in 1:J){ +jp<-jp+K +nvec[j]<-length(x[[jp]]) +} +# +# Now take bootstrap samples from jth level +# of Factor A and average K corresponding estimates +# of location. +# +bloc<-matrix(NA,ncol=J,nrow=nboot) +#print("Taking bootstrap samples. Please wait.") +mvec<-NA +it<-0 +for(j in 1:J){ +paste("Working on level ",j," of Factor A") +x<-matrix(NA,nrow=nvec[j],ncol=MK) +# +im<-0 +for(k in 1:K){ +for(kk in 1:K){ +if(k1)bloc<-cbind(bloc,bvec) +} +# +MJMK<-MJ*MK +con<-matrix(0,nrow=JMK,ncol=MJMK) +cont<-matrix(0,nrow=J,ncol=MJ) +ic<-0 +for(j in 1:J){ +for(jj in 1:J){ +if(j1){ +for(k in 2:MK){ +con1<-push(con1) +con<-cbind(con,con1) +}} +bcon<-t(con)%*%t(bloc) #C by nboot matrix +tvec<-t(con)%*%mvec +tvec<-tvec[,1] +tempcen<-apply(bcon,1,mean) +vecz<-rep(0,ncol(con)) +bcon<-t(bcon) +temp=bcon +for(ib in 1:nrow(temp))temp[ib,]=temp[ib,]-tempcen+tvec +smat<-var(temp) +if(sum(is.na(smat))==0){ +chkrank<-qr(smat)$rank +bcon<-rbind(bcon,vecz) +if(chkrank==ncol(smat))dv<-mahalanobis(bcon,tvec,smat) +if(chkrank0)print('Computational Problem. Try est=tmean or use function spmcpi or tsplitbt') +bplus<-nboot+1 +sig.level<-1-sum(dv[bplus]>=dv[1:nboot])/nboot +list(p.value=sig.level,psihat=tvec,con=con) +} +sppba<-function(J,K,x,est=tmean,JK=J*K,grp=c(1:JK),avg=TRUE,nboot=500,SEED=TRUE, +MC=FALSE,MDIS=FALSE,pr=TRUE,...){ +# +# A percentile bootstrap for main effects +# among independent groups in a split-plot design +# +# avg=T: The analysis is done by averaging K measures of +# location for each level of Factor A, +# and then comparing averages by testing the hypothesis +# that all pairwise differences are equal to zero. +# +# avg=F: The analysis is done by testing whether $K$ equalities are +# simultaneously true. For kth level of Factor B, the kth equality is +# theta_{1k}= ... theta_{Jk}, k=1,...,K. +# +# The R variable x is assumed to contain the raw +# data stored in list mode or in a matrix. +# If in list mode, x[[1]] contains the data +# for the first level of both factors: level 1,1. +# x[[2]] is assumed to contain the data for level 1 of the +# first factor and level 2 of the second: level 1,2 +# x[[K]] is the data for level 1,K +# x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc. +# +# If the data are in a matrix, column 1 is assumed to +# correspond to x[[1]], column 2 to x[[2]], etc. 
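+# For example, with J=3 and avg=TRUE the hypothesis tested is that all (3^2-3)/2 = 3
+# pairwise differences between the K-averaged measures of location (levels 1 vs 2,
+# 1 vs 3 and 2 vs 3 of Factor A) are zero.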
+# +# When in list mode x is assumed to have length JK, the total number of +# groups being tested, but a subset of the data can be analyzed +# using grp +# +if(pr)print('As of Oct. 2014 the argument est defaults to tmean') +library(MASS) + if(is.matrix(x)) { + y <- list() + for(j in 1:ncol(x)) + y[[j]] <- x[, j] + x <- y +} + +JK<-J*K +data<-list() +for(j in 1:length(x)){ +data[[j]]<-x[[grp[j]]] # Now have the groups in proper order. +} +x<-data +jp<-1-K +kv<-0 +kv2<-0 +for(j in 1:J){ +jp<-jp+K +xmat<-matrix(NA,ncol=K,nrow=length(x[[jp]])) +for(k in 1:K){ +kv<-kv+1 +xmat[,k]<-x[[kv]] +} +xmat<-elimna(xmat) +for(k in 1:K){ +kv2<-kv2+1 +x[[kv2]]<-xmat[,k] +} +} +xx<-x +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +# Next determine the n_j values +nvec<-NA +jp<-1-K +for(j in 1:J){ +jp<-jp+K +nvec[j]<-length(x[[jp]]) +} +# +# Now take bootstrap samples from jth level +# of Factor A. +# +bloc<-matrix(NA,nrow=J,ncol=nboot) +#print("Taking bootstrap samples. Please wait.") +mvec<-NA +ik<-0 +for(j in 1:J){ +paste("Working on level ",j," of Factor A") +x<-matrix(NA,nrow=nvec[j],ncol=K) +# +for(k in 1:K){ +ik<-ik+1 +x[,k]<-xx[[ik]] +if(!avg)mvec[ik]<-est(xx[[ik]],...) +} +tempv<-apply(x,2,est,...) +data<-matrix(sample(nvec[j],size=nvec[j]*nboot,replace=TRUE),nrow=nboot) +bvec<-matrix(NA,ncol=K,nrow=nboot) +for(k in 1:K){ +temp<-x[,k] +bvec[,k]<-apply(data,1,rmanogsub,temp,est,...) # An nboot by K matrix +} +if(avg){ +mvec[j]<-mean(tempv) +bloc[j,]<-apply(bvec,1,mean) +} +if(!avg){ +if(j==1)bloc<-bvec +if(j>1)bloc<-cbind(bloc,bvec) +} +} +if(avg){ +d<-(J^2-J)/2 +con<-matrix(0,J,d) +id<-0 +Jm<-J-1 +for (j in 1:Jm){ +jp<-j+1 +for(k in jp:J){ +id<-id+1 +con[j,id]<-1 +con[k,id]<-0-1 +}}} +if(!avg){ +MJK<-K*(J^2-J)/2 # NUMBER OF COMPARISONS +JK<-J*K +MJ<-(J^2-J)/2 +cont<-matrix(0,nrow=J,ncol=MJ) +ic<-0 +for(j in 1:J){ +for(jj in 1:J){ +if(j1){ +for(k in 2:K){ +con1<-push(con1) +con<-cbind(con,con1) +}}} +if(!avg)bcon<-t(con)%*%t(bloc) #C by nboot matrix +if(avg)bcon<-t(con)%*%(bloc) +tvec<-t(con)%*%mvec +tvec<-tvec[,1] +tempcen<-apply(bcon,1,mean) +vecz<-rep(0,ncol(con)) +bcon<-t(bcon) +temp=bcon +for(ib in 1:nrow(temp))temp[ib,]=temp[ib,]-tempcen+tvec +bcon<-rbind(bcon,vecz) +if(!MDIS){ +if(!MC)dv=pdis(bcon,center=tvec,na.rm=FALSE) +if(MC)dv=pdisMC(bcon,center=tvec) +lbcon=length(elimna(bcon)) +bplus<-nboot+1 +if(lbcon=dv[1:nboot])/nboot +list(p.value=sig.level,psihat=tvec,con=con) +} + +outpro<-function(m,gval=NA,center=NA,plotit=TRUE,op=TRUE,MM=FALSE,cop=3, +xlab="VAR 1",ylab="VAR 2",STAND=TRUE,tr=.2,q=.5,pr=TRUE,...){ +# +# Detect outliers using a modification of the +# Stahel-Donoho projection method. +# +# Determine center of data cloud, for each point, +# connect it with center, project points onto this line +# and use distances between projected points to detect +# outliers. A boxplot method is used on the +# projected distances. +# +# plotit=TRUE creates a scatterplot when working with +# bivariate data. +# +# op=T +# means the .5 depth contour is plotted +# based on data with outliers removed. +# +# op=F +# means .5 depth contour is plotted without removing outliers. +# +# MM=F Use interquatile range when checking for outliers +# MM=T uses MAD. +# +# If value for center is not specified, +# there are four options for computing the center of the +# cloud of points when computing projections: +# +# cop=2 uses MCD center +# cop=3 uses median of the marginal distributions. 
+# cop=4 uses MVE center +# cop=5 uses TBS +# cop=6 uses rmba (Olive's median ball algorithm)# cop=7 uses the spatial (L1) median +# +# args q and tr having are not used by this function. They are included to deal +# with situations where smoothers have optional arguments for q and tr +# +# When using cop=2, 3 or 4, default critical value for outliers +# is square root of the .975 quantile of a +# chi-squared distribution with p degrees +# of freedom. +# +# STAND=T means that marginal distributions are standardized before +# checking for outliers. +# +# Donoho-Gasko (Tukey) median is marked with a cross, +. +# +m<-as.matrix(m) +if(pr){ +if(!STAND){ +if(ncol(m)>1)print("STAND=FALSE. If measures are on different scales, might want to use STAND=TRUE") +}} +library(MASS) +m=elimna(m) +m<-as.matrix(m) +nv=nrow(m) +if(ncol(m)==1){ +dis<-(m-median(m,na.rm=TRUE))^2/mad(m,na.rm=TRUE)^2 +dis<-sqrt(dis) +dis[is.na(dis)]=0 +crit<-sqrt(qchisq(.975,1)) +chk<-ifelse(dis>crit,1,0) +vec<-c(1:nrow(m)) +outid<-vec[chk==1] +keep<-vec[chk==0] +} +if(ncol(m)>1){ +M=m +if(STAND)m=standm(m,est=median,scat=mad) +if(is.na(gval) && cop==1)gval<-sqrt(qchisq(.95,ncol(m))) +if(is.na(gval) && cop!=1)gval<-sqrt(qchisq(.975,ncol(m))) +if(cop==1 && is.na(center[1])){ +if(ncol(m)>2)center<-dmean(m,tr=.5,cop=1) +if(ncol(m)==2){ +tempd<-NA +for(i in 1:nrow(m)) +tempd[i]<-depth(m[i,1],m[i,2],m) +mdep<-max(tempd) +flag<-(tempd==mdep) +if(sum(flag)==1)center<-m[flag,] +if(sum(flag)>1)center<-apply(m[flag,],2,mean) +}} +if(cop==2 && is.na(center[1])){ +center<-cov.mcd(m)$center +} +if(cop==4 && is.na(center[1])){ +center<-cov.mve(m)$center +} +if(cop==3 && is.na(center[1])){ +center<-apply(m,2,median) +} +if(cop==5 && is.na(center[1])){ +center<-tbs(m)$center +} +if(cop==6 && is.na(center[1])){ +center<-rmba(m)$center +} +if(cop==7 && is.na(center[1])){ +center<-spat(m) +} +flag<-rep(0, nrow(m)) +outid <- NA +vec <- c(1:nrow(m)) +for (i in 1:nrow(m)){ +B<-m[i,]-center +dis<-NA +BB<-B^2 +bot<-sum(BB) +if(bot!=0){ +for (j in 1:nrow(m)){ +A<-m[j,]-center +temp<-sum(A*B)*B/bot +dis[j]<-sqrt(sum(temp^2)) +} +temp<-idealf(dis) +if(!MM)cu<-median(dis)+gval*(temp$qu-temp$ql) +if(MM)cu<-median(dis)+gval*mad(dis) +outid<-NA +temp2<-(dis> cu) +flag[temp2]<-1 +}} +if(sum(flag) == 0) outid <- NA +if(sum(flag) > 0)flag<-(flag==1) +outid <- vec[flag] +idv<-c(1:nrow(m)) +keep<-idv[!flag] +if(ncol(m)==2){ +if(plotit){ +m=M # plot data using the original scale. +plot(m[,1],m[,2],type="n",xlab=xlab,ylab=ylab) +points(m[keep,1],m[keep,2],pch="*") +if(length(outid)>0)points(m[outid,1],m[outid,2],pch="o") +if(op){ +tempd<-NA +keep<-keep[!is.na(keep)] +mm<-m[keep,] +for(i in 1:nrow(mm))tempd[i]<-depth(mm[i,1],mm[i,2],mm) +mdep<-max(tempd) +flag<-(tempd==mdep) +if(sum(flag)==1)center<-mm[flag,] +if(sum(flag)>1)center<-apply(mm[flag,],2,mean) +m<-mm +} +points(center[1],center[2],pch="+") +x<-m +temp<-fdepth(m,plotit=FALSE) +flag<-(temp>=median(temp)) +xx<-x[flag,] +xord<-order(xx[,1]) +xx<-xx[xord,] +temp<-chull(xx) +lines(xx[temp,]) +lines(xx[c(temp[1],temp[length(temp)]),]) +}}} +list(n=nv,n.out=length(outid),out.id=outid,keep=keep) +} + +skerd<-function(x,op=TRUE,kernel="gaussian",xlab='X',ylab=''){ +# +# Compute kernel density estimate +# for univariate data using S+ function density +# +# kernel=epanechnikov will use the Epanechnikov kernel. 
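+# op=FALSE calls density() with width=bandwidth.sj(x,method="dpi") (a Sheather-Jones
+# plug-in bandwidth); op=TRUE, the default, uses the default bandwidth of density().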
+# +if(!op)temp<-density(x,na.rm=TRUE,width=bandwidth.sj(x,method="dpi"),n=256) +if(op)temp<-density(x) +plot(temp$x,temp$y,type="n",ylab=ylab,xlab=xlab) +lines(temp$x,temp$y) +} + + +bkreg<-function(x,y,kerfun=akerd,pyhat=FALSE,plotit=TRUE,xlab="X",ylab="Y", +zlab="Z",xout=FALSE,outfun=outpro,pr=TRUE,theta=50,phi=25,duplicate="error", +expand=.5,scale=FALSE,ticktype="simple",...){ +# +# Kernel estimator for binary regression. +# (See Signorini and Jones, JASA, 2004, 119-) +# +x=as.matrix(x) +p=ncol(x) +p1=p+1 +xx<-elimna(cbind(x,y)) +x<-xx[,1:p] +y<-xx[,p1] +if(xout){ +m<-cbind(x,y) +flag<-outfun(x,plotit=FALSE,...)$keep +m<-m[flag,] +x<-m[,1:p] +y<-m[,p1] +} +x=as.matrix(x) +flag<-(y==1) +mv=sum(flag) +nv=sum(!flag) +phat<-NA +fhat<-kerfun(x[flag,],pyhat=TRUE,plotit=FALSE,pts=x) +ghat<-kerfun(x[!flag,],pyhat=TRUE,plotit=FALSE,pts=x) +phat<-mv*fhat/(mv*fhat+nv*ghat) +if(p==1){ +if(plotit){ +plot(x,y,xlab=xlab,ylab=ylab) +flag2<-order(x) +#lines(x[flag2],phat[flag2]) +lines(x[flag2],phat) +}} +if(p==2){ +if(plotit){ +library(akima) +if(pr){ +if(!scale)print("With dependence, suggest using scale=T") +} +fitr<-phat +iout<-c(1:length(fitr)) +nm1<-length(fitr)-1 +for(i in 1:nm1){ +ip1<-i+1 +for(k in ip1:length(fitr))if(sum(x[i,]==x[k,])==2)iout[k]<-0 +} +fitr<-fitr[iout>=1] # Eliminate duplicate points in the x-y plane +# This is necessary when doing three dimensional plots +# with the R function interp +mkeep<-x[iout>=1,] +fit<-interp(mkeep[,1],mkeep[,2],fitr,duplicate=duplicate) +persp(fit,theta=theta,phi=phi,expand=expand, +scale=scale,xlab=xlab,ylab=ylab,zlab=zlab,ticktype=ticktype) +}} +if(!pyhat)phat<-"Done" +phat +} + +logSM<-function(x,y,pyhat=FALSE,plotit=TRUE,xlab="X",ylab="Pred.Prob", +zlab=" ",xout=FALSE,outfun=outpro,pr=TRUE,theta=50,phi=25,duplicate="error",LP=TRUE,Lspan=.75, +expand=.5,scale=TRUE,fr=2,ticktype="simple",...){ +# +# A smoother designed specifically for binary outcomes +# LP=TRUE: With two independent variables, smooth the initial smooth using LOESS +# +# fr is span +# Lspan: when plotting the regression surface, +# LP =TRUE +# means that the plot will be smoothed using LOESS +# Lspan is the span used by LOESS +# +y=chbin2num(y) +x=as.matrix(x) +p=ncol(x) +p1=p+1 +xx<-elimna(cbind(x,y)) +x<-xx[,1:p] +y<-xx[,p1] +if(xout){ +m<-cbind(x,y) +flag<-outfun(x,plotit=FALSE,...)$keep +m<-m[flag,] +x<-m[,1:p] +y<-m[,p1] +} +if(length(unique(y))>2)stop('y should be binary') +# Next convert y to 0 and 1s +n=length(y) +yy=rep(0,n) +y=as.vector(y) +flag=y==max(y) +yy[flag]=1 +y=yy +x=as.matrix(x) +library(MASS) +m=cov.mve(x) +flag<-(y==1) +phat<-NA +m1=matrix(NA,nrow=length(y),ncol=length(y)) +for(i in 1:nrow(x))m1[,i]<-mahalanobis(x,x[i,],m$cov) +m2<-exp(-1*m1)*(sqrt(m1)<=fr) +m3<-matrix(y,length(y),length(y))*m2 +phat=apply(m3,2,sum)/apply(m2,2,sum) +if(p==1){ +if(plotit){ +plot(x,y,xlab=xlab,ylab=ylab) +flag2<-order(x) +lines(x[flag2],phat[flag2]) +}} +if(p==2){ +if(plotit){ +library(akima) +if(pr){ +if(!scale)print("With dependence, suggest using scale=TRUE") +} +fitr<-phat +iout<-c(1:length(fitr)) +nm1<-length(fitr)-1 +for(i in 1:nm1){ +ip1<-i+1 +for(k in ip1:length(fitr))if(sum(x[i,]==x[k,])==2)iout[k]<-0 +} +fitr<-fitr[iout>=1] # Eliminate duplicate points in the x-y plane +# This is necessary when doing three dimensional plots +# with the R function interp +if(LP){ +fitr=lplot(x[iout>=1,],fitr,pyhat=TRUE,pr=FALSE,plotit=FALSE,span=Lspan)$yhat +fitr[fitr>1]=1 +fitr[fitr<0]=0 +} +mkeep<-x[iout>=1,] +fit<-interp(mkeep[,1],mkeep[,2],fitr,duplicate=duplicate) 
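+# persp() below draws the smoothed probability surface with the vertical axis fixed to
+# the unit interval (zlim=c(0,1)); when LP=TRUE the smoothed values were also truncated
+# to [0,1] above.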
+persp(fit,theta=theta,phi=phi,expand=expand,zlim=c(0,1), +scale=scale,xlab=xlab,ylab=ylab,zlab=zlab,ticktype=ticktype) +}} +if(!pyhat)phat<-"Done" +phat +} + +logreg.pred<-function(x,y,pts=x,xout=FALSE,outfun=outpro,ROB=FALSE,ridge=FALSE){ +# +# logistic regression: estimate the probability of success for points in pts +# Default is to use pts=x +# +if(!ridge){ +if(!ROB)est=logreg(x,y,xout=xout,outfun=outfun)[,1] +else +est=wlogreg(x,y,)$coef +} +if(ridge)est=logistic.ridge(x,y,xout=xout,outfun=outfun,ROB=ROB)$ridge.est +p=length(est) +if(p==2){z=exp(est[1]+est[2]*pts) +pr=z/(1+z) +} +if(p>2){ +pr=NA +pts=as.matrix(pts) +if(ncol(pts)==1)pts=t(pts) +n=nrow(pts) +if(!is.matrix(pts))stop('pts should be a matrix') +if(ncol(pts)!=ncol(x))stop('pts should have the same number of col. as x') +for(i in 1:n){ +z=exp(est[1]+sum(est[2:p]*pts[i,])) +pr[i]=z/(1+z) +} +} +pr +} + + + +YYmanova<-function(x1,x2,tr=.2){ +# +# Do MANOVA using generalization of +# Yanagihara, H. \& Yuan, K. H. (2005). +# Three approximate solutions to the +# multivariate Behrens-Fisher problem. Communications in Statistics-- +# Simulation and Computation, 34, 975--988; see their eq. (2.7). +# +# x1 and x2 are assumed to be matrices +# +x1=elimna(x1) +x2=elimna(x2) +s1=winall(x1,tr=tr)$cov +s2=winall(x2,tr=tr)$cov +n1=nrow(x1) +n2=nrow(x2) +n=n1+n2 +g1=floor(n1*tr) +g2=floor(n2*tr) +h1=n1-2*g1 +h2=n2-2*g2 +h=h1+h2 +sbar=n2*s1/n+n1*s2/n +sbarinv=solve(sbar) +psi1=n2^2*(n-2)*(sum(diag(s1%*%sbarinv)))^2/(n^2*(n1-1))+ +n1^2*(n-2)*(sum(diag(s2%*%sbarinv)))^2/(n^2*(n2-1)) +psi2=n2^2*(n-2)*(sum(diag(s1%*%sbarinv%*%s1%*%sbarinv)))/(n^2*(n1-1))+ +n1^2*(n-2)*(sum(diag(s2%*%sbarinv%*%s2%*%sbarinv)))/(n^2*(n2-1)) +p=ncol(x1) +theta1=(p*psi1+(p-2)*psi2)/(p*(p+2)) +theta2=(psi1+2*psi2)/(p*(p+2)) +nuhat=(h-2-theta1)^2/((h-2)*theta2-theta1) +xb1=apply(x1,2,mean,tr=tr) +xb2=apply(x2,2,mean,tr=tr) +dif=xb1-xb2 +dif=as.matrix(dif) +Ttest=t(dif)%*%solve((n1-1)*s1/(h1*(h1-1))+(n2-1)*s2/(h2*(h2-1)))%*%dif +TF=(n-2-theta1)*Ttest/((n-2)*p) +pv=1-pf(TF,p,nuhat) +list(test.stat=TF,p.value=pv) +} + + +logreg<-function(x,y,xout=FALSE,outfun=outpro,plotit=FALSE,POLY=FALSE, +xlab='X',ylab='Y',zlab='',scale=TRUE ,expand=.5,theta=50,phi=25, +duplicate='error',ticktype='simple',...){ +# +# Perform logistic regression. +# The predictors are assumed to be stored in the n by p matrix x. +# The y values should be 1 or 0. +# +# xout=TRUE will remove outliers from among the x values and then fit +# the regression line. +# Default: +# One predictor, a mad-median rule is used. +# With more than one, projection method is used. 
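+# The value returned is the glm(binomial) coefficient summary with an added column of
+# Hochberg-adjusted p-values for the slopes (the entry for the intercept is set to NA).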
+# +# outfun=out will use MVE method +# +# plotit=TRUE will plot regression line +# POLY=T, will plot regression line assuming predictor +# is in col 1 of x and other columns are x (in col 1) raised to some power +# or some other function of x +# +y=chbin2num(y) +x<-as.matrix(x) +p=ncol(x) +xy=elimna(cbind(x,y)) +n=nrow(xy) +x=xy[,1:ncol(x)] +y=xy[,ncol(xy)] +x<-as.matrix(x) + +yy=rep(1,n) +vals=sort(unique(y)) +if(length(vals)!=2)stop('y should be binary') +flag=y==vals[2] +yy[!flag]=0 +y=yy + +if(xout){ +flag<-outfun(x,plotit=FALSE,...)$keep +x<-x[flag,] +y<-y[flag] +} +x<-as.matrix(x) +if(p==1 || POLY){ +xord=order(x[,1]) +x=x[xord,] +y=y[xord] +} +fitit=glm(formula=y~x,family=binomial) +init<-summary(fitit) +if(plotit){ +vals=fitted.values(fitit) +if(p==1){ +plot(x,y,xlab=xlab,ylab=ylab) +lines(x,vals) +} +if(p==2){ +if(!scale)print('With dependence, suggest using scale=TRUE') +fitr=vals +iout<-c(1:length(fitr)) +nm1<-length(fitr)-1 +for(i in 1:nm1){ +ip1<-i+1 +for(k in ip1:length(fitr))if(sum(x[i,]==x[k,])==2)iout[k]<-0 +} +fitr<-fitr[iout>=1] # Eliminate duplicate points in the x-y plane +# This is necessary when doing three dimensional plots +# with the R function interp +mkeep<-x[iout>=1,] +fit<-interp(mkeep[,1],mkeep[,2],fitr,duplicate=duplicate) +persp(fit,theta=theta,phi=phi,expand=expand, +scale=scale,xlab=xlab,ylab=ylab,zlab=zlab,ticktype=ticktype) +} +} +init$coef +p1=p+1 +p.adjusted.slopes=c(init$coef[1,1],p.adjust(init$coef[2:p1,4],method='hoch')) +p.adjusted.slopes[1]=NA +a=cbind(init$coef,p.adjusted.slopes) +a +} + +rplot.bin<-function(x,y,est=mean,scat=TRUE,fr=NULL,plotit=TRUE,pyhat=FALSE,pts=x,LP=FALSE, +theta=50,phi=25,scale=TRUE,expand=.5,SEED=TRUE, +nmin=0,xout=FALSE,outfun=outpro,xlab=NULL,ylab=NULL, +zlab='P(Y=1)',pr=TRUE,duplicate='error',...){ +# +# This function applies the running interval smoother, but is designed +# specifically for situations where y is binary. +# +# duplicate='error' +# In some situations where duplicate values occur, when plotting with +# two predictors, it is necessary to set duplicate='strip' +# +y=chbin2num(y) +x<-as.matrix(x) +p1<-ncol(x)+1 +p<-ncol(x) +xy<-cbind(x,y) +xy<-elimna(xy) +x<-xy[,1:p] +y<-xy[,p1] +x<-as.matrix(x) +if(length(unique(y))!=2)stop('y is not binary') +n=length(y) +Y=rep(0,n) +flag=y==max(y) +Y[flag]=1 +y=Y +x<-as.matrix(x) +if(ncol(x)==1){ +if(is.null(ylab))ylab='P(Y=1)' +if(is.null(xlab))ylab='X' +if(is.null(fr))fr=.8 +a=rplot(x,y,est=mean,xout=xout,outfun=outfun,fr=fr,xlab=xlab,ylab=ylab,pr=FALSE,LP=LP) +} +if(ncol(x)>1){ +id=chk4binary(x) +Lid=length(id) +if(Lid>0)stop('Binary independent variables detected. Use rplot.binv2') +if(is.null(xlab))xlab='X1' +if(is.null(ylab))ylab='X2' +if(is.null(fr))fr=1.2 +if(ncol(x)==2){ +if(scale){ +if(pr){print('scale=T is specified.') +print('If there is independence, might want to use scale=F') +a=rplot(x,y,est=mean,xout=xout,outfun=outfun,fr=fr,xlab=xlab,ylab=ylab,zlab=zlab,scale=scale,pr=FALSE) +}}}} +if(!pyhat)val <- 'DONE' +if(pyhat)val=rplot.pred(x,y,pts=pts,est=mean,xout=xout,outfun=outfun,fr=fr) +val +} + + + +rplot.binCI<-function(x,y,pts=NULL,alpha=.05,nmin=5,xout=FALSE,outfun=outpro,fr=.5,tr.plot=FALSE, +method=NULL,plotit=TRUE,LP=TRUE,xlab='X',ylab='P(Y=1|X)',...){ +# +# An alternative to logistic regression. +# +# For a collection of intervals among the values in +# x, compute the probability of success and a confidence based on the corresponding y values +# +# Default: use the deciles to define the intervals +# +# Example: pts=c(-1,0,1,2). 
The intervals would be (-1,0), (0,1), (1,2). +# +y=chbin2num(y) +xx<-elimna(cbind(x,y)) +x<-xx[,1] +y<-xx[,2] +if(is.null(pts)){ +id=duplicated(x) +pts=x[!id] +} +else plotit=FALSE +if(xout){ +m<-cbind(x,y) +if(ncol(m)!=2)stop('Only one explanatory variable is allowed') +if(identical(outfun,outblp))flag=outblp(x,y,plotit=FALSE)$keep +else +flag<-outfun(x,plotit=FALSE,...)$keep +m<-m[flag,] +x<-m[,1] +y<-m[,2] +} +n=length(x) +xor=order(x) +x=x[xor] +y=y[xor] +pts=sort(pts) +npts=length(pts) +# +if(length(unique(y))>2)stop('y should be binary') +# +# Determine which method will be used: +if(is.null(method)){ +if(n<80)method='AC' +if(n>=80)method='CP' +} + +# Next convert y to 0 and 1s if not already 0 and 1s +yy=rep(0,n) +yc=as.character(y) +flag=yc==max(yc) +yy[flag]=1 +y=yy +# +rmd<-matrix(NA,nrow=npts,ncol=7) +for(i in 1:npts){ +isub=near(x,pts[i],fr) +if(sum(isub)>=nmin){ +z=y[isub] +v=binom.conf(y=z,method=method,alpha=alpha,pr=FALSE) +rmd[i,1]=v$n +rmd[i,2]=min(x[isub]) +rmd[i,3]=max(x[isub]) +rmd[i,4]=pts[i] +rmd[i,5]=v$phat +rmd[i,6:7]=v$ci +}} +rs=elimna(rmd) +dimnames(rmd)=list(NULL,c('n','low.end','upper.end','pts','p.hat','ci.low','ci.up')) +if(plotit){ +if(tr.plot){ +v=quantile(rmd[,4],probs=c(.1,.9),na.rm=TRUE) +flag=(rmd[,4]>=v[1] & rmd[,4]<=v[2]) +rmd=rmd[flag,] +} +ys=rs[,5] +plot(rs[,4],rs[,5],ylim=c(0,1),xlab=xlab,ylab=ylab,type='n') +if(LP){ +z1=lplot.pred(rmd[,4],rmd[,5],pts=rmd[,2])$yhat +flag=z1>1 +z1[flag]=1 +flag=z1<0 +z1[flag]=0 +z2=lplot.pred(rmd[,4],rmd[,6],pts=rmd[,2])$yhat +flag=z2>1 +z2[flag]=1 +flag=z2<0 +z2[flag]=0 +z3=lplot.pred(rmd[,4],rmd[,7],pts=rmd[,2])$yhat +flag=z3>1 +z3[flag]=1 +flag=(z3<0) +z3[flag]=0 +lines(rmd[,4],z1) +lines(rmd[,4],z2,lty=2) +lines(rmd[,4],z3,lty=2) +} +if(!LP){ +lines(rmd[,4],rmd[,5]) +lines(rmd[,4],rmd[,6],lty=2) +lines(rmd[,4],rmd[,7],lty=2) +}} +id=duplicated(rmd[,2:3]) +rmd=elimna(rmd[!id,]) +rmd +} + + +wlogregv2<-function(x0,y,initwml=FALSE,const=0.5,kmax=1e3,maxhalf=10) +{ +# Computation of the estimator of Bianco and Yohai (1996) in logistic regression +# ------------- +# This is a slightly modified version of code due to +# Christophe Croux, Gentiane Haesbroeck, and Kristel Joossens +# (Here initwml defaults to F +# +# This program computes the estimator of Bianco and Yohai in +# logistic regression. By default, an intercept term is included +# and p parameters are estimated. +# +# For more details we refer to +# Croux, C., and Haesbroeck, G. (2003), ``Implementing the Bianco and Yohai +# estimator for Logistic Regression'', +# Computational Statistics and Data Analysis, 44, 273-295 +# +#Input: +#------- +# x0= n x (p-1) matrix containing the explanatory variables; +# y= n-vector containing binomial response (0 or 1); +# +# initwml= logical value for selecting one of the two possible methods for computing +# the initial value of the optimization process. If initwml=T (default), a +# weighted ML estimator is computed with weights derived from the MCD estimator +# computed on the explanatory variables. If initwml=F, a classical ML fit is perfomed. +# When the explanatory variables contain binary observations, it is recommended +# to set initwml to F or to modify the code of the algorithm to compute the weights +# only on the continuous variables. +# const= tuning constant used in the computation of the estimator (default=0.5); +# kmax= maximum number of iterations before convergence (default=1000); +# maxhalf= max number of step-halving (default=10). 
+# +# Example: +# x0=matrix(rnorm(100,1)) +# y0=numeric(runif(100)>0.5) +# BYlogreg(x0,y) +# +#Output: +#-------- +# list with +# 1st component: T or F if convergence achieved or not +# 2nd component: value of the objective function at the minimum +# p next components: estimates for the parameters. +# p last components: standard errors of the parameters (if first component is T) + +library(MASS) +x0=as.matrix(x0) +# n=nrow(x0) + p=ncol(x0)+1 +p0=p-1 + #Smallest value of the scale parameter before implosion + sigmamin=1e-4 + +# eliminate any rows with missing values +zz=elimna(cbind(x,y)) +x=as.matrix(zz[,1:p0]) +y=zz[,p] +n=nrow(x) +# x=as.matrix(cbind(rep(1,n),x0)) + x=as.matrix(cbind(rep(1,n),x)) + y=as.numeric(y) + + # Computation of the initial value of the optimization process + if (initwml==TRUE) + { + hp=floor(n*(1-0.25))+1 + mcdx=cov.mcd(x0, quantile.used =hp,method="mcd") + rdx=sqrt(mahalanobis(x0,center=mcdx$center,cov=mcdx$cov)) + vc=sqrt(qchisq(0.975,p-1)) + wrd=(rdx<=vc) + gstart=glm(y~x0,family=binomial,subset=wrd)$coef + } +else {gstart=glm(y~x0,family=binomial)$coef} + sigmastart=1/sqrt(sum(gstart^2)) + xistart=gstart*sigmastart + stscores=x %*% xistart +sigma1=sigmastart + #Initial value for the objective function + oldobj=mean(phiBY3(stscores/sigmastart,y,const)) + kstep=jhalf=1 + while ((kstep < kmax) & (jhalfoldobj)){ + hstep=hstep/2 + xi1=xistart+finalstep*hstep + xi1=xi1/sqrt(sum(xi1^2)) + scores1=x%*%xi1/sigma1 + newobj=mean(phiBY3(scores1,y,const)) + jhalf=jhalf+1 + } + CONV=FALSE + if ((jhalf==maxhalf+1) & (newobj>oldobj)) {CONV=TRUE + } else { + jhalf=1 + xistart=xi1 + oldobj=newobj + stscores=x%*% xi1 + kstep=kstep+1 + } + } + } + + if (kstep == kmax) { +CONV=FALSE # print("No convergence") + result=list(convergence=FALSE,objective=0,coef=t(rep(NA,p))) + } else { + gammaest=xistart/sigma1 + stander=sterby3(x0,y,const,gammaest) + result=list(convergence=CONV,coef=t(gammaest),sterror=stander) + } + return(result) +} + + + + +############################################################### +############################################################### +#Functions needed for the computation of estimator of Bianco and Yohai + +phiBY3 <- function(s,y,c3) +{ + s=as.double(s) + dev=log(1+exp(-abs(s)))+abs(s)*((y-0.5)*s<0) + return(rhoBY3(dev,c3)+GBY3Fs(s,c3)+GBY3Fsm(s,c3)) +} +rhoBY3 <- function(t,c3) +{ + (t*exp(-sqrt(c3))*as.numeric(t <= c3))+ + (((exp(-sqrt(c3))*(2+(2*sqrt(c3))+c3))-(2*exp(-sqrt(t))*(1+sqrt(t))))*as.numeric(t >c3)) +} +psiBY3 <- function(t,c3) +{(exp(-sqrt(c3))*as.numeric(t <= c3))+(exp(-sqrt(t))*as.numeric(t >c3))} +derpsiBY3 <- function(t,c3) +{ +res=NULL + for (i in 1:length(t)) +{ +if (t[i] <= c3) + { res=rbind(res,0) } +else +{res=rbind(res,-exp(-sqrt(t[i]))/(2*sqrt(t[i]))) } +} +res +} + + +sigmaBY3<-function(sigma,s,y,c3) {mean(phiBY3(s/sigma,y,c3))} + +derphiBY3=function(s,y,c3) +{ + Fs= exp(-(log(1+exp(-abs(s)))+abs(s)*(s<0))) + ds=Fs*(1-Fs) + dev=log(1+exp(-abs(s)))+abs(s)*((y-0.5)*s<0) + Gprim1=log(1+exp(-abs(s)))+abs(s)*(s<0) + Gprim2=log(1+exp(-abs(s)))+abs(s)*(s>0) + return(-psiBY3(dev,c3)*(y-Fs)+((psiBY3(Gprim1,c3)-psiBY3(Gprim2,c3))*ds)) +} + +der2phiBY3=function(s,y,c3) +{ + s=as.double(s) + Fs= exp(-(log(1+exp(-abs(s)))+abs(s)*(s<0))) + ds=Fs*(1-Fs) + dev=log(1+exp(-abs(s)))+abs(s)*((y-0.5)*s<0) + Gprim1=log(1+exp(-abs(s)))+abs(s)*(s<0) + Gprim2=log(1+exp(-abs(s)))+abs(s)*(s>0) + der2=(derpsiBY3(dev,c3)*(Fs-y)^2)+(ds*psiBY3(dev,c3)) + der2=der2+(ds*(1-2*Fs)*(psiBY3(Gprim1,c3)-psiBY3(Gprim2,c3))) + 
der2=der2-(ds*((derpsiBY3(Gprim1,c3)*(1-Fs))+(derpsiBY3(Gprim2,c3)*Fs))) + der2 +} + + +GBY3Fs <- function(s,c3) +{ + Fs= exp(-(log(1+exp(-abs(s)))+abs(s)*(s<0))) + resGinf=exp(0.25)*sqrt(pi)*(pnorm(sqrt(2)*(0.5+sqrt(-log(Fs))))-1) + resGinf=(resGinf+(Fs*exp(-sqrt(-log(Fs)))))*as.numeric(s <= -log(exp(c3)-1)) + resGsup=((Fs*exp(-sqrt(c3)))+(exp(0.25)*sqrt(pi)*(pnorm(sqrt(2)*(0.5+sqrt(c3)))-1)))*as.numeric(s > -log(exp(c3)-1)) + return(resGinf+resGsup) +} + + +GBY3Fsm <- function(s,c3) +{ + Fsm=exp(-(log(1+exp(-abs(s)))+abs(s)*(s>0))) + resGinf=exp(0.25)*sqrt(pi)*(pnorm(sqrt(2)*(0.5+sqrt(-log(Fsm))))-1) + resGinf=(resGinf+(Fsm*exp(-sqrt(-log(Fsm)))))*as.numeric(s >= log(exp(c3)-1)) + resGsup=((Fsm*exp(-sqrt(c3)))+(exp(0.25)*sqrt(pi)*(pnorm(sqrt(2)*(0.5+sqrt(c3)))-1)))*as.numeric(s < log(exp(c3)-1)) + return(resGinf+resGsup) +} + +sterby3 <- function(x0,y,const,estim) +{ + n=nrow(x0) + p=ncol(x0)+1 + + z=cbind(matrix(1,nrow=n),x0) + argum=z %*% estim + + matM=matrix(data=0,nrow=p,ncol=p) + IFsquar=matrix(data=0,nrow=p,ncol=p) + for (i in 1:n) +{ +myscalar=as.numeric(der2phiBY3(argum[i],y[i],const)) +matM=matM+myscalar * (z[i,] %*% t(z[i,])) +IFsquar=IFsquar+myscalar^2 * (z[i,] %*% t(z[i,])) +} + matM=matM/n + matMinv=solve(matM) + IFsquar=IFsquar/n + asvBY=matMinv %*% IFsquar %*% t(matMinv) + sqrt(diag(asvBY))/sqrt(n) +} + + +long2mat<-function(x,Sid.col,dep.col){ +# +# Have data in a matrix or data frame, x +# Sid.col indicates Subject's id +# Here, each subject has one or more rows of data +# +# Goal: store the data in a data frame where +# each row contains all of the data for an individual +# subject. +# +# dep.col indicates column of the outcome (dependent) variable +# This version assumed a single column of outcome values are to be +# rearranged. +# +if(length(dep.col)!=1)stop("Argument dep.col must have a single value") +if(is.null(dim(x)))stop("x must be a matrix or data frame") +Sid=unique(x[,Sid.col]) +n=nrow(x) +nid=length(Sid) +flag=(x[,Sid.col]==Sid[1]) +num.out=sum(flag) +res=matrix(NA,nrow=nid,ncol=num.out) +for(i in 1:nid){ +flag=(x[,Sid.col]==Sid[i]) +res[i,]=x[flag,dep.col] +} +res +} + +wlogreg<-function(x,y,initwml=FALSE,const=0.5,kmax=1e3,maxhalf=10){ +# +# +# Bianco and Yohai (1996) in logistic regression +# +# +options(warn=-1) +xy=cbind(x,y) +p1=ncol(xy) +xy=elimna(xy) +p=p1-1 +if(p==1){ +library(robustbase) +a=BYlogreg(x,y,initwml=initwml,const=const,kmax=kmax,maxhalf=maxhalf) +} +else +a=wlogregv2(x,y,initwml=initwml,const=const,kmax=kmax,maxhalf=maxhalf) +options(warn=0) +a +} + + + + + + +longcov2mat<-function(x,Sid.col,dep.col){ +# +# Have data in a matrix or data frame, x +# Sid.col indicates Subject's id +# Here, each subject has one or more rows of data +# +# In a regression setting, each subject has +# one or more covariates corresponding to columns. +# For example, two covariates might be stored in columns +# 3 and 6. +# +# Goal: For ith subject, store the covariate data in +# list mode, which is a matrix. +# So for ith subject, store covariate data in z[[i]], say, which +# contains a matrix of dimension m by p, +# m is the number of observations for ith subject and p +# the number of covariates. 
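+# Illustration (hypothetical call): longcov2mat(x, Sid.col=1, dep.col=c(3,6)) returns a
+# list whose ith element is the m by 2 matrix of the covariates in columns 3 and 6 for
+# the ith subject.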
+# +# dep.col, having length p, indicates columns containe the covariates +# Column Sid.col indicates the column containing subject's id +# +if(is.null(dim(x)))stop("x must be a matrix or data frame") +Sid=unique(x[,Sid.col]) +res=list() +nid=length(Sid) +p=length(dep.col)# Number of covariates for each subject +n=nrow(x) +flag=(x[,Sid.col]==Sid[1]) +n.each.s=sum(flag) # the number of rows for each subject +ns=n/n.each.s # the number of subjects +if(!is.wholenumber(ns))stop("Not all S's have same number of rows of data") +for(i in 1:ns){ +#res[[i]]=matrix(NA,nrow=n.each.s,ncol=p) +flag=(x[,Sid.col]==Sid[i]) +res[[i]]=as.matrix(x[flag,dep.col]) +} +res +} +is.wholenumber <- + function(x, tol = .Machine$double.eps^0.5) abs(x - round(x)) < tol + +long2g<-function(x,x.col,y.col,s.id,grp.id,regfun=tsreg,MAR=TRUE,tr=.2){ +# +# x is a matrix or data frame. +# +# Longitudinal data, compare two groups, where the groups correspond to two +# values in column +# grp.id. +# The outcome (dependent) variable is assumed to be stored in +# the column indicated by the argument y.col. +# Example, y.col=3 means the outcome variable of interest is in col. 3 +# Predictors are stored in columns indicated by +# x.col. +# s.id indicates column where subject's id is stored. +# +# Assuming data are stored as for example in the R variable +# Orthodont, +# which can be accessed via the command library(nlme) +# +m=matsplit(x,grp.id) +g1=longreg(m$m1,x.col,y.col,s.id,regfun)$est.S +g2=longreg(m$m2,x.col,y.col,s.id,regfun)$est.S +res=list() +if(MAR){ +for(iv in 1:ncol(g1))res[[iv]]=yuen(g1[,iv],g2[,iv],tr=tr) +} +if(!MAR)res=smean2(g1,g2) +res +} + +longreg.plot<-function(x,x.col,y.col,s.id,regfun=tsreg,scat=TRUE,xlab="X", +ylab="Y"){ +# +# x is a data frame or matrix +# +# Longitudinal data: plot regression lines +# +# For each subject, fit a regression line +# using outcome data in col y.col and predictors, usually times +# when measures were taken, in columns indicated by x.col. +# s.id indicates column where subject's id is stored. +# +# Assuming data are stored as for example in the R variable +# Orthodont, +# which can be accessed via the command library(nlme) +# For this data set, x.col=2 would indicated that the +# participants age at the time of being measured, is used +# to predict the outcome variable. +# +ymat=long2mat(x,s.id,y.col) # matrix, ith row contains outcome y +# for the ith subject. 
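+# xvals, created next, is a list with one matrix of covariate values per subject; a
+# regression line is then fit to each subject's data and all of the fitted lines are
+# added to a single scatterplot of the pooled data.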
+# +xvals=longcov2mat(x,s.id,x.col)# list mode +n=nrow(ymat) +p=length(x.col)+1 +if(p!=2)stop("Plot allows a single covariate only") +outmat=matrix(NA,nrow=n,ncol=p) +datx=NULL +daty=NULL +for(i in 1:n){ +outmat[i,]=regfun(as.matrix(xvals[[i]]),ymat[i,])$coef +temp=as.matrix(xvals[[i]]) +datx=c(datx,temp) +daty=c(daty,ymat[i,]) +} +if(!scat)plot(datx,daty,type="n",xlab=xlab,ylab=ylab) +if(scat)plot(datx,daty,xlab=xlab,ylab=ylab) +for(i in 1:n)abline(outmat[i,1],outmat[i,2]) +} + +hotel1.tr<-function(x,null.value=0,tr=.2) { +# +# Perform a trimmed analog of Hotelling's (one-sample) T^2 test +# That is, for p-variate data, test the hypothesis that the p marginal +# trimmed means are equal to the value specified by +# the argument null.value +# +if (is.data.frame(x)) + x <- as.matrix(x) +x=elimna(x) + if(!is.matrix(x)) + stop("'x' must be a numeric matrix or a data frame") + n <- nrow(x) + p <- ncol(x) + mu=null.value +xbar=apply(x,2,mean,tr=tr) + if(!is.numeric(mu) || ((lmu <- length(mu)) > 1 & lmu != p)) + stop("'null.value' must be a numeric vector of length ", p) +if(lmu == 1) mu <- rep(mu, p) + xbar.mu <- xbar - mu + V <- winall(x,tr=tr)$cov +h=n-2*floor(n*tr) + k <- h / (n - 1) * (h - p) / p + stat <- k * crossprod(xbar.mu, solve(V, xbar.mu))[1, ] + pvalue <- 1 - pf(stat, p, h - p) +list(test.statistic = stat, degrees_of_freedom = c(p, h - p), p.value = +pvalue, estimate = xbar, + null.value = mu) +} + +hotel1<-function(x,null.value=0,tr=0) { +# +# Perform a trimmed analog of Hotelling's (one-sample) T^2 test +# That is, for p-variate data, test the hypothesis that the p marginal +# trimmed means are equal to the value specified by +# the argument null.value +# +if (is.data.frame(x)) + x <- as.matrix(x) +x=elimna(x) + if(!is.matrix(x)) + stop("'x' must be a numeric matrix or a data frame") + n <- nrow(x) + p <- ncol(x) + mu=null.value +xbar=apply(x,2,mean,tr=tr) + if(!is.numeric(mu) || ((lmu <- length(mu)) > 1 & lmu != p)) + stop("'null.value' must be a numeric vector of length ", p) +if(lmu == 1) mu <- rep(mu, p) + xbar.mu <- xbar - mu + V <- winall(x,tr=tr)$cov +h=n-2*floor(n*tr) + k <- h / (n - 1) * (h - p) / p + stat <- k * crossprod(xbar.mu, solve(V, xbar.mu))[1, ] + pvalue <- 1 - pf(stat, p, h - p) +list(test.statistic = stat, degrees_of_freedom = c(p, h - p), p.value = +pvalue, estimate = xbar, + null.value = mu) +} + + wwmcp<-function(J,K,x,tr=.2,alpha=.05,dif=TRUE,method='hoch'){ +# +# Do all multiple comparisons for a within-by-within design +# using trimmed means +# +conM=con2way(J,K) +A=rmmcp(x,con=conM$conA,tr=tr,alpha=alpha,dif=dif) + A$test[,4]=p.adjust(A$test[,3],method=method) + dimnames(A$test)=list(NULL,c('con.num', 'test', 'p.value','adj.p.value', 'se')) +B=rmmcp(x,con=conM$conB,tr=tr,alpha=alpha,dif=dif) + B$test[,4]=p.adjust(B$test[,3],method=method) + dimnames(B$test)=list(NULL,c('con.num', 'test', 'p.value','adj.p.value', 'se')) +AB=rmmcp(x,con=conM$conAB,tr=tr,alpha=alpha,dif=dif) + AB$test[,4]=p.adjust(AB$test[,3],method=method) + dimnames(AB$test)=list(NULL,c('con.num', 'test', 'p.value','adj.p.value', 'se')) +list(Factor_A=A,Factor_B=B,Factor_AB=AB) +} + +wwmcpES<-function(J,K,x,tr=.2,alpha=.05,dif=TRUE){ +# +# Do all multiple comparisons for a within-by-within design +# using trimmed means +# +stop('Use ww.es instead') +conM=con2way(J,K) +A=rmmcpES(x,con=conM$conA,tr=tr,alpha=alpha,dif=dif) +B=rmmcpES(x,con=conM$conB,tr=tr,alpha=alpha,dif=dif) +AB=rmmcpES(x,con=conM$conAB,tr=tr,alpha=alpha,dif=dif) +list(Factor_A=A,Factor_B=B,Factor_AB=AB) +} + + 
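+# Illustrative use of wwmcp (hypothetical data layout): for a 2-by-3 within-by-within
+# design with the six repeated measures stored in the columns of a matrix m, ordered
+# (1,1),(1,2),(1,3),(2,1),(2,2),(2,3), the call
+# wwmcp(2,3,m)
+# tests all linear contrasts relevant to Factor A, Factor B and the A-by-B interaction
+# using 20% trimmed means of difference scores (dif=TRUE), with Hochberg-adjusted
+# p-values added to each table of results.
+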
+wwmcpbt<-function(J,K,x, tr=.2, dif=TRUE, alpha = 0.05, nboot = 599){ +# +# Do multiple comparisons for a within-by-within design. +# using a bootstrap-t method and trimmed means. +# All linear contrasts relevant to main effects and interactions +# are tested. +# +# +conM=con2way(J,K) +A=lindepbt(x,con=conM$conA,alpha=alpha,tr=tr,dif=dif,nboot=nboot) +B=lindepbt(x,con=conM$conB,alpha=alpha,tr=tr,dif=dif,nboot=nboot) +AB=lindepbt(x,con=conM$conAB,alpha=alpha,tr=tr,dif=dif,nboot=nboot) +list(Factor_A=A,Factor_B=B,Factor_AB=AB) +} + + +wwmcppb<-function(J,K,x, alpha = 0.05, con = 0,est=tmean, plotit = FALSE, + dif = TRUE, grp = NA, nboot = NA, BA = TRUE, hoch = TRUE, xlab = "Group 1", + ylab = "Group 2", pr = TRUE, SEED = TRUE,...){ +# +# Do all multiple comparisons for a within-by-within design. +# using a percentile bootstrap method and trimmed means +# +conM=con2way(J,K) +A=rmmcppb(x,con=conM$conA,alpha=alpha,dif=dif,plotit=plotit,est=est, +nboot=nboot,BA=BA,hoch=hoch,SEED=SEED,xlab=xlab,ylab=ylab,pr=pr,...) +B=rmmcppb(x,con=conM$conB,alpha=alpha,dif=dif, +plotit=plotit,est=est,nboot=nboot,BA=BA,hoch=hoch, +SEED=SEED,xlab=xlab,ylab=ylab,pr=pr,...) +AB=rmmcppb(x,con=conM$conAB,alpha=alpha,dif=dif,plotit=plotit,est=est, +nboot=nboot,BA=BA,hoch=hoch,SEED=SEED,xlab=xlab,ylab=ylab,pr=pr,...) +list(Factor_A=A,Factor_B=B,Factor_AB=AB) +} + +wmcppb<-function(x, y=NULL,alpha = 0.05, con = 0,est=tmean, plotit = FALSE, + dif = TRUE, grp = NA, nboot = NA, BA = TRUE, hoch = TRUE, xlab = "Group 1", + ylab = "Group 2", pr = TRUE, SEED = TRUE, ...){ +# +# Do all multiple comparisons for a repeated measures design. +# using a percentile bootstrap method and trimmed means +# +if(!is.null(y))x=cbind(x,y) +A=rmmcppb(x,con=con,alpha=alpha,dif=dif,plotit=plotit,est=est, +nboot=nboot,BA=BA,hoch=hoch,SEED=SEED,xlab=xlab,ylab=ylab,pr=pr,...) +A +} + +lindepbt<-function(x, con = NULL, tr = 0.2, alpha = 0.05,nboot=599,dif=TRUE,method='holm', +SEED=TRUE){ +# +# MCP on trimmed means with FWE controlled with Rom's method +# Using a bootstrap-t method. +# +# dif=T, difference scores are used. And for linear contrasts a simple +# extension is used. +# +# dif=F, hypotheses are tested based on the marginal trimmed means. 
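+#
+# Illustrative sketch (hypothetical data): for an n-by-4 matrix m of
+# dependent groups, lindepbt(m) tests all pairwise comparisons (con defaults
+# to all pairs), while lindepbt(m, con=con2way(2,2)$conAB, dif=FALSE) would
+# test the interaction contrast using the marginal trimmed means.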
+# +if(SEED)set.seed(2) +if(is.data.frame(x))x=as.matrix(x) +if(is.list(x))x=matl(x) +if(is.null(con))con=con.all.pairs(ncol(x)) # all pairwise +x=elimna(x) +n=nrow(x) +flagcon=FALSE +if(!is.matrix(x))x<-matl(x) +if(!is.matrix(x))stop("Data must be stored in a matrix or in list mode.") +con<-as.matrix(con) +J<-ncol(x) +xbar<-vector("numeric",J) +nval<-nrow(x) +h1<-nrow(x)-2*floor(tr*nrow(x)) +df<-h1-1 +xbar=apply(x,2,mean,tr=tr) +if(sum(con^2!=0))CC<-ncol(con) +ncon<-CC +if(alpha==.05){ +dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) +if(ncon > 10){ +avec<-.05/c(11:ncon) +dvec<-c(dvec,avec) +}} +if(alpha==.01){ +dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) +if(ncon > 10){ +avec<-.01/c(11:ncon) +dvec<-c(dvec,avec) +}} +if(alpha != .05 && alpha != .01)dvec<-alpha/c(1:ncon) +if(nrow(con)!=ncol(x))warning("The number of groups does not match the number + of contrast coefficients.") +ncon<-ncol(con) +psihat<-matrix(0,ncol(con),4) +dimnames(psihat)<-list(NULL,c("con.num","psihat","ci.lower","ci.upper")) +test<-matrix(0,ncol(con),6) +dimnames(test)<-list(NULL,c("con.num","test","p.value","p.crit","se",'p.adjusted')) +temp1<-NA +for (d in 1:ncol(con)){ +psihat[d,1]<-d +# +# !dif Use marginal trimmed means +# +if(!dif){ +psihat[d,2]<-sum(con[,d]*xbar) +# +# +sejk<-0 +for(j in 1:J){ +for(k in 1:J){ +djk<-(nval-1)*wincor(x[,j],x[,k], tr)$cov/(h1*(h1-1)) +sejk<-sejk+con[j,d]*con[k,d]*djk +}} +sejk<-sqrt(sejk) +test[d,1]<-d +test[d,2]<-sum(con[,d]*xbar)/sejk +test[d,5]<-sejk +# +# now use bootstrap-t to determine p-value +# +data<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot) +xcen=x +for(j in 1:ncol(x))xcen[,j]=xcen[,j]-tmean(x[,j],tr=tr) +bvec=apply(data,1,lindep.sub,xcen,con[,d],tr) +bsort<-sort(abs(bvec)) +ic<-round((1-alpha)*nboot) +ci<-0 +psihat[d,3]<-psihat[d,2]-bsort[ic]*test[d,5] +psihat[d,4]<-psihat[d,2]+bsort[ic]*test[d,5] +p.value<-mean(abs(test[d,2])<=abs(bvec)) +temp1[d]=p.value +} +if(dif){ +for(j in 1:J){ +if(j==1)dval<-con[j,d]*x[,j] +if(j>1)dval<-dval+con[j,d]*x[,j] +} +temp=trimcibt(dval,tr=tr,alpha=alpha,nboot=nboot,pr=FALSE) +temp1[d]<-temp$p.value #trimci(dval,tr=tr,pr=FALSE)$p.value +test[d,1]<-d +test[d,2]=temp$test.stat +test[d,5]<-trimse(dval,tr=tr) +psihat[d,2]<-mean(dval,tr=tr) +psihat[d,3]<-temp$ci[1] #psihat[,2]-qt(1-test[,4]/2,df)*test[,5] +psihat[d,4]<-temp$ci[2] #psihat[,2]+qt(1-test[,4]/2,df)*test[,5] +}} +# +# d ends here +# +test[,3]<-temp1 +temp2<-order(0-temp1) +zvec<-dvec[1:ncon] +sigvec<-(test[temp2,3]>=zvec) +test[temp2,4]<-zvec +if(flagcon)num.sig<-sum(test[,4]<=test[,5]) +if(!flagcon){num.sig<-sum(test[,3]<=test[,4]) +test[,6]=p.adjust(test[,3],method=method) +} +list(test=test,psihat=psihat,con=con,num.sig=num.sig) +} + +lindep.sub<-function(data,x,con=con,tr=tr){ +con=as.matrix(con) +res=rmmcp(x[data,],con=con,tr=tr,dif=FALSE)$test[,2] +res +} + +mcp.nestAP<-function(x,tr=.2){ +# +# Nested ANOVA +# +# Strategy: for each level of factor A, pool the data +# and then perform the analysis +# +# x is assumed to have list mode with length J, +# the number of independent groups. +# +# x[[1]] contains an n by K matrix, the nested data +# for the first level of the first factor. +# x[[2]] contains an n by K matrix, the nested data +# for the second level of the first factor, etc. 
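+#
+# Illustrative sketch (hypothetical data): with J=3 levels of Factor A and
+# K=4 nested groups within each level, x is a list of length 3 whose
+# elements are n-by-4 matrices; mcp.nestAP(x) pools the columns within each
+# level and then calls lincon on the resulting three groups.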
+# + xx=list() +for(j in 1: length(x))xx[[j]]=as.vector(x[[j]]) +results=lincon(xx,tr=tr) +results +} + +outmgvad<-function(m,center=NA,plotit=TRUE,op=1, +xlab="VAR 1",ylab="VAR 2",rate=.05,iter=100,ip=6,pr=TRUE){ +# +# Adjusts the critical value, gval used by outmgv, +# so that the outside rate per observation, under normality +# is approximately equal to the value given by the argument +# rate, which defaults to .05. +# That is, expected proportion of points declared outliers under normality +# is intended to be rate=.05 +# +# When dealing with p-variate data, p>9, this adjustment can be crucial +# +m=elimna(m) +n=nrow(m) +newgval=sqrt(qchisq(.975,ncol(m))) +z=array(rmul(n*iter*ncol(m)),c(iter,n,ncol(m))) +newq=0 +gtry=NA +val=NULL +for(itry in 1:ip){ +newq=newq+9/10^itry +gtry[itry]=newq +} +gtry=c(.95,.975,gtry[-1]) +if(pr)print("Computing adjustment") +for(itry in 1:ip){ +for(i in 1:iter){ +temp=outmgv.v2(z[i,,],gval=gval,op=op)$out.id +val[i]=length(temp) +} +erate=mean(val)/n +if(erate1)temp$points(x[outid,],col="red") +} +if(!COLOR){ +if(length(outid)==1)temp$points(t(as.matrix(x[outid,])),pch="*") +if(length(outid)>1)temp$points(x[outid,],pch="*") +} +} +if(reg.plane){ +vals<-regfun(x[,1:2],x[,3],...)$coef +if(COLOR)temp$plane(vals,col="blue") +if(!COLOR)temp$plane(vals) +} +} + +ees.ci<-function(x,y,SEED=TRUE,nboot=400,tr=.2,alpha=.05,pr=TRUE){ +# +# Compute a 1-alpha confidence interval +# for a robust, heteroscedastic measure of effect size +# +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +x=elimna(x) +y=elimna(y) +bvec=0 +datax<-matrix(sample(x,size=length(x)*nboot,replace=TRUE),nrow=nboot) +datay<-matrix(sample(y,size=length(x)*nboot,replace=TRUE),nrow=nboot) +for(i in 1:nboot){ +bvec[i]=yuenv2(datax[i,],datay[i,],tr=tr,SEED=FALSE)$Var.Explained +} +bvec<-sort(bvec) +crit<-alpha/2 +icl<-round(crit*nboot)+1 +icu<-nboot-icl +ci<-NA +ci[1]<-bvec[icl] +pchk=yuen(x,y,tr=tr)$p.value +if(pchk>alpha)ci[1]=0 +ci[2]<-bvec[icu] +if(ci[1]<0)ci[1]=0 +ci=sqrt(ci) +ci +} +wwwtrimbt<-function(J, K,L, x, tr = 0.2, JKL = J * K*L, con = 0, + alpha = 0.05, grp =c(1:JKL), nboot = 599,SEED = TRUE, ...){ + # + # A bootstrap-t for a within-by-within-by-within omnibus tests + # for all main effects and interactions + # + # The R variable x is assumed to contain the raw + # data stored in list mode or in a matrix. + # If in list mode, x[[1]] contains the data + # for the first level of both factors: level 1,1. + # x[[2]] is assumed to contain the data for level 1 of the + # first factor and level 2 of the second: level 1,2 + # x[[K]] is the data for level 1,K + # x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc. + # +# +# within-by-within-by-within design +# +# JKL dependent groups +# + + # If the data are in a matrix, column 1 is assumed to + # correspond to x[[1]], column 2 to x[[2]], etc. + # + # When in list mode x is assumed to have length JK, the total number + # groups being tested, but a subset of the data can be analyzed + # using grp + # +if(is.data.frame(x))x=as.matrix(x) + if(is.matrix(x)) { + y <- list() + for(j in 1:ncol(x)) + y[[j]] <- x[, j] +x=y +} +ncon=ncol(con) + p <- J*K*L +JKL=p +if(p>length(x))stop("JKL is less than the Number of groups") +JK=J*K +KL=K*L +# v <- matrix(0, p, p) + data <- list() +xx=list() + for(j in 1:length(x)) { +xx[[j]]=x[[grp[j]]] # save input data +data[[j]] = xx[[j]] - mean(xx[[j]], tr = tr) +# # Now have the groups in proper order. 
+ } + if(SEED)set.seed(2) + # set seed of random number generator so that + # results can be duplicated. + # Next determine the n_j values + bsam = list() + bdat = list() +aboot=NA +bboot=NA +cboot=NA +abboot=NA +acboot=NA +bcboot=NA +abcboot=NA +test.stat=wwwtrim(J,K,L,xx,tr=tr) +nv=length(x[[1]]) + for(ib in 1:nboot) { +bdat[[j]] = sample(nv, size = nv, replace =TRUE) +for(k in 1:JKL) bsam[[k]] = data[[k]][bdat[[j]]] +temp=wwwtrim(J,K,L,bsam,tr=tr) +aboot[ib]=temp$Qa +bboot[ib]=temp$Qb +cboot[ib]=temp$Qc +acboot[ib]=temp$Qac +bcboot[ib]=temp$Qbc +abboot[ib]=temp$Qab +abcboot[ib]=temp$Qabc +} +pbA=NA +pbB=NA +pbC=NA +pbAB=NA +pbAC=NA +pbBC=NA +pbABC=NA +pbA=mean(test.stat$Qa[1,1]length(x))stop("JKL is less than the Number of groups") +JK=J*K +KL=K*L + v <- matrix(0, p, p) + data <- list() +xx=list() + for(j in 1:length(x)) { + data[[j]] <- x[[grp[j]]] +xx[[j]]=x[[grp[j]]] # save input data + # Now have the groups in proper order. + data[[j]] = data[[j]] - mean(data[[j]], tr = tr) + } + x <- data # centered data xx has original data +test=bwwtrim(J,K,L,xx,tr=tr) + if(SEED) + set.seed(2) + # set seed of random number generator so that + # results can be duplicated. + bsam = list() + bdat = list() +aboot=NA +bboot=NA +cboot=NA +abboot=NA +acboot=NA +bcboot=NA +abcboot=NA + for(ib in 1:nboot) { + ilow <- 1 - KL + iup = 0 + for(j in 1:J) { + ilow <- ilow + KL + iup = iup + KL +nv=length(x[[ilow]]) + bdat[[j]] = sample(nv, size = nv, replace =TRUE) +for(k in ilow:iup){ + bsam[[k]] = x[[k]][bdat[[j]]] +} +} +temp=bwwtrim(J,K,L,bsam,tr=tr) +aboot[ib]=temp$Qa +bboot[ib]=temp$Qb +cboot[ib]=temp$Qc +acboot[ib]=temp$Qac +bcboot[ib]=temp$Qbc +abboot[ib]=temp$Qab +abcboot[ib]=temp$Qabc + } +pbA=NA +pbB=NA +pbC=NA +pbAB=NA +pbAC=NA +pbBC=NA +pbABC=NA +pbA=mean(test$Qa[1,1]length(x))stop("JKL is less than the Number of groups") +JK=J*K + v <- matrix(0, p, p) + data <- list() +xx=list() + for(j in 1:length(x)) { + data[[j]] <- x[[grp[j]]] +xx[[j]]=x[[grp[j]]] # save input data + # Now have the groups in proper order. + data[[j]] = data[[j]] - mean(data[[j]], tr = tr) + } +#ilow=0-L +#iup=0 +#for(j in 1:JK){ +#ilow <- ilow + L +# iup = iup + L +#sel <- c(ilow:iup) +#xx[sel]=listm(elimna(matl(xx[sel]))) +# v[sel, sel] <- covmtrim(xx[sel], tr) +# } +test.stat=bbwtrim(J,K,L,xx,tr=tr) + x <- data # Centered data +# jp <- 1 - K +# kv <- 0 + if(SEED) + set.seed(2) + # set seed of random number generator so that + # results can be duplicated. + testA = NA + testB = NA + testC=NA + testAB = NA + testAC = NA + testBC = NA + testABC = NA + bsam = list() + bdat = list() +aboot=NA +bboot=NA +cboot=NA +abboot=NA +acboot=NA +bcboot=NA +abcboot=NA +nvec=NA + for(j in 1:JK){ + nvec[j] = length(x[[j]]) + for(ib in 1:nboot) { + ilow <- 1 - L + iup = 0 + for(j in 1:JK) { + ilow <- ilow + L + iup = iup + L +nv=length(x[[ilow]]) + bdat[[j]] = sample(nv, size = nv, replace =TRUE) +for(k in ilow:iup){ + bsam[[k]] = x[[k]][bdat[[j]]] +} +} +temp=bbwtrim(J,K,L,bsam,tr=tr) +aboot[ib]=temp$Qa +bboot[ib]=temp$Qb +cboot[ib]=temp$Qc +acboot[ib]=temp$Qac +bcboot[ib]=temp$Qbc +abboot[ib]=temp$Qab +abcboot[ib]=temp$Qabc +}} +pbA=NA +pbB=NA +pbC=NA +pbAB=NA +pbAC=NA +pbBC=NA +pbABC=NA +pbA=mean(test.stat$Qa[1,1]4)nboot<-5000 +} +n<-nrow(mat) +crit.vec<-alpha/c(1:d) +connum<-ncol(con) +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. 
+xbars<-apply(mat,2,est) +psidat<-NA +for (ic in 1:connum)psidat[ic]<-sum(con[,ic]*xbars) +psihat<-matrix(0,connum,nboot) +psihatcen<-matrix(0,connum,nboot) +bvec<-matrix(NA,ncol=J,nrow=nboot) +bveccen<-matrix(NA,ncol=J,nrow=nboot) +print("Taking bootstrap samples. Please wait.") +data<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot) +for(ib in 1:nboot){ +bvec[ib,]<-apply(x[data[ib,],],2,est,...) +bveccen[ib,]<-apply(xcen[data[ib,],],2,est,...) +} +# +# Now have an nboot by J matrix of bootstrap values. +# +test<-1 +bias<-NA +tval<-NA +tvalcen<-NA +for (ic in 1:connum){ +psihat[ic,]<-apply(bvec,1,bptdpsi,con[,ic]) +psihatcen[ic,]<-apply(bveccen,1,bptdpsi,con[,ic]) +tvalcen[ic]<-sum((psihatcen[ic,]==0))/nboot +bias[ic]<-sum((psihatcen[ic,]>0))/nboot+sum((psihatcen[ic,]==0))/nboot-.5 +tval[ic]<-sum((psihat[ic,]==0))/nboot +if(BA){ +test[ic]<-sum((psihat[ic,]>0))/nboot+tval[ic]-.1*bias[ic] +if(test[ic]<0)test[ic]<-0 +} +if(!BA)test[ic]<-sum((psihat[ic,]>0))/nboot+tval[ic] +test[ic]<-min(test[ic],1-test[ic]) +} +test<-2*test +ncon<-ncol(con) +if(alpha==.05){ +dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) +dvecba<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) +if(ncon > 10){ +avec<-.05/c(11:ncon) +dvec<-c(dvec,avec) +}} +if(alpha==.01){ +dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) +dvecba<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) +if(ncon > 10){ +avec<-.01/c(11:ncon) +dvec<-c(dvec,avec) +}} +if(hoch)dvec<-alpha/(2* c(1:ncon)) +dvec<-2*dvec +if(alpha != .05 && alpha != .01){ +dvec<-alpha/c(1:ncon) +dvecba<-dvec +dvec[1]<-alpha/2 +} +if(plotit && ncol(bvec)==2){ +z<-c(0,0) +one<-c(1,1) +plot(rbind(bvec,z,one),xlab=xlab,ylab=ylab,type="n") +points(bvec) +totv<-apply(x,2,est,...) +cmat<-var(bvec) +dis<-mahalanobis(bvec,totv,cmat) +temp.dis<-order(dis) +ic<-round((1-alpha)*nboot) +xx<-bvec[temp.dis[1:ic],] +xord<-order(xx[,1]) +xx<-xx[xord,] +temp<-chull(xx) +lines(xx[temp,]) +lines(xx[c(temp[1],temp[length(temp)]),]) +abline(0,1) +} +temp2<-order(0-test) +ncon<-ncol(con) +zvec<-dvec[1:ncon] +if(BA)zvec<-dvecba[1:ncon] +sigvec<-(test[temp2]>=zvec) +output<-matrix(0,connum,6) +dimnames(output)<-list(NULL,c("con.num","psihat","p-value","p.crit", +"ci.lower","ci.upper")) +tmeans<-apply(mat,2,est,...) +psi<-1 +for (ic in 1:ncol(con)){ +output[ic,2]<-sum(con[,ic]*tmeans) +output[ic,1]<-ic +output[ic,3]<-test[ic] +output[temp2,4]<-zvec +temp<-sort(psihat[ic,]) +icl<-round(output[ic,4]*nboot/2)+1 +icu<-nboot-(icl-1) +output[ic,5]<-temp[icl] +output[ic,6]<-temp[icu] +} +} +if(PCI){ +if(dif){ +plotCI(output[,2],ali=output[,5],aui=output[,6],xlab='Difference',ylab=ylab.ebar) +}} +num.sig<-sum(output[,3]<=output[,4]) +list(output=output,con=con,num.sig=num.sig) +} +wwtrimbt<-function(J, K, x, tr = 0.2, JK = J*K, con = 0, + alpha = 0.05, grp =c(1:JK), nboot = 599,SEED = TRUE, ...){ + # + # A bootstrap-t for a within-by-within omnibus tests + # for all main effects and interactions + # + # The R variable x is assumed to contain the raw + # data stored in list mode or in a matrix. + # If in list mode, x[[1]] contains the data + # for the first level of both factors: level 1,1. + # x[[2]] is assumed to contain the data for level 1 of the + # first factor and level 2 of the second: level 1,2 + # x[[K]] is the data for level 1,K + # x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc. + # + # If the data are in a matrix, column 1 is assumed to + # correspond to x[[1]], column 2 to x[[2]], etc. 
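+ # For example (illustrative), with J=2 and K=3 the six columns of the
+ # matrix are taken in the order (1,1),(1,2),(1,3),(2,1),(2,2),(2,3).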
+ # + # When in list mode x is assumed to have length JK, the total number + # groups being tested, but a subset of the data can be analyzed + # using grp + # +if(is.data.frame(x))x=as.matrix(x) + if(is.matrix(x)) { + y <- list() + for(j in 1:ncol(x)) + y[[j]] <- x[, j] +x=y +} +ncon=ncol(con) + p <- J*K +JK=p +if(p>length(x))stop("JK is less than the Number of groups") +JK=J*K + data <- list() +xx=list() + for(j in 1:length(x)) { +xx[[j]]=x[[grp[j]]] # save input data +data[[j]] = xx[[j]] - mean(xx[[j]], tr = tr) +# # Now have the groups in proper order. + } + if(SEED)set.seed(2) + # set seed of random number generator so that + # results can be duplicated. + bsam = list() + bdat = list() +aboot=NA +bboot=NA +cboot=NA +abboot=NA +test.stat=wwtrim(J,K,xx,tr=tr) +nv=length(x[[1]]) + for(ib in 1:nboot) { +bdat[[j]] = sample(nv, size = nv, replace =TRUE) +for(k in 1:JK) bsam[[k]] = data[[k]][bdat[[j]]] +temp=wwtrim(J,K,bsam,tr=tr) +aboot[ib]=temp$Qa +bboot[ib]=temp$Qb +abboot[ib]=temp$Qab +} +pbA=NA +pbB=NA +pbAB=NA +pbA=mean(test.stat$Qa[1,1]0 || ci.up<0){ +pv=alpha[i] +flag=T +} +if(flag)break +} +if(!flag){ +alpha=c(1:99)/100 +for(i in 1:length(alpha)){ +ilow<-round(alpha[i]*nboot/2) +il<-ilow+1 +uval<-nboot-ilow +b.low<-3*((1+nhat*val[il]-nhat/6)^{1/3})/nhat-3/nhat +b.hi<-3*((1+nhat*val[uval]-nhat/6)^{1/3})/nhat-3/nhat +ci.low<-dif-sigtil*b.hi +ci.up<-dif-sigtil*b.low +if(ci.low>0 || ci.up<0){ +pv=alpha[i] +flag=T +} +if(flag)break +} +}}} +list(est.dif=dif,conf.interval=c(ci.LOW,ci.UP),p.value=pv) +} + +mlrregCI<-function(x,y,nboot=300,MC=FALSE,SEED=TRUE,op.dis=TRUE){ +# +# Based on Rousseeuw et al. +# multivariate regression estimator +# compute p-value for each of the parameters using a percentile +# bootstrap method. +# +if(SEED)set.seed(2) +if(MC)library(parallel) +est=mlrreg(x,y)$coef +pval=est +n=nrow(x) +JK=(ncol(x)+1)*ncol(y) +vals=matrix(0,nrow=nboot,ncol=JK) +data<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot) +if(!MC)for(ib in 1:nboot){ +vals[ib,]=mlrreg(x[data[ib,],],y[data[ib,],])$coef +} +if(MC){ +data=listm(t(data)) +vals=mclapply(data,mlrreg.est,x,y,mc.preschedule=TRUE) +vals=t(matl(vals)) +} +pv=NULL +for(j in 1:JK){ +pv[j]=mean(vals[,j]>0)+.5*mean(vals[,j]==0) +pv[j]=2*min(c(pv[j],1-pv[j])) +} +ic=0 +il=1 +iu=ncol(x)+1 +for(iy in 1:ncol(y)){ +pval[,iy]=pv[il:iu] +il=il+ncol(x)+1 +iu=iu+ncol(x)+1 +} +list(estimates=est,p.values=pval) +} +mlrreg.est<-function(data,x,y){ +xv=x[data,] +yv=y[data,] +vals=as.vector(mlrreg(xv,yv)$coef) +vals +} +bmcppb<-function(x,alpha=.05,nboot=NA,grp=NA,est=tmean,con=0,bhop=FALSE,SEED=TRUE, +...){ +# +# Multiple comparisons for J independent groups using trimmed means +# +# A percentile bootstrap method with Rom's method is used. +# +# The data are assumed to be stored in x +# which either has list mode or is a matrix. In the first case +# x[[1]] contains the data for the first group, x[[2]] the data +# for the second group, etc. Length(x)=the number of groups = J. +# If stored in a matrix, the columns of the matrix correspond +# to groups. +# +# est is the measure of location and defaults to the median +# ... can be used to set optional arguments associated with est +# +# The argument grp can be used to analyze a subset of the groups +# Example: grp=c(1,3,5) would compare groups 1, 3 and 5. +# +# Missing values are allowed. +# +con<-as.matrix(con) +if(is.matrix(x))x<-listm(x) +if(!is.list(x))stop("Data must be stored in list mode or in matrix mode.") +if(!is.na(sum(grp))){ # Only analyze specified groups. 
+xx<-list() +for(i in 1:length(grp))xx[[i]]<-x[[grp[i]]] +x<-xx +} +J<-length(x) +tempn<-0 +mvec<-NA +for(j in 1:J){ +temp<-x[[j]] +temp<-temp[!is.na(temp)] # Remove missing values. +tempn[j]<-length(temp) +x[[j]]<-temp +mvec[j]<-est(temp,...) +} +Jm<-J-1 +# +# Determine contrast matrix +# +if(sum(con^2)==0){ +ncon<-(J^2-J)/2 +con<-matrix(0,J,ncon) +id<-0 +for (j in 1:Jm){ +jp<-j+1 +for (k in jp:J){ +id<-id+1 +con[j,id]<-1 +con[k,id]<-0-1 +}}} +ncon<-ncol(con) +if(nrow(con)!=J)stop("Something is wrong with con; the number of rows does not match the number of groups.") +# Determine nboot if a value was not specified +if(is.na(nboot)){ +nboot<-5000 +if(J <= 8)nboot<-4000 +if(J <= 3)nboot<-2000 +} +# Determine critical values +if(!bhop){ +if(alpha==.05){ +dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) +if(ncon > 10){ +avec<-.05/c(11:ncon) +dvec<-c(dvec,avec) +}} +if(alpha==.01){ +dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) +if(ncon > 10){ +avec<-.01/c(11:ncon) +dvec<-c(dvec,avec) +}} +if(alpha != .05 && alpha != .01){ +dvec<-alpha/c(1:ncon) +} +} +if(bhop)dvec<-(ncon-c(1:ncon)+1)*alpha/ncon +bvec<-matrix(NA,nrow=J,ncol=nboot) +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +#print("Taking bootstrap samples. Please wait.") +for(j in 1:J){ +#print(paste("Working on group ",j)) +data<-matrix(sample(x[[j]],size=length(x[[j]])*nboot,replace=TRUE),nrow=nboot) +bvec[j,]<-apply(data,1,est,...) # Bootstrapped values for jth group +} +test<-NA +bcon<-t(con)%*%bvec #ncon by nboot matrix +tvec<-t(con)%*%mvec +for (d in 1:ncon){ +tv<-sum(bcon[d,]==0)/nboot +test[d]<-sum(bcon[d,]>0)/nboot+.5*tv +if(test[d]> .5)test[d]<-1-test[d] +} +test<-2*test +output<-matrix(0,ncon,6) +dimnames(output)<-list(NULL,c("con.num","psihat","p.value","p.crit","ci.lower","ci.upper")) +temp2<-order(0-test) +zvec<-dvec[1:ncon] +sigvec<-(test[temp2]>=zvec) +output[temp2,4]<-zvec +icl<-round(dvec[ncon]*nboot/2)+1 +icu<-nboot-icl-1 +for (ic in 1:ncol(con)){ +output[ic,2]<-tvec[ic,] +output[ic,1]<-ic +output[ic,3]<-test[ic] +temp<-sort(bcon[ic,]) +output[ic,5]<-temp[icl] +output[ic,6]<-temp[icu] +} +num.sig<-sum(output[,3]<=output[,4]) +list(output=output,con=con,num.sig=num.sig) +} +mlrregWtest<-function(x,y,nboot=300,MC=FALSE,SEED=TRUE){ +# +# Test hypothesis that all slopes=0 based on Rousseeuw et al. +# multivariate regression estimator +# +# Strategy: a variation of the wild bootstrap method, percentile version. +# +if(SEED)set.seed(2) +if(MC)library(parallel) +estit=mlrreg.subest(y,x) #YES, y before x +n=nrow(x) +JK=ncol(x)*ncol(y) +vals=matrix(0,nrow=nboot,ncol=JK) +data=list() +for(i in 1:nboot){ +bsam=sample(n,replace=TRUE) +data[[i]]=y[bsam,] +} +if(!MC){ +vals=lapply(data,mlrreg.subest,x) +} +if(MC){ +vals=mclapply(data,mlrreg.subest,x,mc.preschedule=TRUE) +} +vals=t(matl(vals)) +nullv=rep(0,JK) +vals=rbind(vals,estit) +cen=rep(0,ncol(vals)) +if(MC)dv=pdisMC(vals,center=cen) +if(!MC)dv=pdis(vals,center=cen) +bplus=nboot+1 +pv=1-sum(dv[bplus]>=dv[1:nboot])/nboot +list(p.value=pv) +} +mlrreg.subest<-function(data,x){ +vals=as.vector(mlrreg(x,data)$coef[-1,]) +vals +} +btrim<-function(x,tr=.2,grp=NA,g=NULL,dp=NULL,nboot=599,SEED=TRUE){ +# +# Test the hypothesis of equal trimmed means, corresponding to J independent +# groups, using a bootstrap-t method. +# +# The data are assumed to be stored in x in list mode +# or in a matrix. 
In the first case +# x[[1]] contains the data for the first group, x[[2]] the data +# for the second group, etc. Length(x)=the number of groups = J. +# If stored in a matrix, columns correspond to groups. +# +# grp is used to specify some subset of the groups, if desired. +# By default, all J groups are used. +# g=NULL, x is assumed to be a matrix or have list mode +# +# if g is specifed, it is assumed that column g of x is +# a factor variable and that the dependent variable of interest is in column +# dp of x, which can be a matrix or data frame. +# +# The default number of bootstrap samples is nboot=599 +# +if(!is.null(g)){ +if(is.null(dp))stop("Specify a value for dp, the column containing the data") +x=fac2list(x[,dp],x[,g]) +} +if(is.data.frame(x))x=as.matrix(x) +if(is.matrix(x))x<-listm(x) +if(!is.list(x))stop("Data must be stored in a matrix or in list mode.") +if(is.na(grp[1]))grp<-c(1:length(x)) +J<-length(grp) +nval=NA +x=lapply(x,elimna) +nval=lapply(x,length) +xbar=lapply(x,mean,tr=tr) +bvec<-array(0,c(J,2,nboot)) +hval<-vector("numeric",J) +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +print("Taking bootstrap samples. Please wait.") +for(j in 1:J){ +hval[j]<-length(x[[grp[j]]])-2*floor(tr*length(x[[grp[j]]])) + # hval is the number of observations in the jth group after trimming. +print(paste("Working on group ",grp[j])) +xcen<-x[[grp[j]]]-mean(x[[grp[j]]],tr) +data<-matrix(sample(xcen,size=length(x[[grp[j]]])*nboot,replace=TRUE),nrow=nboot) +bvec[j,,]<-apply(data,1,trimparts,tr) # A 2 by nboot matrix. The first row +# contains the bootstrap trimmed means, the second row +# contains the bootstrap squared standard errors. +} +m1<-bvec[,1,] # J by nboot matrix containing the bootstrap trimmed means +m2<-bvec[,2,] # J by nboot matrix containing the bootstrap sq standard errors +wvec<-1/m2 # J by nboot matrix of w values +uval<-apply(wvec,2,sum) # Vector having length nboot +blob<-wvec*m1 +xtil<-apply(blob,2,sum)/uval # nboot vector of xtil values +blob1<-matrix(0,J,nboot) +for (j in 1:J)blob1[j,]<-wvec[j,]*(m1[j,]-xtil)^2 +avec<-apply(blob1,2,sum)/(length(x)-1) +blob2<-(1-wvec/uval)^2/(hval-1) +cvec<-apply(blob2,2,sum) +cvec<-2*(length(x)-2)*cvec/(length(x)^2-1) +testb<-avec/(cvec+1) +# A vector of length nboot containing bootstrap test values +ct<-sum(is.na(testb)) +if(ct>0)print("Some bootstrap estimates of the test statistic could not be computed") +test<-t1way(x,tr=tr,grp=grp) +pval<-sum(test$TEST<=testb)/nboot +# +# Determine explanatory effect size +# +e.pow=t1wayv2(x)$Effect.Size +list(test=test$TEST,p.value=pval,Explanatory.Power=e.pow, +Effect.Size=e.pow) +} + + +linconMpb.sub<-function(data,x,est,...){ +res=apply(x[data,],2,est,...) +res +} +mcdcen<-function(x){ +# +# Compute MCD measure of location only. +# +res=covmcd(x)$center +res +} +mvecen<-function(x){ +# +# Compute MCD measure of location only. +# +res=covmve(x)$center +res +} + +linconSpb.sub<-function(data,x,est,...){ +res=est(x[data,],...) +res +} + +fac2Mlist<-function(x,grp.col,lev.col,pr=TRUE){ +# +# sort and store data in a matrix or data frame into +# groups, where the jth group +# has p-variate data +# +# grp.col is column indicating levels of between factor. 
+# lev.col indicates the columns where repeated measures are contained +# +# Example: column 2 contains information on levels of between factor +# have a 3 by 2 design, column 3 contains time 1 data, +# column 7 contains time 2 +# fac2Mlist(x,2,c(3,7)) will store data in list mode, having length +# 2 (the number of levels), with each level containing a +# matrix having two columns. The first column is based on values +# in column 3 of the matrix x, and the second column is based on +# data in column 7 of x. +# +res=selbybw(x,grp.col,lev.col) +if(pr){ +print("Levels for between factor:") +print(sort(unique(x[,grp.col]))) +} +res=res$x +p=length(lev.col) +J=length(unique(x[,grp.col])) +y=list() +ic=1-p +iu=0 +for(j in 1:J){ +ic=ic+p +iu=iu+p +y[[j]]=matl(res[ic:iu]) +} +y +} + + + +fac2BBMlist<-function(x,grp.col,lev.col,pr=TRUE){ +# +# This function is useful when dealing with a two-way MANOVA +# It takes data stored in x, a matrix or data frame, +# and creates groups based on the data in the two columns +# indicated by the argument +# +# grp.col +# lev.col indicates the columns where p-variate are contained. +# +# Example: +# z=fac2BBMlist(plasma,c(2,3),c(7,8)) +# creates groups based on values in columns 2 (Factor A) and 3 (Factor B). +# z[[1]] contains a matrix having two columns; the data are taken +# from columns 7 and 8 of plasma +# +res=selbybbw(x,grp.col,lev.col,pr=pr) +p=length(lev.col) +J=length(unique(x[,grp.col[1]])) +K=length(unique(x[,grp.col[2]])) +y=list() +ic=1-p +iu=0 +jk=0 +for(j in 1:J){ +for(k in 1:K){ +ic=ic+p +iu=iu+p +jk=jk+1 +y[[jk]]=matl(res[ic:iu]) +}} +y +} + + +regmediate<-function(x,y,regfun=tsreg,nboot=400,alpha=.05,xout=FALSE,outfun=out,MC=FALSE,SEED=TRUE,...){ +# +# In a mediation analysis, two of the linear equations that play a role are +# y=b_{01} + b_{11}x + e_1 +# y=b_{03} + b_{13}x + b_{23} x_m + e_3 +# where x_m is the mediator variable. +# An additional assumption is +# x_m=b_{02} + b_{12}x + \epsilon_2. +# Goal: Compute a confidence interval for b_{11}-b_{13} +# +# The default regression method is the Theil-Sen estimator. +# +# The predictor values are assumed to be in the n-by-2 matrix x, with the +# mediator variable in column 2. +# MC=T. A multicore processor will be used. +# xout=T will remove leverage points using the function indicated by the argument out. +# +if(MC)library(parallel) +x<-as.matrix(x) +p1<-ncol(x)+1 +p<-ncol(x) +if(p!=2)stop("Argument x should have two columns") +xy<-cbind(x,y) +xy<-elimna(xy) +x<-xy[,1:p] +y<-xy[,p1] +if(xout){ +m<-cbind(x,y) +flag<-outfun(x,plotit=FALSE)$keep +m<-m[flag,] +x<-m[,1:p] +y<-m[,p1] +} +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),ncol=nboot) +data=listm(data) +if(MC){ +bvec1<-mclapply(data,regbootMC,as.matrix(x[,1]),y,regfun,mc.preschedule=TRUE) +bvec2<-mclapply(data,regbootMC,x,y,regfun,mc.preschedule=TRUE) +} +if(!MC){ +bvec1<-lapply(data,regboot,as.matrix(x[,1]),y,regfun) +bvec2<-lapply(data,regboot,x,y,regfun) +} +bvec1=matl(bvec1) +bvec2=matl(bvec2) +# bvec is a p+1 by nboot matrix. The first row +# contains the bootstrap intercepts, the second row +# contains the bootstrap values for first predictor, etc. 
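+# Row 2 of bvec1 holds the bootstrap slopes for x alone (b_{11}) and row 2
+# of bvec2 holds the bootstrap slopes for x when the mediator is included
+# (b_{13}), so their difference below is the bootstrap distribution of
+# b_{11} - b_{13}.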
+dif=bvec1[2,]-bvec2[2,] +ilow<-round((alpha/2) * nboot) +ihi<-nboot - ilow +ilow<-ilow+1 +sig.level<-NA +temp<-mean(dif<0) +sig.level<-2*(min(temp,1-temp)) +bsort<-sort(dif) +regci<-bsort[ilow] +regci[2]<-bsort[ihi] +list(conf.interval=regci,p.value=sig.level) +} + + + +regmed2<-function(x,y,regfun=tsreg,nboot=400,alpha=.05,xout=FALSE,outfun=out,MC=FALSE, +SEED=TRUE,pr=TRUE,...){ +# +# In a mediation analysis, two of the linear equations that play a role are +# y=b_{01} + b_{11}x + e_1 +# y=b_{03} + b_{13}x + b_{23} x_m + e_3 +# where x_m is the mediator variable. +# An additional assumption is +# x_m=b_{02} + b_{12}x + \epsilon_2. +# Goal: Test hypotheses b_{12}=0 and b_{23}=0 +# +# The default regression method is the Theil-Sen estimator. +# +# The predictor values are assumed to be in the n-by-2 matrix x, with the +# mediator variable in column 2. +# MC=T. A multicore processor will be used. +# xout=T will remove leverage points using the function indicated by the argument out. +# +if(MC)library(parallel) +x<-as.matrix(x) +p1<-ncol(x)+1 +p<-ncol(x) +if(p!=2)stop("Argument x should have two columns") +xy<-cbind(x,y) +xy<-elimna(xy) +x<-xy[,1:p] +y<-xy[,p1] +if(xout){ +m<-cbind(x,y) +flag<-outfun(x,plotit=FALSE)$keep +m<-m[flag,] +x<-m[,1:p] +y<-m[,p1] +} +if(MC){ +temp1=regciMC(x[,1],x[,2],regfun=regfun,nboot=nboot,alpha=alpha,SEED=SEED,pr=FALSE) +temp2=regciMC(x,y,regfun=regfun,nboot=nboot,alpha=alpha,SEED=SEED,pr=FALSE) +} +if(!MC){ +temp1=regci(x[,1],x[,2],regfun=regfun,nboot=nboot,alpha=alpha,SEED=SEED,pr=FALSE) +temp2=regci(x,y,regfun=regfun,nboot=nboot,alpha=alpha,SEED=SEED,pr=FALSE) +} +if(pr){ +print("Output returned in res1 is for the slope of the regression line") +print("where the goal is to predict the mediator variable given the other") +print("predictor variable stored in column 1 of x.") +print("Output in res2 is for slope of the mediator when both predictors are used.") +} +res1=c(temp1$regci[2,],temp1$p.value[2]) +z1=t(as.matrix(res1)) +dimnames(z1)=list(NULL,c("ci.low","ci.up",'Estimate','S.E.',"p.value")) +res2=c(temp2$regci[3,],temp2$p.value[3]) +z2=t(as.matrix(res2)) +dimnames(z2)=list(NULL,c("ci.low","ci.up",'Estimate','S.E.',"p.value")) +list(res1=z1,res2=z2) +} + + +ogk.center<-function(x,beta=.9,...){ +# +# Compute OGK multivariate measure of location +# +center=ogk(x,beta=beta,...)$center +center +} +sdwe<-function(m,K=3){ +# +# Stahel-Donoho W-estimator implemented as suggested by +# Zuo, Cui and He 2004, Annals of Statistics, 32, 167--188 +# +m=elimna(m) +pd=1/(1+zdepth(m)) # projection depth +MPD=median(pd) # C in Zuo et al. notation +flag=(pd 10){ +avec<-.05/c(11:ncon) +dvec<-c(dvec,avec) +}} +if(alpha==.01){ +dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) +if(ncon > 10){ +avec<-.01/c(11:ncon) +dvec<-c(dvec,avec) +}} +if(alpha != .05 && alpha != .01){ +dvec<-alpha/c(1:ncon) +} +} +if(bhop)dvec<-(ncon-c(1:ncon)+1)*alpha/ncon +bvec<-matrix(NA,nrow=J,ncol=nboot) +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. 
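+# Resample whole rows so the dependence among the J measures is preserved;
+# est is applied to each bootstrap sample, yielding a J-by-nboot matrix of
+# bootstrap estimates.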
+data<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot) +for(i in 1:nboot)bvec[,i]=est(x[data[i,],]) +test<-NA +bcon<-t(con)%*%bvec #ncon by nboot matrix +tvec<-t(con)%*%mvec +for (d in 1:ncon){ +tv<-sum(bcon[d,]==0)/nboot +test[d]<-sum(bcon[d,]>0)/nboot+.5*tv +if(test[d]> .5)test[d]<-1-test[d] +} +test<-2*test +output<-matrix(0,ncon,6) +dimnames(output)<-list(NULL,c("con.num","psihat","p.value","p.crit","ci.lower","ci.upper")) +temp2<-order(0-test) +zvec<-dvec[1:ncon] +sigvec<-(test[temp2]>=zvec) +output[temp2,4]<-zvec +icl<-round(dvec[ncon]*nboot/2)+1 +icu<-nboot-icl-1 +for (ic in 1:ncol(con)){ +output[ic,2]<-tvec[ic,] +output[ic,1]<-ic +output[ic,3]<-test[ic] +temp<-sort(bcon[ic,]) +output[ic,5]<-temp[icl] +output[ic,6]<-temp[icu] +} +num.sig<-sum(output[,3]<=output[,4]) +list(output=output,con=con,num.sig=num.sig) +} + +COVreg<-function(x,y,cov.fun=MARest,loc.fun=MARest,xout=FALSE,outfun=out,...){ +# +# Regression estimation can be done via the usual maximum likelihood +# covariance matrix. This function uses the same approach +# using a robust covariance matrix instead. +# +# The predictors are assumed to be stored in the n-by-p matrix x. +# +xy=elimna(cbind(x,y)) +p1=ncol(xy) +p=p1-1 +x=xy[,1:p] +y=xy[,p1] +x<-as.matrix(x) +if(xout){ +x<-as.matrix(x) +flag<-outfun(x,...)$keep +x<-x[flag,] +y<-y[flag] +x<-as.matrix(x) +} +AC=cov.fun(cbind(x,y),...)$cov +ma<-AC[1:p,p1] +m<-AC[1:p,1:p] +slope<-solve(m,ma) +mvals<-loc.fun(cbind(x,y))$center +b0<-mvals[p1]-sum(slope%*%mvals[1:p]) +res<-y-x%*%slope-b0 +list(coef=c(b0,slope),residuals=res) +} + + + + +dmedpb<-function(x,y=NULL,alpha=.05,con=0,est=median,plotit=TRUE,dif=FALSE,grp=NA, +hoch=TRUE,nboot=NA,xlab="Group 1",ylab="Group 2",ylab.ebar=NULL, +pr=TRUE,SEED=TRUE,BA=FALSE,PCI=FALSE,EBAR=PCI,...){ +# +# Use a percentile bootstrap method to compare +# medians of dependent groups. +# +# This is essentially the function rmmcppb, but set to compare medians +# by default. +# And it is adjusted to handle tied values. +# +# dif=T indicates that difference scores are to be used +# dif=F indicates that measure of location associated with +# marginal distributions are used instead. +# +# nboot is the bootstrap sample size. If not specified, a value will +# be chosen depending on the number of contrasts there are. +# +# x can be an n by J matrix or it can have list mode +# for two groups, data for second group can be put in y +# otherwise, assume x is a matrix (n-by-J) or has list mode. +# +# PCI=TRUE, if dif=TRUE and est=median, confidence intervals for difference scores are plottted +# So this is like plotting error bars. +# +# +if(dif){ +if(pr){ +print("dif=T, so analysis is done on difference scores.") +print(" Each confidence interval has probability coverage 1-alpha.") +print(" Also note a sequentially rejective method is being used.") +} +temp<-rmmcppbd(x,y=y,alpha=alpha,con=con,est=est,plotit=plotit,grp=grp, +nboot=nboot,hoch=hoch,...) 
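+# rmmcppbd carries out the difference-score analysis; keep its output
+# matrix and the contrast matrix it used.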
+output<-temp$output +con<-temp$con +} +if(!dif){ +if(pr){ +print("dif=F, so analysis is done on marginal distributions.") +print(" Each confidence interval has probability coverage 1-alpha.") +print(" Also note that a sequentially rejective method is being used") +} +if(!is.null(y[1]))x<-cbind(x,y) +if(is.data.frame(x))x=as.matrix(x) +if(!is.list(x) && !is.matrix(x)) +stop("Data must be stored in a matrix or in list mode.") +if(is.list(x)){ +if(is.matrix(con)){ +if(length(x)!=nrow(con)) +stop("The number of rows in con is not equal to the number of groups.") +}} +if(is.list(x)){ +# put the data in an n by J matrix +mat<-matl(x) +} +if(is.matrix(x) && is.matrix(con)){ +if(ncol(x)!=nrow(con)) +stop("The number of rows in con is not equal to the number of groups.") +mat<-x +} +if(is.matrix(x))mat<-x +if(!is.na(sum(grp)))mat<-mat[,grp] +mat<-elimna(mat) # Remove rows with missing values. +x<-mat +J<-ncol(mat) +xcen<-x +for(j in 1:J)xcen[,j]<-x[,j]-est(x[,j]) +Jm<-J-1 +if(sum(con^2)==0){ +d<-(J^2-J)/2 +con<-matrix(0,J,d) +id<-0 +for (j in 1:Jm){ +jp<-j+1 +for (k in jp:J){ +id<-id+1 +con[j,id]<-1 +con[k,id]<-0-1 +}}} +d<-ncol(con) +if(is.na(nboot)){ +if(d<=4)nboot<-1000 +if(d>4)nboot<-5000 +} +n<-nrow(mat) +crit.vec<-alpha/c(1:d) +connum<-ncol(con) +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +xbars<-apply(mat,2,est) +psidat<-NA +for (ic in 1:connum)psidat[ic]<-sum(con[,ic]*xbars) +psihat<-matrix(0,connum,nboot) +psihatcen<-matrix(0,connum,nboot) +bvec<-matrix(NA,ncol=J,nrow=nboot) +bveccen<-matrix(NA,ncol=J,nrow=nboot) +if(pr)print("Taking bootstrap samples. Please wait.") +data<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot) +for(ib in 1:nboot){ +bvec[ib,]<-apply(x[data[ib,],],2,est,...) +bveccen[ib,]<-apply(xcen[data[ib,],],2,est,...) +} +# +# Now have an nboot by J matrix of bootstrap values. +# +test<-1 +bias<-NA +tval<-NA +tvalcen<-NA +icl=round(alpha*nboot/2)+1 +icu<-nboot-(icl-1) +cimat=matrix(NA,nrow=connum,ncol=2) +for (ic in 1:connum){ +psihat[ic,]<-apply(bvec,1,bptdpsi,con[,ic]) +tp=sort(psihat[ic,]) +cimat[ic,1]=tp[icl] +cimat[ic,2]=tp[icu] +psihatcen[ic,]<-apply(bveccen,1,bptdpsi,con[,ic]) +tvalcen[ic]<-sum((psihatcen[ic,]==0))/nboot +bias[ic]<-sum((psihatcen[ic,]>0))/nboot+sum((psihatcen[ic,]==0))/nboot-.5 +tval[ic]<-sum((psihat[ic,]==0))/nboot +if(BA){ +test[ic]<-sum((psihat[ic,]>0))/nboot+tval[ic]-.1*bias[ic] +if(test[ic]<0)test[ic]<-0 +} +if(!BA)test[ic]<-sum((psihat[ic,]>0))/nboot+.5*tval[ic] +test[ic]<-min(test[ic],1-test[ic]) +} +test<-2*test +ncon<-ncol(con) +dvec<-alpha/c(1:ncon) +if(alpha==.05){ +dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) +dvecba<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) +if(ncon > 10){ +avec<-.05/c(11:ncon) +dvec<-c(dvec,avec) +}} +if(alpha==.01){ +dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) +dvecba<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) +if(ncon > 10){ +avec<-.01/c(11:ncon) +dvec<-c(dvec,avec) +}} +if(hoch)dvec<-alpha/(2* c(1:ncon)) +dvec<-2*dvec +if(alpha != .05 && alpha != .01){ +dvec<-alpha/c(1:ncon) +dvecba<-dvec +#dvec[1]<-alpha/2 +} +if(!EBAR){ +if(plotit && ncol(bvec)==2){ +z<-c(0,0) +one<-c(1,1) +plot(rbind(bvec,z,one),xlab=xlab,ylab=ylab,type="n") +points(bvec) +totv<-apply(x,2,est,...) 
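+# Use Mahalanobis distances from the observed estimates to identify the
+# (1-alpha) deepest bootstrap points, plot their convex hull, and add the
+# line y=x (equal marginal estimates).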
+cmat<-var(bvec) +dis<-mahalanobis(bvec,totv,cmat) +temp.dis<-order(dis) +ic<-round((1-alpha)*nboot) +xx<-bvec[temp.dis[1:ic],] +xord<-order(xx[,1]) +xx<-xx[xord,] +temp<-chull(xx) +lines(xx[temp,]) +lines(xx[c(temp[1],temp[length(temp)]),]) +abline(0,1) +}} +temp2<-order(0-test) +ncon<-ncol(con) +zvec<-dvec[1:ncon] +if(BA)zvec<-dvecba[1:ncon] +sigvec<-(test[temp2]>=zvec) +output<-matrix(0,connum,6) +dimnames(output)<-list(NULL,c("con.num","psihat","p-value","p.crit", +"ci.lower","ci.upper")) +tmeans<-apply(mat,2,est,...) +psi<-1 +for (ic in 1:ncol(con)){ +output[ic,2]<-sum(con[,ic]*tmeans) +output[ic,1]<-ic +output[ic,3]<-test[ic] +output[temp2,4]<-zvec +temp<-sort(psihat[ic,]) +#print(psihat[ic,]) +#icl=round(alpha*nboot/2)+1 +#icl<-round(output[ic,4]*nboot/2)+1 # This adjustment causes confusion, it is not based on Hochberg +#icu<-nboot-(icl-1) +#output[ic,5]<-temp[icl] +#output[ic,6]<-temp[icu] +output[ic,5:6]<-cimat[ic,] +} +} +num.sig=nrow(output) +ior=order(output[,3],decreasing=TRUE) +for(j in 1:nrow(output)){ +if(output[ior[j],3]<=output[ior[j],4])break +else num.sig=num.sig-1 +} +num.sig<-sum(output[,3]<=output[,4]) +#if(nrow(output)>1)ids=which(output[,3]<=output[,4]) +if(EBAR ){ +if(identical(est,median)){ +if(dif){ +plotCI(output[,2],ali=output[,5],aui=output[,6],xlab='Difference',ylab=ylab.ebar) +}}} +list(output=output,con=con,num.sig=num.sig) +} +MAT2list<-function(x,J=NULL,p=NULL){ +# +# Store the data in a matrix or data frame in a new +# R variable having list mode. +# The results are stored in y, having list mode +# Col 1 to p of x will be stored as a matrix in y[[1]], +# Col p+1 to 2p are stored in y[[2]], and so on. +# +# The function assumes ncol(x)=J*P +# either J, the number of groups, or p, the number of variables, +# must be specified. +# +# This function is used by the R function linconMpb when testing +# hypotheses about linear contrasts based on multivariate data. +# +if(is.null(dim(x)))stop("The argument x must be a matrix or data frame") +y<-list() +if(is.null(J) && is.null(p))stop("Specify J or P") +if(is.null(J))J=ncol(x)/p +if(is.null(p))p=ncol(x)/J +Jp=floor(J)*floor(p) +if(Jp != ncol(x))stop("Jp is not equal to the number of columns") +lp=1-p +up=0 +for(j in 1:J){ +lp=lp+p +up=up+p +y[[j]]<-as.matrix(x[,lp:up]) +} +y +} +linconMpb<-function(x,alpha=.05,nboot=1000,grp=NA,est=tmean,con=0,bhop=FALSE, +SEED=TRUE,PDIS=FALSE,J=NULL,p=NULL,...){ +# +# Multiple comparisons for J independent groups using trimmed means +# with multivariate data for each group. +# +# A percentile bootstrap method with Rom's method is used. +# +# The data are assumed to be stored in x +# which has list mode, +# x[[1]] contains the data for the first group in the form of a +# matrix, x[[2]] the data +# for the second group, etc. Length(x)=the number of groups = J. +# +# est is the measure of location and defaults to the median +# ... can be used to set optional arguments associated with est +# +# The argument grp can be used to analyze a subset of the groups +# Example: grp=c(1,3,5) would compare groups 1, 3 and 5. +# +# Missing values are automatically removed. +# +con<-as.matrix(con) +if(is.matrix(x) || is.data.frame(x)){ +if(is.null(J) && is.null(p))stop("Specify J or P") +x=MAT2list(x,p=p,J=J) +} +if(!is.list(x))stop("Data must be stored in list mode.") +if(!is.na(sum(grp))){ # Only analyze specified groups. 
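+# Keep only the groups listed in grp, in the order they are given.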
+xx<-list() +for(i in 1:length(grp))xx[[i]]<-x[[grp[i]]] +x<-xx +} +J<-length(x) +nullvec=rep(0,ncol(x[[1]])) +bplus=nboot+1 +tempn<-0 +mvec<-list +for(j in 1:J){ +x[[j]]<-elimna(x[[j]]) +} +Jm<-J-1 +# +# Determine contrast matrix +# +if(sum(con^2)==0){ +ncon<-(J^2-J)/2 +con<-matrix(0,J,ncon) +id<-0 +for (j in 1:Jm){ +jp<-j+1 +for (k in jp:J){ +id<-id+1 +con[j,id]<-1 +con[k,id]<-0-1 +}}} +ncon<-ncol(con) +if(nrow(con)!=J)stop("Something is wrong with con; the number of rows does not match the number of groups.") +# Determine critical levels +if(!bhop){ +if(alpha==.05){ +dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) +if(ncon > 10){ +avec<-.05/c(11:ncon) +dvec<-c(dvec,avec) +}} +if(alpha==.01){ +dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) +if(ncon > 10){ +avec<-.01/c(11:ncon) +dvec<-c(dvec,avec) +}} +if(alpha != .05 && alpha != .01){ +dvec<-alpha/c(1:ncon) +} +} +if(bhop)dvec<-(ncon-c(1:ncon)+1)*alpha/ncon +bvec<-array(NA,c(J,nboot,ncol(x[[1]]))) +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +#print("Taking bootstrap samples. Please wait.") +nvec=lapply(x,nrow) +for(j in 1:J){ +data<-matrix(sample(nvec[[j]],size=nvec[[j]]*nboot,replace=TRUE),nrow=nboot) +bvec[j,,]<-apply(data,1,linconMpb.sub,x[[j]],est,...) # Bootstrapped values for jth group +} +test<-NA +for (d in 1:ncon){ +tv=matrix(0,nboot,ncol(x[[1]])) #nboot by p matrix reflecting Psi hat +estit=rep(0,ncol(x[[1]])) +for(j in 1:J){ +tv=tv+con[j,d]*bvec[j,,] +estit=estit+con[j,d]*apply(x[[j]],2,est,...) +} +if(!PDIS)m1=cov(tv) +tv=rbind(tv,nullvec) +if(!PDIS)dv=mahalanobis(tv,center=estit,m1) +if(PDIS)dv=pdis(tv,center=estit) # projection distances +test[d]=1-sum(dv[bplus]>=dv[1:nboot])/nboot +} +output<-matrix(0,ncon,3) +dimnames(output)<-list(NULL,c("con.num","p.value","p.crit")) +temp2<-order(0-test) +zvec<-dvec[1:ncon] +sigvec<-(test[temp2]>=zvec) +output[temp2,3]<-zvec +for (ic in 1:ncol(con)){ +output[ic,1]<-ic +output[ic,2]<-test[ic] +} +num.sig<-sum(output[,2]<=output[,3]) +list(output=output,con=con,num.sig=num.sig) +} +linconMpb.sub<-function(data,x,est,...){ +res=apply(x[data,],2,est,...) +res +} +linconSpb<-function(x,alpha=.05,nboot=1000,grp=NA,est=smean,con=0,bhop=FALSE, +SEED=TRUE,PDIS=FALSE,J=NULL,p=NULL,...){ +# +# Multiple comparisons for J independent groups +# with multivariate data for each group. +# That is, linear contrasts relevant to MANOVA can be tested. +# The method can handle +# multivariate measures of location that take into account +# the overall structure of the data, as opposed to using, for example +# the marginal trimmed means, which is done by default when using +# linconMpb. +# The argument +# +# est=smean, +# +# means that by default the skipped measure of location, based on +# on projection method for detecting outliers, is used. +# +# Mahalanobis distance is used to compute a p-value, but projection +# distances could be used by setting PDIS=T. +# +# A percentile bootstrap method with Rom's method is used. +# +# alpha=.05 means the probability of one or more type I errors is .05. +# +# The data are assumed to be stored in x +# which has list mode, +# x[[1]] contains the data for the first group in the form of a +# matrix, x[[2]] the data +# for the second group, etc. Length(x)=the number of groups = J. +# +# est is the measure of location and defaults to the median +# ... 
can be used to set optional arguments associated with est +# +# The argument grp can be used to analyze a subset of the groups +# Example: grp=c(1,3,5) would compare groups 1, 3 and 5. +# +# Missing values are automatically removed. +# +if(is.matrix(x) || is.data.frame(x)){ +if(is.null(J) && is.null(p))stop("Specify J or P") +x=MAT2list(x,p=p,J=J) +} +con<-as.matrix(con) +if(!is.list(x))stop("Data must be stored in list mode.") +if(!is.na(sum(grp))){ # Only analyze specified groups. +xx<-list() +for(i in 1:length(grp))xx[[i]]<-x[[grp[i]]] +x<-xx +} +J<-length(x) +nullvec=rep(0,ncol(x[[1]])) +bplus=nboot+1 +tempn<-0 +mvec<-list +for(j in 1:J){ +x[[j]]<-elimna(x[[j]]) +} +Jm<-J-1 +# +# Determine contrast matrix +# +if(sum(con^2)==0){ +ncon<-(J^2-J)/2 +con<-matrix(0,J,ncon) +id<-0 +for (j in 1:Jm){ +jp<-j+1 +for (k in jp:J){ +id<-id+1 +con[j,id]<-1 +con[k,id]<-0-1 +}}} +ncon<-ncol(con) +if(nrow(con)!=J)stop("Something is wrong with con; the number of rows does not match the number of groups.") +# Determine critical levels +if(!bhop){ +if(alpha==.05){ +dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) +if(ncon > 10){ +avec<-.05/c(11:ncon) +dvec<-c(dvec,avec) +}} +if(alpha==.01){ +dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) +if(ncon > 10){ +avec<-.01/c(11:ncon) +dvec<-c(dvec,avec) +}} +if(alpha != .05 && alpha != .01){ +dvec<-alpha/c(1:ncon) +} +} +if(bhop)dvec<-(ncon-c(1:ncon)+1)*alpha/ncon +bvec<-array(NA,c(J,nboot,ncol(x[[1]]))) +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +#print("Taking bootstrap samples. Please wait.") +nvec=lapply(x,nrow) +for(j in 1:J){ +data<-matrix(sample(nvec[[j]],size=nvec[[j]]*nboot,replace=TRUE),nrow=nboot) +bvec[j,,]<-apply(data,1,linconSpb.sub,x[[j]],est,...) # Bootstrapped values for jth group +} +test<-NA +for (d in 1:ncon){ +tv=matrix(0,nboot,ncol(x[[1]])) #nboot by p matrix reflecting Psi hat +estit=rep(0,ncol(x[[1]])) +for(j in 1:J){ +tv=tv+con[j,d]*bvec[j,,] +estit=estit+con[j,d]*est(x[[j]],...) +} +if(!PDIS)m1=cov(tv) +tv=rbind(tv,nullvec) +if(!PDIS)dv=mahalanobis(tv,center=estit,m1) +if(PDIS)dv=pdis(tv,center=estit) # projection distances +test[d]=1-sum(dv[bplus]>=dv[1:nboot])/nboot +} +output<-matrix(0,ncon,3) +dimnames(output)<-list(NULL,c("con.num","p.value","p.crit")) +temp2<-order(0-test) +zvec<-dvec[1:ncon] +sigvec<-(test[temp2]>=zvec) +output[temp2,3]<-zvec +for (ic in 1:ncol(con)){ +output[ic,1]<-ic +output[ic,2]<-test[ic] +} +num.sig<-sum(output[,2]<=output[,3]) +list(output=output,con=con,num.sig=num.sig) +} +linconSpb.sub<-function(data,x,est,...){ +res=est(x[data,],...) +res +} + +MULtr.anova<-function(x,J=NULL,p=NULL,tr=.2,alpha=.05){ +# +# Do Multivariate ANOVA with trimmed means using +# Johansen's method +# +# x is assumed to have list mode with length(x)=J=number of groups and +# x[[j]] is an n_j-by-p matrix, p is the number of variables. +# +# x can also be a matrix when J and p are specified. It is assumed the data are stored in +# a matrix in the same manner expected by bwtrim. 
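+#
+# Illustrative sketch (hypothetical data): with J=3 groups and p=2 variables,
+# x can be a list of three n_j-by-2 matrices, or a single matrix with
+# J*p=6 columns supplied together with J=3 (or p=2), e.g.
+# MULtr.anova(m, J=3, tr=.2).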
+# +# To get a p-value, use the function MULAOVp +# +if(is.matrix(x) || is.data.frame(x)){ +if(is.null(J) && is.null(p))stop("Specify J or P") +x=MAT2list(x,p=p,J=J) +} +x=lapply(x,as.matrix) +x=lapply(x,elimna) +p=ncol(x[[1]]) +iden=diag(p) +J=length(x) +tvec=list() +nval=lapply(x,nrow) +Rtil=lapply(x,wincov,tr=tr) +tvec=lapply(x,mmean,tr=tr) +g=list() +gmean=rep(0,p) # grand mean eventually +groupm=list() +Wsum=matrix(0,ncol=p,nrow=p) +W=list() +f=0 +Aw=0 +for(j in 1:J){ +dimnames(x[[j]])=list(NULL,NULL) +tvec[[j]]=as.matrix(tvec[[j]]) +g[[j]]=floor(nval[[j]]*tr) +Rtil[[j]]=Rtil[[j]]*(nval[[j]]-1)/((nval[[j]]-2*g[[j]])*(nval[[j]]-2*g[[j]]-1)) +f[j]=nval[[j]]-2*g[[j]]-1 +W[[j]]=solve(Rtil[[j]]) +groupm[[j]]=apply(x[[j]],2,tmean,tr=tr) +Wsum=Wsum+W[[j]] +gmean=gmean+W[[j]]%*%tvec[[j]] +} +Wsuminv=solve(Wsum) +for(j in 1:J){ +temp=iden-Wsuminv%*%W[[j]] +tempsq=temp%*%temp +Aw=Aw+(sum(diag(tempsq))+(sum(diag(temp)))^2)/f[j] +} +Aw=Aw/2 +gmean=as.matrix(gmean) +gmean=solve(Wsum)%*%gmean # Final weighted grand mean +df=p*(J-1) +crit<-qchisq(1-alpha,df) +crit<-crit+(crit/(2*df))*(Aw+3*Aw*crit/(df+2)) +test=0 +for(k in 1:p){ +for(m in 1:p){ +for(j in 1:J){ +test=test+W[[j]][k,m]*(groupm[[j]][m]-gmean[m])*(groupm[[j]][k]-gmean[k]) +}}} +list(test.stat=test,crit.value=crit) +} + + +MULAOVp<-function(x,J=NULL,p=NULL,tr=.2){ +# +# Do Multivariate ANOVA with trimmed means using +# Johansen's method +# +# x is assumed to have list mode with J=number of groups +# x[[j]] is an n_j by p matrix +# +alval<-c(1:999)/1000 +for(i in 1:999){ +irem<-i +Qa<-MULtr.anova(x,J=J,p=p,tr=tr,alpha=alval[i]) +if(Qa$test.stat>Qa$crit.value)break +} +list(test.stat=Qa$test.stat,p.value=alval[i]) +} + +YYmcp<-function(x,alpha=.05,grp=NA,tr=.2,bhop=FALSE,J=NULL,p=NULL,...){ +# +# All pairwise comparisons among J independent groups using trimmed means +# with multivariate data for each group. +# The method applies the Yanagihara - Yuan for each pair of groups +# and controls FWE via Rom's method if bhop=F. +# bhop=T, use Benjamini-Hochberg method +# +# The data are assumed to be stored in x +# which has list mode, +# x[[1]] contains the data for the first group in the form of a +# matrix, x[[2]] the data +# for the second group, etc., each matrix having the same +# number of columns Length(x)=the number of groups = J. +# +# The data can be stored in a single matrix having Jp columns +# J = number of groups. +# If this is the case, specify the argument J or p(number of variables) + +# est is the measure of location and defaults to the median +# ... can be used to set optional arguments associated with est +# +# The argument grp can be used to analyze a subset of the groups +# Example: grp=c(1,3,5) would compare groups 1, 3 and 5. +# +# Missing values are automatically removed. +# +con<-as.matrix(con) +if(is.matrix(x) || is.data.frame(x)){ +if(is.null(J) && is.null(p))stop("Specify J or P") +x=MAT2list(x,p=p,J=J) +} +if(!is.list(x))stop("Data must be stored in list mode.") +if(!is.na(sum(grp))){ # Only analyze specified groups. 
+xx<-list() +for(i in 1:length(grp))xx[[i]]<-x[[grp[i]]] +x<-xx +} +J<-length(x) +nullvec=rep(0,ncol(x[[1]])) +bplus=nboot+1 +tempn<-0 +mvec<-list +for(j in 1:J){ +x[[j]]<-elimna(x[[j]]) +} +Jm<-J-1 +# +# Determine contrast matrix +# +ncon<-(J^2-J)/2 +if(!bhop){ +if(alpha==.05){ +dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) +if(ncon > 10){ +avec<-.05/c(11:ncon) +dvec<-c(dvec,avec) +}} +if(alpha==.01){ +dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) +if(ncon > 10){ +avec<-.01/c(11:ncon) +dvec<-c(dvec,avec) +}} +if(alpha != .05 && alpha != .01){ +dvec<-alpha/c(1:ncon) +} +} +if(bhop)dvec<-(ncon-c(1:ncon)+1)*alpha/ncon +# +output<-matrix(0,ncon,4) +dimnames(output)<-list(NULL,c("Group","Group","p.value","p.crit")) +ic=0 +for (j in 1:Jm){ +jp<-j+1 +for (k in jp:J){ +ic=ic+1 +output[ic,1]=j +output[ic,2]=k +output[ic,3]<-YYmanova(x[[j]],x[[k]],tr=tr)$p.value +}} +test=output[,3] +temp2<-order(0-test) +zvec<-dvec[1:ncon] +sigvec<-(test[temp2]>=zvec) +output[temp2,4]<-zvec +num.sig<-sum(output[,3]<=output[,4]) +list(output=output,num.sig=num.sig) +} + + +loc2dif<-function(x,y=NULL,est=median,na.rm=TRUE,plotit=FALSE,xlab="",ylab="",...){ +# +# Compute a measure of location associated with the +# distribution of x-y, the typical difference between two randomly sampled values. +# The measure of location is indicated by the argument +# est. +# x and y are paired data or independent variables having the same length. +# If x and y have different lengths, use the function wmwloc +# +# Advantage of this estimator: relatively high efficiency even under normality versus +# using sample means. +# +if(is.null(y)){ +if(ncol(x)!=2)stop("x should be an n-by-2 matrix") +y=x[,2] +x=x[,1] +if(na.rm)m=elimna(cbind(x,y)) +x=m[,1] +y=m[,2] +} +x=elimna(x) +y=elimna(y) +temp=as.vector(outer(x,y,FUN="-")) +val<-est(temp,na.rm=TRUE,...) +if(plotit)akerd(temp,xlab=xlab,ylab=ylab) +val +} + +mlrreg<-function(x,y,cov.fun=cov.mcd,ols.op=TRUE,mcd.op=TRUE, +quantile.used=floor(.75*n),RES=FALSE,...){ +# +# Do Multivariate regression, using by default the method +# in Rousseeuw, Van Aelst, Van Driessen Agullo +# Technometrics, 46, 293-305 +# +# Note, to use the method recommended by Rousseeuw et al., the argument +# quantile.used=.75*n is used when calling cov.mcd. +# +# RES=T, the residuals will be returned. +# +# y is assumed to be multivariate with data stored in a matrix. +# +# an initial fit is found using the measures of scatter and location +# corresponding to cof.fun and mcd.op. If +# mcd.op=T, cov.mcd is used with quanitle.used=.75n +# mcd.op=F, cov.fun is used and defaults to cov.mcd with the +# default value usded by R for the argument quanitle.used +# But any function that returns location and scatter in $center and $cov +# can be used. +# +# if ols.op=T, OLS is applied after points are removed based on iniital fit +# if ols.op=F, Theil-Sen is used by calling the function mopreg +# +# Early version of this function considered estimating +# explanatory power in terms of the generalized variance +# of the predicted y values and the observed y values +# epow.cov determines which robust covariance matrix will be used. 
+# This idea has not been explored enough +# Some choices are: +# cov (the usual generalized variance) +# skipcov +# tbscov +# covout +# covogk +# mgvcov +# mvecov +# mcdcov +# +library(MASS) +if(!is.matrix(y))stop("y is not a matrix") +X<-cbind(x,y) +X<-elimna(X) +n<-nrow(X) +qy<-ncol(y) +qx<-ncol(x) +qxp1<-qx+1 +tqyqx<-qy+qx +y<-X[,qxp1:tqyqx] +# compute initial estimate of slopes and intercept: +if(!mcd.op)locscat<-cov.fun(X,...) +if(mcd.op)locscat<-cov.mcd(X,quan=quantile.used) +sig<-locscat$cov +mu<-locscat$center +sigxx<-sig[1:qx,1:qx] +sigxy<-sig[1:qx,qxp1:tqyqx] +sigyy<-sig[qxp1:tqyqx,qxp1:tqyqx] +Bhat<-solve(sigxx)%*%sigxy +sige<-sigyy-t(Bhat)%*%sigxx%*%Bhat +sige.inv<-solve(sige) +Ahat<-t(mu[qxp1:tqyqx]-t(Bhat)%*%mu[1:qx]) +resL<-matrix(nrow=nrow(X),ncol=qy) +for(i in 1:nrow(X))resL[i,]<-y[i,]-t(Bhat)%*%X[i,1:qx] +for(j in 1:qy)resL[,j]<-resL[,j]-Ahat[j] +drL<-NA +for(i in 1:nrow(X))drL[i]<-t(resL[i,])%*%sige.inv%*%resL[i,] +# In Rousseeuw notation, drL<- is d^2 +w<-rep(0,nrow(X)) +qdr<-qchisq(.99,qy) +iflag<-(drLalpha) +if(!FWE)id2=which(mat[,8]>alpha) +points(pts[id2,1],pts[id2,2],pch='*') +if(length(id)>0)points(pts[id,1],pts[id,2],pch='+') +} +sig.pts=unique(sig.pts) +list(points=pts,output=mat,crit=critv,sig.pts=sig.pts) +} + +rplot2g<-runmean2g + +Qancsm<-function(x1,y1,x2,y2,crit.mat=NULL,nboot=200,SEED=TRUE,REP.CRIT=FALSE, +qval=.5,q=NULL,xlab="X",ylab="Y",plotit=TRUE,pr=TRUE,xout=FALSE,outfun=out,...){ +# +# Compare two nonparametric +# regression lines corresponding to two independent groups +# using the depths of smooths. +# +# NULL hypothesis: regression lines are identical in terms of the median +# of Y, given$X, for all X +# The method is based on comparing the depth of the fitted regression lines +# and is essentially a slight variation of the method in Wilcox +# (in press) Journal of Data Science. +# +# One covariate only is allowed. +# +if(ncol(as.matrix(x1))>1)stop("One covariate only is allowed") +if(!is.null(q))qval=q +if(xout){ +flag1=outfun(x1)$keep +flag2=outfun(x2)$keep +x1=x1[flag1] +y1=y1[flag1] +x2=x2[flag2] +y2=y2[flag2] +} +if(SEED)set.seed(2) +xy=elimna(cbind(x1,y1)) +x1=xy[,1] +xord=order(x1) +x1=x1[xord] +y1=xy[xord,2] +xy=elimna(cbind(x2,y2)) +x2=xy[,1] +xord=order(x2) +x2=x2[xord] +y2=xy[xord,2] +n1=length(y1) +n2=length(y2) +if(is.null(crit.mat[1])){ +if(pr)print("Determining critical value. 
This might take a while") +crit.val=NA +yall=c(y1,y2) +xall=c(x1,x2) +nn=n1+n2 +il=n1+1 +for(i in 1:nboot){ +data=sample(nn,nn,T) +yy1=yall[data[1:n1]] +yy2=yall[data[il:nn]] +xx1=xall[data[1:n1]] +xx2=xall[data[il:nn]] +crit.mat[i]=Qdepthcom(xx1,yy1,xx2,yy2,qval=qval) +}} +dep=Qdepthcom(x1,y1,x2,y2,qval=qval) +pv=1-mean(crit.mat2)stop("This function only allows one covariate") +if(xout){ +m<-cbind(x1,y1) +flag<-outfun(x1,plotit=FALSE)$keep +m<-m[flag,] +x1<-m[,1] +y1<-m[,2] +m<-cbind(x2,y2) +flag<-outfun(x2,plotit=FALSE)$keep +m<-m[flag,] +x2<-m[,1] +y2<-m[,2] +} +x=c(x1,x2) +y=c(y1,y2) +g=c(rep(0,length(x1)),rep(1,length(x2))) +xgy=elimna(cbind(x,g,x*g,y)) +xg=xgy[,1:3] +y=xgy[,4] +res=olswbtest(xg,y,nboot=nboot,SEED=SEED,RAD=RAD,alpha=alpha) +res[3,6] +} + +regpreCV<-function(x,y,regfun=tsreg,varfun=pbvar,adz=TRUE,model=NULL,locfun=mean, +xout=FALSE,outfun=out, +plotit=TRUE,xlab="Model Number",ylab="Prediction Error",...){ +# +# Estimate the prediction error using the regression method +# regfun in conjunction with leave-one-out cross-validation +# +# The argument model should have list mode, model[[1]] indicates +# which predictors are used in the first model. For example, storing +# 1,4 in model[[1]] means predictors 1 and 4 are being considered. +# If model is not specified, and number of predictors is at most 5, +# then all models are considered. +# +# If adz=T, added to the models to be considered is where +# all regression slopes are zero. That is, use measure of location only +# corresponding to +# locfun. +# +x<-as.matrix(x) +d<-ncol(x) +p1<-d+1 +temp<-elimna(cbind(x,y)) +x<-temp[,1:d] +y<-temp[,d+1] +x<-as.matrix(x) +if(xout){ +x<-as.matrix(x) +flag<-outfun(x,...)$keep +x<-x[flag,] +y<-y[flag] +x<-as.matrix(x) +} +if(is.null(model)){ +if(d<=5)model<-modgen(d,adz=adz) +if(d>5)model[[1]]<-c(1:ncol(x)) +} +mout<-matrix(NA,length(model),3,dimnames=list(NULL,c("est.error", +"var.used","rank"))) +for (imod in 1:length(model)){ +nmod=length(model[[imod]])-1 +temp=c(nmod:0) +mout[imod,2]=sum(model[[imod]]*10^temp) +# +if(sum(model[[imod]]==0)!=1){ +xx<-x[,model[[imod]]] +xx<-as.matrix(xx) +mout[imod,1]<-regpecv(xx,y,regfun=regfun,varfun=varfun,...) +} +# +if(sum(model[[imod]]==0)==1){ +mout[imod,1]<-locCV(y,varfun=varfun,locfun=locfun) +}} +mout[,3]=rank(mout[,1]) +if(plotit)plot(c(1:nrow(mout)),mout[,1],xlab=xlab,ylab=ylab) +mout +} + +locCV<-function(y,varfun=pbvar,locfun=median,...){ +vals=NA +n=length(y) +est=locfun(y) +for(i in 1:n)vals[i]=y[i]-locfun(y[-i],...) +res=varfun(vals) +res +} + + +esI<-function(x,tr=.2,nboot=100,SEED=TRUE){ +# +# Explanatory measure of effect size for an interaction in +# a 2-by-2 ANOVA +# +# Assume x is a mtrix with 4 columns or has list mode with length 4 +# Also assume interaction is for x_1-x_2 versus x_3-x_4 +# +if(is.matrix(x)|| is.data.frame(x))x=listm(x) +es=yuenv2(outer(x[[1]],x[[2]],"-"),outer(x[[3]],x[[4]],"-"), +tr=tr,nboot=nboot,SEED=SEED)$Effect.Size +list(Effect.Size=es) +} + + +esImcp<-function(J,K,x,tr=0.2,nboot=100,SEED=TRUE){ +# +# Compute measure of effect size for all interactions in a J-by-K design +# A robust, heteroscedastic measure of effect (explanatory measure of +# effect size) is used. 
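+# Example (illustrative), assuming the J*K groups in x are ordered as expected
+# by con2way (levels of Factor B varying fastest within levels of Factor A):
+# esImcp(2,3,x) returns an effect size for each interaction contrast.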
+# +if(is.matrix(x)|| is.data.frame(x))x=listm(x) +con=con2way(J,K)$conAB +es=NULL +for (j in 1:ncol(con)){ +flag=(con[,j]!=0) +es[j]=esI(x[flag],tr=tr,nboot=nboot,SEED=SEED)$Effect.Size +} +list(Effect.Sizes=es,contrast.coef=con) +} + + +ESmainMCP<-function(J,K,x,tr=0.2,nboot=100,SEED=TRUE){ +# +# Compute explanatory measure of effect size for all main effects +# in a two-way design. That is, for Factor A, compute it for all levels j < j' +# For Factor B, compute it for all level kobs]) + if (sumpr0))print("Duplicate values detected; tshdreg might have more power than tsreg") +}} +nv=length(y) +x<-as.matrix(x) +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +if(pr)print("Taking bootstrap samples. Please wait.") +data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot) +if(!WARNS)options(warn=-1) +bvec<-apply(data,1,regboot,x,y,regfun,xout=FALSE,...) +options(warn=0) +#Leverage points already removed. +# bvec is a p+1 by nboot matrix. The first row +# contains the bootstrap intercepts, the second row +# contains the bootstrap values for first predictor, etc. +regci<-matrix(0,p1,6) +vlabs="Intercept" +for(j in 2:p1)vlabs[j]=paste("Slope",j-1) +if(LABELS)vlabs[2:p1]=labels(x)[[2]] +dimnames(regci)<-list(vlabs,c("ci.low","ci.up","Estimate","S.E.","p-value",'p.adj')) +ilow<-round((alpha/2) * nboot) +ihi<-nboot - ilow +ilow<-ilow+1 +se<-NA +pvec<-NA +for(i in 1:p1){ +bsort<-sort(bvec[i,]) +#pvec[i]<-(sum(bvec[i,]<0)+.5*sum(bvec[i,]==0))/nboot +pvec[i]<-(sum(bvec[i,].5)pvec[i]<-1-pvec[i] +regci[i,1]<-bsort[ilow] +regci[i,2]<-bsort[ihi] +se[i]<-sqrt(var(bvec[i,])) +} +if(p1==3){ +if(plotit){ +plot(bvec[2,],bvec[3,],xlab=xlab,ylab=ylab) +}} +regci[,3]=estit +pvec<-2*pvec +regci[,4]=se +regci[,5]=regci[,6]=pvec +regci[2:p1,6]=p.adjust(pvec[2:p1],method=method) +list(regci=regci,n=nrem,n.keep=nv) +} +M2m.loc<-function(m,grpc,col.dat,locfun=tmean,...){ +# +# m is a matrix or data frame. +# Compute a measure of location for each of several categories, with +# categories indicated by the values in the column of m given by the +# argument grpc. +# The argument grpc can have up to 4 values, which correspond to factors. +# +# col.dat indicates the column of m containing the outcome measure +# of interest. +# locfun indicates the measure of location, which defaults to the 20% +# trimmed mean. +# +# Example, +# M2m.loc(x,c(1,4),5,locfun=mean) +# indicates that there are 2 factors, with levels of the factors indicated +# by the values in columns 1 and 4 of the matrix x. For each combination +# of levels, +# locfun=mean +# indicates that the sample mean will be computed. +# +flagit=F +if(is.null(dim(m)))stop("Data must be stored in a matrix or data frame") +if(is.na(grpc[1]))stop("The argument grpc is not specified") +if(is.na(col.dat[1]))stop("The argument col.dat is not specified") +if(length(grpc)>4)stop("grpc must have length <= 4") +m=as.data.frame(m) +if(length(grpc)==1){ +p1=ncol(m)+1 +dum=rep(1,nrow(m)) +flagit=T +m=cbind(m,dum) +grpc=c(NULL,gprc,p1) +cat1<-sort(unique(m[,grpc[1]])) +M=NULL +for (ig1 in 1:length(cat1)){ +flag1=(m[,grpc[1]]==cat1[ig1]) +flag=(flag1==1) +msub=as.data.frame(m[flag,]) +loc=locfun(m[flag,col.dat],...) 
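+# Stack this level's value and its measure of location onto the results matrix M.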
+M=rbind(M,as.data.frame(cbind(msub[1,grpc],loc))) +} +M=M[,c(1,3)] +} +if(length(grpc)==2){ +cat1<-sort(unique(m[,grpc[1]])) +cat2<-sort(unique(m[,grpc[2]])) +M=NULL +for (ig1 in 1:length(cat1)){ +for (ig2 in 1:length(cat2)){ +flag1=(m[,grpc[1]]==cat1[ig1]) +flag2=(m[,grpc[2]]==cat2[ig2]) +flag=(flag1*flag2==1) +msub=m[flag,] +loc=locfun(m[flag,col.dat],...) +M=rbind(M,as.data.frame(cbind(msub[1,grpc],loc))) +}}} +if(length(grpc)==3){ +cat1<-sort(unique(m[,grpc[1]])) +cat2<-sort(unique(m[,grpc[2]])) +cat3<-sort(unique(m[,grpc[3]])) +M=NULL +for (ig1 in 1:length(cat1)){ +for (ig2 in 1:length(cat2)){ +for (ig3 in 1:length(cat3)){ +flag1=(m[,grpc[1]]==cat1[ig1]) +flag2=(m[,grpc[2]]==cat2[ig2]) +flag3=(m[,grpc[3]]==cat3[ig3]) +flag=(flag1*flag2*flag3==1) +msub=m[flag,] +loc=locfun(m[flag,col.dat],...) +M=rbind(M,as.data.frame(cbind(msub[1,grpc],loc))) +}}}} +if(length(grpc)==4){ +cat1<-sort(unique(m[,grpc[1]])) +cat2<-sort(unique(m[,grpc[2]])) +cat3<-sort(unique(m[,grpc[3]])) +cat4<-sort(unique(m[,grpc[4]])) +M=NULL +for (ig1 in 1:length(cat1)){ +for (ig2 in 1:length(cat2)){ +for (ig3 in 1:length(cat3)){ +for (ig4 in 1:length(cat4)){ +flag1=(m[,grpc[1]]==cat1[ig1]) +flag2=(m[,grpc[2]]==cat2[ig2]) +flag3=(m[,grpc[3]]==cat3[ig3]) +flag4=(m[,grpc[4]]==cat4[ig4]) +flag=(flag1*flag2*flag3*flag4==1) +msub=m[flag,] +loc=locfun(m[flag,col.dat],...) +M=rbind(M,as.data.frame(cbind(msub[1,grpc],loc))) +}}}}} +if(flagit)M=M[,c(1,3)] +M +} +skip<-function(m,cop=6,MM=FALSE,op=1,mgv.op=0,outpro.cop=3,STAND=TRUE,pr=TRUE){ +# +# m is an n by p matrix +# +# Compute skipped location and covariance matrix +# +# op=1: +# Eliminate outliers using a projection method +# That is, first determine center of data using: +# +# cop=1 Donoho-Gasko median, +# cop=2 MCD, +# cop=3 marginal medians. +# cop=4 uses MVE center +# cop=5 uses TBS +# cop=6 uses rmba (Olive's median ball algorithm) +# +# For each point +# consider the line between it and the center, +# project all points onto this line, and +# check for outliers using +# +# MM=F, a boxplot rule. +# MM=T, rule based on MAD and median +# +# Repeat this for all points. A point is declared +# an outlier if for any projection it is an outlier +# +# op=2 use mgv (function outmgv) method to eliminate outliers +# +# Eliminate any outliers and compute means +# using remaining data. +# mgv.op=0, mgv uses all pairwise distances to determine center of the data +# mgv.op=1 uses MVE +# mgv.op=2 uses MCD +# +temp<-NA +m<-elimna(m) +if(op==2)temp<-outmgv(m,plotit=FALSE,op=mgv.op)$keep +if(op==1)temp<-outpro(m,plotit=FALSE,MM=MM,cop=outpro.cop,STAND=STAND,pr=pr)$keep +val<-var(m[temp,]) +loc<-apply(m[temp,],2,mean) +list(center=loc,cov=val) +} + +ancmppb<-function(x1,y1,x2,y2,fr1=1,fr2=1,alpha=.05,pts=NA,est=tmean,nboot=NA, +bhop=TRUE,SEED=TRUE,cov.fun=skip,cop=NULL,COV.both=FALSE,pr=TRUE,...){ +# +# Compare two independent groups using the ancova method +# with multiple covariates. +# No parametric assumption is made about the form of +# the regression lines--a running interval smoother is used. +# Design points are chosen based on depth of points in x1 if pts=NA +# Assume data are in x1 y1 x2 and y2 +# +# cov.fun determines the location and +# scatter matrix used to find closest points to +# a design point. It is used by ancdes. 
+# +# Choices for cov.fun include +# cov.mve +# cov.mcd +# rmba +# skip +# tbs +# +#if(pr)print("For the old version of this function, use ancmpbpb") +x1=as.matrix(x1) +y1=as.matrix(y1) +if(ncol(x1)==1)stop("Use a function designed for one covariate only") +x2=as.matrix(x2) +y2=as.matrix(y2) +if(ncol(x1)!=ncol(x2)) +stop("Number of covariates must be the same for each group") +xy=elimna(cbind(x1,y1)) +p=ncol(x1) +p1=p+1 +x1=xy[,1:p] +y1=xy[,p1] +xy=elimna(cbind(x2,y2)) +x2=xy[,1:p] +y2=xy[,p1] +x1=as.matrix(x1) +x2=as.matrix(x2) +mval1=cov.fun(x1) +mval2=cov.fun(x2) +if(is.na(pts[1])){ +x1<-as.matrix(x1) +if(!COV.both){ +if(!is.null(cop))pts<-ancdes(x1,cop=cop) +if(is.null(cop))pts=ancdes(x1,center=mval1$center) +} +if(COV.both){ +if(!is.null(cop))pts<-ancdes(rbind(x1,x2),cop=cop) +if(is.null(cop))pts=ancdes(rbind(x1,x2),center=mval1$center) +} +} +pts<-as.matrix(pts) +if(nrow(pts)>=29){ +print("WARNING: More than 28 design points") +print("Only first 28 are used.") +pts<-pts[1:28,] +} +n1<-1 +n2<-1 +vecn<-1 +for(i in 1:nrow(pts)){ +n1[i]<-length(y1[near3d(x1,pts[i,],fr1,mval1)]) +n2[i]<-length(y2[near3d(x2,pts[i,],fr2,mval2)]) +} +flag<-rep(TRUE,nrow(pts)) +for(i in 1:nrow(pts))if(n1[i]<10 || n2[i]<10)flag[i]<-FALSE +flag=as.logical(flag) +pts<-pts[flag,] +if(sum(flag)==1)pts<-t(as.matrix(pts)) +if(sum(flag)==0)stop("No comparable design points found, might increase span.") +mat<-matrix(NA,nrow(pts),7) +dimnames(mat)<-list(NULL,c("n1","n2","DIF","TEST","se","ci.low","ci.hi")) +g1<-list() +ip<-nrow(pts) +ncom<-0 +nc2<-ip +con<-matrix(0,nrow=2*ip,ncol=nrow(pts)) +for (i in 1:nrow(pts)){ +ip<-ip+1 +ncom<-ncom+1 +nc2<-nc2+1 +con[ncom,i]<-1 +con[nc2,i]<-0-1 +temp<-y1[near3d(x1,pts[i,],fr1,mval1)] +g1[[i]]<-temp[!is.na(temp)] +temp<-y2[near3d(x2,pts[i,],fr2,mval2)] +g1[[ip]]<-temp[!is.na(temp)] +} +flag.est=FALSE +if(identical(est,onestep))flag.est=TRUE +if(identical(est,mom))flag.est=TRUE +if(flag.est)mat<-pbmcp(g1,alpha=alpha,nboot=nboot,est=est,con=con,bhop=bhop,SEED=SEED,...) +if(!flag.est)mat<-linconpb(g1,alpha=alpha,nboot=nboot,est=est,con=con,bhop=bhop,SEED=SEED,...) +list(points=pts,output=mat) +} + + +hc4wmc<-function(x,y,nboot=599,k=2,grp=NA,con=0,SEED=TRUE,STOP=TRUE,...){ +# +# Test the hypothesis that J independent groups have identical slopes. +# Using least squares regression +# Data are stored in list mode or in a matrix. In the first case +# x[[1]] contains the data for the first group, x[[2]] the data +# for the second group, etc. Length(x)=the number of groups = J. +# If stored in a matrix, the columns of the matrix correspond +# to groups. +# +# Similarly, y[[1]] contains the data for the first group, +# y[[2]] the data for the second groups, etc. +# +# The argument grp can be used to analyze a subset of the groups +# Example: grp=c(1,3,5) would compare groups 1, 3 and 5. +# +# Missing values are allowed. +# +if(STOP)stop('Suggest ols1way. This function assumes equal n. To use anyway, set STOP=FALSE') +con<-as.matrix(con) +if(is.matrix(x))x<-listm(x) +if(!is.list(x))stop("Data must be stored in list mode or in matrix mode.") +if(is.matrix(y))y<-listm(y) +if(!is.list(y))stop("Data must be stored in list mode or in matrix mode.") +if(!is.na(sum(grp))){ # Only analyze specified groups. 
+xx<-list() +yy<-list() +for(i in 1:length(grp)) +xx[[i]]<-x[[grp[i]]] +yy[[i]]<-y[[grp[i]]] +x<-xx +y<-yy +} +J<-length(x) +n<-length(x[[1]]) +tempn<-0 +slopes<-NA +covar<-NA +stemp<-NA +yhat<-numeric(J) +res<-matrix(,ncol=J, nrow=n) +for(j in 1:J){ +temp<-cbind(x[[j]], y[[j]]) +temp<-elimna(temp) # Remove missing values. +#n<-length(y[[j]]) +tempn[j]<-length(temp) +x[[j]]<-temp[,1] +y[[j]]<-temp[,2] +tempx<-as.matrix(x[[j]]) +tempy<-as.matrix(y[[j]]) +#Getting yhat and residuals for wild bootstrap +yhat[j]<-mean(tempy) +res[,j]<-tempy-yhat[j] +#original Slope and SE +stemp<-lsfit(tempx, tempy) +slopes[j]<-stemp$coef[k] #Slopes for original data +covar[j]<-lsfitNci4(tempx, tempy)$cov[k,k] #original HC4 for coefficient(slope) +} +# +Jm<-J-1 +# +# Determine contrast matrix +# +if(sum(con^2)==0){ +ncon<-(J^2-J)/2 +con<-matrix(0,J,ncon) +id<-0 +for (j in 1:Jm){ +jp<-j+1 +for (h in jp:J){ +id<-id+1 +con[j,id]<-1 +con[h,id]<-0-1 +}}} +ncon<-ncol(con) +if(nrow(con)!=J){ +stop("Something is wrong with con; the number of rows does not match the number of groups.") +} +#calculating original statistic +dif.slopes<-t(con)%*%slopes +o.se<-t(con^2)%*%covar +o.stat<-dif.slopes/sqrt(o.se) #original test statistics +# +om<-max(abs(o.stat)) #Max. absolute test statistics +# +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +# +data<-matrix(ifelse(rbinom(n*nboot*J,1,0.5)==1,-1,1),ncol=nboot*J) #discrete wild bootstrap sample +test<-numeric(nboot) +u<-rep(1, n) +c<-1 +for (i in 1:nboot*J-J+1){ +d<-data[,i:i+J-1] +ystar<-u%*%t(yhat)+res*d +ystar<-listm(ystar) +i<-i+J +test[c]<-mcslope(x,ystar, con, k) +# +c<-c+1 +} +sum<-sum(test>= om) +p.val<-sum/nboot +list(p.value=p.val) +} +mcslope<-function(X, Y, con, k){ +J=length(X) +slopes<-numeric(J) +covar<-numeric(J) +for(j in 1:J){ +tempx<-as.matrix(X[[j]]) +tempy<-as.matrix(Y[[j]]) +slopes[j]<-lsfit(tempx, tempy)$coef[k] #Slopes for original data +covar[j]<-lsfitNci4(tempx, tempy)$cov[k,k] #original HC4 for coefficient(slope) +} +dif.slopes<-t(con)%*%slopes +o.se<-t(con^2)%*%covar +o.stat<-dif.slopes/sqrt(o.se) #original test statistics +om<-max(abs(o.stat)) +om +} + + +ZYmediate<-function(x,y,nboot=2000,alpha=.05,kappa=.05,SEED=TRUE,xout=FALSE,outfun=out){ +# +# Robust mediation analysis using M-estimator as +# described in Zu and Yuan, 2010, MBR, 45, 1--44. +# +# x[,1] is predictor +# x[,2] is mediator variable +# y is outcome variable. +ep=0.00000001 # convergence criteria +B=nboot # the number of bootstrap replications +kappa # the percent of cases to be controlled when robust method is used + # Zu and Yuan used .05, so this is the default value used here. 
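+# kappa, together with the number of variables (p=3 here), determines the Huber
+# tuning constants r and tau via HuberTun().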
+level=alpha # alpha level +if(SEED)set.seed(2) +Z=elimna(cbind(x,y)) +if(xout){ +flag<-outfun(Z[,1],plotit=FALSE,SEED=SEED)$keep +Z<-Z[flag,] +} +p=3 +n=nrow(Z) +HT=HuberTun(kappa,p) +r=HT$r +tau=HT$tau +H=robEst(Z,r,tau,ep) +R.v=H$u2*tau +oH=order(R.v) +oCaseH=(1:n)[oH] # case number with its Ri increases +oR.v=R.v[oH] + +thetaH=H$theta +aH=thetaH[1] +bH=thetaH[2] +abH=aH*bH + +muH=H$mu +SigmaH=H$Sigma +dH=H$d + + +### Use robust method +# point estimate +thetaH=H$theta +aH=thetaH[1] +bH=thetaH[2] +abH=aH*bH + +muH=H$mu +SigmaH=H$Sigma +dH=H$d + +#Standard errors +RH=SErob(Z,muH,SigmaH,thetaH,dH,r,tau) + +Zr=RH$Zr +SEHI=RH$inf +SEHS=RH$sand + +#Standard errors +RH=SErob(Z,muH,SigmaH,thetaH,dH,r,tau) + +Zr=RH$Zr +SEHI=RH$inf +SEHS=RH$sand + +#Standard errors +RH=SErob(Z,muH,SigmaH,thetaH,dH,r,tau) + +Zr=RH$Zr +SEHI=RH$inf +SEHS=RH$sand +ParEstH<-round(cbind(thetaH,SEHI[1:6],SEHS[1:6]),3) +rnames<-c("a","b","c","vx","vem","vey") +ParEstH<-cbind(rnames,ParEstH) +res=t(ParEstH) +# +Res=BCI(Z,Zr,ab=3,abH,B,level) +list(CI.ab=Res$CI,p.value=Res$pv,a.est=aH,b.est=bH,ab.est=abH) +} + + +#------------------------------------------------------------ +# Tunning parameter when use Huber type weight +#------------------------------------------------------------ +# Input: + #kappa: the proportion of cases to be controlled + #p: the number of variables +# Output + # r: the critical value of Mahalalanobis distance, as defined in (20) + # tau: the constant to make the robust estimator of Sigma to be unbiased, as defined in (20) + +HuberTun=function(kappa,p){ + prob=1-kappa + chip=qchisq(prob,p) + r=sqrt(chip) + tau=(p*pchisq(chip,p+2)+ chip*(1-prob))/p + Results=list(r=r,tau=tau) + return(Results) +} + +robEst=function(Z,r,tau,ep){ + + p=ncol(Z) + n=nrow(Z) + # Starting values + mu0=MeanCov(Z)$zbar + Sigma0=MeanCov(Z)$S + Sigin=solve(Sigma0) + + diverg=0 # convergence flag + + for (k in 1:200) { + sumu1=0 + mu=matrix(0,p,1) + Sigma=matrix(0,p,p) + d=rep(NA,n) + u1=rep(NA,n) + u2=rep(NA,n) + + for (i in 1:n) { zi=Z[i,] + zi0=zi-mu0 + di2=t(zi0)%*%Sigin%*%zi0 + di=as.numeric(sqrt(di2)) + d[i]=di + + #get u1i,u2i + if (di<=r) { + u1i=1.0 + u2i=1.0/tau + }else { + u1i=r/di + u2i=u1i^2/tau + } + u1[i]=u1i + u2[i]=u2i + + sumu1=sumu1+u1i + mu=mu+u1i*zi + Sigma=Sigma+u2i*zi0%*%t(zi0) + + } # end of loop i + + mu1=mu/sumu1 + Sigma1=Sigma/n + Sigdif=Sigma1-Sigma0 + dt=sum(Sigdif^2) + + mu0=mu1 + Sigma0=Sigma1 + Sigin=solve(Sigma0) + if (dt0) +pv=2*min(c(pstar,1-pstar)) +# Results=list(BP=BP) +# return(Results) +list(BP,pv) +} + +RobRsq<-function(x,y){ +library(robust) +z=lmRob(y~x) +res=robR2w(z) +res +} + +robR2w = function (rob.obj, correc=1.2076) { + ## R2 in robust regression, see + ## Renaud, O. & Victoria-Feser, M.-P. (2010). A robust coefficient of determination for regression. + ## Journal of Statistical Planning and Inference, 140, 1852-1862. + ## rob.obj is an lmRob object. correc is the correction for consistancy. Call: + ## + ## library(robust) + ## creat.lmRob = lmRob(original1 ~ approprie1+approprie2+creativite1+creativite2, data=creatif) + ## summary(creat.lmRob) + ## robR2w(creat.lmRob) + + ## Weights in robust regression + wt.bisquare = function(u, c = 4.685) { + U <- abs(u/c) + w <- ((1. + U) * (1. - U))^2. + w[U > 1.] <- 0. 
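+    # Tukey bisquare weights: ((1+U)*(1-U))^2 = (1-U^2)^2 when |u/c| <= 1, zero otherwise.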
+ w + } + weight.rob=function(rob.obj){ + resid.rob=rob.obj$resid + scale.rob=(rob.obj$scale)*rob.obj$df.residual/length(resid.rob) + resid.rob= resid.rob/scale.rob + weight=wt.bisquare(resid.rob) + } + + if (attr(rob.obj, "class") !="lmRob") + stop("This function works only on lmRob objects") + pred = rob.obj$fitted.values + resid = rob.obj$resid + resp = resid+pred + wgt = weight.rob(rob.obj) + scale.rob = rob.obj$scale + resp.mean = sum(wgt*resp)/sum(wgt) + pred.mean = sum(wgt*pred)/sum(wgt) + yMy = sum(wgt*(resp-resp.mean)^2) + rMr = sum(wgt*resid^2) + r2 = (yMy-rMr) / yMy + r2correc= (yMy-rMr) / (yMy-rMr +rMr*correc) + r2adjcor = 1-(1-r2correc) * (length(resid)-1) / (length(resid)-length(rob.obj$coefficients)-1) + return(list(robR2w.NoCorrection=r2, robR2w.WithCorrection=r2correc, robR2w.AdjustedWithCorrection=r2adjcor)) +} + +bi2KMSv2<-function(r1=sum(elimna(x)),n1=length(elimna(x)),r2=sum(elimna(y)),n2=length(elimna(y)), +x=NA,y=NA,nullval=0,alpha=.05){ +# +# Test the hypothesis that two independent binomials have equal +# probability of success using method KMS. +# +# Unlike the function bi2KMS, a p-value is returned +# +# r1=number of successes in group 1 +# n1=number of observations in group 1 +# +# Uses Kulinskaya et al. method American Statistician, 2010, 64, 350- +# +# null value is the hypothesized value for p1-p2 +# +alph<-c(1:99)/100 +for(i in 1:99){ +irem<-i +chkit<-bi2KMS(r1=r1,n1=n1,r2=r2,n2=n2,x=x,y=x,alpha=alph[i]) +if(chkit$ci[1]>nullval || chkit$ci[2]nullval || chkit$ci[2]TB)-.5*mean(test==TB) +list(test=test,p.value=pv) +} + +wmwloc<-function(x,y,na.rm=TRUE,est=median,...){ +# +# Estimate the median of the distribution of x-y +# +if(na.rm){ +x<-x[!is.na(x)] +y<-y[!is.na(y)] +} +m<-outer(x,y,FUN="-") +est=est(m,na.rm=TRUE,...) +est +} + + +DEPanc<-function(x1,y1,y2,fr1=1,tr=.2,alpha=.05,plotit=TRUE,DISDIF=FALSE,DIF=TRUE, +pts=NULL,sm=FALSE,xout=FALSE,outfun=out,nboot=500){ +# +# Compare two dependent groups using a covariate +# +# x1 is the covariate and +# y1 and y2 are the two measures. For instance time 1 and time 2. +# +# No parametric assumption is made about the form of +# the regression lines--a running interval smoother is used. +# +# fr1 is span for running interval smoother +# +# sm=T will create smooths using bootstrap bagging. +# +# pts can be used to specify the design points where the regression lines +# are to be compared. +# +# If DISDIF=T: 1. compare groups using median of distribution of D=Y1-Y2 +# 2. if na.rm=T, case wise deletion is used, otherwise all of the data are used. 
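+# Example (illustrative): DEPanc(x1,y1,y2,pts=c(20,30,40)) compares the two measures
+# at the covariate values 20, 30 and 40 using 20% trimmed means applied to the
+# difference scores (DIF=TRUE, the default).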
+# +# Also see the R function DEPancB, which includes alternative methods for handling missing values +# +m=cbind(x1,y1,y2) +flag=is.na(x1) +m=m[!flag,] +if(is.null(pts[1])){ +npt<-5 +isub<-c(1:5) # Initialize isub +test<-c(1:5) +xorder<-order(x1) +y1<-y1[xorder] +x1<-x1[xorder] +y2<-y2[xorder] +vecn<-1 +for(i in 1:length(x1))vecn[i]<-length(y1[near(x1,x1[i],fr1)]) +sub<-c(1:length(x1)) +isub[1]<-min(sub[vecn>=12]) +isub[5]<-max(sub[vecn>=12]) +isub[3]<-floor((isub[1]+isub[5])/2) +isub[2]<-floor((isub[1]+isub[3])/2) +isub[4]<-floor((isub[3]+isub[5])/2) +} +if(!is.null(pts[1]))isub=c(1:length(pts)) +mat<-matrix(NA,length(isub),8) +dimnames(mat)<-list(NULL,c("X","n","DIF","TEST","se","ci.low","ci.hi", +"p.value")) +for (i in 1:length(isub)){ +if(is.null(pts)){ +ch=near(x1,x1[isub[i]],fr1) +mat[i,1]=x1[isub[i]] +} +if(!is.null(pts)){ +ch=near(x1,pts[i],fr1) +mat[i,1]=pts[i] +} +mat[i,2]=sum(ch) +if(!DISDIF){ +if(!DIF){ +test<-yuend(m[ch,2],m[ch,3],tr=tr) +mat[i,3]=mean(m[ch,2],tr=tr)-mean(m[ch,3],tr=tr) +mat[i,4]<-test$teststat +mat[i,5]<-test$se +mat[i,6]<-test$ci[1] +mat[i,7]<-test$ci[2] +mat[i,8]<-test$siglevel +} +if(DIF){ +test=trimci(m[ch,2]-m[ch,3],tr=tr,pr=FALSE) +mat[i,3]=mean(m[ch,2]-m[ch,3],tr=tr) +mat[i,4]<-test$test.stat +mat[i,5]<-test$se +mat[i,6]<-test$ci[1] +mat[i,7]<-test$ci[2] +mat[i,8]<-test$p.value +}} +if(DISDIF){ +test=l2drmci(m[ch,2:3],pr=FALSE,nboot=nboot,na.rm=na.rm) +mat[i,3]<-loc2dif(m[ch,2],m[ch,3],na.rm=na.rm) +mat[i,4]<-NA +mat[i,5]<-NA +mat[i,6]<-test$ci[1] +mat[i,7]<-test$ci[2] +mat[i,8]<-test$p.value +}} +if(plotit) +runmean2g(x1,y1,x1,y2,fr=fr1,est=mean,tr=tr,sm=sm,xout=xout,outfun=outfun) +list(output=mat) +} + + +DEPancpb<-function(x1,y1,y2,fr1=1,est=tmean,alpha=.05,plotit=TRUE,DISDIF=FALSE,DIF=TRUE,TLS=FALSE,SEED=TRUE, +pts=NULL,sm=FALSE,xout=FALSE,outfun=out,nboot=500,pr=FALSE,na.rm=TRUE,xlab="Group 1", ylab="Group 2",...){ +# +# Compare two dependent groups using a covariate +# +# same as DEPanc, only use bootstrap methods in all cases. +# +# x1 is the covariate and +# y1 and y2 are the two measures. For instance time 1 and time 2. +# +# case wise deletion of missing values used by default. +# To use all of the data not missing, set DIF=F and na.rm=F +# For the special case where the goal is to compare means, also set TLS=T +# (But this can produce an error if too many missing values) +# +# No parametric assumption is made about the form of +# the regression lines--a running interval smoother is used. +# +# TLS=F, use percentile bootstrap when DIF=FALSE; +# otherwise (TLS=TRUE) use Lin-Stivers method for means +# fr1 is span for running interval smoother +# +# sm=T will create smooths using bootstrap bagging. +# +# pts can be used to specify the design points where the regression lines +# are to be compared. 
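+# Example (illustrative): DEPancpb(x1,y1,y2,est=median) applies a percentile bootstrap
+# to the medians of the difference scores at automatically chosen design points.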
+# +m=cbind(x1,y1,y2) +flag=is.na(x1) +if(na.rm)m=elimna(m) +if(!na.rm){ +m=m[!flag,] +} +x1=m[,1] +y1=m[,2] +y2=m[,3] +if(is.null(pts[1])){ +npt<-5 +isub<-c(1:5) # Initialize isub +test<-c(1:5) +xorder<-order(x1) +y1<-y1[xorder] +x1<-x1[xorder] +y2<-y2[xorder] +vecn<-1 +for(i in 1:length(x1))vecn[i]<-length(y1[near(x1,x1[i],fr1)]) +sub<-c(1:length(x1)) +isub[1]<-min(sub[vecn>=12]) +isub[5]<-max(sub[vecn>=12]) +isub[3]<-floor((isub[1]+isub[5])/2) +isub[2]<-floor((isub[1]+isub[3])/2) +isub[4]<-floor((isub[3]+isub[5])/2) +} +if(!is.null(pts[1]))isub=c(1:length(pts)) +mat<-matrix(NA,length(isub),6) +dimnames(mat)<-list(NULL,c("X","n","DIF","ci.low","ci.hi", +"p.value")) +for (i in 1:length(isub)){ +if(is.null(pts)){ +ch=near(x1,x1[isub[i]],fr1) +mat[i,1]=x1[isub[i]] +} +if(!is.null(pts)){ +ch=near(x1,pts[i],fr1) +mat[i,1]=pts[i] +} +mat[i,2]=sum(ch) +if(!DISDIF){ +if(!DIF){ +if(!TLS){ +test=rmmismcp(m[ch,2],m[ch,3],alpha=alpha,SEED=SEED,est=est,plotit = FALSE, + grp = grp, nboot = 500, xlab = xlab, ylab = ylab, pr = pr, ...) +mat[i,3]=est(m[ch,2],na.rm=TRUE)-est(m[ch,3],na.rm=TRUE) +mat[i,4]<-test$output[1,6] +mat[i,5]<-test$output[1,7] +mat[i,6]<-test$output[1,4] +} +if(TLS){ +test=rm2miss(m[ch,2],m[ch,3], nboot = nboot, alpha = alpha, SEED = SEED) +mat[i,3]=mean(m[ch,2],na.rm=TRUE)-mean(m[ch,3],na.rm=TRUE) +mat[i,4]<-test$ci[1] +mat[i,5]<-test$ci[2] +mat[i,6]<-test$p.value +}} +if(DIF){ +test=onesampb(m[ch,2]-m[ch,3],est=est,nboot=nboot,alpha=alpha,SEED=SEED,...) +mat[i,3]=est(m[ch,2]-m[ch,3],na.rm=TRUE,...) +mat[i,4]<-test$ci[1] +mat[i,5]<-test$ci[2] +mat[i,6]<-test$p.value +}} +if(DISDIF){ +test=l2drmci(m[ch,2:3],pr=FALSE,nboot=nboot,na.rm=na.rm) +mat[i,3]<-loc2dif(m[ch,2],m[ch,3],na.rm=na.rm) +mat[i,4]<-test$ci[1] +mat[i,5]<-test$ci[2] +mat[i,6]<-test$p.value +}} +if(plotit) +runmean2g(x1,y1,x1,y2,fr=fr1,est=est,sm=sm,xout=xout,outfun=outfun,...) +list(output=mat) +} + + +lplotPV<-function(x,y, span = 0.75, xout = FALSE,pr=TRUE, + outfun = out,nboot=1000,SEED=TRUE,plotit=TRUE,pyhat = FALSE, expand = 0.5, low.span = 2/3, + varfun = pbvar, cor.op = FALSE, cor.fun = pbcor, scale = FALSE, + xlab = "X", ylab = "Y", zlab = "", theta = 50, phi = 25, + family = "gaussian", duplicate = "error", pc = "*", ticktype = "simple",...){ +# +# Compute a p-value based on the Strength of Association estimated via lplot +# If significant, conclude there is dependence. +# +if(SEED)set.seed(2) +x=as.matrix(x) +if(ncol(x)==2 && !scale){ +if(pr){ +print("scale=F is specified.") +print("If there is dependence, might use scale=T") +}} +vals=NA +nv=ncol(x) +m=elimna(cbind(x,y)) +x<-m[,1:nv] +y<-m[,nv+1] +if(xout){ +flag<-outfun(x,plotit=FALSE,...)$keep +m<-m[flag,] +x<-m[,1:nv] +y<-m[,nv+1] +} +x=as.matrix(x) +est=lplot(x,y,span=span,plotit=plotit,pr=FALSE, pyhat = pyhat, + outfun = outfun, expand = expand, low.span = low.span, + varfun = varfun, cor.op =cor.op, cor.fun = cor.fun, scale = scale, + xlab = xlab, ylab = ylab, zlab =zlab, theta =theta, phi = phi, + family = family, duplicate = duplicate, pc = pc, ticktype = ticktype,...) 
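+# Null reference distribution: resample x and y independently of one another so that
+# any association is destroyed, then recompute the strength of association.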
+n=nrow(x) +data1<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot) +data2<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot) +for(i in 1:nboot){ +vals[i]=lplot(x[data1[i,],],y[data2[i,]],plotit=FALSE,pr=FALSE)$Strength.Assoc +} +p=mean(est$Strength2)xx=cbind(x,x[,3:p]) +vlabs=c('Intercept','x1','x2','x1*x2') +if(p>2){ +p4=p+2 +for(j in 5:p4)vlabs[j]=paste('x',j-1) +if(LABELS)vlabs[5:p4]=labels(x)[[2]][3:p] +} +clabs=c('ci.low','ci.up','Estimate','S.E.','p-value','p.adj') +if(!MC)a=regci(xx,y,regfun = regfun, nboot = nboot, alpha =alpha, SEED = SEED, pr =pr,...) +if(MC)a=regciMC(xx,y,regfun = regfun, nboot = nboot, alpha =alpha, SEED = SEED, pr =pr,...) +output=a$regci +dimnames(output)=list(vlabs,clabs) +list(output=output,n=a$n,n.keep=a$n.keep) +} + +ancovaG<-function(x1,y1,x2,y2,fr1=1,fr2=1,tr=.2,alpha=.05,plotit=FALSE,pts=NULL,sm=FALSE, +pr=TRUE,xout=FALSE,outfun=out,test=medpb2,...){ +# +# This function generalizes the R function ancova so that any hypothesis testing method +# can be used to compare groups at specified design points. +# +# Compare two independent groups using the ancova method coupled with method +# indicated by the argument test. +# No parametric assumption is made about the form of +# the regression lines--a running interval smoother is used. +# +# Assume data are in x1 y1 x2 and y2 +# +# sm=T will create smooths using bootstrap bagging. +# pts can be used to specify the design points where the regression lines +# are to be compared. +# +xy=elimna(cbind(x1,y1)) +x1=xy[,1] +y1=xy[,2] +xy=elimna(cbind(x2,y2)) +x2=xy[,1] +y2=xy[,2] +output=list() +if(is.null(pts[1])){ +mat<-matrix(NA,5,3) +dimnames(mat)<-list(NULL,c("X","n1","n2")) +npt<-5 +isub<-c(1:5) # Initialize isub +xorder<-order(x1) +y1<-y1[xorder] +x1<-x1[xorder] +xorder<-order(x2) +y2<-y2[xorder] +x2<-x2[xorder] +n1<-1 +n2<-1 +vecn<-1 +for(i in 1:length(x1))n1[i]<-length(y1[near(x1,x1[i],fr1)]) +for(i in 1:length(x1))n2[i]<-length(y2[near(x2,x1[i],fr2)]) +for(i in 1:length(x1))vecn[i]<-min(n1[i],n2[i]) +sub<-c(1:length(x1)) +isub[1]<-min(sub[vecn>=12]) +isub[5]<-max(sub[vecn>=12]) +isub[3]<-floor((isub[1]+isub[5])/2) +isub[2]<-floor((isub[1]+isub[3])/2) +isub[4]<-floor((isub[3]+isub[5])/2) +for (i in 1:5){ +mat[i,1]=x1[isub[i]] +g1<-y1[near(x1,x1[isub[i]],fr1)] +g2<-y2[near(x2,x1[isub[i]],fr2)] +g1<-g1[!is.na(g1)] +g2<-g2[!is.na(g2)] +mat[i,2]=length(g1) +mat[i,3]=length(g2) +output[[i]]<-test(g1,g2,...) +}} +if(!is.null(pts[1])){ +mat<-matrix(NA,length(pts),3) +dimnames(mat)<-list(NULL,c("X","n1","n2")) +n1<-1 +n2<-1 +vecn<-1 +for(i in 1:length(pts)){ +n1[i]<-length(y1[near(x1,pts[i],fr1)]) +n2[i]<-length(y2[near(x2,pts[i],fr2)]) +} +for (i in 1:length(pts)){ +mat[i,1]=pts[i] +g1<-y1[near(x1,pts[i],fr1)] +g2<-y2[near(x2,pts[i],fr2)] +g1<-g1[!is.na(g1)] +g2<-g2[!is.na(g2)] +mat[i,2]=length(g1) +mat[i,3]=length(g2) +output[[i]]<-test(g1,g2,...) +}} +if(plotit) +runmean2g(x1,y1,x2,y2,fr=fr1,est=mean,tr=tr,sm=sm,xout=xout,outfun=outfun,...) +list(mat,output) +} + +mat2list<-function(m,grp.dat){ +# +# For data in a matrix m, divide the data into groups based +# on the values in column indicated +# by the argument grp.dat +# and store the data in list mode. 
+# +# This function is like fac2list, only it handles matrices +# +# Example: z=mat2list(m[,2:5],m[,9]) +# will divide the rows of data in columns 2-5 into groups based +# on the group id data in column 9 +# This is done via the function mat2grp +# +# z[[1]] will contain the data in m[,2:5] that is associated with first group +# z[[2]] will contain the data in m[,2:5] that is associated with second group, etc. +# +# If any entry in grp.dat is NA, this row is eliminated from m +# +if(!is.null(dim(m)))m=as.matrix(m) +if(!is.matrix(m))stop("Data must be stored in a matrix or data frame") +p=ncol(m) +p1=p+1 +M=cbind(m,grp.dat) +#print(dim(M)) +x<-mat2grp(M[,1:p1],p1) +for(i in 1:length(x))x[[i]]=x[[i]][,1:p] +x +} + +regpecv<-function(x,y,regfun=tsreg,varfun=pbvar,...){ +# +# Estimate prediction error via leave-one-out cross-validation +# +# regfun defaults to Theil-Sen estimator +# function returns measure of prediction error: robust measure of variation +# applied to the n differences y_i-y_{-i}, i=1,...,n +# where y_{-1} is estimate of y when ith vector of observations is omitted. +# +xy=elimna(cbind(x,y)) +x=as.matrix(x) +px=ncol(x) +px1=px+1 +n=nrow(xy) +vals=NA +for(i in 1:n){ +est=regfun(xy[-i,1:px],xy[-i,px1])$coef +vals[i]=xy[i,px1]-(est[1]+sum(est[2:px1]*xy[i,1:px])) +} +pe=varfun(vals) +pe +} + + +idmatch<-function(m1,m2,id.col1,id.col2=id.col1){ +# +# for the id data in column id.col of matrices m1 and m2 +# pull out data for which both m1 and m2 have matching id's +# return the data in a matrix, M1 before data and M2, the matching data time 2. +# +flag=!is.na(m1[,id.col1]) +m1=m1[flag,] # eliminate any rows where ID is missing +flag=!is.na(m2[,id.col1]) +m2=m2[flag,] +M1=NULL +#if(sum(duplicated(m1))>0)stop('Duplicate ids detected in m1') +#if(sum(duplicated(m2))>0)stop('Duplicate ids detected in m2') +#print(m1[,id.col1]) +if(sum(duplicated(m1[,id.col1]))>0)stop('Duplicate ids detected in m1') +if(sum(duplicated(m2[,id.col2]))>0)stop('Duplicate ids detected in m2') +for(i in 1:nrow(m1)){ +flag=duplicated(c(m1[i,id.col1],m2[,id.col2])) +if(sum(flag>0)){ +if(is.data.frame(m1)){ +if(!is.null(dim(M1)))M1=rbind(M1,as.data.frame(m1[i,])) +if(is.null(dim(M1)))M1=as.data.frame(m1[i,]) +} +if(!is.data.frame(m1)){ +if(!is.null(dim(M1)))M1=rbind(M1,m1[i,]) +if(is.null(dim(M1)))M1=matrix(m1[i,],nrow=1) +} +}} +M2=NULL +for(i in 1:nrow(m2)){ +flag=duplicated(c(m2[i,id.col2],m1[,id.col1])) +if(sum(flag>0)){ +if(is.data.frame(m2)){ +if(!is.null(dim(M2)))M2=rbind(M2,as.data.frame(m2[i,])) +if(is.null(dim(M2)))M2=as.data.frame(m2[i,]) +} +if(!is.data.frame(m2)){ +if(!is.null(dim(M2)))M2=rbind(M2,m2[i,]) +if(is.null(dim(M2)))M2=matrix(m2[i,],nrow=1) +} +}} +#m=cbind(M1[,id.col1],M1[,-id.col1],M2[,-id.col2]) +list(M1=M1,M2=M2) +} + + +rplotCV<-function(x,y,fr=NA,varfun=pbvar,est=tmean,xout=FALSE,outfun=out,eout=FALSE,corfun=pbvar,...){ +# +# Estimate prediction error based on +# a running interval smoother in conjunction with +# a leave-one-out cross validation method +# +# varfun is the measure of variation used on the predicted Y values. +# est is the measure of location used by the running interval smoother. 
+# The estimate is returned in VAR.Y.HAT +# +m=elimna(cbind(x,y)) +if(eout){ +flag<-outfun(m,plotit=FALSE)$keep +m=m[flag,] +} +x=as.matrix(x) +p=ncol(x) +p1=p+1 +x=as.matrix(m[,1:p]) +y=m[,p1] +vals=NA +if(is.na(fr)){ +if(p==1)fr=.8 +if(p>1)fr=1 +} +if(xout){ +keepit<-outfun(x,plotit=FALSE,...)$keep +x<-x[keepit,] +y<-y[keepit] +} +x=as.matrix(x) +for(i in 1:nrow(x)){ +if(p==1)vals[i]=runhat(x[-i,],y[-i],fr=fr,est=est,pts=x[i,],...) +if(p>1)vals[i]=rung3hat(x[-i,],y[-i],fr=fr,pts=t(as.matrix(x[i,])))$rmd +} +dif=y-vals +ans=varfun(elimna(dif)) +list(VAR.Y.HAT=ans) +} + +SMpre<-function(x,y,est=tmean,fr=NA,varfun=pbvar,model=NULL,adz=TRUE, +xout=FALSE,outfun=out,...){ +# +# Estimate prediction error for all of the models specified by the +# the argument model, which has list mode. +# Leave-one-out cross-validation is used in conjunction with a running interval smoother +# +x=as.matrix(x) +p=ncol(x) +p1=p+1 +xy=elimna(cbind(x,y)) +x=xy[,1:p] +y=xy[,p1] +n=nrow(x) +if(xout){ +x<-as.matrix(x) +flag<-outfun(x,...)$keep +x<-x[flag,] +y<-y[flag] +} +if(p>5)stop("Can have at most 5 predictors") +if(is.null(model))model=modgen(p) +mout<-matrix(NA,length(model),3,dimnames=list(NULL,c("error", +"var.used","rank"))) +for(imod in 1:length(model)){ +nmod=length(model[[imod]])-1 +temp=c(nmod:0) +mout[imod,2]=sum(model[[imod]]*10^temp) +mout[imod,1]=rplotCV(x[,model[[imod]]],y,fr=fr,est=est,varfun=varfun)$VAR.Y.HAT +} +if(adz){ +va=0 + for(i in 1:n)va[i]=y[i]-tmean(y[-i]) +no=pbvar(va) +mout=rbind(mout,c(no,0,NA)) +} +mout[,3]=rank(mout[,1]) +list(estimates=mout) +} + +mch2num<-function(x){ +# convert character, stored in matrix, to numeric data. +m=matrix(NA,nrow=nrow(x),ncol=ncol(x)) +for(j in 1:ncol(x))m[,j]=as.numeric(x[,j]) +m +} + +ddep<-function(x,est=onestep,alpha=.05,grp=NA,nboot=500,plotit=TRUE,SEED=TRUE,pr=TRUE,WT=TRUE,...){ +# +# Do ANOVA on dependent groups +# using the partially centered method plus +# depth of zero among bootstrap values. +# +# An improved version of ddep that better handles heteroscedasticity +# (A weighted grand mean is used in this version.) +# +# The data are assumed to be stored in x in list mode +# or in a matrix. In the first case +# x[[1]] contains the data for the first group, x[[2]] the data +# for the second group, etc. Length(x)=the number of groups = J. +# If stored in a matrix, columns correspond to groups. +# +# grp is used to specify some subset of the groups, if desired. +# By default, all J groups are used. +# +# The default number of bootstrap samples is nboot=2000 +# +if(pr)print("Warning: Might not be level robust if the number of groups is relatively large and n is small") +if(pr)print("Currently seems that rmmismcp is preferable") +if(is.list(x)){ +nv<-NA +for(j in 1:length(x))nv[j]<-length(x[[j]]) +if(var(nv) !=0){ +stop("The groups are stored in list mode and appear to have different sample sizes") +} +temp<-matrix(NA,ncol=length(x),nrow=nv[1]) +for(j in 1:length(x))temp[,j]<-x[[j]] +x<-temp +} +J<-ncol(x) +if(!is.na(grp[1])){ #Select the groups of interest +J<-length(grp) +for(j in 1:J)temp[,j]<-x[,grp[j]] +x<-temp +} +x<-elimna(x) # Remove any rows with missing values. +bvec<-matrix(0,ncol=J,nrow=nboot) +hval<-vector("numeric",J) +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +print("Taking bootstrap samples. Please wait.") +n<-nrow(x) +totv<-apply(x,2,est,na.rm=TRUE,...) 
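+# totv contains the location estimate for each of the J groups based on the original data.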
+data<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot) +for(ib in 1:nboot)bvec[ib,]<-apply(x[data[ib,],],2,est,na.rm=TRUE,...) #nboot by J matrix +if(!WT){ +gv<-rep(mean(totv),J) #Grand mean +#m1<-rbind(bvec,gv) +} +bplus<-nboot+1 +center<-totv +cmat<-var(bvec) +if(WT){ +wt=1/diag(cmat) +ut=sum(wt) +gv<-rep(sum(wt*totv)/ut,J) #Grand mean +} +m1<-rbind(bvec,gv) +discen<-mahalanobis(m1,totv,cmat) +#print("Bootstrap complete; computing significance level") +if(plotit && ncol(x)==2){ +plot(bvec,xlab="Group 1",ylab="Group 2") +temp.dis<-order(discen[1:nboot]) +ic<-round((1-alpha)*nboot) +xx<-bvec[temp.dis[1:ic],] +xord<-order(xx[,1]) +xx<-xx[xord,] +temp<-chull(xx) +lines(xx[temp,]) +lines(xx[c(temp[1],temp[length(temp)]),]) +abline(0,1) +} +sig.level<-sum(discen[bplus]<=discen)/bplus +list(p.value=sig.level,center=totv,grand.mean=gv) +} + +ddeptr<-function(x,na.rm=TRUE,alpha=.05,grp=NA,nboot=500,plotit=TRUE,SEED=TRUE,op=FALSE,tr=.2,...){ +# +# Do ANOVA on dependent groups +# using the partially centered method plus +# depth of zero among bootstrap values. +# +# The method is like the method used by the R function ddep, +# but a weighted estimate of the grand mean is used. +# This helps deal the heteroscedasticity among the marginal distributions. +# +# The data are assumed to be stored in x in list mode +# or in a matrix. In the first case +# x[[1]] contains the data for the first group, x[[2]] the data +# for the second group, etc. Length(x)=the number of groups = J. +# If stored in a matrix, columns correspond to groups. +# +# trimmed means are compared +# +# grp is used to specify some subset of the groups, if desired. +# By default, all J groups are used. +# +# The default number of bootstrap samples is nboot=500 +# +# na.rm=T, all rows of data with missing values are removed. +# na.rm=F will use all of the data assuming missing values occur at random +# +if(is.list(x)){ +nv<-NA +for(j in 1:length(x))nv[j]<-length(x[[j]]) +if(var(nv) !=0){ +stop("The groups are stored in list mode and appear to have different sample sizes") +} +temp<-matrix(NA,ncol=length(x),nrow=nv[1]) +for(j in 1:length(x))temp[,j]<-x[[j]] +x<-temp +} +J<-ncol(x) +if(!is.na(grp[1])){ #Select the groups of interest +J<-length(grp) +for(j in 1:J)temp[,j]<-x[,grp[j]] +x<-temp +} +if(na.rm)x<-elimna(x) # Remove any rows with missing values. +bvec<-matrix(0,ncol=J,nrow=nboot) +hval<-vector("numeric",J) +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +if(op)print("Taking bootstrap samples. Please wait.") +n<-nrow(x) +wt=apply(x,2,trimse,...) +wt=1/wt^2 +ut=sum(wt) +totv<-apply(x,2,tmean,na.rm=TRUE,...) +gv<-rep(sum(wt*totv)/ut,J) #Weighted grand mean +data<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot) +for(ib in 1:nboot)bvec[ib,]<-apply(x[data[ib,],],2,tmean,na.rm=TRUE,...) 
#nboot by J matrix +bplus<-nboot+1 +m1<-rbind(bvec,gv) +center<-totv +cmat<-var(bvec) +discen<-mahalanobis(m1,totv,cmat) +if(op)print("Bootstrap complete; computing significance level") +if(plotit && ncol(x)==2){ +plot(bvec,xlab="Group 1",ylab="Group 2") +temp.dis<-order(discen[1:nboot]) +ic<-round((1-alpha)*nboot) +xx<-bvec[temp.dis[1:ic],] +xord<-order(xx[,1]) +xx<-xx[xord,] +temp<-chull(xx) +lines(xx[temp,]) +lines(xx[c(temp[1],temp[length(temp)]),]) +abline(0,1) +} +sig.level<-sum(discen[bplus]<=discen)/bplus +list(p.value=sig.level,center=totv,weighted.grand.mean=gv[1]) +} + + +qhdplotsm<-function(x,y,q=.5,xlab="X",ylab="Y",pc=".", +xout=FALSE,outfun=out,nboot=40,fr=1,...){ +# +# Plots smooths of quantile regression lines for one or more quantiles +# using rplotsm with Harrell--Davis estimator +# +# q indicates the quantiles to be used. +# +# EXAMPLE: +# qhdplotsm(x,y,q=c(.2,.5,.8)) will plot three smooths corresponding to +# the .2, .5 and .8 quantile regression lines. +# +xy=elimna(cbind(x,y)) +x=as.matrix(x) +if(ncol(x)!=1)stop("Only one predictor is allowed") +x=xy[,1] +y=xy[,2] +if(xout){ +flag<-outfun(x,...)$keep +x<-x[flag] +y<-y[flag] +} +plot(x,y,xlab=xlab,ylab=ylab,pch=pc) +xord=order(x) +for(j in 1:length(q)){ +yhat=rplotsm(x,y,fr=fr,pyhat=TRUE,est=hd,q=q[j],plotit=FALSE,nboot=nboot)$yhat +lines(x[xord],yhat[xord]) +} +print("Done") +} + +outmah<-function(x,qval=pnorm(3),plotit=TRUE,xlab="VAR 1",ylab="VAR 2"){ +# +# detect outliers using Mahalanobis Distance +# For demonstration purposes only. Suggest +# using a method that avoids masking. +# +# In univariate case, default strategy is to use 3 standard deviation rule +# +x=elimna(x) +x=as.matrix(x) +m=apply(x,2,mean) +v=cov(x) +dis=mahalanobis(x,m,v) +crit<-sqrt(qchisq(qval,ncol(x))) +vec<-c(1:nrow(x)) +dis[is.na(dis)]=0 +dis<-sqrt(dis) +chk<-ifelse(dis>crit,1,0) +id<-vec[chk==1] +keep<-vec[chk==0] +if(is.matrix(x)){ +if(ncol(x)==2 && plotit){ +plot(x[,1],x[,2],xlab=xlab,ylab=ylab,type="n") +flag<-rep(TRUE,nrow(x)) +flag[id]<-FALSE +points(x[flag,1],x[flag,2]) +if(sum(!flag)>0)points(x[!flag,1],x[!flag,2],pch="*") +}} +if(!is.matrix(x))outval<-x[id] +if(is.matrix(x))outval<-x[id,] +list(out.val=outval,out.id=id,keep=keep,dis=dis,crit=crit) +} + +difQplot<-function(x,y=NULL,xlab="Quantile",ylab="Effect Size"){ +# +# Plot that provides perspective on the degree a distribution is symmetric about zero. +# This function plots the sum of q and 1-q quantiles. If the distributions are symmetric +# the plot should be approximately a horizontal line. If in addition the median +# of the difference scores is zero, the horizontal line will intercept the y-axis at zero. +# +if(is.null(y))dif=x +if(!is.null(y))dif=x-y +x=elimna(x) +qd=NA +for(i in 1:99)qd[i]=hd(dif,.5-i/200)+hd(dif,.5+i/200) +plot(.5-c(1:99)/200,qd,xlab=xlab,ylab=ylab) +} + +Dqcomhd<-function(x,y,est=hd,q=c(1:9)/10,nboot=2000,pr=TRUE, +plotit=FALSE,SEED=TRUE,xlab='Group 1', +ylab='Est.1-Est.2',na.rm=TRUE,alpha=rep(.05,length(q))){ +# +# Compare the quantiles of the marginal distributions associated with two dependent groups +# via hd estimator. Tied values are allowed. +# +# est=thd would use trimmed hd estimator +# +# When comparing lower or upper quartiles, both power and the probability of Type I error +# compare well to other methods have been derived. 
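+# Example (illustrative): Dqcomhd(x,y,q=c(.25,.5,.75)) compares the quartiles and the
+# median of the two marginal distributions, with Hochberg-adjusted p-values.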
+# +# x: data for group 1 +# y: data for group 2 +# q: the quantiles to be compared +# nboot: Number of bootstrap samples +# +# +if(pr){ +print('Note: confidence intervals are not adjusted to control the simultaneous probability coverage') +} +if(SEED)set.seed(2) +if(na.rm){ +xy=elimna(cbind(x,y)) +x=xy[,1] +y=xy[,2] +} +pv=NULL +output=matrix(NA,nrow=length(q),ncol=10) +dimnames(output)<-list(NULL,c('q','n1','n2','est.1','est.2','est.1_minus_est.2','ci.low','ci.up','p-value','adj.p.value')) +for(i in 1:length(q)){ +output[i,1]=q[i] +output[i,2]=length(elimna(x)) +output[i,3]=length(elimna(y)) +output[i,4]=hd(x,q=q[i]) +output[i,5]=hd(y,q=q[i]) +output[i,6]=output[i,4]-output[i,5] +if(na.rm){ +temp=bootdpci(x,y,est=est,q=q[i],dif=FALSE,plotit=FALSE,pr=FALSE,nboot=nboot,alpha=alpha[i],SEED=FALSE) +output[i,7]=temp$output[1,5] +output[i,8]=temp$output[1,6] +output[i,9]=temp$output[1,3] +} +if(!na.rm){ +temp=rmmismcp(x,y,est=est,q=q[i],plotit=FALSE,pr=FALSE,nboot=nboot,alpha=alpha[i],SEED=FALSE) +output[i,7]=temp$output[1,6] +output[i,8]=temp$output[1,7] +output[i,9]=temp$output[1,4] +} +} +output[,10]=p.adjust(output[,9],method='hoch') +if(plotit){ +xax=rep(output[,4],3) +yax=c(output[,6],output[,7],output[,8]) +plot(xax,yax,xlab=xlab,ylab=ylab,type='n') +points(output[,4],output[,6],pch='*') +lines(output[,4],output[,6]) +points(output[,4],output[,7],pch='+') +lines(output[,4],output[,7],lty=2) +points(output[,4],output[,8],pch='+') +lines(output[,4],output[,8],lty=2) +} +output +} + + + +Dqdif<-function(x,y=NULL,q=.25,nboot=1000,plotit=TRUE,xlab="Group 1 - Group 2",SEED=TRUE,alpha=.05){ +# +# Compare two dependent groups by comparing the +# q and 1-q quantiles of the difference scores +# +# q should be < .5 +# if the groups do not differ, then the difference scores should be symmetric +# about zero. +# In particular, the sum of q and 1-q quantiles should be zero. +# +# q indicates the quantiles to be compared. By default, the .25 and .75 quantiles are used. +# +if(SEED)set.seed(2) +if(q>=.5)stop("q should be less than .5") +if(!is.null(y)){ +xy=elimna(cbind(x,y)) +dif=xy[,1]-xy[,2] +} +if(is.null(y))dif=elimna(x) +n=length(dif) +if(plotit)akerd(dif,xlab=xlab) +bvec=NA +data<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot) +for(ib in 1:nboot){ +bvec[ib]<-hd(dif[data[ib,]],q=q)+hd(dif[data[ib,]],q=1-q) +} +est1=hd(dif,q=q) +est2=hd(dif,q=1-q) +pv=mean(bvec<0)+.5*mean(bvec==0) +p=2*min(c(pv,1-pv)) +low<-round((alpha/2)*nboot)+1 +up<-nboot-low +sbvec=sort(bvec) +ci=sbvec[low] +ci[2]=sbvec[up] +list(est.q=est1,est.1.minus.q=est2,conf.interval=ci,p.value=p) +} + +qwmwhd<-function(x,y,q=seq(5,40,5)/100,xlab="Quantile",ylab="Sum of q and 1-q Quantiles",plotit=TRUE,alpha=.05,nboot=1000,SEED=TRUE){ +# +# Plot that provides perspective on the degree a distribution is symmetric about zero. +# This function plots the sum of q and 1-q quantiles of the distribution of D=X-Y, X and Y independent. +# A 1-alpha confidence interval for the sum is indicated by a + +# If the distribution is symmetric +# the plot should be approximately a horizontal line. +# +# FWE is controlled via Hochberg's method, which was used to determine critical +# p-values based on the argument +# alpha. 
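+# Example (illustrative): qwmwhd(x,y,q=c(.1,.25,.4)) tests, for each q, whether the
+# sum of the q and 1-q quantiles of D=X-Y is zero, X and Y independent.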
+# +# Can alter the quantiles compared via the argument +# q +# q must be less than .5 +# +if(SEED)set.seed(2) +x=elimna(x) +y=elimna(y) +n1=length(x) +n2=length(y) +output=matrix(NA,ncol=8,nrow=length(q)) +dimnames(output)=list(NULL,c("quantile","Est.1","Est.2","SUM","ci.low","ci.up","p_crit","p-value")) +for(i in 1:length(q)){ +test=cbmhd(x,y,q=q[i],plotit=FALSE,nboot=nboot,SEED=SEED) +output[i,1]=q[i] +output[i,2]=test$Est1 +output[i,3]=test$Est2 +output[i,4]=test$sum +output[i,8]=test$p.value +output[i,5]=test$ci[1] +output[i,6]=test$ci[2] +} +temp=order(output[,8],decreasing=TRUE) +zvec=alpha/c(1:length(q)) +output[temp,7]=zvec +output <- data.frame(output) +output$signif=rep("YES",nrow(output)) +for(i in 1:nrow(output)){ +if(output[temp[i],8]>output[temp[i],7])output$signif[temp[i]]="NO" +if(output[temp[i],8]<=output[temp[i],7])break +} +if(plotit){ +plot(rep(q,3),c(output[,4],output[,5],output[,6]),type="n",xlab=xlab,ylab=ylab) +points(q,output[,6],pch="+") +points(q,output[,5],pch="+") +points(q,output[,4],pch="*") +} +list(n=c(n1,n2),output=output) +} + + +difQpci<-function(x,y=NULL,q=seq(5,40,5)/100,xlab="Quantile",ylab="Group 1 minus Group 2",plotit=TRUE,alpha=.05,nboot=1000,SEED=TRUE,LINE=FALSE){ +# +# x can be a vector, in which case compare quantiels of distribution of data in x +# x can be a matrix with 2 columns, in which case analysis is done on dif=x[,1]-x[,2] +# y supplied, then do analysis of dif=x-y +# +# Plot that provides perspective on the degree a distribution is symmetric about zero. +# This function plots the sum of q and 1-q quantiles. A 1-alpha confidence interval for the sum is indicated by a + +# If the distributions are symmetric +# the plot should be approximately a horizontal line. If in addition the median +# of the difference scores is zero, the horizontal line will intersect the y-axis at zero. +# +# Similar to difQplot, only plots fewer quantiles by default and returns p-values for +# each quantile indicated by the argument q. +# +# FWE is controlled via Hochberg's method, which was used to determine critical +# p-values based on the argument +# alpha. +# +# Can alter the quantiles compared via the argument +# q +# q must be less than .5 +# +# LINE=TRUE. When plotting, a line connecting the estimates will be included. +# +x=as.matrix(x) +if(is.null(y))dif=x +if(ncol(x)>2)stop("Should be at most two groups") +if(ncol(x)==2)dif=x[,1]-x[,2] +if(!is.null(y))dif=x-y +dif=elimna(dif) +nv=length(dif) +output=matrix(NA,ncol=8,nrow=length(q)) +dimnames(output)=list(NULL,c("quantile","Est_q","Est_1.minus.q","SUM","ci.low","ci.up","p_crit","p-value")) +for(i in 1:length(q)){ +test=Dqdif(dif,q=q[i],plotit=FALSE,nboot=nboot,SEED=SEED) +output[i,1]=q[i] +output[i,2]=test$est.q +output[i,3]=test$est.1.minus.q +output[i,8]=test$p.value +output[i,5]=test$conf.interval[1] +output[i,6]=test$conf.interval[2] +} +temp=order(output[,8],decreasing=TRUE) +zvec=alpha/c(1:length(q)) +output[temp,7]=zvec +output <- data.frame(output) +output$signif=rep("YES",nrow(output)) +for(i in 1:nrow(output)){ +if(output[temp[i],8]>output[temp[i],7])output$signif[temp[i]]="NO" +if(output[temp[i],8]<=output[temp[i],7])break +} +output[,4]=output[,2]+output[,3] +if(plotit){ +plot(rep(q,3),c(output[,4],output[,5],output[,6]),type="n",xlab=xlab,ylab=ylab) +points(q,output[,6],pch="+") +points(q,output[,5],pch="+") +points(q,output[,4],pch="*") +if(LINE)lines(q,output[,4],pch="*") +} +list(n=nv,output=output) +} + +bsqrm<-function(x,y,alpha=0.05,bend=1.28){ +# +# Computes Bsqrm test statistic. 
This test statistic is from Ozdemir (2012) +# "mestse" was used as the standard error of one-step M-estimator and +# "mad" was used as a measure of scale. +# +x<-x[!is.na(x)] # Remove any missing values in x +y<-y[!is.na(y)] # Remove any missing values in y +zc<-qnorm(alpha/2) +x2<-(x-median(x))/mad(x) +y2<-(y-median(y))/mad(y) +C<-length(x[abs(x2)>bend]) +D<-length(y[abs(y2)>bend]) +e<-c(C,D) +alist<-list(x,y) +f<-(sapply(alist,length))-e +s=sapply(alist,mestse)^2 +wden=sum(1/s) +w=(1/s)/wden +yplus<-sum(w*(sapply(alist,onestep))) +tt<-((sapply(alist,onestep))-yplus)/sqrt(s) +v<-(f-1) +z<-((4*v^2)+(5*((2*(zc^2))+3)/24))/((4*v^2)+v+(((4*(zc^2))+9)/12))*sqrt(v)*(sqrt(log(1+(tt^2/v)))) +teststat<-sum(z^2) +list(teststat=teststat) +} + +bsqrmbt<-function(x,y,alpha=.05,bend=1.28,nboot=599,SEED=TRUE){ +# +# Goal: Test hypothesis that two independent groups have +# equal population M-measures of location. +# A bootstrap-t method is used. +# The method used was derived by F. Ozdemir +# +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +T<-bsqrm(x,y,alpha,bend)$teststat +TT<-0 +bsqrmbt<-numeric(2) +xone<-onestep(x,bend=bend) +yone<-onestep(y,bend=bend) +for(j in 1:nboot) + { + xx<-(sample(x,length(x),replace=TRUE)-xone) + yy<-(sample(y,length(y),replace=TRUE)-yone) + TT[j]<-bsqrm(xx,yy,alpha,bend)$teststat + } +TT<-sort(TT) +bott<-round(alpha*nboot)+1 +bsqrmbt<-TT[nboot-bott] +pv=mean(T<=TT) +list(critval=bsqrmbt,teststat=TRUE,p.value=pv) +} + +M2gbt=bsqrmbt + +qregplots<-function(x, y,qval=.5,q=NULL,op=1,pr=FALSE,xout=FALSE,outfun=out,plotit=FALSE,xlab="X",ylab="Y",...){ +# +# Compute the quantile regression line for one or more quantiles and plot the results +# That is, determine the qth (qval) quantile of Y given X using the +# the Koenker-Bassett approach. +# +# One predictor only is allowed +# +# v2=T, uses the function rq in the R library quantreg +# v2=F, uses an older and slower version +# +# Example: qregplots(x,y,q=c(.25,.5,.75)) will plot the regression lines for +# predicting quartiles. +# +if(!is.null(q))qval=q +x<-as.matrix(x) +if(ncol(x)!=1)stop("Current version allows only one predictor") +X<-cbind(x,y) +X<-elimna(X) +np<-ncol(X) +p<-np-1 +x<-X[,1:p] +x<-as.matrix(x) +y<-X[,np] +if(xout){ +x<-as.matrix(x) +flag<-outfun(x,...)$keep +x<-x[flag,] +y<-y[flag] +x<-as.matrix(x) +} +est=matrix(NA,ncol=3,nrow=length(qval)) +dimnames(est)=list(NULL,c("q","Inter","Slope")) +library(quantreg) +x<-as.matrix(x) +plot(x,y,xlab=xlab,ylab=ylab) +if(ncol(x)!=1)stop("Current version allows only one predictor") +for(j in 1:length(qval)){ +coef=coefficients((rq(y~x,tau=qval[j]))) +est[j,1]=qval[j] +est[j,2:3]=coef +abline(coef) +} +list(coef = est) +} +acbinomciv2<-function(x=sum(y),nn=length(y),y=NULL,n=NA,alpha=.05,nullval=.5){ +# Compute a p-value when testing the hypothesis that the probability of +# success for a binomial distribution is equal to +# nullval, which defaults to .5 +# The Agresti-Coull method is used. +# +# y is a vector of 1s and 0s. +# Or can use the argument +# x = the number of successes observed among +# n=nn trials. 
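+# Example (illustrative): acbinomciv2(x=12,nn=30,nullval=.5) returns a p-value for the
+# hypothesis that the probability of success is .5, based on 12 successes in 30 trials.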
+# +res=acbinomci(x=x,nn=nn,y=y,alpha=alpha) +ci=res$ci +alph<-c(1:99)/100 +for(i in 1:99){ +irem<-i +chkit<-acbinomci(x=x,nn=nn,y=y,alpha=alph[i])$ci +if(chkit[1]>nullval || chkit[2]nullval || chkit[2]nullval || chkit[2]1)stop("With more than one predictor, use logSM") +xy=elimna(cbind(x,y)) +x=xy[,1:ncol(x)] +y=xy[,ncol(xy)] +x<-as.vector(x) +if(xout){ +flag<-outfun(x,...)$keep +x<-x[flag] +y<-y[flag] +} +if(STAND)x<-(x-median(x))/mad(x) +m1<-outer(x,x,"-")^2 +m2<-exp(-1*m1)*(sqrt(m1)<=fr) +m3<-matrix(y,length(y),length(y))*m2 +yhat<-apply(m3,2,sum)/apply(m2,2,sum) #sum over rows for each column +if(plotit){ +xor<-order(x) +plot(x,y,xlab=xlab,ylab=ylab) +if(!LP)lines(x[xor],yhat[xor]) +if(LP){ +Yhat=lplot(x[xor],yhat[xor],pyhat=TRUE,plotit=FALSE)$yhat.values +lines(x[xor],Yhat) +} +} +output<-"Done" +if(pyhat)output<-yhat +list(output=output) +} + +coefalpha<-function(x){ +library(psych) +x=elimna(x) +res=alpha(x) +res +} + + +z.power<-function(n,alpha=.05,del=NULL,var=NULL){ + q=qnorm(1-alpha/2) + sig=sqrt(var) + p1=pnorm(0-q-(sqrt(n)*del)/sig) + p2=1-pnorm(q-(sqrt(n)*del)/sig) + p=p1+p2 + list(power=p) + } + +hdpb<-function(x,est=hd,alpha=.05,nboot=2000,SEED=TRUE,nv=0,...){ +# +# Compute a bootstrap, .95 confidence interval for the +# measure of location corresponding to the argument est. +# By default, the Harrell-Davis estimator is used +# +# The default number of bootstrap samples is nboot=2000 +# +# The parameter q determines the quantile estimated via the function hd +# This function is the same as onesampb, only for convenience it defaults +# to using an estimate of the median. +# +# nv=null value when computing a p-value +# +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +#print("Taking bootstrap samples. Please wait.") +x=elimna(x) +data<-matrix(sample(x,size=length(x)*nboot,replace=TRUE),nrow=nboot) +bvec<-apply(data,1,est,...) +bvec<-sort(bvec) +low<-round((alpha/2)*nboot) +up<-nboot-low +low<-low+1 +pv=mean(bvec>nv)+.5*mean(bvec==nv) +pv=2*min(c(pv,1-pv)) +estimate=est(x,...) +list(ci=c(bvec[low],bvec[up]),n=length(x),estimate=estimate,p.value=pv) +} + +vecnorm<-function(x, p=2) sum(x^p)^(1/p) + +regYvar<-function(x,y,regfun=tsreg,pts=x,nboot=100,xout=FALSE,outfun=out,SEED=TRUE,...){ +# +# Estimate standard error of predicted value of Y using regression estimator +# corresponding to the points in the argument +# pts +# A bootstrap estimate is used +# nboot=100 indicates that 100 bootstrap samples will be used. +# regfun indicates the regression estimator that will be used. +# Theil--Sen is used by default. +# +xy=elimna(cbind(x,y)) +x<-as.matrix(x) +p=ncol(x) +p1=p+1 +vals=NA +x<-xy[,1:p] +y<-xy[,p1] +if(xout){ +m<-cbind(x,y) +if(identical(outfun,outblp))flag=outblp(x,y,plotit=FALSE)$keep +else +flag<-outfun(x,plotit=FALSE,...)$keep +m<-m[flag,] +x<-m[,1:p] +y<-m[,p1] +} +nv=length(y) +x<-as.matrix(x) +pts=as.matrix(pts) +nvpts=nrow(pts) +bvec=matrix(NA,nrow=nboot,ncol=nvpts) +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot) +for(ib in 1:nboot){ +bvec[ib,]=regYsub(x[data[ib,],],y[data[ib,]],xr=pts,p1=p1,regfun=regfun,...) 
+} +sqsd=apply(bvec,2,var) +sqsd +} + +regYsub<-function(x,y,xr,p1,regfun=tsreg,...){ +est=regfun(x,y,...)$coef +xr=as.matrix(xr) +yhat=est[1]+xr%*%est[2:p1] +yhat +} + +regYband<-function(x,y,regfun=tsreg,npts=NULL,nboot=100,xout=FALSE,outfun=outpro,SEED=TRUE, +alpha=.05,crit=NULL,xlab="X",ylab="Y",SCAT=TRUE,ADJ=TRUE,pr=TRUE,nreps=1000, +MC=FALSE,pch='.',...){ +# +# Plot confidence band for the predicted Y value +# if ADJ=FALSE, plot confidence intervals for +# npts points between min(x) and max(x) +# if npts=NULL, then npts=20 is used. +# if ADJ=TRUE, plot confidence band for the predicted Y value for all x values such that +# the simultaneous probability coverage is .95. +# +# npts=NULL and ADJ=FALSE, npts will be set equal to 20. That is, computed confidence +# intervals for 20 point covariate values even space between min(x) and max(x). +# +# +if(!ADJ){ +if(is.null(npts))npts=20 +if(pr)print('To adjust the confidence band so that the simultaneous probability coverage is .95, set ADJ=TRUE') +} +xy=elimna(cbind(x,y)) +x<-as.matrix(x) +p=ncol(x) +if(p!=1)stop("This function assumes a single predictor only") +p1=p+1 +vals=NA +x<-xy[,1:p] +y<-xy[,p1] +if(xout){ +m<-cbind(x,y) +flag<-outfun(x,plotit=FALSE,...)$keep +m<-m[flag,] +x<-m[,1:p] +y<-m[,p1] +} +if(SEED)set.seed(2) +if(!ADJ)pts=seq(min(x),max(x),length.out=npts) +if(ADJ)pts=sort(unique(x)) +res=regYci(x,y,pts=pts,regfun=regfun,xout=FALSE,SEED=SEED,alpha=alpha,ADJ=ADJ,nreps=nreps,MC=MC,...) +plot(c(x,pts,pts),c(y,res[,2],res[,3]),xlab=xlab,ylab=ylab,type="n") +abline(regfun(x,y,...)$coef) +if(SCAT)points(x,y,pch=pch) +lines(pts,res[,3],lty=2) +lines(pts,res[,4],lty=2) +res +} + +ols.pred.ci<-function(x,y,xlab="X",ylab="Y",alpha=.05,xout=FALSE,RETURN=FALSE,newx=NULL){ +# +# plot the ols regression line and a 1-alpha +# confidence interval for the predicted values +# +# RETURN=T means the function will return predicted values and +# and confidence interval for the x values indicated by the argument +# newx +# newx=NULL, means predicted Y will be for seq(min(x), max(x), 0.1) +# +# xout=T removes leverage points. +# +if(ncol(as.matrix(x))!=1)stop("One predictor is allowed") +xy=elimna(cbind(x,y)) +x=xy[,1] +y=xy[,2] +if(xout){ +flag=out(x)$keep +x=x[flag] +y=y[flag] +} +tmp.lm=lm(y~x) +if(is.null(newx))newx=seq(min(x), max(x), 0.1) +a=predict(tmp.lm,interval="confidence",level=1-alpha,newdata=data.frame(x=newx)) +plot(x,y,xlab=xlab,ylab=ylab) +abline(ols(x,y,plotit=FALSE)$coef) +lines(newx,a[,2],lty=2) +lines(newx,a[,3],lty=2) +res=NULL +if(RETURN)res=a +res +} + +regYhat<-function(x,y,xr=x,regfun=tsreg,xout=FALSE,outfun=outpro,pr=FALSE,plot.pts=FALSE,pts=NULL,...){ +# +# For convenience, return estimate of Y based on data in xr (or pts) using +# regression line based on regfun +# +xy=elimna(cbind(x,y)) +x<-as.matrix(x) +xr=as.matrix(xr) +p=ncol(x) +p1=p+1 +vals=NA +x<-xy[,1:p] +y<-xy[,p1] +#print(xr[1:10,]) +if(xout){ +m<-cbind(x,y) +if(identical(outfun,outblp))flag=outblp(x,y,plotit=FALSE)$keep +else +flag<-outfun(x,plotit=FALSE,...)$keep +m<-m[flag,] +x<-m[,1:p] +y<-m[,p1] +} +if(!is.null(pts[1]))xr=pts +xr=as.matrix(xr) +est=regfun(x,y,...)$coef +if(ncol(xr)!=p)xr=t(xr) # for a single point, need to transpose. 
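+# Predicted Y: intercept plus the linear combination of the covariate values in xr.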
+yhat=est[1]+xr%*%est[2:p1] +if(plot.pts)points(xr,yhat) +yhat +} + +reg.pred<-regYhat + +reg1way<-function(x,y,regfun=tsreg,nboot=100,SEED=TRUE,xout=FALSE,outfun=outpro,STAND=TRUE,AD=FALSE,alpha=.05,pr=TRUE,...){ +# +# Test hypothesis that for two or more independent groups, all regression parameters +# (the intercepts and slopes) are equal +# By default the Theil--Sen estimator is used +# +# Strategy: Use bootstrap estimate of standard errors followed by +# Johansen MANOVA type test statistic. +# +# x and y are assumed to have list mode having length J equal to the number of groups +# For example, x[[1]] and y[[1]] contain the data for group 1. +# +# xout=T will eliminate leverage points using the function outfun, which defaults to the MVE method. +# +# OUTPUT: +# n is sample size after missing values are removed +# nv.keep is sample size after leverage points are removed. +# +if(pr){ +if(!xout)print("Might want to consider xout=T to remove leverage points") +} +if(SEED)set.seed(2) +if(!is.list(x))stop("Argument x should have list mode") +J=length(x) # number of groups +x=lapply(x,as.matrix) +pchk=lapply(x,ncol) +temp=matl(pchk) +if(var(as.vector(temp))!=0)stop("Something is wrong. Number of covariates differs among the groups being compared") +nv=NULL +p=ncol(x[[1]]) +p1=p+1 +for(j in 1:J){ +xy=elimna(cbind(x[[j]],y[[j]])) +x[[j]]=xy[,1:p] +y[[j]]=xy[,p1] +x[[j]]=as.matrix(x[[j]]) +nv=c(nv,nrow(x[[j]])) +} +nv.keep=nv +if(xout){ +temp=lapply(x,outfun,plotit=FALSE,STAND=STAND,...) +for(j in 1:J){ +x[[j]]=x[[j]][temp[[j]]$keep,] +y[[j]]=y[[j]][temp[[j]]$keep] +}} +x=lapply(x,as.matrix) +K=p1 +est=matrix(NA,nrow=J,ncol=p1) +grpnum=NULL +for(j in 1:J)grpnum[j]=paste("Group",j) +vlabs="Intercept" +for(j in 2:p1)vlabs[j]=paste("Slope",j-1) +dimnames(est)=list(grpnum,vlabs) +ecov=list() +ecovinv=list() +W=rep(0,p1) +gmean=rep(0,p1) +for(j in 1:J){ +est[j,]=regfun(x[[j]],y[[j]],xout=FALSE,...)$coef +nv.keep[j]=nrow(x[[j]]) +vals=matrix(NA,nrow=nboot,ncol=p1) +data<-matrix(sample(length(y[[j]]),size=length(y[[j]])*nboot,replace=TRUE),ncol=nboot) +data=listm(data) +bvec<-lapply(data,regbootMC,x[[j]],y[[j]],regfun,...) +# bvec is a p+1 by nboot matrix. +vals=t(matl(bvec)) +ecov[[j]]=var(vals) +ecovinv[[j]]=solve(ecov[[j]]) #W_j +gmean=gmean+ecovinv[[j]]%*%est[j,] +W=W+ecovinv[[j]] +} +estall=solve(W)%*%gmean +F=0 +for(k in 1:K){ +for(m in 1:K){ +for(j in 1:J){ +F=F+ecovinv[[j]][k,m]*(est[j,k]-estall[k])*(est[j,m]-estall[m]) +}}} +pvalad=NULL +# if xout=F or AD=T, compute corrected critical value, stemming from Johansen +df=K*(J-1) +if(!xout || AD){ +iden=diag(p1) +Aw=0 +for(j in 1:J){ +temp=iden-solve(W)%*%ecovinv[[j]] +tempsq=temp%*%temp +Aw=Aw+(sum(diag(tempsq))+(sum(diag(temp)))^2)/(nv[j]-1) +} +Aw=Aw/2 +alval<-c(1:999)/1000 +for(i in 1:999){ +irem<-i +crit=qchisq(alval[i],df) +critad=crit+(crit/(2*df))*(Aw+3*Aw*crit/(df+2)) +if(F2)stop("Only one covariate is allowed. Use ancJNmp") +x1=xy[,1] +y1=xy[,2] +nv1=length(y1) +xy=elimna(cbind(x2,y2)) +if(ncol(xy)>2)stop("Only one covariate is allowed. 
Use ancJNmp") +x2=xy[,1] +y2=xy[,2] +nv2=length(y2) +if(xout){ +m<-cbind(x1,y1) +p1=ncol(m) +p=p1-1 +if(identical(outfun,outblp))flag=outblp(x1,y1,plotit=FALSE)$keep +else +flag<-outfun(x1,plotit=FALSE)$keep +m<-m[flag,] +x1<-m[,1:p] +y1<-m[,p1] +m<-cbind(x2,y2) +if(identical(outfun,outblp))flag=outblp(x2,y2,plotit=FALSE)$keep +else +flag<-outfun(x2,plotit=FALSE)$keep +m<-m[flag,] +x2<-m[,1:p] +y2<-m[,p1] +} +if(is.null(pts[1])){ +xall=unique(c(x1,x2)) +pts=seq(min(xall),max(xall),length.out=Npts) +if(Dpts){ +npt<-5 +isub<-c(1:5) # Initialize isub +test<-c(1:5) +xorder<-order(x1) +y1<-y1[xorder] +x1<-x1[xorder] +xorder<-order(x2) +y2<-y2[xorder] +x2<-x2[xorder] +n1<-1 +n2<-1 +vecn<-1 +for(i in 1:length(x1))n1[i]<-length(y1[near(x1,x1[i],fr1)]) +for(i in 1:length(x1))n2[i]<-length(y2[near(x2,x1[i],fr2)]) +for(i in 1:length(x1))vecn[i]<-min(n1[i],n2[i]) +sub<-c(1:length(x1)) +isub[1]<-min(sub[vecn>=12]) +isub[5]<-max(sub[vecn>=12]) +isub[3]<-floor((isub[1]+isub[5])/2) +isub[2]<-floor((isub[1]+isub[3])/2) +isub[4]<-floor((isub[3]+isub[5])/2) +pts=x1[isub] +} +mat<-matrix(NA,5,10) +dimnames(mat)<-list(NULL,c("X","Est1","Est2","DIF","TEST","se","ci.low","ci.hi","p.value",'adj.p.values')) +mat[,1]=pts +sqsd1=regYvar(x1,y1,pts=pts,regfun=regfun,nboot=nboot,SEED=FALSE,xout=FALSE,outfun=outfun,...) +sqsd2=regYvar(x2,y2,pts=pts,regfun=regfun,nboot=nboot,SEED=FALSE,xout=FALSE,outfun=outfun,...) +est1=regYhat(x1,y1,xr=pts,regfun=regfun,xout=FALSE,outfun=outfun,...) +est2=regYhat(x2,y2,xr=pts,regfun=regfun,xout=FALSE,outfun=outfun,...) +mat[,2]=est1 +mat[,3]=est2 +est=est1-est2 +mat[,4]=est +sd=sqrt(sqsd1+sqsd2) +mat[,6]=sd +tests=(est1-est2)/sd +mat[,5]=tests +pv=2*(1-pnorm(abs(tests))) +mat[,9]=pv +crit<-smmcrit(Inf,5) +mat[,7]=est-crit*sd +mat[,8]=est+crit*sd +} +if(!is.null(FLAG)){ +n1=NA +n2=NA +for(i in 1:length(pts)){ +n1[i]<-length(y1[near(x1,pts[i],fr1)]) +n2[i]<-length(y2[near(x2,pts[i],fr2)]) +} +mat<-matrix(NA,length(pts),10) +dimnames(mat)<-list(NULL,c('X','Est1','Est2','DIF','TEST','se','ci.low','ci.hi','p.value','p.adjust')) +mat[,1]<-pts +sqsd1=regYvar(x1,y1,regfun=regfun,pts=pts,nboot=nboot,SEED=FALSE,xout=FALSE,outfun=outfun,...) +sqsd2=regYvar(x2,y2,regfun=regfun,pts=pts,nboot=nboot,SEED=FALSE,xout=FALSE,outfun=outfun,...) +est1=regYhat(x1,y1,regfun=regfun,xr=pts,xout=FALSE,outfun=outfun,...) +est2=regYhat(x2,y2,regfun=regfun,xr=pts,xout=FALSE,outfun=outfun,...) 
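+# For each covariate value in pts: the difference in predicted Y is divided by the
+# square root of the sum of the two squared bootstrap standard errors, two-sided
+# p-values are based on the standard normal distribution, the confidence intervals
+# use a Studentized maximum modulus critical value (simultaneous coverage over the
+# points in pts), and Hochberg-adjusted p-values are stored in the last column.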
+mat[,2]=est1 +mat[,3]=est2 +est=est1-est2 +mat[,4]=est +sd=sqrt(sqsd1+sqsd2) +mat[,6]=sd +tests=(est1-est2)/sd +mat[,5]=tests +pv=2*(1-pnorm(abs(tests))) +mat[,9]=pv +crit<-smmcrit(Inf,length(pts)) +mat[,7]=est-crit*sd +mat[,8]=est+crit*sd +} +reg1=regfun(x1,y1,...)$coef +reg2=regfun(x2,y2,...)$coef +if(plotit){ +if(xout){ +if(identical(outfun,outblp))flag=outblp(x1,y1,plotit=FALSE)$keep +else +flag<-outfun(x1,...)$keep +x1<-x1[flag] +y1<-y1[flag] +if(identical(outfun,outblp))flag=outblp(x2,y2,plotit=FALSE)$keep +else +flag<-outfun(x2,...)$keep +x2<-x2[flag] +y2<-y2[flag] +} +plot(c(x1,x2),c(y1,y2),type="n",xlab=xlab,ylab=ylab) +if(SCAT){ +points(x1,y1,pch=pch1) +points(x2,y2,pch=pch2) +} +abline(reg1) +abline(reg2,lty=2) +} +mat[,10]=p.adjust(mat[,9],method='hoch') +list(n=c(nv1,nv2),intercept.slope.group1=reg1,intercept.slope.group2=reg2,output=mat) +} + +ancJN<-function(x1,y1,x2,y2,pts=NULL,Npts=5,Dpts=FALSE,regfun=tsreg,fr1=1,fr2=1,SCAT=TRUE,pch1='*',pch2='+', +alpha=.05,plotit=TRUE,xout=FALSE,outfun=out,nboot=100,SEED=TRUE,xlab='X',ylab='Y',...){ +# +# Compare the regression lines of two independent groups at specified design points +# using a robust regression estimator. +# By default, use the Theil--Sen estimator +# +# Assume data are in x1 y1 x2 and y2 +# +# pts can be used to specify the design points where the regression lines +# are to be compared. +# +# Dpts=FALSE: Five covariate points are chosen uniformly space between the smallest and largest +# values observed. +# Dpts=TRUE: Five covariate points are chosen in the same manner as done by the function ancova +# +# Npts: number of points used +# +if(identical(outfun,boxplot))stop('Use outfun=outbox') +if(SEED)set.seed(2) +FLAG=pts +xy=elimna(cbind(x1,y1)) +if(ncol(xy)>2)stop('Only one covariate is allowed. Use ancJNmp') +x1=xy[,1] +y1=xy[,2] +nv1=length(y1) +xy=elimna(cbind(x2,y2)) +if(ncol(xy)>2)stop('Only one covariate is allowed. Use ancJNmp') +x2=xy[,1] +y2=xy[,2] +nv2=length(y2) +if(xout){ +m<-cbind(x1,y1) +p1=ncol(m) +p=p1-1 +flag<-outfun(x1,plotit=FALSE,...)$keep +m<-m[flag,] +x1<-m[,1:p] +y1<-m[,p1] +m<-cbind(x2,y2) +flag<-outfun(x2,plotit=FALSE,...)$keep +m<-m[flag,] +x2<-m[,1:p] +y2<-m[,p1] +} +if(is.null(pts[1])){ +xall=unique(c(x1,x2)) +pts=seq(min(xall),max(xall),length.out=Npts) +if(Dpts){ +npt<-5 +isub<-c(1:5) # Initialize isub +test<-c(1:5) +xorder<-order(x1) +y1<-y1[xorder] +x1<-x1[xorder] +xorder<-order(x2) +y2<-y2[xorder] +x2<-x2[xorder] +n1<-1 +n2<-1 +vecn<-1 +for(i in 1:length(x1))n1[i]<-length(y1[near(x1,x1[i],fr1)]) +for(i in 1:length(x1))n2[i]<-length(y2[near(x2,x1[i],fr2)]) +for(i in 1:length(x1))vecn[i]<-min(n1[i],n2[i]) +sub<-c(1:length(x1)) +isub[1]<-min(sub[vecn>=12]) +isub[5]<-max(sub[vecn>=12]) +isub[3]<-floor((isub[1]+isub[5])/2) +isub[2]<-floor((isub[1]+isub[3])/2) +isub[4]<-floor((isub[3]+isub[5])/2) +pts=x1[isub] +} +npts=length(pts) +mat<-matrix(NA,npts,10) +dimnames(mat)<-list(NULL,c('X','Est1','Est2','DIF','TEST','se','ci.low','ci.hi','p.value','adj.p.values')) +mat[,1]=pts +sqsd1=regYvar(x1,y1,pts=pts,regfun=regfun,nboot=nboot,SEED=FALSE,xout=FALSE,outfun=outfun,...) +sqsd2=regYvar(x2,y2,pts=pts,regfun=regfun,nboot=nboot,SEED=FALSE,xout=FALSE,outfun=outfun,...) +est1=regYhat(x1,y1,xr=pts,regfun=regfun,xout=FALSE,outfun=outfun,...) +est2=regYhat(x2,y2,xr=pts,regfun=regfun,xout=FALSE,outfun=outfun,...) 
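+# Note: the simultaneous critical value used below, smmcrit(Inf,5), corresponds to
+# five design points, which matches the default Npts=5 used when pts is not specified.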
+mat[,2]=est1 +mat[,3]=est2 +est=est1-est2 +mat[,4]=est +sd=sqrt(sqsd1+sqsd2) +mat[,6]=sd +tests=(est1-est2)/sd +mat[,5]=tests +pv=2*(1-pnorm(abs(tests))) +mat[,9]=pv +crit<-smmcrit(Inf,5) +mat[,7]=est-crit*sd +mat[,8]=est+crit*sd +} +if(!is.null(FLAG)){ +n1=NA +n2=NA +for(i in 1:length(pts)){ +n1[i]<-length(y1[near(x1,pts[i],fr1)]) +n2[i]<-length(y2[near(x2,pts[i],fr2)]) +} +mat<-matrix(NA,length(pts),10) +dimnames(mat)<-list(NULL,c('X','Est1','Est2','DIF','TEST','se','ci.low','ci.hi','p.value','p.adjust')) +mat[,1]<-pts +sqsd1=regYvar(x1,y1,regfun=regfun,pts=pts,nboot=nboot,SEED=FALSE,xout=FALSE,outfun=outfun,...) +sqsd2=regYvar(x2,y2,regfun=regfun,pts=pts,nboot=nboot,SEED=FALSE,xout=FALSE,outfun=outfun,...) +est1=regYhat(x1,y1,regfun=regfun,xr=pts,xout=FALSE,outfun=outfun,...) +est2=regYhat(x2,y2,regfun=regfun,xr=pts,xout=FALSE,outfun=outfun,...) +mat[,2]=est1 +mat[,3]=est2 +est=est1-est2 +mat[,4]=est +sd=sqrt(sqsd1+sqsd2) +mat[,6]=sd +tests=(est1-est2)/sd +mat[,5]=tests +pv=2*(1-pnorm(abs(tests))) +mat[,9]=pv +crit<-smmcrit(Inf,length(pts)) +mat[,7]=est-crit*sd +mat[,8]=est+crit*sd +} +reg1=regfun(x1,y1,...)$coef +reg2=regfun(x2,y2,...)$coef +if(plotit){ +if(xout){ +flag<-outfun(x1,...)$keep +x1<-x1[flag] +y1<-y1[flag] +flag<-outfun(x2,...)$keep +x2<-x2[flag] +y2<-y2[flag] +} +plot(c(x1,x2),c(y1,y2),type='n',xlab=xlab,ylab=ylab) +if(SCAT){ +points(x1,y1,pch=pch1) +points(x2,y2,pch=pch2) +} +abline(reg1) +abline(reg2,lty=2) +} +mat[,10]=p.adjust(mat[,9],method='hoch') +list(n=c(nv1,nv2),intercept.slope.group1=reg1,intercept.slope.group2=reg2,output=mat) +} + +block.diag<-function(mat){ +# +# mat is assumed to have list mode with +# mat[[1]]...mat[[p]] each having n-by-n matrices +# +# Create a np-by-np block diagonal matrix +# +# So p is the number of blocks +# +if(!is.list(mat))stop("mat should have list mode") +np<-length(mat)*ncol(mat[[1]]) +m<-matrix(0,np,np) +n=nrow(mat[[1]]) +p=length(mat) +ilow<-1-n +iup<-0 +for(i in 1:p){ +ilow<-ilow+n +iup<-iup+n +m[ilow:iup,ilow:iup]<-mat[[i]] +} +m +} + +reg1wayMC<-function(x,y,regfun=tsreg,nboot=100,SEED=TRUE,xout=FALSE,outfun=outpro, +STAND=TRUE,alpha=.05,pr=TRUE,AD=FALSE,...){ +# +# Test hypothesis that for two or more independent groups, all regression parameters are equal +# By default the Theil--Sen estimator is used +# +# Strategy: Use bootstrap estimate of standard errors followed by +# Johansen MANOVA type test statistic +# +# x and y are assumed to have list mode having length J equal to the number of groups +# For example, x[[1]] and y[[1]] contain the data for group 1. +# +# xout=T will eliminate leverage points using the function outfun +# +# OUTPUT: +# n is sample size after missing values are removed +# nv.keep is sample size after leverage points are removed. +# +library(parallel) +if(pr){ +if(!xout)print("Might want to consider xout=T to remove leverage points") +} +if(SEED)set.seed(2) +if(!is.list(x))stop("Argument x should have list mode") +if(pr){ +if(xout)print("xout=T, so an adjusted critical is not computed and apparently not needed") +} +J=length(x) # number of groups +x=lapply(x,as.matrix) +pchk=lapply(x,ncol) +temp=matl(pchk) +if(var(as.vector(temp))!=0)stop("Something is wrong. Number of covariates differs among the groups being compared") +nv=NULL +nv.keep=NULL +nv.all=NULL +p=ncol(x[[1]]) +p1=p+1 +for(j in 1:J){ +xy=elimna(cbind(x[[j]],y[[j]])) +x[[j]]=xy[,1:p] +y[[j]]=xy[,p1] +x[[j]]=as.matrix(x[[j]]) +nv.all[j]=c(nv,nrow(x[[j]])) +} +if(xout){ +temp=lapply(x,outfun,plotit=FALSE,STAND=STAND,...) 
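+# temp[[j]]$keep contains the row indices of group j that were not flagged as
+# leverage points by outfun; only those rows are retained in the loop that follows.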
+for(j in 1:J){ +x[[j]]=x[[j]][temp[[j]]$keep,] +y[[j]]=y[[j]][temp[[j]]$keep] +}} +x=lapply(x,as.matrix) +p1=ncol(x[[1]])+1 +K=p1 +est=matrix(NA,nrow=J,ncol=p1) +hlabs=NULL +vlabs="Intercept" +for(j in 1:J)hlabs[j]=paste("Group",j) +for(j in 2:p1)vlabs[j]=paste("Slope",j-1) +dimnames(est)<-list(hlabs,vlabs) +nv=NA +ecov=list() +ecovinv=list() +W=rep(0,p1) +gmean=rep(0,p1) +for(j in 1:J){ +est[j,]=regfun(x[[j]],y[[j]])$coef +nv.keep[j]=nrow(x[[j]]) +nv[j]=nv.keep[j] +vals=matrix(NA,nrow=nboot,ncol=p1) +data<-matrix(sample(nv[j],size=nv[j]*nboot,replace=TRUE),ncol=nboot) +data=listm(data) +bvec<-mclapply(data,regbootMC,x[[j]],y[[j]],regfun,...) +vals=t(matl(bvec)) +ecov[[j]]=var(vals) +ecovinv[[j]]=solve(ecov[[j]]) #W_j +gmean=gmean+ecovinv[[j]]%*%est[j,] +W=W+ecovinv[[j]] +} +estall=solve(W)%*%gmean +F=0 +for(k in 1:K){ +for(m in 1:K){ +for(j in 1:J){ +F=F+ecovinv[[j]][k,m]*(est[j,k]-estall[k])*(est[j,m]-estall[m]) +}}} +df=K*(J-1) +pvalad=NULL +# if xout=F, compute corrected critical value, stemming from Johansen +df=K*(J-1) +if(!xout || AD){ +iden=diag(p1) +Aw=0 +for(j in 1:J){ +temp=iden-solve(W)%*%ecovinv[[j]] +tempsq=temp%*%temp +Aw=Aw+(sum(diag(tempsq))+(sum(diag(temp)))^2)/(nv[j]-1) +} +Aw=Aw/2 +alval<-c(1:999)/1000 +for(i in 1:999){ +irem<-i +crit=qchisq(alval[i],df) +critad=crit+(crit/(2*df))*(Aw+3*Aw*crit/(df+2)) +if(F1){ +if(ntests<=28){ +if(alpha==.05)crit<-smmcrit(Inf,ntests) +if(alpha==.01)crit<-smmcrit01(Inf,ntests) +} +if(ntests>28)crit=smmvalv2(dfvec=rep(Inf,nrow(pts)),alpha=alpha) +if(is.null(crit))crit=smmvalv2(dfvec=rep(Inf,nrow(pts)),alpha=alpha) +} +mat[,6]=est-crit*sd +mat[,7]=est+crit*sd +list(n=nv,points=pts,output=mat) +} + +ancpar<-function(x1,y1,x2,y2,pts=NULL,regfun=tsreg,fr1=1,fr2=1,alpha=.05,plotit=TRUE,xout=FALSE,outfun=out,nboot=100,SEED=TRUE,xlab="X",ylab="Y",...){ +# +# Compare the regression lines of two independent groups at specified design points. +# By default, use the Theil--Sen estimator +# +# Assume data are in x1 y1 x2 and y2 +# +# pts can be used to specify the design points where the regression lines +# are to be compared. +# For p>1 predictors, pts should be a matrix with p columns +# +x1=as.matrix(x1) +x2=as.matrix(x2) +if(ncol(x1)!=ncol(x2))stop("x1 and x2 have different number of columns") +if(ncol(x1)==1)output=ancts(x1,y1,x2,y2,pts=pts,regfun=regfun,fr1=fr1,fr2=fr2,alpha=alpha, +plotit=plotit,xout=xout,outfun=outfun,nboot=nboot,SEED=SEED,xlab=xlab,ylab=ylab,...) +if(ncol(x1)>1)output=anctsmp(x1,y1,x2,y2,regfun=regfun,alpha=alpha,pts=pts,SEED=SEED,xout=xout,outfun=outfun,nboot=nboot,...) +output +} + + + ols.coef<-function(x,y,xout=FALSE){ + # In some cases, want the OLS estimate returned in $coef + res=ols(x,y,xout=xout)$coef[,1] + list(coef=res) + } + + +reg2ciMC<-function(x,y,x1,y1,regfun=tsreg,nboot=599,alpha=.05,plotit=TRUE,SEED=TRUE, +xout=FALSE,outfun=outpro,pr=FALSE,xlab='X',ylab='Y',...){ +# +# Compute a .95 confidence interval for the difference between the +# the intercepts and slopes corresponding to two independent groups. +# The default regression method is Theil-Sen. +# +# Same as reg2ci, only takes advantage of a multi-core processor +# +# The predictor values for the first group are +# assumed to be in the n by p matrix x. 
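+# The corresponding outcomes are assumed to be in y (first group) and y1 (second group).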
+# The predictors for the second group are in x1 +# +# The default number of bootstrap samples is nboot=599 +# +# regfun can be any R function that returns the coefficients in +# the vector regfun$coef, the first element of which contains the +# estimated intercept, the second element contains the estimate of +# the first predictor, etc. +# +library(parallel) +x<-as.matrix(x) +xx<-cbind(x,y) +xx<-elimna(xx) +x<-xx[,1:ncol(x)] +x<-as.matrix(x) +y<-xx[,ncol(x)+1] +x1<-as.matrix(x1) +xx1<-cbind(x1,y1) +xx1<-elimna(xx1) +x1<-xx1[,1:ncol(x1)] +x1<-as.matrix(x1) +y1<-xx1[,ncol(x1)+1] +x=as.matrix(x) +x1=as.matrix(x1) +if(xout){ +if(identical(outfun,outblp)){ +flag1=outblp(x,y,plotit=FALSE)$keep +flag2=outblp(x1,y2,plotit=FALSE)$keep +} +if(!identical(outfun,outblp)){ +flag1=outfun(x,plotit=FALSE)$keep +flag2=outfun(x1,plotit=FALSE)$keep +} +x=x[flag1,] +y=y[flag1] +x1=x1[flag2,] +y1=y1[flag2] +} +n=length(y) +n[2]=length(y1) +x<-as.matrix(x) +x1<-as.matrix(x1) +est1=regfun(x,y)$coef +est2=regfun(x1,y1)$coef +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +# +data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot) +data=listm(t(data)) +bvec<-mclapply(data,regbootMC,x,y,regfun,mc.preschedule=TRUE,xout=FALSE,...) +bvec=matl(bvec) +data<-matrix(sample(length(y1),size=length(y1)*nboot,replace=TRUE),nrow=nboot) +data=listm(t(data)) +bvec1<-mclapply(data,regbootMC,x1,y1,regfun,mc.preschedule=TRUE,xout=FALSE,...) +bvec1=matl(bvec1) +bvec<-bvec-bvec1 +p1<-ncol(x)+1 +regci<-matrix(0,p1,6) +dimnames(regci)<-list(NULL, +c("Parameter","ci.lower","ci.upper","p.value","Group 1","Group 2")) +ilow<-round((alpha/2)*nboot)+1 +ihi<-nboot-(ilow-1) +for(i in 1:p1){ +temp<-sum(bvec[i,]<0)/nboot+sum(bvec[i,]==0)/(2*nboot) +regci[i,4]<-2*min(temp,1-temp) +bsort<-sort(bvec[i,]) +regci[i,2]<-bsort[ilow] +regci[i,3]<-bsort[ihi] +regci[,1]<-c(0:ncol(x)) +} +regci[,5]=est1 +regci[,6]=est2 +if(ncol(x)==1 && plotit){ +plot(c(x,x1),c(y,y1),type="n",xlab=xlab,ylab=ylab) +points(x,y) +points(x1,y1,pch="+") +abline(regfun(x,y,...)$coef) +abline(regfun(x1,y1,...)$coef,lty=2) +} +list(n=n,output=regci) +} + +reg2difplot<-function(x1,y1,x2,y2,regfun=tsreg,pts=x1,xlab="VAR 1",ylab="VAR 2",zlab="Group 2 minus Group 1",xout=FALSE,outfun=out,ALL=TRUE,pts.out=FALSE,SCAT=FALSE,theta=50,phi=25,ticktype='simple', +pr=TRUE,...){ +# +# Fit a regression model to both groups assuming have two predictors. +# Get predicted Y values based on points in pts. By default, use +# pts=x1 +# +# x1 a matrix containing two predictors +# x2 a matrix containing two predictors +# +# Compute differences in predicted values and plot the results as a function of the points in pts +# pts=x1 by default. +# ALL=T, pts is taken to be all points in x1 and x2. +# +# pts.out=T will remove leverage points from pts. +# +if(!is.matrix(x1))stop("x1 should be a matrix") +if(!is.matrix(x2))stop("x2 should be a matrix") +if(!is.matrix(pts))stop("pts should be a matrix") +if(ncol(x1)!=2)stop("x1 should be a matrix with two columns") +if(ncol(x2)!=2)stop("x2 should be a matrix with two columns") +if(ncol(pts)!=2)stop("pts should be a matrix with two columns") +if(ALL)pts=rbind(x1,x2) +if(pts.out){ +flag=outfun(pts,plotit=FALSE,...)$keep +pts=pts[flag,] +} +e1=regYhat(x1,y1,xout=xout,regfun=regfun,outfun=outfun,xr=pts,...) +e2=regYhat(x2,y2,xout=xout,regfun=regfun,outfun=outfun,xr=pts,...) 
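+# e2-e1 is the group 2 minus group 1 difference in predicted Y at each row of pts;
+# it is plotted below either as a 3D scatterplot (SCAT=TRUE) or as a smooth via rplot.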
+if(SCAT){ +library(scatterplot3d) +scatterplot3d(cbind(pts,e2-e1),xlab=xlab,ylab=ylab,zlab=zlab) +} +if(!SCAT)rplot(pts,e2-e1,xlab=xlab,ylab=ylab,zlab=zlab,theta=theta,phi=phi,pr=FALSE,ticktype=ticktype,prm=FALSE) +} + +cbmhd<-function(x,y,qest=hd,alpha=.05,q=.25,plotit=FALSE,pop=0,fr=.8,rval=15,xlab="",ylab="",nboot=600,SEED=TRUE){ +# +# Compute a confidence interval for the sum of the qth and (1-q)th quantiles +# of the distribution of D=X-Y, where X and Y are two independent random variables. +# The Harrell-Davis estimator is used +# If the distribution of X and Y are identical, then in particular the +# distribution of D=X-Y is symmetric about zero. +# +# plotit=TRUE causes a plot of the difference scores to be created +# pop=0 adaptive kernel density estimate +# pop=1 results in the expected frequency curve. +# pop=2 kernel density estimate (Rosenblatt's shifted histogram) +# pop=3 boxplot +# pop=4 stem-and-leaf +# pop=5 histogram +# +if(SEED)set.seed(2) +if(q>=.5)stop("q should be less than .5") +if(q<=0)stop("q should be greater than 0") +x<-x[!is.na(x)] +y<-y[!is.na(y)] +n1=length(x) +n2=length(y) +m<-outer(x,y,FUN="-") +q2=1-q +est1=qest(m,q) +est2=qest(m,q2) +data1<-matrix(sample(n1,size=n1*nboot,replace=TRUE),nrow=nboot) +data2<-matrix(sample(n2,size=n2*nboot,replace=TRUE),nrow=nboot) +bvec=NA +for(i in 1:nboot){ +mb=outer(x[data1[i,]],y[data2[i,]],"-") +bvec[i]=qest(mb,q)+qest(mb,q2) +} +p=mean(bvec>0)+.5*mean(bvec==0) +p=2*min(c(p,1-p)) +sbv=sort(bvec) +ilow<-round((alpha/2) * nboot) +ihi<-nboot - ilow +ilow<-ilow+1 +ci=sbv[ilow] +ci[2]=sbv[ihi] +if(plotit){ +if(pop==1 || pop==0){ +if(length(x)*length(y)>2500){ +print("Product of sample sizes exceeds 2500.") +print("Execution time might be high when using pop=0 or 1") +print("If this is case, might consider changing the argument pop") +print("pop=2 might be better") +}} +MM=as.vector(m) +if(pop==0)akerd(MM,xlab=xlab,ylab=ylab) +if(pop==1)rdplot(MM,fr=fr,xlab=xlab,ylab=ylab) +if(pop==2)kdplot(MM,rval=rval,xlab=xlab,ylab=ylab) +if(pop==3)boxplot(MM) +if(pop==4)stem(MM) +if(pop==5)hist(MM,xlab=xlab) +if(pop==6)skerd(MM) +} +list(q=q,Est1=est1,Est2=est2,sum=est1+est2,ci=ci,p.value=p) +} + +reg1wayISO<-function(x,y,regfun=tsreg,nboot=100,SEED=TRUE,xout=FALSE,outfun=outpro,STAND=TRUE,alpha=.05,pr=TRUE,...){ +# +# Test hypothesis that for two or more independent groups, all slope parameters +# are equal. +# By default the Theil--Sen estimator is used +# +# Strategy: Use bootstrap estimate of standard errors followed by +# Johansen MANOVA type test statistic. +# +# x and y are assumed to have list mode having length J equal to the number of groups +# For example, x[[1]] and y[[1]] contain the data for group 1. +# +# xout=T will eliminate leverage points using the function outfun +# +if(SEED)set.seed(2) +if(pr){ +if(!xout)print("Might want to consider xout=T to remove leverage points") +} +if(!is.list(x))stop("Argument x should have list mode") +J=length(x) # number of groups +x=lapply(x,as.matrix) +pchk=lapply(x,ncol) +temp=matl(pchk) +if(var(as.vector(temp))!=0)stop("Something is wrong. Number of covariates differs among the groups being compared") +nv=NULL +p=ncol(x[[1]]) +p1=p+1 +for(j in 1:J){ +xy=elimna(cbind(x[[j]],y[[j]])) +x[[j]]=xy[,1:p] +y[[j]]=xy[,p1] +x[[j]]=as.matrix(x[[j]]) +nv=c(nv,nrow(x[[j]])) +} +nv.keep=nv +if(xout){ +temp=lapply(x,outfun,plotit=FALSE,STAND=STAND,...) 
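+# Unlike reg1way, only the slopes are compared: the Johansen-type statistic computed
+# below uses the slope components (indices 2:K) of the estimates and of the inverses
+# of the bootstrap covariance matrices, ignoring the intercepts.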
+for(j in 1:J){ +x[[j]]=x[[j]][temp[[j]]$keep,] +y[[j]]=y[[j]][temp[[j]]$keep] +}} +x=lapply(x,as.matrix) +K=p1 +est=matrix(NA,nrow=J,ncol=p1) +nv.keep=NULL +ecov=list() +ecovinv=list() +W=rep(0,p1) +gmean=rep(0,p) +for(j in 1:J){ +est[j,]=regfun(x[[j]],y[[j]],xout=FALSE,...)$coef +nv.keep[j]=nrow(x[[j]]) +vals=matrix(NA,nrow=nboot,ncol=p1) +data<-matrix(sample(length(y[[j]]),size=length(y[[j]])*nboot,replace=TRUE),ncol=nboot) +data=listm(data) +bvec<-lapply(data,regbootMC,x[[j]],y[[j]],regfun,...) +# bvec is a p+1 by nboot matrix. +vals=t(matl(bvec)) +ecov[[j]]=var(vals) +ecovinv[[j]]=solve(ecov[[j]]) #W_j +gmean=gmean+ecovinv[[j]][2:K,2:K]%*%est[j,2:K] +W=W+ecovinv[[j]] +} +estall=solve(W[2:K,2:K])%*%gmean +estall=c(0,estall) +F=0 +for(k in 2:K){ +for(m in 2:K){ +for(j in 1:J){ +F=F+ecovinv[[j]][k,m]*(est[j,k]-estall[k])*(est[j,m]-estall[m]) +}}} +df=p*(J-1) +pvalad=NULL +AD=FALSE # Seems adjusted critical is not needed +if(AD){ +iden=diag(p1) +Aw=0 +for(j in 1:J){ +temp=iden-solve(W)%*%ecovinv[[j]] +tempsq=temp%*%temp +Aw=Aw+(sum(diag(tempsq))+(sum(diag(temp)))^2)/(nv[j]-1) +} +Aw=Aw/2 +alval<-c(1:999)/1000 +for(i in 1:999){ +irem<-i +crit=qchisq(alval[i],df) +critad=crit+(crit/(2*df))*(Aw+3*Aw*crit/(df+2)) +if(F2)stop("Should be at most two groups") +if(ncol(x)==2)dif=x[,1]-x[,2] +if(!is.null(y))dif=x-y +dif=elimna(dif) +dif=as.matrix(dif) +nv=length(dif) +output=matrix(NA,ncol=8,nrow=length(q)) +dimnames(output)=list(NULL,c('quantile','Est_q','Est_1.minus.q','SUM','ci.low','ci.up','p_crit','p-value')) +for(i in 1:length(q)){ +test=DqdifMC(dif,q=q[i],plotit=FALSE,nboot=nboot,SEED=SEED) +output[i,1]=q[i] +output[i,2]=test$est.q +output[i,3]=test$est.1.minus.q +output[i,8]=test$p.value +output[i,5]=test$conf.interval[1] +output[i,6]=test$conf.interval[2] +} +temp=order(output[,8],decreasing=TRUE) +zvec=alpha/c(1:length(q)) +output[temp,7]=zvec +output <- data.frame(output) +output$signif=rep('YES',nrow(output)) +for(i in 1:nrow(output)){ +if(output[temp[i],8]>output[temp[i],7])output$signif[temp[i]]='NO' +if(output[temp[i],8]<=output[temp[i],7])break +} +output[,4]=output[,2]+output[,3] +if(plotit){ +plot(rep(q,3),c(output[,4],output[,5],output[,6]),type='n',xlab=xlab,ylab=ylab) +points(q,output[,6],pch='+') +points(q,output[,5],pch='+') +points(q,output[,4],pch='*') +} +list(n=nv,output=output) +} +tsregF<-function(x,y,xout=FALSE,outfun=out,iter=10,varfun=pbvar, +corfun=pbcor,plotit=FALSE,tol=.0001,...){ +# +# Compute Theil-Sen regression estimator +# +# Use Gauss-Seidel algorithm +# when there is more than one predictor +# +# +x<-as.matrix(x) +xx<-cbind(x,y) +xx<-elimna(xx) +x<-xx[,1:ncol(x)] +x<-as.matrix(x) +y<-xx[,ncol(x)+1] +temp<-NA +x<-as.matrix(x) +if(xout){ +x<-as.matrix(x) +flag<-outfun(x,plotit=plotit,...)$keep +x<-x[flag,] +y<-y[flag] +x<-as.matrix(x) +} +if(ncol(x)==1){ +temp1<-tsp1reg(x,y) +coef<-temp1$coef +res<-temp1$res +} +if(ncol(x)>1){ +for(p in 1:ncol(x)){ +temp[p]<-tsp1reg(x[,p],y)$coef[2] +} +res<-y-x%*%temp +alpha<-median(res) +r<-matrix(NA,ncol=ncol(x),nrow=nrow(x)) +tempold<-temp +for(it in 1:iter){ +for(p in 1:ncol(x)){ +r[,p]<-y-x%*%temp-alpha+temp[p]*x[,p] +temp[p]<-tsp1reg(x[,p],r[,p],plotit=FALSE)$coef[2] +} +if(max(abs(temp-tempold))0){ +e.pow<-varfun(yhat)/varfun(y) +if(!is.na(e.pow)){ +if(e.pow>=1)e.pow<-corfun(yhat,y)$cor^2 +e.pow=as.numeric(e.pow) +stre=sqrt(e.pow) +}} +res=NULL +list(coef=coef,residuals=res,Strength.Assoc=stre,Explanatory.Power=e.pow) +} + + +outproMC<-function(m,gval=NA,center=NA,plotit=TRUE,op=TRUE,MM=FALSE,cop=3, +xlab="VAR 
1",ylab="VAR 2",STAND=TRUE,tr=.2,q=.5,pr=TRUE,...){ +# +# same as function outpro, only it takes advantage of multiple core +# processors +# +# Detect outliers using a modification of the +# Stahel-Donoho projection method. +# +# Determine center of data cloud, for each point, +# connect it with center, project points onto this line +# and use distances between projected points to detect +# outliers. A boxplot method is used on the +# projected distances. +# +# plotit=T creates a scatterplot when working with +# bivariate data. +# +# op=T +# means the .5 depth contour is plotted +# based on data with outliers removed. +# +# op=F +# means .5 depth contour is plotted without removing outliers. +# +# MM=F Use interquatile range when checking for outliers +# MM=T uses MAD. +# +# If value for center is not specified, +# there are four options for computing the center of the +# cloud of points when computing projections: +# +# cop=2 uses MCD center +# cop=3 uses median of the marginal distributions. +# cop=4 uses MVE center +# cop=5 uses TBS +# cop=6 uses rmba (Olive's median ball algorithm)# cop=7 uses the spatial (L1) median +# +# args q and tr having are not used by this function. They are included to deal +# with situations where smoothers have optional arguments for q and tr +# +# STAND=T means that marginal distributions are standardized before +# checking for outliers + +# When using cop=2, 3 or 4, default critical value for outliers +# is square root of the .975 quantile of a +# chi-squared distribution with p degrees +# of freedom. +# +# Donoho-Gasko (Tukey) median is marked with a cross, +. +# +library(parallel) +library(MASS) +m<-as.matrix(m) +if(pr){ +if(!STAND){ +if(ncol(m)>1)print('STAND=FALSE. If measures are on different scales, might want to use STAND=TRUE') +}} +if(ncol(m)==1){ +dis<-(m-median(m))^2/mad(m)^2 +dis<-sqrt(dis) +crit<-sqrt(qchisq(.975,1)) +chk<-ifelse(dis>crit,1,0) +vec<-c(1:nrow(m)) +outid<-vec[chk==1] +keep<-vec[chk==0] +} +if(ncol(m)>1){ +if(STAND)m=standm(m,est=median,scat=mad) +if(is.na(gval) && cop==1)gval<-sqrt(qchisq(.95,ncol(m))) +if(is.na(gval) && cop!=1)gval<-sqrt(qchisq(.975,ncol(m))) +m<-elimna(m) # Remove missing values +if(cop==1 && is.na(center[1])){ +if(ncol(m)>2)center<-dmean(m,tr=.5,cop=1) +if(ncol(m)==2){ +tempd<-NA +for(i in 1:nrow(m)) +tempd[i]<-depth(m[i,1],m[i,2],m) +mdep<-max(tempd) +flag<-(tempd==mdep) +if(sum(flag)==1)center<-m[flag,] +if(sum(flag)>1)center<-apply(m[flag,],2,mean) +}} +if(cop==2 && is.na(center[1])){ +center<-cov.mcd(m)$center +} +if(cop==4 && is.na(center[1])){ +center<-cov.mve(m)$center +} +if(cop==3 && is.na(center[1])){ +center<-apply(m,2,median) +} +if(cop==5 && is.na(center[1])){ +center<-tbs(m)$center +} +if(cop==6 && is.na(center[1])){ +center<-rmba(m)$center +} +if(cop==7 && is.na(center[1])){ +center<-spat(m) +} +flag<-rep(0, nrow(m)) +outid <- NA +vec <- c(1:nrow(m)) +cenmat=matrix(rep(center,nrow(m)),ncol=ncol(m),byrow=TRUE) +Amat=m-cenmat +B=listm(t(Amat)) # so rows are now in B[[1]]...B[[n]] +dis=mclapply(B,outproMC.sub,Amat) +flag=mclapply(dis,outproMC.sub2,MM,gval) +flag=matl(flag) +flag=apply(flag,1,max) +} +if(sum(flag) == 0) outid <- NA +if(sum(flag) > 0)flag<-(flag==1) +outid <- vec[flag] +idv<-c(1:nrow(m)) +keep<-idv[!flag] +if(ncol(m)==2){ +if(plotit){ +plot(m[,1],m[,2],type="n",xlab=xlab,ylab=ylab) +points(m[keep,1],m[keep,2],pch="*") +if(length(outid)>0)points(m[outid,1],m[outid,2],pch="o") +if(op){ +tempd<-NA +keep<-keep[!is.na(keep)] +mm<-m[keep,] +for(i in 
1:nrow(mm))tempd[i]<-depth(mm[i,1],mm[i,2],mm) +mdep<-max(tempd) +flag<-(tempd==mdep) +if(sum(flag)==1)center<-mm[flag,] +if(sum(flag)>1)center<-apply(mm[flag,],2,mean) +m<-mm +} +points(center[1],center[2],pch="+") +x<-m +temp<-fdepth(m,plotit=FALSE) +flag<-(temp>=median(temp)) +xx<-x[flag,] +xord<-order(xx[,1]) +xx<-xx[xord,] +temp<-chull(xx) +xord<-order(xx[,1]) +xx<-xx[xord,] +temp<-chull(xx) +lines(xx[temp,]) +lines(xx[c(temp[1],temp[length(temp)]),]) +}} +list(out.id=outid,keep=keep) +} + + + +olsJ2<-function(x1,y1,x2,y2,xout=FALSE,outfun=outpro, +STAND=TRUE,plotit=TRUE,xlab="X",ylab="Y",ISO=FALSE,...){ +# +# Test hypothesis that for two independent groups, all regression parameters are equal +# Least squares regression is used. +# +# Strategy: Use HC4 estimate of standard errors followed by +# Johansen type test statistic. +# +# ISO=TRUE, test slopes, ignoring intercept. +# +x1=as.matrix(x1) +p=ncol(x1) +p1=p+1 +xy=elimna(cbind(x1,y1)) +x1=xy[,1:p] +y1=xy[,p1] +x2=as.matrix(x2) +p=ncol(x2) +p1=p+1 +xy=elimna(cbind(x2,y2)) +x2=xy[,1:p] +y2=xy[,p1] +if(plotit){ +xx1=x1 +yy1=y1 +xx2=x2 +yy2=y2 +if(ncol(as.matrix(x1))==1){ +if(xout){ +flag=outfun(xx1,plotit=FALSE,...)$keep +xx1=x1[flag] +yy1=y1[flag] +flag=outfun(xx2,plotit=FALSE,...)$keep +xx2=x2[flag] +yy2=y2[flag] +} +plot(c(xx1,xx2),c(yy1,yy2),type="n",xlab=xlab,ylab=ylab) +points(xx1,yy1) +points(xx2,yy2,pch="+") +abline(lsfit(xx1,yy1,...)$coef) +abline(lsfit(xx2,yy2,...)$coef,lty=2) +}} +x=list() +y=list() +x[[1]]=x1 +x[[2]]=x2 +y[[1]]=y1 +y[[2]]=y2 +if(!ISO)output=ols1way(x,y,xout=xout,outfun=outfun,STAND=STAND,...) +if(ISO)output=ols1wayISO(x,y,xout=xout,outfun=outfun,STAND=STAND,...) +output +} +ebarplot.med<-function(x,y=NULL,alpha=.05,nse=2, liw = uiw, aui=NULL, ali=aui, +err="y", tr=0,ylim=NULL, sfrac = 0.01, gap=0, add=FALSE, +col=par("col"), lwd=par("lwd"), slty=par("lty"), xlab="Group", + ylab=NULL, ...) { +# plots error bars using the data in +# x, which is assumed to be a matrix with J columns (J groups) or +# x has list mode. +# nse indicates how many standard errors to use when plotting. +# +# Designed specifically for medians +# Uses distribution-free confidence intervals +# +# Missing values are automatically removed. +# +if(!is.null(y)){ +if(is.matrix(x))stop("When y is given, x should not be a matrix") +if(is.list(x))stop("When y is given, x should not be in list mode") +rem=x +x=list() +x[[1]]=rem +x[[2]]=y +} +if(is.matrix(x))x<-listm(x) +mval<-NA +if(!is.list(x) && is.null(y))stop("This function assumes there + are two or more groups") +aui=NA +ali=NA +for(j in 1:length(x)){ +mval[j]<-median(x[[j]],na.rm=TRUE) +temp=sint(x[[j]],alpha=alpha,pr=FALSE) +ali[j]=temp[1] +aui[j]=temp[2] +} + +plotCI(mval,y=NULL, liw = uiw, aui=aui, ali=ali, + err="y", ylim=NULL, sfrac = 0.01, gap=0, add=FALSE, + col=par("col"), lwd=par("lwd"), slty=par("lty"), xlab=xlab, + ylab=ylab) +} +MULtsreg<-function(x,y,tr=.2,RMLTS=TRUE){ +# Multivariate Least Trimmed Squares Estimator +# Input: +# x: data-matrix (n,p) +# y: data-matrix (n,q) +# tr: proportion of trimming +# This function calls an R function written by Kristel Joossens +# +# Output: +# If MLTS=T coef: matrix (p,q) of MLTS-regression coefficients +# IF MLTS=F betaR : matrix (p,q) of RMLTS-regression coefficients +# +# Ref: Agullo,J., Croux, C., and Van Aelst, S. (2008) +# The Multivariate Least Trimmed Squares Estimator, +# Journal of multivariate analysis, 99, 311-338. 
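+#
+# The function mlts below implements this estimator: ns random subsets of size p+q are
+# each refined with nc concentration steps that keep the h=floor(n*(1-gamma))+1
+# observations having the smallest Mahalanobis residual distances; the fit minimizing
+# det(sigma) is retained and rescaled by a consistency factor, and the reweighted
+# (RMLTS) fit drops observations whose residual distance exceeds a chi-squared cutoff.
+#
+# Illustrative call (hypothetical data, not part of the original file):
+# set.seed(1)
+# x=matrix(rnorm(100),ncol=2)
+# y=cbind(x%*%c(1,-1)+rnorm(50),rnorm(50))
+# MULtsreg(x,y)   # returns $coef, a (p+1) by q matrix of RMLTS coefficients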
+# +x=as.matrix(x) +xy=elimna(cbind(x,y)) +xx=as.matrix(cbind(rep(1,nrow(xy)),xy[,1:ncol(x)])) +p1=ncol(x)+1 +y=as.matrix(xy[,p1:ncol(xy)]) +outp=mlts(xx,y,tr) +if(!RMLTS)coef=outp$beta +if(RMLTS)coef=outp$betaR +list(coef=coef) +} +mlts<-function(x,y,gamma,ns=500,nc=10,delta=0.01) +{ + d <- dim(x); n <- d[1]; p <- d[2] + q <- ncol(y) + h <- floor(n*(1-gamma))+1 + obj0 <- 1e10 + for (i in 1:ns) + { sorted <- sort(runif(n),na.last = NA,index.return=TRUE) + istart <- sorted$ix[1:(p+q)] + xstart <- x[istart,] + ystart <- y[istart,] + bstart <- solve(t(xstart)%*%xstart,t(xstart)%*%ystart) + sigmastart <- (t(ystart-xstart%*%bstart))%*%(ystart-xstart%*%bstart)/q + for (j in 1:nc) + { res <- y - x %*% bstart + tres <- t(res) + dist2 <- colMeans(solve(sigmastart,tres)*tres) + sdist2 <- sort(dist2,na.last = NA,index.return = TRUE) + idist2 <- sdist2$ix[1:h] + xstart <- x[idist2,] + ystart <- y[idist2,] + bstart <- solve(t(xstart)%*%xstart,t(xstart)%*%ystart) + sigmastart <- (t(ystart-xstart%*%bstart))%*%(ystart-xstart%*%bstart)/(h-p) + } + obj <- det(sigmastart) + if (obj < obj0) + { result.beta <- bstart + result.sigma <- sigmastart + obj0 <- obj + } + } + cgamma <- (1-gamma)/pchisq(qchisq(1-gamma,q),q+2) + result.sigma <- cgamma * result.sigma + res <- y - x %*% result.beta + tres<-t(res) + result.dres <- colSums(solve(result.sigma,tres)*tres) + result.dres <- sqrt(result.dres) + + qdelta <- sqrt(qchisq(1-delta,q)) + good <- (result.dres <= qdelta) + xgood <- x[good,] + ygood <- y[good,] + result.betaR <- solve(t(xgood)%*%xgood,t(xgood)%*%ygood) + result.sigmaR <- (t(ygood-xgood%*%result.betaR)) %*% + (ygood-xgood%*%result.betaR)/(sum(good)-p) + cdelta <- (1-delta)/pchisq(qdelta^2,q+2) + result.sigmaR<-cdelta*result.sigmaR + resR<-y-x%*%result.betaR + tresR<-t(resR) + result.dresR <- colSums(solve(result.sigmaR,tresR)*tresR) + result.dresR <- sqrt(result.dresR) + list(beta=result.beta,sigma=result.sigma,dres=result.dres, + betaR=result.betaR,sigmaR=result.sigmaR,dresR=result.dresR) +} + +ancCR<-function(x1,y1,x2,y2){ +v=optim(0,JNH_sub1,x1=x1,y1=y1,x2=x2,y2=y2,method='BFGS')$par +v[2]=optim(0,JNH_sub2,x1=x1,y1=y1,x2=x2,y2=y2,method='BFGS')$par +a=min(v) +v=c(a,max(v)) +} + + +tsregNW<-function(x,y,xout=FALSE,outfun=out,iter=10,varfun=pbvar, +corfun=pbcor,plotit=FALSE,tol=.0001,...){ +# +# Compute Theil-Sen regression estimator +# +# Use Gauss-Seidel algorithm +# when there is more than one predictor +# +# +x<-as.matrix(x) +xx<-cbind(x,y) +xx<-elimna(xx) +x<-xx[,1:ncol(x)] +x<-as.matrix(x) +y<-xx[,ncol(x)+1] +temp<-NA +x<-as.matrix(x) +if(xout){ +x<-as.matrix(x) +flag<-outfun(x,plotit=plotit,...)$keep +x<-x[flag,] +y<-y[flag] +x<-as.matrix(x) +} +if(ncol(x)==1){ +temp1<-tsp1reg(x,y) +coef<-temp1$coef +res<-temp1$res +} +if(ncol(x)>1){ +for(p in 1:ncol(x)){ +temp[p]<-tsp1reg(x[,p],y)$coef[2] +} +res<-y-x%*%temp +alpha<-median(res) +r<-matrix(NA,ncol=ncol(x),nrow=nrow(x)) +tempold<-temp +for(it in 1:iter){ +for(p in 1:ncol(x)){ +r[,p]<-y-x%*%temp-alpha+temp[p]*x[,p] +temp[p]<-tsp1reg(x[,p],r[,p],plotit=FALSE)$coef[2] +} +if(max(abs(temp-tempold))0){ +e.pow<-varfun(yhat)/varfun(y) +if(!is.na(e.pow)){ +if(e.pow>=1)e.pow<-corfun(yhat,y)$cor^2 +e.pow=as.numeric(e.pow) +stre=sqrt(e.pow) +}} +res=NULL +list(coef=coef,residuals=res,Strength.Assoc=stre,Explanatory.Power=e.pow) +} + + +reg2cimcp<-function(x,y,regfun=tsreg,nboot=599,alpha=0.05, +SEED=TRUE,xout=FALSE,outfun=out,...){ +# +# Like reg2ci only x1 etc have list mode containing +# data for J>1 groups. 
For all pairs of groups are compared via a +# call to reg2ci. +# +# x list mode contain a matrix of predictors. +# x[[1]] contains predictors for first group +# y[[1]] dependent variable for first group. +# +# +if(!is.list(x))stop('x and y should have list mode') +J=length(x) # number of groups +jcom<-0 +for (j in 1:J){ +for (k in 1:J){ +if (j < k){ +res=reg2ci(x[[j]],y[[j]],x[[k]],y[[k]],regfun=regfun,nboot=nboot,alpha=alpha, +plotit=FALSE,xout=xout,outfun=outfun,WARN=FALSE,...) +print(paste('Group', j,'Group', k)) +print(res) +}}} +} + + +epowv2<-function(x,y,pcor=FALSE,regfun=tsreg,corfun=pbcor,varfun=pbvar,xout=FALSE,outfun=outpro,plotit=FALSE,...){ +# +# Estimate the explanatory correlation between x and y +# +# It is assumed that x is a vector or a matrix having one column only +xx<-elimna(cbind(x,y)) # Remove rows with missing values +p1=ncol(xx) +p=p1-1 +x<-xx[,1:p] +y<-xx[,p1] +x<-as.matrix(x) +if(xout){ +flag<-outfun(x,plotit=plotit,...)$keep +x=x[flag,] +y=y[flag] +} +coef<-regfun(x,y)$coef +yhat<-x %*% coef[2:p1] + coef[1] +stre=NULL +temp=varfun(y) +e.pow=NULL +if(temp>0)e.pow<-varfun(yhat)/temp +if(e.pow>1)e.pow=corfun(y,yhat)$cor^2 +list(Strength.Assoc=e.pow,Explanatory.Power=sqrt(e.pow)) +} +rmblo<-function(x,y){ +# +# Remove only bad leverage points and return the +# remaining data +# +x=as.matrix(x) +p=ncol(x) +p1=p+1 +xy=elimna(cbind(x,y)) +x=xy[,1:p] +y=xy[,p1] +temp1=reglev(x,y,plotit=FALSE) +ad1=c(temp1$levpoints,temp1$regout) +flag1=duplicated(ad1) +if(sum(flag1)>0){ +flag1=ad1[flag1] +x=as.matrix(x) +x1=x[-flag1,] +y1=y[-flag1] +xy=cbind(x1,y1) +} +list(x=xy[,1:p],y=xy[,p1]) +} + + + +ols1way<-function(x,y,xout=FALSE,outfun=outpro,STAND=TRUE, +alpha=.05,pr=TRUE,BLO=FALSE,HC3=FALSE,...){ +# +# Test hypothesis that for two or more independent groups, all regression parameters +# (the intercepts and slopes) are equal +# using OLS estimator. +# +# (To compare slopes only, use ols1way2g) +# +# Strategy: Use HC4 or HC3 estimate of standard errors followed by +# Johansen MANOVA type test statistic. +# +# x and y are assumed to have list mode having length J equal to the number of groups +# For example, x[[1]] and y[[1]] contain the data for group 1. +# +# xout=T will eliminate leverage points using the function outfun, +# which defaults to the MVE method. +# +# BLO=TRUE, only bad leverage points are removed. +# +# OUTPUT: +# n is sample size after missing values are removed +# nv.keep is sample size after leverage points are removed. +# +if(!is.list(x))stop('Argument x should have list mode') +J=length(x) # number of groups +x=lapply(x,as.matrix) +pchk=lapply(x,ncol) +temp=matl(pchk) +if(var(as.vector(temp))!=0)stop('Something is wrong. Number of covariates differs among the groups being compared') +nv=NULL +p=ncol(x[[1]]) +p1=p+1 +for(j in 1:J){ +xy=elimna(cbind(x[[j]],y[[j]])) +x[[j]]=xy[,1:p] +y[[j]]=xy[,p1] +x[[j]]=as.matrix(x[[j]]) +nv=c(nv,nrow(x[[j]])) +} +nv.keep=nv +critrad=NULL +if(BLO)xout=FALSE +if(xout){ +temp1=lapply(x,outfun,plotit=FALSE,STAND=STAND,...) 
+for(j in 1:J){ +x[[j]]=x[[j]][temp1[[j]]$keep,] +y[[j]]=y[[j]][temp1[[j]]$keep] +}} +if(BLO){ +for(j in 1:J){ +temp1=reglev(x[[j]],y[[j]],plotit=FALSE) +ad1=c(temp1$levpoints,temp1$regout) +flag1=duplicated(ad1) +if(sum(flag1)>0){ +flag1=ad1[flag1] +x[[j]]=as.matrix(x[[j]]) +x[[j]]=x[[j]][-flag1,] +y[[j]]=y[[j]][-flag1] +}}} +x=lapply(x,as.matrix) +K=p1 +est=matrix(NA,nrow=J,ncol=p1) +grpnum=NULL +for(j in 1:J)grpnum[j]=paste("Group",j) +vlabs="Intercept" +for(j in 2:p1)vlabs[j]=paste("Slope",j-1) +dimnames(est)=list(grpnum,vlabs) +ecov=list() +ecovinv=list() +W=rep(0,p1) +gmean=rep(0,p1) +for(j in 1:J){ +est[j,]=ols(x[[j]],y[[j]],xout=FALSE,plotit=FALSE,...)$coef +nv.keep[j]=nrow(x[[j]]) +ecov[[j]]=olshc4(x[[j]],y[[j]],HC3=HC3)$cov +ecovinv[[j]]=solve(ecov[[j]]) #W_j +gmean=gmean+ecovinv[[j]]%*%est[j,] +W=W+ecovinv[[j]] +} +estall=solve(W)%*%gmean +F=0 +for(k in 1:K){ +for(m in 1:K){ +for(j in 1:J){ +F=F+ecovinv[[j]][k,m]*(est[j,k]-estall[k])*(est[j,m]-estall[m]) +}}} +pvalad=NULL +df=K*(J-1) +iden=diag(p1) +Aw=0 +for(j in 1:J){ +temp=iden-solve(W)%*%ecovinv[[j]] +tempsq=temp%*%temp +Aw=Aw+(sum(diag(tempsq))+(sum(diag(temp)))^2)/(nv[j]-1) +} +Aw=Aw/2 +crit=qchisq(alpha,df) +crit=crit+(crit/(2*df))*(Aw+3*Aw*crit/(df+2)) +alval<-c(1:999)/1000 +for(i in 1:999){ +irem<-i +crit=qchisq(alval[i],df) +critad=crit+(crit/(2*df))*(Aw+3*Aw*crit/(df+2)) +if(F0){ +flag1=ad1[flag1] +x[[j]]=as.matrix(x[[j]]) +x[[j]]=x[[j]][-flag1,] +y[[j]]=y[[j]][-flag1] +}}} +x=lapply(x,as.matrix) +K=p1 +est=matrix(NA,nrow=J,ncol=p1) +grpnum=NULL +for(j in 1:J)grpnum[j]=paste("Group",j) +vlabs="Intercept" +for(j in 2:p1)vlabs[j]=paste("Slope",j-1) +dimnames(est)=list(grpnum,vlabs) +ecov=list() +ecovinv=list() +W=rep(0,p1) +gmean=rep(0,p) +for(j in 1:J){ +est[j,]=ols(x[[j]],y[[j]],xout=FALSE,plotit=FALSE,...)$coef +nv.keep[j]=nrow(x[[j]]) +ecov[[j]]=olshc4(x[[j]],y[[j]])$cov +ecovinv[[j]]=solve(ecov[[j]]) #W_j +gmean=gmean+ecovinv[[j]][2:K,2:K]%*%est[j,2:K] +W=W+ecovinv[[j]] +} +estall=solve(W[2:K,2:K])%*%gmean +estall=c(0,estall) +F=0 +for(k in 2:K){ +for(m in 2:K){ +for(j in 1:J){ +F=F+ecovinv[[j]][k,m]*(est[j,k]-estall[k])*(est[j,m]-estall[m]) +}}} +pvalad=NULL +df=p*(J-1) +# Adjust critical value: +iden=diag(p) +Aw=0 +for(j in 1:J){ +temp=iden-solve(W[2:K,2:K])%*%ecovinv[[j]][2:K,2:K] +tempsq=temp%*%temp +Aw=Aw+(sum(diag(tempsq))+(sum(diag(temp)))^2)/(nv[j]-1) +} +Aw=Aw/2 +crit=qchisq(alpha,df) +crit=crit+(crit/(2*df))*(Aw+3*Aw*crit/(df+2)) +alval<-c(1:999)/1000 +for(i in 1:999){ +irem<-i +crit=qchisq(alval[i],df) +critad=crit+(crit/(2*df))*(Aw+3*Aw*crit/(df+2)) +if(F0){ +if(sig>alpha)sig=a$Estimates[2] +if(sig>alpha)sig=.95*alpha +}} +list(cor.ci=corci,p.value=sig,cor.est=est) +} + +scorsubMC<-function(isub,x,y,pr=FALSE,STAND=TRUE,corfun=corfun,cop=cop,CPP=FALSE,RAN=FALSE,...){ +isub=as.vector(isub) +if(!CPP)corbsub<-scor(x[isub],y[isub],plotit=FALSE,pr=FALSE,STAND=STAND,corfun=corfun,cop=cop, +SEED=FALSE,RAN=RAN,...)$cor +if(CPP)stop('Need to use RStudio with WRScpp installed and use the file WRSC++') +corbsub +} +normTmm<-function(x,SEED=TRUE,nboot=2000){ +# +# Test that the tails of the distribution of x +# have more outliers than expected under normality +# +if(SEED)set.seed(45) +no=out(x,SEED=FALSE)$n.out +val=NA +x=elimna(x) +n=length(x) +for(i in 1:nboot)val[i]=out(rnorm(n),SEED=FALSE)$n.out +list(n.out=no,p.value=mean(val>=no)) +} + +rplot<-function(x,y,est=tmean,scat=TRUE,fr=NA,plotit=TRUE,pyhat=FALSE,efr=.5, +theta=50,phi=25,scale=TRUE,expand=.5,SEED=TRUE,varfun=pbvar,outfun=outpro, 
+nmin=0,xout=FALSE,out=FALSE,eout=FALSE,xlab='X',ylab='Y',zscale=FALSE, +zlab=' ',pr=TRUE,duplicate='error',ticktype='simple',LP=TRUE,OLD=FALSE,pch='.',prm=TRUE,...){ +# duplicate='error' +# In some situations where duplicate values occur, when plotting with +# two predictors, it is necessary to set duplicate='strip' +# +# LP=TRUE, the plot of the smooth is further smoothed via lplot (lowess) +# To get a plot as done with old version set +# LP=FALSE +# +# zscale=TRUE will standardize the dependent variable when plotting with 2 independent variables. +# +# efr is the span when computing explanatory strength of association +# +# cf qplot in the R package ggplot2 +# +if(pr){ +if(!xout)print('Suggest also looking at result using xout=TRUE') +} +x<-as.matrix(x) +p=ncol(x) +p1=p+1 +if(pr && !OLD){ +print('A new estimate of the strength of the association is used by default.') +print(' To get the old estimate, set OLD=TRUE') +} +xx<-cbind(x,y) +xx<-elimna(xx) +n=nrow(xx) +if(eout){ +flag=outfun(xx,plotit=FALSE,...)$keep +xx=xx[flag,] +} +if(xout){ +if(identical(outfun,outblp))flag=outblp(xx[,1:p],xx[,p1],plotit=FALSE)$keep +else +flag=outfun(xx[,1:p],plotit=FALSE,...)$keep +xx=xx[flag,] +} +n.keep=nrow(xx) +x<-xx[,1:p] +x<-as.matrix(x) +p1=ncol(x)+1 +y<-xx[,p1] +if(ncol(x)==1){ +if(is.na(fr))fr<-.8 +val<-rungen(x,y,est=est,scat=scat,fr=fr,plotit=plotit,pyhat=TRUE, +xlab=xlab,ylab=ylab,LP=LP,pch=pch,...) +val2<-rungen(x,y,est=est,fr=efr,plotit=FALSE,pyhat=TRUE,LP=FALSE,...)$output +val<-val$output +} +if(ncol(x)>1){ +id=chk4binary(x) +Lid=length(id) +if(Lid>0)Stop('Binary independent variables detected, use rplotv2') +if(ncol(x)==2 && !scale){ +if(pr){print('scale=FALSE is specified.') +print('If there is dependence, might want to use scale=T') +}} +if(is.na(fr))fr<-1 +val<-rung3d(x,y,est=est,fr=fr,plotit=plotit,pyhat=TRUE,SEED=SEED,nmin=nmin,LP=LP, +scale=scale,phi=phi,theta=theta,expand=expand,zscale=zscale,pr=FALSE, +duplicate='error',xlab=xlab,ylab=ylab,zlab=zlab,ticktype=ticktype,...) +} +E.power=NULL +if(OLD){ +E.power=varfun(val)/varfun(y) +names(E.power)='' +if(E.power>1)E.power=.99 +} +if(!OLD)E.power=smRstr(x,y,fr=fr)$str^2 +stra=sqrt(E.power) +# Best correction at the moment. Not sure when or if needed. +# Maybe a correlation option is better, but need to check this. +xvals=x +if(ncol(x)==1)xvals=sort(xvals) +if(!pyhat){ +val <- NULL +xvals=NULL +} +if(!prm){ +stra=NULL +E.power=NULL +val=NULL +} +list(n=n,n.keep=n.keep,Strength.Assoc=stra,Explanatory.Power = E.power, xvals=xvals,yhat = val) +} + +Rfit<-function(x,y,xout=FALSE,outfun=outpro,...){ +# +# Fit regression line using rank-based method based +# Jaeckel's dispersion function +# via the R package Rfit +# +library(Rfit) +if(xout){ +m<-cbind(x,y) +p1=ncol(m) +p=p1-1 +flag<-outfun(x,plotit=FALSE,...)$keep +m<-m[flag,] +x<-m[,1:p] +y<-m[,p1] +} +fit=rfit(y~x) +output=summary(fit) +list(summary=output[1]$coefficients,coef=output[1]$coefficients[,1],Drop_test=output[2]$dropstat, + Drop_test_p.value=output[3]$droppval,Mult_R_squared=output[4]$R2) +} + +regunstack<-function(x,grp,xcols,ycol){ +# +# x is assumed to be a matrix or a data frame +# +# sort data in x into group indicated by col grp of x, +# Designed for a one-way ANOVA where goal is to compare slopes +# corresponding to two or more groups. +# +# returns the independent variables in x having list mode +# x[[1]] would be a matrix for group 1, x[[2]] a matrix for group 2, etc +# y[[1]] is the dependent variable for group 1, etc. 
+# +# xcols indicates the columns of x containing independent variables +# ycol indicates the column of x containing dependent variables +# +x=elimna(x) +val=sort(unique(x[,grp])) +xs=list() +ys=list() +for(i in 1:length(val)){ +flag=(x[,grp]==val[i]) +xs[[i]]=x[flag,xcols] +ys[[i]]=x[flag,ycol] +} +list(num.grps=length(val),x=xs,y=ys) +} + + + + +ols1way2g<-function(x,y,grp=c(1,2),iv=1,xout=FALSE,outfun=outpro,STAND=TRUE,alpha=.05,pr=TRUE,BLO=FALSE,...){ +# +# Test hypothesis that for two or more independent groups, all slope parameters +# are equal using OLS estimator. +# +# (ols1way tests the hypothesis that all parameters are equal, not just slopes.) +# +# Use Johansen MANOVA type test statistic in conjunction with HC4 estimate of covariances. +# +# x and y are assumed to have list mode having length J equal to the number of groups +# For example, x[[1]] and y[[1]] contain the data for group 1. +# +# xout=T will eliminate leverage points using the function outfun, +# which defaults to the MVE method. +# +# BLO=TRUE, only bad leverage points are removed. +# +# OUTPUT: +# n is sample size after missing values are removed +# nv.keep is sample size after leverage points are removed. +# +if(!is.list(x))stop('Argument x should have list mode') +iv1=iv+1 +J=length(x) # number of groups +x=lapply(x,as.matrix) +pchk=lapply(x,ncol) +temp=matl(pchk) +if(var(as.vector(temp))!=0)stop('Something is wrong. Number of covariates differs among the groups being compared') +nv=NULL +p=ncol(x[[1]]) +p1=p+1 +for(j in 1:J){ +xy=elimna(cbind(x[[j]],y[[j]])) +x[[j]]=xy[,1:p] +y[[j]]=xy[,p1] +x[[j]]=as.matrix(x[[j]]) +nv=c(nv,nrow(x[[j]])) +} +nv.keep=nv +critrad=NULL +if(BLO)xout=FALSE +if(xout){ +temp=lapply(x,outfun,plotit=FALSE,STAND=STAND,...) +for(j in 1:J){ +x[[j]]=x[[j]][temp[[j]]$keep,] +y[[j]]=y[[j]][temp[[j]]$keep] +}} +if(BLO){ +for(j in 1:J){ +temp1=reglev(x[[j]],y[[j]],plotit=FALSE) +ad1=c(temp1$levpoints,temp1$regout) +flag1=duplicated(ad1) +if(sum(flag1)>0){ +flag1=ad1[flag1] +x[[j]]=as.matrix(x[[j]]) +x[[j]]=x[[j]][-flag1,] +y[[j]]=y[[j]][-flag1] +}}} +x=lapply(x,as.matrix) +K=p1 +est=matrix(NA,nrow=J,ncol=p1) +grpnum=NULL +for(j in 1:J)grpnum[j]=paste("Group",j) +vlabs="Intercept" +for(j in 2:p1)vlabs[j]=paste("Slope",j-1) +dimnames(est)=list(grpnum,vlabs) +ecov=list() +ecovinv=list() +W=rep(0,p1) +gmean=rep(0,K) +for(j in 1:J){ +est[j,]=ols(x[[j]],y[[j]],xout=FALSE,plotit=FALSE,...)$coef +nv.keep[j]=nrow(x[[j]]) +ecov[[j]]=olshc4(x[[j]],y[[j]])$cov +ecovinv[[j]]=solve(ecov[[j]]) #W_j +gmean=gmean+ecovinv[[j]]%*%est[j,] +W=W+ecovinv[[j]] +} +estall=solve(W)%*%gmean +F=0 +for(j in 1:2){ +F=F+ecovinv[[grp[j]]][iv1,iv1]*(est[grp[j],iv1]-estall[iv1])*(est[grp[j],iv1]-estall[iv1]) +} +pvalad=NULL +df=1 +# Adjust critical value: +iden=1 +Aw=0 +for(j in 1:J){ +temp=iden-solve(W[iv1,iv1])%*%ecovinv[[grp[j]]][iv1,iv1] +tempsq=temp%*%temp +Aw=Aw+(sum(diag(tempsq))+(sum(diag(temp)))^2)/(nv[grp[j]]-1) +} +Aw=Aw/2 +crit=qchisq(alpha,df) +crit=crit+(crit/(2*df))*(Aw+3*Aw*crit/(df+2)) +alval<-c(1:999)/1000 +for(i in 1:999){ +irem<-i +crit=qchisq(alval[i],df) +critad=crit+(crit/(2*df))*(Aw+3*Aw*crit/(df+2)) +if(F0){ +flag1=ad1[flag1] +x[[j]]=as.matrix(x[[j]]) +x[[j]]=x[[j]][-flag1,] +y[[j]]=y[[j]][-flag1] +}}} +x=lapply(x,as.matrix) +K=p1 +est=matrix(NA,nrow=J,ncol=p1) +grpnum=NULL +for(j in 1:J)grpnum[j]=paste("Group",j) +vlabs="Intercept" +for(j in 2:p1)vlabs[j]=paste("Slope",j-1) +dimnames(est)=list(grpnum,vlabs) +ecov=list() +ecovinv=list() +W=rep(0,p1) +for(j in 1:J){ 
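+# For each group, compute the OLS estimates and the HC4 (or HC3) estimate of the
+# covariance matrix of those estimates via olshc4; the slope corresponding to iv1 is
+# then compared across the two groups indicated by grp using a Welch-type t statistic
+# with Satterthwaite-style degrees of freedom.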
+est[j,]=ols(x[[j]],y[[j]],xout=FALSE,plotit=FALSE,...)$coef +nv.keep[j]=nrow(x[[j]]) +ecov[[j]]=olshc4(x[[j]],y[[j]],HC3=HC3)$cov +} +q1=ecov[[grp[1]]][iv1,iv1] +q2=ecov[[grp[2]]][iv1,iv1] +top=est[grp[1]]-est[grp[2]] +F=(est[grp[1],iv1]-est[grp[2],iv1])/sqrt(q1+q2) +df=(q1+q2)^2/(q1^2/(nv[grp[1]]-1)+q2^2/(nv[grp[2]]-1)) +pv=2*(1-pt(abs(F),df)) +crit=qt(1-alpha/2,df) +ci=est[grp[1],iv1]-est[grp[2],iv1]-crit*sqrt(q1+q2) +ci[2]=est[grp[1],iv1]-est[grp[2],iv1]+crit*sqrt(q1+q2) +list(n=nv,n.keep=nv.keep,test.stat=F,conf.interval=ci, +est=c(est[grp[1],iv1],est[grp[2],iv1]),est.dif=est[grp[1],iv1]-est[grp[2],iv1],p.value=pv) +} + +cov.roc<-function(x){ +library(robust) +temp<-covRob(x,estim='M') +val<-temp +list(center=val[3]$center,cov=val[2]$cov) +} +reg1mcp<-function(x,y,regfun=tsreg,SEED=TRUE,nboot=100,xout=FALSE,outfun=outpro,STAND=TRUE,alpha=.05, +pr=TRUE,MC=FALSE,...){ +# +# Perform all pairwise comparisons of intercepts among J independent groups +# Do the same of the first slope, followed by the 2nd slope, etc. +# +# Control FWE via Hochberg's methods for each set of +# (J^2-J)/2 parameters. That is, control FWE for the intercepts +# Do the same for the first slope, etc. +# +# # x and y are assumed to have list mode having +# length J equal to the number of groups +# For example, x[[1]] and y[[1]] contain the data for group 1. +# +# xout=T will eliminate leverage points using the function outfun, +# which defaults to the projection method. +# +# OUTPUT: +# n is sample size after missing values are removed +# nv.keep is sample size after leverage points are removed. +# output contains all pairwise comparisons +# For each parameter, FWE is controlled using Hochberg's method +# So by default, for the intercepts, +# all pairwise comparisons are performed with FWE=.05 +# For the first slope, all pairwise comparisons are performed +# with FWE=.05, etc. +# +if(SEED)set.seed(2) +if(!is.list(x))stop('Argument x should have list mode') +if(!is.list(y))stop('Argument y should have list mode') +J=length(x) # number of groups +x=lapply(x,as.matrix) +pchk=lapply(x,ncol) +temp=matl(pchk) +if(var(as.vector(temp))!=0)stop('Something is wrong. Number of covariates differs among the groups being compared') +nv=NULL +p=ncol(x[[1]]) +p1=p+1 +for(j in 1:J){ +xy=elimna(cbind(x[[j]],y[[j]])) +x[[j]]=xy[,1:p] +y[[j]]=xy[,p1] +x[[j]]=as.matrix(x[[j]]) +nv=c(nv,nrow(x[[j]])) +} +nv.keep=nv +critrad=NULL +if(xout){ +temp=lapply(x,outfun,plotit=FALSE,STAND=STAND,...) 
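+# After leverage points are removed in the loop that follows, every pair of groups is
+# compared via reg2ci; for each regression parameter the (J^2-J)/2 p-values are ordered
+# and compared with the critical levels alpha/1, alpha/2, ..., so FWE is controlled
+# with Hochberg's method, as described above.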
+for(j in 1:J){ +x[[j]]=x[[j]][temp[[j]]$keep,] +y[[j]]=y[[j]][temp[[j]]$keep] +nv.keep[j]=length(y[[j]]) +}} +tot=(J^2-J)/2 +dvec<-alpha/c(1:tot) +outl=list() +nr=tot*p1 +outp=matrix(NA,ncol=7,nrow=nr) +x=lapply(x,as.matrix) +rlab=rep('Intercept',tot) +xx=list() +yy=list() +iall=0 +ivp=c(1,tot)-tot +for(ip in 1:p){ +#iv=ip-1 +rlab=c(rlab,rep(paste('slope',ip),tot)) +} +i=0 +sk=1+tot*p +st=seq(1,sk,tot) +st=st-1 +for(j in 1:J){ +for(k in 1:J){ +if(j < k){ +i=i+1 +st=st+1 +xx[[1]]=x[[j]][,1:p] +xx[[2]]=x[[k]][,1:p] +yy[[1]]=y[[j]] +yy[[2]]=y[[k]] +if(!MC)temp=reg2ci(xx[[1]],yy[[1]],xx[[2]],yy[[2]],regfun=regfun)$output +if(MC)temp=reg2ci(xx[[1]],yy[[1]],xx[[2]],yy[[2]],regfun=regfun)$output +iall=iall+1 +outp[iall,1]=j +outp[iall,2]=k +outp[st,3]=temp[,4] +outp[st,5]=temp[,2] +outp[st,6]=temp[,3] +}}} +for(i in 1:p1){ +ivp=ivp+tot +temp2<-order(0-outp[ivp[1]:ivp[2],3]) +icc=c(ivp[1]:ivp[2]) +icc[temp2]=dvec +outp[ivp[1]:ivp[2],4]=icc +} +flag=(outp[,3]<=outp[,4]) +outp[,7]=rep(0,nr) +outp[flag,7]=1 +v=outp[1:tot,1] +vall=rep(v,p1) +outp[,1]=vall +v=outp[1:tot,2] +vall=rep(v,p1) +outp[,2]=vall +#outp[,7]=p.adjust(outp[,3],method=method) +dimnames(outp)=list(rlab,c('Group','Group','p.value','p.crit','ci.low','ci.hi','Sig')) +list(n=nv,n.keep=nv.keep,output=outp) +} + + + +qcorp1<-function(x,y,qest=hd,q=.5,xout=FALSE,outfun=outpro,plotit=FALSE,...){ +# +# Compute a measure of the strength of the association +# based on the quantile regression lines +# +X=cbind(x,y) +X=elimna(X) +x<-as.matrix(x) +p=ncol(x) +x=X[,1:p] +p1=p+1 +y=X[,p1] +if(xout){ +x<-as.matrix(x) +flag<-outfun(x,plotit=plotit,...)$keep +x<-x[flag,] +y<-y[flag] +x<-as.matrix(x) +X=cbind(x,y) +} +est=qreg(x,y,q=q)$coef +top=qreg.sub(X,est,qval=q) +null=qest(y,q) +v=c(null,rep(0,p)) +bot=qreg.sub(X,v,qval=q) +ce=sqrt(1-top/bot) +if(p==1)ce=sign(est[2])*ce +list(cor=ce) +} + +scorciMC<-function(x,y,nboot=1000,alpha=.05,V2=TRUE,SEED=TRUE,plotit=TRUE,STAND=TRUE,corfun=pcor,pr=TRUE,cop=3,...){ +# +# Compute a 1-alpha confidence interval for the skipped correlation. +# alpha=0.05 is the default. +# By default, Pearson's correlation is computed after outliers are removed via the R function outdoor +# corfun=spear, for example would replace Pearson's correlation with Spearman's correlation. +# +# The default number of bootstrap samples is nboot=1000 +# +# This function uses the R package parallel +# +if(pr){ +print('As of Sept. 4, 2019, an improved version of this function is used when n<120. To use the old version, set V2=FALSE') +} +if(ncol(as.matrix(x))!=1)stop('x should be a single vector') +if(!V2){ +m1=cbind(x,y) +m1<-elimna(m1) # Eliminate rows with missing values +nval=nrow(m1) +x<-m1[,1] +y<-m1[,2] +est<-scor(x,y,plotit=plotit,STAND=STAND,corfun=corfun,SEED=SEED,cop=cop,pr=FALSE,...)$cor +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +# +data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot) +data=listm(t(data)) +library(parallel) +bvec<-mclapply(data,scorsubMC,x,y,STAND=STAND,corfun=corfun,cop=cop,...) +bvec=matl(bvec) # A 1 by nboot matrix. 
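+# bvec now contains the nboot bootstrap values of the skipped correlation; the
+# percentile confidence interval is computed next, and the p-value is twice the smaller
+# of the proportion of bootstrap correlations below zero and its complement.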
+bvec=as.vector(bvec) +ihi<-floor((1-alpha/2)*nboot+.5) +ilow<-floor((alpha/2)*nboot+.5) +bsort<-sort(bvec) +corci<-1 +corci[1]<-bsort[ilow] +corci[2]<-bsort[ihi] +phat <- sum(bvec < 0)/nboot +sig <- 2 * min(phat, 1 - phat) +} +if(V2){ +a=scorregciH(x,y,nboot=nboot,alpha=alpha,pr=FALSE,SEED=SEED,STOP=FALSE) +est=a$Estimates[1] +sig=a$Estimates[2] +corci=a$confidence.int[2:3] +chk=sign(corci[1]*corci[2]) +if(chk>0){ +if(sig>alpha)sig=a$Estimates[2] +if(sig>alpha)sig=.95*alpha +}} +list(cor.ci=corci,p.value=sig,cor.est=est) +} + +olsLmcp<-function(x,y,xout=TRUE,outfun=outpro,ISO=FALSE,STAND=TRUE,alpha=.05,pr=TRUE,BLO=FALSE,HC3=FALSE,...){ +# +# All pairwise comparison of regression models among J independent groups +# That is, for groups j and k, all j0)rlab=c(rlab,rep(paste('slope',iv),tot)) +for(j in 1:J){ +for(k in 1:J){ +if(j < k){ +i=i+1 +xx[[1]]=x[[j]][,1:p] +xx[[2]]=x[[k]][,1:p] +yy[[1]]=y[[j]] +yy[[2]]=y[[k]] +all=olsW2g(xx,yy,iv=iv,BLO=BLO,HC3=HC3) +temp=all$p.value +iall=iall+1 +outp[iall,1]=j +outp[iall,2]=k +outp[iall,3]=all$conf.interval[1] +outp[iall,4]=all$conf.interval[2] +outp[iall,5]=temp +}}} +ivp=ivp+tot +temp2<-order(0-outp[ivp[1]:ivp[2],5]) +icc=c(ivp[1]:ivp[2]) +icc[temp2]=dvec +outp[ivp[1]:ivp[2],6]=icc +D=rep('NO',tot) +flag=(outp[ivp[1]:ivp[2],5]<=outp[ivp[1]:ivp[2],4]) +} +flag=(outp[,5]<=outp[,6]) +outp[,7]=rep(0,nr) +outp[flag,7]=1 +dimnames(outp)=list(rlab,c('Group','Group','ci.low','ci.up','p.value','p.crit','sig')) +list(n=nv,n.keep=nv.keep,output=outp) +} + + + +anctsmcp<-function(x,y,regfun=tsreg,nboot=599,alpha=0.05,pts=NULL, +SEED=TRUE,xout=FALSE,outfun=out,fr1=1,fr2=1,...){ +# +# Like reg2ci only x1 etc have list mode containing +# data for J>1 groups. For all pairs of groups are compared via a +# call to ancova. +# +# x list mode contain a matrix of predictors. +# x[[1]] contains predictors for first group +# y[[1]] dependent variable for first group. +# +# +if(!is.list(x))stop('x and y should have list mode') +J=length(x) # number of groups +jcom<-0 +for (j in 1:J){ +for (k in 1:J){ +if (j < k){ +res=ancts(x[[j]],y[[j]],x[[k]],y[[k]],regfun=regfun,pts=pts, +nboot=nboot,alpha=alpha,fr1=fr1,fr2=fr2, +plotit=FALSE,xout=xout,outfun=outfun,WARN=FALSE,...) +print(paste('Group', j,'Group', k)) +print(res) +}}} +} + +chregF<-function(x,y,bend=1.345,SEED=FALSE,xout=FALSE,outfun=out,...){ +# +# Compute Coakley Hettmansperger robust regression estimators +# JASA, 1993, 88, 872-880 +# +# x is a n by p matrix containing the predictor values. +# +# No missing values are allowed +# +# Comments in this function follow the notation used +# by Coakley and Hettmansperger +# +library(MASS) +# with old version of R, need library(lqs) when using ltsreg +# as the initial estimate. +# +if(SEED)set.seed(12) # Set seed so that results are always duplicated. +x<-as.matrix(x) +p<-ncol(x) +m<-elimna(cbind(x,y)) +x<-m[,1:p] +p1<-p+1 +y<-m[,p1] +if(xout){ +x<-as.matrix(x) +flag<-outfun(x,plotit=plotit,...)$keep +x<-x[flag,] +y<-y[flag] +x<-as.matrix(x) +} +x<-as.matrix(x) +cutoff<-bend +mve<-vector('list') +if(ncol(x)==1){ +mve$center<-median(x) +mve$cov<-mad(x)^2 +} +if(ncol(x)>=2)mve<-cov.mve(x) # compute minimum volume ellipsoid measures of + # location and scale and store in mve. +reg0<-ltsReg(x,y) # compute initial regression est using least trimmed + # squares. 
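+# (A single Newton-type correction, using Mallows weights based on robust distances and
+# Huber's psi applied to the standardized residuals, is then added to this initial fit.)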
+# Next, compute the rob-md2(i) values and store in rob +rob<-1 # Initialize vector rob +mx<-mve$center +rob<-mahalanobis(x,mx,mve$cov) +k21<-qchisq(.95,p) +c62<-k21/rob +vecone<-c(rep(1,length(y))) # Initialize vector vecone to 1 +c30<-pmin(vecone,c62) # mallows weights put in c30 +k81<-median(abs(reg0$residuals)) # median of absolute residuals +k72<-1.4826*(1+(5/(length(y)-p-1)))*k81 # lms scale +c60<-reg0$residuals/(k72*c30) # standardized residuals +# compute psi and store in c27 +cvec<-c(rep(cutoff,length(y))) # Initialize vector cvec to cutoff +c27<-pmin(cvec,c60) +c27<-pmax(-1*cutoff,c27) #c27 contains psi values +# +# compute B matrix and put in c66. +# Also, transform B so that i th diag elem = 0 if c27[i] is +# between -cutoff and cutoff, 1 otherwise. +# +c66<-ifelse(abs(c27)<=bend,1,0) # Have derivative of psi in c66 +m1<-cbind(1,x) # X matrix with col of 1's added +m2<-t(m1) #X transpose +m5<-diag(c30) # matrix W, diagonal contains weights +m4<-diag(c66) # B matrix +m6<-m4%*%m1 # BX +m7<-m2%*%m6 # X'BX (nD=X'BX) +m8<-solve(m7) #m8 = (X'-B-X)inverse +m9<-m8%*%m2 #m9=X prime-B-X inverse X' +m9<-m9%*%m5 # m9=X prime-B-X inverse X'W +m10<-m9%*%c27 +c20<-m10*k72 +c21<-reg0$coef+c20 #update initial estimate of parameters. +res<-y-m1%*%c21 +list(coef=t(c21),residuals=res) +} + +DregGOLS<-function(x1,y1,x2,y2,xout=FALSE,outfun=outpro,SEED=TRUE,nboot=200, +STAND=TRUE,...){ +# +# Global test that two dependent (time 1 and time 2) +# OLS regression lines are identical +# +# Use a variation of Hotelling's test coupled with a bootstrap +# estimate of the relevant covariance matrix associated with the differences +# in the estimates of the parameters. +# +if(SEED)set.seed(2) +X=elimna(cbind(x1,y1,x2,y2)) +x1=as.matrix(x1) +x2=as.matrix(x2) +p=ncol(x1) +p1=p+1 +p2=p+2 +p3=p1+p +p4=p3+1 +x1=X[,1:p] +y1=X[,p1] +x2=X[,p2:p3] +y2=X[,p4] +n=length(y1) +if(xout){ +opf=identical(outfun,outpro) +if(!opf){ +flag1=outfun(x1)$out.id +flag2=outfun(x2)$out.id +} +if(opf){ +flag1=outpro(x1,STAND=STAND)$out.id +flag2=outfun(x2,STAND=STAND)$out.id +} +flag=unique(c(flag1,flag2)) +if(length(flag)>0)X=X[-flag,] +x1=X[,1:p] +y1=X[,p1] +x2=X[,p2:p3] +y2=X[,p4] +} +nk=length(y1) +x1=as.matrix(x1) +x2=as.matrix(x2) +data<-matrix(sample(length(y1),size=length(y1)*nboot,replace=TRUE),nrow=nboot) +bvec1<-apply(data,1,regboot,x1,y1,regfun=lsfit,...) +# bvec is a p+1 by nboot matrix. The first row +# contains the bootstrap intercepts, the second row +# contains the bootstrap values for first predictor, etc. +bvec2<-apply(data,1,regboot,x2,y2,regfun=lsfit,...) 
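+# For reference, the test statistic computed next is a Hotelling-type
+# statistic applied to d = est1 - est2, the difference between the time 1
+# and time 2 OLS coefficient vectors, with cov(d) estimated by S, the sample
+# covariance of the bootstrap differences:
+#    F = [(n-p-1)/((n-1)(p+1))] * d' S^{-1} d,
+# which is referred to an F distribution with p+1 and n-p-1 degrees of
+# freedom.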
+dif=t(bvec1-bvec2) +S=cov(dif) +est1=lsfit(x1,y1)$coef +est2=lsfit(x2,y2)$coef +est=est1-est2 +k <- (nk-p1)/((nk - 1)*p1) + stat <- k * crossprod(est, solve(S, est))[1, ] + pvalue <- 1 - pf(stat, p1, nk - p1) +list(test.statistic = stat, degrees_of_freedom = c(p1, nk - p1), p.value = +pvalue,est.1=est1,est.2=est2,estimate.dif = est) +} + +difregOLS<-function(x1,y1,x2,y2,regfun=lsfit,xout=FALSE,outfun=outpro,nboot=200, +alpha=.05,SEED=TRUE,plotit=FALSE,xlab='X',ylab='Y',...){ +# +# OLS regression data from two different times i.e., two dependent groups +# +# compute confidence interval for the difference between intercepts +# and the slopes +# +if(SEED)set.seed(2) +X=elimna(cbind(x1,y1,x2,y2)) +x1=as.matrix(x1) +x2=as.matrix(x2) +p=ncol(x1) +p1=p+1 +p2=p+2 +p3=p1+p +p4=p3+1 +x1=X[,1:p] +y1=X[,p1] +x2=X[,p2:p3] +y2=X[,p4] +n=length(y1) +if(xout){ +flag1=outfun(x1)$out.id +flag2=outfun(x2)$out.id +flag=unique(c(flag1,flag2)) +if(length(flag)>0)X=X[-flag,] +x1=X[,1:p] +y1=X[,p1] +x2=X[,p2:p3] +y2=X[,p4] +} +nk=length(y1) +x1=as.matrix(x1) +x2=as.matrix(x2) +data<-matrix(sample(length(y1),size=length(y1)*nboot,replace=TRUE),nrow=nboot) +bvec1<-apply(data,1,regboot,x1,y1,regfun,...) +# bvec is a p+1 by nboot matrix. The first row +# contains the bootstrap intercepts, the second row +# contains the bootstrap values for first predictor, etc. +bvec2<-apply(data,1,regboot,x2,y2,regfun,...) +dif=t(bvec1)-t(bvec2) +est1=lsfit(x1,y1)$coef +est2=lsfit(x2,y2)$coef +estdif=est1-est2 +se=apply(dif,2,sd) +pvec=NA +test=NA +test=estdif/se +df=nk-1 +pvec=2*(1-pt(abs(test),df)) +if(plotit){ +reg2plot(x1,y1,x2,y2,xlab=xlab,ylab=ylab) +} +lvec='Intercept' +ci=matrix(NA,nrow=p1,ncol=3) +ci[,1]=c(0:p) +ci[,2]=estdif+qt(alpha/2,df)*se +ci[,3]=estdif-qt(alpha/2,df)*se +dimnames(ci)=list(NULL,c('Param','ci.low','ci.hi')) +for(j in 2:p1)lvec=c(lvec,paste('slope',j-1)) +pvec=array(pvec,dimnames=lvec) +list(n=n,n.keep=nk,est.dif=estdif,est.1=est1,est.2=est2, +test.stat=test,standard.error=se,p.values=pvec,conf.intervals=ci) +} + +Dancols<-function(x1,y1,x2,y2,pts=NULL,fr1=1,fr2=1,alpha=.05,plotit=TRUE,xout=FALSE,outfun=out,nboot=100,SEED=TRUE,xlab='X',ylab='Y',CR=FALSE,...){ +# +# Compare the OLS regression lines of two dependent (within) groups +# at specified design points +# +# Assume data are in x1 y1 x2 and y2 +# +# pts can be used to specify the design points where the regression lines +# are to be compared. +# If not specified, points are chosen for you. +# +# CR=TRUE: determine interval outside of which the lines cross. +# (Analog of Johnson--Neyman method) +# +# OUTPUT: +# cross.interval indicates interval outside of which the lines have crossed. 
+# output cr.quant.grp1 indicates that quantiles of group 1 corresponding to +# to the end of the intervals returned in cross.interval +# +x1=as.matrix(x1) +x2=as.matrix(x2) +if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') +if(SEED)set.seed(2) +FLAG=pts +X=elimna(cbind(x1,y1,x2,y2)) +if(ncol(X)>4)stop('Only one covariate is allowed') +x1=as.matrix(x1) +x2=as.matrix(x2) +p=ncol(x1) +p1=p+1 +p2=p+2 +p3=p1+p +p4=p3+1 +x1=X[,1:p] +y1=X[,p1] +x2=X[,p2:p3] +y2=X[,p4] +n=length(y1) +if(xout){ +flag1=outfun(x1,SEED=SEED,...)$out.id +flag2=outfun(x2,SEED=SEED,...)$out.id +flag=unique(c(flag1,flag2)) +if(length(flag)>0)X=X[-flag,] +x1=X[,1:p] +y1=X[,p1] +x2=X[,p2:p3] +y2=X[,p4] +} +n.keep=length(y1) +if(is.null(pts[1])){ +npt<-5 +isub<-c(1:5) # Initialize isub +test<-c(1:5) +xorder<-order(x1) +y1<-y1[xorder] +x1<-x1[xorder] +xorder<-order(x2) +y2<-y2[xorder] +x2<-x2[xorder] +n1<-1 +n2<-1 +vecn<-1 +for(i in 1:length(x1))n1[i]<-length(y1[near(x1,x1[i],fr1)]) +for(i in 1:length(x1))n2[i]<-length(y2[near(x2,x1[i],fr2)]) +for(i in 1:length(x1))vecn[i]<-min(n1[i],n2[i]) +sub<-c(1:length(x1)) +isub[1]<-min(sub[vecn>=12]) +isub[5]<-max(sub[vecn>=12]) +isub[3]<-floor((isub[1]+isub[5])/2) +isub[2]<-floor((isub[1]+isub[3])/2) +isub[4]<-floor((isub[3]+isub[5])/2) +pts=x1[isub] +pts=unique(pts) +npt=nrow(as.matrix(pts)) +mat<-matrix(NA,npt,9) +dimnames(mat)<-list(NULL,c('X','Est1','Est2','DIF','TEST','se','ci.low','ci.hi','p.value')) +mat[,1]=pts +sqsd=difregYvar(x1,y1,x2,y2,regfun=lsfit,pts=pts,nboot=nboot,SEED=SEED) +est1=regYhat(x1,y1,xr=pts,regfun=lsfit) #Note: if xout=T, leverage points already removed +est2=regYhat(x2,y2,xr=pts,regfun=lsfit) +mat[,2]=est1 +mat[,3]=est2 +est=est1-est2 +mat[,4]=est +sd=sqrt(sqsd) +mat[,6]=sd +tests=(est1-est2)/sd +mat[,5]=tests +df=length(y1)-1 +pv=2*(1-pt(abs(tests),df)) +mat[,9]=pv +crit<-smmcrit(df,5) +mat[,7]=est-crit*sd +mat[,8]=est+crit*sd +} +if(!is.null(FLAG)){ +for(i in 1:length(pts)){ +n1[i]<-length(y1[near(x1,pts[i],fr1)]) +n2[i]<-length(y2[near(x2,pts[i],fr2)]) +} +pts=unique(pts) +mat<-matrix(NA,length(pts),9) +dimnames(mat)<-list(NULL,c('X','Est1','Est2','DIF','TEST','se','ci.low','ci.hi','p.value')) +mat[,1]<-pts +sqsd=difregYvar(x1,y1,x2,y2,regfun=lsfit,pts=pts,nboot=nboot,SEED=SEED) +est1=regYhat(x1,y1,xr=pts,regfun=lsfit,,...) +est2=regYhat(x2,y2,xr=pts,regfun=lsfit,,...) 
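+# For each design point the test statistic is (Yhat1-Yhat2)/se, where the
+# squared standard error comes from the bootstrap estimate sqsd above and
+# the degrees of freedom are n-1.  The confidence intervals use the
+# Studentized maximum modulus critical value smmcrit(df, number of points),
+# which is intended to control the familywise error rate over the set of
+# design points.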
+mat[,2]=est1 +mat[,3]=est2 +est=est1-est2 +mat[,4]=est +sd=sqrt(sqsd) +mat[,6]=sd +tests=(est1-est2)/sd +mat[,5]=tests +df=length(y1)-1 +pv=2*(1-pt(abs(tests),df)) +mat[,9]=pv +crit<-smmcrit(df,length(pts)) +mat[,7]=est-crit*sd +mat[,8]=est+crit*sd +} +if(plotit){ +plot(c(x1,x2),c(y1,y2),type='n',xlab=xlab,ylab=ylab) +points(x1,y1,pch='o') +points(x2,y2,pch='+') +abline(lsfit(x1,y1)$coef) +abline(lsfit(x2,y2)$coef,lty=2) +} +int=NULL +crq=NULL +crq2=NULL +if(CR){ +if(ncol(as.matrix(x1))>1)stop('CR=T only allowed with one covariate') +int=DancCR(x1,y1,x2,y2) +crq=mean(x1<=int[1]) +crq[2]=mean(x1<=int[2]) +crq2=mean(x2<=int[1]) +crq2[2]=mean(x2<=int[2]) +} + +list(n=n,n.keep=n.keep,output=mat,cross.interval=int,cr.quant.grp1=crq, +cr.quant.grp2=crq2) +} +Dancols_sub1<-function(pts,x1,y1,x2,y2){ +# +# +ci=abs(Dancols_sub(x1,y1,x2,y2,pts=pts)$output[1,7]) +ci +} +Dancols_sub2<-function(pts,x1,y1,x2,y2){ +# +# +ci=abs(Dancols_sub(x1,y1,x2,y2,pts=pts)$output[1,8]) +ci +} +Dancols_sub<-function(x1,y1,x2,y2,pts=NULL,fr1=1,fr2=1,alpha=.05,plotit=FALSE,xout=FALSE,outfun=out,nboot=100,SEED=TRUE,xlab='X',ylab='Y',...){ +# +# Compare the OLS regression lines of two dependent (within) groups +# at specified design points +# +# Assume data are in x1 y1 x2 and y2 +# +# pts can be used to specify the design points where the regression lines +# are to be compared. +# If not specified, points are chosen for you. +# +x1=as.matrix(x1) +x2=as.matrix(x2) +if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') +if(SEED)set.seed(2) +FLAG=pts +X=elimna(cbind(x1,y1,x2,y2)) +if(ncol(X)>4)stop('Only one covariate is allowed') +x1=as.matrix(x1) +x2=as.matrix(x2) +p=ncol(x1) +p1=p+1 +p2=p+2 +p3=p1+p +p4=p3+1 +x1=X[,1:p] +y1=X[,p1] +x2=X[,p2:p3] +y2=X[,p4] +n=length(y1) +if(xout){ +flag1=outfun(x1,SEED=SEED,...)$out.id +flag2=outfun(x2,SEED=SEED,...)$out.id +flag=unique(c(flag1,flag2)) +if(length(flag)>0)X=X[-flag,] +x1=X[,1:p] +y1=X[,p1] +x2=X[,p2:p3] +y2=X[,p4] +} +n.keep=length(y1) +if(is.null(pts[1])){ +npt<-5 +isub<-c(1:5) # Initialize isub +test<-c(1:5) +xorder<-order(x1) +y1<-y1[xorder] +x1<-x1[xorder] +xorder<-order(x2) +y2<-y2[xorder] +x2<-x2[xorder] +n1<-1 +n2<-1 +vecn<-1 +for(i in 1:length(x1))n1[i]<-length(y1[near(x1,x1[i],fr1)]) +for(i in 1:length(x1))n2[i]<-length(y2[near(x2,x1[i],fr2)]) +for(i in 1:length(x1))vecn[i]<-min(n1[i],n2[i]) +sub<-c(1:length(x1)) +isub[1]<-min(sub[vecn>=12]) +isub[5]<-max(sub[vecn>=12]) +isub[3]<-floor((isub[1]+isub[5])/2) +isub[2]<-floor((isub[1]+isub[3])/2) +isub[4]<-floor((isub[3]+isub[5])/2) +mat<-matrix(NA,5,9) +dimnames(mat)<-list(NULL,c('X','Est1','Est2','DIF','TEST','se','ci.low','ci.hi','p.value')) +pts=x1[isub] +mat[,1]=pts +sqsd=difregYvar(x1,y1,x2,y2,regfun=lsfit,pts=pts,nboot=nboot,SEED=SEED) +est1=regYhat(x1,y1,xr=pts,regfun=lsfit) #Note: if xout=T, leverage points already removed +est2=regYhat(x2,y2,xr=pts,regfun=lsfit) +mat[,2]=est1 +mat[,3]=est2 +est=est1-est2 +mat[,4]=est +sd=sqrt(sqsd) +mat[,6]=sd +tests=(est1-est2)/sd +mat[,5]=tests +df=length(y1)-1 +pv=2*(1-pt(abs(tests),df)) +mat[,9]=pv +crit<-smmcrit(df,5) +mat[,7]=est-crit*sd +mat[,8]=est+crit*sd +} +if(!is.null(FLAG)){ +n1=1 +n2=1 +for(i in 1:length(pts)){ +n1[i]<-length(y1[near(x1,pts[i],fr1)]) +n2[i]<-length(y2[near(x2,pts[i],fr2)]) +} +mat<-matrix(NA,length(pts),9) +dimnames(mat)<-list(NULL,c('X','Est1','Est2','DIF','TEST','se','ci.low','ci.hi','p.value')) +mat[,1]<-pts +sqsd=difregYvar(x1,y1,x2,y2,regfun=lsfit,pts=pts,nboot=nboot,SEED=SEED) 
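+# difregYvar returns, for each design point, the bootstrap squared standard
+# error of Yhat1-Yhat2.  Because the two groups are dependent (the same
+# cases measured twice), each bootstrap sample resamples rows of the paired
+# data and both regression fits are based on the same resampled cases.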
+est1=regYhat(x1,y1,xr=pts,regfun=lsfit,,...) +est2=regYhat(x2,y2,xr=pts,regfun=lsfit,,...) +mat[,2]=est1 +mat[,3]=est2 +est=est1-est2 +mat[,4]=est +sd=sqrt(sqsd) +mat[,6]=sd +tests=(est1-est2)/sd +mat[,5]=tests +df=length(y1)-1 +pv=2*(1-pt(abs(tests),df)) +mat[,9]=pv +crit<-smmcrit(df,length(pts)) +mat[,7]=est-crit*sd +mat[,8]=est+crit*sd +} +if(plotit){ +if(xout){ +flag<-outfun(x1,...)$keep +x1<-x1[flag] +y1<-y1[flag] +flag<-outfun(x2,...)$keep +x2<-x2[flag] +y2<-y2[flag] +} +plot(c(x1,x2),c(y1,y2),type='n',xlab=xlab,ylab=ylab) +points(x1,y1,pch='o') +points(x2,y2,pch='+') +abline(lsfit(x1,y1)$coef) +abline(lsfit(x2,y2)$coef,lty=2) +} +list(n=n,n.keep=n.keep,output=mat) +} +DancCR<-function(x1,y1,x2,y2){ +v=optim(0,Dancols_sub1,x1=x1,y1=y1,x2=x2,y2=y2,method='BFGS')$par +v[2]=optim(0,Dancols_sub2,x1=x1,y1=y1,x2=x2,y2=y2,method='BFGS')$par +a=min(v) +v=c(a,max(v)) +} + +difregYvar<-function(x1,y1,x2,y2,regfun=tsreg,pts=NULL, +nboot=100,xout=FALSE,outfun=out,SEED=TRUE,...){ +# +# Estimate standard error of difference between the predicted value of Y +# corresponding to two dependent groups using regression estimator indicated by +# the argument +# regfun +# corresponding to the points in +# pts +# regfun defaults to tsreg, the Theil--Sen estimator +# pts default is to use all unique points among x1 and x2 +# +X=elimna(cbind(x1,y1,x2,y2)) +x1=as.matrix(x1) +x2=as.matrix(x2) +p=ncol(x1) +p1=p+1 +p2=p+2 +p3=p1+p +p4=p3+1 +x1=X[,1:p] +y1=X[,p1] +x2=X[,p2:p3] +y2=X[,p4] +x1<-as.matrix(x1) +x2=as.matrix(x2) +if(is.null(pts)){ +pts=rbind(x1,x2) +pts=unique(pts) +} +pts=as.matrix(pts) +nvpts=nrow(pts) +bvec1=matrix(NA,nrow=nboot,ncol=nvpts) +bvec2=matrix(NA,nrow=nboot,ncol=nvpts) +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +data<-matrix(sample(length(y1),size=length(y1)*nboot,replace=TRUE),nrow=nboot) +for(ib in 1:nboot){ +bvec1[ib,]=regYsub(x1[data[ib,],],y1[data[ib,]],pts,p1=p1,regfun=regfun,...) +bvec2[ib,]=regYsub(x2[data[ib,],],y2[data[ib,]],pts,p1=p1,regfun=regfun,...) +} +bvec=bvec1-bvec2 +sqsd=apply(bvec,2,var) +sqsd +} + +difreg<-function(x1,y1,x2,y2,regfun=tsreg,xout=FALSE,outfun=outpro,nboot=599, +alpha=.05,SEED=TRUE,plotit=FALSE,xlab='X',ylab='Y',pr=TRUE,...){ +# +# regression data from two different times i.e., two dependent groups +# +# compute confidence interval for the difference in the slopes +# +if(SEED)set.seed(2) +X=elimna(cbind(x1,y1,x2,y2)) +x1=as.matrix(x1) +x2=as.matrix(x2) +p=ncol(x1) +p1=p+1 +p2=p+2 +p3=p1+p +p4=p3+1 +x1=X[,1:p] +y1=X[,p1] +x2=X[,p2:p3] +y2=X[,p4] +n=length(y1) +if(xout){ +flag1=outfun(x1,...)$out.id +flag2=outfun(x2,...)$out.id +flag=unique(c(flag1,flag2)) +X=X[-flag,] +x1=X[,1:p] +y1=X[,p1] +x2=X[,p2:p3] +y2=X[,p4] +} +flagF=identical(regfun,tsreg) +if(flagF){ +if(pr){ +if(sum(duplicated(y1)>0))print('Duplicate values detected; regfun=tshdreg might have more power than tsreg') +pr=FALSE +} +if(pr){ +if(sum(duplicated(y2)>0))print('Duplicate values detected; regfun=tshdreg might have more power than tsreg') +}} +nk=length(y1) +x1=as.matrix(x1) +x2=as.matrix(x2) +data<-matrix(sample(length(y1),size=length(y1)*nboot,replace=TRUE),nrow=nboot) +data=listm(t(data)) +bvec1<-lapply(data,regboot,x1,y1,regfun,xout=FALSE,...) +# bvec is a p+1 by nboot matrix. The first row +# contains the bootstrap intercepts, the second row +# contains the bootstrap values for first predictor, etc. +bvec2<-lapply(data,regboot,x2,y2,regfun,xout=FALSE,...) 
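+# For reference: for each coefficient the percentile bootstrap p-value is
+# based on the proportion of bootstrap differences (group 1 minus group 2)
+# below zero, with ties counted as one half:
+#   phat = (#{dif<0} + .5*#{dif==0})/nboot,  p = 2*min(phat, 1-phat).
+# The confidence interval uses the corresponding order statistics of the
+# sorted bootstrap differences; with nboot=599 and alpha=.05 this gives
+# ilow=16 and ihi=584.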
+bvec1=matl(bvec1) +bvec2=matl(bvec2) +dif=t(bvec1)-t(bvec2) +dif.sort=apply(dif,2,sort) +pvec=NA +for(i in 1:p1){ +pvec[i]<-(sum(dif[,i]<0)+.5*sum(dif[,i]==0))/nboot +if(pvec[i]>.5)pvec[i]<-1-pvec[i] +} +pvec<-2*pvec +ilow<-round((alpha/2) * nboot) +ihi<-nboot - ilow +ilow<-ilow+1 +ci=matrix(NA,nrow=p1,ncol=3) +ci[,1]=c(0:p) +for(i in 1:p1){ +ci[i,2]=dif.sort[ilow,i] +ci[i,3]=dif.sort[ihi,i] +} +dimnames(ci)=list(NULL,c('Param','ci.low','ci.hi')) +if(plotit){ +reg2plot(x1,y1,x2,y2,xlab=xlab,ylab=ylab,regfun=regfun,...) +} +lvec='Intercept' +for(j in 2:p1)lvec=c(lvec,paste('slope',j-1)) +#pvec=array(pvec,dimnames=lvec) +est1=regfun(x1,y1,xout=FALSE,...)$coef +est2=regfun(x2,y2,xout=FALSE,...)$coef +list(n=n,n.keep=nk,param=lvec,p.values=pvec,est.grp1=est1,est.grp2=est2,conf.intervals=ci) +} + +Dancts<-function(x1,y1,x2,y2,pts=NULL,regfun=tsreg,fr1=1,fr2=1,alpha=.05,plotit=TRUE,xout=FALSE, +outfun=out,nboot=100,SEED=TRUE,xlab='X',ylab='Y',pr=TRUE,...){ +# +# Compare the regression lines of two dependent groups using +# the robust regression indicated by the argument +# regfun. Default is modified Theil--Sen estimator +# +# Comparisons are done at specified design points +# This is a robust Johnson-Neyman method for dependent groups. +# +# For OLS, use Dancols +# Assume data are in x1 y1 x2 and y2 +# +# pts can be used to specify the design points where the regression lines +# are to be compared. +# +x1=as.matrix(x1) +x2=as.matrix(x2) +if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') +if(SEED)set.seed(2) +FLAG=pts +X=elimna(cbind(x1,y1,x2,y2)) +if(ncol(X)>4)stop('Only one covariate is allowed') +x1=as.matrix(x1) +x2=as.matrix(x2) +p=ncol(x1) +p1=p+1 +p2=p+2 +p3=p1+p +p4=p3+1 +x1=X[,1:p] +y1=X[,p1] +x2=X[,p2:p3] +y2=X[,p4] +if(xout){ +if(identical(outfun,outblp)){ +flag1=outblp(x1,y1,plotit=FALSE)$bad.lev +flag2=outblp(x2,y2,plotit=FALSE)$bad.lev +} +else{ +flag1=outfun(x1)$out.id +flag2=outfun(x2)$out.id +} +flag=unique(c(flag1,flag2)) +if(length(flag)>0)X=X[-flag,] +x1=X[,1:p] +y1=X[,p1] +x2=X[,p2:p3] +y2=X[,p4] +} +flagF=identical(regfun,tsreg) +if(flagF){ +if(pr){ +if(sum(duplicated(y1)>0))print('Duplicate values detected; regfun=tshdreg might have more power than tsreg') +pr=FALSE +} +if(pr){ +if(sum(duplicated(y2)>0))print('Duplicate values detected; regfun=tshdreg might have more power than tsreg') +}} +if(is.null(pts[1])){ +npt<-5 +isub<-c(1:5) # Initialize isub +test<-c(1:5) +xorder<-order(x1) +y1<-y1[xorder] +x1<-x1[xorder] +xorder<-order(x2) +y2<-y2[xorder] +x2<-x2[xorder] +n1<-1 +n2<-1 +vecn<-1 +for(i in 1:length(x1))n1[i]<-length(y1[near(x1,x1[i],fr1)]) +for(i in 1:length(x1))n2[i]<-length(y2[near(x2,x1[i],fr2)]) +for(i in 1:length(x1))vecn[i]<-min(n1[i],n2[i]) +sub<-c(1:length(x1)) +isub[1]<-min(sub[vecn>=12]) +isub[5]<-max(sub[vecn>=12]) +isub[3]<-floor((isub[1]+isub[5])/2) +isub[2]<-floor((isub[1]+isub[3])/2) +isub[4]<-floor((isub[3]+isub[5])/2) +mat<-matrix(NA,5,9) +dimnames(mat)<-list(NULL,c('X','Est1','Est2','DIF','TEST','se','ci.low','ci.hi','p.value')) +pts=x1[isub] +mat[,1]=pts +sqsd=difregYvar(x1,y1,x2,y2,regfun=regfun,pts=pts,nboot=nboot,SEED=SEED) +est1=regYhat(x1,y1,xr=pts,regfun=regfun) #Note: if xout=T, leverage points already removed +est2=regYhat(x2,y2,xr=pts,regfun=regfun) +mat[,2]=est1 +mat[,3]=est2 +est=est1-est2 +mat[,4]=est +sd=sqrt(sqsd) +mat[,6]=sd +tests=(est1-est2)/sd +mat[,5]=tests +pv=2*(1-pnorm(abs(tests))) +mat[,9]=pv +crit<-smmcrit(Inf,5) +mat[,7]=est-crit*sd +mat[,8]=est+crit*sd +} +if(!is.null(FLAG)){ +n1=1 +n2=1 +for(i in 
1:length(pts)){ +n1[i]<-length(y1[near(x1,pts[i],fr1)]) +n2[i]<-length(y2[near(x2,pts[i],fr2)]) +} +mat<-matrix(NA,length(pts),9) +dimnames(mat)<-list(NULL,c('X','Est1','Est2','DIF','TEST','se','ci.low','ci.hi','p.value')) +mat[,1]<-pts +sqsd=difregYvar(x1,y1,x2,y2,regfun=regfun,pts=pts,nboot=nboot,SEED=SEED) +est1=regYhat(x1,y1,xr=pts,regfun=regfun) +est2=regYhat(x2,y2,xr=pts,regfun=regfun) +mat[,2]=est1 +mat[,3]=est2 +est=est1-est2 +mat[,4]=est +sd=sqrt(sqsd) +mat[,6]=sd +tests=(est1-est2)/sd +mat[,5]=tests +pv=2*(1-pnorm(abs(tests))) +mat[,9]=pv +crit<-smmcrit(Inf,length(pts)) +mat[,7]=est-crit*sd +mat[,8]=est+crit*sd +} +if(plotit){ +#if(xout){ #Leverage points already removed if xout=TRUE +#flag<-outfun(x1,...)$keep +#x1<-x1[flag] +#y1<-y1[flag] +#flag<-outfun(x2,...)$keep +#x2<-x2[flag] +#y2<-y2[flag] +#} +plot(c(x1,x2),c(y1,y2),type='n',xlab=xlab,ylab=ylab) +points(x1,y1,pch='o') +points(x2,y2,pch='+') +abline(regfun(x1,y1)$coef) +abline(regfun(x2,y2)$coef,lty=2) +} +list(output=mat) +} + + +tshd<-function(x,y,HD=TRUE,plotit=FALSE,xlab='X',ylab='Y',OPT=FALSE,tr=FALSE){ +# +# Compute the Theil-Sen regression estimator. +# Only a single predictor is allowed in this version +# +# HD=TRUE, use Harrell-Davis for slopes +# HD=FALSE, use usual median +# +# OPT=TRUE, compute the intercept using median(y)-b_1median(X) +# OPT=FALSE compute the intercept using median of y-b_1X +# +# +temp<-matrix(c(x,y),ncol=2) +temp<-elimna(temp) # Remove any pairs with missing values +x<-temp[,1] +y<-temp[,2] +ord<-order(x) +xs<-x[ord] +ys<-y[ord] +vec1<-outer(ys,ys,'-') +vec2<-outer(xs,xs,'-') +v1<-vec1[vec2>0] +v2<-vec2[vec2>0] +if(!HD)slope<-median(v1/v2,na.rm=TRUE) +if(HD)slope<-hd(v1/v2,na.rm=TRUE,tr=tr) +res=y-slope*x +if(!OPT)int=hd(res,tr=tr) +if(OPT)int=hd(y,na.rm=TRUE)-slope*hd(x,na.rm=TRUE,tr=tr) +coef=c(int,slope) +if(plotit){ +plot(x,y,xlab=xlab,ylab=ylab) +abline(coef) +} +list(coef=coef) +} + +tshdreg<-function(x,y,HD=TRUE,xout=FALSE,outfun=out,iter=5,varfun=pbvar,tr=FALSE,do.stre=TRUE, +corfun=pbcor,plotit=FALSE,tol=.0001,RES=TRUE,OPT=FALSE,xlab='X',ylab='Y',...){ +# +# Compute Theil-Sen regression estimator +# +# Use back-fitting +# when there is more than one predictor +# and estimate intercept using Harrel-Davis estimator +# +x<-as.matrix(x) +xx<-cbind(x,y) +xx<-elimna(xx) +x<-xx[,1:ncol(x)] +x<-as.matrix(x) +y<-xx[,ncol(x)+1] +temp<-NA +x<-as.matrix(x) +if(xout){ +x<-as.matrix(x) +flag<-outfun(x,plotit=plotit,...)$keep +x<-x[flag,] +y<-y[flag] +x<-as.matrix(x) +} +if(ncol(x)==1){ +temp1<-tshd(x,y,HD=HD,plotit=plotit,xlab=xlab,ylab=ylab,OPT=OPT,tr=tr) +coef<-temp1$coef +res<-y-coef[2]*x-coef[1] +} +if(ncol(x)>1){ +for(p in 1:ncol(x)){ +temp[p]<-tshd(x[,p],y)$coef[2] +} +res<-y-x%*%temp +alpha<-hd(res) +r<-matrix(NA,ncol=ncol(x),nrow=nrow(x)) +tempold<-temp +for(it in 1:iter){ +for(p in 1:ncol(x)){ +r[,p]<-y-x%*%temp-alpha+temp[p]*x[,p] +temp[p]<-tshd(x[,p],r[,p],plotit=FALSE,tr=tr)$coef[2] +} +if(max(abs(temp-tempold))0){ +e.pow<-varfun(yhat)/varfun(y) +if(!is.na(e.pow)){ +if(e.pow>=1)e.pow<-corfun(yhat,y)$cor^2 +e.pow=as.numeric(e.pow) +stre=sqrt(e.pow) +}}} +if(!RES)res=NULL +list(coef=coef,residuals=res,Strength.Assoc=stre,Explanatory.Power=e.pow,residuals=res) +} + + +ltsreg<-function(x,y,tr=.5,xout=FALSE,outfun=outpro,STAND=TRUE,...){ +# +# Leasts trimmed squares regression via the function ltsReg in the +# R package robustbase +# +x<-as.matrix(x) +xx<-cbind(x,y) +xx<-elimna(xx) +x<-xx[,1:ncol(x)] +x<-as.matrix(x) +y<-xx[,ncol(x)+1] +temp<-NA +x<-as.matrix(x) +if(xout){ 
+x<-as.matrix(x) +flag<-outfun(x,plotit=plotit,...)$keep +x<-x[flag,] +y<-y[flag] +x<-as.matrix(x) +} +library(robustbase) +temp=ltsReg(y~x,alpha=1-tr) +#coef=ltsReg(y~x)[8]$coefficients +coef=temp[8]$coefficients +res=temp[7]$raw.resid +list(coef=coef,residuals=res) +} + +# For convenience when doing robust ridge regression: +ltsreg.2<-function(x,y,tr=.2,xout=FALSE,outfun=outpro,STAND=TRUE,...){ +# +# Leasts trimmed squares regression via the function ltsReg in the +# R package robustbase +# +x<-as.matrix(x) +xx<-cbind(x,y) +xx<-elimna(xx) +x<-xx[,1:ncol(x)] +x<-as.matrix(x) +y<-xx[,ncol(x)+1] +temp<-NA +x<-as.matrix(x) +if(xout){ +x<-as.matrix(x) +flag<-outfun(x,plotit=plotit,...)$keep +x<-x[flag,] +y<-y[flag] +x<-as.matrix(x) +} +library(robustbase) +temp=ltsReg(y~x,alpha=1-tr) +coef=temp[8]$coefficients +res=temp[7]$raw.resid +list(coef=coef,residuals=res) +} + + + + +DregG<-function(x1,y1,x2,y2,nullv=NULL,regfun=tshdreg,nboot=500,xout=FALSE,outfun=outpro, +SEED=TRUE,plotit=FALSE,pr=TRUE,...){ +# +# Global test that two dependent groups have identical +# regression parameters. +# +# Use a variation of Hotelling's test coupled with a bootstrap +# estimate of the relevant covariance matrix associated with the differences +# in the estimates of the parameters.# For OLS, use DregGOLS +# +# (plotit=F is used so that in simulations, if xout=T, the seed is not +# set everytime outpro is called.) +# +if(SEED)set.seed(2) +X=elimna(cbind(x1,y1,x2,y2)) +x1=as.matrix(x1) +x2=as.matrix(x2) +p=ncol(x1) +p1=p+1 +p2=p+2 +p3=p1+p +p4=p3+1 +if(is.null(nullv))nullv=rep(0,p1) +x1=X[,1:p] +y1=X[,p1] +x2=X[,p2:p3] +y2=X[,p4] +n=length(y1) +if(xout){ +flag1=outfun(x1,...)$out.id +flag2=outfun(x2,...)$out.id +flag=unique(c(flag1,flag2)) +if(length(flag)>0)X=X[-flag,] +x1=X[,1:p] +y1=X[,p1] +x2=X[,p2:p3] +y2=X[,p4] +} +x1=as.matrix(x1) +x2=as.matrix(x2) +flagF=FALSE +flagF1=identical(regfun,tsreg) +flagF1[2]=identical(regfun,tshdreg) +#flagF1[3]=identical(regfun,tshdreg_C) obsolete,now it causes an error +if(sum(flagF1)>0)flagF=TRUE +if(!flagF){if(pr){ +if(sum(duplicated(y1)>0))print('Duplicate values detected; regfun=tshdreg might have more power than tsreg') +pr=FALSE +} +if(pr){ +if(sum(duplicated(y2)>0))print('Duplicate values detected; regfun=tshdreg might have more power than tsreg') +}} +data<-matrix(sample(length(y1),size=length(y1)*nboot,replace=TRUE),nrow=nboot) +bvec1<-apply(data,1,regboot,x1,y1,regfun=regfun,xout=FALSE,...) +# bvec is a p+1 by nboot matrix. The first row +# contains the bootstrap intercepts, the second row +# contains the bootstrap values for first predictor, etc. +bvec2<-apply(data,1,regboot,x2,y2,regfun=regfun,xout=FALSE,...) +dif=t(bvec1-bvec2) +temp<-pdis(rbind(dif,nullv)) +sig.level<-sum(temp[nboot+1]0)X=X[-flag,] +x1=X[,1:p] +y1=X[,p1] +x2=X[,p2:p3] +y2=X[,p4] +} +flagF=FALSE +flagF1=identical(regfun,tsreg) +flagF1[2]=identical(regfun,tshdreg) +#flagF1[3]=identical(regfun,tshdreg_C) +if(sum(flagF1)>0)flagF=TRUE +if(!flagF){ +if(pr){ +if(sum(duplicated(y1)>0))print('Duplicate values detected; regfun=tshdreg might have more power than tsreg') +pr=FALSE +} +if(pr){ +if(sum(duplicated(y2)>0))print('Duplicate values detected; regfun=tshdreg might have more power than tsreg') +}} +x1=as.matrix(x1) +x2=as.matrix(x2) +data<-matrix(sample(length(y1),size=length(y1)*nboot,replace=TRUE),nrow=nboot) +data=listm(t(data)) +bvec1=mclapply(data,regbootMC,x1,y1,regfun,xout=FALSE,...) +bvec1=matl(bvec1) +# bvec is a p+1 by nboot matrix. 
The first row +# contains the bootstrap intercepts, the second row +# contains the bootstrap values for first predictor, etc. +bvec2=mclapply(data,regbootMC,x2,y2,regfun,xout=FALSE,...) +bvec2=matl(bvec2) +dif=t(bvec1-bvec2) +temp<-pdisMC(rbind(dif,nullv)) +sig.level<-sum(temp[nboot+1]0))print('Duplicate values detected; regfun=tshdreg might have more power than tsreg') +pr=FALSE +} +if(pr){ +if(sum(duplicated(y2)>0))print('Duplicate values detected; regfun=tshdreg might have more power than tsreg') +}} +nk=length(y1) +x1=as.matrix(x1) +x2=as.matrix(x2) +data<-matrix(sample(length(y1),size=length(y1)*nboot,replace=TRUE),nrow=nboot) +data=listm(t(data)) +bvec1<-mclapply(data,regboot,x1,y1,regfun,mc.preschedule=TRUE,xout=FALSE,...) +# bvec is a p+1 by nboot matrix. The first row +# contains the bootstrap intercepts, the second row +# contains the bootstrap values for first predictor, etc. +bvec2<-mclapply(data,regboot,x2,y2,regfun,mc.preschedule=TRUE,xout=FALSE,...) +bvec1=matl(bvec1) +bvec2=matl(bvec2) +dif=t(bvec1)-t(bvec2) +dif.sort=apply(dif,2,sort) +pvec=NA +for(i in 1:p1){ +pvec[i]<-(sum(dif[,i]<0)+.5*sum(dif[,i]==0))/nboot +if(pvec[i]>.5)pvec[i]<-1-pvec[i] +} +pvec<-2*pvec +ilow<-round((alpha/2) * nboot) +ihi<-nboot - ilow +ilow<-ilow+1 +ci=matrix(NA,nrow=p1,ncol=3) +ci[,1]=c(0:p) +for(i in 1:p1){ +ci[i,2]=dif.sort[ilow,i] +ci[i,3]=dif.sort[ihi,i] +} +dimnames(ci)=list(NULL,c('Param','ci.low','ci.hi')) +if(plotit){ +reg2plot(x1,y1,x2,y2,xlab=xlab,ylab=ylab,regfun=regfun,...) +} +lvec='Intercept' +for(j in 2:p1)lvec=c(lvec,paste('slope',j-1)) +#pvec=array(pvec,dimnames=lvec) +est1=regfun(x1,y1,xout=FALSE,...)$coef +est2=regfun(x2,y2,xout=FALSE,...)$coef +list(n=n,n.keep=nk,param=lvec,p.values=pvec,est.grp1=est1,est.grp2=est2,conf.intervals=ci) +} + +qcipb<-function(x,q=.5,alpha=.05,nboot=2000,SEED=TRUE,nv=0,...){ +# +# Compute a bootstrap, .95 confidence interval for the +# qth quantile via the Harrell--Davis estimator. +# +# Default is q=.5, meaning a confidence interval for the median is +# computed. +# +# Appears to be best method when there are tied values +# +# nv=null value when computing a p-value +# +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +x=elimna(x) +data<-matrix(sample(x,size=length(x)*nboot,replace=TRUE),nrow=nboot) +bvec<-apply(data,1,hd,q=q) +bvec<-sort(bvec) +low<-round((alpha/2)*nboot) +up<-nboot-low +low<-low+1 +pv=mean(bvec>nv)+.5*mean(bvec==nv) +pv=2*min(c(pv,1-pv)) +estimate=hd(x,q=q) +list(ci=c(bvec[low],bvec[up]),n=length(x),estimate=estimate,p.value=pv) +} +Qreg<-function(x,y,q=.5,xout=FALSE,outfun=outpro,res.vals=TRUE,plotit=FALSE,xlab='X',ylab='Y',pch='*',...){ +# +# Quantile regression. 
Like the function qreg, but avoids computational +# problems that can arise when there are tied values among the dependent +# variable +# +x<-as.matrix(x) +xx<-cbind(x,y) +xx<-elimna(xx) +xx=as.matrix(xx) +x<-xx[,1:ncol(x)] +x<-as.matrix(x) +y<-xx[,ncol(x)+1] +temp<-NA +x<-as.matrix(x) +if(xout){ +x<-as.matrix(x) +if(identical(outfun,outblp))flag=outblp(x,y,plotit=FALSE)$keep +else +flag<-outfun(x,plotit=plotit,...)$keep +x<-x[flag,] +y<-y[flag] +x<-as.matrix(x) +} +init=ols(x,y)$coef +v=optim(init,qfun,x=x,y=y,q=q,method='BFGS')$par +p1=ncol(x)+1 +res=NULL +if(res.vals)res<-y-x%*%v[2:p1]-v[1] +if(ncol(x)==1){ +if(plotit){ +plot(x,y,xlab=xlab,ylab=ylab,pch=pch) +abline(v) +}} +list(coef=v,residuals=res) +} +qfun<-function(x,y,coef,q){ +x=as.matrix(x) +p1=ncol(x)+1 + r=y-coef[1]-x%*%coef[2:p1] + rhoq=sum(r*(q-as.numeric((r<0)))) + s=sum(rhoq) + s + } +Rcoefalpha<-function(x,cov.fun=wincov,pr=FALSE,...){ +# Compute coefficient alpha plus a robust analog) +# +# x is assumed to be a matrix +# output: +# coefficient alpha plus robust version +# +# NOTE: now use cov.fun=wincov by default. Use skipcov in earlier version, +# But it might not be computable. +# +# Possible choices for cov.fun: +# skipcov +# tbscov +# covout +# covogk +# mgvcov +# mvecov +# mcdcov +# wincov +# bicovM +# +x=elimna(x) +x=as.matrix(x) +mcor=winall(x,tr=0)$cov +term=sum(mcor) +diag(mcor)=0 +term1=sum(mcor) +k=ncol(x) +lam=k*term1/(k-1) +res1=lam/term +# +mcor=cov.fun(x,...) +term=sum(mcor) +diag(mcor)=0 +term1=sum(mcor) +k=ncol(x) +lam=k*term1/(k-1) +lam=lam/term +list(coef.alpha=res1,robust.alpha=lam) +} + +Dancovamp<-function(x1,y1,x2=NULL,y2,fr1=1,fr2=1,tr=0.2,alpha=0.05, pts=NULL,SEED=TRUE,DIF=TRUE,cov.fun=skipcov,...){ +# +# Compare two dependent groups using a nonparametric ANCOVA method. +# Multiple covariates are allowed. +# No parametric assumption is made about the form of +# the regression lines--a running interval smoother is used. 
+# Design points are chosen based on depth of points in x1 if pts=NULL +# Assume data are in x1 y1 x2 and y2 +# +# Choices for cov.fun include +# skipcov +# tbscov +# covogk +# mgvcov +# mvecov +# mcdcov +# wincov +# +if(is.null(x2))x2=x1 +flag=identical(cov.fun,cov.mve) +if(flag)if(SEED)set.seed(2) # now cov.mve always returns same result +x1=as.matrix(x1) +x2=as.matrix(x2) +if(ncol(x1)!=ncol(x2))stop('x1 and x2 should have same number of columns') +if(ncol(x1)==1)stop('For one covariate, use Dancova') +if(nrow(x1)!=nrow(x2))stop('x1 and x2 should have same number of rows') +if(length(y1)!=length(y2))stop('y1 and y2 should have same length') +p=ncol(x1) +p1=p+1 +m1=elimna(cbind(x1,y1,x2,y2)) +x1=m1[,1:p] +y1=m1[,p1] +p2=p1+1 +p3=p2+p-1 +p4=p3+1 +x2=m1[,p2:p3] +y2=m1[,p4] +if(is.null(pts[1])){ +x1<-as.matrix(x1) +x2<-as.matrix(x2) +pts<-ancdes(x1) +} +pts<-as.matrix(pts) +flag<-rep(TRUE,nrow(pts)) +if(!DIF){ +mat<-matrix(NA,nrow(pts),10) +dimnames(mat)<-list(NULL,c('n','est1','est2','DIF','TEST','se','ci.low','ci.hi','p.value','p.adj')) +} +if(DIF){ +mat<-matrix(NA,nrow(pts),8) +dimnames(mat)<-list(NULL,c('n','DIF','TEST','se','ci.low','ci.hi','p.value','p.adj')) +} +n<-1 +vecn<-1 +mval1<-cov.funl(cov.fun(x1,...)) +mval2<-cov.funl(cov.fun(x2,...)) +for(i in 1:nrow(pts)){ +t1=near3d(x1,pts[i,],fr1,mval1) +t2=near3d(x2,pts[i,],fr2,mval2) +pick=as.logical(t1*t2) +n[i]<-length(y1[pick]) +if(n[i]<5)flag[i]<-FALSE +if(n[i]>=5){ +if(!DIF){ +test<-yuend(y1[pick],y2[pick],tr=tr,alpha=alpha) +mat[i,2]=test$est1 +mat[i,3]=test$est2 +mat[i,4]=test$dif +mat[i,5]=test$teststat +mat[i,6]=test$se +mat[i,7]=test$ci[1] +mat[i,8]=test$ci[2] +mat[i,9]=test$p.value +} +if(DIF){ +test<-trimci(y1[pick]-y2[pick],tr=tr,pr=FALSE,alpha=alpha) +mat[i,2]=test$estimate +mat[i,3]=test$test.stat +mat[i,4]=test$se +mat[i,5]=test$ci[1] +mat[i,6]=test$ci[2] +mat[i,7]=test$p.value +} +} +mat[i,1]<-n[i] +} +if(!DIF)mat[,10]=p.adjust(mat[,9],method='hoch') +if(DIF)mat[,8]=p.adjust(mat[,7],method='hoch') +if(sum(flag)==0)print('No comparable design points found, might increase span.') +list(pts=pts,output=mat) +} + + +cov.funl<-function(m){ +list(cov=m) +} +rplotCIS<-function(x,y,tr=.2,fr=.8,plotit=TRUE,scat=TRUE,pyhat=FALSE,SEED=TRUE,dfmin=8, +eout=FALSE,xout=FALSE,xlab='x',ylab='y',outfun=out,LP=TRUE,alpha=.05,pch='.',...){ +# +# A simple method for computing a confidence band based on +# running interval smoother and a trimmed mean. 
+# +# rplotCI adjusts the band so that FWE=1-alpha +# +# LP=TRUE, the plot is further smoothed via lowess +# +# fr controls amount of smoothing +plotit<-as.logical(plotit) +scat<-as.logical(scat) +str=rplot(x,y,tr=tr,xout=xout,plotit=FALSE,LP=LP,fr=fr,pr=FALSE)$Strength.Assoc +m<-cbind(x,y) +if(ncol(m)>2)stop('Only one independent variable can be used') +m<-elimna(m) +nv=nrow(m) +if(eout && xout)stop('Not allowed to have eout=xout=T') +if(eout){ +flag<-outfun(m,plotit=FALSE)$keep +m<-m[flag,] +} +if(xout){ +flag<-outfun(m[,1])$keep +m<-m[flag,] +} +x=m[,1] +y=m[,2] +n.keep=length(y) +rmd<-c(1:length(x)) +for(i in 1:length(x))rmd[i]<-mean(y[near(x,x[i],fr=fr)],tr=tr) +sedf=runse(x,y,fr=fr,tr=tr,pts=x,SEED=SEED) +df=sedf$df +flag=df>dfmin +se=sedf$se +low=rmd[flag]-qt(1-alpha/2,df[flag])*se[flag] +up=rmd[flag]+qt(1-alpha/2,df[flag])*se[flag] +rmd=rmd[flag] +x=x[flag] +y=y[flag] +if(plotit){ +ord=order(x) +x=x[ord] +rmd=rmd[ord] +up=up[ord] +low=low[ord] +if(LP){ +rmd=lplot(x,rmd,plotit=FALSE,pyhat=TRUE,pr=FALSE)$yhat +up=lplot(x,up,plotit=FALSE,pyhat=TRUE,pr=FALSE)$yhat +low=lplot(x,low,plotit=FALSE,pyhat=TRUE,pr=FALSE)$yhat +} +if(scat){ +plot(c(x,x),c(y,rmd),xlab=xlab,ylab=ylab,type='n') +lines(x,up,lty=2) +lines(x,low,lty=2) +points(x,y,pch=pch) +} +if(!scat)plot(c(x,x),c(y,rmd),type='n',ylab=ylab,xlab=xlab) +points(x,rmd,type='n') +sx<-sort(x) +xorder<-order(x) +sysm<-rmd[xorder] +lines(sx,sysm) +lines(x,up,lty=2) +lines(x,low,lty=2) +} +if(pyhat){output<-cbind(x,rmd,low,up) +dimnames(output)=list(NULL,c('x','y.hat','ci.low','ci.up')) +} +if(!pyhat)output<-'Done' +list(output=output,str=str,n=nv,n.keep=n.keep) +} +runse<-function(x,y,fr=1,tr=.2,pts=x,RNA=FALSE,outfun=out,xout=FALSE,SEED=TRUE){ +# +# Estimate SE of Yhat when using a running interval smooth +# based on a trimmed mean. +# fr controls amount of smoothing +# +# Missing values are automatically removed. +# +# RNA=F, do not remove missing values when averaging +# (computing the smooth) at x +# xout=T removes points for which x is an outlier +# +if(SEED)set.seed(2) +temp<-cbind(x,y) +if(ncol(temp)>2)stop(' 1 predictor only is allowed') +temp<-elimna(temp) # Eliminate any rows with missing values +if(xout){ +flag<-outfun(x,plotit=FALSE)$keep +temp<-temp[flag,] +} +x<-temp[,1] +y<-temp[,2] +pts<-as.matrix(pts) +vals<-NA +WSE=NA +df=NA +h=NA +for(i in 1:length(pts)){ +ysub=y[near(x,pts[i],fr)] +v=trimse(ysub,tr=tr,na.rm=TRUE) +if(is.na(v))v=0 +if(v>0){ +WSE[i]=trimse(ysub,tr=tr,na.rm=TRUE) +df[i]=length(ysub)-2*floor(tr*length(ysub))-1 +} +if(v==0){ +df[i]=0 +WSE[i]=0 +}} +list(se=WSE,df=df) +} + +rplotpbCI<-function(x,y,est=onestep,fr=1,plotit=TRUE,scat=TRUE,pyhat=FALSE, +xout=FALSE,xlab='x',ylab='y',outfun=out,LP=TRUE,alpha=.05, +nboot=500,SEED=TRUE,...){ +# +# running interval smoother based on any measure of location +# Unlike rplotCI, uses a percentile bootstrap +# method to get a confidence band +# +# LP=TRUE, the plot is further smoothed via lowess +# +# fr controls amount of smoothing +plotit<-as.logical(plotit) +scat<-as.logical(scat) +m<-cbind(x,y) +if(ncol(m)>2)stop('Only one independent variable can be used') +m<-elimna(m) +x=m[,1] +y=m[,2] +if(xout){ +flag<-outfun(m[,1])$keep +m<-m[flag,] +} +x=m[,1] +y=m[,2] +low=rep(NA,length(y)) +up=rep(NA,length(y)) +rmd<-NA +for(i in 1:length(x)){ +sel=y[near(x,x[i],fr)] +temp=onesampb(sel,est=est,nboot=nboot,alpha=alpha,SEED=SEED,...) 
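+# At each observed x value, onesampb returns a percentile bootstrap
+# confidence interval for the measure of location (argument est) of the Y
+# values whose x is close to x[i], namely the points picked out by
+# near(x,x[i],fr); fr controls the span.  The pointwise estimates and limits
+# are collected and, when LP=TRUE, smoothed via lowess before plotting.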
+low[i]=temp$ci[1] +up[i]=temp$ci[2] +rmd[i]=temp$estimate +} +all=elimna(cbind(x,low,up,y,rmd)) +x=all[,1] +low=all[,2] +up=all[,3] +y=all[,4] +rmd=all[,5] +if(plotit){ +ord=order(x) +x=x[ord] +y=y[ord] +rmd=rmd[ord] +up=up[ord] +low=low[ord] +if(LP){ +rmd=lplot(x,rmd,plotit=FALSE,pyhat=TRUE,pr=FALSE)$yhat +up=lplot(x,up,plotit=FALSE,pyhat=TRUE,pr=FALSE)$yhat +low=lplot(x,low,plotit=FALSE,pyhat=TRUE,pr=FALSE)$yhat +} +if(scat){ +plot(c(x,x),c(y,rmd),xlab=xlab,ylab=ylab,type='n') +points(x,y) +lines(x,up,lty=2) +lines(x,low,lty=2) +} +if(!scat)plot(c(x,x),c(y,rmd),type='n',ylab=ylab,xlab=xlab) +points(x,rmd,type='n') +sx<-sort(x) +xorder<-order(x) +sysm<-rmd[xorder] +lines(sx,sysm) +lines(x,up,lty=2) +lines(x,low,lty=2) +} +if(pyhat)output<-rmd +if(!pyhat)output<-'Done' +list(output=output) +} + +Danctspb<-function(x1,y1,x2,y2,pts=NULL,regfun=tsreg,fr1=1,fr2=1,alpha=.05,plotit=TRUE,xout=FALSE,SCAT=TRUE, +outfun=outpro,BLO=FALSE,nboot=500,SEED=TRUE,xlab='X',ylab='Y',pr=TRUE,eout=FALSE,...){ +# +# Compare the regression lines of two dependent groups at specified design points using +# the robust regression estimator indicated by the argument +# regfun. Default is modified Theil--Sen estimator +# +# Comparisons are done at specified design points +# This is a robust Johnson-Neyman method for dependent groups. +# +# For OLS, use Dancols +# Assume data are in x1 y1 x2 and y2 +# +# pts can be used to specify the design points where the regression lines +# are to be compared. +# +# Uses bootstrap samples based on resamples of the points followed by a regression fit. +# In contrast, Dancts uses bootstrap estimate of the se of Yhat followed by a pivotal test +# statistic. +# +x1=as.matrix(x1) +x2=as.matrix(x2) +if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') +if(SEED)set.seed(2) +FLAG=pts +X=elimna(cbind(x1,y1,x2,y2)) +if(ncol(X)>4)stop('Only one covariate is allowed') +x1=as.matrix(x1) +x2=as.matrix(x2) +p=ncol(x1) +p1=p+1 +p2=p+2 +p3=p1+p +p4=p3+1 +x1=X[,1:p] +y1=X[,p1] +x2=X[,p2:p3] +y2=X[,p4] +if(xout){ +flag1=outfun(x1)$out.id +flag2=outfun(x2)$out.id +flag=unique(c(flag1,flag2)) +if(length(flag)>0)X=X[-flag,] +x1=X[,1:p] +y1=X[,p1] +x2=X[,p2:p3] +y2=X[,p4] +} +flagF=identical(regfun,tsreg) +if(identical(regfun,tshdreg))flagF=FALSE +if(flagF){ +if(pr){ +if(sum(duplicated(y1)>0))print('Duplicate values detected; tshdreg might have more power than tsreg') +pr=FALSE +} +if(pr){ +if(sum(duplicated(y2)>0))print('Duplicate values detected; tshdreg might have more power than tsreg') +}} +if(is.null(pts[1])){ +npt<-5 +isub<-c(1:5) # Initialize isub +test<-c(1:5) +xorder<-order(x1) +y1<-y1[xorder] +x1<-x1[xorder] +xorder<-order(x2) +y2<-y2[xorder] +x2<-x2[xorder] +n1<-1 +n2<-1 +vecn<-1 +for(i in 1:length(x1))n1[i]<-length(y1[near(x1,x1[i],fr1)]) +for(i in 1:length(x1))n2[i]<-length(y2[near(x2,x1[i],fr2)]) +for(i in 1:length(x1))vecn[i]<-min(n1[i],n2[i]) +sub<-c(1:length(x1)) +isub[1]<-min(sub[vecn>=12]) +isub[5]<-max(sub[vecn>=12]) +isub[3]<-floor((isub[1]+isub[5])/2) +isub[2]<-floor((isub[1]+isub[3])/2) +isub[4]<-floor((isub[3]+isub[5])/2) +pts=x1[isub] +} +for(i in 1:length(pts)){ +n1<-1 +n2<-1 +n1[i]<-length(y1[near(x1,pts[i],fr1)]) +n2[i]<-length(y2[near(x2,pts[i],fr2)]) +} +mat<-matrix(NA,length(pts),7) +dimnames(mat)<-list(NULL,c('X','Est1','Est2','DIF','ci.low','ci.hi','p.value')) +mat[,1]=pts +n=length(y1) +x1=as.matrix(x1) +x2=as.matrix(x2) +data<-matrix(sample(length(y1),size=n*nboot,replace=TRUE),nrow=nboot) 
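+# Each row of data is one bootstrap sample of case indices.  Danctspb.sub
+# refits the regression (regfun) to the resampled cases and returns the
+# predicted Y at the design points in pts; the same indices are applied to
+# both groups because the observations are paired.  The p-value at each
+# design point is then the usual percentile bootstrap p-value based on the
+# proportion of bootstrap differences Yhat1-Yhat2 falling below (or at)
+# zero.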
+est1=apply(data,1,Danctspb.sub,x1,y1,xr=pts,regfun=regfun,xout=FALSE,...) +est2=apply(data,1,Danctspb.sub,x2,y2,xr=pts,regfun=regfun,xout=FALSE,...) +mat[,2]=regYhat(x1,y1,xr=pts,regfun=regfun,...) +mat[,3]=regYhat(x2,y2,xr=pts,regfun=regfun,...) +est=est1-est2 +if(!is.matrix(est))est=matrix(est,nrow=1) +mat[,4]=mat[,2]-mat[,3] +pv1=apply(est<0,1,mean,na.rm=TRUE) +pv2=apply(est==0,1,mean,na.rm=TRUE) +pv=pv1+.5*pv2 +pv1m=1-pv +pv=2*apply(cbind(pv,pv1m),1,min) +mat[,7]=pv +ilow<-round((alpha/2) * nboot) +ihi<-nboot - ilow +ilow<-ilow+1 +estsort=apply(est,1,sort) +mat[,5]=estsort[ilow,] +mat[,6]=estsort[ihi,] +if(plotit){ +if(eout && xout)stop('Cannot have both eout and xout = F') +if(eout){ +flag<-outfun(cbind(x1,y1),plotit=FALSE,...)$keep +x1<-x1[flag] +y1<-y1[flag] +flag<-outfun(cbind(x2,y2),plotit=FALSE,...)$keep +x2<-x2[flag] +y2<-y2[flag] +} +if(xout){ +flag<-outfun(x1,...)$keep +x1<-x1[flag] +y1<-y1[flag] +flag<-outfun(x2,...)$keep +x2<-x2[flag] +y2<-y2[flag] +} +plot(c(x1,x2),c(y1,y2),type='n',xlab=xlab,ylab=ylab) +if(SCAT)points(x1,y1,pch='o') +if(SCAT)points(x2,y2,pch='+') +abline(regfun(x1,y1)$coef) +abline(regfun(x2,y2)$coef,lty=2) +} +list(output=mat) +} + +Danctspb.sub<-function(data,x,y,xr,regfun,...){ +x=as.matrix(x) +yhat=regYhat(x[data,],y[data],xr=xr,regfun=regfun,...) +yhat +} + +DanctspbMC<-function(x1,y1,x2,y2,pts=NULL,regfun=tshdreg,fr1=1,fr2=1,alpha=.05,SCAT=TRUE, +plotit=TRUE,xout=FALSE,outfun=outpro,nboot=500,SEED=TRUE,xlab='X',ylab='Y',WARN=FALSE,pr=TRUE,eout=FALSE,...){ +# +# Compare the regression lines of two dependent groups at specified design points using +# the robust regression estimator indicated by the argument +# regfun. Default is modified Theil--Sen estimator +# +# Similar to Dancts, which uses a bootstrap estimate of se of Y hat +# Here, do bootstrap based on bootstrap samples from the data +# as done for example by regci +# +# Comparisons are done at specified design points +# This is a robust Johnson-Neyman method for dependent groups. +# +# For OLS, use Dancols +# Assume data are in x1 y1 x2 and y2 +# +# pts can be used to specify the design points where the regression lines +# are to be compared. 
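+# Example (illustrative only; x1, y1, x2, y2 denote hypothetical paired
+# data):
+#   DanctspbMC(x1, y1, x2, y2, pts=c(20,40,60), regfun=tshdreg, xout=TRUE)
+# would compare the two regression lines at the covariate values 20, 40 and
+# 60 after removing leverage points, using the multicore version of
+# Danctspb.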
+# +x1=as.matrix(x1) +x2=as.matrix(x2) +if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') +if(SEED)set.seed(2) +FLAG=pts +X=elimna(cbind(x1,y1,x2,y2)) +if(ncol(X)>4)stop('Only one covariate is allowed') +x1=as.matrix(x1) +x2=as.matrix(x2) +p=ncol(x1) +p1=p+1 +p2=p+2 +p3=p1+p +p4=p3+1 +x1=X[,1:p] +y1=X[,p1] +x2=X[,p2:p3] +y2=X[,p4] +if(xout){ +flag1=outfun(x1)$out.id +flag2=outfun(x2)$out.id +flag=unique(c(flag1,flag2)) +if(length(flag)>0)X=X[-flag,] +x1=X[,1:p] +y1=X[,p1] +x2=X[,p2:p3] +y2=X[,p4] +} +flagF=identical(regfun,tsreg) +if(identical(regfun,tshdreg))flagF=FALSE +if(flagF){ +if(pr){ +if(sum(duplicated(y1)>0))print('Duplicate values detected; regfun=tshdreg might have more power than tsreg') +pr=FALSE +} +if(pr){ +if(sum(duplicated(y2)>0))print('Duplicate values detected; regfun=tshdreg might have more power than tsreg') +}} +if(is.null(pts[1])){ +npt<-5 +isub<-c(1:5) # Initialize isub +test<-c(1:5) +xorder<-order(x1) +y1<-y1[xorder] +x1<-x1[xorder] +xorder<-order(x2) +y2<-y2[xorder] +x2<-x2[xorder] +n1<-1 +n2<-1 +vecn<-1 +for(i in 1:length(x1))n1[i]<-length(y1[near(x1,x1[i],fr1)]) +for(i in 1:length(x1))n2[i]<-length(y2[near(x2,x1[i],fr2)]) +for(i in 1:length(x1))vecn[i]<-min(n1[i],n2[i]) +sub<-c(1:length(x1)) +isub[1]<-min(sub[vecn>=12]) +isub[5]<-max(sub[vecn>=12]) +isub[3]<-floor((isub[1]+isub[5])/2) +isub[2]<-floor((isub[1]+isub[3])/2) +isub[4]<-floor((isub[3]+isub[5])/2) +pts=x1[isub] +} +for(i in 1:length(pts)){ +n1[i]<-length(y1[near(x1,pts[i],fr1)]) +n2[i]<-length(y2[near(x2,pts[i],fr2)]) +} +mat<-matrix(NA,length(pts),7) +dimnames(mat)<-list(NULL,c('X','Est1','Est2','DIF','ci.low','ci.hi','p.value')) +mat[,1]=pts +n=length(y1) +x1=as.matrix(x1) +x2=as.matrix(x2) +data<-matrix(sample(length(y1),size=n*nboot,replace=TRUE),nrow=nboot) +data=listm(t(data)) +library(parallel) +est1=mclapply(data,Danctspb.sub,x1,y1,xr=pts,regfun=regfun,xout=FALSE,...) +est2=mclapply(data,Danctspb.sub,x2,y2,xr=pts,regfun=regfun,xout=FALSE,...) +est1=matl(est1) +est2=matl(est2) +mat[,2]=regYhat(x1,y1,xr=pts,regfun=regfun,...) +mat[,3]=regYhat(x2,y2,xr=pts,regfun=regfun,...) +est=est1-est2 +if(!is.matrix(est))est=matrix(est,nrow=1) +mat[,4]=mat[,2]-mat[,3] +pv1=apply(est<0,1,mean,na.rm=TRUE) +pv2=apply(est==0,1,mean,na.rm=TRUE) +pv=pv1+.5*pv2 +pv1m=1-pv +pv=2*apply(cbind(pv,pv1m),1,min) +mat[,7]=pv +ilow<-round((alpha/2) * nboot) +ihi<-nboot - ilow +ilow<-ilow+1 +estsort=apply(est,1,sort) +mat[,5]=estsort[ilow,] +mat[,6]=estsort[ihi,] +if(plotit){ +if(eout && xout)stop('Cannot have both eout and xout = F') +if(eout){ +flag<-outfun(cbind(x1,y1),plotit=FALSE,...)$keep +x1<-x1[flag] +y1<-y1[flag] +flag<-outfun(cbind(x2,y2),plotit=FALSE,...)$keep +x2<-x2[flag] +y2<-y2[flag] +} +if(xout){ +flag<-outfun(x1,...)$keep +x1<-x1[flag] +y1<-y1[flag] +flag<-outfun(x2,...)$keep +x2<-x2[flag] +y2<-y2[flag] +} +plot(c(x1,x2),c(y1,y2),type='n',xlab=xlab,ylab=ylab) +if(SCAT)points(x1,y1,pch='o') +if(SCAT)points(x2,y2,pch='+') +abline(regfun(x1,y1)$coef) +abline(regfun(x2,y2)$coef,lty=2) +} +list(output=mat) +} + +Danctspb.sub<-function(data,x,y,xr,regfun,...){ +x=as.matrix(x) +yhat=regYhat(x[data,],y[data],xr=xr,regfun=regfun,...) +yhat +} + +anctspb<-function(x1,y1,x2,y2,pts=NULL,regfun=tshdreg,fr1=1,fr2=1,alpha=.05,plotit=TRUE,xout=FALSE,outfun=outpro,nboot=500,SEED=TRUE,xlab='X',ylab='Y',...){ +# +# Compare the regression lines of two independent groups +# at specified design points using a robust regression estimator. +# +# Like ancts but uses +# a percentile bootstrap method is used. 
+# This might help when there are tied values among the dependent variable. +# +# Assume data are in x1 y1 x2 and y2 +# +# pts can be used to specify the design points where the regression lines +# are to be compared. +# +if(SEED)set.seed(2) +FLAG=pts +xy=elimna(cbind(x1,y1)) +if(ncol(xy)>2)stop('Only one covariate is allowed') +x1=xy[,1] +y1=xy[,2] +xy=elimna(cbind(x2,y2)) +if(ncol(xy)>2)stop('Only one covariate is allowed') +x2=xy[,1] +y2=xy[,2] +if(is.null(pts[1])){ +npt<-5 +isub<-c(1:5) # Initialize isub +test<-c(1:5) +xorder<-order(x1) +y1<-y1[xorder] +x1<-x1[xorder] +xorder<-order(x2) +y2<-y2[xorder] +x2<-x2[xorder] +n1<-1 +n2<-1 +vecn<-1 +for(i in 1:length(x1))n1[i]<-length(y1[near(x1,x1[i],fr1)]) +for(i in 1:length(x1))n2[i]<-length(y2[near(x2,x1[i],fr2)]) +for(i in 1:length(x1))vecn[i]<-min(n1[i],n2[i]) +sub<-c(1:length(x1)) +isub[1]<-min(sub[vecn>=12]) +isub[5]<-max(sub[vecn>=12]) +isub[3]<-floor((isub[1]+isub[5])/2) +isub[2]<-floor((isub[1]+isub[3])/2) +isub[4]<-floor((isub[3]+isub[5])/2) +mat<-matrix(NA,length(pts),9) +dimnames(mat)<-list(NULL,c('X','Est1','Est2','DIF','TEST','se','ci.low','ci.hi','p.value')) +pts=x1[isub] +} +mat<-matrix(NA,length(pts),7) +dimnames(mat)<-list(NULL,c('X','Est1','Est2','DIF','ci.low','ci.hi','p.value')) +mat[,1]<-pts +bvec1=matrix(NA,nrow=nboot,ncol=length(pts)) +bvec2=matrix(NA,nrow=nboot,ncol=length(pts)) +x1=as.matrix(x1) +x2=as.matrix(x2) +p1=ncol(x1)+1 +data<-matrix(sample(length(y1),size=length(y1)*nboot,replace=TRUE),nrow=nboot) +for(ib in 1:nboot){ +bvec1[ib,]=regYsub(x1[data[ib,],],y1[data[ib,]],pts,p1=p1,regfun=regfun,...) +} +data<-matrix(sample(length(y2),size=length(y2)*nboot,replace=TRUE),nrow=nboot) +for(ib in 1:nboot){ +bvec2[ib,]=regYsub(x2[data[ib,],],y2[data[ib,]],pts,p1=p1,regfun=regfun,...) +} +dif=bvec10)stop('Duplicate ids detected in m1') +if(sum(duplicated(m2))>0)stop('Duplicate ids detected in m2') +for(i in 1:nrow(m1)){ +flag=duplicated(c(m1[i,id.col1],m2[,id.col2])) +if(sum(flag)==0){ +ic1=ic1+1 +idnm1[ic1]=i +} +if(sum(flag>0)){ +if(is.data.frame(m1)){ +if(!is.null(dim(M1)))M1=rbind(M1,as.data.frame(m1[i,])) +if(is.null(dim(M1)))M1=as.data.frame(m1[i,]) +} +if(!is.data.frame(m1)){ +if(!is.null(dim(M1)))M1=rbind(M1,m1[i,]) +if(is.null(dim(M1)))M1=matrix(m1[i,],nrow=1) +} +}} +M2=NULL +for(i in 1:nrow(m2)){ +flag=duplicated(c(m2[i,id.col2],m1[,id.col1])) +if(sum(flag)==0){ +ic2=ic2+1 +idnm2[ic2]=i +} +if(sum(flag>0)){ +if(is.data.frame(m2)){ +if(!is.null(dim(M2)))M2=rbind(M2,as.data.frame(m2[i,])) +if(is.null(dim(M2)))M2=as.data.frame(m2[i,]) +} +if(!is.data.frame(m2)){ +if(!is.null(dim(M2)))M2=rbind(M2,m2[i,]) +if(is.null(dim(M2)))M2=matrix(m2[i,],nrow=1) +} +}} +m=cbind(M1[,id.col1],M1[,-id.col1],M2[,-id.col2]) +nc1=ncol(m2)-1 +m1u=NULL +if(!is.null(idnm1))m1u=m1[idnm1,] +m2u=NULL +if(!is.null(idnm2))m2u=m2[idnm2,] +list(m=m,idnm1=idnm1,idnm2=idnm2,m1.no=m1u,m2.no=m2u) +} + +regcits<-function(x,y,regfun=tshdreg,nboot=599,alpha=.05,SEED=TRUE,pr=TRUE, +xout=FALSE,outfun=outpro,plotit=FALSE,xlab='Predictor 1',ylab='Predictor 2', +MC=TRUE,...){ +if(MC)v=regciMC(x,y,regfun=regfun,nboot=nboot,alpha=alpha,SEED=SEED,pr=pr,xout=xout, +outfun=outfun,plotit=plotit,xlab=xlab,ylab=ylab,...) +if(!MC)v=regci(x,y,regfun=regfun,nboot=nboot,alpha=alpha,SEED=SEED,pr=pr,xout=xout, +outfun=outfun,plotit=plotit,xlab=xlab,ylab=ylab,...) 
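+# v holds the output of regciMC (MC=TRUE, the default) or regci (MC=FALSE):
+# percentile bootstrap confidence intervals and p-values for the intercept
+# and slopes based on the estimator regfun, which defaults to tshdreg.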
+v +} + +qhdsm<-function(x,y,qval=0.5,q=NULL,pr=FALSE, +xout=FALSE,outfun=outpro,plotit=TRUE,xlab='X',ylab='Y',zlab='Z',pyhat=FALSE,fr=NULL,LP=FALSE,theta=50,phi=25,ticktype='simple',nmin=0,scale=TRUE,pr.qhd=TRUE,pch='.',...){ +# +# Compute the quantile regression line for one or more quantiles +# using combination of hd, running interval smoother and LOESS +# That is, determine the qth (qval) quantile of Y given X using the +# +# plotit=TRUE will plot the lines. WIth p=1 predictor, multiple lines can be plotted. +# Example: qhdsm(x,y,q=c(.25,.5,.75)) will plot the regression lines for +# predicting quartiles. + # +if(!is.null(q))qval=q +x<-as.matrix(x) +X<-cbind(x,y) +X<-elimna(X) +np<-ncol(X) +p<-np-1 +if(p>1 & length(q)>1)print('Only first quantile specified can be plotted') +x<-X[,1:p] +x<-as.matrix(x) +y<-X[,np] +if(xout){ +x<-as.matrix(x) +flag<-outfun(x,plotit=FALSE,...)$keep +x<-x[flag,] +y<-y[flag] +x<-as.matrix(x) +} +if(p==1){ +if(is.null(fr))fr=.8 +ord=order(x) +x=sort(x) +y=y[ord] +est=matrix(NA,ncol=3,nrow=length(qval)) +dimnames(est)=list(NULL,c('q','Inter','Slope')) +#x<-as.matrix(x) +qest=matrix(NA,ncol=length(qval),nrow=length(y)) +for(j in 1:length(qval)){ +rmd=NA +for(i in 1:length(x))rmd[i]<-hd(y[near(x,x[i],fr)],q=qval[j]) +if(LP)rmd=lplot(x,rmd,plotit=FALSE,pyhat=TRUE,pr=FALSE)$yhat +qest[,j]=rmd +} +if(plotit){ +plot(x,y,xlab=xlab,ylab=ylab,pch=pch) +for(j in 1:ncol(qest))lines(x,qest[,j]) +} +if(!pyhat)qest='DONE' +} +if(p>1){ +if(is.null(fr))fr=1 +if(p==2){ +if(pr.qhd){ +if(!scale)print('scale=F is specified. If there is dependence, might want to use scale=TRUE') +}} +qest=rplot(x,y,est=hd,q=qval[1],fr=fr,plotit=plotit,pyhat=pyhat,theta=theta, +phi=phi,scale=scale,SEED=FALSE,varfun=pbvar,xlab=xlab,ylab=ylab,zlab=zlab, +ticktype=ticktype,nmin=nmin,pr=pr) +if(!pyhat)qest='DONE' +if(pyhat)qest=qest$yhat +} +qest +} +skip.cov<-function(x,cop = 6, MM = FALSE, op = 1, mgv.op = 0, outpro.cop = 3, + STAND = FALSE){ +ans=skipcov(x,cop=cop,MM=MM,op=op,mgv.op=mgv.op,outpro.cop=outpro.cop,STAND=STAND) +list(cov=ans) +} + +skipSPR<-function(x,cop=6,MM=FALSE,op=1,mgv.op=0,outpro.cop=3,pr=FALSE){ +v=skip(x,pr=pr,STAND=TRUE,cop=cop,op=op,mgv.op=mgv.op,outpro.cop=outpro.cop) +v +} +rmdzeroG<-function(x,est=skipSPR,grp=NA,nboot=500,SEED=TRUE,...){ +# +# Do ANOVA on dependent groups +# using # depth of zero among bootstrap values +# based on difference scores. +# +# Like rmdzero, only designed for multivariate estimators such as +# computed by the R functions, skip and dmean for example. +# +# The data are assumed to be stored in x in list mode +# or in a matrix. In the first case +# x[[1]] contains the data for the first group, x[[2]] the data +# for the second group, etc. Length(x)=the number of groups = J. +# If stored in a matrix, columns correspond to groups. +# +# grp is used to specify some subset of the groups, if desired. +# By default, all J groups are used. +# +# The default number of bootstrap samples is nboot=500 +# +if(!is.list(x) && !is.matrix(x))stop('Data must be stored in a matrix or in list mode.') +if(is.list(x)){ +# put the data in an n by J matrix +mat<-matrix(0,length(x[[1]]),length(x)) +for (j in 1:length(x))mat[,j]<-x[[j]] +} +if(is.matrix(x))mat<-x +if(!is.na(grp[1])){ +mat<-mat[,grp] +} +FLAG=FALSE +if(ncol(mat)<3)FLAG=TRUE +#if(ncol(mat)<3)stop('This function is for three or more measures of location') +mat<-elimna(mat) # Remove rows with missing values. 
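+# With J dependent groups there are Jall=(J^2-J)/2 pairs, and dif is intended
+# to hold the difference scores mat[,j]-mat[,k] for each pair j<k (as in
+# rmdzero).  The global test then asks how deeply the zero vector is nested
+# within the bootstrap estimates of the location of these difference scores;
+# a zero vector far from the bootstrap cloud yields a small p-value.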
+J<-ncol(mat) +jp<-0 +Jall<-(J^2-J)/2 +dif<-matrix(NA,nrow=nrow(mat),ncol=Jall) +ic<-0 +for(j in 1:J){ +for(k in 1:J){ +if(j.5)stop('tr must be between 0 and .5') +res=yuend(x=x,y=y,tr=tr,alpha=alpha) +# +#if(tr==0)term=1 +#if(tr>0)term=sqrt(area(dnormvar,qnorm(tr),qnorm(1-tr))+2*(qnorm(tr)^2)*tr) +#epow=(res$dif-null.value)*term/sqrt(winvar(x-y,tr=tr,na.rm=TRUE)) +epow=yuenv2(x,y,tr=tr)$Effect.Size +list(ci=res$ci,p.value=res$p.value,est1=res$est1,est2=res$est2,dif=res$dif,se=res$se, +teststat=res$teststat,n=res$n,df=res$df,Effect.Size=epow) +} +qhdsm2g<-function(x1,y1,x2,y2,q=.5,qval=NULL,LP=TRUE,fr=.8,xlab='X',ylab='Y',xout=FALSE,outfun=outpro,...){ +# +# Plot of quantile smoother for two groups using qhdsm +# +# fr controls amount of smoothing +# Missing values are automatically removed. +# +if(!is.null(qval))q=qval +m1<-elimna(cbind(x1,y1)) +if(ncol(m1)>3)stop('One covariate only is allowed') +m2<-elimna(cbind(x2,y2)) +x1<-m1[,1] +y1<-m1[,2] +x2<-m2[,1] +y2<-m2[,2] +if(xout){ +flag<-outfun(m1[,1],plotit=FALSE,...)$keep +m1<-m1[flag,] +x1<-m1[,1] +y1<-m1[,2] +flag<-outfun(m2[,1],plotit=FALSE,...)$keep +m2<-m2[flag,] +x2<-m2[,1] +y2<-m2[,2] +} +flag=order(x1) +x1=x1[flag] +y1=y1[flag] +flag=order(x2) +x2=x2[flag] +y2=y2[flag] +rmd1=NA +rmd2=NA +for(i in 1:length(x1))rmd1[i]<-hd(y1[near(x1,x1[i],fr)],q=q) +for(i in 1:length(x2))rmd2[i]<-hd(y2[near(x2,x2[i],fr)],q=q) +if(LP){ +rmd1=lplot(x1,rmd1,plotit=FALSE,pyhat=TRUE,pr=FALSE)$yhat +rmd2=lplot(x2,rmd2,plotit=FALSE,pyhat=TRUE,pr=FALSE)$yhat +} +plot(c(x1,x2),c(y1,y2),type='n',xlab=xlab,ylab=ylab) +points(x1,y1) +points(x2,y2,pch='+') +lines(x1,rmd1) +lines(x2,rmd2,lty=2) +} + + + +ancGLOB_sub3<-function(x1,y1,x2,y2,xout=FALSE,outfun=outpro,est=tmean,pcrit=NULL,p.crit=NULL,iter=100, +nboot=500,SEED=TRUE,MC=FALSE,nmin=12,pts=NULL,fr1=1,fr2=1,plotit=TRUE,xlab='X',ylab='Y',LP=TRUE,...){ +# +# +if(SEED)set.seed(2) +x1<-as.matrix(x1) +p1<-ncol(x1)+1 +p<-ncol(x1) +if(p>1)stop('Current version is for one independent variable only') +xy<-cbind(x1,y1) +xy<-elimna(xy) +x1<-xy[,1:p] +y1<-xy[,p1] +xy<-cbind(x2,y2) +xy<-elimna(xy) +x2<-xy[,1:p] +y2<-xy[,p1] +if(xout){ +m<-cbind(x1,y1) +flag<-outfun(x1,plotit=FALSE)$keep +m<-m[flag,] +x1<-m[,1:p] +y1<-m[,p1] +m<-cbind(x2,y2) +flag<-outfun(x2,plotit=FALSE)$keep +m<-m[flag,] +x2<-m[,1:p] +y2<-m[,p1] +} +N1=length(y1) +N2=length(y2) +if(is.null(pts[1])){ +isub<-c(1:5) # Initialize isub +test<-c(1:5) +xorder<-order(x1) +y1<-y1[xorder] +x1<-x1[xorder] +xorder<-order(x2) +y2<-y2[xorder] +x2<-x2[xorder] +n1<-1 +n2<-1 +vecn<-1 +for(i in 1:length(x1))n1[i]<-length(y1[near(x1,x1[i],fr1)]) +for(i in 1:length(x1))n2[i]<-length(y2[near(x2,x1[i],fr2)]) +for(i in 1:length(x1))vecn[i]<-min(n1[i],n2[i]) +sub<-c(1:length(x1)) +isub[1]<-min(sub[vecn>=nmin]) +isub[5]<-max(sub[vecn>=nmin]) +isub[3]<-floor((isub[1]+isub[5])/2) +isub[2]<-floor((isub[1]+isub[3])/2) +isub[4]<-floor((isub[3]+isub[5])/2) +pts=x1[isub] +g1=list() +g2=list() +for (i in 1:5){ +g1[[i]]<-y1[near(x1,x1[isub[i]],fr1)] +g2[[i]]<-y2[near(x2,x1[isub[i]],fr2)] +}} +if(!is.null(pts[1])){ +if(length(pts)<2)stop('Should have at least two points (With one point, use the R function ancova)') +g1=list() +g2=list() +for (i in 1:length(pts)){ +g1[[i]]<-y1[near(x1,pts[i],fr1)] +g2[[i]]<-y2[near(x2,pts[i],fr2)] +} +} +n1=lapply(g1,length) +nv=(min(as.vector(matl(n1)))) +res=aov2depth(g1,g2,est=est,SEED=SEED,nboot=nboot,...) +if(plotit)runmean2g(x1,y1,x2,y2,nboot=nboot,fr=fr1,est=est,xout=xout,LP=LP,...) 
+list(p.value=res$p.value,est1=res$est1,est2=res$est2,dif=res$dif,pts=pts,n1=res$n1,n2=res$n2) +} + + + + + + + + +ancGLOB_sub4<-function(x1,y1,x2,y2,xout=FALSE,outfun=outpro,est=tmean,pcrit=NULL,p.crit=NULL,iter=100, +nboot=500,SEED=TRUE,MC=FALSE,nmin=12,pts=NULL,fr1=1,fr2=1,plotit=TRUE,xlab='X',ylab='Y',LP=TRUE,...){ +# +# +if(SEED)set.seed(2) +x1<-as.matrix(x1) +p1<-ncol(x1)+1 +p<-ncol(x1) +if(p>1)stop('Current version is for one independent variable only') +xy<-cbind(x1,y1) +xy<-elimna(xy) +x1<-xy[,1:p] +y1<-xy[,p1] +xy<-cbind(x2,y2) +xy<-elimna(xy) +x2<-xy[,1:p] +y2<-xy[,p1] +if(xout){ +m<-cbind(x1,y1) +flag<-outfun(x1,plotit=FALSE)$keep +m<-m[flag,] +x1<-m[,1:p] +y1<-m[,p1] +m<-cbind(x2,y2) +flag<-outfun(x2,plotit=FALSE)$keep +m<-m[flag,] +x2<-m[,1:p] +y2<-m[,p1] +} +N1=length(y1) +N2=length(y2) +if(is.null(pts[1])){ +isub<-c(1:5) # Initialize isub +test<-c(1:5) +xorder<-order(x1) +y1<-y1[xorder] +x1<-x1[xorder] +xorder<-order(x2) +y2<-y2[xorder] +x2<-x2[xorder] +n1<-1 +n2<-1 +vecn<-1 +for(i in 1:length(x1))n1[i]<-length(y1[near(x1,x1[i],fr1)]) +for(i in 1:length(x1))n2[i]<-length(y2[near(x2,x1[i],fr2)]) +for(i in 1:length(x1))vecn[i]<-min(n1[i],n2[i]) +sub<-c(1:length(x1)) +isub[1]<-min(sub[vecn>=nmin]) +isub[5]<-max(sub[vecn>=nmin]) +isub[3]<-floor((isub[1]+isub[5])/2) +isub[2]<-floor((isub[1]+isub[3])/2) +isub[4]<-floor((isub[3]+isub[5])/2) +pts=x1[isub] +g1=list() +g2=list() +for (i in 1:5){ +g1[[i]]<-y1[near(x1,x1[isub[i]],fr1)] +g2[[i]]<-y2[near(x2,x1[isub[i]],fr2)] +}} +if(!is.null(pts[1])){ +if(length(pts)<2)stop('Should have at least two points (use the R function ancova)') +g1=list() +g2=list() +for (i in 1:length(pts)){ +g1[[i]]<-y1[near(x1,pts[i],fr1)] +g2[[i]]<-y2[near(x2,pts[i],fr2)] +}} +n1=lapply(g1,length) +nv=(min(as.vector(matl(n1)))) +res=aov2depth(g1,g2,est=est,SEED=SEED,nboot=nboot,...) +if(plotit)runmean2g(x1,y1,x2,y2,nboot=nboot,fr=fr1,est=est,xout=xout,LP=LP,...) +list(p.value=res$p.value,est1=res$est1,est2=res$est2,dif=res$dif,pts=pts,n1=res$n1,n2=res$n2) +} + + + + + + + + +ancGLOB_sub5<-function(x1,y1,x2,y2,xout=FALSE,outfun=outpro,est=tmean,pcrit=NULL,p.crit=NULL,iter=100, +nboot=500,SEED=TRUE,MC=FALSE,nmin=12,pts=NULL,fr1=1,fr2=1,xlab='X',ylab='Y',LP=TRUE,...){ +# +# +if(is.null(pts))stop('pts should be specified') +if(SEED)set.seed(2) +x1<-as.matrix(x1) +p1<-ncol(x1)+1 +p<-ncol(x1) +if(p>1)stop('Current version is for one independent variable only') +xy<-cbind(x1,y1) +xy<-elimna(xy) +x1<-xy[,1:p] +y1<-xy[,p1] +xy<-cbind(x2,y2) +xy<-elimna(xy) +x2<-xy[,1:p] +y2<-xy[,p1] +if(xout){ +m<-cbind(x1,y1) +flag<-outfun(x1,plotit=FALSE)$keep +m<-m[flag,] +x1<-m[,1:p] +y1<-m[,p1] +m<-cbind(x2,y2) +flag<-outfun(x2,plotit=FALSE)$keep +m<-m[flag,] +x2<-m[,1:p] +y2<-m[,p1] +} +N1=length(y1) +N2=length(y2) +if(length(pts)<2)stop('Should have at least two points (With one point, use the R function ancova)') +g1=list() +g2=list() +for (i in 1:length(pts)){ +g1[[i]]<-y1[near(x1,pts[i],fr1)] +g2[[i]]<-y2[near(x2,pts[i],fr2)] +} +n1=lapply(g1,length) +nv=(min(as.vector(matl(n1)))) +res=aov2depth(g1,g2,est=est,SEED=SEED,nboot=nboot,nmin=nmin,...)$p.value +res +} + + +ancGLOB_pv<-function(n1,n2,est=tmean,fr1=.8,fr2=.8,nboot=500,SEED=TRUE,iter=1000,nmin=12,MC=TRUE,alpha=.05,PRM=FALSE,pts=NULL,...){ +# +# Determine critical p-value when using the function ancGLOB +# Strategy: generage data from a normal distribution, NULL true +# compute p-value, repeat +# iter times (iter=100 is default) +# (a larger choice for iter is recommended. 
To reduce execution time use ancGLOB_pv_C +# +# returns: +# p.crit, the critical p-value for the specified alpha value +# if PRM=T, all p-values that were computed. +# ef.iter, the actual number of iterations, which might differ from iter +# due to sample sizes where it makes no sense to compute a p-value +# based on the generated data. +# +if(SEED)set.seed(45) +bvec=list() +np1=min(c(n1,n2))+1 +nmax=max(c(n1,n2)) +for(i in 1:iter){ +bvec[[i]]=rmul(nmax,p=4) +if(n1!=n2)bvec[[i]][np1:nmax,1:2]=NA +} +if(MC){ +library(parallel) +prm=mclapply(bvec,ancGLOB_sub2,fr1=fr1,fr2=fr2,est=est,SEED=SEED,nboot=nboot,pts=pts,...) +} +if(!MC)prm=lapply(bvec,ancGLOB_sub2,fr1=fr1,fr2=fr2,est=est,SEED=SEED,nboot=nboot,pts=pts,...) +prm=elimna(as.vector(matl(prm))) +ef.iter=length(prm) +p.crit=hd(prm,alpha) +prm=sort(elimna(prm)) +if(!PRM)prm=NULL +list(p.crit=p.crit,prm=prm,ef.iter=ef.iter) +} + +ancGLOB_sub2<-function(bvec,fr1=fr1,fr2=fr2,est=est,SEED=SEED,nmin=12,nboot=nboot,pts=pts,...){ +p=ancGLOB_sub3(bvec[,1],bvec[,2],bvec[,3],bvec[,4],est=est,SEED=SEED,fr1=fr1,fr2=fr2,nboot=nboot, +plotit=FALSE,nmin=12,pts=pts,...)$p.value +p +} + + +ancGLOB_pv_pts<-function(x1,x2,est=tmean,fr1=1,fr2=1,nboot=500,SEED=TRUE,iter=1000,nmin=12,MC=TRUE,alpha=.05,PRM=FALSE,pts=NULL,...){ +# +# Determine critical p-value when using the function ancGLOB and pts is specified. +# Strategy: generage data from a normal distribution, NULL true +# compute p-value, repeat +# iter times (iter=1000 is default) +# +# pts is used to indicate the covariate values where comparisons are to be made. +# Example: pts=c(1,4,6) will compare regression lines at X=1, 4 and 6 +# if pts is not specified, the function terminates with an error. +# +# +# returns: +# p.crit, the critical p-value for the specified alpha value +# if PRM=T, all p-values that were computed. +# ef.iter, the actual number of interations, which might differ from iter +# due to sample sizes where it makes no sense to compute a p-value +# based on the generated data. +# +# Like ancGLOB_pv, only pts is specified and use data in x1 and x2 +# +if(is.null(pts[1]))stop('pts is null, use ancGLOB_pv') +x1=elimna(x1) +x2=elimna(x2) +n1=length(x1) +n2=length(x2) + +if(SEED)set.seed(45) +bvec=list() +np1=min(c(n1,n2))+1 +nmax=max(c(n1,n2)) +for(i in 1:iter){ +bvec[[i]]=rmul(nmax,p=4) +if(n1!=n2)bvec[[i]][np1:nmax,1:2]=NA +bvec[[i]][1:n1,1]=x1 +bvec[[i]][1:n2,3]=x2 +} +prm=NA +if(MC){ +library(parallel) +prm=mclapply(bvec,ancGLOB_sub4,fr1=fr1,fr2=fr2,est=est,SEED=SEED,nboot=nboot,pts=pts,...) +} +#if(!MC)prm=lapply(bvec,ancGLOB_sub4,fr1=fr1,fr2=fr2,est=est,SEED=SEED,nboot=nboot,pts=pts,...) +if(!MC){ +for(ij in 1:length(bvec)){ +bv=as.matrix(bvec[[ij]]) +prm[ij]=ancGLOB_sub4(bv,fr1=fr1,fr2=fr2,est=est,SEED=SEED,nboot=nboot,pts=pts,nmin=nmin,...) +}} +prm=elimna(as.vector(matl(prm))) +ef.iter=length(prm) +p.crit=hd(prm,alpha) +prm=sort(elimna(prm)) +if(!PRM)prm=NULL +list(p.crit=p.crit,prm=prm,ef.iter=ef.iter) +} + +ancGLOB_sub4<-function(bvec,fr1=fr1,fr2=fr2,est=est,SEED=SEED,nmin=12,nboot=nboot,pts=pts,...){ +p=ancGLOB_sub5(bvec[,1],bvec[,2],bvec[,3],bvec[,4],est=est,SEED=SEED,fr1=fr1,fr2=fr2,nboot=nboot,nmin=12,pts=pts,...) +p +} + + + +q2by2<-function(x,q = c(0.1, 0.25, 0.5, 0.75, 0.9), nboot = 2000,SEED=TRUE){ +# +# For a 2 by 2 ANOVA, independent groups, test main effects +# and interaction for all quantiles indicated by argument q +# +if(SEED)set.seed(2) +if(is.matrix(x))x<-listm(x) +if(length(x)!=4)stop('Current version is for a 2-by-2 ANOVA only. 
Should have four groups.') +A=matrix(NA,nrow=length(q),6) +B=matrix(NA,nrow=length(q),6) +AB=matrix(NA,nrow=length(q),6) +dimnames(A)=list(NULL,c('q','psihat','p.value','ci.lower','ci.upper','p.hoch')) +dimnames(B)=list(NULL,c('q','psihat','p.value','ci.lower','ci.upper','p.hoch')) +dimnames(AB)=list(NULL,c('q','psihat','p.value','ci.lower','ci.upper','p.hoch')) +con=con2way(2,2) + +for(i in 1:length(q)){ +A[i,1]=q[i] +B[i,1]=q[i] +AB[i,1]=q[i] +a=linconpb(x,nboot=nboot,est=hd,con=con$conA,SEED=FALSE,q=q[i]) +b=linconpb(x,nboot=nboot,est=hd,con=con$conB,SEED=FALSE,q=q[i]) +ab=linconpb(x,nboot=nboot,est=hd,con=con$conAB,SEED=FALSE,q=q[i]) +A[i,2:5]=a$output[,c(2,3,5,6)] +B[i,2:5]=b$output[,c(2,3,5,6)] +AB[i,2:5]=ab$output[,c(2,3,5,6)] +} +A[,6]=p.adjust(A[,3],method='hoch') +B[,6]=p.adjust(B[,3],method='hoch') +AB[,6]=p.adjust(AB[,3],method='hoch') +list(A=A,B=B,AB=AB) +} +bd1GLOB<-function(x,est=spatcen,nboot=599,alpha=.05,SEED=TRUE,MC=FALSE,...){ +# +# Test the hypothesis of equal measures of location for J +# dependent groups using a +# percentile bootstrap method. +# +# Same as bd1way, only designed for estimators that take into account the +# overall structure of the data when dealing with outliers +# +# By default, use spatial median estimator +# est=dmean.cen will use the Donoho-Gasko trimmed mean. +# +# argument est is location estimator that returns value in $center +# (So, for example, est=dmean will not run.) +# +# Data are assumed to be stored in list mode or an n by J matrix. +# misran=F means missing values do not occur at random, case wise deletion is used. +# +#if(MC){ +#if(identical(est,dmean_C))stop('Using dmean_C with MC=T can cause R to crash. Use MC=F') +#library(parallel) +#} +# Last 3 commands cause an error unless WRScpp is available. +if(!is.list(x) && !is.matrix(x))stop('Data must be store in list mode or in an n by J matrix.') +if(is.list(x)){ +m<-matrix(0,length(x[[1]]),length(x)) +for (j in 1:length(x))m[,j]<-x[[j]] +} +if(is.matrix(x))m<-x +xcen<-m +locval=est(m,...)$center +locval=as.vector(locval) +for (j in 1:ncol(m))xcen[,j]<-m[,j]-locval[j] +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +data<-matrix(sample(nrow(m),size=nrow(m)*nboot,replace=TRUE),nrow=nboot) +data=listm(t(data)) +if(MC)bvec<-mclapply(data,bd1GLOB1,xcen=xcen,est=est,...) +if(!MC)bvec<-lapply(data,bd1GLOB1,xcen=xcen,est=est,...) +bvec=as.vector(matl(bvec)) +# A vector of nboot test statistics. +icrit<-floor((1-alpha)*nboot+.5) +test<-(length(locval)-1)*var(locval) +pv=mean((test2){ +zvec<-rep(0,Jall) +m1<-rbind(dif,zvec) +bplus<-nboot+1 +cmat=var(dif) +dv<-pdisMC(m1,center=cdif) +bplus<-nboot+1 +sig.level<-1-sum(dv[bplus]>=dv[1:nboot])/nboot +} +list(p.value=sig.level,center=center) +} + + +Dancovapb<-function(x1,y1,x2=x1,y2,fr1=1,fr2=1,est=hd,alpha=.05,nboot=500,pr=TRUE,SEED=TRUE, +plotit=TRUE, pts=NA,sm=FALSE,xout=FALSE,outfun=out,DIF=FALSE,na.rm=TRUE,...){ +# +# Compare two dependent groups using the ancova method +# (a method similar to the one used by the R function ancova). +# No parametric assumption is made about the form of +# the regression lines--a running interval smoother is used. +# +# percentile bootstrap method is used. +# +# est indicates estimator to be used; Harrell-Davis median estimator is default. +# +# Assume data are in x1 y1 x2 and y2 +# +# sm=T will create smooths using bootstrap bagging. +# pts can be used to specify the design points where the regression lines +# are to be compared. 
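+# For example (illustrative values only), pts=c(1,4,6) would compare the
+# regression lines at the design points X=1, 4 and 6.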
+# pts=NA means five points will be picked empirically. +# +# +if(SEED)set.seed(2) +if(DIF & !na.rm){ +if(pr)stop('With na.rm=TRUE, must have DIF=FALSE') +} +if(ncol(as.matrix(x1))>1)stop('One covariate only is allowed with this function') +if(length(x1)!=length(y1))stop('x1 and y1 have different lengths') +if(length(x1)!=length(x2))stop('x1 and y2 have different lengths') +if(length(x2)!=length(y2))stop('x2 and y2 have different lengths') +if(length(y1)!=length(y2))stop('y1 and y2 have different lengths') +xy=elimna(cbind(x1,y1,x2,y2)) +x1=xy[,1] +y1=xy[,2] +x2=xy[,3] +y2=xy[,4] +if(is.na(pts[1])){ +npt<-5 +isub<-c(1:5) # Initialize isub +test<-c(1:5) +xorder<-order(x1) +y1<-y1[xorder] +x1<-x1[xorder] +xorder<-order(x2) +y2<-y2[xorder] +x2<-x2[xorder] +n1<-1 +n2<-1 +vecn<-1 +for(i in 1:length(x1))n1[i]<-length(y1[near(x1,x1[i],fr1)]) +for(i in 1:length(x1))n2[i]<-length(y2[near(x2,x1[i],fr2)]) +for(i in 1:length(x1))vecn[i]<-min(n1[i],n2[i]) +sub<-c(1:length(x1)) +isub[1]<-min(sub[vecn>=12]) +isub[5]<-max(sub[vecn>=12]) +isub[3]<-floor((isub[1]+isub[5])/2) +isub[2]<-floor((isub[1]+isub[3])/2) +isub[4]<-floor((isub[3]+isub[5])/2) +mat<-matrix(NA,5,7) +dimnames(mat)<-list(NULL,c('X','n','DIF','ci.low','ci.hi','p.value','p.adjust')) +for (i in 1:5){ +t1=near(x1,x1[isub[i]],fr1) +t2=near(x2,x1[isub[i]],fr2) +pick=as.logical(t1*t2) +if(!na.rm)test=rmmismcp(y1[pick],y2[pick],est=est,,nboot=nboot,alpha=alpha,pr=FALSE, +plotit=FALSE,SEED=FALSE,...) +if(na.rm){ +test=rmmcppb(y1[pick],y2[pick],est=est,dif=DIF,nboot=nboot,plotit=FALSE,alpha=alpha, +pr=FALSE,SEED=SEED,...) +mat[i,1]<-x1[isub[i]] +mat[i,2]<-length(y1[pick]) +mat[i,3]<-test$output[,2] +mat[i,4]<-test$output[,5] +mat[i,5]<-test$output[,6] +mat[i,6]<-test$output[,3] +} +if(!na.rm){ +test=rmmismcp(y1[pick],y2[pick],est=est,nboot=nboot,alpha=alpha,pr=FALSE, +plotit=FALSE,SEED=SEED,...) +mat[i,1]<-x1[isub[i]] +mat[i,2]<-length(y1[pick]) +mat[i,3]<-test$output[,3] +mat[i,4]<-test$output[,6] +mat[i,5]<-test$output[,7] +mat[i,6]<-test$output[,4] +} +} +temp2<-order(0-mat[,6]) +bot=c(1:nrow(mat)) +dvec=sort(alpha/bot,decreasing=TRUE) +#mat[temp2,7]=dvec +mat[,7]=p.adjust(mat[,6],method='hoch') +} +if(!is.na(pts[1])){ +n1<-1 +n2<-1 +vecn<-1 +for(i in 1:length(pts)){ +n1[i]<-length(y1[near(x1,pts[i],fr1)]) +n2[i]<-length(y2[near(x2,pts[i],fr2)]) +} +# First check sample size +# +flage=rep(TRUE,length(pts)) +for (i in 1:length(pts)){ +t1<-near(x1,pts[i],fr1) +t2<-near(x2,pts[i],fr2) +pick=as.logical(t1*t2) +if(sum(pick)<=5){print(paste('Warning: there are',sum(pick),' points corresponding to the design point X=',pts[i])) +flage[i]=FALSE +}} +pts=pts[flage] +mat<-matrix(NA,length(pts),7) +dimnames(mat)<-list(NULL,c('X','n','DIF','ci.low','ci.hi', +'p.value','p.crit')) +for (i in 1:length(pts)){ +t1<-near(x1,pts[i],fr1) +t2<-near(x2,pts[i],fr2) +pick=as.logical(t1*t2) +#print(y1[pick]) +test=rmmcppb(y1[pick],y2[pick],est=est,dif=DIF,plotit=FALSE,alpha=alpha,pr=FALSE,SEED=FALSE,...) +mat[i,3]<-test$output[,2] +mat[i,1]<-pts[i] +mat[i,2]<-length(y1[pick]) +mat[i,4]<-test$output[,5] +mat[i,5]<-test$output[,6] +mat[i,6]<-test$output[,3] +} +#temp2<-order(0-mat[,6]) +mat[,7]=p.adjust(mat[,6],method='hoch') +bot=c(1:nrow(mat)) +dvec=sort(alpha/bot,decreasing=TRUE) +#mat[temp2,7]=dvec +} +if(plotit){ +runmean2g(x1,y1,x2,y2,fr=fr1,est=est,sm=sm,xout=xout,outfun=outfun,,...) 
+} +list(output=mat) +} +ancdifplot<-function(x1,y1,x2,y2,fr1=1,fr2=1,tr=.2,alpha=.05,pr=TRUE,xout=FALSE,outfun=out,LP=TRUE, +nmin=8,scat=TRUE,xlab='X',ylab='Y',report=FALSE,...){ +# +# Compare two independent groups using the ancova method +# No parametric assumption is made about the form of +# the regression lines--a running interval smoother is used. +# +# Assume data are in x1 y1 x2 and y2 +# +# nmin indicates minimun number of values close to a point +# +# Similar to ancova, only compute a confidence band for the difference and plot it. +# +if(ncol(as.matrix(x1))>1)stop('One covariate only is allowed with this function') +if(length(x1)!=length(y1))stop('x1 and y1 have different lengths') +if(length(x2)!=length(y2))stop('x2 and y2 have different lengths') +xy=elimna(cbind(x1,y1)) +x1=xy[,1] +y1=xy[,2] +xy=elimna(cbind(x2,y2)) +x2=xy[,1] +y2=xy[,2] +if(xout){ +flago<-outfun(x1,...)$keep +x1<-x1[flago] +y1<-y1[flago] +flag<-outfun(x2,...)$keep +x2<-x2[flago] +y2<-y2[flago] +} +xorder<-order(x1) +y1<-y1[xorder] +x1<-x1[xorder] +xorder<-order(x2) +y2<-y2[xorder] +x2<-x2[xorder] +n1<-1 +n2<-1 +vecn<-1 +for(i in 1:length(x1))n1[i]<-length(y1[near(x1,x1[i],fr1)]) +for(i in 1:length(x1))n2[i]<-length(y2[near(x2,x1[i],fr2)]) +for(i in 1:length(x1))vecn[i]<-min(n1[i],n2[i]) +flag=vecn>=nmin +ptsum=sum(flag) +est=NA +low=NA +up=NA +ic=0 +xp1=NA +xp2=NA +pv=NA +for (i in 1:length(x1)){ +if(flag[i]){ +g1<-y1[near(x1,x1[i],fr1)] +g2<-y2[near(x2,x2[i],fr2)] +test<-yuen(g1,g2,tr=tr) +ic=ic+1 +xp1[ic]=x1[i] +xp2[ic]=x2[i] +est[ic]=test$dif +low[ic]=test$ci[1] +up[ic]=test$ci[2] +pv[ic]=test$p.value +}} +#print(length(pv)) +#print(length(xp1)) +if(LP){ +xy=elimna(cbind(xp1,est,low,up,pv)) +est=lplot(xy[,1],xy[,2],plotit=FALSE,pyhat=TRUE,pr=FALSE)$yhat +up=lplot(xy[,1],xy[,4],plotit=FALSE,pyhat=TRUE,pr=FALSE)$yhat +low=lplot(xy[,1],xy[,3],plotit=FALSE,pyhat=TRUE,pr=FALSE)$yhat +} +if(!report)output='DONE' +plot(c(x1,x2),c(y1,y2),xlab=xlab,ylab=ylab,type='n') +if(!LP){ +lines(xp1,up,lty=2) +lines(xp1,low,lty=2) +lines(xp1,est) +if(scat)points(c(x1,x2),c(y1,y2)) +if(report){ +output=cbind(xp1,est,low,up,pv) +dimnames(output)=list(NULL,c(xlab,'est.dif','lower.ci','upper.ci','p-value')) +}} +if(LP){ +lines(xy[,1],up,lty=2) +lines(xy[,1],low,lty=2) +lines(xy[,1],est) + if(scat)points(c(x1,x2),c(y1,y2)) +if(report){ +output=cbind(xy[,1],est,low,up,xy[,5]) +dimnames(output)=list(NULL,c(xlab,'est.dif','lower.ci','upper.ci','p-value')) +} +} +output +} +ancGLOB<-function(x1,y1,x2,y2,xout=FALSE,outfun=outpro,est=tmean,p.crit=NULL,nreps=500, +alpha=.05,pr=TRUE,nboot=500,SEED=TRUE,MC=FALSE,CR=FALSE, nmin=12,pts=NULL,fr1=1,fr2=1,plotit=TRUE,SCAT=TRUE,pch1='+',pch2='o', +xlab='X',ylab='Y',LP=TRUE,cpp=FALSE,...){ +# +# Like the function ancova, only performs a global test that the measures of location +# are equal among all the covariate values that are chosen. +# +# pts = NULL, the function picks five covariate values. +# iter=500 means that when the critical p-value is determined, simulations with 500 +# replications are used to determine the critical p-value. +# +# Reject if the p-value is less than the critical p-value. +# Works well with alpha=.05. Uncertain about alpha <.05. +# +# cpp=TRUE, a C++ function is used to determine the critical p-value +# assuming the library WRScpp has been installed. 
This is done as follows: +# install.packages('devtools') +# library("devtools") +# install_github( "WRScpp", "mrxiaohe") +# +# CR=TRUE: If number of points is two or three, plot 1-alpha confidence region +# +# +if(CR)plotit=FALSE # Can't plot both regression lines and confidence region +if(SEED)set.seed(2) +iter=nreps +pts.flag=is.null(pts) +if(!is.null(pts))cpp=FALSE +x1<-as.matrix(x1) +p1<-ncol(x1)+1 +p<-ncol(x1) +if(p>1)stop('Current version is for one independent variable only') +xy<-cbind(x1,y1) +xy<-elimna(xy) +x1<-xy[,1:p] +y1<-xy[,p1] +xy<-cbind(x2,y2) +xy<-elimna(xy) +x2<-xy[,1:p] +y2<-xy[,p1] +if(xout){ +m<-cbind(x1,y1) +flag<-outfun(x1,plotit=FALSE)$keep +m<-m[flag,] +x1<-m[,1:p] +y1<-m[,p1] +m<-cbind(x2,y2) +flag<-outfun(x2,plotit=FALSE)$keep +m<-m[flag,] +x2<-m[,1:p] +y2<-m[,p1] +} +N1=length(y1) +N2=length(y2) +if(is.null(pts[1])){ +isub<-c(1:5) # Initialize isub +test<-c(1:5) +xorder<-order(x1) +y1<-y1[xorder] +x1<-x1[xorder] +xorder<-order(x2) +y2<-y2[xorder] +x2<-x2[xorder] +n1<-1 +n2<-1 +vecn<-1 +for(i in 1:length(x1))n1[i]<-length(y1[near(x1,x1[i],fr1)]) +for(i in 1:length(x1))n2[i]<-length(y2[near(x2,x1[i],fr2)]) +for(i in 1:length(x1))vecn[i]<-min(n1[i],n2[i]) +sub<-c(1:length(x1)) +isub[1]<-min(sub[vecn>=nmin]) +isub[5]<-max(sub[vecn>=nmin]) +isub[3]<-floor((isub[1]+isub[5])/2) +isub[2]<-floor((isub[1]+isub[3])/2) +isub[4]<-floor((isub[3]+isub[5])/2) +pts=x1[isub] +g1=list() +g2=list() +for (i in 1:5){ +g1[[i]]<-y1[near(x1,x1[isub[i]],fr1)] +g2[[i]]<-y2[near(x2,x1[isub[i]],fr2)] +}} +if(!is.null(pts[1])){ +if(length(pts)<2)stop('Should have at least two points (use the R function ancova)') +g1=list() +g2=list() +for (i in 1:length(pts)){ +g1[[i]]<-y1[near(x1,pts[i],fr1)] +g2[[i]]<-y2[near(x2,pts[i],fr2)] +} +} +p.alpha=NULL +if(is.null(p.crit)){ +if(pts.flag){ +if(cpp){ +library(WRScpp) +ve=ancGLOB_pv_C(N1,N2,est=est,iter=iter,fr1=fr1,fr2=fr2,nboot=nboot,SEED=SEED,...) +v=hd(ve,q=alpha) +} +else{ + v=ancGLOB_pv(N1,N2,est=est,iter=iter,fr1=fr1,fr2=fr2,nboot=nboot, +PRM=FALSE,SEED=SEED,alpha=alpha,xlab=xlab,ylab=ylab,...)$p.crit +} +} +if(!pts.flag)v=ancGLOB_pv_pts(x1,x2,pts=pts,nmin=nmin,iter=iter,est=est,fr1=fr1,fr2=fr2, +nboot=nboot,SEED=SEED,alpha=alpha,MC=MC)$p.crit +} +if(!is.null(p.crit))v=p.crit +res=aov2depth(g1,g2,est=est,SEED=SEED,CR=CR,alpha=v,...) +if(pr)print('Reject if p.test is less than p.crit') +if(plotit)runmean2g(x1,y1,x2,y2,fr=fr1,est=est,xout=FALSE,LP=LP,xlab=xlab,ylab=ylab, +SCAT=SCAT,pch1=pch1,pch2=pch2,...) +list(p.test=res$p.value,p.crit=v,est1=res$est1,est2=res$est2,dif=res$dif,pts=pts,n1=res$n1,n2=res$n2) +} + + +aov2depth<-function(x1,x2,est=tmean,nboot=500,SEED=TRUE,nmin=12,CR=FALSE, +xlab=' DIF 1',ylab='DIF 2',zlab='DIF 3',alpha=.05,...){ +# +# 2 by K ANOVA independent group (K levels not necessarily independent and +# not completely dependent +# +# Main effect Factor A only +# +# Strategy: Use depth of zero based on estimated +# differences for each column of the K levels of Factor B +# That is, testing no main effects for Factor A in +# a manner that takes into account the pattern of the +# measures of location rather then simply averaging +# across columns. +# +# x1 can be a matrix with K columns corrspoding to groups, ditto for x2 +# Or x1 and x2 can have list mode. +# Assuming x1 and x2 contain data for indepedendent groups. 
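+#
+# Illustrative call (hypothetical data, not part of the original code):
+#   m1=matrix(rnorm(60),ncol=3)
+#   m2=matrix(rnorm(60),ncol=3)
+#   aov2depth(m1,m2,est=tmean,nboot=500)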
+# +if(is.matrix(x1)||is.data.frame(x1))x1=listm(x1) +if(is.matrix(x2)||is.data.frame(x2))x2=listm(x2) +J=length(x1) +if(J!=length(x2))stop('x1 and x2 should have same number of groups') +if(SEED)set.seed(2) +for(j in 1:J){ +x1[[j]]=na.omit(x1[[j]]) +x2[[j]]=na.omit(x2[[j]]) +} +n1=mapply(x1,FUN=length) +n2=mapply(x2,FUN=length) +bplus=nboot+1 +bvec1=matrix(NA,nrow=nboot,ncol=J) +bvec2=matrix(NA,nrow=nboot,ncol=J) +for(j in 1:J){ +data1=matrix(sample(x1[[j]],size=n1[j]*nboot,replace=TRUE),nrow=nboot) +data2=matrix(sample(x2[[j]],size=n2[j]*nboot,replace=TRUE),nrow=nboot) +bvec1[,j]=apply(data1,1,est,...) +bvec2[,j]=apply(data2,1,est,...) +} +difb=bvec1-bvec2 +est1=mapply(x1,FUN=est,...) +est2=mapply(x2,FUN=est,...) +dif=est1-est2 +m1=var(difb) +nullvec=rep(0,J) +difz=rbind(difb,nullvec) +dis=mahalanobis(difz,dif,m1) +sig=sum(dis[bplus]<=dis)/bplus +if(CR){ +dis2<-order(dis[1:nboot]) +dis<-sort(dis) +critn<-floor((1-alpha)*nboot) +if(J==2){ +plot(difb[,1],difb[,2],xlab=xlab,ylab=ylab) +points(0,0,pch=0) +xx<-difb[dis2[1:critn],] +xord<-order(xx[,1]) +xx<-xx[xord,] +temp<-chull(xx) +lines(xx[temp,]) +lines(xx[c(temp[1],temp[length(temp)]),]) +} +if(J==3){ +scatterplot3d(difb[dis2[1:critn],],xlab=xlab,ylab=ylab,zlab=zlab,tick.marks=TRUE) +} + +} +list(p.value=sig,est1=est1,est2=est2,dif=dif,n1=n1,n2=n2) +} + +ancovaWMW<-function(x1,y1,x2,y2,fr1=1,fr2=1,alpha=.05,sm=FALSE,est=tmean, +plotit=TRUE,pts=NA,xout=FALSE,outfun=out,LP=TRUE,...){ +# +# Compare two independent groups using the ancova method in conjunction +# with Cliff's improvement on the Wilcoxon-Mann-Whitney test. +# No parametric assumption is made about the form of +# the regression lines--a running interval smoother is used. +# +# Assume data are in x1 y1 x2 and y2 +# +# OLD version: sm=TRUE will use bootstrap bagging when plotting the regression lines +# The plot is based on measure of location indicated by the argument +# est. Default is the Harrell-Davis estimate of the median. Not working, took this out. 
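+#
+# Illustrative call (hypothetical data, not part of the original code):
+#   x1=rnorm(50); y1=rnorm(50); x2=rnorm(50); y2=rnorm(50)
+#   ancovaWMW(x1,y1,x2,y2)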
+# +# LP=TRUE: use running interval smoother followed by LOESS +# +if(ncol(as.matrix(x1))>1)stop('One covariate only is allowed with this function') +if(length(x1)!=length(y1))stop('x1 and y1 have different lengths') +if(length(x2)!=length(y2))stop('x2 and y2 have different lengths') +xy=elimna(cbind(x1,y1)) +x1=xy[,1] +y1=xy[,2] +xy=elimna(cbind(x2,y2)) +x2=xy[,1] +y2=xy[,2] +if(xout){ +flag<-outfun(x1,...)$keep +x1<-x1[flag] +y1<-y1[flag] +flag<-outfun(x2,...)$keep +x2<-x2[flag] +y2<-y2[flag] +} +dv.sum=NULL +if(is.na(pts[1])){ +npt<-5 +CC=5 +isub<-c(1:5) # Initialize isub +test<-c(1:5) +xorder<-order(x1) +y1<-y1[xorder] +x1<-x1[xorder] +xorder<-order(x2) +y2<-y2[xorder] +x2<-x2[xorder] +n1<-1 +n2<-1 +vecn<-1 +for(i in 1:length(x1))n1[i]<-length(y1[near(x1,x1[i],fr1)]) +for(i in 1:length(x1))n2[i]<-length(y2[near(x2,x1[i],fr2)]) +for(i in 1:length(x1))vecn[i]<-min(n1[i],n2[i]) +sub<-c(1:length(x1)) +isub[1]<-min(sub[vecn>=12]) +isub[5]<-max(sub[vecn>=12]) +isub[3]<-floor((isub[1]+isub[5])/2) +isub[2]<-floor((isub[1]+isub[3])/2) +isub[4]<-floor((isub[3]+isub[5])/2) +mat<-matrix(NA,5,8) +dimnames(mat)<-list(NULL,c('X','n1','n2','p.hat','ci.low','ci.hi','p.value','p.crit')) +for (i in 1:5){ +g1<-y1[near(x1,x1[isub[i]],fr1)] +g2<-y2[near(x2,x1[isub[i]],fr2)] +g1<-g1[!is.na(g1)] +g2<-g2[!is.na(g2)] +test<-cidv2(g1,g2,alpha=alpha) +dv.sum=rbind(dv.sum,test$summary.dvals) +mat[i,1]<-x1[isub[i]] +mat[i,2]<-length(g1) +mat[i,3]<-length(g2) +mat[i,4]<-test$p.hat +mat[i,5]<-test$p.ci[1] +mat[i,6]<-test$p.ci[2] +mat[i,7]<-test$p.value +}} +if(!is.na(pts[1])){ +CC=length(pts) +n1<-1 +n2<-1 +vecn<-1 +for(i in 1:length(pts)){ +n1[i]<-length(y1[near(x1,pts[i],fr1)]) +n2[i]<-length(y2[near(x2,pts[i],fr2)]) +} +mat<-matrix(NA,length(pts),8) +dimnames(mat)<-list(NULL,c('X','n1','n2','p.hat','ci.low','ci.hi','p.value','p.crit')) +for (i in 1:length(pts)){ +g1<-y1[near(x1,pts[i],fr1)] +g2<-y2[near(x2,pts[i],fr2)] +g1<-g1[!is.na(g1)] +g2<-g2[!is.na(g2)] +test=cidv2(g1,g2,alpha=alpha) +dv.sum=rbind(dv.sum,test$summary.dvals) +mat[i,1]<-pts[i] +mat[i,2]<-length(g1) +mat[i,3]<-length(g2) +if(length(g1)<=5)print(paste('Warning, there are',length(g1),' points corresponding to the design point X=',pts[i])) +if(length(g2)<=5)print(paste('Warning, there are',length(g2),' points corresponding to the design point X=',pts[i])) +mat[i,4]<-test$p.hat +mat[i,5]<-test$p.ci[1] +mat[i,6]<-test$p.ci[2] +mat[i,7]<-test$p.value +}} +dvec<-alpha/c(1:CC) +temp2<-order(0-mat[,6]) +mat[temp2,8]=dvec +if(plotit){ +runmean2g(x1,y1,x2,y2,fr=fr1,est=est,sm=sm,xout=FALSE,LP=LP,...) +} +list(output=mat,summary=dv.sum) +} + +ghtrim<-function(tr=.2,g=0,h=0){ +# +# Compute trimmed mean of a g-and-h distribution. +# +# +if(g==0)val=0 +if(g>0){ +low=qnorm(tr) +up=-1*low +val=integrate(ftrim,low,up,tr=tr,g=g,h=h)$value +val=val/(1-2*tr) +} +val +} + +ftrim<-function(z,tr,g,h){ +gz=(exp(g*z)-1)*exp(h*z^2/2)/g +res=dnorm(z)*gz +res +} + +DancovaV2<-function(x1=NULL,y1=NULL,x2=NULL,y2=NULL,xy=NULL,fr1=1,fr2=1, +est=tmean,alpha=.05,plotit=TRUE,xlab='X',ylab='Y',qvals=c(.25,.5,.75),sm=FALSE, +xout=FALSE,eout=FALSE,outfun=out,DIF=FALSE,LP=TRUE,method='hochberg', +nboot=500,SEED=TRUE,nreps=2000,MC=TRUE,cpp=FALSE, +SCAT=TRUE,pch1='*',pch2='+', +nmin=12,q=.5,...){ +# +# Compare two dependent groups using the ancova method. +# No parametric assumption is made about the form of +# the regression lines--a running interval smoother is used. 
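+#
+# Comparisons are made at the quantiles of the time 1 covariate specified by
+# qvals; for example, the default qvals=c(.25,.5,.75) compares the groups at
+# the .25, .5 and .75 quantiles of x1.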
+# +# Like Dancova, only bootstrap samples are obtained by resampling +# from c(x1,y1,x2,y2) rather than conditioning on the x value as done by Dancova. +# This function tends to have more power than Dancova. +# +# One covariate only is allowed. +# +# method='hochberg +# By default, family wise error rate is controlled by Hochberg's methoe + +# To get critical p-value, need the following commands to get access to the software. +# library(`devtools') +# install_github( `WRScpp', `mrxiaohe') + +# Assume data are in xy having four columns: x1, y1, x2 and y2. +# +# Or can have the +# data stored in four separate variables: +# x1 y1 x2 and y2 +# +# x1 y1 are measures at time 1 +# x2 y2 are measures at time 2 +# +# LP=T, when plotting, running interval smoother is smoothed again using lplot. +# sm=T will create smooths using bootstrap bagging. +# pts can be used to specify the design points where the regression lines +# are to be compared. +# +# q=.5 means when est=hd (Harrell-Davis estimator), median is estimated. +# +# eout=TRUE will eliminate all outliers when plotting. +# +if(SEED)set.seed(2) +iter=nreps +if(!is.null(x1[1])){ +if(ncol(as.matrix(x1))>1)stop('One covariate only is allowed with this function') +if(length(x1)!=length(y1))stop('x1 and y1 have different lengths') +if(length(x1)!=length(x2))stop('x1 and y2 have different lengths') +if(length(x2)!=length(y2))stop('x2 and y2 have different lengths') +if(length(y1)!=length(y2))stop('y1 and y2 have different lengths') +xy=cbind(x1,y1,x2,y2) +} +n=nrow(elimna(xy)) +if(plotit){ +ef=identical(est,hd) +if(!ef)runmean2g(xy[,1],xy[,2],xy[,3],xy[,4],fr=fr1,est=est,sm=sm,xout=xout,LP=LP,eout=eout, +xlab=xlab,ylab=ylab,SCAT=SCAT,pch1=pch1,pch2=pch2,...) +if(ef)runmean2g(xy[,1],xy[,2],xy[,3],xy[,4],fr=fr1,est=hd,sm=sm,xout=xout,LP=LP,q=q,eout=eout, +xlab=xlab,ylab=ylab,SCAT=SCAT,pch1=pch1,pch2=pch2,...) +} + +#eliminate this code and use Hochberg instead +#if(is.null(p.crit)){ +#if(cpp)library(WRScpp) +#p.crit=DancGLOB_pv(n,fr1=fr1,fr2=fr2,nboot=nboot,est=est,SEED=SEED,iter=iter, +#nmin=nmin,MC=MC,alpha=alpha,qvals=qvals,cpp=cpp)$p.crit +# +#} + +pts=NULL +#if(is.null(pts)){ +for(i in 1:length(qvals))pts=c(pts,qest(xy[,1],qvals[i])) +#} +if(SEED)set.seed(2) +ef=identical(est,hd) +n=nrow(xy) +est1=NA +est2=NA +J=length(pts) +est1=matrix(NA,nrow=nboot,ncol=J) +est2=matrix(NA,nrow=nboot,ncol=J) +# +data=matrix(sample(n,size=n*nboot,replace=TRUE),ncol=nboot,nrow=n) +if(!MC){ +if(!ef){ +est1=apply(data,2,DancGLOB_sub,xy=xy[,1:2],pts=pts,est=est,fr=fr1,nmin=nmin,...) +est2=apply(data,2,DancGLOB_sub,xy=xy[,3:4],pts=pts,est=est,fr=fr2,nmin=nmin,...) +} +if(ef){ +est1=apply(data,2,DancGLOB_sub,xy=xy[,1:2],pts=pts,est=hd,fr=fr1,nmin=nmin,q=q,...) +est2=apply(data,2,DancGLOB_sub,xy=xy[,3:4],pts=pts,est=hd,fr=fr2,nmin=nmin,q=q,...) +} +est1=t(as.matrix(est1)) +est2=t(as.matrix(est2)) +} +if(MC){ +library(parallel) +data=listm(data) +if(!ef){ +est1=mclapply(data,DancGLOB_sub,xy=xy[,1:2],pts=pts,est=est,fr=fr1,nmin=nmin,...) +est2=mclapply(data,DancGLOB_sub,xy=xy[,3:4],pts=pts,est=est,fr=fr2,nmin=nmin,...) +} +if(ef){ +est1=mclapply(data,DancGLOB_sub,xy=xy[,1:2],pts=pts,est=hd,fr=fr1,nmin=nmin,q=q,...) +est2=mclapply(data,DancGLOB_sub,xy=xy[,3:4],pts=pts,est=hd,fr=fr2,nmin=nmin,q=q,...) 
+} +est1=t(matl(est1)) +est2=t(matl(est2)) +} +pv=NA +for(j in 1:J){ +pv[j]=mean(est1[,j]1)stop('One covariate only is allowed with this function') +if(length(x1)!=length(y1))stop('x1 and y1 have different lengths') +if(length(x2)!=length(y2))stop('x2 and y2 have different lengths') +xy1=elimna(cbind(x1,y1)) +xy2=elimna=cbind(x2,y2) +n1=nrow(xy1) +n2=nrow(xy2) +if(plotit){ +ef=identical(est,hd) +if(!ef)runmean2g(xy1[,1],xy1[,2],xy2[,1],xy2[,2],fr=fr1,est=est,sm=sm,xout=xout,LP=LP,eout=eout, +xlab=xlab,ylab=ylab,SCAT=SCAT,pch1=pch1,pch2=pch2,...) +if(ef)runmean2g(xy1[,1],xy1[,2],xy2[,1],xy2[,2],fr=fr1,est=hd,sm=sm,xout=xout,LP=LP,q=q,eout=eout, +xlab=xlab,ylab=ylab,SCAT=SCAT,pch1=pch1,pch2=pch2,...) +} +if(is.null(p.crit)){ +if(FAST){ +if(alpha==.05){ +nm=max(c(n1,n2)) +if(nm<=800){ +nv=c(50,60,80,100,200,300,500,800) +if(qpts){ +pv=c(.02709,.0283,.0306,.02842,.02779,.02410,.02683,.01868,.02122) +p.crit=lplot.pred(1/nv,pv,1/n1)$yhat +} +if(!qpts){ +pv=c(.020831,.017812,.015796,.014773,.012589,.015664,.011803,.012479) +p.crit=lplot.pred(1/nv,pv,1/n1)$yhat +}} +}}} +if(is.null(p.crit)){ +p.crit=ancovaV2.pv(n1,n2,nreps=nreps,MC=MC,qpts=qpts,est=est,qvals=qvals,SEED=SEED, +alpha=alpha,nboot=nboot)$p.crit +} +pts=NULL +if(qpts)for(i in 1:length(qvals))pts=c(pts,qest(xy1[,1],qvals[i])) +if(!qpts)pts=ancova(x1,y1,x2,y2,pr=FALSE,plotit=FALSE)$output[,1] +if(SEED)set.seed(2) +ef=identical(est,hd) +est1=NA +est2=NA +J=length(pts) +est1=matrix(NA,nrow=nboot,ncol=J) +est2=matrix(NA,nrow=nboot,ncol=J) +# +data1=matrix(sample(n1,size=n1*nboot,replace=TRUE),ncol=nboot,nrow=n1) +data2=matrix(sample(n2,size=n2*nboot,replace=TRUE),ncol=nboot,nrow=n2) +if(!MC){ +if(!ef){ +est1=apply(data1,2,DancGLOB_sub,xy=xy1[,1:2],pts=pts,est=est,fr=fr1,nmin=nmin,...) +est2=apply(data2,2,DancGLOB_sub,xy=xy2[,1:2],pts=pts,est=est,fr=fr2,nmin=nmin,...) +} +if(ef){ +est1=apply(data1,2,DancGLOB_sub,xy=xy1[,1:2],pts=pts,est=hd,fr=fr1,nmin=nmin,q=q,...) +est2=apply(data2,2,DancGLOB_sub,xy=xy2[,1:2],pts=pts,est=hd,fr=fr2,nmin=nmin,q=q,...) +} +est1=t(as.matrix(est1)) +est2=t(as.matrix(est2)) +} +if(MC){ +library(parallel) +data1=listm(data1) +data2=listm(data2) +if(!ef){ +est1=mclapply(data1,DancGLOB_sub,xy=xy1[,1:2],pts=pts,est=est,fr=fr1,nmin=nmin,...) +est2=mclapply(data2,DancGLOB_sub,xy=xy2[,1:2],pts=pts,est=est,fr=fr2,nmin=nmin,...) +} +if(ef){ +est1=mclapply(data1,DancGLOB_sub,xy=xy1[,1:2],pts=pts,est=hd,fr=fr1,nmin=nmin,q=q,...) +est2=mclapply(data2,DancGLOB_sub,xy=xy2[,1:2],pts=pts,est=hd,fr=fr2,nmin=nmin,q=q,...) +} +est1=t(matl(est1)) +est2=t(matl(est2)) +} +pv=NULL +if(J==1){ +est1=t(as.matrix(est1)) +est2=t(as.matrix(est2)) +} +for(j in 1:J){ +pv[j]=mean(est1[,j]=crit +out.id=z[flag] +n.out=sum(flag) +nums=c(1:length(x)) +keep=nums[!flag] +} +if(ncol(x)>1)stop('Use function out with outfun=wmean.cov') +list(n=length(x),n.out=n.out,out.value=x[flag],out.id=nums[flag],keep=keep) +} +wmean.cov<-function(x,tr=0){ +# +# Compute Winsoriced mean and covariance for data in x +# +loc=apply(x,2,mean,tr=tr) +cv=wincov(x,tr=tr) +list(center=loc,cov=cv) +} +rngh<-function(n,rho=0,p=2,g=0,h=0,ADJ=TRUE,pr=TRUE){ +# +# Generate data from a multivariate distribution where the marginal distributions +# are g-and-h distributions that have common correlation rho. +# Strategy: adjust the correlation when generating data from multivariate normal normal distribution so that +# when transforming the marginal distributions to a g-and-h distribution, the correlation is rho. 
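+#
+# Illustrative call (hypothetical values, not part of the original code):
+#   x=rngh(200,rho=.3,p=3,g=.2,h=.1)   # 200 by 3 matrix with g-and-h marginals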
+# +# +library(MASS) +if(ADJ){ +adjrho=rngh.sub(n,g,h,rho)$rho.adjusted +rho=adjrho +if(pr)print(paste('Adjusted rho',rho)) +} +cmat<-matrix(rho,p,p) +diag(cmat)<-1 +x=mvrnorm(n=n, mu=rep(0,p), Sigma=cmat) +for(i in 1:p){ +if (g>0){ +x[,i]<-(exp(g*x[,i])-1)*exp(h*x[,i]^2/2)/g +} +if(g==0)x[,i]<-x[,i]*exp(h*x[,i]^2/2) +} +x +} + + +rngh.sub<-function(n,g,h,rho,ntest=1000000){ +# +# Determine adjusted value for rho so that +# the actual correlation is some desired value +# +# rho: desired correlation +vals=seq(rho,.99,.01) +for(i in 1:length(vals)){ +adj=vals[i] +cmat<-matrix(vals[i],2,2) +diag(cmat)<-1 +x=mvrnorm(ntest,mu=c(0,0),Sigma=cmat) +for(i in 1:2){ +if (g>0){ +x[,i]<-(exp(g*x[,i])-1)*exp(h*x[,i]^2/2)/g +} +if(g==0)x[,i]<-x[,i]*exp(h*x[,i]^2/2) +} +chk=cor(x) +if(abs(chk[1,2]-rho)<.01)break +if(chk[1,2]>=rho)break +} +list(rho.adjusted=adj,rho.actual=chk[1,2]) +} +rplot.res<-function(x,y,pv=1,est = tmean, scat = TRUE, fr = NA, plotit = TRUE, +pyhat = FALSE, efr = 0.5, theta = 50, phi = 25, scale = TRUE, +expand = 0.5, SEED = TRUE, varfun = pbvar, outfun = outpro,STAND=TRUE, +nmin = 0, xout = FALSE, out = FALSE, eout = FALSE, xlab='X', +ylab ='Y',zscale=FALSE,zlab=' ', pr=TRUE,duplicate='error', +ticktype='simple',LP=TRUE,...){ +# +# Apply rplot excluding the independent variable indicated by the argument +# pv. +# So pv=1 means will exclude the first predictor. +# Fit a smooth using the remaing variables, compute the residuals, then plot +# the smooth using the residuals as the dependent variable and +# the variables indicated by pv as the independent variables. +# +xy=na.omit(cbind(x,y)) +p=ncol(x) +p1=p+1 +if(xout){ +flag=outfun(xy[,1:p],plotit=FALSE,STAND=STAND,...)$keep +xy=xy[flag,] +} +x=xy[,1:p] +y=xy[,p1] +res=y-rplot(x[,1:2],y,est=est,scat=scat,varfun=varfun,expand=expand,nmin=nmin, +pyhat=TRUE,plotit=FALSE,fr=fr,xout=FALSE)$yhat +outp=rplot(x[,pv],res,fr=fr,xout=FALSE,efr=efr,theta=theta,phi=phi, +scale=scale,SEED=SEED,xlab=xlab,ylab=ylab,zlab=zlab,pr=FALSE, +ticktype=ticktype,LP=LP,...) +outp +} + +trimcimul<-function(x, tr = 0.2, alpha = 0.05,null.value=0){ +# +# For J dependent random variables, apply trimci to each +# FWE controlled with Rom-Hochberg method +# +# x is a matrix having J columns. (Can have list mode as well.) 
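+#
+# Illustrative call (hypothetical data, not part of the original code):
+#   m=matrix(rnorm(120),ncol=4)
+#   trimcimul(m,tr=.2)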
+if(!is.matrix(x))x<-matl(x) +if(!is.matrix(x))stop('Data must be stored in a matrix or in list mode.') +J<-ncol(x) +xbar<-vector('numeric',J) +ncon<-J +if(alpha==.05){ +dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) +if(ncon > 10){ +avec<-.05/c(11:ncon) +dvec<-c(dvec,avec) +}} +if(alpha==.01){ +dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) +if(ncon > 10){ +avec<-.01/c(11:ncon) +dvec<-c(dvec,avec) +}} +if(alpha != .05 && alpha != .01)dvec<-alpha/c(1:ncon) +psihat<-matrix(0,J,6) +dimnames(psihat)<-list(NULL,c('Variable','estimate','ci.lower','ci.upper','adj.ci.lower','adj.ci.upper')) +test<-matrix(0,J,5) +dimnames(test)<-list(NULL,c('Variable','test','p.value','p.crit','se')) +temp1<-NA +nval=NULL +for (d in 1:J){ +psihat[d,1]<-d +dval=na.omit(x[,d]) +nval[d]=length(dval) +temp=trimci(dval,tr=tr,pr=FALSE,null.value=null.value) +test[d,1]<-d +test[d,2]<-temp$test.stat +test[d,3]=temp$p.value +test[d,5]<-temp$se +psihat[d,2]<-temp$estimate +psihat[d,3]<-temp$ci[1] +psihat[d,4]<-temp$ci[2] +} +temp1=test[,3] +temp2<-order(0-temp1) +zvec<-dvec[1:ncon] +test[temp2,4]<-zvec +num.sig=sum(test[,3]<=test[,4]) +# compute adjusted confidence intervals having simultaneous probability coverage 1-alpha +for(d in 1:J){ +dval=na.omit(x[,d]) +psihat[d,5:6]=trimci(dval,tr=tr,alpha=test[d,4],pr=FALSE,null.value=null.value)$ci +} +list(n=nval,test=test,psihat=psihat,num.sig=num.sig) +} + +g5plot<-function(x1,x2,x3=NULL,x4=NULL,x5=NULL,fr=.8,aval=.5,xlab='X',ylab='',color=rep('black',5),main=NULL,sub=NULL){ +# +# plot estimates of the density functions for up to 5 groups. +# using an adaptive kernel density estimator +# +if(is.matrix(x1)||is.data.frame(x1))x1=listm(x1) +if(is.list(x1)){ +x=x1 +J=length(x) +ic=0 +for(j in 1:J){ +ic=ic+1 +if(ic==1)x1=x[[1]] +if(ic==2)x2=x[[2]] +if(ic==3)x3=x[[3]] +if(ic==4)x4=x[[4]] +if(ic==5)x5=x[[5]] +} +} +x1<-elimna(x1) +x2<-elimna(x2) +x1<-sort(x1) +x2<-sort(x2) +if(!is.null(x3))x3<-sort(x3) +if(!is.null(x4))x4<-sort(x4) +if(!is.null(x5))x5<-sort(x5) +z3=NULL +z4=NULL +z5=NULL +z1<-akerd(x1,aval=aval,fr=fr,pyhat=TRUE,plotit=FALSE) +z2<-akerd(x2,aval=aval,fr=fr,pyhat=TRUE,plotit=FALSE) +if(!is.null(x3))z3=akerd(x3,aval=aval,fr=fr,pyhat=TRUE,plotit=FALSE) +if(!is.null(x4))z4=akerd(x4,aval=aval,fr=fr,pyhat=TRUE,plotit=FALSE) +if(!is.null(x5))z5=akerd(x5,aval=aval,fr=fr,pyhat=TRUE,plotit=FALSE) +plot(c(x1,x2,x3,x4,x5),c(z1,z2,z3,z4,z5), xlab =xlab, ylab =ylab, type = 'n',main=main,sub=sub) +lines(x1,z1,col=color[1]) +lines(x2,z2,lty=2,col=color[2]) +if(!is.null(x3))lines(x3,z3,lty=3,col=color[3]) +if(!is.null(x4))lines(x4,z4,lty=4,col=color[4]) +if(!is.null(x5))lines(x5,z5,lty=5,col=color[5]) +} + +MEDanova<-function(x,op=3,nboot=600,MC=FALSE,SEED=TRUE){ +# +# Test global hypothesis that J independent groups +# have equal medians. +# Performs well when there are tied values. +# +# Basically, use pbadepth in conjunction with the Harrell--Davis +# estimator. 
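+#
+# Illustrative call (hypothetical data, not part of the original code):
+#   g=list(rnorm(30),rnorm(30),rnorm(30))
+#   MEDanova(g)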
+# +output=pbadepth(x,est=hd,allp=TRUE,SEED=SEED,op=op,nboot=nboot,MC=MC) +output +} + +Qmcp<-function(x,q=.5,con=0,SEED=TRUE,THD=FALSE,nboot=NA,alpha=.05,HOCH=FALSE){ +# +# Multiple comparisons among independent groups +# based on the quantile indicated by the argument +# q +# +# THD=TRUE would use the trimmed Harrell--Davis estimator +# The default is the Harrell--Davis estimator +# Familywise error is controlled with the Hochberg's method + +# +# The Harrell--Davis estimator is used in order to deal with tied values +# +est=hd +if(THD)est=thd +res=linconpb(x,est=est,q=q,nboot=nboot,SEED=SEED,con=con,method='hoch') +res +} + + +Qinterplot<-function(x,q=.5){ +# +# Plot interactions based on quantiles estimated via the +# Harrell--Davis estimator +# +if(is.matrix(x) || is.data.frame(x))x=listm(x) +if(length(x)!=4)stop('Should have a 2 by 2 design for a total of four groups') +qv=lapply(x,hd,q=q) +qv=as.vector(matl(qv)) +interplot(2,2,locvec=qv,xlab='Fac 1',ylab=paste(q,'Quantile'),trace.label='Fac 2') +} + + +lplot.pred<-function(x,y,pts=NULL,xout=FALSE,outfun=outpro,span=2/3,family='gaussian',...){ +# +# Using loess, compute predicted values based on the data in pts +# +x<-as.matrix(x) +d=ncol(x) +dp1=d+1 +m<-elimna(cbind(x,y)) +n.orig=nrow(m) +n.keep=n.orig +if(xout){ +flag<-outfun(m[,1:d],plotit=FALSE,...)$keep +m<-m[flag,] +n.keep=nrow(m) +} +x<-m[,1:d] +y<-m[,dp1] +if(is.null(pts))pts=x +fit=loess(y~x,span=span,family=family) +pred=predict(fit,pts) +list(n=n.orig,n.keep=n.keep,x.used=x,yhat=pred) +} + +regse<-function(x,y,xout=FALSE,regfun=tsreg,outfun=outpro,nboot=200,SEED=TRUE,...){ +# +# Estimate the standard errors and +# covariance matrix associated with the estimates of +# the regression parameters based on the estimator indicated by the +# argument +# regfun: default is Theil--Sen. +# So the diagonal elements of the matrix returned by this function +# are the squared standard errors of the intercept estimator, etc. +# +# Function returns +# param.estimates: the estimate of the intercept and slopes +# covar: the covariance matrix associated with the estimator used +# s.e.: the standard errors. +# + +if(SEED)set.seed(2) +x<-as.matrix(x) +p1<-ncol(x)+1 +p<-ncol(x) +xy<-cbind(x,y) +xy<-elimna(xy) +x<-xy[,1:p] +y<-xy[,p1] +nrem=length(y) +estit=regfun(x,y,xout=xout,...)$coef +if(xout){ +m<-cbind(x,y) +flag<-outfun(x,plotit=FALSE,...)$keep +m<-m[flag,] +x<-m[,1:p] +y<-m[,p1] +} +x<-as.matrix(x) +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot) +bvec<-apply(data,1,regboot,x,y,regfun,xout=FALSE,...) +#Leverage points already removed. +# bvec is a p+1 by nboot matrix. The first row +# contains the bootstrap intercepts, the second row +# contains the bootstrap values for first predictor, etc. +sqe=var(t(bvec)) +list(param.estimates=estit,covar=sqe,s.e.=sqrt(diag(sqe))) +} +smRstr<-function(x,y,fr=1,est=tmean,nmin=1,varfun=winvar,xout=FALSE,outfun=outpro,...){ +# +# Estimate explanatory strength of an association (a generlization of +# Pearson's correlation) based on a running interval smoother +# and a leave-one-out cross-validation technique. +# Prediction error is estimated as well. 
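+#
+# The strength returned is sqrt((nopre-pe)/nopre), where pe is the
+# leave-one-out prediction error and nopre is the prediction error when no
+# predictor is used; it is set to zero when pe is not smaller than nopre.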
+# +# Arguments: +# est: the measure of location to be used by rplot +# varfun: the measure of variation used when estimating prediction error +# Example: varfun=pbvar would compute the percentage bend measure of +# of varition between observed and predicted values of the +# dependent variable. +# fr: the span used by rplot +# +# Function returns +# str: strength of the association +# pred.error: prediction error +# +xy=elimna(cbind(x,y)) +if(xout){ +flag=outfun(x,plotit=FALSE)$keep +xy=xy[flag,] +} +p=ncol(xy) +pm1=p-1 +x=xy[,1:pm1] +y=xy[,p] +x=as.matrix(x) +px=ncol(x) +px1=px+1 +n=nrow(xy) +val=NA +for(i in 1:n){ +if(px==1)val[i]=runhat(xy[-i,1:px],xy[-i,px1],x[i,1:px],fr=fr,nmin=nmin,est=est) +if(px>1)val[i]=rung3hat(xy[-i,1:px],xy[-i,px1],pts=t(as.matrix(x[i,1:px])),fr=fr,est=est,...)$rmd +} +dif=y-val +dif=elimna(dif) +pe=varfun(dif) +nopre=locCV(y,varfun=varfun,locfun=est,...)# no predictor +rat=(nopre-pe)/nopre +str=0 +if(rat>0)str=sqrt(rat) +list(str=str,pred.error=pe) +} + + +clnorm<-function(n,epsilon=.1,k=10){ +# +# generate n observations from a contaminated lognormal +# distribution +# +# Using default values, median is approximately 1.14 and 20% trimmed mean is 1.33 +if(epsilon>1)stop('epsilon must be less than or equal to 1') +if(epsilon<0)stop('epsilon must be greater than or equal to 0') +if(k<=0)stop('k must be greater than 0') +val<-rlnorm(n) +uval<-runif(n) +flag<-(uval<=1-epsilon) +val[!flag]<-k*val[!flag] +val +} +twoKlin<-function(x=NULL,x1=NULL,x2=NULL,tr=.2,alpha=.05,pr=TRUE,opt=1){ +# +# A step-down MCP based on K independent tests. +# It is essential that the tests are independent. +# +# Use Fisher method based on p-values coupled with Hochberg +# +# Data are assumed to be stored in two R variables, x1 and x2 or in one +# R variable, x +# +# If stored in x1 and x2, they are assumed to be matrices with K columns +# or to have list mode, both having length K. +# +# If the data are stored in x, +# x is assumed to have 2K columns if a matrix or length 2K if it has list mode. +# +# If data are stored in x1 and x2, for each column, compute a p-value. +# That is, perform a test based on the data in column 1 of x1 and x2, +# followed by a test using the data in column 2 of x1 and x2, etc. +# +# If data are stored in x, the first test is based +# on the data in columns 1 and K+1, +# the second test is based on columns 2 and K+2, etc. 
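+#
+# For example (illustrative only), with K=3 the tests are based on
+# columns (1,4), (2,5) and (3,6).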
+# +# opt=1 Fisher's method +# opt=2 Chen-Nadarajah method +# opt=3 Max method +# +if(is.null(x[1])){ +if(is.matrix(x1))x=cbind(x1,x2) +if(is.list(x1))x=c(x1,x2) +} +if(is.matrix(x))x=listm(x) +crit=NA +n1=NA +n2=NA +if(is.matrix(x) || is.data.frame(x))K2=ncol(x) +if(is.list(x))K2=length(x) +K=floor(K2/2) +if(2*K!=K2)stop('Total number of groups, K2, should be an even number') +ic=0 +ic2=K +pv=NULL +for(i in 1:K){ +ic=ic+1 +ic2=ic2+1 +testit=yuen(x[[ic]],x[[ic2]],tr=tr,alpha=alpha) +n1[ic]=testit$n1 +n2[ic]=testit$n2 +pv[ic]=testit$p.value +} +pick=NULL +v=order(pv) +ic=0 +for(i in K:1){ +K2=2*K +flag=TRUE +if(opt==1){ +i2=i*2 +if(i==K)res=(0-2)*sum(log(pv)) # Fisher test statistic +if(ialpha)flag=TRUE +if(pvF<=alpha/(K+1-i)){ +ic=ic+1 +pick=c(pick,v[ic]) +flag=FALSE +if(pv[v[ic]]>alpha)flag=TRUE +} +if(flag)break +} +Decision=rep('Not Sig',length(pv)) +if(!is.null(pick))Decision[pick]='Reject' +nsig=sum(length(pick)) +list(n1=n1,n2=n2,p.values=pv, +Decisions=as.matrix(Decision),num.sig=nsig) +} + + + + +twobicipv<-function(r1=sum(x),n1=length(x),r2=sum(y),n2=length(y),x=NA,y=NA,alpha=.05){ +# +# Compute a p-value based on Beal's method for comparing two independent +# binomials. +# +alph=seq(.001,.999,.001) +for(i in 1:length(alph)){ +pv=alph[i] +chk=twobici(r1=r1,n1=n1,r2=r2,n2=n2,x=x,y=y,alpha=alph[i])$ci #$ +if(chk[1]>0 && chk[2]>0)break +if(chk[1]<0 && chk[2]<0)break +} +reg=twobici(r1=r1,n1=n1,r2=r2,n2=n2,x=x,y=y,alpha=alpha) +list(p.value=pv,ci=reg$ci,p1=reg$p1,p2=reg$p2) +} + +ols2ci<-function(x1,y1,x2,y2,xout=FALSE,outfun=outpro,STAND=TRUE,alpha=05, +method='hoch',SO=TRUE,HC3=FALSE,plotit=TRUE,xlab='X',ylab='Y',...){ +# +# Compare the OLS regression parameters for two independent groups +# SO=TRUE means p-values adjusted only for the slopes. SO=FALSE +#. include the intercept when adjusting +# +p=ncol(as.matrix(x1)) +p1=p+1 +if(p==1 && plotit)reg2plot(x1,y1,x2,y2,xlab='X',ylab='Y',xout=xout,outfun=outpro,regfun=ols,...) +m1=elimna(cbind(x1,y1)) +m2=elimna(cbind(x2,y2)) +x1=m1[,1:p] +y1=m1[,p1] +x2=m2[,1:p] +y2=m2[,p1] +x=list() +y=list() +x[[1]]=x1 +x[[2]]=x2 +y[[1]]=y1 +y[[2]]=y2 +ivl=c(1:ncol(as.matrix(x1))) +iv=ncol(as.matrix(x1)) +iv1=iv+1 +rlab=paste('slope',ivl) +rlab=c('intercept',rlab) +res=olsWmcp(x,y,xout=xout,outfun=outfun,STAND=STAND,alpha=alpha,HC3=HC3) +outp=matrix(NA,nrow=nrow(res$output),ncol=7) +dimnames(outp)=list(rlab,c('Est.1','Est.2','Dif','ci.low','ci.up','p.value','adj.p.value')) +print(res) +outp[,1]=ols(x1,y1,xout=xout,outfun=outfun)$coef +outp[,2]=ols(x2,y2,xout=xout,outfun=outfun)$coef +outp[,3]=outp[,1]-outp[,2] +outp[,4]=res$output[,3] +outp[,5]=res$output[,4] +outp[,6]=res$output[,5] +if(!SO)outp[,7]=p.adjust(outp[,6],method=method) +else outp[2:p1,7]=p.adjust(outp[2:p1,6],method=method) +list(n=res$n,output=outp) +} + + +ancovampG<-function(x1,y1,x2,y2,fr1=1,fr2=1, tr=.2, +alpha=.05, pts=NULL,SEED=TRUE,test=yuen,DH=FALSE,FRAC=.5,cov.fun=skip.cov,ZLIM=TRUE, +pr=FALSE,q=.5,plotit=FALSE,LP=FALSE,theta=50,xlab=' X1',ylab='X2 ',SCAT=FALSE,zlab='p.value ',ticktype='detail',...){ +# +# ANCOVA: +# +# This function generalizes the R function ancovamp +# so that any hypothesis testing method +# can be used to compare groups at specified design points. +# +# No parametric assumption is made about the form of +# the regression surface--a running interval smoother is used. 
+# Design points are chosen based on depth of points in x1 if pts=NULL +# Assume data are in x1 y1 x2 and y2, can have more than one covariate +# +# test: argument test determines the method that will be used to compare groups. +# two choices: yuen, qcomhd qcomhdMC +# Example: test=qcomhd would compare medians using a percentile bootstrap +# q: controls the quantile used by qcomhd. +# +# pts: a matrix of design points at which groups are compared +# +# DH=TRUE, groups compared at the deepest (1-FRAC) design points. +# if DH=TRUE, there are two covariates and plot=TRUE, plot a smooth with dependent variable=p.values if pv=TRUE +# or the estimated difference in the measures of location if pv=FALSE +# If SCAT=TRUE, instead create a scatterplot of the points used in pts, the covariate values +# and mark the significant ones with * +# +# theta can be use to rotate the plot. +# +# SEED=TRUE sets the seed for the random number generator +# so that same result is always returned when +# using a bootstrap method or when using cov.mve or cov.mcd +# +# cov.fun: returns covariance matrix in $cov (e.g. +# skipcov does not return it in $cov, but skip.cov does. So cov.mve could be used) +# +# Returns: +# designs points where comparisons were made. +# n's used, p-values +# crit.p.value: critical p-value based on Hochberg's method for controlling FWE +# sig=1 if a signficant result based on Hochberg; 0 otherwise +# +t.sel=0 +if(identical(test,yuen))t.sel=1 +if(identical(test,qcomhd))t.sel=2 +if(identical(test,qcomhdMC))t.sel=2 +if(identical(test,binom2g))t.sel=3 +if(t.sel==0)stop('Argument test should be either yuen, qcomhd, qcomhd or binom2g') +x1=as.matrix(x1) +p=ncol(x1) +p1=p+1 +m1=elimna(cbind(x1,y1)) +x1=m1[,1:p] +y1=m1[,p1] +x2=as.matrix(x2) +p=ncol(x2) +p1=p+1 +m2=elimna(cbind(x2,y2)) +x2=m2[,1:p] +y2=m2[,p1] +# +# +# +if(is.null(pts[1])){ +x1<-as.matrix(x1) +pts<-ancdes(x1,DH=DH,FRAC=FRAC) +pts=unique(pts) +} +pts<-as.matrix(pts) +n1<-1 +n2<-1 +vecn<-1 +mval1<-cov.fun(x1) +mval2<-cov.fun(x2) +for(i in 1:nrow(pts)){ +n1[i]<-length(y1[near3d(x1,pts[i,],fr1,mval1)]) +n2[i]<-length(y2[near3d(x2,pts[i,],fr2,mval2)]) +} +flag<-rep(TRUE,nrow(pts)) +for(i in 1:nrow(pts))if(n1[i]<10 || n2[i]<10)flag[i]<-F +flag=as.logical(flag) +pts<-pts[flag,] +if(sum(flag)==1)pts<-t(as.matrix(pts)) +dd=NULL +if(sum(flag)==0){ +print('No comparable design points found, might increase span.') +pts=NULL +mat=NULL +dd=NULL +} +if(sum(flag)>0){ +mat<-matrix(NA,nrow(pts),6) +mat[,5]=0 +dimnames(mat)<-list(NULL,c('n1','n2','p.value','crit.p.value','Sig','est.dif')) +output=list() +for (i in 1:nrow(pts)){ +g1<-y1[near3d(x1,pts[i,],fr1,mval1)] +g2<-y2[near3d(x2,pts[i,],fr2,mval2)] +g1<-g1[!is.na(g1)] +g2<-g2[!is.na(g2)] +temp=NULL +if(identical(test,qcomhd))temp=qcomhd(g1,g2,q=q,plotit=FALSE) +if(identical(test,qcomhdMC))temp=qcomhdMC(g1,g2,q=q,plotit=FALSE) +if(identical(test,yuen))temp=yuen(g1,g2,tr=tr) +if(identical(test,binom2g))temp=binom2g(x=g1,y=g2) +if(is.null(temp$p.value))print('Argument test should be yuen, or qcomhd or qcomhdMC') +mat[i,3]=temp$p.value +output[[i]]=temp +mat[i,1]<-length(g1) +mat[i,2]<-length(g2) +if(t.sel==1)mat[i,6]=mean(g1,tr=tr)-mean(g2,tr=tr) +if(t.sel==2)mat[i,6]=hd(g1,q=q)-hd(g2,q=q) +if(t.sel==3)mat[i,6]=mean(g1)-mean(g2) +if(length(g1)<=5)print(paste('Warning, there are',length(g1),' points corresponding to the design point X=',pts[i,])) +if(length(g2)<=5)print(paste('Warning, there are',length(g2),' points corresponding to the design point X=',pts[i,])) +} +npt=nrow(pts) +dvec=alpha/c(1:npt) 
+temp2<-order(0-mat[,3]) +sigvec<-(mat[temp2,3]>=dvec) +dd=0 +if(sum(sigvec)0)mat[flag,5]=1 +} +if(plotit){ +if(!LP){ +library(scatterplot3d) +scatterplot3d(pts[,1],pts[,2],mat[,3],xlab=xlab, ylab=ylab,zlab='p.value',zlim=c(0,1)) +} +if(LP)lplot(pts,mat[,3],xlab=xlab, ylab=ylab,zlab='p.value',theta=theta,ZLIM=ZLIM,ticktype=ticktype) +} +list(points=pts,results=mat,num.sig=dd) +} + +anclog<-function(x1,y1,x2,y2,fr1=1,fr2=1, +alpha=.05, pts=NULL,SEED=TRUE,DH=FALSE,FRAC=.5,cov.fun=skip.cov, +pr=FALSE,q=.5,plotit=FALSE,pv=FALSE,theta=50,xlab=' ',ylab=' ',SCAT=FALSE,zlab=' ',...){ +res=ancovampG(x1=x1,y1=y1,x2=x2,y2=y2,fr1=fr1,fr2=fr2, tr=.2, +alpha=alpha, pts=pts,SEED=SEED,test=twobinom,DH=DH,FRAC=FRAC,cov.fun=cov.fun, +pr=pr,q=q,plotit=plotit,pv=pv,theta=theta,xlab=xlab,ylab=ylab,SCAT=SCAT,zlab=zlab,...) +list(points=res$points,results=res$results,num.sig=res$num.sig) +} + +Qregci<-function(x,y,nboot=100,alpha=.05, +qval=.5,q=NULL,SEED=TRUE,pr=TRUE,xout=FALSE,outfun=outpro,...){ +# +# Test the hypothesis that the quantile regression slopes are zero. +# Can use the .5 quantile regression line only, +# the .2 and .8 quantile regression lines, or +# the .2, .5 and .8 quantile regression lines. +# In the latter two cases, FWE is controlled for alpha=.1, .05, .025 and .01. +# +if(!is.null(q))qval=q +xx<-elimna(cbind(x,y)) +np<-ncol(xx) +p<-np-1 +y<-xx[,np] +x<-xx[,1:p] +x<-as.matrix(x) +if(xout){ +x<-as.matrix(x) +flag<-outfun(x,plotit=FALSE,...)$keep +x<-x[flag,] +y<-y[flag] +} +x<-as.matrix(x) +n<-length(y) +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +if(pr)print("Taking bootstrap samples. Please wait.") +data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot) +# determine critical value. 
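+# (The constants below give the critical value for alpha=.1, .05, .025 or .01,
+#  with a 1/sqrt(n) adjustment for sample size; other alpha values are not
+#  supported.)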
+crit<-NA +if(alpha==.1)crit<-1.645-1.19/sqrt(n) +if(alpha==.05)crit<-1.96-1.37/sqrt(n) +if(alpha==.025)crit<-2.24-1.18/sqrt(n) +if(alpha==.01)crit<-2.58-1.69/sqrt(n) +crit.fwe<-crit +if(length(qval)==2 || p==2){ +if(alpha==.1)crit.fwe<-1.98-1.13/sqrt(n) +if(alpha==.05)crit.fwe<-2.37-1.56/sqrt(n) +if(alpha==.025)crit.fwe<-2.60-1.04/sqrt(n) +if(alpha==.01)crit.fwe<-3.02-1.35/sqrt(n) +} +if(length(qval)==3 || p==3){ +if(alpha==.1)crit.fwe<-2.145-1.31/sqrt(n) +if(alpha==.05)crit.fwe<-2.49-1.49/sqrt(n) +if(alpha==.025)crit.fwe<-2.86-1.52/sqrt(n) +if(alpha==.01)crit.fwe<-3.42-1.85/sqrt(n) +} +if(is.na(crit.fwe)){ +print("Could not determine a critical value") +print("Only alpha=.1, .05, .025 and .01 are allowed") +} +if(p==1){ +bvec<-apply(data,1,Qindbt.sub,x,y,q=qval) +estsub<-NA +for(i in 1:length(qval)){ +estsub[i]<-Qreg(x,y,q=qval[i])$coef[2] +} +if(is.matrix(bvec))se.val<-sqrt(apply(bvec,1,FUN=var)) +if(!is.matrix(bvec))se.val<-sqrt(var(bvec)) +test<-abs(estsub)/se.val +ci.mat<-matrix(nrow=length(qval),ncol=3) +dimnames(ci.mat)<-list(NULL,c("Quantile","ci.lower","ci.upper")) +ci.mat[,1]<-qval +ci.mat[,2]<-estsub-crit*se.val +ci.mat[,3]<-estsub+crit*se.val +} +if(p>1){ +if(length(qval)>1){ +print("With p>1 predictors,only the first qval value is used") +} +bvec<-apply(data,1,regboot,x,y,regfun=Qreg,qval=qval[1]) +se.val<-sqrt(apply(bvec,1,FUN=var)) +estsub<-Qreg(x,y,q=qval[1])$coef +test<-abs(estsub)/se.val +ci.mat<-matrix(nrow=np,ncol=3) +dimnames(ci.mat)<-list(NULL,c("Predictor","ci.lower","ci.upper")) +ci.mat[,1]<-c(0:p) +ci.mat[,2]<-estsub-crit*se.val +ci.mat[,3]<-estsub+crit*se.val +} +list(n=length(y),test=test,se.val=se.val,crit.val=crit,crit.fwe=crit.fwe,est.values=estsub,ci=ci.mat) +} + + + + +Qindbt.sub<-function(isub,x,y,qval){ +# +# Perform regression using x[isub] to predict y[isub] +# isub is a vector of length n, +# a bootstrap sample from the sequence of integers +# 1, 2, 3, ..., n +# +# This function is used by other functions when computing +# bootstrap estimates. +# +# +# x is assumed to be a matrix containing values of the predictors. +# +xmat<-matrix(x[isub,],nrow(x),ncol(x)) +regboot<-NA +for(i in 1:length(qval)){ +regboot[i]<-Qreg(xmat,y[isub],q=qval[i])$coef[2] +} +regboot +} +binomcipv<-function(x=sum(y),nn=length(y),y=NULL,n=NA,alpha=.05,nullval=.5){ +# Compute a p-value when testing the hypothesis that the probability of +# success for a binomial distribution is equal to +# nullval, which defaults to .5 +# Pratt's method is used. +# +# y is a vector of 1s and 0s. +# Or can use the argument +# x = the number of successes observed among +# n=nn trials. +# +if(is.logical(y)){ +y=elimna(y) +temp=rep(0,length(y)) +temp[y]=1 +y=temp +} +res=binomci(x=x,nn=nn,y=y,alpha=alpha) +ci=res$ci +alph<-c(1:99)/100 +for(i in 1:99){ +irem<-i +chkit<-binomci(x=x,nn=nn,y=y,alpha=alph[i])$ci +if(chkit[1]>nullval || chkit[2]nullval || chkit[2]nullval || chkit[2]2)stop('One or two predictors only is allowed,') +p=ncol(x) +p1=p+1 +x=xy[,1:p] +y=xy[,p1] +if(xout){ +xy=cbind(x,y) +if(identical(outfun,outblp))flag=outblp(x,y,plotit=FALSE)$keep +else +flag=outfun(x)$keep +x=xy[flag,1:p] +y=xy[flag,p1] +} +if(p==1){ +plot(x,y,xlab=xlab,ylab=ylab) +abline(regfun(x,y,...)$coef) +} +if(p==2){ +pyhat=regYhat(x,y,regfun=regfun,...) 
+temp=rplot(x,pyhat,scat=FALSE,theta=theta,phi=phi,xlab=xlab,ylab=ylab,zlab=zlab,ticktype=ticktype,pr=FALSE) +} +} +FisherLSD<-function(x,alpha=.05){ +# +# Perform Fisher's LSD method +# x is assumed to be a matrix, or data frame, or to have list mode +# +if(is.matrix(x) || is.data.frame(x))x=listm(x) +n=lapply(x,length) +J=length(x) +g=NULL +X=NULL +for(j in 1:J){ +g=c(g,rep(j,n[j])) +X=c(X,x[[j]]) +} +FT=anova1(x) +res=NULL +if(FT$p.value<=alpha)res=pairwise.t.test(X,g,p.adjust.method='none') +list(ANOVA_F_p.value=FT$p.value,LSD=res) +} +dat2form<-function(x,alpha=.05){ +# +# Perform Fisher's LSD method +# x is assumed to be a matrix, or data frame, or to have list mode +# +if(is.matrix(x) || is.data.frame(x))x=listm(x) +n=lapply(x,length) +J=length(x) +g=NULL +X=NULL +for(j in 1:J){ +g=c(g,rep(j,n[j])) +X=c(X,x[[j]]) +} +g=as.factor(g) +list(x=X,g=g) +} +T.HSD<-function(x,alpha=.05,plotit=FALSE){ +# +# Perform Tukey--Kramer MCP +# +z=dat2form(x) +temp=aov(z$x~as.factor(z$g)) +v=TukeyHSD(temp,conf.level=1-alpha) +if(plotit)plot(v) +v +} +Scheffe<-function(x,con=0,alpha=.05,WARN=TRUE){ +# +# Scheffe's MCP +# +# The data are assumed to be stored in $x$ in list mode, a matrix +# or a data frame. If in list mode, +# length(x) is assumed to correspond to the total number of groups. +# It is assumed all groups are independent. +# +# con is a J by d matrix containing the contrast coefficients that are used. +# If con is not specified, all pairwise comparisons are made. +# +# Missing values are automatically removed. +# +# +if(WARN)print('WARNING: Suggest using lincon instead') +if(is.data.frame(x))x=as.matrix(x) +if(is.matrix(x))x<-listm(x) +if(!is.list(x))stop('Data must be stored in a matrix or in list mode.') +con<-as.matrix(con) +J<-length(x) +n=NA +xbar<-NA +for(j in 1:J){ +xx<-!is.na(x[[j]]) +val<-x[[j]] +x[[j]]<-val[xx] # Remove missing values +n[j]=length(x[[j]]) +xbar[j]<-mean(x[[j]]) +} +N=sum(n) +df2=N-J +AOV=anova1(x) +if(sum(con^2)==0){ +CC<-(J^2-J)/2 +df1=J-1 +psihat<-matrix(0,CC,6) +dimnames(psihat)<-list(NULL,c('Group','Group','psihat','ci.lower','ci.upper', +'p.value')) +test<-matrix(NA,CC,5) +dimnames(test)<-list(NULL,c('Group','Group','test','crit','se')) +jcom<-0 +for (j in 1:J){ +for (k in 1:J){ +if (j < k){ +jcom<-jcom+1 +test[jcom,3]<-abs(xbar[j]-xbar[k])/sqrt((J-1)*AOV$MSWG*(1/n[j]+1/n[k])) +sejk<-sqrt((CC-1)*AOV$MSWG*(1/n[j]+1/n[k])) +test[jcom,5]<-sqrt(AOV$MSWG*(1/n[j]+1/n[k])) +psihat[jcom,1]<-j +psihat[jcom,2]<-k +test[jcom,1]<-j +test[jcom,2]<-k +psihat[jcom,3]<-(xbar[j]-xbar[k]) +psihat[jcom,6]<-1-pf(test[jcom,3]^2,df1,df2) +crit=sqrt(qf(1-alpha,df1,df2)) +test[jcom,4]<-crit +psihat[jcom,4]<-(xbar[j]-xbar[k])-crit*sejk +psihat[jcom,5]<-(xbar[j]-xbar[k])+crit*sejk +}}}} +if(sum(con^2)>0){ +if(nrow(con)!=length(x)){ +stop('The number of groups does not match the number of contrast coefficients.') +} +psihat<-matrix(0,ncol(con),5) +dimnames(psihat)<-list(NULL,c('con.num','psihat','ci.lower','ci.upper', +'p.value')) +test<-matrix(0,ncol(con),4) +dimnames(test)<-list(NULL,c('con.num','test','crit','se')) +df1<-nrow(con)-1 +df2=N-nrow(con) +crit=sqrt(qf(1-alpha,df1,df2)) +for (d in 1:ncol(con)){ +psihat[d,1]<-d +psihat[d,2]<-sum(con[,d]*xbar) +sejk<-sqrt(df1*AOV$MSWG*sum(con[,d]^2/n)) +test[d,1]<-d +test[d,2]<-sum(con[,d]*xbar)/sejk +test[d,3]<-crit +test[d,4]=sqrt(AOV$MSWG*sum(con[,d]^2/n)) +psihat[d,3]<-psihat[d,2]-crit*sejk +psihat[d,4]<-psihat[d,2]+crit*sejk +psihat[d,5]<-(1-pf(test[d,2]^2,df1,df2)) +} +} +list(n=n,test=test,psihat=psihat) +} + +sintmcp<-function(x, 
con=0, alpha=0.05){ +# +# Dependent groups +# Multiple comparisons using medians on difference scores +# +flagcon=F +if(!is.matrix(x))x<-matl(x) +if(!is.matrix(x))stop('Data must be stored in a matrix or in list mode.') +con<-as.matrix(con) +J<-ncol(x) +#xbar<-NULL +x<-elimna(x) # Remove missing values +nval<-nrow(x) +if(sum(con^2!=0))CC<-ncol(con) +if(sum(con^2)==0)CC<-(J^2-J)/2 +ncon<-CC +if(alpha==.05){ +dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) +if(ncon > 10){ +avec<-.05/c(11:ncon) +dvec<-c(dvec,avec) +}} +if(alpha==.01){ +dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) +if(ncon > 10){ +avec<-.01/c(11:ncon) +dvec<-c(dvec,avec) +}} +if(alpha != .05 && alpha != .01)dvec<-alpha/c(1:ncon) +if(sum(con^2)==0){ +flagcon<-TRUE +psihat<-matrix(0,CC,7) +dimnames(psihat)<-list(NULL,c('Group','Group','psihat','ci.lower','ci.upper','p.value','p.crit')) +temp1<-0 +jcom<-0 +for (j in 1:J){ +for (k in 1:J){ +if (j < k){ +jcom<-jcom+1 +dv<-x[,j]-x[,k] +temp=sintv2(dv,pr=FALSE) +temp1[jcom]<-temp$p.value +psihat[jcom,1]<-j +psihat[jcom,2]<-k +psihat[jcom,3]<-median(dv) +psihat[jcom,4]<-temp$ci.low +psihat[jcom,5]<-temp$ci.up +psihat[jcom,6]<-temp$p.value +}}} +temp2<-order(0-temp1) +zvec<-dvec[1:ncon] +sigvec<-(psihat[temp2,6]>=zvec) +dd=0 +if(sum(sigvec)0){ +if(nrow(con)!=ncol(x))warning('The number of groups does not match the number + of contrast coefficients.') +ncon<-ncol(con) +psihat<-matrix(0,ncol(con),6) +dimnames(psihat)<-list(NULL,c('con.num','psihat','ci.lower','ci.upper','p.value','p.crit')) +temp1<-NA +for (d in 1:ncol(con)){ +psihat[d,1]<-d +for(j in 1:J){ +if(j==1)dval<-con[j,d]*x[,j] +if(j>1)dval<-dval+con[j,d]*x[,j] +} +temp=sintv2(dval,pr=FALSE) +temp1[d]=temp$p.value +psihat[d,5]=temp$p.value +psihat[d,2]<-median(dval) +psihat[d,3]<-temp$ci.low +psihat[d,4]<-temp$ci.up +} +temp2<-order(0-temp1) +zvec<-dvec[1:ncon] +sigvec<-(psihat[temp2,5]>=zvec) +psihat[temp2,6]<-zvec +dd=0 +if(sum(sigvec)0)x[1:N,1:3]=NA +data[[i]]=x +} +if(MC){ +library(parallel) +res=mclapply(data,anc2COV.sub,FRAC=FRAC,TPM=TPM,tau=tau) +} +if(!MC)res=lapply(data,anc2COV.sub,FRAC=FRAC,TPM=TPM,tau=tau) +M=as.vector(list2mat(res)) +M=sort(M) +ic=round(alpha*iter) +crit=M[ic] +list(crit.val=crit,M=M) +} + +anc2COV.sub<-function(data,FRAC=.5,TPM=FALSE,tau=.05){ +val=ancovampG(data[,1:2],data[,3],data[,4:5],data[,6],DH=TRUE,SEED=FALSE,test=yuen,FRAC=FRAC,cov.fun=covl)$results[,3] +val=elimna(val) +if(!TPM)val=mean(val,na.rm=TRUE) +if(TPM)val=sum(log(val[val<=tau])) +val +} +covl<-function(x){ +res=cov(x) +list(cov=res) +} + +twoDcorR_sub<-function(data,x,y,corfun=wincor,...){ +# +# Used by TwoDcorR +# +rv=corfun(x[data,1],y[data],...)$cor +rv[2]=corfun(x[data,2],y[data],...)$cor +rv +} + + +################################################## +# R code written by: # +# # +# Jimmy A. Doi (jdoi@calpoly.edu) # +# Department of Statistics # +# Cal Poly State Univ, San Luis Obispo # +# Web: www.calpoly.edu/~jdoi # +# # +# ............................................ # +# # +# If using please cite: # +# # +# Schilling, M., Doi, J. # +# "A Coverage Probability Approach to Finding # +# an Optimal Binomial Confidence Procedure", # +# The American Statistician, 68, 133-145. # +# # +# ............................................ # +# # +# Shiny app site: jdoi.shinyapps.io/LCO-CI # +# # +# ............................................ 
# +# # +# Code updated on: 1AUG2014 # +################################################## + + +############################################################################## +# The function LCO.CI() generates the LCO confidence intervals # +# for X = 0, 1, ..., n for a specified n and confidence level. # +# # +# Example: To generate all LCO confidence intervals at n=20, # +# level=.90, and 3rd decimal place accuracy, use # +# # +# > LCO.CI(20,.90,3) # +############################################################################## + + +LCO.CI <- function(n,level,dp) +{ + + # For desired decimal place accuracy of dp, search on grid using (dp+1) + # accuracy then round final results to dp accuracy. + iter <- 10**(dp+1) + + p <- seq(0,.5,1/iter) + + + ############################################################################ + # Create initial cpf with AC[l,u] endpoints by choosing coverage + # probability from highest acceptance curve with minimal span. + + + cpf.matrix <- matrix(NA,ncol=3,nrow=iter+1) + colnames(cpf.matrix)<-c("p","low","upp") + + for (i in 1:(iter/2+1)){ + p <- (i-1)/iter + + bin <- dbinom(0:n,n,p) + x <- 0:n + pmf <- cbind(x,bin) + + # Binomial probabilities ordered in descending sequence + pmf <- pmf[order(-pmf[,2],pmf[,1]),] + pmf <- data.frame(pmf) + + # Select the endpoints (l,u) such that AC[l,u] will + # be at least equal to LEVEL. The cumulative sum of + # the ordered pmf will identify when this occurs. + m.row <- min(which((cumsum(pmf[,2])>=level)==TRUE)) + low.val <-min(pmf[1:m.row,][,1]) + upp.val <-max(pmf[1:m.row,][,1]) + + cpf.matrix[i,] <- c(p,low.val,upp.val) + + # cpf flip only for p != 0.5 + + if (i != iter/2+1){ + n.p <- 1-p + n.low <- n-upp.val + n.upp <- n-low.val + + cpf.matrix[iter+2-i,] <- c(n.p,n.low,n.upp) + } + } + + + ############################################################################ + # LCO Gap Fix + # If the previous step yields any violations in monotonicity in l for + # AC[l,u], this will cause a CI gap. Apply fix as described in Step 2 of + # algorithm as described in paper. + + # For p < 0.5, monotonicity violation in l can be determined by using the + # lagged difference in l. If the lagged difference is -1 a violation has + # occurred. The NEXT lagged difference of +1 identifies the (l,u) pair to + # substitute with. The range of p in violation would be from the lagged + # difference of -1 to the point just before the NEXT lagged difference of + # +1. Substitute the old (l,u) with updated (l,u) pair. Then make required + # (l,u) substitutions for corresponding p > 0.5. + + # Note the initial difference is defined as 99 simply as a place holder. 
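+  # In other words, if l drops by 1 at some value of p and recovers further
+  # along the grid, the lagged differences show a -1 at the drop and a +1 at
+  # the recovery; the (l,u) pair at the +1 position replaces the rows in
+  # between.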
+ + diff.l <- c(99,diff(cpf.matrix[,2],differences = 1)) + + if (min(diff.l)==-1){ + + for (i in which(diff.l==-1)){ + j <- min(which(diff.l==1)[which(diff.l==1)>i]) + new.low <- cpf.matrix[j,2] + new.upp <- cpf.matrix[j,3] + cpf.matrix[i:(j-1),2] <- new.low + cpf.matrix[i:(j-1),3] <- new.upp + } + + # cpf flip + pointer.1 <- iter - (j - 1) + 2 + pointer.2 <- iter - i + 2 + + cpf.matrix[pointer.1:pointer.2,2]<- n - new.upp + cpf.matrix[pointer.1:pointer.2,3]<- n - new.low + } + + + ############################################################################ + # LCO CI Generation + + ci.matrix <- matrix(NA,ncol=3,nrow=n+1) + rownames(ci.matrix) <- c(rep("",nrow(ci.matrix))) + colnames(ci.matrix) <- c("x","lower","upper") + + # n%%2 is n mod 2: if n%%2 == 1 then n is odd + # n%/%2 is the integer part of the division: 5/2 = 2.5, so 5%/%2 = 2 + + if (n%%2==1) x.limit <- n%/%2 + if (n%%2==0) x.limit <- n/2 + + for (x in 0:x.limit) + { + num.row <- nrow(cpf.matrix[(cpf.matrix[,2]<=x & x<=cpf.matrix[,3]),]) + + low.lim <- + round(cpf.matrix[(cpf.matrix[,2]<=x & x<=cpf.matrix[,3]),][1,1], + digits=dp) + + upp.lim <- + round(cpf.matrix[(cpf.matrix[,2]<=x & x<=cpf.matrix[,3]),][num.row,1], + digits=dp) + + ci.matrix[x+1,]<-c(x,low.lim,upp.lim) + + # Apply equivariance + n.x <- n-x + n.low.lim <- 1 - upp.lim + n.upp.lim <- 1 - low.lim + + ci.matrix[n.x+1,]<-c(n.x,n.low.lim,n.upp.lim) + } + + + heading <- matrix(NA,ncol=1,nrow=1) + + heading[1,1] <- + paste("LCO Confidence Intervals for n = ",n," and Level = ",level,sep="") + + rownames(heading) <- c("") + colnames(heading) <- c("") + +# print(heading,quote=FALSE) + + # print(ci.matrix) +ci.matrix +} + + +############################################################################## +# The function LCO.CI() generates the LCO confidence intervals # +# for X = 0, 1, ..., n for a specified n and confidence level. # +# # +# Example: To generate all LCO confidence intervals at n=20, # +# level=.90, and 3rd decimal place accuracy, use # +# # +# > LCO.CI(20,.90,3) # +############################################################################## + +binomLCO<-function (x = sum(y), nn = length(y), y = NULL, n = NA, alpha = 0.05){ +# +# Compute a confidence interval for the probability of success using the method in +# +# Schilling, M., Doi, J. (2014) +# A Coverage Probability Approach to Finding +# an Optimal Binomial Confidence Procedure, +# The American Statistician, 68, 133-145. +# +if(!is.null(y)){ +y=elimna(y) +nn=length(y) +} +if(nn==1)stop('Something is wrong: number of observations is only 1') +cis=LCO.CI(nn,1-alpha,3) +ci=cis[x+1,2:3] +list(phat=x/nn,ci=ci,n=nn) +} + +twoDcorR<-function(x,y,corfun=wincor,alpha=.05,nboot=500,SEED=TRUE,MC=FALSE,outfun=outpro,...){ +# +# Comparing two robust dependent correlations: Overlapping case +# Winsorized correlation is used by default. +# +# x is assumed to be a matrix with 2 columns +# +# Compare correlation of x[,1] with y to x[,2] with y +# +# The confidence interval is returned in ci +# The estimates of the correlations are returned in est.rho1 and est.rho2 +# +if(nrow(x)!=length(y))stop('x and y have different sample sizes; should be equal') +if(ncol(x)!=2)stop('Argument x should have two columns') +m1=cbind(x,y) +m1<-elimna(m1) # Eliminate rows with missing values +nval=nrow(m1) +x<-m1[,1:2] +y=m1[,3] +est<-cor2xy(x,y,corfun=corfun,...)$cor +r12=est[1] +r13=est[2] +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. 
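+# Each bootstrap replication draws one set of row indices that is used for
+# both correlations (see twoDcorR_sub), so the dependence between
+# cor(x[,1],y) and cor(x[,2],y) is preserved.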
+# +# If you use corfun=scor, set plotit=F +# +data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot) +data=listm(t(data)) +if(MC){ +library(parallel) +bvec<-mclapply(data,twoDcorR_sub,x,y,corfun,...) +} +if(!MC)bvec<-lapply(data,twoDcorR_sub,x,y,corfun,...) +mat=matrix(NA,nrow=nboot,ncol=2) +for(i in 1:nboot)mat[i,]=bvec[[i]] +ihi<-floor((1-alpha/2)*nboot+.5) +ilow<-floor((alpha/2)*nboot+.5) +bsort<-sort(mat[,1]-mat[,2]) +ci12<-1 +ci12[1]<-bsort[ilow] +ci12[2]<-bsort[ihi] +pv=mean(bsort<0)+.5*mean(bsort==0) +pv=2*min(c(pv,1-pv)) +list(est.rho1=r12,est.rho2=r13,ci=ci12,p.value=pv) +} + +spearci<-function(x,y,nboot=1000,alpha=.05,SEED=TRUE,MC=FALSE){ +if(!MC)res=corb(x,y,corfun=spear,nboot=nboot,alpha=alpha,SEED=SEED) +if(MC)res=corbMC(x,y,corfun=spear,nboot=nboot,alpha=alpha,SEED=SEED) +res +} +tauci<-function(x,y,nboot=1000,alpha=.05,SEED=TRUE,MC=FALSE){ +if(!MC)res=corb(x,y,corfun=tau,nboot=nboot,alpha=alpha,SEED=SEED) +if(MC)res=corbMC(x,y,corfun=tau,nboot=nboot,alpha=alpha,SEED=SEED) +res +} +tscor<-function(x,y,xout = FALSE, outfun = out, varfun = winvar, +WARN = TRUE, HD = FALSE, ...){ +# +# Correlation coefficient (explanatory measure of association) +# based on the Theil--Sen estimator +# +# To get a p.value, use the R function corb +# +temp=tsreg(x,y,varfun=varfun,xout=xout,outfun=outfun,HD=HD) +val=sign(temp$coef[2])*temp$Strength.Assoc +list(cor=val) +} + + +tscorci<-function(x,y,nboot=599,alpha=.05,SEED=TRUE,MC=FALSE){ +if(!MC)res=corb(x,y,corfun=tscor,nboot=nboot,alpha=alpha,SEED=SEED) +if(MC)res=corbMC(x,y,corfun=tscor,nboot=nboot,alpha=alpha,SEED=SEED) +res +} +wincorci<-function(x,y,nboot=1000,alpha=.05,SEED=TRUE,MC=FALSE,tr=0.2){ +if(!MC)res=corb(x,y,corfun=wincor,nboot=nboot,alpha=alpha,SEED=SEED,tr=tr) +if(MC)res=corbMC(x,y,corfun=wincor,nboot=nboot,alpha=alpha,SEED=SEED,tr=tr) +res +} +twoDNOV<-function(x,y,corfun=wincor,alpha=.05,nboot=500,SEED=TRUE,MC=FALSE){ +# +# Comparing two robust dependent correlations: Non-overlapping case +# Winsorized correlation is used by default. +# +# Both x and y are assumed to be a matrix with 2 columns +# +# Compare correlation of x[,1] x[,2] to the correlation between +# y[,1] and y[,2] +# +if(nrow(x)!=nrow(y))stop('x and y have different sample sizes; should be equal') +m1=cbind(x,y) +if(ncol(m1)!=4)stop('Both x and y should have two columns') +m1<-elimna(m1) # Eliminate rows with missing values +nval=nrow(m1) +x<-m1[,1:2] +y=m1[,3:4] +r12=corfun(x[,1],x[,2])$cor +r13=corfun(y[,1],y[,2])$cor +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +# +# If you use corfun=scor, set plotit=F +# +data<-matrix(sample(nrow(y),size=nrow(y)*nboot,replace=TRUE),nrow=nboot) +data=listm(t(data)) +if(MC){ +library(parallel) +bvec1<-mclapply(data,corbsub,x[,1],x[,2],corfun) +bvec2<-mclapply(data,corbsub,y[,1],y[,2],corfun) +} +if(!MC){ +bvec1<-lapply(data,corbsub,x[,1],x[,2],corfun) +bvec2<-lapply(data,corbsub,y[,1],y[,2],corfun) +} +mat1=matl(bvec1) +mat2=matl(bvec2) +ihi<-floor((1-alpha/2)*nboot+.5) +ilow<-floor((alpha/2)*nboot+.5) +bsort<-sort(mat1-mat2) +ci12<-bsort[ilow] +ci12[2]<-bsort[ihi] +ci12 +pv=mean(bsort<0) +pv=2*min(c(pv,1-pv)) +list(est.rho1=r12,est.rho2=r13,est.dif=r12-r13,ci=ci12,p.value=pv) +} + +comdvar<-function(x,y,alpha=.05){ +# +# Test the hypothesis that two dependent variables have equal variances. +# A heteroscedastic version of the Morgan-Pitman test is used. 
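+# (The Morgan-Pitman approach uses the fact that cov(X-Y,X+Y)=var(X)-var(Y),
+# so equal variances imply zero correlation between the difference scores
+# and the sums.)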
+# (The HC4 estimator is used to deal with heteroscedasticity) +# +xy=elimna(cbind(x,y)) +est1=var(xy[,1]) +est2=var(xy[,2]) +pv=pcorhc4(xy[,1]-xy[,2],xy[,1]+xy[,2],alpha=alpha) +list(p.value=pv$p.value, est1=est1, est2=est2,test.stat=pv$test.stat) +} +ptests<-function(pv,Fisher=TRUE){ +# +# pv: p-values based on N independent tests +# Test hypothesis that all N null hypotheses are true. +# Fisher=TRUE, use Fisher's method +# Fisher=FALSE, use Chen-Nadarajah method +# +ntests=length(pv) +if(Fisher){ +res=(0-2)*sum(log(pv)) # Fisher test statistic +pvF=1-pchisq(res,2*ntests) #Fisher p-value based on all tests. +} +if(!Fisher){ +res=sum(qnorm(pv/2)^2) # C-N test +pvF=1-pchisq(res,ntests) +} +list(test.stat=res,p.value=pvF) +} +mcpPV<-function(pv,alpha=.05,opt=1){ +# +# pv: A collection of p-values based on independent tests +# +# Perform the step-down multiple comparison method in +# Wilcox, R. R. \& Clark, F. (in press). +# Robust multiple comparisons based on combined +# probabilities from independent tests. Journal of Data Science +# based on K independent p-values +# +# opt=1 Fisher's method +# opt=2 Chen-Nadarajah method +# opt=3 Max method +# +K=length(pv) +pick=NULL +v=order(pv) +ic=0 +for(i in K:1){ +flag=TRUE +if(opt==1){ +i2=i*2 +if(i==K)res=(0-2)*sum(log(pv)) # Fisher test statistic +if(ialpha)flag=TRUE +if(pvF<=alpha/(K+1-i)){ +ic=ic+1 +pick=c(pick,v[ic]) +flag=FALSE +if(pv[v[ic]]>alpha)flag=TRUE +} +if(flag)break +} +Decision=rep('Not Sig',length(pv)) +if(!is.null(pick))Decision[pick]='Reject' +nsig=sum(length(pick)) +list(p.values=pv, +Decisions=as.matrix(Decision),num.sig=nsig) +} +rungenv2<-function(x, y, est = onestep, fr = 1, LP = TRUE, ...){ +# +# Return x and predicted y values not sorted in ascending order, +# rather, keep x as originally entered and return corresponding Yhat values +# +xord=order(x) +res=rungen(x,y,est=est,fr=fr,LP=LP,pyhat=TRUE,plotit=FALSE)$output +res[order(xord)] +} + + +adrun<-function(x,y,est=tmean,iter=10,pyhat=FALSE,plotit=TRUE,fr=1,xlab='X', +ylab='Y',zlab='', +theta=50,phi=25,expand=.5,scale=TRUE,zscale=TRUE,xout=FALSE,eout=xout,outfun=out,ticktype='simple',...){ +# +# additive model based on running interval smoother +# and backfitting algorithm +# +m<-elimna(cbind(x,y)) +if(xout){ +flag<-outfun(x,plotit=FALSE)$keep +x=x[flag,] +y=y[flag] +} +x<-as.matrix(x) +p<-ncol(x) +if(p==1)val<-rungen(x[,1],y,est=est,pyhat=TRUE,plotit=plotit,fr=fr, +xlab=xlab,ylab=ylab,...)$output +if(p>1){ +library(MASS) +library(akima) +np<-p+1 +x<-m[,1:p] +y<-m[,np] +fhat<-matrix(NA,ncol=p,nrow=length(y)) +fhat.old<-matrix(NA,ncol=p,nrow=length(y)) +res<-matrix(NA,ncol=np,nrow=length(y)) +dif<-1 +for(i in 1:p) +fhat.old[,i]<-rungenv2(x[,i],y,est=est,pyhat=TRUE,plotit=FALSE,fr=fr,...) +eval<-NA +for(it in 1:iter){ +for(ip in 1:p){ +res[,ip]<-y +for(ip2 in 1:p){ +if(ip2 != ip)res[,ip]<-res[,ip]-fhat.old[,ip2] +} +fhat[,ip]<-rungenv2(x[,ip],res[,ip],est=est,pyhat=TRUE,plotit=FALSE,fr=fr,...) +} +eval[it]<-sum(abs(fhat/sqrt(sum(fhat^2))-fhat.old/sqrt(sum(fhat.old^2)))) +if(it > 1){ +itm<-it-1 +dif<-abs(eval[it]-eval[itm]) +} +fhat.old<-fhat +if(dif<.01)break +} +val<-apply(fhat,1,sum) +aval<-est(y-val,...) 
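+# add the location of the residuals, per est, back into the fit so the
+# additive prediction is centered correctly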
+val<-val+aval +if(plotit && p==2){ +fitr<-val +iout<-c(1:length(fitr)) +nm1<-length(fitr)-1 +for(i in 1:nm1){ +ip1<-i+1 +for(k in ip1:length(fitr))if(sum(x[i,]==x[k,])==2)iout[k]<-0 +} +fitr<-fitr[iout>=1] # Eliminate duplicate points in the x-y plane +# This is necessary when doing three dimensional plots +# with the R function interp +mkeep<-x[iout>=1,] +fitr<-interp(mkeep[,1],mkeep[,2],fitr) +persp(fitr,theta=theta,phi=phi,xlab=xlab,ylab=ylab,zlab=zlab,expand=expand, +scale=scale,ticktype=ticktype) +}} +if(!pyhat)val<-'Done' +val +} +ancov2COV<-function(x1,y1,x2,y2,tr=.2,test=yuen,cr=NULL,pr=TRUE,DETAILS=FALSE,cp.value=FALSE, +plotit=FALSE,xlab='X',ylab='Y',zlab=NULL,span=.75,PV=TRUE,FRAC=.5,MC=FALSE,q=.5, +iter=1000,alpha=.05,TPM=FALSE,tau=.05,est=tmean,fr=1,...){ +# +# ANCOVA two covariates, no parametric assumption about the regression surface. +# Use all design points nested deeply within the cloud of data. +# Global test statistic is the average of the p-values based on +# Yuen's test performed at each of the deepest +# points where comparisons can be made. +# +# TPM=TRUE: replace average of the p-values with the test statistic +# studied by +# Zaykin, D. V., Zhivotovsky, L. A., Westfall, P. H., & Weir, B.S. (2002). +# Truncated product method for combining p-values. +# Genetic Epidemiology 22, 170--185. +# +# x1 and x2 assumed to be a matrix or data frame with two columns +# +# if plotit=TRUE then if +# PV=TRUE create a plot of the p.values as a function of the two covariates +# using LOESS. +# if PV=FALSE, plot the difference between the dependent variable as a function of +# the covariates +# +# By default, Yuen's test is used, but other tests can be used via the argument +# test +# +# pr=TRUE: warning messages will be printed +# +# DETAILS=TRUE: all p.values are reported for all covariate points used. +# +# span: the span used by LOESS + +# fr: span used by rung3hat when estimating the difference between +# predicted Y for group 1 minus predicted Y for group 2. +# +# FRAC is the fraction of least deep covariate points that are ignored +# +# MC=TRUE: use a multicore processor to compute a critical value and global p.value +# +# com.p.value=TRUE: compute p.value based on the global hypothesis of no differences. +# +# iter=1000: number of iterations used to compute a critical value or global p.value +# +# Function returns: +# test.stat: the test statistic. there are two allowed choices: yuen or qcomhd +# crit.p.val: the critical value, reject if test.stat<=crit.p.val +# min.p.val.point: the values of the covariate that had the smallest p-value +# min.p.value: the minimum p-value among all p-values that were computed. 
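+# Example (illustrative; y1, y2 and the two-column covariate matrices x1 and
+# x2 are hypothetical):
+#   ancov2COV(x1,y1,x2,y2,plotit=TRUE,PV=TRUE)
+# plots the p-values as a function of the two covariates.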
+# +com.p.value=cp.value +if(pr)print('Reject if test.stat is less than or equal to crit.value') +if(FRAC<=0 || FRAC >=1)stop('FRAC should be a value between 0 and 1.') +if(ncol(x1)!=2)stop('Should have two covariates') +xy1=elimna(cbind(x1,y1)) +x1=xy1[,1:2] +y1=xy1[,3] +xy2=elimna(cbind(x2,y2)) +x2=xy2[,1:2] +y2=xy2[,3] +n=min(c(nrow(x1),nrow(x2))) +if(n<50){ +if(pr)print('Warning: sample size is less than 50; critical value unknown') +} +if(is.null(cr)){ +if(n>=50 & n<=80)cr=as.vector(regYhat(c(50,75),c(.23,.264),xr=n)) +if(n>80)cr=.27 +} +flag0=is.null(cr) +flag1=FRAC!=.5 +flag3=flag0+flag1+com.p.value+TPM +if(flag3>0){ +comp.pv=anc2COV.CV(nrow(x1),nrow(x2),iter=iter,MC=MC,TPM=TPM,tau=tau) +MV=sort(comp.pv$M) +ic=round(alpha*iter) +cr=MV[ic] +} +DONE=FALSE +if(identical(test,qcomhd)){ +val=ancovampG(x1,y1,x2,y2,DH=TRUE,SEED=TRUE,test=qcomhd,q=q,FRAC=FRAC) +DONE=TRUE +} +if(identical(test,qcomhdMC)){ +val=ancovampG(x1,y1,x2,y2,DH=TRUE,SEED=TRUE,test=qcomhdMC,q=q,FRAC=FRAC) +DONE=TRUE +} +if(!DONE){ +if(identical(test,yuen))val=ancovampG(x1,y1,x2,y2,DH=TRUE,SEED=TRUE,test=yuen,tr=tr,FRAC=FRAC) +#if(!identical(test,yuen))val=ancovampG(x1,y1,x2,y2,DH=TRUE,SEED=TRUE,test=test,FRAC=FRAC,...) +} +est.dif=rung3hat(x1,y1,fr=fr,pts=val$points,est=est)$rmd-rung3hat(x2,y2,pts=val$points,fr=fr,est=est)$rmd +pavg=mean(val$results[,3]) +if(TPM){ +vals=val$results[,3] +vals=elimna(vals) +pavg=sum(log(vals[vals<=tau])) +} +mpv=which(val$results[,3]==min(val$results[,3])) +points=val$points +results=val$results +rem.res=results[mpv,3] +rem.points=points[mpv,] +points=cbind(points,est.dif) +dimnames(points)=list(NULL,c('COV 1','COV 2','EST.DIF')) +if(plotit){ +if(is.null(zlab)){ +if(PV)zlab='P-Value' +if(!PV)zlab='Est.Dif' +} +if(PV)lplot(points[,1:2],results[,3],xlab=xlab,ylab=ylab,zlab=zlab,tick='det',span=span) +if(!PV)lplot(points[,1:2],est.dif,xlab=xlab,ylab=ylab,zlab=zlab,tick='det',span=span) +} +nk=nrow(points) +if(!DETAILS){ +points=NULL +results=NULL +} +pval=NULL +if(com.p.value || TPM)pval=1-mean(pavg<=comp.pv$M) +list(num.points.used=nk,test.stat=pavg,crit.value=cr,GLOBAL.p.value=pval,min.p.val.point=rem.points,min.p.value=rem.res,all.points.used=points,all.results=results[,1:3]) +} + +Qanova<-function(x,q=.5,op=3,nboot=2000,MC=FALSE,SEED=TRUE){ +# +# Test global hypothesis that J independent groups +# have equal medians. +# Performs well when there are tied values. +# +# Basically, use pbadepth in conjunction with the Harrell--Davis +# estimator. +# +# +if(is.matrix(x) || is.data.frame(x))x=listm(x) +chkcar=NA +for(j in 1:length(x))chkcar[j]=length(unique(x[[j]])) +if(min(chkcar<20)){ +print('Warning: Sample size is less than') +print('20 for one or more groups. Type I error might not be controlled') +} +output=pbadepth(x,est=hd,q=q,allp=TRUE,SEED=SEED,op=op,nboot=nboot,MC=MC,na.rm=TRUE) +output +} + +runYhat<-function(x,y,pts=NULL,est=tmean,fr=1,nmin=1,xout=FALSE,outfun=outpro,XY.used=FALSE,...){ +# +# Fit a running interval smoother using the data in x and y +# Use the fit to estimate the typical value of Y +# corresponding to the covariates values in pts +# +# pts=NULL means all points in x, after missing values are removed, are used. That is, predict y for each x +# +x<-as.matrix(x) +p=ncol(x) +p1=p+1 +xx<-cbind(x,y) +xx<-elimna(xx) +x=xx[,1:p] +y=xx[,p1] +if(is.null(pts))pts=x +x=as.matrix(x) +if(xout){ +flag<-outfun(x,plotit=FALSE,...)$keep +x<-x[flag,] +y<-y[flag] +x<-as.matrix(x) +} +if(ncol(x)==1){ +vals=runhat(x[,1],y,pts=pts,est=est,fr=fr,nmin=nmin,...) 
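+# count how many x values lie within fr of each point in pts; these counts
+# are returned as nvals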
+nvals=1 +for(i in 1:length(pts)){ +nvals[i]<-length(y[near(x[,1],pts[i],fr=fr)]) +} +} +if(ncol(x)>1){ +temp=rung3hat(x,y,pts=pts,est=est,fr=fr,...) +vals=temp$rmd +nvals=temp$nval +} +XY=NULL +if(XY.used)XY=cbind(x,y) +list(Y.hat=vals,nvals=nvals,xy.used=XY,pts.used=pts) +} + +rplot.pred=runYhat + +wmwpb<-function(x,y=NULL,est=median,alpha=.05,nboot=2000,SEED=TRUE,pr=TRUE, +na.rm=TRUE,...){ +# +# Compute a bootstrap confidence interval for a +# measure of location associated with +# the distribution of x-y, +# est indicates which measure of location will be used +# x and y are possibly dependent +# +# loc2dif.ci computes a non-bootstrap confidence interval +# +if(is.null(y[1])){ +if(!is.matrix(x) & !is.data.frame(x))stop('With y missing, x should be a matrix') +y=x[,2] +x=x[,1] +} +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +data1<-matrix(sample(length(x),size=length(x)*nboot,replace=TRUE),nrow=nboot) +data2<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot) +bvec<-NA +for(i in 1:nboot)bvec[i]<-wmwloc(x[data1[i,]],y[data2[i,]],est=est,na.rm=na.rm,...) +bvec<-sort(bvec) +low<-round((alpha/2)*nboot)+1 +up<-nboot-low +temp<-sum(bvec<0)/nboot+sum(bvec==0)/(2*nboot) +sig.level<-2*(min(temp,1-temp)) +estdiff=wmwloc(x,y,est=est,na.rm=na.rm,...) +list(estimate=estdiff,ci=c(bvec[low],bvec[up]),p.value=sig.level) +} +idealfIQR<-function(x){ +# +# Compute the interquartile range using the ideal fourths. +x=elimna(x) +res=idealf(x)$qu-idealf(x)$ql +res +} +conCON<-function(J,conG=1){ +# +# Create contrast coefficients for comparisons to a controll +# +# J = number of groups including the control group. +# conG = the group that is the control. By default, assume group 1 is the control +# +Jm1=J-1 +A=matrix(rep(1,Jm1^2+Jm1),nrow=J) +A[-conG,]=-1*diag(Jm1) +if(conG>1)A=-1*A +list(conCON=A) +} + +btsqrk<-function(alist,alpha=0.05,tr=0.2){ +#computes B2_tk test statistics for k independent samples. +#alist should be a list type object +#s's are computed by trimse which can be found in all Rallfun files written by Wilcox Rand +k<-length(alist) +# Remove any missing values in alist +for (i in 1:k){alist[[i]]<-alist[[i]][!is.na(alist[[i]])]} +zc<-qnorm(alpha/2) +e=trunc(tr*sapply(alist,length)) +f<-(sapply(alist,length))-(2*e) +s=sapply(alist,trimse,tr=tr)^2 +wden=sum(1/s) +w=(1/s)/wden +yplus<-sum(w*(sapply(alist,mean,trim=tr))) +tt<-((sapply(alist,mean,trim=tr))-yplus)/sqrt(s) +v<-(f-1) +z<-((4*v^2)+(5*((2*(zc^2))+3)/24))/((4*v^2)+v+(((4*(zc^2))+9)/12))*sqrt(v)*(sqrt(log(1+(tt^2/v)))) +teststat<-sum(z^2) +crit<-qchisq(1-alpha,k-1) +bt2pvalue<-1-(pchisq(teststat,k-1)) +list(p.value=bt2pvalue,teststat=teststat,crit=crit,e=e,f=f,s=s,w=w,tt=tt) +} + +t1waybtsqrk<-function(x,alpha=.05,nboot=599,tr=0.2,SEED=TRUE){ +# +# One-way ANOVA for trimmed means, independent groups. +# Uses a method studied by Ozdemir et al. 
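+# The observed statistic from btsqrk is compared with a bootstrap null
+# distribution obtained by resampling each group after centering it at its
+# trimmed mean.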
+# +if(SEED)set.seed(2) +B=nboot +if(is.matrix(x))x=listm(x) +x=lapply(x,elimna) +T.test<-btsqrk(x,alpha=alpha,tr=tr)$teststat +means<-c() +ylist<-list(0) +TT<-c() +b<-floor((1-alpha)*B) +means<-sapply(x,mean,tr) +k<-length(x) +for (i in 1:B) +{ + for (j in 1:k) + {ylist[[j]]<-(sample(x[[j]],length(x[[j]]),replace=TRUE)-means[j])} + TT<-c(TT,btsqrk(ylist,alpha,tr)$teststat) +} +TT=sort(TT) +pval<-mean(T<=TT,na.rm=TRUE) +list(test.stat=T.test,crit.value=TT[b],p.value=pval) +} +logSMpred<-function(x,y,pts=NULL,fr=2,LP=TRUE,xout=FALSE,outfun=outpro,...){ +# +# A smoother designed specifically for binary outcomes +# LP=TRUE: With two independent variables, smooth the initial smooth using LOESS +# Return predicted probability of 1 for points in +# pts +# based on the data in x and y +# +# +x=as.matrix(x) +p=ncol(x) +p1=p+1 +xx<-elimna(cbind(x,y)) +x<-xx[,1:p] +y<-xx[,p1] + +n=nrow(xx) +yy=rep(1,n) +vals=sort(unique(y)) +if(length(vals)>2)stop('y should be binary') +flag=y==vals[2] +yy[!flag]=0 +y=yy + +if(xout){ +m<-cbind(x,y) +flag<-outfun(x,plotit=FALSE,...)$keep +m<-m[flag,] +x<-m[,1:p] +y<-m[,p1] +} +x=as.matrix(x) +if(is.null(pts))pts=x +pts=as.matrix(pts) +library(MASS) +m=covmve(x) +phat<-NA +m1=matrix(NA,nrow=nrow(pts),ncol=nrow(pts)) +yhat=NULL +for(i in 1:nrow(pts)){ +d<-mahalanobis(x,pts[i,],m$cov) +flag=sqrt(d)<=fr +w=flag*exp(-1*d) +yhat[i]=sum(w*y)/sum(w) +} +yhat +} +idrange<-function(x,na.rm=FALSE){ +# +# Compute the interquartile range based on the ideal fourths. +# +temp=idealf(x,na.rm=na.rm) +res=temp$qu-temp$ql +res +} +prodepth<-function(x,pts=x,ndir=1000,SEED=TRUE){ +# +# Determine an approximation of the projection depth of +# pts in +# x +# using the R package library(DepthProc) +# +# ndir indicates how many randomly chosen projections will be used +# +# Advantage over zoudepth, much faster execution time. +# Should be noted, however, that using the function twice on the same +# data generally results in different values for the depths. +# Setting +# SEED=TRUE +# avoids this. +# +# +if(SEED){ +oldSeed <- .Random.seed +set.seed(45) +} +library(DepthProc) +res=as.vector(depthProjection(pts,x,ndir=ndir)) +if(SEED) { + assign(x='.Random.seed', value=oldSeed, envir=.GlobalEnv) +} +res +} +zoudepth<-function(x,pts=x, zloc = median, zscale = mad, SEED=TRUE){ +# +# Determine projection depth using the R function zdepth +# The Nelder--Mead method for finding the maximum of a function is used +# +# SEED, included for convenience when this function is used with certain classification techniques. +# +res=1/(1+zdepth(x,pts,zloc,zscale)) +res +} +KNN<-function(train=NULL,test=NULL,g=NULL,x1=NULL,x2=NULL,k=3,prob=TRUE,SEED=NULL){ +# +# Do classification using the kNN method +# +# k: number of nearest neighbors +# +# train is the training set +# test is the test data +# g contains labels for the data in the training set, +# If data for the two groups are stored in +# x1 +# and +# x2, +# the function creates labels for you. +# +# This function removes the need to call library class. 
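+# Example (illustrative; x1, x2 and test are hypothetical matrices having the
+# same number of columns):
+#   KNN(x1=x1,x2=x2,test=test,k=3)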
+# For more information, use the command ?knn +# +# SEED=NULL, used for convenience when called by other functions that expect SEED +# +library(class) +if(!is.null(train)){ +if(is.null(g))stop('Argument g, group ids, must be specified') +if(is.matrix(g)){ +if(dim(g)>1)stop('Argument g should be a vector') +}} +if(!is.null(x1)){ +if(!is.null(x2)){ +if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') +x1=elimna(x1) +x2=elimna(x2) +n1=nrow(x1) +n2=nrow(x2) +g=c(rep(0,n1),rep(1,n2)) +train=rbind(x1,x2) +}} +res=knn(train,test,cl=as.factor(g),k=k,prob=prob) +res +} + +KNNv2<-function(train=NULL,test=NULL,g=NULL,x1=NULL,x2=NULL,k=3,prob=TRUE,SEED=NULL){ +# +# Do classification using the kNN method +# +# +# train is the training set +# test is the test data +# g contains labels for the data in the training set, +# +# This function removes the need to call library class. +# For more information, use the command ?knn +# +# SEED=NULL, used for convenience when called by other functions that expect SEED +# +library(class) +if(!is.null(train)){ +if(is.null(g))stop('Argument g, group ids, must be specified') +if(is.matrix(g)){ +if(dim(g)>1)stop('Argument g should be a vector') +}} +if(!is.null(x1)){ +if(!is.null(x2)){ +if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') +x1=elimna(x1) +x2=elimna(x2) +n1=nrow(x1) +n2=nrow(x2) +g=c(rep(0,n1),rep(1,n2)) +train=rbind(x1,x2) +}} +res=knn(train,test,cl=as.factor(g),k=k,prob=prob) +res=as.numeric(as.vector(res))+1 +res +} + +estBetaParams <- function(mu, var) { +# +# Estimate parameters of the beta distribution, r and s, given the mean and variance. + alpha <- ((1 - mu) / var - 1 / mu) * mu ^ 2 + beta <- alpha * (1 / mu - 1) +list(r=alpha,s=beta) +} +KNNdist<-function(train=NULL,test=NULL,g,k=3,x1=NULL,x2=NULL,prob=FALSE,plotit=FALSE,SEED=NULL, +xlab='Group 1',ylab='Group 2',depthfun=prodepth,...){ +# +# Do classification using depths and the kNN method. +# Points are transformed to their depth in each group and knn is applied +# using the resulting depth values. +# See Li et al., 2012, DD-classifier: nonparametric classification +# procedure based on DD-plot. Journal of the American Statistical Association, +# 107, 737--753 +# +# depthfun indicates how the depth of a point is computed. +# By default, projection distances are used. +# +# train is the training set +# test is the test data +# g: labels for the data in the training set. +# +# depthfun must be a function having the form depthfun(x,pts). +# That is, compute depth for the points in pts relative to points in x. 
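+# In effect, each observation is mapped to its vector of depths relative to
+# the groups (a DD-plot) and knn is applied in that depth space.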
+# +# SEED: not used here, included for convenience when this function is called by other functions +# +library(class) +if(!is.null(train)){ +if(is.null(g))stop('Argument g, group ids, must be specified') +if(is.matrix(g)){ +if(dim(g)>1)stop('Argument g should be a vector') +}} +if(!is.null(x1)){ +if(!is.null(x2)){ +if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') +x1=elimna(x1) +x2=elimna(x2) +n1=nrow(x1) +n2=nrow(x2) +g=c(rep(0,n1),rep(1,n2)) +train=rbind(x1,x2) +}} +g=as.numeric(as.vector(g)) +train=elimna(train) +if(is.null(test))stop('Argument test is null, no data') +test=elimna(test) +train=as.matrix(train) +test=as.matrix(test) +if(ncol(train)!=ncol(test))stop('The first two arguments, train and test, should have the same number of coluimns') +P=ncol(train) +P1=P+1 +xall=as.data.frame(matrix(NA,nrow=nrow(train),ncol=P1)) +xall[,1:P]=train +xall[,P1]=g +xall=elimna(xall) +x1=xall[,1:P] +xall=as.matrix(xall) +x1=as.matrix(x1) +g=as.vector(xall[,P1]) +ids=unique(g) +x2=elimna(test) +x1=as.matrix(x1) +x2=as.matrix(x2) +n=nrow(x1) +n2=nrow(x2) +p=length(ids) +d=matrix(NA,nrow=n,ncol=p) +D=matrix(NA,nrow=n2,ncol=p) +for(i in 1:length(ids)){ +flag=g==ids[i] +d[,i]=depthfun(as.matrix(x1[flag,]),pts=x1,...) +D[,i]=depthfun(as.matrix(x1[flag,]),pts=x2,...) +} +res=NULL +res=knn(d,D,cl=as.factor(g),k=k,prob=prob) +if(plotit){ +if(p==2){ +plot(d[,1],d[,2],xlab=xlab,ylab=ylab,type='n') +flag=g==ids[1] +points(d[flag,1],d[flag,2]) +points(d[!flag,1],d[!flag,2],pch='*') +}} +res=as.numeric(res) +res +} +olshomci<-function(x,y,alpha=.05){ +# +# Computes confidence interval for the slope of the +# least squares regression line assuming homoscedasticity and that there is +# only one independent variable. +# +# Not recommended in practice; use a method that allows heteroscedasticity +# +if(length(x)!=length(y))stop('x and y have different lengths') +temp=ols(x,y) +df=temp$n-2 #degrees of freedom +df +temp$summary +ci=temp$summar[2,1]-qt(1-alpha/2,df)*temp$summary[2,2] +ci[2]=temp$summar[2,1]+qt(1-alpha/2,df)*temp$summary[2,2] +list(ci=ci) +} +qcor<-function(x,y,q=.5,qfun=qest,xout=FALSE,outfun=outpro){ +# +# Compute quantile correlation as in Li, Li and Tsai, JASA 2015 +# +if(xout){ +flag<-outfun(x,plotit=plotit)$keep +x<-x[flag] +y<-y[flag] +} +dif=y-qfun(x,q) +flag=dif<0 +psi=q-flag +qcov=mean(psi*(x-mean(x))) +qc=qcov/sqrt((q-q^2)*var(x)) +list(cor=qc,cov=qcov) +} +stepmcp<-function(x,tr=.2,alpha=.05){ +# +# Step-down MCP method based on trimmed means +# +# x is assumed to have list mode, or a matrix or data with J columns +# J=number of groups. 
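+# Example (illustrative; x is a hypothetical list containing four groups):
+#   stepmcp(x,tr=.2,alpha=.05)
+# returns a p-value and a critical p-value for each subset of groups tested.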
+# +if(is.matrix(x) || is.data.frame(x))x=listm(x) +J=length(x) +if(J<3)stop('For two groups only, use yuen') +if(J>5)stop('Currently limited to at most five groups') +com=modgen(J) +jp1=J+1 +mout=matrix(NA,nrow=length(com),ncol=3, +dimnames=list(NULL,c('Groups','p-value','p.crit'))) +mout[,3]=alpha +jm2=J-2 +com=com[jp1:length(com)] +mout=mout[jp1:nrow(mout),] +for(i in 1:length(com)){ +nmod=length(com[[i]])-1 +temp=c(nmod:0) +mout[i,1]=sum(com[[i]]*10^temp) +temp=t1way(x[com[[i]]],tr=tr)$p.value +pnum=length(com[[i]]) +pe=1-(1-alpha)^(pnum/J) +if(length(com[[i]])<=jm2)mout[i,3]=pe +mout[i,2]=t1way(x[com[[i]]],tr=tr)$p.value +} +mout +} + + +rfit.est<-function(x,y,xout=FALSE,outfun=outpro,...){ +# +# Fit regression line using rank-based method based +# Jaeckel's dispersion function +# via the R package Rfit +# +library(Rfit) +library(quantreg) +if(xout){ +m<-cbind(x,y) +p1=ncol(m) +p=p1-1 +flag<-outfun(x,plotit=FALSE,...)$keep +m<-m[flag,] +x<-m[,1:p] +y<-m[,p1] +} +fit=rfitv2(y~x) +fit=fit$coef +list(coef=fit) +} + +Rfit.est=rfit.est + +rfitv2 <- function (formula, data, subset, yhat0 = NULL, + scores = Rfit::wscores, symmetric = FALSE, TAU = 'F0', ...) { + +# Below is taken from quantreg (under GPL) # + call<-match.call() + mf<-match.call(expand.dots=FALSE) + m<-match(c('formula','data','subset'),names(mf),0) + mf<-mf[c(1,m)] + mf[[1]]<-as.name("model.frame") + mf<-eval.parent(mf) +# + + x <- model.matrix(attr(mf, "terms"), data = mf) + if( abs(max(x) - min(x)) < .Machine$double.eps ^ 0.5 ) stop("x cannot only contain an intercept") + x1 <- as.matrix(x[,colnames(x)!='(Intercept)']) + x1 <- as.matrix(cbind(rep(1,nrow(x1)),x1)) + + y <- model.response(mf) + + qrx <- qr(x1) + Q<-as.matrix(qr.Q(qrx)) + q1<-Q[,1] + xq<-as.matrix(Q[,2:qrx$rank]) + + if( is.null(yhat0) ) { + fit0<-suppressWarnings(rq(y~xq-1)) + } else { + fit0 <- lsfit(xq, yhat0, intercept = FALSE) + } +# ord<-order(fit0$resid) + +## 20141211: set initial fit to null model if it has lower dispersion + betahat0 <- fit0$coef + if( disp(betahat0, xq, y, scores) > disp(rep(0,length(betahat0)), xq, y, scores) ) { + betahat0 <- rep(0, length(betahat0) ) + } + ord <- order(y - xq%*%betahat0) +## + + fit <- jaeckel(as.matrix(xq[ord,]), y[ord], betahat0, scores=scores, ...) + if( fit$convergence != 0 ) { + fit2 <- jaeckel(as.matrix(xq[ord,]), y[ord], jitter(fit$par), scores=scores, ...) + if( fit$convergence != 0 ) { + warning("rfit: Convergence status not zero in jaeckel") + if( fit2$value < fit$value ) fit <- fit2 + } else { + fit <- fit2 + } + rm(fit2) + } + rm(ord) + betahat <- fit$par + + yhat <- xq %*% betahat + ehat <- y - yhat + alphahat <- ifelse(symmetric, signedrank(ehat), median(ehat)) + ehat <- ehat - alphahat + yhat <- yhat+alphahat + + bhat <- lsfit(x,yhat,intercept=FALSE)$coef + + r.gettau <- switch(TAU, + F0 = gettauF0, + R = gettau, + N = function(...) NA + ) + + tauhat <- r.gettau(ehat, ncol(xq), scores, ...) + if (symmetric) { + taushat <- tauhat + } else { + taushat <- taustar(ehat, qrx$rank) + } + + res <- list( coefficients = bhat, residuals = ehat, fitted.values = yhat, + scores = scores, x = x, y = y, tauhat = tauhat, qrx1=qrx, + taushat = taushat, symmetric = symmetric, betahat = bhat,disp=fit$value) + res$call <- call + class(res) <- list("rfit") + res + +} +loc2plot<-function(x,y,plotfun=akerd,xlab='X',ylab='',...){ +# +# Plot an estimate of the distribution of X-Y +# By default, +# plotfun=akerd, meaning that a kernel adaptive estimator is used. 
+# Other options are: +# skerd +# kdplot +# rdplot +# +# See Wilcox Introduction to Robust Estimation and Hypothesis Testing +# section 3.2 for details. +# +m=elimna(cbind(x,y)) +x=m[,1] +y=m[,2] +temp=temp=as.vector(outer(x,y,FUN='-')) +plotfun(temp,xlab=xlab,ylab=ylab,...) +} +q2gci<-function(x,y,q=c(.1,.25,.5,.75,.9),nboot=2000,plotit=TRUE,SEED=TRUE,xlab='Group 1',ylab='Est.1-Est.2',alpha=.05){ +# +# Compare quantiles using pb2gen +# via hd estimator. Tied values are allowed. +# When comparing lower or upper quartiles, both power and the probability of Type I error +# compare well to other methods that have been derived. +# q: can be used to specify the quantiles to be compared +# q defaults to comparing the .1,.25,.5,.75, and .9 quantiles +# Function returns p-values and critical p-values based on Hochberg's method. +# +x=elimna(x) +y=elimna(y) +if(sum(duplicated(x)>0))stop('Duplicate values were detected; use qcomhd or medpb2') +if(sum(duplicated(y)>0))stop('Duplicate values were detected; use qcomhd or medpb2') +if(SEED)set.seed(2) +pv=NULL +output=matrix(NA,nrow=length(q),ncol=10) +dimnames(output)<-list(NULL,c('q','n1','n2','est.1','est.2','est.1_minus_est.2','ci.low','ci.up','p_crit','p-value')) +for(i in 1:length(q)){ +output[i,1]=q[i] +output[i,2]=length(elimna(x)) +output[i,3]=length(elimna(y)) +output[i,4]=qest(x,q=q[i]) +output[i,5]=qest(y,q=q[i]) +output[i,6]=output[i,4]-output[i,5] +temp=pb2gen(x,y,nboot=nboot,est=qest,q=q[i],SEED=FALSE,alpha=alpha,pr=FALSE) +output[i,7]=temp$ci[1] +output[i,8]=temp$ci[2] +output[i,10]=temp$p.value +} +temp=order(output[,10],decreasing=TRUE) +zvec=alpha/c(1:length(q)) +output[temp,9]=zvec +#print(output) +output <- data.frame(output) +output$signif=rep('YES',nrow(output)) +for(i in 1:nrow(output)){ +if(output[temp[i],10]>output[temp[i],9])output$signif[temp[i]]='NO' +if(output[temp[i],10]<=output[temp[i],9])break +} +if(plotit){ +xax=rep(output[,4],3) +yax=c(output[,6],output[,7],output[,8]) +plot(xax,yax,xlab=xlab,ylab=ylab,type='n') +points(output[,4],output[,6],pch='*') +lines(output[,4],output[,6]) +points(output[,4],output[,7],pch='+') +points(output[,4],output[,8],pch='+') +} +output +} +cov2cor<-function(x){ +# +# Convert a covariance matrix to a correlation matrix +# +p=ncol(x) +m=x +for(i in 1:p){ +for(j in 1:p){ +m[i,j]=m[i,j]/sqrt(x[i,i]*x[j,j]) +}} +m +} + + + + +linpairpb<-function(x,tr=.2,alpha=.05,nboot=NA,est=tmean,method='hoch',bhop=FALSE,SEED=TRUE,...){ +# +# Report results for all pairwise comparsisons +# in a format convenient when using other functions that use this function +# +if(is.matrix(x))x=listm(x) +J=length(x) +con=con.all.pairs(J) +if(bhop)method='BH' +a=linconpb(x,tr=tr,alpha=alpha,nboot=nboot,est=est,SEED=SEED,method=method,...) +r=nrow(a$output) +cc=ncol(a$output) +cp1=cc+3 +mat=matrix(NA,nrow=r,ncol=cp1) +for(i in 1:r){ +g=which(a$con[,i]!=0) +z=c(g,a$output[i,2:cc],est(x[[g[1]]],...),est(x[[g[2]]],...)) +e1=est(x[[g[1]]],...) +e2=est(x[[g[2]]],...) +mat[i,]=c(g,a$output[i,2:cc],e1,e2) +} +num.sig<-sum(mat[,4]<=mat[,5]) +dimnames(mat)=list(NULL,c('Group','Group','psihat','p.value','p.crit','ci.lower','ci.upper','Est .1','Est. 
2','p.adjusted')) +list(output=mat,num.sig=num.sig) +} + +func.plot<-function(fit, x = NULL, method ='MBD', depth = NULL, plotit = TRUE, + prob = 0.5, color = 6, outliercol = 2, barcol = 4, fullout = FALSE, + factor = 1.5, xlim = c(1, nrow(fit)), ylim = c(min(fit) - + 0.5 * diff(range(fit)), max(fit) + 0.5 * diff(range(fit))),xlab='Time',ylab='Y', + ...){ +# +# functional boxplot for functional data using +# method in Sun and Genton. +# +# +# fit is assumed to be an n-by-p matrix +# n= number of subjects +# p= number points where the function has been evaluated. +# +# rows with missing values are automatically removed. +# +library(fda) +elimna(fit) +fit=t(fit) +res=fbplot(fit, x = NULL, method = method, depth = depth, plot = plotit, + prob =prob, color =color, outliercol =outliercol, barcol = barcol, +fullout = fullout, factor = factor, xlim =xlim, ylim = ylim, xlab=xlab,ylab=ylab,...) +res +} + +func.out<-function(x,xlab='Time',ylab=' '){ +# +# A spaghetti plot for functional data that indicates outliers with a dashed line +# x is a matrix with n rows and p columns +# +# It is assumed that the function is measured at times 1, 2, ..., p +# +x=elimna(t(x)) # colums with missing data are automatically removed +x=t(x) +p=ncol(x) +n=nrow(x) +plot(c(1:p),seq(min(x),max(x),length.out=p),type='n',xlab=xlab,ylab=ylab) +flag=func.plot(x,plotit=FALSE)$outpoint +chk=c(1:n) +flag2=chk +nsub=length(flag) +if(nsub>0)flag2=chk[-flag] +for(j in 1:length(flag2))lines(c(1:p),x[flag2[j],]) +if(nsub>0)for(j in 1:nsub)lines(c(1:p),x[flag[j],],lty=2) +} + +spag.plot<-function(x, regfun=tsreg,type = c('l', + 'p', 'b', 'o', 'c'), legend = FALSE, trace.label = deparse(substitute(trace.factor)), + fixed = FALSE, xlab = 'Time', ylab ='', + xtick = FALSE, xaxt = par('xaxt'), axes = TRUE, fit.lin=FALSE,...){ +# +# Create a spaghetti plot for data stored in a matrix with +# n rows and p columns. The p columns +# contain measures taken at p times for each subject. +# This function converts x into a form that can be used by interaction.plot +# +# fit.line=TRUE means that a linear fit is plotted. +# +# regfun: The linear fit is based on the regression estimator indicated by +# regfun. The default is Theil--Sen estimator +# +# +# type: the type of plot (see plot.default): lines or points or both. +# +x=as.matrix(x) +n=nrow(x) +p=ncol(x) +np=n*p +m=matrix(NA,nrow=np,3) +pvec=c(1:p) +ic=1-p +iu=0 +for(i in 1:n){ +ic=ic+p +iu=iu+p +m[ic:iu,1]=i # create Subject id. +m[ic:iu,2]=pvec +m[ic:iu,3]=x[i,] +} +if(!fit.lin)interaction.plot(m[,2],m[,1],m[,3],xlab=xlab,ylab=ylab,legend=legend, +xtick=xtick,xaxt=xaxt,axes=axes) +if(fit.lin){ +fit=by(m[,2:3],m[,1],regYval,regfun=regfun) +fit1 <- unlist(fit) +names(fit1) <- NULL +#plotting the linear fit by id +interaction.plot(m[,2],m[,1], fit1, + xlab=xlab, ylab=ylab, legend=legend) +} +} +regYval<-function(m,regfun=tsreg){ +val=as.vector(regYhat(m[,1],m[,2],regfun=regfun)) +val +} +FBplot=func.plot + +mcpKadjp <- function (p, k=1, proc = c('Holm'), rawp=p) { +# +# MCP method based on results in +# +# Keselman, H. J., Miller, C. E., & Holland, B. (2011). +# Many tests of significance: New methods for controlling Type I errors. +# Psychological Methods, 16, 420-431. +# +# Also see +# Keselman, H. J., & Miller, C. E. (2012). +# Correction to many tests of significance: +# New methods for controlling Type I errors. Psychological Methods, 17(4), 679. +# +# p: The p-values to be adjusted. +# k: The value for k-FWER +# proc: indicates the method to be used. 
Choices are: +#' Holm' +# 'Hochberg', +#' 'RS', Romano-Shaikh procedure + # 'Sarkar', + # 'BH' , Benjamini--Hochberg +# +## Generalized Hochberg is valid under MTP2 condition of the joint null +## distribution of the p-values +## Sarkar procedure is only valid for independent test statistics +# +D1 <- function(k=1, s=1000) { +#To calculate D1 values for Romano-Shaikh procedure + alpha <- NULL + for (i in 1:s) { + if (i <= k) alpha[i]=k/s + else alpha[i]=k/(s+k-i) + } + S <- NULL + S[1:k] <- 0 + for (I in (k+1):s) { + tmp <- NULL + tmp[1:(k-1)] <- 0 + tmp[k]=I*alpha[s-I+k]/k + for (j in (k+1):I) tmp[j]=I*(alpha[s-I+j]-alpha[s-I+j-1])/j + S[I] <- sum(tmp) + if (S[I] < S[I-1]) break + + maxI <- I + maxS <- round(S[I],4) + } + return(list(S=S, maxI=maxI, maxS=maxS)) +} +#modified the function mt.rawp2adjp from MTP package to k-FWER procedures + m <- length(rawp) + n <- length(proc) + index <- order(rawp) + spval <- rawp[index] + adjp <- matrix(0, m, n + 1) + dimnames(adjp) <- list(NULL, c('rawp', proc)) + adjp[, 1] <- spval + +#################### Calculate adjusted p-values ###################### + +#generalized Holm procedure based on Lehmann and Romano (2005) + if (is.element('Holm', proc)) { + crit <- sapply(c(rep(k,k-1),k:m), function(i) + k/(m-i+k)) + tmp <- 1/crit*spval + tmp[tmp>1] <- 1 + for (i in 2:m) tmp[i] <- max(tmp[i-1], tmp[i]) + adjp[, 'Holm'] <- tmp + } +#generalized Hochberg procedure (Step-up version of Lehmann and Romano) + if (is.element('Hochberg', proc)) { + crit <- sapply(c(rep(k,k-1),k:m), function(i) + k/(m-i+k)) + tmp <- 1/crit*spval + tmp[tmp>1] <- 1 + for (i in (m-1):1) tmp[i] <- min(tmp[i+1], tmp[i]) + adjp[, 'Hochberg'] <- tmp + } +#generalized Hochberg procedure based on Romano and Shaikh(2006) + if (is.element('RS', proc)) { + d <- D1(k,m)$maxS + crit <- sapply(c(rep(k,k-1),k:m), function(i) + k/(m-i+k)/d) + tmp <- 1/crit*spval + tmp[tmp>1] <- 1 + for (i in (m-1):1) tmp[i] <- min(tmp[i+1], tmp[i]) + adjp[, 'RS'] <- tmp + } +#generalized Hochberg procedure based on Sarkar(2008) + ### Only for independent case ### + if (is.element('Sarkar', proc)) { + crit <- sapply(c(rep(k,k-1),k:m), function(i) + (prod((1:k)/(m-i+(1:k))))) + tmp <- 1/crit*(spval^k) + tmp[tmp>1] <- 1 + for (i in (m-1):1) tmp[i] <- min(tmp[i+1], tmp[i]) + # Next line used to protect against possibility of adjp1] <- 1 + for (i in (m-1):1) tmp[i] <- min(tmp[i+1], tmp[i]) + adjp[, 'BH'] <- tmp + } +### The following line returns original order of p-values + adjp <- adjp[order(index),] + return(adjp) +} + +discANOVA<-function(x,nboot=500,SEED=TRUE){ +# +# Test the global hypothesis that for two or more independent groups, +# the corresponding discrete distributions are identical. +# That is, test the hypothesis that independent groups have identical +# multinomial distributions. A generalization of the Storer--Kim method is used. +# +# Could also use a chi-squared test via the function: disc2.chi.sq +# +# The method is designed for situations where the +# sample size is relatively small. The method can be sensitive to +# differences that are missed using a measure of location. +# +# Control over the Type I error probability is excellent, even when n=10 +# +# x is a matrix with n rows and J columns +# or it can have list mode. 
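+# Example (illustrative; x is a hypothetical 20-by-3 matrix of Likert-type
+# responses):
+#   discANOVA(x,nboot=500)
+# returns the test statistic and a bootstrap p-value.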
+# +if(is.matrix(x) || is.data.frame(x))x=listm(x) +library(mc2d) +if(SEED)set.seed(2) +vals=lapply(x,unique) +vals=sort(elimna(list2vec(vals))) +K=length(unique(vals)) +n=lapply(x,length) +n=list2vec(n) +J=length(x) +step1=discANOVA.sub(x) +test=step1$test +C1=step1$C1 +HT=NULL +for(i in 1:K)HT[i]=mean(C1[i,]) +tv=NULL +TB=NA +VP=NA +B1hat=NA +xx=list() +for(ib in 1:nboot){ +xx=list() +for(j in 1:J){ +temp=rmultinomial(n[j],1,HT) +xx[[j]]=which(temp[1,]==1) +for(i in 2:n[j])xx[[j]][i]=which(temp[i,]==1) +} +TB[ib]=discANOVA.sub(xx)$test +} +pv=1-mean(test>TB)-.5*mean(test==TB) +list(test=test,p.value=pv) +} +discANOVA.sub<-function(x){ +# +# +x=lapply(x,elimna) +vals=lapply(x,unique) +vals=sort(elimna(unique(list2vec(vals)))) +n=lapply(x,length) +n=list2vec(n) +K=length(vals) +J=length(x) +C1=matrix(0,nrow=K,ncol=J) +for(j in 1:J){ +for(i in 1:K){ +C1[i,j]=C1[i,j]+sum(x[[j]]==vals[i]) +} +C1[,j]=C1[,j]/n[j] +} +test=0 +for(i in 1:K)test=test+var(C1[i,]) +list(test=test,C1=C1) +} + + +discmcp<-function(x,alpha=.05,nboot=500,SEED=TRUE,...){ +# +# Multiple comparisons for J independent groups +# having discrete distributions. +# The method is based on a chi-squared test for each pair of groups to be compared +# +# The data are assumed to be stored in x +# which either has list mode or is a matrix. In the first case +# x[[1]] contains the data for the first group, x[[2]] the data +# for the second group, etc. Length(x)=the number of groups = J. +# If stored in a matrix, the columns of the matrix correspond +# to groups. +# +# Missing values are allowed. +# +# Probability of one or more Type I errors controlled using Hochberg's method. +# +if(is.matrix(x))x<-listm(x) +if(!is.list(x))stop('Data must be stored in list mode or in matrix mode.') +J<-length(x) +ncon=(J^2-J)/2 +Jm<-J-1 +# +# Determine critical values +dvec=alpha/c(1:ncon) +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +output<-matrix(NA,nrow=ncon,ncol=4) +dimnames(output)<-list(NULL,c('Group','Group','p.value','p.crit')) +ic=0 +for(j in 1:J){ +for(k in 1:J){ +if(j=zvec) +output[temp2,4]<-zvec +num.sig<-sum(output[,3]<=output[,4]) +list(output=output,num.sig=num.sig) +} +discstep<-function(x,nboot=500,alpha=.05,SEED=TRUE){ +# +# Step-down multiple comparison procedure for comparing +# J independent discrete random variables. +# The method is based on a generalization of the Storer--Kim method +# comparing independent binomials; it can be sensitive to differences +# not detected by measures of location. 
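+# Critical p-values follow a step-down scheme: a subset of at most J-2 groups
+# is tested at level 1-(1-alpha)^(p/J), where p is the subset size; larger
+# subsets are tested at level alpha.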
+# +# x is a matrix with n rows and J columns +# or it can have list mode +# +if(is.matrix(x) || is.data.frame(x))x=listm(x) +library(mc2d) +if(SEED)set.seed(2) +vals=lapply(x,unique) +vals=sort(elimna(list2vec(vals))) +K=length(unique(vals)) +n=lapply(x,length) +n=list2vec(n) +J=length(x) +if(J==2)stop('For 2 groups use disc2com') +if(J>5)stop('Designed for 5 groups or less') +com=modgen(J) +ntest=length(com) +jp1=J+1 +com=com[jp1:length(com)] +ntest=length(com) +mout=matrix(NA,nrow=ntest,ncol=3) +dimnames(mout)=list(NULL,c('Groups','p-value','p.crit')) +test=NULL +for(i in 1:ntest){ +test[i]=discANOVA.sub(x[com[[i]]])$test #$ +nmod=length(com[[i]])-1 +temp=c(nmod:0) +mout[i,1]=sum(com[[i]]*10^temp) +} +mout[,3]=alpha +xx=list() +pv=NA +jm2=J-2 +mout[,3]=alpha +TB=matrix(NA,nrow=nboot,ncol=ntest) +step1=discANOVA.sub(x) +C1=step1$C1 +HT=NULL +for(i in 1:K)HT[i]=mean(C1[i,]) +for(ib in 1:nboot){ +xx=list() +for(j in 1:J){ +temp=rmultinomial(n[j],1,HT) +xx[[j]]=which(temp[1,]==1) +for(i in 2:n[j])xx[[j]][i]=which(temp[i,]==1) +} +for(k in 1:ntest)TB[ib,k]=discANOVA.sub(xx[com[[k]]])$test #$ +} +for(k in 1:ntest){ +mout[k,2]=1-mean(test[k]>TB[,k])-.5*mean(test[k]==TB[,k]) +pnum=length(com[[k]]) +pe=1-(1-alpha)^(pnum/J) +if(length(com[[k]])<=jm2)mout[k,3]=pe +} +list(results=mout[nrow(mout):1,]) +} + +medcurve<-function(x){ +# +# returns the median curve for functional data +# +chk=FBplot(x,plot=FALSE)$depth +id=which(chk==max(chk)) +if(length(id)==1)est=x[id,] +if(length(id)>1)est=apply(x[id,],2,mean) +est +} +cumrelf<-function(x,y=NA,xlab='X',ylab='CUM REL FREQ',pr.freq=FALSE){ +# +# plot the cumulative relative frequencies for 1 or more groups +# +# x can be a matrix, columns corresponding to groups, or x +# x can have list mode. +# y=NA, if data are stored in y, it is assumes there two groups +# with data for the second group stored in y +# +xu=NULL +cf=NULL +if(!is.na(y[1])){ +xx=list() +xx[[1]]=x +xx[[2]]=y +x=xx +} +if(is.matrix(x) || is.data.frame(x))x=listm(x) +if(length(x)==1)stop('This function is designed for two or more groups') +x=elimna(x) +for(j in 1:length(x)){ +z=splot(x[[j]],plotit=FALSE) +xu=c(xu,sort(unique(x[[j]]))) +cf=c(cf,cumsum(z$frequencies)/length(x[[j]])) +} +plot(xu,cf,,type='n',xlab=xlab,ylab=ylab) +for(j in 1:length(x)){ +z=splot(x[[j]],plotit=FALSE) +if(pr.freq)print(z) +lines(sort(unique(x[[j]])),cumsum(z$frequencies)/length(x[[j]]),lty=j) +} +} +regGmcp<-function(x,y,regfun=tsreg,SEED=TRUE,nboot=100,xout=FALSE,AD=FALSE, + outfun=outpro,STAND=TRUE,alpha=0.05,pr=TRUE,MC=FALSE,ISO=TRUE,...) +{ +# +# If ISO = FALSE: +# All pairwise comparisons of regression parameters are performed among J independent groups +# That is, for groups j and k, all j1)stop('One covariate only is allowed with this function') +if(length(x1)!=length(y1))stop('x1 and y1 have different lengths') +if(length(x1)!=length(x2))stop('x1 and y2 have different lengths') +if(length(x2)!=length(y2))stop('x2 and y2 have different lengths') +if(length(y1)!=length(y2))stop('y1 and y2 have different lengths') +xy=elimna(cbind(x1,y1,x2,y2)) +} +if(is.null(pts)){ +for(i in 1:length(qvals))pts=c(pts,qest(xy[,1],qvals[i])) +} +if(SEED)set.seed(2) +n=nrow(xy) +est1=NA +est2=NA +J=length(pts) +est1=matrix(NA,nrow=nboot,ncol=J) +est2=matrix(NA,nrow=nboot,ncol=J) + +data=matrix(sample(n,size=n*nboot,replace=TRUE),ncol=nboot,nrow=n) +if(!MC){ +est1=apply(data,2,DancGLOB_sub,xy=xy[,1:2],pts=pts,est=est,fr=fr1,nmin=nmin,...) +est2=apply(data,2,DancGLOB_sub,xy=xy[,3:4],pts=pts,est=est,fr=fr2,nmin=nmin,...) 
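+# arrange the bootstrap estimates as nboot-by-J matrices, one column per
+# design point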
+est1=t(as.matrix(est1)) +est2=t(as.matrix(est2)) +} + +if(MC){ +library(parallel) +data=listm(data) +est1=mclapply(data,DancGLOB_sub,xy=xy[,1:2],pts=pts,est=est,fr=fr1,nmin=nmin,...) +est2=mclapply(data,DancGLOB_sub,xy=xy[,3:4],pts=pts,est=est,fr=fr2,nmin=nmin,...) +est1=t(matl(est1)) +est2=t(matl(est2)) +} + +e1=runhat(xy[,1],xy[,2],pts=pts,est=est,fr=fr1,...) +e2=runhat(xy[,3],xy[,4],pts=pts,est=est,fr=fr2,...) +dif=e1-e2 + +pv=NA +for(j in 1:J){ +pv[j]=mean(est1[,j]=zvec) +output[temp2,7]<-zvec +output[,7]<-output[,7] +INT=bw.es.I(J,K,x,CI=CI,tr=tr,alpha=alpha,REL.MAG=REL.M)$Interaction.ES +list(output=output,Effect.Sizes=INT) +} + + +bwiJ2plot<-function(J,K,x,fr=.8,aval=.5,xlab = 'X', ylab = '', +color = rep('black', 5),BOX=FALSE){ +# +# This function is for a J by 2 between by within design +# +# Plot distribution of the difference scores for +# each of the J independpent groups +# +# x: can be a matrix, organized as expected by bwimcp +# or it can have list mode. +# +if(K!=2)stop('Should have only two dependent variables') +if(J>5)stop('Can only have five levels for the independent factor') + if(is.matrix(x)) { + y <- list() + for(j in 1:ncol(x)) + y[[j]] <- x[, j] + x <- y +} +dif=list() +for(j in 1:5)dif[[j]]=NULL +JK=J*K +m<-matrix(c(1:JK),J,K,byrow=TRUE) +ic<-c(-1,0) +for(j in 1:J){ +ic<-ic+2 +dif[[j]]=x[[ic[1]]]-x[[ic[2]]] +} +if(!BOX)g5plot(dif[[1]],dif[[2]],dif[[3]],dif[[4]],dif[[5]],fr = fr, + aval = aval, xlab = xlab, ylab = ylab, color = color) +if(BOX)boxplot(dif) +} + +Dancova<-function(x1,y1,x2=x1,y2,fr1=1,fr2=1,tr=.2,alpha=.05,plotit=TRUE,pts=NA, +sm=FALSE,xout=FALSE,outfun=out,DIF=FALSE,LP=FALSE,xlab='X',ylab='Y',...){ +# +# Compare two dependent groups using a method similar to the one used by the R function ancova +# No parametric assumption is made about the form of +# the regression lines--a running interval smoother is used. +# +# Assume data are in x1 y1 x2 and y2 +# +# sm=T will create smooths using bootstrap bagging. +# pts can be used to specify the design points where the regression lines +# are to be compared. +# +# DIF=FALSE: marginal trimmed means are compared +# DIF=TRUE: Trimmed means of difference scores are used. +# +if(!is.null(x2)){ +if(ncol(as.matrix(x1))>1)stop('One covariate only is allowed with this function') +if(length(x1)!=length(y1))stop('x1 and y1 have different lengths') +if(length(x2)!=length(y2))stop('x2 and y2 have different lengths') +if(length(x1)!=length(x2))stop('x1 and y2 have different lengths') + +if(length(y1)!=length(y2))stop('y1 and y2 have different lengths') +xy=elimna(cbind(x1,y1,x2,y2)) +x1=xy[,1] +y1=xy[,2] +x2=xy[,3] +y2=xy[,4] +xorder<-order(x1) +y1<-y1[xorder] +x1<-x1[xorder] +xorder<-order(x2) +y2<-y2[xorder] +x2<-x2[xorder] +if(!is.na(pts[1]))mat=Dancovapts(x1,y1,x2,y2,fr1=fr1,fr2=fr2,tr=tr,alpha=alpha, +plotit=FALSE,pts=pts,sm=sm,xout=xout,outfun=outfun,DIF=DIF,...) 
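+# no design points specified: choose five points for which both groups have
+# at least 12 observations close to the point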
+if(is.na(pts[1])){ +npt<-5 +isub<-c(1:5) # Initialize isub +test<-c(1:5) +xorder<-order(x1) +y1<-y1[xorder] +x1<-x1[xorder] +xorder<-order(x2) +y2<-y2[xorder] +x2<-x2[xorder] +n1<-1 +n2<-1 +vecn<-1 +n=length(y1) +ivals=c(1:n) +for(i in 1:length(x1))n1[i]<-length(y1[near(x1,x1[i],fr1)]) +for(i in 1:length(x1))n2[i]<-length(y2[near(x2,x1[i],fr2)]) +for(i in 1:length(x1))vecn[i]<-min(n1[i],n2[i]) +sub<-c(1:length(x1)) +isub[1]<-min(sub[vecn>=12]) +isub[5]<-max(sub[vecn>=12]) +isub[3]<-floor((isub[1]+isub[5])/2) +isub[2]<-floor((isub[1]+isub[3])/2) +isub[4]<-floor((isub[3]+isub[5])/2) +mat<-matrix(NA,5,9) +dimnames(mat)<-list(NULL,c('X','n','DIF','TEST','se','ci.low','ci.hi','p.value','p.adjust')) +for (i in 1:5){ +t1=near(x1,x1[isub[i]],fr1) +t2=near(x2,x1[isub[i]],fr2) +iv1=ivals[t1] +iv2=ivals[t2] +pick=unique(c(iv1,iv2)) +mat[i,2]<-length(y1[pick]) +if(!DIF)test<-yuend(y1[pick],y2[pick],tr=tr) +if(DIF)test<-trimci(y1[pick]-y2[pick],tr=tr,pr=FALSE) +mat[i,1]<-x1[isub[i]] +if(!DIF){ +mat[i,4]<-test$teststat +mat[i,3]<-test$dif +} +if(DIF){ +mat[i,4]<-test$test.stat +mat[i,3]<-test$estimate +} +mat[i,5]<-test$se +mat[i,6]<-test$ci[1] +mat[i,7]<-test$ci[2] +mat[i,8]<-test$p.value +} +temp2<-order(0-mat[,8]) +bot=c(1:nrow(mat)) +dvec=sort(alpha/bot,decreasing=TRUE) +#mat[temp2,9]=dvec +mat[,9]=p.adjust(mat[,8],method='hoch') +} +if(plotit){ +if(xout){ +flag<-outfun(x1,...)$keep +x1<-x1[flag] +y1<-y1[flag] +flag<-outfun(x2,...)$keep +x2<-x2[flag] +y2<-y2[flag] +} +runmean2g(x1,y1,x2,y2,fr=fr1,est=tmean,sm=sm,xout=FALSE,LP=LP,xlab=xlab,ylab=ylab,...) +}} +list(output=mat) +} + + + +Dancovapts<-function(x1,y1,x2,y2,fr1=1,fr2=1,tr=.2,alpha=.05,plotit=TRUE,pts=NA,sm=FALSE,xout=FALSE,outfun=out,DIF=FALSE,LP=TRUE,...){ +# +# Compare two dependent groups using a method similar to the one used by the R function ancova +# No parametric assumption is made about the form of +# the regression lines--a running interval smoother is used. +# +# Assume data are in x1 y1 x2 and y2 +# +# sm=T will create smooths using bootstrap bagging. +# pts can be used to specify the design points where the regression lines +# are to be compared. +# +# DIF=FALSE: marginal trimmed means are compared +# DIF=TRUE: Trimmed means of difference scores are used. 
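+# Example (illustrative; the covariate values in pts are hypothetical):
+#   Dancovapts(x1,y1,x2,y2,pts=c(5,10))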
+# +if(ncol(as.matrix(x1))>1)stop('One covariate only is allowed with this function') +if(length(x1)!=length(y1))stop('x1 and y1 have different lengths') +if(length(x1)!=length(x2))stop('x1 and y2 have different lengths') +if(length(x2)!=length(y2))stop('x2 and y2 have different lengths') +if(length(y1)!=length(y2))stop('y1 and y2 have different lengths') +xy=elimna(cbind(x1,y1,x2,y2)) +x1=xy[,1] +y1=xy[,2] +x2=xy[,3] +y2=xy[,4] +xorder<-order(x1) +y1<-y1[xorder] +x1<-x1[xorder] +xorder<-order(x2) +y2<-y2[xorder] +x2<-x2[xorder] +n=length(y1) +npts=length(pts) +mat<-matrix(NA,nrow=npts,nco=9) +dimnames(mat)<-list(NULL,c('X','n','DIF','TEST','se','ci.low','ci.hi', +'p.value','p.crit')) +for (i in 1:npts){ +t1=near(x1,pts[i],fr1) +t2=near(x2,pts[i],fr2) +ivals=c(1:n) +iv1=ivals[t1] +iv2=ivals[t2] +pick=unique(c(iv1,iv2)) +mat[i,2]<-length(y1[pick]) +if(!DIF)test<-yuend(y1[pick],y2[pick],tr=tr,alpha=alpha) +if(DIF)test<-trimci(y1[pick]-y2[pick],tr=tr,pr=FALSE,alpha=alpha) +mat[i,1]<-pts[i] +if(!DIF){ +mat[i,4]<-test$teststat +mat[i,3]<-test$dif +} +if(DIF){ +mat[i,4]<-test$test.stat +mat[i,3]<-test$estimate +} +mat[i,5]<-test$se +mat[i,6]<-test$ci[1] +mat[i,7]<-test$ci[2] +mat[i,8]<-test$p.value +} +temp2<-order(0-mat[,8]) +bot=c(1:nrow(mat)) +dvec=sort(alpha/bot,decreasing=TRUE) +mat[temp2,9]=dvec +mat +} +qrchk<-function(x,y,qval=.5,q=NULL,nboot=1000,com.pval=FALSE,SEED=TRUE,alpha=.05,pr=TRUE, +xout=FALSE,outfun=out,chk.table=FALSE,MC=FALSE,...){ +# +# Test of a linear fit based on quantile regression +# The method stems from He and Zhu 2003, JASA, 98, 1013-1022. +# Here, resampling is avoided using approximate critical values if +# com.pval=F +# +# To get a p-value, via simulations, set com.pval=T +# nboot is number of simulations used to determine p-value. +# Execution time can be quite high +# +# This function quickly determines .1, .05, .025 and .01 +# critical values for n<=400 and p<=6 (p= number of predictors) +# and when dealing with the .5 quantile. +# Otherwise, critical values are determined via simulations, which +# can have high execution time. +# +if(!is.null(q))qval=q +if(pr){ +if(!com.pval)print('To get a p-value, set com.pval=T and use MC=T if a multicore processor is available') +print('Reject if test statistic is >= critical value') +} +x<-as.matrix(x) +p<-ncol(x) +pp1<-p+1 +yx<-elimna(cbind(y,x)) #Eliminate missing values. 
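+# split the cleaned data back into the outcome y and the predictor matrix x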
+y<-yx[,1] +x<-yx[,2:pp1] +store.it=F +x<-as.matrix(x) +p.val<-NULL +crit.val<-NULL +x<-as.matrix(x) +if(xout){ +flag<-outfun(x,...)$keep +x<-x[flag,] +y<-y[flag] +} +# shift the marginal x values so that the test statistic is +# invariant under changes in location +n<-length(y) +x=standm(x) +if(p<=6){ +if(qval==.5){ +aval<-c(.1,.05,.025,.01) +aokay<-duplicated(c(alpha,aval)) +aokay<-sum(aokay) +if(aokay>0){ +crit10<-matrix(c(.0254773,.008372,.00463254,.0023586,.000959315,.00042248, +.00020069, +.039728,.012163,.0069332,.0036521,.001571,.0006882, .0003621, +.055215,.0173357,.009427,.004581,.0021378,.00093787,.00045287, +.075832,.0228556,.0118571,.005924,.00252957,.0011593,.00056706, +.103135,.0298896,.0151193,.0073057,.00305456,.0014430,.000690435, +.12977,.03891,.018989,.009053,.0036326,.001617,.000781457),ncol=6,nrow=7) +crit05<-matrix(c(.031494,.010257,.00626,.00303523,.0012993,.000562247, +.00025972, +.046296,.015066,.00885556,.0045485,.0110904,.00086946,.000452978, +.063368,.0207096546,.010699,.005341,.0025426,.0011305,.000539873, +.085461,.027256,.014067,.0071169,.002954,.0013671,.000660338, +.11055,.03523,.017511,.0084263,.0036533,.0016338,.00081289, +.13692,.043843,.0222425,.0102265,.004283,.0019,.000907241),ncol=6,nrow=7) +crit025<-matrix(c(.0361936,.012518,.007296,.0036084,.00172436,.000725365, +.000327776, +.05315,.017593,.0102389,.0055043,.00227459,.0010062,.000523526, +.07214,.023944,.013689,.0060686,.0028378,.00136379,.000635645, +.093578,.0293223,.0156754,.0086059,.0035195,.001694,.00074467, +.118414,.03885,.0201468,.0094298,.0040263,.00182437,.000916557, +.14271,.047745,.0253974,.011385,.004725,.00207588,.0010191),ncol=6,nrow=7) +crit01<-matrix(c(.0414762,.0146553,.0098428,.0045274,.00219345,.00096244, +.000443827, +.058666,.020007,.01129658,.0063092,.002796,.0011364,.000628054, +.079446,.0267958,.015428,.0071267,.0034163,.0015876,.000734865, +.102736,.0357572,.017786,.0093682,.0042367,.0019717,.000868506, +.125356,.041411,.0234916,.0106895,.0047028,.0020759,.00101052, +.14837,.053246,.027759,.012723,.00528,.002437,.00116065),ncol=6,nrow=7) +if(alpha==.1)critit<-crit10 +if(alpha==.05)critit<-crit05 +if(alpha==.025)critit<-crit025 +if(alpha==.01)critit<-crit01 +nvec<-c(10,20,30,50,100,200,400) +nval<-duplicated(c(n,nvec)) +nval<-nval[2:7] +if(sum(nval)>0)crit.val<-critit[nval,p] +if(is.null(crit.val)){ +if(n<=400){ +loc<-rank(c(n,nvec)) +xx<-c(1/nvec[loc[1]-1]^1.5,1/nvec[loc[1]]^1.5) +yy<-c(critit[loc[1]-1,p],critit[loc[1],p]) +} +icoef<-lsfit(xx,yy)$coef +crit.val<-icoef[1]+icoef[2]/n^1.5 +}}}} +if(is.null(crit.val)){ +# no critical value found +if(!com.pval){ +print('Critical values not available, will set com.pval=T') +print('and compute a p-value') +com.pval<-T +}} +gdot<-cbind(rep(1,n),x) +gdot<-ortho(gdot) +x<-gdot[,2:pp1] +x<-as.matrix(x) +temp<-rqfit(x,y,qval=qval,res=TRUE) +coef<-temp$coef +psi<-NA +psi<-ifelse(temp$residuals>0,qval,qval-1) +rnmat<-matrix(0,nrow=n,ncol=pp1) +ran.mat<-apply(x,2,rank) +flagvec<-apply(ran.mat,1,max) +for(j in 1:n){ +flag<-ifelse(flagvec<=flagvec[j],TRUE,FALSE) +flag<-as.numeric(flag) +rnmat[j,]<-apply(flag*psi*gdot,2,sum) +} +rnmat<-rnmat/sqrt(n) +temp<-matrix(0,pp1,pp1) +for(i in 1:n)temp<-temp+rnmat[i,]%*%t(rnmat[i,]) +temp<-temp/n +test<-max(eigen(temp)$values) +if(com.pval){ +if(SEED)set.seed(2) +if(MC)library(parallel) +xy=list() +p1=p+1 +for(i in 1:nboot)xy[[i]]=rmul(n,p=p1) +if(MC)temp3=mclapply(xy,qrchkv2.sub2,qval=qval,mc.preschedule=TRUE) +if(!MC)temp3=lapply(xy,qrchkv2.sub2,qval=qval) +rem=matl(temp3) +p.val=sum(test>=rem) 
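+# p.val is currently the number of simulated statistics less than or equal to the observed
+# test statistic; it is converted below to the proportion of simulated values that exceed it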
+rem<-sort(rem) +p.val<-1-p.val/nboot +ic<-round((1-alpha)*nboot) +crit.val<-rem[ic] +} +de='Fail to reject' +if( test>=crit.val)de='Reject' +list(test.stat=test,crit.value=crit.val,p.value=p.val,Decision=de) +} +qrchkv2.sub2<-function(xy,qval){ +p1=ncol(xy) +p=p1-1 +val=qrchkv2(xy[,1:p],xy[,p1],qval=qval) +val +} + +regYciCV<-function(n,alpha=.05,nboot=1000,regfun=tsreg,SEED=TRUE,MC=FALSE,null.value=0,xout=FALSE,...){ +# +# Determine a critical value for regYci +# +if(SEED)set.seed(2) +mv=NA +chk=0 +if(MC)library(parallel) +xy=list() +for (i in 1:nboot)xy[[i]]=rmul(n) +if(!MC)est=lapply(xy,regciCV.sub,regfun=regfun,null.value=null.value,...) +if(MC)est=mclapply(xy,regciCV.sub,regfun=regfun,null.value=null.value,...) +est=as.vector(matl(est)) +est=sort(est) +ic=round(alpha*nboot) +crit=est[ic] +crit +} +regciCV.sub<-function(xy,regfun,null.value=0,xout=FALSE,...){ + pv=regYci(xy[,1],xy[,2],SEED=FALSE,regfun=regfun,null.value=null.value,xout=xout,...)[,5] + min(pv) +} + +regYci.sum<-function(x,y,regfun=tsreg,pts=x,nboot=100,xout=FALSE,outfun=out,SEED=TRUE,alpha=.05,crit=NULL,null.value=0,plotPV=FALSE,ADJ=FALSE,MC=FALSE, +scale=FALSE,span=.75,xlab='X',xlab1='X1',xlab2='X2',ylab='p-values',theta=50,phi=25,pch='*',...){ +# +# Summarize results from regYci so that results are easier to read. +# single independent variable is assumed. +# +xy=elimna(cbind(x,y)) +x<-as.matrix(x) +p=ncol(x) +if(p>1)stop('This function is designed for one independent variable only') +p1=p+1 +x<-xy[,1:p] +y<-xy[,p1] +x<-as.matrix(x) +if(xout){ +m<-cbind(x,y) +flag<-outfun(x,plotit=FALSE,...)$keep +m<-m[flag,] +x<-m[,1:p] +y<-m[,p1] +x=as.matrix(x) +} +res=regYci(x,y,regfun=regfun,nboot=nboot,null.value=null.value,alpha=alpha,crit=crit, +ADJ=ADJ,MC=MC,plotPV=plotPV,...) +xord=order(pts) +outp=cbind(pts[xord],res[xord,]) +dimnames(outp)=list(NULL,c('X','Pred. Y','Lower.ci','Upper.ci','p.value')) +outp +} + +trq.fit<- +function(x, y, a1 = 0.1, a2, z, int = TRUE, method = "primal", tol = 0.0001) +{ +#compute trimmed regression quantiles +#z is the rq strcture + if(missing(a2)) a2 <- a1 + if(a1 < 0 | a2 < 0) + stop("trimming proportion negative.") + if(a1 + a2 - 1 > tol) + stop("trimming proportion greater than 1.") + if(method!="primal" & method!="dual") + stop("invalid method: should be 'primal' or 'dual'.") + x <- as.matrix(x) + if(missing(z)){ +# z <- rq.fit.br(x, y, tau = -1) #This function is not working properly. 
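+# workaround used here: add an intercept column explicitly and compute the full
+# quantile regression solution path with rq.fit.br(xx, y, tau = -1)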
+ny=length(y) +xx=cbind(rep(1,ny),x) + z <- rq.fit.br(xx, y, tau = -1) +print(z) +} + p <- z$sol[1, ] + q <- matrix(z$sol[ - c(1:3), ], nrow(z$sol) - 3, ncol(z$sol)) + n <- nrow(z$dsol) + s <- NULL + if(length(dimnames(x)[[2]]) == 0) + dimnames(x) <- list(NULL, paste("X", 1:(nrow(q) - 1), sep = "") + ) + if(int) { + x <- cbind(1, x) + dimnames(x)[[2]][1] <- 'Intercept' + } + xbar <- apply(x, 2, "mean") + xxinv <- solve(t(x) %*% x) + if(abs(a1 + a2 - 1) <= tol) { + +#single quantile case + i <- sum(p < a1) + s$coef <- q[, i] + names(s$coef) <- dimnames(x)[[2]] + s$resid <- y - x %*% s$coef + PI <- 3.14159 + x0 <- qnorm(a1) + d0 <- (1/sqrt(2 * PI)) * exp( - (x0^2/2)) + d0 <- ((4.5 * d0^4)/(2 * x0^2 + 1)^2)^0.2 + d <- d0 * (length(s$resid) - length(s$coef))^(-0.2) + if(d > min(a1, 1 - a1)) + d <- min(a1, 1 - a1) + s$d <- d + i <- sum(p < a1 + d) + j <- sum(p < a1 - d) + shat <- as.numeric(xbar %*% t(q[, i] - q[, j]))/(2 * d) + s$int <- int + s$v <- a1 * (1 - a1) * shat^2 + s$cov <- s$v * xxinv + } + else { +#real trimming + p1 <- p[-1] + f <- 1/(1 - a1 - a2) + d <- pmax((pmin(p1, 1 - a2) - c(a1, pmax(p1[ - length(p1)], + a1))), 0) + if(method == "primal") { + s$coef <- q[, 1:length(p1)] %*% d * f + s$resid <- y - x %*% s$coef + s$int <- int + } + else { +#Jureckova-Gutenbrunner trimmed least squares + i <- max(1, sum(p < a1)) + g <- (z$dsol[, i + 1] - z$dsol[, i])/(p[i + 1] - p[ + i]) + wa <- z$dsol[, i] + (a1 - p[i]) * g + j <- sum(p < 1 - a2) + g <- (z$dsol[, j + 1] - z$dsol[, j])/(p[j + 1] - p[ + j]) + wb <- z$dsol[, j] + (1 - a2 - p[j]) * g + wt <- wa - wb + if(min(wt) < 0) + warning("some weights negative!") + s <- lsfit(x, y, abs(wt), int = F) + } +#now compute covariance matrix estimate + mu <- xbar %*% s$coef + v <- d %*% (z$sol[2, 1:length(d)] - mu)^2 + k <- qrq(z, c(a1, a2)) - mu + v <- v + a1 * k[1]^2 + a2 * k[2]^2 + (a1 * k[1] + a2 * k[2])^2 + names(s$coef) <- dimnames(x)[[2]] + s$v <- as.vector(f^2 * v) + s$cov <- s$v * xxinv + } + class(s) <- "trq" + s +} +"print.trq"<- +function(object, digits = 4) +{ + n <- length(object$resid) + p <- length(object$coef) + options(warn = -1) + if(object$int) { + df.num <- p - 1 + fstat <- c(t(object$coef[-1]) %*% solve(object$cov[-1, -1]) %*% + (object$coef[-1]))/df.num + } + else { + df.num <- p + fstat <- t(object$coef) %*% solve(object$cov) %*% (object$coef + )/df.num + } + pvalue <- 1 - pf(fstat, df.num, (n - p)) + regstat <- c(sqrt(object$v), n, fstat, df.num, (n - p), pvalue) + names(regstat) <- c("rse", "n", "F.stat", "df.num", "df.den", "p.value" + ) + err <- sqrt(diag(object$cov)) + tstat <- c(object$coef/err) + tabcoef <- cbind(object$coef, err, tstat, 2 * (1 - pt(abs(tstat), + n - p))) + dimnames(tabcoef) <- list(names(object$coef), c("coef", "std.err", + "t.stat", "p.value")) + options(warn = 0) + print(round(tabcoef, digits)) + cat(paste("Winsorized Standard Error of Regression= ", format(round( + sqrt(object$v), digits)), "\n", "N = ", format(n), + ", F-statistic = ", format(round(fstat, digits)), " on ", + format(df.num), " and ", format((n - p)), " df, ", "p-value = ", + format(round(pvalue, digits)), "\n\n", sep = "")) + invisible(list(summary = regstat, coef.table = tabcoef)) +} +"qrq"<- +function(s, a) +{ +#computes linearized quantiles from rq data structure +#v is the rq structure e.g. 
rq(x,y) +#a is a vector of quantiles required + if(min(a) < 0 | max(a) > 1) stop("alphas out of range [0,1]") + r <- s$sol[1, ] + q <- s$sol[2, ] + q <- c(q[1], q) + J <- length(r) + r <- c(0, (r[1:J - 1] + r[2:J])/2, 1) + u <- rep(0, length(a)) + for(k in 1:length(a)) { + i <- sum(r < a[k]) + w <- (a[k] - r[i])/(r[i + 1] - r[i]) + u[k] <- w * q[i + 1] + (1 - w) * q[i] + } + u +} +trqreg<-function(x,y,a1=.1,a2,xout=FALSE,outfun=outpro){ +library(quantreg) +x<-as.matrix(x) +p1<-ncol(x)+1 +p<-ncol(x) +xy<-cbind(x,y) +xy<-elimna(xy) +x<-xy[,1:p] +y<-xy[,p1] +if(xout){ +m<-cbind(x,y) +flag<-outfun(x,plotit=FALSE)$keep +m<-m[flag,] +x<-m[,1:p] +y<-m[,p1] +} +n=length(y) +xx=cbind(rep(1,n),x) +z <- rq.fit.br(xx, y, tau = -1) +res=trq.fit(x,y,z=z,a1=a1,a2) +coef=res$coef[1:p1] +list(coef=coef) +} +cor2xy<-function(x,y,corfun=spear,...){ + est1=corfun(x[,1],y,...)$cor + est2=corfun(x[,2],y,...)$cor + list(cor=c(est1,est2)) + } +TWOpovPV<-function(x,y,alpha=.05,CN=FALSE){ +# +# Comparing two dependent correlations: Overlapping case +# +# x is assumed to be a matrix with 2 columns +# +# Compare correlation of x[,1] with y to x[,2] with y +# returns a confidence interval stored in +# ci +# +# This function is exactly like TWOpov, only it returns a p-value as well. +# +alph<-c(1:99)/100 +for(i in 1:99){ +irem<-i +chkit<-TWOpov(x,y,alpha=alph[i],CN=CN,ZCI=TRUE)$ci +if(sign(chkit[1]*chkit[2])==1)break +} +p.value<-irem/100 +if(p.value<=.1){ +iup<-(irem+1)/100 +alph<-seq(.001,iup,.001) +for(i in 1:length(alph)){ +irem=i +p.value<-alph[i] +chkit<-TWOpov(x,y,alpha=alph[i],CN=CN,ZCI=TRUE)$ci +if(sign(chkit[1]*chkit[2])==1)break +}} +if(p.value<=.1){ +iup<-(irem+1)/100 +alph<-seq(.001,iup,.001) +for(i in 1:length(alph)){ +p.value<-alph[i] +chkit<-TWOpov(x,y,alpha=alph[i],CN=CN,ZCI=TRUE)$ci +if(sign(chkit[1]*chkit[2])==1)break +}} +res=TWOpov(x,y,alpha=alpha,CN=CN) +list(p.value=p.value,est.rho1=res$est.rho1,est.rho2=res$est.rho2,ci=res$ci) +} + +TWOpNOVPV<-function(x,y,HC4=TRUE,alpha=.05){ +# +# Comparing two dependent correlations: Non-overlapping case +# +# Compute a .95 confidence interval +# for the difference between two dependent Pearson correlations, +# non-overlapping case. +# +# Both x and y are assumed to be matrices with two columns. +# The function compares the correlation between x[,1] and x[,2] +# to the correlation between y[,1] and y[,2]. +# +# For simulation results, see Wilcox (2009). +# COMPARING PEARSON CORRELATIONS: DEALING WITH +# HETEROSCEDASTICITY AND NON-NORMALITY, Communications in Statistics--Simulations +# and Computations, 38, 2220-2234. +# +# This function is exactly like TWOpNOV, only it returns a p-value as well. +# +# Note: To get a p-value, HC4=TRUE must be used. 
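+#
+# Example (a sketch, not from the original documentation; x and y are hypothetical
+# n-by-2 matrices of dependent measures):
+# TWOpNOVPV(x, y, alpha=.05)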
+# +alph<-c(1:99)/100 +for(i in 1:99){ +irem<-i +chkit<-TWOpNOV(x,y,alpha=alph[i],HC4=TRUE) +chkit=c(chkit$ci.lower,chkit$ci.upper) +if(sign(chkit[1]*chkit[2])==1)break +} +p.value<-irem/100 +if(p.value<=.1){ +iup<-(irem+1)/100 +alph<-seq(.001,iup,.001) +for(i in 1:length(alph)){ +p.value<-alph[i] +alph<-c(1:99)/100 +for(i in 1:99){ +irem<-i +chkit<-TWOpNOV(x,y,alpha=alph[i],HC4=TRUE) +chkit=c(chkit$ci.lower,chkit$ci.upper) +if(sign(chkit[1]*chkit[2])==1)break +}}} +p.value<-irem/100 +if(p.value<=.1){ +iup<-(irem+1)/100 +alph<-seq(.001,iup,.001) +for(i in 1:length(alph)){ +p.value<-alph[i] +chkit<-TWOpNOV(x,y,alpha=alph[i],HC4=TRUE) +chkit=c(chkit$ci.lower,chkit$ci.upper) +if(sign(chkit[1]*chkit[2])==1)break +}} +if(p.value<=.001){ +alph<-seq(.0001,.001,.0001) +for(i in 1:length(alph)){ +p.value<-alph[i] +chkit<-TWOpNOV(x,y,alpha=alph[i],HC4=TRUE) +chkit=c(chkit$ci.lower,chkit$ci.upper) +if(sign(chkit[1]*chkit[2])==1)break +}} +if(p.value<=.001){ +alph<-seq(.0001,.001,.0001) +for(i in 1:length(alph)){ +p.value<-alph[i] +chkit<-TWOpNOV(x,y,alpha=alph[i],HC4=TRUE) +chkit=c(chkit$ci.lower,chkit$ci.upper) +if(sign(chkit[1]*chkit[2])==1)break +}} +res=TWOpNOV(x,y,alpha=alpha,HC4=TRUE) +ci=c(res$ci.lower,res$ci.upper) +list(p.value=p.value,est.1=res$est.1,est.2=res$est.2,ci=ci) #ci.lower=res$ci.lower,ci.upper=res$ci.upper) +} + +regYci<-function(x,y,regfun=tsreg,pts=unique(x),nboot=100,ADJ=FALSE,xout=FALSE,outfun=out,SEED=TRUE,alpha=.05,crit=NULL,null.value=0,plotPV=FALSE,scale=TRUE,span=.75, +xlab='X',xlab1='X1',xlab2='X2',ylab='p-values',zlab='p-values', +theta=50,phi=25,MC=FALSE,nreps=1000,SM=FALSE,pch='*',...){ +# +# Compute confidence interval for the typical value of Y, given X, based on some regression estimator +# By default, +# regfun=tsreg meaning that the Theil--Sen estimator is used. +# +# ADJ=TRUE, the critical value is adjusted so that the simultaneous probability coverage is 1-alpha. +# The adjustment has been studied with one independent variable. It is unknown how well it works with +# more than one independent variable. +# +# If there is a single independent variable, +# regfun=tsreg, ols or qreg, and alpha=.05, an adjustment can be made quickly. Otherwise an +# adjustment must be computed, which can require relatively high execution time. +# To reduce execution time, set +# MC=TRUE, assuming a multi-core processor is available. +# +# nreps: Number of replications used to compute a critical value. Execution time can be high +# MC=TRUE can reduce execution time considerably if a multi-core processor is available. +# + +xy=elimna(cbind(x,y)) +x<-as.matrix(x) +p=ncol(x) +p1=p+1 +vals=NA +x<-xy[,1:p] +y<-xy[,p1] +x<-as.matrix(x) +n=nrow(x) +if(xout){ +m<-cbind(x,y) +flag<-outfun(x,plotit=FALSE,...)$keep +m<-m[flag,] +n=nrow(m) +x<-m[,1:p] +y<-m[,p1] +x=as.matrix(x) +} +if(ADJ){ +if(n<10)stop('Should have a sample size of at least 10') +if(alpha==.05){ +alpha=.01 # assuming tsreg,tsreg_C, tshdreg or qreg are being used. +if(identical(regfun,ols)){ +nv=c(10,20,50,100,400) +pval=c(.001,.004, .008, .008, .01) +ipos=sum(nv<=n) +alpha=pval[ipos] +} +if(identical(regfun,tshdreg))alpha=.009 +if(identical(regfun,qreg))alpha=.009 +crit=qnorm(1-alpha/2) +} +} +if(SEED)set.seed(2) +if(is.null(crit)){ +if(!ADJ)crit=qnorm(1-alpha/2) +if(ADJ){ +padj=regYciCV(n,nboot=nreps,regfun=regfun,MC=MC,SEED=FALSE, +null.value=0,...) +crit=qnorm(1-padj/2) +}} +sqsd=regYvar(x,y,regfun=regfun,pts=pts,nboot=nboot,SEED=SEED,...) +sd=sqrt(sqsd) +est=regYhat(x,y,regfun=regfun,xr=pts,...) 
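+# two-sided p-value, based on a normal approximation, for H0: the typical Y at each point
+# in pts equals null.value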
+pv=2*(1-pnorm(abs(est-null.value)/sd)) +if(length(pts)==1)est=matrix(c(est,est-crit*sd,est+crit*sd,pv),nrow=1) +if(length(pts)>1)est=cbind(est,est-crit*sd,est+crit*sd,pv) +dimnames(est)=list(NULL,c('Pred. Y','Lower.ci','Upper.ci','p.value')) +if(plotPV){ +if(ncol(x)>2)stop('Can plot only with one or two independent variables') +if(ncol(x)==1)plot(pts,pv,xlab=xlab,ylab=ylab,pch=pch) +if(ncol(x)==2){ +if(SM)lplot(pts,pv,xlab=xlab1,ylab=xlab2,zlab=zlab,span=span,ticktype='detail',scale=scale,theta=theta,phi=phi) +if(!SM){ +library(scatterplot3d) +scatterplot3d(pts[,1],pts[,2],pv,xlab=xlab1,ylab=xlab2,zlab=zlab) +} +}} +if(p==1){ +xord=order(pts) +if(length(pts)==1)outp=matrix(c(pts[xord],est[xord,]),nrow=1) +if(length(pts)>1)outp=cbind(pts[xord],est[xord,]) +dimnames(outp)=list(NULL,c('X','Pred. Y','Lower.ci','Upper.ci','p.value')) +est=outp +} +est +} + + + +anclin<-function(x1,y1,x2,y2,regfun=tsreg,pts=NULL,ALL=FALSE,npts=25,plotit=TRUE,SCAT=TRUE, +pch1='*',pch2='+', +nboot=100,ADJ=TRUE,xout=FALSE,outfun=outpro,SEED=TRUE,p.crit=.015, +alpha=.05,crit=NULL,null.value=0,plotPV=FALSE,scale=TRUE,span=.75,xlab='X',ylab='p-values',ylab2='Y',MC=FALSE,nreps=1000,pch='*',...){ +# +# ANCOVA: +# For two independent groups, compute confidence intervals for difference between +# the typical value of Y, given X, +# based on some regression estimator +# By default, +# regfun=tsreg meaning that the Theil--Sen estimator is used. +# +# The functions anclin and regYci2g are identical. +# +# In contrast to the function ancJN, this function can deal with a larger number of +# covariate values and it controls the probability of one or more Type I errors using +# a method that is better, in terms of power, than using Hochberg or Hommel. +# +# ADJ=TRUE, the critical value is adjusted so that the simultaneous +# probability coverage is 1-alpha. +# A single covariate is assumed. +# If alpha=.05, an adjustment can be made quickly. Otherwise an +# adjustment must be computed, which can require relatively high execution time. +# To reduce execution time, set +# MC=TRUE, assuming a multi-core processor is available. 
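+#
+# Example (a sketch, not from the original documentation; x1, y1 and x2, y2 are hypothetical
+# data for two independent groups, each with a single covariate):
+# anclin(x1, y1, x2, y2, xout=TRUE, plotit=TRUE)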
+# If n1<20 and n2<100, assuming that n11)stop('Current version allows one covariate only') +p1=p+1 +vals=NA +x1<-xy[,1:p] +y1<-xy[,p1] +x1<-as.matrix(x1) +xy=elimna(cbind(x2,y2)) +x2<-as.matrix(x2) +p=ncol(x2) +if(p>1)stop('Current version allows one covariate only') +p1=p+1 +vals=NA +x2<-xy[,1:p] +y2<-xy[,p1] +x2<-as.matrix(x2) +n1=length(y1) +n2=length(y2) +n=min(c(n1,n2)) +if(xout){ +m<-cbind(x1,y1) +flag<-outfun(x1,plotit=FALSE)$keep +m<-m[flag,] +n1=nrow(m) +x1<-m[,1:p] +y1<-m[,p1] +x1=as.matrix(x1) +m<-cbind(x2,y2) +flag<-outfun(x2,plotit=FALSE)$keep +m<-m[flag,] +n2=nrow(m) +n=min(c(n1,n2)) +x2<-m[,1:p] +y2<-m[,p1] +x2=as.matrix(x2) +} +if(is.null(pts)){ +xall=unique(c(x1,x2)) +if(ALL)pts=xall +if(!ALL)pts=seq(min(xall),max(xall),length.out=npts) +} +if(ADJ){ +if(n<10)stop('Should have a sample size of at least 10') +if(alpha==.05){ +alpha=p.crit +crit=qnorm(1-alpha/2) +} +if(!ADJ)p.crit=alpha +if(n<20 & max(c(n1,n2))<100) crit=NULL +if(p>1)crit=NULL +} +if(is.null(crit) & !ADJ)crit=qnorm(1-alpha/2) +if(is.null(crit) & ADJ){ +if(SEED)set.seed(2) +padj=regYciCV2G(n1,n2,nboot=nreps,regfun=regfun,MC=MC,SEED=FALSE,ALL=ALL, +null.value=null.value,pts=pts,alpha=alpha,...)$crit.est +crit=qnorm(1-padj/2) +p.crit=padj +} +sqsd1=regYvar(x1,y1,regfun=regfun,pts=pts,nboot=nboot,SEED=SEED) +sqsd2=regYvar(x2,y2,regfun=regfun,pts=pts,nboot=nboot,SEED=SEED) +sd=sqrt(sqsd1+sqsd2) +est1=regYhat(x1,y1,regfun=regfun,xr=pts,...) +est2=regYhat(x2,y2,regfun=regfun,xr=pts,...) +chk.test=abs(est1-est2-null.value)/sd +pv=2*(1-pnorm(abs(est1-est2-null.value)/sd)) +est=cbind(pts,est1-est2,est1-est2-crit*sd,est1-est2+crit*sd,pv) +dimnames(est)=list(NULL,c('X','Est.Dif','Lower.ci','Upper.ci','p.value')) +if(plotit){ +plotPV=FALSE +plot(c(x1,x2),c(y1,y2),type="n",xlab=xlab,ylab=ylab2) +reg1=regfun(x1,y1,...)$coef +reg2=regfun(x2,y2,...)$coef +if(SCAT){ +points(x1,y1,pch=pch1) +points(x2,y2,pch=pch2) +} +abline(reg1) +abline(reg2,lty=2) +} +if(plotPV){ +plot(pts,pv,xlab=xlab,ylab=ylab,pch=pch) +} +list(output=est,p.crit=p.crit,crit.value=crit,num.sig=sum(est[,5]<=p.crit)) +} + +regYciCV2G<-function(n1,n2,crit=NULL,g=0,h=0,nboot=1000,regfun=tsreg,ALL=TRUE, +alpha=.05,SEED=TRUE,MC=FALSE,null.value=0,pts=NULL,npts=100,nmiss=0,...){ +n=max(n1,n2) +if(nmiss>n)stop('Number of missing values is greater than max(n1,n2)') +if(SEED)set.seed(2) +mv=NA +chk=0 +if(n1!=n2)nmiss=max(c(n1,n2))-min(c(n1,n2)) +if(MC)library(parallel) +xy=list() +for (i in 1:nboot){ +x1=ghdist(n,g=g,h=h) +x2=ghdist(n,g=g,h=h) +if(nmiss>0)x2[1:nmiss]=NA +xx=c(x1,x2) +xx=elimna(xx) +if(is.null(pts)){ +if(!ALL)pts=seq(min(xx),max(xx),length.out = npts) +if(ALL)pts=unique(xx) +} +y1=ghdist(n,g=g,h=h) +y2=ghdist(n,g=g,h=h) +xy[[i]]=cbind(x1,y1,x2,y2) +} +if(!MC)est=lapply(xy,regciCV2G.sub,regfun=regfun,null.value=null.value,npts=npts,...) +if(MC)est=mclapply(xy,regciCV2G.sub,regfun=regfun,null.value=null.value,pts=pts,npts=npts,...) 
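+# each element of est is the smallest p-value from one simulated data set;
+# the alpha quantile of these minima is returned as the adjusted critical p-value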
+est=as.vector(matl(est)) +type1=NULL +if(!is.null(crit))type1=mean(est<=crit) +list(global.p.value=type1,crit.est=hd(est,alpha)) +} + +regYci2G=anclin + +regY2G.sub<-function(xy,regfun,null.value=0,...){ + pv=regYci2Gv2(xy[,1],xy[,2],xy[,3],xy[,4],SEED=FALSE,regfun=regfun,null.value=null.value,...)[,4] + min(pv) +} + + +regciCV2G.sub<-function(xy,regfun,null.value=0,pts=NULL,npts=100,...){ + pv=regYci2Gv2(xy[,1],xy[,2],xy[,3],xy[,4],SEED=FALSE,regfun=regfun,null.value=null.value,plotit=FALSE, +npts=npts,pts=pts,...)$output[,5] + min(pv) +} + +# To avoid nested calls, need: + +regYci2Gv2<-function(x1,y1,x2,y2,regfun=tsreg,pts=NULL,ALL=FALSE,npts=25,plotit=TRUE,SCAT=TRUE, +pch1='*',pch2='+', +nboot=100,ADJ=FALSE,xout=FALSE,outfun=outpro,SEED=TRUE,p.crit=.015, +alpha=.05,crit=NULL,null.value=0,plotPV=FALSE,scale=TRUE,span=.75,xlab='X',xlab1='X1',xlab2='X2',ylab='p-values',ylab2='Y',theta=50,phi=25,MC=FALSE,nreps=1000,pch='*',...){ +# +# ANCOVA: +# For two independent groups, compute confidence intervals for difference between +# the typical value of Y, given X, +# based on some regression estimator +# By default, +# regfun=tsreg meaning that the Theil--Sen estimator is used. +# +# The functions anclin and regYci2g are identical. +# +# In contrast to the function ancJN, this function can deal with a larger number of +# covariate values and it controls the probability of one or more Type I errors using +# a method that is better, in terms of power, than using Hochberg or Hommel. +# +# ADJ=TRUE, the critical value is adjusted so that the simultaneous +# probability coverage is 1-alpha. +# If there is a single covariate, +# regfun=tsreg or tshdreg, and alpha=.05, an adjustment can be made quickly. Otherwise an +# adjustment must be computed, which can require relatively high execution time. +# To reduce execution time, set +# MC=TRUE, assuming a multi-core processor is available. +# If n1<20 and n2<100, assuming that n11)stop('Current version allows one covariate only') +p1=p+1 +vals=NA +x1<-xy[,1:p] +y1<-xy[,p1] +x1<-as.matrix(x1) +xy=elimna(cbind(x2,y2)) +x2<-as.matrix(x2) +p=ncol(x2) +p1=p+1 +vals=NA +x2<-xy[,1:p] +y2<-xy[,p1] +x2<-as.matrix(x2) +n1=length(y1) +n2=length(y2) +n=min(c(n1,n2)) +#print(c(n1,n2,n)) +if(xout){ +m<-cbind(x1,y1) +flag<-outfun(x1,plotit=FALSE)$keep +m<-m[flag,] +n1=nrow(m) +x1<-m[,1:p] +y1<-m[,p1] +x1=as.matrix(x1) +m<-cbind(x2,y2) +flag<-outfun(x2,plotit=FALSE)$keep +m<-m[flag,] +n2=nrow(m) +n=min(c(n1,n2)) +x2<-m[,1:p] +y2<-m[,p1] +x2=as.matrix(x2) +} +if(is.null(pts)){ +xall=unique(c(x1,x2)) +if(ALL)pts=xall +if(!ALL)pts=seq(min(xall),max(xall),length.out=npts) +} +if(ADJ){ +if(n<10)stop('Should have a sample size of at least 10') +if(alpha==.05){ +#if(identical(regfun,tsreg) || identical(regfun,tsreg_C))alpha=p.crit causes an error if WRScpp not installed +#if(identical(regfun,tsreg))alpha=p.crit +alpha=p.crit +crit=qnorm(1-alpha/2) +} +if(!ADJ)p.crit=alpha +if(n<20 & max(c(n1,n2))<100) crit=NULL +if(p>1)crit=NULL +} +if(is.null(crit) & !ADJ)crit=qnorm(1-alpha/2) +if(is.null(crit) & ADJ){ +if(SEED)set.seed(2) +print(c(n1,n2)) +padj=regYciCV2G(n1,n2,nboot=nreps,regfun=regfun,MC=MC,SEED=FALSE,ALL=ALL, +null.value=null.value,pts=pts,alpha=alpha,...)$crit.est +crit=qnorm(1-padj/2) +p.crit=padj +} +sqsd1=regYvar(x1,y1,regfun=regfun,pts=pts,nboot=nboot,SEED=SEED) +sqsd2=regYvar(x2,y2,regfun=regfun,pts=pts,nboot=nboot,SEED=SEED) +sd=sqrt(sqsd1+sqsd2) +est1=regYhat(x1,y1,regfun=regfun,xr=pts,...) +est2=regYhat(x2,y2,regfun=regfun,xr=pts,...) 
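+# two-sided p-values, via a normal approximation, for the difference between the estimated
+# typical Y values of the two groups at each covariate value in pts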
+pv=2*(1-pnorm(abs(est1-est2-null.value)/sd)) +est=cbind(pts,est1-est2,est1-est2-crit*sd,est1-est2+crit*sd,pv) +dimnames(est)=list(NULL,c('X','Est.Dif','Lower.ci','Upper.ci','p.value')) +if(plotit){ +plotPV=FALSE +plot(c(x1,x2),c(y1,y2),type="n",xlab=xlab,ylab=ylab2) +reg1=regfun(x1,y1,...)$coef +reg2=regfun(x2,y2,...)$coef +if(SCAT){ +points(x1,y1,pch=pch1) +points(x2,y2,pch=pch2) +} +abline(reg1) +abline(reg2,lty=2) +} +if(plotPV){ +if(ncol(x1)>2)stop('Can plot only with one or two independent variables') +if(ncol(x1)==1)plot(pts,pv,xlab=xlab,ylab=ylab,pch=pch) +if(ncol(x2)==2)lplot(pts,pv,xlab=xlab1,ylab=xlab2,zlab=ylab,span=span,ticktype='detail',scale=scale,theta=theta,phi=phi) +} +list(output=est,p.crit=p.crit,crit.value=crit,num.sig=sum(est[,5]<=p.crit)) +} + + + + +ancdet<-function(x1,y1,x2,y2,fr1=1,fr2=1,tr=.2, +alpha=.05,method="EP",plotit=TRUE,plot.dif=FALSE,pts=NA,sm=FALSE, +pr=TRUE,xout=FALSE,outfun=out,MC=FALSE, +npts=25,p.crit=NULL,nreps=5000,SEED=TRUE,EST=FALSE, +SCAT=TRUE,xlab='X',ylab='Y',pch1='*',pch2='+',...){ +# +# Like the function ancova, but a more detailed analysis +# plot.dif=TRUE: plot difference in the estimates plus a +# confidence band having simultaneous probability coverate 1-alpha +# +# npts = number of covariate values to be used +# +# Argument method indicates which measure of effect size will be used +# EP: explanatory measure of effect size +# QS: quantile shift measure of effect size +# AKP: trimmed mean Winsorized variance analog of Cohen's d +# WMW: P(X1)stop('One covariate only is allowed with this function') +if(is.null(p.crit))set.seed(2) +xy=elimna(cbind(x1,y1)) +x1=xy[,1] +y1=xy[,2] +xy=elimna(cbind(x2,y2)) +x2=xy[,1] +y2=xy[,2] +if(xout){ +flag<-outfun(x1)$keep +x1<-x1[flag] +y1<-y1[flag] +flag<-outfun(x2)$keep +x2<-x2[flag] +y2<-y2[flag] +} +res1=ancova(x1,y1,x2,y2,pr=FALSE,plotit=FALSE,method=method)$output +pts=seq(res1[1,1],res1[5,1],length.out=npts) + +if(alpha!=.05)EST=TRUE +if(is.null(p.crit)){ +if(!EST){ +nv=c(30, 50, 60, 70, 80, 100, +150, 200, 300, 400, 500, 600, 800) +pv=c(0.00824497,0.00581, 0.005435089, 0.004763079, +0.00416832, 0.004406774, 0.00388228,0.003812836,0.003812836,0.003453055, 0.003625061, +.003372966, 0.003350022) +n1= length(y1) + n2=length(y2) +p.crit=(lplot.pred(1/nv,pv,1/n1)$yhat+lplot.pred(1/nv,pv,1/n2)$yhat)/2 +} +if(EST)p.crit=ancdet.pv(length(y1),length(y2),nreps=nreps,tr=tr,npts=npts,MC=MC) +} + +if(plot.dif)plotit=FALSE +critv=qnorm(1-p.crit/2) +res=ancova(x1,y1,x2,y2,fr1=fr1,fr2=fr2,tr=tr,alpha=alpha,pr=FALSE,plotit=plotit,pts=pts,SCAT=SCAT)$output +res[,7]=res[,4]-critv*res[,6] # adjust confidence interval based on adjusted p-value +res[,8]=res[,4]+critv*res[,6] # adjust confidence interval based on adjusted p-value +if(plot.dif){ +yhat=plot(c(res[,1],res[,1],res[,1]),c(res[,4],res[,7],res[,8]),type='n',xlab=xlab,ylab=ylab) +z1=lplot(res[,1],res[,4],plotit=FALSE,pyhat=TRUE)$yhat +z2=lplot(res[,1],res[,7],plotit=FALSE,pyhat=TRUE)$yhat +z3=lplot(res[,1],res[,8],plotit=FALSE,pyhat=TRUE)$yhat +lines(res[,1],z1) +lines(res[,1],z2,lty=2) +lines(res[,1],z3,lty=2) +} +sig=rep(0,nrow(res)) +sig[res[,9]<=p.crit]=1 +sig=as.matrix(sig,ncol=1) +dimnames(sig)=list(NULL,'Sig.Dif') +res=cbind(res,sig) +list(p.crit=p.crit,output=res[,-10],num.sig=sum(sig),p.crit=p.crit) +} + + +ancdet.sub<-function(xy,tr=.2, +alpha=.05,plotit=FALSE,plot.dif=FALSE,pts=NA,sm=FALSE, +pr=TRUE,xout=FALSE,outfun=out,LP=TRUE, +npts=25,p.crit=NULL,nreps=2000, +SCAT=TRUE,xlab='X',ylab='Y',pch1='*',pch2='+',...){ +# +# Like ancova, but a more 
detailed analysis based on using +# npts covariate values +# +xy1=elimna(xy[,1:2]) +xy2=elimna(xy[,3:4]) +x1=xy1[,1] +y1=xy1[,2] +x2=xy2[,1] +y2=xy2[,2] +if(ncol(as.matrix(x1))>1)stop('One covariate only is allowed with this function') +res1=ancova(x1,y1,x2,y2,pr=FALSE,plotit=FALSE)$output +pts=seq(res1[1,1],res1[5,1],length.out=npts) +res=ancova(x1,y1,x2,y2,tr=tr,alpha=alpha,plotit=FALSE, +pr=FALSE,pts=pts,skip.crit=TRUE)$output +res.out=min(res[,9]) +res.out +} + +ancdet.pv<-function(n1,n2,nreps=2000,alpha=.05,npts=25,tr=.2,MC=FALSE,SEED=TRUE){ +if(SEED)set.seed(2) +pvals=NA +xy=list() +n=max(c(n1,n2)) +nmiss=n-min(c(n1,n2)) +for (i in 1:nreps){ +xy[[i]]=rmul(n,p=4) +xy[[i]][1:nmiss,1:2]=NA +} +if(!MC)pvals=lapply(xy,ancdet.sub,npts=npts,tr=tr) +if(MC){ +library(parallel) +pvals=mclapply(xy,ancdet.sub,npts=npts,tr=tr) +} +pvals=matl(pvals) +pv=hd(pvals,alpha) +pv +} + +ancdet2C<-function(x1,y1,x2,y2,fr1=1,fr2=1,tr=.2,test=yuen,q=.5, +alpha=.05,plotit=TRUE,op=FALSE,pts=NA,sm=FALSE,FRAC=.5, +pr=TRUE,xout=FALSE,outfun=outpro,MC=FALSE, +p.crit=NULL,nreps=2000,SEED=TRUE,FAST=TRUE,ticktype='detail', +xlab='X1',ylab='X2',zlab='Y',pch1='*',pch2='+',...){ +# +# method MC3 in Wilcox (2017, Intro to Robust Estimation and Hypothesis Testing, 4th ed.) +# Multiple comparisons using an improvement on Hochberg to control FWE +# +# Like ancdet, only two covariate values can be used. +# Like method MC2, +# use the deepest half of the covariate values. +# +# politit=TRUE. Plot covariate points. Significant points are indicated by +# pch='+' +# +# test can have one of three values: yuen (default), qcomhd or qcomhdMC +# +if(ncol(as.matrix(x1))!=2)stop('Two covariates only can be used') +if(is.null(p.crit))set.seed(2) +xy=elimna(cbind(x1,y1)) +x1=xy[,1:2] +y1=xy[,3] +xy=elimna(cbind(x2,y2)) +x2=xy[,1:2] +y2=xy[,3] +if(min(length(y1),length(y2))<50)stop('The minimum sample size must be greater than or equal to 50') +if(xout){ +flag<-outfun(x1,plotit=FALSE)$keep +x1<-x1[flag,] +y1<-y1[flag] +flag<-outfun(x2,plotit=FALSE)$keep +x2<-x2[flag,] +y2<-y2[flag] +} +if(FAST){ +if(FRAC==.5){ +if(is.null(p.crit)){ +if(alpha==.05){ +nv=c(50, 55, 60, 70, 80, 100, 200, 300, 400, 500, 600,800) +pv=c(0.004585405, 0.003199894, 0.002820089, 0.002594342, 0.002481210, 0.001861313, + 0.001419821, 0.001423000, 0.001313700, 0.001351900, 0.001075, 0.00095859) + n1= length(y1) + n2=length(y2) +p.crit=(lplot.pred(1/nv,pv,1/n1)$yhat+lplot.pred(1/nv,pv,1/n2)$yhat)/2 + # Using K=n1 points, i.e., K=n1 tests are performed +if(max(n1,n2)>max(nv)){ +p.crit=min(pv) +print('Warning: p.crit has not been computed exactly for sample sizes greater than 800') +if(n1>800)p.crit1=regYhat(1/pv[8:12,1],pv[8:12,2],1/n1) +if(n1<=800)p.crit1=lplot.pred(1/nv,pv,1/n1)$yhat +if(n2>800)p.crit1=regYhat(1/pv[8:12,1],pv[8:12,2],1/n2) +if(n2<=800)p.crit1=lplot.pred(1/nv,pv,1/n2)$yhat +p.crit=(p.crit1+p.crit2)/2 +} +}}}} +res1=ancov2COV(x1,y1,x2,y2,DETAILS=TRUE,pr=FALSE,FRAC=FRAC,tr=tr,test=test,q=q,MC=MC) +if(is.null(p.crit))p.crit=ancdet2C.pv(length(y1),length(y2),MC=MC,nreps=nreps, +SEED=SEED) +LL=length(ncol(res1$all.results)) +if(LL==1)num.sig=sum(res1$all.results[,3]<=p.crit) +if(LL==0)num.sig=NA +sig.points=NA +if(LL==1){ +flag=res1$all.results[,3]<=p.crit +sig.points=res1$all.points.used[flag,1:2] +} +if(plotit){ +if(!op){ +if(pr)print('To plot the estimated differences for the covariate points used, set op=TRUE') +if(LL==0)plot(res1$all.points.used[,1],res1$all.points.used[,2],xlab=xlab,ylab=ylab) +if(LL==1){ 
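+# plot the covariate points, marking non-significant points with pch1 and significant points with pch2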
+plot(res1$all.points.used[,1],res1$all.points.used[,2],type='n',xlab=xlab,ylab=ylab)
+points(res1$all.points.used[!flag,1],res1$all.points.used[!flag,2],pch=pch1)
+points(res1$all.points.used[flag,1],res1$all.points.used[flag,2],pch=pch2)
+}}
+if(op)
+lplot(res1$all.points.used[,1:2],res1$all.points.used[,3],xlab=xlab,ylab=ylab,zlab=zlab,ticktype=ticktype)
+}
+list(num.sig=num.sig,p.crit=p.crit,points.used=cbind(res1$all.points.used[,1:3],res1$all.results),sig.points=sig.points)
+}
+
+
+ancdet2C.pv<-function(n1,n2,nreps=2000,alpha=.05,FRAC=.5,tr=.2,MC=FALSE,SEED=TRUE){
+if(SEED)set.seed(2)
+pvals=NA
+xy=list()
+n=max(c(n1,n2))
+nmiss=n-min(c(n1,n2))
+for (i in 1:nreps){
+xy[[i]]=rmul(n,p=6)
+if(nmiss>0)xy[[i]][1:nmiss,1:3]=NA
+}
+if(!MC)pvals=lapply(xy,ancdet2C.sub,tr=tr,FRAC=FRAC)
+if(MC){
+library(parallel)
+pvals=mclapply(xy,ancdet2C.sub,tr=tr,FRAC=FRAC)
+}
+pvals=matl(pvals)
+pv=hd(pvals,alpha)
+pv
+}
+
+ancdet2C.sub<-function(xy,tr=.2,FRAC=.5){
+#
+xy1=elimna(xy[,1:3])
+xy2=elimna(xy[,4:6])
+x1=xy1[,1:2]
+y1=xy1[,3]
+x2=xy2[,1:2]
+y2=xy2[,3]
+res1=ancov2COV(x1,y1,x2,y2,pr=FALSE,FRAC=FRAC)$min.p.value
+res1
+}
+
+Dancdet<-function(x1,y1,x2,y2,fr1=1,fr2=1,tr=.2,DIF=TRUE,
+alpha=.05,plotit=TRUE,plot.dif=FALSE,pts=NA,sm=FALSE,
+pr=TRUE,xout=FALSE,outfun=out,MC=FALSE,
+npts=25,p.crit=NULL,nreps=2000,SEED=TRUE,
+SCAT=TRUE,xlab='X',ylab='Y',pch1='*',pch2='+',...){
+#
+# ANCOVA for dependent groups.
+#
+# Like Dancova, but a more detailed analysis
+# plot.dif=TRUE: plot difference in the estimates plus a
+# confidence band having simultaneous probability coverage 1-alpha
+#
+# npts = number of covariate values to be used
+#
+if(ncol(as.matrix(x1))>1)stop('One covariate only is allowed with this function')
+if(length(x1)!=length(y1))stop('x1 and y1 have different lengths')
+if(length(x1)!=length(x2))stop('x1 and x2 have different lengths')
+if(length(x2)!=length(y2))stop('x2 and y2 have different lengths')
+if(length(y1)!=length(y2))stop('y1 and y2 have different lengths')
+xy=elimna(cbind(x1,y1,x2,y2))
+x1=xy[,1]
+y1=xy[,2]
+x2=xy[,3]
+y2=xy[,4]
+n=length(y1) # after elimna, all four vectors have the same length
+#if(is.null(p.crit))set.seed(2)
+if(alpha!=.05)p.crit=ancdet.pv(length(y1),length(y2),nreps=nreps,tr=tr,npts=npts,MC=MC)
+else{
+if(n>800)p.crit=0.00335002
+if(n <= 800){
+nv=c(30, 50, 60, 70, 80, 100,
+150, 200, 300, 400, 500, 600, 800)
+pv=c(0.00824497,0.00581, 0.005435089, 0.004763079,
+0.00416832, 0.004406774, 0.00388228,0.003812836,0.003812836,0.003453055, 0.003625061,
+.003372966, 0.003350022)
+n1=length(y1)
+n2=length(y2)
+p.crit=(lplot.pred(1/nv,pv,1/n1)$yhat+lplot.pred(1/nv,pv,1/n2)$yhat)/2
+}}
+
+res1=ancova(x1,y1,x2,y2,pr=FALSE,plotit=FALSE)$output # Get lowest and highest covariate
+# values where comparisons can be made.
+pts=seq(res1[1,1],res1[5,1],length.out=npts)
+#if(is.null(p.crit))p.crit=ancdet.pv(length(y1),length(y2),nreps=nreps,tr=tr,npts=npts,MC=MC)
+if(plot.dif)plotit=FALSE
+res=Dancova(x1,y1,x2,y2,fr1=fr1,fr2=fr2,tr=tr,alpha=p.crit,
+DIF=DIF,plotit=plotit,pts=pts,SCAT=SCAT)$output
+if(plot.dif){
+yhat=plot(c(res[,1],res[,1],res[,1]),c(res[,3],res[,6],res[,7]),type='n',xlab=xlab,ylab=ylab)
+z1=lplot(res[,1],res[,3],plotit=FALSE,pyhat=T)$yhat
+z2=lplot(res[,1],res[,6],plotit=FALSE,pyhat=T)$yhat
+z3=lplot(res[,1],res[,7],plotit=FALSE,pyhat=T)$yhat
+lines(res[,1],z1)
+lines(res[,1],z2,lty=2)
+lines(res[,1],z3,lty=2)
+}
+sig=rep(0,nrow(res))
+sig[res[,8]<=p.crit]=1
+sig=as.matrix(sig)
+res=res[,-9]
+dimnames(sig)=list(NULL,'Sig.Dif')
+res=cbind(res,sig)
+list(p.crit=p.crit,output=res,num.sig=sum(sig))
+}
+
+dmedian<-function(x,depfun=pdepth,...){
+#
+# Compute the median based on the deepest point for the multivariate
+# data in x
+#
+# For continuous variables, this function returns a unique median
+#
+# Projection distances are used by default.
+# Another option is depfun=zdepth
+#
+if(is.null(dim(x)) || ncol(x)==1)stop('x should be a matrix with two or more columns')
+val=depfun(x,...)
+id=which(val==max(val))
+list(center=x[id,])
+}
+
+
+ancJNmp<-function(x1,y1,x2,y2,regfun=qreg,p.crit=NULL,DEEP=FALSE,WARN=FALSE,
+plotit=TRUE,xlab='X1',ylab='X2',null.value=0,FRAC=.5,cov1=FALSE,SMM=TRUE,ALL=TRUE,pr=TRUE,
+alpha=.05,nreps=1000, MC=FALSE, pts=NULL,SEED=TRUE,nboot=100,xout=FALSE,outfun=outpro,...){
+#
+# Compare two independent groups using a generalization of the ancts function that
+# allows more than one covariate.
+#
+# DEEP=FALSE: If pts=NULL, design points are chosen to be deepest point in
+# x1 plus points on the .5 depth contour.
+#
+# DEEP=TRUE, choose deepest half of c(x1,x2) and use the critical p-value indicated by
+# p.crit, which defaults to .015 when alpha=.05.
+# If alpha!=.05, p.crit must be computed, which can require high execution time.
+# MC=TRUE will reduce execution time considerably.
+#
+# cov1=TRUE: the covariates that are used are taken to be the points in x1.
+#
+# plotit=TRUE: if p=2 covariates, plot covariate points with non-significant points indicated by * and
+# significant points by +
+
+# (This function replaces anctsmp, which does not have an option for using the deepest half of covariate points.)
+# +if(SEED)set.seed(2) # now cov.mve always returns same result + +stop('This function has been replaced by an improved ancJNPVAL') +x1=as.matrix(x1) +x2=as.matrix(x2) +if(ncol(x1)!=ncol(x2))stop('x1 and x2 have a different number of columns') +p=ncol(x1) +p1=p+1 +m1=elimna(cbind(x1,y1)) +x1=m1[,1:p] +y1=m1[,p1] +m2=elimna(cbind(x2,y2)) +x2=m2[,1:p] +y2=m2[,p1] +# +if(xout){ +m<-cbind(x1,y1) +flag<-outfun(x1,plotit=FALSE)$keep +m<-m[flag,] +x1<-m[,1:p] +y1<-m[,p1] +m<-cbind(x2,y2) +flag<-outfun(x2,plotit=FALSE)$keep +m<-m[flag,] +x2<-m[,1:p] +y2<-m[,p1] +} +n1=length(y1) +n2=length(y2) +nv=c(n1,n2) +if(cov1){ +pts=unique(x1) +if(alpha==.05)p.crit=0.00676 +} +if(DEEP)pts=NULL +if(!is.null(pts[1])){ +p.crit=NULL +DEEP=FALSE +} +if(is.null(pts[1])){ +if(!DEEP){ +x1<-as.matrix(x1) +pts<-ancdes(unique(rbind(x1,x2))) +p.crit=NULL +} +if(DEEP){ +pts=ancov2COV(x1,y1,x2,y2,DETAILS=TRUE,cr=.27,pr=FALSE,FRAC=FRAC)$all.points.used[,1:2] +}} +pts<-as.matrix(pts) +ntests=nrow(pts) +mat<-matrix(NA,ntests,8) +dimnames(mat)<-list(NULL,c('Est 1', 'Est 2','DIF','TEST','se','ci.low','ci.hi','p.value')) +sqsd1=regYvar(x1,y1,pts=pts,regfun=regfun,nboot=nboot,SEED=FALSE,xout=FALSE,outfun=outfun,...) +sqsd2=regYvar(x2,y2,pts=pts,regfun=regfun,nboot=nboot,SEED=FALSE,xout=FALSE,outfun=outfun,...) +# xout=F because leverage points have already been removed. +est1=regYhat(x1,y1,regfun=regfun,xr=pts,xout=FALSE,outfun=outfun,...) +est2=regYhat(x2,y2,regfun=regfun,xr=pts,xout=FALSE,outfun=outfun,...) +mat[,1]=est1 +mat[,2]=est2 +est=est1-est2 +mat[,3]=est +sd=sqrt(sqsd1+sqsd2) +mat[,5]=sd +tests=(est1-est2)/sd +mat[,4]=tests +pv=2*(1-pnorm(abs(tests))) +mat[,8]=pv +crit=NULL +if(!cov1){ +if(!DEEP){ +if(ntests==1)crit=qnorm(1-alpha/2) +if(length(pts)>1){ +if(ntests<=28){ +if(alpha==.05)crit<-smmcrit(Inf,ntests) +if(alpha==.01)crit<-smmcrit01(Inf,ntests) +} +if(ntests>28 || is.null(crit))crit=smmvalv2(dfvec=rep(Inf,nrow(pts)),alpha=alpha) +}}} +if(cov1){ +if(!DEEP){ +if(alpha==.05)p.crit=0.00676 +if(alpha!=.05)p.crit=ancJNmpcp(n1,n2,alpha=alpha,regfun=regfun,nreps=nreps,MC=MC,cov1=cov1)$pc.est +crit=qnorm(1-p.crit/2) +}} +if(DEEP){ +if(p==2){ +p.crit=.012 +if(alpha!=.05)p.crit=ancJNmpcp(n1,n2,alpha=alpha,regfun=regfun,nreps=nreps,MC=MC,cov1=cov1)$pc.est +crit=qnorm(1-p.crit/2) +} +if(p>2){ +if(length(pts)>1){ +if(SMM){ +if(ntests<=28){ +if(alpha==.05)crit<-smmcrit(Inf,ntests) +if(alpha==.01)crit<-smmcrit01(Inf,ntests) +} +if(ntests>28 || is.null(crit))crit=smmvalv2(dfvec=rep(Inf,nrow(pts)),alpha=alpha) +} +if(!SMM){ +p.crit=ancJNmpcp(n1,n2,alpha=alpha,regfun=regfun,nreps=nreps,MC=MC,cov1=FALSE) +crit=qnorm(1-p.crit/2) +}}}} +mat[,6]=est-crit*sd +mat[,7]=est+crit*sd +flag=rep(FALSE,nrow(mat)) +flag.chk1=as.logical(mat[,6]>null.value) +flag.chk2=(mat[,7]0) +num.sig=sum(flag.chk) +if(p==2){ +if(plotit){ +plot(pts[,1],pts[,2],xlab=xlab,ylab=ylab,type='n') +flag[flag.chk]=TRUE +points(pts[!flag,1],pts[!flag,2],pch='*') +points(pts[flag,1],pts[flag,2],pch='+') #significant points +}} +output.sig=NULL +if(p==2){ +if(num.sig>0){ +output.sig=matrix(NA,nrow=num.sig,ncol=8) +output.sig[,1]=pts[flag,1] +output.sig[,2]=pts[flag,2] +output.sig[,3]=mat[flag,1] +output.sig[,4]=mat[flag,2] +output.sig[,5]=mat[flag,3] +output.sig[,6]=mat[flag,6] +output.sig[,7]=mat[flag,7] +output.sig[,8]=mat[flag,8] +dimnames(output.sig)<-list(NULL,c('COV 1','COV 2','Est 1', 'Est 2','DIF','ci.low','ci.hi','p.value')) +if(!ALL){ +mat=NULL +pts=NULL +} +if(pr){ +if(ALL)print('To get only the results for all covariate points where this is a significant 
result, set ALL=FALSE') +} +}} +list(n=nv,num.sig=num.sig,p.crit=p.crit,points=pts,output.sig=output.sig,output=mat) +} + +ancJNmpcp<-function(n1,n2,regfun=qreg,CPP=FALSE,nreps=1000,alpha=.05,MC=FALSE, +SEED=TRUE,cov1=FALSE){ +if(CPP)library(WRScpp) +if(MC)library(parallel) +if(SEED)set.seed(2) +x=list() +n=max(c(n1,n2)) +nmiss=n-min(c(n1,n2)) +for(i in 1:nreps){ +x[[i]]=rmul(n,p=6) +if(n10)x[[i]][1:nmiss,1:3]=NA +} +if(n1>n2){ +if(nmiss>0)x[[i]][1:nmiss,4:6]=NA +} +} + +if(!MC)vals=lapply(x,ancJNmpcp.sub,regfun=regfun,cov1=cov1) +if(MC)vals=mclapply(x,ancJNmpcp.sub,regfun=regfun,cov1=cov1) +vals=as.vector(matl(vals)) +pc.est=hd(vals,alpha) +list(pc.est=pc.est) +} + +ancJNmpcp.sub<-function(x,regfun=qreg,cov1=FALSE){ +pts=NULL +z=elimna(x[,1:3]) +z2=elimna(x[,4:6]) +if(cov1)pts=z[,1:2] +res1=ancJNmp(z[,1:2],z[,3],z2[,1:2],z2[,3],SEED=TRUE,plotit=FALSE, +regfun=regfun,pts=pts)$output +v=min(res1[,8]) +} + + +ancovaV2.pv<-function(n1,n2,nreps=2000,MC=FALSE,qpts=FALSE,qvals = c(0.25, 0.5, 0.75), +nboot=500,SEED=TRUE,est=tmean,alpha=.05){ +iter=nreps +if(SEED)set.seed(45) +xy=list() +for(i in 1:iter){ +xy[[i]]=list() +xy[[i]][[1]]=rnorm(n1) +xy[[i]][[2]]=rnorm(n1) +xy[[i]][[3]]=rnorm(n2) +xy[[i]][[4]]=rnorm(n2) +} +if(!MC)pv=lapply(xy,ancovaV2pv.sub,qpts=qpts,qvals=qvals,nboot=nboot,MC=FALSE,est=est) +if(MC){ +library(parallel) +pv=mclapply(xy,ancovaV2pv.sub,qpts=qpts,qvals=qvals,nboot=nboot,MC=FALSE,est=est) +} +pv=as.vector(matl(pv)) +p=hd(pv,q=alpha) +list(p.crit=p) +} + +ancovaV2pv.sub<-function(xy,qpts=FALSE,qvals = c(0.25, 0.5, 0.75),nboot=500,MC=TRUE, +est=tmean){ +res=ancovaV2(xy[[1]],xy[[2]],xy[[3]],xy[[4]],est=est,plotit=FALSE,p.crit=.03,SEED=TRUE,qpts=qpts, +nboot=nboot,MC=MC) +rm=min(res$output[,2]) +rm +} + +ancovaUB.pv=ancovaV2.pv + +list.dif<-function(x1,x2){ +# +# Form all differences +# +if(!is.list(x1))stop('Argument x1 should have list mode') +if(!is.list(x2))stop('Argument x2 should have list mode') +if(length(x1)!=length(x2))stop('x1 and x2 have different lengths') +dif=list() +for(j in 1:length(x1))dif[[j]]=x1[[j]]-x2[[j]] +dif +} +kerSORT<-function(x,xlab='',ylab='',pts=NA){ +# +# kernel density estimator using Silverman's rule of thumb +# +# +A=min(c(sd(x),idealfIQR(x)/1.34)) +bw=1.06*A/n^.2 +init=density(x,bw=bw,kernel='epanechnikov') +plot(init$x,init$y,xlab=xlab,ylab=ylab,type='n') +lines(init$x,init$y) +} + +twohc4cor<-function(x1,y1,x2,y2,alpha=.05){ +# +# Compare two independent Pearson correlations using the HC4 method +# +# +X<-elimna(cbind(x1,y1)) +x1<-X[,1] +y1<-X[,2] +X<-elimna(cbind(x2,y2)) +x2<-X[,1] +y2<-X[,2] +x1=(x1-mean(x1))/sd(x1) +y1=(y1-mean(y1))/sd(y1) +x2=(x2-mean(x2))/sd(x2) +y2=(y2-mean(y2))/sd(y2) +temp1=olshc4(x1,y1) +temp2=olshc4(x2,y2) +test=(temp1$ci[2,2]-temp2$ci[2,2])/sqrt(temp1$ci[2,6]^2+temp2$ci[2,6]^2) +df=length(x1)+length(x2)-4 +pv=2*(1-pt(abs(test),df)) +pv +} + +#BD2 +BD2=function(matrizDatos){ + n=dim(matrizDatos)[1] + p=dim(matrizDatos)[2] + cont=rep(0,n) + for (i in 1:(n-1)){ + for (j in (i+1):n){ + cont=cont+estaEntre(c(i,j),matrizDatos) + } + } + contg=(cont/combinat(n,2)) +} + +#indicator function +estaEntre=function(v,matrizDatos){ + n=dim(matrizDatos)[1] + p=dim(matrizDatos)[2] + Z=matrizDatos + inf=t(apply(Z[v,],2,min)) + sup=t(apply(Z[v,],2,max)) + resultados=colSums((t(Z)<=t(sup)%*%rep(1,n))* (t(Z)>=t(inf)%*%rep(1,n)))==p +} + +#combination +combinat=function(n,p){ + if (n=t(inf)%*%rep(1,n))) + resultado=(resul/p) +} + + + + +#function boxplot +#fit: p by n functional data matrix, n is the number of curves +#method: BD2, 
BD3, MBD +fbplot<-function(fit,x=NULL,method='MBD',depth=NULL,plot=TRUE,prob=0.5,color=6,outliercol=2,barcol=4,fullout=FALSE, factor=1.5,xlab='Time',ylab='Y',...){ + + if(is.fdSmooth(fit) | is.fdPar(fit)){ fit = fit$fd } + if(is.fd(fit)){ + if(length(x)==0){ + x = seq(fit$basis$rangeval[1],fit$basis$rangeval[2],len=101) + } + fit = eval.fd(x,fit) + } + + tp=dim(fit)[1] + n=dim(fit)[2] + if (length(x)==0) {x=1:tp} + #compute band depth + if (length(depth)==0){ + if (method=='BD2') {depth=BD2(t(fit))} + else if (method=='BD3') {depth=BD3(t(fit))} + else if (method=='MBD') {depth=MBD(t(fit))} + else if (method=='Both') {depth=round(BD2(t(fit)),4)*10000+MBD(t(fit))} + } + + dp_s=sort(depth,decreasing=TRUE) + index=order(depth,decreasing=TRUE) + if (plot) { + plot(x,fit[,index[1]],lty=1,lwd=2,col=1,type='l',xlab=xlab,ylab=ylab,...) + } + for (pp in 1:length(prob)){ + m=ceiling(n*prob[pp])#at least 50% + center=fit[,index[1:m]] + out=fit[,index[(m+1):n]] + inf=apply(center,1,min) + sup=apply(center,1,max) + + if (prob[pp]==0.5){ #check outliers + dist=factor*(sup-inf) + upper=sup+dist + lower=inf-dist + outly=(fit<=lower)+(fit>=upper) + outcol=colSums(outly) + remove=(outcol>0) + #outlier column + colum=1:n + outpoint=colum[remove==1] + out=fit[,remove] + woout=fit + good=woout[,(remove==0),drop=FALSE] + maxcurve=apply(good,1,max) + mincurve=apply(good,1,min) + if (sum(outly)>0){ + if (plot) { + matlines(x,out,lty=2,col=outliercol,type='l',...) + } + } + barval=(x[1]+x[tp])/2 + bar=which(sort(c(x,barval))==barval)[1] + if (plot) { + lines(c(x[bar],x[bar]),c(maxcurve[bar],sup[bar]),col=barcol,lwd=2) + lines(c(x[bar],x[bar]),c(mincurve[bar],inf[bar]),col=barcol,lwd=2) + } + } + xx=c(x,x[order(x,decreasing=TRUE)]) + supinv=sup[order(x,decreasing=TRUE)] + yy=c(inf,supinv) + if (plot) { + if (prob[pp]==0.5) {polygon(xx,yy,col=color[pp],border=barcol,lwd=2)} + else {polygon(xx,yy,col=color[pp],border=NA)} + } + } + if (plot) { + lines(x,fit[,index[1]],lty=1,lwd=2,col=1,type='l') + lines(x,maxcurve,col=barcol,lwd=2) + lines(x,mincurve,col=barcol,lwd=2) + if (fullout) { + if (sum(outly)>0){ + if (plot) { + matlines(x,out,lty=2,col=outliercol,type='l',...) + } + } + } + } + return(list(depth=depth,outpoint=outpoint)) +} + + + +funyuenpb<-function(x1,x2,tr=.2,pts=NULL,npts=25,plotit=TRUE,alpha=.05, +SEED=TRUE, +nboot=2000,xlab='T',ylab='Est.dif',FBP=TRUE,method='hochberg',COLOR=TRUE){ +# +# x1 and x2 are n-by-p matrices, +# Designed for functional data. +# For example, p measures taken over time where p is typically large +# +# Goal: at speficied times, compare the two groups. 
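+#
+# Example (a sketch, not from the original documentation; x1 and x2 are hypothetical
+# matrices of curves, one row per curve, measured at the same time points):
+# funyuenpb(x1, x2, npts=25, plotit=FALSE)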
+# pts: Can specify time points where comparisons are to be made +# if pts=NULL, pick +# npts points evenly space between min and max time points +# +p=ncol(x1) +pm1=p-1 +if(p!=ncol(x2))stop('ncol(x1) is not equal to ncol(x2)') +n1=nrow(x1) +n2=nrow(x2) +if(SEED)set.seed(2) +if(is.null(pts)){ +np=round(p/npts) +if(np==0)np=1 +pts=seq(2,pm1,np) +notpts=-1*length(pts) +pts=pts[c(-1,notpts)] +} +npts=length(pts) +xsub1=x1[,pts] +xsub2=x2[,pts] +res=NA +dif=NA +bvals=matrix(nrow=nboot,ncol=npts) +for(j in 1:nboot){ +data1=sample(n1,size=n1,replace=TRUE) +data2<-sample(n2,size=n2,replace=TRUE) +bvals[j,]=apply(xsub1[data1,],2,tmean,tr=tr)-apply(xsub2[data2,],2,tmean,tr=tr) +} +bsort=apply(bvals,2,sort) +crit<-alpha/2 +icl<-round(crit*nboot)+1 +icu<-nboot-icl +op=matrix(NA,nrow=length(pts),ncol=7) +dimnames(op)=list(NULL,c('est 1','est 2','dif','p.value', +'adjust.p.value','ci.low','ci.hi')) +op[,1]=apply(xsub1,2,tmean,tr=tr) +op[,2]=apply(xsub2,2,tmean,tr=tr) +op[,3]=op[,1]-op[,2] +bsort=apply(bvals,2,sort) +bs=bvals<0 +pv=apply(bs,2,mean) +pv2=rbind(pv,1-pv) +pv2=apply(pv2,2,min) +op[,4]=2*pv2 +#flag0=op[,4]==0 +#op[flag0,4]=.004 +op[,5]=p.adjust(op[,4],method=method) +op[,6]=bsort[icl,] +op[,7]=bsort[icu,] +if(plotit){ +if(!FBP){ +xlow=c(1:nrow(op)) +xax=rep(c(1:nrow(op)),3) +rplot(xlow,op[,3],xlab=xlab,ylab=ylab,scat=FALSE) +plot(xax,as.vector(op[,c(3,6,7)]),type='n',xlab=xlab,ylab=ylab) +lines(xlow,op[,3]) +lines(xlow,op[,6],lty=2) +lines(xlow,op[,7],lty=2) +} +if(FBP){ +par(mfrow=c(1,2)) +if(COLOR)FBplot(x1) +if(!COLOR)func.out(x1) +lines(medcurve(x2)) +if(COLOR)FBplot(x2) +if(!COLOR)func.out(x2) +lines(medcurve(x1)) +par(mfrow=c(1,1)) +}} +op=cbind(pts,op) +op +} +anova.nestA<-function(x,tr=.2){ +# +# J-by-K nested ANOVA +# x is assumed to have list mode with length J. +# x[[j]] is assumed to be a matrix with n_j rows and K columns +# j=1, ..., J +# +# Strategy: For fixed level of factor A compute trimmed mean for each +# level of factor B and use these trimmed means as the unit of analysis +# That is, perform an ANOVA using these trimmed means +# +if(!is.list(x))stop('x should have list mode') +y=list() +J=length(x) +for(j in 1:J)y[[j]]=apply(x[[j]],2,tmean,tr=tr) +res=t1way(y,tr=tr) +res +} + +anova.nestAP<-function(x,tr=.2){ +# +# J-by-K nested ANOVA +# x is assumed to have list mode with length J. +# x[[j]] is assumed to be a matrix with n_j rows and K columns +# j=1, ..., J +# +# pool data for each level of A and do anova +# +if(!is.list(x))stop('x should have list mode') +y=list() +J=length(x) +for(j in 1:J)y[[j]]=as.vector(x[[j]]) +res=t1way(y,tr=tr) +res +} + +r.gauss.pro<-function(n,C,M,t){ +# +# generate data from a Gaussian Process +# +# n is the sample size +# C is the covariance function +# for example: +# C <- function(x, y) 0.01 * exp(-10000 * (x - y)^2) # covariance function +# M is the mean function. 
For example +#M <- function(x) sin(x) # mean function +# t is the interval over which the mean is computed +#t <- seq(0, 1, by = 0.01) # will sample the GP at these points +# +library(MASS) +k <- length(t) +m <- M(t) +S <- matrix(nrow = k, ncol = k) +for (i in 1:k) for (j in 1:k) S[i, j] = C(t[i], t[j]) +z=matrix(NA,nrow=n,ncol=k) +#for(i in 1:n) +#z[i,] <- mvrnorm(1, m, S) +z=mvrnorm(n, m, S) +z +} + +Flplot<-function(x,est=mean,xlab='Time',ylab='Y',plotit=TRUE){ +# +# average n curves and plot results +# +es=apply(x,2,est) +if(plotit){ +plot(es,xlab=xlab,ylab=ylab,type='n') +lines(es) +} +es +} + + + +FQplot<-function(x,xlab='Time',ylab='Y',plotit=TRUE){ +# +# Compute the median and quartiles of n curves and plot results +# +es=apply(x,2,hd) +es1=apply(x,2,hd,q=.25) +es2=apply(x,2,hd,q=.75) +if(plotit){ +plot(rep(c(1:ncol(x)),3),c(es,es1,es2),xlab=xlab,ylab=ylab,type='n') +lines(es) +lines(es1,lty=2) +lines(es2,lty=2) +} +es +} + + + +Flplot2g<-function(x1,x2,est=mean,xlab='Time',ylab='Y',plotit=TRUE){ +# +# average n curves and plot results +# +x1=elimna(x1) +x2=elimna(x2) +if(ncol(x1)!=ncol(x2))stop('x1 and x2 should have the same number of columns') +x1=elimna(x1) +x2=elimna(x2) +es1=apply(x1,2,est) +es2=apply(x2,2,est) +if(plotit){ +plot(rep(1:ncol(x1),2),c(es1,es2),xlab=xlab,ylab=ylab,type='n') +lines(es1) +lines(es2,lty=2) +} +list(est.1=es1,est.2=es2) +} + + + +funloc<-function(x,tr=.2,pts=NULL,npts=25,plotit=TRUE,alpha=.05,nv=rep(0,ncol(x)), +xlab='T',ylab='Est.',FBP=TRUE,method='hochberg',COLOR=TRUE){ +# +# x1 and x2 are n-by-p matrices, +# Designed for functional data. +# For example, p measures taken over time where p is typically large +# +# nv is the null value when testing some hypothesis +# +# Goal: at speficied times, compute measures of location and confidence intervals. +# pts: Can specify time points where comparisons are to be made +# if pts=NULL, pick +# npts points evenly space between min and max time points +# +# FBP=T: creates a functional boxplot +# FBP=F: plot an estimate of the typical value plus 1-alpha confidence intervals. +# +p=ncol(x) +pm1=p-1 +if(is.null(pts)){ +np=round(p/npts) +if(np==0)np=1 +pts=seq(2,pm1,np) +} +res=NA +dif=NA +op=matrix(NA,nrow=length(pts),ncol=6) +dimnames(op)=list(NULL,c('est.','s.e.','p.value', +'adjust.p.value','ci.low','ci.hi')) +for(i in 1:length(pts)){ +z=trimci(x[,i],tr=tr,null.value=nv[i],pr=FALSE) +op[i,1]=z$estimate +op[i,2]=z$se +op[i,3]=z$p.value +op[i,5]=z$ci[1] +op[i,6]=z$ci[2] +} +op[,4]=p.adjust(op[,3],method=method) +if(plotit){ +if(!FBP){ +xlow=c(1:nrow(op)) +xax=rep(c(1:nrow(op)),3) +plot(xax,as.vector(op[,c(3,5,6)]),type='n',xlab=xlab,ylab=ylab) +lines(xlow,op[,1]) +lines(xlow,op[,5],lty=2) +lines(xlow,op[,6],lty=2) +} +if(FBP){ +if(COLOR)FBplot(x) +if(!COLOR)func.out(x) +}} +op=cbind(pts,op) +op + +} + + +funlocpb<-function(x,est=tmean,nboot=2000,SEED=TRUE, +pts=NULL,npts=25,plotit=TRUE,alpha=.05,nv=rep(0,ncol(x)), +xlab='T',ylab='Est.',FBP=TRUE,method='hochberg',COLOR=TRUE,...){ +# +# x1 and x2 are n-by-p matrices, +# Designed for functional data. +# For example, p measures taken over time where p is typically large +# +# nv is the null value when testing some hypothesis +# +# Goal: at speficied times, compute measures of location and confidence intervals. 
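+#
+# Example (a sketch, not from the original documentation; x is a hypothetical n-by-p matrix
+# of curves, one row per curve):
+# funlocpb(x, est=tmean, npts=25, plotit=FALSE)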
+# pts: Can specify time points where comparisons are to be made +# if pts=NULL, pick +# npts points evenly space between min and max time points +# +# FBP=T: creates a functional boxplot +# FBP=F: plot an estimate of the typical value plus 1-alpha confidence intervals. +# +p=ncol(x) +pm1=p-1 +if(is.null(pts)){ +np=round(p/npts) +if(np==0)np=1 +pts=seq(2,pm1,np) +} +res=NA +dif=NA +op=matrix(NA,nrow=length(pts),ncol=5) +dimnames(op)=list(NULL,c('est.','p.value', +'adjust.p.value','ci.low','ci.hi')) +x=elimna(x) +n=nrow(x) +for(i in 1:length(pts)){ +z=onesampb(x[,i],est=est,nboot=nboot,alpha=alpha,SEED=SEED,nv=nv,...) +op[i,1]=z$estimate +op[i,2]=z$p.value +op[i,4]=z$ci[1] +op[i,5]=z$ci[2] +} +op[,3]=p.adjust(op[,2],method=method) +if(plotit){ +if(!FBP){ +xlow=c(1:nrow(op)) +xax=rep(c(1:nrow(op)),3) +plot(xax,as.vector(op[,c(3,5,6)]),type='n',xlab=xlab,ylab=ylab) +lines(xlow,op[,1]) +lines(xlow,op[,5],lty=2) +lines(xlow,op[,6],lty=2) +} +if(FBP){ +if(COLOR)FBplot(x) +if(!COLOR)func.out(x) +}} +op=cbind(pts,op) +op + +} + + +MULMreg<-function(x,y,regfun=MMreg, +xout=FALSE,outfun=outpro,...){ +# +# Multivariate regression: simply estimate parameters for +# for each column of Y values based on some multivariate regression +# estimator. +# +# Use MMreg by default +# +# x and y are assumed to be matrices with two or more columns +# +# +x<-as.matrix(x) +y<-as.matrix(y) +n.keep=nrow(x) +if(xout){ +x<-as.matrix(x) +flag<-outfun(x,plotit=FALSE)$keep +x<-x[flag,] +y<-y[flag,] +x<-as.matrix(x) +y<-as.matrix(y) +n.keep=nrow(x) +} +p1=ncol(x)+1 +q=ncol(y) +est=matrix(NA,nrow=p1,ncol=q) +dimnames(est)=list(c('Inter',rep('Slope',ncol(x))),NULL) +for(i in 1:q)est[,i]=regfun(x,y[,i],...)$coef +list(coef=est) +} + +MULR.yhat<-function(x,y,pts=x,regfun=MULMreg, +xout=FALSE,outfun=outpro,...){ +# +# Compute predicted Y values based on some multivariate regression +# estimator. +# +# Use MULMreg by default +# +# +x<-as.matrix(x) +y<-as.matrix(y) +n.keep=nrow(x) +if(xout){ +x<-as.matrix(x) +flag<-outfun(x,plotit=FALSE)$keep +x<-x[flag,] +y<-y[flag,] +x<-as.matrix(x) +n.keep=nrow(x) +} +if(is.null(pts))pts=x +q=ncol(y) +p1=ncol(x)+1 +yhat=matrix(NA,nrow=nrow(pts),ncol=q) +coef=regfun(x,y)$coef +slope=as.matrix(coef[2:p1,]) +for(j in 1:q){ +for(i in 1:nrow(pts)){ +yhat[i,j]=coef[1,j]+sum(slope[,j]*x[i,]) +}} +list(yhat=yhat) +} +corCOMmcp_sub<-function(data,x,y,corfun=wincor,...){ +# +# +rv=NA +for(j in 1:ncol(x))rv[j]=corfun(x[data,j],y[data])$cor +rv +} +corCOMmcp<-function(x,y,corfun=wincor,alpha=.05,nboot=500,SEED=TRUE,MC=FALSE,xout=FALSE,outfun=outpro,method='hommel',...){ +# +# Comparing robust dependent correlations: Overlapping case +# That is, have two or more independent variables, compare +# cor(y,x_j) to cor(y,x_k) for each j0)stop('IV1 and IV2 have duplicate values making this method invalid') +x<-as.matrix(x) +p1<-ncol(x)+1 +p<-ncol(x) +#if(length(IV1)+length(IV2) != p)stop('ncol(x) should equal the number of variables indicated by IV1 and IV2') +if(length(IV1)+length(IV2) > p)stop('IV!+IV2 should be less than or equal ncol(x)') +if(max(c(IV1,IV2))>p)stop('IV1 or IV2 has a value that exceeds the number of col. 
in x') +xy<-cbind(x,y) +xy<-elimna(xy) +x<-xy[,1:p] +y<-xy[,p1] +nrem=length(y) +if(xout){ +m<-cbind(x,y) +if(identical(outfun,outblp))flag=outblp(x,y,plotit=FALSE)$keep +else +flag<-outfun(x,plotit=FALSE,...)$keep +m<-m[flag,] +x<-m[,1:p] +y<-m[,p1] +} +nkeep=length(y) +#estit=regfun(x,y,xout=xout,...)$coef[2:p1] +nv=length(y) +x<-as.matrix(x) +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +for(k in 1:2){ +data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot) +if(!MC){ +bvec<-apply(data,1,regboot,x,y,regfun,xout=FALSE,...) +if(k==1)bvec1=bvec +if(k==2)bvec2=bvec +} +if(MC){ +library(parallel) +data=listm(t(data)) +bvec<-mclapply(data,regbootMC,x,y,regfun,mc.preschedule=TRUE,xout=FALSE,...) +if(k==1)bvec1=matl(bvec) +if(k==2)bvec2=matl(bvec) +data=t(matl(data)) +}} +#Leverage points already removed. +# bvec is a p+1 by nboot matrix. The first row +# contains the bootstrap intercepts, the second row +# contains the bootstrap values for first predictor, etc. +bvec1=bvec1[2:p1,] # don't need the intercept +bvec2=bvec2[2:p1,] # don't need the intercept +v1=NA +v2=NA +for(i in 1:nboot){ +v1[i]=regIVcom_sub(bvec1[IV1,i],x[data[i,],IV1],tr=tr) +v2[i]=regIVcom_sub(bvec2[IV2,i],x[data[i,],IV2],tr=tr) +} +pv=bmp(v1,v2)$phat +pv=2*min(c(pv,1-pv)) +est=regfun(x,y)$coef[2:p1] +e1=regIVcom_sub(est[IV1],x[,IV1],tr=tr) +e2=regIVcom_sub(est[IV2],x[,IV2],tr=tr) +rat=NA +if(e2>0)rat=e1/e2 +ep1=e1/winvar(y,tr=tr) +ep2=e2/winvar(y,tr=tr) +list(n=nrem,n.keep=nkeep,est.1=e1,est.2=e2,e.pow1=ep1,e.pow2=ep2,strength.assoc.1=sqrt(ep1), +strength.assoc.2=sqrt(ep2), +ratio=rat,strength.ratio=sqrt(rat),p.value=pv) +} + +regIVcom_sub<-function(slope,x,tr){ +yhat=apply(t(slope*t(x)),1,sum) +str=winvar(yhat,tr=tr) +str +} + +regIVstr<-function(x,y,regfun=tsreg,xout=FALSE,outfun=outpro,tr=.2,...){ +# +# Estimate strength of each independent variable +# when all of them are entered into the model. +# +xy=cbind(x,y) +xy=elimna(xy) +if(xout){ +x<-as.matrix(x) +flag<-outfun(x,plotit=FALSE)$keep +x<-x[flag,] +y<-y[flag] +x<-as.matrix(x) +} +p=ncol(x) +p1=p+1 +est=regfun(x,y,...)$coef[2:p1] +top=NA +for(j in 1:p)top[j]=regIVcom_sub(est[j],x[,j],tr=tr) +bot=winvar(y,tr=tr) +str=top/bot +list(explanatory.power=str,explanatory.strength=sqrt(str)) +} + + + + +Q2anova<-function(J,K,x,alpha=.05,nboot=2000,MC=FALSE){ +# +# Two-way ANOVA for medians, tied values allowed. +# +if(is.matrix(x)|| is.data.frame(x))x=listm(x) +if(J*K != length(x))stop('Total number of groups is not equal to JK') +chkcar=NA +for(j in 1:length(x))chkcar[j]=length(unique(x[[j]])) +if(min(chkcar)<20){ +print('Warning: Sample size is less than') +print('20 for one more groups. Type I error might not be controlled') +} +con=con2way(J,K) +A=pbadepth(x,est=hd,con=con$conA,alpha=alpha,nboot=nboot,MC=MC) +B=pbadepth(x,est=hd,con=con$conB,alpha=alpha,nboot=nboot,MC=MC) +AB=pbadepth(x,est=hd,con=con$conAB,alpha=alpha,nboot=nboot,MC=MC) +list(Fac.A=A,Fac.B=B,Fac.AB=AB) +} +Q3anova<-function(J,K,L,x,alpha=.05,nboot=600,MC=FALSE){ +# +# Three-way ANOVA for medians, tied values allowed. +# +if(is.matrix(x)|| is.data.frame(x))x=listm(x) +if(J*K*L != length(x))stop('Total number of groups is not equal to JKL') +chkcar=NA +for(j in 1:length(x))chkcar[j]=length(unique(x[[j]])) +if(min(chkcar)<20){ +print('Warning: Sample size is less than') +print('20 for one more groups. 
Type I error might not be controlled') +} +con=con3way(J,K,L) +A=pbadepth(x,est=hd,con=con$conA,alpha=alpha,nboot=nboot,MC=MC) +B=pbadepth(x,est=hd,con=con$conB,alpha=alpha,nboot=nboot,MC=MC) +C=pbadepth(x,est=hd,con=con$conC,alpha=alpha,nboot=nboot,MC=MC) +AB=pbadepth(x,est=hd,con=con$conAB,alpha=alpha,nboot=nboot,MC=MC) +AC=pbadepth(x,est=hd,con=con$conAC,alpha=alpha,nboot=nboot,MC=MC) +BC=pbadepth(x,est=hd,con=con$conBC,alpha=alpha,nboot=nboot,MC=MC) +ABC=pbadepth(x,est=hd,con=con$conABC,alpha=alpha,nboot=nboot,MC=MC) +list(Fac.A=A,Fac.B=B,Fac.C=C,Fac.AB=AB,Fac.AC=AC,Fac.BC=BC,Fac.ABC=ABC) +} +scat3d2g<-function(x1,x2,pch1='+',pch2='*',tick.marks=TRUE, +xlab='X1', ylab='X1', zlab='X3'){ +# +# plot data in x1 and x2 +# marking points in x1 with symbol indicated by pch1 +# marking points in x2 with symbol indicated by pch2 +# +if(ncol(x1)!=3)stop('x1 should be a matrix with 3 columns') +if(ncol(x2)!=3)stop('x2 should be a matrix with 3 columns') +library(scatterplot3d) +temp=scatterplot3d(x=c(x1[,1],x2[,1]),y=c(x1[,2],x2[,2]), +z=c(x1[,3],x2[,3]),type='n',tick.marks=tick.marks, +xlab=xlab, ylab=ylab, zlab=zlab) +temp$points(x1,pch=pch1) +temp$points(x2,pch=pch2) +} +scat2d2g<-function(x1,x2,xlab='X1',ylab='X2',ticktype='detailed',pch1='+', +pch2='*'){ +# +# Create a scatterplot marking data from the first group with the symbol +# indicated by pch1 and the symbol indicated by pch2 for group 2. +# +if(ncol(x1)!=2)stop('x1 should be a matrix with 2 columns') +if(ncol(x2)!=2)stop('x2 should be a matrix with 2 columns') +plot(rbind(x1,x2),type='n',xlab=xlab,ylab=ylab) +points(x1,pch=pch1) +points(x2,pch=pch2) +} +qhdsm.pred<-function(x,y,pts=x,q=.5,fr=1,nmin=1,xout=FALSE,outfun=outpro,...){ +# +# Predict the qth quantile of Y based on the values in pts, using the +# the data in x and y. +# +xy=elimna(cbind(x,y)) +p1=ncol(xy) +p=p1-1 +x=xy[,1:p] +y=xy[,p1] +x=as.matrix(x) +if(xout){ +flag<-outfun(x,plotit=FALSE,...)$keep +x<-x[flag,] +y<-y[flag] +x<-as.matrix(x) +} +if(ncol(x)==1){ +vals=runhat(x[,1],y,pts=pts,est=hd,q=q,fr=fr,nmin=nmin,...) +nvals=1 +for(i in 1:length(pts)){ +nvals[i]<-length(y[near(x,pts[i],fr=fr)]) +} +} +if(ncol(x)>1){ +temp=rung3hat(x,y,pts=pts,est=hd,q=q,fr=fr,...) +vals=temp$rmd +nvals=temp$nval +} +list(Y.hat=vals,nvals=nvals) +} +skmcp<-function(x,alpha=.05){ +# +# Multiple comparisons for J independent groups +# and binary data. +# The method is based on the Storer--Kim +# method for comparing independent binomials. +# +# The data are assumed to be stored in x +# which either has list mode or is a matrix. In the first case +# x[[1]] contains the data for the first group, x[[2]] the data +# for the second group, etc. Length(x)=the number of groups = J. +# If stored in a matrix, the columns of the matrix correspond +# to groups. +# +# Missing values are allowed. +# +# Probability of one or more Type I errors controlled using Hochberg's method. 
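+#
+# Illustrative usage sketch (simulated 0/1 data; not part of the original code):
+#   set.seed(1)
+#   dat=list(rbinom(30,1,.4),rbinom(25,1,.5),rbinom(40,1,.7))
+#   skmcp(dat)   # all pairwise Storer--Kim comparisons; FWE controlled via Hochberg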
+# +# +if(is.matrix(x))x<-listm(x) +if(!is.list(x))stop('Data must be stored in list mode or in matrix mode.') +J<-length(x) +ncon=(J^2-J)/2 +Jm<-J-1 +# +# Determine critical values +dvec=alpha/c(1:ncon) +output<-matrix(NA,nrow=ncon,ncol=4) +dimnames(output)<-list(NULL,c('Group','Group','p.value','p.crit')) +ic=0 +for(j in 1:J){ +for(k in 1:J){ +if(j=zvec) +output[temp2,4]<-zvec +num.sig<-sum(output[,3]<=output[,4]) +list(output=output,num.sig=num.sig) +} + +smmvalv3<-function(ntests,df,iter=20000,alpha=.05,SEED=TRUE){ +# +# ntests=number of tests to be performed +# +if(SEED)set.seed(1) +vals<-NA +tvals<-NA +dfvec=rep(df,ntests) +z=matrix(nrow=iter,ncol=ntests) +for(j in 1: ntests)z[,j]=rt(iter,dfvec[j]) +vals=apply(abs(z),1,max) +vals<-sort(vals) +ival<-round((1-alpha)*iter) +qval<-vals[ival] +qval +} +binmat<-function(m,col,lower, upper,INC=TRUE){ +# +# pull out the rows of the matrix m based on the values in the column +# indicated by the argument +# col +# that are between lower and upper, inclusive. Note: the built-in function findInterval could be used instead +# +# Example: binmat(m,3,10,15) will return all rows such that the +# values in column 3 are between 10 and 15, inclusive. +# +if(is.vector(m)){ +m=as.matrix(m) +col=1 +} +if(INC){ +flag1=m[,col]<=upper +flag2=m[,col]>=lower +} +if(!INC){ +flag1=m[,col]lower +} +flag=as.logical(flag1*flag2) +m[flag,] +} + + +boxdif<-function(x,names){ +# +# For J dependent groups, compute all pairwise differences and then +# create boxplots for all pairs of groups. +# +if(is.null(dim(x)))stop('x should be a matrix or data frame') +ic=0 +J=ncol(x) +n=nrow(x) +N=(J^2-J)/2 +ic=0 +dif=matrix(NA,nrow=n,ncol=N) +for(j in 1:J){ +for(k in 1:J){ +if(jbest){ +best=new +idk=j +idcost=k +}}} +# Now check to see whether cost matters for the kernel that is being used +temp=NA +for(k in 1:length(cost))temp[k]=comdepthsvm(x1,x2,kernel=kvals[idk],cost=cost[k])$est.prob +if(var(temp)==0)flag=FALSE +fin=comdepthsvm(x1,x2,MISS=MISS,best.kernel=kvals[idk],best.cost=cost[idc],TABLE=TABLE) +list(best.kernel=kvals[idk],best.cost=cost[idcost],est.prob=fin$est.prob,cost.matters=flag, +miss.class.vectors=fin$miss.class.vectors,TABLE=fin$TABLE) +} + + + +cumrelfT<-function(x,y,pts=NULL,q=c(.1,.25,.5,.75,.9),xlab='X',ylab='CUM REL FREQ',plotit=TRUE, +op=1){ +# +# Compare the cumulative relative frequencies for 2 independent groups +# based on the values in pts. +# +# x and y are vectors. 
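+#
+# Illustrative usage sketch (simulated data; not part of the original code):
+#   set.seed(1)
+#   cumrelfT(rnorm(50),rnorm(60)+.3)   # compares P(X<=pt) and P(Y<=pt) at the quantiles of x indicated by q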
+# +# op=1 use twobinom +# op=2 usd bi2KMS + +x=elimna(x) +y=elimna(y) +if(is.null(pts)){ +for(i in 1:length(q))pts[i]=qest(x,q[i]) +} +output=matrix(NA,nrow=length(pts),ncol=5) +for(j in 1:length(pts)){ +flag1=x<=pts[j] +flag2=y<=pts[j] +temp=NULL +if(op==1)temp=twobinom(x=flag1,y=flag2) +if(op==2)temp=bi2KMSv2(x=flag1,y=flag2) +if(is.null(temp))stop('op should be equal to 1 or 2') +output[j,2]=temp$p1 +output[j,3]=temp$p2 +output[j,4]=temp$est.dif +output[j,5]=temp$p.value +} +output[,1]=pts +if(plotit){ +m=list() +m[[1]]=x +m[[2]]=y +cumrelf(m,xlab=xlab,ylab=ylab) +} +#output[,6]=p.adjust(output[,5],method='hoch') # can beat this adjusted p-value +#dimnames(output)=list(NULL,c('pts','est.p1','est.p2','est.dif','p.value','p.adjusted')) +dimnames(output)=list(NULL,c('pts','est.p1','est.p2','est.dif','p.value')) +output +} +rplotCITAP.pv<-function(n,nreps=2000,alpha=.05,npts=25,tr=.2,fr=5,MC=FALSE,nmin=12,SEED=TRUE,LP=FALSE){ +if(SEED)set.seed(2) +pvals=NA +xy=list() +for (i in 1:nreps){ +xy[[i]]=rmul(n) +} +if(!MC)pvals=lapply(xy,rplotCITAP.sub,npts=npts,tr=tr,fr=fr,alpha=alpha,nmin=nmin) +if(MC){ +library(parallel) +pvals=mclapply(xy,rplotCITAP.sub,npts=npts,tr=tr,fr=fr,alpha=alpha,nmin=nmin) +} +pvals=matl(pvals) +pv=hd(pvals,alpha) +pv +} + +rplotCITAP.sub<-function(xy,tr=.2,fr=NA,SEED=TRUE,nmin=12, +pts=NA,npts=25,LP=FALSE,alpha=.05,xout=FALSE,...){ +# +# prediction interval running interval smoother based on a trimmed mean. +# Unlike rplot, includes a confidence band. +# +x=xy[,1] +y=xy[,2] +xord=order(x) +x=x[xord] +y=y[xord] +infit=rplot(x,y,tr=tr,xout=xout,plotit=FALSE,LP=LP,fr=fr,pr=FALSE,pyhat=TRUE,nmin=nmin) +res1=ancova(x,y,x,y,pr=FALSE,plotit=FALSE,fr1=fr,fr2=fr)$output +pts=seq(res1[1,1],res1[5,1],length.out=npts) +rmd=infit$pyhat +res1=ancova(x,y,x,y,pr=FALSE,plotit=FALSE,fr1=fr,fr2=fr,nmin=nmin)$output +pts=seq(res1[1,1],res1[5,1],length.out=npts) +y.hat=NA +pv=NA +civ=matrix(NA,nrow=npts,ncol=2) +for(i in 1:length(pts)){ +doit=trimci(y[near(x,pts[i],fr)],tr=tr,alpha=alpha,pr=FALSE) +pv[i]=doit$p.value +} +min(pv) +} + +rplotCI.pv<-rplotCITAP.pv + +rplotCIv2.pv<-function(n,nreps=4000,alpha=.05,tr=.2,fr=.5, +MC=TRUE,nmin=12,SEED=TRUE){ +if(SEED)set.seed(2) +pvals=NA +xy=list() +for (i in 1:nreps){ +xy[[i]]=rmul(n) +} +if(!MC)pvals=lapply(xy,rplotCIv2.sub,tr=tr,fr=fr,nmin=nmin) +if(MC){ +library(parallel) +pvals=mclapply(xy,rplotCIv2.sub,tr=tr,fr=fr,nmin=nmin) +} +pvals=matl(pvals) +pv=hd(pvals,alpha) +pv +} + +rplotCIv2.sub<-function(xy,nmin,tr,fr){ +x=xy[,1] +y=xy[,2] +n=length(y) +nv=NA +for(j in 1:n)nv[j]=sum(near(x,x[j],fr=fr)) +pts=x[nv>=nmin] +n.keep=length(pts) +for(j in 1:n.keep)nv[j]=sum(near(x,x[j],fr=fr)) +pts=x[nv>=nmin] +rmd=NA +for(i in 1:length(pts))rmd[i]<-trimci(y[near(x,pts[i],fr)],tr=tr,pr=FALSE)$p.value +pv=min(rmd) +pv +} + + +bdmP<-function(x){ +# +# Test the null hypothesis in Bruner et al. 
(2016) +# for a one-way design +# +if(is.matrix(x))x=listm(x) +J=length(x) +library(rankFD) +nv=lapply(x,length) +nv=as.vector(matl(nv)) +z=x[[1]] +g=rep(1,nv[1]) +for(j in 2:J){ +z=c(z,x[[j]]) +g=c(g,rep(j,nv[j])) +} +xg=cbind(z,g) +xg=as.data.frame(xg) +res=rankFD(z~g,data=xg,hypothesis = 'H0p') +w=as.vector(res$ANOVA.Type.Statistic) +list(test.stat=w[1],df1=w[2],df2=w[3],p.value=w[4],q.hat=bdm(x)$q.hat) +} +rplotCIM<-function(x,y,est=hd,fr=.5,p.crit=NA,plotit=TRUE,scat=TRUE, +pyhat=FALSE, pts=NA,npts=25,xout=FALSE, +xlab='X',ylab='Y',low.span=2/3,nmin=16, +outfun=out,LP=TRUE,LPCI=FALSE,MID=TRUE,alpha=.05,pch='.',...){ +# +# Confidence interval for running interval smoother based on a median +# Unlike rplot, includes a confidence band having simultaneous probability +# coverage equal to 1-alpha. +# +# LP=TRUE, the plot is further smoothed via lowess +# +# fr controls amount of smoothing. If the association is relatively strong, might want to use fr=.2 +# +chk=FALSE +if(identical(est,hd))chk=TRUE +if(!chk)stop('Current version, argument est must be hd') +n=length(y) +if(n<50)stop('Need at least n=50') +xord=order(x) +x=x[xord] +y=y[xord] +infit=rplot(x,y,est=est,xout=xout,plotit=plotit,LP=LP,fr=fr,pr=FALSE,pyhat=TRUE,xlab=xlab,ylab=ylab) +rmd=infit$yhat +m<-cbind(x,y) +if(ncol(m)>2)stop('One covariate only is allowed with this function') +m<-elimna(m) +nv=nrow(m) +if(xout){ +flag<-outfun(m[,1])$keep +m<-m[flag,] +} +x=m[,1] +y=m[,2] +n.keep=length(y) +res1=ancova(x,y,x,y,pr=FALSE,plotit=FALSE,fr1=fr,fr2=fr,nmin=nmin)$output +pts=seq(res1[1,1],res1[5,1],length.out=npts) +flag=duplicated(pts) +npts=length(pts) +civ=matrix(NA,nrow=npts,ncol=2) +for(i in 1:length(pts)){ +xx=y[near(x,pt=pts[i],fr)] +civ[i,]=sint(xx,alpha=alpha/npts) +} +up=civ[!flag,2] +low=civ[!flag,1] +if(plotit){ +if(LPCI){ +up=lplot(pts,up,plotit=FALSE,pyhat=TRUE,pr=FALSE,low.span=low.span)$yhat +low=lplot(pts,low,plotit=FALSE,pyhat=TRUE,pr=FALSE,low.span=low.span)$yhat +} +pts=pts[!flag] +lines(pts,up,lty=2) +lines(pts,low,lty=2) +} +if(pyhat){output<-cbind(pts,rmd[!flag],low,up) +dimnames(output)=list(NULL,c('pts','y.hat','ci.low','ci.up')) +} +if(!pyhat)output<-'Done' +list(output=output,str=infit$Strength.Assoc,n=nv,n.keep=n.keep) +} +lplotCI<-function(x,y,plotit=TRUE,xlab='X',ylab='Y',p.crit=NULL,alpha=.05,span=NULL, +CIV=FALSE,xout=FALSE,outfun=outpro, pch='.',SEED=TRUE,nboot=100,pts=NULL,npts=25,nreps=2000,...){ +# +# Confidence band using LOESS +# +# Method allows heteroscedasticity and adjust the confidence intervals +# so that the simultaneous probabillty coverage is approximately 1-alpha +# +# If CIV=FALSE and plotit=TRUE, creates a plot with the confidence intervals. +# CIV=TRUE, returns the confidence intervals for the points in pts +# pts =NULL, the function picks +# npts points, extending between M-1.5*mad(x) and M+1.5*mad(x) +# +# +# For alpha=0.05, n <=2000 execution time is low. Otherwise +# the adjusted critical value must be computed. +# +# p.crit=NULL: If alpha=.05, determined quickly, otherwise it is computed. 
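+#
+# Illustrative usage sketch (simulated data; not part of the original code):
+#   set.seed(1); x=rnorm(100); y=x+rnorm(100)
+#   lplotCI(x,y,CIV=TRUE)$Conf.Intervals   # LOESS fit plus an approximate simultaneous .95 band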
+# +xy=elimna(cbind(x,y)) +if(ncol(xy)>2)stop('Current version limited to a single predictor variable') +if(xout){ +flag<-outfun(xy[,1],plotit=FALSE,...)$keep +xy<-xy[flag,] +} +n=nrow(xy) +if(is.null(span)){ +span=2/3 +if(n >=300)span=.5 +if(n >=800)span=.3 +} +x=xy[,1] +y=xy[,2] +xord=order(x) +y=y[xord] +x=x[xord] +M=median(x) +low=M-1.5*mad(x) +up=M+1.5*mad(x) +if(is.null(pts))pts=seq(low,up,length.out=npts) +if(npts<=5)p.crit=alpha/npts +if(alpha==.05){ +if(is.null(p.crit)){ +if(n<30)stop('Should have n>=30') +nv=c(30,50,70,100,150, 200,300, 500, 1000, 2000) +pv=c(0.003599898, 0.002661925, 0.002399994, 0.002877103, 0.003000428, 0.003538190, + 0.003872710, 0.004396500, 0.004075000, 0.0045161) + +if(npts==25){ +if(n<=2000)p.crit=lplot.pred(1/nv,pv,pts=1/n)$yhat +if(n>2000)p.crit=.00452 +}}} +if(is.null(p.crit)){ +print('p.crit is being computed, this might take some time.') +pts.stand=NULL +if(!is.null(pts))pts.stand=(median(x)-pts)/mad(x) +p.crit=lplotbsepvv3(n,nreps=nreps,npts=npts,pts=pts.stand,alpha=alpha) +} +plx<-predict(loess(y ~ x,span=span), se=TRUE) +se=lplotse(x,y,nboot=nboot,SEED=SEED,pts=pts,span=span) +lfit=lplot.pred(x,y,pts=pts,span=2/3)$yhat +if(plotit){ +plot(x,y,xlab=xlab,ylab=ylab,pch=pch) +lines(x,plx$fit) +if(is.null(p.crit))p.crit=alpha +lines(pts,lfit - qt(1-p.crit/2,plx$df)*se, lty=2) +lines(pts,lfit + qt(1-p.crit/2,plx$df)*se, lty=2) +} +ci.low=lfit - qt(1-p.crit/2,plx$df)*se +ci.up=lfit + qt(1-p.crit/2,plx$df)*se +if(!CIV)ci=NULL +if(CIV){ +ci=cbind(pts,lfit,ci.low,ci.up) +dimnames(ci)=list(NULL,c('X','Y.hat','ci.low','ci.up')) +} +list(p.crit=p.crit,Conf.Intervals=ci) +} + + +lplotbsepvv3<-function(n,nreps=2000,alpha=0.05,pts=NULL,npts=25){ +# +# Determine critical p-value for lplotCI. +# +set.seed(2) +pv=NA +for(i in 1:nreps){ +x=rnorm(n) +y=rnorm(n) +xord=order(x) +y=y[xord] +x=x[xord] +M=median(x) +low=M-1.5*mad(x) +up=M+1.5*mad(x) +if(is.null(pts))pts=seq(low,up,length.out=npts) +plx<-predict(loess(y ~ x), se=TRUE) +est=lplot.pred(x,y,pts=pts)$yhat +se=lplotse(x,y,SEED=FALSE,pts=pts) +test=abs(est/se) +pall=2*(1-pt(abs(test),plx$df)) +pv[i]=min(elimna(pall)) +} +hd(pv,alpha) +} + +lplotse<-function(x,y,pts=x,nboot=100,SEED=TRUE,span=2/3){ +# +# compute estimae of SE +# return the values corresponding to the order x values +# +xord=order(x) +y=y[xord] +x=x[xord] +if(SEED)set.seed(2) +n=length(y) +ev=matrix(NA,nrow=nboot,ncol=length(pts)) +for(i in 1:nboot){ +data<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot) +ev[i,]=lplot.pred(x[data[i,]],y[data[i,]],pts=pts,span=span)$yhat +} +se=apply(ev,2,sd) +se +} +BFBANOVA<-function(x,nboot=1000,SEED=TRUE){ +# +# One-way ANOVA bootstrap version of Brown-Forsyhte +# +if(SEED)set.seed(2) +if(is.data.frame(x))x=as.matrix(x) +if(is.matrix(x))x<-listm(x) +J=length(x) +for(j in 1:J)x[[j]]=elimna(x[[j]]) +TV=BFANOVA(x)$test.statistic +ylist<-list() +dat=list() +TT<-NA +#means<-sapply(x,mean) +for (j in 1:J)ylist[[j]]<-x[[j]]-mean(x[[j]]) +for (i in 1:nboot){ +for(j in 1:J)dat[[j]]=sample(ylist[[j]],length(ylist[[j]]),replace=TRUE) +TT[i]<-BFANOVA(dat)$test.statistic +} +pval<-mean(TV<=TT,na.rm=TRUE) +list(test.stat=TV,p.value=pval) +} +BFANOVA<-function(x){ +# +# Brown-Forsyhte ANOVA, generalized to trimmed means +# +# (Not known whether a generalization to trimmed means performs relatively well.) 
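+#
+# Illustrative usage sketch (simulated one-way data; not part of the original code):
+#   set.seed(1)
+#   g=list(rnorm(20),rnorm(25)+.5,rexp(30))
+#   BFANOVA(g)    # heteroscedastic F-type test
+#   BFBANOVA(g)   # bootstrap version defined above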
+# +if(is.data.frame(x))x=as.matrix(x) +if(is.matrix(x))x<-listm(x) +J=length(x) +for(j in 1:J)x[[j]]=elimna(x[[j]]) +xall=elimna(as.vector(matl(x))) +YB=mean(xall) +ybar=lapply(x,mean) +v=lapply(x,var) +n=lapply(x,length) +ybar=matl(ybar) +n=as.vector(matl(n)) +v=as.vector(matl(v)) +ybar=as.vector(matl(ybar)) +w=n/v +N=sum(n) +top=sum(n*(ybar-YB)^2) +bot=sum((1-n/N)*v) +FS=top/bot +df1=J-1 +fv=NA +for(j in 1:J)fv[j]=(1-n[j]/N)*v[j] +df2=1/sum(fv^2/(n-1)) +pv=1-pf(FS,df1,df2) +list(test.statistic=FS,df1=df1,df2=df2,p.value=pv) +} +olshc4.band<-function(x,y,alpha=.05,xout=FALSE,outfun=outpro,plotit=TRUE,pr=TRUE, +xlab='X',ylab='Y',nreps=5000,pch='.',CI=FALSE,ADJ=TRUE,SEED=TRUE){ +# +# Heterocedastic confidence band with simultaneous +# probability coverate 1-alpha +# +# CI=TRUE, confidence intervals are returned. +# CI=FALSE, only a plot is created. +# +# +if(!CI){ +if(pr)print('This function returns the confidence intervals when CI=TRUE') +} +xy=elimna(cbind(x,y)) +if(ncol(xy)!=2)stop('Only one independent variable allowed') +x=xy[,1] +y=xy[,2] +if(xout){ +m<-cbind(x,y) +flag<-outfun(x,plotit=FALSE)$keep +m<-m[flag,] +x<-m[,1] +y<-m[,2] +} +flagdup=duplicated(x) +xu=x[!flagdup] +yu=y[!flagdup] +xs=order(xu) +xu=xu[xs] +yu=yu[xs] +n=length(yu) +df=n-2 +nv=c(20,30,40,50,75,100,200,300,500,1000,2000) +cr=c(0.009649481,0.01115032,0.0125336,0.01196315,0.01350826, +0.01346237,0.01326992,0.01291531,0.01465537,0.01488745, 0.01595954) +ysqse=trimse(y,tr=0)^2 +temp=olshc4(x,y) +b1sqse=temp$cov[2,2] +yhat=temp$ci[1,2]+temp$ci[2,2]*xu +se=sqrt(ysqse+b1sqse*(xu-mean(xu))^2) +ci=matrix(NA,ncol=4,nrow=length(xu)) +ci[,1]=xu +ci[,2]=yhat +if(!ADJ)crit=qt(1-alpha/2,df) +if(ADJ){ +adj=NA +if(alpha==.05){ +if(n>=20 && n<=2000)adj=lplot.pred(1/nv,cr,1/n)$yhat +if(is.na(adj))adj=olshc4.bandCV(n=n,nreps=nreps,alpha=alpha,SEED=SEED) +} +crit=qnorm(1-adj/2) # Don't need Student's T, adjustment deals with n. +} +ci[,3]=yhat-crit*se +ci[,4]=yhat+crit*se +dimnames(ci)=list(NULL,c('X','Yhat','ci.low','ci.up')) +if(plotit){ +plot(x,y,pch=pch,xlab=xlab,ylab=ylab) +abline(temp$ci[1,2],temp$ci[2,2]) +lines(xu,ci[,3],lty=2) +lines(xu,ci[,4],lty=2) +} +if(!CI)ci=NA +list(conf.intervals=ci) +} + + +olshc4.bandCV<-function(n,nreps=5000,alpha=.05,SEED=TRUE){ +# +# Heterocedastic confidence band with simultaneous +# probability coverage 1-alpha +# +if(SEED)set.seed(2) +pv=NA +for(i in 1:nreps){ +x=rnorm(n) +y=rnorm(n) +ysqse=trimse(y,tr=0)^2 +temp=olshc4(x,y) +ysqse=trimse(y,tr=0)^2 +b1sqse=temp$cov[2,2] +yhat=temp$ci[1,2]+temp$ci[2,2]*x +se=sqrt(ysqse+b1sqse*(x-mean(x))^2) +pv[i]=min(2*(1-pnorm(abs(yhat)/se))) +} +hd(pv,q=alpha) +} + +olshc4band=olshc4.band + +lplotcom2<-function(x,y,xout=FALSE,pts1=NULL,pts2=NULL,outfun=outpro,span=2/3,npts=10,tr=.2,...){ +# +# For two independent variables, estimate their relative importance when using LOESS +# +library(stats) +x<-as.matrix(x) +m<-elimna(cbind(x,y)) +n.orig=nrow(m) +n.keep=n.orig +d<-ncol(x) +if(d!=2)stop('Current version is for two independent variables only') +if(xout){ +flag<-outfun(m[,1:2],plotit=FALSE,...)$keep +m<-m[flag,] +} +n.keep=nrow(m) +M=apply(m,2,median) +SD=apply(m,2,mad) +low=M-1.5*SD +up=M+1.5*SD +if(is.null(pts1))pts1=seq(low[1],up[1],length.out=npts) +if(is.null(pts2))pts2=seq(low[2],up[2],length.out=npts) +e1=NA # +e2=NA +for(j in 1:length(pts1)){ # Determine strength of x2 given a value stored in pts1. 
+v2=cbind(rep(pts1[j],n.keep),m[,2]) +vals=lplot.pred(m[,1:2],m[,3],v2,span=span)$yhat +vals=elimna(vals) +nv=length(vals) +e2[j]=NA +if(nv>=10)e2[j]=winsd(vals,tr=tr,na.rm=TRUE)/winsd(m[,3],tr=tr,na.rm=TRUE) +} +for(j in 1:length(pts2)){ # Determine strength of x1 given a value stored in pts2. +v1=cbind(m[,1],rep(pts2[j],n.keep)) +vals=lplot.pred(m[,1:2],m[,3],v1,span=span)$yhat +vals=elimna(vals) +nv=length(vals) +e1[j]=NA +if(nv>=10)e1[j]=winsd(vals,tr=tr,na.rm=TRUE)/winsd(m[,3],tr=tr,na.rm=TRUE) + } +p=mean(outer(e1,e2,FUN='-')<0,na.rm=TRUE) +list(str1=e1,str2=e2,p=p,mean.str1=mean(e1),mean.str2=mean(e2)) +} + +lplotCIMC<-function(data,x,y,pts1,pts2,npts,tr,span){ +temp=lplotcom2(x[data,],y[data],pts1=pts1,pts2=pts2,npts=npts,tr=tr,span=span) +v=c(temp$mean.str1,temp$mean.str2) +} +linWMW<-function(x,con,locfun=median,nreps=100,SEED=TRUE){ +# +# Determine distribution of Y_i=sum_j c_jX_j +# Then estimate P(Y<0) and measure of location +# based on +# locfun, which defaults to the median. +# +con=as.vector(con) +if(sum(con)!=0)stop('Contrast coefficients must sum to zero') +if(SEED)set.seed(2) +if(is.data.frame(x))x=as.matrix(x) +if(is.matrix(x))x<-listm(x) +if(!is.list(x))stop('Data must be stored in a matrix or in list mode.') +J<-length(x) +if(length(con)!=J)stop('Length of con should equal number of groups') +x=elimna(x) +nv=as.vector(matl(lapply(x,FUN='length'))) +nmin=min(nv) +est=NA +p=NA +B=list() +M=matrix(NA,nrow=nmin,ncol=J) +for(i in 1:nreps){ +for(j in 1:J)M[,j]=sample(x[[j]],nmin) +B[[i]]=M +} +L=lapply(B,linWMWMC.sub,con=con) +est=lapply(L,locfun) +p=lapply(L,linWMWMC.sub2) +est=as.vector(matl(est)) +p=as.vector(matl(p)) +list(p=mean(p),center=mean(est)) +} + +linWMWMC.sub<-function(M,con){ +L=apply(t(con*t(M)),1,sum) +L +} + +linWMWMC.sub2<-function(L){ +phat=mean(L<0)+.5*mean(L==0) +phat +} + + +linEP<-function(x,con,locfun=tmean,tr=.2,nreps=200,SEED=TRUE){ +# +# Estimate exlanatory power for a linear contrast aimed at main effects +# +# con = contrast coefficients +# x is a matrix or has list mode. +# +# When dealing with main effects. Could pool the data and use yuenv2. +# Or could estimate distribution of the linear contrast, which is the +# strategy here. Note: for interactions, this latter strategy is needed. +# +# (Uses the function linWMWMC.sub) +# +if(sum(con)!=0)stop('Contrast coefficients must sum to zero') +if(SEED)set.seed(2) +if(is.data.frame(x))x=as.matrix(x) +if(is.matrix(x))x<-listm(x) +if(!is.list(x))stop('Data must be stored in a matrix or in list mode.') +J<-length(x) +if(length(con)!=J)stop('Length of con should be equal to the number of groups') +x=elimna(x) +nv=as.vector(matl(lapply(x,FUN='length'))) +nmin=min(nv) +B=list() +M=matrix(NA,nrow=nmin,ncol=J) +for(i in 1:nreps){ +for(j in 1:J)M[,j]=sample(x[[j]],nmin) +B[[i]]=M +} +if(length(con)==2)ef.size=yuenv2(x[[1]],x[[2]],tr=tr)$Effect.Size +else{ +flag=con==1 +con1=con +con1[!flag]=0 +con2=abs(con) +con2[flag]=0 +L1=lapply(B,linWMWMC.sub,con=con1) +L2=lapply(B,linWMWMC.sub,con=con2) +ef.size=NA +for(j in 1:length(L1))ef.size[j]=yuenv2(L1[[j]],L2[[j]],SEED=FALSE)$Effect.Size +} +list(Effect.Size=mean(ef.size)) +} +linconEP<-function(x,con=0,tr=.2,alpha=.05,pr=TRUE,crit=NA,SEED=TRUE,INT=FALSE,nreps=200,POOL=FALSE){ +# +# +# This function is used when estimating effect size via +# a variation of explanatory power. +# +# It is restricted to the usual main effects and interactions in a two-way design. +# This function is used by bbmcpEP. 
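+#
+# Illustrative usage sketch via bbmcpEP (simulated 2-by-3 design; not part of the original code):
+#   set.seed(1)
+#   dat=lapply(1:6,function(i)rnorm(20))
+#   bbmcpEP(2,3,dat)   # explanatory-power effect sizes for Factor A, Factor B and the interaction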
+# +# con: used to indicate main effects and is passed to this function via bbmcpEP +# +# POOL=TRUE: For the usual main effects in a two-way where +# for a fixed level of Factor A, say, one can simply pool the data over the +# levels of Factor A. POOL=TRUE means that data with contrast coefficients +# = 1 are pooled, the same is for data with contrast coefficients +# = -1 and the resulting two groups are compared. +# +# A heteroscedastic test of d linear contrasts using trimmed means. +# +# The data are assumed to be stored in $x$ in list mode, a matrix +# or a data frame. If in list mode, +# length(x) is assumed to correspond to the total number of groups. +# It is assumed all groups are independent. +# +# con is a J by d matrix containing the contrast coefficients that are used. +# If con is not specified, all pairwise comparisons are made. +# +# Missing values are automatically removed. +# +# +if(tr==.5)stop('Use the R function medpb to compare medians') +if(is.data.frame(x))x=as.matrix(x) +flag<-TRUE +if(alpha!= .05 && alpha!=.01)flag<-FALSE +if(is.matrix(x))x<-listm(x) +if(!is.list(x))stop('Data must be stored in a matrix or in list mode.') +J<-length(x) +sam=NA +h<-vector('numeric',J) +w<-vector('numeric',J) +xbar<-vector('numeric',J) +for(j in 1:J){ +xx<-!is.na(x[[j]]) +val<-x[[j]] +x[[j]]<-val[xx] # Remove missing values +sam[j]=length(x[[j]]) +h[j]<-length(x[[j]])-2*floor(tr*length(x[[j]])) + # h is the number of observations in the jth group after trimming. +w[j]<-((length(x[[j]])-1)*winvar(x[[j]],tr))/(h[j]*(h[j]-1)) +xbar[j]<-mean(x[[j]],tr) +} +if(sum(con^2)==0){ +CC<-(J^2-J)/2 +psihat<-matrix(0,CC,7) +dimnames(psihat)<-list(NULL,c('Group','Group','psihat','ci.lower','ci.upper', +'p.value','Effect.Size')) +test<-matrix(NA,CC,6) +dimnames(test)<-list(NULL,c('Group','Group','test','crit','se','df')) +jcom<-0 +for (j in 1:J){ +for (k in 1:J){ +if (j < k){ +jcom<-jcom+1 +test[jcom,3]<-abs(xbar[j]-xbar[k])/sqrt(w[j]+w[k]) +sejk<-sqrt(w[j]+w[k]) +test[jcom,5]<-sejk +psihat[jcom,1]<-j +psihat[jcom,2]<-k +test[jcom,1]<-j +test[jcom,2]<-k +psihat[jcom,3]<-(xbar[j]-xbar[k]) +df<-(w[j]+w[k])^2/(w[j]^2/(h[j]-1)+w[k]^2/(h[k]-1)) +test[jcom,6]<-df +psihat[jcom,6]<-2*(1-pt(test[jcom,3],df)) +psihat[jcom,7]=yuenv2(x[[j]],x[[k]])$Effect.Size +if(CC>28)flag=FALSE +if(flag){ +if(alpha==.05)crit<-smmcrit(df,CC) +if(alpha==.01)crit<-smmcrit01(df,CC) +if(!flag || CC>28)crit<-smmvalv2(dfvec=rep(df,CC),alpha=alpha,SEED=SEED) +} +test[jcom,4]<-crit +psihat[jcom,4]<-(xbar[j]-xbar[k])-crit*sejk +psihat[jcom,5]<-(xbar[j]-xbar[k])+crit*sejk +}}}} +if(sum(con^2)>0){ +if(nrow(con)!=length(x)){ +stop('The number of groups does not match the number of contrast coefficients.') +} +psihat<-matrix(0,ncol(con),6) +dimnames(psihat)<-list(NULL,c('con.num','psihat','ci.lower','ci.upper', +'p.value','Effect.Size')) +test<-matrix(0,ncol(con),5) +dimnames(test)<-list(NULL,c('con.num','test','crit','se','df')) +df<-0 +for (d in 1:ncol(con)){ +if(POOL){ +id1=which(con[,d]==1) +id2=which(con[,d]==-1) +y1=pool.a.list(x[id1]) +y2=pool.a.list(x[id2]) +xx=list(y1,y2) +conP=matrix(c(1,-1)) +} +psihat[d,1]<-d +psihat[d,2]<-sum(con[,d]*xbar) +sejk<-sqrt(sum(con[,d]^2*w)) +test[d,1]<-d +test[d,2]<-sum(con[,d]*xbar)/sejk +df<-(sum(con[,d]^2*w))^2/sum(con[,d]^4*w^2/(h-1)) +if(flag){ +if(alpha==.05)crit<-smmcrit(df,ncol(con)) +if(alpha==.01)crit<-smmcrit01(df,ncol(con)) +} +if(!flag)crit<-smmvalv2(dfvec=rep(df,ncol(con)),alpha=alpha,SEED=SEED) +test[d,3]<-crit +test[d,4]<-sejk +test[d,5]<-df 
+if(!POOL)temp=linEP(x,con[,d],tr=tr,nreps=nreps,SEED=SEED) +if(POOL)temp=linEP(xx,conP,tr=tr,nreps=nreps,SEED=SEED) +if(!INT){ +psihat[d,6]=linEP(x,con[,d],tr=tr,nreps=nreps,SEED=SEED)$Effect.Size +} +if(INT){ +id=con[,d]!=0 +psihat[d,6]=Inter.EP(x[id],tr=tr,nreps=nreps,SEED=SEED)$Effect.Size +} +psihat[d,3]<-psihat[d,2]-crit*sejk +psihat[d,4]<-psihat[d,2]+crit*sejk +psihat[d,5]<-2*(1-pt(abs(test[d,2]),df)) +} +} +list(n=sam,test=test,psihat=psihat) +} +bbmcpEP<-function(J,K,x,tr=.2,alpha=.05,grp=NA,op=FALSE,nreps=200,SEED=TRUE,pr=TRUE,POOL=TRUE){ +# +# Test all linear contrasts associated with +# main effects for Factor A and B and all interactions based on trimmed means +# By default, +# tr=.2, meaning 20% trimming is used. +# +# This function is the same as bbmpc, only it also reports a measures of effect size +# based on explanatory power. +# +# + # The data are assumed to be stored in x in list mode or in a matrix. + # If grp is unspecified, it is assumed x[[1]] contains the data + # for the first level of both factors: level 1,1. + # x[[2]] is assumed to contain the data for level 1 of the + # first factor and level 2 of the second factor: level 1,2 + # x[[j+1]] is the data for level 2,1, etc. + # If the data are in wrong order, grp can be used to rearrange the + # groups. For example, for a two by two design, grp<-c(2,4,3,1) + # indicates that the second group corresponds to level 1,1; + # group 4 corresponds to level 1,2; group 3 is level 2,1; + # and group 1 is level 2,2. + # + # Missing values are automatically removed. + # + JK <- J * K + if(is.matrix(x)) + x <- listm(x) + if(!is.na(grp[1])) { + yy <- x + x<-list() + for(j in 1:length(grp)) + x[[j]] <- yy[[grp[j]]] + } + if(!is.list(x)) + stop('Data must be stored in list mode or a matrix.') + for(j in 1:JK) { + xx <- x[[j]] + x[[j]] <- xx[!is.na(xx)] # Remove missing values + } + # + + if(JK != length(x)) + warning('The number of groups does not match the number of contrast coefficients.') +for(j in 1:JK){ +temp<-x[[j]] +temp<-temp[!is.na(temp)] # Remove missing values. +x[[j]]<-temp +} + # Create the three contrast matrices +temp<-con2way(J,K) +conA<-temp$conA +conB<-temp$conB +conAB<-temp$conAB +if(!op){ +Factor.A<-linconEP(x,con=conA,tr=tr,nreps=nreps,INT=FALSE,pr=FALSE,POOL=POOL) +Factor.B<-linconEP(x,con=conB,tr=tr,nreps=nreps,INT=FALSE,pr=FALSE,POOL=POOL) +Factor.AB<-linconEP(x,con=conAB,tr=tr,nreps=nreps,INT=TRUE,pr=FALSE,POOL=FALSE) +} +All.Tests<-NA +if(op){ +Factor.A<-NA +Factor.B<-NA +Factor.AB<-NA +con<-cbind(conA,conB,conAB) +All.Tests<-lincon(x,con=con,tr=tr,alpha=alpha) +} +list(Factor.A=Factor.A,Factor.B=Factor.B,Factor.AB=Factor.AB,All.Tests=All.Tests,conA=conA,conB=conB,conAB=conAB) +} + +rmmcpES<-function(x, con = 0, tr = 0.2, alpha = 0.05,dif=TRUE,hoch=TRUE,pr=TRUE){ +# +# Like rmmcp,only a robust version of Cohen's d is included. +# Designed only for all pairwise comparisons. 
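+#
+# Illustrative usage sketch (simulated dependent groups; not part of the original code):
+#   set.seed(1)
+#   m=matrix(rnorm(90),ncol=3)   # three dependent measures
+#   rmmcpES(m)   # all pairwise comparisons, each with a robust effect size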
+# +if(con!=0)stop('This function is for all pairwise comparisons only') +if(!is.matrix(x))x<-matl(x) +if(!is.matrix(x))stop('Data must be stored in a matrix or in list mode.') +a=rmmcp(x,con=con,tr=tr,alpha=alpha,dif=dif,hoch=hoch) +test=a$test +J=ncol(x) +CC=(J^2-J)/2 +psihat<-matrix(0,CC,6) +dimnames(psihat)<-list(NULL,c('Group','Group','psihat','ci.lower','ci.upper','Effect.Size')) +psihat[,1:5]=a$psihat +ic=0 +for (j in 1:J){ +for (k in 1:J){ +if (j < k){ +ic=ic+1 +if(dif)psihat[ic,6]=D.akp.effect(x[,j],x[,k],tr=tr) +if(!dif){ +psihat[ic,6]=yuendv2(x[,j],x[,k],tr=tr)$Effect.Size +if(pr)print('Note: With dif=FALSE, explanatory measure of effect size is used') +} +}}} +list(test=test,psihat=psihat) +} + +interWMWpb<-function(x,nreps=100,SEED=TRUE,nboot=500,alpha=.05,nmax=10^8,MC=TRUE){ +# +# +# +if(MC)library(parallel) +if(SEED)set.seed(2) +if(is.data.frame(x))x=as.matrix(x) +if(is.matrix(x))x<-listm(x) +if(!is.list(x))stop('Data must be stored in a matrix or in list mode.') +x=elimna(x) +J<-length(x) +if(J!=4)stop('Number of groups should be four') +nv=lapply(x,length) +y=list() +pv=NA +N=max(pool.a.list(nv)) +mat=matrix(NA,nrow=N,ncol=4) +for(i in 1:nboot){ +for(j in 1:4)mat[1:nv[[j]],j]=sample(x[[j]],nv[[j]],replace=TRUE) +y[[i]]=mat +} +if(!MC)pv=lapply(y,interWMWpb.lsub) +if(MC)pv=mclapply(y,interWMWpb.lsub) +pv=pool.a.list(pv) +est=interWMW(x,nreps=nreps,SEED=SEED,nmax=nmax) +pv=sort(pv) +ilow<-round((alpha/2) * nboot) +ihi<-nboot - ilow +ilow<-ilow+1 +ci=pv[ilow] +ci[2]=pv[ihi] +pval=mean(pv<.5)+.5*mean(pv==.5) +pval=2*min(c(pval,1-pval)) +list(p.est=est$p.est,ci=ci,p.value=pval,row.results=est$results.4.rows) +} + +interWMWpb.lsub<-function(x,nreps=nreps){ +v=interWMW(x,nreps=nreps,SEED=FALSE)$p.est +v +} + + +linWMWpb<-function(x,con,nreps=100,SEED=TRUE,nboot=500,alpha=.05,MC=FALSE){ +# +# Compute a confidence interval for the probability that a linear contrast +# is less than zero. +# +con=as.vector(con) +if(SEED)set.seed(2) +if(is.data.frame(x))x=as.matrix(x) +if(is.matrix(x))x<-listm(x) +if(!is.list(x))stop('Data must be stored in a matrix or in list mode.') +x=elimna(x) +J<-length(x) +if(J!=length(con))stop('Number of groups should be equal to the number of rows in con') +nv=lapply(x,length) +N=max(pool.a.list(nv)) +mat=matrix(NA,nrow=N,ncol=J) +y=list() +pv=NA +est=linWMW(x,con=con,nreps=nreps,SEED=SEED)$p +for(i in 1:nboot){ +#for(j in 1:J)y[[j]]=sample(x[[j]],nv[[j]],replace=TRUE) +for(j in 1:J)mat[1:nv[[j]],j]=sample(x[[j]],nv[[j]],replace=TRUE) +y[[i]]=mat +} +if(!MC)pv=lapply(y,linWMWpb.lsub,con=con,nreps=nreps,SEED=SEED) +if(MC){ +library(parallel) +pv=mclapply(y,linWMWpb.lsub,con=con,nreps=nreps,SEED=SEED) +} +pv=pool.a.list(pv) +pv=sort(pv) +ilow<-round((alpha/2) * nboot) +ihi<-nboot - ilow +ilow<-ilow+1 +ci=pv[ilow] +ci[2]=pv[ihi] +pval=mean(pv<.5)+.5*mean(pv==.5) +pval=2*min(c(pval,1-pval)) +list(p.est=est,ci=ci,p.value=pval) +} + +linWMWpb.lsub<-function(x,nreps=nreps,con=con,SEED=SEED){ +v=linWMW(x,nreps=nreps,con=con,SEED=SEED)$p +v +} + +permg.t<-function(x,y,alpha=.05,tr=0,nboot=1000,SEED=TRUE){ +# +# Do a two-sample permutation test based on trimmed means using the +# Chung--Romano version of a permuation test. 
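+#
+# Illustrative usage sketch (simulated data; not part of the original code):
+#   set.seed(1)
+#   permg.t(rnorm(25),rnorm(30)+.5,tr=.2)   # permutation test based on 20% trimmed means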
+ +# The default number of permutations is nboot=1000 +# +if(SEED)set.seed(2) +x<-x[!is.na(x)] +y<-y[!is.na(y)] +xx<-c(x,y) +tval<-yuen(x,y,tr=tr)$teststat +vec<-c(1:length(xx)) +v1<-length(x)+1 +difb<-NA +tv<-NA +for(i in 1:nboot){ +data <- sample(xx, size = length(xx), replace = FALSE) +temp1<-data[c(1:length(x))] +temp2<-data[c(v1:length(xx))] +tv[i]<-yuen(temp1,temp2,tr=tr)$teststat +} +tv<-sort(tv) +icl<-floor((alpha/2)*nboot+.5) +icu<-floor((1-alpha/2)*nboot+.5) +reject<-'no' +list(teststat=tval,lower.crit=tv[icl],upper.crit=tv[icu],reject=reject) +} + +loc2dif.ci<-function(x,y,est=median,alpha=.05,nboot=2000,SEED=TRUE){ +# +# Confidence interval for the median of D=X-Y, +# where X and Y are independent +# +x=elimna(x) +y=elimna(y) +n1=length(x) +n2=length(y) +es=loc2dif(x,y) +FLAG=FALSE +cliff=cid(x,y,alpha=alpha)$ci.p +del1=WMW2med(x,y,cliff[1]) +del2=WMW2med(x,y,cliff[2]) +ci=loc2dif(x+del1,y,est=est) +ci[2]=loc2dif(x+del2,y,est=est) +if(var(cliff)==0)FLAG=TRUE +if(escliff[2])FLAG=TRUE +if(FLAG)ci=wmwpb(x,y,est=est,alpha=alpha,nboot=nboot,SEED=SEED,pr=FALSE)$ci +list(n1=n1,n2=n2,est=es,conf.int=ci) +} + + + +WMW2med<-function(x,y,q){ +# +# If P(Xalpha)flag=TRUE +if(pvF<=alpha/(K+1-i)){ +ic=ic+1 +pick=c(pick,v[ic]) +flag=FALSE +if(pv[v[ic]]>alpha)flag=TRUE +} +if(flag)break +} +Decision=rep('Not Sig',length(pv)) +if(!is.null(pick))Decision[pick]='Reject' +nsig=sum(length(pick)) +list(n1=n1,n2=n2,p.values=pv, +Decisions=as.matrix(Decision),num.sig=nsig) +} + +shiftPBci<-function(x,y,locfun=median,alpha=.05,null.val=.5,nboot=500,SEED=TRUE,...){ +# +# confidence interval for the quantile shift measure of effect size. +# (Same as shiftQSci) +# +if(SEED)set.seed(2) +x=elimna(x) +y=elimna(y) +n1=length(x) +n2=length(y) +v=NA +ef=shiftes(x,y,locfun=locfun,SEED=FALSE)$Q.Effect +for(i in 1:nboot){ +X=sample(x,n1,replace=TRUE) +Y=sample(y,n2,replace=TRUE) +v[i]=shiftes(X,Y,locfun=locfun,SEED=FALSE)$Q.Effect +} +v=sort(v) +ilow<-round((alpha/2) * nboot) +ihi<-nboot - ilow +ilow<-ilow+1 +ci=v[ilow] +ci[2]=v[ihi] +pv=mean(v=10 & n2>=10){ +x=elimna(x) +y=elimna(y) +nt=n1*n2 +if(nt<10^6)L=outer(x,y,FUN='-') +if(nt>=10^6){ +if(SEED)set.seed(2) +L=NULL +nmin=min(c(n1,n2,100)) +vef=NA +vefND=NA + +for(i in 1:iter){ +id1=sample(n1,nmin) +id2=sample(n2,nmin) +L=outer(x[id1],y[id2],FUN='-') +est=locfun(L,...) +vef[i]=mean(L-est<=est) +if(est<0)ef.sizeND=mean(L-est>=est) +} +ef.size=mean(vef) +} +if(nt<10^6){ +est=locfun(L,...) +ef.size=mean(L-est<=est) +}} + +list(Q.Effect=ef.size) +} + + +shiftQS=shiftes + +shiftesci<-function(x,y,locfun=median,alpha=.05,nboot=500,SEED=TRUE,...){ +# +# confidence interval for the quantile shift measure of effect size. +# +if(SEED)set.seed(2) +x=elimna(x) +y=elimna(y) +n1=length(x) +n2=length(y) +v=NA +for(i in 1:nboot){ +X=sample(x,n1,replace=TRUE) +Y=sample(y,n2,replace=TRUE) +v[i]=shiftes(X,Y,locfun=locfun)$Q.Effect +} +v=sort(v) +ilow<-round((alpha/2) * nboot) +ihi<-nboot - ilow +ilow<-ilow+1 +ci=v[ilow] +ci[2]=v[ihi] +ci +} + + +shiftci<-function(x,y,locfun=median,alpha=.05,pr=TRUE,...){ +# +# confidence interval for the quantile shift measure of effect size. 
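+#
+# Illustrative usage sketch for this family (simulated data; not part of the original code):
+#   set.seed(1)
+#   shiftes(rnorm(40),rnorm(40)+.5)$Q.Effect   # point estimate of the quantile-shift effect size Q
+#   shiftPBci(rnorm(40),rnorm(40)+.5)          # percentile bootstrap CI and p-value for Q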
+# +# OLD VERSION USE shiftesci +# +x=elimna(x) +y=elimna(y) +if(pr){ +if(sum(duplicated(x)>0))print('Duplicate values detected; suggest using shiftPBci') +if(sum(duplicated(y)>0))print('Duplicate values detected; suggest using shiftPBci') +} +n1=length(x) +n2=length(y) +if(pr){ +if(min(c(n1,n2)<40))print('Minimum sample size is less than 40; suggest using shiftPBci') +} +L=outer(x,y,FUN='-') +L=as.vector(L) +est=locfun(L,...) +ef=shiftes(x,y,locfun=locfun)$Q.Effect +ci=cidv2(x-2*est,y,alpha=alpha)$p.ci +list(n1=n1,n2=n2,effect.size=ef,conf.int=ci) +} + + +medpb.es<-function(x,alpha=.05,nboot=NA,grp=NA,est=median,con=0,bhop=FALSE, +SEED=TRUE,INT=FALSE,...){ +# +# Multiple comparisons for J independent groups using medians. +# +# A percentile bootstrap method with Rom's method is used. +# +# The data are assumed to be stored in x +# which either has list mode or is a matrix. In the first case +# x[[1]] contains the data for the first group, x[[2]] the data +# for the second group, etc. Length(x)=the number of groups = J. +# If stored in a matrix, the columns of the matrix correspond +# to groups. +# +# est is the measure of location and defaults to the median +# ... can be used to set optional arguments associated with est +# +# The argument grp can be used to analyze a subset of the groups +# Example: grp=c(1,3,5) would compare groups 1, 3 and 5. +# +# +# con can be used to specify linear contrasts; see the function lincon +# +# Missing values are allowed. +# +# A shift-type measure of effect size,Q, is reported. No effect, Q=.5 +# For two groups, let D=X-Y, let M be the population median of D. +# Let F be the distribution D-M. Then +# Q=F(M). If the median of D is M, there is no effect. +# Q represents a shift in location to some relatively high or low quantile associated with $F_0$ +# +con<-as.matrix(con) +if(is.data.frame(x))x=as.matrix(x) +if(is.matrix(x))x<-listm(x) +if(!is.list(x))stop('Data must be stored in list mode or in matrix mode.') +if(!is.na(sum(grp))){ # Only analyze specified groups. +xx<-list() +for(i in 1:length(grp))xx[[i]]<-x[[grp[i]]] +x<-xx +} +J<-length(x) +tempn<-0 +mvec<-NA +for(j in 1:J){ +temp<-x[[j]] +temp<-temp[!is.na(temp)] # Remove missing values. +tempn[j]<-length(temp) +x[[j]]<-temp +mvec[j]<-est(temp,...) +} +Jm<-J-1 +# +# Determine contrast matrix +# +if(sum(con^2)==0){ +ncon<-(J^2-J)/2 +con<-matrix(0,J,ncon) +id<-0 +for (j in 1:Jm){ +jp<-j+1 +for (k in jp:J){ +id<-id+1 +con[j,id]<-1 +con[k,id]<-0-1 +}}} +ncon<-ncol(con) +dvec<-alpha/c(1:ncon) +if(nrow(con)!=J)stop('Something is wrong with con; the number of rows does not match the number of groups.') +# Determine nboot if a value was not specified +if(is.na(nboot)){ +nboot<-5000 +if(J <= 8)nboot<-4000 +if(J <= 3)nboot<-2000 +} +# Determine critical values +if(!bhop){ +if(alpha==.05){ +dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) +if(ncon > 10){ +avec<-.05/c(11:ncon) +dvec<-c(dvec,avec) +}} +if(alpha==.01){ +dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) +if(ncon > 10){ +avec<-.01/c(11:ncon) +dvec<-c(dvec,avec) +}} +if(alpha != .05 && alpha != .01){ +dvec<-alpha/c(1:ncon) +} +} +if(bhop)dvec<-(ncon-c(1:ncon)+1)*alpha/ncon +bvec<-matrix(NA,nrow=J,ncol=nboot) +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +for(j in 1:J){ +#print(paste('Working on group ',j)) +data<-matrix(sample(x[[j]],size=length(x[[j]])*nboot,replace=TRUE),nrow=nboot) +bvec[j,]<-apply(data,1,est,...) 
# Bootstrapped values for jth group +} +test<-NA +bcon<-t(con)%*%bvec #ncon by nboot matrix +tvec<-t(con)%*%mvec +for (d in 1:ncon){ +tv<-sum(bcon[d,]==0)/nboot +test[d]<-sum(bcon[d,]>0)/nboot+.5*tv +if(test[d]> .5)test[d]<-1-test[d] +} +test<-2*test +output<-matrix(0,ncon,7) +dimnames(output)<-list(NULL,c('con.num','psihat','p.value','p.crit','ci.lower','ci.upper','Q.effect')) +temp2<-order(0-test) +zvec<-dvec[1:ncon] +sigvec<-(test[temp2]>=zvec) +output[temp2,4]<-zvec +icl<-round(dvec[ncon]*nboot/2)+1 +icu<-nboot-icl-1 +output[,2]=tvec +for (ic in 1:ncol(con)){ +output[ic,1]<-ic +output[ic,3]<-test[ic] +temp<-sort(bcon[ic,]) +output[ic,5]<-temp[icl] +output[ic,6]<-temp[icu] +see=lin.ES(x,con=con[,ic],locfun=median) +if(!INT)output[ic,7]=lin.ES(x,con=con[,ic],locfun=median)$Effect.Size +if(!INT)output[ic,7]=lin.ES(x,con=con[,ic],locfun=median)$Effect.Size +if(INT){ +id=which(con[,ic]!=0) +output[ic,7]=interQS(x[id],locfun=median,SEED=SEED)$Q.Effect +} +} +num.sig<-sum(output[,3]<=output[,4]) +list(output=output,con=con,num.sig=num.sig) +} + +medpbQS=medpb.es + +rplotCI<-function(x,y,tr=.2,fr=.5,p.crit=NA,plotit=TRUE,scat=TRUE, +SEED=TRUE,pyhat=FALSE,npts=25,xout=FALSE, +xlab='X',ylab='Y',low.span=2/3,nmin=12,pr=TRUE, +outfun=outpro,LPCI=FALSE,MID=TRUE,alpha=.05,pch='.',...){ +# +# Confidence interval for running interval smoother based on a trimmed mean. +# Unlike rplot, includes an approximate confidence band having simultaneous probability +# coverage equal to 1-alpha. More precisely, the simultaneous probability +# is for K=npts points +# +# LP=TRUE, the plot is further smoothed via lowess +# +# fr controls amount of smoothing +# +# To specify the points where confidence intervals are computed, +# use rplotCIsmm +# +if(pr){ +if(!LPCI)print('To get smoother plot, set LPCI=TRUE') +} +m<-cbind(x,y) +if(ncol(m)>2)stop('Only one independent variable can be used') +m<-elimna(m) +x=m[,1] +y=m[,2] +if(xout){ +xy=cbind(x,y) +flag=outfun(x,plotit=FALSE)$keep +x=xy[flag,1] +y=xy[flag,2] +} +n.used=NA +n=length(y) +if(n<50)stop('Need at least n=50') +nv=c(50, 60, 70, 80, 100, +150, 200, 300, 400, 500, 600, 800, 1000) +if(npts==25) pv=c(0.004846408, +0.004553274, +0.004236101, +0.004099674, + 0.00353898, #n=100 + 0.003366984, +0.003038767, + 0.003061386, + 0.002793521, + 0.002479689, + 0.002606313, + 0.0026630370, + 0.002836043) +if(npts==10) pv=c( +0.007612451, +0.008383655, +0.006992874, + 0.0068073, +0.005733889, +0.005767139, +0.006130155, +0.005447419, +0.005555951, +0.005228471, +0.005642503, +0.005402162, +0.005569217) +FLAG=FALSE +if(npts==25 || npts==10)FLAG=TRUE +if(alpha!=.05 || !FLAG){ +if(is.na(p.crit)){ +print('p.crit must be estimated, execution time might be high') +print('Or use the R function rplotCIsmm') +} +p.crit=rplotCITAP.pv(n,tr=tr,fr=fr,alpha=alpha,nmin=nmin,npts=npts,nreps=nreps) +} +rem.n=n +if(n>1000)n=1000 +if(is.na(p.crit))p.crit=lplot.pred(1/nv,pv,1/n)$yhat +n=rem.n +xord=order(x) +x=x[xord] +y=y[xord] +infit=rplot(x,y,tr=tr,xout=FALSE,plotit=plotit,LP=LPCI,fr=fr,pr=FALSE,pyhat=TRUE,xlab=xlab, +ylab=ylab) +rmd=infit$pyhat +m<-cbind(x,y) +if(ncol(m)>2)stop('One covariate only is allowed with this function') +m<-elimna(m) +nv=nrow(m) +if(xout){ +flag<-outfun(m[,1])$keep +m<-m[flag,] +} +x=m[,1] +y=m[,2] +n.keep=length(y) +res1=ancova(x,y,x,y,pr=FALSE,plotit=FALSE,fr1=fr,fr2=fr,nmin=nmin)$output +pts=seq(res1[1,1],res1[5,1],length.out=npts) +y.hat=NA +civ=matrix(NA,nrow=npts,ncol=2) +for(i in 1:length(pts)){ +xx=y[near(x,pt=pts[i],fr)] 
+doit=trimci(xx,tr=tr,alpha=p.crit,pr=FALSE) +civ[i,]=doit$ci +y.hat[i]=doit$estimate +n.used[i]=doit$n +} +up=civ[,2] +low=civ[,1] +if(plotit){ +if(LPCI){ +up=lplot(pts,up,plotit=FALSE,pyhat=TRUE,pr=FALSE,low.span=low.span)$yhat +y.hat=lplot(pts,y.hat,plotit=FALSE,pyhat=TRUE,pr=FALSE,low.span=low.span)$yhat +low=lplot(pts,low,plotit=FALSE,pyhat=TRUE,pr=FALSE,low.span=low.span)$yhat +} +lines(pts,up,lty=2) +lines(pts,low,lty=2) +} +if(pyhat){output<-cbind(pts,y.hat,low,up,n.used) +dimnames(output)=list(NULL,c('pts','y.hat','ci.low','ci.up','n.used')) +} +if(!pyhat)output<-'Done' +list(output=output,str=infit$Strength.Assoc,n=nv,n.keep=n.keep) +} + +signmcp<-function(x,y = NULL, alpha = 0.05, method='AC' , AUTO=TRUE,Method="hochberg"){ +# +# Dependent groups +# Perform sign test for all pairwise differences +# +if(!is.matrix(x))x<-matl(x) +if(!is.matrix(x))stop("Data must be stored in a matrix or in list mode.") +J<-ncol(x) +nval<-nrow(x) +ncon<-(J^2-J)/2 +dvec<-alpha/c(1:ncon) +psihat<-matrix(NA,ncon,9) +dimnames(psihat)<-list(NULL,c("Group","Group","n","N","Prob_x_less_than_y","ci.lower","ci.upper", +"p.value","p.adjusted")) +jcom<-0 +for (j in 1:J){ +for (k in 1:J){ +if (j < k){ +jcom<-jcom+1 +temp=signt(x[,j],x[,k],alpha=alpha,method=method,AUTO=AUTO) +psihat[jcom,1]<-j +psihat[jcom,2]<-k +psihat[jcom,3]<-temp$n +psihat[jcom,4]<-temp$N +psihat[jcom,5]<-temp$Prob_x_less_than_y +psihat[jcom,6:7]=temp$ci +if(method!='SD')psihat[jcom,8]=temp$p.value +}}} +if(method!='SD')psihat[,9]=p.adjust(psihat[,8],method=Method) +list(output=psihat) +} + + + + + +loc2difpb<-function(x,y,est=median,alpha=.05,nboot=2000,SEED=TRUE){ +# +# A percentile bootstrap +# confidence interval for the median of D=X-Y +# +if(SEED)set.seed(2) +x=elimna(x) +y=elimna(y) +n1=length(x) +n2=length(y) +v=NA +for(i in 1:nboot){ +X=sample(x,n1,replace=TRUE) +Y=sample(y,n2,replace=TRUE) +v[i]=loc2dif(X,Y) +} +pv=mean(v<0)+.5*mean(v==0) +pv=2*min(c(pv,1-pv)) +vs=sort(v) +crit<-alpha/2 +icl<-round(crit*nboot)+1 +icu<-nboot-icl +ci=vs[icl] +ci[2]=vs[icu] +list(ci=ci,p.value=pv) +} + +Dcbmhd<-function(x=NULL,y=NULL,d=NULL,qest=hd,alpha=.05,q=.25,plotit=FALSE,pop=0, +fr=.8,rval=15,xlab='',ylab='',nboot=600,SEED=TRUE){ +# +# +# Compute a confidence interval for the sum of the qth and (1-q)th quantiles +# of the distribution of D=X-Y, where X and Y are two +# dependent random variables. +# The Harrell-Davis estimator is used +# If the distribution of X and Y are identical, then in particular the +# distribution of D=X-Y is symmetric about zero. +# +# plotit=TRUE causes a plot of the difference scores to be created +# pop=0 adaptive kernel density estimate +# pop=1 results in the expected frequency curve. 
+# pop=2 kernel density estimate (Rosenblatt's shifted histogram) +# pop=3 boxplot +# pop=4 stem-and-leaf +# pop=5 histogram +# +if(SEED)set.seed(2) +if(q>=.5)stop('q should be less than .5') +if(q<=0)stop('q should be greater than 0') +if(is.null(d))d=x-y +if(is.null(d))stop('Apparently x or y contain no data') +d=elimna(d) +n=length(d) +q2=1-q +est1=qest(d,q) +est2=qest(d,q2) +data<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot) +bvec=NA +for(i in 1:nboot){ +bvec[i]=qest(d[data[i,]],q)+qest(d[data[i,]],q2) +} +p=mean(bvec>0)+.5*mean(bvec==0) +p=2*min(c(p,1-p)) +sbv=sort(bvec) +ilow<-round((alpha/2) * nboot) +ihi<-nboot - ilow +ilow<-ilow+1 +ci=sbv[ilow] +ci[2]=sbv[ihi] +if(plotit){ +if(pop==1 || pop==0){ +if(length(x)*length(y)>2500){ +print('Product of sample sizes exceeds 2500.') +print('Execution time might be high when using pop=0 or 1') +print('If this is case, might consider changing the argument pop') +print('pop=2 might be better') +}} +MM=d +if(pop==0)akerd(MM,xlab=xlab,ylab=ylab) +if(pop==1)rdplot(MM,fr=fr,xlab=xlab,ylab=ylab) +if(pop==2)kdplot(MM,rval=rval,xlab=xlab,ylab=ylab) +if(pop==3)boxplot(MM) +if(pop==4)stem(MM) +if(pop==5)hist(MM,xlab=xlab) +if(pop==6)skerd(MM) +} +list(q=q,n=n,Est1=est1,Est2=est2,sum=est1+est2,ci=ci,p.value=p) +} +Dqcihd<-function(x,y,alpha=.05,q=c(1:9/10), +plotit=FALSE,pop=0,fr=.8,rval=15,xlab="",ylab="",nboot=600,SEED=TRUE){ +# +# Compute a confidence interval for the quantiles for D=X-Y, X and Y independent. +# +# The Harrell-Davis estimator is used +# If the distribution of X and Y are identical, then in particular the +# distribution of D=X-Y is symmetric about zero. +# +# plotit=TRUE causes a plot of the difference scores to be created +# pop=0 adaptive kernel density estimate +# pop=1 results in the expected frequency curve. 
+# pop=2 kernel density estimate (Rosenblatt's shifted histogram) +# pop=3 boxplot +# pop=4 stem-and-leaf +# pop=5 histogram +# +if(SEED)set.seed(2) +x<-x[!is.na(x)] +y<-y[!is.na(y)] +n1=length(x) +n2=length(y) +m<-outer(x,y,FUN="-") +est=NA +for(i in 1:length(q))est[i]=hd(m,q=q[i]) +data1<-matrix(sample(n1,size=n1*nboot,replace=TRUE),nrow=nboot) +data2<-matrix(sample(n2,size=n2*nboot,replace=TRUE),nrow=nboot) +bvec=matrix(NA,nrow=nboot,ncol=length(q)) +for(i in 1:nboot){ +mb=outer(x[data1[i,]],y[data2[i,]],"-") +for(j in 1:length(q)) +bvec[i,j]=hd(mb,q=q[j]) +} +p=NA +ci=matrix(NA,nrow=length(q),ncol=2) +for(j in 1:length(q)){ +p[j]=mean(bvec[,j]>0)+.5*mean(bvec[,j]==0) +p[j]=2*min(c(p[j],1-p[j])) +sbv=sort(bvec[,j]) +ilow<-round((alpha/2) * nboot) +ihi<-nboot - ilow +ilow<-ilow+1 +ci[j,1]=sbv[ilow] +ci[j,2]=sbv[ihi] +} +if(plotit){ +if(pop==1 || pop==0){ +if(length(x)*length(y)>2500){ +print("Product of sample sizes exceeds 2500.") +print("Execution time might be high when using pop=0 or 1") +print("If this is case, might consider changing the argument pop") +print("pop=2 might be better") +}} +MM=as.vector(m) +if(pop==0)akerd(MM,xlab=xlab,ylab=ylab) +if(pop==1)rdplot(MM,fr=fr,xlab=xlab,ylab=ylab) +if(pop==2)kdplot(MM,rval=rval,xlab=xlab,ylab=ylab) +if(pop==3)boxplot(MM) +if(pop==4)stem(MM) +if(pop==5)hist(MM,xlab=xlab) +if(pop==6)skerd(MM) +} +output=cbind(as.matrix(q),as.matrix(est),ci,as.matrix(p)) +dimnames(output)=list(NULL,c("Quantile","Estimates","ci.low","ci.up","p-value")) +output +} +lplotcom2v2<-function(x,y,xout=FALSE,pts1=NULL,pts2=NULL,outfun=outpro,span=2/3,npts=10,tr=.2,...){ +# +# For two independent variables, estimate their relative importance when using LOESS +# +library(stats) +x<-as.matrix(x) +m<-elimna(cbind(x,y)) +n.orig=nrow(m) +n.keep=n.orig + +d<-ncol(x) +if(d!=2)stop('Current version is for two independent variables only') +if(xout){ +flag<-outfun(x,plotit=FALSE,...)$keep +m<-m[flag,] +} #$ +n.keep=nrow(m) +M=apply(m,2,median) +SD=apply(m,2,mad) +low=M-1.5*SD +up=M+1.5*SD +if(is.null(pts1))pts1=seq(low[1],up[1],length.out=npts) +if(is.null(pts2))pts2=seq(low[2],up[2],length.out=npts) +e1=NA +e2=NA +for(j in 1:length(pts1)){ # Determine strength of x2 given a value stored in pts1. +v2=cbind(rep(pts1[j],n.keep),m[,2]) +vals=lplot.pred(m[,1:2],m[,3],v2,span=span)$yhat +vals=elimna(vals) +nv=length(vals) +e2[j]=NA +if(nv>=10)e2[j]=winsd(vals,tr=tr,na.rm=TRUE) +} +for(j in 1:length(pts2)){ # Determine strength of x1 given a value stored in pts2. +v1=cbind(m[,1],rep(pts2[j],n.keep)) +vals=lplot.pred(m[,1:2],m[,3],v1,span=span)$yhat +vals=elimna(vals) +nv=length(vals) +e1[j]=NA +if(nv>=10)e1[j]=winsd(vals,tr=tr,na.rm=TRUE) + } +p=mean(outer(e1,e2,FUN='-')<0,na.rm=TRUE) +list(str1=e1,str2=e2,p=p,mean.str1=mean(e1),mean.str2=mean(e2)) +} + +lplotcomBCI<-function(x,y,xout=FALSE,pts1=NULL,pts2=NULL,p.crit=NULL, +outfun=outpro,span=2/3,npts=10,tr=.2,nboot=500, +SEED=TRUE,SEQ=FALSE,MAD.OP=FALSE,plotit=TRUE,ticktype='simple', +xlab='X1',ylab='X2',zlab='Y',reverse.x1=FALSE,reverse.x2=FALSE,pr=FALSE, +MEDIAN=FALSE,Q1=FALSE,Q2=FALSE,alpha=.05,MC=FALSE,...){ +# +# For two independent variables, estimate their relative importance when using LOESS +# p.crit is the critical p-value. If not specified, the function returns the approximate 0.05 critical p-value +# +# By default, use the average of the strength of the associations, so essentially a global test based on the quartiles +# MEDIAN=TRUE, use the median of the independent variables only. 
+# Q1=TRUE, use the lower quartile of the independent variables only. +# Q2=TRUE, use the upper quartile of the independent variables only. +# +# ADJ.CI=TRUE: Confidence intervals are based on the critical p-value +# otherwise use alpha +# +if(SEED)set.seed(2) +library(stats) +x<-as.matrix(x) +m<-elimna(cbind(x,y)) +n=nrow(m) +x=m[,1:2] +y=m[,3] +if(xout){ +flag=outfun(x,plotit=FALSE)$keep +x=x[flag,] +y=y[flag] +n=nrow(x) +} +if(n<50)stop('The sample size must be greater than or equal to 50') +if(MEDIAN){ +pts1=median(x[,1]) +pts2=median(x[,2]) +if(is.null(p.crit)){ +if(n<=200)p.crit=regYhat(c(1/50,1/100,1/200),c(.114,.080,.065),1/n) +if(n>200)p.crit=.062 +} +} +if(Q1){ +pts1=qest(x[,1],.25) +pts2=qest(x[,2],.25) +if(is.null(p.crit)){ +if(n<=200)p.crit=regYhat(c(1/50,1/100,1/200),c(.142,.095,.082),1/n) +if(n>200)p.crit=.062 +} +} +if(Q2){ +pts1=qest(x[,1],.75) +pts2=qest(x[,2],.75) +if(is.null(p.crit)){ +if(n<=200)p.crit=regYhat(c(1/50,1/100,1/200),c(.142,.095,.082),1/n) +if(n>200)p.crit=.062 +} +} +if(is.null(pts1)){ +pts1=qest(x[,1],c(.25,.5,.75)) +if(is.null(p.crit)){ +if(n<=200)p.crit=regYhat(c(1/50,1/100,1/200),c(.082,.076,.067),1/n) +if(n>200)p.crit=.06 +} +if(reverse.x1)pts1=sort(pts1,TRUE) +if(is.null(pts2))pts2=qest(x[,2],c(.25,.5,.75)) +if(reverse.x2)pts2=sort(pts2,TRUE) +} +if(SEQ){ +if(MAD.OP){ +M=apply(m,2,median) +SD=apply(m,2,mad) + +low=M-1.5*SD +up=M+1.5*SD +} +else{ +low=apply(m,2,qest,.25) +hi=apply(m,2,qest,.75) +} +pts1=seq(low[1],up[1],length.out=npts) +pts2=seq(low[2],up[2],length.out=npts) +} +v1=NA +v2=NA +data<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot) +if(!MC){ +for(i in 1:nboot){ +ib=data[i,] +temp=lplotcom2v2(x[ib,],y[ib],pts1=pts1,pts2=pts2,npts=npts,tr=tr,span=span) +v1[i]=temp$mean.str1 +v2[i]=temp$mean.str2 +}} +if(MC){ +library(parallel) +data=listm(t(data)) +bvec<-mclapply(data,lplotCIMCv2,x,y,pts1=pts1,pts2=pts2,npts=npts,tr=tr,span=span) +bvec=matl(bvec) # a 2-by-nboot matrix. +dif=sort(bvec[1,]-bvec[2,]) +} +if(!MC)dif=sort(v1-v2) +nbl=length(dif) +#ilow<-round((alpha/2) * nbl) +ilow<-round((p.crit/2) * nbl) +ihi<-nbl - ilow +ilow<-ilow+1 +ci.low=dif[ilow] +ci.hi=dif[ihi] +pv=mean(dif<0,na.rm=TRUE) +pv=2*min(pv,1-pv) +est=lplotcom2(x,y,xout=FALSE,pts1=pts1,pts2=pts2,outfun=outfun,span=span, +npts=npts,tr=tr) +if(plotit)lplot(x,y,ticktype=ticktype,xlab=xlab,ylab=ylab,zlab=zlab,pr=pr) +list(p.crit=p.crit,p.value=pv,str.x1.given.x2=est$str1,str.x2.given.x1=est$str2,mean.str1=est$mean.str1, +mean.str2=est$mean.str2, +ci.low=ci.low,ci.hi=ci.hi,pts.x1=pts1,pts.x2=pts2) +} + +lplotcomBCI9<-function(x,y,xout=FALSE,pr=TRUE, +outfun=outpro,span=2/3,npts=10,tr=.2,nboot=500, +SEED=TRUE,plotit=TRUE,ticktype='simple',ADJ.CI=TRUE, +xlab='X1',ylab='X2',zlab='Y',alpha=.05,MC=FALSE,...){ +# +# For two independent variables, estimate their relative importance when using LOESS +# Focus on the quartiles: none tests based on all possible combinations. +# +p.crit=NA +if(pr){ +if(alpha!=.05){ +if(pr)print('Critical p-value is taken to be the value of alpha. 
Unknown how to adjust when alpha is not .05') +p.crit=alpha +} +if(ADJ.CI)print('Confidence intervals are based on the critical p-value') +} +if(SEED)set.seed(2) +library(stats) +x<-as.matrix(x) +m<-elimna(cbind(x,y)) +n=nrow(m) +n.orig=n +x=m[,1:2] +y=m[,3] +if(xout){ +flag=outfun(x,plotit=FALSE)$keep +x=x[flag,] +y=y[flag] +n=nrow(x) +} +if(is.na(p.crit)){ +if(n<=100)p.crit=regYhat(c(1/50,1/100),c(.042,.025),1/n) +else p.crit=.025 +} +output<-matrix(NA,nrow=9,ncol=7) +dimnames(output)=list(NULL,c('pts1','pts2','p-value','str.x1.given.x2','str.x2.given.x1','ci.low','ci.hi')) +pts1=qest(x[,1],c(.25,.5,.75)) +pts2=qest(x[,2],c(.25,.5,.75)) + +v1=matrix(NA,nrow=nboot,ncol=3) +v2=matrix(NA,nrow=nboot,ncol=3) + +data<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot) +if(!MC){ +for(i in 1:nboot){ +ib=data[i,] +for(j in 1:3){ +temp=lplotcom2v2(x[ib,],y[ib],pts1=pts1[j],pts2=pts2[j],npts=npts,tr=tr,span=span) +v1[i,j]=temp$mean.str1 +v2[i,j]=temp$mean.str2 +}}} +if(MC){ +library(parallel) +data=listm(t(data)) +for(j in 1:3){ +bvec<-mclapply(data,lplotCIMCv2,x,y,pts1=pts1[j],pts2=pts2[j],npts=npts,tr=tr,span=span) +bvec=matl(bvec) # a 2-by-nboot matrix. +v1[,j]=bvec[1,] +v2[,j]=bvec[2,] +} +} +pc=matrix(NA,3,3) #rows are for pts1, columns for pts2 +ic=0 +for(j in 1:3){ +for(k in 1:3){ +est=lplotcom2(x,y,xout=FALSE,pts1=pts1[j],pts2=pts2[k],outfun=outfun,span=span, +npts=npts,tr=tr) +ic=ic+1 +output[ic,1]=pts1[j] +output[ic,2]=pts2[k] +dif=sort(v1[,j]-v2[,k]) +nbl=length(dif) +if(ADJ.CI)ilow<-round((p.crit/2) * nbl) +else ilow<-round((alpha/2) * nbl) +ihi<-nbl - ilow +ilow<-ilow+1 +ci.low=dif[ilow] +ci.hi=dif[ihi] +pv=mean(dif<0,na.rm=TRUE) +pc[j,k]=2*min(pv,1-pv) +output[ic,3]=pc[j,k] +output[ic,6]=ci.low +output[ic,7]=ci.hi +output[ic,4]=est$mean.str1 +output[ic,5]=est$mean.str2 +}} +if(plotit)lplot(x,y,ticktype=ticktype,xlab=xlab,ylab=ylab,zlab=zlab) +list(n=n.orig,n.keep=n,p.crit=p.crit,output=output) +} + +lplotCIMCv2<-function(data,x,y,pts1,pts2,npts,tr,span){ +temp=lplotcom2v2(x[data,],y[data],pts1=pts1,pts2=pts2,npts=npts,tr=tr,span=span) +v=c(temp$mean.str1,temp$mean.str2) +} + + +linconES<-function(x,con=0,tr=.2,alpha=.05,pr=TRUE,crit=NA,SEED=TRUE,INT=FALSE, +locfun=tmean){ +# +# Like the function lincon, only +# this function estimates effect size via +# quantile shift perspective. +# +# A heteroscedastic test of d linear contrasts using trimmed means. +# +# The data are assumed to be stored in $x$ in list mode, a matrix +# or a data frame. If in list mode, +# length(x) is assumed to correspond to the total number of groups. +# It is assumed all groups are independent. +# +# con is a J by d matrix containing the contrast coefficients that are used. +# If con is not specified, all pairwise comparisons are made. +# +# Missing values are automatically removed. +# +# +if(tr==.5)stop('Use the R function medpb to compare medians') +if(is.data.frame(x))x=as.matrix(x) +flag<-TRUE +if(alpha!= .05 && alpha!=.01)flag<-FALSE +if(is.matrix(x))x<-listm(x) +if(!is.list(x))stop('Data must be stored in a matrix or in list mode.') +con<-as.matrix(con) +J<-length(x) +sam=NA +h<-vector('numeric',J) +w<-vector('numeric',J) +xbar<-vector('numeric',J) +for(j in 1:J){ +xx<-!is.na(x[[j]]) +val<-x[[j]] +x[[j]]<-val[xx] # Remove missing values +sam[j]=length(x[[j]]) +h[j]<-length(x[[j]])-2*floor(tr*length(x[[j]])) + # h is the number of observations in the jth group after trimming. 
+w[j]<-((length(x[[j]])-1)*winvar(x[[j]],tr))/(h[j]*(h[j]-1)) +xbar[j]<-mean(x[[j]],tr) +} +if(sum(con^2)==0){ +CC<-(J^2-J)/2 +psihat<-matrix(0,CC,8) +dimnames(psihat)<-list(NULL,c('Group','Group','psihat','ci.lower','ci.upper', +'p.value','Q.effect','Rel.Q')) +test<-matrix(NA,CC,6) +dimnames(test)<-list(NULL,c('Group','Group','test','crit','se','df')) +jcom<-0 +for (j in 1:J){ +for (k in 1:J){ +if (j < k){ +jcom<-jcom+1 +test[jcom,3]<-abs(xbar[j]-xbar[k])/sqrt(w[j]+w[k]) +sejk<-sqrt(w[j]+w[k]) +test[jcom,5]<-sejk +psihat[jcom,1]<-j +psihat[jcom,2]<-k +test[jcom,1]<-j +test[jcom,2]<-k +psihat[jcom,3]<-(xbar[j]-xbar[k]) +df<-(w[j]+w[k])^2/(w[j]^2/(h[j]-1)+w[k]^2/(h[k]-1)) +test[jcom,6]<-df +psihat[jcom,6]<-2*(1-pt(test[jcom,3],df)) +psihat[jcom,7]=lin.ES(x[c(j,k)],con=c(1,-1))$Effect.Size +psihat[jcom,8]=(psihat[jcom,7]-.5)/.5 +if(CC>28)flag=FALSE +if(flag){ +if(alpha==.05)crit<-smmcrit(df,CC) +if(alpha==.01)crit<-smmcrit01(df,CC) +} +if(!flag || CC>28)crit<-smmvalv2(dfvec=rep(df,CC),alpha=alpha,SEED=SEED) +test[jcom,4]<-crit +psihat[jcom,4]<-(xbar[j]-xbar[k])-crit*sejk +psihat[jcom,5]<-(xbar[j]-xbar[k])+crit*sejk +}}}} +if(sum(con^2)>0){ +if(nrow(con)!=length(x)){ +stop('The number of groups does not match the number of contrast coefficients.') +} +psihat<-matrix(0,ncol(con),6) +dimnames(psihat)<-list(NULL,c('con.num','psihat','ci.lower','ci.upper', +'p.value','Q.effect')) +test<-matrix(0,ncol(con),5) +dimnames(test)<-list(NULL,c('con.num','test','crit','se','df')) +df<-0 +for (d in 1:ncol(con)){ +psihat[d,1]<-d +psihat[d,2]<-sum(con[,d]*xbar) +sejk<-sqrt(sum(con[,d]^2*w)) +test[d,1]<-d +test[d,2]<-sum(con[,d]*xbar)/sejk +df<-(sum(con[,d]^2*w))^2/sum(con[,d]^4*w^2/(h-1)) +if(flag){ +if(alpha==.05)crit<-smmcrit(df,ncol(con)) +if(alpha==.01)crit<-smmcrit01(df,ncol(con)) +} +if(!flag)crit<-smmvalv2(dfvec=rep(df,ncol(con)),alpha=alpha,SEED=SEED) +test[d,3]<-crit +test[d,4]<-sejk +test[d,5]<-df +if(!INT)psihat[d,6]=lin.ES(x,con[,d],tr=tr,nreps=nreps,SEED=SEED)$Effect.Size +if(INT){ +id=con[,d]!=0 +psihat[d,6]=interQS(x[id],nreps=nreps,locfun=locfun,SEED=SEED)$Q.Effect +} +psihat[d,3]<-psihat[d,2]-crit*sejk +psihat[d,4]<-psihat[d,2]+crit*sejk +psihat[d,5]<-2*(1-pt(abs(test[d,2]),df)) +} +} +if(pr){ +print('Note: confidence intervals are adjusted to control FWE') +print('But p-values are not adjusted to control FWE') +print('Adjusted p-values can be computed with the R function p.adjusted') +print('Under normality and homoscedasticity, Cohen d= .2, .5, .8') +print('corresponds approximately to Rel.Q = 0.55, 0.65 and 0.70, respectively') +} +list(n=sam,test=test,psihat=psihat) +} + +STRIPchart<-function(x,method ='overplot', jitter = 0.1, offset = 1/3, + vertical = FALSE, group.names, add = FALSE, + at = NULL, xlim = NULL, ylim = NULL, + ylab = NULL, xlab = NULL, dlab ='', glab ='', + log = '', pch = 0, col = par('fg'), cex = par('cex'), + axes = TRUE, frame.plot = axes, ...){ +# +# Same as stripchart, only it accepts a matrix, unlike stripchart, which +# allows x to be a data frame or list mode, but not a matrix. +# +if(is.matrix(x))x=listm(x) +stripchart(x,method=method,jitter=jitter,offset = offset, + vertical = vertical, group.names=group.names, add = add, + at =at, xlim = xlim, ylim = ylim, + ylab = ylab, xlab = xlab, dlab = dlab, glab = glab, + log = log, pch = pch, col = col, cex = cex, + axes = axes, frame.plot = frame.plot, ...) +} +interWMW<-function(x,locfun=median,nreps=200,SEED=TRUE,nmax=10^8){ +# +# Goal: estimate P(X_1-X_2 < X_3-X_4). 
+# +# That is, dealing with an interaction in a 2-by-2 ANOVA design based on +# a Wilcoxon--Mann--Whitney approach but allow heteroscedasticity. +# +# Strategy: estimate the distribution of X_1-X_2, non-parametrically do the same +# for X_3-X_4, then estimate P(X_1-X_2< X_3-X_4) +# +# x should be a matrix with four columns or have list mode with length=4 +# +if(SEED)set.seed(2) +if(is.data.frame(x))x=as.matrix(x) +if(is.matrix(x))x<-listm(x) +if(!is.list(x))stop('Data must be stored in a matrix or in list mode.') +x=elimna(x) +J<-length(x) +if(J!=4)stop('Number of groups should be four') +#nx=pool.a.list(lapply(x,FUN='length')) +LL=list() +LL[[1]]=outer(x[[1]],x[[2]],FUN='-') +LL[[2]]=outer(x[[3]],x[[4]],FUN='-') +nv=c(length(LL[[1]]),length(LL[[2]])) +ntot=nv[1]*nv[2] +if(ntot<=nmax)p=bmp(LL[[1]],LL[[2]])$phat +else{ +nmin=min(nv) +est=NA +p=NA +pest=NA +B=list() +M=matrix(NA,nrow=nmin,ncol=2) +for(i in 1:nreps){ +for(j in 1:2)M[,j]=sample(LL[[j]],nmin) +B[[i]]=M +pest[i]=mean(M[,1]10^3){ +if(SEED)set.seed(2) +Nmin1=min(c(nv[1],nv[2],100)) +Nmin2=min(c(nv[3],nv[4],100)) +for(i in 1:iter){ +id1=sample(nv[1],Nmin1) +id2=sample(nv[2],Nmin1) +L1=outer(x[[1]][id1],x[[2]][id2],FUN='-') +id1=sample(nv[3],Nmin2) +id2=sample(nv[4],Nmin2) +L2=outer(x[[3]][id1],x[[4]][id2],FUN='-') +ef[i]=pxly(L1,L2,iter=iter,SEED=SEED) +}} +if(nt<=10^3){ +L1=outer(x[[1]],x[[2]],FUN='-') +L2=outer(x[[3]],x[[4]],FUN='-') +ef=pxly(L1,L2,iter=iter,SEED=SEED) +} +ef=mean(ef) +ef +} + +interWMWAP<-function(x,nreps=100,SEED=TRUE,nboot=500,alpha=.05,nmax=10^8,MC=TRUE){ +# +# Interaction in a 2-by-2 design using P(X_1-X_210^3){ +if(SEED)set.seed(2) +for(i in 1:nreps){ +for(j in 1:4)M[,j]=sample(x[[j]],nmin) +L1=outer(M[,g[1]],M[,g[2]],FUN='+') +L2=outer(M[,g[3]],M[,g[4]],FUN='+') +ef[i]=shiftes(L1,L2,locfun=locfun)$Q.Effect +}} +else{ +L1=outer(x[[g[1]]],x[[g[2]]],FUN='+') +L2=outer(x[[g[3]]],x[[g[4]]],FUN='+') +ef=shiftes(L1,L2,locfun=locfun)$Q.Effect +} +es=mean(ef) +list(Q.Effect=es) +} + +QS1way<-function(x,locfun=median,alpha=0.05,SEED=TRUE,nboot=500,CI=TRUE){ +# +# Estimate quantile shift function when comparing all +# pairs of groups in a one-way (independent) groups design +# +# CI=TRUE: confidence intervals for the measure of effect size are computed. +# +if(is.matrix(x) || is.data.frame(x))x<-listm(x) +if(!is.list(x))stop('Data must be stored in a matrix or in list mode.') +J=length(x) +Jall=(J^2-J)/2 +con1=con1way(J) +output=matrix(NA,nrow=Jall,ncol=5) +dimnames(output)=list(NULL,c('Group','Group','Effect.Size','low.ci','up.ci')) +ic=0 +for(j in 1:J){ +for(k in 1:J){ +if(j10^3){ +if(SEED)set.seed(2) +for(i in 1:nreps){ +for(j in 1:4)M[,j]=sample(x[[j]],nmin) +L1=outer(M[,1],M[,2],FUN='-') +L2=outer(M[,3],M[,4],FUN='-') +ef[i]=shiftes(L1,L2,locfun=locfun)$Q.Effect +}} +else{ +L1=outer(x[[1]],x[[2]],FUN='-') +L2=outer(x[[3]],x[[4]],FUN='-') +ef=shiftes(L1,L2,locfun=locfun,...)$Q.effect +} +es=mean(ef) +list(Q.Effect=es) +} + + +linconQS<-function(x,con=0,tr=.2,alpha=.05,pr=TRUE,crit=NA,SEED=TRUE,INT=FALSE, +locfun=tmean){ +# +# +# This function is used when estimating effect size via +# quantile shift perspective. +# +# A heteroscedastic test of d linear contrasts using trimmed means. +# +# The data are assumed to be stored in x in list mode, a matrix +# or a data frame. If in list mode, +# length(x) is assumed to correspond to the total number of groups. +# It is assumed all groups are independent. +# +# con is a J by d matrix containing the contrast coefficients that are used. 
+# If con is not specified, all pairwise comparisons are made. +# +# Missing values are automatically removed. +# +# +if(tr==.5)stop('Use the R function medpb to compare medians') +if(is.data.frame(x))x=as.matrix(x) +flag<-TRUE +if(alpha!= .05 && alpha!=.01)flag<-FALSE +if(is.matrix(x))x<-listm(x) +if(!is.list(x))stop('Data must be stored in a matrix or in list mode.') +con<-as.matrix(con) +J<-length(x) +sam=NA +h<-vector('numeric',J) +w<-vector('numeric',J) +xbar<-vector('numeric',J) +for(j in 1:J){ +xx<-!is.na(x[[j]]) +val<-x[[j]] +x[[j]]<-val[xx] # Remove missing values +sam[j]=length(x[[j]]) +h[j]<-length(x[[j]])-2*floor(tr*length(x[[j]])) + # h is the number of observations in the jth group after trimming. +w[j]<-((length(x[[j]])-1)*winvar(x[[j]],tr))/(h[j]*(h[j]-1)) +xbar[j]<-mean(x[[j]],tr) +} +if(sum(con^2)==0){ +CC<-(J^2-J)/2 +psihat<-matrix(0,CC,8) +dimnames(psihat)<-list(NULL,c('Group','Group','psihat','ci.lower','ci.upper', +'p.value','Q.effect','Rel.Q')) +test<-matrix(NA,CC,6) +dimnames(test)<-list(NULL,c('Group','Group','test','crit','se','df')) +jcom<-0 +for (j in 1:J){ +for (k in 1:J){ +if (j < k){ +jcom<-jcom+1 +test[jcom,3]<-abs(xbar[j]-xbar[k])/sqrt(w[j]+w[k]) +sejk<-sqrt(w[j]+w[k]) +test[jcom,5]<-sejk +psihat[jcom,1]<-j +psihat[jcom,2]<-k +test[jcom,1]<-j +test[jcom,2]<-k +psihat[jcom,3]<-(xbar[j]-xbar[k]) +df<-(w[j]+w[k])^2/(w[j]^2/(h[j]-1)+w[k]^2/(h[k]-1)) +test[jcom,6]<-df +psihat[jcom,6]<-2*(1-pt(test[jcom,3],df)) +psihat[jcom,7]=lin.ES(x[c(j,k)],con=c(1,-1))$Effect.Size +psihat[jcom,8]=(psihat[jcom,7]-.5)/.5 +if(CC>28)flag=FALSE +if(flag){ +if(alpha==.05)crit<-smmcrit(df,CC) +if(alpha==.01)crit<-smmcrit01(df,CC) +} +if(!flag || CC>28)crit<-smmvalv2(dfvec=rep(df,CC),alpha=alpha,SEED=SEED) +test[jcom,4]<-crit +psihat[jcom,4]<-(xbar[j]-xbar[k])-crit*sejk +psihat[jcom,5]<-(xbar[j]-xbar[k])+crit*sejk +}}}} +if(sum(con^2)>0){ +if(nrow(con)!=length(x)){ +stop('The number of groups does not match the number of contrast coefficients.') +} +psihat<-matrix(0,ncol(con),6) +dimnames(psihat)<-list(NULL,c('con.num','psihat','ci.lower','ci.upper', +'p.value','Q.effect')) +test<-matrix(0,ncol(con),5) +dimnames(test)<-list(NULL,c('con.num','test','crit','se','df')) +df<-0 +for (d in 1:ncol(con)){ +psihat[d,1]<-d +psihat[d,2]<-sum(con[,d]*xbar) +sejk<-sqrt(sum(con[,d]^2*w)) +test[d,1]<-d +test[d,2]<-sum(con[,d]*xbar)/sejk +df<-(sum(con[,d]^2*w))^2/sum(con[,d]^4*w^2/(h-1)) +if(flag){ +if(alpha==.05)crit<-smmcrit(df,ncol(con)) +if(alpha==.01)crit<-smmcrit01(df,ncol(con)) +} +if(!flag)crit<-smmvalv2(dfvec=rep(df,ncol(con)),alpha=alpha,SEED=SEED) +test[d,3]<-crit +test[d,4]<-sejk +test[d,5]<-df +if(!INT)psihat[d,6]=lin.ES(x,con[,d],tr=tr,nreps=nreps,SEED=SEED)$Effect.Size +if(INT){ +id=con[,d]!=0 +psihat[d,6]=interQS(x[id],nreps=nreps,locfun=locfun,SEED=SEED)$Q.Effect +} +psihat[d,3]<-psihat[d,2]-crit*sejk +psihat[d,4]<-psihat[d,2]+crit*sejk +psihat[d,5]<-2*(1-pt(abs(test[d,2]),df)) +} +} +if(pr){ +print('Note: confidence intervals are adjusted to control FWE') +print('But p-values are not adjusted to control FWE') +print('Adjusted p-values can be computed with the R function p.adjusted') +print('Under normality and homoscedasticity, Cohen d= 0, .2, .5, .8') +print('correspond approximately to Q.effect = 0.5, 0.55, 0.65 and 0.70, respectively') +} +list(n=sam,test=test,psihat=psihat) +} + +bbmcpQS<-function(J,K,x,locfun,nreps=100,SEED=TRUE,POOL=TRUE,pr=TRUE){ +# +# For independent groups, +# compute quantile shift measure of effect size for all main effects and interactions. 
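+# (A minimal usage sketch of linconQS, defined above, using hypothetical data and assuming
+# the supporting functions in this file are available:
+# x=list(rnorm(30),rnorm(30,.3),rnorm(30,.6)); linconQS(x)
+# performs all pairwise comparisons and includes the Q.effect column in psihat.)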
+# +# To get an explanatory measure of effect size, use bbmcpEP +# + # The data are assumed to be stored in x in list mode or in a matrix. + # If grp is unspecified, it is assumed x[[1]] contains the data + # for the first level of both factors: level 1,1. + # x[[2]] is assumed to contain the data for level 1 of the + # first factor and level 2 of the second factor: level 1,2 + # x[[j+1]] is the data for level 2,1, etc. + # If the data are in wrong order, grp can be used to rearrange the + # groups. For example, for a two by two design, grp<-c(2,4,3,1) + # indicates that the second group corresponds to level 1,1; + # group 4 corresponds to level 1,2; group 3 is level 2,1; + # and group 1 is level 2,2. + # + # Missing values are automatically removed. + # + JK <- J * K + if(is.matrix(x) || is.data.frame(x)) + x <- listm(x) + if(!is.list(x)) + stop('Data must be stored in list mode or a matrix.') + if(JK != length(x)) + print('Warning: JK does not match the number of groups.') +x=elimna(x) # Remove missing values. +DONE=FALSE +if(J==2 & K==2){ +Factor.A=list() +Factor.A[[1]]=linconQS(x[1:2],pr=FALSE) +Factor.A[[2]]=linconQS(x[3:4],pr=FALSE) +Factor.B=list() +Factor.B[[1]]=linconQS(x[c(1,3)],pr=FALSE) +Factor.B[[2]]=linconQS(x[c(2,4)],pr=FALSE) +Factor.AB=linconQS(x,con=c(1,-1,-1,1),INT=TRUE,pr=FALSE) +DONE=TRUE +} +temp<-con2way(J,K) +conA<-temp$conA +conB<-temp$conB +conAB<-temp$conAB +if(!DONE){ + # Create the three contrast matrices +if(!POOL){ # For each level of Factor A, compute effect size +# for all pairwise comparisons among the levels of B +ID=matrix(c(1:JK),nrow=J,ncol=K,byrow=TRUE) +Factor.A=list() +for(j in 1:J){ +id=as.vector(ID[j,]) +Factor.A[[j]]=linconQS(x[id],pr=FALSE) +} +Factor.B=list() +ID=t(ID) +for(k in 1:K){ +id=as.vector(ID[k,]) +Factor.B[[k]]=linconQS(x[id],pr=FALSE) +} +}} +# Do interactions +Factor.AB=list() +for(l in 1:ncol(conAB)){ +#id=which(conAB[,l]!=0) +Factor.AB[[l]]=linconQS(x,con=conAB[,l],INT=TRUE,pr=FALSE) +} +# +if(POOL){ +ID=matrix(c(1:JK),nrow=J,ncol=K,byrow=TRUE) +LEV.A=list() +for(j in 1:J){ +id=as.vector(ID[j,]) +LEV.A[[j]]=pool.a.list(x[id]) +} +Factor.A=linconQS(LEV.A,pr=FALSE) +ID=t(ID) +LEV.B=list() +for(k in 1:K){ +id=as.vector(ID[k,]) +LEV.B[[k]]=pool.a.list(x[id]) +} +Factor.B=linconQS(LEV.B,pr=FALSE) +} +if(pr){ +print('The columns of conAB contain the contrast coefficients for the interactions.') +print('For example, the output in FactorAB[[1]] are the results based') +print('on the contrast coefficients in column 1') +print('which is the interaction for the first two rows and the first two columns') +print(' ') +print('Note: Under normality and homoscedasticity, Cohen d= 0, .2, .5, .8') +print('correspond approximately to Q.effect = 0.5, 0.55, 0.65 and 0.70, respectively') +} +if(!POOL){ +print('Factor.A: for each row of 1st factor, perform all pairwise') +print(' among the levels of Factor B and store the results in Factor.A') +print('Do the same for the second factor and store the results in Factor.B') +} +if(POOL){ +print('Factor.A: for each row of 1st factor, pool the data over the levels') +print('of Factor B. Then do all pairwise comparisons and store the results in Factor.A') +print('Do the same for the second factor and store the results in Factor.B') +} +list(Factor.A=Factor.A,Factor.B=Factor.B,Factor.AB=Factor.AB,conAB=conAB) +} + + +bwiDIF<-function(J,K,x,JK=J*K,grp=c(1:JK),alpha=.05,SEED=TRUE){ +# +# Same as bwimcp only use a Patel type approach +# +# Multiple comparisons for interactions +# in a split-plot design. 
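+# A minimal usage sketch (hypothetical data, equal group sizes): for J=2 levels of the
+# between factor and K=3 dependent measures, store the JK=6 groups as the columns of a
+# matrix m, ordered as described below, and call bwiDIF(2,3,m).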
+# The analysis is done by taking difference scores +# among all pairs of dependent groups and +# determining which of +# these differences differ across levels of Factor A +# using trimmed means. +# +# FWE is controlled via Hochberg's method +# To adjusted p-values, use the function p.adjust +# +# For MOM or M-estimators, use spmcpi which uses a bootstrap method +# +# The R variable x is assumed to contain the raw +# data stored in list mode or in a matrix. +# If in list mode, x[[1]] contains the data +# for the first level of both factors: level 1,1. +# x[[2]] is assumed to contain the data for level 1 of the +# first factor and level 2 of the second: level 1,2 +# x[[K]] is the data for level 1,K +# x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc. +# +# If the data are in a matrix, column 1 is assumed to +# correspond to x[[1]], column 2 to x[[2]], etc. +# +# When in list mode x is assumed to have length JK, the total number of +# groups being tested, but a subset of the data can be analyzed +# using grp +# + if(is.matrix(x)) { + y <- list() + for(j in 1:ncol(x)) + y[[j]] <- x[, j] + x <- y +} + +JK<-J*K +if(JK!=length(x))stop('Something is wrong. Expected ',JK,' groups but x contains ', length(x), 'groups instead.') +MJ<-(J^2-J)/2 +MK<-(K^2-K)/2 +JMK<-J*MK +MJMK<-MJ*MK +Jm<-J-1 +data<-list() +for(j in 1:length(x)){ +data[[j]]<-x[[grp[j]]] # Now have the groups in proper order. +} +x<-data +output<-matrix(0,MJMK,9) +dimnames(output)<-list(NULL,c('A','A','B','B','p.hat','p.value','ci.low','ci.up','p.adjust')) +jp<-1-K +kv<-0 +kv2<-0 +test<-NA +for(j in 1:J){ +jp<-jp+K +xmat<-matrix(NA,ncol=K,nrow=length(x[[jp]])) +for(k in 1:K){ +kv<-kv+1 +xmat[,k]<-x[[kv]] +} +xmat<-elimna(xmat) +for(k in 1:K){ +kv2<-kv2+1 +x[[kv2]]<-xmat[,k] +}} +m<-matrix(c(1:JK),J,K,byrow=TRUE) +ic<-0 +for(j in 1:J){ +for(jj in 1:J){ +if(j=zvec) +output[temp2,7]<-zvec +output[,7]<-output[,7] +output +} + +bwmcpAKP<-function(J,K,x,tr=.2,pr=TRUE){ +# +# Compute Algina et al measure of effect size for all pairwise comparisons +# in a between-by-within design +# +if(pr){ +print('A[[1]] contains the estimated effect size for level 1 of Factor A;') +print(' all pairwise comparisons over Factor B') +print('A[[2]] contains results for level 2, etc.') +} +if(is.matrix(x) || is.data.frame(x))x<-listm(x) +JK=J*K +ID=matrix(c(1:JK),nrow=J,ncol=K,byrow=TRUE) +A=list() +for (j in 1:J)A[[j]]=wmcpAKP(x[ID[j,]]) +B=list() +for(k in 1:K)B[[k]]=bmcpAKP(x[ID[,k]],tr=tr) +AB=bwimcpAKP(J,K,x)[,c(1:4,8)] +list(Factor.A=A,Factor.B=B,interactions=AB) +} + +bmcpQS<-function(x,locfun=median,...){ +# +# Compute quantile shift measure of effect size for all pairs of J independent groups +# +if(is.matrix(x) || is.data.frame(x))x<-listm(x) +J=length(x) +C=(J^2-J)/2 +A=matrix(NA,nrow=C,ncol=3) +dimnames(A)=list(NULL,c('Group','Group','Effect.Size')) +ic=0 +for (j in 1:J){ +for (k in 1:J){ +if(j=null.value)ef.sizeND=mean(L-est+null.value<=est) +ef.size=mean(L-est+null.value<=est) +if(est=est) +list(Q.effect=ef.size) +} + + + +linQS<-function(x,con,locfun=median,nreps=200,SEED=TRUE){ +# +# Determine distribution of Y_i=sum_j c_jX_j +# Then estimate quantile shift in location measure of effect size +# locfun, which defaults to the median. 
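+# A minimal usage sketch (hypothetical data, three independent groups, assuming the
+# supporting functions in this file are available):
+# x=list(rnorm(20),rnorm(20),rnorm(20,.5)); linQS(x,con=c(1,-1,0))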
+# +if(sum(con)!=0)stop('Contrast coefficients must sum to zero') +if(SEED)set.seed(2) +if(is.data.frame(x))x=as.matrix(x) +if(is.matrix(x))x<-listm(x) +if(!is.list(x))stop('Data must be stored in a matrix or in list mode.') +J<-length(x) +if(length(con)!=J)stop('Length of con should equal number of groups') +x=elimna(x) +nv=as.vector(matl(lapply(x,FUN='length'))) +nmin=min(nv) +est=NA +p=NA +B=list() +M=matrix(NA,nrow=nmin,ncol=J) +for(i in 1:nreps){ +for(j in 1:J)M[,j]=sample(x[[j]],nmin) +B[[i]]=M +} +L=lapply(B,linWMWMC.sub,con=con) +est=lapply(L,locfun) +p=lapply(L,linQS.sub2,locfun=locfun) +est=as.vector(matl(est)) +p=as.vector(matl(p)) +list(Q.effect=mean(p),center=mean(est)) +} + +linQS.sub2<-function(L,locfun=median){ +phat=mean(L-locfun(L)11)*(nval2>11)) +flag[i]=flag.chk +if(flag.chk){ +Y1=y1[near3d(x1,pts[i,],fr=fr1,m=m1)] +Y2=y2[near3d(x2,pts[i,],fr=fr2,m=m2)] +temp=yuen(Y1,Y2,tr=tr) +temp=pool.a.list(temp[1:7]) +ic=ic+1 +output[ic,]=temp +}} +sel.pts=NULL +sig.points=NULL +if(ic>0){ +n.sel=sum(flag) +output=output[1:n.sel,] +sel.pts=pts[flag,] +} + +if(sum(flag)==0){ +print('Could not find any point with 12 or more nearest neighbors') +output=matrix(NA,nrow=1,ncol=8) +n.sel=0 +num.sig=0 +} +id.sig=NULL +padj=NULL +p.crit=NULL +if(n.sel>0){ +sel.id=c(1:n.sel) +if(n.sel<=25){ +if(n.sel==1)padj=output[7] +else +padj=p.adjust(output[,7],method='hoch') +flag=padj<=alpha +if(sum(flag)==1)sig.points=sel.pts[flag] +if(sum(flag)>1)sig.points=sel.pts[flag,] +num.sig=sum(flag) +id.sig=sel.id[flag] +} +if(n.sel>25){ +if(n.sel<=100)p.crit=0.0806452604/n.sel-0.0002461736 +if(n.sel>100)p.crit=6.586286e-02/n.sel+4.137143e-05 +flag=output[,7]<=p.crit +if(sum(flag)>0)sig.points=sel.pts[flag,] +} +num.sig=sum(flag) +id.sig=sel.id[flag] +} +list(selected.points=sel.pts,output=output,significant.points=sig.points,num.sig=num.sig,id.sig=id.sig) +} + + + +reg.vs.rplot<-function(x,y,xout=FALSE,fr=1,est=median,regfun=Qreg,Qreg.plot=TRUE,qv=c(.25,.75),SMQ=FALSE, +pr=TRUE,xlab='Reg.Est',ylab='Rplot.Est',pch='*'){ +# +# If the linear model is correct, the plot returned here should be +# tightly clustered around a line having slope=1 and intercept=0, indicated +# by a dashed line. +# +if(pr)print('This function was updated July 2022') +xy=elimna(cbind(x,y)) +p1=ncol(xy) +p=p1-1 +x=xy[,1:p] +y=xy[,p1] +e1=regYhat(x,y,xout=xout,regfun=regfun) +e2=rplot.pred(x,y,xout=xout,est=est,fr=fr)$Y.hat +if(Qreg.plot){ +if(!SMQ)qplotreg(e1,e2,xlab=xlab,ylab=ylab,pch=pch,qval=qv) +if(SMQ)qhdsm(e1,e2,xlab=xlab,ylab=ylab,pch=pch,qval=qv,LP=TRUE) +} +if(!(Qreg.plot)) lplot(e1,e2,xlab=xlab,ylab=ylab,pc=pch) +abline(0,1,lty=2) +} + +reg.vs.lplot<-function(x,y,xout=FALSE,Qreg.plot=TRUE,qv=c(.25,.75),SMQ=FALSE,pch='*',pr=TRUE, +outfun=outpro,fr=1,est=mean,regfun=tsreg,xlab='Reg.est',ylab='Lplot.est',span=.75,...){ +# +# +# +if(pr)print('This function was updated July 2022') +xy=elimna(cbind(x,y)) +p1=ncol(xy) +p=p1-1 +x=xy[,1:p] +y=xy[,p1] +if(xout){ +m<-cbind(x,y) +flag<-outfun(x,plotit=FALSE,...)$keep +m<-m[flag,] +x<-m[,1:p] +y<-m[,p1] +} +e1=regYhat(x,y,regfun=regfun) +e2=lplot.pred(x,y,,est=est,span=span)$yhat +if(Qreg.plot){ +if(!SMQ)qplotreg(e1,e2,xlab=xlab,ylab=ylab,pch=pch,qval=qv) +if(SMQ)qhdsm(e1,e2,xlab=xlab,ylab=ylab,pch=pch,qval=qv,LP=TRUE) +} +if(!(Qreg.plot)) lplot(e1,e2,xlab=xlab,ylab=ylab,pc=pch) +abline(0,1,lty=2) +} + +bbdetmcp<-function(J,K,x,tr=0.2){ +# +# For each level of Factor A, do all pairiwise comparisons +# among levels of B and store results in A in list mode. 
+#
+# For each level of Factor B, do all pairwise comparisons
+# among levels of A and store results in B in list mode.
+#
+if(is.matrix(x) || is.data.frame(x))x=listm(x)
+A=list()
+B=list()
+JK=J*K
+mat<-matrix(c(1:JK),ncol=K,byrow=TRUE)
+for(j in 1:J)A[[j]]=lincon(x[mat[j,]],tr=tr,pr=FALSE)
+for(k in 1:K)B[[k]]=lincon(x[mat[,k]],tr=tr,pr=FALSE)
+list(Levels.of.A=A,Level.of.B=B)
+}
+
+bbdetmcpQS<-function(J,K,x,tr=0.2){
+#
+# For each level of Factor A, do all pairwise comparisons
+# among levels of B and store results in A in list mode.
+#
+# For each level of Factor B, do all pairwise comparisons
+# among levels of A and store results in B in list mode.
+#
+if(is.matrix(x) || is.data.frame(x))x=listm(x)
+A=list()
+B=list()
+JK=J*K
+mat<-matrix(c(1:JK),ncol=K,byrow=TRUE)
+for(j in 1:J)A[[j]]=linconQS(x[mat[j,]],tr=tr,pr=FALSE)
+for(k in 1:K)B[[k]]=linconQS(x[mat[,k]],tr=tr,pr=FALSE)
+list(Levels.of.A=A,Level.of.B=B)
+}
+
+
+ancJNPVAL<-function(x1,y1,x2,y2,regfun=MMreg,p.crit=NULL,DEEP=TRUE,
+plotit=TRUE,xlab='X1',ylab='X2',null.value=0,WARNS=FALSE,
+alpha=.05, pts=NULL,SEED=TRUE,nboot=100,xout=FALSE,outfun=outpro,...){
+#
+# Compare two independent groups using a generalization of the ancts function that
+# allows more than one covariate.
+#
+# Design points can be specified via the argument
+# pts: a matrix with p=ncol(x1) columns.
+#
+# DEEP=FALSE: If pts=NULL, design points are chosen to be the deepest point in
+# rbind(x1,x2) plus points on the .5 depth contour.
+#
+# DEEP=TRUE: choose the deepest half of the points in rbind(x1,x2) and use the critical p-value indicated by
+# p.crit.
+#
+# alpha=.05 refers to the desired probability of one or more Type I errors. If
+# p.crit=NULL,
+# when alpha=.05 or .01 and the number of covariates is <=6, p.crit is
+# determined quickly by this function. That is, the familywise error will be approximately alpha.
+#
+# If the number of covariates is > 6, it is unknown how to adjust p.crit to control the familywise error.
+#
+# plotit=TRUE: if p=2 covariates, plot covariate points with
+# non-significant points indicated by * and significant points by +
+
+# (This function replaces anctsmp, which does not have an option for
+# using the deepest half of the covariate points.)
+#
+if(SEED)set.seed(2)
+if(!is.null(pts[1]))DEEP=FALSE
+x1=as.matrix(x1)
+x2=as.matrix(x2)
+if(ncol(x1)!=ncol(x2))stop('x1 and x2 have a different number of columns')
+p=ncol(x1)
+if(p==1)stop('Should have at least two covariates')
+if(p>6)stop('Current version is limited to six covariates or less')
+p1=p+1
+m1=elimna(cbind(x1,y1))
+x1=m1[,1:p]
+y1=m1[,p1]
+m2=elimna(cbind(x2,y2))
+x2=m2[,1:p]
+y2=m2[,p1]
+#
+if(xout){
+m<-cbind(x1,y1)
+flag<-outfun(x1,plotit=FALSE)$keep
+m<-m[flag,]
+x1<-m[,1:p]
+y1<-m[,p1]
+m<-cbind(x2,y2)
+flag<-outfun(x2,plotit=FALSE)$keep
+m<-m[flag,]
+x2<-m[,1:p]
+y2<-m[,p1]
+}
+n1=length(y1)
+n2=length(y2)
+if(is.null(pts[1])){
+if(!DEEP){
+x1<-as.matrix(x1)
+pts<-ancdes(unique(rbind(x1,x2)))
+p.crit=NULL
+}
+if(DEEP){
+xall=unique(rbind(x1,x2))
+pd=pdepth(xall)
+id.keep=which(pd>median(pd))
+pts=xall[id.keep,]
+pts=unique(pts)
+}}
+pts<-as.matrix(pts)
+ntests=nrow(pts)
+mat<-matrix(NA,ntests,8)
+dimnames(mat)<-list(NULL,c('Est 1', 'Est 2','DIF','TEST','se','ci.low','ci.hi','p.value'))
+if(!WARNS)options(warn=-1)
+sqsd1=regYvar(x1,y1,pts=pts,regfun=regfun,nboot=nboot,SEED=FALSE,xout=FALSE,outfun=outfun,...)
+sqsd2=regYvar(x2,y2,pts=pts,regfun=regfun,nboot=nboot,SEED=FALSE,xout=FALSE,outfun=outfun,...)
+# xout=FALSE because leverage points have already been removed.
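+# sqsd1 and sqsd2 above are bootstrap estimates of the squared standard errors of the
+# predicted Y values at the design points; est1 and est2 below are the corresponding
+# predicted values for each group.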
+est1=regYhat(x1,y1,regfun=regfun,xr=pts,xout=FALSE,outfun=outfun,...) +est2=regYhat(x2,y2,regfun=regfun,xr=pts,xout=FALSE,outfun=outfun,...) +if(!WARNS)options(warn=0) #turn warnings back on +mat[,1]=est1 +mat[,2]=est2 +est=est1-est2 +mat[,3]=est +sd=sqrt(sqsd1+sqsd2) +mat[,5]=sd +tests=(est1-est2)/sd +mat[,4]=tests +pv=2*(1-pnorm(abs(tests))) +mat[,8]=pv +crit=NULL +if(ntests==1)crit=qnorm(1-alpha/2) +if(nrow(pts)>1){ +if(ntests<=25){ +if(alpha==.05)crit<-smmcrit(Inf,ntests) +if(alpha==.01)crit<-smmcrit01(Inf,ntests) +}} +if(ntests>25){ +pvals.05=c(NA,.00615847,0.002856423,.00196,0.001960793,0.001120947) +pvals.01=c(NA,0.001006744,0.000237099,0.0003169569,0.0002031497,9.442465e-05) +if(alpha==.05){ +crit=qnorm(1-pvals.05[p]/2) +p.crit=pvals.05[p] +} +if(alpha==.01){ +crit=qnorm(1-pvals.01[p]/2) +p.crit=pvals.01[p] +} +} +mat[,6]=est-crit*sd +mat[,7]=est+crit*sd +flag=rep(FALSE,nrow(mat)) +flag.chk1=as.logical(mat[,6]>null.value) +flag.chk2=(mat[,7]0) +#if(!is.null(p.crit))num.sig=sum(mat[,8]<=p.crit) +num.sig=sum(mat[,8]<=p.crit) +if(p==2){ +if(plotit){ +plot(pts[,1],pts[,2],xlab=xlab,ylab=ylab,type='n') +flag[flag.chk]=TRUE +points(pts[!flag,1],pts[!flag,2],pch='*') +points(pts[flag,1],pts[flag,2],pch='+') +}} +sig.points=NULL +if(!is.null(p.crit)){ +if(num.sig>0){ +pick=which(mat[,8]<=p.crit) +sig.points=pts[pick,] +}} +list(n1=n1,n2=n2,num.sig=num.sig,p.crit=p.crit,points=pts,output=mat, significant.points=sig.points) +} + + +qhatDEP<-function(x1,x2,depthfun=prodepth,...){ +# +# Compute apparent probability of correct classification +# +x1<-x1[!is.na(x1)] +x2<-x2[!is.na(x2)] +x1=as.matrix(x1) +x2=as.matrix(x2) +tv=c(rep(1,nrow(x1)),rep(2,nrow(x2))) +see=discdepth(x1,x2,z=rbind(x1,x2)) +qhat=mean(tv==see) +qhat +} +qhatdepPB<-function(x1,x2,nboot=500,alpha=.05,depthfun=prodepth, +SEED=TRUE,...){ +# +# +if(SEED)set.seed(2) +bvec=NA +x1=as.matrix(x1) +x2=as.matrix(x2) +n1=nrow(x1) +n2=nrow(x2) +for(i in 1:nboot){ +dat1=sample(n1,n1,replace=TRUE) +dat2=sample(n2,n2,replace=TRUE) +bvec[i]=qhatDEP(x1[dat1,],x2[dat2,],depthfun=depthfun) +} +est=qhatDEP(x1,x2) +bvec=sort(bvec) +crit<-alpha/2 +icl<-round(crit*nboot)+1 +icu<-nboot-icl +ci<-bvec[icl] +ci[2]<-bvec[icu] +list(estimate=est,ci=ci) +} + + +discdepth<-function(train=NULL,test=NULL,g,x1=NULL,x2=NULL,depthfun=prodepth,...){ +# +# x1 and x2 contain the data for the two groups +# Goal, classify the values in test using depths associated with the training data +# +if(!is.null(train)){ +if(is.null(g))stop('Argument g, group ids, must be specified') +if(is.matrix(g)){ +if(dim(g)>1)stop('Argument g should be a vector') +} +Train=cbind(train,g) +Train=elimna(Train) +p=ncol(train) +p1=p+1 +train=Train[,1:p] +g=Train[,p1] +flag=g==min(g) +x1=Train[flag,1:p] +x2=Train[!flag,1:p] +} +if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') +x1=elimna(x1) +x2=elimna(x2) +n1=nrow(x1) +n2=nrow(x2) +if(is.null(test))stop('No test data, argument test is NULL') +test=elimna(test) +z=as.matrix(test) +x1=as.matrix(x1) +x2=as.matrix(x2) +z=as.matrix(z) +d1=depthfun(x1,pts=z,...) +d2=depthfun(x2,pts=z,...) +flag=d1>d2 +N=nrow(z) +id=rep(2,N) +id[flag]=1 +id +} + +dis.depth.bag<-function(train=NULL,test=NULL,g=NULL,x1=NULL,x2=NULL,depthfun=prodepth,nboot=100,SEED=TRUE,...){ +# +# +# g=class id +# if there are two classes and the training data are stored in separate variables, can enter +# the data for each class via the arguments +# x1 and x2. +# The function will then create appropriate labels and store them in g. +# +# Uses data depths. 
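+# A minimal usage sketch (hypothetical bivariate data for two groups, assuming prodepth and
+# the other supporting functions in this file are available):
+# x1=matrix(rnorm(60),ncol=2); x2=matrix(rnorm(60,.5),ncol=2)
+# dis.depth.bag(x1=x1,x2=x2,test=rbind(x1,x2))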
+# KNNdist uses data depths, for the n1!=n2 it can be a bit biased, meaning that +# when there is no association, the probability of a correct classification will be less than .5 +# +# +if(is.null(test))stop('test =NULL, no test data provided') +if(SEED)set.seed(2) +if(!is.null(train)){ +if(is.null(g))stop('Argument g, group labels, must be specified') +if(is.matrix(g)){ +if(dim(g)>1)stop('Argument g should be a vector') +traing=elimna(cbind(train,g)) +p=ncol(train) +p1=p+1 +train=traing[,1:p] +test=traing[,p1] +if(length(unique(g))>2)stop('Only two groups allowed, g has more than two unique values') +} +x=fac2list(train,g) +x1=x[[1]] +x2=x[[2]] +} +test=as.matrix(test) +n.test=nrow(test) +if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') +if(ncol(test)!=ncol(x2))stop('test and x2 have different number of columns') +x1=elimna(x1) +x2=elimna(x2) +n1=nrow(x1) +n2=nrow(x2) +n=min(c(n1,n2)) +dvec=matrix(NA,nrow=nboot,ncol=n.test) +for(i in 1:nboot){ +id1=sample(n1,n,replace=TRUE) +id2=sample(n2,n,replace=TRUE) + +dvec[i,]=discdepth(x1=x1[id1,],x2=x2[id2,],test=test,depthfun=depthfun,...) +} +dec=rep(1,n.test) +test1=dvec==1 +test2=dvec==2 +chk1=apply(test1,2,sum) +chk2=apply(test2,2,sum) +idec=chk2>chk1 +dec[idec]=2 +dec +} + + +pdepth<-function(m,pts=m,MM=FALSE,cop=3,dop=1,center=NA, SEED=TRUE){ +# +# projection depth +# +# SEED, included for convenience when this function is used with certain classification techniques. +# +v=pdis(m,pts=pts,MM=MM,cop=cop,dop=dop,center=center) +v=1/(1+v) +v +} +pdis<-function(m,pts=m,MM=FALSE,cop=3,dop=1,center=NA,na.rm=TRUE){ +# +# Compute projection distances for points in pts relative to points in m +# That is, the projection distance from the center of m +# +# +# MM=F Projected distance scaled +# using interquatile range. +# MM=T Scale projected distances using MAD. +# +# There are five options for computing the center of the +# cloud of points when computing projections: +# cop=1 uses Donoho-Gasko median +# cop=2 uses MCD center +# cop=3 uses median of the marginal distributions. +# cop=4 uses MVE center +# cop=5 uses skipped mean +# +m<-elimna(m) # Remove missing values +pts=elimna(pts) +m<-as.matrix(m) +nm=nrow(m) +pts<-as.matrix(pts) +if(ncol(m)>1){ +if(ncol(pts)==1)pts=t(pts) +} +npts=nrow(pts) +mp=rbind(m,pts) +np1=nrow(m)+1 +if(ncol(m)==1){ +m=as.vector(m) +pts=as.vector(pts) +if(is.na(center[1]))center<-median(m) +dis<-abs(pts-center) +disall=abs(m-center) +temp=idealf(disall) +if(!MM){ +pdis<-dis/(temp$qu-temp$ql) +} +if(MM)pdis<-dis/mad(disall) +} +else{ +if(is.na(center[1])){ +if(cop==1)center<-dmean(m,tr=.5,dop=dop) +if(cop==2)center<-cov.mcd(m)$center +if(cop==3)center<-apply(m,2,median) +if(cop==4)center<-cov.mve(m)$center +if(cop==5)center<-smean(m) +} +dmat<-matrix(NA,ncol=nrow(mp),nrow=nrow(mp)) +for (i in 1:nrow(mp)){ +B<-mp[i,]-center +dis<-NA +BB<-B^2 +bot<-sum(BB) +if(bot!=0){ +for (j in 1:nrow(mp)){ +A<-mp[j,]-center +temp<-sum(A*B)*B/bot +dis[j]<-sqrt(sum(temp^2)) +} +dis.m=dis[1:nm] +if(!MM){ +#temp<-idealf(dis) +temp<-idealf(dis.m) +dmat[,i]<-dis/(temp$qu-temp$ql) +} +if(MM)dmat[,i]<-dis/mad(dis.m) +}} +pdis<-apply(dmat,1,max,na.rm=na.rm) +pdis=pdis[np1:nrow(mp)] +} +pdis +} +yuenQS<-function(x,y=NULL,tr=.2,alpha=.05, plotit=FALSE,op=TRUE, +cor.op=FALSE,loc.fun=median,pr=TRUE,xlab='X',ylab=' ' ){ +# +# Perform Yuen's test for trimmed means on the data in x and y. +# The default amount of trimming is 20% +# Missing values (values stored as NA) are automatically removed. 
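+# A minimal usage sketch (hypothetical data, assuming the supporting functions in this
+# file are available): x=rnorm(30); y=rnorm(30,.5); yuenQS(x,y)$Q.Effect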
+# +# A confidence interval for the trimmed mean of x minus the +# the trimmed mean of y is computed and returned in yuen$ci. +# The significance level is returned in yuen$p.value +# +# Unlike the function yuen, a robust quantile shift measure +# of effect size is returned. +# +if(pr){ +print('Note: Under normality and homoscedasticity, Cohen d= 0, .2, .5, .8') +print('correspond approximately to Q.Effect = 0.5, 0.55, 0.65 and 0.70, respectively') +} +if(tr==.5)stop('Use medpb to compare medians.') +if(tr>.5)stop('cannot have tr>.5') +if(is.null(y)){ +if(is.matrix(x) || is.data.frame(x)){ +y=x[,2] +x=x[,1] +} +if(is.list(x)){ +y=x[[2]] +x=x[[1]] +} +} +library(MASS) +x<-x[!is.na(x)] # Remove any missing values in x +y<-y[!is.na(y)] # Remove any missing values in y +n1=length(x) +n2=length(y) +h1<-length(x)-2*floor(tr*length(x)) +h2<-length(y)-2*floor(tr*length(y)) +q1<-(length(x)-1)*winvar(x,tr)/(h1*(h1-1)) +q2<-(length(y)-1)*winvar(y,tr)/(h2*(h2-1)) +df<-(q1+q2)^2/((q1^2/(h1-1))+(q2^2/(h2-1))) +crit<-qt(1-alpha/2,df) +m1=mean(x,tr) +m2=mean(y,tr) +mbar=(m1+m2)/2 +dif=m1-m2 +low<-dif-crit*sqrt(q1+q2) +up<-dif+crit*sqrt(q1+q2) +test<-abs(dif/sqrt(q1+q2)) +yuen<-2*(1-pt(test,df)) +e.pow=shiftQS(x,y,tmean,tr=tr)$Q.Effect +if(plotit){ +g2plot(x,y,xlab=xlab,ylab=ylab) +} +list(ci=c(low,up),n1=n1,n2=n2, +p.value=yuen,dif=dif,se=sqrt(q1+q2),teststat=test, +crit=crit,df=df,Q.Effect=e.pow) +} + +regbtci<-function(x,y,regfun=qreg,alpha=.05,nboot=300,xout=FALSE,outfun=outpro,SEED=TRUE,...){ +# +# Bootstrap-t confidence intervals for regression parameters +# +if(SEED)set.seed(2) +xx<-elimna(cbind(x,y)) +np<-ncol(xx) +p<-np-1 +y<-xx[,np] +x<-xx[,1:p] +x<-as.matrix(x) +n.orig=length(y) +if(xout){ +x<-as.matrix(x) +flag<-outfun(x,plotit=FALSE,...)$keep +x<-x[flag,] +y<-y[flag] +} +vlabs='Intercept' +for(j in 2:np)vlabs[j]=paste('Slope',j-1) +regout<-matrix(0,np,5) +dimnames(regout)<-list(vlabs,c('ci.low','ci.up','Estimate','S.E.','p-value')) +val=regse(x,y,regfun=regfun,nboot=nboot,SEED=SEED,...) +tests=val$param.estimates/val$s.e. +pv=2*(1-pnorm(abs(tests))) +est=regfun(x,y,...) +regout[,3]=est$coef +regout[,1]=est$coef-qnorm(1-alpha/2)*val$s.e. +regout[,2]=est$coef+qnorm(1-alpha/2)*val$s.e. +regout[,4]=val$s.e. 
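+# Column 5: p-values based on a normal approximation using the bootstrap standard errors.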
+regout[,5]=pv +list(output=regout,n=n.orig,n.keep=length(y)) +} + + + +dtrimQS<-function(x,y=NULL,tr=.2,pr=TRUE){ +# +# Trimmed mean based on difference scores +# Also returns quantile shift measure of location +# +# +if(pr){ +print('Note: Under normality and homoscedasticity, Cohen d= 0, .2, .5, .8') +print('correspond approximately to Q.effect = 0.5, 0.55, 0.65 and 0.70, respectively') +} +if(!is.null(y))L=x-y +else L=x +L=elimna(L) +output=trimci(L,tr=tr,pr=FALSE) +ef=depQS(L,locfun=mean,tr=tr) +list(ci=output$ci,estimate=output$estimate,test=output$test.stat, +se=output$se,p.value=output$p.value,n=output$n,Q.effect=ef$Q.effect) +} +lindQS<-function(x,con,locfun=median,...){ +# +# For dependent variables X_1...X_J +# compute quantile shift measure of effect size for +# Y_i=sum_j c_jX_j +# +x=elimna(x) +if(sum(con)!=0)stop('Contrast coefficients must sum to zero') +if(is.data.frame(x))x=as.matrix(x) +if(is.list(x))x<-matl(x) +J<-ncol(x) +if(length(con)!=J)stop('Length of con should equal number of groups') +L=linWMWMC.sub(x,con=con) +est=locfun(L) +p=linQS.sub2(L,locfun=locfun) +list(Q.effect=p,center=est) +} + + + + +bwiQS<-function(J,K,x,locfun=median,...){ +# +# Quantile shift measure of effect size for interactions in a +# between-by-within design +# +# The R variable x is assumed to contain the raw +# data stored in list mode or in a matrix. +# If in list mode, x[[1]] contains the data +# for the first level of both factors: level 1,1. +# x[[2]] is assumed to contain the data for level 1 of the +# first factor and level 2 of the second: level 1,2 +# x[[K]] is the data for level 1,K +# x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc. +# +# If the data are in a matrix, column 1 is assumed to +# correspond to x[[1]], column 2 to x[[2]], etc. +# +JK<-J*K +MJ<-(J^2-J)/2 +MK<-(K^2-K)/2 +MJMK<-MJ*MK +if(is.matrix(x) || is.data.frame(x))x=listm(x) +if(JK!=length(x))stop('Something is wrong. 
Expected ',JK,' groups but x contains ', length(x), ' groups instead.') +m=matrix(c(1:JK),nrow=J,byrow=TRUE) +output=matrix(NA,ncol=5,nrow=MJMK) +dimnames(output)<-list(NULL,c('A','A','B','B','Q.Effect')) +ic=0 +for(j in 1:J){ +for(jj in 1:J){ +if(j1)stop('One covariate only is allowed with this function') +xy=elimna(cbind(x1,y1)) +x1=xy[,1] +y1=xy[,2] +xy=elimna(cbind(x2,y2)) +x2=xy[,1] +y2=xy[,2] +if(xout){ +flag<-outfun(x1,...)$keep +x1<-x1[flag] +y1<-y1[flag] +flag<-outfun(x2,...)$keep +x2<-x2[flag] +y2<-y2[flag] +} +xor1=order(x1) +xor2=order(x2) +x1=x1[xor1] +x2=x2[xor2] +y1=y1[xor1] +y2=y2[xor2] +n1<-1 +n2<-1 +vecn<-1 +for(i in 1:length(x1))n1[i]<-length(y1[near(x1,x1[i],fr1)]) +for(i in 1:length(x1))n2[i]<-length(y2[near(x2,x1[i],fr2)]) +for(i in 1:length(x1))vecn[i]<-min(n1[i],n2[i]) +sub<-c(1:length(x1)) +bot<-min(sub[vecn>=nmin]) +itop<-max(sub[vecn>=nmin]) +xbot=x1[bot] +xup=x1[itop] + +if(BOTH){ +vecn=1 +n1=1 +n2=1 +for(i in 1:length(x2))n1[i]<-length(y1[near(x1,x1[i],fr1)]) +for(i in 1:length(x2))n2[i]<-length(y2[near(x2,x1[i],fr2)]) +for(i in 1:length(x2))vecn[i]<-min(n1[i],n2[i]) +sub<-c(1:length(x2)) +bot<-max(sub[vecn>=nmin]) +itop<-min(sub[vecn>=nmin]) +xbot[2]=x2[itop] #CORRECT, need to switch +xup[2]=x2[bot] +} +xbot=max(xbot) +xup=min(xup) +pts=seq(xbot,xup,length.out=npts) +if(alpha!=.05)EST=TRUE +if(is.null(p.crit)){ +nv=c(30, 50, 60, 70, 80, 100, 150, 200, +300, 400, 500, 600, 800) +if(Ycrit)pv=c(0.00824497,0.00581, 0.005435089, 0.004763079, +0.00416832, 0.004406774, 0.00388228,0.003812836,0.003812836,0.003453055, 0.003625061, +.003372966, 0.003350022) +if(!Ycrit) +pv=c(0.008566, # 30 +0.0083847, # 50 +0.006758, # 60 +0.006871, # 70 +0.006157, # 80 +0.006629, #100 +0.006629, # 150 +0.004681, # 200 +0.004537, # 300 +0.004952, # 400 + 0.004294, # 500 + 0.004288, # 600 + 0.004148) +n1= length(y1) + n2=length(y2) +p.crit=(lplot.pred(1/nv,pv,1/n1)$yhat+lplot.pred(1/nv,pv,1/n2)$yhat)/2 +p.crit=(alpha/.05)*p.crit # Crude approximation when alpha != .05, tends to be conservative. +} +temp=ancovaWMW(x1,y1,x2,y2,pts=pts,fr1=fr1,fr2=fr2,alpha=p.crit,plotit=plotit) +res=temp$output[,1:7] +if(plotit){ +x=res[,1] +y=res[,4] +minx=min(x) +maxx=max(x) +plot(c(minx,maxx,x),c(0,1,y),xlab=xlab,ylab=ylab,type='n') +points(x,y,pch=pc) +if(!sm){lines(res[,1],res[,5],lty=2) +lines(res[,1],res[,6],lty=2) +lines(res[,1],res[,4]) +} +if(sm){ +plin=lplot.pred(res[,1],res[,4],span=span)$yhat +lines(res[,1],plin) +low.line=lplot.pred(res[,1],res[,5],span=span)$yhat +lines(res[,1],low.line,lty=2) +up.line=lplot.pred(res[,1],res[,6],span=span)$yhat +lines(res[,1],up.line,lty=2) +} + +} +sig=rep(0,nrow(res)) +sig[res[,7]<=p.crit]=1 +sig=as.matrix(sig,ncol=1) +dimnames(sig)=list(NULL,'Sig.Dif') +res=cbind(res,sig) +list(p.crit=p.crit,output=res,summary=temp$summary,num.sig=sum(sig)) +} + +ancdetwmwQ<-function(x1,y1,x2,y2,fr1=1,fr2=1,nmin=8,q=.05, +alpha=.05,plotit=TRUE,pts=NA,span=2/3,sm=TRUE, xout=FALSE,outfun=out,MC=FALSE, +npts=25,p.crit=NULL, +SCAT=TRUE,xlab='X',ylab='P.hat',pc='.',...){ +# +# Like the function ancdet, only use analog of Wilcoxon--Mann--Whitney +# plot=TRUE: plot estimates P.hat plus a +# confidence band having simultaneous probability coverage 1-alpha +# +# span = the span when using loess to plot the regression line. 
+# +# npts = number of covariate values to be used +# +# sm=TRUE will smooth the plot using lowess +# +# Covariate points are chosen that lie between the q and 1-q quantiles +# +if(ncol(as.matrix(x1))>1)stop('One covariate only is allowed with this function') +xy=elimna(cbind(x1,y1)) +x1=xy[,1] +y1=xy[,2] +xy=elimna(cbind(x2,y2)) +x2=xy[,1] +y2=xy[,2] +if(xout){ +flag<-outfun(x1,...)$keep +x1<-x1[flag] +y1<-y1[flag] +flag<-outfun(x2,...)$keep +x2<-x2[flag] +y2<-y2[flag] +} +xor1=order(x1) +xor2=order(x2) +x1=x1[xor1] +x2=x2[xor2] +y1=y1[xor1] +y2=y2[xor2] +n1<-1 +n2<-1 +vecn<-1 + +xbot=max(qest(x1,q),qest(x2,q)) +xup=min(qest(x1,1-q),qest(x2,1-q)) + +pts=seq(xbot,xup,length.out=npts) + +nchk1=0 +for(i in 1:length(pts))nchk1[i]=length(y1[near(x1,pts[i],fr1)]) +nchk2=0 +for(i in 1:length(pts))nchk2[i]=length(y2[near(x2,pts[i],fr2)]) +flag1=nchk1>=nmin +flag2=nchk2>=nmin +flag=as.logical(flag1*flag2) +pts=pts[flag] +if(is.null(p.crit)){ +nv=c(30, 50, 60, 70, 80, 100, 150, 200, +300, 400, 500, 600, 800) +pv=c(0.00824497,0.00581, 0.005435089, 0.004763079, +0.00416832, 0.004406774, 0.00388228,0.003812836,0.003812836,0.003453055, 0.003625061, +.003372966, 0.003350022) +n1= length(y1) + n2=length(y2) +p.crit=(lplot.pred(1/nv,pv,1/n1)$yhat+lplot.pred(1/nv,pv,1/n2)$yhat)/2 +p.crit=(alpha/.05)*p.crit # Crude approximation when alpha != .05, tends to be conservative. +} +temp=ancovaWMW(x1,y1,x2,y2,pts=pts,fr1=fr1,fr2=fr2,alpha=p.crit,plotit=plotit) +res=temp$output +if(plotit){ +x=res[,1] +y=res[,4] +minx=min(x) +maxx=max(x) +plot(c(minx,maxx,x),c(0,1,y),xlab=xlab,ylab=ylab,type='n') +points(x,y,pch=pc) +if(!sm){lines(res[,1],res[,5],lty=2) +lines(res[,1],res[,6],lty=2) +lines(res[,1],res[,4]) +} +if(sm){ +plin=lplot.pred(res[,1],res[,4],span=span)$yhat +lines(res[,1],plin) +low.line=lplot.pred(res[,1],res[,5],span=span)$yhat +lines(res[,1],low.line,lty=2) +up.line=lplot.pred(res[,1],res[,6],span=span)$yhat +lines(res[,1],up.line,lty=2) +} + +} +sig=rep(0,nrow(res)) +sig[res[,7]<=p.crit]=1 +sig=as.matrix(sig,ncol=1) +dimnames(sig)=list(NULL,'Sig.Dif') +res=cbind(res,sig) +list(p.crit=p.crit,output=res,summary=temp$summary,num.sig=sum(sig),p.crit=p.crit) +} + + + +regIVcommcp<-function(x,y,regfun = tsreg, nboot = 200, + xout = FALSE, outfun = outpro, SEED = TRUE, MC = FALSE, tr = 0.2, + ...){ +# +# For each pair of the independent variables in x, compare strength +# when both are included in the model. +# +x<-as.matrix(x) +J=ncol(x) +A=(J^2-J)/2 +output=matrix(NA,nrow=A,ncol=6) +ic=0 +for(i in 1:J){ +for(k in 1:J){ +if(i=crit) +mtest=max(abs(res[flag])) +hdPV=optimize(hdpv,interval=c(.001,.999),dat=tval,obs=mtest) +if(is.na(num.sig))num.sig=0 +list(n=n,cor=test$cor,test.stats=res,crit.val=crit, +num.sig=num.sig,p.value=1-hdPV$minimum) +} + +hdpv=function(val,dat,obs){z=abs(hd(dat,val)-obs) +z +} + +mscorpbMC<-function(x,corfun=pcor,nboot=500,alpha=0.05,SEED=TRUE,WARN=FALSE, +outfun=outpro,pr=TRUE){ +# +# For p-variate data, test the hypothesis that the +# skipped correlation is zero for all pairs of variables. 
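+# A minimal usage sketch (hypothetical trivariate data, assuming the supporting functions
+# in this file are available): x=matrix(rnorm(150),ncol=3); mscorpbMC(x)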
+# The probability of one or more Type I errors is indicated by the +# argument +# alpha +# +if(pr)print('Here, the p-value is the smallest alpha value for which one or more hypotheses are rejected') +library(parallel) +if(SEED)set.seed(2) +x=elimna(x) +n=nrow(x) +tval=NA +y=list() +for(i in 1:nboot)y[[i]]=apply(x,2,sample,replace=TRUE) +v=mclapply(y,mscor,corfun=corfun,outfun=outpro) +for(i in 1:nboot)tval[i]=max(abs(elimna(as.vector(v[[i]]$test.stat)))) + +crit=hd(tval,q=1-alpha) +test=mscor(x) +res=test$test.stat +flag=upper.tri(res) +num.sig=sum(abs(res[flag])>=crit) +mtest=max(abs(res[flag])) +if(!WARN)options(warn=-1) +hdPV=optimize(hdpv,interval=c(.001,.999),dat=tval,obs=mtest) +if(!WARN)options(warn=0) +if(is.na(num.sig))num.sig=0 +list(n=n,cor=test$cor,test.stats=res,crit.val=crit, +num.sig=num.sig,p.value=1-hdPV$minimum) +} + + + +mscorci.cr<-function(n,p,iter=500,corfun=pcor,alpha=c(.05,.025,.01),TV=FALSE,SEED=TRUE){ +# +# Determine critical p-values for the function mscorci +# Returns the estimate of the distribution of the null minimum p-value +# plus the critical p-values corresponding to the levels indicated by +# alpha. +# +if(SEED)set.seed(65) +x=list() +library(parallel) +for(i in 1:iter)x[[i]]=rmul(n,p=p) +tval=mclapply(x,mscorci.cr.sub,corfun=corfun,nboot=iter) +tval=list2vec(tval) +crit.p=NA +for(j in 1:length(alpha))crit.p[j]=hd(tval,alpha[j]) +if(!TV)tval=NULL +list(crit.p.values=crit.p,tval=tval) +} + +mscorci.cr.sub<-function(x,corfun,nboot=500){ +v=mscorci(x,SEED=FALSE,corfun=corfun,nboot=nboot,crit.pv=1)$p.values +mp=min(as.vector(v),na.rm=T) +mp +} +scorv2<-function(x,y=NULL,corfun=pcor,gval=NA,plotit=FALSE,op=TRUE,cop=3,xlab="VAR 1", +ylab="VAR 2",STAND=TRUE,pr=TRUE,SEED=TRUE,MC=FALSE){ +# +# Compute a skipped correlation coefficient. +# +# Eliminate outliers using a projection method +# That is, compute Donoho-Gasko median, for each point +# consider the line between it and the median, +# project all points onto this line, and +# check for outliers using a boxplot rule. +# Repeat this for all points. A point is declared +# an outlier if for any projection it is an outlier +# using a modification of the usual boxplot rule. +# +# For information about the argument cop, see the function +# outpro. +# +# Eliminate any outliers and compute correlation using +# remaining data. +# +# Nearly the same as scor, but does not reset the SEED, which corrects problems with other functions +# +# MC=TRUE, the multicore version of outpro is used +# +# corfun=pcor means Pearson's correlation is used. +# corfun=spear means Spearman's correlation is used. +# corfun=tau means Kendall tau is used. +if(SEED){ +set.seed(12) # So when using MVE or MCD, get consistent results +} +if(is.null(y[1]))m<-x +if(!is.null(y[1]))m<-cbind(x,y) +m<-elimna(m) +if(!MC)temp<-outpro(m,gval=gval,plotit=plotit,op=op,cop=cop, +xlab=xlab,ylab=ylab,STAND=STAND,pr=pr)$keep +if(MC)temp<-outproMC(m,gval=gval,plotit=plotit,op=op,cop=cop, +xlab=xlab,ylab=ylab,STAND=STAND,pr=pr)$keep +tcor<-corfun(m[temp,])$cor +if(!is.null(dim((m))))tcor<-tcor[1,2] +test<-abs(tcor*sqrt((nrow(m)-2)/(1-tcor**2))) +if(ncol(m)!=2)diag(test)<-NA +crit<-6.947/nrow(m)+2.3197 +list(cor=tcor,test.stat=test,crit.05=crit) +} + +scorreg<-function(x,y,corfun=spear,cop=3,MM=FALSE,gval=NA, +outfun=outpro,alpha=.05,MC=NULL,SEED=TRUE,ALL=TRUE,...){ +# +# x is an n by p matrix +# +# Compute a skipped correlation matrix between y and each variable in x. 
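+# A minimal usage sketch (hypothetical data with two predictors, assuming the supporting
+# functions in this file are available): x=matrix(rnorm(100),ncol=2); y=rnorm(50); scorreg(x,y)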
+#
+# corfun indicates the correlation to be used
+# corfun=pcor uses Pearson's correlation
+# corfun=spear uses Spearman's correlation
+#
+# ALL=TRUE: eliminate all outliers among cbind(x,y)
+# ALL=FALSE: skipped correlation is computed for each x[,j] and y. So outliers are eliminated only
+# for these two variables, and this is done for j=1,...,p, where p is the number of predictors.
+#
+# This function returns the p skipped correlations between y and each column of x
+#
+# Method: Eliminate outliers using a projection technique.
+# That is, compute Donoho-Gasko median, for each point
+# consider the line between it and the median,
+# project all points onto this line, and
+# check for outliers using a boxplot rule.
+# Repeat this for all points. A point is declared
+# an outlier if for any projection it is an outlier
+# using a modification of the usual boxplot rule.
+#
+# cop determines how the center of the scatterplot is
+# estimated; see the function outpro.
+# cop=1 Donoho-Gasko halfspace median
+# cop=2 MCD measure of location
+# cop=3 marginal medians
+# cop=4 MVE measure of location
+#
+# gval is the critical value for determining whether a point
+# is an outlier. It is determined automatically if not specified,
+# assuming that Spearman's correlation is used. Critical
+# values when using some other correlation have not been
+# determined.
+#
+m<-elimna(cbind(x,y))
+m=as.matrix(m)
+p1<-ncol(m)
+p=p1-1
+n<-nrow(m)
+e=NA
+if(!ALL){
+if(is.null(MC)){
+if(n>=200)MC=TRUE
+else MC=FALSE
+}
+e=NA
+for(j in 1:p)e[j]=scorv2(m[,j],m[,p1],MC=MC,corfun=corfun,SEED=SEED)$cor.value
+}
+if(ALL){
+if(n<500)
+flag=outpro(m,cop=cop,plotit=FALSE)$keep
+else flag=outpro.depth(m,plotit=FALSE,SEED=SEED)$keep
+xy=m[flag,]
+for(j in 1:p)e[j]=corfun(xy[,j],xy[,p1],...)$cor
+}
+list(cor=e)
+}
+
+mscorci<-function(x,y=NULL,nboot=1000,alpha=c(.05,.025,.01),SEED=TRUE,
+STAND=TRUE,corfun=pcor,outfun=outpro, crit.pv=NULL,
+pvals=NULL,hoch=FALSE,iter=500,pval.SEED=TRUE,pr=TRUE){
+#
+# For p-variate data, test the hypothesis of a zero skipped correlation for each pair of variables in a manner
+# that controls the probability of one or more Type I errors.
+#
+# The function also returns confidence intervals for each of the skipped correlations when hoch=FALSE.
+# alpha[1]=0.05 is the default FWE level.
+# By default, Pearson's correlation is computed after outliers are removed via the R function indicated by
+# outfun, which defaults to a projection-type method.
+# corfun=spear, for example, would replace Pearson's correlation with Spearman's correlation.
+#
+# alpha=c(.05,.025,.01) is the default, meaning that critical p-values are determined
+# for alpha = .05, .025 and .01. So different alpha values can be used if desired.
+# For other purposes the familywise error (FWE) rate is taken to be
+# alpha[1]=.05 by default. So setting the argument alpha=.01, FWE is taken to be .01 and a critical p-value is
+# computed for the .01 level only.
+#
+# The default number of bootstrap samples is
+# nboot=1000
+#
+# hoch=TRUE can be used to reduce execution time.
+# If n>=60, this might suffice when testing at the 0.05 level. But power might be increased by using
+# hoch=FALSE at the expense of higher execution time.
+#
+# If alpha is less than .05, say .025 or .01, hoch=FALSE is recommended.
+#
+# Note: confidence intervals are reported only when hoch=FALSE.
+#
+# pvals can be used to supply a vector of p-values estimating the distribution of the minimum p-value among the tests
+# that are performed when all hypotheses are true.
+# +# iter=500: number of replications used to estimate the distribution of the minimum p-value. +# Or use the argument crit.pv as indicated below. +# Note: in the journal article dealing with this method, iter=1000 was used. + +# By default +# pvals=NULL, the functions computes these values if the p-values suggest that there might be +# significant results and hoch=FALSE; this can result in high execution time. +# The pvals are computed via the R function +# mscorci.cr(n,p,iter=500,corfun=pcor,alpha=alpha,SEED=TRUE). +# +# Critical p-values are a function of n and p. Once known, can supply them via the argument +# crit.pv as follows: +# +# pv=mscorci.cr(n,p)$crit.p.values +# mscorci(x,crit.pv=pv) +# +# +# +if(pr){ +if(!hoch){print('To reduce execution time, critical p-values are not computed when the observed p.values are too large to') +print('reject at the 0.05 level. To compute them any way, use the R function mscorci.cr') +} +if(hoch){ +print('Hochberg adjusted p-values are used.') +print('This is reasonable when n>120 and alpha=.05. Otherwise suggest using hoch=FALSE') +print('With hoch=TRUE, unadjusted 1-alphaj[1] confidence intervals are reported') +}} +if(SEED)set.seed(2) +if(!is.null(y))x=cbind(x,y) +x<-elimna(x) # Eliminate rows with missing values +nval=nrow(x) +p=ncol(x) +J=(p^2-p)/2 +est<-mscor(x,STAND=STAND,corfun=corfun,outfun=outfun)$cor +flag=upper.tri(est) +est=est[flag] +data<-matrix(sample(nval,size=nval*nboot,replace=TRUE),nrow=nboot) +data=listm(t(data)) +bvec<-lapply(data,scorci.sub,x,STAND=STAND,corfun=corfun,outfun=outfun) +bvec=matl(bvec) # A J by nboot matrix. + +phat=0 +sig=0 +for(j in 1:J){ +phat[j]=sum(bvec[j,] < 0)/nboot +sig[j] <- 2 * min(phat[j], 1 - phat[j]) +} +# +# Compute critical p-values if any of the p-values are sufficiently small. +# +FLAG=FALSE +if(p==2 && sig[1]<=.15){ +FLAG=TRUE +if(hoch){ +if(pr)print('If the p.value is <=.15, suggest using hoch=FALSE') +}} +if(p>2){ +if(min(sig)<=alpha[1])FLAG=TRUE +} +if(FLAG){ +if(is.null(crit.pv)){ +if(!hoch){ +if(pr)print('Computing critical p-values. Execution time might require several minutes') +temp=mscorci.cr(nval,p,iter=iter,corfun=corfun,alpha=alpha,SEED=pval.SEED,TV=TRUE) #returns tval in case want to adjust p-values. +# Need to add code to do this. (See mscorpbMC for how this might be done.) 
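+# The next line extracts the critical p-values (one for each entry of alpha) returned by mscorci.cr.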
+crit.pv=temp$crit.p.values +}}} +ci.mat=matrix(NA,nrow=J,ncol=4) +dimnames(ci.mat)=list(NULL,c('Var i','Var j','ci.low','ci.up')) +for(j in 1:J)bvec[j,]<-sort(bvec[j,]) +if(J==1)bvec=as.matrix(bvec) +ic=0 +if(is.null(crit.pv))crit.pv=alpha[1] +for(j in 1:p){ +for(k in 1:p){ +if(j40){ +if(n<=70){ +vv=p.crit.n60(alpha[1],sig[j]) +sigadj[j]=vv$adj.p.value +crit.p=vv$crit.p.value +}} +if(n>70){ +if(n<=100){ +vv=p.crit.n80(alpha[1],sig[j]) +sigadj[j]=vv$adj.p.value +crit.p=vv$crit.p.value +} +} +if(n>100){ +if(n<=120) +{ +vv=p.crit.n100(alpha[1],sig[j]) +crit.p=vv$crit.p.value +sigadj[j]=vv$adj.p.value +}} +if(n>120){ # no adjustment +sigadj[j]=sig[j] #i.e., no adjustment +crit.p=alpha[1] +}} +hadj=p.adjust(sig,method='hoch') +ci.mat=matrix(NA,nrow=p,ncol=3) +dimnames(ci.mat)=list(NULL,c('Var','ci.low','ci.up')) +for(j in 1:p)bvec[j,]<-sort(bvec[j,]) +if(p==1)bvec=as.matrix(bvec) +ic=0 +if(is.null(crit.pv))crit.pv=alpha[1] +for(j in 1:p){ +ic=ic+1 +ci.mat[ic,1]=j +ihi<-floor((1-crit.p[1]/2)*nboot+.5) +ilow<-floor((crit.p[1]/2)*nboot+.5) +ci.mat[ic,2]=bvec[ic,ilow] +ci.mat[ic,3]=bvec[ic,ihi] +} +p.mat=matrix(NA,nrow=p,ncol=4) +p.mat[,1]=est +p.mat[,2]=sig +p.mat[,3]=sigadj +adj.p=p.adjust(sigadj,method='hochberg') +p.mat[,4]=adj.p +dimnames(p.mat)=list(NULL,c('Est.','p-value','adjusted p.value','Hoch.adjusted.p.value')) +list(Estimates=p.mat,confidence.int=ci.mat) +} + + +scorreg.sub<-function(data,xy,corfun=corfun,outfun=outfun,ALL=ALL){ +p1=ncol(xy) +p=p1-1 +est<-scorreg(xy[data,1:p],xy[data,p1],corfun=corfun,SEED=FALSE,ALL=ALL)$cor +est +} + + +p.crit.n30<-function(alpha=.05,p.obs=NULL){ +p.table=c( +0.696 ,0.828 ,0.208 ,0.328 ,0.152 +,0.632 ,0.184 ,0.452 ,0.86 ,0.988 +,0.772 ,0.832 ,0.288 ,0.288 ,0.944 +,0.672 ,0.868 ,0.476 ,0.148 ,0.292 +,0.792 ,0.852 ,0.236 ,0.9, 1 +,0.484 ,0.932 ,0.704 ,0.4 ,0.904 +,0.656 ,0.32 ,0.104 ,0.676 ,0.572 +,0.936 ,0.14 ,0.148 ,0.86 ,0.508 +,0.748 ,0.328 ,0.816 ,0.268 ,0.364 +,0.152 ,0.816 ,0.5 ,0.972 ,0.684 +,0.156 ,0.676 ,0.244 ,0.948 ,0.612 +,0.28 ,0.092 ,0.712 ,0.152 ,0.704 +,0.192 ,0.904 ,0.372 ,0.908 ,0.992 +,0.692 ,0.956 ,0.704 ,0.964 ,0.484 +,0.496 ,0.768 ,0.172 ,0.336 ,0.108 +,0.052 ,0.62 ,0.192 ,0.664 ,0.716 +,0.148 ,0.6 ,0.384 ,0.52 ,0.536 +,0.992 ,0.272 ,0.68 ,0.232 ,0.368 +,0.788 ,0.572 ,0.516 ,0.476 ,0.832 +,0.988 ,0.72 ,0.432 ,0.756 ,0.564 +,0.792 ,0.44 ,0.996 ,0.388 ,0.456 +,0.848 ,0.364, 1 ,0.584 ,0.104 +,0.68 ,0.848 ,0.268 ,0.424 ,0.836 +,0.316 ,0.828 ,0.12 ,0.392 ,0.588 +,0.38 ,0.544 ,0.108 ,0.248 ,0.972 +,0.94 ,0.184 ,0.156 ,0.444 ,0.28 +,0.612 ,0.22 ,0.544 ,0.888 ,0.808 +,0.436 ,0.736 ,0.424 ,0.792 ,0.324 +,0.672 ,0.012 ,0.36 ,0.656 ,0.54 +,0.872 ,0.596 ,0.788 ,0.896 ,0.532 +,0.34 ,0.664 ,0.28 ,0.484 ,0.888 +,0.66 ,0.824 ,0.032 ,0.524 ,0.496 +,0.84 ,0.564 ,0.432 ,0.668 ,0.664 +,0.332 ,0.576 ,0.568 ,0.388 ,0.876 +,0.06 ,0.948 ,0.664 ,0.764 ,0.12 +,0.416 ,0.956 ,0.936 ,0.848 ,0.904 +,0.596 ,0.792 ,0.232 ,0.68 ,0.404 +,0.556 ,0.356 ,0.44 ,0.936 ,0.748 +,0.968 ,0.528 ,0.8 ,0.152 ,0.68 +,0.792 ,0.664 ,0.872 ,0.856 ,0.176 +,0.908 ,0.124 ,0.744 ,0.708 ,0.632 +,0.68 ,0.972 ,0.244 ,0.984 ,0.76 +,0.828 ,0.256 ,0.888 ,0.688 ,0.312 +,0.828 ,0.124 ,0.296 ,0.396 ,0.8 +,0.756 ,0.104 ,0.228 ,0.884 ,0.948 +,0.96 ,0.52 ,0.724 ,0.824 ,0.436 +,0.672 ,0.868 ,0.772 ,0.612 ,0.48 +,0.036 ,0.868 ,0.52 ,0.268 ,0.232 +,0.608 ,0.676 ,0.476 ,0.588 ,0.904 +,0.508 ,0.236 ,0.952 ,0.848 ,0.628 +,0.924 ,0.132 ,0.812 ,0.696 ,0.3 +,0.948 ,0.904 ,0.868 ,0.392 ,0.072 +,0.38 ,0.624 ,0.608 ,0.756 ,0.332 +,0.088 ,0.42 ,0.764 ,0.648 ,0.084 +,0.428 ,0.04 ,0.408 ,0.548 ,0.216 +,0.636 
,0.784 ,0.24 ,0.9 ,0.512 +,0.476 ,0.504 ,0.288 ,0.812 ,0.6 +,0.696 ,0.492 ,0.42 ,0.068 ,0.236 +,0.604 ,0.564 ,0.888 ,0.816 ,0.52 +,0.092 ,0.096 ,0.372 ,0.54 ,0.328 +,0.96 ,0.276 ,0.38 ,0.1 ,0.412 +,0.732 ,0.184 ,0.044 ,0.772 ,0.892 +,0.244 ,0.344 ,0.976 ,0.04 ,0.088 +,0.032 ,0.796 ,0.24 ,0.524 ,0.808 +,0.472 ,0.472 ,0.152 ,0.696 ,0.728 +,0.756 ,0.784 ,0.452 ,0.764 ,0.764 +,0.144 ,0.988 ,0.552 ,0.788 ,0.5 +,0.46 ,0.42 ,0.468 ,0.516 ,0.832 +,0.528 ,0.724 ,0.148 ,0.648 ,0.456 +,0.28 ,0.804 ,0.496 ,0.464 ,0.52 +,0.864 ,0.228 ,0.544 ,0.708 ,0.912 +,0.528 ,0.18 ,0.188 ,0.092 ,0.44 +,0.452 ,0.596 ,0.424 ,0.32 ,0.808 +,0.036 ,0.508 ,0.836 ,0.064 ,0.924 +,0.4 ,0.324 ,0.464 ,0.888 ,0.948 +,0.688 ,0.856 ,0.76 ,0.16 ,0.44 +,0.372 ,0.328 ,0.088 ,0.984 ,0.496 +,0.428 ,0.892 ,0.636 ,0.236 ,0.704 +,0.704 ,0.416 ,0.9 ,0.716 ,0.976 +,0.908 ,0.524 ,0.604 ,0.436 ,0.332 +,0.996 ,0.428, 1 ,0.244 ,0.712 +,0.456 ,0.808 ,0.984 ,0.804 ,0.62 +,0.552 ,0.732 ,0.264 ,0.488 ,0.604 +,0.424 ,0.936 ,0.808 ,0.356 ,0.164 +,0.152 ,0.34 ,0.34 ,0.644 ,0.4 +,0.784 ,0.308 ,0.296 ,0.672 ,0.664 +,0.64 ,0.76 ,0.24 ,0.464 ,0.656 +,0.84 ,0.76 ,0.176 ,0.148 ,0.184 +,0.296 ,0.516 ,0.62 ,0.396 ,0.384 +,0.84 ,0.984 ,0.964 ,0.46 ,0.224 +,0.968 ,0.292 ,0.78 ,0.696 ,0.128 +,0.384 ,0.98 ,0.852 ,0.408 ,0.644 +,0.744 ,0.876 ,0.688 ,0.924 ,0.06 +,0.36 ,0.4 ,0.528 ,0.084 ,0.216 +,0.4 ,0.984 ,0.488 ,0.152 ,0.608 +,0.332 ,0.5 ,0.884 ,0.78 ,0.912 +,0.236 ,0.368 ,0.276 ,0.74 ,0.96 +,0.912 ,0.36 ,0.608 ,0.804 ,0.9 +,0.688 ,0.348 ,0.748 ,0.544 ,0.956 +,0.384 ,0.892 ,0.728 ,0.164 ,0.392 +,0.876 ,0.836 ,0.54 ,0.604 ,0.456 +,0.144 ,0.3 ,0.848 ,0.272 ,0.668 +,0.908 ,0.004 ,0.812 ,0.408 ,0.676 +,0.928 ,0.224 ,0.052 ,0.756 ,0.928 +,0.428 ,0.096 ,0.996 ,0.996 ,0.828 +,0.504 ,0.616 ,0.788 ,0.644 ,0.26 +,0.764 ,0.616 ,0.248 ,0.556 ,0.972 +,0.912 ,0.66 ,0.72 ,0.792 ,0.204 +,0.904 ,0.32 ,0.228 ,0.628 ,0.912 +,0.804 ,0.072 ,0.656 ,0.456 ,0.992 +,0.3 ,0.808 ,0.692 ,0.84 ,0.544 +,0.072 ,0.652 ,0.524 ,0.884 ,0.168 +,0.208 ,0.216 ,0.948 ,0.896 ,0.92 +,0.964 ,0.784 ,0.812 ,0.708 ,0.936 +,0.508 ,0.488 ,0.156 ,0.94 ,0.088 +,0.508 ,0.72 ,0.636 ,0.552 ,0.016 +,0.464 ,0.348 ,0.576 ,0.904 ,0.248 +,0.324 ,0.516 ,0.988 ,0.616 ,0.716 +,0.664 ,0.576 ,0.336 ,0.792 ,0.824 +,0.896 ,0.804 ,0.524 ,0.332 ,0.804 +,0.94 ,0.424 ,0.964 ,0.644 ,0.604 +,0.4 ,0.984 ,0.38 ,0.696 ,0.248 +,0.244 ,0.772 ,0.836 ,0.048 ,0.696 +,0.724 ,0.576 ,0.6 ,0.348 ,0.88 +,0.776 ,0.376 ,0.644 ,0.648 ,0.08 +,0.424 ,0.912 ,0.964 ,0.224 ,0.984 +,0.476 ,0.928 ,0.64 ,0.944 ,0.512 +,0.644 ,0.596 ,0.388 ,0.28 ,0.124 +,0.212 ,0.388 ,0.416 ,0.884 ,0.964 +,0.996 ,0.428 ,0.832 ,0.464 ,0.88 +,0.984 ,0.256 ,0.664 ,0.344 ,0.496 +,0.192 ,0.124 ,0.392 ,0.268 ,0.4 +,0.944 ,0.816 ,0.648 ,0.252 ,0.16 +,0.24 ,0.716 ,0.272 ,0.136 ,0.832 +,0.212 ,0.548 ,0.776 ,0.328 ,0.492 +,0.952 ,0.62 ,0.688 ,0.26 ,0.084 +,0.264 ,0.856 ,0.912 ,0.796 ,0.78 +,0.276 ,0.692 ,0.628 ,0.26 ,0.592 +,0.66 ,0.66 ,0.912 ,0.84 ,0.244 +,0.66 ,0.892 ,0.332 ,0.092 ,0.584 +,0.804 ,0.408 ,0.036 ,0.22 ,0.02 +,0.648 ,0.52 ,0.212 ,0.34 ,0.4 +,0.38 ,0.156 ,0.464 ,0.32 ,0.944 +,0.84 ,0.98 ,0.676 ,0.396 ,0.86 +,0.884 ,0.272 ,0.712 ,0.444 ,0.24 +,0.296 ,0.956 ,0.436 ,0.096 ,0.448 +,0.796 ,0.084 ,0.872 ,0.368 ,0.828 +,0.656 ,0.192 ,0.984 ,0.668 ,0.452 +,0.992 ,0.904 ,0.572 ,0.768 ,0.42 +,0.444 ,0.42, 0 ,0.456 ,0.464 +,0.908 ,0.884 ,0.704 ,0.164 ,0.604 +,0.924 ,0.748 ,0.688 ,0.648 ,0.968 +,0.332 ,0.636 ,0.472 ,0.956 ,0.924 +,0.6 ,0.788 ,0.488 ,0.156 ,0.904 +,0.892 ,0.372 ,0.948 ,0.868 ,0.06 +,0.58 ,0.604 ,0.9 ,0.212 ,0.824 +,0.632 ,0.416 ,0.5 ,0.576 ,0.932 +,0.472 ,0.932 
,0.936 ,0.96 ,0.26 +,0.556 ,0.372 ,0.748 ,0.368 ,0.256 +,0.076 ,0.676 ,0.292 ,0.504 ,0.6 +,0.216 ,0.796 ,0.488 ,0.132 ,0.076 +,0.02 ,0.48 ,0.848 ,0.772 ,0.524 +,0.22 ,0.908 ,0.432 ,0.952 ,0.556 +,0.12 ,0.868 ,0.756 ,0.732 ,0.56 +,0.084 ,0.7 ,0.34 ,0.2 ,0.704 +,0.336 ,0.092 ,0.22 ,0.944 ,0.044 +,0.844 ,0.356 ,0.72 ,0.276 ,0.664 +,0.828 ,0.492 ,0.392 ,0.368 ,0.32 +,0.304 ,0.804 ,0.856 ,0.528 ,0.6 +,0.056 ,0.908 ,0.124 ,0.448 ,0.632 +,0.232 ,0.008 ,0.2 ,0.552 ,0.884 +,0.82 ,0.92 ,0.744 ,0.26 ,0.492 +,0.94 ,0.96 ,0.572 ,0.536 ,0.196 +,0.992 ,0.524 ,0.356 ,0.116 ,0.072 +,0.084 ,0.46 ,0.604 ,0.884 ,0.752 +,0.812 ,0.36 ,0.492 ,0.508 ,0.42 +,0.54 ,0.132 ,0.084 ,0.328 ,0.984 +,0.104 ,0.592 ,0.172 ,0.992 ,0.688 +,0.572 ,0.312 ,0.304 ,0.596 ,0.796 +,0.488 ,0.388 ,0.188 ,0.456 ,0.716 +,0.168 ,0.292 ,0.36 ,0.848 ,0.02 +,0.756 ,0.6 ,0.956 ,0.676 ,0.864 +,0.96 ,0.304 ,0.276 ,0.576 ,0.32 +,0.324 ,0.776 ,0.66 ,0.652 ,0.832 +,0.052 ,0.24 ,0.08 ,0.844 ,0.668 +,0.44 ,0.844 ,0.476 ,0.224 ,0.604 +,0.876 ,0.436 ,0.8 ,0.228 ,0.364 +,0.792 ,0.052 ,0.94 ,0.444 ,0.796 +,0.436 ,0.276 ,0.908 ,0.092 ,0.74 +,0.128 ,0.76 ,0.256 ,0.56 ,0.376 +,0.604 ,0.82 ,0.864 ,0.328 ,0.24 +,0.244 ,0.28 ,0.648 ,0.452 ,0.56 +,0.712 ,0.14 ,0.908 ,0.256 ,0.544 +,0.176 ,0.36 ,0.924 ,0.584 ,0.216 +,0.68 ,0.82 ,0.628 ,0.828 ,0.316 +,0.52 ,0.34 ,0.172 ,0.916 ,0.54 +,0.88 ,0.636 ,0.796 ,0.696 ,0.976 +,0.68 ,0.368 ,0.456 ,0.764 ,0.736 +,0.356 ,0.188 ,0.992 ,0.94 ,0.572 +,0.112 ,0.736 ,0.476 ,0.58 ,0.772 +,0.944 ,0.348 ,0.248 ,0.292 ,0.992 +,0.916 ,0.128 ,0.904 ,0.804 ,0.66 +,0.972 ,0.044 ,0.228 ,0.82 ,0.296 +,0.92 ,0.368 ,0.924 ,0.96 ,0.928 +,0.38 ,0.184 ,0.86 ,0.8 ,0.136 +,0.304 ,0.512 ,0.684 ,0.612 ,0.624 +,0.868 ,0.908 ,0.548 ,0.396 ,0.436 +,0.668 ,0.92 ,0.196 ,0.156 ,0.176 +,0.088 ,0.888 ,0.524 ,0.196 ,0.736 +,0.736 ,0.884 ,0.072 ,0.824 ,0.456 +,0.404 ,0.212 ,0.664 ,0.404 ,0.608 +,0.532 ,0.62 ,0.816 ,0.496 ,0.836 +,0.328 ,0.868 ,0.48 ,0.636 ,0.836 +,0.668 ,0.424 ,0.364 ,0.276 ,0.376 +,0.744 ,0.228 ,0.604 ,0.656 ,0.936 +,0.344 ,0.54 ,0.868 ,0.876 ,0.184 +,0.204 ,0.976 ,0.752 ,0.796 ,0.324 +,0.88 ,0.108 ,0.552 ,0.92 ,0.132 +,0.44 ,0.312 ,0.184 ,0.936 ,0.44 +,0.62 ,0.492 ,0.976 ,0.764 ,0.94 +,0.48 ,0.908 ,0.888 ,0.332 ,0.74 +,0.532 ,0.64 ,0.976 ,0.668 ,0.992 +,0.988 ,0.892 ,0.516 ,0.496 ,0.56 +,0.016 ,0.616 ,0.224 ,0.3 ,0.684 +,0.616 ,0.452 ,0.976 ,0.248 ,0.132 +,0.256 ,0.136 ,0.956 ,0.144 ,0.96 +,0.664 ,0.26 ,0.772 ,0.108 ,0.868 +,0.516 ,0.268 ,0.376 ,0.532 ,0.68 +,0.56 ,0.428 ,0.64 ,0.272 ,0.808 +,0.22 ,0.156 ,0.184 ,0.436 ,0.452 +,0.128 ,0.924 ,0.488 ,0.268 ,0.584 +,0.596 ,0.892 ,0.284 ,0.916 ,0.424 +,0.576 ,0.844 ,0.212 ,0.696 ,0.2 +,0.88 ,0.548 ,0.728 ,0.88 ,0.72 +,0.468 ,0.208 ,0.524 ,0.896 ,0.06 +,0.516 ,0.736 ,0.508 ,0.524 ,0.9 +,0.408 ,0.82 ,0.68 ,0.16 ,0.776 +,0.84 ,0.756 ,0.236 ,0.8 ,0.84 +,0.548 ,0.628 ,0.54 ,0.768 ,0.328 +,0.476 ,0.604 ,0.22 ,0.844 ,0.396 +,0.704 ,0.556 ,0.128 ,0.068 ,0.08 +,0.424 ,0.544 ,0.556 ,0.464 ,0.74 +,0.716 ,0.752 ,0.068 ,0.804 ,0.024 +,0.632 ,0.68 ,0.868 ,0.328 ,0.448 +,0.14 ,0.364 ,0.596 ,0.916 ,0.148 +,0.504 ,0.62 ,0.3 ,0.536 ,0.024 +,0.892 ,0.932 ,0.056 ,0.532 ,0.084 +,0.248 ,0.268 ,0.944 ,0.212 ,0.92 +,0.5 ,0.1 ,0.736 ,0.648 ,0.648 +,0.236 ,0.604 ,0.588 ,0.416 ,0.88 +,0.92 ,0.956 ,0.6 ,0.988 ,0.848 +,0.54 ,0.384 ,0.868 ,0.748 ,0.256 +,0.18 ,0.196 ,0.988 ,0.588 ,0.94 +,0.856 ,0.856 ,0.512 ,0.008 ,0.748 +,0.46 ,0.672 ,0.848 ,0.932 ,0.14 +,0.708 ,0.812 ,0.608 ,0.692 ,0.756 +,0.424 ,0.84 ,0.16 ,0.744 ,0.92 +,0.892 ,0.676 ,0.68 ,0.164 ,0.796 +,0.656 ,0.496 ,0.576 ,0.44 ,0.088 +,0.232 ,0.592 ,0.04 ,0.808 
,0.632 +,0.86 ,0.112 ,0.392 ,0.196 ,0.696 +,0.912 ,0.872 ,0.72 ,0.484 ,0.348 +,0.424 ,0.556 ,0.408 ,0.612 ,0.592 +,0.636 ,0.584 ,0.088 ,0.28 ,0.444 +,0.332 ,0.444 ,0.952 ,0.172 ,0.664 +,0.008 ,0.468 ,0.624 ,0.7 ,0.3 +,0.356 ,0.516 ,0.308 ,0.964 ,0.38 +,0.984 ,0.956 ,0.96 ,0.604 ,0.044 +,0.436 ,0.956 ,0.192 ,0.24 ,0.164 +,0.888 ,0.904 ,0.98 ,0.924 ,0.584 +,0.22 ,0.988 ,0.644 ,0.644 ,0.652 +,0.712 ,0.676 ,0.136 ,0.144 ,0.656 +,0.548 ,0.836 ,0.804 ,0.856 ,0.492 +,0.86 ,0.744 ,0.808 ,0.404 ,0.62 +,0.772 ,0.852 ,0.712 ,0.44 ,0.5 +,0.768 ,0.728 ,0.276 ,0.776 ,0.316 +,0.396 ,0.656 ,0.676 ,0.764 ,0.4 +,0.988 ,0.276 ,0.952 ,0.32 ,0.552 +,0.976 ,0.244 ,0.676 ,0.916 ,0.204 +,0.152 ,0.548 ,0.708 ,0.764 ,0.524 +,0.564 ,0.244 ,0.656 ,0.928 ,0.068 +,0.984 ,0.524 ,0.9 ,0.792 ,0.636 +,0.488 ,0.56 ,0.352 ,0.452 ,0.328 +,0.504 ,0.348 ,0.804 ,0.272 ,0.348 +,0.6 ,0.972 ,0.816 ,0.208 ,0.28 +,0.652 ,0.944 ,0.468 ,0.1 ,0.676 +,0.7 ,0.664 ,0.948 ,0.688 ,0.112 +,0.816 ,0.088 ,0.572 ,0.236 ,0.912 +,0.408 ,0.752 ,0.532 ,0.84 ,0.464 +,0.292 ,0.052 ,0.088 ,0.784 ,0.396 +,0.592 ,0.652 ,0.3 ,0.24 ,0.588 +,0.936 ,0.084 ,0.696 ,0.74 ,0.516 +,0.952 ,0.684 ,0.564 ,0.636 ,0.968 +,0.184 ,0.3 ,0.256 ,0.804 ,0.64 +,0.256 ,0.844 ,0.8 ,0.992 ,0.66 +,0.492 ,0.428 ,0.94 ,0.064 ,0.748 +,0.424 ,0.212 ,0.092 ,0.076 ,0.144 +,0.776 ,0.228 ,0.48 ,0.596 ,0.324 +,0.348 ,0.804 ,0.812 ,0.944 ,0.976 +,0.864 ,0.956 ,0.996 ,0.54 ,0.736 +,0.408 ,0.172 ,0.732 ,0.876 ,0.564 +,0.028 ,0.2 ,0.444 ,0.612 ,0.252 +,0.584 ,0.208 ,0.992 ,0.32 ,0.684 +,0.144 ,0.38 ,0.852 ,0.084 ,0.292 +,0.576 ,0.504 ,0.532 ,0.788 ,0.768 +,0.664 ,0.86 ,0.728 ,0.556 ,0.664 +,0.28 ,0.588 ,0.48 ,0.616 ,0.576 +,0.796 ,0.412 ,0.596 ,0.216 ,0.972 +,0.952 ,0.572 ,0.836 ,0.772 ,0.672 +,0.176 ,0.96 ,0.892 ,0.04 ,0.416 +,0.808 ,0.78 ,0.68 ,0.896 ,0.424 +,0.404 ,0.556 ,0.824 ,0.004 ,0.816 +,0.632 ,0.06 ,0.708 ,0.352 ,0.136 +,0.416 ,0.78 ,0.94 ,0.872 ,0.128 +,0.072 ,0.74 ,0.96 ,0.308 ,0.472 +,0.252 ,0.112 ,0.376 ,0.816 ,0.408 +,0.332 ,0.964 ,0.364 ,0.624 ,0.728 +,0.764 ,0.088 ,0.024 ,0.052 ,0.032 +,0.348 ,0.388 ,0.672 ,0.816 ,0.188 +,0.064 ,0.62 ,0.744 ,0.408 ,0.572 +,0.672 ,0.06 ,0.38 ,0.92 ,0.676 +,0.848 ,0.756 ,0.504 ,0.92 ,0.092 +,0.532 ,0.928 ,0.076 ,0.552 ,0.572 +,0.28 ,0.916 ,0.788 ,0.312 ,0.868 +,0.536 ,0.448 ,0.724 ,0.032 ,0.38 +,0.828 ,0.836 ,0.328 ,0.18 ,0.544 +,0.228 ,0.352 ,0.572 ,0.4 ,0.872 +,0.5 ,0.36 ,0.48 ,0.324 ,0.656 +,0.96 ,0.484 ,0.152 ,0.744 ,0.804 +,0.908 ,0.844 ,0.216 ,0.968 ,0.784 +,0.476 ,0.84 ,0.384 ,0.26 ,0.576 +,0.444 ,0.472 ,0.636 ,0.272 ,0.8 +,0.476 ,0.512 ,0.54 ,0.512 ,0.96 +,0.272 ,0.764 ,0.324 ,0.952 ,0.604 +,0.568 ,0.764 ,0.912 ,0.652 ,0.988 +,0.192 ,0.408 ,0.684 ,0.208 ,0.164 +,0.16 ,0.36 ,0.22 ,0.512 ,0.636 +,0.372 ,0.376 ,0.548 ,0.636 ,0.824 +,0.132 ,0.308 ,0.72 ,0.916 ,0.688 +,0.556 ,0.556 ,0.608 ,0.708 ,0.22 +,0.308 ,0.272 ,0.612 ,0.936 ,0.5 +,0.608 ,0.956 ,0.76 ,0.832 ,0.668 +,0.776 ,0.852 ,0.728 ,0.812 ,0.892 +,0.408 ,0.96 ,0.708 ,0.744 ,0.408 +,0.832 ,0.556 ,0.764 ,0.116 ,0.896 +,0.052 ,0.452 ,0.9 ,0.232 ,0.484 +,0.776 ,0.672 ,0.536 ,0.252 ,0.504 +,0.044 ,0.584 ,0.908 ,0.96 ,0.932 +,0.24 ,0.824 ,0.84 ,0.672 ,0.856 +,0.116 ,0.104 ,0.912 ,0.648 ,0.852 +,0.644 ,0.612 ,0.82 ,0.408 ,0.86 +,0.724 ,0.684 ,0.68 ,0.516 ,0.78 +,0.632 ,0.432 ,0.98 ,0.956 ,0.216 +,0.228 ,0.576 ,0.304 ,0.4 ,0.448 +,0.616 ,0.292 ,0.412 ,0.572 ,0.136 +,0.568 ,0.488 ,0.228 ,0.46 ,0.5 +,0.452 ,0.804 ,0.596 ,0.076 ,0.22 +,0.92 ,0.868 ,0.492 ,0.428 ,0.524 +,0.424 ,0.212 ,0.512 ,0.592 ,0.604 +,0.784 ,0.688 ,0.48 ,0.588 ,0.564 +,0.052 ,0.484 ,0.176 ,0.932 ,0.196 +,0.968 ,0.744 ,0.9 
,0.648 ,0.832 +,0.836 ,0.22 ,0.632 ,0.804 ,0.436 +,0.184 ,0.588 ,0.864 ,0.884 ,0.82 +,0.696 ,0.58 ,0.768 ,0.584 ,0.148 +,0.66 ,0.696 ,0.268 ,0.04 ,0.716 +,0.116 ,0.536 ,0.988 ,0.704 ,0.612 +,0.872 ,0.052 ,0.352 ,0.624 ,0.624 +,0.332 ,0.044 ,0.8 ,0.476 ,0.992 +,0.788 ,0.872 ,0.276 ,0.208 ,0.632 +,0.804 ,0.644 ,0.4 ,0.868 ,0.708 +,0.636 ,0.088 ,0.036 ,0.936 ,0.04 +,0.804 ,0.532 ,0.396 ,0.284 ,0.652 +,0.928 ,0.768 ,0.82 ,0.668 ,0.408 +,0.944 ,0.972 ,0.84 ,0.608 ,0.652 +,0.304 ,0.316 ,0.728 ,0.968 ,0.804 +,0.284 ,0.644 ,0.244 ,0.532 ,0.076 +,0.748 ,0.048 ,0.384 ,0.424 ,0.752 +,0.748 ,0.984 ,0.148 ,0.58 ,0.58 +,0.28 ,0.248 ,0.7 ,0.572 ,0.648 +,0.632 ,0.276 ,0.888 ,0.932 ,0.572 +,0.308 ,0.072 ,0.5 ,0.392 ,0.068 +,0.468 ,0.856 ,0.232 ,0.268,1 +,0.164 ,0.484 ,0.032 ,0.852 ,0.86 +,0.912 ,0.272 ,0.724 ,0.536 ,0.304 +,0.456 ,0.752 ,0.344 ,0.18 ,0.968 +,0.676 ,0.204 ,0.664 ,0.948 ,0.504 +,0.696 ,0.468 ,0.508 ,0.324 ,0.292 +,0.364 ,0.476 ,0.152 ,0.272 ,0.948 +,0.684 ,0.352 ,0.26 ,0.836 ,0.084 +,0.912 ,0.876 ,0.944 ,0.164 ,0.504 +,0.704 ,0.604 ,0.216 ,0.248 ,0.536 +,0.112 ,0.48 ,0.976 ,0.94 ,0.648 +,0.764 ,0.664 ,0.54 ,0.012 ,0.424 +,0.02 ,0.48 ,0.512 ,0.02 ,0.436 +,0.42 ,0.072 ,0.54 ,0.76 ,0.576 +,0.192 ,0.096 ,0.436 ,0.58 ,0.86 +,0.196 ,0.368 ,0.984 ,0.552 ,0.632 +,0.72 ,0.644 ,0.952 ,0.228 ,0.056 +,0.376 ,0.368 ,0.04 ,0.316 ,0.464) +cp=hd(p.table,alpha) +pv=NULL +if(!is.null(p.obs))w=optimize(hdpv,interval=c(.001,.999),dat=p.table,obs=p.obs)$minimum +list(crit.p.value=cp,adj.p.value=w) +} +p.crit.n60<-function(alpha, p.obs = NULL){ +p.table=c(0.24 ,0.656 ,0.648 ,0.364 ,0.856 +,0.052 ,0.476 ,0.304 ,0.216 ,0.72 +,0.476 ,0.908 ,0.912 ,0.216 ,0.212 +,0.476 ,0.488 ,0.868 ,0.072 ,0.088 +,0.504 ,0.26 ,0.192 ,0.244 ,0.512 +,0.432 ,0.32 ,0.236, 1 ,0.396 +,0.504 ,0.4 ,0.756 ,0.996 ,0.08 +,0.14 ,0.224 ,0.964 ,0.492 ,0.94 +,0.528 ,0.652 ,0.884 ,0.84 ,0.648 +,0.592 ,0.844 ,0.572 ,0.104 ,0.712 +,0.948 ,0.612 ,0.88 ,0.684 ,0.552 +,0.716 ,0.156 ,0.996 ,0.296 ,0.62 +,0.02 ,0.164 ,0.532 ,0.372 ,0.104 +,0.78 ,0.996 ,0.84 ,0.552 ,0.588 +,0.668 ,0.088 ,0.78 ,0.76 ,0.708 +,0.208 ,0.976 ,0.336 ,0.052 ,0.904 +,0.648 ,0.588 ,0.668 ,0.108 ,0.996 +,0.808 ,0.824 ,0.312 ,0.808 ,0.936 +,0.616 ,0.212 ,0.496 ,0.628 ,0.736 +,0.152 ,0.24 ,0.504 ,0.964 ,0.808 +,0.528 ,0.232 ,0.2 ,0.356 ,0.26 +,0.984 ,0.832 ,0.424 ,0.584 ,0.2 +,0.356 ,0.432 ,0.568 ,0.348 ,0.784 +,0.364 ,0.368 ,0.92 ,0.124 ,0.556 +,0.096 ,0.828 ,0.676 ,0.752 ,0.724 +,0.724 ,0.168 ,0.524 ,0.064 ,0.876 +,0.112 ,0.408 ,0.544 ,0.56 ,0.104 +,0.288 ,0.808 ,0.116 ,0.54 ,0.008 +,0.988 ,0.46 ,0.616 ,0.644 ,0.64 +,0.996 ,0.36 ,0.472 ,0.544 ,0.316 +,0.848 ,0.868 ,0.872 ,0.46 ,0.816 +,0.16 ,0.444 ,0.688 ,0.008 ,0.96 +,0.296 ,0.132 ,0.868 ,0.452 ,0.352 +,0.996 ,0.696 ,0.816 ,0.668 ,0.748 +,0.024 ,0.968 ,0.692 ,0.8 ,0.2 +,0.548 ,0.632 ,0.824 ,0.668 ,0.216 +,0.228 ,0.336 ,0.388 ,0.824 ,0.824 +,0.376 ,0.728 ,0.72 ,0.932 ,0.1 +,0.136 ,0.1 ,0.96 ,0.988 ,0.516 +,0.86 ,0.576 ,0.952 ,0.78 ,0.84 +,0.948 ,0.94 ,0.536 ,0.704 ,0.816 +,0.352 ,0.164 ,0.716 ,0.264 ,0.94 +,0.228 ,0.404 ,0.704 ,0.744 ,0.308 +,0.156 ,0.468 ,0.124 ,0.708 ,0.676 +,0.432 ,0.472 ,0.244 ,0.124 ,0.124 +,0.908 ,0.36 ,0.668 ,0.34 ,0.8 +,0.48 ,0.112 ,0.792 ,0.428 ,0.724 +,0.28 ,0.724 ,0.768 ,0.972 ,0.524 +,0.436 ,0.008 ,0.664 ,0.648 ,0.704 +,0.94 ,0.12 ,0.308 ,0.884 ,0.824 +,0.248 ,0.112 ,0.572 ,0.492 ,0.052 +,0.664 ,0.788 ,0.604 ,0.344 ,0.288 +,0.996 ,0.696 ,0.996 ,0.852 ,0.28 +,0.004 ,0.276 ,0.732 ,0.964 ,0.248 +,0.456 ,0.044 ,0.232 ,0.776 ,0.196 +,0.344 ,0.248 ,0.84 ,0.716 ,0.764 +,0.628 ,0.312 ,0.616 ,0.352 ,0.944 +,0.156 
,0.032 ,0.948 ,0.532 ,0.3 +,0.792 ,0.844 ,0.148 ,0.224 ,0.512 +,0.328 ,0.104 ,0.344 ,0.652 ,0.932 +,0.972 ,0.356 ,0.168 ,0.284 ,0.364 +,0.276 ,0.68 ,0.376 ,0.436 ,0.016 +,0.936 ,0.416 ,0.212 ,0.664 ,0.824, +1 ,0.9 ,0.652 ,0.836 ,0.2 +,0.036 ,0.072 ,0.88 ,0.748 ,0.668 +,0.964 ,0.6 ,0.772 ,0.288 ,0.968 +,0.484 ,0.928 ,0.436 ,0.588 ,0.976 +,0.364 ,0.508 ,0.064 ,0.784 ,0.884 +,0.54 ,0.08 ,0.252 ,0.768 ,0.156 +,0.872, 0 ,0.672 ,0.572 ,0.94 +,0.272 ,0.936 ,0.792 ,0.824 ,0.092 +,0.884 ,0.492 ,0.336 ,0.724 ,0.64 +,0.124 ,0.896 ,0.308 ,0.224 ,0.64 +,0.932 ,0.712 ,0.1 ,0.884 ,0.76 +,0.808 ,0.1 ,0.384 ,0.416 ,0.828 +,0.992 ,0.6 ,0.288 ,0.02 ,0.392 +,0.34 ,0.9 ,0.444 ,0.892 ,0.76 +,0.964 ,0.88 ,0.428 ,0.612 ,0.728 +,0.104 ,0.268 ,0.488 ,0.348 ,0.488 +,0.208 ,0.52 ,0.96 ,0.572 ,0.18 +,0.976 ,0.812 ,0.668 ,0.064 ,0.768 +,0.98 ,0.484 ,0.84 ,0.876 ,0.132 +,0.56 ,0.392 ,0.536 ,0.48 ,0.096 +,0.608 ,0.556 ,0.196 ,0.884 ,0.744 +,0.944 ,0.432 ,0.104 ,0.444 ,0.324 +,0.896 ,0.472 ,0.716 ,0.792 ,0.984 +,0.528 ,0.472 ,0.716 ,0.368 ,0.644 +,0.432 ,0.288 ,0.004 ,0.716 ,0.688 +,0.684 ,0.94 ,0.772 ,0.856 ,0.16 +,0.784 ,0.244 ,0.412 ,0.384 ,0.264 +,0.028 ,0.128 ,0.14 ,0.328 ,0.896 +,0.224 ,0.676 ,0.32 ,0.668 ,0.156 +,0.852 ,0.648 ,0.232 ,0.6 ,0.804 +,0.692 ,0.528 ,0.728 ,0.588 ,0.64 +,0.252 ,0.304 ,0.8 ,0.052 ,0.492 +,0.76 ,0.4 ,0.708 ,0.628 ,0.888 +,0.992 ,0.22 ,0.276 ,0.64 ,0.644 +,0.232 ,0.476 ,0.584 ,0.72 ,0.5 +,0.688 ,0.08 ,0.208 ,0.488 ,0.836 +,0.708, 1 ,0.948 ,0.364 ,0.424 +,0.636 ,0.94 ,0.308 ,0.172 ,0.352 +,0.536 ,0.112 ,0.968 ,0.832 ,0.192 +,0.36 ,0.888 ,0.552 ,0.784 ,0.376 +,0.956 ,0.2 ,0.504 ,0.676 ,0.224 +,0.66 ,0.172 ,0.256 ,0.664 ,0.324 +,0.648 ,0.936 ,0.676 ,0.184 ,0.552 +,0.648 ,0.812 ,0.304 ,0.464 ,0.564 +,0.264 ,0.14 ,0.088 ,0.52 ,0.516 +,0.404 ,0.464 ,0.852 ,0.2 ,0.66 +,0.668 ,0.228 ,0.024 ,0.5 ,0.436 +,0.712 ,0.776 ,0.264 ,0.576 ,0.956 +,0.912 ,0.892 ,0.864 ,0.268 ,0.56 +,0.528 ,0.368 ,0.036 ,0.536 ,0.412 +,0.784 ,0.776 ,0.484 ,0.7 ,0.016 +,0.732 ,0.672 ,0.544 ,0.312 ,0.128 +,0.292 ,0.832 ,0.34 ,0.74 ,0.916 +,0.764 ,0.34 ,0.256 ,0.164 ,0.704 +,0.552 ,0.3 ,0.032 ,0.736 ,0.62 +,0.192 ,0.532 ,0.38 ,0.552 ,0.264 +,0.504 ,0.148 ,0.856 ,0.16 ,0.252 +,0.236 ,0.048 ,0.144 ,0.08 ,0.912 +,0.08 ,0.828 ,0.072 ,0.708 ,0.5 +,0.324 ,0.196 ,0.48 ,0.304 ,0.28 +,0.764 ,0.516 ,0.42 ,0.948 ,0.352 +,0.408 ,0.248 ,0.212 ,0.596 ,0.252 +,0.54 ,0.008 ,0.984 ,0.512 ,0.46 +,0.716 ,0.396 ,0.288 ,0.416 ,0.544 +,0.164 ,0.676 ,0.636 ,0.68 ,0.52 +,0.108 ,0.484 ,0.936 ,0.892 ,0.292 +,0.988 ,0.808 ,0.432 ,0.712 ,0.96 +,0.296 ,0.956 ,0.4 ,0.576 ,0.52 +,0.328 ,0.708 ,0.272 ,0.58 ,0.832 +,0.476 ,0.544 ,0.676 ,0.636 ,0.096 +,0.712 ,0.076 ,0.98 ,0.584 ,0.816 +,0.524 ,0.08 ,0.244 ,0.392 ,0.784 +,0.588 ,0.256 ,0.372 ,0.212 ,0.512 +,0.936 ,0.176 ,0.636 ,0.088 ,0.62 +,0.928 ,0.608 ,0.564 ,0.54 ,0.152 +,0.736 ,0.732 ,0.1 ,0.412 ,0.596 +,0.58 ,0.044 ,0.792 ,0.876 ,0.464 +,0.88 ,0.732 ,0.112 ,0.304 ,0.88 +,0.748 ,0.944 ,0.64 ,0.74 ,0.26 +,0.184 ,0.532 ,0.256 ,0.172 ,0.808 +,0.828 ,0.44 ,0.976 ,0.356 ,0.316, +1 ,0.736, 1 ,0.116 ,0.292 +,0.908 ,0.86 ,0.344 ,0.236 ,0.476 +,0.444 ,0.64 ,0.668 ,0.98 ,0.416 +,0.344 ,0.916 ,0.84 ,0.5 ,0.548 +,0.92 ,0.68 ,0.82 ,0.956 ,0.368 +,0.764 ,0.536 ,0.192 ,0.272 ,0.892 +,0.148 ,0.2 ,0.644 ,0.252 ,0.468 +,0.304 ,0.248 ,0.96 ,0.768 ,0.632 +,0.268 ,0.588 ,0.54 ,0.444 ,0.548 +,0.856 ,0.732 ,0.884 ,0.672 ,0.324 +,0.62 ,0.524 ,0.568 ,0.08 ,0.992 +,0.744 ,0.484 ,0.628 ,0.644 ,0.612, +0 ,0.592 ,0.948 ,0.128 ,0.892 +,0.972 ,0.108 ,0.68 ,0.72 ,0.876 +,0.328 ,0.236 ,0.612 ,0.716 ,0.912 +,0.168 ,0.612 ,0.12 ,0.676 ,0.58 
+,0.504 ,0.34 ,0.964 ,0.256 ,0.668 +,0.584 ,0.388 ,0.016 ,0.796 ,0.68 +,0.628 ,0.476 ,0.776 ,0.696 ,0.348 +,0.656 ,0.036 ,0.036 ,0.596 ,0.824 +,0.464 ,0.732 ,0.58 ,0.364 ,0.38 +,0.632 ,0.488 ,0.108 ,0.832 ,0.856 +,0.448 ,0.272 ,0.932 ,0.388 ,0.788 +,0.476 ,0.576 ,0.776 ,0.672 ,0.312 +,0.7 ,0.86 ,0.784 ,0.988 ,0.328 +,0.792 ,0.196 ,0.236 ,0.344 ,0.396 +,0.596 ,0.98 ,0.972 ,0.492 ,0.624 +,0.68 ,0.744 ,0.996 ,0.548 ,0.976 +,0.828 ,0.764 ,0.784 ,0.408 ,0.768 +,0.452 ,0.232 ,0.572 ,0.112 ,0.468 +,0.96 ,0.772 ,0.668 ,0.928 ,0.788 +,0.832 ,0.128 ,0.104 ,0.652 ,0.972 +,0.888 ,0.852 ,0.572 ,0.484 ,0.272 +,0.44 ,0.948 ,0.352 ,0.684 ,0.932 +,0.46 ,0.576 ,0.044 ,0.456 ,0.972 +,0.904 ,0.784 ,0.048 ,0.312 ,0.352 +,0.844 ,0.8 ,0.616 ,0.676 ,0.604 +,0.836 ,0.936 ,0.732 ,0.728 ,0.372 +,0.34 ,0.344 ,0.988 ,0.312 ,0.688 +,0.04 ,0.272 ,0.632 ,0.576 ,0.244 +,0.188 ,0.46 ,0.78 ,0.648 ,0.248 +,0.308 ,0.02 ,0.712 ,0.572 ,0.392 +,0.884 ,0.64 ,0.048 ,0.34 ,0.724 +,0.832 ,0.932 ,0.664 ,0.336 ,0.8 +,0.52 ,0.352 ,0.844 ,0.084 ,0.424 +,0.548 ,0.692 ,0.188 ,0.364 ,0.872 +,0.136 ,0.084 ,0.952 ,0.652 ,0.592 +,0.028 ,0.836 ,0.804 ,0.536 ,0.984 +,0.932 ,0.984 ,0.628 ,0.84 ,0.8 +,0.496 ,0.196 ,0.664 ,0.608 ,0.416 +,0.032 ,0.448 ,0.348 ,0.984 ,0.912 +,0.448 ,0.856 ,0.588 ,0.896 ,0.776 +,0.092 ,0.728 ,0.928 ,0.496 ,0.432 +,0.056 ,0.468 ,0.364 ,0.488 ,0.224 +,0.788 ,0.084 ,0.096 ,0.788 ,0.62 +,0.22 ,0.804 ,0.18 ,0.532 ,0.368 +,0.496 ,0.476 ,0.904 ,0.156 ,0.744 +,0.344 ,0.396 ,0.152 ,0.644 ,0.5 +,0.888 ,0.276 ,0.756 ,0.604 ,0.76 +,0.92 ,0.412 ,0.872 ,0.536 ,0.612 +,0.54 ,0.216 ,0.668 ,0.6 ,0.148 +,0.78 ,0.672 ,0.472 ,0.816 ,0.844 +,0.964 ,0.42 ,0.824 ,0.78 ,0.296 +,0.956 ,0.104 ,0.704 ,0.288 ,0.92 +,0.984 ,0.228 ,0.528 ,0.804 ,0.5 +,0.668 ,0.356 ,0.86 ,0.536 ,0.412 +,0.028 ,0.04 ,0.284 ,0.536 ,0.348 +,0.524 ,0.796 ,0.872 ,0.912 ,0.216 +,0.496 ,0.912 ,0.684 ,0.18 ,0.14 +,0.132 ,0.48 ,0.64 ,0.524 ,0.992 +,0.416 ,0.764 ,0.484 ,0.848 ,0.788 +,0.764 ,0.588 ,0.284 ,0.7 ,0.056 +,0.76 ,0.408 ,0.664 ,0.744 ,0.104 +,0.86 ,0.96 ,0.66 ,0.816 ,0.256 +,0.892 ,0.62 ,0.692 ,0.832 ,0.592 +,0.052 ,0.84 ,0.96 ,0.888 ,0.828 +,0.244 ,0.388 ,0.036 ,0.188 ,0.34 +,0.94 ,0.304 ,0.472 ,0.436 ,0.728 +,0.636 ,0.796 ,0.836 ,0.748 ,0.328 +,0.452 ,0.248 ,0.352 ,0.212 ,0.892 +,0.8 ,0.892 ,0.816 ,0.708 ,0.356 +,0.344 ,0.5 ,0.232 ,0.548 ,0.744 +,0.56 ,0.28 ,0.964 ,0.284 ,0.352 +,0.268 ,0.908 ,0.924 ,0.664 ,0.788 +,0.56 ,0.616 ,0.748 ,0.208 ,0.476 +,0.632 ,0.88 ,0.364 ,0.192 ,0.824 +,0.368 ,0.188 ,0.86 ,0.872 ,0.196 +,0.86 ,0.204 ,0.616 ,0.572 ,0.56 +,0.44 ,0.972 ,0.952 ,0.812 ,0.268 +,0.14 ,0.276 ,0.736 ,0.6 ,0.312 +,0.404 ,0.636 ,0.376 ,0.064 ,0.416 +,0.4 ,0.248 ,0.904 ,0.412 ,0.748 +,0.316 ,0.064 ,0.524 ,0.632 ,0.588 +,0.536 ,0.656 ,0.768 ,0.68 ,0.14 +,0.448 ,0.568 ,0.708 ,0.156 ,0.628 +,0.596 ,0.84 ,0.788 ,0.052 ,0.14 +,0.724 ,0.856 ,0.544 ,0.616 ,0.544 +,0.496 ,0.268 ,0.776 ,0.316 ,0.152 +,0.72 ,0.684 ,0.1 ,0.84 ,0.04 +,0.788 ,0.948 ,0.868 ,0.196 ,0.508 +,0.212 ,0.412 ,0.872 ,0.652 ,0.072 +,0.412 ,0.988 ,0.86 ,0.912 ,0.156 +,0.432 ,0.54 ,0.492 ,0.624 ,0.508 +,0.508 ,0.992 ,0.852 ,0.788 ,0.244 +,0.632 ,0.824 ,0.112 ,0.036 ,0.184 +,0.668 ,0.756 ,0.884 ,0.788 ,0.52 +,0.44 ,0.084 ,0.932 ,0.752 ,0.6 +,0.656 ,0.84 ,0.832 ,0.6 ,0.664 +,0.96 ,0.664 ,0.984 ,0.652 ,0.832 +,0.928 ,0.592 ,0.44 ,0.304 ,0.86 +,0.04 ,0.724 ,0.636 ,0.188 ,0.96 +,0.9 ,0.564 ,0.28 ,0.34 ,0.492 +,0.456 ,0.988 ,0.328 ,0.612 ,0.76 +,0.964 ,0.736 ,0.744 ,0.64 ,0.272 +,0.436 ,0.064 ,0.688 ,0.416 ,0.956 +,0.824 ,0.14 ,0.648 ,0.492 ,0.072 +,0.244 ,0.436 ,0.728 ,0.48 ,0.188 +,0.168 ,0.432 ,0.46 
,0.044 ,0.824 +,0.924 ,0.336 ,0.328 ,0.224 ,0.96 +,0.552 ,0.948 ,0.712 ,0.96 ,0.396 +,0.716 ,0.16 ,0.176 ,0.988 ,0.62 +,0.892 ,0.18 ,0.576 ,0.264 ,0.924 +,0.16 ,0.356 ,0.724 ,0.772 ,0.532 +,0.944 ,0.744 ,0.268 ,0.184 ,0.972 +,0.968 ,0.044 ,0.732 ,0.732 ,0.616 +,0.716 ,0.804 ,0.86 ,0.344 ,0.704 +,0.932 ,0.568 ,0.356 ,0.944 ,0.232 +,0.452 ,0.212 ,0.692 ,0.88 ,0.86 +,0.4 ,0.904 ,0.008 ,0.528 ,0.576 +,0.86 ,0.696 ,0.116 ,0.444 ,0.472 +,0.224 ,0.372 ,0.248 ,0.68 ,0.104 +,0.476 ,0.228 ,0.532 ,0.24 ,0.648 +,0.828 ,0.416 ,0.08 ,0.48 ,0.76 +,0.3 ,0.556 ,0.628 ,0.396 ,0.864 +,0.272 ,0.564 ,0.984 ,0.312 ,0.968 +,0.448 ,0.044 ,0.664 ,0.408 ,0.732 +,0.464 ,0.532 ,0.76 ,0.712 ,0.132 +,0.324 ,0.936 ,0.872 ,0.768 ,0.432 +,0.848 ,0.464 ,0.72 ,0.496 ,0.464 +,0.752 ,0.808 ,0.372 ,0.204 ,0.604 +,0.432 ,0.128 ,0.268 ,0.336 ,0.728 +,0.824 ,0.212 ,0.704 ,0.172 ,0.408 +,0.9 ,0.924 ,0.448 ,0.912 ,0.688 +,0.748 ,0.672 ,0.044 ,0.704 ,0.568 +,0.356 ,0.116 ,0.94 ,0.688 ,0.948 +,0.776 ,0.664 ,0.732 ,0.108 ,0.72 +,0.24 ,0.964 ,0.42 ,0.412 ,0.764 +,0.104 ,0.868 ,0.308 ,0.62 ,0.608 +,0.404 ,0.6 ,0.664 ,0.152 ,0.432 +,0.544 ,0.26 ,0.604 ,0.584 ,0.64 +,0.404 ,0.08 ,0.244 ,0.452 ,0.78 +,0.7 ,0.896 ,0.66, 0 ,0.5 +,0.368 ,0.468 ,0.432 ,0.76 ,0.68 +,0.528 ,0.548 ,0.076 ,0.448 ,0.288 +,0.592 ,0.64 ,0.668 ,0.548 ,0.32 +,0.852 ,0.916 ,0.9 ,0.696 ,0.6 +,0.968 ,0.22 ,0.992 ,0.456 ,0.788 +,0.168 ,0.988 ,0.896 ,0.268 ,0.552 +,0.596 ,0.54 ,0.384 ,0.596 ,0.896 +,0.896 ,0.14 ,0.588 ,0.52 ,0.508 +,0.784 ,0.892 ,0.548 ,0.652 ,0.34 +,0.94 ,0.78 ,0.76 ,0.8 ,0.22 +,0.78 ,0.428 ,0.504 ,0.592 ,0.084 +,0.928 ,0.324 ,0.664 ,0.732 ,0.784 +,0.98 ,0.38 ,0.812 ,0.236 ,0.092 +,0.156 ,0.712 ,0.424 ,0.776 ,0.612 +,0.156 ,0.544 ,0.332 ,0.292 ,0.644 +,0.804 ,0.42 ,0.368 ,0.004 ,0.74 +,0.52 ,0.472 ,0.06 ,0.664 ,0.572 +,0.684 ,0.592 ,0.476 ,0.116 ,0.296 +,0.564 ,0.24 ,0.556 ,0.488 ,0.588 +,0.168 ,0.324 ,0.408 ,0.284 ,0.472 +,0.56 ,0.752 ,0.76 ,0.992 ,0.16 +,0.42 ,0.564 ,0.984 ,0.82 ,0.72 +,0.356 ,0.328 ,0.96 ,0.356 ,0.644 +,0.268 ,0.544 ,0.104 ,0.84 ,0.972 +,0.556 ,0.248 ,0.04 ,0.372 ,0.592 +,0.588 ,0.468 ,0.968 ,0.44 ,0.64 +,0.216 ,0.792 ,0.476 ,0.724 ,0.068 +,0.472 ,0.992 ,0.484 ,0.888 ,0.908 +,0.376 ,0.84 ,0.468 ,0.748 ,0.356 +,0.712 ,0.628 ,0.912 ,0.496 ,0.648 +,0.124 ,0.396 ,0.508 ,0.312 ,0.128 +,0.788 ,0.92 ,0.468 ,0.372 ,0.216 +,0.54 ,0.84 ,0.608 ,0.464 ,0.744 +,0.336 ,0.184 ,0.496 ,0.44 ,0.444 +,0.136 ,0.504 ,0.568 ,0.852 ,0.804 +,0.376 ,0.708 ,0.676 ,0.476 ,0.708 +,0.06 ,0.98 ,0.436 ,0.796 ,0.448 +,0.46 ,0.452 ,0.144 ,0.504 ,0.592 +,0.848 ,0.628 ,0.5 ,0.784 ,0.492 +,0.444 ,0.196 ,0.876 ,0.832 ,0.636 +,0.24 ,0.908 ,0.484 ,0.544 ,0.808 +,0.256 ,0.664 ,0.272 ,0.716 ,0.196 +,0.272 ,0.484 ,0.94 ,0.168 ,0.956 +,0.856 ,0.82, 0 ,0.868 ,0.796 +,0.44 ,0.656 ,0.82 ,0.208 ,0.924 +,0.352 ,0.832 ,0.844 ,0.324 ,0.216 +,0.832 ,0.348 ,0.904 ,0.244 ,0.324 +,0.816 ,0.64 ,0.892 ,0.116 ,0.392 +,0.472 ,0.772 ,0.464 ,0.556 ,0.892 +,0.232 ,0.224 ,0.788 ,0.5 ,0.04 +,0.532 ,0.284 ,0.62 ,0.464 ,0.924 +,0.804 ,0.252 ,0.224 ,0.38 ,0.244 +,0.076 ,0.424 ,0.988 ,0.78 ,0.324 +,0.076 ,0.496 ,0.844 ,0.496 ,0.288 +,0.556 ,0.696 ,0.76 ,0.352 ,0.952 +,0.26 ,0.752 ,0.084 ,0.08 ,0.324 +,0.776 ,0.632 ,0.712 ,0.868 ,0.12 +,0.808 ,0.76 ,0.444 ,0.664 ,0.16 +,0.308 ,0.912 ,0.16 ,0.04 ,0.692 +,0.336 ,0.672 ,0.664 ,0.556 ,0.876 +,0.172 ,0.52 ,0.188 ,0.904 ,0.552 +,0.32 ,0.292 ,0.216 ,0.58 ,0.988 +,0.724 ,0.704 ,0.212 ,0.74 ,0.348 +,0.912 ,0.592 ,0.108 ,0.332 ,0.536 +,0.22 ,0.452 ,0.124 ,0.98 ,0.364 +,0.908 ,0.272 ,0.564 ,0.556 ,0.668 +,0.432 ,0.928 ,0.596 ,0.992 ,0.456 +,0.832 ,0.624 ,0.832 
,0.064 ,0.52 +,0.096 ,0.492 ,0.62 ,0.416 ,0.448 +,0.928 ,0.68 ,0.976 ,0.192 ,0.728 +,0.496 ,0.756 ,0.372 ,0.18 ,0.196 +,0.808 ,0.816 ,0.996 ,0.352 ,0.648 +,0.48 ,0.172 ,0.94 ,0.864 ,0.844 +,0.228 ,0.4 ,0.12 ,0.12 ,0.344 +,0.952 ,0.632 ,0.376 ,0.264 ,0.88 +,0.252 ,0.332 ,0.56 ,0.756 ,0.468 +,0.144 ,0.26 ,0.316 ,0.528 ,0.224 +,0.512 ,0.568 ,0.724 ,0.912 ,0.384 +,0.616 ,0.304 ,0.652 ,0.2 ,0.996 +,0.292 ,0.36 ,0.788 ,0.612 ,0.768 +,0.748 ,0.624 ,0.664 ,0.696 ,0.792 +,0.64 ,0.176 ,0.78 ,0.8 ,0.6 +,0.108 ,0.568 ,0.168 ,0.92 ,0.044 +,0.872 ,0.848 ,0.296 ,0.9 ,0.648 +,0.544 ,0.124 ,0.66 ,0.664 ,0.28 +,0.164 ,0.564 ,0.768 ,0.552 ,0.852 +,0.508 ,0.652 ,0.8 ,0.532 ,0.596 +,0.204 ,0.036 ,0.22 ,0.076 ,0.972 +,0.684 ,0.148 ,0.248 ,0.24 ,0.948 +,0.356 ,0.06 ,0.684 ,0.244 ,0.516 +,0.192 ,0.912 ,0.388 ,0.656 ,0.852 +,0.644 ,0.704 ,0.976 ,0.9 ,0.664 +,0.98 ,0.744 ,0.156 ,0.676 ,0.78 +,0.936 ,0.78 ,0.284 ,0.3 ,0.904 +,0.38 ,0.324 ,0.524 ,0.228 ,0.7 +,0.264 ,0.868 ,0.62 ,0.416 ,0.356 +,0.772 ,0.464 ,0.92 ,0.9 ,0.148 +,0.204 ,0.364 ,0.956 ,0.888 ,0.536 +,0.196 ,0.048 ,0.232 ,0.872 ,0.496 +,0.524 ,0.576 ,0.7 ,0.368 ,0.248 +,0.532 ,0.408 ,0.372 ,0.492 ,0.432 +,0.508 ,0.468 ,0.576 ,0.704 ,0.84 +,0.472 ,0.08 ,0.728 ,0.548 ,0.336 +,0.572 ,0.564 ,0.032 ,0.352 ,0.84) +cp=hd(p.table,alpha) +pv=NULL +if(!is.null(p.obs))w=optimize(hdpv,interval=c(.001,.999),dat=p.table,obs=p.obs)$minimum +list(crit.p.value=cp,adj.p.value=w) +} +p.crit.n80<-function(alpha=.05,p.obs=NULL){ +p.table=c( +0.46 ,0.36 ,0.66 ,0.56 ,0.704 +,0.848 ,0.008 ,0.232 ,0.072 ,0.784 +,0.944 ,0.096, 0 ,0.252 ,0.464 +,0.132, 1 ,0.116 ,0.288 ,0.236 +,0.512 ,0.056 ,0.68 ,0.356 ,0.164 +,0.36 ,0.444 ,0.448 ,0.656 ,0.464 +,0.616 ,0.296 ,0.7 ,0.34 ,0.152 +,0.248 ,0.776 ,0.516 ,0.084 ,0.908 +,0.084 ,0.268 ,0.048 ,0.612 ,0.876 +,0.752 ,0.108 ,0.916 ,0.756 ,0.424 +,0.772 ,0.044 ,0.788 ,0.936 ,0.48 +,0.824 ,0.784 ,0.944 ,0.5 ,0.236 +,0.564 ,0.956 ,0.1 ,0.536 ,0.772 +,0.82 ,0.956 ,0.556 ,0.76 ,0.78 +,0.144 ,0.512 ,0.964 ,0.928 ,0.04 +,0.86 ,0.364 ,0.98 ,0.252 ,0.548 +,0.252 ,0.264 ,0.96 ,0.46 ,0.744 +,0.932 ,0.58 ,0.448 ,0.708 ,0.928 +,0.976 ,0.288 ,0.224 ,0.436 ,0.84 +,0.056 ,0.68 ,0.04 ,0.848 ,0.46 +,0.104 ,0.5 ,0.736 ,0.808 ,0.436 +,0.692 ,0.944 ,0.552 ,0.024 ,0.36 +,0.668 ,0.764 ,0.952 ,0.62 ,0.072 +,0.972 ,0.816 ,0.392 ,0.964 ,0.356 +,0.62 ,0.272 ,0.416 ,0.68 ,0.116 +,0.632 ,0.904 ,0.36 ,0.212 ,0.632 +,0.664 ,0.576 ,0.56 ,0.516 ,0.876 +,0.82 ,0.736 ,0.044 ,0.648 ,0.36 +,0.328 ,0.42 ,0.708 ,0.868 ,0.356 +,0.076 ,0.856 ,0.828 ,0.124 ,0.72 +,0.4 ,0.5 ,0.028 ,0.64 ,0.936 +,0.492 ,0.96 ,0.28 ,0.688 ,0.488 +,0.684 ,0.24 ,0.828 ,0.624 ,0.928 +,0.492 ,0.696 ,0.2 ,0.424 ,0.868 +,0.22 ,0.532 ,0.204 ,0.376 ,0.256 +,0.612 ,0.64 ,0.72 ,0.76 ,0.216 +,0.468 ,0.756 ,0.576 ,0.856 ,0.132 +,0.916 ,0.248 ,0.364 ,0.828 ,0.896 +,0.536 ,0.792 ,0.6 ,0.72 ,0.86 +,0.592 ,0.056 ,0.628 ,0.016 ,0.688 +,0.656 ,0.484 ,0.26 ,0.66 ,0.304 +,0.476 ,0.648 ,0.848 ,0.68 ,0.832 +,0.92 ,0.568, 1 ,0.056 ,0.236 +,0.648 ,0.42 ,0.284 ,0.708 ,0.296 +,0.944 ,0.952 ,0.336 ,0.48 ,0.356 +,0.86 ,0.496 ,0.976 ,0.692 ,0.624 +,0.1 ,0.568 ,0.236 ,0.88 ,0.36 +,0.864 ,0.124 ,0.94 ,0.884 ,0.512 +,0.22 ,0.896 ,0.3 ,0.684 ,0.7 +,0.316 ,0.696 ,0.66 ,0.864 ,0.548 +,0.884 ,0.656 ,0.204 ,0.88 ,0.936 +,0.264 ,0.604 ,0.34 ,0.832 ,0.728 +,0.644 ,0.924 ,0.524 ,0.808 ,0.612 +,0.36 ,0.936 ,0.884 ,0.904 ,0.748 +,0.6 ,0.648 ,0.16 ,0.8 ,0.312 +,0.42 ,0.544 ,0.744 ,0.292 ,0.5 +,0.028 ,0.804 ,0.9 ,0.648 ,0.984 +,0.432 ,0.844 ,0.936 ,0.796 ,0.948 +,0.608 ,0.976 ,0.552 ,0.94 ,0.424 +,0.848 ,0.916 ,0.728 ,0.764 ,0.604 +,0.508 ,0.74 
,0.468 ,0.268 ,0.748 +,0.072 ,0.468 ,0.82 ,0.24 ,0.596 +,0.18 ,0.188 ,0.612 ,0.152 ,0.996 +,0.96 ,0.332 ,0.72 ,0.44 ,0.364 +,0.704 ,0.612 ,0.248 ,0.72 ,0.568 +,0.956 ,0.524 ,0.352 ,0.708 ,0.368 +,0.924 ,0.384 ,0.476 ,0.912 ,0.736 +,0.368 ,0.412 ,0.232 ,0.348 ,0.016 +,0.568 ,0.34 ,0.608 ,0.356 ,0.772 +,0.944 ,0.336 ,0.504 ,0.908 ,0.812 +,0.292 ,0.904 ,0.16 ,0.076 ,0.928 +,0.912 ,0.12 ,0.28 ,0.156 ,0.248 +,0.988 ,0.44 ,0.764 ,0.088 ,0.256 +,0.208 ,0.08 ,0.288 ,0.172 ,0.428 +,0.428 ,0.276 ,0.084 ,0.344 ,0.132 +,0.492 ,0.728 ,0.26 ,0.956 ,0.56 +,0.344 ,0.176 ,0.864 ,0.54 ,0.24 +,0.724 ,0.384 ,0.916 ,0.956 ,0.692 +,0.88 ,0.66 ,0.372 ,0.128 ,0.568 +,0.636 ,0.28 ,0.288 ,0.888 ,0.872 +,0.42 ,0.356 ,0.604 ,0.72 ,0.852 +,0.408 ,0.976 ,0.52 ,0.556 ,0.9 +,0.364 ,0.716 ,0.588 ,0.72 ,0.312 +,0.224 ,0.26 ,0.116 ,0.952 ,0.404 +,0.952 ,0.948 ,0.22 ,0.676 ,0.58 +,0.724 ,0.144 ,0.084 ,0.396 ,0.664 +,0.16 ,0.412 ,0.796 ,0.476 ,0.284 +,0.8 ,0.348 ,0.736 ,0.26 ,0.672 +,0.372 ,0.904 ,0.768 ,0.82 ,0.736 +,0.548 ,0.788 ,0.068 ,0.008 ,0.548 +,0.304, 1 ,0.2 ,0.12 ,0.168 +,0.4 ,0.504 ,0.68 ,0.96 ,0.924 +,0.884 ,0.348 ,0.044 ,0.236 ,0.416 +,0.32 ,0.612 ,0.512 ,0.34 ,0.604 +,0.868 ,0.412 ,0.376 ,0.376 ,0.88 +,0.864 ,0.928 ,0.364 ,0.42 ,0.048 +,0.116 ,0.66 ,0.916 ,0.344 ,0.596 +,0.768 ,0.84 ,0.964 ,0.92 ,0.948 +,0.54 ,0.828 ,0.44 ,0.932 ,0.972 +,0.244 ,0.948 ,0.1 ,0.228 ,0.88 +,0.808 ,0.404 ,0.016 ,0.996 ,0.236 +,0.88 ,0.076 ,0.156 ,0.172 ,0.692 +,0.312 ,0.248 ,0.968 ,0.264 ,0.088 +,0.296 ,0.824 ,0.444 ,0.24 ,0.996 +,0.42 ,0.744 ,0.5 ,0.872 ,0.556 +,0.68 ,0.172 ,0.216 ,0.688 ,0.94 +,0.136 ,0.78 ,0.408 ,0.768 ,0.348 +,0.568 ,0.324 ,0.116 ,0.968 ,0.132 +,0.528 ,0.92 ,0.98 ,0.308 ,0.528 +,0.112 ,0.056 ,0.1 ,0.616 ,0.636 +,0.628 ,0.288 ,0.576 ,0.296 ,0.992 +,0.048 ,0.088 ,0.664, 1 ,0.044 +,0.796 ,0.284 ,0.02 ,0.692 ,0.488 +,0.524 ,0.344 ,0.472 ,0.796 ,0.244 +,0.112 ,0.9 ,0.012 ,0.328 ,0.508 +,0.664 ,0.892 ,0.404 ,0.792 ,0.744 +,0.752 ,0.864 ,0.448 ,0.756 ,0.252 +,0.1 ,0.788 ,0.948 ,0.448 ,0.964 +,0.416 ,0.5 ,0.236 ,0.828 ,0.344 +,0.964 ,0.552 ,0.392 ,0.948 ,0.864 +,0.908 ,0.12, 0 ,0.14 ,0.516 +,0.856 ,0.476 ,0.828 ,0.232 ,0.636 +,0.612 ,0.668 ,0.892 ,0.792 ,0.76 +,0.968 ,0.072 ,0.896 ,0.636 ,0.62 +,0.32 ,0.072 ,0.684 ,0.6 ,0.9 +,0.452 ,0.196 ,0.892 ,0.788 ,0.532 +,0.46 ,0.576 ,0.6 ,0.948 ,0.98 +,0.992 ,0.156 ,0.292 ,0.956 ,0.7 +,0.472 ,0.428 ,0.6 ,0.772 ,0.864 +,0.388 ,0.636 ,0.308 ,0.492 ,0.188 +,0.144 ,0.916 ,0.808 ,0.76 ,0.212 +,0.516 ,0.556 ,0.056 ,0.2 ,0.676 +,0.076 ,0.62 ,0.984 ,0.824 ,0.204 +,0.024 ,0.656 ,0.176 ,0.804 ,0.936 +,0.576 ,0.316 ,0.544 ,0.94 ,0.128 +,0.62 ,0.464 ,0.116 ,0.188 ,0.372 +,0.732 ,0.956 ,0.256 ,0.832 ,0.816 +,0.152 ,0.22 ,0.632 ,0.712 ,0.364 +,0.988 ,0.504 ,0.728 ,0.984 ,0.776 +,0.8 ,0.876 ,0.612 ,0.896 ,0.152 +,0.532 ,0.88 ,0.968 ,0.256 ,0.456 +,0.552 ,0.056 ,0.352 ,0.808 ,0.64 +,0.172 ,0.176 ,0.092 ,0.62 ,0.9 +,0.768 ,0.024 ,0.1 ,0.896 ,0.36 +,0.212 ,0.42 ,0.52 ,0.884 ,0.684 +,0.896 ,0.596 ,0.664 ,0.848 ,0.432 +,0.04 ,0.688 ,0.884 ,0.032 ,0.628 +,0.2 ,0.832 ,0.508 ,0.784 ,0.476 +,0.956 ,0.628 ,0.232 ,0.844 ,0.94 +,0.068 ,0.952 ,0.212 ,0.352 ,0.6 +,0.196 ,0.808 ,0.628 ,0.112 ,0.628 +,0.94 ,0.216 ,0.816 ,0.212 ,0.788 +,0.524 ,0.9 ,0.72 ,0.364 ,0.436 +,0.152 ,0.176 ,0.544 ,0.56 ,0.152 +,0.772 ,0.732 ,0.324 ,0.224 ,0.456 +,0.732 ,0.304 ,0.124 ,0.524 ,0.256 +,0.376 ,0.464 ,0.836 ,0.66 ,0.964 +,0.784 ,0.772 ,0.624 ,0.56 ,0.04 +,0.584 ,0.168 ,0.132 ,0.62 ,0.276 +,0.096 ,0.928 ,0.588 ,0.66 ,0.952 +,0.908 ,0.224 ,0.348 ,0.64 ,0.456 +,0.22 ,0.052 ,0.232 ,0.908 ,0.92 +,0.648 ,0.492 ,0.416 ,0.444 ,0.98 
+,0.92 ,0.624 ,0.612 ,0.684 ,0.744 +,0.624 ,0.068 ,0.58 ,0.048 ,0.656 +,0.352 ,0.56 ,0.708 ,0.124 ,0.7 +,0.9 ,0.268 ,0.836 ,0.208 ,0.752 +,0.136 ,0.54 ,0.136 ,0.336 ,0.988 +,0.78 ,0.612 ,0.772 ,0.36 ,0.452 +,0.616 ,0.424 ,0.736 ,0.856 ,0.5 +,0.472 ,0.62 ,0.736 ,0.896 ,0.08 +,0.8 ,0.276 ,0.124 ,0.116 ,0.692 +,0.404 ,0.78 ,0.484 ,0.268 ,0.624 +,0.776 ,0.28 ,0.908 ,0.576 ,0.208 +,0.028 ,0.752 ,0.58 ,0.904 ,0.672 +,0.716 ,0.364 ,0.732 ,0.3 ,0.444 +,0.568 ,0.388 ,0.476 ,0.356 ,0.124 +,0.432 ,0.996 ,0.492 ,0.964 ,0.356 +,0.58 ,0.792 ,0.948 ,0.204 ,0.392 +,0.808 ,0.296 ,0.252 ,0.404, 0 +,0.836 ,0.096 ,0.336 ,0.892 ,0.112 +,0.476 ,0.54 ,0.364 ,0.916 ,0.9 +,0.548 ,0.808 ,0.272 ,0.212 ,0.38 +,0.384 ,0.656 ,0.38 ,0.436 ,0.58 +,0.728 ,0.464 ,0.88 ,0.988 ,0.888 +,0.208 ,0.476 ,0.28 ,0.984 ,0.536 +,0.692 ,0.28 ,0.396 ,0.632 ,0.66 +,0.812 ,0.636 ,0.728 ,0.12 ,0.896 +,0.548 ,0.536 ,0.032 ,0.74 ,0.336 +,0.572 ,0.932 ,0.188 ,0.196 ,0.82 +,0.456 ,0.892 ,0.424 ,0.276 ,0.848 +,0.948 ,0.952 ,0.656 ,0.332 ,0.92 +,0.552 ,0.664 ,0.536 ,0.708 ,0.972 +,0.44 ,0.864 ,0.076, 1 ,0.104 +,0.416 ,0.104 ,0.324 ,0.24 ,0.708 +,0.992 ,0.996 ,0.184 ,0.156 ,0.18 +,0.852 ,0.836 ,0.092 ,0.896 ,0.08 +,0.816 ,0.384 ,0.268 ,0.324 ,0.1 +,0.708 ,0.188 ,0.732 ,0.056 ,0.776 +,0.768 ,0.744 ,0.22 ,0.648 ,0.236 +,0.648 ,0.752 ,0.936 ,0.672 ,0.796 +,0.4 ,0.732 ,0.64 ,0.9 ,0.208 +,0.536 ,0.828 ,0.54 ,0.4 ,0.04 +,0.084 ,0.5 ,0.232 ,0.724 ,0.752 +,0.076 ,0.564 ,0.836 ,0.352 ,0.808 +,0.916 ,0.928 ,0.752 ,0.584 ,0.344 +,0.584 ,0.62 ,0.316 ,0.072 ,0.604 +,0.656 ,0.676 ,0.524 ,0.392 ,0.168 +,0.744 ,0.56 ,0.568 ,0.936 ,0.792 +,0.592 ,0.804 ,0.2 ,0.852 ,0.972 +,0.736 ,0.616 ,0.744 ,0.284 ,0.764 +,0.824 ,0.484 ,0.76 ,0.804 ,0.472 +,0.968 ,0.868 ,0.72 ,0.496 ,0.168 +,0.868 ,0.676 ,0.388 ,0.948 ,0.06 +,0.104 ,0.552 ,0.432 ,0.368 ,0.472 +,0.376 ,0.244 ,0.556 ,0.86 ,0.976 +,0.116 ,0.784 ,0.748 ,0.736 ,0.68 +,0.828 ,0.156 ,0.916 ,0.22 ,0.16 +,0.576 ,0.408 ,0.752 ,0.92 ,0.216 +,0.484 ,0.832 ,0.96 ,0.584 ,0.616 +,0.968 ,0.728 ,0.776 ,0.664 ,0.796 +,0.36 ,0.656 ,0.336 ,0.816 ,0.892 +,0.808 ,0.728 ,0.432 ,0.376 ,0.456 +,0.496 ,0.524 ,0.684 ,0.84 ,0.7 +,0.448 ,0.264 ,0.28 ,0.476 ,0.68 +,0.328 ,0.844 ,0.24 ,0.528 ,0.456 +,0.872 ,0.924 ,0.58 ,0.78 ,0.772 +,0.636 ,0.144 ,0.684 ,0.212 ,0.284 +,0.22 ,0.324 ,0.28 ,0.468 ,0.692 +,0.604 ,0.008 ,0.888 ,0.236 ,0.88 +,0.468 ,0.984 ,0.656 ,0.736 ,0.532 +,0.084 ,0.444 ,0.824 ,0.552 ,0.288 +,0.588 ,0.148 ,0.704 ,0.632 ,0.556 +,0.024 ,0.508 ,0.852 ,0.18 ,0.112 +,0.164 ,0.248 ,0.364 ,0.588 ,0.268 +,0.192 ,0.564 ,0.172 ,0.052 ,0.98 +,0.22 ,0.648 ,0.032 ,0.84 ,0.512 +,0.528 ,0.324 ,0.28 ,0.296 ,0.108 +,0.02 ,0.368 ,0.704 ,0.048 ,0.36 +,0.536 ,0.36 ,0.252 ,0.732 ,0.816 +,0.544 ,0.216 ,0.416 ,0.924 ,0.64 +,0.14 ,0.8 ,0.528 ,0.196 ,0.884 +,0.08 ,0.572 ,0.748 ,0.372 ,0.6 +,0.928 ,0.144 ,0.668 ,0.896 ,0.524 +,0.636 ,0.792 ,0.46 ,0.748 ,0.996 +,0.02 ,0.94 ,0.18 ,0.12 ,0.236 +,0.072 ,0.256 ,0.992 ,0.98 ,0.204 +,0.796 ,0.556 ,0.844 ,0.952 ,0.924 +,0.72 ,0.004 ,0.712 ,0.62 ,0.864 +,0.4 ,0.724 ,0.86 ,0.876, 0 +,0.192 ,0.532 ,0.28 ,0.128 ,0.576 +,0.14 ,0.124 ,0.648 ,0.924 ,0.284 +,0.724 ,0.46 ,0.208 ,0.94 ,0.256 +,0.828 ,0.048 ,0.608 ,0.36 ,0.94 +,0.924 ,0.664 ,0.988 ,0.476 ,0.88 +,0.204 ,0.188 ,0.656 ,0.192 ,0.416 +,0.528 ,0.408 ,0.032 ,0.448 ,0.572 +,0.352 ,0.18 ,0.032 ,0.46 ,0.468 +,0.276 ,0.944 ,0.304 ,0.072 ,0.316 +,0.504 ,0.76 ,0.904 ,0.076 ,0.46 +,0.592 ,0.768 ,0.42 ,0.484 ,0.728 +,0.528 ,0.06 ,0.924 ,0.616 ,0.34 +,0.208, 1 ,0.428 ,0.564 ,0.26 +,0.136 ,0.608 ,0.544 ,0.508 ,0.82 +,0.264 ,0.296 ,0.156 ,0.192 ,0.628 +,0.472 ,0.996 
,0.136 ,0.104 ,0.112 +,0.412 ,0.896 ,0.776 ,0.016 ,0.232 +,0.116 ,0.408 ,0.26 ,0.372 ,0.46 +,0.944 ,0.28 ,0.932 ,0.636 ,0.632 +,0.168 ,0.124 ,0.412 ,0.228 ,0.292 +,0.676 ,0.68 ,0.644 ,0.996 ,0.948 +,0.312 ,0.444 ,0.832 ,0.356 ,0.408 +,0.952 ,0.848 ,0.184 ,0.404 ,0.892 +,0.92 ,0.896 ,0.604 ,0.064 ,0.416 +,0.436 ,0.312 ,0.668 ,0.948 ,0.172 +,0.996 ,0.508 ,0.536 ,0.444 ,0.832 +,0.772 ,0.26 ,0.916 ,0.12 ,0.436 +,0.652 ,0.732 ,0.872 ,0.104 ,0.02 +,0.328 ,0.692 ,0.464 ,0.096 ,0.148 +,0.348 ,0.772 ,0.84 ,0.472 ,0.416 +,0.132 ,0.388 ,0.168 ,0.92 ,0.012 +,0.764 ,0.484 ,0.148 ,0.3 ,0.392 +,0.36 ,0.68 ,0.5 ,0.18, 1 +,0.676 ,0.596 ,0.856 ,0.952 ,0.992 +,0.812 ,0.612 ,0.66 ,0.66 ,0.132 +,0.344 ,0.784 ,0.052 ,0.356 ,0.464 +,0.476 ,0.24 ,0.296 ,0.768 ,0.584 +,0.588 ,0.628 ,0.484 ,0.228 ,0.556 +,0.34 ,0.94 ,0.964 ,0.42 ,0.008 +,0.268 ,0.976 ,0.288 ,0.276 ,0.344 +,0.496 ,0.384 ,0.796 ,0.692 ,0.244 +,0.368 ,0.764 ,0.672 ,0.24 ,0.204 +,0.224 ,0.276 ,0.764 ,0.34 ,0.9 +,0.548 ,0.532 ,0.54 ,0.388 ,0.788 +,0.168 ,0.532 ,0.172 ,0.976 ,0.788 +,0.724 ,0.972 ,0.628 ,0.616 ,0.408 +,0.832 ,0.968 ,0.788 ,0.384 ,0.34 +,0.636 ,0.592 ,0.404 ,0.168 ,0.792 +,0.572 ,0.636 ,0.656 ,0.892 ,0.06 +,0.22 ,0.832 ,0.404 ,0.496 ,0.256 +,0.136 ,0.82 ,0.212 ,0.948 ,0.696 +,0.508 ,0.892 ,0.64 ,0.3 ,0.564 +,0.996 ,0.42 ,0.232 ,0.08 ,0.952 +,0.544 ,0.464 ,0.304 ,0.876 ,0.608 +,0.372 ,0.1 ,0.74 ,0.968 ,0.996 +,0.692 ,0.876 ,0.62 ,0.672 ,0.648 +,0.316 ,0.816 ,0.352 ,0.376 ,0.972 +,0.844 ,0.528 ,0.752 ,0.916 ,0.448 +,0.244 ,0.668 ,0.232 ,0.744 ,0.132 +,0.524 ,0.492 ,0.912 ,0.936 ,0.94 +,0.188 ,0.516 ,0.18 ,0.312 ,0.16 +,0.932 ,0.084 ,0.384 ,0.604 ,0.688 +,0.48 ,0.116 ,0.936 ,0.576 ,0.8 +,0.952 ,0.316 ,0.488 ,0.084 ,0.736 +,0.392 ,0.232 ,0.576 ,0.396 ,0.896 +,0.692 ,0.292 ,0.788 ,0.864 ,0.192 +,0.28 ,0.044 ,0.14 ,0.808 ,0.992 +,0.876 ,0.288 ,0.86 ,0.908 ,0.064 +,0.228 ,0.964 ,0.796 ,0.272 ,0.344 +,0.524 ,0.232 ,0.648 ,0.68 ,0.064 +,0.104 ,0.392 ,0.772 ,0.196 ,0.868 +,0.08 ,0.428 ,0.856 ,0.98 ,0.076 +,0.456 ,0.556 ,0.22 ,0.816 ,0.512 +,0.128 ,0.276 ,0.64 ,0.316 ,0.516 +,0.768 ,0.368 ,0.828 ,0.044 ,0.676 +,0.8 ,0.28 ,0.176 ,0.936 ,0.056 +,0.016 ,0.34 ,0.72 ,0.764 ,0.272 +,0.604 ,0.152 ,0.86 ,0.096 ,0.292 +,0.984 ,0.832 ,0.356 ,0.152 ,0.732 +,0.812 ,0.304 ,0.276 ,0.76 ,0.88 +,0.676 ,0.212 ,0.204 ,0.352 ,0.476 +,0.244 ,0.84 ,0.812 ,0.176 ,0.028 +,0.352 ,0.54 ,0.832 ,0.48 ,0.416 +,0.732 ,0.376 ,0.528 ,0.416 ,0.012 +,0.196 ,0.46 ,0.7 ,0.5 ,0.096 +,0.072 ,0.612 ,0.304 ,0.472 ,0.376 +,0.476 ,0.408 ,0.5 ,0.828 ,0.32 +,0.22 ,0.036 ,0.172 ,0.712 ,0.756 +,0.52 ,0.632 ,0.26 ,0.316 ,0.16 +,0.688 ,0.484 ,0.692 ,0.336 ,0.736 +,0.444 ,0.936 ,0.624 ,0.276 ,0.504 +,0.58 ,0.376 ,0.648 ,0.296 ,0.12 +,0.864, 1 ,0.832 ,0.668 ,0.924 +,0.916 ,0.668 ,0.48 ,0.828 ,0.724 +,0.448 ,0.624 ,0.82 ,0.624 ,0.944 +,0.1 ,0.044 ,0.804 ,0.436 ,0.22 +,0.808 ,0.548 ,0.32 ,0.264 ,0.284 +,0.872 ,0.876 ,0.74 ,0.568 ,0.824 +,0.608 ,0.544 ,0.176 ,0.288 ,0.084 +,0.092 ,0.916 ,0.764 ,0.168 ,0.872 +,0.376 ,0.52 ,0.288 ,0.88 ,0.888 +,0.904 ,0.892 ,0.372 ,0.42 ,0.984 +,0.52 ,0.372 ,0.476 ,0.348 ,0.756 +,0.044 ,0.736 ,0.252 ,0.732 ,0.776 +,0.632 ,0.976 ,0.08 ,0.36 ,0.596 +,0.72 ,0.228 ,0.36 ,0.2 ,0.924 +,0.676 ,0.744 ,0.58 ,0.644 ,0.3 +,0.82 ,0.296 ,0.44 ,0.516 ,0.716 +,0.46 ,0.428 ,0.46 ,0.372 ,0.604 +,0.16 ,0.484 ,0.164 ,0.38 ,0.708 +,0.964 ,0.988 ,0.844 ,0.216 ,0.912 +,0.228 ,0.368 ,0.22 ,0.064 ,0.384 +,0.72 ,0.636 ,0.852 ,0.776 ,0.444 +,0.944 ,0.992 ,0.74 ,0.384 ,0.528 +,0.536 ,0.296 ,0.056 ,0.34 ,0.152 +,0.152 ,0.316 ,0.148 ,0.816 ,0.576 +,0.936 ,0.896 ,0.212 ,0.792 ,0.392 +,0.728 ,0.692 
,0.324 ,0.496 ,0.68 +,0.536 ,0.372 ,0.316 ,0.276 ,0.9 +,0.664 ,0.008 ,0.464 ,0.564 ,0.088 +,0.52 ,0.032 ,0.584 ,0.396 ,0.864 +,0.18 ,0.708 ,0.62 ,0.82 ,0.248 +,0.736 ,0.496 ,0.284 ,0.496 ,0.152 +,0.66 ,0.524 ,0.536 ,0.56 ,0.232 +,0.084 ,0.396 ,0.38 ,0.028 ,0.708 +,0.184 ,0.824 ,0.264 ,0.892 ,0.62 +,0.204 ,0.62 ,0.728 ,0.984 ,0.948 +,0.656 ,0.876 ,0.92 ,0.84 ,0.932 +,0.572 ,0.516 ,0.024 ,0.248 ,0.756 +,0.428 ,0.28 ,0.572 ,0.936 ,0.228 +,0.416 ,0.04 ,0.44 ,0.252 ,0.872 +,0.408 ,0.484 ,0.468 ,0.036 ,0.388 +,0.1 ,0.956 ,0.64 ,0.904 ,0.436 +,0.152 ,0.144 ,0.428 ,0.628 ,0.748 +,0.632 ,0.132 ,0.204 ,0.6 ,0.236 +,0.732 ,0.508 ,0.128 ,0.42 ,0.724 +,0.04 ,0.876 ,0.528 ,0.852 ,0.844 +,0.652 ,0.968 ,0.26 ,0.924 ,0.124 +,0.312 ,0.884 ,0.96 ,0.132 ,0.464 +,0.304 ,0.1 ,0.684 ,0.22 ,0.42 +,0.404 ,0.94 ,0.244 ,0.884 ,0.484 +,0.788 ,0.42 ,0.88 ,0.836 ,0.112 +,0.468 ,0.928 ,0.52 ,0.592 ,0.452 +,0.192 ,0.408 ,0.94 ,0.148 ,0.216 +,0.456 ,0.06 ,0.968 ,0.444 ,0.236 +,0.348 ,0.652 ,0.716 ,0.628 ,0.16 +,0.084 ,0.392 ,0.284 ,0.34 ,0.988 +,0.404 ,0.476 ,0.724 ,0.108 ,0.988 +,0.632 ,0.84 ,0.588 ,0.744 ,0.008 +,0.232 ,0.336 ,0.804 ,0.368 ,0.604) +cp=hd(p.table,alpha) +pv=NULL +if(!is.null(p.obs))w=optimize(hdpv,interval=c(.001,.999),dat=p.table,obs=p.obs)$minimum +list(crit.p.value=cp,adj.p.value=w) +} + +p.crit.n100<-function(alpha=.05,p.obs=NULL){ +p.table=c( +0.516 ,0.516 ,0.14 ,0.42 ,0.124 +,0.872 ,0.396 ,0.692 ,0.92 ,0.452 +,0.048 ,0.34 ,0.18 ,0.952 ,0.348 +,0.004 ,0.74 ,0.04 ,0.612 ,0.052 +,0.888 ,0.416 ,0.096 ,0.16 ,0.62 +,0.132 ,0.536 ,0.96 ,0.24 ,0.512 +,0.672 ,0.54 ,0.204 ,0.128 ,0.296 +,0.828 ,0.4 ,0.324 ,0.388 ,0.988 +,0.568 ,0.764 ,0.82 ,0.392 ,0.992 +,0.996 ,0.804 ,0.416 ,0.308 ,0.924 +,0.1 ,0.016 ,0.368 ,0.264 ,0.94 +,0.56 ,0.156 ,0.68 ,0.616 ,0.252 +,0.724 ,0.38 ,0.544 ,0.02 ,0.092 +,0.692 ,0.912 ,0.04 ,0.58 ,0.072 +,0.416 ,0.912, 1 ,0.252 ,0.428 +,0.068 ,0.504 ,0.692 ,0.452 ,0.932 +,0.668 ,0.912 ,0.056 ,0.86 ,0.908 +,0.276 ,0.54 ,0.424 ,0.556 ,0.248 +,0.42 ,0.94 ,0.632 ,0.544 ,0.168 +,0.824 ,0.44 ,0.136 ,0.9 ,0.984 +,0.34 ,0.984 ,0.428 ,0.208 ,0.216 +,0.024 ,0.744 ,0.34 ,0.644 ,0.488 +,0.716 ,0.984 ,0.292 ,0.512 ,0.908 +,0.336 ,0.908 ,0.528 ,0.364 ,0.924 +,0.884 ,0.928 ,0.608 ,0.06 ,0.624 +,0.588 ,0.536 ,0.88 ,0.224 ,0.124 +,0.184 ,0.416 ,0.004 ,0.84 ,0.784 +,0.016 ,0.692 ,0.892 ,0.04 ,0.084 +,0.844 ,0.38 ,0.512 ,0.392 ,0.28 +,0.312 ,0.96 ,0.084 ,0.216 ,0.34 +,0.544 ,0.952 ,0.88 ,0.024 ,0.292 +,0.456 ,0.076 ,0.932 ,0.78 ,0.304 +,0.06 ,0.692 ,0.996 ,0.132 ,0.82 +,0.592 ,0.872 ,0.812 ,0.132 ,0.392 +,0.84 ,0.736 ,0.188 ,0.132 ,0.736 +,0.236 ,0.44 ,0.684 ,0.62 ,0.484 +,0.828 ,0.2 ,0.688 ,0.404 ,0.78 +,0.948 ,0.968 ,0.284 ,0.776 ,0.672 +,0.016 ,0.088 ,0.712 ,0.776 ,0.324 +,0.788 ,0.74 ,0.584 ,0.456 ,0.572 +,0.896 ,0.976 ,0.164 ,0.576 ,0.112 +,0.724 ,0.268 ,0.18 ,0.232 ,0.224 +,0.996 ,0.692 ,0.552 ,0.524 ,0.756 +,0.1 ,0.684 ,0.416 ,0.98 ,0.02 +,0.452 ,0.6 ,0.94 ,0.148 ,0.808 +,0.272 ,0.484 ,0.404 ,0.064 ,0.64 +,0.564 ,0.108 ,0.832 ,0.232 ,0.784 +,0.504 ,0.364 ,0.784 ,0.584 ,0.616 +,0.708 ,0.576 ,0.668 ,0.984 ,0.428 +,0.984 ,0.72 ,0.736 ,0.66 ,0.876 +,0.508 ,0.2 ,0.444 ,0.94 ,0.808 +,0.404 ,0.436 ,0.104 ,0.656 ,0.616 +,0.524 ,0.884 ,0.24 ,0.34 ,0.424 +,0.052 ,0.46 ,0.872 ,0.452 ,0.132 +,0.704 ,0.508 ,0.22 ,0.972 ,0.632 +,0.5 ,0.544 ,0.02 ,0.112, 1 +,0.204 ,0.728 ,0.46 ,0.556 ,0.732 +,0.228 ,0.472 ,0.068 ,0.344 ,0.972 +,0.404 ,0.816 ,0.368 ,0.38 ,0.912 +,0.948 ,0.108 ,0.308 ,0.584 ,0.816 +,0.692 ,0.024 ,0.604 ,0.74 ,0.696 +,0.48 ,0.888 ,0.356 ,0.464 ,0.548 +,0.232 ,0.76 ,0.892 ,0.096 ,0.18 +,0.596 ,0.556 
,0.56 ,0.776 ,0.368 +,0.66 ,0.856 ,0.4 ,0.968 ,0.356 +,0.752 ,0.648 ,0.62 ,0.888 ,0.932 +,0.86 ,0.448 ,0.248 ,0.936 ,0.22 +,0.364 ,0.92 ,0.424 ,0.316 ,0.684 +,0.812 ,0.652 ,0.372 ,0.308 ,0.616 +,0.816 ,0.04 ,0.304 ,0.864 ,0.356 +,0.632 ,0.82 ,0.976 ,0.612 ,0.112 +,0.088 ,0.7 ,0.568 ,0.948 ,0.084 +,0.848 ,0.452 ,0.004 ,0.392 ,0.796 +,0.528 ,0.372 ,0.472 ,0.404 ,0.48 +,0.244 ,0.144 ,0.944 ,0.852 ,0.52 +,0.436 ,0.396 ,0.388 ,0.596 ,0.496 +,0.972 ,0.424 ,0.424 ,0.144 ,0.772 +,0.1 ,0.748 ,0.192 ,0.508 ,0.78 +,0.176 ,0.944 ,0.136 ,0.564 ,0.724 +,0.76 ,0.6 ,0.032 ,0.504 ,0.78 +,0.936 ,0.76 ,0.124 ,0.62 ,0.092 +,0.768 ,0.852 ,0.556 ,0.456 ,0.468 +,0.984 ,0.7 ,0.568 ,0.288 ,0.048 +,0.928 ,0.892 ,0.728 ,0.32 ,0.836 +,0.788 ,0.352 ,0.748 ,0.128 ,0.92 +,0.352 ,0.236 ,0.56 ,0.192 ,0.72 +,0.516 ,0.668 ,0.652 ,0.056 ,0.876 +,0.916 ,0.124 ,0.176 ,0.652 ,0.088 +,0.508 ,0.8 ,0.112 ,0.1 ,0.264 +,0.836 ,0.912 ,0.376 ,0.792 ,0.876 +,0.892 ,0.284 ,0.892 ,0.088 ,0.376 +,0.24 ,0.98 ,0.416 ,0.96 ,0.848 +,0.632 ,0.304 ,0.5 ,0.716 ,0.388 +,0.976 ,0.24 ,0.32 ,0.808 ,0.168 +,0.352 ,0.064 ,0.7 ,0.42 ,0.224 +,0.176 ,0.988 ,0.224 ,0.904 ,0.592 +,0.696 ,0.42 ,0.376 ,0.696 ,0.44 +,0.328 ,0.816 ,0.312 ,0.968 ,0.432 +,0.784 ,0.92 ,0.6 ,0.928 ,0.784 +,0.96 ,0.54 ,0.872 ,0.816 ,0.728 +,0.688 ,0.436 ,0.796 ,0.42 ,0.464 +,0.112 ,0.328 ,0.168 ,0.244 ,0.052 +,0.864 ,0.312 ,0.992 ,0.832 ,0.584 +,0.396 ,0.596 ,0.72 ,0.684 ,0.448 +,0.88 ,0.9 ,0.748 ,0.424 ,0.66 +,0.644 ,0.188 ,0.528 ,0.98 ,0.58 +,0.716 ,0.352 ,0.512 ,0.716 ,0.548 +,0.988, 0 ,0.36 ,0.4 ,0.784 +,0.212 ,0.756 ,0.956 ,0.5 ,0.512 +,0.852 ,0.216 ,0.004 ,0.596 ,0.504 +,0.56 ,0.672 ,0.988 ,0.76 ,0.924 +,0.108 ,0.62 ,0.412 ,0.44 ,0.816 +,0.664 ,0.924 ,0.62 ,0.816 ,0.184 +,0.244 ,0.644 ,0.172 ,0.3 ,0.22 +,0.02 ,0.572 ,0.156 ,0.768 ,0.484 +,0.576 ,0.992 ,0.192 ,0.428 ,0.736 +,0.708 ,0.888 ,0.136 ,0.192 ,0.948 +,0.244 ,0.528 ,0.188 ,0.956 ,0.036 +,0.016 ,0.636 ,0.048 ,0.42 ,0.184 +,0.16 ,0.496 ,0.512 ,0.18 ,0.052 +,0.972 ,0.488 ,0.988 ,0.284 ,0.252 +,0.876 ,0.244 ,0.072 ,0.712 ,0.684 +,0.78 ,0.468 ,0.504 ,0.732 ,0.84 +,0.496 ,0.168 ,0.256 ,0.092 ,0.916 +,0.76 ,0.152 ,0.72 ,0.14 ,0.356 +,0.824 ,0.172 ,0.248 ,0.624 ,0.932 +,0.856 ,0.612 ,0.52 ,0.584 ,0.904 +,0.576 ,0.528 ,0.092 ,0.132 ,0.288 +,0.096 ,0.516 ,0.316 ,0.448 ,0.792 +,0.756 ,0.748 ,0.016 ,0.52 ,0.64 +,0.924 ,0.688 ,0.824 ,0.392 ,0.908 +,0.852 ,0.652 ,0.184 ,0.244 ,0.288 +,0.324 ,0.324 ,0.736 ,0.628 ,0.816 +,0.868 ,0.576 ,0.584 ,0.92 ,0.812 +,0.104 ,0.48 ,0.884 ,0.816 ,0.24 +,0.024 ,0.204 ,0.32 ,0.452 ,0.44 +,0.036 ,0.96 ,0.716 ,0.648 ,0.04 +,0.256 ,0.684 ,0.296 ,0.484 ,0.772 +,0.52 ,0.068 ,0.26 ,0.352 ,0.096 +,0.348 ,0.18 ,0.556 ,0.276 ,0.956 +,0.944 ,0.052 ,0.916 ,0.52 ,0.624 +,0.668 ,0.98 ,0.112 ,0.144 ,0.644 +,0.348 ,0.488 ,0.88 ,0.16 ,0.056 +,0.66 ,0.964 ,0.316 ,0.14 ,0.36 +,0.62 ,0.168 ,0.124 ,0.852 ,0.58 +,0.512 ,0.424 ,0.024 ,0.676 ,0.192 +,0.636 ,0.544 ,0.78 ,0.712 ,0.368 +,0.156 ,0.068 ,0.056 ,0.112 ,0.26 +,0.448 ,0.68 ,0.316 ,0.9 ,0.78 +,0.62 ,0.264 ,0.856 ,0.04 ,0.936 +,0.6 ,0.128 ,0.328 ,0.924 ,0.94 +,0.708 ,0.476 ,0.044 ,0.172 ,0.648 +,0.356 ,0.688 ,0.888 ,0.136 ,0.976 +,0.212 ,0.652 ,0.624 ,0.9 ,0.76 +,0.396 ,0.736 ,0.812 ,0.1 ,0.868 +,0.844 ,0.952 ,0.076 ,0.616 ,0.964 +,0.064 ,0.28 ,0.576 ,0.944 ,0.532 +,0.596 ,0.704 ,0.952 ,0.832 ,0.18 +,0.928 ,0.504 ,0.072 ,0.4 ,0.432 +,0.644 ,0.168 ,0.784 ,0.516 ,0.408 +,0.224 ,0.476 ,0.992 ,0.588 ,0.668 +,0.324 ,0.064 ,0.592 ,0.96 ,0.652 +,0.24 ,0.964 ,0.988 ,0.06 ,0.068 +,0.828 ,0.736 ,0.432 ,0.508 ,0.92 +,0.34 ,0.832 ,0.504 ,0.192 ,0.88 +,0.532 ,0.168 ,0.128 ,0.46 
,0.456 +,0.992 ,0.328 ,0.728 ,0.72 ,0.488 +,0.728 ,0.848 ,0.372 ,0.688 ,0.324 +,0.748 ,0.628 ,0.936 ,0.556 ,0.8 +,0.088 ,0.26 ,0.66 ,0.412 ,0.264 +,0.432 ,0.148 ,0.24 ,0.672 ,0.216 +,0.268 ,0.512 ,0.296 ,0.412 ,0.564 +,0.224 ,0.196 ,0.42 ,0.424 ,0.168 +,0.852 ,0.144 ,0.96 ,0.008 ,0.568 +,0.196 ,0.704 ,0.624 ,0.6 ,0.452 +,0.628 ,0.272 ,0.056 ,0.536 ,0.364 +,0.364 ,0.696 ,0.272 ,0.472 ,0.008 +,0.112 ,0.924 ,0.82 ,0.428 ,0.96 +,0.748 ,0.308 ,0.912 ,0.472 ,0.544 +,0.744 ,0.072 ,0.36 ,0.604 ,0.98 +,0.308 ,0.304 ,0.352 ,0.024 ,0.956 +,0.82 ,0.692 ,0.572 ,0.036 ,0.86 +,0.896 ,0.976 ,0.284 ,0.764 ,0.24 +,0.68 ,0.788 ,0.316 ,0.068 ,0.06 +,0.688 ,0.26 ,0.496 ,0.416 ,0.388 +,0.58 ,0.2 ,0.508 ,0.412 ,0.344 +,0.212 ,0.932 ,0.588 ,0.212 ,0.936 +,0.232 ,0.26 ,0.82 ,0.764 ,0.648 +,0.924 ,0.508 ,0.84 ,0.66 ,0.46 +,0.788 ,0.596 ,0.092 ,0.656 ,0.244 +,0.68 ,0.628 ,0.732 ,0.56 ,0.596 +,0.908 ,0.376 ,0.64 ,0.628 ,0.824 +,0.36 ,0.764 ,0.484 ,0.892 ,0.576 +,0.128 ,0.972 ,0.4 ,0.444 ,0.856 +,0.744 ,0.32 ,0.004 ,0.58 ,0.116 +,0.636 ,0.368 ,0.696 ,0.904 ,0.536 +,0.04 ,0.132 ,0.812 ,0.916 ,0.468 +,0.736 ,0.108 ,0.684 ,0.96 ,0.284 +,0.068 ,0.832 ,0.736 ,0.248 ,0.624 +,0.26 ,0.964 ,0.316 ,0.504 ,0.2 +,0.36 ,0.46 ,0.8 ,0.164 ,0.284 +,0.372 ,0.792 ,0.808 ,0.716 ,0.148 +,0.232 ,0.724 ,0.86 ,0.692 ,0.204 +,0.484 ,0.444 ,0.432 ,0.384 ,0.256 +,0.732 ,0.084 ,0.316 ,0.264 ,0.18 +,0.236 ,0.592 ,0.42 ,0.76 ,0.556 +,0.116 ,0.72 ,0.252 ,0.632 ,0.72 +,0.476 ,0.896 ,0.784 ,0.328 ,0.852 +,0.548 ,0.132 ,0.692 ,0.92 ,0.596 +,0.268 ,0.204 ,0.852 ,0.948 ,0.88 +,0.82 ,0.328 ,0.7 ,0.684 ,0.16 +,0.868 ,0.44 ,0.912 ,0.192 ,0.168 +,0.844 ,0.4 ,0.32 ,0.768 ,0.52 +,0.8 ,0.464 ,0.884 ,0.448 ,0.908 +,0.116 ,0.136 ,0.036 ,0.368 ,0.076 +,0.424 ,0.224 ,0.72 ,0.008 ,0.932 +,0.732 ,0.764 ,0.088 ,0.788 ,0.852 +,0.392 ,0.66 ,0.904 ,0.524 ,0.612 +,0.256 ,0.892 ,0.912 ,0.512 ,0.984 +,0.912 ,0.94 ,0.204 ,0.408 ,0.156 +,0.868 ,0.1 ,0.988) +cp=hd(p.table,alpha) +pv=NULL +if(!is.null(p.obs))w=optimize(hdpv,interval=c(.001,.999),dat=p.table,obs=p.obs)$minimum +list(crit.p.value=cp,adj.p.value=w) +} + + +mscorciH<-function(x,nboot=1000,alpha=.05,SEED=TRUE,method='hoch', +corfun=pcor,outfun=outpro, crit.pv=NULL,ALL=TRUE,MC=TRUE,pr=TRUE){ +# +# Test the hypothesis of a zero skipped correlation for each pair of variables in +# x, an n-by-p matrix. +# +# Use Hochberg adjusted critical p-values based on adjusted p-values to control the probability of one or more Type I errors. +# +# The function also returns 1-alpha confidence intervals for each of the skipped correlations +# alpha=0.05 is the default. +# By default, Pearson's correlation is computed after outliers are removed via the R function indicated by +# outfun, which defaults to a projection-type method. +# corfun=spear, for example would replace Pearson's correlation with Spearman's correlation. +# +# The default number of bootstrap samples is +# nboot=500 +# +# +if(pr){ +print('Each confidence interval has, approximately, 1-alpha probability coverage') +} +if(SEED)set.seed(2) +xy=elimna(x) +x=as.matrix(x) +p=ncol(x) +p1=p+1 +J=(p^2-p)/2 +x=as.matrix(x) +n=nrow(x) +est<-mscor(x,corfun=corfun,outfun=outfun)$cor +data<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot) +data=listm(t(data)) +if(!MC)bvec<-lapply(data,scorci.sub,x,corfun=corfun,outfun=outfun,STAND=TRUE) +if(MC){ +library(parallel) +bvec<-mclapply(data,scorci.sub,x,corfun=corfun,outfun=outfun,STAND=TRUE) +} +bvec=matl(bvec) # A J by nboot matrix. J=(p^2-p)/2, p=number of IV variables. 
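+# Each row of bvec holds the nboot bootstrap estimates of one skipped correlation.
+# In the block that follows, a p-value for each pair (j,k) is derived from the
+# proportion of bootstrap estimates falling below zero (as done explicitly in
+# scorregci further down) and is then adjusted using the simulated p.crit.n60,
+# p.crit.n80 or p.crit.n100 tables, chosen according to the sample size n;
+# for n>120 no adjustment is made.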
+ +# +# Compute critical p-values when p=1 and then use Hochberg adjustment +# + +phat=0 +sig=matrix(NA,p,p) +sigadj=matrix(NA,p,p) +ic=0 +for(j in 1:p){ +for(k in 1:p){ +if(j40){ +if(n<=70){ +vv=p.crit.n60(alpha[1],sig[j,k]) +sigadj[j,k]=vv$adj.p.value +crit.p=vv$crit.p.value +} +} +if(n>70){ +if(n<=100){ +vv=p.crit.n80(alpha[1],sig[j,k]) +sigadj[j,k]=vv$adj.p.value +crit.p=vv$crit.p.value +} +} +if(n>100){ +if(n<=120) +{ +vv=p.crit.n100(alpha[1],sig[j,k]) +crit.p=vv$crit.p.value +sigadj[j,k]=vv$adj.p.value +}} +if(n>120){ # no adjustment +sigad[j,k]=sig[j,k] #i.e., no adjustment +crit.p=alpha +}}}} +ci.mat=matrix(NA,nrow=J,ncol=7) +dimnames(ci.mat)=list(NULL,c('Var i','Var j','Est','ci.low','ci.up','P-value','FWE Adjusted p-value' )) +crit.pv=crit.p +for(j in 1:J)bvec[j,]<-sort(bvec[j,]) +if(J==1)bvec=as.matrix(bvec) +ic=0 +if(is.null(crit.pv))crit.pv=alpha[1] +for(j in 1:p){ +for(k in 1:p){ +if(j=60, this might suffice when testing at the 0.05 level. But power might be increased by using +# hoch=FALSE at the expense of higher execution time. +# +# If alpha is less than .05, say .025 or .01, hoch=FALSE is recommended. +# +# Note: confidence intervals are reported only when hoch=FALSE. +# +# pvals can be used to supply a vector of p-values estimating the distribution of the minimum p-value among the tests that are +# are performed when all hypotheses are true. +# +# iter=500: number of replications used to estimate the distribution of the minimum p-value. +# Or use the argument crit.pv as indicated below. +# Note: in the journal article dealing with this method, iter=1000 was used. + +# By default +# pvals=NULL, the functions computes these values if the p-values suggest that there might be +# significant results and hoch=FALSE; this can result in high execution time. +# The pvals are computed via the R function +# mscorci.cr(n,p,iter=500,corfun=pcor,alpha=alpha,SEED=TRUE). +# +# Critical p-values are a function of n and p. Once known, can supply them via the argument +# crit.pv as follows: +# +# pv=scorregci.cr(n,p)$crit.p.values +# scorregci(x,crit.pv=pv) +# +# When hoch=TRUE, unadjusted confidence intervals are returned. +# +# +# +if(pr){ +if(!hoch){print('To reduce execution time, critical p-values are not computed when the observed p.values are too large to') +print('reject at the 0.05 level. To compute them any way, use the R function scorregci.cr') +} +if(hoch){ +print('Hochberg adjusted p-values are used.') +print('This is reasonable when n>=60 and alpha=.05. Otherwise suggest using hoch=FALSE') +print('To get confidence intervals, use hoch=FALSE') +}} +if(SEED)set.seed(2) +xy=elimna(cbind(x,y)) +x=as.matrix(x) +p=ncol(x) +p1=p+1 +x=xy[,1:p] +y=xy[,p1] +x=as.matrix(x) +n=nrow(x) +est<-scorreg(x,y,corfun=corfun,outfun=outfun,SEED=FALSE,ALL=ALL,...)$cor +data<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot) +data=listm(t(data)) +if(!MC)bvec<-lapply(data,scorreg.sub,xy,corfun=corfun,outfun=outfun,ALL=ALL,...) +if(MC){ +library(parallel) +bvec<-mclapply(data,scorreg.sub,xy,corfun=corfun,outfun=outfun,ALL=ALL,...) +} +bvec=matl(bvec) # A p by nboot matrix. + +phat=0 +sig=0 +for(j in 1:p){ +phat[j]=sum(bvec[j,] < 0)/nboot +sig[j] <- 2 * min(phat[j], 1 - phat[j]) +} +# +# Compute critical p-values if any of the p-values are sufficiently small. 
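+# FLAG, set below, records whether at least one p-value is small enough that the
+# (comparatively expensive) critical p-values computed by scorregci.cr could alter
+# the conclusions; when no p-value is that small, the computation is skipped to
+# reduce execution time.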
+# +FLAG=FALSE +if(p==2 && sig[1]<=.15){ +FLAG=TRUE +if(hoch){ +if(pr)print('If the p.value is <=.15, suggest using hoch=FALSE') +}} +if(p>2){ +if(min(sig)<=alpha[1])FLAG=TRUE +} +if(FLAG){ +if(is.null(crit.pv)){ +if(!hoch){ +if(pr)print('Computing critical p-values. Execution time might require several minutes') +temp=scorregci.cr(nval,p,iter=iter,corfun=corfun,alpha=alpha,SEED=pval.SEED,TV=TRUE,ALL=ALL) #returns tval in case want to adjust p-values. +# Need to add code to do this. (See mscorpbMC for how this might be done.) +crit.pv=temp$crit.p.values +}}} +ci.mat=matrix(NA,nrow=p,ncol=3) +dimnames(ci.mat)=list(NULL,c('Var','ci.low','ci.up')) +for(j in 1:p)bvec[j,]<-sort(bvec[j,]) +if(p==1)bvec=as.matrix(bvec) +ic=0 +if(is.null(crit.pv))crit.pv=alpha[1] +for(j in 1:p){ +ic=ic+1 +ci.mat[ic,1]=j +ihi<-floor((1-crit.pv[1]/2)*nboot+.5) +ilow<-floor((crit.pv[1]/2)*nboot+.5) +ci.mat[ic,2]=bvec[ic,ilow] +ci.mat[ic,3]=bvec[ic,ihi] +} + +p.mat=matrix(NA,nrow=p,ncol=3) +p.mat[,1]=est +p.mat[,2]=sig +adj.p=NULL +if(hoch){ +adj.p=p.adjust(sig,method='hochberg') +p.mat[,3]=adj.p +} +dimnames(p.mat)=list(NULL,c('Est.','p-value','adjusted p.value')) +list(Estimates=p.mat,confidence.int=ci.mat,critical.p.values=crit.pv) +} + +scorreg.sub<-function(data,xy,corfun=corfun,outfun=outfun,ALL=ALL,...){ +p1=ncol(xy) +p=p1-1 +est<-scorreg(xy[data,1:p],xy[data,p1],corfun=corfun,SEED=FALSE,ALL=ALL,...)$cor +est +} + +scorreg.cr<-function(n,p,iter=500,nboot=500,corfun=pcor,alpha=c(.05,.025,.01),TV=FALSE,ALL=TRUE,SEED=TRUE,outfun=outpro){ +# +# Determine critical p-values for the function scorregci +# Returns the estimate of the distribution of the null minimum p-value +# plus the critical p-values corresponding to the levels indicated by +# alpha. +# +# p = number or predictors +# +# Function assumes that a multicore processor is used and that the R package parallel has been installed. +# +if(SEED)set.seed(65) +x=list() +library(parallel) +p1=p+1 +for(i in 1:iter){ +x[[i]]=rmul(n,p=p1) +} +tval=mclapply(x,scorreg.cr.sub,p=p,corfun=corfun,nboot=nboot,ALL=ALL) +tval=list2vec(tval) +crit.p=NA +for(j in 1:length(alpha))crit.p[j]=hd(tval,alpha[j]) +if(!TV)tval=NULL +list(crit.p.values=crit.p,tval=tval) +} + +scorreg.cr.sub<-function(x,corfun,p=p,nboot=500,ALL=ALL,outfun=outfun){ +p1=p+1 +v=scorregci(x[,1:p],x[,p1],SEED=FALSE,corfun=corfun,nboot=nboot,crit.pv=1,pr=FALSE,hoch=TRUE,ALL=ALL,outfun=outfun)$Estimates[,2] +mp=min(as.vector(v),na.rm=T) +mp +} + +ols.ridge<-function(x,y,k=NULL,xout=FALSE,outfun=outpro,MSF=TRUE){ +library(MASS) +x=as.matrix(x) +if(ncol(x)==1)stop('Should have two or more independent variables.') +xy=elimna(cbind(x,y)) +x=as.matrix(x) +p=ncol(x) +p1=p+1 +x=xy[,1:p] +y=xy[,p1] +x=as.matrix(x) +if(xout){ +flag<-outfun(x)$keep +x<-x[flag,] +x<-as.matrix(x) +y<-y[flag] +} +if(is.null(k)){ +if(!MSF)k=ridge.est.k(x,y) +else{ +ires=ols(x,y)$residuals +sigh=sqrt(sum(ires^2)/(n-p-1)) +k=p^(1+1/p)*sigh +} +} +a=lm.ridge(y~x,lambda=k) +a=coef(a) +list(coef=a) +} + +rob.ridge<-function(x,y,Regfun=tsreg,k=NULL,xout=FALSE,outfun=outpro,plotit=FALSE,MSF=TRUE, +STAND=TRUE,INT=FALSE,locfun=median,...){ +# +# Do robust regression based on the robust estimator indicated by +# Regfun +# which defaults to Theil--Sen +# +# When MSF=FALSE, the bias parameter, k, is estimated based on results in +# Kidia, G. (2003). +# Performance of Some New Ridge +# Regression Estimators Communications in Statistics + # Simulation and Computation + #Vol. 32, No. 2, pp. 
419--435 (in file CommB2003.pdf) +# The method was derived when using OLS. +# +# MSF=TRUE, use the method in Shabbir et al.(2032). doi.org/10.1080/03610918.2023.2186874 +# +# For results on the LTS version see +# Kan et al. 2013, J Applied Statistics, 40, 644-655 +# However, suggest using this function when testing hypotheses in conjunction with regci or regciMC +# +x=as.matrix(x) +xy=elimna(cbind(x,y)) +x=as.matrix(x) +n=length(y) +p=ncol(x) +p1=p+1 +x=xy[,1:p] +y=xy[,p1] +x=as.matrix(x) +if(xout){ +if(identical(outfun,outblp))flag=outblp(x,y,plotit=FALSE)$keep +else flag<-outfun(x,plotit=plotit)$keep +x<-x[flag,] +x<-as.matrix(x) +y<-y[flag] +} +if(STAND){ +x=standm(x) +y=y-mean(y) +} +if(is.null(k)){ +if(!MSF)k=ridge.est.k(x,y,regfun=Regfun,...) +else{ +ires=Regfun(x,y)$residuals +sigh=sqrt(sum(ires^2)/(n-p-1)) +k=p^(1+1/p)*sigh +} +} +n=nrow(x) +init=Regfun(x,y,...)$coef +y=as.matrix(y) +if(!INT){ +kbeta=diag(k,nrow=p,ncol=p) +slopes=as.matrix(init[2:p1]) +xtx=t(x)%*%x +beta=solve(xtx+kbeta)%*%xtx +beta=beta%*%slopes +res=y-x%*%beta +b0=locfun(res) +beta=as.vector(beta) +beta=c(b0,beta) +} +if(INT){ +kbeta=diag(k,nrow=p1,ncol=p1) +slopes=as.matrix(init) +x1=cbind(rep(1,n),x) +xtx=t(x1)%*%x1 +beta=solve(xtx+kbeta)%*%xtx +beta=beta%*%slopes +} +res<-y-x%*%beta[2:p1]-beta[1] +list(coef=as.vector(beta),k=k,residuals=res) +} + +rob.ridge.test<-function(x,y,regfun=tsreg,xout=FALSE,outfun=outpro,MC=FALSE,method='hoch', +nboot=599,alpha = 0.05,MSF=TRUE,SEED=TRUE,...){ +# +# +# Test the global hypothesis that all slopes are equal to zero, +# If it rejects, it also suggests which slope is significant, but it cannot reject more than one slope. +# +# A robust ridge estimator is used base on the robust estimator indicated by the argument +# regfun +# +# MC=TRUE: takes advantage of a multicore processor. +# +p=ncol(x) +if(p==1)stop('Should have two or more independent variables') +p1=p+1 +if(!MC)a=regci(x,y,regfun=rob.ridge,Regfun=regfun,xout=xout, +alpha=alpha,SEED=SEED,MSF=MSF)$regci +if(MC)a=regciMC(x,y,regfun=rob.ridge,Regfun=regfun,xout=xout, +alpha=alpha,SEED=SEED,MSF=MSF)$regci +pv=a[2:p1,5] +padj=min(p.adjust(pv,method=method)) +id=NULL +if(padj<=alpha)id=which(pv==min(pv)) +list(p.value=padj,a.sig.slope=id) +} + +ridge.test<-function(x,y,k=NULL,alpha=.05,pr=TRUE,xout=FALSE,outfun=outpro,STAND=TRUE,method='hoch', +locfun=mean,scat=var,MSF=TRUE,...){ +# +# +# Using a ridge estimator +# test the hypothesis of a zero slope for each of p independent variables. +# If the smallest p-value is less than or equal to alpha, reject the corresponding hypothesis +# as well as the hypothesis that all slopes are zero. +# But no other slopes can be declared significant due to the bias associated with the +# ridge estimator. +# +# The method uses an analog of the heteroscedastic method +# recommended by Long and Ervin (2000). +# p-values are adjusted to control the probability of one or more Type I errors;' +# Hochberg's method is used by default. +# +# Advantage: +# Power tends to be at least as high as OLS and potentially much higher +# But when the null hypothsis is false, confidence intervals can be highly +# inaccurate. +# +# +# STAND=TRUE: x is standardized and y is centered, based on measues of location +# and scatter indicatd by +# locfun and scat +# locfun=median would use the median and scat=madsq uses MAD. +# For n<=40, this helps control the Type I error probability. 
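+#
+# Sketch of the computation done in the body below: with X the design matrix
+# augmented by a column of ones and A = solve(t(X)%*%X + k*I) the ridge analog
+# of (X'X)^{-1}, the squared standard errors are the diagonal of
+#   hc3 = A %*% t(X) %*% diag(res^2/(1-h)^2) %*% X %*% A,
+# where h = diag(X %*% A %*% t(X)). Each slope estimate divided by its standard
+# error is referred to a Student's t distribution with n-p-1 degrees of freedom,
+# and the resulting p-values are adjusted with p.adjust (Hochberg by default).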
+# +x<-as.matrix(x) +if(nrow(x) != length(y))stop('Length of y does not match number of x values') +m<-cbind(x,y) +m<-elimna(m) +p=ncol(x) +if(p==1)stop('There should be at least two independent variables') +p1=p+1 +x=m[,1:p] +y=m[,p1] +n=nrow(x) +df=n-p1 +if(xout){ +if(identical(outfun,outblp))flag=outblp(x,y,plotit=FALSE)$keep +else + flag<-outfun(x,plotit=FALSE)$keep +x<-x[flag,] +x<-as.matrix(x) +y<-y[flag] +n=nrow(x) +df=n-p1 +} +if(STAND){ +x=standm(x,locfun=locfun,scat=scat) +y=y-locfun(y) +} +if(is.null(k)){ +if(!MSF)k=ridge.est.k(x,y) +else{ +ires=ols(x,y)$residuals +sigh=sqrt(sum(ires^2)/(n-p-1)) +k=p^(1+1/p)*sigh +} +} +x1=cbind(rep(1,n),x) +ols.est=ols(x,y)$coef +est=ols.ridge(x,y,k=k)$coef +x<-cbind(rep(1,nrow(x)),x[,1:ncol(x)]) +res<-y-x1%*%est +p=ncol(x) +kmat=matrix(0,p,p) +diag(kmat)=k +xtx<-solve(t(x)%*%x+kmat) +h<-diag(x%*%xtx%*%t(x)) +hc3<-xtx%*%t(x)%*%diag(as.vector(res^2/(1-h)^2))%*%x%*%xtx +df<-nrow(x)-ncol(x) +crit<-qt(1-alpha/2,df) +al<-ncol(x) +ci<-matrix(NA,nrow=p,ncol=6) +se=sqrt(diag(hc3)) +p=p-1 +for(j in 2:p1){ +ci[j,1]=se[j] +ci[j,2]=est[j]/sqrt(hc3[j,j]) +ci[j,3]=2*(1-pt(abs(ci[j,2]),df)) +ci[j,5]=est[j] +ci[j,6]=ols.est[j] +} +ci[,4]=p.adjust(ci[,3],method=method) +sig='No slope is signficant' +if(sum(ci[2:al,4]<=alpha)>0){ +id=which(ci[2:al,4]==min(ci[2:al,4])) +sig=paste('Slope',id, 'is signficant') +} +ci=ci[2:p1,] # Eliminate results related to the intercept, not relevant +vlabs=NA +for(j in 1:p)vlabs[j]=paste('Slope',j) +dimnames(ci)=list(vlabs,c('s.e.','test.stat','p-value','Adjusted p','Ridge.Est.','OLS.est')) +list(output=ci,Sig.slope=sig) +} + +lasso.est<-function(x,y,xout=FALSE,STAND=TRUE,outfun=outpro,regout=FALSE,lam=NULL,...){ +# +# Lasso regression via the R package glmnet. +# This function includes the option of eliminating leverage points +# This function is for convenience, it returns the estimates of the +# coefficients only. The R function cv.glmnet provides more complete details +# and includes other options. +# +# xout=TRUE eliminate leverage points with the function +# outfun +# regout=TRUE eliminate regression outliers with the function elo +# +library(glmnet) +x<-as.matrix(x) +p1<-ncol(x)+1 +p<-ncol(x) +xy<-cbind(x,y) +xy<-elimna(xy) +x<-xy[,1:p] +y<-xy[,p1] +nrem=length(y) +if(regout){ +flag=elo(x,y,outfun=outfun,lev=TRUE,reg=xout)$keep +xy<-xy[flag,] +x<-xy[,1:p] +y<-xy[,p1] +xout=FALSE +} +if(xout){ +flag<-outfun(x,plotit=FALSE,...)$keep +xy<-xy[flag,] +x<-xy[,1:p] +y<-xy[,p1] +} +if(STAND)x=standm(x) +z=cv.glmnet(x, y, family='gaussian',lam=lam) +e=coef(z,s=z$lambda.min) +e=as.vector(e) +list(coef=e,lambda.min.used=z$lambda.min) +} + +elo<-function(x,y,lev=TRUE,reg=TRUE,outfun=outpro,plotit=FALSE,SEED=TRUE){ +# +# +# lev=TRUE, remove points flagged as leverage points +# reg=TRUE, remove points flagged as regression outliers +# So lev=TRUE and reg=TRUE removes both +# +# For regression outliers, the Rousseeuw and van Zomeren (1990) method is used. +# (See section 10.15.1 in Wilcox, 2017, Intro to Robust Estimation and +# Hypothsis Testing) +# +# outfun: the function used to check for leverage points. +# + +a=reglev(x,y,plotit=plotit,SEED=SEED) +o=outfun(x,plotit=plotit) +L=NULL +B=NULL +if(lev){ +if(length(o$out.id)>0)L=o$out.id +} +if(reg){ +if(length(a$regout)>0)B=a$regout +} +e=unique(c(L,B)) +n=length(y) +id=c(1:n) +keep=id[-e] +list(keep=keep,reg.out.id=L,leverage.id=B) +} + + +scorall<-function(x,outfun=outpro,corfun=pcor,RAN=FALSE,...){ +# +# Eliminate outliers and compute a correlation based on the +# remaining data. 
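+#
+# Minimal usage sketch (x assumed to be an n-by-p numeric matrix):
+#   scorall(x)                # Pearson correlations after removing outliers via outpro
+#   scorall(x, corfun=spear)  # same, using Spearman's correlation instead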
+# +x=elimna(x) +if(!RAN)flag=outpro(x)$keep +if(RAN)flag=outpro.depth(x)$keep +est=corfun(x[flag,],...)$cor +est +} + + + +splotg5<-function(x1,x2=NULL,x3=NULL,x4=NULL,x5= NULL,xlab="X",ylab="Rel. Freq."){ +# +# Frequency plot for up to five variables. +# +# +freqx2=NULL +freqx3=NULL +freqx4=NULL +freqx5=NULL +x1<-x1[!is.na(x1)] +x2<-x2[!is.na(x2)] +x3<-x3[!is.na(x3)] +x4<-x4[!is.na(x4)] +x5<-x5[!is.na(x5)] + +xall=c(x1,x2,x3,x4,x5) +xall=xall[!is.na(xall)] +temp=sort(unique(xall)) +XL=list(x1,x2,x3,x4,x5) +NN=0 +for(j in 1:5)if(!is.null(XL[[j]]))NN=NN+1 +freqx1<-NA +for(i in 1:length(temp)){ +freqx1[i]<-sum(x1==temp[i]) +} +freqx1<-freqx1/length(x1) +if(!is.null(x2)){ +freqx2<-NA +for(i in 1:length(temp)){ +freqx2[i]<-sum(x2==temp[i]) +} +freqx2<-freqx2/length(x2) +} +if(!is.null(x3)){ +freqx3<-NA +for(i in 1:length(temp)){ +freqx3[i]<-sum(x3==temp[i]) +} +freqx3<-freqx3/length(x3) +} +if(!is.null(x4)){ +x4<-x4[!is.na(x4)] +freqx4<-NA +for(i in 1:length(temp)){ +freqx4[i]<-sum(x4==temp[i]) +} +freqx4<-freqx4/length(x4) +} +if(!is.null(x5)){ +x5<-x5[!is.na(x5)] +freqx5<-NA +for(i in 1:length(temp)){ +freqx5[i]<-sum(x5==temp[i]) +} +freqx5<-freqx5/length(x5) +} +X=rep(temp,NN) +pts=c(freqx1,freqx2,freqx3,freqx4,freqx5) +plot(X,pts,type="n",xlab=xlab,ylab=ylab) +points(X,pts) +lines(temp,freqx1) +if(NN>=2)lines(temp,freqx2,lty=2) +if(NN>=3)lines(temp,freqx3,lty=3) +if(NN>=4)lines(temp,freqx4,lty=4) +if(NN>=5)lines(temp,freqx5,lty=5) +} + +freq5<-function(x1,x2=NULL,x3=NULL,x4=NULL,x5= NULL,xlab="X",ylab="Rel. Freq."){ +# +# Compute relative frequencies associated with the sample space for up to five variables. +# +# +temp2=NULL +temp3=NULL +temp4=NULL +temp5=NULL +freqx2=NULL +freqx3=NULL +freqx4=NULL +freqx5=NULL + +x1<-x1[!is.na(x1)] +temp1<-sort(unique(x1)) +freqx1<-NA +for(i in 1:length(temp1)){ +freqx1[i]<-sum(x1==temp1[i]) +} +freqx1<-freqx1/length(x1) +N=1 + +if(!is.null(x2)){ +N=2 +x2<-x2[!is.na(x2)] +temp2<-sort(unique(x2)) +freqx2<-NA +for(i in 1:length(temp2)){ +freqx2[i]<-sum(x2==temp2[i]) +} +freqx2<-freqx2/length(x2) +} + +if(!is.null(x3)){ +N=3 +x3<-x3[!is.na(x3)] +temp3<-sort(unique(x3)) +freqx3<-NA +for(i in 1:length(temp3)){ +freqx3[i]<-sum(x3==temp3[i]) +} +freqx3<-freqx3/length(x3) +} + +if(!is.null(x4)){ +N=4 +x4<-x4[!is.na(x4)] +temp4<-sort(unique(x4)) +freqx4<-NA +for(i in 1:length(temp4)){ +freqx4[i]<-sum(x4==temp4[i]) +} +freqx4<-freqx4/length(x4) +} + +if(!is.null(x5)){ +N=5 +x5<-x5[!is.na(x5)] +temp5<-sort(unique(x5)) +freqx5<-NA +for(i in 1:length(temp5)){ +freqx5[i]<-sum(x5==temp5[i]) +} +freqx5<-freqx5/length(x5) +} + +v=list() +v[[1]]=cbind(temp1,freqx1) +v[[2]]=cbind(temp2,freqx2) +v[[3]]=cbind(temp3,freqx3) +v[[4]]=cbind(temp4,freqx4) +v[[5]]=cbind(temp5,freqx5) +for(j in 1:N)dimnames(v[[j]])=list(NULL,c('Value','Rel. Freq')) +v +} + +splotg2=splotg5 +#s2plot=splotg5 #Used in earlier versions. + +trim2gmul<-function(x,y, tr = 0.2, alpha = 0.05){ +# +# For two independent p-variate distributions, apply yuen to each column of data +# FWE controlled with Hochberg's method +# +# x and y are matrices having p columns. (Can have list mode as well.) 
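+#
+# Illustrative call with hypothetical data (two independent groups, p=4 variables):
+#   trim2gmul(matrix(rnorm(200), ncol=4), matrix(rnorm(240), ncol=4), tr=0.2)
+# yuen is applied to each of the four variables and the p.crit column reports the
+# Hochberg critical levels used to control FWE.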
+# +if(!is.matrix(x))x<-matl(x) +if(!is.matrix(x))stop('Data must be stored in a matrix or in list mode.') +if(!is.matrix(y))y<-matl(y) +if(!is.matrix(y))stop('Data must be stored in a matrix or in list mode.') + +J<-ncol(x) +if(J!=ncol(y))stop('x and y should have the same number of columns') + +xbar<-vector('numeric',J) +ncon<-J +dvec<-alpha/c(1:ncon) +psihat<-matrix(0,J,4) +dimnames(psihat)<-list(NULL,c('Variable','difference','ci.lower','ci.upper')) +test<-matrix(0,J,5) +dimnames(test)<-list(NULL,c('Variable','test','p.value','p.crit','se')) +temp1<-NA +nval=NULL +for (d in 1:J){ +psihat[d,1]<-d +#dval=na.omit(x[,d]) +#nval[d]=length(dval) +temp=yuen(x[,d],y[,d],tr=tr) +test[d,1]<-d +test[d,2]<-temp$teststat +test[d,3]=temp$p.value +test[d,5]<-temp$se +psihat[d,2]<-temp$dif +psihat[d,3]<-temp$ci[1] +psihat[d,4]<-temp$ci[2] +} +temp1=test[,3] +temp2<-order(0-temp1) +zvec<-dvec[1:ncon] +test[temp2,4]<-zvec +num.sig=sum(test[,3]<=test[,4]) +list(n=c(nrow(x),nrow(y)),test=test,psihat=psihat,num.sig=num.sig) +} + +loc2gmulpb<-function(x,y,est=tmean,nboot=2000,alpha = 0.05,SEED=TRUE,...){ +# +# For two independent p-variate distributions, apply yuen to each column of data +# FWE controlled with Hochberg's method +# +# x and y are matrices having p columns. (Can have list mode as well.) +# +if(!is.matrix(x))x<-matl(x) +if(!is.matrix(x))stop('Data must be stored in a matrix or in list mode.') +if(!is.matrix(y))y<-matl(y) +if(!is.matrix(y))stop('Data must be stored in a matrix or in list mode.') + +J<-ncol(x) +if(J!=ncol(y))stop('x and y should have the same number of columns') + +xbar<-vector('numeric',J) +ncon<-J +dvec<-alpha/c(1:ncon) +psihat<-matrix(0,J,4) +dimnames(psihat)<-list(NULL,c('Variable','difference','ci.lower','ci.upper')) +test<-matrix(0,J,4) +dimnames(test)<-list(NULL,c('Variable','p.value','p.crit','se')) +temp1<-NA +nval=NULL +for (d in 1:J){ +psihat[d,1]<-d +#temp=yuen(x[,d],y[,d],tr=tr) +temp=pb2gen(x[,d],y[,d],est=est,SEED=SEED,...) +test[d,1]<-d +#test[d,2]<-temp$teststat +test[d,2]=temp$p.value +test[d,4]<-sqrt(temp$sq.se) +psihat[d,2]<-temp$est.dif +psihat[d,3]<-temp$ci[1] +psihat[d,4]<-temp$ci[2] +} +temp1=test[,2] +temp2<-order(0-temp1) +zvec<-dvec[1:ncon] +test[temp2,3]<-zvec +num.sig=sum(test[,2]<=test[,3]) +list(n=c(nrow(x),nrow(y)),test=test,psihat=psihat,num.sig=num.sig) +} + + +trimmulCI<-function(x, tr = 0.2, alpha = 0.05,null.value=0,nboot=2000,SEED=TRUE,MC=TRUE){ +# +# For J dependent random variables, apply trimci to each. +# Confidence intervals are designed to have simultaneous probability coverage 1-alpha +# Useful when the number of variables is large say >20 and n is very small <=20 +# +# x is a matrix having J columns. (Can have list mode as well.) +# +# Output: +# num.sig = number of significant results. 
+# +# +if(SEED)set.seed(2) +if(!is.matrix(x))x<-matl(x) +if(!is.matrix(x))stop('Data must be stored in a matrix or in list mode.') +J<-ncol(x) +xbar<-vector('numeric',J) +ncon<-J +psihat<-matrix(0,J,4) +dimnames(psihat)<-list(NULL,c('Variable','estimate','ci.lower','ci.upper')) +test<-matrix(0,J,4) +dimnames(test)<-list(NULL,c('Variable','test','p.value','se')) +# Determine critical p-value +p.crit=trimmul.crit(x,tr=tr,nboot=nboot,alpha=alpha,SEED=SEED,MC=MC) +# +for (d in 1:J){ +psihat[d,1]<-d +dval=na.omit(x[,d]) +nval[d]=length(dval) +temp=trimci(dval,tr=tr,pr=FALSE,null.value=null.value,alpha=p.crit) +test[d,1]<-d +test[d,2]<-temp$test.stat +test[d,3]=temp$p.value +test[d,4]<-temp$se +psihat[d,2]<-temp$estimate +psihat[d,3]<-temp$ci[1] +psihat[d,4]<-temp$ci[2] +} +CI.sig=sum(psihat[,3]>null.value)+sum(psihat[,4]nmax){ +nmin=min(c(nmin,100)) +} +B=list() +M=matrix(NA,nrow=nmin,ncol=J) +for(i in 1:nreps){ +for(j in 1:J)M[,j]=sample(x[[j]],nmin) +B[[i]]=M +} +L=lapply(B,linWMWMC.sub,con=con) +ef.size=NA +for(j in 1:length(L))ef.size[j]=linES.sub(L[[j]],locfun=locfun,...) +ef=mean(ef.size) +} +if(POOL){ +y=list() +id1=which(con==1) +id2=which(con==-1) +v1=pool.a.list(x[id1]) +v2=pool.a.list(x[id2]) +if(length(v1)*length(v2)=nmax){ +B=list() +M=matrix(NA,nrow=nmin,ncol=J) +for(i in 1:nreps){ +for(j in 1:J)M[,j]=sample(x[[j]],nmin) +B[[i]]=M +} +L=lapply(B,linWMWMC.sub,con=con) +ef.size=NA +for(j in 1:length(L))ef.size[j]=linES.sub(L[[j]],locfun=locfun,...) +ef=mean(ef.size) +}} +list(Effect.Size=ef) +} + +linES.sub<-function(L,locfun,...){ +est=locfun(L,...) +if(est>=0)ef.size=mean(L-est<=est) +ef.size +} + + +rmlinES<-function(x, con = NULL){ +# +# Dependent groups: +# For each linear contrast, compute Algina et al. effect size based on the linear sum +# +if(!is.matrix(x))x<-matl(x) +if(!is.matrix(x))stop('Data must be stored in a matrix or in list mode.') +J=ncol(x) +if(is.null(con)){ +C=(J^2-J)/2 +con=matrix(0,ncol=C,nrow=J) +ic=0 +for(j in 1:J){ +for(k in 1:J){ +if(j 10){ +avec<-.05/c(11:ncon) +dvec<-c(dvec,avec) +}} +if(alpha==.01){ +dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) +if(ncon > 10){ +avec<-.01/c(11:ncon) +dvec<-c(dvec,avec) +}} +if(hoch)dvec<-alpha/c(1:ncon) +if(alpha != .05 && alpha != .01)dvec<-alpha/c(1:ncon) +if(sum(con^2)==0){ +flagcon<-TRUE +psihat<-matrix(0,CC,6) +dimnames(psihat)<-list(NULL,c('Group','Group','psihat','ci.lower','ci.upper','Q.effect')) +test<-matrix(NA,CC,6) +dimnames(test)<-list(NULL,c('Group','Group','test','p.value','p.crit','se')) +temp1<-0 +jcom<-0 +for (j in 1:J){ +for (k in 1:J){ +if (j < k){ +jcom<-jcom+1 +q1<-(nrow(x)-1)*winvar(x[,j],tr) +q2<-(nrow(x)-1)*winvar(x[,k],tr) +q3<-(nrow(x)-1)*wincor(x[,j],x[,k],tr)$cov +sejk<-sqrt((q1+q2-2*q3)/(h1*(h1-1))) +if(!dif){ +test[jcom,6]<-sejk +test[jcom,3]<-(xbar[j]-xbar[k])/sejk +temp1[jcom]<-2 * (1 - pt(abs(test[jcom,3]), df)) +test[jcom,4]<-temp1[jcom] +psihat[jcom,1]<-j +psihat[jcom,2]<-k +test[jcom,1]<-j +test[jcom,2]<-k +psihat[jcom,3]<-(xbar[j]-xbar[k]) +} +if(dif){ +dv<-x[,j]-x[,k] +test[jcom,6]<-trimse(dv,tr) +temp<-trimci(dv,alpha=alpha/CC,pr=FALSE,tr=tr) +test[jcom,3]<-temp$test.stat +temp1[jcom]<-temp$p.value +test[jcom,4]<-temp1[jcom] +psihat[jcom,1]<-j +psihat[jcom,2]<-k +test[jcom,1]<-j +test[jcom,2]<-k +psihat[jcom,3]<-mean(dv,tr=tr) +psihat[jcom,4]<-temp$ci[1] +psihat[jcom,5]<-temp$ci[2] +psihat[jcom,6]=depQS(x[,j],x[,k],locfun=locfun,...)$Q.effect +} +}}} +if(hoch)dvec<-alpha/c(1:ncon) +temp2<-order(0-temp1) +zvec<-dvec[1:ncon] 
+sigvec<-(test[temp2,4]>=zvec) +if(sum(sigvec)0){ +if(nrow(con)!=ncol(x))warning('The number of groups does not match the number + of contrast coefficients.') +ncon<-ncol(con) +psihat<-matrix(0,ncol(con),5) +dimnames(psihat)<-list(NULL,c('con.num','psihat','ci.lower','ci.upper','Q.effect')) +test<-matrix(0,ncol(con),5) +dimnames(test)<-list(NULL,c('con.num','test','p.value','p.crit','se')) +temp1<-NA +for (d in 1:ncol(con)){ +psihat[d,1]<-d +psihat[d,5]=lindQS(x,con[,d],locfun=locfun,...)$Q.effect +if(!dif){ +psihat[d,2]<-sum(con[,d]*xbar) +sejk<-0 +for(j in 1:J){ +for(k in 1:J){ +djk<-(nval-1)*wincor(x[,j],x[,k], tr)$cov/(h1*(h1-1)) +sejk<-sejk+con[j,d]*con[k,d]*djk +}} +sejk<-sqrt(sejk) +test[d,1]<-d +test[d,2]<-sum(con[,d]*xbar)/sejk +test[d,5]<-sejk +temp1[d]<-2 * (1 - pt(abs(test[d,2]), df)) +} +if(dif){ +for(j in 1:J){ +if(j==1)dval<-con[j,d]*x[,j] +if(j>1)dval<-dval+con[j,d]*x[,j] +} +temp1[d]<-trimci(dval,tr=tr,pr=FALSE)$p.value +test[d,1]<-d +test[d,2]<-trimci(dval,tr=tr,pr=FALSE)$test.stat +test[d,5]<-trimse(dval,tr=tr) +psihat[d,2]<-mean(dval,tr=tr) +}} +test[,3]<-temp1 +temp2<-order(0-temp1) +zvec<-dvec[1:ncon] +sigvec<-(test[temp2,3]>=zvec) +if(sum(sigvec) 10){ +avec<-.05/c(11:ncon) +dvec<-c(dvec,avec) +}} +if(alpha==.01){ +dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) +if(ncon > 10){ +avec<-.01/c(11:ncon) +dvec<-c(dvec,avec) +}} +if(alpha != .05 && alpha != .01)dvec<-alpha/c(1:ncon) +if(sum(con^2)==0){ +flagcon<-TRUE +psihat<-matrix(0,CC,7) +dimnames(psihat)<-list(NULL,c('Group','Group','psihat','ci.lower','ci.upper','p.value','adj.p.value')) +temp1<-0 +jcom<-0 +for (j in 1:J){ +for (k in 1:J){ +if (j < k){ +jcom<-jcom+1 +dv<-x[,j]-x[,k] +temp=sintv2(dv,pr=FALSE) +temp1[jcom]<-temp$p.value +psihat[jcom,1]<-j +psihat[jcom,2]<-k +psihat[jcom,3]<-median(dv) +psihat[jcom,4]<-temp$ci.low +psihat[jcom,5]<-temp$ci.up +psihat[jcom,6]<-temp$p.value +}}} +temp2<-order(0-temp1) +zvec<-dvec[1:ncon] +sigvec<-(psihat[temp2,6]>=zvec) +dd=0 +if(sum(sigvec)0){ +if(nrow(con)!=ncol(x))warning('The number of groups does not match the number + of contrast coefficients.') +ncon<-ncol(con) +psihat<-matrix(0,ncol(con),6) +dimnames(psihat)<-list(NULL,c('con.num','psihat','ci.lower','ci.upper','p.value','adj.p.value')) +temp1<-NA +for (d in 1:ncol(con)){ +psihat[d,1]<-d +for(j in 1:J){ +if(j==1)dval<-con[j,d]*x[,j] +if(j>1)dval<-dval+con[j,d]*x[,j] +} +temp=sintv2(dval,pr=FALSE) +temp1[d]=temp$p.value +psihat[d,5]=temp$p.value +psihat[d,2]<-median(dval) +psihat[d,3]<-temp$ci.low +psihat[d,4]<-temp$ci.up +} +temp2<-order(0-temp1) +zvec<-dvec[1:ncon] +sigvec<-(psihat[temp2,5]>=zvec) +psihat[temp2,6]<-zvec +dd=0 +if(sum(sigvec)n/2)stop('n.id should be less than n/2') +if(n.id<=0)stop('n.id should be greater than zero') +d=ics.distances(v) +dr=rank(d) +idout=n+1-n.id +id=which(dr>=idout) +j=c(1:n) +} +list(n=n,out.id=id,keep=j[-id]) +} + +ridge.Gtest<-function(x,y,k=NULL,regfun=tsreg,xout=FALSE,outfun=outpro,STAND=FALSE,PV=FALSE,iter=5000, +locfun=mean,scat=var,MC=FALSE,MSF=TRUE,...){ +# +# Test the hypothesis that all slope parameters are zero. +# using a robust analog of a ridge estimator. +# +# +# PV=TRUE: computes a p-value at the expense of higher execution time. +# Otherwise, simply test at the 0.05 level using an approximate critical value. +# +# +# STAND=TRUE: x is standardized and y is centered, based on measures of location +# and scatter indicated by +# locfun and scat +# locfun=median would use the median and scat=madsq uses MAD. 
+# +x<-as.matrix(x) +if(nrow(x) != length(y))stop("Length of y does not match number of x values") +m<-cbind(x,y) +m<-elimna(m) +p=ncol(x) +if(p==1)stop('There should be at least two independent variables') +p1=p+1 +x=m[,1:p] +y=m[,p1] +if(xout){ +flag<-outfun(x,plotit=FALSE)$keep +x<-x[flag,] +x<-as.matrix(x) +y<-y[flag] +} +n=nrow(x) +if(STAND){ +if(!PV)print('Suggest using PV=TRUE when STAND=TRUE') +x=standm(x,locfun=locfun,scat=scat) +y=y-locfun(y) +} +if(is.null(k)){ +if(!MSF)k=ridge.est.k(x,y) +else{ +ires=ols(x,y)$residuals +sigh=sqrt(sum(ires^2)/(n-p-1)) +k=p^(1+1/p)*sigh +}} +est=rob.ridge(x,y,k=k,Regfun=regfun,MSF=MSF)$coef +est=as.matrix(est) +x<-cbind(rep(1,nrow(x)),x[,1:ncol(x)]) +res<-y-x%*%est +p=ncol(x) +kmat=matrix(0,p,p) +diag(kmat)=k +xtx<-solve(t(x)%*%x+kmat) +h<-diag(x%*%xtx%*%t(x)) +hc3<-xtx%*%t(x)%*%diag(as.vector(res^2/(1-h)^2))%*%x%*%xtx +slopes=as.matrix(est[2:p1]) +Ssq=hc3[2:p1,2:p1] +f.test=t(slopes)%*%solve(Ssq)%*%slopes +f.test=(n-p)*f.test/((n-1)*p) +crit.val=NULL +if(!PV){ +if(n<20)crit.val=2.56 +if(n>500)crit.val=2.24 +if(is.null(crit.val)){ +nx=c(20,30,40, +50,75,100, +200,500) +ny=c(2.56,2.51,2.40, +2.41,2.34,2.30, + 2.29,2.24) +options(warn=-1) +if(n<=500)crit.f=lplot.pred(1/nx,ny,1/n)$yhat +options(warn=0) +}} +f.test=as.vector(f.test) +pv=NULL +if(PV){ +crit.f=NULL +if(!MC)v=ridgeGnull(n,p,regfun=regfun,iter=iter) +if(MC)v=ridgeGnullMC(n,p,regfun=regfun,iter=iter) +pv=mean(v>=f.test) +} +list(n=n,Ridge.est=est,F.test=f.test,critical.05.value=crit.f,p.value=pv) +} + + +ridgeGnullMC<-function(n,p,regfun=MMreg,iter=5000,SEED=TRUE){ +# +# Determine null distribution of ridge.Gtest +# +if(SEED)set.seed(45) +fv=NA +p1=p+1 +a=list() +library(parallel) +for(i in 1:iter)a[[i]]=rmul(n,p1) +fv=mclapply(a,ridgeGnullMC.sub,p=p,regfun=regfun) +fv=matl(fv) +fv=as.vector(fv) +fv +} +ridgeGnullMC.sub<-function(x,p,regfun=regfun){ +p1=p+1 +v=ridgeG.sub(x[,1:p],x[,p1],regfun=regfun)$F.test +v +} + + +dlinplot<-function(x,con,xlab='DV',ylab='',sym.test=FALSE){ +# +# For dependent variables, +# determine distribution of Y_i=sum_j c_jX_j +# and then plot the distribution +# +# The function also tests the hypothesis that Y has a median of zero. +# sym.test=TRUE: test the hypothesis that Y is symmetric. +# +# A quantile shift measure of effect size is returned as well. +# +if(is.matrix(con)){ +if(ncol(con>1))print('Warning: Argument con should be a vector. 
Only the first contrast coefficients are used.') +} +con=as.vector(con) +if(sum(con)!=0)stop('Contrast coefficients must sum to zero') +if(is.data.frame(x))x=as.matrix(x) +if(is.list(x))x=matl(x) +x=elimna(x) +n=nrow(x) +if(!is.matrix(x))stop('Data must be stored in a matrix or in list mode.') +J<-length(x) +if(length(con)!=ncol(x))stop('Length of con should equal number of groups') +x=elimna(x) +L=NA +linv=NA +for(i in 1:n){ +L[i]=sum(con*x[i,]) +} +akerd(L,xlab=xlab,ylab=ylab) +mt=sintv2(L) +sym=NULL +Q=depQS(L) +if(sym.test)sym=Dqdif(L) +list(median=mt$median,n=mt$n,ci.low=mt$ci.low,ci.up=mt$ci.up, +p.value=mt$p.value,Q.effect=Q$Q.effect,sym.test=sym) +} + + dlin.sign<-function(x,con){ + # For dependent variables, +# determine distribution of Y_i=sum_j c_jX_{ij} +# and then do a sign test +con=as.vector(con) +if(sum(con)!=0)stop('Contrast coefficients must sum to zero') +if(is.data.frame(x))x=as.matrix(x) +if(is.list(x))x=matl(x) +x=elimna(x) +n=nrow(x) +if(!is.matrix(x))stop('Data must be stored in a matrix or in list mode.') +J<-length(x) +if(length(con)!=ncol(x))stop('Length of con should equal number of groups') +x=elimna(x) +L=NA +linv=NA +for(i in 1:n){ +L[i]=sum(con*x[i,]) +} +a=signt(dif=L) +list(Prob_a_value_is_less_than_zerro=a$Prob_x_less_than_y,ci=a$ci,n=a$n,N=a$N,p.value=a$p.value) + } + + +wwwmcppbtr<-function(J,K,L, x,tr=.2,alpha=.05,dif=TRUE,op=FALSE,grp=NA,nboot=2000,SEED=TRUE,pr=TRUE){ +# +# Based on a percentile bootstrap method. +# +# dif=TRUE: use a linear combination of the variables, test the hypothesis that the trimmed mean is zero +# dif=FALSE: Use the marginal trimmed means instead. +# +# MULTIPLE COMPARISONS FOR A 3-WAY within-by-within-by within ANOVA +# Do all multiple comparisons associated with +# main effects for Factor A and B and C and all interactions +# based on trimmed means +# + # The data are assumed to be stored in x in list mode or in a matrix. + # If grp is unspecified, it is assumed x[[1]] contains the data + # for the first level of both factors: level 1,1. + # x[[2]] is assumed to contain the data for level 1 of the + # first factor and level 2 of the second factor: level 1,2 + # x[[j+1]] is the data for level 2,1, etc. + # If the data are in wrong order, grp can be used to rearrange the + # groups. For example, for a two by two design, grp<-c(2,4,3,1) + # indicates that the second group corresponds to level 1,1; + # group 4 corresponds to level 1,2; group 3 is level 2,1; + # and group 1 is level 2,2. + # + # Missing values are automatically removed. + # +if(is.data.frame(x))x=as.matrix(x) + JKL <- J*K*L + if(is.matrix(x)) + x <- listm(x) + if(!is.na(grp[1])) { + yy <- x + x<-list() + for(j in 1:length(grp)) + x[[j]] <- yy[[grp[j]]] + } + if(!is.list(x)) + stop("Data must be stored in list mode or a matrix.") + for(j in 1:JKL) { + xx <- x[[j]] + x[[j]] <- xx[!is.na(xx)] # Remove missing values + } + # + + if(JKL != length(x)) + warning("The number of groups does not match the number of contrast coefficients.") +for(j in 1:JKL){ +temp<-x[[j]] +temp<-temp[!is.na(temp)] # Remove missing values. 
+x[[j]]<-temp +} + # Create the three contrast matrices +temp<-con3way(J,K,L) +conA<-temp$conA +conB<-temp$conB +conC<-temp$conC +conAB<-temp$conAB +conAC<-temp$conAC +conBC<-temp$conBC +conABC<-temp$conABC +Factor.A<-rmmcppb(x,con=conA,est=tmean,tr=tr,alpha=alpha,dif=dif,nboot=nboot,SEED=SEED,pr=pr) +Factor.B<-rmmcppb(x,con=conB,est=tmean,tr=tr,alpha=alpha,dif=dif,nboot=nboot,SEED=SEED,pr=FALSE) +Factor.C<-rmmcppb(x,con=conC,est=tmean,tr=tr,alpha=alpha,dif=dif,nboot=nboot,SEED=SEED,pr=FALSE) +Factor.AB<-rmmcppb(x,con=conAB,est=tmean,tr=tr,alpha=alpha,dif=dif,nboot=nboot,SEED=SEED,pr=FALSE) +Factor.AC<-rmmcppb(x,con=conAC,est=tmean,tr=tr,alpha=alpha,dif=dif,nboot=nboot,SEED=SEED,pr=FALSE) +Factor.BC<-rmmcppb(x,con=conBC,est=tmean,tr=tr,alpha=alpha,dif=dif,nboot=nboot,SEED=SEED,pr=FALSE) +Factor.ABC<-rmmcppb(x,con=conABC,est=tmean,tr=tr,alpha=alpha,dif=dif,nboot=nboot,SEED=SEED,pr=FALSE) +list(Factor.A=Factor.A,Factor.B=Factor.B,Factor.C=Factor.C, +Factor.AB=Factor.AB,Factor.AC=Factor.AC,Factor.BC=Factor.BC, +Factor.ABC=Factor.ABC,conA=conA,conB=conB,conC=conC, +conAB=conAB,conAC=conAC,conBC=conBC,conABC=conABC) +} + + + + +lsa.linear<-function(x,y){ + require(lars) + + ## Least square approximation. This version Oct 19, 2006 + ## Reference Wang, H. and Leng, C. (2006) and Efron et al. (2004). + ## + ## Written by Chenlei Leng + ## + ## Input + ## obj: lm/glm/coxph or other object + ## + ## Output + ## beta.ols: the MLE estimate + ## beta.bic: the LSA-BIC estimate + ## beta.aic: the LSA-AIC estimate + + lsa <- function(obj) + { + intercept <- attr(obj$terms,'intercept') + if(class(obj)[1]=='coxph') intercept <- 0 + n <- length(obj$residuals) + Sigma <- vcov(obj) + SI <- solve(Sigma) + + beta.ols <- coef(obj) + + l.fit <- lars.lsa(SI, beta.ols, intercept, n) + + t1 <- sort(l.fit$BIC, ind=T) + + t2 <- sort(l.fit$AIC, ind=T) + + beta <- l.fit$beta + + if(intercept) { + beta0 <- l.fit$beta0+beta.ols[1] + beta.bic <- c(beta0[t1$ix[1]],beta[t1$ix[1],]) + beta.aic <- c(beta0[t2$ix[1]],beta[t2$ix[1],]) + } + + else { + beta0 <- l.fit$beta0 + beta.bic <- beta[t1$ix[1],] + beta.aic <- beta[t2$ix[1],] + } + + + + obj <- list(beta.ols=beta.ols, beta.bic=beta.bic, + beta.aic = beta.aic) + obj + } + + ################################### + ## lars variant for LSA + lars.lsa <- function (Sigma0, b0, intercept, n, + type = c("lasso", "lar"), + eps = .Machine$double.eps,max.steps) + { + type <- match.arg(type) + TYPE <- switch(type, lasso = "LASSO", lar = "LAR") + + n1 <- dim(Sigma0)[1] + + ## handle intercept + if (intercept) { + a11 <- Sigma0[1,1] + a12 <- Sigma0[2:n1,1] + a22 <- Sigma0[2:n1,2:n1] + Sigma <- a22-outer(a12,a12)/a11 + b <- b0[2:n1] + beta0 <- crossprod(a12,b)/a11 + } + + else { + Sigma <- Sigma0 + b <- b0 + } + + Sigma <- diag(abs(b))%*%Sigma%*%diag(abs(b)) + b <- sign(b) + + nm <- dim(Sigma) + m <- nm[2] + im <- inactive <- seq(m) + + Cvec <- drop(t(b)%*%Sigma) + ssy <- sum(Cvec*b) + if (missing(max.steps)) + max.steps <- 8 * m + beta <- matrix(0, max.steps + 1, m) + Gamrat <- NULL + arc.length <- NULL + R2 <- 1 + RSS <- ssy + first.in <- integer(m) + active <- NULL + actions <- as.list(seq(max.steps)) + drops <- FALSE + Sign <- NULL + R <- NULL + k <- 0 + ignores <- NULL + + while ((k < max.steps) & (length(active) < m)) { + action <- NULL + k <- k + 1 + C <- Cvec[inactive] + Cmax <- max(abs(C)) + if (!any(drops)) { + new <- abs(C) >= Cmax - eps + C <- C[!new] + new <- inactive[new] + for (inew in new) { + R <- updateR(Sigma[inew, inew], R, drop(Sigma[inew, active]), + Gram = TRUE,eps=eps) 
+ if(attr(R, "rank") == length(active)) { + ##singularity; back out + nR <- seq(length(active)) + R <- R[nR, nR, drop = FALSE] + attr(R, "rank") <- length(active) + ignores <- c(ignores, inew) + action <- c(action, - inew) + } + else { + if(first.in[inew] == 0) + first.in[inew] <- k + active <- c(active, inew) + Sign <- c(Sign, sign(Cvec[inew])) + action <- c(action, inew) + } + } + } + else action <- -dropid + Gi1 <- backsolve(R, backsolvet(R, Sign)) + dropouts <- NULL + A <- 1/sqrt(sum(Gi1 * Sign)) + w <- A * Gi1 + if (length(active) >= m) { + gamhat <- Cmax/A + } + else { + a <- drop(w %*% Sigma[active, -c(active,ignores), drop = FALSE]) + gam <- c((Cmax - C)/(A - a), (Cmax + C)/(A + a)) + gamhat <- min(gam[gam > eps], Cmax/A) + } + if (type == "lasso") { + dropid <- NULL + b1 <- beta[k, active] + z1 <- -b1/w + zmin <- min(z1[z1 > eps], gamhat) + # cat('zmin ',zmin, ' gamhat ',gamhat,'\n') + if (zmin < gamhat) { + gamhat <- zmin + drops <- z1 == zmin + } + else drops <- FALSE + } + beta[k + 1, ] <- beta[k, ] + beta[k + 1, active] <- beta[k + 1, active] + gamhat * w + + Cvec <- Cvec - gamhat * Sigma[, active, drop = FALSE] %*% w + Gamrat <- c(Gamrat, gamhat/(Cmax/A)) + + arc.length <- c(arc.length, gamhat) + if (type == "lasso" && any(drops)) { + dropid <- seq(drops)[drops] + for (id in rev(dropid)) { + R <- downdateR(R,id) + } + dropid <- active[drops] + beta[k + 1, dropid] <- 0 + active <- active[!drops] + Sign <- Sign[!drops] + } + + actions[[k]] <- action + inactive <- im[-c(active)] + } + beta <- beta[seq(k + 1), ] + + dff <- b-t(beta) + + RSS <- diag(t(dff)%*%Sigma%*%dff) + + if(intercept) + beta <- t(abs(b0[2:n1])*t(beta)) + else + beta <- t(abs(b0)*t(beta)) + + if (intercept) { + beta0 <- as.vector(beta0)-drop(t(a12)%*%t(beta))/a11 + } + else { + beta0 <- rep(0,k+1) + } + dof <- apply(abs(beta)>eps,1,sum) + BIC <- RSS+log(n)*dof + AIC <- RSS+2*dof + object <- list(AIC = AIC, BIC = BIC, + beta = beta, beta0 = beta0) + object + } + + ##This part is written by Hansheng Wang. + vcov.rq <- function(object,...) + { + q=object$tau + x=as.matrix(object$x) + resid=object$residuals + f0=density(resid,n=1,from=0,to=0)$y + COV=q*(1-q)*solve(t(x)%*%x)/f0^2 + COV + } + + # adaptive lasso for linear reg, tuning parameter by bic + # calls software from Wang and Leng (2007, JASA). + ok<-complete.cases(x,y) + x<-x[ok,] # get rid of na's + y<-y[ok] # since regsubsets can't handle na's + m<-ncol(x) + n<-nrow(x) + as.matrix(x)->x + lm(y~x)->out + lsa(out)->out.lsa + coeff<-out.lsa$beta.bic + coeff2<-coeff[2:(m+1)] # get rid of intercept + pred<-x%*%coeff2+coeff[1] + st<-sum(coeff2 !=0) # number nonzero + mse<-sum((y-pred)^2)/(n-st-1) + if(st>0) x.ind<-as.vector(which(coeff2 !=0)) else x.ind<-0 + return(list(fit=pred,st=st,mse=mse,x.ind=x.ind,coeff=coeff2, + intercept=coeff[1])) +} + +LADlasso.Z <- function(x , y , STAND = TRUE, grid = seq(log(0.01),log(1400),length.out=100),xout=FALSE,outfun=outpro,...){ +# +# Zheng, Q., Gallagher, C., and Kulasekera, K. (2016). Robust adaptive lasso for variable +# selection. Communications in Statistics - Theory and Methods, 46(9):4642-4659. 
+# +# The code used here is a slight modification of code supplied by Qi Zheng +# + ####X = matrix, list, or dataframe of predictor values + ####Y = vector or single-dimension matrix/list/dataframe of outcome values + ####grid = grid of lambda(tuning parameter) values to check + + library(lars) + library(quantreg) + + X=x + Y=y +p=ncol(X) +p1=p+1 +xy=elimna(cbind(X,Y)) +X=xy[,1:p] +Y=xy[,p1] + if(STAND)X=standm(X) + if(xout){ +X<-as.matrix(X) +flag<-outfun(X,plotit=FALSE,...)$keep +X<-X[flag,] +Y<-Y[flag] +X<-as.matrix(X) +n.keep=nrow(X) +} + object1=lsa.linear(X,Y); + adlasso=object1$coef; + n=length(Y); + grid=exp(grid); + rqob=rq(Y~0+X); + BIC=rep(0,100); + weights=1/abs(rqob$coef); + for ( i in 1:100){ + rqfit=rq.fit.lasso(X,Y,lambda=grid[i]*weights); + betalad_tmp=rqfit$coef; + betalad_tmp=betalad_tmp*(betalad_tmp>1e-8); + mse=mean(abs(rqfit$resi)); + mdsize=length(which(betalad_tmp!=0)); + BIC[i]=log(mse)+mdsize*log(n)/n; + } + step=which.min(BIC); + betalad=rq.fit.lasso(X,Y,lambda=grid[step]*weights)$coef; + ladlasso=betalad*(betalad>1e-8) + colnames(ladlasso) <- names(X) + # add an intercept for convenience + alpha<-median(Y-X%*%ladlasso) + coef<-c(alpha,ladlasso) +res<-Y-X%*%ladlasso-alpha + list(coef=coef,residuals=res) +} + +RA.lasso=LADlasso.Z + +H.lasso<- function(x,y,lambda.lasso.try=NULL,k=1.5, STAND=TRUE, xout=FALSE,outfun=outpro,...){ +# +# A slight modification of code supplied by Jung et al. (2016) +# +# +X=x +Y=y + +library(glmnet) +X=as.matrix(X) +p1<-ncol(X)+1 +p<-ncol(X) +xy<-cbind(X,Y) +xy<-elimna(xy) +X<-xy[,1:p] +Y<-xy[,p1] +if(STAND)X=standm(X) +if(is.null(lambda.lasso.try))lambda.lasso.try=seq(0.01,0.6,length.out=100) +library(glmnet) +if(xout){ +X<-as.matrix(X) +flag<-outfun(X,plotit=FALSE,...)$keep +X<-X[flag,] +Y<-Y[flag] +X<-as.matrix(X) +n.keep=nrow(X) +} +n<-length(Y) +Y.orgn<- Y +model.for.cv<- cv.glmnet(X, Y, family='gaussian',lambda=lambda.lasso.try) +lambda.lasso.opt<- model.for.cv$lambda.min +model.est<- glmnet(X,Y,family='gaussian',lambda=lambda.lasso.opt) +fit.lasso<- predict(model.est,X,s=lambda.lasso.opt) +res.lasso<- Y-fit.lasso +sigma.init<- mad(Y-fit.lasso) +beta.pre<- c(model.est$a0,as.numeric(model.est$beta)) +Y.old<- Y +tol = 10 +n.iter <- 0 +while(tol>1e-4 & n.iter<100) +{ +Y.new<- fit.lasso + winsorized(res.lasso,a=k, sigma=sigma.init) +model.for.cv<- cv.glmnet(X,Y.new, family='gaussian',lambda=lambda.lasso.try) +model.est<- glmnet(X,Y.new,family='gaussian',lambda=model.for.cv$lambda.min) +fit.lasso<- predict(model.est,X,s=model.for.cv$lambda.min) +res.lasso<- Y.new-fit.lasso +beta.post <- c(model.est$a0,as.numeric(model.est$beta)) +tol<- sum((beta.pre-beta.post)^2) +n.iter<- n.iter+1 +beta.pre<- beta.post +} +sigma.est<- mean((Y.new-cbind(rep(1,n),X)%*%beta.post)^2) +Y.fit<- cbind(rep(1,n),X)%*%beta.post +Y.res<- Y.new - Y.fit +#object<- list(coefficient=beta.post,fit=Y.fit, iter = n.iter, sigma.est = sigma.est, +list(coef=beta.post,fit=Y.fit, iter = n.iter, sigma.est = sigma.est, +lambda.lasso.opt = model.est$lambda, residuals = Y.res) +} + +winsorized<- function(x,a=1.5,sigma=1) { +s<-sigma +newx<-x +indp<-x>(a*s) +newx[indp]<-(a*s) +indn<- x<(a*-s) +newx[indn]<- (-a*s) +newx +} + +OS.lasso<- function(x,y,lambda.lasso.try=NULL,lambda.gamma.try=NULL,xout=FALSE,outfun=outpro,details=FALSE,...){ +# +# Outlier Shifting lasso +# Jung, Y., Lee, S., and Hu, J. (2016). Robust regression for highly corrupted response +# by shifting outliers. Statistical Modelling, 16(1):1--23. 
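+#
+# Illustrative call (simulated data; requires the glmnet package):
+# set.seed(1)
+# x=matrix(rnorm(200),ncol=4)
+# y=x[,1]+rnorm(50)
+# OS.lasso(x,y)$coef           # intercept and slopes after shifting apparent outliers in y
+# OS.lasso(x,y,details=TRUE)   # also returns the estimated shifts, CV error, etc.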
+# +# +X=x +Y=y +if(is.null(lambda.lasso.try))lambda.lasso.try=seq(0.01,0.6,length.out=100) +if(is.null(lambda.gamma.try))lambda.gamma.try = seq(1,4,length.out=50) +library(glmnet) +X<-as.matrix(X) +p1<-ncol(X)+1 +p<-ncol(X) +xy<-cbind(X,Y) +xy<-elimna(xy) +X<-xy[,1:p] +Y<-xy[,p1] +if(xout){ +X<-as.matrix(X) +flag<-outfun(X,plotit=FALSE,...)$keep +X<-X[flag,] +Y<-Y[flag] +X<-as.matrix(X) +n.keep=nrow(X) +} +x=X +y=Y +n<-length(Y) +Y.orgn<- Y +model.for.cv<- cv.glmnet(X, Y, family="gaussian",lambda=lambda.lasso.try) +lambda.lasso.opt<- model.for.cv$lambda.min +model.est<- glmnet(X,Y,family="gaussian",lambda=lambda.lasso.opt) +fit.lasso<- predict(model.est,X,s=lambda.lasso.opt) +res.lasso<- Y - fit.lasso +sigma.est<- mad(Y-fit.lasso) +beta.est<- as.numeric(model.est$beta) +gamma.est<-rep(0,n) +n.fold<- 5 +n.cv <- n/n.fold +CV.error2<-CV.error<-rep(NA,length(lambda.gamma.try)) +Y.pred.cv<-matrix(NA,nrow=length(Y),ncol=length(lambda.gamma.try)) +Y.new<- Y +for (tt in 1:length(lambda.gamma.try)) +{ +gamma.est.cv<-rep(0,n-n.cv) +for (jj in 1:n.fold) +{ +sample.out.index<- (1+n.cv*(jj-1)):(n.cv*(jj)) +X.train<- X[-sample.out.index,] +Y.train<- Y[-sample.out.index] +X.test<- X[sample.out.index,] +model.train.temp<- glmnet(X.train,Y.train,family="gaussian", +lambda=lambda.lasso.opt) +beta.pre<-beta.post<- c(model.train.temp$a0, +as.numeric(model.train.temp$beta)) +tol<-100; n.iter <- 0 +while(tol>1e-6 & n.iter<100) +{resid.temp<- Y.train-cbind(rep(1,n-n.cv),X.train)%*%beta.pre +nonzero<-which(abs(resid.temp)>=sigma.est*lambda.gamma.try[tt]) +gamma.est.cv[nonzero]<- resid.temp[nonzero] +Y.train.new <- Y.train - gamma.est.cv +model.train.temp<-glmnet(X.train,Y.train.new, +family="gaussian",lambda=lambda.lasso.opt) +beta.post <- c(model.train.temp$a0, +as.numeric(model.train.temp$beta)) +tol<- sum((beta.pre-beta.post)^2) +n.iter<- n.iter+1 +beta.pre<-beta.post +} +Y.pred.cv[sample.out.index,tt] <-cbind(rep(1,n.cv), +X.test)%*%beta.post +} +CV.error[tt]<- mean((Y.pred.cv[,tt]-Y.orgn)^2) +CV.error2[tt]<- mean(abs(Y.pred.cv[,tt]-Y.orgn)) +} +lambda.gamma.opt<- lambda.gamma.try[which.min(CV.error)] +model.opt<- glmnet(X,Y.orgn,family="gaussian",lambda=lambda.lasso.opt) +beta.pre<- beta.post<- c(model.opt$a0,as.numeric(model.opt$beta)) +tol<-100; n.iter <- 0 +while(tol>1e-6 & n.iter<100) +{ +resid.opt<- Y.orgn-cbind(rep(1,n),X)%*%beta.pre +nonzero<-which(abs(resid.opt)>=sigma.est*lambda.gamma.opt) +gamma.est[nonzero]<- resid.opt[nonzero] +Y.new <- Y.orgn - gamma.est +model.opt<- glmnet(X,Y.new,family="gaussian",lambda=lambda.lasso.opt) +beta.post <- c(model.opt$a0,as.numeric(model.opt$beta)) +tol<- mean((beta.pre-beta.post)^2) +n.iter<- n.iter+1 +beta.pre<-beta.post} +Y.fit<- cbind(rep(1,n),X)%*%beta.post +res=y-Y.fit +if(details) +list(coef=beta.post,fit=fit.lasso,iter = n.iter, +sigma.est = sigma.est,CV.error=CV.error, +n.outlier=length(which(gamma.est!=0)), +gamma.est=gamma.est,lambda.opt=lambda.gamma.opt) +else list(coef=beta.post,residuals=res) +} + +plot3D<-function(x,y,xlab='X1',ylab='X2',zlab='Y',theta=50,phi=25, +duplicate='error',pc='*',ticktype='simple',expand=.5){ +# +# A 3D plot: supplied for convenience +# +# Example: plot a regression surface +# x and y generated from regression model with no error term. 
+# +x=as.matrix(x) +if(ncol(x)!=2)stop('x should have two columns only') +library(akima) +fitr<-interp(x[,1],x[,2],y,duplicate=duplicate) +persp(fitr,theta=theta,phi=phi,xlab=xlab,ylab=ylab,zlab=zlab,expand=expand, +scale=scale,ticktype=ticktype) +} + +LAD.lasso<-function(x,y,lam=NULL,WARN=FALSE,xout=FALSE,outfun=outpro,STAND=TRUE){ +# +# LAD (weighted) lasso based on +# Wang et al. Journal of Business and Economic Statistics +# +x=as.matrix(x) +p=ncol(x) +p1=p+1 +xy=cbind(x,y) +xy=elimna(xy) +if(STAND)xy=standm(xy) +x=xy[,1:p] +y=xy[,p1] + +if(xout){ +flag<-outfun(x,plotit=FALSE)$keep +xy=cbind(x,y) +x=xy[flag,1:p] +y=xy[flag,p1] +} +n=nrow(x) +if(p==1)stop('Should have two or more independent variables') +library(quantreg) +if(!WARN)options(warn=-1) +temp<-rq(y~x) +init<-temp[1]$coefficients +if(is.null(lam))lam=log(n)/(n*abs(init[2:p1])) +yy=c(y,rep(0,p)) +M=diag(n*lam) +xx=rbind(x,M) +coef=rq(yy~xx)[1]$coefficients +if(!WARN)options(warn=0) +res=y-x%*%coef[2:p1]-coef[1] +rv=order(lam) +list(coef=coef,lambda=lam,slopes.rank=rv,residuals=res) +} + + +ESfun<-function(x,y,QSfun=median,method=c('EP','QS','QStr','AKP','WMW','KMS'),tr=.2,pr=TRUE,SEED=TRUE){ +type=match.arg(method) +switch(type, + EP=yuenv2(x,y,tr=tr,SEED=SEED)$Effect.Size, #Explanatory power + QS=shiftQS(x,y,locfun=QSfun)$Q.Effect, #Quantile shift based on the medians + QStr=yuenQS(x,y,tr=tr,pr=pr)$Q.Effect, #Based on trimmed means + AKP=akp.effect(x,y,tr=tr), #Robust analog of Cohen's d + WMW=pxly(x,y,SEED=SEED), # P(X 0)flag<-(flag==1) +idv<-c(1:n) +outid <- idv[flag] +keep<-idv[!flag] +n.out=length(outid) +list(n=n,n.out=n.out,out.id=outid,keep=keep) +} + + Rdepth<-function(x,y,z=NULL, ndir = NULL){ +# +# +# z: +# An m by p+1 matrix containing row wise the hyperplanes for which to compute +# the regression depth. The first column should contain the intercepts. +# If z is not specified, it is set equal to cbind(x,y). +# +# Required: mrfDepth + +# For convenience, the arguments correspond to conventions in WRS + +x=cbind(x,y) +library(mrfDepth) +a=rdepth(x,z=z,ndir=ndir) +a +} + + +multsm<-function(x,y,pts=x,fr=.5,xout=FALSE,outfun=outpro,plotit=TRUE,pr=TRUE, +xlab='X',ylab='Prob',ylab2='Y',zlab='Prob',ticktype='det',vplot=NULL,scale=TRUE, +L=TRUE,...){ +# +# +# A smoother for multinomial regression based on logSM +# +# Example: Assuming x is a vector, and possible values +# for y are 0,1 and 2. +# multsm(x,y,c(-1,0,1)) +# This would estimate +# P(Y=0|x=-1), P(Y=1|x=-1), P(Y=2|x=-1), P(Y=0|x=0), etc. +# +# Returns estimates of the probabilities associated with +# each possible value of y given a value for independent variable that is stored in pts +# +# vplot indicates the value of the dependent variable for which probabilities will be plotted. +# vplot=1 means that the first largest value will be used. +# By default, vplot=NULL meaning that all values of y will when there is a single independent variable. +# +# vplot=c(1,3) means that the first and third values will be used. If the first value is 5, say and the third is 8, +# plot P(y=5|pts) and P(Y=8|pts) +# For more than one independent variable, the first value in vplot is used only. If no value is specified, the smallest y value is used. +# +# scale =TRUE is the default and is relevant when plotting and there are two dependent variables. See the function lplot. +# +# +# L=TRUE: for p=2, use LOESS (lplot) to plot the regression surface; otherwise use a running interval smoother (rplot). 
+# +# VALUE: +# For each value in pts, returns the probabilities for each of the y values. +# +if(pr){ +if(!xout)print('Suggest also looking at result using xout=TRUE') +} +xy=cbind(x,y) +xy=elimna(xy) +p1=ncol(xy) +p=p1-1 +if(p==1)pts=sort(pts) +x=xy[,1:p] +y=xy[,p1] +x=as.matrix(x) +if(xout){ +flag=outfun(x,plotit=FALSE,...)$keep +x=x[flag,] +y=y[flag] +} +x=standm(x,est=median,scar=madsq) +rempts=pts +pts=standm(pts,est=median,scar=madsq) +n=length(y) +temp<-sort(unique(y)) +nv=length(temp) +nv1=nv+1 +if(p==1){ +x=as.matrix(x) +pts=sort(pts) +} +pts=as.matrix(pts) +res=matrix(NA,nrow=nrow(pts),ncol=nv) +lab=NULL +for(k in 1:nv){ +est=logSMpred(x,y==temp[k],pts=pts) +res[,k]=est +lab=c(lab,paste('Y value',temp[k])) +} +dimnames(res)=list(NULL,lab) +if(plotit){ +if(is.null(vplot))vplot=c(1:nv) +#vplot=vplot+1 # adjustment to match col of res +if(p==1){ +nlines=min(ncol(res),6) +nlines=nlines-1 +plot(c(rep(rempts,length(vplot))),c(as.vector(res[,vplot])),type='n',xlab=xlab,ylab=ylab,ylim=c(0,1)) +for(k in 1:length(vplot))lines(rempts,res[,vplot[k]],lty=k) +} +if(p>1){ +if(p==2){ +if(L)lplot(rempts,res[,vplot[1]],xlab=xlab,ylab=ylab2,zlab=zlab,ticktype=ticktype,scale=scale,pr=FALSE) +if(!L)rplot(rempts,res[,vplot[1]],xlab=xlab,ylab=ylab2,zlab=zlab,ticktype=ticktype,scale=scale,pr=FALSE) +} +}} +list(estimates=res,pts=pts) +} + + + +multireg.prob<-function(x,y,pts=x,xout=FALSE,outfun=outpro,plotit=TRUE,xlab='X',ylab='Prob',zlab='Prob',ticktype='det',vplot=NULL, +L=TRUE,scale=TRUE,...){ +# +# +# Returns estimate of P(Y=k|X=pts) +# for all possible values of k and all points stored in pts. +# using a multinomial logit model +# +# Requires R package nnet +# +# scale =TRUE is the default: +# if there is only p=1 independent variable, the y-axis of the plot of the regression line will range between 0 and 1. +# This can provide a useful perspective, particularly when there is no association. +# if scale=TRUE, the y-axis is limited to the range of estimated probabilities. +# +library(nnet) +xy=cbind(x,y) +xy=elimna(xy) +p1=ncol(xy) +p=p1-1 +x=xy[,1:p] +y=xy[,p1] +if(p==1){ +pts=sort(pts) +} +x=as.matrix(x) +if(xout){ +flag=outfun(x,plotit=FALSE,...)$keep +x=x[flag,] +y=y[flag] +} +pts=as.matrix(pts) +npts=nrow(pts) +est=summary(multinom(y~x))$coefficients +x=as.matrix(x) +nv=length(unique(y)) +nvm1=nv-1 +w=NA +pr=NA +if(is.null(dim(est)))est=matrix(est,nrow=1) +ans=matrix(NA,nrow=npts,ncol=nvm1) +for(k in 1:nrow(pts)){ +for(j in 1:nvm1){ +w[j]=exp(est[j,1]+sum(est[j,2:p1]*pts[k,])) +} +bot=1+sum(w) +ans[k,]=w/bot +} +v0=1-apply(ans,1,sum) +ptn=c(1:nrow(pts)) +res=cbind(ptn,v0,ans) +temp=sort(unique(y)) +dimnames(res)=list(NULL,c('pts.no',temp)) +if(plotit){ +if(is.null(vplot))vplot=max(y) +vplot=vplot+1 # adjustment to match col of res +if(p==1){ +nlines=min(ncol(res),6) +nlines=nlines-1 +if(scale)plot(c(pts[1:2],rep(pts,length(vplot))),c(0,1,as.vector(res[,vplot])),type='n',xlab=xlab,ylab=ylab) +if(!scale)plot(rep(pts,length(vplot)),as.vector(res[,vplot]),type='n',xlab=xlab,ylab=ylab) +for(k in 1:length(vplot))lines(pts,res[,vplot[k]],lty=k) +} +if(p>1){ +if(ylab=='Prob')ylab='Y' +if(p==2){ +if(L)lplot(pts,res[,vplot[1]],xlab=xlab,ylab=ylab,zlab=zlab,ticktype=ticktype,scale=scale,pr=FALSE) +if(!L)rplot(pts,res[,vplot[1]],xlab=xlab,ylab=ylab,zlab=zlab,ticktype=ticktype,scale=scale,pr=FALSE) +} +} +} +list(estimates=res,pts=pts) +} + + + + + + + +logIVcom<-function(x,y,IV1=1,IV2=2,nboot=500,xout=FALSE,outfun=outpro,SEED=TRUE, +val=NULL,...){ +# +# For binary dependent variables. 
Assumes the logistic regression model is true. +# +# compare strength of the association for two subsets of independent variables +# IV1 and IV2 indicate the two sets of independent variables to be compared +# Example: IV1=c(1,2), IV2=1 would compare the first two independent +# variables to the third. +# +# If y is not binary, it has K possible values +# val: determines the value that will be used. +# Example: sort(unique(y))= 2, 4, 8 +# val=2, the function focuses on P(Y=4|X), the second possible +# value in y +# Default is to use the largest value, 8 in the example. +# +options(warn=-1) +library(parallel) +x<-as.matrix(x) +p1<-ncol(x)+1 +p<-ncol(x) +if(max(c(IV1,IV2))>p)stop('IV1 or IV2 has a value that exceeds the number of col. in x') +xy<-cbind(x,y) +xy<-elimna(xy) +x<-xy[,1:p] +y<-xy[,p1] +nrem=length(y) +if(xout){ +m<-cbind(x,y) +flag<-outfun(x,plotit=FALSE,...)$keep +m<-m[flag,] +x<-m[,1:p] +y<-m[,p1] +} +nkeep=length(y) +temp<-sort(unique(y)) +nv=length(temp) +if(is.null(val))idv=nv +y=y==temp[nv] +y=as.numeric(y) +est1=sd(logreg.pred(x[,IV1],y,x[,IV1])) +est2=sd(logreg.pred(x[,IV2],y,x[,IV2])) +nv=length(y) +x<-as.matrix(x) +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. + +data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot) +data=listm(t(data)) + +bvec1<-mclapply(data,regIVbinv2_sub,x[,IV1],y,x[,IV1]) +bvec2<-mclapply(data,regIVbinv2_sub,x[,IV2],y,x[,IV2]) +bvec1=as.vector(matl(bvec1)) +bvec2=as.vector(matl(bvec2)) +# bvec1 and bvec2 are nboot standard deviations based on bootstrap predict prob(Y|X) +pv1=mean(bvec1=nmin]) +UP<-max(sub[vecn>=nmin]) +pts=seq(x1[LOW],x1[UP],length.out=npts) +} +ES=NA +for (i in 1:length(pts)){ +g1<-y1[near(x1,pts[i],fr1)] +g2<-y2[near(x2,pts[i],fr2)] +g1<-g1[!is.na(g1)] +g2<-g2[!is.na(g2)] +ES[i]=ESfun(g1,g2,method=method,pr=FALSE) +} +res=cbind(pts,ES) +dimnames(res)=list(NULL,c('pts','ES')) +if(plotit)lplot(res[,1],res[,2],xlab=xlab,ylab='ES') +} +if(p>1){ +if(SEED)set.seed(2) # now cov.mve always returns same result +ES=NA +x1=as.matrix(x1) +p=ncol(x1) +p1=p+1 +m1=elimna(cbind(x1,y1)) +x1=m1[,1:p] +y1=m1[,p1] +x2=as.matrix(x2) +p=ncol(x2) +p1=p+1 +m2=elimna(cbind(x2,y2)) +x2=m2[,1:p] +y2=m2[,p1] +# +if(is.na(pts[1])){ +x1<-as.matrix(x1) +pts<-ancdes(x1,FRAC=FRAC,DH=TRUE) +} +pts<-as.matrix(pts) +n1<-1 +n2<-1 +vecn<-1 +mval1<-cov.mve(x1) +mval2<-cov.mve(x2) +for(i in 1:nrow(pts)){ +n1[i]<-length(y1[near3d(x1,pts[i,],fr1,mval1)]) +n2[i]<-length(y2[near3d(x2,pts[i,],fr2,mval2)]) +} +flag<-rep(TRUE,nrow(pts)) +for(i in 1:nrow(pts))if(n1[i]<10 || n2[i]<10)flag[i]<-F +flag=as.logical(flag) +pts<-pts[flag,] +if(sum(flag)==1)pts<-t(as.matrix(pts)) +if(sum(flag)==0)stop('No comparable design points found, might increase span.') +for (i in 1:nrow(pts)){ +g1<-y1[near3d(x1,pts[i,],fr1,mval1)] +g2<-y2[near3d(x2,pts[i,],fr2,mval2)] +g1<-g1[!is.na(g1)] +g2<-g2[!is.na(g2)] +ES[i]=ESfun(g1,g2,method=method,tr=tr,pr=FALSE) +} +if(p==2){ +if(plotit) lplot(pts,ES,xlab=xlab,ylab=ylab,zlab=zlab,ticktype=ticktype) +} +res=cbind(pts,ES) +vlabs=NA +for(j in 1:ncol(pts))vlabs[j]=paste('X',j) +dimnames(res)=list(NULL,c(vlabs,'ES')) +} +res +} + + +sband<-function(x,y,plotit=TRUE,CI=TRUE,alpha=.05,crit=NULL, +sm=TRUE,op=1,xlab='First Group',ylab='Est. 2 - Est. 1'){ +# +# Compute a confidence band for the shift function. +# Assuming two independent groups are being compared +# +# The default critical value is the approximate .05 critical value. 
+# +# If flag=TRUE, the exact simultaneous probability coverage isomputed +# based on the critical value indicated the the argument +# crit. The default value yields, approximately, a .95 confidence band. +# +# If plotit=TRUE, a plot of the shift function is created, assuming that +# the graphics window has already been activated. +# +# This function removes all missing observations. +# +# When plotting, the median of x is marked with a + and the two +# quaratiles are marked with o. +# +# sm=TRUE, shift function is smoothed using: +# op!=1, running interval smoother, +# otherwise use lowess. +# +# Note: which group is the reference group matters. +# sband(x,y) often gives different results than sband(y,x). +# +x<-x[!is.na(x)] # Remove missing values from x. +y<-y[!is.na(y)] # Remove missing values from y. +n1=length(x) +n2=length(y) +if(is.null(crit))crit=ks.crit(n1=n1,n2=n2,alpha=alpha) +plotit<-as.logical(plotit) +pc<-NA +pc<-1-kssig(length(x),length(y),crit) +chk=sum(duplicated(x,y)) +if(chk>0){ +crit=ksties.crit(x,y,alpha) +pc=1-kstiesig(x,y,crit) +} +xsort<-sort(x) +ysort<-c(NA,sort(y)) +l<-0 +u<-0 +ysort[length(y)+1+1]<-NA +for(ivec in 1:length(x)) +{ +isub<-max(0,ceiling(length(y)*(ivec/length(x)-crit))) +l[ivec]<-ysort[isub+1]-xsort[ivec] +isub<-min(length(y)+1,floor(length(y)*(ivec/length(x)+crit))+1) +u[ivec]<-ysort[isub+1]-xsort[ivec] +} +id.sig.greater=NULL +id.sig.less.than=NULL +num<-length(l[l>0 & !is.na(l)])+length(u[u<0 & !is.na(u)]) +id.sig.greater=which(l>0) +id.sig.less.than=which(u<0) +qhat<-c(1:length(x))/length(x) +m<-matrix(c(qhat,l,u),length(x),3) +dimnames(m)<-list(NULL,c('qhat','lower','upper')) +if(plotit){ +xsort<-sort(x) +ysort<-sort(y) +del<-0 +for (i in 1:length(x)){ +ival<-round(length(y)*i/length(x)) +if(ival<=0)ival<-1 +if(ival>length(y))ival<-length(y) +del[i]<-ysort[ival]-xsort[i] +} +xaxis<-c(xsort,xsort) +yaxis<-c(m[,1],m[,2]) +allx<-c(xsort,xsort,xsort) +ally<-c(del,m[,2],m[,3]) +temp2<-m[,2] +temp2<-temp2[!is.na(temp2)] +plot(allx,ally,type='n',ylab=ylab,xlab=xlab) +ik<-rep(F,length(xsort)) +if(sm){ +if(op==1){ +ik<-duplicated(xsort) +del<-lowess(xsort,del)$y +} +if(op!=1)del<-runmean(xsort,del,pyhat=TRUE) +} +lines(xsort[!ik],del[!ik]) +lines(xsort,m[,2],lty=2) +lines(xsort,m[,3],lty=2) +temp<-summary(x) +text(temp[3],min(temp2),"+") +text(temp[2],min(temp2),"o") +text(temp[5],min(temp2),"o") +} +flag=is.na(m[,2]) +m[flag,2]=-Inf +flag=is.na(m[,3]) +m[flag,3]=Inf +q.greater=NULL +if(length(id.sig.greater)>0)q.greater=m[id.sig.greater,1] +q.less=NULL +if(length(id.sig.less.than)>0)q.less=m[id.sig.less.than,1] +if(!CI)m=NULL +list(m=m,crit=crit,numsig=num,q.sig.greater=q.greater,q.sig.less=q.less,prob.coverage=pc) +} + + +ancESband<-function(x1=NULL,y1=NULL,x2=NULL,y2=NULL,fr1=1,fr2=1,method='WMW', +pr=TRUE,FAST=TRUE,alpha=.05,plotit=TRUE,xlab='X',ylab='ES',npts=25, +xout=FALSE,outfun=out,nboot=500,SEED=TRUE, +nmin=12,SCAT=TRUE,pc='.',...){ +# +# Compute a measure of effect size for +# two independent groups when there is a single covariate. +# +# No parametric assumption is made about the form of +# the regression lines--a running interval smoother is used. +# +# Confidence intervals are computed so that the simultaneous probability +# coverage is approximately .95 when npts=25 covariate points are used. 
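+#
+# Illustrative call using the default method (simulated data; execution time can be
+# high because of the bootstrap):
+# set.seed(1)
+# x1=rnorm(80); y1=x1+rnorm(80)
+# x2=rnorm(80); y2=x2+rnorm(80)
+# ancESband(x1,y1,x2,y2)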
+# +# Three methods can be used: +# +# 'AKP': trimmed-Winsorized analog of Cohen's d +# 'QS': quantile shift +# 'WMW': P(Y11)stop('One covariate only is allowed with this function') +if(method=='EP')stop('Using method EP not recommended at this time') +if(length(x1)!=length(y1))stop('x1 and y1 have different lengths') +if(length(x2)!=length(y2))stop('x2 and y2 have different lengths') +xy1=elimna(cbind(x1,y1)) +xy2=elimna=cbind(x2,y2) +x1=xy1[,1] +y1=xy1[,2] +x2=xy2[,1] +y2=xy2[,2] +n1.in=nrow(xy1) +n2.in=nrow(xy2) + +if(xout){ +flag<-outfun(x1,...)$keep +x1<-x1[flag] +y1<-y1[flag] +flag<-outfun(x2,...)$keep +x2<-x2[flag] +y2<-y2[flag] +} +xor1=order(x1) +xor2=order(x2) +x1=x1[xor1] +x2=x2[xor2] +y1=y1[xor1] +y2=y2[xor2] +n1n=length(y1) +n2n=length(y2) +nv=c(30, 50, 60, 70, 80, 100, +150, 200, 300, 400, 500, 600, 800) +pv=c(0.00824497,0.00581, 0.005435089, 0.004763079, +0.00416832, 0.004406774, 0.00388228,0.003812836,0.003812836,0.003453055, 0.003625061, +.003372966, 0.003350022) +p.crit=(lplot.pred(1/nv,pv,1/n1n)$yhat+lplot.pred(1/nv,pv,1/n2n)$yhat)/2 +if(alpha!=.05){ +p.crit=p.crit*alpha/.05 # A crude adjustment +} +if(npts<=15)p.crit=alpha/npts +qmin=nboot*p.crit +bmin=ceiling(1/p.crit) +if(qmin<1){ +stop(paste('nboot must be at least ',bmin)) +} +EST=ancES(x1,y1,x2,y2,plotit=FALSE,npts=npts,method=method) +pts=EST[,1] +MAT=matrix(NA,nrow=nboot,ncol=length(pts)) +for(i in 1:nboot){ +id1=sample(n1n,n1n,replace=TRUE) +id2=sample(n2n,n2n,replace=TRUE) +B=ancES(x1[id1],y1[id1],x2[id2],y2[id2],plotit=FALSE,method=method,pts=pts,npts=npts,SEED=FALSE) +MAT[i,]=B[,2] +} + +flag1=MAT<.5 +flag2=MAT==.5 +pv1=apply(flag1,2,mean,na.rm=TRUE) +pv2=apply(flag2,2,mean,na.rm=TRUE) +pv=pv1+.5*pv2 +one.m.pv=1-pv +pv=2*apply(rbind(pv,one.m.pv),2,min) +ci.low=NA +ci.up=NA +qlow=p.crit/2 +qhi=1-p.crit/2 +ci.low=NA +ci.up=NA +for(i in 1:length(pts)){ +if(!is.na(pts[i]))ci.low[i]=qest(MAT[,i],qlow) +if(!is.na(pts[i]))ci.up[i]=qest(MAT[,i],qhi) +} +pvm=matrix(NA,nrow=length(pts),ncol=5) +pvm[,1]=pts +pvm[,2]=EST[,2] +pvm[,3]=pv +pvm[,4]=ci.low +pvm[,5]=ci.up +num.sig=sum(pv=nmin]) +UP<-max(sub[vecn>=nmin]) +pts=seq(x1[LOW],x1[UP],length.out=npts) +} +ES=NA +for (i in 1:length(pts)){ +g1<-y1[near(x1,pts[i],fr1)] +g2<-y2[near(x2,pts[i],fr2)] +g1<-g1[!is.na(g1)] +g2<-g2[!is.na(g2)] +ES[i]=ESfun(g1,g2,method=method,pr=FALSE,tr=tr) +} +res=cbind(pts,ES) +dimnames(res)=list(NULL,c('pts','ES')) +if(plotit){ +vp=0 +if(method=='AKP' || method=='QS')vp=-1 +plot(c(res[1,1],res[1,1],res[,1]),c(1,vp,res[,2]),xlab=xlab,ylab='ES',type='n') +v=lplot(res[,1],res[,2],xlab=xlab,ylab='ES',plotit=FALSE,pyhat=TRUE)$yhat.values +points(res[,1],res[,2],pch=pch) +lines(res[,1],v) +}} +if(p>1){ +if(SEED)set.seed(2) # now cov.mve always returns same result +ES=NA +x1=as.matrix(x1) +p=ncol(x1) +p1=p+1 +m1=elimna(cbind(x1,y1)) +x1=m1[,1:p] +y1=m1[,p1] +x2=as.matrix(x2) +p=ncol(x2) +p1=p+1 +m2=elimna(cbind(x2,y2)) +x2=m2[,1:p] +y2=m2[,p1] +# +if(is.na(pts[1])){ +x1<-as.matrix(x1) +pts<-ancdes(x1,FRAC=FRAC,DH=TRUE) +} +pts<-as.matrix(pts) +n1<-1 +n2<-1 +vecn<-1 +mval1<-cov.mve(x1) +mval2<-cov.mve(x2) +for(i in 1:nrow(pts)){ +n1[i]<-length(y1[near3d(x1,pts[i,],fr1,mval1)]) +n2[i]<-length(y2[near3d(x2,pts[i,],fr2,mval2)]) +} +flag<-rep(TRUE,nrow(pts)) +for(i in 1:nrow(pts))if(n1[i]<10 || n2[i]<10)flag[i]<-F +flag=as.logical(flag) +pts<-pts[flag,] +if(sum(flag)==1)pts<-t(as.matrix(pts)) +if(sum(flag)==0)stop('No comparable design points found, might increase span.') +for (i in 1:nrow(pts)){ +g1<-y1[near3d(x1,pts[i,],fr1,mval1)] 
+g2<-y2[near3d(x2,pts[i,],fr2,mval2)] +g1<-g1[!is.na(g1)] +g2<-g2[!is.na(g2)] +ES[i]=ESfun(g1,g2,method=method,tr=tr,pr=FALSE) +} +if(p==2){ +if(plotit) lplot(pts,ES,xlab=xlab,ylab=ylab,zlab=zlab,ticktype=ticktype) +} +res=cbind(pts,ES) +vlabs=NA +for(j in 1:ncol(pts))vlabs[j]=paste('X',j) +dimnames(res)=list(NULL,c(vlabs,'ES')) +} +res +} + +MULNC<-function(x1,x2,alpha=.05,SEED=TRUE,nboot=500,nullval=.5,PV=FALSE,pr=TRUE){ +# +# bivariate analog of Cliff's method, which is an analog of the +# Wilcoxon--Mann--Whitney test +# +# +# Let PG= event that x1 'dominates' x2. +# That is, for the ith and jth randomly sampled points +# x1[i,1]>x2[j,1] and x1[i,2]>x2[j,2] +# PL= event that x2 is 'dominates' x1. +# +# Function returns: +# +# phat.GT: the estimated probability of event PG +# phat.LT: the estimated probability of event PL +# d.ci: confidence interval for Pr(PL)-Pr(PG), the difference between the probabilities of these two events. +# phat, the estimated probability that the event PL is more likely than PG +# phat.ci: confidence interval for the estimand corresponding to phat +# p.value: testing Pr(PL)=Pr(PG) +# +x1=elimna(x1) +x2=elimna(x2) +n1=nrow(x1) +n2=nrow(x2) +m=matrix(0,nrow=n1,ncol=n2) +if(ncol(x1)!=ncol(x2))stop('x1 and x2 should be matrices with two columns') +if(ncol(x1)!=2)stop('x1 and x2 should be matrices with two columns') +for(i in 1:n1){ +m[i,]=(x1[i,1]>x2[,1])*(x1[i,2]>x2[,2]) +id=x1[i,1]nullval || chkit[2]nullval || chkit[2]nullval || chkit[2]nullval || chkit[2]nullval || chkit[2] Y_1 and X_2 < Y_2 or if +# X_1 < Y_1 and X_2 > Y_2. +# +# Let PG= event that simulatensouly X_1 > Y_1 and X_2 < Y_2 +# PL= event thatsimulatensouly X_1 < Y_1 and X_2 > Y_2 +# +# Function returns: +# +# phat1: the estimated probability of event PG +# phat2: the estimated probability of event PL + +# phat, the estimated probability that the event PL is more likely than PG +# ci.p = A confidence interval for the estimand corresponding to phat +# +if(SEED)set.seed(2) +x1=elimna(x1) +x2=elimna(x2) +n1=nrow(x1) +n2=nrow(x2) +d=NA +ci.mat=matrix(NA,nrow=nboot,ncol=2) +d.mat=matrix(NA,nrow=nboot,ncol=2) +n=min(c(n1,n2)) +for(i in 1:nboot){ +id1=sample(n,n,replace=TRUE) +id2=sample(n,n,replace=TRUE) +v=MULNC.sub(x1[id1,],x2[id2,],alpha=alpha) +ci.mat[i,]=v$ci +d.mat[i,]=v$d.ci +} +v=MULNC.sub(x1,x2) +ci=mean(ci.mat[,1]) +ci[2]=mean(ci.mat[,2]) +dci=mean(d.mat[,1]) +dci[2]=mean(d.mat[,2]) +list(n1=n1,n2=n2,phat.GT=v$phat.GT,phat.LT=v$phat.LT,d.ci=dci,phat=v$phat,ci.p=ci) +} + + +MULNC.int<-function(J,K,x,x1=NULL,x2=NULL,x3=NULL,x4=NULL,alpha=.05,plotit=TRUE,SEED=TRUE,pr=TRUE){ +# +# Rank-based multiple comparisons for all interactions when dealing with bivariate data. +# in J by K design. The method is based on an +# extension of the Patel-Hoel definition of no interaction. +# +# The familywise type I error probability is controlled by using +# a critical value from the Studentized maximum modulus distribution. +# +# It is assumed all groups are independent. +# +# Missing values are automatically removed. +# +# x is assumed to have list mode, x[[1]]... x[[JK]] contain bivariate data stored in an n-by-2 matrix +# +# Consider a 2-by-2 design. +# Let PG= event that x1 is 'dominates' x2. +# That is, for the ith and jth randomly sampled points +# x1[i,1]>x2[j,1] and x1[i,2]>x2[j,2] +# PL= event that x2 is 'dominates' x1. +# Let P1= Pr(PL)-Pr(PG), the difference between the probabilities of these two events. +# Define P2 in an analogous fashion for x3 and x4 +# no interaction is taken to mean P1=P2. 
+ +# +if(!is.null(x1)){ +x=list(x1,x2,x3,x4) +J=2 +K=2 +} +if(!is.list(x))stop('Data for each group must be stored in list mode.') +p=J*K +grp=c(1:p) +if(p>4){ +if(pr)print('Confidence intervals are adjusted so that the simultaneous probability coverage is approximately 1-alpha') +} +CCJ<-(J^2-J)/2 +CCK<-(K^2-K)/2 +CC<-CCJ*CCK +test<-matrix(NA,CC,8) +test.p<-matrix(NA,CC,7) +nv=NA +for(j in 1:p){ +if(ncol(x[[j]])!=2)stop('One or more groups do not contain bivariate data') +x[[j]]=elimna(x[[j]]) +nv[j]=length(x[[j]]) +} +if(var(nv)!=0)stop('Unequal sample sizes detected, use MULNCpb.int instead') +mat<-matrix(grp,ncol=K,byrow=TRUE) +dimnames(test)<-list(NULL,c('Factor A','Factor A','Factor B','Factor B','delta','ci.lower','ci.upper','p.value')) +jcom<-0 +crit<-smmcrit(200,CC) +if(alpha!=.05)crit<-smmcrit01(200,CC) +alpha<-1-pnorm(crit) +for (j in 1:J){ +for (jj in 1:J){ +if (j < jj){ +for (k in 1:K){ +for (kk in 1:K){ +if (k < kk){ +jcom<-jcom+1 +test[jcom,1]<-j +test[jcom,2]<-jj +test[jcom,3]<-k +test[jcom,4]<-kk +temp1<-MULNC.sub(x[[mat[j,k]]],x[[mat[j,kk]]]) +temp2<-MULNC.sub(x[[mat[jj,k]]],x[[mat[jj,kk]]]) +delta=temp2$d-temp1$d +sqse<-temp1$sq.se+temp2$sq.se +test[jcom,5]<-delta/2 +test[jcom,6]<-delta/2-crit*sqrt(sqse/4) +test[jcom,7]<-delta/2+crit*sqrt(sqse/4) +test[jcom,8]=2*(1-pnorm(abs((delta/2)/sqrt(sqse/4)))) +}}}}}} +list(test=test) +} +MULNC.sub<-function(x1,x2,alpha=.05){ +# +# bivariate analog of Cliff's method, which is an analog of the +# Wilcoxon--Mann--Whitney test +# +# +# Let PG= event that x1 is 'greater than' x2. That is, for the ith and jth randomly sampled points +# x1[i,1]>x2[j,1] and x1[i,2]>x2[j,2] +# PL= event that x1 is 'less than' x2. +# +# Function returns: +# +# phat.GT: the estimated probability of event PG +# phat.LT: the estimated probability of event PL + +# phat, the estimated probability that the event PL is more likely than PG +# +x1=elimna(x1) +x2=elimna(x2) +n1=nrow(x1) +n2=nrow(x2) +m=matrix(0,nrow=n1,ncol=n2) +if(ncol(x1)!=ncol(x2))stop('x1 and x2 should be matrices with two columns') +if(ncol(x1)!=2)stop('x1 and x2 should be matrices with two columns') +for(i in 1:n1){ +m[i,]=(x1[i,1]>x2[,1])*(x1[i,2]>x2[,2]) +id=x1[i,1]x2[j,1] and x1[i,2]>x2[j,2] +# PL= event that x1 is 'less than' x2. +# +# Function returns: +# +# phat.GT: the estimated probability of event PG +# phat.LT: the estimated probability of event PL + +# phat, the estimated probability that the event PL is more likely than PG +# +if(SEED)set.seed(2) +x1=elimna(x1) +x2=elimna(x2) +n1=nrow(x1) +n2=nrow(x2) +d=NA +for(i in 1:nboot){ +id1=sample(n1,n1,replace=TRUE) +id2=sample(n2,n2,replace=TRUE) +d[i]=MULNC.sub(x1[id1,],x2[id2,])$phat +} +low=round(alpha*nboot/2) +up=nboot-low +low=low+1 +ds=sort(d) +ci=ds[low] +ci[2]=ds[up] +pv=mean(ds<.5)+.5*mean(ds==.5) +pv=2*min(c(pv,1-pv)) +phat=MULNC.sub(x1,x2)$phat +list(n1=n1,n2=n2,phat=phat,ci.p=ci,p.value=pv) +} + +MULNCpb.int<-function(J,K,x,x1=NULL,x2=NULL,x3=NULL,x4=NULL,alpha=.05,plotit=TRUE,nboot=500,SEED=TRUE,method='hoch'){ +# +# Rank-based multiple comparisons for all interactions when dealing with bivariate data +# in a J-by-K design. The method is based on an +# extension of the Patel-Hoel definition of no interaction. +# +# It is assumed all groups are independent. +# +# Missing values are automatically removed. +# +# x is assumed to have list mode, x[[1]]... 
x[[JK]] contain bivariate data stored in an n-by-2 matrix +# +# For a 2-by-2 design, data can be stored in the arguments +# x1, x2, x3, x4 +# where each of these arguments is an n-by-2 matrix. +# +# Consider a 2-by-2 design. +# Let PG= event that x1 is 'dominates' x2. +# That is, for the ith and jth randomly sampled points +# x1[i,1]>x2[j,1] and x1[i,2]>x2[j,2] +# PL= event that x2 is 'dominates' x1. +# Let P1= Pr(PL)-Pr(PG), the difference between the probabilities of these two events. +# Define P2 in an analogous fashion for x3 and x4 +# no interaction is taken to mean P1=P2. + +# +if(!is.null(x1)){ +x=list(x1,x2,x3,x4) +J=2 +K=2 +} +if(!is.list(x))stop('Data for each group must be stored in list mode.') +if(SEED)set.seed(2) +p=J*K +grp=c(1:p) +CCJ<-(J^2-J)/2 +CCK<-(K^2-K)/2 +CC<-CCJ*CCK +test<-matrix(NA,CC,9) +test.p<-matrix(NA,CC,7) +for(j in 1:p){ +if(ncol(x[[j]])!=2)stop('One or more groups do not contain bivariate data') +x[[j]]=elimna(x[[j]]) +} +mat<-matrix(grp,ncol=K,byrow=TRUE) +dimnames(test)<-list(NULL,c('Factor A','Factor A','Factor B','Factor B','phat','ci.lower','ci.upper','p.value','adjusted p-value')) +jcom<-0 +low=round(alpha*nboot/2)+1 +up=nboot-low +for (j in 1:J){ +for (jj in 1:J){ +if (j < jj){ +for (k in 1:K){ +for (kk in 1:K){ +if (k < kk){ +jcom<-jcom+1 +test[jcom,1]<-j +test[jcom,2]<-jj +test[jcom,3]<-k +test[jcom,4]<-kk +d=NA +for(b in 1:nboot){ +n1=nrow(x[[mat[j,k]]]) +n2=nrow(x[[mat[j,kk]]]) +n3=nrow(x[[mat[jj,k]]]) +n4=nrow(x[[mat[jj,kk]]]) +X1=x[[mat[j,k]]] +X2=x[[mat[j,kk]]] +X3=x[[mat[jj,k]]] +X4=x[[mat[jj,kk]]] +id1=sample(n1,n1,replace=TRUE) +id2=sample(n2,n2,replace=TRUE) +id3=sample(n3,n3,replace=TRUE) +id4=sample(n4,n4,replace=TRUE) +d[b]=MULNC.sub(X1[id1,],X2[id2,])$phat-MULNC.sub(X3[id3,],X4[id4,])$phat +} +d=sort(d) +ci=d[low+1] +ci[2]=d[up] +pv=mean(d<0) +pv=2*min(c(pv,1-pv)) +test[jcom,5]=MULNC.sub(X1,X2)$phat-MULNC.sub(X3,X4)$phat +test[jcom,6]<-ci[1] +test[jcom,7]<-ci[2] +test[jcom,8]<-pv +}}}}}} +test[,9]=p.adjust(test[,8],method=method) +list(test=test) +} + +perm.rho<-function(x,y,alpha=.05,nboot=1000,SEED=TRUE){ +# +# Do a permutation test based on Pearson's correlation +# Diciccio--Romano version of a permuation test (JASA, 2017, 112, 1211-1220) +# + +# The default number of permutations is nboot=1000 +# +if(SEED)set.seed(2) +xx<-cbind(x,y) +xx=elimna(xx) +x=xx[,1] +y=xx[,2] +n=length(x) +tval<-perm.rho.sub(x,y) +vec<-c(1:length(xx)) +v1<-length(x)+1 +difb<-NA +tv<-NA +for(i in 1:nboot){ +id=sample(n,n) +tv[i]<-perm.rho.sub(x,y[id]) +} +tv<-sort(tv) +icl<-floor((alpha/2)*nboot+.5) +icu<-floor((1-alpha/2)*nboot+.5) +reject<-0 +if(tval>=tv[icu] || tval <=tv[icl])reject<-1 +list(teststat=tval,lower.crit=tv[icl],upper.crit=tv[icu],reject=reject) +} + +perm.rho.sub<-function(x,y){ +rho=cor(x,y) +n=length(x) +xbar=mean(x) +ybar=mean(y) +m22=sum((x-xbar)^2*(y-ybar)^2)/n +m20=sum((x-xbar)^2)/n +m02=sum((y-ybar)^2)/n +tau=sqrt(m22/(m20*m02)) +S=sqrt(n)*rho/tau +S +} + +binomCP<-function(x = sum(y), nn = length(y), y = NULL, n = NA, alpha = 0.05){ +# +# Clopper-Pearson +# +# y is a vector of 1s and 0s. +# x is the number of successes observed among n trials +# +if(!is.na(n))nn=n +q=binom.test(x,nn,conf.level=1-alpha)[4] +ci=q$conf.int[1:2] +list(phat=x/nn,ci=ci,n=nn) +} + +kmsbinomci<-function(x = sum(y), nn = length(y), y = NULL, n = NA, alpha = 0.05){ +# +# Boinomial +# Confidence interval for the probability of success. +# Kulinskaya, E., Morgenthaler, S. & Staudte, R. (2008). 
+# Meta Analysis: A guide to calibrating and combining statistical evidence p. 140 +# +if(!is.null(y[1])){ +y=elimna(y) +nn=length(y) +} +if(nn==1)stop('Something is wrong: number of observations is only 1') +n<-nn +cr=qnorm(1-alpha/2) +ntil=n+.75 +ptil=(x+3/8)/ntil +crit=qnorm(1-alpha/2) +if(x!=n && x!=0){ +term1=sin(asin(sqrt(ptil))-crit/(2*sqrt(n))) +term2=sin(asin(sqrt(ptil))+crit/(2*sqrt(n))) +lower=term1^2 +upper=term2^2 +} +if(x==0){ #Use Clopper-Pearson +lower<-0 +upper<-1-alpha^(1/n) +} +if(x==1){ +upper<-1-(alpha/2)^(1/n) +lower<-1-(1-alpha/2)^(1/n) +} +if(x==n-1){ +lower<-(alpha/2)^(1/n) +upper<-(1-alpha/2)^(1/n) +} +if(x==n){ +lower<-alpha^(1/n) +upper<-1 +} +phat=x/n +list(phat=phat,se=sqrt(ptil*(1-ptil)/ntil),ci=c(lower,upper),n=n) +} + + + +binom.conf<-function(x = sum(y), nn = length(y),AUTO=TRUE,pr=TRUE, +method=c('AC','P','CP','KMS','WIL','SD'), y = NULL, n = NA, alpha = 0.05){ +# +# +# P: Pratt's method +# AC: Agresti--Coull +# CP: Clopper--Pearson +# KMS: Kulinskaya et al. 2008, p. 140 +# WIL: Wilson type CI. Included for completeness; was used in simulations relevant to binom2g +# SD: Schilling--Doi +# +if(pr) print('Note: To perform the sign test, use the the R function signt') +if(nn<35){ +if(AUTO)method='SD' +} +type=match.arg(method) +switch(type, + P=binomci(x=x,nn=nn,y=y,n=n,alpha=alpha), + AC=acbinomci(x=x,nn=nn,y=y,n=n,alpha=alpha), + CP=binomCP(x=x,nn=nn,y=y,n=n,alpha=alpha), + KMS=kmsbinomci(x=x,nn=nn,y=y,n=n,alpha=alpha), + WIL=wilbinomci(x=x,y=y,n=nn,alpha=alpha), + SD=binomLCO(x=x,nn=nn,y=y,alpha=alpha), + ) +} + +binom.conf.pv<-function(x = sum(y), nn = length(y),y=NULL,method='AC',AUTO=TRUE, pr=FALSE, PVSD=TRUE,alpha=.05,nullval=.5,NOTE=TRUE){ +# +# p-value for the methods available in binom.conf +# AC: Agresti--Coull +# P: Pratt's method +# CP: Clopper--Pearson +# KMS: Kulinskaya et al. 2008, p. 140 +# WIL: Wilson type CI. Included for completeness; was used in simulations relevant to binom2g +# SD: Schilling--Doi +# +# AUTO=TRUE, use SD if n<35 +# PVSD=FALSE: no p-value when using SD to avoid possibly high execution time +# use p-value based on what method indicates. 
Default is AC, Agresti--Coull +# +if(pr) print('Note: To perform the sign test, use the the R function signt') +if(!PVSD & nn<35)AUTO=FALSE +if(AUTO){ +if(nn<35)method='SD' +} +ci<-binom.conf(x,nn,alpha=alpha,method=method,AUTO=FALSE,pr=FALSE) +pv=NULL +if(method=='SD'){ +if(!PVSD){ +if(NOTE)print('To get a p-value when method=SD, set PVSD=TRUE, but execution time might be high') +}} +if(method!='SD' || PVSD){ +alph<-c(1:99)/100 +for(i in 1:99){ +irem<-i +chkit<-binom.conf(x,nn,alpha=alph[i],method=method,AUTO=FALSE,pr=FALSE)$ci +if(chkit[[1]]>nullval || chkit[[2]]nullval || chkit[[2]]2){ +mat=cbind(con[,k]*est[,2],con[,k]*est[,3]) +LM=apply(mat,1,min) +UM=apply(mat,1,max) +term1=sum(con[,k]*est[,1]) +EST[k]=term1 +term2=sqrt(sum((con[,k]*est[,1]-LM)^2)) +term3=sqrt(sum((con[,k]*est[,1]-UM)^2)) +L[k]=term1-term2 +U[k]=term1+term3 +PV[k]=lincon.binPV(r,n,con=con[,k],nullval=null.value,binCI=acbinomci)$p.value +} +} +adj=p.adjust(PV,method='hoch') +CI=cbind(EST,L,U,PV,adj) +dimnames(CI)=list(NULL,c('Est','ci.low','ci.hi','p-value','Adjusted p.value')) +list(p.hat=est[,1],CI=CI,con=con) +} + + +lincon.bin.sub<-function(r,n,con=NULL,alpha=.05,null.value=0,x=NULL,binCI=acbinomci){ +# +# r: number of successes for J independent groups +# n: corresponding sample sizes +# +# Compute confidence interval for a linear combination of independent binomials +# using: +# A note on confidence interval estimation for a linear function +# of binomial proportion. +# Zou, G. Y., Huang, W. & Zheng, X (2009) CSDA, 53, 1080-1085 +# +# con: contrast coeffiients +# if NULL, all pairwise comparisons are performed. +# +# x: if not NULL, taken to be a matrix containing 0s and 1s, columns correspond to groups +# r and n are computed using the data in x +# +# binCI defaults to Agresti--Coull +# Other choices for binCI: +# binomci: Pratt's method +# binomCP: Clopper--Pearson +# kmsbinomci: Kulinskaya et al +# wilbinomci: Wilson +# binomLCO: Schilling--Doi +# +if(!is.null(x)){ +r=apply(x,2,sum) +n=rep(nrow(x),ncol(x)) +} +J=length(r) +est=matrix(NA,nrow=J,ncol=3) +for(j in 1:J){ +v=binCI(r[j],n[j],alpha=alpha) +est[j,]=c(v$phat,v$ci) +} +if(!is.null(con))con=as.matrix(con) +if(is.null(con))con=con.all.pairs(J) +NT=ncol(con) +L=NA +U=NA +EST=NA +for(k in 1:NT){ +mat=cbind(con[,k]*est[,2],con[,k]*est[,3]) +LM=apply(mat,1,min) +UM=apply(mat,1,max) +term1=sum(con[,k]*est[,1]) +EST[k]=term1 +term2=sqrt(sum((con[,k]*est[,1]-LM)^2)) +term3=sqrt(sum((con[,k]*est[,1]-UM)^2)) +L[k]=term1-term2 +U[k]=term1+term3 +} +CI=cbind(EST,L,U) +dimnames(CI)=list(NULL,c('Est','ci.low','ci.hi')) +list(p.hat=est[,1],CI=CI,con=con) +} + + +lincon.binPV<-function(r,n,con=NULL,alpha=.05,nullval=0,binCI=acbinomci){ +# +# Compare two binomials using the method in Zou et al.2009 CSDA. +# +# x and y are vectors of 1s and 0s. 
+# Or can use the argument +# r1 = the number of successes observed among group 1 +# r2 = the number of successes observed among group 2 +# n1 = sample size for group 1 +# n2 = sample size for group 2 +# +# nullval is the hypothesized value of the linear contrast +# +# binCI defaults to Agresti--Coull +# Other choices for binCI: +# binomci: Pratt's method +# binomCP: Clopper--Pearson +# kmsbinomci: Kulinskaya et al +# wilbinomci: Wilson +# binomLCO: Schilling--Doi +# +ci=lincon.bin.sub(r=r,n=n,alpha=alpha,con=con,binCI=binCI) +p.value=1 +p1.hat=r1/n1 +p2.hat=r2/n2 +alph<-c(1:99)/100 +for(i in 1:99){ +irem<-i +chkit<-lincon.bin.sub(r=r,n=n,alpha=alph[i],con=con,binCI=binCI)$CI[2:3] +if(chkit[1]>nullval || chkit[2]nullval || chkit[2]nullval || chkit[2]nullval || chkit[2]nullval || chkit[2]=5)res=binom.conf(B,n,method=method,alpha=alpha,pr=FALSE) +res +} + + +smbinRC<-function(x,y,IV=c(1,2),Qsplit1=.5,Qsplit2=.5,method='ACSK',nboot=1000,est=tmean,alpha=.05,FWE.method='hoch', +xout=FALSE,outfun=outpro,SEED=TRUE,...){ +# +# y is assumed to be binary +# +# Split data based on the covariates indicated by +# IV +# +# Qsplit: split the independent variable based on the +# quantiles indicated by Qsplit +# Example +# Qsplit1=c(.25,.5,.75) +# Qsplit2=.5 +# would split based on the quartiles for the first independent variable and the median +# for the second independent variable +# +# Then compare the probability of a success corresponding to the resulting groups +# +# IV[1]: indicates the column of containing the first independent variable to use. +# IV[2]: indicates the column of containing the second independent variable to use. +# +p=ncol(x) +if(p==1)stop('There should be two or more independent variables') +p1=p+1 +xy<-elimna(cbind(x,y)) +x<-xy[,1:p] +y<-xy[,p1] +v=NULL +# Next convert y to 0 and 1s +n=length(y) +yy=rep(0,n) +flag=which(y==max(y)) +yy[flag]=1 +y=yy +if(xout){ +flag<-outfun(x,plotit=FALSE,...)$keep +x<-x[flag,] +y<-y[flag] +} +if(length(unique(y))>2)stop('y should be binary') +z=list() +group=list() +N.int=length(Qsplit1)+1 +N.int2=length(Qsplit2)+1 +est.mat=matrix(NA,nrow=N.int,ncol=N.int2) +L1=NULL +L2=NULL +for(i in 1:N.int)L1[i]=paste('IV1.G',i) +for(i in 1:N.int2)L2[i]=paste('IV2.G',i) +dimnames(est.mat)=list(L1,L2) +qv=quantile(x[,IV[1]],Qsplit1) +qv=c(min(x[,IV[1]]),qv,max(x[,IV[1]])) +qv2=quantile(x[,IV[2]],Qsplit2) +qv2=c(min(x[,IV[2]]),qv2,max(x[,IV[2]])) +ic=0 +for(j in 1:N.int){ +j1=j+1 +xsub=binmat(xy,IV[1],qv[j],qv[j1]) +for(k in 1:N.int2){ +k1=k+1 +xsub2=binmat(xsub,IV[2],qv2[k],qv2[k1]) +est.mat[j,k]=est(xsub2,...) 
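+# At this point xsub2 holds the cases whose value on the first independent
+# variable falls in the jth quantile interval and whose value on the second
+# falls in the kth interval; est is applied to this cell of the grid.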
+ic=ic+1 +z[[ic]]=xsub2[,p1] +if(length(z[[ic]])>6)group[[ic]]=summary(xsub2[,1:p]) +} +} +n=NA +for(j in 1:length(z)){ +n[j]=length(z[[j]]) +} +if(min(n)<=5){ +id=which(n>5) +del=which(n<=5) +n=n[id] +if(pr)print(paste('eliminated group',del,'The sample size is less than 6')) +} +NT=N.int*N.int2 +MID=matrix(c(1:NT),nrow=N.int,ncol=N.int2,byrow=TRUE) +# pull out groups indicated by the columns of MID and do tests +IV1res=NULL +a=NULL +r=NA +n=NA +for(j in 1:N.int2){ +zsub=z[MID[,j]] +r=lapply(zsub,sum) +n=lapply(zsub,length) +r=as.vector(matl(r)) +n=as.vector(matl(n)) +a=binpair(r,n,method=method,alpha=alpha) +IV1res=rbind(IV1res,a[,3:11]) +} +# update adjusted p-value +IV1res[,9]=p.adjust(IV1res[,8],method=FWE.method) +#Now do IV2 +IV2res=NULL +r=NA +n=NA +a=NULL +for(j in 1:N.int){ +zsub=z[MID[j,]] +r=lapply(zsub,sum) +n=lapply(zsub,length) +r=as.vector(matl(r)) +n=as.vector(matl(n)) +a=binpair(r,n,method=method,alpha=alpha) +IV2res=rbind(IV2res,a[,3:11]) +IV2res[,9]=p.adjust(IV2res[,8],method=FWE.method) +} +list(Independent.variables.summary=group,Res.4.IV1=IV1res,Res.4.IV2=IV2res) +} + +smgridRC<-function(x,y,IV=c(1,2),Qsplit1=.5,Qsplit2=.5,tr=.2,alpha=.05,PB=FALSE,est=tmean,nboot=1000,pr=TRUE,method='hoch', +xout=FALSE,outfun=outpro,SEED=TRUE,...){ +# +# Compare measures of location among grids defined by quantiles of two IVs. By default 20% trimming is used +# est=median would use medians +# est=hd would use the Harrell-Davis estimator for the median. +# +# Qsplit: split the independent variable based on the +# quantiles indicated by Qsplit +# Example +# Qsplit1=c(.25,.5,.75) +# Qsplit2=.5 +# would split based on the quartiles for the first independent variable and the median +# for the second independent variable +# Then compare binomials# +# +# IV[1]: indicates the column of containing the first independent variable to use. +# IV[2]: indicates the column of containing the second independent variable to use. +# +# tr: amount of trimming +# +x=as.matrix(x) +p=ncol(x) +if(p==1)stop('There should be two or more independent variables') +p1=p+1 +xy<-elimna(cbind(x,y)) +x<-xy[,1:p] +y<-xy[,p1] +v=NULL +if(xout){ +flag<-outfun(x,plotit=FALSE)$keep +x<-x[flag,] +y<-y[flag] +} +if(identical(est,median))PB=TRUE +if(identical(est,hd))PB=TRUE +z=list() +group=list() +N.int=length(Qsplit1)+1 +N.int2=length(Qsplit2)+1 +est.mat=matrix(NA,nrow=N.int,ncol=N.int2) +n.mat=matrix(NA,nrow=N.int,ncol=N.int2) +DV.mat=matrix(NA,nrow=N.int,ncol=N.int2) +L1=NULL +L2=NULL +for(i in 1:N.int)L1[i]=paste('IV1.G',i) +for(i in 1:N.int2)L2[i]=paste('IV2.G',i) +dimnames(est.mat)=list(L1,L2) +qv=quantile(x[,IV[1]],Qsplit1) +qv=c(min(x[,IV[1]]),qv,max(x[,IV[1]])) +qv2=quantile(x[,IV[2]],Qsplit2) +qv2=c(min(x[,IV[2]]),qv2,max(x[,IV[2]])) +ic=0 +for(j in 1:N.int){ +j1=j+1 +xsub=binmat(xy,IV[1],qv[j],qv[j1]) +for(k in 1:N.int2){ +k1=k+1 +xsub2=binmat(xsub,IV[2],qv2[k],qv2[k1]) +est.mat[j,k]=est(xsub2[,p1],...) 
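+# Location estimate for the dependent variable (column p1) within cell (j,k)
+# of the grid; the corresponding cell sample size is recorded next.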
+n.mat[j,k]=length(xsub2[,p1]) +ic=ic+1 +z[[ic]]=xsub2[,p1] +if(length(z[[ic]])>6)group[[ic]]=summary(xsub2[,1:p]) +} +} +n=NA +for(j in 1:length(z)){ +n[j]=length(z[[j]]) +} +if(min(n)<=5){ +id=which(n>5) +del=which(n<=5) +n=n[id] +if(pr)print(paste('eliminated group',del,'The sample size is less than 6')) +} +NT=N.int*N.int2 +MID=matrix(c(1:NT),nrow=N.int,ncol=N.int2,byrow=TRUE) +# pull out groups indicated by the columns of MID and do tests +IV1res=NULL +a=NULL +for(j in 1:N.int2){ +zsub=z[MID[,j]] +DV.mat[,j]=matl(lapply(zsub,est,...)) +if(!PB)a=lincon(zsub,tr=tr,pr=FALSE,alpha=alpha)$psihat[,3:8] + +if(PB){ +if(identical(est,tmean))a=linpairpb(zsub,nboot=nboot,alpha=alpha,est=est,SEED=SEED,tr=tr)$output[,c(3:9)] +else +a=linpairpb(zsub,nboot=nboot,alpha=alpha,est=est,method=method,SEED=SEED,...)$output[,c(3:9)] +} +IV1res=rbind(IV1res,a) +} +#Now do IV2 +IV2res=NULL +a=NULL +for(j in 1:N.int){ +zsub=z[MID[j,]] +if(!PB){ +a=lincon(zsub,tr=tr,pr=FALSE,alpha=alpha)$psihat[,3:8] +} +if(PB){ +print(zsub) +if(identical(est,tmean))a=linpairpb(zsub,nboot=nboot,alpha=alpha,est=est,SEED=SEED,tr=tr)$output[,c(3:9)] +else +a=linpairpb(zsub,nboot=nboot,alpha=alpha,est=est,SEED=SEED,...)$output[,c(3:9)] +} +IV2res=rbind(IV2res,a) +} +if(!PB){ #fix labels add adjusted p-value +IV1res=cbind(IV1res[,1:4],p.adjust(IV1res[,4],method=method),IV1res[,5:6]) +IV2res=cbind(IV2res[,1:4],p.adjust(IV2res[,4],method=method),IV2res[,5:6]) +dimnames(IV1res)=list(NULL,c('psihat','ci.lower','ci.upper','p.value','p.adjust','Est.1','Est.2')) +dimnames(IV2res)=list(NULL,c('psihat','ci.lower','ci.upper','p.value','p.adjust','Est.1','Est.2')) +} +if(PB){ +IV1res[,3]=p.adjust(IV1res[,2],method=method) +IV2res[,3]=p.adjust(IV2res[,2],method=method) +IV1res=IV1res[,c(1,4,5,2,3,6,7)] +IV2res=IV2res[,c(1,4,5,2,3,6,7)] +dimnames(IV1res)=list(NULL,c('psihat','ci.lower','ci.upper','p.value','p.adjust','Est.1','Est.2')) +dimnames(IV2res)=list(NULL,c('psihat','ci.lower','ci.upper','p.value','p.adjust','Est.1','Est.2')) +} +list(est.loc.4.DV=est.mat,n=n.mat,Independent.variables.summary=group,Res.4.IV1=IV1res,Res.4.IV2=IV2res) +} + +sm.inter<-function(x,y,IV=c(1,2),Qsplit1=.5,Qsplit2=.5,PB=FALSE,est=tmean,tr=.2,nboot=1000,pr=TRUE,con=NULL, +xout=FALSE,outfun=outpro,SEED=TRUE,...){ +# +# Split on two variables. +# Look for interactions +# PB=FALSE: use trimmed means +# PB=TRUE: use percentile bootstrap. +# +# TR: amount of trimming when using a non-bootstrap method. To alter the amount of trimming when using +# a bootstrap method use +# tr. Example, tr=.25 would use 25% trimming. +# +# est=median would use medians +# est=hd would use the Harrell-Davis estimator for the median. +# +# Qsplit: split the independent variable based on the +# quantiles indicated by Qsplit +# Example +# Qsplit1=c(.25,.5,.75) +# Qsplit2=.5 +# would split based on the quartiles for the first independent variable and the median +# for the second independent variable +# + +# Then test the hypothesis of equal measures of location +# IV[1]: indicates the column of containing the first independent variable to use. +# IV[2]: indicates the column of containing the second independent variable to use. 
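+#
+# A minimal illustration (hypothetical data: x a matrix with two or more
+# columns, y a numeric outcome):
+# sm.inter(x,y,IV=c(1,2),Qsplit1=c(.25,.5,.75),Qsplit2=.5)
+# would form a 4-by-2 grid and test the interaction contrasts produced by con2way.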
+# +# if(length(unique(y)>2))stop('y should be binary') +x=as.matrix(x) +p=ncol(x) +if(p==1)stop('There should be two or more independent variables') +p1=p+1 +xy<-elimna(cbind(x,y)) +x<-xy[,1:p] +y<-xy[,p1] +v=NULL +if(xout){ +flag<-outfun(x,plotit=FALSE,...)$keep +x<-x[flag,] +y<-y[flag] +} +if(identical(est,median))PB=TRUE +if(identical(est,hd))PB=TRUE +z=list() +group=list() +N.int=length(Qsplit1)+1 +N.int2=length(Qsplit2)+1 +est.mat=matrix(NA,nrow=N.int,ncol=N.int2) +L1=NULL +L2=NULL +for(i in 1:N.int)L1[i]=paste('IV1.G',i) +for(i in 1:N.int2)L2[i]=paste('IV2.G',i) +dimnames(est.mat)=list(L1,L2) +qv=quantile(x[,IV[1]],Qsplit1) +qv=c(min(x[,IV[1]]),qv,max(x[,IV[1]])) +qv2=quantile(x[,IV[2]],Qsplit2) +qv2=c(min(x[,IV[2]]),qv2,max(x[,IV[2]])) +ic=0 +for(j in 1:N.int){ +j1=j+1 +xsub=binmat(xy,IV[1],qv[j],qv[j1]) +for(k in 1:N.int2){ +k1=k+1 +xsub2=binmat(xsub,IV[2],qv2[k],qv2[k1]) +est.mat[j,k]=est(xsub2[,p1],...) +ic=ic+1 +z[[ic]]=xsub2[,p1] +if(length(z[[ic]])>6)group[[ic]]=summary(xsub2[,1:p]) +} +} +n=NA +for(j in 1:length(z)){ +n[j]=length(z[[j]]) +} +if(min(n)<=5){ +id=which(n>5) +del=which(n<=5) +n=n[id] +if(pr)print(paste('For group',del,'The sample size is less than 6')) +} +test=NULL +if(is.null(con))con=con2way(N.int,N.int2)$conAB +if(!PB){ +a=lincon(z,con=con,tr=tr,pr=FALSE) +test=a$test +psihat=a$psihat +} +if(PB){ +a=linconpb(z,con=con,est=est,...) +psihat=a$output +} +ES=IND.PAIR.ES(z,con=con)$effect.size +list(Group.summary=group,loc.est=est.mat,test=test,psihat=psihat,con=con,Effect.Sizes=ES) +} + +smgridLC=sm.inter + +Depth.class<-function(train=NULL,test=NULL,g=NULL,x1=NULL,x2=NULL,depthfun=prodepth,DIST=TRUE,SEED=TRUE,...){ +# +# Do classification using max depths or max depth distribution as suggested by +# Makinde and Fasoranbaku (2018). JAS +# +# depthfun indicates how the depth of a point is computed. +# By default, projection depth is used. depthfun=zonoid would use zonoid depth +# +# train is the training set +# test is the test data +# g: labels for the data in the training set. +# +# depthfun must be a function having the form depthfun(x,pts). +# That is, compute depth for the points in pts relative to points in x. +# +# +library(class) +if(!is.null(train)){ +if(is.null(g))stop('Argument g, group ids, must be specified') +if(is.matrix(g)){ +if(dim(g)>1)stop('Argument g should be a vector') +} +Train=cbind(train,g) +Train=elimna(Train) +p=ncol(train) +p1=p+1 +train=Train[,1:p] +g=Train[,p1] +flag=g==min(g) +x1=Train[flag,1:p] +x2=Train[!flag,1:p] +} +x1=elimna(x1) +x2=elimna(x2) +x1=as.matrix(x1) +x2=as.matrix(x2) +if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') +n1=nrow(x1) +n2=nrow(x2) +g=c(rep(0,n1),rep(1,n2)) +train=rbind(x1,x2) +train=elimna(train) +test=elimna(test) +train=as.matrix(train) +test=as.matrix(test) +if(ncol(test)==1)test=t(test) # If test is a vector, a single point, transpose to get correct number of columns. +if(ncol(test)!=ncol(train))stop('test and train do not have the same number of columns') +ntest=nrow(test) +P=ncol(train) +P1=P+1 +xall=as.data.frame(matrix(NA,nrow=nrow(train),ncol=P1)) +xall[,1:P]=train +xall[,P1]=g +xall=elimna(xall) +x1=xall[,1:P] +xall=as.matrix(xall) +g=as.vector(xall[,P1]) +ids=unique(g) # Number of categories +x2=elimna(test) +x1=as.matrix(x1) +x2=as.matrix(x2) +n=nrow(x1) +n2=nrow(x2) +p=length(ids) +D=matrix(NA,nrow=p,ncol=n2) +for(i in 1:p){ +flag=g==ids[i] +D[i,]=depthfun(as.matrix(x1[flag,]),pts=x2,SEED=SEED,...) 
# depth of test points relative to train data in cat i +} +if(!DIST)res=apply(D,2,which.max) +if(DIST){ +res=NA +all.dep=list() +for(i in 1:p){ +flag=g==ids[i] +all.dep[[i]]=depthfun(x1[flag,],pts=x1[flag,],...) #Have depth of all point in the training set for class i +} +for(j in 1:ntest){ +dt=NA +cum.depth=NA +for(i in 1:p){ +flag=g==ids[i] +chkpt=matrix(x2[j,],nrow=1) +#dt[i]=depthfun(x1[flag,],pts=x2[j,],...) +dt[i]=depthfun(x1[flag,],chkpt,...) +cum.depth[i]=mean(all.dep[[i]]<=dt[i]) +} +chkit=which(cum.depth==max(cum.depth)) +res[j]=chkit[1] +}} +res +} + +Depth.class.bag<-function(train=NULL,test=NULL,g=NULL,x1=NULL,x2=NULL,depthfun=prodepth,DIST=FALSE,nboot=100,SEED=TRUE,...){ +# +# +# g=class id +# if there are two classes and the training data are stored in separate variables, can enter +# the data for each class via the arguments +# x1 and x2. +# The function will then create appropriate labels and store them in g. +# +# KNN classification using data depths. +# KNNdist uses data depths, for the n1!=n2 it can be a bit biased, meaning that +# when there is no association, the probability of a correct classification will be less than .5 +# +# +if(SEED)set.seed(2) +if(is.null(test))stop('test =NULL, no test data provided') +if(!is.null(train)){ +if(is.null(g))stop('Argument g, group labels, must be specified') +if(is.matrix(g)){ +if(dim(g)>1)stop('Argument g should be a vector') +traing=elimna(cbind(train,g)) +p=ncol(train) +p1=p+1 +train=traing[,1:p] +test=traing[,p1] +if(length(unique(g))>2)stop('Only two groups allowed, g has more than two unique values') +} +x=fac2list(train,g) +x1=x[[1]] +x2=x[[2]] +} +test=as.matrix(test) +n.test=nrow(test) +if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') +if(ncol(test)!=ncol(x2))stop('test and x2 have different number of columns') +x1=elimna(x1) +x2=elimna(x2) +n1=nrow(x1) +n2=nrow(x2) +n=min(c(n1,n2)) +dvec=matrix(NA,nrow=nboot,ncol=n.test) +for(i in 1:nboot){ +id1=sample(n1,n,replace=TRUE) +id2=sample(n2,n,replace=TRUE) +dvec[i,]=Depth.class(x1=x1[id1,],x2=x2[id2,],test=test,depthfun=depthfun,DIST=DIST,...) +} +dec=rep(1,n.test) +test1=dvec==1 +test2=dvec==2 +chk1=apply(test1,2,sum) +chk2=apply(test2,2,sum) +idec=chk2>chk1 +dec[idec]=2 +dec +} + + +smbin.test<-function(x,y,IV=1,Qsplit=.5,method='SK',nboot=1000, +xout=FALSE,outfun=outpro,SEED=TRUE,...){ +# +# +# Qsplit: split the independent variable indicated by the argument +# IV; it defaults to 1, the first variable , column 1, in x. +# Example: +# IV=1: split on the first independent variable, +# IV=2: split on the second independent variable, +# +# Qsplit indicates the quantiles to be used and defaults to .5. +# Example Qsplit=c(.25,.5,.75) would split based on the quartiles +# +# Then compare the probability of success corresponding to the groups. 
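+#
+# A minimal illustration (hypothetical data: x a matrix of covariates, y a
+# binary outcome):
+# smbin.test(x,y,IV=1,Qsplit=c(.25,.5,.75))
+# splits on the quartiles of the first covariate and compares the resulting
+# success probabilities via binpair.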
+# +# if(length(unique(y)>2))stop('y should be binary') +x=as.matrix(x) +p=ncol(x) +if(p==1)stop('There should be two or more independent variables') +p1=p+1 +xy<-elimna(cbind(x,y)) +x<-xy[,1:p] +y<-xy[,p1] +v=NULL +if(xout){ +flag<-outfun(x,plotit=FALSE,...)$keep +m<-xy[flag,] +x<-m[,1:p] +y<-m[,p1] +} +z=list() +N.int=length(Qsplit)+1 +qv=quantile(x[,IV],Qsplit) +qv=c(min(x[,IV]),qv,max(x[,IV])) +group=list() +for(j in 1:N.int){ +j1=j+1 +xsub=binmat(xy,IV,qv[j],qv[j1]) +z[[j]]=xsub[,p1] +group[[j]]=summary(xsub[,IV]) +} +r=NA +n=NA +for(j in 1:N.int){ +r[j]=sum(z[[j]]) +n[j]=length(z[[j]]) +} +a=binpair(r,n,method=method) +list(Group.summary=group,output=a) +} + +smbinAB<-function(x,y,IV=c(1,2),Qsplit1=.5,Qsplit2=.5,tr=.2,method='KMS', +xout=FALSE,outfun=outpro,...){ +# +# y is assumed to be binary +# +# x a matrix or data frame +# +# Split on two indepnddent variables. +# +# Qsplit: split the independent variable based on the +# quantiles indicated by Qsplit +# Example +# Qsplit1=c(.25,.5,.75) +# Qsplit2=.5 +# would split based on the quartiles for the first independent variable and the median +# for the second independent variable +# + +# Then test the hypotheses about the probability of a success. +# +# IV[1]: indicates the column containing the first independent variable to use. +# IV[2]: indicates the column of containing the second independent variable to use. +# +# +x=as.matrix(x) +p=ncol(x) +if(p==1)stop('There should be two or more independent variables') +p1=p+1 +xy<-elimna(cbind(x,y)) +x<-xy[,1:p] +y<-xy[,p1] +if(length(unique(y))>2)stop('y should be binary') +flag=max(y) +y[flag]=1 +y[!flag]=0 +v=NULL +if(xout){ +flag<-outfun(x,plotit=FALSE,...)$keep +x<-x[flag,] +y<-y[flag] +} +J=length(Qsplit1)+1 +K=length(Qsplit2)+1 +JK=J*K +MAT=matrix(1:JK,J,K,byrow=TRUE) +z=list() +group=list() +N.int=length(Qsplit1)+1 +N.int2=length(Qsplit2)+1 +est.mat=matrix(NA,nrow=N.int,ncol=N.int2) +n.mat=matrix(NA,nrow=N.int,ncol=N.int2) +Nsuc=matrix(NA,nrow=N.int,ncol=N.int2) +DV.mat=matrix(NA,nrow=N.int,ncol=N.int2) +L1=NULL +L2=NULL +qv=quantile(x[,IV[1]],Qsplit1) +qv=c(min(x[,IV[1]]),qv,max(x[,IV[1]])) +qv2=quantile(x[,IV[2]],Qsplit2) +qv2=c(min(x[,IV[2]]),qv2,max(x[,IV[2]])) +ic=0 +for(j in 1:N.int){ +j1=j+1 +xsub=binmat(xy,IV[1],qv[j],qv[j1]) +for(k in 1:N.int2){ +k1=k+1 +xsub2=binmat(xsub,IV[2],qv2[k],qv2[k1]) +Nsuc[j,k]=sum(xsub2[,p1]==1) +est.mat[j,k]=mean(xsub2[,p1]) +n.mat[j,k]=length(xsub2[,p1]) +ic=ic+1 +z[[ic]]=xsub2[,p1] +if(length(z[[ic]])>6)group[[ic]]=summary(xsub2[,1:p]) +} +} +n=NA +for(j in 1:length(z)){ +n[j]=length(z[[j]]) +} +if(min(n)<=5){ +id=which(n>5) +del=which(n<=5) +n=n[id] +if(pr)print(paste('For group',del,'the sample size is less than 6')) +} +A=list() +B=list() +for(j in 1:J)A[[j]]=lincon.bin(Nsuc[j,],n.mat[j,],method=method)$CI +for(j in 1:K)B[[j]]=lincon.bin(Nsuc[,j],n.mat[,j],method=method)$CI +list(est.loc.4.DV=est.mat,n=n.mat,A=A,B=B) +} + +smgrid<-function(x,y,IV=c(1,2),Qsplit1=.5,Qsplit2=.5,tr=.2,PB=FALSE,est=tmean,nboot=1000,pr=TRUE, +xout=FALSE,outfun=outpro,SEED=TRUE,...){ +# +# Split on two variables, not just one. +# +# Qsplit: split the independent variable based on the +# quantiles indicated by Qsplit +# Example +# Qsplit1=c(.25,.5,.75) +# Qsplit2=.5 +# would split based on the quartiles for the first independent variable and the median +# for the second independent variable +# + +# Then test the hypothesis of equal measures of location +# IV[1]: indicates the column of containing the first independent variable to use. 
+# IV[2]: indicates the column of containing the second independent variable to use. +# +# if(length(unique(y)>2))stop('y should be binary') +x=as.matrix(x) +p=ncol(x) +if(p==1)stop('There should be two or more independent variables') +p1=p+1 +xy<-elimna(cbind(x,y)) +x<-xy[,1:p] +y<-xy[,p1] +v=NULL +if(xout){ +flag<-outfun(x,plotit=FALSE,...)$keep +x<-x[flag,] +y<-y[flag] +} +z=list() +group=list() +N.int=length(Qsplit1)+1 +N.int2=length(Qsplit2)+1 +est.mat=matrix(NA,nrow=N.int,ncol=N.int2) +L1=NULL +L2=NULL +for(i in 1:N.int)L1[i]=paste('IV1.G',i) +for(i in 1:N.int2)L2[i]=paste('IV2.G',i) +dimnames(est.mat)=list(L1,L2) +qv=quantile(x[,IV[1]],Qsplit1) +qv=c(min(x[,IV[1]]),qv,max(x[,IV[1]])) +qv2=quantile(x[,IV[2]],Qsplit2) +qv2=c(min(x[,IV[2]]),qv2,max(x[,IV[2]])) +ic=0 +for(j in 1:N.int){ +j1=j+1 +xsub=binmat(xy,IV[1],qv[j],qv[j1]) +for(k in 1:N.int2){ +k1=k+1 +xsub2=binmat(xsub,IV[2],qv2[k],qv2[k1]) +est.mat[j,k]=est(xsub2,...) +ic=ic+1 +z[[ic]]=xsub2[,p1] +if(length(z[[ic]])>6)group[[ic]]=summary(xsub2[,1:p]) +} +} +n=NA +for(j in 1:length(z)){ +n[j]=length(z[[j]]) +} +if(min(n)<=5){ +id=which(n>5) +del=which(n<=5) +n=n[id] +if(pr)print(paste('eliminated group',del,'The sample size is less than 6')) +} +test=NULL +ES=IND.PAIR.ES(z)$effect.size +if(!PB){ +a=lincon(z,tr=tr,pr=FALSE) +chk.sig=sign(a$psihat[,4])*sign(a$psihat[,5]) +num.sig=sum(chk.sig>=0) +test=a$test +res=a$psihat +} +if(PB){ +a=linpairpb(z,est=est,nboot=nboot,...) +num.sig=a$num.sig +res=a$output +} +list(Group.summary=group,loc.est=est.mat,test=test,output=res,num.sig=num.sig,Effect.Sizes=ES) +} + + +reg.hyp.split<-function(x,y,split.reg=Qreg,TR=.2,alpha = 0.05, PB = FALSE, est = tmean, nboot = 1000, pr = TRUE, + method = "hoch", xout = FALSE, outfun = outpro, SEED = TRUE, ...){ +# +# Split design space based on the hyperplane associated with the argument +# split.reg +# Default is a quantile regression estimate based on the data in x +# Split the original data then split the results again to get a 2-by-2 ANOVA design +# +# Compare measures of location based on the resulting splits +# +# Choices for split.reg: any R function that returns coefficients in $coef +# Ex. split.reg=depreg would use a deepest regression estimator. +# Could get different split using different quantiles +# Ex.split.reg=Qreg,q=.25, would split the design space based .25 quantile hyperplanes. +# split.reg=mdepreg.coef would use the deepest regression line estimator. +# +# +p=ncol(x) +xy=elimna(cbind(x,y)) +if(xout){ +flag<-outfun(xy[,1:p],plotit=FALSE,...)$keep +xy<-xy[flag,] +} +if(identical(est,median))PB=TRUE +if(identical(est,hd))PB=TRUE +if(p<2)stop('Should have two or more independent variables') +pm1=p-1 +p1=p+1 +hat=reg.pred(xy[,1:pm1],xy[,p],regfun=split.reg,...) +res=xy[,p]-hat +flag=res>0 +x1=xy[flag,] +x2=xy[!flag,] +# +hat=reg.pred(x1[,1:pm1],x1[,p],regfun=split.reg,...) +res=x1[,p]-hat +flag=res>0 +xy1=x1[flag,] +xy2=x1[!flag,] +# +hat=reg.pred(x2[,1:pm1],x2[,p],regfun=split.reg,...) +res=x2[,p]-hat +flag=res>0 +xy3=x2[flag,] +xy4=x2[!flag,] +y=list() +y[[1]]=xy1[,p1] +y[[2]]=xy2[,p1] +y[[3]]=xy3[,p1] +y[[4]]=xy4[,p1] +group=list() +group[[1]]=summary(xy1[,1:p]) +group[[2]]=summary(xy2[,1:p]) +group[[3]]=summary(xy3[,1:p]) +group[[4]]=summary(xy4[,1:p]) +if(!PB)a=lincon(y,tr=TR) +if(PB)a=linconpb(y,est=est,nboot=nboot,...) 
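+# a now holds the pairwise comparisons among the four groups produced by the
+# two hyperplane splits (trimmed means via lincon, or linconpb when PB=TRUE).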
+list(Independent.variables.summary=group,output=a) +} + +regbin.hyp.split<-function(x,y,split.reg=Qreg,alpha = 0.05, nboot = 1000, + method ='SK', xout = FALSE, outfun = outpro, SEED = TRUE, ...){ +# +# y is assumed to be binary +# +# Split design space based on the hyperplane associated with the argument +# split.reg +# Default is a squantile regression estimate based on the data in x +# Split the original data then split the results again to get a 2-by-2 ANOVA design +# +# Compare binomial distributions using based on the argument +# method, which defaults to Storer--Kim. To get confidence intervals use +# method='KMS' +# +# Choices for split.reg: any R function that returns coefficients in $coef +# Ex. split.reg=depreg would use a deepest regression estimator. +# Could get different split using different quantiles +# Ex.split.reg=Qreg,q=.25, would split the design space based .25 quantile hyperplanes. +# split.reg=mdepreg.coef would use the deepest regression line estimator. +# +# +p=ncol(x) +xy=elimna(cbind(x,y)) +if(xout){ +flag<-outfun(xy[,1:p],plotit=FALSE,...)$keep +xy<-xy[flag,] +} +if(length(unique(y))>2)stop('y should be binary') +n=length(y) +yy=rep(0,n) +flag=which(y==max(y)) +yy[flag]=1 +y=yy +if(p<2)stop('Should have two or more independent variables') +pm1=p-1 +p1=p+1 +hat=reg.pred(xy[,1:pm1],xy[,p],regfun=split.reg,...) +res=xy[,p]-hat +flag=res>0 +x1=xy[flag,] +x2=xy[!flag,] +# +hat=reg.pred(x1[,1:pm1],x1[,p],regfun=split.reg,...) +res=x1[,p]-hat +flag=res>0 +xy1=x1[flag,] +xy2=x1[!flag,] +# +hat=reg.pred(x2[,1:pm1],x2[,p],regfun=split.reg,...) +res=x2[,p]-hat +flag=res>0 +xy3=x2[flag,] +xy4=x2[!flag,] +r=NA +r[1]=sum(xy1[,p1]) +r[2]=sum(xy2[,p1]) +r[3]=sum(xy3[,p1]) +r[4]=sum(xy4[,p1]) +n=NA +n[1]=nrow(xy1) +n[2]=nrow(xy2) +n[3]=nrow(xy3) +n[4]=nrow(xy4) +group=list() +group[[1]]=summary(xy1[,1:p]) +group[[2]]=summary(xy2[,1:p]) +group[[3]]=summary(xy3[,1:p]) +group[[4]]=summary(xy4[,1:p]) +a=binpair(r,n,method=method,alpha=alpha) +list(Independent.variables.summary=group,output=a) +} + +KNNbag<-function(train=NULL,test=NULL,g=NULL,x1=NULL,x2=NULL,depthfun=prodepth,nboot=100,SEED=TRUE,...){ +# +# +# g=class id +# if there are two classes and the training data are stored in separate variables, can enter +# the data for each class via the arguments +# x1 and x2. +# The function will then create appropriate labels and store them in g. +# +# KNN classification using data depths. 
+# KNNdist uses data depths, for the n1!=n2 it can be a bit biased, meaning that +# when there is no association, the probability of a correct classification will be less than .5 +# It removes any row vector with missing values +# +# +if(SEED)set.seed(2) +if(!is.null(train)){ +if(is.null(g))stop('Argument g, group ids, must be specified') +if(is.matrix(g)){ +if(dim(g)>1)stop('Argument g should be a vector') +} +Train=cbind(train,g) +Train=elimna(Train) +p=ncol(train) +p1=p+1 +train=Train[,1:p] +g=Train[,p1] +flag=g==min(g) +x1=train[flag,] +x2=train[!flag,] +if(length(unique(g))>2)stop('Only two groups allowed, g has more than two unique values') +} +x1=elimna(x1) +x2=elimna(x2) +test=elimna(test) +test=as.matrix(test) +n.test=nrow(test) +if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') +if(ncol(test)!=ncol(x2))stop('test and x2 have different number of columns') +x1=elimna(x1) +x2=elimna(x2) +n1=nrow(x1) +n2=nrow(x2) +n=min(c(n1,n2)) +dvec=matrix(NA,nrow=nboot,ncol=n.test) +for(i in 1:nboot){ +id1=sample(n1,n,replace=TRUE) +id2=sample(n2,n,replace=TRUE) +dvec[i,]=KNNdist(x1=x1[id1,],x2=x2[id2,],test=test,depthfun=depthfun) +} +dec=rep(1,n.test) +test1=dvec==1 +test2=dvec==2 +chk1=apply(test1,2,sum) +chk2=apply(test2,2,sum) +idec=chk2>chk1 +dec[idec]=2 +dec +} +SVMbag<-function(train=NULL,test=NULL,g=NULL,x1=NULL,x2=NULL,depthfun=prodepth,kernel='radial',nboot=100,SEED=TRUE,...){ +# +# +# g=class id +# if there are two classes and the training data are stored in separate variables, can enter +# the data for each class via the arguments +# x1 and x2. +# The function will then create appropriate labels and store them in g. +# +# Support Vector Machine classification method. +# Unlike standard SVM this function has the following property. Suppose n1!=n2 and n2/n1 is small. If there is no +# association between the training data and the labels, the probability of a misclassification is .5 +# In contrast, using standard SVM, it is approximately n2/(n1+n2) +# +if(is.null(test))stop('Argument test is null, contains no data') +if(SEED)set.seed(2) +if(!is.null(train)){ +if(is.null(g))stop('Argument g, group ids, must be specified') +if(is.matrix(g)){ +if(dim(g)>1)stop('Argument g should be a vector') +traing=elimna(cbind(train,g)) +p=ncol(train) +p1=p+1 +train=traing[,1:p] +test=traing[,p1] +if(length(unique(g))>2)stop('Only two groups allowed, g has more than two unique values') +} +x=fac2list(train,g) +x1=x[[1]] +x2=x[[2]] +} +test=as.matrix(test) +n.test=nrow(test) +if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') +if(ncol(test)!=ncol(x2))stop('test and x2 have different number of columns') +x1=elimna(x1) +x2=elimna(x2) +n1=nrow(x1) +n2=nrow(x2) +n=min(c(n1,n2)) +dvec=matrix(NA,nrow=nboot,ncol=n.test) +for(i in 1:nboot){ +id1=sample(n1,n,replace=TRUE) +id2=sample(n2,n,replace=TRUE) +dvec[i,]=SVM(x1=x1[id1,],x2=x2[id2,],test=test,kernel=kernel) +} +dec=rep(1,n.test) +test1=dvec==1 +test2=dvec==2 +chk1=apply(test1,2,sum) +chk2=apply(test2,2,sum) +idec=chk2>chk1 +dec[idec]=2 +dec +} + +mulwmw.dist.new<-function(m1,m2,new,cop=3){ +# +# +# Determine center corresponding to two +# independent groups, project all points onto line +# connecting the centers based on m1 and m2. Return projected distances for m1 +# m2. +# new: new data, not known whether it came from group 1 or 2. +# This function is used in pro.class, a classification method. 
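+# The projected distances are returned for each group (dist1 and dist2),
+# along with the projected distances of the new points (dis.new).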
+# +# +# There are three options for computing the center of the +# cloud of points when computing projections: +# cop=1 uses Donoho-Gasko median +# cop=2 uses MCD center +# cop=3 uses median of the marginal distributions. +# +# When using cop=2 or 3, default critical value for outliers +# is square root of the .975 quantile of a +# chi-squared distribution with p degrees +# of freedom. +# +if(is.null(dim(m1))||dim(m1)[2]<2){stop("m1 and m2 should have two or more columns") +} +m1<-elimna(m1) # Remove missing values +m2<-elimna(m2) +new=elimna(new) +FLAG=FALSE +new=as.matrix(new) +if(ncol(new)==1){ +FLAG=TRUE +new=t(new) # If test is a vector, a single point, transpose to get correct number of columns. +new=rbind(new,new) #avoid R from aborting. +} +n1=nrow(m1) +n2=nrow(m2) +if(cop==1){ +if(ncol(m1)>2){ +center1<-dmean(m1,tr=.5) +center2<-dmean(m2,tr=.5) +} +if(ncol(m1)==2){ +tempd<-NA +for(i in 1:nrow(m1)) +tempd[i]<-depth(m1[i,1],m1[i,2],m1) +mdep<-max(tempd) +flag<-(tempd==mdep) +if(sum(flag)==1)center1<-m1[flag,] +if(sum(flag)>1)center1<-apply(m1[flag,],2,mean) +for(i in 1:nrow(m2)) +tempd[i]<-depth(m2[i,1],m2[i,2],m2) +mdep<-max(tempd) +flag<-(tempd==mdep) +if(sum(flag)==1)center2<-m2[flag,] +if(sum(flag)>1)center2<-apply(m2[flag,],2,mean) +}} +if(cop==2){ +center1<-cov.mcd(m1)$center +center2<-cov.mcd(m2)$center +} +if(cop==3){ +center1<-apply(m1,2,median) +center2<-apply(m2,2,median) +} +if(cop==4){ +center1<-smean(m1) +center2<-smean(m2) +} +center<-(center1+center2)/2 +B<-center1-center2 +if(sum(center1^2).5 +# +# SEED=NULL, done for convenience when this function is called by other functions. +# +if(is.null(test))stop('Argument test is null, contains no data') +if(is.null(test))stop('Argument test is null, contains no data') +if(!is.null(train)){ +if(is.null(g))stop('Argument g, group ids, must be specified') +if(is.matrix(g)){ +if(dim(g)>1)stop('Argument g should be a vector') +}} +CHK=FALSE +if(!is.null(x1)){ +if(!is.null(x2)){ +if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') +x1=elimna(x1) +x2=elimna(x2) +n1=nrow(x1) +n2=nrow(x2) +g=c(rep(0,n1),rep(1,n2)) +CHK=TRUE +d=mulwmw.dist.new(x1,x2,test) +}} +if(!CHK){ +if(is.null(g))stop('The argument g should contain the group id values') +xg=elimna(cbind(train,g)) +p=ncol(train) +p1=p+1 +train=xg[,1:p] +g=xg[,p1] +if(length(unique(g))!=2)stop('Should have only two unique values in g') +flag=g==min(g) +x1=train[flag,] +x2=train[!flag,] + d=mulwmw.dist.new(x1,x2,test) +} +pdf1=NA +pdf2=NA +for(i in 1:length(d$dis.new)){ #Avoid sorting issue done by akerd +pdf1[i]=akerd(d$dist1,pts=d$dis.new[i],pyhat=TRUE,plotit=FALSE) +pdf2[i]=akerd(d$dist2,pts=d$dis.new[i],pyhat=TRUE,plotit=FALSE) +} +dec=rep(2,nrow(test)) +dec[pdf1/pdf2>rule]=1 +dec +} + +pro.classPD.bag<-function(train=NULL,test=NULL,g=NULL,x1=NULL,x2=NULL,rule=1,nboot=100,SEED=TRUE,...){ +# +# +# A bagged version of pro.classPD +# +# g=class id +# if there are two classes and the training data are stored in separate variables, can enter +# the data for each class via the arguments +# x1 and x2. +# The function will then create appropriate labels and store them in g. 
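+#
+# A minimal sketch (hypothetical data: x1 and x2 are matrices with matching
+# columns, new.pts the points to classify):
+# pro.classPD.bag(x1=x1,x2=x2,test=new.pts,nboot=100)
+# returns a 1 or a 2 for each row of new.pts, indicating the predicted group.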
+# +# +if(is.null(test))stop('Argument test is null, contains no data') +if(SEED)set.seed(2) +if(!is.null(train)){ +if(is.null(g))stop('Argument g, group labels, must be specified') +if(is.matrix(g)){ +if(dim(g)>1)stop('Argument g should be a vector') +traing=elimna(cbind(train,g)) +p=ncol(train) +p1=p+1 +train=traing[,1:p] +test=traing[,p1] +if(length(unique(g))>2)stop('Only two groups allowed, g has more than two unique values') +} +x=fac2list(train,g) +x1=x[[1]] +x2=x[[2]] +} +test=as.matrix(test) +n.test=nrow(test) +if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') +if(ncol(test)!=ncol(x2))stop('test and x2 have different number of columns') +x1=elimna(x1) +x2=elimna(x2) +n1=nrow(x1) +n2=nrow(x2) +n=min(c(n1,n2)) +dvec=matrix(NA,nrow=nboot,ncol=n.test) +for(i in 1:nboot){ +id1=sample(n1,n,replace=TRUE) +id2=sample(n2,n,replace=TRUE) +dvec[i,]=pro.classPD(x1=x1[id1,],x2=x2[id2,],test=test,rule=rule) +} +dec=rep(1,n.test) +test1=dvec==1 +test2=dvec==2 +chk1=apply(test1,2,sum) +chk2=apply(test2,2,sum) +idec=chk2>chk1 +dec[idec]=2 +dec +} + + + +UB.class<-function(train=NULL,test=NULL,g=NULL,x1=NULL,x2=NULL, +method=c('DIS','DEP','PRO'),depthfun=prodepth,...){ +# +# A collection of classification methods for which the error rate is not +# impacted by unequal sample sizes. +# Bagged version are available in class.bag +# +# DIS: Points classified based on their depths +# DEP: Uses depths as suggested by Makinde and Fasoranbaku (2018). JAS +# PRO: project the points onto a line connecting the centers of the data clouds. +# Then use estimate of the pdf for each group to make a decision about future points. +# +type=match.arg(method) +switch(type, + DIS=discdepth(train=train,test=test,g=g,x1=x1,x2=x2), + DEP=Depth.class(train=train,test=test,g=g,x1=x1,x2=x2), + PRO=pro.classPD(train=train,test=test,g=g,x1=x1,x2=x2), + ) +} + +smbin.inter<-function(x,y,IV=c(1,2),Qsplit1=.5,Qsplit2=.5,alpha=.05,con=NULL,xout=FALSE,outfun=outpro,SEED=TRUE,...){ +# +# Split on two variables. +# Look for interactions when dealing with binary dependent variable. +# +# Qsplit: split the independent variable based on the +# quantiles indicated by Qsplit +# Example +# Qsplit1=c(.25,.5,.75) +# Qsplit2=.5 +# would split based on the quartiles for the first independent variable and the median +# for the second independent variable +# +# IV[1]: indicates the column of containing the first independent variable to use. +# IV[2]: indicates the column of containing the second independent variable to use. +# +x=as.matrix(x) +p=ncol(x) +if(p==1)stop('There should be two or more independent variables') +p1=p+1 +xy<-elimna(cbind(x,y)) +x<-xy[,1:p] +y<-xy[,p1] + if(length(unique(y))>2)stop('y should be binary') +v=NULL +if(xout){ +flag<-outfun(x,plotit=FALSE,...)$keep +x<-x[flag,] +y<-y[flag] +} +z=list() +group=list() +N.int=length(Qsplit1)+1 +N.int2=length(Qsplit2)+1 +if(is.null(con))con=con2way(N.int,N.int2)$conAB +est.mat=matrix(NA,nrow=N.int,ncol=N.int2) +L1=NULL +L2=NULL +for(i in 1:N.int)L1[i]=paste('IV1.G',i) +for(i in 1:N.int2)L2[i]=paste('IV2.G',i) +dimnames(est.mat)=list(L1,L2) +qv=quantile(x[,IV[1]],Qsplit1) +qv=c(min(x[,IV[1]]),qv,max(x[,IV[1]])) +qv2=quantile(x[,IV[2]],Qsplit2) +qv2=c(min(x[,IV[2]]),qv2,max(x[,IV[2]])) +ic=0 +for(j in 1:N.int){ +j1=j+1 +xsub=binmat(xy,IV[1],qv[j],qv[j1]) +for(k in 1:N.int2){ +k1=k+1 +xsub2=binmat(xsub,IV[2],qv2[k],qv2[k1]) +est.mat[j,k]=mean(xsub2[,p1],...) 
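+# The mean of the binary outcome in cell (j,k) estimates the probability of
+# success for that cell.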
+ic=ic+1 +z[[ic]]=xsub2[,p1] +if(length(z[[ic]])>6)group[[ic]]=summary(xsub2[,1:p]) +} +} +r=NA +n=NA +for(j in 1:length(z)){ +r[j]=sum(z[[j]]) +n[j]=length(z[[j]]) +} +if(min(n)<=5){ +del=which(n<=5) +if(pr)print(paste('For group',del,'the sample size is less than 6')) +} +test=lincon.bin(r,n,con=con) +list(Group.summary=group,Prob.est=est.mat,output=test$CI,con=con) +} + +class.logR<-function(train=NULL,test=NULL,g=NULL,x1=NULL,x2=NULL,sm=TRUE,fr=2,rule=.5,SEED=NULL){ +# +# Do classification using logistic or a smoother +# sm=TRUE: a smoother will be used with the span taken to be +# fr is this span +# sm=FALSE: use logistic regression. +# +# +# train is the training set +# test is the test data +# g contains labels for the data in the training set, +# +# This function removes the need to call library class. +# For more information, use the command ?knn +# +# SEED=NULL, used for convenience when called by other functions that expect SEED +# +if(!is.null(train)){ +train=as.matrix(train) +if(is.null(g))stop('Argument g, group ids, must be specified') +if(is.matrix(g))if(dim(g)>1)stop('Argument g should be a vector') +flag=g==min(g) +x1=train[flag,] +x2=train[!flag,] +} +x1=as.matrix(x1) +x2=as.matrix(x2) +if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') +if(is.null(x1))stop('Something is wrong, no data in x1') +if(is.null(x2))stop('Something is wrong, no data in x2') +if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') +x1=elimna(x1) +x2=elimna(x2) +x1=as.matrix(x1) +x2=as.matrix(x2) +n1=nrow(x1) +n2=nrow(x2) +train=rbind(x1,x2) +g=c(rep(1,n1),rep(2,n2)) +if(!sm)e=logreg.pred(train,g,test) +if(sm)e=logSMpred(train,g,test,fr=fr) +if(is.null(test))stop('Argument test is null, contains no data') +test=as.matrix(test) +res=rep(1,nrow(test)) +flag=e>rule +res[flag]=2 +res +} + + + +UBROC<-function(train=NULL,g=NULL,x1=NULL,x2=NULL,method='KNN',reps=10,pro.p=.8,SEED=TRUE,POS=TRUE,EN=TRUE,...){ +# +# Compute ROC curve based on an 'unbiased' classification method if EN=TRUE, possible bias if EN=FALSE +# method indicates the method to be used +# +# Current choices available: +# KNN: Nearest neighbor using robust depths +# DIS: Points classified based on their depths +# DEP: Uses depths as suggested by Makinde and Fasoranbaku (2018). JAS +# SVM: support vector machine +# RF: Random forest +# NN: neural network +# ADA: ada boost +# PRO: project the points onto a line connecting the centers of the data clouds. +# Then use estimate of the pdf for each group to make a decision about future points. +# +# +# reps: number of resamples resamples +# pro.p controls the proportion used in the training, the rest are used in the test set +# +library(ROCR) +CHK=FALSE +if(!is.null(x1)){ +if(!is.null(x2)){ +if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') +x1=elimna(x1) +x2=elimna(x2) +n1=nrow(x1) +n2=nrow(x2) +g=c(rep(0,n1),rep(1,n2)) +CHK=TRUE +}} +if(!CHK){ +if(is.null(g))stop('The argument g should contain the group id values') +xg=elimna(cbind(train,g)) +p=ncol(train) +p1=p+1 +train=xg[,1:p] +g=xg[,p1] +if(length(unique(g))!=2)stop('Should have only two unique values in g') +flag=g==min(g) +x1=train[flag,] +x2=train[!flag,] +} +n1=nrow(x1) +n2=nrow(x2) +ns1=round(pro.p*n1) +ns2=round(pro.p*n2) +if(EN)ns1=ns2=min(ns1,ns2) +PRED=NULL +LABS=NULL +for(j in 1:reps){ +N1=sample(n1,ns1) +N2=sample(n2,ns2) +test1=x1[-N1,] +test2=x2[-N2,] +a1=CLASS.fun(x1=x1[N1,],x2=x2[N2,],test=test1,method=method,...) 
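+# a1: predicted labels for the held-out cases from group 1; a2, computed next,
+# does the same for the held-out cases from group 2.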
+a2=CLASS.fun(x1=x1[N1,],x2=x2[N2,],test=test2,method=method,...) +PRED=c(PRED,c(a1,a2)) +LABS=c(LABS,c(rep(1,length(a1)),rep(2,length(a2)))) +pred=prediction(PRED,LABS) +} +if(POS)perf <- performance(pred,'tpr','fpr') +if(!POS)perf <- performance(pred,'tnr','fnr') +plot(perf) +} + +ROCmul.curve<-function(train=NULL,g=NULL,x1=NULL,x2=NULL,method=c('KNN','DIS'),pro.p=.8, +SEED=TRUE,reps=10,POS=TRUE,...){ +# +# Required ROCR +# +# Using cross validation +# +# Plot ROC curves based on two or more methods +# Current choices available: +# KNN: Nearest neighbor using robust depths +# DIS: Points classified based on their depths +# DEP: Uses depths as suggested by Makinde and Fasoranbaku (2018). JAS +# SVM: support vector machine +# RF: Random forest +# NN: neural network +# ADA: ada boost +# PRO: project the points onto a line connecting the centers of the data clouds. +# Then use estimate of the pdf for each group to make a decision about future points. +# +# reps number of resamples, the resulting ROC curves are averaged and the average is plotted. +# pro.p controls the proportion used in the training, the rest are used in the test set +# +library(ROCR) +n.meth=length(method) +UBROC(train=train,g=g,x1=x1,x2=x2,method=method[1],reps=reps,pro.p=pro.p,SEED=SEED,POS=POS,...) +if(!is.null(train)){ +if(is.null(g))stop('Argument g, group ids, must be specified') +if(is.matrix(g)){ +if(dim(g)>1)stop('Argument g should be a vector') +} +flag=g==min(g) +x1=train[flag,] +x2=train[!flag,] +if(length(unique(g))>2)stop('Only two groups allowed, g has more than two unique values') +} +x1=elimna(x1) +x2=elimna(x2) +n1=nrow(x1) +n2=nrow(x2) +g=c(rep(0,n1),rep(1,n2)) +k=1 +if(n.meth>1){ +for(k in 2:n.meth){ +CHK=TRUE +if(!CHK){ +if(is.null(g))stop('The argument g should contain the group id values') +xg=elimna(cbind(train,g)) +p=ncol(train) +p1=p+1 +train=xg[,1:p] +g=xg[,p1] +if(length(unique(g))!=2)stop('Should have only two unique values in g') +flag=g==min(g) +x1=train[flag,] +x2=train[!flag,] +} +ns1=round(pro.p*n1) +ns2=round(pro.p*n2) +PRED=NULL +LABS=NULL +for(j in 1:reps){ +N1=sample(n1,ns1) +N2=sample(n2,ns2) +test1=x1[-N1,] +test2=x2[-N2,] +a1=CLASS.fun(x1=x1[N1,],x2=x2[N2,],test=test1,method=method[k],...) +a2=CLASS.fun(x1=x1[N1,],x2=x2[N2,],test=test2,method=method[k],...) +PRED=c(PRED,c(a1,a2)) +LABS=c(LABS,c(rep(1,length(a1)),rep(2,length(a2)))) +pred=prediction(PRED,LABS) +} +if(POS)perf <- performance(pred,'tpr','fpr') +if(!POS)perf <- performance(pred,'tnr','fnr') +plot(perf,lty=k,add=TRUE) +}} +} +# perf=performance(pred, "spec") +# auroc<- perf@y.values +pro.class.probs<-function(train=NULL,test=NULL,g=NULL,x1=NULL,x2=NULL,nonpar=TRUE,rule=.5,fr=2){ +# +# Same as pro.class, but also reports probabilities fo being in second class for each vector in test. +# +# project the data onto a line, then estimate the probability that +# a value in test data is in first group. 
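+#
+# A minimal sketch (hypothetical matrices x1, x2 and test points new.pts):
+# pro.class.probs(x1=x1,x2=x2,test=new.pts)
+# returns, for each row of new.pts, prob.in.second.class: the estimated
+# probability of belonging to the second group.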
+# +if(rule<=0 || rule>=1)stop('rule should be greater than 0 and less than 1') +if(!is.null(train)){ +if(is.null(g))stop('Argument g, group ids, must be specified') +if(is.matrix(g)){ +if(dim(g)>1)stop('Argument g should be a vector') +}} +CHK=FALSE +if(!is.null(x1)){ +if(!is.null(x2)){ +if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') +x1=elimna(x1) +x2=elimna(x2) +n1=nrow(x1) +n2=nrow(x2) +g=c(rep(0,n1),rep(1,n2)) +CHK=TRUE +d=mulwmw.dist.new(x1,x2,test) +}} +if(!CHK){ +if(is.null(g))stop('The argument g should contain the group id values') +xg=elimna(cbind(train,g)) +p=ncol(train) +p1=p+1 +train=xg[,1:p] +g=xg[,p1] +if(length(unique(g))!=2)stop('Should have only two unique values in g') +flag=g==min(g) +x1=train[flag,] +x2=train[!flag,] + d=mulwmw.dist.new(x1,x2,test) +} +flag=g==min(g) +gnum=g +gnum[flag]=0 +gnum[!flag]=1 +x=c(d$dist1,d$dist2) +if(nonpar){ +v=logSMpred(x,gnum,d[[3]],fr=fr) +} +if(!nonpar){ +v=logreg.pred(x,gnum,d[[3]]) +} +dec=rep(2,nrow(test)) +dec[v1)stop('Argument g should be a vector') +traing=elimna(cbind(train,g)) +p=ncol(train) +p1=p+1 +train=traing[,1:p] +test=traing[,p1] +if(length(unique(g))>2)stop('Only two groups allowed, g has more than two unique values') +} +x=fac2list(train,g) +x1=x[[1]] +x2=x[[2]] +} +test=as.matrix(test) +n.test=nrow(test) +if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') +if(ncol(test)!=ncol(x2))stop('test and x2 have different number of columns') +x1=elimna(x1) +x2=elimna(x2) +n1=nrow(x1) +n2=nrow(x2) +n=min(c(n1,n2)) +dvec=matrix(NA,nrow=nboot,ncol=n.test) +for(i in 1:nboot){ +id1=sample(n,n,replace=TRUE) +id2=sample(n,n,replace=TRUE) +if(!PR)dvec[i,]=pro.class(x1=x1[id1,],x2=x2[id2,],test=test) +if(PR)dvec[i,]=pro.class.probs(x1=x1[id1,],x2=x2[id2,],test=test)$prob.in.second.class +} +dec=rep(1,n.test) +test1=dvec==1 +test2=dvec==2 +chk1=apply(test1,2,sum) +chk2=apply(test2,2,sum) +if(!PR)idec=chk2>chk1 +if(PR){ +chk=apply(dvec,2,mean) +idec=chk>.5 +} +dec[idec]=2 +dec +} + + + +pro.class<-function(train=NULL,test=NULL,g=NULL,x1=NULL,x2=NULL,nonpar=TRUE,rule=.5,fr=2,SEED=TRUE){ +# +# Project the data onto a line, then estimate the probability that +# a value in test data is in first group. Impacted by unequal sample sizes. To avoid this use +# pro.class.bag. Or use this function but with equal samples sizes for the test data. +# +# nonpar=TRUE: use a smoother to estimate probabilities +# in which case +# fr is the span. 
+# FALSE: use logistic regression +# +if(is.null(test))stop('Argument test is NULL') +if(!is.null(train)){ +if(is.null(g))stop('Argument g, group labels, must be specified') +if(is.matrix(g)){ +if(dim(g)>1)stop('Argument g should be a vector') +}} +CHK=FALSE +if(!is.null(x1)){ +if(!is.null(x2)){ +if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') +x1=elimna(x1) +x2=elimna(x2) +n1=nrow(x1) +n2=nrow(x2) +g=c(rep(0,n1),rep(1,n2)) +CHK=TRUE +d=mulwmw.dist.new(x1,x2,test) +}} +if(!CHK){ +if(is.null(g))stop('The argument g should contain the group id values') +xg=elimna(cbind(train,g)) +p=ncol(train) +p1=p+1 +train=xg[,1:p] +g=xg[,p1] +if(length(unique(g))!=2)stop('Should have only two unique values in g') +flag=g==min(g) +x1=train[flag,] +x2=train[!flag,] + d=mulwmw.dist.new(x1,x2,test) +} +flag=g==min(g) +gnum=g +gnum[flag]=0 +gnum[!flag]=1 +x=c(d$dist1,d$dist2) +if(nonpar){ +g1=rep(0,n1) +v=logSMpred(x,gnum,d[[3]],fr=fr,SEED=SEED) +} +if(!nonpar){ +v=logreg.pred(x,gnum,d[[3]]) +} +dec=rep(2,nrow(test)) +dec[v2){ +center1<-dmean(m1,tr=.5) +center2<-dmean(m2,tr=.5) +} +if(ncol(m1)==2){ +tempd<-NA +for(i in 1:nrow(m1)) +tempd[i]<-depth(m1[i,1],m1[i,2],m1) +mdep<-max(tempd) +flag<-(tempd==mdep) +if(sum(flag)==1)center1<-m1[flag,] +if(sum(flag)>1)center1<-apply(m1[flag,],2,mean) +for(i in 1:nrow(m2)) +tempd[i]<-depth(m2[i,1],m2[i,2],m2) +mdep<-max(tempd) +flag<-(tempd==mdep) +if(sum(flag)==1)center2<-m2[flag,] +if(sum(flag)>1)center2<-apply(m2[flag,],2,mean) +}} +if(cop==2){ +center1<-cov.mcd(m1)$center +center2<-cov.mcd(m2)$center +} +if(cop==3){ +center1<-apply(m1,2,median) +center2<-apply(m2,2,median) +} +if(cop==4){ +center1<-smean(m1) +center2<-smean(m2) +} +center<-(center1+center2)/2 +B<-center1-center2 +if(sum(center1^2)1)stop('Argument g should be a vector') +}} +if(!is.null(x1)){ +if(!is.null(x2)){ +if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') +x1=elimna(x1) +x2=elimna(x2) +n1=nrow(x1) +n2=nrow(x2) +g=c(rep(0,n1),rep(1,n2)) +train=rbind(x1,x2) +}} +g=as.numeric(as.vector(g)) +flag=g==min(g) +gnum=g +gnum[flag]=0 +gnum[!flag]=1 +g=gnum +test=as.matrix(test) +FLAG=FALSE +if(ncol(test)==1){ +FLAG=TRUE +test=t(test) # If test is a vector, a single point, transpose to get correct number of columns. +test=rbind(test,test) #avoid R from aborting. +} +svm_model=svm(train,as.factor(g),kernel=kernel) +dec=predict(svm_model,as.matrix(test)) +dec=as.vector(as.numeric(dec)) +if(FLAG)dec=dec[1] +dec +} + +UB.class.error<-function(train=NULL,g=NULL,x1=NULL,x2=NULL,method=c('KNN','SVM','DIS','DEP','PRO','PROBAG'), +alpha=.05,pro.p=.8,SEED=TRUE,...){ +# +# Use cross validation to estimate error rates associated with the classification method indicated by the argument +# method +# +# By default, estimates are computed for each method listed in the argument' +# method +# +# To include a neural net method, included 'NNbag' in methods; not included automatically to avoid high execution time. +# +# Also reports estimates of a false positive or false negative, but no confidence interval is included. The obvious approach performs poorly +# +# pro.p = proportion used from each of the two training groups; remainder used as test data. +# +# g=class id +# if there are two classes and the training data are stored in separate variables, can enter +# the data for each class via the arguments +# x1 and x2. 
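+#
+# A minimal sketch (hypothetical matrices x1 and x2 holding the two training
+# groups):
+# UB.class.error(x1=x1,x2=x2,method=c('KNN','SVM'))
+# is intended to return, for each method, the estimated error rates defined next.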
+# +# FN: False negative, assign to group 2 by mistake e.g., NULL predict no fracture but Non-null gets a fracture +# FP: False positive, assign to group 1 by mistake like NULL, e.g, will not have a fracture, but did +# TE: Overall all error rate. +# +# +n.est=length(method) +if(SEED)set.seed(2) +if(!is.null(train)){ +if(is.null(g))stop('Argument g, group ids, must be specified') +if(is.matrix(g)){ +if(dim(g)>1)stop('Argument g should be a vector') +} +Train=cbind(train,g) +Train=elimna(Train) +p=ncol(train) +p1=p+1 +train=Train[,1:p] +g=Train[,p1] +flag=g==min(g) +x1=Train[flag,1:p] +x2=Train[!flag,1:p] +} +if(is.null(x1))stop('Something is wrong, no data in x1') +if(is.null(x2))stop('Something is wrong, no data in x2') +if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') +x1=elimna(x1) +x2=elimna(x2) +n1=nrow(x1) +n2=nrow(x2) +ns1=round(pro.p*n1) +ns2=round(pro.p*n2) +ciFN=matrix(NA,nrow=reps,ncol=2) +ciFP=matrix(NA,nrow=reps,ncol=2) +ciTE=matrix(NA,nrow=reps,ncol=2) +phat.FN=NA +phat.FP=NA +phat.TE=NA +RES=matrix(NA,nrow=n.est,ncol=3) +dimnames(RES)=list(method,c('phat.FN','phat.FP','phat.TE')) +for(k in 1:n.est){ +N1=sample(n1,ns1) +N2=sample(n2,ns2) +test1=x1[-N1,] +test2=x2[-N2,] +a1=UB.class(x1=x1[N1,],x2=x2[N2,],test=test1,method=method[k],SEED=SEED,...) +a2=UB.class(x1=x1[N1,],x2=x2[N2,],test=test2,method=method[k],SEED=SEED,...) +flag1=a1!=1 # ID False negatives e.g., predict no fracture but fracture occurred. +flag2=a2!=2 # ID False positives e.g., predict fracture but no fracture occurred. +FN=binom.conf(y=flag1,alpha=alpha,pr=FALSE) +FP=binom.conf(y=flag2,alpha=alpha,pr=FALSE) #CI does not work well when P(FP)=.5 +TE=binom.conf(y=c(flag1,flag2),alpha=alpha,pr=FALSE) #total error +RES[k,]=c(FN$phat,FP$phat,TE$phat) +} +RES +} + +quantregForest <-function(x,y, nthreads = 1, keep.inbag=FALSE, ...){ +# +# This function does robust random Forest regression based on +# Nicolai Meinshausen (2006) Quantile Regression Forests, +# Journal of Machine Learning Research, 7, 983-999. +# The code used here is based on a modification code downloaded from github, which is maintained by +# Loris Michel +# +# + +x=as.data.frame(x) + if(is.null(nrow(x)) || is.null(ncol(x))) + stop(' x contains no data ') + if( nrow(x) != length(y) ) + stop(' predictor variables and response variable must contain the same number of samples ') + + if (any(is.na(x))) stop('NA not permitted in predictors') + if (any(is.na(y))) stop('NA not permitted in response') + ## Check for categorial predictors with too many categories (copied from randomForest package) + if (is.data.frame(x)) { + ncat <- sapply(x, function(x) if(is.factor(x) && !is.ordered(x)) + length(levels(x)) else 1) + } else { + ncat <- 1 + } + maxcat <- max(ncat) + if (maxcat > 32) + stop('Can not handle categorical predictors with more than 32 categories.') + ## Note that crucial parts of the computation + ## are only invoked by the predict method + cl <- match.call() + cl[[1]] <- as.name('quantregForest') + qrf <- if(nthreads > 1){ + parallelRandomForest(x=x, y=y, nthreads = nthreads,keep.inbag=keep.inbag, ...) + }else{ + randomForest( x=x,y=y ,keep.inbag=keep.inbag,...) 
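+    # single-threaded fit; the parallelRandomForest branch above is used when nthreads > 1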
+ } + nodesX <- attr(predict(qrf,x,nodes=TRUE),'nodes') + rownames(nodesX) <- NULL + nnodes <- max(nodesX) + ntree <- ncol(nodesX) + n <- nrow(x) + valuesNodes <- matrix(nrow=nnodes,ncol=ntree) + for (tree in 1:ntree){ + shuffledNodes <- nodesX[rank(ind <- sample(1:n,n)),tree] + useNodes <- sort(unique(as.numeric(shuffledNodes))) + valuesNodes[useNodes,tree] <- y[ind[match(useNodes,shuffledNodes )]] + } + + qrf[['call']] <- cl + qrf[['valuesNodes']] <- valuesNodes + if(keep.inbag){ + # + # create a prediction vector with same shape as predictOOBNodes + predictOOBNodes <- attr(predict(qrf,newdata=x,nodes=TRUE),'nodes') + rownames(predictOOBNodes) <- NULL + valuesPredict <- 0*predictOOBNodes + ntree <- ncol(valuesNodes) + valuesPredict[qrf$inbag >0] <- NA + # + # for each tree and observation sample another observation of the same node + for (tree in 1:ntree){ + is.oob <- qrf$inbag[,tree] == 0 + n.oob <- sum(is.oob) + if(n.oob!=0) { + y.oob <- sapply(which(is.oob), + function(i) { + cur.node <- nodesX[i, tree] + y.sampled <- if (length(cur.y <- y[setdiff(which(nodesX[,tree] == cur.node) + ,i)])!=0) { + cur.y[sample(x = 1:length(cur.y), size = 1)] + } else { + NA + } + return(y.sampled) + }) + valuesPredict[is.oob, tree] <- y.oob + } + } + + minoob <- min( apply(!is.na(valuesPredict),1,sum)) + if(minoob<10) stop('need to increase number of trees for sufficiently many out-of-bag observations') + valuesOOB <- t(apply( valuesPredict,1 , function(x) sample( x[!is.na(x)], minoob))) + qrf[['valuesOOB']] <- valuesOOB + } + class(qrf) <- c('quantregForest','randomForest') + + return(qrf) +} + +predict.robust.Forest<-function(object,newdata=NULL, what=tmean,... ){ +# +# Goal: estimate a measure of location for newdata based on the regions stemming from the Random Forest method +# +# what: a function indicating the measure of location to be estimated. Default is a 20% trimmed mean +# +# Example: +# a=quantregForest(x,y) +# predict.robust.Forest(a,newdata = new,what=hd) +# +# For convenience, these steps are combined in the function regR.Forest, which calls this function and eliminates the need for the +# library command. +# +# For each region generated by the random Forest method,this would estimate the median based on the Harrell-Davis estimator. +# +# predict.robust.Forest(a,newdata = new,what=mean,tr=.1) 10% trimmed mean +# +# To estimate one or more quantiles, can use predict.quantregForest or could use this function +# with what containing the quantiles to be estimated. For example what=c(.25,.75) would estimate the +# lower and upper quartiles. +# +# This code is based on modifications of code written by L. Michel +# + class(object) <- 'randomForest' + if(is.null(newdata)){ + if(is.null(object[['valuesOOB']])) stop('need to fit with option keep.inbag=TRUE if trying to get out-of-bag observations') + valuesPredict <- object[['valuesOOB']] + }else{ + predictNodes <- attr(predict(object,newdata=newdata,nodes=TRUE),'nodes') + rownames(predictNodes) <- NULL + valuesPredict <- 0*predictNodes + ntree <- ncol(object[['valuesNodes']]) + for (tree in 1:ntree){ + valuesPredict[,tree] <- object[['valuesNodes']][ predictNodes[,tree],tree] + } + } + if(is.function(what)){ + if(is.function(what(1:4))){ + result <- apply(valuesPredict,1,what) + }else{ + if(length(what(1:4))==1){ + result <- apply(valuesPredict,1,what,...) 
+ }else{ + result <- t(apply(valuesPredict,1,what)) + } + } + }else{ + if( !is.numeric(what)) stop(' argument what needs to be either a function or a vector with quantiles') + if( min(what)<0) stop(' if what specifies quantiles, the minimal values needs to be non-negative') + if( max(what)>1) stop(' if what specifies quantiles, the maximal values cannot exceed 1') + if(length(what)==1){ + result <- apply( valuesPredict,1,quantile, what,na.rm=TRUE) + }else{ + result <- t(apply( valuesPredict,1,quantile, what,na.rm=TRUE)) + colnames(result) <- paste('quantile=',what) + } + } + return(result) +} + +regR.Forest<-function(x,y,newdata=NULL,pts=x,pyhat=FALSE,loc.fun=tmean,xout=FALSE,plotit=TRUE,outfun=outpro, span = 0.75,LP=TRUE,pch='.', +ZLIM = FALSE, scale = TRUE, xlab = 'X', ylab = 'Y', ticktype='simple',frame=TRUE,eout=FALSE, + zlab ='', theta = 50, phi = 25,...){ + +# Goal: estimate a measure of location for newdata based +# on the Random Forest method +# Default, estimate measure of location for training data x +# +# loc.fun: a function indicating the measure of location to be estimated. +# Default is a 20% trimmed mean +# +# Method, initially use random forest then smooth using LOESS +# pyhat=TRUE: return the predicted values +# if LP=FALSE, return the random forest predicted values instead. +# +x<-as.matrix(x) +p=ncol(x) +p1=p+1 +if(p==1){ +xs=order(x) +x=x[xs] +y=y[xs] +} +xx<-cbind(x,y) +xx<-elimna(xx) +x<-xx[,1:p] +x<-as.matrix(x) +y<-xx[,p1] +x<-as.data.frame(x) +if(xout){ +x<-as.data.frame(x) +flag<-outfun(x,plotit=FALSE)$keep +x<-x[flag,] +y<-y[flag] +x<-as.data.frame(x) +} +if(is.null(newdata))newdata=as.data.frame(x) +library(randomForest) +a=quantregForest(x,y) +res=predict.robust.Forest(a,newdata=newdata,what=loc.fun,...) +if(plotit){ +if(p==2) +lplot(x,res,ZLIM=ZLIM,span=span,scale=scale,xlab=xlab,ylab=ylab,zlab,zlab,ticktype=ticktype,frame=frame,theta=theta, +phi=phi,pyhat=pyhat,eout=eout,xout=FALSE,pr=FALSE) +if(p==1){ +plot(x[,1],y,xlab=xlab,ylab=ylab,pch=pch) +xs=order(x[,1]) +e=lplot.pred(x[xs,1],res[xs],span=span)$yhat +lines(x[,1],e) +} +} +if(LP)res=lplot.pred(x,res,pts=pts) +if(!pyhat)res=NULL +res +} + + +RFreg=regR.Forest + +class.forest<-function(train=NULL,test=NULL,g=NULL,x1=NULL,x2=NULL,sm=FALSE,fr=2,SEED=NULL){ +# +# Do classification using random forest +# +# +# train is the training set +# test is the test data +# g contains labels for the data in the training set, +# +# Alternatively, store the data for the two groups in +# x1 and x2, in which case the function creates labels, i.e., no need to specify train and g. 
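+#
+# A minimal sketch (hypothetical matrices x1, x2 and test points new.pts):
+# class.forest(x1=x1,x2=x2,test=new.pts)
+# returns a 1 or a 2 for each row of new.pts, with 1 meaning the case is
+# assigned to the first group.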
+# +# SEED=NULL, used for convenience when called by other functions that expect SEED +# +if(is.null(test))stop('Argument test is null, contains no data') +library(randomForest) +if(!is.null(train)){ +if(is.null(g))stop('Argument g, group ids, must be specified') +if(is.matrix(g)){ +if(dim(g)>1)stop('Argument g should be a vector') +} +g=as.numeric(as.vector(g)) +flag=g==min(g) +x1=train[flag,] +x2=train[!flag,] +} +x1=as.matrix(x1) +x2=as.matrix(x2) +if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') +x1=elimna(x1) +x2=elimna(x2) +x1=as.matrix(x1) +x2=as.matrix(x2) +n1=nrow(x1) +n2=nrow(x2) +g=c(rep(0,n1),rep(1,n2)) +train=rbind(x1,x2) +if(is.vector(test))stop('Argument test is a vector, should contain two or more variables') +g=as.factor(g) +train=as.data.frame(train) +d=randomForest(g~., train) +test=as.data.frame(test) +e=predict(d,newdata=test) +res=as.numeric(as.vector(e))+1 +res +} + + +RFbag<-function(train=NULL,test=NULL,g=NULL,x1=NULL,x2=NULL,depthfun=prodepth,kernel='radial',nboot=100,SEED=TRUE,...){ +# +# +# g=class id +# if there are two classes and the training data are stored in separate variables, can enter +# the data for each class via the arguments +# x1 and x2. +# The function will then create appropriate labels and store them in g. +# +# Random forest classification using data depths. +# class., for the n1!=n2 it can be a bit biased, meaning that +# when there is no association, the probability of a correct classification will be less than .5 +# +# +if(SEED)set.seed(2) +if(!is.null(train)){ +if(is.null(g))stop('Argument g, group labels, must be specified') +if(is.matrix(g)){ +if(dim(g)>1)stop('Argument g should be a vector') +traing=elimna(cbind(train,g)) +p=ncol(train) +p1=p+1 +train=traing[,1:p] +test=traing[,p1] +if(length(unique(g))>2)stop('Only two groups allowed, g has more than two unique values') +} +x=fac2list(train,g) +x1=x[[1]] +x2=x[[2]] +} +test=as.matrix(test) +n.test=nrow(test) +if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') +if(ncol(test)!=ncol(x2))stop('test and x2 have different number of columns') +x1=elimna(x1) +x2=elimna(x2) +n1=nrow(x1) +n2=nrow(x2) +n=min(c(n1,n2)) +dvec=matrix(NA,nrow=nboot,ncol=n.test) +for(i in 1:nboot){ +id1=sample(n,n,replace=TRUE) +id2=sample(n,n,replace=TRUE) +xs1=as.data.frame(x1[id1,]) +xs2=as.data.frame(x2[id2,]) +dvec[i,]=class.forest(x1=xs1,x2=xs2,test=test) +} +dec=rep(1,n.test) +test1=dvec==1 +test2=dvec==2 +chk1=apply(test1,2,sum) +chk2=apply(test2,2,sum) +idec=chk2>chk1 +dec[idec]=2 +dec +} + +class.gbm<-function(train=NULL,test=NULL,g=NULL,x1=NULL,x2=NULL,SEED=NULL,n.trees=100){ +# +# Do classification using boosting via the R package gdm +# +# +# train is the training set +# test is the test data +# g contains labels for the data in the training set, +# +# Alternatively, store the data for the two groups in +# x1 and x2, in which case the function creates labels, i.e., no need to specify train and g. +# +# This function removes the need to call library gbm. 
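+# A minimal hedged sketch of a call (the objects x1, x2 and newx are hypothetical;
+# the R package gbm must be installed):
+# x1=matrix(rnorm(100),ncol=2)
+# x2=matrix(rnorm(100,1),ncol=2)
+# newx=matrix(rnorm(20),ncol=2)
+# class.gbm(x1=x1,x2=x2,test=newx,n.trees=100)
+# Returns 1 or 2 for each row of newx; 2 indicates the fitted log-odds favor the second group.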
+# +# SEED=NULL, used for convenience when called by other functions that expect SEED +# +if(is.null(test))stop('Argument test is null, contains no data') +library(gbm) +if(!is.null(train)){ +if(is.null(g))stop('Argument g, group ids, must be specified') +if(is.matrix(g)){ +if(dim(g)>1)stop('Argument g should be a vector') +} +flag=g==min(g) +x1=train[flag,] +x2=train[!flag,] +} + +x1=as.matrix(x1) +x2=as.matrix(x2) +if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') +x1=elimna(x1) +x2=elimna(x2) +x1=as.matrix(x1) +x2=as.matrix(x2) +n1=nrow(x1) +n2=nrow(x2) +g=c(rep(0,n1),rep(1,n2)) +train=rbind(x1,x2) +if(is.vector(test))stop('Argument test is a vector, should contain two or more variables') +if(ncol(train)!=ncol(test))stop('training data and test data have different number of columns') +data=data.frame(g,train) +a=gbm(g~., data=data,distribution ='bernoulli',n.trees=n.trees) +test=data.frame(test) +e=predict(a,newdata=test,n.trees=n.trees) +res=rep(1,nrow(test)) +flag=e>0 +res[flag]=2 +res +} + +nearNN<-function(x,pt=x,K=10,mcov,...){ +# +# identify the K rows in x that are closest to vector in pt +# mcov: some type of covariance matrix associated with x +# +if(!is.matrix(x)& !is.data.frame(x))stop('Data are not stored in a matrix or data frame.') +pt=as.vector(pt) +x=elimna(x) +n=nrow(pts) +if(K>nrow(x))stop(' Cannot have K>n') +dis=sqrt(mahalanobis(x,t(pt),mcov)) +chk.dup=sum(duplicated(dis)) +if(chk.dup>0)dis=jitter(dis) +ord=sort(dis) +id=which(dis<=ord[K]) +id +} + +KNNreg<-function(x,y,pts=NULL,K=10,est=tmean,cov.fun=covmcd, +xout=FALSE,outfun=outpro,...){ +x<-as.matrix(x) +if(ncol(x)==1)stop('Should have two or more independent variables') +xx<-cbind(x,y) +xx<-elimna(xx) +x<-xx[,1:ncol(x)] +x<-as.matrix(x) +y<-xx[,ncol(x)+1] +temp<-NA +x<-as.matrix(x) +if(xout){ +x<-as.matrix(x) +flag<-outfun(x,plotit=plotit,...)$keep +x<-x[flag,] +y<-y[flag] +x<-as.matrix(x) +} +if(is.null(pts))pts=x +n=1 +if(is.matrix(pts) || is.data.frame(pts)) +n=nrow(pts) +if(n==1)pts=matrix(pts,nrow=1) +e=NA +mcov=cov.fun(x)$cov +for(i in 1:n){ +e[i]=est(y[nearNN(x,pt=pts[i,],K=K,mcov=mcov,...)]) +} +e +} + +NN.class<-function(train=NULL,test=NULL,g=NULL,x1=NULL,x2=NULL,SEED=NULL){ +# +# Do classification using the neural network method via the R package neuralnet. +# This function provides another way of applying this approach using R commands +# consistent with other classification methods in Rallfun +# +# +# train is the training set +# test is the test data +# g contains labels for the data in the training set, +# +# Alternatively, store the data for the two groups in +# x1 and x2, in which case the function creates labels, i.e., no need to specify train and g. 
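+# A hedged illustration (the objects x1, x2 and newx are hypothetical; the R package
+# neuralnet must be installed, and the current version allows at most 8 predictors):
+# x1=matrix(rnorm(100),ncol=2)
+# x2=matrix(rnorm(100,1),ncol=2)
+# newx=matrix(rnorm(20),ncol=2)
+# NN.class(x1=x1,x2=x2,test=newx)
+# Fitted values greater than .5 are classified as group 2, otherwise group 1.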
+# +# SEED=NULL, used for convenience when called by other functions that expect SEED +# +if(is.null(test))stop('Argument test is NULL, contains no data') +library(neuralnet) + +if(!is.null(train)){ +if(is.null(g))stop('Argument g, group ids, must be specified') +if(is.matrix(g)){ +if(dim(g)>1)stop('Argument g should be a vector') +}} +if(!is.null(x1)){ +if(!is.null(x2)){ +if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') +x1=elimna(x1) +x2=elimna(x2) +n1=nrow(x1) +n2=nrow(x2) +g=c(rep(0,n1),rep(1,n2)) +train=rbind(x1,x2) +}} +g=as.numeric(as.vector(g)) +train=elimna(train) +if(is.null(test))stop('Argument test is null, no data') +test=elimna(test) +train=as.matrix(train) +test=as.matrix(test) + + +if(is.vector(test))stop('Argument test is a vector, should contain two or more variables') +# Next, store data as expected by neuralnet +ddata=as.matrix(cbind(g,train)) +dimnames(ddata)=list(NULL,NULL) +p1=ncol(ddata) +p=p1-1 +ddata=as.data.frame(ddata) +if(p>8)stop('Current version limited to 8 independent variables') +if(p==2)d=neuralnet(V1~V2+V3,data=ddata) +if(p==3)d=neuralnet(V1~V2+V3+V4,data=ddata) +if(p==4)d=neuralnet(V1~V2+V3+V4+V5,data=ddata) +if(p==5)d=neuralnet(V1~V2+V3+V4+V5+V6,data=ddata) +if(p==6)d=neuralnet(V1~V2+V3+V4+V5+V6+V7,data=ddata) +if(p==7)d=neuralnet(V1~V2+V3+V4+V5+V6+V7+V8,data=ddata) +if(p==8)d=neuralnet(V1~V2+V3+V4+V5+V6+V7+V8+V9,data=ddata) +e=predict(d,newdata=test) +res=rep(1,nrow(test)) +flag=e>.5 +res[flag]=2 +res +} + +NNbag<-function(train=NULL,test=NULL,g=NULL,x1=NULL,x2=NULL,nboot=100,SEED=TRUE,...){ +# +# +# g=class id +# if there are two classes and the training data are stored in separate variables, can enter +# the data for each class via the arguments +# x1 and x2. +# The function will then create appropriate labels and store them in g. +# +# KNN classification using data depths. +# KNNdist uses data depths, for the n1!=n2 it can be a bit biased, meaning that +# when there is no association, the probability of a correct classification will be less than .5 +# +# +if(SEED)set.seed(2) +if(!is.null(train)){ +if(is.null(g))stop('Argument g, group labels, must be specified') +if(is.matrix(g)){ +if(dim(g)>1)stop('Argument g should be a vector') +traing=elimna(cbind(train,g)) +p=ncol(train) +p1=p+1 +train=traing[,1:p] +test=traing[,p1] +if(length(unique(g))>2)stop('Only two groups allowed, g has more than two unique values') +} +x=fac2list(train,g) +x1=x[[1]] +x2=x[[2]] +} +test=as.matrix(test) +n.test=nrow(test) +if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') +if(ncol(test)!=ncol(x2))stop('test and x2 have different number of columns') +x1=elimna(x1) +x2=elimna(x2) +n1=nrow(x1) +n2=nrow(x2) +n=min(c(n1,n2)) +dvec=matrix(NA,nrow=nboot,ncol=n.test) +for(i in 1:nboot){ +id1=sample(n1,n,replace=TRUE) +id2=sample(n2,n,replace=TRUE) +dvec[i,]=NN.class(x1=x1[id1,],x2=x2[id2,],test=test) +} +dec=rep(1,n.test) +test1=dvec==1 +test2=dvec==2 +chk1=apply(test1,2,sum) +chk2=apply(test2,2,sum) +idec=chk2>chk1 +dec[idec]=2 +dec +} + + + class.ada.bag<-function(train=NULL,test=NULL,g=NULL,x1=NULL,x2=NULL,nboot=100, +SEED=TRUE,baselearner='bbs',...){ +# +# class.bag: for n1!=n2 +# when there is no association, the expected probability of a correct classification can differ from .5 +# +# This function deals with this via bootstrap bagging +# g=class labels +# if there are two classes and the training data are stored in separate variables, can enter +# the data for each class via the arguments +# x1 and x2. 
+# The function will then create appropriate labels and store them in g. +# +# nboot: number of bootstrap sample. Using nboot=20, bias remains with n1=200, n2=100 +# +if(SEED)set.seed(2) +if(!is.null(train)){ +if(is.null(g))stop('Argument g, group labels, must be specified') +if(is.matrix(g)){ +if(dim(g)>1)stop('Argument g should be a vector') +traing=elimna(cbind(train,g)) +p=ncol(train) +p1=p+1 +train=traing[,1:p] +test=traing[,p1] +if(length(unique(g))>2)stop('Only two groups allowed, g has more than two unique values') +} +x=fac2list(train,g) +x1=x[[1]] +x2=x[[2]] +} +test=as.matrix(test) +#test=as.data.frame(test) +n.test=nrow(test) +if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') +if(ncol(test)!=ncol(x2))stop('test and x2 have different number of columns') +x1=elimna(x1) +x2=elimna(x2) +n1=nrow(x1) +n2=nrow(x2) +n=min(c(n1,n2)) +dvec=matrix(NA,nrow=nboot,ncol=n.test) +for(i in 1:nboot){ +id1=sample(n1,n,replace=TRUE) +id2=sample(n2,n,replace=TRUE) +dvec[i,]=class.ada(x1=x1[id1,],x2=x2[id2,],test=test,baselearner=baselearner) +} +dec=rep(1,n.test) +test1=dvec==1 +test2=dvec==2 +chk1=apply(test1,2,sum) +chk2=apply(test2,2,sum) +idec=chk2>chk1 +dec[idec]=2 +dec +} + +CLASS.BAG<-function(train=NULL,test=NULL,g=NULL,x1=NULL,x2=NULL,SEED=TRUE,kernel='radial',nboot=100, +method=c('KNN','SVM','DIS','DEP','PRO','NN','RF','ADA','LSM'),depthfun=prodepth,baselearner ='bbs',sm=TRUE,rule=.5,...){ +# +# A collection of classification methods for which the error rate is not +# impacted by unequal sample sizes. +# Bagged version of various classification methods is used. +# +# For methods that do not require bagging, see UB.class. +# +# KNN: calls KNNbag: a robust analog of the K nearest neighbor method +# SVM: a type of bagging method used in conjunction with support vector machine +# DIS: Points classified based on their depths +# DEP: Uses depths as suggested by Makinde and Fasoranbaku (2018). JAS +# PRO: project the points onto a line connecting the centers of the data clouds. +# Then use estimate of the pdf for each group to make a decision about future points. +# NN: Neural network +# RF: Random forest +# ADA: adaboost method +# LSM: Uses a smoother designed for a binary dependent variable. sm=FALSE, uses logistic regression +# +type=match.arg(method) +switch(type, + KNN=KNNbag(train=train,test=test,g=g,x1=x1,x2=x2,SEED=SEED,nboot=nboot), + SVM=SVMbag(train=train,test=test,g=g,x1=x1,x2=x2,depthfun=depthfun,SEED=SEED,nboot=nboot,...), + DIS=dis.depth.bag(train=train,test=test,g=g,x1=x1,x2=x2,SEED=SEED,nboot=nboot), + DEP=Depth.class.bag(train=train,test=test,g=g,x1=x1,x2=x2,SEED=SEED,nboot=nboot), + PRO=pro.class.bag(train=train,test=test,g=g,x1=x1,x2=x2,SEED=SEED,nboot=nboot,...), + NN=NNbag(train=train,test=test,g=g,x1=x1,x2=x2,SEED=SEED,...), + RF=RFbag(train=train,test=test,g=g,x1=x1,x2=x2,SEED=SEED,kernel=kernel,nboot=nboot,...), + ADA=class.ada.bag(train=train,test=test,g=g,x1=x1,x2=x2,baselearner=baselearner,SEED=SEED,nboot=nboot,...), + LSM=LSMbag(train=train,test=test,g=g,x1=x1,x2=x2,sm=sm,rule=rule,SEED=SEED,nboot=nboot,...), + ) +} + +qloc.dif<-function(x,y,est=tmean,...){ +# +# Compute a measure of location for each group. +# Using the data in first group, determine what quantiles +# these measure of location correspond to. +# The difference is used as a measure of effect size. 
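+# A small hedged example (x and y are hypothetical vectors):
+# x=rnorm(50)
+# y=rnorm(50)+.5
+# qloc.dif(x,y)             # based on 20% trimmed means
+# qloc.dif(x,y,est=median)  # based on medians
+# A value of zero indicates no shift; the farther the value is from zero, the larger the effect.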
+# +m1=est(x) +m2=est(y) +q1=mean(x<=m1) +q2=mean(x<=m2) +delta=q1-q2 +delta +} + +loc.dif.summary<-function(x,y){ +# +# Estimate the difference between a collection of measures of location:: +# MEAN: +# MEAN20: 20% mean +# MED: median +# OS: One-step M-estimator +# Mdif: median of typical difference +# +x=elimna(x) +y=elimna(y) +output=matrix(NA,ncol=1,nrow=5) +output[1,1]=tmean(x,tr=0)-tmean(y,tr=0) +output[2,1]=tmean(x,tr=0.2)-tmean(y,tr=0.2) +output[3,1]=median(x)-median(y) +output[4,1]=onestep(x)-onestep(y) +output[5,1]=wmwloc(x,y) +dimnames(output)=list(c('MEAN','MEAN.20%','MEDIAN','M-EST','Mdif'),c('Est')) +output +} + + +dep.loc.summary<-function(x,y){ +# +# Estimate the measures of location based on difference scores: +# MEAN: +# MEAN20: 20% mean +# MED: median +# OS: One-step M-estimator +# Mdif: median of typical difference +# +d=NULL +chk=ncol(x) +if(is.vector(x))d=x-y +if(!is.null(chk)){ +if(dim(x)[2]==2)d=x[,1]-x[,2] +} +if(is.null(d))stop('x and y should be vectors, or x should have two columns') +d=elimna(d) +output=matrix(NA,ncol=1,nrow=4) +output[1,1]=tmean(d,tr=0) +output[2,1]=tmean(d,tr=0.2) +output[3,1]=median(d) +output[4,1]=onestep(d) +dimnames(output)=list(c('MEAN','MEAN.20%','MEDIAN','M-EST'),c('Est')) +output +} + + + +ES.sum.REL.MAG<-function(REL.M,n = 10000,reps=10){ +# +# Determine small medium and large equivalent measures of effect size based on the values in +# REL.M +# +if(length(REL.M)!=3)stop('Should have three value in REL.M') +if(n>10000)n=10000 +x=rnorm(n) +y=rnorm(n) +output=matrix(0,ncol=3,nrow=6) +int=matrix(NA,ncol=3,nrow=6) +dimnames(output)=list(c('AKP','EP','QS (median)','QStr','WMW','KMS'),c('S','M','L')) +for(k in 1:reps){ +for(j in 1:3)int[,j]=ES.summary(x,y-REL.M[j],)[,1] +output=output+int +} +output=output/reps +output +} + +ES.summary<-function(x,y,tr=.2,NULL.V=c(0,0,.5,.5,.5,0),REL.MAG=NULL, REL.M=NULL,n.est=1000000){ +# +# Estimate a collection of effect sizes: +# AKP: Homoscedastic robust analog of Cohen's d +# EP: Explanatory power +# QS: Quantile shift based on the median of the distribution of X-Y, +# QStr: Quantile shift based on the trimmed mean of the distribution of X-Y +# WMW: P(X.5 & output[5,5]< .5){ +output[5,3:5]=1-output[5,3:5] +} +if(output[5,1]<.5 & output[5,5]> .5){ +output[5,3:5]=1-output[5,3:5] +} + +a=akp.effect.ci(x,y,tr=tr,alpha=alpha,nboot=nboot,SEED=SEED) +output[1,6:7]=a$ci +output[1,8]=a$p.value +a=EPci(x,y,tr=tr,alpha=alpha,SEED=SEED,nboot=nboot) +output[2,6:7]=a$ci +output[2,8]=yuen(x,y,tr=tr)$p.value +a=shiftPBci(x,y,locfun=QSfun,alpha=alpha,nboot=nboot,SEED=SEED) +output[3,6:7]=a$ci +output[3,8]=a$p.value +a=shiftPBci(x,y,locfun=tmean,alpha=alpha,nboot=nboot,SEED=SEED) +output[4,6:7]=a$ci +output[4,8]=a$p.value +a=cidv2(x,y,alpha=alpha) +output[5,6:7]=a$p.ci +output[5,8]=a$p.value +a=KMS.ci(x,y,alpha=alpha,nboot=nboot,SEED=SEED) +output[6,6:7]=a$ci +output[6,8]=a$p.value +if(output[6,1]<0)output[6,3:5]=-1*output[6,3:5] +output[,9]=p.adjust(output[,8],method=method) +output +} + + +class.ada<-function(train=NULL,test=NULL,g=NULL,x1=NULL,x2=NULL,sm=FALSE,fr=2,SEED=NULL,baselearner='btree'){ +# +# Do classification using adaboost +# +# baselearner='btree': Stumps +# bbs: Splines +# bols: linear models +# +# train is the training set +# test is the test data +# g contains labels for the data in the training set, +# +# This function removes the need to call library mboost. 
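+# A hedged sketch of a call (the objects x1, x2 and newx are hypothetical;
+# the R package mboost must be installed):
+# x1=matrix(rnorm(100),ncol=2)
+# x2=matrix(rnorm(100,1),ncol=2)
+# newx=matrix(rnorm(20),ncol=2)
+# class.ada(x1=x1,x2=x2,test=newx,baselearner='btree')
+# Positive boosted scores are classified as group 2, otherwise group 1.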
+# +# SEED=NULL, used for convenience when called by other functions that expect SEED +# +library(mboost) +if(!is.null(train)){ +if(is.null(g))stop('Argument g, group ids, must be specified') +if(is.matrix(g)){ +if(dim(g)>1)stop('Argument g should be a vector') +} +g=as.numeric(as.vector(g)) +flag=g==min(g) +x1=train[flag,] +x2=train[!flag,] +} +x1=as.matrix(x1) +x2=as.matrix(x2) +if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') +x1=elimna(x1) +x2=elimna(x2) +x1=as.matrix(x1) +x2=as.matrix(x2) +n1=nrow(x1) +n2=nrow(x2) +g=c(rep(0,n1),rep(1,n2)) +train=rbind(x1,x2) +if(is.null(test))stop('Argument test is null, contains no data') +if(is.vector(test))stop('Argument test is a vector, should contain two or more variables') +g=as.factor(g) +ddata=data.frame(g,train) +d=mboost(g~., data=ddata,family=AdaExp(),baselearner=baselearner) +test=data.frame(test) +e=predict(d,newdata=test) +res=rep(1,nrow(test)) +flag=e>0 +res[flag]=2 +res +} + +CLASS.fun<-function(train=NULL,test=NULL,g=NULL,x1=NULL,x2=NULL, +method=c('KNN','DIS','DEP','SVM','RF','NN','ADA','PRO','LSM','GBT'), +depthfun=prodepth,kernel='radial',baselearner='btree',sm=TRUE,rule=.5,...){ +# +# A collection of classification methods: +# +# KNN: Nearest neighbor using robust depths +# DIS: Points classified based on their depths +# DEP: Uses depths as suggested by Makinde and Fasoranbaku (2018). JAS +# SVM: support vector machine +# RF: Random forest +# NN: neural network +# ADA: ada boost +# PRO: project the points onto a line connecting the centers of the data clouds. +# Then use estimate of the pdf for each group to make a decision about future points. +# GBT: Gradient boosted trees: requires R package gbm +# +type=match.arg(method) +switch(type, + KNN=KNNdist(train=train,test=test,g=g,x1=x1,x2=x2,depthfun=depthfun), + DIS=discdepth(train=train,test=test,g=g,x1=x1,x2=x2), + DEP=Depth.class(train=train,test=test,g=g,x1=x1,x2=x2), + SVM=SVM(train=train,test=test,g=g,x1=x1,x2=x2,kernel=kernel), + RF=class.forest(train=train,test=test,g=g,x1=x1,x2=x2), + NN=NN.class(train=train,test=test,g=g,x1=x1,x2=x2), + ADA=class.ada(train=train,test=test,g=g,x1=x1,x2=x2,baselearner=baselearner), + PRO=pro.classPD(train=train,test=test,g=g,x1=x1,x2=x2), + LSM=class.logR(train=train,test=test,g=g,x1=x1,x2=x2,sm=sm,rule=rule), + GBT=class.gbm(train=train,test=test,g=g,x1=x1,x2=x2), + ) +} + + + +BWPHmcp<-function(J,K, x, method='KMS'){ +# +# For a between-by-within design: +# Check for interactions by comparing binomials +# That is, use a Patel--Hoel approach. +# +# KMS is the Kulinskaya et al. method. 
Other options: +# 'ZHZ' +# 'SK' +# +# +p<-J*K +connum<-(J^2-J)*(K^2-K)/4 +if(is.matrix(x))x<-listm(x) +if(!is.list(x))stop('Data must be stored in a matrix or in list mode') +imap<-matrix(c(1:p),J,K,byrow=TRUE) +outm<-matrix(NA,ncol=10,nrow=connum) +outsk<-matrix(NA,ncol=8,nrow=connum) +dimnames(outsk)<-list(NULL,c('Fac.A','Fac.A','Fac.B','Fac.B','p1','p2','p.value','p.adj')) +dimnames(outm)<-list(NULL,c('Fac.A','Fac.A','Fac.B','Fac.B','p1','p2','ci.low','ci.up','p.value','p.adj')) +ic<-0 +for (j in 1:J){ +for (jj in 1:J){ +if(j1)stop('Argument g should be a vector') +traing=elimna(cbind(train,g)) +p=ncol(train) +p1=p+1 +train=traing[,1:p] +test=traing[,p1] +if(length(unique(g))>2)stop('Only two groups allowed, g has more than two unique values') +} +x=fac2list(train,g) +x1=x[[1]] +x2=x[[2]] +} +test=as.matrix(test) +n.test=nrow(test) +if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') +if(ncol(test)!=ncol(x2))stop('test and x2 have different number of columns') +x1=elimna(x1) +x2=elimna(x2) +n1=nrow(x1) +n2=nrow(x2) +n=min(c(n1,n2)) +dvec=matrix(NA,nrow=nboot,ncol=n.test) +for(i in 1:nboot){ +id1=sample(n,n,replace=TRUE) +id2=sample(n,n,replace=TRUE) +dvec[i,]=class.logR(x1=x1[id1,],x2=x2[id2,],test=test,sm=sm,rule=rule) +} +dec=rep(1,n.test) +test1=dvec==1 +test2=dvec==2 +chk1=apply(test1,2,sum) +chk2=apply(test2,2,sum) +idec=chk2>chk1 +dec[idec]=2 +dec +} + + + +class.error.com<-function(x1=NULL,x2=NULL,train=NULL,g=NULL,method=NULL, +pro.p=.8,nboot=100,EN=FALSE,FAST=TRUE, +SEED=TRUE,...){ +# +# For two classification methods indicated by the arguments +# class.fun1 and +# class.fun2 +# use cross validation coupled with resampling to estimate the probability that of a correct classification. +# +# The data for the two groups can be entered via the arguments +# x1 and x2 +# or +# store all of the data in the argument train in which case g specifies the group +# +# Choices for these two arguments: +# +# Current choices available: +# KNN: Nearest neighbor using robust depths +# DIS: Points classified based on their depths +# DEP: Uses depths as suggested by Makinde and Fasoranbaku (2018). JAS +# SVM: support vector machine +# RF: Random forest +# NN: neural network +# ADA: ada boost +# PRO: project the points onto a line connecting the centers of the data clouds. +# Then use estimate of the pdf for each group to make a decision about future points. +# LSM: smooth version of logistic regression when sm=TRUE; otherwise use logistic regression. +# +# EN=TRUE; use equal samples for the test data to deal with classification bias +# Otherwise, the ratio of the sample sizes is n1/n2 +# +# method=NULL All of the methods listed above will be compared if +# FAST=FALSE +# For method 'PRO', execution time might take several minutes if the sample sizes are large +# For this reason.PRO is is not used if FAST=TRUE +# +# pro.p=.8 means 80% of the data will be used as training data +# nboot=number of bootstrap samples + +# Returns estimate of the error rate plus +# FP (false positive): average proportion of values in x1 erroneously classified as coming from x2 +# Example, x1 contains no fracture, x2 contains fractures. 
+# FN (false negative): average proportion of values in x2 erroneously classified as coming from x1 +# +# +# +if(is.null(method))method=c('KNN','DIS','DEP','SVM','RF','NN','ADA','PRO','LSM') +if(SEED)set.seed(2) +if(!is.null(train)){ +if(is.null(g))stop('Argument g, group ids, must be specified') +if(is.matrix(g))if(dim(g)>1)stop('Argument g should be a vector') +flag=g==min(g) +x1=train[flag,] +x2=train[!flag,] +} +if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') +x1=elimna(x1) +x2=elimna(x2) +if(is.null(x1))stop('Something is wrong, no data in x1') +if(is.null(x2))stop('Something is wrong, no data in x2') +if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') +x1=elimna(x1) +x2=elimna(x2) +x1=as.matrix(x1) +x2=as.matrix(x2) +dimnames(x1)=list(NULL,NULL) # can be necessary to eliminate labels to avoid an error in randomForest. +dimnames(x2)=list(NULL,NULL) +n1=nrow(x1) +n2=nrow(x2) +ns1=round(pro.p*n1) +ns2=round(pro.p*n2) +if(EN)ns1=ns2=min(c(ns1,ns2)) +P1hat=NA +P2hat=NA +Av=NA +Bv=NA +Cv=NA +Dv=NA + +J=length(method) +TE=matrix(NA,nrow=nboot,ncol=J) +FP=matrix(NA,nrow=nboot,ncol=J) +FN=matrix(NA,nrow=nboot,ncol=J) + +for(k in 1:nboot){ +N1=sample(n1,ns1) +N2=sample(n2,ns2) +test1=x1[-N1,] +test2=x2[-N2,] +for(j in 1:J){ +a1=CLASS.fun(x1=x1[N1,],x2=x2[N2,],test=test1,method=method[j],...) +a2=CLASS.fun(x1=x1[N1,],x2=x2[N2,],test=test2,method=method[j],...) +flag1=a1!=1 # ID False negatives e..g., method 1 predict no fracture but fracture occurred. So !flag1 is correct decision +flag2=a2!=2 # ID False positives e..g., predict fracture but no fracture occurred. +flag=c(flag1,flag2) #Overall mistakes +TE[k,j]=mean(flag) +FN[k,j]=mean(flag1) +FP[k,j]=mean(flag2) +}} + +ERR=matrix(NA,nrow=3,ncol=J) +dimnames(ERR)=list(c('TE','FP','FN'),method) +v=apply(TE,2,mean) +ERR[1,]=v +v=apply(FP,2,mean) +ERR[2,]=v +v=apply(FN,2,mean) +ERR[3,]=v + +list(Error.rates=ERR) +} + +runstest.med<-function(x){ +# +# runs test based on whether values are < or > than the median +# +library(tseries) +x=elimna(x) +n=length(x) +g=rep(1,n) +flag=x 8, might include the argument +# op=3 and +# outfun=outproadj +# But this will increase execution time considerably. +# +# m1 is an n by p matrix +# +# For single distribution, m2=NULL, +# test the hypothesis that the measure of location estimated by +# locfun is equal to the value specified by +# nullv. +# +# For two dependent groups, meaning that +# m2 is not mull +# test hypothesis that the difference scores have measures of locations that are equal to nullv +# +# This is done by computing a confidence interval for each of the +# p variables under study using a percentile bootstrap. +# +# +if(is.null(m2))D=m1 +else{ +if(ncol(m1) != ncol(m2))stop('Number of variables in group 1 does not equal the number in group 2.') +D=m1-m2 +} +names(D)=NULL +p=ncol(D) +nb1=nboot+1 +if(SEED)set.seed(2) +D<-elimna(D) +n<-nrow(D) +val<-matrix(0,ncol=p,nrow=nboot) +bvec=matrix(NA,nboot,p) +ci=matrix(NA,2,p) +est=locfun(D,...) +if(is.list(est))est=est$center +for(j in 1: nboot){ +id<-sample(n,size=n,replace=TRUE) +v1<-locfun(D[id,],...) 
+if(is.list(v1)){ +val[j,]=(v1$centerY)')) +res +} + + + +anc.best.ex<-function(x,tr=.2){ +# +# Used by anc.best.crit +pvec=NA +x=elimna(x) +if(is.matrix(x))x=listm(x) +J=length(x) +est=lapply(x,tmean,tr=tr) +est=matl(est) +R=order(est,decreasing = TRUE) +ic=0 +for(j in 2:J){ +ic=ic+1 +pvec[ic]=yuen(x[[R[1]]],x[[R[[j]]]],tr=tr)$p.value +} +pvec +} + +anc.bestH<-function(x,rem=NULL,alpha=.05,tr=.2,iter=5000,SEED=TRUE){ +# +# +# Identify group with largest trimmed mean +# Make a decision if every p.value<=p.crit +# +# p.crit: If NULL, critical p-values are determined so that that FWE is alpha +# This is done using a simulation to determine the null distribution based on +# iter=5000 replications. +# +# +# Returns: +# Best='No Decision' if not significant +# Best= the group with largest measure if a decision can be made. +# +# Confidence intervals having simultaneous probability coverage 1-alpha +# using the adjusted level. +# +x=elimna(x) +if(is.matrix(x))x=listm(x) +J=length(x) +if(J<3)stop('Should have 3 or more groups') +Jm1=J-1 +est=lapply(x,tmean,tr=tr) +n=lapply(x,length) +est=matl(est) +n=as.vector(matl(n)) +R=order(est,decreasing = TRUE) +pvec=NA +if(is.null(rem)){ +pvdist=anc.bestH.crit(J=J,n=n,iter=iter,alpha=alpha,SEED=SEED) +} +output<-matrix(NA,Jm1,8) +dimnames(output)=list(NULL,c('Est.Best','Grp','Est','Dif','ci.low','ci.up','p.value','p.adj')) +for(i in 2:J){ +im1=i-1 +a=yuen(x[[R[1]]],x[[R[[i]]]],alpha=qest(pvdist[,im1],alpha)) +pvec[im1]=mean(pvdist<=a$p.value) +output[im1,1:7]=c(a$est.1,R[[i]],a$est.2,a$dif,a$ci[1],a$ci[2],a$p.value) +} +output[,8]=p.adjust(output[,7]) +Best='No Decisions' +id=output[,8]<=alpha +if(sum(id>0))Best=output[id,2] +if(sum(id)==Jm1)Best='All' +setClass('BIN',slots=c('Group.with.largest.estimate','Larger.than','n','output')) +put=new('BIN',Group.with.largest.estimate=R[[1]],Larger.than=Best,n=n,output=output) +put +} + +anc.best<-function(x,p.crit=NULL,alpha=.05,tr=.2,iter=5000,SEED=TRUE,NEG=FALSE){ +# +# +# For J independent groups +# Identify group with largest trimmed mean +# Make a decision if every p.value<=p.crit +# +# p.crit: If NULL, critical p-values are determined so that that FWE is alpha +# This is done using a simulation to determine the null distribution based on +# iter=5000 replications. +# +# +# Returns: +# Best='No Decision' if not significant +# Best= the group with largest measure if a decision can be made. +# +# Confidence intervals DO NOT necessarily have simultaneous probability coverage 1-alpha +# using the adjusted level. 
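+# A hedged illustration (the list z is hypothetical):
+# z=list(rnorm(30),rnorm(30),rnorm(30,.8))
+# anc.best(z)
+# The group with the largest 20% trimmed mean is compared to each of the other groups via yuen;
+# it is declared best only if every p-value is less than or equal to its critical p-value.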
+# +x=elimna(x) +if(is.matrix(x))x=listm(x) +J=length(x) +if(J<3)stop('Should have 3 or more groups') +if(NEG)for(j in 1:J)x[[j]]=0-x[[j]] +Jm1=J-1 +est=lapply(x,tmean,tr=tr) +n=lapply(x,length) +est=matl(est) +n=as.vector(matl(n)) +R=order(est,decreasing = TRUE) +pvec=NA +if(is.null(p.crit)){ +v=anc.best.crit(J=J,n=n,iter=iter,alpha=alpha,SEED=SEED) +p.crit=v$fin.crit +pvdist=v$pvdist +} +output<-matrix(NA,Jm1,8) +dimnames(output)=list(NULL,c('Est.Best','Grp','Est','Dif','ci.low','ci.up','p.value','p.crit')) +for(i in 2:J){ +im1=i-1 +a=yuen(x[[R[1]]],x[[R[[i]]]],alpha=p.crit[im1]) +pvec[im1]=a$p.value +output[im1,]=c(a$est.1,R[[i]],a$est.2,a$dif,a$ci[1],a$ci[2],a$p.value,p.crit[im1]) +} +Best='No Decisions' +flag=sum(output[,7]<=output[,8]) +id=output[,7]<=output[,8] +if(sum(id>0))Best=output[id,2] +if(flag==Jm1)Best='All' +#setClass('BIN',slots=c('Group.with.largest.estimate','Select.Best.p.value','Larger.than','n','output')) +setClass('BIN',slots=c('Group.with.largest.estimate','Larger.than','n','output')) +#put=new('BIN',Group.with.largest.estimate=R[[1]],Select.Best.p.value=dpv,Larger.than=Best,n=n,output=output) +put=new('BIN',Group.with.largest.estimate=R[[1]],Larger.than=Best,n=n,output=output) +put +} + +anc.best.crit<-function(J,n=30,alpha=.05,tr=.2,iter=5000,SEED=TRUE){ +# +# Determine critical p-values for anc.best +# +if(SEED)set.seed(2) +Jm1=J-1 +rem=matrix(NA,iter,Jm1) +for(k in 1:iter){ +if(length(n)==1){ +x=rmul(n,p=J) +x=listm(x) +} +else{ +x=list() +if(length(n)!=J)stop('J is not equal to the length of n') +for(j in 1:J)x[[j]]=rnorm(n[j]) +} +rem[k,]=anc.best.ex(x,tr=tr) +} +# +init=apply(rem,2,qest,alpha) +z=optim(0,anc.best.fun,init=init,iter=iter,rem=rem,Jm1=Jm1,alpha=alpha,method='Brent',lower=0,upper=1) +fin.crit=z$par*init +list(fin.crit=fin.crit,pvdist=rem) +} + +anc.best.fun<-function(a,init,iter,rem,Jm1,alpha){ +# +chk=0 +init=a*init +for(i in 1:iter){ +flag=0 +for(j in 1:Jm1)if(rem[i,j]<=init[j])flag=flag+1 +if(flag>0)chk=chk+1 +} +chk=chk/iter +dif=abs(chk-alpha) +dif +} + + +anc.best.fun<-function(a,init,iter,rem,Jm1,alpha){ +# +chk=0 +init=a*init +for(i in 1:iter){ +flag=0 +for(j in 1:Jm1)if(rem[i,j]<=init[j])flag=flag+1 +if(flag>0)chk=chk+1 +} +chk=chk/iter +dif=abs(chk-alpha) +dif +} + + + + +anc.bestpb<-function(x,loc.fun=tmean,nboot=3000,p.crit=NULL,alpha=.05,iter=5000,SEED=TRUE,...){ +# +# +# Identify group with largest trimmed mean +# Make a decision if every p.value<=p.crit +# +# p.crit: If NULL, critical p-values are determined so that that FWE is alpha +# This is done using a simulation to determine the null distribution based on +# iter=5000 replications. +# +# +# Returns: +# Best='No Decision' if not significant +# Best= the group with largest measure if a decision can be made. +# +# Confidence intervals having simultaneous probability coverage 1-alpha +# using the adjusted level. +# +x=elimna(x) +if(is.matrix(x))x=listm(x) +J=length(x) +if(J<3)stop('Should have 3 or more groups') +Jm1=J-1 +est=lapply(x,loc.fun,...) +n=lapply(x,length) +est=matl(est) +n=as.vector(matl(n)) +R=order(est,decreasing = TRUE) +pvec=NA +if(is.null(p.crit))p.crit=anc.best.crit(J=J,n=n,iter=iter,alpha=alpha,SEED=SEED)$fin.crit +output<-matrix(NA,Jm1,8) +dimnames(output)=list(NULL,c('Est.Best','Grp','Est','Dif','ci.low','ci.up','p.value','p.crit')) +for(i in 2:J){ +im1=i-1 +a=pb2gen(x[[R[1]]],x[[R[[i]]]],alpha=p.crit[im1],nboot=nboot,est=loc.fun,SEED=SEED,...) 
+pvec[im1]=a$p.value +output[im1,]=c(a$est.1,R[[i]],a$est.2,a$est.dif,a$ci[1],a$ci[2],a$p.value,p.crit[im1]) +} +Best='No Decision' +flag=sum(pvec<=p.crit) +if(flag==Jm1)Best=R[[1]] +list(Group.with.largest.est=R[[1]],Best=Best,n=n,output=output) +} + + anc.bestpb.PV<-function(x,loc.fun=tmean,nboot=2000,alpha=.05,iter=5000,SEED=TRUE,...){ +# +# +# Identify group with largest trimmed mean +# Make a decision if every p.value<=p.crit +# +# p.crit: If NULL, critical p-values are determined so that that FWE is alpha +# This is done using a simulation to determine the null distribution based on +# iter=5000 replications. +# +# +# Returns: +# a p-value related to making a decision about which group has the largest measure of location. +# Best='No Decision' if not significant +# Best= the group with largest measure if a decision can be made. +# +# Confidence intervals having simultaneous probability coverage 1-alpha +# using the adjusted level. +# +x=elimna(x) +if(is.matrix(x))x=listm(x) +J=length(x) +if(J<3)stop('Should have 3 or more groups') +Jm1=J-1 +est=lapply(x,loc.fun,...) +n=lapply(x,length) +est=matl(est) +n=as.vector(matl(n)) +R=order(est,decreasing = TRUE) +pvec=NA +aval=c(seq(.001,.1,.001),seq(.11,.99,.01)) +id=which(aval==alpha) +if(length(id)==0)stop('alpha be one of the values .001(.001).1 or 11(.01).99') +v=anc.best.crit.det(J=J,n=n,iter=iter,alpha=alpha,SEED=SEED) +p.crit=v[id,] +if(is.null(p.crit))p.crit=anc.best.crit(J=J,n=n,iter=iter,alpha=alpha,SEED=SEED) +output<-matrix(NA,Jm1,8) +dimnames(output)=list(NULL,c('Est.Best','Grp','Est','Dif','ci.low','ci.up','p.value','p.crit')) +for(i in 2:J){ +im1=i-1 +a=pb2gen(x[[R[1]]],x[[R[[i]]]],alpha=p.crit[im1],nboot=nboot,est=loc.fun,SEED=SEED,...) +pvec[im1]=a$p.value +output[im1,]=c(a$est.1,R[[i]],a$est.2,a$est.dif,a$ci[1],a$ci[2],a$p.value,p.crit[im1]) +} +# Determine p-value for overall decision +na=length(aval) +for(i in 1:na){ +chk=sum(output[,7]<=v[i,]) +pv=aval[i] +if(chk==Jm1)break +} +Best='No Decisions' +flag=sum(output[,7]<=output[,8]) +id=output[,7]<=output[,8] +if(sum(id>0))Best=output[id,2] +if(flag==Jm1)Best='All' +setClass('BIN',slots=c('Group.with.largest.estimate','Select.Best.p.value','Larger.than','n','output')) +put=new('BIN',Group.with.largest.estimate=R[[1]],Select.Best.p.value=pv,Larger.than=Best,n=n,output=output) +put +} + + +anc.best.PV<-function(x,alpha=.05,tr=.2,iter=5000,SEED=TRUE){ +# +# +# Identify group with largest trimmed mean +# Make a decision if every p.value<=p.crit + +# Unlike ancbest, this function returns a p-value associated with making a decision +# about which group has the largest trimmed mean. +# +# +# Returns: +# Best='No Decision' if not significant +# Best= the group with largest measure if a decision can be made. +# +# Confidence intervals having simultaneous probability coverage 1-alpha +# using the adjusted level. 
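+# A hedged illustration (the list z is hypothetical; note that the critical p-values are
+# estimated via simulation, so the call can take a while):
+# z=list(rnorm(40),rnorm(40),rnorm(40,1))
+# anc.best.PV(z,alpha=.05)
+# In addition to the output of anc.best, the slot Select.Best.p.value contains a p-value
+# for the overall decision about which group has the largest trimmed mean.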
+# +x=elimna(x) +if(is.matrix(x))x=listm(x) +J=length(x) +if(J<3)stop('Should have 3 or more groups') +Jm1=J-1 +est=lapply(x,tmean,tr=tr) +n=lapply(x,length) +est=matl(est) +n=as.vector(matl(n)) +R=order(est,decreasing = TRUE) +pvec=NA + +aval=c(seq(.001,.1,.001),seq(.11,.99,.01)) +id=which(aval==alpha) +if(length(id)==0)stop('alpha be one one values .001(.001).1 or 11(.01).99') +v=anc.best.crit.det(J=J,n=n,iter=iter,alpha=alpha,SEED=SEED) +p.crit=v[id,] + + +output<-matrix(NA,Jm1,8) +dimnames(output)=list(NULL,c('Est.Best','Grp','Est','Dif','ci.low','ci.up','p.value','p.crit')) +for(i in 2:J){ +im1=i-1 +a=yuen(x[[R[1]]],x[[R[[i]]]],alpha=p.crit[im1]) +pvec[im1]=a$p.value +output[im1,]=c(a$est.1,R[[i]],a$est.2,a$dif,a$ci[1],a$ci[2],a$p.value,p.crit[im1]) +} + +# Determine p-value for overall decision +na=length(aval) +for(i in 1:na){ +chk=sum(output[,7]<=v[i,]) +pv=aval[i] +if(chk==Jm1)break +} +Best='No Decisions' +flag=sum(output[,7]<=output[,8]) +id=output[,7]<=output[,8] +if(sum(id>0))Best=output[id,2] +if(flag==Jm1)Best='All' +setClass('BIN',slots=c('Group.with.largest.estimate','Select.Best.p.value','Larger.than','n','output')) +put=new('BIN',Group.with.largest.estimate=R[[1]],Select.Best.p.value=pv,Larger.than=Best,n=n,output=output) +put +} + + + + +anc.best.crit.det<-function(J,n,alpha=.05,tr=.2,iter=5000,SEED=TRUE){ +# +# Determine critical p-values for anc.best +# +if(SEED)set.seed(2) +Jm1=J-1 +rem=matrix(NA,iter,Jm1) +for(k in 1:iter){ +if(length(n)==1){ +x=rmul(n,p=J) +x=listm(x) +} +else{ +x=list() +if(length(n)!=J)stop('J is not equal to the length of n') +for(j in 1:J)x[[j]]=rnorm(n[j]) +} +rem[k,]=anc.best.ex(x,tr=tr) +} +aval=c(seq(.001,.1,.001),seq(.011,.99,.01)) +na=length(aval) +fin.crit=matrix(NA,na,Jm1) +for(i in 1:na){ +init=apply(rem,2,qest,aval[i]) +z=optim(0,anc.best.fun,init=init,iter=iter,rem=rem,Jm1=Jm1,alpha=aval[i],method='Brent',lower=0,upper=1) +fin.crit[i,]=z$par*init +} +fin.crit +} + + + +IND.PAIR.ES<-function(x,con=NULL,fun=ES.summary,...){ +# +# J independent groups +# For each column of a specified matrix of linear contrast +# coefficients, pool the data having a contrast coefficient of 1 into one +# group, do the same for contrast coefficient of -1, +# then estimate measures of effect size. +# +# Default, all pairwise comparisons using all of the measures of effect size +# via ES.summary +# +# To get individual measures of effect size, use +# fun=ESfun and include the argument +# method. +# Example: fun=ESfun, method='EP' does explanatory power. 
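+# For instance, with data for three groups stored in a hypothetical list z:
+# IND.PAIR.ES(z)                        # all pairwise effect sizes via ES.summary
+# IND.PAIR.ES(z,fun=ESfun,method='QS')  # quantile-shift effect size only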
+# Choices for method are: +# EP: Explanatory power +# QS: Quantile shift based on the medians +# QStr: Based on trimmed means +# AKP: Robust analog of Cohen's d +# WMW: P(X10^3){ +if(SEED)set.seed(2) +Nmin1=min(c(nv[1],nv[2],100)) +Nmin2=min(c(nv[3],nv[4],100)) +for(i in 1:iter){ +id1=sample(nv[1],Nmin1) +id2=sample(nv[2],Nmin1) +L1=outer(x[[1]][id1],x[[2]][id2],FUN='-') +id1=sample(nv[3],Nmin2) +id2=sample(nv[4],Nmin2) +L2=outer(x[[3]][id1],x[[4]][id2],FUN='-') +ef[i]=ESfun(L1,L2,method=method,tr=tr,pr=pr,SEED=FALSE) +}} +else{ +L1=outer(x[[1]],x[[2]],FUN='-') +L2=outer(x[[3]],x[[4]],FUN='-') +ef=ESfun(L1,L2,method=method,tr=tr,pr=pr) +} +} +ef=mean(ef) +ef +} + +inter.TDES.sub<-function(x,method='QS',iter=5,SEED=TRUE,tr=.2,pr=FALSE,switch=FALSE){ +# +# +# Measures of effect size for an interaction in a 2-by-2 design +# For level 1 of Factor A, estimate the distribution of the +# the typical difference for levels 1 and 2 Factor B +# Do the same for level 2 of Factor A, and compute a measure of +# effect size based on these two distributions. +# +# swithch=TRUE, interchange the rows and columns +# +# Choices for the argument method: +# 'DNT',`EP',`QS',`QStr',`AKP',`KMS' +# DNT= De Neve and Thas P(X_1-X_2 < X_3-X_4) so a WMW-type measure +# EP=explanatory power, +# QS= quantile shift (median, +# QStr= quantile shift (trimmed mean) , +# AKP =trimmed mean version of Cohen's d, +# KMS=heteroscedastic analog of Cohen's d +# +# +if(is.matrix(x))x=listm(x) +if(switch)x=x[1,3,2,4] +ef=NA +if(length(x)!=4)stop('Limited to a two-by-two design') +x=elimna(x) +FLAG=TRUE +if(method=='DNT'){ +ef=WMWinter.est(x,iter=iter,SEED=SEED) +FLAG=FALSE +} +if(FLAG){ +nv=as.vector(matl(lapply(x,FUN='length'))) +nt=prod(nv) +if(nt>10^3){ +if(SEED)set.seed(2) +Nmin1=min(c(nv[1],nv[2],100)) +Nmin2=min(c(nv[3],nv[4],100)) +for(i in 1:iter){ +id1=sample(nv[1],Nmin1) +id2=sample(nv[2],Nmin1) +L1=outer(x[[1]][id1],x[[2]][id2],FUN='-') +id1=sample(nv[3],Nmin2) +id2=sample(nv[4],Nmin2) +L2=outer(x[[3]][id1],x[[4]][id2],FUN='-') +ef[i]=ESfun(L1,L2,method=method,tr=tr,pr=pr,SEED=FALSE) +}} +else{ +L1=outer(x[[1]],x[[2]],FUN='-') +L2=outer(x[[3]],x[[4]],FUN='-') +ef=ESfun(L1,L2,method=method,tr=tr,pr=pr) +} +} +ef=mean(ef) +ef +} + + + inter.TDES<-function(x,iter=5,SEED=TRUE,tr=.2,pr=FALSE,switch=FALSE){ +# +# +# Compute six measures of effect size for an interaction in a 2-by-2 design +# For level 1 of Factor A, estimate the distribution of the +# the typical difference for levels 1 and 2 Factor B +# Do the same for level 2 of Factor A, and compute a measure of +# effect size based on these two distributions. +# +# swithch=TRUE, interchange the rows and columns +# +# The measues are: +# 'DNT',`EP',`QS',`QStr',`AKP',`KMS' +# DNT= De Neve and Thas P(X_1-X_2 < X_3-X_4) so a WMW-type measure +# EP=explanatory power, +# QS= quantile shift (median), +# QStr= quantile shift (trimmed mean, trimming controlled by the argument tr), +# AKP =trimmed mean version of Cohen's d, +# KMS=heteroscedastic analog of Cohen's d. 
Under normality and homoscedasticity, 2(KMS) = Cohen's d +# +# +meth=c('DNT','EP','QS','QStr','AKP','KMS') +est=matrix(NA,ncol=2,nrow=6) +est[,1]=c(0.5, 0.0, 0.5 ,0.5, 0.0, 0.0) +for(j in 1:6)est[j,2]=inter.TDES.sub(x,method=meth[j],iter=iter,SEED=SEED,tr=tr,pr=pr,switch=switch) +dimnames(est)=list(c('DNT','EP','QS','QStr','AKP','KMS'),c('NULL','EST')) +est +} + + +interES.2by2<-function(x,tr=.2,SW=FALSE){ +# +# Estimate a collection of effect sizes +# for the first row of a 2-by-2 design +# do the same for the second row +# return estimates of the differences +# +# AKP: Homoscedastic robust analog of Cohen's d +# EP: Explanatory power +# QS: Quantile shift based on the median of the distribution of X-Y, +# QStr: Quantile shift based on the trimmed mean of the distribution of X-Y +# KMS: Robust heteroscedastic analog of Cohen's d +# PH: Patel--Hoel, uses Cliff'a analog of Wilcoxon--Mann--Whitney +# +# switch=TRUE: reverses rows and columns + +if(is.matrix(x) || is.data.frame(x))x=listm(x) +if(SW)x=x[c(1,3,2,4)] +J=length(x) +if(J!=4)stop('Should have four groups; designed for a 2-by-2 ANOVA only') +a=c('AKP','EP','QS','QStr','KMS','WMW') +output=matrix(NA,ncol=4,nrow=6) +output[,1]=c(0.0,0.0,0.5,0.5,0.0,0.5) +for(j in 1:6){ +output[j,2]=ESfun(x[[1]],x[[2]],method=a[j],tr=tr,pr=FALSE) +output[j,3]=ESfun(x[[3]],x[[4]],method=a[j],tr=tr,pr=FALSE) +output[j,4]=output[j,2]-output[j,3] +} +dimnames(output)=list(c('AKP','EP','QS (median)','QStr', +'KMS','PH'),c('NULL','Est 1','Est 2','Diff')) +output +} + +interJK.ESmul<-function(J,K,x,method='QS',tr=.2,SEED=TRUE){ +# +# Compute measures of effect size for interactions associated with +# in J-by-K design. +# This is done for all relevant tetrad cells using interES.2by2 +# Missing values are automatically removed. 
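+# For example, for a 2-by-3 design with the six groups stored in a hypothetical list z
+# (ordered by rows: levels 1,1; 1,2; 1,3; 2,1; 2,2; 2,3),
+# interJK.ESmul(2,3,z,method='QS')
+# returns, for every tetrad of cells, the two effect sizes and their difference.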
+# +# Methods, see the R function ESfun +# Defaults to quantile shfit +# +if(is.matrix(x))x<-listm(x) +if(!is.list(x))stop("Data must be stored in a matrix or in list mode.") +CCJ<-(J^2-J)/2 +CCK<-(K^2-K)/2 +CC<-CCJ*CCK +JK=J*K +test<-matrix(NA,CC,7) +x=elimna(x) +mat=matrix(c(1:JK),nrow=J,ncol=K,byrow=TRUE) +dimnames(test)<-list(NULL,c("Factor A","Factor A","Factor B","Factor B","Effect Size 1","Effect Size 2","Diff")) +jcom<-0 +for (j in 1:J){ +for (jj in 1:J){ +if (j < jj){ +for (k in 1:K){ +for (kk in 1:K){ +if (k < kk){ +jcom<-jcom+1 +test[jcom,1]<-j +test[jcom,2]<-jj +test[jcom,3]<-k +test[jcom,4]<-kk +id1=mat[j,k] +id2=mat[j,kk] +a=ESfun(x[[id1]],x[[id2]],method=method,tr=tr,pr=FALSE,SEED=SEED) +id1=mat[jj,k] +id2=mat[jj,kk] +b=ESfun(x[[id1]],x[[id2]],method=method,tr=tr,pr=FALSE,SEED=SEED) +test[jcom,5:7]<-c(a,b,a-b) +}}}}}} +list(EFFECT.est=test) +} + +linsign<-function(x,con,nreps=200,SEED=TRUE,nmax=10^8){ +# +# Estimate the probability that a linear contrast is less than zero +# +if(sum(con)!=0)stop('Contrast coefficients must sum to zero') +if(SEED)set.seed(2) +if(is.data.frame(x))x=as.matrix(x) +if(is.matrix(x))x<-listm(x) +nv=as.vector(matl(lapply(x,FUN='length'))) +if(!is.list(x))stop('Data must be stored in a matrix or in list mode.') +J<-length(x) +nv=as.vector(matl(lapply(x,FUN='length'))) +if(length(con)!=J)stop('Length of con should equal number of groups') +x=elimna(x) +B=list() +np=prod(nv) +nmin=min(nv) +if(np>nmax)nmin=min(c(nmin,100)) +M=matrix(NA,nrow=nmin,ncol=J) +for(i in 1:nreps){ +for(j in 1:J)M[,j]=sample(x[[j]],nmin) +B[[i]]=M +} +L=lapply(B,linWMWMC.sub,con=con) +ef.size=NA +for(j in 1:length(L))ef.size[j]=mean(L[[j]]<0) +ef=mean(ef.size) +ef +} + +LCES<-function(x,con,nreps=200,tr=.2,SEED=TRUE){ +# +# For each column of con, compute four measures of effect size: +# quantile shift based on median +# quantile shift based on a trimmed mean +# AKP generalization of Cohen's d +# SIGN: analog of the sign test. +# +if(is.data.frame(x))x=as.matrix(x) +if(is.matrix(x))x<-listm(x) +x=elimna(x) +con=as.matrix(con) +d=ncol(con) +mat=matrix(NA,nrow=4,ncol=d) +LAB=NULL +for(i in 1:d){ +LAB[i]=paste('Con',i) +mat[1,i]=lin.ES(x,as.vector(con[,i]),locfun=median,nreps=nreps)$Effect.Size +mat[2,i]=lin.ES(x,as.vector(con[,i]),locfun=mean,nreps=nreps,tr=tr)$Effect.Size +mat[3,i]=lin.akp(x,con[,i],locfun=mean,nreps=nreps,tr=tr)$Effect.Size +mat[4,i]=linsign(x,con[,i],nreps=nreps) +} +mat=cbind(c(0.5,0.5,0.0,0.5),mat) +LAB=c('NULL',LAB) +dimnames(mat)=list(c('QS','Qstr','AKP','SIGN'),LAB) +list(EST=mat,con=con) +} + +qno.est<-function(x,q=.5){ +# +# Estimate of the qth quantile +# In some situations, offers a distinct advantage over the Harrell-Davis estimator when +# comparing extreme quantiles and distributions have heavy tails. +# +n<-length(x) +x<-sort(x) +s<-numeric() +ifelse(n>2, {for(g in 1:(n-2)){ +s[g]<-x[g+1]*(dbinom(g, size=n, prob=q)*(1-q)+dbinom(g+1, size=n, prob=q)*q) +} +sum(s,na.rm = TRUE) +t1<-(2*dbinom(0, size=n, prob=q)*q+dbinom(1, size=n, prob=q)*q)*x[1] +t2<-(2*(1-q)*dbinom(n, size=n, prob=q)+dbinom(n-1, size=n, prob=q)*(1-q))*x[n] +t3<-dbinom(0, size=n, prob=q)*(2-3*q)*x[2]-dbinom(0, size=n, prob=q)*(1-q)*x[3]- +dbinom(n, size=n, prob=q)*q*x[n-2]+dbinom(n, size=n, prob=q)*(3*q-1)*x[n-1] +quan<-sum(s,na.rm = T)+t1+t2+t3}, +ifelse(n==2,{quan <- (1-q)*x[1]+q*x[2]},quan<-x)) +quan +} + +bw.es.A<-function(J,K,x,tr=.2,pr=TRUE,fun=ES.summary,...){ +# +# Between-by-within design. 
+ +#Using REL.M, can change default values for small, medium and large +# Example REL.M=c(.1,.3,.5) +# +# For each level of Factor B, compute effect sizes +# for all pairs of levels of Factor A . +# +# The R variable x is assumed to contain the raw +# x stored in list mode. x[[1]] contains the x +# for the first level of both factors: level 1,1. +# x[[2]] is assumed to contain the x for level 1 of the +# first factor and level 2 of the second: level 1,2 +# x[[K]] is the x for level 1,K +# x[[K+1]] is the x for level 2,1, x[2K] is level 2,K, etc. +# +# The default amount of trimming is tr=.2 +# +# It is assumed that x has length JK, the total number of +# groups being tested, but a subset of the x can be analyzed +# using grp +# +# +if(pr){ +if(!identical(fun,ES.summary.CI))print('To get confidence intervals, set the argument fun=ES.summary.CI') +} +if(is.matrix(x) || is.data.frame(x))x=listm(x) +JK=J*K +mat=matrix(c(1:JK),J,K,byrow=TRUE) +B=list() +for(k in 1:K){ +B[[k]]=IND.PAIR.ES(x[mat[,k]],fun=fun,...) +if(k==1){ +if(pr){ +print('B[[1]] contains pairwise measures of effect size for all levels of Factor A') +print(' and level 1 of Factor B') +print(' B[[2]] contains pairwise measures of effect size for all levels of Factor A') +print('and level 2 of Factor B') +}}} +list(B=B) +} + +dep.ES.summary<-function(x,y=NULL,tr=.2, alpha=.05, REL.MAG=NULL,SEED=TRUE,nboot=2000){ +# +# +# For two dependent groups, +# compute confidence intervals for four measures of effect size based on difference scores: +# +# AKP: robust standardized difference similar to Cohen's d +# QS: Quantile shift based on the median of the distribution of difference scores, +# QStr: Quantile shift based on the trimmed mean of the distribution of X-Y +# SIGN: P(X0.5)REL.EF[4,]=.5-(REL.EF[4,]-.5) +output[,3:5]=REL.EF +output +} + +dep.ES.summary.sub<-function(x,y=NULL,tr=.2){ +# +# +# Used to determine equivalent effect size based on specified standard deviations +# + +if(!is.null(y))x=x-y +output=matrix(NA,ncol=2,nrow=4) +dimnames(output)=list(c('AKP','QS (median)','QStr','SIGN'),c('NULL','Est')) +output[1,1:2]=c(0,D.akp.effect(x)) +output[2,1:2]=c(0.5,depQS(x)$Q.effect) +output[3,1:2]=c(0.5,depQS(x,locfun=mean,tr=tr)$Q.effect) +output[4,1:2]=c(0.5,mean(x[x!=0]<0)) +output +} + + +dep.ES.summary.CI<-function(x,y=NULL,tr=.2, alpha=.05, REL.MAG=NULL,SEED=TRUE,nboot=1000,AUTO=FALSE){ +# +# +# For two dependent groups, +# compute confidence intervals for four measures of effect size based on difference scores: +# +# AKP: robust standardized difference similar to Cohen's d +# QS: Quantile shift based on the median of the distribution of difference scores, +# QStr: Quantile shift based on the trimmed mean of the distribution of X-Y +# SIGN: P(X0.5)REL.EF[4,]=.5-(REL.EF[4,]-.5) +output[,3:5]=REL.EF +a=D.akp.effect.ci(x,alpha=alpha,SEED=SEED,tr=tr,nboot=nboot) +output[1,6:7]=a$ci +output[1,8]=a$p.value +#output[1,6:7]=D.akp.effect.ci(x,alpha=alpha,SEED=SEED,tr=tr,nboot=nboot)$ci +a=depQSci(x,alpha=alpha,SEED=SEED,nboot=nboot) +output[2,6:7]=a$ci +output[2,8]=a$p.value +a=depQSci(x,locfun=tmean,alpha=alpha, SEED=SEED,tr=tr,nboot=nboot) +output[3,6:7]=a$ci +output[3,8]=a$p.value +Z=sum(x<0) +nm=length(x[x!=0]) +a=binom.conf.pv(Z,nm,alpha=alpha,AUTO=AUTO,pr=FALSE) +output[4,6:7]=a$ci +output[4,8]=a$p.value +output +} + + +bw.es.B<-function(J,K,x,tr=.2,POOL=FALSE,OPT=FALSE,CI=FALSE,SEED=TRUE,REL.MAG=NULL,pr=TRUE){ +# +# Between-by-within design. 
+# +# For each level of Factor A, compute effect sizes +# for all j0))Best=output[id,2] +if(flag==Jm1)Best='All' +#setClass('BIN',slots=c('Group.with.largest.estimate','Select.Best.p.value','Larger.than','n','output')) #not sure select p.value is valid +#put=new('BIN',Group.with.largest.estimate=R[[1]],Select.Best.p.value=dpv,Larger.than=Best,n=n,output=output) +setClass('BIN',slots=c('Group.with.largest.estimate','Larger.than','n','output')) +put=new('BIN',Group.with.largest.estimate=R[[1]],Larger.than=Best,n=n,output=output) +put +} + +bin.best.crit<-function(p,n,iter=5000,SEED=TRUE){ +# +# +# +if(SEED)set.seed(2) +J=length(n) #Number of groups +Jm1=J-1 +pv.mat=matrix(NA,iter,Jm1) +for(i in 1:iter){ +x=rbinom(J,n,p) +pv.mat[i,]=bin.best.sub(x,n) +} +pv.mat +} + +bin.best.sub<-function(x,n,p.crit=NULL,alpha=.05,iter=5000,SEED=TRUE){ +# +# Used by bin.best.crit +# +# x is a vector containing the number of successes. +# n is a vector indicating the sample sizes. +# +# +J=length(x) +if(J<3)stop('Should have 3 or more groups') +Jm1=J-1 +est=x/n +R=order(est,decreasing = TRUE) +pvec=NA +for(i in 2:J){ +im1=i-1 +a=bi2KMSv2(x[R[1]],n[R[1]],x[R[i]],n[R[i]],alpha=p.crit[im1]) +pvec[im1]=a$p.value +} +pvec=as.vector(matl(pvec)) +pvec +} + + +bin.best.PV<-function(x,n,alpha=.05,iter=5000,SEED=TRUE){ +# +# For J independent groups, +# identify the group with highest probability of success. +# Make a decision if every p.value<=p.crit +# +# x is a vector containing the number of successes. +# n is a vector indicating the sample sizes. +# +# p.crit: If NULL, critical p-values are determined so that that FWE is alpha +# This is done using a simulation to determine the null distribution based on +# iter=5000 replications. +# +# +# Returns: +# Best='No Decision' if not significant +# Best= the group with largest measure if a decision can be made. +# +# Confidence intervals having simultaneous probability coverage 1-alpha +# using the adjusted level. 
+# +J=length(x) +if(J<2)stop('Should have 2 or more groups') +Jm1=J-1 +est=x/n +R=order(est,decreasing = TRUE) +pvec=NA + +phat=sum(x)/sum(n) + + +aval=c(seq(.001,.1,.001),seq(.11,.99,.01)) +id=which(aval==alpha) +if(length(id)==0)stop('alpha be one one values .001(.001).1 or 11(.01).99') + +v=bin.best.crit.det(phat,n=n,iter=iter,SEED=SEED) +p.crit=v[id,] + + +output<-matrix(NA,Jm1,8) +dimnames(output)=list(NULL,c('Est.Best','Grp','Est','Dif','ci.low','ci.up','p.value','p.crit')) +for(i in 2:J){ +im1=i-1 +a=bi2KMSv2(x[R[1]],n[R[1]],x[R[i]],n[R[i]],alpha=p.crit[im1]) +pvec[im1]=a$p.value +output[im1,1:7]=c(a$p1, R[i], a$p2,a$est.dif,a$ci[1],a$ci[2],a$p.value) +} +output[,8]=p.crit + + +# Determine p-value for overall decision +na=length(aval) +for(i in 1:na){ +chk=sum(output[,7]<=v[i,]) +pv=aval[i] +if(chk==Jm1)break +} +Best='No Decisions' +flag=sum(output[,7]<=output[,8]) +id=output[,7]<=output[,8] +if(sum(id>0))Best=output[id,2] +if(flag==Jm1)Best='All' +setClass('BIN',slots=c('Group.with.largest.estimate','Select.Best.p.value','Larger.than','n','output')) +put=new('BIN',Group.with.largest.estimate=R[[1]],Select.Best.p.value=pv,Larger.than=Best,n=n,output=output) +put +} + + +bin.best.crit.det<-function(p,n,iter=5000,SEED=TRUE){ +# +# +# +if(SEED)set.seed(2) +J=length(n) #Number of groups +Jm1=J-1 +pv.mat=matrix(NA,iter,Jm1) +for(i in 1:iter){ +x=rbinom(J,n,p) +pv.mat[i,]=bin.best.sub(x,n) +} +rem=pv.mat +aval=c(seq(.001,.1,.001),seq(.011,.99,.01)) +na=length(aval) +fin.crit=matrix(NA,na,Jm1) +for(i in 1:na){ +init=apply(rem,2,qest,aval[i]) +z=optim(0,anc.best.fun,init=init,iter=iter,rem=rem,Jm1=Jm1,alpha=aval[i],method='Brent',lower=0,upper=1) +fin.crit[i,]=z$par*init +} +fin.crit +} + + +bin.best.EQA<-function(x,n,p.crit=NULL,alpha=.05,iter=5000,SEED=TRUE){ +# +# +# Identify the group with highest probability of success. +# Make a decision if every p.value<=p.crit +# +# x is a vector containing the number of successes. +# n is a vector indicating the sample sizes. +# +# p.crit: If NULL, critical p-values are determined so that that FWE is alpha +# This is done using a simulation to determine the null distribution based on +# iter=5000 replications. +# +# +# Returns: +# Best='No Decision' if not significant +# Best= the group with largest measure if a decision can be made. +# +# Confidence intervals having simultaneous probability coverage 1-alpha +# using the adjusted level. +# +J=length(x) +if(J<2)stop('Should have 2 or more groups') +Jm1=J-1 +est=x/n +R=order(est,decreasing = TRUE) +pvec=NA +init=rep(alpha,Jm1) +if(is.null(p.crit)){ +phat=sum(x)/sum(n) +pv.mat=bin.best.crit(phat,n=n,iter=iter,SEED=SEED) +z=optim(0,anc.best.fun,init=init,iter=iter,rem=pv.mat,Jm1=Jm1,alpha=alpha,method='Brent',lower=0,upper=1) +p.crit=z$par*init +} +output<-matrix(NA,Jm1,8) +dimnames(output)=list(NULL,c('Est.Best','Grp','Est','Dif','ci.low','ci.up','p.value','p.crit')) +for(i in 2:J){ +im1=i-1 +a=bi2KMSv2(x[R[1]],n[R[1]],x[R[i]],n[R[i]],alpha=p.crit[im1]) +pvec[im1]=a$p.value +output[im1,]=c(a$p1, R[i], a$p2,a$est.dif,a$ci[1],a$ci[2],a$p.value,p.crit[im1]) +} +Best='No Decisions' +flag=sum(output[,7]<=output[,8]) +id=output[,7]<=output[,8] +if(sum(id>0))Best=output[id,2] +if(flag==Jm1)Best='All' +setClass('BIN',slots=c('Group.with.largest.estimate','Larger.than','n','output')) +put=new('BIN',Group.with.largest.estimate=R[[1]],Larger.than=Best,n=n,output=output) +put +} + + + +binmcp<-function(x,n,p.crit=NULL,alpha=.05,iter=2000,SEED=TRUE){ +# +# +# x is a vector containing the number of successes. 
+# n is a vector indicating the sample sizes. +# +# p.crit: If NULL, critical p-values are determined so that that FWE is alpha +# This is done using a simulation to determine the null distribution based on +# iter=5000 replications. +# +# Confidence intervals having simultaneous probability coverage 1-alpha +# using the adjusted level. +# +J=length(x) +A=(J^2-J)/2 +if(J<2)stop('Should have 2 or more groups') +Jm1=J-1 +est=x/n +pvec=NA +init=rep(alpha,Jm1) +if(is.null(p.crit)){ +phat=sum(x)/sum(n) +pv.mat=binmcp.crit(phat,n=n,iter=iter,SEED=SEED) +} +p.crit=qest(pv.mat,alpha) +output<-matrix(NA,A,9) +dimnames(output)=list(NULL,c('Grp','Grp','Est 1','Est 2','Dif','ci.low','ci.up','p.value','p.crit')) +p.crit=p.crit/A +ic=0 +for(j in 1:J){ +for(k in 1:J){ +if(j0))Best=output[id,2] +if(flag==Jm1)Best='All' +setClass('BIN',slots=c('Group.with.largest.estimate','Larger.than','n','output')) +put=new('BIN',Group.with.largest.estimate=R[[1]],Larger.than=Best,n=n,output=output) +put +} + + +rmanc.best.crit<-function(x,alpha=.05,tr=.2,iter=5000,SEED=TRUE,...){ +# +# Determine critical p-values for rmanc.best +# +if(SEED)set.seed(2) +library(MASS) +J=ncol(x) +n=nrow(x) +Jm1=J-1 +rem=matrix(NA,iter,Jm1) +A=winall(x,tr=tr)$cov +for(k in 1:iter){ +xs=mvrnorm(n,mu=rep(0,J),Sigma=A) +rem[k,]=rmanc.best.ex(xs,tr=tr) +} +init=apply(rem,2,qest,alpha) +z=optim(0,anc.best.fun,init=init,iter=iter,rem=rem,Jm1=Jm1,alpha=alpha,method='Brent',lower=0,upper=1) +fin.crit=z$par*init +fin.crit +} + +rmanc.best.PV<-function(x,alpha=.05,tr=.2,iter=5000,SEED=TRUE){ +# +# +# For J dependent groups, +# identify the group with largest trimmed mean +# Make a decision if every p.value<=p.crit +# +# p.crit is determined via +# a simulation to determine the null distribution based on +# iter=5000 replications. +# +# +# Returns: +# Best='No Decision' if not significant +# Best= the group with largest measure of location if a decision can be made. +# +# Confidence intervals having simultaneous probability coverage 1-alpha +# using the adjusted level. 
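+# A hedged illustration (m is a hypothetical matrix with columns corresponding to groups;
+# mvrnorm from the MASS package must be available):
+# m=matrix(rnorm(120),ncol=4)
+# rmanc.best.PV(m)
+# The column with the largest 20% trimmed mean is compared to the remaining columns via yuend,
+# and Select.Best.p.value is the p-value for deciding whether it is indeed the best.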
+# +x=elimna(x) +flag=TRUE +if(is.list(x))stop('x should be a matrix or a data frame') +J=ncol(x) +if(J<3)stop('Should have 3 or more groups') +Jm1=J-1 +est=apply(x,2,tmean,tr=tr) +n=nrow(x) +est=matl(est) +R=order(est,decreasing = TRUE) +pvec=NA + +aval=c(seq(.001,.1,.001),seq(.11,.99,.01)) +id=which(aval==alpha) +if(length(id)==0)stop('alpha be one one values .001(.001).1 or 11(.01).99') +v=rmanc.best.crit.det(x,iter=iter,alpha=alpha,tr=tr,SEED=SEED) +p.crit=v[id,] +output<-matrix(NA,Jm1,8) +dimnames(output)=list(NULL,c('Est.Best','Grp','Est','Dif','ci.low','ci.up','p.value','p.crit')) +for(i in 2:J){ +im1=i-1 +a=yuend(x[,R[1]],x[,R[i]],alpha=p.crit[im1],tr=tr) +pvec[im1]=a$p.value +output[im1,]=c(a$est1,R[[i]],a$est2,a$dif,a$ci[1],a$ci[2],a$p.value,p.crit[im1]) +} + +# Determine p-value for overall decision +na=length(aval) +for(i in 1:na){ +chk=sum(output[,7]<=v[i,]) +pv=aval[i] +if(chk==Jm1)break +} +Best='No Decisions' +flag=sum(output[,7]<=output[,8]) +id=output[,7]<=output[,8] +if(sum(id>0))Best=output[id,2] +if(flag==Jm1)Best='All' +setClass('BIN',slots=c('Group.with.largest.estimate','Select.Best.p.value','Larger.than','n','output')) +put=new('BIN',Group.with.largest.estimate=R[[1]],Select.Best.p.value=pv,Larger.than=Best,n=n,output=output) +put +} + +rmanc.best.crit.det<-function(x,tr=.2,iter=5000,SEED=TRUE,...){ +# +# Determine critical p-values for rmanc.best +# +if(SEED)set.seed(2) +J=ncol(x) +n=nrow(x) +Jm1=J-1 +rem=matrix(NA,iter,Jm1) +A=winall(x,tr=tr)$cov +for(k in 1:iter){ +xs=mvrnorm(n,mu=rep(0,J),Sigma=A) +rem[k,]=rmanc.best.ex(xs,tr=tr) +} + +aval=c(seq(.001,.1,.001),seq(.011,.99,.01)) +na=length(aval) +fin.crit=matrix(NA,na,Jm1) +for(i in 1:na){ +init=apply(rem,2,qest,aval[i]) +z=optim(0,anc.best.fun,init=init,iter=iter,rem=rem,Jm1=Jm1,alpha=aval[i],method='Brent',lower=0,upper=1) +fin.crit[i,]=z$par*init +} +fin.crit +} + +ancJN.LC<-function(x,y,pts=NULL,con=NULL,regfun=tsreg,nmin=12,npts=5, +alpha=.05,xout=FALSE,outfun=out,nboot=100,SEED=TRUE,pr=TRUE,...){ +# +# ANCOVA: Linear contrasts +# J independent groups +# using a robust regression estimator. +# By default, use the Theil--Sen estimator +# +# Assume data are in +# x and y: list mode with length J or matrices with J columns +# +# pts can be used to specify the design points where the regression lines +# are to be compared. +# +# npts=5 points are used that are equally spaced +# npts=25 would use 25 points equally spaced. +# +# To get adjusted p-values that control FWE, set +# p6n50=p5n50 and p6n100=p6n100 +# where the R variables p6n5 and p6n100 contain the data in the files +# p6n5.csv and p6n100.csv, which are stored at +# https://dornsife.usc.edu/cf/labs/wilcox/wilcox-faculty-display.cfm +# in the directory labeled datasets +# These adjusted p-values are based on an estimate of the null distribution of the p-values using 10,000 replications. 
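+# A hedged sketch of a call (x and y are hypothetical lists, one element per group):
+# x=list(rnorm(50),rnorm(50),rnorm(50))
+# y=list(rnorm(50),rnorm(50),rnorm(50))
+# ancJN.LC(x,y)
+# By default the Theil--Sen regression lines for all pairs of groups are compared at
+# npts=5 covariate values evenly spaced over the range common to all groups.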
+# +# +if(identical(outfun,boxplot))stop('Use outfun=outbox') +if(SEED)set.seed(2) +FLAG=pts +if(is.matrix(x) || is.data.frame(x))x=listm(x) +if(is.matrix(y) || is.data.frame(y))y=listm(y) +J=length(x) +if(is.null(con))con=con.all.pairs(J) +con=as.matrix(con) + +for(j in 1:J){ +xy=cbind(x[[j]],y[[j]]) +xy=elimna(xy) +x[[j]]=xy[,1] +y[[j]]=xy[,2] +} + +LEV=seq(.001,.1,.001) +# Critical p-values: +PC=c( 0.0004715554, 0.0010600032, 0.0014646097, 0.0018389954, 0.0022100770, 0.0026392236 + , 0.0030845986, 0.0034799136, 0.0039374557, 0.0045480591, 0.0052206792, 0.0057883125 + , 0.0062902648, 0.0068322005, 0.0074001036, 0.0079687149, 0.0085694544, 0.0091944370 + , 0.0097899444, 0.0103380231, 0.0108787002, 0.0114575017, 0.0120748898, 0.0127022411 + , 0.0133151567, 0.0138929664, 0.0144234632, 0.0149174499, 0.0153969778, 0.0158737390 + , 0.0163486640, 0.0168226766, 0.0172990400, 0.0177788317, 0.0182590407, 0.0187350584 + , 0.0192063600, 0.0196827391, 0.0201828045, 0.0207226980, 0.0213060840, 0.0219224603 + , 0.0225496919, 0.0231594943, 0.0237279940, 0.0242453799, 0.0247167117, 0.0251561867 + , 0.0255817130, 0.0260113587, 0.0264602401, 0.0269376359, 0.0274445303, 0.0279729461 + , 0.0285099972, 0.0290459058, 0.0295790691, 0.0301128895, 0.0306482248, 0.0311800075 + , 0.0317011819, 0.0322091285, 0.0327081943, 0.0332067908, 0.0337124366, 0.0342287456 + , 0.0347555836, 0.0352908030, 0.0358314032, 0.0363734449, 0.0369117409, 0.0374406811 + , 0.0379564126, 0.0384591067, 0.0389535674, 0.0394475575, 0.0399489437, 0.0404633647 + , 0.0409932052, 0.0415375872, 0.0420929727, 0.0426543011, 0.0432164300, 0.0437752831 + , 0.0443282924, 0.0448743464, 0.0454136998, 0.0459479133, 0.0464794759, 0.0470108970 + , 0.0475436440, 0.0480776649, 0.0486119396, 0.0491457340, 0.0496797020, 0.0502161233 + , 0.0507581656, 0.0513086116, 0.0518687047, 0.0524377104) + PC100=c( 0.0002966929, 0.0007169931, 0.0011232960, 0.0014665833, 0.0018499989, 0.0021811159, 0.0025574707, 0.0030353289, 0.0035106136, +0.0039372118, 0.0043468911, 0.0047755738, 0.0052219494, 0.0056692252, 0.0061033036, 0.0065103109, 0.0068936246, 0.0072767310 +, 0.0076766388, 0.0080950473, 0.0085363254, 0.0090079848, 0.0095103514, 0.0100413399, 0.0105923616, 0.0111373687, 0.0116501421 +, 0.0121316345, 0.0126067868, 0.0130940807, 0.0135865147, 0.0140673848, 0.0145340038, 0.0149959496, 0.0154605814, 0.0159310004 +, 0.0164110067, 0.0169026157, 0.0173999561, 0.0178905940, 0.0183633096, 0.0188126308, 0.0192375443, 0.0196398972, 0.0200251178 +, 0.0204024531, 0.0207816755, 0.0211673500, 0.0215562764, 0.0219417240, 0.0223201432, 0.0226935235, 0.0230670390, 0.0234465536 +, 0.0238377401, 0.0242449082, 0.0246692644, 0.0251088323, 0.0255604045, 0.0260209857, 0.0264872622, 0.0269547078, 0.0274182559 +, 0.0278743138, 0.0283226660, 0.0287673658, 0.0292163846, 0.0296798737, 0.0301671058, 0.0306828917, 0.0312249485, 0.0317836349 +, 0.0323445314, 0.0328929974, 0.0334187668, 0.0339185027, 0.0343952967, 0.0348558545, 0.0353073007, 0.0357552402, 0.0362033565 +, 0.0366537740, 0.0371074184, 0.0375642517, 0.0380236755, 0.0384852821, 0.0389497084, 0.0394190068, 0.0398960179, 0.0403828416 +, 0.0408792748, 0.0413823211, 0.0418872318, 0.0423894946, 0.0428866160, 0.0433788246, 0.0438685719, 0.0443592827, 0.0448539520 +, 0.0453540386) + +LV10=seq(.11,.99,.01) + +PC2=c( 0.05847911,0.06353127,0.06846620,0.07310904,0.07881193,0.08499317 + ,0.09026885,0.09574866,0.10124244,0.10716984,0.11236755,0.11770113 + ,0.12277641,0.12816436,0.13376686,0.13841793,0.14410387,0.14968273 
+,0.15593615,0.16189668,0.16835575,0.17408581,0.17990687,0.18615084 +,0.19255775,0.19789818,0.20461883,0.21144604,0.21805108,0.22337111 +,0.22987129,0.23652924,0.24281857,0.24931816,0.25527023,0.26170124 +,0.26891556,0.27539636,0.28170631,0.28851563,0.29594635,0.30318192 +,0.31045830,0.31845106,0.32544425,0.33439997,0.34149133,0.34720162 +,0.35473714,0.36186855,0.36966857,0.37744663,0.38495268,0.39335818 +,0.40139728,0.40976955,0.41707974,0.42741844,0.43614133,0.44525646 +,0.45212461,0.46081343,0.46949317,0.47873498,0.48961349,0.50015426 +,0.50931802,0.52037963,0.53148887,0.54267461,0.55344746,0.56735092 +,0.57914832,0.58912604,0.60177045,0.61639495,0.62953494,0.64515539 +,0.66071262,0.67259936,0.68883014,0.70680224,0.72531254,0.74408131 +,0.76444699,0.79049224,0.81775009,0.84817210,0.88710556) + +PC100v2=c( 0.05001203,0.05433510,0.05923540,0.06449529,0.06983196,0.07506101 + ,0.07980333,0.08542380,0.09160087,0.09662086,0.10166432,0.10676042 +,0.11225467,0.11892794,0.12375516,0.12925937,0.13452963,0.14002073 +,0.14507355,0.15091410,0.15606790,0.16249155,0.16797190,0.17373478 +,0.17882609,0.18493900,0.19193563,0.19735772,0.20348841,0.20935194 +,0.21635380,0.22225273,0.22983262,0.23667483,0.24413437,0.25020140 +,0.25702517,0.26403520,0.27014313,0.27804532,0.28442682,0.29088905 +,0.29821809,0.30537715,0.31289211,0.32086820,0.32832301,0.33561776 +,0.34266364,0.34882764,0.35668900,0.36457176,0.37183425,0.37923028 +,0.38652374,0.39483644,0.40189416,0.41095361,0.42066139,0.43029008 +,0.43909925,0.44914388,0.45986646,0.47000245,0.48024114,0.48927833 +,0.50263045,0.51392305,0.52498817,0.53697186,0.54793443,0.55984674 +,0.57110087,0.58246374,0.59547592,0.61054637,0.62515843,0.63817892 +,0.65448463,0.67194322,0.69024711,0.70584982,0.72322165,0.74211690 +,0.76534343,0.78913944,0.81819273,0.84990380,0.89251684) + +LV=c(LEV,LV10) +PC50=c(PC,PC2) +PC100.all=c(PC100,PC100v2) +n=lapply(y,length) +n=as.vector(matl(n)) +nmin=min(n) +if(nmin<=75)cp4=lplot.pred(LV,PC50,alpha)$yhat +else +cp4=lplot.pred(LV,PC100.all,alpha)$yhat +crit=qnorm(1-cp4/2) +if(xout){ +for(j in 1:J){ +flag=outfun(x[[j]],plotit=FALSE,...)$keep +m<-cbind(x[[j]],y[[j]]) +p1=ncol(m) +p=p1-1 +m<-m[flag,] +x[[j]]<-m[,1:p] +y[[j]]<-m[,p1] +}} +if(!is.null(pts))npts=length(pts) +if(is.null(pts[1])){ +xall=lapply(x,unique) +L=lapply(xall,min) +U=lapply(xall,max) +L=matl(L) +U=matl(U) +L=max(L) +U=min(U) +if(L>=U)stop('The range of covariate values is not sufficiently similar among the groups') +pts=seq(max(L),min(U),length.out=npts) +} +NT=ncol(con) +CON=list() +mat<-matrix(NA,npts,8) +dimnames(mat)<-list(NULL,c('X','Est','TEST','se','ci.low','ci.hi','p.value','Adj.p.value')) +mat[,1]=pts +sqsd=list() +est=list() +for(j in 1:J){ +sqsd[[j]]=regYvar(x[[j]],y[[j]],pts=pts,regfun=regfun,nboot=nboot,SEED=FALSE,xout=FALSE,outfun=outfun,...) +est[[j]]=regYhat(x[[j]],y[[j]],xr=pts,regfun=regfun,xout=FALSE,outfun=outfun,...) 
+} +est=matl(est) +sqsd=matl(sqsd) +for(K in 1:NT){ +for(k in 1:npts){ +EST=0 +SD=0 +EST=sum(con[,K]*est[k,]) +SD=sum(con[,K]^2*sqsd[k,]) +sd=sqrt(SD) +mat[k,4]=sd +tests=EST/sd +mat[k,3]=tests +pv=2*(1-pnorm(abs(tests))) +mat[k,7]=pv +mat[k,5]=EST-crit*sd +mat[k,6]=EST+crit*sd +mat[k,2]=EST +mat[k,8]=NA +# Compute a p-value +if(nmin<=75){ +flag=mat[k,7]>=PC50 +ID=which(flag==TRUE) +ic=max(ID,1) +mat[k,8]=LV[ic] +} +else{ +flag=mat[k,7]>=PC100.all +ID=which(flag==TRUE) +ic=max(ID,1) +mat[k,8]=LV[ic] +} +} +CON[[K]]=mat +} +pts=as.matrix(pts,ncol=1) +g.est=cbind(pts,est) +LAB='X' +J1=J+1 +for(j in 2:J1)LAB[j]=paste('GRP',j-1) +dimnames(g.est)=list(NULL,LAB) +list(n=n,crit.p.value=cp4,CON=CON,con=con,GRP.est=g.est) +} + +elimna2g<-function(x,y){ +# +# Assume both are matrices or list mode +# +if(is.matrix(x)){ +J=ncol(x) +if(J!=ncol(y))stop('x and y have different number of columns') +J1=J+1 +J2=2*J +xy=elimna(cbind(x,y)) +x=xy[,1:J] +y=xy[,J1:J2] +} +if(is.list(x)){ +J=length(x) +J1=J+1 +J2=2*J +if(J!=length(y))stop('x and y have different lengths') +xy=elimna(c(x,y)) +x=xy[1:J] +y=xy[J1:J2] +} +list(x=x,y=y) +} + + +smgridVRC<-function(x,y,IV=c(1,2),Qsplit1=.5,Qsplit2=.5,TR=.2,alpha=.05,VAL1=NULL,VAL2=NULL,PB=FALSE,est=tmean,nboot=1000,pr=TRUE,method='hoch', +xout=FALSE,outfun=outpro,SEED=TRUE,...){ +# +# Compare measures of location among grids defined by quantiles of two IVs. By default 20% trimming is used +# est=median would use medians +# est=hd would use the Harrell-Davis estimator for the median. +# +# Qsplit: split the independent variable based on the +# quantiles indicated by Qsplit +# Example +# Qsplit1=c(.25,.5,.75) +# Qsplit2=.5 +# would split based on the quartiles for the first independent variable and the median +# for the second independent variable +# + +# Then test the hypothesis of equal measures of location +# IV[1]: indicates the column of containing the first independent variable to use. +# IV[2]: indicates the column of containing the second independent variable to use. +# +# TR: amount of trimming when using a non-bootstrap method. To alter the amount of trimming when using +# a bootstrap method use +# tr. Example, tr=.25 would use 25% trimming. +# +x=as.matrix(x) +p=ncol(x) +if(p==1)stop('There should be two or more independent variables') +p1=p+1 +xy<-elimna(cbind(x,y)) +x<-xy[,1:p] +y<-xy[,p1] +v=NULL +if(xout){ +flag<-outfun(x,plotit=FALSE,...)$keep +x<-x[flag,] +y<-y[flag] +} +if(identical(est,median))PB=TRUE +if(identical(est,hd))PB=TRUE +z=list() +group=list() +if(is.null(VAL1) || is.null(VA2)){ +N.int=length(Qsplit1)+1 +N.int2=length(Qsplit2)+1 +} +else { +N.int=length(VAL1)+1 +N.int2=length(VAL2)+1 +} +est.mat=matrix(NA,nrow=N.int,ncol=N.int2) +n.mat=matrix(NA,nrow=N.int,ncol=N.int2) +DV.mat=matrix(NA,nrow=N.int,ncol=N.int2) +L1=NULL +L2=NULL +for(i in 1:N.int)L1[i]=paste('IV1.G',i) +for(i in 1:N.int2)L2[i]=paste('IV2.G',i) +dimnames(est.mat)=list(L1,L2) + +if(is.null(VAL1) || is.null(VA2)){ +qv=quantile(x[,IV[1]],Qsplit1) +qv=c(min(x[,IV[1]]),qv,max(x[,IV[1]])) +qv2=quantile(x[,IV[2]],Qsplit2) +qv2=c(min(x[,IV[2]]),qv2,max(x[,IV[2]])) +} +else{ +qv=VAL1 +qv=c(min(x[,IV[1]]),qv,max(x[,IV[1]])) +qv2=VAL2 +qv2=c(min(x[,IV[2]]),qv2,max(x[,IV[2]])) +} +ic=0 +for(j in 1:N.int){ +j1=j+1 +xsub=binmat(xy,IV[1],qv[j],qv[j1]) +for(k in 1:N.int2){ +k1=k+1 +xsub2=binmat(xsub,IV[2],qv2[k],qv2[k1]) +est.mat[j,k]=est(xsub2[,p1],...) 
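+# est.mat[j,k] is the measure of location (est, tmean by default) of the dependent
+# variable for the grid cell defined by the j-th interval of the first independent
+# variable and the k-th interval of the second; n.mat and z, filled in next, store
+# the corresponding cell sample sizes and cell data.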
+n.mat[j,k]=length(xsub2[,p1]) +ic=ic+1 +z[[ic]]=xsub2[,p1] +if(length(z[[ic]])>6)group[[ic]]=summary(xsub2[,1:p]) +} +} +n=NA +for(j in 1:length(z)){ +n[j]=length(z[[j]]) +} +if(min(n)<=5){ +id=which(n>5) +del=which(n<=5) +n=n[id] +if(pr)print(paste('eliminated group',del,'The sample size is less than 6')) +} +NT=N.int*N.int2 +MID=matrix(c(1:NT),nrow=N.int,ncol=N.int2,byrow=TRUE) +# pull out s indicated by the columns of MID and do tests +IV1res=NULL +a=NULL +for(j in 1:N.int2){ +zsub=z[MID[,j]] +DV.mat[,j]=matl(lapply(zsub,est,...)) +if(!PB)a=lincon(zsub,tr=TR,pr=FALSE,alpha=alpha)$psihat[,3:8] +if(PB)a=linpairpb(zsub,nboot=nboot,alpha=alpha,SEED=SEED,...)$output[,c(3:9)] +IV1res=rbind(IV1res,a) +} +#Now do IV2 +IV2res=NULL +a=NULL +for(j in 1:N.int){ +zsub=z[MID[j,]] +if(!PB){ +a=lincon(zsub,tr=TR,pr=FALSE,alpha=alpha)$psihat[,3:8] +} +if(PB){ +a=linpairpb(zsub,nboot=nboot,alpha=alpha,est=est,SEED=SEED,...)$output[,c(3:9)] +} +IV2res=rbind(IV2res,a) +} +if(!PB){ #fix labels add adjusted p-value +IV1res=cbind(IV1res[,1:4],p.adjust(IV1res[,4],method=method),IV1res[,5:6]) +IV2res=cbind(IV2res[,1:4],p.adjust(IV2res[,4],method=method),IV2res[,5:6]) +} +if(PB){ +IV1res[,3]=p.adjust(IV1res[,2],method=method) +IV2res[,3]=p.adjust(IV2res[,2],method=method) +IV1res=IV1res[,c(1,4,5,2,3,6,7)] +IV2res=IV2res[,c(1,4,5,2,3,6,7)] +} +nr=nrow(IV1res) +Lnam1=NULL +for(j in 1:nr)Lnam1=c(Lnam1,paste(' IV1Level',j)) +print(dim(IV1res)) +print(Lnam1) +nr=nrow(IV2res) +Lnam2=NULL +for(j in 1:nr)Lnam2=c(Lnam2,paste('IV2 Level',j)) +dimnames(IV1res)=list(Lnam1,c('psihat','ci.lower','ci.upper','p.value','p.adjust','Est.1','Est.2')) +dimnames(IV2res)=list(Lnam2,c('psihat','ci.lower','ci.upper','p.value','p.adjust','Est.1','Est.2')) +list(est.loc.4.DV=est.mat,n=n.mat,Independent.variables.summary=group,Res.4.IV1=IV1res,Res.4.IV2=IV2res) +} + + + +# -------------------------------------------------------------- +# Code adapted from RGenData::GenDataPopulation from John Ruscio +# -------------------------------------------------------------- + +# Reference +# Ruscio, J. & Kaczetow, W. (2008) +# Simulating Multivariate Nonnormal Data Using an Iterative Algorithm. +# Multivariate Behav Res, 43, 355-381. +# https://www.ncbi.nlm.nih.gov/pubmed/26741201 + +# License: MIT +# Copyright <2018> + +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
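+
+# Illustrative sketch (hypothetical parameter values): the gengh function defined below
+# generates n observations on p=2 correlated variables whose marginal distributions are
+# g-and-h distributions.
+# z=gengh(n=200,p=2,g=.5,h=.1,rho=.4)   # skewed, heavy-tailed marginals
+# cor(z)                                # should be close to .4, apart from sampling error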
+ +# Original +# https://github.com/cran/RGenData/blob/master/R/EFAGenData.R + +# Simulate multivariate g-and-h data using an iterative algorithm +# +# Args: +# n.cases : Number of observations for each variable - default 1000 +# n.variables : Number of variables - default 2 +# g : g parameter of the g-and-h distribution - default 0 +# h : h parameter of the g-and-h distribution - default 0 +# rho : Target correlation between variables - default 0 +# corr.type : Type of correlation - default "pearson", alternative "spearman" +# +# Returns: +# data : Population of data - matrix n.cases rows by n.variables columns +# +gengh <- function(n, p = 2, + g = 0, h = 0, rho = 0, + corr.type = "pearson"){ +n.cases=n +n.variables=p +target.corr <- matrix(c(1, rho, rho, 1), nrow = 2, byrow = TRUE) # covariance matrix + +n.factors <- 0 # Number of factors (scalar) +max.trials <- 5 # Maximum number of trials (scalar) +initial.multiplier <- 1 # Value of initial multiplier (scalar) + +# generate g-and-h data +distributions <- matrix(NA, nrow = n.cases, ncol = n.variables) + for (V in 1:n.variables){ + distributions[,V] <- sort(ghdist(n.cases, g=g, h=h)) + } + +data <- matrix(0, nrow = n.cases, ncol = n.variables) +iteration <- 0 +best.rmsr <- 1 +trials.without.improvement <- 0 +intermediate.corr <- target.corr + +# If number of latent factors was not specified, determine it +if (n.factors == 0){ + Eigenvalues.Observed <- eigen(intermediate.corr)$values + Eigenvalues.Random <- matrix(0, nrow = 100, ncol = n.variables) + Random.Data <- matrix(0, nrow = n.cases, ncol = n.variables) + for (i in 1:100){ + for (j in 1:n.variables){ + Random.Data[,j] <- sample(distributions[,j], size = n.cases, replace = TRUE) + } + Eigenvalues.Random[i,] <- eigen(cor(Random.Data))$values + } + Eigenvalues.Random <- apply(Eigenvalues.Random, 2, mean) # calculate mean eigenvalue for each factor + n.factors <- max(1, sum(Eigenvalues.Observed > Eigenvalues.Random)) +} + +shared.comp <- matrix(rnorm(n.cases * n.factors, 0, 1), nrow = n.cases, + ncol = n.factors) +unique.comp <- matrix(rnorm(n.cases * n.variables, 0, 1), nrow = n.cases, + ncol = n.variables) +shared.load <- matrix(0, nrow = n.variables, ncol = n.factors) +unique.load <- matrix(0, nrow = n.variables, ncol = 1) +while (trials.without.improvement < max.trials) { + iteration <- iteration + 1 + factor.analysis <- FactorAnalysis(intermediate.corr, corr.matrix = TRUE, + max.iteration = 50, n.factors, corr.type) + if (n.factors == 1) { + shared.load[, 1] <- factor.analysis$loadings + } else { + for (i in 1:n.factors) + shared.load[, i] <- factor.analysis$loadings[, i] + } + shared.load[shared.load > 1] <- 1 + shared.load[shared.load < -1] <- -1 + if (shared.load[1, 1] < 0) + shared.load <- shared.load * -1 + for (i in 1:n.variables) + if (sum(shared.load[i, ] * shared.load[i, ]) < 1) { + unique.load[i, 1] <- (1 - sum(shared.load[i, ] * shared.load[i, ])) + } else { + unique.load[i, 1] <- 0 + } + unique.load <- sqrt(unique.load) + for (i in 1:n.variables) + data[, i] <- (shared.comp %*% t(shared.load))[, i] + unique.comp[, i] * + unique.load[i, 1] + for (i in 1:n.variables) { + data <- data[sort.list(data[, i]), ] + data[, i] <- distributions[, i] + } + reproduced.corr <- cor(data, method = corr.type) + residual.corr <- target.corr - reproduced.corr + rmsr <- sqrt(sum(residual.corr[lower.tri(residual.corr)] * + residual.corr[lower.tri(residual.corr)]) / + (.5 * (n.variables * n.variables - n.variables))) + if (rmsr < best.rmsr) { + best.rmsr <- rmsr + best.corr <- 
intermediate.corr + best.res <- residual.corr + intermediate.corr <- intermediate.corr + initial.multiplier * + residual.corr + trials.without.improvement <- 0 + } else { + trials.without.improvement <- trials.without.improvement + 1 + current.multiplier <- initial.multiplier * + .5 ^ trials.without.improvement + intermediate.corr <- best.corr + current.multiplier * best.res + } +} + +factor.analysis <- FactorAnalysis(best.corr, corr.matrix = TRUE, + max.iteration = 50, n.factors, + corr.type) +if (n.factors == 1) { + shared.load[, 1] <- factor.analysis$loadings +} else { + for (i in 1:n.factors) + shared.load[, i] <- factor.analysis$loadings[, i] +} +shared.load[shared.load > 1] <- 1 +shared.load[shared.load < -1] <- -1 +if (shared.load[1, 1] < 0) + shared.load <- shared.load * -1 +for (i in 1:n.variables) + if (sum(shared.load[i, ] * shared.load[i, ]) < 1) { + unique.load[i, 1] <- (1 - sum(shared.load[i, ] * shared.load[i, ])) + } else { + unique.load[i, 1] <- 0 + } +unique.load <- sqrt(unique.load) +for (i in 1:n.variables) + data[, i] <- (shared.comp %*% t(shared.load))[, i] + unique.comp[, i] * + unique.load[i, 1] +data <- apply(data, 2, scale) # standardizes each variable in the matrix +for (i in 1:n.variables) { + data <- data[sort.list(data[, i]), ] + data[, i] <- distributions[, i] +} +data +} + +################################################################################ +FactorAnalysis <- function(data, corr.matrix = FALSE, max.iteration = 50, + n.factors = 0, corr.type = "pearson") { +# Analyzes comparison data with known factorial structures +# +# Args: +# data : Matrix to store the simulated data. +# corr.matrix : Correlation matrix (default is FALSE) +# max.iteration : Maximum number of iterations (scalar, default is 50). +# n.factors : Number of factors (scalar, default is 0). +# corr.type : Type of correlation (character, default is "pearson", +# user can also call "spearman"). +# +# Returns: +# $loadings : Factor loadings (vector, if one factor. matrix, if multiple +# factors) +# $factors : Number of factors (scalar). +# + data <- as.matrix(data) + n.variables <- dim(data)[2] + if (n.factors == 0) { + n.factors <- n.variables + determine <- TRUE + } else { + determine <- FALSE + } + if (!corr.matrix) { + corr.matrix <- cor(data, method = corr.type) + } else { + corr.matrix <- data + } + criterion <- .001 + old.h2 <- rep(99, n.variables) + h2 <- rep(0, n.variables) + change <- 1 + iteration <- 0 + factor.loadings <- matrix(nrow = n.variables, ncol = n.factors) + while ((change >= criterion) & (iteration < max.iteration)) { + iteration <- iteration + 1 + eigenvalue <- eigen(corr.matrix) + l <- sqrt(eigenvalue$values[1:n.factors]) + for (i in 1:n.factors) + factor.loadings[, i] <- eigenvalue$vectors[, i] * l[i] + for (i in 1:n.variables) + h2[i] <- sum(factor.loadings[i, ] * factor.loadings[i, ]) + change <- max(abs(old.h2 - h2)) + old.h2 <- h2 + diag(corr.matrix) <- h2 + } + if (determine) n.factors <- sum(eigenvalue$values > 1) + return(list(loadings = factor.loadings[, 1:n.factors], + factors = n.factors)) +} + + +rmVARcom<-function(x,y=NULL,alpha=.05,est=bivar,plotit=TRUE,nboot=500,SEED=TRUE,...){ +# +# Use a percentile bootstrap method to compare dependent groups. +# based on some robust measure of variation. +# +# if y=NULL, assume x is a matrix or data frame with two columns. +# +# +# nboot is the number of bootstrap samples. 
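+#
+# Illustrative sketch (hypothetical data): percentile bootstrap comparison of a robust
+# measure of variation (est=bivar by default) for two dependent groups.
+# m=cbind(rnorm(30),rnorm(30,sd=1.5))   # two dependent measures with unequal spread
+# rmVARcom(m,nboot=500)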
+# +# +if(!is.null(y[1]))x<-cbind(x,y) +if(ncol(x)>2)stop('x should have at most two columns') +x=elimna(x) +n=nrow(x) +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +data<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot) +bvec=matrix(NA,nboot,2) +for(ib in 1:nboot){ +bvec[ib,]<-apply(x[data[ib,],],2,est,...) +} +# +# Now have an nboot by 2 matrix of bootstrap values. +# +pstar=mean(bvec[,1]2, or data.frame or list mode with length >2') +x=elimna(x) +if(is.matrix(x))x=listm(x) +J=length(x) +if(J<3)stop('Should have 3 or more groups') +Jm1=J-1 +est=lapply(x,tmean,tr=tr) +n=lapply(x,length) +est=matl(est) +n=as.vector(matl(n)) +R=order(est) +pvec=NA +aval=c(seq(.001,.1,.001),seq(.11,.99,.01)) +if(length(id)==0)stop('alpha must be one of values .001(.001).1 or 11(.01).99') +v=ord.loc.crit.det(J=J,n=n,iter=iter,alpha=alpha,SEED=SEED) +p.crit=v[id,] + +pvdist=NULL +if(is.null(p.crit)){ +v=ord.loc.crit(J=J,n=n,iter=iter,alpha=alpha,SEED=SEED) +p.crit=v$fin.crit +pvdist=v$pvdist +} +output<-matrix(NA,Jm1,9) +dimnames(output)=list(NULL,c('Grp.L','Grp.R','Est.L','Est.R','Dif','ci.low','ci.up','p.value','p.crit')) +for(i in 2:J){ +im1=i-1 +a=yuen(x[[R[im1]]],x[[R[[i]]]],alpha=p.crit[im1]) +pvec[im1]=a$p.value +output[im1,]=c(R[im1],R[i],a$est.1,a$est.2,a$dif,a$ci[1],a$ci[2],a$p.value,p.crit[im1]) +} +dpv=NA +if(!is.null(pvdist)){ +chk=0 +for(i in 1:iter){ +flag=0 +for(j in 1:Jm1)if(pvdist[i,j]<=output[j,7])flag=flag+1 +if(flag>0)chk=chk+1 +} +dpv=chk/iter +} +# Determine p-value for overall decision +na=length(aval) +for(i in 1:na){ +chk=sum(output[,7]<=v[i,]) +pv=aval[i] +if(chk==Jm1)break +} +ORD.ID='NO' +id=output[,8]<=output[,9] +if(sum(id)==Jm1)ORD.ID='YES' +setClass('BIN',slots=c('Make.a.Decison','Decision.p.value','Estimates','n','output')) +put=new('BIN',Make.a.Decison=ORD.ID,Decision.p.value=pv,Estimates=est,n=n,output=output) +put +} + +ord.loc.crit<-function(J,n=30,alpha=.05,tr=.2,iter=5000,SEED=TRUE){ +# +# Determine critical p-values for ord.loc +# +if(SEED)set.seed(2) +Jm1=J-1 +rem=matrix(NA,iter,Jm1) +for(k in 1:iter){ +if(length(n)==1){ +x=rmul(n,p=J) +x=listm(x) +} +else{ +x=list() +if(length(n)!=J)stop('J is not equal to the length of n') +for(j in 1:J)x[[j]]=rnorm(n[j]) +} +rem[k,]=ord.loc.ex(x,tr=tr) +} +init=apply(rem,2,qest,alpha) +#print(apply(rem,2,mean)) +#print(init) +z=optim(0,ord.loc.fun,init=init,iter=iter,rem=rem,Jm1=Jm1,alpha=alpha,method='Brent',lower=0,upper=1) +fin.crit=z$par*init +list(fin.crit=fin.crit,pvdist=rem) +} + +ord.loc.fun<-function(a,init,iter,rem,Jm1,alpha){ +# +chk=0 +init=a*init +for(i in 1:iter){ +flag=0 +for(j in 1:Jm1)if(rem[i,j]<=init[j])flag=flag+1 +if(flag>0)chk=chk+1 +} +chk=chk/iter +dif=abs(chk-alpha) +dif +} + +ord.loc.crit.det<-function(J,n,alpha=.05,tr=.2,iter=5000,SEED=TRUE){ +# +# Determine critical p-values for anc.best +# +if(SEED)set.seed(2) +Jm1=J-1 +rem=matrix(NA,iter,Jm1) +for(k in 1:iter){ +if(length(n)==1){ +x=rmul(n,p=J) +x=listm(x) +} +else{ +x=list() +if(length(n)!=J)stop('J is not equal to the length of n') +for(j in 1:J)x[[j]]=rnorm(n[j]) +} +rem[k,]=ord.loc.ex(x,tr=tr) +} +aval=c(seq(.001,.1,.001),seq(.011,.99,.01)) +na=length(aval) +fin.crit=matrix(NA,na,Jm1) +for(i in 1:na){ +init=apply(rem,2,qest,aval[i]) +z=optim(0,ord.loc.fun,init=init,iter=iter,rem=rem,Jm1=Jm1,alpha=aval[i],method='Brent',lower=0,upper=1) +fin.crit[i,]=z$par*init +} +fin.crit +} + +RMcomvar.locdis<-function(x,y, +loc.fun=median,CI=FALSE,plotit=TRUE,xlab='First Group', +ylab='Est.1 - 
Est.2',ylabQCOM='Est.2 - Est.1',sm=TRUE,QCOM=TRUE,q=c(.1,.25,.75,.9),MC=FALSE,nboot=2000,PR=TRUE,...){ +# +# Compare the marginal distributions of two dependent groups in terms of the +# variation in the tails using all of the quantiles +# after centering the data. +# +# CI=FALSE, suppresses confidence intervals +# +if(!QCOM){ +if(PR){ +print('Interpretation: when using QCOM=F: If values in q.sig.greater are less than .5') +print('this indicates more variation in the lower tail for group 1') +print('Interpretation: If values in q.sig.greater are greater than .5') +print('This indicates more variation in the lower tail for group 2') + +print('Interpretation: If values in q.sig.less are less than .5') +print('this indicates more variation in the upper tail for group 2') +print('Interpretation: If values in q.sig.less are greater than .5') +print('This indicates more variation in the upper tail for group 1') +} +} +x=elimna(x) +y=elimna(y) +mx=loc.fun(x,...) +my=loc.fun(y,...) +X=x-mx +Y=y-my +if(!QCOM){ +a=lband(X,Y,plotit=plotit,xlab=xlab,ylab=ylabQCOM,sm=sm,CI=CI) +if(!CI)a$m=NULL +} +else{ +a=Dqcomhd(X,Y,q=q,nboot=nboot,plotit=plotit,xlab=xlab,ylab=ylab) +} +a +} + +g5.cen.plot<-function(x1, x2, x3 = NULL, x4 = NULL, x5 = NULL, fr = 0.8, + aval = 0.5, xlab = 'X', ylab ='', color = rep('black', 5), + main = NULL, sub = NULL,loc.fun=median){ +# +# Same a g5plot, only center the data based on the +# measure of location indicated by the argument +# loc.fun +# +x1=elimna(x1) +x2=elimna(x2) +x1=x1-loc.fun(x1) +x2=x2-loc.fun(x2) +if(!is.null(x3))x3=x3-loc.fun(x3) +if(!is.null(x4))x4=x4-loc.fun(x4) +if(!is.null(x5))x5=x5-loc.fun(x5) +g5plot(x1=x1, x2=x2, x3=x3, x4 = x4, x5 = x5, fr = fr, + aval = aval, xlab = xlab, ylab =ylab, color = color, + main = main, sub = sub) +} + + +rmul.MAR<-function(n,p=2,g=rep(0,p),h=rep(0,p),rho=0,cmat=NULL){ +# +# Generate multivariate normal data and transform the marginal +# distributions to g-and-h distributions +# +if(!is.null(cmat)){ +if(ncol(cmat)!=p)stop('cmat: number of columns must equal the value in the argument p') +} +if(abs(rho)>1)stop('rho must be between -1 and 1') +if(is.null(cmat)){ +cmat<-matrix(rho,p,p) +diag(cmat)<-1 +} +if(length(g)!=p)stop('Length of g should equal p') +if(length(h)!=p)stop('Length of h should equal p') +library(MASS) +x=mvrnorm(n,rep(0,p),cmat) +for(j in 1:p){ +if(g[j]==0)x[,j]=x[,j]*exp(h[j]*x[,j]^2/2) +if(g[j]>0)x[,j]=(exp(g[j]*x[,j])-1)*exp(h[j]*x[,j]^2/2)/g[j] +} +x +} + +lnormsd=function()sqrt(exp(1))*sqrt(exp(1)-1) #standard deviation of a lognormal distribution. + + + +varcom.IND.MP<-function(x,y,SEED=TRUE){ +# +# +# Compare the variances of two independent variables. +# Uses an updated Morgan-Pitman test based on a random +# permutations of the data. +# +# Returns a p-value and estimates of the variances +# No confidence interval +# +if(SEED)set.seed(2) +x=elimna(x) +y=elimna(y) +e1=var(x) +e2=var(y) +if(length(x)>length(y)){ +tempx=x +tempy=y +x=tempy +y=tempx +} +n1=length(x) +n2=length(y) +n=min(n1,n2) +nmax=max(n1,n2) +X=sample(x,n1) +Y=sample(y,n2) +p1=comdvar(X[1:n],Y[1:n])$p.value +PV=p1 +if(n1!=n2){ +neq=floor(nmax/n) +EQ=neq*n1 +A=matrix(c(1:EQ),nrow=n1) +PV=NA +J=ncol(A) +for(j in 1:J)PV[j]=comdvar(X[1:n],Y[A[,j]])$p.value +if(nmax>EQ){ +d=n2-n1+1 +Y2=Y[n2:d] #deliberately reversed the order. 
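+# Y2 holds the last n1 values of the permuted y, in reverse order, so the observations
+# not covered by the complete blocks indexed by A are still used: they are paired with
+# X[1:n] and contribute one additional Morgan-Pitman p-value via comdvar below.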
+p2=comdvar(X[1:n],Y2)$p.value +PV=c(PV,p2) +}} +PV=min(p.adjust(PV,method='hoch')) +list(est1=e1,est2=e2,p.value=PV) +} + + + +selvar.ind.ex<-function(x){ +# +pvec=NA +x=elimna(x) +if(is.matrix(x))x=listm(x) +J=length(x) +EST=lapply(x,var) +EST=matl(EST) +R=order(EST) +ic=0 +for(j in 2:J){ +ic=ic+1 +pvec[ic]=varcom.IND.MP(x[[R[1]]],x[[R[[j]]]],SEED=FALSE)$p.value +} +pvec +} + +selvar.ind.crit<-function(J,n,alpha=.05,iter=1000,...){ +# +# Determine null distribution of p-values for selvar.ind.MP +# +Jm1=J-1 +rem=matrix(NA,iter,Jm1) +XS=list() +for(k in 1:iter){ +for(j in 1:J)XS[[j]]=rnorm(n[j]) +rem[k,]=selvar.ind.ex(XS) +} +rem +} + +selvar.ind.MP<-function(x,alpha=.05, rem=NULL,iter=2000,p.crit=NULL,SEED=TRUE){ +# +# For J independent groups, +# identify the group with smallest variance +# Make a decision if every p.value<=p.crit +# +# x is a matrix or data frame or can have list mode. +# +# rem: The distribution of the null p-value under normality. +# If not specified, the funcion computes it using a simulation with iter replications. +# Or, one could compute it with the R function selva.ind.crit, store the +# results in some R variable, say Z, then use rem=Z in future applications +# that have the same sample sizes +# +# +# +# Returns: +# Best='No Decision' if not significant +# Best= the group with largest measure if a decision can be made. +# +# +if(is.matrix(x) || is.data.frame(x))x=listm(x) +x=elimna(x) +J=length(x) +if(J<3)stop('Should have 3 or more groups') +Jm1=J-1 +est=lapply(x,var) +n=lapply(x,length) +est=as.vector(matl(est)) +n=as.vector(matl(n)) +R=order(est) +pvec=NA +if(is.null(rem))rem=selvar.ind.crit(J=J,n=n,iter=iter,SEED=SEED) +output<-matrix(NA,Jm1,5) +dimnames(output)=list(NULL,c('Smallest.Est','Grp','Est','p.value','p.adj')) +for(i in 2:J){ +im1=i-1 +SM=est[R[1]] +a=varcom.IND.MP(x[[R[1]]],x[[R[i]]],SEED=SEED) +pvec[im1]=a$p.value +pv=mean(rem[,im1]<=pvec[im1]) +output[im1,1:4]=c(SM,R[i],est[R[i]],pv) +} +output[,5]=p.adjust(output[,4],method='hoch') +Best='No Decision' +flag=sum(output[,4]<=alpha) +id=output[,5]<=alpha +if(sum(id>0))Best=output[id,2] +if(flag==Jm1)Best='Smaller.than.all' +setClass('SSV',slots=c('Group.with.smallest.estimate','Less.than','n','output')) +put=new('SSV',Group.with.smallest.estimate=R[[1]],Less.than=Best,n=n,output=output) +put +} + +selvar.ind.MP<-function(x,alpha=.05, rem=NULL,iter=2000,p.crit=NULL,SEED=TRUE){ +# +# For J independent groups, +# identify the group with smallest variance +# Make a decision if every p.value<=p.crit +# +# x is a matrix or data frame or can have list mode. +# +# rem: The distribution of the null p-value under normality. +# If not specified, the funcion computes it using a simulation with iter replications. +# Or, one could compute it with the R function selva.ind.crit, store the +# results in some R variable, say Z, then use rem=Z in future applications +# that have the same sample sizes +# +# +# +# Returns: +# Best='No Decision' if not significant +# Best= the group with largest measure if a decision can be made. 
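+#
+# Illustrative sketch (hypothetical data; argument values chosen only for the example):
+# identify the group having the smallest variance among J=3 independent groups.
+# x=list(rnorm(40),rnorm(40,sd=2),rnorm(40,sd=3))
+# selvar.ind.MP(x,alpha=.05,iter=1000)   # iter reduced only to speed up the example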
+# +# +if(is.matrix(x) || is.data.frame(x))x=listm(x) +x=elimna(x) +J=length(x) +if(J<3)stop('Should have 3 or more groups') +Jm1=J-1 +est=lapply(x,var) +n=lapply(x,length) +est=as.vector(matl(est)) +n=as.vector(matl(n)) +R=order(est) +pvec=NA +if(is.null(rem))rem=selvar.ind.crit(J=J,n=n,iter=iter,SEED=SEED) +output<-matrix(NA,Jm1,5) +dimnames(output)=list(NULL,c('Smallest.Est','Grp','Est','p.value','p.adj')) +for(i in 2:J){ +im1=i-1 +SM=est[R[1]] +a=varcom.IND.MP(x[[R[1]]],x[[R[i]]],SEED=SEED) +pvec[im1]=a$p.value +pv=mean(rem[,im1]<=pvec[im1]) +output[im1,1:4]=c(SM,R[i],est[R[i]],pv) +} +output[,5]=p.adjust(output[,4],method='hoch') +Best='No Decision' +flag=sum(output[,5]<=alpha) +id=output[,5]<=alpha +if(sum(flag>0))Best=output[id,2] +if(flag==Jm1)Best='Smaller.than.all' +setClass('SSV',slots=c('Group.with.smallest.estimate','Less.than','n','output')) +put=new('SSV',Group.with.smallest.estimate=R[[1]],Less.than=Best,n=n,output=output) +put +} + +corCOM.DVvsIV<-function(x,y,com.p.dist=FALSE,corfun=wincor,iter=200,PV=NULL,pr=TRUE,neg.col=NULL,LARGEST=TRUE, +alpha=.05,nboot=500,SEED=TRUE,MC=FALSE,xout=FALSE,outfun=outpro,FWE.method='hoch',...){ +# +# Regresiion: +# Consider the IV with the largest correlation estimate with the DV +# Is it reasonable to decide that it has the highest population +# correlation? +# +# That is, have two or more independent variables, compare +# cor(y,x_I) to cor(y,x_k) for all k!=I, where +# cor(i,x_I) is the highest correlation +# Winsorized correlation is used by default. +# Hochberg's method is used to control FWE. +# +# x is assumed to be a matrix or data frame +# +# Possible alternative choices for corfun include: +# spear +# tau +# pbcor +# bicor +# scor +# mve.cor +# mcd.cor +# +# +if(nrow(x)!=length(y))stop('x and y have different sample sizes; should be equal') +p=ncol(x) +if(p<=2)stop('Should have 3 or more independent variables. With two, use TWOpov') +pm1=p-1 +p1=p+1 +m1=cbind(x,y) +m1<-elimna(m1) # Eliminate rows with missing values +if(xout){ +m<-cbind(x,y) +flag<-outfun(x,plotit=FALSE)$keep +m<-m[flag,] +x<-m[,1:p] +y<-m[,p1] +} +nval=nrow(x) +x=neg.colM(x,neg.col) +L=c(seq(.001,.1,.001),seq(.11,.99,.01)) + +if(!is.null(PV)){ +rem=matrix(NA,length(L),pm1) +for(k in 1:pm1){ +for(i in 1:length(L))rem[i,k]=hd(PV[,k],q=L[i]) +}} +if(p>6)com.p.dist=TRUE +if(com.p.dist){ +if(is.null(PV)){ +if(pr)print('Computing the null distribution can take several minutes') +PV=corCOM.DVvsIV.crit(p,nval,iter=iter,MC=MC,...) 
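+# In the loop that follows, rem[i,k] is the Harrell-Davis (hd) estimate of the L[i]
+# quantile of the null distribution of the p-value for the k-th comparison, computed
+# from the simulated p-values stored in the columns of PV.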
+rem=matrix(NA,length(L),pm1) +for(k in 1:pm1){ +for(i in 1:length(L))rem[i,k]=hd(PV[,k],q=L[i]) +}}} + +P3=matrix(c( 0.002175049 ,1.335108e-05 + ,0.005872039 ,7.055924e-05 + ,0.010619227 ,2.497530e-04 + ,0.016286140 ,5.345789e-04 + ,0.022398371 ,7.976283e-04 + ,0.028262446 ,1.277063e-03 + ,0.033345906 ,1.627195e-03 + ,0.037456504 ,1.939087e-03 + ,0.040694218 ,2.234302e-03 + ,0.043307931 ,2.546080e-03 + ,0.045568092 ,2.903338e-03 + ,0.047696567 ,3.323658e-03 + ,0.049845675 ,3.813582e-03 + ,0.052103950 ,4.371974e-03 + ,0.054511258 ,4.993196e-03 + ,0.057074761 ,5.668892e-03 + ,0.059782746 ,6.388620e-03 + ,0.062615440 ,7.140126e-03 + ,0.065552496 ,7.909937e-03 + ,0.068577121 ,7.684452e-03 + ,0.071677317 ,9.451369e-03 + ,0.074845101 ,1.020104e-02 + ,0.078074759 ,1.092742e-02 + ,0.081360990 ,1.162839e-02 + ,0.084697501 ,1.230540e-02 + ,0.088076254 ,1.296274e-02 + ,0.091487319 ,1.360651e-02 + ,0.094919200 ,1.424359e-02 + ,0.098359413 ,1.488081e-02 + ,0.101795143 ,1.552446e-02 + ,0.105213802 ,1.617997e-02 + ,0.108603375 ,1.685188e-02 + ,0.111952481 ,1.754394e-02 + ,0.115250189 ,1.825919e-02 + ,0.118485658 ,1.900005e-02 + ,0.121647762 ,1.976830e-02 + ,0.124724850 ,2.056506e-02 + ,0.127704758 ,2.139068e-02 + ,0.130575140 ,2.224462e-02 + ,0.133324080 ,2.312538e-02 + ,0.135940914 ,2.403049e-02 + ,0.138417122 ,2.495656e-02 + ,0.140747141 ,2.589933e-02 + ,0.142928988 ,2.685388e-02 + ,0.144964592 ,2.781478e-02 + ,0.146859829 ,2.877632e-02 + ,0.148624253 ,2.973273e-02 + ,0.150270597 ,3.067838e-02 + ,0.151814117 ,3.160798e-02 + ,0.153271855 ,3.251674e-02 + ,0.154661905 ,3.340049e-02 + ,0.156002725 ,3.425580e-02 + ,0.157312540 ,3.508002e-02 + ,0.158608833 ,3.587127e-02 + ,0.159907941 ,3.662845e-02 + ,0.161224730 ,3.735119e-02 + ,0.162572340 ,3.803977e-02 + ,0.163961992 ,3.869502e-02 + ,0.165402841 ,3.931829e-02 + ,0.166901885 ,3.991132e-02 + ,0.168463921 ,4.047615e-02 + ,0.170091558 ,4.101511e-02 + ,0.171785292 ,4.153070e-02 + ,0.173543642 ,4.202554e-02 + ,0.175363353 ,4.250234e-02 + ,0.177239642 ,4.296380e-02 + ,0.179166497 ,4.341259e-02 + ,0.181137006 ,4.385123e-02 + ,0.183143686 ,4.428208e-02 + ,0.185178820 ,4.470724e-02 + ,0.187234759 ,4.512852e-02 + ,0.189304201 ,4.554735e-02 + ,0.191380404 ,4.596483e-02 + ,0.193457363 ,4.638162e-02 + ,0.195529910 ,4.679804e-02 + ,0.197593772 ,4.721406e-02 + ,0.199645563 ,4.762936e-02 + ,0.201682735 ,4.804340e-02 + ,0.203703491 ,4.845551e-02 + ,0.205706665 ,4.886496e-02 + ,0.207691581 ,4.927106e-02 + ,0.209657914 ,4.967325e-02 + ,0.211605535 ,5.007115e-02 + ,0.213534385 ,5.046464e-02 + ,0.215444346 ,5.085386e-02 + ,0.217335153 ,5.123927e-02 + ,0.219206331 ,5.162162e-02 + ,0.221057158 ,5.200196e-02 + ,0.222886667 ,5.238156e-02 + ,0.224693687 ,5.276190e-02 + ,0.226476899 ,5.314459e-02 + ,0.228234929 ,5.353130e-02 + ,0.229966452 ,5.392373e-02 + ,0.231670301 ,5.432350e-02 + ,0.233345582 ,5.473211e-02 + ,0.234991770 ,5.515090e-02 + ,0.236608793 ,5.558098e-02 + ,0.238197083 ,5.602325e-02 + ,0.239757602 ,5.647831e-02 + ,0.241291828 ,5.694653e-02 + ,0.255725281 ,6.227133e-02 + ,0.269643155 ,6.812649e-02 + ,0.282572599 ,7.379864e-02 + ,0.294230675 ,7.937555e-02 + ,0.305279532 ,7.560157e-02 + ,0.316231246 ,9.290848e-02 + ,0.327118515 ,1.006091e-01 + ,0.337725949 ,1.077792e-01 + ,0.348507973 ,1.143072e-01 + ,0.360275335 ,1.203813e-01 + ,0.372732081 ,1.259302e-01 + ,0.384865494 ,1.309854e-01 + ,0.396170468 ,1.359841e-01 + ,0.406525702 ,1.414512e-01 + ,0.416179917 ,1.474082e-01 + ,0.425830260 ,1.532244e-01 + ,0.435731315 ,1.584113e-01 + ,0.445574831 ,1.634671e-01 + ,0.455403556 
,1.693949e-01 + ,0.465473322 ,1.764807e-01 + ,0.475493015 ,1.840579e-01 + ,0.485104441 ,1.914634e-01 + ,0.494622601 ,1.987244e-01 + ,0.504479896 ,2.062144e-01 + ,0.514530980 ,2.140417e-01 + ,0.524379809 ,2.218487e-01 + ,0.533811067 ,2.291090e-01 + ,0.542964071 ,2.357491e-01 + ,0.552415670 ,2.422511e-01 + ,0.562702410 ,2.489342e-01 + ,0.573662536 ,2.556239e-01 + ,0.584499138 ,2.621385e-01 + ,0.594424353 ,2.684997e-01 + ,0.603091449 ,2.746540e-01 + ,0.610580104 ,2.804293e-01 + ,0.617291412 ,2.857687e-01 + ,0.623819392 ,2.908378e-01 + ,0.630625766 ,2.959208e-01 + ,0.637780329 ,3.012891e-01 + ,0.645128388 ,3.071430e-01 + ,0.652676811 ,3.135295e-01 + ,0.660706577 ,3.202344e-01 + ,0.669513580 ,3.269345e-01 + ,0.679074179 ,3.335610e-01 + ,0.688924859 ,3.403140e-01 + ,0.698359030 ,3.473033e-01 + ,0.706803246 ,3.544525e-01 + ,0.714138033 ,3.617598e-01 + ,0.720773425 ,3.694019e-01 + ,0.727376023 ,3.775318e-01 + ,0.734427437 ,3.860345e-01 + ,0.741910594 ,3.945174e-01 + ,0.749301181 ,4.026368e-01 + ,0.756038316 ,4.104358e-01 + ,0.762108143 ,4.183084e-01 + ,0.767991314 ,4.266920e-01 + ,0.774125688 ,4.358112e-01 + ,0.780642168 ,4.456474e-01 + ,0.787654735 ,4.560432e-01 + ,0.795560943 ,4.667688e-01 + ,0.804494568 ,4.776577e-01 + ,0.813689728 ,4.888006e-01 + ,0.822246533 ,5.003803e-01 + ,0.830074100 ,5.121099e-01 + ,0.837601422 ,5.231564e-01 + ,0.845257402 ,5.330505e-01 + ,0.853115769 ,5.423039e-01 + ,0.860852876 ,5.518459e-01 + ,0.868387995 ,5.622222e-01 + ,0.875848963 ,5.737404e-01 + ,0.882870692 ,5.869893e-01 + ,0.889442420 ,6.017184e-01 + ,0.896636569 ,6.161664e-01 + ,0.904626820 ,6.294051e-01 + ,0.911772218 ,6.420279e-01 + ,0.917549883 ,6.554093e-01 + ,0.923211670 ,6.714817e-01 + ,0.928746246 ,6.890702e-01 + ,0.934160521 ,7.060012e-01 + ,0.941177134 ,7.215806e-01 + ,0.949363922 ,7.357055e-01 + ,0.956847590 ,7.494285e-01 + ,0.963599180 ,7.621246e-01 + ,0.969055149 ,7.763082e-01 + ,0.973136108 ,7.936805e-01 + ,0.976868136 ,7.142080e-01 + ,0.981258840 ,7.460053e-01 + ,0.988264909 ,7.786117e-01 + ,0.994433844 ,9.192672e-01), + byrow=TRUE,ncol=2) + + +P4=matrix(c(0.02825196 ,0.002694346 ,2.999028e-06 + ,0.03650906 ,0.004146146 ,4.263801e-05 + ,0.04206787 ,0.005668266 ,1.964571e-04 + ,0.04755122 ,0.007287032 ,4.847254e-04 + ,0.05291511 ,0.009334544 ,8.277385e-04 + ,0.05813244 ,0.011977936 ,1.156019e-03 + ,0.06348677 ,0.015026277 ,1.458581e-03 + ,0.06908051 ,0.018151721 ,1.739547e-03 + ,0.07468576 ,0.021131256 ,1.999549e-03 + ,0.08003211 ,0.023887028 ,2.244613e-03 + ,0.08506408 ,0.026419261 ,2.483839e-03 + ,0.08992250 ,0.028745172 ,2.721838e-03 + ,0.09476611 ,0.030872849 ,2.960090e-03 + ,0.09964373 ,0.032802489 ,3.201924e-03 + ,0.10449749 ,0.034540459 ,3.452190e-03 + ,0.10923896 ,0.036109254 ,3.712771e-03 + ,0.11381116 ,0.037544213 ,3.980112e-03 + ,0.11820149 ,0.038882089 ,4.247472e-03 + ,0.12242309 ,0.040152826 ,4.509743e-03 + ,0.12649443 ,0.041380182 ,4.767249e-03 + ,0.13043191 ,0.042587785 ,5.026383e-03 + ,0.13425237 ,0.043803447 ,5.297105e-03 + ,0.13797722 ,0.045057120 ,5.588809e-03 + ,0.14163298 ,0.046373077 ,5.906670e-03 + ,0.14524737 ,0.047760788 ,6.249983e-03 + ,0.14884258 ,0.049209682 ,6.612920e-03 + ,0.15242802 ,0.050690819 ,6.986926e-03 + ,0.15599509 ,0.052164992 ,7.363452e-03 + ,0.15951646 ,0.053593885 ,7.735951e-03 + ,0.16295105 ,0.054949857 ,8.100655e-03 + ,0.16625418 ,0.056221112 ,8.456323e-03 + ,0.16938955 ,0.057411310 ,8.803438e-03 + ,0.17233932 ,0.058534904 ,9.143341e-03 + ,0.17510887 ,0.059610687 ,9.477586e-03 + ,0.17772492 ,0.060655906 ,9.807601e-03 + ,0.18022813 ,0.061682484 
,1.013458e-02 + ,0.18266319 ,0.062695748 ,1.045946e-02 + ,0.18506945 ,0.063695282 ,1.078300e-02 + ,0.18747477 ,0.064677064 ,1.110569e-02 + ,0.18989354 ,0.065635990 ,1.142782e-02 + ,0.19232847 ,0.066568052 ,1.174942e-02 + ,0.19477450 ,0.067471733 ,1.207037e-02 + ,0.19722304 ,0.068348479 ,1.239045e-02 + ,0.19966526 ,0.069202350 ,1.270946e-02 + ,0.20209341 ,0.070039138 ,1.302729e-02 + ,0.20450072 ,0.070865253 ,1.334392e-02 + ,0.20688005 ,0.071686698 ,1.365930e-02 + ,0.20922248 ,0.072508287 ,1.397321e-02 + ,0.21151643 ,0.073333231 ,1.428515e-02 + ,0.21374770 ,0.074163062 ,1.459419e-02 + ,0.21590048 ,0.074997845 ,1.489902e-02 + ,0.21795919 ,0.075836565 ,1.519811e-02 + ,0.21991052 ,0.076677610 ,1.548999e-02 + ,0.22174535 ,0.077519237 ,1.577353e-02 + ,0.22346014 ,0.078359986 ,1.604830e-02 + ,0.22505752 ,0.079198980 ,1.631478e-02 + ,0.22654605 ,0.080036104 ,1.657442e-02 + ,0.22793930 ,0.080872053 ,1.682959e-02 + ,0.22925445 ,0.081708279 ,1.708334e-02 + ,0.23051059 ,0.082546845 ,1.733898e-02 + ,0.23172712 ,0.083390212 ,1.759979e-02 + ,0.23292230 ,0.084240996 ,1.786853e-02 + ,0.23411229 ,0.085101710 ,1.814722e-02 + ,0.23531044 ,0.085974525 ,1.843693e-02 + ,0.23652714 ,0.086861072 ,1.873778e-02 + ,0.23776992 ,0.087762307 ,1.904896e-02 + ,0.23904386 ,0.088678461 ,1.936895e-02 + ,0.24035207 ,0.089609084 ,1.969576e-02 + ,0.24169630 ,0.090553177 ,2.002710e-02 + ,0.24307742 ,0.091509412 ,2.036069e-02 + ,0.24449595 ,0.092476402 ,2.069439e-02 + ,0.24595237 ,0.093452991 ,2.102637e-02 + ,0.24744741 ,0.094438523 ,2.135516e-02 + ,0.24898216 ,0.095433051 ,2.167968e-02 + ,0.25055813 ,0.096437453 ,2.199923e-02 + ,0.25217708 ,0.097453412 ,2.231345e-02 + ,0.25384088 ,0.098483278 ,2.262224e-02 + ,0.25555107 ,0.099529803 ,2.292570e-02 + ,0.25730850 ,0.100595780 ,2.322410e-02 + ,0.25911286 ,0.101683637 ,2.351787e-02 + ,0.26096228 ,0.102795033 ,2.380753e-02 + ,0.26285307 ,0.103930520 ,2.409377e-02 + ,0.26477965 ,0.105089312 ,2.437741e-02 + ,0.26673470 ,0.106269188 ,2.465942e-02 + ,0.26870964 ,0.107466555 ,2.494090e-02 + ,0.27069514 ,0.108676633 ,2.522304e-02 + ,0.27268195 ,0.109893752 ,2.550708e-02 + ,0.27466156 ,0.111111714 ,2.579422e-02 + ,0.27662692 ,0.112324166 ,2.608553e-02 + ,0.27857284 ,0.113524958 ,2.638190e-02 + ,0.28049622 ,0.114708449 ,2.668392e-02 + ,0.28239603 ,0.115869742 ,2.699191e-02 + ,0.28427298 ,0.117004848 ,2.730585e-02 + ,0.28612913 ,0.118110784 ,2.762538e-02 + ,0.28796727 ,0.119185602 ,2.794990e-02 + ,0.28979037 ,0.120228378 ,2.827855e-02 + ,0.29160108 ,0.121239153 ,2.861033e-02 + ,0.29340136 ,0.122218858 ,2.894414e-02 + ,0.29519222 ,0.123169211 ,2.927891e-02 + ,0.29697369 ,0.124092605 ,2.961360e-02 + ,0.31397659 ,0.132621459 ,3.287664e-02 + ,0.32905317 ,0.141687415 ,3.635344e-02 + ,0.34142264 ,0.150239874 ,4.011146e-02 + ,0.35320262 ,0.157769982 ,4.336456e-02 + ,0.36543162 ,0.165856637 ,4.597872e-02 + ,0.37719191 ,0.174946656 ,4.927422e-02 + ,0.38887463 ,0.183829329 ,5.322584e-02 + ,0.40152779 ,0.191918219 ,5.706844e-02 + ,0.41555978 ,0.200422337 ,6.072351e-02 + ,0.42908873 ,0.209907793 ,6.413420e-02 + ,0.44052818 ,0.219253257 ,6.745447e-02 + ,0.45132279 ,0.227775172 ,7.077917e-02 + ,0.46314082 ,0.234996714 ,7.417266e-02 + ,0.47488247 ,0.241586411 ,7.752946e-02 + ,0.48469259 ,0.248330403 ,8.082463e-02 + ,0.49326509 ,0.255625615 ,8.409529e-02 + ,0.50168814 ,0.263258846 ,8.746989e-02 + ,0.51028363 ,0.271055706 ,9.086717e-02 + ,0.51912560 ,0.278856970 ,9.420999e-02 + ,0.52769780 ,0.286320658 ,9.786763e-02 + ,0.53603821 ,0.293266278 ,1.020140e-01 + ,0.54478766 ,0.299467332 ,1.062560e-01 + ,0.55366271 
,0.305690735 ,1.102368e-01 + ,0.56272309 ,0.312768360 ,1.140490e-01 + ,0.57233129 ,0.319963119 ,1.178623e-01 + ,0.58200310 ,0.326558078 ,1.219016e-01 + ,0.59099024 ,0.332901385 ,1.263491e-01 + ,0.59937345 ,0.339303862 ,1.310123e-01 + ,0.60758641 ,0.345851059 ,1.357467e-01 + ,0.61598033 ,0.352190085 ,1.405864e-01 + ,0.62528643 ,0.357913761 ,1.455560e-01 + ,0.63543095 ,0.363356622 ,1.504331e-01 + ,0.64471050 ,0.369132056 ,1.551387e-01 + ,0.65228925 ,0.375308548 ,1.599456e-01 + ,0.65934374 ,0.381636254 ,1.650710e-01 + ,0.66706291 ,0.388370558 ,1.704709e-01 + ,0.67515257 ,0.395534711 ,1.758367e-01 + ,0.68284073 ,0.402421344 ,1.812059e-01 + ,0.69038336 ,0.408623429 ,1.868699e-01 + ,0.69845919 ,0.414460275 ,1.926662e-01 + ,0.70715142 ,0.420523246 ,1.981833e-01 + ,0.71574865 ,0.427206110 ,2.033596e-01 + ,0.72320765 ,0.434498031 ,2.084776e-01 + ,0.72954754 ,0.442149909 ,2.137674e-01 + ,0.73571186 ,0.449959300 ,2.191326e-01 + ,0.74226506 ,0.457421738 ,2.245519e-01 + ,0.74905392 ,0.464237364 ,2.303037e-01 + ,0.75612564 ,0.470902809 ,2.367213e-01 + ,0.76388865 ,0.477994733 ,2.435881e-01 + ,0.77159609 ,0.485500173 ,2.500961e-01 + ,0.77797709 ,0.493064413 ,2.561486e-01 + ,0.78328352 ,0.500852021 ,2.622563e-01 + ,0.78864834 ,0.509132560 ,2.684670e-01 + ,0.79436056 ,0.517227408 ,2.749325e-01 + ,0.80015524 ,0.524694907 ,2.817636e-01 + ,0.80624435 ,0.532053355 ,2.884733e-01 + ,0.81293829 ,0.540118580 ,2.947507e-01 + ,0.82015085 ,0.548921459 ,3.010033e-01 + ,0.82763569 ,0.557637151 ,3.078365e-01 + ,0.83505891 ,0.566310996 ,3.155196e-01 + ,0.84198778 ,0.575253522 ,3.239425e-01 + ,0.84848454 ,0.583976152 ,3.326209e-01 + ,0.85494638 ,0.592377395 ,3.411542e-01 + ,0.86153456 ,0.600715905 ,3.495319e-01 + ,0.86786548 ,0.608554390 ,3.581296e-01 + ,0.87340081 ,0.615887245 ,3.669205e-01 + ,0.87845635 ,0.623939370 ,3.759445e-01 + ,0.88379941 ,0.632832612 ,3.856236e-01 + ,0.88951674 ,0.641696896 ,3.957400e-01 + ,0.89459349 ,0.651106256 ,4.056838e-01 + ,0.89882091 ,0.662461064 ,4.146894e-01 + ,0.90377294 ,0.675025139 ,4.226494e-01 + ,0.90970927 ,0.687246375 ,4.314995e-01 + ,0.91531400 ,0.699233434 ,4.438556e-01 + ,0.92062598 ,0.710363456 ,4.575252e-01 + ,0.92625531 ,0.720885287 ,4.697992e-01 + ,0.93147783 ,0.730627162 ,4.799491e-01 + ,0.93661779 ,0.740878409 ,4.909327e-01 + ,0.94256155 ,0.753110750 ,5.054689e-01 + ,0.94879244 ,0.766049888 ,5.218112e-01 + ,0.95441461 ,0.779461673 ,5.382110e-01 + ,0.95922582 ,0.795415271 ,5.580688e-01 + ,0.96461303 ,0.812122645 ,5.830378e-01 + ,0.96954495 ,0.827981003 ,6.131022e-01 + ,0.97446302 ,0.845818141 ,6.404610e-01 + ,0.97912580 ,0.862982643 ,6.619244e-01 + ,0.98539520 ,0.887825781 ,6.905761e-01 + ,0.98986795 ,0.916261737 ,7.236242e-01 + ,0.99487474 ,0.960335712 ,7.689094e-01), + byrow=TRUE,ncol=3) + +P5=matrix(c( 0.05652412 ,0.01305194 ,0.002390178 ,3.378326e-12 + ,0.06574073 ,0.01487696 ,0.003342965 ,7.244389e-11 + ,0.07151676 ,0.01720008 ,0.004648512 ,7.773423e-10 + ,0.07511946 ,0.01981078 ,0.006063858 ,5.567153e-09 + ,0.07828081 ,0.02244535 ,0.007485693 ,2.994702e-08 + ,0.08180130 ,0.02494366 ,0.008893920 ,1.291037e-07 + ,0.08583995 ,0.02730528 ,0.010278443 ,4.647955e-07 + ,0.09025786 ,0.02960906 ,0.011622679 ,1.437865e-06 + ,0.09481793 ,0.03192766 ,0.012912280 ,3.903397e-06 + ,0.09930536 ,0.03429573 ,0.014138866 ,9.451086e-06 + ,0.10358876 ,0.03671695 ,0.015295959 ,2.067608e-05 + ,0.10762929 ,0.03918039 ,0.016375245 ,4.130899e-05 + ,0.11145588 ,0.04167054 ,0.017367679 ,7.605707e-05 + ,0.11512861 ,0.04416935 ,0.018267991 ,1.300654e-04 + ,0.11870678 ,0.04665486 ,0.019078858 
,2.080362e-04 + ,0.12222956 ,0.04910107 ,0.019812158 ,3.132019e-04 + ,0.12571005 ,0.05148055 ,0.020487010 ,4.464557e-04 + ,0.12913936 ,0.05376876 ,0.021125903 ,6.059430e-04 + ,0.13249584 ,0.05594791 ,0.021750598 ,7.872902e-04 + ,0.13575514 ,0.05800867 ,0.022379035 ,9.844397e-04 + ,0.13889807 ,0.05994945 ,0.023023715 ,1.190860e-03 + ,0.14191516 ,0.06177395 ,0.023691420 ,1.400786e-03 + ,0.14480785 ,0.06348830 ,0.024383868 ,1.610156e-03 + ,0.14758751 ,0.06509889 ,0.025098853 ,1.817036e-03 + ,0.15027304 ,0.06661130 ,0.025831529 ,2.021495e-03 + ,0.15288821 ,0.06803034 ,0.026575628 ,2.225055e-03 + ,0.15545896 ,0.06936082 ,0.027324476 ,2.429942e-03 + ,0.15801110 ,0.07060840 ,0.028071770 ,2.638347e-03 + ,0.16056820 ,0.07178026 ,0.028812074 ,2.851885e-03 + ,0.16315001 ,0.07288527 ,0.029541074 ,3.071323e-03 + ,0.16577118 ,0.07393380 ,0.030255603 ,3.296558e-03 + ,0.16844049 ,0.07493712 ,0.030953535 ,3.526795e-03 + ,0.17116069 ,0.07590682 ,0.031633586 ,3.760793e-03 + ,0.17392879 ,0.07685417 ,0.032295108 ,3.997135e-03 + ,0.17673683 ,0.07778972 ,0.032937919 ,4.234427e-03 + ,0.17957306 ,0.07872305 ,0.033562189 ,4.471420e-03 + ,0.18242312 ,0.07966272 ,0.034168393 ,4.707065e-03 + ,0.18527142 ,0.08061630 ,0.034757295 ,4.940502e-03 + ,0.18810221 ,0.08159041 ,0.035329958 ,5.171027e-03 + ,0.19090055 ,0.08259081 ,0.035887740 ,5.398048e-03 + ,0.19365295 ,0.08362233 ,0.036432265 ,5.621047e-03 + ,0.19634778 ,0.08468879 ,0.036965369 ,5.839553e-03 + ,0.19897548 ,0.08579287 ,0.037489019 ,6.053142e-03 + ,0.20152856 ,0.08693588 ,0.038005212 ,6.261435e-03 + ,0.20400154 ,0.08811768 ,0.038515895 ,6.464128e-03 + ,0.20639083 ,0.08933656 ,0.039022878 ,6.661029e-03 + ,0.20869457 ,0.09058924 ,0.039527788 ,6.852104e-03 + ,0.21091258 ,0.09187106 ,0.040032038 ,7.037528e-03 + ,0.21304622 ,0.09317615 ,0.040536826 ,7.217739e-03 + ,0.21509838 ,0.09449787 ,0.041043146 ,7.393470e-03 + ,0.21707346 ,0.09582915 ,0.041551812 ,7.565765e-03 + ,0.21897727 ,0.09716299 ,0.042063485 ,7.735961e-03 + ,0.22081697 ,0.09849292 ,0.042578697 ,7.905643e-03 + ,0.22260093 ,0.09981337 ,0.043097865 ,8.076563e-03 + ,0.22433851 ,0.10112002 ,0.043621312 ,8.250539e-03 + ,0.22603978 ,0.10240999 ,0.044149265 ,8.429335e-03 + ,0.22771524 ,0.10368199 ,0.044681863 ,8.614546e-03 + ,0.22937544 ,0.10493623 ,0.045219159 ,8.807490e-03 + ,0.23103064 ,0.10617433 ,0.045761127 ,9.009122e-03 + ,0.23269046 ,0.10739901 ,0.046307667 ,9.219978e-03 + ,0.23436362 ,0.10861381 ,0.046858620 ,9.440156e-03 + ,0.23605766 ,0.10982269 ,0.047413783 ,9.669322e-03 + ,0.23777881 ,0.11102960 ,0.047972925 ,9.906756e-03 + ,0.23953181 ,0.11223815 ,0.048535807 ,1.015141e-02 + ,0.24131994 ,0.11345120 ,0.049102198 ,1.040201e-02 + ,0.24314491 ,0.11467064 ,0.049671889 ,1.065710e-02 + ,0.24500695 ,0.11589718 ,0.050244706 ,1.091518e-02 + ,0.24690483 ,0.11713029 ,0.050820511 ,1.117476e-02 + ,0.24883595 ,0.11836818 ,0.051399206 ,1.143446e-02 + ,0.25079643 ,0.11960797 ,0.051980725 ,1.169304e-02 + ,0.25278126 ,0.12084583 ,0.052565028 ,1.194945e-02 + ,0.25478439 ,0.12207730 ,0.053152083 ,1.220290e-02 + ,0.25679899 ,0.12329752 ,0.053741852 ,1.245283e-02 + ,0.25881759 ,0.12450159 ,0.054334269 ,1.269891e-02 + ,0.26083237 ,0.12568485 ,0.054929219 ,1.294106e-02 + ,0.26283537 ,0.12684313 ,0.055526514 ,1.317939e-02 + ,0.26481879 ,0.12797294 ,0.056125873 ,1.341420e-02 + ,0.26677527 ,0.12907169 ,0.056726901 ,1.364589e-02 + ,0.26869809 ,0.13013771 ,0.057329079 ,1.387498e-02 + ,0.27058146 ,0.13117030 ,0.057931750 ,1.410201e-02 + ,0.27242063 ,0.13216971 ,0.058534126 ,1.432755e-02 + ,0.27421212 ,0.13313704 ,0.059135301 
,1.455210e-02 + ,0.27595371 ,0.13407414 ,0.059734269 ,1.477611e-02 + ,0.27764451 ,0.13498346 ,0.060329960 ,1.499992e-02 + ,0.27928490 ,0.13586793 ,0.060921287 ,1.522377e-02 + ,0.28087643 ,0.13673079 ,0.061507187 ,1.544778e-02 + ,0.28242170 ,0.13757549 ,0.062086680 ,1.567195e-02 + ,0.28392419 ,0.13840552 ,0.062658920 ,1.589617e-02 + ,0.28538803 ,0.13922434 ,0.063223239 ,1.612023e-02 + ,0.28681780 ,0.14003533 ,0.063779188 ,1.634387e-02 + ,0.28821831 ,0.14084167 ,0.064326561 ,1.656674e-02 + ,0.28959443 ,0.14164635 ,0.064865403 ,1.678851e-02 + ,0.29095083 ,0.14245215 ,0.065396011 ,1.700879e-02 + ,0.29229189 ,0.14326158 ,0.065918911 ,1.722723e-02 + ,0.29362151 ,0.14407696 ,0.066434823 ,1.744351e-02 + ,0.29494305 ,0.14490033 ,0.066944618 ,1.765732e-02 + ,0.29625927 ,0.14573355 ,0.067449265 ,1.786841e-02 + ,0.29757232 ,0.14657825 ,0.067949777 ,1.807657e-02 + ,0.29888374 ,0.14743586 ,0.068447155 ,1.828162e-02 + ,0.30019451 ,0.14830758 ,0.068942336 ,1.848344e-02 + ,0.31328362 ,0.15791008 ,0.073880466 ,2.031038e-02 + ,0.32658914 ,0.16858633 ,0.078745227 ,2.182686e-02 + ,0.34081183 ,0.17902263 ,0.083679715 ,2.336938e-02 + ,0.35529268 ,0.18923993 ,0.089192608 ,2.528373e-02 + ,0.36855379 ,0.19911099 ,0.095098319 ,2.734008e-02 + ,0.37994080 ,0.20799604 ,0.100761289 ,2.928966e-02 + ,0.39062100 ,0.21671444 ,0.105774473 ,3.127971e-02 + ,0.40257143 ,0.22609178 ,0.110115264 ,3.337375e-02 + ,0.41621275 ,0.23569341 ,0.113958744 ,3.539360e-02 + ,0.43046818 ,0.24490377 ,0.117643306 ,3.736688e-02 + ,0.44486669 ,0.25330316 ,0.121679236 ,3.972222e-02 + ,0.45923530 ,0.26068382 ,0.126584071 ,4.280986e-02 + ,0.47296930 ,0.26745795 ,0.132471529 ,4.640810e-02 + ,0.48590094 ,0.27415019 ,0.138753880 ,5.001684e-02 + ,0.49834493 ,0.28093903 ,0.144554010 ,5.345159e-02 + ,0.51026131 ,0.28775596 ,0.149452848 ,5.679297e-02 + ,0.52119211 ,0.29432490 ,0.153686512 ,5.996537e-02 + ,0.53082589 ,0.30059787 ,0.157685666 ,6.275756e-02 + ,0.53943126 ,0.30692404 ,0.161762480 ,6.518147e-02 + ,0.54774724 ,0.31353121 ,0.166120746 ,6.753031e-02 + ,0.55642024 ,0.32037096 ,0.170885289 ,7.007491e-02 + ,0.56550181 ,0.32736781 ,0.176132244 ,7.279915e-02 + ,0.57464883 ,0.33444881 ,0.181843360 ,7.550823e-02 + ,0.58368440 ,0.34149272 ,0.187714538 ,7.816054e-02 + ,0.59259791 ,0.34836154 ,0.193239542 ,8.092069e-02 + ,0.60135765 ,0.35491577 ,0.198212454 ,8.389308e-02 + ,0.60997532 ,0.36111908 ,0.202943209 ,8.697786e-02 + ,0.61843676 ,0.36715577 ,0.207920518 ,9.000649e-02 + ,0.62674513 ,0.37332556 ,0.213468203 ,9.287236e-02 + ,0.63517287 ,0.37979485 ,0.219617217 ,9.553586e-02 + ,0.64411712 ,0.38648760 ,0.226087437 ,9.803810e-02 + ,0.65355691 ,0.39327005 ,0.232464891 ,1.005292e-01 + ,0.66286128 ,0.40022900 ,0.238568656 ,1.032200e-01 + ,0.67142296 ,0.40767245 ,0.244631428 ,1.062392e-01 + ,0.67945227 ,0.41574779 ,0.250960847 ,1.095391e-01 + ,0.68784816 ,0.42420043 ,0.257504402 ,1.129935e-01 + ,0.69717228 ,0.43269300 ,0.263984690 ,1.165367e-01 + ,0.70693776 ,0.44108963 ,0.270350361 ,1.201702e-01 + ,0.71609195 ,0.44926200 ,0.276782149 ,1.239319e-01 + ,0.72398625 ,0.45697627 ,0.283281861 ,1.278724e-01 + ,0.73066318 ,0.46420271 ,0.289549848 ,1.319571e-01 + ,0.73650853 ,0.47133730 ,0.295322586 ,1.360494e-01 + ,0.74191863 ,0.47888254 ,0.300607528 ,1.400640e-01 + ,0.74721366 ,0.48699345 ,0.305518411 ,1.440467e-01 + ,0.75262380 ,0.49549336 ,0.310111010 ,1.480611e-01 + ,0.75824105 ,0.50420009 ,0.314535705 ,1.520995e-01 + ,0.76409670 ,0.51305053 ,0.319195032 ,1.561658e-01 + ,0.77030423 ,0.52194190 ,0.324545735 ,1.603832e-01 + ,0.77696430 ,0.53068348 ,0.330761451 
,1.648740e-01 + ,0.78392814 ,0.53921911 ,0.337687815 ,1.695305e-01 + ,0.79087142 ,0.54764584 ,0.345040336 ,1.740919e-01 + ,0.79764992 ,0.55599003 ,0.352535977 ,1.784667e-01 + ,0.80440374 ,0.56418827 ,0.359947893 ,1.828362e-01 + ,0.81125920 ,0.57217204 ,0.367126284 ,1.874504e-01 + ,0.81807396 ,0.57997214 ,0.374007327 ,1.923635e-01 + ,0.82455298 ,0.58778776 ,0.380706775 ,1.973552e-01 + ,0.83054464 ,0.59568049 ,0.387507714 ,2.021553e-01 + ,0.83619452 ,0.60335949 ,0.394648281 ,2.067057e-01 + ,0.84182788 ,0.61067252 ,0.402210773 ,2.111209e-01 + ,0.84769382 ,0.61810882 ,0.410130422 ,2.155355e-01 + ,0.85379684 ,0.62646991 ,0.418239217 ,2.201354e-01 + ,0.85982991 ,0.63596326 ,0.426436850 ,2.251539e-01 + ,0.86534864 ,0.64569215 ,0.434743378 ,2.307076e-01 + ,0.87020343 ,0.65434970 ,0.443196556 ,2.368692e-01 + ,0.87472363 ,0.66158858 ,0.451892254 ,2.437247e-01 + ,0.87942450 ,0.66828595 ,0.460851245 ,2.509675e-01 + ,0.88437025 ,0.67525025 ,0.469770141 ,2.580093e-01 + ,0.88904758 ,0.68246978 ,0.478325822 ,2.648763e-01 + ,0.89322701 ,0.68982898 ,0.486536728 ,2.723872e-01 + ,0.89745058 ,0.69753791 ,0.494776780 ,2.812096e-01 + ,0.90239053 ,0.70572617 ,0.503530501 ,2.908907e-01 + ,0.90800180 ,0.71430596 ,0.513032547 ,3.003753e-01 + ,0.91346775 ,0.72311282 ,0.523668882 ,3.096709e-01 + ,0.91813323 ,0.73249598 ,0.535903771 ,3.196147e-01 + ,0.92221702 ,0.74332982 ,0.549315193 ,3.304979e-01 + ,0.92622555 ,0.75519029 ,0.562491098 ,3.426984e-01 + ,0.93049876 ,0.76676088 ,0.574311153 ,3.573481e-01 + ,0.93559016 ,0.77792982 ,0.584778988 ,3.742742e-01 + ,0.94158878 ,0.78972489 ,0.594741057 ,3.918491e-01 + ,0.94715113 ,0.80271250 ,0.605197756 ,4.107232e-01 + ,0.95168452 ,0.81675031 ,0.616562519 ,4.307354e-01 + ,0.95643627 ,0.83145104 ,0.629586884 ,4.491496e-01 + ,0.96150476 ,0.84592600 ,0.643095195 ,4.683523e-01 + ,0.96619142 ,0.86139307 ,0.658098224 ,4.899430e-01 + ,0.97232678 ,0.87780728 ,0.682404452 ,5.109344e-01 + ,0.97800785 ,0.89418487 ,0.715182394 ,5.334721e-01 + ,0.98168187 ,0.90711236 ,0.750757190 ,5.668841e-01 + ,0.98661473 ,0.92175945 ,0.795670468 ,6.221022e-01 + ,0.99161624 ,0.94482117 ,0.844332168 ,6.873383e-01), + byrow=TRUE,ncol=4) + + P6=matrix(c( 0.02845841 ,0.009644671 ,0.0009664242 ,0.0007502456 ,2.594514e-11 + ,0.03862415 ,0.017910204 ,0.0031407471 ,0.0015589418 ,5.022928e-10 + ,0.05220254 ,0.025693640 ,0.0057938158 ,0.0021160814 ,4.868380e-09 + ,0.06621145 ,0.032227944 ,0.0082558224 ,0.0025765248 ,3.151134e-08 + ,0.07867811 ,0.037545052 ,0.0103200867 ,0.0030739790 ,1.532944e-07 + ,0.08886734 ,0.041725975 ,0.0120483568 ,0.0036182088 ,5.980879e-07 + ,0.09687788 ,0.044882223 ,0.0135631342 ,0.0041650257 ,1.950299e-06 + ,0.10316830 ,0.047217776 ,0.0149788321 ,0.0046727336 ,5.469913e-06 + ,0.10824058 ,0.048995043 ,0.0163941682 ,0.0051213088 ,1.347713e-05 + ,0.11250832 ,0.050462880 ,0.0178877558 ,0.0055125816 ,2.965286e-05 + ,0.11627805 ,0.051812087 ,0.0195084853 ,0.0058643506 ,5.903392e-05 + ,0.11976844 ,0.053169834 ,0.0212684847 ,0.0062028177 ,1.075073e-04 + ,0.12313219 ,0.054614572 ,0.0231443453 ,0.0065546842 ,1.807643e-04 + ,0.12647380 ,0.056192980 ,0.0250870063 ,0.0069403944 ,2.829186e-04 + ,0.12986454 ,0.057930892 ,0.0270368653 ,0.0073701618 ,4.152024e-04 + ,0.13335383 ,0.059838004 ,0.0289391057 ,0.0078435357 ,5.752174e-04 + ,0.13697580 ,0.061909087 ,0.0307548517 ,0.0083519589 ,7.570695e-04 + ,0.14075114 ,0.064124538 ,0.0324658987 ,0.0088828750 ,9.524158e-04 + ,0.14468601 ,0.066452103 ,0.0340732641 ,0.0094238009 ,1.152134e-03 + ,0.14877041 ,0.068850570 ,0.0355916148 ,0.0099652366 ,1.348124e-03 + 
,0.15297783 ,0.071275211 ,0.0370422456 ,0.0105019844 ,1.534731e-03 + ,0.15726699 ,0.073684014 ,0.0384468508 ,0.0110330581 ,1.709452e-03 + ,0.16158577 ,0.076043401 ,0.0398233432 ,0.0115607019 ,1.872830e-03 + ,0.16587656 ,0.078332254 ,0.0411839642 ,0.0120890846 ,2.027675e-03 + ,0.17008218 ,0.080543571 ,0.0425352335 ,0.0126230926 ,2.177920e-03 + ,0.17415147 ,0.082683715 ,0.0438790091 ,0.0131674283 ,2.327430e-03 + ,0.17804364 ,0.084769760 ,0.0452139704 ,0.0137260428 ,2.479054e-03 + ,0.18173091 ,0.086825700 ,0.0465370496 ,0.0143018329 ,2.634074e-03 + ,0.18519926 ,0.088878323 ,0.0478445734 ,0.0148965065 ,2.792108e-03 + ,0.18844742 ,0.090953392 ,0.0491330541 ,0.0155105477 ,2.951390e-03 + ,0.19148459 ,0.093072518 ,0.0503996631 ,0.0161432460 ,3.109303e-03 + ,0.19432745 ,0.095250927 ,0.0516424566 ,0.0167927877 ,3.263029e-03 + ,0.19699700 ,0.097496165 ,0.0528604171 ,0.0174564124 ,3.410139e-03 + ,0.19951573 ,0.099807706 ,0.0540533669 ,0.0181306383 ,3.549062e-03 + ,0.20190550 ,0.102177381 ,0.0552217983 ,0.0188115440 ,3.679327e-03 + ,0.20418614 ,0.104590523 ,0.0563666642 ,0.0194950810 ,3.801605e-03 + ,0.20637484 ,0.107027661 ,0.0574891666 ,0.0201773871 ,3.917550e-03 + ,0.20848607 ,0.109466576 ,0.0585905770 ,0.0208550641 ,4.029514e-03 + ,0.21053195 ,0.111884485 ,0.0596721117 ,0.0215253931 ,4.140185e-03 + ,0.21252275 ,0.114260111 ,0.0607348702 ,0.0221864686 ,4.252232e-03 + ,0.21446746 ,0.116575429 ,0.0617798326 ,0.0228372461 ,4.367992e-03 + ,0.21637417 ,0.118816908 ,0.0628079009 ,0.0234775060 ,4.489244e-03 + ,0.21825036 ,0.120976170 ,0.0638199623 ,0.0241077482 ,4.617087e-03 + ,0.22010296 ,0.123050046 ,0.0648169547 ,0.0247290363 ,4.751923e-03 + ,0.22193833 ,0.125040101 ,0.0657999179 ,0.0253428107 ,4.893526e-03 + ,0.22376206 ,0.126951747 ,0.0667700240 ,0.0259506919 ,5.041168e-03 + ,0.22557886 ,0.128793104 ,0.0677285875 ,0.0265542902 ,5.193786e-03 + ,0.22739239 ,0.130573768 ,0.0686770614 ,0.0271550377 ,5.350146e-03 + ,0.22920525 ,0.132303638 ,0.0696170311 ,0.0277540524 ,5.508989e-03 + ,0.23101900 ,0.133991919 ,0.0705502132 ,0.0283520430 ,5.669140e-03 + ,0.23283434 ,0.135646375 ,0.0714784671 ,0.0289492606 ,5.829573e-03 + ,0.23465133 ,0.137272882 ,0.0724038164 ,0.0295454968 ,5.989428e-03 + ,0.23646965 ,0.138875254 ,0.0733284760 ,0.0301401298 ,6.148001e-03 + ,0.23828893 ,0.140455337 ,0.0742548720 ,0.0307322114 ,6.304704e-03 + ,0.24010890 ,0.142013293 ,0.0751856416 ,0.0313205878 ,6.459029e-03 + ,0.24192967 ,0.143548021 ,0.0761236012 ,0.0319040403 ,6.610507e-03 + ,0.24375172 ,0.145057640 ,0.0770716723 ,0.0324814318 ,6.758695e-03 + ,0.24557590 ,0.146539976 ,0.0780327640 ,0.0330518431 ,6.903176e-03 + ,0.24740333 ,0.147992990 ,0.0790096154 ,0.0336146829 ,7.043585e-03 + ,0.24923515 ,0.149415131 ,0.0800046099 ,0.0341697603 ,7.179648e-03 + ,0.25107232 ,0.150805565 ,0.0810195786 ,0.0347173115 ,7.311226e-03 + ,0.25291533 ,0.152164307 ,0.0820556131 ,0.0352579785 ,7.438362e-03 + ,0.25476396 ,0.153492230 ,0.0831129092 ,0.0357927443 ,7.561312e-03 + ,0.25661711 ,0.154791002 ,0.0841906620 ,0.0363228330 ,7.680559e-03 + ,0.25847267 ,0.156062941 ,0.0852870246 ,0.0368495882 ,7.796806e-03 + ,0.26032752 ,0.157310848 ,0.0863991407 ,0.0373743441 ,7.910946e-03 + ,0.26217762 ,0.158537805 ,0.0875232506 ,0.0378983036 ,8.024013e-03 + ,0.26401815 ,0.159746996 ,0.0886548636 ,0.0384224367 ,8.137120e-03 + ,0.26584379 ,0.160941546 ,0.0897889847 ,0.0389474062 ,8.251387e-03 + ,0.26764897 ,0.162124387 ,0.0909203766 ,0.0394735277 ,8.367871e-03 + ,0.26942825 ,0.163298178 ,0.0920438371 ,0.0400007620 ,8.487505e-03 + ,0.27117659 ,0.164465254 
,0.0931544716 ,0.0405287383 ,8.611045e-03 + ,0.27288969 ,0.165627620 ,0.0942479401 ,0.0410567998 ,8.739032e-03 + ,0.27456420 ,0.166786971 ,0.0953206650 ,0.0415840663 ,8.871781e-03 + ,0.27619794 ,0.167944735 ,0.0963699857 ,0.0421095038 ,9.009371e-03 + ,0.27779004 ,0.169102127 ,0.0973942565 ,0.0426319942 ,9.151667e-03 + ,0.27934099 ,0.170260209 ,0.0983928834 ,0.0431503996 ,9.298339e-03 + ,0.28085262 ,0.171419936 ,0.0993663043 ,0.0436636169 ,9.448902e-03 + ,0.28232802 ,0.172582199 ,0.1003159175 ,0.0441706205 ,9.602751e-03 + ,0.28377144 ,0.173747838 ,0.1012439683 ,0.0446704936 ,9.759205e-03 + ,0.28518812 ,0.174917647 ,0.1021534027 ,0.0451624485 ,9.917545e-03 + ,0.28658401 ,0.176092356 ,0.1030476997 ,0.0456458386 ,1.007705e-02 + ,0.28796561 ,0.177272586 ,0.1039306913 ,0.0461201648 ,1.023703e-02 + ,0.28933969 ,0.178458806 ,0.1048063812 ,0.0465850770 ,1.039685e-02 + ,0.29071301 ,0.179651274 ,0.1056787690 ,0.0470403750 ,1.055595e-02 + ,0.29209211 ,0.180849979 ,0.1065516870 ,0.0474860075 ,1.071386e-02 + ,0.29348304 ,0.182054592 ,0.1074286553 ,0.0479220717 ,1.087020e-02 + ,0.29489121 ,0.183264423 ,0.1083127575 ,0.0483488124 ,1.102471e-02 + ,0.29632114 ,0.184478407 ,0.1092065408 ,0.0487666200 ,1.117722e-02 + ,0.29777639 ,0.185695102 ,0.1101119397 ,0.0491760269 ,1.132768e-02 + ,0.29925947 ,0.186912720 ,0.1110302259 ,0.0495777007 ,1.147610e-02 + ,0.30077178 ,0.188129170 ,0.1119619808 ,0.0499724340 ,1.162261e-02 + ,0.30231363 ,0.189342139 ,0.1129070920 ,0.0503611291 ,1.176737e-02 + ,0.30388432 ,0.190549177 ,0.1138647708 ,0.0507447791 ,1.191065e-02 + ,0.30548226 ,0.191747804 ,0.1148335893 ,0.0511244432 ,1.205270e-02 + ,0.30710505 ,0.192935620 ,0.1158115351 ,0.0515012188 ,1.219386e-02 + ,0.30874972 ,0.194110415 ,0.1167960815 ,0.0518762105 ,1.233443e-02 + ,0.31041287 ,0.195270270 ,0.1177842705 ,0.0522504975 ,1.247476e-02 + ,0.31209087 ,0.196413653 ,0.1187728072 ,0.0526251016 ,1.261515e-02 + ,0.31378008 ,0.197539481 ,0.1197581619 ,0.0530009554 ,1.275592e-02 + ,0.33073053 ,0.207947746 ,0.1287029996 ,0.0569307733 ,1.424281e-02 + ,0.34766587 ,0.218180516 ,0.1351175921 ,0.0609818273 ,1.602065e-02 + ,0.36497276 ,0.229372730 ,0.1404381947 ,0.0646360301 ,1.803053e-02 + ,0.38214131 ,0.240774517 ,0.1460402987 ,0.0678929574 ,1.984669e-02 + ,0.39892707 ,0.251527352 ,0.1517699541 ,0.0710908941 ,2.140172e-02 + ,0.41477035 ,0.261312042 ,0.1577197965 ,0.0744655082 ,2.290802e-02 + ,0.42985193 ,0.270169908 ,0.1649510495 ,0.0779547358 ,2.442240e-02 + ,0.44459125 ,0.278214867 ,0.1733065465 ,0.0816344914 ,2.596410e-02 + ,0.45830629 ,0.285626732 ,0.1811614844 ,0.0857820745 ,2.747529e-02 + ,0.47040113 ,0.293106158 ,0.1877528247 ,0.0903640622 ,2.897806e-02 + ,0.48149178 ,0.301009058 ,0.1935701784 ,0.0951257701 ,3.068337e-02 + ,0.49230962 ,0.308762383 ,0.1992769670 ,0.0998239781 ,3.268333e-02 + ,0.50293366 ,0.315833032 ,0.2049784969 ,0.1044290674 ,3.485126e-02 + ,0.51329043 ,0.322327249 ,0.2104162141 ,0.1090755934 ,3.704227e-02 + ,0.52322279 ,0.328643258 ,0.2155878925 ,0.1137388083 ,3.919411e-02 + ,0.53250776 ,0.334876631 ,0.2207245049 ,0.1182751586 ,4.126276e-02 + ,0.54136758 ,0.341122307 ,0.2258390954 ,0.1227003370 ,4.321786e-02 + ,0.55057692 ,0.347936973 ,0.2307672090 ,0.1272211470 ,4.510598e-02 + ,0.56083373 ,0.355714012 ,0.2355401547 ,0.1320061535 ,4.697462e-02 + ,0.57188476 ,0.364046026 ,0.2403678821 ,0.1369508295 ,4.879706e-02 + ,0.58258251 ,0.372072572 ,0.2454048413 ,0.1418771565 ,5.057703e-02 + ,0.59206406 ,0.379279106 ,0.2507385738 ,0.1468283555 ,5.241157e-02 + ,0.60043277 ,0.385862014 ,0.2565083544 ,0.1518653755 
,5.442717e-02 + ,0.60824065 ,0.392470283 ,0.2627890921 ,0.1568425686 ,5.672558e-02 + ,0.61587328 ,0.399665003 ,0.2693240828 ,0.1615409971 ,5.932340e-02 + ,0.62350556 ,0.407442902 ,0.2757405639 ,0.1659131190 ,6.211202e-02 + ,0.63130400 ,0.415294312 ,0.2820549478 ,0.1701469869 ,6.493040e-02 + ,0.63950683 ,0.422769887 ,0.2885097791 ,0.1744470541 ,6.766549e-02 + ,0.64822576 ,0.429800809 ,0.2949604623 ,0.1788535306 ,7.024569e-02 + ,0.65718853 ,0.436524548 ,0.3009940240 ,0.1832922406 ,7.260984e-02 + ,0.66586026 ,0.443072284 ,0.3066417800 ,0.1877498418 ,7.477539e-02 + ,0.67385499 ,0.449570002 ,0.3124210302 ,0.1923869531 ,7.686614e-02 + ,0.68116860 ,0.456177204 ,0.3186629632 ,0.1973744564 ,7.901179e-02 + ,0.68817020 ,0.463016405 ,0.3252135392 ,0.2026604329 ,8.127321e-02 + ,0.69545456 ,0.470153870 ,0.3317877025 ,0.2080443155 ,8.365480e-02 + ,0.70339322 ,0.477628656 ,0.3382869934 ,0.2133829471 ,8.614100e-02 + ,0.71168363 ,0.485354548 ,0.3446879623 ,0.2185816498 ,8.874283e-02 + ,0.71966391 ,0.493111522 ,0.3508452160 ,0.2235543145 ,9.151748e-02 + ,0.72706877 ,0.500759585 ,0.3565974060 ,0.2283233375 ,9.452512e-02 + ,0.73411394 ,0.508436756 ,0.3620578719 ,0.2330030834 ,9.779298e-02 + ,0.74096543 ,0.516596717 ,0.3676326358 ,0.2376654675 ,1.013167e-01 + ,0.74754062 ,0.525572730 ,0.3736020261 ,0.2423531847 ,1.050525e-01 + ,0.75389357 ,0.534868176 ,0.3798150608 ,0.2471555672 ,1.089039e-01 + ,0.76044477 ,0.543419973 ,0.3859717103 ,0.2520806896 ,1.127538e-01 + ,0.76755285 ,0.550743471 ,0.3920256074 ,0.2568981812 ,1.165591e-01 + ,0.77509023 ,0.557269510 ,0.3981237467 ,0.2613608036 ,1.203644e-01 + ,0.78258820 ,0.563602135 ,0.4043785159 ,0.2656200877 ,1.241878e-01 + ,0.78956970 ,0.569952952 ,0.4108934218 ,0.2702271522 ,1.280316e-01 + ,0.79578357 ,0.576313169 ,0.4177625748 ,0.2756524098 ,1.320053e-01 + ,0.80125054 ,0.582816465 ,0.4248741636 ,0.2818778472 ,1.362441e-01 + ,0.80604156 ,0.589718190 ,0.4319593756 ,0.2884428281 ,1.407053e-01 + ,0.81021918 ,0.597012063 ,0.4388877066 ,0.2948281054 ,1.452044e-01 + ,0.81409451 ,0.604298241 ,0.4456550600 ,0.3008228462 ,1.496867e-01 + ,0.81820723 ,0.611181771 ,0.4521718937 ,0.3065488400 ,1.543527e-01 + ,0.82291940 ,0.617566520 ,0.4583418105 ,0.3121847541 ,1.593959e-01 + ,0.82816550 ,0.623606817 ,0.4642293076 ,0.3178061250 ,1.646657e-01 + ,0.83358635 ,0.629645757 ,0.4699982684 ,0.3234987538 ,1.698169e-01 + ,0.83888900 ,0.636078808 ,0.4758026388 ,0.3294433637 ,1.747666e-01 + ,0.84407903 ,0.643093954 ,0.4817498902 ,0.3359109428 ,1.797028e-01 + ,0.84924411 ,0.650551157 ,0.4878352970 ,0.3432352168 ,1.847478e-01 + ,0.85426493 ,0.658159827 ,0.4939669306 ,0.3514380763 ,1.899971e-01 + ,0.85905395 ,0.665825808 ,0.5002414181 ,0.3599104495 ,1.956613e-01 + ,0.86383966 ,0.673703674 ,0.5069596693 ,0.3680164470 ,2.017933e-01 + ,0.86878464 ,0.681723132 ,0.5141410970 ,0.3757733259 ,2.081422e-01 + ,0.87369919 ,0.689369371 ,0.5213869817 ,0.3836108960 ,2.144345e-01 + ,0.87848296 ,0.696307162 ,0.5284253727 ,0.3917481718 ,2.205671e-01 + ,0.88333342 ,0.703025539 ,0.5354226066 ,0.3997804393 ,2.267723e-01 + ,0.88852444 ,0.710392130 ,0.5425937450 ,0.4071416647 ,2.334748e-01 + ,0.89422273 ,0.718655581 ,0.5500700208 ,0.4139631116 ,2.405566e-01 + ,0.90022856 ,0.727393600 ,0.5580678239 ,0.4208102626 ,2.474752e-01 + ,0.90634711 ,0.736233079 ,0.5663716281 ,0.4277100246 ,2.541958e-01 + ,0.91250898 ,0.745181126 ,0.5745840700 ,0.4341555467 ,2.613699e-01 + ,0.91826744 ,0.754393531 ,0.5831569010 ,0.4404809807 ,2.695945e-01 + ,0.92342137 ,0.763619264 ,0.5926394880 ,0.4480172195 ,2.787520e-01 + ,0.92839321 
,0.772228663 ,0.6031756664 ,0.4576132755 ,2.886374e-01 + ,0.93344816 ,0.780039421 ,0.6154126922 ,0.4690720632 ,2.991463e-01 + ,0.93844307 ,0.787892760 ,0.6293505344 ,0.4817779858 ,3.100554e-01 + ,0.94305951 ,0.796994339 ,0.6430075903 ,0.4942618672 ,3.217248e-01 + ,0.94732447 ,0.807084776 ,0.6556595336 ,0.5058265444 ,3.343702e-01 + ,0.95204731 ,0.817744490 ,0.6691307167 ,0.5191457332 ,3.476188e-01 + ,0.95754260 ,0.828993245 ,0.6843916089 ,0.5349257669 ,3.612037e-01 + ,0.96262769 ,0.840489010 ,0.7010625113 ,0.5506951842 ,3.762375e-01 + ,0.96744559 ,0.851185162 ,0.7179714379 ,0.5654664242 ,3.939733e-01 + ,0.97198841 ,0.862041313 ,0.7355142033 ,0.5808629729 ,4.145818e-01 + ,0.97581215 ,0.873028712 ,0.7534527187 ,0.6022223081 ,4.363161e-01 + ,0.98074122 ,0.884033783 ,0.7720247523 ,0.6336556646 ,4.631840e-01 + ,0.98656778 ,0.899552193 ,0.7975923713 ,0.6670769076 ,4.902773e-01 + ,0.99129231 ,0.924710622 ,0.8318770069 ,0.7077418476 ,5.161952e-01 + ,0.99566295 ,0.958284024 ,0.8782468473 ,0.7720592633 ,5.667889e-01), + byrow=TRUE,ncol=5) + +if(is.null(PV)){ +if(!com.p.dist){ +if(p==3)rem=P3 +if(p==4)rem=P4 +if(p==5)rem=P5 +if(p==6)rem=P6 +} } + +est=NA +for(j in 1:p)est[j]=corfun(x[,j],y,...)$cor +id=which(est==max(est)) +R=order(est,decreasing=LARGEST) +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +# +# If you use corfun=scor, set plotit=F +# +data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot) +data=listm(t(data)) +if(MC){ +library(parallel) +bvec<-mclapply(data,corCOMmcp_sub,x,y,corfun,...) +} +if(!MC)bvec<-lapply(data,corCOMmcp_sub,x,y,corfun,...) +output=matrix(NA,nrow=pm1,ncol=8) +if(LARGEST)dimnames(output)=list(NULL,c('IV','Est.', 'Largest.Est', 'Dif','ci.low','ci.hi','p.value','adj.p.value')) +if(!LARGEST)dimnames(output)=list(NULL,c('IV','Est.', 'Smallest.Est', 'Dif','ci.low','ci.hi','p.value','adj.p.value')) +mat=matrix(NA,nrow=nboot,ncol=p) +ihi<-floor((1-alpha/2)*nboot+.5) +ilow<-floor((alpha/2)*nboot+.5) +for(i in 1:nboot)mat[i,]=bvec[[i]] +for(j in 2:p){ +k=j-1 +output[k,1]=R[j] +output[k,3]=est[R[1]] +output[k,2]=est[R[j]] +bsort<-sort(mat[,R[1]]-mat[,R[j]]) +output[k,4]=est[R[1]]-est[R[j]] +output[k,5]=bsort[ilow] +output[k,6]=bsort[ihi] +pv=mean(bsort<0)+.5*mean(bsort==0) +output[k,7]=2*min(c(pv,1-pv)) +flag=output[k,7]>=rem[,k] +ID=which(flag==TRUE) +ic=max(ID,1) +output[k,7]=L[ic] +} +Best='No Decision' +CH=R[1] +names(CH)='IV.w.Largest.Est' +if(!LARGEST)names(CH)='IV.w.Smallest.Est' +output[,8]=p.adjust(output[,7],method=FWE.method) +if(sum(output[,8]<=alpha)==pm1)Best='Decide' +list(CH,Conclusion=Best,results=output) +} + + + +corREGorder<-function(x,y,com.p.dist=FALSE, corfun=wincor,iter=1000,PV=NULL,pr=TRUE, +alpha=.05,nboot=500,SEED=TRUE,MC=FALSE,xout=FALSE,outfun=outpro,method='hoch',...){ +# +# Regresion: +# +# Have two or more independent variables, compare +# cor(y,x_I) to cor(y,x_k) for all k!=I, where +# cor(i,x_I) is the highest +# Winsorized correlation is used by default. +# Hochberg's method is used to control FWE. 
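+# Example (illustrative only, hypothetical data): with an n-by-3 matrix x of predictors and a vector y,
+# corREGorder(x,y) ranks the correlations between y and each column of x and returns, for each comparison
+# of two of the ordered correlations, a bootstrap confidence interval, p-value and adjusted p-value.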
+# +# x is assumed to be a matrix or data frame +# +# +if(nrow(x)!=length(y))stop('x and y have different sample sizes; should be equal') +p=ncol(x) +pm1=p-1 +p1=p+1 +m1=cbind(x,y) +m1<-elimna(m1) # Eliminate rows with missing values +x=m1[,1:pm1] +y=m1[,p1] +if(xout){ +m<-cbind(x,y) +flag<-outfun(x,plotit=FALSE,...)$keep +m<-m[flag,] +x<-m[,1:p] +y<-m[,p1] +} +nval=nrow(x) + +if(pr){ +if(!com.p.dist){ +if(is.null(PV)){ +if(p>6 || nval>350)print('Might need to use com.p.dist=TRUE') +}}} + +if(is.null(rem))rem=corREGorder.crit(p,nval,iter=iter) +x<-m1[,1:p] +y=m1[,p1] + +L=c(seq(.001,.1,.001),seq(.11,.99,.01)) +if(!is.null(PV)){ +rem=matrix(NA,length(L),pm1) +for(k in 1:pm1){ +for(i in 1:length(L))rem[i,k]=hd(PV[,k],q=L[i]) +}} + +if(p>6)com.p.dist=TRUE +if(com.p.dist){ +if(is.null(PV)){ +if(pr)print('Computing the null distribution can take several minutes') +PV=corREGorder.crit(p,nval,iter=iter,MC=MC) +rem=matrix(NA,length(L),pm1) +for(k in 1:pm1){ +for(i in 1:length(L))rem[i,k]=hd(PV[,k],q=L[i]) +}}} +P3=matrix(c(0.002175049 ,0.01338189 + ,0.005872039 ,0.02090430 + ,0.010619227 ,0.02738247 + ,0.016286140 ,0.03219214 + ,0.022398371 ,0.03579440 + ,0.028262446 ,0.03877950 + ,0.033345906 ,0.04156497 + ,0.037456504 ,0.04436593 + ,0.040694218 ,0.04726051 + ,0.043307931 ,0.05025906 + ,0.045568092 ,0.05334328 + ,0.047696567 ,0.05648043 + ,0.049845675 ,0.05962933 + ,0.052103950 ,0.06274849 + ,0.054511258 ,0.06580645 + ,0.057074761 ,0.06878943 + ,0.059782746 ,0.07170227 + ,0.062615440 ,0.07456228 + ,0.065552496 ,0.07738946 + ,0.068577121 ,0.08019727 + ,0.071677317 ,0.08298752 + ,0.074845101 ,0.08575039 + ,0.078074759 ,0.08846894 + ,0.081360990 ,0.09112571 + ,0.084697501 ,0.09370929 + ,0.088076254 ,0.09621874 + ,0.091487319 ,0.09866517 + ,0.094919200 ,0.10107020 + ,0.098359413 ,0.10346231 + ,0.101795143 ,0.10587191 + ,0.105213802 ,0.10832639 + ,0.108603375 ,0.11084604 + ,0.111952481 ,0.11344138 + ,0.115250189 ,0.11611213 + ,0.118485658 ,0.11884783 + ,0.121647762 ,0.12162960 + ,0.124724850 ,0.12443290 + ,0.127704758 ,0.12723046 + ,0.130575140 ,0.12999536 + ,0.133324080 ,0.13270348 + ,0.135940914 ,0.13533548 + ,0.138417122 ,0.13787790 + ,0.140747141 ,0.14032355 + ,0.142928988 ,0.14267126 + ,0.144964592 ,0.14492507 + ,0.146859829 ,0.14709314 + ,0.148624253 ,0.14918647 + ,0.150270597 ,0.15121765 + ,0.151814117 ,0.15319972 + ,0.153271855 ,0.15514520 + ,0.154661905 ,0.15706537 + ,0.156002725 ,0.15896974 + ,0.157312540 ,0.16086577 + ,0.158608833 ,0.16275881 + ,0.159907941 ,0.16465206 + ,0.161224730 ,0.16654682 + ,0.162572340 ,0.16844268 + ,0.163961992 ,0.17033786 + ,0.165402841 ,0.17222951 + ,0.166901885 ,0.17411410 + ,0.168463921 ,0.17598769 + ,0.170091558 ,0.17784632 + ,0.171785292 ,0.17968623 + ,0.173543642 ,0.18150411 + ,0.175363353 ,0.18329733 + ,0.177239642 ,0.18506405 + ,0.179166497 ,0.18680333 + ,0.181137006 ,0.18851520 + ,0.183143686 ,0.19020068 + ,0.185178820 ,0.19186171 + ,0.187234759 ,0.19350113 + ,0.189304201 ,0.19512259 + ,0.191380404 ,0.19673036 + ,0.193457363 ,0.19832922 + ,0.195529910 ,0.19992426 + ,0.197593772 ,0.20152065 + ,0.199645563 ,0.20312347 + ,0.201682735 ,0.20473749 + ,0.203703491 ,0.20636692 + ,0.205706665 ,0.20801528 + ,0.207691581 ,0.20968521 + ,0.209657914 ,0.21137841 + ,0.211605535 ,0.21309554 + ,0.213534385 ,0.21483623 + ,0.215444346 ,0.21659918 + ,0.217335153 ,0.21838219 + ,0.219206331 ,0.22018238 + ,0.221057158 ,0.22199635 + ,0.222886667 ,0.22382040 + ,0.224693687 ,0.22565074 + ,0.226476899 ,0.22748367 + ,0.228234929 ,0.22931582 + ,0.229966452 ,0.23114424 + ,0.231670301 
,0.23296658 + ,0.233345582 ,0.23478109 + ,0.234991770 ,0.23658668 + ,0.236608793 ,0.23838287 + ,0.238197083 ,0.24016976 + ,0.239757602 ,0.24194791 + ,0.241291828 ,0.24371824 + ,0.255725281 ,0.26121652 + ,0.269643155 ,0.27818483 + ,0.282572599 ,0.29361598 + ,0.294230675 ,0.30801197 + ,0.305279532 ,0.32217337 + ,0.316231246 ,0.33525837 + ,0.327118515 ,0.34679924 + ,0.337725949 ,0.35781816 + ,0.348507973 ,0.36999019 + ,0.360275335 ,0.38380923 + ,0.372732081 ,0.39753304 + ,0.384865494 ,0.40899196 + ,0.396170468 ,0.41839542 + ,0.406525702 ,0.42826562 + ,0.416179917 ,0.44045085 + ,0.425830260 ,0.45430132 + ,0.435731315 ,0.46791777 + ,0.445574831 ,0.47988279 + ,0.455403556 ,0.48962935 + ,0.465473322 ,0.49746681 + ,0.475493015 ,0.50444076 + ,0.485104441 ,0.51166945 + ,0.494622601 ,0.51972051 + ,0.504479896 ,0.52847494 + ,0.514530980 ,0.53743526 + ,0.524379809 ,0.54615592 + ,0.533811067 ,0.55455128 + ,0.542964071 ,0.56289375 + ,0.552415670 ,0.57149093 + ,0.562702410 ,0.58046156 + ,0.573662536 ,0.58975098 + ,0.584499138 ,0.59931329 + ,0.594424353 ,0.60929957 + ,0.603091449 ,0.61973844 + ,0.610580104 ,0.63005679 + ,0.617291412 ,0.63940637 + ,0.623819392 ,0.64740125 + ,0.630625766 ,0.65425937 + ,0.637780329 ,0.66050475 + ,0.645128388 ,0.66675110 + ,0.652676811 ,0.67358269 + ,0.660706577 ,0.68141986 + ,0.669513580 ,0.69030010 + ,0.679074179 ,0.69978580 + ,0.688924859 ,0.70926800 + ,0.698359030 ,0.71838801 + ,0.706803246 ,0.72709029 + ,0.714138033 ,0.73536725 + ,0.720773425 ,0.74316348 + ,0.727376023 ,0.75049022 + ,0.734427437 ,0.75739163 + ,0.741910594 ,0.76379473 + ,0.749301181 ,0.76965778 + ,0.756038316 ,0.77526923 + ,0.762108143 ,0.78110525 + ,0.767991314 ,0.78741712 + ,0.774125688 ,0.79407849 + ,0.780642168 ,0.80072386 + ,0.787654735 ,0.80714732 + ,0.795560943 ,0.81355663 + ,0.804494568 ,0.82019692 + ,0.813689728 ,0.82693733 + ,0.822246533 ,0.83343489 + ,0.830074100 ,0.83948569 + ,0.837601422 ,0.84515904 + ,0.845257402 ,0.85070450 + ,0.853115769 ,0.85637797 + ,0.860852876 ,0.86248746 + ,0.868387995 ,0.86942987 + ,0.875848963 ,0.87706919 + ,0.882870692 ,0.88443942 + ,0.889442420 ,0.89118298 + ,0.896636569 ,0.89779842 + ,0.904626820 ,0.90423655 + ,0.911772218 ,0.91069863 + ,0.917549883 ,0.91754899 + ,0.923211670 ,0.92378977 + ,0.928746246 ,0.92901361 + ,0.934160521 ,0.93466485 + ,0.941177134 ,0.94142063 + ,0.949363922 ,0.94828957 + ,0.956847590 ,0.95420934 + ,0.963599180 ,0.95946403 + ,0.969055149 ,0.96451675 + ,0.973136108 ,0.97019334 + ,0.976868136 ,0.97565816 + ,0.981258840 ,0.98193948 + ,0.988264909 ,0.98843665 + ,0.994433844 ,0.99456404), + byrow=TRUE,ncol=2) + +P4=matrix(c( 0.01551012 ,0.04057726 ,0.04378631 + ,0.02786269 ,0.06546848 ,0.04944117 + ,0.03553717 ,0.08711494 ,0.05573588 + ,0.04023774 ,0.10496114 ,0.06252382 + ,0.04411887 ,0.12025416 ,0.06944572 + ,0.04807201 ,0.13340708 ,0.07602629 + ,0.05230897 ,0.14435931 ,0.08201922 + ,0.05685652 ,0.15314426 ,0.08742154 + ,0.06171756 ,0.16004283 ,0.09233439 + ,0.06689524 ,0.16549047 ,0.09686627 + ,0.07238520 ,0.16994001 ,0.10110915 + ,0.07816280 ,0.17377046 ,0.10514421 + ,0.08417136 ,0.17725430 ,0.10904320 + ,0.09031706 ,0.18056527 ,0.11285974 + ,0.09647542 ,0.18380356 ,0.11661935 + ,0.10250880 ,0.18702173 ,0.12031681 + ,0.10828903 ,0.19024325 ,0.12392259 + ,0.11371720 ,0.19347300 ,0.12739517 + ,0.11873498 ,0.19670257 ,0.13069417 + ,0.12332546 ,0.19991412 ,0.13379041 + ,0.12750572 ,0.20308477 ,0.13667102 + ,0.13131499 ,0.20619197 ,0.13933985 + ,0.13480237 ,0.20921901 ,0.14181443 + ,0.13801702 ,0.21215938 ,0.14412125 + ,0.14100194 ,0.21501886 
,0.14629078 + ,0.14379136 ,0.21781520 ,0.14835323 + ,0.14641091 ,0.22057551 ,0.15033563 + ,0.14887943 ,0.22333212 ,0.15226034 + ,0.15121153 ,0.22611802 ,0.15414467 + ,0.15342004 ,0.22896253 ,0.15600151 + ,0.15551793 ,0.23188795 ,0.15784045 + ,0.15751958 ,0.23490765 ,0.15966900 + ,0.15944129 ,0.23802542 ,0.16149379 + ,0.16130115 ,0.24123624 ,0.16332148 + ,0.16311848 ,0.24452789 ,0.16515915 + ,0.16491299 ,0.24788319 ,0.16701445 + ,0.16670375 ,0.25128243 ,0.16889527 + ,0.16850811 ,0.25470562 ,0.17080920 + ,0.17034067 ,0.25813429 ,0.17276281 + ,0.17221242 ,0.26155271 ,0.17476099 + ,0.17413006 ,0.26494845 ,0.17680637 + ,0.17609563 ,0.26831228 ,0.17889891 + ,0.17810649 ,0.27163767 ,0.18103585 + ,0.18015567 ,0.27491998 ,0.18321179 + ,0.18223251 ,0.27815551 ,0.18541909 + ,0.18432367 ,0.28134075 ,0.18764844 + ,0.18641426 ,0.28447176 ,0.18988943 + ,0.18848908 ,0.28754393 ,0.19213127 + ,0.19053376 ,0.29055199 ,0.19436336 + ,0.19253579 ,0.29349030 ,0.19657586 + ,0.19448531 ,0.29635330 ,0.19876015 + ,0.19637555 ,0.29913601 ,0.20090917 + ,0.19820304 ,0.30183457 ,0.20301766 + ,0.19996751 ,0.30444661 ,0.20508235 + ,0.20167155 ,0.30697148 ,0.20710203 + ,0.20332016 ,0.30941038 ,0.20907756 + ,0.20492013 ,0.31176625 ,0.21101180 + ,0.20647945 ,0.31404361 ,0.21290945 + ,0.20800667 ,0.31624825 ,0.21477685 + ,0.20951041 ,0.31838693 ,0.21662164 + ,0.21099886 ,0.32046697 ,0.21845237 + ,0.21247950 ,0.32249597 ,0.22027815 + ,0.21395877 ,0.32448147 ,0.22210809 + ,0.21544200 ,0.32643073 ,0.22395091 + ,0.21693331 ,0.32835049 ,0.22581448 + ,0.21843563 ,0.33024683 ,0.22770540 + ,0.21995078 ,0.33212513 ,0.22962873 + ,0.22147965 ,0.33398991 ,0.23158777 + ,0.22302229 ,0.33584491 ,0.23358389 + ,0.22457818 ,0.33769307 ,0.23561658 + ,0.22614641 ,0.33953655 ,0.23768352 + ,0.22772593 ,0.34137682 ,0.23978072 + ,0.22931573 ,0.34321472 ,0.24190279 + ,0.23091509 ,0.34505052 ,0.24404323 + ,0.23252371 ,0.34688402 ,0.24619468 + ,0.23414188 ,0.34871459 ,0.24834929 + ,0.23577053 ,0.35054128 ,0.25049895 + ,0.23741133 ,0.35236284 ,0.25263561 + ,0.23906664 ,0.35417779 ,0.25475146 + ,0.24073945 ,0.35598448 ,0.25683915 + ,0.24243332 ,0.35778113 ,0.25889193 + ,0.24415220 ,0.35956584 ,0.26090380 + ,0.24590024 ,0.36133667 ,0.26286960 + ,0.24768166 ,0.36309169 ,0.26478508 + ,0.24950046 ,0.36482898 ,0.26664698 + ,0.25136029 ,0.36654674 ,0.26845307 + ,0.25326420 ,0.36824330 ,0.27020217 + ,0.25521450 ,0.36991721 ,0.27189420 + ,0.25721259 ,0.37156729 ,0.27353012 + ,0.25925888 ,0.37319266 ,0.27511196 + ,0.26135269 ,0.37479283 ,0.27664276 + ,0.26349226 ,0.37636767 ,0.27812647 + ,0.26567472 ,0.37791746 ,0.27956791 + ,0.26789620 ,0.37944287 ,0.28097264 + ,0.27015188 ,0.38094492 ,0.28234681 + ,0.27243611 ,0.38242495 ,0.28369706 + ,0.27474260 ,0.38388453 ,0.28503033 + ,0.27706456 ,0.38532538 ,0.28635373 + ,0.27939486 ,0.38674928 ,0.28767441 + ,0.28172621 ,0.38815796 ,0.28899935 + ,0.30368556 ,0.40162893 ,0.30368088 + ,0.32094974 ,0.41407715 ,0.32178330 + ,0.33413706 ,0.42686681 ,0.33940421 + ,0.34519638 ,0.44106918 ,0.35258534 + ,0.35553655 ,0.45434941 ,0.36200235 + ,0.36622977 ,0.46608461 ,0.37040153 + ,0.37786945 ,0.47769452 ,0.37925941 + ,0.39037354 ,0.48941876 ,0.38854815 + ,0.40339828 ,0.50079642 ,0.39825958 + ,0.41579925 ,0.51188178 ,0.40920109 + ,0.42617477 ,0.52311541 ,0.42158463 + ,0.43455327 ,0.53487858 ,0.43453761 + ,0.44208791 ,0.54714403 ,0.44723309 + ,0.45001905 ,0.55923271 ,0.45932100 + ,0.45896481 ,0.57027401 ,0.47072048 + ,0.46860843 ,0.57999513 ,0.48143650 + ,0.47834450 ,0.58893700 ,0.49173131 + ,0.48781034 ,0.59778542 ,0.50198123 + 
,0.49695736 ,0.60684063 ,0.51232433 + ,0.50614965 ,0.61608565 ,0.52269385 + ,0.51570653 ,0.62525226 ,0.53300494 + ,0.52544301 ,0.63382683 ,0.54336275 + ,0.53507224 ,0.64135887 ,0.55402012 + ,0.54466956 ,0.64795069 ,0.56477936 + ,0.55450045 ,0.65423352 ,0.57492627 + ,0.56470767 ,0.66082598 ,0.58412311 + ,0.57520589 ,0.66798477 ,0.59274171 + ,0.58563795 ,0.67557212 ,0.60127760 + ,0.59553109 ,0.68316811 ,0.60997760 + ,0.60465763 ,0.69037519 ,0.61886863 + ,0.61309294 ,0.69714665 ,0.62782311 + ,0.62096747 ,0.70381712 ,0.63662138 + ,0.62843358 ,0.71066866 ,0.64498294 + ,0.63586964 ,0.71749020 ,0.65265906 + ,0.64375353 ,0.72384456 ,0.65957668 + ,0.65212793 ,0.72967232 ,0.66583193 + ,0.66049812 ,0.73529566 ,0.67159225 + ,0.66842274 ,0.74091198 ,0.67713840 + ,0.67595398 ,0.74641608 ,0.68293746 + ,0.68342673 ,0.75170365 ,0.68940237 + ,0.69097598 ,0.75681883 ,0.69648797 + ,0.69842249 ,0.76180033 ,0.70370713 + ,0.70563560 ,0.76670054 ,0.71064719 + ,0.71281012 ,0.77173041 ,0.71735349 + ,0.72033581 ,0.77705000 ,0.72412267 + ,0.72848382 ,0.78241378 ,0.73111595 + ,0.73708729 ,0.78733284 ,0.73832883 + ,0.74551054 ,0.79160726 ,0.74567870 + ,0.75319837 ,0.79552364 ,0.75291314 + ,0.76015349 ,0.79957172 ,0.75966877 + ,0.76670769 ,0.80408855 ,0.76584461 + ,0.77313827 ,0.80924036 ,0.77174026 + ,0.77962403 ,0.81516519 ,0.77765648 + ,0.78632238 ,0.82177046 ,0.78360390 + ,0.79330110 ,0.82852505 ,0.78963534 + ,0.80032049 ,0.83486410 ,0.79614367 + ,0.80690242 ,0.84071720 ,0.80341723 + ,0.81285318 ,0.84643390 ,0.81116026 + ,0.81843087 ,0.85232962 ,0.81884205 + ,0.82389821 ,0.85834921 ,0.82628132 + ,0.82925515 ,0.86412609 ,0.83357420 + ,0.83452095 ,0.86947172 ,0.84068096 + ,0.84009069 ,0.87468947 ,0.84740433 + ,0.84641252 ,0.88012125 ,0.85369747 + ,0.85337749 ,0.88550274 ,0.85984420 + ,0.86056675 ,0.89031731 ,0.86629374 + ,0.86782542 ,0.89466471 ,0.87323085 + ,0.87514888 ,0.89925142 ,0.88037505 + ,0.88234515 ,0.90465670 ,0.88731009 + ,0.88906611 ,0.91058956 ,0.89382801 + ,0.89489509 ,0.91600949 ,0.89989887 + ,0.89964387 ,0.92037164 ,0.90556334 + ,0.90392518 ,0.92425694 ,0.91107573 + ,0.90874993 ,0.92837270 ,0.91667095 + ,0.91433102 ,0.93276449 ,0.92229467 + ,0.92009330 ,0.93716660 ,0.92780387 + ,0.92614330 ,0.94129808 ,0.93326004 + ,0.93274464 ,0.94523346 ,0.93882150 + ,0.93904729 ,0.94970628 ,0.94451555 + ,0.94498605 ,0.95507385 ,0.95042484 + ,0.95172002 ,0.96051456 ,0.95623595 + ,0.95904715 ,0.96546866 ,0.96109440 + ,0.96528014 ,0.96983221 ,0.96583206 + ,0.96981308 ,0.97446466 ,0.97124046 + ,0.97390026 ,0.97945170 ,0.97556419 + ,0.97783109 ,0.98385517 ,0.97915772 + ,0.98192499 ,0.98887894 ,0.98323212 + ,0.98701317 ,0.99271102 ,0.98660775 + ,0.99226134 ,0.99482149 ,0.99079872), + byrow=TRUE,ncol=3) + + P5=matrix(c( 0.05652412 ,0.1064700 ,0.05210169 ,0.005493583 + ,0.06574073 ,0.1193620 ,0.06365731 ,0.013235067 + ,0.07151676 ,0.1298226 ,0.07635689 ,0.024181097 + ,0.07511946 ,0.1389007 ,0.09074149 ,0.036214033 + ,0.07828081 ,0.1482306 ,0.10635395 ,0.047401883 + ,0.08180130 ,0.1583439 ,0.12166792 ,0.056974651 + ,0.08583995 ,0.1691003 ,0.13539360 ,0.065203466 + ,0.09025786 ,0.1802110 ,0.14713767 ,0.072752928 + ,0.09481793 ,0.1914175 ,0.15726850 ,0.080170849 + ,0.09930536 ,0.2024707 ,0.16646093 ,0.087701224 + ,0.10358876 ,0.2130961 ,0.17530679 ,0.095324733 + ,0.10762929 ,0.2230221 ,0.18413231 ,0.102886098 + ,0.11145588 ,0.2320476 ,0.19299485 ,0.110212699 + ,0.11512861 ,0.2400992 ,0.20177360 ,0.117183521 + ,0.11870678 ,0.2472403 ,0.21027870 ,0.123747085 + ,0.12222956 ,0.2536340 ,0.21833522 ,0.129906857 + ,0.12571005 
,0.2594800 ,0.22582830 ,0.135695457 + ,0.12913936 ,0.2649560 ,0.23271445 ,0.141152226 + ,0.13249584 ,0.2701803 ,0.23901157 ,0.146309897 + ,0.13575514 ,0.2752025 ,0.24477977 ,0.151189960 + ,0.13889807 ,0.2800167 ,0.25010143 ,0.155803801 + ,0.14191516 ,0.2845862 ,0.25506478 ,0.160156707 + ,0.14480785 ,0.2888690 ,0.25975221 ,0.164252701 + ,0.14758751 ,0.2928368 ,0.26423308 ,0.168098998 + ,0.15027304 ,0.2964849 ,0.26856022 ,0.171709323 + ,0.15288821 ,0.2998329 ,0.27276929 ,0.175105614 + ,0.15545896 ,0.3029190 ,0.27688008 ,0.178317905 + ,0.15801110 ,0.3057918 ,0.28089924 ,0.181382526 + ,0.16056820 ,0.3085024 ,0.28482365 ,0.184339066 + ,0.16315001 ,0.3110976 ,0.28864393 ,0.187226779 + ,0.16577118 ,0.3136161 ,0.29234767 ,0.190081080 + ,0.16844049 ,0.3160868 ,0.29592209 ,0.192930735 + ,0.17116069 ,0.3185293 ,0.29935598 ,0.195796047 + ,0.17392879 ,0.3209550 ,0.30264093 ,0.198688172 + ,0.17673683 ,0.3233687 ,0.30577201 ,0.201609481 + ,0.17957306 ,0.3257704 ,0.30874798 ,0.204554768 + ,0.18242312 ,0.3281569 ,0.31157121 ,0.207513022 + ,0.18527142 ,0.3305232 ,0.31424738 ,0.210469505 + ,0.18810221 ,0.3328636 ,0.31678506 ,0.213407838 + ,0.19090055 ,0.3351728 ,0.31919516 ,0.216311897 + ,0.19365295 ,0.3374465 ,0.32149031 ,0.219167333 + ,0.19634778 ,0.3396823 ,0.32368416 ,0.221962623 + ,0.19897548 ,0.3418799 ,0.32579080 ,0.224689627 + ,0.20152856 ,0.3440415 ,0.32782411 ,0.227343686 + ,0.20400154 ,0.3461714 ,0.32979739 ,0.229923353 + ,0.20639083 ,0.3482759 ,0.33172307 ,0.232429882 + ,0.20869457 ,0.3503624 ,0.33361256 ,0.234866606 + ,0.21091258 ,0.3524387 ,0.33547633 ,0.237238310 + ,0.21304622 ,0.3545126 ,0.33732401 ,0.239550684 + ,0.21509838 ,0.3565905 ,0.33916450 ,0.241809893 + ,0.21707346 ,0.3586772 ,0.34100612 ,0.244022277 + ,0.21897727 ,0.3607754 ,0.34285661 ,0.246194145 + ,0.22081697 ,0.3628855 ,0.34472307 ,0.248331635 + ,0.22260093 ,0.3650055 ,0.34661180 ,0.250440593 + ,0.22433851 ,0.3671314 ,0.34852808 ,0.252526433 + ,0.22603978 ,0.3692577 ,0.35047594 ,0.254593979 + ,0.22771524 ,0.3713776 ,0.35245788 ,0.256647268 + ,0.22937544 ,0.3734840 ,0.35447483 ,0.258689342 + ,0.23103064 ,0.3755696 ,0.35652605 ,0.260722057 + ,0.23269046 ,0.3776277 ,0.35860928 ,0.262745921 + ,0.23436362 ,0.3796525 ,0.36072093 ,0.264760011 + ,0.23605766 ,0.3816393 ,0.36285653 ,0.266761969 + ,0.23777881 ,0.3835848 ,0.36501105 ,0.268748094 + ,0.23953181 ,0.3854871 ,0.36717945 ,0.270713533 + ,0.24131994 ,0.3873455 ,0.36935709 ,0.272652546 + ,0.24314491 ,0.3891604 ,0.37154010 ,0.274558844 + ,0.24500695 ,0.3909334 ,0.37372567 ,0.276425957 + ,0.24690483 ,0.3926667 ,0.37591220 ,0.278247625 + ,0.24883595 ,0.3943630 ,0.37809926 ,0.280018169 + ,0.25079643 ,0.3960256 ,0.38028752 ,0.281732830 + ,0.25278126 ,0.3976578 ,0.38247846 ,0.283388055 + ,0.25478439 ,0.3992633 ,0.38467408 ,0.284981717 + ,0.25679899 ,0.4008454 ,0.38687646 ,0.286513250 + ,0.25881759 ,0.4024077 ,0.38908746 ,0.287983708 + ,0.26083237 ,0.4039536 ,0.39130828 ,0.289395740 + ,0.26283537 ,0.4054864 ,0.39353919 ,0.290753487 + ,0.26481879 ,0.4070095 ,0.39577932 ,0.292062415 + ,0.26677527 ,0.4085258 ,0.39802648 ,0.293329080 + ,0.26869809 ,0.4100386 ,0.40027722 ,0.294560863 + ,0.27058146 ,0.4115507 ,0.40252683 ,0.295765673 + ,0.27242063 ,0.4130649 ,0.40476958 ,0.296951643 + ,0.27421212 ,0.4145841 ,0.40699893 ,0.298126831 + ,0.27595371 ,0.4161107 ,0.40920786 ,0.299298952 + ,0.27764451 ,0.4176472 ,0.41138921 ,0.300475144 + ,0.27928490 ,0.4191957 ,0.41353603 ,0.301661783 + ,0.28087643 ,0.4207583 ,0.41564194 ,0.302864352 + ,0.28242170 ,0.4223369 ,0.41770144 ,0.304087372 + ,0.28392419 
,0.4239330 ,0.41971011 ,0.305334383 + ,0.28538803 ,0.4255480 ,0.42166488 ,0.306607981 + ,0.28681780 ,0.4271829 ,0.42356406 ,0.307909888 + ,0.28821831 ,0.4288386 ,0.42540740 ,0.309241059 + ,0.28959443 ,0.4305155 ,0.42719605 ,0.310601803 + ,0.29095083 ,0.4322137 ,0.42893242 ,0.311991913 + ,0.29229189 ,0.4339327 ,0.43061997 ,0.313410782 + ,0.29362151 ,0.4356718 ,0.43226306 ,0.314857513 + ,0.29494305 ,0.4374295 ,0.43386662 ,0.316331000 + ,0.29625927 ,0.4392040 ,0.43543592 ,0.317829987 + ,0.29757232 ,0.4409928 ,0.43697631 ,0.319353103 + ,0.29888374 ,0.4427930 ,0.43849299 ,0.320898866 + ,0.30019451 ,0.4446010 ,0.43999077 ,0.322465682 + ,0.31328362 ,0.4621878 ,0.45451950 ,0.338826479 + ,0.32658914 ,0.4768889 ,0.46817448 ,0.354761127 + ,0.34081183 ,0.4889837 ,0.48092037 ,0.369582715 + ,0.35529268 ,0.4997010 ,0.49462451 ,0.384419904 + ,0.36855379 ,0.5096083 ,0.51023274 ,0.398755180 + ,0.37994080 ,0.5184617 ,0.52647200 ,0.411411797 + ,0.39062100 ,0.5263287 ,0.54163677 ,0.423038765 + ,0.40257143 ,0.5344950 ,0.55461543 ,0.434627278 + ,0.41621275 ,0.5440195 ,0.56505570 ,0.446414188 + ,0.43046818 ,0.5547192 ,0.57379461 ,0.458794883 + ,0.44486669 ,0.5653656 ,0.58229160 ,0.471543195 + ,0.45923530 ,0.5750124 ,0.59101203 ,0.483555129 + ,0.47296930 ,0.5839699 ,0.59924570 ,0.494652841 + ,0.48590094 ,0.5928607 ,0.60697069 ,0.505540897 + ,0.49834493 ,0.6017113 ,0.61531580 ,0.516236508 + ,0.51026131 ,0.6105778 ,0.62505014 ,0.526149184 + ,0.52119211 ,0.6200780 ,0.63576320 ,0.535122517 + ,0.53082589 ,0.6304166 ,0.64643772 ,0.543660340 + ,0.53943126 ,0.6407196 ,0.65638365 ,0.552285332 + ,0.54774724 ,0.6500929 ,0.66564380 ,0.561135243 + ,0.55642024 ,0.6586570 ,0.67450792 ,0.570308867 + ,0.56550181 ,0.6670994 ,0.68302769 ,0.580053861 + ,0.57464883 ,0.6756273 ,0.69143503 ,0.590349969 + ,0.58368440 ,0.6838335 ,0.70025072 ,0.600590044 + ,0.59259791 ,0.6914073 ,0.70958149 ,0.610032910 + ,0.60135765 ,0.6984377 ,0.71882369 ,0.618579939 + ,0.60997532 ,0.7051144 ,0.72721163 ,0.626728815 + ,0.61843676 ,0.7115369 ,0.73447399 ,0.634788345 + ,0.62674513 ,0.7178305 ,0.74089008 ,0.642664491 + ,0.63517287 ,0.7242564 ,0.74693265 ,0.650337943 + ,0.64411712 ,0.7310644 ,0.75290477 ,0.658048604 + ,0.65355691 ,0.7382701 ,0.75884913 ,0.665979304 + ,0.66286128 ,0.7456401 ,0.76482766 ,0.674038165 + ,0.67142296 ,0.7528643 ,0.77103158 ,0.682044900 + ,0.67945227 ,0.7597670 ,0.77745500 ,0.689918781 + ,0.68784816 ,0.7663382 ,0.78372370 ,0.697608584 + ,0.69717228 ,0.7725694 ,0.78940484 ,0.705026668 + ,0.70693776 ,0.7783837 ,0.79438682 ,0.712091065 + ,0.71609195 ,0.7837138 ,0.79895071 ,0.718801927 + ,0.72398625 ,0.7886294 ,0.80353613 ,0.725335927 + ,0.73066318 ,0.7934308 ,0.80838726 ,0.731990069 + ,0.73650853 ,0.7984516 ,0.81339086 ,0.738959992 + ,0.74191863 ,0.8036594 ,0.81827024 ,0.746236537 + ,0.74721366 ,0.8086236 ,0.82286981 ,0.753653660 + ,0.75262380 ,0.8129806 ,0.82718266 ,0.760952429 + ,0.75824105 ,0.8167987 ,0.83122205 ,0.767851565 + ,0.76409670 ,0.8204615 ,0.83502651 ,0.774128504 + ,0.77030423 ,0.8243454 ,0.83871807 ,0.779748097 + ,0.77696430 ,0.8286333 ,0.84243305 ,0.784919804 + ,0.78392814 ,0.8333291 ,0.84622101 ,0.789918249 + ,0.79087142 ,0.8383467 ,0.85001054 ,0.794848310 + ,0.79764992 ,0.8435563 ,0.85367430 ,0.799645577 + ,0.80440374 ,0.8488329 ,0.85720937 ,0.804300457 + ,0.81125920 ,0.8540607 ,0.86077075 ,0.808937266 + ,0.81807396 ,0.8590979 ,0.86444685 ,0.813668806 + ,0.82455298 ,0.8638895 ,0.86820083 ,0.818594382 + ,0.83054464 ,0.8685076 ,0.87202674 ,0.823874450 + ,0.83619452 ,0.8729580 ,0.87586813 ,0.829667963 + ,0.84182788 
,0.8772068 ,0.87951882 ,0.836051379 + ,0.84769382 ,0.8813966 ,0.88292402 ,0.842860562 + ,0.85379684 ,0.8857317 ,0.88639513 ,0.849605729 + ,0.85982991 ,0.8902404 ,0.89031782 ,0.855810573 + ,0.86534864 ,0.8948524 ,0.89481091 ,0.861500169 + ,0.87020343 ,0.8994987 ,0.89970471 ,0.867139594 + ,0.87472363 ,0.9041210 ,0.90468146 ,0.872950260 + ,0.87942450 ,0.9088721 ,0.90940470 ,0.878638514 + ,0.88437025 ,0.9139336 ,0.91367910 ,0.883816792 + ,0.88904758 ,0.9189854 ,0.91755701 ,0.888324527 + ,0.89322701 ,0.9235781 ,0.92120549 ,0.892386541 + ,0.89745058 ,0.9278084 ,0.92477201 ,0.896563267 + ,0.90239053 ,0.9320707 ,0.92830567 ,0.901221708 + ,0.90800180 ,0.9364567 ,0.93180350 ,0.906121919 + ,0.91346775 ,0.9405897 ,0.93545855 ,0.911051876 + ,0.91813323 ,0.9441004 ,0.93949383 ,0.916500400 + ,0.92221702 ,0.9470722 ,0.94376981 ,0.922684763 + ,0.92622555 ,0.9500208 ,0.94792822 ,0.928942208 + ,0.93049876 ,0.9535137 ,0.95168820 ,0.934711316 + ,0.93559016 ,0.9573979 ,0.95514668 ,0.940041161 + ,0.94158878 ,0.9611006 ,0.95847721 ,0.945997043 + ,0.94715113 ,0.9646607 ,0.96144331 ,0.952681937 + ,0.95168452 ,0.9683622 ,0.96448503 ,0.957991575 + ,0.95643627 ,0.9719767 ,0.96793903 ,0.962249417 + ,0.96150476 ,0.9751701 ,0.97130901 ,0.967311290 + ,0.96619142 ,0.9782121 ,0.97505959 ,0.972704298 + ,0.97232678 ,0.9821594 ,0.97956025 ,0.977392196 + ,0.97800785 ,0.9866658 ,0.98474153 ,0.982039939 + ,0.98168187 ,0.9907911 ,0.98956488 ,0.987315734 + ,0.98661473 ,0.9937451 ,0.99413910 ,0.991894481 + ,0.99161624 ,0.9972316 ,0.99730242 ,0.996798896), + byrow=TRUE,ncol=4) + + P6=matrix(c(0.02845841 ,0.1323308 ,0.1003219 ,0.1384688 ,0.05477254 + ,0.03862415 ,0.1504455 ,0.1516471 ,0.1577896 ,0.05960211 + ,0.05220254 ,0.1708109 ,0.1948779 ,0.1744245 ,0.06519134 + ,0.06621145 ,0.1900433 ,0.2222013 ,0.1878881 ,0.07081226 + ,0.07867811 ,0.2067988 ,0.2375538 ,0.1993997 ,0.07609186 + ,0.08886734 ,0.2210344 ,0.2471487 ,0.2097566 ,0.08088784 + ,0.09687788 ,0.2332937 ,0.2552052 ,0.2193390 ,0.08524504 + ,0.10316830 ,0.2441869 ,0.2636724 ,0.2284077 ,0.08932865 + ,0.10824058 ,0.2541494 ,0.2731186 ,0.2372217 ,0.09334220 + ,0.11250832 ,0.2634030 ,0.2834439 ,0.2459903 ,0.09745755 + ,0.11627805 ,0.2720146 ,0.2942621 ,0.2547909 ,0.10177306 + ,0.11976844 ,0.2799750 ,0.3051103 ,0.2635490 ,0.10630380 + ,0.12313219 ,0.2872606 ,0.3155798 ,0.2720913 ,0.11099790 + ,0.12647380 ,0.2938675 ,0.3253870 ,0.2802295 ,0.11576708 + ,0.12986454 ,0.2998218 ,0.3343885 ,0.2878280 ,0.12051851 + ,0.13335383 ,0.3051771 ,0.3425552 ,0.2948321 ,0.12517872 + ,0.13697580 ,0.3100060 ,0.3499315 ,0.3012592 ,0.12970519 + ,0.14075114 ,0.3143922 ,0.3565967 ,0.3071707 ,0.13408620 + ,0.14468601 ,0.3184238 ,0.3626399 ,0.3126418 ,0.13833266 + ,0.14877041 ,0.3221871 ,0.3681465 ,0.3177403 ,0.14246686 + ,0.15297783 ,0.3257612 ,0.3731941 ,0.3225181 ,0.14651218 + ,0.15726699 ,0.3292136 ,0.3778517 ,0.3270116 ,0.15048655 + ,0.16158577 ,0.3325961 ,0.3821812 ,0.3312470 ,0.15440006 + ,0.16587656 ,0.3359439 ,0.3862375 ,0.3352459 ,0.15825605 + ,0.17008218 ,0.3392769 ,0.3900693 ,0.3390287 ,0.16205392 + ,0.17415147 ,0.3426022 ,0.3937200 ,0.3426162 ,0.16579194 + ,0.17804364 ,0.3459191 ,0.3972286 ,0.3460302 ,0.16946914 + ,0.18173091 ,0.3492229 ,0.4006301 ,0.3492928 ,0.17308558 + ,0.18519926 ,0.3525091 ,0.4039573 ,0.3524272 ,0.17664150 + ,0.18844742 ,0.3557754 ,0.4072408 ,0.3554574 ,0.18013586 + ,0.19148459 ,0.3590236 ,0.4105092 ,0.3584099 ,0.18356508 + ,0.19432745 ,0.3622586 ,0.4137885 ,0.3613127 ,0.18692243 + ,0.19699700 ,0.3654879 ,0.4171007 ,0.3641954 ,0.19019849 + ,0.19951573 ,0.3687203 ,0.4204629 
,0.3670873 ,0.19338240 + ,0.20190550 ,0.3719641 ,0.4238853 ,0.3700154 ,0.19646371 + ,0.20418614 ,0.3752258 ,0.4273710 ,0.3730020 ,0.19943431 + ,0.20637484 ,0.3785096 ,0.4309154 ,0.3760622 ,0.20229001 + ,0.20848607 ,0.3818167 ,0.4345067 ,0.3792026 ,0.20503150 + ,0.21053195 ,0.3851457 ,0.4381272 ,0.3824203 ,0.20766451 + ,0.21252275 ,0.3884927 ,0.4417550 ,0.3857039 ,0.21019930 + ,0.21446746 ,0.3918524 ,0.4453661 ,0.3890342 ,0.21264952 + ,0.21637417 ,0.3952189 ,0.4489369 ,0.3923868 ,0.21503090 + ,0.21825036 ,0.3985867 ,0.4524457 ,0.3957345 ,0.21735972 + ,0.22010296 ,0.4019509 ,0.4558750 ,0.3990498 ,0.21965153 + ,0.22193833 ,0.4053081 ,0.4592123 ,0.4023072 ,0.22192010 + ,0.22376206 ,0.4086562 ,0.4624507 ,0.4054849 ,0.22417673 + ,0.22557886 ,0.4119939 ,0.4655884 ,0.4085662 ,0.22642994 + ,0.22739239 ,0.4153207 ,0.4686284 ,0.4115399 ,0.22868536 + ,0.22920525 ,0.4186356 ,0.4715768 ,0.4144002 ,0.23094602 + ,0.23101900 ,0.4219366 ,0.4744418 ,0.4171462 ,0.23321268 + ,0.23283434 ,0.4252203 ,0.4772317 ,0.4197810 ,0.23548425 + ,0.23465133 ,0.4284812 ,0.4799538 ,0.4223106 ,0.23775824 + ,0.23646965 ,0.4317114 ,0.4826136 ,0.4247430 ,0.24003124 + ,0.23828893 ,0.4349012 ,0.4852138 ,0.4270871 ,0.24229916 + ,0.24010890 ,0.4380394 ,0.4877545 ,0.4293518 ,0.24455754 + ,0.24192967 ,0.4411139 ,0.4902331 ,0.4315456 ,0.24680175 + ,0.24375172 ,0.4441127 ,0.4926456 ,0.4336759 ,0.24902706 + ,0.24557590 ,0.4470248 ,0.4949865 ,0.4357493 ,0.25122873 + ,0.24740333 ,0.4498409 ,0.4972506 ,0.4377710 ,0.25340210 + ,0.24923515 ,0.4525542 ,0.4994334 ,0.4397457 ,0.25554261 + ,0.25107232 ,0.4551608 ,0.5015319 ,0.4416770 ,0.25764589 + ,0.25291533 ,0.4576600 ,0.5035450 ,0.4435684 ,0.25970787 + ,0.25476396 ,0.4600538 ,0.5054743 ,0.4454232 ,0.26172490 + ,0.25661711 ,0.4623474 ,0.5073235 ,0.4472447 ,0.26369385 + ,0.25847267 ,0.4645479 ,0.5090988 ,0.4490365 ,0.26561225 + ,0.26032752 ,0.4666643 ,0.5108083 ,0.4508025 ,0.26747840 + ,0.26217762 ,0.4687065 ,0.5124617 ,0.4525472 ,0.26929144 + ,0.26401815 ,0.4706847 ,0.5140696 ,0.4542750 ,0.27105140 + ,0.26584379 ,0.4726089 ,0.5156436 ,0.4559907 ,0.27275919 + ,0.26764897 ,0.4744884 ,0.5171951 ,0.4576988 ,0.27441663 + ,0.26942825 ,0.4763314 ,0.5187352 ,0.4594038 ,0.27602636 + ,0.27117659 ,0.4781447 ,0.5202743 ,0.4611098 ,0.27759181 + ,0.27288969 ,0.4799335 ,0.5218217 ,0.4628199 ,0.27911708 + ,0.27456420 ,0.4817019 ,0.5233854 ,0.4645368 ,0.28060691 + ,0.27619794 ,0.4834523 ,0.5249719 ,0.4662620 ,0.28206660 + ,0.27779004 ,0.4851863 ,0.5265858 ,0.4679962 ,0.28350186 + ,0.27934099 ,0.4869043 ,0.5282303 ,0.4697390 ,0.28491882 + ,0.28085262 ,0.4886062 ,0.5299067 ,0.4714888 ,0.28632382 + ,0.28232802 ,0.4902914 ,0.5316147 ,0.4732435 ,0.28772339 + ,0.28377144 ,0.4919591 ,0.5333528 ,0.4749997 ,0.28912404 + ,0.28518812 ,0.4936083 ,0.5351181 ,0.4767537 ,0.29053216 + ,0.28658401 ,0.4952382 ,0.5369065 ,0.4785012 ,0.29195381 + ,0.28796561 ,0.4968483 ,0.5387135 ,0.4802377 ,0.29339456 + ,0.28933969 ,0.4984381 ,0.5405341 ,0.4819586 ,0.29485931 + ,0.29071301 ,0.5000073 ,0.5423631 ,0.4836596 ,0.29635210 + ,0.29209211 ,0.5015561 ,0.5441954 ,0.4853366 ,0.29787602 + ,0.29348304 ,0.5030846 ,0.5460263 ,0.4869864 ,0.29943305 + ,0.29489121 ,0.5045932 ,0.5478516 ,0.4886062 ,0.30102400 + ,0.29632114 ,0.5060823 ,0.5496679 ,0.4901943 ,0.30264856 + ,0.29777639 ,0.5075524 ,0.5514723 ,0.4917498 ,0.30430529 + ,0.29925947 ,0.5090041 ,0.5532629 ,0.4932728 ,0.30599175 + ,0.30077178 ,0.5104381 ,0.5550384 ,0.4947644 ,0.30770464 + ,0.30231363 ,0.5118548 ,0.5567981 ,0.4962267 ,0.30943996 + ,0.30388432 ,0.5132549 ,0.5585419 ,0.4976622 
,0.31119330 + ,0.30548226 ,0.5146391 ,0.5602700 ,0.4990746 ,0.31295994 + ,0.30710505 ,0.5160078 ,0.5619830 ,0.5004678 ,0.31473518 + ,0.30874972 ,0.5173619 ,0.5636814 ,0.5018461 ,0.31651448 + ,0.31041287 ,0.5187020 ,0.5653660 ,0.5032140 ,0.31829362 + ,0.31209087 ,0.5200289 ,0.5670370 ,0.5045763 ,0.32006892 + ,0.31378008 ,0.5213431 ,0.5686948 ,0.5059374 ,0.32183726 + ,0.33073053 ,0.5339107 ,0.5844004 ,0.5201736 ,0.33893240 + ,0.34766587 ,0.5455699 ,0.5977995 ,0.5358098 ,0.35618985 + ,0.36497276 ,0.5565563 ,0.6088557 ,0.5512613 ,0.37599206 + ,0.38214131 ,0.5670711 ,0.6182221 ,0.5653348 ,0.39630223 + ,0.39892707 ,0.5774423 ,0.6269864 ,0.5781859 ,0.41294824 + ,0.41477035 ,0.5887255 ,0.6365914 ,0.5904167 ,0.42604441 + ,0.42985193 ,0.6012620 ,0.6472327 ,0.6023352 ,0.43772949 + ,0.44459125 ,0.6137931 ,0.6578100 ,0.6136908 ,0.44944166 + ,0.45830629 ,0.6247817 ,0.6680975 ,0.6238971 ,0.46148163 + ,0.47040113 ,0.6338022 ,0.6784202 ,0.6325018 ,0.47293114 + ,0.48149178 ,0.6415214 ,0.6879435 ,0.6396385 ,0.48317936 + ,0.49230962 ,0.6486243 ,0.6955894 ,0.6462754 ,0.49290670 + ,0.50293366 ,0.6556637 ,0.7014553 ,0.6530889 ,0.50315292 + ,0.51329043 ,0.6629905 ,0.7067582 ,0.6598945 ,0.51404269 + ,0.52322279 ,0.6704942 ,0.7127512 ,0.6664369 ,0.52474488 + ,0.53250776 ,0.6779803 ,0.7196826 ,0.6730249 ,0.53501309 + ,0.54136758 ,0.6852235 ,0.7269392 ,0.6804293 ,0.54550010 + ,0.55057692 ,0.6920138 ,0.7339733 ,0.6891016 ,0.55652673 + ,0.56083373 ,0.6985598 ,0.7407423 ,0.6983890 ,0.56770747 + ,0.57188476 ,0.7051878 ,0.7473342 ,0.7069275 ,0.57851707 + ,0.58258251 ,0.7118141 ,0.7535933 ,0.7139242 ,0.58875425 + ,0.59206406 ,0.7181377 ,0.7592423 ,0.7197399 ,0.59840912 + ,0.60043277 ,0.7241198 ,0.7641102 ,0.7253388 ,0.60737553 + ,0.60824065 ,0.7302008 ,0.7682440 ,0.7313809 ,0.61559579 + ,0.61587328 ,0.7369912 ,0.7720497 ,0.7377497 ,0.62335187 + ,0.62350556 ,0.7444895 ,0.7761923 ,0.7438837 ,0.63120528 + ,0.63130400 ,0.7518477 ,0.7811232 ,0.7494397 ,0.63946844 + ,0.63950683 ,0.7582114 ,0.7868131 ,0.7545253 ,0.64791259 + ,0.64822576 ,0.7634978 ,0.7929559 ,0.7594398 ,0.65618729 + ,0.65718853 ,0.7682302 ,0.7991861 ,0.7643607 ,0.66415285 + ,0.66586026 ,0.7729213 ,0.8051205 ,0.7692685 ,0.67170795 + ,0.67385499 ,0.7777031 ,0.8105010 ,0.7741140 ,0.67868318 + ,0.68116860 ,0.7824831 ,0.8154188 ,0.7789630 ,0.68495291 + ,0.68817020 ,0.7872791 ,0.8202914 ,0.7838794 ,0.69056808 + ,0.69545456 ,0.7921821 ,0.8255185 ,0.7888123 ,0.69590049 + ,0.70339322 ,0.7971795 ,0.8311156 ,0.7937833 ,0.70157680 + ,0.71168363 ,0.8021996 ,0.8366622 ,0.7989904 ,0.70809578 + ,0.71966391 ,0.8071535 ,0.8415997 ,0.8045273 ,0.71550505 + ,0.72706877 ,0.8119058 ,0.8456326 ,0.8101695 ,0.72339718 + ,0.73411394 ,0.8163800 ,0.8489277 ,0.8155690 ,0.73113224 + ,0.74096543 ,0.8207198 ,0.8519149 ,0.8205280 ,0.73821297 + ,0.74754062 ,0.8252428 ,0.8548854 ,0.8251003 ,0.74459805 + ,0.75389357 ,0.8301610 ,0.8578935 ,0.8295502 ,0.75057740 + ,0.76044477 ,0.8353000 ,0.8609909 ,0.8341226 ,0.75637074 + ,0.76755285 ,0.8401622 ,0.8643055 ,0.8388245 ,0.76200276 + ,0.77509023 ,0.8444077 ,0.8678815 ,0.8434774 ,0.76755578 + ,0.78258820 ,0.8482099 ,0.8716303 ,0.8478628 ,0.77338458 + ,0.78956970 ,0.8520035 ,0.8754454 ,0.8518595 ,0.77990334 + ,0.79578357 ,0.8559626 ,0.8792329 ,0.8555642 ,0.78708237 + ,0.80125054 ,0.8599094 ,0.8828409 ,0.8592369 ,0.79436547 + ,0.80604156 ,0.8636647 ,0.8861239 ,0.8631591 ,0.80125859 + ,0.81021918 ,0.8672377 ,0.8890983 ,0.8674684 ,0.80774304 + ,0.81409451 ,0.8707223 ,0.8919124 ,0.8719953 ,0.81404504 + ,0.81820723 ,0.8742131 ,0.8946943 ,0.8764030 ,0.82035059 + 
,0.82291940 ,0.8778013 ,0.8975358 ,0.8804936 ,0.82673907 + ,0.82816550 ,0.8815756 ,0.9005363 ,0.8842174 ,0.83309877 + ,0.83358635 ,0.8855560 ,0.9037186 ,0.8875872 ,0.83911146 + ,0.83888900 ,0.8896385 ,0.9070040 ,0.8907332 ,0.84454564 + ,0.84407903 ,0.8937433 ,0.9103602 ,0.8937858 ,0.84960856 + ,0.84924411 ,0.8979929 ,0.9137611 ,0.8967297 ,0.85480217 + ,0.85426493 ,0.9025422 ,0.9170020 ,0.8996128 ,0.86038748 + ,0.85905395 ,0.9072085 ,0.9198988 ,0.9027462 ,0.86618304 + ,0.86383966 ,0.9115513 ,0.9226146 ,0.9064442 ,0.87189147 + ,0.86878464 ,0.9153553 ,0.9254626 ,0.9106255 ,0.87743613 + ,0.87369919 ,0.9187912 ,0.9285133 ,0.9149145 ,0.88292991 + ,0.87848296 ,0.9221554 ,0.9315980 ,0.9190752 ,0.88839609 + ,0.88333342 ,0.9255519 ,0.9345754 ,0.9230635 ,0.89375641 + ,0.88852444 ,0.9289291 ,0.9374606 ,0.9268703 ,0.89911378 + ,0.89422273 ,0.9325025 ,0.9403301 ,0.9305157 ,0.90464514 + ,0.90022856 ,0.9367256 ,0.9431860 ,0.9339928 ,0.91036426 + ,0.90634711 ,0.9416530 ,0.9460100 ,0.9373097 ,0.91620446 + ,0.91250898 ,0.9465501 ,0.9489720 ,0.9406272 ,0.92204347 + ,0.91826744 ,0.9504197 ,0.9522643 ,0.9440552 ,0.92778824 + ,0.92342137 ,0.9530621 ,0.9556061 ,0.9473789 ,0.93323589 + ,0.92839321 ,0.9551899 ,0.9585219 ,0.9505219 ,0.93796702 + ,0.93344816 ,0.9574912 ,0.9611978 ,0.9538301 ,0.94208767 + ,0.93844307 ,0.9600012 ,0.9642909 ,0.9574326 ,0.94613939 + ,0.94305951 ,0.9625902 ,0.9679124 ,0.9608894 ,0.95006010 + ,0.94732447 ,0.9652668 ,0.9714955 ,0.9636651 ,0.95357045 + ,0.95204731 ,0.9681020 ,0.9746738 ,0.9659957 ,0.95718333 + ,0.95754260 ,0.9712236 ,0.9775883 ,0.9688384 ,0.96124457 + ,0.96262769 ,0.9744829 ,0.9802432 ,0.9724819 ,0.96510323 + ,0.96744559 ,0.9773397 ,0.9826635 ,0.9763755 ,0.96890250 + ,0.97198841 ,0.9802640 ,0.9855485 ,0.9794530 ,0.97335289 + ,0.97581215 ,0.9841504 ,0.9886425 ,0.9820909 ,0.97807428 + ,0.98074122 ,0.9873238 ,0.9909320 ,0.9853537 ,0.98318562 + ,0.98656778 ,0.9900431 ,0.9927363 ,0.9882823 ,0.98878283 + ,0.99129231 ,0.9932818 ,0.9951413 ,0.9923500 ,0.99385907 + ,0.99566295 ,0.9970155 ,0.9968195 ,0.9963426 ,0.99728643), + byrow=TRUE,ncol=5) + + if(is.null(PV)){ + if(!com.p.dist){ +if(p==3)rem=P3 +if(p==4)rem=P4 +if(p==5)rem=P5 +if(p==6)rem=P6 +}} +est=NA +for(j in 1:p)est[j]=corfun(x[,j],y)$cor +#id=which(est==max(est)) +R=order(est,decreasing=TRUE) +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +# +# If you use corfun=scor, set plotit=F +# +data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot) +data=listm(t(data)) +if(MC){ +library(parallel) +bvec<-mclapply(data,corCOMmcp_sub,x,y,corfun,...) +} +if(!MC)bvec<-lapply(data,corCOMmcp_sub,x,y,corfun,...) 
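+# Assemble the bootstrap correlations into a matrix; for each pair of adjacently ranked predictors,
+# compute a percentile bootstrap confidence interval and p-value, calibrate the p-value using the
+# null quantiles in rem, then adjust for FWE via p.adjust.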
+output=matrix(NA,nrow=pm1,ncol=9)
+dimnames(output)=list(NULL,c('IV.1','IV.2','Est.1',
+'Est.2','Dif','ci.low','ci.hi','p.value','adj.p.value'))
+mat=matrix(NA,nrow=nboot,ncol=p)
+ihi<-floor((1-alpha/2)*nboot+.5)
+ilow<-floor((alpha/2)*nboot+.5)
+for(i in 1:nboot)mat[i,]=bvec[[i]]
+for(j in 2:p){
+k=j-1
+output[k,1]=R[k]
+output[k,2]=R[j]
+output[k,3]=est[R[k]]
+output[k,4]=est[R[j]]
+bsort<-sort(mat[,R[k]]-mat[,R[j]])
+output[k,5]=est[R[k]]-est[R[j]]
+output[k,6]=bsort[ilow]
+output[k,7]=bsort[ihi]
+pv=mean(bsort<0)+.5*mean(bsort==0)
+output[k,8]=2*min(c(pv,1-pv))
+flag=output[k,8]>=rem[,k]
+ID=which(flag==TRUE)
+ic=max(ID,1)
+output[k,8]=L[ic]
+}
+output[,9]=p.adjust(output[,8],method=method)
+list(results=output)
+}
+
+
+
+
+corREGorder.crit<-function(p,n,corfun=wincor,iter=1000,nboot=1000,alpha=.05,SEED=TRUE,MC=FALSE,pr=TRUE,...){
+#
+# Estimate the null distribution of the p-values for corREGorder
+#
+#
+if(SEED)set.seed(2) # set seed of random number generator so that
+# results can be duplicated.
+#
+if(pr)print('Execution time might take several minutes')
+pm1=p-1
+rem=matrix(NA,iter,pm1)
+p1=p+1
+for(I in 1:iter){
+x=rmul(n,p)
+y=rnorm(n)
+est=NA
+for(j in 1:p)est[j]=corfun(x[,j],y)$cor
+R=order(est,decreasing=TRUE)
+#
+# If you use corfun=scor, set plotit=F
+#
+data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot)
+data=listm(t(data))
+if(MC){
+library(parallel)
+bvec<-mclapply(data,corCOMmcp_sub,x,y,corfun,...)
+}
+if(!MC)bvec<-lapply(data,corCOMmcp_sub,x,y,corfun,...)
+output=matrix(NA,nrow=pm1,ncol=8)
+dimnames(output)=list(NULL,c('IV','Largest.Est','Est.2','Dif','ci.low','ci.hi','p.value','adj.p.value'))
+mat=matrix(NA,nrow=nboot,ncol=p)
+ihi<-floor((1-alpha/2)*nboot+.5)
+ilow<-floor((alpha/2)*nboot+.5)
+for(i in 1:nboot)mat[i,]=bvec[[i]]
+for(j in 2:p){
+k=j-1
+bsort<-sort(mat[,R[k]]-mat[,R[j]])
+pv=mean(bsort<0)+.5*mean(bsort==0)
+rem[I,k]=2*min(c(pv,1-pv))
+}
+}
+rem
+}
+
+corCOM.DVvsIV.crit<-function(p,n,corfun=wincor,iter=1000,nboot=500,alpha=.05,SEED=TRUE,MC=FALSE,...){
+#
+# Null p-value distribution for corCOM.DVvsIV
+#
+# p: number of independent variables
+# n: sample size
+#
+if(SEED)set.seed(2) # set seed of random number generator so that
+# results can be duplicated.
+#
+pm1=p-1
+rem=matrix(NA,iter,pm1)
+p1=p+1
+for(I in 1:iter){
+x=rmul(n,p)
+y=rnorm(n)
+est=NA
+for(j in 1:p)est[j]=corfun(x[,j],y)$cor
+id=which(est==max(est))
+R=order(est,decreasing=TRUE)
+#
+# If you use corfun=scor, set plotit=F
+#
+data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot)
+data=listm(t(data))
+if(MC){
+library(parallel)
+bvec<-mclapply(data,corCOMmcp_sub,x,y,corfun,...)
+}
+if(!MC)bvec<-lapply(data,corCOMmcp_sub,x,y,corfun,...)
+mat=matrix(NA,nrow=nboot,ncol=p)
+ihi<-floor((1-alpha/2)*nboot+.5)
+ilow<-floor((alpha/2)*nboot+.5)
+for(i in 1:nboot)mat[i,]=bvec[[i]]
+for(j in 2:p){
+k=j-1
+bsort<-sort(mat[,R[1]]-mat[,R[j]])
+pv=mean(bsort<0)+.5*mean(bsort==0)
+rem[I,k]=2*min(c(pv,1-pv))
+}
+}
+rem
+}
+
+rexgauss<-function(n,mu=0,sigma=1,rate=1){
+#
+# Generate data from an Ex-Gaussian distribution
+#
+x=rnorm(n,mean=mu,sd=sigma)
+y=rexp(n,rate=rate)
+z=x+y
+z
+}
+
+
+
+t1way.EXES.ci<-function(x,alpha=.05,tr=0,nboot=500,SEED=TRUE,ITER=5,adj=TRUE,...){
+#
+# Confidence interval for explanatory measure of effect size
+#
+# ITER: yuenv2, for unequal sample sizes.
iterates to get estimate +# +if(is.data.frame(x))x=as.matrix(x) +if(is.matrix(x))x=listm(x) +J=length(x) +n=lapply(x,length) +if(SEED)set.seed(2) +chk=t1wayv2(x,tr=tr,SEED=FALSE) +v=list() +val=NA +x=elimna(x) +for(i in 1:nboot){ +for(j in 1:J)v[[j]]=sample(x[[j]],replace=TRUE) +val[i]=t1wayv2(v,tr=tr,nboot=ITER,SEED=FALSE)$Effect.Size +} +ilow<-round((alpha/2) * nboot) +ihi<-nboot - ilow +ilow<-ilow+1 +val=sort(val) +ci=val[ilow] +ci[2]=val[ihi] +if(chk$p.value>alpha)ci[1]=0 +if(adj){ +fix=c(1, 1.268757, 1.467181, 1.628221, 1.763191, 1.856621, 1.993326) +if(J>8)print('No adjustment available when J>8') +J1=J-1 +if(j<=8){ +chk$Effect.Size=fix[J1]*chk$Effect.Size +ci=fix[J1]*ci +} +} +list(Effect.Size=chk$Effect.Size,ci=ci) +} + + +KMSmcp.ci<-function(x,tr=.2,alpha=0.05,SEED=TRUE,nboot=500,CI=TRUE,method='hoch'){ +# +# Estimate KMS effect size when comparing all +# pairs of groups in a one-way (independent) groups design +# +# CI=TRUE: confidence intervals for the measure of effect size are computed. +# +if(is.matrix(x) || is.data.frame(x))x<-listm(x) +if(!is.list(x))stop('Data must be stored in a matrix or in list mode.') +J=length(x) +Jall=(J^2-J)/2 +con1=con1way(J) +output=matrix(NA,nrow=Jall,ncol=7) +dimnames(output)=list(NULL,c('Group','Group','Effect.Size','low.ci','up.ci','p.value','p.adjust')) +ic=0 +for(j in 1:J){ +for(k in 1:J){ +if(j0))Best=output[id,2] +if(flag==Jm1)Best='All' +setClass('BIN',slots=c('Group.with.largest.estimate','Select.Best.p.value','Larger.than','n','output')) +put=new('BIN',Group.with.largest.estimate=R[[1]],Select.Best.p.value=pv,Larger.than=Best,n=n,output=output) +put +} + +deplin.ES.summary.CI<-function(x,con=NULL,tr=.2,REL.MAG=NULL,SEED=TRUE,nboot=1000){ +# +# For J dependent variables, +# compute four measures of effect size based on a linear contrast of the J variables specified by the argument +# con +# +# Generalizes dep.ES.summary.CI +# Example: +# If x is a matrix with two columns and con=c(1,-1), get the same results as dep.ES.summary.CI +# +# By default, do all pairwise comparisons +# +# Measures of effect size: +# +# AKP: robust standardized difference similar to Cohen's d +# QS: Quantile shift based on the median of the distribution of difference scores, +# QStr: Quantile shift based on the trimmed mean of the distribution of X-Y +# SIGN: P(X1){ +temp<-c(temp,x[[flag]]) +}} +data[[k]]<-temp +} +POOLED<-rmmcp(data,con=con,tr=tr,alpha=alpha,dif=dif,hoch=hoch) +PSI=NULL +A=NULL +} +if(!pool){ +A=list() +PSI=list() +mat<-matrix(c(1:JK),ncol=K,byrow=TRUE) +for(j in 1:J){ +data<-list() +ic<-0 +for(k in 1:K){ +ic<-ic+1 +data[[ic]]<-x[[mat[j,k]]] +} +temp=rmmcp(data,con=con,tr=tr,alpha=alpha,dif=dif,hoch=hoch) +A[[j]]=temp$test +PSI[[j]]=temp$psihat +POOLED=NULL +}} +list(TESTS.4.EACH.LEVEL.OF.A=A,PSIHAT.4.EACH.LEVEL.OF.A=PSI,POOLED.RESULTS=POOLED) +} + + +in.interval<-function(x,low,up){ +# +# flag values in the vector x between low and up +# +x=elimna(x) +n=length(x) +id=rep(FALSE,n) +flag1=x<=up +flag2=x>=low +flag=flag1*flag2 +id[flag]=TRUE +id +} + + +wwlin.es<-function(J,K,x,tr = 0.2, REL.MAG = NULL, SEED = TRUE, nboot = 1000){ +# +# # For within-by-within +# +# Effect sizes based on linear sum of the random variables. 
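+# Example (illustrative, hypothetical data): for a 2-by-2 within-by-within design stored in a matrix or
+# data frame x with J*K=4 columns, wwlin.es(2,2,x) returns effect sizes for Factor A, Factor B and the
+# A-by-B interaction.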
+#
+# Simplest case, compute effect sizes on x-y, difference scores
+#
+con=con2way(J,K)
+A=deplin.ES.summary.CI(x,con=con$conA,tr=tr,SEED=SEED,REL.MAG=REL.MAG,nboot=nboot)
+B=deplin.ES.summary.CI(x,con=con$conB,tr=tr,SEED=SEED,REL.MAG=REL.MAG,nboot=nboot)
+AB=deplin.ES.summary.CI(x,con=con$conAB,tr=tr,SEED=SEED,REL.MAG=REL.MAG,nboot=nboot)
+list(Factor.A=A,Factor.B=B,Interactions=AB)
+}
+
+wwwlin.es<-function(J,K,L,x,tr = 0.2, REL.MAG = NULL, SEED = TRUE, nboot = 1000){
+#
+# For within-by-within-by-within
+#
+# Effect sizes based on linear sum of the random variables.
+# Simplest case, compute effect sizes based on x-y, difference scores
+#
+con=con3way(J,K,L)
+A=deplin.ES.summary.CI(x,con=con$conA,tr=tr,SEED=SEED,REL.MAG=REL.MAG,nboot=nboot)
+B=deplin.ES.summary.CI(x,con=con$conB,tr=tr,SEED=SEED,REL.MAG=REL.MAG,nboot=nboot)
+C=deplin.ES.summary.CI(x,con=con$conC,tr=tr,SEED=SEED,REL.MAG=REL.MAG,nboot=nboot)
+AB=deplin.ES.summary.CI(x,con=con$conAB,tr=tr,SEED=SEED,REL.MAG=REL.MAG,nboot=nboot)
+AC=deplin.ES.summary.CI(x,con=con$conAC,tr=tr,SEED=SEED,REL.MAG=REL.MAG,nboot=nboot)
+BC=deplin.ES.summary.CI(x,con=con$conBC,tr=tr,SEED=SEED,REL.MAG=REL.MAG,nboot=nboot)
+ABC=deplin.ES.summary.CI(x,con=con$conABC,tr=tr,SEED=SEED,REL.MAG=REL.MAG,nboot=nboot)
+list(Factor.A=A,Factor.B=B,Factor.C=C,Inter.AB=AB,Inter.AC=AC,Inter.BC=BC,Inter.ABC=ABC)
+}
+
+bwwA.es<-function(J,K,L,x,fun=QSci,nboot=1000,...){
+#
+# For every two levels of Factor A, compute effect size
+# and do this for each
+# level of Factors B and C.
+#
+if(is.matrix(x) || is.data.frame(x))x=listm(x)
+JKL=J*K*L
+KL=K*L
+id=matrix(c(1:JKL),ncol=KL,byrow=TRUE)
+con=con.all.pairs(J)
+A=list()
+N=(J^2-J)/2
+for(i in 1:N){
+w=which(con[,i]!=0)
+isel=id[w,]
+A[[i]]=bwwA.es.sub(2,K,L,x[isel],fun=fun,nboot=nboot,...)
+}
+list(A=A,con=con)
+}
+
+
+
+bwwA.es.sub<-function(J,K,L,x,fun=QSci,nboot=1000,...){
+#
+# Effect sizes for the between factor, computed
+# for each level of the within factors
+#
+if(J!=2)stop('Must have J=2')
+JKL=J*K*L
+KL=K*L
+ic=0
+LOW=1
+UP=1+KL
+id=matrix(c(1:JKL),ncol=KL,byrow=TRUE)
+ES=matrix(NA,nrow=KL,ncol=7)
+for(k in 1:K){
+for(l in 1:L){
+ic=ic+1
+ES[ic,1]=k
+ES[ic,2]=l
+isel=id[,ic]
+d=IND.PAIR.ES(x[id[,ic]],fun=fun,...)$effect.size
+temp=c(d[[1]]$n1,d[[1]]$n2,d[[1]]$Q.Effect,d[[1]][4]$ci)
+ES[ic,3:7]=temp
+}}
+dimnames(ES)=list(NULL,c('B.Level','C.Level','n1','n2','Effect.Size','ci.low','ci.up'))
+ES
+}
+
+anc.ES.sum<-function(x1,y1,x2,y2,fr1=1,fr2=1,tr=.2,alpha=.05,pts=NA,SEED=TRUE,nboot=1000,
+pr=TRUE,xout=FALSE,outfun=out, nmin=12,NULL.V = c(0, 0, 0.5, 0.5, 0.5, 0), REL.M = NULL, n.est = 1e+06,...){
+#
+#
+# For each point where the regression lines are compared,
+# compute several measures of effect size via the R function ES.summary.CI
+#
+# Results for the ith point are returned in ES.4.Each.pt[[i]]
+#
+#
+if(ncol(as.matrix(x1))>1)stop('One covariate only is allowed with this function')
+if(length(x1)!=length(y1))stop('x1 and y1 have different lengths')
+if(length(x2)!=length(y2))stop('x2 and y2 have different lengths')
+xy=elimna(cbind(x1,y1))
+x1=xy[,1]
+y1=xy[,2]
+xy=elimna(cbind(x2,y2))
+x2=xy[,1]
+y2=xy[,2]
+A=list()
+if(xout){
+flag<-outfun(x1,...)$keep
+x1<-x1[flag]
+y1<-y1[flag]
+flag<-outfun(x2,...)$keep
+x2<-x2[flag]
+y2<-y2[flag]
+}
+FLAG=TRUE
+if(is.na(pts[1])){
+FLAG=FALSE
+npt<-5
+isub<-c(1:5) # Initialize isub
+test<-c(1:5)
+xorder<-order(x1)
+y1<-y1[xorder]
+x1<-x1[xorder]
+xorder<-order(x2)
+y2<-y2[xorder]
+x2<-x2[xorder]
+n1<-1
+n2<-1
+vecn<-1
+for(i in 1:length(x1))n1[i]<-length(y1[near(x1,x1[i],fr1)])
+for(i in 1:length(x1))n2[i]<-length(y2[near(x2,x1[i],fr2)])
+for(i in 1:length(x1))vecn[i]<-min(n1[i],n2[i])
+sub<-c(1:length(x1))
+isub[1]<-min(sub[vecn>=nmin])
+isub[5]<-max(sub[vecn>=nmin])
+isub[3]<-floor((isub[1]+isub[5])/2)
+isub[2]<-floor((isub[1]+isub[3])/2)
+isub[4]<-floor((isub[3]+isub[5])/2)
+pts=NA
+n1=NA
+n2=NA
+for (i in 1:5){
+g1<-y1[near(x1,x1[isub[i]],fr1)]
+g2<-y2[near(x2,x1[isub[i]],fr2)]
+pts[i]=x1[isub[i]]
+g1<-g1[!is.na(g1)]
+g2<-g2[!is.na(g2)]
+n1[i]=length(g1)
+n2[i]=length(g2)
+A[[i]]=ES.summary.CI(g1,g2,tr=tr,SEED=SEED,alpha=alpha,nboot=nboot,NULL.V=NULL.V, REL.M =REL.M,n.est=n.est)
+}}
+if(FLAG){
+n1<-1
+n2<-1
+vecn<-1
+for(i in 1:length(pts)){
+n1[i]<-length(y1[near(x1,pts[i],fr1)])
+n2[i]<-length(y2[near(x2,pts[i],fr2)])
+}
+for (i in 1:length(pts)){
+g1<-y1[near(x1,pts[i],fr1)]
+g2<-y2[near(x2,pts[i],fr2)]
+g1<-g1[!is.na(g1)]
+g2<-g2[!is.na(g2)]
+A[[i]]=ES.summary.CI(g1,g2,tr=tr,SEED=SEED,alpha=alpha,nboot=nboot,NULL.V=NULL.V, REL.M =REL.M,n.est=n.est)
+}}
+list(n1=n1,n2=n2,pts=pts,ES.4.Each.pt=A)
+}
+
+Dancova.ES.sum<-function(x1,y1,x2=x1,y2,fr1=1,fr2=1,tr=.2,alpha=.05,pts=NA,xout=FALSE,outfun=out,
+REL.MAG=NULL, SEED=TRUE,nboot=1000,...){
+#
+# Compute measures of effect size based on difference scores.
+# This is done for each covariate value where the regression lines are compared as indicated by the argument
+# pts
+# This is done via the R function dep.ES.summary.CI
+#
+# No parametric assumption is made about the form of
+# the regression lines--a running interval smoother is used.
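+# Example (illustrative, hypothetical data): Dancova.ES.sum(x1,y1,x2,y2) picks five covariate values where
+# enough points are close to the design point and, for each, returns the effect sizes computed by
+# dep.ES.summary.CI based on the difference scores for the points near that covariate value.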
+# +# Assume data are in x1 y1 x2 and y2 +# +A=list() +N=NA +if(ncol(as.matrix(x1))>1)stop('One covariate only is allowed with this function') +if(length(x1)!=length(y1))stop('x1 and y1 have different lengths') +if(length(x1)!=length(x2))stop('x1 and y2 have different lengths') +if(length(x2)!=length(y2))stop('x2 and y2 have different lengths') +if(length(y1)!=length(y2))stop('y1 and y2 have different lengths') +xy=elimna(cbind(x1,y1,x2,y2)) +x1=xy[,1] +y1=xy[,2] +x2=xy[,3] +y2=xy[,4] +xorder<-order(x1) +y1<-y1[xorder] +x1<-x1[xorder] +xorder<-order(x2) +y2<-y2[xorder] +x2<-x2[xorder] +FLAG=TRUE +n=length(y1) +ivals=c(1:n) +if(is.na(pts[1])){ +FLAG=FALSE +isub<-c(1:5) # Initialize isub +test<-c(1:5) +xorder<-order(x1) +y1<-y1[xorder] +x1<-x1[xorder] +xorder<-order(x2) +y2<-y2[xorder] +x2<-x2[xorder] +n1<-1 +n2<-1 +vecn<-1 +for(i in 1:length(x1))n1[i]<-length(y1[near(x1,x1[i],fr1)]) +for(i in 1:length(x1))n2[i]<-length(y2[near(x2,x1[i],fr2)]) +for(i in 1:length(x1))vecn[i]<-min(n1[i],n2[i]) +sub<-c(1:length(x1)) +isub[1]<-min(sub[vecn>=12]) +isub[5]<-max(sub[vecn>=12]) +isub[3]<-floor((isub[1]+isub[5])/2) +isub[2]<-floor((isub[1]+isub[3])/2) +isub[4]<-floor((isub[3]+isub[5])/2) +for (i in 1:5){ +t1=near(x1,x1[isub[i]],fr1) +t2=near(x2,x1[isub[i]],fr2) +iv1=ivals[t1] +iv2=ivals[t2] +pick=unique(c(iv1,iv2)) +N[i]=length(pick) +pts[i]=x1[isub[i]] +A[[i]]=dep.ES.summary.CI(y1[pick],y2[pick], tr=tr, alpha=alpha, REL.MAG=REL.MAG, SEED=SEED,nboot=nboot) +}} +if(FLAG){ +xorder<-order(x1) +y1<-y1[xorder] +x1<-x1[xorder] +xorder<-order(x2) +y2<-y2[xorder] +x2<-x2[xorder] +npts=length(pts) +for(i in 1:npts){ +t1=near(x1,pts[i],fr1) +t2=near(x2,pts[i],fr2) +iv1=ivals[t1] +iv2=ivals[t2] +pick=unique(c(iv1,iv2)) +N[i]=length(pick) +A[[i]]=dep.ES.summary.CI(y1[pick],y2[pick], tr=tr, alpha=alpha, REL.MAG=REL.MAG, SEED=SEED,nboot=nboot) +}} +list(n=N,pts=pts,ES.4.Each.pt=A) +} + +pool.fun<-function(J,K,x){ +# +# x is assumed to have list mode. +# +# For a between-by-within design +# For data in list mode, pool the data +# over Factor A (between) +# and store in a new variable have list model with length K +# +# That is, ignore levels of A +# +JK=J*K +imat=matrix(c(1:JK),ncol=K,byrow=TRUE) +B=list() +for(k in 1:K){ +id=imat[,k] +B[[k]]=as.vector(matl(x[id])) +} +B +} + +spmcpbA<-function(J,K,x,est=tmean,JK=J*K,grp=c(1:JK),dif=TRUE,alpha=.05, +nboot=NA,SEED=TRUE,...){ +# +# For each level of Factor A +# use a percentile bootstrap for all pairwise +# multiple comparisons +# among dependent groups in a split-plot design +# +# +# If dif=T, the analysis is done based on all pairs +# of difference scores. +# Otherwise, marginal measures of location are used. +# +# The R variable x is assumed to contain the raw +# data stored in list mode or in a matrix. +# If in list mode, x[[1]] contains the data +# for the first level of both factors: level 1,1. +# x[[2]] is assumed to contain the data for level 1 of the +# first factor and level 2 of the second: level 1,2 +# x[[K]] is the data for level 1,K +# x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc. +# +# If the data are in a matrix, column 1 is assumed to +# correspond to x[[1]], column 2 to x[[2]], etc. 
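+# For example, in a 2-by-3 design the columns (or list elements) are ordered
+# 1,1 1,2 1,3 2,1 2,2 2,3 and a minimal call (hypothetical data) is
+#   spmcpbA(2, 3, x)
+# which runs the pairwise comparisons separately for each level of Factor A.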
+# +# When in list mode x is assumed to have length JK, the total number of +# groups being tested, but a subset of the data can be analyzed +# using grp +# + + if(is.matrix(x) || is.data.frame(x)) { + y <- list() + for(j in 1:ncol(x)) + y[[j]] <- x[, j] + x <- y +} +JK<-J*K +data<-list() +for(j in 1:length(x)){ +data[[j]]<-x[[grp[j]]] # Now have the groups in proper order. +} +x<-data +A=list() +imat=matrix(c(1:JK),nrow=J,byrow=TRUE) +for(j in 1:J){ +# Now call function rmmcppb to do the analysis +id=imat[j,] +A[[j]]<-rmmcppb(x[id],est=est,pr=FALSE,nboot=nboot,dif=dif,alpha=alpha,plotit=FALSE,SEED=SEED,...) +} +list(A.Level=A) +} + + +anc.grid<-function(x1,y1,x2,y2, alpha=.05, +#IV=c(1,2), +Qsplit1=.5,Qsplit2=.5, SV1=NULL,SV2=NULL, +tr=.2,PB=FALSE,est=tmean,nboot=1000,CI=FALSE, +xout=FALSE,outfun=outpro,SEED=TRUE,...){ +# +# Two independent groups. +# Split on two independent variables based on data in x1. Compare the corresponding regions +# +# +# Qsplit: split the independent variable based on the +# quantiles indicated by Qsplit +# Example +# Qsplit1=c(.25,.5,.75) +# Qsplit2=.5 +# would split based on the quartiles for the first independent variable and the median +# for the second independent variable +# +# Alternatively, the data can be split based in values stored in the arguments +# SV1 and SV2. +# + +# Then test the hypothesis of equal measures of location +# IV[1]: indicates the column containing the first independent variable to use. +# IV[2]: indicates the column containing the second independent variable to use. +# +# if(length(unique(y)>2))stop('y should be binary') +p=ncol(x1) +if(p==1)stop('There should be two or more independent variables') +p1=p+1 +if(p!=ncol(x2))stop('x2 and x1 do not have the same of variables, ncol(x1)!=ncol(x2)') + +if(ncol(x1) != 2 || ncol(x2) !=2)stop('Should have two covariates') + +xy1<-elimna(cbind(x1,y1)) +x1<-xy1[,1:p] +y1<-xy1[,p1] + +xy2<-elimna(cbind(x2,y2)) +x2<-xy2[,1:p] +y2<-xy2[,p1] +ES=list() +if(xout){ +flag<-outfun(x1,plotit=FALSE)$keep +x1<-x1[flag,] +y1<-y1[flag] +flag<-outfun(x2,plotit=FALSE)$keep +x2<-x2[flag,] +y2<-y2[flag] +} +J=length(Qsplit1)+1 +K=length(Qsplit2)+1 +if(!is.null(SV1))J=length(SV1)+1 +if(!is.null(SV2))K=length(SV2)+1 + +JK=J*K +MAT=matrix(1:JK,J,K,byrow=TRUE) +z=list() +group=list() +N.int=J +N.int2=K + +NG=N.int*N.int2 +GRID=matrix(NA,NG,9) +GI=matrix(NA,NG,4) # grid intervals +L1=NULL +L2=NULL +qv=quantile(x1[,1],Qsplit1) +if(!is.null(SV1))qv=SV1 +qv=c(min(x1[,1]),qv,max(x1[,1])) +qv2=quantile(x2[,2],Qsplit2) +if(!is.null(SV2))qv2=SV2 +qv2=c(min(x2[,2]),qv2,max(x2[,2])) +ic=0 +for(j in 1:N.int){ +j1=j+1 +xsub1.1=binmat(xy1,1,qv[j],qv[j1]) # split, group 1 +xsub1.2=binmat(xy2,1,qv[j],qv[j1]) #split, group 2 +for(k in 1:N.int2){ +k1=k+1 +xsub2.1=binmat(xsub1.1,2,qv2[k],qv2[k1]) +xsub2.2=binmat(xsub1.2,2,qv2[k],qv2[k1]) +ic=ic+1 +if(length(xsub2.1[,3])<=7 || length(xsub2.2[,3])<=7)print('Not enough data in one or more grids') +GI[ic,]=c(qv[j],qv[j1],qv2[k],qv2[k1]) + +if(length(xsub2.1[,3])>7 || length(xsub2.2[,3])>7){ +a=yuen(xsub2.1[,3],xsub2.2[,3],tr=tr,alpha=alpha) +a=pool.a.list(a) +a=a[c(1:4,8,5:7)] +if(PB){ +pbv=trimpb2(xsub2.1[,3],xsub2.2[,3],tr=tr,alpha=alpha,nboot=nboot) +pbv=pool.a.list(pbv) +a[6:8]=pbv[c(2,3,1)] +} +GRID[ic,1:8]=a[1:8] +if(!CI)ES[[ic]]=ES.summary(xsub2.1,xsub2.2,tr=tr) +if(CI)ES[[ic]]=ES.summary.CI(xsub2.1,xsub2.2,tr=tr) +}} +} +dimnames(GI)=list(NULL,c('Int.1.low','Int.1.up','Int.2.low','Int.2.up')) +GRID[,9]=p.adjust(GRID[,8],method='hoch') 
+dimnames(GRID)=list(NULL,c('n1','n2','est.1','est.2','dif','ci.low','ci.up','p.value','adj.p.value')) +list(GRID.INTERVALS=GI,GRID=GRID, Effect.Sizes=ES) +} + +anc.grid.bin<-function(x1,y1,x2,y2, alpha=.05,method='KMS', +Qsplit1=.5,Qsplit2=.5, SV1=NULL,SV2=NULL, +xout=FALSE,outfun=outpro,SEED=TRUE,...){ +# +# Two independent groups. +# Split on two independent variables based on data in x1. Compare the corresponding regions +# +# +# Qsplit: split the independent variable based on the +# quantiles indicated by Qsplit +# Example +# Qsplit1=c(.25,.5,.75) +# Qsplit2=.5 +# would split based on the quartiles for the first independent variable and the median +# for the second independent variable +# +# The argument method can be 'KMS, 'SK' or 'ECP' +# See the 5th edition of Wilcox, Intro to Robust Estimation and Hypothesis Testing +# details. +# +# Alternatively, the data can be split based in values stored in the arguments +# SV1 and SV2. +# +if(identical(method,'ZHZ'))stop('Argument method should be KMS, SK or ECP') +if(length(unique(y1))>2)stop('y1 should be binary') +if(length(unique(y2))>2)stop('y2 should be binary') +p=ncol(x1) +if(p==1)stop('There should be two or more independent variables') +p1=p+1 +if(p!=ncol(x2))stop('x2 and x1 do not have the same of variables, ncol(x1)!=ncol(x2)') +if(ncol(x1) != 2 || ncol(x2) !=2)stop('Should have two covariates') +xy1<-elimna(cbind(x1,y1)) +x1<-xy1[,1:p] +y1<-xy1[,p1] +xy2<-elimna(cbind(x2,y2)) +x2<-xy2[,1:p] +y2<-xy2[,p1] +if(xout){ +flag<-outfun(x1,plotit=FALSE)$keep +x1<-x1[flag,] +y1<-y1[flag] +flag<-outfun(x2,plotit=FALSE)$keep +x2<-x2[flag,] +y2<-y2[flag] +} +J=length(Qsplit1)+1 +K=length(Qsplit2)+1 +if(!is.null(SV1))J=length(SV1)+1 +if(!is.null(SV2))K=length(SV2)+1 +JK=J*K +MAT=matrix(1:JK,J,K,byrow=TRUE) +z=list() +group=list() +N.int=J +N.int2=K +NG=N.int*N.int2 +GRID=matrix(NA,NG,9) +GI=matrix(NA,NG,4) # grid intervals +L1=NULL +L2=NULL +qv=quantile(x1[,1],Qsplit1) +if(!is.null(SV1))qv=SV1 +qv=c(min(x1[,1]),qv,max(x1[,1])) +qv2=quantile(x2[,2],Qsplit2) +if(!is.null(SV2))qv2=SV2 +qv2=c(min(x2[,2]),qv2,max(x2[,2])) +ic=0 +for(j in 1:N.int){ +j1=j+1 +xsub1.1=binmat(xy1,1,qv[j],qv[j1]) # split, group 1 +xsub1.2=binmat(xy2,1,qv[j],qv[j1]) #split, group 2 +for(k in 1:N.int2){ +k1=k+1 +xsub2.1=binmat(xsub1.1,2,qv2[k],qv2[k1]) +xsub2.2=binmat(xsub1.2,2,qv2[k],qv2[k1]) +ic=ic+1 +if(length(xsub2.1[,3])<=7 || length(xsub2.2[,3])<=7)print('Not enough data in one or more grids') +GI[ic,]=c(qv[j],qv[j1],qv2[k],qv2[k1]) + +if(length(xsub2.1[,3])>7 || length(xsub2.2[,3])>7){ +a=binom2g(sum(xsub2.1[,3]),length(xsub2.1[,3]), +sum(xsub2.2[,3]),length(xsub2.2[,3]), method=method,alpha=alpha) +if(identical(method,'KMS')){ +a=pool.a.list(a) +#print(a) +a=c(length(xsub2.1[,3]),length(xsub2.2[,3]),a[c(3:5,1:2,6)]) +} +if(identical(method,'SK')){ +a=c(length(xsub2.1[,3]),length(xsub2.2[,3]),a$p1,a$p2,a$p1-a$p2,NA,NA,a$p.value) +} +if(identical(method,'ECP')){ +a=c(length(xsub2.1[,3]),length(xsub2.2[,3]),a$output[1,3:8]) +} +GRID[ic,1:8]=a[1:8] +}} +} +dimnames(GI)=list(NULL,c('Int.1.low','Int.1.up','Int.2.low','Int.2.up')) +GRID[,9]=p.adjust(GRID[,8],method='hoch') +dimnames(GRID)=list(NULL,c('n1','n2','est.1','est.2','dif','ci.low','ci.up','p.value','adj.p.value')) +list(GRID.INTERVALS=GI,GRID=GRID) +} + +best.cell.sub<-function(x,alpha=.05,LARGEST=TRUE,method='AC',AUTO=FALSE){ +# +# For a multinomial distribution, can a decision be made +# about which cell has the highest probability +# +# x Assumed to contain the cell frequencies +# +x=elimna(x) +n=sum(x) 
+NCELL=length(x) +NCm1=NCELL-1 +xor=order(x,decreasing = LARGEST) +output=NA +ic=0 +for(j in 2:NCELL){ +ic=ic+1 +output[ic]=cell.com.pv(x,xor[1],xor[j]) +} +output +} + + +cell.com.pv<-function(x,i=1,j=2,method='AC'){ +# +# For a multinomial distribution, compute a confidence interval +# for p_i-p_j, the difference between the probabilities asscoiated with cells i and j +# +# x= cell frequencies +# +n=sum(x) +p1=x[i]/n +p2=x[j]/n +COR=0-sqrt(p1*p1/((1-p1)*(1-p2))) +a=seq(.001,.1,.001) +a=c(a,seq(.1,.99,.01)) +a=rev(a) + +if(x[i]==x[j])pv=1 +if(x[i]!=x[j]){ +for(k in 1:length(a)){ +c2=acbinomci(x[j],n,alpha=a[k])$ci +c1=acbinomci(x[i],n,alpha=a[k])$ci +T1=(p1-c1[1])^2+(p2-c2[2])^2-2*COR*(p1-c1[1])*(c2[2]-p2) +T2=(p1-c1[2])^2+(p2-c2[1])^2-2*COR*(c1[2]-p1)*(p2-c2[1]) +T2=max(c(0,T2)) +T1=max(c(0,T1)) +L=p1-p2-sqrt(T1) +U=p1-p2+sqrt(T2) +pv=a[k] +if(sign(L*U)<0)break +}} +if(n<=35){ +if(x[i]==x[j])pvnew=1 +else{ +pv.up=pv+.1 +anew=seq(pv,pv.up,.01) +for(k in 1:length(anew)){ +c1=binom.conf(x[i],n,AUTO=TRUE,method=method,alpha=anew[k],pr=FALSE)$ci +c2=binom.conf(x[j],n,AUTO=TRUE,method=method,alpha=anew[k],pr=FALSE)$ci +T1=(p1-c1[1])^2+(p2-c2[2])^2-2*COR*(p1-c1[1])*(c2[2]-p2) +T2=(p1-c1[2])^2+(p2-c2[1])^2-2*COR*(c1[2]-p1)*(p2-c2[1]) +T2=max(c(0,T2)) +T1=max(c(0,T1)) +L=p1-p2-sqrt(T1) +U=p1-p2+sqrt(T2) +pvnew=anew[k] +if(sign(L*U)>0)break +}} +pv=pvnew +} +pv +} + + +cell.com<-function(x,i=1,j=2,alpha=.05,AUTO=TRUE,method='AC'){ +# +# For a multinomial distribution, compute a confidence interval +# for p_i-p_j, the difference between the probabilities associated with cells i and j +# +# x= cell frequencies +# +n=sum(x) +c1=binom.conf(x[i],n,AUTO=AUTO,method=method,alpha=alpha,pr=FALSE)$ci +c2=binom.conf(x[j],n,AUTO=AUTO,method=method,alpha=alpha,pr=FALSE)$ci +p1=x[i]/n +p2=x[j]/n +COR=0-sqrt(p1*p1/((1-p1)*(1-p2))) +T1=(p1-c1[1])^2+(p2-c2[2])^2-2*COR*(p1-c1[1])*(c2[2]-p2) +T2=(p1-c1[2])^2+(p2-c2[1])^2-2*COR*(c1[2]-p1)*(p2-c2[1]) +T2=max(c(0,T2)) +T1=max(c(0,T1)) +L=p1-p2-sqrt(T1) +U=p1-p2+sqrt(T2) +list(ci=c(L,U)) +} + +best.cell.crit<-function(N,ncell,LARGEST=TRUE,iter=1000,alpha=.05,SEED=TRUE,AUTO=FALSE){ +# +# +# N sample size +# ncell number of cells +# +if(SEED)set.seed(2) +NCm1=ncell-1 +pv=NA +a=rmultinom(iter,N,rep(1/ncell,ncell)) +pv.mat=apply(a,2,best.cell.sub,AUTO=AUTO,LARGEST=LARGEST) +init=apply(pv.mat,1,qest,alpha) +pv.mat=t(pv.mat) # For simplicity when using extant code related to this function +z=optim(0,anc.best.fun,init=init,iter=iter,rem=pv.mat,Jm1=NCm1, +alpha=alpha,method='Brent',lower=0,upper=1) +p.crit=z$par*init +p.crit +} + +BEST.cell<-function(x,alpha=.05,LARGEST=TRUE,method='AC',p.crit=NULL,AUTO=FALSE,iter=2000,SEED=TRUE,pr=TRUE){ +# +# For a multinomial distribution, can a decision be made about +# about which cell has the highest probability? 
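+# Illustration (hypothetical cell frequencies):
+#   freq <- c(34, 21, 15, 10)
+#   BEST.cell(freq)                   # decide whether one cell has the largest probability
+#   BEST.cell(freq, LARGEST = FALSE)  # or whether one cell has the smallest probability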
+# +# PV if specified, is a N by iter matrix of p-values that can be computed via best.cell.crit +# N=number of cells +# x Assumed to contain the cell frequencies +# +if(pr)print('Confidence intervals are based on the critical p-values') +if(SEED)set.seed(2) +x=elimna(x) +n=sum(x) +NCELL=length(x) +NCm1=NCELL-1 +xor=order(x,decreasing = LARGEST) +IND.pv=NA +ic=0 +CI=matrix(NA,nrow=NCm1,ncol=2) +for(j in 2:NCELL){ +ic=ic+1 +IND.pv[ic]=cell.com.pv(x,xor[1],xor[j]) +} +if(is.null(p.crit))p.crit=best.cell.crit(n,NCELL,LARGEST=LARGEST,iter=iter,AUTO=FALSE,SEED=SEED) +output=matrix(NA,nrow=NCm1,8) +output[,1]=rep(x[xor[1]]/n,NCm1) +output[,2]=xor[2:NCELL] +output[,3]=x[xor[2:NCELL]]/n +output[,4]=output[,1]-output[,3] +ic=0 +for(j in 2:NCELL){ +ic=ic+1 +CI=cell.com(x,xor[1],xor[j],AUTO=AUTO,method=method,alpha=p.crit[ic]) +output[ic,5:6]=CI$ci +} +output[,7]=IND.pv +output[,8]=p.crit +dimnames(output)=list(NULL,c('Largest.Est','CELL','Est','Dif','ci.low','ci.up','p.value','p.crit')) +flag=IND.pv<=p.crit +id=output[flag,2] +setClass('BIN',slots=c('Cell.with.largest.estimate','Larger.than','n','output')) +put=new('BIN',Cell.with.largest.estimate=xor[1],Larger.than=id,n=n,output=output) + +if(!LARGEST){ +dimnames(output)=list(NULL,c('Smallest.Est','CELL','Est','Dif','ci.low','ci.up','p.value','p.crit')) +setClass('BIN',slots=c('Cell.with.smallest.estimate','smaller.than','n','output')) +put=new('BIN',Cell.with.smallest.estimate=xor[1],smaller.than=id,n=n,output=output) +} +put +} + + anc.grid.cat<-function(x1,y1,x2,y2, alpha=.05,KMS=FALSE, +Qsplit1=.5,Qsplit2=.5, SV1=NULL,SV2=NULL,pr=TRUE, +xout=FALSE,outfun=outpro){ +# +# Two independent groups. +# Split on two independent variables based on data in x1. Compare the corresponding regions +# +# +# Qsplit: split the independent variable based on the +# quantiles indicated by Qsplit +# Example +# Qsplit1=c(.25,.5,.75) +# Qsplit2=.5 +# would split based on the quartiles for the first independent variable and the median +# for the second independent variable +# +# The argument method can be 'KMS, 'SK' or 'ECP' +# See the 5th edition of Wilcox, Intro to Robust Estimation and Hypothesis Testing +# details. +# +# Alternatively, the data can be split based in values stored in the arguments +# SV1 and SV2. 
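+# Illustration (hypothetical data; two covariates and binary outcomes):
+#   x1 <- matrix(rnorm(200), ncol = 2); y1 <- rbinom(100, 1, .4)
+#   x2 <- matrix(rnorm(200), ncol = 2); y2 <- rbinom(100, 1, .6)
+#   anc.grid.cat(x1, y1, x2, y2, KMS = TRUE)  # medians of the covariates define the grid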
+# +if(!KMS){ +if(pr)print('To get confidence intervals, set KMS=TRUE') +} +p=ncol(x1) +if(p==1)stop('There should be two or more independent variables') +p1=p+1 +if(p!=ncol(x2))stop('x2 and x1 do not have the same of variables, ncol(x1)!=ncol(x2)') +if(ncol(x1) != 2 || ncol(x2) !=2)stop('Should have two covariates') +xy1<-elimna(cbind(x1,y1)) +x1<-xy1[,1:p] +y1<-xy1[,p1] +xy2<-elimna(cbind(x2,y2)) +x2<-xy2[,1:p] +y2<-xy2[,p1] +if(xout){ +flag<-outfun(x1,plotit=FALSE)$keep +x1<-x1[flag,] +y1<-y1[flag] +flag<-outfun(x2,plotit=FALSE)$keep +x2<-x2[flag,] +y2<-y2[flag] +} +J=length(Qsplit1)+1 +K=length(Qsplit2)+1 +if(!is.null(SV1))J=length(SV1)+1 +if(!is.null(SV2))K=length(SV2)+1 +JK=J*K +MAT=matrix(1:JK,J,K,byrow=TRUE) +z=list() +group=list() +N.int=J +N.int2=K +NG=N.int*N.int2 +#GRID=matrix(NA,NG,9) +GRID=list() +GI=matrix(NA,NG,4) # grid intervals +L1=NULL +L2=NULL +qv=quantile(x1[,1],Qsplit1) +if(!is.null(SV1))qv=SV1 +qv=c(min(x1[,1]),qv,max(x1[,1])) +qv2=quantile(x2[,2],Qsplit2) +if(!is.null(SV2))qv2=SV2 +qv2=c(min(x2[,2]),qv2,max(x2[,2])) +ic=0 +for(j in 1:N.int){ +j1=j+1 +xsub1.1=binmat(xy1,1,qv[j],qv[j1]) # split, group 1 +xsub1.2=binmat(xy2,1,qv[j],qv[j1]) #split, group 2 +for(k in 1:N.int2){ +k1=k+1 +xsub2.1=binmat(xsub1.1,2,qv2[k],qv2[k1]) +xsub2.2=binmat(xsub1.2,2,qv2[k],qv2[k1]) +ic=ic+1 +if(length(xsub2.1[,3])<=7 || length(xsub2.2[,3])<=7)print('Not enough data in one or more grids') +GI[ic,]=c(qv[j],qv[j1],qv2[k],qv2[k1]) +if(length(xsub2.1[,3])>7 || length(xsub2.2[,3])>7){ +GRID[[ic]]=binband(xsub2.1[,3],xsub2.2[,3],alpha=alpha,KMS=KMS,plotit=FALSE,pr=FALSE) +}}} +dimnames(GI)=list(NULL,c('Int.1.low','Int.1.up','Int.2.low','Int.2.up')) +list(GRID.INTERVALS=GI,GRID=GRID) +} + + +PMD.PCD<-function(n=NULL,delta=.5,x=NULL, tr=.2, SIG=NULL,alpha=.05,p.crit=NULL,iter=5000,SEED=TRUE){ +# +# Which group has the largest measures of location? +# +# Use an indifference zone. Given +# n a vector of sample sizes, determine the +# probability of making a decision and the probability of +# of correct decision given that a decision is made. +# +# Number of groups is length(n) +# +if(is.null(n) & is.null(x))stop('Either n or x must be specified') +if(SEED)set.seed(2) +if(!is.null(x)){ +x=elimna(x) +if(is.matrix(x) || is.data.frame(x))x=listm(x) +J=length(x) +est=lapply(x,tmean,tr=tr) +est=as.vector(matl(est)) +ID=which(est==max(est)) +n=as.vector(matl(lapply(x,length))) +} +J=length(n) +if(J<=1)stop('n should have 2 or more values') +if(is.null(p.crit))p.crit=anc.best.crit(J,n,alpha=alpha,tr=tr,iter=iter,SEED=SEED)$fin.crit +z=list() +PMD=0 +PCD=0 +sig=rep(1,J) +if(is.null(SIG)){ +if(!is.null(x)){ +for(j in 1:J)sig[j]=sd(x[[j]]) +}} +if(!is.null(SIG))sig=SIG +for(i in 1:iter){ +for(j in 1:J)z[[j]]=sig[j]*rnorm(n[j]) +z[[1]]=z[[1]]+delta*sig[ID] +pv=anc.best(z,p.crit=p.crit, tr=tr,SEED=F) +if(pv@Larger.than[1]=='All'){ +PMD=PMD+1 +if(pv@Group.with.largest.estimate==1)PCD=PCD+1 +}} +PCD.ci=NA +PMD.ci=binom.conf(PMD,iter,pr=FALSE)$ci +if(PMD>0)PCD.ci=binom.conf(PCD,PMD,pr=FALSE)$ci +PCD=PCD/max(PMD,1) +PMD=PMD/iter +list(PMD=PMD,PMD.ci=PMD.ci,PCD=PCD,PCD.ci=PCD.ci) +} + + +bin.PMD.PCD<-function(n,p,DO=TRUE,alpha=.05,p.crit=NULL,iter=5000,SEED=TRUE){ +# +# Which group has the largest probability of success? +# +# Use an indifference zone. Given +# n a vector of sample sizes, determine the +# probability of making a decision and the probability of +# of correct decision given that a decision is made. 
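+# PMD is estimated by the proportion of the iter replications in which a
+# decision is made; PCD is the proportion of those decisions that identify
+# the group that truly has the largest probability of success.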
+# +# Number of groups is length(n) +# x if specified contain the number of succcess in which case p=mean(x/n) +# + +if(SEED)set.seed(2) +pmax=which(p==max(p)) +J=length(n) +if(!is.null(x))p=rep(mean(x/n),J) +Jm1=J-1 +if(J<=1)stop('n should have 2 or more values') +remp=p +id=which(p==max(p))[1] +if(is.null(p.crit)){ +pv.mat=bin.best.crit(remp,n,iter=iter,SEED=FALSE) +init=apply(pv.mat,2,qest,alpha) +z=optim(0,anc.best.fun,init=init,iter=iter,rem=pv.mat,Jm1=Jm1,alpha=alpha,method='Brent',lower=0,upper=1) +p.crit=z$par*init +} +PMD=0 +PCD=0 +for(i in 1:iter){ +x=rbinom(J,n,p) +if(!DO){ +pv=bin.best(x,n,p.crit=p.crit,SEED=FALSE) +if(pv@Larger.than[1]=='All'){ +PMD=PMD+1 +if(pv@Group.with.largest.estimate==id)PCD=PCD+1 +}} +if(DO){ +a=bin.best.DO(x,n) +if(a$p.value<=alpha){ +PMD=PMD+1 +if(max(a$Est)==pmax)PCD=PCD+1 +}} +} +PCD=PCD/PMD +PMD=PMD/iter +list(PMD=PMD,PCD=PCD) +} + + +WINCOR<-function(x,tr=.2){ +# +# For convenience, compute Winsorized correlation matrix only. +# +a=winall(x,tr=tr)$cor +a +} + + MVECOR<-function(x){ +library(MASS) +val<-cov.mve(x) +val=cov2cor(val$cov) +val +} + +MCDCOR<-function(x){ +library(MASS) +#val<-cov.mcd(x) +val<-DetMCD(x) +val=cov2cor(val$cov) +val +} + +COR.ROB<-function(x,method=c('WIN','PB','skip','mve','mcd','Ken','Spear','BIC'),tr=.2,...){ +# +# +# WIN: Winsorized +# PB: Percentage Bend +# skip: Skipped correlation based on projection-type outlier detection method +# mve: minimum volume ellipsoid +# mcd: minimum covariance determinant +# Ken: Kendall's tau +# Spear: Spearman's rho +# BIC: biweight correlation. +# +type=match.arg(method) +switch(type, + WIN=winall(m=x,tr=tr)$cor, + PB=pball(m=x,...)$pbcorm, + skip=scorall(x=x,...), + mve=MVECOR(x=x), + mcd=MCDCOR(x=x), + Ken=tauall(m=x)$taum, + Spear=spear(x=x)$cor, + BIC=bicovm(x)$mcor, + ) +} + + +COR.PAIR<-function(x,y,method=c('WIN','PB','skip','mve','mcd','Ken','Spear','BIC'),skip.cor=pcor,tr=.2,...){ +# +# For the bivariat case, compute a correlation +# +# WIN: Winsorized +# PB: Percentage Bend +# skip: Skipped correlation based on projection-type outlier detection method +# mve: minimum volume ellipsoid +# mcd: minimum covariance determinant +# Ken: Kendall's tau +# Spear: Spearman's rho +# BIC: biweight correlation. +# +x=cbind(x,y) +type=match.arg(method) +switch(type, + WIN=list(cor=winall(m=x,tr=tr)$cor[1,2]), + PB=list(cor=pball(m=x,...)$pbcorm[1,2]), + skip=list(cor=scor(x=x,corfun=skip.cor)$cor), + mve=list(cor=MVECOR(x=x)[1,2]), + mcd=list(cor=MCDCOR(x=x)[1,2]), + Ken=list(cor=tauall(m=x)$taum[1,2]), + Spear=list(cor=spear(x=x)$cor[1,2]), + BIC=list(cor=bicovm(x)$mcor[1,2]), + ) +} + +bicorM<-function(x){ +a=bicovM(x) +a=cov2cor(a) +a +} + +bicor<-function(x,y){ +a=bicovM(cbind(x,y)) +a=cov2cor(a) +list(cor=a[1,2]) +} + +corregci<-function(x,y,corfun=wincor,nboot=599,alpha=.05,SEED=TRUE,pr=TRUE,...){ +# +# Deals with correlations between some dependent variable y and p independent variables, x +# Compute confidence intervals for correlation coefficients and p-values when testing the +# hypothesis of a zero correlation, +# Also reported are adjusted p-values based on Hochberg's method. +# +# The predictor values are assumed to be in the n by p matrix x. +# The default number of bootstrap samples is nboot=599 +# +# corfun can be any R function that returns a correlation having the form +# the vector corfun$cor. Examples are pbcor and bicor, spear and tau. 
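+# Illustration (hypothetical data, three predictors):
+#   x <- matrix(rnorm(150), ncol = 3); y <- x[, 1] + rnorm(50)
+#   corregci(x, y)                   # Winsorized correlations (the default corfun=wincor)
+#   corregci(x, y, corfun = spear)   # or, for example, Spearman's rho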
+# +x<-as.matrix(x) +p1<-ncol(x)+1 +p<-ncol(x) +xy<-cbind(x,y) +xy<-elimna(xy) +x<-xy[,1:p] +y<-xy[,p1] +nrem=length(y) +estit=NA +for(j in 1:p)estit[j]=corfun(x[,j],y,...)$cor +nv=length(y) +x<-as.matrix(x) +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot) +bvec<-apply(data,1,corregci.sub,x,y,corfun=corfun) +#Leverage points already removed. +# bvec is a p by nboot matrix. The first row +# contains the bootstrap correlations for the first IV,, the second row +# contains the bootstrap values for first predictor, etc. +regci<-matrix(0,p,5) +vlabs=NA +for(j in 1:p)vlabs[j]=paste("ind.var",j) +i#vlabs[1:p]=labels(x)[[2]] +dimnames(regci)<-list(vlabs,c("ci.low","ci.up","Estimate","p-value",'Adj.p.value')) +ilow<-round((alpha/2) * nboot) +ihi<-nboot - ilow +ilow<-ilow+1 +se<-NA +pvec<-NA +for(i in 1:p){ +bsort<-sort(bvec[i,]) +pvec[i]<-sum(bvec[i,]<0)/nboot #+.5*sum(bvec[i,]==0)/nboot +if(pvec[i]>.5)pvec[i]<-1-pvec[i] +regci[i,1]<-bsort[ilow] +regci[i,2]<-bsort[ihi] +} +regci[,3]=estit +pvec<-2*pvec +regci[,4]=pvec +regci[,5]=p.adjust(pvec,method='hoch') +num.sig=sum(regci[,5]<=alpha) +list(output=regci,n=nrem,num.sig=num.sig) +} + +corregci.sub<-function(isub,x,y,corfun){ +p=ncol(x) +xmat<-matrix(x[isub,],nrow(x),ncol(x)) +e=NA +for(j in 1:p)e[j]=corfun(xmat[,j],y[isub])$cor +e +} + +neg.colM<-function(x,id=NULL){ +# +# Columns of the matrix are mutliplied by -1 +# +x[,id]=x[,id]*-1 +x +} + +mve.cor<-function(x,y){ +xy=cbind(x,y) +a=MVECOR(x=xy)[1,2] +list(cor=a) +} + +mcd.cor<-function(x,y){ +xy=cbind(x,y) +a=MCDCOR(x=xy)[1,2] +list(cor=a) +} + + + +HQreg<-function(x,y,alpha=1,xout=FALSE,method='huber',tau=.5,outfun=outpro,...){ +# +# +# Robust elastic net +# Yi, C. & Huang, J. (2016) Semismooth Newton coordinate descent algorithm for elastic-net penalized +# Huber loss regression and quantile regression. (https://arxiv.org/abs/1509.02957) +# Journal of Computational and Graphical Statistics +# http://www.tandfonline.com/doi/full/10.1080/10618600.2016.1256816 + +# +library(hqreg) +x<-as.matrix(x) +xx<-cbind(x,y) +xx<-elimna(xx) +x<-xx[,1:ncol(x)] +x<-as.matrix(x) +y<-xx[,ncol(x)+1] +temp<-NA +x<-as.matrix(x) +if(xout){ +x<-as.matrix(x) +flag<-outfun(x,plotit=plotit,...)$keep +x<-x[flag,] +y<-y[flag] +x<-as.matrix(x) +} +a=hqreg(x,y,method=method,alpha=alpha,tau=tau)$beta +list(coef=a[,100]) +} + + +LTS.EN<-function(x,y,xout=FALSE,family='gaussian',alphas=NULL,lambdas=NULL,outfun=outpro,...){ +# +# +# Robust elastic net +# Yi, C. & Huang, J. (2016) Semismooth Newton coordinate descent algorithm for elastic-net penalized +# Huber loss regression and quantile regression. 
(https://arxiv.org/abs/1509.02957) +# Journal of Computational and Graphical Statistics +# http://www.tandfonline.com/doi/full/10.1080/10618600.2016.1256816 +# +# +library(enetLTS) +x<-as.matrix(x) +xx<-cbind(x,y) +xx<-elimna(xx) +x<-xx[,1:ncol(x)] +x<-as.matrix(x) +y<-xx[,ncol(x)+1] +temp<-NA +x<-as.matrix(x) +if(xout){ +x<-as.matrix(x) +flag<-outfun(x,plotit=plotit,...)$keep +x<-x[flag,] +y<-y[flag] +x<-as.matrix(x) +} +if(!is.null(alphas) & !is.null(lambdas))a=enetLTS(x,y,alphas=alphas,lambdas=lambdas,family=family,plot=FALSE) +if(!is.null(alphas) & is.null(lambdas))a=enetLTS(x,y,alphas=alphas,family=family,plot=FALSE) +if(is.null(alphas) & is.null(lambdas))a=enetLTS(x,y,family=family,plot=FALSE) +list(coef=a[6]$raw.coefficients) +} + +corCOM.PMDPCD<-function(n,p,rho=0,delta=.3,corfun=wincor,LARGEST=TRUE,alpha=.05, +x=NULL,y=NULL,iter=500,pr=TRUE,SEED=TRUE,MC=TRUE,FUN=mean,...){ +# +# Given n and p, the number of explanatory variables, +# determine the probability of making a decision about which independent variable has the +# the largest correlation with the dependent variable +# in the context of an indifference zone +# +# +# All independent variables have a same correlation with dependent variable indicated by the argument +# rho, +# except the first independent variable, which has correlation +# rho+delta. +# Default is rho=0 and delta= .3 +# (Cohen's suggestion for a small medium and large correlation +# are .1, .3 and .5) +# This is designed for situations where the goal is to make a decision about which IV has the largest correlation. +# +# If LARGEST=TRUE; the function default to rho=0 for the first IV and delta for the remaining IVs +# +# Possible alternative choices for corfun include: +# spear +# tau +# pbcor +# bicor +# scor +# mve.cor +# mcd.cor +# +if(pr)print('Execution time can be high') +use.cor=FALSE +if(!is.null(x) & !is.null(y)){ +xy=cbind(y,x) +R=COR.ROB(xy,method=COR.method) +rho=FUN(R[upper.tri(R)]) +n=nrow(x) +p=ncol(x) +use.cor=TRUE +} + +if(rho+delta>1)stop('rho+delta is greater than 1') +if(rho+delta<0-1)stop('rho+delta is less than -1') +if(MC)library(parallel) +if(SEED)set.seed(2) +if(delta<0-1 || delta>1)stop('rho + delta should be between -1 and 1') +PMD=0 +PCD=0 +p1=p+1 +if(LARGEST){ +COV=matrix(rho,p1,p1) +COV[1,2]=COV[2,1]=rho+delta +} +if(!LARGEST){ +COV=matrix(rho+delta,p1,p1) +COV[1,2]=COV[2,1]=rho +} +diag(COV)=1 +if(use.cor)COV=R #Over rule using an indifference zone; use estimate of the correlation matrix +x=list() +for(i in 1:iter)x[[i]]=rmulnorm(n,p1,COV) +if(!MC)a=lapply(x,corCOM.PMDPCD.sub,corfun=corfun,LARGEST=LARGEST,...) +if(MC)a=mclapply(x,corCOM.PMDPCD.sub,corfun=corfun,LARGEST=LARGEST,...) +for(i in 1:iter){ +if(a[[i]]$Conclusion=='Decide'){ +PMD=PMD+1 +if(a[[i]][1]==1)PCD=PCD+1 +}} +PCD.CI=binom.conf(PCD,PMD,alpha=alpha,pr=FALSE)$ci +PMD.CI=binom.conf(PMD,iter,alpha=alpha,pr=FALSE)$ci +PCD=PCD/max(1,PMD) +PMD=PMD/iter +list(PMD=PMD,PMD.CI=PMD.CI, PCD=PCD, PCD.CI=PCD.CI) +} + +corCOM.PMDPCD.sub<-function(x,corfun,LARGEST=LARGEST,...){ +p1=ncol(x) +a=corCOM.DVvsIV(x[,2:p1],x[,1],corfun=corfun,SEED=FALSE,LARGEST=LARGEST,...) +a +} + +RM.PMD.PCD<-function(x,tr=.2,delta=.5,alpha=.05,p.crit=NULL,iter=5000,nboot=500,SEED=TRUE){ +# +# +# Use an indifference zone. 
Given x +# determine the +# probability of making a decision and the probability of +# of correct decision given that a decision is made +# within the context of an indiffernce zone +# +if(is.list(x))stop('x should be a matrix or a data frame') +PMD=0 +PCD=0 +x=elimna(x) +est=apply(x,2,tmean,tr=tr) +ID=which(est==max(est)) +n=nrow(x) +# First, determine decision rule +# +A=cov(x) +J=ncol(x) +aval=c(seq(.001,.1,.001),seq(.11,.99,.01)) +id=which(aval==alpha) +if(length(id)==0)stop('alpha be one one values .001(.001).1 or 11(.01).99') +p.crit=rmanc.best.crit(x,iter=iter,alpha=alpha,tr=tr,SEED=SEED) +# +# Now simulate indifference zone. +for(i in 1:nboot){ +x=mvrnorm(n,mu=rep(0,J),Sigma=A) +x[,1]=x[,1]+delta*sqrt(A[ID,ID]) +est=apply(x,2,tmean,tr=tr) +id=which(est==max(est)) +a=rmanc.best.ex(x,tr=tr) +if(sum(a<=p.crit)){ +PMD=PMD+1 +if(id==1)PCD=PCD+1 +}} +PMD.ci=binom.conf(PMD,nboot,pr=FALSE)$ci +PCD.ci=binom.conf(PCD,PMD,pr=FALSE)$ci +PCD=PCD/max(PMD,1) +PMD=PMD/nboot +list(PMD=PMD,PMD.ci=PMD.ci,PCD=PCD,PCD.ci=PCD.ci) +} + +RS.LOC.IZ<-function(n,J=NULL,locfun=tmean,delta,iter=10000,SEED=TRUE,...){ +# +# Estimate probability of a correct decision based on an indifference zone +# +if(SEED)set.seed(2) +PCD=0 +if(length(n)==1){ +if(is.null(J))stop('Number of groups, J, was not specified') +n=rep(n,J) +} +if(length(n)>1){ +x=list() +J=length(n) +for(i in 1:iter){ +for(j in 1:J)x[[j]]=rnorm(n[j]) +x[[1]]=x[[1]]+delta +est=lapply(x,locfun,...) +est=as.vector(list2mat(est)) +id=which(est==max(est)) +if(id==1)PCD=PCD+1 +}} +PCD.ci=binom.conf(PCD,iter,pr=FALSE)$ci +PCD=PCD/iter +list(PCD=PCD,PCD.ci=PCD.ci) +} + +dep.dif.fun<-function(x,y,tr=.2,alpha=.05,AUTO=TRUE,PVSD=FALSE,nboot=2000,method=c('TR','TRPB','HDPB','MED','AD','SIGN')){ +# +# +# For two dependent groups, +# compute confidence intervals based on difference scores +# +# TR: trimmed mean Tukey--McLaughlin +# TRPB: trimmed means percentile bootstrap +# MED: median of the difference scores. +#.HDPB: median of the difference scores using Harrell--Davis and a percentile bootstrap +# AD: based on the median of the distribution of x-y, which can differ from the median of the difference scores. +# SIGN: P(X7){ +a=yuend(xsub2.1[,3],xsub2.1[,4],tr=tr,alpha=alpha) +GRID[ic,1]=a$n +GRID[ic,2]=a$est1 +GRID[ic,3]=a$est2 +GRID[ic,4]=a$dif +GRID[ic,5]=a$ci[1] +GRID[ic,6]=a$ci[2] +GRID[ic,7]=a$p.value +if(PB){ +pbv=trimpb2(xsub2.1[,3],xsub2.1[,4],tr=tr,alpha=alpha,nboot=nboot) +GRID[ic,5]=pbv$ci[1] +GRID[ic,6]=pbv$ci[2] +GRID[ic,7]=pbv$p.value +} +}} +if(DIF){ +if(length(xsub2.1[,3])>7){ +d=dep.dif.fun(xsub2.1[,3],xsub2.1[,4],tr=tr,alpha=alpha,method=METHOD,AUTO=AUTO,PVSD=PVSD) +J=which(LAB==METHOD) +d=pool.a.list(d) +GRID[ic,1]=length(xsub2.1[,3]) +GRID[ic,2:5]=d[IZ[J,]] +}} +ES[[ic]]=dep.ES.summary.CI(xsub2.1[,3],xsub2.1[,4],tr=tr) +}} +dimnames(GI)=list(NULL,c('Int.1.low','Int.1.up','Int.2.low','Int.2.up')) +if(DIF){ +dimnames(GRID)=list(NULL,c('n','Est','ci.low','ci.up','p.value','adj.p.value')) +GRID[,6]=p.adjust(GRID[,5],method='hoch') +} +if(!DIF){ +dimnames(GRID)=list(NULL,c('n','est.1','est.2','DIF','ci.low','ci.up','p.value','adj.p.value')) +GRID[,8]=p.adjust(GRID[,7],method='hoch') +} +list(GRID.INTERVALS=GI,GRID=GRID, Effect.Sizes=ES) +} + +comdvar.mcp<-function(x,method='hoch'){ +# +# Compare the variances of J depenent variables. 
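+# (Illustration, hypothetical data: comdvar.mcp(xm), where xm is a matrix
+#  with one column per dependent measure.)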
+# Perform all pairwise comparisons using the HC4 extension of the Morgan-Pitman test +# +if(is.null(dim(x)))stop('x should be a matrix or data frame') +J=ncol(x) +CC=(J^2-J)/2 +output<-matrix(0,CC,7) +dimnames(output)<-list(NULL,c('Var','Var','Est. 1','Est 2','Dif','p.value','Adj.p.value')) +ic=0 +for (j in 1:J){ +for (k in 1:J){ +if (j < k){ +ic=ic+1 +a=comdvar(x[,j],x[,k]) +output[ic,1]=j +output[ic,2]=k +output[ic,3]=a$est1 +output[ic,4]=a$est2 +output[ic,5]=a$est1-a$est2 +output[ic,6]=a$p.value +}}} +output[,7]=p.adjust(output[,6],method=method) +output +} + + + +rmVARcom.mcp<-function(x,est=winvar,alpha=.05,nboot=500,method='hoch',SEED=TRUE){ +# +# Compare the variances of J dependent variables. +# Perform all pairwise comparisons using the HC4 extension of the Morgan-Pitman test +# +if(is.null(dim(x)))stop('x should be a matrix or data frame') +J=ncol(x) +CC=(J^2-J)/2 +output<-matrix(0,CC,9) +dimnames(output)<-list(NULL,c('Var','Var','Est. 1','Est 2','Dif','cilow','ci.up','p.value','Adj.p.value')) +ic=0 +for (j in 1:J){ +for (k in 1:J){ +if (j < k){ +ic=ic+1 +a=rmVARcom(x[,j],x[,k],est=est,alpha=alpha,nboot=nboot,plotit=FALSE,SEED=SEED) +output[ic,1]=j +output[ic,2]=k +output[ic,3:4]=a$estimate +output[ic,5]=a$dif +output[ic,6]=a$ci[1] +output[ic,7]=a$ci[2] +output[ic,8]=a$p.value +}}} +output[,9]=p.adjust(output[,8],method=method) +output +} + +comvar.mcp<-function(x,method='hoch',SEED=TRUE){ +# +# Compare the variances of J indepenent variables. +# Perform all pairwise comparisons using +# a slight extension of HC4 vesion of the Morgan-Pitman test +# +if(is.matrix(x) || is.data.frame(x))x=listm(x) +J=length(x) +CC=(J^2-J)/2 +output<-matrix(0,CC,7) +dimnames(output)<-list(NULL,c('Var','Var','SD 1','SD 2','Dif','p.value','Adj.p.value')) +ic=0 +for (j in 1:J){ +for (k in 1:J){ +if (j < k){ +ic=ic+1 +a=varcom.IND.MP(x[[j]],x[[k]],SEED=SEED) +a=pool.a.list(a) +output[ic,1]=j +output[ic,2]=k +output[ic,3:4]=a[1:2] +output[ic,3:4]=sqrt(output[ic,3:4]) +output[ic,5]=output[ic,3]-output[ic,4] +output[ic,6]=a[3] +}}} +output[,7]=p.adjust(output[,6],method=method) +output +} + +robVARcom.mcp<-function(x,est=winvar,alpha=.05,nboot=2000,method='hoch',SEED=TRUE){ +# +# Compare the robust variances of J indepenent variables. +# Perform all pairwise comparisons using the HC4 extension of the Morgan-Pitman test +# +if(is.null(dim(x)))stop('x should be a matrix or data frame') +J=ncol(x) +CC=(J^2-J)/2 +output<-matrix(0,CC,9) +dimnames(output)<-list(NULL,c('Var','Var','Est. 1','Est 2','Dif','cilow','ci.up','p.value','Adj.p.value')) +ic=0 +for (j in 1:J){ +for (k in 1:J){ +if (j < k){ +ic=ic+1 +a=b2ci(x[,j],x[,k],est=est,SEED=SEED,nboot=nboot,alpha=alpha) +output[ic,1]=j +output[ic,2]=k +output[ic,3]=a$est1 +output[ic,4]=a$est2 +output[ic,5]=a$est1-a$est2 +output[ic,6]=a$ci[1] +output[ic,7]=a$ci[2] +output[ic,8]=a$p.value +}}} +output[,9]=p.adjust(output[,8],method=method) +output +} + +oph.ind.comvar<-function(x,y=NULL,method='hommel',invalid=4,SEED=TRUE,STOP=TRUE){ +# +# This function is designed specifically for dealing with +# Prediction Error for Intraocular Lens Power Calculation +# It is assumed that any value less than -3 diopters or greater than 3 diopters +# is invalid. The argument invalid can be used to change this decision rule. +# +# Goal: compare the variances of J independent measures. +# All pairwise comparisons are performed using +# a slight extension of the HC4 vesion of the Morgan-Pitman test +# +# x can be a matrix, a data frame or it can have list mode. 
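+# With a matrix, data frame or list, each column or list element is taken to be
+# the prediction errors for one independent sample.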
+# if y is not NULL, the function assumes x is a vector +# and the goal is to compare the variances of the data in x and y. +# +# By default, Hochberg's method is used to control the probability of one +# or more TypeI errors +# +if(!is.null(y))x=list(x,y) +if(is.matrix(x) || is.data.frame(x))x=listm(x) +J=length(x) +for(j in 1:J)x[[j]]=elimna(x[[j]]) +for(j in 1:J){ +flag=abs(elimna(x[[j]]))>invalid +if(sum(flag,na.rm=TRUE)>0){ +print(paste('The value of argument invalid indicates that any value greater in absolute value than', invalid,' is invalid')) +print(paste('Variable', j, 'has one or more invalid values')) +print('They occur in the following positions') +nr=c(1:length(x[[j]])) +print(nr[flag]) +if(STOP)stop() +} +} +CC=(J^2-J)/2 +output<-matrix(0,CC,7) +dimnames(output)<-list(NULL,c('Var','Var','SD 1','SD 2','Dif','p.value','Adj.p.value')) +ic=0 +for (j in 1:J){ +for (k in 1:J){ +if (j < k){ +ic=ic+1 +a=varcom.IND.MP(x[[j]],x[[k]],SEED=SEED) +a=pool.a.list(a) +output[ic,1]=j +output[ic,2]=k +output[ic,3:4]=a[1:2] +output[ic,3:4]=sqrt(output[ic,3:4]) +output[ic,5]=output[ic,3]-output[ic,4] +output[ic,6]=a[3] +}}} +output[,7]=p.adjust(output[,6],method=method) +output +} + + +oph.dep.comvar<-function(x, y=NULL, invalid=4, method='hommel',STOP=TRUE){ +# +# This function is designed specifically for dealing with +# Prediction Error for Intraocular Lens Power Calculation +# It is assumed that any value less than -3 diopters or greater than 3 diopters +# is invalid. The argument invalid can be used to change this decision rule. +# +# Goal: compare the variances of J dependent measures. +# All pairwise comparisons are performed using +# a slight extension of the HC4 version of the Morgan-Pitman test +# Compare the variances of J dependent variables. +# Perform all pairwise comparisons using the HC4 extension of the Morgan-Pitman test + +# x can be a matrix, a data frame or it can have list mode. +# if y is not NULL, the function assumes x is a vector +# and the goal is to compare the variances of the data in x and y. 
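+# Illustration (hypothetical data): oph.dep.comvar(PE), where PE is a matrix
+# whose columns are the prediction errors of several formulas measured on the
+# same subjects, compares the SDs of every pair of columns.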
+# +# By default, Hochberg's method is used to control the probability of one +# or more Type I errors +# +if(!is.null(y))x=cbind(x,y) +if(is.list(x)){ +n=pool.a.list(lapply(x,length)) +if(var(n)!=0)stop('lengths have different values') +x=matl(x) +} +J=ncol(x) +flag=abs(elimna(x))>invalid +if(sum(flag,na.rm=TRUE)>0){ +nr=c(1:nrow(x)) +if(sum(flag)>1){ +print(paste('The value of argument invalid indicates that any value greater in absolute value than', invalid,' is invalid')) +print('The following rows have invalid values') +} +if(sum(flag)==1){ +print(paste('The value of argument invalid indicates that any value greater in absolute value than', invalid,' is invalid')) +print('The following row has an invalid value') +} +irow=NA +ic=0 +N=nrow(x) +for(i in 1:N){ +iflag=abs(x[i,])>invalid +if(sum(iflag,na.rm=TRUE)>0){ +ic=ic+1 +irow[ic]=i +}} +print(irow) +if(STOP)stop() +} +CC=(J^2-J)/2 +output<-matrix(0,CC,7) +dimnames(output)<-list(NULL,c('Var','Var','SD 1','SD 2','Dif','p.value','Adj.p.value')) +ic=0 +for (j in 1:J){ +for (k in 1:J){ +if (j < k){ +ic=ic+1 +a=comdvar(x[,j],x[,k]) +output[ic,1]=j +output[ic,2]=k +output[ic,3]=a$est1 +output[ic,4]=a$est2 +output[ic,3]=sqrt(a$est1) +output[ic,4]=sqrt(a$est2) +output[ic,5]=sqrt(a$est1)- sqrt(a$est2) +output[ic,6]=a$p.value +}}} +output[,7]=p.adjust(output[,6],method=method) +output +} + + +ancM.COV.ES<-function(x1,y1,x2,y2,fr1=1,fr2=1,tr=.2,pts=NULL,xout=FALSE,outfun=outpro,...){ +# +# +# For two or more covariates, estimate effect sizes for +# a collection of points. +# +# That is, for each point of interest, determine +# a cloud of points close to it and based on the +# corresponding y values, compute measures of effect size +# +# If pts=NULL +# the significant points returned by +# ancdetM4 are used +# +p=ncol(x1) +if(p<2)stop('This function is for two or more covariates') +p1=p+1 +if(ncol(x2)!=p)stop('x1 and x2 do not have the same number of columns') +xy=elimna(cbind(x1,y1)) +x1=xy[,1:p] +y1=xy[,p1] +xy=elimna(cbind(x2,y2)) +x2=xy[,1:p] +y2=xy[,p1] +if(min(length(y1),length(y2))<50)stop('The minimum sample size must be greater than or equal to 50') +if(xout){ +flag<-outfun(x1,...)$keep +x1<-x1[flag,] +y1<-y1[flag] +flag<-outfun(x2,...)$keep +x2<-x2[flag,] +y2<-y2[flag] +} +guide1=NA +guide2=NA +x1=as.matrix(x1) +x2=as.matrix(x2) +a=ancdetM4(x1=x1,y1=y1,x2=x2,y2=y2,fr1=fr1,fr2=fr2,tr=tr,pts=pts,...) 
+if(is.null(pts))pts=a$significant.points +M=NA +SML.class=NA +if(!is.null(pts)){ +if(is.vector(pts))pts=matrix(pts,nrow=1) +nr=nrow(pts) +M=matrix(NA,nr,6) +SML.class=matrix(0,nr,6) +m1<-covmcd(x1) +m2<-covmcd(x2) +for(i in 1:nr){ +id1=near3d(x1,pts[i,],fr1,m1) +id2=near3d(x2,pts[i,],fr2,m2) +if(sum(id1)<10 ||sum(id2)<10)print(paste('For point',j,'not enough nearest neighbors')) +if(sum(id1)>=10 & sum(id2)>10){ +ES=ES.summary(y1[id1],y2[id2]) +M[i,]=ES[,1] +}} +dimnames(M)=list(NULL,names(ES[,1])) +dum1=rnorm(50) +dum2=rnorm(50) +guide1=ES.summary(dum1,dum2+3)[,-1] +guide2=ES.summary(dum1,dum2-3)[,-1] +} +if(!is.na(M[1])){ +for(i in 1:nr){ +flag=M[i,] <= guide1[,2] || M[i,]>= guide2[,2] +SML.class[i,flag]=1 +flag=M[i,] <= guide1[,3] || M[i,]>= guide2[,3] +SML.class[i,flag]=2 +flag=M[i,] <= guide1[,4] || M[i,]>= guide2[,4] +SML.class[i,flag]=3 +}} +leg1='0=At most small, 1=between S and M, 2=between M and L, 3=greater than L' +list(ES.REL.MAG.G1.less.than.G2=guide1, ES.REL.MAG.G1.greater.than.G2=guide2,Est=M,legend.4.SML.class=leg1,SML.class=SML.class) +} + +ancDEP.MULC.ES<-function(x1,y1,y2,fr1=1.5,fr2=1.5,tr=.2,pts=NULL,xout=FALSE,outfun=outpro,cov.fun=skip.cov,...){ +# +# +# Dependent groups +# For two or more covariates, estimate effect sizes for +# a collection of points. +# +# That is, for each point of interest, determine +# a cloud of points close to it and based on the +# corresponding y values, compute measures of effect size +# +# If pts=NULL +# the significant points returned by +# ancdetM4 are used +# +library(MASS) +x2=x1 +p=ncol(x1) +if(p<2)stop('This function is for two or more covariates') +p1=p+1 +#if(ncol(x2)!=p)stop('x1 and x2 do not have the same number of columns') +xy=elimna(cbind(x1,y1)) +x1=xy[,1:p] +y1=xy[,p1] +xy=elimna(cbind(x2,y2)) +x2=xy[,1:p] +y2=xy[,p1] +if(min(length(y1),length(y2))<50)stop('The minimum sample size must be greater than or equal to 50') +if(xout){ +flag<-outfun(x1,...)$keep +x1<-x1[flag,] +y1<-y1[flag] +flag<-outfun(x2,...)$keep +x2<-x2[flag,] +y2<-y2[flag] +} +guide1=NA +guide2=NA +x1=as.matrix(x1) +x2=as.matrix(x2) +a=Dancovamp(x1=x1,y1=y1,x2=x2,y2=y2,fr1=fr1,fr2=fr2,tr=tr,pts=pts,...) +if(is.null(pts))pts=a$pts +M=NA +SML.class=NA +if(!is.null(pts)){ +if(is.vector(pts))pts=matrix(pts,nrow=1) +nr=nrow(pts) +M=matrix(NA,nr,4) +SML.class=matrix(0,nr,4) +m1<-cov.fun(x1) +m2<-cov.fun(x2) +for(i in 1:nr){ +id1=near3d(x1,pts[i,],fr1,m1) +id2=near3d(x2,pts[i,],fr2,m2) +if(sum(id1)<10 ||sum(id2)<10)print(paste('For point',j,'not enough nearest neighbors')) +if(sum(id1)>=10 & sum(id2)>10){ +ES=dep.ES.summary(y1[id1],y2[id2]) +M[i,]=ES[,2] +}} +dimnames(M)=list(NULL,names(ES[,1])) +dum1=rnorm(50) +dum2=rnorm(50) +guide1=dep.ES.summary(dum1,dum2+3)[,-2] +guide2=dep.ES.summary(dum1,dum2-3)[,-2] +} +if(!is.na(M[1])){ +for(i in 1:nr){ +flag=M[i,] <= guide1[,2] || M[i,]>= guide2[,2] +SML.class[i,flag]=1 +flag=M[i,] <= guide1[,3] || M[i,]>= guide2[,3] +SML.class[i,flag]=2 +flag=M[i,] <= guide1[,4] || M[i,]>= guide2[,4] +SML.class[i,flag]=3 +}} +leg1='0=At most small, 1=between S and M, 2=between M and L, 3=greater than L' +list(ES.REL.MAG.G1.less.than.G2=guide1, ES.REL.MAG.G1.greater.than.G2=guide2,Est=M,legend.4.SML.class=leg1,SML.class=SML.class) +} + +rmESPRO.null<-function(n,J,est=tmean,nboot=2000,SEED=TRUE,...){ + # + # Determine null distribution + # for rmES.pro + # + if(SEED)set.seed(2) + v=NA + for(i in 1:nboot){ + x=rmul(n,J,g=0,h=0,rho=0) +E=apply(x,2,est,...) 
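+# E contains the estimated measure of location for each of the J marginal
+# distributions generated under the null case of identical distributions.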
+ GM=mean(E) + GMvec=rep(GM,J) + DN=pdis(x,GMvec,center=E) +# DN=pdis(x,E,center=GMvec) +v[i]=DN +} +v +} + + +rmES.dif.pro<-function(x,est=tmean,...){ +# +# Global measure of effect size, +# based on difference scores, +# relative to the null distribution +# +if(is.list(x))x=matl(x) +x=elimna(x) +n=nrow(x) +n1=n+1 +J=ncol(x) +ALL=(J^2-J)/2 +M=matrix(NA,nrow=n,ncol=ALL) +ic=0 +for(j in 1:J){ +for(k in 1:J){ +if(j0)PCD.ci=binom.conf(PCD,PMD,pr=FALSE)$ci +PCD=PCD/max(PMD,1) +PMD=PMD/iter +list(PMD=PMD,PMD.ci=PMD.ci,PCD=PCD,PCD.ci=PCD.ci) +} + +RMPB.PMD.PCD<-function(x,est=tmean,delta=.5,alpha=.05,iter=500,nboot=1000,SEED=TRUE,...){ +# +# +# Use an indifference zone. Given x +# determine the +# probability of making a decision and the probability of +# of correct decision given that a decision is made +# within the context of an indifference zone +# +if(SEED)set.seed(2) +if(is.list(x))stop('x should be a matrix or a data frame') +PMD=0 +PCD=0 +x=elimna(x) +n=nrow(x) +E=apply(x,2,est,...) +ID=which(E==max(E)) +# +A=cov(x) +J=ncol(x) +# +# Now simulate indifference zone. +for(i in 1:iter){ +x=mvrnorm(n,mu=rep(0,J),Sigma=A) +x[,1]=x[,1]+delta*sqrt(A[ID,ID]) +e=apply(x,2,est,...) +id=which(e==max(e)) +a=rmanc.best.PB(x,est=est,nboot=nboot,SEED=FALSE,...) +if(sum(a$p.value<=alpha)){ +PMD=PMD+1 +if(id==1)PCD=PCD+1 +}} +PMD.ci=binom.conf(PMD,iter,pr=FALSE)$ci +PCD.ci=binom.conf(PCD,PMD,pr=FALSE)$ci +PCD=PCD/max(PMD,1) +PMD=PMD/iter +list(PMD=PMD,PMD.ci=PMD.ci,PCD=PCD,PCD.ci=PCD.ci) +} + + +ID.sm.varPB<-function(x,var.fun=winvar,nboot=500,NARM=FALSE,na.rm=TRUE,SEED=TRUE,...){ +# +# +# Strategy: suppose group 2 has the lowest estimate. +# Generate a bootstrap sample and determine whether +# the lowest bootstrap estimate corresponds to group 2. +# Repeat nboot times and let P denote the proportion of times group 2 has the lowest estimate +# Make a decision if this proportion is sufficiently high. +# P yields a pseudo p-value +# +# +x=elimna(x) +J=ncol(x) +chk=0 +if(is.list(x))x<-matl(x) +if(NARM)x=elimna(x) +e=apply(x,2,var.fun,...) +id=which(e==min(e)) +n=nrow(x) +J=length(x) +for(i in 1:nboot){ +isam=sample(n,replace=TRUE) +b=apply(x[isam,],2,var.fun,na.rm=na.rm,...) +ichk=which(b==min(b)) +if(id==ichk)chk=chk+1 +} +pv=chk/nboot +pv=2*min(pv,1-pv) +list(n=n,Est=e,p.value=pv) +} + +bin.best.DO<-function(x,n){ +# +# Determine whether it is reasonable to +# decide which group has largest probability of success +# +# x= vector number of successes +# n=sample sizes +# +chk=0 +e=x/n +J=length(x) +id=which(e==max(e))[1] +CON=conCON(J,id)$conCON +a=lincon.bin(x,n,con=CON) +pv=max(a$CI[,4]) +list(Est.=e,p.value=pv) +} + + +rplotCIsmm<-function(x,y,tr=.2,fr=.5,plotit=TRUE,scat=TRUE,pyhat=FALSE,SEED=TRUE, +dfmin=2,pts=NULL,npts=25,nmin=12, +eout=FALSE,xout=FALSE,xlab='x',ylab='y',outfun=out,LP=TRUE,MID=TRUE,alpha=.05,pch='.',...){ +# +# Confidence interval for running interval smoother based on a trimmed mean. +# +# rplotCI will provide shorter and more accurate confidence intervals but +# is limited to 10 or 25 points and alpha=.05. +# This functions returns confidence intervals that are generally a bit wider +# but it has low execution time if alpha differs from 0.5 or there is interest +# using something other than 10 or 25 points. +# +# Unlike rplot,a confidence band based on the Studentized maximum modulus dist +# is computed, +# unless alpha is not equal to .05 or the number of confidence intervals +# is greater than npts=28, in which case the distribution of the max of npts +# random variables is used. 
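+# Illustration (hypothetical data):
+#   x <- runif(100); y <- x^2 + rnorm(100, sd = .2)
+#   rplotCIsmm(x, y)               # plot the smooth with a confidence band
+#   rplotCIsmm(x, y, pyhat = TRUE) # also return pts, y.hat and the band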
+# +# LP=TRUE, the plot is further smoothed via lowess +# +# fr controls amount of smoothing +# +xord=order(x) +x=x[xord] +y=y[xord] +if(!is.null(pts))pts=sort(pts) +str=rplot(x,y,tr=tr,xout=xout,plotit=FALSE,LP=LP,fr=fr,pr=FALSE)$Strength.Assoc +m<-cbind(x,y) +if(ncol(m)>2)stop('To get a smooth with more than one covariate, use rplot') +m<-elimna(m) +nv=nrow(m) +if(eout && xout)stop('Not allowed to have eout=xout=T') +if(eout){ +flag<-outfun(m,plotit=FALSE)$keep +m<-m[flag,] +} +if(xout){ +flag<-outfun(m[,1])$keep +m<-m[flag,] +} +if(is.null(pts)){ +res1=ancova(x,y,x,y,pr=FALSE,plotit=FALSE,fr1=fr,fr2=fr,nmin=nmin)$output +pts=seq(res1[1,1],res1[5,1],length.out=npts) +} +x=m[,1] +y=m[,2] +n.keep=length(y) +if(is.null(pts)){ +if(!MID)pts=seq(min(x),max(x),length.out=npts) +vv=idealf(x) +if(MID)pts=seq(vv$ql,vv$qu,length.out=npts) +} +rmd=NA +for(i in 1:length(pts))rmd[i]<-mean(y[near(x,pts[i],fr)],tr=tr) +sedf=runse(x,y,fr=fr,tr=tr,pts=pts,SEED=SEED) +df=sedf$df +flag=df>dfmin +se=sedf$se +ntest=length(df[flag]) +mdif=min(df[flag]) +crit=NA +dfval=df[flag] +for(it in 1:ntest)crit[it]=qsmm(1-alpha,ntest,dfval[it]) +low=rmd[flag]-crit*se[flag] +up=rmd[flag]+crit*se[flag] +ptsall=pts +rmdall=rmd +rmd=rmd[flag] +pts=pts[flag] +if(plotit){ +ord=order(x) +x=x[ord] +y=y[ord] +if(LP){ +rmd=lplot(pts,rmd,plotit=FALSE,pyhat=TRUE,pr=FALSE)$yhat +up=lplot(pts,up,plotit=FALSE,pyhat=TRUE,pr=FALSE)$yhat +low=lplot(pts,low,plotit=FALSE,pyhat=TRUE,pr=FALSE)$yhat +} +plot(c(x,pts),c(y,rmd),xlab=xlab,ylab=ylab,type='n') +if(scat)points(x,y,pch=pch) +lines(pts,up,lty=2) +lines(pts,low,lty=2) +lines(pts,rmd) +} +if(pyhat){output<-cbind(pts,rmd,low,up) +dimnames(output)=list(NULL,c('pts','y.hat','ci.low','ci.up')) +} +if(!pyhat)output<-'Done' +list(output=output,str=str,n=nv,n.keep=n.keep) +} + +corREG.best<-function(x,y,corfun=wincor,alpha=.05,nboot=500, neg.col=NULL,LARGEST=TRUE, SEED=TRUE,MC=FALSE,xout=FALSE,outfun=outpro,...){ +# +# Can a decision be made about which IV +# has the strongest correlation with the DV +# Winsorized correlation is used by default. +# +# x is assumed to be a matrix +# +# +if(nrow(x)!=length(y))stop('x and y have different sample sizes; should be equal') +p=ncol(x) +p1=p+1 +m1=cbind(x,y) +m1<-elimna(m1) # Eliminate rows with missing values +nval=nrow(m1) +x<-m1[,1:p] +y=m1[,p1] +if(xout){ +m<-cbind(x,y) +flag<-outfun(x,plotit=FALSE,...)$keep +m<-m[flag,] +x<-m[,1:p] +y<-m[,p1] +} +x=neg.colM(x,neg.col) +est=NA +for(j in 1:p)est[j]=corfun(x[,j],y)$cor +if(LARGEST)ID=which(est==max(est)) +if(!LARGEST)ID=which(est==min(est)) +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +# +# If you use corfun=scor, set plotit=F +# +data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot) +data=listm(t(data)) +if(MC){ +library(parallel) +bvec<-mclapply(data,corCOMmcp_sub,x,y,corfun) +} +IB=NA +if(!MC)bvec<-lapply(data,corCOMmcp_sub,x,y,corfun) +for(i in 1:nboot){ +if(LARGEST)IB[i]=which(bvec[[i]]==max(bvec[[i]])) +if(!LARGEST)IB[i]=which(bvec[[i]]==min(bvec[[i]])) +} +PC=mean(IB==ID) +PC=2*min(PC,1-PC) +list(Est.=est,p.value=PC) +} + +corREG.best.DO=corREG.best + +rung3hatCI<-function(x,y,pts=x,tr=.2,alpha=.05,fr=1,nmin=12,ADJ=FALSE,iter=1000,...){ +# +# Compute y hat for each row of data in the matrix pts +# use a running interval smoother to compute a confidence interval for trimmed mean of Y given X +# +# fr controls amount of smoothing +# tr is the amount of trimming +# x is an n by p matrix of predictors. 
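+# Illustration (hypothetical data; see the description of pts just below):
+#   x <- matrix(rnorm(200), ncol = 2); y <- x[, 1] + rnorm(100)
+#   rung3hatCI(x, y, pts = rbind(c(0, 0), c(1, 1)))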
+# pts is an m by p matrix, m>=1. +# +oldSeed <- .Random.seed +set.seed(12) # So get consistent results from near3d +if(ADJ)alpha=rung3hat.pcrit(x,pts=pts,tr=tr,nmin=nmin,fr=fr,iter=iter) +x=as.matrix(x) +p=ncol(x) +pts=as.matrix(pts) +library(MASS) +m<-cov.mve(x) +rmd<-1 # Initialize rmd +nval<-1 +ci=matrix(NA,nrow=nrow(pts),ncol=2) +x.used=matrix(NA,nrow=nrow(pts),ncol=p) +for(i in 1:nrow(pts)){ +flag=near3d(x,pts[i,],fr,m) +rmd[i]<-mean(y[flag],tr) +nval[i]<-length(y[flag]) +if(nval[i]>nmin){ +ci[i,]=trimci(y[flag],tr=tr,alpha=alpha,pr=FALSE)$ci +x.used[i,]=pts[i,] +} +} +flag=!is.na(x.used[,1]) +x.used=x.used[flag,] +rmd=rmd[flag] +nval=nval[flag] +ci=ci[flag,] +output=cbind(nval,rmd,ci) +dimnames(output)=list(NULL,c('n','Est.','ci.low','ci.up')) +assign(x='.Random.seed', value=oldSeed, envir=.GlobalEnv) +list(pts.used=x.used,output=output,alpha.used=alpha) +} + +rung3hat.pcrit<-function(x,pts=x,alpha=.05,iter=1000,tr=.2,fr=1,nmin=12,...){ +# +# Compute critical p-value for rung3hatCI. +# +x=as.matrix(x) +n=nrow(x) +pts=as.matrix(pts) +pvdist=NA +m<-cov.mve(x) +for(i in 1:iter){ +y=rnorm(n) +a=rung3hat.sub(x,y,pts=pts,m=m,tr=tr,fr=fr,nmin=nmin) +pvdist[i]=min(a,na.rm=TRUE) +} +pc=hd(pvdist,alpha) +pc +} + + +rung3hat.sub<-function(x,y,pts,m,tr=.2,fr,nmin){ +pv=NA +for(i in 1:nrow(pts)){ +flag=near3d(x,pts[i,],fr,m) +if(sum(flag)>nmin)pv[i]=trimci(y[flag],tr=tr,pr=FALSE)$p.value +} +pv +} + +sm.vs.sm<-function(x,y,method1='RUN',method2='RF',xout=FALSE,outfun=outpro,xlab='Est1', +ylab='Est2',pch='.',pr=TRUE,xoutL=FALSE,...){ +# +# If the smoothers give similar results, the plot returned here should be +# tightly clustered around a line having slope=1 and intercept=0, indicated +# by a dashed line. +# +# if(!xoutL)print('Suggest also looking at result using xoutL=TRUE) +e1=smpred(x,y,method=method1,xout=xout,outfun=outfun,...) +e2=smpred(x,y,method=method2,xout=xout,outfun=outfun,...) +lplot(e1,e2,xlab=xlab,ylab=ylab,pc=pch,xout=xoutL,pr=FALSE) +abline(0,1,lty=2) +} + +best.DO<-function(x,tr=.2,...){ +# +# Determine whether it is reasonable to +# decide which group has largest measure of location +# +# +chk=0 +if(is.matrix(x)||is.data.frame(x))x<-listm(x) +x=elimna(x) +J=length(x) +e=lapply(x,tmean,tr) +e=pool.a.list(e) +id=which(e==max(e)) +CON=conCON(J,id)$conCON +a=lincon(x,con=CON,pr=FALSE) +pv=max(a$psihat[,5]) +list(Est.=e,p.value=pv) +} + +rmanc.best.DO<-function(x,tr=.2,...){ +# +# Determine whether it is reasonable to +# decide which group has largest measure of location +# +# +if(is.list(x))x=matl(x) +x=elimna(x) +x<-listm(x) +J=length(x) +e=lapply(x,tmean,tr) +e=pool.a.list(e) +id=which(e==max(e)) +id=id[1] +e=lapply(x,tmean,tr) +e=pool.a.list(e) +id=which(e==max(e)) +CON=conCON(J,id)$conCON +a=rmmcp(x,con=CON,dif=FALSE,tr=tr) +pv=max(a$test[,3]) +list(Best.Group=id,Est.=e,p.value=pv) +} + + + + + +rmbestPB.DO<-function(x,est=tmean,nboot=NA,SEED=TRUE,...){ +# +# Determine whether it is reasonable to +# decide which group has largest measure of location +# +# +if(is.list(x))x=matl(x) +x=elimna(x) +x<-listm(x) +J=length(x) +e=lapply(x,est,...) +e=pool.a.list(e) +id=which(e==max(e)) +CON=conCON(J,id)$conCON +a=rmmcppb(x,con=CON,dif=FALSE,est=est,nboot=nboot,SEED=SEED,pr=FALSE,...) 
+pv=max(a$output[,3]) +list(Est.=e,p.value=pv,con=CON) +} + + + +rmanc.bestPB<-function(x,alpha=.05,est=tmean,iter=5000,SEED=TRUE,nboot=2000,PB=FALSE,...){ +# +# +# For J dependent groups, +# identify the group with largest trimmed mean +# Make a decision if every p.value<=p.crit +# +# p.crit is determined via +# a simulation to determine the null distribution based on +# iter=5000 replications. +# +# PB=FALSE: Determine critical values via rmanc.best.crit. Faster execution time but can differ somewhat +# from values based on PB method +# +# +# Returns: +# Best='No Decision' if not significant +# Best= the group with largest measure of location if a decision can be made. +# +# Confidence intervals having simultaneous probability coverage 1-alpha +# using the adjusted level. +# +if(is.list(x))x=matl(x) +x=elimna(x) +flag=TRUE +J=ncol(x) +if(J<3)stop('Should have 3 or more groups') +Jm1=J-1 +est=apply(x,2,tmean,tr=tr) +n=nrow(x) +est=matl(est) +R=order(est,decreasing = TRUE) +pvec=NA +if(!PB)p.crit=rmanc.best.crit(x,iter=iter,alpha=alpha,SEED=SEED) +if(PB)p.crit=rmanc.best.critPB(x,iter=iter,alpha=alpha,SEED=SEED) +output<-matrix(NA,Jm1,8) +dimnames(output)=list(NULL,c('Est.Best','Grp','Est','Dif','ci.low','ci.up','p.value','p.crit')) +for(i in 2:J){ +im1=i-1 +a=yuend(x[,R[1]],x[,R[i]],alpha=p.crit[im1],tr=tr) +pvec[im1]=a$p.value +output[im1,]=c(a$est1,R[[i]],a$est2,a$dif,a$ci[1],a$ci[2],a$p.value,p.crit[im1]) +} +Best='No Decisions' +flag=sum(output[,7]<=output[,8]) +id=output[,7]<=output[,8] +if(sum(id>0))Best=output[id,2] +if(flag==Jm1)Best='All' +setClass('BIN',slots=c('Group.with.largest.estimate','Larger.than','n','output')) +put=new('BIN',Group.with.largest.estimate=R[[1]],Larger.than=Best,n=n,output=output) +put +} + +cell.com<-function(x,i=1,j=2,alpha=.05,AUTO=TRUE,method='AC',data=NULL){ +# +# For a multinomial distribution, compuate a confidence interval +# for p_i-p_j, the difference between the probabilities asscoiated with cells i and j +# +# x= cell frequencies +# +if(!is.null(data))x=splot(data)$frequencies +n=sum(x) +c1=binom.conf(x[i],n,AUTO=AUTO,method=method,alpha=alpha,pr=FALSE)$ci +c2=binom.conf(x[j],n,AUTO=AUTO,method=method,alpha=alpha,pr=FALSE)$ci +p1=x[i]/n +p2=x[j]/n +COR=0-sqrt(p1*p1/((1-p1)*(1-p2))) +T1=(p1-c1[1])^2+(p2-c2[2])^2-2*COR*(p1-c1[1])*(c2[2]-p2) +T2=(p1-c1[2])^2+(p2-c2[1])^2-2*COR*(c1[2]-p1)*(p2-c2[1]) +T2=max(c(0,T2)) +T1=max(c(0,T1)) +L=p1-p2-sqrt(T1) +U=p1-p2+sqrt(T2) +list(cells.compared=c(i,j),dif=p1-p2,Estimates=x/n,ci=c(L,U)) +} + + +cell.com.pv<-function(x,i=1,j=2,method='AC',data=NULL){ +# +# For a multinomial distribution, compute a confidence interval +# for p_i-p_j, the difference between the probabilities asscoiated with cells i and j +# +# x= cell frequencies +# +if(!is.null(data))x=splot(data)$frequencies +n=sum(x) +p1=x[i]/n +p2=x[j]/n +COR=0-sqrt(p1*p1/((1-p1)*(1-p2))) +a=seq(.001,.1,.001) +a=c(a,seq(.1,.99,.01)) +a=rev(a) + +if(x[i]==x[j])pv=1 +else{ +for(k in 1:length(a)){ +c2=acbinomci(x[j],n,alpha=a[k])$ci +c1=acbinomci(x[i],n,alpha=a[k])$ci +T1=(p1-c1[1])^2+(p2-c2[2])^2-2*COR*(p1-c1[1])*(c2[2]-p2) +T2=(p1-c1[2])^2+(p2-c2[1])^2-2*COR*(c1[2]-p1)*(p2-c2[1]) +T2=max(c(0,T2)) +T1=max(c(0,T1)) +L=p1-p2-sqrt(T1) +U=p1-p2+sqrt(T2) +pv=a[k] +if(sign(L*U)<0)break +}} +if(n<=35){ +if(x[i]==x[j])pvnew=1 +else{ +pv.up=pv+.1 +anew=seq(pv,pv.up,.01) +for(k in 1:length(anew)){ +c1=binom.conf(x[i],n,AUTO=TRUE,method=method,alpha=anew[k],pr=FALSE)$ci +c2=binom.conf(x[j],n,AUTO=TRUE,method=method,alpha=anew[k],pr=FALSE)$ci 
+T1=(p1-c1[1])^2+(p2-c2[2])^2-2*COR*(p1-c1[1])*(c2[2]-p2) +T2=(p1-c1[2])^2+(p2-c2[1])^2-2*COR*(c1[2]-p1)*(p2-c2[1]) +T2=max(c(0,T2)) +T1=max(c(0,T1)) +L=p1-p2-sqrt(T1) +U=p1-p2+sqrt(T2) +pvnew=anew[k] +if(sign(L*U)>0)break +}} +pv=pvnew +} +pv +} + + +TM<-function(x,bend=1.28){ + + ## x has list mode + ## Computes TM test statistic. + ## "mestse" is used as the standard error of one-step M-estimator and + ## "mad" is used as a measure of scale. + + X<-lapply(x,na.omit) + f1<-function(t){length(t[abs((t-median(t))/mad(t))>bend])} + alist<-X + f<-(sapply(alist,length))-(sapply(alist,f1)) + s=sapply(alist,mestse)^2 + wden=sum(1/s) + w=(1/s)/wden + xplus<-sum(w*(sapply(alist,onestep))) + tt<-((sapply(alist,onestep))-xplus)/sqrt(s) + TM<-sum(tt^2) + list(TM=TM) +} + +boot.TM<-function(x,nboot=599,alpha=.05,SEED=TRUE){ +# +# Global test for equal M-measures of location, J independent groups +# +# This is method TM in 5th Ed of Intro to Robust Estimation and Testing +# +if(SEED)set.seed(2) +B=nboot + if(is.matrix(x) || is.data.frame(x))xlist=listm(x) + else xlist=x + xlist=elimna(xlist) + T.test<-TM(xlist)$TM + k<-length(xlist) + ylist<-vector(mode="list",length=k) + TT<-numeric(B) + b<-floor((1-alpha)*B) + onesteps<-sapply(xlist,onestep) + for (i in 1:B){ + j<-1 + repeat { + ylist[[j]]<-(sample(xlist[[j]],length(xlist[[j]]),replace=T)-onesteps[j]) + if (mad(ylist[[j]])>0) j<-j+1 #MAD must be greater than zero for every bootstrap sample + if (j>k)break + } + TT[i]<-TM(ylist,alpha)$TM + } + TT=sort(TT) + if(T.test>=TT[b]){1} else{0} +pv=mean(T.test<=TT) +list(Est.=onesteps,p.value=pv) +} + + + +cat.dat.ci<-function(x,alpha=.05){ +# +# x is assumed to be discrete with a relatively small +# sample space. +# For each oberved value, x, compute a confidence interval +# for the probability that x occurs +# +x=elimna(x) +n=length(x) +v=unique(x) +v=sort(v) +N=length(v) +M=matrix(NA,nrow=N,ncol=4) +for(i in 1:N){ +M[i,1]=v[i] +z=sum(x==v[i]) +a=binom.conf(z,n,pr=FALSE) +M[i,2]=a$phat +M[i,3:4]=a$ci +} +dimnames(M)=list(NULL,c('x','Est.','ci.low','ci.up')) +list(output=M) +} + +smvar.DO<-function(x,est=winsd,nboot=1000,SEED=TRUE,pr=TRUE,...){ +# +# For J independent groups. +# Determine whether it is reasonable to +# decide which group has smallest robust measure of variation +# +# Default is the Winsorized standard deviation +# +if(is.matrix(x)||is.data.frame(x))x<-listm(x) +J=length(x) +e=lapply(x,est,...) +e=pool.a.list(e) +id=which(e==min(e)) +id=id[1] +e=lapply(x,est,...) +e=pool.a.list(e) +pv=NA +CON=conCON(J,id)$conCON +a=linconpb(x,con=CON,est=est,nboot=nboot,SEED=SEED,...)$output[,3] +pv=max(a) +list(Group.Smallest=id,Est.=e,p.value=pv) +} + +manES<-function(x1,x2,method=NULL,pro.p=0.8,nboot=100,...){ +# +# Estimate probability of a correct classification +# for two independent groups having +# unknown multivariate distributions +# +# The function estimates misclassification rates using +# techniques indicated by the argument +# method. +# method=NULL means that methods 'KNN','DIS','DEP','SVM','RF','NN','PRO','LSM','GBT' +# are used See function CLASS.fun +# +# The lowest value is used as the +# estimate or a correct classification. +# +if(is.null(method))method=c('KNN','DIS','DEP','SVM','RF','NN','PRO','LSM','GBT') +if(method[1]=='ALL')method=NULL +a=class.error.com(x1,x2,method=method,pro.p=pro.p,nboot=nboot,...) 
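+# Use the method with the smallest estimated misclassification rate; its
+# complement is returned as the estimated probability of a correct classification.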
+IOR=order(a$Error.rates[1,]) +e=1-min(a$Error.rates[1,]) +LAB=dimnames(a$Error.rates)[[2]][IOR[1]] +list(Method.Used=LAB,Prob.Correct.Decision=e) +} + +MCWB<-function(x,tr=.2,alpha=.05,SEED=TRUE,REPS=5000,...){ +# +# J independent groups +# Multiple comparisons with the best based on trimmed means +# Control FWE when all J have a common trimmed mean. +# +# +chk=0 +if(is.matrix(x)||is.data.frame(x))x<-listm(x) +J=length(x) +for(j in 1:J)x[[j]]=elimna(x[[j]]) +e=lapply(x,tmean,tr) +e=pool.a.list(e) +n=pool.a.list(lapply(x,length)) +id=which(e==max(e)) +CON=conCON(J,id)$conCON +pcrit=MCWB.crit(n=n,alpha=alpha,SEED=SEED,REPS=REPS,...) +a=lincon(x,con=CON,pr=FALSE) +numsig=sum(a$psihat[,5]<=pcrit) +list(n=a$n,tests=a$test,psihat=a$psihat,con=CON, +Best.Group=id,Est.=e,IND.p.values=a$psihat[,5],p.crit=pcrit, +num.sig=numsig) +} + +MCWB.crit<-function(n,alpha,SEED=TRUE,REPS=5000,...){ +if(SEED)set.seed(3) +J=length(n) +z=list() +REM=NA +for(i in 1:REPS){ +for(j in 1:J)z[[j]]=rnorm(n[j]) +e=lapply(z,tmean,tr) +e=pool.a.list(e) +id=which(e==max(e)) +CON=conCON(J,id)$conCON +a=lincon(z,con=CON,pr=FALSE) +REM[i]=min(a$psihat[,5]) +} +hd(REM,alpha) +} + +ESprodis<-function(x,est=tmean,REP=10,DIF=FALSE,SEED=TRUE,...){ +# +# Independent groups. +# Compute an effect size based on projection distances +# +if(SEED)set.seed(2) +if(is.matrix(x))x=listm(x) +J=length(x) +n=pool.a.list(lapply(x,length)) +nmin=min(n) +V=var(n) +if(V==0)E=ESprodis.EQ(x,est=est,DIF=DIF,REP=REP,...) +if(V!=0){ +E=NA +XS=list() +for(i in 1:REP){ +for(j in 1:J)XS[[j]]=sample(x[[j]],nmin) + +E[i]=ESprodis.EQ(XS,est=est,DIF=DIF,...) +} +E=mean(E) +} +E +} + + +ESprodis.EQ<-function(x,est=tmean,REP=1,DIF=TRUE,iter=1,...){ +# +# Independent groups. +# Compute an effect size based on projection distances +# Equal sample sizes +# +if(is.matrix(x))x=listm(x) +J=length(x) +n=pool.a.list(lapply(x,length)) +nord=order(n) +nmin=n[nord[1]] +XS=list() +E=NA +for(k in 1:REP){ +for(j in 1:J)XS[[j]]=sample(x[[j]]) +if(!DIF)E[k]=rmES.pro(XS,est=est,iter=iter,...)$effect.size +if(DIF)E[k]=rmES.dif.pro(XS,est=est,...) +} +E=mean(E) +E +} + + + +PcorREG.best.DO<-function(x,y,neg.col=NULL, +LARGEST=TRUE,xout=FALSE,outfun=outpro,...){ +# +# Can a decision be made about which IV +# has the strongest Pearson correlation with the DV +# +# x is assumed to be a matrix or data frame +# +# +if(nrow(x)!=length(y))stop('x and y have different sample sizes; should be equal') +p=ncol(x) +p1=p+1 +pm1=p-1 +m1=cbind(x,y) +m1<-elimna(m1) # Eliminate rows with missing values +nval=nrow(m1) +x<-m1[,1:p] +y=m1[,p1] +if(xout){ +m<-cbind(x,y) +flag<-outfun(x,plotit=FALSE,...)$keep +m<-m[flag,] +x<-m[,1:p] +y<-m[,p1] +} +x=neg.colM(x,neg.col) +est=NA +for(j in 1:p)est[j]=cor(x[,j],y) +if(LARGEST)ID=which(est==max(est)) +if(!LARGEST)ID=which(est==min(est)) +a=matrix(NA,nrow=pm1,ncol=7) +dimnames(a)=list(NULL,c('Best.IV','IV','Est.best','Est','dif','ci.low','ci.up')) +ic=0 +for(j in 1:p){ +if(j!=ID){ +ic=ic+1 +b=TWOpov(x[,c(ID,j)],y) +a[ic,]=c(ID,j,b$est.rho1,b$est.rho2,b$dif,b$ci[1],b$ci[2]) +}} +chk=sign(a[,6]*a[,7]) +D='No Decision' +if(sum(chk)==pm1)D=paste('Decide IV',ID,' is best') +list(output=a, Result=D) +} + +two.dep.pb<-function(x,y=NULL,alpha=.05,est=tmean,plotit=FALSE,dif=TRUE, +nboot=NA,xlab='Group 1',ylab='Group 2',pr=TRUE,SEED=TRUE,...){ +# +# Two dependent groups +# Compare measures of location via a percentile bootstrap. +# Trimmed mean used by default. 
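+#
+# Illustrative call (hypothetical data):
+#   before=rnorm(30); after=before+rnorm(30,mean=.3)
+#   two.dep.pb(before,after)            # difference scores (dif=TRUE)
+#   two.dep.pb(before,after,dif=FALSE)  # marginal trimmed means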
+# +# Calls rmmcppb, provided for convenience +# nboot, number of bootstrap samples defaults to 1000 +# +if(pr){ +if(dif)print('dif=TRUE, difference scores were used') +if(!dif)print('dif=FALSE, marginal trimmed means were used') +} +if(is.null(y)){ +if(ncol(x)!=2)stop('y is null so x should have two columns') +} +if(!is.null(y)){ +xy=cbind(x,y) +xy=elimna(xy) +x=xy[,1] +y=xy[,2] +} +e=apply(cbind(x,y),2,est,...) +a=rmmcppb(x,y,est=est,nboot=nboot,alpha=alpha,SR=FALSE,SEED=SEED, +plotit=plotit,dif=dif,BA=FALSE,pr=FALSE,...)$output +if(!dif){ +output=matrix(c(e[1],e[2],a[1,2],a[1,3],a[1,5],a[1,6]),nrow=1) +dimnames(output)=list(NULL,c('Est.1','Est.2','Est.dif','p.value','ci.lower','ci.upper')) +} +if(dif){ +output=matrix(c(a[1,2],a[1,3],a[1,5],a[1,6]),nrow=1) +dimnames(output)=list(NULL,c('Est.typical.dif','p.value','ci.lower','ci.upper')) +} +output +} + +bmean<-function(x,na.rm=TRUE){ +# +# Compute a skipped estimator of location. +# where outliers are flagged based on a boxplot rule +# +if(na.rm)x<-x[!is.na(x)] #Remove missing values +flag<-outbox(x)$keep +es<-mean(x[flag]) +es +} + +zwe<-function(x,k=3,C=0.2){ +# +# Zuo's (2010) weighted estimator +# +x=elimna(x) +SD=abs((x-median(x)))/mad(x,constant=1) +D=1/(SD+1) +n=length(x) +IDGE =rep(0,n) +flag=D >= C +IDGE[flag]=1 +IDLT=rep(0,n) +flag=D0.5 || a$lims[iu,1]<0.5)break +} +A=wmw.bca(x,y,alpha=alpha,nboot=nboot,SEED=SEED) +list(n1=n1,n2=n2,phat=est$phat,ci.low=A$ci.low,ci.upper=A$ci.upper,p.value=pv) +} + + +wmw.bca<-function(x,y,alpha=.05,nboot=1000,SEED=TRUE,...){ +# +# BCA confidence interval for P(X0. || a$lims[iu,1]<0.)break +} +A=cor.skip.com(x,y,corfun=corfun,outfun=outpro,alpha=.05,nboot=nboot,SEED=SEED,...) +ci=c(a$lims[1,1],a$lims[3,1]) +list(n=nrow(m),Est1=est1,Est2=est2,difference=dif,ci.low=A[1],ci.upper=A[2],p.value=pv) +} + +runbin.CI<-function(x,y,pts=NULL,fr=1.2,xout=FALSE,outfun=outpro){ +# +# Based on running interval smoother, for each point in pts, compute a confidence +# interval for probability of success based on the nearest neighbors +# +xx<-cbind(x,y) +xx<-elimna(xx) +n=nrow(xx) +p1=ncol(xx) +p=p1-1 +x=xx[,1:p] +y=xx[,p1] +if(is.null(pts))pts=x +pts=unique(pts) +pts=as.matrix(pts) +x=as.matrix(x) +if(p>1)m=cov.mve(x) +npts=nrow(pts) +output=matrix(NA,npts,5) +dimnames(output)=list(NULL,c('n','pts.no','Est','ci.low','ci.upper')) +for(i in 1:npts){ +if(p==1)Z=y[near(x[,1],pts[i,],fr)] +if(p>1)Z=y[near3d(x,pts[i,],fr,m)] +if(length(Z)>5){ +a=binom.conf(sum(Z),length(Z),pr=FALSE) +output[i,3]=a$phat +output[i,2]=i +output[i,1]=a$n +output[i,4]=a$ci[1] +output[i,5]=a$ci[2] +}} +list(points=pts,output=output) +} + + +wmw.RZR<-function(x,y,nboot=1000,SEED=TRUE){ +# +# Perform the Reiczigel et al. 
(2005) improvement of of the +# Wilcoxon--Mann--Whitney test +# +if(SEED)set.seed(2) +val=0 +n1=length(x) +n2=length(y) +xy=rank(c(x,y)) +N=n1+n2 +n1p1=n1+1 +a=yuen(xy[1:n1],xy[n1p1:N],tr=0)$teststat +#print(a) +LOC=loc2dif(x,y) +x=x-a +y=y-a +#print(yuen(x,x2,tr=0)) +bval=0 +for(i in 1:nboot){ +z1=sample(x,n1,replace=TRUE) +z2=sample(y,n2,replace=TRUE) +XY=rank(c(z1,z2)) +bval[i]=yuen(XY[1:n1],XY[n1p1:N],tr=0)$teststat +} +#print(bval[1:10]) +pv1=mean(a>bval) +pv2=mean(a8)print('No adjustment available when J>8') +else es=fix[J1]*es +} +es +} + + +KMS.inter.pbci<-function(x,tr=.2,alpha=.05,nboot=1000,SEED=TRUE,SW=FALSE){ +# +# For a 2-by-2 design, compare +# explanatory power associated with the two levels of the first factor +# +# SW=TRUE, switches rows and column + +if(SEED)set.seed(2) +if(is.matrix(x) || is.data.frame(x))x=listm(x) +if(length(x)!=4)stop('Should have four groups exactly') +for(j in 1:4)x[[j]]=elimna(x[[j]]) +if(SW)x=x[c(1,3,2,4)] +v=list() +dif=NA +for(i in 1:nboot){ +for(j in 1:4)v[[j]]=sample(x[[j]],replace=TRUE) +a1=kms.effect(v[[1]],v[[2]],tr=tr)$effect.size +a2=kms.effect(v[[3]],v[[4]],tr=tr)$effect.size +dif[i]=a1-a2 +} +dif=sort(dif) +ilow<-round((alpha/2) * nboot) +ihi<-nboot - ilow +ilow<-ilow+1 +ci=dif[ilow] +ci[2]=dif[ihi] +pv=mean(dif<0)+.5*mean(dif==0) +pv=2*min(pv,1-pv) +a1=kms.effect(x[[1]],x[[2]],tr=tr)$effect.size +a2=kms.effect(x[[3]],x[[4]],tr=tr)$effect.size +Dif=a1-a2 +list(Est.1=a1, Est.2=a2,Dif=Dif,ci=ci,p.value=pv) +} + +KMSinter.mcp<-function(J,K,x,tr=.2,alpha=.05,nboot=999,SEED=TRUE,SW=FALSE){ +# +# Interactions based on KMS measure of effect size +# +if(is.matrix(x) || is.data.frame(x))x=listm(x) +con=con2way(J,K)$conAB +if(SW){ +JK=J*K +M=matrix(c(1:JK),nrow=J,byrow=TRUE) +M=as.vector(M) +x=x[M] +con=con2way(K,J)$conAB +} +num=ncol(con) +CON=matrix(NA,nrow=num,ncol=8) +dimnames(CON)=list(NULL,c('Con.num','Est.1','Est.2','Dif','ci.low','ci.up','p.value','p.adjusted')) +#CON=list() +for(j in 1:ncol(con)){ +id=which(con[,j]!=0) +dat=x[id] +temp=pool.a.list(KMS.inter.pbci(dat,tr=tr,alpha=alpha,nboot=nboot,SEED=SEED)) +CON[j,1]=j +CON[j,2:7]=temp +} +CON[,8]=p.adjust(CON[,7],method='hoch') +list(CON=CON,con=con) +} + +QS.inter.pbci<-function(x,locfun=median,alpha=.05,nboot=1000,SEED=TRUE,SW=FALSE){ +# +# For a 2-by-2 design, characterize an interaction +# in terms of a quantile shift measure of effect size +# +# SW=TRUE, switches rows and column +# +if(SEED)set.seed(2) +if(is.matrix(x))x=listm(x) +if(length(x)!=4)stop('There should be exactly four groups') +for(j in 1:4)x[[j]]=elimna(x[[j]]) +if(SW)x=x[c(1,3,2,4)] +v=list() +dif=NA +a1=NA +a2=NA +for(i in 1:nboot){ +for(j in 1:4)v[[j]]=sample(x[[j]],replace=TRUE) +a1[i]=shiftQS(v[[1]],v[[2]],locfun=locfun)$Q.Effect +a2[i]=shiftQS(v[[3]],v[[4]],locfun=locfun)$Q.Effect +} +dif=a1-a2 +dif=sort(dif) +ilow<-round((alpha/2) * nboot) +ihi<-nboot - ilow +ilow<-ilow+1 +ci=dif[ilow] +ci[2]=dif[ihi] +pv=mean(dif<0)+.5*mean(dif==0) +pv=2*min(pv,1-pv) +a1=shiftQS(x[[1]],x[[2]],locfun=locfun)$Q.Effect +a2=shiftQS(x[[3]],x[[4]],locfun=locfun)$Q.Effect +Dif=a1-a2 +list(Est.1=a1, Est.2=a2,Dif=Dif,ci=ci,p.value=pv) +} + + +QSinter.mcp<-function(J,K,x,alpha=.05,nboot=999,SEED=TRUE,SW=FALSE){ +# +# Interactions based on measure of effect size +# +if(is.matrix(x) || is.data.frame(x))x=listm(x) +con=con2way(J,K)$conAB +if(SW){ +JK=J*K +M=matrix(c(1:JK),nrow=J,byrow=TRUE) +M=as.vector(M) +x=x[M] +con=con2way(K,J)$conAB +} +num=ncol(con) +CON=matrix(NA,nrow=num,ncol=8) 
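+# For each tetrad of groups defined by an interaction contrast, the quantile
+# shift effect sizes are compared via QS.inter.pbci; Hochberg's method is then
+# used to adjust the resulting p-values.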
+dimnames(CON)=list(NULL,c('Con.num','Est.1','Est.2','Dif','ci.low','ci.up','p.value','p.adjusted')) +for(j in 1:ncol(con)){ +id=which(con[,j]!=0) +dat=x[id] +temp=pool.a.list(QS.inter.pbci(dat,alpha=alpha,nboot=nboot,SEED=SEED)) +CON[j,1]=j +CON[j,2:7]=temp +} +CON[,8]=p.adjust(CON[,7],method='hoch') +list(CON=CON,con=con) +} + +smgrid2M<-function(x,y,IV=c(1,2),Qsplit1=.5,Qsplit2=.5,tr=.2,PB=FALSE,est=tmean,nboot=1000,pr=TRUE,fun=ES.summary, +xout=FALSE,outfun=outpro,SEED=TRUE,...){ +# +# Split on two variables, +# +# Qsplit: split the independent variable based on the +# quantiles indicated by Qsplit +# Example +# Qsplit1=c(.25,.5,.75) +# Qsplit2=.5 +# would split based on the quartiles for the first independent variable and the median +# for the second independent variable +# + +# Then test main effects based on trimmed means +# IV[1]: indicates the column of containing the first independent variable to use. +# IV[2]: indicates the column of containing the second independent variable to use. +# +# if(length(unique(y)>2))stop('y should be binary') +x=as.matrix(x) +p=ncol(x) +if(p==1)stop('There should be two or more independent variables') +p1=p+1 +xy<-elimna(cbind(x,y)) +x<-xy[,1:p] +y<-xy[,p1] +v=NULL +if(xout){ +flag<-outfun(x,plotit=FALSE,...)$keep +x<-x[flag,] +y<-y[flag] +xy=cbind(x,y) +} +J=length(Qsplit1)+1 +K=length(Qsplit2)+1 +JK=J*K +MAT=matrix(1:JK,J,K,byrow=TRUE) +z=list() +group=list() +N.int=length(Qsplit1)+1 +N.int2=length(Qsplit2)+1 +est.mat=matrix(NA,nrow=N.int,ncol=N.int2) +n.mat=matrix(NA,nrow=N.int,ncol=N.int2) +DV.mat=matrix(NA,nrow=N.int,ncol=N.int2) +L1=NULL +L2=NULL +qv=quantile(x[,IV[1]],Qsplit1) +qv=c(min(x[,IV[1]]),qv,max(x[,IV[1]])) +qv2=quantile(x[,IV[2]],Qsplit2) +qv2=c(min(x[,IV[2]]),qv2,max(x[,IV[2]])) +ic=0 +for(j in 1:N.int){ +j1=j+1 +xsub=binmat(xy,IV[1],qv[j],qv[j1]) +for(k in 1:N.int2){ +k1=k+1 +xsub2=binmat(xsub,IV[2],qv2[k],qv2[k1]) +est.mat[j,k]=est(xsub2[,p1],...) +n.mat[j,k]=length(xsub2[,p1]) +ic=ic+1 +z[[ic]]=xsub2[,p1] +if(length(z[[ic]])>6)group[[ic]]=summary(xsub2[,1:p]) +} +} +n=NA +for(j in 1:length(z)){ +n[j]=length(z[[j]]) +} +if(min(n)<=5){ +id=which(n>5) +del=which(n<=5) +n=n[id] +if(pr)print(paste('For group',del,'the sample size is less than 6')) +} +A=t2way(J,K,z,tr=tr) +A +} + +ANOG2KMS<-function(J,K,x,tr=.2,alpha=.05,iter=5000,nulldist=NULL,SEED=TRUE,FAC.B=FALSE,...){ +# +# Two-way ANOVA independent groups. +# Compare global KMS measure of effect size for all pairs of rows +# Example: for row of Factor A, compute KMS global effect size. Do the sample for +# row 2 and test the hypothesis that they are the same. Do this for all pairs of rows. +# +# Can do the same for Factor B by setting +# FAC.B=TRUE. 
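+#
+# Illustrative call (hypothetical data): with x a list of J*K groups stored in
+# row-major order,
+#   ANOG2KMS(2,2,x)            # compare the global KMS effect size of row 1 vs row 2
+#   ANOG2KMS(2,2,x,FAC.B=TRUE) # do the same for the two columns (Factor B)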
+# +# +if(is.matrix(x) || is.data.frame(x))x=listm(x) +JK=J*K +mat=matrix(c(1:JK),nrow=J,byrow=TRUE) +if(FAC.B){ +ic=0 +y=list() +for(j in 1:J){ +for(k in 1:K){ +ic=ic+1 +y[ic]=x[mat[j,k]] +}} +x=y +rem.J=J +J=K +K=rem.J +mat=t(mat) +} +num=(J^2-J)/2 +n=pool.a.list(lapply(x,length)) +if(!is.null(nulldist))V=ND +if(is.null(nulldist)){ +if(SEED)set.seed(2) +ndist=NA +V=matrix(NA,iter,num) +ic=0 +for(j in 1:J){ +for(jj in 1:J){ +if(j6)group[[ic]]=summary(xsub2[,1:p]) +} +} +n=NA +for(j in 1:length(z)){ +n[j]=length(z[[j]]) +} +if(min(n)<=5){ +id=which(n>5) +del=which(n<=5) +n=n[id] +if(pr)print(paste('eliminated group',del,'The sample size is less than 6')) +} +a=AOV2KMS.mcp(J,K,z,tr=tr,nboot=nboot) +b=AOV2KMS.mcp(J,K,z,tr=tr,nboot=nboot,FAC.B=TRUE) +list(Factor.A=a,Factor.B=b) +} + +AOV2KMS.mcp<-function(J,K,x,tr=.2,alpha=.05,nboot=500,SEED=TRUE,FAC.B=FALSE,...){ +# +# Two-way ANOVA independent groups. +# Compare average KMS measure of effect size for all pairs of rows and columns +# Example: for row of Factor A, compute KMS global effect size. Do the sample for +# row 2 and test the hypothesis that the averages are the same. Do this for all pairs of rows. +# +# +if(SEED)set.seed(2) +if(is.matrix(x) || is.data.frame(x))x=listm(x) +JK=J*K +mat=matrix(c(1:JK),nrow=J,byrow=TRUE) +if(FAC.B){ +ic=0 +y=list() +for(j in 1:J){ +for(k in 1:K){ +ic=ic+1 +y[ic]=x[mat[j,k]] +}} +x=y +rem.J=J +J=K +K=rem.J +mat=t(mat) +} +num=(J^2-J)/2 +A=matrix(NA,num,8) +ic=0 +for (j in 1:J){ +for(jj in 1:J){ +if(j6)group[[ic]]=summary(xsub2[,1:p]) +} +} +n=NA +for(j in 1:length(z)){ +n[j]=length(z[[j]]) +} +if(min(n)<=5){ +id=which(n>5) +del=which(n<=5) +n=n[id] +if(pr)print(paste('eliminated group',del,'The sample size is less than 6')) +} +a=ANOG2KMS(J,K,z,tr=tr,iter=iter,nulldist=nulldist.a) +b=ANOG2KMS(J,K,z,tr=tr,iter=iter,FAC.B=TRUE,nulldist=nulldist.b) +list(Factor.A=a,Factor.B=b) +} + +anova.KMS.ND<-function(n,tr=.2,iter=5000,nulldist=nulldist,SEED=TRUE){ +# +# Null distribution for anova.KMS +# +if(SEED)set.seed(2) +v=NA +dat=list() +J=length(n) # number of groups +for(i in 1:iter){ +for(L in 1:J)dat[[L]]=ghdist(n[L],g=0.75) +v[i]=KS.ANOVA.ES(dat,tr=tr) +} +v +} + + +wAKP.avg<-function(x,tr=.2){ +# +# Have J dependent groups. For each pair of groups, compute +# AKP type measure of effect size and average the results +# +# For tr=0, get Cohen d type measure +# +# +a=wmcpAKP(x,tr=tr) +e=mean(a[,3]) +e +} + +AOV2KMS<-function(J,K,x,tr=.2,alpha=.05,nboot=500,SEED=TRUE,...){ +# +# Two-way ANOVA independent groups. +# Compare averages KMS measure of effect size for all pairs of rows and columns +# +if(SEED)set.seed(2) +A=AOV2KMS.mcp(J,K,x,tr=tr,alpha=alpha,SEED=FALSE) +B=AOV2KMS.mcp(J,K,x,tr=tr,alpha=alpha,SEED=FALSE,FAC.B=TRUE) +list(Factor.A=A,Factor.B=B) +} + + +ANOG2KMS.ND<-function(J,K,n,tr=.2,iter=5000,SEED=TRUE,FAC.B=FALSE,...){ +# +# Two-way ANOVA independent groups. +# Compare global KMS measure of effect size for all pairs of rows and columns +# Example: for row of Factor A, compute KMS global effect size. Do the sample for +# row 2 and test the hypothesis that they are the same. Do this for all pairs of rows. +# +# Repeat for the columns of Factor B. 
+# +# +if(SEED)set.seed(2) +ndist=NA +JK=J*K +mat=matrix(c(1:JK),nrow=J,byrow=TRUE) +num=(J^2-J)/2 +if(FAC.B)num(K^2-K)/2 +V=matrix(NA,iter,num) +ic=0 +if(!FAC.B){ +for(j in 1:J){ +for(jj in 1:J){ +if(j2))stop('y should be binary') +x=as.matrix(x) +p=ncol(x) +if(p==1)stop('There should be two or more independent variables') +p1=p+1 +xy<-elimna(cbind(x,y)) +x<-xy[,1:p] +y<-xy[,p1] +v=NULL +if(xout){ +flag<-outfun(x,plotit=FALSE,...)$keep +x<-x[flag,] +y<-y[flag] +xy=cbind(x,y) +} +J=length(Qsplit1)+1 +K=length(Qsplit2)+1 +z=list() +group=list() +if(is.null(VAL1) || is.null(VAL2)){ +N.int=length(Qsplit1)+1 +N.int2=length(Qsplit2)+1 +} +else { +J=length(VAL1)+1 +K=length(VAL2)+1 +N.int=length(VAL1)+1 +N.int2=length(VAL2)+1 +} +JK=J*K +MAT=matrix(1:JK,J,K,byrow=TRUE) +est.mat=matrix(NA,nrow=N.int,ncol=N.int2) +n.mat=matrix(NA,nrow=N.int,ncol=N.int2) +DV.mat=matrix(NA,nrow=N.int,ncol=N.int2) +L1=NULL +L2=NULL +if(is.null(VAL1) || is.null(VAL2)){ +qv=quantile(x[,IV[1]],Qsplit1) +qv=c(min(x[,IV[1]]),qv,max(x[,IV[1]])) +qv2=quantile(x[,IV[2]],Qsplit2) +qv2=c(min(x[,IV[2]]),qv2,max(x[,IV[2]])) +} +else{ +qv=VAL1 +qv=c(min(x[,IV[1]]),qv,max(x[,IV[1]])) +qv2=VAL2 +qv2=c(min(x[,IV[2]]),qv2,max(x[,IV[2]])) +} +ic=0 +for(j in 1:N.int){ +j1=j+1 +xsub=binmat(xy,IV[1],qv[j],qv[j1]) +for(k in 1:N.int2){ +k1=k+1 +xsub2=binmat(xsub,IV[2],qv2[k],qv2[k1]) +est.mat[j,k]=est(xsub2[,p1],...) +n.mat[j,k]=length(xsub2[,p1]) +ic=ic+1 +z[[ic]]=xsub2[,p1] +if(length(z[[ic]])>6)group[[ic]]=summary(xsub2[,1:p]) +} +} +n=NA +for(j in 1:length(z)){ +n[j]=length(z[[j]]) +} +if(min(n)<=5){ +id=which(n>5) +del=which(n<=5) +n=n[id] +if(pr)print(paste('For group',del,'the sample size is less than 6')) +} +A=t2way(J,K,z) +A +} + +KMS.ci<-function(x,y,tr=.2,alpha=.05,null.val=0,nboot=500,SEED=TRUE,...){ +# +# confidence interval for the difference between to KMS +# measures of effect size. +# +if(SEED)set.seed(2) +x=elimna(x) +y=elimna(y) +n1=length(x) +n2=length(y) +v=NA +ef=kms.effect(x,y)$effect.size +for(i in 1:nboot){ +X=sample(x,n1,replace=TRUE) +Y=sample(y,n2,replace=TRUE) +v[i]=kms.effect(X,Y)$effect.size +} +v=sort(v) +ilow<-round((alpha/2) * nboot) +ihi<-nboot - ilow +ilow<-ilow+1 +ci=v[ilow] +ci[2]=v[ihi] +pv=mean(v0)points(x[a$out.id,1],x[a$out.id,2]) +flag=which(d>=median(d)) +xx<-x[flag,] +xord<-order(xx[,1]) +xx<-xx[xord,] +temp<-chull(xx) +lines(xx[temp,]) +lines(xx[c(temp[1],temp[length(temp)]),]) +}} +a +} + +KMSgrid.mcp<-function(x,y,IV=c(1,2),Qsplit1=.5,Qsplit2=.5,VAL1=NULL,VAL2=NULL,alpha=05,SW=FALSE, +nulldist=NULL,est=tmean,iter=1000,pr=TRUE,method='hoch', +xout=FALSE,outfun=outpro,SEED=TRUE,...){ +# +# Compare robust, heteroscedastic measures of effect size, the KMS measure for two or more groups +# among grids defined by quantiles of two IVs. +# Uses the sign version of KMS (the two group) case rather than the variation measure used by KMSgridRC +# +# The method tests for main effects based on the +# signed version (not the squared version) of the KMS measure of effect size. +# +# Qsplit: split the independent variable based on the +# quantiles indicated by Qsplit +# Example +# Qsplit1=c(.25,.5,.75) +# Qsplit2=.5 +# would split based on the quartiles for the first independent variable and the median +# for the second independent variable +# +# Basically, reduce the data to a two-way ANOVA design and examine main effects. 
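+#
+# Illustrative call (hypothetical data): with x an n-by-2 matrix of independent
+# variables and y the outcome,
+#   KMSgrid.mcp(x,y,Qsplit1=c(.25,.5,.75),Qsplit2=.5)
+# forms a 4-by-2 grid (quartiles of IV 1, median of IV 2) and compares KMS
+# effect sizes among the resulting groups.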
+# +# +if(!is.null(VAL1))Qsplit1=PVALS(x[,IV[1]],VAL1) +if(!is.null(VAL2))Qsplit2=PVALS(x[,IV[2]],VAL2) +J=length(Qsplit1)+1 +K=length(Qsplit2)+1 +x=as.matrix(x) +p=ncol(x) +if(p==1)stop('There should be two or more independent variables') +p1=p+1 +xy<-elimna(cbind(x,y)) +x<-xy[,1:p] +y<-xy[,p1] +v=NULL +if(xout){ +flag<-outfun(x,plotit=FALSE,...)$keep +x<-x[flag,] +y<-y[flag] +xy=cbind(x,y) +} +z=list() +group=list() +N.int=length(Qsplit1)+1 +N.int2=length(Qsplit2)+1 +est.mat=matrix(NA,nrow=N.int,ncol=N.int2) +n.mat=matrix(NA,nrow=N.int,ncol=N.int2) +DV.mat=matrix(NA,nrow=N.int,ncol=N.int2) +L1=NULL +L2=NULL +for(i in 1:N.int)L1[i]=paste('IV1.G',i) +for(i in 1:N.int2)L2[i]=paste('IV2.G',i) +dimnames(est.mat)=list(L1,L2) +qv=quantile(x[,IV[1]],Qsplit1) +qv=c(min(x[,IV[1]]),qv,max(x[,IV[1]])) +qv2=quantile(x[,IV[2]],Qsplit2) +qv2=c(min(x[,IV[2]]),qv2,max(x[,IV[2]])) +ic=0 +for(j in 1:N.int){ +j1=j+1 +xsub=binmat(xy,IV[1],qv[j],qv[j1]) +for(k in 1:N.int2){ +k1=k+1 +xsub2=binmat(xsub,IV[2],qv2[k],qv2[k1]) +est.mat[j,k]=est(xsub2[,p1],...) +n.mat[j,k]=length(xsub2[,p1]) +ic=ic+1 +z[[ic]]=xsub2[,p1] +if(length(z[[ic]])>6)group[[ic]]=summary(xsub2[,1:p]) +} +} +n=NA +for(j in 1:length(z)){ +n[j]=length(z[[j]]) +} +if(min(n)<=5){ +id=which(n>5) +del=which(n<=5) +n=n[id] +if(pr)print(paste('eliminated group',del,'The sample size is less than 6')) +} +a=KMSinter.mcp(J,K,z,tr=tr,SW=SW) +a +} + +smgrid.est<-function(x,y,est=tmean,IV=c(1,2),Qsplit1 = c(.3,.7),Qsplit2 = c(.3,.7),tr=.2,xout=FALSE,outfun=outpro,...){ +# +# Splits the data into groups +# +x=as.matrix(x) +p=ncol(x) +if(p==1)stop('There should be two or more independent variables') +p1=p+1 +xy<-elimna(cbind(x,y)) +x<-xy[,1:p] +y<-xy[,p1] +v=NULL +if(xout){ +flag<-outfun(x,plotit=FALSE,...)$keep +x<-x[flag,] +y<-y[flag] +} +J=length(Qsplit1)+1 +K=length(Qsplit2)+1 +JK=J*K +qv=quantile(x[,IV[1]],Qsplit1) +qv=c(min(x[,IV[1]]),qv,max(x[,IV[1]])) +qv2=quantile(x[,IV[2]],Qsplit2) +qv2=c(min(x[,IV[2]]),qv2,max(x[,IV[2]])) +ic=0 +N.int=length(Qsplit1)+1 +N.int2=length(Qsplit2)+1 +est.mat=matrix(NA,nrow=N.int,ncol=N.int2) +for(j in 1:N.int){ +j1=j+1 +xsub=binmat(xy,IV[1],qv[j],qv[j1]) +for(k in 1:N.int2){ +k1=k+1 +xsub2=binmat(xsub,IV[2],qv2[k],qv2[k1]) +if(!is.null(dim(xsub2))) +est.mat[j,k]=est(xsub2[,p1],...) +}} +est.mat +} + +KMS.ES.M<-function(x,y){ +# +# Computes the robust effect size using a simple generalization of the method in +# Kulinskaya, E., Morgenthaler, S. & Staudte, R. (2008). +# Meta Analysis: A guide to calibrating and combining statistical evidence p. 177 +# based on an M-estimator and percentage bend variance +#Cohen d=.2, .5 .8 correspond to .1, .25 and .4') (KMS p. 180) + +library(MASS) +x<-elimna(x) +y<-elimna(y) +n1<-length(x) +n2<-length(y) +N=n1+n2 +q=n1/N +s1sq=pbvar(x) +s2sq=pbvar(y) +t1=onestep(x) +t2=onestep(y) +top=q*s1sq+(1-q)*s2sq +bot=q*(1-q) +sigsq=top/bot # Quantity in brackets KMS p. 176 eq 21.1 +varrho=s2sq/s1sq +d1=(t1-t2)/sqrt(sigsq) +list(effect.size=d1,Cohen.d.equiv=2*d1) +} + +rplotN<-function(x,y,nsub=1000,est=tmean,fr=1,xout=FALSE,xlab='X',ylab='Y',zlab='',ticktype = 'simple',theta = 50, phi = 25, scale = TRUE, + expand = 0.5, SEED = TRUE,frame=TRUE){ +# + # Running interval smoother, good for large sample sizes or plots of the + # regression surface without a scatter plot. 
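+ # Illustrative call (hypothetical data):
+ #   rplotN(x,y,nsub=500)  # plot the smooth based on a random subsample of 500 points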
+ # + # nsub is size of the random sample of the data used to predict outcome using all of the data + # + if(SEED)set.seed(2) + x=as.matrix(x) +p=ncol(x) +p1=p+1 + xy=cbind(x,y) + xy=elimna(xy) + n=nrow(xy) + nsub=min(n,nsub) + id=sample(n,nsub) +x=xy[,1:p] +y=xy[,p1] +x=as.matrix(x) +w=rplot.pred(x,y,pts=x[id,],fr=fr)$Y.hat +a=lplot(x[id,],w,xlab=xlab,ylab=ylab,zlab=zlab,ticktype=ticktype,frame=frame,phi=phi,theta=theta,scale=scale,pr=FALSE) +} + +lplotN<-function(x,y,nsub=1000,est=tmean,fr=1,xout=FALSE,xlab='X',ylab='Y',zlab='', +ticktype = 'simple',theta = 50, phi = 25, scale = TRUE, + expand = 0.5, SEED = TRUE,frame=TRUE){ +# + # Running interval smoother, good for large sample sizes or plots of the + # regression surface without a scatter plot. + # + # nsub is size of the random sample of the data used to predict outcome using all of the data + # + if(SEED)set.seed(2) + x=as.matrix(x) +p=ncol(x) +p1=p+1 + xy=cbind(x,y) + xy=elimna(xy) + n=nrow(xy) + nsub=min(n,nsub) + id=sample(n,nsub) +x=xy[,1:p] +y=xy[,p1] +x=as.matrix(x) +w=lplot.pred(x,y,pts=x[id,],fr=fr)$yhat +a=lplot(x[id,],w,xlab=xlab,ylab=ylab,zlab=zlab,ticktype=ticktype, +frame=frame,phi=phi,theta=theta,scale=scale,pr=FALSE) +} + +bwquantile<-function(M1,M2,alpha=.05,nboot=1000,SEED=TRUE,q=.5,...){ +# +# M1 and M2 are assumed to be matrices with two columns +# They are random samples from some bivariate distribution from +# two independent groups +# +# For example, +# have two dependent groups, e.g., same subjects under two conditions, +# Have two independent groups, e.g., male and female +# +# Consider difference between males and females at condition 1, estimate difference between quantiles +# Under condition 2, does this difference differ from the difference under condition 1? +# +# q indicates the quantile to be used +# +# +# REQUIRES WRS PACKAGE OR THE FUNCTIONS IN RALLFUN-V38 +# +M1=elimna(M1) +M2=elimna(M2) +n1=nrow(M1) +n2=nrow(M2) +e1=apply(M1,2,hd,q) +e2=apply(M2,2,hd,q) +dif1=e1[1]-e2[1] +dif2=e1[2]-e2[2] +dif=dif1-dif2 +DIF=NA +for(i in 1:nboot){ +id1=sample(n1,replace=TRUE) +id2=sample(n2,replace=TRUE) +B1=apply(M1[id1,],2,hd,q) +B2=apply(M2[id2,],2,hd,q) +DIF[i]=B1[1]-B2[1]-B1[2]+B2[2] +} +DIF=sort(DIF) +pv=mean(DIF<0) +pv=2*min(pv,1-pv) +ilow<-round((alpha/2) * nboot) +ihi<-nboot - ilow +ilow<-ilow+1 +ci=DIF[ilow] +ci[2]=DIF[ihi] +list(p.value=pv,ci=ci) +} + +ghtransform<-function(x,g=0,h=0){ +# +# transform normal data in x to a g-and-h distribution + if (g>0){ + ghdist<-(exp(g*x)-1)*exp(h*x^2/2)/g + } + if(g==0)ghdist<-x*exp(h*x^2/2) + ghdist + } + + plot.ghdist<-function(g=0,h=0,xlab='',ylab='f(x)'){ +# +# plot density function of a g-and-h distribution +# +x=seq(-3,3,.05) +pf=dnorm(x) +xs=ghtransform(x,g=g,h=h) +plot(xs,pf,type='n',xlab=xlab,ylab=ylab) +lines(xs,pf) +} + + + +mulcen.region<-function(m,region=.05,plotit=TRUE,est=median, +xlab="VAR 1",ylab="VAR 2",...){ +# +# +# m is an n-by-2 matrix +# +# region=.05 means that the function +# determine the 1-.05=.95 deepest points and then plots the convex hull +# containing these points. 
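+#
+# Illustrative call (hypothetical data): for a bivariate sample m,
+#   mulcen.region(m,region=.1)
+# plots the convex hull of the deepest 90% of the points (depths from fdepth)
+# and returns the hull points along with the marginal estimates of location.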
+# +# Returns the points that form the convex hull +# +# +m<-as.matrix(m) +est=apply(m,2,est) +if(ncol(m)!=2)stop('Argument m should be a matrix with two columns') +temp<-fdepth(m,plotit=FALSE,center=est) #Defaults to using the marginal medians +flag=(temp>=qest(temp,region)) +xx<-m[flag,] +xord<-order(xx[,1]) +xx<-xx[xord,] +temp<-chull(xx) +if(plotit){ +plot(m[,1],m[,2],xlab=xlab,ylab=ylab) +lines(xx[temp,]) +lines(xx[c(temp[1],temp[length(temp)]),]) +} +list(center=est,convex.hull.pts=xx[temp,]) +} + +mulcen.region.MF<-function(m,region=.05,est=median){ +# +# region=.05 means that the function +# determine the 1-.05=.95 +# This is done for each formula +# Assume m is a matrix or data frame having +# J columns. First two columns first formula, next two columns next formula.. +# +# So J should be an even integer +# +J=ncol(m) +N=J/2 +if(N != floor(N))stop('Should have an even number of columns') +region=list() +centers=list() +id=c(-1,0) +for(j in 1:N){ +id=id+2 +a=mulcen.region(m[,id],plotit=FALSE,est=est) +centers[[j]]=a$center +region[[j]]=a$convex.hull.pts +n=nrow(elimna(m[id])) +n=as.integer(n) +centers[[j]]=c(a$center,n) +names(centers[[j]])=c('V1','V2','N') +} +list(centers=centers,convex.hull.pts=region) +} + + +oph.astig.datasetconvexpoly.median<-function(m,Region=.05,plotit=FALSE,xlab='V1',ylab='V2'){ +# +# region=.05 means that the function +# determine the 1-.05=.95 +# This is done for each formula +# Assume m is a matrix or data frame having +# J columns. First two columns first formula, next two columns next formula.. +# +# So J should be an even integer +# +J=ncol(m) +N=J/2 +if(N != floor(N))stop('Should have an even number of columns') +region=list() +centers=list() +id=c(-1,0) +for(j in 1:N){ +id=id+2 +a=mulcen.region(elimna(m[,id]),region=Region,plotit=FALSE,xlab=xlab,ylab=ylab) +centers[[j]]=a$center +region[[j]]=a$convex.hull.pts +n=nrow(elimna(m[id,])) +n=as.integer(n) +centers[[j]]=c(a$center,n) +names(centers[[j]])=c('V1','V2','N') +} +if(plotit){ +M=m +if(N>1)par(mfrow=c(2,2)) +id=c(-1,0) +for(j in 1:N){ +id=id+2 +m=M[,id] +m=elimna(m) +m=as.matrix(m) +temp<-fdepth(m,plotit=FALSE) #Defaults to using the marginal medians +flag=(temp>=qest(temp,Region)) +xx<-m[flag,] +xord<-order(xx[,1]) +xx<-xx[xord,] +temp<-chull(xx) +plot(m[,1],m[,2],xlab=xlab,ylab=ylab,pch=pch,xlim=c(-1.5,1.5),ylim=c(-1.5,1.5),asp=2/3) +lines(xx[temp,]) +lines(xx[c(temp[1],temp[length(temp)]),]) +} +par(mfrow=c(1,1)) +} +list(centers=centers,convex.hull.pts=region) +} + +oph.astig.datasetconvexpoly.mean<-function(m,Region=.05,plotit=FALSE,xlab='V1',ylab='V2'){ +# +# region=.05 means that the function +# determine the 1-.05=.95 +# This is done for each formula +# Assume m is a matrix or data frame having +# J columns. First two columns first formula, next two columns next formula.. 
+# +# So J should be an even integer +# +J=ncol(m) +N=J/2 +if(N != floor(N))stop('Should have an even number of columns') +region=list() +centers=list() +id=c(-1,0) +for(j in 1:N){ +id=id+2 +a=mulcen.region(elimna(m[,id]),region=Region,plotit=FALSE,xlab=xlab,ylab=ylab,est=mean) +centers[[j]]=a$center +region[[j]]=a$convex.hull.pts +n=nrow(elimna(m[id,])) +n=as.integer(n) +centers[[j]]=c(a$center,n) +names(centers[[j]])=c('V1','V2','N') +} +if(plotit){ +M=m +if(N>1)par(mfrow=c(2,2)) +id=c(-1,0) +for(j in 1:N){ +id=id+2 +m=M[,id] +m=elimna(m) +m=as.matrix(m) +temp<-fdepth(m,plotit=FALSE) #Defaults to using the marginal medians +flag=(temp>=qest(temp,Region)) +xx<-m[flag,] +xord<-order(xx[,1]) +xx<-xx[xord,] +temp<-chull(xx) +plot(m[,1],m[,2],xlab=xlab,ylab=ylab) +lines(xx[temp,]) +lines(xx[c(temp[1],temp[length(temp)]),]) +} +par(mfrow=c(1,1)) +} +list(centers=centers,convex.hull.pts=region) +} + + + +psmm.x=function(x, c, r, nu) { + snu = sqrt(nu) + sx = snu * x + lgx = log(snu) - lgamma(nu/2) + (1 - nu/2) * log(2) + + (nu - 1) * log(sx) + (-sx^2/2) + exp(r * log(2 * pnorm(c * x) - 1) + lgx) + } + +psmm = function(x, r, nu) { + res = integrate(psmm.x, 0, Inf, c = x, r = r, nu = nu) + res$value + } + + qsmm<-function(q, r, nu) { + #r=number of comparisons + if (!is.finite(nu)) + return(qnorm(1 - 0.5 * (1 - q^(1/r)))) + res = uniroot(function(c, r, nu, q) { + psmm(c, r = r, nu = nu) - q + }, + c(0, 100), r = r, nu = nu, q = q) + res$root + } + + lincon<-function(x,con=0,tr=.2,alpha=.05,pr=FALSE){ +# +# A heteroscedastic test of d linear contrasts using trimmed means. +# +# This version uses an improved method for computing the quantiles of a +# Studentized maximum modulus distriburtion +# +# The data are assumed to be stored in $x$ in list mode, a matrix +# or a data frame. If in list mode, +# length(x) is assumed to correspond to the total number of groups. +# It is assumed all groups are independent. +# +# con is a J by d matrix containing the contrast coefficients that are used. +# If con is not specified, all pairwise comparisons are made. +# +# Missing values are automatically removed. +# +# pr=FALSE included to avoid errors using an earlier version of this function when +# dealing with two-way and higher designs +# +# Adjusted p-values are based on the Studentized maximum modulus distribution with the +# goal of controlling FWE +# +# To apply the Kaiser-Bowden method, use the function kbcon +# +if(tr==.5)stop('Use the R function medpb to compare medians') +if(is.data.frame(x))x=as.matrix(x) +if(is.matrix(x))x<-listm(x) +if(!is.list(x))stop('Data must be stored in a matrix or in list mode.') +con<-as.matrix(con) +J<-length(x) +sam=NA +h<-vector('numeric',J) +w<-vector('numeric',J) +xbar<-vector('numeric',J) +for(j in 1:J){ +xx<-!is.na(x[[j]]) +val<-x[[j]] +x[[j]]<-val[xx] # Remove missing values +sam[j]=length(x[[j]]) +h[j]<-length(x[[j]])-2*floor(tr*length(x[[j]])) + # h is the number of observations in the jth group after trimming. 
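+ # w is the estimated squared standard error of the trimmed mean for group j,
+ # based on the Winsorized variance.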
+w[j]<-((length(x[[j]])-1)*winvar(x[[j]],tr))/(h[j]*(h[j]-1)) +xbar[j]<-mean(x[[j]],tr) +} +if(sum(con^2)==0){ +CC<-(J^2-J)/2 +psihat<-matrix(0,CC,9) +dimnames(psihat)<-list(NULL,c('Group','Group','psihat','ci.lower','ci.upper', +'p.value','Est.1','Est.2','adj.p.value')) +test<-matrix(NA,CC,6) +dimnames(test)<-list(NULL,c('Group','Group','test','crit','se','df')) +jcom<-0 +for (j in 1:J){ +for (k in 1:J){ +if (j < k){ +jcom<-jcom+1 +test[jcom,3]<-abs(xbar[j]-xbar[k])/sqrt(w[j]+w[k]) +sejk<-sqrt(w[j]+w[k]) +test[jcom,5]<-sejk +psihat[jcom,1]<-j +psihat[jcom,2]<-k +test[jcom,1]<-j +test[jcom,2]<-k +psihat[jcom,3]<-(xbar[j]-xbar[k]) +df<-(w[j]+w[k])^2/(w[j]^2/(h[j]-1)+w[k]^2/(h[k]-1)) +test[jcom,6]<-df +psihat[jcom,6]<-2*(1-pt(test[jcom,3],df)) +psihat[jcom,7]=xbar[j] +psihat[jcom,8]=xbar[k] +crit=qsmm(1-alpha,CC,df) +test[jcom,4]<-crit +psihat[jcom,4]<-(xbar[j]-xbar[k])-crit*sejk +psihat[jcom,5]<-(xbar[j]-xbar[k])+crit*sejk +psihat[jcom,9]=1-psmm(test[jcom,3],CC,df) +}}}} +if(sum(con^2)>0){ +if(nrow(con)!=length(x)){ +stop('The number of groups does not match the number of contrast coefficients.') +} +CC=ncol(con) +psihat<-matrix(0,ncol(con),6) +dimnames(psihat)<-list(NULL,c('con.num','psihat','ci.lower','ci.upper', +'p.value','adj.p.value')) +test<-matrix(0,ncol(con),5) +dimnames(test)<-list(NULL,c('con.num','test','crit','se','df')) +df<-0 +for (d in 1:ncol(con)){ +psihat[d,1]<-d +psihat[d,2]<-sum(con[,d]*xbar) +sejk<-sqrt(sum(con[,d]^2*w)) +test[d,1]<-d +test[d,2]<-sum(con[,d]*xbar)/sejk +df<-(sum(con[,d]^2*w))^2/sum(con[,d]^4*w^2/(h-1)) +crit=qsmm(1-alpha,CC,df) +test[d,3]<-crit +test[d,4]<-sejk +test[d,5]<-df +psihat[d,3]<-psihat[d,2]-crit*sejk +psihat[d,4]<-psihat[d,2]+crit*sejk +psihat[d,5]<-2*(1-pt(abs(test[d,2]),df)) +psihat[d,6]=1-psmm(abs(test[d,2]),CC,df) +} +} +list(n=sam,test=test,psihat=psihat) +} + +linconpb<-function(x,alpha=.05,nboot=NA,grp=NA,est=tmean,con=0,method='holm',bhop=FALSE,SEED=TRUE,...){ +# +# Multiple comparisons for J independent groups using trimmed means +# +# A percentile bootstrap method with Rom's method is used. +# +# The data are assumed to be stored in x +# which either has list mode or is a matrix. In the first case +# x[[1]] contains the data for the first group, x[[2]] the data +# for the second group, etc. Length(x)=the number of groups = J. +# If stored in a matrix, the columns of the matrix correspond +# to groups. +# +# est is the measure of location and defaults to the median +# ... can be used to set optional arguments associated with est +# +# The argument grp can be used to analyze a subset of the groups +# Example: grp=c(1,3,5) would compare groups 1, 3 and 5. +# +# Missing values are allowed. +# +con<-as.matrix(con) +if(is.matrix(x) || is.data.frame(x))x<-listm(x) +if(bhop)method='BH' +if(!is.list(x))stop('Data must be stored in list mode or in matrix mode.') +if(!is.na(sum(grp))){ # Only analyze specified groups. +xx<-list() +for(i in 1:length(grp))xx[[i]]<-x[[grp[i]]] +x<-xx +} +J<-length(x) +tempn<-0 +mvec<-NA +for(j in 1:J){ +temp<-x[[j]] +temp<-temp[!is.na(temp)] # Remove missing values. +tempn[j]<-length(temp) +x[[j]]<-temp +mvec[j]<-est(temp,...) 
+} +Jm<-J-1 +# +# Determine contrast matrix +# +if(sum(con^2)==0){ +ncon<-(J^2-J)/2 +con<-matrix(0,J,ncon) +id<-0 +for (j in 1:Jm){ +jp<-j+1 +for (k in jp:J){ +id<-id+1 +con[j,id]<-1 +con[k,id]<-0-1 +}}} +ncon<-ncol(con) +if(nrow(con)!=J)stop('Something is wrong with con; the number of rows does not match the number of groups.') +# Determine nboot if a value was not specified +if(is.na(nboot)){ +nboot<-5000 +if(J <= 8)nboot<-4000 +if(J <= 3)nboot<-2000 +} +# Determine critical values +if(method!='BH'){ +if(alpha==.05){ +dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) +if(ncon > 10){ +avec<-.05/c(11:ncon) +dvec<-c(dvec,avec) +}} +if(alpha==.01){ +dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) +if(ncon > 10){ +avec<-.01/c(11:ncon) +dvec<-c(dvec,avec) +}} +if(alpha != .05 && alpha != .01){ +dvec<-alpha/c(1:ncon) +} +} +if(method=='BH')dvec<-(ncon-c(1:ncon)+1)*alpha/ncon +bvec<-matrix(NA,nrow=J,ncol=nboot) +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +for(j in 1:J){ +data<-matrix(sample(x[[j]],size=length(x[[j]])*nboot,replace=TRUE),nrow=nboot) +bvec[j,]<-apply(data,1,est,...) # Bootstrapped values for jth group +} +test<-NA +bcon<-t(con)%*%bvec #ncon by nboot matrix +tvec<-t(con)%*%mvec +for (d in 1:ncon){ +tv<-sum(bcon[d,]==0)/nboot +test[d]<-sum(bcon[d,]>0)/nboot+.5*tv +if(test[d]> .5)test[d]<-1-test[d] +} +test<-2*test +output<-matrix(0,ncon,7) +dimnames(output)<-list(NULL,c('con.num','psihat','p.value','p.crit','ci.lower','ci.upper','p.adjusted')) +temp2<-order(0-test) +zvec<-dvec[1:ncon] +sigvec<-(test[temp2]>=zvec) +output[temp2,4]<-zvec +icl<-round(dvec[ncon]*nboot/2)+1 +icu<-nboot-icl-1 +for (ic in 1:ncol(con)){ +output[ic,2]<-tvec[ic,] +output[ic,1]<-ic +output[ic,3]<-test[ic] +temp<-sort(bcon[ic,]) +output[ic,5]<-temp[icl] +output[ic,6]<-temp[icu] +} +output[,7]=p.adjust(output[,3],method=method) +num.sig<-sum(output[,3]<=output[,4]) +list(output=output,con=con,num.sig=num.sig) +} + wwmcp.miss<-function(J,K,x,tr=.2,alpha=.05,nboot=500,SEED=TRUE){ +# +# Do all multiple comparisons for a within-by-within design +# using trimmed means in a manner that uses all of the data when some +# values are missing. +# +conM=con2way(J,K) +A=rmmismcp(x,con=conM$conA,tr=tr,alpha=alpha,SEED=SEED,nboot=nboot) +B=rmmismcp(x,con=conM$conB,tr=tr,alpha=alpha,nboot=nboot,SEED=SEED) +AB=rmmismcp(x,con=conM$conAB,tr=tr,alpha=alpha,nboot=nboot,SEED=SEED) +list(Factor_A=A,Factor_B=B,Factor_AB=AB) +} + +smgridAB<-function(x,y,IV=c(1,2),Qsplit1=.5,Qsplit2=.5,tr=.2,VAL1=NULL,VAL2=NULL, +PB=FALSE,est=tmean,nboot=1000,pr=TRUE,fun=ES.summary, +xout=FALSE,outfun=outpro,SEED=TRUE,...){ +# +# Split on two variables, not just one. +# +# Qsplit: split the independent variable based on the +# quantiles indicated by Qsplit +# Example +# Qsplit1=c(.25,.5,.75) +# Qsplit2=.5 +# would split based on the quartiles for the first independent variable and the median +# for the second independent variable +# +# Alternatively, can split the data based on specified values indicating by the arguments +# VAL1 and VAL2 +# +# Then test the hypothesis of equal measures of location +# IV[1]: indicates the column of containing the first independent variable to use. +# IV[2]: indicates the column of containing the second independent variable to use. 
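+#
+# Illustrative call (hypothetical data): with x an n-by-2 matrix of independent
+# variables and y the outcome,
+#   smgridAB(x,y,Qsplit1=c(.25,.5,.75),Qsplit2=.5)
+# splits on the quartiles of the first IV and the median of the second, then
+# compares trimmed means across the rows and columns of the resulting grid.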
+# +# if(length(unique(y)>2))stop('y should be binary') +x=as.matrix(x) +p=ncol(x) +if(p==1)stop('There should be two or more independent variables') +p1=p+1 +xy<-elimna(cbind(x,y)) +x<-xy[,1:p] +y<-xy[,p1] +v=NULL +if(xout){ +flag<-outfun(x,plotit=FALSE,...)$keep +x<-x[flag,] +y<-y[flag] +xy=cbind(x,y) +} +J=length(Qsplit1)+1 +K=length(Qsplit2)+1 +z=list() +group=list() +if(is.null(VAL1) || is.null(VAL2)){ +N.int=length(Qsplit1)+1 +N.int2=length(Qsplit2)+1 +} +else { +J=length(VAL1)+1 +K=length(VAL2)+1 +N.int=length(VAL1)+1 +N.int2=length(VAL2)+1 +} +JK=J*K +MAT=matrix(1:JK,J,K,byrow=TRUE) +est.mat=matrix(NA,nrow=N.int,ncol=N.int2) +n.mat=matrix(NA,nrow=N.int,ncol=N.int2) +DV.mat=matrix(NA,nrow=N.int,ncol=N.int2) +L1=NULL +L2=NULL +if(is.null(VAL1) || is.null(VAL2)){ +qv=quantile(x[,IV[1]],Qsplit1) +qv=c(min(x[,IV[1]]),qv,max(x[,IV[1]])) +qv2=quantile(x[,IV[2]],Qsplit2) +qv2=c(min(x[,IV[2]]),qv2,max(x[,IV[2]])) +} +else{ +qv=VAL1 +qv=c(min(x[,IV[1]]),qv,max(x[,IV[1]])) +qv2=VAL2 +qv2=c(min(x[,IV[2]]),qv2,max(x[,IV[2]])) +} +ic=0 +for(j in 1:N.int){ +j1=j+1 +xsub=binmat(xy,IV[1],qv[j],qv[j1]) +for(k in 1:N.int2){ +k1=k+1 +xsub2=binmat(xsub,IV[2],qv2[k],qv2[k1]) +est.mat[j,k]=est(xsub2[,p1],...) +n.mat[j,k]=length(xsub2[,p1]) +ic=ic+1 +z[[ic]]=xsub2[,p1] +if(length(z[[ic]])>6)group[[ic]]=summary(xsub2[,1:p]) +} +} +n=NA +for(j in 1:length(z)){ +n[j]=length(z[[j]]) +} +if(min(n)<=5){ +id=which(n>5) +del=which(n<=5) +n=n[id] +if(pr)print(paste('For group',del,'the sample size is less than 6')) +} +A=list() +B=list() +A.ES=list() +B.ES=list() +for(j in 1:J)A.ES[[j]]=IND.PAIR.ES(z[MAT[j,]],fun=fun)$effect.size[[1]] +for(j in 1:K)B.ES[[j]]=IND.PAIR.ES(z[MAT[,j]],fun=fun)$effect.size[[1]] +if(!PB){ +for(j in 1:J)A[[j]]=lincon(z[MAT[j,]],tr=tr,pr=FALSE)$psihat +for(j in 1:K)B[[j]]=lincon(z[MAT[,j]],tr=tr,pr=FALSE)$psihat +} +if(PB){ +for(j in 1:J)A[[j]]=linpairpb(z[MAT[j,]],est=est,nboot=nboot,...)$output +for(j in 1:K)B[[j]]=linpairpb(z[MAT[,j]],est=est,nboot=nboot,...)$output +} +list(est.loc.4.DV=est.mat,n=n.mat,A=A,B=B,A.effect.sizes=A.ES,B.effect.sizes=B.ES) +} + + +linconbt<-function(x,con=0,tr=.2,alpha=.05,nboot=599,pr=FALSE,SEED=TRUE,method='holm'){ +# +# Compute a 1-alpha confidence interval for a set of d linear contrasts +# involving trimmed means using the bootstrap-t bootstrap method. +# Independent groups are assumed. +# +# The data are assumed to be stored in x in list mode. Thus, +# x[[1]] contains the data for the first group, x[[2]] the data +# for the second group, etc. Length(x)=the number of groups = J, say. +# +# Missing values are automatically removed. +# +# con is a J by d matrix containing the contrast coefficents of interest. +# If unspecified, all pairwise comparisons are performed. +# For example, con[,1]=c(1,1,-1,-1,0,0) and con[,2]=c(,1,-1,0,0,1,-1) +# will test two contrasts: (1) the sum of the first two trimmed means is +# equal to the sum of the second two, and (2) the difference between +# the first two is equal to the difference between the trimmed means of +# groups 5 and 6. +# +# The default number of bootstrap samples is nboot=599 +# +# This function uses functions trimparts and trimpartt written for this +# book. +# +# +# +# +if(is.data.frame(x))x=as.matrix(x) +con<-as.matrix(con) +if(is.matrix(x))x<-listm(x) +if(!is.list(x))stop('Data must be stored in a matrix or in list mode.') +J<-length(x) +for(j in 1:J){ +xx<-x[[j]] +x[[j]]<-xx[!is.na(xx)] # Remove any missing values. 
+} +Jm<-J-1 +d<-(J^2-J)/2 +FLAG=FALSE +if(sum(con^2)==0){ +FLAG=TRUE +con=con.all.pairs(J) +} +if(nrow(con)!=length(x))stop('The number of groups does not match the number of contrast coefficients.') +bvec<-array(0,c(J,2,nboot)) +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +nsam=matl(lapply(x,length)) +for(j in 1:J){ +xcen<-x[[j]]-mean(x[[j]],tr) +data<-matrix(sample(xcen,size=length(x[[j]])*nboot,replace=TRUE),nrow=nboot) +bvec[j,,]<-apply(data,1,trimparts,tr) # A 2 by nboot matrix. The first row +# contains the bootstrap trimmed means, the second row +# contains the bootstrap squared standard errors. +} +m1<-bvec[,1,] # J by nboot matrix containing the bootstrap trimmed means +m2<-bvec[,2,] # J by nboot matrix containing the bootstrap sq. se. +boot<-matrix(0,ncol(con),nboot) +for (d in 1:ncol(con)){ +top<-apply(m1,2,trimpartt,con[,d]) +# A vector of length nboot containing psi hat values +consq<-con[,d]^2 +bot<-apply(m2,2,trimpartt,consq) +boot[d,]<-abs(top)/sqrt(bot) +} +testb<-apply(boot,2,max) +ic<-floor((1-alpha)*nboot) +ic.crit=ic +testb<-sort(testb) +psihat<-matrix(0,ncol(con),4) +test<-matrix(0,ncol(con),5) +dimnames(psihat)<-list(NULL,c('con.num','psihat','ci.lower','ci.upper')) +dimnames(test)<-list(NULL,c('con.num','test','se','p.value','p.adjusted')) +for (d in 1:ncol(con)){ +test[d,1]<-d +psihat[d,1]<-d +testit<-lincon(x,con[,d],tr,pr=FALSE) +test[d,2]<-testit$test[1,2] +pval<-mean((abs(testit$test[1,2])invalid) +if(sum(flag,na.rm=TRUE)>0){ +print(paste('The value of argument invalid indicates that any value greater than', invalid,' is invalid')) +print(paste('Variable', j, 'has one or more invalid values')) +print('They occur in the following positions') +nr=c(1:length(x[[j]])) +print(nr[flag]) +if(STOP)stop() +}} +for(j in 1:J){ +flag=as.logical(x[[j]]<0) +if(sum(flag)>0){ +print(paste('Values less than zero were detected')) +print(paste('Variable', j, 'has one or more values=0')) +print('They occur in the following positions') +nr=c(1:length(x[[j]])) +print(nr[flag]) +if(STOP)stop() +} +} +Jm<-J-1 +d<-(J^2-J)/2 +FLAG=FALSE +if(sum(con^2)==0){ +FLAG=TRUE +con=con.all.pairs(J) +} +if(nrow(con)!=length(x))stop('The number of groups does not match the number of contrast coefficients.') +bvec<-array(0,c(J,2,nboot)) +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +nsam=matl(lapply(x,length)) +for(j in 1:J){ +xcen<-x[[j]]-mean(x[[j]],tr) +data<-matrix(sample(xcen,size=length(x[[j]])*nboot,replace=TRUE),nrow=nboot) +bvec[j,,]<-apply(data,1,trimparts,tr) # A 2 by nboot matrix. The first row +# contains the bootstrap trimmed means, the second row +# contains the bootstrap squared standard errors. +} +m1<-bvec[,1,] # J by nboot matrix containing the bootstrap trimmed means +m2<-bvec[,2,] # J by nboot matrix containing the bootstrap sq. se. 
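+# For each contrast, form the bootstrap analog of the test statistic:
+# |psi hat| divided by its estimated standard error, computed from the
+# centered bootstrap samples.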
+boot<-matrix(0,ncol(con),nboot) +for (d in 1:ncol(con)){ +top<-apply(m1,2,trimpartt,con[,d]) +# A vector of length nboot containing psi hat values +consq<-con[,d]^2 +bot<-apply(m2,2,trimpartt,consq) +boot[d,]<-abs(top)/sqrt(bot) +} +testb<-apply(boot,2,max) +ic<-floor((1-alpha)*nboot) +ic.crit=ic +testb<-sort(testb) +psihat<-matrix(0,ncol(con),4) +test<-matrix(0,ncol(con),5) +dimnames(psihat)<-list(NULL,c('con.num','psihat','ci.lower','ci.upper')) +dimnames(test)<-list(NULL,c('con.num','test','se','p.value','p.adjusted')) +for (d in 1:ncol(con)){ +test[d,1]<-d +psihat[d,1]<-d +testit<-lincon(x,con[,d],tr,pr=FALSE) +test[d,2]<-testit$test[1,2] +pval<-mean((abs(testit$test[1,2])invalid +if(sum(flag,na.rm=TRUE)>0){ +print(paste('The value of argument invalid indicates that any value greater than', invalid,' is invalid')) +print(paste('Variable', j, 'has one or more invalid values')) +print('They occur in the following positions') +nr=c(1:length(x[[j]])) +print(nr[flag]) +if(STOP)stop() +}} +for(j in 1:J){ +flag=as.logical(x[[j]]<0) +if(sum(flag)>0){ +print(paste('Values less than zero were detected')) +print(paste('Variable', j, 'has one or more values=0')) +print('They occur in the following positions') +nr=c(1:length(x[[j]])) +print(nr[flag]) +if(STOP)stop() +} +} +Jm<-J-1 +d<-(J^2-J)/2 +# +CC<-(J^2-J)/2 +output<-matrix(0,CC,7) +dimnames(output)<-list(NULL,c('V1','V2','n','Mean.1','Mean.2','p.value','adj.p.value')) +ic=0 +for(j in 1:J){ +for(k in 1:J){ +if(jSP2) +n[2]=sum(flag,na.rm=TRUE) +flag=(x[,1]>SP1 & x[,2]<=SP2) +n[3]=sum(flag,na.rm=TRUE) +flag=(x[,1]>SP1 & x[,2]>SP2) +n[4]=sum(flag,na.rm=TRUE) +m=matrix(n,2,2,byrow=TRUE) +dimnames(m)=list(c('V1.less','V1.greater'),c('V2.less','V2.greater')) +m +} + + + + +oph.astig.mcnemar<-function(x,method='holm',invalid=4){ +# +# Astigmatism: compare prediction formulas +# +if(is.null(dim(x)))stop('x should be a matrix or data frame') +J=ncol(x) #number of formulas +flag=abs(elimna(x))>invalid +if(sum(flag,na.rm=TRUE)>0){ +nr=c(1:nrow(x)) +if(sum(flag)>1){ +print(paste('The value of argument invalid indicates that any value greater in absolute value than', invalid,' is invalid')) +print('The following rows have invalid values') +} +if(sum(flag)==1){ +print(paste('The value of argument invalid indicates that any value greater in absolute value than', invalid,' is invalid')) +print('The following row has an invalid value') +} +irow=NA +ic=0 +N=nrow(x) +for(i in 1:N){ +iflag=abs(x[i,])>invalid +if(sum(iflag,na.rm=TRUE)>0){ +ic=ic+1 +irow[ic]=i +}} +print(irow) +istop() +} +CC=(J^2-J)/2 +output<-matrix(0,CC,9) +dimnames(output)<-list(NULL,c('D', ' Var', 'N< ' , '%<', 'Var', 'N<', '%< ', +'p.value','Adj.p.value')) +E=list() +TAB=list() +D=seq(.25,2,.25) #D intervals from .25 to 2 +for(L in 1:length(D)){ +ic=0 +for (j in 1:J){ +for (k in 1:J){ +if (j < k){ +ic=ic+1 +a=mat2table(x[,c(j,k)],D[L],D[L]) +n1=sum(x[[j]]<=D[L]) +pn1=mean(x[[j]]<=D[L]) +n2=sum(x[[k]]<=D[L]) +pn2=mean(x[[k]]<=D[L]) +if(sum(is.na(a)>0))print(paste('No data for VAR',j,'VAR',k,'D=',D[L])) +if(sum(is.na(a))==0){ +mct=mcnemar.test(a) +output[ic,1]=D[L] +output[ic,2]=j +output[ic,3]=n1 +output[ic,4]=pn1 +output[ic,5]=k +output[ic,6]=n2 +output[ic,7]=pn2 +output[ic,8]=mct[[3]] +if(a[1,2]==0 &a[2,1]==0)output[ic,8]=1 +}}}} +output[,9]=p.adjust(output[,8],method=method) +E[[L]]=output +} +E +} + +skipreg<-function(x,y,outfun=outpro.depth,Regfun=ols,...){ +# +# Skipped regression: remove outliers from cbind(x,y) using a method +# that takes into account the overall structure of the data 
cloud. +# +# other choices for outfun: +# outpro +# outmgv +# out +# +x<-as.matrix(x) +xx<-cbind(x,y) +xx<-elimna(xx) +temp<-NA +x<-as.matrix(x) +n=nrow(x) +a=outfun(xx) +id=a$keep +x<-xx[id,1:ncol(x)] +x<-as.matrix(x) +y<-xx[id,ncol(x)+1] +b=Regfun(x,y) +list(n=a$n,n.keep=a$n.keep,coef=b$coef) +} + + +oph.astig.datasetconvexpoly<-function(m,Region=.05,plotit=FALSE,xlab='V1',ylab='V2',pch='.',reset=TRUE){ +# +# region=.05 means that the function +# determine the 1-.05=.95 +# This is done for each formula +# Assume m is a matrix or data frame having +# J columns. First two columns first formula, next two columns next formula.. +# +# So J should be an even integer +# +J=ncol(m) +N=J/2 +if(N != floor(N))stop('Should have an even number of columns') +region=list() +centers=list() +id=c(-1,0) +for(j in 1:N){ +id=id+2 +a=mulcen.region(m[,id],region=Region,plotit=FALSE,xlab=xlab,ylab=ylab) +centers[[j]]=a$center +region[[j]]=a$convex.hull.pts +n=nrow(elimna(m[id,])) +n=as.integer(n) +centers[[j]]=c(a$center,n) +names(centers[[j]])=c('V1','V2','N') +} +if(plotit){ +par(pty='s') +M=m +if(N>1)par(mfrow=c(2,2)) +id=c(-1,0) +for(j in 1:N){ +id=id+2 +m=M[,id] +m=as.matrix(m) +temp<-fdepth(m,plotit=FALSE) #Defaults to using the marginal medians +flag=(temp>=qest(temp,Region)) +xx<-m[flag,] +xord<-order(xx[,1]) +xx<-xx[xord,] +temp<-chull(xx) +plot(m[,1],m[,2],xlab=xlab,ylab=ylab,pch=pch,xlim=c(-1.5,1.5),ylim=c(-1.5,1.5),asp=1) +lines(xx[temp,]) +lines(xx[c(temp[1],temp[length(temp)]),]) +} +if(reset)par(mfrow=c(1,1)) +} +list(centers=centers,convex.hull.pts=region) +} + + +oph.astig.indepintervals<-function(m,method='holm',invalid=4){ +# +# For column of x, compare frequencies using KMS method +# +# +# n: sample sizes +# x is a matrix or data frame with 8 rows +# +# +E=list() +ic=0 +J=ncol(m) +x=m +flag=abs(elimna(x))>invalid +if(sum(flag,na.rm=TRUE)>0){ +nr=c(1:nrow(x)) +if(sum(flag)>1){ +print(paste('The value of argument invalid indicates that any value greater in absolute value than', invalid,' is invalid')) +print('The following rows have invalid values') +} +if(sum(flag)==1){ +print(paste('The value of argument invalid indicates that any value greater in absolute value than', invalid,' is invalid')) +print('The following row has an invalid value') +} +irow=NA +ic=0 +N=nrow(x) +for(i in 1:N){ +iflag=abs(x[i,])>invalid +if(sum(iflag,na.rm=TRUE)>0){ +ic=ic+1 +irow[ic]=i +}} +print(irow) +stop() +} +id=matrix(NA,8,2) +x=matrix(NA,8,2) +INT=c(0.25,0.50, 0.75,1,1.25,1.5,1.75,2) +dimnames(id)=list(NULL,ncol=c('S1','S2')) +for (j in 1:J){ + for (k in 1:J){ + if (j < k){ + ic=ic+1 +id[,1]=rep(j,8) +id[,2]=rep(k,8) +# Next determine frequencies +S1=elimna(m[,j]) +S2=elimna(m[,k]) +n1=length(S1) +n2=length(S2) +for(L in 1:8){ +x[L,1]=sum(S1<=INT[L]) +x[L,2]=sum(S2<=INT[L]) +} +a=srg1.vs.2(c(n1,n2),x) +Adj.p.value=p.adjust(a[,3],method=method) +E[[ic]]=cbind(id,a,Adj.p.value) + }}} +E +} + +srg1.vs.2<-function(n,m,alpha=.05){ +# +# Goal: Compare proportions for each of 8 intervals. +# +# m has two columns. +# Col1: frequencies for first person +# Col2: frequencies for second person +# n: indicates the totals (number of cases) for each +# So n=c(100,120) would indicate 100 surgeries for the first +# and 120 for the second. 
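+#
+# For each of the eight cutoffs (.25 to 2.00), the two proportions of cases at
+# or below the cutoff are compared with binom2g, and the p-values are reported.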
+ +INT=c( +'<= 0.25', +'<= 0.50', +'<= 0.75', +'<= 1.00', +'<= 1.25', +'<= 1.50', +'<= 1.75', +'<= 2.00') +output<-matrix(NA,ncol=3,nrow=8) +n1=n[1] +n2=n[2] +for(j in 1:8){ +r1=m[j,1] +r2=m[j,2] +a=binom2g(r1,n1,r2,n2,alpha=alpha) +output[j,]=c(a$p1,a$p2,a$p.value) +} +dimnames(output)=list(INT,c('p1','p2','p-value')) +output +} + +ABES.KS<-function(J,K,x,tr=0.2){ +# +# Effect size for Factor A, ignoring B and +# Factor B, ignoring A + # + # A robust heteroscedastic analog of Cohen's d is used + # + if(is.data.frame(x))x=as.matrix(x) + if(is.matrix(x))x=listm(x) + JK=J*K + mat=matrix(c(1:JK),nrow=J,byrow=TRUE) + A=list() + for(j in 1:J){ + id=mat[j,] + z=pool.a.list(x[id]) + A[[j]]=z + } + + B=list() + for(k in 1:K){ + id=mat[,k] + z=pool.a.list(x[id]) + B[[k]]=z + } + E1=KS.ANOVA.ES(A,tr=tr) + E2=KS.ANOVA.ES(B,tr=tr) +list(A.Effect.Size=E1,B.Effect.Size=E2) +} + +rmdif.scores<-function(x){ +# +# Compute all pairwise difference scores +# + + if(!is.matrix(x) & !is.data.frame(x))stop('x should be matrix or data frame') + x=elimna(x) + n=nrow(x) + J=ncol(x) + ALL=(J^2-J)/2 + M=matrix(NA,nrow=n,ncol=ALL) + ic=0 + for(j in 1:J){ + for(k in 1:J){ + if(j0 & ci[1]<0)ci=0.001 +#if(DIF<0 & ci[2]>0)ci[2]=0-0.001 +#} +#Eif(pv>=alpha){ +#if(DIF>0 & ci[1]>0)ci[1]=0-0.001 +#if(DIF<0 & ci[2]<0)ci[2]=-0.001 +#} +list(n1=n1,n2=n2,Est1=E1,Est2=E2,p.value=pv) +} + +bwESP.GLOB.B.NULL<-function(n1,n2,K,tr=.2,MM=FALSE,SEED=TRUE,iter=1000,g=0.,rho=0){ +if(SEED)set.seed(2) +ND=NA +for(i in 1:iter){ +M1=rmul(n1,p=K,g=g,rho=rho) +M2=rmul(n2,p=K,g=g,rho=rho) +ND[i]=rmESPRO.est(M1,MM=MM)-rmESPRO.est(M2,MM=MM) +} +ND +} + + + + rmESPRO.est<-function(x,est=tmean,MM=FALSE,...){ + # + # Estimate projection measure of effect size + # + if(is.list(x))x=matl(x) + x=elimna(x) + n=nrow(x) + E=apply(x,2,est,...) + GM=mean(E) + J=ncol(x) + GMvec=rep(GM,J) + GMvec=rep(GM,J) + DN=pdis(x,E,center=GMvec,MM=MM) + DN +} + +smean.depth<-function(m){ +# +# Skipped estimator based on projection for removing outliers. +# Uses random projections +# +m=elimna(m) +id=outpro.depth(m)$keep +val=apply(m[id,],2,mean) +val +} + +oph.astig.depbivmeans<-function(m,alpha=.05,nboot=1999,SEED=TRUE,tr=0){ +# +# This function is designed to compare two bivariate distributions relevant to +# prediction errors when dealing with astigmatism. +# +# Assume m is a matrix or data frame having +# J columns. First two columns first formula, next two columns next formula.. +# +# So J should be an even integer +# +# Compare col 1-2 to 3-4, then 1-2 vs 5-6, etc +# using difference scores. That is, col1 and 3, use difference scores, col 2 and 4, then col 1 and 5, etc. +# +# returns confidence interval for pairwise difference scores. +# alpha = .05 = .95 confidence intervals +# +# Estimates are adjusted if outliers are found based on a projection method. 
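+#
+# Illustrative call (hypothetical data): if x1,y1 and x2,y2 are the bivariate
+# prediction errors for two formulas,
+#   oph.astig.depbivmeans(cbind(x1,y1,x2,y2))
+# compares the two formulas via the difference scores of the paired columns.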
+# +nullv=rep(0,2) +J=ncol(m) +N=J/2 +J1=J-1 +chk.n=names(m) +MAT=matrix(NA,nrow=2,ncol=4) +dimnames(MAT)=list(NULL,c('Mean 1','Mean 2','p.value','p.adjusted')) +if(N != floor(N))stop('Should have an even number of columns') +results=list() +mat=matrix(NA,nrow=N,ncol=2,byrow=TRUE) +v1=seq(1,J1,2) +mat[,1]=v1 +mat[,2]=v1+1 +ic=0 +for(j in 1:N){ +for(k in 1:N){ +if(j1)){ +d=jitter(m[id,j])/100 +m[id,j]=d +}} +if(plotit){ +plot.new() +if(N>1)par(mfrow=c(2,2)) +} +region=list() +centers=list() +val=list() +pv=list() +CENTERS=list() +id=c(-1,0) +for(j in 1:N){ +id=id+2 +a=smeancr.cord.oph(m[,id],SEED=SEED,plotit=FALSE,xlab=xlab,ylab=ylab,nboot=nboot) +centers[[j]]=a$center +region[[j]]=a$conf.region.points +val[[j]]=a$boot.vals +centers[[j]]=a$center +n=nrow(elimna(m[,id])) +n=as.integer(n) +CENTERS[[j]]=c(a$center,n) +names(CENTERS[[j]])=c('V1','V2','N') +pv[[j]]=a$p.value +} +VAL=val +#if(N>1)par(mfrow=c(2,2)) +id=c(-1,0) +for(j in 1:N){ +id=id+2 +n=nrow(m[,id]) +crit.level<-.05 +if(n<=120)crit.level<-.045 +if(n<=80)crit.level<-.04 +if(n<=60)crit.level<-.035 +if(n<=40)crit.level<-.03 +if(n<=30)crit.level<-.025 +if(n<=20)crit.level<-.02 +ic<-round((1-crit.level)*nboot) +val=VAL[[j]] +est=centers[[j]] +temp3<-est +ic<-round((1-crit.level)*nboot) +if(!MC)temp<-pdis(val,center=est) +if(MC)temp<-pdisMC(val,center=est) +temp.dis<-order(temp) +xx<-val[temp.dis[1:ic],] +xord<-order(xx[,1]) +xx<-xx[xord,] +temp<-chull(xx) +region[[j]]=xx[temp,] +if(plotit){ +#if(N>1)par(mfrow=c(2,2)) +plot(val[,1],val[,2],xlab=xlab,ylab=ylab) +points(temp3[1],temp3[2],pch="+") +lines(xx[temp,]) +lines(xx[c(temp[1],temp[length(temp)]),]) +} +} +par(mfrow=c(1,1)) +list(centers=CENTERS,conf.region.points=region,p.values=pv) +} + + +smeancr.cord.oph<-function(m,nullv=rep(0,ncol(m)),cop=3,MM=FALSE,SEED=TRUE, +nboot=500,plotit=TRUE,MC=FALSE,xlab="VAR 1",ylab="VAR 2",STAND=TRUE){ +# +# m is an n by p matrix +# +# Test hypothesis that multivariate skipped estimators +# are all equal to the null value, which defaults to zero. +# The level of the test is .05. +# +# Eliminate outliers using a projection method +# That is, determine center of data using: +# +# cop=1 Donoho-Gasko median, +# cop=2 MCD, +# cop=3 marginal medians. +# cop=4 MVE +# +# For each point +# consider the line between it and the center +# project all points onto this line, and +# check for outliers using +# +# MM=F, a boxplot rule. +# MM=T, rule based on MAD and median +# +# Repeat this for all points. A point is declared +# an outlier if for any projection it is an outlier +# using a modification of the usual boxplot rule. +# +# Eliminate any outliers and compute means +# using remaining data. 
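+# Note: the critical level used below is adjusted as a function of the sample size
+# (.05 for n>120, decreasing to .02 for n<=20); this small-sample adjustment is
+# intended to keep the actual level of the test close to .05.
+# Example (not run, hypothetical data): smeancr.cord.oph(cbind(err.x,err.y)) tests
+# whether the bivariate skipped means of the prediction errors equal nullv=c(0,0).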
+# +if(SEED)set.seed(2) +m<-elimna(m) +n<-nrow(m) +est=smean(m,MC=MC,cop=cop,STAND=STAND) +crit.level<-.05 +if(n<=120)crit.level<-.045 +if(n<=80)crit.level<-.04 +if(n<=60)crit.level<-.035 +if(n<=40)crit.level<-.03 +if(n<=30)crit.level<-.025 +if(n<=20)crit.level<-.02 +data<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot) +val<-matrix(NA,ncol=ncol(m),nrow=nboot) +for(j in 1: nboot){ +mm<-m[data[j,],] +val[j,]<-smean(mm,MC=MC,cop=cop,STAND=STAND) +} +if(!MC)temp<-pdis(rbind(val,nullv),center=est) +if(MC)temp<-pdisMC(rbind(val,nullv),center=est) +sig.level<-sum(temp[nboot+1]1)par(mfrow=c(2,2)) +} +region=list() +centers=list() +val=list() +pv=list() +CENTERS=list() +id=c(-1,0) +for(j in 1:N){ +id=id+2 +a=meancr.cord.oph(m[,id],SEED=SEED,plotit=FALSE,xlab=xlab,ylab=ylab,nboot=nboot) +centers[[j]]=a$center +region[[j]]=a$conf.region.points +val[[j]]=a$boot.vals +centers[[j]]=a$center +n=nrow(elimna(m[,id])) +n=as.integer(n) +CENTERS[[j]]=c(a$center,n) +names(CENTERS[[j]])=c('V1','V2','N') +pv[[j]]=a$p.value +} +VAL=val +plot.new() +if(N>1)par(mfrow=c(2,2)) +id=c(-1,0) +for(j in 1:N){ +id=id+2 +n=nrow(m[,id]) +crit.level<-.05 +if(n<=120)crit.level<-.045 +if(n<=80)crit.level<-.04 +if(n<=60)crit.level<-.035 +if(n<=40)crit.level<-.03 +if(n<=30)crit.level<-.025 +if(n<=20)crit.level<-.02 +ic<-round((1-crit.level)*nboot) +val=VAL[[j]] +est=centers[[j]] +temp3<-est +ic<-round((1-crit.level)*nboot) +if(!MC)temp<-pdis(val,center=est) +if(MC)temp<-pdisMC(val,center=est) +temp.dis<-order(temp) +xx<-val[temp.dis[1:ic],] +xord<-order(xx[,1]) +xx<-xx[xord,] +temp<-chull(xx) +region[[j]]=xx[temp,] +if(plotit){ +plot(val[,1],val[,2],xlab=xlab,ylab=ylab) +points(temp3[1],temp3[2],pch="+") +lines(xx[temp,]) +lines(xx[c(temp[1],temp[length(temp)]),]) +}} +par(mfrow=c(1,1)) +list(centers=CENTERS,conf.region.points=region,p.values=pv) +} + + +meancr.cord.oph<-function(m,nullv=rep(0,ncol(m)),cop=3,MM=FALSE,SEED=TRUE,tr=0, +nboot=500,plotit=TRUE,MC=FALSE,xlab="VAR 1",ylab="VAR 2",STAND=TRUE){ +# +# m is an n by p matrix +# +# Test hypothesis that the means +# are all equal to the null value, which defaults to zero. +# The level of the test is .05. +# +# Eliminate outliers using a projection method +# That is, determine center of data using: +# +# cop=1 Donoho-Gasko median, +# cop=2 MCD, +# cop=3 marginal medians. +# cop=4 MVE +# +# For each point +# consider the line between it and the center +# project all points onto this line, and +# check for outliers using +# +# MM=F, a boxplot rule. +# MM=T, rule based on MAD and median +# +# Repeat this for all points. A point is declared +# an outlier if for any projection it is an outlier +# using a modification of the usual boxplot rule. +# +# Eliminate any outliers and compute means +# using remaining data. 
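+# Note: despite the shared documentation, this version differs from smeancr.cord.oph
+# above in that the center is the vector of (possibly trimmed) marginal means,
+# apply(m,2,mean,tr=tr), rather than the projection-based skipped estimator.
+# Example (not run, hypothetical data): meancr.cord.oph(cbind(err.x,err.y),tr=0)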
+# +if(SEED)set.seed(2) +m<-elimna(m) +n<-nrow(m) +#est=smean(m,MC=MC,cop=cop,STAND=STAND) +est=apply(m,2,mean,tr=tr) +crit.level<-.05 +if(n<=120)crit.level<-.045 +if(n<=80)crit.level<-.04 +if(n<=60)crit.level<-.035 +if(n<=40)crit.level<-.03 +if(n<=30)crit.level<-.025 +if(n<=20)crit.level<-.02 +data<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot) +val<-matrix(NA,ncol=ncol(m),nrow=nboot) +for(j in 1: nboot){ +mm<-m[data[j,],] +#val[j,]<-smean(mm,MC=MC,cop=cop,STAND=STAND) +val[j,]=apply(mm,2,mean) +} +if(!MC)temp<-pdis(rbind(val,nullv),center=est) +if(MC)temp<-pdisMC(rbind(val,nullv),center=est) +sig.level<-sum(temp[nboot+1]=v[j] & x[,xcol]=1)e.pow<-corfun(yhat,y)$cor^2 +stre=sqrt(e.pow) +} +list(coef=coef,residuals=residuals,Strength.Assoc=stre,Explanatory.Power=e.pow) +} + +rotate.points<-function(x,y,deg=NULL,rad=NULL){ +# +# Rotate points deg degrees or rad radians +# +if(!is.null(deg))rad=degrees.2.radians(deg) +if(is.null(rad))stop(' deg or rad need to be indicated') +xp=x*cos(rad)-y*sin(rad) +yp=y*cos(rad)+x*sin(rad) +d=cbind(xp,yp) +d +} + +degrees.2.radians<-function(d)d*pi/180 + +radians.2.degrees<-function(rad)rad*180/pi + +skip.gen.cor<-function(m,y=NULL,outfun=outpro.depth,...){ +# +# Eliminate outliers using +# outfun +# estimate correlation using remaining data +# +m=cbind(m,y) +id=outfun(m,...)$keep +if(ncol(m)==2)val=cor(m[id,])[1,2] +else val=cor(m[id,]) +val +} + +oph.astig.Dataset.Means.ConfEllipses<-function(m,plotit=TRUE,alpha=.05,reset=FALSE,POLY=FALSE, +xlab='X',ylab='Y',pch='.'){ +# +# See Hotelling_Bivariate_Transformation_Rand_10Jul21.docx in rfun +# +# +n=nrow(m) +J=ncol(m) +N=J/2 +if(N>1)par(mfrow=c(2,2)) +if(N != floor(N))stop('Should have an even number of columns') +results=list() +MAT=matrix(NA,nrow=N,ncol=10) +dimnames(MAT)=list(NULL,c('N','SW.px','SW.py','Mean.x','Mean.y','sd`x','sd`y','T','Cor','Ro.Ang.Deg')) +id=c(-1,0) +for(j in 1:N){ +id=id+2 +d=m[,id] +ntest1=round(shapiro.test(d[,1])$p.value,4) +ntest2=round(shapiro.test(d[,2])$p.value,4) +M=apply(d,2,mean) +M=round(M,4) +sd=apply(d,2,sd) +sd=round(sd,4) +Tsq=2*(n-1)*qf(1-alpha,2,n-2)/(n-2) +Tv=sqrt(Tsq) +P.cor=cor(d[,1],d[,2]) +P.cor=round(P.cor,4) +term=(2*P.cor*sd[1]*sd[2])/(var(d[,1])-var(d[,2])) +two.phi.rad=.5*atan(term) +two.phi.degrees=radians.2.degrees(two.phi.rad) # this actually phi, 2*phi is used when plotting +two.phi.degrees=round(two.phi.degrees,4) +if(sd[1]=int1[2] +flag3=m[,col[2]]<=int2[1] +flag4=m[,col[2]]>=int2[2] +} +if(!INC){ +flag1=m[,col[1]]int1[2] +flag3=m[,col[2]]int2[2] + +} +flag=as.logical(flag1*flag2*flag3*flag4) +m[flag,] +} + +harmonic.mean<-function(x)1/mean(1/x) + +rplotv2<- +function(x,y,est=tmean,scat=TRUE,fr=NA,plotit=TRUE,pyhat=FALSE,efr=.5,pch1='*',pch2='.', +theta=50,phi=25,scale=TRUE,expand=.5,SEED=TRUE,varfun=pbvar,outfun=outpro, +nmin=0,xout=FALSE,out=FALSE,eout=FALSE,xlab='X',ylab='Y',zscale=FALSE, +zlab=' ',pr=TRUE,duplicate='error',ticktype='simple',LP=TRUE,OLD=FALSE,pch='.',prm=TRUE,...){ +# +# Like rplot but can handle one or two binary independent variables, +# at least one non-binary independent variable is required. +# +# duplicate='error' +# In some situations where duplicate values occur, when plotting with +# two predictors, it is necessary to set duplicate='strip' +# +# LP=TRUE, the plot of the smooth is further smoothed via lplot (lowess) +# To get a plot as done with old version set +# LP=FALSE +# +# zscale=TRUE will standardize the dependent variable when plotting with 2 independent variables. 
+# +# efr is the span when computing explanatory strength of association +# +# cf qplot in the R package ggplot2 +# +if(pr){ +if(!xout)print('Suggest also looking at result using xout=TRUE') +} +x<-as.matrix(x) +p=ncol(x) +xx<-cbind(x,y) +xx<-elimna(xx) +n=nrow(xx) +if(eout){ +flag=outfun(xx,plotit=FALSE,...)$keep +xx=xx[flag,] +} +if(xout){ +flag=outfun(xx[,1:p],plotit=FALSE,...)$keep +xx=xx[flag,] +} +n.keep=nrow(xx) +x<-xx[,1:p] +x<-as.matrix(x) +p1=ncol(x)+1 +y<-xx[,p1] +if(ncol(x)==1){ +if(is.na(fr))fr<-.8 +val<-rungen(x,y,est=est,scat=scat,fr=fr,plotit=plotit,pyhat=TRUE, +xlab=xlab,ylab=ylab,LP=LP,pch=pch,...) +val2<-rungen(x,y,est=est,fr=efr,plotit=FALSE,pyhat=TRUE,LP=FALSE,...)$output +val<-val$output +} +if(ncol(x)>1){ +xvals=list() +id=chk4binary(x) +Lid=length(id) +if(Lid==ncol(x))stop('All independent variables are binary, a smoother is inappropriate') +if(Lid>2)stop('Can have a most two binary independent variables') +val=list() +if(Lid==1){ +xval=list() +yhat=list() +if(is.na(fr))fr=.8 +irow0=which(x[,id]==0) +val[[1]]=rplot(x[irow0,-id],y[irow0],pyhat=TRUE,plotit=FALSE,est=est,xlab=xlab,ylab=ylab,pr=FALSE)$yhat +irow1=which(x[,id]==1) +#print(x[irow1,-id]) +val[[2]]=rplot(x[irow1,-id],y[irow1],pyhat=TRUE,plotit=FALSE,est=est,xlab=xlab,ylab=ylab,pr=FALSE)$yhat +rplot2g(x[irow0,-id],y[irow0],x[irow1,-id],y[irow1],est=est,xlab=xlab,ylab=ylab,fr=fr,pch1=pch1,pch2=pch2) #,xout=xout,SEED=SEED) +xvals[[1]]=x[irow0,-id] +xvals[[2]]=x[irow1,-id] +} +if(Lid==2){ +if(ncol(x)>3)stop(' With two binary IVs, current version limited to a third continuous IV') +xval=NULL +yhat=NULL +xval=list() +yhat=list() +xy=cbind(x[,id],x[,-id],y) +v=bin2binary.IV(xy) +val=rplot4g(v,est=est,xlab=xlab,ylab=ylab,pyhat=pyhat) +if(pyhat){ +xvals=list() +val=list() +for(j in 1:4)xvals[[j]]=v[[j]][,3] +for(j in 1:4)val[[j]]=v[[j]][,4] +} +} + +if(Lid==0){ +if(pr && !OLD){ +print('A new estimate of the strength of the association is used by default.') +print(' To get the old estimate, set OLD=TRUE') +} +if(ncol(x)==2 && !scale){ +if(pr){print('scale=FALSE is specified.') +print('If there is dependence, might want to use scale=T') +}} +if(is.na(fr))fr<-1 +val<-rung3d(x,y,est=est,fr=fr,plotit=plotit,pyhat=TRUE,SEED=SEED,nmin=nmin,LP=LP, +scale=scale,phi=phi,theta=theta,expand=expand,zscale=zscale,pr=FALSE, +duplicate='error',xlab=xlab,ylab=ylab,zlab=zlab,ticktype=ticktype,...) +E.power=NULL +if(OLD){ +E.power=varfun(val)/varfun(y) +names(E.power)='' +if(E.power>1)E.power=.99 +} +if(!OLD)E.power=smRstr(x,y,fr=fr)$str^2 +stra=sqrt(E.power) +# Best correction at the moment. Not sure when or if needed. +# Maybe a correlation option is better, but need to check this. 
+xvals=x +if(ncol(x)==1)xvals=sort(xvals) +if(!pyhat){ +val <- NULL +xvals=NULL +} +if(!prm){ +stra=NULL +E.power=NULL +val=NULL +}}} +list(n=n,n.keep=n.keep,xvals=xvals,yhat = val) +} + +regIQR<-function(x,y,xr=x,regfun=Qreg,xout=FALSE,outfun=outpro,...){ +# +# +IQR=regYhat(x,y,xr=xr,regfun=regfun,q=.75)-regYhat(x,y,xr=xr,regfun=regfun,q=.25) +IQR +} + +ESfun.CI<-function(x,y,QSfun=median,method=c('KMS','EP','QS','QStr','AKP','WMW'),tr=.2,pr=TRUE,alpha=.05, +nboot=2000,SEED=TRUE){ +type=match.arg(method) +switch(type, + KMS=KMS.ci(x,y,alpha=alpha,nboot=nboot,SEED=SEED), + EP=EPci(x,y,tr=tr,alpha=alpha,SEED=SEED,nboot=nboot), + QS=shiftPBci(x,y,locfun=QSfun,alpha=alpha,nboot=nboot,SEED=SEED), + QStr=shiftPBci(x,y,locfun=tmean,alpha=alpha,nboot=nboot,SEED=SEED), + AKP=akp.effect.ci(x,y,tr=tr,alpha=alpha,nboot=nboot,SEED=SEED), + WMW=cidv2(x,y)) +} + + +anclin.QS<-function(x1,y1,x2,y2,pts=NULL,xout=FALSE,ALL=FALSE,npts=10,outfun=outpro,REQMIN=.001,...){ +# +# x1, y1 is the control group +# x2 y2 is the experimental group +# +# For Exp group, estimate the median of Y given the x values stored in +# pts +# pts=NULL: If ALL=TRUE, 20 points are chosen by this function +# otherwise three points are used. +# +# The QS effect size is the conditional quantile of the control group corresponding +# to the median of Y, given x, for the experimental group. +# +# +xy=elimna(cbind(x1,y1)) +x1<-as.matrix(x1) +p=ncol(x1) +if(p>1)stop('Current version allows one covariate only') +p1=p+1 +vals=NA +x1<-xy[,1:p] +y1<-xy[,p1] +x1<-as.matrix(x1) +xy=elimna(cbind(x2,y2)) +x2<-as.matrix(x2) +p=ncol(x2) +if(p>1)stop('Current version allows one covariate only') +p1=p+1 +vals=NA +x2<-xy[,1:p] +y2<-xy[,p1] +x2<-as.matrix(x2) +xorder<-order(x1) +y1<-y1[xorder] +x1<-x1[xorder] +xorder<-order(x2) +y2<-y2[xorder] +x2<-x2[xorder] +n1=length(y1) +n2=length(y2) +n=min(c(n1,n2)) +if(xout){ +m<-cbind(x1,y1) +flag<-outfun(x1,plotit=FALSE)$keep +m<-m[flag,] +n1=nrow(m) +x1<-m[,1:p] +y1<-m[,p1] +x1=as.matrix(x1) +m<-cbind(x2,y2) +flag<-outfun(x2,plotit=FALSE)$keep +m<-m[flag,] +n2=nrow(m) +n=min(c(n1,n2)) +x2<-m[,1:p] +y2<-m[,p1] +x2=as.matrix(x2) +} +if(!is.null(pts))npts=length(pts) +if(is.null(pts)){ +xall=unique(c(x1,x2)) +if(ALL)pts=xall +if(!ALL){ +L1=qest(x1,.2) +L2=qest(x2,.2) +U1=qest(x1,.8) +U2=qest(x2,.8) +L=max(L1,L2) +U=min(U1,U2) +if(ALL)pts=seq(L,U,length.out=npts) +else{pts=c(L,(L+U)/2,U) +npts=3 +} +}} +e=reg.pred(x2,y2,xr=pts,regfun=Qreg,q=.5,xout=FALSE) +qs=NA +for(i in 1:npts){ +qs[i]=qinvreg(x1,y1,pts[i],e[i],REQMIN=REQMIN) +} +M=cbind(pts,e,qs) +dimnames(M)=list(NULL,c('Pts','Y.hat4ExpGrp','QS.Effect.Size')) +M +} + +qinvreg<-function(x,y,pt,v,REQMIN=.001){ +# +# Find q such that for Qreg Y hat equals v +# +xy=cbind(x,y) +a=nelderv2(xy,1,qinvreg.sub,START=.5,pt=pt,v=v,REQMIN=REQMIN) +# note: using optim, even with BFGS method, can result in highly inaccurate values +a +} +qinvreg.sub<-function(xy,q,pt,v){ +e=reg.pred(xy[,1],xy[,2],xr=pt,regfun=Qreg,q=q,xout=FALSE) +a=abs(e-v) +a +} + +anclin.QS.CIpb<-function(x1,y1,x2,y2,alpha=.05,pts=NULL,xout=FALSE,ALL=FALSE,npts=10,outfun=outpro,nboot=200, +MC=TRUE,REQMIN=.01,SEED=TRUE,...){ +# +# ANCOVA +# +# Compute a confidence interval for the conditional quantile shift +# measure of effect size +# x1, y1 is the control group +# x2 y2 is the experimental group +# +# for Exp group, estimate the median of Y given x for values stored in +# pts +# pts=NULL: If ALL=TRUE, 20 points are chosen by this function +# otherwise three points are used to reduce execution time. 
+# +# The QS effect size is the conditional quantile of the control group corresponding +# to the median of Y for the experimental group. +# +# +if(SEED)set.seed(2) +xy=elimna(cbind(x1,y1)) +x1=as.matrix(x1) +p=ncol(x1) +if(p>1)stop('Current version allows one covariate only') +p1=p+1 +vals=NA +x1<-xy[,1:p] +y1<-xy[,p1] +x1<-as.matrix(x1) +xy=elimna(cbind(x2,y2)) +x2<-as.matrix(x2) +p=ncol(x2) +if(p>1)stop('Current version allows one covariate only') +p1=p+1 +vals=NA +x2<-xy[,1:p] +y2<-xy[,p1] +x2<-as.matrix(x2) +xorder<-order(x1) +y1<-y1[xorder] +x1<-x1[xorder] +xorder<-order(x2) +y2<-y2[xorder] +x2<-x2[xorder] +n1=length(y1) +n2=length(y2) +n=min(c(n1,n2)) +if(xout){ +m<-cbind(x1,y1) +flag<-outfun(x1,plotit=FALSE,...)$keep +m<-m[flag,] +n1=nrow(m) +x1<-m[,1:p] +y1<-m[,p1] +x1=as.matrix(x1) +m<-cbind(x2,y2) +flag<-outfun(x2,plotit=FALSE,...)$keep +m<-m[flag,] +n2=nrow(m) +n=min(c(n1,n2)) +x2<-m[,1:p] +y2<-m[,p1] +x2=as.matrix(x2) +} +if(!is.null(pts))npts=length(pts) +if(is.null(pts)){ +xall=unique(c(x1,x2)) +if(ALL)pts=xall +if(!ALL){ +L1=qest(x1,.2) +L2=qest(x2,.2) +U1=qest(x1,.8) +U2=qest(x2,.8) +L=max(L1,L2) +U=min(U1,U2) +if(ALL)pts=seq(L,U,length.out=npts) +else{pts=c(L,(L+U)/2,U) +npts=3 +} +}} +v=NA +m=list() +for(i in 1:nboot){ +id1=sample(n1,replace=TRUE) +id2=sample(n2,replace=TRUE) +m[[i]]=list(x1[id1],y1[id1],x2[id2],y2[id2]) +} +if(!MC)v=lapply(m,anclinQS.sub,pts=pts,npts=npts,...) +if(MC){ +library(parallel) +v=mclapply(m,anclinQS.sub,pts=pts,npts=npts,...) +} +v=matl(v) +ilow<-round((alpha/2) * nboot) +ihi<-nboot - ilow +ilow<-ilow+1 +nv=nrow(v) +CI=matrix(NA,nv,2) +pv=NA +for(i in 1:nv){ +pv[i]=mean(v[i,]<0.5) +pv[i]=2*min(pv[i],1-pv[i]) +sv=sort(v[i,]) +CI[i,1]=sv[ilow] +CI[i,2]=sv[ihi] +} +output=matrix(NA,nrow=nv,ncol=4) +output[,1]=pts +output[,2]=pv +output[,3:4]=CI +e=anclin.QS(x1,y1,x2,y2,pts=pts) +e=as.matrix(e[,2:3]) +if(nv==1)e=t(e) +output=cbind(output,e) +dimnames(output)=list(NULL,c('X','p.value','ci.low','ci.hi','Median.ExpGrp','QS.effect')) +output +} +anclinQS.sub<-function(m,pts,npts=npts,...){ +v=anclin.QS(m[[1]],m[[2]],m[[3]],m[[4]],pts=pts,npts=npts,...)[,3] +v +} + +anclinQS.plot<-function(x1,y1,x2,y2,pts=NULL,q=0.1,xout=FALSE,ALL=TRUE,npts=10,line=TRUE, +xlab='X',ylab='QS.Effect',outfun=outpro,REQMIN=.001,...){ +# +# x1, y1 is the control group +# x2 y2 is the experimental group +# +# For Exp group, estimate the median of Y given the x values stored in +# pts +# pts=NULL: If ALL=TRUE, 20 points are chosen by this function +# otherwise three points are used. +# +# The QS effect size is the conditional quantile of the control group corresponding +# to the median of Y, given x, for the experimental group. 
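+# Worked interpretation (hypothetical numbers): if, at a given covariate value, the
+# estimated median of Y for the experimental group falls at the .7 quantile of the
+# control group's conditional distribution, the QS effect size is .7; a value of .5
+# corresponds to no effect at that covariate value.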
+# The function plots estimates of the QS effect size for the points in pts +# +# +xy=elimna(cbind(x1,y1)) +x1<-as.matrix(x1) +p=ncol(x1) +if(p>1)stop('Current version allows one covariate only') +p1=p+1 +vals=NA +x1<-xy[,1:p] +y1<-xy[,p1] +x1<-as.matrix(x1) +xy=elimna(cbind(x2,y2)) +x2<-as.matrix(x2) +p=ncol(x2) +if(p>1)stop('Current version allows one covariate only') +p1=p+1 +vals=NA +x2<-xy[,1:p] +y2<-xy[,p1] +x2<-as.matrix(x2) +xorder<-order(x1) +y1<-y1[xorder] +x1<-x1[xorder] +xorder<-order(x2) +y2<-y2[xorder] +x2<-x2[xorder] +n1=length(y1) +n2=length(y2) +n=min(c(n1,n2)) +if(xout){ +m<-cbind(x1,y1) +flag<-outfun(x1,plotit=FALSE,...)$keep +m<-m[flag,] +n1=nrow(m) +x1<-m[,1:p] +y1<-m[,p1] +x1=as.matrix(x1) +m<-cbind(x2,y2) +flag<-outfun(x2,plotit=FALSE,...)$keep +m<-m[flag,] +n2=nrow(m) +n=min(c(n1,n2)) +x2<-m[,1:p] +y2<-m[,p1] +x2=as.matrix(x2) +} +if(!is.null(pts))npts=length(pts) +if(is.null(pts)){ +if(q<=0 || q>=1)stop('Argument q must be greater than 0 and less than 1') +qu=1-q +L1=qest(x1,q) +L2=qest(x2,q) +U1=qest(x1,qu) +U2=qest(x2,qu) +L=max(L1,L2) +U=min(U1,U2) +if(ALL)pts=seq(L,U,length.out=npts) +else{pts=c(L,(L+U)/2,U) +npts=3 +}} +e=reg.pred(x2,y2,xr=pts,regfun=Qreg,q=.5,xout=FALSE) +qs=NA +for(i in 1:npts){ +qs[i]=qinvreg(x1,y1,pts[i],e[i],REQMIN=REQMIN) +} +M=cbind(pts,e,qs) +if(line){ +plot(pts,qs,xlab=xlab,ylab=ylab,ylim=c(0,1),type='n') +lines(pts,qs) +} +else +plot(pts,qs,xlab=xlab,ylab=ylab,ylim=c(0,1)) +dimnames(M)=list(NULL,c('Pts','Y.hat4ExpGrp','QS.Effect.Size')) +M +} + +ES.summary.sub<-function(x,n1,n2){ +id1=c(1:n1) +n1p=n1+1 +N=n1+n2 +id2=c(n1p:N) +a=ES.summary.CI(x[id1],x[id2],SEED=F)[,8] +} + +ksnorm.test<-function(z)ks.test(z,'pnorm',mean=mean(z),sd=sd(z)) #KS test for normality + +reg.reglev<-function(x,y,plotit=TRUE,xlab='X',ylab='Y',GEN=TRUE,regfun=tsreg,outfun=outpro,pr=TRUE,...){ + +# +# Remove any bad leverage points detected by +# the fit using the estimator indicated by regun +# +# GEN=TRUE: use a generalization of the Rousseeuw van Zomeren method +# GEN=FALSE: usw the Rousseeuw van Zomeren method. Unknown when if ever this older approach +# offers an advantage. +# +xy=elimna(cbind(x,y)) +n=nrow(xy) +x=as.matrix(x) +p=ncol(x) +p1=p+1 +x=xy[,1:p] +y=xy[,p1] +x<-as.matrix(x) +keep=c(1:n) +if(!GEN)a=reglev(x,y,plotit=FALSE,SEED=FALSE)$bad.lev.points +else a=reglev.gen(x,y,plotit=FALSE,regfun=regfun,outfun=outfun)$bad.lev +if(length(a)>0)keep=keep[-a] +nk=length(y[keep]) +e=regfun(x[keep,],y[keep],...) +list(n=n,n.keep=nk,coef=e$coef) +} + + +oph.astig.depbivtotvars<-function(m,alpha=.05){ +# +# This function is designed to compare two variances dependent variables based +# prediction errors when dealing with astigmatism. +# +# Assume m is a matrix or data frame having +# J columns. First two columns first formula, next two columns next formula.. +# +# So J should be an even integer +# +# Compare col 1-2 to 3-4, then 1-2 vs 5-6, etc +# +# returns confidence interval for pairwise difference scores. So using difference scores for 1 and 3 as well as 2 and 4 +# alpha = .05 = .95 confidence intervals +# +# Estimates are adjusted if outliers are found based on a projection method. 
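+# Example (hypothetical, not run): with four columns, columns 1-2 for formula 1 and
+# columns 3-4 for formula 2,
+#   oph.astig.depbivtotvars(m)
+# returns, for each pair of formulas, the two total variances, their ratio and an
+# adjusted p-value (see the column labels of the output matrix).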
+# +J=ncol(m) +nv=NA +for(j in 1:J)nv[j]=length(elimna(m[,j])) +N=J/2 +J1=J-1 +F=NULL +for(j in 1:N)F=c(F,paste('F',j)) +chk.n=names(m) +MAT=matrix(NA,nrow=N,ncol=6) +dimnames(MAT)=list(NULL,c('Form', 'Form','Tot Var 1','Tot Var 2','Ratio','p.adjusted')) +MAT=as.data.frame(MAT) +if(N != floor(N))stop('Should have an even number of columns') +results=list() +results.total=list() +mat=matrix(NA,nrow=N,ncol=2,byrow=TRUE) +v1=seq(1,J1,2) +mat[,1]=v1 +mat[,2]=v1+1 +ic=0 +for(j in 1:N){ +for(k in 1:N){ +if(j1){ +if(p!=ncol(pts))stop('pts should be a matrix with',paste(p),'columns') +} + +x2<-as.matrix(x2) +if(p!=ncol(x2))stop('Number of col. for x1 is not equal to the number of col. for x2') +xy<-cbind(x2,y2) +xy<-elimna(xy) +x2<-xy[,1:p] +y2<-xy[,p1] + +if(xout){ +m<-cbind(x1,y1) +flag<-outfun(x1,plotit=FALSE,...)$keep +m<-m[flag,] +x1<-m[,1:p] +y1<-m[,p1] +m<-cbind(x2,y2) +flag<-outfun(x2,plotit=FALSE,...)$keep +m<-m[flag,] +x2<-m[,1:p] +y2<-m[,p1] +} + +e=NA +for(i in 1:nrow(pts)){ +d1=reg.con.dist(x1,y1,pts=pts[i,]) +d2=reg.con.dist(x2,y2,pts=pts[i,]) +p=NA +for(j in 1:99)p[j]=mean(d1[j]<=d2) +e[i]=mean(p) +} +e +} + + +outmc<-function(x,plotit=FALSE){ +# +# Detect outliers using a modification of Carling's method +# that takes into account skewness +# +x=elimna(x) +temp<-idealf(x) +gval<-(17.63*n-23.64)/(7.74*n-3.71) +M=median(x) +cl=M-gval*2*(M-temp$ql) +cu=M+gval*2*(temp$qu-M) +n=length(x) +flag<-NA +outid<-NA +vec<-c(1:n) +for(i in 1:n){ +flag[i]<-(x[i]< cl || x[i]> cu) +} +if(sum(flag)==0)outid<-NULL +if(sum(flag)>0)outid<-vec[flag] +keep<-vec[!flag] +outval<-x[flag] +n.out=sum(length(outid)) +list(out.val=outval,out.id=outid,keep=keep,n=n,n.out=n.out,cl=cl,cu=cu) +} + +reglev.gen<-function(x,y,regfun=tsreg,outfun=outpro.depth,regout=outpro,crit=sqrt(qchisq(.975,1)), +plotit=TRUE,xlab='X',ylab='Y',outplot=FALSE,DIS=FALSE,...){ +# +# Search for good and bad leverage points using the regression method +# indicated by +# regfun +# +# This is a more general version of reglev. +# Here, can specify the regression estimator and outlier detection method. +# +#. plotit=TRUE. Point marked o are bad leverage points +# +# When x is univariate and has a skewed distribution, suggest using outfun=outmc +# +# x is an n by p matrix +# +# Strategy: first determine whether there are any leverage points +# If yes, remove them and estimate the slopes and intercept +# Based on this fit, compute residuals using all of the data. +# Check for outliers among the residuals using MAD-median rule +# Bad leverage point is a leverage points for which the residual is an outlier. +# +# VALUE: +# keep indicates which points are not bad leverage points. +# +# if DIS=TRUE, distances used to determine leverage points are returned. +# +xy=elimna(cbind(x,y)) +nkeep=c(1:nrow(xy)) +x=as.matrix(x) +p=ncol(x) +p1=p+1 +x=xy[,1:p] +y=xy[,p1] +x<-as.matrix(x) +d=outfun(x,plotit=outplot,...) 
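+# The steps below implement the strategy described above: points flagged by outfun
+# are treated as leverage points, the regression fit is based on the remaining
+# points, and a leverage point whose residual is flagged by the MAD-median rule is
+# classified as a bad leverage point.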
+iout=d$out.id #leverage points +glp=iout +nlp=length(iout) +keep=d$keep +est=regfun(x[keep,],y[keep])$coef +yhat=est[1]+x%*%est[2:p1] +res=y-yhat +dis=abs(res-median(res))/mad(res) +chk<-ifelse(dis>crit,1,0) #residuals outliers +vec<-c(1:nrow(x)) +outid=resid=vec[chk==1] # id which are residuals outliers +keep<-vec[chk==0] +both=c(iout,outid) +blp=duplicated(both) +if(sum(!blp)>0)blp=unique(both[blp]) +else blp=NULL +if(length(blp)>0){ +flag=NULL +for(k in 1:length(blp)){ +flag=c(flag,which(iout==blp[k])) +} +glp=iout[-flag] +} +if(!is.null(blp))regout.n=length(blp) +nkeep=c(1:length(y)) +if(length(blp)>0)nkeep=vec[-blp] +if(ncol(xy)==2){ +if(plotit){ +plot(x,y,xlab=xlab,ylab=ylab,type='n') +points(x[keep],y[keep],pch='.') +points(x[glp],y[glp],pch='*') +points(x[blp],y[blp],pch='o') +}} +list(n.lev=d$n.out,lev.pts=iout,good.lev=glp,bad.lev=blp,res.out.id=resid,keep=nkeep) +} + + +outblp=reglev.gen + +B.outbox<-function(x,mbox=FALSE,gval=NA,plotit=FALSE,STAND=FALSE){ +# +# Uses the method derived by +# Walker, M. L., Dovoedo, Y. H., Chakraborti, S. \& Hilton, C. W. (2018). +# An Improved Boxplot for Univariate Data. {\em American Statistician, 72}, 348--353. +# +# +x<-x[!is.na(x)] # Remove missing values +if(plotit)boxplot(x) +n<-length(x) +temp<-idealf(x) +M=median(x) +Bc=(temp$qu+temp$ql-2*M)/(temp$qu-temp$ql) +if(is.na(gval))gval<-1.5 +cl<-temp$ql-gval*(temp$qu-temp$ql)*((1-Bc)/(1+Bc)) +cu<-temp$qu+gval*(temp$qu-temp$ql)*((1+Bc)/(1-Bc)) +flag<-NA +outid<-NA +vec<-c(1:n) +for(i in 1:n){ +flag[i]<-(x[i]< cl || x[i]> cu) +} +if(sum(flag)==0)outid<-NULL +if(sum(flag)>0)outid<-vec[flag] +keep<-vec[!flag] +outval<-x[flag] +n.out=sum(length(outid)) +list(out.val=outval,out.id=outid,keep=keep,n=n,n.out=n.out,cl=cl,cu=cu) +} + +anc.plot.es<-function(x1,y1,x2,y2,fr1=1,fr2=1,tr=.2,alpha=.05,plotit=TRUE,pts=x1,method='QS',CI=FALSE, +pr=TRUE,xout=FALSE,outfun=out,xlab='X',ylab='Effect.Size',pch='*',pts.only=TRUE,low.span=2/3, +nmin=12,...){ + +# Plot effect size curve. 
Done for each point in x1 for which the number of nearest neighbors for +# both x1 and x2 is > nmin +# nmim default =12 +# +# pts.only=TRUE: plot the estimates +# pts.only=FALSE: add a smoother to the points using LOESS +# low.span control the span +# +# fr1 and fr2 are the spans when looking for the nearest neighbors +# see function near +# +if(pr){ +print('Effect size is based on the argument method, default is quantile shift measure of effect size') +print('Other options: EP, explanatory power; AKP, robust analog of Cohen d; WMW, P(X1)stop('One covariate only is allowed with this function') +if(length(x1)!=length(y1))stop('x1 and y1 have different lengths') +if(length(x2)!=length(y2))stop('x2 and y2 have different lengths') +xy=elimna(cbind(x1,y1)) +x1=xy[,1] +y1=xy[,2] +xy=elimna(cbind(x2,y2)) +x2=xy[,1] +y2=xy[,2] +if(xout){ +flag<-outfun(x1,...)$keep +x1<-x1[flag] +y1<-y1[flag] +flag<-outfun(x2,...)$keep +x2<-x2[flag] +y2<-y2[flag] +} + +xorder<-order(x1) +y1<-y1[xorder] +x1<-x1[xorder] +xorder<-order(x2) +y2<-y2[xorder] +x2<-x2[xorder] +e=NA +x=NA +ic=0 +n1=NA +ci.low=NA +ci.hi=NA +pv=NA +n=length(pts) +for(i in 1:n){ +ysub1=y1[near(x1,pts[i],fr1)] +ysub2=y2[near(x2,pts[i],fr2)] +n1=length(ysub1) +n2=length(ysub2) +if(n1>=nmin & n2>=nmin){ +ic=ic+1 +e[ic]=ESfun(ysub1,ysub2,method=method) +x[ic]=pts[i] +if(CI){ +temp=ESfun.CI(ysub1,ysub2,method=method) +if(identical(method,'WMW')){ +ci.low[ic]=temp$p.ci[1] +ci.hi[ic]=temp$p.ci[2] +pv[ic]=temp$p.value +} +if(!identical(method,'WMW')){ +ci.low[i]=temp$ci[1] +ci.hi[i]=temp$ci[2] +pv[ic]=temp$p.value +} +}}} +if(plotit){ +if(pts.only)plot(x,e,pch=pch,xlab=xlab,ylab=ylab) +else +lplot(x,e,pr=FALSE,xlab=xlab,ylab=ylab,low.span=low.span) +M='Done' +} +if(CI){ +M=cbind(x,e,ci.low,ci.hi,pv) +dimnames(M)=list(NULL,c('X','Est','ci.low','ci.hi','p.value')) +} +M +} + +ancova.KMS.plot<-function(x1,y1,x2,y2,pts=NULL,xlab='X',ylab='Effect Size',xout=FALSE,outfun=outpro,pch='x',line=TRUE){ +# +# +# Plot the robust KMS measure of effect size for the covariate values in pts +# +# pts=NULL, use the uniques values in x1 and x2 +# +xy=elimna(cbind(x1,y1)) +if(ncol(xy)!=2)stop('Only one covariate can be used') +x1=xy[,1] +y1=xy[,2] +xy=elimna(cbind(x2,y2)) +x2=xy[,1] +y2=xy[,2] +if(xout){ +m<-cbind(x1,y1) +flag<-outfun(x1,plotit=FALSE)$keep +m<-m[flag,] +x1<-m[,1] +y1<-m[,2] +m<-cbind(x2,y2) +flag<-outfun(x2,plotit=FALSE)$keep +m<-m[flag,] +x2<-m[,1] +y2<-m[,2] +} +if(is.null(pts)){ +id1=duplicated(x1) +id2=duplicated(x2) +X1=sort(x1[!id1]) +X2=sort(x2[!id2]) +n1=length(X1) +n2=length(X2) +low=max(X1[1],X2[1]) +up=min(X1[n1],X2[n2]) +X12=sort(c(X1,X2)) +flag=(X12>=low & X12<=up) +pts=X12[flag] +} +e=ancova.ES(x1,y1,x2,y2,pts=pts,plotit=FALSE) +plot(e[,1],e[,2],xlab=xlab,ylab=ylab,type='n') +if(line)lines(e[,1],e[,2]) +else +points(e[,1],e[,2],pch=pch) +} + +logistic.LR<-function(x,y,xout=FALSE,outfun=outpro,ROB=FALSE,ADJ=TRUE,reps=5000,SEED=TRUE){ +# +# Logistic regression: +# Likelihood ratio test that all slope parameters are equal to zero. +# ROB = True, initial estimate is based on the Bianco and Yohai (1996) estimator +# +x<-as.matrix(x) +xx<-cbind(x,y) +p1=ncol(xx) +p=p1-1 +xx<-elimna(xx) +x<-xx[,1:ncol(x)] +x<-as.matrix(x) +y<-xx[,p1] +n=nrow(x) +n.keep=n +if(ADJ){ +if(SEED)set.seed(2) +rem=NA +for(i in 1:reps){ +xx=rmul(n,p=p) +yy=rbinom(n,1,.5) +rem[i]=logistic.LR.sub(xx,yy,n) +} +} +if(ROB)xout=FALSE # ROB=T deals with leverage points. 
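+# With ROB=TRUE the initial fit is the Bianco-Yohai estimator, which is intended to
+# limit the influence of leverage points, so the explicit removal step below (xout)
+# is skipped in that case.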
+if(xout){ +x<-as.matrix(x) +flag<-outfun(x,plotit=FALSE,...)$keep +x<-x[flag,] +y<-y[flag] +x<-as.matrix(x) +n.keep=nrow(x) +} +xx=cbind(rep(n.keep,1),x) +e=logreg.pred(x,y,pts=x) +LF=sum(y*log(e))+sum((1-y)*log(1-e)) +py=mean(y) +LN=sum(y*log(py))+sum((1-y)*log(1-py)) +LR.test=-2*(LN-LF) +pv=1-pchisq(LR.test,p) +if(ADJ)pv=1-mean(pv<=rem) +pv +} +logistic.LR.sub<-function(x,y,n){ +p=ncol(x) +xx=cbind(rep(n,1),x) +e=logreg.pred(x,y,pts=x) +LF=sum(y*log(e))+sum((1-y)*log(1-e)) +py=mean(y) +LN=sum(y*log(py))+sum((1-y)*log(1-py)) +LR.test=-2*(LN-LF) +pv=1-pchisq(LR.test,p) +pv +} + + + + +out.dummy<-function(x,outfun=outpro,id,plotit=FALSE,...){ +# +# When using dummy coding in regression +# +# remove col indicated by +# id +# then check for outliers using +# outfun +x=as.matrix(x) +if(ncol(x)==1)stop(' Should have two or more columns') +X=x[,-id] +a=outfun(X,plotit=FALSE) +a +} + +out.by.groups<-function(x,grp.col,outfun=outpro,pr=TRUE,plotit=FALSE,...){ +# +# divide data into groups, id outliers in each group +# return: +# keep = id rows in x not outliers +# out.id =rows containing outliers +# +x=elimna(x) +p=ncol(x) +p1=p+1 +pv=c(1:p) +pv=pv[-grp.col] +#pv=c(pv,p1) +n=nrow(x) +ones=c(1:n) +w=cbind(x,ones) +z=fac2Mlist(w,grp.col=grp.col,c(1:p1),pr=FALSE) +MAT=NULL +for(j in 1:length(z)){ +m=z[[j]] +a=outfun(m[,pv],plotit=FALSE) +MAT=rbind(MAT,m[a$keep,]) +} +keep=MAT[,p1] +ou=ones[-keep] +list(out.id=ou,keep=keep) +} + + +risk.ratio<-function(x1,n1,x2,n2,alpha=.05){ +# +# Risk ratio confidence interval +# +p1=x1/n1 +p2=x2/n2 +rat=p1/p2 +term1=((n1-x1)/x1)/n1 +term2=((n2-x2)/x2)/n2 +term=sqrt(term1+term2) +z=qnorm(1-alpha/2) +LL=log(rat) +v1=LL-z*term +v2=LL+z*term +ci=c(exp(v1),exp(v2)) +# Compute p-value +pv=seq(.001,.999,.001) +for(j in 1:length(pv)){ +pv.rem=pv[j] +z=qnorm(1-pv[j]/2) +v1=LL-z*term +v2=LL+z*term +chk=c(exp(v1),exp(v2)) +if(chk[1]>1 || chk[2]<1)break +} +if(p1==p2)pv.rem=1 +list(p1=p1,p2=p2,RR.est=rat,ci=ci,p.value=pv.rem) +} + +KMS2way<-function(J,K,x,tr=.2,alpha=.05,nboot=999,SEED=TRUE,SW=FALSE){ +# +# Compare robust, heteroscedastic measures of effect size, the KMS measure effect size +# For main effects pool the data over levels and do all pairwise comparisons +# +# Do all interactions +# +if(is.matrix(x) || is.data.frame(x))x=listm(x) +JK=J*K +mat=matrix(1:JK,J,K,byrow=TRUE) +# First do Factor A +dat=list() +for(j in 1:J){ +DAT=NULL +for(k in 1:K){ +DAT=c(as.vector(matl(x[mat[j,k]]))) +} +dat[[j]]=DAT +} +A=KMSmcp.ci(dat,SEED=SEED) +# +# Factor B next +# +dat=list() +for(k in 1:K){ +DAT=NULL +for(j in j:J){ +DAT=c(as.vector(matl(x[mat[j,k]]))) +} +dat[[k]]=DAT +} +B=KMSmcp.ci(dat,SEED=SEED) +AB=KMSinter.mcp(J,K,x,tr=tr,SEED=SEED,SW=SW) +list(Factor.A=A,Factor.B=B,Interactions=AB) +} + +KMSgridAB<-function(x,y,IV=c(1,2),Qsplit1=.5,Qsplit2=.5,tr=.2,VAL1=NULL,VAL2=NULL,PB=FALSE,est=tmean,nboot=1000,pr=TRUE,fun=ES.summary, +xout=FALSE,outfun=outpro,SEED=TRUE,...){ +# +# Split on two variables, not just one. +# +# Qsplit: split the independent variable based on the +# quantiles indicated by Qsplit +# Example +# Qsplit1=c(.25,.5,.75) +# Qsplit2=.5 +# would split based on the quartiles for the first independent variable and the median +# for the second independent variable +# +# Alternatively, can split the data based on specified values indicating by the arguments +# VAL1 and VAL2 +# +# Then test the hypothesis of equal measures of location +# IV[1]: indicates the column of containing the first independent variable to use. 
+# IV[2]: indicates the column of containing the second independent variable to use. +# +# if(length(unique(y)>2))stop('y should be binary') +x=as.matrix(x) +p=ncol(x) +if(p==1)stop('There should be two or more independent variables') +p1=p+1 +xy<-elimna(cbind(x,y)) +x<-xy[,1:p] +y<-xy[,p1] +v=NULL +if(xout){ +flag<-outfun(x,plotit=FALSE,...)$keep +x<-x[flag,] +y<-y[flag] +xy=cbind(x,y) +} +J=length(Qsplit1)+1 +K=length(Qsplit2)+1 +z=list() +group=list() +if(is.null(VAL1) || is.null(VAL2)){ +N.int=length(Qsplit1)+1 +N.int2=length(Qsplit2)+1 +} +else { +J=length(VAL1)+1 +K=length(VAL2)+1 +N.int=length(VAL1)+1 +N.int2=length(VAL2)+1 +} +JK=J*K +MAT=matrix(1:JK,J,K,byrow=TRUE) +est.mat=matrix(NA,nrow=N.int,ncol=N.int2) +n.mat=matrix(NA,nrow=N.int,ncol=N.int2) +DV.mat=matrix(NA,nrow=N.int,ncol=N.int2) +L1=NULL +L2=NULL +if(is.null(VAL1) || is.null(VAL2)){ +qv=quantile(x[,IV[1]],Qsplit1) +qv=c(min(x[,IV[1]]),qv,max(x[,IV[1]])) +qv2=quantile(x[,IV[2]],Qsplit2) +qv2=c(min(x[,IV[2]]),qv2,max(x[,IV[2]])) +} +else{ +qv=VAL1 +qv=c(min(x[,IV[1]]),qv,max(x[,IV[1]])) +qv2=VAL2 +qv2=c(min(x[,IV[2]]),qv2,max(x[,IV[2]])) +} +ic=0 +for(j in 1:N.int){ +j1=j+1 +xsub=binmat(xy,IV[1],qv[j],qv[j1]) +for(k in 1:N.int2){ +k1=k+1 +xsub2=binmat(xsub,IV[2],qv2[k],qv2[k1]) +est.mat[j,k]=est(xsub2[,p1],...) +n.mat[j,k]=length(xsub2[,p1]) +ic=ic+1 +z[[ic]]=xsub2[,p1] +if(length(z[[ic]])>6)group[[ic]]=summary(xsub2[,1:p]) +} +} +n=NA +for(j in 1:length(z)){ +n[j]=length(z[[j]]) +} +if(min(n)<=5){ +id=which(n>5) +del=which(n<=5) +n=n[id] +if(pr)print(paste('For group',del,'the sample size is less than 6')) +} +A=list() +B=list() +for(j in 1:J)A[[j]]=lincon(z[MAT[j,]],tr=tr,pr=FALSE)$psihat +for(j in 1:K)B[[j]]=lincon(z[MAT[,j]],tr=tr,pr=FALSE)$psihat +list(est.loc.4.DV=est.mat,n=n.mat,A=A,B=B,A.effect.sizes=A,B.effect.sizes=B) +} + +AKPmcp.ci<-function(x,tr=.2,alpha=0.05,SEED=TRUE,nboot=500,CI=TRUE,method='hoch'){ +# +# Estimate AKP effect size when comparing all +# pairs of groups in a one-way (independent) groups design +# +# CI=TRUE: confidence intervals for the measure of effect size are computed. +# +if(is.matrix(x) || is.data.frame(x))x<-listm(x) +if(!is.list(x))stop('Data must be stored in a matrix or in list mode.') +J=length(x) +Jall=(J^2-J)/2 +con1=con1way(J) +output=matrix(NA,nrow=Jall,ncol=7) +dimnames(output)=list(NULL,c('Group','Group','Effect.Size','low.ci','up.ci','p.value','p.adjust')) +ic=0 +for(j in 1:J){ +for(k in 1:J){ +if(j1){ +if(p!=ncol(pts))stop('pts should be a matrix with',paste(p),'columns') +} + +x2<-as.matrix(x2) +if(p!=ncol(x2))stop('Number of col. for x1 is not equal to the number of col. for x2') +xy<-cbind(x2,y2) +xy<-elimna(xy) +x2<-xy[,1:p] +y2<-xy[,p1] + +if(xout){ +m<-cbind(x1,y1) +flag<-outfun(x1,plotit=FALSE,...)$keep +m<-m[flag,] +x1<-m[,1:p] +y1<-m[,p1] +m<-cbind(x2,y2) +flag<-outfun(x2,plotit=FALSE,...)$keep +m<-m[flag,] +x2<-m[,1:p] +y2<-m[,p1] +} +e=NA +for(i in 1:nrow(pts)){ +d1=reg.con.dist(x1,y1,pts=pts[i,]) +d2=reg.con.dist(x2,y2,pts=pts[i,]) +e[i]=shiftQS(d1,d2) +} +e +} +QS.ancbse<-function(x1,y1,x2,y2,pts,nboot=100,SEED=TRUE,MC=FALSE,null.value=.5, +xout=FALSE,outfun=outpro,alpha=.05,...){ +# +# ANCOVA based on quantile shift measure of effect size. +# +# +# pts indicates the covariance values for which the groups will be compared. + +x1<-as.matrix(x1) +p1<-ncol(x1)+1 +p<-ncol(x1) +xy<-cbind(x1,y1) +xy<-elimna(xy) +x1<-xy[,1:p] +y1<-xy[,p1] + +x2<-as.matrix(x2) +if(p!=ncol(x2))stop('Number of col. for x1 is not equal to the number of col. 
for x2') +xy<-cbind(x2,y2) +xy<-elimna(xy) +x2<-xy[,1:p] +y2<-xy[,p1] + +if(xout){ +m<-cbind(x1,y1) +flag<-outfun(x1,plotit=FALSE,...)$keep +m<-m[flag,] +x1<-m[,1:p] +y1<-m[,p1] +m<-cbind(x2,y2) +flag<-outfun(x2,plotit=FALSE,...)$keep +m<-m[flag,] +x2<-m[,1:p] +y2<-m[,p1] +} +if(SEED)set.seed(2) +e=QSanc(x1,y1,x2,y2,pts=pts) +e=as.vector(matl(e)) +npt=length(pts) +ci=matrix(NA,npt,2) +LAB=NULL +for(j in 1:npt)LAB[j]=paste('pts',j) +dimnames(ci)=list(LAB,c('ci.low','ci.up')) +E=matrix(NA,nboot,npt) +n1=length(y1) +n2=length(y2) +bl=list() +for(k in 1:nboot){ +id1=sample(n1,replace=TRUE) +id2=sample(n2,replace=TRUE) +bl[[k]]=list(x1[id1],y1[id1],x2[id2],y2[id2]) +} +if(!MC)temp=lapply(bl,QS.ancbse.sub,pts) +else{ +library(parallel) +temp=mclapply(bl,QS.ancbse.sub,pts) +} +tv=list() +for(j in 1:nboot)tv[[j]]=as.vector(matl(temp[[j]][1:npt])) +E=matl(tv) +E=as.matrix(E) +se=apply(E,1,sd) +ci[,1]<-e-qnorm(1-alpha/2)*se +ci[,2]<-e+qnorm(1-alpha/2)*se +test<-(e-null.value)/se +sig<-2*(1-pnorm(abs(test))) +list(Est=e,SE=se,test.stat=test,conf.int=ci,p.value=sig) +} + +QS.ancbse.sub<-function(m,pts){ +v=QSanc(m[[1]],m[[2]],m[[3]],m[[4]],pts=pts) +v +} + +getBetaHdi <- function(a, b, width) { +eps <- 1e-9 +if (a < 1 + eps & b < 1 + eps) # Degenerate case +return(c(NA, NA)) +if (a < 1 + eps & b > 1) # Left border case +return(c(0, width)) +if (a > 1 & b < 1 + eps) # Right border case +return(c(1 - width, 1)) +if (width > 1 - eps) +return(c(0, 1)) +# Middle case +mode <- (a - 1) / (a + b - 2) +pdf <- function(x) dbeta(x, a, b) +l <- uniroot( +f = function(x) pdf(x) - pdf(x + width), +lower = max(0, mode - width), +upper = min(mode, 1 - width), +tol = 1e-9 +)$root +r <- l + width +return(c(l, r)) +} + +thd<- function(x, q=.5, width = 1 / sqrt(length(x))) +sapply(q, function(p) { +# +# q =quantiles to be estimated +# +n <- length(x) +if (n == 0) return(NA) +if (n == 1) return(x) +x <- sort(x) +a <- (n + 1) * p +b <- (n + 1) * (1 - p) +hdi <- getBetaHdi(a, b, width) +hdiCdf <- pbeta(hdi, a, b) +cdf <- function(xs) { +xs[xs <= hdi[1]] <- hdi[1] +xs[xs >= hdi[2]] <- hdi[2] +(pbeta(xs, a, b) - hdiCdf[1]) / (hdiCdf[2] - hdiCdf[1]) +} +iL <- floor(hdi[1] * n) +iR <- ceiling(hdi[2] * n) +cdfs <- cdf(iL:iR/n) +W <- tail(cdfs, -1) - head(cdfs, -1) +sum(x[(iL+1):iR] * W) +}) + +t2way.KMS.curve<-function(x,y,pts=NULL,SW=FALSE,npts=15,xlab='X',ylab='Effect.Size'){ +# +# For a 2-by-2 design, compare +# KMS measure of effect size associated with the two levels of the first factor +# plots an interaction effect when there is a covariate. +# +# SW=TRUE, switches rows and column + +if(is.matrix(x) || is.data.frame(x))x=listm(x) +if(is.matrix(y) || is.data.frame(y))y=listm(y) +if(length(x)!=4)stop('Should have four groups exactly. Fix argument x') +if(length(y)!=4)stop('Should have four groups exactly. 
Fix argument y') +for(j in 1:4){ +xy=elimna(cbind(x[[j]],y[[j]])) +x[[j]]=xy[,1] +y[[j]]=xy[,2] +} +n=lapply(y,length) +n=as.vector(matl(n)) +adj=4.4/n+1.00086 +flag=n>150 +adj[flag]=1 +adj=mean(adj) +nmax=max(n) +if(is.null(pts)){ +xlow=max(matl((lapply(x,qest,.1)))) +xhi=min(matl((lapply(x,qest,.9)))) +pts=seq(xlow,xhi,length=npts) +} +nv=lapply(x,length) +if(SW){ +x=x[c(1,3,2,4)] +y=y[c(1,3,2,4)] +} +v1=ancova.KMS(x[[1]],y[[1]],x[[2]],y[[2]],pts=pts,plotit=FALSE)[,2] +v2=ancova.KMS(x[[3]],y[[3]],x[[4]],y[[4]],pts=pts,plotit=FALSE)[,2] +v=v1-v2 +plot(pts,v,xlab=xlab,ylab=ylab,type='n') +lines(pts,v) +} + + +t2way.KMS.interbt<-function(x,y,pts=NULL,alpha=.05,nboot=100,MC=FALSE,SEED=TRUE,SW=FALSE){ +# +# For a 2-by-2 design, compare +# KMS measure of effect size associated with the two levels of the first factor +# to get an interaction effect when there is a covariate. +# +# SW=TRUE, switches rows and column + +if(SEED)set.seed(2) +if(is.matrix(x) || is.data.frame(x))x=listm(x) +if(is.matrix(y) || is.data.frame(y))y=listm(y) +if(length(x)!=4)stop('Should have four groups exactly. Fix argument x') +if(length(y)!=4)stop('Should have four groups exactly. Fix argument y') +for(j in 1:4){ +xy=elimna(cbind(x[[j]],y[[j]])) +x[[j]]=xy[,1] +y[[j]]=xy[,2] +} +n=lapply(y,length) +nn=as.vector(matl(n)) +adj=4.4/nn+1.00086 +flag=nn>150 +adj[flag]=1 +adj=mean(adj) +nmax=max(nn) +if(is.null(pts)){ +xlow=max(matl((lapply(x,qest,.1)))) +xhi=min(matl((lapply(x,qest,.9)))) +pts=c(xlow,(xlow+xhi)/2,xhi) +} +nv=lapply(x,length) +if(SW){ +x=x[c(1,3,2,4)] +y=y[c(1,3,2,4)] +} +npts=length(pts) +MAT1=matrix(NA,nmax,4) +MAT2=matrix(NA,nmax,4) +dat1=list() +dat2=list() +for(i in 1:nboot){ +id1=sample(nv[[1]],replace=TRUE) +id2=sample(nv[[2]],replace=TRUE) +MAT1[1:nv[[1]],1:2]=cbind(x[[1]][id1],y[[1]][id1]) +MAT1[1:nv[[2]],3:4]=cbind(x[[2]][id2],y[[2]][id2]) +dat1[[i]]=MAT1 +id1=sample(nv[[3]],replace=TRUE) +id2=sample(nv[[4]],replace=TRUE) +MAT2[1:nv[[3]],1:2]=cbind(x[[3]][id1],y[[3]][id1]) +MAT2[1:nv[[4]],3:4]=cbind(x[[4]][id2],y[[4]][id2]) +dat2[[i]]=MAT2 +} +if(MC){ +library(parallel) +a1=mclapply(dat1,t2way.KMS.inter.sub,pts=pts) +a2=mclapply(dat2,t2way.KMS.inter.sub,pts=pts) +} +if(!MC){ +a1=lapply(dat1,t2way.KMS.inter.sub,pts=pts) +a2=lapply(dat2,t2way.KMS.inter.sub,pts=pts) +} +a1=t(matl(a1)) +a2=t(matl(a2)) + +zq=qnorm(1-alpha/2) +sqse1=NA +sqse2=NA +for(j in 1:npts)sqse1[j]=var(a1[,j]) +for(j in 1:npts)sqse2[j]=var(a2[,j]) +Results=matrix(NA,npts,10) +Results[,1:2]=ancova.KMS(x[[1]],y[[1]],x[[2]],y[[2]],pts=pts,plotit=FALSE) +Results[,3]=ancova.KMS(x[[3]],y[[3]],x[[4]],y[[4]],pts=pts,plotit=FALSE)[,2] +Results[,4]=Results[,2]-Results[,3] +Results[,5]=adj*Results[,4]/sqrt(sqse1+sqse2) # n=40 1.15 work well get .054 +pv=2*(1-pnorm(abs(Results[,5]))) +Results[,6]=pv +Results[,7]=Results[,4]-zq*sqrt(sqse1+sqse2)/adj +Results[,8]=Results[,4]+zq*sqrt(sqse1+sqse2)/adj +Results[,9]=sqrt(sqse1+sqse2)/adj +Results[,10]=p.adjust(pv,method='hoch') +dimnames(Results)=list(NULL,c('pts','Est1','Est2','Dif','Test.Stat','p.value','ci.low','ci.up','SE','p.adjusted')) +n=matl(nv) +n=as.vector(n) +list(n=nn,Results=Results) +} + +t2way.KMS.inter.sub<-function(z,pts){ +ancova.KMS(z[,1],z[,2],z[,3],z[,4],pts=pts,plotit=FALSE)[,2] +} + +regblp.ci<- +function(x,y,regfun=tsreg,GEN=TRUE,nboot=599,alpha=.05,plotit=FALSE,pr=FALSE,MC=FALSE, +xlab='Predictor 1',ylab='Predictor 2',SEED=TRUE,...){ +# +# Compute a .95 confidence interval for each of the parameters of +# a linear regression equation using a method that removes bad +# 
leverage points +# +# GEN=TRUE: Use a modified version of the Rousseeuw and van Zomeren method, recommended. +# else, use Rousseeuw and van Zomeren method +# +# The predictor values are assumed to be in the n by p matrix x. +# The default number of bootstrap samples is nboot=599 +# +# plotit=TRUE: If there are two predictors, plot 1-alpha confidence region based +# on the bootstrap samples. +# +x<-as.matrix(x) +p1<-ncol(x)+1 +p<-ncol(x) +xy<-cbind(x,y) +xy<-elimna(xy) +x<-xy[,1:p] +y<-xy[,p1] +x=as.matrix(x) +n=nrow(x) +# Check or bad leverage points and remove any that are found +# +if(GEN)id=reglev.gen(x,y,regfun=regfun,plotit=FALSE)$bad.lev +else +id=reglev(x,y)$bad.lev.points +if(length(id)>0){ +xy=cbind(x,y) +xy=xy[-id,] +x<-xy[,1:p] +y<-xy[,p1] +} +nk=nrow(xy) +if(MC)e=regciMC(x,y,nboot=nboot,regfun=regfun,alpha=alpha,SEED=SEED,plotit=plotit,xlab=xlab,ylab=ylab,pr=pr) +if(!MC)e=regci(x,y,nboot=nboot,regfun=regfun,alpha=alpha,SEED=SEED,plotit=plotit,xlab=xlab,ylab=ylab,pr=pr) +e$n=n +e$n.keep=nk +e +} + +part.cor<-function(x,y,z,corfun=wincor,regfun=MMreg,plotit=FALSE,xout=FALSE,GEN=TRUE,BOOT=TRUE,SEED=TRUE,nboot=599, +XOUT.blp=TRUE,plot.out=FALSE, +outfun=outpro,plotfun=plot,xlab='Res 1',ylab='Res 2',...){ +# +# Robust partial correlation. +# Uses the correlation between the residuals of x with z and y with z +# +# Default is a Winsorized correlation between x and y controlling for z +# XOUT.blp=TRUE means that if any bad leverage points are detected, they are removed +# +# +# If XOUT.blp=FALSE +# and +# xout=TRUE remove leverage points. If +# GEN =TRUE, remove only bad leverage when dealing with the association between +#. x and z as well as y and z. In contrast, +# XOUT.blp=TRUE permanently removes bad leverage points associated the +# regression line for x and y, where x is the independent variable. +# +# if z contains a dummy variable, can ignore the corresponding col when removing outliers +#. Example +# part.cor(x,y,z,GEN=FALSE,outfun=out.dummy,id=2,xout=TRUE) +# +# Examples: +# part.cor(x,y,z,regfun=MMreg,corfun=wincor) +# part.cor(x,y,z,regfun=MMreg,corfun=cor.test,method='kendall') +# part.cor(x,y,z,regfun=MMreg,corfun=cor.test,method='spear') +# part.cor(x,y,z,regfun=MMreg,corfun=scor) #skpped correlation, +# +# +xyz=elimna(cbind(x,y,z)) +p3=ncol(xyz) +p1=p3-1 +x=xyz[,1] +y=xyz[,2] +z=xyz[,3:p3] +z=as.matrix(z) +if(XOUT.blp){ +id=outblp(x,y)$keep +x=x[id] +y=y[id] +z=z[id,] +xout=FALSE +} +if(xout){ +if(GEN){ +e1=reg.reglev(z,x,regfun=regfun)$coef +e2=reg.reglev(z,y,regfun=regfun)$coef +} +else{ +e1=regfun(z,x,xout=xout,outfun=outfun,...)$coef +e2=regfun(z,y,xout=xout,outfun=outfun,...)$coef +} +} +if(!xout){ +e1=regfun(z,x)$coef +e2=regfun(z,y)$coef +} +z=as.matrix(z) +res1=x-z%*%e1[2:p1]-e1[1] +res2=y-z%*%e2[2:p1]-e2[1] +if(plotit){ +if(plot.out){ +id=outpro(res1)$keep +res1=res1[id] +res2=res2[id] +} +if(identical(plotfun,plot))plot(res1,res2,xlab=xlab,ylab=ylab) +else plotfun(res1,res2,xlab=xlab,ylab=ylab,pr=FALSE) +} +if(BOOT)est=corb(res1,res2,corfun,SEED=SEED,nboot=nboot) +else +est=corfun(res1,res2) +est +} + +qcomthd<-function(x,y,q=c(.1,.25,.5,.75,.9),nboot=2000,plotit=TRUE,SEED=TRUE,xlab='Group 1',ylab='Est.1-Est.2',alpha=.05,ADJ.CI=TRUE,MC=FALSE){ +# +# Compare quantiles using pb2gen using trimmed version of the Harrell-Davis estimator +#Tied values are allowed. +# +# ADJ.CI=TRUE means that the confidence intervals are adjusted based on the level used by the corresponding +# test statistic. 
If a test is performed with at the .05/3 level, for example, the confidence returned has +# 1-.05/3 probability coverage. +# +# When comparing lower or upper quartiles, both power and the probability of Type I error +# compare well to other methods that have been derived. +# q: can be used to specify the quantiles to be compared +# q defaults to comparing the .1,.25,.5,.75, and .9 quantiles +# +# Function returns p-values and critical p-values based on Hochberg's method. +# + +if(SEED)set.seed(2) +pv=NULL +output=matrix(NA,nrow=length(q),ncol=10) +dimnames(output)<-list(NULL,c('q','n1','n2','est.1','est.2','est.1_minus_est.2','ci.low','ci.up','p-value','adj.p.value')) +for(i in 1:length(q)){ +output[i,1]=q[i] +output[i,2]=length(elimna(x)) +output[i,3]=length(elimna(y)) +output[i,4]=thd(x,q=q[i]) +output[i,5]=thd(y,q=q[i]) +output[i,6]=output[i,4]-output[i,5] +temp=qcomthd.sub(x,y,nboot=nboot,q=q[i],SEED=FALSE,alpha=alpha,MC=MC) +output[i,7]=temp$ci[1] +output[i,8]=temp$ci[2] +output[i,9]=temp$p.value +} +temp=order(output[,9],decreasing=TRUE) +zvec=alpha/c(1:length(q)) +zvec[temp]=zvec +if(ADJ.CI){ +for(i in 1:length(q)){ +if(!MC)temp=pb2gen(x,y,nboot=nboot,est=thd,q=q[i],SEED=FALSE,alpha=zvec[i],pr=FALSE) +else +temp=pb2genMC(x,y,nboot=nboot,est=thd,q=q[i],SEED=FALSE,alpha=zvec[i],pr=FALSE) +output[i,7]=temp$ci[1] +output[i,8]=temp$ci[2] +output[i,9]=temp$p.value +} +temp=order(output[,10],decreasing=TRUE) +} +output[,10]=p.adjust(output[,9],method='hoch') + + +if(plotit){ +xax=rep(output[,4],3) +yax=c(output[,6],output[,7],output[,8]) +plot(xax,yax,xlab=xlab,ylab=ylab,type='n') +points(output[,4],output[,6],pch='*') +lines(output[,4],output[,6]) +points(output[,4],output[,7],pch='+') +points(output[,4],output[,8],pch='+') +} +output +} + +qcomhd.sub<-function(x,y,q,alpha=.05,nboot=2000,SEED=TRUE,MC=TRUE){ +# +x<-x[!is.na(x)] # Remove any missing values in x +y<-y[!is.na(y)] # Remove any missing values in y +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +datax<-matrix(sample(x,size=length(x)*nboot,replace=TRUE),nrow=nboot) +datay<-matrix(sample(y,size=length(y)*nboot,replace=TRUE),nrow=nboot) +datax=listm(t(datax)) +datay=listm(t(datay)) +if(MC){ +library(parallel) +bvecx<-mclapply(datax,hd,q,mc.preschedule=TRUE) +bvecy<-mclapply(datay,hd,q,mc.preschedule=TRUE) +} +else{ +bvecx<-lapply(datax,hd,q) +bvecy<-lapply(datay,hd,q) +} +bvecx=as.vector(matl(bvecx)) +bvecy=as.vector(matl(bvecy)) +bvec<-sort(bvecx-bvecy) +low<-round((alpha/2)*nboot)+1 +up<-nboot-low +temp<-sum(bvec<0)/nboot+sum(bvec==0)/(2*nboot) +sig.level<-2*(min(temp,1-temp)) +se<-var(bvec) +list(est.1=hd(x,q),est.2=hd(y,q),ci=c(bvec[low],bvec[up]),p.value=sig.level,sq.se=se,n1=length(x),n2=length(y)) +} + +qcomthd.sub<-function(x,y,q,alpha=.05,nboot=2000,SEED=TRUE,MC=TRUE){ +# +x<-x[!is.na(x)] # Remove any missing values in x +y<-y[!is.na(y)] # Remove any missing values in y +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. 
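+# The code below forms nboot bootstrap estimates of the difference between the two
+# trimmed Harrell-Davis quantile estimates; the confidence interval is the usual
+# percentile bootstrap interval and the p-value is twice the smaller of the
+# proportions of bootstrap differences below and above zero (ties split evenly).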
+datax<-matrix(sample(x,size=length(x)*nboot,replace=TRUE),nrow=nboot) +datay<-matrix(sample(y,size=length(y)*nboot,replace=TRUE),nrow=nboot) +datax=listm(t(datax)) +datay=listm(t(datay)) +if(MC){ +library(parallel) +bvecx<-mclapply(datax,thd,q,mc.preschedule=TRUE) +bvecy<-mclapply(datay,thd,q,mc.preschedule=TRUE) +} +else{ +bvecx<-lapply(datax,thd,q) +bvecy<-lapply(datay,thd,q) +} +bvecx=as.vector(matl(bvecx)) +bvecy=as.vector(matl(bvecy)) +bvec<-sort(bvecx-bvecy) +low<-round((alpha/2)*nboot)+1 +up<-nboot-low +temp<-sum(bvec<0)/nboot+sum(bvec==0)/(2*nboot) +sig.level<-2*(min(temp,1-temp)) +se<-var(bvec) +list(est.1=thd(x,q),est.2=thd(y,q),ci=c(bvec[low],bvec[up]),p.value=sig.level,sq.se=se,n1=length(x),n2=length(y)) +} + + +bw.2by2.int.es<-function(x,CI=FALSE){ +# +# Form difference scores and compute several measures of effect size +# +# if CI=TRUE, compute confidence intervals +# +if(is.matrix(x) || is.data.frame(x))x=listm(x) +d1=x[[1]]-x[[2]] +d2=x[[3]]-x[[4]] +if(!CI)a=ES.summary(d1,d2) +else +a=ES.summary.CI(d1,d2) +a +} + +bw.int.es<-function(J,K,x,method='KMS',tr=.2,SEED=TRUE,nboot=2000,CI=FALSE){ +# +# All Interactions based on difference scores +# +#. Choices for method: 'EP','QS','QStr','AKP','WMW','KMS' +# +if(is.matrix(x) || is.data.frame(x))x=listm(x) +con=con2way(J,K)$conAB +num=ncol(con) +if(CI){ +CON=matrix(NA,nrow=num,ncol=8) +dimnames(CON)=list(NULL,c('Con.num','n1','n2','Est.','ci.low','ci.up','p.value','p.adjusted')) +} +if(!CI){ +CON=matrix(NA,nrow=num,ncol=2) +dimnames(CON)=list(NULL,c('Con.num','Est.')) +} + +for(j in 1:ncol(con)){ +id=which(con[,j]!=0) +dat=x[id] +d1=dat[[1]]-dat[[2]] +d2=dat[[3]]-dat[[4]] +if(!CI){ +temp=ESfun(d1,d2) +CON[j,1]=j +CON[j,2]=temp +} +else{ +temp=ESfun.CI(d1,d2) +CON[j,1]=j +a=ESfun.CI(d1,d2) +CON[j,2]=a$n1 +CON[j,3]=a$n2 +CON[j,4]=a$effect.size +CON[j,5]=a$ci[1] +CON[j,6]=a$ci[2] +CON[j,7]=a$p.value +} +} +if(CI) +CON[,8]=p.adjust(CON[,7],method='hoch') +list(CON=CON,con=con) +} + +quant<-function(x,q=.5,names=TRUE,na.rm=TRUE,type=8){ +# +# For convenience, follow style of other functions in Rallfun +# when using the R function quanitle. Also, use by default the estimator +# recommended by Hyndman and Fan (1996). +# +a=quantile(x,probs=q,names=names,type=type,na.rm=na.rm) +a +} + +qghdist<-function(q=.5,g=0,h=0){ +# +# Determine quantile of a g-and-h distribution +# +e=qnorm(q) +v=ghtransform(e,g=g,h=h) +v +} + +MED.ES<-function(x,tr=.25,null.val=0,est=median){ +# +# One-sample effect size analog of Cohen's d based on the median +# and either MAD or Winsorized standard deviation rescaled to estimate the standard deviation when +# sampling from a normal distribution +# +x=elimna(x) +e=est(x) +bot=mad(x) +if(bot==0)bot=winsdN(x,tr=tr) +if(bot==0)stop('Both measures of scale are equal to zero') +es=(e-null.val)/bot +es +} + + +winsdN<-function(x,tr=.2){ +# +# Rescale a Winsorized standard deviation so that it estimates +# the population standard deviation under normality. +# +library(MASS) +x=elimna(x) +e=winsd(x,tr=tr) +if(tr==0)cterm=1 +else +cterm=area(dnormvar,qnorm(tr),qnorm(1-tr))+2*(qnorm(tr)^2)*tr +cterm=sqrt(cterm) +e=e/cterm +e +} + +qcomhd<-function(x,y,est=hd,q=c(.1,.25,.5,.75,.9),nboot=4000,plotit=TRUE,SEED=TRUE,xlab='Group 1',ylab='Est.1-Est.2',alpha=.05,ADJ.CI=TRUE,MC=FALSE){ +# +# Compare quantiles using pb2gen using trimmed version of the Harrell-Davis estimator +# Tied values are allowed. +# +# ADJ.CI=TRUE means that the confidence intervals are adjusted based on the level used by the corresponding +# test statistic. 
If a test is performed with at the .05/3 level, for example, the confidence returned has +# 1-.05/3 probability coverage. +# +# When comparing lower or upper quartiles, both power and the probability of Type I error +# compare well to other methods that have been derived. +# q: can be used to specify the quantiles to be compared +# q defaults to comparing the .1,.25,.5,.75, and .9 quantiles +# +# Function returns p-values and critical p-values based on Hochberg's method. +# + +if(SEED)set.seed(2) +pv=NULL +output=matrix(NA,nrow=length(q),ncol=10) +dimnames(output)<-list(NULL,c('q','n1','n2','est.1','est.2','est.1_minus_est.2','ci.low','ci.up','p-value','adj.p.value')) +for(i in 1:length(q)){ +output[i,1]=q[i] +output[i,2]=length(elimna(x)) +output[i,3]=length(elimna(y)) +output[i,4]=hd(x,q=q[i]) +output[i,5]=hd(y,q=q[i]) +output[i,6]=output[i,4]-output[i,5] +temp=qcomhd.sub(x,y,nboot=nboot,q=q[i],SEED=FALSE,alpha=alpha,MC=MC) +output[i,7]=temp$ci[1] +output[i,8]=temp$ci[2] +output[i,9]=temp$p.value +} +temp=order(output[,9],decreasing=TRUE) +zvec=alpha/c(1:length(q)) +zvec[temp]=zvec +if(ADJ.CI){ +for(i in 1:length(q)){ +if(!MC)temp=pb2gen(x,y,nboot=nboot,est=est,q=q[i],SEED=FALSE,alpha=zvec[i],pr=FALSE) +else +temp=pb2genMC(x,y,nboot=nboot,est=est,q=q[i],SEED=FALSE,alpha=zvec[i],pr=FALSE) +output[i,7]=temp$ci[1] +output[i,8]=temp$ci[2] +output[i,9]=temp$p.value +} +temp=order(output[,10],decreasing=TRUE) +} +output[,10]=p.adjust(output[,9],method='hoch') +if(plotit){ +xax=rep(output[,4],3) +yax=c(output[,6],output[,7],output[,8]) +plot(xax,yax,xlab=xlab,ylab=ylab,type='n') +points(output[,4],output[,6],pch='*') +lines(output[,4],output[,6]) +points(output[,4],output[,7],pch='+') +points(output[,4],output[,8],pch='+') +} +output +} + + chk.lin<-function(x,y,regfun=tsreg,xout=FALSE,outfun=outpro,LP=TRUE,...){ +# +# Check for linearity by plotting predicted vs residuals. +# +xy=cbind(x,y) +xy=elimna(xy) +p1=ncol(xy) +p=p1-1 +x=xy[,1:p] +y=xy[,p1] +x=as.matrix(x) +if(xout){ +id=outfun(x)$keep +x=x[id,] +y=y[id] +} +x=as.matrix(x) +if(identical(regfun,MMreg))res=MMreg(x,y,RES=TRUE)$residuals +else +res=regfun(x,y)$residuals +pre=reg.pred(x,y) +if(LP)q=lplot(pre,res,ylab='Res',xlab='Yhat',pr=FALSE) +else +plot(pre,res,ylab='Res',xlab='Yhat') +} + +wincovN<-function(x,y=NULL,tr=0.2){ +# +# Winsorized covariance rescaled to est cov under normality when there is no trimming +# +library(MASS) +e=wincor(x,y,tr=tr)$cov +if(tr==0)cterm=1 +else cterm=area(dnormvar,qnorm(tr),qnorm(1-tr))+2*(qnorm(tr)^2)*tr +e=e/cterm +e +} + +rm.marg.es<-function(x,y=NULL,tr=0.2){ +# +# Analog of robust version KMS measure of effect size for two +# dependent groups +# +library(MASS) +if(!is.null(y))x=cbind(x,y) +x=elimna(x) +if(ncol(x)>2)stop('Should have only two variables') +v1=winsdN(x[,1],tr=tr)^2 +v2=winsdN(x[,2],tr=tr)^2 +v3=wincovN(x,tr=tr) +a=v1+v2 -2*v3 +e=sqrt(2)*(mean(x[,1],tr=tr)-mean(x[,2],tr=tr))/sqrt(a) +e +} + +rm.marg.esCI<-function(x,y=NULL,tr=.2,nboot=1000,SEED=TRUE,alpha=.05, +null.val=0,MC=FALSE,...){ +# +# Two dependent groups. +# Confidence interval for effect size that takes into account heteroscedasticity as well as the +# association between X and Y based on the marginal distributions, not the +# difference scores. For robust estimators, these two approaches generally give +# different results. 
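+# For reference, the effect size being bootstrapped here is the one computed by
+# rm.marg.es: es = sqrt(2)*(Xbar.t1 - Xbar.t2)/sqrt(v1 + v2 - 2*v12), where Xbar.t1
+# and Xbar.t2 are the trimmed means and v1, v2, v12 are the Winsorized variances and
+# covariance rescaled to estimate their values under normality.
+# Example (not run): rm.marg.esCI(x,y,tr=.2)  # x, y are the two dependent measures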
+# +library(MASS) +if(!is.null(y))x=cbind(x,y) +x=elimna(x) +if(SEED)set.seed(2) +e=rm.marg.es(x,tr=tr) +n=nrow(x) +if(!MC){ +v=NA +for(i in 1:nboot){ +id=sample(n,replace=TRUE) +v[i]=rm.marg.es(x[id,],tr=tr) +} +} +if(MC){ +library(parallel) +d=list() +for(j in 1:nboot){ +id=sample(n,replace=TRUE) +d[[j]]=x[id,] +} +v=mclapply(d,rm.marg.es,tr=tr) +v=matl(v) +} + +v=sort(v) +ilow<-round((alpha/2) * nboot) +ihi<-nboot - ilow +ilow<-ilow+1 +ci=v[ilow] +ci[2]=v[ihi] +pv=mean(v0){ +print("Bootstrap estimates of location could not be computed") +print("This can occur when using an M-estimator") +print("Might try est=tmean") +} +bcon<-t(con)%*%bvec #C by nboot matrix +tvec<-t(con)%*%mvec +tvec<-tvec[,1] +tempcen<-apply(bcon,1,mean) +vecz<-rep(0,ncol(con)) +bcon<-t(bcon) +smat<-var(bcon-tempcen+tvec) +temp<-bcon-tempcen+tvec +bcon<-rbind(bcon,vecz) +if(op==1)dv<-mahalanobis(bcon,tvec,smat) +if(op==2){ +smat<-cov.mcd(temp)$cov +dv<-mahalanobis(bcon,tvec,smat) +} +if(op==3){ +if(!MC)dv<-pdis(bcon,MM=MM,cop=cop) +if(MC)dv<-pdisMC(bcon,MM=MM,cop=cop) +} +bplus<-nboot+1 +sig.level<-1-sum(dv[bplus]>=dv[1:nboot])/nboot +list(p.value=sig.level,psihat=tvec,con=con,n=nvec) +} + +pbad3way<-function(J,K,L,x,est=tmean,alpha=.05,nboot=2000,MC=FALSE){ +# +# Three-way ANOVA for robust measures of locaton +# To compare medians, use est=hd, in case which tied values are allowed. +# +if(is.matrix(x)|| is.data.frame(x))x=listm(x) +chkcar=NA +for(j in 1:length(x))chkcar[j]=length(unique(x[[j]])) +if(min(chkcar)<14){ +print('Warning: Sample size is less than') +print('14 for one more groups. Type I error might not be controlled') +} +con=con3way(J,K,L) +A=pbadepth(x,est=est,con=con$conA,alpha=alpha,nboot=nboot,MC=MC) +B=pbadepth(x,est=est,con=con$conB,alpha=alpha,nboot=nboot,MC=MC) +C=pbadepth(x,est=est,con=con$conC,alpha=alpha,nboot=nboot,MC=MC) +AB=pbadepth(x,est=est,con=con$conAB,alpha=alpha,nboot=nboot,MC=MC) +AC=pbadepth(x,est=est,con=con$conAC,alpha=alpha,nboot=nboot,MC=MC) +BC=pbadepth(x,est=est,con=con$conBC,alpha=alpha,nboot=nboot,MC=MC) +ABC=pbadepth(x,est=est,con=con$conABC,alpha=alpha,nboot=nboot,MC=MC) +list(Fac.A=A,Fac.B=B,Fac.C=C,Fac.AB=AB,Fac.AC=AC,Fac.BC=BC,Fac.ABC=ABC) +} + + ph.inter<-function(x,alpha=.05,p=J*K,grp=c(1:p),plotit=TRUE,op=4){ +# +# Patel--Hoel interaction for a +# in 2 by 2 design. The method is based on an +# extension of Cliff's heteroscedastic technique for +# handling tied values and the Patel-Hoel definition of no interaction. +# +# It is assumed all groups are independent. +# +# Missing values are automatically removed. +# +# The default value for alpha is .05. Any other value results in using +# alpha=.01. +# +# Argument grp can be used to rearrange the order of the data. +# + if(is.matrix(x))x<-listm(x) +if(!is.list(x))stop('Data must be stored in a matrix or in list mode.') +a=rimul(2,2,x,alpha=alpha,plotit=FALSE) + e1=cid(x[[1]],x[[2]],,plotit=FALSE)$phat + e2=cid(x[[3]],x[[4]],,plotit=FALSE)$phat +if(plotit){ +m1<-outer(x[[1]],x[[2]],FUN='-') +m2<-outer(x[[3]],x[[4]],FUN='-') +m1<-as.vector(m1) +m2<-as.vector(m2) +g2plot(m1,m2,op=op) +} +list(Est.1=e1,Est.2=e2,dif=e1-e2,ci.lower=a$test[1,6],ci.upper=a$test[1,7],p.value=a$test[8]) +} + + ph.inter<-function(x,alpha=.05,p=J*K,grp=c(1:p),plotit=TRUE,op=4,SW=FALSE){ +# +# Patel--Hoel interaction for a +# in 2 by 2 design. The method is based on an +# extension of Cliff's heteroscedastic technique for +# handling tied values and the Patel-Hoel definition of no interaction. 
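+# Roughly, the Patel-Hoel approach characterizes no interaction as the probability
+# that an observation from level 1 of the second factor is less than one from level
+# 2 being the same for both levels of the first factor; the values Est.1 and Est.2
+# returned below are the estimates of these two probabilities (the phat values from
+# cid) and dif is their difference.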
+# +# The function rimul deals with the J by K design +# +# It is assumed all groups are independent. +# +# Missing values are automatically removed. +# +# The default value for alpha is .05. Any other value results in using +# alpha=.01. +# +# Argument grp can be used to rearrange the order of the data. +# + if(is.matrix(x))x<-listm(x) +if(!is.list(x))stop('Data must be stored in a matrix or in list mode.') +if(SW)x=x[c(1,3,2,4)] +a=rimul(2,2,x,alpha=alpha,plotit=FALSE) + e1=cid(x[[1]],x[[2]],,plotit=FALSE)$phat + e2=cid(x[[3]],x[[4]],,plotit=FALSE)$phat +if(plotit){ +m1<-outer(x[[1]],x[[2]],FUN='-') +m2<-outer(x[[3]],x[[4]],FUN='-') +m1<-as.vector(m1) +m2<-as.vector(m2) +g2plot(m1,m2,op=op) +} +list(Est.1=e1,Est.2=e2,dif=e1-e2,ci.lower=a$test[1,6],ci.upper=a$test[1,7],p.value=a$test[8]) +} + +ESmcp.CI<-function(x,method='KMS',alpha=.05,nboot=2000,SEED=TRUE,pr=TRUE){ +# +# All +# Choices for method: +# 'EP','QS','QStr','AKP','WMW','KMS' +# +# + +if(is.data.frame(x))x=as.matrix(x) +if(SEED)set.seed(2) +if(is.matrix(x))x<-listm(x) +x=elimna(x) +n=lapply(x,length) +J<-length(x) +JALL=(J^2-J)/2 +if(identical(method,'EP')){ +if(pr)print('Note: A method for computing a p.value for EP is not yet available') +output=matrix(NA,JALL,5) +ic=0 +for(j in 1:J){ +for(k in 1:J){ +if(j Eigenvalues.Random)) +} + +shared.comp <- matrix(rnorm(n.cases * n.factors, 0, 1), nrow = n.cases, + ncol = n.factors) +unique.comp <- matrix(rnorm(n.cases * n.variables, 0, 1), nrow = n.cases, + ncol = n.variables) +shared.load <- matrix(0, nrow = n.variables, ncol = n.factors) +unique.load <- matrix(0, nrow = n.variables, ncol = 1) +while (trials.without.improvement < max.trials) { + iteration <- iteration + 1 + factor.analysis <- FactorAnalysis(intermediate.corr, corr.matrix = TRUE, + max.iteration = 50, n.factors, corr.type) + if (n.factors == 1) { + shared.load[, 1] <- factor.analysis$loadings + } else { + for (i in 1:n.factors) + shared.load[, i] <- factor.analysis$loadings[, i] + } + shared.load[shared.load > 1] <- 1 + shared.load[shared.load < -1] <- -1 + if (shared.load[1, 1] < 0) + shared.load <- shared.load * -1 + for (i in 1:n.variables) + if (sum(shared.load[i, ] * shared.load[i, ]) < 1) { + unique.load[i, 1] <- (1 - sum(shared.load[i, ] * shared.load[i, ])) + } else { + unique.load[i, 1] <- 0 + } + unique.load <- sqrt(unique.load) + for (i in 1:n.variables) + data[, i] <- (shared.comp %*% t(shared.load))[, i] + unique.comp[, i] * + unique.load[i, 1] + for (i in 1:n.variables) { + data <- data[sort.list(data[, i]), ] + data[, i] <- distributions[, i] + } + reproduced.corr <- cor(data, method = corr.type) + residual.corr <- target.corr - reproduced.corr + rmsr <- sqrt(sum(residual.corr[lower.tri(residual.corr)] * + residual.corr[lower.tri(residual.corr)]) / + (.5 * (n.variables * n.variables - n.variables))) + if (rmsr < best.rmsr) { + best.rmsr <- rmsr + best.corr <- intermediate.corr + best.res <- residual.corr + intermediate.corr <- intermediate.corr + initial.multiplier * + residual.corr + trials.without.improvement <- 0 + } else { + trials.without.improvement <- trials.without.improvement + 1 + current.multiplier <- initial.multiplier * + .5 ^ trials.without.improvement + intermediate.corr <- best.corr + current.multiplier * best.res + } +} + +factor.analysis <- FactorAnalysis(best.corr, corr.matrix = TRUE, + max.iteration = 50, n.factors, + corr.type) +if (n.factors == 1) { + shared.load[, 1] <- factor.analysis$loadings +} else { + for (i in 1:n.factors) + shared.load[, i] <- factor.analysis$loadings[, i] 
+} +shared.load[shared.load > 1] <- 1 +shared.load[shared.load < -1] <- -1 +if (shared.load[1, 1] < 0) + shared.load <- shared.load * -1 +for (i in 1:n.variables) + if (sum(shared.load[i, ] * shared.load[i, ]) < 1) { + unique.load[i, 1] <- (1 - sum(shared.load[i, ] * shared.load[i, ])) + } else { + unique.load[i, 1] <- 0 + } +unique.load <- sqrt(unique.load) +for (i in 1:n.variables) + data[, i] <- (shared.comp %*% t(shared.load))[, i] + unique.comp[, i] * + unique.load[i, 1] +data <- apply(data, 2, scale) # standardizes each variable in the matrix +for (i in 1:n.variables) { + data <- data[sort.list(data[, i]), ] + data[, i] <- distributions[, i] +} +data +} + +################################################################################ +FactorAnalysis <- function(data, corr.matrix = FALSE, max.iteration = 50, + n.factors = 0, corr.type = "pearson") { +# Analyzes comparison data with known factorial structures +# +# Args: +# data : Matrix to store the simulated data. +# corr.matrix : Correlation matrix (default is FALSE) +# max.iteration : Maximum number of iterations (scalar, default is 50). +# n.factors : Number of factors (scalar, default is 0). +# corr.type : Type of correlation (character, default is "pearson", +# user can also call "spearman"). +# +# Returns: +# $loadings : Factor loadings (vector, if one factor. matrix, if multiple +# factors) +# $factors : Number of factors (scalar). +# + data <- as.matrix(data) + n.variables <- dim(data)[2] + if (n.factors == 0) { + n.factors <- n.variables + determine <- TRUE + } else { + determine <- FALSE + } + if (!corr.matrix) { + corr.matrix <- cor(data, method = corr.type) + } else { + corr.matrix <- data + } + criterion <- .001 + old.h2 <- rep(99, n.variables) + h2 <- rep(0, n.variables) + change <- 1 + iteration <- 0 + factor.loadings <- matrix(nrow = n.variables, ncol = n.factors) + while ((change >= criterion) & (iteration < max.iteration)) { + iteration <- iteration + 1 + eigenvalue <- eigen(corr.matrix) + l <- sqrt(eigenvalue$values[1:n.factors]) + for (i in 1:n.factors) + factor.loadings[, i] <- eigenvalue$vectors[, i] * l[i] + for (i in 1:n.variables) + h2[i] <- sum(factor.loadings[i, ] * factor.loadings[i, ]) + change <- max(abs(old.h2 - h2)) + old.h2 <- h2 + diag(corr.matrix) <- h2 + } + if (determine) n.factors <- sum(eigenvalue$values > 1) + return(list(loadings = factor.loadings[, 1:n.factors], + factors = n.factors)) +} + + +rmm.mar<-function(x, tr = 0.2, alpha = 0.05,BH=FALSE,ADJ.CI=FALSE){ +# +# Dependent groups +# Pairwise comparisons based on trimmed means of the marginal distributions +# ADJ.CI=TRUE: Confidence interval adjusted based on Hochberg or, if BH=TRUE, Benjamini--Hochberg + +if(!is.matrix(x))x<-matl(x) +if(!is.matrix(x))stop('Data must be stored in a matrix or in list mode.') +J<-ncol(x) +L=(J^2-J)/2 +psihat<-matrix(0,L,7) +testt<-matrix(0,L,4) +dimnames(psihat)<-list(NULL,c('Group','Group','est 1','est 2','dif','ci.lower','ci.upper')) +test<-matrix(NA,L,4) +dimnames(test)<-list(NULL,c('Group','Group','p.value','p.adjust')) +jcom<-0 +for (j in 1:J){ +for (k in 1:J){ +if (j < k){ +jcom<-jcom+1 +test[jcom,1]<-j +test[jcom,2]<-k +psihat[jcom,1]<-j +psihat[jcom,2]<-k +a=yuend(x[,j],x[,k],tr=tr) +psihat[jcom,3]=a$est1 +psihat[jcom,4]=a$est2 +psihat[jcom,5]=a$dif +test[jcom,1]<-j +test[jcom,2]<-k +test[jcom,3]<-a$p.value +psihat[jcom,6]=a$ci[1] +psihat[jcom,7]=a$ci[2] +}}} +if(ADJ.CI){ +ior=order(0-test[,3]) +adj=alpha/c(1:L) #Hoch +if(BH)adj=alpha*(L-1:L+1) +# +# Adjust confidence intervals +# +jcom<-0 +for (j 
in 1:J){ +for (k in 1:J){ +if (j < k){ +jcom<-jcom+1 +print(adj[ior[[jcom]]]) +a=yuend(x[,j],x[,k],alpha=adj[ior[[jcom]]],tr=tr) +psihat[jcom,6]=a$ci[1] +psihat[jcom,7]=a$ci[2] +}}} +} +test[,4]=p.adjust(test[,3],method='hoch') +if(BH)test[,4]=p.adjust(test[,3],method='BH') +nval=nrow(x) +list(n=nval,test=test,psihat=psihat) +} +rmm.dif<-function(x, tr = 0.2, alpha = 0.05,BH=FALSE,ADJ.CI=FALSE){ +# +# Dependent groups +# Pairwise comparisons, trimmed means based on difference scores +# ADJ.CI=TRUE: Confidence interval adjusted based on Hochberg or, if BH=TRUE, Benjamini--Hochberg +# +if(!is.matrix(x))x<-matl(x) +if(!is.matrix(x))stop('Data must be stored in a matrix or in list mode.') +J<-ncol(x) +L=(J^2-J)/2 +psihat<-matrix(0,L,5) +testt<-matrix(0,L,4) +dimnames(psihat)<-list(NULL,c('Group','Group','est','ci.lower','ci.upper')) +test<-matrix(NA,L,4) +dimnames(test)<-list(NULL,c('Group','Group','p.value','p.adjust')) +jcom<-0 +for (j in 1:J){ +for (k in 1:J){ +if (j < k){ +jcom<-jcom+1 +test[jcom,1]<-j +test[jcom,2]<-k +psihat[jcom,1]<-j +psihat[jcom,2]<-k +a=trimci(x[,j]-x[,k],tr=tr,pr=FALSE) +psihat[jcom,3]=a$estimate +test[jcom,1]<-j +test[jcom,2]<-k +test[jcom,3]<-a$p.value +psihat[jcom,4]=a$ci[1] +psihat[jcom,5]=a$ci[2] +}}} +if(ADJ.CI){ +ior=rev(rank(test[,3])) +adj=alpha/c(1:L) #Hoch +if(BH)adj=alpha*(L-1:L+1) +# +# Adjust confidence intervals +# +jcom<-0 +for (j in 1:J){ +for (k in 1:J){ +if (j < k){ +jcom<-jcom+1 +a=trimci(x[,j]-x[,k],alpha=adj[ior[[jcom]]],tr=tr,pr=FALSE) +psihat[jcom,4]=a$ci[1] +psihat[jcom,5]=a$ci[2] +}}} +} +test[,4]=p.adjust(test[,3],method='hoch') +if(BH)test[,4]=p.adjust(test[,3],method='BH') +nval=nrow(x) +list(n=nval,test=test,psihat=psihat) +} + +rmm.difpb<-function(x, est=tmean, alpha = 0.05,nboot=NA,SEED=TRUE,BH=FALSE,ADJ.CI=FALSE,...){ +# +# Dependent groups +# Pairwise comparisons, trimmed means based on difference scores +# ADJ.CI=TRUE: Confidence interval adjusted based on Hochberg or, if BH=TRUE, Benjamini--Hochberg +# +if(!is.matrix(x))x<-matl(x) +if(!is.matrix(x))stop('Data must be stored in a matrix or in list mode.') +J<-ncol(x) +L=(J^2-J)/2 +psihat<-matrix(0,L,5) +testt<-matrix(0,L,4) +dimnames(psihat)<-list(NULL,c('Group','Group','est','ci.lower','ci.upper')) +test<-matrix(NA,L,4) +dimnames(test)<-list(NULL,c('Group','Group','p.value','p.adjust')) +jcom<-0 +for (j in 1:J){ +for (k in 1:J){ +if (j < k){ +jcom<-jcom+1 +test[jcom,1]<-j +test[jcom,2]<-k +psihat[jcom,1]<-j +psihat[jcom,2]<-k +a=rmmcppbd(x[,j],x[,k],est=est,alpha=alpha,nboot=nboot,SEED=SEED,plotit=FALSE,...)$output +psihat[jcom,3]=a[2] +test[jcom,1]<-j +test[jcom,2]<-k +test[jcom,3]<-a[3] +psihat[jcom,4]=a[5] +psihat[jcom,5]=a[6] +}}} +if(ADJ.CI){ +ior=rev(rank(test[,3])) +adj=alpha/c(1:L) #Hoch +if(BH)adj=alpha*(L-1:L+1) +jcom<-0 +for (j in 1:J){ +for (k in 1:J){ +if (j < k){ +jcom<-jcom+1 +print(adj[ior[[jcom]]]) +#a=trimci(x[,j]-x[,k],alpha=adj[ior[[jcom]]],tr=tr) +#a=two.dep.pb(x[,j],x[,k],dif=TRUE,est=est,alpha=adj[ior[[jcom]]],nboot=nboot,SEED=SEED,pr=FALSE,...) 
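+# Recompute the confidence interval for this pair at the Hochberg (or BH) adjusted
+# alpha level so that the reported interval is consistent with the adjusted test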
+a=rmmcppbd(x[,j],x[,k],est=est,alpha=adj[ior[[jcom]]],nboot=nboot,SEED=SEED,plotit=FALSE,...)$output +psihat[jcom,4]=a[5] +psihat[jcom,5]=a[6] +}}} +} +test[,4]=p.adjust(test[,3],method='hoch') +if(BH)test[,4]=p.adjust(test[,3],method='BH') +nval=nrow(x) +list(n=nval,test=test,psihat=psihat) +} + +rmm.marpb<-function(x, est=tmean, alpha = 0.05,nboot=NA,BH=FALSE,SEED=TRUE,ADJ.CI=FALSE,...){ +# +# Dependent groups +# Pairwise comparisons based on trimmed means of the marginal distributions +# ADJ.CI=TRUE: Confidence interval adjusted based on Hochberg or, if BH=TRUE, Benjamini--Hochberg +# +if(!is.matrix(x))x<-matl(x) +if(!is.matrix(x))stop('Data must be stored in a matrix or in list mode.') +J<-ncol(x) +if(nrow(x)<80){ +if(identical(est,mom))stop('Use rmmcppb with argument BA=TRUE') +if(identical(est,onestep))stop('Use rmmcppb with argument BA=TRUE') +} +L=(J^2-J)/2 +psihat<-matrix(0,L,7) +testt<-matrix(0,L,4) +dimnames(psihat)<-list(NULL,c('Group','Group','est 1','est 2','dif','ci.lower','ci.upper')) +test<-matrix(NA,L,4) +dimnames(test)<-list(NULL,c('Group','Group','p.value','p.adjust')) +jcom<-0 +for (j in 1:J){ +for (k in 1:J){ +if (j < k){ +jcom<-jcom+1 +test[jcom,1]<-j +test[jcom,2]<-k +psihat[jcom,1]<-j +psihat[jcom,2]<-k +#a=yuend(x[,j],x[,k],tr=tr) +a=two.dep.pb(x[,j],x[,k],dif=FALSE,est=est,nboot=nboot,SEED=SEED,pr=FALSE,...) +psihat[jcom,3]=a[1] +psihat[jcom,4]=a[2] +psihat[jcom,5]=a[3] +test[jcom,1]<-j +test[jcom,2]<-k +test[jcom,3]<-a[4] +psihat[jcom,6]=a[5] +psihat[jcom,7]=a[6] +}}} +if(ADJ.CI){ +ior=order(0-test[,3]) +adj=alpha/c(1:L) #Hoch +if(BH)adj=alpha*(L-1:L+1) +# +#. Next, adjust the confidence intervals +# +jcom<-0 +for (j in 1:J){ +for (k in 1:J){ +if (j < k){ +jcom<-jcom+1 +#a=yuend(x[,j],x[,k],alpha=adj[ior[[jcom]]],tr=tr) +a=two.dep.pb(x[,j],x[,k],dif=FALSE,est=est,alpha=adj[ior[[jcom]]],nboot=nboot,SEED=SEED,pr=FALSE,...) +psihat[jcom,6]=a[5] +psihat[jcom,7]=a[6] +}}} +} +test[,4]=p.adjust(test[,3],method='hoch') +if(BH)test[,4]=p.adjust(test[,3],method='BH') +nval=nrow(x) +list(n=nval,test=test,psihat=psihat) +} + +dat2dif<-function(x){ +# +# x is assumed to be a matrix or data frame with at least 2 columns +# +# For J dependent groups, compute all pairwise differences and return the results +# +# +if(is.null(dim(x)))stop('x should be a matrix or data frame') +ic=0 +J=ncol(x) +n=nrow(x) +N=(J^2-J)/2 +ic=0 +dif=matrix(NA,nrow=n,ncol=N) +for(j in 1:J){ +for(k in 1:J){ +if(j1)stop('Argument g should be a vector') +flag=g==min(g) +x1=train[flag,] +x2=train[!flag,] +} +if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') +x1=elimna(x1) +x2=elimna(x2) +if(is.null(x1))stop('Something is wrong, no data in x1') +if(is.null(x2))stop('Something is wrong, no data in x2') +if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') +x1=elimna(x1) +x2=elimna(x2) +x1=as.matrix(x1) +x2=as.matrix(x2) +dimnames(x1)=list(NULL,NULL) # can be necessary to eliminate labels to avoid an error in randomForest. 
+dimnames(x2)=list(NULL,NULL) +n1=nrow(x1) +n2=nrow(x2) +ns1=min(n1,nboot) +ns2=min(n2,nboot) +nm=min(ns1,ns2) +if(EN)ns1=ns2=min(c(ns1,ns2)) +P1hat=NA +P2hat=NA +Av=NA +Bv=NA +Cv=NA +Dv=NA + +J=length(method) +TE=matrix(NA,nrow=nm,ncol=J) +FP=matrix(NA,nrow=nm,ncol=J) +FN=matrix(NA,nrow=nm,ncol=J) +TP=matrix(NA,nrow=nm,ncol=J) +TN=matrix(NA,nrow=nm,ncol=J) + +isub1=sample(c(1:nm)) +isub2=sample(c(1:nm)) +for(k in 1:nm){ +N1=isub1[k] +N2=isub2[k] +train1=x1[-N1,] +train2=x2[-N2,] +test=rbind(x1[N1,],x2[N2,]) +for(j in 1:J){ +a=CLASS.fun(x1=train1,x2=train2,test=test,method=method[j],...) +a1=a[1] +a2=a[2] +flag1=a1!=1 # ID False negatives e..g., method 1 predict no fracture but fracture occurred. So !flag1 is correct decision +flag2=a2!=2 # ID False positives e..g., predict fracture but no fracture occurred. +flag=c(flag1,flag2) #Overall mistakes +TE[k,j]=mean(flag) +FN[k,j]=mean(flag1) +FP[k,j]=mean(flag2) +flag3=a1==1 +flag4=a2==2 +TP[k,j]=mean(flag3) #method 1 predict fracture and fracture occurred +TN[k,j]=mean(flag4) #method 1 predict no fracture and no fracture occurred +}} +ERR=matrix(NA,nrow=5,ncol=J) +dimnames(ERR)=list(c('TE','FP','FN','TP','TN'),method) +#dimnames(CAT)=list(c('TRUE 1','TRUE 2'),c('PRED 1','PRED2')) +v=apply(TE,2,mean) +ERR[1,]=v +v=apply(FP,2,mean) +ERR[2,]=v +v=apply(FN,2,mean) +ERR[3,]=v +v=apply(TP,2,mean) +ERR[4,]=v +v=apply(TN,2,mean) +ERR[5,]=v +list(Error.rates=ERR) +} + +class.error.CM<-function(x1=NULL,x2=NULL,train=NULL,g=NULL,method='KNN',nboot=100,EN=TRUE,FAST=TRUE, +AUC=FALSE,SEED=TRUE,...){ +# +# For a classification methods indicated by the argument +# method +# use cross validation leaving one out. +# +#. Return a confusion matrix: unconditional, To get a conditional result use class.error.CP +# +# The data for the two groups can be entered via the arguments +# x1 and x2 +# or +# store all of the data in the argument train in which case g specifies the group +# AUC=TRUE, returns auc. Default is FALSE because conditions can be created where +# Error: $ operator is invalid for atomic vectors +# +# Current choices available: +# KNN: Nearest neighbor using robust depths +# DIS: Points classified based on their depths +# DEP: Uses depths as suggested by Makinde and Fasoranbaku (2018). JAS +# SVM: support vector machine +# RF: Random forest +# NN: neural network +# ADA: ada boost +# PRO: project the points onto a line connecting the centers of the data clouds. +# Then use estimate of the pdf for each group to make a decision about future points. +# LSM: smooth version of logistic regression when sm=TRUE; otherwise use logistic regression. +# +# Returns confusion matrix +# +# +# method='KNN' is default +# +# nboot=number of samples +# +if(length(method)!=1)stop('Only one method at a time is allowed') +if(SEED)set.seed(2) +if(!is.null(train)){ +if(is.null(g))stop('Argument g, group ids, must be specified') +if(is.matrix(g))if(dim(g)>1)stop('Argument g should be a vector') +flag=g==min(g) +x1=train[flag,] +x2=train[!flag,] +} +if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') +x1=elimna(x1) +x2=elimna(x2) +if(is.null(x1))stop('Something is wrong, no data in x1') +if(is.null(x2))stop('Something is wrong, no data in x2') +if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') +x1=elimna(x1) +x2=elimna(x2) +x1=as.matrix(x1) +x2=as.matrix(x2) +dimnames(x1)=list(NULL,NULL) # can be necessary to eliminate labels to avoid an error in randomForest. 
+dimnames(x2)=list(NULL,NULL) +n1=nrow(x1) +n2=nrow(x2) +ns1=min(n1,nboot) +ns2=min(n2,nboot) +mn=min(ns1,ns2) +CM=matrix(0,2,2) +isub1=sample(c(1:ns1)) +isub2=sample(c(1:ns2)) +A1=NULL +A2=NULL +ic1=0 +ic2=0 +for(k in 1:mn){ +N1=isub1[k] +N2=isub2[k] +train1=x1[-N1,] +train2=x2[-N2,] +test=rbind(x1[N1,],x2[N2,]) +a=CLASS.fun(x1=train1,x2=train2,test=test,method=method,...) +a1=a[1] +a2=a[2] +A1[k]=a1 +A2[k]=a2 +if(a1==1)CM[1,1]=CM[1,1]+1 #true = 1 pred 1 +else +CM[1,2]=CM[1,2]+1 +if(a2==2)CM[2,2]=CM[2,2]+1 #true =2 and pred 2 +else +CM[2,1]=CM[2,1]+1 +} +FREQ=CM +CM=CM/(2*nboot) +F=matrix(NA,3,3) +dimnames(F)=list(c('True 1','True 2','Sum'),c('Pred 1','Pred 2','Sum')) +F[1,1]=FREQ[1,1] +F[1,2]=FREQ[1,2] +F[2,1]=FREQ[2,1] +F[2,2]=FREQ[2,2] +F[1,3]=F[1,1]+F[1,2] +F[2,3]=F[2,1]+F[2,2] +F[3,1]=F[1,1]+F[2,1] +F[3,2]=F[1,2]+F[2,2] +F[3,3]=F[1,3]+F[2,3] +RES=F/F[3,3] +auroc=NULL +if(AUC){ +library(ROCR) +PRED=c(A1,A2) +LABS=c(rep(1,length(A1)),rep(2,length(A2))) +pred=prediction(PRED,LABS) +perf=performance(pred, "auc") + auroc<- perf@y.values[[1]] +} +dimnames(RES)=list(c('True 1','True 2','Sum'),c('Pred 1','Pred 2','Sum')) +list(C.MAT=RES,COUNTS=F,AUC=auroc[[1]]) +} + + +regtest.blp<-function(x,y,regfun=tsreg,nboot=600,alpha=.05,plotit=TRUE, +grp=c(1:ncol(x)),nullvec=c(rep(0,length(grp))),SEED=TRUE,pr=TRUE,...){ +# +# Test the hypothesis that q of the p predictors are equal to +# some specified constants. By default, the hypothesis is that all +# p predictors have a coefficient equal to zero. +# The method is based on a confidence ellipsoid. +# The critical value is determined with the percentile bootstrap method +# in conjunction with Mahalanobis distance. +# +x<-as.matrix(x) +p1<-ncol(x)+1 +p<-ncol(x) +xy<-cbind(x,y) +xy<-elimna(xy) +x<-xy[,1:p] +y<-xy[,p1] +m<-cbind(x,y) +flag<-reglev.gen(x,y,regfun=regfun,plotit=FALSE)$keep +m<-m[flag,] +x<-m[,1:p] +y<-m[,p1] +x<-as.matrix(x) +if(length(grp)!=length(nullvec))stop('The arguments grp and nullvec must have the same length.') +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +#print('Taking bootstrap samples. Please wait.') +data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot) +bvec<-apply(data,1,regboot,x,y,regfun) # A p+1 by nboot matrix. The first row +# contains the bootstrap intercepts, the second row +# contains the bootstrap values for first predictor, etc. +grp<-grp+1 #Ignore the intercept. 
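+# Compare the distance of the null values from the estimated coefficients with the
+# distances of the bootstrap estimates; the p-value is the proportion of bootstrap
+# distances at least as large as the distance associated with the null vector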
+est<-regfun(x,y)$coef +estsub<-est[grp] +bsub<-t(bvec[grp,]) +if(length(grp)==1){ +m1<-sum((bvec[grp,]-est)^2)/(length(y)-1) +dis<-(bsub-estsub)^2/m1 +} +if(length(grp)>1){ +mvec<-apply(bsub,2,FUN=mean) +m1<-var(t(t(bsub)-mvec+estsub)) +dis<-mahalanobis(bsub,estsub,m1) +} +dis2<-order(dis) +dis<-sort(dis) +critn<-floor((1-alpha)*nboot) +crit<-dis[critn] +test<-mahalanobis(t(estsub),nullvec,m1) +sig.level<-1-sum(test>dis)/nboot +print(length(grp)) +if(length(grp)==2 && plotit){ +plot(bsub,xlab='Parameter 1',ylab='Parameter 2') +points(nullvec[1],nullvec[2],pch=0) +xx<-bsub[dis2[1:critn],] +xord<-order(xx[,1]) +xx<-xx[xord,] +temp<-chull(xx) +lines(xx[temp,]) +lines(xx[c(temp[1],temp[length(temp)]),]) +} +list(test=test,crit=crit,p.value=sig.level,nullvec=nullvec,est=estsub,n=length(y)) +} + +chbin2num<-function(x){ +# +# Make sure x is binary and numeric +# +n=length(x) +v=rep(NA,n) +y=elimna(x) +chk=unique(y) +if(length(chk)!=2)stop('Should have binary data after NA removed') +M=max(chk) +id=x==M +v[id]=1 +M=min(chk) +id=x==M +v[id]=0 +v +} + +wmw.anc.plot<-function(x1,y1,x2,y2,q1=c(.1,.9),q2=c(.1,.9),npts=20, +pts=NULL,xout=FALSE,outfun=outpro,xlab='X',ylab='P(Y1=1)stop('Argument q must be greater than 0 and less than 1') +qu=1-ql +L1=qest(x1,ql) +L2=qest(x2,ql) +U1=qest(x1,qu) +U2=qest(x2,qu) +L=max(L1,L2) +U=min(U1,U2) +pts=seq(L,U,length.out=npts) +} +s1sq=regIQRsd(x1,y1,pts=pts) +s2sq=regIQRsd(x2,y2,pts=pts) +e1=regYhat(x1,y1,xr=pts,regfun=Qreg) +e2=regYhat(x2,y2,xr=pts,regfun=Qreg) +v1=s1sq^2 +v2=s2sq^2 +n1=length(y1) +n2=length(y2) +N=n1+n2 +q=n1/N +top=(1-q)*v1+q*v2 +bot=q*(1-q) +sigsq=top/bot # Quantity in brackets KMS p. 176 eq 21.1 +es=(e1-e2)/sqrt(sigsq) +mat=cbind(pts,es) +#if(plotit)reg2plot(x1,y1,x2,y2,regfun=Qreg,xlab=xlab,ylab=ylab) +if(plotit)anclinKMS.plot(x1,y1,x2,y2,pts=pts,line=line,xlab=xlab,ylab=ylab,ylim=ylim) +dimnames(mat)=list(NULL,c('pts','Effect.size')) +mat +} + + anclinKMS.plot<-function(x1,y1,x2,y2,pts=NULL,q=0.1,xout=FALSE,ALL=TRUE,npts=10,line=TRUE, +xlab='X',ylab='KMS.Effect',outfun=outpro,ylim=NULL,...){ +# +# Plot KMS measure of effect size +# pts=NULL: If ALL=TRUE, 10 points are chosen by this function +# otherwise three points are used. 
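+# Illustrative usage (added sketch; assumes ancova.ES, qest and outpro from this file
+# are available):
+#   set.seed(1)
+#   x1 <- rnorm(80); y1 <- x1 + rnorm(80)
+#   x2 <- rnorm(80); y2 <- .5*x2 + rnorm(80)
+#   anclinKMS.plot(x1, y1, x2, y2, npts = 10)  # matrix of covariate points and KMS effect sizes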
+# +# The KMS effect size is a heteroscedastic robust analog of Cohen's d +# +# +xy=elimna(cbind(x1,y1)) +x1<-as.matrix(x1) +p=ncol(x1) +if(p>1)stop('Current version allows one covariate only') +p1=p+1 +vals=NA +x1<-xy[,1:p] +y1<-xy[,p1] +x1<-as.matrix(x1) +xy=elimna(cbind(x2,y2)) +x2<-as.matrix(x2) +p=ncol(x2) +if(p>1)stop('Current version allows one covariate only') +p1=p+1 +vals=NA +x2<-xy[,1:p] +y2<-xy[,p1] +x2<-as.matrix(x2) +xorder<-order(x1) +y1<-y1[xorder] +x1<-x1[xorder] +xorder<-order(x2) +y2<-y2[xorder] +x2<-x2[xorder] +n1=length(y1) +n2=length(y2) +n=min(c(n1,n2)) +if(xout){ +m<-cbind(x1,y1) +flag<-outfun(x1,plotit=FALSE,...)$keep +m<-m[flag,] +n1=nrow(m) +x1<-m[,1:p] +y1<-m[,p1] +x1=as.matrix(x1) +m<-cbind(x2,y2) +flag<-outfun(x2,plotit=FALSE,...)$keep +m<-m[flag,] +n2=nrow(m) +n=min(c(n1,n2)) +x2<-m[,1:p] +y2<-m[,p1] +x2=as.matrix(x2) +} +if(!is.null(pts))npts=length(pts) +if(is.null(pts)){ +if(q<=0 || q>=1)stop('Argument q must be greater than 0 and less than 1') +qu=1-q +L1=qest(x1,q) +L2=qest(x2,q) +U1=qest(x1,qu) +U2=qest(x2,qu) +L=max(L1,L2) +U=min(U1,U2) +if(ALL)pts=seq(L,U,length.out=npts) +else{pts=c(L,(L+U)/2,U) +npts=3 +}} + +#e=reg.pred(x2,y2,xr=pts,regfun=Qreg,q=.5,xout=FALSE) +qs=ancova.ES(x1,y1,x2,y2,pts=pts,plotit=FALSE)[,2] +M=cbind(pts,qs) +if(is.null(ylim)) +ylim=min(-.8,min(qs)) +ylim[2]=max(.8,max(qs)) +if(line){ +plot(pts,qs,xlab=xlab,ylab=ylab,ylim=ylim,type='n') +lines(pts,qs) +} +else +plot(pts,qs,xlab=xlab,ylab=ylab,ylim=ylim) +dimnames(M)=list(NULL,c('Pts','KMS.Effect.Size')) +M +} + +ancNCE.QS.plot<-function(x1,y1,x2,y2,pts=NULL,q=0.1,xout=FALSE,ALL=TRUE,npts=10,line=TRUE, +xlab='X',ylab='QS.Effect',outfun=outpro,...){ +# +# Plot quantile shift measure of effect size +# No control group +# +# q = lower quantile used to determine the points used, +# +# pts=NULL: If ALL=TRUE, 20 points are chosen by this function +# otherwise three points are used. 
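+# (Added note: the quantile shift measure lies between 0 and 1, with values near .5
+# indicating little or no effect at that covariate value, which is why the plot is
+# drawn on the unit interval.)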
+# +# +# +xy=elimna(cbind(x1,y1)) +x1<-as.matrix(x1) +p=ncol(x1) +if(p>1)stop('Current version allows one covariate only') +p1=p+1 +vals=NA +x1<-xy[,1:p] +y1<-xy[,p1] +x1<-as.matrix(x1) +xy=elimna(cbind(x2,y2)) +x2<-as.matrix(x2) +p=ncol(x2) +if(p>1)stop('Current version allows one covariate only') +p1=p+1 +vals=NA +x2<-xy[,1:p] +y2<-xy[,p1] +x2<-as.matrix(x2) +xorder<-order(x1) +y1<-y1[xorder] +x1<-x1[xorder] +xorder<-order(x2) +y2<-y2[xorder] +x2<-x2[xorder] +n1=length(y1) +n2=length(y2) +n=min(c(n1,n2)) +if(xout){ +m<-cbind(x1,y1) +flag<-outfun(x1,plotit=FALSE,...)$keep +m<-m[flag,] +n1=nrow(m) +x1<-m[,1:p] +y1<-m[,p1] +x1=as.matrix(x1) +m<-cbind(x2,y2) +flag<-outfun(x2,plotit=FALSE,...)$keep +m<-m[flag,] +n2=nrow(m) +n=min(c(n1,n2)) +x2<-m[,1:p] +y2<-m[,p1] +x2=as.matrix(x2) +} +if(!is.null(pts))npts=length(pts) +if(is.null(pts)){ +if(q<=0 || q>=1)stop('Argument q must be greater than 0 and less than 1') +qu=1-q +L1=qest(x1,q) +L2=qest(x2,q) +U1=qest(x1,qu) +U2=qest(x2,qu) +L=max(L1,L2) +U=min(U1,U2) +if(ALL)pts=seq(L,U,length.out=npts) +else{pts=c(L,(L+U)/2,U) +npts=3 +}} +qs=QSanc(x1,y1,x2,y2,pts=pts) +qs=as.vector(matl(qs)) +M=cbind(pts,qs) +if(line){ +plot(pts,qs,xlab=xlab,ylab=ylab,ylim=c(0,1),type='n') +lines(pts,qs) +} +else +plot(pts,qs,xlab=xlab,ylab=ylab,ylim=c(0,1)) +dimnames(M)=list(NULL,c('Pts','QS.Effect.Size')) +M +} + +com2gfun<-function(x,y,est=tmean,tr=.2,alpha=.05,SEED=TRUE,nboot=2000,method=c('Y','PB','CID','BM')){ +# +# +# Y=Yuen +# PB=Percentile bootstrap +# CID= Cliff's +# BM = Brunner--Munzel +# +type=match.arg(method) +switch(type, +Y=yuen(x,y,tr=tr,alpha=alpha), +PB=pb2gen(x,y,est=est,alpha=alpha,SEED=SEED,nboot=nboot), +CID=cidv2(x,y,alpha=alpha), +BM=bmp(x,y,alpha=alpha)) +} + +anc.2gbin<-function(x1,y1,x2,y2,pts=NA,fr1=.8,fr2=.8,npts=10,xlab='X',ylab='Est. Dif',xout=FALSE, +outfun=out,nmin=12,plotit=TRUE){ +# +# Compare probability of success give a value for some covariate. +# A running-interval smoother is used coupled with the KMS method for comparing binomial distributions +# +isub=0 +if(xout){ +flag<-outfun(x1,...)$keep +x1<-x1[flag] +y1<-y1[flag] +flag<-outfun(x2,...)$keep +x2<-x2[flag] +y2<-y2[flag] +} +xorder=order(x1) +y1<-y1[xorder] +x1<-x1[xorder] +xorder<-order(x2) +y2<-y2[xorder] +x2<-x2[xorder] +if(is.na(pts[1])){ +n1<-1 +n2<-1 +vecn<-1 +isub=0 +for(i in 1:length(x1))n1[i]<-length(y1[near(x1,x1[i],fr1)]) +for(i in 1:length(x1))n2[i]<-length(y2[near(x2,x1[i],fr2)]) +for(i in 1:length(x1))vecn[i]<-min(n1[i],n2[i]) +sub<-c(1:length(x1)) +isub[1]<-min(sub[vecn>=nmin]) +isub[2]<-max(sub[vecn>=nmin]) +} +bot=x1[isub[1]] +top=x1[isub[2]] +pts=seq(bot,top,length.out=npts) +output=matrix(NA,nrow=length(pts),ncol=8) +for(i in 1:length(pts)){ +g1<-y1[near(x1,pts[i],fr1)] +g2<-y2[near(x2,pts[i],fr2)] +v=binom2g(x=g1,y=g2) +output[i,1:7]=c(pts[i],v$p1,v$p2,v$est.dif,v$ci[1],v$ci[2],v$p.value) +} +dimnames(output)=list(NULL,c('pts','p1','p2','est.dif','ci.lower','ci.upper','p.value','p.adjusted')) +if(plotit){ +plot(c(pts,pts,pts),c(output[,4],output[,5],output[,6]),type='n',xlab=xlab,ylab=ylab,ylim=c(-1,1)) +points(pts,output[,4]) +points(pts,output[,5],pch='+') +points(pts,output[,6],pch='+') +} +output[,8]=p.adjust(output[,7],method='hoch') +output +} + +ancmg1.power<-function(n,del=.2,alpha=.05,iter=100,SEED=TRUE,ADJ=FALSE){ +# +# n sample sizes, length of n indicates number of groups +#. 
Estimate power with no data +# Simulate assuming standard normal distributions but first group has a mean del +# +# +J=length(n) +x=list() +y=list() +chk=0 +if(SEED)set.seed(2) +for(i in 1:iter){ +for(j in 1:J){ +x[[j]]=rnorm(n[j]) +y[[j]]=rnorm(n[j]) +} +y[[1]]=y[[1]]+del +a=ancmg1(x,y,pr=FALSE) +pv=NA +K=length(a$points) +for(k in 1:K){ +if(!ADJ)pv[k]=min(a$point[[k]][,3]) +else +pv[k]=min(a$point[[k]][,7]) +} +if(min(pv)<=alpha)chk=chk+1 +} +chk/iter +} + + + +oph.dep.comRMSAE<-function(x, y=NULL, tr=0,invalid=4, method='hommel',STOP=TRUE,nboot=1999){ +# +# This function is designed specifically for dealing with +# Prediction Error for Intraocular Lens Power Calculation +# +# Goal: compare the root-mean-square absolute prediction error of J dependent measures. +# Strictly speaking, the squared mean absolute error value is used. +# The estimates reported by the function are the root-mean-squared absolute errors. +# All pairwise comparisons are performed using a bootstrap-t method based on means +# To use a 20% trimmed mean, set tr=.2 + +# x can be a matrix, a data frame or it can have list mode. +# if y is not NULL, the function assumes x is a vector +# and the goal is to compare the variances of the data in x and y. +# +# By default, Hommel's method is used to control the probability of one +# or more Type I errors +# +if(!is.null(y))x=cbind(x,y) +if(is.list(x)){ +n=pool.a.list(lapply(x,length)) +if(var(n)!=0)stop('lengths have different values') +x=matl(x) +} +J=ncol(x) +flag=abs(elimna(x))>invalid +if(sum(flag,na.rm=TRUE)>0){ +nr=c(1:nrow(x)) +if(sum(flag)>1){ +print(paste('The value of argument invalid indicates that any value greater in absolute value than', invalid,' is invalid')) +print('The following rows have invalid values') +} +if(sum(flag)==1){ +print(paste('The value of argument invalid indicates that any value greater in absolute value than', invalid,' is invalid')) +print('The following row has an invalid value') +} +irow=NA +ic=0 +N=nrow(x) +for(i in 1:N){ +iflag=abs(x[i,])>invalid +if(sum(iflag,na.rm=TRUE)>0){ +ic=ic+1 +irow[ic]=i +}} +print(irow) +if(STOP)stop() +} +CC=(J^2-J)/2 +output<-matrix(0,CC,7) +dimnames(output)<-list(NULL,c('Var','Var','RMSAE 1','RMSAE 2','Dif','p.value','Adj.p.value')) +ic=0 +for (j in 1:J){ +for (k in 1:J){ +if (j < k){ +ic=ic+1 +a=ydbt(x[,j]^2,x[,k]^2,tr=tr,nboot=nboot) +output[ic,1]=j +output[ic,2]=k +output[ic,3]=RMSAE(elimna(x[,j])) +output[ic,4]=RMSAE(elimna(x[,k])) +output[ic,5]=output[ic,3]-output[ic,4] +output[ic,6]=a$p.value +}}} +output[,7]=p.adjust(output[,6],method=method) +output +} + +RMSAE<-function(x) sqrt(sum(x^2)/(length(x))) + +oph.indep.comRMSAE<-function(x,y=NULL,method='hoch',invalid=4,STOP=TRUE,tr=0,nboot=1999){ +# +# This function is designed specifically for dealing with +# Prediction Error for Intraocular Lens Power Calculation +# It is assumed that any value less than -4 diopters or greater than 4 diopters +# is invalid. The argument invalid can be used to change this decision rule. +# +# Goal: compare root-mean-square Absolute Error (RMSAE) of J independent measures. +# All pairwise comparisons are performed using a heteroscedastic +# Welch method +# x can be a matrix, a data frame or it can have list mode. +# if y is not NULL, the function assumes x is a vector +# and the goal is to compare the variances of the data in x and y. 
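+# Illustrative usage (added sketch, not part of the original routine):
+#   set.seed(1)
+#   pe1 <- rnorm(50, 0, .4)   # prediction errors (diopters) for formula 1
+#   pe2 <- rnorm(50, 0, .6)   # prediction errors (diopters) for formula 2
+#   oph.indep.comRMSAE(pe1, pe2)  # compares RMSAE = sqrt(mean(pe^2)) via yuenbt on pe^2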
+# To get an even more robust method using a 20% trimmed mean, set the argument tr=.2 +# +# By default, Hochberg's method is used to control the probability of one +# or more TypeI errors +# +if(!is.null(y))x=list(x,y) +if(is.matrix(x) || is.data.frame(x))x=listm(x) +J=length(x) +for(j in 1:J)x[[j]]=elimna(x[[j]]) +for(j in 1:J){ +flag=abs(x[[j]])>invalid +if(sum(flag,na.rm=TRUE)>0){ +print(paste('The value of argument invalid indicates that any value greater in absolute value than', invalid,' is invalid')) +print(paste('Variable', j, 'has one or more invalid values')) +print('They occur in the following positions') +nr=c(1:length(x[[j]])) +print(nr[flag]) +if(STOP)stop() +} +} +CC=(J^2-J)/2 +output<-matrix(0,CC,7) +dimnames(output)<-list(NULL,c('Var','Var','RMSAE 1','RMSAE 2','Dif','p.value','Adj.p.value')) +ic=0 +for (j in 1:J){ +for (k in 1:J){ +if (j < k){ +ic=ic+1 +a=yuenbt(x[[j]]^2,x[[k]]^2,tr=tr,nboot=nboot) +output[ic,1]=j +output[ic,2]=k +output[ic,3]=RMSAE(x[[j]]) +output[ic,4]=RMSAE(x[[k]]) +output[ic,5]=output[ic,3]-output[ic,4] +output[ic,6]=a$p.value +}}} +output[,7]=p.adjust(output[,6],method=method) +output +} + +oph.dep.comMeanAE<-function(x, y=NULL, tr=0,invalid=4, method='hommel',STOP=TRUE,nboot=1999){ +# +# This function is designed specifically for dealing with +# Prediction Error for Intraocular Lens Power Calculation +# +# Goal: compare the root-mean-square absolute prediction error of J dependent measures. +# Strictly speaking, the squared mean absolute error value is used. +# The estimates reported by the function are the root-mean-squared absolute errors. +# All pairwise comparisons are performed using a bootstrap-t method based on means +# For an even more robust method using a20% trimmed mean, set tr=.2 + +# x can be a matrix, a data frame or it can have list mode. +# if y is not NULL, the function assumes x is a vector +# and the goal is to compare the variances of the data in x and y. 
+# +# By default, Hommel's method is used to control the probability of one +# or more Type I errors +# +if(!is.null(y))x=cbind(x,y) +if(is.list(x)){ +n=pool.a.list(lapply(x,length)) +if(var(n)!=0)stop('lengths have different values') +x=matl(x) +} +J=ncol(x) +flag=abs(elimna(x))>invalid +if(sum(flag,na.rm=TRUE)>0){ +nr=c(1:nrow(x)) +if(sum(flag)>1){ +print(paste('The value of argument invalid indicates that any value greater in absolute value than', invalid,' is invalid')) +print('The following rows have invalid values') +} +if(sum(flag)==1){ +print(paste('The value of argument invalid indicates that any value greater in absolute value than', invalid,' is invalid')) +print('The following row has an invalid value') +} +irow=NA +ic=0 +N=nrow(x) +for(i in 1:N){ +iflag=abs(x[i,])>invalid +if(sum(iflag,na.rm=TRUE)>0){ +ic=ic+1 +irow[ic]=i +}} +print(irow) +if(STOP)stop() +} +CC=(J^2-J)/2 +output<-matrix(0,CC,7) +dimnames(output)<-list(NULL,c('Var','Var','Mean.AE 1','Mean.AE 2','Dif','p.value','Adj.p.value')) +ic=0 +for (j in 1:J){ +for (k in 1:J){ +if (j < k){ +ic=ic+1 +a=ydbt(abs(x[,j]),abs(x[,k]),tr=tr,nboot=nboot) +output[ic,1]=j +output[ic,2]=k +output[ic,3]=mean(abs(x[,j]),tr=tr,na.rm=TRUE) +output[ic,4]=mean(abs(x[,k]),tr=tr,na.rm=TRUE) +output[ic,5]=output[ic,3]-output[ic,4] +output[ic,6]=a$p.value +}}} +output[,7]=p.adjust(output[,6],method=method) +output +} + +oph.indep.comMeanAE<-function(x,y=NULL,method='hoch',invalid=4,STOP=TRUE,tr=0,nboot=1999){ +# +# This function is designed specifically for dealing with +# Prediction Error for Intraocular Lens Power Calculation +# It is assumed that any value less than -4 diopters or greater than 4 diopters +# is invalid. The argument invalid can be used to change this decision rule. +# +# Goal: compare root-mean-square Absolute Error (RMSAE) of J independent measures. +# All pairwise comparisons are performed using a heteroscedastic +# Welch method +# x can be a matrix, a data frame or it can have list mode. +# if y is not NULL, the function assumes x is a vector +# and the goal is to compare the variances of the data in x and y. 
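+# (Added note: this function compares the mean, or trimmed mean when tr>0, of the
+# absolute prediction errors |PE| via yuenbt; contrast with the RMSAE functions above,
+# which work with the squared errors.)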
+# For a more robust method using a 20% trimme mean, set tr=.2 +# +# By default, Hochberg's method is used to control the probability of one +# or more TypeI errors +# +if(!is.null(y))x=list(x,y) +if(is.matrix(x) || is.data.frame(x))x=listm(x) +J=length(x) +for(j in 1:J)x[[j]]=elimna(x[[j]]) +for(j in 1:J){ +flag=abs(x[[j]])>invalid +if(sum(flag,na.rm=TRUE)>0){ +print(paste('The value of argument invalid indicates that any value greater in absolute value than', invalid,' is invalid')) +print(paste('Variable', j, 'has one or more invalid values')) +print('They occur in the following positions') +nr=c(1:length(x[[j]])) +print(nr[flag]) +if(STOP)stop() +} +} +CC=(J^2-J)/2 +output<-matrix(0,CC,7) +dimnames(output)<-list(NULL,c('Var','Var','Mean.AE 1','Mean.AE 2','Dif','p.value','Adj.p.value')) +ic=0 +for (j in 1:J){ +for (k in 1:J){ +if (j < k){ +ic=ic+1 +a=yuenbt(abs(x[[j]]),abs(x[[k]]),tr=tr,nboot=nboot) +output[ic,1]=j +output[ic,2]=k +output[ic,3]=mean(abs(x[[j]]),tr=tr) +output[ic,4]=mean(abs(x[[k]]),tr=tr) +output[ic,5]=output[ic,3]-output[ic,4] +output[ic,6]=a$p.value +}}} +output[,7]=p.adjust(output[,6],method=method) +output +} + +oph.dep.commean<-function(x, y=NULL, tr=0,invalid=4, method='hommel',STOP=TRUE){ +# +# This function is designed specifically for dealing with +# Prediction Error for Intraocular Lens Power Calculation +# It is assumed that any value less than -4 diopters or greater than 4 diopters +# is invalid. The argument invalid can be used to change this decision rule. +# +# Goal: compare the means of J dependent measures. +# All pairwise comparisons are performed using a bootstrap-t method based on means +# To use an even more robust method using a 20% trimmed mean, set tr=.2 +# + +# x can be a matrix, a data frame or it can have list mode. +# if y is not NULL, the function assumes x is a vector +# and the goal is to compare the variances of the data in x and y. +# +# By default, Hommel's method is used to control the probability of one +# or more Type I errors +# +if(!is.null(y))x=cbind(x,y) +if(is.list(x)){ +n=pool.a.list(lapply(x,length)) +if(var(n)!=0)stop('lengths have different values') +x=matl(x) +} +J=ncol(x) +flag=abs(x)>invalid +if(sum(flag,na.rm=TRUE)>0){ +nr=c(1:nrow(x)) +if(sum(flag)>1){ +print(paste('The value of argument invalid indicates that any value greater in absolute value than', invalid,' is invalid')) +print('The following rows have invalid values') +} +if(sum(flag)==1){ +print(paste('The value of argument invalid indicates that any value greater in absolute value than', invalid,' is invalid')) +print('The following row has an invalid value') +} +irow=NA +ic=0 +N=nrow(x) +for(i in 1:N){ +iflag=abs(x[i,])>invalid +if(sum(iflag,na.rm=TRUE)>0){ +ic=ic+1 +irow[ic]=i +}} +print(irow) +if(STOP)stop() +} +CC=(J^2-J)/2 +output<-matrix(0,CC,7) +dimnames(output)<-list(NULL,c('Var','Var','Mean 1','Mean 2','Dif','p.value','Adj.p.value')) +ic=0 +for (j in 1:J){ +for (k in 1:J){ +if (j < k){ +ic=ic+1 +a=ydbt(x[,j],x[,k],tr=tr) +output[ic,1]=j +output[ic,2]=k +output[ic,3]=a$Est.1 +output[ic,4]=a$Est.2 +output[ic,5]=a$Est.1- a$Est.2 +output[ic,6]=a$p.value +}}} +output[,7]=p.adjust(output[,6],method=method) +output +} + +oph.indep.commean<-function(x,y=NULL,method='hommel',invalid=4,STOP=TRUE,tr=0){ +# +# This function is designed specifically for dealing with +# Prediction Error for Intraocular Lens Power Calculation +# It is assumed that any value less than -4 diopters or greater than 4 diopters +# is invalid. 
The argument invalid can be used to change this decision rule. +# +# Goal: compare means of J independent measures. +# All pairwise comparisons are performed using a heteroscedastic +# Welch method +# x can be a matrix, a data frame or it can have list mode. +# if y is not NULL, the function assumes x is a vector +# and the goal is to compare the variances of the data in x and y. +# To use an even more robust method using a 20% trimmed mean, set tr=.2 +# +# By default, Hommel's method is used to control the probability of one +# or more TypeI errors +# +if(!is.null(y))x=list(x,y) +if(is.matrix(x) || is.data.frame(x))x=listm(x) +J=length(x) +for(j in 1:J)x[[j]]=elimna(x[[j]]) +for(j in 1:J){ +flag=abs(elimna(x[[j]]))>invalid +if(sum(flag,na.rm=TRUE)>0){ +print(paste('The value of argument invalid indicates that any value greater in absolute value than', invalid,' is invalid')) +print(paste('Variable', j, 'has one or more invalid values')) +print('They occur in the following positions') +nr=c(1:length(x[[j]])) +print(nr[flag]) +if(STOP)stop() +} +} +CC=(J^2-J)/2 +output<-matrix(0,CC,7) +dimnames(output)<-list(NULL,c('Var','Var','Mean 1','Mean 2','Dif','p.value','Adj.p.value')) +ic=0 +for (j in 1:J){ +for (k in 1:J){ +if (j < k){ +ic=ic+1 +a=yuenbt(x[[j]],x[[k]],tr=tr) +output[ic,1]=j +output[ic,2]=k +output[ic,3:4]=c(a$est.1,a$est.2) +output[ic,5]=output[ic,3]-output[ic,4] +output[ic,6]=a$p.value +}}} +output[,7]=p.adjust(output[,6],method=method) +output +} + + +oph.dep.comMedAE<-function(x, y=NULL, est=median,dif=FALSE, invalid=4, method='hommel',STOP=TRUE,nboot=1999){ +# +# This function is designed specifically for dealing with +# Prediction Error for Intraocular Lens Power Calculation +# +# Goal: compare the median absolute prediction error of J dependent measures. +# +# All pairwise comparisons are performed using a bootstrap-t method based on means +# +# x can be a matrix, a data frame or it can have list mode. +# if y is not NULL, the function assumes x is a vector +# and the goal is to compare the variances of the data in x and y. 
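+# Illustrative usage (added sketch; dmedpb from this file does the work):
+#   set.seed(1)
+#   pe <- cbind(rnorm(50, 0, .4), rnorm(50, 0, .5))  # paired prediction errors, two formulas
+#   oph.dep.comMedAE(pe)   # compares the median absolute errors of the two columns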
+# +# By default, Hommel's method is used to control the probability of one +# or more Type I errors +# +if(!is.null(y))x=cbind(x,y) +if(is.list(x)){ +n=pool.a.list(lapply(x,length)) +if(var(n)!=0)stop('lengths have different values') +x=matl(x) +} +J=ncol(x) +flag=abs(elimna(x))>invalid +if(sum(flag,na.rm=TRUE)>0){ +nr=c(1:nrow(x)) +if(sum(flag)>1){ +print(paste('The value of argument invalid indicates that any value greater in absolute value than', invalid,' is invalid')) +print('The following rows have invalid values') +} +if(sum(flag)==1){ +print(paste('The value of argument invalid indicates that any value greater in absolute value than', invalid,' is invalid')) +print('The following row has an invalid value') +} +irow=NA +ic=0 +N=nrow(x) +for(i in 1:N){ +iflag=abs(x[i,])>invalid +if(sum(iflag,na.rm=TRUE)>0){ +ic=ic+1 +irow[ic]=i +}} +print(irow) +if(STOP)stop() +} +CC=(J^2-J)/2 +output<-matrix(0,CC,7) +dimnames(output)<-list(NULL,c('Var','Var','Med.AE 1','Med.AE 2','Dif','p.value','Adj.p.value')) +ic=0 +for (j in 1:J){ +for (k in 1:J){ +if (j < k){ +ic=ic+1 +a=dmedpb(abs(x[,j]),abs(x[,k]),est=est,dif=dif,nboot=nboot,pr=FALSE,plotit=FALSE) +output[ic,1]=j +output[ic,2]=k +output[ic,3]=est(abs(x[,j]),na.rm=TRUE) +output[ic,4]=est(abs(x[,k]),na.rm=TRUE) +output[ic,5]=output[ic,3]-output[ic,4] +output[ic,6]=a$output[,3] +}}} +output[,7]=p.adjust(output[,6],method=method) +output +} + +oph.indep.commedian<-function(x,y=NULL,method='hommel',invalid=4,STOP=TRUE,SEED=TRUE){ +# +# This function is designed specifically for dealing with +# Prediction Error for Intraocular Lens Power Calculation +# It is assumed that any value less than -4 diopters or greater than 4 diopters +# is invalid. The argument invalid can be used to change this decision rule. +# +# Goal: compare medians of J independent measures. +# All pairwise comparisons are performed using a heteroscedastic +# percentile bootstrap method +# x can be a matrix, a data frame or it can have list mode. +# if y is not NULL, the function assumes x is a vector +# and the goal is to compare the variances of the data in x and y. +# To use an even more robust method using a 20% trimmed mean, set tr=.2 +# +# By default, Hommel's method is used to control the probability of one +# or more TypeI errors +# +if(!is.null(y))x=list(x,y) +if(is.matrix(x) || is.data.frame(x))x=listm(x) +J=length(x) +for(j in 1:J)x[[j]]=elimna(x[[j]]) +for(j in 1:J){ +flag=abs(x[[j]])>invalid +if(sum(flag,na.rm=TRUE)>0){ +print(paste('The value of argument invalid indicates that any value greater in absolute value than', invalid,' is invalid')) +print(paste('Variable', j, 'has one or more invalid values')) +print('They occur in the following positions') +nr=c(1:length(x[[j]])) +print(nr[flag]) +if(STOP)stop() +} +} +CC=(J^2-J)/2 +output<-matrix(0,CC,7) +dimnames(output)<-list(NULL,c('Var','Var','Median 1','Median 2','Dif','p.value','Adj.p.value')) +ic=0 +for (j in 1:J){ +for (k in 1:J){ +if (j < k){ +ic=ic+1 +a=medpb2(x[[j]],x[[k]],SEED=SEED) +output[ic,1]=j +output[ic,2]=k +output[ic,3:4]=c(a$est1,a$est2) +output[ic,5]=output[ic,3]-output[ic,4] +output[ic,6]=a$p.value +}}} +output[,7]=p.adjust(output[,6],method=method) +output +} + +oph.dep.commedian<-function(x, y=NULL,invalid=4, method='hommel',STOP=TRUE,SEED=TRUE){ +# +# This function is designed specifically for dealing with +# Prediction Error for Intraocular Lens Power Calculation +# It is assumed that any value less than -4 diopters or greater than 4 diopters +# is invalid. 
The argument invalid can be used to change this decision rule. +# +# Goal: compare the medians of J dependent measures. +# All pairwise comparisons are performed using a percentile bootstrap method +# +# + +# x can be a matrix, a data frame or it can have list mode. +# if y is not NULL, the function assumes x is a vector +# and the goal is to compare the variances of the data in x and y. +# +# By default, Hommel's method is used to control the probability of one +# or more Type I errors +# +if(!is.null(y))x=cbind(x,y) +if(is.list(x)){ +n=pool.a.list(lapply(x,length)) +if(var(n)!=0)stop('lengths have different values') +x=matl(x) +} +J=ncol(x) +flag=abs(elimna(x))>invalid +if(sum(flag,na.rm=TRUE)>0){ +nr=c(1:nrow(x)) +if(sum(flag)>1){ +print(paste('The value of argument invalid indicates that any value greater in absolute value than', invalid,' is invalid')) +print('The following rows have invalid values') +} +if(sum(flag)==1){ +print(paste('The value of argument invalid indicates that any value greater in absolute value than', invalid,' is invalid')) +print('The following row has an invalid value') +} +irow=NA +ic=0 +N=nrow(x) +for(i in 1:N){ +iflag=abs(x[i,])>invalid +if(sum(iflag,na.rm=TRUE)>0){ +ic=ic+1 +irow[ic]=i +}} +print(irow) +if(STOP)stop() +} +CC=(J^2-J)/2 +output<-matrix(0,CC,7) +dimnames(output)<-list(NULL,c('Var','Var','Median 1','Median 2','Dif','p.value','Adj.p.value')) +ic=0 +for (j in 1:J){ +for (k in 1:J){ +if (j < k){ +ic=ic+1 +a=dmedpb(x[,j],x[,k],pr=FALSE,plotit=FALSE,nboot=2000,SEED=SEED) +output[ic,1]=j +output[ic,2]=k +output[ic,3]=median(x[,j],na.rm=TRUE) +output[ic,4]=median(x[,k],na.rm=TRUE) +output[ic,5]=a$output[1,2] +output[ic,6]=a$output[1,3] +}}} +output[,7]=p.adjust(output[,6],method=method) +output +} + +oph.mcnemar<-function(x,method='holm',invalid=4){ +# +# Astigmatism: compare prediction formulas +# +if(is.null(dim(x)))stop('x should be a matrix or data frame') +x=abs(x) +J=ncol(x) #number of formulas +flag=max(abs(x),na.rm=TRUE)>invalid +if(flag){ +nr=c(1:nrow(x)) +if(sum(flag,na.rm=TRUE)>1){ +print(paste('The value of argument invalid indicates that any value greater in absolute value than', invalid,' is invalid')) +print('The following rows have invalid values') +} +if(sum(flag,na.rm=TRUE)==1){ +print(paste('The value of argument invalid indicates that any value greater in absolute value than', invalid,' is invalid')) +print('The following row has an invalid value') +} +irow=NA +ic=0 +N=nrow(x) +for(i in 1:N){ +iflag=abs(x[i,])>invalid +if(sum(iflag,na.rm=TRUE)>0){ +ic=ic+1 +irow[ic]=i +}} +print(irow) +stop() +} +CC=(J^2-J)/2 +output<-matrix(0,CC,9) +dimnames(output)<-list(NULL,c('D', ' Var', 'N< ' , '%<', 'Var', 'N<', '%< ', +'p.value','Adj.p.value')) +E=list() +TAB=list() +D=seq(.25,2,.25) #D intervals from .25 to 2 +for(L in 1:length(D)){ +ic=0 +for (j in 1:J){ +for (k in 1:J){ +if (j < k){ +ic=ic+1 +a=mat2table(x[,c(j,k)],D[L],D[L]) +n1=sum(x[[j]]<=D[L],na.rm=TRUE) +pn1=mean(x[[j]]<=D[L],na.rm=TRUE) +n2=sum(x[[k]]<=D[L],na.rm=TRUE) +pn2=mean(x[[k]]<=D[L],na.rm=TRUE) +if(sum(is.na(a)>0))print(paste('No data for VAR',j,'VAR',k,'D=',D[L])) +if(sum(is.na(a))==0){ +mct=mcnemar.test(a) +output[ic,1]=D[L] +output[ic,2]=j +output[ic,3]=n1 +output[ic,4]=pn1 +output[ic,5]=k +output[ic,6]=n2 +output[ic,7]=pn2 +output[ic,8]=mct[[3]] +if(a[1,2]==0 &a[2,1]==0)output[ic,8]=1 +}}}} +output[,9]=p.adjust(output[,8],method=method) +E[[L]]=output +} +E +} + +oph.indepintervals<-function(m,method='holm',invalid=4){ +# +# For column of x, compare frequencies using 
KMS method +# +# +# n: sample sizes +# x is a matrix or data frame with 8 rows +# +# +E=list() +ic=0 +m=abs(m) +J=ncol(m) +x=m +flag=abs(elimna(x))>invalid +if(sum(flag)>0){ +nr=c(1:nrow(x)) +if(sum(flag)>1){ +print(paste('The value of argument invalid indicates that any value greater in absolute value than', invalid,' is invalid')) +print('The following rows have invalid values') +} +if(sum(flag)==1){ +print(paste('The value of argument invalid indicates that any value greater in absolute value than', invalid,' is invalid')) +print('The following row has an invalid value') +} +irow=NA +ic=0 +N=nrow(x) +for(i in 1:N){ +iflag=abs(x[i,])>invalid +if(sum(iflag)>0){ +ic=ic+1 +irow[ic]=i +}} +print(irow) +stop() +} +id=matrix(NA,8,2) +x=matrix(NA,8,2) +INT=c(0.25,0.50, 0.75,1,1.25,1.5,1.75,2) +dimnames(id)=list(NULL,ncol=c('S1','S2')) +for (j in 1:J){ + for (k in 1:J){ + if (j < k){ + ic=ic+1 +id[,1]=rep(j,8) +id[,2]=rep(k,8) +# Next determine frequencies +S1=elimna(m[,j]) +S2=elimna(m[,k]) +n1=length(S1) +n2=length(S2) +for(L in 1:8){ +x[L,1]=sum(S1<=INT[L]) +x[L,2]=sum(S2<=INT[L]) +} +a=srg1.vs.2(c(n1,n2),x) +Adj.p.value=p.adjust(a[,3],method=method) +E[[ic]]=cbind(id,a,Adj.p.value) + }}} +E +} + +oph.dep.comMAD<-function(x, y=NULL, tr=0,invalid=4, method='hommel',STOP=TRUE,nboot=1999){ +# +# This function is designed specifically for dealing with +# Prediction Error for Intraocular Lens Power Calculation +# +# Goal: # Goal: compare mean absolute deviation of J dependent measures. +# Strictly speaking, the squared mean absolute error value is used. +# The estimates reported by the function are the root-mean-squared absolute errors. +# All pairwise comparisons are performed using a bootstrap-t method based on means +# + +# x can be a matrix, a data frame or it can have list mode. +# if y is not NULL, the function assumes x is a vector +# and the goal is to compare the variances of the data in x and y. 
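+# (Added note: here MAD refers to the mean absolute deviation from the mean; each
+# column is centered at its own mean before the absolute values are compared, so the
+# comparison reflects the spread of the prediction errors rather than their bias.)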
+# +# By default, Hommel's method is used to control the probability of one +# or more Type I errors +# +if(!is.null(y))x=cbind(x,y) +if(is.list(x)){ +n=pool.a.list(lapply(x,length)) +if(var(n)!=0)stop('lengths have different values') +x=matl(x) +} +J=ncol(x) +flag=abs(x)>invalid +if(sum(flag,na.rm=TRUE)>0){ +nr=c(1:nrow(x)) +if(sum(flag)>1){ +print(paste('The value of argument invalid indicates that any value greater in absolute value than', invalid,' is invalid')) +print('The following rows have invalid values') +} +if(sum(flag)==1){ +print(paste('The value of argument invalid indicates that any value greater in absolute value than', invalid,' is invalid')) +print('The following row has an invalid value') +} +irow=NA +ic=0 +N=nrow(x) +for(i in 1:N){ +iflag=abs(x[i,])>invalid +if(sum(iflag,na.rm=TRUE)>0){ +ic=ic+1 +irow[ic]=i +}} +print(irow) +if(STOP)stop() +} +CC=(J^2-J)/2 +output<-matrix(0,CC,7) +dimnames(output)<-list(NULL,c('Var','Var','MAD 1','MAD 2','Dif','p.value','Adj.p.value')) +ic=0 +for(j in 1:J){ +mx=mean(x[,j],na.rm=TRUE) +#x[,j]=x[,j]-mean(x[,j],na.rm=TRUE) +x[,j]=x[,j]-mx +} +for (j in 1:J){ +for (k in 1:J){ +if (j < k){ +ic=ic+1 +a=ydbt(abs(x[,j]),abs(x[,k]),tr=tr,nboot=nboot) +output[ic,1]=j +output[ic,2]=k +output[ic,3]=mean(abs(x[,j]),tr=tr,na.rm=TRUE) +output[ic,4]=mean(abs(x[,k]),tr=tr,na.rm=TRUE) +output[ic,5]=output[ic,3]-output[ic,4] +output[ic,6]=a$p.value +}}} +output[,7]=p.adjust(output[,6],method=method) +output +} + +oph.indep.comMAD<-function(x,y=NULL,method='hoch',invalid=4,STOP=TRUE,tr=0,nboot=1999){ +# +# This function is designed specifically for dealing with +# Prediction Error for Intraocular Lens Power Calculation +# It is assumed that any value less than -4 diopters or greater than 4 diopters +# is invalid. The argument invalid can be used to change this decision rule. +# +# Goal: compare mean absolute deviation of J independent measures. +# All pairwise comparisons are performed using a heteroscedastic +# Welch method +# x can be a matrix, a data frame or it can have list mode. +# if y is not NULL, the function assumes x is a vector +# and the goal is to compare the variances of the data in x and y. 
+# For a more robust method using a 20% trimme mean, set tr=.2 +# +# By default, Hochberg's method is used to control the probability of one +# or more TypeI errors +# +if(!is.null(y))x=list(x,y) +if(is.matrix(x) || is.data.frame(x))x=listm(x) +J=length(x) +for(j in 1:J)x[[j]]=elimna(x[[j]]) +for(j in 1:J){ +flag=abs(x[[j]])>invalid +if(sum(flag,na.rm=TRUE)>0){ +print(paste('The value of argument invalid indicates that any value greater in absolute value than', invalid,' is invalid')) +print(paste('Variable', j, 'has one or more invalid values')) +print('They occur in the following positions') +nr=c(1:length(x[[j]])) +print(nr[flag]) +if(STOP)stop() +} +} +CC=(J^2-J)/2 +output<-matrix(0,CC,7) +dimnames(output)<-list(NULL,c('Var','Var','MAD 1','MAD 2','Dif','p.value','Adj.p.value')) +ic=0 +for(j in 1:J)x[[j]]=x[[j]]-mean(x[[j]]) +for (j in 1:J){ +for (k in 1:J){ +if (j < k){ +ic=ic+1 +a=yuenbt(abs(x[[j]]),abs(x[[k]]),tr=tr,nboot=nboot) +output[ic,1]=j +output[ic,2]=k +output[ic,3]=mean(abs(x[[j]]),tr=tr) +output[ic,4]=mean(abs(x[[k]]),tr=tr) +output[ic,5]=output[ic,3]-output[ic,4] +output[ic,6]=a$p.value +}}} +output[,7]=p.adjust(output[,6],method=method) +output +} + +oph.indep.comMedAE<-function(x,y=NULL,est=median,method='hommel',invalid=4,STOP=TRUE,nboot=1999){ +# +# This function is designed specifically for dealing with +# Prediction Error for Intraocular Lens Power Calculation +# It is assumed that any value less than -4 diopters or greater than 4 diopters +# is invalid. The argument invalid can be used to change this decision rule. +# +# Goal: compare median Absolute Error of J independent measures. +# All pairwise comparisons are performed using a heteroscedastic method +# x can be a matrix, a data frame or it can have list mode. +# if y is not NULL, the function assumes x is a vector +# +# +# By default, Hommel's method is used to control the probability of one +# or more TypeI errors +# +if(!is.null(y))x=list(x,y) +if(is.matrix(x) || is.data.frame(x))x=listm(x) +J=length(x) +for(j in 1:J)x[[j]]=elimna(x[[j]]) +for(j in 1:J){ +flag=abs(x[[j]])>invalid +if(sum(flag,na.rm=TRUE)>0){ +print(paste('The value of argument invalid indicates that any value greater in absolute value than', invalid,' is invalid')) +print(paste('Variable', j, 'has one or more invalid values')) +print('They occur in the following positions') +nr=c(1:length(x[[j]])) +print(nr[flag]) +if(STOP)stop() +} +} +CC=(J^2-J)/2 +output<-matrix(0,CC,7) +dimnames(output)<-list(NULL,c('Var','Var','Med.AE 1','Med.AE 2','Dif','p.value','Adj.p.value')) +ic=0 +for (j in 1:J){ +for (k in 1:J){ +if (j < k){ +ic=ic+1 +a=pb2gen(abs(x[[j]]),abs(x[[k]]),est=est,nboot=nboot) +output[ic,1]=j +output[ic,2]=k +output[ic,3]=est(abs(x[[j]]),) +output[ic,4]=est(abs(x[[k]])) +output[ic,5]=output[ic,3]-output[ic,4] +output[ic,6]=a$p.value +}}} +output[,7]=p.adjust(output[,6],method=method) +output +} + + +corblp.ci<-function(x,y,regfun=tsreg,varfun=pbvar,nboot=100,alpha=.05,outfun=outpro.depth,SEED=TRUE, +plotit=FALSE,...){ +# +# Correlation, basically a robust version of explanatory power, +# based on a robust regression estimator with bad +# leverage points removes +# +if(SEED)set.seed(2) +xy=elimna(cbind(x,y)) +p1=ncol(xy) +p=p1-1 +if(p!=1)stop('Only a single independent variable is allowed') +x=xy[,1] +y=xy[,2] +id=reglev.gen(x,y,regfun=regfun,plotit=plotit,outfun=outpro.depth)$keep +X=x[id] +Y=y[id] +v=NA +n=length(Y) +bot=varfun(Y) +for(i in 1:nboot){ +id=sample(n,replace=TRUE) +e=reg.pred(X[id],Y[id],regfun=regfun) +top=varfun(e) 
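+# The ratio top/bot is a robust analog of explanatory power; its square root, signed
+# by the bootstrap slope estimate, gives the bootstrap correlation value stored in v[i]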
+rsq=top/bot +rsq=min(rsq,1) +est=regfun(X[id],Y[id])$coef[2] +rest=sign(est)*sqrt(rsq) +v[i]=rest +} +se=sd(v) +est=corblp.EP(x,y,regfun=regfun,varfun=varfun) +test=est$cor/se +sig<-2*(1-pnorm(abs(test))) +crit=qnorm(1-alpha/2) +ci=est$cor-crit*se +ci=max(ci,-1) +ci[2]=est$cor+crit*se +ci[2]=min(ci[2],1) +list(cor=est$cor,test=test,p.value=sig,ci=ci) +} + +ancovap2.KMS<-function(x1,y1,x2,y2,pts=NULL,BOTH=TRUE,npts=20,profun=prodepth, +xout=FALSE,outfun=outpro){ +# +# Comparing two independent regression lines. +# based on an analog of the KMS measure of effect size +# for the points indicated by pts. +# pts=NULL: three points used that are determined based on the data +# +# profun=prdepth, random projections are used to measure the depth of a point +# =pdepth would use a deterministic method, might have high execution time +# if n is large. +# +# BOTH=TRUE: combine x1 and x2 when picking points, otherwise use x1 +# +x1=as.matrix(x1) +p=ncol(x1) +p1=p+1 +m1=elimna(cbind(x1,y1)) +x1=m1[,1:p] +y1=m1[,p1] +x2=as.matrix(x2) +p=ncol(x2) +p1=p+1 +m2=elimna(cbind(x2,y2)) +x2=m2[,1:p] +y2=m2[,p1] +if(xout){ +m<-cbind(x1,y1) +if(identical(outfun,reglev))flag=outfun(x1,y1,plotit=FALSE)$keep +else +flag<-outfun(x1,plotit=FALSE)$keep +m<-m[flag,] +x1<-m[,1:p] +y1<-m[,p1] +m<-cbind(x2,y2) +if(identical(outfun,reglev))flag=outfun(x2,y2,plotit=FALSE)$keep +else +flag<-outfun(x2,plotit=FALSE)$keep +m<-m[flag,] +x2<-m[,1:p] +y2<-m[,p1] +} +if(is.null(pts)){ +if(!BOTH){ +d=profun(x1) +ior=order(d) +id=seq(1,min(n1),length.out=npts) +id=floor(id) +pts=x1[ior[id],] +} +if(BOTH){ +X=rbind(x1,x2) +d=profun(X) +ior=order(d) +id=seq(1,min(n1),length.out=npts) +id=floor(id) +pts=X[ior[id],] +} +} +pts<-as.matrix(pts) +s1sq=regIQRsd(x1,y1,pts=pts) +s2sq=regIQRsd(x2,y2,pts=pts) +e1=regYhat(x1,y1,xr=pts,regfun=Qreg) +e2=regYhat(x2,y2,xr=pts,regfun=Qreg) +v1=s1sq^2 +v2=s2sq^2 +n1=length(y1) +n2=length(y2) +N=n1+n2 +q=n1/N +top=(1-q)*v1+q*v2 +bot=q*(1-q) +sigsq=top/bot # Quantity in brackets KMS p. 176 eq 21.1 +es=(e1-e2)/sqrt(sigsq) +mat=cbind(pts,es) +lab=NA +for(i in 1:p)lab[i]=paste('X',i) +dimnames(mat)=list(NULL,c(lab,'KMS')) +mat +} + +ancovap2.KMS.SEpb<-function(x1,y1,x2,y2,nboot=100,pts=NULL,SEED=TRUE){ +# +# Estimate standard error +# +if(is.null(pts))stop('No points were specified') +n1=nrow(x1) +n2=nrow(x2) +p=ncol(x1)+1 +npts=nrow(pts) +if(SEED)set.seed(2) +v=matrix(NA,nrow=nboot,ncol=nrow(pts)) +for(i in 1:nboot){ +id1=sample(n1,replace=TRUE) +id2=sample(n2,replace=TRUE) +X1=x1[id1,] +Y1=y1[id1] +X2=x2[id2,] +Y2=y2[id2] +v[i,]=ancovap2.KMS(X1,Y1,X2,Y2,pts=pts)[,p] +} +se=apply(v,2,sd) +se +} + +ancovap2.KMSci<-function(x1,y1,x2,y2,pts=NULL,alpha=.05,nboot=100,SEED=TRUE,npts=20, +SIMPLE=FALSE,PLOT.ADJ=FALSE, +plotit=TRUE,xlab='X1',ylab='X2',BOTH=TRUE,profun=prodepth, +xout=FALSE,outfun=outpro,method='hoch'){ +# +# Two independent groups, have two covariates. +# +# For each specified value for x, stored in pts, compute a heteroscedastic measure of effect +# +# if pts=NULL +# SIMPLE=TRUE: use the quartiles of the marginal distributions of group 1 +# to determine the covariate points used, +# SIMPLE=FALSE +# points are chosen based on the depths of the points, which is computed +# by the R function indicated by the argument profun. +# The default is profun=prodepth. +# To use a random collection of projections, set +# profun=pdepth.depth +# +# npts=20 When SIMPLE=FALSE, means 20 points are selected evenly spaced +# between the deepest point and the +# least deep point. 
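+# Illustrative usage (added sketch; assumes the supporting functions from this file,
+# e.g. prodepth, regIQRsd and Qreg, are available):
+#   set.seed(1)
+#   x1 <- matrix(rnorm(200), ncol = 2); y1 <- as.vector(x1 %*% c(1, .5)) + rnorm(100)
+#   x2 <- matrix(rnorm(200), ncol = 2); y2 <- as.vector(x2 %*% c(1, .5)) + rnorm(100)
+#   ancovap2.KMSci(x1, y1, x2, y2, nboot = 50, plotit = FALSE)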
+# +# The function tests the hypothesis that the measure of effect is zero, no effect. +# +# iter=100: number of replications used to estimate the standard error. +# +# BOTH=TRUE and SIMPLE=FALSE: +# combine x1 and x2 when picking points, otherwise use x1 +# +FLAG=FALSE +if(!is.null(pts))FLAG=TRUE +p=ncol(x1) +if(p!=2)stop('Current version is limited to two covariates') +p1=p+1 +xy=elimna(cbind(x1,y1)) +x1=xy[,1:p] +y1=xy[,p1] +n1=nrow(xy) +xy=elimna(cbind(x2,y2)) +n2=nrow(xy) +x2=xy[,1:p] +y2=xy[,p1] +if(xout){ +m<-cbind(x1,y1) +if(identical(outfun,reglev))flag=outfun(x1,y1,plotit=FALSE)$keep +else +flag<-outfun(x1,plotit=FALSE)$keep +m<-m[flag,] +x1<-m[,1:p] +y1<-m[,p1] +m<-cbind(x2,y2) +if(identical(outfun,reglev))flag=outfun(x2,y2,plotit=FALSE)$keep +else +flag<-outfun(x2,plotit=FALSE)$keep +m<-m[flag,] +x2<-m[,1:p] +y2<-m[,p1] +} +n1=length(y1) +n2=length(y2) +n=n1+n2 +if(is.null(pts)&SIMPLE) pts=cbind(qest(x1[,1],c(.25,.5,.75)),qest(x1[,2],c(.25,.5,.75))) + +if(is.null(pts)){ +if(!BOTH){ +d=profun(x1) +ior=order(d) +id=seq(1,n1,length.out=npts) +id=floor(id) +pts=x1[ior[id],] +} +if(BOTH){ +X=rbind(x1,x2) +d=profun(X) +ior=order(d) +id=seq(1,n,length.out=npts) +id=floor(id) +pts=X[ior[id],] +} +} +adj=matrix(c(20, 0.673367, +30, 0.8048804, +40, 0.8452348, +50, 0.8702816, +75, 0.8975298, +100, 0.9231938, +125, 0.9363285, +150, 0.940, +175, 0.9438881, +200, 0.9492541, +250, 0.9546365, +300, 0.9527324),byrow=TRUE,ncol=2) +nmid=(n1+n2)/2 +if(max(n1,n2)>300)b.adj=.975 +else +b.adj=lplot.pred(1/adj[,1],adj[,2],1/nmid)$yhat +npts=nrow(pts) +RES=matrix(NA,nrow=npts,ncol=6) +SE=ancovap2.KMS.SEpb(x1,y1,x2,y2,nboot=nboot,pts=pts,SEED=SEED) +SE=b.adj*SE +RES[,1]=ancovap2.KMS(x1,y1,x2,y2,pts=pts)[,p1] +RES[,3]=RES[,1]-qnorm(1-alpha/2)*SE +RES[,4]=RES[,1]+qnorm(1-alpha/2)*SE +test=RES[,1]/SE +pv=2*(1-pnorm(abs(test))) +RES[,5]=pv +RES[,2]=test +dimnames(RES)=list(NULL,c('Est.','Test.Stat','ci.low','ci.up','p-value','p.adjusted')) +RES[,6]=p.adjust(RES[,5],method=method) +ip=which(RES[,5]<=.05) +sig.output=NULL +sig.points=NULL +if(length(ip)>0){ +sig.output=RES[ip,] +sig.points=pts[ip,] +} +if(FLAG)sig.output=RES +if(plotit){ +plot(x1[,1],x1[,2],xlab=xlab,ylab=ylab,pch='.') #type='n') +if(PLOT.ADJ)ip=which(RES[,6]<=.05) +else ip=which(RES[,5]<=.05) +points(pts[,1],pts[,2],pch='*') +if(length(ip)>0)points(pts[ip,],pch='o') +} +RES +list(pts=pts,output=RES) +} + + + +ancovap2.KMS.plot<-function(x1,y1,x2,y2,pts=NULL,xlab='X1',ylab='X2',zlab='Effect Size', +xout=FALSE,outfun=outpro,SEED=TRUE, theta = 50, phi = 25,REV=FALSE){ +# +# +# Two covariates, plot the KMS measure of effect size +# The function automatically removes leverage points. +# +# The function computes the KMS measure of effect size for the points in +# pts +# and plots the results. if +#. pts=NULL, the function picks the deepest 90% of the pooled +# data in x1 and x2 +# +# REV=FALSE: The plot created by LOESS is impacted by which independent +# variable is first in the matrix +#. 
pts +# To switch which is first, set REV=TRUE +# +xy=elimna(cbind(x1,y1)) +if(ncol(xy)!=3)stop('Only two covariates can be used') +x1=xy[,1:2] +y1=xy[,3] +xy=elimna(cbind(x2,y2)) +x2=xy[,1:2] +y2=xy[,3] +if(xout){ +m<-cbind(x1,y1) +flag<-outfun(x1,plotit=FALSE)$keep +m<-m[flag,] +x1<-m[,1:2] +y1<-m[,3] +m<-cbind(x2,y2) +flag<-outfun(x2,plotit=FALSE)$keep +m<-m[flag,] +x2<-m[,1:2] +y2<-m[,3] +} +if(is.null(pts))pts=rbind(x1,x2) +N=nrow(pts) +e=ancovap2.KMS(x1,y1,x2,y2,pts=pts)[,3] +if(N<25){ +library(scatterplot3d) +scatterplot3d(pts[,1],pts[,2],e,xlab=xlab,ylab=ylab,zlab=zlab) +} +if(N>=25){ +if(!REV)f=lplot(pts,e,xlab=ylab,ylab=xlab,zlab=zlab,ticktype='det',pr=FALSE,theta=theta,phi=phi) +else f=lplot(pts[,c(2,1)],e,xlab=xlab,ylab=ylab,zlab=zlab,ticktype='det',pr=FALSE,theta=theta,phi=phi) +} +list(Number_of_points_used_is=N) +} + + +#Wrap-upp functions + +#Spherical Equivalent Prediction Error Dataset +#Choose this file (SEQ_PE): source(file.choose()) +#for dependent dataset: SEQ_PE(dependent=T) +#for independent dataset: SEQ_PE(dependent=F) + +SEQ_PE<-function(dependent=T){ + +#library("readxl") +#library("xlsx") +library("reshape") +library("tidyverse") +library("viridis") +library("ragg") + +#src<-choose.files(caption="Please specify source R code file: Rallfun-v40") +#src<-file.choose() +#source(src) + + +##filein<-choose.files(caption="Please specify your data input file") +filein<-file.choose() +x=read.table(filein,header=T,sep="\t") + +## dir of the inputfile: +## dir<-dirname(filein) + +#x=read_excel(filein) +#dim(x) +#print(x) + +#x=read.table(file.choose(),header=T,sep="\t") + +f_d<-dependent +max<-as.integer(max(abs(x),na.rm=T))+1 +if(max>7) { +#print("A maxium value is > 6 in your data. Stop running."); +stop("A maxium magnitude is > 6 in your data. 
Stop running.",call.=F);} +cmbn<-names(x) + +## fileout<-file.choose() + + +##dir<-choose.dir(caption="Please specify where to save your results") +dir=dirname(filein) + +box_plot1(x,paste0(dir,"/","PE_boxplot.tiff")) + + +## if dependent is False +if(f_d==F) { +## SD: + +e1<-try({ +name="02.SD" +res<-as.data.frame(oph.ind.comvar(x,invalid=max)) +colnames(res)[1]="Formula_1" +colnames(res)[2]="Formula_2" +res$Formula_1<-cmbn[match(res$Formula_1,c(1:length(cmbn)))] +res$Formula_2<-cmbn[match(res$Formula_2,c(1:length(cmbn)))] +write.table(res,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=F,col.names=NA) +} +) + +name="04.MAD" +res<-as.data.frame(oph.indep.comMAD(x,invalid=max)) +colnames(res)[1]="Formula_1" +colnames(res)[2]="Formula_2" +res$Formula_1<-cmbn[match(res$Formula_1,c(1:length(cmbn)))] +res$Formula_2<-cmbn[match(res$Formula_2,c(1:length(cmbn)))] +write.table(res,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=F,col.names=NA) + +name="06.Intervals" +resList<-oph.indepintervals(x,invalid=max) +resL<-do.call(rbind,resList) +resL1<-as.data.frame(resL) +resL1$D<-row.names(resL) +colnames(resL1)[1]="Formula_1" +colnames(resL1)[2]="Formula_2" +resL1$Formula_1<-cmbn[match(resL1$Formula_1,c(1:length(cmbn)))] +resL1$Formula_2<-cmbn[match(resL1$Formula_2,c(1:length(cmbn)))] +resL1<-resL1[,c(dim(resL1)[2],1:dim(resL1)[2]-1)] +write.table(resL1,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=F,append=F) + +e1<-try({ +name="08.Mean" +res<-as.data.frame(oph.indep.commean(x,invalid=max)) +colnames(res)[1]="Formula_1" +colnames(res)[2]="Formula_2" +res$Formula_1<-cmbn[match(res$Formula_1,c(1:length(cmbn)))] +res$Formula_2<-cmbn[match(res$Formula_2,c(1:length(cmbn)))] +write.table(res,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=F,col.names=NA) +} +) + +name="10.Median" +res<-as.data.frame(oph.indep.commedian(x,invalid=max)) +colnames(res)[1]="Formula_1" +colnames(res)[2]="Formula_2" +res$Formula_1<-cmbn[match(res$Formula_1,c(1:length(cmbn)))] +res$Formula_2<-cmbn[match(res$Formula_2,c(1:length(cmbn)))] +write.table(res,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=F,col.names=NA) + +name="12.MeanAE" +res<-as.data.frame(oph.indep.comMeanAE(x,invalid=max)) +colnames(res)[1]="Formula_1" +colnames(res)[2]="Formula_2" +res$Formula_1<-cmbn[match(res$Formula_1,c(1:length(cmbn)))] +res$Formula_2<-cmbn[match(res$Formula_2,c(1:length(cmbn)))] +write.table(res,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=F,col.names=NA) + +name="14.MedianAE" +res<-as.data.frame(oph.indep.comMedAE(x,invalid=max)) +colnames(res)[1]="Formula_1" +colnames(res)[2]="Formula_2" +res$Formula_1<-cmbn[match(res$Formula_1,c(1:length(cmbn)))] +res$Formula_2<-cmbn[match(res$Formula_2,c(1:length(cmbn)))] +write.table(res,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=F,col.names=NA) + + +#oph.indep.comRMSAE(x,invalid=max) +name="16.RMSAE" +res<-as.data.frame(oph.indep.comRMSAE(x,invalid=max)) +colnames(res)[1]="Formula_1" +colnames(res)[2]="Formula_2" +res$Formula_1<-cmbn[match(res$Formula_1,c(1:length(cmbn)))] +res$Formula_2<-cmbn[match(res$Formula_2,c(1:length(cmbn)))] +write.table(res,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=F,col.names=NA) + +} + +else { + +name="01.SD" +res<-as.data.frame(oph.dep.comvar(x,invalid=max)) +colnames(res)[1]="Formula_1" +colnames(res)[2]="Formula_2" +res$Formula_1<-cmbn[match(res$Formula_1,c(1:length(cmbn)))] 
+res$Formula_2<-cmbn[match(res$Formula_2,c(1:length(cmbn)))] +write.table(res,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=F,col.names=NA) + +name="07.Mean" +res<-as.data.frame(oph.dep.commean(x,invalid=max)) +colnames(res)[1]="Formula_1" +colnames(res)[2]="Formula_2" +res$Formula_1<-cmbn[match(res$Formula_1,c(1:length(cmbn)))] +res$Formula_2<-cmbn[match(res$Formula_2,c(1:length(cmbn)))] +write.table(res,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=F,col.names=NA) + +name="09.Median" +res<-as.data.frame(oph.dep.commedian(x,invalid=max)) +colnames(res)[1]="Formula_1" +colnames(res)[2]="Formula_2" +res$Formula_1<-cmbn[match(res$Formula_1,c(1:length(cmbn)))] +res$Formula_2<-cmbn[match(res$Formula_2,c(1:length(cmbn)))] +write.table(res,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=F,col.names=NA) + +name="11.MeanAE" +res<-as.data.frame(oph.dep.comMeanAE(x,invalid=max)) +colnames(res)[1]="Formula_1" +colnames(res)[2]="Formula_2" +res$Formula_1<-cmbn[match(res$Formula_1,c(1:length(cmbn)))] +res$Formula_2<-cmbn[match(res$Formula_2,c(1:length(cmbn)))] +write.table(res,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=F,col.names=NA) + +name="13.MedianAE" +res<-as.data.frame(oph.dep.comMedAE(x,invalid=max)) +colnames(res)[1]="Formula_1" +colnames(res)[2]="Formula_2" +res$Formula_1<-cmbn[match(res$Formula_1,c(1:length(cmbn)))] +res$Formula_2<-cmbn[match(res$Formula_2,c(1:length(cmbn)))] +write.table(res,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=F,col.names=NA) + +e1<-try({ +name="03.MAD" +res<-as.data.frame(oph.dep.comMAD(x,invalid=max)) +colnames(res)[1]="Formula_1" +colnames(res)[2]="Formula_2" +res$Formula_1<-cmbn[match(res$Formula_1,c(1:length(cmbn)))] +res$Formula_2<-cmbn[match(res$Formula_2,c(1:length(cmbn)))] +write.table(res,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=F,col.names=NA) +} +) + +name="15.RMSAE" +res<-as.data.frame(oph.dep.comRMSAE(x,invalid=max)) +colnames(res)[1]="Formula_1" +colnames(res)[2]="Formula_2" +res$Formula_1<-cmbn[match(res$Formula_1,c(1:length(cmbn)))] +res$Formula_2<-cmbn[match(res$Formula_2,c(1:length(cmbn)))] +write.table(res,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=F,col.names=NA) + +name="05.Intervals" +resList<-oph.mcnemar(x,invalid=max) +resL<-do.call(rbind,resList) +resL<-as.data.frame(resL) +colnames(resL)[2]="Formula_1" +colnames(resL)[5]="Formula_2" +resL$Formula_1<-cmbn[match(resL$Formula_1,c(1:length(cmbn)))] +resL$Formula_2<-cmbn[match(resL$Formula_2,c(1:length(cmbn)))] +write.table(resL,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=F,col.names=NA) + +} +} + +box_plot1<-function(x,fileout){ +library("reshape") +library("tidyverse") +library("viridis") +library("ggplot2") +library("hrbrthemes") + +x1<-melt(x) +#tiff(fileout) +#jpeg +plot<-ggplot(x1,aes(x=variable,y=value,fill=variable))+ + geom_boxplot() + +scale_fill_viridis(discrete = TRUE, alpha=0.6) + +geom_jitter(color="black", size=1.5, alpha=0.9) + +theme_ipsum() + + theme( + legend.position="none", + plot.title = element_text(size=11) + ) + + ggtitle(" Prediction Errors (D)") + + xlab("")+ + ylab("") +ggsave(fileout,plot=plot) + +} + +testplot<-function(x){ +plot(x[,1],x[,2]) +} + + +#Astigmatism Magnitude Dataset: +#Choose this file (Astig_Magnitude): source(file.choose()) +#for dependent dataset: Astig_Magnitude(dependent=T) +#for independent dataset: Astig_Magnitude(dependent=F) + 
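+#The input file is assumed to be tab-delimited with a header row and one column of
+#astigmatism prediction-error magnitudes (in diopters) per formula.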
+Astig_Magnitude<-function(dependent=T){ + +#library("readxl") +#library("xlsx") + +#src<-choose.files(caption="Please specify source R code file: Rallfun-v40") +#src<-file.choose() +#source(src) + + +#filein<-choose.files(caption="Please specify your data input file") +filein<-file.choose() +x=read.table(filein,header=T,sep="\t") + +f_d<-dependent +max<-as.integer(max(abs(x),na.rm=T))+1 +if(max>7) { +#print("A maxium value is > 6 in your data. Stop running."); +stop("A maxium value is > 6 in your data. Stop running.",call.=F);} +cmbn<-names(x) + +## fileout<-file.choose() + + +#dir<-choose.dir(caption="Please specify where to save your results") +dir=dirname(filein) + +## if dependent is False +if(f_d==F) { + +e1<-try({ +name="18.Mean" +res<-as.data.frame(oph.astig.indepcom(x,invalid=max)) +colnames(res)[4]="Formula_1" +colnames(res)[5]="Formula_2" +res$Formula_1<-cmbn[match(res$Formula_1,c(1:length(cmbn)))] +res$Formula_2<-cmbn[match(res$Formula_2,c(1:length(cmbn)))] +#write.table(res,file=fileout,sep="\t",quote=F,row.names=F,append=T) +res<-res[,c(1:12)] +write.table(res,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=F,col.names=NA) +} +) + +name="20.Intervals" +resList<-oph.astig.indepintervals(x,invalid=max) +resL<-do.call(rbind,resList) +resL1<-as.data.frame(resL) +resL1$D<-row.names(resL) +colnames(resL1)[1]="Formula_1" +colnames(resL1)[2]="Formula_2" +resL1$Formula_1<-cmbn[match(resL1$Formula_1,c(1:length(cmbn)))] +resL1$Formula_2<-cmbn[match(resL1$Formula_2,c(1:length(cmbn)))] +resL1<-resL1[,c(dim(resL1)[2],1:dim(resL1)[2]-1)] +write.table(resL1,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=F,append=F) +} + +else { + +name="17.Mean" +res<-as.data.frame(oph.astig.depcom(x,invalid=max)) +colnames(res)[1]="Formula_1" +colnames(res)[2]="Formula_2" +res$Formula_1<-cmbn[match(res$Formula_1,c(1:length(cmbn)))] +res$Formula_2<-cmbn[match(res$Formula_2,c(1:length(cmbn)))] +write.table(res,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=F,col.names=NA) + + +name="19.Intervals" +resList<-oph.astig.mcnemar(x,invalid=max) +resL<-do.call(rbind,resList) +resL<-as.data.frame(resL) +colnames(resL)[2]="Formula_1" +colnames(resL)[5]="Formula_2" +resL$Formula_1<-cmbn[match(resL$Formula_1,c(1:length(cmbn)))] +resL$Formula_2<-cmbn[match(resL$Formula_2,c(1:length(cmbn)))] +write.table(resL,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=F,col.names=NA) +} +} + + +#Astigmatism Vector Dataset +#Choose this file (Astig_Vector): source(file.choose()) +#for dependent dataset: Astig_Vector(dependent=T) +#for independent dataset: Astig_Vector(dependent=F) + +Astig_Vector<-function(dependent=T){ + +#library("readxl") +#library("xlsx") + +#src<-choose.files(caption="Please specify source R code file: Rallfun-v40") +#src<-file.choose() +#source(src) + +#filein<-choose.files(caption="Please specify your data input file") +filein<-file.choose() +x=read.table(filein,header=T,sep="\t") + +f_d<-dependent +max<-as.integer(max(abs(x),na.rm=T))+1 +if(max>7) { +#print("A maximum value is > 6 in your data. Stop running."); +stop("A maximum value is > 6 in your data. 
Stop running.",call.=F);} +cmbn<-names(x) +cmbn1<- unlist(strsplit(cmbn,split="\\.")) +cmbn1<-cmbn1[seq_along(cmbn1)%%2 >0] +cmbn1<-cmbn1[seq_along(cmbn1)%%2 >0] +empty<-data.frame() +## fileout<-file.choose() + + +#dir<-choose.dir(caption="Please specify where to save your results") +dir=dirname(filein) + +name="23.DatasetMeanConvexpoly" +res<-oph.astig.datasetconvexpoly.mean(x,plotit=F) +CNT<-as.data.frame(do.call(rbind,res$centers)) +colnames(CNT)<-c("center.X","center.Y","N") +row.names(CNT)<-cmbn1 +write.table(CNT,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=T,col.names=NA) +write.table(empty,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=T,col.names=NA) +Dm<-list() +for(i in 1:length(res$convex.hull.pts)){ +DD<-as.data.frame(res$convex.hull.pts[[i]]); +#Dm<-append(Dm,DD) +Dm[[i]]<-DD +colnames(DD)<-c(paste0(cmbn1[i],".","X"),paste0(cmbn1[i],".","Y")) +write.table(DD,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=T,col.names=NA) +write.table(empty,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=T,col.names=NA) +#Dm<-append(Dm,DD) +} + +name="21.MeanConvexpoly" +name1="Mean Convex Polygon" +res<-oph.astig.meanconvexpoly(x,plotit=F) +P<-as.data.frame(do.call(rbind,res$p.values)) +colnames(P)<-"P.values" +row.names(P)<-cmbn1 +write.table(P,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=T,col.names=NA) +write.table(empty,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=T,col.names=NA) +CNT<-as.data.frame(do.call(rbind,res$centers)) +colnames(CNT)<-c("center.X","center.Y","N") +row.names(CNT)<-cmbn1 +write.table(CNT,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=T,col.names=NA) +write.table(empty,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=T,col.names=NA) +for(i in 1:length(res$conf.region.points)){ +D<-as.data.frame(res$conf.region.points[[i]]); +colnames(D)<-c(paste0(cmbn1[i],".","X"),paste0(cmbn1[i],".","Y")) +write.table(D,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=T,col.names=NA) +write.table(empty,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=T,col.names=NA) +##PDF(paste0(dir,"\\",name,".",cmbn1[i],".PDF")) +ragg::agg_tiff(paste0(dir,"/",name,".",cmbn1[i],".tiff"), width = 6, height = 7, units = "in", res = 300) +plotDAP(x[,c(2*i-1,2*i)],D,CNT[i,],Dm[[i]],paste0(name1," ",cmbn1[i])) +dev.off() +} + +name="24.DatasetMedianConvexpoly" +res<-oph.astig.datasetconvexpoly.median(x,plotit=F) +CNT<-as.data.frame(do.call(rbind,res$centers)) +colnames(CNT)<-c("center.X","center.Y","N") +row.names(CNT)<-cmbn1 +write.table(CNT,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=T,col.names=NA) +write.table(empty,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=T,col.names=NA) +Dm<-list() +for(i in 1:length(res$convex.hull.pts)){ +DD<-as.data.frame(res$convex.hull.pts[[i]]); +#Dm<-append(Dm,DD) +Dm[[i]]<-DD +colnames(DD)<-c(paste0(cmbn1[i],".","X"),paste0(cmbn1[i],".","Y")) +write.table(DD,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=T,col.names=NA) +write.table(empty,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=T,col.names=NA) +#Dm<-append(Dm,DD) +} + +name="22.MedianConvexpoly" +name1="Median Convex Polygon" +res<-oph.astig.medianconvexpoly(x,plotit=F) +P<-as.data.frame(do.call(rbind,res$p.values)) +colnames(P)<-"P.values" +row.names(P)<-cmbn1 
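+# Write the p-values, centers, and confidence-region points for each formula
+# to the output spreadsheet, and save a TIFF plot (via plotDAP) for each formula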
+write.table(P,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=T,col.names=NA) +write.table(empty,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=T,col.names=NA) +CNT<-as.data.frame(do.call(rbind,res$centers)) +colnames(CNT)<-c("center.X","center.Y","N") +row.names(CNT)<-cmbn1 +write.table(CNT,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=T,col.names=NA) +write.table(empty,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=T,col.names=NA) +for(i in 1:length(res$conf.region.points)){ +D<-as.data.frame(res$conf.region.points[[i]]); +colnames(D)<-c(paste0(cmbn1[i],".","X"),paste0(cmbn1[i],".","Y")) +write.table(D,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=T,col.names=NA) +write.table(empty,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=T,col.names=NA) +##PDF(paste0(dir,"/",name,".",cmbn1[i],".PDF")) +ragg::agg_tiff(paste0(dir,"/",name,".",cmbn1[i],".tiff"), width = 6, height = 7, units = "in", res = 300) +plotDAP(x[,c(2*i-1,2*i)],D,CNT[i,],Dm[[i]],paste0(name1," ",cmbn1[i])) +dev.off() +} + +cmbn2<-combn(cmbn1,2,simplify=F) +empty<-data.frame() + +## if dependent is False +if(f_d==F) { + +e1<-try({ +name="26.BivMeans.independent" +res<-oph.astig.indepbivmeans(x) +for(i in 1:length(cmbn1)){ +D<-as.data.frame(res[[i]]) +colnames(D)<-c(cmbn2[[i]][1],cmbn2[[i]][2],"p.value", "p.adjusted") +rownames(D)<-c("X","Y") +write.table(D,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=T,col.names=NA) +write.table(empty,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=T,col.names=NA) +} +} +) + +name="28.Bivmarg.totvars.independent" +res<-oph.astig.indepbivmarg.totvars(x) +for(i in 1:length(cmbn1)){ +D<-as.data.frame(res$results[[i]]) +colnames(D)<-c(cmbn2[[i]][1],cmbn2[[i]][2],"Ratio","p.value", "p.adjusted") +rownames(D)<-c("X","Y") +write.table(D,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=T,col.names=NA) +write.table(empty,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=T,col.names=NA) +} +D1<-as.data.frame(res$results.total) +colnames(D1)<-c("Formula_1","Formula_2","TotalVar_X","TotalVar_Y","SD_X","SD_Y","p.value", "p.adjusted") +D1$Formula_1<-cmbn1[match(D1$Formula_1,paste0("F ",c(1:length(cmbn1))))] +D1$Formula_2<-cmbn1[match(D1$Formula_2,paste0("F ",c(1:length(cmbn1))))] +write.table(D1,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=T,col.names=NA) + +} + +else { + +e1<-try({ +name="25.BivMeans.dependent" +res<-oph.astig.depbivmeans(x) +for(i in 1:length(cmbn1)){ +D<-as.data.frame(res[[i]]) +colnames(D)<-c(cmbn2[[i]][1],cmbn2[[i]][2],"p.value", "p.adjusted") +rownames(D)<-c("X","Y") +write.table(D,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=T,col.names=NA) +write.table(empty,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=T,col.names=NA) +} +} +) + +name="27.Bivmarg.totvars.dependent" +res<-oph.astig.depbivmarg.totvars(x) +for(i in 1:length(cmbn1)){ +D<-as.data.frame(res$results[[i]]) +colnames(D)<-c(cmbn2[[i]][1],cmbn2[[i]][2],"Ratio","p.value", "p.adjusted") +rownames(D)<-c("X","Y") +write.table(D,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=T,col.names=NA) +write.table(empty,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=T,col.names=NA) +} +D1<-as.data.frame(res$results.total) 
+colnames(D1)<-c("Formula_1","Formula_2","TotalVar_X","TotalVar_Y","SD_X","SD_Y","p.value", "p.adjusted") +D1$Formula_1<-cmbn1[match(D1$Formula_1,paste0("F ",c(1:length(cmbn1))))] +D1$Formula_2<-cmbn1[match(D1$Formula_2,paste0("F ",c(1:length(cmbn1))))] +write.table(D1,file=paste0(dir,"/",name,".xls"),sep="\t", quote=F,row.names=T,append=T,col.names=NA) + + +} +} + + +plotDAP<-function(rawData,CRP,center,DataMean,name){ +library("plotrix") +#rawDat<-read.table("rawData.txt",sep="\t",header=T) +#realDat<-read.table("liwData.txt",sep="\t",header=F) +colnames(rawData)<-c("X","Y") +colnames(CRP)<-c("X","Y") +colnames(center)<-c("X","Y") +colnames(DataMean)<-c("X","Y") + +D1<-data.frame(X1=CRP$X-mean(CRP$X),Y1=CRP$Y-mean(CRP$Y),X=CRP$X,Y=CRP$Y) +tmp1<-D1[D1$Y1>0,] +tmp2<-D1[D1$Y1<0,] +Tmp1<-tmp1[order(tmp1$X1,decreasing=T),] +Tmp2<-tmp2[order(tmp2$X1,decreasing=F),] +Tmp<-rbind(Tmp1,Tmp2) +Tmp$a<-atan2(Tmp$Y1,Tmp$X1) +Tmp<-Tmp[order(Tmp$a,decreasing=T),] +## add an ending point as the starting point, so that the circle is complete +newDat<-rbind(Tmp,Tmp[1,]) + +D2<-data.frame(X1=DataMean$X-mean(DataMean$X),Y1=DataMean$Y-mean(DataMean$Y),X=DataMean$X,Y=DataMean$Y) +tmp1<-D2[D2$Y1>0,] +tmp2<-D2[D2$Y1<0,] +Tmp1<-tmp1[order(tmp1$X1,decreasing=T),] +Tmp2<-tmp2[order(tmp2$X1,decreasing=F),] +Tmp<-rbind(Tmp1,Tmp2) +Tmp$a<-atan2(Tmp$Y1,Tmp$X1) +Tmp<-Tmp[order(Tmp$a,decreasing=T),] +## add an ending point as the starting point, so that the circle is complete +newDat2<-rbind(Tmp,Tmp[1,]) + +R<-4 +cos45<-cos(pi/4) +#tiff("DoubleAnglePlot.tiff") +plot(newDat$X,newDat$Y,type="p",xlim=c(-5,5),ylim=c(-5,5),col="blue",pch=4,cex=0,frame.plot=F,axes=FALSE,xlab="",ylab="",asp=1,main=paste(name,"")) +points(rawData$X,rawData$Y,type="p",col="black",pch=19,cex=0.5) +points(newDat2$X,newDat2$Y,type="p",col="purple",pch=8,cex=0) +points(center$X,center$Y,type="p",col="red",pch=15,cex=1) +lines(newDat$X,newDat$Y,col="blue",lwd=1.3) +lines(newDat2$X,newDat2$Y,col="purple",lwd=2.0) +### lines(D[,1],D[,2],type="p",col="red",pch=0,cex=5) + +draw.circle(0,0,1,lty=1,lwd=0.5) +draw.circle(0,0,2,lty=1,lwd=0.5) +draw.circle(0,0,3,lty=1,lwd=0.5) +draw.circle(0,0,4,lty=1,lwd=0.5) +segments(-1*R*cos45,-1*R*cos45,1*R*cos45,R*cos45,lty=1,lwd=0.5) +segments(-1*R*cos45,1*R*cos45,1*R*cos45,-1*R*cos45,lty=1,lwd=0.5) +segments(0,R,0,-1*R,lty=1,lwd=0.5) +segments(R,0,-1*R,0,lty=1,lwd=0.5) + +R1<-R+0.6 +text(R1,0,paste0("0",intToUtf8(176))) +text(R1*cos45,R1*cos45,paste0("22.5",intToUtf8(176))) +text(0, R1,paste0("45",intToUtf8(176))) +text(-1*R1*cos45,R1*cos45,paste0("67.5",intToUtf8(176))) +text(-1*R1,0,paste0("90",intToUtf8(176))) +text(-1*R1*cos45,-1*R1*cos45,paste0("112.5",intToUtf8(176))) +text(0,-1*R1,paste0("135",intToUtf8(176))) +text(R1*cos45,-1*R1*cos45,paste0("157.5",intToUtf8(176))) +#dev.off() + +#legend(x="bottomleft",pch=c(15,4,8),legend=c("Centroid","Mean Convex Polygon", "Dataset Convex Polygon"),col=c("red","blue","purple")) +legend(x="bottomleft",pch=c(15,NA,NA), lty=c(NA,1,1),cex=0.88,legend=c("Centroid","Mean Convex Polygon", "Dataset Convex Polygon"),col=c("red","blue","purple")) +} + +mulquant<-function(x,q=c(1:9)/10,HD=TRUE,type=8){ +# +# Estimate multiple quantiles for the data in vector x +# By default estimate the deciles +# HD=TRUE: use the Harrell-Davis estimate of the qth quantile +# HD=FALSE:use R function quantile +# +x=elimna(x) +nq=length(q) +if(HD){ +xs<-sort(x) +n<-length(x) +vecx<-seq(along=x) +xq<-0 +for (i in 1:nq){ +m1<-(n+1)*q[i] +m2<-(n+1)*(1-q[i]) +wx<-pbeta(vecx/n,m1,m2)-pbeta((vecx-1)/n,m1,m2) 
# W sub i values +xq[i]<-sum(wx*xs) +}} +if(!HD){ +xq=quantile(x,probs=q,type=type) +} +xq +} + +matbin2v<-binmat2v<-function(m,col=c(1,2),int1=c(.5,.5),int2=c(.5,.5),INC=TRUE){ +# +# pull out the rows of the matrix m based on the values in the column +# indicated by the argument +# int1 indicates intervals for first variable +# int2 indicates intervals for second variable +# By default, split at the median for both variables. + +# col indicates the columns of m by which the splits are made. + +# +# Example: binmat(m,c(1,3),c(10,15),c(2:6)) will return all rows such that the +# values in column 1 are between 10 and 15, inclusive. +# values in col 15 are between 2 and 5 +# +if(is.null(m))stop('First argument should be a matrix with two or more columns') +if(ncol(m)==1)stop('First argument should be a matrix with two or more columns') +if(INC){ +flag1=m[,col[1]]>=int1[1] +flag2=m[,col[1]]<=int1[2] +flag3=m[,col[2]]>=int2[1] +flag4=m[,col[2]]<=int2[2] +} +if(!INC){ +flag1=m[,col[1]]>int1[1] +flag2=m[,col[1]]int2[1] +flag4=m[,col[2]]0)points(pts[flag,1],pts[flag,2],pch='o') +} +output +} + + +wmw.ancbsep2.sub<-function(m,pts){ +v=wmw.ancp2(m[[1]],m[[2]],m[[3]],m[[4]],pts=pts) +v +} + +ancovap2.wmw=wmw.ancbsep2 + +ancovap2.wmw.plot<-function(x1,y1,x2,y2,pts=NULL,xlab='X1',ylab='X2',zlab='Effect Size',REV=FALSE, +xout=FALSE,outfun=outpro,SEED=TRUE, theta = 50, phi = 25){ +# +# +# Two covariates, plot the Wilcoxon--Mann--Whitney measure of effect size +# using a smoother +# +# +xy=elimna(cbind(x1,y1)) +if(ncol(xy)!=3)stop('Only two covariates can be used') +x1=xy[,1:2] +y1=xy[,3] +xy=elimna(cbind(x2,y2)) +x2=xy[,1:2] +y2=xy[,3] +if(xout){ +m<-cbind(x1,y1) +flag<-outfun(x1,plotit=FALSE)$keep +m<-m[flag,] +x1<-m[,1:2] +y1<-m[,3] +m<-cbind(x2,y2) +flag<-outfun(x2,plotit=FALSE)$keep +m<-m[flag,] +x2<-m[,1:2] +y2<-m[,3] +} +if(is.null(pts))pts=rbind(x1,x2) +N=nrow(pts) +e=wmw.ancp2(x1,y1,x2,y2,pts=pts) +if(N<25){ +library(scatterplot3d) +scatterplot3d(pts[,1],pts[,2],e,xlab=xlab,ylab=ylab,zlab=zlab) +} +if(N>=25){ +if(!REV)f=lplot(pts,e,xlab=ylab,ylab=xlab,zlab=zlab,ticktype='det',pr=FALSE,theta=theta,phi=phi) +else f=lplot(pts[,c(2,1)],e,xlab=xlab,ylab=ylab,zlab=zlab,ticktype='det',pr=FALSE,theta=theta,phi=phi) +} +list(Number_of_points_used_is=N) +} + + + + +wmw.ancp2<-function(x1,y1,x2,y2,pts=NULL,xout=FALSE,outfun=outpro){ +# +# For the regression lines corresponding to two independent groups +# estimate the conditional WMW effect size for each point in pts +# +# pts=NULL: three points used that are determined based on the data +# +x1=as.matrix(x1) +p=ncol(x1) +p1=p+1 +m1=elimna(cbind(x1,y1)) +x1=m1[,1:p] +y1=m1[,p1] +x2=as.matrix(x2) +p=ncol(x2) +p1=p+1 +m2=elimna(cbind(x2,y2)) +x2=m2[,1:p] +y2=m2[,p1] +if(xout){ +m<-cbind(x1,y1) +if(identical(outfun,reglev))flag=outfun(x1,y1,plotit=FALSE)$keep +else +flag<-outfun(x1,plotit=FALSE)$keep +m<-m[flag,] +x1<-m[,1:p] +y1<-m[,p1] +m<-cbind(x2,y2) +if(identical(outfun,reglev))flag=outfun(x2,y2,plotit=FALSE)$keep +else +flag<-outfun(x2,plotit=FALSE)$keep +m<-m[flag,] +x2<-m[,1:p] +y2<-m[,p1] +} +if(is.null(pts[1])){ +x1<-as.matrix(x1) +pts<-ancdes(x1) +pts=unique(pts) +} +pts<-as.matrix(pts) +if(is.null(pts)){ +x1<-as.matrix(x1) +pts<-ancdes(x1) +pts=unique(pts) +} +e=NA +PV=NA +n1=length(y1) +n2=length(y2) +for(i in 1:nrow(pts)){ +d1=reg.con.dist(x1,y1,pts=pts[i,]) +d2=reg.con.dist(x2,y2,pts=pts[i,]) +p=NA +for(j in 1:99)p[j]=mean(d1[j]<=d2) +e[i]=mean(p) +} +e +} + +hdmq<-function(x,q=.5,tr=FALSE){ +# +# +# Estimate one or more quantiles. 
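+# By default the Harrell-Davis estimator hd is used for each quantile in q;
+# tr=TRUE uses the trimmed version thd instead.
+# Example (not run): hdmq(x, q=c(.25,.5,.75)) estimates the three quartiles.
+#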
+e=NA +nq=length(q) +if(!tr)for(i in 1:nq)e[i]=hd(x,q[i]) +if(tr)for(i in 1:nq)e[i]=thd(x,q[i]) +e +} + +decinter<-function(x,alpha=.05,q=c(1:9)/10,nboot=1000,SEED=TRUE,method='BH'){ +# +# By default, use all deciles when dealing with interactions in a 2-by-2 design. +# The quantiles used can be altered via the argument q +# +if(SEED)set.seed(2) +if(is.matrix(x))x=listm(x) +x=elimna(x) +bv1=matrix(NA,nrow=9,ncol=nboot) +bv2=matrix(NA,nrow=9,ncol=nboot) +bv3=matrix(NA,nrow=9,ncol=nboot) +bv4=matrix(NA,nrow=9,ncol=nboot) +data<-matrix(sample(x[[1]],size=length(x[[1]])*nboot,replace=TRUE),nrow=nboot) +bv1=apply(data,1,hdmq,q=q) +data<-matrix(sample(x[[2]],size=length(x[[2]])*nboot,replace=TRUE),nrow=nboot) +bv2=apply(data,1,hdmq,q=q) +data<-matrix(sample(x[[3]],size=length(x[[3]])*nboot,replace=TRUE),nrow=nboot) +bv3=apply(data,1,hdmq,q=q) +data<-matrix(sample(x[[4]],size=length(x[[4]])*nboot,replace=TRUE),nrow=nboot) +bv4=apply(data,1,hdmq,q=q) +be=bv1-bv2-bv3+bv4 +pv=NA +nq=length(q) +ilow<-round((alpha/2) * nboot) +ihi<-nboot - ilow +ilow<-ilow+1 +vs=sort(be) +cilow=NA +ciup=NA +for(i in 1:nq){ +pv[i]=mean(be[i,]<0) +pv[i]=2*min(pv[i],1-pv[i]) +bes=sort(be[i,]) +cilow[i]=bes[ilow] +ciup[i]=bes[ihi] +} +output=matrix(NA,nrow=nq,ncol=8) +dimnames(output)=list(NULL,c('Quant','Est.Lev 1','Est.Lev 2','Dif','ci.low','ci.up','p-value','p.adj')) +output[,1]=q +e=lapply(x,hdmq,q=q) +est=e[[1]]-e[[2]]-e[[3]]+e[[4]] +output[,2]=e[[1]]-e[[2]] +output[,3]=e[[3]]-e[[4]] +output[,4]=est +output[,5]=cilow +output[,6]=ciup +output[,7]=pv +output[,8]=p.adjust(pv,method=method) +output +} + +ancsm.es<- +function(x1,y1,x2,y2,method='KMS',pts=NA,est=tmean, +fr1=1,fr2=1,nboot=NA,nmin=12,alpha=.05,xout=FALSE, +outfun=outpro,plotit=TRUE,LP=TRUE,xlab='X',ylab='Y',pch1='*',pch2='+',...){ +# +# Compare two independent groups using +# a percentile bootstrap combined with a running interval +# smooth and some robust measure of effect size: +# +#Choices for method: +# 'EP','QS','QStr','AKP','WMW','KMS' +# +# Assume data are in x1 y1 x2 and y2 +# Comparisons are made at the design points contained in the vector +# pts +# +flag.est=FALSE +if(identical(est,onestep))flag.est=TRUE +if(flag.est)LP=FALSE # Get an error when using onestep in conjunction with LP=T +if(identical(est,mom))flag.est=TRUE +xy1=elimna(cbind(x1,y1)) +x1=xy1[,1] +y1=xy1[,2] +xy2=elimna(cbind(x2,y2)) +x2=xy2[,1] +y2=xy2[,2] +if(xout){ +flag<-outfun(x1,...)$keep +x1<-x1[flag] +y1<-y1[flag] +flag<-outfun(x2,...)$keep +x2<-x2[flag] +y2<-y2[flag] +} +npt<-5 +gv1<-vector('list') +if(is.na(pts[1])){ +output=matrix(NA,5,5) +dimnames(output)=list(NULL,c('pts','Effect.Size','ci.low','ci.up','p.value')) +output[,1]=c(1:5) +isub<-c(1:5) # Initialize isub +test<-c(1:5) +xorder<-order(x1) +y1<-y1[xorder] +x1<-x1[xorder] +xorder<-order(x2) +y2<-y2[xorder] +x2<-x2[xorder] +n1<-1 +n2<-1 +vecn<-1 +for(i in 1:length(x1))n1[i]<-length(y1[near(x1,x1[i],fr1)]) +for(i in 1:length(x1))n2[i]<-length(y2[near(x2,x1[i],fr2)]) +for(i in 1:length(x1))vecn[i]<-min(n1[i],n2[i]) +sub<-c(1:length(x1)) +isub[1]<-min(sub[vecn>=nmin]) +isub[5]<-max(sub[vecn>=nmin]) +isub[3]<-floor((isub[1]+isub[5])/2) +isub[2]<-floor((isub[1]+isub[3])/2) +isub[4]<-floor((isub[3]+isub[5])/2) +mat<-matrix(NA,5,3) +dimnames(mat)<-list(NULL,c('X','n1','n2')) +for (i in 1:5){ +j<-i+5 +temp1<-y1[near(x1,x1[isub[i]],fr1)] +temp2<-y2[near(x2,x1[isub[i]],fr2)] +temp1<-temp1[!is.na(temp1)] +temp2<-temp2[!is.na(temp2)] +mat[i,1]<-x1[isub[i]] +mat[i,2]<-length(temp1) +mat[i,3]<-length(temp2) 
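+# Estimate the chosen measure of effect size, with a confidence interval and
+# p-value, using the observations near the current design point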
+test=ESfun.CI(temp1,temp2,method=method) +if(method=='KMS')output[i,2]=test$effect.size +if(method=='QS')output[i,2]=test$effect.size +if(method=='QStr')output[i,2]=test$effect.size +if(method=='WMW'){ +output[i,2]=test$p.hat +test$ci[1]=test$p.ci[1] +test$ci[2]=test$p.ci[2] +} +if(method=='AKP')output[i,2]=test$akp.effect +if(method=='EP')output[i,2]=test$Effect.Size +output[i,3]=test$ci[1] +output[i,4]=test$ci[2] +if(method!='EP')output[i,5]=test$p.value +}} +# +if(!is.na(pts[1])){ +npt<-length(pts) +output=matrix(NA,npt,5) +output[,1]=c(1:npt) +dimnames(output)=list(NULL,c('pts','Effect.Size','ci.low','ci.up','p.value')) +n1<-1 +n2<-1 +vecn<-1 +for(i in 1:length(pts)){ +n1[i]<-length(y1[near(x1,pts[i],fr1)]) +n2[i]<-length(y2[near(x2,pts[i],fr2)]) +} +mat<-matrix(NA,length(pts),3) +dimnames(mat)<-list(NULL,c('X','n1','n2')) +gv<-vector('list',2*length(pts)) +for (i in 1:length(pts)){ +j<-i+npt +temp1<-y1[near(x1,pts[i],fr1)] +temp2<-y2[near(x2,pts[i],fr2)] +temp1<-temp1[!is.na(temp1)] +temp2<-temp2[!is.na(temp2)] +test=ESfun.CI(temp1,temp2,method=method) +output[i,2]=test$effect.size +output[i,3]=test$ci[1] +output[i,4]=test$ci[2] +mat[i,1]<-pts[i] +if(length(temp1)<=5)paste('Warning, there are',length(temp1),' points corresponding to the design point X=',pts[i]) +if(length(temp2)<=5)paste('Warning, there are',length(temp2),' points corresponding to the design point X=',pts[i]) +mat[i,2]<-length(temp1) +mat[i,3]<-length(temp2) +#gv1[[i]]<-temp1 +#gv1[[j]]<-temp2 +test=ESfun.CI(temp1,temp2,method=method) +output[i,2]=test$effect.size +output[i,3]=test$ci[1] +output[i,4]=test$ci[2] +output[i,5]=test$p.value +} +} +if(plotit){ +runmean2g(x1,y1,x2,y2,fr=fr1,est=est,LP=LP,xlab=xlab,ylab=ylab,pch1=pch1,pch2=pch2,...) +} +list(mat=mat,output=output) +} + +ancsm.es<- +function(x1,y1,x2,y2,ES='KMS',npt=8,est=tmean,method='BH', +fr1=1,fr2=1,nboot=NA,nmin=12,alpha=.05,xout=FALSE,SEED=TRUE, +outfun=outpro,plotit=TRUE,LP=FALSE,xlab='X',ylab='Effect Size',pch1='*',pch2='+',...){ +# +# Compare two independent groups using +# a percentile bootstrap combined with a running interval +# smooth and some robust measure of effect size: +# +# This is done for npt covariate values, default is npt=8 +# +#. FWE is controlled based on the argument method, default is FDR +#. (Bejamini - Hochberg method). +# plotit=TRUE, plot estimates plus confidence intervals not adjusted to +# get simultaneous probability coverage. 
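+#
+# Example (illustrative sketch, not run):
+#   ancsm.es(x1, y1, x2, y2, ES='AKP', npt=8)
+# estimates the AKP effect size at eight covariate values spread over the region
+# where both groups have a sufficient number of observations, with FDR-adjusted p-values.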
+# +# Choices for ES, the measure of effect size: +# 'KMS', 'EP','QS','QStr','AKP','WMW' +# +# Assume data are in x1 y1 x2 and y2 +# +flag.est=FALSE +if(identical(est,onestep))flag.est=TRUE +if(flag.est)LP=FALSE # Get an error when using onestep in conjunction with LP=T +if(identical(est,mom))flag.est=TRUE +xy1=elimna(cbind(x1,y1)) +x1=xy1[,1] +y1=xy1[,2] +xy2=elimna(cbind(x2,y2)) +x2=xy2[,1] +y2=xy2[,2] +if(xout){ +flag<-outfun(x1,...)$keep +x1<-x1[flag] +y1<-y1[flag] +flag<-outfun(x2,...)$keep +x2<-x2[flag] +y2<-y2[flag] +} +# +# +res1=ancova(x1,y1,x2,y2,pr=FALSE,plotit=FALSE,SEED=FALSE)$output +pts=seq(res1[1,1],res1[5,1],length.out=npt) +pts=unique(pts) +npt=length(pts) +output=matrix(NA,npt,6) +output[,1]=c(1:npt) +dimnames(output)=list(NULL,c('pts','Effect.Size','ci.low','ci.up','p.value','p.adj')) +n1<-1 +n2<-1 +vecn<-1 +for(i in 1:length(pts)){ +n1[i]<-length(y1[near(x1,pts[i],fr1)]) +n2[i]<-length(y2[near(x2,pts[i],fr2)]) +} +mat<-matrix(NA,length(pts),3) +dimnames(mat)<-list(NULL,c('X','n1','n2')) +gv<-vector('list',2*length(pts)) +for (i in 1:length(pts)){ +temp1<-y1[near(x1,pts[i],fr1)] +temp2<-y2[near(x2,pts[i],fr2)] +temp1<-temp1[!is.na(temp1)] +temp2<-temp2[!is.na(temp2)] +test=ESfun.CI(temp1,temp2,method=ES,SEED=SEED,alpha=alpha) +output[i,2]=test$effect.size +output[i,3]=test$ci[1] +output[i,4]=test$ci[2] +mat[i,1]<-pts[i] +if(length(temp1)<=5)paste('Warning, there are',length(temp1),' points corresponding to the design point X=',pts[i]) +if(length(temp2)<=5)paste('Warning, there are',length(temp2),' points corresponding to the design point X=',pts[i]) +mat[i,2]<-length(temp1) +mat[i,3]<-length(temp2) +test=ESfun.CI(temp1,temp2,method=ES,alpha=alpha,SEED=SEED) +output[i,2]=test$effect.size +output[i,3]=test$ci[1] +output[i,4]=test$ci[2] +output[i,5]=test$p.value +} +output[,6]=p.adjust(output[,5],method=method) +if(plotit){ +#runmean2g(x1,y1,x2,y2,fr=fr1,est=est,LP=LP,xlab=xlab,ylab=ylab,pch1=pch1,pch2=pch2,...) +plot(c(pts,pts,pts),c(output[,2],output[,3],output[,4]),xlab=xlab,ylab=ylab,type='n') +points(pts,output[,3],pch='+') +points(pts,output[,4],pch='+') +points(pts,output[,2],pch='*') +} +list(mat=mat,output=output) +} + + +class.error.CP<-function(x1=NULL,x2=NULL,train=NULL,g=NULL,method='KNN',nboot=100,EN=TRUE,FAST=TRUE, +AUC=FALSE,SEED=TRUE,...){ +# +#. Requires ROCR pacakage +# +# For a classification methods indicated by the argument +# method +# use cross validation leaving one out. +# +#. Return a confusion matrix +# +# +# The data for the two groups can be entered via the arguments +# x1 and x2 +# or +# store all of the data in the argument train in which case g specifies the group +# +# AUC=TRUE, returns auc. Default is FALSE because conditions can be created where +# Error: $ operator is invalid for atomic vectors +# +# Current choices available: +# KNN: Nearest neighbor using robust depths +# DIS: Points classified based on their depths +# DEP: Uses depths as suggested by Makinde and Fasoranbaku (2018). JAS +# SVM: support vector machine +# RF: Random forest +# NN: neural network +# ADA: ada boost +# PRO: project the points onto a line connecting the centers of the data clouds. +# Then use estimate of the pdf for each group to make a decision about future points. +# LSM: smooth version of logistic regression when sm=TRUE; otherwise use logistic regression. 
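+#
+# Example (illustrative sketch, not run): with x1 and x2 matrices of predictor
+# values for two groups,
+#   class.error.CP(x1, x2, method='SVM')
+# estimates the leave-one-out confusion matrix for a support vector machine.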
+# +# Returns confusion matrix +# +# +# method='KNN' is default +# +# nboot=number of samples +# +if(length(method)!=1)stop('Only one method at a time is allowed') +if(SEED)set.seed(2) +if(!is.null(train)){ +if(is.null(g))stop('Argument g, group ids, must be specified') +if(is.matrix(g))if(dim(g)>1)stop('Argument g should be a vector') +flag=g==min(g) +x1=train[flag,] +x2=train[!flag,] +} +if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') +x1=elimna(x1) +x2=elimna(x2) +if(is.null(x1))stop('Something is wrong, no data in x1') +if(is.null(x2))stop('Something is wrong, no data in x2') +if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns') +x1=elimna(x1) +x2=elimna(x2) +x1=as.matrix(x1) +x2=as.matrix(x2) +dimnames(x1)=list(NULL,NULL) # can be necessary to eliminate labels to avoid an error in randomForest. +dimnames(x2)=list(NULL,NULL) +n1=nrow(x1) +n2=nrow(x2) +ns1=min(n1,nboot) +ns2=min(n2,nboot) +mn=min(ns1,ns2) +CM=matrix(0,2,2) +isub1=sample(c(1:ns1)) +isub2=sample(c(1:ns2)) +A1=NULL +A2=NULL +ic1=0 +ic2=0 +for(k in 1:mn){ +N1=isub1[k] +N2=isub2[k] +train1=x1[-N1,] +train2=x2[-N2,] +test=rbind(x1[N1,],x2[N2,]) +a=CLASS.fun(x1=train1,x2=train2,test=test,method=method,...) +a1=a[1] +a2=a[2] +A1[k]=a1 +A2[k]=a2 +if(a1==1)CM[1,1]=CM[1,1]+1 #true = 1 pred 1 +else +CM[1,2]=CM[1,2]+1 +if(a2==2)CM[2,2]=CM[2,2]+1 #true =2 and pred 2 +else +CM[2,1]=CM[2,1]+1 +} +FREQ=CM +CM=CM/(2*nboot) +F=matrix(NA,2,3) +dimnames(F)=list(c('True 1','True 2'),c('Pred 1','Pred 2','Sum')) +F[1,1]=FREQ[1,1] +F[1,2]=FREQ[1,2] +F[2,1]=FREQ[2,1] +F[2,2]=FREQ[2,2] +F[1,3]=F[1,1]+F[1,2] +F[2,3]=F[2,1]+F[2,2] +RES=F +RES[1,1]=F[1,1]/(F[1,1]+F[1,2]) +RES[1,2]=F[1,2]/(F[1,1]+F[1,2]) +RES[2,1]=F[2,1]/(F[2,1]+F[2,2]) +RES[2,2]=F[2,2]/(F[2,1]+F[2,2]) +RES[,3]=1 +auroc=NULL +if(AUC){ +library(ROCR) +PRED=c(A1,A2) +LABS=c(rep(1,length(A1)),rep(2,length(A2))) +pred=prediction(PRED,LABS) +perf=performance(pred, "auc") + auroc<- perf@y.values[[1]] + } +list(C.MAT=RES,COUNTS=F,AUC=auroc[[1]]) +} + + +class.uni.error<-function(x,y,xy=NULL,g=NULL){ +# +# For univariate data, estimate prediction error using +# a kernel density estimator. 
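+# The data for the two groups are supplied via x and y, or via xy with group
+# identifiers in g.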
+# Returns conditional estimate of the error rates +# Example: Given that a values is from group 1 erroneously classify it as coming from +# group 2 +# +# Also returns unconditional probabilities +# Example, the probability that a randomly sample subject is in group 1 and +# is classified as being in group 1 +# +# +if(!is.null(xy)){ +xy=fac2list(xy,g) +x=xy[[1]] +y=xy[[2]] +} +x=elimna(x) +y=elimna(y) +n1=length(x) +n2=length(y) +xsort=sort(x) +ysort=sort(y) +n1p=n1+1 +N=n1+n2 +if(is.null(x1))stop('Something is wrong, no data in x1') +if(is.null(x2))stop('Something is wrong, no data in x2') +UC=matrix(NA,2,2) +CP=matrix(NA,2,2) +d1<-akerd(x,pts=xsort,pyhat=TRUE,plotit=FALSE) +d2<-akerd(y,pts=xsort,pyhat=TRUE,plotit=FALSE) +e1=d1>d2 #means predict group 1 for data in group 1 +D1<-akerd(x,pts=ysort,pyhat=TRUE,plotit=FALSE) +D2<-akerd(y,pts=ysort,pyhat=TRUE,plotit=FALSE) +e2=D2>D1 # means predict group 2 for data in group 2 +CP[1,1]=mean(e1==1) +CP[1,2]=1-CP[1,1] +CP[2,2]=mean(e2==0) +CP[2,1]=1-CP[2,2] +# +e3=c(e1,e2) +UC[1,1]=sum(e3[1:n1]==1) +UC[1,2]=sum(e3[1:n1]==0) +UC[2,1]=sum(e3[n1p:N]==0) +UC[2,2]=sum(e3[n1p:N]==1) +UC=UC/sum(UC) +dimnames(CP)=list(c('True 1','True 2'),c('Pred 1','Pred 2')) +dimnames(UC)=list(c('True 1','True 2'),c('Pred 1','Pred 2')) +list(Conditional.prob=CP, Unconditiion.prob=UC,prob.correct.decision=UC[1,1]+UC[2,2]) +} + +hdno<-function(x,q=.5){ +# +# Use hd when .1<1<.9, +# otherwise use no estimator +# +x=elimna(x) +if(q<=.1 || q>=.9)e=qno.est(x,q) +else +e=hd(x,q) +e +} + +IQR2g.W<-function(x,y,nboot=100,alpha=.05,SEED=TRUE){ +# +# Wald-type test for comparing interquartile range of two independent groups. +# +sd1=bootse(x,est=IQRhd,nboot=nboot,SEED=SEED) +sd2=bootse(x,est=IQRhd,nboot=nboot,SEED=SEED) +e1=IQRhd(x) +e2=IQRhd(y) +se=sqrt(sd1^2+sd2^2) +W=(e1-e2)/se +crit=qnorm(1-alpha/2) +ci=(e1-e2)-crit*se +ci[2]=(e1-e2)+crit*se +pv=2*(1-pnorm(abs(W))) +list(Est.1=e1,Est.2=e2,Test.Stat=W,ci=ci,p.value=pv) +} + +IQRhd<-function(x){ +e=hd(x,.75)-hd(x,.25) +e +} + +decJKinter<-function(J,K,x,alpha = 0.05, q = c(1:9)/10, nboot = 1000, SEED = TRUE, + method = "BH"){ +# +# For every relevant interaction, compare multiple quantiles +# +if(is.matrix(x))x=listm(x) +INT=list() +JK=J*K +CO=con2way(J,K)$conAB +for( j in 1:ncol(CO)){ +id=which(CO[,j]!=0) +X=x[id] +INT[[j]]=decinter(X,alpha=alpha,q=q,nboot=nboot,SEED=SEED,method=method) +} +list(interactions=INT,con=CO) +} + +decJKinter<-function(J,K,x,alpha = 0.05, q = c(1:9)/10, nboot = 1000, SEED = TRUE, + method = "BH"){ +# +# For every relevant interaction, compare multiple quantiles +# +if(is.matrix(x))x=listm(x) +INT=list() +JK=J*K +CO=con2way(J,K)$conAB +for( j in 1:ncol(CO)){ +id=which(CO[,j]!=0) +X=x[id] +INT[[j]]=decinter(X,alpha=alpha,q=q,nboot=nboot,SEED=SEED,method=method) +} +list(interactions=INT,con=CO) +} + +con2by2A<-function(J,K){ +# +# For J by K design, for every two rows and two columns, +# create contrast coefficients for main effect for Factor A. +# +JK=J*K +Ja<-(J^2-J)/2 +Ka<-(K^2-K)/2 +conAB<-matrix(0,nrow=JK,ncol=Ka*Ja) +ic<-0 +for(j in 1:J){ +for(jj in 1:J){ +if(j < jj){ +for(k in 1:K){ +for(kk in 1:K){ +if(k4)nboot<-5000 +} +n<-nrow(mat) +connum<-ncol(con) +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +xbars<-apply(mat,2,est,na.rm=NA.RM,...) 
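+# Estimate each linear contrast based on the original data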
+psidat<-NA +for (ic in 1:connum)psidat[ic]<-sum(con[,ic]*xbars) +psihat<-matrix(0,connum,nboot) +psihatcen<-matrix(0,connum,nboot) +bvec<-matrix(NA,ncol=J,nrow=nboot) +data<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot) +for(ib in 1:nboot){ +bvec[ib,]<-apply(x[data[ib,],],2,est,na.rm=NA.RM,...) +} +# +# Now have an nboot by J matrix of bootstrap values. +# +test<-1 +for (ic in 1:connum){ +psihat[ic,]<-apply(bvec,1,bptdpsi,con[,ic]) +ptemp<-(sum(psihat[ic,]>0)+.5*sum(psihat[ic,]==0))/nboot +test[ic]<-ptemp +test[ic]<-2*min(test[ic],1-test[ic]) +} +ncon<-ncol(con) +if(plotit && ncol(bvec)==2){ +z<-c(0,0) +one<-c(1,1) +plot(rbind(bvec,z,one),xlab=xlab,ylab=ylab,type="n") +points(bvec) +totv<-apply(x,2,est,...) +cmat<-var(bvec) +dis<-mahalanobis(bvec,totv,cmat) +temp.dis<-order(dis) +ic<-round((1-alpha)*nboot) +xx<-bvec[temp.dis[1:ic],] +xord<-order(xx[,1]) +xx<-xx[xord,] +temp<-chull(xx) +lines(xx[temp,]) +lines(xx[c(temp[1],temp[length(temp)]),]) +abline(0,1) +} +ncon<-ncol(con) +output<-matrix(0,connum,6) +dimnames(output)<-list(NULL,c("con.num","psihat","p.value","p.adj","ci.lower","ci.upper")) +tmeans<-apply(mat,2,est,na.rm=NA.RM,...) +psi<-1 +for (ic in 1:ncol(con)){ +output[ic,2]<-sum(con[,ic]*tmeans) +output[ic,1]<-ic +output[ic,3]<-test[ic] +temp<-sort(psihat[ic,]) +icl<-round(alpha*nboot/2) +icu<-nboot-icl +icl=icl+1 +output[ic,5]<-temp[icl] +output[ic,6]<-temp[icu] +} +ids=NA +output[,4]=p.adjust(output[,3],method=method) +num.sig=sum(output[,4]<=alpha) +if(is.na(output[1,3])){ +if(pr)print('Evidently, one or more groups have too many missing values') +} +list(output=output,con=con,num.sig=num.sig) +} + +rmmcpv2<-function(x, y=NULL,con = 0, tr = 0.2, alpha = 0.05,dif=TRUE, +hoch=TRUE,na.rm=TRUE,nmin=5){ +# +# MCP on trimmed means with FWE controlled with Hochberg's method +# hoch=FALSE, will use Rom's method if alpha=.05 or .01 and number of tests is <=10 +# +# Note: confidence intervals are adjusted based on the corresponding critical p-value. 
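+#
+# Example (illustrative sketch, not run): with m an n-by-J matrix of measures on
+# the same n subjects,
+#   rmmcpv2(m, tr=.2, dif=TRUE)
+# performs all pairwise comparisons of the 20% trimmed means of the difference
+# scores, with FWE controlled via Hochberg's method.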
+# +if(!is.null(y))x=cbind(x,y) +flagcon=FALSE +if(!is.matrix(x))x<-matl(x) +if(!is.matrix(x))stop("Data must be stored in a matrix or in list mode.") +con<-as.matrix(con) +J<-ncol(x) +nval<-nrow(x) +if(sum(con^2!=0))CC<-ncol(con) +if(sum(con^2)==0)CC<-(J^2-J)/2 +ncon<-CC +#if(alpha==.05){ +#dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511) +#if(ncon > 10){ +#avec<-.05/c(11:ncon) +#dvec<-c(dvec,avec) +#}} +if(alpha==.01){ +dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101) +if(ncon > 10){ +avec<-.01/c(11:ncon) +dvec<-c(dvec,avec) +}} +if(hoch)dvec<-alpha/c(1:ncon) +if(alpha != .05 && alpha != .01)dvec<-alpha/c(1:ncon) +if(sum(con^2)==0){ +flagcon<-TRUE +psihat<-matrix(0,CC,5) +dimnames(psihat)<-list(NULL,c("Group","Group","psihat","ci.lower","ci.upper")) +test<-matrix(NA,CC,6) +dimnames(test)<-list(NULL,c("Group","Group","test","p.value","p.adj","se")) +temp1<-0 +jcom<-0 +for (j in 1:J){ +for (k in 1:J){ +if (j < k){ +jcom<-jcom+1 +y=elimna(x[,c(j,k)]) +if(is.null(dim(y)))y=matrix(c(1,1),nrow=1) +if(nrow(y)<=nmin)print(paste('Skipping group', j, ' and group', k, 'due to small sample size')) +if(nrow(y)>nmin){ +h1<-nrow(y)-2*floor(tr*nrow(y)) +df<-h1-1 +xbar=mean(y[,1],tr=tr) +xbar[2]=mean(y[,2],tr=tr) +q1<-(nrow(y)-1)*winvar(y[,1],tr) +q2<-(nrow(y)-1)*winvar(y[,2],tr) +q3<-(nrow(y)-1)*wincor(y[,1],y[,2],tr)$cov +sejk<-sqrt((q1+q2-2*q3)/(h1*(h1-1))) +if(!dif){ +test[jcom,6]<-sejk +test[jcom,3]<-(xbar[1]-xbar[2])/sejk +temp1[jcom]<-2 * (1 - pt(abs(test[jcom,3]), df)) +test[jcom,4]<-temp1[jcom] +psihat[jcom,1]<-j +psihat[jcom,2]<-k +test[jcom,1]<-j +test[jcom,2]<-k +psihat[jcom,3]<-(xbar[1]-xbar[2]) +} +if(dif){ +dv<-y[,1]-y[,2] +test[jcom,6]<-trimse(dv,tr) +temp<-trimci(dv,alpha=alpha/CC,pr=FALSE,tr=tr) +test[jcom,3]<-temp$test.stat +temp1[jcom]<-temp$p.value +test[jcom,4]<-temp1[jcom] +psihat[jcom,1]<-j +psihat[jcom,2]<-k +test[jcom,1]<-j +test[jcom,2]<-k +psihat[jcom,3]<-mean(dv,tr=tr) +psihat[jcom,4]<-temp$ci[1] +psihat[jcom,5]<-temp$ci[2] +} +}}}} +if(hoch)dvec<-alpha/c(1:ncon) +temp2<-order(0-temp1) +zvec<-dvec[1:ncon] +sigvec<-(test[temp2,4]>=zvec) +sigvec=elimna(sigvec) +if(sum(sigvec)0){ +xbar=apply(x,2,mean,tr=tr) +if(nrow(con)!=ncol(x))warning("The number of groups does not match the number + of contrast coefficients.") +ncon<-ncol(con) +psihat<-matrix(0,ncol(con),4) +dimnames(psihat)<-list(NULL,c("con.num","psihat","ci.lower","ci.upper")) +test<-matrix(0,ncol(con),5) +dimnames(test)<-list(NULL,c("con.num","test","p.value","p.adj","se")) +temp1<-NA +for (d in 1:ncol(con)){ +psihat[d,1]<-d +if(!dif){ +psihat[d,2]<-sum(con[,d]*xbar) +sejk<-0 +for(j in 1:J){ +for(k in 1:J){ +djk<-(nval-1)*wincor(x[,j],x[,k], tr)$cov/(h1*(h1-1)) +sejk<-sejk+con[j,d]*con[k,d]*djk +}} +sejk<-sqrt(sejk) +test[d,1]<-d +test[d,2]<-sum(con[,d]*xbar)/sejk +test[d,5]<-sejk +temp1[d]<-2 * (1 - pt(abs(test[d,2]), df)) +} +if(dif){ +for(j in 1:J){ +if(j==1)dval<-con[j,d]*x[,j] +if(j>1)dval<-dval+con[j,d]*x[,j] +} +temp1[d]<-trimci(dval,tr=tr,pr=FALSE)$p.value +test[d,1]<-d +test[d,2]<-trimci(dval,tr=tr,pr=FALSE)$test.stat +test[d,5]<-trimse(dval,tr=tr) +psihat[d,2]<-mean(dval,tr=tr) +}} +test[,3]<-temp1 +temp2<-order(0-temp1) +zvec<-dvec[1:ncon] +sigvec<-(test[temp2,3]>=zvec) +if(sum(sigvec)top)flag[i]=TRUE +} +outid <- NULL +if(sum(flag) > 0)outid <- vec[flag] #regression outlier +both=c(iout,outid) +blp=duplicated(both) +if(sum(!blp)>0) +blp=unique(both[blp]) +else + blp=NULL +glp=iout +if(length(blp)>0){ +flag=NULL +for(k in 1:length(blp)){ 
+flag=c(flag,which(iout==blp[k])) +} +glp=iout[-flag] +keep=vec[-blp] +} +if(plotit){ +plot(x,y,type='n',xlab=xlab,ylab=ylab) +points(x[keep],y[keep],pch='*') +points(x[blp],y[blp],pch='o') +} +list(n=n,n.out=length(iout),res.out.id=outid,keep=keep,good.lev=glp,bad.lev=blp) +} + +corblp.bca.C<-function(x,y,regfun=tsreg,varfun=pbvar,nboot=1000,alpha=.05,outfun=outpro.depth,SEED=TRUE, +plotit=FALSE,...){ +# +# Correlation based on a robust regression estimator with bad +# leverage points removes + +library(bcaboot) +if(SEED)set.seed(2) +xy=elimna(cbind(x,y)) +p1=ncol(xy) +p=p1-1 +if(p!=1)stop('Only a single independent variable is allowed') +x=xy[,1] +y=xy[,2] +n=length(y) +est=corblp(x,y,regfun=regfun,varfun=varfun)$cor +a=bcajack2(xy,1000,corblp.sub,alpha=alpha/2,regfun=regfun,varfun=varfun) +ci=c(a$lims[1,1],a$lims[3,1]) +list(cor=est,ci=ci) +} + +corblp.sub<-function(xy,regfun=tsreg,varfun=pbvar){ +X=xy[,1] +Y=xy[,2] +rest=corblp(X,Y,regfun=regfun,varfun=varfun)$cor +rest +} + + +corblppb<-function(x,y,regfun=tsreg,varfun=pbvar,nboot=1000,alpha=.05,outfun=outpro.depth,SEED=TRUE, +plotit=FALSE,...){ +# +# Correlation based on a robust regression estimator with bad +# leverage points removes +# +if(SEED)set.seed(2) +xy=elimna(cbind(x,y)) +p1=ncol(xy) +p=p1-1 +if(p!=1)stop('Only a single independent variable is allowed') +x=xy[,1] +y=xy[,2] +v=NA +n=length(y) +for(i in 1:nboot){ +id=sample(n,replace=TRUE) +v[i]=corblp(x[id],y[id],regfun=regfun,varfun=varfun)$cor +} +P=mean(v<0) +pv=2*min(P,1-P) +sv=sort(v) +ilow<-round((alpha/2) * nboot) +ihi<-nboot - ilow +ilow<-ilow+1 +ci<-sv[ilow] +ci[2]<-sv[ihi] +est=corblp(x,y,regfun=regfun,varfun=varfun) +list(cor.est=est$cor,p.value=pv,ci=ci) +} + + + reg.slope.only<-function(m,regfun=tsreg,...){ + # + # Assume data are in a matrix with 2 columns, used by BCA method +a=regfun(m[,1],m[,2],...)$coef[2] +a +} + + reg.bca<- +function(x,y,alpha=.05,nboot=1000,regfun=tsreg,SEED=TRUE,...){ +# +# BCA confidence interval for slope of a linear regression model +# +# Method: Bias corrected accelerated bootstrap +# +library(bcaboot) +if(SEED)set.seed(2) +m=elimna(cbind(x,y)) +n=nrow(m) +if(ncol(m)!=2)stop('Only one independent variable is allowed') +e=regfun(m[,1],m[,2])$coef[2] +a=bcajack(m,nboot,reg.slope.only,alpha=alpha/2,verbose=FALSE,regfun=regfun,...) +list(n=n,slope=e,ci.low=a$lims[1,1],ci.upper=a$lims[3,1]) +} + +reghet.blp<-function(x,y,regfun=tsreg,HH=TRUE,...){ + +# Eliminate bad leverage points using a heteroscedastic method +# Then estimate the parameters. +# +xx<-cbind(x,y) +xx<-elimna(xx) +if(ncol(xx)!=2)stop('Current version limited to a single independent variable') +x<-xx[,1] +y=xx[,2] +if(HH)id= outblp.HH(x,y,regfun=regfun,plotit=FALSE)$keep +else id=regcon.out(x,y,plotit=FALSE)$keep +e=regfun(x[id],y[id],...) 
+e +} + + +reghet.blp.ci<-function(x,y,regfun=tsreg,nboot=999,HH=TRUE, +SEED=TRUE,BCA=FALSE,pr=TRUE,...){ + +# Eliminate bad leverage points using a heteroscedastic method +# Then compute a confidence interval for the slope +# +#Use bias corrected accelerated bootstrap when BCA=TRUE, +# otherwise use a percentile bootstrap +# +xx<-cbind(x,y) +if(ncol(xx)!=2)stop('Current version limited to a single independent variable') +xx<-elimna(xx) +n=nrow(xx) +if(!BCA & n<50) #print('Might be safer to use BCA=TRUE') +if(BCA & pr)print('Note: when BCA=TRUE, only returns a confidence interval for the slope') +x<-xx[,1] +y=xx[,2] +if(HH)id= outblp.HH(x,y,regfun=regfun,plotit=FALSE)$keep +else id=regcon.out(x,y,plotit=FALSE)$keep +if(BCA)e=reg.bca(x[id],y[id],SEED=SEED,regfun=regfun) +else e=regci(x[id],y[id],SEED=SEED,regfun=regfun,nboot=nboot,...) +e +} + + +outblp.HH<-function(x,y,regfun=tsreg,omit.col=NULL,plotit=TRUE,xlab='X',ylab='Y'){ +# +# indicates which points, if any, are bad leverage points +# using a blend of a homoscedastic and heteroscedastic methods. +# +# This approach helps to avoid issues with Type I errors when testing hpotheses +# +# If for example +# omit.col=c(1,3) +# columns 1 and 3 of x are ignored when checking for bad leverage points. +# These columns might be, for example, dummy variables. +# +library(MASS) +xy=elimna(cbind(x,y)) +n=nrow(xy) +x=as.matrix(x) +p=ncol(x) +p1=p+1 +x=xy[,1:p] +y=xy[,p1] +if(p>1){ +if(!is.null(omit.col)) +x=x[,-omit.col] +} +x<-as.matrix(x) +out.id=NULL +temp=reglev.gen(x,y,regfun=regfun,plotit=FALSE) +out.id=temp$bad.lev +temp2=regcon.out(x,y,plotit=FALSE) +vec=keep=c(1:n) +out.id=unique(c(out.id,temp2$bad.lev)) +if(length(out.id)>0)keep=vec[-out.id] +n.out=length(out.id) +if(plotit){ +plot(x,y,type='n',xlab=xlab,ylab=ylab) +points(x[keep],y[keep],pch='*') +points(x[out.id],y[out.id],pch='o') +} +list(n=n,n.out=n.out,bad.lev=out.id,keep=keep) +} + + + +regHH<-function(x,y,regfun=tsreg,SO=FALSE,...){ +# +# +# SO=TRUE, estimate slope only, convenient for some bootstrap methods +# +xy=elimna(cbind(x,y)) +if(ncol(xy)!=2)stop('Current version limited to a single independent variable') +id= outblp.HH(xy[,1],xy[,2])$keep +if(!SO)e=regfun(xy[id,1],xy[id,2],...) +else e=regfun(xy[id,1],xy[id,2])$coef +list(coef=e) +} + + +regciHH<-function(x,y,regfun=tsreg,nboot=599,alpha=.05,SEED=TRUE,pr=TRUE,null.val=NULL, +method='hoch',plotit=FALSE,xlab='Predictor 1',ylab='Predictor 2',WARNS=FALSE,LABELS=FALSE,...){ +# +# Compute a .95 confidence interval for each of the parameters of +# a linear regression equation. The default regression method is +# the Theil-Sen estimator. +# +# Use method HH to eliminate bad leverage points +# +# When using the least squares estimator, and when n<250, use +# lsfitci instead. +# +# The predictor values are assumed to be in the n by p matrix x. +# The default number of bootstrap samples is nboot=599 +# +# regfun can be any R function that returns the coefficients in +# the vector regfun$coef, the first element of which contains the +# estimated intercept, the second element contains the estimated of +# the first predictor, etc. +# +# plotit=TRUE: If there are two predictors, plot 1-alpha confidence region based +# on the bootstrap samples. 
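+#
+# Example (illustrative sketch, not run):
+#   regciHH(x, y, regfun=tsreg, nboot=599)
+# removes bad leverage points via outblp.HH and then computes percentile bootstrap
+# confidence intervals and p-values for the intercept and slopes.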
+# +x<-as.matrix(x) +p1<-ncol(x)+1 +p<-ncol(x) +xy<-cbind(x,y) +xy<-elimna(xy) +x<-xy[,1:p] +y<-xy[,p1] +nrem=length(y) + +flag<-outblp.HH(x,y,plotit=FALSE,...)$keep +xy<-xy[flag,] +x<-xy[,1:p] +y<-xy[,p1] + +estit=regfun(x,y,...)$coef +if(is.null(null.val))null.val=rep(0,p1) +flagF=FALSE +flagF=identical(regfun,tsreg) +if(flagF){if(pr){ +if(sum(duplicated(y)>0))print('Duplicate values detected; tshdreg might have more power than tsreg') +}} +nv=length(y) +x<-as.matrix(x) +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +if(pr)print('Taking bootstrap samples. Please wait.') +data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot) +if(!WARNS)options(warn=-1) +bvec<-apply(data,1,regboot,x,y,regfun,xout=FALSE,...) +options(warn=0) +#Leverage points already removed. +# bvec is a p+1 by nboot matrix. The first row +# contains the bootstrap intercepts, the second row +# contains the bootstrap values for first predictor, etc. +regci<-matrix(0,p1,6) +vlabs='Intercept' +for(j in 2:p1)vlabs[j]=paste('Slope',j-1) +if(LABELS)vlabs[2:p1]=labels(x)[[2]] +dimnames(regci)<-list(vlabs,c('ci.low','ci.up','Estimate','S.E.','p-value','p.adj')) +ilow<-round((alpha/2) * nboot) +ihi<-nboot - ilow +ilow<-ilow+1 +se<-NA +pvec<-NA +for(i in 1:p1){ +bsort<-sort(bvec[i,]) +#pvec[i]<-(sum(bvec[i,]<0)+.5*sum(bvec[i,]==0))/nboot +pvec[i]<-(sum(bvec[i,].5)pvec[i]<-1-pvec[i] +regci[i,1]<-bsort[ilow] +regci[i,2]<-bsort[ihi] +se[i]<-sqrt(var(bvec[i,])) +} +if(p1==3){ +if(plotit){ +plot(bvec[2,],bvec[3,],xlab=xlab,ylab=ylab) +}} +regci[,3]=estit +pvec<-2*pvec +regci[,4]=se +regci[,5]=regci[,6]=pvec +regci[2:p1,6]=p.adjust(pvec[2:p1],method=method) +list(regci=regci,n=nrem,n.keep=nv) +} + +regci.het.blp<-function(x,y,regfun=tsreg,nboot=599,alpha=.05,SEED=TRUE,pr=TRUE,null.val=NULL, +method='hoch',plotit=FALSE,xlab='Predictor 1',ylab='Predictor 2',WARNS=FALSE,LABELS=FALSE,...){ +# +# Compute a .95 confidence interval for each of the parameters of +# a linear regression equation. The default regression method is +# the Theil-Sen estimator. +# +# Use method HH to eliminate bad leverage points +# +# When using the least squares estimator, and when n<250, use +# lsfitci instead. +# +# The predictor values are assumed to be in the n by p matrix x. +# The default number of bootstrap samples is nboot=599 +# +# regfun can be any R function that returns the coefficients in +# the vector regfun$coef, the first element of which contains the +# estimated intercept, the second element contains the estimated of +# the first predictor, etc. +# +# plotit=TRUE: If there are two predictors, plot 1-alpha confidence region based +# on the bootstrap samples. +# +x<-as.matrix(x) +p1<-ncol(x)+1 +p<-ncol(x) +xy<-cbind(x,y) +xy<-elimna(xy) +x<-xy[,1:p] +y<-xy[,p1] +nrem=length(y) + +flag<-regcon.out(x,y,plotit=FALSE,...)$keep +xy<-xy[flag,] +x<-xy[,1:p] +y<-xy[,p1] + +estit=regfun(x,y,...)$coef +if(is.null(null.val))null.val=rep(0,p1) +flagF=FALSE +flagF=identical(regfun,tsreg) +if(flagF){if(pr){ +if(sum(duplicated(y)>0))print('Duplicate values detected; tshdreg might have more power than tsreg') +}} +nv=length(y) +x<-as.matrix(x) +if(SEED)set.seed(2) # set seed of random number generator so that +# results can be duplicated. +if(pr)print('Taking bootstrap samples. Please wait.') +data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot) +if(!WARNS)options(warn=-1) +bvec<-apply(data,1,regboot,x,y,regfun,xout=FALSE,...) 
+options(warn=0)
+#Leverage points already removed.
+# bvec is a p+1 by nboot matrix. The first row
+# contains the bootstrap intercepts, the second row
+# contains the bootstrap values for first predictor, etc.
+regci<-matrix(0,p1,6)
+vlabs='Intercept'
+for(j in 2:p1)vlabs[j]=paste('Slope',j-1)
+if(LABELS)vlabs[2:p1]=labels(x)[[2]]
+dimnames(regci)<-list(vlabs,c('ci.low','ci.up','Estimate','S.E.','p-value','p.adj'))
+ilow<-round((alpha/2) * nboot)
+ihi<-nboot - ilow
+ilow<-ilow+1
+se<-NA
+pvec<-NA
+for(i in 1:p1){
+bsort<-sort(bvec[i,])
+#pvec[i]<-(sum(bvec[i,]<0)+.5*sum(bvec[i,]==0))/nboot
+pvec[i]<-(sum(bvec[i,]<null.val[i])+.5*sum(bvec[i,]==null.val[i]))/nboot
+if(pvec[i]>.5)pvec[i]<-1-pvec[i]
+regci[i,1]<-bsort[ilow]
+regci[i,2]<-bsort[ihi]
+se[i]<-sqrt(var(bvec[i,]))
+}
+if(p1==3){
+if(plotit){
+plot(bvec[2,],bvec[3,],xlab=xlab,ylab=ylab)
+}}
+regci[,3]=estit
+pvec<-2*pvec
+regci[,4]=se
+regci[,5]=regci[,6]=pvec
+regci[2:p1,6]=p.adjust(pvec[2:p1],method=method)
+list(regci=regci,n=nrem,n.keep=nv)
+}
+
+reglev.est<-function(x,y,regfun=tsreg,...){
+#
+# Use homoscedastic method to remove bad leverage points and estimate parameters using remaining data
+#
+x<-as.matrix(x)
+p1<-ncol(x)+1
+p<-ncol(x)
+xy<-cbind(x,y)
+xy<-elimna(xy)
+x<-xy[,1:p]
+y<-xy[,p1]
+x=as.matrix(x)
+nrem=length(y)
+id=reglev.gen(x,y,plotit=FALSE,...)$keep
+e=regfun(x[id,],y[id],...)$coef
+e
+}
+
+wmw.det<-function(x,y,refp=NULL,plotit=FALSE,xlab='Difference',ylab='Density',
+plotfun=kerSORT){
+#
+#
+# If execution time is an issue when plotting, try plotfun=skerd
+#
+#
+# Compare the tails of the distribution of x-y based on a specified value
+# indicated by the argument
+# refp
+# That is compare P(x-y< -refp) vs P(x-y > refp)
+#
+#
+if(is.null(refp))stop('No reference point was provided')
+x<-x[!is.na(x)]
+y<-y[!is.na(y)]
+refp=abs(refp)
+m<-outer(x,y,FUN='-')
+if(refp!=0){
+L=mean(m<=0-refp)
+U=mean(m>=refp)
+}
+else{
+L=mean(m<0)
+U=mean(m>0)
+}
+if(plotit)plotfun(as.vector(m),xlab=xlab,ylab=ylab)
+list(L=L,U=U,dif=U-L)
+}
+
+wmw.ref.dif<-function(x,y,q=.25,pts=NULL,nboot=1000,alpha=.05,SEED=TRUE,
+plotit=FALSE,xlab='Difference',ylab='Density',estfun=hdmq,
+plotfun=kerSORT){
+#
+# If pts is specified, the goal is to make inferences about
+# P(x-y< -pts)-P(x-y > pts)
+# using a percentile bootstrap method
+#
+# If pts is not specified, make inferences
+# about the 1-q and q quantiles. If X and Y
+# have identical distributions, D=X-Y is symmetric about zero and the sum of the
+# 1-q and qth quantiles is zero. Should not be used when there are tied values
+#
+# If QC=FALSE and pts=NULL,
+# take pts to be an estimate of the q quantile of D.
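+#
+# A usage sketch with hypothetical simulated data (not part of the original file):
+# set.seed(1)
+# x=rnorm(40)
+# y=rnorm(40)
+# wmw.ref.dif(x,y,q=.25)  # compare the .25 and .75 quantiles of D=X-Y
+# wmw.ref.dif(x,y,pts=1)  # compare P(X-Y < -1) with P(X-Y > 1)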
+# +# if pts is not NULL, QC=FALSE is used +# +# Output: +# L=P(x-y< -pts) +# U = P(x-y > pts) +# Est.dif=U-L +# +QC=TRUE +if(!is.null(pts))QC=FALSE +if(is.null(pts)){ +if(sum(q<.5)!=length(q))stop('All q values should be <=.5') +} +if(SEED)set.seed(2) +d=NA +x<-x[!is.na(x)] +y<-y[!is.na(y)] +n1=length(x) +n2=length(y) +if(!QC){ +if(!is.null(pts)){ +e=wmw.det(x,y,refp=pts,plotit=plotit,xlab=xlab,ylab=ylab,plotfun=plotfun) +est=e$dif +L=e$L +U=e$U +} +else{ +pts=qest(outer(x,y,FUN='-'),q=q) +e=wmw.det(x,y,refp=pts,plotit=plotit,xlab=xlab,ylab=ylab,plotfun=plotfun) +est=e$dif +L=e$L +U=e$U +}} + +if(QC){ +d=outer(x,y,FUN='-') +d=as.vector(d) +qv=estfun(d,q=c(q,1-q)) +if(plotit)plotfun(d,xlab=xlab,ylab=ylab) +est=qv[1]+qv[2] +L=qv[1] +U=qv[2] +} +for(i in 1:nboot){ +id1=sample(n1,replace=TRUE) +id2=sample(n2,replace=TRUE) +if(!QC)d[i]=wmw.det(x[id1],y[id2],refp=pts,plotit=FALSE)$dif +else{ +qv=estfun(outer(x[id1],y[id2],FUN='-'),q=c(q,1-q)) +d[i]=qv[1]+qv[2] +}} +crit<-alpha/2 +icl<-round(crit*nboot)+1 +icu<-nboot-icl +dif=sort(d) +ci=dif[icl] +ci[2]=dif[icu] +pv=mean(dif<0)+.5*mean(dif==0) +pv<-2*min(pv,1-pv) +list(L=L,U=U,Est.dif=est,ci=ci,p.value=pv) +} + +wmw.ref.mul<-function(x,y,refp=NULL,pts=NULL,q=seq(.6,.9,.1), center=FALSE, estfun=hdmq, alpha=.05,nboot=1000,SEED=TRUE,method='BH',plotit=FALSE, +xlab='Difference',ylab='Density', +plotfun=kerSORT){ +# +# +# For multiple reference points, refp, +# make inferences about P(x-y< -refp) vs P(x-y > refp) +# refp can be constants chosen by the user. If not specified, +# refp are taken to be the .6(.1).9 estimated quantiles of the distribution of X-Y +# +# pts can be used to indicate specified reference points, refp +# +# To use the Harrell-Davis estimator, set estfun=hdmq +# +if(SEED)set.seed(2) +if(!is.null(pts))refp=pts +x=elimna(x) +y=elimna(y) +if(is.null(refp)){ +m=outer(x,y,FUN='-') +m=as.vector(m) +morig=m + +if(center)m=m-median(m) +refp=estfun(m,q) +} +np=length(refp) +output<-matrix(NA,np,8) +dimnames(output)=list(NULL,c('Pts','P(x-y<-Pts)' ,'P(x-y>Pts)','Dif','ci.low','ci.up','p.value','p.adj')) +for(i in 1:np){ +e=wmw.ref.dif(x,y,pts=refp[i],alpha=alpha,nboot=nboot,SEED=FALSE) +output[i,1:7]=c(refp[i],e$L,e$U,e$Est.dif,e$ci[1],e$ci[2],e$p.value) +} +output[,8]=p.adjust(output[,7],method=method) +if(plotit)plotfun(as.vector(morig),xlab=xlab,ylab=ylab) +output +} + +wmw.QC.mul<-function(x,y,q=seq(.1,.4,.1), estfun=hdmq, alpha=.05,nboot=1000,SEED=TRUE,method='BH',plotit=FALSE, +xlab='Difference',ylab='Density', +plotfun=kerSORT){ +# +# +# For multiple reference quantiles, q>.5, +# make inferences about P(x-y< -refp) vs P(x-y > refp) +#. where refp is the q<.5 quantile +# refp can be constants chosen by the user. If not specified, +# refp are taken to be the .6(.1).9 estimated quantiles of the distribution of X-Y +# +# To use the Harrell-Davis estimator, set estfun=hdmq which is the default +# +if(SEED)set.seed(2) +x=elimna(x) +y=elimna(y) +np=length(q) +output<-matrix(NA,np,8) +dimnames(output)=list(NULL,c('q','q.quant' ,'1-q.quant','Sum','ci.low','ci.up','p.value','p.adj')) +for(i in 1:np){ +e=wmw.QC(x,y,q=q[i],alpha=alpha,nboot=nboot,SEED=FALSE) +output[i,1:7]=c(q[i],e$L,e$U,e$Est.dif,e$ci[1],e$ci[2],e$p.value) +} +output[,8]=p.adjust(output[,7],method=method) +if(plotit)plotfun(as.vector(morig),xlab=xlab,ylab=ylab) +output +} + +tailci<-function(x,y,pts=NULL,q=.25, nboot=1000,estfun=hdmq,alpha=.05,SEED=TRUE){ +# +# If pts is specified, the goal is to compute a confidence interval +# P(x-y< pts). 
If pts is not specified, it is taken to be an estimate of the qth quantile, +# q=.25 is the default. +# +if(SEED)set.seed(2) +x<-x[!is.na(x)] +y<-y[!is.na(y)] +n1=length(x) +n2=length(y) +qe=NA +m=as.vector(outer(x,y,FUN='-')) +if(is.null(pts))e=estfun(m,q) +else e=pts +est=mean(m<=e) +V=NA +for(i in 1:nboot){ +id1=sample(n1,replace=TRUE) +id2=sample(n2,replace=TRUE) +M=as.vector(outer(x[id1],y[id2],FUN='-')) +V[i]=mean(M<=e) +} +crit<-alpha/2 +icl<-round(crit*nboot)+1 +icu<-nboot-icl +a=sort(V) +ci=a[icl] +ci[2]=a[icu] +list(Est=est,ci=ci) +} + +difqci=tailci + + +difqci.mul<-function(x,y,refp=NULL,pts=NULL,q=seq(.1,.4,.1), estfun=hdmq, center=FALSE, alpha=.05,nboot=1000,SEED=TRUE,method='BH',plotit=FALSE, +xlab='Difference',ylab='Density', +plotfun=kerSORT){ +# +# +# For multiple reference points, refp, +# make inferences about P(x-y< refp) +# refp can be constants chosen by the user. If not specified, +# refp are taken to be the .6(.1).9 estimated quantiles of the distribution of X-Y +# +# pts can be used to indicate specified reference points, refp +# +# If refp=NULL, reference points are based on estimates of the q quantiles +# Default is q=.1(.1).4 +# +# Her, use the Harrell-Davis estimator by default, estfun=hdmq +# +if(SEED)set.seed(2) +if(!is.null(pts))refp=pts +x=elimna(x) +y=elimna(y) +FLAG=TRUE +m=outer(x,y,FUN='-') +m=as.vector(m) +morig=m + +if(center)m=m-median(m) +if(is.null(refp)){ +FLAG=FALSE +refp=estfun(m,q) +} +np=length(refp) +if(!FLAG){ +output<-matrix(NA,np,5) +dimnames(output)=list(NULL,c('q','Pts','P(x-y2)stop('Should have only two variables') +v1=pbvar(x[,1]) +v2=pbvar(x[,2]) +v3=pbcor(x[,1],x[,2])$cor +a=v1+v2 -2*v3*sqrt(v1)*sqrt(v2) +e=sqrt(2)*(locfun(x[,1])-locfun(x[,2]))/sqrt(a) +e +} + +rm.marg.OMCI<-function(x,y=NULL,locfun=onestep,nboot=1000,SEED=TRUE,alpha=.05, +null.val=0,MC=FALSE,...){ +# +# Two dependent groups. +# Confidence interval for effect size that takes into account heteroscedasticity as well as the +# association between X and Y based on the marginal distributions, not the +# difference scores. For robust estimators, these two approaches generally give +# different results. +# +library(MASS) +if(!is.null(y))x=cbind(x,y) +x=elimna(x) +if(SEED)set.seed(2) +e=rm.margOM.es(x) +n=nrow(x) +if(!MC){ +v=NA +for(i in 1:nboot){ +id=sample(n,replace=TRUE) +v[i]=rm.margOM.es(x[id,],locfun=locfun) +} +} +if(MC){ +library(parallel) +d=list() +for(j in 1:nboot){ +id=sample(n,replace=TRUE) +d[[j]]=x[id,] +} +v=mclapply(d,rm.margOM.es,locfun=locfun) +v=matl(v) +} + +v=sort(v) +ilow<-round((alpha/2) * nboot) +ihi<-nboot - ilow +ilow<-ilow+1 +ci=v[ilow] +ci[2]=v[ihi] +pv=mean(v0)flag=flag[-flag.out] +x1<-x1[flag] +y1<-y1[flag] +x2<-x2[flag] +y2<-y2[flag] +} +if(is.null(pts)){ +if(QM){ +q1=qest(x1,q) +q2=qest(x2,q) +pts=max(q1,q2) +u1=qest(x1,1-q) +u2=qest(x2,1-q) +up=min(u1,u2) +pts[2]=mean(c(pts[1],up)) +pts[3]=up +} +if(!QM)pts=ancova(x1,y1,x2,y2,pr=FALSE,plotit=FALSE,SEED=FALSE)$output[,1] +} + +# Use a bootstrap estimate of the covariance matrix to get an estimate of +# a marginal measure of dispersion. 
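+# That is, for each covariate value in pts, the effect size computed below is
+# sqrt(2)*(Yhat1-Yhat2)/sqrt(n*SQE), where Yhat1 and Yhat2 are the predicted values
+# at that point and SQE is the sum of the bootstrap variances of the two predicted
+# values minus twice their bootstrap covariance.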
+n=length(y1) +if(n!=length(y2))stop('Time one sample size not equal to time two sample size') +npts=length(pts) +e1=matrix(NA,Nboot,npts) +e2=matrix(NA,Nboot,npts) +for(i in 1:Nboot){ +flag=sample(n,replace=TRUE) +e1[i,]=regYhat(x1[flag],y1[flag],xr=pts,regfun=regfun) +e2[i,]=regYhat(x2[flag],y2[flag],xr=pts,regfun=regfun) +} +CV=NA +sq1=apply(e1,2,var) +sq2=apply(e2,2,var) +for(j in 1:npts)CV[j]=cov(e1[,j],e2[,j]) +SQE=sq1+sq2-2*CV +bot=n*SQE +es=(regYhat(x1,y1,xr=pts,regfun=regfun)-regYhat(x2,y2,xr=pts,regfun=regfun))/sqrt(bot) +es=sqrt(2)*es +mat=cbind(pts,es) +if(plotit)reg2plot(x1,y1,x2,y2,regfun=regfun,xlab=xlab,ylab=ylab) +dimnames(mat)=list(NULL,c('pts','Effect.size')) +mat +} + +ancovad.ES.SEpb<-function(x1,y1,x2,y2,nboot=100,regfun=tsreg,pts=0,SEED=TRUE){ +# +# Estimate standard error +# +n1=length(x1) +npts=length(pts) +if(SEED)set.seed(2) +v=matrix(NA,nrow=nboot,ncol=npts) +for(i in 1:nboot){ +id1=sample(n1,replace=TRUE) +X1=x1[id1] +Y1=y1[id1] +X2=x2[id1] +Y2=y2[id1] +v[i,]=ancovad.ES(X1,Y1,X2,Y2,regfun=regfun,pts=pts,plotit=FALSE,SEED=FALSE)[,2] +} +se=apply(v,2,sd,na.rm=TRUE) +se +} + + +ancovad.ESci<-function(x1,y1,x2,y2,pts=NULL,regfun=tsreg,alpha=.05,nboot=100,SEED=TRUE, +QM=FALSE,ql=.2, +xout=FALSE,outfun=outpro,xlab='Pts',ylab='Y',method='hoch',plotit=TRUE){ +# +# Two dependent groups. +# +# For each specified value for x, compute a heteroscedastic measure of effect +# So if x=2, rather can compare the groups using some specified measure of location, use a +# measure of effect size that takes into account the conditional measure of dispersion of y given x. +# +# if pts=NULL and +# QM=FALSE: pick covariate points based on default points used by the R funtcion ancovad +# +# ql determines quantiles of x that form the range of points +# pts can be used to specify the points x, if NULL, the function picks three values +# The function tests the hypothesis that the measure of effect is zero, no effect. +# +# iter=100: number of replications used to estimate the standard error. 
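+#
+# A usage sketch with hypothetical simulated data for two dependent groups
+# (not part of the original file):
+# set.seed(1)
+# x1=rnorm(50); y1=x1+rnorm(50)
+# x2=rnorm(50); y2=x2+rnorm(50)
+# ancovad.ESci(x1,y1,x2,y2)         # effect sizes at the default covariate points
+# ancovad.ESci(x1,y1,x2,y2,QM=TRUE) # covariate points based on the ql and 1-ql quantiles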
+# +# +xy=elimna(cbind(x1,y1,x2,y2)) +n=nrow(xy) +if(ncol(xy)!=4)stop('Only one covariate can be used') +x1=xy[,1] +y1=xy[,2] +x2=xy[,3] +y2=xy[,4] +if(xout){ +flag=c(1:n) +flag1=out.methods(x1,y1,regfun=regfun,plotit=FALSE,id=id)$out.id +flag2=out.methods(x2,y2,regfun=regfun,plotit=FALSE,id=id)$out.id +flag.out=unique(c(flag1,flag2)) +if(length(flag.out)>0)flag=flag[-flag.out] +x1<-x1[flag] +y1<-y1[flag] +x2<-x2[flag] +y2<-y2[flag] +} +if(is.null(pts)){ +if(QM){ +qu=1-ql +q1=qest(x1,ql) +q2=qest(x2,ql) +pts=max(q1,q2) +u1=qest(x1,qu) +u2=qest(x2,qu) +up=min(u1,u2) +pts[2]=mean(c(pts[1],up)) +pts[3]=up +} +pts=unique(pts) +if(!QM)pts=ancovad.ES(x1,y1,x2,y2,regfun=regfun,plotit=FALSE,SEED=FALSE)[,1] +} +npts=length(pts) +RES=matrix(NA,nrow=npts,ncol=8) +SE=ancovad.ES.SEpb(x1,y1,x2,y2,regfun=regfun,nboot=nboot,pts=pts,SEED=FALSE) +a=ancovad.ES(x1,y1,x2,y2,regfun=regfun,pts=pts,SEED=FALSE,plotit=plotit,xlab=xlab,ylab=ylab) +RES[,1]=a[,1] +RES[,2]=a[,2] +RES[,3]=SE +RES[,5]=RES[,2]-qnorm(1-alpha/2)*RES[,3] +RES[,6]=RES[,2]+qnorm(1-alpha/2)*RES[,3] +test=RES[,2]/RES[,3] +pv=2*(1-pnorm(abs(test))) +RES[,7]=pv +RES[,4]=test +dimnames(RES)=list(NULL,c('pts','Est.','SE','Test.Stat','ci.low','ci.up','p-value','p.adjusted')) +RES[,8]=p.adjust(RES[,7],method=method) +if(plotit){ +xa=c(pts,pts,pts) +ya=c(RES[,2],RES[,5],RES[,6]) +plot(xa,ya,xlab=xlab,ylab='ES',type='n') +lines(pts,RES[,5],lty=2) +lines(pts,RES[,2]) +lines(pts,RES[,6],lty=2) +} +RES +} + + +tailsci.mul=difqci.mul + +outDETMCD<-function(x,cov.fun=DETMCD,xlab='X',ylab='Y',qval=.975, +crit=NULL,KS=TRUE,plotit=FALSE,...){ +# +# Search for outliers using robust measures of location and scatter, +# which are used to compute robust analogs of Mahalanobis distance. +# +# x is an n by p matrix or a vector of data. +# +# The function returns the values flagged as an outlier plus +# the (row) number where the data point is stored. +# If x is a vector, out.id=4 indicates that the fourth observation +# is an outlier and outval=123 indicates that 123 is the value. +# If x is a matrix, out.id=4 indicates that the fourth row of +# the matrix is an outlier and outval reports the corresponding +# values. +# +# The function also returns the distance of the +# points identified as outliers +# in the variable dis. +# +# For bivariate data, if plotit=TRUE, plot points and circle outliers. +# +# cov.fun determines how the measure of scatter is estimated. +# The default is covDETMCD +# Possible choices are +# cov.mve (the MVE estimate) +# cov.mcd (the MCD estimate) +# covmba2 (the MBA or median ball algorithm) +# rmba (an adjustment of MBA suggested by D. 
Olive) +# cov.roc (Rockes TBS estimator) +# +# plotit=FALSE used to avoid problems when other functions in WRS call +# this function +# +# KS=TRUE: keep the seed that was used +# +if(is.data.frame(x))x=as.matrix(x) +if(is.list(x))stop('Data cannot be stored in list mode') +nrem=nrow(as.matrix(x)) +if(!is.matrix(x)){ +dis<-(x-median(x,na.rm=TRUE))^2/mad(x,na.rm=TRUE)^2 +if(is.null(crit))crit<-sqrt(qchisq(.975,1)) +vec<-c(1:length(x)) +} +if(is.matrix(x)){ +mve<-cov.fun(elimna(x)) +dis<-mahalanobis(x,mve$center,mve$cov) +if(is.null(crit))crit<-sqrt(qchisq(.975,ncol(x))) +vec<-c(1:nrow(x)) +} +dis[is.na(dis)]=0 +dis<-sqrt(dis) +chk<-ifelse(dis>crit,1,0) +id<-vec[chk==1] +keep<-vec[chk==0] +if(is.matrix(x)){ +if(ncol(x)==2 && plotit){ +plot(x[,1],x[,2],xlab=xlab,ylab=ylab,type='n') +flag<-rep(TRUE,nrow(x)) +flag[id]<-FALSE +points(x[flag,1],x[flag,2]) +if(sum(!flag)>0)points(x[!flag,1],x[!flag,2],pch='*') +}} +if(!is.matrix(x))outval<-x[id] +if(is.matrix(x))outval<-x[id,] +n=nrow(as.matrix(x)) +n.out=length(id) +list(n=n,n.out=n.out,out.val=outval,out.id=id,keep=keep,dis=dis,crit=crit) +} + +out.methods<-function(x,y, regfun = tsreg,plotit=FALSE,id,method=c('PRO','PRO.R','BLP','DUM','MCD','BOX')){ +type=match.arg(method) +switch(type, + PRO=outpro(x,plotit=plotit), # projection method + PRO.R=outpro.depth(x), #projection method random, lower execution time vs outpro + BLP=outblp(x,y,regfun=regfun,plotit=FALSE), # regression method + DUM=out.dummy(x,y,outfun=outpro.depth,id=id), # Detect outliers ignoring col indicated by argument id + MCD=outDETMCD(x,plotit=plotit), + BOX=outbox(x)) # Boxplot method using ideal. fourths +} + +hoch2.simp<-function(n,V,cil,tr,alpha=.05,con=NULL){ +# +# +# Hochberg two-stage given n's sd's tr and alpha +# if tr>0, var. should contain winsorized variances +# raw data not provided +# +#V = variances or Winsorized variance if tr>0 +# cil desired length of the confidence intervals +# +# +if(is.matrix(x))x<-listm(x) +J=length(n) +svec=V +tempn=n +tempt<-floor((1-2*tr)*tempn) +A<-sum(1/(tempt-1)) +df<-J/A +if(is.null(con))con=con1way(J) +crit=qtukey(1-alpha,J,df) +avec<-NA +ncon=ncol(con) +for(i in 1:ncon){ +temp<-con[,i] +avec[i]<-sum(temp[temp>0]) +} +dvec<-(cil/(2*crit*avec))^2 +d<-max(dvec) +n.vec<-NA +for(j in 1:J){ +n.vec[j]<-max(tempn[j],floor(svec[j]/d)+1) +print(paste("Need an additional ", n.vec[j]-tempn[j], +" observations for group", j)) +} +} + +mean.pred.ci<-function(M,sd,orig.n,new.n,tr=0,alpha=.05){ +# +# +# Generalization of prediction method in Spence & Stanley +# Advances in Methods and Practices in Psychological Science January-March 2024, Vol. 7, No. 1, +#pp. 
1-13 +# +# M = observed mean or can be a trimmed mean +# tr= amount of trimming +# sd = Winsorized standard deviation, which is the usual standard deviation when t=0 +orig.n=orig.n/(1-2*tr)^2 +new.n=new.n/(1-2*tr)^2 +se=sqrt(sd^2/orig.n+sd^2/new.n) +g=floor(tr*orig.n) +df=orig.n-2*g-1 +crit=qt(1-alpha/2,df) +ci=M-crit*se +ci=c(ci,M+crit*se) +ci +} + + +qcorp1.ci<-function(x,y,q=.5,alpha=.05,nboot=599,SEED=TRUE, xout=TRUE, +method='PRO',regfun=Qreg){ +# +# Confidence interval for a quantile regression measure of association +# +# +if(SEED)set.seed(2) + xy=elimna(cbind(x,y)) +p1=ncol(xy) +if(p1>2)stop('Current version is for a single independent variable only') +#, use the R function corblp.ci') +x=xy[,1] +y=xy[,2] + if(xout){ +x<-as.matrix(x) +flag<-out.methods(x,y,plotit=FALSE,method=method,regfun=regfun)$keep +x<-x[flag,] +y<-y[flag] +n.keep=length(y) +} +p=p1-1 +x=xy[,1:p] +x=as.matrix(x) +y=xy[,p1] + v=NA + n=nrow(xy) + if(n<=40)xout=FALSE # to avoid computational issues when n is small. + for(i in 1:nboot){ + id=sample(n,replace=TRUE) +v[i]=qcorp1(xy[id,1:p],xy[id,p1],q=q,xout=xout)$cor + } + mv=mean(v) + v=sort(v) + ilow<-round((alpha/2) * nboot) +ihi<-nboot - ilow +ilow<-ilow+1 + ci=v[ilow] + ci[2]=v[ihi] + e=qcorp1(x,y,q=q,xout=xout)$cor + pv=mean(v<0)+.5*mean(v==0) + pv=2*min(pv,1-pv) + list(Est.=e,ci=ci,p.value=pv) + } + + + +remove.lab.vec<-function(a){ +# +# Remove labels +# +a=as.matrix(a) +dimnames(a)=list(NULL,NULL) +a +} + +qcmul<-function(x,y,q=.5,nboot=599,alpha=.05,SEED=FALSE,xout=FALSE, +outfun=outpro,method='BH'){ +# +# +# For each independent variable, compute a confidence interval for a +# quantile regression correlation. +# +if(SEED)set.seed(2) + xy=elimna(cbind(x,y)) +p1=ncol(xy) +p=p1-1 +x=xy[,1:p] +x=as.matrix(x) +y=xy[,p1] +n=length(y) +if(xout){ +x<-as.matrix(x) +flag<-outfun(x,plotit=FALSE)$keep +x<-x[flag,] +y<-y[flag] +n=length(y) +xy=cbind(x,y) +} +x=as.matrix(x) +e=matrix(NA,p,5) +dimnames(e)=list(NULL,c('Est.','ci.low','ci.up','p-value','adj.p.value')) +for(j in 1:p)e[j,1]=qcorp1(x[,j],y,q=q)$cor + v=matrix(NA,nboot,p) + n=nrow(xy) + for(j in 1:p){ + for(i in 1:nboot){ + id=sample(n,replace=TRUE) +v[i,j]=qcorp1(xy[id,j],xy[id,p1],q=q)$cor + }} + v=apply(v,2,sort) + ilow<-round((alpha/2) * nboot) +ihi<-nboot - ilow +ilow<-ilow+1 + e[,2]=v[ilow,] + e[,3]=v[ihi,] + pv=NA + for(j in 1:p){ + pv[j]=mean(v[,j]<0) + if(pv[j]>=.5)pv[j]=1-pv[j] + } + e[,4]=2*pv + e[,5]=p.adjust(e[,4],method=method) + list(n=n,results=e) +} + + +qcor.ci<-function(x,y,q=.5,alpha=.05,nboot=599,SEED=TRUE, xout=TRUE, +method='PRO',regfun=Qreg){ +# +# Confidence interval for a quantile regression measure of association derived by Li et al. +# +# +if(SEED)set.seed(2) + xy=elimna(cbind(x,y)) +p1=ncol(xy) +if(p1>2)stop('Current version is for a single independent variable only') +#, use the R function corblp.ci') +x=xy[,1] +y=xy[,2] + if(xout){ +x<-as.matrix(x) +flag<-out.methods(x,y,plotit=FALSE,method=method,regfun=regfun)$keep +x<-x[flag,] +y<-y[flag] +n.keep=length(y) +} +p=p1-1 +x=xy[,1:p] +x=as.matrix(x) +y=xy[,p1] + v=NA + n=nrow(xy) + if(n<=40)xout=FALSE # to avoid computational issues when n is small. 
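+# Percentile bootstrap: resample (x,y) pairs, recompute the quantile correlation on each
+# resample, and take the empirical alpha/2 and 1-alpha/2 quantiles of the bootstrap
+# estimates as the confidence interval; the p-value is twice the smaller tail probability
+# relative to zero.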
+ for(i in 1:nboot){ + id=sample(n,replace=TRUE) +v[i]=qcor(xy[id,1:p],xy[id,p1],q=q,xout=xout)$cor + } + mv=mean(v) + v=sort(v) + ilow<-round((alpha/2) * nboot) +ihi<-nboot - ilow +ilow<-ilow+1 + ci=v[ilow] + ci[2]=v[ihi] + pv=mean(v<0)+.5*mean(v==0) + pv=2*min(pv,1-pv) + e=qcor(x,y,q=q)$cor # Already did or did not remove leverage points + list(Est.=e,ci=ci,p.value=pv) + } + + qcor.EP<-function(x,y,q=c(.25,.5,.75),alpha=.05,nboot=1000,SEED=TRUE, xout=TRUE, +method='PRO',regfun=Qreg){ +# +# Correlation based on the quantile regression estimator. +# This version is based on explanatory power. +# See Wilcox (2022, section 11.9). +# +nq=length(q) +res=matrix(NA,nq,5) +for(j in 1:nq){ +a=qcor.ep.ci(x,y,q=q[j],alpha=alpha,nboot=nboot,SEED=SEED,xout=xout) +res[j,]=c(q[j],a$Est.,a$ci[1],a$ci[2],a$p.value) +} +dimnames(res)=list(NULL,c('q','Est','ci.low','ci.up','p-value')) +res +} + +qcor.ep.ci<-function(x,y,q=.5,alpha=.05,nboot=599,SEED=TRUE, xout=TRUE, +method='PRO',regfun=Qreg){ +# +# Confidence interval for a quantile regression measure of association +# +# +if(SEED)set.seed(2) + xy=elimna(cbind(x,y)) +p1=ncol(xy) +if(p1>2)stop('Current version is for a single independent variable only') +#, use the R function corblp.ci') + if(xout){ +x<-as.matrix(x) +flag<-out.methods(x,y,plotit=FALSE,method=method,regfun=regfun)$keep +x<-x[flag,] +y<-y[flag] +n.keep=length(y) +} +p=p1-1 +x=xy[,1:p] +x=as.matrix(x) +y=xy[,p1] + v=NA + n=nrow(xy) + if(n<=40)xout=FALSE # to avoid computational issues when n is small. + for(i in 1:nboot){ + id=sample(n,replace=TRUE) +v[i]=qcor.ep(xy[id,1:p],xy[id,p1],q=q,xout=xout)$cor + } + mv=mean(v) + v=sort(v) + ilow<-round((alpha/2) * nboot) +ihi<-nboot - ilow +ilow<-ilow+1 + ci=v[ilow] + ci[2]=v[ihi] + e=qcor.ep(x,y,q=q,xout=xout)$cor + pv=mean(v<0)+.5*mean(v==0) + pv=2*min(pv,1-pv) + list(Est.=e,ci=ci,p.value=pv) + } + + qcor.L<-function(x,y,q=c(.25,.5,.75),alpha=.05,nboot=1000,SEED=TRUE, xout=TRUE, +method='PRO',regfun=Qreg){ +# +# Correlation based on the quantile regression estimator. +# This version is based on the approach used by +# Li, G., Li, Y. \& Tsai, C.-L. (2015). +# Quantile correlations and quantile autoregressive +# modeling. {\em Journal of the American Statistical Association , 110}, 246--261. +# https://doi.org/10.1080/01621459.2014.892007 +# +# +nq=length(q) +res=matrix(NA,nq,5) +for(j in 1:nq){ +a=qcor.ci(x,y,q=q[j],alpha=alpha,nboot=nboot,SEED=SEED,xout=xout) +res[j,]=c(q[j],a$Est.,a$ci[1],a$ci[2],a$p.value) +} +dimnames(res)=list(NULL,c('q','Est','ci.low','ci.up','p-value')) +res +} + +qcor.R<-function(x,y,q=c(.25,.5,.75),alpha=.05,nboot=1000,SEED=TRUE, xout=TRUE, +method='PRO',regfun=Qreg){ +# +# Correlation based on the quantile regression estimator. +# This version is based on the ratio of the loss function for the full model versus the null case +# See Wilcox (2022, section 11.9). +# +nq=length(q) +res=matrix(NA,nq,5) +for(j in 1:nq){ +a=qcorp1.ci(x,y,q=q[j],alpha=alpha,nboot=nboot,SEED=SEED,xout=xout) +res[j,]=c(q[j],a$Est.,a$ci[1],a$ci[2],a$p.value) +} +dimnames(res)=list(NULL,c('q','Est','ci.low','ci.up','p-value')) +res +} + +qcor.EP<-function(x,y,q=c(.25,.5,.75),alpha=.05,nboot=1000,SEED=TRUE, xout=TRUE, +method='PRO',regfun=Qreg){ +# +# Correlation based on the quantile regression estimator. +# This version is based on explanatory power. +# See Wilcox (2022, section 11.9). 
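+#
+# A usage sketch with hypothetical simulated data (not part of the original file):
+# set.seed(1)
+# x=rnorm(80)
+# y=x+rnorm(80)
+# qcor.EP(x,y)     # explanatory power version at the .25, .5 and .75 quantiles
+# qcor.L(x,y,q=.5) # Li et al. (2015) version for the median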
+# +nq=length(q) +res=matrix(NA,nq,5) +for(j in 1:nq){ +a=qcor.ep.ci(x,y,q=q[j],alpha=alpha,nboot=nboot,SEED=SEED,xout=xout) +res[j,]=c(q[j],a$Est.,a$ci[1],a$ci[2],a$p.value) +} +dimnames(res)=list(NULL,c('q','Est','ci.low','ci.up','p-value')) +res +} + +qcor.ep<-function(x,y,qest=hd,q=.5,xout=FALSE,method='PRO',regfun=MMreg, +plotit=FALSE,...){ +# +# Compute a measure of the strength of the association in terms of explanatory power and +# based on the quantile regression line +# +X=cbind(x,y) +X=elimna(X) +x<-as.matrix(x) +p=ncol(x) +x=X[,1:p] +p1=p+1 +y=X[,p1] +if(xout){ +x<-as.matrix(x) +flag<-out.methods(x,y,plotit=plotit,method=method,regfun=regfun)$keep +x<-x[flag,] +y<-y[flag] +x<-as.matrix(x) +X=cbind(x,y) +} +est=qreg(x,y,q=q)$coef +pred=reg.pred(x,y,regfun=Qreg,q=q) +top=pbvar(pred) +bot=pbvar(y) +EP=top/bot +if(p==1)ce=sign(est[2])*sqrt(EP) +list(cor=ce,Explanatory.power=EP) +} + +Stein.pairs<-function(x,delta,alpha=.05,power=.8,z.sqrt=NULL,tr=.2,reps=100000,SEED=TRUE){ +# +# All pairwise comparisons among J independent groups +# Using available data, determine how many observations, if any, are needed to get power >= the +# value indicated by the argument +# power, given that the difference between the measures of location is +# delta +# +if(is.matrix(x))x=listm(x) +J=length(x) +ic=0 +ALL=(J^2-J)/2 +output=matrix(NA,ALL,6) +for(j in 1:J){ +for(k in 1:J){ +if(j= the value +# specified by the argument power when the linear contrast is >= the value +# indicated by the argument +# delta +# +# +if(SEED)set.seed(2) +x=elimna(x) +if(is.matrix(x))x=listm(x) +J=length(x) +n=lapply(x,length) +n=list2vec(n) +g=floor(tr*n) +df=n-2*g-1 +sq=lapply(x,winsdN,tr=tr) +sq=list2vec(sq) +sq=sq^2 +top=1-alpha/2 +t2=1-power +if(is.null(z.sqrt)){ +v=sum.T(df,reps=reps,con=con) +ta=qest(v,top) +q=qest(v,t2) +delta=abs(delta) # Same result if not done but this avoids getting negative sqrt(z) +z.sqrt=delta/(ta-q) +} +N=rep(0,J) +for(k in 1:J){ +if(con[k]!=0)N[k]=max(floor(sq[k]/z.sqrt^2),n[k]) +} +list(n=n,N=N,z.sqrt=z.sqrt) +} + + +Stein.LC<-function(x,delta,con,alpha=.05,power=.8,z=NULL,tr=.2,reps=100000,SEED=TRUE){ +# +# For a collection of linear contrast coefficients, +# determine the total number of observations to achieve power >= the value +# specified by the argument power in the linear contrast is >= the value +# indicated by the argument +# delta +# +# +if(SEED)set.seed(2) +if(is.null(ncol(con)))N=Stein.LC1(x,delta=delta,con=con,alpha=alpha,power=power,tr=tr, +reps=reps,SEED=SEED)$N +else{ +J=nrow(con) +NL=ncol(con) +N=matrix(NA,J,NL) +for(k in 1:NL)N[,k]=Stein.LC1(x,delta=delta,con[,k],alpha=alpha,power=power,tr=tr, +reps=reps,SEED=SEED)$N +} +list(con=con,N=N) +} + +Stein2g<-function(x,y=NULL,delta,alpha=.05,power=.8,z.sqrt=NULL,tr=.2,reps=100000,SEED=TRUE){ +# +# For two independent groups, to Stein-type two stage method for determining how many more +# observations, if any, are need to achieve power indicated by the argument +# pow given a difference in location +# delta +# +# if y=NULL +# x can be a matrix with 2 columns or have list mode with length 2 +# +# Or +# x and y can be vector. 
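+#
+# A usage sketch with hypothetical simulated data (not part of the original file):
+# set.seed(1)
+# x=rnorm(25)
+# y=rnorm(25)
+# Stein2g(x,y,delta=.5) # sample sizes needed for power .8 when the true difference is .5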
+# +if(SEED){ +if(is.null(z.sqrt))set.seed(2) +} +if(!is.null(y))x=list(x,y) +x=elimna(x) +if(is.matrix(x))x=listm(x) +J=length(x) +if(J!=2)stop('This function is for two groups only') +n=lapply(x,length) +n=list2vec(n) +g=floor(tr*n) +df=n-2*g-1 +sq=lapply(x,winsdN,tr=tr) +sq=list2vec(sq) +sq=sq^2 +top=1-alpha/2 +t2=1-power +if(is.null(z.sqrt)){ +v=sum.T(df,reps=reps,con=c(1,-1)) +ta=qest(v,top) +q=qest(v,t2) +delta=abs(delta) # Same result if not done but this avoids getting negative sqrt(z) +z.sqrt=delta/(ta-q) +} +N=NA +for(k in 1:2){ +N[k]=max(floor(sq[k]/z.sqrt^2),n[k]) +} +list(n=n,N=N,z.sqrt=z.sqrt) +} + +sum.T<-function(df,reps=100000,con){ +# +# Estimate distribution of T=sum T_j, T_j independent T distribution with degree of stored in +# df +# +# Return dist. +n=df+1 +v=NA +K=length(df) +for(j in 1:reps){ +e=0 +for(k in 1:K)e=e+con[k]*rt(1,df=df[k]) +v[j]=e +} +v +} + + +qest.meth<-function(x,q=.5,method=c('HD','NO','TR','SO')){ + +type=match.arg(method) +switch(type, +HD=hd(x,q=q), +NO=qno.est(x,q=q), +TR=thd(x,q=q), +SO=qest(x,q=q)) +} + +rbin.mul<-function(n,N,prob=.5,p=2){ +# +# p multivariate binomial, correlations are .5 +# +p1=p+1 +np1=n*p1 +z=matrix(rbinom(np1,size=N,prob=prob),ncol=p1) +for(j in 1:p)z[,j]=z[,j]+z[,p1] +z[,1:p] +} + +bwdepth<-function(x,y,fun=prodepth,plotit=FALSE,xlab='V1',ylab='V2'){ +# +# For two independent groups, let X and Y denote multivariate random variables +# This function estimates the extent the distributions overlap using the notion +# of projection distances. In effect, a nonparametric measure of effect size is +# estimated +# For identical distribution, effect size is .5. The more separated the distributions, the +# closer is the effect size to zero. Complete separation means the effect size is equal to zero +# +# +# +x=elimna(x) +y=elimna(y) +x=as.matrix(x) +y=as.matrix(y) +n1=nrow(x) +n2=nrow(y) +if(ncol(x)==1){ +fun=unidepth +x=as.vector(x) +y=as.vector(y) +} +pdyy=fun(y,y) +pdyx=fun(y,x) +pdxx=fun(x,x) +pdxy=fun(x,y) +v1=NA +v2=NA +ic=0 +for(i in 1:n2){ +for(j in 1:n1){ +ic=ic+1 +v1[ic]=pdyy[i]<=pdyx[j] +}} +ic=0 +for(j in 1:n1){ +for(i in 1:n2){ +ic=ic+1 +v2[ic]=pdxx[j]<=pdxy[i] +}} +e1=mean(v1) +e2=mean(v2) +e=(n1*e1+n2*e2)/(n1+n2) +x=as.matrix(x) +y=as.matrix(y) +if(plotit){ +if(ncol(x)==2){ +plot(rbind(x,y),xlab=xlab,ylab=ylab,type='n') +points(x,pch='*') +points(y,pch='o') +}} +list(e=e,e1=e1,e2=e2) +} + +bwdepthMC.ci<-function(x,y,fun=prodepth,nboot=100,alpha=.05,MC=TRUE, +SEED=TRUE,plotit=FALSE,xlab='V1',ylab='V2'){ +# +if(SEED)set.seed(2) + crit=qnorm(1-alpha/2) +if(identical(fun,prodepth))MC=FALSE # get odd error otherwise +x=elimna(x) +y=elimna(y) +x=as.matrix(x) +y=as.matrix(y) +n1=nrow(x) +n2=nrow(y) +est=bwdepth(x,y,plotit=plotit,xlab=xlab,ylab=ylab) +if(MC)library(parallel) +id=list() +for(i in 1:nboot)id[[i]]=c(sample(n1,replace=TRUE),sample(n2,replace=TRUE)) +if(!MC)BE=lapply(id,bwdepth.sub,x,y,n1,n2,fun=fun) +if(MC)BE=mclapply(id,bwdepth.sub,x,y,n1,n2,fun=fun) +E=matl(BE) +se=sd(E) +c1=est$e-crit*se +c1[2]=est$e+crit*se +test=(est$e-.5)/se +pv=2*(1-pnorm(abs(test))) +list(n1=n1,n2=n2,Est=est$e,ci=c1,p.value=pv) +} + +bwdepth.sub<-function(id,x,y,n1,n2,fun){ +n=n1+n2 +np1=n1+1 +e=bwdepth(x[id[1:n1],],y[id[np1:n],],fun=fun)$e +e +} + +bwdepth.perm<-function(x,y,reps=500, +fun=prodepth,alpha=.05,SEED=TRUE){ +# +# Permutation test of F=G, two independent multivariate distributions +# +# +if(SEED)set.seed(2) +x=elimna(x) +y=elimna(y) +x=as.matrix(x) +y=as.matrix(y) +n1=nrow(x) +np1=n1+1 +n2=nrow(y) +n=n1+n2 
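+# Permutation step: pool the two samples, repeatedly permute the pooled rows, split the
+# permuted data into groups of sizes n1 and n2, and recompute the depth-based effect size;
+# the observed estimate is then compared with the lower and upper alpha/2 quantiles of the
+# permutation distribution.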
+d=bwdepth(x,y)$e
+xy=rbind(x,y)
+print(dim(xy))
+v=NA
+for(i in 1:reps){
+ip=sample(n,replace=FALSE)
+z=xy[ip,]
+v[i]=bwdepth(z[1:n1,],z[np1:n,])$e
+}
+v=sort(v)
+il=round(alpha*reps/2)
+iu=reps-il
+list(Est=d,Lower.crit=v[il],Upper.crit=v[iu])
+}
+
+simp.break<-function(x,y,pts){
+#
+# Estimate the break point
+#
+# Using a method derived by
+# Muggeo 2003
+# STATISTICS IN MEDICINE
+# Statist. Med. 2003; 22:3055-3071 (DOI: 10.1002/sim.1545)
+#
+ library(segmented)
+dati=data.frame(x=x,y=y)
+M.lm=lm(y~x,data=dati)
+a=segmented(M.lm,psi=pts)
+a
+}
+
+reg.break<-function(x,y,int=NULL,xout=TRUE,regfun=tsreg,outfun=outpro,...){
+#
+# Estimate the break point of a regression line, where the line bends.
+# That is, where the slope suddenly changes.
+# Use a robust analog of the method in
+# A Statistical Method for Determining the Breakpoint of Two Lines
+# Jones and Molitoris
+# Analytical Biochemistry 287-290 (1984)
+#
+x<-as.matrix(x)
+p1<-ncol(x)+1
+p<-ncol(x)
+if(p!=1)stop('Current version limited to a single independent variable')
+xy<-cbind(x,y)
+xy<-elimna(xy)
+x<-xy[,1:p]
+x=as.vector(x)
+y<-xy[,p1]
+if(xout){
+m<-cbind(x,y)
+if(identical(outfun,outblp))flag=outblp(x,y,plotit=FALSE)$keep
+else
+flag<-outfun(x,plotit=FALSE,...)$keep
+m<-m[flag,]
+x<-m[,1:p]
+y<-m[,p1]
+}
+if(is.null(int)){
+low=qest(x,.25)
+up=qest(x,.75)
+int=seq(low,up,length.out=25)
+}
+nrem=length(y)
+if(!is.null(int)){
+x0=int
+iup=length(x0)
+
+}
+v=NA
+for(i in 1:iup){
+id=x