# Example from book of Box, Hunter, Hunter on lima beans. # A: depth of planting (0.5 in or 1.5 in) # B: watering (once or twice daily) # C: type of bean (baby or large) # Y: yield a<-rep(c(-1,1),4) b<-rep(c(-1,-1,1,1),2) c<-c(rep(-1,4),rep(1,4)) ab<-a*b; ac<-a*c; bc<-b*c; abc<-a*b*c y <- c(6,4,10,7,4,3,8,5) summary(lm(y~a+b+c+ab+ac+bc+abc)) summary(lm(y~(a+b+c)^3)) # easier - using model formula notation # note: "a:b" in output means entrywise multiplication of columns in design matrix, i.e. the same as the multiplication above. # Even easier: library(FrF2) plan <- FrF2(nruns=8,nfactors=3,randomize=FALSE) plan plan <- add.response(plan,y) plan # now we have an ordinary data set up to be used with lm lm3 <- lm(y~.^3,data=plan) summary(lm3) MEPlot(lm3) # dev.copy2pdf(file="LimaME.pdf") IAPlot(lm3) # dev.copy2pdf(file="LimaIA.pdf") effects <- 2*lm3$coeff effects # moving on to inference # all residuals are zero, since we have 8 observations and have fitted 8 parameters lm3$resid # Total sum of squares (not in BF's sense) is equal to the regression sum of squares (not in BF's sense) since SSE = 0: sum((lm3$fitted-mean(y))^2) # regression sum of squares by definition (not in BF's sense) 8*sum(lm3$coeff[-1]^2) # and by theory for two-level factorial design sum((y-mean(y))^2) # total sum of squares (not in BF's sense) # Lenth's metod qnorm(.25) # The 0.674 of Lenth's method lenth <- function(lmobj,alpha=0.05) # by Mette Langaas { abseffects <- abs(2*lmobj$coeff)[-1] medabseffects <- median(abseffects) medabseffects s0 <- 1.5*medabseffects keepeffects <- abseffects[abseffects< 2.5*s0] PSE <- 1.5*median(keepeffects) signlimit <-qt(1-alpha/2,length(abseffects)/3)*PSE # significance limit return(list("PSE"=PSE,"Signlimit"=signlimit,"Number of sign"=sum(abseffects>signlimit))) } lenthres <- lenth(lm3) lenthres # see that factors with effects larger than lenthres$Signlimit should be assumed # to have effect different from zero effects abs(effects)>lenthres$Signlimit # here only B # How to make a relatively ok Paretoplot for the effects (Mette Langaas) barplot(sort(abs(effects[-1]),decreasing=FALSE),las=1,horiz=TRUE) abline(v=lenthres$Signlimit,col=2,lwd=2) #dev.copy2pdf(file="limaPartoR.pdf") # Or from library BsMD: library(BsMD) LenthPlot(lm3) sig <- which(effects[-1]>lenthres$Signlimit) DanielPlot(lm3,faclab=list(idx=sig,lab=(paste(" ",attr(lm3$terms,"term.labels"))[sig]))) detach("package:BsMD") # Back to FrF2 - some functions of it and BsMD have the same names # Normal plot from FrF2 labels more effects - critical values are determined by simulation instead of using t distribution or table of critical values DanielPlot(plan) # we then go for only modelling main effects lm1 <- lm(y~.,data=plan) summary(lm1) sqrt(summary(lm1)$sigma^2/8) # st. errors shown in summary table sse <- sum((y-lm1$fitted.values)^2) summary(lm1)$sigma^2*(8-4) # the same 8*sum(lm3$coeff[5:8]^2) # this function of removed covariates of full model is equal to sse of reduced model 2*pt(abs(lm3$coeff/(summary(lm1)$sigma/sqrt(8))),8-3-1,lower.tail=FALSE) # p-values, the same as in summary(lm1) for those covariates present there effects <- 2*lm1$coeff effects # model check by residual plots # 1. fitted vs studentized residuals rres <- rstudent(lm1) plot(lm1$fitted,rres) # 2. if (1) strange, each x vs. studentized residuals # hard with -1 and 1 as only values # 3. normality of residuals qqnorm(rres) qqline(rres) library(nortest) ad.test(rstudent(lm1)) # 4 observation order vs. time? # not here? # What if the residuals looked strange? Need transformation of y? library(MASS) boxcox(lm1,plotit=TRUE) # this suggests log # could go further with this - but in our project we have more than 8 observations! # Then, move to more replicates # now, we have two more observation of each factor combination plan <- FrF2(nruns=8,nfactors=3,replications=3,randomize=FALSE) plan y <- c(6,4,10,7,4,3,8,5,7,5,9,7,5,3,7,5,6,5,8,6,4,1,7,4) y plan <- add.response(plan,y) plan lm3r <- lm(y~.^3,data=plan) summary(lm3r) A <- c(-1,1)[plan$A]; B <- c(-1,1)[plan$B]; C <- c(-1,1)[plan$C] # convert to numbers instead of factors lm3rcheck <- lm(y~(A+B+C)^3) summary(lm3rcheck) # yes, same as lm3r (sum(y[A*B==1])-sum(y[A*B==-1]))/length(y) # beta hat for A*B - as shown in summary sum(y[A*B*C==1])-sum(y[A*B*C==-1]) # so third order interaction effect is 0 (by chance) summary(lm3r) anova(lm3r) sum((y-mean(y))^2) # total SS (not in sense of BF) sum((mean(y)-lm3r$fitted.values)^2) # regression SS (not in sense of BF) length(y)*lm3r$coeff[-1]^2 # entries of anova output length(y)*sum(lm3r$coeff[-1]^2) # regression SS sse <- sum((y-lm3r$fitted.values)^2) # SSE - residuals in anova output sse MEPlot(lm3r) IAPlot(lm3r) effects <- 2*lm3r$coeff effects n <- length(y) sest <- summary(lm3r)$sigma sqrt(sse/(n-2^3)) # the same seffect <- 2*sest/sqrt(n) # SD of 2*beta-hat seffect # compare this to Std. Error column in summary(lm3r) to see this is 2*Std.Error # the cut-off for significance is signcut <- qt(0.975,df=n-2^3)*seffect signcut # Pareto-type plot barplot(sort(abs(effects[-1]),decreasing=FALSE),las=1,horiz=TRUE) abline(v=signcut,col=2,lwd=2) #dev.copy2pdf(file="lima3PartoR.pdf") # model check by residual plots # 1. fitted vs studentized residuals rres <- rstudent(lm3r) plot(lm3r$fitted,rres) # dev.copy2pdf(file="lima3res.pdf") # 2 if (1) strange, each x vs. studentized residuals # 3 normality of residuals qqnorm(rres) qqline(rres) # dev.copy2pdf(file="lima3qq.pdf") library(nortest) ad.test(rstudent(lm3r)) boxcox(lm3r,plotit=TRUE) # lambda=1 is within 95%CI # Finally, go back to one replicate. What if ABC is redefined as a block variable instead of an interaction? plan <- FrF2(nruns=8,nfactors=3,randomize=FALSE) y <- c(6,4,10,7,4,3,8,5) plan <- add.response(plan,y) lm3 <- lm(y~.^3,data=plan,x=TRUE) # x=TRUE: Design matrix is generated summary(lm3) # If 10 is added to response whenever ABC = 1: int3 <- lm3$x[,"A1:B1:C1"]==1 # Used design matrix here y y[int3] <- y[int3]+10 y plan <- add.response(plan,y,replace=TRUE) summary(lm(y~.^3,data=plan)) # all other effects as in lm3 # only estimate of A:B:C (and intercept) lost