Multiple regression

An example:

Midwestern region basemap shapefile components:

[midwotl.dbf] [midwotl.shp] [midwotl.shx]

# Midwest modern pollen and climate data
library(maptools)
library(gpclib)
library(RColorBrewer)
library(spdep)
library(leaps)
attach(midwtf2)

# read and plot shape file
midwotl.shp <- readShapeLines(file.choose(),
     proj4string=CRS("+proj=longlat"))
plot(midotl.shp)


# get point locations
midwtf2.loc=cbind(longitud, latitude)
points(midwtf2.loc)

# distance neighbors
d <- 100 # points are neighbors if locations are within d km of one another
midwtf2.neighbors.dist <- dnearneigh(midwtf2.loc, 0, d, longlat=TRUE)
plot(midwtf2.neighbors.dist, midwtf2.loc, add=TRUE, col="magenta")


# examine the data set
# plot lat, lon and climate variables
plot(midwtf2[,2:6])

# plot July temperature and selected pollen types
plot(midwtf2[,c(4,7,10,13,21)])

# plot July temperature and transformed pollen
plot(midwtf2[,c(4,22,25,28,36)])


# map tmeanjul
varname="tmeanjul"
plotvar <- tmeanjul
plotvar <- (plotvar-mean(plotvar))/sd(plotvar)
nclr <- 8
plotclr <- brewer.pal(nclr,"PuOr")
plotclr <- plotclr[nclr:1] # reorder colors
colornum <- cut(plotvar, c(-4:4), labels=FALSE)
brks <- c(-4:4)
color <- plotclr[colornum]
plot(midwotl.shp)
points(longitud, latitude, pch=ifelse(plotvar > 0,24,25),
    bg=color, cex=1.5)
legend(locator(1), legend=leglabs(brks), fill=plotclr, cex=0.6, bty="n")


# look at spatial autocorrelation of dependent variable (tmeanjul)
moran.test(tmeanjul, zero.policy=T,
     nb2listw(midwtf2.neighbors.dist, zero.policy=T, style="W"))


# naive model
tmeanjul.lm0 <- lm(tmeanjul ~ Picea + Abies + Juniper
   + Pinus + Betula + Fraxinus + Quercus + Ulmus
   + Acer + Juglans + Carya + Tsuga + Fagus
   + Alnus + Herbsum)


summary(tmeanjul.lm0)
oldpar <- par(mfrow = c(2, 2))
plot(tmeanjul.lm0)
par(oldpar)

# map residuals
varname="residuals: tmeanjul.lm0"
plotvar <- residuals(tmeanjul.lm0)
plotvar <- (plotvar-mean(plotvar))/sd(plotvar)
nclr <- 8
plotclr <- brewer.pal(nclr,"PuOr")
plotclr <- plotclr[nclr:1] # reorder colors
colornum <- cut(plotvar, c(-4:4), labels=FALSE)
brks <- c(-4:4)
color <- plotclr[colornum]
plot(midwotl.shp)
points(longitud, latitude, pch=ifelse(plotvar > 0,24,25),
    bg=color, cex=1.5)
legend(locator(1), legend=leglabs(brks), fill=plotclr, cex=0.6, bty="n")

# Moran test
moran.test(residuals(tmeanjul.lm0), zero.policy=T,
     nb2listw(midwtf2.neighbors.dist, zero.policy=T, style="W"))


# a second model, transformed predictors
tmeanjul.lm1 <- lm(tmeanjul ~ Picea.50 + Abies.100 + Junip.100
    + Pinus.100 + Betula.50 + Frax.15 + Querc.25 + Ulmus.25
    + Acer.100 + Juglans.25 + Carya.25 + Tsuga.100 + Fagus.100
    + Alnus.100 + Herbs.25)


summary(tmeanjul.lm1)
oldpar <- par(mfrow = c(2, 2))
plot(tmeanjul.lm1)
par(oldpar)


# map residuals
varname="residuals: tmeanjul.lm1"
plotvar <- residuals(tmeanjul.lm1)
plotvar <- (plotvar-mean(plotvar))/sd(plotvar)
nclr <- 8
plotclr <- brewer.pal(nclr,"PuOr")
plotclr <- plotclr[nclr:1] # reorder colors
colornum <- cut(plotvar, c(-4:4), labels=FALSE)
brks <- c(-4:4)
color <- plotclr[colornum]
plot(midwotl.shp)
points(longitud, latitude, pch=ifelse(plotvar > 0,24,25),
    bg=color, cex=1.5)
legend(locator(1), legend=leglabs(brks), fill=plotclr, cex=0.6, bty="n")


# Moran test
moran.test(residuals(tmeanjul.lm1), zero.policy=T,
     nb2listw(midwtf2.neighbors.dist, zero.policy=T, style="W"))


# another model, all possible subsets regression
tmeanjul.lm2 <- regsubsets(tmeanjul ~ Picea.50 + Abies.100 + Junip.100
+ Pinus.100 + Betula.50 + Frax.15 + Querc.25 + Ulmus.25
+ Acer.100 + Juglans.25 + Carya.25 + Tsuga.100 + Fagus.100
+ Alnus.100 + Herbs.25, data=midwtf2)
summary(tmeanjul.lm2)


# rerun one of the best-subsets regressions
tmeanjul.lm3 <- lm(tmeanjul ~ Picea.50 + Abies.100 + Betula.50 + Fraxinus
     + Querc.25 + Fagus.100 + Herbs.25)

summary(tmeanjul.lm3)
oldpar <- par(mfrow = c(2, 2))
plot(tmeanjul.lm1)
par(oldpar)

# map residuals
varname="residuals: tmeanjul.lm3"
plotvar <- residuals(tmeanjul.lm3)
plotvar <- (plotvar-mean(plotvar))/sd(plotvar)
nclr <- 8
plotclr <- brewer.pal(nclr,"PuOr")
plotclr <- plotclr[nclr:1] # reorder colors
colornum <- cut(plotvar, c(-4:4), labels=FALSE)
brks <- c(-4:4)
color <- plotclr[colornum]
plot(midwotl.shp)
points(longitud, latitude, pch=ifelse(plotvar > 0,24,25),
bg=color, cex=1.5)
legend(locator(1), legend=leglabs(brks), fill=plotclr, cex=0.6, bty="n")

# Moran test
moran.test(residuals(tmeanjul.lm3), zero.policy=T,
    nb2listw(midwtf2.neighbors.dist, zero.policy=T, style="W"))


# compare Moran tests

# dependent variable (tmeanjul)
moran.test(tmeanjul, zero.policy=T,
     nb2listw(midwtf2.neighbors.dist, zero.policy=T, style="W"))


# lm0 (naive model) no transformation, all predictors included
moran.test(residuals(tmeanjul.lm0), zero.policy=T,
    nb2listw(midwtf2.neighbors.dist, zero.policy=T, style="W"))


# lm1 transformed predictors, all predictors included
moran.test(residuals(tmeanjul.lm1), zero.policy=T,
    nb2listw(midwtf2.neighbors.dist, zero.policy=T, style="W"))


# lm3 transformed predictors, best subset model
moran.test(residuals(tmeanjul.lm3), zero.policy=T,
    nb2listw(midwtf2.neighbors.dist, zero.policy=T, style="W"))