Multiple regression
An example:
Midwestern region basemap shapefile components:
[midwotl.dbf] [midwotl.shp] [midwotl.shx]
# Midwest modern pollen and climate data
library(maptools)
library(gpclib)
library(RColorBrewer)
library(spdep)
library(leaps)
attach(midwtf2)
# read and plot shape file
midwotl.shp <- readShapeLines(file.choose(),
proj4string=CRS("+proj=longlat"))
plot(midotl.shp)
# get point locations
midwtf2.loc=cbind(longitud, latitude)
points(midwtf2.loc)
# distance neighbors
d <- 100 # points are neighbors if locations are within d km of one another
midwtf2.neighbors.dist <- dnearneigh(midwtf2.loc, 0, d, longlat=TRUE)
plot(midwtf2.neighbors.dist, midwtf2.loc, add=TRUE, col="magenta")
# examine the data set
# plot lat, lon and climate variables
plot(midwtf2[,2:6])
# plot July temperature and selected pollen types
plot(midwtf2[,c(4,7,10,13,21)])
# plot July temperature and transformed pollen
plot(midwtf2[,c(4,22,25,28,36)])
# map tmeanjul
varname="tmeanjul"
plotvar <- tmeanjul
plotvar <- (plotvar-mean(plotvar))/sd(plotvar)
nclr <- 8
plotclr <- brewer.pal(nclr,"PuOr")
plotclr <- plotclr[nclr:1] # reorder colors
colornum <- cut(plotvar, c(-4:4), labels=FALSE)
brks <- c(-4:4)
color <- plotclr[colornum]
plot(midwotl.shp)
points(longitud, latitude, pch=ifelse(plotvar > 0,24,25),
bg=color, cex=1.5)
legend(locator(1), legend=leglabs(brks), fill=plotclr, cex=0.6, bty="n")
# look at spatial autocorrelation of dependent variable (tmeanjul)
moran.test(tmeanjul, zero.policy=T,
nb2listw(midwtf2.neighbors.dist, zero.policy=T, style="W"))
# naive model
tmeanjul.lm0 <- lm(tmeanjul ~ Picea + Abies + Juniper
+ Pinus + Betula + Fraxinus + Quercus + Ulmus
+ Acer + Juglans + Carya + Tsuga + Fagus
+ Alnus + Herbsum)
summary(tmeanjul.lm0)
oldpar <- par(mfrow = c(2, 2))
plot(tmeanjul.lm0)
par(oldpar)
# map residuals
varname="residuals: tmeanjul.lm0"
plotvar <- residuals(tmeanjul.lm0)
plotvar <- (plotvar-mean(plotvar))/sd(plotvar)
nclr <- 8
plotclr <- brewer.pal(nclr,"PuOr")
plotclr <- plotclr[nclr:1] # reorder colors
colornum <- cut(plotvar, c(-4:4), labels=FALSE)
brks <- c(-4:4)
color <- plotclr[colornum]
plot(midwotl.shp)
points(longitud, latitude, pch=ifelse(plotvar > 0,24,25),
bg=color, cex=1.5)
legend(locator(1), legend=leglabs(brks), fill=plotclr, cex=0.6, bty="n")
# Moran test
moran.test(residuals(tmeanjul.lm0), zero.policy=T,
nb2listw(midwtf2.neighbors.dist, zero.policy=T, style="W"))
# a second model, transformed predictors
tmeanjul.lm1 <- lm(tmeanjul ~ Picea.50 + Abies.100 + Junip.100
+ Pinus.100 + Betula.50 + Frax.15 + Querc.25 + Ulmus.25
+ Acer.100 + Juglans.25 + Carya.25 + Tsuga.100 + Fagus.100
+ Alnus.100 + Herbs.25)
summary(tmeanjul.lm1)
oldpar <- par(mfrow = c(2, 2))
plot(tmeanjul.lm1)
par(oldpar)
# map residuals
varname="residuals: tmeanjul.lm1"
plotvar <- residuals(tmeanjul.lm1)
plotvar <- (plotvar-mean(plotvar))/sd(plotvar)
nclr <- 8
plotclr <- brewer.pal(nclr,"PuOr")
plotclr <- plotclr[nclr:1] # reorder colors
colornum <- cut(plotvar, c(-4:4), labels=FALSE)
brks <- c(-4:4)
color <- plotclr[colornum]
plot(midwotl.shp)
points(longitud, latitude, pch=ifelse(plotvar > 0,24,25),
bg=color, cex=1.5)
legend(locator(1), legend=leglabs(brks), fill=plotclr, cex=0.6, bty="n")
# Moran test
moran.test(residuals(tmeanjul.lm1), zero.policy=T,
nb2listw(midwtf2.neighbors.dist, zero.policy=T, style="W"))
# another model, all possible subsets regression
tmeanjul.lm2 <- regsubsets(tmeanjul ~ Picea.50 + Abies.100 + Junip.100
+ Pinus.100 + Betula.50 + Frax.15 + Querc.25 + Ulmus.25
+ Acer.100 + Juglans.25 + Carya.25 + Tsuga.100 + Fagus.100
+ Alnus.100 + Herbs.25, data=midwtf2)
summary(tmeanjul.lm2)
# rerun one of the best-subsets regressions
tmeanjul.lm3 <- lm(tmeanjul ~ Picea.50 + Abies.100 + Betula.50 + Fraxinus
+ Querc.25 + Fagus.100 + Herbs.25)
summary(tmeanjul.lm3)
oldpar <- par(mfrow = c(2, 2))
plot(tmeanjul.lm1)
par(oldpar)
# map residuals
varname="residuals: tmeanjul.lm3"
plotvar <- residuals(tmeanjul.lm3)
plotvar <- (plotvar-mean(plotvar))/sd(plotvar)
nclr <- 8
plotclr <- brewer.pal(nclr,"PuOr")
plotclr <- plotclr[nclr:1] # reorder colors
colornum <- cut(plotvar, c(-4:4), labels=FALSE)
brks <- c(-4:4)
color <- plotclr[colornum]
plot(midwotl.shp)
points(longitud, latitude, pch=ifelse(plotvar > 0,24,25),
bg=color, cex=1.5)
legend(locator(1), legend=leglabs(brks), fill=plotclr, cex=0.6, bty="n")
# Moran test
moran.test(residuals(tmeanjul.lm3), zero.policy=T,
nb2listw(midwtf2.neighbors.dist, zero.policy=T, style="W"))
# compare Moran tests
# dependent variable (tmeanjul)
moran.test(tmeanjul, zero.policy=T,
nb2listw(midwtf2.neighbors.dist, zero.policy=T, style="W"))
# lm0 (naive model) no transformation, all predictors included
moran.test(residuals(tmeanjul.lm0), zero.policy=T,
nb2listw(midwtf2.neighbors.dist, zero.policy=T, style="W"))
# lm1 transformed predictors, all predictors included
moran.test(residuals(tmeanjul.lm1), zero.policy=T,
nb2listw(midwtf2.neighbors.dist, zero.policy=T, style="W"))
# lm3 transformed predictors, best subset model
moran.test(residuals(tmeanjul.lm3), zero.policy=T,
nb2listw(midwtf2.neighbors.dist, zero.policy=T, style="W"))