Section 8.2: Estimating annual average airpollution in England and Wales

Sujit K. Sahu

University of Southampton

2022-03-30

Abstract

This file contains all the code for reproducing the figures and tables for modeling air pollution data from England and Wales as discussed in Section 8.2.

library(bmstdr)
## Warning in .recacheSubclasses(def@className, def, env): undefined subclass
## "numericVector" of class "Mnumeric"; definition not updated
library(akima)
library(ggplot2)
library(rgdal)
library(spdep)
library(broom)
library(tidyr)
library(RColorBrewer)
library(ggpubr)
library(spdep)
library(GGally)
library(geoR)
library(spTimer)
library(sp)
## Note the start time
start.time<-proc.time()[3]
figurepath <- "figures/chap8figures/ewpollution/"
filepath <- "txttables/"
longrun <- FALSE
dpath <- "datafiles/"
load(file=paste0(dpath, "C8ewpollution.RData"))

1 Code to reproduce Figure8.1


sites <- unique(p2011data[, 1:6])
# Knot selection 
delta <- 100000
a <- c(min(sites$easting)-1.5*delta, max(sites$easting)+delta)
b <- c(min(sites$northing)-delta, max(sites$northing)+1.5*delta)
#a
#b
knots.coords <- spT.grid.coords(Lon=a, Lat=b, by=c(15, 15))
dim(knots.coords)
## [1] 225   2
knots <- data.frame(easting=knots.coords[,1], northing=knots.coords[,2])

p1 <-  ggplot(data=engw, aes(x=long, y=lat, group = group)) +
  geom_polygon(colour='black',size=0.8, fill=NA) + 
  geom_point(data=knots, aes(x=easting, y = northing), col="red", inherit.aes = FALSE, size=1.5) +
  labs( title= "Knot locations", x="", y = "") +
  theme(axis.text.x = element_blank(), axis.text.y = element_blank(), axis.ticks = element_blank()) +
  theme(legend.title = element_blank()) +  
 # theme(legend.position =c(0.1, 0.6))  + 
  ggsn::scalebar(data =engw, dist =100, location = "topleft", transform=F, dist_unit = "km", st.dist = .05, st.size = 5, height = .06, st.bottom=T) +
  ggsn::north(data=engw, location="topright", symbol=12) 
p1


ggsave(filename=paste0(figurepath, "allknotsew.png"))
## Saving 7 x 5 in image

pnts <- knots
colnames(pnts) <- c("long", "lat")
coordinates(pnts)<- ~long + lat
pnts <- SpatialPoints(coords = pnts)
proj4string(pnts) <-  proj4string(ewmap)
## Warning in proj4string(ewmap): CRS object has comment, which is lost in output
## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj
## = prefer_proj): Discarded datum Unknown based on Airy 1830 ellipsoid in Proj4
## definition
proj4string(ewmap)
## Warning in proj4string(ewmap): CRS object has comment, which is lost in output
## [1] "+proj=tmerc +lat_0=49 +lon_0=-2 +k=0.9996012717 +x_0=400000 +y_0=-100000 +ellps=airy +units=m +no_defs"
proj4string(pnts)
## Warning in proj4string(pnts): CRS object has comment, which is lost in output
## [1] "+proj=tmerc +lat_0=49 +lon_0=-2 +k=0.9996012717 +x_0=400000 +y_0=-100000 +ellps=airy +units=m +no_defs"
identicalCRS(ewmap, pnts)
## [1] TRUE
a1 <- sp::over(pnts, ewmap, fn = NULL) 
summary(a1)
##    geo_code          geo_label          geo_labelw           label          
##  Length:225         Length:225         Length:225         Length:225        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##      name          
##  Length:225        
##  Class :character  
##  Mode  :character
knots.inside <- knots[!is.na(a1$geo_code), ]
dim(knots.inside)
## [1] 58  2
knots.inside.58 <- knots.inside
dim(knots)
## [1] 225   2
head(knots.inside)
##     easting  northing
## 48 208562.4  63229.43
## 51 208562.4 226154.07
## 63 256537.1  63229.43
## 64 256537.1 117537.64
## 66 256537.1 226154.07
## 68 256537.1 334770.50

p2 <-  ggplot(data=engw, aes(x=long, y=lat, group = group)) +
  geom_polygon(colour='black',size=0.8, fill=NA) + 
  geom_point(data=knots.inside.58, aes(x=easting, y = northing), col="red", inherit.aes = FALSE, size=1.5) +
  labs( title= "Knot locations inside the map", x="", y = "") +
  theme(axis.text.x = element_blank(), axis.text.y = element_blank(), axis.ticks = element_blank()) +
  theme(legend.title = element_blank()) +  
  # theme(legend.position =c(0.1, 0.6))  + 
  ggsn::scalebar(data =engw, dist =100, location = "topleft", transform=F, dist_unit = "km", st.dist = .05, st.size = 5, height = .06, st.bottom=T) +
  ggsn::north(data=engw, location="topright", symbol=12) 
p2


ggsave(filename=paste0(figurepath, "insideknots.png"))
## Saving 7 x 5 in image

2 Reproduce Figure 8.2

head(predgrid)
##   index easting northing  type year month day  aqum_no2
## 1    18  330470   439518 Urban 2011     1   1 25.637844
## 2    18  330470   439518 Urban 2011     1   2 16.279803
## 3    18  330470   439518 Urban 2011     1   3 24.817246
## 4    18  330470   439518 Urban 2011     1   4  8.411372
## 5    18  330470   439518 Urban 2011     1   5 11.198054
## 6    18  330470   439518 Urban 2011     1   6 10.791748
onlygrid <- unique(predgrid[, 1:4])
dim(onlygrid)
## [1] 2124    4

ppred <-  ggplot(data=engw, aes(x=long, y=lat, group = group)) +
  geom_polygon(colour='black',size=0.8, fill=NA) + 
  geom_point(data=onlygrid, aes(x=easting, y = northing,shape=type, col=type),  inherit.aes = FALSE) +
  labs( title= "Prediction locations", x="", y = "") +
  theme(axis.text.x = element_blank(), axis.text.y = element_blank(), axis.ticks = element_blank()) +
  theme(legend.title = element_blank()) +  
  theme(legend.position =c(0.1, 0.6))  + 
  ggsn::scalebar(data =engw, dist =100, location = "topleft", transform=F, dist_unit = "km", st.dist = .05, st.size = 5, height = .06, st.bottom=T) +
  ggsn::north(data=engw, location="topright", symbol=12) 
ppred


ggsave(paste0(figurepath, "predgridew.png"))
## Saving 7 x 5 in image

knots.coords <- spT.grid.coords(Lon=a, Lat=b, by=c(10, 10))
dim(knots.coords)
knots <- data.frame(easting=knots.coords[,1], northing=knots.coords[,2])
pnts <- knots
colnames(pnts) <- c("long", "lat")
coordinates(pnts)<- ~long + lat
pnts <- SpatialPoints(coords = pnts)
proj4string(pnts) <-  proj4string(ewmap)
proj4string(ewmap)
proj4string(pnts)
identicalCRS(ewmap, pnts)
a1 <- sp::over(pnts, ewmap, fn = NULL) 
summary(a1)
knots.inside <- knots[!is.na(a1$geo_code), ]
dim(knots.inside)
knots.inside.25 <- knots.inside
dim(knots)


pnts <- knots
colnames(pnts) <- c("long", "lat")
coordinates(pnts)<- ~long + lat
pnts <- SpatialPoints(coords = pnts)
proj4string(pnts) <-  proj4string(ewmap)
proj4string(ewmap)
proj4string(pnts)
identicalCRS(ewmap, pnts)
a1 <- sp::over(pnts, ewmap, fn = NULL) 
summary(a1)
knots.inside <- knots[!is.na(a1$geo_code), ]
dim(knots.inside)
knots.inside.58 <- knots.inside
head(knots.inside)


# Another grid size
knots.coords <- spT.grid.coords(Lon=a, Lat=b, by=c(20, 20))
dim(knots.coords)
knots <- data.frame(easting=knots.coords[,1], northing=knots.coords[,2])
pnts <- knots
colnames(pnts) <- c("long", "lat")
coordinates(pnts)<- ~long + lat
pnts <- SpatialPoints(coords = pnts)
proj4string(pnts) <-  proj4string(ewmap)
proj4string(ewmap)
proj4string(pnts)
identicalCRS(ewmap, pnts)
a1 <- sp::over(pnts, ewmap, fn = NULL) #, returnList = TRUE) 
summary(a1)
knots.inside <- knots[!is.na(a1$geo_code), ]
dim(knots.inside)
knots.inside.106 <- knots.inside


f2 <- obs_no2 ~ type + sqrt(aqum_no2) + type:sqrt(aqum_no2) 
set.seed(44)
vs <- sort(sample(nrow(p2011data), 100))
head(p2011data)
M9.25 <- Bsptime(package="spTimer", model="GPP",  formula=f2, data=p2011data, 
              coordtype="utm", coords=4:5, scale.transform = "SQRT", 
              knots.coords=knots.inside.25, n.report=2, validrows=vs)

summary(M9.25)
M9.58 <- Bsptime(package="spTimer", model="GPP",  formula=f2, data=p2011data, 
                 coordtype="utm", coords=4:5, scale.transform = "SQRT", 
                 knots.coords=knots.inside.58, n.report=2, validrows=vs)

summary(M9.58)

M9.106 <- Bsptime(package="spTimer", model="GPP",  formula=f2, data=p2011data, 
                 coordtype="utm", coords=4:5, scale.transform = "SQRT", 
                 knots.coords=knots.inside.106, n.report=2, validrows=vs)

summary(M9.106)
cbind(M9.25$stats, M9.58$stats, M9.106$stats)
if (longrun) {
f2 <- obs_no2 ~ type + sqrt(aqum_no2) + type:sqrt(aqum_no2) 
M9 <- Bsptime(package="spTimer", model="GPP",  formula=f2, data=p2011data, 
              coordtype="utm", coords=4:5, scale.transform = "SQRT", 
              knots.coords=knots.inside.58, n.report=2, N=1000, burn.in = 500)
gpred <- predict(M9$fit, tol.dist =0.0, newdata=predgrid, newcoords=~easting+northing)

summary(M9)

table8.1 <- M9$params
dput(table8.1, file=paste0(filepath, "table8.1.txt"))

modfit <- M9$fit
itmax <- modfit$iterations - modfit$nBurn
dim(gpred$pred.samples)
anmean <- function(x, pattern=predgrid$index){
  a <- as.vector(tapply(x, INDEX=pattern, FUN=mean))
}

amcmc <- apply(gpred$pred.samples, MARGIN = 2, FUN=anmean)
dim(amcmc)
xo <- seq(from=min(allpreds$easting)-500, to = max(allpreds$easting)+500, length=200)
yo <- seq(from=min(allpreds$northing)-500, to = max(allpreds$northing)+500, length=200)

pnts <- expand.grid(surf$x, surf$y)
colnames(pnts) <- c("long", "lat")
coordinates(pnts)<- ~long + lat
pnts <- SpatialPoints(coords = pnts)
proj4string(pnts) <-  proj4string(ewmap)
proj4string(ewmap)
proj4string(pnts)
identicalCRS(ewmap, pnts)
a1 <- sp::over(pnts, ewmap, fn = NULL) #, returnList = TRUE) 
summary(a1)

save(amcmc, xo, yo, a1, file="mcmc_output_files/ewpollution/modfit.RData")
# save(allpreds, predgrid, onlygrid, p2011data, f2, knots.inside.58, ewmap,  engw, file="mcmc_output_files/ewpollution/ewpollution.RData")
} else { 
  load(file="mcmc_output_files/ewpollution/modfit.RData")
}
# Processing to generate the maps 

apreds <- get_validation_summaries(t(amcmc))
head(apreds)
##   meanpred    sdpred medianpred      low       up
## 1 35.89058 0.8876948   35.86375 34.21450 37.62684
## 2 36.86114 0.8539995   36.88744 35.23346 38.41307
## 3 65.56333 1.2224105   65.45216 63.31427 68.11039
## 4 64.72496 1.2085014   64.72130 62.55566 67.11706
## 5 65.33927 1.2915555   65.32116 62.80704 67.89645
## 6 48.95396 1.1651769   49.00960 46.44268 51.13857
summary(apreds$meanpred)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   14.92   22.15   25.57   27.56   29.51   65.73
summary(apreds$medianpred)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   14.91   22.14   25.56   27.55   29.49   65.71
summary(apreds)
##     meanpred         sdpred         medianpred         low       
##  Min.   :14.92   Min.   :0.5508   Min.   :14.91   Min.   :13.81  
##  1st Qu.:22.15   1st Qu.:0.6996   1st Qu.:22.14   1st Qu.:20.77  
##  Median :25.57   Median :0.7826   Median :25.56   Median :24.05  
##  Mean   :27.56   Mean   :0.8052   Mean   :27.55   Mean   :26.01  
##  3rd Qu.:29.51   3rd Qu.:0.8878   3rd Qu.:29.49   3rd Qu.:27.83  
##  Max.   :65.73   Max.   :1.3301   Max.   :65.71   Max.   :63.31  
##        up       
##  Min.   :16.01  
##  1st Qu.:23.57  
##  Median :27.09  
##  Mean   :29.14  
##  3rd Qu.:31.22  
##  Max.   :68.29

allpreds <- cbind(onlygrid, apreds)
head(allpreds)
##      index easting northing  type meanpred    sdpred medianpred      low
## 1       18  330470   439518 Urban 35.89058 0.8876948   35.86375 34.21450
## 366     20  412327    93329 Urban 36.86114 0.8539995   36.88744 35.23346
## 731     24  380915   404763   RKS 65.56333 1.2224105   65.45216 63.31427
## 1096    77  508291   177811   RKS 64.72496 1.2085014   64.72130 62.55566
## 1461   149  517490   178015   RKS 65.33927 1.2915555   65.32116 62.80704
## 1826   181  525773   174690 Urban 48.95396 1.1651769   49.00960 46.44268
##            up
## 1    37.62684
## 366  38.41307
## 731  68.11039
## 1096 67.11706
## 1461 67.89645
## 1826 51.13857
surf <- interp(allpreds$easting, allpreds$northing, allpreds$meanpred,  xo=xo, yo=yo)
names(surf)
## [1] "x" "y" "z"
surf <- list(x=surf$x, y=surf$y, z=surf$z)
# v <- pts1[!is.na(a1), ] # Set NA  to outside of the map 
# a1 calculated above 
z <- as.vector(surf$z)
length(z)
## [1] 40000
summary(z)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   14.93   18.71   23.86   24.07   27.11   63.49   15622
z[is.na(a1)] <- NA
z <- matrix(z, ncol=200, nrow=200)

interp1 <- data.frame(long = surf$x, z)
names(interp1)[1:length(surf$y)+1] <- surf$y
interp1 <- gather(interp1, key = lat, value =prediction,-long,convert = TRUE)
zr <- range(interp1$prediction, na.rm=T)

#pcolors <- brewer.pal(8,"Dark2")
pcolors <- brewer.pal(9,"YlOrRd")

pno2 <-  ggplot(data=engw, aes(x=long, y=lat, group = group)) +
  # geom_polygon(colour='blue',size=1, fill=NA) +  
  scale_fill_gradientn(colours=pcolors, na.value="gray95", limits=zr) +
  geom_raster(data=interp1, aes(x = long, y =lat,fill =prediction), inherit.aes = FALSE, interpolate = T) + 
  labs( title= "Annual NO2 levels in 2011", x="", y = "") +
  theme(axis.text.x = element_blank(), axis.text.y = element_blank(), axis.ticks = element_blank()) +
  theme(legend.title = element_blank()) +  
  theme(legend.position =c(0.1, 0.8))  + 
  ggsn::north(data=engw, location="topright", symbol=12) 
pno2


ggsave(filename=paste0(figurepath, "2011_no2.png"))
## Saving 7 x 5 in image

# Repeat for sd 
head(allpreds)
##      index easting northing  type meanpred    sdpred medianpred      low
## 1       18  330470   439518 Urban 35.89058 0.8876948   35.86375 34.21450
## 366     20  412327    93329 Urban 36.86114 0.8539995   36.88744 35.23346
## 731     24  380915   404763   RKS 65.56333 1.2224105   65.45216 63.31427
## 1096    77  508291   177811   RKS 64.72496 1.2085014   64.72130 62.55566
## 1461   149  517490   178015   RKS 65.33927 1.2915555   65.32116 62.80704
## 1826   181  525773   174690 Urban 48.95396 1.1651769   49.00960 46.44268
##            up
## 1    37.62684
## 366  38.41307
## 731  68.11039
## 1096 67.11706
## 1461 67.89645
## 1826 51.13857
surf <- interp(allpreds$easting, allpreds$northing, allpreds$sdpred,  xo=xo, yo=yo)
names(surf)
## [1] "x" "y" "z"
#names(surf)
surf <- list(x=surf$x, y=surf$y, z=surf$z)
z <- as.vector(surf$z)
z[is.na(a1)] <- NA
z <- matrix(z, ncol=200, nrow=200)

interp1 <- data.frame(long = surf$x, z)
names(interp1)[1:length(surf$y)+1] <- surf$y
interp1 <- gather(interp1, key = lat, value =sd,-long,convert = TRUE)
zr <- range(interp1$sd, na.rm=T)


psd <-  ggplot(data=engw, aes(x=long, y=lat, group = group)) +
  # geom_polygon(colour='blue',size=1, fill=NA) +  
  scale_fill_gradientn(colours=pcolors, na.value="gray95", limits=zr) +
  geom_raster(data=interp1, aes(x = long, y =lat,fill =sd), inherit.aes = FALSE, interpolate = T) + 
  labs( title= "Sd of annual NO2 levels in 2011", x="", y = "") +
  theme(axis.text.x = element_blank(), axis.text.y = element_blank(), axis.ticks = element_blank()) +
  theme(legend.title = element_blank()) +  
  theme(legend.position =c(0.1, 0.8))  + 
  ggsn::north(data=engw, location="topright", symbol=12) 
psd

  
ggsave(filename=paste0(figurepath, "2011_no2_sd.png"))
## Saving 7 x 5 in image
# All done 
end.time <- proc.time()[3]
comp.time<- (end.time-start.time)/60
# comp.time<-fancy.time(comp.time)
print(comp.time)
##   elapsed 
## 0.3833333