Chapter 10: Bayesian modeling for areal unit data

Sujit K. Sahu

University of Southampton

2022-07-18

Abstract

This file contains all the code for reproducing the figures in Chapter 10.

library(bmstdr)
## Warning in .recacheSubclasses(def@className, def, env): undefined subclass
## "numericVector" of class "Mnumeric"; definition not updated
library(ggplot2)
library(RColorBrewer)
library(ggpubr)
library(GGally)
library(doBy)
# Download mapfiles and data files from https://github.com/sujit-sahu/bookbmstdr
## Note the start time
start.time<-proc.time()[3]
figurepath <- "figures/chap10figures/"
filepath <- "txttables/"
load(file="mapfiles/englamap.rda")
load(file="mapfiles/engregmap.rda")
dpath <- "datafiles/" # Folder containing the data files 
longrun <- FALSE  # Should we run the model to check the results. 
# If FALSE we use the results, e.g. tables, figures and model fits from previous runs
# If TRUE it takes more than two hours to run the models. 

set.seed(44)
vs <- sample(nrow(engdeaths), 0.1 * nrow(engdeaths))
set.seed(44)
vst <- sample(nrow(engtotals), 0.1 * nrow(engtotals))

if (longrun) {
 N <- 120000
 burn.in <- 20000
 thin <- 10
 
if (!file.exists(figurepath)) {
  dir.create(figurepath)
}  

if (!file.exists(filepath)) {
  dir.create(filepath)
}  
 dir.create("engdeaths/")
 
} 

1 Code to reproduce Figure10.1


comparelinks <- function()
{
  x <- seq(from = -5., to = 5., length = 200)
  y <- pnorm(x)
  z <- 1./(1. + exp( - x))
  compl <- 1- exp(-exp(x))
  plot(x, y, type = "l", main = "Comparison of Probit, Logit and Complementary log log links", col=2)
  lines(x, z, lty = 2., col=4)
  lines(x, compl, lty=3, col=6)
  legend(-4., 0.8, legend = c("Probit", "Logit", "cloglog"),
         lty = c(1., 2., 3), col=c(2, 4, 6))
  
  k <- length(x)
  ldata <- data.frame(x=rep(x, 3), 
                      link=c(rep("Logit", k), rep("Probit", k),rep("Loglog", k)),   
                      values=c(z, y, compl), 
                      ltype=rep(1:3, each=k))
  ldata$link <- as.factor(ldata$link)
  ldata$link <- factor(ldata$link, levels=levels(ldata$link))
  ldata$ltype <- as.factor(ldata$ltype)
  head(ldata)
  linkplot <- ggplot() + 
    geom_line(data=ldata, aes(x=x, y=values, group=link, 
                              color=link, linetype=link), size=1) +
    labs(x ="Linear predictor", y = "Probability")+
    theme(legend.position=c(0.20, 0.75)) #+ 
  #  guides(guide_legend(reverse=TRUE))
  linkplot
}
linkplot <- comparelinks()

ggsave(filename=paste0(figurepath, "link_functions.png"))
## Saving 7 x 5 in image
f1 <-  noofhighweeks ~ jsa + log10(houseprice) + log(popdensity) + sqrt(no2) 
## independent logistic regression 
if (longrun) {
# nweek <- engtotals$nweek
M1 <- Bcartime(formula=f1,   data=engtotals, family="binomial", trials="nweek", N=N, burn.in = burn.in, thin=thin, verbose = FALSE) 

DIC <- c(M1$mchoice[2], M1$mchoice[1], rep(NA, 5))
WAIC <- c(M1$mchoice[4], M1$mchoice[3], rep(NA, 5))
table10.1 <- rbind(M1$params, DIC, WAIC)
# xtable(table10.1, digits=3)
dput(table10.1, file=paste0(filepath, "table10.1.txt"))
M1.v <- Bcartime(formula=f1,   data=engtotals, family="binomial", trials="nweek", N=N, burn.in = burn.in, thin=thin, verbose = FALSE, 
      validrows=vst) 
vresult10.1 <- M1.v$stats
dput(vresult10.1, file=paste0(filepath, "validationstats_table10.1.txt"))
summary(M1.v)
} else {
  table10.1 <- dget(file=paste0(filepath, "table10.1.txt"))
  vresult10.1 <- dget(file=paste0(filepath, "validationstats_table10.1.txt"))
}
round(table10.1, 3)
##                     Mean     2.5% 97.5% n.sample % accept n.effective
## (Intercept)       -1.223   -2.995 0.557    10000     43.1      2759.3
## jsa                0.206   -0.087 0.495    10000     43.1      2575.7
## log10(houseprice) -0.100   -0.450 0.249    10000     43.1      2555.4
## log(popdensity)    0.083    0.036 0.131    10000     43.1      2643.1
## sqrt(no2)          0.127    0.024 0.232    10000     43.1      3184.3
## DIC                4.958 1503.967    NA       NA       NA          NA
## WAIC               6.233 1505.363    NA       NA       NA          NA
##                   Geweke.diag
## (Intercept)              -0.9
## jsa                       1.3
## log10(houseprice)         1.0
## log(popdensity)           0.5
## sqrt(no2)                -1.7
## DIC                        NA
## WAIC                       NA
print(vresult10.1)
## $rmse
## [1] 2.469165
## 
## $mae
## [1] 1.709677
## 
## $crps
## [1] 1.219016
## 
## $cvg
## [1] 93.54839
## independent Poisson regression
f2 <-  covid ~ offset(logEdeaths) + jsa + log10(houseprice) + log(popdensity) + sqrt(no2) 
if (longrun) {
  
M2 <- Bcartime(formula=f2,   data=engtotals, family="poisson", 
               N=N, burn.in = burn.in, thin=thin, verbose = FALSE) 

table10.2 <- M2$params
DIC <- c(M2$mchoice[2], M2$mchoice[1], rep(NA, 5))
WAIC <- c(M2$mchoice[4], M2$mchoice[3], rep(NA, 5))
table10.2 <- rbind(M2$params, DIC, WAIC)
# xtable(table10.1, digits=3)
dput(table10.2, file=paste0(filepath, "table10.2.txt"))

M2.v <- Bcartime(formula=f2,   data=engtotals, family="poisson", 
               N=N, burn.in = burn.in, thin=thin, verbose = FALSE, validrows = vst) 
vresult10.2 <- M2.v$stats
dput(vresult10.2, file=paste0(filepath, "validationstats_table10.2.txt"))
} else {
  table10.2 <- dget(file=paste0(filepath, "table10.2.txt"))
  vresult10.2 <- dget(file=paste0(filepath, "validationstats_table10.2.txt"))
}
round(table10.2, 3)
##                     Mean     2.5%  97.5% n.sample % accept n.effective
## (Intercept)       -2.848   -3.133 -2.569    10000       44      2641.6
## jsa                0.204    0.160  0.248    10000       44      2432.4
## log10(houseprice)  0.325    0.269  0.381    10000       44      2436.0
## log(popdensity)    0.100    0.091  0.108    10000       44      2291.7
## sqrt(no2)          0.086    0.070  0.102    10000       44      3064.5
## DIC                4.903 5430.202     NA       NA       NA          NA
## WAIC              57.931 5485.221     NA       NA       NA          NA
##                   Geweke.diag
## (Intercept)               0.4
## jsa                       0.3
## log10(houseprice)        -0.3
## log(popdensity)           1.2
## sqrt(no2)                -1.5
## DIC                        NA
## WAIC                       NA
print(vresult10.2)
## $rmse
## [1] 58.07642
## 
## $mae
## [1] 34.35484
## 
## $crps
## [1] 5.851127
## 
## $cvg
## [1] 54.83871
## independent Poisson regression
f3 <-  sqrt(no2) ~  jsa + log10(houseprice) + log(popdensity) 

if (longrun) {
  M3 <- Bcartime(formula=f3,   data=engtotals, family="gaussian", 
               N=N, burn.in = burn.in , thin=thin, verbose = FALSE) 
DIC <- c(M3$mchoice[2], M3$mchoice[1], rep(NA, 5))
WAIC <- c(M3$mchoice[4], M3$mchoice[3], rep(NA, 5))
table10.3 <- rbind(M3$params, DIC, WAIC)
M3.v <- Bcartime(formula=f3,   data=engtotals, family="gaussian", 
               N=N, burn.in = burn.in , thin=thin, verbose = FALSE, 
               validrows=vst) 

#rownames(table10.4)[9:10] <- c("DIC", "WAIC")
dput(table10.3, file=paste0(filepath, "table10.3.txt"))
vresult10.3 <- M3.v$stats
dput(vresult10.3, file=paste0(filepath, "validationstats_table10.3.txt"))
} else {
  table10.3 <- dget(file=paste0(filepath, "table10.3.txt"))
   vresult10.3 <- dget(file=paste0(filepath, "validationstats_table10.3.txt"))
}
round(table10.3, 3)
##                     Mean    2.5%  97.5% n.sample % accept n.effective
## (Intercept)       -2.898  -4.856 -0.969    10000      100      9760.0
## jsa                0.044  -0.286  0.368    10000      100      9391.8
## log10(houseprice)  1.016   0.647  1.392    10000      100      9746.7
## log(popdensity)    0.168   0.119  0.216    10000      100      9810.3
## nu2                0.265   0.227  0.310    10000      100     10625.3
## DIC                4.980 473.439     NA       NA       NA          NA
## WAIC               6.004 474.641     NA       NA       NA          NA
##                   Geweke.diag
## (Intercept)              -0.1
## jsa                       0.0
## log10(houseprice)         0.1
## log(popdensity)           0.4
## nu2                       0.1
## DIC                        NA
## WAIC                       NA
print(vresult10.3)
## $rmse
## [1] 0.5754245
## 
## $mae
## [1] 0.4407268
## 
## $crps
## [1] 0.3388509
## 
## $cvg
## [1] 90.32258

f1 <-  noofhighweeks ~ jsa + log10(houseprice) + log(popdensity) + sqrt(no2)
if (longrun) { 
M1.leroux <- Bcartime(formula=f1, data=engtotals, scol="spaceid", W=Weng, 
                      family="binomial", trials="nweek", model="leroux", 
                      N=N, burn.in = burn.in , thin=thin, verbose = FALSE)

M1.bym <- Bcartime(formula=f1, data=engtotals, scol="spaceid", W=Weng, 
              family="binomial", trials="nweek", model="bym", 
              N=N, burn.in = burn.in , thin=thin, verbose = FALSE)

u <- M1.leroux$params[, 1:3]
v <- M1.bym$params[, 1:3]
dim(u)
dim(v)
u
v
w <- cbind.data.frame(u, v) # Now repair 
dim(w)
w
k <- nrow(w)
w[k+1, ] <- w[k, ]
w[k, 4:6] <- NA  ## Remove the rho for BYM model 
w[k+1, 1:3] <- NA ## Remove the Sigma^2 for Leroux 
rownames(w)[k+1] <- "sigma2"
w

DIC <- c(M1.leroux$mchoice[2], M1.leroux$mchoice[1], NA, M1.bym$mchoice[2], M1.bym$mchoice[1], NA)
WAIC <- c(M1.leroux$mchoice[4], M1.leroux$mchoice[3], NA, M1.bym$mchoice[4], M1.bym$mchoice[3], NA)
table10.4 <- rbind(w, DIC, WAIC)
rownames(table10.4)[9:10] <- c("DIC", "WAIC")
# xtable(10.4, digits=3)
dput(table10.4, file=paste0(filepath, "table10.4.txt"))
M1.leroux.v <- Bcartime(formula=f1, data=engtotals, scol="spaceid", W=Weng, 
                      family="binomial", trials="nweek", model="leroux", 
                      N=N, burn.in = burn.in , thin=thin, verbose = FALSE, validrows=vst)
vresult10.4 <- M1.leroux.v$stats
dput(vresult10.4, file=paste0(filepath, "validationstats_table10.4.txt"))

} else {
  table10.4 <- dget(file=paste0(filepath, "table10.4.txt"))
  vresult10.4 <- dget(file=paste0(filepath, "validationstats_table10.4.txt"))
}
round(table10.4, 3)
print(vresult10.4)
f2 <-  covid ~ offset(logEdeaths) + jsa + log10(houseprice) + log(popdensity) + sqrt(no2) 

if (longrun) {

M2.leroux <- Bcartime(formula=f2, data=engtotals,
            scol="spaceid",  model="leroux",  family="poisson", W=Weng,
            N=N, burn.in = burn.in, thin=thin, verbose=FAlSE)

M2.bym <- Bcartime(formula=f2, data=engtotals,
                   scol="spaceid",  model="bym",  family="poisson", W=Weng,
                   N=N, burn.in = burn.in, thin=thin, verbose=FALSE)
u <- M2.leroux$params[, 1:3]
v <- M2.bym$params[, 1:3]
dim(u)
dim(v)
u
v
w <- cbind.data.frame(u, v) # Now repair 
dim(w)
w
k <- nrow(w)
w[k+1, ] <- w[k, ]
w[k, 4:6] <- NA  ## Remove the rho for BYM model 
w[k+1, 1:3] <- NA ## Remove the Sigma^2 for Leroux 
rownames(w)[k+1] <- "sigma2"
w

DIC <- c(M2.leroux$mchoice[2], M2.leroux$mchoice[1], NA, M2.bym$mchoice[2], M2.bym$mchoice[1], NA)
WAIC <- c(M2.leroux$mchoice[4], M2.leroux$mchoice[3], NA, M2.bym$mchoice[4], M2.bym$mchoice[3], NA)
table10.5 <- rbind(w, DIC, WAIC)
rownames(table10.5)[9:10] <- c("DIC", "WAIC")
dput(table10.5, file=paste0(filepath, "table10.5.txt"))
M2.leroux.v <- Bcartime(formula=f2, data=engtotals,
            scol="spaceid",  model="leroux",  family="poisson", W=Weng,
            N=N, burn.in = burn.in, thin=thin, verbose=FALSE, 
            validrows=vst)
vresult10.5 <- M2.leroux.v$stats
dput(vresult10.5, file=paste0(filepath, "validationstats_table10.5.txt"))

} else {
  table10.5 <- dget(file=paste0(filepath, "table10.5.txt"))
  vresult10.5 <- dget(file=paste0(filepath, "validationstats_table10.5.txt"))
}
round(table10.5, 3)
print(vresult10.5 )
f3 <-  sqrt(no2) ~  jsa + log10(houseprice) + log(popdensity) 

if (longrun) { 
M3.leroux <- Bcartime(formula=f3, data=engtotals, scol="spaceid", W=Weng, 
                      family="gaussian", model="leroux")
w <- data.frame(M3.leroux$params[, 1:3])
dim(w)
w
k <- nrow(w)
DIC <- round(c(M3.leroux$mchoice[2], M3.leroux$mchoice[1], NA), 1)
WAIC <- round(c(M3.leroux$mchoice[4], M3.leroux$mchoice[3], NA), 1)
w[k+1, ] <- DIC
w[k+2, ] <- WAIC
table10.6 <- w
dput(table10.6, file=paste0(filepath, "table10.6.txt"))
} else {
  table10.6 <- dget(file=paste0(filepath, "table10.6.txt"))
}
round(table10.6, 3)
# remove fitted model object 
rm(list=ls(pattern="M"))

f1 <- highdeathsmr ~  jsa + log10(houseprice) + log(popdensity) 
scol <- "spaceid"
tcol <-  "Weeknumber"
engdeaths$nweek <- rep(1, nrow(engdeaths))

if (longrun) {
M1st_linear <- Bcartime(formula=f1, data=engdeaths, scol=scol, tcol=tcol, trials="nweek", 
               W=Weng, model="linear", family="binomial", package="CARBayesST", 
               N=N, burn.in=burn.in, thin=thin, verbose=FALSE)
# summary(M1st_linear)


M1st_anova <- Bcartime(formula=f1, data=engdeaths, scol=scol, tcol=tcol, trials="nweek", 
                        W=Weng, model="anova", family="binomial", package="CARBayesST", 
                       N=N, burn.in=burn.in, thin=thin, verbose=FALSE)
# summary(M1st_anova)
# names(M1st_anova)
M1st_anova_nointer <- Bcartime(formula=f1, data=engdeaths, scol=scol, tcol=tcol, trials="nweek", 
                       W=Weng, model="anova", interaction=FALSE, family="binomial", 
                       package="CARBayesST", N=N, burn.in=burn.in, thin=thin, verbose=FALSE)
#summary(M1st_anova_nointer)


M1st_sepspat <- Bcartime(formula=f1, data=engdeaths, scol=scol, tcol=tcol, trials="nweek", 
                       W=Weng, model="sepspatial", family="binomial", 
                       package="CARBayesST", N=N, burn.in=burn.in, thin=thin, verbose=FALSE)
# summary(M1st_sepspat)


M1st_ar <- Bcartime(formula=f1, data=engdeaths, scol=scol, tcol=tcol, trials="nweek", 
                         W=Weng, model="ar", AR=1, family="binomial", package="CARBayesST", 
                    N=N, burn.in=burn.in, thin=thin, verbose=FALSE)
#summary(M1st_ar)

u <- rbind(M1st_linear$mchoice, M1st_anova$mchoice, M1st_anova_nointer$mchoice, 
           M1st_sepspat$mchoice, M1st_ar$mchoice)
rownames(u) <- c("Linear", "AnovaI", "AnovanoI", "Separable", "AR (1)")
table10.7 <- u[, c(2, 1, 4, 3)]
dput(table10.7, file=paste0(filepath, "table10.7.txt"))
table10.8 <- M1st_ar$params[, 1:3]
dput(table10.8, file=paste0(filepath, "table10.8.txt"))
} else {
  table10.7 <- dget(file=paste0(filepath, "table10.7.txt"))
  table10.8 <- dget(file=paste0(filepath, "table10.8.txt"))
}
round(table10.7, 1)
round(table10.8, 3)

f2 <-  covid ~ offset(logEdeaths) + jsa + log10(houseprice) + log(popdensity) + n0 + n1 + n2 + n3

scol <- "spaceid"
tcol <-  "Weeknumber"

if (longrun) {
M2st_linear <- Bcartime(formula=f2, data=engdeaths, scol=scol, tcol=tcol,  
               W=Weng, model="linear", family="poisson", package="CARBayesST", 
               N=N, burn.in=burn.in, thin=thin, verbose=FALSE)
# summary(M2st_linear)


M2st_anova <- Bcartime(formula=f2, data=engdeaths, scol=scol, tcol=tcol,  
                        W=Weng, model="anova", family="poisson", package="CARBayesST", 
                       N=N, burn.in=burn.in, thin=thin, verbose=FALSE)
# summary(M2st_anova)


M2st_anova_nointer <- Bcartime(formula=f2, data=engdeaths, scol=scol, tcol=tcol,  
                       W=Weng, model="anova",interaction=FALSE, family="poisson", 
                       package="CARBayesST", N=N, burn.in=burn.in, thin=thin, verbose=FALSE)
# summary(M2st_anova_nointer)


M2st_sepspat <- Bcartime(formula=f2, data=engdeaths, scol=scol, tcol=tcol, 
               W=Weng, model="sepspatial",family="poisson", package="CARBayesST", 
               N=N, burn.in=burn.in, thin=thin, verbose=F)
# summary(M2st_sepspat)

M2st_ar <- Bcartime(formula=f2, data=engdeaths, scol=scol, tcol=tcol,  
                    W=Weng, model="ar", family="poisson", package="CARBayesST", 
                    N=N, burn.in=burn.in, thin=thin, verbose=F)

M2st_ar2 <- Bcartime(formula=f2, data=engdeaths, scol=scol, tcol=tcol,  
                    W=Weng, model="ar", AR=2, family="poisson", package="CARBayesST", 
                    N=N, burn.in=burn.in, thin=thin, verbose=FALSE)

# summary(M2st_ar)
u <- rbind(M2st_linear$mchoice, M2st_anova$mchoice, M2st_anova_nointer$mchoice, 
           M2st_sepspat$mchoice, M2st_ar$mchoice, M2st_ar2$mchoice)

rownames(u) <- c("Linear", "AnovaI", "AnovanoI", "Separable", "AR (1)", "AR (2)")

table10.9 <- u[, c(2, 1, 4, 3)]
table10.10 <- M2st_sepspat$params[, 1:3]

dput(table10.9, file=paste0(filepath, "table10.9.txt"))
dput(table10.10, file=paste0(filepath, "table10.10.txt"))
} else {
  table10.9 <- dget(file=paste0(filepath, "table10.9.txt"))
  table10.10 <- dget(file=paste0(filepath, "table10.10.txt"))
}
round(table10.9, 1)
round(table10.10, 3)


if (longrun) { 
  
M2st_ar2.0 <- Bcartime(formula=f2, data=engdeaths, scol=scol, tcol=tcol,  
                    W=Weng, model="ar", AR=2, family="poisson", package="CARBayesST", 
                    N=N, burn.in=burn.in, thin=thin, verbose=FALSE,
                    validrows=vs)
  
M2st_anova.0 <- Bcartime(formula=f2, data=engdeaths, scol=scol, tcol=tcol,  
                       W=Weng, model="anova", family="poisson", package="CARBayesST", 
                       N=N, burn.in=burn.in, thin=thin, verbose=FALSE,validrows=vs)

table10.11 <- rbind(Anova=unlist(M2st_anova.0$stats), 
                      AR = unlist(M2st_ar2.0$stats)) 
dput(table10.11, file=paste0(filepath, "table10.11.txt"))
} else {
  table10.11 <- dget(file=paste0(filepath, "table10.11.txt"))
}
round(table10.11, 2)

2 Table 10.14


if (longrun) {


vs <- sample(nrow(engdeaths), 0.1 * nrow(engdeaths))

f2 <-  covid ~ offset(logEdeaths) + jsa + log10(houseprice) + log(popdensity) + n0 + n1 +  n2 + n3

scol <- "spaceid"
tcol <-  "Weeknumber"

M2st_ar2 <- Bcartime(formula=f2, data=engdeaths, scol=scol, tcol=tcol,  
                     W=Weng, model="ar", AR=2, family="poisson", package="CARBayesST", 
                     N=N, burn.in=burn.in, thin=thin, verbose=T)

f20 <-  covid ~ offset(logEdeaths) + jsa + log10(houseprice) + log(popdensity) + n0
model <- c("bym", "ar1")
f2inla <-  covid ~  jsa + log10(houseprice) + log(popdensity) + n0 

M2st_ar2.0 <- Bcartime(formula=f20, data=engdeaths, scol=scol, tcol=tcol,  
                       W=Weng, model="ar", AR=2, family="poisson", package="CARBayesST", 
                       N=N, burn.in=burn.in, thin=thin, 
                       validrows=vs, verbose=T)
M2stinla.0  <- Bcartime(data=engdeaths, formula=f2inla, W=Weng, scol =scol, 
                        tcol=tcol,  offsetcol="logEdeaths",  model=model,  
                        link="log", family="poisson", package="inla", 
                        validrow=vs, N=2000, thin=1, burn.in=0) 
yobspred <- M2st_ar2.0$yobs_preds
names(yobspred)
yobs <- yobspred$covid
predsums <- get_validation_summaries(t(M2st_ar2.0$valpreds))
dim(predsums)
b <- obs_v_pred_plot(yobs, predsums, segments=T) 
names(M2stinla.0)
inlapredsums <- get_validation_summaries(t(M2stinla.0$valpreds))
dim(inlapredsums)
a <- obs_v_pred_plot(yobs, inlapredsums, segments=T) 
inlavalid <- a$pwithseg
ar2valid <- b$pwithseg
table10.15 <- rbind(AR2=unlist(M2st_ar2.0$stats), 
                    INLA = unlist(M2stinla.0$stats)) 
dput(table10.15, file=paste0(filepath, "table10.15.txt"))
save(M2st_ar2, inlavalid, ar2valid, file="engdeaths/two_engdeath_plots.RData")
model <- c("bym", "ar1")
f2inla <-  covid ~ jsa + log10(houseprice) + log(popdensity) + n0 + n1 + n2 + n3

M2stinla <- Bcartime(data=engdeaths, formula=f2, W=Weng, scol =scol, tcol=tcol,  
                     offsetcol="logEdeaths",  model=model,  link="log", family="poisson", 
                     package="inla", N=2000, burn.in=0, thin =1) 
summary(M2stinla )
table10.14 <- rbind(M2stinla$params, M2stinla$mchoice[1:4])
dput(table10.14, file=paste0(filepath, "table10.14.txt"))
} else{
  table10.14 <- dget(file=paste0(filepath, "table10.14.txt"))
  load(file="engdeaths/two_engdeath_plots.RData")
}

3 Spatial residual plots


M2st_ar2 <- Bcartime(formula=f2, data=engdeaths, scol= "spaceid", tcol="Weeknumber",  
                     W=Weng, model="ar", AR=2, family="poisson", package="CARBayesST", 
                     N=5000, burn.in=1000, thin=10, verbose=FALSE)
## ##
## # Total time taken:: 17.84  - Sec.

summary(M2st_ar2)
## 
##  The  ar  model has been fitted using the  CARBayesST  package.
## Call:
## Bcartime(formula = f2, data = engdeaths, family = "poisson", 
##     scol = "spaceid", tcol = "Weeknumber", package = "CARBayesST", 
##     model = "ar", AR = 2, W = Weng, N = 5000, burn.in = 1000, 
##     thin = 10, verbose = FALSE)
## ##
## # Total time taken:: 17.84  - Sec.
## Model formula
## covid ~ offset(logEdeaths) + jsa + log10(houseprice) + log(popdensity) + 
##     n0 + n1 + n2 + n3
## 
## 
## Parameter Estimates:
##                     Mean   2.5%  97.5% n.sample % accept n.effective
## (Intercept)       -4.544 -5.477 -3.778      400     42.7        12.3
## jsa                0.216  0.094  0.328      400     42.7        19.6
## log10(houseprice)  0.729  0.582  0.902      400     42.7        12.9
## log(popdensity)    0.080  0.060  0.095      400     42.7        20.9
## n0                 0.495  0.472  0.518      400     42.7        13.0
## n1                 0.202  0.178  0.228      400     42.7         8.8
## n2                 0.081  0.058  0.099      400     42.7        20.5
## n3                 0.060  0.047  0.073      400     42.7        34.1
## tau2               0.302  0.270  0.334      400    100.0        28.8
## rho.S              0.633  0.474  0.748      400     43.1        12.4
## rho1.T             0.050 -0.037  0.180      400    100.0        17.1
## rho2.T            -0.068 -0.176  0.055      400    100.0        12.7
##                   Geweke.diag
## (Intercept)               2.7
## jsa                      -1.4
## log10(houseprice)        -2.0
## log(popdensity)          -4.3
## n0                       -1.4
## n1                        3.0
## n2                       -1.9
## n3                        0.0
## tau2                      0.3
## rho.S                     5.4
## rho1.T                    7.1
## rho2.T                    7.3
## 
## Model Choice Statistics:
##           DIC           p.d          WAIC           p.w          LMPL 
##     26143.170      1947.384     26131.116      1529.455    -13376.175 
## loglikelihood 
##    -11124.201
z <- M2st_ar2$residuals
summary(z)
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## -8.323465 -0.986567 -0.282083 -0.003561  0.775152 13.230298

modfit <- M2st_ar2$fit
x <- modfit$samples
names(x)
## [1] "beta"   "phi"    "rho"    "tau2"   "fitted" "Y"
u <- x$fitted
dim(u)
## [1]  400 6260


obsd <- engdeaths$covid
k <- nrow(u)
omat <- matrix(rep(obsd, k), byrow=TRUE,  nrow=k)
dim(omat)
## [1]  400 6260

resids <- omat - u 
dim(resids)
## [1]  400 6260

meds <- apply(resids, 2, median) 
sds <- apply(resids, 2, sd) 
summary(meds)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -8.0591 -0.8875 -0.1893  0.1237  0.9236 13.4265
summary(sds)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##  0.00073  0.34759  0.87872  1.50222  2.17067 14.04364
length(meds)
## [1] 6260


nla <- length(unique(engdeaths$Areacode))
nla
## [1] 313
nweek <- nrow(engdeaths)/nla
nweek  
## [1] 20
acodes <- rep(1:nla, each=nweek)

callameans <- function(x, lapattern=acodes) {
  u <- data.frame(x=x, lacodes=lapattern)
  v <- summaryBy(x~lacodes, data=u)
  as.vector(v$x.mean)
}
u <- callameans(resids[1,])

a <- apply(resids, 1, callameans)
dim(a)
## [1] 313 400
head(a[1:5, 1:5])
##            [,1]        [,2]        [,3]       [,4]        [,5]
## [1,] -0.3761325  0.17955117 -1.06734780 -0.6768716 -0.49378975
## [2,] -0.6367474 -1.06604463 -0.42524337  0.0734676 -0.07452693
## [3,] -0.3012173  0.08484782  0.24554364 -0.7295566  0.20713097
## [4,]  0.6076849  0.01928501 -0.07284724  0.0909267 -0.03644653
## [5,] -0.9018756 -0.97577134 -1.86180078 -1.2372255 -0.61557987

rmeans <- apply(a, 1, mean)
rsds <- apply(a, 1, sd)
u1 <- unique(englamap[, c(1, 8)])
head(u1)
##       id  Areacode
## 1      0 E06000001
## 443    1 E06000002
## 705   10 E06000011
## 2486 100 E07000077
## 3269 101 E07000078
## 3462 102 E07000079

u <- data.frame(Areacode=unique(engdeaths$Areacode), rmeans=rmeans, rsds=rsds)
head(u)
##    Areacode      rmeans      rsds
## 1 E06000001 -0.15955343 0.4085486
## 2 E06000002 -0.34241559 0.6540046
## 3 E06000003 -0.08958056 0.4441944
## 4 E06000004  0.45187851 0.4779861
## 5 E06000005 -0.68894097 0.4643947
## 6 E06000006  0.25881082 0.4922395
v <- merge(u, u1)
head(v)
##    Areacode      rmeans      rsds id
## 1 E06000001 -0.15955343 0.4085486  0
## 2 E06000002 -0.34241559 0.6540046  1
## 3 E06000003 -0.08958056 0.4441944  2
## 4 E06000004  0.45187851 0.4779861  3
## 5 E06000005 -0.68894097 0.4643947  4
## 6 E06000006  0.25881082 0.4922395  5

bdf <- merge(englamap, v, by.x="id", by.y="id")
head(bdf)
##   id     long      lat order  hole piece group Areacode.x Areacode.y     rmeans
## 1  0 447097.0 537152.0     1 FALSE     1   0.1  E06000001  E06000001 -0.1595534
## 2  0 447228.8 537033.4     2 FALSE     1   0.1  E06000001  E06000001 -0.1595534
## 3  0 447280.7 537120.1     3 FALSE     1   0.1  E06000001  E06000001 -0.1595534
## 4  0 447378.1 537095.1     4 FALSE     1   0.1  E06000001  E06000001 -0.1595534
## 5  0 447455.3 537023.7     5 FALSE     1   0.1  E06000001  E06000001 -0.1595534
## 6  0 447550.5 537078.1     6 FALSE     1   0.1  E06000001  E06000001 -0.1595534
##        rsds
## 1 0.4085486
## 2 0.4085486
## 3 0.4085486
## 4 0.4085486
## 5 0.4085486
## 6 0.4085486


plimits <- range(bdf$rmeans)
plimits
## [1] -1.286212  2.080422


com <- rev(c("red4", "red2","white", "dodgerblue2","dodgerblue4"))#colour palette
p01 <-  ggplot(data=bdf, aes(x=long, y=lat, group = group, fill=rmeans)) +
  scale_fill_gradientn(colours=com, na.value="black",limits=plimits)  +
  geom_polygon(colour='black',size=0.25) +
  geom_polygon(data=engregmap, aes(x=long, y=lat, group = group), fill=NA, colour='black',size=0.6)  +
  coord_equal() + guides(fill=guide_colorbar(title="Residuals")) +
  theme_bw()+theme(text=element_text(family="Times")) +
  labs(title= "Spatially aggregated residuals",  x="", y = "", size=2.5) +
  theme(axis.text.x = element_blank(), axis.text.y = element_blank(),axis.ticks = element_blank())   +
  ggsn::scalebar(data=bdf, dist =50, location = "topleft", transform=FALSE,dist_unit = "km",
                 st.dist = .05, st.size =4, height = .06, st.bottom=TRUE)
p01

ggsave(filename=paste0(figurepath, "chap10_spat_resid.png"))
## Saving 7 x 5 in image

plimits <- range(bdf$rsds)
p02 <-  ggplot(data=bdf, aes(x=long, y=lat, group = group, fill=rsds)) +
  scale_fill_gradientn(colours=colpalette, na.value="black",limits=plimits)  +
  geom_polygon(colour='black',size=0.25) +
  geom_polygon(data=engregmap, aes(x=long, y=lat, group = group), fill=NA, colour='black',size=0.6)  +
  coord_equal() + guides(fill=guide_colorbar(title="sd")) +
  theme_bw()+theme(text=element_text(family="Times")) +
  labs(title= "Sd of spatially aggregated residuals",  x="", y = "", size=2.5) +
  theme(axis.text.x = element_blank(), axis.text.y = element_blank(),axis.ticks = element_blank())   +
  ggsn::scalebar(data=bdf, dist =50, location = "topleft", transform=FALSE,  dist_unit = "km",
                 st.dist = .05, st.size =4, height = .06, st.bottom=TRUE)
p02

ggsave(filename=paste0(figurepath, "chap10_spat_resid_sd.png"))
## Saving 7 x 5 in image

allpost <- M2st_ar2$fit
names(M2st_ar2)
##  [1] "params"           "fit"              "fitteds"          "mchoice"         
##  [5] "residuals"        "sn"               "tn"               "formula"         
##  [9] "scale.transform"  "package"          "model"            "call"            
## [13] "computation.time"
names(M2st_ar2$fit)
##  [1] "summary.results"     "samples"             "fitted.values"      
##  [4] "residuals"           "modelfit"            "accept"             
##  [7] "localised.structure" "formula"             "model"              
## [10] "X"
names(M2st_ar2$fit$samples)
## [1] "beta"   "phi"    "rho"    "tau2"   "fitted" "Y"

names(allpost)
##  [1] "summary.results"     "samples"             "fitted.values"      
##  [4] "residuals"           "modelfit"            "accept"             
##  [7] "localised.structure" "formula"             "model"              
## [10] "X"

a <- allpost$samples$fitted ## nsamp by nla*nweek
class(a)
## [1] "mcmc"
b <- as.matrix(a)
class(b)
## [1] "matrix" "array"

dim(b)
## [1]  400 6260

calwkmeans <- function(x, wkpattern=engdeaths$Weeknumber) {
  u <- data.frame(x=100000*x/engdeaths$popn, wkpattern=wkpattern)
  # Transform to per 100,000
  v <- summaryBy(x~wkpattern, data=u)
  as.vector(v$x.mean)
}
u <- calwkmeans(b[1,])
length(u) 
## [1] 20
summary(u)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.4131  0.9610  2.5313  4.2967  7.1327 12.7757

a <- apply(b, 1, calwkmeans)
dim(a)
## [1]  20 400

fits <- apply(a, 1, mean) 
# fits <- apply(a, 1, quantile, probs=c(0.025, 0.5, 0.975)) 
fits
##  [1]  0.4060188  1.0919396  2.9866707  7.2098516 11.9309467 12.8956561
##  [7] 12.0841146  9.4745629  7.1224786  5.5058243  4.0580137  2.9105665
## [13]  2.1734421  1.6563488  1.2361900  1.0533209  0.6973635  0.5790135
## [19]  0.6059584  0.8303672
lims <-  apply(a, 1, FUN=quantile, probs=c(0.025, 0.975))  
lims
##            [,1]     [,2]     [,3]     [,4]     [,5]     [,6]     [,7]     [,8]
## 2.5%  0.3772795 1.035511 2.886386 7.020941 11.67198 12.62161 11.83213 9.251159
## 97.5% 0.4372070 1.144057 3.101111 7.422220 12.20027 13.19627 12.36998 9.722311
##           [,9]    [,10]    [,11]    [,12]    [,13]    [,14]    [,15]     [,16]
## 2.5%  6.923415 5.336740 3.909126 2.797599 2.097284 1.581839 1.174733 0.9956819
## 97.5% 7.329721 5.669891 4.205946 3.035389 2.268343 1.740573 1.304793 1.1180599
##           [,17]     [,18]     [,19]     [,20]
## 2.5%  0.6620155 0.5434657 0.5729947 0.7863409
## 97.5% 0.7376994 0.6144862 0.6411534 0.8747891
head(lims)
##            [,1]     [,2]     [,3]     [,4]     [,5]     [,6]     [,7]     [,8]
## 2.5%  0.3772795 1.035511 2.886386 7.020941 11.67198 12.62161 11.83213 9.251159
## 97.5% 0.4372070 1.144057 3.101111 7.422220 12.20027 13.19627 12.36998 9.722311
##           [,9]    [,10]    [,11]    [,12]    [,13]    [,14]    [,15]     [,16]
## 2.5%  6.923415 5.336740 3.909126 2.797599 2.097284 1.581839 1.174733 0.9956819
## 97.5% 7.329721 5.669891 4.205946 3.035389 2.268343 1.740573 1.304793 1.1180599
##           [,17]     [,18]     [,19]     [,20]
## 2.5%  0.6620155 0.5434657 0.5729947 0.7863409
## 97.5% 0.7376994 0.6144862 0.6411534 0.8747891
dim(lims)
## [1]  2 20


engdeaths$rcovid <- 100000*engdeaths$covid/engdeaths$popn 
wkmeans <- summaryBy(rcovid ~ Weeknumber, data =engdeaths, FUN =mean) # Weekly mean 
colnames(wkmeans)[2] <- "covid"
head(wkmeans)
##   Weeknumber       covid
## 1         11  0.06751427
## 2         12  0.57516468
## 3         13  2.82722575
## 4         14  7.97997210
## 5         15 12.90445264
## 6         16 13.30915947

# dataandfits <- cbind(wkmeans, fits=fits[2, ], lower=fits[1,],  upper=fits[3,])
dataandfits <- cbind(wkmeans, fits=fits, lower=lims[1,],  upper=lims[2,])
head(dataandfits)
##   Weeknumber       covid       fits      lower     upper
## 1         11  0.06751427  0.4060188  0.3772795  0.437207
## 2         12  0.57516468  1.0919396  1.0355112  1.144057
## 3         13  2.82722575  2.9866707  2.8863864  3.101111
## 4         14  7.97997210  7.2098516  7.0209410  7.422220
## 5         15 12.90445264 11.9309467 11.6719762 12.200270
## 6         16 13.30915947 12.8956561 12.6216087 13.196268
par(mfrow=c(1, 1))
plot(covid~Weeknumber, data=dataandfits, pch="*", col="blue")
lines(fits~Weeknumber, data=dataandfits, lty=1, col=2)
lines(lower~Weeknumber, data=dataandfits, lty=2, col=3)
lines(upper~Weeknumber, data=dataandfits, lty=2, col=3)

# points(glm~Weeknumber, data=dataandfits, lty=2, col=4, pch=3)

head(dataandfits)
##   Weeknumber       covid       fits      lower     upper
## 1         11  0.06751427  0.4060188  0.3772795  0.437207
## 2         12  0.57516468  1.0919396  1.0355112  1.144057
## 3         13  2.82722575  2.9866707  2.8863864  3.101111
## 4         14  7.97997210  7.2098516  7.0209410  7.422220
## 5         15 12.90445264 11.9309467 11.6719762 12.200270
## 6         16 13.30915947 12.8956561 12.6216087 13.196268
dataandfits
##    Weeknumber       covid       fits      lower      upper
## 1          11  0.06751427  0.4060188  0.3772795  0.4372070
## 2          12  0.57516468  1.0919396  1.0355112  1.1440574
## 3          13  2.82722575  2.9866707  2.8863864  3.1011111
## 4          14  7.97997210  7.2098516  7.0209410  7.4222201
## 5          15 12.90445264 11.9309467 11.6719762 12.2002705
## 6          16 13.30915947 12.8956561 12.6216087 13.1962680
## 7          17 11.44612945 12.0841146 11.8321255 12.3699792
## 8          18  8.81339654  9.4745629  9.2511589  9.7223106
## 9          19  6.63332995  7.1224786  6.9234149  7.3297212
## 10         20  4.90704342  5.5058243  5.3367404  5.6698910
## 11         21  3.92631637  4.0580137  3.9091263  4.2059458
## 12         22  3.07140906  2.9105665  2.7975990  3.0353893
## 13         23  2.24923258  2.1734421  2.0972839  2.2683428
## 14         24  1.64395081  1.6563488  1.5818395  1.7405733
## 15         25  1.22555715  1.2361900  1.1747329  1.3047933
## 16         26  1.05980040  1.0533209  0.9956819  1.1180599
## 17         27  0.73576301  0.6973635  0.6620155  0.7376994
## 18         28  0.59048046  0.5790135  0.5434657  0.6144862
## 19         29  0.43429277  0.6059584  0.5729947  0.6411534
## 20         30  0.33729676  0.8303672  0.7863409  0.8747891

k <- nrow(dataandfits)
k
## [1] 20
adata <- data.frame(Weeknumber=rep(dataandfits$Weeknumber, 3), 
                    fits=c(rep("fits", k), rep("lower", k), rep("upper", k)),  
                    vfits=c(dataandfits$fits, dataandfits$lower, dataandfits$upper))
adata$fits <- as.factor(adata$fits)
adata$fits <- factor(adata$fits, levels=rev(levels(adata$fits)))
head(adata)
##   Weeknumber fits      vfits
## 1         11 fits  0.4060188
## 2         12 fits  1.0919396
## 3         13 fits  2.9866707
## 4         14 fits  7.2098516
## 5         15 fits 11.9309467
## 6         16 fits 12.8956561
table(adata$fits)
## 
## upper lower  fits 
##    20    20    20
adata <- adata[adata$fits !="fits", ]

adata <- data.frame(Weeknumber=rep(dataandfits$Weeknumber, 2), 
                    intervals=c(rep("lower", k), rep("upper", k)),  
                    vfits=c(dataandfits$lower, dataandfits$upper)) 
adata$intervals <- as.factor(adata$intervals)
adata$intervals <- factor(adata$intervals, levels=levels(adata$intervals))
dim(adata)
## [1] 40  3
head(adata)
##   Weeknumber intervals      vfits
## 1         11     lower  0.3772795
## 2         12     lower  1.0355112
## 3         13     lower  2.8863864
## 4         14     lower  7.0209410
## 5         15     lower 11.6719762
## 6         16     lower 12.6216087
table(adata$fits)
## < table of extent 0 >
pdata <- data.frame(Weeknumber=rep(dataandfits$Weeknumber, 2), 
                    fits=c(rep("fitted", k), rep("observed", k)),  
                    vfits=c(dataandfits$fits, dataandfits$covid))
pdata$fits <- as.factor(pdata$fits)
pdata$fits <- factor(pdata$fits, levels=levels(pdata$fits))
levels(adata$fits)
## NULL
levels(pdata$fits)
## [1] "fitted"   "observed"

head(pdata)
##   Weeknumber   fits      vfits
## 1         11 fitted  0.4060188
## 2         12 fitted  1.0919396
## 3         13 fitted  2.9866707
## 4         14 fitted  7.2098516
## 5         15 fitted 11.9309467
## 6         16 fitted 12.8956561
pwkfit <- ggplot() + 
  geom_line(data=adata, aes(x=factor(Weeknumber), y=vfits, group=intervals, color=intervals)) +
  geom_point(data=pdata, aes(x=factor(Weeknumber), y=vfits,  shape=fits)) + 
  labs(x ="Week number", y = "Average number of covid deaths per 100,000")+
  theme(legend.position=c(0.65, 0.5))
pwkfit


ggsave(paste0(figurepath, "weekly_death_rate_fitted.png"))
## Saving 7 x 5 in image

if (longrun){
  
f3 <-  sqrt(no2) ~  jsa + log10(houseprice) + log(popdensity) 

scol <- "spaceid"
tcol <-  "Weeknumber"

M3st_linear <- Bcartime(formula=f3, data=engdeaths, scol=scol, tcol=tcol, 
               W=Weng, model="linear", family="gaussian", package="CARBayesST", 
               N=N, burn.in=burn.in, thin=thin, verbose=FALSE)
summary(M3st_linear)

M3st_anova <- Bcartime(formula=f3, data=engdeaths, scol=scol, tcol=tcol, 
                        W=Weng, model="anova", family="gaussian", package="CARBayesST", 
                       N=N, burn.in=burn.in, thin=thin, verbose=FALSE)
summary(M3st_anova)


M3st_ar <- Bcartime(formula=f3, data=engdeaths, scol=scol, tcol=tcol, 
               W=Weng, model="ar", family="gaussian", package="CARBayesST", 
               N=N, burn.in=burn.in, thin=thin, verbose=FALSE)
summary(M3st_ar)


M3st_ar2 <- Bcartime(formula=f3, data=engdeaths, scol=scol, tcol=tcol, 
               W=Weng, model="ar", AR=2,  family="gaussian", package="CARBayesST", 
               N=N, burn.in=burn.in, thin=thin, verbose=FALSE)
summary(M3st_ar2)



u <- rbind(M3st_linear$mchoice, M3st_anova$mchoice, M3st_ar$mchoice, 
           M3st_ar2$mchoice)

rownames(u) <- c("Linear", "Anova", "AR (1)", "AR (2)")

table10.12 <- u[, c(2, 1, 4, 3)]
table10.13  <- M3st_ar2$params[, 1:3]
dput(table10.12, file=paste0(filepath, "table10.12.txt"))
dput(table10.13, file=paste0(filepath, "table10.13.txt"))
} else {
  table10.12 <- dget(file=paste0(filepath, "table10.12.txt"))
  table10.13 <- dget(file=paste0(filepath, "table10.13.txt"))
}
round(table10.12, 1)
##           p.d     DIC    p.w    WAIC
## Linear  151.6 12899.9  152.2 12904.9
## Anova   118.3 12707.2  116.7 12708.0
## AR (1) 1789.8 11530.0 1449.0 11491.9
## AR (2) 1766.7 11504.8 1433.6 11468.8
round(table10.13, 4)
##                      Mean    2.5%   97.5%
## (Intercept)       -1.0372 -1.9093 -0.1506
## jsa                0.0078 -0.1027  0.1192
## log10(houseprice)  0.7321  0.5707  0.8921
## log(popdensity)    0.1162  0.0974  0.1354
## tau2               0.3519  0.2919  0.4163
## nu2                0.2777  0.2574  0.2979
## rho.S              0.9416  0.9001  0.9724
## rho1.T            -0.1761 -0.2713 -0.0876
## rho2.T            -0.0659 -0.1577  0.0186

Predictions with 95% limits against observations for two models: AR (2) on the left panel and INLA on the right panel.

## Saving 9 x 5 in image

# All done 
end.time <- proc.time()[3]
comp.time<- (end.time-start.time)/60
# comp.time<-fancy.time(comp.time)
# With long run 122 minutes 
print(comp.time)
##   elapsed 
## 0.8158667