Chapter 6: Bayesian modeling for point referenced spatial data

Sujit K. Sahu

University of Southampton

2022-03-30

Abstract

This file contains all the code for reproducing the figures in Chapter 6.

start.time<-proc.time()[3]
figurepath <- "figures/chap6figures/"
mappath <- "mapfiles"
filepath <-  "txttables/"
longrun <- FALSE
if (!longrun) {
  table6.3 <- dget(file=paste0(filepath, "table6.3.txt"))
  table6.5 <- dget(file=paste0(filepath, "table6.5.txt"))
  table6.6 <- dget(file=paste0(filepath, "table6.6.txt"))
}
library(bmstdr)
## Warning in .recacheSubclasses(def@className, def, env): undefined subclass
## "numericVector" of class "Mnumeric"; definition not updated
library(ggplot2)
library(akima)
library(tidyr)
library(RColorBrewer)
library(ggpubr)
library(spdep)
library(GGally)
library(geoR)
library(fields)
library(doBy)

1 Code to reproduce Figure6.1

head(nyspatial)
##   s.index Longitude Latitude     utmx    utmy      yo3 xmaxtemp    xwdsp
## 1       1   -73.757   42.681 601838.1 4726140 45.08065 27.53184 4.142083
## 2       2   -73.881   40.866 594300.8 4524485 48.34758 29.60294 5.666202
## 3       3   -79.587   42.291 121812.7 4692285 54.65677 26.28780 5.742751
## 4       4   -76.802   42.111 351019.8 4663672 44.95774 27.44725 4.842872
## 5       5   -73.743   41.782 604457.3 4626336 43.92903 28.27555 4.427053
## 6       6   -78.771   42.993 192581.3 4766941 55.37726 26.61813 6.146428
##        xrh
## 1 3.686737
## 2 3.478130
## 3 3.950145
## 4 3.699996
## 5 3.698446
## 6 3.869971
M1 <- Bspatial(formula=yo3~xmaxtemp+xwdsp+xrh, data=nyspatial)
## ##
## # Total time taken:: 0.01  - Sec.
print(M1)
## 
##  The lm model has been fitted using bmstdr code in R.
## Call:
## Bspatial(formula = yo3 ~ xmaxtemp + xwdsp + xrh, data = nyspatial)
## 
## Model formula
## yo3 ~ xmaxtemp + xwdsp + xrh
## 
## 
## Coefficients:
## [1] -52.776   1.526   3.252  11.240  10.998
## 
## ##
## # Total time taken:: 0.01  - Sec.
plot(M1)
## 
##  No other plots implemented for this model fitting method.
## 
## Summary of the residuals
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## -8.264164 -1.888167  0.234218 -0.000188  2.175525  8.134383

residuals(M1)
## 
## Summary of the residuals
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## -8.264164 -1.888167  0.234218 -0.000188  2.175525  8.134383

##  [1]  0.93423105 -1.57103871  4.24211590 -1.48774171 -2.41060320  4.04689047
##  [7]  8.13438342  1.19708052  0.08478264 -4.55991285 -2.01951831 -0.96188897
## [13] -1.84438370 -1.39188272  3.31235485  0.38365423  1.86240427 -4.38346529
## [19]  1.56620807  3.71451313  3.11488849 -2.65240154  3.45918976  0.88766396
## [25] -5.69143315 -8.26416410 -1.05410468  1.34690057
summary(M1)
## 
##  The lm model has been fitted using bmstdr code in R.
## Call:
## Bspatial(formula = yo3 ~ xmaxtemp + xwdsp + xrh, data = nyspatial)
## ##
## # Total time taken:: 0.01  - Sec.
## Model formula
## yo3 ~ xmaxtemp + xwdsp + xrh
## 
## 
## Parameter Estimates:
##                mean     sd     2.5%  97.5%
## (Intercept) -52.776 52.909 -157.126 51.573
## xmaxtemp      1.526  0.989   -0.425  3.477
## xwdsp         3.252  1.185    0.914  5.590
## xrh          11.240  8.462   -5.449 27.930
## sigma2       10.998  2.939    6.668 18.038

## Spatial model fitting

M2 <- Bspatial(model="spat", formula=yo3~xmaxtemp+xwdsp+xrh, data=nyspatial, 
      coordtype="utm", coords=4:5, phi=0.4)
##               mean    sd     low    up
## (Intercept) -58.17 53.84 -164.35 48.01
## xmaxtemp      1.65  1.00   -0.33  3.62
## xwdsp         3.28  1.19    0.94  5.62
## xrh          11.74  8.61   -5.25 28.73
## sigma2       10.90  2.91    6.61 17.87
## ##
## # Total time taken:: 0.01  - Sec.
names(M2)
##  [1] "params"           "fit"              "max.d"            "fitteds"         
##  [5] "phi"              "residuals"        "sn"               "tn"              
##  [9] "formula"          "scale.transform"  "package"          "model"           
## [13] "call"             "computation.time"
print(M2)
## 
##  The spat model has been fitted using bmstdr code in R.
## Call:
## Bspatial(formula = yo3 ~ xmaxtemp + xwdsp + xrh, data = nyspatial, 
##     model = "spat", coordtype = "utm", coords = 4:5, phi = 0.4)
## 
## Model formula
## yo3 ~ xmaxtemp + xwdsp + xrh
## 
## 
## Coefficients:
## [1] -58.168   1.648   3.281  11.741  10.897
## 
## ##
## # Total time taken:: 0.01  - Sec.
plot(M2)
## 
##  No other plots implemented for this model fitting method.
## 
## Summary of the residuals
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -8.17500 -1.81981  0.26879  0.02192  2.26535  8.37285

ggsave(filename = paste0(figurepath, "M2_fitv_resid.png"))
## Saving 7 x 5 in image
residuals(M2)
## 
## Summary of the residuals
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -8.17500 -1.81981  0.26879  0.02192  2.26535  8.37285

##  [1]  1.0126866 -1.6845595  4.2933407 -1.4260899 -2.4367621  4.0862733
##  [7]  8.3728457  1.4339752  0.2256680 -4.4204139 -1.9068074 -0.8676949
## [13] -1.7908124 -1.3168398  3.2676868  0.3119068  1.9634197 -4.2975435
## [19]  1.3987367  3.5257330  3.1711345 -2.5857731  3.3522672  0.9462576
## [25] -5.8067424 -8.1750019 -1.2457093  1.2125946
ggsave(filename = paste0(figurepath, "M2_resid_v_obsn.png"))
## Saving 7 x 5 in image
summary(M2)
## 
##  The spat model has been fitted using bmstdr code in R.
## Call:
## Bspatial(formula = yo3 ~ xmaxtemp + xwdsp + xrh, data = nyspatial, 
##     model = "spat", coordtype = "utm", coords = 4:5, phi = 0.4)
## ##
## # Total time taken:: 0.01  - Sec.
## Model formula
## yo3 ~ xmaxtemp + xwdsp + xrh
## 
## 
## Parameter Estimates:
##                mean     sd     2.5%  97.5%
## (Intercept) -58.168 53.836 -164.345 48.010
## xmaxtemp      1.648  1.002   -0.329  3.624
## xwdsp         3.281  1.187    0.941  5.622
## xrh          11.741  8.613   -5.247 28.728
## sigma2       10.897  2.912    6.607 17.872

2 Code to reproduce Table 6.1

table6.1 <- cbind(M1$params, M2$params)
round(table6.1, 2)
##               mean    sd    2.5% 97.5%   mean    sd    2.5% 97.5%
## (Intercept) -52.78 52.91 -157.13 51.57 -58.17 53.84 -164.35 48.01
## xmaxtemp      1.53  0.99   -0.42  3.48   1.65  1.00   -0.33  3.62
## xwdsp         3.25  1.19    0.91  5.59   3.28  1.19    0.94  5.62
## xrh          11.24  8.46   -5.45 27.93  11.74  8.61   -5.25 28.73
## sigma2       11.00  2.94    6.67 18.04  10.90  2.91    6.61 17.87
# xtable(table6.1, digits=2)
dput(table6.1, file=paste0(filepath, "table6.1.txt"))

3 Code for fitting models with spBayes

library(spBayes)
library(coda)
set.seed(44)
N <- 20000
p <- 4
fdat <- nyspatial
starting <- list("phi"=1, "sigma.sq"=10, "tau.sq"=1)
tuning <- list("phi"=0.3, "sigma.sq"=0.3, "tau.sq"=0.3)
priors.1 <- list("beta.Norm"=list(rep(0,p), diag(1000,p)),
                 "phi.Unif"=c(0.05, 2), "sigma.sq.IG"=c(2, 2),
                 "tau.sq.IG"=c(2, 0.1))
cov.model <- "exponential"
verbose <- FALSE
coords <- cbind(fdat$utmx, fdat$utmy)
mod3 <- spLM(sqrt(yo3)~xmaxtemp+xwdsp+xrh, data=fdat, coords=coords, starting=starting,
             tuning=tuning, priors=priors.1, cov.model=cov.model,
             n.samples=N, verbose=verbose, n.report=5)

burn.in <- 0.5* N + 1
m.3 <- spRecover(mod3, start=burn.in, verbose=FALSE)
round(summary(m.3$p.theta.recover.samples)$quantiles[,c(3,1,5)],2)
##           50% 2.5% 97.5%
## sigma.sq 0.22 0.13  0.40
## tau.sq   0.03 0.01  0.09
## phi      0.99 0.07  1.94
round(summary(m.3$p.beta.recover.samples)$quantiles[,c(3,1,5)],2)
##               50%   2.5% 97.5%
## (Intercept) -0.33 -16.09 15.82
## xmaxtemp     0.11  -0.19  0.41
## xwdsp        0.23  -0.13  0.59
## xrh          0.81  -1.79  3.31

# names(m.3)
samps <- mcmc(data=cbind(m.3$p.beta.recover.samples, m.3$p.theta.recover.samples), thin=100)
# samps <- as.mcmc(cbind(m.4$p.theta.recover.samples))
head(samps)
## Markov Chain Monte Carlo (MCMC) output:
## Start = 1 
## End = 601 
## Thinning interval = 100 
##      (Intercept)    xmaxtemp      xwdsp        xrh  sigma.sq     tau.sq
## [1,]  -2.5753937  0.12428318 0.33685313  1.1498303 0.1977640 0.01913157
## [2,]  12.5734909 -0.04935851 0.43433537 -1.7303863 0.1977640 0.01913157
## [3,]   7.7296075 -0.04375927 0.24290350 -0.2196203 0.3256541 0.02434937
## [4,]   7.0140414  0.03878646 0.24547142 -0.6880487 0.3256541 0.02434937
## [5,]   8.0222950 -0.11055973 0.01093527  0.4825042 0.3256541 0.02434937
## [6,]   0.6392681  0.04496527 0.39199269  0.7834181 0.3256541 0.02434937
## [7,]   3.3188478  0.03734955 0.15802698  0.4525282 0.1989606 0.02165145
##           phi
## [1,] 1.956952
## [2,] 1.956952
## [3,] 1.911967
## [4,] 1.911967
## [5,] 1.911967
## [6,] 1.911967
## [7,] 1.884529
dim(samps)
## [1] 10000     7
dev.off()
## null device 
##           1
plot(samps)
# autocorr.plot(samps, lag.max=75)
rejectionRate(samps)
## (Intercept)    xmaxtemp       xwdsp         xrh    sigma.sq      tau.sq 
##   0.0000000   0.0000000   0.0000000   0.0000000   0.6256626   0.6256626 
##         phi 
##   0.6256626

# autocorr(samps, lags = c(0, 1, 5, 10, 50), relative=TRUE)
autocorr.plot(samps)
effectiveSize(samps)
## (Intercept)    xmaxtemp       xwdsp         xrh    sigma.sq      tau.sq 
##  10534.4089  10905.7348  10485.4410  10702.3782   1608.0239    802.6024 
##         phi 
##    110.2447
batchSE(samps, batchSize=100)
## (Intercept)    xmaxtemp       xwdsp         xrh    sigma.sq      tau.sq 
## 0.077162667 0.001428252 0.001783563 0.012311022 0.001663686 0.000672879 
##         phi 
## 0.044493143
raftery.diag(samps, q=0.025, r=0.005, s=0.95, converge.eps=0.001)
## 
## Quantile (q) = 0.025
## Accuracy (r) = +/- 0.005
## Probability (s) = 0.95 
##                                                       
##              Burn-in  Total    Lower bound  Dependence
##              (M)      (N)      (Nmin)       factor (I)
##  (Intercept) 200      380200   3746          101.0    
##  xmaxtemp    200      377100   3746          101.0    
##  xwdsp       200      374100   3746           99.9    
##  xrh         200      374100   3746           99.9    
##  sigma.sq    1700     1796100  3746          479.0    
##  tau.sq      1900     2079700  3746          555.0    
##  phi         15000    16369300 3746         4370.0
geweke.diag(samps, frac1=0.1, frac2=0.5)
## 
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5 
## 
## (Intercept)    xmaxtemp       xwdsp         xrh    sigma.sq      tau.sq 
##     0.04128    -0.06384     0.11126    -0.04710     0.18335    -1.20123 
##         phi 
##     4.02084
crosscorr(samps)
##               (Intercept)     xmaxtemp         xwdsp          xrh      sigma.sq
## (Intercept)  1.0000000000 -0.936920895  0.5173241877 -0.960589167  0.0010916228
## xmaxtemp    -0.9369208954  1.000000000 -0.6028366414  0.818465028  0.0017644462
## xwdsp        0.5173241877 -0.602836641  1.0000000000 -0.535934737 -0.0001728054
## xrh         -0.9605891667  0.818465028 -0.5359347370  1.000000000 -0.0034203620
## sigma.sq     0.0010916228  0.001764446 -0.0001728054 -0.003420362  1.0000000000
## tau.sq      -0.0002154989 -0.002218384 -0.0067863042  0.003684749  0.1163776973
## phi          0.0131530848 -0.018115634  0.0112277381 -0.008590094 -0.0144629564
##                    tau.sq          phi
## (Intercept) -0.0002154989  0.013153085
## xmaxtemp    -0.0022183838 -0.018115634
## xwdsp       -0.0067863042  0.011227738
## xrh          0.0036847493 -0.008590094
## sigma.sq     0.1163776973 -0.014462956
## tau.sq       1.0000000000  0.037640622
## phi          0.0376406222  1.000000000
densplot(samps)
rejectionRate(samps)
## (Intercept)    xmaxtemp       xwdsp         xrh    sigma.sq      tau.sq 
##   0.0000000   0.0000000   0.0000000   0.0000000   0.6256626   0.6256626 
##         phi 
##   0.6256626
###
## Now we obtain parameter estimates
a <- summary(samps)
b <- HPDinterval(samps,prob=0.95)
estimates <- cbind(a$statistics[, 1:2], b)
round(estimates, 2)
##              Mean   SD  lower upper
## (Intercept) -0.27 8.11 -16.47 15.32
## xmaxtemp     0.11 0.15  -0.18  0.42
## xwdsp        0.23 0.18  -0.12  0.59
## xrh          0.79 1.30  -1.74  3.35
## sigma.sq     0.23 0.07   0.12  0.36
## tau.sq       0.04 0.02   0.01  0.08
## phi          0.99 0.56   0.05  1.88
# xtable(estimates, digits=2)

## Model diagnostics using spBayes
## Calculates DIC and PMCC
spDiag(m.3)
## -------------------------------------------------
##      Calculating scores
## -------------------------------------------------
## $DIC
##                 value
## bar.D       -69.46052
## D.bar.Omega -90.69995
## pD           21.23943
## DIC         -48.22109
## 
## $GP
##        value
## G 0.03723636
## P 1.96268848
## D 1.99992484
## 
## $GRS
## [1] 73.89896

4 Code to reproduce Table 6.2

M3 <- Bspatial(package="spBayes", formula=yo3~xmaxtemp+xwdsp+xrh, data=nyspatial, 
               coordtype="utm", coords=4:5, prior.phi=c(0.005, 2), n.report=2)
## ----------------------------------------
##  General model description
## ----------------------------------------
## Model fit with 28 observations.
## 
## Number of covariates 4 (including intercept if specified).
## 
## Using the exponential spatial correlation model.
## 
## Number of MCMC samples 5000.
## 
## Priors and hyperpriors:
##  beta normal:
##  mu: 0.000   0.000   0.000   0.000   
##  cov:
##  10000.000   0.000   0.000   0.000   
##  0.000   10000.000   0.000   0.000   
##  0.000   0.000   10000.000   0.000   
##  0.000   0.000   0.000   10000.000   
## 
##  sigma.sq IG hyperpriors shape=2.00000 and scale=1.00000
##  tau.sq IG hyperpriors shape=2.00000 and scale=0.10000
##  phi Unif hyperpriors a=0.00500 and b=2.00000
## -------------------------------------------------
##      Sampling
## -------------------------------------------------
## Sampled: 2500 of 5000, 50.00%
## Report interval Metrop. Acceptance rate: 41.96%
## Overall Metrop. Acceptance rate: 41.96%
## -------------------------------------------------
## Sampled: 5000 of 5000, 100.00%
## Report interval Metrop. Acceptance rate: 41.24%
## Overall Metrop. Acceptance rate: 41.60%
## -------------------------------------------------
## ##
## # Total time taken:: 0.32  - Sec.

table6.2 <- M3$params
round(table6.2, 2)
##                mean    sd    2.5% 97.5%
## X.Intercept. -41.15 49.46 -138.32 58.49
## xmaxtemp       1.32  0.94   -0.53  3.14
## xwdsp          3.38  1.22    1.00  5.79
## xrh            9.48  7.94   -6.52 24.72
## sigma.sq      12.52  3.65    7.46 21.31
## tau.sq         0.09  0.23    0.02  0.31
## phi            1.09  0.62    0.06  1.96
dput(table6.2, file=paste0(filepath, "table6.2.txt"))
# xtable(table6.2, digits=2)

5 Stan code from Section 6.6.2 to fit the marginal model

Save the following code in the file spatial_model.stan in the current working directory. This code does not run in R. See the next set of code chunk to run this code using the rstan package.

// data block. This must contain the same variables as data list
// that will come from R

data {
int<lower=0> n; // number of sites
int<lower=0> p; // number of covariates
vector[n] y;
matrix[n, p] X;
matrix[n, n] dist; // to hold n by n distance matrix
real<lower=0.00001> phi;
vector<lower=0>[2] priorsigma2;
vector<lower=0>[2] priortau2;
}

// These will never change during the MCMC computation. 
transformed data {
  real delta=1e-5;
  vector[n] mu_0 = rep_vector(0, n);
}

// Declare all the parameters to be sampled here

parameters {
  vector[p] beta;
  real<lower=0> sigma_sq;
  vector[n] eta;
  real<lower=0> tau_sq;
}

// Transformations of parameters that help to specify
// the model.

transformed parameters {
real xbmodel[n];
real dats[n];
matrix[n, n] L;
matrix[n, n] Sigma;
real u;

  for (i in 1:n) {
   for (j in 1:n) {
     Sigma[i, j] = sigma_sq * exp((-1)*phi*dist[i,j]);
   }
    Sigma[i, i] = Sigma[i, i] + delta;
  }

 L = cholesky_decompose(Sigma);

 for ( i in 1:n) {
   u =0.0;
   for (j in 1:p) {
     u += beta[j] * X[i, j];
     }
     xbmodel[i] = u;
     dats[i] = y[i] - eta[i];
   }
}

// Model specification

model {
 eta ~   multi_normal_cholesky(mu_0, L);
 sigma_sq ~ inv_gamma(priorsigma2[1], priorsigma2[2]);
 tau_sq ~ inv_gamma(priortau2[1], priortau2[2]);
 dats ~ normal(xbmodel, sqrt(tau_sq));

}

Here is the code block to fit the marginal model using the rstan package.

rm(list=ls())
library(bmstdr)
head(nyspatial)
n <- nrow(nyspatial)
y <- nyspatial$yo3
X <- cbind(1, nyspatial[, 7:9])
p <- ncol(X)
head(X)
phi <- 0.4
distmat <-  as.matrix(dist(as.matrix(nyspatial[, 4:5]))/1000)
dim(distmat)
distmat[1:5, 1:5]
max(distmat)
datatostan <- list(n=n, p=p, y = y,  X=as.matrix(X),
      priorsigma2 = c(2, 1), priortau2 = c(2, 1),
      phi=phi, dist=distmat)

M0 <- lm(formula=yo3~xmaxtemp+xwdsp+xrh, data=nyspatial)
coef(M0)
initfun <- function() {
  # starting values near the lm estimates
  # variations will work as well
  list(sigma_sq = 1, tau_sq=1, beta=coef(M0))
}
library(rstan)
stanfit <- stan(data=datatostan,  file = "spatial_model.stan", seed = 44, chains = 1, iter = 1000, warmup = 500, init=initfun)

stanestimates  <- rstan::summary(stanfit, pars =c("beta", "tau_sq", "sigma_sq"), probs = c(.025, .975))
names(stanestimates)
params <- data.frame(stanestimates$summary)
print(params)

Here is the bmstdr command to fit the marginal model using rstan. This command does everything that has been noted in the previous two code blocks.

if (longrun) {
M4 <- Bspatial(package="stan", formula=yo3~xmaxtemp+xwdsp+xrh, data=nyspatial, 
               coordtype="utm", coords=4:5,phi=0.4)

table6.3 <- M4$params
dput(table6.3, file=paste0(filepath, "table6.3.txt"))
} else {
  cat ("Table 6.3 has been read from the disk")
}
## Table 6.3 has been read from the disk
round(table6.3, 2)
##               mean    sd    2.5% 97.5%
## (Intercept) -55.13 58.87 -168.17 58.38
## xmaxtemp      1.63  1.13   -0.52  3.84
## xwdsp         3.22  1.30    0.63  5.77
## xrh          11.15  9.24   -6.75 29.16
## tausq         0.10  0.13    0.02  0.43
## sigmasq      12.43  3.70    7.21 21.06
# xtable(table6.3, digits=2)

6 Code to necessary to fit spatial models using INLA

library(INLA)
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following object is masked from 'package:spam':
## 
##     det
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## Loading required package: foreach
## Loading required package: parallel
## This is INLA_21.02.23 built 2021-07-22 12:47:15 UTC.
##  - See www.r-inla.org/contact-us for how to get help.
##  - To enable PARDISO sparse library; see inla.pardiso()
##  - Save 273.9Mb of storage running 'inla.prune()'
## 
## Attaching package: 'INLA'
## The following object is masked from 'package:spam':
## 
##     Oral
coords <- nyspatial[, c("utmx", "utmy")]/1000
max.edge  <- diff(range(coords[,1]))/15
bound.outer <- diff(range(coords[,2]))/3
mesh <- inla.mesh.2d(loc = coords, max.edge = c(1,5)*max.edge,
                     offset = c(max.edge, bound.outer), cutoff = max.edge/5)
plot(mesh)

if (longrun) ggsave(filename=paste0(figurepath, "ny_triangulation.png"))
## 
spde <- inla.spde2.pcmatern(mesh = mesh, alpha = 1.5,  prior.range = c(1, 0.5), prior.sigma = c(1, 0.05))
## 
A  <- inla.spde.make.A(mesh = mesh, loc = as.matrix(coords))
##
stack <- inla.stack(tag = 'est', data = list(yo3 = nyspatial$yo3), A = list(A, 1),
                effects = list(se = 1:spde$n.spde, 
                               Xcov = cbind(intercept=1, nyspatial[, 7:9])))
## 
hyper <- list(prec = list(prior = "loggamma", param = c(2,0.1)))
##
f1 <- yo3~ - 1 + xmaxtemp+xwdsp+xrh   + f(se, model = spde)
ifit <- inla(formula=f1, data = inla.stack.data(stack), family = "gaussian", 
      control.family = list(hyper = hyper), 
control.predictor = list(A = inla.stack.A(stack), compute = T), 
control.compute = list(config = T, dic = T, waic = T), verbose = F)
names(ifit$marginals.hyperpar)
## [1] "Precision for the Gaussian observations"
## [2] "Range for se"                           
## [3] "Stdev for se"
##
N <- 5000

beta1.samp <-  inla.rmarginal(N, ifit$marginals.fixed[[1]])
beta2.samp <-  inla.rmarginal(N, ifit$marginals.fixed[[2]])
beta3.samp <-  inla.rmarginal(N, ifit$marginals.fixed[[3]])
names(ifit$marginals.hyperpar)
## [1] "Precision for the Gaussian observations"
## [2] "Range for se"                           
## [3] "Stdev for se"
prec.samp  <- inla.rmarginal(N, ifit$marginals.hyperpar[[1]])
sig2e.samp <- 1/prec.samp
range.samp <- inla.rmarginal(N, ifit$marginals.hyperpar[[2]])
phi.samp <- 3/range.samp
sd.samp <- inla.rmarginal(N, ifit$marginals.hyperpar[[3]])
sig2w.samp <- sd.samp^2
psamps <- cbind(beta1.samp, beta2.samp, beta3.samp,  phi.samp, sig2w.samp, sig2e.samp)
print(get_parameter_estimates(psamps))
##                  mean         sd           low        up
## beta1.samp  0.6023813  0.3506826 -0.0925198926  1.279406
## beta2.samp  3.8846246  1.0678077  1.7675689764  5.988309
## beta3.samp  3.1298227  2.4168513 -1.6998730083  7.850223
## phi.samp    8.4328492 32.2571675  0.3291781029 40.729637
## sig2w.samp  0.3009525  0.9435805  0.0009784489  2.035566
## sig2e.samp 12.5268316  3.3875180  7.4082205505 20.509760

Here is the bmstdr command to fit the marginal model using INLA. This command does everything that has been noted in the preceding code block.


M5  <- Bspatial(package="inla",formula=yo3~xmaxtemp+xwdsp+xrh, data=nyspatial, 
               coordtype="utm", coords=4:5)
## INLA will be used.
## ##
## # Total time taken:: 1.8  - Sec.

table6.4 <- M5$params
round(table6.4, 2)
##                mean    sd   2.5% 97.5%
## X.Intercept. -12.42 27.34 -65.77 41.77
## xmaxtemp       0.83  0.60  -0.38  2.00
## xwdsp          3.67  1.10   1.54  5.91
## xrh            5.13  4.78  -4.38 14.47
## phi            8.64 30.19   0.40 40.55
## sigmasq        0.13  0.40   0.00  0.82
## tausq         12.58  3.42   7.35 20.71
# xtable(table6.4, digits=2)

dput(table6.4, file=paste0(filepath, "table6.4.txt"))

7 Code for Section 6.7 on Model Choice

if (longrun) {
M1.c <- Bspatial(formula=yo3~xmaxtemp+xwdsp+xrh, data=nyspatial, mchoice=T)
M2.c <- Bspatial(model="spat", formula=yo3~xmaxtemp+xwdsp+xrh, data=nyspatial, 
               coordtype="utm", coords=4:5, phi=0.4, mchoice=T)
M3.c <- Bspatial(package="spBayes", formula=yo3~xmaxtemp+xwdsp+xrh, data=nyspatial, 
               coordtype="utm", coords=4:5, prior.phi=c(0.005, 2), mchoice=T, n.report=2)

M4.c <- Bspatial(package="stan", formula=yo3~xmaxtemp+xwdsp+xrh, data=nyspatial, 
               coordtype="utm", coords=4:5,phi=0.4, mchoice=T)

M5.c  <-Bspatial(package="inla",formula=yo3~xmaxtemp+xwdsp+xrh, data=nyspatial, 
               coordtype="utm", coords=4:5, mchoice=T)

## Reproduce the estimates
a3 <- Bmchoice(case="MC.sigma2.unknown", y=ydata)
a5 <- rep(NA, 11)
a5[c(1, 3, 5, 7, 9:11)] <- unlist(M5.c$mchoice)
table6.5 <- cbind.data.frame(unlist(a3), M1.c$mchoice, M2.c$mchoice, M3.c$mchoice, M4.c$mchoice,  a5)
dput(table6.5, file = paste0(filepath, "table6.5.txt"))
} else {
  cat("Table 6.5 has been read from the disk")
}
## Table 6.5 has been read from the disk
colnames(table6.5) <- paste("M", 0:5, sep="")
round(table6.5, 2)
##              M0     M1     M2     M3     M4     M5
## pdic       2.07   4.99   4.98   5.17   4.85   4.17
## pdicalt   13.58   5.17   5.16   7.83   5.13     NA
## dic      169.20 158.36 158.06 158.68 157.84 157.23
## dicalt   192.22 158.72 158.41 163.99 158.39     NA
## pwaic1     1.82   5.20   4.93   4.88   4.42   4.73
## pwaic2     2.52   6.32   5.91   6.77   5.29     NA
## waic1    168.95 158.57 157.51 158.70 156.99 158.46
## waic2    170.35 160.82 159.47 162.48 158.72     NA
## gof      593.54 327.98 330.08 323.56 320.49 334.03
## penalty  575.80 351.52 346.73 396.63 393.92  39.17
## pmcc    1169.34 679.50 676.82 720.18 714.41 373.19
# xtable(table6.5, digits=2)

8 Code for model validation

8.1 Code for Figure 6.3


nymap <- map_data(database="state",regions="new york")
head(nymap)
##        long      lat group order   region subregion
## 1 -73.92874 40.80605     1     1 new york manhattan
## 2 -73.93448 40.78886     1     2 new york manhattan
## 3 -73.95166 40.77741     1     3 new york manhattan
## 4 -73.96312 40.75449     1     4 new york manhattan
## 5 -73.96885 40.73730     1     5 new york manhattan
## 6 -73.97458 40.72584     1     6 new york manhattan
s <- c(8,11,12,14,18,21,24,28)
fcoords <- nyspatial[-s, c("Longitude", "Latitude")]
vcoords <- nyspatial[s,  c("Longitude", "Latitude")]
library(tidyr)
label <- tibble(
  long = -76.1,
  lat = 41.5,
  label = "20 fitted (circles) & 8  \n  validation (numbered) sites"
)

vsites8 <- ggplot() +
  geom_polygon(data=nymap, aes(x=long, y=lat, group=group),
               color="black", size = 0.6, fill=NA) +
  geom_point(data =fcoords, aes(x=Longitude,y=Latitude)) +
  geom_text(data=vcoords, aes(x=Longitude,y=Latitude, label=s), col=4) +
  labs( title= "28 air pollution monitoring sites in New York", x="Longitude", y = "Latitude") +
  geom_text(aes(label=label, x=long, y=lat), data = label, vjust = "top", hjust = "right")  +
  # geom_rect(mapping=aes(xmin=-80.2, xmax=-77.3, ymin=41, ymax=41.6), color="black", fill=NA) + 
  geom_rect(mapping=aes(xmin=-78.7, xmax=-75.8, ymin=41, ymax=41.6), color="black", fill=NA) + 
  ggsn::scalebar(data =nymap, dist = 100, location = "bottomleft", transform=T, dist_unit = "km",
                 st.dist = .05, st.size = 5, height = .06, st.bottom=T, model="WGS84") +
  ggsn::north(data=nymap, location="topleft", symbol=12) 
vsites8


ggsave(filename=paste0(figurepath, "8_validation_sites_nydata.png"))
## Saving 7 x 5 in image
if (longrun) {
s <- c(8,11,12,14,18,21,24,28)
M1.v <- Bspatial(model="lm", formula=yo3~xmaxtemp+xwdsp+xrh, data=nyspatial, 
                coordtype="utm", coords=4:5,validrows=s, mchoice=F)
M2.v <- Bspatial(model="spat", formula=yo3~xmaxtemp+xwdsp+xrh, data=nyspatial, 
                coordtype="utm", coords=4:5,validrows=s, mchoice=F, phi=0.4)
M3.v <- Bspatial(package="spBayes",formula=yo3~xmaxtemp+xwdsp+xrh, data=nyspatial, 
                coordtype="utm", coords=4:5, validrows=s, mchoice=F, 
                n.report=2, prior.phi=c(0.005, 2))
M4.v <- Bspatial(package="stan", formula=yo3~xmaxtemp+xwdsp+xrh, data=nyspatial, 
                coordtype="utm", coords=4:5,validrows=s, phi=0.4)
M5.v <- Bspatial(package="inla", formula=yo3~xmaxtemp+xwdsp+xrh, data=nyspatial, 
                 coordtype="utm", coords=4:5, validrows=s, mchoice=F)
table6.6 <- cbind(unlist(M1.v$stats), unlist(M2.v$stats), unlist(M3.v$stats), unlist(M4.v$stats), unlist(M5.v$stats))
colnames(table6.6) <- paste("M", 1:5, sep="")
dput(table6.6, file=paste0(filepath, "table6.6.txt"))
} else {
 cat("Table 6.6 has been read from the disk")  
}
## Table 6.6 has been read from the disk

round(table6.6, 3)
##           M1      M2      M3      M4      M5
## rmse   2.447   2.400   2.428   2.423   2.386
## mae    2.135   2.015   2.043   2.035   1.983
## crps   2.891   2.885   2.891   2.199   1.944
## cvg  100.000 100.000 100.000 100.000 100.000
# xtable(table6.6[-4, ], digits=3)

9 Code to choose the phi parameter in the spatial model


asave <- phichoice_sp()
##    rmse     mae    crps     cvg 
##   2.447   2.135   2.891 100.000
## ##
## # Total time taken:: 0.84  - Sec.
##               mean     sd     low     up
## (Intercept) -45.68 121.94 -286.63 195.28
## xmaxtemp      0.65   2.95   -5.19   6.48
## xwdsp         1.23   2.93   -4.56   7.01
## xrh          18.94  15.31  -11.31  49.19
## sigma2       43.73  13.83   24.44  77.57
##   rmse    mae   crps    cvg 
##  4.436  4.105  2.263 75.000
## ##
## # Total time taken:: 0.85  - Sec.
##               mean    sd     low    up
## (Intercept) -56.11 67.65 -189.78 77.56
## xmaxtemp      1.54  1.22   -0.86  3.95
## xwdsp         3.09  1.52    0.08  6.09
## xrh          12.30 11.37  -10.17 34.77
## sigma2       13.28  4.20    7.42 23.57
##    rmse     mae    crps     cvg 
##   2.913   2.593   2.742 100.000
## ##
## # Total time taken:: 0.83  - Sec.
##               mean    sd     low    up
## (Intercept) -58.34 65.42 -187.61 70.93
## xmaxtemp      1.55  1.19   -0.79  3.90
## xwdsp         3.05  1.46    0.15  5.94
## xrh          12.86 10.84   -8.56 34.28
## sigma2       13.01  4.11    7.27 23.08
##    rmse     mae    crps     cvg 
##   2.528   2.261   2.845 100.000
## ##
## # Total time taken:: 0.86  - Sec.
##               mean    sd     low    up
## (Intercept) -60.88 64.83 -188.98 67.22
## xmaxtemp      1.57  1.19   -0.77  3.91
## xwdsp         2.99  1.45    0.12  5.85
## xrh          13.48 10.65   -7.56 34.52
## sigma2       13.00  4.11    7.26 23.06
##    rmse     mae    crps     cvg 
##   2.419   2.101   2.875 100.000
## ##
## # Total time taken:: 0.81  - Sec.
##               mean    sd     low    up
## (Intercept) -62.56 64.54 -190.09 64.97
## xmaxtemp      1.58  1.18   -0.76  3.93
## xwdsp         2.95  1.44    0.10  5.80
## xrh          13.88 10.55   -6.96 34.72
## sigma2       13.01  4.11    7.27 23.08
##    rmse     mae    crps     cvg 
##   2.400   2.015   2.885 100.000
## ##
## # Total time taken:: 0.81  - Sec.
##               mean    sd     low    up
## (Intercept) -63.56 64.38 -190.77 63.66
## xmaxtemp      1.59  1.18   -0.75  3.93
## xwdsp         2.92  1.44    0.08  5.77
## xrh          14.12 10.49   -6.60 34.84
## sigma2       13.02  4.12    7.28 23.10
##    rmse     mae    crps     cvg 
##   2.407   2.018   2.888 100.000
## ##
## # Total time taken:: 0.82  - Sec.
##               mean    sd     low    up
## (Intercept) -64.14 64.29 -191.18 62.91
## xmaxtemp      1.60  1.18   -0.74  3.94
## xwdsp         2.91  1.44    0.07  5.75
## xrh          14.26 10.45   -6.40 34.92
## sigma2       13.03  4.12    7.28 23.11
##    rmse     mae    crps     cvg 
##   2.419   2.062   2.890 100.000
## ##
## # Total time taken:: 0.83  - Sec.
##               mean    sd     low    up
## (Intercept) -64.46 64.24 -191.41 62.48
## xmaxtemp      1.60  1.18   -0.74  3.94
## xwdsp         2.90  1.43    0.07  5.74
## xrh          14.34 10.43   -6.28 34.96
## sigma2       13.03  4.12    7.28 23.12
##    rmse     mae    crps     cvg 
##   2.428   2.089   2.890 100.000
## ##
## # Total time taken:: 0.83  - Sec.
##               mean    sd     low    up
## (Intercept) -64.65 64.21 -191.54 62.24
## xmaxtemp      1.60  1.18   -0.74  3.94
## xwdsp         2.90  1.43    0.06  5.73
## xrh          14.38 10.42   -6.21 34.98
## sigma2       13.03  4.12    7.28 23.12
##    rmse     mae    crps     cvg 
##   2.435   2.106   2.891 100.000
## ##
## # Total time taken:: 0.79  - Sec.
##               mean    sd     low    up
## (Intercept) -64.75 64.20 -191.61 62.11
## xmaxtemp      1.60  1.18   -0.74  3.94
## xwdsp         2.90  1.43    0.06  5.73
## xrh          14.41 10.42   -6.18 34.99
## sigma2       13.04  4.12    7.29 23.13
##    rmse     mae    crps     cvg 
##   2.439   2.117   2.891 100.000
## ##
## # Total time taken:: 0.82  - Sec.
##               mean    sd     low    up
## (Intercept) -64.81 64.19 -191.65 62.03
## xmaxtemp      1.60  1.18   -0.74  3.94
## xwdsp         2.89  1.43    0.06  5.73
## xrh          14.42 10.41   -6.16 35.00
## sigma2       13.04  4.12    7.29 23.13
##    rmse     mae    crps     cvg 
##   2.442   2.124   2.891 100.000
## ##
## # Total time taken:: 0.79  - Sec.


lmresults <- Bspatial(model="lm",formula=yo3~xmaxtemp+xwdsp+xrh, data=nyspatial, 
                      coordtype="utm", coords=4:5, validrows= c(8,11,12,14,18,21,24,28))
##    rmse     mae    crps     cvg 
##   2.447   2.135   2.891 100.000
## ##
## # Total time taken:: 0.81  - Sec.
phi1res <- Bspatial(model="spat", formula=yo3~xmaxtemp+xwdsp+xrh, data=nyspatial, 
                    coordtype="utm", coords=4:5,validrows= c(8,11,12,14,18,21,24,28))
##               mean     sd     low     up
## (Intercept) -45.68 121.94 -286.63 195.28
## xmaxtemp      0.65   2.95   -5.19   6.48
## xwdsp         1.23   2.93   -4.56   7.01
## xrh          18.94  15.31  -11.31  49.19
## sigma2       43.73  13.83   24.44  77.57
##   rmse    mae   crps    cvg 
##  4.436  4.105  2.263 75.000
## ##
## # Total time taken:: 0.82  - Sec.
phi2res <- Bspatial(model="spat", formula=yo3~xmaxtemp+xwdsp+xrh, data=nyspatial, 
                    coordtype="utm", coords=4:5,validrows= c(8,11,12,14,18,21,24,28), phi=2)
##               mean    sd     low    up
## (Intercept) -64.88 64.18 -191.70 61.93
## xmaxtemp      1.60  1.18   -0.74  3.94
## xwdsp         2.89  1.43    0.06  5.72
## xrh          14.44 10.41   -6.13 35.01
## sigma2       13.04  4.12    7.29 23.13
##    rmse     mae    crps     cvg 
##   2.447   2.135   2.891 100.000
## ##
## # Total time taken:: 0.79  - Sec.
phihalfres <-  Bspatial(model="spat", formula=yo3~xmaxtemp+xwdsp+xrh, data=nyspatial, 
                        coordtype="utm", coords=4:5,validrows= c(8,11,12,14,18,21,24,28), phi=0.5)
##               mean    sd     low    up
## (Intercept) -63.56 64.38 -190.77 63.66
## xmaxtemp      1.59  1.18   -0.75  3.93
## xwdsp         2.92  1.44    0.08  5.77
## xrh          14.12 10.49   -6.60 34.84
## sigma2       13.02  4.12    7.28 23.10
##    rmse     mae    crps     cvg 
##   2.407   2.018   2.888 100.000
## ##
## # Total time taken:: 0.8  - Sec.
validation_results <- cbind(unlist(lmresults$stats), unlist(phihalfres$stats),
                            unlist(phi1res$stats), unlist(phi2res$stats))
dimnames(validation_results)[[2]] <- c("lm", "phi=0.5", "phi=1", "phi=2")
round(validation_results, 3)
##           lm phi=0.5  phi=1   phi=2
## rmse   2.447   2.407  4.436   2.447
## mae    2.135   2.018  4.105   2.135
## crps   2.891   2.888  2.263   2.891
## cvg  100.000 100.000 75.000 100.000
# xtable(validation_results, digits=3)

10 Code to reproduce Table 6.7 and Figure 6.4

## K fold cross-validation for M2 only
set.seed(44)
x <- runif(n=28)
u <- order(x)
# Here are the four folds
s1 <- u[1:7]
s2 <- u[8:14]
s3 <- u[15:21]
s4 <- u[22:28]
summary((1:28) - sort(c(s1, s2, s3, s4))) ## check
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0       0       0       0       0       0

v1 <- Bspatial(model="spat", formula=yo3~xmaxtemp+xwdsp+xrh, data=nyspatial, 
               coordtype="utm", coords=4:5,validrows= s1, phi=0.4, mchoice=F)
##               mean    sd     low    up
## (Intercept) -82.15 65.60 -211.74 47.43
## xmaxtemp      2.23  1.28   -0.31  4.76
## xwdsp         2.43  1.59   -0.71  5.56
## xrh          15.04 10.14   -5.00 35.08
## sigma2       12.69  3.92    7.18 22.24
##    rmse     mae    crps     cvg 
##   2.441   1.789   2.085 100.000
## ##
## # Total time taken:: 0.72  - Sec.
v2 <- Bspatial(model="spat", formula=yo3~xmaxtemp+xwdsp+xrh, data=nyspatial, 
               coordtype="utm", coords=4:5,validrows= s2,  phi=0.4, mchoice=F)
##              mean    sd     low     up
## (Intercept) 58.16 82.58 -104.97 221.29
## xmaxtemp     0.11  1.43   -2.72   2.94
## xwdsp        4.67  1.34    2.02   7.31
## xrh         -9.87 13.26  -36.06  16.31
## sigma2      10.42  3.22    5.90  18.27
##   rmse    mae   crps    cvg 
##  5.005  3.545  2.077 85.714
## ##
## # Total time taken:: 0.74  - Sec.

v4 <- Bspatial(model="spat", formula=yo3~xmaxtemp+xwdsp+xrh, data=nyspatial, 
               coordtype="utm", coords=4:5,validrows= s4,  phi=0.4, mchoice=F)
##               mean    sd     low    up
## (Intercept) -52.56 60.81 -172.69 67.58
## xmaxtemp      1.56  1.14   -0.70  3.83
## xwdsp         3.43  1.57    0.33  6.53
## xrh          10.55 10.03   -9.25 30.36
## sigma2       12.46  3.85    7.05 21.85
##    rmse     mae    crps     cvg 
##   2.508   2.145   2.072 100.000
## ##
## # Total time taken:: 0.74  - Sec.
v3 <- Bspatial(model="spat", formula=yo3~xmaxtemp+xwdsp+xrh, data=nyspatial, 
               coordtype="utm", coords=4:5, validrows= s3, phi=0.4, mchoice=F)
##               mean    sd     low     up
## (Intercept) -87.61 38.21 -163.10 -12.12
## xmaxtemp      1.76  0.73    0.32   3.21
## xwdsp         3.10  0.84    1.44   4.76
## xrh          19.09  5.99    7.25  30.92
## sigma2        4.65  1.43    2.63   8.15
##   rmse    mae   crps    cvg 
##  5.865  5.462  1.228 57.143
## ##
## # Total time taken:: 0.72  - Sec.

ggsave(file=paste0(figurepath, "fold3_obs_v_pred_nydata.png"))
## Saving 7 x 5 in image

table6.7 <- cbind(unlist(v1$stats), unlist(v2$stats), unlist(v3$stats), unlist(v4$stats))

dimnames(table6.7)[[2]] <- paste("Fold", 1:4, sep="")
round(table6.7, 3)
##        Fold1  Fold2  Fold3   Fold4
## rmse   2.441  5.005  5.865   2.508
## mae    1.789  3.545  5.462   2.145
## crps   2.085  2.077  1.228   2.072
## cvg  100.000 85.714 57.143 100.000
# xtable(table6.7, digits=3)
dput(table6.7, file=paste0(filepath, "table6.7.txt"))

## Use v3 to demonstrate posterior predictive checks
library(bayesplot)
## This is bayesplot version 1.8.1
## - Online documentation and vignettes at mc-stan.org/bayesplot
## - bayesplot theme set to bayesplot::theme_default()
##    * Does _not_ affect other ggplot2 plots
##    * See ?bayesplot_theme_set for details on theme setting
color_scheme_set("brightblue")

y1 <- v2$yobs_preds$yo3
dim(v2$valpreds)
## [1] 5000    7
ypred <- v2$valpreds

ppc_dens_overlay(y1, ypred)

if (longrun) ggsave(filename = paste0(figurepath, "ppc_sp.png"))

ppc_stat(y1, ypred, stat = "max", binwidth = 0.5)

# ppc_scatter(y1, ypred)
# available_ppc()

if (longrun) ggsave(filename = paste0(figurepath, "ppc_sp_max.png"))

# All done 
end.time <- proc.time()[3]
comp.time<- (end.time-start.time)/60
# comp.time<-fancy.time(comp.time)
print(comp.time)
##  elapsed 
## 1.293167