Chapter 9: Bayesian forecasting for point referenced data

Sujit K. Sahu

University of Southampton

2022-03-30

Abstract

This file contains all the code for reproducing the figures in Chapter 9.

## Note the start time
start.time<-proc.time()[3]
# Download mapfiles and data files from https://github.com/sujit-sahu/bookbmstdr
# Set the following paths 
figurepath <- "figures/chap9figures/" # Folder to save or read the figures from
filepath <- "txttables/" # Folder to save or read the tables from 
mappath <- "mapfiles" # Contains the map files 
dpath <- "datafiles/" # Contains the data files 
longrun <- FALSE  # Should we run the MCMC models to check the results. 
# If FALSE we use the results, e.g. tables, figures and model fits from previous runs
## Warning in .recacheSubclasses(def@className, def, env): undefined subclass
## "numericVector" of class "Mnumeric"; definition not updated

1 Code to reproduce Figure 9.1


u <- read.table(paste0(dpath, "C9newminimummses.txt"), head=T)
u$hour <- u$hour +1
plot(u$hour, sqrt(u$cmaqmse), pch="+", las=1, cex=1.5, ylim=range(30, 2, sqrt(c(u$cmaqmse, u$modmse))),  ylab="RMSE", xlab="Hour of Day (EDT)", col=6)
points(u$hour, sqrt(u$modmse), pch="*", cex=1.5, col=2)
points(u$hour, sqrt(u$linfitmse), pch="a", cex=1, col=4)


k <- nrow(u)
unew <- data.frame(phisw=rep(u$phis, 3), phitw= rep(u$phitw, 3),
                   rmse=c(rep("CMAQ", k), rep("Linear", k), rep("Bayes", k)),  
                   vcrit=sqrt(c(u$cmaqmse, u$linfitmse, u$modmse)), 
                   hour=rep(u$hour, 3), day=rep(u$day, 3))
unew$rmse <- factor(unew$rmse, levels=c("CMAQ", "Linear", "Bayes"))
unew$day <- as.factor(unew$day)
unew$hour <- as.factor(unew$hour)
# aphi$Criteria <- factor(aphi$Criteria, levels=rev(levels(aphi$Criteria)))


p <- ggplot() + 
  geom_point(data=unew, aes(x=hour, y=vcrit, group=rmse, color=rmse, shape=rmse)) +
  theme(legend.position=c(0.87, 0.87))  +
  scale_x_discrete(name="Hour of day", breaks=seq(from=6, to =16, by=2), 
                   labels=c("6AM", "8AM", "10AM", "Noon", "2PM", "4PM")) + 
  scale_y_continuous(name="Root mean square error")
p


ggsave(filename = paste0(figurepath, "easrernusforecastrmse.png"))
## Saving 7 x 5 in image

2 Figures in Section 9.4 for nysptiime data set

if (longrun){
f2 <- y8hrmax~xmaxtemp+xwdsp+xrh
vs <- getvalidrows(sn=28, tn=62, valids=1:28, validt=61:62)
length(vs)

M4 <- Bsptime(package="stan",formula=f2, data=nysptime, validrows=vs,
                coordtype="utm", coords=4:5, scale.transform = "SQRT",
                N=1500, burn.in=500, mchoice=T,
                verbose = F)
summary(M4)
dfit <- nysptime[-vs, ]
dfore <- nysptime[vs, ]

M3new <- Bsptime(package="spTimer", formula=f2, data=dfit,  coordtype="utm",
              coords=4:5, scale.transform = "SQRT", N=5000, n.report = 1)
summary(M3new)

nfore <- predict(M3new$fit,newdata=dfore, newcoords=~Longitude+Latitude,
                   type="temporal", foreStep=2, tol=0.05)

res1 <- calculate_validation_statistics(yval=dfore$y8hrmax,
                            yits=nfore$fore.samples, summarystat = "median")
# unlist(res1$stats)
# unlist(M4$stats)

a <- data.frame(sptimer=unlist(res1$stats), stan=unlist(M4$stats))
a

yobs <- dfore$y8hrmax


stanpreds <- M4$valpreds
dim(stanpreds)
sptpreds <- nfore$fore.samples
dim(sptpreds)

psumspt <- get_validation_summaries(t(sptpreds))
psumstan <- get_validation_summaries(t(stanpreds))
# cbind(yobs, psumspt, psumstan)

spthitfa65 <- hitandfalsealarm(thresh=65, yobs=yobs, ypred=psumspt$medianpred)
spthitfa75 <- hitandfalsealarm(thresh=75, yobs=yobs, ypred=psumspt$medianpred)
# spthitfa65
# spthitfa75

stanhitfa65 <- hitandfalsealarm(thresh=65, yobs=yobs, ypred=psumstan$medianpred)
stanhitfa75 <- hitandfalsealarm(thresh=75, yobs=yobs, ypred=psumstan$medianpred)
# stanhitfa65
# stanhitfa75

#

ints50 <- get_parameter_estimates(t(sptpreds), level=50)
ints90 <- get_parameter_estimates(t(sptpreds), level=90)
ints50lenspt <- ints50$up - ints50$low
ints90lenspt <- ints90$up - ints90$low

ints50 <- get_parameter_estimates(t(stanpreds), level=50)
ints90 <- get_parameter_estimates(t(stanpreds), level=90)
ints50lenstan <- ints50$up - ints50$low
ints90lenstan <- ints90$up - ints90$low

k <- length(ints50lenspt)
ids <- rep(c("spTimer50", "spTimer90", "stan50", "stan90"), each=k)
package <- rep(c("spTimer", "spTimer", "stan", "stan"), each=k)
lendat <- data.frame(lenths = c(ints50lenspt, ints90lenspt, ints50lenstan, ints90lenstan),
                     ids = factor(ids), package=factor(package))
head(lendat)
# lendat

findprop <- function(x) {
  value <- x[1]
  samples <- x[-1]
  prop <- length(samples[samples<value])/length(samples)
  prop
}

x <- cbind(yobs, sptpreds)
dim(x)
y <- na.omit(x)
dim(y)
findprop((y[1, ]))
sptpitvals <- apply(y, 1, findprop)

x <- cbind(yobs, stanpreds)
dim(x)
y <- na.omit(x)
dim(y)
findprop((y[1, ]))
stanpitvals <- apply(y, 1, findprop)
k <- length(stanpitvals)

pitvals <- data.frame(pit=c(sptpitvals, stanpitvals), package=rep(c("spTimer", "Stan"), each=k))



fbarhat <- function(yobs, ysample) {
  # yobs m by 1
  # ysample N by m

  x <- cbind(yobs, t(ysample))
  dim(x)
  y <- na.omit(x)
  dim(y)
  x <- y[, -1]
  dim(x)
  length(yk)
  N <- ncol(x)
  N
  # x is the matrix of MCMC  samples
  m <- nrow(x)
  amat <- matrix(NA, nrow=m, ncol=K)
  for (i in 1:m) {
    ynewmat <- matrix(x[i, ], nrow=N, ncol=K, byrow=TRUE)
    dim(yomat)
    u <- rbind(yk, ynewmat)
    dim(u)
    v <- apply(u, 2, findprop)
    amat[i, ] <- as.vector(v)
  }
  Fbarhat <- apply(amat, 2, mean)
  # dans <- Fbarhat - Ghat
  Fbarhat
}

K <- 100
ygood <- na.omit(yobs)
summary(ygood)
yk <- seq(from=min(ygood), to = max(ygood), length=K)
yomat <- matrix(ygood, nrow=K, ncol=length(ygood), byrow=TRUE)
dim(yomat)
u <- cbind(yk, yomat)
dim(u)
Ghat <- apply(u, 1, findprop)
length(Ghat)
summary(Ghat)


dansspt <- fbarhat(yobs, t(sptpreds)) - Ghat
summary(dansspt)


dansstan <- fbarhat(yobs, t(stanpreds)) - Ghat
summary(dansstan)

package <- factor(rep(c("spTimer", "Stan"),  each=K))
ddat <- data.frame(dans=c(dansspt, dansstan), yk = rep(yk, 2), package=package)
head(ddat)
summary(ddat)
save(lendat, pitvals, ddat, file="forecast/nysptimeresults.RData")
} else{
load(file="forecast/nysptimeresults.RData")
}

sharpdiag <- ggplot(data=lendat,  aes(x=ids, y=lenths, col=package)) +
  geom_boxplot() +
  labs(x = "", y = "", title="Sharpness diagram")
sharpdiag


# Figure 9.2
ggsave(filename = paste0(figurepath, "ny_spt_stan_sharpdiagram.png"))


pitdiag <- ggplot(data=pitvals,  aes(pit, colour=package, fill=package)) +
  geom_histogram() +
  facet_wrap(~ package,  ncol = 2) +
  labs(x = "", y = "")  +
  scale_colour_manual(values = c("spTimer" = "red",
                                 "Stan" = "blue"))
pitdiag


# Figure 9.3
ggsave(filename = paste0(figurepath, "ny_sptimer_Stan_pitdiagram.png"))


mcpdiag <- ggplot(data=ddat,  aes(x=yk, y=dans, colour=package, linetype=package)) +
  geom_line() +
  labs(x = "", y = "")  +
  geom_abline(slope=0, intercept = 0, size=1, col="black")+
  scale_colour_manual(values = c("spTimer" = "red",
                                 "Stan" = "blue"))

mcpdiag 

ggsave(filename =paste0(figurepath, "ny_sptstanmcp_diagram.png"))

3 Figures and tables in Section 9.5



load(file=paste0(dpath, "C94states.RData"))
cmaqsites <- unique(cmaqdata4[, 1:4])


delta <- 0.5
a <- c(min(data4$long)-1.5*delta, max(data4$long)+delta)
b <- c(min(data4$lat)-delta, max(data4$lat)+1.5*delta)
a
## [1] -92.0858 -80.0725
b
## [1] 36.1081 43.2169
knots.coords <- spT.grid.coords(Lon=a, Lat=b, by=c(15, 15))
dim(knots.coords)
## [1] 225   2
knots <- data.frame(s.index=1:nrow(knots.coords), long=knots.coords[,1], lat=knots.coords[,2])
head(knots)
##   s.index     long      lat
## 1       1 -92.0858 36.10810
## 2       2 -92.0858 36.61587
## 3       3 -92.0858 37.12364
## 4       4 -92.0858 37.63141
## 5       5 -92.0858 38.13919
## 6       6 -92.0858 38.64696

a <- maps::map.where(database = "state", x =knots$long, y = knots$lat)
b <- data.frame(knots, states=a)
head(b)
##   s.index     long      lat   states
## 1       1 -92.0858 36.10810 arkansas
## 2       2 -92.0858 36.61587 missouri
## 3       3 -92.0858 37.12364 missouri
## 4       4 -92.0858 37.63141 missouri
## 5       5 -92.0858 38.13919 missouri
## 6       6 -92.0858 38.64696 missouri
d <- data.frame(states=c("ohio", "indiana", "illinois", "kentucky"))
knots4 <- merge(b, d)
dim(knots4)
## [1] 108   4
head(knots4)
##     states s.index      long      lat
## 1 illinois      70 -88.65343 40.67804
## 2 illinois      38 -90.36961 39.66250
## 3 illinois      67 -88.65343 39.15473
## 4 illinois      68 -88.65343 39.66250
## 5 illinois      69 -88.65343 40.17027
## 6 illinois      71 -88.65343 41.18581
knots4 <- knots4[, c(2, 1, 3, 4)]
head(data4)
##    s.index   states     long     lat year month day obsO8hrmax cmaqO8hrmax
## 9       56 illinois -91.3358 39.9175 2010     7   1   54.28571    54.65538
## 13      56 illinois -91.3358 39.9175 2010     7   2   78.71429    78.54122
## 7       56 illinois -91.3358 39.9175 2010     7   3   77.00000    70.05393
## 12      56 illinois -91.3358 39.9175 2010     7   4   62.85714    47.91287
## 14      56 illinois -91.3358 39.9175 2010     7   5   47.25000    56.27459
## 8       56 illinois -91.3358 39.9175 2010     7   6   35.71429    45.65875
fourstates <- unique(data4[, 1:4])
dim(fourstates)
## [1] 156   4
map4states <- map_data(database="state",regions=name4)
fourstates$type <- "Data"
knots4$type <- "Knots"
cmaqsites$type <- "Prediction"
head(fourstates)
##    s.index   states     long     lat type
## 9       56 illinois -91.3358 39.9175 Data
## 18      78 illinois -90.5172 41.5144 Data
## 41      84 illinois -90.3286 39.1097 Data
## 48      92 illinois -90.1603 38.6122 Data
## 61      93 illinois -90.1481 38.8903 Data
## 82      95 illinois -90.1058 38.8606 Data
head(cmaqsites)
##    s.index   states      long      lat       type
## 12    7225 kentucky -89.25056 36.52489 Prediction
## 17    7365 kentucky -88.70091 36.59464 Prediction
## 34    7505 kentucky -88.28477 36.67218 Prediction
## 54    7645 kentucky -87.59857 36.72611 Prediction
## 69    7771 kentucky -88.93400 36.93809 Prediction
## 84    7785 kentucky -87.04594 36.78782 Prediction
head(knots4)
##   s.index   states      long      lat  type
## 1      70 illinois -88.65343 40.67804 Knots
## 2      38 illinois -90.36961 39.66250 Knots
## 3      67 illinois -88.65343 39.15473 Knots
## 4      68 illinois -88.65343 39.66250 Knots
## 5      69 illinois -88.65343 40.17027 Knots
## 6      71 illinois -88.65343 41.18581 Knots
allsites <- rbind(fourstates, knots4, cmaqsites)
allsites <- rbind(fourstates, knots4)
head(allsites)
##    s.index   states     long     lat type
## 9       56 illinois -91.3358 39.9175 Data
## 18      78 illinois -90.5172 41.5144 Data
## 41      84 illinois -90.3286 39.1097 Data
## 48      92 illinois -90.1603 38.6122 Data
## 61      93 illinois -90.1481 38.8903 Data
## 82      95 illinois -90.1058 38.8606 Data
p4 <-  ggplot(data=map4states, aes(x=long, y=lat, group = group)) +
  geom_polygon(colour='black',size=0.8, fill=NA) +
  # geom_point(data=fourstates, aes(x=long, y = lat),  inherit.aes = FALSE, col="Red",  size=1.2) +
  # geom_point(data=cmaqsites, aes(x=long, y = lat),  inherit.aes = FALSE, col="blue",  size=0.5) +
  # geom_point(data=knots4, aes(x=long, y = lat),  inherit.aes = FALSE, col="green",  size=1) +
  geom_point(data=allsites, aes(x=long, y = lat, col=type, shape=type),  inherit.aes = FALSE, size=1.5) +
    labs( title= "156 data and 108 knot locations", x="", y = "") +
  theme(legend.position =c(0.92, 0.2))  +
  theme(axis.text.x = element_blank(), axis.text.y = element_blank(), axis.ticks = element_blank()) +
  ggsn::north(data=map4states, location="bottomleft", symbol=12) +
  ggsn::scalebar(data =map4states, dist =100, location ="topright", transform=T, dist_unit = "km",
                 st.dist = .05, st.size = 5, height = .06, st.bottom=T, model="WGS84")
p4

ggsave(filename=paste0(figurepath, "4statesmap.png"))
## Saving 7 x 5 in image

if (longrun) { 
head(data4)
summary(data4)
f2 <- obsO8hrmax~ sqrt(cmaqO8hrmax)
M0 <- Bsptime(package="spTimer", model="GP",  formula=f2, data=data4,
             coordtype="lonlat", coords=3:4, scale.transform = "SQRT", n.report=2)

summary(M0)
# plot(M0)
# head(data4)

d1 <- data4[data4$day<8, ]
vd1 <-  data4[data4$day==8, ]
dim(vd1)
d2 <- data4[(data4$day>1) & (data4$day<9), ]
table(d2$day)
vd2 <- data4[data4$day==9, ]
dim(vd2)

d3 <- data4[(data4$day>2) & (data4$day<10), ]
table(d3$day)
vd3 <- data4[data4$day==10, ]
dim(vd3)


d4 <- data4[(data4$day>3) & (data4$day<11), ]
table(d4$day)
vd4 <- data4[data4$day==11, ]
dim(vd4)


d5 <- data4[(data4$day>4) & (data4$day<12), ]
table(d5$day)
vd5 <- data4[data4$day==12, ]
dim(vd5)

d6 <- data4[(data4$day>5) & (data4$day<13), ]
table(d6$day)
vd6 <- data4[data4$day==13, ]
dim(vd6)


d7 <- data4[(data4$day>6) & (data4$day<14),  ]
table(d7$day)
vd7 <- data4[data4$day==14, ]
dim(vd7)

# save(fourstates, data4, name4, cmaqdata4, cmaqsites, allsites,  knots4,
#     eaststates, d1, d2, d3, d4, d5, d6, d7, vd1, vd2, vd3, vd4, vd4, vd5, vd6, vd7,
#     file=paste0(dpath, "4states.RData"))

f2 <- obsO8hrmax~ sqrt(cmaqO8hrmax)

M1 <- Bsptime(package="spTimer", model="GP", formula=f2, data=d1,
              coordtype="lonlat", coords=3:4, scale.transform = "SQRT", n.report=1, N=5000)
M2 <- Bsptime(package="spTimer", model="GP", formula=f2, data=d2,
              coordtype="lonlat", coords=3:4, scale.transform = "SQRT", n.report=1, N=5000)
M3 <- Bsptime(package="spTimer", model="GP", formula=f2, data=d3,
              coordtype="lonlat", coords=3:4, scale.transform = "SQRT", n.report=1, N=5000)
M4 <- Bsptime(package="spTimer", model="GP", formula=f2, data=d4,
              coordtype="lonlat", coords=3:4, scale.transform = "SQRT", n.report=1, N=5000)
M5 <- Bsptime(package="spTimer", model="GP", formula=f2, data=d5,
              coordtype="lonlat", coords=3:4, scale.transform = "SQRT", n.report=1, N=5000)
M6 <- Bsptime(package="spTimer", model="GP", formula=f2, data=d6,
              coordtype="lonlat", coords=3:4, scale.transform = "SQRT", n.report=1, N=5000)
M7 <- Bsptime(package="spTimer", model="GP", formula=f2, data=d7,
              coordtype="lonlat", coords=3:4, scale.transform = "SQRT", n.report=1, N=5000)

gpf1 <- predict(M1$fit,newdata=vd1, newcoords=~long+lat,
                 type="temporal", foreStep=1, tol=0.005)
gpf2 <- predict(M2$fit,newdata=vd2, newcoords=~long+lat,
                 type="temporal", foreStep=1, tol=0.005)
gpf3 <- predict(M3$fit,newdata=vd3, newcoords=~long+lat,
                 type="temporal", foreStep=1, tol=0.005)
gpf4 <- predict(M4$fit,newdata=vd4, newcoords=~long+lat,
                 type="temporal", foreStep=1, tol=0.005)
gpf5 <- predict(M5$fit,newdata=vd5, newcoords=~long+lat,
                 type="temporal", foreStep=1, tol=0.005)
gpf6 <- predict(M6$fit,newdata=vd6, newcoords=~long+lat,
                 type="temporal", foreStep=1, tol=0.005)
gpf7 <- predict(M7$fit,newdata=vd7, newcoords=~long+lat,
                 type="temporal", foreStep=1, tol=0.005)

ugp1 <- calculate_validation_statistics(vd1$obsO8hrmax, yits=gpf1$fore.samples, summarystat = "median")
ugp2 <- calculate_validation_statistics(vd2$obsO8hrmax, yits=gpf2$fore.samples, summarystat = "median")
ugp3 <- calculate_validation_statistics(vd3$obsO8hrmax, yits=gpf3$fore.samples, summarystat = "median")
ugp4 <- calculate_validation_statistics(vd4$obsO8hrmax, yits=gpf4$fore.samples, summarystat = "median")
ugp5 <- calculate_validation_statistics(vd5$obsO8hrmax, yits=gpf5$fore.samples, summarystat = "median")
ugp6 <- calculate_validation_statistics(vd6$obsO8hrmax, yits=gpf6$fore.samples, summarystat = "median")
ugp7 <- calculate_validation_statistics(vd7$obsO8hrmax, yits=gpf7$fore.samples, summarystat = "median")

allugp <- rbind(unlist(ugp1$stats), unlist(ugp2$stats),  unlist(ugp3$stats),
                unlist(ugp4$stats), unlist(ugp5$stats), unlist(ugp6$stats), unlist(ugp7$stats))
# allugp
allugp <- data.frame(allugp, model="GP")
allugp
M1gpp <- Bsptime(package="spTimer", model="GPP", formula=f2, data=d1,
              coordtype="lonlat", knots.coords = knots4[, 3:4],  coords=3:4, scale.transform = "SQRT", n.report=1, N=5000)
M2gpp <- Bsptime(package="spTimer", model="GPP", formula=f2, data=d2,
              coordtype="lonlat",knots.coords = knots4[, 3:4],   coords=3:4, scale.transform = "SQRT", n.report=1, N=5000)
M3gpp <- Bsptime(package="spTimer", model="GPP", formula=f2, data=d3,
              coordtype="lonlat",knots.coords = knots4[, 3:4],   coords=3:4, scale.transform = "SQRT", n.report=1, N=5000)
M4gpp <- Bsptime(package="spTimer", model="GPP", formula=f2, data=d4,
              coordtype="lonlat", knots.coords = knots4[, 3:4],  coords=3:4, scale.transform = "SQRT", n.report=1, N=5000)
M5gpp <- Bsptime(package="spTimer", model="GPP", formula=f2, data=d5,
              coordtype="lonlat",knots.coords = knots4[, 3:4],   coords=3:4, scale.transform = "SQRT", n.report=1, N=5000)
M6gpp <- Bsptime(package="spTimer", model="GPP", formula=f2, data=d6,
              coordtype="lonlat", knots.coords = knots4[, 3:4],  coords=3:4, scale.transform = "SQRT", n.report=1, N=5000)
M7gpp <- Bsptime(package="spTimer", model="GPP", formula=f2, data=d7,
              coordtype="lonlat",knots.coords = knots4[, 3:4],   coords=3:4, scale.transform = "SQRT", n.report=1, N=5000)



fore1 <- predict(M1gpp$fit,newdata=vd1, newcoords=~long+lat,
                 type="temporal", foreStep=1, tol=0.005)
fore2 <- predict(M2gpp$fit,newdata=vd2, newcoords=~long+lat,
                 type="temporal", foreStep=1, tol=0.005)
fore3 <- predict(M3gpp$fit,newdata=vd3, newcoords=~long+lat,
                 type="temporal", foreStep=1, tol=0.005)
fore4 <- predict(M4gpp$fit,newdata=vd4, newcoords=~long+lat,
                 type="temporal", foreStep=1, tol=0.005)
fore5 <- predict(M5gpp$fit,newdata=vd5, newcoords=~long+lat,
                 type="temporal", foreStep=1, tol=0.005)
fore6 <- predict(M6gpp$fit,newdata=vd6, newcoords=~long+lat,
                 type="temporal", foreStep=1, tol=0.005)
fore7 <- predict(M7gpp$fit,newdata=vd7, newcoords=~long+lat,
                 type="temporal", foreStep=1, tol=0.005)

u1 <- calculate_validation_statistics(vd1$obsO8hrmax, yits=fore1$fore.samples, summarystat = "median")
u2 <- calculate_validation_statistics(vd2$obsO8hrmax, yits=fore2$fore.samples, summarystat = "median")
u3 <- calculate_validation_statistics(vd3$obsO8hrmax, yits=fore3$fore.samples, summarystat = "median")
u4 <- calculate_validation_statistics(vd4$obsO8hrmax, yits=fore4$fore.samples, summarystat = "median")
u5 <- calculate_validation_statistics(vd5$obsO8hrmax, yits=fore5$fore.samples, summarystat = "median")
u6 <- calculate_validation_statistics(vd6$obsO8hrmax, yits=fore6$fore.samples, summarystat = "median")
u7 <- calculate_validation_statistics(vd7$obsO8hrmax, yits=fore7$fore.samples, summarystat = "median")

agpp <- rbind(unlist(u1$stats), unlist(u2$stats),  unlist(u3$stats), unlist(u4$stats),
              unlist(u5$stats), unlist(u6$stats), unlist(u7$stats))
agpp <- data.frame(agpp, model="GPP")
# agpp
# Fit AR models
M1ar <- Bsptime(package="spTimer", model="AR", formula=f2, data=d1,
              coordtype="lonlat", coords=3:4, scale.transform = "SQRT", n.report=1, N=5000)
M2ar <- Bsptime(package="spTimer", model="AR", formula=f2, data=d2,
              coordtype="lonlat", coords=3:4, scale.transform = "SQRT", n.report=1, N=5000)
M3ar <- Bsptime(package="spTimer", model="AR", formula=f2, data=d3,
              coordtype="lonlat", coords=3:4, scale.transform = "SQRT", n.report=1, N=5000)
M4ar <- Bsptime(package="spTimer", model="AR", formula=f2, data=d4,
              coordtype="lonlat", coords=3:4, scale.transform = "SQRT", n.report=1, N=5000)
M5ar <- Bsptime(package="spTimer", model="AR", formula=f2, data=d5,
              coordtype="lonlat", coords=3:4, scale.transform = "SQRT", n.report=1, N=5000)
M6ar <- Bsptime(package="spTimer", model="AR", formula=f2, data=d6,
              coordtype="lonlat", coords=3:4, scale.transform = "SQRT", n.report=1, N=5000)
M7ar <- Bsptime(package="spTimer", model="AR", formula=f2, data=d7,
              coordtype="lonlat", coords=3:4, scale.transform = "SQRT", n.report=1, N=5000)

# Forecast using AR models

arf1 <- predict(M1ar$fit,newdata=vd1, newcoords=~long+lat,
                type="temporal", foreStep=1, tol=0.005)
arf2 <- predict(M2ar$fit,newdata=vd2, newcoords=~long+lat,
                type="temporal", foreStep=1, tol=0.005)
arf3 <- predict(M3ar$fit,newdata=vd3, newcoords=~long+lat,
                type="temporal", foreStep=1, tol=0.005)
arf4 <- predict(M4ar$fit,newdata=vd4, newcoords=~long+lat,
                type="temporal", foreStep=1, tol=0.005)
arf5 <- predict(M5ar$fit,newdata=vd5, newcoords=~long+lat,
                type="temporal", foreStep=1, tol=0.005)
arf6 <- predict(M6ar$fit,newdata=vd6, newcoords=~long+lat,
                type="temporal", foreStep=1, tol=0.005)
arf7 <- predict(M7ar$fit,newdata=vd7, newcoords=~long+lat,
                type="temporal", foreStep=1, tol=0.005)

# Calculate validation statistics
uar1 <- calculate_validation_statistics(vd1$obsO8hrmax, yits=arf1$fore.samples, summarystat = "median")
uar2 <- calculate_validation_statistics(vd2$obsO8hrmax, yits=arf2$fore.samples, summarystat = "median")
uar3 <- calculate_validation_statistics(vd3$obsO8hrmax, yits=arf3$fore.samples, summarystat = "median")
uar4 <- calculate_validation_statistics(vd4$obsO8hrmax, yits=arf4$fore.samples, summarystat = "median")
uar5 <- calculate_validation_statistics(vd5$obsO8hrmax, yits=arf5$fore.samples, summarystat = "median")
uar6 <- calculate_validation_statistics(vd6$obsO8hrmax, yits=arf6$fore.samples, summarystat = "median")
uar7 <- calculate_validation_statistics(vd7$obsO8hrmax, yits=arf7$fore.samples, summarystat = "median")

alluar <- rbind(unlist(uar1$stats), unlist(uar2$stats),  unlist(uar3$stats),
                unlist(uar4$stats), unlist(uar5$stats), unlist(uar6$stats), unlist(uar7$stats))
dim(alluar)
alluar <- data.frame(alluar, model="AR")

dim(alluar)
colnames(alluar)

allstats <- rbind(allugp, alluar, agpp)
allstats$days <- rep(8:14, 3)
allstats$criteria <- c("rmse")
allstats$lt2 <- c("mae")
allstats$lt3 <- c("crps")
# allstats



pcomp <- ggplot() +
  geom_line(data=allstats, aes(x=days, y=rmse,  color=model, linetype=criteria)) +
  geom_line(data=allstats, aes(x=days, y=mae,  color=model, linetype=lt2)) +
  geom_line(data=allstats, aes(x=days, y=crps,  color=model, linetype=lt3))+
  geom_point(data=allstats, aes(x=days, y=rmse,  color=model, shape=model)) +
  geom_point(data=allstats, aes(x=days, y=mae,  color=model, shape=model)) +
  geom_point(data=allstats, aes(x=days, y=crps,  color=model, shape=model))+
  labs(x="Forecast day", y="Criteria value") # +
  #facet_wrap(~ ,  ncol = 1) +
pcomp
# Not used 


dim(allstats)
head(allstats)
values <- c(allstats$rmse, allstats$mae, allstats$crps, allstats$cvg)
length(values)
model <- rep(allstats$model, 4)
days <- rep(allstats$days, 4)
criteria <- c(allstats$criteria, allstats$lt2, allstats$lt3, rep("Coverage", 21))
length(criteria)
length(days)
length(model)
allbig <- data.frame(values=values, model=model, days=days, criteria=criteria)
dim(allbig)
head(allbig)

pnewcomp <- ggplot() +
  geom_line(data=allbig, aes(x=days, y=values,  color=model, linetype=criteria)) +
  geom_point(data=allbig, aes(x=days, y=values,  color=model, shape=model)) +
  facet_wrap(~ criteria, ncol= 2, scales = "free")
  pnewcomp

ggsave(filename=paste0(figurepath, "fourstates_rmse_mae.png"))



# Calculate Hit and False Alarm Rates

names(arf1)
ans <- hitandfalsealarm(65, yobs = vd1$obsO8hrmax,  ypred=arf1$Median)
ans
ans <- hitandfalsealarm(75, yobs = vd1$obsO8hrmax,  ypred=arf1$Median)
ans


yobs <- c(vd1$obsO8hrmax, vd2$obsO8hrmax, vd3$obsO8hrmax, vd4$obsO8hrmax,
          vd5$obsO8hrmax, vd6$obsO8hrmax, vd7$obsO8hrmax)
yarpred <- c(arf1$Median, arf2$Median,arf3$Median, arf4$Median,arf5$Median,
             arf6$Median, arf7$Median)

ansar65 <- hitandfalsealarm(65, yobs =yobs,  ypred=yarpred)
ansar75 <- hitandfalsealarm(75, yobs =yobs,  ypred=yarpred)

ansar65
ansar75

ygp_pred <- c(gpf1$Median,gpf2$Median, gpf3$Median,gpf4$Median,gpf5$Median,gpf6$Median,gpf7$Median)

ansgp65 <- hitandfalsealarm(65, yobs =yobs,  ypred=ygp_pred)
ansgp75 <- hitandfalsealarm(75, yobs =yobs,  ypred=ygp_pred)

ansgp65
ansgp75

ygpp_pred <- c(fore1$Median, fore2$Median, fore3$Median, fore4$Median,
               fore5$Median, fore6$Median,fore7$Median)

ansgpp65 <- hitandfalsealarm(65, yobs =yobs,  ypred=ygpp_pred)
ansgpp75 <- hitandfalsealarm(75, yobs =yobs,  ypred=ygpp_pred)
ansgpp65
ansgpp75

a1 <- c(ansgp65$hitrate,  ansar65$hitrate, ansgpp65$hitrate,
        ansgp75$hitrate, ansar75$hitrate, ansgpp75$hitrate)

a2 <- c(ansgp65$falsealarm,  ansar65$falsealarm, ansgpp65$falsealarm,
        ansgp75$falsealarm, ansar75$falsealarm, ansgpp75$falsealarm)
u <- rbind(a1, a2)
table9.1 <- u 
# library(xtable)
# xtable(table9.1, digits=2)
dput(table9.1, file=paste0(filepath, "table9.1.txt"))

ints50gp1 <- get_parameter_estimates(t(gpf1$fore.samples), level=50)
ints50gp2 <- get_parameter_estimates(t(gpf2$fore.samples), level=50)
ints50gp3 <- get_parameter_estimates(t(gpf3$fore.samples), level=50)
ints50gp4 <- get_parameter_estimates(t(gpf4$fore.samples), level=50)
ints50gp5 <- get_parameter_estimates(t(gpf5$fore.samples), level=50)
ints50gp6 <- get_parameter_estimates(t(gpf6$fore.samples), level=50)
ints50gp7 <- get_parameter_estimates(t(gpf7$fore.samples), level=50)

ints90gp1 <- get_parameter_estimates(t(gpf1$fore.samples), level=90)
ints90gp2 <- get_parameter_estimates(t(gpf2$fore.samples), level=90)
ints90gp3 <- get_parameter_estimates(t(gpf3$fore.samples),  level=90)
ints90gp4 <- get_parameter_estimates(t(gpf4$fore.samples), level=90)
ints90gp5 <- get_parameter_estimates(t(gpf5$fore.samples),  level=90)
ints90gp6 <- get_parameter_estimates(t(gpf6$fore.samples), level=90)
ints90gp7 <- get_parameter_estimates(t(gpf7$fore.samples), level=90)

gp50ints <- rbind(ints50gp1,ints50gp2, ints50gp3, ints50gp4, ints50gp5, ints50gp6, ints50gp7)
gp90ints <- rbind(ints90gp1,ints90gp2, ints90gp3, ints90gp4, ints90gp5, ints90gp6, ints90gp7)

ints50ar1 <- get_parameter_estimates(t(arf1$fore.samples),  level=50)
ints50ar2 <- get_parameter_estimates(t(arf2$fore.samples), level=50)
ints50ar3 <- get_parameter_estimates(t(arf3$fore.samples),  level=50)
ints50ar4 <- get_parameter_estimates(t(arf4$fore.samples),  level=50)
ints50ar5 <- get_parameter_estimates(t(arf5$fore.samples),  level=50)
ints50ar6 <- get_parameter_estimates(t(arf6$fore.samples),  level=50)
ints50ar7 <- get_parameter_estimates(t(arf7$fore.samples), level=50)

ints90ar1 <- get_parameter_estimates(t(arf1$fore.samples), level=90)
ints90ar2 <- get_parameter_estimates(t(arf2$fore.samples),  level=90)
ints90ar3 <- get_parameter_estimates(t(arf3$fore.samples),  level=90)
ints90ar4 <- get_parameter_estimates(t(arf4$fore.samples),  level=90)
ints90ar5 <- get_parameter_estimates(t(arf5$fore.samples), level=90)
ints90ar6 <- get_parameter_estimates(t(arf6$fore.samples), level=90)
ints90ar7 <- get_parameter_estimates(t(arf7$fore.samples), level=90)

ar50ints <- rbind(ints50ar1,ints50ar2, ints50ar3, ints50ar4, ints50ar5, ints50ar6, ints50ar7)
ar90ints <- rbind(ints90ar1,ints90ar2, ints90ar3, ints90ar4, ints90ar5, ints90ar6, ints90ar7)


ints50gpp1 <- get_parameter_estimates(t(fore1$fore.samples), level=50)
ints50gpp2 <- get_parameter_estimates(t(fore2$fore.samples),  level=50)
ints50gpp3 <- get_parameter_estimates(t(fore3$fore.samples), level=50)
ints50gpp4 <- get_parameter_estimates(t(fore4$fore.samples), level=50)
ints50gpp5 <- get_parameter_estimates(t(fore5$fore.samples), level=50)
ints50gpp6 <- get_parameter_estimates(t(fore6$fore.samples), level=50)
ints50gpp7 <- get_parameter_estimates(t(fore7$fore.samples), level=50)

ints90gpp1 <- get_parameter_estimates(t(fore1$fore.samples), level=90)
ints90gpp2 <- get_parameter_estimates(t(fore2$fore.samples), level=90)
ints90gpp3 <- get_parameter_estimates(t(fore3$fore.samples), level=90)
ints90gpp4 <- get_parameter_estimates(t(fore4$fore.samples), level=90)
ints90gpp5 <- get_parameter_estimates(t(fore5$fore.samples), level=90)
ints90gpp6 <- get_parameter_estimates(t(fore6$fore.samples), level=90)
ints90gpp7 <- get_parameter_estimates(t(fore7$fore.samples), level=90)

gpp50ints <- rbind(ints50gpp1,ints50gpp2, ints50gpp3, ints50gpp4, ints50gpp5, ints50gpp6, ints50gpp7)
gpp90ints <- rbind(ints90gpp1,ints90gpp2, ints90gpp3, ints90gpp4, ints90gpp5, ints90gpp6, ints90gpp7)

head(gp50ints)
gp50len <- gp50ints$up-  gp50ints$low
gp90len <- gp90ints$up-  gp90ints$low
summary(gp50len)
summary(gp90len)

gpp50len <- gpp50ints$up-  gpp50ints$low
gpp90len <- gpp90ints$up-  gpp90ints$low

ar50len <- ar50ints$up-  ar50ints$low
ar90len <- ar90ints$up-  ar90ints$low

k <- length(ar50len)
ids <- rep(c("GP50", "AR50", "GPP50", "GP90", "AR90", "GPP90"), each=k)
model <- rep(c("GP", "AR", "GPP", "GP", "AR", "GPP"), each=k)
lendat <- data.frame(lenths = c(gp50len, ar50len, gpp50len, gp90len, ar90len, gpp90len),
    ids = factor(ids), model=factor(model))
head(lendat)

head(lendat)
dim(lendat)

length(yobs)


ugp <- cbind(t(gpf1$fore.samples), t(gpf2$fore.samples), t(gpf3$fore.samples),
             t(gpf4$fore.samples), t(gpf5$fore.samples),
             t(gpf6$fore.samples), t(gpf7$fore.samples))
dim(ugp)

uar <-  cbind(t(arf1$fore.samples), t(arf2$fore.samples), t(arf3$fore.samples),
              t(arf4$fore.samples), t(arf5$fore.samples),
              t(arf6$fore.samples), t(arf7$fore.samples))
dim(uar)
ugpp <- cbind(t(fore1$fore.samples), t(fore2$fore.samples), t(fore3$fore.samples),
              t(fore4$fore.samples), t(fore5$fore.samples),
              t(fore6$fore.samples), t(fore7$fore.samples))
dim(ugp)
findprop <- function(x) {
  value <- x[1]
  samples <- x[-1]
  prop <- length(samples[samples<value])/length(samples)
  prop
}
x <- cbind(yobs, t(ugp))
dim(x)
y <- na.omit(x)
dim(y)
findprop((y[1, ]))
pitgp <- apply(y, 1, findprop)

x <- cbind(yobs, t(uar))
dim(x)
y <- na.omit(x)
dim(y)
pitar <- apply(y, 1, findprop)


x <- cbind(yobs, t(ugpp))
dim(x)
y <- na.omit(x)
dim(y)
pitgpp <- apply(y, 1, findprop)
m <- length(pitgpp)
model <- factor(rep(c("GP", "AR", "GPP"), each=m))
adat <- data.frame(pits=c(pitgp, pitar, pitgpp), model=model)
head(adat)
summary(adat)


twodat <- adat[adat$model !="GP", ]


K <- 100
ygood <- na.omit(yobs)
summary(ygood)
yk <- seq(from=min(ygood), to = max(ygood), length=K)
yomat <- matrix(ygood, nrow=K, ncol=length(ygood), byrow=TRUE)
dim(yomat)
u <- cbind(yk, yomat)
dim(u)
Ghat <- apply(u, 1, findprop)
length(Ghat)


fbarhat <- function(yobs, ysample) {
  # yobs m by 1
  # ysample N by m

 x <- cbind(yobs, t(ysample))
 dim(x)
 y <- na.omit(x)
 dim(y)
 x <- y[, -1]
 dim(x)
 length(yk)
 N <- ncol(x)
 N
# x is the matrix of MCMC  samples
  m <- nrow(x)
  amat <- matrix(NA, nrow=m, ncol=K)
    for (i in 1:m) {
      ynewmat <- matrix(x[i, ], nrow=N, ncol=K, byrow=TRUE)
      dim(yomat)
      u <- rbind(yk, ynewmat)
      dim(u)
      v <- apply(u, 2, findprop)
      amat[i, ] <- as.vector(v)
    }
    Fbarhat <- apply(amat, 2, mean)
 # dans <- Fbarhat - Ghat
 Fbarhat
}

dansgp <- fbarhat(yobs, ugp) - Ghat
summary(dansgp)

dansar <- fbarhat(yobs, uar) - Ghat
summary(dansar)

dansgpp <- fbarhat(yobs, ugpp) - Ghat
summary(dansgpp)

model <- factor(rep(c("GP", "AR", "GPP"), each=K))
ddat <- data.frame(dans=c(dansgp, dansar, dansgpp), yk = rep(yk, 3), model=model)
head(ddat)
summary(ddat)


ggsave(filename=paste0(figurepath, "fourstates_mcp_diagram.png"))
rm(list=ls(pattern("M")))
rm(list=ls(pattern("arf")))
rm(list=ls(pattern("fore")))

save(table9.1, allbig,  lendat, adat, twodat, ddat, file="forecast/fourstatesresults.RData" )

} else {
load(file="forecast/fourstatesresults.RData")
table9.1<-   dget(file=paste0(filepath, "table9.1.txt"))
}

table9.1
##        [,1]      [,2]        [,3]      [,4]      [,5]     [,6]
## a1 83.00469 90.610329 90.51643192 95.774648 96.338028 96.52582
## a2 14.08451  5.258216  0.09389671  1.784038  1.596244  0.00000

sharpdiag <- ggplot(data=lendat,  aes(x=ids, y=lenths, col=model)) +
  geom_boxplot() +
  labs(x = "", y = "")  +
  scale_colour_manual(values = c("GP" = "green",
                                 "AR" = "red",
                                  "GPP" = "blue"))
sharpdiag



ggsave(filename=paste0(figurepath, "fourstates_sharpness.png"))
## Saving 7 x 5 in image


pitdiag <- ggplot(data=adat,  aes(pits, colour=model, fill=model)) +
  geom_histogram() +
  facet_wrap(~ model,  ncol = 3) +
  labs(x = "", y = "")  +
  scale_colour_manual(values = c("GP" = "green",
                                 "AR" = "red",
                                 "GPP" = "blue"))
pitdiag
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.


ggsave(paste0(figurepath, "fourstates_pitdiagram.png"))
## Saving 7 x 5 in image
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

pitdiag <- ggplot(data=twodat,  aes(pits, colour=model, fill=model)) +
  geom_histogram() +
  facet_wrap(~ model,  ncol = 2) +
  labs(x = "", y = "")  +
  scale_colour_manual(values = c("GP" = "green",
                                 "AR" = "red",
                                 "GPP" = "blue"))
pitdiag
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggsave(paste0(figurepath, "fourstates_pitdiagram_2.png"))
## Saving 7 x 5 in image
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

mcpdiag <- ggplot(data=ddat,  aes(x=yk, y=dans, colour=model, linetype=model)) +
  geom_line() +
  labs(x = "", y = "")  +
  geom_abline(slope=0, intercept = 0, size=1, col="black")+
  scale_colour_manual(values = c("GP" = "green",
                                 "AR" = "red",
                                 "GPP" = "blue"))
mcpdiag

4 Figures and tables in Section 9.6


if (longrun) {
load(file=paste0(dpath, "C9dailydata.Rdata"))
 # save(cmaqdailypred1451, f2,  eaststates, dailyobs14day, delete.fnc.coords, knots.coords.151,
  #  knots.coords.158, knots.coords.105, fnc.delete.map.XYZ, file=paste0(dpath, "dailydata.Rdata"))
 f2 <- obsO8hrmax~ sqrt(cmaqO8hrmax)
head(dailyobs14day)
head(cmaqdailypred1451)
dim(dailyobs14day)
# 9716 by 8
9716/14
# 694 sites times 14 days

M2 <- Bsptime(formula=f2, data=dailyobs14day, package="spTimer", model="GPP",
              coordtype = "lonlat", coords=2:3,
              prior.phi="Gamm", scale.transform = "SQRT",
              knots.coords = knots.coords.158, n.report=2, N=5000)
plot(M2$fit)
summary(M2)

table9.2 <- M2$params
dput(table9.2, file=paste0(filepath, "table9.2.txt"))
round(table9.2, 4)
# library(xtable)
# xtable(table9.2, digits=4)

# Forecast validation for moving window of 7 days
# Prepare the data sets
head(dailyobs14day)

d1 <- dailyobs14day[dailyobs14day$day<8, ]
table(d1$day)
vd1 <- dailyobs14day[dailyobs14day$day==8, ]
dim(vd1)
head(vd1)


d2 <- dailyobs14day[(dailyobs14day$day>1) & (dailyobs14day$day<9), ]
table(d2$day)
vd2 <- dailyobs14day[dailyobs14day$day==9, ]
dim(vd2)


d3 <- dailyobs14day[(dailyobs14day$day>2) & (dailyobs14day$day<10), ]
table(d3$day)
vd3 <- dailyobs14day[dailyobs14day$day==10, ]
dim(vd3)


d4 <- dailyobs14day[(dailyobs14day$day>3) & (dailyobs14day$day<11), ]
table(d4$day)
vd4 <- dailyobs14day[dailyobs14day$day==11, ]
dim(vd4)


d5 <- dailyobs14day[(dailyobs14day$day>4) & (dailyobs14day$day<12), ]
table(d5$day)
vd5 <- dailyobs14day[dailyobs14day$day==12, ]
dim(vd5)

d6 <- dailyobs14day[(dailyobs14day$day>5) & (dailyobs14day$day<13), ]
table(d6$day)
vd6 <- dailyobs14day[dailyobs14day$day==13, ]
dim(vd6)


d7 <- dailyobs14day[(dailyobs14day$day>6) & (dailyobs14day$day<14),  ]
table(d7$day)
vd7 <- dailyobs14day[dailyobs14day$day==14, ]
dim(vd7)

# Fit the models

M1gpp <- Bsptime(package="spTimer", model="GPP",  formula=f2, data=d1,
                 coordtype="lonlat", coords=2:3, knots.coords= knots.coords.158,
                 scale.transform = "SQRT", n.report=1, N=5000)

M2gpp <- Bsptime(package="spTimer", model="GPP",  formula=f2, data=d2,
                 coordtype="lonlat", coords=2:3, knots.coords= knots.coords.158,
                 scale.transform = "SQRT", n.report=1, N=5000)

M3gpp <- Bsptime(package="spTimer", model="GPP",  formula=f2, data=d3,
                 coordtype="lonlat", coords=2:3, knots.coords= knots.coords.158,
                 scale.transform = "SQRT", n.report=1, N=5000)

M4gpp <- Bsptime(package="spTimer", model="GPP",  formula=f2, data=d4,
                 coordtype="lonlat", coords=2:3, knots.coords= knots.coords.158,
                  scale.transform = "SQRT", n.report=1, N=5000)

M5gpp <- Bsptime(package="spTimer", model="GPP",  formula=f2, data=d5,
                 coordtype="lonlat", coords=2:3, knots.coords= knots.coords.158,
                 scale.transform = "SQRT", n.report=1, N=5000)

M6gpp <- Bsptime(package="spTimer", model="GPP",  formula=f2, data=d6,
                 coordtype="lonlat", coords=2:3, knots.coords= knots.coords.158,
                 scale.transform = "SQRT", n.report=1, N=5000)

M7gpp <- Bsptime(package="spTimer", model="GPP",  formula=f2, data=d7,
                 coordtype="lonlat", coords=2:3, knots.coords= knots.coords.158,
                 scale.transform = "SQRT", n.report=1, N=5000)


fore1 <- predict(M1gpp$fit,newdata=vd1, newcoords=~long+lat,
                   type="temporal", foreStep=1, tol=0.005)
fore2 <- predict(M2gpp$fit,newdata=vd2, newcoords=~long+lat,
                 type="temporal", foreStep=1, tol=0.005)
fore3 <- predict(M3gpp$fit,newdata=vd3, newcoords=~long+lat,
                 type="temporal", foreStep=1, tol=0.005)
fore4 <- predict(M4gpp$fit,newdata=vd4, newcoords=~long+lat,
                 type="temporal", foreStep=1, tol=0.005)
fore5 <- predict(M5gpp$fit,newdata=vd5, newcoords=~long+lat,
                 type="temporal", foreStep=1, tol=0.005)
fore6 <- predict(M6gpp$fit,newdata=vd6, newcoords=~long+lat,
                 type="temporal", foreStep=1, tol=0.005)
fore7 <- predict(M7gpp$fit,newdata=vd7, newcoords=~long+lat,
                 type="temporal", foreStep=1, tol=0.005)

u1 <- calculate_validation_statistics(vd1$obsO8hrmax, yits=fore1$fore.samples, summarystat = "median")
u2 <- calculate_validation_statistics(vd2$obsO8hrmax, yits=fore2$fore.samples, summarystat = "median")
u3 <- calculate_validation_statistics(vd3$obsO8hrmax, yits=fore3$fore.samples, summarystat = "median")
u4 <- calculate_validation_statistics(vd4$obsO8hrmax, yits=fore4$fore.samples, summarystat = "median")
u5 <- calculate_validation_statistics(vd5$obsO8hrmax, yits=fore5$fore.samples, summarystat = "median")
u6 <- calculate_validation_statistics(vd6$obsO8hrmax, yits=fore6$fore.samples, summarystat = "median")
u7 <- calculate_validation_statistics(vd7$obsO8hrmax, yits=fore7$fore.samples, summarystat = "median")

a <- rbind(unlist(u1), unlist(u2),  unlist(u3), unlist(u4), unlist(u5), unlist(u6), unlist(u7))
a
v1 <- spT.validation(vd1$obsO8hrmax, vd1$cmaqO8hrmax)
v2 <- spT.validation(vd2$obsO8hrmax, vd2$cmaqO8hrmax)
v3 <- spT.validation(vd3$obsO8hrmax, vd3$cmaqO8hrmax)
v4 <- spT.validation(vd4$obsO8hrmax, vd4$cmaqO8hrmax)
v5 <- spT.validation(vd5$obsO8hrmax, vd5$cmaqO8hrmax)
v6 <- spT.validation(vd6$obsO8hrmax, vd6$cmaqO8hrmax)
v7 <- spT.validation(vd7$obsO8hrmax, vd7$cmaqO8hrmax)

b <- c(v1[2], v2[2], v3[2], v4[2], v5[2], v6[2], v7[2])
b
table9.3 <- data.frame(Startday=1:7, Forecastday=8:14, cmaq=b, a)
dput(table9.3, file=paste0(filepath, "table9.3.txt"))
table9.3
library(xtable)
xtable(table9.3[, -1], digits=2)
v1
v2


# Prediction plot not included
fsums <- get_validation_summaries(t(fore7$fore.samples))
head(fsums)
dim(fsums)
pplots <- obs_v_pred_plot(yobs=vd7$obsO8hrmax, predsums = fsums, segments =F)

###

## Prediction Map
summary(d7)
## Fit model for days 7 to 13
M7 <- Bsptime(package="spTimer", model="GPP",  formula=f2, data=d7,
              coordtype="lonlat", coords=2:3, knots.coords= knots.coords.158,
              scale.transform = "SQRT", n.report=1, N=5000)

summary(M7)
names(M7$fit)
M7$fit$PMCC
# Forecast at the 694 data sites
vd7 <- dailyobs14day[dailyobs14day$day==14, ]
head(vd7)
fore7 <- predict(M7$fit, newdata=vd7, newcoords=~long+lat,
                 type="temporal", foreStep=1, tol=0.005)

head(vd7)

a7res <- data.frame(vd7[, -7], forecasts=fore7$Mean[1, ],
                    foremed=fore7$Median[1, ], foresd=fore7$SD[1, ])

head(a7res)
##  Forecast at un-observed grid locations on day 14
dim(cmaqdailypred1451)
head(cmaqdailypred1451)

forecastdata <- cmaqdailypred1451[cmaqdailypred1451$day==14, ]
dim(forecastdata)
head(forecastdata)

forepred <- predict(M7$fit, newdata=forecastdata, newcoords=~long+lat,
                    type="temporal", foreStep=1, tol=0.005)

names(forepred)
length(forepred$Mean[1, ])
summary(forepred$Mean[1, ])
summary(forepred$Median[1, ])
summary(forepred$SD[1, ])

foresum <- data.frame(forecastdata, forecasts=forepred$Mean[1, ],
                      foremed=forepred$Median[1, ], foresd=forepred$SD[1, ])

head(foresum)
allforecasts <- rbind(a7res, foresum)
head(allforecasts)
dim(allforecasts) # 2145 by 10 (694+1451)
save(table9.3, allforecasts, file="forecast/useasternresults.RData")

} else  {
 load(file="forecast/useasternresults.RData")
  table9.2 <- dget(file=paste0(filepath, "table9.2.txt"))
}


load(file=paste0(dpath, "eaststates.rda"))
round(table9.3, 2)
##   Startday Forecastday  cmaq stats.rmse stats.mae stats.crps stats.cvg
## 1        1           8 17.81      12.91     10.24       7.39     97.68
## 2        2           9 16.52      10.78      8.29       7.59     98.12
## 3        3          10 15.82       9.24      7.05       7.37     98.09
## 4        4          11 13.75       9.13      7.33       7.33     98.97
## 5        5          12 13.91       9.59      7.28       7.64     98.24
## 6        6          13 16.97      10.61      8.00       7.02     95.47
## 7        7          14 18.38      11.70      8.56       6.91     96.76

coord <- allforecasts[, c("long","lat")]
library(akima)
library(tidyr)
xo <- seq(from=min(coord$long)-0.5, to = max(coord$long)+0.8, length=200)
yo <- seq(from=min(coord$lat)-0.25, to = max(coord$lat)+0.8, length=200)
# use the median
surf <- interp(coord$long, coord$lat, allforecasts$foremed,  xo=xo, yo=yo)

names(surf)
## [1] "x" "y" "z"
surf <- list(x=surf$x, y=surf$y, z=surf$z)
v <- fnc.delete.map.XYZ(xyz=surf)

interp1 <- data.frame(long = v$x, v$z )
names(interp1)[1:length(v$y)+1] <- v$y
interp1 <- gather(interp1,key = lat,value =Forecast,-long,convert = TRUE)


useastmap <- map_data(database="state",regions=eaststates)
head(useastmap)
##        long      lat group order  region subregion
## 1 -87.46201 30.38968     1     1 alabama      <NA>
## 2 -87.48493 30.37249     1     2 alabama      <NA>
## 3 -87.52503 30.37249     1     3 alabama      <NA>
## 4 -87.53076 30.33239     1     4 alabama      <NA>
## 5 -87.57087 30.32665     1     5 alabama      <NA>
## 6 -87.58806 30.32665     1     6 alabama      <NA>
com <- rev(c("firebrick4","firebrick2","white","dodgerblue2","dodgerblue4"))#colour palette
zr <- range(interp1$Forecast, na.rm=T)
P <- ggplot() +
  geom_raster(data=interp1, aes(x = long, y = lat,fill = Forecast)) +
  geom_polygon(data=useastmap, aes(x=long, y=lat, group=group), color="black", size = 0.6, fill=NA) +
  scale_fill_gradientn(colours=com, na.value="gray95", limits=zr) +
  theme(axis.text = element_blank(), axis.ticks = element_blank()) +
  ggsn::scalebar(data =interp1, dist = 500, location = "bottomleft", transform=T, dist_unit = "km",
                 st.dist = .05, st.size = 5, height = .06, st.bottom=T, model="WGS84") +
  ggsn::north(data=interp1, location="bottomright", symbol=12) +
  labs(title= "Forecast map ozone levels for July 14, 2010", x="", y = "", size=2.5)

P

ggsave(filename=paste0(figurepath, "forecastmapJuly14.png"))
## Saving 7 x 5 in image

## Repeat for SD

head(allforecasts)
##    s.index     long     lat year month day cmaqO8hrmax forecasts  foremed
## 14       1 -94.6933 43.1233 2010     7  14    52.49989  44.85407 44.12131
## 28       2 -94.5886 38.7589 2010     7  14    63.97456  48.48949 47.45123
## 42       3 -94.5680 39.3322 2010     7  14    75.76489  51.88261 50.93674
## 56       5 -94.4175 37.2297 2010     7  14    51.08922  43.62887 42.54945
## 70       6 -94.3764 39.3030 2010     7  14    72.91255  50.81945 49.92338
## 84       7 -94.2833 39.4167 2010     7  14    64.30513  48.87018 47.75485
##      foresd
## 14 11.40912
## 28 13.42955
## 42 14.03164
## 56 12.83883
## 70 11.79357
## 84 13.68402
surf <- interp(coord$long, coord$lat, allforecasts$foresd,  xo=xo, yo=yo)
names(surf)
## [1] "x" "y" "z"
surf <- list(x=surf$x, y=surf$y, z=surf$z)
v <- fnc.delete.map.XYZ(xyz=surf)

interp1 <- data.frame(long = v$x, v$z )
names(interp1)[1:length(v$y)+1] <- v$y
interp1 <- gather(interp1,key = lat,value =sd,-long,convert = TRUE)

useastmap <- map_data(database="state",regions=eaststates)
head(useastmap)
##        long      lat group order  region subregion
## 1 -87.46201 30.38968     1     1 alabama      <NA>
## 2 -87.48493 30.37249     1     2 alabama      <NA>
## 3 -87.52503 30.37249     1     3 alabama      <NA>
## 4 -87.53076 30.33239     1     4 alabama      <NA>
## 5 -87.57087 30.32665     1     5 alabama      <NA>
## 6 -87.58806 30.32665     1     6 alabama      <NA>
com <- rev(c("firebrick4","firebrick2","white","dodgerblue2","dodgerblue4"))#colour palette
zr <- range(interp1$sd, na.rm=T)
Psd <- ggplot() +
  geom_raster(data=interp1, aes(x = long, y = lat,fill = sd)) +
  geom_polygon(data=useastmap, aes(x=long, y=lat, group=group), color="black", size = 0.6, fill=NA) +
  scale_fill_gradientn(colours=com, na.value="gray95", limits=zr) +
  theme(axis.text = element_blank(), axis.ticks = element_blank()) +
  ggsn::scalebar(data =interp1, dist = 500, location = "bottomleft", transform=T, dist_unit = "km",
                 st.dist = .05, st.size = 5, height = .06, st.bottom=T, model="WGS84") +
  ggsn::north(data=interp1, location="bottomright", symbol=12) +
  labs(title= "Sd of forecasted ozone levels for July 14, 2010", x="", y = "", size=2.5)

Psd

ggsave(filename=paste0(figurepath, "forecastmapJuly14_sd.png"))
## Saving 7 x 5 in image

# All done 
end.time <- proc.time()[3]
comp.time<- (end.time-start.time)/60
# comp.time<-fancy.time(comp.time)
print(comp.time)
##   elapsed 
## 0.2175833