Chapter 1: Examples of spatio-temporal data

Sujit K. Sahu

University of Southampton

2022-03-30

Abstract

This file contains all the code for reproducing the figures in Chapter 1.

## Note the start time
start.time<-proc.time()[3]
# Download mapfiles and data files from https://github.com/sujit-sahu/bookbmstdr
# figurepath <- "Your path to save figures"   # Folder to save  the figures 
# dpath <- "your data path/" # Folder containing the data files 
mappath <- "mapfiles/" # Folder containing the required map files 
figurepath <- "figures/chap1figures/"
dpath <- "datafiles/"
load(file=paste0(dpath, "C8ewpollution.RData"))
load(file=paste0(dpath, "C11us48cancerdata.RData"))
load(file=paste0(dpath, "C8euso3.RData"))
load(file=paste0(dpath, "C11England_LA_map_and_Respiratory.RData"))
load(file=paste0(dpath, "C11childpovertylondon.RData"))
load(file=paste0(mappath, "englamap.rda"))
load(file=paste0(mappath, "englainfo.rda"))
load(file=paste0(mappath, "aurnsites.rda"))
load(file=paste0(mappath, "engregmap.rda"))
load(file=paste0(mappath, "colpalette.rda"))
kdat <- read.csv(file=paste0(dpath, "C11Kenya_vaccine.csv"))
# Load all the libraries 
library(bmstdr)
## Warning in .recacheSubclasses(def@className, def, env): undefined subclass
## "numericVector" of class "Mnumeric"; definition not updated
library(ggplot2)
require(ggsn)
library(tidyr)
library(RColorBrewer)
library(akima)
library(rgdal)
library(broom)
library(doBy)
library(directlabels)
library(usmap)

1 Code to reproduce Figure1.1

nymap <- map_data(database="state",regions="new york")
head(nymap)
##        long      lat group order   region subregion
## 1 -73.92874 40.80605     1     1 new york manhattan
## 2 -73.93448 40.78886     1     2 new york manhattan
## 3 -73.95166 40.77741     1     3 new york manhattan
## 4 -73.96312 40.75449     1     4 new york manhattan
## 5 -73.96885 40.73730     1     5 new york manhattan
## 6 -73.97458 40.72584     1     6 new york manhattan
p <- ggplot() +
  geom_polygon(data=nymap, aes(x=long, y=lat, group=group), 
               color="black", size = 0.6, fill=NA) +
  geom_point(data =nyspatial, aes(x=Longitude,y=Latitude)) +
  labs( title= "28 air pollution monitoring sites in New York", x="Longitude", y = "Latitude") +
  scalebar(data =nymap, dist = 100, location = "bottomleft", transform=T, dist_unit = "km",
                 st.dist = .05, st.size = 5, height = .06, st.bottom=T, model="WGS84") +
  north(data=nymap, location="topleft", symbol=12)
p

ggsave(filename=paste0(figurepath, "nysites.png"))
## Saving 7 x 5 in image

2 Code to reproduce Figure1.2

wmap <- readOGR(dsn=path.expand(mappath),  layer="infuse_ctry_2011", verbose = FALSE)
## Warning in OGRSpatialRef(dsn, layer, morphFromESRI = morphFromESRI, dumpSRS =
## dumpSRS, : Discarded datum OSGB_1936 in Proj4 definition: +proj=tmerc +lat_0=49
## +lon_0=-2 +k=0.9996012717 +x_0=400000 +y_0=-100000 +ellps=airy +units=m +no_defs
ewmap <- wmap[-grep(pattern="N92000002", wmap$label),  ]
ewmap <- ewmap[-grep(pattern="S92000003", ewmap$label),  ]
engw <- tidy(ewmap)
head(engw)
# load(file=paste0(dpath, "C8ewpollution.RData"))
head(p2011data)
##      index       lon      lat easting northing  type validation year month day
## 1462     4 -3.034167 52.50361  329899   290027 Rural          1 2011     1   1
## 1463     4 -3.034167 52.50361  329899   290027 Rural          1 2011     1   2
## 1464     4 -3.034167 52.50361  329899   290027 Rural          1 2011     1   3
## 1465     4 -3.034167 52.50361  329899   290027 Rural          1 2011     1   4
## 1466     4 -3.034167 52.50361  329899   290027 Rural          1 2011     1   5
## 1467     4 -3.034167 52.50361  329899   290027 Rural          1 2011     1   6
##       aqum_no2 aqum_ozone aqum_pm10 aqum_pm2p5 obs_no2 obs_ozone obs_pm10
## 1462 25.328662   72.91099  4.701968   4.699310     9.9     57.75       NA
## 1463 17.231038   42.87047  4.801431   4.794895     7.4     70.25       NA
## 1464 20.505962   32.46326  5.794126   5.788368     2.5     71.25       NA
## 1465  5.899658   58.21732  3.547942   3.544742     6.9     61.75       NA
## 1466  7.551908   60.83800  1.651302   1.647939     2.5     70.25       NA
## 1467  4.496976   60.15438  1.067562   1.066221     7.4     66.75       NA
##      obs_pm2p5
## 1462        NA
## 1463        NA
## 1464        NA
## 1465        NA
## 1466        NA
## 1467        NA
grid <- unique(p2011data[, 1:6])
dim(grid)
## [1] 144   6
# head(grid)
# table(grid$type)
pew <-  ggplot(data=engw, aes(x=long, y=lat, group = group)) +
  geom_polygon(colour='black',size=0.8, fill=NA) +
  geom_point(data=grid, aes(x=easting, y = northing, shape=type, col=type),  inherit.aes = FALSE, size=1.5) +
  labs( title= "144 air pollution monitoring sites in England and Wales", x="", y = "") +
  theme(axis.text.x = element_blank(), axis.text.y = element_blank(), axis.ticks = element_blank()) +
  theme(legend.title = element_blank()) +
  theme(legend.position =c(0.1, 0.6))  +
  ggsn::scalebar(data =engw, dist =100, location = "topleft", transform=F, dist_unit = "km", st.dist = .05, st.size = 5, height = .06, st.bottom=T) +
  ggsn::north(data=engw, location="topright", symbol=12)
pew

ggsave(filename=paste0(figurepath, "engwsites.png"))
## Saving 7 x 5 in image

3 Code to reproduce Figure1.3

# load(file=paste0(dpath, "C8euso3.RData"))
head(euso3)
##   s.index Longitude Latitude Year Month Day o8hrmax     cMAX torder   sqcmax
## 1       1   -85.802   33.281 1997     5   1   59.75 25.59053      1 5.058708
## 2       1   -85.802   33.281 1997     5   2   52.75 27.03593      2 5.199608
## 3       1   -85.802   33.281 1997     5   3   51.75 24.16542      3 4.915833
## 4       1   -85.802   33.281 1997     5   4   59.38 20.42035      4 4.518888
## 5       1   -85.802   33.281 1997     5   5   66.17 23.00181      5 4.796020
## 6       1   -85.802   33.281 1997     5   6   75.50 25.79922      6 5.079294
grid <- unique(euso3[, 1:3])
head(grid)
##      s.index Longitude Latitude
## 1          1   -85.802   33.281
## 1531       2   -86.137   32.498
## 3061       3   -86.915   33.486
## 4591       4   -87.004   33.331
## 6121       5   -86.817   33.386
## 7651       6   -86.669   33.705
useastmap <- map_data(database="state",regions=eaststates)
head(useastmap)
##        long      lat group order  region subregion
## 1 -87.46201 30.38968     1     1 alabama      <NA>
## 2 -87.48493 30.37249     1     2 alabama      <NA>
## 3 -87.52503 30.37249     1     3 alabama      <NA>
## 4 -87.53076 30.33239     1     4 alabama      <NA>
## 5 -87.57087 30.32665     1     5 alabama      <NA>
## 6 -87.58806 30.32665     1     6 alabama      <NA>
p <-  ggplot(data=useastmap, aes(x=long, y=lat, group = group)) +
  geom_polygon(colour='black',size=0.8, fill=NA) +
  geom_point(data=grid, aes(x=Longitude, y = Latitude),  inherit.aes = FALSE, col="Red",  size=0.6) +
  labs( title= "691 sites in the Eastern US", x="", y = "") +
  theme(axis.text.x = element_blank(), axis.text.y = element_blank(), axis.ticks = element_blank()) +
  ggsn::north(data=useastmap, location="bottomright", symbol=12) +
  ggsn::scalebar(data =useastmap, dist =500, location ="bottomleft", transform=T, dist_unit = "km",
               st.dist = .05, st.size = 5, height = .06, st.bottom=T, model="WGS84")
p

ggsave(file=paste0(figurepath, "uso3sites.png"), width=12, height =10, units="cm")

4 Code to reproduce Figure 1.4

## Function to find 4th highest value 
oz4th<-function(x){
  y<-sort(x,na.last=F)
  an4th<-y[length(y)-3]
}
##
##  Function to calculate 3 year rolling averages
three_year_rolling_av <- function(x) {
  n <- length(x)
  y <- rep(NA, n-2)
  for (k in 3:n) {
    y[k-2] <- mean(x[(k-2):k])
  }
  y
}

## Evaluate the 4th highest
ob4th <- aggregate.data.frame(euso3$o8hrmax,
      by=list(s.index=euso3$s.index, Year=euso3$Year), oz4th)

head(ob4th)
##   s.index Year      x
## 1       1 1997 79.000
## 2       2 1997 70.130
## 3       3 1997 86.250
## 4       4 1997 79.880
## 5       5 1997 83.125
## 6       6 1997 78.500
dim(ob4th)
## [1] 6910    3


a <- summaryBy(x ~ s.index, data = ob4th, FUN = three_year_rolling_av)
dim(a)
## [1] 691   9
head(a)
##   s.index   x.FUN1   x.FUN2   x.FUN3   x.FUN4   x.FUN5   x.FUN6   x.FUN7
## 1       1 88.00000 87.41667 83.83333 81.16667 79.27333 75.40000 71.61000
## 2       2 79.88000 84.58667 79.71000 80.70667 76.08000 74.66333 71.00000
## 3       3 92.66667 92.87667 85.00333 83.04667 79.33667 76.83667 75.79333
## 4       4 88.92000 93.87667 89.79333 86.91667 79.54333 76.00000 77.29333
## 5       5 95.06333 99.29333 94.83500 90.54500 84.39833 82.06500 80.83500
## 6       6 86.04000 89.62333 85.66667 82.92000 77.75333 73.75333 71.62667
##     x.FUN8
## 1 72.46333
## 2 71.83667
## 3 78.79333
## 4 81.20667
## 5 84.35667
## 6 73.29333
b <- as.vector(t(a[,-1]))
rolling <- data.frame(s.index=rep(a$s.index, each=8), Year=rep(1999:2006, 691),  ave3=b)
head(rolling)
##   s.index Year     ave3
## 1       1 1999 88.00000
## 2       1 2000 87.41667
## 3       1 2001 83.83333
## 4       1 2002 81.16667
## 5       1 2003 79.27333
## 6       1 2004 75.40000

head(ob4th)
##   s.index Year      x
## 1       1 1997 79.000
## 2       2 1997 70.130
## 3       3 1997 86.250
## 4       4 1997 79.880
## 5       5 1997 83.125
## 6       6 1997 78.500
u <- merge(ob4th, rolling, all.x=T)
udat <- u[order(u$s.index, u$Year), ]
head(udat)
##   s.index Year     x     ave3
## 1       1 1997 79.00       NA
## 2       1 1998 94.00       NA
## 3       1 1999 91.00 88.00000
## 4       1 2000 77.25 87.41667
## 5       1 2001 83.25 83.83333
## 6       1 2002 83.00 81.16667

udat$s.index <- as.factor(udat$s.index)
p1 <- ggplot(data=udat, aes(x=Year, y=x, color=s.index)) +
  geom_line() +
  geom_abline(slope=0, intercept=85, col="black", size=1)+
  labs(x="Year", y="Annual 4th highest ozone") +
  scale_colour_discrete(guide="none")
p1
## Warning: Removed 564 row(s) containing missing values (geom_path).


ggsave(filename = paste0(figurepath, "an4thmax_97_06_base85.png"))
## Saving 7 x 5 in image
## Warning: Removed 564 row(s) containing missing values (geom_path).


udat <- na.omit(udat)
p2 <- ggplot(data=udat, aes(x=Year, y=ave3, color=s.index)) +
  geom_line() +
  geom_abline(slope=0, intercept=85, col="black", size=1)+
  labs(x="Year", y="Three year rolling average") +
  scale_colour_discrete(guide="none")
p2

ggsave(filename = paste0(figurepath, "3yave_99_06_base85.png"))
## Saving 7 x 5 in image

5 Code to reproduce Figure 1.6


long <- readOGR(dsn=path.expand(mappath), layer="Longhurst_world_v4_2010", verbose = FALSE)
# head(long@data)
# dim(long@data)

long_df <- tidy(long)
# head(long_df)
long_df$area <- as.numeric(long_df$id)
long_df$id <- as.numeric(long_df$id)
summary(long_df$id)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    0.00   15.00   20.97   43.00   53.00

## Just the outline map of the Longhurst regions


provsorder <- c(NA,NA,NA,13,11,15,4,10,1,8,NA,NA,NA,NA,NA,NA,NA,14,NA,NA,NA,2,3,NA,NA,NA,NA,NA,NA,NA,18,19,12,16,17,23,20,6,5,7,9,NA,NA,NA,NA,NA,NA,NA,NA,NA,21,22,NA,NA)#slightly different due to null region
long_df$truid <- NA

for(i in 1:54){
  long_df$truid[which(long_df$id==i)] <- provsorder[i]
}
centroids_df <- as.data.frame(coordinates(long))
head(centroids_df)
##          V1       V2
## 0 -12.32365 79.01508
## 1 -19.88359 66.41352
## 2  11.21437 67.31430
## 3 -25.60820 49.66515
## 4 -57.87931 40.08621
## 5 -52.85529 32.19891
centroids_df[5,] <- c(-55.3,40.7) #small number of regions centrepoints not optimal
centroids_df[33,] <- c(149.3,39) #small number of regions centrepoints not optimal
centroids_df[36,] <- c(160.6,-36.3)
centroids_df[51,] <- c(75,-36.2)

long_df$lonc <- NA
long_df$latc <- NA
head(long_df)
## # A tibble: 6 × 11
##    long   lat order hole  piece group    id  area truid lonc  latc 
##   <dbl> <dbl> <int> <lgl> <fct> <fct> <dbl> <dbl> <dbl> <lgl> <lgl>
## 1  180   89.1     1 FALSE 1     0.1       0     0    NA NA    NA   
## 2  180   71.5     2 FALSE 1     0.1       0     0    NA NA    NA   
## 3  180.  71.5     3 FALSE 1     0.1       0     0    NA NA    NA   
## 4  180.  71.5     4 FALSE 1     0.1       0     0    NA NA    NA   
## 5  180.  71.5     5 FALSE 1     0.1       0     0    NA NA    NA   
## 6  180.  71.5     6 FALSE 1     0.1       0     0    NA NA    NA
for(i in c(4,5,6,7,8,9,10,18,22,23,31,32,33,34,35,36,37,38,39,40,41,51,52)){
  long_df$lonc[which(long_df$area==i)] <- centroids_df$V1[i]
  long_df$latc[which(long_df$area==i)] <- centroids_df$V2[i]
}
p <-  ggplot(data=long_df, aes(x=long, y=lat, group = group,fill=4)) +
   geom_polygon(colour='black',size=0.25)+ #longhurst outlines with half width size
   geom_text(data=na.omit(long_df),aes(label = truid, x = lonc, y = latc,family="Times", col="red"),size=5) +
   ylab("Latitude")+xlab("Longitude")+
  coord_cartesian(ylim = c(-75, 75),expand=F)+
  scale_y_continuous(breaks=seq(-60,60,30))  +
  scale_x_continuous(breaks=seq(-180,180,60))  +
  #annotate("text",x=-133,y=-38.1,label="21",size=4,colour='black',family="Times")+ #extra label for region 21 to show that it wraps
  theme_bw()+theme(text=element_text(family="Times"))+
  theme(legend.title = element_blank()) +
  theme(legend.position="none")
plot(p)


ggsave(paste0(figurepath, "blank_longhurst.png"),  width=8.27, height=3.44, dpi=300)

6 Code to reproduce Figure 1.7

## Argofloat Example
atlmap <- map_data("world", xlim=c(-70, 10), ylim=c(15, 65))
head(atlmap)
##        long      lat group order   region subregion
## 1 -63.00122 18.22178     1     1 Anguilla      <NA>
## 2 -63.16001 18.17139     1     2 Anguilla      <NA>
## 3 -63.15332 18.20029     1     3 Anguilla      <NA>
## 4 -63.02603 18.26973     1     4 Anguilla      <NA>
## 5 -62.97959 18.26480     1     5 Anguilla      <NA>
## 6 -63.00122 18.22178     1     6 Anguilla      <NA>

atlmap <- atlmap[atlmap$long < 5, ]
atlmap <- atlmap[atlmap$long > -70, ]

atlmap <- atlmap[atlmap$lat < 65, ]
atlmap <- atlmap[atlmap$lat > 10, ]

argo <- argo_floats_atlantic_2003

deep <- argo[argo$depth==3, ]
deep$month <- factor(deep$month)

p <- ggplot() +
  geom_polygon(data=atlmap, aes(x=long, y=lat, group=group),
               color="black", size = 0.6, fill=NA) +
  geom_point(data =deep, aes(x=lon, y=lat, colour=month), size=1) +
  labs( title= "Argo float locations in deep ocean in 2003", x="Longitude", y = "Latitude") +
  ggsn::scalebar(data =atlmap, dist = 1000, location = "bottomleft", transform=T, dist_unit = "km",
                 st.dist = .05, st.size =5, height = .05, st.bottom=T, model="WGS84") +
  ggsn::north(data=atlmap, location="topright", symbol=12)
p

ggsave(paste0(figurepath, "argo_float_locations_deep.png"),  width=8.27, height=5, dpi=300)

7 Code to reproduce Figure 1.8


bdf <- merge(englamap, engtotals, by.x="id", by.y="mapid", all.y=T, all.x=F)
# head(bdf)

u <- englainfo[, c(1, 3, 7:8)]
colnames(u)
## [1] "Areacode"       "Areaname"       "centreEasting"  "centreNorthing"

#head(u)
cities <- c("Birmingham", "City of London", "Manchester", "Newcastle upon Tyne")
v <- u[match(cities, u$Areaname), ]
v$aname <- c("Birmingham", "London","Manchester", "Newcastle")
head(v)
##      Areacode            Areaname centreEasting centreNorthing      aname
## 268 E08000025          Birmingham      408600.1       287857.5 Birmingham
## 281 E09000001      City of London      532479.9       181273.7     London
## 247 E08000003          Manchester      384654.7       394994.0 Manchester
## 264 E08000021 Newcastle upon Tyne      421956.1       568241.6  Newcastle
v$xend <- v$centreEasting
v$yend <- v$centreNorthing
v$x <-  v$centreEasting + c(-150000, 0, -150000, 80000)
v$y <- v$centreNorthing + c(0, -150000,  0,  50000)
v$tx <- v$centreEasting + 100000 * c(-1.5, 0, -1.5, 1)
v$ty <- v$y + 50000* c(0.5, -0.1, 0.5, 0.5)
#v
spatframe <- ggplot(data=bdf, aes(x=long, y=lat, group = group, fill="White")) +
  # scale_fill_gradientn(colours=com, na.value="black",limits=range(bdf$no2))  +
  geom_path(colour='red',size=0.5) +
  geom_polygon(data=engregmap, aes(x=long, y=lat, group = group),  fill=NA, colour='black',size=0.6)  +
  coord_equal()  +
  theme_bw()+theme(text=element_text(family="Times")) +
  labs(title= "A map of local authorities and regions in England",  x="", y = "", size=2.5) +
  theme(axis.text.x = element_blank(), axis.text.y = element_blank(),axis.ticks = element_blank())   +
  ggsn::scalebar(data=bdf, dist =50, location = "topleft", transform=F, dist_unit = "km",
                 st.dist = .05, st.size =4, height = .06, st.bottom=T) +
    annotate("text", x = v$tx, y = v$ty, label = v$aname) +
  annotate("segment", x=v$x, xend=v$xend, y=v$y, yend=v$yend, arrow=arrow(), color = "purple", size=1) +
  geom_point(data=aurnsites, aes(x=Easting, y=Northing), inherit.aes = FALSE, size = 1, colour = "blue")
spatframe


ggsave(filename=paste0(figurepath, "empty_frame_LA_regions.png"))
## Saving 7 x 5 in image

8 Code to reproduce Figure 1.9



plimits <- range(bdf$covid)
plimits
## [1]    4 1223
praw <-  ggplot(data=bdf, aes(x=long, y=lat, group = group, fill=covid)) +
  scale_fill_gradientn(colours=colpalette, na.value="black",limits=plimits)  +
  geom_polygon(colour='black',size=0.25) +
  geom_polygon(data=engregmap, aes(x=long, y=lat, group = group), fill=NA, colour='black',size=0.6)  +
  coord_equal() + guides(fill=guide_colorbar(title="No of deaths")) +
  theme_bw()+theme(text=element_text(family="Times")) +
  labs(title= "Observed number of Covid deaths during Mar 13 to Jul 31, 2020",  x="", y = "", size=2.5) +
  theme(axis.text.x = element_blank(), axis.text.y = element_blank(),axis.ticks = element_blank())   +
  theme(legend.position =c(0.2, 0.5)) +
  ggsn::scalebar(data=bdf, dist =50, location = "topleft", transform=F, dist_unit = "km",
                 st.dist = .05, st.size =4, height = .06, st.bottom=T)
plot(praw)


ggsave(filename=paste0(figurepath, "raw_covid_death_numbers.png"))
## Saving 7 x 5 in image


bdf$covidrate <- bdf$covid/bdf$popn*100000
plimits <- range(bdf$covidrate)
prate <-  ggplot(data=bdf, aes(x=long, y=lat, group = group, fill=covidrate)) +
  scale_fill_gradientn(colours=colpalette, na.value="black",limits=plimits)  +
  geom_polygon(colour='black',size=0.25) +
  geom_polygon(data=engregmap, aes(x=long, y=lat, group = group), fill=NA, colour='black',size=0.6)  +
  coord_equal() + guides(fill=guide_colorbar(title="Death rate")) +
  theme_bw()+theme(text=element_text(family="Times")) +
  labs(title= "Covid death rate per 100,000 during Mar 13 to Jul 31, 2020",  x="", y = "", size=2.5) +
  theme(axis.text.x = element_blank(), axis.text.y = element_blank(),axis.ticks = element_blank())   +
  theme(legend.position =c(0.2, 0.5)) +
  ggsn::scalebar(data=bdf, dist =50, location = "topleft", transform=F, dist_unit = "km",
                 st.dist = .05, st.size =4, height = .06, st.bottom=T)
prate


ggsave(filename=paste0(figurepath, "covid_death_rate.png"))
## Saving 7 x 5 in image

9 Code to reproduce Figure 1.10


head(engdeaths)
##    Areacode mapid spaceid    Region  popn jsa houseprice popdensity  startdate
## 1 E06000001     0       1 NorthEast 93663 0.6     127000        999 2020-03-13
## 2 E06000001     0       1 NorthEast 93663 0.6     127000        999 2020-03-20
## 3 E06000001     0       1 NorthEast 93663 0.6     127000        999 2020-03-27
## 4 E06000001     0       1 NorthEast 93663 0.6     127000        999 2020-04-03
## 5 E06000001     0       1 NorthEast 93663 0.6     127000        999 2020-04-10
## 6 E06000001     0       1 NorthEast 93663 0.6     127000        999 2020-04-17
##   Weeknumber       no2 covid allcause noofcases lag1noofcases lag2noofcases
## 1         11  8.285714     0       18         2             0             0
## 2         12 12.000000     1       24         4             2             0
## 3         13  6.142857     1       18        19             4             2
## 4         14  7.142857     5       31        50            19             4
## 5         15  8.571429    14       33        45            50            19
## 6         16  2.428571    10       35        99            45            50
##   lag3noofcases lag4noofcases         n0         n1         n2         n3
## 1             0             0 -2.3634075 -7.6009025 -7.6009025 -7.6009025
## 2             0             0 -1.6702603 -2.3634075 -7.6009025 -7.6009025
## 3             0             0 -0.1121157 -1.6702603 -2.3634075 -7.6009025
## 4             2             0  0.8554683 -0.1121157 -1.6702603 -2.3634075
## 5             4             2  0.7501078  0.8554683 -0.1121157 -1.6702603
## 6            19             4  1.5385652  0.7501078  0.8554683 -0.1121157
##          n4  Edeaths  Ecases logEdeaths logEcases highdeathsmr
## 1 -7.600902 3.817255 21.2542   1.339532  3.056555            0
## 2 -7.600902 5.089673 21.2542   1.627214  3.056555            0
## 3 -7.600902 3.817255 21.2542   1.339532  3.056555            0
## 4 -7.600902 6.574161 21.2542   1.883147  3.056555            0
## 5 -2.363408 6.998301 21.2542   1.945667  3.056555            1
## 6 -1.670260 7.422440 21.2542   2.004508  3.056555            1
engdeaths$covidrate <- 100000*engdeaths$covid/engdeaths$popn
summary(engdeaths$covidrate)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.2832  1.9303  4.2369  6.3483 40.0309
ptime <- ggplot(data=engdeaths,  aes(x=factor(Weeknumber), y=covidrate)) +
  geom_boxplot() +
  labs(x = "Week", y = "Death rate per 100,000")  +
  stat_summary(fun=median, geom="line", aes(group=1, col="red")) +
  theme(legend.position = "none")
ptime

ggsave(filename=paste0(figurepath, "deathrate_weekly_boxplots.png"))
## Saving 7 x 5 in image

10 Code to reproduce Figure 1.11


Kmap <- readOGR(dsn=path.expand(mappath), layer="sdr_subnational_boundaries2", verbose = FALSE)
names(Kmap)
##  [1] "ISO"        "FIPS"       "DHSCC"      "SVYTYPE"    "SVYYEAR"   
##  [6] "CNTRYNAMEE" "CNTRYNAMEF" "CNTRYNAMES" "DHSREGEN"   "DHSREGFR"  
## [11] "DHSREGSP"   "SVYID"      "REG_ID"     "Svy_Map"    "MULTLEVEL" 
## [16] "LEVELRNK"   "REGVAR"     "REGCODE"    "REGNAME"    "OTHREGVAR" 
## [21] "OTHREGCO"   "OTHREGNA"   "LEVELCO"    "LEVELNA"    "REPALLIND" 
## [26] "REGNOTES"   "SVYNOTES"
# head(Kmap@data)
adf <- tidy(Kmap)
#head(adf)
# table(adf$id)


# head(adf)
# head(kdat)
kdat$vacprop <- kdat$yvac/kdat$n
udf <- merge(kdat, adf)
# head(udf)

a <- range(udf$vacprop)
vmap <- ggplot(data=udf, aes(x=long, y=lat, group = group, fill=vacprop)) +
  scale_fill_gradientn(colours=colpalette, na.value="black",limits=a)  +
  # geom_path(colour='black',size=0.5)  +
  geom_polygon(colour='black',size=0.25) +
  theme_bw()+theme(text=element_text(family="Times")) +
  labs(title= "Observed vaccination map of 47 Counties in Kenya",  x="", y = "", size=2.5) +
  theme(axis.text.x = element_blank(), axis.text.y = element_blank(),axis.ticks = element_blank())
vmap


ggsave(file=paste0(figurepath, "Observed_vaccination_rate.png"))
## Saving 7 x 5 in image

11 Code to reproduce Figures 1.12 and 1.13


# load(file=paste0(dpath, "C11us48cancerdata.RData"))

head(us48cancertot)
##          Name state fips  totcount   totpop  totrate   AIAN.HL  AIAN.NHL
## 1     Alabama    AL    1  25611.33  4736261 540.1333 0.1272515 0.5892589
## 2     Arizona    AZ    4  29450.87  6363790 462.0467 1.3033103 4.2843893
## 3    Arkansas    AR    5  15938.33  2894066 550.0467 0.1995402 0.7781714
## 4  California    CA    6 165372.00 37325590 442.7733 1.2801805 0.5671331
## 5    Colorado    CO    8  22118.60  5047750 437.6800 0.9988665 0.7593700
## 6 Connecticut    CT    9  21617.73  3555387 607.9467 0.2868551 0.2620113
##       API.HL   API.NHL    BAA.HL   BAA.NHL  White.HL White.NHL
## 1 0.08855458  1.264443 0.2878583 26.567691  3.073277  68.00167
## 2 0.37435044  3.157346 0.6966057  4.198725 27.147177  58.83810
## 3 0.08576009  1.550466 0.2291451 15.805733  5.679722  75.67146
## 4 0.81832412 14.032092 0.8891200  6.373004 34.193313  41.84683
## 5 0.25679949  3.199021 0.5706648  4.324408 18.605895  71.28497
## 6 0.18256169  4.045786 1.5652350 10.071039 11.344880  72.24163
plot_usmap(data = us48cancertot, values = "totrate", color = "red", exclude=c("AK", "HI")) +
  # scale_fill_continuous(name = "Cancer rate", label = scales::comma) +
  scale_fill_gradientn(colours=colpalette,na.value="black",limits=range(us48cancertot$totrate),
                       name = "Cancer rate") +
  theme(legend.position = "right")


ggsave(paste0(figurepath, "uscancer_rate.png"),  width=8.27, height=3.44, dpi=300)


head(us48cancer0317)
##   fips state Year totcount  totpop totrate malescount malespop malesrate
## 1    1    AL 2003    21472 4503491   476.8      11228  2179422     515.2
## 2    1    AL 2004    22951 4530729   506.6      12024  2192872     548.3
## 3    1    AL 2005    23342 4565917   511.2      12172  2211403     550.4
## 4    1    AL 2006    24113 4628981   520.9      12798  2243501     570.4
## 5    1    AL 2007    25086 4672840   536.8      13236  2265565     584.2
## 6    1    AL 2008    26093 4718206   553.0      13708  2287949     599.1
##   femalescount femalespop femalesrate    AIAN.HL  AIAN.NHL     API.HL   API.NHL
## 1        10244    2324069       440.8 0.06708129 0.5561019 0.05142677 0.9124255
## 2        10927    2337857       467.4 0.07808898 0.5614108 0.06045385 0.9714331
## 3        11170    2354514       474.4 0.08941301 0.5661292 0.06757400 1.0243107
## 4        11315    2385480       474.3 0.10133980 0.5719185 0.07634510 1.0712509
## 5        11850    2407275       492.3 0.11241558 0.5782351 0.08480924 1.1150392
## 6        12385    2430257       509.6 0.12699742 0.5833997 0.09126350 1.1586819
##      BAA.HL  BAA.NHL White.HL White.NHL unemployment
## 1 0.2035754 26.22443 1.975512  70.00944          6.0
## 2 0.2141819 26.25379 2.147623  69.71302          5.7
## 3 0.2288719 26.26904 2.345592  69.40907          4.5
## 4 0.2426236 26.34064 2.557496  69.03839          4.0
## 5 0.2551981 26.36388 2.771569  68.71885          4.0
## 6 0.2730275 26.41459 2.978441  68.37359          5.7

a1 <- sum(us48cancer0317$totcount)
a2 <- sum(us48cancer0317$totpop)
rr <- a1/a2
us48cancer0317$exptot <- rr * us48cancer0317$totpop
summary(us48cancer0317$exptot)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    2658   10061   23959   33637   38976  208034

us48cancer0317$obs_smr <- us48cancer0317$totcount/us48cancer0317$exptot

colnames(us48cancer0317)
##  [1] "fips"         "state"        "Year"         "totcount"     "totpop"      
##  [6] "totrate"      "malescount"   "malespop"     "malesrate"    "femalescount"
## [11] "femalespop"   "femalesrate"  "AIAN.HL"      "AIAN.NHL"     "API.HL"      
## [16] "API.NHL"      "BAA.HL"       "BAA.NHL"      "White.HL"     "White.NHL"   
## [21] "unemployment" "exptot"       "obs_smr"
u <- us48cancer0317[, c("state", "Year", "obs_smr")]
u <- u[u$Year==2003, ]
u <- u[order(u$obs_smr), ]
# u
pstate <- data.frame(state=c("PA", "WV", "ME", "UT", "TX","TN", "FL", "CA", "ND", "NY"))

dp <- merge(pstate, us48cancer0317)
dp2 <- merge(pstate, us48cancer0317)
dim(dp)
## [1] 150  23
# head(dp)
p1 <- ggplot(data=dp, aes(x=Year, y=obs_smr, color=state, group=state)) +
  # geom_boxplot() + facet_wrap(~year, ncol=5) +
  geom_line() +
  labs(x="Year", y="Annual observed SMR") +
  scale_colour_discrete(guide="none") +
  geom_dl(data=dp, aes(x=Year, y=obs_smr, label =state), method = list(dl.combine("last.bumpup")), cex = 0.8)
p1


ggsave(filename=paste0(figurepath, "obsd_SMR_10.png"))
## Saving 7 x 5 in image

12 Code to reproduce Figure 1.14



# load(file=paste0(dpath, "C11England_LA_map_and_Respiratory.RData"))
head(eng323_la_map)
## # A tibble: 6 × 7
##      long     lat order hole  piece group       id       
##     <dbl>   <dbl> <int> <lgl> <fct> <fct>       <chr>    
## 1 548881. 191088.     1 FALSE 1     E09000002.1 E09000002
## 2 548852. 190846.     2 FALSE 1     E09000002.1 E09000002
## 3 548886. 190844.     3 FALSE 1     E09000002.1 E09000002
## 4 548881. 190797.     4 FALSE 1     E09000002.1 E09000002
## 5 549000. 190881.     5 FALSE 1     E09000002.1 E09000002
## 6 549097. 190702.     6 FALSE 1     E09000002.1 E09000002

la_mean <- summaryBy(smr ~ Areacode, data = Engrespriratory, FUN =mean)
head(la_mean)
##    Areacode  smr.mean
## 1 E06000001 0.7508640
## 2 E06000002 1.1941685
## 3 E06000003 0.9379811
## 4 E06000004 1.0992236
## 5 E06000005 0.7233094
## 6 E06000006 0.8726364
summary(la_mean)
##       Areacode      smr.mean     
##  E06000001:  1   Min.   :0.1682  
##  E06000002:  1   1st Qu.:0.4481  
##  E06000003:  1   Median :0.5635  
##  E06000004:  1   Mean   :0.6729  
##  E06000005:  1   3rd Qu.:0.8291  
##  E06000006:  1   Max.   :2.2239  
##  (Other)  :317

bdf <- merge(eng323_la_map, la_mean,  by.x="id", by.y="Areacode", all.y=T, all.x=F)
mr <- range(bdf$smr.mean)
psmr <-  ggplot( data=bdf, aes(x=long, y=lat, group = group,fill=smr.mean)) +
  scale_fill_gradientn(colours=colpalette,na.value="black",limits=mr)  +
  geom_polygon(colour='black',size=0.25)  + #longhurst outlines with half width size
  coord_equal() + guides(fill=guide_colorbar(title="SMR"))+
  theme_bw()+theme(text=element_text(family="Times")) +
  labs(title= "Mean standardised mortality rate, 2007-11",  x="", y = "", size=2.5) +
  theme(axis.text.x = element_blank(), axis.text.y = element_blank(),axis.ticks = element_blank()) +
  theme(legend.position =c(0.2, 0.5)) +
  ggsn::scalebar(data=bdf, dist =50, location = "bottomright", transform=F, dist_unit = "km",
                 st.dist = .05, st.size =4, height = .06, st.bottom=T)
psmr


png(file=paste0(figurepath, "smr_england_07-11.png"))
ggsn::north2(psmr, x=0.65, y=0.80, symbol=12)
dev.off()
## png 
##   2

ptime <- ggplot(data=Engrespriratory,  aes(x=month, y=smr, color=month)) + geom_boxplot() +
  facet_wrap(~year, ncol=5) +
  theme(legend.position = "none") +
  labs(#title="Boxplot of square-root precipitation against months by year",
    x = "Month", y = "SMR")
ptime


ggsave(filename=paste0(figurepath, "smr_eng_boxplot.png"))
## Saving 7 x 5 in image

13 Code to reproduce Figures 1.15 and 1.16

# load(file=paste0(dpath, "C11childpovertylondon.RData"))

dim(childpoverty)
## [1] 330  19
colnames(childpoverty)
##  [1] "id"              "Areacode"        "LA"              "Areaname"       
##  [5] "Areahectares"    "landareaSqKm"    "imdavscore07"    "year"           
##  [9] "population"      "medianincome"    "econinactive"    "houseprice"     
## [13] "nGPregistration" "punder16"        "pall"            "mincome"        
## [17] "log10price"      "logitpall"       "logitinactive"
# summary(childpoverty)
## Get the average fix data
head(lainfo)
##   id  Areacode             Areaname  hectares Abbrevname  centerx  centery
## 1  0 E09000021 Kingston upon Thames  3726.117   Kingston 519297.6 166820.0
## 2  1 E09000008              Croydon  8649.441    Croydon 533290.2 163541.2
## 3  2 E09000006              Bromley 15013.487    Bromley 542895.5 165655.5
## 4  3 E09000018             Hounslow  5658.541   Hounslow 513515.5 175643.2
## 5  4 E09000009               Ealing  5554.428     Ealing 515887.9 181715.5
## 6  5 E09000016             Havering 11445.735   Havering 554049.0 187392.0
u <- aggregate.data.frame(childpoverty[, c(7, 9:15)], by=list(id=childpoverty$id), FUN = mean)
lonfixdata <- merge(lainfo, u)
lonfixdata <- lonfixdata [order(as.numeric(lonfixdata$id)), ]

colnames(lonfixdata)
##  [1] "id"              "Areacode"        "Areaname"        "hectares"       
##  [5] "Abbrevname"      "centerx"         "centery"         "imdavscore07"   
##  [9] "population"      "medianincome"    "econinactive"    "houseprice"     
## [13] "nGPregistration" "punder16"        "pall"
b <- c(0, 20, 25, 30, 40, 100)
labss <- c("< 20", "20-25", "25-30", "30-40", "> 40")
lonfixdata$cutpall <- factor(cut(lonfixdata$pall, breaks=b,  labels=labss))
lonfixdata$cutpunder16 <- factor(cut(lonfixdata$punder16, breaks=b,  labels=labss))

bdf <- merge(lonadf, lonfixdata, by="id")
# head(bdf)
# 1 = City of London, 2 = Tower Hamlets,
# 3 = Islington, 4 = Camden, 5 = Westminster, 6 = Kensington and Chelsea,
# 7 = Hammersmith and Fulham.
colnames(bdf)
##  [1] "id"              "long"            "lat"             "order"          
##  [5] "hole"            "piece"           "group"           "Areacode"       
##  [9] "Areaname"        "hectares"        "Abbrevname"      "centerx"        
## [13] "centery"         "imdavscore07"    "population"      "medianincome"   
## [17] "econinactive"    "houseprice"      "nGPregistration" "punder16"       
## [21] "pall"            "cutpall"         "cutpunder16"
com <- c("lightyellow2", "lightyellow3" , "yellow2", "firebrick2", "purple")
lonpmap <- ggplot(data=bdf, aes(x=long, y=lat, group = group, fill=cutpall)) +
  geom_polygon(colour='black',size=0.25) +
  geom_text(data=lainfo, inherit.aes=F, fontface="bold",  aes(label =Abbrevname, x=centerx, y=centery),  size = 3) +
  scale_fill_manual(values =com, guides("Percentage"), guide = guide_legend(reverse=TRUE)) +
  theme_bw()+theme(text=element_text(family="Times")) +
  theme(legend.position = c(0.9, 0.25)) +
  coord_equal()  +
  labs(title= "Percentage of children living in poverty, 2006-15",  x="", y = "", size=2.5) +
  theme(axis.text.x = element_blank(), axis.text.y = element_blank(),axis.ticks = element_blank())
lonpmap


ggsave(filename=paste(figurepath, "london_poverty.png", sep=""))
## Saving 7 x 5 in image


head(childpoverty)
##     id  Areacode   LA             Areaname Areahectares landareaSqKm
## 201  0 E09000021 00AX Kingston upon Thames         3726        37.25
## 202  0 E09000021 00AX Kingston upon Thames         3726        37.25
## 203  0 E09000021 00AX Kingston upon Thames         3726        37.25
## 204  0 E09000021 00AX Kingston upon Thames         3726        37.25
## 205  0 E09000021 00AX Kingston upon Thames         3726        37.25
## 206  0 E09000021 00AX Kingston upon Thames         3726        37.25
##     imdavscore07 year population medianincome econinactive houseprice
## 201         13.1 2006     153700        22400         22.2     249999
## 202         13.1 2007     154500        22900         25.4     290000
## 203         13.1 2008     156000        24050         23.4     285000
## 204         13.1 2009     157300        25200         21.6     270000
## 205         13.1 2010     158600        25600         22.4     300000
## 206         13.1 2011     160400        24800         25.9     300000
##     nGPregistration punder16 pall      mincome log10price logitpall
## 201            3622     16.1 15.2 -0.330229493   5.397938 -1.719000
## 202            3721     16.8 16.1 -0.259822636   5.462398 -1.650806
## 203            3700     16.2 15.7 -0.097886866   5.454845 -1.680721
## 204            3576     16.1 15.8  0.064048904   5.431364 -1.673185
## 205            3632     15.0 14.9  0.120374389   5.477121 -1.742466
## 206            3950     13.6 13.8  0.007723419   5.477121 -1.832002
##     logitinactive
## 201     -1.254049
## 202     -1.077391
## 203     -1.185861
## 204     -1.289131
## 205     -1.242506
## 206     -1.051173
pall <- ggplot(data=childpoverty,  aes(x=year, y=pall, color=Areaname)) +
  geom_line() +
  theme(legend.position = "none") +
  labs(title="Percentage of children living in poverty", y = "Percentage") +
  scale_x_continuous(name="Year", breaks=2006:2015)
pall


ggsave(filename=paste0(figurepath, "tsplot_poverty.png"))
## Saving 7 x 5 in image

# All done 
end.time <- proc.time()[3]
comp.time<- (end.time-start.time)/60
# comp.time<-fancy.time(comp.time)
print(comp.time)
##  elapsed 
## 1.028433