With no mountains or coastline to limit the city during the postwar boom, Atlanta and its surrounds has ranked as the most sprawling large metropolitan area in the country. To prevent “the city in a forest” from being devoured by low-density single-family houses, we here present an environment model to forecast its urban development in 2020. By gathering and wrangling land cover, population and development data, we construct a binary logistic regression model that predicts change in development from 2001 to 2011. After examining the goodness of fit metrics and spatial generalizability, we come up with two planning scenarios (one forecasting a supply change and the other forecasting a demand change) and compare the prediction results. Last but not least, we provide recommendations for future allocation procedure, which may become decent strategies for the region’s next large-scale comprehensive planning process.
First, load the requisite packages and create necessary functions that are used to standardize the map outputs created below.
library(tidyverse)
library(sf)
library(raster)
library(knitr)
library(kableExtra)
library(tidycensus)
library(tigris)
library(FNN)
library(QuantPsyc)
library(caret)
library(yardstick)
library(pscl)
library(plotROC)
library(ggrepel)
library(pROC)
library(grid)
library(gridExtra)
library(viridis)
mapTheme <- function(base_size = 12) {
theme(
text = element_text( color = "black"),
plot.title = element_text(size = 14,colour = "black"),
plot.subtitle=element_text(face="italic"),
plot.caption=element_text(hjust=0),
axis.ticks = element_blank(),
panel.background = element_blank(),axis.title = element_blank(),
axis.text = element_blank(),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_rect(colour = "black", fill=NA, size=2)
)
}
plotTheme <- function(base_size = 12) {
theme(
text = element_text( color = "black"),
plot.title = element_text(size = 14,colour = "black"),
plot.subtitle = element_text(face="italic"),
plot.caption = element_text(hjust=0),
axis.ticks = element_blank(),
panel.background = element_blank(),
panel.grid.major = element_line("grey80", size = 0.1),
panel.grid.minor = element_blank(),
panel.border = element_rect(colour = "black", fill=NA, size=2),
strip.background = element_rect(fill = "grey80", color = "white"),
strip.text = element_text(size=12),
axis.title = element_text(size=12),
axis.text = element_text(size=10),
plot.background = element_blank(),
legend.background = element_blank(),
legend.title = element_text(colour = "black", face = "italic"),
legend.text = element_text(colour = "black", face = "italic"),
strip.text.x = element_text(size = 14)
)
}
palette2 <- c("#41b6c4","#253494")
palette4 <- c("#a1dab4","#41b6c4","#2c7fb8","#253494")
palette5 <- c("#ffffcc","#a1dab4","#41b6c4","#2c7fb8","#253494")
palette10 <- c("#f7fcf0","#e0f3db","#ccebc5","#a8ddb5","#7bccc4",
"#4eb3d3","#2b8cbe","#0868ac","#084081","#f7fcf0")
#this function converts a column in to quintiles. It is used for mapping.
quintileBreaks <- function(df,variable) {
as.character(quantile(df[[variable]],
c(.01,.2,.4,.6,.8),na.rm=T))
}
#This function can be used to convert a polygon sf to centroids xy coords.
xyC <- function(aPolygonSF) {
as.data.frame(
cbind(x=st_coordinates(st_centroid(aPolygonSF))[,1],
y=st_coordinates(st_centroid(aPolygonSF))[,2]))
}
#this function convert a raster to a data frame so it can be plotted in ggplot
rast <- function(inRaster) {
data.frame(
xyFromCell(inRaster, 1:ncell(inRaster)),
value = getValues(inRaster)) }
Download data and create base maps. To examine land cover change between 2001 and 2011 - the dependent variable we wish to forecast - we plot and reclassify the raster data from the USGS. A “sprawl belt” can be observed surrounding core metro Atlanta.
houstonMSA <-
st_read("./MSA.shp") %>%
st_transform(3521)
## Reading layer `MSA' from data source `D:\!courses\CPLN675\assignments\final\Atlanta\MSA.shp' using driver `ESRI Shapefile'
## Simple feature collection with 29 features and 20 fields
## geometry type: POLYGON
## dimension: XY
## bbox: xmin: 1926428 ymin: 1035024 xmax: 2497260 ymax: 1679928
## epsg (SRID): NA
## proj4string: +proj=tmerc +lat_0=30 +lon_0=-84.16666666666667 +k=0.9999 +x_0=699999.9999999999 +y_0=0 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=us-ft +no_defs
lc_change = raster("develop.tif")
ggplot() +
geom_sf(data=houstonMSA) +
geom_raster(data=rast(lc_change) %>% na.omit %>% filter(value > 0),
aes(x,y,fill=as.factor(value))) +
scale_fill_viridis(direction = -1, discrete=TRUE, name ="Land Cover\nChange") +
labs(title = "Land Cover Change") +
mapTheme()
reclassMatrix <-
matrix(c(
0,12,0,
12,24,1,
24,Inf,0),
ncol=3, byrow=T)
reclassMatrix
## [,1] [,2] [,3]
## [1,] 0 12 0
## [2,] 12 24 1
## [3,] 24 Inf 0
lc_change2 <-
reclassify(lc_change,reclassMatrix)
lc_change2[lc_change2 < 1] <- NA
names(lc_change2) <- "lc_change"
ggplot() +
geom_sf(data=houstonMSA) +
geom_raster(data=rast(lc_change2) %>% na.omit,
aes(x,y,fill=as.factor(value))) +
scale_fill_viridis(discrete=TRUE, name ="Land Cover\nChange") +
labs(title="Development land use change") +
mapTheme()
To parametrize spatial relationships in a regression context, we integrate the land use change raster with a vector fishnet.
houstonMSA_fishnet <-
st_make_grid(houstonMSA, 4000) %>%
st_sf()
houstonMSA_fishnet <-
houstonMSA_fishnet[houstonMSA,]
ggplot() +
geom_sf(data=houstonMSA_fishnet) +
labs(title="Fishnet, 4000 foot resolution") +
mapTheme()
changePoints <-
rasterToPoints(lc_change2) %>%
as.data.frame() %>%
st_as_sf(coords = c("x", "y"), crs = st_crs(houstonMSA_fishnet))
fishnet <-
aggregate(changePoints, houstonMSA_fishnet, sum) %>%
mutate(lc_change = ifelse(is.na(lc_change),0,1),
lc_change = as.factor(lc_change))
ggplot() +
geom_sf(data=houstonMSA) +
geom_point(data=fishnet,
aes(x=xyC(fishnet)$x, y=xyC(fishnet)$y, colour=lc_change)) +
scale_colour_manual(values = palette2,
labels=c("No Change","New Development"),
name = "") +
labs(title = "Land cover development change", subtitle = "As fishnet centroids") +
mapTheme()
Next, we gather, wrangle and plot the 2001 land cover data, then integrate it with the fishnet.
lc_2001 <- raster("lc20014k.tif")
ggplot() +
geom_sf(data=houstonMSA) +
geom_raster(data=rast(lc_2001) %>% na.omit %>% filter(value > 0),
aes(x,y,fill=as.factor(value))) +
scale_fill_viridis(discrete=TRUE, name ="") +
labs(title = "Land Cover, 2001") +
mapTheme()
developed <- lc_2001 == 21 | lc_2001 == 22 | lc_2001 == 23 | lc_2001 == 24
forest <- lc_2001 == 41 | lc_2001 == 42 | lc_2001 == 43
farm <- lc_2001 == 81 | lc_2001 == 82
wetlands <- lc_2001 == 90 | lc_2001 == 95
otherUndeveloped <- lc_2001 == 52 | lc_2001 == 71 | lc_2001 == 31
water <- lc_2001 == 11
names(developed) <- "developed"
names(forest) <- "forest"
names(farm) <- "farm"
names(wetlands) <- "wetlands"
names(otherUndeveloped) <- "otherUndeveloped"
names(water) <- "water"
aggregateRaster <- function(inputRasterList, theFishnet) {
#create an empty fishnet with the same dimensions as the input fishnet
theseFishnets <- theFishnet %>% dplyr::select()
#for each raster in the raster list
for (i in inputRasterList) {
#create a variable name corresponding to the ith raster
varName <- names(i)
#convert raster to points as an sf
thesePoints <-
rasterToPoints(i) %>%
as.data.frame() %>%
st_as_sf(coords = c("x", "y"), crs = st_crs(theFishnet)) %>%
filter(.[[1]] == 1)
#aggregate to the fishnet
thisFishnet <-
aggregate(thesePoints, theFishnet, length) %>%
mutate(!!varName := ifelse(is.na(.[[1]]),0,1))
#add to the larger fishnet
theseFishnets <- cbind(theseFishnets,thisFishnet)
}
#output all aggregates as one large fishnet
return(theseFishnets)
}
theRasterList <- c(developed,forest,farm,wetlands,otherUndeveloped,water)
aggregatedRasters <-
aggregateRaster(theRasterList, houstonMSA_fishnet) %>%
dplyr::select(developed,forest,farm,wetlands,otherUndeveloped,water) %>%
mutate_if(is.numeric,as.factor)
aggregatedRasters %>%
gather(var,value,developed:water) %>%
st_cast("POLYGON") %>% #just to make sure no weird geometries slipped in
mutate(X = xyC(.)$x,
Y = xyC(.)$y) %>%
ggplot() +
geom_sf(data=houstonMSA) +
geom_point(aes(X,Y, colour=as.factor(value))) +
facet_wrap(~var) +
scale_colour_manual(values = palette2,
labels=c("Other","Land Cover"),
name = "") +
labs(title = "Land cover types, 2001",
subtitle = "As fishnet centroids") +
mapTheme()
Population and population change is obviously an critical demand-side component of predicting future urban development. From the 2000 and 2010 census data for the 29 target counties, we can see that there is obvious population growth in the “sprawl belt”.
census_api_key("297d56371f9e37ecc822ec40bfca6f9c8e3ffa2b")
#cname <- houstonMSA[,c(6)]
#tail(cname)
houstonPop00 <-
get_decennial(geography = "tract", variables = "P001001", year = 2000,
state = "GA", geometry = TRUE,
county=c("Fayette County","Carroll County","Rockdale County","Cobb County","Forsyth County",
"Clayton County","Henry County","Dawson County","Bartow County","Lamar County",
"Haralson County","Meriwether County","Newton County","Gwinnett County",
"Fulton County","Pickens County","Hall County","Spalding County","Douglas County",
"Coweta County","Heard County","Butts County","Jasper County",
"DeKalb County","Cherokee County","Walton County","Pike County","Barrow County",
"Paulding County")) %>%
rename(pop_2000 = value) %>%
st_transform(st_crs(houstonMSA_fishnet))
##
|
| | 0%
|
|= | 2%
|
|== | 2%
|
|=== | 5%
|
|==== | 6%
|
|===== | 7%
|
|======= | 10%
|
|======== | 12%
|
|========= | 14%
|
|========== | 14%
|
|============ | 17%
|
|============= | 19%
|
|=============== | 21%
|
|================ | 24%
|
|================== | 26%
|
|=================== | 27%
|
|==================== | 28%
|
|===================== | 30%
|
|======================= | 33%
|
|======================== | 35%
|
|========================== | 37%
|
|============================ | 40%
|
|============================= | 42%
|
|============================== | 43%
|
|=============================== | 44%
|
|================================= | 46%
|
|================================== | 49%
|
|==================================== | 51%
|
|===================================== | 53%
|
|======================================= | 56%
|
|========================================= | 58%
|
|========================================== | 60%
|
|============================================ | 63%
|
|============================================= | 65%
|
|=============================================== | 67%
|
|================================================= | 69%
|
|================================================== | 72%
|
|==================================================== | 74%
|
|===================================================== | 76%
|
|====================================================== | 78%
|
|======================================================= | 79%
|
|========================================================= | 81%
|
|========================================================== | 83%
|
|============================================================ | 85%
|
|============================================================= | 88%
|
|=============================================================== | 90%
|
|================================================================ | 92%
|
|================================================================= | 92%
|
|================================================================== | 95%
|
|==================================================================== | 97%
|
|======================================================================| 100%
houstonPop10 <-
get_decennial(geography = "tract", variables = "P001001", year = 2010,
state = "GA", geometry = TRUE,
county=c("Fayette County","Carroll County","Rockdale County","Cobb County","Forsyth County",
"Clayton County","Henry County","Dawson County","Bartow County","Lamar County",
"Haralson County","Meriwether County","Newton County","Gwinnett County",
"Fulton County","Pickens County","Hall County","Spalding County","Douglas County",
"Coweta County","Heard County","Butts County","Jasper County",
"DeKalb County","Cherokee County","Walton County","Pike County","Barrow County",
"Paulding County")) %>%
rename(pop_2010 = value) %>%
st_transform(st_crs(houstonMSA_fishnet)) %>%
st_buffer(-1)
##
Downloading: 16 kB
Downloading: 16 kB
Downloading: 25 kB
Downloading: 25 kB
Downloading: 49 kB
Downloading: 49 kB
Downloading: 49 kB
Downloading: 49 kB
Downloading: 59 kB
Downloading: 59 kB
Downloading: 65 kB
Downloading: 65 kB
Downloading: 97 kB
Downloading: 97 kB
Downloading: 110 kB
Downloading: 110 kB
Downloading: 110 kB
Downloading: 110 kB
Downloading: 120 kB
Downloading: 120 kB
Downloading: 130 kB
Downloading: 130 kB
Downloading: 150 kB
Downloading: 150 kB
Downloading: 170 kB
Downloading: 170 kB
Downloading: 170 kB
Downloading: 170 kB
Downloading: 190 kB
Downloading: 190 kB
Downloading: 190 kB
Downloading: 190 kB
Downloading: 210 kB
Downloading: 210 kB
Downloading: 230 kB
Downloading: 230 kB
Downloading: 230 kB
Downloading: 230 kB
Downloading: 250 kB
Downloading: 250 kB
Downloading: 250 kB
Downloading: 250 kB
Downloading: 260 kB
Downloading: 260 kB
Downloading: 280 kB
Downloading: 280 kB
Downloading: 290 kB
Downloading: 290 kB
Downloading: 290 kB
Downloading: 290 kB
Downloading: 360 kB
Downloading: 360 kB
Downloading: 360 kB
Downloading: 360 kB
Downloading: 370 kB
Downloading: 370 kB
Downloading: 370 kB
Downloading: 370 kB
Downloading: 410 kB
Downloading: 410 kB
Downloading: 410 kB
Downloading: 410 kB
Downloading: 430 kB
Downloading: 430 kB
Downloading: 440 kB
Downloading: 440 kB
Downloading: 440 kB
Downloading: 440 kB
Downloading: 490 kB
Downloading: 490 kB
Downloading: 490 kB
Downloading: 490 kB
Downloading: 490 kB
Downloading: 490 kB
Downloading: 500 kB
Downloading: 500 kB
Downloading: 560 kB
Downloading: 560 kB
Downloading: 570 kB
Downloading: 570 kB
Downloading: 590 kB
Downloading: 590 kB
Downloading: 610 kB
Downloading: 610 kB
Downloading: 620 kB
Downloading: 620 kB
Downloading: 620 kB
Downloading: 620 kB
Downloading: 640 kB
Downloading: 640 kB
Downloading: 640 kB
Downloading: 640 kB
Downloading: 670 kB
Downloading: 670 kB
Downloading: 690 kB
Downloading: 690 kB
Downloading: 690 kB
Downloading: 690 kB
Downloading: 700 kB
Downloading: 700 kB
Downloading: 710 kB
Downloading: 710 kB
Downloading: 740 kB
Downloading: 740 kB
Downloading: 750 kB
Downloading: 750 kB
Downloading: 750 kB
Downloading: 750 kB
Downloading: 750 kB
Downloading: 750 kB
Downloading: 780 kB
Downloading: 780 kB
Downloading: 800 kB
Downloading: 800 kB
Downloading: 830 kB
Downloading: 830 kB
Downloading: 830 kB
Downloading: 830 kB
Downloading: 830 kB
Downloading: 830 kB
Downloading: 850 kB
Downloading: 850 kB
Downloading: 870 kB
Downloading: 870 kB
Downloading: 870 kB
Downloading: 870 kB
Downloading: 880 kB
Downloading: 880 kB
Downloading: 880 kB
Downloading: 880 kB
Downloading: 890 kB
Downloading: 890 kB
Downloading: 900 kB
Downloading: 900 kB
Downloading: 930 kB
Downloading: 930 kB
Downloading: 930 kB
Downloading: 930 kB
Downloading: 940 kB
Downloading: 940 kB
Downloading: 940 kB
Downloading: 940 kB
Downloading: 970 kB
Downloading: 970 kB
Downloading: 990 kB
Downloading: 990 kB
Downloading: 990 kB
Downloading: 990 kB
Downloading: 1 MB
Downloading: 1 MB
Downloading: 1 MB
Downloading: 1 MB
Downloading: 1.1 MB
Downloading: 1.1 MB
Downloading: 1.1 MB
Downloading: 1.1 MB
Downloading: 1.1 MB
Downloading: 1.1 MB
Downloading: 1.1 MB
Downloading: 1.1 MB
Downloading: 1.1 MB
Downloading: 1.1 MB
Downloading: 1.1 MB
Downloading: 1.1 MB
Downloading: 1.1 MB
Downloading: 1.1 MB
Downloading: 1.1 MB
Downloading: 1.1 MB
Downloading: 1.2 MB
Downloading: 1.2 MB
Downloading: 1.2 MB
Downloading: 1.2 MB
Downloading: 1.2 MB
Downloading: 1.2 MB
Downloading: 1.2 MB
Downloading: 1.2 MB
Downloading: 1.2 MB
Downloading: 1.2 MB
Downloading: 1.2 MB
Downloading: 1.2 MB
Downloading: 1.3 MB
Downloading: 1.3 MB
Downloading: 1.3 MB
Downloading: 1.3 MB
Downloading: 1.3 MB
Downloading: 1.3 MB
Downloading: 1.3 MB
Downloading: 1.3 MB
Downloading: 1.3 MB
Downloading: 1.3 MB
Downloading: 1.3 MB
Downloading: 1.3 MB
Downloading: 1.3 MB
Downloading: 1.3 MB
Downloading: 1.3 MB
Downloading: 1.3 MB
Downloading: 1.3 MB
Downloading: 1.3 MB
Downloading: 1.3 MB
Downloading: 1.3 MB
Downloading: 1.4 MB
Downloading: 1.4 MB
Downloading: 1.4 MB
Downloading: 1.4 MB
Downloading: 1.4 MB
Downloading: 1.4 MB
Downloading: 1.4 MB
Downloading: 1.4 MB
Downloading: 1.4 MB
Downloading: 1.4 MB
Downloading: 1.4 MB
Downloading: 1.4 MB
Downloading: 1.4 MB
Downloading: 1.4 MB
Downloading: 1.4 MB
Downloading: 1.4 MB
Downloading: 1.4 MB
Downloading: 1.4 MB
Downloading: 1.4 MB
Downloading: 1.4 MB
Downloading: 1.4 MB
Downloading: 1.4 MB
Downloading: 1.4 MB
Downloading: 1.4 MB
Downloading: 1.4 MB
Downloading: 1.4 MB
grid.arrange(
ggplot() +
geom_sf(data = houstonPop00, aes(fill=factor(ntile(pop_2000,5))), colour=NA) +
scale_fill_manual(values = palette5,
labels=quintileBreaks(houstonPop00,"pop_2000"),
name="Quintile\nBreaks") +
labs(title="Population, Atlanta MSA: 2000") +
mapTheme(),
ggplot() +
geom_sf(data = houstonPop10, aes(fill=factor(ntile(pop_2010,5))), colour=NA) +
scale_fill_manual(values = palette5,
labels=quintileBreaks(houstonPop10,"pop_2010"),
name="Quintile\nBreaks") +
labs(title="Population, Atlanta MSA: 2010") +
mapTheme(), ncol=2)
Using the areal weighted interpolation method, we reconcile tract boundaries and fishnet grid cells.
houstonMSA_fishnet <-
houstonMSA_fishnet %>%
rownames_to_column("fishnetID") %>%
mutate(fishnetID = as.numeric(fishnetID)) %>%
dplyr::select(fishnetID)
fishnetPopulation00 <-
st_interpolate_aw(houstonPop00["pop_2000"],houstonMSA_fishnet, extensive=TRUE) %>%
as.data.frame(.) %>%
left_join(houstonMSA_fishnet, ., by=c("fishnetID"='Group.1')) %>%
mutate(pop_2000 = replace_na(pop_2000,0)) %>%
dplyr::select(pop_2000)
fishnetPopulation10 <-
st_interpolate_aw(houstonPop10["pop_2010"],houstonMSA_fishnet, extensive=TRUE) %>%
as.data.frame(.) %>%
left_join(houstonMSA_fishnet, ., by=c("fishnetID"='Group.1')) %>%
mutate(pop_2010 = replace_na(pop_2010,0)) %>%
dplyr::select(pop_2010)
fishnetPopulation <-
cbind(fishnetPopulation00,fishnetPopulation10) %>%
dplyr::select(pop_2000,pop_2010) %>%
mutate(pop_Change = pop_2010 - pop_2000)
grid.arrange(
ggplot() +
geom_sf(data=houstonPop10, aes(fill=factor(ntile(pop_2010,5))),colour=NA) +
scale_fill_manual(values = palette5,
labels=substr(quintileBreaks(houstonPop10,"pop_2010"),1,4),
name="Quintile\nBreaks") +
labs(title="Population, Atlanta MSA: 2010",
subtitle="Represented as tracts; Boundaries omitted") +
mapTheme(),
ggplot() +
geom_sf(data=fishnetPopulation, aes(fill=factor(ntile(pop_2010,5))),colour=NA) +
scale_fill_manual(values = palette5,
labels=substr(quintileBreaks(fishnetPopulation,"pop_2010"),1,4),
name="Quintile\nBreaks") +
labs(title="Population, Atlanta MSA: 2010",
subtitle="Represented as fishnet gridcells; Boundaries omitted") +
mapTheme(), ncol=2)
Accessibility is another key determinant of urban development potential. Below, new development is mapped with the state highway overlay.
houstonHighways <-
st_read("./highway.shp") %>%
st_transform(st_crs(houstonMSA)) %>%
st_intersection(houstonMSA)
## Reading layer `highway' from data source `D:\!courses\CPLN675\assignments\final\Atlanta\highway.shp' using driver `ESRI Shapefile'
## Simple feature collection with 2157 features and 6 fields
## geometry type: LINESTRING
## dimension: XY
## bbox: xmin: 1934735 ymin: 1035043 xmax: 2497275 ymax: 1665574
## epsg (SRID): NA
## proj4string: +proj=tmerc +lat_0=30 +lon_0=-84.16666666666667 +k=0.9999 +x_0=699999.9999999999 +y_0=0 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=us-ft +no_defs
ggplot() +
geom_point(data=fishnet,
aes(x=xyC(fishnet)[,1], y=xyC(fishnet)[,2],colour=lc_change),size=1.5) +
geom_sf(data=houstonHighways) +
scale_colour_manual(values = palette2,
labels=c("No Change","New Development")) +
labs(title = "New Development and Highways",
subtitle = "As fishnet centroids") +
mapTheme()
To measure accessibility, the distance from each fishnet grid cell to its nearest highway segment is calculated.
emptyRaster <- lc_change
emptyRaster[] <- NA
highway_raster <-
as(houstonHighways,'Spatial') %>%
rasterize(.,emptyRaster)
highway_raster_distance <- distance(highway_raster)
names(highway_raster_distance) <- "distance_highways"
highwayPoints <-
rasterToPoints(highway_raster_distance) %>%
as.data.frame() %>%
st_as_sf(coords = c("x", "y"), crs = st_crs(houstonMSA_fishnet))
highwayPoints_fishnet <-
aggregate(highwayPoints, houstonMSA_fishnet, mean) %>%
mutate(distance_highways = ifelse(is.na(distance_highways),0,distance_highways))
ggplot() +
geom_sf(data=houstonMSA) +
geom_point(data=highwayPoints_fishnet, aes(x=xyC(highwayPoints_fishnet)[,1],
y=xyC(highwayPoints_fishnet)[,2],
colour=factor(ntile(distance_highways,5))),size=1.5) +
scale_colour_manual(values = palette5,
labels=substr(quintileBreaks(highwayPoints_fishnet,"distance_highways"),1,8),
name="Quintile\nBreaks") +
geom_sf(data=houstonHighways, colour = "red") +
labs(title = "Distance to Highways",
subtitle = "As fishnet centroids; Highways visualized in red") +
mapTheme()
In the traditional ‘bid-rent’ economic model of urban development, development demand is a function of central city access. However, in cities facing urban sprawl like Atlanta, suburban locations can also be quite desirable. To measure the relative accessibility to nearby developments, we calculate the average distance from each grid cell to its 2 nearest developed neighboring grid cells in 2001.
nn_function <- function(measureFrom,measureTo,k) {
#convert the sf layers to matrices
measureFrom_Matrix <-
as.matrix(measureFrom)
measureTo_Matrix <-
as.matrix(measureTo)
nn <-
get.knnx(measureTo, measureFrom, k)$nn.dist
output <-
as.data.frame(nn) %>%
rownames_to_column(var = "thisPoint") %>%
gather(points, point_distance, V1:ncol(.)) %>%
arrange(as.numeric(thisPoint)) %>%
group_by(thisPoint) %>%
summarize(pointDistance = mean(point_distance)) %>%
arrange(as.numeric(thisPoint)) %>%
dplyr::select(-thisPoint) %>%
pull()
return(output)
}
fishnet$lagDevelopment <-
nn_function(xyC(fishnet),
xyC(filter(aggregatedRasters,developed==1)),
2)
ggplot() +
geom_sf(data=houstonMSA) +
geom_point(data=fishnet,
aes(x=xyC(fishnet)[,1], y=xyC(fishnet)[,2],
colour=factor(ntile(lagDevelopment,5))), size=1.5) +
scale_colour_manual(values = palette5,
labels=substr(quintileBreaks(fishnet,"lagDevelopment"),1,7),
name="Quintile\nBreaks") +
labs(title = "Spatial lag to 2001 development",
subtitle = "As fishnet centroids") +
mapTheme()
To better adapt policy making needs, we downloaded the county boundaries and integrate it with the fishnet.
options(tigris_class = "sf")
studyAreaCounties <-
counties("Georgia") %>%
st_transform(st_crs(houstonMSA)) %>%
dplyr::select(NAME) %>%
.[st_buffer(houstonMSA,-1000), , op=st_intersects]
##
|
| | 0%
|
| | 1%
|
|= | 1%
|
|= | 2%
|
|== | 2%
|
|== | 3%
|
|== | 4%
|
|=== | 4%
|
|=== | 5%
|
|==== | 5%
|
|==== | 6%
|
|===== | 6%
|
|===== | 7%
|
|===== | 8%
|
|====== | 8%
|
|====== | 9%
|
|======= | 9%
|
|======= | 10%
|
|======= | 11%
|
|======== | 11%
|
|======== | 12%
|
|========= | 12%
|
|========= | 13%
|
|========= | 14%
|
|========== | 14%
|
|========== | 15%
|
|=========== | 15%
|
|=========== | 16%
|
|============ | 16%
|
|============ | 17%
|
|============ | 18%
|
|============= | 18%
|
|============= | 19%
|
|============== | 19%
|
|============== | 20%
|
|============== | 21%
|
|=============== | 21%
|
|=============== | 22%
|
|================ | 22%
|
|================ | 23%
|
|================ | 24%
|
|================= | 24%
|
|================= | 25%
|
|================== | 25%
|
|================== | 26%
|
|=================== | 26%
|
|=================== | 27%
|
|=================== | 28%
|
|==================== | 28%
|
|==================== | 29%
|
|===================== | 29%
|
|===================== | 30%
|
|===================== | 31%
|
|====================== | 31%
|
|====================== | 32%
|
|======================= | 32%
|
|======================= | 33%
|
|======================= | 34%
|
|======================== | 34%
|
|======================== | 35%
|
|========================= | 35%
|
|========================= | 36%
|
|========================== | 36%
|
|========================== | 37%
|
|========================== | 38%
|
|=========================== | 38%
|
|=========================== | 39%
|
|============================ | 39%
|
|============================ | 40%
|
|============================ | 41%
|
|============================= | 41%
|
|============================= | 42%
|
|============================== | 42%
|
|============================== | 43%
|
|============================== | 44%
|
|=============================== | 44%
|
|=============================== | 45%
|
|================================ | 45%
|
|================================ | 46%
|
|================================= | 46%
|
|================================= | 47%
|
|================================= | 48%
|
|================================== | 48%
|
|================================== | 49%
|
|=================================== | 49%
|
|=================================== | 50%
|
|=================================== | 51%
|
|==================================== | 51%
|
|==================================== | 52%
|
|===================================== | 52%
|
|===================================== | 53%
|
|===================================== | 54%
|
|====================================== | 54%
|
|====================================== | 55%
|
|======================================= | 55%
|
|======================================= | 56%
|
|======================================== | 56%
|
|======================================== | 57%
|
|======================================== | 58%
|
|========================================= | 58%
|
|========================================= | 59%
|
|========================================== | 59%
|
|========================================== | 60%
|
|========================================== | 61%
|
|=========================================== | 61%
|
|=========================================== | 62%
|
|============================================ | 62%
|
|============================================ | 63%
|
|============================================ | 64%
|
|============================================= | 64%
|
|============================================= | 65%
|
|============================================== | 65%
|
|============================================== | 66%
|
|=============================================== | 66%
|
|=============================================== | 67%
|
|=============================================== | 68%
|
|================================================ | 68%
|
|================================================ | 69%
|
|================================================= | 69%
|
|================================================= | 70%
|
|================================================= | 71%
|
|================================================== | 71%
|
|================================================== | 72%
|
|=================================================== | 72%
|
|=================================================== | 73%
|
|=================================================== | 74%
|
|==================================================== | 74%
|
|==================================================== | 75%
|
|===================================================== | 75%
|
|===================================================== | 76%
|
|====================================================== | 76%
|
|====================================================== | 77%
|
|====================================================== | 78%
|
|======================================================= | 78%
|
|======================================================= | 79%
|
|======================================================== | 79%
|
|======================================================== | 80%
|
|======================================================== | 81%
|
|========================================================= | 81%
|
|========================================================= | 82%
|
|========================================================== | 82%
|
|========================================================== | 83%
|
|========================================================== | 84%
|
|=========================================================== | 84%
|
|=========================================================== | 85%
|
|============================================================ | 85%
|
|============================================================ | 86%
|
|============================================================= | 86%
|
|============================================================= | 87%
|
|============================================================= | 88%
|
|============================================================== | 88%
|
|============================================================== | 89%
|
|=============================================================== | 89%
|
|=============================================================== | 90%
|
|=============================================================== | 91%
|
|================================================================ | 91%
|
|================================================================ | 92%
|
|================================================================= | 92%
|
|================================================================= | 93%
|
|================================================================= | 94%
|
|================================================================== | 94%
|
|================================================================== | 95%
|
|=================================================================== | 95%
|
|=================================================================== | 96%
|
|==================================================================== | 96%
|
|==================================================================== | 97%
|
|==================================================================== | 98%
|
|===================================================================== | 98%
|
|===================================================================== | 99%
|
|======================================================================| 99%
|
|======================================================================| 100%
ggplot() +
geom_sf(data=studyAreaCounties) +
labs(title = "Study area counties") +
mapTheme()
Finally, we bring together all the disparate feature layers into a final dataset that can be used for regression analysis.
dat <-
cbind(
fishnet, highwayPoints_fishnet, fishnetPopulation, aggregatedRasters) %>%
dplyr::select(lc_change, developed, forest, farm, wetlands, otherUndeveloped, water,
pop_2000, pop_2010, pop_Change, distance_highways,lagDevelopment) %>%
st_join(studyAreaCounties) %>%
mutate(developed10 = ifelse(lc_change == 1 & developed == 1, 0, developed)) %>%
filter(water == 0)
In this section we explore the extent to which each feature is associated with development change.
The below bar plots indicate that compared to undeveloped grids, developed grids are nearer to highways and existing developments. As expected, the accessibility features have a positive influence on urban development.
dat %>%
dplyr::select(distance_highways,lagDevelopment,lc_change) %>%
gather(Variable, Value, -lc_change, -geometry) %>%
ggplot(., aes(lc_change, Value, fill=lc_change)) +
geom_bar(position = "dodge", stat = "summary", fun.y = "mean") +
facet_wrap(~Variable) +
scale_fill_manual(values = palette2,
labels=c("No Change","New Development"),
name="") +
labs(title="New development as a function of the countinuous variables") +
plotTheme()
Next, the same visualization is created for the population related variables. Grids with higher population and population change are more favorable for future development.
dat %>%
dplyr::select(pop_2000,pop_2010,pop_Change,lc_change) %>%
gather(Variable, Value, -lc_change, -geometry) %>%
ggplot(., aes(lc_change, Value, fill=lc_change)) +
geom_bar(position = "dodge", stat = "summary", fun.y = "mean") +
facet_wrap(~Variable) +
scale_fill_manual(values = palette2,
labels=c("No Change","New Development"),
name="") +
labs(title="New development as a function of factor variables") +
plotTheme()
Finally, a table of land cover conversion between 2001 and 2011 is created. It is obvious that “the city in a forest” is losing its forest land due to urban sprawl.
dat %>%
dplyr::select(lc_change:otherUndeveloped,developed) %>%
gather(Land_Cover_Type, Value, -lc_change, -geometry) %>%
st_set_geometry(NULL) %>%
group_by(lc_change, Land_Cover_Type) %>%
summarize(n = sum(as.numeric(Value))) %>%
ungroup() %>%
mutate(Conversion_Rate = paste0(round(100 * n/sum(n), 2), "%")) %>%
filter(lc_change == 1) %>%
dplyr::select(Land_Cover_Type,Conversion_Rate) %>%
kable() %>% kable_styling(full_width = F)
Land_Cover_Type | Conversion_Rate |
---|---|
developed | 0.64% |
farm | 0.8% |
forest | 3.06% |
otherUndeveloped | 0.49% |
wetlands | 0.02% |
In this section, six separate logistic regression models are estimated to predict development change between 2001 and 2011 - with each subsequent model more sophisticated then the last.
To do so, the data is split into 50% training/test sets. Models are estimated on the training set, while their accuracy and generalizability are examined on the test set.
set.seed(3456)
trainIndex <-
createDataPartition(dat$developed, p = .50,
list = FALSE,
times = 1)
datTrain <- dat[ trainIndex,]
datTest <- dat[-trainIndex,]
Instead of adopting the model with the greatest goodness of fit (i.e. “Pseudo” R Squared statistic), we use the most comprehensive model for the purposes of prediction. The reason is that the goodness of fit metrics across models are relatively similar, and the comprehensive model is more likely to generalize across space.
The final model includes the 2001 land cover types, population change between 2000 and 2010, distance to the highways and spatial lag of 2001 development as predicting factors for 2010 urban development.
Model1 <- glm(lc_change ~ wetlands + forest + farm + otherUndeveloped,
family="binomial"(link="logit"), data = datTrain)
Model2 <- glm(lc_change ~ wetlands + forest + farm + otherUndeveloped + lagDevelopment,
family="binomial"(link="logit"), data = datTrain)
Model3 <- glm(lc_change ~ wetlands + forest + farm + otherUndeveloped + lagDevelopment + pop_2000,
family="binomial"(link="logit"), data = datTrain)
Model4 <- glm(lc_change ~ wetlands + forest + farm + otherUndeveloped + lagDevelopment + pop_2000 +
pop_2010,
family="binomial"(link="logit"), data = datTrain)
Model5 <- glm(lc_change ~ wetlands + forest + farm + otherUndeveloped + lagDevelopment + pop_Change,
family="binomial"(link="logit"), data = datTrain)
Model6 <- glm(lc_change ~ wetlands + forest + farm + otherUndeveloped + lagDevelopment + pop_Change +
distance_highways,
family="binomial"(link="logit"), data = datTrain)
modelList <- paste0("Model", 1:6)
map_dfc(modelList, function(x)pR2(get(x)))[4,] %>%
setNames(paste0("Model",1:6)) %>%
gather(Model,McFadden) %>%
ggplot(aes(Model,McFadden)) +
geom_bar(stat="identity") +
labs(title= "McFadden R-Squared by Model") +
plotTheme()
summary(Model6)
##
## Call:
## glm(formula = lc_change ~ wetlands + forest + farm + otherUndeveloped +
## lagDevelopment + pop_Change + distance_highways, family = binomial(link = "logit"),
## data = datTrain)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.4448 -0.3363 -0.2208 -0.1033 4.0184
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.00832417 0.17243317 -17.446 < 0.0000000000000002 ***
## wetlands1 0.78509529 0.60903614 1.289 0.197
## forest1 2.35105895 0.17959567 13.091 < 0.0000000000000002 ***
## farm1 2.18321199 0.21600029 10.107 < 0.0000000000000002 ***
## otherUndeveloped1 3.38976698 0.25770717 13.154 < 0.0000000000000002 ***
## lagDevelopment -0.00028465 0.00002144 -13.276 < 0.0000000000000002 ***
## pop_Change 0.00146642 0.00025171 5.826 0.00000000568 ***
## distance_highways -0.00001295 0.00001325 -0.978 0.328
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 3255.5 on 8494 degrees of freedom
## Residual deviance: 2737.3 on 8487 degrees of freedom
## AIC: 2753.3
##
## Number of Fisher Scoring iterations: 8
Using our final model, we output the predicted probability of a test set fishnet cell being developed. As shown in the below plot, only a small number of predicted probabilities are greater than or equal to 50%, which is reasonable given how rare of an event development is in our dataset. In order to judge our model with a confusion matrix, a smaller development classification threshold must be employed.
testSetProbs <-
data.frame(class = datTest$lc_change,
probs = predict(Model6, datTest, type="response"))
ggplot(testSetProbs, aes(probs)) +
geom_density(aes(fill=class), alpha=0.5) +
scale_fill_manual(values = palette2,
labels=c("No Change","New Development")) +
labs(title = "Histogram of test set predicted probabilities",
x="Predicted Probabilities",y="Density") +
plotTheme()
To pick a predicted probability threshold to classify an area as having new development, we have to tradeoff between Sensitivity and Specificity in our model. As shown in the table below, the 5% threshold correctly predicts a higher number of new development areas (Sensitivity), but incorrectly predicts a lower number of no change areas (Specificity). Conversely, the 17% threshold has a lower Sensitivity rate and but a far higher Specificity rate.
options(yardstick.event_first = FALSE)
testSetProbs <-
testSetProbs %>%
mutate(predClass_05 = as.factor(ifelse(testSetProbs$probs >= 0.05 ,1,0)),
predClass_17 = as.factor(ifelse(testSetProbs$probs >= 0.17 ,1,0)))
testSetProbs %>%
dplyr::select(-probs) %>%
gather(Variable, Value, -class) %>%
group_by(Variable) %>%
summarize(Sensitivity = round(yardstick::sens_vec(class,factor(Value)),2),
Specificity = round(yardstick::spec_vec(class,factor(Value)),2),
Accuracy = round(yardstick::accuracy_vec(class,factor(Value)),2)) %>%
kable() %>%
kable_styling(full_width = F)
Variable | Sensitivity | Specificity | Accuracy |
---|---|---|---|
predClass_05 | 0.73 | 0.72 | 0.72 |
predClass_17 | 0.29 | 0.97 | 0.93 |
Given the use case and the spatial distribution of land cover change, it may be more useful to have a model that predicts generally where new development occurs rather than one that predicts precisely where. As illustrated below, the 17% threshold provides this outcome.
predsForMap <-
dat %>%
mutate(probs = predict(Model6, dat, type="response") ,
Threshold_5_Pct = as.factor(ifelse(probs >= 0.05 ,1,0)),
Threshold_17_Pct = as.factor(ifelse(probs >= 0.17 ,1,0))) %>%
dplyr::select(lc_change,Threshold_5_Pct,Threshold_17_Pct) %>%
gather(Variable,Value, -geometry) %>%
st_cast("POLYGON")
ggplot() +
geom_point(data=predsForMap, aes(x=xyC(predsForMap)[,1], y=xyC(predsForMap)[,2], colour=Value)) +
facet_wrap(~Variable) +
scale_colour_manual(values = palette2, labels=c("No Change","New Development"),
name="") +
labs(title="Development predictions - Low threshold") + mapTheme()
ConfusionMatrix.metrics <-
dat %>%
mutate(probs = predict(Model6, dat, type="response") ,
Threshold_5_Pct = as.factor(ifelse(probs >= 0.05 ,1,0)),
Threshold_17_Pct = as.factor(ifelse(probs >= 0.17 ,1,0))) %>%
mutate(TrueP_05 = ifelse(lc_change == 1 & Threshold_5_Pct == 1, 1,0),
TrueN_05 = ifelse(lc_change == 0 & Threshold_5_Pct == 0, 1,0),
TrueP_17 = ifelse(lc_change == 1 & Threshold_17_Pct == 1, 1,0),
TrueN_17 = ifelse(lc_change == 0 & Threshold_17_Pct == 0, 1,0)) %>%
dplyr::select(., starts_with("True")) %>%
gather(Variable, Value, -geometry) %>%
st_cast("POLYGON")
ggplot(data=ConfusionMatrix.metrics) +
geom_point(aes(x=xyC(ConfusionMatrix.metrics)[,1],
y=xyC(ConfusionMatrix.metrics)[,2], colour = as.factor(Value))) +
facet_wrap(~Variable) +
scale_colour_manual(values = palette2, labels=c("Correct","Incorrect"),
name="") +
labs(title="Development predictions - Low threshold") + mapTheme()
For this use case, we are more concerned about whether our model is comparable to each county in the study area despite any possible differences in land use or land use planning. Thus, we examine the goodness of fit metrics for spatial cross-validation instead of regular cross-validation. For the most part, these confusion matrix metrics suggest the model is generalizable to those counties that underwent significant development change.
spatialCV <- function(dataFrame, uniqueID, dependentVariable, modelName) {
#initialize a data frame
endList <- list()
#create a list that is all the spatial group unqiue ids in the data frame (ie counties)
uniqueID_List <- unique(dataFrame[[uniqueID]])
x <- 1
y <- length(uniqueID_List)
#create a counter and while it is less than the number of counties...
while(x <= y)
{
#call a current county
currentUniqueID <- uniqueID_List[x]
#create a training set comprised of units not in that county and a test set of units
#that are that county
training <- dataFrame[ which(dataFrame[[uniqueID]] != uniqueID_List[x]),]
testing <- dataFrame[ which(dataFrame[[uniqueID]] == uniqueID_List[x]),]
#create seperate xy vectors
trainingX <- training[ , -which(names(training) %in% c(dependentVariable))]
testingX <- testing[ , -which(names(testing) %in% c(dependentVariable))]
trainY <- training[[dependentVariable]]
testY <- testing[[dependentVariable]]
#Calculate predictions on the test county as part of a data frame including the observed
#outcome and the unique county ID
thisPrediction <-
data.frame(class = testY,
probs = predict(modelName, testingX, type="response"),
county = currentUniqueID)
#Row bind the predictions to a data farme
endList <- rbind(endList, thisPrediction)
#iterate counter
x <- x + 1
}
#return the final list of counties and associated predictions
return (as.data.frame(endList))
}
#is.na(dat[13])
dat<-na.omit(dat)
spatialCV_counties <-
spatialCV(dat,"NAME","lc_change", Model6) %>%
mutate(predClass = as.factor(ifelse(probs >= 0.17 ,1,0)))
spatialCV_metrics <-
spatialCV_counties %>%
group_by(county) %>%
summarize(Observed_Change = sum(as.numeric(as.character(class))),
Sensitivity = round(yardstick::sens_vec(class,predClass),2),
Specificity = round(yardstick::spec_vec(class,predClass),2),
Accuracy = round(yardstick::accuracy_vec(class,predClass),2))
spatialCV_metrics %>%
kable() %>%
kable_styling(full_width = F)
county | Observed_Change | Sensitivity | Specificity | Accuracy |
---|---|---|---|---|
Meriwether | 3 | 0.00 | 1.00 | 0.99 |
Lamar | 3 | 0.00 | 1.00 | 0.99 |
Pike | 0 | NA | 1.00 | 1.00 |
Heard | 2 | 0.00 | 1.00 | 0.99 |
Jasper | 0 | NA | 1.00 | 1.00 |
Spalding | 5 | 0.00 | 0.99 | 0.98 |
Butts | 2 | 0.00 | 0.99 | 0.99 |
Coweta | 23 | 0.26 | 0.97 | 0.95 |
Fayette | 15 | 0.07 | 0.99 | 0.95 |
Henry | 60 | 0.25 | 0.95 | 0.88 |
Clayton | 24 | 0.25 | 0.94 | 0.88 |
Newton | 23 | 0.35 | 0.98 | 0.96 |
Carroll | 13 | 0.00 | 0.98 | 0.97 |
Fulton | 92 | 0.22 | 0.94 | 0.87 |
Rockdale | 24 | 0.17 | 0.97 | 0.90 |
Douglas | 27 | 0.19 | 0.93 | 0.88 |
Walton | 21 | 0.05 | 0.99 | 0.96 |
DeKalb | 49 | 0.27 | 0.92 | 0.86 |
Haralson | 9 | 0.00 | 0.99 | 0.98 |
Cobb | 73 | 0.32 | 0.93 | 0.86 |
Gwinnett | 115 | 0.48 | 0.86 | 0.81 |
Paulding | 44 | 0.11 | 0.94 | 0.88 |
Barrow | 19 | 0.00 | 0.98 | 0.92 |
Forsyth | 51 | 0.41 | 0.90 | 0.84 |
Cherokee | 49 | 0.27 | 0.98 | 0.93 |
Bartow | 27 | 0.07 | 0.98 | 0.95 |
Hall | 45 | 0.16 | 0.98 | 0.93 |
Dawson | 8 | 0.00 | 1.00 | 0.98 |
Pickens | 6 | 0.17 | 0.98 | 0.97 |
After training and validating our model on 2000-2010 data, we apply it on the 2010-2020 data to predict 2020 urban development.
First, we update the population change and spatial lag of development features in our model. Note that in this demand-change scenario, 2020 population is projected for each county, and then distributed to grids based on the cell’s existing population.
dat <-
dat %>%
mutate(lagDevelopment = nn_function(xyC(.), xyC(filter(.,developed10 == 1)),2))
countyPopulation_2020 <-
data.frame(
NAME =
c("Fayette","Carroll","Rockdale","Cobb","Forsyth",
"Clayton","Henry","Dawson","Bartow","Lamar",
"Haralson","Meriwether","Newton","Gwinnett",
"Fulton","Pickens","Hall","Spalding","Douglas",
"Coweta","Heard","Butts","Jasper",
"DeKalb","Cherokee","Walton","Pike","Barrow",
"Paulding"),
county_projection_2020 =
c(121912.648,126442.888,97485.96,787161.232,200784.584,296781.056,233286.768,25545.52,114579.608,20954.648,32924.32,25158.848,114351.952,921287.224,1053144.664,33669.064,205558.496,73299.512,151469.032,145650.648,13538.096,27061.32,15901.6,791525.592,245211.824,95830.592,20442.136,79355.848,162818.656)) %>%
left_join(dat %>%
st_set_geometry(NULL) %>%
group_by(NAME) %>%
summarize(county_population_2010 = round(sum(pop_2010))))
countyPopulation_2020 %>%
gather(Variable,Value, -NAME) %>%
ggplot(aes(reorder(NAME,-Value),Value)) +
geom_bar(aes(fill=Variable), stat = "identity") +
scale_fill_manual(values = palette2,
labels=c("2020","2010"),
name="Population") +
labs(title="Population Change by county: 2010 - 2020",
x="County", y="Population") +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
plotTheme()
The map of predicted probabilities is plotted to demonstrate predicted development demand in 2020.
dat_infill <-
dat %>%
#calculate population change
left_join(countyPopulation_2020) %>%
mutate(proportion_of_county_pop = pop_2010 / county_population_2010,
pop_2020.infill = proportion_of_county_pop * county_projection_2020,
pop_Change = round(pop_2020.infill - pop_2010),2) %>%
dplyr::select(-county_projection_2020, -county_population_2010,
-proportion_of_county_pop, -pop_2020.infill) %>%
#predict for 2020
mutate(predict_2020.infill = predict(Model6,. , type="response"))
dat_infill %>%
ggplot() +
geom_point(aes(x=xyC(dat_infill)[,1], y=xyC(dat_infill)[,2], colour = factor(ntile(predict_2020.infill,5)))) +
scale_colour_manual(values = palette5,
labels=substr(quintileBreaks(dat_infill,"predict_2020.infill"),1,4),
name="Quintile\nBreaks") +
geom_sf(data=studyAreaCounties, fill=NA, colour="black", size=1.5) +
labs(title= "Development Demand in 2020: Predicted Probabilities") +
mapTheme()
To balance predicted development demand with the supply of environmentally sensitive land, in this section we develop several measures of environmental sensitivity by county using 2011 land cover data. For this analysis, farmland and undeveloped land are be deemed Suitable, while environmentally sensitive areas like wetlands and forest are be deemed Not Suitable.
lc_2011 <- raster("lc20114k.tif")
developed11 <- lc_2011 == 21 | lc_2011 == 22 | lc_2011 == 23 | lc_2011 == 24
forest11 <- lc_2011 == 41 | lc_2011 == 42 | lc_2011 == 43
farm11 <- lc_2011 == 81 | lc_2011 == 82
wetlands11 <- lc_2011 == 90 | lc_2011 == 95
otherUndeveloped11 <- lc_2011 == 52 | lc_2011 == 71 | lc_2011 == 31
water11 <- lc_2011 == 11
names(developed11) <- "developed11"
names(forest11) <- "forest11"
names(farm11) <- "farm11"
names(wetlands11) <- "wetlands11"
names(otherUndeveloped11) <- "otherUndeveloped11"
names(water11) <- "water11"
ggplot() +
geom_sf(data=houstonMSA) +
geom_raster(data = rbind(rast(lc_2001) %>% mutate(label = "2001"),
rast(lc_2011) %>% mutate(label = "2011")) %>%
na.omit %>% filter(value > 0),
aes(x,y,fill=as.factor(value))) +
facet_wrap(~label) +
scale_fill_viridis(discrete=TRUE, name ="") +
labs(title = "Land Cover, 2001 & 2011a") +
mapTheme() + theme(legend.position = "none")
The environmentally sensitivity indicators include: 1 The total amount of wetlands and forest land cover area in 2011.
theRasterList11 <- c(developed11,forest11,farm11,wetlands11,otherUndeveloped11,water11)
dat2 <-
aggregateRaster(theRasterList11, dat) %>%
dplyr::select(developed11,forest11,farm11,wetlands11,otherUndeveloped11,water11) %>%
st_set_geometry(NULL) %>%
bind_cols(.,dat) %>%
st_sf() %>%
st_cast("POLYGON")
dat2 %>%
gather(var,value,developed11:water11) %>%
st_centroid() %>%
mutate(X = st_coordinates(.)[,1],
Y = st_coordinates(.)[,2]) %>%
ggplot() +
geom_sf(data=houstonMSA) +
geom_point(aes(X,Y, colour=as.factor(value))) +
facet_wrap(~var) +
scale_colour_manual(values = palette2,
labels=c("Other","Land Cover"),
name = "") +
labs(title = "Land cover types, 2011",
subtitle = "As fishnet centroids") +
mapTheme()
2 The amount of sensitive land (wetland and forest) lost between 2001 and 2011.
dat2 <-
dat2 %>%
mutate(sensitive_lost11 = ifelse(forest == 1 & forest11 == 0 |
wetlands == 1 & wetlands11 == 0,1,0))
ggplot() +
geom_point(data=dat2, aes(x=xyC(dat2)[,1], y=xyC(dat2)[,2], colour=as.factor(sensitive_lost11))) +
scale_colour_manual(values = palette2,
labels=c("No Change","Sensitive Lost"),
name = "") +
labs(title = "Sensitive lands lost: 2001 - 2011",
subtitle = "As fishnet centroids") +
mapTheme()
3 The total area of large sensitive landscape ‘patches’ in 2011.Only sensitive_regions with areas greater than 1 acre are included.
#install.packages("igraph")
sensitiveRegions <-
clump(wetlands11 + forest11) %>%
rasterToPolygons() %>%
st_as_sf() %>%
group_by(clumps) %>% summarize() %>%
mutate(Acres = as.numeric(st_area(.) * 0.0000229568)) %>%
filter(Acres > 3954) %>%
dplyr::select() %>%
rasterize(.,emptyRaster)
sensitiveRegions[sensitiveRegions > 0] <- 1
names(sensitiveRegions) <- "sensitiveRegions"
dat2 <-
aggregateRaster(c(sensitiveRegions), dat2) %>%
dplyr::select(sensitiveRegions) %>%
st_set_geometry(NULL) %>%
bind_cols(.,dat2) %>%
st_sf()
ggplot() +
geom_point(data=dat2, aes(x=xyC(dat2)[,1], y=xyC(dat2)[,2], colour=as.factor(sensitiveRegions))) +
scale_colour_manual(values = palette2,
labels=c("Other","Sensitive Regions"),
name="") +
labs(title = "Sensitive regions",
subtitle = "Continous areas of either wetlands or forests\ngreater than 1 acre") +
mapTheme()
To sum up, a small multiple plot is created to provide both supply and demand side analytics by county. The plot gives a sense for development demand (Demand-Side), suitable land for development (Suitable) and sensitive land (Not Suitable).
county_specific_metrics <-
dat2 %>%
#predict development demand from our model
mutate(Development_Demand = predict(Model6, dat2, type="response")) %>%
#get a count count of grid cells by county which we can use to calculate rates below
left_join(st_set_geometry(dat, NULL) %>% group_by(NAME) %>% summarize(count = n())) %>%
#calculate summary statistics by county
group_by(NAME) %>%
summarize(Total_Farmland = sum(farm11) / max(count),
Total_Forest = sum(forest11) / max(count),
Total_Wetlands = sum(wetlands11) / max(count),
Total_Undeveloped = sum(otherUndeveloped11) / max(count),
Sensitive_Land_Lost = sum(sensitive_lost11) / max(count),
Sensitive_Regions = sum(sensitiveRegions) / max(count),
Mean_Development_Demand = mean(Development_Demand)) %>%
#get population data by county
left_join(countyPopulation_2020 %>%
mutate(Population_Change = county_projection_2020 - county_population_2010,
Population_Change_Rate = Population_Change / county_projection_2020) %>%
dplyr::select(NAME,Population_Change_Rate))
county_specific_metrics %>%
gather(Variable, Value, -NAME, -geometry) %>%
mutate(Variable = factor(Variable, levels=c("Population_Change_Rate","Mean_Development_Demand",
"Total_Farmland","Total_Undeveloped","Total_Forest",
"Total_Wetlands","Sensitive_Land_Lost","Sensitive_Regions",
ordered = TRUE))) %>%
mutate(Planning_Designation = case_when(
Variable == "Population_Change_Rate" | Variable == "Mean_Development_Demand" ~ "Demand-Side",
Variable == "Total_Farmland" | Variable == "Total_Undeveloped" ~ "Suitable",
TRUE ~ "Not Suitable")) %>%
ggplot(aes(x=Variable, y=Value, fill=Planning_Designation)) +
geom_bar(stat="identity", position=position_dodge(), colour="black") +
facet_wrap(~NAME, ncol=5) +
coord_flip() +
scale_y_continuous(breaks = seq(.25, 1, by = .25)) +
geom_vline(xintercept = 2.5) + geom_vline(xintercept = 4.5) +
scale_fill_manual(values=c("black","red","darkgreen")) +
labs(title= "County Specific Allocation Metrics", subtitle= "As rates", x="Indicator", y="Rate") +
plotTheme() + theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position="bottom")
In this scenario, we can clearly see that the counties with much development demand are mostly the counties with much forest land. These counties are located around the boundary of Atlanta MSA, indicating a new wave of sprawl and forest loss if no action is taken. On the other hand, the 5 counties of core metro Atlanta are not showing much development demand.
In the previous demand-change scenario, each county’s 2020 population is projected based on the 2010 population and a general growth trend (14.4% increase / decade) in the state of Georgia. In this alternative scenario, we consider a new large-scale development, which will change the supply side and result in population redistribution.
In 2018, the Metropolitan Atlanta Rapid Transit Authority (MARTA) agreed $570m funding for light rail along 15 miles of the BeltLine – a 22-mile ring of abandoned and active freight rail lines that is being slowly transformed into a transit and trails loop.
BeltLine Proposal
If the transformation is completed, Atlanta can look forward to a future of high-density housing and walkable neighborhoods of cycling and public transit. This supply-side change will increase population in core metro Atlanta and slow down urban sprawl in marginal counties. Therefore, we adjust the 5 core counties’ population growth rates to 20% and the other counties’ population growth rates to 10% to redistribute population in the MSA.
dat <-
dat %>%
mutate(lagDevelopment = nn_function(xyC(.), xyC(filter(.,developed10 == 1)),2))
countyPopulation_2020 <-
data.frame(
NAME =
c("Fayette","Carroll","Rockdale","Cobb","Forsyth",
"Clayton","Henry","Dawson","Bartow","Lamar",
"Haralson","Meriwether","Newton","Gwinnett",
"Fulton","Pickens","Hall","Spalding","Douglas",
"Coweta","Heard","Butts","Jasper",
"DeKalb","Cherokee","Walton","Pike","Barrow",
"Paulding"),
county_projection_2020 =
c(117223.7,121579.7,93736.5,825693.6,193062.1,311308.8,224314.2,24563,110172.7,20148.7,31658,24191.2,109953.8,966385.2,1104697.2,32374.1,197652.4,70480.3,145643.3,140048.7,13017.4,26020.5,15290,830271.6,235780.6,92144.8,19655.9,76303.7,156556.4)) %>%
left_join(dat %>%
st_set_geometry(NULL) %>%
group_by(NAME) %>%
summarize(county_population_2010 = round(sum(pop_2010))))
countyPopulation_2020 %>%
gather(Variable,Value, -NAME) %>%
ggplot(aes(reorder(NAME,-Value),Value)) +
geom_bar(aes(fill=Variable), stat = "identity") +
scale_fill_manual(values = palette2,
labels=c("2020","2010"),
name="Population") +
labs(title="Population Change by county: 2010 - 2020",
x="County", y="Population") +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
plotTheme()
dat_infill <-
dat %>%
#calculate population change
left_join(countyPopulation_2020) %>%
mutate(proportion_of_county_pop = pop_2010 / county_population_2010,
pop_2020.infill = proportion_of_county_pop * county_projection_2020,
pop_Change = round(pop_2020.infill - pop_2010),2) %>%
dplyr::select(-county_projection_2020, -county_population_2010,
-proportion_of_county_pop, -pop_2020.infill) %>%
#predict for 2020
mutate(predict_2020.infill = predict(Model6,. , type="response"))
dat_infill %>%
ggplot() +
geom_point(aes(x=xyC(dat_infill)[,1], y=xyC(dat_infill)[,2], colour = factor(ntile(predict_2020.infill,5)))) +
scale_colour_manual(values = palette5,
labels=substr(quintileBreaks(dat_infill,"predict_2020.infill"),1,4),
name="Quintile\nBreaks") +
geom_sf(data=studyAreaCounties, fill=NA, colour="black", size=1.5) +
labs(title= "Development Demand in 2020: Predicted Probabilities") +
mapTheme()
As a result, the development demand increased around the core metro Atlanta and decreased in marginal counties, which is beneficial for the preventing further loss of forest land.
dat2 <-
aggregateRaster(theRasterList11, dat) %>%
dplyr::select(developed11,forest11,farm11,wetlands11,otherUndeveloped11,water11) %>%
st_set_geometry(NULL) %>%
bind_cols(.,dat) %>%
st_sf() %>%
st_cast("POLYGON")
dat2 <-
dat2 %>%
mutate(sensitive_lost11 = ifelse(forest == 1 & forest11 == 0 |
wetlands == 1 & wetlands11 == 0,1,0))
dat2 <-
aggregateRaster(c(sensitiveRegions), dat2) %>%
dplyr::select(sensitiveRegions) %>%
st_set_geometry(NULL) %>%
bind_cols(.,dat2) %>%
st_sf()
county_specific_metrics <-
dat2 %>%
#predict development demand from our model
mutate(Development_Demand = predict(Model6, dat2, type="response")) %>%
#get a count count of grid cells by county which we can use to calculate rates below
left_join(st_set_geometry(dat, NULL) %>% group_by(NAME) %>% summarize(count = n())) %>%
#calculate summary statistics by county
group_by(NAME) %>%
summarize(Total_Farmland = sum(farm11) / max(count),
Total_Forest = sum(forest11) / max(count),
Total_Wetlands = sum(wetlands11) / max(count),
Total_Undeveloped = sum(otherUndeveloped11) / max(count),
Sensitive_Land_Lost = sum(sensitive_lost11) / max(count),
Sensitive_Regions = sum(sensitiveRegions) / max(count),
Mean_Development_Demand = mean(Development_Demand)) %>%
#get population data by county
left_join(countyPopulation_2020 %>%
mutate(Population_Change = county_projection_2020 - county_population_2010,
Population_Change_Rate = Population_Change / county_projection_2020) %>%
dplyr::select(NAME,Population_Change_Rate))
county_specific_metrics %>%
gather(Variable, Value, -NAME, -geometry) %>%
mutate(Variable = factor(Variable, levels=c("Population_Change_Rate","Mean_Development_Demand",
"Total_Farmland","Total_Undeveloped","Total_Forest",
"Total_Wetlands","Sensitive_Land_Lost","Sensitive_Regions",
ordered = TRUE))) %>%
mutate(Planning_Designation = case_when(
Variable == "Population_Change_Rate" | Variable == "Mean_Development_Demand" ~ "Demand-Side",
Variable == "Total_Farmland" | Variable == "Total_Undeveloped" ~ "Suitable",
TRUE ~ "Not Suitable")) %>%
ggplot(aes(x=Variable, y=Value, fill=Planning_Designation)) +
geom_bar(stat="identity", position=position_dodge(), colour="black") +
facet_wrap(~NAME, ncol=5) +
coord_flip() +
scale_y_continuous(breaks = seq(.25, 1, by = .25)) +
geom_vline(xintercept = 2.5) + geom_vline(xintercept = 4.5) +
scale_fill_manual(values=c("black","red","darkgreen")) +
labs(title= "County Specific Allocation Metrics", subtitle= "As rates", x="Indicator", y="Rate") +
plotTheme() + theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position="bottom")
Now that both demand and supply is understood, Planners can allocate development rights accordingly. This detailed allocation procedure should be decided by county, and could take many forms of regulation including zoning, subdivision approval or outright conservation. However, the Metropolitan Planning Organization (MPO) will allocate the total number of development permission in each county according to the demand-side development trend and supply-side development suitability.
In this section, we visualize the demand and supply for two counties, DeKalb and Barrow, to illustrate the MPO allocation procedure.
fortBend <-
dat2 %>%
mutate(Development_Demand = predict(Model6, dat2, type="response")) %>%
filter(NAME == "DeKalb")
fortBend_landUse <- rbind(
filter(fortBend, forest11 == 1 | wetlands11 == 1 ) %>%
dplyr::select() %>% mutate(Land_Use = "Not Suitable"),
filter(fortBend, developed11 == 1) %>%
dplyr::select() %>% mutate(Land_Use = "Developed"))
grid.arrange(
ggplot() +
geom_sf(data=fortBend, aes(fill=factor(ntile(Development_Demand,5))), colour=NA) +
geom_point(data=fortBend_landUse, aes(x=xyC(fortBend_landUse)[,1],
y=xyC(fortBend_landUse)[,2], colour=Land_Use),
shape = 15, size = 2) +
geom_sf(data=st_intersection(houstonHighways,filter(studyAreaCounties, NAME=="Fort Bend")), size=2) +
scale_fill_manual(values = palette5, name="Development\nDemand",
labels=substr(quintileBreaks(fortBend,"Development_Demand"),1,5)) +
scale_colour_manual(values = c("black","red")) +
labs(title = "Development Potential, 2020: DeKalb") + mapTheme() +
guides(fill = guide_legend(order = 1), colour = guide_legend(order = 2)),
ggplot() +
geom_sf(data=fortBend, aes(fill=factor(ntile(pop_Change,5))), colour=NA) +
geom_point(data=fortBend_landUse, aes(x=xyC(fortBend_landUse)[,1],
y=xyC(fortBend_landUse)[,2], colour=Land_Use),
shape = 15, size = 2) +
geom_sf(data=st_intersection(houstonHighways,filter(studyAreaCounties, NAME=="Fort Bend")), size=2) +
scale_fill_manual(values = palette5, name="Population\nChange",
labels=substr(quintileBreaks(fortBend,"pop_Change"),1,5)) +
scale_colour_manual(values = c("black","red")) +
labs(title = "Projected Population, 2020: DeKalb") + mapTheme() +
guides(fill = guide_legend(order = 1), colour = guide_legend(order = 2)), ncol=2)
In DeKalb County, an area west of core metro Atlanta, most suitable land is already developed. Any further allocation would cause forest land loss. Thus, it is less conducive to growth.
fortBend <-
dat2 %>%
mutate(Development_Demand = predict(Model6, dat2, type="response")) %>%
filter(NAME == "Barrow")
fortBend_landUse <- rbind(
filter(fortBend, forest11 == 1 | wetlands11 == 1 ) %>%
dplyr::select() %>% mutate(Land_Use = "Not Suitable"),
filter(fortBend, developed11 == 1) %>%
dplyr::select() %>% mutate(Land_Use = "Developed"))
grid.arrange(
ggplot() +
geom_sf(data=fortBend, aes(fill=factor(ntile(Development_Demand,5))), colour=NA) +
geom_point(data=fortBend_landUse, aes(x=xyC(fortBend_landUse)[,1],
y=xyC(fortBend_landUse)[,2], colour=Land_Use),
shape = 15, size = 2) +
geom_sf(data=st_intersection(houstonHighways,filter(studyAreaCounties, NAME=="Fort Bend")), size=2) +
scale_fill_manual(values = palette5, name="Development\nDemand",
labels=substr(quintileBreaks(fortBend,"Development_Demand"),1,5)) +
scale_colour_manual(values = c("black","red")) +
labs(title = "Development Potential, 2020: Barrow") + mapTheme() +
guides(fill = guide_legend(order = 1), colour = guide_legend(order = 2)),
ggplot() +
geom_sf(data=fortBend, aes(fill=factor(ntile(pop_Change,5))), colour=NA) +
geom_point(data=fortBend_landUse, aes(x=xyC(fortBend_landUse)[,1],
y=xyC(fortBend_landUse)[,2], colour=Land_Use),
shape = 15, size = 2) +
geom_sf(data=st_intersection(houstonHighways,filter(studyAreaCounties, NAME=="Fort Bend")), size=2) +
scale_fill_manual(values = palette5, name="Population\nChange",
labels=substr(quintileBreaks(fortBend,"pop_Change"),1,5)) +
scale_colour_manual(values = c("black","red")) +
labs(title = "Projected Population, 2020: Barrow") + mapTheme() +
guides(fill = guide_legend(order = 1), colour = guide_legend(order = 2)), ncol=2)
Conversely, in Barrow, the county north and west of Atlanta, the data suggests both population and development demand will increase. In addition, there is a high rate of developable land and a relatively low supply of sensitive forest land. Thus, it is well suitable to allocate new development.