library(tidyverse)
library(sf)
library(units)
library(hrbrthemes)
library(gganimate)
library(tweenr)
library(scales)
library(leaflet)
library(leaflet.extras)
library(widgetframe)
theme_set(theme_bw(base_size = 13))
options(digits = 4, scipen = 999)
This March, researchers at the University of Georgia and Florida State University released the HHUUD10 dataset, which contains estimates of the number of housing units for decennial census years 1940-2010 and 2019. A “housing unit” could be a studio apartment or 5 bedroom single-family home. The data uses 2010 census tracts, which allows for historical comparison of housing trends across constant geometry. The full paper explains the approach.
This paper and the dataset can be used for a wide variety of socioeconomic issues. I will focus on suburbanization trends in the Pittsburgh area.
Configuration and load data
The dataset is available in multiple formats, but I chose the Esri geodatabase format because plays nice with {sf}
. The dataset uses the value 2035
in some numeric fields to indicate missing data, so I replace those values with NA
. I also use the fancy new tigris::erase_water()
function to easily “erase” the water from the census tracts to make the rivers stand out.
<- st_read("post_data/HHUUD10.gdb") |>
ac_housing rename(geometry = Shape) |>
mutate(UY1 = na_if(UY1, 2035),
UY2 = na_if(UY2, 2035)) |>
filter(STATE == "PA",
== "Allegheny") |>
COUNTY select(STATE, COUNTY, GEOID10, UY1, UY2, starts_with("hu"), starts_with("sqmi"), starts_with("pdev")) |>
::erase_water() tigris
Reading layer `HHUUD10' from data source
`/Users/conorotompkins/Documents/github_repos/ctompkins_quarto_blog/posts/suburbanization-of-allegheny-county/post_data/HHUUD10.gdb'
using driver `OpenFileGDB'
Simple feature collection with 72271 features and 29 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -2356000 ymin: -1338000 xmax: 2258000 ymax: 1566000
Projected CRS: USA_Contiguous_Albers_Equal_Area_Conic
<- ac_housing |>
best_crs ::suggest_crs() |>
crsuggestslice_head(n = 1) |>
select(crs_code) |>
pull() |>
as.numeric()
<- ac_housing |>
ac_housing st_transform(crs = best_crs)
glimpse(ac_housing)
Rows: 402
Columns: 27
$ STATE <chr> "PA", "PA", "PA", "PA", "PA", "PA", "PA", "PA", "PA", "PA", "…
$ COUNTY <chr> "Allegheny", "Allegheny", "Allegheny", "Allegheny", "Alleghen…
$ GEOID10 <chr> "42003560500", "42003560400", "42003552400", "42003552300", "…
$ UY1 <dbl> 1940, 1940, 1940, 1940, 1940, 1940, 1940, 1940, 1940, 1940, 1…
$ UY2 <dbl> 1940, 1940, 1940, 1940, 1940, 1940, 1940, 1940, 1940, 1940, 1…
$ hu40 <dbl> 1348.923, 1329.439, 2069.012, 1951.163, 1041.277, 1900.687, 1…
$ hu50 <dbl> 1509.124, 1409.404, 2099.138, 1928.245, 1034.381, 1940.134, 1…
$ hu60 <dbl> 1514.716, 1405.699, 1873.231, 2184.759, 909.613, 1981.987, 97…
$ hu70 <dbl> 1440.54, 943.96, 1838.68, 2038.15, 828.13, 1402.99, 964.57, 1…
$ hu80 <dbl> 1423.674, 1219.525, 1687.919, 1935.480, 842.828, 1174.994, 74…
$ hu90 <dbl> 1433.35, 1151.33, 1674.71, 1725.23, 767.89, 909.00, 882.08, 1…
$ hu00 <dbl> 1381.24, 1069.21, 1607.74, 1454.10, 692.14, 639.00, 928.18, 1…
$ hu10 <dbl> 1349, 994, 1560, 1230, 582, 569, 849, 1386, 731, 778, 746, 12…
$ hu19 <dbl> 1487, 1140, 1759, 1336, 622, 574, 807, 1381, 157, 877, 855, 1…
$ sqmi40 <dbl> 0.17941, 0.19734, 0.81347, 0.32084, 0.31473, 0.27161, 0.21986…
$ sqmi50 <dbl> 0.17941, 0.19734, 0.81347, 0.32084, 0.31473, 0.27161, 0.21986…
$ sqmi60 <dbl> 0.17941, 0.19734, 0.81347, 0.32084, 0.31473, 0.27161, 0.21986…
$ sqmi70 <dbl> 0.17941, 0.19734, 0.81347, 0.32084, 0.31473, 0.27161, 0.21986…
$ sqmi80 <dbl> 0.17941, 0.19734, 0.81347, 0.32084, 0.31473, 0.27161, 0.21986…
$ sqmi90 <dbl> 0.17941, 0.19734, 0.81347, 0.32084, 0.31473, 0.27161, 0.21986…
$ sqmi00 <dbl> 0.17941, 0.19734, 0.81347, 0.32084, 0.31473, 0.27161, 0.21986…
$ sqmi10 <dbl> 0.17941, 0.19734, 0.81347, 0.32084, 0.31473, 0.27161, 0.21986…
$ sqmi19 <dbl> 0.17941, 0.19734, 0.81347, 0.32084, 0.31473, 0.27161, 0.21986…
$ pdev92 <dbl> 0.8767, 0.9417, 0.7430, 0.8684, 0.7597, 0.8391, 0.7064, 0.924…
$ pdev01 <dbl> 1.0000, 1.0000, 0.8913, 1.0000, 0.9370, 0.9887, 0.9029, 1.000…
$ pdev11 <dbl> 1.0000, 1.0000, 0.9079, 1.0000, 0.9370, 0.9887, 0.9029, 1.000…
$ geometry <POLYGON [US_survey_foot]> POLYGON ((1373906 410182, 1..., POLYGON …
Overall trend
Fix date formatting
Since the data comes in a wide format, I pivot it long and fix up the year column to make it easy to graph with.
<- ac_housing |>
ac_housing_hu select(GEOID10, starts_with("hu")) |>
pivot_longer(cols = starts_with("hu"), names_to = "year", values_to = "housing_units")
<- ac_housing_hu |>
year_lookup st_drop_geometry() |>
distinct(year) |>
mutate(year_fixed = c(1940, 1950, 1960, 1970, 1980, 1990, 2000, 2010, 2019))
<- ac_housing_hu |>
ac_housing_hu left_join(year_lookup) |>
select(-year) |>
rename(year = year_fixed)
glimpse(ac_housing_hu)
Rows: 3,618
Columns: 4
$ GEOID10 <chr> "42003560500", "42003560500", "42003560500", "4200356050…
$ geometry <POLYGON [US_survey_foot]> POLYGON ((1373906 410182, 1..., POL…
$ housing_units <dbl> 1349, 1509, 1515, 1441, 1424, 1433, 1381, 1349, 1487, 13…
$ year <dbl> 1940, 1950, 1960, 1970, 1980, 1990, 2000, 2010, 2019, 19…
The number of housing units in the county stagnated after 1960, which is expected given the collapse of the steel industry.
|>
ac_housing_hu st_drop_geometry() |>
mutate(year = as.character(year) |> fct_inorder()) |>
group_by(year) |>
summarize(housing_units = sum(housing_units)) |>
ungroup() |>
ggplot(aes(year, housing_units, group = 1)) +
geom_line() +
geom_point() +
scale_y_comma() +
labs(x = "Year",
y = "Housing units")
The decennial difference in “gross” housing units also shows that growth stagnated after 1960.
|>
ac_housing_hu st_drop_geometry() |>
mutate(year = as.character(year) |> fct_inorder()) |>
group_by(year) |>
summarize(housing_units = sum(housing_units)) |>
ungroup() |>
mutate(diff = housing_units - lag(housing_units)) |>
ggplot(aes(year, diff, group = 1)) +
geom_line() +
geom_point() +
scale_y_comma(prefix = "+ ") +
coord_cartesian(ylim = c(0, 90000)) +
labs(title = "Growth stagnated after 1960",
x = "Year",
y = "Change in housing units")
Change from 1940 to 2019
This interactive map shows the areas that gained or lost the most housing units from 1940-2019. Dense housing around industrial areas along the Allegheny and Monongahela Rivers was erased. Homestead and Braddock stand out.
<- ac_housing_hu |>
hu_diff group_by(GEOID10) |>
filter(year == min(year) | year == max(year)) |>
ungroup() |>
select(GEOID10, year, housing_units) |>
as_tibble() |>
pivot_wider(names_from = year, names_prefix = "units_", values_from = housing_units) |>
mutate(diff = units_2019 - units_1940) |>
st_as_sf()
<- colorNumeric(
pal palette = "viridis",
domain = hu_diff$diff)
<- hu_diff |>
leaflet_map mutate(diff_formatted = comma(diff, accuracy = 1),
diff_label = str_c("Census tract: ", GEOID10, "<br/>", "Difference: ", diff_formatted)) |>
st_transform(crs = 4326) |>
leaflet() |>
setView(lat = 40.441606, lng = -80.010957, zoom = 10) |>
addProviderTiles(providers$Stamen.TonerLite,
options = providerTileOptions(noWrap = TRUE,
minZoom = 9),
group = "Base map") |>
addPolygons(popup = ~ diff_label,
fillColor = ~pal(diff),
fillOpacity = .7,
color = "black",
weight = 1,
group = "Housing") |>
addLegend("bottomright", pal = pal, values = ~diff,
title = "Difference",
opacity = 1) |>
addLayersControl(overlayGroups = c("Base map", "Housing"),
options = layersControlOptions(collapsed = FALSE)) |>
addFullscreenControl()
leaflet_map