Processing shape data - sparklabnyc/resources GitHub Wiki
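Be careful: the mainland US includes three island counties (Dukes County and Nantucket County in Massachusetts, and San Juan County in Washington) that share no land border with any other county, so a contiguity-based adjacency matrix leaves them without neighbours. I connected each of them manually to the county reached by the nearest ferry.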
library(sf)
library(spdep)
# Load the county boundary shapefile and expose its GEOID column under the
# name `fips`, which is the key used to match against `df`.
shape <- read_sf("data/raw/cb_2015_us_county_500k.shp")
shape <- st_as_sf(shape)
shape <- mutate(shape, fips = GEOID)

# `df` is created elsewhere and is expected to hold mainland US counties only.
# Print the FIPS codes present in the shapefile but absent from `df`; these
# should all start with one of:
#   02 Alaska, 15 Hawaii, 60 American Samoa, 69 Northern Mariana Islands,
#   72 Puerto Rico, 78 Virgin Islands
unique(shape$fips[!(shape$fips %in% df$fips)])

# Keep the mainland counties only. `fips_id` is a dense 1..n integer index
# following the sorted GEOID values, and the rows are put in that order so that
# row i of `shape` is the county with fips_id == i.
shape <- filter(shape, fips %in% df$fips)
shape <- mutate(shape, fips_id = as.integer(as.factor(GEOID)))
shape <- select(shape, fips, fips_id, geometry)
shape <- arrange(shape, fips_id)
# Calculate the adjacency weights: build the neighbour list from the county
# polygons (poly2nb defaults), one entry per row of `shape`.
nb <- poly2nb(shape)

# Binary (0/1) adjacency matrix. zero.policy = TRUE is needed because the
# island counties below have an empty neighbour set at this point.
mat <- nb2mat(nb, style = "B", zero.policy = TRUE)

# Add in the missing connections. Each island county is linked by hand to the
# county reached by the nearest ferry:
#   25007 - Dukes County, MA (aka Martha's Vineyard) -> 25001
#   25019 - Nantucket County, MA                      -> 25007
#   53055 - San Juan County, WA                       -> 53057
ferry_links <- rbind(
  c("25007", "25001"),
  c("25019", "25007"),
  c("53055", "53057")
)

# Resolve the row/column positions from the FIPS codes rather than hard-coding
# fips_id values (previously 1187/1184/1193 and 2947/2948): those numbers are
# only valid for one exact set of counties in `df` and would silently link the
# wrong counties if that set ever changed. Rows of `mat` follow rows of `shape`.
link_idx <- cbind(
  match(ferry_links[, 1], shape$fips),
  match(ferry_links[, 2], shape$fips)
)
if (anyNA(link_idx)) {
  stop(
    "ferry link refers to a county that is not in `shape`: ",
    paste(setdiff(ferry_links, shape$fips), collapse = ", "),
    call. = FALSE
  )
}

# Set both directions so the matrix stays symmetric.
mat[link_idx] <- 1
mat[link_idx[, 2:1, drop = FALSE]] <- 1

# Fail loudly instead of just printing the result: an asymmetric matrix or a
# county with zero neighbours must not make it into the exported file.
stopifnot(
  "adjacency matrix is not symmetric" = all(mat == t(mat)),
  "some counties have zero neighbours" = min(colSums(mat)) > 0
)
# Export the adjacency matrix as a CSV whose row and column labels are the
# county FIPS codes (same order as the rows of `shape`).
adj <- data.frame(mat)
dimnames(adj) <- list(shape$fips, shape$fips)
write.csv(adj, file = "data/raw/countyadj.csv", row.names = TRUE)
# County centroids: X / Y coordinates of each county's centroid (in whatever
# CRS `shape` carries), keyed by FIPS code and sorted by it.
centroids <- as_tibble(st_coordinates(st_centroid(shape)))
# The coordinate rows come back in the row order of `shape`, so the FIPS codes
# can be attached positionally.
centroids$fips <- shape$fips
centroids <- arrange(centroids[, c("fips", "X", "Y")], fips)
write_csv(centroids, "data/raw/county_centroids.csv")