Introduction to Redistribution • redistribute

Built with R 4.3.1

The basic premise behind any form of redistribution is that a given set of observation can be translated to some superset or subset.

This means the basic requirements for redistribution are:

a source with observations
a target without observations
a map between source and target entities

The map associates a given source ID to a given target ID. If one source ID maps onto many target IDs, then we are disaggregating the source observation (translating one observation to many). If each source ID maps into a single target ID (and potentially many source IDs are mapping onto that target ID), then we are aggregating the source observations (translating many observations to one).

Basic Example

Say we have 1 set of 5 regions:

regions <- data.frame(id = 1:5)
regions
#>   id
#> 1  1
#> 2  2
#> 3  3
#> 4  4
#> 5  5

Disaggregate

If we have a single observation for the entire set, we could disaggregate it to the regions. With no additional information, our best guess for the value of each region is a proportional split – in this case, one fifth of the observed value for each region:

# install if needed: remotes::install_github("uva-bi-sdad/redistribute")
library(redistribute)

set_value <- 1
(redistribute(set_value, regions))
#>   id source
#> 1  1    0.2
#> 2  2    0.2
#> 3  3    0.2
#> 4  4    0.2
#> 5  5    0.2

Or, maybe we know a little more about the regions, such as their size; then we could use that information to adjust the proportion allotted to each region:

region_values <- redistribute(
  set_value, regions,
  weight = c(1, 10, 10, 20, 50)
)
region_values
#>   id     source
#> 1  1 0.01098901
#> 2  2 0.10989011
#> 3  3 0.10989011
#> 4  4 0.21978022
#> 5  5 0.54945055

Aggregate

If we have observations for all regions, we could aggregate to a single value for the set. In this case, we can re-aggregate what we initially disaggregated, to recover the original observation:

(redistribute(region_values))
#>   id source
#> 1  1      1

Applied Example

One use case for redistribution is converting demographics data between geographic layers.

For illustration, we can look at U.S. Census data in Fairfax, Virginia:

base_dir <- "~/Downloads"

# remotes::install_github("uva-bi-sdad/catchment")
library(catchment)
library(sf)

# download population and data shapes
population <- download_census_population(
  base_dir, "VA", 2020,
  counties = c("51059", "51600")
)$estimates
#> ℹ loading existing Virginia population data
#> ℹ subsetting results
population[, -1] <- vapply(
  population[, -1], as.numeric, numeric(nrow(population))
)
rownames(population) <- population$GEOID
block_groups <- st_transform(
  download_census_shapes(base_dir, "VA", "bg", year = 2020), "WGS84"
)
block_groups <- block_groups[block_groups$GEOID %in% population$GEOID, ]
population <- population[block_groups$GEOID, ]
st_geometry(population) <- block_groups$geometry
population$Overall <- population$TOTAL.POPULATION_Total

This population information is provided at the Census Block Group level at the lowest, but we might want to look at population within zip codes.

We could try aggregating directly from block groups to zip codes, which involves calculating proportional intersections between region polygons:

# Download shapes if needed
zipcode_file <- paste0(base_dir, "/zipcode_va_fairfax.geojson")
if (!file.exists(zipcode_file)) {
  download.file(paste0(
    "https://www.fairfaxcounty.gov/euclid/rest/services/IPLS/IPLSMap",
    "/MapServer/3/query?where=1=1&outFields=*&outSR=4326&f=geojson"
  ), zipcode_file)
}
zipcodes <- read_sf(zipcode_file)

# redistribute population data from block groups
zipcode_population <- redistribute(
  population, zipcodes,
  target_id = "ZIPCODE"
)

We could also disaggregate to the parcel level, which are point locations, then aggregate to zipcodes:

# Download shapes if needed
## https://data-fairfaxcountygis.opendata.arcgis.com/datasets/current-population
parcel_file <- paste0(base_dir, "/parcel_va_fairfax.geojson")
if (!file.exists(parcel_file)) {
  download.file(paste0(
    "https://opendata.arcgis.com/api/v3/datasets/",
    "314bfe4019754952a715be3a33384d9d_0/downloads/data",
    "?format=geojson&spatialRefId=4326&where=1=1"
  ), parcel_file)
}
parcels <- read_sf(parcel_file)

# disaggregate population data from block groups to parcels
bg_parcel_population <- redistribute(
  population, parcels,
  weight = "CURRE_POPUL"
)

# then aggregate from parcels to zip codes
bg_parcel_zipcode_population <- redistribute(
  bg_parcel_population, zipcodes,
  source_id = "id", target_id = "ZIPCODE", default_value = 0
)

# since it's provided in this case, we can also just aggregate
# up from parcels directly
parcel_zipcode_population <- redistribute(
  parcels, zipcodes,
  source_id = "PIN", target_id = "ZIPCODE", default_value = 0
)

Now we can compare the estimated total population of these different methods with those provided:

library(leaflet)
all_values <- c(
  zipcodes$POPULATION, zipcode_population$Overall,
  bg_parcel_zipcode_population$Overall, parcel_zipcode_population$CURRE_POPUL
)
pal <- colorNumeric(scico::scico(255, palette = "lajolla"), population$Overall)
pal_zip <- colorNumeric(scico::scico(255, palette = "lajolla"), all_values)
zip_combined <- cbind(
  zipcodes[, c("ZIPCODE", "POPULATION")],
  parcel_zip = parcel_zipcode_population$CURRE_POPUL,
  bg_zip = zipcode_population$Overall,
  bg_parcel_zip = bg_parcel_zipcode_population$Overall
)
leaflet(
  rmapshaper::ms_simplify(zip_combined, keep_shapes = TRUE),
  options = leafletOptions(attributionControl = FALSE)
) |>
  addProviderTiles("CartoDB.Positron") |>
  addScaleBar("bottomleft") |>
  addControl("Total Population", "topright") |>
  addLayersControl(
    position = "topleft", overlayGroups = "Block Groups",
    baseGroups = c(
      "Zip Codes", "Parcels -> Zip Codes", "Block Groups -> Zip Codes",
      "Block Groups -> Parcels -> Zip Codes"
    )
  ) |>
  addPolygons(
    data = rmapshaper::ms_simplify(
      population[, c("GEOID", "Overall")], keep_shapes = TRUE
    ),
    fillColor = ~ pal(Overall), fillOpacity = 1, weight = 1,
    color = "#000", highlightOptions = highlightOptions(color = "#fff"),
    group = "Block Groups", label = ~ paste(
      GEOID, "Population:", round(Overall, 3)
    )
  ) |>
  addLegend(
    "bottomright", pal, population$Overall,
    title = "Block Groups", opacity = 1
  ) |>
  addPolygons(
    fillColor = ~ pal_zip(POPULATION), fillOpacity = .8, weight = 1,
    color = "#000", highlightOptions = highlightOptions(color = "#fff"),
    group = "Zip Codes", label = ~ paste(
      ZIPCODE, "Population:", round(POPULATION, 3)
    )
  ) |>
  hideGroup("Zip Codes") |>
  addPolygons(
    fillColor = ~ pal_zip(parcel_zip), fillOpacity = .8, weight = 1,
    color = "#000", highlightOptions = highlightOptions(color = "#fff"),
    group = "Parcels -> Zip Codes", label = ~ paste(
      ZIPCODE, "Population:", round(parcel_zip, 3)
    )
  ) |>
  hideGroup("Parcels -> Zip Codes") |>
  addPolygons(
    fillColor = ~ pal_zip(bg_zip), fillOpacity = .8, weight = 1,
    color = "#000", highlightOptions = highlightOptions(color = "#fff"),
    group = "Block Groups -> Zip Codes", label = ~ paste(
      ZIPCODE, "Population:", round(bg_zip, 3)
    )
  ) |>
  hideGroup("Block Groups -> Zip Codes") |>
  addPolygons(
    fillColor = ~ pal_zip(bg_parcel_zip), fillOpacity = .8, weight = 1,
    color = "#000", highlightOptions = highlightOptions(color = "#fff"),
    group = "Block Groups -> Parcels -> Zip Codes", label = ~ paste(
      ZIPCODE, "Population:", round(bg_parcel_zip, 3)
    )
  ) |>
  showGroup("Block Groups -> Parcels -> Zip Codes") |>
  addLegend("bottomright", pal_zip, all_values, opacity = 1, title = "Zip Codes")