| 1 |
#' Calculate Floating Catchment Area Ratios |
|
| 2 |
#' |
|
| 3 |
#' Calculate provider (supply) to consumer (demand) ratios within floating catchment areas. |
|
| 4 |
#' |
|
| 5 |
#' @param consumers Number of consumers (demand); either a vector with consumer amounts (such as population counts), |
|
| 6 |
#' or a matrix-like object with a column of IDs (located by \code{consumers_id}) and a column of amounts (located by
|
|
| 7 |
#' \code{consumers_value}).
|
|
| 8 |
#' @param providers Number of providers (supply); either a vector with provider amounts (such as number of doctors), |
|
| 9 |
#' or a matrix-like object with a column of IDs (located by \code{providers_id}) and a column of amounts (located by
|
|
| 10 |
#' \code{providers_value}).
|
|
| 11 |
#' @param cost A matrix-like object of cost associated with each pair of \code{consumers} and \code{providers}
|
|
| 12 |
#' (such as distance or travel times), with \code{consumers} in rows and \code{providers} in columns.
|
|
| 13 |
#' \code{cost}'s dimensions should be \code{c(length(consumers), length(providers))} if \code{consumers} and
|
|
| 14 |
#' \code{providers} are vectors, or will be aligned by name if available (as vector names or in \code{_id} columns).
|
|
| 15 |
#' If \code{NULL}, coordinate information will be looked for in \code{consumers} and \code{providers} (based on
|
|
| 16 |
#' \code{consumers_location} and \code{providers_location}), from which to calculate Euclidean distances.
|
|
| 17 |
#' Costs equal to \code{0} are set to \code{adjust_zeros} by default, and any missing costs are then set to \code{0}
|
|
| 18 |
#' -- if \code{0} indicates no connection in your \code{cost} matrix, set \code{adjust_zeros} to \code{FALSE}, or
|
|
| 19 |
#' set \code{0} to \code{NA} in your \code{cost} matrix.
|
|
| 20 |
#' @param weight Means of defining catchment areas and their topology / friction / impedance. The simplest is a single |
|
| 21 |
#' number representing a maximum distance between \code{consumers} and \code{providers} (2-step floating catchment area;
|
|
| 22 |
#' 2SFCA; Luo & Wang, 2003). |
|
| 23 |
#' An enhancement of this is a list of vectors with two values each: the first is a distance, and the second a weight |
|
| 24 |
#' to associate with that distance (e.g., \code{list(c(10, 1), c(20, .5))}, which will give consumers within a
|
|
| 25 |
#' \code{cost} of 10 full weight, and those within a \code{cost} of 20 half weight; enhanced 2-step floating catchment
|
|
| 26 |
#' area; E2SFCA; Luo & Qi 2009). If a character, refers to a weighting function (kernel density 2-step floating |
|
| 27 |
#' catchment area; KD2SFCA; Dai, 2010; in order from most gradual to steepest between costs of \code{1} and \code{6}):
|
|
| 28 |
#' \tabular{ll}{
|
|
| 29 |
#' \code{linear} (\code{li}) \tab \code{{w <- (scale - cost) / scale; w[w < 0] <- 0; w}}\cr
|
|
| 30 |
#' \code{gaussian} (\code{ga}) \tab \code{exp(-cost^2 / (2 * scale ^ 2))}\cr
|
|
| 31 |
#' \code{d*} (name of a density function; e.g., \code{"dnorm"}) \tab
|
|
| 32 |
#' \code{weight(cost, 0, scale)}
|
|
| 33 |
#' \cr |
|
| 34 |
#' \code{p*} (name of a distribution function; e.g., \code{"pnorm"}) \tab \code{weight(-cost, 0, scale)}\cr
|
|
| 35 |
#' \code{gravity} / \code{normal} (\code{gr} or \code{n}) \tab \code{sqrt(1 / cost^scale)}\cr
|
|
| 36 |
#' \code{logarithmic} (\code{loga}) \tab \code{1 / (1 + log(cost, scale))}\cr
|
|
| 37 |
#' \code{logistic} (\code{l}) \tab \code{1 / (1 + exp(scale * cost))}\cr
|
|
| 38 |
#' \code{exponential} (\code{e}) \tab \code{exp(-cost * scale)}\cr
|
|
| 39 |
#' } |
|
| 40 |
#' If a function, this will be passed \code{cost} as its first argument -- its output should be a matrix
|
|
| 41 |
#' convertible to a sparse matrix, of the same dimensions as \code{cost}. If a matrix-like object,
|
|
| 42 |
#' this will be converted to a sparse matrix. |
|
| 43 |
#' @param normalize_weight Logical; if \code{TRUE}, weight is row-normalized such that \code{consumers} weights
|
|
| 44 |
#' are spread across \code{providers} in range. This can help correct for the increased weight of \code{consumers}
|
|
| 45 |
#' when they are in range of multiple \code{providers}. Selection weights like this make the difference between 2-
|
|
| 46 |
#' and 3-step floating catchment areas (3SFCA; Wan, Zou, & Sternberg, 2012). |
|
| 47 |
#' @param scale Numeric scaling factor if \code{weight} is the name of a decay function.
|
|
| 48 |
#' @param max_cost Numeric limit on \code{cost}. This is the same as setting \code{weight} to a single value,
|
|
| 49 |
#' or specifying a list of steps as \code{weight} (where the most distant step is effectively \code{max_cost}),
|
|
| 50 |
#' although a single-value weight is exclusive (\code{cost < weight}) where steps are inclusive. This is most useful
|
|
| 51 |
#' when \code{weight} is a weighting function, where \code{max_cost} will trim the tail of the weight distribution.
|
|
| 52 |
#' @param adjust_consumers,adjust_providers A function to adjust weights when applied to \code{consumers} or
|
|
| 53 |
#' \code{providers}; should take the sparse weight matrix as its first argument, and return an adjusted matrix of the
|
|
| 54 |
#' same type. For example, you could square provider weights for the modified 2-step floating catchment area |
|
| 55 |
#' (M2SFCA; Delamater, 2013) with \code{adjust_providers = function(w) w ^ 2}, or standardize both weights for the
|
|
| 56 |
#' balanced floating catchment area (BFCA; Paez, Higgins, & Vivona, 2019) with \code{adjust_consumers = }
|
|
| 57 |
#' \code{function(w) w / rowSums(w)} and \code{adjust_providers = function(w) sweep(w, 2, colSums(w), "/")}.
|
|
| 58 |
#' When weights are adjusted independently in this way, region scores will likely no longer sum to the sum |
|
| 59 |
#' of \code{providers} (fewer than the total number of providers will be distributed).
|
|
| 60 |
#' @param adjust_zeros A number to set real \code{0}s to. Set to \code{FALSE} to prevent \code{0}s from being adjusted.
|
|
| 61 |
#' @param return_type Determines the values that are returned: \code{"original"} (default) for \code{providers}
|
|
| 62 |
#' per \code{consumers} (e.g., how many, likely fractional, doctors are accessible by each person within each region),
|
|
| 63 |
#' \code{"region"} for number of \code{providers} per \code{consumers} entry (\code{consumers * original}; e.g.,
|
|
| 64 |
#' how many doctors are accessible within each region), or \code{"normalized"} for \code{original} divided by
|
|
| 65 |
#' \code{sum(region) / sum(consumers)}. Can also be a number by which to multiply the original values (e.g., \code{1000}
|
|
| 66 |
#' for \code{providers} per 1,000 \code{consumers}). Alternatively \code{"supply"} will return just the
|
|
| 67 |
#' number of resources allocated to each consumer location (total weighted resources within each consumer's catchment area), |
|
| 68 |
#' or \code{"demand"} will return just the population per provider (total weighted population within each provider's catchment area).
|
|
| 69 |
#' @param consumers_commutes A square, consumers source x consumers origin matrix with counts of origins, |
|
| 70 |
#' used to specify multiple possible origins for each consumer location (e.g., consumers living in location 1 |
|
| 71 |
#' may work in locations 1 and 3, so the first row of \code{consumers_commutes} should have values in columns 1 and 3).
|
|
| 72 |
#' This can also be entered in place of \code{consumers}, assuming it includes all consumers (e.g., in a worker commute
|
|
| 73 |
#' matrix, you may need to add non-workers to the diagonal, if they are also consumers). |
|
| 74 |
#' @param consumers_id,consumers_value,consumers_location,providers_id,providers_value,providers_location Column |
|
| 75 |
#' names in \code{consumers} and/or \code{providers} to extract IDs, values, and location data (referring to a single
|
|
| 76 |
#' \code{sf} geometry column, or multiple columns with coordinates). These can also be used to directly enter
|
|
| 77 |
#' ID, value, and/or location vectors (or matrices for location coordinates). |
|
| 78 |
#' @param distance_metric Name of the distance metric to be used, if costs are being calculated from coordinates; |
|
| 79 |
#' defaults to \code{"euclidean"}; see \code{\link[lingmatch]{lma_simets}}.
|
|
| 80 |
#' @param verbose Logical; if \code{TRUE}, will print logs, and the type of floating catchment area that was calculated.
|
|
| 81 |
#' @examples |
|
| 82 |
#' pop <- c(5, 10, 50) |
|
| 83 |
#' doc <- c(50, 100) |
|
| 84 |
#' travel_time <- matrix(c(5, 50, 25, 70, 40, 30), ncol = 2) |
|
| 85 |
#' |
|
| 86 |
#' # 2-step floating catchment area |
|
| 87 |
#' catchment_ratio(pop, doc, travel_time, 30) |
|
| 88 |
#' |
|
| 89 |
#' # kernel density (Gaussian) 2-step floating catchment area |
|
| 90 |
#' catchment_ratio(pop, doc, travel_time, "gaussian") |
|
| 91 |
#' |
|
| 92 |
#' # enhanced 2-step floating catchment area |
|
| 93 |
#' step_weights <- list(c(60, .22), c(40, .68), c(20, 1)) |
|
| 94 |
#' catchment_ratio(pop, doc, travel_time, step_weights) |
|
| 95 |
#' |
|
| 96 |
#' # modified 2-step floating catchment area |
|
| 97 |
#' catchment_ratio(pop, doc, travel_time, step_weights, adjust_providers = function(m) m^2) |
|
| 98 |
#' |
|
| 99 |
#' # balanced 2-step floating catchment area |
|
| 100 |
#' catchment_ratio( |
|
| 101 |
#' pop, doc, travel_time, step_weights, |
|
| 102 |
#' adjust_consumers = function(w) sweep(w, 1, rowSums(w), "/", FALSE), |
|
| 103 |
#' adjust_providers = function(w) sweep(w, 2, colSums(w), "/", FALSE), |
|
| 104 |
#' ) |
|
| 105 |
#' |
|
| 106 |
#' # 3-step floating catchment area |
|
| 107 |
#' catchment_ratio(pop, doc, travel_time, step_weights, normalize_weight = TRUE) |
|
| 108 |
#' |
|
| 109 |
#' # visualized weight functions |
|
| 110 |
#' if (require("splot", quietly = TRUE)) {
|
|
| 111 |
#' cost <- 1:10 |
|
| 112 |
#' scale <- 2 |
|
| 113 |
#' splot(list( |
|
| 114 |
#' linear = (10 - cost) / 10, |
|
| 115 |
#' gaussian = exp(-cost^2 / (2 * scale^2)), |
|
| 116 |
#' dnorm = dnorm(cost, 0, scale), |
|
| 117 |
#' pnorm = pnorm(-cost, 0, scale), |
|
| 118 |
#' gravity = sqrt(1 / cost^scale), |
|
| 119 |
#' logarithmic = 1 / (1 + log(cost, scale)), |
|
| 120 |
#' logistic = 1 / (1 + exp(scale * cost)), |
|
| 121 |
#' exponential = exp(-cost * scale) |
|
| 122 |
#' ) ~ cost, title = "Decay Functions", laby = "Weight", labx = "Cost", lines = "con", note = FALSE) |
|
| 123 |
#' } |
|
| 124 |
#' @return \code{catchment_ratio}: A vector with a ratio (determined by \code{return_type})
|
|
| 125 |
#' for each entry in \code{consumers}. If \code{return_type} is \code{"supply"}, values
|
|
| 126 |
#' will be the number of resources within the location, rather than a ratio. |
|
| 127 |
#' If \code{return_type} is \code{"demand"}, the vector will have an entry for each provider
|
|
| 128 |
#' rather than consumer location, and values will be number of consumers. |
|
| 129 |
#' @references |
|
| 130 |
#' Dai, D. (2010). Black residential segregation, disparities in spatial access to health care facilities, and |
|
| 131 |
#' late-stage breast cancer diagnosis in metropolitan Detroit. \emph{Health & place, 16}, 1038-1052.
|
|
| 132 |
#' doi: \href{https://doi.org/10.1016/j.healthplace.2010.06.012}{10.1016/j.healthplace.2010.06.012}
|
|
| 133 |
#' |
|
| 134 |
#' Delamater, P. L. (2013). Spatial accessibility in suboptimally configured health care systems: a modified |
|
| 135 |
#' two-step floating catchment area (M2SFCA) metric. \emph{Health & place, 24}, 30-43.
|
|
| 136 |
#' doi: \href{https://doi.org/10.1016/j.healthplace.2013.07.012}{10.1016/j.healthplace.2013.07.012}
|
|
| 137 |
#' |
|
| 138 |
#' Luo, W. & Qi, Y. (2009). An enhanced two-step floating catchment area (E2SFCA) method for measuring spatial |
|
| 139 |
#' accessibility to primary care physicians. \emph{Health & Place, 15}, 1100-1107.
|
|
| 140 |
#' doi: \href{https://doi.org/10.1016/j.healthplace.2009.06.002}{10.1016/j.healthplace.2009.06.002}
|
|
| 141 |
#' |
|
| 142 |
#' Luo, W. & Wang, F. (2003). Measures of spatial accessibility to health care in a GIS environment: synthesis |
|
| 143 |
#' and a case study in the Chicago region. \emph{Environment and Planning B: Planning and Design, 30}, 865-884.
|
|
| 144 |
#' doi: \href{https://doi.org/10.1068/b29120}{10.1068/b29120}
|
|
| 145 |
#' |
|
| 146 |
#' Paez, A., Higgins, C. D., & Vivona, S. F. (2019). Demand and level of service inflation in Floating Catchment |
|
| 147 |
#' Area (FCA) methods. \emph{Plos one, 14}, e0218773. doi:
|
|
| 148 |
#' \href{https://doi.org/10.1371/journal.pone.0218773}{10.1371/journal.pone.0218773}
|
|
| 149 |
#' |
|
| 150 |
#' Wan, N., Zou, B., & Sternberg, T. (2012). A three-step floating catchment area method for analyzing spatial |
|
| 151 |
#' access to health services. \emph{International Journal of Geographical Information Science, 26}, 1073-1089.
|
|
| 152 |
#' doi: \href{https://doi.org/10.1080/13658816.2011.624987}{10.1080/13658816.2011.624987}
|
|
| 153 |
#' @export |
|
| 154 | ||
| 155 |
catchment_ratio <- function(consumers = NULL, providers = NULL, cost = NULL, weight = NULL, normalize_weight = FALSE,
                            scale = 2, max_cost = NULL, adjust_consumers = NULL, adjust_providers = NULL,
                            adjust_zeros = 1e-6, return_type = "original", consumers_commutes = NULL, consumers_id = "GEOID",
                            consumers_value = "count", consumers_location = c("X", "Y"), providers_id = "GEOID",
                            providers_value = "count", providers_location = c("X", "Y"),
                            distance_metric = "euclidean", verbose = FALSE) {
  if (verbose) cli_rule("Calculating a Floating Catchment Area Ratio")
  type <- ""
  # A square matrix passed as `consumers` or `consumers_value` is treated as a
  # commute (source x origin) matrix; its row sums become the consumer counts.
  if (is.null(consumers_commutes)) {
    if (!missing(consumers_value)) {
      dims <- dim(consumers_value)
      if (!is.null(dims) && dims[1] == dims[2]) {
        consumers_commutes <- consumers_value
        if (is.null(consumers)) consumers <- rowSums(consumers_commutes)
      }
    } else {
      dims <- dim(consumers)
      if (missing(consumers_id) && missing(consumers_location) && !is.null(dims) && dims[1] == dims[2]) {
        consumers_commutes <- consumers
        consumers <- rowSums(consumers)
      }
    }
  }
  # TRUE = vector-like input: input_data[1] for providers, input_data[2] for consumers
  input_data <- c(is.null(dim(providers)), is.null(dim(consumers)))
  # getting provider and consumer value vectors
  cv <- if (input_data[2]) {
    if (is.null(consumers)) {
      if (is.numeric(consumers_value)) {
        consumers_value
      } else {
        cli_abort("{.arg consumers} is not specified, and {.arg consumers_value} is non-numeric")
      }
    } else {
      if (verbose) cli_alert_info("consumers value: {.arg consumers} vector")
      as.numeric(consumers)
    }
  } else if (length(consumers_value) == 1 && consumers_value %in% colnames(consumers)) {
    if (verbose) cli_alert_info("consumers value: {.var {consumers_value}} column")
    as.numeric(consumers[, consumers_value, drop = TRUE])
  } else {
    # no usable value column; every consumer location counts as 1
    if (verbose) cli_alert_info("consumers value: {.field 1}")
    rep(1, nrow(consumers))
  }
  cn <- length(cv)
  if (!cn) cli_abort("failed to recognize values in {.arg consumers}")
  if (anyNA(cv)) cv[is.na(cv)] <- 0
  pv <- if (input_data[1]) {
    if (is.null(providers)) {
      if (is.numeric(providers_value)) {
        providers_value
      } else {
        cli_abort("{.arg providers} is not specified, and {.arg providers_value} is non-numeric")
      }
    } else {
      if (verbose) cli_alert_info("providers value: {.arg providers} vector")
      as.numeric(providers)
    }
  } else if (length(providers_value) == 1 && providers_value %in% colnames(providers)) {
    if (verbose) cli_alert_info("providers value: {.var {providers_value}} column")
    as.numeric(providers[, providers_value, drop = TRUE])
  } else {
    if (verbose) cli_alert_info("providers value: {.field 1}")
    rep(1, nrow(providers))
  }
  if (!length(pv)) cli_abort("failed to recognize values in {.arg providers}")
  if (anyNA(pv)) pv[is.na(pv)] <- 0
  # getting provider and consumer ids
  # NOTE(review): this branch is guarded by input_data[1] (providers vector-like)
  # rather than input_data[2]; kept as-is since the fallbacks below remain valid,
  # but confirm this is intentional.
  cid <- if (input_data[1]) {
    if (!missing(consumers_id) && cn == length(consumers_id)) {
      if (verbose) cli_alert_info("consumers id: {.arg consumers_id} vector")
      consumers_id
    } else if (!is.null(names(consumers))) {
      if (verbose) cli_alert_info("consumers id: {.arg consumers} names")
      names(consumers)
    } else if (!is.null(rownames(cost))) {
      # fixed: consumers occupy the rows of `cost` (see docs), so consumer IDs
      # come from its row names; previously read colnames(cost) (provider IDs)
      if (verbose) cli_alert_info("consumers id: {.arg cost} row names")
      rownames(cost)
    } else {
      if (verbose) cli_alert_info("consumers id: sequence along consumers value")
      seq_along(cv)
    }
  } else if (length(consumers_id) == 1 && consumers_id %in% colnames(consumers)) {
    if (verbose) cli_alert_info("consumers id: {.var {consumers_id}} column")
    consumers[, consumers_id, drop = TRUE]
  } else {
    if (verbose) cli_alert_info("consumers id: sequence along consumers value")
    seq_along(cv)
  }
  if (!length(cid)) cli_abort("failed to recognize IDs in {.arg consumers}")
  pid <- if (input_data[1]) {
    if (!missing(providers_id) && length(pv) == length(providers_id)) {
      if (verbose) cli_alert_info("providers id: {.arg providers_id} vector")
      providers_id
    } else if (!is.null(names(providers))) {
      if (verbose) cli_alert_info("providers id: {.arg providers} names")
      names(providers)
    } else if (!is.null(colnames(cost))) {
      if (verbose) cli_alert_info("providers id: {.arg cost} column names")
      colnames(cost)
    } else {
      if (verbose) cli_alert_info("providers id: sequence along providers value")
      seq_along(pv)
    }
  } else if (length(providers_id) == 1 && providers_id %in% colnames(providers)) {
    if (verbose) cli_alert_info("providers id: {.var {providers_id}} column")
    providers[, providers_id, drop = TRUE]
  } else {
    if (verbose) cli_alert_info("providers id: sequence along providers value")
    seq_along(pv)
  }
  if (!length(pid)) cli_abort("failed to recognize IDs in {.arg providers}")
  if (is.null(cost)) {
    # no cost matrix provided; try to derive one from location information
    if (missing(consumers_location) && !all(consumers_location %in% colnames(consumers))) {
      consumers_location <- "geometry"
    }
    if (missing(providers_location) && !all(providers_location %in% colnames(providers))) {
      providers_location <- "geometry"
    }
    if ((!is.character(providers_location) || all(providers_location %in% colnames(providers))) &&
      (!is.character(consumers_location) || all(consumers_location %in% colnames(consumers)))) {
      if (verbose) cli_bullets("calculating cost from locations...")
      ccords <- if (is.character(consumers_location)) {
        if (verbose) cli_alert_info("consumers location: {.arg consumers} columns ({.field {consumers_location}})")
        consumers[, consumers_location, drop = TRUE]
      } else {
        if (verbose) cli_alert_info("consumers location: {.arg consumers_location}")
        consumers_location
      }
      if (any(grepl("^sf", class(ccords)))) {
        # sf geometries: reduce polygons to centroids, then extract coordinates
        if (any(grepl("POLY", class(ccords), fixed = TRUE))) {
          if (verbose) cli_alert_info("calculating centroids of consumers location geometry")
          ccords <- st_centroid(ccords)
        }
        if (verbose) cli_alert_info("using coordinates from consumers location geometry")
        ccords <- st_coordinates(ccords)
      }
      pcords <- if (is.character(providers_location)) {
        if (verbose) cli_alert_info("providers location: {.arg providers} columns ({.field {providers_location}})")
        providers[, providers_location, drop = FALSE]
      } else {
        if (verbose) cli_alert_info("providers location: {.arg providers_location}")
        providers_location
      }
      if (any(grepl("^sf", class(pcords)))) {
        if (any(grepl("POLY", class(pcords), fixed = TRUE))) {
          if (verbose) cli_alert_info("calculating centroids of providers location geometry")
          pcords <- st_centroid(pcords)
        }
        if (verbose) cli_alert_info("using coordinates from providers location geometry")
        pcords <- st_coordinates(pcords)
      }
      if (ncol(pcords) == ncol(ccords)) {
        if (verbose) cli_alert_info(paste("cost: calculated", distance_metric, "distances"))
        # lma_simets returns similarities in (0, 1]; invert to get distances
        cost <- 1 / lma_simets(ccords, pcords, metric = distance_metric, pairwise = TRUE) - 1
        if (is.null(dim(cost))) {
          # a single consumer or provider collapses to a vector; restore orientation
          cost <- t(cost)
          if (nrow(pcords) == 1) cost <- t(cost)
        }
        dimnames(cost) <- list(cid, pid)
      } else {
        cli_abort(
          "{.arg cost} is NULL, and failed to calculate it from provided locations (differing number of columns)"
        )
      }
    }
    if (is.null(cost)) {
      # no locations either; fall back to a uniform cost of 1 between all pairs
      if (verbose) cli_alert_info("cost: {.field 1}")
      cost <- sparseMatrix(
        {},
        {},
        x = 0,
        dims = c(cn, length(pv)),
        dimnames = list(cid, pid)
      )
      cost[] <- 1
    }
  } else if (is.null(dim(cost))) {
    # a cost vector is reshaped to consumers x providers
    cost <- matrix(cost, length(cv))
    if (identical(dim(cost), c(cn, length(pv)))) {
      dimnames(cost) <- list(cid, pid)
      if (verbose) cli_alert_info("cost: {.arg cost} vector")
    } else {
      cli_abort("{.arg cost} must be a matrix-like object")
    }
  } else if (verbose) cli_alert_info("cost: {.arg cost} matrix")
  # convert costs to a sparse weight matrix; normalization is applied later here
  w <- catchment_weight(
    cost, weight,
    max_cost = max_cost, adjust_zeros = adjust_zeros, scale = scale,
    normalize_weight = FALSE, verbose = verbose
  )
  if (!is.null(consumers_commutes)) {
    dims <- dim(consumers_commutes)
    if (dims[1] != dims[2]) cli_abort("{.arg consumers_commutes} must be a square matrix")
    # fixed: consumers_commutes is consumers x consumers, so its column names are
    # compared against consumer IDs (cid); previously compared against pid
    if (dims[1] != cn || (!is.null(colnames(consumers_commutes)) && !identical(cid, colnames(consumers_commutes)))) {
      if (is.numeric(cid) && dims[1] <= max(cid)) cid <- as.character(cid)
      if (all(cid %in% colnames(consumers_commutes))) {
        if (verbose) cli_alert_info("ordering and/or trimming {.arg consumers_commutes} by {.arg consumers} IDs")
        consumers_commutes <- consumers_commutes[cid, cid]
      } else {
        cli_abort("{.arg consumers_commutes} could not be resolved with {.arg consumers}")
      }
    }
    # proportion of each location's consumers who stay home (diagonal / total)
    noncommuters <- diag(consumers_commutes) / cv
    if (any(cv == 0)) noncommuters[cv == 0] <- 1
    diag(consumers_commutes) <- 0
    diag(consumers_commutes) <- rowSums(consumers_commutes)
    wr <- rowSums(consumers_commutes)
    wr[wr == 0] <- 1
    # row-normalize commute counts to origin probabilities
    consumers_commutes <- consumers_commutes / wr
    w_total <- rowSums(w)
    w_total[w_total == 0] <- 1
    # redistribute each location's weight across its commute origins
    w_commutes <- consumers_commutes %*% (w / w_total) * w_total
    # blend commuter and non-commuter weight by the non-commuter share
    w <- w_commutes * (1 - noncommuters) + w * noncommuters
  }
  # if the weight matrix appears transposed relative to consumers, flip it
  if (
    cn != nrow(w) &&
      (cn == ncol(w) ||
        (!is.null(rownames(w)) && !is.null(colnames(w)) && sum(cid %in% colnames(w)) > sum(cid %in% rownames(w)))
      )
  ) {
    w <- t(w)
  }
  wr <- rowSums(w)
  # pad missing consumer rows with zeros, then order rows by consumer IDs
  if (nrow(w) < cn && !is.null(rownames(w)) &&
    (!all(cid %in% rownames(w)) || !all(cid == rownames(w)))) {
    if (verbose) cli_alert_info("selected weight rows by consumers ID names")
    cid <- as.character(cid)
    mcs <- cid[!cid %in% rownames(w)]
    w <- rbind(
      w, sparseMatrix(
        {},
        {},
        x = 0,
        dims = c(length(mcs), ncol(w)),
        dimnames = list(mcs, pid)
      )
    )[cid, ]
  }
  # align columns to provider IDs, dropping providers not present in the weights
  if (!is.null(colnames(w)) && is.character(pid) && any(su <- pid %in% colnames(w)) &&
    (!all(su) || length(pv) != ncol(w) || !all(pid == colnames(w)))) {
    if (verbose) cli_alert_info("selected weight columns by providers ID names")
    pid <- pid[su]
    pv <- pv[su]
    w <- w[, pid]
  }
  if (nrow(w) != cn) {
    if (is.numeric(cid) && min(cid) > 0 && max(cid) <= nrow(w)) {
      if (verbose) cli_alert_info("selected weight rows by consumers ID indices")
    } else if (any(su <- cid %in% rownames(w))) {
      if (verbose) cli_alert_info("selected weight rows by consumers IDs")
      cv <- cv[su]
      cid <- as.character(cid[su])
    } else {
      cli_abort("consumers values are not the same length as weight rows, and could not be aligned by name")
    }
    w <- w[cid, ]
  }
  if (ncol(w) != length(pv) && is.numeric(pid)) {
    if (min(pid) > 0 && max(pid) <= ncol(w)) {
      if (verbose) cli_alert_info("selected weight columns by providers ID indices")
    } else {
      # numeric IDs may be zero-padded names (e.g., GEOIDs); pad to match
      pid_len <- nchar(colnames(w)[1])
      if (all(nchar(colnames(w)) == pid_len)) pid <- formatC(pid, width = pid_len, flag = 0)
      if (any(su <- pid %in% colnames(w))) {
        if (verbose) cli_alert_info("selected weight columns by providers IDs")
        pv <- pv[su]
        pid <- as.character(pid[su])
      } else {
        cli_abort("providers IDs are numeric, but are out of range of the weight matrix and do not all appear in its column names")
      }
    }
    w <- w[, pid]
  }
  if (!all(dim(w) == c(cn, length(pv)))) {
    cli_abort("failed to align weight matrix with consumer and provider values")
  }
  if (normalize_weight) {
    # adjust weights by selection probability (3-step)
    if (verbose) cli_alert_info("normalizing weight")
    type <- paste0(type, "3-step floating catchment area")
    if (!is.null(rownames(w)) && all(rownames(w) %in% names(wr))) wr <- wr[rownames(w)]
    wr[wr == 0] <- 1
    w <- w * sweep(w, 1, wr, "/", FALSE)
  } else {
    type <- paste0(type, "2-step floating catchment area")
  }
  provided_return_type <- !missing(return_type)
  if (is.character(return_type)) {
    return_type <- tolower(substr(return_type, 1, 1))
    if (return_type == "s") {
      if (verbose) cli_bullets(c(v = paste("calculated", type, "(resources per consumer location)")))
      # fixed: provider values are applied after the optional weight adjustment;
      # previously adjust_providers(w) was returned without the %*% pv step
      return(as.numeric((if (is.function(adjust_providers)) {
        environment(adjust_providers) <- environment()
        adjust_providers(w)
      } else {
        w
      }) %*% pv))
    }
  }
  # demand: weighted consumer total within each provider's catchment
  wd <- crossprod(if (is.function(adjust_consumers)) {
    environment(adjust_consumers) <- environment()
    adjust_consumers(w)
  } else {
    w
  }, cv)
  if (return_type == "d") {
    if (verbose) cli_bullets(c(v = paste("calculated", type, "(consumers per provider location)")))
    return(as.numeric(wd))
  }
  # avoid division by (effectively) zero demand
  wd[abs(wd) < .Machine$double.eps] <- 1
  # step 2: sum each provider's per-capita supply over each consumer's catchment
  r <- as.numeric((if (is.function(adjust_providers)) {
    environment(adjust_providers) <- environment()
    adjust_providers(w)
  } else {
    w
  }) %*% (pv / wd))
  if (provided_return_type) {
    if (is.numeric(return_type)) {
      r <- r * return_type
      type <- paste(type, "(resources per", return_type, "consumers)")
    } else {
      if ("n" == return_type) {
        type <- paste(type, "(normalized)")
        if (verbose) cli_alert_info("normalizing ratios")
        r <- r / (sum(r * cv) / sum(cv))
      } else if ("r" == return_type) {
        type <- paste(type, "(resources per region)")
        if (verbose) cli_alert_info("multiplying ratios by consumers value")
        r <- r * cv
      }
    }
  } else {
    type <- paste(type, "(resources per consumer)")
  }
  if (verbose) cli_bullets(c(v = paste("calculated", type)))
  r
}
| 1 |
#' @rdname catchment_ratio |
|
| 2 |
#' @examples |
|
| 3 |
#' |
|
| 4 |
#' # gives weight only to costs under 2 |
|
| 5 |
#' catchment_weight(matrix(c(1, 2, 1, 3, 1, 2), 3), 2) |
|
| 6 |
#' @return \code{catchment_weight}: A sparse matrix of weights.
|
|
| 7 |
#' @export |
|
| 8 | ||
| 9 |
catchment_weight <- function(cost, weight = NULL, max_cost = NULL, adjust_zeros = 1e-6, scale = 2, |
|
| 10 |
normalize_weight = FALSE, verbose = FALSE) {
|
|
| 11 | 3x |
if (is.null(dim(cost))) cost <- matrix(cost, ncol = 1) |
| 12 | 2x |
if (is.data.frame(cost)) cost <- as.matrix(cost) |
| 13 | 192x |
if (!is.null(weight) && is.numeric(adjust_zeros)) {
|
| 14 | 183x |
zero_costs <- !is.na(cost) & cost == 0 |
| 15 | 183x |
if (any(zero_costs)) {
|
| 16 | ! |
if (verbose) cli_alert_info(paste("setting non-NA 0s to", adjust_zeros))
|
| 17 | 64x |
cost[zero_costs] <- adjust_zeros |
| 18 | 64x |
rm(zero_costs) |
| 19 |
} |
|
| 20 |
} |
|
| 21 | 11x |
if (anyNA(cost)) cost[is.na(cost)] <- 0 |
| 22 | 192x |
if (is.numeric(weight) && is.null(dim(weight)) && length(weight) > 1) {
|
| 23 | ! |
weight <- matrix(weight, ncol = ncol(cost)) |
| 24 |
} |
|
| 25 | 192x |
if (is.null(weight) && !is.null(max_cost)) {
|
| 26 | ! |
weight <- max_cost |
| 27 | ! |
max_cost <- NULL |
| 28 |
} |
|
| 29 | 192x |
if (is.null(weight)) {
|
| 30 | 2x |
if (verbose) cli_alert_info("weight: cost")
|
| 31 | 8x |
w <- as(cost, "CsparseMatrix") |
| 32 | 184x |
} else if (is.null(dim(weight))) {
|
| 33 | 176x |
if (is.numeric(weight)) {
|
| 34 |
# single buffer value means a uniformly weighted catchment area (original) |
|
| 35 | ! |
if (verbose) cli_alert_info("weight: cost over {.field 0} and under {.field {weight[[1]]}}")
|
| 36 | 152x |
w <- as(cost > 0 & cost < weight[[1]], "CsparseMatrix") * 1 |
| 37 | 24x |
} else if (is.list(weight)) {
|
| 38 |
# list of steps for roughly graded weightings (enhanced) |
|
| 39 | ! |
if (verbose) cli_alert_info("weight: cost over {.field 0} and under steps of {.arg weight}")
|
| 40 | 6x |
weight <- weight[order(-vapply(weight, "[[", 1, 1))] |
| 41 | 6x |
w <- as((cost <= weight[[1]][1]) * weight[[1]][2], "CsparseMatrix") |
| 42 | 6x |
for (s in weight[-1]) w[cost <= s[1]] <- s[2] |
| 43 | 6x |
w[cost <= 0] <- 0 |
| 44 | 18x |
} else if (is.character(weight)) {
|
| 45 |
# name of a weight function (kernel density) |
|
| 46 | ! |
if (verbose) cli_alert_info("weight: {weight} weight function")
|
| 47 | 15x |
weight <- tolower(weight[[1]]) |
| 48 | 15x |
if ("units" %in% class(cost)) {
|
| 49 | ! |
cost <- matrix( |
| 50 | ! |
as.numeric(cost), nrow(cost), ncol(cost), |
| 51 | ! |
dimnames = dimnames(cost) |
| 52 |
) |
|
| 53 |
} |
|
| 54 | 15x |
if (grepl("^(?:gr|n)", weight)) {
|
| 55 |
# gravity / normal kernel |
|
| 56 | 4x |
w <- as(sqrt(1 / cost^scale), "CsparseMatrix") |
| 57 | 11x |
} else if (grepl("^e", weight)) {
|
| 58 |
# exponential kernel |
|
| 59 | 1x |
w <- as(exp(-cost * scale), "CsparseMatrix") |
| 60 | 10x |
} else if (grepl("^loga", weight)) {
|
| 61 |
# logarithmic kernel |
|
| 62 | 1x |
w <- as(1 / (1 + log(cost, scale)), "CsparseMatrix") |
| 63 | 9x |
} else if (grepl("^li", weight)) {
|
| 64 |
# linear kernel |
|
| 65 | 3x |
w <- as((scale - cost) / scale, "CsparseMatrix") |
| 66 | 3x |
w[w < 0] <- 0 |
| 67 | 6x |
} else if (grepl("^l", weight)) {
|
| 68 |
# logistic kernel |
|
| 69 | 1x |
w <- as(1 / (1 + exp(scale * cost)), "CsparseMatrix") |
| 70 | 5x |
} else if (grepl("^ga", weight)) {
|
| 71 |
# Gaussian kernel |
|
| 72 | 2x |
w <- as(exp(-cost^2 / (2 * scale^2)), "CsparseMatrix") |
| 73 | 3x |
} else if (grepl("^d", weight) && exists(weight)) {
|
| 74 |
# assumed to be a density function like dnorm |
|
| 75 | 2x |
w <- as(cost, "CsparseMatrix") |
| 76 | 2x |
w@x <- do.call(weight, list(w@x, 0, scale)) |
| 77 | 1x |
} else if (grepl("^p", weight) && exists(weight)) {
|
| 78 |
# assumed to be a distribution function like pnorm |
|
| 79 | 1x |
w <- as(cost, "CsparseMatrix") |
| 80 | 1x |
w@x <- do.call(weight, list(-w@x, 0, scale)) |
| 81 |
} else {
|
|
| 82 | ! |
cli_abort("{.arg weight} not recognized")
|
| 83 |
} |
|
| 84 | 15x |
w[cost <= 0 | !is.finite(w)] <- 0 |
| 85 | 3x |
} else if (is.function(weight)) {
|
| 86 | ! |
if (verbose) cli_alert_info("weight: custom weight function")
|
| 87 | 3x |
w <- as(weight(cost), "CsparseMatrix") |
| 88 | 3x |
w[cost <= 0 | !is.finite(w)] <- 0 |
| 89 |
} |
|
| 90 |
} else {
|
|
| 91 | ! |
if (verbose) cli_alert_info("weight: {.arg weight}")
|
| 92 | 8x |
w <- as(if (is.data.frame(weight)) as.matrix(weight) else weight, "CsparseMatrix") |
| 93 |
} |
|
| 94 | 192x |
if (!is.null(max_cost)) {
|
| 95 | ! |
if (verbose) cli_alert_info("zerod-out cost over {max_cost}")
|
| 96 | 1x |
w[cost > max_cost] <- 0 |
| 97 |
} |
|
| 98 | 192x |
if (normalize_weight) {
|
| 99 | 1x |
wr <- rowSums(w) |
| 100 | 1x |
wr[wr == 0] <- 1 |
| 101 | 1x |
w <- w * sweep(w, 1, wr, "/") |
| 102 |
} |
|
| 103 | 192x |
w |
| 104 |
} |
| 1 |
#' Download U.S. Census Population Data |
|
| 2 |
#' |
|
| 3 |
#' Download and load U.S. census American Community Survey 5-year summary files at the block group level, |
|
| 4 |
#' optionally including home-work commute statistics. |
|
| 5 |
#' |
|
| 6 |
#' @param dir Directory in which to save the file(s). |
|
| 7 |
#' @param state Name, abbreviation, or FIPS code of the state. |
|
| 8 |
#' @param year 4 digit year, between 2009 and the most recent year. |
|
| 9 |
#' @param include_margins Logical; if \code{TRUE}, will save and include estimate margins or error, which is a
|
|
| 10 |
#' \code{data.frame} with the same dimensions as the estimates \code{data.frame}.
|
|
| 11 |
#' @param include_commutes Logical; if \code{TRUE}, will download the
|
|
| 12 |
#' \href{https://lehd.ces.census.gov}{Longitudinal Employer-Household Dynamics (LEHD)}
|
|
| 13 |
#' \href{https://lehd.ces.census.gov/data/#lodes}{Origin-Destination Employment Statistics (LODES)}
|
|
| 14 |
#' data for the state and block groups within selected tracts. |
|
| 15 |
#' @param counties,tracts,blockgroups A vector of counties, tracts, or block group GEOIDs within the specified state to |
|
| 16 |
#' filter block groups for (block groups within the specified counties or tracts, or matching the specified block group |
|
| 17 |
#' GEOID). Only one can be specified (lower levels overwrite higher levels). Defaults to all block groups. |
|
| 18 |
#' @param overwrite Logical; if \code{TRUE}, will remove any existing files before downloading and saving new versions.
|
|
| 19 |
#' @param verbose Logical; if \code{FALSE}, will not print status messages.
|
|
| 20 |
#' @examples |
|
| 21 |
#' \dontrun{
|
|
| 22 |
#' download_census_population(".", "va")
|
|
| 23 |
#' } |
|
| 24 |
#' @return A list with at least an \code{estimates} entry, and optionally \code{margins} and/or \code{commutes}
|
|
| 25 |
#' entries. The \code{extimates} and \code{margins} entries are \code{data.frame}s with block groups in rows,
|
|
| 26 |
#' population variables in columns, and person counts in cells. The \code{commutes} entry is a sparse matrix with
|
|
| 27 |
#' home block groups in rows, work block groups in columns, and job counts in cells (all jobs, summed across blocks). |
|
| 28 |
#' @export |
|
| 29 | ||
| 30 |
download_census_population <- function(dir, state, year = 2019, include_margins = FALSE, include_commutes = FALSE, |
|
| 31 |
counties = NULL, tracts = NULL, blockgroups = NULL, overwrite = FALSE, verbose = TRUE) {
|
|
| 32 | 1x |
us_fips <- list( |
| 33 | 1x |
name = c( |
| 34 | 1x |
"UnitedStates", "Alabama", "Alaska", "Arizona", "Arkansas", "California", "Colorado", "Connecticut", |
| 35 | 1x |
"Delaware", "DistrictOfColumbia", "Florida", "Georgia", "Hawaii", "Idaho", "Illinois", "Indiana", "Iowa", |
| 36 | 1x |
"Kansas", "Kentucky", "Louisiana", "Maine", "Maryland", "Massachusetts", "Michigan", "Minnesota", "Mississippi", |
| 37 | 1x |
"Missouri", "Montana", "Nebraska", "Nevada", "NewHampshire", "NewJersey", "NewMexico", "NewYork", |
| 38 | 1x |
"NorthCarolina", "NorthDakota", "Ohio", "Oklahoma", "Oregon", "Pennsylvania", "RhodeIsland", "SouthCarolina", |
| 39 | 1x |
"CouthDakota", "Tennessee", "Texas", "Utah", "Vermont", "Virginia", "Washington", "WestVirginia", "Wisconsin", |
| 40 | 1x |
"Wyoming", "PuertoRico" |
| 41 |
), |
|
| 42 | 1x |
post = c( |
| 43 | 1x |
"us", "al", "ak", "az", "ar", "ca", "co", "ct", "de", "dc", "fl", "ga", "hi", "id", "il", "in", "ia", "ks", "ky", |
| 44 | 1x |
"la", "me", "md", "ma", "mi", "mn", "ms", "mo", "mt", "ne", "nv", "nh", "nj", "nm", "ny", "nc", "nd", "oh", "ok", |
| 45 | 1x |
"or", "pa", "ri", "sc", "sd", "tn", "tx", "ut", "vt", "va", "wa", "wv", "wi", "wy", "pr" |
| 46 |
), |
|
| 47 | 1x |
fips = c( |
| 48 | 1x |
"us", 1, 2, 4, 5, 6, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, |
| 49 | 1x |
33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 44, 45, 46, 47, 48, 49, 50, 51, 53, 54, 55, 56, 72 |
| 50 |
) |
|
| 51 |
) |
|
| 52 | 1x |
state <- gsub("^0|[^a-z0-9]", "", tolower(state[[1]]))
|
| 53 | 1x |
fid <- which(us_fips$fips == state) |
| 54 | 1x |
if (!length(fid)) {
|
| 55 | 1x |
fid <- if (nchar(state) == 2) {
|
| 56 | 1x |
which(us_fips$post == state) |
| 57 |
} else {
|
|
| 58 | ! |
pmatch(state, tolower(us_fips$name)) |
| 59 |
} |
|
| 60 | ! |
if (!length(fid) || is.na(fid)) cli_abort("failed to recognize {.arg state}")
|
| 61 |
} |
|
| 62 | 1x |
state <- us_fips$name[fid] |
| 63 | 1x |
state_post <- us_fips$post[fid] |
| 64 | 1x |
display_state <- gsub("([a-z])([A-Z])", "\\1 \\2", state)
|
| 65 | ||
| 66 | 1x |
if (!is.null(dir)) {
|
| 67 | 1x |
dir <- tdir <- paste0(normalizePath(dir, "/", FALSE), "/") |
| 68 | ! |
if (!dir.exists(dir)) dir.create(dir, FALSE, TRUE) |
| 69 |
} else {
|
|
| 70 | ! |
tdir <- paste0(tempdir(), "/") |
| 71 |
} |
|
| 72 | 1x |
folder <- paste0(tdir, state, "_Tracts_Block_Groups_Only/") |
| 73 | 1x |
url <- paste0( |
| 74 | 1x |
"https://www2.census.gov/programs-surveys/acs/summary_file/", year, |
| 75 | 1x |
if (year > 2020) "/sequence-based-SF", "/data/5_year_seq_by_state/", state, |
| 76 | 1x |
"/Tracts_Block_Groups_Only/" |
| 77 |
) |
|
| 78 | 1x |
files_zip <- paste0(year, "5", state_post, "000", 1:3, "000.zip") |
| 79 | 1x |
files <- paste0(folder, "e", year, "5", state_post, "000", 1:3, "000.txt") |
| 80 | 1x |
final_files <- paste0(if (is.null(dir)) tdir else dir, state_post, "_", c( |
| 81 | 1x |
"population", |
| 82 | 1x |
if (include_margins) "population_margins" |
| 83 | 1x |
), "_", year, ".csv") |
| 84 | ! |
if (overwrite) unlink(c(paste0(tdir, files_zip), files, final_files)) |
| 85 | 1x |
if (!all(file.exists(final_files)) && !all(file.exists(paste0(tdir, files_zip)) | file.exists(files))) {
|
| 86 | 1x |
if (verbose) cli_alert_info(paste("downloading", display_state, "American Community Survey summary files"))
|
| 87 | 1x |
for (i in seq_along(files)) {
|
| 88 | 3x |
if (!file.exists(files[i])) {
|
| 89 | 3x |
download.file(paste0(url, files_zip[i]), paste0(tdir, files_zip[i])) |
| 90 |
} |
|
| 91 |
} |
|
| 92 |
} |
|
| 93 | 1x |
if (any(file.exists(paste0(tdir, files_zip)))) {
|
| 94 | 1x |
if (verbose) cli_alert_info(paste("decompressing", display_state, "American Community Survey summary files"))
|
| 95 | 1x |
dir.create(folder, FALSE) |
| 96 | 1x |
for (f in files_zip[file.exists(paste0(tdir, files_zip))]) {
|
| 97 | 3x |
system2("unzip", c("-qd", shQuote(folder), shQuote(paste0(tdir, f))))
|
| 98 | 3x |
unlink(paste0(tdir, f)) |
| 99 |
} |
|
| 100 |
} |
|
| 101 | 1x |
if (!all(file.exists(final_files))) {
|
| 102 | 1x |
if (verbose) cli_alert_info(paste("reformatting", display_state, "American Community Survey summary files"))
|
| 103 | ||
| 104 |
# column names |
|
| 105 | 1x |
headers <- read.csv(paste0( |
| 106 | 1x |
"https://www2.census.gov/programs-surveys/acs/summary_file/", year, |
| 107 | 1x |
if (year > 2020) {
|
| 108 | ! |
"/sequence-based-SF/documentation/user_tools/ACS_5yr_Seq" |
| 109 |
} else {
|
|
| 110 | 1x |
paste0("/documentation/", if (year < 2013) {
|
| 111 | ! |
"5_year/user_tools/Sequence_Number_and" |
| 112 |
} else {
|
|
| 113 | 1x |
"user_tools/ACS_5yr_Seq" |
| 114 |
}) |
|
| 115 | 1x |
}, "_Table_Number_Lookup.txt" |
| 116 | 1x |
), nrows = 2000) |
| 117 | 1x |
headers <- headers[seq_len(grep("ANCESTRY", headers$Table.Title, fixed = TRUE)[1] - 1), ]
|
| 118 | 1x |
title_id <- which(headers$Total.Cells.in.Table != "") |
| 119 | 1x |
names <- sub("^Universe:\\s*", "", headers$Table.Title)
|
| 120 | 1x |
sub_id <- which(grepl(":", names, fixed = TRUE))
|
| 121 | 1x |
headers$name <- sub("\\.$", "", sub("_\\w+_Total", "_Total", gsub("._", "_", paste0(
|
| 122 | 1x |
rep(gsub("[;:(),\\s]+", ".", names[title_id], perl = TRUE), c(title_id[-1], nrow(headers)) - title_id),
|
| 123 |
"_", |
|
| 124 | 1x |
rep(gsub("[;:(),\\s]+", ".", names[sub_id], perl = TRUE), c(sub_id[-1], nrow(headers)) - c(1, sub_id[-1])),
|
| 125 |
"_", |
|
| 126 | 1x |
gsub("[;:(),\\s]+", ".", sub(" years", "", names, fixed = TRUE), perl = TRUE)
|
| 127 | 1x |
), fixed = TRUE))) |
| 128 | 1x |
start_id <- which(!is.na(headers$Start.Position)) |
| 129 | 1x |
offset <- headers$Line.Number - 1 + rep( |
| 130 | 1x |
headers[start_id, "Start.Position"], |
| 131 | 1x |
c(start_id[-1], nrow(headers) + 1) - start_id |
| 132 |
) |
|
| 133 | 1x |
ind_start <- which(!is.na(offset == min(offset, na.rm = TRUE)) & offset == min(offset, na.rm = TRUE)) |
| 134 | 1x |
ind_end <- c(ind_start[-1], length(offset)) - ind_start |
| 135 | 1x |
inds <- lapply(seq_along(ind_start), function(i) {
|
| 136 | 4x |
is <- seq(ind_start[i], ind_end[i]) |
| 137 | 4x |
is[!is.na(offset[is]) & offset[is] %% 1 == 0] |
| 138 |
}) |
|
| 139 | 1x |
subs <- as.numeric(sub("\\D.*$", "", headers$Total.Cells.in.Table))
|
| 140 | 1x |
ind_start <- which( |
| 141 | 1x |
!is.na(headers$Start.Position) & |
| 142 | 1x |
headers$Start.Position == min(headers$Start.Position, na.rm = TRUE) |
| 143 |
) |
|
| 144 | 1x |
ind_end <- c(ind_start[-1] - 1, length(subs)) |
| 145 | 1x |
inds <- lapply(seq_along(ind_start), function(i) {
|
| 146 | 4x |
is <- seq(ind_start[i], ind_end[i]) |
| 147 | 4x |
is[!is.na(headers[is, "Line.Number"]) & headers[is, "Line.Number"] %% 1 == 0] |
| 148 |
}) |
|
| 149 | ||
| 150 |
# row geoids |
|
| 151 | 1x |
rows <- read.csv(paste0( |
| 152 | 1x |
"https://www2.census.gov/programs-surveys/acs/summary_file/", year, |
| 153 | 1x |
if (year > 2020) "/sequence-based-SF", "/data/5_year_seq_by_state/", state, |
| 154 | 1x |
"/Tracts_Block_Groups_Only/g", year, "5", state_post, ".csv" |
| 155 | 1x |
), header = FALSE) |
| 156 | 1x |
rows$GEOID <- sub("\\D.*$", "", paste0(
|
| 157 | 1x |
formatC(rows$V10, width = 2, flag = 0), |
| 158 | 1x |
formatC(rows$V11, width = 3, flag = 0), |
| 159 | 1x |
formatC(rows$V14, width = 6, flag = 0), |
| 160 | 1x |
rows$V15 |
| 161 |
)) |
|
| 162 | ||
| 163 |
# estimates |
|
| 164 | 1x |
estimates <- do.call(cbind, lapply(seq_along(files), function(i) {
|
| 165 | 3x |
os <- c(6, offset[inds[[i]]]) |
| 166 | 3x |
tab <- read.csv( |
| 167 | 3x |
files[i], |
| 168 | 3x |
header = FALSE, colClasses = c(rep("character", 5), rep("numeric", length(os))),
|
| 169 | 3x |
na.string = "." |
| 170 |
) |
|
| 171 | 3x |
colnames(tab)[os] <- c("GEOID", headers[inds[[i]], "name"])
|
| 172 | 3x |
tab$GEOID <- rows[tab$GEOID, "GEOID"] |
| 173 | 3x |
tab[nchar(tab$GEOID) == 12, -seq(1, if (i == 1) 5 else 6)] |
| 174 |
})) |
|
| 175 | ||
| 176 |
# margins |
|
| 177 | 1x |
if (include_margins) {
|
| 178 | 1x |
files_m <- paste0(folder, "m", year, "5", state_post, "000", 1:3, "000.txt") |
| 179 | 1x |
margins <- do.call(cbind, lapply(seq_along(files_m), function(i) {
|
| 180 | 3x |
os <- c(6, offset[inds[[i]]]) |
| 181 | 3x |
tab <- read.csv( |
| 182 | 3x |
files_m[i], |
| 183 | 3x |
header = FALSE, colClasses = c(rep("character", 5), rep("numeric", length(os))),
|
| 184 | 3x |
na.string = "." |
| 185 |
) |
|
| 186 | 3x |
colnames(tab)[os] <- c("GEOID", headers[inds[[i]], "name"])
|
| 187 | 3x |
tab$GEOID <- rows[tab$GEOID, "GEOID"] |
| 188 | 3x |
tab[nchar(tab$GEOID) == 12, -seq(1, if (i == 1) 5 else 6)] |
| 189 |
})) |
|
| 190 |
} |
|
| 191 | 1x |
su <- vapply(estimates, function(v) all(is.na(v)), TRUE) |
| 192 | 1x |
if (any(su)) {
|
| 193 | 1x |
estimates <- estimates[, !su, drop = FALSE] |
| 194 | 1x |
if (include_margins) {
|
| 195 | 1x |
margins <- margins[, !su, drop = FALSE] |
| 196 |
} |
|
| 197 |
} |
|
| 198 | 1x |
su <- !is.na(estimates[, 2]) & estimates[, 2] == 0 |
| 199 | 1x |
if (any(su)) {
|
| 200 | ! |
estimates <- estimates[!su, ] |
| 201 | ! |
if (include_margins) margins <- margins[!su, ] |
| 202 |
} |
|
| 203 |
} else {
|
|
| 204 | ! |
if (verbose) cli_alert_info(paste("loading existing", display_state, "population data"))
|
| 205 | ! |
classes <- c("character", rep("numeric", length(
|
| 206 | ! |
scan(final_files[1], "", sep = ",", nlines = 1, quiet = TRUE) |
| 207 | ! |
) - 1)) |
| 208 | ! |
estimates <- read.csv(final_files[1], colClasses = classes) |
| 209 | ! |
if (include_margins) margins <- read.csv(final_files[2], colClasses = classes) |
| 210 |
} |
|
| 211 |
# commutes |
|
| 212 | 1x |
if (include_commutes) {
|
| 213 | 1x |
file <- paste0(tdir, state_post, "_commutes_", year, ".csv") |
| 214 | 1x |
filegz <- paste0(file, ".gz") |
| 215 | ! |
if (overwrite) unlink(c(file, filegz)) |
| 216 | 1x |
if (!any(file.exists(c(file, filegz)))) {
|
| 217 | 1x |
if (verbose) cli_alert_info(paste("downloading", display_state, "Origin-Destination Employment Statistics file"))
|
| 218 | 1x |
download.file(paste0( |
| 219 | 1x |
"https://lehd.ces.census.gov/data/lodes/LODES7/", state_post, |
| 220 | 1x |
"/od/", state_post, "_od_main_JT00_", year, ".csv.gz" |
| 221 | 1x |
), filegz) |
| 222 |
} |
|
| 223 | 1x |
if (fresh <- file.exists(filegz)) system2("gzip", c("-df", filegz))
|
| 224 | 1x |
check <- file.exists(file) |
| 225 | 1x |
if (check && fresh && Sys.which("openssl")[[1]] != "") {
|
| 226 | 1x |
hashes <- paste(readLines(paste0( |
| 227 | 1x |
"https://lehd.ces.census.gov/data/lodes/LODES7/", state_post, "/lodes_", state_post, ".sha256sum" |
| 228 | 1x |
)), collapse = "") |
| 229 | 1x |
hash_loc <- regexec(paste0("_od_main_JT00_", year), hashes, fixed = TRUE)[[1]]
|
| 230 | 1x |
check <- substr(hashes, hash_loc - 68, hash_loc - 5) == strsplit( |
| 231 | 1x |
system2("openssl", c("dgst", "-sha256", shQuote(file)), TRUE), " ",
|
| 232 | 1x |
fixed = TRUE |
| 233 | 1x |
)[[1]][2] |
| 234 | ! |
if (!check) cli_warn("integrity check failed for {.file {file}}")
|
| 235 |
} |
|
| 236 | 1x |
if (check) {
|
| 237 | 1x |
if (verbose) cli_alert_info(paste0("loading ", if (!fresh) "existing ", display_state, " commutes data"))
|
| 238 | 1x |
commutes <- read.csv(file) |
| 239 | 1x |
if (!is.null(commutes$w_geocode)) {
|
| 240 | 1x |
fresh <- TRUE |
| 241 | 1x |
if (verbose) {
|
| 242 | 1x |
cli_alert_info(paste( |
| 243 | 1x |
"reformatting", display_state, "Origin-Destination Employment Statistics file" |
| 244 |
)) |
|
| 245 |
} |
|
| 246 | 1x |
commutes$w_geocode <- substr(commutes$w_geocode, 1, 12) |
| 247 | 1x |
commutes$h_geocode <- substr(commutes$h_geocode, 1, 12) |
| 248 | 1x |
commutes <- commutes[commutes$w_geocode %in% estimates$GEOID & commutes$h_geocode %in% estimates$GEOID, ] |
| 249 | 1x |
ids <- structure(seq_along(estimates$GEOID), names = estimates$GEOID) |
| 250 | 1x |
edges <- tapply(commutes$S000, paste0(commutes$w_geocode, commutes$h_geocode), sum) |
| 251 | 1x |
commutes <- sparseMatrix( |
| 252 | 1x |
i = ids[substr(names(edges), 13, 24)], |
| 253 | 1x |
j = ids[substr(names(edges), 1, 12)], |
| 254 | 1x |
x = edges, |
| 255 | 1x |
dimnames = list(estimates$GEOID, estimates$GEOID) |
| 256 |
) |
|
| 257 | ! |
} else if (colnames(commutes)[1] == "GEOID") {
|
| 258 | ! |
rownames(commutes) <- colnames(commutes)[-1] <- commutes[, 1] |
| 259 | ! |
commutes <- as(as.matrix(commutes[, -1]), "dgCMatrix") |
| 260 |
} |
|
| 261 |
} |
|
| 262 |
} |
|
| 263 | 1x |
if (!is.null(dir)) {
|
| 264 | 1x |
files <- paste0(dir, state_post, "_", c( |
| 265 | 1x |
"population", |
| 266 | 1x |
if (include_margins) "population_margins", |
| 267 | 1x |
if (include_commutes && check) "commutes" |
| 268 | 1x |
), "_", year, ".csv") |
| 269 | 1x |
if (!all(file.exists(files)) || (include_commutes && fresh && check)) {
|
| 270 | 1x |
if (verbose) cli_alert_info(paste("writing", display_state, "files"))
|
| 271 | 1x |
ck <- c(!file.exists(files[1]), !file.exists(files[2]) && include_margins, include_commutes && fresh && check) |
| 272 | 1x |
if (ck[1]) write.csv(estimates, files[1], row.names = FALSE) |
| 273 | 1x |
if (ck[2]) write.csv(margins, files[2], row.names = FALSE) |
| 274 | 1x |
if (ck[3]) write.csv(cbind(GEOID = rownames(commutes), as.matrix(commutes)), files[3], row.names = FALSE) |
| 275 | 1x |
if (verbose) {
|
| 276 | 1x |
cli_bullets(c( |
| 277 | 1x |
v = paste0("wrote file", if (sum(ck) == 1) ":" else "s:"),
|
| 278 | 1x |
structure(paste0("{.file ", files[ck], "}"), names = rep("*", sum(ck)))
|
| 279 |
)) |
|
| 280 |
} |
|
| 281 |
} |
|
| 282 |
} |
|
| 283 | 1x |
subsetter <- !c(is.null(blockgroups), is.null(tracts), is.null(counties)) |
| 284 | 1x |
if (any(subsetter)) {
|
| 285 | ! |
su <- if (subsetter[1]) {
|
| 286 | ! |
estimates$GEOID %in% blockgroups |
| 287 | ! |
} else if (subsetter[2]) {
|
| 288 | ! |
substr(estimates$GEOID, 1, 11) %in% tracts |
| 289 | ! |
} else if (subsetter[3]) {
|
| 290 | ! |
substr(estimates$GEOID, 1, 5) %in% counties |
| 291 |
} |
|
| 292 | ! |
if (any(su) && !all(su)) {
|
| 293 | ! |
if (verbose) cli_alert_info("subsetting results")
|
| 294 | ! |
estimates <- estimates[su, ] |
| 295 | ! |
if (include_margins) margins <- margins[su, ] |
| 296 |
} else {
|
|
| 297 | ! |
cli_warn("a subset was requested, but no GEOIDs matched")
|
| 298 |
} |
|
| 299 |
} |
|
| 300 | 1x |
invisible(list( |
| 301 | 1x |
estimates = estimates, |
| 302 | 1x |
margins = if (include_margins) margins, |
| 303 | 1x |
commutes = if (include_commutes) commutes |
| 304 |
)) |
|
| 305 |
} |
| 1 |
#' Extract Connections Within Catchment Areas |
|
| 2 |
#' |
|
| 3 |
#' Extract connected consumer (\code{from}) and provider (\code{to}) locations within a catchment area,
|
|
| 4 |
#' as defined by cost and weights. |
|
| 5 |
#' |
|
| 6 |
#' @param from Consumer locations; a matrix-like object with columns containing latitudes and longitudes |
|
| 7 |
#' (identified by \code{from_coords}), and a column of IDs corresponding to rows in \code{cost} (identified by
|
|
| 8 |
#' \code{from_id}). Each identifying column can alternatively contain the associated matrix or vector, and
|
|
| 9 |
#' IDs can be in row names. |
|
| 10 |
#' @param to Provider locations;a matrix-like object with columns containing latitudes and longitudes |
|
| 11 |
#' (identified by \code{to_coords}), and a column of IDs corresponding to columns in \code{cost} (identified by
|
|
| 12 |
#' \code{to_id}). Each identifying column can alternatively contain the associated matrix or vector, and
|
|
| 13 |
#' IDs can be in row names. |
|
| 14 |
#' @param cost A cost matrix, with row names corresponding to IDs of \code{from}, and column names corresponding to
|
|
| 15 |
#' IDs of \code{to}.
|
|
| 16 |
#' @param weight A weight matrix with the same dimensions as \code{cost}, such as returned from
|
|
| 17 |
#' \code{\link{catchment_weight}}.
|
|
| 18 |
#' @param ... Passes arguments to \code{catchment_weight} if \code{weight} is not a weight matrix.
|
|
| 19 |
#' @param return_type Specify whether to return a \code{data.frame} (default) with connection ids, weights, and costs,
|
|
| 20 |
#' an \code{sf} \code{data.frame} (\code{"sf"}) with linestring geometries added for each connection,
|
|
| 21 |
#' or a GeoJSON-formatted list (\code{"list"}).
|
|
| 22 |
#' @param from_id,from_coords,to_id,to_coords Names of ID and coordinate columns in \code{from} and \code{to},
|
|
| 23 |
#' or vectors of IDs and matrices of coordinates. |
|
| 24 |
#' @examples |
|
| 25 |
#' pop <- simulate_catchments() |
|
| 26 |
#' connections <- catchment_connections( |
|
| 27 |
#' pop$consumers, pop$providers, |
|
| 28 |
#' weight = "gaussian", max_cost = 1, |
|
| 29 |
#' return_type = "sf" |
|
| 30 |
#' ) |
|
| 31 |
#' if (require("leaflet", quiet = TRUE)) {
|
|
| 32 |
#' leaflet() |> |
|
| 33 |
#' addPolylines( |
|
| 34 |
#' data = connections, weight = 3, color = "#777", |
|
| 35 |
#' highlightOptions = highlightOptions(color = "#fff", weight = 4), |
|
| 36 |
#' label = ~ paste0("From: ", from, ", To: ", to, ", Weight: ", weight, ", Cost: ", cost)
|
|
| 37 |
#' ) |> |
|
| 38 |
#' addCircles( |
|
| 39 |
#' data = pop$consumers, label = ~ paste0("Consumers: ", count, ", Access: ", access)
|
|
| 40 |
#' ) |> |
|
| 41 |
#' addCircles( |
|
| 42 |
#' data = pop$providers, color = "#000", label = ~ paste("Provider", id)
|
|
| 43 |
#' ) |
|
| 44 |
#' } |
|
| 45 |
#' @return An object with connection, weight, and cost information, with a format depending on \code{return_type}.
|
|
| 46 |
#' @export |
|
| 47 | ||
| 48 |
catchment_connections <- function(from, to, cost = NULL, weight = 1, ..., return_type = "data.frame", |
|
| 49 |
from_id = "GEOID", from_coords = c("X", "Y"), to_id = "GEOID",
|
|
| 50 |
to_coords = c("X", "Y")) {
|
|
| 51 | 17x |
if (missing(from) && !missing(from_coords)) {
|
| 52 | 1x |
from <- from_coords |
| 53 | 1x |
from_coords <- c("X", "Y")
|
| 54 |
} |
|
| 55 | 17x |
if (missing(to) && !missing(to_coords)) {
|
| 56 | 1x |
to <- to_coords |
| 57 | 1x |
to_coords <- c("X", "Y")
|
| 58 |
} |
|
| 59 | 17x |
from_ids <- if (length(from_id) != 1) {
|
| 60 | 1x |
from_id |
| 61 | 17x |
} else if (from_id %in% colnames(from)) {
|
| 62 | 1x |
from[, from_id, drop = TRUE] |
| 63 |
} else {
|
|
| 64 | 1x |
if (is.null(rownames(from))) rownames(from) <- seq_len(nrow(from)) |
| 65 | 15x |
rownames(from) |
| 66 |
} |
|
| 67 | ! |
if (is.null(from_ids)) cli_abort("failed to resolve {.arg from} ids")
|
| 68 | ! |
if (is.numeric(from_ids) && max(from_ids) > nrow(from)) from_ids <- as.character(from_ids) |
| 69 | 17x |
to_ids <- if (length(to_id) != 1) {
|
| 70 | 1x |
to_id |
| 71 | 17x |
} else if (to_id %in% colnames(to)) {
|
| 72 | 1x |
to[, to_id, drop = TRUE] |
| 73 |
} else {
|
|
| 74 | 1x |
if (is.null(rownames(to))) rownames(to) <- seq_len(nrow(to)) |
| 75 | 15x |
rownames(to) |
| 76 |
} |
|
| 77 | ! |
if (is.null(to_id)) cli_abort("failed to resolve {.arg to} ids")
|
| 78 | ! |
if (is.numeric(to_ids) && max(to_ids) > nrow(to)) to_ids <- as.character(to_ids) |
| 79 | 17x |
if (is.character(from_coords) && all(from_coords %in% colnames(from))) {
|
| 80 | 2x |
from <- from[, from_coords, drop = TRUE] |
| 81 | 15x |
} else if (any(grepl("^sfc?$", class(from)))) {
|
| 82 | 15x |
from <- st_coordinates(from) |
| 83 | ! |
} else if (!is.null(dim(from)) && ncol(from) > 1) {
|
| 84 | ! |
from <- from[, 1:2, drop = TRUE] |
| 85 |
} |
|
| 86 | ! |
if (is.null(dim(from)) || ncol(from) != 2) cli_abort("{.arg from} must have 2 columns")
|
| 87 | 17x |
if (!is.numeric(from[, 1]) || !is.numeric(from[, 2])) {
|
| 88 | ! |
cli_abort("{.arg from} columns must all be numeric")
|
| 89 |
} |
|
| 90 | ! |
if (length(from_ids) != nrow(from)) cli_abort("From IDs and coordinates do not align")
|
| 91 | 17x |
if (is.character(to_coords) && all(to_coords %in% colnames(to))) {
|
| 92 | 2x |
to <- to[, to_coords, drop = TRUE] |
| 93 | 15x |
} else if (any(grepl("^sfc?$", class(to)))) {
|
| 94 | 15x |
to <- st_coordinates(to) |
| 95 | ! |
} else if (!is.null(dim(to)) && ncol(to) > 1) {
|
| 96 | ! |
to <- to[, 1:2, drop = TRUE] |
| 97 |
} |
|
| 98 | ! |
if (is.null(dim(to)) || ncol(to) != 2) cli_abort("{.arg to} must have 2 columns")
|
| 99 | 17x |
if (!is.numeric(to[, 1]) || !is.numeric(to[, 2])) {
|
| 100 | ! |
cli_abort("{.arg to} columns must all be numeric")
|
| 101 |
} |
|
| 102 | ! |
if (length(to_ids) != nrow(to)) cli_abort("To IDs and coordinates do not align")
|
| 103 | 17x |
if (is.null(cost)) {
|
| 104 | 12x |
cost <- 1 / lma_simets(from, to, "euclidean", symmetrical = TRUE, pairwise = TRUE) - 1 |
| 105 | 12x |
dimnames(cost) <- list(from_ids, to_ids) |
| 106 |
} |
|
| 107 | 1x |
if (is.null(dim(cost))) cost <- matrix(cost, nrow(from), dimnames = list(from_ids, to_ids)) |
| 108 | 17x |
if ((is.null(dim(weight)) && length(weight) != length(cost)) || length(substitute(...()))) {
|
| 109 | 14x |
weight <- catchment_weight(cost = cost, weight = weight, ...) |
| 110 | 3x |
} else if (is.null(dim(weight))) {
|
| 111 | 1x |
weight <- matrix(weight, nrow(from), dimnames = list(from_ids, to_ids)) |
| 112 | ! |
} else if (any(dim(cost) != dim(weight))) cli_abort("{.arg weight} does not align with {.arg cost}")
|
| 113 | 17x |
if (is.null(rownames(cost))) {
|
| 114 | 2x |
if (nrow(cost) != length(from_ids)) {
|
| 115 | ! |
cli_abort("{.arg cost} does not have row names, and is not the same length as {.arg from}'s ids")
|
| 116 |
} |
|
| 117 | 15x |
} else if (!identical(as.character(from_ids), rownames(cost))) {
|
| 118 | 1x |
if (!all(from_ids %in% rownames(cost))) {
|
| 119 | ! |
cli_abort("not all {.arg from} ids are in {.arg cost}'s rownames")
|
| 120 |
} |
|
| 121 | ! |
if (!identical(rownames(cost), rownames(weight))) rownames(weight) <- rownames(cost) |
| 122 | 1x |
cost <- cost[as.character(from_ids), ] |
| 123 | 1x |
weight <- weight[as.character(from_ids), ] |
| 124 |
} |
|
| 125 | 17x |
if (is.null(colnames(cost))) {
|
| 126 | ! |
if (ncol(cost) != length(to_ids)) {
|
| 127 | ! |
cli_abort("{.arg cost} does not have column names, and is not the same length as {.arg to}'s ids")
|
| 128 |
} |
|
| 129 | 17x |
} else if (!identical(as.character(to_ids), colnames(cost))) {
|
| 130 | 1x |
if (!all(to_ids %in% colnames(cost))) {
|
| 131 | ! |
cli_abort("not all {.arg to} ids are in {.arg cost}'s colnames")
|
| 132 |
} |
|
| 133 | ! |
if (!identical(colnames(cost), colnames(weight))) colnames(weight) <- colnames(cost) |
| 134 | 1x |
cost <- cost[, as.character(to_ids)] |
| 135 | 1x |
weight <- weight[, as.character(to_ids)] |
| 136 |
} |
|
| 137 | 17x |
fcoords <- split(from, from_ids)[from_ids] |
| 138 | 17x |
tcoords <- split(to, to_ids)[to_ids] |
| 139 | 17x |
from_ids <- names(fcoords) |
| 140 | 17x |
to_ids <- names(tcoords) |
| 141 | 17x |
if (grepl("^[slg]", return_type, TRUE)) {
|
| 142 | 2x |
res <- list( |
| 143 | 2x |
type = "FeatureCollection", |
| 144 | 2x |
features = unlist(lapply(unname(which(rowSums(weight != 0) != 0)), function(r) {
|
| 145 | 200x |
su <- which(weight[r, ] != 0) |
| 146 | 200x |
np <- names(su) |
| 147 | 200x |
w <- weight[r, su] |
| 148 | 200x |
wc <- cost[r, su] |
| 149 | 200x |
fc <- as.numeric(fcoords[[r]]) |
| 150 | 200x |
tc <- tcoords[to_ids %in% np] |
| 151 | 200x |
lapply(seq_along(np), function(i) {
|
| 152 | 402x |
list( |
| 153 | 402x |
type = "Feature", |
| 154 | 402x |
properties = list(from = from_ids[[r]], to = np[[i]], weight = w[[i]], cost = wc[[i]]), |
| 155 | 402x |
geometry = list( |
| 156 | 402x |
type = "LineString", |
| 157 | 402x |
coordinates = list(fc, as.numeric(tc[[np[[i]]]])) |
| 158 |
) |
|
| 159 |
) |
|
| 160 |
}) |
|
| 161 | 2x |
}), FALSE) |
| 162 |
) |
|
| 163 | ! |
if (!length(res$features)) cli_abort("there are no connections")
|
| 164 | 2x |
if (grepl("^s", return_type, TRUE)) {
|
| 165 | 1x |
read_sf(toJSON(res, auto_unbox = TRUE), as_tibble = FALSE) |
| 166 |
} else {
|
|
| 167 | 1x |
res |
| 168 |
} |
|
| 169 |
} else {
|
|
| 170 | 15x |
rs <- unname(which(rowSums(weight != 0) != 0)) |
| 171 | ! |
if (!length(rs)) cli_abort("there are no connections")
|
| 172 | 15x |
do.call(rbind, lapply(rs, function(r) {
|
| 173 | 1500x |
su <- which(weight[r, ] != 0) |
| 174 | 1500x |
res <- data.frame( |
| 175 | 1500x |
from = from_ids[[r]], to = names(su), weight = weight[r, su], cost = cost[r, su] |
| 176 |
) |
|
| 177 | 1500x |
rownames(res) <- NULL |
| 178 | 1500x |
res |
| 179 |
})) |
|
| 180 |
} |
|
| 181 |
} |
| 1 |
#' Aggregate Floating Catchment Area Ratios |
|
| 2 |
#' |
|
| 3 |
#' Calculate aggregate sums or means, potentially weighted by number of consumers. |
|
| 4 |
#' |
|
| 5 |
#' @param from A matrix-like object with IDs, values, and number of consumers, or a vector |
|
| 6 |
#' of values. These are the lower-level entities to be aggregated up to entries in \code{to}.
|
|
| 7 |
#' @param to A matrix-like object with IDs and total consumers. These are higher-level |
|
| 8 |
#' entities containing \code{from} entities.
|
|
| 9 |
#' @param id The column name of IDs in \code{from}, or a vector of IDs.
|
|
| 10 |
#' @param value The column name of values in \code{from} or a vector of values to be aggregated.
|
|
| 11 |
#' @param consumers The column name of consumer totals in \code{from}, or a vector
|
|
| 12 |
#' of said totals of the same length as \code{from} IDs.
|
|
| 13 |
#' @param to_id The column name of IDs in \code{to}, or a vector of IDs.
|
|
| 14 |
#' @param to_consumers The column name of consumer totals in \code{to}, or a vector
|
|
| 15 |
#' of said totals of the same length as \code{to} IDs.
|
|
| 16 |
#' @param map A named list, with names as or corresponding to \code{to} IDs, and vectors
|
|
| 17 |
#' containing \code{from} IDs as entries.
|
|
| 18 |
#' @param original_from Logical indicating whether the \code{from} values are original (default).
|
|
| 19 |
#' If \code{from} values are original and \code{consumers} are specified, \code{from} values will be
|
|
| 20 |
#' multiplied by \code{consumers} before aggregation. Set this to \code{FALSE} if this has already been done
|
|
| 21 |
#' (such as if you set \code{return_type} to \code{"region"} in \code{\link{catchment_ratio}}).
|
|
| 22 |
#' @param return_type Determines the values that are returned: \code{"original"} (default) for an average
|
|
| 23 |
#' for each \code{to} ID based on the region sum of \code{consumers}, or specified \code{to_consumers}.
|
|
| 24 |
#' If a number, original scores will be multiplied by the number then averaged like \code{"original"}. If
|
|
| 25 |
#' \code{"region"}, the sum for each \code{to} ID is returned.
|
|
| 26 |
#' @param verbose Logical; if \code{TRUE}, will print a log of the aggregation process.
|
|
| 27 |
#' @examples |
|
| 28 |
#' # lower-level entries prepended with higher-level IDs (like GEOIDs) |
|
| 29 |
#' higher <- data.frame(id = c(100, 110), pop = c(500, 500)) |
|
| 30 |
#' lower <- data.frame( |
|
| 31 |
#' id = paste0(higher$id, 1:5), |
|
| 32 |
#' value = 1:5 / 10, |
|
| 33 |
#' pop = c(50, 10, 100, 40, 30) |
|
| 34 |
#' ) |
|
| 35 |
#' catchment_aggregate( |
|
| 36 |
#' lower, higher, |
|
| 37 |
#' consumers = "pop", id = "id", value = "value", |
|
| 38 |
#' to_id = "id", verbose = TRUE |
|
| 39 |
#' ) |
|
| 40 |
#' |
|
| 41 |
#' # same thing with IDs specified in a map |
|
| 42 |
#' catchment_aggregate( |
|
| 43 |
#' lower, higher, |
|
| 44 |
#' consumers = "pop", id = "id", value = "value", |
|
| 45 |
#' map = list("100" = c(1001, 1003, 1005), "110" = c(1102, 1104)), verbose = TRUE
|
|
| 46 |
#' ) |
|
| 47 |
#' @return A vector with an aggregate value (determined by \code{return_type})
|
|
| 48 |
#' for each entry in \code{to}.
|
|
| 49 |
#' @export |
|
| 50 | ||
| 51 |
catchment_aggregate <- function(from, to = NULL, id = "GEOID", value = "access", consumers = "population",
                                to_id = id, to_consumers = consumers, map = NULL, original_from = TRUE,
                                return_type = "original", verbose = FALSE) {
  if (verbose) cli_rule("Aggregating Values")
  # `from` is required; as a convenience, a lone `value` vector can stand in for it
  if (missing(from)) {
    if (!missing(value)) {
      from <- value
    } else {
      cli_abort("{.arg from} must be provided")
    }
  }
  # resolve lower-level (from) IDs: explicit vector > vector names > column of `from`
  fid <- if (length(id) > 1) {
    if (verbose) cli_alert_info("from IDs: {.arg id} vector")
    id
  } else if (is.null(dim(from))) {
    if (is.null(names(from))) cli_abort("{.arg id} must be specified if they are not provided in {.arg from}")
    if (verbose) cli_alert_info("from IDs: {.arg from} names")
    names(from)
  } else {
    if (verbose) cli_alert_info("from IDs: {.var {id}} column")
    from[[id]]
  }
  if (is.null(fid)) cli_abort("failed to resolve from IDs")
  # resolve lower-level values: explicit vector > `from` itself (if a vector) >
  # named column > constant 1 (so aggregation degenerates to counting/averaging)
  fv <- if (length(value) > 1) {
    if (verbose) cli_alert_info("from values: {.arg value} vector")
    value
  } else if (is.null(dim(from))) {
    if (verbose) cli_alert_info("from values: entered vector")
    from
  } else if (!is.null(value) && value %in% colnames(from)) {
    if (verbose) cli_alert_info("from values: {.var {value}} column")
    from[[value]]
  } else {
    # fixed cli inline-class typo: was {.feild 1}, which is not a valid cli class
    if (verbose) cli_alert_info("from values: {.field 1}")
    rep(1, length(fid))
  }
  if (is.null(fv) || length(fv) != length(fid)) cli_abort("failed to resolve {.arg from} values")

  n <- length(fv)
  if (length(fid) != n) cli_abort("IDs and values were not the same length")
  # resolve per-from consumer counts (weights), if available
  cv <- if (length(consumers) > 1) {
    if (verbose) cli_alert_info("consumers: {.arg consumers} vector")
    consumers
  } else if (!is.null(consumers) && consumers %in% colnames(from)) {
    if (verbose) cli_alert_info("consumers: {.var {consumers}} column")
    from[[consumers]]
  } else {
    NULL
  }
  if (!is.null(cv) && length(cv) != n) cli_abort("{.arg consumers} was not the same length as values")
  # original per-capita values are converted to regional totals before summing
  if (original_from && !is.null(cv) && length(cv) == n) fv <- fv * cv
  # `from` values should now be either per-region values or normalized

  # mapping `from` ids to `to` ids; `to` may be a data frame, a vector of IDs,
  # a column name in `from`, a conversion function, or a prefix length
  if (is.null(to)) to <- if (!missing(to_id) && (!is.character(to_id) || length(to_id) > 1)) to_id else from
  tid <- if (length(to_id) > 1) {
    if (verbose) cli_alert_info("to IDs: {.arg to_id} vector")
    to_id
  } else if (!is.null(to_id) && to_id %in% colnames(to)) {
    if (verbose) cli_alert_info("to IDs: {.var {to_id}} column")
    to[[to_id]]
  } else if (length(to) == 1) {
    if (is.function(to)) {
      if (verbose) cli_alert_info("to IDs: {.arg from} IDs converted by {.arg to} function")
      to(fid)
    } else if (is.numeric(to)) {
      if (verbose) cli_alert_info(paste("to IDs: first", to, "characters of {.arg from} IDs"))
      substr(fid, 1, to)
    } else {
      if (verbose) cli_alert_info("to IDs: {.var {to}} column of {.arg from}")
      from[[to]]
    }
  } else if (is.null(dim(to))) {
    if (verbose) cli_alert_info("to IDs: {.arg to} vector")
    to
  } else {
    NULL
  }
  if (is.null(tid)) cli_abort("failed to resolve {.arg to} IDs")

  # resolve higher-level consumer totals, used as the averaging denominator
  tcv <- if (length(to_consumers) > 1) {
    if (verbose) cli_alert_info("to consumers: {.arg to_consumers} vector")
    to_consumers
  } else if (!is.null(to_consumers) && to_consumers %in% colnames(to)) {
    if (verbose) cli_alert_info("to consumers: {.var {to_consumers}} column")
    to[[to_consumers]]
  } else {
    NULL
  }
  # when no `to` totals exist, denominators are accumulated from `from` consumers
  aggregate_consumers <- is.null(tcv)
  # column 1: values to sum; column 2: denominator contributions (1s when not needed)
  fv <- cbind(fv, if (!aggregate_consumers || is.null(cv)) 1 else cv)

  if (is.list(map)) {
    # explicit mapping: each `map` entry lists the `from` IDs within a `to` ID
    if (is.null(names(map))) cli_abort("{.arg map} must have names")
    if (verbose) cli_alert_info("mapping: by map list")
    if (is.null(tid)) tid <- names(map)
    utid <- as.character(unique(tid))
    av <- vapply(utid, function(h) {
      l <- map[[h]]
      su <- fid %in% l
      if (is.null(l) || !length(su)) {
        return(c(NA, NA))
      }
      colSums(fv[su, , drop = FALSE], na.rm = TRUE)
    }, numeric(2))
  } else {
    # implicit mapping: direct ID equality, exact match, or shared-prefix match
    if (length(tid) == n) {
      if (verbose) cli_alert_info("mapping: by {.arg to} ID")
      fid <- tid
      tid <- unique(tid)
    } else {
      tnc <- nchar(tid[1])
      if (tnc > nchar(fid[1]) || any(nchar(tid) != tnc)) {
        if (!any(fid %in% tid)) cli_abort("failed to figure out how to map {.arg from} ids to {.arg to} ids")
        if (verbose) cli_alert_info("mapping: by ID match")
      } else {
        # `from` IDs are assumed to be `to` IDs with a suffix (e.g., GEOIDs)
        if (verbose) cli_alert_info(paste("mapping: by first", tnc, "character match"))
        fid <- substr(fid, 1, tnc)
      }
    }
    utid <- as.character(unique(tid))
    av <- vapply(utid, function(h) {
      su <- fid == h
      if (any(su)) colSums(fv[su, , drop = FALSE], na.rm = TRUE) else c(NA, NA)
    }, numeric(2))
  }
  # expand aggregates back out if `to` IDs were repeated
  if (length(utid) != length(tid)) {
    if (verbose) cli_alert_info("to repeated IDs")
    av <- rbind(unname(av[1, ][tid]), unname(av[2, ][tid]))
  }

  # a numeric return_type scales values before averaging
  if (is.numeric(return_type)) {
    if (verbose) cli_alert_info(paste("multiplying by", return_type))
    av[1, ] <- av[1, ] * return_type
  }
  if (is.character(return_type) && grepl("^r", return_type, TRUE)) {
    # "region": return raw per-`to` sums
    if (verbose) cli_bullets(c(v = "returning sum of value per {.arg to} ID"))
    av[1, ]
  } else {
    # NOTE(review): this condition is always FALSE (aggregate_consumers is
    # defined as is.null(tcv)); possibly `!aggregate_consumers` was intended
    # to fall back when `tcv` mismatches `av` -- behavior left unchanged.
    if (aggregate_consumers && !is.null(tcv) && ncol(av) != length(tcv)) aggregate_consumers <- FALSE
    if (verbose) {
      cli_bullets(c(v = paste0(
        "returning sum of value over total {.arg ",
        if (aggregate_consumers) "from" else "to",
        "} consumers per {.arg to} ID"
      )))
    }
    if (aggregate_consumers) {
      # guard against division by zero when a region had no consumers
      av[2, av[2, ] == 0] <- 1
      av[1, ] / av[2, ]
    } else {
      tcv[tcv == 0] <- 1
      av[1, ] / tcv
    }
  }
}
| 1 |
#' Simulate Catchment Areas |
|
| 2 |
#' |
|
| 3 |
#' Generate a random population of consumers and providers. |
|
| 4 |
#' |
|
| 5 |
#' @param n Number of consumers to generate. |
|
| 6 |
#' @param weight Weight method used on Euclidean distances between consumers and providers. |
|
| 7 |
#' @param ... Passes additional arguments to \code{\link{catchment_weight}}.
|
|
| 8 |
#' @param consumers_range A numeric vector with entries for minimal and maximal consumer counts |
|
| 9 |
#' from which to sample. |
|
| 10 |
#' @param long_center Longitude to randomly place consumers around. |
|
| 11 |
#' @param long_spread Standard deviation of the consumer's longitude distribution. |
|
| 12 |
#' @param lat_center Latitude to randomly place consumers around. |
|
| 13 |
#' @param lat_spread Standard deviation of the consumer's latitude distribution. |
|
| 14 |
#' @param as_sf Logical; if \code{FALSE}, consumer and provider data are returned in regular \code{data.frame}s.
|
|
| 15 |
#' @examples |
|
| 16 |
#' pop <- simulate_catchments() |
|
| 17 |
#' if (require("leaflet", quiet = TRUE)) {
|
|
| 18 |
#' leaflet(pop$providers) |> |
|
| 19 |
#' addCircles( |
|
| 20 |
#' radius = 1e5, color = "#666", stroke = FALSE |
|
| 21 |
#' ) |> |
|
| 22 |
#' addCircles( |
|
| 23 |
#' data = pop$consumers, label = ~ paste0("Consumers: ", count, ", Access: ", access)
|
|
| 24 |
#' ) |> |
|
| 25 |
#' addCircles( |
|
| 26 |
#' color = "#000", label = ~ paste("Provider", id)
|
|
| 27 |
#' ) |
|
| 28 |
#' } |
|
| 29 |
#' @return A list with \code{consumers}, \code{providers}, \code{cost}, and \code{weight} entries.
|
|
| 30 |
#' @export |
|
| 31 | ||
| 32 |
simulate_catchments <- function(n = 100, weight = 1, ..., consumers_range = c(10, 5000), long_center = -99,
                                long_spread = 1, lat_center = 40, lat_spread = 1, as_sf = TRUE) {
  # roll consumers: random counts, with locations normally distributed around the center
  consumers <- data.frame(
    count = sample(seq(consumers_range[1], consumers_range[2]), n, TRUE),
    X = rnorm(n, long_center, long_spread),
    Y = rnorm(n, lat_center, lat_spread)
  )
  locs <- consumers[, c("X", "Y")]
  # lma_simets returns similarities; invert to recover Euclidean distances
  consumers_dist <- 1 / lma_simets(locs, "euclidean", symmetrical = TRUE) - 1
  diag(consumers_dist) <- Inf
  # start from the consumer whose nearest neighbor is farthest away
  # (max.col(-consumers_dist) picks each row's nearest other consumer)
  start <- which.max(diag(consumers_dist[, max.col(-consumers_dist)]))
  diag(consumers_dist) <- 0
  # preallocate one potential provider per consumer; unused rows are dropped on break
  providers <- data.frame(id = seq_len(nrow(consumers)), X = 0, Y = 0)
  # placing providers: repeatedly drop a provider near the currently worst-served
  # consumer and its in-catchment neighbors, until every consumer has nonzero access
  for (i in providers$id) {
    # consumers reachable from `start` under the weight scheme (plus `start` itself)
    sel <- c(start, which(consumers_dist[start, ] * catchment_weight(
      consumers_dist[start, , drop = FALSE],
      weight, ...
    ) != 0))
    # place the provider at the jittered centroid of that cluster
    providers[i, c("X", "Y")] <- colMeans(locs[sel, , drop = FALSE] + rnorm(length(sel) * 2, 0, .1))
    r <- catchment_ratio(consumers, providers[seq_len(i), , drop = FALSE], weight = weight, ...)
    if (all(r != 0)) {
      providers <- providers[seq_len(i), ]
      break
    }
    # next iteration targets the consumer with the lowest access ratio
    start <- which.min(r)
  }
  # NOTE(review): `r` is the value leaked from the final loop iteration; this relies
  # on the loop body running at least once (n >= 1) -- confirm intended.
  consumers$access <- r
  # NOTE(review): consumers[, -1] drops only `count`, so the just-added `access`
  # column is included alongside X and Y in this distance computation, while
  # providers[, -1] is X and Y only -- confirm whether `locs` was intended here.
  cost <- 1 / lma_simets(consumers[, -1], providers[, -1], "euclidean", symmetrical = TRUE) - 1
  list(
    consumers = if (as_sf) st_as_sf(consumers, coords = c("X", "Y"), crs = 4326) else consumers,
    providers = if (as_sf) st_as_sf(providers, coords = c("X", "Y"), crs = 4326) else providers,
    cost = cost, weight = catchment_weight(cost, weight, ...)
  )
}
| 1 |
#' Download and Prepare Map Shapes |
|
| 2 |
#' |
|
| 3 |
#' Download cartographic boundary files from the \href{https://www.census.gov}{U.S. Census Bureau},
|
|
| 4 |
#' and reformat them to simplified GeoJSON files. |
|
| 5 |
#' |
|
| 6 |
#' @param dir Path to a directory in which to save the reformatted files. If these are to be |
|
| 7 |
#' used in a published site, they should be in the \code{"docs"} directory of the site directory. If not specified,
|
|
| 8 |
#' the shapes are downloaded and returned, but not saved. |
|
| 9 |
#' @param fips State Federal Information Processing Standard (FIPS) code, United States Postal Service (USPS) |
|
| 10 |
#' code, or name; see \href{https://www2.census.gov/geo/docs/reference/state.txt}{state.txt}.
|
|
| 11 |
#' @param entity Entity name (e.g., \code{"county"}, \code{"tract"}, or \code{"bg"}); see
|
|
| 12 |
#' \href{https://www2.census.gov/geo/tiger/GENZ2019/2019_file_name_def.pdf}{2019_file_name_def.pdf}
|
|
| 13 |
#' (or appropriate year's file). States and counties are only available at the national level, so if these are |
|
| 14 |
#' requested for a particular state, the national files will be downloaded and subset. |
|
| 15 |
#' @param name Name for the GeoJSON file (without extension) to be written. |
|
| 16 |
#' @param year Year of the shapes, 2013 and after. |
|
| 17 |
#' @param resolution Resolution of the shapes; one of \code{"500k"} (default), \code{"5m"}, or \code{"20m"}.
|
|
| 18 |
#' @param strip_features Logical; if \code{TRUE}, will strip all features other than IDs.
|
|
| 19 |
#' @param simplify A function to simplify shapes with, such as \code{rmapshaper::ms_simplify}. This
|
|
| 20 |
#' function should take the \code{sf} \code{data.frame} as its first argument, and return that object
|
|
| 21 |
#' with its \code{geometry} replaced.
|
|
| 22 |
#' @param ... Passes additional arguments to \code{simplify}; e.g., for \code{rmapshaper::ms_simplify},
|
|
| 23 |
#' you might want to specify \code{keep} and set \code{keep_shapes} to \code{TRUE}.
|
|
| 24 |
#' @param force Logical; if \code{TRUE}, will force a re-download, and overwrite any existing files.
|
|
| 25 |
#' @examples |
|
| 26 |
#' \dontrun{
|
|
| 27 |
#' # download Virginia counties shapes |
|
| 28 |
#' shapes <- download_census_shapes( |
|
| 29 |
#' "docs/shapes", "va", "county", |
|
| 30 |
#' name = "counties", |
|
| 31 |
#' simplify = rmapshaper::ms_simplify, keep_shapes = TRUE |
|
| 32 |
#' ) |
|
| 33 |
#' } |
|
| 34 |
#' @return An \code{sf} \code{data.frame} with a geometry column containing shapes, and other features in columns.
|
|
| 35 |
#' @export |
|
| 36 | ||
| 37 |
download_census_shapes <- function(dir = NULL, fips = "us", entity = "state", name = NULL, year = 2019,
                                   resolution = "500k", strip_features = FALSE, simplify = NULL, ..., force = FALSE) {
  # lookup table of state names, USPS postal codes, and FIPS codes (parallel vectors)
  us_fips <- list(
    name = c(
      "united states", "alabama", "alaska", "arizona", "arkansas", "california", "colorado", "connecticut",
      "delaware", "district of columbia", "florida", "georgia", "hawaii", "idaho", "illinois", "indiana", "iowa",
      "kansas", "kentucky", "louisiana", "maine", "maryland", "massachusetts", "michigan", "minnesota", "mississippi",
      "missouri", "montana", "nebraska", "nevada", "new hampshire", "new jersey", "new mexico", "new york",
      "north carolina", "north dakota", "ohio", "oklahoma", "oregon", "pennsylvania", "rhode island", "south carolina",
      "south dakota", "tennessee", "texas", "utah", "vermont", "virginia", "washington", "west virginia", "wisconsin",
      "wyoming", "american samoa", "guam", "northern mariana islands", "puerto rico", "minor outlying islands",
      "virgin islands"
    ),
    post = c(
      "us", "al", "ak", "az", "ar", "ca", "co", "ct", "de", "dc", "fl", "ga", "hi", "id", "il", "in", "ia", "ks", "ky",
      "la", "me", "md", "ma", "mi", "mn", "ms", "mo", "mt", "ne", "nv", "nh", "nj", "nm", "ny", "nc", "nd", "oh", "ok",
      "or", "pa", "ri", "sc", "sd", "tn", "tx", "ut", "vt", "va", "wa", "wv", "wi", "wy", "as", "gu", "mp", "pr", "um",
      "vi"
    ),
    fips = c(
      "us", 1, 2, 4, 5, 6, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
      33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 44, 45, 46, 47, 48, 49, 50, 51, 53, 54, 55, 56, 60, 66, 69, 72, 74, 78
    )
  )
  # entity names accepted by the Census cartographic boundary file naming scheme
  entities <- c(
    "aiannh", "anrc", "bg", "cbsa", "cd116", "cnecta", "concity", "county", "county_within_cd116",
    "county_within_ua", "cousub", "csa", "division", "elsd", "nation", "necta", "place", "puma10", "region", "scsd",
    "sldl", "sldu", "state", "subbarrio", "tract", "ua10", "unsd", "zcta510"
  )
  # normalize input: lowercase, drop a leading zero (e.g., "01" -> "1") and punctuation
  fips <- gsub("^0|[^a-z0-9\\s]", "", tolower(fips[[1]]))
  # try FIPS code first, then 2-character postal code, then partial state-name match
  fid <- which(us_fips$fips == fips)
  if (!length(fid)) {
    fid <- if (nchar(fips) == 2) {
      which(us_fips$post == fips)
    } else {
      pmatch(fips, us_fips$name)
    }
    if (!length(fid) || is.na(fid)) cli_abort("failed to recognize {.arg fips}")
  }
  fips <- us_fips$fips[fid]
  # restore the two-digit, zero-padded form used in file names
  if (nchar(fips) == 1) fips <- paste0("0", fips)
  # allow two-digit years (e.g., 19 -> 2019)
  if (nchar(year) < 4) year <- as.numeric(paste0("20", year))
  if (year < 2013) cli_abort("only years 2013 and after are available from this function")
  entity <- tolower(entity)
  entity <- match.arg(entity, entities)
  # state/county shapes only exist at the national level; download "us" and subset after
  subset <- NULL
  if (fips != "us" && (entity == "county" || entity == "state")) {
    subset <- fips
    fips <- "us"
  }
  resolution <- tolower(resolution)
  resolution <- match.arg(resolution, c("500k", "5m", "20m"))
  # Census cartographic boundary file name, e.g. cb_2019_us_state_500k
  file <- paste0("cb_", year, "_", fips, "_", entity, "_", resolution)
  # 2013 files live directly under GENZ2013/; later years under .../shp/
  url <- paste0("https://www2.census.gov/geo/tiger/GENZ", year, if (year == 2013) "/" else "/shp/", file, ".zip")
  temp <- tempdir()
  temp_file <- paste0(temp, "/", file, ".zip")
  # NOTE(review): with dir = NULL, paste0(NULL, "/", ...) yields a root-level path
  # like "/cb_....geojson", which presumably never exists, forcing a fresh download.
  out_file <- paste0(dir, "/", if (!is.null(name)) name else file, ".geojson")
  if (force && file.exists(out_file)) unlink(out_file)
  if (!file.exists(out_file)) {
    # reuse a previously extracted copy in tempdir unless forcing a re-download
    if (!force && file.exists(paste0(temp, "/", file))) temp_file <- paste0(temp, "/", file)
    if (force || !file.exists(temp_file)) {
      download.file(url, temp_file)
      unzip(temp_file, exdir = paste0(temp, "/", file))
      file.remove(temp_file)
      # point at the extraction directory for st_read
      temp_file <- paste0(temp, "/", file)
    }
    shapes <- st_read(temp_file, quiet = TRUE)
    # subset national state/county files to the requested state
    if (!is.null(subset)) shapes <- shapes[shapes$STATEFP == subset, ]
    if (strip_features) shapes <- shapes[, "GEOID", drop = FALSE]
    if (is.function(simplify)) shapes <- simplify(shapes, ...)
    if (!is.null(dir)) {
      if (!file.exists(dir)) dir.create(dir, FALSE, TRUE)
      st_write(shapes, out_file)
    }
  } else {
    # cached output already exists; read it instead of downloading
    shapes <- st_read(out_file, quiet = TRUE)
  }
  invisible(shapes)
}
| 1 |
#' Extract a network of consumers and providers |
|
| 2 |
#' |
|
| 3 |
#' Extract an interconnected set of consumers and providers from a set of connections within catchment areas. |
|
| 4 |
#' |
|
| 5 |
#' @param connections A list- or matrix-like object with \code{"from"} and \code{"to"} entries, such as that
|
|
| 6 |
#' returned from \code{\link{catchment_connections}}.
|
|
| 7 |
#' @param from_start,to_start The ID of a \code{from} (consumer) or \code{to} (provider) in \code{connections} from
|
|
| 8 |
#' which to trace out a network. |
|
| 9 |
#' @examples |
|
| 10 |
#' pop <- simulate_catchments() |
|
| 11 |
#' connections <- catchment_connections( |
|
| 12 |
#' pop$consumers, pop$providers, |
|
| 13 |
#' weight = 1, max_cost = .5, |
|
| 14 |
#' ) |
|
| 15 |
#' catchment_network(connections, 1) |
|
| 16 |
#' @return A subsetted version of \code{connections} containing only connections traceable from \code{from_start}
|
|
| 17 |
#' and/or \code{to_start}.
|
|
| 18 |
#' @export |
|
| 19 | ||
| 20 |
catchment_network <- function(connections, from_start = NULL, to_start = NULL) {
  # `connections` must expose "from" and "to" entries (e.g., from catchment_connections)
  if (any(!c("from", "to") %in% colnames(connections))) cli_abort("{.arg connections} not recognized")
  origins <- connections[["from"]]
  destinations <- connections[["to"]]
  origin_ids <- unique(origins)
  # seed the trace: explicit starts if given, otherwise the first origin
  reached_from <- if (is.null(from_start) && is.null(to_start)) origins[1] else from_start
  reached_to <- to_start
  # alternately expand reachable providers and consumers until the frontier
  # stabilizes; at most length(origin_ids) rounds are ever needed
  for (round in seq_along(origin_ids)) {
    reached_to <- unique(c(reached_to, destinations[origins %in% reached_from]))
    reached_from <- unique(c(reached_from, origins[destinations %in% reached_to]))
    if (all(origin_ids %in% reached_from)) break
  }
  # keep only connections touching the traced network
  connections[origins %in% reached_from | destinations %in% reached_to, ]
}