| 1 |
#' Calculate Floating Catchment Area Ratios |
|
| 2 |
#' |
|
| 3 |
#' Calculate provider (supply) to consumer (demand) ratios within floating catchment areas. |
|
| 4 |
#' |
|
| 5 |
#' @param consumers Number of consumers (demand); either a vector with consumer amounts (such as population counts), |
|
| 6 |
#' or a matrix-like object with a column of IDs (located by \code{consumers_id}) and a column of amounts (located by
|
|
| 7 |
#' \code{consumers_value}).
|
|
| 8 |
#' @param providers Number of providers (supply); either a vector with provider amounts (such as number of doctors), |
|
| 9 |
#' or a matrix-like object with a column of IDs (located by \code{providers_id}) and a column of amounts (located by
|
|
| 10 |
#' \code{providers_value}).
|
|
| 11 |
#' @param cost A matrix-like object of cost associated with each pair of \code{consumers} and \code{providers}
|
|
| 12 |
#' (such as distance or travel times), with \code{consumers} in rows and \code{providers} in columns.
|
|
| 13 |
#' \code{cost}'s dimensions should be \code{c(length(consumers), length(providers))} if \code{consumers} and
|
|
| 14 |
#' \code{providers} are vectors, or will be aligned by name if available (as vector names or in \code{_id} columns).
|
|
| 15 |
#' If \code{NULL}, coordinate information will be looked for in \code{consumers} and \code{providers} (based on
|
|
| 16 |
#' \code{consumers_location} and \code{providers_location}), from which to calculate Euclidean distances.
|
|
| 17 |
#' Costs equal to \code{0} are set to \code{adjust_zeros} by default, and any missing costs are then set to \code{0}
|
|
| 18 |
#' -- if \code{0} indicates no connection in your \code{cost} matrix, set \code{adjust_zeros} to \code{FALSE}, or
|
|
| 19 |
#' set \code{0} to \code{NA} in your \code{cost} matrix.
|
|
| 20 |
#' @param weight Means of defining catchment areas and their topology / friction / impedance. The simplest is a single |
|
| 21 |
#' number representing a maximum distance between \code{consumers} and \code{providers} (2-step floating catchment area;
|
|
| 22 |
#' 2SFCA; Luo & Wang, 2003). |
|
| 23 |
#' An enhancement of this is a list of vectors with two values each: the first is a distance, and the second a weight |
|
| 24 |
#' to associate with that distance (e.g., \code{list(c(10, 1), c(20, .5))}, which will give consumers within a
|
|
| 25 |
#' \code{cost} of 10 full weight, and those within a \code{cost} of 20 half weight; enhanced 2-step floating catchment
|
|
| 26 |
#' area; E2SFCA; Luo & Qi 2009). If a character, refers to a weighting function (kernel density 2-step floating |
|
| 27 |
#' catchment area; KD2SFCA; Dai, 2010; in order from most gradual to steepest between costs of \code{1} and \code{6}):
|
|
| 28 |
#' \tabular{ll}{
|
|
| 29 |
#' \code{linear} (\code{li}) \tab \code{{w <- (scale - cost) / scale; w[w < 0] <- 0; w}}\cr
|
|
| 30 |
#' \code{gaussian} (\code{ga}) \tab \code{exp(-cost^2 / (2 * scale ^ 2))}\cr
|
|
| 31 |
#' \code{d*} (name of a density function; e.g., \code{"dnorm"}) \tab
|
|
| 32 |
#' \code{weight(cost, 0, scale)}
|
|
| 33 |
#' \cr |
|
| 34 |
#' \code{p*} (name of a distribution function; e.g., \code{"pnorm"}) \tab \code{weight(-cost, 0, scale)}\cr
|
|
| 35 |
#' \code{gravity} / \code{normal} (\code{gr} or \code{n}) \tab \code{sqrt(1 / cost^scale)}\cr
|
|
| 36 |
#' \code{logarithmic} (\code{loga}) \tab \code{1 / (1 + log(cost, scale))}\cr
|
|
| 37 |
#' \code{logistic} (\code{l}) \tab \code{1 / (1 + exp(scale * cost))}\cr
|
|
| 38 |
#' \code{exponential} (\code{e}) \tab \code{exp(-cost * scale)}\cr
|
|
| 39 |
#' } |
|
| 40 |
#' If a function, this will be passed \code{cost} as its first argument -- its output should be a matrix
|
|
| 41 |
#' convertible to a sparse matrix, of the same dimensions as \code{cost}. If a matrix-like object,
|
|
| 42 |
#' this will be converted to a sparse matrix. |
|
| 43 |
#' @param normalize_weight Logical; if \code{TRUE}, weight is row-normalized such that \code{consumers} weights
|
|
| 44 |
#' are spread across \code{providers} in range. This can help correct for the increased weight of \code{consumers}
|
|
| 45 |
#' when they are in range of multiple \code{providers}. Selection weights like this make the difference between 2-
|
|
| 46 |
#' and 3-step floating catchment areas (3SFCA; Wan, Zou, & Sternberg, 2012). |
|
| 47 |
#' @param scale Numeric scaling factor if \code{weight} is the name of a decay function.
|
|
| 48 |
#' @param max_cost Numeric limit on \code{cost}. This is the same as setting \code{weight} to a single value,
|
|
| 49 |
#' or specifying a list of steps as \code{weight} (where the most distant step is effectively \code{max_cost}),
|
|
| 50 |
#' although a single-value weight is exclusive (\code{cost < weight}) where steps are inclusive. This is most useful
|
|
| 51 |
#' when \code{weight} is a weighting function, where \code{max_cost} will trim the tail of the weight distribution.
|
|
| 52 |
#' @param adjust_consumers,adjust_providers A function to adjust weights when applied to \code{consumers} or
|
|
| 53 |
#' \code{providers}; should take the sparse weight matrix as its first argument, and return an adjusted matrix of the
|
|
| 54 |
#' same type. For example, you could square provider weights for the modified 2-step floating catchment area |
|
| 55 |
#' (M2SFCA; Delamater, 2013) with \code{adjust_providers = function(w) w ^ 2}, or standardize both weights for the
|
|
| 56 |
#' balanced floating catchment area (BFCA; Paez, Higgins, & Vivona, 2019) with \code{adjust_consumers = }
|
|
| 57 |
#' \code{function(w) w / rowSums(w)} and \code{adjust_providers = function(w) sweep(w, 2, colSums(w), "/")}.
|
|
| 58 |
#' When weights are adjusted independently in this way, region scores will likely no longer sum to the sum |
|
| 59 |
#' of \code{providers} (fewer than the total number of providers will be distributed).
|
|
| 60 |
#' @param adjust_zeros A number to set real \code{0}s to. Set to \code{FALSE} to prevent \code{0}s from being adjusted.
|
|
| 61 |
#' @param return_type Determines the values that are returned: \code{"original"} (default) for \code{providers}
|
|
| 62 |
#' per \code{consumers} (e.g., how many, likely fractional, doctors are accessible by each person within each region),
|
|
| 63 |
#' \code{"region"} for number of \code{providers} per \code{consumers} entry (\code{consumers * original}; e.g.,
|
|
| 64 |
#' how many doctors are accessible within each region), or \code{"normalized"} for \code{original} divided by
|
|
| 65 |
#' \code{sum(region) / sum(consumers)}. Can also be a number by which to multiply the original values (e.g., \code{1000}
|
|
| 66 |
#' for \code{providers} per 1,000 \code{consumers}). Alternatively \code{"supply"} will return just the
|
|
| 67 |
#' number of resources allocated to each consumer location (total weighted resources within each consumer's catchment area), |
|
| 68 |
#' or \code{"demand"} will return just the population per provider (total weighted population within each provider's catchment area).
|
|
| 69 |
#' @param consumers_commutes A square, consumers source x consumers origin matrix with counts of origins, |
|
| 70 |
#' used to specify multiple possible origins for each consumer location (e.g., consumers living in location 1 |
|
| 71 |
#' may work in locations 1 and 3, so the first row of \code{consumers_commutes} should have values in columns 1 and 3).
|
|
| 72 |
#' This can also be entered in place of \code{consumers}, assuming it includes all consumers (e.g., in a worker commute
|
|
| 73 |
#' matrix, you may need to add non-workers to the diagonal, if they are also consumers). |
|
| 74 |
#' @param consumers_id,consumers_value,consumers_location,providers_id,providers_value,providers_location Column |
|
| 75 |
#' names in \code{consumers} and/or \code{providers} to extract IDs, values, and location data (referring to a single
|
|
| 76 |
#' \code{sf} geometry column, or multiple columns with coordinates). These can also be used to directly enter
|
|
| 77 |
#' ID, value, and/or location vectors (or matrices for location coordinates). |
|
| 78 |
#' @param distance_metric Name of the distance metric to be used, if costs are being calculated from coordinates; |
|
| 79 |
#' defaults to \code{"euclidean"}; see \code{\link[lingmatch]{lma_simets}}.
|
|
| 80 |
#' @param verbose Logical; if \code{TRUE}, will print logs, and the type of floating catchment area that was calculated.
|
|
| 81 |
#' @examples |
|
| 82 |
#' pop <- c(5, 10, 50) |
|
| 83 |
#' doc <- c(50, 100) |
|
| 84 |
#' travel_time <- matrix(c(5, 50, 25, 70, 40, 30), ncol = 2) |
|
| 85 |
#' |
|
| 86 |
#' # 2-step floating catchment area |
|
| 87 |
#' catchment_ratio(pop, doc, travel_time, 30) |
|
| 88 |
#' |
|
| 89 |
#' # kernel density (Gaussian) 2-step floating catchment area |
|
| 90 |
#' catchment_ratio(pop, doc, travel_time, "gaussian") |
|
| 91 |
#' |
|
| 92 |
#' # enhanced 2-step floating catchment area |
|
| 93 |
#' step_weights <- list(c(60, .22), c(40, .68), c(20, 1)) |
|
| 94 |
#' catchment_ratio(pop, doc, travel_time, step_weights) |
|
| 95 |
#' |
|
| 96 |
#' # modified 2-step floating catchment area |
|
| 97 |
#' catchment_ratio(pop, doc, travel_time, step_weights, adjust_providers = function(m) m^2) |
|
| 98 |
#' |
|
| 99 |
#' # balanced 2-step floating catchment area |
|
| 100 |
#' catchment_ratio( |
|
| 101 |
#' pop, doc, travel_time, step_weights, |
|
| 102 |
#' adjust_consumers = function(w) sweep(w, 1, rowSums(w), "/", FALSE), |
|
| 103 |
#' adjust_providers = function(w) sweep(w, 2, colSums(w), "/", FALSE), |
|
| 104 |
#' ) |
|
| 105 |
#' |
|
| 106 |
#' # 3-step floating catchment area |
|
| 107 |
#' catchment_ratio(pop, doc, travel_time, step_weights, normalize_weight = TRUE) |
|
| 108 |
#' |
|
| 109 |
#' # visualized weight functions |
|
| 110 |
#' if (require("splot", quietly = TRUE)) {
|
|
| 111 |
#' cost <- 1:10 |
|
| 112 |
#' scale <- 2 |
|
| 113 |
#' splot(list( |
|
| 114 |
#' linear = (10 - cost) / 10, |
|
| 115 |
#' gaussian = exp(-cost^2 / (2 * scale^2)), |
|
| 116 |
#' dnorm = dnorm(cost, 0, scale), |
|
| 117 |
#' pnorm = pnorm(-cost, 0, scale), |
|
| 118 |
#' gravity = sqrt(1 / cost^scale), |
|
| 119 |
#' logarithmic = 1 / (1 + log(cost, scale)), |
|
| 120 |
#' logistic = 1 / (1 + exp(scale * cost)), |
|
| 121 |
#' exponential = exp(-cost * scale) |
|
| 122 |
#' ) ~ cost, title = "Decay Functions", laby = "Weight", labx = "Cost", lines = "con", note = FALSE) |
|
| 123 |
#' } |
|
| 124 |
#' @return \code{catchment_ratio}: A vector with a ratio (determined by \code{return_type})
|
|
| 125 |
#' for each entry in \code{consumers}. If \code{return_type} is \code{"supply"}, values
|
|
| 126 |
#' will be the number of resources within the location, rather than a ratio. |
|
| 127 |
#' If \code{return_type} is \code{"demand"}, the vector will have an entry for each provider
|
|
| 128 |
#' rather than consumer location, and values will be number of consumers. |
|
| 129 |
#' @references |
|
| 130 |
#' Dai, D. (2010). Black residential segregation, disparities in spatial access to health care facilities, and |
|
| 131 |
#' late-stage breast cancer diagnosis in metropolitan Detroit. \emph{Health & place, 16}, 1038-1052.
|
|
| 132 |
#' doi: \href{https://doi.org/10.1016/j.healthplace.2010.06.012}{10.1016/j.healthplace.2010.06.012}
|
|
| 133 |
#' |
|
| 134 |
#' Delamater, P. L. (2013). Spatial accessibility in suboptimally configured health care systems: a modified |
|
| 135 |
#' two-step floating catchment area (M2SFCA) metric. \emph{Health & place, 24}, 30-43.
|
|
| 136 |
#' doi: \href{https://doi.org/10.1016/j.healthplace.2013.07.012}{10.1016/j.healthplace.2013.07.012}
|
|
| 137 |
#' |
|
| 138 |
#' Luo, W. & Qi, Y. (2009). An enhanced two-step floating catchment area (E2SFCA) method for measuring spatial |
|
| 139 |
#' accessibility to primary care physicians. \emph{Health & Place, 15}, 1100-1107.
|
|
| 140 |
#' doi: \href{https://doi.org/10.1016/j.healthplace.2009.06.002}{10.1016/j.healthplace.2009.06.002}
|
|
| 141 |
#' |
|
| 142 |
#' Luo, W. & Wang, F. (2003). Measures of spatial accessibility to health care in a GIS environment: synthesis |
|
| 143 |
#' and a case study in the Chicago region. \emph{Environment and Planning B: Planning and Design, 30}, 865-884.
|
|
| 144 |
#' doi: \href{https://doi.org/10.1068/b29120}{10.1068/b29120}
|
|
| 145 |
#' |
|
| 146 |
#' Paez, A., Higgins, C. D., & Vivona, S. F. (2019). Demand and level of service inflation in Floating Catchment |
|
| 147 |
#' Area (FCA) methods. \emph{Plos one, 14}, e0218773. doi:
|
|
| 148 |
#' \href{https://doi.org/10.1371/journal.pone.0218773}{10.1371/journal.pone.0218773}
|
|
| 149 |
#' |
|
| 150 |
#' Wan, N., Zou, B., & Sternberg, T. (2012). A three-step floating catchment area method for analyzing spatial |
|
| 151 |
#' access to health services. \emph{International Journal of Geographical Information Science, 26}, 1073-1089.
|
|
| 152 |
#' doi: \href{https://doi.org/10.1080/13658816.2011.624987}{10.1080/13658816.2011.624987}
|
|
| 153 |
#' @export |
|
| 154 | ||
| 155 |
catchment_ratio <- function(consumers = NULL, providers = NULL, cost = NULL, weight = NULL, normalize_weight = FALSE,
                            scale = 2, max_cost = NULL, adjust_consumers = NULL, adjust_providers = NULL,
                            adjust_zeros = 1e-6, return_type = "original", consumers_commutes = NULL, consumers_id = "GEOID",
                            consumers_value = "count", consumers_location = c("X", "Y"), providers_id = "GEOID",
                            providers_value = "count", providers_location = c("X", "Y"),
                            distance_metric = "euclidean", verbose = FALSE) {
  if (verbose) cli_rule("Calculating a Floating Catchment Area Ratio")
  type <- ""
  # A square matrix passed as `consumers` or `consumers_value` is treated as a
  # commute (source x origin) matrix; its row sums become the consumer counts.
  if (is.null(consumers_commutes)) {
    if (!missing(consumers_value)) {
      dims <- dim(consumers_value)
      if (!is.null(dims) && dims[1] == dims[2]) {
        consumers_commutes <- consumers_value
        if (is.null(consumers)) consumers <- rowSums(consumers_commutes)
      }
    } else {
      dims <- dim(consumers)
      if (missing(consumers_id) && missing(consumers_location) && !is.null(dims) && dims[1] == dims[2]) {
        consumers_commutes <- consumers
        consumers <- rowSums(consumers)
      }
    }
  }
  # TRUE = vector-like input: input_data[1] for providers, input_data[2] for consumers
  input_data <- c(is.null(dim(providers)), is.null(dim(consumers)))
  # getting provider and consumer value vectors
  cv <- if (input_data[2]) {
    if (is.null(consumers)) {
      if (is.numeric(consumers_value)) {
        consumers_value
      } else {
        cli_abort("{.arg consumers} is not specified, and {.arg consumers_value} is non-numeric")
      }
    } else {
      if (verbose) cli_alert_info("consumers value: {.arg consumers} vector")
      as.numeric(consumers)
    }
  } else if (length(consumers_value) == 1 && consumers_value %in% colnames(consumers)) {
    if (verbose) cli_alert_info("consumers value: {.var {consumers_value}} column")
    as.numeric(consumers[, consumers_value, drop = TRUE])
  } else {
    # no usable value column; every consumer location counts as 1
    if (verbose) cli_alert_info("consumers value: {.field 1}")
    rep(1, nrow(consumers))
  }
  cn <- length(cv)
  if (!cn) cli_abort("failed to recognize values in {.arg consumers}")
  if (anyNA(cv)) cv[is.na(cv)] <- 0
  pv <- if (input_data[1]) {
    if (is.null(providers)) {
      if (is.numeric(providers_value)) {
        providers_value
      } else {
        cli_abort("{.arg providers} is not specified, and {.arg providers_value} is non-numeric")
      }
    } else {
      if (verbose) cli_alert_info("providers value: {.arg providers} vector")
      as.numeric(providers)
    }
  } else if (length(providers_value) == 1 && providers_value %in% colnames(providers)) {
    if (verbose) cli_alert_info("providers value: {.var {providers_value}} column")
    as.numeric(providers[, providers_value, drop = TRUE])
  } else {
    if (verbose) cli_alert_info("providers value: {.field 1}")
    rep(1, nrow(providers))
  }
  if (!length(pv)) cli_abort("failed to recognize values in {.arg providers}")
  if (anyNA(pv)) pv[is.na(pv)] <- 0
  # getting provider and consumer ids
  # NOTE(review): this branch is guarded by input_data[1] (providers vector-like)
  # rather than input_data[2]; kept as-is since the fallbacks below remain valid,
  # but confirm this is intentional.
  cid <- if (input_data[1]) {
    if (!missing(consumers_id) && cn == length(consumers_id)) {
      if (verbose) cli_alert_info("consumers id: {.arg consumers_id} vector")
      consumers_id
    } else if (!is.null(names(consumers))) {
      if (verbose) cli_alert_info("consumers id: {.arg consumers} names")
      names(consumers)
    } else if (!is.null(rownames(cost))) {
      # fixed: consumers occupy the rows of `cost` (see docs), so consumer IDs
      # come from its row names; previously read colnames(cost) (provider IDs)
      if (verbose) cli_alert_info("consumers id: {.arg cost} row names")
      rownames(cost)
    } else {
      if (verbose) cli_alert_info("consumers id: sequence along consumers value")
      seq_along(cv)
    }
  } else if (length(consumers_id) == 1 && consumers_id %in% colnames(consumers)) {
    if (verbose) cli_alert_info("consumers id: {.var {consumers_id}} column")
    consumers[, consumers_id, drop = TRUE]
  } else {
    if (verbose) cli_alert_info("consumers id: sequence along consumers value")
    seq_along(cv)
  }
  if (!length(cid)) cli_abort("failed to recognize IDs in {.arg consumers}")
  pid <- if (input_data[1]) {
    if (!missing(providers_id) && length(pv) == length(providers_id)) {
      if (verbose) cli_alert_info("providers id: {.arg providers_id} vector")
      providers_id
    } else if (!is.null(names(providers))) {
      if (verbose) cli_alert_info("providers id: {.arg providers} names")
      names(providers)
    } else if (!is.null(colnames(cost))) {
      if (verbose) cli_alert_info("providers id: {.arg cost} column names")
      colnames(cost)
    } else {
      if (verbose) cli_alert_info("providers id: sequence along providers value")
      seq_along(pv)
    }
  } else if (length(providers_id) == 1 && providers_id %in% colnames(providers)) {
    if (verbose) cli_alert_info("providers id: {.var {providers_id}} column")
    providers[, providers_id, drop = TRUE]
  } else {
    if (verbose) cli_alert_info("providers id: sequence along providers value")
    seq_along(pv)
  }
  if (!length(pid)) cli_abort("failed to recognize IDs in {.arg providers}")
  if (is.null(cost)) {
    # no cost matrix provided; try to derive one from location information
    if (missing(consumers_location) && !all(consumers_location %in% colnames(consumers))) {
      consumers_location <- "geometry"
    }
    if (missing(providers_location) && !all(providers_location %in% colnames(providers))) {
      providers_location <- "geometry"
    }
    if ((!is.character(providers_location) || all(providers_location %in% colnames(providers))) &&
      (!is.character(consumers_location) || all(consumers_location %in% colnames(consumers)))) {
      if (verbose) cli_bullets("calculating cost from locations...")
      ccords <- if (is.character(consumers_location)) {
        if (verbose) cli_alert_info("consumers location: {.arg consumers} columns ({.field {consumers_location}})")
        consumers[, consumers_location, drop = TRUE]
      } else {
        if (verbose) cli_alert_info("consumers location: {.arg consumers_location}")
        consumers_location
      }
      if (any(grepl("^sf", class(ccords)))) {
        # sf geometries: reduce polygons to centroids, then extract coordinates
        if (any(grepl("POLY", class(ccords), fixed = TRUE))) {
          if (verbose) cli_alert_info("calculating centroids of consumers location geometry")
          ccords <- st_centroid(ccords)
        }
        if (verbose) cli_alert_info("using coordinates from consumers location geometry")
        ccords <- st_coordinates(ccords)
      }
      pcords <- if (is.character(providers_location)) {
        if (verbose) cli_alert_info("providers location: {.arg providers} columns ({.field {providers_location}})")
        providers[, providers_location, drop = FALSE]
      } else {
        if (verbose) cli_alert_info("providers location: {.arg providers_location}")
        providers_location
      }
      if (any(grepl("^sf", class(pcords)))) {
        if (any(grepl("POLY", class(pcords), fixed = TRUE))) {
          if (verbose) cli_alert_info("calculating centroids of providers location geometry")
          pcords <- st_centroid(pcords)
        }
        if (verbose) cli_alert_info("using coordinates from providers location geometry")
        pcords <- st_coordinates(pcords)
      }
      if (ncol(pcords) == ncol(ccords)) {
        if (verbose) cli_alert_info(paste("cost: calculated", distance_metric, "distances"))
        # lma_simets returns similarities in (0, 1]; invert to get distances
        cost <- 1 / lma_simets(ccords, pcords, metric = distance_metric, pairwise = TRUE) - 1
        if (is.null(dim(cost))) {
          # a single consumer or provider collapses to a vector; restore orientation
          cost <- t(cost)
          if (nrow(pcords) == 1) cost <- t(cost)
        }
        dimnames(cost) <- list(cid, pid)
      } else {
        cli_abort(
          "{.arg cost} is NULL, and failed to calculate it from provided locations (differing number of columns)"
        )
      }
    }
    if (is.null(cost)) {
      # no locations either; fall back to a uniform cost of 1 between all pairs
      if (verbose) cli_alert_info("cost: {.field 1}")
      cost <- sparseMatrix(
        {},
        {},
        x = 0,
        dims = c(cn, length(pv)),
        dimnames = list(cid, pid)
      )
      cost[] <- 1
    }
  } else if (is.null(dim(cost))) {
    # a cost vector is reshaped to consumers x providers
    cost <- matrix(cost, length(cv))
    if (identical(dim(cost), c(cn, length(pv)))) {
      dimnames(cost) <- list(cid, pid)
      if (verbose) cli_alert_info("cost: {.arg cost} vector")
    } else {
      cli_abort("{.arg cost} must be a matrix-like object")
    }
  } else if (verbose) cli_alert_info("cost: {.arg cost} matrix")
  # convert costs to a sparse weight matrix; normalization is applied later here
  w <- catchment_weight(
    cost, weight,
    max_cost = max_cost, adjust_zeros = adjust_zeros, scale = scale,
    normalize_weight = FALSE, verbose = verbose
  )
  if (!is.null(consumers_commutes)) {
    dims <- dim(consumers_commutes)
    if (dims[1] != dims[2]) cli_abort("{.arg consumers_commutes} must be a square matrix")
    # fixed: consumers_commutes is consumers x consumers, so its column names are
    # compared against consumer IDs (cid); previously compared against pid
    if (dims[1] != cn || (!is.null(colnames(consumers_commutes)) && !identical(cid, colnames(consumers_commutes)))) {
      if (is.numeric(cid) && dims[1] <= max(cid)) cid <- as.character(cid)
      if (all(cid %in% colnames(consumers_commutes))) {
        if (verbose) cli_alert_info("ordering and/or trimming {.arg consumers_commutes} by {.arg consumers} IDs")
        consumers_commutes <- consumers_commutes[cid, cid]
      } else {
        cli_abort("{.arg consumers_commutes} could not be resolved with {.arg consumers}")
      }
    }
    # proportion of each location's consumers who stay home (diagonal / total)
    noncommuters <- diag(consumers_commutes) / cv
    if (any(cv == 0)) noncommuters[cv == 0] <- 1
    diag(consumers_commutes) <- 0
    diag(consumers_commutes) <- rowSums(consumers_commutes)
    wr <- rowSums(consumers_commutes)
    wr[wr == 0] <- 1
    # row-normalize commute counts to origin probabilities
    consumers_commutes <- consumers_commutes / wr
    w_total <- rowSums(w)
    w_total[w_total == 0] <- 1
    # redistribute each location's weight across its commute origins
    w_commutes <- consumers_commutes %*% (w / w_total) * w_total
    # blend commuter and non-commuter weight by the non-commuter share
    w <- w_commutes * (1 - noncommuters) + w * noncommuters
  }
  # if the weight matrix appears transposed relative to consumers, flip it
  if (
    cn != nrow(w) &&
      (cn == ncol(w) ||
        (!is.null(rownames(w)) && !is.null(colnames(w)) && sum(cid %in% colnames(w)) > sum(cid %in% rownames(w)))
      )
  ) {
    w <- t(w)
  }
  wr <- rowSums(w)
  # pad missing consumer rows with zeros, then order rows by consumer IDs
  if (nrow(w) < cn && !is.null(rownames(w)) &&
    (!all(cid %in% rownames(w)) || !all(cid == rownames(w)))) {
    if (verbose) cli_alert_info("selected weight rows by consumers ID names")
    cid <- as.character(cid)
    mcs <- cid[!cid %in% rownames(w)]
    w <- rbind(
      w, sparseMatrix(
        {},
        {},
        x = 0,
        dims = c(length(mcs), ncol(w)),
        dimnames = list(mcs, pid)
      )
    )[cid, ]
  }
  # align columns to provider IDs, dropping providers not present in the weights
  if (!is.null(colnames(w)) && is.character(pid) && any(su <- pid %in% colnames(w)) &&
    (!all(su) || length(pv) != ncol(w) || !all(pid == colnames(w)))) {
    if (verbose) cli_alert_info("selected weight columns by providers ID names")
    pid <- pid[su]
    pv <- pv[su]
    w <- w[, pid]
  }
  if (nrow(w) != cn) {
    if (is.numeric(cid) && min(cid) > 0 && max(cid) <= nrow(w)) {
      if (verbose) cli_alert_info("selected weight rows by consumers ID indices")
    } else if (any(su <- cid %in% rownames(w))) {
      if (verbose) cli_alert_info("selected weight rows by consumers IDs")
      cv <- cv[su]
      cid <- as.character(cid[su])
    } else {
      cli_abort("consumers values are not the same length as weight rows, and could not be aligned by name")
    }
    w <- w[cid, ]
  }
  if (ncol(w) != length(pv) && is.numeric(pid)) {
    if (min(pid) > 0 && max(pid) <= ncol(w)) {
      if (verbose) cli_alert_info("selected weight columns by providers ID indices")
    } else {
      # numeric IDs may be zero-padded names (e.g., GEOIDs); pad to match
      pid_len <- nchar(colnames(w)[1])
      if (all(nchar(colnames(w)) == pid_len)) pid <- formatC(pid, width = pid_len, flag = 0)
      if (any(su <- pid %in% colnames(w))) {
        if (verbose) cli_alert_info("selected weight columns by providers IDs")
        pv <- pv[su]
        pid <- as.character(pid[su])
      } else {
        cli_abort("providers IDs are numeric, but are out of range of the weight matrix and do not all appear in its column names")
      }
    }
    w <- w[, pid]
  }
  if (!all(dim(w) == c(cn, length(pv)))) {
    cli_abort("failed to align weight matrix with consumer and provider values")
  }
  if (normalize_weight) {
    # adjust weights by selection probability (3-step)
    if (verbose) cli_alert_info("normalizing weight")
    type <- paste0(type, "3-step floating catchment area")
    if (!is.null(rownames(w)) && all(rownames(w) %in% names(wr))) wr <- wr[rownames(w)]
    wr[wr == 0] <- 1
    w <- w * sweep(w, 1, wr, "/", FALSE)
  } else {
    type <- paste0(type, "2-step floating catchment area")
  }
  provided_return_type <- !missing(return_type)
  if (is.character(return_type)) {
    return_type <- tolower(substr(return_type, 1, 1))
    if (return_type == "s") {
      if (verbose) cli_bullets(c(v = paste("calculated", type, "(resources per consumer location)")))
      # fixed: provider values are applied after the optional weight adjustment;
      # previously adjust_providers(w) was returned without the %*% pv step
      return(as.numeric((if (is.function(adjust_providers)) {
        environment(adjust_providers) <- environment()
        adjust_providers(w)
      } else {
        w
      }) %*% pv))
    }
  }
  # demand: weighted consumer total within each provider's catchment
  wd <- crossprod(if (is.function(adjust_consumers)) {
    environment(adjust_consumers) <- environment()
    adjust_consumers(w)
  } else {
    w
  }, cv)
  if (return_type == "d") {
    if (verbose) cli_bullets(c(v = paste("calculated", type, "(consumers per provider location)")))
    return(as.numeric(wd))
  }
  # avoid division by (effectively) zero demand
  wd[abs(wd) < .Machine$double.eps] <- 1
  # step 2: sum each provider's per-capita supply over each consumer's catchment
  r <- as.numeric((if (is.function(adjust_providers)) {
    environment(adjust_providers) <- environment()
    adjust_providers(w)
  } else {
    w
  }) %*% (pv / wd))
  if (provided_return_type) {
    if (is.numeric(return_type)) {
      r <- r * return_type
      type <- paste(type, "(resources per", return_type, "consumers)")
    } else {
      if ("n" == return_type) {
        type <- paste(type, "(normalized)")
        if (verbose) cli_alert_info("normalizing ratios")
        r <- r / (sum(r * cv) / sum(cv))
      } else if ("r" == return_type) {
        type <- paste(type, "(resources per region)")
        if (verbose) cli_alert_info("multiplying ratios by consumers value")
        r <- r * cv
      }
    }
  } else {
    type <- paste(type, "(resources per consumer)")
  }
  if (verbose) cli_bullets(c(v = paste("calculated", type)))
  r
}
| 1 |
#' @rdname catchment_ratio |
|
| 2 |
#' @examples |
|
| 3 |
#' |
|
| 4 |
#' # gives weight only to costs under 2 |
|
| 5 |
#' catchment_weight(matrix(c(1, 2, 1, 3, 1, 2), 3), 2) |
|
| 6 |
#' @return \code{catchment_weight}: A sparse matrix of weights.
|
|
| 7 |
#' @export |
|
| 8 | ||
| 9 |
catchment_weight <- function(cost, weight = NULL, max_cost = NULL, adjust_zeros = 1e-6, scale = 2, |
|
| 10 |
normalize_weight = FALSE, verbose = FALSE) {
|
|
| 11 | 3x |
if (is.null(dim(cost))) cost <- matrix(cost, ncol = 1) |
| 12 | 2x |
if (is.data.frame(cost)) cost <- as.matrix(cost) |
| 13 | 192x |
if (!is.null(weight) && is.numeric(adjust_zeros)) {
|
| 14 | 183x |
zero_costs <- !is.na(cost) & cost == 0 |
| 15 | 183x |
if (any(zero_costs)) {
|
| 16 | ! |
if (verbose) cli_alert_info(paste("setting non-NA 0s to", adjust_zeros))
|
| 17 | 64x |
cost[zero_costs] <- adjust_zeros |
| 18 | 64x |
rm(zero_costs) |
| 19 |
} |
|
| 20 |
} |
|
| 21 | 11x |
if (anyNA(cost)) cost[is.na(cost)] <- 0 |
| 22 | 192x |
if (is.numeric(weight) && is.null(dim(weight)) && length(weight) > 1) {
|
| 23 | ! |
weight <- matrix(weight, ncol = ncol(cost)) |
| 24 |
} |
|
| 25 | 192x |
if (is.null(weight) && !is.null(max_cost)) {
|
| 26 | ! |
weight <- max_cost |
| 27 | ! |
max_cost <- NULL |
| 28 |
} |
|
| 29 | 192x |
if (is.null(weight)) {
|
| 30 | 2x |
if (verbose) cli_alert_info("weight: cost")
|
| 31 | 8x |
w <- as(cost, "CsparseMatrix") |
| 32 | 184x |
} else if (is.null(dim(weight))) {
|
| 33 | 176x |
if (is.numeric(weight)) {
|
| 34 |
# single buffer value means a uniformly weighted catchment area (original) |
|
| 35 | ! |
if (verbose) cli_alert_info("weight: cost over {.field 0} and under {.field {weight[[1]]}}")
|
| 36 | 152x |
w <- as(cost > 0 & cost < weight[[1]], "CsparseMatrix") * 1 |
| 37 | 24x |
} else if (is.list(weight)) {
|
| 38 |
# list of steps for roughly graded weightings (enhanced) |
|
| 39 | ! |
if (verbose) cli_alert_info("weight: cost over {.field 0} and under steps of {.arg weight}")
|
| 40 | 6x |
weight <- weight[order(-vapply(weight, "[[", 1, 1))] |
| 41 | 6x |
w <- as((cost <= weight[[1]][1]) * weight[[1]][2], "CsparseMatrix") |
| 42 | 6x |
for (s in weight[-1]) w[cost <= s[1]] <- s[2] |
| 43 | 6x |
w[cost <= 0] <- 0 |
| 44 | 18x |
} else if (is.character(weight)) {
|
| 45 |
# name of a weight function (kernel density) |
|
| 46 | ! |
if (verbose) cli_alert_info("weight: {weight} weight function")
|
| 47 | 15x |
weight <- tolower(weight[[1]]) |
| 48 | 15x |
if ("units" %in% class(cost)) {
|
| 49 | ! |
cost <- matrix( |
| 50 | ! |
as.numeric(cost), nrow(cost), ncol(cost), |
| 51 | ! |
dimnames = dimnames(cost) |
| 52 |
) |
|
| 53 |
} |
|
| 54 | 15x |
if (grepl("^(?:gr|n)", weight)) {
|
| 55 |
# gravity / normal kernel |
|
| 56 | 4x |
w <- as(sqrt(1 / cost^scale), "CsparseMatrix") |
| 57 | 11x |
} else if (grepl("^e", weight)) {
|
| 58 |
# exponential kernel |
|
| 59 | 1x |
w <- as(exp(-cost * scale), "CsparseMatrix") |
| 60 | 10x |
} else if (grepl("^loga", weight)) {
|
| 61 |
# logarithmic kernel |
|
| 62 | 1x |
w <- as(1 / (1 + log(cost, scale)), "CsparseMatrix") |
| 63 | 9x |
} else if (grepl("^li", weight)) {
|
| 64 |
# linear kernel |
|
| 65 | 3x |
w <- as((scale - cost) / scale, "CsparseMatrix") |
| 66 | 3x |
w[w < 0] <- 0 |
| 67 | 6x |
} else if (grepl("^l", weight)) {
|
| 68 |
# logistic kernel |
|
| 69 | 1x |
w <- as(1 / (1 + exp(scale * cost)), "CsparseMatrix") |
| 70 | 5x |
} else if (grepl("^ga", weight)) {
|
| 71 |
# Gaussian kernel |
|
| 72 | 2x |
w <- as(exp(-cost^2 / (2 * scale^2)), "CsparseMatrix") |
| 73 | 3x |
} else if (grepl("^d", weight) && exists(weight)) {
|
| 74 |
# assumed to be a density function like dnorm |
|
| 75 | 2x |
w <- as(cost, "CsparseMatrix") |
| 76 | 2x |
w@x <- do.call(weight, list(w@x, 0, scale)) |
| 77 | 1x |
} else if (grepl("^p", weight) && exists(weight)) {
|
| 78 |
# assumed to be a distribution function like pnorm |
|
| 79 | 1x |
w <- as(cost, "CsparseMatrix") |
| 80 | 1x |
w@x <- do.call(weight, list(-w@x, 0, scale)) |
| 81 |
} else {
|
|
| 82 | ! |
cli_abort("{.arg weight} not recognized")
|
| 83 |
} |
|
| 84 | 15x |
w[cost <= 0 | !is.finite(w)] <- 0 |
| 85 | 3x |
} else if (is.function(weight)) {
|
| 86 | ! |
if (verbose) cli_alert_info("weight: custom weight function")
|
| 87 | 3x |
w <- as(weight(cost), "CsparseMatrix") |
| 88 | 3x |
w[cost <= 0 | !is.finite(w)] <- 0 |
| 89 |
} |
|
| 90 |
} else {
|
|
| 91 | ! |
if (verbose) cli_alert_info("weight: {.arg weight}")
|
| 92 | 8x |
w <- as(if (is.data.frame(weight)) as.matrix(weight) else weight, "CsparseMatrix") |
| 93 |
} |
|
| 94 | 192x |
if (!is.null(max_cost)) {
|
| 95 | ! |
if (verbose) cli_alert_info("zerod-out cost over {max_cost}")
|
| 96 | 1x |
w[cost > max_cost] <- 0 |
| 97 |
} |
|
| 98 | 192x |
if (normalize_weight) {
|
| 99 | 1x |
wr <- rowSums(w) |
| 100 | 1x |
wr[wr == 0] <- 1 |
| 101 | 1x |
w <- w * sweep(w, 1, wr, "/") |
| 102 |
} |
|
| 103 | 192x |
w |
| 104 |
} |
| 1 |
#' Download U.S. Census Population Data |
|
| 2 |
#' |
|
| 3 |
#' Download and load U.S. census American Community Survey 5-year summary files at the block group level, |
|
| 4 |
#' optionally including home-work commute statistics. |
|
| 5 |
#' |
|
| 6 |
#' @param dir Directory in which to save the file(s). |
|
| 7 |
#' @param state Name, abbreviation, or FIPS code of the state. |
|
| 8 |
#' @param year 4 digit year, between 2009 and the most recent year. |
|
| 9 |
#' @param include_margins Logical; if \code{TRUE}, will save and include estimate margins or error, which is a
|
|
| 10 |
#' \code{data.frame} with the same dimensions as the estimates \code{data.frame}.
|
|
| 11 |
#' @param include_commutes Logical; if \code{TRUE}, will download the
|
|
| 12 |
#' \href{https://lehd.ces.census.gov}{Longitudinal Employer-Household Dynamics (LEHD)}
|
|
| 13 |
#' \href{https://lehd.ces.census.gov/data/#lodes}{Origin-Destination Employment Statistics (LODES)}
|
|
| 14 |
#' data for the state and block groups within selected tracts. |
|
| 15 |
#' @param counties,tracts,blockgroups A vector of counties, tracts, or block group GEOIDs within the specified state to |
|
| 16 |
#' filter block groups for (block groups within the specified counties or tracts, or matching the specified block group |
|
| 17 |
#' GEOID). Only one can be specified (lower levels overwrite higher levels). Defaults to all block groups. |
|
| 18 |
#' @param overwrite Logical; if \code{TRUE}, will remove any existing files before downloading and saving new versions.
|
|
| 19 |
#' @param verbose Logical; if \code{FALSE}, will not print status messages.
|
|
| 20 |
#' @examples |
|
| 21 |
#' \dontrun{
|
|
| 22 |
#' download_census_population(".", "va")
|
|
| 23 |
#' } |
|
| 24 |
#' @return A list with at least an \code{estimates} entry, and optionally \code{margins} and/or \code{commutes}
|
|
| 25 |
#' entries. The \code{extimates} and \code{margins} entries are \code{data.frame}s with block groups in rows,
|
|
| 26 |
#' population variables in columns, and person counts in cells. The \code{commutes} entry is a sparse matrix with
|
|
| 27 |
#' home block groups in rows, work block groups in columns, and job counts in cells (all jobs, summed across blocks). |
|
| 28 |
#' @export |
|
| 29 | ||
| 30 |
download_census_population <- function(dir, state, year = 2019, include_margins = FALSE, include_commutes = FALSE, |
|
| 31 |
counties = NULL, tracts = NULL, blockgroups = NULL, overwrite = FALSE, verbose = TRUE) {
|
|
| 32 | 1x |
us_fips <- list( |
| 33 | 1x |
name = c( |
| 34 | 1x |
"UnitedStates", "Alabama", "Alaska", "Arizona", "Arkansas", "California", "Colorado", "Connecticut", |
| 35 | 1x |
"Delaware", "DistrictOfColumbia", "Florida", "Georgia", "Hawaii", "Idaho", "Illinois", "Indiana", "Iowa", |
| 36 | 1x |
"Kansas", "Kentucky", "Louisiana", "Maine", "Maryland", "Massachusetts", "Michigan", "Minnesota", "Mississippi", |
| 37 | 1x |
"Missouri", "Montana", "Nebraska", "Nevada", "NewHampshire", "NewJersey", "NewMexico", "NewYork", |
| 38 | 1x |
"NorthCarolina", "NorthDakota", "Ohio", "Oklahoma", "Oregon", "Pennsylvania", "RhodeIsland", "SouthCarolina", |
| 39 | 1x |
"CouthDakota", "Tennessee", "Texas", "Utah", "Vermont", "Virginia", "Washington", "WestVirginia", "Wisconsin", |
| 40 | 1x |
"Wyoming", "PuertoRico" |
| 41 |
), |
|
| 42 | 1x |
post = c( |
| 43 | 1x |
"us", "al", "ak", "az", "ar", "ca", "co", "ct", "de", "dc", "fl", "ga", "hi", "id", "il", "in", "ia", "ks", "ky", |
| 44 | 1x |
"la", "me", "md", "ma", "mi", "mn", "ms", "mo", "mt", "ne", "nv", "nh", "nj", "nm", "ny", "nc", "nd", "oh", "ok", |
| 45 | 1x |
"or", "pa", "ri", "sc", "sd", "tn", "tx", "ut", "vt", "va", "wa", "wv", "wi", "wy", "pr" |
| 46 |
), |
|
| 47 | 1x |
fips = c( |
| 48 | 1x |
"us", 1, 2, 4, 5, 6, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, |
| 49 | 1x |
33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 44, 45, 46, 47, 48, 49, 50, 51, 53, 54, 55, 56, 72 |
| 50 |
) |
|
| 51 |
) |
|
| 52 | 1x |
state <- gsub("^0|[^a-z0-9]", "", tolower(state[[1]]))
|
| 53 | 1x |
fid <- which(us_fips$fips == state) |
| 54 | 1x |
if (!length(fid)) {
|
| 55 | 1x |
fid <- if (nchar(state) == 2) {
|
| 56 | 1x |
which(us_fips$post == state) |
| 57 |
} else {
|
|
| 58 | ! |
pmatch(state, tolower(us_fips$name)) |
| 59 |
} |
|
| 60 | ! |
if (!length(fid) || is.na(fid)) cli_abort("failed to recognize {.arg state}")
|
| 61 |
} |
|
| 62 | 1x |
state <- us_fips$name[fid] |
| 63 | 1x |
state_post <- us_fips$post[fid] |
| 64 | 1x |
display_state <- gsub("([a-z])([A-Z])", "\\1 \\2", state)
|
| 65 | ||
| 66 | 1x |
if (!is.null(dir)) {
|
| 67 | 1x |
dir <- tdir <- paste0(normalizePath(dir, "/", FALSE), "/") |
| 68 | ! |
if (!dir.exists(dir)) dir.create(dir, FALSE, TRUE) |
| 69 |
} else {
|
|
| 70 | ! |
tdir <- paste0(tempdir(), "/") |
| 71 |
} |
|
| 72 | 1x |
folder <- paste0(tdir, state, "_Tracts_Block_Groups_Only/") |
| 73 | 1x |
url <- paste0( |
| 74 | 1x |
"https://www2.census.gov/programs-surveys/acs/summary_file/", year, |
| 75 | 1x |
if (year > 2020) "/sequence-based-SF", "/data/5_year_seq_by_state/", state, |
| 76 | 1x |
"/Tracts_Block_Groups_Only/" |
| 77 |
) |
|
| 78 | 1x |
files_zip <- paste0(year, "5", state_post, "000", 1:3, "000.zip") |
| 79 | 1x |
files <- paste0(folder, "e", year, "5", state_post, "000", 1:3, "000.txt") |
| 80 | 1x |
final_files <- paste0(if (is.null(dir)) tdir else dir, state_post, "_", c( |
| 81 | 1x |
"population", |
| 82 | 1x |
if (include_margins) "population_margins" |
| 83 | 1x |
), "_", year, ".csv") |
| 84 | ! |
if (overwrite) unlink(c(paste0(tdir, files_zip), files, final_files)) |
| 85 | 1x |
if (!all(file.exists(final_files)) && !all(file.exists(paste0(tdir, files_zip)) | file.exists(files))) {
|
| 86 | 1x |
if (verbose) cli_alert_info(paste("downloading", display_state, "American Community Survey summary files"))
|
| 87 | 1x |
for (i in seq_along(files)) {
|
| 88 | 3x |
if (!file.exists(files[i])) {
|
| 89 | 3x |
download.file(paste0(url, files_zip[i]), paste0(tdir, files_zip[i])) |
| 90 |
} |
|
| 91 |
} |
|
| 92 |
} |
|
| 93 | 1x |
if (any(file.exists(paste0(tdir, files_zip)))) {
|
| 94 | 1x |
if (verbose) cli_alert_info(paste("decompressing", display_state, "American Community Survey summary files"))
|
| 95 | 1x |
dir.create(folder, FALSE) |
| 96 | 1x |
for (f in files_zip[file.exists(paste0(tdir, files_zip))]) {
|
| 97 | 3x |
system2("unzip", c("-qd", shQuote(folder), shQuote(paste0(tdir, f))))
|
| 98 | 3x |
unlink(paste0(tdir, f)) |
| 99 |
} |
|
| 100 |
} |
|
| 101 | 1x |
if (!all(file.exists(final_files))) {
|
| 102 | 1x |
if (verbose) cli_alert_info(paste("reformatting", display_state, "American Community Survey summary files"))
|
| 103 | ||
| 104 |
# column names |
|
| 105 | 1x |
headers <- read.csv(paste0( |
| 106 | 1x |
"https://www2.census.gov/programs-surveys/acs/summary_file/", year, |
| 107 | 1x |
if (year > 2020) {
|
| 108 | ! |
"/sequence-based-SF/documentation/user_tools/ACS_5yr_Seq" |
| 109 |
} else {
|
|
| 110 | 1x |
paste0("/documentation/", if (year < 2013) {
|
| 111 | ! |
"5_year/user_tools/Sequence_Number_and" |
| 112 |
} else {
|
|
| 113 | 1x |
"user_tools/ACS_5yr_Seq" |
| 114 |
}) |
|
| 115 | 1x |
}, "_Table_Number_Lookup.txt" |
| 116 | 1x |
), nrows = 2000) |
| 117 | 1x |
headers <- headers[seq_len(grep("ANCESTRY", headers$Table.Title, fixed = TRUE)[1] - 1), ]
|
| 118 | 1x |
title_id <- which(headers$Total.Cells.in.Table != "") |
| 119 | 1x |
names <- sub("^Universe:\\s*", "", headers$Table.Title)
|
| 120 | 1x |
sub_id <- which(grepl(":", names, fixed = TRUE))
|
| 121 | 1x |
headers$name <- sub("\\.$", "", sub("_\\w+_Total", "_Total", gsub("._", "_", paste0(
|
| 122 | 1x |
rep(gsub("[;:(),\\s]+", ".", names[title_id], perl = TRUE), c(title_id[-1], nrow(headers)) - title_id),
|
| 123 |
"_", |
|
| 124 | 1x |
rep(gsub("[;:(),\\s]+", ".", names[sub_id], perl = TRUE), c(sub_id[-1], nrow(headers)) - c(1, sub_id[-1])),
|
| 125 |
"_", |
|
| 126 | 1x |
gsub("[;:(),\\s]+", ".", sub(" years", "", names, fixed = TRUE), perl = TRUE)
|
| 127 | 1x |
), fixed = TRUE))) |
| 128 | 1x |
start_id <- which(!is.na(headers$Start.Position)) |
| 129 | 1x |
offset <- headers$Line.Number - 1 + rep( |
| 130 | 1x |
headers[start_id, "Start.Position"], |
| 131 | 1x |
c(start_id[-1], nrow(headers) + 1) - start_id |
| 132 |
) |
|
| 133 | 1x |
ind_start <- which(!is.na(offset == min(offset, na.rm = TRUE)) & offset == min(offset, na.rm = TRUE)) |
| 134 | 1x |
ind_end <- c(ind_start[-1], length(offset)) - ind_start |
| 135 | 1x |
inds <- lapply(seq_along(ind_start), function(i) {
|
| 136 | 4x |
is <- seq(ind_start[i], ind_end[i]) |
| 137 | 4x |
is[!is.na(offset[is]) & offset[is] %% 1 == 0] |
| 138 |
}) |
|
| 139 | 1x |
subs <- as.numeric(sub("\\D.*$", "", headers$Total.Cells.in.Table))
|
| 140 | 1x |
ind_start <- which( |
| 141 | 1x |
!is.na(headers$Start.Position) & |
| 142 | 1x |
headers$Start.Position == min(headers$Start.Position, na.rm = TRUE) |
| 143 |
) |
|
| 144 | 1x |
ind_end <- c(ind_start[-1] - 1, length(subs)) |
| 145 | 1x |
inds <- lapply(seq_along(ind_start), function(i) {
|
| 146 | 4x |
is <- seq(ind_start[i], ind_end[i]) |
| 147 | 4x |
is[!is.na(headers[is, "Line.Number"]) & headers[is, "Line.Number"] %% 1 == 0] |
| 148 |
}) |
|
| 149 | ||
| 150 |
# row geoids |
|
| 151 | 1x |
rows <- read.csv(paste0( |
| 152 | 1x |
"https://www2.census.gov/programs-surveys/acs/summary_file/", year, |
| 153 | 1x |
if (year > 2020) "/sequence-based-SF", "/data/5_year_seq_by_state/", state, |
| 154 | 1x |
"/Tracts_Block_Groups_Only/g", year, "5", state_post, ".csv" |
| 155 | 1x |
), header = FALSE) |
| 156 | 1x |
rows$GEOID <- sub("\\D.*$", "", paste0(
|
| 157 | 1x |
formatC(rows$V10, width = 2, flag = 0), |
| 158 | 1x |
formatC(rows$V11, width = 3, flag = 0), |
| 159 | 1x |
formatC(rows$V14, width = 6, flag = 0), |
| 160 | 1x |
rows$V15 |
| 161 |
)) |
|
| 162 | ||
| 163 |
# estimates |
|
| 164 | 1x |
estimates <- do.call(cbind, lapply(seq_along(files), function(i) {
|
| 165 | 3x |
os <- c(6, offset[inds[[i]]]) |
| 166 | 3x |
tab <- read.csv( |
| 167 | 3x |
files[i], |
| 168 | 3x |
header = FALSE, colClasses = c(rep("character", 5), rep("numeric", length(os))),
|
| 169 | 3x |
na.string = "." |
| 170 |
) |
|
| 171 | 3x |
colnames(tab)[os] <- c("GEOID", headers[inds[[i]], "name"])
|
| 172 | 3x |
tab$GEOID <- rows[tab$GEOID, "GEOID"] |
| 173 | 3x |
tab[nchar(tab$GEOID) == 12, -seq(1, if (i == 1) 5 else 6)] |
| 174 |
})) |
|
| 175 | ||
| 176 |
# margins |
|
| 177 | 1x |
if (include_margins) {
|
| 178 | 1x |
files_m <- paste0(folder, "m", year, "5", state_post, "000", 1:3, "000.txt") |
| 179 | 1x |
margins <- do.call(cbind, lapply(seq_along(files_m), function(i) {
|
| 180 | 3x |
os <- c(6, offset[inds[[i]]]) |
| 181 | 3x |
tab <- read.csv( |
| 182 | 3x |
files_m[i], |
| 183 | 3x |
header = FALSE, colClasses = c(rep("character", 5), rep("numeric", length(os))),
|
| 184 | 3x |
na.string = "." |
| 185 |
) |
|
| 186 | 3x |
colnames(tab)[os] <- c("GEOID", headers[inds[[i]], "name"])
|
| 187 | 3x |
tab$GEOID <- rows[tab$GEOID, "GEOID"] |
| 188 | 3x |
tab[nchar(tab$GEOID) == 12, -seq(1, if (i == 1) 5 else 6)] |
| 189 |
})) |
|
| 190 |
} |
|
| 191 | 1x |
su <- vapply(estimates, function(v) all(is.na(v)), TRUE) |
| 192 | 1x |
if (any(su)) {
|
| 193 | 1x |
estimates <- estimates[, !su, drop = FALSE] |
| 194 | 1x |
if (include_margins) {
|
| 195 | 1x |
margins <- margins[, !su, drop = FALSE] |
| 196 |
} |
|
| 197 |
} |
|
| 198 | 1x |
su <- !is.na(estimates[, 2]) & estimates[, 2] == 0 |
| 199 | 1x |
if (any(su)) {
|
| 200 | ! |
estimates <- estimates[!su, ] |
| 201 | ! |
if (include_margins) margins <- margins[!su, ] |
| 202 |
} |
|
| 203 |
} else {
|
|
| 204 | ! |
if (verbose) cli_alert_info(paste("loading existing", display_state, "population data"))
|
| 205 | ! |
classes <- c("character", rep("numeric", length(
|
| 206 | ! |
scan(final_files[1], "", sep = ",", nlines = 1, quiet = TRUE) |
| 207 | ! |
) - 1)) |
| 208 | ! |
estimates <- read.csv(final_files[1], colClasses = classes) |
| 209 | ! |
if (include_margins) margins <- read.csv(final_files[2], colClasses = classes) |
| 210 |
} |
|
| 211 |
# commutes |
|
| 212 | 1x |
if (include_commutes) {
|
| 213 | 1x |
file <- paste0(tdir, state_post, "_commutes_", year, ".csv") |
| 214 | 1x |
filegz <- paste0(file, ".gz") |
| 215 | ! |
if (overwrite) unlink(c(file, filegz)) |
| 216 | 1x |
if (!any(file.exists(c(file, filegz)))) {
|
| 217 | 1x |
if (verbose) cli_alert_info(paste("downloading", display_state, "Origin-Destination Employment Statistics file"))
|
| 218 | 1x |
download.file(paste0( |
| 219 | 1x |
"https://lehd.ces.census.gov/data/lodes/LODES7/", state_post, |
| 220 | 1x |
"/od/", state_post, "_od_main_JT00_", year, ".csv.gz" |
| 221 | 1x |
), filegz) |
| 222 |
} |
|
| 223 | 1x |
if (fresh <- file.exists(filegz)) system2("gzip", c("-df", filegz))
|
| 224 | 1x |
check <- file.exists(file) |
| 225 | 1x |
if (check && fresh && Sys.which("openssl")[[1]] != "") {
|
| 226 | 1x |
hashes <- paste(readLines(paste0( |
| 227 | 1x |
"https://lehd.ces.census.gov/data/lodes/LODES7/", state_post, "/lodes_", state_post, ".sha256sum" |
| 228 | 1x |
)), collapse = "") |
| 229 | 1x |
hash_loc <- regexec(paste0("_od_main_JT00_", year), hashes, fixed = TRUE)[[1]]
|
| 230 | 1x |
check <- substr(hashes, hash_loc - 68, hash_loc - 5) == strsplit( |
| 231 | 1x |
system2("openssl", c("dgst", "-sha256", shQuote(file)), TRUE), " ",
|
| 232 | 1x |
fixed = TRUE |
| 233 | 1x |
)[[1]][2] |
| 234 | ! |
if (!check) cli_warn("integrity check failed for {.file {file}}")
|
| 235 |
} |
|
| 236 | 1x |
if (check) {
|
| 237 | 1x |
if (verbose) cli_alert_info(paste0("loading ", if (!fresh) "existing ", display_state, " commutes data"))
|
| 238 | 1x |
commutes <- read.csv(file) |
| 239 | 1x |
if (!is.null(commutes$w_geocode)) {
|
| 240 | 1x |
fresh <- TRUE |
| 241 | 1x |
if (verbose) {
|
| 242 | 1x |
cli_alert_info(paste( |
| 243 | 1x |
"reformatting", display_state, "Origin-Destination Employment Statistics file" |
| 244 |
)) |
|
| 245 |
} |
|
| 246 | 1x |
commutes$w_geocode <- substr(commutes$w_geocode, 1, 12) |
| 247 | 1x |
commutes$h_geocode <- substr(commutes$h_geocode, 1, 12) |
| 248 | 1x |
commutes <- commutes[commutes$w_geocode %in% estimates$GEOID & commutes$h_geocode %in% estimates$GEOID, ] |
| 249 | 1x |
ids <- structure(seq_along(estimates$GEOID), names = estimates$GEOID) |
| 250 | 1x |
edges <- tapply(commutes$S000, paste0(commutes$w_geocode, commutes$h_geocode), sum) |
| 251 | 1x |
commutes <- sparseMatrix( |
| 252 | 1x |
i = ids[substr(names(edges), 13, 24)], |
| 253 | 1x |
j = ids[substr(names(edges), 1, 12)], |
| 254 | 1x |
x = edges, |
| 255 | 1x |
dimnames = list(estimates$GEOID, estimates$GEOID) |
| 256 |
) |
|
| 257 | ! |
} else if (colnames(commutes)[1] == "GEOID") {
|
| 258 | ! |
rownames(commutes) <- colnames(commutes)[-1] <- commutes[, 1] |
| 259 | ! |
commutes <- as(as.matrix(commutes[, -1]), "dgCMatrix") |
| 260 |
} |
|
| 261 |
} |
|
| 262 |
} |
|
| 263 | 1x |
if (!is.null(dir)) {
|
| 264 | 1x |
files <- paste0(dir, state_post, "_", c( |
| 265 | 1x |
"population", |
| 266 | 1x |
if (include_margins) "population_margins", |
| 267 | 1x |
if (include_commutes && check) "commutes" |
| 268 | 1x |
), "_", year, ".csv") |
| 269 | 1x |
if (!all(file.exists(files)) || (include_commutes && fresh && check)) {
|
| 270 | 1x |
if (verbose) cli_alert_info(paste("writing", display_state, "files"))
|
| 271 | 1x |
ck <- c(!file.exists(files[1]), !file.exists(files[2]) && include_margins, include_commutes && fresh && check) |
| 272 | 1x |
if (ck[1]) write.csv(estimates, files[1], row.names = FALSE) |
| 273 | 1x |
if (ck[2]) write.csv(margins, files[2], row.names = FALSE) |
| 274 | 1x |
if (ck[3]) write.csv(cbind(GEOID = rownames(commutes), as.matrix(commutes)), files[3], row.names = FALSE) |
| 275 | 1x |
if (verbose) {
|
| 276 | 1x |
cli_bullets(c( |
| 277 | 1x |
v = paste0("wrote file", if (sum(ck) == 1) ":" else "s:"),
|
| 278 | 1x |
structure(paste0("{.file ", files[ck], "}"), names = rep("*", sum(ck)))
|
| 279 |
)) |
|
| 280 |
} |
|
| 281 |
} |
|
| 282 |
} |
|
| 283 | 1x |
subsetter <- !c(is.null(blockgroups), is.null(tracts), is.null(counties)) |
| 284 | 1x |
if (any(subsetter)) {
|
| 285 | ! |
su <- if (subsetter[1]) {
|
| 286 | ! |
estimates$GEOID %in% blockgroups |
| 287 | ! |
} else if (subsetter[2]) {
|
| 288 | ! |
substr(estimates$GEOID, 1, 11) %in% tracts |
| 289 | ! |
} else if (subsetter[3]) {
|
| 290 | ! |
substr(estimates$GEOID, 1, 5) %in% counties |
| 291 |
} |
|
| 292 | ! |
if (any(su) && !all(su)) {
|
| 293 | ! |
if (verbose) cli_alert_info("subsetting results")
|
| 294 | ! |
estimates <- estimates[su, ] |
| 295 | ! |
if (include_margins) margins <- margins[su, ] |
| 296 |
} else {
|
|
| 297 | ! |
cli_warn("a subset was requested, but no GEOIDs matched")
|
| 298 |
} |
|
| 299 |
} |
|
| 300 | 1x |
invisible(list( |
| 301 | 1x |
estimates = estimates, |
| 302 | 1x |
margins = if (include_margins) margins, |
| 303 | 1x |
commutes = if (include_commutes) commutes |
| 304 |
)) |
|
| 305 |
} |
| 1 |
#' Extract Connections Within Catchment Areas |
|
| 2 |
#' |
|
| 3 |
#' Extract connected consumer (\code{from}) and provider (\code{to}) locations within a catchment area,
|
|
| 4 |
#' as defined by cost and weights. |
|
| 5 |
#' |
|
| 6 |
#' @param from Consumer locations; a matrix-like object with columns containing latitudes and longitudes |
|
| 7 |
#' (identified by \code{from_coords}), and a column of IDs corresponding to rows in \code{cost} (identified by
|
|
| 8 |
#' \code{from_id}). Each identifying column can alternatively contain the associated matrix or vector, and
|
|
| 9 |
#' IDs can be in row names. |
|
| 10 |
#' @param to Provider locations;a matrix-like object with columns containing latitudes and longitudes |
|
| 11 |
#' (identified by \code{to_coords}), and a column of IDs corresponding to columns in \code{cost} (identified by
|
|
| 12 |
#' \code{to_id}). Each identifying column can alternatively contain the associated matrix or vector, and
|
|
| 13 |
#' IDs can be in row names. |
|
| 14 |
#' @param cost A cost matrix, with row names corresponding to IDs of \code{from}, and column names corresponding to
|
|
| 15 |
#' IDs of \code{to}.
|
|
| 16 |
#' @param weight A weight matrix with the same dimensions as \code{cost}, such as returned from
|
|
| 17 |
#' \code{\link{catchment_weight}}.
|
|
| 18 |
#' @param ... Passes arguments to \code{catchment_weight} if \code{weight} is not a weight matrix.
|
|
| 19 |
#' @param return_type Specify whether to return a \code{data.frame} (default) with connection ids, weights, and costs,
|
|
| 20 |
#' an \code{sf} \code{data.frame} (\code{"sf"}) with linestring geometries added for each connection,
|
|
| 21 |
#' or a GeoJSON-formatted list (\code{"list"}).
|
|
| 22 |
#' @param from_id,from_coords,to_id,to_coords Names of ID and coordinate columns in \code{from} and \code{to},
|
|
| 23 |
#' or vectors of IDs and matrices of coordinates. |
|
| 24 |
#' @examples |
|
| 25 |
#' pop <- simulate_catchments() |
|
| 26 |
#' connections <- catchment_connections( |
|
| 27 |
#' pop$consumers, pop$providers, |
|
| 28 |
#' weight = "gaussian", max_cost = 1, |
|
| 29 |
#' return_type = "sf" |
|
| 30 |
#' ) |
|
| 31 |
#' if (require("leaflet", quiet = TRUE)) {
|
|
| 32 |
#' leaflet() |> |
|
| 33 |
#' addPolylines( |
|
| 34 |
#' data = connections, weight = 3, color = "#777", |
|
| 35 |
#' highlightOptions = highlightOptions(color = "#fff", weight = 4), |
|
| 36 |
#' label = ~ paste0("From: ", from, ", To: ", to, ", Weight: ", weight, ", Cost: ", cost)
|
|
| 37 |
#' ) |> |
|
| 38 |
#' addCircles( |
|
| 39 |
#' data = pop$consumers, label = ~ paste0("Consumers: ", count, ", Access: ", access)
|
|
| 40 |
#' ) |> |
|
| 41 |
#' addCircles( |
|
| 42 |
#' data = pop$providers, color = "#000", label = ~ paste("Provider", id)
|
|
| 43 |
#' ) |
|
| 44 |
#' } |
|
| 45 |
#' @return An object with connection, weight, and cost information, with a format depending on \code{return_type}.
|
|
| 46 |
#' @export |
|
| 47 | ||
| 48 |
catchment_connections <- function(from, to, cost = NULL, weight = 1, ..., return_type = "data.frame", |
|
| 49 |
from_id = "GEOID", from_coords = c("X", "Y"), to_id = "GEOID",
|
|
| 50 |
to_coords = c("X", "Y")) {
|
|
| 51 | 17x |
if (missing(from) && !missing(from_coords)) {
|
| 52 | 1x |
from <- from_coords |
| 53 | 1x |
from_coords <- c("X", "Y")
|
| 54 |
} |
|
| 55 | 17x |
if (missing(to) && !missing(to_coords)) {
|
| 56 | 1x |
to <- to_coords |
| 57 | 1x |
to_coords <- c("X", "Y")
|
| 58 |
} |
|
| 59 | 17x |
from_ids <- if (length(from_id) != 1) {
|
| 60 | 1x |
from_id |
| 61 | 17x |
} else if (from_id %in% colnames(from)) {
|
| 62 | 1x |
from[, from_id, drop = TRUE] |
| 63 |
} else {
|
|
| 64 | 1x |
if (is.null(rownames(from))) rownames(from) <- seq_len(nrow(from)) |
| 65 | 15x |
rownames(from) |
| 66 |
} |
|
| 67 | ! |
if (is.null(from_ids)) cli_abort("failed to resolve {.arg from} ids")
|
| 68 | ! |
if (is.numeric(from_ids) && max(from_ids) > nrow(from)) from_ids <- as.character(from_ids) |
| 69 | 17x |
to_ids <- if (length(to_id) != 1) {
|
| 70 | 1x |
to_id |
| 71 | 17x |
} else if (to_id %in% colnames(to)) {
|
| 72 | 1x |
to[, to_id, drop = TRUE] |
| 73 |
} else {
|
|
| 74 | 1x |
if (is.null(rownames(to))) rownames(to) <- seq_len(nrow(to)) |
| 75 | 15x |
rownames(to) |
| 76 |
} |
|
| 77 | ! |
if (is.null(to_id)) cli_abort("failed to resolve {.arg to} ids")
|
| 78 | ! |
if (is.numeric(to_ids) && max(to_ids) > nrow(to)) to_ids <- as.character(to_ids) |
| 79 | 17x |
if (is.character(from_coords) && all(from_coords %in% colnames(from))) {
|
| 80 | 2x |
from <- from[, from_coords, drop = TRUE] |
| 81 | 15x |
} else if (any(grepl("^sfc?$", class(from)))) {
|
| 82 | 15x |
from <- st_coordinates(from) |
| 83 | ! |
} else if (!is.null(dim(from)) && ncol(from) > 1) {
|
| 84 | ! |
from <- from[, 1:2, drop = TRUE] |
| 85 |
} |
|
| 86 | ! |
if (is.null(dim(from)) || ncol(from) != 2) cli_abort("{.arg from} must have 2 columns")
|
| 87 | 17x |
if (!is.numeric(from[, 1]) || !is.numeric(from[, 2])) {
|
| 88 | ! |
cli_abort("{.arg from} columns must all be numeric")
|
| 89 |
} |
|
| 90 | ! |
if (length(from_ids) != nrow(from)) cli_abort("From IDs and coordinates do not align")
|
| 91 | 17x |
if (is.character(to_coords) && all(to_coords %in% colnames(to))) {
|
| 92 | 2x |
to <- to[, to_coords, drop = TRUE] |
| 93 | 15x |
} else if (any(grepl("^sfc?$", class(to)))) {
|
| 94 | 15x |
to <- st_coordinates(to) |
| 95 | ! |
} else if (!is.null(dim(to)) && ncol(to) > 1) {
|
| 96 | ! |
to <- to[, 1:2, drop = TRUE] |
| 97 |
} |
|
| 98 | ! |
if (is.null(dim(to)) || ncol(to) != 2) cli_abort("{.arg to} must have 2 columns")
|
| 99 | 17x |
if (!is.numeric(to[, 1]) || !is.numeric(to[, 2])) {
|
| 100 | ! |
cli_abort("{.arg to} columns must all be numeric")
|
| 101 |
} |
|
| 102 | ! |
if (length(to_ids) != nrow(to)) cli_abort("To IDs and coordinates do not align")
|
| 103 | 17x |
if (is.null(cost)) {
|
| 104 | 12x |
cost <- 1 / lma_simets(from, to, "euclidean", symmetrical = TRUE, pairwise = TRUE) - 1 |
| 105 | 12x |
dimnames(cost) <- list(from_ids, to_ids) |
| 106 |
} |
|
| 107 | 1x |
if (is.null(dim(cost))) cost <- matrix(cost, nrow(from), dimnames = list(from_ids, to_ids)) |
| 108 | 17x |
if ((is.null(dim(weight)) && length(weight) != length(cost)) || length(substitute(...()))) {
|
| 109 | 14x |
weight <- catchment_weight(cost = cost, weight = weight, ...) |
| 110 | 3x |
} else if (is.null(dim(weight))) {
|
| 111 | 1x |
weight <- matrix(weight, nrow(from), dimnames = list(from_ids, to_ids)) |
| 112 | ! |
} else if (any(dim(cost) != dim(weight))) cli_abort("{.arg weight} does not align with {.arg cost}")
|
| 113 | 17x |
if (is.null(rownames(cost))) {
|
| 114 | 2x |
if (nrow(cost) != length(from_ids)) {
|
| 115 | ! |
cli_abort("{.arg cost} does not have row names, and is not the same length as {.arg from}'s ids")
|
| 116 |
} |
|
| 117 | 15x |
} else if (!identical(as.character(from_ids), rownames(cost))) {
|
| 118 | 1x |
if (!all(from_ids %in% rownames(cost))) {
|
| 119 | ! |
cli_abort("not all {.arg from} ids are in {.arg cost}'s rownames")
|
| 120 |
} |
|
| 121 | ! |
if (!identical(rownames(cost), rownames(weight))) rownames(weight) <- rownames(cost) |
| 122 | 1x |
cost <- cost[as.character(from_ids), ] |
| 123 | 1x |
weight <- weight[as.character(from_ids), ] |
| 124 |
} |
|
| 125 | 17x |
if (is.null(colnames(cost))) {
|
| 126 | ! |
if (ncol(cost) != length(to_ids)) {
|
| 127 | ! |
cli_abort("{.arg cost} does not have column names, and is not the same length as {.arg to}'s ids")
|
| 128 |
} |
|
| 129 | 17x |
} else if (!identical(as.character(to_ids), colnames(cost))) {
|
| 130 | 1x |
if (!all(to_ids %in% colnames(cost))) {
|
| 131 | ! |
cli_abort("not all {.arg to} ids are in {.arg cost}'s colnames")
|
| 132 |
} |
|
| 133 | ! |
if (!identical(colnames(cost), colnames(weight))) colnames(weight) <- colnames(cost) |
| 134 | 1x |
cost <- cost[, as.character(to_ids)] |
| 135 | 1x |
weight <- weight[, as.character(to_ids)] |
| 136 |
} |
|
| 137 | 17x |
fcoords <- split(from, from_ids)[from_ids] |
| 138 | 17x |
tcoords <- split(to, to_ids)[to_ids] |
| 139 | 17x |
from_ids <- names(fcoords) |
| 140 | 17x |
to_ids <- names(tcoords) |
| 141 | 17x |
if (grepl("^[slg]", return_type, TRUE)) {
|
| 142 | 2x |
res <- list( |
| 143 | 2x |
type = "FeatureCollection", |
| 144 | 2x |
features = unlist(lapply(unname(which(rowSums(weight != 0) != 0)), function(r) {
|
| 145 | 200x |
su <- which(weight[r, ] != 0) |
| 146 | 200x |
np <- names(su) |
| 147 | 200x |
w <- weight[r, su] |
| 148 | 200x |
wc <- cost[r, su] |
| 149 | 200x |
fc <- as.numeric(fcoords[[r]]) |
| 150 | 200x |
tc <- tcoords[to_ids %in% np] |
| 151 | 200x |
lapply(seq_along(np), function(i) {
|
| 152 | 402x |
list( |
| 153 | 402x |
type = "Feature", |
| 154 | 402x |
properties = list(from = from_ids[[r]], to = np[[i]], weight = w[[i]], cost = wc[[i]]), |
| 155 | 402x |
geometry = list( |
| 156 | 402x |
type = "LineString", |
| 157 | 402x |
coordinates = list(fc, as.numeric(tc[[np[[i]]]])) |
| 158 |
) |
|
| 159 |
) |
|
| 160 |
}) |
|
| 161 | 2x |
}), FALSE) |
| 162 |
) |
|
| 163 | ! |
if (!length(res$features)) cli_abort("there are no connections")
|
| 164 | 2x |
if (grepl("^s", return_type, TRUE)) {
|
| 165 | 1x |
read_sf(toJSON(res, auto_unbox = TRUE), as_tibble = FALSE) |
| 166 |
} else {
|
|
| 167 | 1x |
res |
| 168 |
} |
|
| 169 |
} else {
|
|
| 170 | 15x |
rs <- unname(which(rowSums(weight != 0) != 0)) |
| 171 | ! |
if (!length(rs)) cli_abort("there are no connections")
|
| 172 | 15x |
do.call(rbind, lapply(rs, function(r) {
|
| 173 | 1500x |
su <- which(weight[r, ] != 0) |
| 174 | 1500x |
res <- data.frame( |
| 175 | 1500x |
from = from_ids[[r]], to = names(su), weight = weight[r, su], cost = cost[r, su] |
| 176 |
) |
|
| 177 | 1500x |
rownames(res) <- NULL |
| 178 | 1500x |
res |
| 179 |
})) |
|
| 180 |
} |
|
| 181 |
} |
| 1 |
#' Aggregate Floating Catchment Area Ratios |
|
| 2 |
#' |
|
| 3 |
#' Calculate aggregate sums or means, potentially weighted by number of consumers. |
|
| 4 |
#' |
|
| 5 |
#' @param from A matrix-like object with IDs, values, and number of consumers, or a vector |
|
| 6 |
#' of values. These are the lower-level entities to be aggregated up to entries in \code{to}.
|
|
| 7 |
#' @param to A matrix-like object with IDs and total consumers. These are higher-level |
|
| 8 |
#' entities containing \code{from} entities.
|
|
| 9 |
#' @param id The column name of IDs in \code{from}, or a vector of IDs.
|
|
| 10 |
#' @param value The column name of values in \code{from} or a vector of values to be aggregated.
|
|
| 11 |
#' @param consumers The column name of consumer totals in \code{from}, or a vector
|
|
| 12 |
#' of said totals of the same length as \code{from} IDs.
|
|
| 13 |
#' @param to_id The column name of IDs in \code{to}, or a vector of IDs.
|
|
| 14 |
#' @param to_consumers The column name of consumer totals in \code{to}, or a vector
|
|
| 15 |
#' of said totals of the same length as \code{to} IDs.
|
|
| 16 |
#' @param map A named list, with names as or corresponding to \code{to} IDs, and vectors
|
|
| 17 |
#' containing \code{from} IDs as entries.
|
|
| 18 |
#' @param original_from Logical indicating whether the \code{from} values are original (default).
|
|
| 19 |
#' If \code{from} values are original and \code{consumers} are specified, \code{from} values will be
|
|
| 20 |
#' multiplied by \code{consumers} before aggregation. Set this to \code{FALSE} if this has already been done
|
|
| 21 |
#' (such as if you set \code{return_type} to \code{"region"} in \code{\link{catchment_ratio}}).
|
|
| 22 |
#' @param return_type Determines the values that are returned: \code{"original"} (default) for an average
|
|
| 23 |
#' for each \code{to} ID based on the region sum of \code{consumers}, or specified \code{to_consumers}.
|
|
| 24 |
#' If a number, original scores will be multiplied by the number then averaged like \code{"original"}. If
|
|
| 25 |
#' \code{"region"}, the sum for each \code{to} ID is returned.
|
|
| 26 |
#' @param verbose Logical; if \code{TRUE}, will print a log of the aggregation process.
|
|
| 27 |
#' @examples |
|
| 28 |
#' # lower-level entries prepended with higher-level IDs (like GEOIDs) |
|
| 29 |
#' higher <- data.frame(id = c(100, 110), pop = c(500, 500)) |
|
| 30 |
#' lower <- data.frame( |
|
| 31 |
#' id = paste0(higher$id, 1:5), |
|
| 32 |
#' value = 1:5 / 10, |
|
| 33 |
#' pop = c(50, 10, 100, 40, 30) |
|
| 34 |
#' ) |
|
| 35 |
#' catchment_aggregate( |
|
| 36 |
#' lower, higher, |
|
| 37 |
#' consumers = "pop", id = "id", value = "value", |
|
| 38 |
#' to_id = "id", verbose = TRUE |
|
| 39 |
#' ) |
|
| 40 |
#' |
|
| 41 |
#' # same thing with IDs specified in a map |
|
| 42 |
#' catchment_aggregate( |
|
| 43 |
#' lower, higher, |
|
| 44 |
#' consumers = "pop", id = "id", value = "value", |
|
| 45 |
#' map = list("100" = c(1001, 1003, 1005), "110" = c(1102, 1104)), verbose = TRUE
|
|
| 46 |
#' ) |
|
| 47 |
#' @return A vector with an aggregate value (determined by \code{return_type})
|
|
| 48 |
#' for each entry in \code{to}.
|
|
| 49 |
#' @export |
|
| 50 | ||
| 51 |
catchment_aggregate <- function(from, to = NULL, id = "GEOID", value = "access", consumers = "population",
                                to_id = id, to_consumers = consumers, map = NULL, original_from = TRUE,
                                return_type = "original", verbose = FALSE) {
  if (verbose) cli_rule("Aggregating Values")
  # `from` is required; as a convenience, a lone `value` vector can stand in for it
  if (missing(from)) {
    if (!missing(value)) {
      from <- value
    } else {
      cli_abort("{.arg from} must be provided")
    }
  }
  # resolve lower-level (from) IDs: explicit vector > vector names > column of `from`
  fid <- if (length(id) > 1) {
    if (verbose) cli_alert_info("from IDs: {.arg id} vector")
    id
  } else if (is.null(dim(from))) {
    if (is.null(names(from))) cli_abort("{.arg id} must be specified if they are not provided in {.arg from}")
    if (verbose) cli_alert_info("from IDs: {.arg from} names")
    names(from)
  } else {
    if (verbose) cli_alert_info("from IDs: {.var {id}} column")
    from[[id]]
  }
  if (is.null(fid)) cli_abort("failed to resolve from IDs")
  # resolve lower-level values: explicit vector > `from` itself (if a vector) >
  # named column > constant 1 (so aggregation degenerates to counting/averaging)
  fv <- if (length(value) > 1) {
    if (verbose) cli_alert_info("from values: {.arg value} vector")
    value
  } else if (is.null(dim(from))) {
    if (verbose) cli_alert_info("from values: entered vector")
    from
  } else if (!is.null(value) && value %in% colnames(from)) {
    if (verbose) cli_alert_info("from values: {.var {value}} column")
    from[[value]]
  } else {
    # fixed cli inline-class typo: was {.feild 1}, which is not a valid cli class
    if (verbose) cli_alert_info("from values: {.field 1}")
    rep(1, length(fid))
  }
  if (is.null(fv) || length(fv) != length(fid)) cli_abort("failed to resolve {.arg from} values")

  n <- length(fv)
  if (length(fid) != n) cli_abort("IDs and values were not the same length")
  # resolve per-from consumer counts (weights), if available
  cv <- if (length(consumers) > 1) {
    if (verbose) cli_alert_info("consumers: {.arg consumers} vector")
    consumers
  } else if (!is.null(consumers) && consumers %in% colnames(from)) {
    if (verbose) cli_alert_info("consumers: {.var {consumers}} column")
    from[[consumers]]
  } else {
    NULL
  }
  if (!is.null(cv) && length(cv) != n) cli_abort("{.arg consumers} was not the same length as values")
  # original per-capita values are converted to regional totals before summing
  if (original_from && !is.null(cv) && length(cv) == n) fv <- fv * cv
  # `from` values should now be either per-region values or normalized

  # mapping `from` ids to `to` ids; `to` may be a data frame, a vector of IDs,
  # a column name in `from`, a conversion function, or a prefix length
  if (is.null(to)) to <- if (!missing(to_id) && (!is.character(to_id) || length(to_id) > 1)) to_id else from
  tid <- if (length(to_id) > 1) {
    if (verbose) cli_alert_info("to IDs: {.arg to_id} vector")
    to_id
  } else if (!is.null(to_id) && to_id %in% colnames(to)) {
    if (verbose) cli_alert_info("to IDs: {.var {to_id}} column")
    to[[to_id]]
  } else if (length(to) == 1) {
    if (is.function(to)) {
      if (verbose) cli_alert_info("to IDs: {.arg from} IDs converted by {.arg to} function")
      to(fid)
    } else if (is.numeric(to)) {
      if (verbose) cli_alert_info(paste("to IDs: first", to, "characters of {.arg from} IDs"))
      substr(fid, 1, to)
    } else {
      if (verbose) cli_alert_info("to IDs: {.var {to}} column of {.arg from}")
      from[[to]]
    }
  } else if (is.null(dim(to))) {
    if (verbose) cli_alert_info("to IDs: {.arg to} vector")
    to
  } else {
    NULL
  }
  if (is.null(tid)) cli_abort("failed to resolve {.arg to} IDs")

  # resolve higher-level consumer totals, used as the averaging denominator
  tcv <- if (length(to_consumers) > 1) {
    if (verbose) cli_alert_info("to consumers: {.arg to_consumers} vector")
    to_consumers
  } else if (!is.null(to_consumers) && to_consumers %in% colnames(to)) {
    if (verbose) cli_alert_info("to consumers: {.var {to_consumers}} column")
    to[[to_consumers]]
  } else {
    NULL
  }
  # when no `to` totals exist, denominators are accumulated from `from` consumers
  aggregate_consumers <- is.null(tcv)
  # column 1: values to sum; column 2: denominator contributions (1s when not needed)
  fv <- cbind(fv, if (!aggregate_consumers || is.null(cv)) 1 else cv)

  if (is.list(map)) {
    # explicit mapping: each `map` entry lists the `from` IDs within a `to` ID
    if (is.null(names(map))) cli_abort("{.arg map} must have names")
    if (verbose) cli_alert_info("mapping: by map list")
    if (is.null(tid)) tid <- names(map)
    utid <- as.character(unique(tid))
    av <- vapply(utid, function(h) {
      l <- map[[h]]
      su <- fid %in% l
      if (is.null(l) || !length(su)) {
        return(c(NA, NA))
      }
      colSums(fv[su, , drop = FALSE], na.rm = TRUE)
    }, numeric(2))
  } else {
    # implicit mapping: direct ID equality, exact match, or shared-prefix match
    if (length(tid) == n) {
      if (verbose) cli_alert_info("mapping: by {.arg to} ID")
      fid <- tid
      tid <- unique(tid)
    } else {
      tnc <- nchar(tid[1])
      if (tnc > nchar(fid[1]) || any(nchar(tid) != tnc)) {
        if (!any(fid %in% tid)) cli_abort("failed to figure out how to map {.arg from} ids to {.arg to} ids")
        if (verbose) cli_alert_info("mapping: by ID match")
      } else {
        # `from` IDs are assumed to be `to` IDs with a suffix (e.g., GEOIDs)
        if (verbose) cli_alert_info(paste("mapping: by first", tnc, "character match"))
        fid <- substr(fid, 1, tnc)
      }
    }
    utid <- as.character(unique(tid))
    av <- vapply(utid, function(h) {
      su <- fid == h
      if (any(su)) colSums(fv[su, , drop = FALSE], na.rm = TRUE) else c(NA, NA)
    }, numeric(2))
  }
  # expand aggregates back out if `to` IDs were repeated
  if (length(utid) != length(tid)) {
    if (verbose) cli_alert_info("to repeated IDs")
    av <- rbind(unname(av[1, ][tid]), unname(av[2, ][tid]))
  }

  # a numeric return_type scales values before averaging
  if (is.numeric(return_type)) {
    if (verbose) cli_alert_info(paste("multiplying by", return_type))
    av[1, ] <- av[1, ] * return_type
  }
  if (is.character(return_type) && grepl("^r", return_type, TRUE)) {
    # "region": return raw per-`to` sums
    if (verbose) cli_bullets(c(v = "returning sum of value per {.arg to} ID"))
    av[1, ]
  } else {
    # NOTE(review): this condition is always FALSE (aggregate_consumers is
    # defined as is.null(tcv)); possibly `!aggregate_consumers` was intended
    # to fall back when `tcv` mismatches `av` -- behavior left unchanged.
    if (aggregate_consumers && !is.null(tcv) && ncol(av) != length(tcv)) aggregate_consumers <- FALSE
    if (verbose) {
      cli_bullets(c(v = paste0(
        "returning sum of value over total {.arg ",
        if (aggregate_consumers) "from" else "to",
        "} consumers per {.arg to} ID"
      )))
    }
    if (aggregate_consumers) {
      # guard against division by zero when a region had no consumers
      av[2, av[2, ] == 0] <- 1
      av[1, ] / av[2, ]
    } else {
      tcv[tcv == 0] <- 1
      av[1, ] / tcv
    }
  }
}
| 1 |
#' Simulate Catchment Areas |
|
| 2 |
#' |
|
| 3 |
#' Generate a random population of consumers and providers. |
|
| 4 |
#' |
|
| 5 |
#' @param n Number of consumers to generate. |
|
| 6 |
#' @param weight Weight method used on Euclidean distances between consumers and providers. |
|
| 7 |
#' @param ... Passes additional arguments to \code{\link{catchment_weight}}.
|
|
| 8 |
#' @param consumers_range A numeric vector with entries for minimal and maximal consumer counts |
|
| 9 |
#' from which to sample. |
|
| 10 |
#' @param long_center Longitude to randomly place consumers around. |
|
| 11 |
#' @param long_spread Standard deviation of the consumer's longitude distribution. |
|
| 12 |
#' @param lat_center Latitude to randomly place consumers around. |
|
| 13 |
#' @param lat_spread Standard deviation of the consumer's latitude distribution. |
|
| 14 |
#' @param as_sf Logical; if \code{FALSE}, consumer and provider data are returned in regular \code{data.frame}s.
|
|
| 15 |
#' @examples |
|
| 16 |
#' pop <- simulate_catchments() |
|
| 17 |
#' if (require("leaflet", quiet = TRUE)) {
|
|
| 18 |
#' leaflet(pop$providers) |> |
|
| 19 |
#' addCircles( |
|
| 20 |
#' radius = 1e5, color = "#666", stroke = FALSE |
|
| 21 |
#' ) |> |
|
| 22 |
#' addCircles( |
|
| 23 |
#' data = pop$consumers, label = ~ paste0("Consumers: ", count, ", Access: ", access)
|
|
| 24 |
#' ) |> |
|
| 25 |
#' addCircles( |
|
| 26 |
#' color = "#000", label = ~ paste("Provider", id)
|
|
| 27 |
#' ) |
|
| 28 |
#' } |
|
| 29 |
#' @return A list with \code{consumers}, \code{providers}, \code{cost}, and \code{weight} entries.
|
|
| 30 |
#' @export |
|
| 31 | ||
| 32 |
simulate_catchments <- function(n = 100, weight = 1, ..., consumers_range = c(10, 5000), long_center = -99,
                                long_spread = 1, lat_center = 40, lat_spread = 1, as_sf = TRUE) {
  # roll consumers: random counts, with locations normally distributed around the center
  consumers <- data.frame(
    count = sample(seq(consumers_range[1], consumers_range[2]), n, TRUE),
    X = rnorm(n, long_center, long_spread),
    Y = rnorm(n, lat_center, lat_spread)
  )
  locs <- consumers[, c("X", "Y")]
  # lma_simets returns similarities; invert to recover Euclidean distances
  consumers_dist <- 1 / lma_simets(locs, "euclidean", symmetrical = TRUE) - 1
  diag(consumers_dist) <- Inf
  # start from the consumer whose nearest neighbor is farthest away
  # (max.col(-consumers_dist) picks each row's nearest other consumer)
  start <- which.max(diag(consumers_dist[, max.col(-consumers_dist)]))
  diag(consumers_dist) <- 0
  # preallocate one potential provider per consumer; unused rows are dropped on break
  providers <- data.frame(id = seq_len(nrow(consumers)), X = 0, Y = 0)
  # placing providers: repeatedly drop a provider near the currently worst-served
  # consumer and its in-catchment neighbors, until every consumer has nonzero access
  for (i in providers$id) {
    # consumers reachable from `start` under the weight scheme (plus `start` itself)
    sel <- c(start, which(consumers_dist[start, ] * catchment_weight(
      consumers_dist[start, , drop = FALSE],
      weight, ...
    ) != 0))
    # place the provider at the jittered centroid of that cluster
    providers[i, c("X", "Y")] <- colMeans(locs[sel, , drop = FALSE] + rnorm(length(sel) * 2, 0, .1))
    r <- catchment_ratio(consumers, providers[seq_len(i), , drop = FALSE], weight = weight, ...)
    if (all(r != 0)) {
      providers <- providers[seq_len(i), ]
      break
    }
    # next iteration targets the consumer with the lowest access ratio
    start <- which.min(r)
  }
  # NOTE(review): `r` is the value leaked from the final loop iteration; this relies
  # on the loop body running at least once (n >= 1) -- confirm intended.
  consumers$access <- r
  # NOTE(review): consumers[, -1] drops only `count`, so the just-added `access`
  # column is included alongside X and Y in this distance computation, while
  # providers[, -1] is X and Y only -- confirm whether `locs` was intended here.
  cost <- 1 / lma_simets(consumers[, -1], providers[, -1], "euclidean", symmetrical = TRUE) - 1
  list(
    consumers = if (as_sf) st_as_sf(consumers, coords = c("X", "Y"), crs = 4326) else consumers,
    providers = if (as_sf) st_as_sf(providers, coords = c("X", "Y"), crs = 4326) else providers,
    cost = cost, weight = catchment_weight(cost, weight, ...)
  )
}
| 1 |
#' Download and Prepare Map Shapes |
|
| 2 |
#' |
|
| 3 |
#' Download cartographic boundary files from the \href{https://www.census.gov}{U.S. Census Bureau},
|
|
| 4 |
#' and reformat them to simplified GeoJSON files. |
|
| 5 |
#' |
|
| 6 |
#' @param dir Path to a directory in which to save the reformatted files. If these are to be |
|
| 7 |
#' used in a published site, they should be in the \code{"docs"} directory of the site directory. If not specified,
|
|
| 8 |
#' the shapes are downloaded and returned, but not saved. |
|
| 9 |
#' @param fips State Federal Information Processing Standard (FIPS) code, United States Postal Service (USPS) |
|
| 10 |
#' code, or name; see \href{https://www2.census.gov/geo/docs/reference/state.txt}{state.txt}.
|
|
| 11 |
#' @param entity Entity name (e.g., \code{"county"}, \code{"tract"}, or \code{"bg"}); see
|
|
| 12 |
#' \href{https://www2.census.gov/geo/tiger/GENZ2019/2019_file_name_def.pdf}{2019_file_name_def.pdf}
|
|
| 13 |
#' (or appropriate year's file). States and counties are only available at the national level, so if these are |
|
| 14 |
#' requested for a particular state, the national files will be downloaded and subset. |
|
| 15 |
#' @param name Name for the GeoJSON file (without extension) to be written. |
|
| 16 |
#' @param year Year of the shapes, 2013 and after. |
|
| 17 |
#' @param resolution Resolution of the shapes; one of \code{"500k"} (default), \code{"5m"}, or \code{"20m"}.
|
|
| 18 |
#' @param strip_features Logical; if \code{TRUE}, will strip all features other than IDs.
|
|
| 19 |
#' @param simplify A function to simplify shapes with, such as \code{rmapshaper::ms_simplify}. This
|
|
| 20 |
#' function should take the \code{sf} \code{data.frame} as its first argument, and return that object
|
|
| 21 |
#' with its \code{geometry} replaced.
|
|
| 22 |
#' @param ... Passes additional arguments to \code{simplify}; e.g., for \code{rmapshaper::ms_simplify},
|
|
| 23 |
#' you might want to specify \code{keep} and set \code{keep_shapes} to \code{TRUE}.
|
|
| 24 |
#' @param force Logical; if \code{TRUE}, will force a re-download, and overwrite any existing files.
|
|
| 25 |
#' @examples |
|
| 26 |
#' \dontrun{
|
|
| 27 |
#' # download Virginia counties shapes |
|
| 28 |
#' shapes <- download_census_shapes( |
|
| 29 |
#' "docs/shapes", "va", "county", |
|
| 30 |
#' name = "counties", |
|
| 31 |
#' simplify = rmapshaper::ms_simplify, keep_shapes = TRUE |
|
| 32 |
#' ) |
|
| 33 |
#' } |
|
| 34 |
#' @return An \code{sf} \code{data.frame} with a geometry column containing shapes, and other features in columns.
|
|
| 35 |
#' @export |
|
| 36 | ||
| 37 |
download_census_shapes <- function(dir = NULL, fips = "us", entity = "state", name = NULL, year = 2019,
                                   resolution = "500k", strip_features = FALSE, simplify = NULL, ..., force = FALSE) {
  # lookup table of state names, USPS postal codes, and FIPS codes (parallel vectors)
  us_fips <- list(
    name = c(
      "united states", "alabama", "alaska", "arizona", "arkansas", "california", "colorado", "connecticut",
      "delaware", "district of columbia", "florida", "georgia", "hawaii", "idaho", "illinois", "indiana", "iowa",
      "kansas", "kentucky", "louisiana", "maine", "maryland", "massachusetts", "michigan", "minnesota", "mississippi",
      "missouri", "montana", "nebraska", "nevada", "new hampshire", "new jersey", "new mexico", "new york",
      "north carolina", "north dakota", "ohio", "oklahoma", "oregon", "pennsylvania", "rhode island", "south carolina",
      "south dakota", "tennessee", "texas", "utah", "vermont", "virginia", "washington", "west virginia", "wisconsin",
      "wyoming", "american samoa", "guam", "northern mariana islands", "puerto rico", "minor outlying islands",
      "virgin islands"
    ),
    post = c(
      "us", "al", "ak", "az", "ar", "ca", "co", "ct", "de", "dc", "fl", "ga", "hi", "id", "il", "in", "ia", "ks", "ky",
      "la", "me", "md", "ma", "mi", "mn", "ms", "mo", "mt", "ne", "nv", "nh", "nj", "nm", "ny", "nc", "nd", "oh", "ok",
      "or", "pa", "ri", "sc", "sd", "tn", "tx", "ut", "vt", "va", "wa", "wv", "wi", "wy", "as", "gu", "mp", "pr", "um",
      "vi"
    ),
    fips = c(
      "us", 1, 2, 4, 5, 6, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
      33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 44, 45, 46, 47, 48, 49, 50, 51, 53, 54, 55, 56, 60, 66, 69, 72, 74, 78
    )
  )
  # entity names accepted by the Census cartographic boundary file naming scheme
  entities <- c(
    "aiannh", "anrc", "bg", "cbsa", "cd116", "cnecta", "concity", "county", "county_within_cd116",
    "county_within_ua", "cousub", "csa", "division", "elsd", "nation", "necta", "place", "puma10", "region", "scsd",
    "sldl", "sldu", "state", "subbarrio", "tract", "ua10", "unsd", "zcta510"
  )
  # normalize input: lowercase, drop a leading zero (e.g., "01" -> "1") and punctuation
  fips <- gsub("^0|[^a-z0-9\\s]", "", tolower(fips[[1]]))
  # try FIPS code first, then 2-character postal code, then partial state-name match
  fid <- which(us_fips$fips == fips)
  if (!length(fid)) {
    fid <- if (nchar(fips) == 2) {
      which(us_fips$post == fips)
    } else {
      pmatch(fips, us_fips$name)
    }
    if (!length(fid) || is.na(fid)) cli_abort("failed to recognize {.arg fips}")
  }
  fips <- us_fips$fips[fid]
  # restore the two-digit, zero-padded form used in file names
  if (nchar(fips) == 1) fips <- paste0("0", fips)
  # allow two-digit years (e.g., 19 -> 2019)
  if (nchar(year) < 4) year <- as.numeric(paste0("20", year))
  if (year < 2013) cli_abort("only years 2013 and after are available from this function")
  entity <- tolower(entity)
  entity <- match.arg(entity, entities)
  # state/county shapes only exist at the national level; download "us" and subset after
  subset <- NULL
  if (fips != "us" && (entity == "county" || entity == "state")) {
    subset <- fips
    fips <- "us"
  }
  resolution <- tolower(resolution)
  resolution <- match.arg(resolution, c("500k", "5m", "20m"))
  # Census cartographic boundary file name, e.g. cb_2019_us_state_500k
  file <- paste0("cb_", year, "_", fips, "_", entity, "_", resolution)
  # 2013 files live directly under GENZ2013/; later years under .../shp/
  url <- paste0("https://www2.census.gov/geo/tiger/GENZ", year, if (year == 2013) "/" else "/shp/", file, ".zip")
  temp <- tempdir()
  temp_file <- paste0(temp, "/", file, ".zip")
  # NOTE(review): with dir = NULL, paste0(NULL, "/", ...) yields a root-level path
  # like "/cb_....geojson", which presumably never exists, forcing a fresh download.
  out_file <- paste0(dir, "/", if (!is.null(name)) name else file, ".geojson")
  if (force && file.exists(out_file)) unlink(out_file)
  if (!file.exists(out_file)) {
    # reuse a previously extracted copy in tempdir unless forcing a re-download
    if (!force && file.exists(paste0(temp, "/", file))) temp_file <- paste0(temp, "/", file)
    if (force || !file.exists(temp_file)) {
      download.file(url, temp_file)
      unzip(temp_file, exdir = paste0(temp, "/", file))
      file.remove(temp_file)
      # point at the extraction directory for st_read
      temp_file <- paste0(temp, "/", file)
    }
    shapes <- st_read(temp_file, quiet = TRUE)
    # subset national state/county files to the requested state
    if (!is.null(subset)) shapes <- shapes[shapes$STATEFP == subset, ]
    if (strip_features) shapes <- shapes[, "GEOID", drop = FALSE]
    if (is.function(simplify)) shapes <- simplify(shapes, ...)
    if (!is.null(dir)) {
      if (!file.exists(dir)) dir.create(dir, FALSE, TRUE)
      st_write(shapes, out_file)
    }
  } else {
    # cached output already exists; read it instead of downloading
    shapes <- st_read(out_file, quiet = TRUE)
  }
  invisible(shapes)
}
| 1 |
#' Extract a network of consumers and providers |
|
| 2 |
#' |
|
| 3 |
#' Extract an interconnected set of consumers and providers from a set of connections within catchment areas. |
|
| 4 |
#' |
|
| 5 |
#' @param connections A list- or matrix-like object with \code{"from"} and \code{"to"} entries, such as that
|
|
| 6 |
#' returned from \code{\link{catchment_connections}}.
|
|
| 7 |
#' @param from_start,to_start The ID of a \code{from} (consumer) or \code{to} (provider) in \code{connections} from
|
|
| 8 |
#' which to trace out a network. |
|
| 9 |
#' @examples |
|
| 10 |
#' pop <- simulate_catchments() |
|
| 11 |
#' connections <- catchment_connections( |
|
| 12 |
#' pop$consumers, pop$providers, |
|
| 13 |
#' weight = 1, max_cost = .5, |
|
| 14 |
#' ) |
|
| 15 |
#' catchment_network(connections, 1) |
|
| 16 |
#' @return A subsetted version of \code{connections} containing only connections traceable from \code{from_start}
|
|
| 17 |
#' and/or \code{to_start}.
|
|
| 18 |
#' @export |
|
| 19 | ||
| 20 |
catchment_network <- function(connections, from_start = NULL, to_start = NULL) {
  # `connections` must expose "from" and "to" entries (e.g., from catchment_connections)
  if (any(!c("from", "to") %in% colnames(connections))) cli_abort("{.arg connections} not recognized")
  origins <- connections[["from"]]
  destinations <- connections[["to"]]
  origin_ids <- unique(origins)
  # seed the trace: explicit starts if given, otherwise the first origin
  reached_from <- if (is.null(from_start) && is.null(to_start)) origins[1] else from_start
  reached_to <- to_start
  # alternately expand reachable providers and consumers until the frontier
  # stabilizes; at most length(origin_ids) rounds are ever needed
  for (round in seq_along(origin_ids)) {
    reached_to <- unique(c(reached_to, destinations[origins %in% reached_from]))
    reached_from <- unique(c(reached_from, origins[destinations %in% reached_to]))
    if (all(origin_ids %in% reached_from)) break
  }
  # keep only connections touching the traced network
  connections[origins %in% reached_from | destinations %in% reached_to, ]
}