1 |
#' Calculate Floating Catchment Area Ratios |
|
2 |
#' |
|
3 |
#' Calculate provider (supply) to consumer (demand) ratios within floating catchment areas. |
|
4 |
#' |
|
5 |
#' @param consumers Number of consumers (demand); either a vector with consumer amounts (such as population counts), |
|
6 |
#' or a matrix-like object with a column of IDs (located by \code{consumers_id}) and a column of amounts (located by |
|
7 |
#' \code{consumers_value}). |
|
8 |
#' @param providers Number of providers (supply); either a vector with provider amounts (such as number of doctors), |
|
9 |
#' or a matrix-like object with a column of IDs (located by \code{providers_id}) and a column of amounts (located by |
|
10 |
#' \code{providers_value}). |
|
11 |
#' @param cost A matrix-like object of cost associated with each pair of \code{consumers} and \code{providers} |
|
12 |
#' (such as distance or travel times), with \code{consumers} in rows and \code{providers} in columns. |
|
13 |
#' \code{cost}'s dimensions should be \code{c(length(consumers), length(providers))} if \code{consumers} and |
|
14 |
#' \code{providers} are vectors, or will be aligned by name if available (as vector names or in \code{_id} columns). |
|
15 |
#' If \code{NULL}, coordinate information will be looked for in \code{consumers} and \code{providers} (based on |
|
16 |
#' \code{consumers_location} and \code{providers_location}), from which to calculate Euclidean distances. |
|
17 |
#' Costs equal to \code{0} are set to \code{adjust_zeros} by default, and any missing costs are then set to \code{0} |
|
18 |
#' -- if \code{0} indicates no connection in your \code{cost} matrix, set \code{adjust_zeros} to \code{FALSE}, or |
|
19 |
#' set \code{0} to \code{NA} in your \code{cost} matrix. |
|
20 |
#' @param weight Means of defining catchment areas and their topology / friction / impedance. The simplest is a single |
|
21 |
#' number representing a maximum distance between \code{consumers} and \code{providers} (2-step floating catchment area; |
|
22 |
#' 2SFCA; Luo & Wang, 2003). |
|
23 |
#' An enhancement of this is a list of vectors with two values each: the first is a distance, and the second a weight |
|
24 |
#' to associate with that distance (e.g., \code{list(c(10, 1), c(20, .5))}, which will give consumers within a |
|
25 |
#' \code{cost} of 10 full weight, and those within a \code{cost} of 20 half weight; enhanced 2-step floating catchment |
|
26 |
#' area; E2SFCA; Lou & Qi 2009). If a character, refers to a weighting function (kernel density 2-step floating |
|
27 |
#' catchment area; KD2SFCA; Dai, 2010; in order from most gradual to steepest between costs of \code{1} and \code{6}): |
|
28 |
#' \tabular{ll}{ |
|
29 |
#' \code{linear} (\code{li}) \tab \code{{w <- (scale - cost) / scale; w[w < 0] <- 0; w}}\cr |
|
30 |
#' \code{gaussian} (\code{ga}) \tab \code{exp(-cost^2 / (2 * scale ^ 2))}\cr |
|
31 |
#' \code{d*} (name of a density function; e.g., \code{"dnorm"}) \tab |
|
32 |
#' \code{weight(cost, 0, scale)} |
|
33 |
#' \cr |
|
34 |
#' \code{p*} (name of a distribution function; e.g., \code{"pnorm"}) \tab \code{weight(-cost, 0, scale)}\cr |
|
35 |
#' \code{gravity} / \code{normal} (\code{gr} or \code{n}) \tab \code{sqrt(1 / cost^scale)}\cr |
|
36 |
#' \code{logarithmic} (\code{loga}) \tab \code{1 / (1 + log(cost, scale))}\cr |
|
37 |
#' \code{logistic} (\code{l}) \tab \code{1 / (1 + exp(scale * cost))}\cr |
|
38 |
#' \code{exponential} (\code{e}) \tab \code{exp(-cost * scale)}\cr |
|
39 |
#' } |
|
40 |
#' If a function, this will be passed \code{cost} as its first argument -- its output should be a matrix |
|
41 |
#' convertible to a sparse matrix, of the same dimensions as \code{cost}. If a matrix-like object, |
|
42 |
#' this will be converted to a sparse matrix. |
|
43 |
#' @param normalize_weight Logical; if \code{TRUE}, weight is row-normalized such that \code{consumers} weights |
|
44 |
#' are spread across \code{providers} in range. This can help correct for the increased weight of \code{consumers} |
|
45 |
#' when they are in range of multiple \code{providers}. Selection weights like this make the difference between 2- |
|
46 |
#' and 3-step floating catchment areas (3SFCA; Wan, Zou, & Sternberg, 2012). |
|
47 |
#' @param scale Numeric scaling factor if \code{weight} is the name of a decay function. |
|
48 |
#' @param max_cost Numeric limit on \code{cost}. This is the same as setting \code{weight} to a single value, |
|
49 |
#' or specifying a list of steps as \code{weight} (where the most distant step is effectively \code{max_cost}), |
|
50 |
#' although a single-value weight is exclusive (\code{cost < weight}) where steps are inclusive. This is most useful |
|
51 |
#' when \code{weight} is a weighing function, where \code{max_cost} will trim the tail of the weight distribution. |
|
52 |
#' @param adjust_consumers,adjust_providers A function to adjust weights when applied to \code{consumers} or |
|
53 |
#' \code{providers}; should take the sparse weight matrix as its first argument, and return an adjusted matrix of the |
|
54 |
#' same type. For example, you could square provider weights for the modified 2-step floating catchment area |
|
55 |
#' (M2SFCA; Delamater, 2013) with \code{adjust_providers = function(w) w ^ 2}, or standardize both weights for the |
|
56 |
#' balanced floating catchment area (BFCA; Paez, Higgins, & Vivona, 2019) with \code{adjust_consumers = } |
|
57 |
#' \code{function(w) w / rowSums(w)} and \code{adjust_providers = function(w) sweep(w, 2, colSums(w), "/")}. |
|
58 |
#' When weights are adjusted independently in this way, region scores will likely no longer sum to the sum |
|
59 |
#' of \code{providers} (fewer than the total number of providers will be distributed). |
|
60 |
#' @param adjust_zeros A number to set real \code{0}s to. Set to \code{FALSE} to prevent \code{0}s from being adjusted. |
|
61 |
#' @param return_type Determines the values that are returned: \code{"original"} (default) for \code{providers} |
|
62 |
#' per \code{consumers} (e.g., how many, likely fractional, doctors are accessible by each person within each region), |
|
63 |
#' \code{"region"} for number of \code{providers} per \code{consumers} entry (\code{consumers * original}; e.g., |
|
64 |
#' how many doctors are accessible within each region), or \code{"normalized"} for \code{original} divided by |
|
65 |
#' \code{sum(region) / sum(consumers)}. Can also be a number by which to multiply the original values (e.g., \code{1000} |
|
66 |
#' for \code{providers} per 1,000 \code{consumers}). Alternatively \code{"supply"} will return just the |
|
67 |
#' number of resources allocated to each consumer location (total weighted resources within each consumer's catchment area), |
|
68 |
#' or \code{"demand"} will return just the population per provider (total weighted population within each provider's catchment area). |
|
69 |
#' @param consumers_commutes A square, consumers source x consumers origin matrix with counts of origins, |
|
70 |
#' used to specify multiple possible origins for each consumer location (e.g., consumers living in location 1 |
|
71 |
#' may work in locations 1 and 3, so the first row of \code{consumers_commutes} should have values in columns 1 and 3). |
|
72 |
#' This can also be entered in place of \code{consumers}, assuming it includes all consumers (e.g., in a worker commute |
|
73 |
#' matrix, you may need to add non-workers to the diagonal, if they are also consumers). |
|
74 |
#' @param consumers_id,consumers_value,consumers_location,providers_id,providers_value,providers_location Column |
|
75 |
#' names in \code{consumers} and/or \code{providers} to extract IDs, values, and location data (referring to a single |
|
76 |
#' \code{sf} geometry column, or multiple columns with coordinates). These can also be used to directly enter |
|
77 |
#' ID, value, and/or location vectors (or matrices for location coordinates). |
|
78 |
#' @param distance_metric Name of the distance metric to be used, if costs are being calculated from coordinates; |
|
79 |
#' defaults to \code{"euclidean"}; see \code{\link[lingmatch]{lma_simets}}. |
|
80 |
#' @param verbose Logical; if \code{TRUE}, will print logs, and the type of floating catchment area that was calculated. |
|
81 |
#' @examples |
|
82 |
#' pop <- c(5, 10, 50) |
|
83 |
#' doc <- c(50, 100) |
|
84 |
#' travel_time <- matrix(c(5, 50, 25, 70, 40, 30), ncol = 2) |
|
85 |
#' |
|
86 |
#' # 2-step floating catchment area |
|
87 |
#' catchment_ratio(pop, doc, travel_time, 30) |
|
88 |
#' |
|
89 |
#' # kernel density (Gaussian) 2-step floating catchment area |
|
90 |
#' catchment_ratio(pop, doc, travel_time, "gaussian") |
|
91 |
#' |
|
92 |
#' # enhanced 2-step floating catchment area |
|
93 |
#' step_weights <- list(c(60, .22), c(40, .68), c(20, 1)) |
|
94 |
#' catchment_ratio(pop, doc, travel_time, step_weights) |
|
95 |
#' |
|
96 |
#' # modified 2-step floating catchment area |
|
97 |
#' catchment_ratio(pop, doc, travel_time, step_weights, adjust_providers = function(m) m^2) |
|
98 |
#' |
|
99 |
#' # balanced 2-step floating catchment area |
|
100 |
#' catchment_ratio( |
|
101 |
#' pop, doc, travel_time, step_weights, |
|
102 |
#' adjust_consumers = function(w) sweep(w, 1, rowSums(w), "/", FALSE), |
|
103 |
#' adjust_providers = function(w) sweep(w, 2, colSums(w), "/", FALSE), |
|
104 |
#' ) |
|
105 |
#' |
|
106 |
#' # 3-step floating catchment area |
|
107 |
#' catchment_ratio(pop, doc, travel_time, step_weights, normalize_weight = TRUE) |
|
108 |
#' |
|
109 |
#' # visualized weight functions |
|
110 |
#' if (require("splot", quietly = TRUE)) { |
|
111 |
#' cost <- 1:10 |
|
112 |
#' scale <- 2 |
|
113 |
#' splot(list( |
|
114 |
#' linear = (10 - cost) / 10, |
|
115 |
#' gaussian = exp(-cost^2 / (2 * scale^2)), |
|
116 |
#' dnorm = dnorm(cost, 0, scale), |
|
117 |
#' pnorm = pnorm(-cost, 0, scale), |
|
118 |
#' gravity = sqrt(1 / cost^scale), |
|
119 |
#' logarithmic = 1 / (1 + log(cost, scale)), |
|
120 |
#' logistic = 1 / (1 + exp(scale * cost)), |
|
121 |
#' exponential = exp(-cost * scale) |
|
122 |
#' ) ~ cost, title = "Decay Functions", laby = "Weight", labx = "Cost", lines = "con", note = FALSE) |
|
123 |
#' } |
|
124 |
#' @return \code{catchment_ratio}: A vector with a ratio (determined by \code{return_type}) |
|
125 |
#' for each entry in \code{consumers}. If \code{return_type} is \code{"supply"}, values |
|
126 |
#' will be the number of resources within the location, rather than a ratio. |
|
127 |
#' If \code{return_type} is \code{"demand"}, the vector will have an entry for each provider |
|
128 |
#' rather than consumer location, and values will be number of consumers. |
|
129 |
#' @references |
|
130 |
#' Dai, D. (2010). Black residential segregation, disparities in spatial access to health care facilities, and |
|
131 |
#' late-stage breast cancer diagnosis in metropolitan Detroit. \emph{Health & place, 16}, 1038-1052. |
|
132 |
#' doi: \href{https://doi.org/10.1016/j.healthplace.2010.06.012}{10.1016/j.healthplace.2010.06.012} |
|
133 |
#' |
|
134 |
#' Delamater, P. L. (2013). Spatial accessibility in suboptimally configured health care systems: a modified |
|
135 |
#' two-step floating catchment area (M2SFCA) metric. \emph{Health & place, 24}, 30-43. |
|
136 |
#' doi: \href{https://doi.org/10.1016/j.healthplace.2013.07.012}{10.1016/j.healthplace.2013.07.012} |
|
137 |
#' |
|
138 |
#' Lou, W. & Qi, Y. (2009). An enhanced two-step floating catchment area (E2SFCA) method for measuring spatial |
|
139 |
#' accessibility to primary care physicians. \emph{Health & Place, 15}, 1100-1107. |
|
140 |
#' doi: \href{https://doi.org/10.1016/j.healthplace.2009.06.002}{10.1016/j.healthplace.2009.06.002} |
|
141 |
#' |
|
142 |
#' Luo, W. & Wang, F. (2003). Measures of spatial accessibility to health care in a GIS environment: synthesis |
|
143 |
#' and a case study in the Chicago region. \emph{Environment and Planning B: Planning and Design, 30}, 865-884. |
|
144 |
#' doi: \href{https://doi.org/10.1068/b29120}{10.1068/b29120} |
|
145 |
#' |
|
146 |
#' Paez, A., Higgins, C. D., & Vivona, S. F. (2019). Demand and level of service inflation in Floating Catchment |
|
147 |
#' Area (FCA) methods. \emph{Plos one, 14}, e0218773. doi: |
|
148 |
#' \href{https://doi.org/10.1371/journal.pone.0218773}{10.1371/journal.pone.0218773} |
|
149 |
#' |
|
150 |
#' Wan, N., Zou, B., & Sternberg, T. (2012). A three-step floating catchment area method for analyzing spatial |
|
151 |
#' access to health services. \emph{International Journal of Geographical Information Science, 26}, 1073-1089. |
|
152 |
#' doi: \href{https://doi.org/10.1080/13658816.2011.624987}{10.1080/13658816.2011.624987} |
|
153 |
#' @export |
|
154 | ||
155 |
catchment_ratio <- function(consumers = NULL, providers = NULL, cost = NULL, weight = NULL, normalize_weight = FALSE, |
|
156 |
scale = 2, max_cost = NULL, adjust_consumers = NULL, adjust_providers = NULL, |
|
157 |
adjust_zeros = 1e-6, return_type = "original", consumers_commutes = NULL, consumers_id = "GEOID", |
|
158 |
consumers_value = "count", consumers_location = c("X", "Y"), providers_id = "GEOID", |
|
159 |
providers_value = "count", providers_location = c("X", "Y"), |
|
160 |
distance_metric = "euclidean", verbose = FALSE) { |
|
161 | 2x |
if (verbose) cli_rule("Calculating a Floating Catchment Area Ratio") |
162 | 104x |
type <- "" |
163 | 104x |
if (is.null(consumers_commutes)) { |
164 | 102x |
if (!missing(consumers_value)) { |
165 | 13x |
dims <- dim(consumers_value) |
166 | 13x |
if (!is.null(dims) && dims[1] == dims[2]) { |
167 | 1x |
consumers_commutes <- consumers_value |
168 | 1x |
if (is.null(consumers)) consumers <- rowSums(consumers_commutes) |
169 |
} |
|
170 |
} else { |
|
171 | 89x |
dims <- dim(consumers) |
172 | 89x |
if (missing(consumers_id) && missing(consumers_location) && !is.null(dims) && dims[1] == dims[2]) { |
173 | 1x |
consumers_commutes <- consumers |
174 | 1x |
consumers <- rowSums(consumers) |
175 |
} |
|
176 |
} |
|
177 |
} |
|
178 | 104x |
input_data <- c(is.null(dim(providers)), is.null(dim(consumers))) |
179 |
# getting provider and consumer value vectors |
|
180 | 104x |
cv <- if (input_data[2]) { |
181 | 43x |
if (is.null(consumers)) { |
182 | 3x |
if (is.numeric(consumers_value)) { |
183 | 3x |
consumers_value |
184 |
} else { |
|
185 | ! |
cli_abort("{.arg consumers} is not specified, and {.arg consumers_value} is non-numeric") |
186 |
} |
|
187 |
} else { |
|
188 | ! |
if (verbose) cli_alert_info("consumers value: {.arg consumers} vector") |
189 | 40x |
as.numeric(consumers) |
190 |
} |
|
191 | 104x |
} else if (length(consumers_value) == 1 && consumers_value %in% colnames(consumers)) { |
192 | 1x |
if (verbose) cli_alert_info("consumers value: {.var {consumers_value}} column") |
193 | 60x |
as.numeric(consumers[, consumers_value, drop = TRUE]) |
194 |
} else { |
|
195 | 1x |
if (verbose) cli_alert_info("consumers value: {.field 1}") |
196 | 1x |
rep(1, nrow(consumers)) |
197 |
} |
|
198 | 104x |
cn <- length(cv) |
199 | ! |
if (!cn) cli_abort("failed to recognize values in {.arg consumers}") |
200 | 1x |
if (anyNA(cv)) cv[is.na(cv)] <- 0 |
201 | 104x |
pv <- if (input_data[1]) { |
202 | 43x |
if (is.null(providers)) { |
203 | 4x |
if (is.numeric(providers_value)) { |
204 | 4x |
providers_value |
205 |
} else { |
|
206 | ! |
cli_abort("{.arg providers} is not specified, and {.arg providers_value} is non-numeric") |
207 |
} |
|
208 |
} else { |
|
209 | ! |
if (verbose) cli_alert_info("providers value: {.arg providers} vector") |
210 | 39x |
as.numeric(providers) |
211 |
} |
|
212 | 104x |
} else if (length(providers_value) == 1 && providers_value %in% colnames(providers)) { |
213 | 1x |
if (verbose) cli_alert_info("providers value: {.var {providers_value}} column") |
214 | 9x |
as.numeric(providers[, providers_value, drop = TRUE]) |
215 |
} else { |
|
216 | 1x |
if (verbose) cli_alert_info("providers value: {.field 1}") |
217 | 52x |
rep(1, nrow(providers)) |
218 |
} |
|
219 | ! |
if (!length(pv)) cli_abort("failed to recognize values in {.arg providers}") |
220 | 1x |
if (anyNA(pv)) pv[is.na(pv)] <- 0 |
221 |
# getting provider and consumer ids |
|
222 | 104x |
cid <- if (input_data[1]) { |
223 | 43x |
if (!missing(consumers_id) && cn == length(consumers_id)) { |
224 | ! |
if (verbose) cli_alert_info("consumers id: {.arg consumers_id} vector") |
225 | 5x |
consumers_id |
226 | 38x |
} else if (!is.null(names(consumers))) { |
227 | ! |
if (verbose) cli_alert_info("consumers id: {.arg consumers} names") |
228 | 17x |
names(consumers) |
229 | 21x |
} else if (!is.null(colnames(cost))) { |
230 | ! |
if (verbose) cli_alert_info("consumers id: {.arg cost} column names") |
231 | ! |
colnames(cost) |
232 |
} else { |
|
233 | ! |
if (verbose) cli_alert_info("consumers id: sequence along consumers value") |
234 | 21x |
seq_along(cv) |
235 |
} |
|
236 | 104x |
} else if (length(consumers_id) == 1 && consumers_id %in% colnames(consumers)) { |
237 | 1x |
if (verbose) cli_alert_info("consumers id: {.var {consumers_id}} column") |
238 | 9x |
consumers[, consumers_id, drop = TRUE] |
239 |
} else { |
|
240 | 1x |
if (verbose) cli_alert_info("consumers id: sequence along consumers value") |
241 | 52x |
seq_along(cv) |
242 |
} |
|
243 | ! |
if (!length(cid)) cli_abort("failed to recognize IDs in {.arg consumers}") |
244 | 104x |
pid <- if (input_data[1]) { |
245 | 43x |
if (!missing(providers_id) && length(pv) == length(providers_id)) { |
246 | ! |
if (verbose) cli_alert_info("providers id: {.arg providers_id} vector") |
247 | 6x |
providers_id |
248 | 37x |
} else if (!is.null(names(providers))) { |
249 | ! |
if (verbose) cli_alert_info("providers id: {.arg providers} names") |
250 | 16x |
names(providers) |
251 | 21x |
} else if (!is.null(colnames(cost))) { |
252 | ! |
if (verbose) cli_alert_info("providers id: {.arg cost} column names") |
253 | ! |
colnames(cost) |
254 |
} else { |
|
255 | ! |
if (verbose) cli_alert_info("providers id: sequence along providers value") |
256 | 21x |
seq_along(pv) |
257 |
} |
|
258 | 104x |
} else if (length(providers_id) == 1 && providers_id %in% colnames(providers)) { |
259 | 1x |
if (verbose) cli_alert_info("providers id: {.var {providers_id}} column") |
260 | 9x |
providers[, providers_id, drop = TRUE] |
261 |
} else { |
|
262 | 1x |
if (verbose) cli_alert_info("providers id: sequence along providers value") |
263 | 52x |
seq_along(pv) |
264 |
} |
|
265 | ! |
if (!length(pid)) cli_abort("failed to recognize IDs in {.arg providers}") |
266 | 104x |
if (is.null(cost)) { |
267 | 62x |
if (missing(consumers_location) && !all(consumers_location %in% colnames(consumers))) { |
268 | 7x |
consumers_location <- "geometry" |
269 |
} |
|
270 | 62x |
if (missing(providers_location) && !all(providers_location %in% colnames(providers))) { |
271 | 7x |
providers_location <- "geometry" |
272 |
} |
|
273 | 62x |
if ((!is.character(providers_location) || all(providers_location %in% colnames(providers))) && |
274 | 62x |
(!is.character(consumers_location) || all(consumers_location %in% colnames(consumers)))) { |
275 | 1x |
if (verbose) cli_bullets("calculating cost from locations...") |
276 | 56x |
ccords <- if (is.character(consumers_location)) { |
277 | 1x |
if (verbose) cli_alert_info("consumers location: {.arg consumers} columns ({.field {consumers_location}})") |
278 | 54x |
consumers[, consumers_location, drop = TRUE] |
279 |
} else { |
|
280 | ! |
if (verbose) cli_alert_info("consumers location: {.arg consumers_location}") |
281 | 2x |
consumers_location |
282 |
} |
|
283 | 56x |
if (any(grepl("^sf", class(ccords)))) { |
284 | 2x |
if (any(grepl("POLY", class(ccords), fixed = TRUE))) { |
285 | ! |
if (verbose) cli_alert_info("calculating centroids of consumers location geometry") |
286 | ! |
ccords <- st_centroid(ccords) |
287 |
} |
|
288 | ! |
if (verbose) cli_alert_info("using coordinates from consumers location geometry") |
289 | 2x |
ccords <- st_coordinates(ccords) |
290 |
} |
|
291 | 56x |
pcords <- if (is.character(providers_location)) { |
292 | 1x |
if (verbose) cli_alert_info("providers location: {.arg providers} columns ({.field {providers_location}})") |
293 | 54x |
providers[, providers_location, drop = FALSE] |
294 |
} else { |
|
295 | ! |
if (verbose) cli_alert_info("providers location: {.arg providers_location}") |
296 | 2x |
providers_location |
297 |
} |
|
298 | 56x |
if (any(grepl("^sf", class(pcords)))) { |
299 | 3x |
if (any(grepl("POLY", class(pcords), fixed = TRUE))) { |
300 | ! |
if (verbose) cli_alert_info("calculating centroids of providers location geometry") |
301 | ! |
pcords <- st_centroid(pcords) |
302 |
} |
|
303 | ! |
if (verbose) cli_alert_info("using coordinates from providers location geometry") |
304 | 3x |
pcords <- st_coordinates(pcords) |
305 |
} |
|
306 | 56x |
if (ncol(pcords) == ncol(ccords)) { |
307 | 1x |
if (verbose) cli_alert_info(paste("cost: calculated", distance_metric, "distances")) |
308 | 56x |
cost <- 1 / lma_simets(ccords, pcords, metric = distance_metric, pairwise = TRUE) - 1 |
309 | 56x |
if (is.null(dim(cost))) { |
310 | 4x |
cost <- t(cost) |
311 | 4x |
if (nrow(pcords) == 1) cost <- t(cost) |
312 |
} |
|
313 | 56x |
dimnames(cost) <- list(cid, pid) |
314 |
} else { |
|
315 | ! |
cli_abort( |
316 | ! |
"{.arg cost} is NULL, and failed to calculate it from provided locations (differing number of columns)" |
317 |
) |
|
318 |
} |
|
319 |
} |
|
320 | 62x |
if (is.null(cost)) { |
321 | 1x |
if (verbose) cli_alert_info("cost: {.feild 1}") |
322 | 6x |
cost <- sparseMatrix( |
323 |
{}, |
|
324 |
{}, |
|
325 | 6x |
x = 0, |
326 | 6x |
dims = c(cn, length(pv)), |
327 | 6x |
dimnames = list(cid, pid) |
328 |
) |
|
329 | 6x |
cost[] <- 1 |
330 |
} |
|
331 | 42x |
} else if (is.null(dim(cost))) { |
332 | 1x |
cost <- matrix(cost, length(cv)) |
333 | 1x |
if (identical(dim(cost), c(cn, length(pv)))) { |
334 | 1x |
dimnames(cost) <- list(cid, pid) |
335 | ! |
if (verbose) cli_alert_info("cost: {.arg cost} vector") |
336 |
} else { |
|
337 | ! |
cli_abort("{.arg cost} must be a matrix-like object") |
338 |
} |
|
339 | ! |
} else if (verbose) cli_alert_info("cost: {.arg cost} matrix") |
340 | 104x |
w <- catchment_weight( |
341 | 104x |
cost, weight, |
342 | 104x |
max_cost = max_cost, adjust_zeros = adjust_zeros, scale = scale, |
343 | 104x |
normalize_weight = FALSE, verbose = verbose |
344 |
) |
|
345 | 104x |
if (!is.null(consumers_commutes)) { |
346 | 4x |
dims <- dim(consumers_commutes) |
347 | ! |
if (dims[1] != dims[2]) cli_abort("{.arg consumers_commutes} must be a square matrix") |
348 | 4x |
if (dims[1] != cn || (!is.null(colnames(consumers_commutes)) && !identical(pid, colnames(consumers_commutes)))) { |
349 | ! |
if (is.numeric(cid) && dims[1] <= max(cid)) cid <- as.character(cid) |
350 | 1x |
if (all(cid %in% colnames(consumers_commutes))) { |
351 | ! |
if (verbose) cli_alert_info("ordering and/or trimming {.arg consumers_commutes} by {.arg consumers} IDs") |
352 | 1x |
consumers_commutes <- consumers_commutes[cid, cid] |
353 |
} else { |
|
354 | ! |
cli_abort("{.arg consumers_commutes} could not be resolved with {.arg consumers}") |
355 |
} |
|
356 |
} |
|
357 | 4x |
noncommuters <- diag(consumers_commutes) / cv |
358 | ! |
if (any(cv == 0)) noncommuters[cv == 0] <- 1 |
359 | 4x |
diag(consumers_commutes) <- 0 |
360 | 4x |
diag(consumers_commutes) <- rowSums(consumers_commutes) |
361 | 4x |
wr <- rowSums(consumers_commutes) |
362 | 4x |
wr[wr == 0] <- 1 |
363 | 4x |
consumers_commutes <- consumers_commutes / wr |
364 | 4x |
w_total <- rowSums(w) |
365 | 4x |
w_total[w_total == 0] <- 1 |
366 | 4x |
w_commutes <- consumers_commutes %*% (w / w_total) * w_total |
367 | 4x |
w <- w_commutes * (1 - noncommuters) + w * noncommuters |
368 |
} |
|
369 |
if ( |
|
370 | 104x |
cn != nrow(w) && |
371 | 104x |
(cn == ncol(w) || |
372 | 104x |
(!is.null(rownames(w)) && !is.null(colnames(w)) && sum(cid %in% colnames(w)) > sum(cid %in% rownames(w))) |
373 |
) |
|
374 |
) { |
|
375 | ! |
w <- t(w) |
376 |
} |
|
377 | 104x |
wr <- rowSums(w) |
378 | 104x |
if (nrow(w) < cn && !is.null(rownames(w)) && |
379 | 104x |
(!all(cid %in% rownames(w)) || !all(cid == rownames(w)))) { |
380 | ! |
if (verbose) cli_alert_info("selected weight rows by consumers ID names") |
381 | 2x |
cid <- as.character(cid) |
382 | 2x |
mcs <- cid[!cid %in% rownames(w)] |
383 | 2x |
w <- rbind( |
384 | 2x |
w, sparseMatrix( |
385 |
{}, |
|
386 |
{}, |
|
387 | 2x |
x = 0, |
388 | 2x |
dims = c(length(mcs), ncol(w)), |
389 | 2x |
dimnames = list(mcs, pid) |
390 |
) |
|
391 | 2x |
)[cid, ] |
392 |
} |
|
393 | 104x |
if (!is.null(colnames(w)) && is.character(pid) && any(su <- pid %in% colnames(w)) && |
394 | 104x |
(!all(su) || length(pv) != ncol(w) || !all(pid == colnames(w)))) { |
395 | ! |
if (verbose) cli_alert_info("selected weight columns by providers ID names") |
396 | 6x |
pid <- pid[su] |
397 | 6x |
pv <- pv[su] |
398 | 6x |
w <- w[, pid] |
399 |
} |
|
400 | 104x |
if (nrow(w) != cn) { |
401 | 2x |
if (is.numeric(cid) && min(cid) > 0 && max(cid) <= nrow(w)) { |
402 | ! |
if (verbose) cli_alert_info("selected weight rows by consumers ID indices") |
403 | 1x |
} else if (any(su <- cid %in% rownames(w))) { |
404 | ! |
if (verbose) cli_alert_info("selected weight rows by consumers IDs") |
405 | 1x |
cv <- cv[su] |
406 | 1x |
cid <- as.character(cid[su]) |
407 |
} else { |
|
408 | ! |
cli_abort("consumers values are not the same length as weight rows, and could not be aligned by name") |
409 |
} |
|
410 | 2x |
w <- w[cid, ] |
411 |
} |
|
412 | 104x |
if (ncol(w) != length(pv) && is.numeric(pid)) { |
413 | 3x |
if (min(pid) > 0 && max(pid) <= ncol(w)) { |
414 | ! |
if (verbose) cli_alert_info("selected weight rows by providers ID indices") |
415 |
} else { |
|
416 | 1x |
pid_len <- nchar(colnames(w)[1]) |
417 | ! |
if (all(nchar(colnames(w)) == pid_len)) pid <- formatC(pid, width = pid_len, flag = 0) |
418 | 1x |
if (any(su <- pid %in% colnames(w))) { |
419 | ! |
if (verbose) cli_alert_info("selected weight rows by providers IDs") |
420 | 1x |
pv <- pv[su] |
421 | 1x |
pid <- as.character(pid[su]) |
422 |
} else { |
|
423 | ! |
cli_abort("providers IDs are numeric, but are out of range or weights and do not all appear in its column names") |
424 |
} |
|
425 |
} |
|
426 | 3x |
w <- w[, pid] |
427 |
} |
|
428 | 104x |
if (!all(dim(w) == c(cn, length(pv)))) { |
429 | ! |
cli_abort("failed to align weight matrix with consumer and provider values") |
430 |
} |
|
431 | 104x |
if (normalize_weight) { |
432 |
# adjust weights by selection probability (3-step) |
|
433 | 1x |
if (verbose) cli_alert_info("normalizing weight") |
434 | 2x |
type <- paste0(type, "3-step floating catchment area") |
435 | 1x |
if (!is.null(rownames(w)) && all(rownames(w) %in% names(wr))) wr <- wr[rownames(w)] |
436 | 2x |
wr[wr == 0] <- 1 |
437 | 2x |
w <- w * sweep(w, 1, wr, "/", FALSE) |
438 |
} else { |
|
439 | 102x |
type <- paste0(type, "2-step floating catchment area") |
440 |
} |
|
441 | 104x |
provided_return_type <- !missing(return_type) |
442 | 104x |
if (is.character(return_type)) { |
443 | 103x |
return_type <- tolower(substr(return_type, 1, 1)) |
444 | 103x |
if (return_type == "s") { |
445 | ! |
if (verbose) cli_bullets(c(v = paste("calculated", type, "(resources per consumer location)"))) |
446 | 1x |
return(as.numeric(if (is.function(adjust_providers)) { |
447 | ! |
environment(adjust_providers) <- environment() |
448 | ! |
adjust_providers(w) |
449 |
} else { |
|
450 |
{ |
|
451 | 1x |
w |
452 | 1x |
} %*% pv |
453 |
})) |
|
454 |
} |
|
455 |
} |
|
456 | 103x |
wd <- crossprod(if (is.function(adjust_consumers)) { |
457 | 1x |
environment(adjust_consumers) <- environment() |
458 | 1x |
adjust_consumers(w) |
459 |
} else { |
|
460 | 102x |
w |
461 | 103x |
}, cv) |
462 | 103x |
if (return_type == "d") { |
463 | ! |
if (verbose) cli_bullets(c(v = paste("calculated", type, "(consumers per provider location)"))) |
464 | 1x |
return(as.numeric(wd)) |
465 |
} |
|
466 | 102x |
wd[abs(wd) < .Machine$double.eps] <- 1 |
467 | 102x |
r <- as.numeric((if (is.function(adjust_providers)) { |
468 | 1x |
environment(adjust_providers) <- environment() |
469 | 1x |
adjust_providers(w) |
470 |
} else { |
|
471 | 101x |
w |
472 | 102x |
}) %*% (pv / wd)) |
473 | 102x |
if (provided_return_type) { |
474 | 3x |
if (is.numeric(return_type)) { |
475 | 1x |
r <- r * return_type |
476 | 1x |
type <- paste(type, "(resources per", return_type, "consumers)") |
477 |
} else { |
|
478 | 2x |
if ("n" == return_type) { |
479 | 1x |
type <- paste(type, "(normalized)") |
480 | ! |
if (verbose) cli_alert_info("normalizing ratios") |
481 | 1x |
r <- r / (sum(r * cv) / sum(cv)) |
482 | 1x |
} else if ("r" == return_type) { |
483 | 1x |
type <- paste(type, "(resources per region)") |
484 | ! |
if (verbose) cli_alert_info("multiplying ratios by consumers value") |
485 | 1x |
r <- r * cv |
486 |
} |
|
487 |
} |
|
488 |
} else { |
|
489 | 99x |
type <- paste(type, "(resources per consumer)") |
490 |
} |
|
491 | 2x |
if (verbose) cli_bullets(c(v = paste("calculated", type))) |
492 | 102x |
r |
493 |
} |
1 |
#' @rdname catchment_ratio |
|
2 |
#' @examples |
|
3 |
#' |
|
4 |
#' # gives weight only to costs under 2 |
|
5 |
#' catchment_weight(matrix(c(1, 2, 1, 3, 1, 2), 3), 2) |
|
6 |
#' @return \code{catchment_weight}: A sparse matrix of weights. |
|
7 |
#' @export |
|
8 | ||
9 |
catchment_weight <- function(cost, weight = NULL, max_cost = NULL, adjust_zeros = 1e-6, scale = 2, |
|
10 |
normalize_weight = FALSE, verbose = FALSE) { |
|
11 | 3x |
if (is.null(dim(cost))) cost <- matrix(cost, ncol = 1) |
12 | 2x |
if (is.data.frame(cost)) cost <- as.matrix(cost) |
13 | 192x |
if (!is.null(weight) && is.numeric(adjust_zeros)) { |
14 | 183x |
zero_costs <- !is.na(cost) & cost == 0 |
15 | 183x |
if (any(zero_costs)) { |
16 | ! |
if (verbose) cli_alert_info(paste("setting non-NA 0s to", adjust_zeros)) |
17 | 64x |
cost[zero_costs] <- adjust_zeros |
18 | 64x |
rm(zero_costs) |
19 |
} |
|
20 |
} |
|
21 | 11x |
if (anyNA(cost)) cost[is.na(cost)] <- 0 |
22 | 192x |
if (is.numeric(weight) && is.null(dim(weight)) && length(weight) > 1) { |
23 | ! |
weight <- matrix(weight, ncol = ncol(cost)) |
24 |
} |
|
25 | 192x |
if (is.null(weight) && !is.null(max_cost)) { |
26 | ! |
weight <- max_cost |
27 | ! |
max_cost <- NULL |
28 |
} |
|
29 | 192x |
if (is.null(weight)) { |
30 | 2x |
if (verbose) cli_alert_info("weight: cost") |
31 | 8x |
w <- as(cost, "CsparseMatrix") |
32 | 184x |
} else if (is.null(dim(weight))) { |
33 | 176x |
if (is.numeric(weight)) { |
34 |
# single buffer value means a uniformly weighted catchment area (original) |
|
35 | ! |
if (verbose) cli_alert_info("weight: cost over {.field 0} and under {.field {weight[[1]]}}") |
36 | 152x |
w <- as(cost > 0 & cost < weight[[1]], "CsparseMatrix") * 1 |
37 | 24x |
} else if (is.list(weight)) { |
38 |
# list of steps for roughly graded weightings (enhanced) |
|
39 | ! |
if (verbose) cli_alert_info("weight: cost over {.field 0} and under steps of {.arg weight}") |
40 | 6x |
weight <- weight[order(-vapply(weight, "[[", 1, 1))] |
41 | 6x |
w <- as((cost <= weight[[1]][1]) * weight[[1]][2], "CsparseMatrix") |
42 | 6x |
for (s in weight[-1]) w[cost <= s[1]] <- s[2] |
43 | 6x |
w[cost <= 0] <- 0 |
44 | 18x |
} else if (is.character(weight)) { |
45 |
# name of a weight function (kernel density) |
|
46 | ! |
if (verbose) cli_alert_info("weight: {weight} weight function") |
47 | 15x |
weight <- tolower(weight[[1]]) |
48 | 15x |
if ("units" %in% class(cost)) { |
49 | ! |
cost <- matrix( |
50 | ! |
as.numeric(cost), nrow(cost), ncol(cost), |
51 | ! |
dimnames = dimnames(cost) |
52 |
) |
|
53 |
} |
|
54 | 15x |
if (grepl("^(?:gr|n)", weight)) { |
55 |
# gravity / normal kernel |
|
56 | 4x |
w <- as(sqrt(1 / cost^scale), "CsparseMatrix") |
57 | 11x |
} else if (grepl("^e", weight)) { |
58 |
# exponential kernel |
|
59 | 1x |
w <- as(exp(-cost * scale), "CsparseMatrix") |
60 | 10x |
} else if (grepl("^loga", weight)) { |
61 |
# logarithmic kernel |
|
62 | 1x |
w <- as(1 / (1 + log(cost, scale)), "CsparseMatrix") |
63 | 9x |
} else if (grepl("^li", weight)) { |
64 |
# linear kernel |
|
65 | 3x |
w <- as((scale - cost) / scale, "CsparseMatrix") |
66 | 3x |
w[w < 0] <- 0 |
67 | 6x |
} else if (grepl("^l", weight)) { |
68 |
# logistic kernel |
|
69 | 1x |
w <- as(1 / (1 + exp(scale * cost)), "CsparseMatrix") |
70 | 5x |
} else if (grepl("^ga", weight)) { |
71 |
# Gaussian kernel |
|
72 | 2x |
w <- as(exp(-cost^2 / (2 * scale^2)), "CsparseMatrix") |
73 | 3x |
} else if (grepl("^d", weight) && exists(weight)) { |
74 |
# assumed to be a density function like dnorm |
|
75 | 2x |
w <- as(cost, "CsparseMatrix") |
76 | 2x |
w@x <- do.call(weight, list(w@x, 0, scale)) |
77 | 1x |
} else if (grepl("^p", weight) && exists(weight)) { |
78 |
# assumed to be a distribution function like pnorm |
|
79 | 1x |
w <- as(cost, "CsparseMatrix") |
80 | 1x |
w@x <- do.call(weight, list(-w@x, 0, scale)) |
81 |
} else { |
|
82 | ! |
cli_abort("{.arg weight} not recognized") |
83 |
} |
|
84 | 15x |
w[cost <= 0 | !is.finite(w)] <- 0 |
85 | 3x |
} else if (is.function(weight)) { |
86 | ! |
if (verbose) cli_alert_info("weight: custom weight function") |
87 | 3x |
w <- as(weight(cost), "CsparseMatrix") |
88 | 3x |
w[cost <= 0 | !is.finite(w)] <- 0 |
89 |
} |
|
90 |
} else { |
|
91 | ! |
if (verbose) cli_alert_info("weight: {.arg weight}") |
92 | 8x |
w <- as(if (is.data.frame(weight)) as.matrix(weight) else weight, "CsparseMatrix") |
93 |
} |
|
94 | 192x |
if (!is.null(max_cost)) { |
95 | ! |
if (verbose) cli_alert_info("zerod-out cost over {max_cost}") |
96 | 1x |
w[cost > max_cost] <- 0 |
97 |
} |
|
98 | 192x |
if (normalize_weight) { |
99 | 1x |
wr <- rowSums(w) |
100 | 1x |
wr[wr == 0] <- 1 |
101 | 1x |
w <- w * sweep(w, 1, wr, "/") |
102 |
} |
|
103 | 192x |
w |
104 |
} |
1 |
#' Download U.S. Census Population Data |
|
2 |
#' |
|
3 |
#' Download and load U.S. census American Community Survey 5-year summary files at the block group level, |
|
4 |
#' optionally including home-work commute statistics. |
|
5 |
#' |
|
6 |
#' @param dir Directory in which to save the file(s). |
|
7 |
#' @param state Name, abbreviation, or FIPS code of the state. |
|
8 |
#' @param year 4 digit year, between 2009 and the most recent year. |
|
9 |
#' @param include_margins Logical; if \code{TRUE}, will save and include estimate margins or error, which is a |
|
10 |
#' \code{data.frame} with the same dimensions as the estimates \code{data.frame}. |
|
11 |
#' @param include_commutes Logical; if \code{TRUE}, will download the |
|
12 |
#' \href{https://lehd.ces.census.gov}{Longitudinal Employer-Household Dynamics (LEHD)} |
|
13 |
#' \href{https://lehd.ces.census.gov/data/#lodes}{Origin-Destination Employment Statistics (LODES)} |
|
14 |
#' data for the state and block groups within selected tracts. |
|
15 |
#' @param counties,tracts,blockgroups A vector of counties, tracts, or block group GEOIDs within the specified state to |
|
16 |
#' filter block groups for (block groups within the specified counties or tracts, or matching the specified block group |
|
17 |
#' GEOID). Only one can be specified (lower levels overwrite higher levels). Defaults to all block groups. |
|
18 |
#' @param overwrite Logical; if \code{TRUE}, will remove any existing files before downloading and saving new versions. |
|
19 |
#' @param verbose Logical; if \code{FALSE}, will not print status messages. |
|
20 |
#' @examples |
|
21 |
#' \dontrun{ |
|
22 |
#' download_census_population(".", "va") |
|
23 |
#' } |
|
24 |
#' @return A list with at least an \code{estimates} entry, and optionally \code{margins} and/or \code{commutes} |
|
25 |
#' entries. The \code{extimates} and \code{margins} entries are \code{data.frame}s with block groups in rows, |
|
26 |
#' population variables in columns, and person counts in cells. The \code{commutes} entry is a sparse matrix with |
|
27 |
#' home block groups in rows, work block groups in columns, and job counts in cells (all jobs, summed across blocks). |
|
28 |
#' @export |
|
29 | ||
30 |
download_census_population <- function(dir, state, year = 2019, include_margins = FALSE, include_commutes = FALSE, |
|
31 |
counties = NULL, tracts = NULL, blockgroups = NULL, overwrite = FALSE, verbose = TRUE) { |
|
32 | 1x |
us_fips <- list( |
33 | 1x |
name = c( |
34 | 1x |
"UnitedStates", "Alabama", "Alaska", "Arizona", "Arkansas", "California", "Colorado", "Connecticut", |
35 | 1x |
"Delaware", "DistrictOfColumbia", "Florida", "Georgia", "Hawaii", "Idaho", "Illinois", "Indiana", "Iowa", |
36 | 1x |
"Kansas", "Kentucky", "Louisiana", "Maine", "Maryland", "Massachusetts", "Michigan", "Minnesota", "Mississippi", |
37 | 1x |
"Missouri", "Montana", "Nebraska", "Nevada", "NewHampshire", "NewJersey", "NewMexico", "NewYork", |
38 | 1x |
"NorthCarolina", "NorthDakota", "Ohio", "Oklahoma", "Oregon", "Pennsylvania", "RhodeIsland", "SouthCarolina", |
39 | 1x |
"CouthDakota", "Tennessee", "Texas", "Utah", "Vermont", "Virginia", "Washington", "WestVirginia", "Wisconsin", |
40 | 1x |
"Wyoming", "PuertoRico" |
41 |
), |
|
42 | 1x |
post = c( |
43 | 1x |
"us", "al", "ak", "az", "ar", "ca", "co", "ct", "de", "dc", "fl", "ga", "hi", "id", "il", "in", "ia", "ks", "ky", |
44 | 1x |
"la", "me", "md", "ma", "mi", "mn", "ms", "mo", "mt", "ne", "nv", "nh", "nj", "nm", "ny", "nc", "nd", "oh", "ok", |
45 | 1x |
"or", "pa", "ri", "sc", "sd", "tn", "tx", "ut", "vt", "va", "wa", "wv", "wi", "wy", "pr" |
46 |
), |
|
47 | 1x |
fips = c( |
48 | 1x |
"us", 1, 2, 4, 5, 6, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, |
49 | 1x |
33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 44, 45, 46, 47, 48, 49, 50, 51, 53, 54, 55, 56, 72 |
50 |
) |
|
51 |
) |
|
52 | 1x |
state <- gsub("^0|[^a-z0-9]", "", tolower(state[[1]])) |
53 | 1x |
fid <- which(us_fips$fips == state) |
54 | 1x |
if (!length(fid)) { |
55 | 1x |
fid <- if (nchar(state) == 2) { |
56 | 1x |
which(us_fips$post == state) |
57 |
} else { |
|
58 | ! |
pmatch(state, tolower(us_fips$name)) |
59 |
} |
|
60 | ! |
if (!length(fid) || is.na(fid)) cli_abort("failed to recognize {.arg state}") |
61 |
} |
|
62 | 1x |
state <- us_fips$name[fid] |
63 | 1x |
state_post <- us_fips$post[fid] |
64 | 1x |
display_state <- gsub("([a-z])([A-Z])", "\\1 \\2", state) |
65 | ||
66 | 1x |
if (!is.null(dir)) { |
67 | 1x |
dir <- tdir <- paste0(normalizePath(dir, "/", FALSE), "/") |
68 | ! |
if (!dir.exists(dir)) dir.create(dir, FALSE, TRUE) |
69 |
} else { |
|
70 | ! |
tdir <- paste0(tempdir(), "/") |
71 |
} |
|
72 | 1x |
folder <- paste0(tdir, state, "_Tracts_Block_Groups_Only/") |
73 | 1x |
url <- paste0( |
74 | 1x |
"https://www2.census.gov/programs-surveys/acs/summary_file/", year, |
75 | 1x |
if (year > 2020) "/sequence-based-SF", "/data/5_year_seq_by_state/", state, |
76 | 1x |
"/Tracts_Block_Groups_Only/" |
77 |
) |
|
78 | 1x |
files_zip <- paste0(year, "5", state_post, "000", 1:3, "000.zip") |
79 | 1x |
files <- paste0(folder, "e", year, "5", state_post, "000", 1:3, "000.txt") |
80 | 1x |
final_files <- paste0(if (is.null(dir)) tdir else dir, state_post, "_", c( |
81 | 1x |
"population", |
82 | 1x |
if (include_margins) "population_margins" |
83 | 1x |
), "_", year, ".csv") |
84 | ! |
if (overwrite) unlink(c(paste0(tdir, files_zip), files, final_files)) |
85 | 1x |
if (!all(file.exists(final_files)) && !all(file.exists(paste0(tdir, files_zip)) | file.exists(files))) { |
86 | 1x |
if (verbose) cli_alert_info(paste("downloading", display_state, "American Community Survey summary files")) |
87 | 1x |
for (i in seq_along(files)) { |
88 | 3x |
if (!file.exists(files[i])) { |
89 | 3x |
download.file(paste0(url, files_zip[i]), paste0(tdir, files_zip[i])) |
90 |
} |
|
91 |
} |
|
92 |
} |
|
93 | 1x |
if (any(file.exists(paste0(tdir, files_zip)))) { |
94 | 1x |
if (verbose) cli_alert_info(paste("decompressing", display_state, "American Community Survey summary files")) |
95 | 1x |
dir.create(folder, FALSE) |
96 | 1x |
for (f in files_zip[file.exists(paste0(tdir, files_zip))]) { |
97 | 3x |
system2("unzip", c("-qd", shQuote(folder), shQuote(paste0(tdir, f)))) |
98 | 3x |
unlink(paste0(tdir, f)) |
99 |
} |
|
100 |
} |
|
101 | 1x |
if (!all(file.exists(final_files))) { |
102 | 1x |
if (verbose) cli_alert_info(paste("reformatting", display_state, "American Community Survey summary files")) |
103 | ||
104 |
# column names |
|
105 | 1x |
headers <- read.csv(paste0( |
106 | 1x |
"https://www2.census.gov/programs-surveys/acs/summary_file/", year, |
107 | 1x |
if (year > 2020) { |
108 | ! |
"/sequence-based-SF/documentation/user_tools/ACS_5yr_Seq" |
109 |
} else { |
|
110 | 1x |
paste0("/documentation/", if (year < 2013) { |
111 | ! |
"5_year/user_tools/Sequence_Number_and" |
112 |
} else { |
|
113 | 1x |
"user_tools/ACS_5yr_Seq" |
114 |
}) |
|
115 | 1x |
}, "_Table_Number_Lookup.txt" |
116 | 1x |
), nrows = 2000) |
117 | 1x |
headers <- headers[seq_len(grep("ANCESTRY", headers$Table.Title, fixed = TRUE)[1] - 1), ] |
118 | 1x |
title_id <- which(headers$Total.Cells.in.Table != "") |
119 | 1x |
names <- sub("^Universe:\\s*", "", headers$Table.Title) |
120 | 1x |
sub_id <- which(grepl(":", names, fixed = TRUE)) |
121 | 1x |
headers$name <- sub("\\.$", "", sub("_\\w+_Total", "_Total", gsub("._", "_", paste0( |
122 | 1x |
rep(gsub("[;:(),\\s]+", ".", names[title_id], perl = TRUE), c(title_id[-1], nrow(headers)) - title_id), |
123 |
"_", |
|
124 | 1x |
rep(gsub("[;:(),\\s]+", ".", names[sub_id], perl = TRUE), c(sub_id[-1], nrow(headers)) - c(1, sub_id[-1])), |
125 |
"_", |
|
126 | 1x |
gsub("[;:(),\\s]+", ".", sub(" years", "", names, fixed = TRUE), perl = TRUE) |
127 | 1x |
), fixed = TRUE))) |
128 | 1x |
start_id <- which(!is.na(headers$Start.Position)) |
129 | 1x |
offset <- headers$Line.Number - 1 + rep( |
130 | 1x |
headers[start_id, "Start.Position"], |
131 | 1x |
c(start_id[-1], nrow(headers) + 1) - start_id |
132 |
) |
|
133 | 1x |
ind_start <- which(!is.na(offset == min(offset, na.rm = TRUE)) & offset == min(offset, na.rm = TRUE)) |
134 | 1x |
ind_end <- c(ind_start[-1], length(offset)) - ind_start |
135 | 1x |
inds <- lapply(seq_along(ind_start), function(i) { |
136 | 4x |
is <- seq(ind_start[i], ind_end[i]) |
137 | 4x |
is[!is.na(offset[is]) & offset[is] %% 1 == 0] |
138 |
}) |
|
139 | 1x |
subs <- as.numeric(sub("\\D.*$", "", headers$Total.Cells.in.Table)) |
140 | 1x |
ind_start <- which( |
141 | 1x |
!is.na(headers$Start.Position) & |
142 | 1x |
headers$Start.Position == min(headers$Start.Position, na.rm = TRUE) |
143 |
) |
|
144 | 1x |
ind_end <- c(ind_start[-1] - 1, length(subs)) |
145 | 1x |
inds <- lapply(seq_along(ind_start), function(i) { |
146 | 4x |
is <- seq(ind_start[i], ind_end[i]) |
147 | 4x |
is[!is.na(headers[is, "Line.Number"]) & headers[is, "Line.Number"] %% 1 == 0] |
148 |
}) |
|
149 | ||
150 |
# row geoids |
|
151 | 1x |
rows <- read.csv(paste0( |
152 | 1x |
"https://www2.census.gov/programs-surveys/acs/summary_file/", year, |
153 | 1x |
if (year > 2020) "/sequence-based-SF", "/data/5_year_seq_by_state/", state, |
154 | 1x |
"/Tracts_Block_Groups_Only/g", year, "5", state_post, ".csv" |
155 | 1x |
), header = FALSE) |
156 | 1x |
rows$GEOID <- sub("\\D.*$", "", paste0( |
157 | 1x |
formatC(rows$V10, width = 2, flag = 0), |
158 | 1x |
formatC(rows$V11, width = 3, flag = 0), |
159 | 1x |
formatC(rows$V14, width = 6, flag = 0), |
160 | 1x |
rows$V15 |
161 |
)) |
|
162 | ||
163 |
# estimates |
|
164 | 1x |
estimates <- do.call(cbind, lapply(seq_along(files), function(i) { |
165 | 3x |
os <- c(6, offset[inds[[i]]]) |
166 | 3x |
tab <- read.csv( |
167 | 3x |
files[i], |
168 | 3x |
header = FALSE, colClasses = c(rep("character", 5), rep("numeric", length(os))), |
169 | 3x |
na.string = "." |
170 |
) |
|
171 | 3x |
colnames(tab)[os] <- c("GEOID", headers[inds[[i]], "name"]) |
172 | 3x |
tab$GEOID <- rows[tab$GEOID, "GEOID"] |
173 | 3x |
tab[nchar(tab$GEOID) == 12, -seq(1, if (i == 1) 5 else 6)] |
174 |
})) |
|
175 | ||
176 |
# margins |
|
177 | 1x |
if (include_margins) { |
178 | 1x |
files_m <- paste0(folder, "m", year, "5", state_post, "000", 1:3, "000.txt") |
179 | 1x |
margins <- do.call(cbind, lapply(seq_along(files_m), function(i) { |
180 | 3x |
os <- c(6, offset[inds[[i]]]) |
181 | 3x |
tab <- read.csv( |
182 | 3x |
files_m[i], |
183 | 3x |
header = FALSE, colClasses = c(rep("character", 5), rep("numeric", length(os))), |
184 | 3x |
na.string = "." |
185 |
) |
|
186 | 3x |
colnames(tab)[os] <- c("GEOID", headers[inds[[i]], "name"]) |
187 | 3x |
tab$GEOID <- rows[tab$GEOID, "GEOID"] |
188 | 3x |
tab[nchar(tab$GEOID) == 12, -seq(1, if (i == 1) 5 else 6)] |
189 |
})) |
|
190 |
} |
|
191 | 1x |
su <- vapply(estimates, function(v) all(is.na(v)), TRUE) |
192 | 1x |
if (any(su)) { |
193 | 1x |
estimates <- estimates[, !su, drop = FALSE] |
194 | 1x |
if (include_margins) { |
195 | 1x |
margins <- margins[, !su, drop = FALSE] |
196 |
} |
|
197 |
} |
|
198 | 1x |
su <- !is.na(estimates[, 2]) & estimates[, 2] == 0 |
199 | 1x |
if (any(su)) { |
200 | ! |
estimates <- estimates[!su, ] |
201 | ! |
if (include_margins) margins <- margins[!su, ] |
202 |
} |
|
203 |
} else { |
|
204 | ! |
if (verbose) cli_alert_info(paste("loading existing", display_state, "population data")) |
205 | ! |
classes <- c("character", rep("numeric", length( |
206 | ! |
scan(final_files[1], "", sep = ",", nlines = 1, quiet = TRUE) |
207 | ! |
) - 1)) |
208 | ! |
estimates <- read.csv(final_files[1], colClasses = classes) |
209 | ! |
if (include_margins) margins <- read.csv(final_files[2], colClasses = classes) |
210 |
} |
|
211 |
# commutes |
|
212 | 1x |
if (include_commutes) { |
213 | 1x |
file <- paste0(tdir, state_post, "_commutes_", year, ".csv") |
214 | 1x |
filegz <- paste0(file, ".gz") |
215 | ! |
if (overwrite) unlink(c(file, filegz)) |
216 | 1x |
if (!any(file.exists(c(file, filegz)))) { |
217 | 1x |
if (verbose) cli_alert_info(paste("downloading", display_state, "Origin-Destination Employment Statistics file")) |
218 | 1x |
download.file(paste0( |
219 | 1x |
"https://lehd.ces.census.gov/data/lodes/LODES7/", state_post, |
220 | 1x |
"/od/", state_post, "_od_main_JT00_", year, ".csv.gz" |
221 | 1x |
), filegz) |
222 |
} |
|
223 | 1x |
if (fresh <- file.exists(filegz)) system2("gzip", c("-df", filegz)) |
224 | 1x |
check <- file.exists(file) |
225 | 1x |
if (check && fresh && Sys.which("openssl")[[1]] != "") { |
226 | 1x |
hashes <- paste(readLines(paste0( |
227 | 1x |
"https://lehd.ces.census.gov/data/lodes/LODES7/", state_post, "/lodes_", state_post, ".sha256sum" |
228 | 1x |
)), collapse = "") |
229 | 1x |
hash_loc <- regexec(paste0("_od_main_JT00_", year), hashes, fixed = TRUE)[[1]] |
230 | 1x |
check <- substr(hashes, hash_loc - 68, hash_loc - 5) == strsplit( |
231 | 1x |
system2("openssl", c("dgst", "-sha256", shQuote(file)), TRUE), " ", |
232 | 1x |
fixed = TRUE |
233 | 1x |
)[[1]][2] |
234 | ! |
if (!check) cli_warn("integrity check failed for {.file {file}}") |
235 |
} |
|
236 | 1x |
if (check) { |
237 | 1x |
if (verbose) cli_alert_info(paste0("loading ", if (!fresh) "existing ", display_state, " commutes data")) |
238 | 1x |
commutes <- read.csv(file) |
239 | 1x |
if (!is.null(commutes$w_geocode)) { |
240 | 1x |
fresh <- TRUE |
241 | 1x |
if (verbose) { |
242 | 1x |
cli_alert_info(paste( |
243 | 1x |
"reformatting", display_state, "Origin-Destination Employment Statistics file" |
244 |
)) |
|
245 |
} |
|
246 | 1x |
commutes$w_geocode <- substr(commutes$w_geocode, 1, 12) |
247 | 1x |
commutes$h_geocode <- substr(commutes$h_geocode, 1, 12) |
248 | 1x |
commutes <- commutes[commutes$w_geocode %in% estimates$GEOID & commutes$h_geocode %in% estimates$GEOID, ] |
249 | 1x |
ids <- structure(seq_along(estimates$GEOID), names = estimates$GEOID) |
250 | 1x |
edges <- tapply(commutes$S000, paste0(commutes$w_geocode, commutes$h_geocode), sum) |
251 | 1x |
commutes <- sparseMatrix( |
252 | 1x |
i = ids[substr(names(edges), 13, 24)], |
253 | 1x |
j = ids[substr(names(edges), 1, 12)], |
254 | 1x |
x = edges, |
255 | 1x |
dimnames = list(estimates$GEOID, estimates$GEOID) |
256 |
) |
|
257 | ! |
} else if (colnames(commutes)[1] == "GEOID") { |
258 | ! |
rownames(commutes) <- colnames(commutes)[-1] <- commutes[, 1] |
259 | ! |
commutes <- as(as.matrix(commutes[, -1]), "dgCMatrix") |
260 |
} |
|
261 |
} |
|
262 |
} |
|
263 | 1x |
if (!is.null(dir)) { |
264 | 1x |
files <- paste0(dir, state_post, "_", c( |
265 | 1x |
"population", |
266 | 1x |
if (include_margins) "population_margins", |
267 | 1x |
if (include_commutes && check) "commutes" |
268 | 1x |
), "_", year, ".csv") |
269 | 1x |
if (!all(file.exists(files)) || (include_commutes && fresh && check)) { |
270 | 1x |
if (verbose) cli_alert_info(paste("writing", display_state, "files")) |
271 | 1x |
ck <- c(!file.exists(files[1]), !file.exists(files[2]) && include_margins, include_commutes && fresh && check) |
272 | 1x |
if (ck[1]) write.csv(estimates, files[1], row.names = FALSE) |
273 | 1x |
if (ck[2]) write.csv(margins, files[2], row.names = FALSE) |
274 | 1x |
if (ck[3]) write.csv(cbind(GEOID = rownames(commutes), as.matrix(commutes)), files[3], row.names = FALSE) |
275 | 1x |
if (verbose) { |
276 | 1x |
cli_bullets(c( |
277 | 1x |
v = paste0("wrote file", if (sum(ck) == 1) ":" else "s:"), |
278 | 1x |
structure(paste0("{.file ", files[ck], "}"), names = rep("*", sum(ck))) |
279 |
)) |
|
280 |
} |
|
281 |
} |
|
282 |
} |
|
283 | 1x |
subsetter <- !c(is.null(blockgroups), is.null(tracts), is.null(counties)) |
284 | 1x |
if (any(subsetter)) { |
285 | ! |
su <- if (subsetter[1]) { |
286 | ! |
estimates$GEOID %in% blockgroups |
287 | ! |
} else if (subsetter[2]) { |
288 | ! |
substr(estimates$GEOID, 1, 11) %in% tracts |
289 | ! |
} else if (subsetter[3]) { |
290 | ! |
substr(estimates$GEOID, 1, 5) %in% counties |
291 |
} |
|
292 | ! |
if (any(su) && !all(su)) { |
293 | ! |
if (verbose) cli_alert_info("subsetting results") |
294 | ! |
estimates <- estimates[su, ] |
295 | ! |
if (include_margins) margins <- margins[su, ] |
296 |
} else { |
|
297 | ! |
cli_warn("a subset was requested, but no GEOIDs matched") |
298 |
} |
|
299 |
} |
|
300 | 1x |
invisible(list( |
301 | 1x |
estimates = estimates, |
302 | 1x |
margins = if (include_margins) margins, |
303 | 1x |
commutes = if (include_commutes) commutes |
304 |
)) |
|
305 |
} |
1 |
#' Extract Connections Within Catchment Areas |
|
2 |
#' |
|
3 |
#' Extract connected consumer (\code{from}) and provider (\code{to}) locations within a catchment area, |
|
4 |
#' as defined by cost and weights. |
|
5 |
#' |
|
6 |
#' @param from Consumer locations; a matrix-like object with columns containing latitudes and longitudes |
|
7 |
#' (identified by \code{from_coords}), and a column of IDs corresponding to rows in \code{cost} (identified by |
|
8 |
#' \code{from_id}). Each identifying column can alternatively contain the associated matrix or vector, and |
|
9 |
#' IDs can be in row names. |
|
10 |
#' @param to Provider locations;a matrix-like object with columns containing latitudes and longitudes |
|
11 |
#' (identified by \code{to_coords}), and a column of IDs corresponding to columns in \code{cost} (identified by |
|
12 |
#' \code{to_id}). Each identifying column can alternatively contain the associated matrix or vector, and |
|
13 |
#' IDs can be in row names. |
|
14 |
#' @param cost A cost matrix, with row names corresponding to IDs of \code{from}, and column names corresponding to |
|
15 |
#' IDs of \code{to}. |
|
16 |
#' @param weight A weight matrix with the same dimensions as \code{cost}, such as returned from |
|
17 |
#' \code{\link{catchment_weight}}. |
|
18 |
#' @param ... Passes arguments to \code{catchment_weight} if \code{weight} is not a weight matrix. |
|
19 |
#' @param return_type Specify whether to return a \code{data.frame} (default) with connection ids, weights, and costs, |
|
20 |
#' an \code{sf} \code{data.frame} (\code{"sf"}) with linestring geometries added for each connection, |
|
21 |
#' or a GeoJSON-formatted list (\code{"list"}). |
|
22 |
#' @param from_id,from_coords,to_id,to_coords Names of ID and coordinate columns in \code{from} and \code{to}, |
|
23 |
#' or vectors of IDs and matrices of coordinates. |
|
24 |
#' @examples |
|
25 |
#' pop <- simulate_catchments() |
|
26 |
#' connections <- catchment_connections( |
|
27 |
#' pop$consumers, pop$providers, |
|
28 |
#' weight = "gaussian", max_cost = 1, |
|
29 |
#' return_type = "sf" |
|
30 |
#' ) |
|
31 |
#' if (require("leaflet", quiet = TRUE)) { |
|
32 |
#' leaflet() |> |
|
33 |
#' addPolylines( |
|
34 |
#' data = connections, weight = 3, color = "#777", |
|
35 |
#' highlightOptions = highlightOptions(color = "#fff", weight = 4), |
|
36 |
#' label = ~ paste0("From: ", from, ", To: ", to, ", Weight: ", weight, ", Cost: ", cost) |
|
37 |
#' ) |> |
|
38 |
#' addCircles( |
|
39 |
#' data = pop$consumers, label = ~ paste0("Consumers: ", count, ", Access: ", access) |
|
40 |
#' ) |> |
|
41 |
#' addCircles( |
|
42 |
#' data = pop$providers, color = "#000", label = ~ paste("Provider", id) |
|
43 |
#' ) |
|
44 |
#' } |
|
45 |
#' @return An object with connection, weight, and cost information, with a format depending on \code{return_type}. |
|
46 |
#' @export |
|
47 | ||
48 |
catchment_connections <- function(from, to, cost = NULL, weight = 1, ..., return_type = "data.frame", |
|
49 |
from_id = "GEOID", from_coords = c("X", "Y"), to_id = "GEOID", |
|
50 |
to_coords = c("X", "Y")) { |
|
51 | 17x |
if (missing(from) && !missing(from_coords)) { |
52 | 1x |
from <- from_coords |
53 | 1x |
from_coords <- c("X", "Y") |
54 |
} |
|
55 | 17x |
if (missing(to) && !missing(to_coords)) { |
56 | 1x |
to <- to_coords |
57 | 1x |
to_coords <- c("X", "Y") |
58 |
} |
|
59 | 17x |
from_ids <- if (length(from_id) != 1) { |
60 | 1x |
from_id |
61 | 17x |
} else if (from_id %in% colnames(from)) { |
62 | 1x |
from[, from_id, drop = TRUE] |
63 |
} else { |
|
64 | 1x |
if (is.null(rownames(from))) rownames(from) <- seq_len(nrow(from)) |
65 | 15x |
rownames(from) |
66 |
} |
|
67 | ! |
if (is.null(from_ids)) cli_abort("failed to resolve {.arg from} ids") |
68 | ! |
if (is.numeric(from_ids) && max(from_ids) > nrow(from)) from_ids <- as.character(from_ids) |
69 | 17x |
to_ids <- if (length(to_id) != 1) { |
70 | 1x |
to_id |
71 | 17x |
} else if (to_id %in% colnames(to)) { |
72 | 1x |
to[, to_id, drop = TRUE] |
73 |
} else { |
|
74 | 1x |
if (is.null(rownames(to))) rownames(to) <- seq_len(nrow(to)) |
75 | 15x |
rownames(to) |
76 |
} |
|
77 | ! |
if (is.null(to_id)) cli_abort("failed to resolve {.arg to} ids") |
78 | ! |
if (is.numeric(to_ids) && max(to_ids) > nrow(to)) to_ids <- as.character(to_ids) |
79 | 17x |
if (is.character(from_coords) && all(from_coords %in% colnames(from))) { |
80 | 2x |
from <- from[, from_coords, drop = TRUE] |
81 | 15x |
} else if (any(grepl("^sfc?$", class(from)))) { |
82 | 15x |
from <- st_coordinates(from) |
83 | ! |
} else if (!is.null(dim(from)) && ncol(from) > 1) { |
84 | ! |
from <- from[, 1:2, drop = TRUE] |
85 |
} |
|
86 | ! |
if (is.null(dim(from)) || ncol(from) != 2) cli_abort("{.arg from} must have 2 columns") |
87 | 17x |
if (!is.numeric(from[, 1]) || !is.numeric(from[, 2])) { |
88 | ! |
cli_abort("{.arg from} columns must all be numeric") |
89 |
} |
|
90 | ! |
if (length(from_ids) != nrow(from)) cli_abort("From IDs and coordinates do not align") |
91 | 17x |
if (is.character(to_coords) && all(to_coords %in% colnames(to))) { |
92 | 2x |
to <- to[, to_coords, drop = TRUE] |
93 | 15x |
} else if (any(grepl("^sfc?$", class(to)))) { |
94 | 15x |
to <- st_coordinates(to) |
95 | ! |
} else if (!is.null(dim(to)) && ncol(to) > 1) { |
96 | ! |
to <- to[, 1:2, drop = TRUE] |
97 |
} |
|
98 | ! |
if (is.null(dim(to)) || ncol(to) != 2) cli_abort("{.arg to} must have 2 columns") |
99 | 17x |
if (!is.numeric(to[, 1]) || !is.numeric(to[, 2])) { |
100 | ! |
cli_abort("{.arg to} columns must all be numeric") |
101 |
} |
|
102 | ! |
if (length(to_ids) != nrow(to)) cli_abort("To IDs and coordinates do not align") |
103 | 17x |
if (is.null(cost)) { |
104 | 12x |
cost <- 1 / lma_simets(from, to, "euclidean", symmetrical = TRUE, pairwise = TRUE) - 1 |
105 | 12x |
dimnames(cost) <- list(from_ids, to_ids) |
106 |
} |
|
107 | 1x |
if (is.null(dim(cost))) cost <- matrix(cost, nrow(from), dimnames = list(from_ids, to_ids)) |
108 | 17x |
if ((is.null(dim(weight)) && length(weight) != length(cost)) || length(substitute(...()))) { |
109 | 14x |
weight <- catchment_weight(cost = cost, weight = weight, ...) |
110 | 3x |
} else if (is.null(dim(weight))) { |
111 | 1x |
weight <- matrix(weight, nrow(from), dimnames = list(from_ids, to_ids)) |
112 | ! |
} else if (any(dim(cost) != dim(weight))) cli_abort("{.arg weight} does not align with {.arg cost}") |
113 | 17x |
if (is.null(rownames(cost))) { |
114 | 2x |
if (nrow(cost) != length(from_ids)) { |
115 | ! |
cli_abort("{.arg cost} does not have row names, and is not the same length as {.arg from}'s ids") |
116 |
} |
|
117 | 15x |
} else if (!identical(as.character(from_ids), rownames(cost))) { |
118 | 1x |
if (!all(from_ids %in% rownames(cost))) { |
119 | ! |
cli_abort("not all {.arg from} ids are in {.arg cost}'s rownames") |
120 |
} |
|
121 | ! |
if (!identical(rownames(cost), rownames(weight))) rownames(weight) <- rownames(cost) |
122 | 1x |
cost <- cost[as.character(from_ids), ] |
123 | 1x |
weight <- weight[as.character(from_ids), ] |
124 |
} |
|
125 | 17x |
if (is.null(colnames(cost))) { |
126 | ! |
if (ncol(cost) != length(to_ids)) { |
127 | ! |
cli_abort("{.arg cost} does not have column names, and is not the same length as {.arg to}'s ids") |
128 |
} |
|
129 | 17x |
} else if (!identical(as.character(to_ids), colnames(cost))) { |
130 | 1x |
if (!all(to_ids %in% colnames(cost))) { |
131 | ! |
cli_abort("not all {.arg to} ids are in {.arg cost}'s colnames") |
132 |
} |
|
133 | ! |
if (!identical(colnames(cost), colnames(weight))) colnames(weight) <- colnames(cost) |
134 | 1x |
cost <- cost[, as.character(to_ids)] |
135 | 1x |
weight <- weight[, as.character(to_ids)] |
136 |
} |
|
137 | 17x |
fcoords <- split(from, from_ids)[from_ids] |
138 | 17x |
tcoords <- split(to, to_ids)[to_ids] |
139 | 17x |
from_ids <- names(fcoords) |
140 | 17x |
to_ids <- names(tcoords) |
141 | 17x |
if (grepl("^[slg]", return_type, TRUE)) { |
142 | 2x |
res <- list( |
143 | 2x |
type = "FeatureCollection", |
144 | 2x |
features = unlist(lapply(unname(which(rowSums(weight != 0) != 0)), function(r) { |
145 | 200x |
su <- which(weight[r, ] != 0) |
146 | 200x |
np <- names(su) |
147 | 200x |
w <- weight[r, su] |
148 | 200x |
wc <- cost[r, su] |
149 | 200x |
fc <- as.numeric(fcoords[[r]]) |
150 | 200x |
tc <- tcoords[to_ids %in% np] |
151 | 200x |
lapply(seq_along(np), function(i) { |
152 | 402x |
list( |
153 | 402x |
type = "Feature", |
154 | 402x |
properties = list(from = from_ids[[r]], to = np[[i]], weight = w[[i]], cost = wc[[i]]), |
155 | 402x |
geometry = list( |
156 | 402x |
type = "LineString", |
157 | 402x |
coordinates = list(fc, as.numeric(tc[[np[[i]]]])) |
158 |
) |
|
159 |
) |
|
160 |
}) |
|
161 | 2x |
}), FALSE) |
162 |
) |
|
163 | ! |
if (!length(res$features)) cli_abort("there are no connections") |
164 | 2x |
if (grepl("^s", return_type, TRUE)) { |
165 | 1x |
read_sf(toJSON(res, auto_unbox = TRUE), as_tibble = FALSE) |
166 |
} else { |
|
167 | 1x |
res |
168 |
} |
|
169 |
} else { |
|
170 | 15x |
rs <- unname(which(rowSums(weight != 0) != 0)) |
171 | ! |
if (!length(rs)) cli_abort("there are no connections") |
172 | 15x |
do.call(rbind, lapply(rs, function(r) { |
173 | 1500x |
su <- which(weight[r, ] != 0) |
174 | 1500x |
res <- data.frame( |
175 | 1500x |
from = from_ids[[r]], to = names(su), weight = weight[r, su], cost = cost[r, su] |
176 |
) |
|
177 | 1500x |
rownames(res) <- NULL |
178 | 1500x |
res |
179 |
})) |
|
180 |
} |
|
181 |
} |
1 |
#' Aggregate Floating Catchment Area Ratios |
|
2 |
#' |
|
3 |
#' Calculate aggregate sums or means, potentially weighted by number of consumers. |
|
4 |
#' |
|
5 |
#' @param from A matrix-like object with IDs, values, and number of consumers, or a vector |
|
6 |
#' of values. These are the lower-level entities to be aggregated up to entries in \code{to}. |
|
7 |
#' @param to A matrix-like object with IDs and total consumers. These are higher-level |
|
8 |
#' entities containing \code{from} entities. |
|
9 |
#' @param id The column name of IDs in \code{from}, or a vector of IDs. |
|
10 |
#' @param value The column name of values in \code{from} or a vector of values to be aggregated. |
|
11 |
#' @param consumers The column name of consumer totals in \code{from}, or a vector |
|
12 |
#' of said totals of the same length as \code{from} IDs. |
|
13 |
#' @param to_id The column name of IDs in \code{to}, or a vector if IDs. |
|
14 |
#' @param to_consumers The column name of consumer totals in \code{to}, or a vector |
|
15 |
#' of said totals of the same length as \code{to} IDs. |
|
16 |
#' @param map A named list, with names as or corresponding to \code{to} IDs, and vectors |
|
17 |
#' containing \code{from} IDs as entries. |
|
18 |
#' @param original_from Logical indicating whether the \code{from} values are original (default). |
|
19 |
#' If \code{from} values are original and \code{consumers} are specified, \code{from} values will be |
|
20 |
#' multiplied by \code{consumers} before aggregation. Set this to \code{FALSE} if this has already been done |
|
21 |
#' (such as if you set \code{return_type} to \code{"region"} in \code{\link{catchment_ratio}}). |
|
22 |
#' @param return_type Determines the values that are returned: \code{"original"} (default) for an average |
|
23 |
#' for each \code{to} ID based on the region sum of \code{consumers}, or specified \code{to_consumers}. |
|
24 |
#' If a number, original scores will be multiplied by the number then averaged like \code{"original"}. If |
|
25 |
#' \code{"region"}, the sum for each \code{to} ID is returned. |
|
26 |
#' @param verbose Logical; if \code{TRUE}, will print a log of the aggregation process. |
|
27 |
#' @examples |
|
28 |
#' # lower-level entries prepended with higher-level IDs (like GEOIDs) |
|
29 |
#' higher <- data.frame(id = c(100, 110), pop = c(500, 500)) |
|
30 |
#' lower <- data.frame( |
|
31 |
#' id = paste0(higher$id, 1:5), |
|
32 |
#' value = 1:5 / 10, |
|
33 |
#' pop = c(50, 10, 100, 40, 30) |
|
34 |
#' ) |
|
35 |
#' catchment_aggregate( |
|
36 |
#' lower, higher, |
|
37 |
#' consumers = "pop", id = "id", value = "value", |
|
38 |
#' to_id = "id", verbose = TRUE |
|
39 |
#' ) |
|
40 |
#' |
|
41 |
#' # same thing with IDs specified in a map |
|
42 |
#' catchment_aggregate( |
|
43 |
#' lower, higher, |
|
44 |
#' consumers = "pop", id = "id", value = "value", |
|
45 |
#' map = list("100" = c(1001, 1003, 1005), "110" = c(1102, 1104)), verbose = TRUE |
|
46 |
#' ) |
|
47 |
#' @return A vector with an aggregate value (determined by \code{return_type}) |
|
48 |
#' for each entry in \code{to}. |
|
49 |
#' @export |
|
50 | ||
51 |
catchment_aggregate <- function(from, to = NULL, id = "GEOID", value = "access", consumers = "population", |
|
52 |
to_id = id, to_consumers = consumers, map = NULL, original_from = TRUE, |
|
53 |
return_type = "original", verbose = FALSE) { |
|
54 | 3x |
if (verbose) cli_rule("Aggregating Values") |
55 | 12x |
if (missing(from)) { |
56 | 1x |
if (!missing(value)) { |
57 | 1x |
from <- value |
58 |
} else { |
|
59 | ! |
cli_abort("{.arg from} must be provided") |
60 |
} |
|
61 |
} |
|
62 | 12x |
fid <- if (length(id) > 1) { |
63 | 1x |
if (verbose) cli_alert_info("from IDs: {.arg id} vector") |
64 | 1x |
id |
65 | 12x |
} else if (is.null(dim(from))) { |
66 | ! |
if (is.null(names(from))) cli_abort("{.arg id} must be specified if they are not provided in {.arg from}") |
67 | ! |
if (verbose) cli_alert_info("from IDs: {.arg from} names") |
68 | 2x |
names(from) |
69 |
} else { |
|
70 | 2x |
if (verbose) cli_alert_info("from IDs: {.var {id}} column") |
71 | 9x |
from[[id]] |
72 |
} |
|
73 | ! |
if (is.null(fid)) cli_abort("failed to resolve from IDs") |
74 | 12x |
fv <- if (length(value) > 1) { |
75 | 1x |
if (verbose) cli_alert_info("from values: {.arg value} vector") |
76 | 1x |
value |
77 | 12x |
} else if (is.null(dim(from))) { |
78 | ! |
if (verbose) cli_alert_info("from values: entered vector") |
79 | 2x |
from |
80 | 12x |
} else if (!is.null(value) && value %in% colnames(from)) { |
81 | 1x |
if (verbose) cli_alert_info("from values: {.var {value}} column") |
82 | 8x |
from[[value]] |
83 |
} else { |
|
84 | 1x |
if (verbose) cli_alert_info("from values: {.feild 1}") |
85 | 1x |
rep(1, length(fid)) |
86 |
} |
|
87 | ! |
if (is.null(fv) || length(fv) != length(fid)) cli_abort("failed to resolve {.arg from} values") |
88 | ||
89 | 12x |
n <- length(fv) |
90 | ! |
if (length(fid) != n) cli_abort("IDs and values were not the same length") |
91 | 12x |
cv <- if (length(consumers) > 1) { |
92 | ! |
if (verbose) cli_alert_info("consumers: {.arg consumers} vector") |
93 | 2x |
consumers |
94 | 12x |
} else if (!is.null(consumers) && consumers %in% colnames(from)) { |
95 | 1x |
if (verbose) cli_alert_info("consumers: {.var {consumers}} column") |
96 | 8x |
from[[consumers]] |
97 |
} else { |
|
98 | 2x |
NULL |
99 |
} |
|
100 | ! |
if (!is.null(cv) && length(cv) != n) cli_abort("{.arg consumers} was not the same length as values") |
101 | ||
102 | 9x |
if (original_from && !is.null(cv) && length(cv) == n) fv <- fv * cv |
103 |
# `from` values should now be either per-region values or normalized |
|
104 | ||
105 |
# mapping `from` ids to `to` ids |
|
106 | 1x |
if (is.null(to)) to <- if (!missing(to_id) && (!is.character(to_id) || length(to_id) > 1)) to_id else from |
107 | 12x |
tid <- if (length(to_id) > 1) { |
108 | 1x |
if (verbose) cli_alert_info("to IDs: {.arg to_id} vector") |
109 | 1x |
to_id |
110 | 12x |
} else if (!is.null(to_id) && to_id %in% colnames(to)) { |
111 | 1x |
if (verbose) cli_alert_info("to IDs: {.var {to_id}} column") |
112 | 7x |
to[[to_id]] |
113 | 12x |
} else if (length(to) == 1) { |
114 | 3x |
if (is.function(to)) { |
115 | ! |
if (verbose) cli_alert_info("to IDs: {.arg from} IDs converted by {.arg to} function") |
116 | 1x |
to(fid) |
117 | 2x |
} else if (is.numeric(to)) { |
118 | 1x |
if (verbose) cli_alert_info(paste("to IDs: first", to, "characters of {.arg from} IDs")) |
119 | 1x |
substr(fid, 1, to) |
120 |
} else { |
|
121 | ! |
if (verbose) cli_alert_info("to IDs: {.var {to}} column of {.arg from}") |
122 | 1x |
from[[to]] |
123 |
} |
|
124 | 12x |
} else if (is.null(dim(to))) { |
125 | ! |
if (verbose) cli_alert_info("to IDs: {.arg to} vector") |
126 | 1x |
to |
127 |
} else { |
|
128 | ! |
NULL |
129 |
} |
|
130 | ! |
if (is.null(tid)) cli_abort("failed to resolve {.arg to} IDs") |
131 | ||
132 | 12x |
tcv <- if (length(to_consumers) > 1) { |
133 | ! |
if (verbose) cli_alert_info("to consumers: {.arg to_consumers} vector") |
134 | 4x |
to_consumers |
135 | 12x |
} else if (!is.null(to_consumers) && to_consumers %in% colnames(to)) { |
136 | ! |
if (verbose) cli_alert_info("to consumers: {.var {to_consumers}} column") |
137 | 4x |
to[[to_consumers]] |
138 |
} else { |
|
139 | 4x |
NULL |
140 |
} |
|
141 | 12x |
aggregate_consumers <- is.null(tcv) |
142 | 12x |
fv <- cbind(fv, if (!aggregate_consumers || is.null(cv)) 1 else cv) |
143 | ||
144 | 12x |
if (is.list(map)) { |
145 | ! |
if (is.null(names(map))) cli_abort("{.arg map} must have names") |
146 | ! |
if (verbose) cli_alert_info("mapping: by map list") |
147 | ! |
if (is.null(tid)) tid <- names(map) |
148 | 1x |
utid <- as.character(unique(tid)) |
149 | 1x |
av <- vapply(utid, function(h) { |
150 | 2x |
l <- map[[h]] |
151 | 2x |
su <- fid %in% l |
152 | 2x |
if (is.null(l) || !length(su)) { |
153 | ! |
return(c(NA, NA)) |
154 |
} |
|
155 | 2x |
colSums(fv[su, , drop = FALSE], na.rm = TRUE) |
156 | 1x |
}, numeric(2)) |
157 |
} else { |
|
158 | 11x |
if (length(tid) == n) { |
159 | 1x |
if (verbose) cli_alert_info("mapping: by {.arg to} ID") |
160 | 3x |
fid <- tid |
161 | 3x |
tid <- unique(tid) |
162 |
} else { |
|
163 | 8x |
tnc <- nchar(tid[1]) |
164 | 8x |
if (tnc > nchar(fid[1]) || any(nchar(tid) != tnc)) { |
165 | ! |
if (!any(fid %in% tid)) cli_abort("failed to figure out how to map {.arg from} ids to {.arg to} ids") |
166 | ! |
if (verbose) cli_alert_info("mapping: by ID match") |
167 |
} else { |
|
168 | 2x |
if (verbose) cli_alert_info(paste("mapping: by first", tnc, "character match")) |
169 | 8x |
fid <- substr(fid, 1, tnc) |
170 |
} |
|
171 |
} |
|
172 | 11x |
utid <- as.character(unique(tid)) |
173 | 11x |
av <- vapply(utid, function(h) { |
174 | 22x |
su <- fid == h |
175 | ! |
if (any(su)) colSums(fv[su, , drop = FALSE], na.rm = TRUE) else c(NA, NA) |
176 | 11x |
}, numeric(2)) |
177 |
} |
|
178 | 12x |
if (length(utid) != length(tid)) { |
179 | ! |
if (verbose) cli_alert_info("to repeated IDs") |
180 | ! |
av <- rbind(unname(av[1, ][tid]), unname(av[2, ][tid])) |
181 |
} |
|
182 | ||
183 | 12x |
if (is.numeric(return_type)) { |
184 | ! |
if (verbose) cli_alert_info(paste("multiplying by", return_type)) |
185 | 1x |
av[1, ] <- av[1, ] * return_type |
186 |
} |
|
187 | 12x |
if (is.character(return_type) && grepl("^r", return_type, TRUE)) { |
188 | 1x |
if (verbose) cli_bullets(c(v = "returning sum of value per {.arg to} ID")) |
189 | 3x |
av[1, ] |
190 |
} else { |
|
191 | ! |
if (aggregate_consumers && !is.null(tcv) && ncol(av) != length(tcv)) aggregate_consumers <- FALSE |
192 | 9x |
if (verbose) { |
193 | 2x |
cli_bullets(c(v = paste0( |
194 | 2x |
"returning sum of value over total {.arg ", |
195 | 2x |
if (aggregate_consumers) "from" else "to", |
196 | 2x |
"} consumers per {.arg to} ID" |
197 |
))) |
|
198 |
} |
|
199 | 9x |
if (aggregate_consumers) { |
200 | 2x |
av[2, av[2, ] == 0] <- 1 |
201 | 2x |
av[1, ] / av[2, ] |
202 |
} else { |
|
203 | 7x |
tcv[tcv == 0] <- 1 |
204 | 7x |
av[1, ] / tcv |
205 |
} |
|
206 |
} |
|
207 |
} |
1 |
#' Simulate Catchment Areas |
|
2 |
#' |
|
3 |
#' Generate a random population of consumers and providers. |
|
4 |
#' |
|
5 |
#' @param n Number of consumers to generate. |
|
6 |
#' @param weight Weight method used on Euclidean distances between consumers and providers. |
|
7 |
#' @param ... Passes additional arguments to \code{\link{catchment_weight}}. |
|
8 |
#' @param consumers_range A numeric vector with entries for minimal and maximal consumer counts |
|
9 |
#' from which to sample. |
|
10 |
#' @param long_center Longitude to randomly place consumers around. |
|
11 |
#' @param long_spread Standard deviation of the consumer's longitude distribution. |
|
12 |
#' @param lat_center Latitude to randomly place consumers around. |
|
13 |
#' @param lat_spread Standard deviation of the consumer's latitude distribution. |
|
14 |
#' @param as_sf Logical; if \code{FALSE}, consumer and provider data are returned in regular \code{data.frame}s. |
|
15 |
#' @examples |
|
16 |
#' pop <- simulate_catchments() |
|
17 |
#' if (require("leaflet", quiet = TRUE)) { |
|
18 |
#' leaflet(pop$providers) |> |
|
19 |
#' addCircles( |
|
20 |
#' radius = 1e5, color = "#666", stroke = FALSE |
|
21 |
#' ) |> |
|
22 |
#' addCircles( |
|
23 |
#' data = pop$consumers, label = ~ paste0("Consumers: ", count, ", Access: ", access) |
|
24 |
#' ) |> |
|
25 |
#' addCircles( |
|
26 |
#' color = "#000", label = ~ paste("Provider", id) |
|
27 |
#' ) |
|
28 |
#' } |
|
29 |
#' @return A list with \code{consumers}, \code{providers}, \code{cost}, and \code{weight} entries. |
|
30 |
#' @export |
|
31 | ||
32 |
simulate_catchments <- function(n = 100, weight = 1, ..., consumers_range = c(10, 5000), long_center = -99, |
|
33 |
long_spread = 1, lat_center = 40, lat_spread = 1, as_sf = TRUE) { |
|
34 |
# roll consumers |
|
35 | 4x |
consumers <- data.frame( |
36 | 4x |
count = sample(seq(consumers_range[1], consumers_range[2]), n, TRUE), |
37 | 4x |
X = rnorm(n, long_center, long_spread), |
38 | 4x |
Y = rnorm(n, lat_center, lat_spread) |
39 |
) |
|
40 | 4x |
locs <- consumers[, c("X", "Y")] |
41 | 4x |
consumers_dist <- 1 / lma_simets(locs, "euclidean", symmetrical = TRUE) - 1 |
42 | 4x |
diag(consumers_dist) <- Inf |
43 | 4x |
start <- which.max(diag(consumers_dist[, max.col(-consumers_dist)])) |
44 | 4x |
diag(consumers_dist) <- 0 |
45 | 4x |
providers <- data.frame(id = seq_len(nrow(consumers)), X = 0, Y = 0) |
46 |
# placing providers |
|
47 | 4x |
for (i in providers$id) { |
48 | 50x |
sel <- c(start, which(consumers_dist[start, ] * catchment_weight( |
49 | 50x |
consumers_dist[start, , drop = FALSE], |
50 | 50x |
weight, ... |
51 | 50x |
) != 0)) |
52 | 50x |
providers[i, c("X", "Y")] <- colMeans(locs[sel, , drop = FALSE] + rnorm(length(sel) * 2, 0, .1)) |
53 | 50x |
r <- catchment_ratio(consumers, providers[seq_len(i), , drop = FALSE], weight = weight, ...) |
54 | 50x |
if (all(r != 0)) { |
55 | 4x |
providers <- providers[seq_len(i), ] |
56 | 4x |
break |
57 |
} |
|
58 | 46x |
start <- which.min(r) |
59 |
} |
|
60 | 4x |
consumers$access <- r |
61 | 4x |
cost <- 1 / lma_simets(consumers[, -1], providers[, -1], "euclidean", symmetrical = TRUE) - 1 |
62 | 4x |
list( |
63 | 4x |
consumers = if (as_sf) st_as_sf(consumers, coords = c("X", "Y"), crs = 4326) else consumers, |
64 | 4x |
providers = if (as_sf) st_as_sf(providers, coords = c("X", "Y"), crs = 4326) else providers, |
65 | 4x |
cost = cost, weight = catchment_weight(cost, weight, ...) |
66 |
) |
|
67 |
} |
1 |
#' Download and Prepare Map Shapes |
|
2 |
#' |
|
3 |
#' Download cartographic boundary files from the \href{https://www.census.gov}{U.S. Census Bureau}, |
|
4 |
#' and reformat them to simplified GeoJSON files. |
|
5 |
#' |
|
6 |
#' @param dir Path to a directory in which to save the reformatted files. If these are to be |
|
7 |
#' used in a published site, they should be in the \code{"docs"} directory of the site directory. If not specified, |
|
8 |
#' the shapes are downloaded and returned, but not saved. |
|
9 |
#' @param fips State Federal Information Processing Standard (FIPS) code, United States Postal Service (USPS) |
|
10 |
#' code, or name; see \href{state.txt}{https://www2.census.gov/geo/docs/reference/state.txt}. |
|
11 |
#' @param entity Entity name (e.g., \code{"county"}, \code{"tract"}, or \code{"bg"}); see |
|
12 |
#' \href{2019_file_name_def.pdf}{https://www2.census.gov/geo/tiger/GENZ2019/2019_file_name_def.pdf} |
|
13 |
#' (or appropriate year's file). States and counties are only available at the national level, so if these are |
|
14 |
#' requested for a particular state, the national files will be downloaded and subset. |
|
15 |
#' @param name Name for the GeoJSON file (without extension) to be written. |
|
16 |
#' @param year Year of the shapes, 2013 and after. |
|
17 |
#' @param resolution Resolution of the shapes; one of \code{"500k"} (default), \code{"5m"}, or \code{"20m"}. |
|
18 |
#' @param strip_features Logical; if \code{TRUE}, will strip all features other than IDs. |
|
19 |
#' @param simplify A function to simplify shapes with, such as \code{rmapshaper::ms_simplify}. This |
|
20 |
#' function should take the \code{sf} \code{data.frame} as its first argument, and return that object |
|
21 |
#' with its \code{geometry} replaced. |
|
22 |
#' @param ... Passes additional arguments to \code{simplify}; e.g., for \code{rmapshaper::ms_simplify}, |
|
23 |
#' you might want to specify \code{keep} and set \code{keep_shapes} to \code{TRUE}. |
|
24 |
#' @param force Logical; if \code{TRUE}, will force a re-download, and overwrite any existing files. |
|
25 |
#' @examples |
|
26 |
#' \dontrun{ |
|
27 |
#' # download Virginia counties shapes |
|
28 |
#' shapes <- download_census_shapes( |
|
29 |
#' "docs/shapes", "va", "county", |
|
30 |
#' name = "counties", |
|
31 |
#' simplify = rmapshaper::ms_simplify, keep_shapes = TRUE |
|
32 |
#' ) |
|
33 |
#' } |
|
34 |
#' @return An \code{sf} \code{data.frame} with a geometry column containing shapes, and other features in columns. |
|
35 |
#' @export |
|
36 | ||
37 |
download_census_shapes <- function(dir = NULL, fips = "us", entity = "state", name = NULL, year = 2019, |
|
38 |
resolution = "500k", strip_features = FALSE, simplify = NULL, ..., force = FALSE) { |
|
39 | 1x |
us_fips <- list( |
40 | 1x |
name = c( |
41 | 1x |
"united states", "alabama", "alaska", "arizona", "arkansas", "california", "colorado", "connecticut", |
42 | 1x |
"delaware", "district of columbia", "florida", "georgia", "hawaii", "idaho", "illinois", "indiana", "iowa", |
43 | 1x |
"kansas", "kentucky", "louisiana", "maine", "maryland", "massachusetts", "michigan", "minnesota", "mississippi", |
44 | 1x |
"missouri", "montana", "nebraska", "nevada", "new hampshire", "new jersey", "new mexico", "new york", |
45 | 1x |
"north carolina", "north dakota", "ohio", "oklahoma", "oregon", "pennsylvania", "rhode island", "south carolina", |
46 | 1x |
"south dakota", "tennessee", "texas", "utah", "vermont", "virginia", "washington", "west virginia", "wisconsin", |
47 | 1x |
"wyoming", "american samoa", "guam", "northern mariana islands", "puerto rico", "minor outlying islands", |
48 | 1x |
"virgin islands" |
49 |
), |
|
50 | 1x |
post = c( |
51 | 1x |
"us", "al", "ak", "az", "ar", "ca", "co", "ct", "de", "dc", "fl", "ga", "hi", "id", "il", "in", "ia", "ks", "ky", |
52 | 1x |
"la", "me", "md", "ma", "mi", "mn", "ms", "mo", "mt", "ne", "nv", "nh", "nj", "nm", "ny", "nc", "nd", "oh", "ok", |
53 | 1x |
"or", "pa", "ri", "sc", "sd", "tn", "tx", "ut", "vt", "va", "wa", "wv", "wi", "wy", "as", "gu", "mp", "pr", "um", |
54 | 1x |
"vi" |
55 |
), |
|
56 | 1x |
fips = c( |
57 | 1x |
"us", 1, 2, 4, 5, 6, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, |
58 | 1x |
33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 44, 45, 46, 47, 48, 49, 50, 51, 53, 54, 55, 56, 60, 66, 69, 72, 74, 78 |
59 |
) |
|
60 |
) |
|
61 | 1x |
entities <- c( |
62 | 1x |
"aiannh", "anrc", "bg", "cbsa", "cd116", "cnecta", "concity", "county", "county_within_cd116", |
63 | 1x |
"county_within_ua", "cousub", "csa", "division", "elsd", "nation", "necta", "place", "puma10", "region", "scsd", |
64 | 1x |
"sldl", "sldu", "state", "subbarrio", "tract", "ua10", "unsd", "zcta510" |
65 |
) |
|
66 | 1x |
fips <- gsub("^0|[^a-z0-9\\s]", "", tolower(fips[[1]])) |
67 | 1x |
fid <- which(us_fips$fips == fips) |
68 | 1x |
if (!length(fid)) { |
69 | 1x |
fid <- if (nchar(fips) == 2) { |
70 | 1x |
which(us_fips$post == fips) |
71 |
} else { |
|
72 | ! |
pmatch(fips, us_fips$name) |
73 |
} |
|
74 | ! |
if (!length(fid) || is.na(fid)) cli_abort("failed to recognize {.arg fips}") |
75 |
} |
|
76 | 1x |
fips <- us_fips$fips[fid] |
77 | ! |
if (nchar(fips) == 1) fips <- paste0("0", fips) |
78 | ! |
if (nchar(year) < 4) year <- as.numeric(paste0("20", year)) |
79 | ! |
if (year < 2013) cli_abort("only years 2013 and after are available from this function") |
80 | 1x |
entity <- tolower(entity) |
81 | 1x |
entity <- match.arg(entity, entities) |
82 | 1x |
subset <- NULL |
83 | 1x |
if (fips != "us" && (entity == "county" || entity == "state")) { |
84 | 1x |
subset <- fips |
85 | 1x |
fips <- "us" |
86 |
} |
|
87 | 1x |
resolution <- tolower(resolution) |
88 | 1x |
resolution <- match.arg(resolution, c("500k", "5m", "20m")) |
89 | 1x |
file <- paste0("cb_", year, "_", fips, "_", entity, "_", resolution) |
90 | 1x |
url <- paste0("https://www2.census.gov/geo/tiger/GENZ", year, if (year == 2013) "/" else "/shp/", file, ".zip") |
91 | 1x |
temp <- tempdir() |
92 | 1x |
temp_file <- paste0(temp, "/", file, ".zip") |
93 | 1x |
out_file <- paste0(dir, "/", if (!is.null(name)) name else file, ".geojson") |
94 | ! |
if (force && file.exists(out_file)) unlink(out_file) |
95 | 1x |
if (!file.exists(out_file)) { |
96 | ! |
if (!force && file.exists(paste0(temp, "/", file))) temp_file <- paste0(temp, "/", file) |
97 | 1x |
if (force || !file.exists(temp_file)) { |
98 | 1x |
download.file(url, temp_file) |
99 | 1x |
unzip(temp_file, exdir = paste0(temp, "/", file)) |
100 | 1x |
file.remove(temp_file) |
101 | 1x |
temp_file <- paste0(temp, "/", file) |
102 |
} |
|
103 | 1x |
shapes <- st_read(temp_file, quiet = TRUE) |
104 | 1x |
if (!is.null(subset)) shapes <- shapes[shapes$STATEFP == subset, ] |
105 | 1x |
if (strip_features) shapes <- shapes[, "GEOID", drop = FALSE] |
106 | 1x |
if (is.function(simplify)) shapes <- simplify(shapes, ...) |
107 | 1x |
if (!is.null(dir)) { |
108 | ! |
if (!file.exists(dir)) dir.create(dir, FALSE, TRUE) |
109 | 1x |
st_write(shapes, out_file) |
110 |
} |
|
111 |
} else { |
|
112 | ! |
shapes <- st_read(out_file, quiet = TRUE) |
113 |
} |
|
114 | 1x |
invisible(shapes) |
115 |
} |
1 |
#' Extract a network of consumers and providers |
|
2 |
#' |
|
3 |
#' Extract an interconnected set of consumers and providers from a set of connections within catchment areas. |
|
4 |
#' |
|
5 |
#' @param connections A list- or matrix-like object with \code{"from"} and \code{"to"} entries, such as that |
|
6 |
#' returned from \code{\link{catchment_connections}}. |
|
7 |
#' @param from_start,to_start The ID of a \code{from} (consumer) or \code{to} (provider) in \code{connections} from |
|
8 |
#' which to trace out a network. |
|
9 |
#' @examples |
|
10 |
#' pop <- simulate_catchments() |
|
11 |
#' connections <- catchment_connections( |
|
12 |
#' pop$consumers, pop$providers, |
|
13 |
#' weight = 1, max_cost = .5, |
|
14 |
#' ) |
|
15 |
#' catchment_network(connections, 1) |
|
16 |
#' @return A subsetted version of \code{connections} containing only connections traceable from \code{from_start} |
|
17 |
#' and/or \code{to_start}. |
|
18 |
#' @export |
|
19 | ||
20 |
catchment_network <- function(connections, from_start = NULL, to_start = NULL) { |
|
21 | ! |
if (any(!c("from", "to") %in% colnames(connections))) cli_abort("{.arg connections} not recognized") |
22 | 2x |
from <- connections[["from"]] |
23 | 2x |
to <- connections[["to"]] |
24 | 2x |
from_ids <- unique(from) |
25 | 2x |
froms <- if (is.null(from_start) && is.null(to_start)) from[1] else from_start |
26 | 2x |
tos <- to_start |
27 | 2x |
for (i in seq_along(from_ids)) { |
28 | 8x |
tos <- unique(c(tos, to[from %in% froms])) |
29 | 8x |
froms <- unique(c(froms, from[to %in% tos])) |
30 | 2x |
if (all(from_ids %in% froms)) break |
31 |
} |
|
32 | 2x |
connections[from %in% froms | to %in% tos, ] |
33 |
} |