| 1 |
#' Search for Companies (Legacy) |
|
| 2 |
#' |
|
| 3 |
#' Search for companies based on Standard Industrial Classification. |
|
| 4 |
#' @param SIC Standard Industrial Classification code (e.g., \code{2080}).
|
|
| 5 |
#' @param start Number of results to skip. |
|
| 6 |
#' @param limit Maximum number of results to return, in steps of 100. |
|
| 7 |
#' @param user User agent string (e.g., \code{"Company Name AdminContact@example.com"});
|
|
| 8 |
#' defaults to the \code{SEC_USER} environment variable, which can be set with
|
|
| 9 |
#' \code{\link{Sys.setenv}} (e.g., \code{Sys.setenv(SEC_USER = "...")}).
|
|
| 10 |
#' @param verbose Logical; if \code{FALSE}, will not show status updates.
|
|
| 11 |
#' @seealso For more complete results, see \code{\link{sec_search}}.
|
|
| 12 |
#' @examples |
|
| 13 |
#' \dontrun{
|
|
| 14 |
#' |
|
| 15 |
#' # get a list of beverage companies |
|
| 16 |
#' sec_search_companies(2080) |
|
| 17 |
#' } |
|
| 18 |
#' @returns A \code{data.frame} of results, with columns for \code{CIK} (Central Index Key),
|
|
| 19 |
#' \code{Company}, and \code{Location}.
|
|
| 20 |
#' @export |
|
| 21 | ||
| 22 |
sec_search_companies <- function(SIC, start = 0, limit = Inf, user = Sys.getenv("SEC_USER"), verbose = TRUE) {
  # Pages through the legacy EDGAR company browser (100 rows per request) and
  # stacks the CIK / Company / Location columns scraped from each HTML table.
  base_url <- "https://www.sec.gov/cgi-bin/browse-edgar?SIC="
  page_size <- 100
  # offsets are doubles once `start` is added in; keep them out of
  # scientific notation (e.g., "1e+05") when they are pasted into the URL
  as_param <- function(x) format(x, scientific = FALSE, trim = TRUE)

  # Parse the table in a response body into a data.frame;
  # returns NULL when the page has no table or the table has no data rows.
  extract_table <- function(req) {
    d <- rawToChar(req$content)
    tab <- regmatches(d, regexec("<table.*</table>", d))[[1]]
    if (length(tab)) {
      # the first two pieces are the text before the first row and the header row
      rows <- strsplit(tab, "<tr[^>]*>")[[1]][-(1:2)]
      if (!length(rows)) {
        return(NULL)
      }
      # after splitting on tags, pieces 3, 6, and 9 hold the cell text
      p <- as.data.frame(do.call(rbind, lapply(strsplit(rows, "\\s*<[^>]+>"), "[", c(3, 6, 9))))
      colnames(p) <- c("CIK", "Company", "Location")
      p
    }
  }

  user_agent <- httr::user_agent(user)
  req <- httr::GET(
    paste0(base_url, SIC, "&start=", as_param(start), "&count=", as_param(min(page_size, limit))),
    user_agent
  )
  if (req$status_code != 200) {
    stop("request failed: ", rawToChar(req$content), call. = FALSE)
  }
  res <- extract_table(req)
  if (is.null(res)) {
    # no matches: return an empty result rather than failing on nrow(NULL)
    return(data.frame(CIK = character(), Company = character(), Location = character()))
  }

  # only a full page suggests there is more to fetch
  more <- nrow(res) >= page_size
  while (more && nrow(res) < limit) {
    req <- httr::GET(
      # continue from the caller's offset, not from the beginning of the listing
      paste0(base_url, SIC, "&start=", as_param(start + nrow(res)), "&count=100"),
      user_agent
    )
    if (req$status_code != 200) {
      if (verbose) cli_alert_warning("page {nrow(res) / 100 + 1} failed: {rawToChar(req$content)}")
      break
    }
    page <- extract_table(req)
    if (!length(page)) break
    res <- rbind(res, page)
    more <- nrow(page) >= page_size
  }
  res
}
| 1 |
#' Download Company Information |
|
| 2 |
#' |
|
| 3 |
#' Retrieve information about companies. |
|
| 4 |
#' |
|
| 5 |
#' @param ciks A vector of Central Index Keys, or a \code{data.frame} with a \code{ciks}
|
|
| 6 |
#' column. |
|
| 7 |
#' @param type Type of information to collect, between \code{submissions}, which includes things
|
|
| 8 |
#' like Standard Industrial Classification, ticker, and previous names, as well as information |
|
| 9 |
#' about recent filings, and \code{facts}, which includes financial information.
|
|
| 10 |
#' @param out Path to a directory in which to save company information. Each file will be
#' saved within a subdirectory: \code{{out}/{cik}/{type}.json}.
|
|
| 12 |
#' @param load Logical; if \code{TRUE}, will read in the downloaded info file.
|
|
| 13 |
#' @param overwrite Logical; if \code{TRUE}, download the info file even if it already exists.
|
|
| 14 |
#' @param user User agent string (e.g., \code{"Company Name AdminContact@example.com"});
|
|
| 15 |
#' defaults to the \code{SEC_USER} environment variable, which can be set with
|
|
| 16 |
#' \code{\link{Sys.setenv}} (e.g., \code{Sys.setenv(SEC_USER = "...")}).
|
|
| 17 |
#' @param verbose Logical; if \code{FALSE}, will not show status updates.
|
|
| 18 |
#' @seealso Search for filings with \code{\link{sec_search}}.
|
|
| 19 |
#' @examples |
|
| 20 |
#' \dontrun{
|
|
| 21 |
#' |
|
| 22 |
#' # retrieve information about Pepsico |
|
| 23 |
#' pepsi <- sec_companies("0000077476")
|
|
| 24 |
#' pepsi$`0000077476`[1:10] |
|
| 25 |
#' |
|
| 26 |
#' # retrieve financial information about Pepsico |
|
| 27 |
#' pepsi_facts <- sec_companies("0000077476", "facts")
|
|
| 28 |
#' pepsi_facts$`0000077476`$facts$`us-gaap`$Revenues |
|
| 29 |
#' } |
|
| 30 |
#' @returns An invisible list with named entries for each successful CIK if \code{load} is \code{TRUE},
|
|
| 31 |
#' or a vector of HTTP status codes otherwise. |
|
| 32 |
#' @export |
|
| 33 | ||
| 34 |
sec_companies <- function(ciks, type = "submissions", out = tempdir(), load = TRUE, overwrite = FALSE,
                          user = Sys.getenv("SEC_USER"), verbose = TRUE) {
  # Downloads `{out}/{cik}/{type}.json` for each unique CIK from data.sec.gov
  # (skipping files that already exist unless `overwrite`), and optionally
  # reads each file back in.

  # Left-pad CIKs to 10 digits. formatC() is avoided here: it renders large
  # numeric CIKs in scientific notation under its default "g" format, and its
  # zero padding of character input is platform dependent.
  pad_cik <- function(x) {
    x <- if (is.numeric(x)) sprintf("%.0f", x) else trimws(as.character(x))
    paste0(strrep("0", pmax(0, 10 - nchar(x))), x)
  }

  if (is.data.frame(ciks)) ciks <- ciks$ciks
  ciks <- unique(pad_cik(ciks))
  dir.create(out, FALSE, TRUE)
  out <- paste0(normalizePath(out, "/"), "/")
  # anything starting with "s" selects submissions; everything else is facts
  type <- if (grepl("^s", tolower(type))) "submissions" else "facts"
  base_url <- paste0(
    "https://data.sec.gov/",
    if (type == "submissions") "submissions" else "api/xbrl/companyfacts",
    "/CIK"
  )
  user_agent <- httr::user_agent(user)
  n <- length(ciks)
  # status per CIK; entries for files that were already present stay at 200
  reqs <- structure(rep(200, n), names = ciks)
  res <- list()
  # stay quiet for a single CIK unless the caller explicitly asked for output
  if (missing(verbose) && n == 1) verbose <- FALSE
  if (verbose) {
    cli_progress_step(
      "0 / {n} downloaded",
      msg_done = "finished; {sum(reqs == 200)} of {n} successful", spinner = TRUE
    )
  }
  for (i in seq_len(n)) {
    cik_dir <- paste0(out, ciks[[i]], "/")
    dir.create(cik_dir, FALSE, TRUE)
    info_file <- paste0(cik_dir, type, ".json")
    if (overwrite || !file.exists(info_file)) {
      req <- httr::GET(
        paste0(base_url, ciks[[i]], ".json"),
        # write_disk() refuses to replace an existing file unless told to,
        # which would otherwise make `overwrite = TRUE` fail on cached files
        httr::write_disk(info_file, overwrite = TRUE),
        user_agent
      )
      # do not leave an error body behind where a later call would treat it as data
      if (req$status_code != 200) unlink(info_file)
      # space requests roughly 0.1 seconds apart
      Sys.sleep(max(.001, .1 - req$times[["total"]]))
      reqs[i] <- req$status_code
    }
    if (load && file.exists(info_file)) {
      res[[ciks[[i]]]] <- jsonlite::read_json(info_file, simplifyVector = TRUE)
    }
    if (verbose) cli_progress_update()
  }
  invisible(if (load) res else reqs)
}
| 1 |
#' Search for Filings |
|
| 2 |
#' |
|
| 3 |
#' Search for forms by keywords, Central Index Keys, and/or Standard Industrial Classification |
|
| 4 |
#' codes, filed after 2001. |
|
| 5 |
#' |
|
| 6 |
#' @param query A character with words or phrases to search for within the form |
|
| 7 |
#' (e.g., \code{grape "fruit juice"}).
|
|
| 8 |
#' @param entity A character with a name, ticker, or Central Index Key. |
|
| 9 |
#' @param CIKs A vector of Central Index Keys (e.g., \code{c(0000317540, 0000077476)}).
|
|
| 10 |
#' @param SICs A vector of Standard Industrial Classification codes (e.g., \code{2080}).
|
|
| 11 |
#' @param forms A vector of forms to include (e.g., \code{"10-K"}).
|
|
| 12 |
#' @param date_to Latest date to include results from (e.g., \code{"2019-01-01"}); defaults to current.
|
|
| 13 |
#' @param date_from Earliest date to include results from; defaults to the earliest available: |
|
| 14 |
#' \code{"2001-01-01"}.
|
|
| 15 |
#' @param outFile Path of a file to write results to. |
|
| 16 |
#' @param overwrite Logical; if \code{FALSE}, will read in an existing results file rather than
|
|
| 17 |
#' executing the search. |
|
| 18 |
#' @param start Number of results to skip. |
|
| 19 |
#' @param limit Maximum number of results to return, in steps of 100. |
|
| 20 |
#' @param verbose Logical; if \code{FALSE}, will not display status messages.
|
|
| 21 |
#' @seealso To retrieve a simpler list of companies, see \code{\link{sec_search_companies}}.
|
|
| 22 |
#' @examples |
|
| 23 |
#' \dontrun{
|
|
| 24 |
#' |
|
| 25 |
#' # search for 10-K filings by beverage companies |
|
| 26 |
#' results <- sec_search(SICs = 2080, forms = "10-K", limit = 100) |
|
| 27 |
#' } |
|
| 28 |
#' @returns A \code{data.frame} of results.
|
|
| 29 |
#' @export |
|
| 30 | ||
| 31 |
sec_search <- function(query = NULL, entity = NULL, CIKs = NULL, SICs = NULL, forms = NULL, date_to = NULL,
                       date_from = "2001-01-01", outFile = NULL, overwrite = TRUE, start = 0, limit = Inf,
                       verbose = TRUE) {
  # Queries the EDGAR full-text search endpoint, pages through the hits, and
  # flattens them into a data.frame (optionally cached in / written to `outFile`).
  if (!overwrite && !is.null(outFile) && file.exists(outFile)) {
    return(read.csv(outFile))
  }
  if (!is.null(query) && !is.character(query)) {
    cli_abort("query must be a character; use other arguments to enter numeric CIKs or SICs")
  }

  # Left-pad CIKs to 10 digits. formatC() is avoided here: it renders large
  # numeric CIKs in scientific notation under its default "g" format, and its
  # zero padding of character input is platform dependent.
  pad_cik <- function(x) {
    x <- if (is.numeric(x)) sprintf("%.0f", x) else trimws(as.character(x))
    paste0(strrep("0", pmax(0, 10 - nchar(x))), x)
  }

  endpoint <- "https://efts.sec.gov/LATEST/search-index"
  # Filter(length, ...) drops every criterion that was not supplied
  body <- Filter(length, list(
    dateRange = "custom",
    q = query,
    entityName = as.character(entity),
    ciks = as.list(pad_cik(CIKs)),
    sics = as.list(as.character(SICs)),
    # the range runs from `date_from` to `date_to`; `date_to` defaults to today
    startdt = as.character(date_from),
    enddt = if (is.null(date_to)) as.character(Sys.Date()) else as.character(date_to),
    forms = as.list(toupper(forms)),
    from = start
  ))
  req <- httr::POST(endpoint, body = body, encode = "json")
  status <- req$status_code
  res <- tryCatch(
    jsonlite::fromJSON(rawToChar(req$content)),
    # a non-JSON body (e.g., an HTML error page) would otherwise fail obscurely
    error = function(e) NULL
  )
  if (!is.list(res)) cli_abort("request failed with status {status}")
  if (!is.null(res$errorMessage)) cli_abort("request failed\n message: {res$errorType} {res$errorMessage}")
  if (is.null(res$hits)) cli_abort("request failed with status {status}")
  if (res$hits$total$value == 0) cli_abort("no results found")
  if (verbose) {
    cli_alert_info(paste0(
      "found {res$hits$total$value} records", if (res$hits$total$value > limit) "; returning {limit}"
    ))
  }

  # Flatten one page of hits into a character matrix (one or more rows per hit);
  # returns NULL for an empty page.
  extract_hits <- function(res) {
    d <- res$hits$hits
    # NROW() also copes with the empty list that an empty `hits` array parses to
    if (NROW(d) > 0) {
      d[["_source"]]$id <- d[["_id"]]
      d <- d[["_source"]]
      d$items <- NULL
      do.call(rbind, lapply(seq_len(nrow(d)), function(r) {
        if (length(d[r, 1][[1]]) == 1) {
          do.call(cbind, lapply(d[r, ], as.character))
        } else {
          # the first field holds several values: unwrap each field, replacing
          # empty ones with "" so that cbind() can recycle across the rows
          row <- lapply(d[r, ], function(x) lapply(x, function(v) if (length(v)) v else "")[[1]])
          if (length(row$biz_states) != length(row$biz_locations)) {
            # recover states from the "City, ST" locations when the counts disagree
            row$biz_states <- sub("^.*, ", "", row$biz_locations)
          }
          do.call(cbind, row)
        }
      }))
    }
  }

  hits <- extract_hits(res)
  # possible when `start` is past the last available result
  if (is.null(hits)) cli_abort("no results found")
  # results available from the requested offset onward
  limit <- min(limit, max(0, res$hits$total$value - start))
  while (nrow(hits) < limit) {
    # continue from the caller's offset, not from the first result
    body$from <- start + nrow(hits)
    req <- httr::POST(endpoint, body = body, encode = "json")
    if (req$status_code != 200) {
      if (verbose) cli_alert_warning("page {nrow(hits) / 100 + 1} failed: {rawToChar(req$content)}")
      break
    }
    page <- extract_hits(jsonlite::fromJSON(rawToChar(req$content)))
    if (is.null(page)) break
    hits <- rbind(hits, page)
  }
  # empty fields were stringified by as.character() above
  hits[hits == "character(0)"] <- ""
  hits <- as.data.frame(hits)
  if (!is.null(outFile)) write.csv(hits, outFile, row.names = FALSE)
  hits
}
| 1 |
#' Download Filings |
|
| 2 |
#' |
|
| 3 |
#' Download SEC filings based on Central Index Key (CIK) and document ID (as returned |
|
| 4 |
#' from \code{\link{sec_search}}).
|
|
| 5 |
#' |
|
| 6 |
#' @param ciks A vector of Central Index Keys, or a \code{data.frame} with a \code{ciks}
|
|
| 7 |
#' column. |
|
| 8 |
#' @param out Path to a directory in which to save filings. Each filing will be saved |
|
| 9 |
#' within a subdirectory: \code{{out}/{cik}/{SEC Accession Number}}.
|
|
| 10 |
#' @param ids A vector of document IDs. These are combinations of SEC Accession Numbers |
|
| 11 |
#' and file names, separated by a colon (e.g., \code{"0001564590-22-032043:gs-424b2.htm"}).
|
|
| 12 |
#' If the file name is not included, this will default to the complete submission text file |
|
| 13 |
#' (e.g., \code{"0001564590-22-032043.txt"}).
|
|
| 14 |
#' @param complete Logical; if \code{TRUE}, will download the complete submission text file
|
|
| 15 |
#' rather than any other file specified after a colon in \code{ids}.
|
|
| 16 |
#' @param user User agent string (e.g., \code{"Company Name AdminContact@example.com"});
|
|
| 17 |
#' defaults to the \code{SEC_USER} environment variable, which can be set with
|
|
| 18 |
#' \code{\link{Sys.setenv}} (e.g., \code{Sys.setenv(SEC_USER = "...")}).
|
|
| 19 |
#' @param verbose Logical; if \code{FALSE}, will not show status updates.
|
|
| 20 |
#' @seealso Search for filings with \code{\link{sec_search}}.
|
|
| 21 |
#' @examples |
|
| 22 |
#' \dontrun{
|
|
| 23 |
#' |
|
| 24 |
#' # search for filings based on SIC |
|
| 25 |
#' forms <- sec_search(SICs = 2080, forms = "10-K", limit = 100) |
|
| 26 |
#' |
|
| 27 |
#' # download some of those filings |
|
| 28 |
#' sec_download(forms[1:3, ], out = tempdir()) |
|
| 29 |
#' } |
|
| 30 |
#' @returns An invisible vector of HTTP status codes. |
|
| 31 |
#' @export |
|
| 32 | ||
| 33 |
sec_download <- function(ciks, out, ids = NULL, complete = FALSE, user = Sys.getenv("SEC_USER"), verbose = TRUE) {
  # Downloads one document per entry of `ids` into
  # `{out}/{cik}/{accession number}/`, skipping files that already exist.
  if (missing(out)) cli_abort("{.arg out} must be specified")
  if (is.data.frame(ciks)) {
    if (is.null(ids)) ids <- ciks$id
    ciks <- ciks$ciks
  }
  if (is.null(ids)) cli_abort("{.arg ids} must be specified if {.arg ciks} is not a data.frame containing it")

  # Left-pad CIKs to 10 digits. formatC() is avoided here: it renders large
  # numeric CIKs in scientific notation under its default "g" format, and its
  # zero padding of character input is platform dependent.
  pad_cik <- function(x) {
    x <- if (is.numeric(x)) sprintf("%.0f", x) else trimws(as.character(x))
    paste0(strrep("0", pmax(0, 10 - nchar(x))), x)
  }

  ids <- as.character(ids)
  n <- length(ids)
  # CIKs are paired with ids by position, so they must not be de-duplicated:
  # several filings routinely share a CIK
  ciks <- pad_cik(ciks)
  if (length(ciks) == 1) ciks <- rep_len(ciks, n)
  if (length(ciks) != n) cli_abort("{.arg ciks} and {.arg ids} must be the same length")

  dir.create(out, FALSE, TRUE)
  out <- paste0(normalizePath(out, "/"), "/")
  base_url <- "https://www.sec.gov/Archives/edgar/data/"
  user_agent <- httr::user_agent(user)
  # status per document; entries for files that were already present stay at 200
  reqs <- structure(rep(200, n), names = ciks)
  if (verbose) {
    cli_progress_step(
      "0 / {n} downloaded",
      msg_done = "finished; {sum(reqs == 200)} of {n} successful", spinner = TRUE
    )
  }
  for (i in seq_len(n)) {
    # id[1] is the accession number, id[2] (if present) the file name
    id <- strsplit(ids[[i]], ":", fixed = TRUE)[[1]]
    if (complete || length(id) < 2) {
      # fall back to the complete submission text file: {dashed accession}.txt
      id[2] <- id[1]
      if (!grepl("-", id[2])) {
        # restore the dashes of an 18-digit accession number:
        # 10 digits, 2 digits, then the remaining 6 digits
        id[2] <- paste0(
          substr(id[2], 1, 10), "-", substr(id[2], 11, 12), "-", substr(id[2], 13, 18)
        )
      }
      id[2] <- paste0(id[2], ".txt")
    }
    filing_dir <- paste0(out, ciks[[i]], "/", id[1], "/")
    dir.create(filing_dir, FALSE, TRUE)
    target <- paste0(filing_dir, id[2])
    if (!file.exists(target)) {
      req <- httr::GET(paste0(
        # the archive path uses the accession number without dashes
        base_url, ciks[[i]], "/", gsub("-", "", id[1], fixed = TRUE), "/", id[2]
      ), httr::write_disk(target), user_agent)
      # do not leave an error body behind where a later call would skip it as done
      if (req$status_code != 200) unlink(target)
      # space requests roughly 0.1 seconds apart
      Sys.sleep(max(.001, .1 - req$times[["total"]]))
      reqs[i] <- req$status_code
    }
    if (verbose) cli_progress_update()
  }
  invisible(reqs)
}
| 1 |
.onLoad <- function(lib, pkg) {
  # Provide a placeholder user agent when SEC_USER is unset or empty,
  # so the `user` defaults of the exported functions are never blank.
  current <- Sys.getenv("SEC_USER")
  if (current == "") {
    placeholder <- paste("Name", Sys.Date(), "contact@example.com")
    Sys.setenv(SEC_USER = placeholder)
  }
}
|
| 8 | ||
| 9 |
#' @importFrom httr GET user_agent write_disk POST |
|
| 10 |
#' @importFrom jsonlite fromJSON |
|
| 11 |
#' @importFrom cli cli_abort cli_progress_step cli_progress_update cli_alert_info cli_alert_warning |
|
| 12 |
#' @importFrom utils read.csv write.csv |
|
| 13 |
#' @keywords internal |
|
| 14 |
"_PACKAGE" |
|
| 15 | ||
| 16 |
## usethis namespace: start |
|
| 17 |
## usethis namespace: end |
|
| 18 |
NULL |