We want to find all prescribing events for our drugs of interest and identify ever users for the construction of our cohorts.
This file was compiled on 2022-03-30 13:56:38 by jsimmeri
on argon-lc-f14-25.hpc.
We want to find all the RX events for the six study drugs: alfuzosin, doxazosin, terazosin, tamsulosin, dutasteride, and finasteride.
The Truven RX claims data have medications identified by the national drug code number (NDC). We use the Redbook data from 2015 to find the relevant NDC codes for these 6 medications. We accept any medication that matches our medication name on the generic name of at least one of the active ingredients.
# Load the Redbook NDC lookup, keeping only the NDC code and the generic name.
redbook <- select(
  read_csv("/Shared/Statepi_Marketscan/databases/Truven/redbook.csv"),
  NDCNUM,
  GENNME
)
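We use the groups tz/dz/az, tamsulosin, 5ari, and statin for the medication based on the class (note that the code below assigns only the first three labels; no statin group is created in this file). stringr::str_detect() is used to do regular expression matching on the generic name.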
# Map each active ingredient (matched as a regular expression against the
# lower-cased generic name) to its study drug group. Every ingredient is
# filtered separately and the results are stacked in the order listed, so a
# product whose generic name matches several ingredients yields one row per
# match.
ndc_numbers <- c(
  alfuzosin = "tz/dz/az",
  doxazosin = "tz/dz/az",
  terazosin = "tz/dz/az",
  tamsulosin = "tamsulosin",
  dutasteride = "5ari",
  finasteride = "5ari"
) %>%
  purrr::imap(function(class_label, ingredient) {
    redbook %>%
      filter(stringr::str_detect(tolower(GENNME), ingredient)) %>%
      mutate(drug = class_label)
  }) %>%
  bind_rows() %>%
  select(drug, NDCNUM)
Now that we have the relevant NDC numbers from the Redbook for the six study drugs, we next want to filter all the RX claims for claims featuring the study drugs.
We are going to want to do this in parallel to speed up the processing time. To do this, we set up find_rx_events()
to take the source
and year
values as the first and second elements of a vector.
# Pull every prescription claim for the study drugs from one Truven RX table.
#
# Arguments:
#   args          Two-element vector. Element 1 is the data source used in the
#                 table name (e.g. "ccae"); element 2 is the two-digit year
#                 used in both the database file name and the table name.
#   required_ndc  Data frame with an `NDCNUM` column listing the NDC codes to
#                 keep.
#
# Returns a tibble with the columns enrolid (converted to character), ndcnum,
# svcdate and daysupp for the claims whose ndcnum is in required_ndc$NDCNUM.
#
# The database connection is opened explicitly and closed on exit, so a worker
# never leaves a dangling SQLite handle behind.
find_rx_events <- function(args, required_ndc) {
  stopifnot(
    length(args) >= 2,
    "NDCNUM" %in% names(required_ndc)
  )
  src_name <- args[[1]]
  year <- args[[2]]

  db_path <- glue::glue(
    "/Shared/Statepi_Marketscan/databases/Truven/truven_{year}.db"
  )
  # dbConnect() would silently create an empty database for a missing path,
  # so fail loudly instead (same outcome as src_sqlite(create = FALSE)).
  if (!file.exists(db_path)) {
    stop("Database file not found: ", db_path, call. = FALSE)
  }

  con <- DBI::dbConnect(RSQLite::SQLite(), db_path)
  on.exit(DBI::dbDisconnect(con), add = TRUE)

  # De-duplicate so the generated IN (...) list is no longer than necessary;
  # the set of matching claims is unchanged.
  ndc_codes <- unique(required_ndc$NDCNUM)

  # The filter, projection and cast are translated to SQL and run inside
  # SQLite; collect() materialises the rows before the connection is closed.
  tbl(con, glue::glue("rx_core_{src_name}_{year}")) %>%
    filter(ndcnum %in% local(ndc_codes)) %>%
    select(enrolid, ndcnum, svcdate, daysupp) %>%
    mutate(enrolid = as.character(enrolid)) %>%
    collect()
}
We then make a list of those two element vectors for all the source
and year
combinations of interest.
# Build one c(source, year) pair for every data source ("ccae", then "mdcr")
# and every two-digit year "01" through "17", in that order.
args_list <- unlist(
  lapply(c("ccae", "mdcr"), function(src_name) {
    lapply(sprintf("%02d", 1:17), function(yr) c(src_name, yr))
  }),
  recursive = FALSE
)
Initialize the cluster, load tidyverse
, and export ndc_numbers
to the workers:
# Start one worker per element of args_list.
cluster <- makeCluster(spec = length(args_list))
# Attach the tidyverse on every worker.
clusterEvalQ(cl = cluster, expr = library(tidyverse))
[[1]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[2]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[3]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[4]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[5]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[6]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[7]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[8]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[9]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[10]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[11]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[12]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[13]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[14]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[15]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[16]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[17]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[18]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[19]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[20]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[21]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[22]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[23]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[24]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[25]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[26]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[27]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[28]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[29]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[30]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[31]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[32]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[33]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[34]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
# Copy ndc_numbers from this session to every worker.
clusterExport(cl = cluster, varlist = "ndc_numbers")
Using this cluster, we then apply the find_rx_events()
function to args_list
# Run find_rx_events() on the workers, once per c(source, year) pair; the
# result is a list with one element per pair, in the order of args_list.
rx_events <- parLapply(
  cl = cluster,
  X = args_list,
  fun = find_rx_events,
  required_ndc = ndc_numbers
)
Once complete, kill the cluster to release the resources.
# Shut the workers down now that all results are back.
stopCluster(cl = cluster)
We then want to bind the elements of the rx_events
list to a tibble.
# Stack the per-table results into a single tibble.
rx_events <- rx_events %>%
  bind_rows()
And add the medication group label (tz/dz/az, tamsulosin, or 5ari):
# Attach the drug group to each claim by matching on the NDC code. A claim
# whose NDC appears under more than one group in ndc_numbers is returned once
# per group.
rx_events <- inner_join(
  x = rx_events,
  y = ndc_numbers,
  by = c("ndcnum" = "NDCNUM")
)
Most of our future tasks only require knowing who the ever users are, and loading all of rx_events for that would be slower and more costly than necessary. So we compute the ever users here to speed that up.
# Earliest service date for each enrollee within each drug group. summarize()
# removes only the last grouping level by default, so the result remains
# grouped by enrolid.
ever_users <- summarize(
  group_by(rx_events, enrolid, drug),
  first_rx_date = min(svcdate)
)
We save the tibble of all the RX dispensing events and the ever user table of start dates for use in later scripts.
# Persist the full dispensing-event table.
readr::write_rds(
  rx_events,
  "/Shared/lss_jsimmeri_backup/data/tz-5ari-final/treated_rx_events.rds"
)
# Persist the ever-user table with first prescription dates.
readr::write_rds(
  ever_users,
  "/Shared/lss_jsimmeri_backup/data/tz-5ari-final/ever_users.rds"
)
R version 4.0.4 (2021-02-15)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 20.04 LTS
Matrix products: default
BLAS/LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.8.so
locale:
[1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
[3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
[5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=C
[7] LC_PAPER=en_US.UTF-8 LC_NAME=C
[9] LC_ADDRESS=C LC_TELEPHONE=C
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
attached base packages:
[1] parallel stats graphics grDevices utils datasets
[7] methods base
other attached packages:
[1] lubridate_1.7.9.2 forcats_0.5.1 stringr_1.4.0
[4] dplyr_1.0.4 purrr_0.3.4 readr_1.4.0
[7] tidyr_1.1.2 tibble_3.0.6 ggplot2_3.3.3
[10] tidyverse_1.3.0
loaded via a namespace (and not attached):
[1] tidyselect_1.1.0 xfun_0.21 haven_2.3.1
[4] colorspace_2.0-0 vctrs_0.3.6 generics_0.1.0
[7] htmltools_0.5.1.1 yaml_2.2.1 rlang_0.4.10
[10] pillar_1.4.7 withr_2.4.1 glue_1.4.2
[13] DBI_1.1.1 dbplyr_2.1.0 modelr_0.1.8
[16] readxl_1.3.1 lifecycle_1.0.0 munsell_0.5.0
[19] gtable_0.3.0 cellranger_1.1.0 rvest_0.3.6
[22] evaluate_0.14 knitr_1.31 ps_1.5.0
[25] fansi_0.4.2 broom_0.7.4 Rcpp_1.0.6
[28] backports_1.2.1 scales_1.1.1 jsonlite_1.7.2
[31] fs_1.5.0 distill_1.2 hms_1.0.0
[34] digest_0.6.27 stringi_1.5.3 grid_4.0.4
[37] cli_2.3.0 tools_4.0.4 magrittr_2.0.1
[40] crayon_1.4.1 pkgconfig_2.0.3 downlit_0.2.1
[43] ellipsis_0.3.1 xml2_1.3.2 reprex_1.0.0
[46] assertthat_0.2.1 rmarkdown_2.6 httr_1.4.2
[49] rstudioapi_0.13 R6_2.5.0 compiler_4.0.4