Find All RX Events

This file was compiled on 2022-03-30 13:56:38 by jsimmeri on argon-lc-f14-25.hpc.

library(tidyverse)
library(parallel)
library(lubridate)

Find NDC Numbers

We want to find all the RX events for the following six medications (label in parentheses):

alfuzosin (az)
doxazosin (dz)
terazosin (tz)
tamsulosin (tamsulosin)
dutasteride (5ari)
finasteride (5ari)

The Truven RX claims data have medications identified by the national drug code number (NDC). We use the Redbook data from 2015 to find the relevant NDC codes for these 6 medications. We accept any medication that matches our medication name on the generic name of at least one of the active ingredients.

# Load the Redbook drug reference and keep only the NDC code (NDCNUM) and
# generic name (GENNME) columns, which are all the NDC lookup needs.
redbook <- select(
  read_csv("/Shared/Statepi_Marketscan/databases/Truven/redbook.csv"),
  NDCNUM,
  GENNME
)

We assign each medication to one of three groups based on its class: tz/dz/az, tamsulosin, or 5ari. stringr::str_detect() is used to do regular-expression matching on the generic name.

# Search term (matched as a regular expression against the lower-cased generic
# name) -> medication group label. The order is the order in which the matches
# are stacked.
drug_groups <- c(
  alfuzosin = "tz/dz/az",
  doxazosin = "tz/dz/az",
  terazosin = "tz/dz/az",
  tamsulosin = "tamsulosin",
  dutasteride = "5ari",
  finasteride = "5ari"
)

# For every search term, keep the Redbook rows whose generic name contains the
# term and label them with the term's group. A product that matches terms from
# two different groups is kept once per group.
ndc_numbers <- bind_rows(
  purrr::imap(
    drug_groups,
    function(group, term) {
      redbook %>%
        filter(stringr::str_detect(tolower(GENNME), term)) %>%
        mutate(drug = group)
    }
  )
) %>%
  select(drug, NDCNUM) %>%
  # Drop exact repeats of a (drug, NDCNUM) pair: this table is later joined to
  # the claims on NDCNUM, so a repeated pair would duplicate dispensing events.
  distinct()

Find Dispensing Events

Configuring Cluster and Extraction Function

Now that we have the relevant NDC numbers from the Redbook for the six study drugs, we next want to filter all the RX claims for claims featuring the study drugs.

We are going to want to do this in parallel to speed up the processing time. To do this, we set up find_rx_events() to take the source and year values as the first and second elements of a vector.

# Extract the dispensing events for a set of NDC codes from one RX claims table.
#
# Arguments:
#   args          Character vector. Element 1 is the data source used in the
#                 table name (e.g. "ccae"); element 2 is the two-digit year
#                 used in both the database file name and the table name.
#   required_ndc  Data frame with an NDCNUM column holding the NDC codes to
#                 keep.
#
# Returns a tibble with the columns enrolid (cast to character), ndcnum,
# svcdate and daysupp for every row of rx_core_{source}_{year} whose ndcnum is
# one of required_ndc$NDCNUM.
#
# Stops with an error if the year's database file does not exist.
find_rx_events <- function(args, required_ndc) {
  data_source <- args[1]
  year <- args[2]

  db_path <- as.character(
    glue::glue("/Shared/Statepi_Marketscan/databases/Truven/truven_{year}.db")
  )
  # DBI::dbConnect() would create a new, empty database for a missing path;
  # fail instead, as src_sqlite() did.
  if (!file.exists(db_path)) {
    stop("Database file does not exist: ", db_path, call. = FALSE)
  }

  # Connect through DBI directly (src_sqlite() is deprecated in dplyr) and
  # close the connection as soon as the function exits, whether or not the
  # query succeeds.
  con <- DBI::dbConnect(RSQLite::SQLite(), db_path)
  on.exit(DBI::dbDisconnect(con), add = TRUE)

  # Evaluate the code list locally so it is inlined into the SQL IN (...)
  # clause; duplicates add nothing to the filter.
  ndc_codes <- unique(required_ndc$NDCNUM)

  tbl(con, glue::glue("rx_core_{data_source}_{year}")) %>%
    filter(ndcnum %in% !!ndc_codes) %>%
    select(enrolid, ndcnum, svcdate, daysupp) %>%
    mutate(enrolid = as.character(enrolid)) %>%
    collect()
}

We then make a list of those two element vectors for all the source and year combinations of interest.

# Build one c(source, year) pair for every combination of the two data sources
# and the two-digit years "01" through "17": all ccae years first, then all
# mdcr years.
args_list <- unlist(
  lapply(c("ccae", "mdcr"), function(src) {
    lapply(sprintf("%02d", 1:17), function(yr) c(src, yr))
  }),
  recursive = FALSE
)

Initialize the cluster, load tidyverse, and export ndc_numbers to the workers:

# Start one worker per element of args_list.
cluster <- parallel::makeCluster(spec = length(args_list))
# Attach the tidyverse on every worker; the call returns each worker's list of
# attached packages, which is printed below.
parallel::clusterEvalQ(cl = cluster, expr = library(tidyverse))

[[1]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[2]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[3]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[4]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[5]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[6]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[7]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[8]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[9]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[10]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[11]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[12]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[13]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[14]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[15]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[16]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[17]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[18]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[19]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[20]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[21]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[22]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[23]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[24]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[25]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[26]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[27]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[28]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[29]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[30]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[31]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[32]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[33]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[34]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"

# Copy the ndc_numbers table into the global environment of every worker.
parallel::clusterExport(cl = cluster, varlist = "ndc_numbers")

Extract the Data

Using this cluster, we then apply the find_rx_events() function to each element of args_list.

# Run find_rx_events() on every element of args_list across the cluster
# workers, forwarding ndc_numbers as required_ndc to each call. The result is
# a list with one tibble per element of args_list.
#
# If any call fails, shut the cluster down before re-raising the error so the
# worker processes are not left running. On success the cluster stays up and
# is released by the explicit stopCluster() call that follows.
rx_events <- tryCatch(
  parLapply(cluster,
            args_list,
            find_rx_events,
            required_ndc = ndc_numbers),
  error = function(e) {
    stopCluster(cluster)
    stop(e)
  }
)

Once complete, kill the cluster to release the resources.

# Shut down the worker processes now that the extraction is finished.
parallel::stopCluster(cl = cluster)

We then want to bind the elements of the rx_events list into a single tibble.

# Stack the per-task tibbles into one tibble of dispensing events.
rx_events <- dplyr::bind_rows(rx_events)

And add the medication group label (tz/dz/az, tamsulosin, or 5ari):

# Attach the medication group (drug) to each dispensing event by matching the
# claim's ndcnum to the Redbook NDCNUM. Events with no matching code are
# dropped by the inner join.
rx_events <- inner_join(
  x = rx_events,
  y = ndc_numbers,
  by = c("ndcnum" = "NDCNUM")
)

Data Processing

Most of our later tasks only need to know who the ever users are, and loading all of rx_events for that would be slower and more costly than necessary. So we compute the ever users (the first dispensing date for each enrollee and medication group) here to speed up those later steps.

# One row per (enrolid, drug) pair with the earliest svcdate observed for that
# pair. .groups = "drop" returns an ungrouped tibble; without it the result
# stays grouped by enrolid, and that grouping would be carried into the saved
# file and into every script that loads it.
ever_users <- rx_events %>%
  group_by(enrolid, drug) %>%
  summarize(first_rx_date = min(svcdate), .groups = "drop")

Data Save

We save the tibble of all the RX dispensing events and the ever user table of start dates for use in later scripts.

# Persist both tables for the downstream scripts: every dispensing event, and
# the per-enrollee first dispensing date for each medication group.
readr::write_rds(
  rx_events,
  "/Shared/lss_jsimmeri_backup/data/tz-5ari-final/treated_rx_events.rds"
)
readr::write_rds(
  ever_users,
  "/Shared/lss_jsimmeri_backup/data/tz-5ari-final/ever_users.rds"
)

Session Information

# Record the R version, platform and package versions used for this run.
utils::sessionInfo()

R version 4.0.4 (2021-02-15)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 20.04 LTS

Matrix products: default
BLAS/LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.8.so

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=C             
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] parallel  stats     graphics  grDevices utils     datasets 
[7] methods   base     

other attached packages:
 [1] lubridate_1.7.9.2 forcats_0.5.1     stringr_1.4.0    
 [4] dplyr_1.0.4       purrr_0.3.4       readr_1.4.0      
 [7] tidyr_1.1.2       tibble_3.0.6      ggplot2_3.3.3    
[10] tidyverse_1.3.0  

loaded via a namespace (and not attached):
 [1] tidyselect_1.1.0  xfun_0.21         haven_2.3.1      
 [4] colorspace_2.0-0  vctrs_0.3.6       generics_0.1.0   
 [7] htmltools_0.5.1.1 yaml_2.2.1        rlang_0.4.10     
[10] pillar_1.4.7      withr_2.4.1       glue_1.4.2       
[13] DBI_1.1.1         dbplyr_2.1.0      modelr_0.1.8     
[16] readxl_1.3.1      lifecycle_1.0.0   munsell_0.5.0    
[19] gtable_0.3.0      cellranger_1.1.0  rvest_0.3.6      
[22] evaluate_0.14     knitr_1.31        ps_1.5.0         
[25] fansi_0.4.2       broom_0.7.4       Rcpp_1.0.6       
[28] backports_1.2.1   scales_1.1.1      jsonlite_1.7.2   
[31] fs_1.5.0          distill_1.2       hms_1.0.0        
[34] digest_0.6.27     stringi_1.5.3     grid_4.0.4       
[37] cli_2.3.0         tools_4.0.4       magrittr_2.0.1   
[40] crayon_1.4.1      pkgconfig_2.0.3   downlit_0.2.1    
[43] ellipsis_0.3.1    xml2_1.3.2        reprex_1.0.0     
[46] assertthat_0.2.1  rmarkdown_2.6     httr_1.4.2       
[49] rstudioapi_0.13   R6_2.5.0          compiler_4.0.4