Find First PD Date for All Truven Enrollees

Find the first date with a diagnosis of PD or a dispensing of levodopa across all enrollees in Truven.

Jacob Simmering, PhD (https://jacobsimmering.com), University of Iowa (https://uiowa.edu)
2022-03-30

This file was compiled on 2022-03-30 15:31:48 by jsimmeri on argon-lc-f14-25.hpc.

We define the index date of PD as either

  1. A diagnosis of PD with ICD-9-CM 332.0 or ICD-10-CM G20
  2. A claim for a levodopa prescription

We take whichever event occurs first as the index date.

To do this extraction, we build a function to find PD diagnosis events in the outpatient data:

# Find outpatient claims carrying a PD diagnosis for one data source and year.
#
# Arguments:
#   source  Data source label interpolated into the table names (the driver
#           loop in this document passes "ccae" or "mdcr").
#   year    Two-digit year string (e.g. "09"); selects both the database file
#           and the table names.
#
# Returns a tibble with `enrolid` (character) and `date` (the claim's
# svcdate). Rows are de-duplicated within each diagnosis table that is read.
find_outpatient_dx <- function(source, year) {
  db <- DBI::dbConnect(RSQLite::SQLite(),
                       glue::glue("/Shared/Statepi_Marketscan/databases/Truven/truven_{year}.db"))
  # Close the connection on every exit path, including when a query errors,
  # so a failed task does not leave an open database handle behind.
  on.exit(DBI::dbDisconnect(db), add = TRUE)

  # Pull the distinct (enrolid, svcdate) pairs coded with `code` from one
  # diagnosis table.
  pull_dx_events <- function(table_name, code) {
    tbl(db, table_name) %>%
      filter(dx == !!code) %>%
      select(enrolid, svcdate) %>%
      mutate(enrolid = as.character(enrolid)) %>%
      collect() %>%
      distinct()
  }

  if (as.numeric(year) <= 14) {
    # Years up to 14 have a single diagnosis table; search it for the
    # ICD-9-CM code 332.0 (stored without the decimal point).
    dx_table <- glue::glue("outpatient_dx_{source}_{year}")
    events <- pull_dx_events(dx_table, "3320")
  } else {
    # Later years split diagnoses into ICD-9 and ICD-10 tables; search both.
    dx9_table <- glue::glue("outpatient_dx9_{source}_{year}")
    dx10_table <- glue::glue("outpatient_dx10_{source}_{year}")
    events <- rbind(
      pull_dx_events(dx9_table, "3320"),
      pull_dx_events(dx10_table, "G20")
    )
  }

  events %>%
    select(enrolid, date = svcdate)
}

Inpatient data:

# Find inpatient admissions carrying a PD diagnosis for one data source and
# year.
#
# Arguments:
#   source  Data source label interpolated into the table names (the driver
#           loop in this document passes "ccae" or "mdcr").
#   year    Two-digit year string (e.g. "09"); selects both the database file
#           and the table names.
#
# Returns a tibble with `enrolid` (character) and `date` (the admission's
# admdate), one row per matching row of the inpatient core table.
find_inpatient_dx <- function(source, year) {
  db <- DBI::dbConnect(RSQLite::SQLite(),
                       glue::glue("/Shared/Statepi_Marketscan/databases/Truven/truven_{year}.db"))
  # Close the connection on every exit path, including when a query errors,
  # so a failed task does not leave an open database handle behind.
  on.exit(DBI::dbDisconnect(db), add = TRUE)

  # Return the caseid values of all rows in one diagnosis table coded `code`.
  pull_case_ids <- function(table_name, code) {
    matches <- tbl(db, table_name) %>%
      filter(dx == !!code) %>%
      select(caseid) %>%
      collect()
    matches$caseid
  }

  if (as.numeric(year) <= 14) {
    # Years up to 14 have a single diagnosis table; search it for the
    # ICD-9-CM code 332.0 (stored without the decimal point).
    dx_table <- glue::glue("inpatient_dx_{source}_{year}")
    case_ids <- pull_case_ids(dx_table, "3320")
  } else {
    # Later years split diagnoses into ICD-9 and ICD-10 tables; search both.
    dx9_table <- glue::glue("inpatient_dx9_{source}_{year}")
    dx10_table <- glue::glue("inpatient_dx10_{source}_{year}")
    case_ids <- c(
      pull_case_ids(dx9_table, "3320"),
      pull_case_ids(dx10_table, "G20")
    )
  }
  # The ids are inlined into the SQL IN (...) list below; repeated ids add
  # query text without changing which rows match.
  case_ids <- unique(case_ids)

  # The diagnosis tables only identify the stay (caseid); the enrollee and
  # admission date are read from the core table.
  core_table <- glue::glue("inpatient_core_{source}_{year}")
  tbl(db, core_table) %>%
    filter(caseid %in% local(case_ids)) %>%
    select(enrolid, admdate) %>%
    mutate(enrolid = as.character(enrolid)) %>%
    collect() %>%
    select(enrolid, date = admdate)
}

And the RX dispensing claims:

# Find levodopa dispensing claims for one data source and year.
#
# Arguments:
#   source  Data source label interpolated into the table name (the driver
#           loop in this document passes "ccae" or "mdcr").
#   year    Two-digit year string (e.g. "09"); selects both the database file
#           and the table name.
#
# Returns a tibble with `enrolid` (character) and `date` (the claim's
# svcdate), one row per matching row of the rx core table.
find_rx_events <- function(source, year) {
  # Levodopa products are every Redbook row whose generic name contains
  # "levodopa" (case-insensitive).
  # NOTE(review): read_csv() guesses the type of NDCNUM; confirm it matches
  # how ndcnum is stored in the rx tables (e.g. leading zeros).
  levodopa <- read_csv("/Shared/Statepi_Marketscan/databases/Truven/redbook.csv") %>%
    filter(stringr::str_detect(tolower(GENNME), "levodopa")) %>%
    select(ndcnum = NDCNUM)
  # The codes are inlined into the SQL IN (...) list below; repeated codes add
  # query text without changing which rows match.
  levodopa_ndcs <- unique(levodopa$ndcnum)

  db <- DBI::dbConnect(RSQLite::SQLite(),
                       glue::glue("/Shared/Statepi_Marketscan/databases/Truven/truven_{year}.db"))
  # Close the connection on every exit path, including when the query errors,
  # so a failed task does not leave an open database handle behind.
  on.exit(DBI::dbDisconnect(db), add = TRUE)

  rx_table <- glue::glue("rx_core_{source}_{year}")
  tbl(db, rx_table) %>%
    filter(ndcnum %in% local(levodopa_ndcs)) %>%
    select(enrolid, svcdate) %>%
    mutate(enrolid = as.character(enrolid)) %>%
    collect() %>%
    select(enrolid, date = svcdate)
}

We are going to apply this using parLapply() for speed and so we want a wrapper that takes a vector of arguments.

# Dispatch one extraction task to the matching find_* function.
#
# Arguments:
#   args  Length-3 vector (or list): source, year, table, in that order.
#         `table` must be one of "inpatient", "outpatient" or "rx".
#
# Returns whatever the selected find_* function returns for (source, year).
# Stops with an explicit error when `table` is not one of the three supported
# values, instead of failing later on an undefined `events` object.
find_pd_events <- function(args) {
  source <- args[[1]]
  year <- args[[2]]
  table <- args[[3]]

  known_tables <- c("inpatient", "outpatient", "rx")
  if (length(table) != 1L || is.na(table) || !(table %in% known_tables)) {
    stop(
      "Unknown table '", paste(table, collapse = ", "),
      "'; expected one of: ", paste(known_tables, collapse = ", "),
      call. = FALSE
    )
  }

  # Only the selected branch is evaluated, so exactly one extraction runs.
  switch(
    table,
    inpatient = find_inpatient_dx(source, year),
    outpatient = find_outpatient_dx(source, year),
    rx = find_rx_events(source, year)
  )
}

We then build the list of argument vectors. Each element of the list is a length-3 character vector: the first element is the source, the second is the year, and the third is the table, where table is one of outpatient, inpatient, or rx.

# Build one task per (source, year, table) combination. Each task is an
# unnamed length-3 character vector c(source, year, table), ordered with
# source varying slowest and table varying fastest.
# The number of tasks follows from the three vectors below rather than from a
# separately maintained constant, and local() keeps the temporaries out of the
# global environment.
args_list <- local({
  pd_sources <- c("ccae", "mdcr")
  pd_years <- sprintf("%02d", 1:17)  # "01", "02", ..., "17"
  pd_tables <- c("inpatient", "outpatient", "rx")

  # expand.grid() varies its first column fastest, so list table first and
  # source last to get the ordering described above.
  grid <- expand.grid(
    table = pd_tables,
    year = pd_years,
    source = pd_sources,
    stringsAsFactors = FALSE
  )

  lapply(seq_len(nrow(grid)), function(k) {
    c(grid$source[k], grid$year[k], grid$table[k])
  })
})

We then initialize the cluster:

# Start a 56-worker cluster and attach the tidyverse on every worker, so the
# unqualified dplyr/readr verbs used inside the find_* functions resolve there.
# clusterEvalQ() returns one element per worker, which is what gets printed.
cluster <- makeCluster(56)
clusterEvalQ(cluster, library(tidyverse))
[[1]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[2]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[3]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[4]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[5]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[6]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[7]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[8]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[9]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[10]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[11]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[12]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[13]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[14]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[15]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[16]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[17]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[18]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[19]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[20]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[21]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[22]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[23]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[24]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[25]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[26]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[27]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[28]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[29]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[30]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[31]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[32]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[33]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[34]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[35]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[36]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[37]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[38]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[39]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[40]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[41]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[42]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[43]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[44]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[45]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[46]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[47]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[48]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[49]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[50]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[51]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[52]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[53]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[54]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[55]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     

[[56]]
 [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
 [6] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
[11] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
[16] "base"     
# Copy the three extraction helpers to every worker; find_pd_events() calls
# them by name when it runs there.
clusterExport(cluster, c("find_inpatient_dx", "find_outpatient_dx",
                         "find_rx_events"))

And then apply find_pd_events() to args_list using the cluster object `cluster`:

# Run every (source, year, table) task on the cluster; the result is a list
# with one element per entry of args_list, in the same order.
pd_events <- parallel::parLapply(
  cl = cluster,
  X = args_list,
  fun = find_pd_events
)

We are done with the cluster and we release those resources:

# Shut the workers down; all task results are already held in pd_events.
stopCluster(cluster)

We stack all the claims for PD diagnosis or levodopa and, for each enrolid, take the first occurrence as the value of pd_date.

# Stack the per-task event tables and keep, for each enrolid, the smallest
# event date as pd_date.
# NOTE(review): min() propagates NA, so a single missing date would make that
# enrollee's pd_date NA -- confirm the date columns contain no missing values.
first_pd_date <- summarize(
  group_by(bind_rows(pd_events), enrolid),
  pd_date = min(date)
)

Which we then save for later use.

# Persist the per-enrolid first PD dates as an RDS file for later steps.
write_rds(first_pd_date,
           "/Shared/lss_jsimmeri_backup/data/tz-5ari-final/first_pd_date.rds")

Session info is below.

R version 4.0.4 (2021-02-15)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 20.04 LTS

Matrix products: default
BLAS/LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.8.so

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=C             
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] parallel  stats     graphics  grDevices utils     datasets 
[7] methods   base     

other attached packages:
[1] forcats_0.5.1   stringr_1.4.0   dplyr_1.0.4     purrr_0.3.4    
[5] readr_1.4.0     tidyr_1.1.2     tibble_3.0.6    ggplot2_3.3.3  
[9] tidyverse_1.3.0

loaded via a namespace (and not attached):
 [1] tidyselect_1.1.0  xfun_0.21         haven_2.3.1      
 [4] colorspace_2.0-0  vctrs_0.3.6       generics_0.1.0   
 [7] htmltools_0.5.1.1 yaml_2.2.1        rlang_0.4.10     
[10] pillar_1.4.7      withr_2.4.1       glue_1.4.2       
[13] DBI_1.1.1         dbplyr_2.1.0      modelr_0.1.8     
[16] readxl_1.3.1      lifecycle_1.0.0   munsell_0.5.0    
[19] gtable_0.3.0      cellranger_1.1.0  rvest_0.3.6      
[22] evaluate_0.14     knitr_1.31        ps_1.5.0         
[25] fansi_0.4.2       broom_0.7.4       Rcpp_1.0.6       
[28] backports_1.2.1   scales_1.1.1      jsonlite_1.7.2   
[31] fs_1.5.0          distill_1.2       hms_1.0.0        
[34] digest_0.6.27     stringi_1.5.3     grid_4.0.4       
[37] cli_2.3.0         tools_4.0.4       magrittr_2.0.1   
[40] crayon_1.4.1      pkgconfig_2.0.3   downlit_0.2.1    
[43] ellipsis_0.3.1    xml2_1.3.2        reprex_1.0.0     
[46] lubridate_1.7.9.2 assertthat_0.2.1  rmarkdown_2.6    
[49] httr_1.4.2        rstudioapi_0.13   R6_2.5.0         
[52] compiler_4.0.4