Find the first date with a diagnosis of PD or a dispensing of levodopa across all enrollees in Truven.
This file was compiled on 2022-03-30 15:31:48 by jsimmeri
on argon-lc-f14-25.hpc.
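We define the index date of PD as either

- the first claim carrying a PD diagnosis code (ICD-9-CM `332.0` or ICD-10-CM `G20`), in the outpatient or inpatient data, or
- the first dispensing of a medication whose generic name contains levodopa.

We take whichever event occurs first as the index date.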
To do this extraction, we build a function to find PD diagnosis events in the outpatient data:
# Find outpatient claims carrying a Parkinson's disease (PD) diagnosis code.
#
# Arguments:
#   source  Tag interpolated into the table names
#           ("outpatient_dx_{source}_{year}" and friends).
#   year    Tag interpolated into the database file name and the table names.
#           It is compared numerically against 14 to choose the table layout
#           (presumably a two-digit year such as "14" -- confirm with caller).
#
# Returns a tibble with columns `enrolid` (character) and `date` (the claim's
# `svcdate`). Rows are de-duplicated within each diagnosis table queried.
find_outpatient_dx <- function(source, year) {
  db <- DBI::dbConnect(
    RSQLite::SQLite(),
    glue::glue("/Shared/Statepi_Marketscan/databases/Truven/truven_{year}.db")
  )
  # Close the connection on every exit path. Without this, a failing query
  # (e.g. a missing table) would leave the handle open in the worker process.
  on.exit(DBI::dbDisconnect(db), add = TRUE)

  # Distinct (enrolid, svcdate) pairs in `table_name` whose diagnosis code is
  # exactly `dx_code`. `!!` forces `dx_code` to be inlined as a value rather
  # than being looked up as a column of the remote table.
  pull_dx <- function(table_name, dx_code) {
    tbl(db, table_name) %>%
      filter(dx == !!dx_code) %>%
      select(enrolid, svcdate) %>%
      mutate(enrolid = as.character(enrolid)) %>%
      collect() %>%
      distinct()
  }

  if (as.numeric(year) <= 14) {
    # Single diagnosis table; only the ICD-9 code "3320" is searched.
    events <- pull_dx(glue::glue("outpatient_dx_{source}_{year}"), "3320")
  } else {
    # Separate ICD-9 ("dx9") and ICD-10 ("dx10") tables; search both.
    events <- rbind(
      pull_dx(glue::glue("outpatient_dx9_{source}_{year}"), "3320"),
      pull_dx(glue::glue("outpatient_dx10_{source}_{year}"), "G20")
    )
  }

  # Harmonise the date column name with the inpatient and rx extractors.
  events %>%
    select(enrolid, date = svcdate)
}
Inpatient data:
# Find inpatient admissions carrying a Parkinson's disease (PD) diagnosis code.
#
# The inpatient diagnosis tables are queried for matching `caseid`s, and the
# admission records for those cases are then read from the inpatient core
# table.
#
# Arguments:
#   source  Tag interpolated into the table names
#           ("inpatient_dx_{source}_{year}", "inpatient_core_{source}_{year}").
#   year    Tag interpolated into the database file name and the table names.
#           It is compared numerically against 14 to choose the table layout
#           (presumably a two-digit year such as "14" -- confirm with caller).
#
# Returns a tibble with columns `enrolid` (character) and `date` (the
# admission's `admdate`), one row per matching row of the core table.
find_inpatient_dx <- function(source, year) {
  db <- DBI::dbConnect(
    RSQLite::SQLite(),
    glue::glue("/Shared/Statepi_Marketscan/databases/Truven/truven_{year}.db")
  )
  # Close the connection on every exit path. Without this, a failing query
  # (e.g. a missing table) would leave the handle open in the worker process.
  on.exit(DBI::dbDisconnect(db), add = TRUE)

  # Case ids in `table_name` whose diagnosis code is exactly `dx_code`. `!!`
  # forces `dx_code` to be inlined as a value rather than a column lookup.
  pull_caseids <- function(table_name, dx_code) {
    tbl(db, table_name) %>%
      filter(dx == !!dx_code) %>%
      select(caseid) %>%
      collect()
  }

  if (as.numeric(year) <= 14) {
    # Single diagnosis table; only the ICD-9 code "3320" is searched.
    pd_cases <- pull_caseids(glue::glue("inpatient_dx_{source}_{year}"), "3320")
  } else {
    # Separate ICD-9 ("dx9") and ICD-10 ("dx10") tables; search both.
    pd_cases <- rbind(
      pull_caseids(glue::glue("inpatient_dx9_{source}_{year}"), "3320"),
      pull_caseids(glue::glue("inpatient_dx10_{source}_{year}"), "G20")
    )
  }

  # De-duplicating only shortens the generated IN (...) list; set membership,
  # and therefore the rows returned, is unchanged.
  pd_caseids <- unique(pd_cases$caseid)

  tbl(db, glue::glue("inpatient_core_{source}_{year}")) %>%
    filter(caseid %in% local(pd_caseids)) %>%
    select(enrolid, admdate) %>%
    mutate(enrolid = as.character(enrolid)) %>%
    collect() %>%
    # Harmonise the date column name with the outpatient and rx extractors.
    select(enrolid, date = admdate)
}
And the RX dispensing claims
# Find pharmacy claims for levodopa-containing products.
#
# The NDC codes of interest are every row of the Redbook file whose generic
# name (`GENNME`) contains "levodopa", matched case-insensitively.
#
# Arguments:
#   source  Tag interpolated into the table name ("rx_core_{source}_{year}").
#   year    Tag interpolated into the database file name and the table name.
#
# Returns a tibble with columns `enrolid` (character) and `date` (the claim's
# `svcdate`), one row per matching row of the rx core table.
find_rx_events <- function(source, year) {
  # NOTE(review): read_csv() guesses column types. If NDCNUM is parsed as a
  # number while `ndcnum` in the database is text (or vice versa), the
  # comparison below may not match as intended -- confirm the types agree.
  levodopa_ndcs <- read_csv(
    "/Shared/Statepi_Marketscan/databases/Truven/redbook.csv"
  ) %>%
    filter(stringr::str_detect(tolower(GENNME), "levodopa")) %>%
    select(ndcnum = NDCNUM)

  db <- DBI::dbConnect(
    RSQLite::SQLite(),
    glue::glue("/Shared/Statepi_Marketscan/databases/Truven/truven_{year}.db")
  )
  # Close the connection on every exit path. Without this, a failing query
  # (e.g. a missing table) would leave the handle open in the worker process.
  on.exit(DBI::dbDisconnect(db), add = TRUE)

  tbl(db, glue::glue("rx_core_{source}_{year}")) %>%
    # local() evaluates the NDC vector in R and inlines it into the query.
    filter(ndcnum %in% local(levodopa_ndcs$ndcnum)) %>%
    select(enrolid, svcdate) %>%
    mutate(enrolid = as.character(enrolid)) %>%
    collect() %>%
    # Harmonise the date column name with the diagnosis extractors.
    select(enrolid, date = svcdate)
}
We are going to apply these functions with `parLapply()` for speed, so we want a wrapper that takes a single vector of arguments.
# Dispatch one (source, year, table) job to the matching extractor.
#
# Arguments:
#   args  A vector or list whose first three elements are, in order, the
#         source tag, the year tag, and the table kind. The table kind must be
#         one of "inpatient", "outpatient" or "rx".
#
# Returns whatever the selected extractor returns (a table of `enrolid` and
# `date`). Raises an error naming the offending value when the table kind is
# not recognised, instead of failing later with "object 'events' not found".
find_pd_events <- function(args) {
  source <- args[[1]]
  year <- args[[2]]
  table <- args[[3]]

  if (table == "inpatient") {
    return(find_inpatient_dx(source, year))
  }
  if (table == "outpatient") {
    return(find_outpatient_dx(source, year))
  }
  if (table == "rx") {
    return(find_rx_events(source, year))
  }

  stop(
    "Unknown table '", table,
    "'; expected one of 'inpatient', 'outpatient', or 'rx'.",
    call. = FALSE
  )
}
We then build the list of vectors. Each element of the list is a length-3 character vector whose first element is the `source`, second the `year`, and third the `table`, where `table` is one of `outpatient`, `inpatient`, or `rx`.
We then initialize the cluster:
# Start 56 worker processes for the parallel extraction.
cluster <- parallel::makeCluster(spec = 56)
# Attach the tidyverse on every worker so that tbl(), the pipe, read_csv()
# and the dplyr verbs used by the extractors resolve there.
parallel::clusterEvalQ(cl = cluster, expr = library(tidyverse))
[[1]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[2]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[3]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[4]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[5]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[6]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[7]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[8]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[9]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[10]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[11]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[12]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[13]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[14]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[15]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[16]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[17]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[18]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[19]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[20]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[21]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[22]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[23]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[24]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[25]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[26]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[27]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[28]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[29]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[30]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[31]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[32]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[33]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[34]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[35]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[36]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[37]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[38]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[39]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[40]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[41]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[42]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[43]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[44]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[45]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[46]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[47]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[48]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[49]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[50]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[51]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[52]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[53]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[54]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[55]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
[[56]]
[1] "forcats" "stringr" "dplyr" "purrr" "readr"
[6] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
[11] "graphics" "grDevices" "utils" "datasets" "methods"
[16] "base"
# Copy the three extractors to every worker. find_pd_events() is shipped by
# parLapply() itself, but the functions it calls must already exist there.
parallel::clusterExport(
  cl = cluster,
  varlist = c("find_inpatient_dx", "find_outpatient_dx", "find_rx_events")
)
And then apply `find_pd_events()` to each element of `args_list` using the cluster `cluster`:
# Run one extraction job per element of args_list across the workers; the
# result is a list with one table of (enrolid, date) rows per job.
# NOTE(review): the code that builds `args_list` is not shown in this
# document -- confirm it is defined before this point.
pd_events <- parallel::parLapply(
  cl = cluster,
  X = args_list,
  fun = find_pd_events
)
We are done with the cluster and we release those resources:
# Shut down the worker processes now that all jobs have returned.
parallel::stopCluster(cl = cluster)
We stack all the claims for PD diagnosis or levodopa and, for each `enrolid`, take the first occurrence as the value of `pd_date`.
# Stack the per-job event tables and keep, for each enrollee, the earliest
# event date as `pd_date`.
first_pd_date <- dplyr::bind_rows(pd_events) %>%
  dplyr::group_by(enrolid) %>%
  dplyr::summarise(pd_date = min(date))
Which we then save for later use.
# Persist the per-enrollee index dates for the downstream analysis steps.
readr::write_rds(
  x = first_pd_date,
  "/Shared/lss_jsimmeri_backup/data/tz-5ari-final/first_pd_date.rds"
)
Session info is below.
R version 4.0.4 (2021-02-15)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 20.04 LTS
Matrix products: default
BLAS/LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.8.so
locale:
[1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
[3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
[5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=C
[7] LC_PAPER=en_US.UTF-8 LC_NAME=C
[9] LC_ADDRESS=C LC_TELEPHONE=C
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
attached base packages:
[1] parallel stats graphics grDevices utils datasets
[7] methods base
other attached packages:
[1] forcats_0.5.1 stringr_1.4.0 dplyr_1.0.4 purrr_0.3.4
[5] readr_1.4.0 tidyr_1.1.2 tibble_3.0.6 ggplot2_3.3.3
[9] tidyverse_1.3.0
loaded via a namespace (and not attached):
[1] tidyselect_1.1.0 xfun_0.21 haven_2.3.1
[4] colorspace_2.0-0 vctrs_0.3.6 generics_0.1.0
[7] htmltools_0.5.1.1 yaml_2.2.1 rlang_0.4.10
[10] pillar_1.4.7 withr_2.4.1 glue_1.4.2
[13] DBI_1.1.1 dbplyr_2.1.0 modelr_0.1.8
[16] readxl_1.3.1 lifecycle_1.0.0 munsell_0.5.0
[19] gtable_0.3.0 cellranger_1.1.0 rvest_0.3.6
[22] evaluate_0.14 knitr_1.31 ps_1.5.0
[25] fansi_0.4.2 broom_0.7.4 Rcpp_1.0.6
[28] backports_1.2.1 scales_1.1.1 jsonlite_1.7.2
[31] fs_1.5.0 distill_1.2 hms_1.0.0
[34] digest_0.6.27 stringi_1.5.3 grid_4.0.4
[37] cli_2.3.0 tools_4.0.4 magrittr_2.0.1
[40] crayon_1.4.1 pkgconfig_2.0.3 downlit_0.2.1
[43] ellipsis_0.3.1 xml2_1.3.2 reprex_1.0.0
[46] lubridate_1.7.9.2 assertthat_0.2.1 rmarkdown_2.6
[49] httr_1.4.2 rstudioapi_0.13 R6_2.5.0
[52] compiler_4.0.4