Develop Propensity Score Models

Estimate the propensity score model, export propensity scores for matching

Jacob Simmering, PhD https://jacobsimmering.com (University of Iowa)https://uiowa.edu
2022-04-27

This file was compiled on 2022-04-27 17:22:01 by jsimmeri on argon-itf-bx47-28.hpc.

library(tidyverse)
library(mgcv)
library(parallel)

Define Models

We are using a GAM to estimate the propensity scores. Our full model specification is below:

ps_full <- treatment ~ s(age) + 
  s(in_rate) + 
  s(out_rate) + s(mean_ndx) + s(out_dx_rate) +
  start_year +
  cm_elix_Alcohol + cm_elix_Anemia + cm_elix_BloodLoss +
  cm_elix_CHF + cm_elix_Coagulopathy + cm_elix_Depression +
  cm_elix_DM + cm_elix_DMcx + cm_elix_Drugs +
  cm_elix_FluidsLytes + cm_elix_HIV + cm_elix_HTN + cm_elix_HTNcx +
  cm_elix_Hypothyroid + cm_elix_Liver + cm_elix_Lymphoma +
  cm_elix_Mets + cm_elix_NeuroOther + cm_elix_Obesity +
  cm_elix_Paralysis + cm_elix_PHTN + cm_elix_Psychoses +
  cm_elix_PUD + cm_elix_Pulmonary + cm_elix_PVD +
  cm_elix_Renal + cm_elix_Rheumatic + cm_elix_Tumor +
  cm_elix_Valvular + cm_elix_WeightLoss +
  cm_bph + cm_slow_stream + cm_abnormal_psa +
  psa_measured + uroflow + cystometrogram +
  cm_orthostatic + cm_other_hypo +
  cm_anxiety + cm_ed

Load Data and Make Data Sets for Model Estimation

Next, we need to load the data that was intended to be used to estimate the PSMs.

treated <- read_rds("/Shared/lss_jsimmeri_backup/data/tz-5ari-final/treated_model_data.rds")

And build the comparison data sets:

tz_tam <- treated %>%
  filter(drug %in% c("tz/dz/az", "tamsulosin")) %>%
  mutate(treatment = drug == "tz/dz/az")
tz_5ari <- treated %>%
  filter(drug %in% c("tz/dz/az", "5ari")) %>%
  mutate(treatment = drug == "tz/dz/az")
tam_5ari <- treated %>%
  filter(drug %in% c("5ari", "tamsulosin")) %>%
  mutate(treatment = drug == "tamsulosin")

Functions to Fit Propensity Score Models

For ease, using a function to wrap around bam() to provide detail on the PS model fitting process:

fit_gam_model <- function(data, ps_formula, k = 28) {
  obs_tx <- data$drug[data$treatment == 1][1]
  obs_ref <- data$drug[data$treatment == 0][1]
  n_tx <- sum(data$treatment)
  n_con <- sum(data$treatment == 0)

  message(glue::glue("{lubridate::now()} | Treatment = {obs_tx} [{n_tx}] | Ref = {obs_ref} [{n_con}]"))

  message(glue::glue("{lubridate::now()} | Starting model fitting"))
  cluster <- parallel::makeCluster(k)
  model <- bam(
    ps_formula,
    data = data,
    family = binomial(),
    nthreads = 28,
    cluster = cluster
  )
  parallel::stopCluster(cluster)
  message(glue::glue("{lubridate::now()} | Finished model fitting"))
  
  fitted_values <- data %>%
    select(enrolid, treatment, follow_up_time) %>%
    mutate(
      ptx = predict(model, type = "response")
    )

  return(fitted_values)
}

Fit Propensity Scores

Then generate all the data needed for matching:

TZ/DZ/AZ vs…

tz_tam_full <- fit_gam_model(tz_tam, ps_full)
tz_5ari_full <- fit_gam_model(tz_5ari, ps_full)

Tamsulosin vs…

tam_5ari_full <- fit_gam_model(tam_5ari, ps_full)

Save Data for Matching

And save the results for use in the matching script:

write_rds(
  tz_tam_full,
  "/Shared/lss_jsimmeri_backup/data/tz-5ari-final/matching_candidates/tz_tam.rds")
write_rds(
  tz_5ari_full,
  "/Shared/lss_jsimmeri_backup/data/tz-5ari-final/matching_candidates/tz_5ari.rds")

write_rds(
  tam_5ari_full,
  "/Shared/lss_jsimmeri_backup/data/tz-5ari-final/matching_candidates/tam_5ari.rds")

Session Info

R version 4.0.4 (2021-02-15)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 20.04 LTS

Matrix products: default
BLAS/LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.8.so

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=C             
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] parallel  stats     graphics  grDevices utils     datasets 
[7] methods   base     

other attached packages:
 [1] mgcv_1.8-33     nlme_3.1-152    forcats_0.5.1   stringr_1.4.0  
 [5] dplyr_1.0.4     purrr_0.3.4     readr_1.4.0     tidyr_1.1.2    
 [9] tibble_3.0.6    ggplot2_3.3.3   tidyverse_1.3.0

loaded via a namespace (and not attached):
 [1] tidyselect_1.1.0  xfun_0.21         splines_4.0.4    
 [4] lattice_0.20-41   haven_2.3.1       colorspace_2.0-0 
 [7] vctrs_0.3.6       generics_0.1.0    htmltools_0.5.1.1
[10] yaml_2.2.1        rlang_0.4.10      pillar_1.4.7     
[13] withr_2.4.1       glue_1.4.2        DBI_1.1.1        
[16] dbplyr_2.1.0      modelr_0.1.8      readxl_1.3.1     
[19] lifecycle_1.0.0   munsell_0.5.0     gtable_0.3.0     
[22] cellranger_1.1.0  rvest_0.3.6       evaluate_0.14    
[25] knitr_1.31        ps_1.5.0          fansi_0.4.2      
[28] broom_0.7.4       Rcpp_1.0.6        backports_1.2.1  
[31] scales_1.1.1      jsonlite_1.7.2    fs_1.5.0         
[34] distill_1.2       hms_1.0.0         digest_0.6.27    
[37] stringi_1.5.3     grid_4.0.4        cli_2.3.0        
[40] tools_4.0.4       magrittr_2.0.1    crayon_1.4.1     
[43] pkgconfig_2.0.3   downlit_0.2.1     Matrix_1.3-2     
[46] ellipsis_0.3.1    xml2_1.3.2        reprex_1.0.0     
[49] lubridate_1.7.9.2 assertthat_0.2.1  rmarkdown_2.6    
[52] httr_1.4.2        rstudioapi_0.13   R6_2.5.0         
[55] compiler_4.0.4