Missing data is the norm in real-life data analysis. Multiple imputation via the mice package is a popular option in R. Here we introduce simple missingness and demonstrate use of regmedint along with mice.
For demonstration purpose, missing data is introduced here.
set.seed(138087069)
library(regmedint)
library(tidyverse)
## Prepare dataset
data(vv2015)
vv2015 <- vv2015 %>%
    select(-cens) %>%
    ## Generate exposure-dependent missing of mediator
    mutate(logit_p_m_miss = -1 + 0.5 * x,
           p_m_miss = exp(logit_p_m_miss) / (1 + exp(logit_p_m_miss)),
           ## Indicator draw
           ind_m_miss = rbinom(n = length(p_m_miss), size = 1, prob = p_m_miss),
           m_true = m,
           m = if_else(ind_m_miss == 1L, as.numeric(NA), m))Taking the advantage of the simulated setting, the true model is fit here.
regmedint_true <-
    regmedint(data = vv2015,
              ## Variables
              yvar = "y",
              avar = "x",
              mvar = "m_true",
              cvar = c("c"),
              eventvar = "event",
              ## Values at which effects are evaluated
              a0 = 0,
              a1 = 1,
              m_cde = 1,
              c_cond = 0.5,
              ## Model types
              mreg = "logistic",
              yreg = "survAFT_weibull",
              ## Additional specification
              interaction = TRUE,
              casecontrol = FALSE)
summary(regmedint_true)## ### Mediator model
## 
## Call:
## glm(formula = m_true ~ x + c, family = binomial(link = "logit"), 
##     data = data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.5143  -1.1765   0.9177   1.1133   1.4602  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept)  -0.3545     0.3252  -1.090    0.276
## x             0.3842     0.4165   0.922    0.356
## c             0.2694     0.2058   1.309    0.191
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 138.59  on 99  degrees of freedom
## Residual deviance: 136.08  on 97  degrees of freedom
## AIC: 142.08
## 
## Number of Fisher Scoring iterations: 4
## 
## ### Outcome model
## 
## Call:
## survival::survreg(formula = Surv(y, event) ~ x + m_true + x:m_true + 
##     c, data = data, dist = "weibull")
##               Value Std. Error     z       p
## (Intercept) -1.0424     0.1903 -5.48 4.3e-08
## x            0.4408     0.3008  1.47    0.14
## m_true       0.0905     0.2683  0.34    0.74
## c           -0.0669     0.0915 -0.73    0.46
## x:m_true     0.1003     0.4207  0.24    0.81
## Log(scale)  -0.0347     0.0810 -0.43    0.67
## 
## Scale= 0.966 
## 
## Weibull distribution
## Loglik(model)= -11.4   Loglik(intercept only)= -14.5
##  Chisq= 6.31 on 4 degrees of freedom, p= 0.18 
## Number of Newton-Raphson Iterations: 5 
## n= 100 
## 
## ### Mediation analysis 
##              est         se         Z          p       lower      upper
## cde  0.541070807 0.29422958 1.8389409 0.06592388 -0.03560858 1.11775019
## pnde 0.488930417 0.21049248 2.3227928 0.02019028  0.07637274 0.90148809
## tnie 0.018240025 0.03706111 0.4921608 0.62260566 -0.05439841 0.09087846
## tnde 0.498503455 0.21209540 2.3503737 0.01875457  0.08280410 0.91420281
## pnie 0.008666987 0.02730994 0.3173565 0.75097309 -0.04485951 0.06219348
## te   0.507170442 0.21090051 2.4047853 0.01618197  0.09381303 0.92052785
## pm   0.045436278 0.09119614 0.4982259 0.61832484 -0.13330488 0.22417743
## 
## Evaluated at:
## avar: x
##  a1 (intervened value of avar) = 1
##  a0 (reference value of avar)  = 0
## mvar: m_true
##  m_cde (intervend value of mvar for cde) = 1
## cvar: c
##  c_cond (covariate vector value) = 0.5
## 
## Note that effect estimates can vary over m_cde and c_cond values when interaction = TRUE.regmedint_cca <- vv2015 %>%
    filter(!is.na(m)) %>%
    regmedint(data = .,
              ## Variables
              yvar = "y",
              avar = "x",
              mvar = "m",
              cvar = c("c"),
              eventvar = "event",
              ## Values at which effects are evaluated
              a0 = 0,
              a1 = 1,
              m_cde = 1,
              c_cond = 0.5,
              ## Model types
              mreg = "logistic",
              yreg = "survAFT_weibull",
              ## Additional specification
              interaction = TRUE,
              casecontrol = FALSE)
summary(regmedint_cca)## ### Mediator model
## 
## Call:
## glm(formula = m ~ x + c, family = binomial(link = "logit"), data = data)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.306  -1.133  -1.028   1.183   1.360  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept)  -0.2500     0.3880  -0.644    0.519
## x             0.1278     0.4883   0.262    0.794
## c             0.1587     0.2415   0.657    0.511
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 99.758  on 71  degrees of freedom
## Residual deviance: 99.287  on 69  degrees of freedom
## AIC: 105.29
## 
## Number of Fisher Scoring iterations: 3
## 
## ### Outcome model
## 
## Call:
## survival::survreg(formula = Surv(y, event) ~ x + m + x:m + c, 
##     data = data, dist = "weibull")
##               Value Std. Error     z       p
## (Intercept) -1.2689     0.2229 -5.69 1.2e-08
## x            0.7213     0.3315  2.18    0.03
## m            0.4517     0.2985  1.51    0.13
## c           -0.0652     0.1110 -0.59    0.56
## x:m         -0.2579     0.4750 -0.54    0.59
## Log(scale)  -0.0581     0.0958 -0.61    0.54
## 
## Scale= 0.944 
## 
## Weibull distribution
## Loglik(model)= -6.1   Loglik(intercept only)= -10.5
##  Chisq= 8.79 on 4 degrees of freedom, p= 0.066 
## Number of Newton-Raphson Iterations: 5 
## n= 72 
## 
## ### Mediation analysis 
##              est         se         Z          p       lower      upper
## cde  0.463323084 0.34636957 1.3376553 0.18100884 -0.21554880 1.14219497
## pnde 0.582515084 0.24557485 2.3720470 0.01768984  0.10119722 1.06383295
## tnie 0.006182822 0.02643657 0.2338738 0.81508294 -0.04563191 0.05799755
## tnde 0.574385442 0.24784035 2.3175623 0.02047312  0.08862728 1.06014360
## pnie 0.014312464 0.05541066 0.2582980 0.79617690 -0.09429043 0.12291536
## te   0.588697906 0.24809627 2.3728608 0.01765092  0.10243816 1.07495766
## pm   0.013852661 0.05856686 0.2365273 0.81302354 -0.10093628 0.12864160
## 
## Evaluated at:
## avar: x
##  a1 (intervened value of avar) = 1
##  a0 (reference value of avar)  = 0
## mvar: m
##  m_cde (intervend value of mvar for cde) = 1
## cvar: c
##  c_cond (covariate vector value) = 0.5
## 
## Note that effect estimates can vary over m_cde and c_cond values when interaction = TRUE.This specific data setting is a little tricky in that the outcome variable is a censored survival time variable. Here we will use a log transformed survival time.
library(mice)
vv2015_mod <- vv2015 %>%
    mutate(log_y = log(y)) %>%
    select(x,m,c,log_y,event)
## Run mice
vv2015_mice <- mice(data = vv2015_mod, m = 50, printFlag = FALSE)
## Object containig 50 imputed dataset
vv2015_mice## Class: mids
## Number of multiple imputations:  50 
## Imputation methods:
##     x     m     c log_y event 
##    "" "pmm"    ""    ""    "" 
## PredictorMatrix:
##       x m c log_y event
## x     0 1 1     1     1
## m     1 0 1     1     1
## c     1 1 0     1     1
## log_y 1 1 1     0     1
## event 1 1 1     1     0After creating such MI datasets, mediation analysis can be performed in each dataset.
## Fit in each MI dataset
vv2015_mice_regmedint <-
    vv2015_mice %>%
    ## Stacked up dataset
    mice::complete("long") %>%
    as_tibble() %>%
    mutate(y = exp(log_y)) %>%
    group_by(.imp) %>%
    ## Nested data frame
    nest() %>%
    mutate(fit = map(data, function(data) {
        regmedint(data = data,
                  ## Variables
                  yvar = "y",
                  avar = "x",
                  mvar = "m",
                  cvar = c("c"),
                  eventvar = "event",
                  ## Values at which effects are evaluated
                  a0 = 0,
                  a1 = 1,
                  m_cde = 1,
                  c_cond = 0.5,
                  ## Model types
                  mreg = "logistic",
                  yreg = "survAFT_weibull",
                  ## Additional specification
                  interaction = TRUE,
                  casecontrol = FALSE)
    })) %>%
    ## Extract point estimates and variance estimates
    mutate(coef_fit = map(fit, coef),
           vcov_fit = map(fit, vcov))
vv2015_mice_regmedint## # A tibble: 50 × 5
## # Groups:   .imp [50]
##     .imp data               fit            coef_fit  vcov_fit     
##    <int> <list>             <list>         <list>    <list>       
##  1     1 <tibble [100 × 7]> <regmednt [4]> <dbl [7]> <dbl [7 × 7]>
##  2     2 <tibble [100 × 7]> <regmednt [4]> <dbl [7]> <dbl [7 × 7]>
##  3     3 <tibble [100 × 7]> <regmednt [4]> <dbl [7]> <dbl [7 × 7]>
##  4     4 <tibble [100 × 7]> <regmednt [4]> <dbl [7]> <dbl [7 × 7]>
##  5     5 <tibble [100 × 7]> <regmednt [4]> <dbl [7]> <dbl [7 × 7]>
##  6     6 <tibble [100 × 7]> <regmednt [4]> <dbl [7]> <dbl [7 × 7]>
##  7     7 <tibble [100 × 7]> <regmednt [4]> <dbl [7]> <dbl [7 × 7]>
##  8     8 <tibble [100 × 7]> <regmednt [4]> <dbl [7]> <dbl [7 × 7]>
##  9     9 <tibble [100 × 7]> <regmednt [4]> <dbl [7]> <dbl [7 × 7]>
## 10    10 <tibble [100 × 7]> <regmednt [4]> <dbl [7]> <dbl [7 × 7]>
## # … with 40 more rowsThe results can be combined using the mitools package.
regmedint_mi <- mitools::MIcombine(results = vv2015_mice_regmedint$coef_fit,
                                   variances = vv2015_mice_regmedint$vcov_fit)
regmedint_mi_summary <- summary(regmedint_mi)## Multiple imputation results:
##       MIcombine.default(results = vv2015_mice_regmedint$coef_fit, variances = vv2015_mice_regmedint$vcov_fit)
##         results         se      (lower    upper) missInfo
## cde  0.35334826 0.31280990 -0.25988229 0.9665788      9 %
## pnde 0.48716281 0.21292928  0.06982892 0.9044967      0 %
## tnie 0.00486299 0.03096690 -0.05584982 0.0655758     11 %
## tnde 0.47324382 0.21699611  0.04793551 0.8985521      2 %
## pnie 0.01878197 0.06178469 -0.10246510 0.1400291     23 %
## te   0.49202580 0.21430966  0.07198654 0.9120651      0 %
## pm   0.01232649 0.07897926 -0.14251938 0.1671724     11 %We can observe the MI estimtates are generally more in alignment with the true estimates than the complete-case analysis estimates.
cbind(true = coef(regmedint_true),
      cca = coef(regmedint_cca),
      mi = regmedint_mi_summary$results)##             true         cca         mi
## cde  0.541070807 0.463323084 0.35334826
## pnde 0.488930417 0.582515084 0.48716281
## tnie 0.018240025 0.006182822 0.00486299
## tnde 0.498503455 0.574385442 0.47324382
## pnie 0.008666987 0.014312464 0.01878197
## te   0.507170442 0.588697906 0.49202580
## pm   0.045436278 0.013852661 0.01232649