-
Notifications
You must be signed in to change notification settings - Fork 63
/
Copy path14-g-est-snms-r.Rmd
135 lines (106 loc) · 4.2 KB
/
14-g-est-snms-r.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
# 14. G-estimation of Structural Nested Models{-}
```{r setup, include=FALSE}
knitr::opts_chunk$set(collapse = TRUE, comment = '#>')
```
## Program 14.1
- Preprocessing, ranks of extreme observations, IP weights for censoring
- Data from NHEFS
```{r, results='hide', message=FALSE, warning=FALSE}
library(here)
```
```{r}
# install.packages("readxl") # install package if required
library("readxl")
nhefs <- read_excel(here("data", "NHEFS.xls"))
# some processing of the data
nhefs$cens <- ifelse(is.na(nhefs$wt82), 1, 0)
# ranking of extreme observations
#install.packages("Hmisc")
library(Hmisc)
describe(nhefs$wt82_71)
# estimation of denominator of ip weights for C
cw.denom <- glm(cens==0 ~ qsmk + sex + race + age + I(age^2)
+ as.factor(education) + smokeintensity + I(smokeintensity^2)
+ smokeyrs + I(smokeyrs^2) + as.factor(exercise)
+ as.factor(active) + wt71 + I(wt71^2),
data = nhefs, family = binomial("logit"))
summary(cw.denom)
nhefs$pd.c <- predict(cw.denom, nhefs, type="response")
nhefs$wc <- ifelse(nhefs$cens==0, 1/nhefs$pd.c, NA)
# observations with cens=1 only contribute to censoring models
```
## Program 14.2
- G-estimation of a 1-parameter structural nested mean model
- Brute force search
- Data from NHEFS
### G-estimation: Checking one possible value of psi
```{r}
#install.packages("geepack")
library("geepack")
nhefs$psi <- 3.446
nhefs$Hpsi <- nhefs$wt82_71 - nhefs$psi*nhefs$qsmk
fit <- geeglm(qsmk ~ sex + race + age + I(age*age) + as.factor(education)
+ smokeintensity + I(smokeintensity*smokeintensity) + smokeyrs
+ I(smokeyrs*smokeyrs) + as.factor(exercise) + as.factor(active)
+ wt71 + I(wt71*wt71) + Hpsi, family=binomial, data=nhefs,
weights=wc, id=seqn, corstr="independence")
summary(fit)
```
### G-estimation: Checking multiple possible values of psi
```{r}
#install.packages("geepack")
grid <- seq(from = 2,to = 5, by = 0.1)
j = 0
Hpsi.coefs <- cbind(rep(NA,length(grid)), rep(NA, length(grid)))
colnames(Hpsi.coefs) <- c("Estimate", "p-value")
for (i in grid){
psi = i
j = j+1
nhefs$Hpsi <- nhefs$wt82_71 - psi * nhefs$qsmk
gest.fit <- geeglm(qsmk ~ sex + race + age + I(age*age) + as.factor(education)
+ smokeintensity + I(smokeintensity*smokeintensity) + smokeyrs
+ I(smokeyrs*smokeyrs) + as.factor(exercise) + as.factor(active)
+ wt71 + I(wt71*wt71) + Hpsi, family=binomial, data=nhefs,
weights=wc, id=seqn, corstr="independence")
Hpsi.coefs[j,1] <- summary(gest.fit)$coefficients["Hpsi", "Estimate"]
Hpsi.coefs[j,2] <- summary(gest.fit)$coefficients["Hpsi", "Pr(>|W|)"]
}
Hpsi.coefs
```
## Program 14.3
- G-estimation for 2-parameter structural nested mean model
- Closed form estimator
- Data from NHEFS
### G-estimation: Closed form estimator linear mean models #
```{r}
logit.est <- glm(qsmk ~ sex + race + age + I(age^2) + as.factor(education)
+ smokeintensity + I(smokeintensity^2) + smokeyrs
+ I(smokeyrs^2) + as.factor(exercise) + as.factor(active)
+ wt71 + I(wt71^2), data = nhefs, weight = wc,
family = binomial())
summary(logit.est)
nhefs$pqsmk <- predict(logit.est, nhefs, type = "response")
describe(nhefs$pqsmk)
summary(nhefs$pqsmk)
# solve sum(w_c * H(psi) * (qsmk - E[qsmk | L])) = 0
# for a single psi and H(psi) = wt82_71 - psi * qsmk
# this can be solved as
# psi = sum( w_c * wt82_71 * (qsmk - pqsmk)) / sum(w_c * qsmk * (qsmk - pqsmk))
nhefs.c <- nhefs[which(!is.na(nhefs$wt82)),]
with(nhefs.c, sum(wc*wt82_71*(qsmk-pqsmk)) / sum(wc*qsmk*(qsmk - pqsmk)))
```
### G-estimation: Closed form estimator for 2-parameter model
```{r}
diff = with(nhefs.c, qsmk - pqsmk)
diff2 = with(nhefs.c, wc * diff)
lhs = matrix(0,2,2)
lhs[1,1] = with(nhefs.c, sum(qsmk * diff2))
lhs[1,2] = with(nhefs.c, sum(qsmk * smokeintensity * diff2))
lhs[2,1] = with(nhefs.c, sum(qsmk * smokeintensity * diff2))
lhs[2,2] = with(nhefs.c, sum(qsmk * smokeintensity * smokeintensity * diff2))
rhs = matrix(0,2,1)
rhs[1] = with(nhefs.c, sum(wt82_71 * diff2))
rhs[2] = with(nhefs.c, sum(wt82_71 * smokeintensity * diff2))
psi = t(solve(lhs,rhs))
psi
```