Exercise 23. Calculating SMRs/SIRs
library(biostat3) # for Surv and survfit
library(dplyr) # for data manipulation
(a)
data(melanoma)
scale <- 365.24
mel <- mutate(melanoma,
ydx=biostat3::year(dx),
adx=age+0.5, # mid-point approximation
dead=(status %in% c("Dead: cancer","Dead: other") & surv_mm<110)+0,
surv_mm=pmin(110,surv_mm),
astart=adx,
astop=adx+surv_mm/12)
mel.split <- survSplit(mel,
cut=1:110,
event="dead",start="astart", end="astop")
subset(mel.split, id<=2, select=c(id,astart,astop,dead))
## id astart astop dead
## 1 1 81.5 82.00000 0
## 2 1 82.0 83.00000 0
## 3 1 83.0 83.70833 1
## 4 2 75.5 76.00000 0
## 5 2 76.0 77.00000 0
## 6 2 77.0 78.00000 0
## 7 2 78.0 79.00000 0
## 8 2 79.0 80.00000 0
## 9 2 80.0 80.12500 1
(b)
mel.split <- mutate(mel.split,
ystart=year(dx)+astart-adx,
ystop=year(dx)+astop-adx)
mel.split2 <- survSplit(mel.split,
cut=1970:2000,event="dead",
start="ystart", end="ystop") %>%
mutate(astart=adx+ystart-ydx,
astop=adx+ystop-ydx,
age=floor(astop),
year=floor(ystop),
pt = ystop - ystart)
subset(mel.split2, id<=2, select=c(id,ystart,ystop,astart,astop,dead))
## id ystart ystop astart astop dead
## 1 1 1981.849 1982.000 81.50000 81.65124 0
## 2 1 1982.000 1982.349 81.65124 82.00000 0
## 3 1 1982.349 1983.000 82.00000 82.65124 0
## 4 1 1983.000 1983.349 82.65124 83.00000 0
## 5 1 1983.349 1984.000 83.00000 83.65124 0
## 6 1 1984.000 1984.057 83.65124 83.70833 1
## 7 2 1975.764 1976.000 75.50000 75.73612 0
## 8 2 1976.000 1976.264 75.73612 76.00000 0
## 9 2 1976.264 1977.000 76.00000 76.73612 0
## 10 2 1977.000 1977.264 76.73612 77.00000 0
## 11 2 1977.264 1978.000 77.00000 77.73612 0
## 12 2 1978.000 1978.264 77.73612 78.00000 0
## 13 2 1978.264 1979.000 78.00000 78.73612 0
## 14 2 1979.000 1979.264 78.73612 79.00000 0
## 15 2 1979.264 1980.000 79.00000 79.73612 0
## 16 2 1980.000 1980.264 79.73612 80.00000 0
## 17 2 1980.264 1980.389 80.00000 80.12500 1
(c)
xtabs(pt ~ age+year, data=mel.split2, subset = age>=50 & age<60)
## year
## age 1975 1976 1977 1978 1979 1980 1981
## 50 1.000000 3.603060 7.906637 11.925716 17.494141 20.496879 22.912350
## 51 0.500000 6.937242 8.486716 13.378194 18.352832 23.275632 23.493867
## 52 0.500000 2.996546 11.506658 10.193065 17.135965 26.424032 27.705951
## 53 1.500000 5.889945 8.513261 15.494141 15.766774 25.264265 29.231409
## 54 0.500000 7.171796 10.339179 10.055799 19.498576 16.952798 27.570625
## 55 0.000000 4.257365 11.056839 19.360640 15.994967 22.657317 20.551637
## 56 1.000000 3.543054 8.250849 16.044980 20.164933 18.723588 28.117325
## 57 1.500000 3.482532 8.764730 14.754184 21.427618 26.946902 22.319046
## 58 0.000000 5.403899 9.139716 13.280943 20.743041 23.920363 31.134145
## 59 1.500000 1.601577 8.486958 12.762207 18.902991 26.348935 26.806123
## year
## age 1982 1983 1984 1985 1986 1987 1988
## 50 20.286478 27.592597 30.238966 35.267431 42.891679 43.262827 46.144640
## 51 29.015661 27.370469 35.314369 33.376629 40.765793 46.657353 53.231624
## 52 27.958274 35.807328 31.110808 35.011572 36.030441 43.384564 45.869789
## 53 34.923908 30.832284 40.881849 32.550560 40.934313 40.546691 46.557734
## 54 36.238884 42.677290 34.475587 43.312242 36.751542 44.260651 47.423662
## 55 34.597954 42.596202 49.956631 39.821916 47.657266 43.382794 50.312028
## 56 24.859339 42.418387 47.143463 50.819949 41.177071 56.113127 48.171650
## 57 33.033357 29.985188 46.035210 55.091921 53.140168 46.543022 66.622248
## 58 26.057903 35.816559 35.888393 52.113327 56.908362 49.812726 44.128938
## 59 37.922165 33.310640 36.961382 40.945762 53.040371 60.711030 57.155364
## year
## age 1989 1990 1991 1992 1993 1994 1995
## 50 51.187986 54.479292 59.384683 56.472333 53.544336 69.878641 113.962824
## 51 58.294450 56.907951 52.096521 62.210556 53.357007 51.060764 108.927541
## 52 54.640077 57.963198 62.713832 51.338677 60.188734 61.241152 84.636326
## 53 50.451338 58.560581 64.672129 66.266355 55.040111 59.802582 74.505668
## 54 54.967145 62.090566 62.660283 62.693640 69.401124 60.657280 84.256790
## 55 56.845686 58.152690 64.330020 67.130293 69.179686 69.737469 82.842934
## 56 57.384802 59.999334 59.287902 71.289814 73.125059 75.621975 89.499927
## 57 61.501839 61.223707 58.135477 67.294934 71.303695 73.745063 106.993580
## 58 73.159014 60.210305 60.468578 63.772205 69.293843 76.750406 114.119488
## 59 56.006808 74.611488 57.769978 65.273446 77.193786 69.046093 119.298183
xtabs(dead ~ age+year, data=mel.split2, subset = age>=50 & age<60)
## year
## age 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991
## 50 0 1 0 1 2 0 4 2 0 0 2 1 3 1 4 2 3
## 51 0 1 1 1 1 3 0 2 2 0 2 1 1 3 1 4 2
## 52 0 1 1 1 0 2 1 0 3 1 0 1 1 1 1 1 2
## 53 0 1 2 0 2 4 0 3 2 2 0 5 2 0 0 0 3
## 54 0 2 1 0 0 0 1 2 2 4 2 0 0 3 2 3 5
## 55 0 0 0 4 3 1 0 2 4 0 2 2 5 1 3 3 2
## 56 0 1 0 3 0 2 3 1 3 3 1 2 3 0 3 6 0
## 57 0 0 2 1 2 2 3 4 2 0 4 3 2 2 3 2 3
## 58 0 0 2 1 1 2 1 3 2 2 5 2 2 2 3 5 2
## 59 0 0 2 1 3 1 1 3 3 1 2 3 4 3 4 0 0
## year
## age 1992 1993 1994 1995
## 50 3 4 1 5
## 51 2 2 0 2
## 52 1 0 3 6
## 53 2 4 1 1
## 54 1 0 3 2
## 55 3 5 0 2
## 56 1 4 4 1
## 57 5 3 2 1
## 58 2 4 3 0
## 59 0 7 1 3
(d)
mel.split2 <- mutate(mel.split2,
age10=cut(age,seq(0,110,by=10),right=FALSE),
year10=cut(year,seq(1970,2000,by=5),right=FALSE))
head(survRate(Surv(pt,dead)~sex+age10+year10, data=mel.split2))
## sex age10 year10 tstop event
## sex=Male, age10=[0,10) , year10=[1980,1985) Male [0,10) [1980,1985) 10.152283 0
## sex=Male, age10=[0,10) , year10=[1985,1990) Male [0,10) [1985,1990) 8.556456 0
## sex=Male, age10=[0,10) , year10=[1990,1995) Male [0,10) [1990,1995) 1.541667 1
## sex=Male, age10=[10,20) , year10=[1975,1980) Male [10,20) [1975,1980) 3.912062 1
## sex=Male, age10=[10,20) , year10=[1980,1985) Male [10,20) [1980,1985) 13.850322 1
## sex=Male, age10=[10,20) , year10=[1985,1990) Male [10,20) [1985,1990) 22.890446 0
## rate lower upper
## sex=Male, age10=[0,10) , year10=[1980,1985) 0.00000000 0.000000000 0.3633547
## sex=Male, age10=[0,10) , year10=[1985,1990) 0.00000000 0.000000000 0.4311224
## sex=Male, age10=[0,10) , year10=[1990,1995) 0.64864865 0.016422362 3.6140390
## sex=Male, age10=[10,20) , year10=[1975,1980) 0.25561965 0.006471729 1.4242215
## sex=Male, age10=[10,20) , year10=[1980,1985) 0.07220049 0.001827958 0.4022754
## sex=Male, age10=[10,20) , year10=[1985,1990) 0.00000000 0.000000000 0.1611537
(e)
pt <- mutate(mel.split2,sex=unclass(sex)) %>%
group_by(sex, age, year) %>%
summarise(pt=sum(pt))
## `summarise()` regrouping output by 'sex', 'age' (override with `.groups` argument)
expected <- inner_join(popmort, pt) %>%
mutate(pt=ifelse(is.na(pt),0,pt)) %>%
group_by(sex,year) %>%
summarise(E=sum(rate*pt)) %>% ungroup
## Joining, by = c("sex", "age", "year")
## `summarise()` regrouping output by 'sex' (override with `.groups` argument)
observed <- mutate(mel.split2, sex=as.numeric(unclass(sex))) %>%
group_by(sex, year) %>%
summarise(O=sum(dead)) %>% ungroup
## `summarise()` regrouping output by 'sex' (override with `.groups` argument)
joint <- inner_join(observed,expected) %>%
mutate(SMR = O/E)
## Joining, by = c("sex", "year")
(f)
## overall SMRs
by(joint, joint$sex, function(data) poisson.test(sum(data$O), sum(data$E)))
## joint$sex: 1
##
## Exact Poisson test
##
## data: sum(data$O) time base: sum(data$E)
## number of events = 1461, time base = 553.88, p-value < 2.2e-16
## alternative hypothesis: true event rate is not equal to 1
## 95 percent confidence interval:
## 2.504219 2.776567
## sample estimates:
## event rate
## 2.637757
##
## -------------------------------------------------------------------
## joint$sex: 2
##
## Exact Poisson test
##
## data: sum(data$O) time base: sum(data$E)
## number of events = 1259, time base = 528.14, p-value < 2.2e-16
## alternative hypothesis: true event rate is not equal to 1
## 95 percent confidence interval:
## 2.253942 2.519222
## sample estimates:
## event rate
## 2.383816
## utility function to draw a confidence interval
polygon.ci <- function(time, interval, col="lightgrey")
polygon(c(time,rev(time)), c(interval[,1],rev(interval[,2])), col=col, border=col)
## modelling by calendar period
summary(fit <- glm(O ~ sex*ns(year,df=3)+offset(log(E)), data=joint, family=poisson))
##
## Call:
## glm(formula = O ~ sex * ns(year, df = 3) + offset(log(E)), family = poisson,
## data = joint)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.6070 -0.8545 -0.1148 0.9542 2.4271
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.9279 0.3342 5.769 7.99e-09 ***
## sex 0.1565 0.2213 0.707 0.4793
## ns(year, df = 3)1 -0.6453 0.2633 -2.451 0.0143 *
## ns(year, df = 3)2 -1.6645 0.7577 -2.197 0.0280 *
## ns(year, df = 3)3 -0.7814 0.1838 -4.251 2.13e-05 ***
## sex:ns(year, df = 3)1 -0.1821 0.1717 -1.061 0.2889
## sex:ns(year, df = 3)2 -0.5024 0.5004 -1.004 0.3154
## sex:ns(year, df = 3)3 -0.1331 0.1208 -1.102 0.2705
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 328.668 on 41 degrees of freedom
## Residual deviance: 54.545 on 34 degrees of freedom
## AIC: 316.45
##
## Number of Fisher Scoring iterations: 4
##
pred <- predict(fit,type="response",newdata=mutate(joint,E=1),se.fit=TRUE)
full <- cbind(mutate(joint,fit=pred$fit), confint.predictnl(pred))
ci.cols <- c("lightgrey", "grey")
matplot(full$year, full[,c("2.5 %", "97.5 %")], type="n", ylab="SMR", xlab="Calendar year")
for (i in 1:2) {
with(subset(full, sex==i), {
polygon.ci(year, cbind(`2.5 %`, `97.5 %`), col=ci.cols[i])
})
}
for (i in 1:2) {
with(subset(full, sex==i), {
lines(year,fit,col=i)
})
}
legend("topright", legend=levels(mel.split2$sex), lty=1, col=1:2, bty="n")