Exercise 23. Calculating SMRs/SIRs


library(biostat3) # for Surv and survfit
library(dplyr)    # for data manipulation

(a)

data(melanoma)
scale <- 365.24
mel <- mutate(melanoma,
              ydx=biostat3::year(dx),
              adx=age+0.5, # mid-point approximation
              dead=(status %in% c("Dead: cancer","Dead: other") & surv_mm<110)+0,
              surv_mm=pmin(110,surv_mm),
              astart=adx, 
              astop=adx+surv_mm/12)
mel.split <- survSplit(mel,
                       cut=1:110,
                       event="dead",start="astart", end="astop")
subset(mel.split, id<=2, select=c(id,astart,astop,dead))
##   id astart    astop dead
## 1  1   81.5 82.00000    0
## 2  1   82.0 83.00000    0
## 3  1   83.0 83.70833    1
## 4  2   75.5 76.00000    0
## 5  2   76.0 77.00000    0
## 6  2   77.0 78.00000    0
## 7  2   78.0 79.00000    0
## 8  2   79.0 80.00000    0
## 9  2   80.0 80.12500    1

(b)

mel.split <- mutate(mel.split,
                    ystart=year(dx)+astart-adx,
                    ystop=year(dx)+astop-adx)
mel.split2 <- survSplit(mel.split,
                       cut=1970:2000,event="dead",
                       start="ystart", end="ystop") %>%
    mutate(astart=adx+ystart-ydx,
           astop=adx+ystop-ydx,
           age=floor(astop),
           year=floor(ystop),
           pt = ystop - ystart)
subset(mel.split2, id<=2, select=c(id,ystart,ystop,astart,astop,dead))
##    id   ystart    ystop   astart    astop dead
## 1   1 1981.849 1982.000 81.50000 81.65124    0
## 2   1 1982.000 1982.349 81.65124 82.00000    0
## 3   1 1982.349 1983.000 82.00000 82.65124    0
## 4   1 1983.000 1983.349 82.65124 83.00000    0
## 5   1 1983.349 1984.000 83.00000 83.65124    0
## 6   1 1984.000 1984.057 83.65124 83.70833    1
## 7   2 1975.764 1976.000 75.50000 75.73612    0
## 8   2 1976.000 1976.264 75.73612 76.00000    0
## 9   2 1976.264 1977.000 76.00000 76.73612    0
## 10  2 1977.000 1977.264 76.73612 77.00000    0
## 11  2 1977.264 1978.000 77.00000 77.73612    0
## 12  2 1978.000 1978.264 77.73612 78.00000    0
## 13  2 1978.264 1979.000 78.00000 78.73612    0
## 14  2 1979.000 1979.264 78.73612 79.00000    0
## 15  2 1979.264 1980.000 79.00000 79.73612    0
## 16  2 1980.000 1980.264 79.73612 80.00000    0
## 17  2 1980.264 1980.389 80.00000 80.12500    1

(c)

xtabs(pt ~ age+year, data=mel.split2, subset = age>=50 & age<60)
##     year
## age        1975       1976       1977       1978       1979       1980       1981
##   50   1.000000   3.603060   7.906637  11.925716  17.494141  20.496879  22.912350
##   51   0.500000   6.937242   8.486716  13.378194  18.352832  23.275632  23.493867
##   52   0.500000   2.996546  11.506658  10.193065  17.135965  26.424032  27.705951
##   53   1.500000   5.889945   8.513261  15.494141  15.766774  25.264265  29.231409
##   54   0.500000   7.171796  10.339179  10.055799  19.498576  16.952798  27.570625
##   55   0.000000   4.257365  11.056839  19.360640  15.994967  22.657317  20.551637
##   56   1.000000   3.543054   8.250849  16.044980  20.164933  18.723588  28.117325
##   57   1.500000   3.482532   8.764730  14.754184  21.427618  26.946902  22.319046
##   58   0.000000   5.403899   9.139716  13.280943  20.743041  23.920363  31.134145
##   59   1.500000   1.601577   8.486958  12.762207  18.902991  26.348935  26.806123
##     year
## age        1982       1983       1984       1985       1986       1987       1988
##   50  20.286478  27.592597  30.238966  35.267431  42.891679  43.262827  46.144640
##   51  29.015661  27.370469  35.314369  33.376629  40.765793  46.657353  53.231624
##   52  27.958274  35.807328  31.110808  35.011572  36.030441  43.384564  45.869789
##   53  34.923908  30.832284  40.881849  32.550560  40.934313  40.546691  46.557734
##   54  36.238884  42.677290  34.475587  43.312242  36.751542  44.260651  47.423662
##   55  34.597954  42.596202  49.956631  39.821916  47.657266  43.382794  50.312028
##   56  24.859339  42.418387  47.143463  50.819949  41.177071  56.113127  48.171650
##   57  33.033357  29.985188  46.035210  55.091921  53.140168  46.543022  66.622248
##   58  26.057903  35.816559  35.888393  52.113327  56.908362  49.812726  44.128938
##   59  37.922165  33.310640  36.961382  40.945762  53.040371  60.711030  57.155364
##     year
## age        1989       1990       1991       1992       1993       1994       1995
##   50  51.187986  54.479292  59.384683  56.472333  53.544336  69.878641 113.962824
##   51  58.294450  56.907951  52.096521  62.210556  53.357007  51.060764 108.927541
##   52  54.640077  57.963198  62.713832  51.338677  60.188734  61.241152  84.636326
##   53  50.451338  58.560581  64.672129  66.266355  55.040111  59.802582  74.505668
##   54  54.967145  62.090566  62.660283  62.693640  69.401124  60.657280  84.256790
##   55  56.845686  58.152690  64.330020  67.130293  69.179686  69.737469  82.842934
##   56  57.384802  59.999334  59.287902  71.289814  73.125059  75.621975  89.499927
##   57  61.501839  61.223707  58.135477  67.294934  71.303695  73.745063 106.993580
##   58  73.159014  60.210305  60.468578  63.772205  69.293843  76.750406 114.119488
##   59  56.006808  74.611488  57.769978  65.273446  77.193786  69.046093 119.298183
xtabs(dead ~ age+year, data=mel.split2, subset = age>=50 & age<60)
##     year
## age  1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991
##   50    0    1    0    1    2    0    4    2    0    0    2    1    3    1    4    2    3
##   51    0    1    1    1    1    3    0    2    2    0    2    1    1    3    1    4    2
##   52    0    1    1    1    0    2    1    0    3    1    0    1    1    1    1    1    2
##   53    0    1    2    0    2    4    0    3    2    2    0    5    2    0    0    0    3
##   54    0    2    1    0    0    0    1    2    2    4    2    0    0    3    2    3    5
##   55    0    0    0    4    3    1    0    2    4    0    2    2    5    1    3    3    2
##   56    0    1    0    3    0    2    3    1    3    3    1    2    3    0    3    6    0
##   57    0    0    2    1    2    2    3    4    2    0    4    3    2    2    3    2    3
##   58    0    0    2    1    1    2    1    3    2    2    5    2    2    2    3    5    2
##   59    0    0    2    1    3    1    1    3    3    1    2    3    4    3    4    0    0
##     year
## age  1992 1993 1994 1995
##   50    3    4    1    5
##   51    2    2    0    2
##   52    1    0    3    6
##   53    2    4    1    1
##   54    1    0    3    2
##   55    3    5    0    2
##   56    1    4    4    1
##   57    5    3    2    1
##   58    2    4    3    0
##   59    0    7    1    3

(d)

mel.split2 <- mutate(mel.split2,
                     age10=cut(age,seq(0,110,by=10),right=FALSE),
                     year10=cut(year,seq(1970,2000,by=5),right=FALSE))
head(survRate(Surv(pt,dead)~sex+age10+year10, data=mel.split2))
##                                                sex   age10      year10     tstop event
## sex=Male, age10=[0,10)   , year10=[1980,1985) Male  [0,10) [1980,1985) 10.152283     0
## sex=Male, age10=[0,10)   , year10=[1985,1990) Male  [0,10) [1985,1990)  8.556456     0
## sex=Male, age10=[0,10)   , year10=[1990,1995) Male  [0,10) [1990,1995)  1.541667     1
## sex=Male, age10=[10,20)  , year10=[1975,1980) Male [10,20) [1975,1980)  3.912062     1
## sex=Male, age10=[10,20)  , year10=[1980,1985) Male [10,20) [1980,1985) 13.850322     1
## sex=Male, age10=[10,20)  , year10=[1985,1990) Male [10,20) [1985,1990) 22.890446     0
##                                                     rate       lower     upper
## sex=Male, age10=[0,10)   , year10=[1980,1985) 0.00000000 0.000000000 0.3633547
## sex=Male, age10=[0,10)   , year10=[1985,1990) 0.00000000 0.000000000 0.4311224
## sex=Male, age10=[0,10)   , year10=[1990,1995) 0.64864865 0.016422362 3.6140390
## sex=Male, age10=[10,20)  , year10=[1975,1980) 0.25561965 0.006471729 1.4242215
## sex=Male, age10=[10,20)  , year10=[1980,1985) 0.07220049 0.001827958 0.4022754
## sex=Male, age10=[10,20)  , year10=[1985,1990) 0.00000000 0.000000000 0.1611537

(e)

pt <- mutate(mel.split2,sex=unclass(sex)) %>%
    group_by(sex, age, year) %>%
    summarise(pt=sum(pt))
## `summarise()` regrouping output by 'sex', 'age' (override with `.groups` argument)
expected <- inner_join(popmort, pt) %>%
    mutate(pt=ifelse(is.na(pt),0,pt)) %>%
    group_by(sex,year) %>%
    summarise(E=sum(rate*pt)) %>% ungroup
## Joining, by = c("sex", "age", "year")
## `summarise()` regrouping output by 'sex' (override with `.groups` argument)
observed <- mutate(mel.split2, sex=as.numeric(unclass(sex))) %>%
    group_by(sex, year) %>%
    summarise(O=sum(dead)) %>% ungroup
## `summarise()` regrouping output by 'sex' (override with `.groups` argument)
joint <- inner_join(observed,expected) %>%
    mutate(SMR = O/E)
## Joining, by = c("sex", "year")

(f)

## overall SMRs
by(joint, joint$sex, function(data) poisson.test(sum(data$O), sum(data$E)))
## joint$sex: 1
## 
##  Exact Poisson test
## 
## data:  sum(data$O) time base: sum(data$E)
## number of events = 1461, time base = 553.88, p-value < 2.2e-16
## alternative hypothesis: true event rate is not equal to 1
## 95 percent confidence interval:
##  2.504219 2.776567
## sample estimates:
## event rate 
##   2.637757 
## 
## ------------------------------------------------------------------- 
## joint$sex: 2
## 
##  Exact Poisson test
## 
## data:  sum(data$O) time base: sum(data$E)
## number of events = 1259, time base = 528.14, p-value < 2.2e-16
## alternative hypothesis: true event rate is not equal to 1
## 95 percent confidence interval:
##  2.253942 2.519222
## sample estimates:
## event rate 
##   2.383816
## utility function to draw a confidence interval
polygon.ci <- function(time, interval, col="lightgrey") 
    polygon(c(time,rev(time)), c(interval[,1],rev(interval[,2])), col=col, border=col)

## modelling by calendar period
summary(fit <- glm(O ~ sex*ns(year,df=3)+offset(log(E)), data=joint, family=poisson))
## 
## Call:
## glm(formula = O ~ sex * ns(year, df = 3) + offset(log(E)), family = poisson, 
##     data = joint)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.6070  -0.8545  -0.1148   0.9542   2.4271  
## 
## Coefficients:
##                       Estimate Std. Error z value Pr(>|z|)    
## (Intercept)             1.9279     0.3342   5.769 7.99e-09 ***
## sex                     0.1565     0.2213   0.707   0.4793    
## ns(year, df = 3)1      -0.6453     0.2633  -2.451   0.0143 *  
## ns(year, df = 3)2      -1.6645     0.7577  -2.197   0.0280 *  
## ns(year, df = 3)3      -0.7814     0.1838  -4.251 2.13e-05 ***
## sex:ns(year, df = 3)1  -0.1821     0.1717  -1.061   0.2889    
## sex:ns(year, df = 3)2  -0.5024     0.5004  -1.004   0.3154    
## sex:ns(year, df = 3)3  -0.1331     0.1208  -1.102   0.2705    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 328.668  on 41  degrees of freedom
## Residual deviance:  54.545  on 34  degrees of freedom
## AIC: 316.45
## 
## Number of Fisher Scoring iterations: 4
##
pred <- predict(fit,type="response",newdata=mutate(joint,E=1),se.fit=TRUE)
full <- cbind(mutate(joint,fit=pred$fit), confint.predictnl(pred))
ci.cols <- c("lightgrey", "grey")
matplot(full$year, full[,c("2.5 %", "97.5 %")], type="n", ylab="SMR", xlab="Calendar year")
for (i in 1:2) {
    with(subset(full, sex==i), {
        polygon.ci(year, cbind(`2.5 %`, `97.5 %`), col=ci.cols[i])
    })
}
for (i in 1:2) {
    with(subset(full, sex==i), {
        lines(year,fit,col=i)
    })
}
legend("topright", legend=levels(mel.split2$sex), lty=1, col=1:2, bty="n")