First, you need to install the intsvy package from the eldafani GitHub repository and the PIAAC package (which contains the piaac data) from the pbiecek GitHub repository.
library(devtools)
library(reshape)
library(ggplot2)
# Install intsvy from GitHub only when it is missing, then attach it.
# require() merely returns FALSE for a missing package, so after a fresh
# install the package would never be attached and the piaac.*() functions
# used below would not be found on the first run.
if (!requireNamespace("intsvy", quietly = TRUE)) {
  install_github("eldafani/intsvy")
  # install_github("pbiecek/intsvy")
}
library(intsvy)
# Same pattern for PIAAC, the package that ships the piaac data.
if (!requireNamespace("PIAAC", quietly = TRUE)) {
  install_github("pbiecek/PIAAC")
}
library(PIAAC)
# Dimensions (rows, columns) of the piaac data
dim(piaac)
## [1] 152514 610
The piaac.table() function calculates proportions of groups defined by ‘variable’ within strata defined by the ‘by’ variables. The ‘data’ argument should be a data frame with the column SPFWT0 (final weights in PIAAC) and the columns SPFWT.. for BRR weights.
# Age distribution in the whole dataset
ptable <- piaac.table(variable = "AGEG10LFS", data = piaac)
ptable
## AGEG10LFS Freq Percentage Std.err.
## 1 24 or less 29242 16.81 0.02
## 2 25-34 28779 20.25 0.03
## 3 35-44 30705 21.20 0.02
## 4 45-54 31338 21.53 0.02
## 5 55 plus 32450 20.22 0.02
# Age distribution within each country
ptableC <- piaac.table(variable = "AGEG10LFS", by = "CNTRYID", data = piaac)
head(ptableC)
## CNTRYID AGEG10LFS Freq Percentage Std.err.
## 1 Austria 24 or less 898 16.00 0.04
## 2 Austria 25-34 958 19.11 0.06
## 3 Austria 35-44 1117 22.18 0.07
## 4 Austria 45-54 1188 23.83 0.07
## 5 Austria 55 plus 969 18.89 0.04
## 6 Belgium 24 or less 994 15.33 0.03
# Age distribution within each country-by-gender segment
ptableCA <- piaac.table(
  variable = "AGEG10LFS",
  by = c("CNTRYID", "GENDER_R"),
  data = piaac
)
head(ptableCA)
## CNTRYID GENDER_R AGEG10LFS Freq Percentage Std.err.
## 1 Austria Female 24 or less 450 15.55 0.06
## 2 Austria Female 25-34 479 19.32 0.07
## 3 Austria Female 35-44 557 22.09 0.10
## 4 Austria Female 45-54 607 23.84 0.09
## 5 Austria Female 55 plus 507 19.20 0.05
## 6 Austria Male 24 or less 448 16.45 0.07
The output of the piaac.table() function is of the class intsvy.table, and plot() is one of the overloaded functions for this class.
# Plot: age distribution in the whole dataset
plot(ptable)
# Plot: age distribution within countries, stacked
plot(ptableC, stacked = TRUE)
# Plot: age distribution within country-by-gender segments, stacked;
# rows containing missing values are dropped first
plot(na.omit(ptableCA), stacked = TRUE)
The piaac.mean.pv() function calculates averages of the plausible values selected by ‘pvlabel’ within strata defined by the ‘by’ variables. The ‘data’ argument should be a data frame with the column SPFWT0 (final weights in PIAAC) and the columns SPFWT.. for BRR weights.
Note that ‘pvlabel’ is one of ‘LIT’ (for literacy), ‘NUM’ (for numeracy), or ‘PSL’ (for problem solving). In the piaac data there are 10 plausible values for each of these dimensions.
# Table A2.2a from SkillsOutlook2013_ENG_Table_Chapter2
# Country averages of NUM
pmeansNC <- piaac.mean.pv(
  pvlabel = "NUM",
  by = "CNTRYID",
  data = piaac,
  export = FALSE
)
head(pmeansNC)
## CNTRYID Freq Mean s.e. SD s.e
## 1 Austria 5130 275.0 0.88 48.84 0.64
## 2 Belgium 5463 280.4 0.83 49.27 0.67
## 3 Canada 26683 265.2 0.70 55.60 0.54
## 4 Czech Republic 6102 275.7 0.93 43.59 0.78
## 5 Denmark 7328 278.3 0.73 51.13 0.59
## 6 Estonia 7632 273.1 0.53 45.45 0.48
# Country averages of NUM by age group
pmeansNCA <- piaac.mean.pv(
  pvlabel = "NUM",
  by = c("CNTRYID", "AGEG10LFS"),
  data = piaac,
  export = FALSE
)
head(pmeansNCA)
## CNTRYID AGEG10LFS Freq Mean s.e. SD s.e
## 1 Austria 24 or less 898 279.3 1.63 46.15 1.82
## 2 Austria 25-34 958 282.1 1.73 49.98 1.63
## 3 Austria 35-44 1117 281.4 2.01 50.26 1.40
## 4 Austria 45-54 1188 274.5 1.67 46.49 1.24
## 5 Austria 55 plus 969 257.5 1.74 46.83 1.47
## 6 Belgium 24 or less 994 282.8 1.74 45.07 1.63
# Country averages of NUM by age group, then gender
pmeansNCAG <- piaac.mean.pv(
  pvlabel = "NUM",
  by = c("CNTRYID", "AGEG10LFS", "GENDER_R"),
  data = piaac,
  export = FALSE
)
head(pmeansNCAG)
## CNTRYID AGEG10LFS GENDER_R Freq Mean s.e. SD s.e
## 1 Austria 24 or less Female 450 274.3 2.46 44.10 2.12
## 2 Austria 24 or less Male 448 284.0 2.47 47.53 2.85
## 3 Austria 25-34 Female 479 275.5 2.50 48.82 2.28
## 4 Austria 25-34 Male 479 288.8 2.35 50.26 2.39
## 5 Austria 35-44 Female 557 273.6 2.58 48.85 1.95
## 6 Austria 35-44 Male 560 289.1 2.84 50.47 2.34
# Country averages of NUM by gender, then age group
# (same grouping variables as above, in a different order)
pmeansNCGA <- piaac.mean.pv(
  pvlabel = "NUM",
  by = c("CNTRYID", "GENDER_R", "AGEG10LFS"),
  data = piaac,
  export = FALSE
)
head(pmeansNCGA)
## CNTRYID GENDER_R AGEG10LFS Freq Mean s.e. SD s.e
## 1 Austria Female 24 or less 450 274.3 2.46 44.10 2.12
## 2 Austria Female 25-34 479 275.5 2.50 48.82 2.28
## 3 Austria Female 35-44 557 273.6 2.58 48.85 1.95
## 4 Austria Female 45-54 607 268.3 2.09 43.44 1.50
## 5 Austria Female 55 plus 507 250.6 2.25 44.18 2.20
## 6 Austria Male 24 or less 448 284.0 2.47 47.53 2.85
The output of the piaac.mean.pv() function is of the class intsvy.mean, and plot() is one of the overloaded functions for this class.
#
# Country average NUM performance, with a title added
plot(pmeansNC) + ggtitle("Country performance in NUM")
# Without se bars -- not a good idea
plot(pmeansNC, se = FALSE)
# Sorted -- that's better
plot(pmeansNC, sort = TRUE)
#
# Country average NUM performance
# within age groups
plot(pmeansNCA, sort = TRUE)
#
# Country average NUM performance
# within gender and age groups
# (rows containing missing values are dropped first)
plot(na.omit(pmeansNCGA), sort = TRUE)
The piaac.mean() function calculates averages of ‘variable’ in groups defined by the ‘by’ variables. The ‘data’ argument should be a data frame with the column SPFWT0 (final weights in PIAAC) and the columns SPFWT.. for BRR weights.
Note that ‘variable’ should be continuous and should not be any of the plausible values (there is a separate function, piaac.mean.pv(), for them).
# Average age per country
pmeansAC <- piaac.mean(
  variable = "AGE_R",
  by = "CNTRYID",
  data = piaac,
  export = FALSE
)
head(pmeansAC)
## CNTRYID Freq Mean s.e.
## 1 Austria 0 NaN NaN
## 2 Belgium 5463 41.78 0.03
## 3 Canada 0 NaN NaN
## 4 Czech Republic 6102 40.54 0.04
## 5 Denmark 7328 41.03 0.04
## 6 Estonia 7632 40.05 0.03
# Average age per country and gender
pmeansACG <- piaac.mean(
  variable = "AGE_R",
  by = c("CNTRYID", "GENDER_R"),
  data = piaac,
  export = FALSE
)
head(pmeansACG)
## CNTRYID GENDER_R Freq Mean s.e.
## 1 Austria Female 0 NaN NaN
## 2 Austria Male 0 NaN NaN
## 3 Belgium Female 2763 41.70 0.07
## 4 Belgium Male 2700 41.86 0.07
## 5 Canada Female 0 NaN NaN
## 6 Canada Male 0 NaN NaN
As with piaac.mean.pv(), the output of piaac.mean() is of the class intsvy.mean, and plot() is overloaded for it.
# Average age per country; rows containing missing values are dropped first
plot(na.omit(pmeansAC), sort = TRUE)
# Average age per country and gender, same treatment
plot(na.omit(pmeansACG), sort = TRUE)
The piaac.reg.pv() function runs a linear regression model with ‘pvlabel’ as the dependent variable and the variables ‘x’ as independent variables. Regression models are calculated within strata defined by the ‘by’ variables. The ‘data’ argument should be a data frame with the column SPFWT0 (final weights in PIAAC) and the columns SPFWT.. for BRR weights.
Note that ‘pvlabel’ is one of ‘LIT’ (for literacy), ‘NUM’ (for numeracy), or ‘PSL’ (for problem solving). In the piaac data there are 10 plausible values for each of these dimensions.
#
# LITeracy regressed on GENDER, one model per country
rmodelLG <- piaac.reg.pv(
  pvlabel = "LIT",
  x = "GENDER_R",
  by = "CNTRYID",
  data = piaac,
  export = FALSE
)
# Show the first three elements of the result
rmodelLG[1:3]
## $Austria
## Estimate Std. Error t value
## (Intercept) 271.53 1.04 259.90
## GENDER_RFemale -4.14 1.32 -3.13
## R-squared 0.22 0.14 1.58
##
## $Belgium
## Estimate Std. Error t value
## (Intercept) 278.09 0.97 287.08
## GENDER_RFemale -5.27 1.21 -4.36
## R-squared 0.31 0.15 2.17
##
## $Canada
## Estimate Std. Error t value
## (Intercept) 274.49 0.86 317.75
## GENDER_RFemale -2.30 1.20 -1.92
## R-squared 0.06 0.05 1.04
#
# LITeracy regressed on GENDER and level of trust (I_Q06A),
# one model per country
rmodelLGI <- piaac.reg.pv(
  pvlabel = "LIT",
  x = c("GENDER_R", "I_Q06A"),
  by = "CNTRYID",
  data = piaac,
  export = FALSE
)
# Show the first three elements of the result
rmodelLGI[1:3]
## $Austria
## Estimate Std. Error t value
## (Intercept) 255.24 1.47 173.70
## GENDER_RFemale -4.03 1.33 -3.04
## I_Q06AAgree 22.90 1.81 12.62
## I_Q06ANeither agree nor disagree 15.68 2.52 6.22
## I_Q06ADisagree 28.18 2.12 13.29
## I_Q06AStrongly disagree 24.47 2.64 9.28
## R-squared 7.19 0.85 8.47
##
## $Belgium
## Estimate Std. Error t value
## (Intercept) 263.52 1.78 147.72
## GENDER_RFemale -5.21 1.23 -4.22
## I_Q06AAgree 9.33 1.92 4.86
## I_Q06ANeither agree nor disagree 23.52 2.20 10.70
## I_Q06ADisagree 23.83 2.16 11.02
## I_Q06AStrongly disagree 22.75 3.57 6.37
## R-squared 4.37 0.63 7.00
##
## $Canada
## Estimate Std. Error t value
## (Intercept) 260.38 1.62 161.09
## GENDER_RFemale -2.78 1.17 -2.38
## I_Q06AAgree 8.15 1.94 4.21
## I_Q06ANeither agree nor disagree 14.57 1.93 7.55
## I_Q06ADisagree 26.30 1.72 15.30
## I_Q06AStrongly disagree 27.02 2.79 9.68
## R-squared 3.84 0.41 9.26
The piaac.reg.pv() function outputs an object of the class intsvy.reg, for which plot() is overloaded.
# LIT ~ GENDER models, with se intervals
plot(rmodelLG, se = TRUE)
# Same plot, sorted along R square
plot(rmodelLG, se = TRUE, sort = TRUE)
# LIT ~ GENDER + I_Q06A models, with se intervals
plot(rmodelLGI, se = TRUE)
## Warning: The shape palette can deal with a maximum of 6 discrete values
## because more than 6 becomes difficult to discriminate; you have 7.
## Consider specifying shapes manually. if you must have them.
## Warning: The shape palette can deal with a maximum of 6 discrete values
## because more than 6 becomes difficult to discriminate; you have 7.
## Consider specifying shapes manually. if you must have them.
## Warning: Removed 22 rows containing missing values (geom_point).
## Warning: The shape palette can deal with a maximum of 6 discrete values
## because more than 6 becomes difficult to discriminate; you have 7.
## Consider specifying shapes manually. if you must have them.
The piaac.reg() function fits regression models with ‘y’ as the dependent variable and ‘x’ as the independent variables, within groups defined by the ‘by’ variables. The ‘data’ argument should be a data frame with the column SPFWT0 (final weights in PIAAC) and the columns SPFWT.. for BRR weights.
Note that ‘y’ should be continuous and should not be any of the plausible values (there is a separate function, piaac.reg.pv(), for them).
#
# Regression of AGE (y) on GENDER (x), one model per country
rmodelAGC <- piaac.reg(
  y = "AGE_R",
  x = "GENDER_R",
  by = "CNTRYID",
  data = piaac,
  export = FALSE
)
As with piaac.reg.pv(), the output of piaac.reg() is of the class intsvy.reg, and plot() is overloaded for it.
# AGE ~ GENDER models, with se intervals
plot(rmodelAGC, se = TRUE)
# Same plot, sorted along R square
plot(rmodelAGC, se = TRUE, sort = TRUE)