library(ggplot2)
library(ComplexUpset)
movies = as.data.frame(ggplot2movies::movies)
head(movies, 3)
title | year | length | budget | rating | votes | r1 | r2 | r3 | r4 | ⋯ | r9 | r10 | mpaa | Action | Animation | Comedy | Drama | Documentary | Romance | Short | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
<chr> | <int> | <int> | <int> | <dbl> | <int> | <dbl> | <dbl> | <dbl> | <dbl> | ⋯ | <dbl> | <dbl> | <chr> | <int> | <int> | <int> | <int> | <int> | <int> | <int> | |
1 | $ | 1971 | 121 | NA | 6.4 | 348 | 4.5 | 4.5 | 4.5 | 4.5 | ⋯ | 4.5 | 4.5 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | |
2 | $1000 a Touchdown | 1939 | 71 | NA | 6.0 | 20 | 0.0 | 14.5 | 4.5 | 24.5 | ⋯ | 4.5 | 14.5 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | |
3 | $21 a Day Once a Month | 1941 | 7 | NA | 8.2 | 5 | 0.0 | 0.0 | 0.0 | 0.0 | ⋯ | 24.5 | 24.5 | 0 | 1 | 0 | 0 | 0 | 0 | 1 |
# Take the seven column names at positions 18-24; they are used throughout
# this document as the set (genre indicator) columns passed to upset().
genres = colnames(movies)[18:24]
genres
Convert the genre indicator columns to use boolean values:
movies[genres] = movies[genres] == 1
t(head(movies[genres], 3))
1 | 2 | 3 | |
---|---|---|---|
Action | FALSE | FALSE | FALSE |
Animation | FALSE | FALSE | TRUE |
Comedy | TRUE | TRUE | FALSE |
Drama | TRUE | FALSE | FALSE |
Documentary | FALSE | FALSE | FALSE |
Romance | FALSE | FALSE | FALSE |
Short | FALSE | FALSE | TRUE |
To keep the examples fast to compile we will operate on a subset of the movies with complete data:
# Recode an empty MPAA rating string as NA so that the na.omit() call below
# also drops the movies without a rating.
movies[movies$mpaa == '', 'mpaa'] = NA
# Keep only the rows that have no missing value in any column.
movies = na.omit(movies)
Utility for changing output parameters in Jupyter notebooks (IRKernel kernel), not relevant if using RStudio or scripting R from terminal:
# Set the plot output options used by Jupyter notebooks (IRkernel).
#
# w, h:   requested plot width and height; both are multiplied by `factor`
#         before being stored in repr.plot.width / repr.plot.height.
# factor: scaling factor (default 1.5); the resolution is set to 100 / factor.
#
# Returns (invisibly) whatever options() returns, i.e. the previous values of
# the options that were changed.
set_size <- function(w, h, factor=1.5) {
    scaling <- factor * 1
    plot_options <- list(
        repr.plot.width = w * scaling,
        repr.plot.height = h * scaling,
        repr.plot.res = 100 / factor,
        jupyter.plot_mimetypes = 'image/png',
        jupyter.plot_scale = 1
    )
    # options() accepts a single unnamed named-list argument
    options(plot_options)
}
There are two required arguments:
Additional arguments can be provided, such as name
(specifies xlab()
for intersection matrix) or width_ratio
(specifies how much space should be occupied by the set size panel). Other such arguments are discussed at length later in this document.
set_size(8, 3)
upset(movies, genres, name='genre', width_ratio=0.1)
We will focus on the intersections with at least ten members (min_size=10)
and on a few variables which are significantly different between the intersections (see 2. Running statistical tests).
When using min_size
, the empty groups will be skipped by default (e.g. Short movies would have no overlap with size of 10). To keep all groups pass keep_empty_groups=TRUE
:
set_size(8, 3)
(
upset(movies, genres, name='genre', width_ratio=0.1, min_size=10, wrap=TRUE, set_sizes=FALSE)
+ ggtitle('Without empty groups (Short dropped)')
+ # adding plots is possible thanks to patchwork
upset(movies, genres, name='genre', width_ratio=0.1, min_size=10, keep_empty_groups=TRUE, wrap=TRUE, set_sizes=FALSE)
+ ggtitle('With empty groups')
)
When empty columns are detected a warning will be issued. To silence it, pass warn_when_dropping_groups=FALSE
. The complementary max_size
can be used in tandem.
You can also select intersections by degree (min_degree
and max_degree
):
set_size(8, 3)
# Select intersections by degree using min_degree.
# The trailing comma after the last argument was removed: it passed an extra,
# empty positional argument to upset().
upset(
    movies, genres, width_ratio=0.1,
    min_degree=3
)
Or request a constant number of intersections with n_intersections
:
set_size(8, 3)
upset(
movies, genres, width_ratio=0.1,
n_intersections=15
)
We can add multiple annotation components (also called panels):
set_size(8, 8)
set.seed(0) # keep the same jitter for identical plots
upset(
movies,
genres,
annotations = list(
'Length'=list(
aes=aes(x=intersection, y=length),
geom=geom_boxplot()
),
'Rating'=list(
aes=aes(x=intersection, y=rating),
geom=list(
# checkout ggbeeswarm::geom_quasirandom for better results!
geom_jitter(aes(color=log10(votes))),
geom_violin(alpha=0.5)
)
),
'Budget'=list(
aes=aes(x=intersection, y=budget),
geom=geom_boxplot()
)
),
min_size=10,
width_ratio=0.1
)
For simple annotations, such as the length above, you can use a shorthand notation of upset_annotate
:
set_size(8, 6)
upset(
movies,
genres,
annotations = list(
'Length'=upset_annotate('length', geom_boxplot()),
'Budget'=upset_annotate('budget', geom_boxplot())
),
min_size=10,
width_ratio=0.1
)
You can also use barplots to demonstrate differences in proportions of categorical variables:
set_size(8, 5)
upset(
movies,
genres,
annotations = list(
'MPAA Rating'=list(
aes=aes(x=intersection, fill=mpaa),
geom=list(
geom_bar(stat='count', position='fill'),
scale_y_continuous(labels=scales::percent_format()),
scale_fill_manual(values=c(
'R'='#E41A1C', 'PG'='#377EB8',
'PG-13'='#4DAF4A', 'NC-17'='#FF7F00'
))
)
)
),
width_ratio=0.1
)
upset_test(movies, genres)
[1] "year, length, budget, rating, votes, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, mpaa differ significantly between intersections"
variable | p.value | statistic | test | fdr | |
---|---|---|---|---|---|
<chr> | <dbl> | <dbl> | <chr> | <dbl> | |
length | length | 6.511525e-71 | 422.88444 | Kruskal-Wallis rank sum test | 1.106959e-69 |
rating | rating | 1.209027e-46 | 301.72764 | Kruskal-Wallis rank sum test | 1.027673e-45 |
budget | budget | 3.899860e-44 | 288.97476 | Kruskal-Wallis rank sum test | 2.209921e-43 |
r8 | r8 | 9.900004e-39 | 261.28815 | Kruskal-Wallis rank sum test | 4.207502e-38 |
mpaa | mpaa | 3.732200e-35 | 242.77939 | Kruskal-Wallis rank sum test | 1.268948e-34 |
r9 | r9 | 1.433256e-30 | 218.78160 | Kruskal-Wallis rank sum test | 4.060891e-30 |
r1 | r1 | 2.211600e-23 | 180.32740 | Kruskal-Wallis rank sum test | 5.371029e-23 |
r4 | r4 | 1.008119e-18 | 154.62772 | Kruskal-Wallis rank sum test | 2.142254e-18 |
r3 | r3 | 2.568227e-17 | 146.70217 | Kruskal-Wallis rank sum test | 4.851095e-17 |
r5 | r5 | 9.823827e-16 | 137.66310 | Kruskal-Wallis rank sum test | 1.670051e-15 |
r7 | r7 | 9.201549e-14 | 126.19243 | Kruskal-Wallis rank sum test | 1.422058e-13 |
r2 | r2 | 2.159955e-13 | 124.00604 | Kruskal-Wallis rank sum test | 3.059936e-13 |
r10 | r10 | 1.283470e-11 | 113.38113 | Kruskal-Wallis rank sum test | 1.678384e-11 |
votes | votes | 2.209085e-10 | 105.79588 | Kruskal-Wallis rank sum test | 2.682460e-10 |
r6 | r6 | 3.779129e-05 | 70.80971 | Kruskal-Wallis rank sum test | 4.283013e-05 |
year | year | 2.745818e-02 | 46.55972 | Kruskal-Wallis rank sum test | 2.917431e-02 |
title | title | 2.600003e-01 | 34.53375 | Kruskal-Wallis rank sum test | 2.600003e-01 |
Kruskal-Wallis rank sum test
is not always the best choice.
You can either change the test for:
all the variables (test=your.test
argument), or specific variables (using tests=list(variable=some.test)
argument). The tests are called with (formula=variable ~ intersection, data)
signature, such as accepted by kruskal.test
. The result is expected to be a list with following members:
p.value
statistic
method
It is easy to adapt tests which do not obey this signature/output convention; for example the Chi-squared test and anova can be wrapped with two-line functions as follows:
# Adapter giving chisq.test() the (formula, data) signature that the tests in
# this document are called with.
# The variables named in `formula` are cross-tabulated from `data` with
# ftable(), and that table is handed to chisq.test(); the chisq.test() result
# is returned unchanged.
chisq_from_formula = function(formula, data) {
chisq.test(
ftable(formula, data)
)
}
# Adapter running a one-term analysis of variance with the (formula, data)
# signature and returning the result as a list with the members
# p.value, method and statistic.
#
# The values are read from the first row of the first table produced by
# summary(aov(...)): the 'Pr(>F)' column gives p.value and the 'F value'
# column gives statistic.
anova_single <- function(formula, data) {
    fit <- aov(formula, data)
    anova_table <- summary(fit)[[1]]
    p_value <- anova_table[['Pr(>F)']][[1]]
    f_value <- anova_table[['F value']][[1]]
    list(
        p.value=p_value,
        method='Analysis of variance Pr(>F)',
        statistic=f_value
    )
}
# Per-variable test overrides: mpaa is tested with the chi-squared wrapper and
# budget with the ANOVA wrapper defined above.
custom_tests = list(
mpaa=chisq_from_formula,
budget=anova_single
)
# Show the first rows of the results obtained with these overrides.
head(upset_test(movies, genres, tests=custom_tests))
Warning message in chisq.test(ftable(formula, data)):
“Chi-squared approximation may be incorrect”
[1] "year, length, budget, rating, votes, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, mpaa differ significantly between intersections"
variable | p.value | statistic | test | fdr | |
---|---|---|---|---|---|
<chr> | <dbl> | <dbl> | <chr> | <dbl> | |
length | length | 6.511525e-71 | 422.88444 | Kruskal-Wallis rank sum test | 1.106959e-69 |
budget | budget | 1.348209e-60 | 13.66395 | Analysis of variance Pr(>F) | 1.145977e-59 |
rating | rating | 1.209027e-46 | 301.72764 | Kruskal-Wallis rank sum test | 6.851151e-46 |
mpaa | mpaa | 9.799097e-42 | 406.33814 | Pearson’s Chi-squared test | 4.164616e-41 |
r8 | r8 | 9.900004e-39 | 261.28815 | Kruskal-Wallis rank sum test | 3.366002e-38 |
r9 | r9 | 1.433256e-30 | 218.78160 | Kruskal-Wallis rank sum test | 4.060891e-30 |
Many tests will require at least two observations in each group. You can skip intersections with less than two members with min_size=2
.
# Run bartlett.test as the test for the variables, passing min_size=2 to skip
# intersections with fewer than two members; warnings raised by the call are
# suppressed.
bartlett_results = suppressWarnings(upset_test(movies, genres, test=bartlett.test, min_size=2))
# Show the last rows of the results.
tail(bartlett_results)
[1] "NA, year, length, budget, rating, votes, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, NA differ significantly between intersections"
variable | p.value | statistic | test | fdr | |
---|---|---|---|---|---|
<chr> | <dbl> | <dbl> | <chr> | <dbl> | |
year | year | 1.041955e-67 | 386.53699 | Bartlett test of homogeneity of variances | 1.302444e-67 |
length | length | 3.982729e-67 | 383.70148 | Bartlett test of homogeneity of variances | 4.595457e-67 |
budget | budget | 7.637563e-50 | 298.89911 | Bartlett test of homogeneity of variances | 8.183103e-50 |
rating | rating | 3.980194e-06 | 66.63277 | Bartlett test of homogeneity of variances | 3.980194e-06 |
title | title | NA | NA | Bartlett test of homogeneity of variances | NA |
mpaa | mpaa | NA | NA | Bartlett test of homogeneity of variances | NA |
You may want to exclude variables which are:
In the movies example, the title variable is not a reasonable thing to compare. We can ignore it using:
# note: title no longer present
rownames(upset_test(movies, genres, ignore=c('title')))
[1] "year, length, budget, rating, votes, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, mpaa differ significantly between intersections"
The counts over the bars can be disabled:
set_size(8, 3)
upset(
movies,
genres,
base_annotations=list(
'Intersection size'=intersection_size(counts=FALSE)
),
min_size=10,
width_ratio=0.1
)
The colors can be changed, and additional annotations added:
set_size(8, 3)
upset(
movies,
genres,
base_annotations=list(
'Intersection size'=intersection_size(
text_colors=c(
on_background='brown', on_bar='yellow'
)
)
+ annotate(
geom='text', x=Inf, y=Inf,
label=paste('Total:', nrow(movies)),
vjust=1, hjust=1
)
+ ylab('Intersection size')
),
min_size=10,
width_ratio=0.1
)
Any parameter supported by geom_text
can be passed in text
list:
set_size(8, 3)
upset(
movies,
genres,
base_annotations=list(
'Intersection size'=intersection_size(
text=list(
vjust=-0.1,
hjust=-0.1,
angle=45
)
)
),
min_size=10,
width_ratio=0.1
)
set_size(8, 3)
upset(
movies,
genres,
base_annotations=list(
'Intersection size'=intersection_size(
counts=FALSE,
aes=aes(fill=mpaa)
)
),
width_ratio=0.1
)
set_size(8, 3)
upset(
movies,
genres,
base_annotations=list(
'Intersection size'=intersection_size(
counts=FALSE,
aes=aes(fill=mpaa)
) + scale_fill_manual(values=c(
'R'='#E41A1C', 'PG'='#377EB8',
'PG-13'='#4DAF4A', 'NC-17'='#FF7F00'
))
),
width_ratio=0.1
)
set_size(8, 3)
upset(
movies,
genres,
base_annotations=list(
'Intersection size'=intersection_size(
counts=FALSE,
aes=aes(fill='bars_color')
) + scale_fill_manual(values=c('bars_color'='blue'), guide='none')
),
width_ratio=0.1
)
Setting height_ratio=1
will cause the intersection matrix and the intersection size to have an equal height:
set_size(8, 3)
upset(
movies,
genres,
height_ratio=1,
width_ratio=0.1
)
You can always disable the intersection size altogether:
set_size(8, 1.6)
upset(
movies,
genres,
base_annotations=list(),
min_size=10,
width_ratio=0.1
)
It can be useful to visualise which intersections are larger than expected by chance (assuming equal probability of belonging to multiple sets); this can be achieved using the intersection size/union size ratio.
Note: this ratio cannot be computed for the null intersection (observations which do not belong to either of the groups), as denominator would be 0.
Important note: with early min/max trimming the intersection ratio uses the trimmed denominator. In most cases you probably want to set
min_max_early=FALSE
when plotting ratios with any kind of filtering imposed.
set_size(8, 6)
upset(
movies, genres, name='genre', width_ratio=0.1, min_size=10,
base_annotations=list(
'Intersection size'=intersection_size(),
'Intersection ratio'=intersection_ratio()
),
min_max_early=FALSE
)
Warning message:
“Removed 62 rows containing missing values (position_stack).”
The plot above tells us that the analysed documentary movies are almost always (in over 60% of cases) documentaries (and nothing more!), while comedies more often include elements of other genres (e.g. drama, romance) rather than being comedies alone (like stand-up shows).
text_aes
can be used to manipulate the aesthetics of the labels. Using the intersection_size
and union_size
one can calculate the percentage of items in the intersection (relative to the potential size of the intersection). An upset_text_percentage(digits=0, sep='')
shorthand is provided for convenience; please note that it has to be used with aes_
rather than aes
:
set_size(8, 6)
upset(
movies, genres, name='genre', width_ratio=0.1, min_size=10,
base_annotations=list(
# with manual aes specification:
'Intersection size'=intersection_size(text_aes=aes(label=paste0(round(intersection_size/union_size * 100), '%'))),
# using shorthand:
'Intersection ratio'=intersection_ratio(text_aes=aes_(label=upset_text_percentage()))
),
min_max_early=FALSE
)
Warning message:
“Removed 62 rows containing missing values (position_stack).”
Also see 10. Display percentages.
set_size(8, 3)
upset(
movies, genres, width_ratio=0.1,
base_annotations = list(
'Intersection size'=(
intersection_size()
+ ylim(c(0, 700))
+ theme(plot.background=element_rect(fill='#E5D3B3'))
+ ylab('# observations in intersection')
)
),
min_size=10
)
To rotate the labels modify corresponding theme:
set_size(4, 3)
upset(
movies, genres,
min_size=100,
width_ratio=0.15,
set_sizes=(
upset_set_size(width=0.6)
+ theme(axis.text.x=element_text(angle=90))
)
)
To display the ticks:
set_size(4, 3)
upset(
movies, genres, width_ratio=0.3, min_size=100, wrap=TRUE,
set_sizes=(
upset_set_size(width=0.6)
+ theme(axis.ticks.x=element_line())
)
)
Arguments of the geom_bar
can be passed to upset_set_size
; it can even use a different geom, or be replaced with a custom list of layers altogether:
set_size(8, 3)
(
upset(
movies, genres, width_ratio=0.5, max_size=100, min_size=15, wrap=TRUE,
set_sizes=upset_set_size(
width=0.4
)
)
+
upset(
movies, genres, width_ratio=0.5, max_size=100, min_size=15, wrap=TRUE,
set_sizes=upset_set_size(
geom=geom_point,
stat='count',
color='blue'
)
)
+
upset(
movies, genres, width_ratio=0.5, max_size=100, min_size=15, wrap=TRUE,
set_sizes=(
upset_set_size(
geom=geom_point,
mapping=aes(y=..count../max(..count..)),
stat='count'
)
+ ylab('Size relative to the largest')
)
)
)
In order to use a log scale we need to pass an additional scale in the layers
argument. However, as the bars are on flipped coordinates, we need a reversed log transformation. Appropriate function, reverse_log_trans()
is provided:
set_size(5, 3)
upset(
movies, genres,
width_ratio=0.1,
min_size=10,
set_sizes=(
upset_set_size(width=0.4)
+ theme(axis.text.x=element_text(angle=90))
+ scale_y_continuous(trans=reverse_log_trans())
),
queries=list(upset_query(set='Drama', fill='blue'))
)
We can also modify the labels to display the logged values:
set_size(5, 3)
upset(
movies, genres,
min_size=10,
width_ratio=0.2,
set_sizes=upset_set_size(width=0.4)
+ scale_y_continuous(
trans=reverse_log_trans(),
labels=log10
)
+ ylab('log10(set size)')
)
Or display the actual count:
set_size(5, 3)
upset(
movies, genres,
min_size=10,
width_ratio=0.3,
set_sizes=(
upset_set_size(
width=0.4,
geom=function(...) {
list(
geom_bar(...),
geom_text(..., aes(label=..count..), hjust=1.1)
)
},
stat='count'
)
+ expand_limits(y=1100)
+ theme(axis.text.x=element_text(angle=90))
)
)
set_size(5, 3)
upset(
movies, genres,
min_size=10,
set_sizes=FALSE
)
Change the colors:
set_size(6, 4)
upset(
movies,
genres,
min_size=10,
width_ratio=0.2,
stripes=c('cornsilk1', 'deepskyblue1')
)
You can use multiple colors:
set_size(6, 4)
upset(
movies,
genres,
min_size=10,
width_ratio=0.2,
stripes=c('cornsilk1', 'deepskyblue1', 'grey90')
)
Or, set the color to white to effectively disable the stripes:
set_size(6, 4)
upset(
movies,
genres,
min_size=10,
width_ratio=0.2,
stripes='white'
)
Adding a title with ggtitle
will add it to the intersection matrix:
set_size(6, 4)
upset(movies, genres, min_size=10) + ggtitle('Intersection matrix title')
In order to add a title for the entire plot, you need to wrap the plot:
set_size(6, 4)
upset(movies, genres, min_size=10, wrap=TRUE) + ggtitle('The overlap between genres')
You need to set the plot background to transparent and adjust colors of stripes to your liking:
set_size(6, 4)
(
upset(
movies, genres, name='genre', width_ratio=0.1, min_size=10,
stripes=c(alpha('grey90', 0.45), alpha('white', 0.3))
)
& theme(plot.background=element_rect(fill='transparent', color=NA))
)
Use ggsave('upset.png', bg="transparent")
when exporting to PNG.
The themes for specific components are defined in upset_themes
list, which contains themes for:
names(upset_themes)
You can substitute this list for your own using themes
argument. While you can specify a theme for every component, if you omit one or more components those will be taken from the element named default
.
set_size(8, 4)
upset(movies, genres, min_size=10, themes=list(default=theme()))
You can also add themes for your custom panels/annotations:
set_size(8, 8)
upset(
movies,
genres,
annotations = list(
'Length'=list(
aes=aes(x=intersection, y=length),
geom=geom_boxplot()
),
'Rating'=list(
aes=aes(x=intersection, y=rating),
geom=list(
geom_jitter(aes(color=log10(votes))),
geom_violin(alpha=0.5)
)
)
),
min_size=10,
width_ratio=0.1,
themes=modifyList(
upset_themes,
list(Rating=theme_void(), Length=theme())
)
)
Modify all the default themes at once with upset_default_themes()
:
set_size(8, 4)
upset(
movies, genres, min_size=10, width_ratio=0.1,
themes=upset_default_themes(text=element_text(color='red'))
)
To modify only a subset of default themes use upset_modify_themes()
:
set_size(8, 4)
upset(
movies, genres,
base_annotations=list('Intersection size'=intersection_size(counts=FALSE)),
min_size=100,
width_ratio=0.1,
themes=upset_modify_themes(
list(
'intersections_matrix'=theme(text=element_text(size=20)),
'overall_sizes'=theme(axis.text.x=element_text(angle=90))
)
)
)
Pass a list of lists generated with upset_query()
utility to the optional queries
argument to selectively modify aesthetics of specific intersections or sets.
Use one of the arguments: set
or intersect
(not both) to specify what to highlight:
set
will highlight the bar of the set size, intersect
will highlight an intersection on all components (by default), or on components chosen with only_components
set_size(8, 6)
upset(
movies, genres, name='genre', width_ratio=0.1, min_size=10,
annotations = list(
'Length'=list(
aes=aes(x=intersection, y=length),
geom=geom_boxplot()
)
),
queries=list(
upset_query(
intersect=c('Drama', 'Comedy'),
color='red',
fill='red',
only_components=c('intersections_matrix', 'Intersection size')
),
upset_query(
set='Drama',
fill='blue'
),
upset_query(
intersect=c('Romance', 'Comedy'),
fill='yellow',
only_components=c('Length')
)
)
)
By degree:
set_size(8, 3)
upset(movies, genres, width_ratio=0.1, sort_intersections_by='degree')
By ratio:
set_size(8, 4)
upset(
movies, genres, name='genre', width_ratio=0.1, min_size=10,
sort_intersections_by='ratio',
base_annotations=list(
'Intersection size'=intersection_size(text_aes=aes_(label=upset_text_percentage())),
'Intersection ratio'=intersection_ratio(text_aes=aes_(label=upset_text_percentage()))
)
)
The other way around:
set_size(8, 3)
upset(movies, genres, width_ratio=0.1, sort_intersections='ascending')
Without any sorting:
set_size(8, 3)
upset(movies, genres, width_ratio=0.1, sort_intersections=FALSE)
Ascending:
set_size(8, 3)
upset(movies, genres, width_ratio=0.1, sort_sets='ascending')
Without sorting - preserving the order as in genres:
genres
set_size(8, 3)
upset(movies, genres, width_ratio=0.1, sort_sets=FALSE)
Use group_by='sets'
to group intersections by set. If needed, the intersections will be repeated so that they appear in each set group. Use upset_query()
with group
argument to color the intersection matrix accordingly.
set_size(8, 3)
upset(
movies, c("Action", "Comedy", "Drama"),
width_ratio=0.2,
group_by='sets',
queries=list(
upset_query(
intersect=c('Drama', 'Comedy'),
color='red',
fill='red',
only_components=c('intersections_matrix', 'Intersection size')
),
upset_query(group='Drama', color='blue'),
upset_query(group='Comedy', color='orange'),
upset_query(group='Action', color='purple'),
upset_query(set='Drama', fill='blue'),
upset_query(set='Comedy', fill='orange'),
upset_query(set='Action', fill='purple')
)
)
Use aes_percentage()
utility preceded with !!
syntax to easily display percentages. In the examples below only percentages for the movies with R rating are shown to avoid visual clutter.
# Fill scale assigning one fixed colour to each MPAA rating; reused by the
# 'MPAA Rating' panels below.
rating_scale = scale_fill_manual(values=c(
    'R'='#E41A1C', 'PG'='#377EB8',
    'PG-13'='#4DAF4A', 'NC-17'='#FF7F00'
))
# Colour scale used to show or hide text labels: values mapped to 'hide' are
# drawn transparent. The legend is disabled with guide='none', matching the
# earlier scale_fill_manual(..., guide='none') call in this document; the
# logical form guide=FALSE is deprecated in ggplot2.
show_hide_scale = scale_color_manual(values=c('show'='black', 'hide'='transparent'), guide='none')
set_size(8, 5)
upset(
movies, genres, name='genre', width_ratio=0.1, min_size=100,
annotations =list(
'MPAA Rating'=list(
aes=aes(x=intersection, fill=mpaa),
geom=list(
geom_bar(stat='count', position='fill'),
geom_text(
aes(
label=!!aes_percentage(relative_to='intersection'),
color=ifelse(mpaa == 'R', 'show', 'hide')
),
stat='count',
position=position_fill(vjust = .5)
),
scale_y_continuous(labels=scales::percent_format()),
show_hide_scale,
rating_scale
)
)
)
)
set_size(8, 5)
upset(
movies, genres, name='genre', width_ratio=0.1, min_size=100,
annotations =list(
'MPAA Rating'=list(
aes=aes(x=intersection, fill=mpaa),
geom=list(
geom_bar(stat='count', position='fill'),
geom_text(
aes(
label=!!aes_percentage(relative_to='group'),
group=mpaa,
color=ifelse(mpaa == 'R', 'show', 'hide')
),
stat='count',
position=position_fill(vjust = .5)
),
scale_y_continuous(labels=scales::percent_format()),
show_hide_scale,
rating_scale
)
)
)
)
set_size(8, 5)
upset(
movies, genres, name='genre', width_ratio=0.1, min_size=100,
annotations =list(
'MPAA Rating'=list(
aes=aes(x=intersection, fill=mpaa),
geom=list(
geom_bar(stat='count', position='fill'),
geom_text(
aes(
label=!!aes_percentage(relative_to='all'),
color=ifelse(mpaa == 'R', 'show', 'hide')
),
stat='count',
position=position_fill(vjust = .5)
),
scale_y_continuous(labels=scales::percent_format()),
show_hide_scale,
rating_scale
)
)
)
)
set_size(8, 5)
upset(
movies, genres, name='genre', width_ratio=0.1, min_size=100,
annotations =list(
'MPAA Rating'=list(
aes=aes(x=intersection, fill=mpaa),
geom=list(
geom_bar(stat='count', position='fill'),
geom_text(
aes(label=ifelse(mpaa == 'R', 'R', NA)),
stat='count',
position=position_fill(vjust = .5),
na.rm=TRUE
),
show_hide_scale,
rating_scale
)
)
)
)
set_size(8, 5)
library(patchwork)
annotations = list(
'MPAA Rating'=list(
aes=aes(x=intersection, fill=mpaa),
geom=list(
geom_bar(stat='count', position='fill')
)
)
)
set.seed(0) # for replicable example only
# Draw two random samples of 100 rows each from movies.
data_1 = movies[sample(nrow(movies), 100), ]
data_2 = movies[sample(nrow(movies), 100), ]
# Build one plot per sample; the 'MPAA Rating' panel defined above is passed
# as base_annotations for both.
u1 = upset(data_1, genres, min_size=5, base_annotations=annotations)
u2 = upset(data_2, genres, min_size=5, base_annotations=annotations)
# Put the two plots side by side with patchwork, using guides='collect'.
(u1 | u2) + plot_layout(guides='collect')
set_size(8, 3.5)
upset(
movies, genres, name='genre', width_ratio=0.1, min_size=100,
annotations =list(
'MPAA Rating'=list(
aes=aes(x=intersection, fill=mpaa),
geom=list(
geom_bar(stat='count', position='fill'),
scale_y_continuous(labels=scales::percent_format())
)
)
)
) + patchwork::plot_layout(heights=c(0.5, 1, 0.5))