library(ggplot2)
library(ComplexUpset)

Prepare the datasets

movies = as.data.frame(ggplot2movies::movies)
head(movies, 3)
A data.frame: 3 × 24
titleyearlengthbudgetratingvotesr1r2r3r4r9r10mpaaActionAnimationComedyDramaDocumentaryRomanceShort
<chr><int><int><int><dbl><int><dbl><dbl><dbl><dbl><dbl><dbl><chr><int><int><int><int><int><int><int>
1$ 1971121NA6.43484.5 4.54.5 4.5 4.5 4.50011000
2$1000 a Touchdown 1939 71NA6.0 200.014.54.524.5 4.514.50010000
3$21 a Day Once a Month1941 7NA8.2 50.0 0.00.0 0.024.524.50100001
genres = colnames(movies)[18:24]
genres
  1. ‘Action’
  2. ‘Animation’
  3. ‘Comedy’
  4. ‘Drama’
  5. ‘Documentary’
  6. ‘Romance’
  7. ‘Short’

Convert the genre indicator columns to use boolean values:

movies[genres] = movies[genres] == 1
t(head(movies[genres], 3))
A matrix: 7 × 3 of type lgl
123
ActionFALSEFALSEFALSE
AnimationFALSEFALSE TRUE
Comedy TRUE TRUEFALSE
Drama TRUEFALSEFALSE
DocumentaryFALSEFALSEFALSE
RomanceFALSEFALSEFALSE
ShortFALSEFALSE TRUE

To keep the examples fast to compile we will operate on a subset of the movies with complete data:

movies[movies$mpaa == '', 'mpaa'] = NA
movies = na.omit(movies)

Utility for changing output parameters in Jupyter notebooks (IRKernel kernel), not relevant if using RStudio or scripting R from terminal:

# Set the size and resolution of plots rendered by the IRkernel (Jupyter).
#
# Arguments:
#   w, h   - plot width and height; both are multiplied by `factor`
#   factor - scaling factor (default 1.5); the resolution is set to
#            100 / factor, so a larger factor gives larger dimensions at a
#            proportionally lower resolution
#
# Returns (invisibly) the previous values of the modified options, as
# returned by `options()`.
set_size <- function(w, h, factor = 1.5) {
    scaling <- 1 * factor
    plot_options <- list(
        repr.plot.width = w * scaling,
        repr.plot.height = h * scaling,
        repr.plot.res = 100 / factor,
        jupyter.plot_mimetypes = 'image/png',
        jupyter.plot_scale = 1
    )
    # `options()` accepts a single list of name = value pairs
    options(plot_options)
}

0. Basic usage

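There are two required arguments:

- the first argument is expected to be a data frame containing both the group indicator variables and the covariates,
- the second argument specifies the names of the columns which indicate the group membership.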

Additional arguments can be provided, such as name (specifies xlab() for intersection matrix) or width_ratio (specifies how much space should be occupied by the set size panel). Other such arguments are discussed at length later in this document.

set_size(8, 3)
upset(movies, genres, name='genre', width_ratio=0.1)

0.1 Selecting intersections

We will focus on the intersections with at least ten members (min_size=10) and on a few variables which are significantly different between the intersections (see 2. Running statistical tests).

When using min_size, the empty groups will be skipped by default (e.g. Short movies would have no overlap with size of 10). To keep all groups pass keep_empty_groups=TRUE:

set_size(8, 3)
(
    upset(movies, genres, name='genre', width_ratio=0.1, min_size=10, wrap=TRUE, set_sizes=FALSE)
    + ggtitle('Without empty groups (Short dropped)')
    +    # adding plots is possible thanks to patchwork
    upset(movies, genres, name='genre', width_ratio=0.1, min_size=10, keep_empty_groups=TRUE, wrap=TRUE, set_sizes=FALSE)
    + ggtitle('With empty groups')
)

When empty columns are detected a warning will be issued. To silence it, pass warn_when_dropping_groups=FALSE. The complementary max_size can be used in tandem.

You can also select intersections by degree (min_degree and max_degree):

set_size(8, 3)
# Select intersections by degree; note there is no trailing comma after the
# last argument, as it would pass an additional, empty argument to upset().
upset(
    movies, genres, width_ratio=0.1,
    min_degree=3
)

Or request a constant number of intersections with n_intersections:

set_size(8, 3)
upset(
    movies, genres, width_ratio=0.1,
    n_intersections=15
)

1. Adding components

We can add multiple annotation components (also called panels):

set_size(8, 8)

set.seed(0)   # keep the same jitter for identical plots

upset(
    movies,
    genres,
    annotations = list(
        'Length'=list(
            aes=aes(x=intersection, y=length),
            geom=geom_boxplot()
        ),
        'Rating'=list(
            aes=aes(x=intersection, y=rating),
            geom=list(
                # checkout ggbeeswarm::geom_quasirandom for better results!
                geom_jitter(aes(color=log10(votes))),
                geom_violin(alpha=0.5)
            )
        ),
        'Budget'=list(
            aes=aes(x=intersection, y=budget),
            geom=geom_boxplot()
        )
    ),
    min_size=10,
    width_ratio=0.1
)

For simple annotations, such as the length above, you can use a shorthand notation of upset_annotate:

set_size(8, 6)

upset(
    movies,
    genres,
    annotations = list(
        'Length'=upset_annotate('length', geom_boxplot()),
        'Budget'=upset_annotate('budget', geom_boxplot())
    ),
    min_size=10,
    width_ratio=0.1
)

You can also use barplots to demonstrate differences in proportions of categorical variables:

set_size(8, 5)

upset(
    movies,
    genres,
    annotations = list(
        'MPAA Rating'=list(
            aes=aes(x=intersection, fill=mpaa),
            geom=list(
                geom_bar(stat='count', position='fill'),
                scale_y_continuous(labels=scales::percent_format()),
                scale_fill_manual(values=c(
                    'R'='#E41A1C', 'PG'='#377EB8',
                    'PG-13'='#4DAF4A', 'NC-17'='#FF7F00'
                ))
            )
        )
    ),
    width_ratio=0.1
)

2. Running statistical tests

upset_test(movies, genres)
[1] "year, length, budget, rating, votes, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, mpaa differ significantly between intersections"
A data.frame: 17 × 5
variablep.valuestatistictestfdr
<chr><dbl><dbl><chr><dbl>
lengthlength6.511525e-71422.88444Kruskal-Wallis rank sum test1.106959e-69
ratingrating1.209027e-46301.72764Kruskal-Wallis rank sum test1.027673e-45
budgetbudget3.899860e-44288.97476Kruskal-Wallis rank sum test2.209921e-43
r8r8 9.900004e-39261.28815Kruskal-Wallis rank sum test4.207502e-38
mpaampaa 3.732200e-35242.77939Kruskal-Wallis rank sum test1.268948e-34
r9r9 1.433256e-30218.78160Kruskal-Wallis rank sum test4.060891e-30
r1r1 2.211600e-23180.32740Kruskal-Wallis rank sum test5.371029e-23
r4r4 1.008119e-18154.62772Kruskal-Wallis rank sum test2.142254e-18
r3r3 2.568227e-17146.70217Kruskal-Wallis rank sum test4.851095e-17
r5r5 9.823827e-16137.66310Kruskal-Wallis rank sum test1.670051e-15
r7r7 9.201549e-14126.19243Kruskal-Wallis rank sum test1.422058e-13
r2r2 2.159955e-13124.00604Kruskal-Wallis rank sum test3.059936e-13
r10r10 1.283470e-11113.38113Kruskal-Wallis rank sum test1.678384e-11
votesvotes 2.209085e-10105.79588Kruskal-Wallis rank sum test2.682460e-10
r6r6 3.779129e-05 70.80971Kruskal-Wallis rank sum test4.283013e-05
yearyear 2.745818e-02 46.55972Kruskal-Wallis rank sum test2.917431e-02
titletitle 2.600003e-01 34.53375Kruskal-Wallis rank sum test2.600003e-01

Kruskal-Wallis rank sum test is not always the best choice.

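You can either change the test for all the variables (test=your.test), or for specific variables only (using the tests=list(variable=some.test) argument).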

The tests are called with (formula=variable ~ intersection, data) signature, such as accepted by kruskal.test. The result is expected to be a list with the following members: p.value, statistic and method.

It is easy to adapt tests which do not obey this signature/output convention; for example the Chi-squared test and anova can be wrapped with two-line functions as follows:

# Adapter exposing the Chi-squared test under the (formula, data) signature.
#
# Arguments:
#   formula - a formula passed, together with `data`, to ftable()
#   data    - the data in which the formula variables are looked up
#
# Returns the result of chisq.test() computed on the flat contingency table.
chisq_from_formula = function(formula, data) {
    chisq.test(
        # cross-tabulate the formula variables into a contingency table
        ftable(formula, data)
    )
}

# Adapter exposing one-way analysis of variance under the (formula, data)
# signature, returning a list with `p.value`, `method` and `statistic`.
#
# Arguments:
#   formula - model formula passed to aov()
#   data    - data frame in which the formula variables are looked up
#
# Returns a list with the p-value and the F statistic taken from the first
# row of the ANOVA table, plus a fixed `method` label.
anova_single <- function(formula, data) {
    # summary() of an aov fit is a list; its first element is the ANOVA table
    anova_table <- summary(aov(formula, data = data))[[1]]
    list(
        p.value = anova_table[['Pr(>F)']][[1]],
        method = 'Analysis of variance Pr(>F)',
        statistic = anova_table[['F value']][[1]]
    )
}

custom_tests = list(
    mpaa=chisq_from_formula,
    budget=anova_single
)
head(upset_test(movies, genres, tests=custom_tests))
Warning message in chisq.test(ftable(formula, data)):
“Chi-squared approximation may be incorrect”


[1] "year, length, budget, rating, votes, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, mpaa differ significantly between intersections"
A data.frame: 6 × 5
variablep.valuestatistictestfdr
<chr><dbl><dbl><chr><dbl>
lengthlength6.511525e-71422.88444Kruskal-Wallis rank sum test1.106959e-69
budgetbudget1.348209e-60 13.66395Analysis of variance Pr(>F) 1.145977e-59
ratingrating1.209027e-46301.72764Kruskal-Wallis rank sum test6.851151e-46
mpaampaa 9.799097e-42406.33814Pearson’s Chi-squared test 4.164616e-41
r8r8 9.900004e-39261.28815Kruskal-Wallis rank sum test3.366002e-38
r9r9 1.433256e-30218.78160Kruskal-Wallis rank sum test4.060891e-30

Many tests will require at least two observations in each group. You can skip intersections with less than two members with min_size=2.

bartlett_results = suppressWarnings(upset_test(movies, genres, test=bartlett.test, min_size=2))
tail(bartlett_results)
[1] "NA, year, length, budget, rating, votes, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, NA differ significantly between intersections"
A data.frame: 6 × 5
variablep.valuestatistictestfdr
<chr><dbl><dbl><chr><dbl>
yearyear 1.041955e-67386.53699Bartlett test of homogeneity of variances1.302444e-67
lengthlength3.982729e-67383.70148Bartlett test of homogeneity of variances4.595457e-67
budgetbudget7.637563e-50298.89911Bartlett test of homogeneity of variances8.183103e-50
ratingrating3.980194e-06 66.63277Bartlett test of homogeneity of variances3.980194e-06
titletitle NA NABartlett test of homogeneity of variances NA
mpaampaa NA NABartlett test of homogeneity of variances NA

2.1 Ignore specific variables

You may want to exclude variables which are:

In the movies example, the title variable is not a reasonable thing to compare. We can ignore it using:

# note: title no longer present
rownames(upset_test(movies, genres, ignore=c('title')))
[1] "year, length, budget, rating, votes, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, mpaa differ significantly between intersections"
  1. ‘length’
  2. ‘rating’
  3. ‘budget’
  4. ‘r8’
  5. ‘mpaa’
  6. ‘r9’
  7. ‘r1’
  8. ‘r4’
  9. ‘r3’
  10. ‘r5’
  11. ‘r7’
  12. ‘r2’
  13. ‘r10’
  14. ‘votes’
  15. ‘r6’
  16. ‘year’

3. Adjusting “Intersection size”

3.1 Counts

The counts over the bars can be disabled:

set_size(8, 3)

upset(
    movies,
    genres,
    base_annotations=list(
        'Intersection size'=intersection_size(counts=FALSE)
    ),
    min_size=10,
    width_ratio=0.1
)

The colors can be changed, and additional annotations added:

set_size(8, 3)

upset(
    movies,
    genres,
    base_annotations=list(
        'Intersection size'=intersection_size(
            text_colors=c(
                on_background='brown', on_bar='yellow'
            )
        )
        + annotate(
            geom='text', x=Inf, y=Inf,
            label=paste('Total:', nrow(movies)),
            vjust=1, hjust=1
        )
        + ylab('Intersection size')
    ),
    min_size=10,
    width_ratio=0.1
)

Any parameter supported by geom_text can be passed in text list:

set_size(8, 3)

upset(
    movies,
    genres,
    base_annotations=list(
        'Intersection size'=intersection_size(
            text=list(
                vjust=-0.1,
                hjust=-0.1,
                angle=45
            )
        )
    ),
    min_size=10,
    width_ratio=0.1
)

3.2 Fill the bars

set_size(8, 3)

upset(
    movies,
    genres,
    base_annotations=list(
        'Intersection size'=intersection_size(
            counts=FALSE,
            aes=aes(fill=mpaa)
        )
    ),
    width_ratio=0.1
)

set_size(8, 3)
upset(
    movies,
    genres,
    base_annotations=list(
        'Intersection size'=intersection_size(
            counts=FALSE,
            aes=aes(fill=mpaa)
        ) + scale_fill_manual(values=c(
            'R'='#E41A1C', 'PG'='#377EB8',
            'PG-13'='#4DAF4A', 'NC-17'='#FF7F00'
        ))
    ),
    width_ratio=0.1
)

set_size(8, 3)

upset(
    movies,
    genres,
    base_annotations=list(
        'Intersection size'=intersection_size(
            counts=FALSE,
            aes=aes(fill='bars_color')
        ) + scale_fill_manual(values=c('bars_color'='blue'), guide='none')
    ),
    width_ratio=0.1
)

3.3 Adjusting the height ratio

Setting height_ratio=1 will cause the intersection matrix and the intersection size to have an equal height:

set_size(8, 3)

upset(
    movies,
    genres,
    height_ratio=1,
    width_ratio=0.1
)

3.5 Hiding intersection size

You can always disable the intersection size altogether:

set_size(8, 1.6)
upset(
    movies,
    genres,
    base_annotations=list(),
    min_size=10,
    width_ratio=0.1
)

3.6 Showing intersection size/union size ratio

It can be useful to visualise which intersections are larger than expected by chance (assuming equal probability of belonging to multiple sets); this can be achieved using the intersection size/union size ratio.

Note: this ratio cannot be computed for the null intersection (observations which do not belong to either of the groups), as denominator would be 0.

Important note: with early min/max trimming the intersection ratio uses the trimmed denominator. In most cases you probably want to set min_max_early=FALSE when plotting ratios with any kind of filtering imposed.

set_size(8, 6)
upset(
    movies, genres, name='genre', width_ratio=0.1, min_size=10,
    base_annotations=list(
        'Intersection size'=intersection_size(),
        'Intersection ratio'=intersection_ratio()
    ),
    min_max_early=FALSE
)
Warning message:
“Removed 62 rows containing missing values (position_stack).”

The plot above tells us that the analysed documentary movies are almost always (in over 60% of cases) documentaries (and nothing more!), while comedies more often include elements of other genres (e.g. drama, romance) rather than being comedies alone (like stand-up shows).

3.7 Showing percentages

text_aes can be used to manipulate the aesthetics of the labels. Using the intersection_size and union_size one can calculate the percentage of items in the intersection (relative to the potential size of the intersection). An upset_text_percentage(digits=0, sep='') shorthand is provided for convenience; please note that it has to be used with aes_ rather than aes:

set_size(8, 6)
upset(
    movies, genres, name='genre', width_ratio=0.1, min_size=10,
    base_annotations=list(
        # with manual aes specification:
        'Intersection size'=intersection_size(text_aes=aes(label=paste0(round(intersection_size/union_size * 100), '%'))),
        # using shorthand:
        'Intersection ratio'=intersection_ratio(text_aes=aes_(label=upset_text_percentage()))
    ),
    min_max_early=FALSE
)
Warning message:
“Removed 62 rows containing missing values (position_stack).”

Also see 10. Display percentages.

3.8 Further adjustments using ggplot2 functions

set_size(8, 3)
upset(
    movies, genres, width_ratio=0.1,
    base_annotations = list(
        'Intersection size'=(
            intersection_size()
            + ylim(c(0, 700))
            + theme(plot.background=element_rect(fill='#E5D3B3'))
            + ylab('# observations in intersection')
        )
    ),
    min_size=10
)

4. Adjusting “set size”

4.1 Rotate labels

To rotate the labels modify corresponding theme:

set_size(4, 3)
upset(
    movies, genres,
    min_size=100,
    width_ratio=0.15,
    set_sizes=(
        upset_set_size(width=0.6)
        + theme(axis.text.x=element_text(angle=90))
    )
)

To display the ticks:

set_size(4, 3)
upset(
    movies, genres, width_ratio=0.3, min_size=100, wrap=TRUE,
    set_sizes=(
        upset_set_size(width=0.6)
        + theme(axis.ticks.x=element_line())
    )
)

4.2 Modify geoms and other layers

Arguments of the geom_bar can be passed to upset_set_size; it can even use a different geom, or be replaced with a custom list of layers altogether:

set_size(8, 3)

(
    upset(
        movies, genres, width_ratio=0.5, max_size=100, min_size=15, wrap=TRUE,
        set_sizes=upset_set_size(
            width=0.4
        )
    )
    +
    upset(
        movies, genres, width_ratio=0.5, max_size=100, min_size=15, wrap=TRUE,
        set_sizes=upset_set_size(
            geom=geom_point,
            stat='count',
            color='blue'
        )
    )
    +
    upset(
        movies, genres, width_ratio=0.5, max_size=100, min_size=15, wrap=TRUE,
        set_sizes=(
            upset_set_size(
                geom=geom_point,
                mapping=aes(y=..count../max(..count..)),
                stat='count'
            )
            + ylab('Size relative to the largest')
        )
    )
)

4.3 Logarithmic scale

In order to use a log scale we need to add an additional scale to upset_set_size(). However, as the bars are on flipped coordinates, we need a reversed log transformation. An appropriate function, reverse_log_trans(), is provided:

set_size(5, 3)

upset(
    movies, genres,
    width_ratio=0.1,
    min_size=10,
    set_sizes=(
        upset_set_size(width=0.4)
        + theme(axis.text.x=element_text(angle=90))
        + scale_y_continuous(trans=reverse_log_trans())
    ),
    queries=list(upset_query(set='Drama', fill='blue'))
)

We can also modify the labels to display the logged values:

set_size(5, 3)

upset(
    movies, genres,
    min_size=10,
    width_ratio=0.2,
    set_sizes=upset_set_size(width=0.4)
        + scale_y_continuous(
            trans=reverse_log_trans(),
            labels=log10
        )
        + ylab('log10(set size)')
)

Or display the actual count:

set_size(5, 3)

upset(
    movies, genres,
    min_size=10,
    width_ratio=0.3,
    set_sizes=(
        upset_set_size(
            width=0.4,
            geom=function(...) {
                list(
                    geom_bar(...),
                    geom_text(..., aes(label=..count..), hjust=1.1)
                )
            },
            stat='count'
        )
        + expand_limits(y=1100)
        + theme(axis.text.x=element_text(angle=90))
    )
)

4.4 Hide the set sizes altogether

set_size(5, 3)

upset(
    movies, genres,
    min_size=10,
    set_sizes=FALSE
)

5. Adjusting other aesthetics

5.1 Stripes

Change the colors:

set_size(6, 4)
upset(
    movies,
    genres,
    min_size=10,
    width_ratio=0.2,
    stripes=c('cornsilk1', 'deepskyblue1')
)

You can use multiple colors:

set_size(6, 4)
upset(
    movies,
    genres,
    min_size=10,
    width_ratio=0.2,
    stripes=c('cornsilk1', 'deepskyblue1', 'grey90')
)

Or, set the color to white to effectively disable the stripes:

set_size(6, 4)
upset(
    movies,
    genres,
    min_size=10,
    width_ratio=0.2,
    stripes='white'
)

5.2 Adding title

Adding a title with ggtitle will add it to the intersection matrix:

set_size(6, 4)
upset(movies, genres, min_size=10) + ggtitle('Intersection matrix title')

In order to add a title for the entire plot, you need to wrap the plot:

set_size(6, 4)
upset(movies, genres, min_size=10, wrap=TRUE) + ggtitle('The overlap between genres')

5.3 Making the plot transparent

You need to set the plot background to transparent and adjust colors of stripes to your liking:

set_size(6, 4)
(
    upset(
        movies, genres, name='genre', width_ratio=0.1, min_size=10,
        stripes=c(alpha('grey90', 0.45), alpha('white', 0.3))
    )
    & theme(plot.background=element_rect(fill='transparent', color=NA))
)

Use ggsave('upset.png', bg="transparent") when exporting to PNG.

6. Themes

The themes for specific components are defined in upset_themes list, which contains themes for:

names(upset_themes)
  1. ‘intersections_matrix’
  2. ‘Intersection size’
  3. ‘overall_sizes’
  4. ‘default’

You can substitute this list for your own using themes argument. While you can specify a theme for every component, if you omit one or more components those will be taken from the element named default.

6.1 Substituting themes

set_size(8, 4)
upset(movies, genres, min_size=10, themes=list(default=theme()))

You can also add themes for your custom panels/annotations:

set_size(8, 8)

upset(
    movies,
    genres,
    annotations = list(
        'Length'=list(
            aes=aes(x=intersection, y=length),
            geom=geom_boxplot()
        ),
        'Rating'=list(
            aes=aes(x=intersection, y=rating),
            geom=list(
                geom_jitter(aes(color=log10(votes))),
                geom_violin(alpha=0.5)
            )
        )
    ),
    min_size=10,
    width_ratio=0.1,
    themes=modifyList(
        upset_themes,
        list(Rating=theme_void(), Length=theme())
    )
)

6.2 Adjusting the default themes

Modify all the default themes at once with upset_default_themes():

set_size(8, 4)

upset(
    movies, genres, min_size=10, width_ratio=0.1,
    themes=upset_default_themes(text=element_text(color='red'))
)

To modify only a subset of default themes use upset_modify_themes():

set_size(8, 4)

upset(
    movies, genres,
    base_annotations=list('Intersection size'=intersection_size(counts=FALSE)),
    min_size=100,
    width_ratio=0.1,
    themes=upset_modify_themes(
        list(
            'intersections_matrix'=theme(text=element_text(size=20)),
            'overall_sizes'=theme(axis.text.x=element_text(angle=90))
        )
    )
)

7. Highlighting (queries)

Pass a list of lists generated with upset_query() utility to the optional queries argument to selectively modify aesthetics of specific intersections or sets.

Use one of the arguments: set or intersect (not both) to specify what to highlight:

set_size(8, 6)

upset(
    movies, genres, name='genre', width_ratio=0.1, min_size=10,
    annotations = list(
        'Length'=list(
            aes=aes(x=intersection, y=length),
            geom=geom_boxplot()
        )
    ),
    queries=list(
        upset_query(
            intersect=c('Drama', 'Comedy'),
            color='red',
            fill='red',
            only_components=c('intersections_matrix', 'Intersection size')
        ),
        upset_query(
            set='Drama',
            fill='blue'
        ),
        upset_query(
            intersect=c('Romance', 'Comedy'),
            fill='yellow',
            only_components=c('Length')
        )
    )
)

8. Sorting

8.1 Sorting intersections

By degree:

set_size(8, 3)
upset(movies, genres, width_ratio=0.1, sort_intersections_by='degree')

By ratio:

set_size(8, 4)
upset(
    movies, genres, name='genre', width_ratio=0.1, min_size=10,
    sort_intersections_by='ratio',
    base_annotations=list(
        'Intersection size'=intersection_size(text_aes=aes_(label=upset_text_percentage())),
        'Intersection ratio'=intersection_ratio(text_aes=aes_(label=upset_text_percentage()))
    )
)

The other way around:

set_size(8, 3)
upset(movies, genres, width_ratio=0.1, sort_intersections='ascending')

Without any sorting:

set_size(8, 3)
upset(movies, genres, width_ratio=0.1, sort_intersections=FALSE)

8.2 Sorting sets

Ascending:

set_size(8, 3)
upset(movies, genres, width_ratio=0.1, sort_sets='ascending')

Without sorting - preserving the order as in genres:

genres
  1. ‘Action’
  2. ‘Animation’
  3. ‘Comedy’
  4. ‘Drama’
  5. ‘Documentary’
  6. ‘Romance’
  7. ‘Short’
set_size(8, 3)
upset(movies, genres, width_ratio=0.1, sort_sets=FALSE)

9. Grouping

9.1 Grouping intersections

Use group_by='sets' to group intersections by set. If needed, the intersections will be repeated so that they appear in each set group. Use upset_query() with group argument to color the intersection matrix accordingly.

set_size(8, 3)

upset(
    movies, c("Action", "Comedy", "Drama"),
    width_ratio=0.2,
    group_by='sets',
    queries=list(
        upset_query(
            intersect=c('Drama', 'Comedy'),
            color='red',
            fill='red',
            only_components=c('intersections_matrix', 'Intersection size')
        ),
        upset_query(group='Drama', color='blue'),
        upset_query(group='Comedy', color='orange'),
        upset_query(group='Action', color='purple'),
        upset_query(set='Drama', fill='blue'),
        upset_query(set='Comedy', fill='orange'),
        upset_query(set='Action', fill='purple')
    )
)

10. Display percentages

Use aes_percentage() utility preceded with !! syntax to easily display percentages. In the examples below only percentages for the movies with R rating are shown to avoid visual clutter.

# Manual fill colours for the MPAA rating categories.
rating_scale = scale_fill_manual(values=c(
    'R'='#E41A1C', 'PG'='#377EB8',
    'PG-13'='#4DAF4A', 'NC-17'='#FF7F00'
))
# Colour scale used to show ('black') or hide ('transparent') text labels.
# The legend is suppressed with guide='none'; guide=FALSE is deprecated in
# ggplot2.
show_hide_scale = scale_color_manual(
    values=c('show'='black', 'hide'='transparent'),
    guide='none'
)

10.1 Within intersection

set_size(8, 5)

upset(
    movies, genres, name='genre', width_ratio=0.1, min_size=100,
    annotations =list(
        'MPAA Rating'=list(
            aes=aes(x=intersection, fill=mpaa),
            geom=list(
                geom_bar(stat='count', position='fill'),
                geom_text(
                    aes(
                        label=!!aes_percentage(relative_to='intersection'),
                        color=ifelse(mpaa == 'R', 'show', 'hide')
                    ),
                    stat='count',
                    position=position_fill(vjust = .5)
                ),
                scale_y_continuous(labels=scales::percent_format()),
                show_hide_scale,
                rating_scale
            )
        )
    )
)

10.2 Relative to the group

set_size(8, 5)

upset(
    movies, genres, name='genre', width_ratio=0.1, min_size=100,
    annotations =list(
        'MPAA Rating'=list(
            aes=aes(x=intersection, fill=mpaa),
            geom=list(
                geom_bar(stat='count', position='fill'),
                geom_text(
                    aes(
                        label=!!aes_percentage(relative_to='group'),
                        group=mpaa,
                        color=ifelse(mpaa == 'R', 'show', 'hide')
                    ),
                    stat='count',
                    position=position_fill(vjust = .5)
                ),
                scale_y_continuous(labels=scales::percent_format()),
                show_hide_scale,
                rating_scale
            )
        )
    )
)

10.3 Relative to all observed values

set_size(8, 5)

upset(
    movies, genres, name='genre', width_ratio=0.1, min_size=100,
    annotations =list(
        'MPAA Rating'=list(
            aes=aes(x=intersection, fill=mpaa),
            geom=list(
                geom_bar(stat='count', position='fill'),
                geom_text(
                    aes(
                        label=!!aes_percentage(relative_to='all'),
                        color=ifelse(mpaa == 'R', 'show', 'hide')
                    ),
                    stat='count',
                    position=position_fill(vjust = .5)
                ),
                scale_y_continuous(labels=scales::percent_format()),
                show_hide_scale,
                rating_scale
            )
        )
    )
)

11. Advanced usage examples

11.1 Display text on some bars only

set_size(8, 5)

upset(
    movies, genres, name='genre', width_ratio=0.1, min_size=100,
    annotations =list(
        'MPAA Rating'=list(
            aes=aes(x=intersection, fill=mpaa),
            geom=list(
                geom_bar(stat='count', position='fill'),
                geom_text(
                    aes(label=ifelse(mpaa == 'R', 'R', NA)),
                    stat='count',
                    position=position_fill(vjust = .5),
                    na.rm=TRUE
                ),
                show_hide_scale,
                rating_scale
            )
        )
    )
)

11.2 Combine multiple plots together

set_size(8, 5)
library(patchwork)

annotations = list(
    'MPAA Rating'=list(
        aes=aes(x=intersection, fill=mpaa),
        geom=list(
            geom_bar(stat='count', position='fill')
        )
    )
)
set.seed(0)    # for replicable example only

data_1 = movies[sample(nrow(movies), 100), ]
data_2 = movies[sample(nrow(movies), 100), ]

u1 = upset(data_1, genres, min_size=5, base_annotations=annotations)
u2 = upset(data_2, genres, min_size=5, base_annotations=annotations)

(u1 | u2) + plot_layout(guides='collect')

11.3 Change height of the annotations

set_size(8, 3.5)
upset(
    movies, genres, name='genre', width_ratio=0.1, min_size=100,
    annotations =list(
        'MPAA Rating'=list(
            aes=aes(x=intersection, fill=mpaa),
            geom=list(
                geom_bar(stat='count', position='fill'),
                scale_y_continuous(labels=scales::percent_format())
            )
        )
    )
) + patchwork::plot_layout(heights=c(0.5, 1, 0.5))