16

I have the following data frame:

library(tidyverse)

tdat <- structure(list(term = c("Hepatic Fibrosis / Hepatic Stellate Cell Activation", 
"Cellular Effects of Sildenafil (Viagra)", "Epithelial Adherens Junction Signaling", 
"STAT3 Pathway", "Nitric Oxide Signaling in the Cardiovascular System", 
"LXR/RXR Activation", "NF-κB Signaling", "PTEN Signaling", "Gap Junction Signaling", 
"G-Protein Coupled Receptor Signaling", "Role of Osteoblasts, Osteoclasts and Chondrocytes in Rheumatoid Arthritis", 
"Osteoarthritis Pathway", "VDR/RXR Activation", "Axonal Guidance Signaling", 
"Basal Cell Carcinoma Signaling", "Putrescine Degradation III", 
"Tryptophan Degradation X (Mammalian, via Tryptamine)", "Factors Promoting Cardiogenesis in Vertebrates", 
"Dopamine Degradation", "Complement System", "Role of BRCA1 in DNA Damage Response", 
"Granzyme B Signaling", "GADD45 Signaling", "ATM Signaling", 
"Hereditary Breast Cancer Signaling", "Aryl Hydrocarbon Receptor Signaling", 
"Role of Oct4 in Mammalian Embryonic Stem Cell Pluripotency", 
"Factors Promoting Cardiogenesis in Vertebrates", "Sumoylation Pathway", 
"Hepatic Fibrosis / Hepatic Stellate Cell Activation", "GP6 Signaling Pathway", 
"Hepatic Fibrosis / Hepatic Stellate Cell Activation", "Intrinsic Prothrombin Activation Pathway", 
"Atherosclerosis Signaling", "Gap Junction Signaling", "LXR/RXR Activation", 
"FXR/RXR Activation", "HIF1α Signaling", "Bladder Cancer Signaling", 
"Ephrin A Signaling"), tissue = c("tissue-A", "tissue-A", "tissue-A", 
"tissue-A", "tissue-A", "tissue-A", "tissue-A", "tissue-A", "tissue-A", "tissue-A", 
"tissue-B", "tissue-B", "tissue-B", "tissue-B", "tissue-B", "tissue-B", 
"tissue-B", "tissue-B", "tissue-B", "tissue-B", "tissue-C", "tissue-C", 
"tissue-C", "tissue-C", "tissue-C", "tissue-C", "tissue-C", "tissue-C", "tissue-C", 
"tissue-C", "tissue-D", "tissue-D", "tissue-D", "tissue-D", "tissue-D", 
"tissue-D", "tissue-D", "tissue-D", "tissue-D", "tissue-D"), score = c(2.85, 
2.81, 2.53, 2.28, 2.19, 2.18, 2.13, 2.01, 1.97, 1.94, 6.01, 5.78, 
4.29, 2.85, 2.75, 2.67, 2.56, 2.32, 2.22, 2.11, 5.61, 2.91, 2.6, 
2.55, 2.23, 1.86, 1.56, 1.4, 1.34, 1.31, 6.26, 5.87, 4.47, 3.94, 
3.2, 3.17, 3.07, 2.97, 2.71, 2.61)), class = c("tbl_df", "tbl", 
"data.frame"), row.names = c(NA, -40L), .Names = c("term", "tissue", 
"score"))

tdat

#> # A tibble: 40 x 3
#>    term                                                tissue   score
#>    <chr>                                               <chr>    <dbl>
#>  1 Hepatic Fibrosis / Hepatic Stellate Cell Activation tissue-A  2.85
#>  2 Cellular Effects of Sildenafil (Viagra)             tissue-A  2.81
#>  3 Epithelial Adherens Junction Signaling              tissue-A  2.53
#>  4 STAT3 Pathway                                       tissue-A  2.28
#>  5 Nitric Oxide Signaling in the Cardiovascular System tissue-A  2.19
#>  6 LXR/RXR Activation                                  tissue-A  2.18
#>  7 NF-κB Signaling                                     tissue-A  2.13
#>  8 PTEN Signaling                                      tissue-A  2.01
#>  9 Gap Junction Signaling                              tissue-A  1.97
#> 10 G-Protein Coupled Receptor Signaling                tissue-A  1.94
#> # ... with 30 more rows

What I want to do is to make a barplot like a plot grouped by tissue and ordered descendingly according to the score in each group.

I tried this:

term_order <- tdat$term[order(tdat$tissue, tdat$score)]
tdat$term <- factor(tdat$term, levels = unique(term_order))
tdat$tissue <- factor(tdat$tissue, levels = c("tissue-C", "tissue-A", "tissue-D", "tissue-B"), ordered = TRUE)

tp <- ggplot(tdat, aes(x = score, y = term)) + 
  geom_segment(aes(yend = term), xend = 0, colour = "grey50") + 
  geom_point(size = 3, aes(colour = tissue)) + 
  theme_bw() +
  scale_colour_brewer(palette = "Dark2") +
  theme(panel.grid.major.y = element_blank()) + 
  facet_grid(tissue ~ ., scales = "free_y", space = 'free_y')    

tp

But what I get is this plot:

enter image description here

Notice that in tissue-D the term is not sorted accordingly. What's the way to go about it?

1 Answer 1

35

We can use

(1) reorder_within() function to reorder term within tissue facets.

library(tidyverse)
library(forcats)

tdat <- tdat %>% 
  mutate(term = factor(term),
         tissue = factor(tissue, levels = c("tissue-C", "tissue-A", "tissue-D", "tissue-B"), 
                         ordered = TRUE))

reorder_within <- function(x, by, within, fun = mean, sep = "___", ...) {
  new_x <- paste(x, within, sep = sep)
  stats::reorder(new_x, by, FUN = fun)
}

scale_x_reordered <- function(..., sep = "___") {
  reg <- paste0(sep, ".+$")
  ggplot2::scale_x_discrete(labels = function(x) gsub(reg, "", x), ...)
}

ggplot(tdat, aes(reorder_within(term, score, tissue), score)) +
  geom_segment(aes(xend = reorder_within(term, score, tissue), yend = 0), 
               colour = "grey50") +
  geom_point(size = 3, aes(colour = tissue)) + 
  scale_x_reordered() +
  facet_grid(tissue ~ ., scales = "free", space = "free") +
  coord_flip() +
  scale_colour_brewer(palette = "Dark2") +
  theme_bw() +
  theme(panel.grid.major.y = element_blank()) + 
  theme(legend.position = "bottom")

Or (2) similar idea

### https://trinkerrstuff.wordpress.com/2016/12/23/ordering-categories-within-ggplot2-facets/
tdat %>% 
  mutate(term = reorder(term, score)) %>%
  group_by(tissue, term) %>% 
  arrange(desc(score)) %>% 
  ungroup() %>% 
  mutate(term = factor(paste(term, tissue, sep = "__"), 
                       levels = rev(paste(term, tissue, sep = "__")))) %>%
    ggplot(aes(term, score)) +
        geom_segment(aes(xend = term, yend = 0), 
                   colour = "grey50") +
        geom_point(size = 3, aes(colour = tissue)) + 
        facet_grid(tissue ~., scales = "free", space = 'free') +
        scale_x_discrete(labels = function(x) gsub("__.+$", "", x)) +
        coord_flip() +
        scale_colour_brewer(palette = "Dark2") +
        theme_bw() +
        theme(panel.grid.major.y = element_blank()) + 
        theme(legend.position = "bottom",
              axis.ticks.y = element_blank())

Or (3) orders the entire data frame, and also orders the categories (tissue) within each facet group!

### https://drsimonj.svbtle.com/ordering-categories-within-ggplot2-facets
# 
tdat2 <- tdat %>% 
  # 1. Remove grouping
  ungroup() %>% 
  # 2. Arrange by
  #   i.  facet group (tissue)
  #   ii. value (score)
  arrange(tissue, score) %>%
  # 3. Add order column of row numbers
  mutate(order = row_number())
tdat2

#> # A tibble: 40 x 4
#>    term                                                tissue  score order
#>    <fct>                                               <ord>   <dbl> <int>
#>  1 Hepatic Fibrosis / Hepatic Stellate Cell Activation tissue~  1.31     1
#>  2 Sumoylation Pathway                                 tissue~  1.34     2
#>  3 Factors Promoting Cardiogenesis in Vertebrates      tissue~  1.4      3
#>  4 Role of Oct4 in Mammalian Embryonic Stem Cell Plur~ tissue~  1.56     4
#>  5 Aryl Hydrocarbon Receptor Signaling                 tissue~  1.86     5
#>  6 Hereditary Breast Cancer Signaling                  tissue~  2.23     6
#>  7 ATM Signaling                                       tissue~  2.55     7
#>  8 GADD45 Signaling                                    tissue~  2.6      8
#>  9 Granzyme B Signaling                                tissue~  2.91     9
#> 10 Role of BRCA1 in DNA Damage Response                tissue~  5.61    10
#> # ... with 30 more rows

ggplot(tdat2, aes(order, score)) +
  geom_segment(aes(xend = order, yend = 0), 
               colour = "grey50") +
  geom_point(size = 3, aes(colour = tissue)) +
  facet_grid(tissue ~ ., scales = "free", space = "free") +
  coord_flip() +
  scale_colour_brewer(palette = "Dark2") +
  theme_bw() +
  theme(panel.grid.major.y = element_blank()) + 
  theme(legend.position = "bottom")

# To finish we need to replace the numeric values on each x-axis 
# with the appropriate labels
ggplot(tdat2, aes(order, score)) +
  geom_segment(aes(xend = order, yend = 0), 
               colour = "grey50") +
  geom_point(size = 3, aes(colour = tissue)) + 
  scale_x_continuous(
    breaks = tdat2$order,
    labels = tdat2$term) +
  # scale_y_continuous(expand = c(0, 0)) +
  facet_grid(tissue ~ ., scales = "free", space = "free") +
  coord_flip() +
  scale_colour_brewer(palette = "Dark2") +
  theme_bw() +
  theme(panel.grid.major.y = element_blank()) + 
  theme(legend.position = "bottom",
        axis.ticks.y = element_blank())

Sign up to request clarification or add additional context in comments.

2 Comments

This answer is awesome! I used solution 2, still don't fully understand it, but it worked!
You just saved me so much time. I like solution 3. This is really elegant. Thank you.

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.