Patchwork & ggplot2 to display PPD firearm data as a horizontal bar plot

Author

Evelyn Gorey

Load libraries

library(tidyverse)
library(lubridate)
library(ggplot2)
library(patchwork)
library(httr)
library(jsonlite)
library(here)

Get PPD shooting victims data from ODP

Load the Philadelphia Police Department (PPD) Shooting Victims Dataset from OpenDataPhilly and apply common changes so that the data match what’s presented publicly by PPD and the Comptroller.

## -- Get PPD shooting victims data from OpenDataPhilly

svd <- httr::GET("https://phl.carto.com/api/v2/sql?q=SELECT+*+FROM+shootings") %>%
  
  # take only the HTTP response
  content(as = 'text') %>%
  
  # flatten the retrieved JSON file
  fromJSON(flatten = TRUE) %>%
  
  # take only the resulting rows 
  .$rows %>%
  
  # remove unneeded rows
  select(year:fatal, -point_x, -point_y) %>%
  
  # get correct date and year
  mutate(
    date_ = janitor::convert_to_datetime(date_), 
    year_correct = year(date_),
    
    # if officer-involved, convert NA values in 'fatal' column to nonfatal to match what PPD & Comptroller display
    fatal = case_when(
      officer_involved == 'Y' ~ 0,
      .default = as.numeric(fatal)),
    
    # recategorize race/ethnicity
    race_ethnicity = case_when(
        latino == 1 ~ "Hispanic",
        (race == "B" & latino == 0) ~ "Black*",
        (race == "W" & latino == 0) ~ "White*",
        (race == "A" & latino == 0) ~ "Asian*",
        .default = "Unknown"),
    
    # recategorize sex
    sex_label = case_when(
      sex == "M" ~ "Male",
      sex == "F" ~ "Female",
      .default = "Unknown")
    )

Summarize into separate demographics dataframes

I found it easiest to create 3 separate summarized dataframes and then call them all into separate plots before combining them with patchwork.

# create a sex summary table
svd_gender <- svd %>%
  count(sex_label) %>%
  mutate(prop = prop.table(n) * 100)

# create a race/ethnicity summary table (excluding unknown race/ethnicity)
svd_raceeth <- svd %>%
  filter(race_ethnicity != "Unknown") %>%
  count(race_ethnicity) %>%
  mutate(prop = prop.table(n) * 100)

# create an age summary table using groups from OD Stat report
svd_age <- svd %>%
  mutate(agecat = cut(
   as.numeric(age), 
   breaks = c(-Inf, 18, 30, 40, 50, 60, Inf),
   labels = c("< 18",
              "18-29",
              "30-39",
              "40-49",
              "50-59",
              "60+"))) %>%
  filter(!is.na(agecat)) %>%
  count(agecat) %>%
  mutate(prop = prop.table(n) * 100)

Define theme for the plots

This theme code block will define grid styling, legend positioning, font elements such as family, size, color, and more.

theme_ghp_simple2_horiz <- function() {
  theme_bw(base_family = "Calibri") +
    theme(
      panel.grid.major.y = element_blank(),
      panel.grid.minor.y = element_blank(),
      panel.border = element_blank(),
      axis.title =  element_blank(),
      axis.ticks = element_blank(),
      legend.position = "bottom",
      legend.title = element_blank(),
      legend.key = element_rect(colour = "transparent", fill = "white"),
      plot.title = element_text(face = "bold", size = 18),
      plot.caption = element_text(colour = "#7a8489"),
      plot.subtitle = element_text(size = 16),
      axis.text.x = element_text(size = 15),
      axis.text.y = element_text(size = 15),
      axis.title.x = element_text(size = 15,
                                  margin = margin(t = 0.15,
                                                  r = 0,
                                                  b = 0,
                                                  l = 0,
                                                  unit = "cm")),
      axis.title.y = element_text(size = 15,
                                  margin = margin(t = 0.25,
                                                  r = 0,
                                                  b = 0,
                                                  l = 0,
                                                  unit = "cm"))
    )
}

Plot all 3 summarized dataframes into one horizontal bar plot with ggplot2 and patchwork

Note: the outputted plot will likely look a bit different than the displayed preview. If exporting, be sure to change the ggsave() filename parameter to your own path, either using here() or str_c() using your own specified outpath.

## -- Define consistent colors for each dataframe - all bars for each are the same color

age_color <- rep(c("#f2bf3b"), length.out = nrow(svd_age)) # dependent on number of rows in each dataframe
raceeth_color <- rep(c("#219EBC"), length.out = nrow(svd_raceeth))
gender_color <- rep(c("#2E548A"), length.out = nrow(svd_gender))



## -- Create horizontal plot for age dataframe

age_plot <- ggplot(svd_age, aes(x = prop, y = factor(agecat, levels = rev(agecat)), fill = agecat)) + # enforce order of display
  geom_bar(width = 0.9, stat = "identity") + # set width of bars
  labs(title = "Firearm injuries in Philadelphia by demographic \ncharacteristics, 2015-2023",
       subtitle = "Age",
       x = NULL, y = NULL, # so that axis titles and legend don't show up
       fill = NULL) +
  scale_fill_manual(values = age_color) + # however many bars in plot will be same color
  geom_text(aes(label=paste0(round(prop, digits = 0),"%")), position=position_dodge(width=0.99), size = 4, vjust=0.35, hjust = -.1) + # add labels in % format to outside of bar
  theme_ghp_simple2_horiz() + # set theme w/ font family, sizes, etc. - I defined this variable in another script
  theme(legend.position = "none", axis.title.x = element_blank(), axis.text.x = element_blank(), axis.ticks.x = element_blank()) + # lower plots will have these elements, but can be empty for this one
  coord_cartesian(xlim = c(0, 100)) # scale from 0-100%


# Race/ethnicity plot
race_plot <- ggplot(svd_raceeth, aes(x = prop, y = factor(race_ethnicity, levels = rev(race_ethnicity)), fill = race_ethnicity)) +
  geom_bar(width = 0.9, stat = "identity") +
  labs(subtitle = "Race/ethnicity", # won't set title, axis titles are again null
       x = NULL, y = NULL,
       fill = NULL) +
  scale_fill_manual(values = raceeth_color) +
  geom_text(aes(label=paste0(round(prop, digits = 0),"%")), position=position_dodge(width=0.5), size = 4, vjust=0.35, hjust = -.1) +
  theme_ghp_simple2_horiz() +
  theme(legend.position = "none", axis.title.x = element_blank(), axis.text.x = element_blank(), axis.ticks.x = element_blank()) +
  coord_cartesian(xlim = c(0, 100))


# Sex plot
sex_plot <- ggplot(svd_gender, aes(x = prop, y = factor(sex_label, levels = rev(sex_label)), fill = sex_label)) +
  geom_bar(width = 0.9, stat='identity') + 
  labs(subtitle = "Sex",
       x = "Percent of firearm injuries", y = NULL,
       fill = NULL, 
       caption = "* Non-Hispanic/Latino/Latina/Latine \nData source: PPD Shooting Victims Dataset", size = 2) + # set footnotes + credits at bottom
  scale_fill_manual(values = gender_color) +
  geom_text(aes(label=paste0(round(prop, digits = 0),"%")), position=position_dodge(width=0.5), size = 4, vjust=0.35, hjust = -.1) +
  scale_x_continuous(labels = function(x) paste0(round(x, digits = 0), "%")) +
  theme_ghp_simple2_horiz() +
  theme(legend.position = "none") +
  coord_cartesian(xlim = c(0, 100)) +
  theme(axis.title.y = element_blank(),  # Hiding y-axis title for better alignment
        axis.text.y = element_text()) 



##  -- Combine the plots using patchwork and adjust aspect ratio - 

combined_plots <- (age_plot / race_plot / sex_plot) + plot_layout(heights = c(5.5, 4,2)) # manually adjust these numbers to make the plots match. (plot / plot) means that the plots are on top of one another (horizontal); + would mean that they're next to each other (vertical)



## -- Export combined plots

ggsave(plot = combined_plots,
       filename = here("Output","December 2023 Report", "Figures", "svddemographics.png"), # output path using specified R project & here() - CHANGE TO OWN OR USE STR_C
       width = 6.5, 
       height = 6)


## -- Display combined plots
combined_plots