library(tidyverse)
library(lubridate)
library(ggplot2)
library(patchwork)
library(httr)
library(jsonlite)
library(here)
Patchwork & ggplot2 to display PPD firearm data as a horizontal bar plot
Load libraries
Get PPD shooting victims data from ODP
Load the Philadelphia Police Department (PPD) Shooting Victims Dataset from OpenDataPhilly and apply common changes so that the data match what’s presented publicly by PPD and the Comptroller.
## -- Get PPD shooting victims data from OpenDataPhilly
<- httr::GET("https://phl.carto.com/api/v2/sql?q=SELECT+*+FROM+shootings") %>%
svd
# take only the HTTP response
content(as = 'text') %>%
# flatten the retrieved JSON file
fromJSON(flatten = TRUE) %>%
# take only the resulting rows
$rows %>%
.
# remove unneeded rows
select(year:fatal, -point_x, -point_y) %>%
# get correct date and year
mutate(
date_ = janitor::convert_to_datetime(date_),
year_correct = year(date_),
# if officer-involved, convert NA values in 'fatal' column to nonfatal to match what PPD & Comptroller display
fatal = case_when(
== 'Y' ~ 0,
officer_involved .default = as.numeric(fatal)),
# recategorize race/ethnicity
race_ethnicity = case_when(
== 1 ~ "Hispanic",
latino == "B" & latino == 0) ~ "Black*",
(race == "W" & latino == 0) ~ "White*",
(race == "A" & latino == 0) ~ "Asian*",
(race .default = "Unknown"),
# recategorize sex
sex_label = case_when(
== "M" ~ "Male",
sex == "F" ~ "Female",
sex .default = "Unknown")
)
Summarize into separate demographics dataframes
I found it easiest to create 3 separate summarized dataframes and then call them all into separate plots before combining them with patchwork.
# create a sex summary table
<- svd %>%
svd_gender count(sex_label) %>%
mutate(prop = prop.table(n) * 100)
# create a race/ethnicity summary table (excluding unknown race/ethnicity)
<- svd %>%
svd_raceeth filter(race_ethnicity != "Unknown") %>%
count(race_ethnicity) %>%
mutate(prop = prop.table(n) * 100)
# create an age summary table using groups from OD Stat report
<- svd %>%
svd_age mutate(agecat = cut(
as.numeric(age),
breaks = c(-Inf, 18, 30, 40, 50, 60, Inf),
labels = c("< 18",
"18-29",
"30-39",
"40-49",
"50-59",
"60+"))) %>%
filter(!is.na(agecat)) %>%
count(agecat) %>%
mutate(prop = prop.table(n) * 100)
Define theme for the plots
This theme code block will define grid styling, legend positioning, font elements such as family, size, color, and more.
<- function() {
theme_ghp_simple2_horiz theme_bw(base_family = "Calibri") +
theme(
panel.grid.major.y = element_blank(),
panel.grid.minor.y = element_blank(),
panel.border = element_blank(),
axis.title = element_blank(),
axis.ticks = element_blank(),
legend.position = "bottom",
legend.title = element_blank(),
legend.key = element_rect(colour = "transparent", fill = "white"),
plot.title = element_text(face = "bold", size = 18),
plot.caption = element_text(colour = "#7a8489"),
plot.subtitle = element_text(size = 16),
axis.text.x = element_text(size = 15),
axis.text.y = element_text(size = 15),
axis.title.x = element_text(size = 15,
margin = margin(t = 0.15,
r = 0,
b = 0,
l = 0,
unit = "cm")),
axis.title.y = element_text(size = 15,
margin = margin(t = 0.25,
r = 0,
b = 0,
l = 0,
unit = "cm"))
) }
Plot all 3 summarized dataframes into one horizontal bar plot with ggplot2 and patchwork
Note: the outputted plot will likely look a bit different than the displayed preview. If exporting, be sure to change the ggsave() filename parameter to your own path, either using here() or str_c() using your own specified outpath.
## -- Define consistent colors for each dataframe - all bars for each are the same color
<- rep(c("#f2bf3b"), length.out = nrow(svd_age)) # dependent on number of rows in each dataframe
age_color <- rep(c("#219EBC"), length.out = nrow(svd_raceeth))
raceeth_color <- rep(c("#2E548A"), length.out = nrow(svd_gender))
gender_color
## -- Create horizontal plot for age dataframe
<- ggplot(svd_age, aes(x = prop, y = factor(agecat, levels = rev(agecat)), fill = agecat)) + # enforce order of display
age_plot geom_bar(width = 0.9, stat = "identity") + # set width of bars
labs(title = "Firearm injuries in Philadelphia by demographic \ncharacteristics, 2015-2023",
subtitle = "Age",
x = NULL, y = NULL, # so that axis titles and legend don't show up
fill = NULL) +
scale_fill_manual(values = age_color) + # however many bars in plot will be same color
geom_text(aes(label=paste0(round(prop, digits = 0),"%")), position=position_dodge(width=0.99), size = 4, vjust=0.35, hjust = -.1) + # add labels in % format to outside of bar
theme_ghp_simple2_horiz() + # set theme w/ font family, sizes, etc. - I defined this variable in another script
theme(legend.position = "none", axis.title.x = element_blank(), axis.text.x = element_blank(), axis.ticks.x = element_blank()) + # lower plots will have these elements, but can be empty for this one
coord_cartesian(xlim = c(0, 100)) # scale from 0-100%
# Race/ethnicity plot
<- ggplot(svd_raceeth, aes(x = prop, y = factor(race_ethnicity, levels = rev(race_ethnicity)), fill = race_ethnicity)) +
race_plot geom_bar(width = 0.9, stat = "identity") +
labs(subtitle = "Race/ethnicity", # won't set title, axis titles are again null
x = NULL, y = NULL,
fill = NULL) +
scale_fill_manual(values = raceeth_color) +
geom_text(aes(label=paste0(round(prop, digits = 0),"%")), position=position_dodge(width=0.5), size = 4, vjust=0.35, hjust = -.1) +
theme_ghp_simple2_horiz() +
theme(legend.position = "none", axis.title.x = element_blank(), axis.text.x = element_blank(), axis.ticks.x = element_blank()) +
coord_cartesian(xlim = c(0, 100))
# Sex plot
<- ggplot(svd_gender, aes(x = prop, y = factor(sex_label, levels = rev(sex_label)), fill = sex_label)) +
sex_plot geom_bar(width = 0.9, stat='identity') +
labs(subtitle = "Sex",
x = "Percent of firearm injuries", y = NULL,
fill = NULL,
caption = "* Non-Hispanic/Latino/Latina/Latine \nData source: PPD Shooting Victims Dataset", size = 2) + # set footnotes + credits at bottom
scale_fill_manual(values = gender_color) +
geom_text(aes(label=paste0(round(prop, digits = 0),"%")), position=position_dodge(width=0.5), size = 4, vjust=0.35, hjust = -.1) +
scale_x_continuous(labels = function(x) paste0(round(x, digits = 0), "%")) +
theme_ghp_simple2_horiz() +
theme(legend.position = "none") +
coord_cartesian(xlim = c(0, 100)) +
theme(axis.title.y = element_blank(), # Hiding y-axis title for better alignment
axis.text.y = element_text())
## -- Combine the plots using patchwork and adjust aspect ratio -
<- (age_plot / race_plot / sex_plot) + plot_layout(heights = c(5.5, 4,2)) # manually adjust these numbers to make the plots match. (plot / plot) means that the plots are on top of one another (horizontal); + would mean that they're next to each other (vertical)
combined_plots
## -- Export combined plots
ggsave(plot = combined_plots,
filename = here("Output","December 2023 Report", "Figures", "svddemographics.png"), # output path using specified R project & here() - CHANGE TO OWN OR USE STR_C
width = 6.5,
height = 6)
## -- Display combined plots
combined_plots