Community shooting review

Author

Evelyn Gorey

Purpose

The purpose of this code is to summarize publicly available criminal shootings recorded by the Philadelphia Police Department (PPD) that occurred citywide in the last year. This data summary is to inform a community shooting review led by the Philadelphia Department of Public Health (PDPH).

1. Load packages

# load packages
if (!require("pacman")) install.packages("pacman")
pacman::p_load(tidyverse, janitor, httr, jsonlite, ggplot2, sf, rosm, ggspatial, prettymapr, viridis)

2. Define geojson retrieval function

get_geojson <- (function(api_url) {
  
  # parse provided URL
  url <- httr::parse_url(api_url)
  
  # set query - we want all fields, geometry field, and geojson format
  url$query <- list(where = "1=1",
                    outFields = "*",
                    returnGeometry = TRUE,
                    f = "geojson")
  
  # build geojson using sf from url
  geojson_data <- sf::st_read(httr::build_url(url))
  
  return(geojson_data)
  
})

3. Load geospatial data

3.1 Philadelphia city limits

phl <- get_geojson(
  "https://services.arcgis.com/fLeGjb7u4uXqeF9q/ArcGIS/rest/services/City_Limits/FeatureServer/0/query/"
) |>
  
  # reproject 
  st_transform(crs = 3857) # 2272 for StatePlane

3.2 Public PPD shooting victims dataset

svg <- get_geojson(
  "https://services.arcgis.com/fLeGjb7u4uXqeF9q/ArcGIS/rest/services/SHOOTINGS/FeatureServer/0/query"
) |>
  
  # clean names
  janitor::clean_names() |>
  
  # apply mutations
  mutate(
    
    # get correct date and year
    date = lubridate::ymd(date), 
    year = year(date),
    
    # read DC key as character
    dc_key = as.character(dc_key),
    
    # combine race and ethnicity variables
    race_ethnicity = case_when(
      latino == 1 ~ "Hispanic",
      (race == "B" & latino == 0) ~ "Non-Hispanic Black",
      (race == "W" & latino == 0) ~ "Non-Hispanic White",
      (race == "A" & latino == 0) ~ "Non-Hispanic Asian",
      .default = "Unknown"),
    
    # label the gender/sex variable
    sex_label = case_when(
      sex == "M" ~ "Male",
      sex == "F" ~ "Female",
      .default = "Unknown"),
    
    # re-categorize age
    age_cat = cut(
      as.numeric(age), 
      breaks = c(-Inf, 18, 30, 40, 50, 60, Inf),
      labels = c("< 18",
                 "18-29",
                 "30-39",
                 "40-49",
                 "50-59",
                 "60+")),
    
    # define time periods into weeks and months
    month = month(date),
    week = isoweek(date),
    day_of_month = day(date),
    month_label = case_when(
      month == 1 ~ "Jan",
      month == 2 ~ "Feb",
      month == 3 ~ "Mar",
      month == 4 ~ "Apr",
      month == 5 ~ "May",
      month == 6 ~ "Jun",
      month == 7 ~ "Jul",
      month == 8 ~ "Aug",
      month == 9 ~ "Sep",
      month == 10 ~ "Oct",
      month == 11 ~ "Nov",
      month == 12 ~ "Dec"
    ),
    
    # label the fatal variable
    fatality = case_when(
      fatal == 0 ~ "Nonfatal",
      fatal == 1 ~ "Fatal",
      .default = "Unknown"
    )
  ) |>
  
  # reproject 
  st_transform(crs = 3857) |> # 2272 for StatePlane
  
  # extract lat and lon
  mutate(
    x = st_coordinates(geometry)[, 1],
    y = st_coordinates(geometry)[, 2]
  )

4. Define shooting review cohort

svg_filtered <- svg |>
  
  # filter to only include shootings that occurred in last year
  filter(date >= "2024-07-01" & date <= "2025-06-30") |>
  
  # add combined month+year label column
  unite(month_yr_label, month_label, year, sep = " ", remove = FALSE) |>
  
  mutate(
    # relevel year label column
    year = factor(year, levels = c("2024", "2025")),
    # relevel month yr label column             
    month_yr_label = factor(month_yr_label, 
                            levels = c("Jul 2024", "Aug 2024", "Sep 2024",
                                       "Oct 2024", "Nov 2024", "Dec 2024",
                                       "Jan 2025", "Feb 2025", "Mar 2025",
                                       "Apr 2025", "May 2025", "Jun 2025"))
  )

5. Plot heat maps by injury month

heatmaps <- ggplot() +
  # basemap
  annotation_map_tile(type = "cartolight", zoomin = 1, progress = "none") +
  # mask area outside PHL boundary
  geom_sf(data = st_difference(
    st_as_sfc(st_bbox(svg_filtered)),  # full extent
    st_union(phl)             # subtract phl boundary
  ), fill = "white", color = NA) +     # mask = white
  # heat styling: transparency
  stat_density_2d(
    data = svg_filtered,
    aes(x = x, y = y, fill = after_stat(level), alpha = after_stat(level)),
    geom = "polygon",
    color = NA,
    adjust = 0.45
  ) +
  # heat styling: color scheme and legend
  scale_fill_viridis_c(
    option = "inferno",
    name = NULL,
    labels = c("", "Sparse", "", "", "", "Dense"),
    guide = guide_colorbar(
      barwidth = unit(8, "cm"),    # wider
      barheight = unit(0.6, "cm"), # taller (if horizontal)
      title.position = "top",
      label.position = "bottom"
    )
  ) +
  scale_alpha(range = c(0.1, 0.9), guide = "none") +
  # map for each month
  facet_wrap(~ month_yr_label, ncol = 4) +
  coord_sf(crs = 3857, expand = FALSE) +
  # theme details
  theme_void() +
  labs(title = "Philadelphia firearm injury locations by month",
       subtitle = "July 2024 – June 2025 (n=1,049)",
       caption = "Data source: PPD Shooting Victims Dataset", size = 20) + 
  theme_bw(base_family = "Calibri") +
  theme(
    legend.position = "bottom",
    legend.title = element_blank(),
    legend.text = element_text(size = 30),  
    axis.title = element_blank(),
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    plot.title = element_text(face = "bold", size = 50, margin = margin(b = 20)),
    plot.caption = element_text(colour = "#7a8489", size = 28, margin = margin(b = 30)),
    plot.subtitle = element_text(size = 40, margin = margin(b = 20)),
    strip.text = element_text(size = 35),
    strip.background = element_rect(colour=NA, fill="#dedcd3"),
    panel.border = element_rect(fill = NA, color = "white")
  )