Introduction

Child Neglect as a Public Health Issue

Lots of stuff here about neglected children…

The objective of this tutorial: 1. Code, data for paper in progress 2. Use supervised machine learning to predict new cases of child neglect

Data Source

Methodology

# Base URL path
library(RSocrata)
library(tidyverse)
## ── Attaching packages ───────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.2.0     ✔ purrr   0.3.2
## ✔ tibble  2.1.3     ✔ dplyr   0.8.3
## ✔ tidyr   0.8.3     ✔ stringr 1.4.0
## ✔ readr   1.3.1     ✔ forcats 0.4.0
## ── Conflicts ──────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(dplyr)
base_url = "https://data.lacity.org/resource/63jg-8b9z.json?"

#7749esl6s83yobtq1033yn1gq -- ID
#3bpoe7q2jkpodx0025841q59ct6asvpwgyzlpwuijqxlec95fb -- secret
my_token <- "w0BkWUPZYzjQRwNEVX8KEijw4"
fdf <- read.socrata(base_url, my_token)
#secret token 6epp7RDgzqjARarbW5EpUEJm-ZPFe1-EROQK
glimpse(fdf)
## Observations: 2,002,224
## Variables: 28
## $ dr_no          <chr> "001307355", "011401303", "070309629", "090631215…
## $ date_rptd      <dttm> 2010-02-20, 2010-09-13, 2010-08-09, 2010-01-05, …
## $ date_occ       <dttm> 2010-02-20, 2010-09-12, 2010-08-09, 2010-01-05, …
## $ time_occ       <chr> "1350", "0045", "1515", "0150", "2100", "1650", "…
## $ area           <chr> "13", "14", "13", "06", "01", "01", "01", "01", "…
## $ area_name      <chr> "Newton", "Pacific", "Newton", "Hollywood", "Cent…
## $ rpt_dist_no    <chr> "1385", "1485", "1324", "0646", "0176", "0162", "…
## $ part_1_2       <chr> "2", "2", "2", "2", "1", "1", "1", "1", "1", "1",…
## $ crm_cd         <chr> "900", "740", "946", "900", "122", "442", "330", …
## $ crm_cd_desc    <chr> "VIOLATION OF COURT ORDER", "VANDALISM - FELONY (…
## $ mocodes        <chr> "0913 1814 2000", "0329", "0344", "1100 0400 1402…
## $ vict_age       <chr> "48", "0", "0", "47", "47", "23", "46", "51", "30…
## $ vict_sex       <chr> "M", "M", "M", "F", "F", "M", "M", "M", "M", "M",…
## $ vict_descent   <chr> "H", "W", "H", "W", "H", "B", "H", "B", "H", "W",…
## $ premis_cd      <chr> "501", "101", "103", "101", "103", "404", "101", …
## $ premis_desc    <chr> "SINGLE FAMILY DWELLING", "STREET", "ALLEY", "STR…
## $ status         <chr> "AA", "IC", "IC", "IC", "IC", "AA", "IC", "AA", "…
## $ status_desc    <chr> "Adult Arrest", "Invest Cont", "Invest Cont", "In…
## $ crm_cd_1       <chr> "900", "740", "946", "900", "122", "442", "330", …
## $ location       <chr> "300 E  GAGE                         AV", "SEPULV…
## $ lat            <chr> "33.9825", "33.9599", "34.0224", "34.1016", "34.0…
## $ lon            <chr> "-118.2695", "-118.3962", "-118.2524", "-118.3295…
## $ cross_street   <chr> NA, "MANCHESTER                   AV", NA, "HOLLY…
## $ weapon_used_cd <chr> NA, NA, NA, "102", "400", NA, NA, "500", "400", N…
## $ weapon_desc    <chr> NA, NA, NA, "HAND GUN", "STRONG-ARM (HANDS, FIST,…
## $ crm_cd_2       <chr> NA, NA, NA, "998", NA, NA, NA, NA, NA, "998", NA,…
## $ crm_cd_3       <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ crm_cd_4       <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## [1] "list"

The following code transforms the age variable from character to numeric and filters the victim’s age to be between 6 and 12. Then, it searches the crime descriptions for the word “child.” Finally, we plot a histogram of victim’s age.

childcrimes <- fdf %>%
  mutate(vict_age = as.numeric(vict_age)) %>%
  filter(vict_age >=6 & vict_age < 13) %>%
  filter(str_detect(crm_cd_desc, 'Child|CHILD|child'))

ggplot(childcrimes, mapping = aes(vict_age)) + geom_bar()

What Neighborhoods Have the Most Child Crimes?

First, we count the number of total crimes

## # A tibble: 8 x 2
##   crm_cd_desc                                     n
##   <chr>                                       <int>
## 1 CHILD ABANDONMENT                              26
## 2 CHILD ABUSE (PHYSICAL) - AGGRAVATED ASSAULT   518
## 3 CHILD ABUSE (PHYSICAL) - SIMPLE ASSAULT      3814
## 4 CHILD ANNOYING (17YRS & UNDER)               1740
## 5 CHILD NEGLECT (SEE 300 W.I.C.)               1674
## 6 CHILD PORNOGRAPHY                              14
## 7 CHILD STEALING                                 32
## 8 LEWD/LASCIVIOUS ACTS WITH CHILD               108

Then, we group the crimes by neighborhood and count the number of crimes by neighborhood.

crimebyarea <-  childcrimes  %>% 
  dplyr::group_by(crm_cd_desc, area_name) %>% 
  dplyr::summarise(count= n())

crimebyarea <- na.omit(crimebyarea)

wide_crimebyarea <- crimebyarea %>% spread(key = area_name, value = count)
wide_crimebyarea[is.na(wide_crimebyarea)] <- 0
wide_crimebyarea$total_col = apply(wide_crimebyarea[,-1], 1, sum)
wide_crimebyarea$percent_Mission <- wide_crimebyarea$Mission / wide_crimebyarea$total_col

Now we can plot the neighborhood with the most crimes by type of crime.

ggplot(wide_crimebyarea, aes(x= reorder(crm_cd_desc, percent_Mission), 
  y = percent_Mission, fill = percent_Mission, group = 1)) +
  geom_bar(stat = "identity") +
  ylab("Percentage of All Crimes") +
  xlab("Crime Subtypes") + scale_x_discrete(labels = c("CHILD ABANDONMENT" = "CA", 
                                        "CHILD ABUSE (PHYSICAL) - AGGRAVATED ASSAULT" = "PAAGG", 
                                        "CHILD ABUSE (PHYSICAL) - SIMPLE ASSAULT" = "PASA", 
                                        "CHILD ANNOYING (17YRS & UNDER)" = "ANNOY", 
                                        "CHILD NEGLECT (SEE 300 W.I.C.)" = "NEG",
                                        "CHILD PORNOGRAPHY" = "PORN", 
                                        "CHILD STEALING" = "STEAL", 
                                        "LEWD/LASCIVIOUS ACTS WITH CHILD" = "CSA")) +
  labs(title = "Crimes Against Children Aged 6-12 in Mission Hills",
       subtitle = "2010 - 2018, City of Los Angeles")+
  theme(axis.text.x = element_text(size=12))+ 
  theme(axis.text.y = element_text(size = 12))+
  theme(axis.title.x = element_text(size=12,color = "#424242"))+ 
  theme(axis.title.y = element_text(size = 12,color = "#424242"))+
  theme(plot.title = element_text(lineheight=3, color="black", face = "bold", size=16, hjust =0.5),
        plot.subtitle=element_text(size=14, hjust = 0.5))

The abbreviations for these crimes are as follows…. We see that about 12% of children who were abandoned were in the neighborhood of Mission Hills. Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.

Avatar
Gia Elise Barboza-Salerno
Assistant Professor in the School of Criminal Justice and Public Administration

My research interests include applied spatial policy and analysis, child welfare and criminal justice system reform, victimization by bullying, domestic abuse.