Loading ...
import altair as alt
import pandas as pd
from io import BytesIO
from pyodide.http import open_url
from datetime import datetime
from js import fetch
from js import console
from js import document
info_element = document.getElementById('info')
info_element.innerText = 'Loading Data ...'
start_time = datetime.now()
# read slimmed down 2022 crimes CSV data in gzip format
data_url = 'https://raw.githubusercontent.com/RandomFractals/chicago-crimes/main/data'
crimes_data_url = f'{data_url}/crimes-2022-slim.csv.gz'
data_response = await fetch(crimes_data_url)
crimes_data = await data_response.arrayBuffer()
crimes = pd.read_csv(BytesIO(crimes_data.to_py()),
sep=',',
compression='gzip',
parse_dates=['Date'],
cache_dates=True,
low_memory=False)
# calculate, log, and display data load time
end_time = datetime.now()
load_time = (end_time - start_time).total_seconds()
status = f'Data load time: {load_time} seconds'
console.log(status)
info_element.innerText = status
# Debug aid: print(crimes.head(100).to_html())

# Count reports per primary crime type and order them largest first.
crimes_by_type = (
    crimes.groupby('Primary Type')
    .size()
    .to_frame(name='Total')
    .reset_index()
    .sort_values(by=['Total'], ascending=False)
)
# Debug aid: print(crimes_by_type.to_html())

# Horizontal bar chart of the first 20 rows, i.e. the most reported types.
type_chart_encoding = {
    'x': {'field': 'Total', 'type': 'quantitative', 'title': 'Reports'},
    # '-x' orders the bars by descending report count
    'y': {'field': 'Primary Type', 'type': 'nominal', 'title': 'Crime', 'sort': '-x'},
    'tooltip': [
        {'field': 'Primary Type', 'type': 'nominal', 'title': 'Crime Type'},
        {'field': 'Total', 'type': 'quantitative', 'title': 'Reports'},
    ],
    'color': alt.value('crimson'),
}
crimes_by_type_chart = (
    alt.Chart(crimes_by_type.iloc[:20])
    .mark_bar()
    .encode(**type_chart_encoding)
    .properties(title='Chicago Crime Reports by Type - 2022')
)
# Index the crimes by their report date so they can be resampled into daily
# time series. The arrests, domestic and homicide sections further down in
# this script resample `crimes` too and rely on this index being in place.
crimes.index = crimes['Date']

# Keep only the primary type column; counting it per day gives the number
# of reports filed on that day.
crimes_by_type = crimes[['Primary Type']]

# Count reports per calendar day ('D' = daily bins).
daily_reports = crimes_by_type.resample('D').count()
daily_reports.columns = ['Reports']  # rename the counted 'Primary Type' column
daily_reports = daily_reports.reset_index()  # turn the Date index back into a column

# Line chart of daily crime reports.
# Fix: the chart title previously read "Dialy" instead of "Daily".
daily_reports_chart = alt.Chart(daily_reports).mark_line().encode(
    x='Date:T',
    y='Reports:Q',
    tooltip=['Reports', 'Date'],
    color=alt.value('crimson')
).properties(title='Daily Chicago Crime Reports - 2022')
# Select the 'Arrest' flag of every report that resulted in an arrest.
# `crimes` is indexed by Date at this point (set earlier in this script),
# which is what makes the resample below possible.
arrests = crimes[crimes['Arrest'] == True]['Arrest']

# Sum the flags per calendar day; every selected value is True, so the sum
# is the number of arrests on that day.
daily_arrests = arrests.resample('D').sum().to_frame(name='Arrests')
daily_arrests = daily_arrests.reset_index()  # turn the Date index back into a column

# Line chart of daily arrests.
# Fix: the chart title previously read "Dialy" instead of "Daily".
daily_arrests_chart = alt.Chart(daily_arrests).mark_line().encode(
    x='Date:T',
    y='Arrests:Q',
    tooltip=['Arrests', 'Date'],
    color=alt.value('crimson')
).properties(title='Daily Chicago Arrests - 2022')
# Pick the 'Domestic' flag of every report marked as domestic. `crimes` is
# indexed by Date here (set earlier in this script), so the selection can be
# resampled per day.
is_domestic = crimes['Domestic'] == True
domestic_reports = crimes[is_domestic]['Domestic']

# Every selected flag is True, so the daily sum is the daily report count.
domestic_per_day = domestic_reports.resample('D').sum()
daily_domestic_reports = domestic_per_day.to_frame(name='Reports').reset_index()

# Line chart of daily domestic crime reports.
domestic_chart_encoding = {
    'x': 'Date:T',
    'y': 'Reports:Q',
    'tooltip': ['Reports', 'Date'],
    'color': alt.value('crimson'),
}
daily_domestic_reports_chart = (
    alt.Chart(daily_domestic_reports)
    .mark_line()
    .encode(**domestic_chart_encoding)
    .properties(title='Daily Domestic Chicago Crime Reports - 2022')
)
# Count reports per location description, most frequent location first,
# and expose the counts as a 'Reports' column.
crime_locations = crimes.groupby('Location Description').size()
crime_locations = crime_locations.sort_values(ascending=False)
crime_locations = crime_locations.rename('Reports').reset_index()

# Horizontal bar chart of the first 20 rows, i.e. the top 20 locations.
location_chart_encoding = {
    'x': {'field': 'Reports', 'type': 'quantitative'},
    # '-x' orders the bars by descending report count
    'y': {'field': 'Location Description', 'type': 'nominal', 'sort': '-x'},
    'tooltip': ['Location Description', 'Reports'],
    'color': alt.value('crimson'),
}
crime_locations_chart = (
    alt.Chart(crime_locations.iloc[:20])
    .mark_bar()
    .encode(**location_chart_encoding)
    .properties(title='Top 20 Chicago Crime Locations - 2022')
)
# Load the Chicago community areas lookup table, used to plot crime
# by community area.
community_data_url = f'{data_url}/chicago-community-areas.csv'
community_areas = pd.read_csv(open_url(community_data_url))

# Count reports per community area number.
reports_by_area = crimes.groupby('Community Area').size().rename('Reports')

# Attach the counts to the lookup table by joining on the area number.
# Fix: the counts used to be assigned directly as a column, which aligns the
# Series' area-number index against the table's default 0-based row index and
# is only right if each area's row position equals its area number.
# NOTE(review): assumes the 'CommunityArea' column holds the same area numbers
# as the crimes 'Community Area' column - confirm against the CSV.
community_areas['Reports'] = community_areas['CommunityArea'].map(reports_by_area)

# Order communities by report count (highest first) and drop any row with a
# missing value, e.g. an area without matching reports.
community_crime = community_areas.sort_values(
    by='Reports', ascending=False).dropna()

# Drop the columns the charts do not use.
community_crime = community_crime.drop(
    ['CommunityArea', 'Side'], axis=1)  # axis=1 denotes columns

# Bar chart of the 20 communities with the most reports.
high_crime_communities_chart = alt.Chart(community_crime[0:20]).mark_bar().encode(
    x={'field': 'Reports', 'type': 'quantitative'},
    y={'field': 'CommunityName', 'type': 'nominal', 'title': 'Community', 'sort': '-x'},
    tooltip=['CommunityName', 'Reports'],
    color=alt.value('crimson')
).properties(title='Top 20 High Crime Chicago Communities - 2022')

# Bar chart of the 20 communities with the fewest reports, sorted ascending.
# No explicit color is set here, so this chart uses the default mark color.
low_crime_communities_chart = alt.Chart(community_crime.tail(20)).mark_bar().encode(
    x={'field': 'Reports', 'type': 'quantitative'},
    y={'field': 'CommunityName', 'type': 'nominal', 'title': 'Community', 'sort': 'x'},
    tooltip=['CommunityName', 'Reports']
).properties(title='Bottom 20 Low Crime Chicago Communities - 2022')
# Select the reports whose primary type is NARCOTICS.
is_narcotics = crimes['Primary Type'] == 'NARCOTICS'
narcotics = crimes[is_narcotics]

# Count reports per description (by counting the 'Primary Type' column
# within each group) and order them most frequent first.
narcotics_crimes = narcotics[['Primary Type', 'Description']]
narcotics_crimes = narcotics_crimes.groupby('Description').count()
narcotics_crimes = narcotics_crimes.sort_values(by='Primary Type', ascending=False)
narcotics_crimes.columns = ['Reports']  # the counted column now holds report totals
narcotics_crimes = narcotics_crimes.reset_index()

# Horizontal bar chart of the first 20 rows, i.e. the top 20 descriptions.
narcotics_chart_encoding = {
    'x': {'field': 'Reports', 'type': 'quantitative'},
    # '-x' orders the bars by descending report count
    'y': {'field': 'Description', 'type': 'nominal', 'sort': '-x'},
    'tooltip': ['Description', 'Reports'],
    'color': alt.value('crimson'),
}
narcotic_crimes_chart = (
    alt.Chart(narcotics_crimes.iloc[:20])
    .mark_bar()
    .encode(**narcotics_chart_encoding)
    .properties(title='Top 20 Chicago Narcotics Crime Reports - 2022')
)
# Select the reports whose primary type is OTHER OFFENSE.
is_other_offense = crimes['Primary Type'] == 'OTHER OFFENSE'
other_offenses = crimes[is_other_offense]

# Count reports per description (by counting the 'Primary Type' column
# within each group) and order them most frequent first.
other_offense_crimes = other_offenses[['Primary Type', 'Description']]
other_offense_crimes = other_offense_crimes.groupby('Description').count()
other_offense_crimes = other_offense_crimes.sort_values(by='Primary Type', ascending=False)
other_offense_crimes.columns = ['Reports']  # the counted column now holds report totals
other_offense_crimes = other_offense_crimes.reset_index()

# Horizontal bar chart of the first 20 rows, i.e. the top 20 descriptions.
other_offense_chart_encoding = {
    'x': {'field': 'Reports', 'type': 'quantitative'},
    # '-x' orders the bars by descending report count
    'y': {'field': 'Description', 'type': 'nominal', 'sort': '-x'},
    'tooltip': ['Description', 'Reports'],
    'color': alt.value('crimson'),
}
other_offenses_chart = (
    alt.Chart(other_offense_crimes.iloc[:20])
    .mark_bar()
    .encode(**other_offense_chart_encoding)
    .properties(title='Top 20 Other Offense Chicago Crime Reports - 2022')
)
# Pick the 'Primary Type' value of every HOMICIDE report. `crimes` is indexed
# by Date here (set earlier in this script), so the selection can be
# resampled per day.
is_homicide = crimes['Primary Type'] == 'HOMICIDE'
homicides = crimes[is_homicide]['Primary Type']

# Count homicide reports per calendar day.
homicides_per_day = homicides.resample('D').count()
daily_homicides = homicides_per_day.to_frame(name='Reports').reset_index()

# Line chart of daily homicide reports.
homicide_chart_encoding = {
    'x': 'Date:T',
    'y': 'Reports:Q',
    'tooltip': ['Date', 'Reports'],
    'color': alt.value('crimson'),
}
daily_homicides_chart = (
    alt.Chart(daily_homicides)
    .mark_line()
    .encode(**homicide_chart_encoding)
    .properties(title='Daily Chicago Homicide Reports - 2022')
)
# Stack every chart vertically, in display order. The concatenated chart is
# deliberately left as the script's final expression.
# NOTE(review): presumably the host page renders the value of this last
# expression - confirm before moving or assigning it.
charts = [
    crimes_by_type_chart,
    daily_reports_chart,
    daily_arrests_chart,
    daily_domestic_reports_chart,
    crime_locations_chart,
    high_crime_communities_chart,
    low_crime_communities_chart,
    narcotic_crimes_chart,
    other_offenses_chart,
    daily_homicides_chart,
]
alt.vconcat(*charts)