packages = [ "altair", "pandas" ]
Loading ...
import altair as alt import pandas as pd from io import BytesIO from pyodide.http import open_url from datetime import datetime from js import fetch from js import console from js import document info_element = document.getElementById('info') info_element.innerText = 'Loading Data ...' start_time = datetime.now() # read slimmed down 2022 crimes CSV data in gzip format data_url = 'https://raw.githubusercontent.com/RandomFractals/chicago-crimes/main/data' crimes_data_url = f'{data_url}/crimes-2022-slim.csv.gz' data_response = await fetch(crimes_data_url) crimes_data = await data_response.arrayBuffer() crimes = pd.read_csv(BytesIO(crimes_data.to_py()), sep=',', compression='gzip', parse_dates=['Date'], cache_dates=True, low_memory=False) # calculate, log, and display data load time end_time = datetime.now() load_time = (end_time - start_time).total_seconds() status = f'Data load time: {load_time} seconds' console.log(status) info_element.innerText = status # print(crimes.head(100).to_html()) # get crime counts by primary type crimes_by_type = crimes.groupby('Primary Type').size().to_frame(name='Total') crimes_by_type = crimes_by_type.reset_index().sort_values(by=['Total'], ascending=False) # print(crimes_by_type.to_html()) # create crimes by type bar chart crimes_by_type_chart = alt.Chart(crimes_by_type[0:20]).mark_bar().encode( x={'field': 'Total', 'type': 'quantitative', 'title': 'Reports'}, y={'field': 'Primary Type', 'type': 'nominal', 'title': 'Crime', 'sort': '-x'}, tooltip=[ {'field': 'Primary Type', 'type': 'nominal', 'title': 'Crime Type'}, {'field': 'Total', 'type': 'quantitative', 'title': 'Reports'} ], color=alt.value('crimson') ).properties(title='Chicago Crime Reports by Type - 2022') # set Date index for time series plots crimes.index = crimes['Date'] # get crimes by type crimes_by_type = crimes[['Primary Type']] # sum daily crime reports daily_reports = crimes_by_type.resample('D').count() daily_reports.columns = ['Reports'] # rename Primary Type 
column daily_reports = daily_reports.reset_index() # plot daily crime reports daily_reports_chart = alt.Chart(daily_reports).mark_line().encode( x='Date:T', y='Reports:Q', tooltip=['Reports', 'Date'], color=alt.value('crimson') ).properties(title='Dialy Chicago Crime Reports - 2022') # get arrests arrests = crimes[crimes['Arrest'] == True]['Arrest'] # sum arrests per day daily_arrests = arrests.resample('D').sum().to_frame(name='Arrests') daily_arrests = daily_arrests.reset_index() # plot daily arrests daily_arrests_chart = alt.Chart(daily_arrests).mark_line().encode( x='Date:T', y='Arrests:Q', tooltip=['Arrests', 'Date'], color=alt.value('crimson') ).properties(title='Dialy Chicago Arrests - 2022') # get domestic crime reports domestic_reports = crimes[crimes['Domestic'] == True]['Domestic'] # sum domestic crime reports per day daily_domestic_reports = domestic_reports.resample('D').sum().to_frame(name='Reports') daily_domestic_reports = daily_domestic_reports.reset_index() # plot daily domestic crime reports daily_domestic_reports_chart = alt.Chart(daily_domestic_reports).mark_line().encode( x='Date:T', y='Reports:Q', tooltip=['Reports', 'Date'], color=alt.value('crimson') ).properties(title='Daily Domestic Chicago Crime Reports - 2022') # get crime location counts crime_locations = crimes.groupby('Location Description').size() crime_locations = crime_locations.sort_values( ascending=False).rename('Reports').reset_index() # plot top 20 crime locations crime_locations_chart = alt.Chart(crime_locations[0:20]).mark_bar().encode( x={'field': 'Reports', 'type': 'quantitative'}, y={'field': 'Location Description', 'type': 'nominal', 'sort': '-x'}, tooltip=['Location Description', 'Reports'], color=alt.value('crimson') ).properties(title='Top 20 Chicago Crime Locations - 2022') # load Chicago community areas for plotting crime # by Chicago sides and community areas community_data_url = f'{data_url}/chicago-community-areas.csv' community_areas = 
pd.read_csv(open_url(community_data_url)) # get community crime stats community_areas['Reports'] = crimes.groupby( 'Community Area').size().rename('Reports') community_crime = community_areas.sort_values( by='Reports', ascending=False).dropna() # drop unused columns and reindex community_crime = community_crime.drop( ['CommunityArea', 'Side'], axis=1) # denotes column # plot 20 high crime communities high_crime_communities_chart = alt.Chart(community_crime[0:20]).mark_bar().encode( x={'field': 'Reports', 'type': 'quantitative'}, y={'field': 'CommunityName', 'type': 'nominal', 'title': 'Community', 'sort': '-x'}, tooltip=['CommunityName', 'Reports'], color=alt.value('crimson') ).properties(title='Top 20 High Crime Chicago Communities - 2022') # plot 20 low crime communities low_crime_communities_chart = alt.Chart(community_crime.tail(20)).mark_bar().encode( x={'field': 'Reports', 'type': 'quantitative'}, y={'field': 'CommunityName', 'type': 'nominal', 'title': 'Community', 'sort': 'x'}, tooltip=['CommunityName', 'Reports'] ).properties(title='Bottom 20 Low Crime Chicago Communities - 2022') # get narcotics crimes narcotics = crimes[crimes['Primary Type'] == 'NARCOTICS'] # get narcotics crime description counts narcotics_crimes = narcotics[['Primary Type', 'Description']]\ .groupby('Description').count()\ .sort_values(by='Primary Type', ascending=False) narcotics_crimes.columns = ['Reports'] # rename Primary Type column narcotics_crimes = narcotics_crimes.reset_index() # plot top 20 narcotics crime reports by description narcotic_crimes_chart = alt.Chart(narcotics_crimes[0:20]).mark_bar().encode( x={'field': 'Reports', 'type': 'quantitative'}, y={'field': 'Description', 'type': 'nominal', 'sort': '-x'}, tooltip=['Description', 'Reports'], color=alt.value('crimson') ).properties(title='Top 20 Chicago Narcotics Crime Reports - 2022') # get other offenses crimes other_offenses = crimes[crimes['Primary Type'] == 'OTHER OFFENSE'] # get other offense crime description 
counts other_offense_crimes = other_offenses[['Primary Type', 'Description']]\ .groupby('Description').count()\ .sort_values(by='Primary Type', ascending=False) other_offense_crimes.columns = ['Reports'] # rename Primary Type column other_offense_crimes = other_offense_crimes.reset_index() # plot top 20 other offense crime reports by description other_offenses_chart = alt.Chart(other_offense_crimes[0:20]).mark_bar().encode( x={'field': 'Reports', 'type': 'quantitative'}, y={'field': 'Description', 'type': 'nominal', 'sort': '-x'}, tooltip=['Description', 'Reports'], color=alt.value('crimson') ).properties(title='Top 20 Other Offense Chicago Crime Reports - 2022') # get homicides homicides = crimes[crimes['Primary Type'] == 'HOMICIDE']['Primary Type'] # create daily homicide counts daily_homicides = homicides.resample('D').count().to_frame(name='Reports') daily_homicides = daily_homicides.reset_index() daily_homicides_chart = alt.Chart(daily_homicides).mark_line().encode( x='Date:T', y='Reports:Q', tooltip=['Date', 'Reports'], color=alt.value('crimson') ).properties(title='Daily Chicago Homicide Reports - 2022') # combine and show Altair charts alt.vconcat( crimes_by_type_chart, daily_reports_chart, daily_arrests_chart, daily_domestic_reports_chart, crime_locations_chart, high_crime_communities_chart, low_crime_communities_chart, narcotic_crimes_chart, other_offenses_chart, daily_homicides_chart )