{ "cells": [ { "cell_type": "markdown", "id": "e52e43de", "metadata": {}, "source": [ "### Imports" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import geoplot as gplt\n", "import geopandas as gpd\n", "import geoplot.crs as gcrs\n", "import pandas as pd\n", "import matplotlib as mpl\n", "import matplotlib.pyplot as plt\n", "import numpy as np" ] }, { "cell_type": "markdown", "id": "733941c8", "metadata": {}, "source": [ "### Data Ingest" ] }, { "cell_type": "code", "execution_count": 2, "id": "1edc840b", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/tmp/ipykernel_17408/2120724538.py:2: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n", " orgdata = pd.read_csv(\n" ] } ], "source": [ "# read raw data\n", "orgdata = pd.read_csv(\n", " \"data/crimes.csv\", \n", " usecols=[\"DATE OF OCCURRENCE\", \"FBI CD\", \"beat_num\"],\n", " parse_dates=[\"DATE OF OCCURRENCE\"]\n", ")\n" ] }, { "cell_type": "code", "execution_count": 3, "id": "ba2a1ef5", "metadata": {}, "outputs": [], "source": [ "\n", "# filter for trailing twelve months and only violent crime\n", "data = orgdata[\n", " (orgdata[\"DATE OF OCCURRENCE\"] > \"2022-09-13 00:00:00\") &\n", " (orgdata[\"FBI CD\"].isin([\"01A\", \"02\", \"03\", \"04A\", \"04B\"]))]\n", "\n", "# get shapefile for CPD beats\n", "types = {\n", " \"beat\": int,\n", " \"beat_num\": int,\n", " \"district\": int,\n", " \"sector\": int,\n", "}\n", "beats = gpd.read_file(\"maps/geo_export_cf56fcdf-4e4b-485b-83b8-f76287821f6a.shp\").astype(types)" ] }, { "cell_type": "markdown", "id": "12b1398e", "metadata": {}, "source": [ "#### Verification" ] }, { "cell_type": "code", "execution_count": 4, "id": "86b9b04d", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | DATE OF OCCURRENCE | \n", "beat_num | \n", "FBI CD | \n", "
---|---|---|---|
79 | \n", "2022-12-28 10:00:00 | \n", "1214 | \n", "04B | \n", "
92 | \n", "2022-10-25 13:00:00 | \n", "1722 | \n", "02 | \n", "
109 | \n", "2023-08-31 19:00:00 | \n", "833 | \n", "04B | \n", "
170 | \n", "2022-11-26 06:15:00 | \n", "113 | \n", "03 | \n", "
188 | \n", "2022-12-08 15:38:00 | \n", "1834 | \n", "01A | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "
256904 | \n", "2023-02-04 12:29:00 | \n", "2234 | \n", "03 | \n", "
256913 | \n", "2023-03-22 10:47:00 | \n", "731 | \n", "04B | \n", "
256925 | \n", "2022-11-12 18:35:00 | \n", "512 | \n", "04B | \n", "
256931 | \n", "2023-07-31 00:00:00 | \n", "312 | \n", "04B | \n", "
256933 | \n", "2023-06-18 01:05:00 | \n", "1824 | \n", "04B | \n", "
27466 rows × 3 columns
\n", "\n", " | beat | \n", "beat_num | \n", "district | \n", "sector | \n", "geometry | \n", "count | \n", "
---|---|---|---|---|---|---|
0 | \n", "1 | \n", "1713 | \n", "17 | \n", "1 | \n", "POLYGON ((-87.70473 41.97577, -87.70472 41.975... | \n", "47 | \n", "
1 | \n", "5 | \n", "1651 | \n", "16 | \n", "5 | \n", "POLYGON ((-87.90684 41.97656, -87.91070 41.975... | \n", "12 | \n", "
2 | \n", "1 | \n", "1914 | \n", "19 | \n", "1 | \n", "POLYGON ((-87.64492 41.96973, -87.64431 41.969... | \n", "103 | \n", "
3 | \n", "1 | \n", "1915 | \n", "19 | \n", "1 | \n", "POLYGON ((-87.63724 41.96599, -87.63644 41.965... | \n", "55 | \n", "
4 | \n", "1 | \n", "1913 | \n", "19 | \n", "1 | \n", "POLYGON ((-87.65967 41.96903, -87.65967 41.968... | \n", "67 | \n", "
\n", " | beat | \n", "beat_num | \n", "district | \n", "sector | \n", "geometry | \n", "count | \n", "deviations | \n", "
---|---|---|---|---|---|---|---|
0 | \n", "1 | \n", "1713 | \n", "17 | \n", "1 | \n", "POLYGON ((-87.70473 41.97577, -87.70472 41.975... | \n", "47 | \n", "-53.240876 | \n", "
1 | \n", "5 | \n", "1651 | \n", "16 | \n", "5 | \n", "POLYGON ((-87.90684 41.97656, -87.91070 41.975... | \n", "12 | \n", "-88.240876 | \n", "
2 | \n", "1 | \n", "1914 | \n", "19 | \n", "1 | \n", "POLYGON ((-87.64492 41.96973, -87.64431 41.969... | \n", "103 | \n", "2.759124 | \n", "
3 | \n", "1 | \n", "1915 | \n", "19 | \n", "1 | \n", "POLYGON ((-87.63724 41.96599, -87.63644 41.965... | \n", "55 | \n", "-45.240876 | \n", "
4 | \n", "1 | \n", "1913 | \n", "19 | \n", "1 | \n", "POLYGON ((-87.65967 41.96903, -87.65967 41.968... | \n", "67 | \n", "-33.240876 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
269 | \n", "1 | \n", "314 | \n", "3 | \n", "1 | \n", "POLYGON ((-87.58879 41.78612, -87.58844 41.786... | \n", "79 | \n", "-21.240876 | \n", "
270 | \n", "2 | \n", "825 | \n", "8 | \n", "2 | \n", "POLYGON ((-87.67917 41.78656, -87.67912 41.784... | \n", "129 | \n", "28.759124 | \n", "
271 | \n", "1 | \n", "313 | \n", "3 | \n", "1 | \n", "POLYGON ((-87.60854 41.78583, -87.60808 41.785... | \n", "98 | \n", "-2.240876 | \n", "
272 | \n", "2 | \n", "823 | \n", "8 | \n", "2 | \n", "POLYGON ((-87.69354 41.78458, -87.69354 41.784... | \n", "185 | \n", "84.759124 | \n", "
273 | \n", "1 | \n", "312 | \n", "3 | \n", "1 | \n", "POLYGON ((-87.60431 41.78043, -87.60400 41.780... | \n", "148 | \n", "47.759124 | \n", "
274 rows × 7 columns
\n", "