Rat Sightings in New York City¶

Project Goals¶

Link to our GitHub

The team, Miranda Diaz and Raiya Dhawala, will investigate datasets related to rat sightings in New York City and how sightings are affected by different factors such as location and COVID-19.

Why Rats?¶

Rats carry disease and lower the overall quality of life in a neighborhood, and in a city in general. They are also personally disgusting to us! Clearly New Yorkers care about the presence of rats. The Mayor of New York, Eric Adams, even hired a "rat czar," Kathleen Corradi, whose specific job is to get rid of rats.

Project Dataset¶

Our project revolves around analyzing a dataset of rat sightings in New York City, using records from 2010 to the present, compiled from community-initiated service requests logged in New York City's 311 system. Each row in the dataset represents a distinct rat sighting, identified by a unique key assigned to the service request. In theory, each key corresponds to a different sighting, although there is no way to be certain the same rat isn't reported more than once. Columns in the dataset include details such as the sighting's address, cross streets, and borough. Additionally, a column marks when the service request was created, allowing us to divide the rat sightings by day, month, and year.

This dataset was chosen for its relevance to a diverse set of questions, particularly understanding the factors influencing rat sightings across different areas of New York City. Our analysis incorporates the impact of COVID-19 by bringing in supplementary data on daily case counts by borough. We also included population data from the 2020 United States Census to evaluate rat sightings per 1000 people in each borough. Future work could look at the population demographics of different neighborhoods, and whether certain neighborhoods make more 311 requests than others.

Collaboration Plan¶

Our collaboration plan is to meet on Thursdays around 4:30 to work on the final project. We will communicate through text messages to keep each other updated and organized. We will draft in Google Docs and then transfer our work into Google Colab.

ETL (Extraction, Transform, and Load)¶

We loaded our dataset into the dataset folder. It originally had 38 columns, including the date the request was created, the agency name, the location (including neighborhood and address), and the complaint status. We tidied the data by removing the repetitive location columns, such as Cross Street 1 and 2, keeping only the incident address and neighborhood. We then created day, month, and year columns and set the date as the index.

In [ ]:
#import pandas and matplotlib

import pandas as pd
import matplotlib.pyplot as plt

#mount drive
from google.colab import drive
drive.mount('/content/drive')

#read in rat csv (low_memory=False avoids a mixed-type DtypeWarning on one column)
rat_df = pd.read_csv('/content/drive/My Drive/Rat_Sightings.csv', low_memory=False)

#display it
rat_df.head()
Mounted at /content/drive
Out[ ]:
Unique Key Created Date Closed Date Agency Agency Name Complaint Type Descriptor Location Type Incident Zip Incident Address ... Vehicle Type Taxi Company Borough Taxi Pick Up Location Bridge Highway Name Bridge Highway Direction Road Ramp Bridge Highway Segment Latitude Longitude Location
0 15636547 01/02/2010 09:38:29 AM NaN DOHMH Department of Health and Mental Hygiene Rodent Rat Sighting Other (Explain Below) 11432.0 NaN ... NaN NaN NaN NaN NaN NaN NaN 40.703342 -73.800203 (40.70334174980328, -73.80020318978804)
1 15636689 01/02/2010 07:09:56 AM NaN DOHMH Department of Health and Mental Hygiene Rodent Rat Sighting Catch Basin/Sewer 11204.0 NaN ... NaN NaN NaN NaN NaN NaN NaN 40.608288 -73.977775 (40.60828832742201, -73.97777515791093)
2 15636710 01/02/2010 09:04:46 AM NaN DOHMH Department of Health and Mental Hygiene Rodent Rat Sighting 3+ Family Mixed Use Building 11375.0 68-12 YELLOWSTON BOULEVARD ... NaN NaN NaN NaN NaN NaN NaN 40.726060 -73.848329 (40.726059538480236, -73.84832900182792)
3 15636731 01/02/2010 06:27:59 PM NaN DOHMH Department of Health and Mental Hygiene Rodent Rat Sighting 3+ Family Mixed Use Building 11233.0 1040 HERKIMER STREET ... NaN NaN NaN NaN NaN NaN NaN 40.677957 -73.918921 (40.67795748580213, -73.91892129635086)
4 15636907 01/02/2010 12:50:16 PM NaN DOHMH Department of Health and Mental Hygiene Rodent Rat Sighting 3+ Family Apt. Building 10034.0 241 SHERMAN AVENUE ... NaN NaN NaN NaN NaN NaN NaN 40.866008 -73.919401 (40.86600810878767, -73.9194006525354)

5 rows × 38 columns

We made the formatted date the index of the rat dataframe so that it was easier to manipulate with other datasets such as the COVID data.

In [ ]:
#get rid of unnecessary columns
rat_df.drop(columns = ['Closed Date','Agency', 'Cross Street 1', 'Cross Street 2', 'Intersection Street 1', 'Intersection Street 2', 'Address Type', 'Landmark', 'Status','Due Date','Resolution Action Updated Date', 'Street Name', 'City', 'Facility Type', 'Park Facility Name', 'Park Borough', 'X Coordinate (State Plane)', 'Y Coordinate (State Plane)', 'Vehicle Type', 'Taxi Company Borough', 'Taxi Pick Up Location', 'Bridge Highway Name', 'Bridge Highway Direction', 'Road Ramp', 'Bridge Highway Segment', 'Location' ], inplace=True)

#use to_datetime function to change Created Date to Datetime format so we can extract month and year
rat_df['Created Date'] = pd.to_datetime(rat_df['Created Date'], format='%m/%d/%Y %I:%M:%S %p')

#Extract month and year into new columns
rat_df['Month'] = rat_df['Created Date'].dt.month
rat_df['Year'] = rat_df['Created Date'].dt.year
rat_df['Day'] = rat_df['Created Date'].dt.day

#normalize to midnight: keep the date, drop the time of day, for daily grouping and merging
rat_df['formatted_date'] = rat_df['Created Date'].dt.normalize()

#set Created Date as index
rat_df.set_index("formatted_date", inplace=True)
rat_df.head()
Out[ ]:
Unique Key Created Date Agency Name Complaint Type Descriptor Location Type Incident Zip Incident Address Community Board Borough Latitude Longitude Month Year Day
formatted_date
2010-01-02 15636547 2010-01-02 09:38:29 Department of Health and Mental Hygiene Rodent Rat Sighting Other (Explain Below) 11432.0 NaN 12 QUEENS QUEENS 40.703342 -73.800203 1 2010 2
2010-01-02 15636689 2010-01-02 07:09:56 Department of Health and Mental Hygiene Rodent Rat Sighting Catch Basin/Sewer 11204.0 NaN 11 BROOKLYN BROOKLYN 40.608288 -73.977775 1 2010 2
2010-01-02 15636710 2010-01-02 09:04:46 Department of Health and Mental Hygiene Rodent Rat Sighting 3+ Family Mixed Use Building 11375.0 68-12 YELLOWSTON BOULEVARD 06 QUEENS QUEENS 40.726060 -73.848329 1 2010 2
2010-01-02 15636731 2010-01-02 18:27:59 Department of Health and Mental Hygiene Rodent Rat Sighting 3+ Family Mixed Use Building 11233.0 1040 HERKIMER STREET 03 BROOKLYN BROOKLYN 40.677957 -73.918921 1 2010 2
2010-01-02 15636907 2010-01-02 12:50:16 Department of Health and Mental Hygiene Rodent Rat Sighting 3+ Family Apt. Building 10034.0 241 SHERMAN AVENUE 12 MANHATTAN MANHATTAN 40.866008 -73.919401 1 2010 2

We also documented the data types of our columns to facilitate data manipulation when necessary.

In [ ]:
#display datatypes
display(rat_df.dtypes)
Unique Key                   int64
Created Date        datetime64[ns]
Agency Name                 object
Complaint Type              object
Descriptor                  object
Location Type               object
Incident Zip               float64
Incident Address            object
Community Board             object
Borough                     object
Latitude                   float64
Longitude                  float64
Month                        int64
Year                         int64
Day                          int64
dtype: object

We created a data table displaying the daily count of rat sightings to compare it with the daily COVID cases.

In [ ]:
#Make dataframe which has amount of rats spotted per day
rat_per_day_df = rat_df.groupby('formatted_date').size().reset_index(name='rat_sightings_count')
rat_per_day_df.head()
Out[ ]:
formatted_date rat_sightings_count
0 2010-01-01 9
1 2010-01-02 12
2 2010-01-03 3
3 2010-01-04 24
4 2010-01-05 14

EDA (Exploratory Data Analysis)¶

Exploring Rat Populations Across Boroughs¶

Here, we see the cumulative rat reports in each borough since 2010, highlighting Brooklyn with the highest rat count. To gain a more comprehensive perspective, why not visualize this data on a map?

In [ ]:
#show frequency of rat sightings by borough
rat_df['Borough'].value_counts()
Out[ ]:
BROOKLYN         84208
MANHATTAN        61172
BRONX            42182
QUEENS           34808
STATEN ISLAND     9250
Unspecified         25
Name: Borough, dtype: int64

Rat sightings are categorized by location type. Note that the same location type sometimes appears under several spellings (e.g., "3+ Family Apt. Building" and "3+ Family Apartment Building" are counted as distinct entries).
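
One way to handle these near-duplicate labels is to map known variants to a canonical spelling before counting. A minimal sketch (the mapping below is our own guess from the counts above, not part of the dataset, and the toy Series stands in for rat_df['Location Type']):

```python
import pandas as pd

# Hypothetical mapping of duplicate spellings to one canonical label.
# It would need to be extended by hand after inspecting value_counts().
canonical = {
    '3+ Family Apartment Building': '3+ Family Apt. Building',
    '3+ Family Apt': '3+ Family Apt. Building',
    '3+ Family Apt.': '3+ Family Apt. Building',
    '3+Family Apt.': '3+ Family Apt. Building',
    '1-2 FamilyDwelling': '1-2 Family Dwelling',
}

def consolidate(series: pd.Series) -> pd.Series:
    """Replace known duplicate spellings with a canonical label."""
    return series.replace(canonical)

# Toy example standing in for rat_df['Location Type']
s = pd.Series(['3+ Family Apt. Building',
               '3+ Family Apartment Building',
               '1-2 FamilyDwelling'])
print(consolidate(s).value_counts())
```

Applied to the real column, this would fold the stray variants into the dominant spellings before any grouping or modeling.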

In [ ]:
value_counts = rat_df['Location Type'].value_counts()
print(value_counts)
3+ Family Apt. Building          95573
1-2 Family Dwelling              43520
Other (Explain Below)            36116
3+ Family Mixed Use Building     15217
Commercial Building              12864
Vacant Lot                        6216
Construction Site                 5254
1-2 Family Mixed Use Building     3840
Vacant Building                   3719
Parking Lot/Garage                2439
Catch Basin/Sewer                 2258
Public Garden                      981
3+ Family Apartment Building       658
Government Building                495
Street Area                        434
School/Pre-School                  397
Single Room Occupancy (SRO)        266
Day Care/Nursery                   235
Office Building                    221
Residential Building               215
Public Stairs                      197
Hospital                           152
Apartment                           82
Other                               76
Residence                           72
Building (Non-Residential)          23
Abandoned Building                  16
1-3 Family Dwelling                 15
Store                               13
Residential Property                12
Restaurant                          11
Summer Camp                          8
3+ Family Apt.                       7
Ground                               7
Restaurant/Bar/Deli/Bakery           6
School                               3
1-3 Family Mixed Use Building        3
Commercial Property                  2
Beach                                2
Grocery Store                        2
Vacant Lot/Property                  2
3+ Family Apt                        1
3+Family Apt.                        1
1-2 FamilyDwelling                   1
Private House                        1
Cafeteria - Public School            1
Retail Store                         1
Street Fair Vendor                   1
Catering Service                     1
Name: Location Type, dtype: int64
In [ ]:
#new dataframe showing boroughs
#(sighting counts below were recorded by hand from an earlier pull of the data, so they differ slightly from the value_counts above)
borough_df = pd.DataFrame({'Borough':['Brooklyn','Manhattan','Bronx', 'Queens', 'Staten Island', 'Unspecified'], 'Rat Sightings': [83461, 60699, 41917, 34507,9213,33]})
borough_df.head(5)
Out[ ]:
Borough Rat Sightings
0 Brooklyn 83461
1 Manhattan 60699
2 Bronx 41917
3 Queens 34507
4 Staten Island 9213

Now, the figure below offers a clearer visual representation of the rat sightings reported since 2010. Each color corresponds to a different borough, and each dot marks an individual report. But where exactly have the most rat sightings occurred within each borough? An even better question might be: which areas in NYC should you avoid?

In [ ]:
import plotly.express as px

#don't include rows with no latitude or longitude
rat_df = rat_df.dropna(subset=['Latitude', 'Longitude'])

#don't include unspecified
filtered_rat_df = rat_df[rat_df['Borough'] != 'Unspecified']

#plotting the map with different colors for each borough
fig = px.scatter_mapbox(filtered_rat_df, lat='Latitude', lon='Longitude', color='Borough', mapbox_style='open-street-map')
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.show()

(Image of the figure in case it is not visible after the file is converted to HTML)

newplot.png

Now, where exactly are the locations with the highest frequency of rat sightings in each borough? The following figure utilizes the latitude and longitude of each rat sighting to determine just that.

In [ ]:
import pandas as pd
import plotly.express as px

#remove rows with missing latitude or longitude values
rat_df = rat_df.dropna(subset=['Latitude', 'Longitude'])

#do not include reports with an unspecified borough
filtered_rat_df = rat_df[rat_df['Borough'] != 'Unspecified']

#group data by Borough and then find latitude and longitude with the most sightings
sightings_count = filtered_rat_df.groupby(['Borough', 'Latitude', 'Longitude']).size().reset_index(name='Sightings')
idx = sightings_count.groupby('Borough')['Sightings'].transform('max') == sightings_count['Sightings']
most_sightings = sightings_count[idx].copy()  #copy so adding a column below doesn't warn

#make map showing which latitude and longitude had the most rat sightings for each borough
most_sightings['MarkerSize'] = 10
fig = px.scatter_mapbox(most_sightings, lat='Latitude', lon='Longitude', hover_name='Borough',
                        zoom=10, color='Borough', mapbox_style='open-street-map', size='MarkerSize',
                        hover_data={'Sightings': True,})
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.show()

(Image of the figure in case it is not visible after the file is converted to HTML)

newplot.png

Over time, there has been a noticeable increase in rat sightings, as shown in the plot below. This rise could be attributed to various factors, including the population density and the tendency of individuals to report rat sightings through 311 calls. A significant decline is evident in 2020 and 2021, likely due to the impact of COVID-19, which led to reduced outdoor activities. In 2023, we've observed the highest number of rat sightings reported to date!

In [ ]:
#display rat frequency per year
rat_df.Year.plot.hist()
Out[ ]:
<Axes: ylabel='Frequency'>
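
A histogram of the Year column bins years arbitrarily; counting sightings per year directly gives exactly one bar per year. A minimal sketch of that alternative (the toy Series below stands in for rat_df['Year']):

```python
import pandas as pd
import matplotlib
matplotlib.use('Agg')  # headless backend so the sketch runs anywhere
import matplotlib.pyplot as plt

# Toy stand-in for rat_df['Year']
year = pd.Series([2010, 2010, 2011, 2020, 2023, 2023, 2023])

# One bar per year, in chronological order
counts = year.value_counts().sort_index()
ax = counts.plot.bar()
ax.set_xlabel('Year')
ax.set_ylabel('Rat Sightings')
plt.tight_layout()
```

On the real data this makes the 2020-2021 dip and the 2023 peak easier to read than histogram bins that may straddle year boundaries.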

But understanding the rat count alone doesn't provide a meaningful comparison without considering the size or population of each borough. To make one, we need to incorporate additional data about each borough's population and calculate the ratio of rat sightings per 1000 people.

In [ ]:
from google.colab import drive

#Bringing in Population data
population_df = pd.read_csv('/content/drive/My Drive/new_pop_again.csv')
population_df = population_df.drop([0]).reset_index(drop=True)
population_df['Population'] = population_df['Population'].str.replace(',', '').astype(int)

population_df.head(10)
Out[ ]:
Index GeoType Borough GeoID Name Population
0 2 Boro Manhattan 1 Manhattan 1694251
1 3 Boro Bronx 2 Bronx 1472654
2 4 Boro Brooklyn 3 Brooklyn 2736074
3 5 Boro Queens 4 Queens 2405464
4 6 Boro Staten Island 5 Staten Island 495747
In [ ]:
#combine borough and population then calculate per capita
result_df = pd.merge(borough_df, population_df, on='Borough')

#unspecified doesn't matter, it is so minimal
#delete unnecessary columns
result_df= result_df.drop(columns=['Index', 'GeoID', 'GeoType', 'Name'])

#rename rat sightings
result_df = result_df.rename(columns={'Rat Sightings': 'Rat_Sightings'})

#set borough as index
result_df.set_index("Borough", inplace=True)

result_df.head(7)
Out[ ]:
Rat_Sightings Population
Borough
Brooklyn 83461 2736074
Manhattan 60699 1694251
Bronx 41917 1472654
Queens 34507 2405464
Staten Island 9213 495747
In [ ]:
#Creating new column calculating the amount of rat sightings per 1000 people.

result_df['per_capita_1000'] = (result_df['Rat_Sightings']/result_df['Population'])*1000
result_df.head(8)
Out[ ]:
Rat_Sightings Population per_capita_1000
Borough
Brooklyn 83461 2736074 30.503926
Manhattan 60699 1694251 35.826451
Bronx 41917 1472654 28.463577
Queens 34507 2405464 14.345257
Staten Island 9213 495747 18.584076

With these per-1000 figures in hand, we can plot and compare the boroughs.

In [ ]:
#Plot rat sightings by borough
result_df.Rat_Sightings.plot.bar()

#Label axes
plt.xlabel('Borough')
plt.ylabel('Rat Sightings')
plt.title('Rat Sightings by Borough')
plt.show()

#Create another graph to compare rat sightings per 1000 people in each borough.
result_df.per_capita_1000.plot.bar()
plt.xlabel('Borough')
plt.ylabel('Per 1000 people Rat Sightings')
plt.title('Rat Sightings per 1000 people by Borough')
plt.show()

So, despite Brooklyn having the largest population, when calculating per 1000 people rat sightings, Manhattan records the highest count! We can also see that despite Staten Island's smaller population, it surpasses Queens in rat sightings per 1000 people. Normalizing the rat sightings offers insight into the actual rat impact across each borough.

Comparing Rat Sightings with COVID cases¶

The next point of interest we will be tackling is how rat sightings have changed before and after the pandemic. To explore this, we're integrating COVID-19 data showcasing case counts by date and borough in New York City. We initiated the process by loading the COVID case data divided by borough.

In [ ]:
# Read the COVID daily counts CSV
COVID_df = pd.read_csv('/content/drive/My Drive/COVID_Daily_Counts.csv')

# Choosing the columns we want to focus on
columns = ['date_of_interest', 'CASE_COUNT', 'BX_CASE_COUNT', 'BK_CASE_COUNT', 'MN_CASE_COUNT', 'QN_CASE_COUNT', 'SI_CASE_COUNT']
new_COVID_df = COVID_df[columns].copy()  # copy so formatting the date column below doesn't warn

# Formatting date_of_interest column
new_COVID_df['date_of_interest'] = pd.to_datetime(new_COVID_df['date_of_interest'], errors='coerce', format='%m/%d/%Y')

new_COVID_df.head()

Out[ ]:
date_of_interest CASE_COUNT BX_CASE_COUNT BK_CASE_COUNT MN_CASE_COUNT QN_CASE_COUNT SI_CASE_COUNT
0 2020-02-29 1 0 0 1 0 0
1 2020-03-01 0 0 0 0 0 0
2 2020-03-02 0 0 0 0 0 0
3 2020-03-03 1 0 0 0 1 0
4 2020-03-04 5 0 1 2 2 0

We then made a new dataset that merged the daily rat sightings data with the corresponding COVID-19 case counts by date to facilitate our analysis.

In [ ]:
#merge COVID cases and rat sightings dataframes
#(date_of_interest is already in datetime format from the previous cell)

rat_COVID_df = pd.merge(rat_per_day_df, new_COVID_df, left_on='formatted_date', right_on='date_of_interest', how='left')

#Drop the now-redundant date_of_interest column after the merge
rat_COVID_df = rat_COVID_df.drop('date_of_interest', axis=1)

#display
rat_COVID_df.head()

Out[ ]:
formatted_date rat_sightings_count CASE_COUNT BX_CASE_COUNT BK_CASE_COUNT MN_CASE_COUNT QN_CASE_COUNT SI_CASE_COUNT
0 2010-01-01 9 NaN NaN NaN NaN NaN NaN
1 2010-01-02 12 NaN NaN NaN NaN NaN NaN
2 2010-01-03 3 NaN NaN NaN NaN NaN NaN
3 2010-01-04 24 NaN NaN NaN NaN NaN NaN
4 2010-01-05 14 NaN NaN NaN NaN NaN NaN

The plot below shows rat sightings alongside daily COVID case counts from September 1st, 2019, to August 30th, 2023. This visualization lets us observe how rat sightings varied before and after the COVID-19 outbreak. Sightings rose after the initial outbreak, in contrast with the lower average in 2019. Notably, during waves when COVID cases spiked, rat sightings dipped, likely because people spent less time outdoors during periods with higher case counts.

In [ ]:
#filter using the formatted_date column
date_column = 'formatted_date'

#determining the dates we want to view
start_date = '2019-09-01'
end_date = '2023-08-30'
rat_COVID_zoomed = rat_COVID_df[(rat_COVID_df[date_column] >= start_date) & (rat_COVID_df[date_column] <= end_date)]

plt.figure(figsize=(18, 12))

#plot data and covid case count
plt.plot(rat_COVID_zoomed['formatted_date'], rat_COVID_zoomed['CASE_COUNT'], label='COVID Cases', color='blue')
plt.xlabel('Date')

#setting the color of the left y-axis for COVID cases
plt.ylabel('COVID Cases', color='blue')

#Second y-axis for rat sightings so they aren't dwarfed by the COVID case counts
ax2 = plt.gca().twinx()
ax2.plot(rat_COVID_zoomed['formatted_date'], rat_COVID_zoomed['rat_sightings_count'], label='Rat Sightings per Day', color='red')
ax2.set_ylabel('Rat Sightings per Day', color='red')

#Making plot look nice
plt.title('COVID Cases and Rat Sightings per day From 2019-2023')
plt.legend()
plt.grid(True)
plt.show()

Model¶

We thought it would be interesting to see whether the top community districts for rat sightings in each borough shared any common features. We created a top_five variable indicating whether a rat sighting fell in one of those top five community districts. We used a random forest model to see whether location type could predict whether a rat would be sighted in a top_five community district. At first we tried to build a decision tree with different variables, but the tree was too large to read. We then switched to a random forest, though training took several minutes. We decided to look at how the different features factored into the random forest model. Interestingly, when looking at how location type factored into top_five or not, "commercial building" was by far the top feature. This is interesting because "commercial building" is not the top location type for rat sightings in general.

In [ ]:
#create column top_five labeling rat sightings as in or not in the top 5 community districts
community_boards_to_match = ['03 BROOKLYN', '07 MANHATTAN', '05 QUEENS', '04 BRONX', '01 STATEN ISLAND']

# Create a new column based on the conditions
rat_df['top_five'] = rat_df['Community Board'].isin(community_boards_to_match).astype(int)

# Display the modified DataFrame
rat_df.head()
Out[ ]:
Unique Key Created Date Agency Name Complaint Type Descriptor Location Type Incident Zip Incident Address Community Board Borough Latitude Longitude Month Year Day top_five
formatted_date
2010-01-02 15636547 2010-01-02 09:38:29 Department of Health and Mental Hygiene Rodent Rat Sighting Other (Explain Below) 11432.0 NaN 12 QUEENS QUEENS 40.703342 -73.800203 1 2010 2 0
2010-01-02 15636689 2010-01-02 07:09:56 Department of Health and Mental Hygiene Rodent Rat Sighting Catch Basin/Sewer 11204.0 NaN 11 BROOKLYN BROOKLYN 40.608288 -73.977775 1 2010 2 0
2010-01-02 15636710 2010-01-02 09:04:46 Department of Health and Mental Hygiene Rodent Rat Sighting 3+ Family Mixed Use Building 11375.0 68-12 YELLOWSTON BOULEVARD 06 QUEENS QUEENS 40.726060 -73.848329 1 2010 2 0
2010-01-02 15636731 2010-01-02 18:27:59 Department of Health and Mental Hygiene Rodent Rat Sighting 3+ Family Mixed Use Building 11233.0 1040 HERKIMER STREET 03 BROOKLYN BROOKLYN 40.677957 -73.918921 1 2010 2 1
2010-01-02 15636907 2010-01-02 12:50:16 Department of Health and Mental Hygiene Rodent Rat Sighting 3+ Family Apt. Building 10034.0 241 SHERMAN AVENUE 12 MANHATTAN MANHATTAN 40.866008 -73.919401 1 2010 2 0
In [ ]:
rat_df = rat_df[(rat_df["Month"].notna()) & (rat_df["Location Type"].notna())]
rat_df.head()
Out[ ]:
Unique Key Created Date Agency Name Complaint Type Descriptor Location Type Incident Zip Incident Address Community Board Borough Latitude Longitude Month Year Day top_five
formatted_date
2010-01-02 15636547 2010-01-02 09:38:29 Department of Health and Mental Hygiene Rodent Rat Sighting Other (Explain Below) 11432.0 NaN 12 QUEENS QUEENS 40.703342 -73.800203 1 2010 2 0
2010-01-02 15636689 2010-01-02 07:09:56 Department of Health and Mental Hygiene Rodent Rat Sighting Catch Basin/Sewer 11204.0 NaN 11 BROOKLYN BROOKLYN 40.608288 -73.977775 1 2010 2 0
2010-01-02 15636710 2010-01-02 09:04:46 Department of Health and Mental Hygiene Rodent Rat Sighting 3+ Family Mixed Use Building 11375.0 68-12 YELLOWSTON BOULEVARD 06 QUEENS QUEENS 40.726060 -73.848329 1 2010 2 0
2010-01-02 15636731 2010-01-02 18:27:59 Department of Health and Mental Hygiene Rodent Rat Sighting 3+ Family Mixed Use Building 11233.0 1040 HERKIMER STREET 03 BROOKLYN BROOKLYN 40.677957 -73.918921 1 2010 2 1
2010-01-02 15636907 2010-01-02 12:50:16 Department of Health and Mental Hygiene Rodent Rat Sighting 3+ Family Apt. Building 10034.0 241 SHERMAN AVENUE 12 MANHATTAN MANHATTAN 40.866008 -73.919401 1 2010 2 0
In [ ]:
value_counts = rat_df['top_five'].value_counts()
print(value_counts)
0    195153
1     34226
Name: top_five, dtype: int64
In [ ]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt


#Select feature location type and target variable top_five
X = rat_df[['Location Type']]
y = rat_df['top_five']

#dummies for location type
X = pd.get_dummies(X, columns=['Location Type'])

#training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

#Create random forest model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

#predict on test set
y_pred = model.predict(X_test)

#test model
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
classification_rep = classification_report(y_test, y_pred)

#results
print("Accuracy:", accuracy)
print("\nConfusion Matrix:\n", conf_matrix)
print("\nClassification Report:\n", classification_rep)

#Plot feature importances
feature_importances = model.feature_importances_
feature_names = X.columns
sorted_idx = feature_importances.argsort()

plt.figure(figsize=(10, 8))
plt.barh(range(len(sorted_idx)), feature_importances[sorted_idx])
plt.yticks(range(len(sorted_idx)), [feature_names[i] for i in sorted_idx])
plt.xlabel('Feature Importance')
plt.title('Random Forest Feature Importance')
plt.show()
Accuracy: 0.8491586014473799

Confusion Matrix:
 [[38956     1]
 [ 6919     0]]

Classification Report:
               precision    recall  f1-score   support

           0       0.85      1.00      0.92     38957
           1       0.00      0.00      0.00      6919

    accuracy                           0.85     45876
   macro avg       0.42      0.50      0.46     45876
weighted avg       0.72      0.85      0.78     45876
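
A side note on the numbers above: the confusion matrix shows the single-feature model predicts class 1 essentially never, so its ~85% accuracy is just the majority-class base rate. One common mitigation, sketched below on a toy imbalanced dataset (not our actual data), is scikit-learn's class_weight='balanced', which reweights classes during training:

```python
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

rng = np.random.default_rng(42)

# Toy imbalanced problem: ~15% positives, one weakly informative feature
n = 4000
y = (rng.random(n) < 0.15).astype(int)
X = (y + rng.normal(0.0, 1.5, n)).reshape(-1, 1)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Shallow trees so the forest cannot simply memorize the training set
plain = RandomForestClassifier(n_estimators=50, max_depth=3,
                               random_state=0).fit(X_train, y_train)
balanced = RandomForestClassifier(n_estimators=50, max_depth=3,
                                  class_weight='balanced',
                                  random_state=0).fit(X_train, y_train)

# Reweighting pushes the model to predict the minority class more often
print(plain.predict(X_test).sum(), balanced.predict(X_test).sum())
```

Reweighting trades overall accuracy for recall on the rare class; whether that trade is worthwhile depends on the question being asked.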

We then tried to incorporate other variables into the random forest, but the results weren't as interesting. Intriguingly, "month" was the most important feature, which makes sense because there seem to be fewer rat sightings in colder months, probably because fewer people go outside.

In [ ]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt

#encode top_five target variable as 0 and 1 (it is already 0/1, so this is just a safeguard)
le = LabelEncoder()
rat_df['top_five'] = le.fit_transform(rat_df['top_five'])

#choose features we want to include
features = ['Location Type','Borough', 'Month', 'Year']
X = rat_df[features].copy()  #copy so the fillna below doesn't trigger a SettingWithCopyWarning
y = rat_df['top_five']

categorical_features = ['Location Type', 'Borough']
X[categorical_features] = X[categorical_features].fillna('missing')

#One-hot encode categorical features
X = pd.get_dummies(X, columns=categorical_features)

#Training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

#Create random forest model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

#predict the test set
y_pred = model.predict(X_test)

#evaluate
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
classification_rep = classification_report(y_test, y_pred)

#results
print("Accuracy:", accuracy)
print("\nConfusion Matrix:\n", conf_matrix)
print("\nClassification Report:\n", classification_rep)

#Plot feature importances
feature_importances = model.feature_importances_
feature_names = X.columns
sorted_idx = feature_importances.argsort()

plt.figure(figsize=(10, 10))
plt.barh(range(len(sorted_idx)), feature_importances[sorted_idx])
plt.yticks(range(len(sorted_idx)), [feature_names[i] for i in sorted_idx])
plt.xlabel('Feature Importance')
plt.title('Random Forest Feature Importance')
plt.show()

Accuracy: 0.8475673554799895

Confusion Matrix:
 [[38491   466]
 [ 6527   392]]

Classification Report:
               precision    recall  f1-score   support

           0       0.86      0.99      0.92     38957
           1       0.46      0.06      0.10      6919

    accuracy                           0.85     45876
   macro avg       0.66      0.52      0.51     45876
weighted avg       0.79      0.85      0.79     45876

We also attempted to build a model predicting rat sightings per month from past monthly sightings. Unfortunately, our model was not successful. The quantity of monthly data proved insufficient, particularly given the sharp increase in rat sightings since 2022. Our model captured some of the month-to-month dips and rises, but overall failed to track the level of rat sightings in 2023.

In [ ]:
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

#time series with monthly rat sightings
time_series = rat_df.groupby(pd.Grouper(key='Created Date', freq='M')).size()

#training and testing sets
train_size = int(len(time_series) * 0.8)
train, test = time_series[:train_size], time_series[train_size:]

#fit ARIMA model
order = (3, 1, 3)
model = ARIMA(train, order=order)
model_fit = model.fit()

#predict
predictions = model_fit.forecast(steps=len(test))

#evaluate model with mse and rmse
mse = mean_squared_error(test, predictions)
rmse = mse**0.5

#Plot the predicted sightings
plt.figure(figsize=(10, 6))
plt.plot(train.index, train, label='Train')
plt.plot(test.index, test, label='Test')
plt.plot(test.index, predictions, label='Predictions')
plt.title(f'ARIMA Predictions (RMSE: {rmse:.2f})')
plt.ylabel('Rat Sightings')
plt.xlabel('Year')
plt.legend()
plt.show()
/usr/local/lib/python3.10/dist-packages/statsmodels/tsa/statespace/sarimax.py:978: UserWarning:

Non-invertible starting MA parameters found. Using zeros as starting parameters.

/usr/local/lib/python3.10/dist-packages/statsmodels/base/model.py:607: ConvergenceWarning:

Maximum Likelihood optimization failed to converge. Check mle_retvals

Final Thoughts¶

Ideas for Future Research¶

Unfortunately, our data was not comprehensive enough to create a useful model. While we found some minor points of interest, such as "commercial building" being the most important feature when predicting top_five, we couldn't generate a reliable random forest model.

While the rat data is interesting, the real question is who is picking up the phone to call in a rat sighting. Rats are everywhere in the city; most people never call a sighting in. This could have skewed our data, because people are more likely to call in a rat sighting near their residence than one they pass on a random street. In the future, we could look at the demographics of community districts, such as average income, age, and racial makeup, and see how they relate to each district's rat sightings, helping to build a more comprehensive model of rat sightings, or rather, of called-in rat sightings.

We might also want to classify "location type" more efficiently, perhaps with three options: residential, commercial, and industrial. This would let us use the location type data more effectively. These approaches could pave the way for a more insightful understanding of reported rat sightings in the future.
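
The coarser three-way grouping could start from a keyword pass over the existing labels. A sketch (the keyword lists are our own guesses, not an official taxonomy, and labels matching none of them fall into an 'other' bucket):

```python
def coarse_location(label: str) -> str:
    """Map a 311 'Location Type' string to residential/commercial/industrial.

    The keyword lists below are illustrative guesses and would need review
    against the full set of labels in the data.
    """
    text = label.lower()
    if any(k in text for k in ('family', 'residen', 'apartment', 'apt',
                               'dwelling', 'house', 'occupancy', 'sro')):
        return 'residential'
    if any(k in text for k in ('commercial', 'store', 'restaurant', 'office',
                               'deli', 'bakery', 'grocery', 'catering')):
        return 'commercial'
    if any(k in text for k in ('construction', 'industrial')):
        return 'industrial'
    return 'other'

print(coarse_location('3+ Family Apt. Building'))   # residential
print(coarse_location('Commercial Building'))       # commercial
print(coarse_location('Construction Site'))         # industrial
```

Applying this with rat_df['Location Type'].map(coarse_location) would collapse the roughly fifty raw labels into a handful of dummy columns, which should make feature importances easier to interpret.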