# spatial join of fire locations to states
state_fire = gdf_fire.loc[fire_mask, fire_cols].sjoin(
gdf2.loc[boundary_mask, boundary_cols]
)
- once state has been associated, can aggregate data to get number of fires per year per state
- have visualised these first with plotly as this allows me to animate frames for each year, plus simpler debugging with hover info
- then visualised with matplotlib. recreated same format GeoDataFrame you used, aggregating years to 2000-2014, then joined on state polygon for plotting
- added
edgecolor="black" so that edges are clearly marked.
import geopandas as gpd
import pandas as pd
import plotly.express as px
import requests
from pathlib import Path
from zipfile import ZipFile
import urllib
import requests
# get wild fire data..
# https://data-nifc.opendata.arcgis.com/datasets/wfigs-wildland-fire-locations-full-history/explore?showTable=true
gdf_fire = gpd.GeoDataFrame.from_features(
requests.get(
"https://opendata.arcgis.com/datasets/d8fdb82b3931403db3ef47744c825fbf_0.geojson"
).json()
)
# fmt: off
# download boundaries
url = "https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/110m/cultural/ne_110m_admin_1_states_provinces.zip"
f = Path.cwd().joinpath(urllib.parse.urlparse(url).path.split("/")[-1])
# fmt: on
if not f.exists():
r = requests.get(url, stream=True, headers={"User-Agent": "XY"})
with open(f, "wb") as fd:
for chunk in r.iter_content(chunk_size=128):
fd.write(chunk)
zfile = ZipFile(f)
zfile.extractall(f.stem)
# load downloaded boundaries
gdf2 = gpd.read_file(str(f.parent.joinpath(f.stem).joinpath(f"{f.stem}.shp")))
# a bit of cleanup, data types and CRS
gdf_fire["FireDiscoveryDateTime"] = pd.to_datetime(gdf_fire["FireDiscoveryDateTime"])
gdf_fire = gdf_fire.set_crs("EPSG:4326")
# filters, US states and fires with a date...
boundary_cols = ["adm1_code", "iso_3166_2", "iso_a2", "name", "geometry"]
boundary_mask = gdf2["iso_a2"].eq("US")
fire_cols = ["OBJECTID", "FireDiscoveryDateTime", "geometry"]
# fire_mask = gdf_fire["FireDiscoveryDateTime"].dt.year.between(2010,2012)
fire_mask = ~gdf_fire["FireDiscoveryDateTime"].isna()
# spatial join of fire locations to states
state_fire = gdf_fire.loc[fire_mask, fire_cols].sjoin(
gdf2.loc[boundary_mask, boundary_cols]
)
# summarize data by year and state
df_fires_by_year = (
state_fire.groupby(
boundary_cols[0:-1]
+ ["index_right", state_fire["FireDiscoveryDateTime"].dt.year],
as_index=False,
)
.size()
.sort_values(["FireDiscoveryDateTime", "index_right"])
)
# and finally visualize...
px.choropleth_mapbox(
df_fires_by_year,
geojson=gdf2.loc[boundary_mask, "geometry"].__geo_interface__,
locations="index_right",
color="size",
animation_frame="FireDiscoveryDateTime",
hover_name="name",
hover_data={"index_right": False},
color_continuous_scale="OrRd",
).update_layout(
mapbox={
"style": "carto-positron",
"zoom": 2,
"center": {"lat": 39.50, "lon": -98.35},
},
margin={"l": 0, "r": 0, "t": 0, "b": 0},
)

matplotlib
import matplotlib.pyplot as plt
# recreate dataframe in question. Exclude Alaska and Hawaii as they mess up boundaries...
# further aggregate the defined years....
states_00_14 = gpd.GeoDataFrame(
df_fires_by_year.loc[df_fires_by_year["FireDiscoveryDateTime"].between(2000, 2014)]
.groupby("index_right", as_index=False)
.agg({"size": "sum"})
.merge(
gdf2.loc[boundary_mask & ~gdf2["iso_3166_2"].isin(["US-AK","US-HI"])],
left_on="index_right",
right_index=True,
how="inner",
)
)
f, ax = plt.subplots(1, figsize=(12, 6))
ax = states_00_14.plot(column="size", cmap="OrRd", legend=True, ax=ax, edgecolor="black")
lims = plt.axis("equal")
f.suptitle("US Wildfire count per state in 2000-2014")
ax.set_axis_off()

states_00_14 = states_00_14.merge(st_cts_00_14.reset_index(name='num_fires'))is this not correct?