I have the dataframe below, which is further processed to create a pivot table. I am now trying to plot the multi-index pivot data with Plotly, but Plotly does not accept the values and raises an error.
I need the categories 'develop' and 'developing' on the x-axis, with the associated 'employee' data plotted within each category. The y-axis must be GDP (the 'gdp' column), and the bars must be stacked by 'cond_cat'. The code is below for reference.
Sample DataFrame
import pandas as pd
import numpy as np
# Build a reproducible random sample, then keep only the "USA America" rows.
s = 200
np.random.seed(365)  # fixed seed so every run draws the same values

# NOTE: all draws consume the same global random stream, so the key order
# below (Country, employee, economy_cat, cond_cat, gdp) must not change.
sample_columns = {
    "Country": np.random.choice(["USA America", "JPY one two", "MEX", "IND", "AUS"], s),
    "employee": np.random.choice(["Bob", "Sam", "John", "Tom", "Harry"], s),
    "economy_cat": np.random.choice(["developing", "develop"], s),
    "cond_cat": np.random.choice(["good", "bad", "worse", "better", "average"], s),
    "gdp": np.random.randint(5, 75, s),
}
df = pd.DataFrame(sample_columns)

# Restrict the sample to a single country; the original row labels are kept.
is_usa = df["Country"].eq("USA America")
df = df[is_usa]
# print(df.head())
Country employee economy_cat cond_cat gdp
9 USA America Sam developing better 30
11 USA America Bob developing average 45
21 USA America John develop bad 29
22 USA America Sam develop bad 73
30 USA America Harry develop bad 25
Reshape
# Sum gdp per (economy_cat, employee) row and per cond_cat column.
# The result has a two-level row index (economy_cat, employee) and one
# column for each cond_cat value; combinations with no data come out as NaN.
df_pivot = df.pivot_table(
    values='gdp',
    index=['economy_cat', 'employee'],
    columns=['cond_cat'],
    aggfunc='sum',
)
# print(df_pivot)
cond_cat average bad better good worse
economy_cat employee
develop Bob 6.0 NaN 46.0 NaN NaN
Harry NaN 25.0 9.0 NaN NaN
John 37.0 29.0 NaN NaN NaN
Sam NaN 82.0 NaN NaN 60.0
Tom 48.0 NaN NaN 51.0 NaN
developing Bob 45.0 NaN NaN 45.0 NaN
Harry 75.0 183.0 113.0 NaN NaN
John 16.0 36.0 27.0 67.0 NaN
Sam NaN NaN 30.0 NaN 43.0
Tom 111.0 NaN NaN 77.0 73.0
Plot
# Attempt to draw df_pivot as a stacked bar chart on a single 1x1 subplot.
# NOTE(review): `make_subplots` and `go` are not imported in the visible
# snippet -- presumably `from plotly.subplots import make_subplots` and
# `import plotly.graph_objects as go` ran earlier; confirm.
#
# Why this fails: `df_pivot[...]` is a *column* lookup, but 'economy_cat' and
# 'employee' are the names of df_pivot's row-index levels, not column labels
# (the columns are the cond_cat values: average, bad, better, good, worse).
# The first `x=` expression therefore raises
# KeyError: ('economy_cat', 'employee') before any trace is added.
# 'cond_cat' is likewise only the name of the columns axis, so
# `df_pivot["cond_cat"]` would presumably fail the same way.
#
# Possible fix (see Plotly's multi-category axis docs): build x from the
# index levels, e.g.
#   x = [df_pivot.index.get_level_values('economy_cat'),
#        df_pivot.index.get_level_values('employee')]
# and add one go.Bar per column of df_pivot with y = df_pivot[column].
fig = make_subplots(rows=1, cols=1)
# First bar trace (blue); this is where the KeyError is raised.
fig.add_trace(
go.Bar(
x= df_pivot["economy_cat","employee"],
y= df_pivot["cond_cat"],marker_color = "#1f77b4",showlegend=False,
marker_line_color = '#1f77b4',
),
row=1,
col=1,
)
# Second bar trace (semi-transparent red) with the same x/y lookups as the
# first one; never reached because the first add_trace call already fails.
fig.add_trace(
go.Bar(
x= df_pivot["economy_cat","employee"],
y= df_pivot["cond_cat"],marker_color = "rgba(255, 0, 0, 0.6)",showlegend=False,
marker_line_color = "rgba(255, 0, 0, 0.6)",
),
row=1,
col=1,
)
# Stack the bar traces on top of each other instead of grouping them.
fig.update_layout(barmode = 'stack')
fig.show()
Error when plotting
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
e:\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
3360 try:
-> 3361 return self._engine.get_loc(casted_key)
3362 except KeyError as err:
e:\Anaconda3\lib\site-packages\pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
e:\Anaconda3\lib\site-packages\pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: ('economy_cat', 'employee')
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
C:\Users\TRENTO~1.MCK\AppData\Local\Temp/ipykernel_18596/2928341867.py in <module>
14 fig.add_trace(
15 go.Bar(
---> 16 x= df_pivot["economy_cat","employee"],
17 y= df_pivot["cond_cat"],marker_color = "#1f77b4",showlegend=False,
18 marker_line_color = '#1f77b4',
e:\Anaconda3\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
3456 if self.columns.nlevels > 1:
3457 return self._getitem_multilevel(key)
-> 3458 indexer = self.columns.get_loc(key)
3459 if is_integer(indexer):
3460 indexer = [indexer]
e:\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
3361 return self._engine.get_loc(casted_key)
3362 except KeyError as err:
-> 3363 raise KeyError(key) from err
3364
3365 if is_scalar(key) and isna(key) and not self.hasnans:
KeyError: ('economy_cat', 'employee')
