I have a dataframe with a mix of data types (object and numeric). I want to draw scatter plots of every numeric column in the dataset against four specific columns (col_32, col_69, col_74 and col_80), thereby generating 4 plots for each of the numeric columns.
Example:
col_1 against col_32, col_69, col_74 and col_80 (4 plots)
col_2 against col_32, col_69, col_74 and col_80 (4 plots)
col_3 against col_32, col_69, col_74 and col_80 (4 plots)
...
col_85 against col_32, col_69, col_74 and col_80 (4 plots)
import pandas as pd
from random import uniform
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import gmean
# Generate dataframe
# Seed before any random draw so that both the generated values and the
# NaN mask below are reproducible from run to run.
np.random.seed(123)
df = pd.DataFrame(
    data=np.random.uniform(low=5.5, high=30.75, size=(160, 84)),
    columns=[f'col_{i}' for i in range(1, 85)],
)
df.insert(
    loc=0, column='Location',
    value=np.repeat(['A', 'B', 'C', 'D'], 40, axis=0),
)
# Insert NaN in the dataset just like the original dataset
# Define the probability of introducing a NaN (e.g., 15%)
nan_probability = 0.15
# The mask has the full shape of df, so the 'Location' column receives
# NaNs as well as the numeric columns.
df = df.mask(np.random.random(df.shape) < nan_probability)
# final dataset
df
I need help here, see my attempt below:
# Select the numeric columns; each one becomes the y-axis of its own figure.
numeric_cols = df.select_dtypes(include=['number']).columns.tolist()
print(f"Numeric columns: {numeric_cols}")

# Target columns (x-axis) that every numeric column is plotted against.
specific_x_cols = ['col_32', 'col_69', 'col_74', 'col_80']

# One figure per numeric column, laid out as a 2x2 grid with one panel per
# target column. The outer loop runs over the y columns so that each saved
# file name ('<y_col>_scatterplot.png') maps to exactly one figure.
for y_col in numeric_cols:
    fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(10, 8))

    for ax, x_col in zip(axes.flat, specific_x_cols):
        if y_col == x_col:
            # Avoid plotting a column against itself: hide that panel.
            ax.set_visible(False)
            continue

        # Draw on the explicit Axes; without ax=..., every scatter would be
        # drawn on the implicit "current" axes.
        sns.scatterplot(x=x_col, y=y_col, data=df, ax=ax)
        ax.set_title(f"Scatterplot of {y_col} against {x_col}")
        ax.set_xlabel(x_col)
        ax.set_ylabel(y_col)
        ax.grid(True)

    fig.tight_layout()
    # Save as a PNG file with a descriptive name (one file per numeric column).
    fig.savefig(f'{y_col}_scatterplot.png')
    plt.show()
    # Release the figure; otherwise one open figure per numeric column
    # accumulates in memory.
    plt.close(fig)

print("scatterplot generated and saved successfully!")
Please share your code if you can

col_32, col_69, col_74 and col_80 (each numeric column vs. the 4 target columns, making a total of 81 x 4 scatterplots)