My problem is that I do not understand why pandas/numpy is returning a certain array shape when I convert a column to a numpy array. I would expect a shape of (1440, 130, 13) but due to the fact that I get a np.array of "list calls" (literally no idea why) when I call .to_numpy() on my dataframe column, I get a shape of (1440, ).
At first I thought maybe the file type as which I stored the dataframe was the Problem (I tried json and csv before) but I had the same issue with any of them.
Many thanks in advance!
def extract_features(data_df):
mfcc_list = []
for i in tqdm(range(len(data_df))):
signal, sr = librosa.load(data_df.path[i], sr=SAMPLE_RATE, duration=3)
mfcc = librosa.feature.mfcc(signal, sr=sr, n_mfcc=13, n_fft=2048, hop_length=512)
mfcc = mfcc.T
mfcc_list.append(mfcc.tolist()) # I make sure that everything is in list form
data_df['mfcc'] = mfcc_list
return data_df
data_df = extract_features(data_df=data_df)
data_df.to_pickle('path/to/file')
df = pd.read_pickle('path/to/file')
a = df["mfcc"].to_numpy() # I would expect a shape of (1440, 130, 13)
b = np.array(df.iloc[0]["mfcc"])
print(a)
# output in a shape like this:
# [list([[], ..., []]), ..., list([[], ..., []])]
print(type(a)) # output: <class 'numpy.ndarray'>
print(type(a[0])) # output: <class 'list'>
print(type(b)) # output: <class 'numpy.ndarray'>
print(type(b[0])) # output: <class 'numpy.ndarray'>
df.info()
# output:
# <class 'pandas.core.frame.DataFrame'>
# Int64Index: 1440 entries, 0 to 1439
# Data columns (total 9 columns):
# # Column Non-Null Count Dtype
# --- ------ -------------- -----
# 0 path 1440 non-null object
# 1 source 1440 non-null object
# 2 actor 1440 non-null object
# 3 gender 1440 non-null object
# 4 statement 1440 non-null object
# 5 repetition 1440 non-null object
# 6 intensity 1440 non-null object
# 7 emotion 1440 non-null object
# 8 mfcc 1440 non-null object
# dtypes: object(9)
# memory usage: 112.5+ KB
print(df.shape) # output: (1440, 9)
df["mfcc"]
# output:
# 0 [[-857.3094533443688, 0.0, 0.0, 0.0, 0.0, 0.0,...
# 1 [[-864.8902862773604, 0.0, 0.0, 0.0, 0.0, 0.0,...
# 2 [[-849.4454325616318, 9.397479238778757, 9.257...
# 3 [[-832.7343966188961, 11.492822043371124, 0.14...
# 4 [[-902.4064116162402, 6.517241898027468, 6.427...
# ...
# 1435 [[-764.9126134873547, 0.0, 0.0, 0.0, 0.0, 0.0,...
# 1436 [[-732.3714481202685, 0.0, 0.0, 0.0, 0.0, 0.0,...
# 1437 [[-741.4161339882342, 0.0, 0.0, 0.0, 0.0, 0.0,...
# 1438 [[-713.4635562123195, 0.0, 0.0, 0.0, 0.0, 0.0,...
# 1439 [[-718.5457158330038, 0.0, 0.0, 0.0, 0.0, 0.0,...
# Name: mfcc, Length: 1440, dtype: object
dfinfo - dtypes, shape, info.