You can convert numpy slices to lists to add them to the dataframe.
Set up the dataframe
import numpy as np
import pandas as pd
input_data = np.array([
[11, 0.34, 0.56, 0.99, 0.24, 0.44, 9],
[23, 0.12, 0.66, 0.21, 0.21, 0.24, 4]
])
df = pd.DataFrame({
'ID.': input_data[:,0].astype(np.int),
'scalar':input_data[:,-1].astype(np.int)
})
Add input_data as lists to the dataframe
df['vector1'] = input_data[:,1:4].tolist()
df['vector2'] = input_data[:,4:6].tolist()
df[['ID.', 'vector1', 'vector2', 'scalar']]
Output
ID. vector1 vector2 scalar
0 11 [0.34, 0.56, 0.99] [0.24, 0.44] 9
1 23 [0.12, 0.66, 0.21] [0.21, 0.24] 4
Micro-Benchmark
As both solutions are not vectorized, I was wondering which performance was impacted more.
Results

Code for the benchmark
import numpy as np
import pandas as pd
def create_data(n):
input_data = np.array([
[11, 0.34, 0.56, 0.99, 0.24, 0.44, 9],
[23, 0.12, 0.66, 0.21, 0.21, 0.24, 4]
])
return np.repeat(input_data, n, 0)
def add_with_list(input_data):
df = pd.DataFrame({
'ID.': input_data[:,0].astype(np.int),
'scalar':input_data[:,-1].astype(np.int)
})
df['vector1'] = input_data[:,1:4].tolist()
df['vector2'] = input_data[:,4:6].tolist()
return df[['ID.', 'vector1', 'vector2', 'scalar']]
def add_with_apply(input_data):
df = pd.DataFrame(input_data)
df2 = df[0].to_frame()
df2.columns = ['ID.']
df2['vector1'] = df.apply(lambda x: [x[1], x[2], x[3]], axis=1)
df2['vector2'] = df.apply(lambda x: [x[4], x[5]], axis=1)
df2['scalar'] = df[6]
return df2
import perfplot
perfplot.show(
setup=create_data,
kernels=[add_with_list, add_with_apply],
n_range=[2**k for k in range(2,20)],
logx=True,
logy=True,
equality_check=False,
xlabel='len(df)')