或者,您也可以逐列遍历(请参阅 Test3)。总体思路:不要逐行向 DataFrame 追加,而是每次对所有行同时处理一列。
结果(耗时,单位:秒):
- Test1:232.96580633900157(@IsuShrestha,展平每一行,逐行追加)
- Test2:6.919965944998694(@BenReiniger,将每一行展平)
- Test3:0.3909464059997845(逐列追加=一次性添加所有行=29 次追加)
(AMD A8-3870 APU,带 Radeon(tm) 高清显卡,3000 MHz,4 核,4 个逻辑处理器,8 GB 内存,Windows 10 64 位)
import numpy as np
import pandas as pd
# https://stackoverflow.com/questions/7370801/how-to-measure-elapsed-time-in-python
from timeit import default_timer as timer
# Benchmark fixture: one 30x30 uint8 image paired with its label, repeated
# N_SAMPLES times and wrapped in a two-column DataFrame.
N_SAMPLES = 7000

# The image consists of a unique first line, two lines that alternate 14
# times (image lines 2-29), and a unique last line.
_FIRST_LINE = [
    255, 248, 253, 255, 251, 253, 254, 236, 220, 217, 191, 145, 139,
    185, 216, 227, 252, 251, 254, 248, 251, 236, 221, 222, 213, 175,
    120, 75, 74, 209,
]
_ODD_LINE = [
    255, 255, 255, 248, 252, 255, 202, 88, 15, 16, 14, 11, 11,
    12, 12, 20, 40, 46, 38, 43, 40, 25, 21, 19, 17, 35,
    53, 58, 64, 124,
]
_EVEN_LINE = [
    255, 253, 254, 253, 252, 254, 223, 146, 87, 75, 58, 30, 27,
    58, 86, 116, 157, 168, 164, 165, 167, 136, 96, 71, 59, 49,
    21, 9, 27, 144,
]
_LAST_LINE = [
    255, 255, 254, 254, 253, 252, 253, 254, 255, 255, 254, 252, 249,
    249, 251, 213, 126, 178, 231, 252, 248, 250, 254, 254, 252, 253,
    255, 255, 255, 255,
]

# One sample: [pixel array, label].
row = [
    np.array(
        [_FIRST_LINE] + [_ODD_LINE, _EVEN_LINE] * 14 + [_LAST_LINE],
        dtype='uint8',
    ),
    "क",
]

# Every entry refers to the same `row` object, exactly as repeated appends of
# `row` would; the benchmarked functions only read the pixel arrays.
data_np = [row] * N_SAMPLES
data = pd.DataFrame(data_np, columns=['pixels', 'labels'])
# Test1 (@IsuShrestha, flatten each row, append row-wise)
def prep_pixels(X):
    """Flatten every pixel array in ``X`` into one row of a new DataFrame.

    This is the row-by-row strategy of the comparison: the result is grown by
    one single-row frame per element of ``X``, so a new frame is built on
    every iteration.

    Args:
        X: Iterable of array-likes; in this file a ``pandas.Series`` whose
            elements are 2-D ``numpy`` arrays. Elements are visited in
            positional order, so the index labels of a Series do not matter.

    Returns:
        A ``pandas.DataFrame`` with one row per element of ``X`` (each row
        being the flattened element) and a fresh 0..n-1 row index. An empty
        ``DataFrame`` is returned when ``X`` has no elements.
    """
    df = pd.DataFrame()
    for position, pixels in enumerate(X):
        # A 1 x size frame built from a 2-D array keeps the pixel dtype.
        row_frame = pd.DataFrame(np.ravel(pixels).reshape(1, -1))
        if position == 0:
            # Start from the first row instead of concatenating onto the
            # empty frame, whose dtype handling varies across pandas versions.
            df = row_frame
        else:
            # DataFrame.append was removed in pandas 2.0; pd.concat is the
            # documented replacement and keeps the row-wise growth.
            df = pd.concat([df, row_frame], ignore_index=True)
    return df
# Benchmark run for Test1: time one full conversion of the 'pixels' column,
# then print the elapsed seconds followed by the shape of the result.
start = timer()
test = prep_pixels(data['pixels'])
elapsed = timer() - start
print(elapsed)
print(test.shape)
# Output recorded from the original run:
#   232.96580633900157
#   (7000, 900)
# Test2 (@BenReiniger, flatten each row)
def prep_pixels2(X):
    """Flatten each pixel array in ``X`` and build the DataFrame in one step.

    Args:
        X: ``pandas.Series`` whose elements are ``numpy`` arrays (anything
            exposing ``.flatten()``).

    Returns:
        A ``pandas.DataFrame`` with one row per element of ``X``, each row
        holding that element's flattened values.
    """
    # Element-wise flatten: the result is a Series of 1-D arrays.
    flat_rows = X.apply(lambda pixels: pixels.flatten())
    # Hand all flattened rows to the constructor at once; no incremental
    # growth of the frame is involved.
    return pd.DataFrame(list(flat_rows))
# Benchmark run for Test2: time one full conversion of the 'pixels' column,
# then print the elapsed seconds followed by the shape of the result.
start = timer()
test2 = prep_pixels2(data['pixels'])
elapsed = timer() - start
print(elapsed)
print(test2.shape)
# Output recorded from the original run:
#   6.919965944998694
#   (7000, 900)
# Test3 (append column-wise = over all rows in one go = 29 appends)
def prep_pixels3(X):
    """Flatten 2-D pixel arrays by appending one image line at a time.

    For every line index the corresponding line of *all* images is gathered
    into one 2-D block and appended to the right of the result, so the number
    of appends equals the number of lines per image minus one, independent of
    how many images there are.

    Args:
        X: Iterable of equally shaped 2-D arrays; in this file a 1-D object
            ``numpy`` array obtained via ``Series.to_numpy()``. Elements are
            taken in positional order.

    Returns:
        A ``pandas.DataFrame`` with one row per image, each row holding the
        image's lines laid end to end. An empty ``DataFrame`` is returned
        when ``X`` has no elements.
    """
    images = list(X)
    if not images:
        # Nothing to read the line count from; mirror the empty result of
        # the other prep_pixels variants instead of raising IndexError.
        return pd.DataFrame()

    # Seed the result with line 0 of every image: shape (n_images, width).
    flat = np.array([image[0] for image in images])
    for line_idx in range(1, len(images[0])):
        next_lines = np.array([image[line_idx] for image in images])
        # Widen every row at once by the next line of its image.
        flat = np.append(arr=flat, values=next_lines, axis=1)
    return pd.DataFrame(flat)
# Benchmark run for Test3: the 'pixels' column is converted to a NumPy array
# before the call; print the elapsed seconds, then the shape of the result.
start = timer()
test3 = prep_pixels3(data['pixels'].to_numpy())
elapsed = timer() - start
print(elapsed)
print(test3.shape)
# Output recorded from the original run:
#   0.3909464059997845
#   (7000, 900)