我是深度学习的新手。我想创建一个分类器,它可以根据驾驶执照 ID 上的国籍来预测国籍名称。为此,我创建了一个美国驾照图像数据集。然后创建训练数据集、数据的标签和特征以及分类模型。
当我运行我的 python 脚本时,准确率为 92%。当我用不同的图像测试我的模型时,它给出了错误的结果。
像这样的 Python 代码:
import numpy as np
import pandas as pd
import h5py
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.callbacks import TensorBoard
import pickle
import os
import cv2
import time
DATADIR = '/home/anupam/Documents/workspace/DjangoProject/DF-Web/datafornix/USADL_DATASET'
CATEGORIES = [
'Alabma',
'Connecticut',
'California',
'Delaware',
'Georgia',
'Indiana',
'Louisiana',
'Maine',
'Massachusetts',
'MaryLand',
'NewHamshire',
'NewJersey',
'NewYork',
'NewMexico',
'Pennsylvania',
'RohodeIsland',
'Vermont',
'Virginia',
]
encoder = LabelEncoder()
city_labels = encoder.fit_transform(CATEGORIES)
# print(city_labels)
encoder = OneHotEncoder(sparse=False)
city_labels = city_labels.reshape((18, 1))
state_array = encoder.fit_transform(city_labels)
training_data = []
IMG_SIZE = 200
def create_traing_dataset():
for category in CATEGORIES:
path = os.path.join(DATADIR, category)
class_num = CATEGORIES.index(category)
for img in os.listdir(path):
for x in state_array:
try:
img_array = cv2.imread(os.path.join(path, img), cv2.IMREAD_GRAYSCALE)
new_array = cv2.resize(img_array, (IMG_SIZE, IMG_SIZE))
training_data.append([new_array, x])
except Exception as e:
pass
create_traing_dataset()
#Randomize the dataset
import random
random.shuffle(training_data)
#Create a model
X = []
y = []
for features,label in training_data:
X.append(features)
y.append(label)
X = np.array(X).reshape(-1, IMG_SIZE, IMG_SIZE, 1)
pickle_out = open("X.pickle","wb")
pickle.dump(X, pickle_out)
pickle_out.close()
pickle_out = open("y.pickle","wb")
pickle.dump(y, pickle_out)
pickle_out.close()
pickle_in = open("X.pickle","rb")
X = pickle.load(pickle_in)
pickle_in = open("y.pickle","rb")
y = pickle.load(pickle_in)
y_data = []
for i in y:
y_data.append(i[0])
y = np.array(y_data)
print(y)
X = X/255.0
print(X.size)
dense_layers = [0]
layer_sizes = [64]
conv_layers = [3]
for dense_layer in dense_layers:
for layer_size in layer_sizes:
for conv_layer in conv_layers:
NAME = "{}-conv-{}-nodes-{}-dense-{}".format(conv_layer, layer_size, dense_layer, int(time.time()))
print(NAME)
model = Sequential()
model.add(Conv2D(layer_size, (3, 3), input_shape=X.shape[1:]))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
for l in range(conv_layer-1):
model.add(Conv2D(layer_size, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
for _ in range(dense_layer):
model.add(Dense(layer_size))
model.add(Activation('relu'))
model.add(Dense(1))
model.add(Activation('sigmoid'))
tensorboard = TensorBoard(log_dir="logs/{}".format(NAME))
model.compile(loss='binary_crossentropy',
optimizer='adam',
metrics=['accuracy'],
)
model.fit(X, y,
batch_size=32,
epochs=10,
validation_split=0.3,
callbacks=[tensorboard])
model.save('64x3-CNN.model')
并像这样测试分类器模型:-
import cv2
import tensorflow as tf
model = tf.keras.models.load_model("64x3-CNN.model")
CATEGORIES = [
'Alabma',
'Connecticut',
'California',
'Delaware',
'Georgia',
'Indiana',
'Louisiana',
'Maine',
'Massachusetts',
'MaryLand',
'NewHamshire',
'NewJersey',
'NewYork',
'NewMexico',
'Pennsylvania',
'RohodeIsland',
'Vermont',
'Virginia',
]
def prepare(filepath):
IMG_SIZE = 200 # 50 in txt-based
img_array = cv2.imread(filepath, cv2.IMREAD_GRAYSCALE) # read in the image, convert to grayscale
new_array = cv2.resize(img_array, (IMG_SIZE, IMG_SIZE)) # resize image to match model's expected sizing
return new_array.reshape(-1, IMG_SIZE, IMG_SIZE, 1) # return the image with shaping that TF wants.
prediction = model.predict([prepare('/home/anupam/Documents/workspace/DjangoProject/DF-Web/datafornix/download.jpeg')]) # REMEMBER YOU'RE PASSING A LIST OF THINGS YOU WISH TO PREDICT
print(prediction)
print(CATEGORIES[int(prediction[0][0])])
我认为,我对训练数据集的特征和标签有误。如果我错了。我如何解决这种情况?
找到最佳结果的方法是什么?
请帮助清除我的上述查询。
谢谢