Adding the first data-preprocessing step, as well as the first model training.

This commit is contained in:
Lucas Oskorep
2019-04-14 14:32:08 -05:00
parent 81a48a51ae
commit bc44d30180
11 changed files with 600 additions and 4 deletions
Regular → Executable
View File
+108
View File
@@ -0,0 +1,108 @@
import glob
import subprocess
import os
import re
import logging
import traceback
from random import randint
import imghdr
import PIL
from PIL import Image
import sys
# Root folder produced by the image-download script; every cleanup pass
# below walks this tree in place.
directory = "downloads"
def random_with_N_digits(n):
    """Return a uniformly random integer with exactly *n* decimal digits."""
    lower = 10 ** (n - 1)
    upper = 10 ** n - 1
    return randint(lower, upper)
def change_file_extension(file_obj, extension):
    """Rename *file_obj* so it carries *extension* (e.g. ".png").

    Tries ``base + extension`` first, then ``original_name + extension`` if
    that already exists; gives up (with a message) if both are taken.
    """
    old_path = os.path.splitext(file_obj)
    if not os.path.isfile(old_path[0] + extension):
        new_file = old_path[0] + extension
    elif not os.path.isfile(file_obj + extension):
        new_file = file_obj + extension
    else:
        print(f"Found {extension} hiding as JPEG but couldn't rename:", file_obj)
        return
    print(f"Found {extension} hiding as JPEG, renaming:", file_obj, '->', new_file)
    # os.rename is portable and avoids spawning a `mv` subprocess per file
    # (the original shelled out to POSIX-only `mv`).
    os.rename(file_obj, new_file)
def get_frames_from_gif(infile):
    """Explode an animated GIF into per-frame JPEGs next to the source file.

    Each frame is converted to RGBA, composited onto a white background
    (so transparency becomes white), and saved as fooN.jpg at quality 80.
    Exits the process if the GIF cannot be opened.
    """
    try:
        im = Image.open(infile)
    except IOError:
        # BUG FIX: the original had a Python-2-style `print` split across two
        # lines, which is a silent no-op in Python 3 — the failure message
        # was never shown before exiting.
        print("Cant load", infile)
        sys.exit(1)
    i = 0
    try:
        while True:
            im2 = im.convert('RGBA')
            im2.load()
            filename = os.path.join(os.path.dirname(infile), 'foo' + str(i) + '.jpg')
            background = Image.new("RGB", im2.size, (255, 255, 255))
            # Use the alpha band as a paste mask so transparent pixels
            # become white instead of black.
            background.paste(im2, mask=im2.split()[3])
            background.save(filename, 'JPEG', quality=80)
            # BUG FIX: the message previously printed a literal placeholder
            # instead of the actual frame path.
            print(f"FOUND GIF, SAVING FRAME AS {filename}")
            i += 1
            im.seek(im.tell() + 1)
    except EOFError:
        pass  # end of sequence — no explicit frame count in the GIF API
# Pass 1: normalise every downloaded file by its *actual* image type.
# Files that aren't images are deleted; JPEG/PNG files with wrong
# extensions are renamed; GIFs are exploded into frames and removed.
for root, dirs, files in os.walk(directory):
    for file in files:
        try:
            file_obj = os.path.join(root, file)
            exten = os.path.splitext(file)[1].lower()
            # imghdr sniffs the real format from file contents, not the name.
            img_type = imghdr.what(file_obj)
            if img_type is None:
                os.remove(file_obj)  # not an image at all
            elif "jpeg" in img_type:
                if "jpeg" not in exten and "jpg" not in exten:
                    change_file_extension(file_obj, ".jpeg")
            elif "png" in img_type:
                if "png" not in exten:
                    change_file_extension(file_obj, ".png")
            elif "gif" in img_type:
                get_frames_from_gif(file_obj)
                os.remove(file_obj)  # frames saved; the GIF itself goes
            else:
                os.remove(file_obj)  # unsupported image type
        except Exception as e:
            # Best-effort cleanup: log and keep walking.
            logging.error(traceback.format_exc())

# Pass 2: give every surviving file a unique "<counter>-<random10digits>"
# name, skipping the fooN.jpg GIF frames produced above.
i = 1
for root, dirs, files in os.walk(directory):
    for file in files:
        try:
            file_obj = os.path.join(root, file)
            path, file_base_name = os.path.split(file_obj)
            old_path = os.path.splitext(file_base_name)
            old_ext = old_path[1]
            old_name = old_path[0]
            new_file = os.path.join(path, str(i) + "-" + str(random_with_N_digits(10)) + old_ext)
            # NOTE(review): original indentation was lost; the rename and
            # counter increment are assumed to be inside this guard so that
            # GIF-frame files ("foo*") are left untouched — confirm.
            if file_obj != new_file and "foo" not in old_name:
                print(f"Moving file\n"
                      f"{new_file}\n"
                      f"{file_obj}")
                subprocess.run(['mv', file_obj, new_file])
                i += 1
        except Exception as e:
            logging.error(traceback.format_exc())
print("Cleaning JPEGs done")
Regular → Executable
+4 -2
View File
@@ -6,12 +6,14 @@ df = pd.read_csv("pokemon.csv")
response = google_images_download.googleimagesdownload() response = google_images_download.googleimagesdownload()
for pokemon in df["identifier"][:251]: for pokemon in ["abra", "xatu", "yanma", "zapdos", "zubat"]: # df["identifier"][:251]:
absolute_image_paths = response.download( absolute_image_paths = response.download(
{ {
"keywords": pokemon, "keywords": pokemon,
"limit": 250, "limit": 250,
"chromedriver": "/usr/lib/chromium-browser/chromedriver" "chromedriver": "/usr/lib/chromium-browser/chromedriver",
# This needs to be changed based on the computer trying to download the images
"format": "jpg"
} }
) )
+12
View File
@@ -0,0 +1,12 @@
from tensorflow import keras
from tensorflow.contrib import lite

# Best checkpoint produced by the MobileNet training run.
keras_file = "weights.mobilenet.non-transfer.best.hdf5"

# Sanity-check that the checkpoint loads before converting it.
# (BUG FIX: the original called load_model twice, discarding the first
# result — one load is sufficient.)
h5_model = keras.models.load_model(keras_file)

# TOCO converts the Keras HDF5 checkpoint into a TensorFlow Lite
# flatbuffer suitable for mobile deployment.
converter = lite.TocoConverter.from_keras_model_file(keras_file)
tflite_model = converter.convert()
with open('mobilenet.tflite', 'wb') as f:
    f.write(tflite_model)
Regular → Executable
View File
+80
View File
@@ -0,0 +1,80 @@
import os
from random import random
from shutil import copyfile, rmtree
# Destination directories for the train/test/validation split.
train_dir = "./data/train/"
test_dir = "./data/test/"
val_dir = "./data/val/"

# Split fractions; must sum to 1.0 (75% train / 20% test / 5% validation).
train = .75
test = .20
val = .05
def add_train_data(file, filename, label):
    """Copy *file* into the training set under its class *label*.

    If the copy fails (unreadable/corrupt source), the source file is
    deleted so it cannot poison later runs.
    """
    dest = train_dir + label + "/" + filename
    print(dest, label, filename)
    try:
        # exist_ok avoids the check-then-create race of the original code.
        os.makedirs(os.path.dirname(dest), exist_ok=True)
    except OSError as e:
        print(e)
    try:
        copyfile(file, dest)
    except Exception as e:
        print(e)
        print("INVALID FILE")
        os.remove(file)
def add_val_data(file, filename, label):
    """Copy *file* into the validation set under its class *label*."""
    dest = val_dir + label + "/" + filename
    parent = os.path.dirname(dest)
    if not os.path.exists(parent):
        try:
            os.makedirs(parent)
        except Exception as err:
            print(err)
    copyfile(file, dest)
def add_test_data(file, filename, label):
    """Copy *file* into the test set under its class *label*."""
    dest = test_dir + label + "/" + filename
    parent = os.path.dirname(dest)
    if not os.path.exists(parent):
        try:
            os.makedirs(parent)
        except Exception as err:
            print(err)
    copyfile(file, dest)
def remove_previous():
    """Delete any train/test/val output left over from a previous split run."""
    for target in (test_dir, train_dir, val_dir):
        if os.path.exists(os.path.dirname(target)):
            rmtree(target)
# Wipe any previous split, then randomly assign every downloaded image to
# train/val/test according to the fractions defined above.
remove_previous()
files_processed = 0
for root, dirs, files in os.walk("downloads/"):
    for file in files:
        print(file)
        # BUG FIX: the original used `file is ".DS_Store"`, an identity
        # comparison that is never reliably true for strings; use equality
        # so macOS metadata files are actually skipped.
        if file == ".DS_Store":
            continue
        c = random()
        # The class label is the name of the immediate parent directory.
        label = root.split("/")[-1]
        if c < train:
            add_train_data(os.path.join(root, file), file, label)
        elif c < (train + val):
            add_val_data(os.path.join(root, file), file, label)
        else:
            add_test_data(os.path.join(root, file), file, label)
        files_processed += 1
        print(label)
print(files_processed)
print(file)
+167
View File
@@ -0,0 +1,167 @@
# ---------------------------------------------------------------------------
# Train MobileNetV2 from ImageNet weights with ALL layers trainable (no
# transfer-learning freeze) on the directory-per-class dataset produced by
# the preprocessing scripts, then evaluate on the held-out test split and
# dump predictions to CSV.
# ---------------------------------------------------------------------------
import tensorflow as tf
import pandas as pd
import numpy as np
import os
import seaborn as sn
import matplotlib.pyplot as plt
from tensorflow import keras
from time import time
from PIL import ImageFile

# Some scraped images are truncated; let PIL load them anyway instead of
# raising mid-epoch.
ImageFile.LOAD_TRUNCATED_IMAGES = True

input_shape = (224, 224, 3)  # MobileNetV2's default input resolution
batch_size = 32
model_name = "MobileNetV2FullDatasetNoTransfer"

from keras.preprocessing.image import ImageDataGenerator
from keras.applications.inception_v3 import preprocess_input

# Training generator: horizontal flips for augmentation plus the
# Inception-style pixel scaling used as the preprocessing function.
train_idg = ImageDataGenerator(
    horizontal_flip=True,
    preprocessing_function=preprocess_input
)
train_gen = train_idg.flow_from_directory(
    './data/train',
    target_size=(input_shape[0], input_shape[1]),
    batch_size=batch_size
)
# Validation generator mirrors the training preprocessing.
val_idg = ImageDataGenerator(
    horizontal_flip=True,
    preprocessing_function=preprocess_input
)
val_gen = val_idg.flow_from_directory(
    './data/val',
    target_size=(input_shape[0], input_shape[1]),
    batch_size=batch_size
)

from keras.applications import inception_v3, mobilenet_v2, vgg16
from keras.models import Sequential
from keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoard
from keras import optimizers
from keras.layers import Dense, Dropout, GlobalAveragePooling2D

# Number of output classes is inferred from the training directory layout.
nclass = len(train_gen.class_indices)

# Alternative backbones that were tried; kept for reference.
# base_model = vgg16.VGG16(
#     weights='imagenet',
#     include_top=False,
#     input_shape=input_shape
# )
# base_model = inception_v3.InceptionV3(
#     weights='imagenet',
#     include_top=False,
#     input_shape=input_shape
# )
base_model = mobilenet_v2.MobileNetV2(
    weights='imagenet',
    include_top=False,
    input_shape=input_shape
)

# Classification head: GAP -> Dense(1024) -> Dense(512) -> softmax, with
# dropout between the large dense layers.
add_model = Sequential()
add_model.add(base_model)
add_model.add(GlobalAveragePooling2D())
add_model.add(Dropout(0.5))
add_model.add(
    Dense(1024, activation='relu'))  # dense layers learn task-specific features on top of the backbone
# Potentially add another dropout layer here if the model overfits.
add_model.add(Dropout(0.5))
add_model.add(Dense(512, activation='relu'))
add_model.add(Dense(nclass, activation='softmax'))  # decision layer
model = add_model
model.compile(loss='categorical_crossentropy',
              # optimizer=optimizers.SGD(lr=1e-4, momentum=0.9),
              optimizer=optimizers.Adam(lr=1e-4),
              metrics=['accuracy'])
model.summary()

# Train the model: checkpoint the best val_acc, stop early after 15 stale
# epochs, and log to TensorBoard.
file_path = "weights.mobilenet.non-transfer.best.hdf5"
checkpoint = ModelCheckpoint(file_path, monitor='val_acc', verbose=1, save_best_only=True, mode='max')
early = EarlyStopping(monitor="val_acc", mode="max", patience=15)
tensorboard = TensorBoard(
    log_dir="logs/" + model_name + "{}".format(time()), histogram_freq=0, batch_size=32,
    write_graph=True,
    write_grads=True,
    write_images=True,
    update_freq=batch_size
)
callbacks_list = [checkpoint, early, tensorboard]  # early
history = model.fit_generator(
    train_gen,
    validation_data=val_gen,
    epochs=2,
    shuffle=True,
    verbose=True,
    callbacks=callbacks_list
)

# Test generator: no augmentation, and shuffle=False so predictions line up
# with test_gen.filenames / test_gen.classes below.
test_idg = ImageDataGenerator(
    preprocessing_function=preprocess_input,
)
test_gen = test_idg.flow_from_directory(
    './data/test',
    target_size=(input_shape[0], input_shape[1]),
    batch_size=batch_size,
    shuffle=False
)
len(test_gen.filenames)
score = model.evaluate_generator(test_gen, workers=1)
# Per-sample class-probability predictions.
predicts = model.predict_generator(test_gen, verbose=True, workers=1)
keras_file = 'finished.h5'
keras.models.save_model(model, keras_file)
print("Loss: ", score[0], "Accuracy: ", score[1])
print(score)
print(predicts)
print(type(predicts))
print(predicts.shape)

# Process the predictions: argmax over probabilities -> class index.
predicts = np.argmax(predicts,
                     axis=1)
# test_gen.reset()
# Invert class_indices so indices map back to class names.
label_index = {v: k for k, v in train_gen.class_indices.items()}
predicts = [label_index[p] for p in predicts]
reals = [label_index[p] for p in test_gen.classes]

# Save the results: one row per test file with prediction and ground truth.
print(label_index)
print(test_gen.classes)
print(test_gen.classes.shape)
print(type(test_gen.classes))
df = pd.DataFrame(columns=['fname', 'prediction', 'true_val'])
df['fname'] = [x for x in test_gen.filenames]
df['prediction'] = predicts
df["true_val"] = reals
df.to_csv("sub1_non_transfer.csv", index=False)

# Post-process the saved results: accuracy + confusion-matrix heatmap.
from sklearn.metrics import accuracy_score, confusion_matrix

acc = accuracy_score(reals, predicts)
conf_mat = confusion_matrix(reals, predicts)
print("Testing accuracy score is ", acc)
print("Confusion Matrix", conf_mat)
df_cm = pd.DataFrame(conf_mat, index=[i for i in list(set(reals))],
                     columns=[i for i in list(set(reals))])
plt.figure(figsize=(10, 7))
sn.heatmap(df_cm, annot=True)
plt.show()
+167
View File
@@ -0,0 +1,167 @@
# ---------------------------------------------------------------------------
# Transfer-learning variant of the training script: MobileNetV2 backbone is
# FROZEN (trainable = False) and only the classification head is trained.
# Evaluates on the test split and dumps predictions to sub1.csv.
# ---------------------------------------------------------------------------
import pandas as pd
import numpy as np
import seaborn as sn
import matplotlib.pyplot as plt
from time import time
from PIL import ImageFile

# Some scraped images are truncated; let PIL load them anyway.
ImageFile.LOAD_TRUNCATED_IMAGES = True

input_shape = (224, 224, 3)  # MobileNetV2's default input resolution
batch_size = 60
model_name = "MobileNetV2FullDataset"

from keras.preprocessing.image import ImageDataGenerator
from keras.applications.inception_v3 import preprocess_input

# Training generator (flip augmentation disabled in this run).
train_idg = ImageDataGenerator(
    # horizontal_flip=True,
    preprocessing_function=preprocess_input
)
train_gen = train_idg.flow_from_directory(
    './data/train',
    target_size=(input_shape[0], input_shape[1]),
    batch_size=batch_size
)
val_idg = ImageDataGenerator(
    # horizontal_flip=True,
    preprocessing_function=preprocess_input
)
val_gen = val_idg.flow_from_directory(
    './data/val',
    target_size=(input_shape[0], input_shape[1]),
    batch_size=batch_size
)

from keras.applications import inception_v3, mobilenet_v2, vgg16
from keras.models import Sequential
from keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoard
from keras import optimizers
from keras.layers import Dense, Dropout, GlobalAveragePooling2D

# Number of output classes is inferred from the training directory layout.
nclass = len(train_gen.class_indices)

# Alternative backbones that were tried; kept for reference.
# base_model = vgg16.VGG16(
#     weights='imagenet',
#     include_top=False,
#     input_shape=input_shape
# )
# base_model = inception_v3.InceptionV3(
#     weights='imagenet',
#     include_top=False,
#     input_shape=input_shape
# )
base_model = mobilenet_v2.MobileNetV2(
    weights='imagenet',
    include_top=False,
    input_shape=input_shape
)
# Freeze the backbone: only the head below is updated during training.
base_model.trainable = False

add_model = Sequential()
add_model.add(base_model)
add_model.add(GlobalAveragePooling2D())
add_model.add(Dropout(0.5))
add_model.add(Dense(1024, activation='relu'))
# Dense layers learn task-specific features on top of the frozen backbone.
add_model.add(Dropout(0.5))
add_model.add(Dense(512, activation='relu'))
add_model.add(Dense(nclass, activation='softmax'))  # decision layer
model = add_model
model.compile(loss='categorical_crossentropy',
              # optimizer=optimizers.SGD(lr=1e-4, momentum=0.9),
              optimizer=optimizers.Adam(lr=1e-4),
              metrics=['accuracy'])
model.summary()

# Train the model: checkpoint the best val_acc, stop early after 15 stale
# epochs, and log to TensorBoard.
file_path = "weights.mobilenet.best.hdf5"
checkpoint = ModelCheckpoint(file_path, monitor='val_acc', verbose=1, save_best_only=True, mode='max')
early = EarlyStopping(monitor="val_acc", mode="max", patience=15)
tensorboard = TensorBoard(
    log_dir="logs/" + model_name + "{}".format(time()), histogram_freq=0, batch_size=batch_size,
    write_graph=True,
    write_grads=True,
    write_images=True,
    update_freq=batch_size
)
callbacks_list = [checkpoint, early, tensorboard]  # early
history = model.fit_generator(
    train_gen,
    steps_per_epoch=len(train_gen),
    validation_data=val_gen,
    validation_steps=len(val_gen),
    epochs=5,
    shuffle=True,
    verbose=True,
    callbacks=callbacks_list
)

# Test generator: no augmentation, shuffle=False so predictions line up
# with test_gen.filenames / test_gen.classes below.
test_idg = ImageDataGenerator(
    preprocessing_function=preprocess_input,
)
test_gen = test_idg.flow_from_directory(
    './data/test',
    target_size=(input_shape[0], input_shape[1]),
    batch_size=batch_size,
    shuffle=False
)
len(test_gen.filenames)
score = model.evaluate_generator(test_gen, workers=1)
# Per-sample class-probability predictions.
predicts = model.predict_generator(test_gen, verbose=True, workers=1)
print("Loss: ", score[0], "Accuracy: ", score[1])
print(score)
print(predicts)
print(type(predicts))
print(predicts.shape)

# Process the predictions: argmax over probabilities -> class index.
predicts = np.argmax(predicts,
                     axis=1)
# test_gen.reset()
# Invert class_indices so indices map back to class names.
label_index = {v: k for k, v in train_gen.class_indices.items()}
predicts = [label_index[p] for p in predicts]
reals = [label_index[p] for p in test_gen.classes]

# Save the results: one row per test file with prediction and ground truth.
print(label_index)
print(test_gen.classes)
print(test_gen.classes.shape)
print(type(test_gen.classes))
df = pd.DataFrame(columns=['fname', 'prediction', 'true_val'])
df['fname'] = [x for x in test_gen.filenames]
df['prediction'] = predicts
df["true_val"] = reals
df.to_csv("sub1.csv", index=False)

# Post-process the saved results: accuracy + confusion-matrix heatmap.
from sklearn.metrics import accuracy_score, confusion_matrix

acc = accuracy_score(reals, predicts)
conf_mat = confusion_matrix(reals, predicts)
print("Testing accuracy score is ", acc)
print("Confusion Matrix", conf_mat)
df_cm = pd.DataFrame(conf_mat, index=[i for i in list(set(reals))],
                     columns=[i for i in list(set(reals))])
plt.figure(figsize=(10, 7))
sn.heatmap(df_cm, annot=True)
plt.show()
+62
View File
@@ -0,0 +1,62 @@
import tensorflow as tf
import pandas as pd
import numpy as np
import os
import seaborn as sn
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, confusion_matrix
def print_preds(reals, preds, labels=("Block", "Meter", "Sign")):
    """Print accuracy and a confusion matrix for *preds* vs *reals*, then
    show a heatmap of the matrix.

    Args:
        reals: ground-truth labels, one per sample.
        preds: predicted labels, one per sample. BUG FIX: the original body
            ignored this parameter and read the module-level ``predicts``
            instead, so every call scored the same predictions.
        labels: heatmap axis labels; defaults to the class names the
            original hard-coded, so existing callers are unchanged.
    """
    acc = accuracy_score(reals, preds)
    conf_mat = confusion_matrix(reals, preds)
    print("Testing accuracy score is ", acc)
    print("Confusion Matrix", conf_mat)
    df_cm = pd.DataFrame(conf_mat, index=list(labels),
                         columns=list(labels))
    plt.figure(figsize=(10, 7))
    sn.heatmap(df_cm, annot=True)
    plt.show()
# Re-analyse a saved prediction CSV: recompute accuracy/confusion matrix and
# copy every misclassified image into a "failed" folder for manual review.
# NOTE(review): path handling uses Windows separators (".\\photos", "\\") —
# this stage apparently ran on Windows; confirm before reusing elsewhere.
data = pd.read_csv("sub1_non_transfer.csv")
files_list = list(data["fname"])
reals = list(data["true_val"])
predicts = list(data["prediction"])
reals2 = []
wrong_files = []
for root, dirs, files in os.walk(".\\photos"):
    for file in files:
        if file in files_list:
            # Row layout is [fname, prediction, true_val]; a mismatch between
            # columns 1 and 2 means the model got this image wrong.
            x = data.loc[data["fname"] == file].values[0]
            if (x[1] != x[2]):
                print(x)
                wrong_files.append((os.path.join(root, file), x[1]))
            # Label inferred from the parent directory name on disk.
            reals2.append(root.split("\\")[-1])
print_preds(reals, predicts)
print_preds(reals2, predicts)

import matplotlib.image as mpimg
from shutil import copyfile, rmtree

for file, pred in wrong_files:
    print(file)
    # img = mpimg.imread(file)
    # imgplot = plt.imshow(img)
    # Build the destination path: photos\... -> failed\..., prepending the
    # wrong prediction to the file name so failures are self-describing.
    dest = file.split("\\")
    dest[1] = "failed"
    dest[-1] = pred + dest[-1]
    dest = "\\".join(dest)
    if not os.path.exists(os.path.dirname(dest)):
        try:
            os.makedirs(os.path.dirname(dest))
        except Exception as e:
            print(e)
    copyfile(file, dest)
plt.show()
Regular → Executable
View File
-2
View File
@@ -1,2 +0,0 @@