From 5990351a4778d286771f7739fa8763f6394cf9c1 Mon Sep 17 00:00:00 2001 From: Lucas Date: Wed, 18 May 2022 18:37:29 -0400 Subject: [PATCH] moving to Tensorflow 2.0 and changing the test train split to use symlinks. --- 1 - ImageGatherer.py | 4 +- 2 - FixFileTypes.py | 5 +- 3 - TestTrainSplit.py | 15 ++-- 4 - TrainingModelKeras.py | 10 +-- 4 - TransferLearningKeras.py | 132 ++++++++++++++++++++--------------- 5 - TestModel.py | 6 +- 6 - KerasToTensorflow.py | 7 +- ShowIncorrectImages.py | 2 - 8 files changed, 98 insertions(+), 83 deletions(-) diff --git a/1 - ImageGatherer.py b/1 - ImageGatherer.py index 2fd9932..4e92693 100644 --- a/1 - ImageGatherer.py +++ b/1 - ImageGatherer.py @@ -59,8 +59,6 @@ def process_pokemon_names(df): return pokes_to_limits -import os - def get_images_for_pokemon(poke_to_limit): pokemon = poke_to_limit[0] @@ -69,7 +67,7 @@ def get_images_for_pokemon(poke_to_limit): response.download( { "keywords": pokemon + " pokemon", - "limit": 1,#limit, + "limit": limit, "chromedriver": "chromedriver" # Add chromedriver to your path or just point this var directly to your chromedriverv } diff --git a/2 - FixFileTypes.py b/2 - FixFileTypes.py index 5c7131b..d66d2e6 100644 --- a/2 - FixFileTypes.py +++ b/2 - FixFileTypes.py @@ -40,8 +40,7 @@ def get_frames_from_gif(infile): try: im = Image.open(infile) except IOError: - print - "Cant load", infile + print("Cant load", infile) sys.exit(1) iterator = 0 @@ -89,7 +88,7 @@ def clean_image(file_root): mutex.acquire() global i i += 1 - if i % 1 == 0: + if i % 100 == 0: print("changing type" + str(i)) mutex.release() diff --git a/3 - TestTrainSplit.py b/3 - TestTrainSplit.py index a1bd120..b99f8a1 100644 --- a/3 - TestTrainSplit.py +++ b/3 - TestTrainSplit.py @@ -1,14 +1,15 @@ import os from random import random from shutil import copyfile, rmtree +from pathlib import Path import multiprocessing train_dir = "./data/train/" test_dir = "./data/test/" val_dir = "./data/val/" train = .80 -test = .15 -val = .05 +test = .10 +val = .10 def add_train_data(file, filename, label): @@ -19,7 +20,7 @@ def add_train_data(file, filename, label): except Exception as e: print(e) try: - copyfile(file, dest) + Path(dest).absolute().symlink_to(Path(file).absolute()) except Exception as e: print(e) print("INVALID FILE") @@ -33,7 +34,8 @@ def add_val_data(file, filename, label): os.makedirs(os.path.dirname(dest)) except Exception as e: print(e) - copyfile(file, dest) + + Path(dest).absolute().symlink_to(Path(file).absolute()) def add_test_data(file, filename, label): @@ -43,7 +45,8 @@ def add_test_data(file, filename, label): os.makedirs(os.path.dirname(dest)) except Exception as e: print(e) - copyfile(file, dest) + + Path(dest).absolute().symlink_to(Path(file).absolute()) def remove_previous(): @@ -62,7 +65,7 @@ def test_split_file(file_root): file = file_root[1] # print(file) - if file is ".DS_Store": + if file == ".DS_Store": return c = random() diff --git a/4 - TrainingModelKeras.py b/4 - TrainingModelKeras.py index 09ea00a..0c6e57f 100644 --- a/4 - TrainingModelKeras.py +++ b/4 - TrainingModelKeras.py @@ -98,7 +98,6 @@ add_model.add(GlobalAveragePooling2D()) add_model.add(Dense(2024, activation='relu')) # Adding some dense layers in order to learn complex functions from the base model -# Potentially throw another dropout layer here if you seem to be overfitting your add_model.add(Dropout(0.5)) add_model.add(Dense(512, activation='relu')) add_model.add(Dense(len(train_gen.class_indices), activation='softmax')) # Decision layer @@ -109,12 +108,11 @@ model.compile(loss='categorical_crossentropy', optimizer=optimizers.Adam(lr=1e-4), metrics=['accuracy']) model.summary() - - - +print( + model.output_shape +) # Now that the model is created we can go ahead and train on it using the image generators we created earlier - file_path = model_name + ".hdf5" checkpoint = ModelCheckpoint(file_path, monitor='val_acc', verbose=1, save_best_only=True, mode='max') @@ -143,8 +141,6 @@ history = model.fit_generator( ) - - # Finally we are going to grab predictions from our model, save it, and then run some analysis on the results predicts = model.predict_generator(test_gen, verbose=True, workers=1, steps=len(test_gen)) diff --git a/4 - TransferLearningKeras.py b/4 - TransferLearningKeras.py index af0b81f..b2c62a7 100644 --- a/4 - TransferLearningKeras.py +++ b/4 - TransferLearningKeras.py @@ -1,117 +1,138 @@ -import pandas as pd -import numpy as np -import seaborn as sn -import matplotlib.pyplot as plt - from time import time + +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import seaborn as sn from PIL import ImageFile +from tensorflow import keras ImageFile.LOAD_TRUNCATED_IMAGES = True -input_shape = (244, 244, 3) +input_shape = (224, 224, 3) -batch_size = 60 -model_name = "MobileNetV2FullDataset" +batch_size = 64 +model_name = "TF2_Mobilenet_V2_transfer" -from keras.preprocessing.image import ImageDataGenerator -from keras.applications.inception_v3 import preprocess_input +# preproc = keras.applications.inception_v3.preprocess_input +preproc = keras.applications.mobilenet_v2.preprocess_input -train_idg = ImageDataGenerator( - # horizontal_flip=True, - preprocessing_function=preprocess_input +train_idg = keras.preprocessing.image.ImageDataGenerator( + horizontal_flip=True, + rescale=1. / 255, + # rotation_range=30, + # width_shift_range=[-.1, .1], + # height_shift_range=[-.1, .1], + # preprocessing_function=preproc ) train_gen = train_idg.flow_from_directory( - './data/train', + './downloads', target_size=(input_shape[0], input_shape[1]), - batch_size=batch_size -) -val_idg = ImageDataGenerator( - # horizontal_flip=True, - preprocessing_function=preprocess_input + batch_size=batch_size, + class_mode='categorical', + shuffle=True, + color_mode='rgb' ) +val_idg = keras.preprocessing.image.ImageDataGenerator( + horizontal_flip=True, + rescale=1. / 255, + # rotation_range=30, + # width_shift_range=[-.1, .1], + # height_shift_range=[-.1, .1], + # preprocessing_function=keras.applications.mobilenet_v2.preprocess_input +) val_gen = val_idg.flow_from_directory( './data/val', target_size=(input_shape[0], input_shape[1]), - batch_size=batch_size + batch_size=batch_size, + class_mode='categorical', + shuffle=True, ) -from keras.applications import inception_v3, mobilenet_v2, vgg16 -from keras.models import Sequential -from keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoard -from keras import optimizers -from keras.layers import Dense, Dropout, GlobalAveragePooling2D - +print((val_gen.classes)) nclass = len(train_gen.class_indices) +print(nclass) +# for _ in range(5): +# img, label = train_gen.next() +# print(img.shape) # (1,256,256,3) +# plt.imshow(img[0]) +# plt.show() +# plt.imshow( # base_model = vgg16.VGG16( # weights='imagenet', # include_top=False, # input_shape=input_shape # ) -# base_model = inception_v3.InceptionV3( +# base_model = keras.applications.InceptionV3( +# weights='imagenet', +# include_top=False, +# input_shape=input_shape +# ) +# base_model = keras.applications.xception.Xception( # weights='imagenet', # include_top=False, # input_shape=input_shape # ) -base_model = mobilenet_v2.MobileNetV2( +base_model = keras.applications.mobilenet_v2.MobileNetV2( weights='imagenet', include_top=False, input_shape=input_shape ) base_model.trainable = False +# i = keras.layers.Input([input_shape[0], input_shape[1], input_shape[2]]) +i = base_model.input +# x = preproc(i) +# x = base_model +x = keras.layers.GlobalAveragePooling2D()(base_model.output) +x = keras.layers.Dense(1024, activation='relu')(x) +x = keras.layers.Dropout(0.5)(x) +output = keras.layers.Dense(nclass, activation='softmax')(x) -add_model = Sequential() -add_model.add(base_model) -add_model.add(GlobalAveragePooling2D()) -add_model.add(Dropout(0.5)) -add_model.add(Dense(1024, activation='relu')) -# Adding some dense layers in order to learn complex functions from the base model -add_model.add(Dropout(0.5)) -add_model.add(Dense(512, activation='relu')) -add_model.add(Dense(nclass, activation='softmax')) # Decision layer +model = keras.Model(inputs=i, outputs=output) -model = add_model -model.compile(loss='categorical_crossentropy', - # optimizer=optimizers.SGD(lr=1e-4, momentum=0.9), - optimizer=optimizers.Adam(lr=1e-4), +model.compile(optimizer=keras.optimizers.Adam(learning_rate=.0001), + loss=keras.losses.CategoricalCrossentropy(), metrics=['accuracy']) + model.summary() +print(model.output_shape) # Train the model file_path = "weights.mobilenet.best.hdf5" -checkpoint = ModelCheckpoint(file_path, monitor='val_acc', verbose=1, save_best_only=True, mode='max') +checkpoint = keras.callbacks.ModelCheckpoint(file_path, monitor='val_loss', verbose=1, save_best_only=True, + mode='min') -early = EarlyStopping(monitor="val_acc", mode="max", patience=15) +early = keras.callbacks.EarlyStopping(monitor="loss", mode="min", patience=15) -tensorboard = TensorBoard( - log_dir="logs/" + model_name + "{}".format(time()), histogram_freq=0, batch_size=batch_size, +tensorboard = keras.callbacks.TensorBoard( + log_dir="logs/" + model_name + "{}".format(time()), + histogram_freq=1, write_graph=True, - write_grads=True, write_images=True, - update_freq=batch_size - + update_freq=1, + profile_batch=2, + embeddings_freq=1 ) callbacks_list = [checkpoint, early, tensorboard] # early -history = model.fit_generator( +history = model.fit( train_gen, - steps_per_epoch=len(train_gen), validation_data=val_gen, - validation_steps=len(val_gen), - epochs=5, + epochs=20, + batch_size=batch_size, shuffle=True, verbose=True, callbacks=callbacks_list - ) # Create Test generator -test_idg = ImageDataGenerator( - preprocessing_function=preprocess_input, +test_idg = keras.preprocessing.image.ImageDataGenerator( + rescale=1. / 255, ) test_gen = test_idg.flow_from_directory( @@ -128,7 +149,6 @@ score = model.evaluate_generator(test_gen, workers=1, steps=len(test_gen)) # predicts predicts = model.predict_generator(test_gen, verbose=True, workers=1, steps=len(test_gen)) - print("Loss: ", score[0], "Accuracy: ", score[1]) print(score) diff --git a/5 - TestModel.py b/5 - TestModel.py index f3ea1cd..ed4c0c9 100644 --- a/5 - TestModel.py +++ b/5 - TestModel.py @@ -59,7 +59,7 @@ df.to_csv("sub1_non_transfer.csv", index=False) acc = accuracy_score(reals, predicts) conf_mat = confusion_matrix(reals, predicts) -print(classification_report(reals, predicts, [l for l in label_index.values()])) +print(classification_report(reals, predicts, labels=[l for l in label_index.values()])) print("Testing accuracy score is ", acc) print("Confusion Matrix", conf_mat) @@ -67,7 +67,9 @@ df_cm = pd.DataFrame(conf_mat, index=[i for i in list(set(reals))], columns=[i for i in list(set(reals))]) print("made dataframe") plt.figure(figsize=(10, 7)) -sn.heatmap(df_cm, annot=True) +print("made plot") +# sn.heatmap(df_cm, annot=True) +print("showing plot") plt.show() with open("labels.txt", "w") as f: diff --git a/6 - KerasToTensorflow.py b/6 - KerasToTensorflow.py index 43b230c..e9bb2e9 100644 --- a/6 - KerasToTensorflow.py +++ b/6 - KerasToTensorflow.py @@ -1,11 +1,10 @@ -from tensorflow.contrib.keras.api import keras -from tensorflow.contrib import lite - +import tensorflow as tf +from tensorflow import keras keras_file = "mobilenetv2.hdf5" keras.models.load_model(keras_file) h5_model = keras.models.load_model(keras_file) -converter = lite.TocoConverter.from_keras_model_file(keras_file) +converter = tf.lite.TFLiteConverter.from_keras_model_file(keras_file) tflite_model = converter.convert() with open('mobilenetv2.tflite', 'wb') as f: diff --git a/ShowIncorrectImages.py b/ShowIncorrectImages.py index 79b75a1..9838280 100644 --- a/ShowIncorrectImages.py +++ b/ShowIncorrectImages.py @@ -1,8 +1,6 @@ import pandas as pd -import numpy as np import matplotlib.pyplot as plt import matplotlib.image as mpimg -from pprint import pprint df = pd.read_csv("sub1_non_transfer.csv") df2 = pd.read_csv("poke_evos.csv")