From 5990351a4778d286771f7739fa8763f6394cf9c1 Mon Sep 17 00:00:00 2001
From: Lucas <lucas.oskorep@gmail.com>
Date: Wed, 18 May 2022 18:37:29 -0400
Subject: [PATCH] Move to TensorFlow 2.0 and change the test/train split to
 use symlinks.

---
 1 - ImageGatherer.py         |   4 +-
 2 - FixFileTypes.py          |   5 +-
 3 - TestTrainSplit.py        |  15 ++--
 4 - TrainingModelKeras.py    |  10 +--
 4 - TransferLearningKeras.py | 132 ++++++++++++++++++++---------------
 5 - TestModel.py             |   6 +-
 6 - KerasToTensorflow.py     |   7 +-
 ShowIncorrectImages.py       |   2 -
 8 files changed, 98 insertions(+), 83 deletions(-)

diff --git a/1 - ImageGatherer.py b/1 - ImageGatherer.py
index 2fd9932..4e92693 100644
--- a/1 - ImageGatherer.py	
+++ b/1 - ImageGatherer.py	
@@ -59,8 +59,6 @@ def process_pokemon_names(df):
     return pokes_to_limits
 
 
-import os
-
 
 def get_images_for_pokemon(poke_to_limit):
     pokemon = poke_to_limit[0]
@@ -69,7 +67,7 @@ def get_images_for_pokemon(poke_to_limit):
     response.download(
         {
             "keywords": pokemon + " pokemon",
-            "limit": 1,#limit,
+            "limit": limit,
             "chromedriver": "chromedriver"
             # Add chromedriver to your path or just point this var directly to your chromedriverv
         }
diff --git a/2 - FixFileTypes.py b/2 - FixFileTypes.py
index 5c7131b..d66d2e6 100644
--- a/2 - FixFileTypes.py	
+++ b/2 - FixFileTypes.py	
@@ -40,8 +40,7 @@ def get_frames_from_gif(infile):
     try:
         im = Image.open(infile)
     except IOError:
-        print
-        "Cant load", infile
+        print("Cant load", infile)
         sys.exit(1)
 
     iterator = 0
@@ -89,7 +88,7 @@ def clean_image(file_root):
     mutex.acquire()
     global i
     i += 1
-    if i % 1 == 0:
+    if i % 100 == 0:
         print("changing type" + str(i))
     mutex.release()
 
diff --git a/3 - TestTrainSplit.py b/3 - TestTrainSplit.py
index a1bd120..b99f8a1 100644
--- a/3 - TestTrainSplit.py	
+++ b/3 - TestTrainSplit.py	
@@ -1,14 +1,15 @@
 import os
 from random import random
 from shutil import copyfile, rmtree
+from pathlib import Path
 import multiprocessing
 
 train_dir = "./data/train/"
 test_dir = "./data/test/"
 val_dir = "./data/val/"
 train = .80
-test = .15
-val = .05
+test = .10
+val = .10
 
 
 def add_train_data(file, filename, label):
@@ -19,7 +20,7 @@ def add_train_data(file, filename, label):
         except Exception as e:
             print(e)
     try:
-        copyfile(file, dest)
+        Path(dest).absolute().symlink_to(Path(file).absolute())
     except Exception as e:
         print(e)
         print("INVALID FILE")
@@ -33,7 +34,8 @@ def add_val_data(file, filename, label):
             os.makedirs(os.path.dirname(dest))
         except Exception as e:
             print(e)
-    copyfile(file, dest)
+
+    Path(dest).absolute().symlink_to(Path(file).absolute())
 
 
 def add_test_data(file, filename, label):
@@ -43,7 +45,8 @@ def add_test_data(file, filename, label):
             os.makedirs(os.path.dirname(dest))
         except Exception as e:
             print(e)
-    copyfile(file, dest)
+
+    Path(dest).absolute().symlink_to(Path(file).absolute())
 
 
 def remove_previous():
@@ -62,7 +65,7 @@ def test_split_file(file_root):
     file = file_root[1]
     # print(file)
 
-    if file is ".DS_Store":
+    if file == ".DS_Store":
         return
     c = random()
 
diff --git a/4 - TrainingModelKeras.py b/4 - TrainingModelKeras.py
index 09ea00a..0c6e57f 100644
--- a/4 - TrainingModelKeras.py	
+++ b/4 - TrainingModelKeras.py	
@@ -98,7 +98,6 @@ add_model.add(GlobalAveragePooling2D())
 
 add_model.add(Dense(2024, activation='relu'))
 # Adding some dense layers in order to learn complex functions from the base model
-# Potentially throw another dropout layer here if you seem to be overfitting your
 add_model.add(Dropout(0.5))
 add_model.add(Dense(512, activation='relu'))
 add_model.add(Dense(len(train_gen.class_indices), activation='softmax'))  # Decision layer
@@ -109,12 +108,11 @@ model.compile(loss='categorical_crossentropy',
               optimizer=optimizers.Adam(lr=1e-4),
               metrics=['accuracy'])
 model.summary()
-
-
-
+print(
+    model.output_shape
+)
 
 # Now that the model is created we can go ahead and train on it using the image generators we created earlier
-
 file_path = model_name + ".hdf5"
 
 checkpoint = ModelCheckpoint(file_path, monitor='val_acc', verbose=1, save_best_only=True, mode='max')
@@ -143,8 +141,6 @@ history = model.fit_generator(
 )
 
 
-
-
 # Finally we are going to grab predictions from our model, save it, and then run some analysis on the results
 
 predicts = model.predict_generator(test_gen, verbose=True, workers=1, steps=len(test_gen))
diff --git a/4 - TransferLearningKeras.py b/4 - TransferLearningKeras.py
index af0b81f..b2c62a7 100644
--- a/4 - TransferLearningKeras.py	
+++ b/4 - TransferLearningKeras.py	
@@ -1,117 +1,138 @@
-import pandas as pd
-import numpy as np
-import seaborn as sn
-import matplotlib.pyplot as plt
-
 from time import time
+
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+import seaborn as sn
 from PIL import ImageFile
+from tensorflow import keras
 
 ImageFile.LOAD_TRUNCATED_IMAGES = True
 
-input_shape = (244, 244, 3)
+input_shape = (224, 224, 3)
 
-batch_size = 60
-model_name = "MobileNetV2FullDataset"
+batch_size = 64
+model_name = "TF2_Mobilenet_V2_transfer"
 
-from keras.preprocessing.image import ImageDataGenerator
-from keras.applications.inception_v3 import preprocess_input
+# preproc = keras.applications.inception_v3.preprocess_input
+preproc = keras.applications.mobilenet_v2.preprocess_input
 
-train_idg = ImageDataGenerator(
-    # horizontal_flip=True,
-    preprocessing_function=preprocess_input
+train_idg = keras.preprocessing.image.ImageDataGenerator(
+    horizontal_flip=True,
+    rescale=1. / 255,
+    # rotation_range=30,
+    # width_shift_range=[-.1, .1],
+    # height_shift_range=[-.1, .1],
+    # preprocessing_function=preproc
 )
 train_gen = train_idg.flow_from_directory(
-    './data/train',
+    './downloads',
     target_size=(input_shape[0], input_shape[1]),
-    batch_size=batch_size
-)
-val_idg = ImageDataGenerator(
-    # horizontal_flip=True,
-    preprocessing_function=preprocess_input
+    batch_size=batch_size,
+    class_mode='categorical',
+    shuffle=True,
+    color_mode='rgb'
 )
 
+val_idg = keras.preprocessing.image.ImageDataGenerator(
+    horizontal_flip=True,
+    rescale=1. / 255,
+    # rotation_range=30,
+    # width_shift_range=[-.1, .1],
+    # height_shift_range=[-.1, .1],
+    # preprocessing_function=keras.applications.mobilenet_v2.preprocess_input
+)
 val_gen = val_idg.flow_from_directory(
     './data/val',
     target_size=(input_shape[0], input_shape[1]),
-    batch_size=batch_size
+    batch_size=batch_size,
+    class_mode='categorical',
+    shuffle=True,
 )
 
-from keras.applications import inception_v3, mobilenet_v2, vgg16
-from keras.models import Sequential
-from keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoard
-from keras import optimizers
-from keras.layers import Dense, Dropout, GlobalAveragePooling2D
-
+print((val_gen.classes))
 nclass = len(train_gen.class_indices)
+print(nclass)
+# for _ in range(5):
+#     img, label = train_gen.next()
+#     print(img.shape)  # (1,256,256,3)
+#     plt.imshow(img[0])
+#     plt.show()
+# plt.imshow(
 
 # base_model = vgg16.VGG16(
 #     weights='imagenet',
 #     include_top=False,
 #     input_shape=input_shape
 # )
-# base_model = inception_v3.InceptionV3(
+# base_model = keras.applications.InceptionV3(
+#     weights='imagenet',
+#     include_top=False,
+#     input_shape=input_shape
+# )
+# base_model = keras.applications.xception.Xception(
 #     weights='imagenet',
 #     include_top=False,
 #     input_shape=input_shape
 # )
 
-base_model = mobilenet_v2.MobileNetV2(
+base_model = keras.applications.mobilenet_v2.MobileNetV2(
     weights='imagenet',
     include_top=False,
     input_shape=input_shape
 )
 base_model.trainable = False
+# i = keras.layers.Input([input_shape[0], input_shape[1], input_shape[2]])
+i = base_model.input
+# x = preproc(i)
+# x = base_model
+x = keras.layers.GlobalAveragePooling2D()(base_model.output)
+x = keras.layers.Dense(1024, activation='relu')(x)
+x = keras.layers.Dropout(0.5)(x)
+output = keras.layers.Dense(nclass, activation='softmax')(x)
 
-add_model = Sequential()
-add_model.add(base_model)
-add_model.add(GlobalAveragePooling2D())
-add_model.add(Dropout(0.5))
-add_model.add(Dense(1024, activation='relu'))
-# Adding some dense layers in order to learn complex functions from the base model
-add_model.add(Dropout(0.5))
-add_model.add(Dense(512, activation='relu'))
-add_model.add(Dense(nclass, activation='softmax'))  # Decision layer
+model = keras.Model(inputs=i, outputs=output)
 
-model = add_model
-model.compile(loss='categorical_crossentropy',
-              # optimizer=optimizers.SGD(lr=1e-4, momentum=0.9),
-              optimizer=optimizers.Adam(lr=1e-4),
+model.compile(optimizer=keras.optimizers.Adam(learning_rate=.0001),
+              loss=keras.losses.CategoricalCrossentropy(),
               metrics=['accuracy'])
+
 model.summary()
+print(model.output_shape)
 
 # Train the model
 file_path = "weights.mobilenet.best.hdf5"
 
-checkpoint = ModelCheckpoint(file_path, monitor='val_acc', verbose=1, save_best_only=True, mode='max')
+checkpoint = keras.callbacks.ModelCheckpoint(file_path, monitor='val_loss', verbose=1, save_best_only=True,
+                                             mode='min')
 
-early = EarlyStopping(monitor="val_acc", mode="max", patience=15)
+early = keras.callbacks.EarlyStopping(monitor="loss", mode="min", patience=15)
 
-tensorboard = TensorBoard(
-    log_dir="logs/" + model_name + "{}".format(time()), histogram_freq=0, batch_size=batch_size,
+tensorboard = keras.callbacks.TensorBoard(
+    log_dir="logs/" + model_name + "{}".format(time()),
+    histogram_freq=1,
     write_graph=True,
-    write_grads=True,
     write_images=True,
-    update_freq=batch_size
-
+    update_freq=1,
+    profile_batch=2,
+    embeddings_freq=1
 )
 
 callbacks_list = [checkpoint, early, tensorboard]  # early
 
-history = model.fit_generator(
+history = model.fit(
     train_gen,
-    steps_per_epoch=len(train_gen),
     validation_data=val_gen,
-    validation_steps=len(val_gen),
-    epochs=5,
+    epochs=20,
+    batch_size=batch_size,
     shuffle=True,
     verbose=True,
     callbacks=callbacks_list
-
 )
 
 # Create Test generator
-test_idg = ImageDataGenerator(
-    preprocessing_function=preprocess_input,
+test_idg = keras.preprocessing.image.ImageDataGenerator(
+    rescale=1. / 255,
 )
 
 test_gen = test_idg.flow_from_directory(
@@ -128,7 +149,6 @@ score = model.evaluate_generator(test_gen, workers=1, steps=len(test_gen))
 # predicts
 predicts = model.predict_generator(test_gen, verbose=True, workers=1, steps=len(test_gen))
 
-
 print("Loss: ", score[0], "Accuracy: ", score[1])
 print(score)
 
diff --git a/5 - TestModel.py b/5 - TestModel.py
index f3ea1cd..ed4c0c9 100644
--- a/5 - TestModel.py	
+++ b/5 - TestModel.py	
@@ -59,7 +59,7 @@ df.to_csv("sub1_non_transfer.csv", index=False)
 
 acc = accuracy_score(reals, predicts)
 conf_mat = confusion_matrix(reals, predicts)
-print(classification_report(reals, predicts, [l for l in label_index.values()]))
+print(classification_report(reals, predicts, labels=[l for l in label_index.values()]))
 print("Testing accuracy score is ", acc)
 print("Confusion Matrix", conf_mat)
 
@@ -67,7 +67,9 @@ df_cm = pd.DataFrame(conf_mat, index=[i for i in list(set(reals))],
                      columns=[i for i in list(set(reals))])
 print("made dataframe")
 plt.figure(figsize=(10, 7))
-sn.heatmap(df_cm, annot=True)
+print("made plot")
+# sn.heatmap(df_cm, annot=True)
+print("showing plot")
 plt.show()
 
 with open("labels.txt", "w") as f:
diff --git a/6 - KerasToTensorflow.py b/6 - KerasToTensorflow.py
index 43b230c..e9bb2e9 100644
--- a/6 - KerasToTensorflow.py	
+++ b/6 - KerasToTensorflow.py	
@@ -1,11 +1,10 @@
-from tensorflow.contrib.keras.api import keras
-from tensorflow.contrib import lite
-
+import tensorflow as tf
+from tensorflow import keras
 keras_file = "mobilenetv2.hdf5"
 keras.models.load_model(keras_file)
 
 h5_model = keras.models.load_model(keras_file)
-converter = lite.TocoConverter.from_keras_model_file(keras_file)
+converter = tf.lite.TFLiteConverter.from_keras_model(h5_model)  # TF2 API takes the loaded model, not a file path
 
 tflite_model = converter.convert()
 with open('mobilenetv2.tflite', 'wb') as f:
diff --git a/ShowIncorrectImages.py b/ShowIncorrectImages.py
index 79b75a1..9838280 100644
--- a/ShowIncorrectImages.py
+++ b/ShowIncorrectImages.py
@@ -1,8 +1,6 @@
 import pandas as pd
-import numpy as np
 import matplotlib.pyplot as plt
 import matplotlib.image as mpimg
-from pprint import pprint
 
 df = pd.read_csv("sub1_non_transfer.csv")
 df2 = pd.read_csv("poke_evos.csv")