Moving to TensorFlow 2.0 and changing the train/test split to use symlinks.

This commit is contained in:
Lucas
2022-05-18 18:37:29 -04:00
parent e1e3319653
commit 5990351a47
8 changed files with 98 additions and 83 deletions
+1 -3
View File
@@ -59,8 +59,6 @@ def process_pokemon_names(df):
return pokes_to_limits return pokes_to_limits
import os
def get_images_for_pokemon(poke_to_limit): def get_images_for_pokemon(poke_to_limit):
pokemon = poke_to_limit[0] pokemon = poke_to_limit[0]
@@ -69,7 +67,7 @@ def get_images_for_pokemon(poke_to_limit):
response.download( response.download(
{ {
"keywords": pokemon + " pokemon", "keywords": pokemon + " pokemon",
"limit": 1,#limit, "limit": limit,
"chromedriver": "chromedriver" "chromedriver": "chromedriver"
# Add chromedriver to your path or just point this var directly to your chromedriverv # Add chromedriver to your path or just point this var directly to your chromedriverv
} }
+2 -3
View File
@@ -40,8 +40,7 @@ def get_frames_from_gif(infile):
try: try:
im = Image.open(infile) im = Image.open(infile)
except IOError: except IOError:
print print("Cant load", infile)
"Cant load", infile
sys.exit(1) sys.exit(1)
iterator = 0 iterator = 0
@@ -89,7 +88,7 @@ def clean_image(file_root):
mutex.acquire() mutex.acquire()
global i global i
i += 1 i += 1
if i % 1 == 0: if i % 100 == 0:
print("changing type" + str(i)) print("changing type" + str(i))
mutex.release() mutex.release()
+9 -6
View File
@@ -1,14 +1,15 @@
import os import os
from random import random from random import random
from shutil import copyfile, rmtree from shutil import copyfile, rmtree
from pathlib import Path
import multiprocessing import multiprocessing
train_dir = "./data/train/" train_dir = "./data/train/"
test_dir = "./data/test/" test_dir = "./data/test/"
val_dir = "./data/val/" val_dir = "./data/val/"
train = .80 train = .80
test = .15 test = .10
val = .05 val = .10
def add_train_data(file, filename, label): def add_train_data(file, filename, label):
@@ -19,7 +20,7 @@ def add_train_data(file, filename, label):
except Exception as e: except Exception as e:
print(e) print(e)
try: try:
copyfile(file, dest) Path(dest).absolute().symlink_to(Path(file).absolute())
except Exception as e: except Exception as e:
print(e) print(e)
print("INVALID FILE") print("INVALID FILE")
@@ -33,7 +34,8 @@ def add_val_data(file, filename, label):
os.makedirs(os.path.dirname(dest)) os.makedirs(os.path.dirname(dest))
except Exception as e: except Exception as e:
print(e) print(e)
copyfile(file, dest)
Path(dest).absolute().symlink_to(Path(file).absolute())
def add_test_data(file, filename, label): def add_test_data(file, filename, label):
@@ -43,7 +45,8 @@ def add_test_data(file, filename, label):
os.makedirs(os.path.dirname(dest)) os.makedirs(os.path.dirname(dest))
except Exception as e: except Exception as e:
print(e) print(e)
copyfile(file, dest)
Path(dest).absolute().symlink_to(Path(file).absolute())
def remove_previous(): def remove_previous():
@@ -62,7 +65,7 @@ def test_split_file(file_root):
file = file_root[1] file = file_root[1]
# print(file) # print(file)
if file is ".DS_Store": if file == ".DS_Store":
return return
c = random() c = random()
+3 -7
View File
@@ -98,7 +98,6 @@ add_model.add(GlobalAveragePooling2D())
add_model.add(Dense(2024, activation='relu')) add_model.add(Dense(2024, activation='relu'))
# Adding some dense layers in order to learn complex functions from the base model # Adding some dense layers in order to learn complex functions from the base model
# Potentially throw another dropout layer here if you seem to be overfitting your
add_model.add(Dropout(0.5)) add_model.add(Dropout(0.5))
add_model.add(Dense(512, activation='relu')) add_model.add(Dense(512, activation='relu'))
add_model.add(Dense(len(train_gen.class_indices), activation='softmax')) # Decision layer add_model.add(Dense(len(train_gen.class_indices), activation='softmax')) # Decision layer
@@ -109,12 +108,11 @@ model.compile(loss='categorical_crossentropy',
optimizer=optimizers.Adam(lr=1e-4), optimizer=optimizers.Adam(lr=1e-4),
metrics=['accuracy']) metrics=['accuracy'])
model.summary() model.summary()
print(
model.output_shape
)
# Now that the model is created we can go ahead and train on it using the image generators we created earlier # Now that the model is created we can go ahead and train on it using the image generators we created earlier
file_path = model_name + ".hdf5" file_path = model_name + ".hdf5"
checkpoint = ModelCheckpoint(file_path, monitor='val_acc', verbose=1, save_best_only=True, mode='max') checkpoint = ModelCheckpoint(file_path, monitor='val_acc', verbose=1, save_best_only=True, mode='max')
@@ -143,8 +141,6 @@ history = model.fit_generator(
) )
# Finally we are going to grab predictions from our model, save it, and then run some analysis on the results # Finally we are going to grab predictions from our model, save it, and then run some analysis on the results
predicts = model.predict_generator(test_gen, verbose=True, workers=1, steps=len(test_gen)) predicts = model.predict_generator(test_gen, verbose=True, workers=1, steps=len(test_gen))
+76 -56
View File
@@ -1,117 +1,138 @@
import pandas as pd
import numpy as np
import seaborn as sn
import matplotlib.pyplot as plt
from time import time from time import time
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sn
from PIL import ImageFile from PIL import ImageFile
from tensorflow import keras
ImageFile.LOAD_TRUNCATED_IMAGES = True ImageFile.LOAD_TRUNCATED_IMAGES = True
input_shape = (244, 244, 3) input_shape = (224, 224, 3)
batch_size = 60 batch_size = 64
model_name = "MobileNetV2FullDataset" model_name = "TF2_Mobilenet_V2_transfer"
from keras.preprocessing.image import ImageDataGenerator # preproc = keras.applications.inception_v3.preprocess_input
from keras.applications.inception_v3 import preprocess_input preproc = keras.applications.mobilenet_v2.preprocess_input
train_idg = ImageDataGenerator( train_idg = keras.preprocessing.image.ImageDataGenerator(
# horizontal_flip=True, horizontal_flip=True,
preprocessing_function=preprocess_input rescale=1. / 255,
# rotation_range=30,
# width_shift_range=[-.1, .1],
# height_shift_range=[-.1, .1],
# preprocessing_function=preproc
) )
train_gen = train_idg.flow_from_directory( train_gen = train_idg.flow_from_directory(
'./data/train', './downloads',
target_size=(input_shape[0], input_shape[1]), target_size=(input_shape[0], input_shape[1]),
batch_size=batch_size batch_size=batch_size,
) class_mode='categorical',
val_idg = ImageDataGenerator( shuffle=True,
# horizontal_flip=True, color_mode='rgb'
preprocessing_function=preprocess_input
) )
val_idg = keras.preprocessing.image.ImageDataGenerator(
horizontal_flip=True,
rescale=1. / 255,
# rotation_range=30,
# width_shift_range=[-.1, .1],
# height_shift_range=[-.1, .1],
# preprocessing_function=keras.applications.mobilenet_v2.preprocess_input
)
val_gen = val_idg.flow_from_directory( val_gen = val_idg.flow_from_directory(
'./data/val', './data/val',
target_size=(input_shape[0], input_shape[1]), target_size=(input_shape[0], input_shape[1]),
batch_size=batch_size batch_size=batch_size,
class_mode='categorical',
shuffle=True,
) )
from keras.applications import inception_v3, mobilenet_v2, vgg16 print((val_gen.classes))
from keras.models import Sequential
from keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoard
from keras import optimizers
from keras.layers import Dense, Dropout, GlobalAveragePooling2D
nclass = len(train_gen.class_indices) nclass = len(train_gen.class_indices)
print(nclass)
# for _ in range(5):
# img, label = train_gen.next()
# print(img.shape) # (1,256,256,3)
# plt.imshow(img[0])
# plt.show()
# plt.imshow(
# base_model = vgg16.VGG16( # base_model = vgg16.VGG16(
# weights='imagenet', # weights='imagenet',
# include_top=False, # include_top=False,
# input_shape=input_shape # input_shape=input_shape
# ) # )
# base_model = inception_v3.InceptionV3( # base_model = keras.applications.InceptionV3(
# weights='imagenet',
# include_top=False,
# input_shape=input_shape
# )
# base_model = keras.applications.xception.Xception(
# weights='imagenet', # weights='imagenet',
# include_top=False, # include_top=False,
# input_shape=input_shape # input_shape=input_shape
# ) # )
base_model = mobilenet_v2.MobileNetV2( base_model = keras.applications.mobilenet_v2.MobileNetV2(
weights='imagenet', weights='imagenet',
include_top=False, include_top=False,
input_shape=input_shape input_shape=input_shape
) )
base_model.trainable = False base_model.trainable = False
# i = keras.layers.Input([input_shape[0], input_shape[1], input_shape[2]])
i = base_model.input
# x = preproc(i)
# x = base_model
x = keras.layers.GlobalAveragePooling2D()(base_model.output)
x = keras.layers.Dense(1024, activation='relu')(x)
x = keras.layers.Dropout(0.5)(x)
output = keras.layers.Dense(nclass, activation='softmax')(x)
add_model = Sequential() model = keras.Model(inputs=i, outputs=output)
add_model.add(base_model)
add_model.add(GlobalAveragePooling2D())
add_model.add(Dropout(0.5))
add_model.add(Dense(1024, activation='relu'))
# Adding some dense layers in order to learn complex functions from the base model
add_model.add(Dropout(0.5))
add_model.add(Dense(512, activation='relu'))
add_model.add(Dense(nclass, activation='softmax')) # Decision layer
model = add_model model.compile(optimizer=keras.optimizers.Adam(learning_rate=.0001),
model.compile(loss='categorical_crossentropy', loss=keras.losses.CategoricalCrossentropy(),
# optimizer=optimizers.SGD(lr=1e-4, momentum=0.9),
optimizer=optimizers.Adam(lr=1e-4),
metrics=['accuracy']) metrics=['accuracy'])
model.summary() model.summary()
print(model.output_shape)
# Train the model # Train the model
file_path = "weights.mobilenet.best.hdf5" file_path = "weights.mobilenet.best.hdf5"
checkpoint = ModelCheckpoint(file_path, monitor='val_acc', verbose=1, save_best_only=True, mode='max') checkpoint = keras.callbacks.ModelCheckpoint(file_path, monitor='val_loss', verbose=1, save_best_only=True,
mode='min')
early = EarlyStopping(monitor="val_acc", mode="max", patience=15) early = keras.callbacks.EarlyStopping(monitor="loss", mode="min", patience=15)
tensorboard = TensorBoard( tensorboard = keras.callbacks.TensorBoard(
log_dir="logs/" + model_name + "{}".format(time()), histogram_freq=0, batch_size=batch_size, log_dir="logs/" + model_name + "{}".format(time()),
histogram_freq=1,
write_graph=True, write_graph=True,
write_grads=True,
write_images=True, write_images=True,
update_freq=batch_size update_freq=1,
profile_batch=2,
embeddings_freq=1
) )
callbacks_list = [checkpoint, early, tensorboard] # early callbacks_list = [checkpoint, early, tensorboard] # early
history = model.fit_generator( history = model.fit(
train_gen, train_gen,
steps_per_epoch=len(train_gen),
validation_data=val_gen, validation_data=val_gen,
validation_steps=len(val_gen), epochs=20,
epochs=5, batch_size=batch_size,
shuffle=True, shuffle=True,
verbose=True, verbose=True,
callbacks=callbacks_list callbacks=callbacks_list
) )
# Create Test generator # Create Test generator
test_idg = ImageDataGenerator( test_idg = keras.preprocessing.image.ImageDataGenerator(
preprocessing_function=preprocess_input, rescale=1. / 255,
) )
test_gen = test_idg.flow_from_directory( test_gen = test_idg.flow_from_directory(
@@ -128,7 +149,6 @@ score = model.evaluate_generator(test_gen, workers=1, steps=len(test_gen))
# predicts # predicts
predicts = model.predict_generator(test_gen, verbose=True, workers=1, steps=len(test_gen)) predicts = model.predict_generator(test_gen, verbose=True, workers=1, steps=len(test_gen))
print("Loss: ", score[0], "Accuracy: ", score[1]) print("Loss: ", score[0], "Accuracy: ", score[1])
print(score) print(score)
+4 -2
View File
@@ -59,7 +59,7 @@ df.to_csv("sub1_non_transfer.csv", index=False)
acc = accuracy_score(reals, predicts) acc = accuracy_score(reals, predicts)
conf_mat = confusion_matrix(reals, predicts) conf_mat = confusion_matrix(reals, predicts)
print(classification_report(reals, predicts, [l for l in label_index.values()])) print(classification_report(reals, predicts, labels=[l for l in label_index.values()]))
print("Testing accuracy score is ", acc) print("Testing accuracy score is ", acc)
print("Confusion Matrix", conf_mat) print("Confusion Matrix", conf_mat)
@@ -67,7 +67,9 @@ df_cm = pd.DataFrame(conf_mat, index=[i for i in list(set(reals))],
columns=[i for i in list(set(reals))]) columns=[i for i in list(set(reals))])
print("made dataframe") print("made dataframe")
plt.figure(figsize=(10, 7)) plt.figure(figsize=(10, 7))
sn.heatmap(df_cm, annot=True) print("made plot")
# sn.heatmap(df_cm, annot=True)
print("showing plot")
plt.show() plt.show()
with open("labels.txt", "w") as f: with open("labels.txt", "w") as f:
+3 -4
View File
@@ -1,11 +1,10 @@
from tensorflow.contrib.keras.api import keras import tensorflow as tf
from tensorflow.contrib import lite from tensorflow import keras
keras_file = "mobilenetv2.hdf5" keras_file = "mobilenetv2.hdf5"
keras.models.load_model(keras_file) keras.models.load_model(keras_file)
h5_model = keras.models.load_model(keras_file) h5_model = keras.models.load_model(keras_file)
converter = lite.TocoConverter.from_keras_model_file(keras_file) converter = tf.lite.TFLiteConverter.from_keras_model_file(keras_file)
tflite_model = converter.convert() tflite_model = converter.convert()
with open('mobilenetv2.tflite', 'wb') as f: with open('mobilenetv2.tflite', 'wb') as f:
-2
View File
@@ -1,8 +1,6 @@
import pandas as pd import pandas as pd
import numpy as np
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import matplotlib.image as mpimg import matplotlib.image as mpimg
from pprint import pprint
df = pd.read_csv("sub1_non_transfer.csv") df = pd.read_csv("sub1_non_transfer.csv")
df2 = pd.read_csv("poke_evos.csv") df2 = pd.read_csv("poke_evos.csv")