Update all parts of model building: move to frozen transfer learning followed by fine-tuning at a reduced learning rate, using EfficientNets for the final model.

2022-06-03 13:44:34 -04:00
parent 755fcde3a9
commit d111cdae69
13 changed files with 156 additions and 79 deletions
@@ -1,76 +1,64 @@
-import pandas as pd
-import matplotlib.pyplot as plt
-import numpy as np
-
-from keras.preprocessing.image import ImageDataGenerator
-from keras.models import load_model
-from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
 from glob import glob

+import matplotlib.pyplot as plt
+import pandas as pd
 from PIL import ImageFile
+from keras.models import load_model
+from keras.preprocessing.image import ImageDataGenerator
+
+from modeling_utils import get_metrics

 ImageFile.LOAD_TRUNCATED_IMAGES = True

-
 accuracies = []
 losses = []
 filenames = []

 input_shape = (224, 224, 3)
 batch_size = 32
+metrics_df = pd.read_csv("all_model_output.csv")

 test_gen = ImageDataGenerator().flow_from_directory(
    './data/test',
-    # './single_image_test_set',
+    target_size=(input_shape[0], input_shape[1]),
+    batch_size=batch_size,
+    shuffle=False
+)
+#
+single_gen = ImageDataGenerator().flow_from_directory(
+    './single_image_test_set',
    target_size=(input_shape[0], input_shape[1]),
    batch_size=batch_size,
    shuffle=False
 )

-for file  in glob("./models/keras/*"):
-    filenames.append(file)
+
+for file in glob("./models/keras/*.hdf5"):
+
    print(file)
+    print(metrics_df["model"])
+    if file in metrics_df.values:
+        continue
    model = load_model(file)
+    test_acc, test_ll = get_metrics(test_gen, model)
+    single_acc, single_ll = get_metrics(single_gen, model, file[:-5] + ".csv")
+    metrics_df = metrics_df.append({
+        "model": file,
+        "test_acc": test_acc,
+        "test_loss": test_ll,
+        "single_acc": single_acc,
+        "single_loss": single_ll,
+    }, ignore_index=True)

-    predictions = model.predict(test_gen, verbose=True, workers=12)

-    print(predictions)
-    print(type(predictions))
-    print(predictions.shape)

-    # Process the predictions
-    predictions = np.argmax(predictions,
-                            axis=1)
-    label_index = {v: k for k, v in test_gen.class_indices.items()}
-    predictions = [label_index[p] for p in predictions]
-    reals = [label_index[p] for p in test_gen.classes]
+# Save the results

-    # Save the results
-    print(label_index)
-    print(test_gen.classes)
-    print(test_gen.classes.shape)
-    print(type(test_gen.classes))
-    df = pd.DataFrame(columns=['fname', 'prediction', 'true_val'])
-    df['fname'] = [x for x in test_gen.filenames]
-    df['prediction'] = predictions
-    df["true_val"] = reals
-    df.to_csv("sub1_non_transfer.csv", index=False)
-
-    # Processed the saved results
-
-    acc = accuracy_score(reals, predictions)
-    conf_mat = confusion_matrix(reals, predictions)
-    print(classification_report(reals, predictions, labels=[l for l in label_index.values()]))
-    print("Testing accuracy score is ", acc)
-    print("Confusion Matrix", conf_mat)
-
-    accuracies.append(acc)
-
-overall_df = pd.DataFrame(list(zip(filenames, accuracies)),
-               columns =['model', 'acc']).sort_values('acc')
-
-print(overall_df)
-overall_df.plot.bar(y="acc", rot=90)
+metrics_df.to_csv("all_model_output.csv", index=False)
+print(metrics_df)
+metrics_df = metrics_df.sort_values('single_acc')
+metrics_df.plot.bar(y=["test_acc", "single_acc"], rot=90)
+metrics_df = metrics_df.sort_values('test_acc')
+metrics_df.plot.bar(y=["test_acc", "single_acc"], rot=90)
 plt.tight_layout()
 plt.show()
-overall_df.to_csv("all_model_output.csv")