Updates to all parts of model building: moving to transfer learning with a frozen base model, followed by fine-tuning at a reduced learning rate, using EfficientNets for the final model.
This commit is contained in:
+26
-14
@@ -3,9 +3,10 @@ from enum import Enum
|
|||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from PIL import ImageFile
|
from PIL import ImageFile
|
||||||
from tensorflow import keras
|
|
||||||
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
|
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
|
||||||
from model_builder import ImageClassModelBuilder, ImageClassModels
|
from tensorflow import keras
|
||||||
|
|
||||||
|
from modeling_utils import ImageClassModelBuilder, ImageClassModels
|
||||||
|
|
||||||
ImageFile.LOAD_TRUNCATED_IMAGES = True
|
ImageFile.LOAD_TRUNCATED_IMAGES = True
|
||||||
|
|
||||||
@@ -52,20 +53,18 @@ def get_gen(path, dataset_type: DatasetType = DatasetType.TRAIN):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def train_model(model_builder, train_gen, val_gen):
|
def train_model(model, model_name, train_gen, val_gen):
|
||||||
model = model_builder.create_model()
|
|
||||||
model_name = model_builder.get_name()
|
|
||||||
print(model)
|
print(model)
|
||||||
print(f"NOW TRAINING: {model_name}")
|
print(f"NOW TRAINING: {model_name}")
|
||||||
checkpoint = keras.callbacks.ModelCheckpoint(
|
checkpoint = keras.callbacks.ModelCheckpoint(
|
||||||
f"./models/keras/{model_name}.hdf5",
|
f"./models/keras/{model_name}.hdf5",
|
||||||
monitor='val_loss',
|
monitor='val_categorical_crossentropy',
|
||||||
verbose=1,
|
verbose=1,
|
||||||
save_best_only=True,
|
save_best_only=True,
|
||||||
mode='min'
|
mode='min'
|
||||||
)
|
)
|
||||||
early = keras.callbacks.EarlyStopping(
|
early = keras.callbacks.EarlyStopping(
|
||||||
monitor="val_loss",
|
monitor="val_categorical_crossentropy",
|
||||||
mode="auto",
|
mode="auto",
|
||||||
patience=4,
|
patience=4,
|
||||||
restore_best_weights=True,
|
restore_best_weights=True,
|
||||||
@@ -80,10 +79,10 @@ def train_model(model_builder, train_gen, val_gen):
|
|||||||
profile_batch=2,
|
profile_batch=2,
|
||||||
embeddings_freq=1,
|
embeddings_freq=1,
|
||||||
)
|
)
|
||||||
history = model.fit(
|
model.fit(
|
||||||
train_gen,
|
train_gen,
|
||||||
validation_data=val_gen,
|
validation_data=val_gen,
|
||||||
epochs=8,
|
epochs=100,
|
||||||
batch_size=batch_size,
|
batch_size=batch_size,
|
||||||
shuffle=True,
|
shuffle=True,
|
||||||
verbose=True,
|
verbose=True,
|
||||||
@@ -91,7 +90,6 @@ def train_model(model_builder, train_gen, val_gen):
|
|||||||
callbacks=[checkpoint, early, tensorboard],
|
callbacks=[checkpoint, early, tensorboard],
|
||||||
max_queue_size=1000
|
max_queue_size=1000
|
||||||
)
|
)
|
||||||
print(history)
|
|
||||||
return model
|
return model
|
||||||
|
|
||||||
|
|
||||||
@@ -119,7 +117,6 @@ def test_model(model, test_gen):
|
|||||||
print("made dataframe")
|
print("made dataframe")
|
||||||
plt.figure(figsize=(10, 7))
|
plt.figure(figsize=(10, 7))
|
||||||
print("made plot")
|
print("made plot")
|
||||||
# sn.heatmap(df_cm, annot=True)
|
|
||||||
print("showing plot")
|
print("showing plot")
|
||||||
plt.show()
|
plt.show()
|
||||||
|
|
||||||
@@ -131,15 +128,30 @@ if __name__ == "__main__":
|
|||||||
n_classes=807,
|
n_classes=807,
|
||||||
optimizer=keras.optimizers.Adam(learning_rate=.0001),
|
optimizer=keras.optimizers.Adam(learning_rate=.0001),
|
||||||
pre_trained=True,
|
pre_trained=True,
|
||||||
fine_tune=True,
|
freeze_layers=True,
|
||||||
base_model_type=ImageClassModels.MOBILENET_V2,
|
freeze_batch_norm=True,
|
||||||
|
base_model_type=ImageClassModels.EFFICIENTNET_V2B0,
|
||||||
dense_layer_neurons=1024,
|
dense_layer_neurons=1024,
|
||||||
dropout_rate=.5,
|
dropout_rate=.5,
|
||||||
)
|
)
|
||||||
]
|
]
|
||||||
for mb in model_builders:
|
for mb in model_builders:
|
||||||
|
model = mb.create_model()
|
||||||
|
model_name = mb.get_name()
|
||||||
train_gen = get_gen('./data/train', dataset_type=DatasetType.TRAIN)
|
train_gen = get_gen('./data/train', dataset_type=DatasetType.TRAIN)
|
||||||
val_gen = get_gen('./data/val', dataset_type=DatasetType.VAL)
|
val_gen = get_gen('./data/val', dataset_type=DatasetType.VAL)
|
||||||
test_gen = get_gen('./data/test', dataset_type=DatasetType.TEST)
|
test_gen = get_gen('./data/test', dataset_type=DatasetType.TEST)
|
||||||
model = train_model(mb, train_gen, val_gen)
|
model = train_model(model, model_name, train_gen, val_gen)
|
||||||
|
for layer in model.layers[2].layers:
|
||||||
|
if not isinstance(layer, keras.layers.BatchNormalization):
|
||||||
|
layer.trainable = True
|
||||||
|
model.layers[2].trainable = True
|
||||||
|
print(model)
|
||||||
|
model.compile(
|
||||||
|
optimizer=keras.optimizers.Adam(learning_rate=.00001),
|
||||||
|
loss=keras.losses.CategoricalCrossentropy(),
|
||||||
|
metrics=['accuracy', 'categorical_crossentropy']
|
||||||
|
)
|
||||||
|
model.summary()
|
||||||
|
model = train_model(model, model_name + "-second_stage", train_gen, val_gen)
|
||||||
test_model(model, test_gen)
|
test_model(model, test_gen)
|
||||||
|
|||||||
+35
-47
@@ -1,76 +1,64 @@
|
|||||||
import pandas as pd
|
|
||||||
import matplotlib.pyplot as plt
|
|
||||||
import numpy as np
|
|
||||||
|
|
||||||
from keras.preprocessing.image import ImageDataGenerator
|
|
||||||
from keras.models import load_model
|
|
||||||
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
|
|
||||||
from glob import glob
|
from glob import glob
|
||||||
|
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
import pandas as pd
|
||||||
from PIL import ImageFile
|
from PIL import ImageFile
|
||||||
|
from keras.models import load_model
|
||||||
|
from keras.preprocessing.image import ImageDataGenerator
|
||||||
|
|
||||||
|
from modeling_utils import get_metrics
|
||||||
|
|
||||||
ImageFile.LOAD_TRUNCATED_IMAGES = True
|
ImageFile.LOAD_TRUNCATED_IMAGES = True
|
||||||
|
|
||||||
|
|
||||||
accuracies = []
|
accuracies = []
|
||||||
losses = []
|
losses = []
|
||||||
filenames = []
|
filenames = []
|
||||||
|
|
||||||
input_shape = (224, 224, 3)
|
input_shape = (224, 224, 3)
|
||||||
batch_size = 32
|
batch_size = 32
|
||||||
|
metrics_df = pd.read_csv("all_model_output.csv")
|
||||||
|
|
||||||
test_gen = ImageDataGenerator().flow_from_directory(
|
test_gen = ImageDataGenerator().flow_from_directory(
|
||||||
'./data/test',
|
'./data/test',
|
||||||
# './single_image_test_set',
|
target_size=(input_shape[0], input_shape[1]),
|
||||||
|
batch_size=batch_size,
|
||||||
|
shuffle=False
|
||||||
|
)
|
||||||
|
#
|
||||||
|
single_gen = ImageDataGenerator().flow_from_directory(
|
||||||
|
'./single_image_test_set',
|
||||||
target_size=(input_shape[0], input_shape[1]),
|
target_size=(input_shape[0], input_shape[1]),
|
||||||
batch_size=batch_size,
|
batch_size=batch_size,
|
||||||
shuffle=False
|
shuffle=False
|
||||||
)
|
)
|
||||||
|
|
||||||
for file in glob("./models/keras/*"):
|
|
||||||
filenames.append(file)
|
for file in glob("./models/keras/*.hdf5"):
|
||||||
|
|
||||||
print(file)
|
print(file)
|
||||||
|
print(metrics_df["model"])
|
||||||
|
if file in metrics_df.values:
|
||||||
|
continue
|
||||||
model = load_model(file)
|
model = load_model(file)
|
||||||
|
test_acc, test_ll = get_metrics(test_gen, model)
|
||||||
|
single_acc, single_ll = get_metrics(single_gen, model, file[:-5] + ".csv")
|
||||||
|
metrics_df = metrics_df.append({
|
||||||
|
"model": file,
|
||||||
|
"test_acc": test_acc,
|
||||||
|
"test_loss": test_ll,
|
||||||
|
"single_acc": single_acc,
|
||||||
|
"single_loss": single_ll,
|
||||||
|
}, ignore_index=True)
|
||||||
|
|
||||||
predictions = model.predict(test_gen, verbose=True, workers=12)
|
|
||||||
|
|
||||||
print(predictions)
|
|
||||||
print(type(predictions))
|
|
||||||
print(predictions.shape)
|
|
||||||
|
|
||||||
# Process the predictions
|
|
||||||
predictions = np.argmax(predictions,
|
|
||||||
axis=1)
|
|
||||||
label_index = {v: k for k, v in test_gen.class_indices.items()}
|
|
||||||
predictions = [label_index[p] for p in predictions]
|
|
||||||
reals = [label_index[p] for p in test_gen.classes]
|
|
||||||
|
|
||||||
# Save the results
|
# Save the results
|
||||||
print(label_index)
|
|
||||||
print(test_gen.classes)
|
|
||||||
print(test_gen.classes.shape)
|
|
||||||
print(type(test_gen.classes))
|
|
||||||
df = pd.DataFrame(columns=['fname', 'prediction', 'true_val'])
|
|
||||||
df['fname'] = [x for x in test_gen.filenames]
|
|
||||||
df['prediction'] = predictions
|
|
||||||
df["true_val"] = reals
|
|
||||||
df.to_csv("sub1_non_transfer.csv", index=False)
|
|
||||||
|
|
||||||
# Processed the saved results
|
metrics_df.to_csv("all_model_output.csv", index=False)
|
||||||
|
print(metrics_df)
|
||||||
acc = accuracy_score(reals, predictions)
|
metrics_df = metrics_df.sort_values('single_acc')
|
||||||
conf_mat = confusion_matrix(reals, predictions)
|
metrics_df.plot.bar(y=["test_acc", "single_acc"], rot=90)
|
||||||
print(classification_report(reals, predictions, labels=[l for l in label_index.values()]))
|
metrics_df = metrics_df.sort_values('test_acc')
|
||||||
print("Testing accuracy score is ", acc)
|
metrics_df.plot.bar(y=["test_acc", "single_acc"], rot=90)
|
||||||
print("Confusion Matrix", conf_mat)
|
|
||||||
|
|
||||||
accuracies.append(acc)
|
|
||||||
|
|
||||||
overall_df = pd.DataFrame(list(zip(filenames, accuracies)),
|
|
||||||
columns =['model', 'acc']).sort_values('acc')
|
|
||||||
|
|
||||||
print(overall_df)
|
|
||||||
overall_df.plot.bar(y="acc", rot=90)
|
|
||||||
plt.tight_layout()
|
plt.tight_layout()
|
||||||
plt.show()
|
plt.show()
|
||||||
overall_df.to_csv("all_model_output.csv")
|
|
||||||
|
|||||||
@@ -0,0 +1,41 @@
|
|||||||
|
import os
|
||||||
|
from glob import glob
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
import tensorflow as tf
|
||||||
|
from keras.preprocessing.image import ImageDataGenerator
|
||||||
|
from tensorflow import keras
|
||||||
|
|
||||||
|
from modeling_utils import get_metrics
|
||||||
|
|
||||||
|
# TODO: Move these to a config for the project
input_shape = (224, 224, 3)
batch_size = 32

# Generator over the single-image test set. It is only consumed by the
# commented-out comparison at the bottom of the script, but it is still built
# here so the script's start-up behaviour is unchanged.
single_gen = ImageDataGenerator().flow_from_directory(
    './single_image_test_set',
    target_size=(input_shape[0], input_shape[1]),
    batch_size=batch_size,
    shuffle=False
)

# Make sure the output locations exist before anything is written to them;
# otherwise both the labels file and the first converted model fail with
# FileNotFoundError on a fresh checkout.
tflite_root = Path("./models/tflite")
tflite_models_dir = tflite_root / "models"
tflite_models_dir.mkdir(parents=True, exist_ok=True)

# Write one class name per line (no header, no index). The names are the
# sub-directory names of the training set, sorted alphabetically.
with os.scandir("./data/train") as entries:
    class_names = sorted(entry.name for entry in entries if entry.is_dir())
pd.DataFrame(class_names).to_csv(tflite_root / "labels.txt", index=False, header=False)

# Convert every saved Keras checkpoint that has no TFLite counterpart yet.
for file in glob("./models/keras/*.hdf5"):
    path = Path(file)
    # Same base name as the checkpoint, with ".hdf5" swapped for ".tflite".
    tflite_file = tflite_models_dir / f"{path.stem}.tflite"
    if tflite_file.exists():
        # Already converted on an earlier run; skip the expensive reload.
        continue

    keras_model = keras.models.load_model(file)

    converter = tf.lite.TFLiteConverter.from_keras_model(keras_model)
    tflite_model = converter.convert()
    with open(tflite_file, 'wb') as f:
        f.write(tflite_model)
    # NOTE(review): unfinished Keras-vs-TFLite comparison, left disabled as in
    # the original script.
    # interpreter = tf.lite.Interpreter(model_path=tflite_file)
    # single_acc, single_ll = get_metrics(single_gen, keras_model)
    # tf_single_acc, tf_single_ll = get_metrics(single_gen, tflite_model)
    #
    # print(single_acc, tf_single_acc)
    # print(single_ll, tf_single_ll)
|
||||||
@@ -0,0 +1,3 @@
|
|||||||
|
model,test_acc,test_loss,single_acc,single_loss
|
||||||
|
./models/keras\pt-fl-fbn-efficientnet_v2b0-d1024-do0.5-l11.e-04-l21.e-04-5224-second_stage.hdf5,0.6720150708068079,1.7423864365349095,0.9893048128342246,0.4364729183409372
|
||||||
|
./models/keras\pt-fl-fbn-efficientnet_v2b0-d1024-do0.5-l11.e-04-l21.e-04-5224.hdf5,0.410029881772119,3.346152696366266,0.986096256684492,0.3234976000776315
|
||||||
|
@@ -1,11 +0,0 @@
|
|||||||
import tensorflow as tf
|
|
||||||
from tensorflow import keras
|
|
||||||
keras_file = "mobilenetv2.hdf5"
|
|
||||||
keras.models.load_model(keras_file)
|
|
||||||
|
|
||||||
h5_model = keras.models.load_model(keras_file)
|
|
||||||
converter = tf.lite.TFLiteConverter.from_keras_model_file(keras_file)
|
|
||||||
|
|
||||||
tflite_model = converter.convert()
|
|
||||||
with open('mobilenetv2.tflite', 'wb') as f:
|
|
||||||
f.write(tflite_model)
|
|
||||||
@@ -1,4 +1,3 @@
|
|||||||
import pandas as pd
|
import pandas as pd
|
||||||
import os
|
import os
|
||||||
|
|
||||||
pd.DataFrame(sorted([f.name for f in os.scandir("./data/train") if f.is_dir()])).to_csv("labels.txt", index=False, header=False)
|
|
||||||
|
|||||||
@@ -1 +1,2 @@
|
|||||||
from .image_class_builder import ImageClassModelBuilder, ImageClassModels
|
from .image_class_builder import ImageClassModelBuilder, ImageClassModels
|
||||||
|
from .model_testing import get_metrics
|
||||||
@@ -25,6 +25,17 @@ class ImageClassModels(Enum):
|
|||||||
keras.applications.mobilenet_v2.preprocess_input,
|
keras.applications.mobilenet_v2.preprocess_input,
|
||||||
"mobilenet_v2"
|
"mobilenet_v2"
|
||||||
)
|
)
|
||||||
|
EFFICIENTNET_V2S = ModelWrapper(
|
||||||
|
keras.applications.efficientnet_v2.EfficientNetV2S,
|
||||||
|
tf.keras.applications.efficientnet_v2.preprocess_input,
|
||||||
|
"efficientnet_v2s"
|
||||||
|
)
|
||||||
|
EFFICIENTNET_V2B0 = ModelWrapper(
|
||||||
|
keras.applications.efficientnet_v2.EfficientNetV2B0,
|
||||||
|
tf.keras.applications.efficientnet_v2.preprocess_input,
|
||||||
|
"efficientnet_v2b0"
|
||||||
|
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class ImageClassModelBuilder(object):
|
class ImageClassModelBuilder(object):
|
||||||
@@ -35,7 +46,8 @@ class ImageClassModelBuilder(object):
|
|||||||
optimizer: tf.keras.optimizers.Optimizer = keras.optimizers.Adam(
|
optimizer: tf.keras.optimizers.Optimizer = keras.optimizers.Adam(
|
||||||
learning_rate=.0001),
|
learning_rate=.0001),
|
||||||
pre_trained: bool = True,
|
pre_trained: bool = True,
|
||||||
fine_tune: bool = False,
|
freeze_batch_norm: bool = False,
|
||||||
|
freeze_layers: bool = False,
|
||||||
base_model_type: ImageClassModels = ImageClassModels.MOBILENET_V2,
|
base_model_type: ImageClassModels = ImageClassModels.MOBILENET_V2,
|
||||||
dense_layer_neurons: int = 1024,
|
dense_layer_neurons: int = 1024,
|
||||||
dropout_rate: float = .5,
|
dropout_rate: float = .5,
|
||||||
@@ -45,7 +57,8 @@ class ImageClassModelBuilder(object):
|
|||||||
self.n_classes = n_classes
|
self.n_classes = n_classes
|
||||||
self.optimizer = optimizer
|
self.optimizer = optimizer
|
||||||
self.pre_trained = pre_trained
|
self.pre_trained = pre_trained
|
||||||
self.fine_tune = fine_tune
|
self.freeze_layers = freeze_layers
|
||||||
|
self.freeze_batch_norm = freeze_batch_norm
|
||||||
self.dense_layer_neurons = dense_layer_neurons
|
self.dense_layer_neurons = dense_layer_neurons
|
||||||
self.dropout_rate = dropout_rate
|
self.dropout_rate = dropout_rate
|
||||||
self.l1 = l1
|
self.l1 = l1
|
||||||
@@ -61,8 +74,12 @@ class ImageClassModelBuilder(object):
|
|||||||
)
|
)
|
||||||
|
|
||||||
def create_model(self):
|
def create_model(self):
|
||||||
if not self.fine_tune:
|
if self.freeze_layers:
|
||||||
self.base_model.trainable = False
|
self.base_model.trainable = False
|
||||||
|
if self.freeze_batch_norm:
|
||||||
|
for layer in self.base_model.layers:
|
||||||
|
if isinstance(layer, keras.layers.BatchNormalization):
|
||||||
|
layer.trainable = False
|
||||||
i = tf.keras.layers.Input([self.input_shape[0], self.input_shape[1], self.input_shape[2]], dtype=tf.float32)
|
i = tf.keras.layers.Input([self.input_shape[0], self.input_shape[1], self.input_shape[2]], dtype=tf.float32)
|
||||||
x = tf.cast(i, tf.float32)
|
x = tf.cast(i, tf.float32)
|
||||||
x = self.base_model_type.value.model_preprocessor(x)
|
x = self.base_model_type.value.model_preprocessor(x)
|
||||||
@@ -93,7 +110,7 @@ class ImageClassModelBuilder(object):
|
|||||||
return self.model
|
return self.model
|
||||||
|
|
||||||
def get_name(self):
|
def get_name(self):
|
||||||
return f"{'pt-' if self.pre_trained else ''}{'ft-' if self.fine_tune else ''}" \
|
return f"{'pt-' if self.pre_trained else ''}{'fl-' if self.freeze_layers else ''}{'fbn-' if self.freeze_batch_norm else ''}" \
|
||||||
f"{self.base_model_type.value.name}-d{self.dense_layer_neurons}-do{self.dropout_rate}" \
|
f"{self.base_model_type.value.name}-d{self.dense_layer_neurons}-do{self.dropout_rate}" \
|
||||||
f"{'-l1' + np.format_float_scientific(self.l1) if self.l1 > 0 else ''}{'-l2' + np.format_float_scientific(self.l2) if self.l2 > 0 else ''}" \
|
f"{'-l1' + np.format_float_scientific(self.l1) if self.l1 > 0 else ''}{'-l2' + np.format_float_scientific(self.l2) if self.l2 > 0 else ''}" \
|
||||||
f"-{random.randint(1111, 9999)}"
|
f"-{random.randint(1111, 9999)}"
|
||||||
@@ -0,0 +1,23 @@
|
|||||||
|
import numpy as np
|
||||||
|
from sklearn.metrics import accuracy_score, confusion_matrix, log_loss
|
||||||
|
|
||||||
|
|
||||||
|
def get_metrics(gen, model, save_predictions_file=None):
    """Evaluate ``model`` on ``gen`` and return ``(accuracy, log_loss)``.

    Args:
        gen: Data generator passed straight to ``model.predict``. It must
            expose ``class_indices`` (label name -> class index), ``classes``
            (true class index per sample) and ``filenames``.
            NOTE(review): the generator presumably has to be built with
            ``shuffle=False`` so predictions line up with ``gen.classes`` --
            confirm against callers.
        model: Object with a Keras-style ``predict`` method returning one row
            of per-class scores per sample (assumed to be probabilities, since
            they are fed to ``log_loss``).
        save_predictions_file: Optional CSV path. When truthy, a table with the
            columns ``fname``, ``prediction`` and ``true_val`` is written there.

    Returns:
        Tuple ``(acc, ll)``: the accuracy score and the log loss.
    """
    model_output = model.predict(gen, verbose=True, workers=12)
    prediction_indices = np.argmax(model_output, axis=1)

    # Invert the generator's mapping so class indices translate back to names.
    label_index = {v: k for k, v in gen.class_indices.items()}
    predictions = [label_index[p] for p in prediction_indices]
    reals = [label_index[p] for p in gen.classes]

    acc = accuracy_score(reals, predictions)
    # Pass every known class index explicitly so classes that are absent from
    # this particular data set are still accounted for.
    ll = log_loss(gen.classes, model_output, labels=list(label_index.keys()))
    conf_mat = confusion_matrix(reals, predictions, labels=list(label_index.values()))
    # print(classification_report(reals, predictions, labels=[l for l in label_index.values()]))
    print("Testing accuracy score is ", acc)
    print("Confusion Matrix", conf_mat)

    if save_predictions_file:
        # This module does not import pandas at the top level, so the original
        # code raised NameError on this branch; import it where it is needed.
        import pandas as pd

        df = pd.DataFrame(
            {
                'fname': list(gen.filenames),
                'prediction': predictions,
                'true_val': reals,
            },
            columns=['fname', 'prediction', 'true_val'],
        )
        df.to_csv(save_predictions_file, index=False)
    return acc, ll
|
||||||
@@ -0,0 +1,4 @@
|
|||||||
|
import pandas as pd
|
||||||
|
# Per-image prediction table for one saved model; the columns read below are
# "prediction" and "true_val".
predictions_csv = "models/keras/pt-fl-fbn-efficientnet_v2b0-d1024-do0.5-l11.e-04-l21.e-04-5224-second_stage.csv"
df = pd.read_csv(predictions_csv)

# Keep only the rows where the predicted label disagrees with the true label.
is_wrong = df["prediction"] != df["true_val"]
print(df.loc[is_wrong])
|
||||||
Binary file not shown.
|
Before Width: | Height: | Size: 64 KiB |
Binary file not shown.
|
Before Width: | Height: | Size: 41 KiB |
Reference in New Issue
Block a user