Made all processing steps run in parallel (multiprocessing pools). Also changed the neural network to hopefully better support the larger number of classes.

2019-04-26 02:57:18 -05:00
parent 493f5cfb25
commit 24e8d71306
4 changed files with 160 additions and 91 deletions
@@ -3,23 +3,26 @@ import multiprocessing
 from google_images_download import google_images_download
 df = pd.read_csv("pokemon.csv")
 response = google_images_download.googleimagesdownload()
 def get_images_for_pokemon(pokemon):
    response = google_images_download.googleimagesdownload()
    response.download(
        {
-            "keywords": pokemon,# + " pokemon",
+            "keywords": pokemon + " pokemon",
            "limit": 250,
            "chromedriver": "chromedriver",
-            "thumbnail":True
+            "thumbnail": True
-        #     Add chromedriver to your path or just point this var directly to your chromedriver
+            #     Add chromedriver to your path or just point this var directly to your chromedriver
        }
    )
 pool = multiprocessing.Pool(multiprocessing.cpu_count()*4)
-pool.map(get_images_for_pokemon, df["identifier"][:490])
+    # freeze_support()
    df = pd.read_csv("pokemon.csv")
    pool = multiprocessing.Pool(multiprocessing.cpu_count()*3)
    fixes = []
    pool.map(get_images_for_pokemon, [fixes])#df["identifier"]
    # for pokemon in df["identifier"][:490]:
    #     get_images_for_pokemon(pokemon)
@@ -9,10 +9,12 @@ import imghdr
 import PIL
 from PIL import Image
 import sys
 import multiprocessing
 from threading import Thread, Lock
 directory = "downloads"
 def random_with_N_digits(n):
    range_start = 10 ** (n - 1)
    range_end = (10 ** n) - 1
@@ -26,12 +28,13 @@ def change_file_extension(file_obj, extension):
    elif not os.path.isfile(file_obj + extension):
        new_file = file_obj + extension
    else:
-        print(f"Found {extension} hiding as JPEG but couldn't rename:", file_obj)
+        # print(f"Found {extension} hiding as JPEG but couldn't rename:", file_obj)
        return
    print(f"Found {extension} hiding as JPEG, renaming:", file_obj, '->', new_file)
-    subprocess.run(['mv', file_obj, new_file])
+    # subprocess.run(['mv', file_obj, new_file])
    os.rename(file_obj, new_file)
 def get_frames_from_gif(infile):
@@ -42,8 +45,7 @@ def get_frames_from_gif(infile):
        "Cant load", infile
        sys.exit(1)
-    i = 0
+    iterator = 0
    try:
        while 1:
            im2 = im.convert('RGBA')
@@ -52,18 +54,20 @@ def get_frames_from_gif(infile):
            background = Image.new("RGB", im2.size, (255, 255, 255))
            background.paste(im2, mask=im2.split()[3])
            background.save(filename, 'JPEG', quality=80)
-            print(f"FOUND GIF, SAVING FRAME AS {filename}")
+            # print(f"FOUND GIF, SAVING FRAME AS {filename}")
-            i += 1
+            iterator += 1
            while (iterator % 10 != 0):
                im.seek(im.tell() + 1)
    except EOFError:
        pass  # end of sequence
-for root, dirs, files in os.walk(directory):
+i = 1
    for file in files:
 def clean_image(file_root):
    root = file_root[0]
    file = file_root[1]
    try:
        file_obj = os.path.join(root, file)
        exten = os.path.splitext(file)[1].lower()
@@ -82,27 +86,69 @@ for root, dirs, files in os.walk(directory):
            os.remove(file_obj)
        else:
            os.remove(file_obj)
    except Exception as e:
        logging.error(traceback.format_exc())
    mutex.acquire()
    global i
    i += 1
    if i % 1 == 0:
        print("changing type" + str(i))
    mutex.release()
-i = 1
+
-for root, dirs, files in os.walk(directory):
+ii = 1
-    for file in files:
+
 def rename_images(file_root):
    root = file_root[0]
    file = file_root[1]
    try:
        file_obj = os.path.join(root, file)
        path, file_base_name = os.path.split(file_obj)
        old_path = os.path.splitext(file_base_name)
        old_ext = old_path[1]
        old_name = old_path[0]
-            new_file = os.path.join(path, str(i) + "-" + str(random_with_N_digits(10)) + old_ext)
+        mutex.acquire()
        global ii
        ii += 1
        new_file = os.path.join(path, str(ii) + "-" + str(random_with_N_digits(10)) + old_ext)
        if ii % 1000 == 0:
            print(f"Moving file"
                  f"{new_file}"
                  f"{file_obj} - {ii}")
        mutex.release()
        if file_obj != new_file and "foo" not in old_name:
-                print(f"Moving file\n"
+            # subprocess.run(['mv', file_obj, new_file])
-                      f"{new_file}\n"
+            os.rename(file_obj, new_file)
-                      f"{file_obj}")
+
-                subprocess.run(['mv', file_obj, new_file])
+
                i += 1
    except Exception as e:
        logging.error(traceback.format_exc())
-print("Cleaning JPEGs done")
+mutex = Lock()
 if __name__ == '__main__':
    pool = multiprocessing.Pool(multiprocessing.cpu_count())
    file_root_list = []
    for root, dirs, files in os.walk(directory):
        for file in files:
            file_root_list.append((root, file))
    pool.map(clean_image, file_root_list)
    file_root_list = []
    for root, dirs, files in os.walk(directory):
        for file in files:
            file_root_list.append((root, file))
    pool.map(rename_images, file_root_list)
    print("Cleaning JPEGs done")
@@ -1,6 +1,7 @@
 import os
 from random import random
 from shutil import copyfile, rmtree
 import multiprocessing
 train_dir = "./data/train/"
 test_dir = "./data/test/"
@@ -12,7 +13,6 @@ val = .05
 def add_train_data(file, filename, label):
    dest = train_dir + label + "/" + filename
    print(dest, label, filename)
    if not os.path.exists(os.path.dirname(dest)):
        try:
            os.makedirs(os.path.dirname(dest))
@@ -56,16 +56,15 @@ def remove_previous():
        rmtree(val_dir)
 remove_previous()
 files_processed = 0
-
+def test_split_file(file_root):
-for root, dirs, files in os.walk("downloads/"):
+    global files_processed
-
+    root = file_root[0]
-    for file in files:
+    file = file_root[1]
-        print(file)
+    # print(file)
    if file is ".DS_Store":
-            continue
+        return
    c = random()
    if c < train:
@@ -75,6 +74,24 @@ for root, dirs, files in os.walk("downloads/"):
    else:
        add_test_data(os.path.join(root, file), file, root.split("/")[-1])
    files_processed += 1
    if files_processed % 1000==0:
        print(root.split("/")[-1])
        print(files_processed)
        print(file)
 if __name__ == '__main__':
    remove_previous()
    file_root_list = []
    for root, dirs, files in os.walk("downloads/"):
        for file in files:
            file_root_list.append((root, file))
    pool = multiprocessing.Pool(multiprocessing.cpu_count()*2)
    pool.map(test_split_file, file_root_list)
@@ -21,10 +21,10 @@ from PIL import ImageFile
 ImageFile.LOAD_TRUNCATED_IMAGES = True
-input_shape = (299, 299, 3)
+input_shape = (224, 224, 3)
-batch_size = 32
+batch_size = 60
-model_name = "InceptionV3Full"
+model_name = "mobilenet"
 # Next we set up the Image Data Generators to feed into the training cycles.
 # We need one for training, validation, and testing
@@ -41,6 +41,7 @@ train_gen = train_idg.flow_from_directory(
    target_size=(input_shape[0], input_shape[1]),
    batch_size=batch_size
 )
 print(len(train_gen.classes))
 val_idg = ImageDataGenerator(
@@ -75,26 +76,28 @@ test_gen = test_idg.flow_from_directory(
 #     include_top=False,
 #     input_shape=input_shape
 # )
-base_model = inception_v3.InceptionV3(
+# base_model = inception_v3.InceptionV3(
    weights='imagenet',
    include_top=False,
    input_shape=input_shape
 )
 # base_model = mobilenet_v2.MobileNetV2(
 #     weights='imagenet',
 #     include_top=False,
 #     input_shape=input_shape
 # )
 base_model = mobilenet_v2.MobileNetV2(
    weights='imagenet',
    include_top=False,
    input_shape=input_shape
 )
 # Create a new top for that model
 add_model = Sequential()
 add_model.add(base_model)
 add_model.add(GlobalAveragePooling2D())
 add_model.add(Dense(4048, activation='relu'))
 add_model.add(Dropout(0.5))
-add_model.add(
+
-    Dense(1024, activation='relu'))  # Adding some dense layers in order to learn complex functions from the base model
+add_model.add(Dense(2024, activation='relu'))
 # Adding some dense layers in order to learn complex functions from the base model
 # Potentially add another dropout layer here if the model seems to be overfitting
 add_model.add(Dropout(0.5))
 add_model.add(Dense(512, activation='relu'))
@@ -133,7 +136,7 @@ history = model.fit_generator(
    validation_data=val_gen,
    steps_per_epoch=len(train_gen),
    validation_steps=len(val_gen),
-    epochs=60,
+    epochs=25,
    shuffle=True,
    verbose=True,
    callbacks=callbacks_list