Made all processing scripts run in parallel using multiprocessing pools. Also changed up the neural network to hopefully better support the higher class load.
This commit is contained in:
+11
-8
@@ -3,23 +3,26 @@ import multiprocessing
|
|||||||
|
|
||||||
from google_images_download import google_images_download
|
from google_images_download import google_images_download
|
||||||
|
|
||||||
df = pd.read_csv("pokemon.csv")
|
|
||||||
|
|
||||||
response = google_images_download.googleimagesdownload()
|
|
||||||
|
|
||||||
|
|
||||||
def get_images_for_pokemon(pokemon):
|
def get_images_for_pokemon(pokemon):
|
||||||
|
response = google_images_download.googleimagesdownload()
|
||||||
response.download(
|
response.download(
|
||||||
{
|
{
|
||||||
"keywords": pokemon,# + " pokemon",
|
"keywords": pokemon + " pokemon",
|
||||||
"limit": 250,
|
"limit": 250,
|
||||||
"chromedriver": "chromedriver",
|
"chromedriver": "chromedriver",
|
||||||
"thumbnail": True
|
"thumbnail": True
|
||||||
# Add chromedriver to your path or just point this var directly to your chromedriver
|
# Add chromedriver to your path or just point this var directly to your chromedriver
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
pool = multiprocessing.Pool(multiprocessing.cpu_count()*4)
|
|
||||||
|
|
||||||
pool.map(get_images_for_pokemon, df["identifier"][:490])
|
# freeze_support()
|
||||||
|
df = pd.read_csv("pokemon.csv")
|
||||||
|
|
||||||
|
pool = multiprocessing.Pool(multiprocessing.cpu_count()*3)
|
||||||
|
fixes = []
|
||||||
|
pool.map(get_images_for_pokemon, [fixes])#df["identifier"]
|
||||||
|
|
||||||
|
# for pokemon in df["identifier"][:490]:
|
||||||
|
# get_images_for_pokemon(pokemon)
|
||||||
+66
-20
@@ -9,10 +9,12 @@ import imghdr
|
|||||||
import PIL
|
import PIL
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
import sys
|
import sys
|
||||||
|
import multiprocessing
|
||||||
|
from threading import Thread, Lock
|
||||||
|
|
||||||
|
|
||||||
directory = "downloads"
|
directory = "downloads"
|
||||||
|
|
||||||
|
|
||||||
def random_with_N_digits(n):
|
def random_with_N_digits(n):
|
||||||
range_start = 10 ** (n - 1)
|
range_start = 10 ** (n - 1)
|
||||||
range_end = (10 ** n) - 1
|
range_end = (10 ** n) - 1
|
||||||
@@ -26,12 +28,13 @@ def change_file_extension(file_obj, extension):
|
|||||||
elif not os.path.isfile(file_obj + extension):
|
elif not os.path.isfile(file_obj + extension):
|
||||||
new_file = file_obj + extension
|
new_file = file_obj + extension
|
||||||
else:
|
else:
|
||||||
print(f"Found {extension} hiding as JPEG but couldn't rename:", file_obj)
|
# print(f"Found {extension} hiding as JPEG but couldn't rename:", file_obj)
|
||||||
return
|
return
|
||||||
|
|
||||||
print(f"Found {extension} hiding as JPEG, renaming:", file_obj, '->', new_file)
|
print(f"Found {extension} hiding as JPEG, renaming:", file_obj, '->', new_file)
|
||||||
|
|
||||||
subprocess.run(['mv', file_obj, new_file])
|
# subprocess.run(['mv', file_obj, new_file])
|
||||||
|
os.rename(file_obj, new_file)
|
||||||
|
|
||||||
|
|
||||||
def get_frames_from_gif(infile):
|
def get_frames_from_gif(infile):
|
||||||
@@ -42,8 +45,7 @@ def get_frames_from_gif(infile):
|
|||||||
"Cant load", infile
|
"Cant load", infile
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
i = 0
|
iterator = 0
|
||||||
|
|
||||||
try:
|
try:
|
||||||
while 1:
|
while 1:
|
||||||
im2 = im.convert('RGBA')
|
im2 = im.convert('RGBA')
|
||||||
@@ -52,18 +54,20 @@ def get_frames_from_gif(infile):
|
|||||||
background = Image.new("RGB", im2.size, (255, 255, 255))
|
background = Image.new("RGB", im2.size, (255, 255, 255))
|
||||||
background.paste(im2, mask=im2.split()[3])
|
background.paste(im2, mask=im2.split()[3])
|
||||||
background.save(filename, 'JPEG', quality=80)
|
background.save(filename, 'JPEG', quality=80)
|
||||||
print(f"FOUND GIF, SAVING FRAME AS {filename}")
|
# print(f"FOUND GIF, SAVING FRAME AS {filename}")
|
||||||
i += 1
|
iterator += 1
|
||||||
|
while (iterator % 10 != 0):
|
||||||
im.seek(im.tell() + 1)
|
im.seek(im.tell() + 1)
|
||||||
|
|
||||||
except EOFError:
|
except EOFError:
|
||||||
pass # end of sequence
|
pass # end of sequence
|
||||||
|
|
||||||
|
|
||||||
for root, dirs, files in os.walk(directory):
|
i = 1
|
||||||
|
|
||||||
for file in files:
|
|
||||||
|
|
||||||
|
def clean_image(file_root):
|
||||||
|
root = file_root[0]
|
||||||
|
file = file_root[1]
|
||||||
try:
|
try:
|
||||||
file_obj = os.path.join(root, file)
|
file_obj = os.path.join(root, file)
|
||||||
exten = os.path.splitext(file)[1].lower()
|
exten = os.path.splitext(file)[1].lower()
|
||||||
@@ -82,27 +86,69 @@ for root, dirs, files in os.walk(directory):
|
|||||||
os.remove(file_obj)
|
os.remove(file_obj)
|
||||||
else:
|
else:
|
||||||
os.remove(file_obj)
|
os.remove(file_obj)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.error(traceback.format_exc())
|
logging.error(traceback.format_exc())
|
||||||
|
mutex.acquire()
|
||||||
|
global i
|
||||||
|
i += 1
|
||||||
|
if i % 1 == 0:
|
||||||
|
print("changing type" + str(i))
|
||||||
|
mutex.release()
|
||||||
|
|
||||||
i = 1
|
|
||||||
for root, dirs, files in os.walk(directory):
|
ii = 1
|
||||||
for file in files:
|
|
||||||
|
|
||||||
|
def rename_images(file_root):
|
||||||
|
root = file_root[0]
|
||||||
|
file = file_root[1]
|
||||||
try:
|
try:
|
||||||
file_obj = os.path.join(root, file)
|
file_obj = os.path.join(root, file)
|
||||||
path, file_base_name = os.path.split(file_obj)
|
path, file_base_name = os.path.split(file_obj)
|
||||||
old_path = os.path.splitext(file_base_name)
|
old_path = os.path.splitext(file_base_name)
|
||||||
old_ext = old_path[1]
|
old_ext = old_path[1]
|
||||||
old_name = old_path[0]
|
old_name = old_path[0]
|
||||||
new_file = os.path.join(path, str(i) + "-" + str(random_with_N_digits(10)) + old_ext)
|
mutex.acquire()
|
||||||
|
global ii
|
||||||
|
ii += 1
|
||||||
|
new_file = os.path.join(path, str(ii) + "-" + str(random_with_N_digits(10)) + old_ext)
|
||||||
|
if ii % 1000 == 0:
|
||||||
|
print(f"Moving file"
|
||||||
|
f"{new_file}"
|
||||||
|
f"{file_obj} - {ii}")
|
||||||
|
mutex.release()
|
||||||
|
|
||||||
if file_obj != new_file and "foo" not in old_name:
|
if file_obj != new_file and "foo" not in old_name:
|
||||||
print(f"Moving file\n"
|
# subprocess.run(['mv', file_obj, new_file])
|
||||||
f"{new_file}\n"
|
os.rename(file_obj, new_file)
|
||||||
f"{file_obj}")
|
|
||||||
subprocess.run(['mv', file_obj, new_file])
|
|
||||||
i += 1
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.error(traceback.format_exc())
|
logging.error(traceback.format_exc())
|
||||||
|
|
||||||
|
mutex = Lock()
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
|
||||||
|
|
||||||
|
pool = multiprocessing.Pool(multiprocessing.cpu_count())
|
||||||
|
|
||||||
|
file_root_list = []
|
||||||
|
|
||||||
|
for root, dirs, files in os.walk(directory):
|
||||||
|
for file in files:
|
||||||
|
file_root_list.append((root, file))
|
||||||
|
|
||||||
|
pool.map(clean_image, file_root_list)
|
||||||
|
|
||||||
|
file_root_list = []
|
||||||
|
|
||||||
|
for root, dirs, files in os.walk(directory):
|
||||||
|
for file in files:
|
||||||
|
file_root_list.append((root, file))
|
||||||
|
|
||||||
|
pool.map(rename_images, file_root_list)
|
||||||
|
|
||||||
print("Cleaning JPEGs done")
|
print("Cleaning JPEGs done")
|
||||||
|
|
||||||
|
|||||||
+25
-8
@@ -1,6 +1,7 @@
|
|||||||
import os
|
import os
|
||||||
from random import random
|
from random import random
|
||||||
from shutil import copyfile, rmtree
|
from shutil import copyfile, rmtree
|
||||||
|
import multiprocessing
|
||||||
|
|
||||||
train_dir = "./data/train/"
|
train_dir = "./data/train/"
|
||||||
test_dir = "./data/test/"
|
test_dir = "./data/test/"
|
||||||
@@ -12,7 +13,6 @@ val = .05
|
|||||||
|
|
||||||
def add_train_data(file, filename, label):
|
def add_train_data(file, filename, label):
|
||||||
dest = train_dir + label + "/" + filename
|
dest = train_dir + label + "/" + filename
|
||||||
print(dest, label, filename)
|
|
||||||
if not os.path.exists(os.path.dirname(dest)):
|
if not os.path.exists(os.path.dirname(dest)):
|
||||||
try:
|
try:
|
||||||
os.makedirs(os.path.dirname(dest))
|
os.makedirs(os.path.dirname(dest))
|
||||||
@@ -56,16 +56,15 @@ def remove_previous():
|
|||||||
rmtree(val_dir)
|
rmtree(val_dir)
|
||||||
|
|
||||||
|
|
||||||
remove_previous()
|
|
||||||
files_processed = 0
|
files_processed = 0
|
||||||
|
def test_split_file(file_root):
|
||||||
for root, dirs, files in os.walk("downloads/"):
|
global files_processed
|
||||||
|
root = file_root[0]
|
||||||
for file in files:
|
file = file_root[1]
|
||||||
print(file)
|
# print(file)
|
||||||
|
|
||||||
if file is ".DS_Store":
|
if file is ".DS_Store":
|
||||||
continue
|
return
|
||||||
c = random()
|
c = random()
|
||||||
|
|
||||||
if c < train:
|
if c < train:
|
||||||
@@ -75,6 +74,24 @@ for root, dirs, files in os.walk("downloads/"):
|
|||||||
else:
|
else:
|
||||||
add_test_data(os.path.join(root, file), file, root.split("/")[-1])
|
add_test_data(os.path.join(root, file), file, root.split("/")[-1])
|
||||||
files_processed += 1
|
files_processed += 1
|
||||||
|
|
||||||
|
if files_processed % 1000==0:
|
||||||
print(root.split("/")[-1])
|
print(root.split("/")[-1])
|
||||||
print(files_processed)
|
print(files_processed)
|
||||||
print(file)
|
print(file)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
remove_previous()
|
||||||
|
|
||||||
|
file_root_list = []
|
||||||
|
|
||||||
|
for root, dirs, files in os.walk("downloads/"):
|
||||||
|
for file in files:
|
||||||
|
file_root_list.append((root, file))
|
||||||
|
|
||||||
|
|
||||||
|
pool = multiprocessing.Pool(multiprocessing.cpu_count()*2)
|
||||||
|
|
||||||
|
pool.map(test_split_file, file_root_list)
|
||||||
|
|
||||||
|
|||||||
+16
-13
@@ -21,10 +21,10 @@ from PIL import ImageFile
|
|||||||
|
|
||||||
ImageFile.LOAD_TRUNCATED_IMAGES = True
|
ImageFile.LOAD_TRUNCATED_IMAGES = True
|
||||||
|
|
||||||
input_shape = (299, 299, 3)
|
input_shape = (224, 224, 3)
|
||||||
batch_size = 32
|
batch_size = 60
|
||||||
|
|
||||||
model_name = "InceptionV3Full"
|
model_name = "mobilenet"
|
||||||
|
|
||||||
# Next we set up the Image Data Generators to feed into the training cycles.
|
# Next we set up the Image Data Generators to feed into the training cycles.
|
||||||
# We need one for training, validation, and testing
|
# We need one for training, validation, and testing
|
||||||
@@ -41,6 +41,7 @@ train_gen = train_idg.flow_from_directory(
|
|||||||
target_size=(input_shape[0], input_shape[1]),
|
target_size=(input_shape[0], input_shape[1]),
|
||||||
batch_size=batch_size
|
batch_size=batch_size
|
||||||
)
|
)
|
||||||
|
|
||||||
print(len(train_gen.classes))
|
print(len(train_gen.classes))
|
||||||
|
|
||||||
val_idg = ImageDataGenerator(
|
val_idg = ImageDataGenerator(
|
||||||
@@ -75,26 +76,28 @@ test_gen = test_idg.flow_from_directory(
|
|||||||
# include_top=False,
|
# include_top=False,
|
||||||
# input_shape=input_shape
|
# input_shape=input_shape
|
||||||
# )
|
# )
|
||||||
base_model = inception_v3.InceptionV3(
|
# base_model = inception_v3.InceptionV3(
|
||||||
weights='imagenet',
|
|
||||||
include_top=False,
|
|
||||||
input_shape=input_shape
|
|
||||||
)
|
|
||||||
|
|
||||||
# base_model = mobilenet_v2.MobileNetV2(
|
|
||||||
# weights='imagenet',
|
# weights='imagenet',
|
||||||
# include_top=False,
|
# include_top=False,
|
||||||
# input_shape=input_shape
|
# input_shape=input_shape
|
||||||
# )
|
# )
|
||||||
|
|
||||||
|
base_model = mobilenet_v2.MobileNetV2(
|
||||||
|
weights='imagenet',
|
||||||
|
include_top=False,
|
||||||
|
input_shape=input_shape
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
# Create a new top for that model
|
# Create a new top for that model
|
||||||
add_model = Sequential()
|
add_model = Sequential()
|
||||||
add_model.add(base_model)
|
add_model.add(base_model)
|
||||||
add_model.add(GlobalAveragePooling2D())
|
add_model.add(GlobalAveragePooling2D())
|
||||||
|
add_model.add(Dense(4048, activation='relu'))
|
||||||
add_model.add(Dropout(0.5))
|
add_model.add(Dropout(0.5))
|
||||||
add_model.add(
|
|
||||||
Dense(1024, activation='relu')) # Adding some dense layers in order to learn complex functions from the base model
|
add_model.add(Dense(2024, activation='relu'))
|
||||||
|
# Adding some dense layers in order to learn complex functions from the base model
|
||||||
# Potentially throw another dropout layer here if you seem to be overfitting your
|
# Potentially throw another dropout layer here if you seem to be overfitting your
|
||||||
add_model.add(Dropout(0.5))
|
add_model.add(Dropout(0.5))
|
||||||
add_model.add(Dense(512, activation='relu'))
|
add_model.add(Dense(512, activation='relu'))
|
||||||
@@ -133,7 +136,7 @@ history = model.fit_generator(
|
|||||||
validation_data=val_gen,
|
validation_data=val_gen,
|
||||||
steps_per_epoch=len(train_gen),
|
steps_per_epoch=len(train_gen),
|
||||||
validation_steps=len(val_gen),
|
validation_steps=len(val_gen),
|
||||||
epochs=60,
|
epochs=25,
|
||||||
shuffle=True,
|
shuffle=True,
|
||||||
verbose=True,
|
verbose=True,
|
||||||
callbacks=callbacks_list
|
callbacks=callbacks_list
|
||||||
|
|||||||
Reference in New Issue
Block a user