Made all processing steps run in parallel (using multiprocessing pools). Also changed the neural network to hopefully better support the larger number of classes.
This commit is contained in:
+11
-8
@@ -3,23 +3,26 @@ import multiprocessing
|
||||
|
||||
from google_images_download import google_images_download
|
||||
|
||||
df = pd.read_csv("pokemon.csv")
|
||||
|
||||
response = google_images_download.googleimagesdownload()
|
||||
|
||||
|
||||
def get_images_for_pokemon(pokemon):
|
||||
response = google_images_download.googleimagesdownload()
|
||||
response.download(
|
||||
{
|
||||
"keywords": pokemon,# + " pokemon",
|
||||
"keywords": pokemon + " pokemon",
|
||||
"limit": 250,
|
||||
"chromedriver": "chromedriver",
|
||||
"thumbnail": True
|
||||
# Add chromedriver to your path or just point this var directly to your chromedriver
|
||||
# Add chromedriver to your path or just point this var directly to your chromedriver
|
||||
}
|
||||
)
|
||||
|
||||
pool = multiprocessing.Pool(multiprocessing.cpu_count()*4)
|
||||
|
||||
pool.map(get_images_for_pokemon, df["identifier"][:490])
|
||||
# freeze_support()
|
||||
df = pd.read_csv("pokemon.csv")
|
||||
|
||||
pool = multiprocessing.Pool(multiprocessing.cpu_count()*3)
|
||||
fixes = []
|
||||
pool.map(get_images_for_pokemon, [fixes])#df["identifier"]
|
||||
|
||||
# for pokemon in df["identifier"][:490]:
|
||||
# get_images_for_pokemon(pokemon)
|
||||
+66
-20
@@ -9,10 +9,12 @@ import imghdr
|
||||
import PIL
|
||||
from PIL import Image
|
||||
import sys
|
||||
import multiprocessing
|
||||
from threading import Thread, Lock
|
||||
|
||||
|
||||
directory = "downloads"
|
||||
|
||||
|
||||
def random_with_N_digits(n):
|
||||
range_start = 10 ** (n - 1)
|
||||
range_end = (10 ** n) - 1
|
||||
@@ -26,12 +28,13 @@ def change_file_extension(file_obj, extension):
|
||||
elif not os.path.isfile(file_obj + extension):
|
||||
new_file = file_obj + extension
|
||||
else:
|
||||
print(f"Found {extension} hiding as JPEG but couldn't rename:", file_obj)
|
||||
# print(f"Found {extension} hiding as JPEG but couldn't rename:", file_obj)
|
||||
return
|
||||
|
||||
print(f"Found {extension} hiding as JPEG, renaming:", file_obj, '->', new_file)
|
||||
|
||||
subprocess.run(['mv', file_obj, new_file])
|
||||
# subprocess.run(['mv', file_obj, new_file])
|
||||
os.rename(file_obj, new_file)
|
||||
|
||||
|
||||
def get_frames_from_gif(infile):
|
||||
@@ -42,8 +45,7 @@ def get_frames_from_gif(infile):
|
||||
"Cant load", infile
|
||||
sys.exit(1)
|
||||
|
||||
i = 0
|
||||
|
||||
iterator = 0
|
||||
try:
|
||||
while 1:
|
||||
im2 = im.convert('RGBA')
|
||||
@@ -52,18 +54,20 @@ def get_frames_from_gif(infile):
|
||||
background = Image.new("RGB", im2.size, (255, 255, 255))
|
||||
background.paste(im2, mask=im2.split()[3])
|
||||
background.save(filename, 'JPEG', quality=80)
|
||||
print(f"FOUND GIF, SAVING FRAME AS {filename}")
|
||||
i += 1
|
||||
# print(f"FOUND GIF, SAVING FRAME AS {filename}")
|
||||
iterator += 1
|
||||
while (iterator % 10 != 0):
|
||||
im.seek(im.tell() + 1)
|
||||
|
||||
except EOFError:
|
||||
pass # end of sequence
|
||||
|
||||
|
||||
for root, dirs, files in os.walk(directory):
|
||||
i = 1
|
||||
|
||||
for file in files:
|
||||
|
||||
def clean_image(file_root):
|
||||
root = file_root[0]
|
||||
file = file_root[1]
|
||||
try:
|
||||
file_obj = os.path.join(root, file)
|
||||
exten = os.path.splitext(file)[1].lower()
|
||||
@@ -82,27 +86,69 @@ for root, dirs, files in os.walk(directory):
|
||||
os.remove(file_obj)
|
||||
else:
|
||||
os.remove(file_obj)
|
||||
|
||||
except Exception as e:
|
||||
logging.error(traceback.format_exc())
|
||||
mutex.acquire()
|
||||
global i
|
||||
i += 1
|
||||
if i % 1 == 0:
|
||||
print("changing type" + str(i))
|
||||
mutex.release()
|
||||
|
||||
i = 1
|
||||
for root, dirs, files in os.walk(directory):
|
||||
for file in files:
|
||||
|
||||
ii = 1
|
||||
|
||||
|
||||
def rename_images(file_root):
|
||||
root = file_root[0]
|
||||
file = file_root[1]
|
||||
try:
|
||||
file_obj = os.path.join(root, file)
|
||||
path, file_base_name = os.path.split(file_obj)
|
||||
old_path = os.path.splitext(file_base_name)
|
||||
old_ext = old_path[1]
|
||||
old_name = old_path[0]
|
||||
new_file = os.path.join(path, str(i) + "-" + str(random_with_N_digits(10)) + old_ext)
|
||||
mutex.acquire()
|
||||
global ii
|
||||
ii += 1
|
||||
new_file = os.path.join(path, str(ii) + "-" + str(random_with_N_digits(10)) + old_ext)
|
||||
if ii % 1000 == 0:
|
||||
print(f"Moving file"
|
||||
f"{new_file}"
|
||||
f"{file_obj} - {ii}")
|
||||
mutex.release()
|
||||
|
||||
if file_obj != new_file and "foo" not in old_name:
|
||||
print(f"Moving file\n"
|
||||
f"{new_file}\n"
|
||||
f"{file_obj}")
|
||||
subprocess.run(['mv', file_obj, new_file])
|
||||
i += 1
|
||||
# subprocess.run(['mv', file_obj, new_file])
|
||||
os.rename(file_obj, new_file)
|
||||
|
||||
|
||||
except Exception as e:
|
||||
logging.error(traceback.format_exc())
|
||||
|
||||
mutex = Lock()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
|
||||
pool = multiprocessing.Pool(multiprocessing.cpu_count())
|
||||
|
||||
file_root_list = []
|
||||
|
||||
for root, dirs, files in os.walk(directory):
|
||||
for file in files:
|
||||
file_root_list.append((root, file))
|
||||
|
||||
pool.map(clean_image, file_root_list)
|
||||
|
||||
file_root_list = []
|
||||
|
||||
for root, dirs, files in os.walk(directory):
|
||||
for file in files:
|
||||
file_root_list.append((root, file))
|
||||
|
||||
pool.map(rename_images, file_root_list)
|
||||
|
||||
print("Cleaning JPEGs done")
|
||||
|
||||
|
||||
+25
-8
@@ -1,6 +1,7 @@
|
||||
import os
|
||||
from random import random
|
||||
from shutil import copyfile, rmtree
|
||||
import multiprocessing
|
||||
|
||||
train_dir = "./data/train/"
|
||||
test_dir = "./data/test/"
|
||||
@@ -12,7 +13,6 @@ val = .05
|
||||
|
||||
def add_train_data(file, filename, label):
|
||||
dest = train_dir + label + "/" + filename
|
||||
print(dest, label, filename)
|
||||
if not os.path.exists(os.path.dirname(dest)):
|
||||
try:
|
||||
os.makedirs(os.path.dirname(dest))
|
||||
@@ -56,16 +56,15 @@ def remove_previous():
|
||||
rmtree(val_dir)
|
||||
|
||||
|
||||
remove_previous()
|
||||
files_processed = 0
|
||||
|
||||
for root, dirs, files in os.walk("downloads/"):
|
||||
|
||||
for file in files:
|
||||
print(file)
|
||||
def test_split_file(file_root):
|
||||
global files_processed
|
||||
root = file_root[0]
|
||||
file = file_root[1]
|
||||
# print(file)
|
||||
|
||||
if file is ".DS_Store":
|
||||
continue
|
||||
return
|
||||
c = random()
|
||||
|
||||
if c < train:
|
||||
@@ -75,6 +74,24 @@ for root, dirs, files in os.walk("downloads/"):
|
||||
else:
|
||||
add_test_data(os.path.join(root, file), file, root.split("/")[-1])
|
||||
files_processed += 1
|
||||
|
||||
if files_processed % 1000==0:
|
||||
print(root.split("/")[-1])
|
||||
print(files_processed)
|
||||
print(file)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
remove_previous()
|
||||
|
||||
file_root_list = []
|
||||
|
||||
for root, dirs, files in os.walk("downloads/"):
|
||||
for file in files:
|
||||
file_root_list.append((root, file))
|
||||
|
||||
|
||||
pool = multiprocessing.Pool(multiprocessing.cpu_count()*2)
|
||||
|
||||
pool.map(test_split_file, file_root_list)
|
||||
|
||||
|
||||
+16
-13
@@ -21,10 +21,10 @@ from PIL import ImageFile
|
||||
|
||||
ImageFile.LOAD_TRUNCATED_IMAGES = True
|
||||
|
||||
input_shape = (299, 299, 3)
|
||||
batch_size = 32
|
||||
input_shape = (224, 224, 3)
|
||||
batch_size = 60
|
||||
|
||||
model_name = "InceptionV3Full"
|
||||
model_name = "mobilenet"
|
||||
|
||||
# Next we set up the Image Data Generators to feed into the training cycles.
|
||||
# We need one for training, validation, and testing
|
||||
@@ -41,6 +41,7 @@ train_gen = train_idg.flow_from_directory(
|
||||
target_size=(input_shape[0], input_shape[1]),
|
||||
batch_size=batch_size
|
||||
)
|
||||
|
||||
print(len(train_gen.classes))
|
||||
|
||||
val_idg = ImageDataGenerator(
|
||||
@@ -75,26 +76,28 @@ test_gen = test_idg.flow_from_directory(
|
||||
# include_top=False,
|
||||
# input_shape=input_shape
|
||||
# )
|
||||
base_model = inception_v3.InceptionV3(
|
||||
weights='imagenet',
|
||||
include_top=False,
|
||||
input_shape=input_shape
|
||||
)
|
||||
|
||||
# base_model = mobilenet_v2.MobileNetV2(
|
||||
# base_model = inception_v3.InceptionV3(
|
||||
# weights='imagenet',
|
||||
# include_top=False,
|
||||
# input_shape=input_shape
|
||||
# )
|
||||
|
||||
base_model = mobilenet_v2.MobileNetV2(
|
||||
weights='imagenet',
|
||||
include_top=False,
|
||||
input_shape=input_shape
|
||||
)
|
||||
|
||||
|
||||
# Create a new top for that model
|
||||
add_model = Sequential()
|
||||
add_model.add(base_model)
|
||||
add_model.add(GlobalAveragePooling2D())
|
||||
add_model.add(Dense(4048, activation='relu'))
|
||||
add_model.add(Dropout(0.5))
|
||||
add_model.add(
|
||||
Dense(1024, activation='relu')) # Adding some dense layers in order to learn complex functions from the base model
|
||||
|
||||
add_model.add(Dense(2024, activation='relu'))
|
||||
# Adding some dense layers in order to learn complex functions from the base model
|
||||
# Potentially add another dropout layer here if the model seems to be overfitting
|
||||
add_model.add(Dropout(0.5))
|
||||
add_model.add(Dense(512, activation='relu'))
|
||||
@@ -133,7 +136,7 @@ history = model.fit_generator(
|
||||
validation_data=val_gen,
|
||||
steps_per_epoch=len(train_gen),
|
||||
validation_steps=len(val_gen),
|
||||
epochs=60,
|
||||
epochs=25,
|
||||
shuffle=True,
|
||||
verbose=True,
|
||||
callbacks=callbacks_list
|
||||
|
||||
Reference in New Issue
Block a user