From 285955fa5c8b076731694c158abc88b1fbad6f13 Mon Sep 17 00:00:00 2001
From: lucasoskorep <lucas.oskorep@gmail.com>
Date: Thu, 25 Apr 2019 16:13:05 -0500
Subject: [PATCH] added multiprocessing to the downloader

---
 ModelTraining/1 - ImageGatherer.py | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/ModelTraining/1 - ImageGatherer.py b/ModelTraining/1 - ImageGatherer.py
index ff17d83..712a08b 100755
--- a/ModelTraining/1 - ImageGatherer.py	
+++ b/ModelTraining/1 - ImageGatherer.py	
@@ -1,4 +1,5 @@
 import pandas as pd
+import multiprocessing
 
 from google_images_download import google_images_download
 
@@ -6,15 +7,19 @@ df = pd.read_csv("pokemon.csv")
 
 response = google_images_download.googleimagesdownload()
 
-for pokemon in df["identifier"][:251]:
-    absolute_image_paths = response.download(
+
+def get_images_for_pokemon(pokemon):
+    response.download(
         {
-            "keywords": pokemon,
+            "keywords": pokemon,# + " pokemon",
             "limit": 250,
-            "chromedriver": "/usr/lib/chromium-browser/chromedriver",
-            # This needs to be changed based on the computer trying to download the images
-            "format": "jpg"
+            "chromedriver": "chromedriver",
+            "thumbnail":True
+            # Add chromedriver to your PATH, or set the "chromedriver" value above to the full path of your chromedriver binary.
         }
     )
 
-# TODO: Need to clean data up here.... really should be added to another class as well you lazy asshole
+pool = multiprocessing.Pool(multiprocessing.cpu_count()*4)
+
+pool.map(get_images_for_pokemon, df["identifier"][:490])
+