Added multiprocessing (a pool of worker processes) to the downloader

This commit is contained in:
lucasoskorep
2019-04-25 16:13:05 -05:00
parent ad79e7a2ed
commit 285955fa5c
+12 -7
View File
@@ -1,4 +1,5 @@
import pandas as pd import pandas as pd
import multiprocessing
from google_images_download import google_images_download from google_images_download import google_images_download
@@ -6,15 +7,19 @@ df = pd.read_csv("pokemon.csv")
response = google_images_download.googleimagesdownload() response = google_images_download.googleimagesdownload()
for pokemon in df["identifier"][:251]:
absolute_image_paths = response.download( def get_images_for_pokemon(pokemon):
response.download(
{ {
"keywords": pokemon, "keywords": pokemon,# + " pokemon",
"limit": 250, "limit": 250,
"chromedriver": "/usr/lib/chromium-browser/chromedriver", "chromedriver": "chromedriver",
# This needs to be changed based on the computer trying to download the images "thumbnail":True
"format": "jpg" # Add chromedriver to your path or just point this var directly to your chromedriver
} }
) )
# TODO: Need to clean data up here.... really should be added to another class as well you lazy asshole pool = multiprocessing.Pool(multiprocessing.cpu_count()*4)
pool.map(get_images_for_pokemon, df["identifier"][:490])