Files
tensordex/2 - FixFileTypes.py
T

155 lines
3.9 KiB
Python

import glob
import subprocess
import os
import re
import logging
import traceback
from random import randint
import imghdr
import PIL
from PIL import Image
import sys
import multiprocessing
from threading import Thread, Lock
directory = "downloads"
def random_with_N_digits(n):
range_start = 10 ** (n - 1)
range_end = (10 ** n) - 1
return randint(range_start, range_end)
def change_file_extension(file_obj, extension):
old_path = os.path.splitext(file_obj)
if not os.path.isfile(old_path[0] + extension):
new_file = old_path[0] + extension
elif not os.path.isfile(file_obj + extension):
new_file = file_obj + extension
else:
# print(f"Found {extension} hiding as JPEG but couldn't rename:", file_obj)
return
print(f"Found {extension} hiding as JPEG, renaming:", file_obj, '->', new_file)
# subprocess.run(['mv', file_obj, new_file])
os.rename(file_obj, new_file)
def get_frames_from_gif(infile):
try:
im = Image.open(infile)
except IOError:
print
"Cant load", infile
sys.exit(1)
iterator = 0
try:
while 1:
im2 = im.convert('RGBA')
im2.load()
filename = os.path.join(os.path.dirname(infile), 'foo' + str(i) + '.jpg')
background = Image.new("RGB", im2.size, (255, 255, 255))
background.paste(im2, mask=im2.split()[3])
background.save(filename, 'JPEG', quality=80)
# print(f"FOUND GIF, SAVING FRAME AS {filename}")
iterator += 1
while (iterator % 10 != 0):
im.seek(im.tell() + 1)
except EOFError:
pass # end of sequence
i = 1
def clean_image(file_root):
root = file_root[0]
file = file_root[1]
try:
file_obj = os.path.join(root, file)
exten = os.path.splitext(file)[1].lower()
img_type = imghdr.what(file_obj)
# print(file_obj)
if img_type is None:
os.remove(file_obj)
elif "jpeg" in img_type:
if "jpeg" not in exten and "jpg" not in exten:
change_file_extension(file_obj, ".jpeg")
elif "png" in img_type:
if "png" not in exten:
change_file_extension(file_obj, ".png")
elif "gif" in img_type:
get_frames_from_gif(file_obj)
os.remove(file_obj)
else:
os.remove(file_obj)
except Exception as e:
logging.error(traceback.format_exc())
mutex.acquire()
global i
i += 1
if i % 1 == 0:
print("changing type" + str(i))
mutex.release()
ii = 1
def rename_images(file_root):
root = file_root[0]
file = file_root[1]
try:
file_obj = os.path.join(root, file)
path, file_base_name = os.path.split(file_obj)
old_path = os.path.splitext(file_base_name)
old_ext = old_path[1]
old_name = old_path[0]
mutex.acquire()
global ii
ii += 1
new_file = os.path.join(path, str(ii) + "-" + str(random_with_N_digits(10)) + old_ext)
if ii % 1000 == 0:
print(f"Moving file"
f"{new_file}"
f"{file_obj} - {ii}")
mutex.release()
if file_obj != new_file and "foo" not in old_name:
# subprocess.run(['mv', file_obj, new_file])
os.rename(file_obj, new_file)
except Exception as e:
logging.error(traceback.format_exc())
mutex = Lock()
if __name__ == '__main__':
pool = multiprocessing.Pool(multiprocessing.cpu_count())
file_root_list = []
for root, dirs, files in os.walk(directory):
for file in files:
file_root_list.append((root, file))
pool.map(clean_image, file_root_list)
file_root_list = []
for root, dirs, files in os.walk(directory):
for file in files:
file_root_list.append((root, file))
pool.map(rename_images, file_root_list)
print("Cleaning JPEGs done")