
| """该文件解决如下问题:文件夹中所有图片都以jpg结尾,但是有些图片真实类型不是jpg,可能是png或者gif 解决方案为:将png或者bmp图片转化为jpg图片,并按原图名称保存。对于gif,读取每一帧图片,并生成随机独特名称保存。 对于原图,可以选择移动还是删除
对于jpeg4py在Ubuntu下需要 sudo apt-get install libturbojpeg """ import imghdr import os import struct import cv2 from PIL import Image, ImageSequence import uuid import numpy as np import multiprocessing as mp import shutil import jpeg4py as jpeg
# Root directory that is scanned for images.
image_path = "./IJCAI_2019_AAAC_train/flower_photos"
# True when the images sit one directory level below image_path
# (image_path/<sub_dir>/<image>); False when they sit directly in image_path.
sub_dir_exit = True
# Type label that run() treats as "already correct"; it is compared against
# the label returned by get_filetype().
dir_format = "jpg"
# True: move a mislabeled original into mv_path; False: delete it.
mv_wrong_jpg = True
# Directory that receives the moved originals when mv_wrong_jpg is True.
mv_path = './IJCAI_2019_AAAC_train/old'
# Signature table used by get_filetype(): each key is the uppercase hex
# encoding of the bytes expected at the very start of a file, each value is
# the type label reported for it. Entries are tried in insertion order and
# the first match wins, so keep the order stable when editing.
# NOTE(review): the 'wav', 'avi' and 'mov' keys spell "WAVE", "AVI " and
# "moov", which look like markers that normally follow a container header
# rather than sit at byte 0 -- confirm before relying on those entries.
type_dict = {
    'FFD8FF': 'jpg',
    '89504E47': 'png',
    '47494638': 'gif',
    '49492A00': 'tif',
    '424D': 'bmp',
    '41433130': 'dwg',
    '38425053': 'psd',
    '7B5C727466': 'rtf',
    '3C3F786D6C': 'xml',
    '68746D6C3E': 'html',
    '44656C69766572792D646174653A': 'eml',
    'CFAD12FEC5FD746F': 'dbx',
    '2142444E': 'pst',
    'D0CF11E0': 'doc/xls',
    '5374616E64617264204A': 'mdb',
    'FF575043': 'wpd',
    '252150532D41646F6265': 'ps/eps',
    '255044462D312E': 'pdf',
    'AC9EBD8F': 'qdf',
    'E3828596': 'pwl',
    '504B0304': 'zip',
    '52617221': 'rar',
    '57415645': 'wav',
    '41564920': 'avi',
    '2E7261FD': 'ram',
    '2E524D46': 'rm',
    '000001BA': 'mpg',
    '000001B3': 'mpg',
    '6D6F6F76': 'mov',
    '3026B2758E66CF11': 'asf',
    '4D546864': 'mid'
}
def check_remove_broken(img_path):
    """Report whether ``img_path`` fails to decode with jpeg4py.

    Args:
        img_path: Path of the image to try decoding.

    Returns:
        ``True`` when decoding raised (a message is printed), ``False`` when
        it succeeded. Despite the name, nothing is removed here.
    """
    broken = False
    try:
        # Only success or failure matters; the decoded pixels are discarded.
        jpeg.JPEG(img_path).decode()
    except Exception:
        print('Decoding error:', img_path)
        broken = True
    return broken
def bytes2hex(bytes):
    """Return the uppercase hexadecimal text for a sequence of integers.

    Args:
        bytes: Iterable of integers, e.g. a ``bytes`` object or the tuple
            produced by ``struct.unpack('B' * n, ...)``. The parameter name
            shadows the builtin but is kept for backward compatibility.

    Returns:
        The concatenated uppercase hex digits. A value whose hex form has an
        odd number of digits is left-padded with one ``0``, so every byte
        value (0-255) contributes exactly two characters.
    """
    chunks = []
    for value in bytes:
        digits = u"%x" % value
        # Pad to an even digit count so bytes below 0x10 keep their leading zero.
        chunks.append(u"0" + digits if len(digits) % 2 else digits)
    # Join once instead of growing a string with += inside the loop.
    return u"".join(chunks).upper()
def get_filetype(filename, signatures=None):
    """Identify a file's real type from its leading "magic" bytes.

    Args:
        filename: Path of the file to inspect.
        signatures: Optional mapping of signature -> type label, where each
            signature is an uppercase, even-length hex string describing the
            first bytes of a file. Defaults to the module-level ``type_dict``.

    Returns:
        The label of the first signature (in mapping order) that the file
        starts with, or ``'unknown'`` when none matches. Files shorter than a
        signature simply do not match it (previously this raised
        ``struct.error``).

    Raises:
        OSError: If the file cannot be opened or read.
    """
    if signatures is None:
        signatures = type_dict

    # Read the longest header any signature needs exactly once; ``with``
    # guarantees the handle is closed even if reading fails.
    longest = max((len(signature) for signature in signatures), default=0)
    with open(filename, 'rb') as file:
        header = file.read((longest + 1) // 2).hex().upper()

    for signature, ftype in signatures.items():
        if header.startswith(signature):
            return ftype
    return 'unknown'
def _retire_original(image_name):
    """Move ``image_name`` into the backup tree, or delete it, per ``mv_wrong_jpg``."""
    if not mv_wrong_jpg:
        os.remove(image_name)
        return

    backup_dir = mv_path
    if sub_dir_exit:
        # Mirror the image's parent directory name under the backup root.
        parent = os.path.basename(os.path.dirname(image_name))
        backup_dir = os.path.join(mv_path, parent)
    # exist_ok avoids a check-then-create race between pool workers that
    # handle images from the same sub-directory at the same time.
    os.makedirs(backup_dir, exist_ok=True)
    shutil.move(image_name, os.path.join(backup_dir, os.path.basename(image_name)))


def modify_image_formate(image_name, origin_format, dir_format='.jpg'):
    """Re-save a mislabeled image in the target format.

    PNG/BMP files are re-encoded and written next to the original under the
    same base name. GIF files are split into frames, each written next to the
    original under a random unique name. Afterwards the original is moved to
    the backup directory or deleted, depending on ``mv_wrong_jpg``. Any other
    ``origin_format`` is ignored.

    Args:
        image_name: Path of the image to fix.
        origin_format: The image's real format label (e.g. ``'png'``).
        dir_format: Target file extension, including the leading dot.
    """
    if origin_format in ('png', 'bmp'):
        image = cv2.imread(image_name)
        if image is None:
            # cv2.imread signals failure by returning None. Bail out before
            # touching the original, otherwise it would be moved/deleted and
            # the following write would fail, losing the image.
            print('Skipping unreadable image:', image_name)
            return
        stem, _ = os.path.splitext(image_name)
        # The target path usually equals image_name itself, so the original
        # has to be moved away (or removed) before the new file is written.
        _retire_original(image_name)
        cv2.imwrite(stem + dir_format, image)
    elif origin_format == 'gif':
        out_dir = os.path.dirname(image_name)
        # The context manager closes the underlying file once all frames are read.
        with Image.open(image_name) as gif:
            for frame in ImageSequence.Iterator(gif):
                rgb = np.asarray(frame.convert("RGB"))
                bgr = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)
                frame_path = os.path.join(out_dir, uuid.uuid4().hex + dir_format)
                cv2.imwrite(frame_path, bgr)
        _retire_original(image_name)
def run(image_full_name):
    """Check one file's real type and convert it when it is not the target type.

    Args:
        image_full_name: Path of the image file to check.
    """
    image_type = get_filetype(image_full_name)

    # Compare by value: ``is`` tests object identity and only appeared to
    # work because equal string constants happen to be shared by CPython.
    if image_type == dir_format:
        return

    print("Modifing {}, it's right format is: {}.".format(image_full_name, image_type))
    modify_image_formate(image_full_name, origin_format=image_type, dir_format='.jpg')
def get_image_list(root=None, has_sub_dirs=None):
    """Collect the paths of all files to be checked.

    Args:
        root: Directory to scan. Defaults to the module-level ``image_path``.
        has_sub_dirs: ``True`` when the images live one level below ``root``
            (``root/<sub_dir>/<image>``), ``False`` when they live directly in
            ``root``. Defaults to the module-level ``sub_dir_exit``.

    Returns:
        A list of file paths, in sorted order per directory. Top-level entries
        that are not directories (sub-directory mode) and entries that are not
        regular files are skipped.

    Raises:
        OSError: If ``root`` cannot be listed.
    """
    if root is None:
        root = image_path
    if has_sub_dirs is None:
        has_sub_dirs = sub_dir_exit

    if has_sub_dirs:
        # Stray files next to the sub-directories would make os.listdir fail.
        folders = [
            (name, os.path.join(root, name))
            for name in sorted(os.listdir(root))
            if os.path.isdir(os.path.join(root, name))
        ]
    else:
        # Flat layout: scan the root itself. (Iterating over the path string,
        # as before, walked its characters instead of its files.)
        folders = [(root, root)]

    img_list = []
    for label, folder in folders:
        print("{}----".format(label))
        for image_name in sorted(os.listdir(folder)):
            image_full_name = os.path.join(folder, image_name)
            if os.path.isfile(image_full_name):
                img_list.append(image_full_name)
    return img_list
if __name__ == "__main__":
    # Create the backup root up front; exist_ok makes this safe to re-run.
    if mv_wrong_jpg:
        os.makedirs(mv_path, exist_ok=True)

    img_list = get_image_list()

    # One worker per CPU core by default. The context manager shuts the pool
    # down once map() has returned (or raised), so no workers are left behind.
    with mp.Pool() as pool:
        pool.map(run, img_list)

    print('Convert Done!')
|