1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194
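"""Fix images whose real format does not match their ``.jpg`` extension.

Every picture in the folder is named ``*.jpg``, but some of them are not really
JPEG files; they may be PNG, BMP or GIF. PNG and BMP images are converted to
JPEG and saved under the original name. For a GIF, every frame is read and
saved as a JPEG with a randomly generated unique name. The original file can
either be moved elsewhere or deleted.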
对于jpeg4py在Ubuntu下需要 sudo apt-get install libturbojpeg """ import imghdr import os import struct import cv2 from PIL import Image, ImageSequence import uuid import numpy as np import multiprocessing as mp import shutil import jpeg4py as jpeg
# --- Script configuration -------------------------------------------------
# Root folder that is scanned for images.
image_path = "./IJCAI_2019_AAAC_train/flower_photos"
# True: image_path holds one level of sub-folders which in turn hold the
# images. Read by get_image_list() and modify_image_formate().
sub_dir_exit = True
# Type label that run() treats as "already correct" (compared with the label
# returned by get_filetype()).
dir_format = "jpg"
# True: the mislabelled original is moved below mv_path; False: it is deleted.
mv_wrong_jpg = True
# Folder that receives the moved originals when mv_wrong_jpg is True.
mv_path = './IJCAI_2019_AAAC_train/old'

# Maps the upper-case hex string of a file's leading bytes to a type label.
# get_filetype() walks the entries in the order written here and returns the
# label of the first key that equals the file's first len(key) / 2 bytes.
type_dict = {
    'FFD8FF': 'jpg',
    '89504E47': 'png',
    '47494638': 'gif',
    '49492A00': 'tif',
    '424D': 'bmp',
    '41433130': 'dwg',
    '38425053': 'psd',
    '7B5C727466': 'rtf',
    '3C3F786D6C': 'xml',
    '68746D6C3E': 'html',
    '44656C69766572792D646174653A': 'eml',
    'CFAD12FEC5FD746F': 'dbx',
    '2142444E': 'pst',
    'D0CF11E0': 'doc/xls',
    '5374616E64617264204A': 'mdb',
    'FF575043': 'wpd',
    '252150532D41646F6265': 'ps/eps',
    '255044462D312E': 'pdf',
    'AC9EBD8F': 'qdf',
    'E3828596': 'pwl',
    '504B0304': 'zip',
    '52617221': 'rar',
    '57415645': 'wav',
    '41564920': 'avi',
    '2E7261FD': 'ram',
    '2E524D46': 'rm',
    '000001BA': 'mpg',
    '000001B3': 'mpg',
    '6D6F6F76': 'mov',
    '3026B2758E66CF11': 'asf',
    '4D546864': 'mid'
}
def check_remove_broken(img_path):
    """Tell whether ``img_path`` fails to decode with jpeg4py.

    Args:
        img_path: path of the file to decode.

    Returns:
        True if decoding raised (the path is printed first), False if the
        file decoded without an exception.
    """
    try:
        # Only success/failure matters; the decoded pixels are discarded.
        jpeg.JPEG(img_path).decode()
    except Exception:
        print('Decoding error:', img_path)
        return True
    return False
def bytes2hex(bytes):
    """Return the upper-case hexadecimal string for a sequence of integers.

    Every value is rendered with ``%x`` and gets a single leading ``0`` when
    the rendering has an odd number of characters, so values in 0..255 always
    contribute exactly two characters.

    Args:
        bytes: iterable of integers, e.g. a ``bytes`` object or the tuple
            returned by ``struct.unpack``. (The name shadows the builtin; it
            is kept so existing keyword callers keep working.)

    Returns:
        The concatenated hex digits in upper case; ``""`` for empty input.
    """
    chunks = []
    for value in bytes:
        digits = u"%x" % value
        if len(digits) % 2:
            digits = u"0" + digits
        chunks.append(digits)
    # Join once instead of growing a string inside the loop.
    return u"".join(chunks).upper()
def get_filetype(filename, signatures=None):
    """Guess a file's real type from its leading bytes.

    Args:
        filename: path of the file to inspect.
        signatures: optional mapping of upper-case hex signature -> type
            label. Defaults to the module-level ``type_dict``.

    Returns:
        The label of the first signature (in mapping order) that equals the
        start of the file, or ``'unknown'`` when none matches. Files shorter
        than a signature simply do not match it.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    if signatures is None:
        signatures = type_dict

    # Read the longest needed prefix once; the handle is closed even on error.
    longest = max((len(sig) // 2 for sig in signatures), default=0)
    with open(filename, 'rb') as file:
        header = file.read(longest)

    for sig, label in signatures.items():
        num_bytes = len(sig) // 2
        if len(header) < num_bytes:
            # Too short to carry this signature (used to raise struct.error).
            continue
        if header[:num_bytes].hex().upper() == sig:
            return label
    return 'unknown'
def _dispose_original(image_name):
    """Move the original file below ``mv_path`` or delete it, per ``mv_wrong_jpg``."""
    if not mv_wrong_jpg:
        os.remove(image_name)
        return

    if sub_dir_exit:
        # Recreate the file's parent folder name below mv_path.
        parent = os.path.basename(os.path.dirname(image_name))
        target_dir = os.path.join(mv_path, parent)
    else:
        target_dir = mv_path
    # exist_ok avoids a crash when several pool workers create the same folder.
    os.makedirs(target_dir, exist_ok=True)
    shutil.move(image_name, os.path.join(target_dir, os.path.basename(image_name)))


def modify_image_formate(image_name, origin_format, dir_format='.jpg'):
    """Re-save a mislabelled image in the target format.

    * ``png`` / ``bmp``: the image is decoded with OpenCV, the original is
      moved or deleted, and the pixels are written to the same path stem with
      the ``dir_format`` extension.
    * ``gif``: every frame is written next to the original under a random
      ``uuid4`` name with the ``dir_format`` extension, then the original is
      moved or deleted.
    * any other value: nothing happens.

    Args:
        image_name: path of the stored image to fix.
        origin_format: the real format of the image (e.g. ``'png'``).
        dir_format: target file extension, including the dot.
    """
    if origin_format == 'png' or origin_format == 'bmp':
        image = cv2.imread(image_name)
        if image is None:
            # cv2.imread signals failure with None; keep the original so no
            # data is lost.
            print('Skip {}: OpenCV could not decode it.'.format(image_name))
            return
        stem, _ = os.path.splitext(image_name)
        dir_image_name = stem + dir_format
        # The output path may equal the input path, so the original has to be
        # out of the way before writing.
        _dispose_original(image_name)
        cv2.imwrite(dir_image_name, image)

    elif origin_format == 'gif':
        folder = os.path.dirname(image_name)
        # Close the GIF before the file is moved or removed.
        with Image.open(image_name) as im:
            for frame in ImageSequence.Iterator(im):
                rgb = np.asarray(frame.convert("RGB"))
                bgr = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)
                new_image_name = os.path.join(folder, uuid.uuid4().hex + dir_format)
                cv2.imwrite(new_image_name, bgr)
        _dispose_original(image_name)
def run(image_full_name):
    """Check one file and convert it when its real type is not the target type.

    Args:
        image_full_name: path of the image file to check.
    """
    image_type = get_filetype(image_full_name)

    # Compare by value: `is` only succeeded while both strings happened to be
    # the same interned object.
    if image_type == dir_format:
        return

    print("Modifing {}, it's right format is: {}.".format(image_full_name, image_type))
    modify_image_formate(image_full_name, origin_format=image_type, dir_format='.jpg')
def get_image_list(root=None, has_sub_dirs=None):
    """Collect the paths of all files to check.

    Args:
        root: folder to scan; defaults to the module-level ``image_path``.
        has_sub_dirs: True if ``root`` holds one level of sub-folders that
            contain the images, False if the images sit directly in ``root``.
            Defaults to the module-level ``sub_dir_exit``.

    Returns:
        A list of file paths, sorted by folder and then by file name.
        Entries of ``root`` that are not directories (in sub-folder mode) and
        entries that are not regular files are skipped.
    """
    if root is None:
        root = image_path
    if has_sub_dirs is None:
        has_sub_dirs = sub_dir_exit

    if has_sub_dirs:
        folders = []
        for sub_dir in sorted(os.listdir(root)):
            folder = os.path.join(root, sub_dir)
            # Stray files next to the sub-folders would break os.listdir below.
            if not os.path.isdir(folder):
                continue
            print("{}----".format(sub_dir))
            folders.append(folder)
    else:
        # Flat layout: scan the root itself (the old code iterated over the
        # characters of the path string here).
        folders = [root]

    img_list = []
    for folder in folders:
        for image_name in sorted(os.listdir(folder)):
            image_full_name = os.path.join(folder, image_name)
            if os.path.isfile(image_full_name):
                img_list.append(image_full_name)
    return img_list
if __name__ == "__main__":
    # Create the backup root up front when originals are to be moved.
    if mv_wrong_jpg:
        os.makedirs(mv_path, exist_ok=True)

    img_list = get_image_list()
    # The context manager releases the worker processes once map() has
    # returned; the pool was previously never closed.
    with mp.Pool() as pool:
        pool.map(run, img_list)
    print('Convert Done!')
|