import os
import re
import shutil
import sys

# Image names look like "<Category_Name>_<number>.jpg"; group 1 is the category.
_IMAGE_NAME = re.compile(r"([a-zA-Z_]+)_[0-9]+\.jpg$")

# Only the text inside <name>cat</name> / <name>dog</name> gets the category
# appended. A plain substring replace would also hit tags such as <truncated>
# ("trun-cat-ed") and file names such as "american_bulldog_1.jpg".
_NAME_TAG = re.compile(r"(<name>\s*)(cat|dog)(\s*</name>)")


def convert(input_dir, annotation_dir, output_dir):
    """
    Places the VOC xml files alongside the images, appends the actual category.

    Every ``.jpg`` in ``input_dir`` that has a matching ``.xml`` in
    ``annotation_dir`` is copied to ``output_dir`` together with a rewritten
    copy of its annotation, in which ``<name>cat</name>`` / ``<name>dog</name>``
    become ``<name>cat:<category></name>`` / ``<name>dog:<category></name>``.
    The category is the lower-cased part of the image name before the trailing
    ``_<number>.jpg``. Images without an annotation, or whose name does not
    follow that pattern, are reported on stdout and skipped.

    :param input_dir: the image directory ("/some/where/images")
    :type input_dir: str
    :param annotation_dir: the directory with the XML annotations ("/some/where/annotation/xmls")
    :type annotation_dir: str
    :param output_dir: the output directory (created if missing)
    :type output_dir: str
    """
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    count = 0
    for image_name in os.listdir(input_dir):
        if not image_name.endswith(".jpg"):
            continue

        # Swap only the extension; str.replace would touch every ".jpg" in the name.
        xml_name = os.path.splitext(image_name)[0] + ".xml"
        ann_file_in = os.path.join(annotation_dir, xml_name)
        if not os.path.exists(ann_file_in):
            print("Missing annotation: %s" % image_name)
            continue

        match = _IMAGE_NAME.match(image_name)
        if match is None:
            # Without a category there is nothing to append, so skip the file
            # rather than crash on match.group().
            print("Unexpected file name: %s" % image_name)
            continue
        category = match.group(1).lower()

        count += 1
        shutil.copy(
            os.path.join(input_dir, image_name),
            os.path.join(output_dir, image_name),
        )

        with open(ann_file_in, "r") as fp:
            content = fp.read()

        def _append_category(m, category=category):
            # Rebuild the tag from its captured parts so whitespace is kept.
            return "%s%s:%s%s" % (m.group(1), m.group(2), category, m.group(3))

        content = _NAME_TAG.sub(_append_category, content)

        with open(os.path.join(output_dir, xml_name), "w") as fp:
            fp.write(content)

        # Lightweight progress indicator for large datasets.
        if count % 100 == 0:
            print(count)


def main(argv=None):
    """
    Command-line entry point: ``<script> <dataset_dir> <output_parent_dir>``.

    Reads images from ``<dataset_dir>/images`` and annotations from
    ``<dataset_dir>/annotations/xmls``, and writes the result to
    ``<output_parent_dir>/voc-head``.

    :param argv: the arguments without the program name, defaults to sys.argv[1:]
    :type argv: list
    """
    args = sys.argv[1:] if argv is None else argv
    if len(args) < 2:
        sys.stderr.write(
            "Usage: %s <dataset_dir> <output_parent_dir>\n"
            % os.path.basename(sys.argv[0])
        )
        sys.exit(1)

    input_dir = os.path.join(args[0], "images")
    annotation_dir = os.path.join(args[0], "annotations", "xmls")
    output_dir = os.path.join(args[1], "voc-head")
    convert(input_dir, annotation_dir, output_dir)


if __name__ == "__main__":
    main()