Source code for megengine.data.dataset.vision.voc

# -*- coding: utf-8 -*-
# ---------------------------------------------------------------------
# Part of the following code in this file refs to torchvision
# BSD 3-Clause License
#
# Copyright (c) Soumith Chintala 2016,
# All rights reserved.
# ---------------------------------------------------------------------
import collections.abc
import os
import xml.etree.ElementTree as ET

import cv2
import numpy as np

from .meta_vision import VisionDataset


[docs]class PascalVOC(VisionDataset): r"""`Pascal VOC <http://host.robots.ox.ac.uk/pascal/VOC/>`_ Dataset.""" supported_order = ( "image", "boxes", "boxes_category", "mask", "info", ) def __init__(self, root, image_set, *, order=None): if ("boxes" in order or "boxes_category" in order) and "mask" in order: raise ValueError( "PascalVOC only supports boxes & boxes_category or mask, not both." ) super().__init__(root, order=order, supported_order=self.supported_order) if not os.path.isdir(self.root): raise RuntimeError("Dataset not found or corrupted.") self.image_set = image_set image_dir = os.path.join(self.root, "JPEGImages") if "boxes" in order or "boxes_category" in order: annotation_dir = os.path.join(self.root, "Annotations") splitdet_dir = os.path.join(self.root, "ImageSets/Main") split_f = os.path.join(splitdet_dir, image_set.rstrip("\n") + ".txt") with open(os.path.join(split_f), "r") as f: self.file_names = [x.strip() for x in f.readlines()] self.images = [os.path.join(image_dir, x + ".jpg") for x in self.file_names] self.annotations = [ os.path.join(annotation_dir, x + ".xml") for x in self.file_names ] assert len(self.images) == len(self.annotations) elif "mask" in order: if "aug" in image_set: mask_dir = os.path.join(self.root, "SegmentationClass_aug") else: mask_dir = os.path.join(self.root, "SegmentationClass") splitmask_dir = os.path.join(self.root, "ImageSets/Segmentation") split_f = os.path.join(splitmask_dir, image_set.rstrip("\n") + ".txt") with open(os.path.join(split_f), "r") as f: self.file_names = [x.strip() for x in f.readlines()] self.images = [os.path.join(image_dir, x + ".jpg") for x in self.file_names] self.masks = [os.path.join(mask_dir, x + ".png") for x in self.file_names] assert len(self.images) == len(self.masks) else: raise NotImplementedError self.img_infos = dict() def __getitem__(self, index): target = [] for k in self.order: if k == "image": image = cv2.imread(self.images[index], cv2.IMREAD_COLOR) target.append(image) elif k == "boxes": anno = self.parse_voc_xml(ET.parse(self.annotations[index]).getroot()) boxes = [obj["bndbox"] for obj in anno["annotation"]["object"]] # boxes type xyxy boxes = [ (bb["xmin"], bb["ymin"], bb["xmax"], bb["ymax"]) for bb in boxes ] boxes = np.array(boxes, dtype=np.float32).reshape(-1, 4) target.append(boxes) elif k == "boxes_category": anno = self.parse_voc_xml(ET.parse(self.annotations[index]).getroot()) boxes_category = [obj["name"] for obj in anno["annotation"]["object"]] boxes_category = [ self.class_names.index(bc) + 1 for bc in boxes_category ] boxes_category = np.array(boxes_category, dtype=np.int32) target.append(boxes_category) elif k == "mask": if "aug" in self.image_set: mask = cv2.imread(self.masks[index], cv2.IMREAD_GRAYSCALE) else: mask = cv2.imread(self.masks[index], cv2.IMREAD_COLOR) mask = self._trans_mask(mask) mask = mask[:, :, np.newaxis] target.append(mask) elif k == "info": info = self.get_img_info(index, image) info = [info["height"], info["width"], info["file_name"]] target.append(info) else: raise NotImplementedError return tuple(target) def __len__(self): return len(self.images) def get_img_info(self, index, image=None): if index not in self.img_infos: if image is None: image = cv2.imread(self.images[index], cv2.IMREAD_COLOR) self.img_infos[index] = dict( height=image.shape[0], width=image.shape[1], file_name=self.file_names[index], ) return self.img_infos[index] def _trans_mask(self, mask): label = np.ones(mask.shape[:2]) * 255 for i in range(len(self.class_colors)): b, g, r = self.class_colors[i] label[ (mask[:, :, 0] == b) & (mask[:, :, 1] == g) & (mask[:, :, 2] == r) ] = i return label.astype(np.uint8) def parse_voc_xml(self, node): voc_dict = {} children = list(node) if children: def_dic = collections.defaultdict(list) for dc in map(self.parse_voc_xml, children): for ind, v in dc.items(): def_dic[ind].append(v) if node.tag == "annotation": def_dic["object"] = [def_dic["object"]] voc_dict = { node.tag: { ind: v[0] if len(v) == 1 else v for ind, v in def_dic.items() } } if node.text: text = node.text.strip() if not children: voc_dict[node.tag] = text return voc_dict class_names = ( "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor", ) class_colors = [ [0, 0, 0], # background [0, 0, 128], [0, 128, 0], [0, 128, 128], [128, 0, 0], [128, 0, 128], [128, 128, 0], [128, 128, 128], [0, 0, 64], [0, 0, 192], [0, 128, 64], [0, 128, 192], [128, 0, 64], [128, 0, 192], [128, 128, 64], [128, 128, 192], [0, 64, 0], [0, 64, 128], [0, 192, 0], [0, 192, 128], [128, 64, 0], ]