1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
|
- import copy
- import os
- import cv2
- import numpy as np
- from pycocotools.coco import COCO
- from super_gradients.common.abstractions.abstract_logger import get_logger
- from super_gradients.training.datasets.datasets_conf import COCO_DETECTION_CLASSES_LIST
- from super_gradients.training.datasets.detection_datasets.detection_dataset import DetectionDataset
- from super_gradients.training.exceptions.dataset_exceptions import DatasetValidationException, ParameterMismatchException
- from super_gradients.training.utils.detection_utils import DetectionTargetsFormat
- logger = get_logger(__name__)
class COCODetectionDataset(DetectionDataset):
    """Dataset for COCO object detection.

    To use this Dataset you need to:

    - Download coco dataset:
        annotations: http://images.cocodataset.org/annotations/annotations_trainval2017.zip
        train2017: http://images.cocodataset.org/zips/train2017.zip
        val2017: http://images.cocodataset.org/zips/val2017.zip

    - Unzip and organize it as below:
        coco
        ├── annotations
        │      ├─ instances_train2017.json
        │      ├─ instances_val2017.json
        │      └─ ...
        └── images
            ├── train2017
            │   ├─ 000000000001.jpg
            │   └─ ...
            └── val2017
                └─ ...

    - Install CoCo API: https://github.com/pdollar/coco/tree/master/PythonAPI

    - Instantiate the dataset:
        >> train_set = COCODetectionDataset(data_dir='.../coco', subdir='images/train2017', json_file='instances_train2017.json', ...)
        >> valid_set = COCODetectionDataset(data_dir='.../coco', subdir='images/val2017', json_file='instances_val2017.json', ...)
    """

    def __init__(
        self,
        json_file: str = "instances_train2017.json",
        subdir: str = "images/train2017",
        tight_box_rotation: bool = False,
        with_crowd: bool = True,
        *args,
        **kwargs,
    ):
        """
        :param json_file:           Name of the coco json file, that resides in data_dir/annotations/json_file.
        :param subdir:              Sub directory of data_dir containing the data.
        :param tight_box_rotation:  bool, whether to use of segmentation maps convex hull as target_seg
                                    (check get_sample docs).
        :param with_crowd:          Add the crowd groundtruths to __getitem__

        kwargs:
            all_classes_list: all classes list, default is COCO_DETECTION_CLASSES_LIST.

        :raises ParameterMismatchException: if `all_classes_list` is a strict subset of the classes in the JSON.
        :raises DatasetValidationException: if the class counts mismatch for any other reason.
        """
        self.subdir = subdir
        self.json_file = json_file
        self.tight_box_rotation = tight_box_rotation
        self.with_crowd = with_crowd

        # Crowd targets are an optional extra output field alongside the regular targets.
        target_fields = ["target", "crowd_target"] if self.with_crowd else ["target"]
        kwargs["target_fields"] = target_fields
        kwargs["output_fields"] = ["image", *target_fields]
        kwargs["original_target_format"] = DetectionTargetsFormat.XYXY_LABEL
        kwargs["all_classes_list"] = kwargs.get("all_classes_list") or COCO_DETECTION_CLASSES_LIST
        super().__init__(*args, **kwargs)

        # Validate that the user-provided class list is consistent with the dataset JSON.
        if len(self.original_classes) != len(self.all_classes_list):
            if set(self.all_classes_list).issubset(set(self.original_classes)):
                raise ParameterMismatchException(
                    "Parameter `all_classes_list` contains a subset of classes from dataset JSON. "
                    "Please use `class_inclusion_list` to train with reduced number of classes",
                )
            else:
                raise DatasetValidationException(
                    "Number of classes in dataset JSON do not match with number of classes in all_classes_list parameter. "
                    "Most likely this indicates an error in your all_classes_list parameter"
                )

    def _setup_data_source(self) -> int:
        """Initialize the COCO handle and the sample-id -> coco-image-id mapping.

        :return: Number of samples in the dataset.
        """
        self.coco = self._init_coco()
        self.class_ids = sorted(self.coco.getCatIds())
        self.original_classes = [category["name"] for category in self.coco.loadCats(self.class_ids)]
        self.classes = copy.deepcopy(self.original_classes)
        self.sample_id_to_coco_id = self.coco.getImgIds()
        return len(self.sample_id_to_coco_id)

    def _init_coco(self) -> COCO:
        """Load the COCO annotation file and strip metadata that is not needed.

        :return: Initialized pycocotools COCO object.
        :raises ValueError: if the annotation file does not exist.
        """
        annotation_file_path = os.path.join(self.data_dir, "annotations", self.json_file)
        if not os.path.exists(annotation_file_path):
            raise ValueError("Could not find annotation file under " + str(annotation_file_path))
        coco = COCO(annotation_file_path)
        # Saves ~30% memory; segmentation info is kept only when needed for tight box rotation.
        remove_useless_info(coco, self.tight_box_rotation)
        return coco

    def _load_annotation(self, sample_id: int) -> dict:
        """
        Load relevant information of a specific image.

        :param sample_id:               Sample_id in the dataset
        :return target:                 Target Bboxes (detection) in XYXY_LABEL format
        :return crowd_target:           Crowd target Bboxes (detection) in XYXY_LABEL format
        :return target_segmentation:    Segmentation
        :return initial_img_shape:      Image (height, width)
        :return resized_img_shape:      Resized image (height, width)
        :return img_path:               Path to the associated image
        """
        img_id = self.sample_id_to_coco_id[sample_id]
        img_metadata = self.coco.loadImgs(img_id)[0]
        width = img_metadata["width"]
        height = img_metadata["height"]

        img_annotation_ids = self.coco.getAnnIds(imgIds=[int(img_id)])
        img_annotations = self.coco.loadAnns(img_annotation_ids)

        # Clip boxes to the image and drop degenerate/zero-area annotations.
        cleaned_annotations = []
        for annotation in img_annotations:
            x1 = np.max((0, annotation["bbox"][0]))
            y1 = np.max((0, annotation["bbox"][1]))
            x2 = np.min((width, x1 + np.max((0, annotation["bbox"][2]))))
            y2 = np.min((height, y1 + np.max((0, annotation["bbox"][3]))))
            if annotation["area"] > 0 and x2 >= x1 and y2 >= y1:
                annotation["clean_bbox"] = [x1, y1, x2, y2]
                cleaned_annotations.append(annotation)

        non_crowd_annotations = [annotation for annotation in cleaned_annotations if annotation["iscrowd"] == 0]

        target = np.zeros((len(non_crowd_annotations), 5))
        num_seg_values = 98 if self.tight_box_rotation else 0
        # NaN-filled buffer; only the leading convex-hull points of each row get real values.
        target_segmentation = np.full((len(non_crowd_annotations), num_seg_values), np.nan)

        for ix, annotation in enumerate(non_crowd_annotations):
            cls = self.class_ids.index(annotation["category_id"])
            target[ix, 0:4] = annotation["clean_bbox"]
            target[ix, 4] = cls
            if self.tight_box_rotation:
                seg_points = [j for i in annotation.get("segmentation", []) for j in i]
                if seg_points:
                    # FIX: np.int was deprecated in NumPy 1.20 and removed in 1.24; use builtin int.
                    seg_points_c = np.array(seg_points).reshape((-1, 2)).astype(int)
                    seg_points_convex = cv2.convexHull(seg_points_c).ravel()
                else:
                    seg_points_convex = []
                target_segmentation[ix, : len(seg_points_convex)] = seg_points_convex

        crowd_annotations = [annotation for annotation in cleaned_annotations if annotation["iscrowd"] == 1]

        crowd_target = np.zeros((len(crowd_annotations), 5))
        for ix, annotation in enumerate(crowd_annotations):
            cls = self.class_ids.index(annotation["category_id"])
            crowd_target[ix, 0:4] = annotation["clean_bbox"]
            crowd_target[ix, 4] = cls

        # Rescale targets so boxes/segmentations match the network input resolution.
        r = min(self.input_dim[0] / height, self.input_dim[1] / width)
        target[:, :4] *= r
        crowd_target[:, :4] *= r
        target_segmentation *= r

        initial_img_shape = (height, width)
        resized_img_shape = (int(height * r), int(width * r))

        # Fall back to the zero-padded-id naming convention when file_name is absent.
        file_name = img_metadata["file_name"] if "file_name" in img_metadata else "{:012}".format(img_id) + ".jpg"
        img_path = os.path.join(self.data_dir, self.subdir, file_name)

        annotation = {
            "target": target,
            "crowd_target": crowd_target,
            "target_segmentation": target_segmentation,
            "initial_img_shape": initial_img_shape,
            "resized_img_shape": resized_img_shape,
            "img_path": img_path,
            "id": np.array([img_id]),
        }
        return annotation
def remove_useless_info(coco, use_seg_info=False):
    """Strip memory-heavy metadata from a loaded COCO object, in place.

    Drops the top-level "info"/"licenses" entries, per-image license/URL
    fields, and — unless ``use_seg_info`` is True — per-annotation
    segmentation data. This is mainly used for saving memory (about 30%).
    Inputs that are not a ``COCO`` instance are left untouched.
    """
    if not isinstance(coco, COCO):
        return
    dataset = coco.dataset
    for top_level_key in ("info", "licenses"):
        dataset.pop(top_level_key, None)
    unused_image_keys = ("license", "coco_url", "date_captured", "flickr_url")
    for img in dataset["images"]:
        for key in unused_image_keys:
            img.pop(key, None)
    if not use_seg_info and "annotations" in dataset:
        for anno in dataset["annotations"]:
            anno.pop("segmentation", None)
|