Source code for tinyms.vision.utils

# Copyright 2020-2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

"""Bbox utils"""
import json
import math
import itertools as it
import numpy as np
from easydict import EasyDict as ed

# Hyper-parameters for the SSD300 model, exposed with attribute access through EasyDict.
# The comments below only state how the code visible in this module reads each key;
# keys marked "not read here" are presumably consumed by training/network code elsewhere.
ssd300_config = ed({
    # GenerateDefaultBoxes divides img_shape[0] by each entry of "steps".
    "img_shape": [300, 300],
    # Length of the per-anchor target arrays allocated in ssd_bboxes_encode.
    "num_ssd_boxes": 1917,
    # ssd_bboxes_encode matches an anchor to a ground-truth box when their IoU exceeds this.
    "match_threshold": 0.5,
    # IoU threshold passed to apply_nms by ssd_bboxes_filter.
    "nms_threshold": 0.6,
    # ssd_bboxes_filter discards predictions whose class score is not above this.
    "min_score": 0.7,
    # Presumably the per-class cap on boxes kept after NMS -- confirm against callers.
    "max_boxes": 100,

    # learning rate settings (not read here)
    "lr_init": 0.001,
    "lr_end_rate": 0.001,
    "warmup_epochs": 2,
    "momentum": 0.9,
    "weight_decay": 1.5e-4,

    # network
    # Expected number of anchor sizes per feature map; asserted in GenerateDefaultBoxes.
    "num_default": [3, 6, 6, 6, 6, 6],
    # extras_* are not read here.
    "extras_in_channels": [256, 576, 1280, 512, 256, 256],
    "extras_out_channels": [576, 1280, 512, 256, 256, 128],
    "extras_strides": [1, 1, 2, 2, 2, 2],
    "extras_ratio": [0.2, 0.2, 0.2, 0.25, 0.5, 0.25],
    # Side length (in cells) of each square feature map that anchors are laid out on.
    "feature_size": [19, 10, 5, 3, 2, 1],
    # Anchor scales are spaced linearly from min_scale to max_scale across the feature maps.
    "min_scale": 0.2,
    "max_scale": 0.95,
    # Extra aspect ratios per feature map; an empty tuple on the first map selects
    # the special three-anchor layout in GenerateDefaultBoxes.
    "aspect_ratios": [(), (2, 3), (2, 3), (2, 3), (2, 3), (2, 3)],
    # Divisors of img_shape[0], one per feature map, used to place anchor centers.
    "steps": (16, 32, 64, 100, 150, 300),
    # ssd_bboxes_encode divides center offsets by [0] and log size ratios by [1].
    "prior_scaling": (0.1, 0.2),
    # gamma / alpha are not read here (presumably loss parameters -- TODO confirm).
    "gamma": 2.0,
    "alpha": 0.75,
})


class GenerateDefaultBoxes():
    """
    Build the SSD300 default (anchor) boxes described by `ssd300_config`.

    Boxes are produced one feature map after another; inside a feature map the
    order is row (H), then column (W), then anchor size.
    `self.default_boxes` is a float32 array of shape [num_boxes, 4], each row is [cy, cx, h, w].
    `self.default_boxes_tlbr` has the same shape, each row is [y1, x1, y2, x2].
    """

    def __init__(self):
        cfg = ssd300_config
        num_layers = len(cfg.num_default)

        # Cells per unit length for every feature map.
        cells_per_unit = cfg.img_shape[0] / np.array(cfg.steps)

        # Linearly spaced scales plus a trailing 1.0 used as the "next" scale of the last map.
        scale_step = (cfg.max_scale - cfg.min_scale) / (num_layers - 1)
        scales = [cfg.min_scale + scale_step * k for k in range(num_layers)] + [1.0]

        center_boxes = []
        for layer, grid in enumerate(cfg.feature_size):
            scale = scales[layer]
            next_scale = scales[layer + 1]
            ratios = cfg.aspect_ratios[layer]

            if layer == 0 and not ratios:
                # First map without extra ratios: one fixed small box and a 2:1 pair.
                wide, narrow = scale * math.sqrt(2), scale / math.sqrt(2)
                sizes = [(0.1, 0.1), (wide, narrow), (narrow, wide)]
            else:
                sizes = [(scale, scale)]
                for ratio in ratios:
                    wide, narrow = scale * math.sqrt(ratio), scale / math.sqrt(ratio)
                    sizes += [(wide, narrow), (narrow, wide)]
                # Extra square box at the geometric mean of this scale and the next one.
                mid_scale = math.sqrt(scale * next_scale)
                sizes.append((mid_scale, mid_scale))

            assert len(sizes) == cfg.num_default[layer]

            density = cells_per_unit[layer]
            for row in range(grid):
                center_y = (row + 0.5) / density
                for col in range(grid):
                    center_x = (col + 0.5) / density
                    for box_w, box_h in sizes:
                        center_boxes.append([center_y, center_x, box_h, box_w])

        # Corner form, kept for IoU calculation.
        corner_boxes = [
            (cy - h / 2, cx - w / 2, cy + h / 2, cx + w / 2)
            for cy, cx, h, w in center_boxes
        ]
        self.default_boxes_tlbr = np.array(corner_boxes, dtype='float32')
        self.default_boxes = np.array(center_boxes, dtype='float32')


# Generate the anchors once and expose both representations: corner form
# ([y1, x1, y2, x2]) for IoU matching and center form ([cy, cx, h, w]) for encoding.
_default_boxes_generator = GenerateDefaultBoxes()
ssd_default_boxes_tlbr = _default_boxes_generator.default_boxes_tlbr
ssd_default_boxes = _default_boxes_generator.default_boxes


def ssd_bboxes_encode(boxes):
    r"""
    Labels anchors with ground truth inputs.

    Args:
        boxes (numpy.ndarray): Ground truth with shape [N, 5], for each row, it stores
            [ymin, xmin, ymax, xmax, cls].

    Returns:
        numpy.ndarray, float32 location targets with shape [num_anchors, 4]; rows of
        matched anchors hold the encoded [cy, cx, h, w] offsets.
        numpy.ndarray, int32 class targets with shape [num_anchors]; 0 means background.
        numpy.ndarray, int32 array with one element, the number of matched anchors.
    """
    num_anchors = ssd300_config.num_ssd_boxes
    anchor_y1, anchor_x1, anchor_y2, anchor_x2 = np.split(ssd_default_boxes_tlbr[:, :4], 4, axis=-1)
    anchor_areas = (anchor_x2 - anchor_x1) * (anchor_y2 - anchor_y1)

    def iou_with_anchors(gt):
        """Return the IoU between one ground-truth box and every anchor."""
        top = np.maximum(anchor_y1, gt[0])
        left = np.maximum(anchor_x1, gt[1])
        bottom = np.minimum(anchor_y2, gt[2])
        right = np.minimum(anchor_x2, gt[3])
        overlap_w = np.maximum(right - left, 0.)
        overlap_h = np.maximum(bottom - top, 0.)
        overlap = overlap_h * overlap_w
        union = anchor_areas + (gt[2] - gt[0]) * (gt[3] - gt[1]) - overlap
        return np.squeeze(overlap / union)

    best_scores = np.zeros((num_anchors), dtype=np.float32)
    matched_corners = np.zeros((num_anchors, 4), dtype=np.float32)
    t_label = np.zeros((num_anchors), dtype=np.int64)

    for gt in boxes:
        # Class ids are shifted by one so that 0 stays reserved for background.
        class_id = int(gt[4]) + 1
        scores = iou_with_anchors(gt)
        # Force the best-overlapping anchor to match, whatever its actual IoU.
        scores[np.argmax(scores)] = 2.0

        take = (scores > ssd300_config.match_threshold) & (scores > best_scores)
        keep_previous = 1 - take
        best_scores = np.maximum(best_scores, scores * take)
        t_label = take * class_id + keep_previous * t_label
        for col, coord in enumerate(gt[:4]):
            matched_corners[:, col] = take * coord + keep_previous * matched_corners[:, col]

    index = np.nonzero(t_label)

    # Convert matched corners [y1, x1, y2, x2] to center form [cy, cx, h, w].
    bboxes = np.zeros((num_anchors, 4), dtype=np.float32)
    bboxes[:, :2] = (matched_corners[:, :2] + matched_corners[:, 2:]) / 2
    bboxes[:, 2:] = matched_corners[:, 2:] - matched_corners[:, :2]

    # Encode matched anchors: scaled center offsets and scaled log size ratios.
    center_scale, size_scale = ssd300_config.prior_scaling[0], ssd300_config.prior_scaling[1]
    positives = bboxes[index]
    anchors = ssd_default_boxes[index]
    positives[:, :2] = (positives[:, :2] - anchors[:, :2]) / (anchors[:, 2:] * center_scale)
    # Clamp the ratio away from zero before taking the logarithm.
    size_ratio = np.maximum(positives[:, 2:4] / anchors[:, 2:4], 0.000001)
    positives[:, 2:4] = np.log(size_ratio) / size_scale
    bboxes[index] = positives

    num_match = np.array([len(index[0])], dtype=np.int32)
    return bboxes, t_label.astype(np.int32), num_match
def ssd_bboxes_filter(boxes, box_scores, image_shape):
    r"""
    Filter predict boxes with minimum score and nms threshold.

    For every non-background class, predictions scoring above
    `ssd300_config.min_score` are scaled to pixel coordinates and passed through
    NMS using `ssd300_config.nms_threshold` and `ssd300_config.max_boxes`.

    Args:
        boxes (numpy.ndarray): Predicted boxes with shape [N, 4], for each row, it stores
            [ymin, xmin, ymax, xmax]. They are multiplied by [h, w, h, w], so they are
            presumably normalized to the image size.
        box_scores (numpy.ndarray): Class scores with shape [N, num_classes]; column 0 is
            treated as background and skipped.
        image_shape (tuple): Shape of original image with the format [h, w].

    Returns:
        list[list[float]], kept boxes with shape [M, 4], for each row, it stores
        [ymin, xmin, ymax, xmax] in pixels.
        list[float], score of each kept box, length M.
        list[int], class label of each kept box, length M.
    """
    final_boxes = []
    final_label = []
    final_score = []
    h, w = image_shape

    # Ignore background(0) label class
    for c in range(1, box_scores.shape[1]):
        class_box_scores = box_scores[:, c]
        score_mask = class_box_scores > ssd300_config.min_score
        if not score_mask.any():
            continue

        class_box_scores = class_box_scores[score_mask]
        class_boxes = boxes[score_mask] * [h, w, h, w]

        # Honor the configured cap instead of relying on apply_nms's default.
        nms_index = apply_nms(class_boxes, class_box_scores,
                              ssd300_config.nms_threshold,
                              max_boxes=ssd300_config.max_boxes)
        class_boxes = class_boxes[nms_index]
        class_box_scores = class_box_scores[nms_index]

        final_boxes.extend(class_boxes.tolist())
        final_score.extend(class_box_scores.tolist())
        final_label.extend([c] * len(class_box_scores))

    return final_boxes, final_score, final_label
def jaccard_numpy(box_a, box_b):
    """
    Compute the jaccard overlap (IoU) between a set of boxes and one box.

    Args:
        box_a (numpy.ndarray): Boxes with shape [N, 4], rows are [ymin, xmin, ymax, xmax].
        box_b (numpy.ndarray): A single box [ymin, xmin, ymax, xmax].

    Returns:
        numpy.ndarray, IoU of every row of `box_a` with `box_b`, shape [N].
    """
    # Overlap rectangle; negative extents (no overlap) are clipped to zero.
    lower_right = np.minimum(box_a[:, 2:4], box_b[2:4])
    upper_left = np.maximum(box_a[:, :2], box_b[:2])
    extent = np.clip((lower_right - upper_left), a_min=0, a_max=np.inf)
    inter = extent[:, 0] * extent[:, 1]

    area_a = (box_a[:, 2] - box_a[:, 0]) * (box_a[:, 3] - box_a[:, 1])
    area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
    union = area_a + area_b - inter
    return inter / union


def apply_nms(all_boxes, all_scores, thres=0.6, max_boxes=100):
    """
    Apply greedy non-maximum suppression to all bounding boxes.

    Args:
        all_boxes (numpy.ndarray): Boxes with shape [N, 4], rows are [ymin, xmin, ymax, xmax].
        all_scores (numpy.ndarray): Score of each box, shape [N].
        thres (float): A box is dropped when its IoU with an already kept box exceeds this.
        max_boxes (int): Maximum number of boxes to keep; non-positive keeps nothing.

    Returns:
        list, indices into `all_boxes` of the kept boxes, highest score first.
    """
    keep = []
    if max_boxes <= 0:
        return keep

    y1 = all_boxes[:, 0]
    x1 = all_boxes[:, 1]
    y2 = all_boxes[:, 2]
    x2 = all_boxes[:, 3]
    # Widths and heights use the inclusive-pixel (+1) convention.
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)

    order = all_scores.argsort()[::-1]
    while order.size > 0:
        i = order[0]
        keep.append(i)
        if len(keep) >= max_boxes:
            break

        rest = order[1:]
        xx1 = np.maximum(x1[i], x1[rest])
        yy1 = np.maximum(y1[i], y1[rest])
        xx2 = np.minimum(x2[i], x2[rest])
        yy2 = np.minimum(y2[i], y2[rest])

        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        ovr = inter / (areas[i] + areas[rest] - inter)

        # Only candidates that do not overlap the kept box too much survive.
        order = rest[ovr <= thres]
    return keep
def coco_eval(pred_data, anno_file):
    """
    Calculate mAP of predicted bboxes.

    Each sample of `pred_data` is filtered with `ssd_bboxes_filter`, the surviving
    detections are written to `predictions.json` in the current working directory,
    and that file is scored against `anno_file` with pycocotools.

    Args:
        pred_data (iterable): Samples providing the keys 'boxes', 'box_scores',
            'img_id' and 'image_shape'.
        anno_file (str): Annotation file handed to `pycocotools.coco.COCO`.

    Returns:
        The first entry of the `COCOeval` stats after `summarize()`.
    """
    from pycocotools.coco import COCO
    from pycocotools.cocoeval import COCOeval

    predictions = []
    img_ids = []
    for sample in pred_data:
        pred_boxes = sample['boxes']
        box_scores = sample['box_scores']
        img_id = sample['img_id']
        img_ids.append(img_id)

        kept_boxes, kept_scores, kept_labels = ssd_bboxes_filter(
            pred_boxes, box_scores, sample['image_shape'])
        for loc, score, label in zip(kept_boxes, kept_scores, kept_labels):
            # Convert [ymin, xmin, ymax, xmax] to [x, y, width, height].
            predictions.append({
                'image_id': img_id,
                'bbox': [loc[1], loc[0], loc[3] - loc[1], loc[2] - loc[0]],
                'score': score,
                'category_id': label,
            })

    with open('predictions.json', 'w') as f:
        json.dump(predictions, f)

    coco_gt = COCO(anno_file)
    coco_dt = coco_gt.loadRes('predictions.json')
    evaluator = COCOeval(coco_gt, coco_dt, iouType='bbox')
    evaluator.params.imgIds = img_ids
    evaluator.evaluate()
    evaluator.accumulate()
    evaluator.summarize()
    return evaluator.stats[0]