from typing import Tuple

import torch
from torch import Tensor


def compute_visible_bbox_xywh(joints: Tensor, visibility_mask: Tensor) -> Tensor:
    """
    Compute the bounding box (X,Y,W,H) of the visible joints for each instance.

    :param joints: [Num Instances, Num Joints, 2+] the last channel must have a dimension of
                   at least 2 and is assumed to contain the (X, Y) coordinates of the keypoint
    :param visibility_mask: [Num Instances, Num Joints]
    :return: A tensor [Num Instances, 4] where the last dimension contains the bbox in XYWH format
    """
    visibility_mask = visibility_mask > 0
    initial_value = 1_000_000

    # torch.min/torch.max do not accept `where`/`initial` arguments (that is NumPy's API),
    # so invisible joints are masked out with a sentinel value before the reduction.
    x1 = joints[:, :, 0].masked_fill(~visibility_mask, initial_value).min(dim=-1).values
    y1 = joints[:, :, 1].masked_fill(~visibility_mask, initial_value).min(dim=-1).values

    # Instances with no visible joints produce a degenerate (0, 0, 0, 0) box
    x1[x1 == initial_value] = 0
    y1[y1 == initial_value] = 0

    x2 = joints[:, :, 0].masked_fill(~visibility_mask, 0).max(dim=-1).values
    y2 = joints[:, :, 1].masked_fill(~visibility_mask, 0).max(dim=-1).values

    w = x2 - x1
    h = y2 - y1
    return torch.stack([x1, y1, w, h], dim=-1)
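
# Usage sketch (illustrative, with made-up sample data; not part of the original module):
#
#   joints = torch.tensor([[[10.0, 20.0], [30.0, 40.0], [50.0, 60.0]],
#                          [[ 5.0,  5.0], [ 0.0,  0.0], [ 0.0,  0.0]]])   # [2, 3, 2]
#   visibility = torch.tensor([[1, 1, 1],
#                              [1, 0, 0]])                                # [2, 3]
#   compute_visible_bbox_xywh(joints, visibility)
#   # tensor([[10., 20., 40., 40.],    # box spanning all three visible joints
#   #         [ 5.,  5.,  0.,  0.]])   # a single visible joint yields a zero-size box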


def compute_oks(
    pred_joints: Tensor,
    gt_joints: Tensor,
    gt_keypoint_visibility: Tensor,
    sigmas: Tensor,
    gt_areas: Tensor = None,
    gt_bboxes: Tensor = None,
) -> Tensor:
    """
    Compute the Object Keypoint Similarity (OKS) between each prediction and each ground truth instance.

    :param pred_joints: [K, NumJoints, 2] or [K, NumJoints, 3]
    :param gt_joints: [M, NumJoints, 2]
    :param gt_keypoint_visibility: [M, NumJoints]
    :param sigmas: [NumJoints]
    :param gt_areas: [M] Area of each ground truth instance. COCOEval uses the area of the instance mask
                     to scale OKS, so it must be provided separately.
                     If None, the area of the bounding box computed from gt_joints is used for each instance.
    :param gt_bboxes: [M, 4] Bounding box (X,Y,W,H) of each ground truth instance.
                      If None, the bounding box computed from gt_joints is used for each instance.
    :return: OKS matrix [K, M]
    """
    ious = torch.zeros((len(pred_joints), len(gt_joints)), device=pred_joints.device)
    variances = (sigmas * 2) ** 2  # renamed from `vars` to avoid shadowing the built-in

    if gt_bboxes is None:
        gt_bboxes = compute_visible_bbox_xywh(gt_joints, gt_keypoint_visibility)

    if gt_areas is None:
        gt_areas = gt_bboxes[:, 2] * gt_bboxes[:, 3]

    # Compute OKS between each detection and each ground truth object
    for gt_index, (gt_keypoints, gt_visibility, gt_bbox, gt_area) in enumerate(
        zip(gt_joints, gt_keypoint_visibility, gt_bboxes, gt_areas)
    ):
        # Create bounds for the ignore region (double the gt bbox)
        xg = gt_keypoints[:, 0]
        yg = gt_keypoints[:, 1]
        k1 = torch.count_nonzero(gt_visibility > 0)

        x0 = gt_bbox[0] - gt_bbox[2]
        x1 = gt_bbox[0] + gt_bbox[2] * 2
        y0 = gt_bbox[1] - gt_bbox[3]
        y1 = gt_bbox[1] + gt_bbox[3] * 2

        for pred_index, pred_keypoints in enumerate(pred_joints):
            xd = pred_keypoints[:, 0]
            yd = pred_keypoints[:, 1]
            if k1 > 0:
                # Measure the per-keypoint distance if any keypoint is visible
                dx = xd - xg
                dy = yd - yg
            else:
                # Otherwise measure the minimum distance to the ignore-region corners (x0, y0) & (x1, y1)
                dx = (x0 - xd).clamp_min(0) + (xd - x1).clamp_min(0)
                dy = (y0 - yd).clamp_min(0) + (yd - y1).clamp_min(0)

            e = (dx**2 + dy**2) / variances / (gt_area + torch.finfo(torch.float64).eps) / 2
            if k1 > 0:
                e = e[gt_visibility > 0]
            ious[pred_index, gt_index] = torch.sum(torch.exp(-e)) / e.shape[0]

    return ious
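
# Usage sketch (illustrative, with made-up sample data; not part of the original module):
# OKS between a perfect prediction, a noisy prediction, and a single ground truth instance.
#
#   gt = torch.tensor([[[10.0, 10.0], [20.0, 10.0], [15.0, 20.0]]])   # [1, 3, 2]
#   vis = torch.tensor([[2, 2, 2]])                                   # all joints visible
#   sigmas = torch.tensor([0.05, 0.05, 0.05])
#   preds = torch.stack([gt[0], gt[0] + 1.0])                         # [2, 3, 2]
#   compute_oks(preds, gt, vis, sigmas)
#   # -> [2, 1] matrix: row 0 is exactly 1.0; row 1 drops to ~0.37
#   #    (1 px offset, bbox area 100 -> e = 1 per joint -> exp(-1))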


def compute_img_keypoint_matching(
    preds: Tensor,
    pred_scores: Tensor,
    targets: Tensor,
    targets_visibilities: Tensor,
    targets_areas: Tensor,
    targets_bboxes: Tensor,
    targets_ignored: Tensor,
    crowd_targets: Tensor,
    crowd_visibilities: Tensor,
    crowd_targets_areas: Tensor,
    crowd_targets_bboxes: Tensor,
    iou_thresholds: Tensor,
    sigmas: Tensor,
    top_k: int,
) -> Tuple[Tensor, Tensor, Tensor, int]:
    """
    Match predictions and targets (ground truth) with respect to IoU and confidence score for a given image.

    :param preds: Tensor of shape (K, NumJoints, 3) - Array of predicted skeletons.
                  The last dimension encodes the X, Y and confidence score of each joint
    :param pred_scores: Tensor of shape (K) - Confidence scores for each pose
    :param targets: Tensor of shape (M, NumJoints, 2) - Array of ground truth skeletons
    :param targets_visibilities: Visibility status for each keypoint (M, NumJoints).
                                 Values are: 0 - invisible, 1 - occluded, 2 - fully visible
    :param targets_areas: Tensor of shape (M) - Areas of the target objects
    :param targets_bboxes: Tensor of shape (M, 4) - Bounding boxes (XYWH) of the targets
    :param targets_ignored: Tensor of shape (M) - Targets that are marked as ignored
                            (e.g. all keypoints are invisible or the target does not fit the desired area range)
    :param crowd_targets: Tensor of shape (Mc, NumJoints, 2+) - Array of ground truth skeletons of crowd targets.
                          Only the (X, Y) channels are used; visibility is passed separately via crowd_visibilities
    :param crowd_visibilities: Visibility status for each keypoint of the crowd targets (Mc, NumJoints).
                               Values are: 0 - invisible, 1 - occluded, 2 - fully visible
    :param crowd_targets_areas: Tensor of shape (Mc) - Areas of the crowd target objects
    :param crowd_targets_bboxes: Tensor of shape (Mc, 4) - Bounding boxes (XYWH) of the crowd targets
    :param iou_thresholds: IoU thresholds at which to compute the mAP
    :param sigmas: Tensor of shape (NumJoints) with a sigma for each joint. The sigma value represents
                   how 'hard' it is to locate the exact ground truth position of the joint.
    :param top_k: Number of predictions to keep, ordered by confidence score

    :return:
        :preds_matched: Tensor of shape (min(top_k, len(preds)), n_iou_thresholds)
            True when prediction (i) is matched with a target with respect to the (j)th IoU threshold
        :preds_to_ignore: Tensor of shape (min(top_k, len(preds)), n_iou_thresholds)
            True when prediction (i) is matched with a crowd target with respect to the (j)th IoU threshold
        :preds_scores: Tensor of shape (min(top_k, len(preds))) with the scores of the top-k predictions
        :num_targets: Number of ground truth targets (total number of targets minus the number of ignored ones)
    """
    num_iou_thresholds = len(iou_thresholds)
    device = preds.device if torch.is_tensor(preds) else (targets.device if torch.is_tensor(targets) else "cpu")

    if preds is None or len(preds) == 0:
        preds_matched = torch.zeros((0, num_iou_thresholds), dtype=torch.bool, device=device)
        preds_to_ignore = torch.zeros((0, num_iou_thresholds), dtype=torch.bool, device=device)
        preds_scores = torch.zeros((0,), dtype=torch.float, device=device)
        # Consistent with the docstring and the non-empty path, ignored targets are not counted
        num_targets = len(targets) - int(torch.count_nonzero(targets_ignored))
        return preds_matched, preds_to_ignore, preds_scores, num_targets
    preds_matched = torch.zeros(len(preds), num_iou_thresholds, dtype=torch.bool, device=device)
    targets_matched = torch.zeros(len(targets), num_iou_thresholds, dtype=torch.bool, device=device)
    preds_to_ignore = torch.zeros(len(preds), num_iou_thresholds, dtype=torch.bool, device=device)

    # Ignore all but the top_k highest-scoring predictions
    k = min(top_k, len(pred_scores))
    preds_idx_to_use = torch.topk(pred_scores, k=k, sorted=True, largest=True).indices
    preds_to_ignore[:, :] = True
    preds_to_ignore[preds_idx_to_use] = False

    if len(targets) > 0:
        iou = compute_oks(preds[preds_idx_to_use], targets, targets_visibilities, sigmas, gt_areas=targets_areas, gt_bboxes=targets_bboxes)

        # The matching priority is first detection confidence and then IoU value.
        # The detections are already sorted by confidence in NMS, so here, for each prediction, we sort the targets by IoU.
        sorted_iou, target_sorted = iou.sort(descending=True, stable=True)

        # Only iterate over IoU values higher than the minimum threshold to speed up the process
        for pred_selected_i, target_sorted_i in (sorted_iou > iou_thresholds[0]).nonzero(as_tuple=False):
            # pred_selected_i and target_sorted_i are relative to the filtering/sorting, so we extract their absolute indexes
            pred_i = preds_idx_to_use[pred_selected_i]
            target_i = target_sorted[pred_selected_i, target_sorted_i]

            # Vector[j], True when IoU(pred_i, target_i) is above the (j)th threshold
            is_iou_above_threshold = sorted_iou[pred_selected_i, target_sorted_i] > iou_thresholds

            # Vector[j], True when neither pred_i nor target_i is matched yet for the (j)th threshold
            are_candidates_free = torch.logical_and(~preds_matched[pred_i, :], ~targets_matched[target_i, :])

            # Vector[j], True when (pred_i, target_i) can be matched for the (j)th threshold
            are_candidates_good = torch.logical_and(is_iou_above_threshold, are_candidates_free)

            is_matching_with_ignore = are_candidates_free & are_candidates_good & targets_ignored[target_i]

            # A prediction already matched to a real target should not be re-assigned to an ignored one
            if preds_matched[pred_i].any() and is_matching_with_ignore.any():
                continue

            # For every threshold (j) where target_i and pred_i can be matched together (are_candidates_good[j] == True),
            # fill the matching placeholders with True
            targets_matched[target_i, are_candidates_good] = True
            preds_matched[pred_i, are_candidates_good] = True

            preds_to_ignore[pred_i] = torch.logical_or(preds_to_ignore[pred_i], is_matching_with_ignore)

            # When all the targets are matched with a prediction for every IoU threshold, stop.
            if targets_matched.all():
                break
    # Crowd targets can be matched with many predictions.
    # Therefore, for every prediction we just need to check if it has a large enough overlap (OKS) with any crowd target.
    if len(crowd_targets) > 0:
        # shape = (n_preds_to_use x n_crowd_targets)
        ioa = compute_oks(
            preds[preds_idx_to_use],
            crowd_targets,
            crowd_visibilities,
            sigmas,
            gt_areas=crowd_targets_areas,
            gt_bboxes=crowd_targets_bboxes,
        )

        # For each prediction, keep its highest score with any crowd target
        # shape = (n_preds_to_use)
        best_ioa, _ = ioa.max(1)

        # If a prediction overlaps any crowd target above a threshold, then there is a match
        # shape = (n_preds_to_use x iou_thresholds)
        is_matching_with_crowd = best_ioa.view(-1, 1) > iou_thresholds.view(1, -1)
        preds_to_ignore[preds_idx_to_use] = torch.logical_or(preds_to_ignore[preds_idx_to_use], is_matching_with_crowd)

    num_targets = len(targets) - torch.count_nonzero(targets_ignored)
    return preds_matched[preds_idx_to_use], preds_to_ignore[preds_idx_to_use], pred_scores[preds_idx_to_use], num_targets.item()
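

if __name__ == "__main__":
    # Minimal smoke test of the matching pipeline on synthetic data. This block is an
    # illustrative sketch, not part of the original module; all tensor values below are made up.
    num_joints = 3
    sigmas = torch.full((num_joints,), 0.05)
    iou_thresholds = torch.tensor([0.5, 0.75])

    # One ground truth skeleton with all joints fully visible
    targets = torch.tensor([[[10.0, 10.0], [20.0, 10.0], [15.0, 20.0]]])
    targets_visibilities = torch.full((1, num_joints), 2)
    targets_bboxes = compute_visible_bbox_xywh(targets, targets_visibilities)
    targets_areas = targets_bboxes[:, 2] * targets_bboxes[:, 3]
    targets_ignored = torch.zeros(1, dtype=torch.bool)

    # A single perfect prediction: (X, Y, joint confidence) per joint
    preds = torch.cat([targets, torch.ones((1, num_joints, 1))], dim=-1)
    pred_scores = torch.tensor([0.9])

    preds_matched, preds_to_ignore, preds_scores, num_targets = compute_img_keypoint_matching(
        preds,
        pred_scores,
        targets,
        targets_visibilities,
        targets_areas,
        targets_bboxes,
        targets_ignored,
        crowd_targets=torch.zeros((0, num_joints, 2)),
        crowd_visibilities=torch.zeros((0, num_joints)),
        crowd_targets_areas=torch.zeros(0),
        crowd_targets_bboxes=torch.zeros((0, 4)),
        iou_thresholds=iou_thresholds,
        sigmas=sigmas,
        top_k=20,
    )
    print(preds_matched)  # expected: tensor([[True, True]]) - matched at every IoU threshold
    print(num_targets)    # expected: 1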