#875 Feature/sg 761 yolo nas

Merged
Ghost merged 1 commit into Deci-AI:master from deci-ai:feature/SG-761-yolo-nas
60 changed files with 2549 additions and 132 deletions
  1. .circleci/config.yml (+144, -29)
  2. LICENSE.YOLONAS.md (+16, -0)
  3. README.md (+12, -4)
  4. YOLONAS.md (+97, -0)
  5. documentation/source/YoloNASQuickstart.md (+70, -0)
  6. documentation/source/images/messi_penalty_pred_higher_conf_right_class2.gif (BIN)
  7. documentation/source/images/soccer.png (BIN)
  8. documentation/source/images/yolo_nas_frontier.png (BIN)
  9. documentation/source/images/yolo_nas_predict_demo.png (BIN)
  10. documentation/source/images/yolo_nas_qs_predict.png (BIN)
  11. documentation/source/images/yolo_nas_rf100.png (BIN)
  12. documentation/source/qat_ptq_yolo_nas.md (+378, -0)
  13. scripts/Dockerfile.branch (+15, -0)
  14. scripts/Dockerfile.branch.code (+10, -0)
  15. src/super_gradients/common/object_names.py (+5, -2)
  16. src/super_gradients/examples/predict/detection_predict.py (+2, -2)
  17. src/super_gradients/examples/predict/detection_predict_image_folder.py (+2, -2)
  18. src/super_gradients/examples/predict/detection_predict_streaming.py (+2, -2)
  19. src/super_gradients/examples/predict/detection_predict_video.py (+2, -2)
  20. src/super_gradients/module_interfaces/__init__.py (+2, -2)
  21. src/super_gradients/module_interfaces/module_interfaces.py (+28, -0)
  22. src/super_gradients/modules/__init__.py (+27, -0)
  23. src/super_gradients/modules/base_modules.py (+27, -0)
  24. src/super_gradients/modules/detection_modules.py (+19, -26)
  25. src/super_gradients/modules/head_replacement_utils.py (+47, -0)
  26. src/super_gradients/modules/pose_estimation_modules.py (+1, -1)
  27. src/super_gradients/recipes/arch_params/yolo_nas_l_arch_params.yaml (+112, -0)
  28. src/super_gradients/recipes/arch_params/yolo_nas_m_arch_params.yaml (+112, -0)
  29. src/super_gradients/recipes/arch_params/yolo_nas_s_arch_params.yaml (+112, -0)
  30. src/super_gradients/recipes/coco2017_yolo_nas_s.yaml (+43, -0)
  31. src/super_gradients/recipes/dataset_params/coco_detection_yolo_nas_dataset_params.yaml (+6, -5)
  32. src/super_gradients/recipes/dataset_params/roboflow_detection_dataset_params.yaml (+25, -8)
  33. src/super_gradients/recipes/roboflow_yolo_nas_m.yaml (+92, -0)
  34. src/super_gradients/recipes/roboflow_yolo_nas_s.yaml (+92, -0)
  35. src/super_gradients/recipes/roboflow_yolo_nas_s_qat.yaml (+18, -0)
  36. src/super_gradients/recipes/training_hyperparams/coco2017_yolo_nas_train_params.yaml (+56, -0)
  37. src/super_gradients/training/dataloaders/__init__.py (+4, -4)
  38. src/super_gradients/training/dataloaders/dataloaders.py (+6, -6)
  39. src/super_gradients/training/datasets/detection_datasets/roboflow/metadata.py (+2, -2)
  40. src/super_gradients/training/models/__init__.py (+27, -9)
  41. src/super_gradients/training/models/detection_models/csp_darknet53.py (+19, -7)
  42. src/super_gradients/training/models/detection_models/customizable_detector.py (+4, -0)
  43. src/super_gradients/training/models/detection_models/pp_yolo_e/__init__.py (+2, -2)
  44. src/super_gradients/training/models/detection_models/pp_yolo_e/pan.py (+4, -4)
  45. src/super_gradients/training/models/detection_models/pp_yolo_e/pp_yolo_e.py (+3, -2)
  46. src/super_gradients/training/models/detection_models/pp_yolo_e/pp_yolo_head.py (+2, -1)
  47. src/super_gradients/training/models/detection_models/yolo_nas/__init__.py (+26, -0)
  48. src/super_gradients/training/models/detection_models/yolo_nas/dfl_heads.py (+270, -0)
  49. src/super_gradients/training/models/detection_models/yolo_nas/panneck.py (+64, -0)
  50. src/super_gradients/training/models/detection_models/yolo_nas/yolo_nas_variants.py (+90, -0)
  51. src/super_gradients/training/models/detection_models/yolo_nas/yolo_stages.py (+332, -0)
  52. src/super_gradients/training/pipelines/pipelines.py (+7, -2)
  53. src/super_gradients/training/pretrained_models.py (+4, -0)
  54. src/super_gradients/training/processing/processing.py (+6, -6)
  55. src/super_gradients/training/utils/checkpoint_utils.py (+10, -0)
  56. tests/deci_core_integration_test_suite_runner.py (+2, -1)
  57. tests/deci_core_unit_test_suite_runner.py (+2, -0)
  58. tests/integration_tests/__init__.py (+2, -1)
  59. tests/integration_tests/yolo_nas_integration_test.py (+48, -0)
  60. tests/unit_tests/replace_head_test.py (+39, -0)
@@ -1,6 +1,12 @@
 version: 2.1
 
 parameters:
+  ad_hoc_container_build:
+    type: boolean
+    default: false
+  ad_hoc_container_build_code_only:
+    type: boolean
+    default: false
   remote_docker_version:
     type: string
     description: remote docker version
@@ -12,7 +18,7 @@ parameters:
   orb_version:
     type: string
     description: Deci ai ORB version https://circleci.com/developer/orbs/orb/deci-ai/circleci-common-orb
-    default: "10.5.0"
+    default: "10.5.1"
 #    default: "dev:alpha"
 
 orbs:
@@ -42,6 +48,47 @@ release_candidate_tag_filter: &release_candidate_tag_filter
       only: /^\d+\.\d+\.\d+rc\d+/
 
 commands:
+  build_and_publish_command:
+    parameters:
+      repo_name:
+        type: string
+      docker_context:
+        type: string
+      image_tag:
+        type: string
+      additional_tags:
+        type: string
+      build_args:
+        type: string
+        default: ""
+      dockerfile:
+        type: string
+        default: "Dockerfile"
+    steps:
+      - checkout
+      - attach_workspace:
+          at: ~/
+      - run:
+          name: Put config dir in repo context
+          command: |
+            if [ -d ~/.config ]; then
+              echo "found a .config directory, copying to repo dir"
+              cp -r  ~/.config ~/project/<< parameters.docker_context >>
+            fi
+      - deci-common/ecr_login_dev
+      - deci-common/container_image_build:
+          context: << parameters.docker_context >>
+          working_directory: "."
+          repository_name: << parameters.repo_name >>
+          image_tag: << parameters.image_tag >>
+          dockerfile: << parameters.dockerfile >>
+          build_args: << parameters.build_args >>
+      #          build_args: "PYTHON_VERSION=3.8 SG_VERSION=3.0.7"
+      - deci-common/push_docker_image_aws_dev:
+          repository_name: << parameters.repo_name >>
+          image_tag: << parameters.image_tag >>
+          additional_tags: << parameters.additional_tags >>
+
   get_beta_and_rc_tags:
     description: "getting beta and rc tag (if exist) according to ouir convention"
     steps:
@@ -607,8 +654,60 @@ jobs:
           command: "rm -r << parameters.sg_new_env_name >>"
           when: on_fail
 
+  docker-build-and-publish-branch:
+    docker:
+      - image: cimg/base:stable-20.04
+    parameters:
+      repo_name:
+        type: string
+        default: "deci/super-gradients"
+      docker_context:
+        type: string
+        default: "."
+      additional_tags:
+        type: string
+        default: ''
+    steps:
+      - setup_remote_docker:
+          version: 20.10.7
+          docker_layer_caching: true
+      - deci-common/container_image_lint_tag:
+          image_tag: "${CIRCLE_BRANCH}"
+      - run:
+          command: |
+            ADDITIONAL_TAGS="<< parameters.additional_tags >>"
+            echo "export ADDITIONAL_TAGS=${ADDITIONAL_TAGS}" >> $BASH_ENV
+      - run:
+          command: |
+            source $BASH_ENV
+            echo "$CONTAINER_LINT_TAG"
+            echo "$ADDITIONAL_TAGS"
+      - when:
+          condition: << pipeline.parameters.ad_hoc_container_build_code_only >>
+          steps:
+            - build_and_publish_command:
+                repo_name: << parameters.repo_name >>
+                docker_context: << parameters.docker_context >>
+                image_tag: $CONTAINER_LINT_TAG
+                additional_tags: $ADDITIONAL_TAGS
+                dockerfile: 'scripts/Dockerfile.branch.code'
+                build_args: "BASE_TAG=$CONTAINER_LINT_TAG"
+      - unless:
+          condition: << pipeline.parameters.ad_hoc_container_build_code_only >>
+          steps:
+            - build_and_publish_command:
+                repo_name: << parameters.repo_name >>
+                docker_context: << parameters.docker_context >>
+                image_tag: $CONTAINER_LINT_TAG
+                additional_tags: $ADDITIONAL_TAGS
+                dockerfile: 'scripts/Dockerfile.branch'
+
 workflows:
   release:
+    unless:
+      or:
+        - << pipeline.parameters.ad_hoc_container_build >>
+        - << pipeline.parameters.ad_hoc_container_build_code_only >>
     jobs:
       - deci-common/persist_version_info:
           version_override: $CIRCLE_TAG
@@ -670,6 +769,10 @@ workflows:
 
 
   build_and_deploy:
+    unless:
+      or:
+        - << pipeline.parameters.ad_hoc_container_build >>
+        - << pipeline.parameters.ad_hoc_container_build_code_only >>
     jobs:
       - deci-common/persist_version_info:
           use_rc: true
@@ -695,42 +798,46 @@ workflows:
           <<: *release_candidate_filter
 
   SG_docker:
-     jobs:
-       - change_rc_to_b: # works on release candidate creation
-           <<: *release_candidate_tag_filter
-       - build_and_publish_sg_container:  # works on release candidate creation
-           requires:
-             - "change_rc_to_b"
-           <<: *release_candidate_tag_filter
-       - testing_supergradients_docker_image:  # works on release candidate creation
+    unless:
+      or:
+        - << pipeline.parameters.ad_hoc_container_build >>
+        - << pipeline.parameters.ad_hoc_container_build_code_only >>
+    jobs:
+      - change_rc_to_b: # works on release candidate creation
+          <<: *release_candidate_tag_filter
+      - build_and_publish_sg_container:  # works on release candidate creation
+          requires:
+            - "change_rc_to_b"
+          <<: *release_candidate_tag_filter
+      - testing_supergradients_docker_image:  # works on release candidate creation
           image_repo: '307629990626.dkr.ecr.us-east-1.amazonaws.com/deci/super-gradients'
           requires:
             - "build_and_publish_sg_container"
             - "change_rc_to_b"
           <<: *release_candidate_tag_filter
-       - add_rc_tag_to_beta: # works on release candidate creation for ECR Repo
+      - add_rc_tag_to_beta: # works on release candidate creation for ECR Repo
           requires:
             - "testing_supergradients_docker_image"
             - "change_rc_to_b"
           <<: *release_candidate_tag_filter
-       - find_rc_tag_per_sha: # works on release
-           <<: *release_tag_filter
-       - add_release_tag_to_rc: # works on release
-            requires:
-              - "find_rc_tag_per_sha"
-            <<: *release_tag_filter
-       - slack/on-hold:
-           context: slack
-           channel: "sg-integration-tests"
-           requires:
-             - "add_release_tag_to_rc"
-           <<: *release_tag_filter
-       - hold-sg-public-release:  # works on release
-           type: approval
-           requires:
-             - "slack/on-hold"
-           <<: *release_tag_filter
-       - docker/publish:  # works on release
+      - find_rc_tag_per_sha: # works on release
+          <<: *release_tag_filter
+      - add_release_tag_to_rc: # works on release
+          requires:
+            - "find_rc_tag_per_sha"
+          <<: *release_tag_filter
+      - slack/on-hold:
+          context: slack
+          channel: "sg-integration-tests"
+          requires:
+           - "add_release_tag_to_rc"
+          <<: *release_tag_filter
+      - hold-sg-public-release:  # works on release
+          type: approval
+          requires:
+           - "slack/on-hold"
+          <<: *release_tag_filter
+      - docker/publish:  # works on release
           executor:
               image: cimg/base
               tag: stable-20.04
@@ -748,7 +855,7 @@ workflows:
           requires:
             - "hold-sg-public-release"
           <<: *release_tag_filter
-       - docker/publish: # works on release
+      - docker/publish: # works on release
           executor:
               image: cimg/base
               tag: stable-20.04
@@ -765,3 +872,11 @@ workflows:
           requires:
             - "hold-sg-public-release"
           <<: *release_tag_filter
+  build-and-push-container-flow:
+    when: << pipeline.parameters.ad_hoc_container_build >>
+    jobs:
+      - docker-build-and-publish-branch
+  build-and-push-container-code-only-flow:
+    when: << pipeline.parameters.ad_hoc_container_build_code_only >>
+    jobs:
+      - docker-build-and-publish-branch
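
For reference, these two flows only run when a pipeline is triggered with one of the new boolean parameters set to true. With the CircleCI v2 API, such an ad hoc trigger could look roughly like this (token and branch values are illustrative):

curl -X POST https://circleci.com/api/v2/project/gh/Deci-AI/super-gradients/pipeline \
  -H "Circle-Token: $CIRCLE_TOKEN" \
  -H "Content-Type: application/json" \
  -d '{"branch": "feature/SG-761-yolo-nas", "parameters": {"ad_hoc_container_build": true}}'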

YOLO-NAS License

These model weights or any components comprising the model and the associated documentation (the "Software") are licensed to you by Deci.AI, Inc. ("Deci") under the following terms: © 2023 – Deci.AI, Inc.

Subject to your full compliance with all of the terms herein, Deci hereby grants you a non-exclusive, revocable, non-sublicensable, non-transferable worldwide and limited right and license to use the Software. If you are using the Deci platform for model optimization, your use of the Software is subject to the Terms of Use available here (the "Terms of Use").

You shall not, without Deci's prior written consent: (i) resell, lease, sublicense or distribute the Software to any person; (ii) use the Software to provide third parties with managed services or provide remote access to the Software to any person or compete with Deci in any way; (iii) represent that you possess any proprietary interest in the Software; (iv) directly or indirectly, take any action to contest Deci's intellectual property rights or infringe them in any way; (v) reverse-engineer, decompile, disassemble, alter, enhance, improve, add to, delete from, or otherwise modify, or derive (or attempt to derive) the technology or source code underlying any part of the Software; (vi) use the Software (or any part thereof) in any illegal, indecent, misleading, harmful, abusive, harassing and/or disparaging manner or for any such purposes. Except as provided under the terms of any separate agreement between you and Deci, including the Terms of Use to the extent applicable, you may not use the Software for any commercial use, including in connection with any models used in a production environment.

DECI PROVIDES THE SOFTWARE "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS OF THE SOFTWARE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

@@ -44,12 +44,20 @@ ________________________________________________________________________________
 
 
 ### Ready to deploy pre-trained SOTA models
+
+The YOLO-NAS architecture is out! YOLO-NAS delivers state-of-the-art performance with an unparalleled accuracy-speed tradeoff, outperforming other models such as YOLOv5, YOLOv6, YOLOv7 and YOLOv8.
+Check it out here: [YOLO-NAS](YOLONAS.md).
+
+<div align="center">
+<img src="./documentation/source/images/yolo_nas_frontier.png" width="800px">
+</div>
+
 ```python
 # Load model with pretrained weights
 from super_gradients.training import models
 from super_gradients.common.object_names import Models
 
-model = models.get(Models.YOLOX_S, pretrained_weights="coco")
+model = models.get(Models.YOLO_NAS_M, pretrained_weights="coco")
 ```
 #### All Computer Vision Models - Pretrained Checkpoints can be found in the [Model Zoo](http://bit.ly/41dkt89)
 
@@ -89,17 +97,17 @@ All SuperGradients models’ are production ready in the sense that they are com
 from super_gradients.training import models
 from super_gradients.common.object_names import Models
 
-model = models.get(Models.YOLOX_S, pretrained_weights="coco")
+model = models.get(Models.YOLO_NAS_M, pretrained_weights="coco")
 
 # Prepare model for conversion
-# Input size is in format of [Batch x Channels x Width x Height] where 640 is the standart COCO dataset dimensions
+# Input size is in format of [Batch x Channels x Width x Height] where 640 is the standard COCO dataset dimensions
 model.eval()
 model.prep_model_for_conversion(input_size=[1, 3, 640, 640])
     
 # Create dummy_input
 
 # Convert model to onnx
-torch.onnx.export(model, dummy_input,  "yolox_s.onnx")
+torch.onnx.export(model, dummy_input,  "yolo_nas_m.onnx")
 ```
 More information on how to take your model to production can be found in [Getting Started](#getting-started) notebooks
 

YOLO-NAS

A Next-Generation Object Detection Foundational Model Generated by Deci's Neural Architecture Search Technology

Deci is thrilled to announce the release of a new object detection model, YOLO-NAS - a game-changer in the world of object detection, providing superior real-time object detection capabilities and production-ready performance. Deci's mission is to provide AI teams with tools to remove development barriers and attain efficient inference performance more quickly.

YOLO-NAS

The new YOLO-NAS delivers state-of-the-art (SOTA) performance with an unparalleled accuracy-speed tradeoff, outperforming other models such as YOLOv5, YOLOv6, YOLOv7 and YOLOv8.

Deci's proprietary Neural Architecture Search technology, AutoNAC™, generated the YOLO-NAS model. The AutoNAC™ engine lets you input any task, data characteristics (access to data is not required), inference environment and performance targets, and then guides you to find the optimal architecture that delivers the best balance between accuracy and inference speed for your specific application. In addition to being data and hardware aware, the AutoNAC engine considers other components in the inference stack, including compilers and quantization.

In terms of pure numbers, YOLO-NAS is ~0.5 mAP point more accurate and 10-20% faster than equivalent variants of YOLOv8 and YOLOv7.

Model              mAP     Latency (ms)
YOLO-NAS S         47.5    3.21
YOLO-NAS M         51.55   5.85
YOLO-NAS L         52.22   7.87
YOLO-NAS S INT-8   47.03   2.36
YOLO-NAS M INT-8   51.0    3.78
YOLO-NAS L INT-8   52.1    4.78

mAP numbers in the table are reported for the COCO 2017 val dataset; latency is benchmarked for 640x640 images on an NVIDIA T4 GPU.

YOLO-NAS's architecture employs quantization-aware blocks and selective quantization for optimized performance. When converted to its INT8 quantized version, YOLO-NAS experiences a smaller precision drop (0.51, 0.65, and 0.45 points of mAP for the S, M, and L variants) than other models, which lose 1-2 mAP points during quantization. These techniques culminate in an innovative architecture with superior object detection capabilities and top-notch performance.

Quickstart

import super_gradients

yolo_nas = super_gradients.training.models.get("yolo_nas_l", pretrained_weights="coco").cuda()
yolo_nas.predict("https://deci-pretrained-models.s3.amazonaws.com/sample_images/beatles-abbeyroad.jpg").show()

YOLO-NAS Predict Demo

Recipes

We provide fine-tuning recipes for the Roboflow-100 datasets.

Great fine-tuning potential

We demonstrate great performance of YOLO-NAS on downstream tasks. When fine-tuned on Roboflow-100, our YOLO-NAS model achieves higher mAP than its nearest competitors:

YOLO-NAS-RF-100

Additional resources

Documentation: YOLO-NAS Quickstart
Documentation: YOLO-NAS Quantization-Aware training and post-training Quantization
Inference Notebook
Fine-Tuning Notebook

LICENSE

The YOLO-NAS model is available under an open-source license with pre-trained weights available for non-commercial use on SuperGradients, Deci's PyTorch-based, open-source, computer vision training library. With SuperGradients, users can train models from scratch or fine-tune existing ones, leveraging advanced built-in training techniques like Distributed Data Parallel, Exponential Moving Average, Automatic mixed precision, and Quantization Aware Training.

License file is available here: YOLO-NAS WEIGHTS LICENSE


YOLO-NAS Quickstart

Deci leveraged its proprietary Neural Architecture Search engine (AutoNAC) to generate YOLO-NAS - a new object detection architecture that delivers the world’s best accuracy-latency performance.

The YOLO-NAS model incorporates quantization-aware RepVGG blocks to ensure compatibility with post-training quantization, making it very flexible and usable for different hardware configurations.

In this tutorial, we will go over the basic functionality of the YOLO-NAS model.

Instantiate a YOLO-NAS Model

from super_gradients.training import models
from super_gradients.common.object_names import Models

net = models.get(Models.YOLO_NAS_S, pretrained_weights="coco")

Predict

prediction = net.predict("https://www.aljazeera.com/wp-content/uploads/2022/12/2022-12-03T205130Z_851430040_UP1EIC31LXSAZ_RTRMADP_3_SOCCER-WORLDCUP-ARG-AUS-REPORT.jpg?w=770&resize=770%2C436&quality=80")
prediction.show()

Export to ONNX

models.convert_to_onnx(model=net, input_shape=(3,640,640), out_path="yolo_nas_s.onnx")
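
If you want to sanity-check the exported graph, a quick load-and-run with onnxruntime (assuming the package is installed; this snippet is illustrative and not part of SuperGradients) could look like this:

import numpy as np
import onnxruntime as ort

# Load the exported model on CPU and run a random 640x640 image through it
session = ort.InferenceSession("yolo_nas_s.onnx", providers=["CPUExecutionProvider"])
input_name = session.get_inputs()[0].name
dummy = np.random.rand(1, 3, 640, 640).astype(np.float32)
print([output.shape for output in session.run(None, {input_name: dummy})])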

Train on RF100

Follow the setup instructions for RF100:

- Follow the official instructions to download Roboflow100: https://github.com/roboflow/roboflow-100-benchmark?ref=roboflow-blog
  Note: to use this dataset, you must download the "coco" format, NOT the yolov5 one.

- Your dataset should look like this:

    rf100
    ├── 4-fold-defect
    │      ├─ train
    │      │    ├─ 000000000001.jpg
    │      │    ├─ ...
    │      │    └─ _annotations.coco.json
    │      ├─ valid
    │      │    └─ ...
    │      └─ test
    │           └─ ...
    ├── abdomen-mri
    │      └─ ...
    └── ...

- Install the COCO API: https://github.com/pdollar/coco/tree/master/PythonAPI

We will use the roboflow_yolo_nas_s configuration to train the small variant of YOLO-NAS, YOLO-NAS-S.

To launch training on one of the RF100 datasets, we pass it through the dataset_name argument:

python -m super_gradients.train_from_recipe --config-name=roboflow_yolo_nas_s  dataset_name=<DATASET_NAME> dataset_params.data_dir=<PATH_TO_RF100_ROOT> ckpt_root_dir=<YOUR_CHECKPOINTS_ROOT_DIRECTORY>

Replace <DATASET_NAME> with any of the RF100 datasets that you wish to train on.


PTQ and QAT with YOLO-NAS

In this tutorial, we will guide you step by step through preparing YOLO-NAS for production. We will leverage the YOLO-NAS architecture, which includes quantization-friendly blocks, and train a YOLO-NAS model on Roboflow's Soccer Player Detection Dataset in a way that maximizes throughput without compromising the model's accuracy.

The steps will be:

  1. Training from scratch on one of the downstream datasets - this will play the role of the user's dataset (i.e., the one on which the model needs to be trained for the user's task)
  2. Performing post-training quantization and quantization-aware training

Pre-requisites:

Now, let's get to it.

Step 0: Installations and Dataset Setup

Follow the setup instructions for RF100:

- Follow the official instructions to download Roboflow100: https://github.com/roboflow/roboflow-100-benchmark?ref=roboflow-blog
  Note: to use this dataset, you must download the "coco" format, NOT the yolov5 one.

- Your dataset should look like this:

    rf100
    ├── 4-fold-defect
    │      ├─ train
    │      │    ├─ 000000000001.jpg
    │      │    ├─ ...
    │      │    └─ _annotations.coco.json
    │      ├─ valid
    │      │    └─ ...
    │      └─ test
    │           └─ ...
    ├── abdomen-mri
    │      └─ ...
    └── ...

- Install the COCO API: https://github.com/pdollar/coco/tree/master/PythonAPI

Install the latest version of SG:

pip install super-gradients

Install torch and pytorch-quantization (later versions should be compatible as well; in general, follow the torch installation instructions at https://pytorch.org/get-started/locally/):

pip install torch==1.11.0+cu113 torchvision==0.12.0+cu113 torchaudio==0.11.0 --extra-index-url https://download.pytorch.org/whl/cu113 &> /dev/null
pip install pytorch-quantization==2.1.2 --extra-index-url https://pypi.ngc.nvidia.com &> /dev/null

Launch Training (non-QA)

Although this might come as a surprise - quantization-aware training, despite its name, needs to be performed on a trained checkpoint rather than from scratch. So in practice, we first need to fully train our model on our dataset; then, after performing calibration, we fine-tune the model once again, which will be our final step. As we discuss in our Training with configuration files, we clone the SG repo, then use the repo's configuration files in our training examples. We will use the src/super_gradients/recipes/roboflow_yolo_nas_s.yaml configuration to train the small variant of YOLO-NAS, YOLO-NAS-S.

So we navigate to our train_from_recipe script:

cd <YOUR-LOCAL-PATH>/super_gradients/src/super_gradients/examples/train_from_recipe_example

Then to avoid collisions between our cloned and installed SG:

export PYTHONPATH=$PYTHONPATH:<YOUR-LOCAL-PATH>/super_gradients/

To launch training on one of the RF100 datasets, we pass it through the dataset_name argument:

python -m train_from_recipe --config-name=roboflow_yolo_nas_s  dataset_name=soccer-players-5fuqs dataset_params.data_dir=<PATH_TO_RF100_ROOT> ckpt_root_dir=<YOUR_CHECKPOINTS_ROOT_DIRECTORY> experiment_name=yolo_nas_s_soccer_players

...

Train epoch 99: 100%|██████████| 32/32 [00:23<00:00,  1.35it/s, PPYoloELoss/loss=0.853, PPYoloELoss/loss_cls=0.417, PPYoloELoss/loss_dfl=0.56, PPYoloELoss/loss_iou=0.0621, gpu_mem=11.7]
Validation epoch 99: 100%|██████████| 3/3 [00:00<00:00,  5.49it/s]
===========================================================
SUMMARY OF EPOCH 99
├── Training
│   ├── Ppyoloeloss/loss = 0.8527
│   │   ├── Best until now = 0.8515 (↗ 0.0012)
│   │   └── Epoch N-1      = 0.8515 (↗ 0.0012)
│   ├── Ppyoloeloss/loss_cls = 0.4174
│   │   ├── Best until now = 0.4178 (↘ -0.0004)
│   │   └── Epoch N-1      = 0.4178 (↘ -0.0004)
│   ├── Ppyoloeloss/loss_dfl = 0.5602
│   │   ├── Best until now = 0.5573 (↗ 0.0029)
│   │   └── Epoch N-1      = 0.5573 (↗ 0.0029)
│   └── Ppyoloeloss/loss_iou = 0.0621
│       ├── Best until now = 0.062  (↗ 0.0)
│       └── Epoch N-1      = 0.062  (↗ 0.0)
└── Validation
    ├── F1@0.50 = 0.779
    │   ├── Best until now = 0.8185 (↘ -0.0395)
    │   └── Epoch N-1      = 0.796  (↘ -0.017)
    ├── Map@0.50 = 0.9601
    │   ├── Best until now = 0.967  (↘ -0.0069)
    │   └── Epoch N-1      = 0.957  (↗ 0.0031)
    ├── Ppyoloeloss/loss = 1.4472
    │   ├── Best until now = 1.3971 (↗ 0.0501)
    │   └── Epoch N-1      = 1.4421 (↗ 0.0051)
    ├── Ppyoloeloss/loss_cls = 0.5981
    │   ├── Best until now = 0.527  (↗ 0.0711)
    │   └── Epoch N-1      = 0.5986 (↘ -0.0005)
    ├── Ppyoloeloss/loss_dfl = 0.8216
    │   ├── Best until now = 0.7849 (↗ 0.0367)
    │   └── Epoch N-1      = 0.8202 (↗ 0.0014)
    ├── Ppyoloeloss/loss_iou = 0.1753
    │   ├── Best until now = 0.1684 (↗ 0.007)
    │   └── Epoch N-1      = 0.1734 (↗ 0.002)
    ├── Precision@0.50 = 0.6758
    │   ├── Best until now = 0.7254 (↘ -0.0495)
    │   └── Epoch N-1      = 0.6931 (↘ -0.0172)
    └── Recall@0.50 = 0.9567
        ├── Best until now = 0.9872 (↘ -0.0304)
        └── Epoch N-1      = 0.9567 (= 0.0)

===========================================================
[2023-03-30 14:09:47] INFO - sg_trainer.py - RUNNING ADDITIONAL TEST ON THE AVERAGED MODEL...
Validation epoch 100: 100%|██████████| 3/3 [00:00<00:00,  5.45it/s]
===========================================================
SUMMARY OF EPOCH 100
├── Training
│   ├── Ppyoloeloss/loss = 0.8527
│   │   ├── Best until now = 0.8515 (↗ 0.0012)
│   │   └── Epoch N-1      = 0.8515 (↗ 0.0012)
│   ├── Ppyoloeloss/loss_cls = 0.4174
│   │   ├── Best until now = 0.4178 (↘ -0.0004)
│   │   └── Epoch N-1      = 0.4178 (↘ -0.0004)
│   ├── Ppyoloeloss/loss_dfl = 0.5602
│   │   ├── Best until now = 0.5573 (↗ 0.0029)
│   │   └── Epoch N-1      = 0.5573 (↗ 0.0029)
│   └── Ppyoloeloss/loss_iou = 0.0621
│       ├── Best until now = 0.062  (↗ 0.0)
│       └── Epoch N-1      = 0.062  (↗ 0.0)
└── Validation
    ├── F1@0.50 = 0.7824
    │   ├── Best until now = 0.8185 (↘ -0.0361)
    │   └── Epoch N-1      = 0.779  (↗ 0.0034)
    ├── Map@0.50 = 0.9635
    │   ├── Best until now = 0.967  (↘ -0.0036)
    │   └── Epoch N-1      = 0.9601 (↗ 0.0033)
    ├── Ppyoloeloss/loss = 1.432
    │   ├── Best until now = 1.3971 (↗ 0.0349)
    │   └── Epoch N-1      = 1.4472 (↘ -0.0152)
    ├── Ppyoloeloss/loss_cls = 0.588
    │   ├── Best until now = 0.527  (↗ 0.061)
    │   └── Epoch N-1      = 0.5981 (↘ -0.0101)
    ├── Ppyoloeloss/loss_dfl = 0.8191
    │   ├── Best until now = 0.7849 (↗ 0.0343)
    │   └── Epoch N-1      = 0.8216 (↘ -0.0025)
    ├── Ppyoloeloss/loss_iou = 0.1738
    │   ├── Best until now = 0.1684 (↗ 0.0054)
    │   └── Epoch N-1      = 0.1753 (↘ -0.0015)
    ├── Precision@0.50 = 0.6769
    │   ├── Best until now = 0.7254 (↘ -0.0485)
    │   └── Epoch N-1      = 0.6758 (↗ 0.0011)
    └── Recall@0.50 = 0.9567
        ├── Best until now = 0.9872 (↘ -0.0304)
        └── Epoch N-1      = 0.9567 (= 0.0)

And so our best checkpoint, which resides in <YOUR_CHECKPOINTS_ROOT_DIRECTORY>/yolo_nas_s_soccer_players/ckpt_best.pth, reaches 0.967 mAP!

Let's visualize some results:

from super_gradients.common.object_names import Models
from super_gradients.training import models

model = models.get(Models.YOLO_NAS_S,
                   checkpoint_path="<YOUR_CHECKPOINTS_ROOT_DIRECTORY>/yolo_nas_s_soccer_players/ckpt_best.pth",
                   num_classes=4)
predictions = model.predict("messi_penalty.mp4")
predictions.show(show_confidence=False)

QAT and PTQ

Now, we will take our checkpoint from our previous section and perform post-training quantization, then quantization-aware training. To do so, we will need to launch training with our qat_from_recipe example script, which simplifies taking any existing training recipe and making it a quantization-aware one with the help of some of our recommended practices. So this time, we navigate to the qat_from_recipe example directory:

cd <YOUR-LOCAL-PATH>/super_gradients/src/super_gradients/examples/qat_from_recipe_example

Before we launch, let's see how we can easily create a configuration from our roboflow_yolo_nas_s config to get the most out of QAT and PTQ. We added a new config that inherits from our previous one, called roboflow_yolo_nas_s_qat.yaml. Let's peek at it:


defaults:
  - roboflow_yolo_nas_s
  - quantization_params: default_quantization_params
  - _self_

checkpoint_params:
  checkpoint_path: ???
  strict_load: no_key_matching

experiment_name: soccer_players_qat_yolo_nas_s

pre_launch_callbacks_list:
    - QATRecipeModificationCallback:
        batch_size_divisor: 2
        max_epochs_divisor: 10
        lr_decay_factor: 0.01
        warmup_epochs_divisor: 10
        cosine_final_lr_ratio: 0.01
        disable_phase_callbacks: True
        disable_augmentations: False

Let's break it down:

  • We inherit from our original non-QA recipe

  • We set quantization_params to the default ones. Reminder - this is where QAT and PTQ hyper-parameters are defined.

  • We set our checkpoint_params.checkpoint_path to ??? so that passing a checkpoint is required. We will override this value when launching from the command line.

  • We add a QATRecipeModificationCallback to our pre_launch_callbacks_list: This callback accepts the entire cfg: DictConfig and manipulates it right before we start the training. This allows us to adapt any non-QA recipe to a QA one quickly. Here we will:

    • Use half the batch size of the original recipe.
    • Use 10 percent of the original number of epochs (and warmup epochs).
    • Use 1 percent of the original learning rate.
    • Set the final learning rate ratio of the cosine scheduling to 0.01
    • Disable augmentations and the phase_callbacks.
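
Putting these divisors in concrete terms, here is a minimal sketch of the arithmetic the callback applies (the non-QA recipe values below are illustrative, not read from the actual config):

# Hypothetical starting values from a non-QA recipe
batch_size, max_epochs, warmup_epochs, initial_lr = 16, 100, 3, 5e-4

qat_batch_size = batch_size // 2          # batch_size_divisor: 2
qat_max_epochs = max_epochs // 10         # max_epochs_divisor: 10
qat_warmup_epochs = warmup_epochs // 10   # warmup_epochs_divisor: 10
qat_lr = initial_lr * 0.01                # lr_decay_factor: 0.01

print(qat_batch_size, qat_max_epochs, qat_warmup_epochs, qat_lr)  # 8 10 0 5e-06

This lines up with the log below, which reports a new number of epochs of 10 and a new learning rate of 5e-06.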

Now we can launch PTQ and QAT from the command line:

python -m qat_from_recipe --config-name=roboflow_yolo_nas_s_qat experiment_name=soccer_players_qat_yolo_nas_s dataset_name=soccer-players-5fuqs dataset_params.data_dir=<PATH_TO_RF100_ROOT> checkpoint_params.checkpoint_path=<YOUR_CHECKPOINTS_ROOT_DIRECTORY>/yolo_nas_s_soccer_players/ckpt_best.pth ckpt_root_dir=<YOUR_CHECKPOINTS_ROOT_DIRECTORY>
...

[2023-04-02 11:37:56,848][super_gradients.training.pre_launch_callbacks.pre_launch_callbacks][INFO] - Modifying recipe to suit QAT rules of thumb. Remove QATRecipeModificationCallback to disable.
[2023-04-02 11:37:56,858][super_gradients.training.pre_launch_callbacks.pre_launch_callbacks][WARNING] - New number of epochs: 10
[2023-04-02 11:37:56,858][super_gradients.training.pre_launch_callbacks.pre_launch_callbacks][WARNING] - New learning rate: 5e-06
[2023-04-02 11:37:56,858][super_gradients.training.pre_launch_callbacks.pre_launch_callbacks][WARNING] - New weight decay: 1.0000000000000002e-06
[2023-04-02 11:37:56,858][super_gradients.training.pre_launch_callbacks.pre_launch_callbacks][WARNING] - EMA will be disabled for QAT run.
[2023-04-02 11:37:56,859][super_gradients.training.pre_launch_callbacks.pre_launch_callbacks][WARNING] - SyncBatchNorm will be disabled for QAT run.
[2023-04-02 11:37:56,859][super_gradients.training.pre_launch_callbacks.pre_launch_callbacks][WARNING] - Recipe requests multi_gpu=False and num_gpus=1. Changing to multi_gpu=OFF and num_gpus=1
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:32<00:00,  1.01s/it]
[2023-04-02 11:38:34,316][super_gradients.training.qat_trainer.qat_trainer][INFO] - Validating PTQ model...

  0%|          | 0/3 [00:00<?, ?it/s]
Test:   0%|          | 0/3 [00:00<?, ?it/s]
Test:  33%|███▎      | 1/3 [00:00<00:00,  2.87it/s]
Test:  67%|██████▋   | 2/3 [00:00<00:00,  2.90it/s]
Test: 100%|██████████| 3/3 [00:00<00:00,  3.86it/s]
[2023-04-02 11:38:35,106][super_gradients.training.qat_trainer.qat_trainer][INFO] - PTQ Model Validation Results
   - Precision@0.50: 0.6727069020271301
   - Recall@0.50: 0.95766681432724
   - mAP@0.50  : 0.9465919137001038
   - F1@0.50   : 0.7861716747283936

Observe that for PTQ, our model's mAP decreased from 0.967 to 0.9466. After PTQ, QAT is performed automatically:


[2023-04-02 11:38:47] INFO - sg_trainer.py - Started training for 10 epochs (0/9)

Train epoch 0: 100%|██████████| 32/32 [00:26<00:00,  1.21it/s, PPYoloELoss/loss=0.909, PPYoloELoss/loss_cls=0.444, PPYoloELoss/loss_dfl=0.57, PPYoloELoss/loss_iou=0.0721, gpu_mem=10.1]
Validation epoch 0: 100%|██████████| 3/3 [00:00<00:00,  3.75it/s]
===========================================================
SUMMARY OF EPOCH 0
├── Training
│   ├── Ppyoloeloss/loss = 0.9088
│   ├── Ppyoloeloss/loss_cls = 0.4436
│   ├── Ppyoloeloss/loss_dfl = 0.5696
│   └── Ppyoloeloss/loss_iou = 0.0721
└── Validation
    ├── F1@0.50 = 0.7885
    ├── Map@0.50 = 0.9556
    ├── Ppyoloeloss/loss = 1.4303
    ├── Ppyoloeloss/loss_cls = 0.5847
    ├── Ppyoloeloss/loss_dfl = 0.8186
    ├── Ppyoloeloss/loss_iou = 0.1745
    ├── Precision@0.50 = 0.671
    └── Recall@0.50 = 0.9734

===========================================================
[2023-04-02 11:39:14] INFO - sg_trainer.py - Best checkpoint overriden: validation mAP@0.50: 0.9556358456611633
Train epoch 1: 100%|██████████| 32/32 [00:26<00:00,  1.22it/s, PPYoloELoss/loss=0.91, PPYoloELoss/loss_cls=0.445, PPYoloELoss/loss_dfl=0.574, PPYoloELoss/loss_iou=0.0712, gpu_mem=10.1]
Validation epoch 1: 100%|██████████| 3/3 [00:00<00:00,  3.88it/s]
===========================================================
SUMMARY OF EPOCH 1
├── Training
│   ├── Ppyoloeloss/loss = 0.9097
│   │   ├── Best until now = 0.9088 (↗ 0.001)
│   │   └── Epoch N-1      = 0.9088 (↗ 0.001)
│   ├── Ppyoloeloss/loss_cls = 0.4448
│   │   ├── Best until now = 0.4436 (↗ 0.0011)
│   │   └── Epoch N-1      = 0.4436 (↗ 0.0011)
│   ├── Ppyoloeloss/loss_dfl = 0.5739
│   │   ├── Best until now = 0.5696 (↗ 0.0044)
│   │   └── Epoch N-1      = 0.5696 (↗ 0.0044)
│   └── Ppyoloeloss/loss_iou = 0.0712
│       ├── Best until now = 0.0721 (↘ -0.0009)
│       └── Epoch N-1      = 0.0721 (↘ -0.0009)
└── Validation
    ├── F1@0.50 = 0.7537
    │   ├── Best until now = 0.7885 (↘ -0.0348)
    │   └── Epoch N-1      = 0.7885 (↘ -0.0348)
    ├── Map@0.50 = 0.9581
    │   ├── Best until now = 0.9556 (↗ 0.0025)
    │   └── Epoch N-1      = 0.9556 (↗ 0.0025)
    ├── Ppyoloeloss/loss = 1.4312
    │   ├── Best until now = 1.4303 (↗ 0.0009)
    │   └── Epoch N-1      = 1.4303 (↗ 0.0009)
    ├── Ppyoloeloss/loss_cls = 0.5881
    │   ├── Best until now = 0.5847 (↗ 0.0034)
    │   └── Epoch N-1      = 0.5847 (↗ 0.0034)
    ├── Ppyoloeloss/loss_dfl = 0.8166
    │   ├── Best until now = 0.8186 (↘ -0.002)
    │   └── Epoch N-1      = 0.8186 (↘ -0.002)
    ├── Ppyoloeloss/loss_iou = 0.1739
    │   ├── Best until now = 0.1745 (↘ -0.0006)
    │   └── Epoch N-1      = 0.1745 (↘ -0.0006)
    ├── Precision@0.50 = 0.6262
    │   ├── Best until now = 0.671  (↘ -0.0448)
    │   └── Epoch N-1      = 0.671  (↘ -0.0448)
    └── Recall@0.50 = 0.9734
        ├── Best until now = 0.9734 (= 0.0)
        └── Epoch N-1      = 0.9734 (= 0.0)

===========================================================
...
...
Validation epoch 10: 100%|██████████| 3/3 [00:00<00:00,  4.07it/s]
===========================================================
SUMMARY OF EPOCH 10
├── Training
│   ├── Ppyoloeloss/loss = 0.8901
│   │   ├── Best until now = 0.889  (↗ 0.0011)
│   │   └── Epoch N-1      = 0.8957 (↘ -0.0056)
│   ├── Ppyoloeloss/loss_cls = 0.4365
│   │   ├── Best until now = 0.4359 (↗ 0.0005)
│   │   └── Epoch N-1      = 0.4384 (↘ -0.002)
│   ├── Ppyoloeloss/loss_dfl = 0.5677
│   │   ├── Best until now = 0.5665 (↗ 0.0012)
│   │   └── Epoch N-1      = 0.5702 (↘ -0.0025)
│   └── Ppyoloeloss/loss_iou = 0.0679
│       ├── Best until now = 0.0672 (↗ 0.0007)
│       └── Epoch N-1      = 0.0689 (↘ -0.001)
└── Validation
    ├── F1@0.50 = 0.7373
    │   ├── Best until now = 0.7885 (↘ -0.0512)
    │   └── Epoch N-1      = 0.721  (↗ 0.0164)
    ├── Map@0.50 = 0.968
    │   ├── Best until now = 0.9672 (↗ 0.0007)
    │   └── Epoch N-1      = 0.9517 (↗ 0.0163)
    ├── Ppyoloeloss/loss = 1.4326
    │   ├── Best until now = 1.4303 (↗ 0.0023)
    │   └── Epoch N-1      = 1.4322 (↗ 0.0004)
    ├── Ppyoloeloss/loss_cls = 0.5887
    │   ├── Best until now = 0.5847 (↗ 0.004)
    │   └── Epoch N-1      = 0.5889 (↘ -0.0002)
    ├── Ppyoloeloss/loss_dfl = 0.8164
    │   ├── Best until now = 0.8154 (↗ 0.001)
    │   └── Epoch N-1      = 0.8158 (↗ 0.0006)
    ├── Ppyoloeloss/loss_iou = 0.1743
    │   ├── Best until now = 0.1737 (↗ 0.0006)
    │   └── Epoch N-1      = 0.1742 (↗ 1e-04)
    ├── Precision@0.50 = 0.6052
    │   ├── Best until now = 0.671  (↘ -0.0658)
    │   └── Epoch N-1      = 0.5953 (↗ 0.01)
    └── Recall@0.50 = 0.9853
        ├── Best until now = 0.9853 (= 0.0)
        └── Epoch N-1      = 0.9734 (↗ 0.0119)

We not only observed no decline in the accuracy of our quantized model, but we also gained an improvement of 0.08 mAP! The QAT model is available in our checkpoints directory, already converted to .onnx format under <YOUR_CHECKPOINTS_ROOT_DIRECTORY>/soccer_players_qat_yolo_nas_s/soccer_players_qat_yolo_nas_s_16x3x640x640_qat.onnx, ready to be compiled and deployed in INT8 using TRT.
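
As a final step, outside this tutorial's scope, that exported ONNX file can be compiled into an INT8 TensorRT engine. Assuming TensorRT's trtexec CLI is available, the compilation could look roughly like:

trtexec --onnx=<YOUR_CHECKPOINTS_ROOT_DIRECTORY>/soccer_players_qat_yolo_nas_s/soccer_players_qat_yolo_nas_s_16x3x640x640_qat.onnx --int8 --saveEngine=yolo_nas_s_qat.engine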

# scripts/Dockerfile.branch
ARG DOCKER_IMAGE_TAG=11.3.1-devel-ubuntu20.04
FROM nvidia/cuda:${DOCKER_IMAGE_TAG}

LABEL maintainer "DECI.AI <services@deci.ai>"

ARG DEBIAN_FRONTEND=noninteractive

RUN mkdir /SG
WORKDIR /SG
RUN apt-get update && apt-get install -y python3-pip python-is-python3 pip libgl1 libglib2.0-0 git python3-distutils python3-typing-extensions \
    && rm -rf /var/lib/apt/lists/*
COPY . .
RUN pip install . --no-cache-dir && pip install torch==1.11.0+cu113 torchvision==0.12.0+cu113 --extra-index-url https://download.pytorch.org/whl/cu113 --no-cache-dir
RUN pip uninstall -y typing_extensions && pip install wandb --no-cache-dir
# scripts/Dockerfile.branch.code
ARG BASE_TAG
ARG BASE_DOCKER_REPO
FROM ${BASE_DOCKER_REPO:-307629990626.dkr.ecr.us-east-1.amazonaws.com/deci/super-gradients}:${BASE_TAG:-latest}
LABEL maintainer "DECI.AI <services@deci.ai>"
ARG DEBIAN_FRONTEND=noninteractive
WORKDIR /
RUN rm -rf /SG && mkdir /SG
WORKDIR /SG
COPY . .
RUN pip install . --no-cache-dir
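
For local experimentation, building the code-only branch image from the repo root could look roughly like this (tags are illustrative; in CI, BASE_TAG is supplied from the container lint-tag step):

docker build -f scripts/Dockerfile.branch.code --build-arg BASE_TAG=<BASE_TAG> -t super-gradients:<BRANCH_TAG> .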
@@ -304,6 +304,9 @@ class Models:
     DEKR_W32_NO_DC = "dekr_w32_no_dc"
     POSE_PP_YOLO_L = "pose_ppyolo_l"
     POSE_DDRNET_39 = "pose_ddrnet39"
+    YOLO_NAS_S = "yolo_nas_s"
+    YOLO_NAS_M = "yolo_nas_m"
+    YOLO_NAS_L = "yolo_nas_l"
 
 
 class ConcatenatedTensorFormats:
@@ -326,8 +329,8 @@ class Dataloaders:
     COCO2017_VAL = "coco2017_val"
     COCO2017_TRAIN_YOLOX = "coco2017_train_yolox"
     COCO2017_VAL_YOLOX = "coco2017_val_yolox"
-    COCO2017_TRAIN_DECIYOLO = "coco2017_train_deci_yolo"
-    COCO2017_VAL_DECIYOLO = "coco2017_val_deci_yolo"
+    COCO2017_TRAIN_YOLO_NAS = "coco2017_train_yolo_nas"
+    COCO2017_VAL_YOLO_NAS = "coco2017_val_yolo_nas"
     COCO2017_TRAIN_PPYOLOE = "coco2017_train_ppyoloe"
     COCO2017_VAL_PPYOLOE = "coco2017_val_ppyoloe"
     COCO2017_TRAIN_SSD_LITE_MOBILENET_V2 = "coco2017_train_ssd_lite_mobilenet_v2"
@@ -1,8 +1,8 @@
 from super_gradients.common.object_names import Models
 from super_gradients.training import models
 
-# Note that currently only YoloX and PPYoloE are supported.
-model = models.get(Models.PP_YOLOE_S, pretrained_weights="coco")
+# Note that currently only YoloX, PPYoloE and YOLO-NAS are supported.
+model = models.get(Models.YOLO_NAS_L, pretrained_weights="coco")
 
 IMAGES = [
     "../../../../documentation/source/images/examples/countryside.jpg",
@@ -1,8 +1,8 @@
 from super_gradients.common.object_names import Models
 from super_gradients.training import models
 
-# Note that currently only YoloX and PPYoloE are supported.
-model = models.get(Models.YOLOX_N, pretrained_weights="coco")
+# Note that currently only YoloX, PPYoloE and YOLO-NAS are supported.
+model = models.get(Models.YOLO_NAS_L, pretrained_weights="coco")
 
 image_folder_path = "../../../../documentation/source/images/examples"
 
@@ -2,8 +2,8 @@ import torch
 from super_gradients.common.object_names import Models
 from super_gradients.training import models
 
-# Note that currently only YoloX and PPYoloE are supported.
-model = models.get(Models.YOLOX_N, pretrained_weights="coco")
+# Note that currently only YoloX, PPYoloE and YOLO-NAS are supported.
+model = models.get(Models.YOLO_NAS_L, pretrained_weights="coco")
 
 # We want to use cuda if available to speed up inference.
 model = model.to("cuda" if torch.cuda.is_available() else "cpu")
@@ -3,8 +3,8 @@ import torch
 from super_gradients.common.object_names import Models
 from super_gradients.training import models
 
-# Note that currently only YoloX and PPYoloE are supported.
-model = models.get(Models.YOLOX_N, pretrained_weights="coco")
+# Note that currently only YoloX, PPYoloE and YOLO-NAS are supported.
+model = models.get(Models.YOLO_NAS_L, pretrained_weights="coco")
 
 # We want to use cuda if available to speed up inference.
 model = model.to("cuda" if torch.cuda.is_available() else "cpu")
@@ -1,3 +1,3 @@
-from .module_interfaces import HasPredict, HasPreprocessingParams
+from .module_interfaces import HasPredict, HasPreprocessingParams, SupportsReplaceNumClasses
 
-__all__ = ["HasPredict", "HasPreprocessingParams"]
+__all__ = ["HasPredict", "HasPreprocessingParams", "SupportsReplaceNumClasses"]
@@ -1,3 +1,6 @@
+from typing import Callable
+
+from torch import nn
 from typing_extensions import Protocol, runtime_checkable
 
 
@@ -31,3 +34,28 @@ class HasPredict(Protocol):
 
     def predict_webcam(self, *args, **kwargs):
         ...
+
+
+@runtime_checkable
+class SupportsReplaceNumClasses(Protocol):
+    """
+    Protocol interface for modules that support replacing the number of classes.
+    Derived classes should implement the `replace_num_classes` method.
+
+    This interface class serves a purpose of explicitly indicating whether a class supports optimized head replacement:
+
+    >>> class PredictionHead(nn.Module, SupportsReplaceNumClasses):
+    >>>    def replace_num_classes(self, num_classes: int, compute_new_weights_fn: Callable[[nn.Module, int], nn.Module] = None):
+    >>>       ...
+    """
+
+    def replace_num_classes(self, num_classes: int, compute_new_weights_fn: Callable[[nn.Module, int], nn.Module]):
+        """
+        Replace the number of classes in the module.
+
+        :param num_classes: New number of classes.
+        :param compute_new_weights_fn: (callable) An optional function that computes the new weights for the new classes.
+            It takes existing nn.Module and returns a new one.
+        :return: None
+        """
+        ...
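
Because the protocol is declared runtime_checkable, head-replacement utilities can probe arbitrary modules with isinstance before acting on them. A minimal sketch (the toy head below is illustrative, not part of this PR):

from typing import Callable

from torch import nn

from super_gradients.module_interfaces import SupportsReplaceNumClasses


class ToyHead(nn.Module):
    """Illustrative head that satisfies SupportsReplaceNumClasses structurally."""

    def __init__(self, in_channels: int, num_classes: int):
        super().__init__()
        self.cls_conv = nn.Conv2d(in_channels, num_classes, kernel_size=1)

    def replace_num_classes(self, num_classes: int, compute_new_weights_fn: Callable[[nn.Module, int], nn.Module]):
        # Delegate construction of the re-sized layer to the supplied function
        self.cls_conv = compute_new_weights_fn(self.cls_conv, num_classes)


head = ToyHead(in_channels=64, num_classes=80)
print(isinstance(head, SupportsReplaceNumClasses))  # True - structural check, no explicit inheritance needed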
@@ -16,7 +16,23 @@ from super_gradients.modules.skip_connections import (
 from super_gradients.common.abstractions.abstract_logger import get_logger
 from super_gradients.common.registry.registry import ALL_DETECTION_MODULES
 
+from super_gradients.modules.base_modules import BaseDetectionModule
+from super_gradients.modules.detection_modules import (
+    PANNeck,
+    NHeads,
+    MultiOutputBackbone,
+    NStageBackbone,
+    MobileNetV1Backbone,
+    MobileNetV2Backbone,
+    SSDNeck,
+    SSDInvertedResidualNeck,
+    SSDBottleneckNeck,
+    SSDHead,
+)
+from super_gradients.module_interfaces import SupportsReplaceNumClasses
+
 __all__ = [
+    "BaseDetectionModule",
     "ALL_DETECTION_MODULES",
     "PixelShuffle",
     "AntiAliasDownsample",
@@ -33,6 +49,17 @@ __all__ = [
     "BackboneInternalSkipConnection",
     "HeadInternalSkipConnection",
     "LightweightDEKRHead",
+    "PANNeck",
+    "NHeads",
+    "MultiOutputBackbone",
+    "NStageBackbone",
+    "MobileNetV1Backbone",
+    "MobileNetV2Backbone",
+    "SSDNeck",
+    "SSDInvertedResidualNeck",
+    "SSDBottleneckNeck",
+    "SSDHead",
+    "SupportsReplaceNumClasses",
 ]
 
 logger = get_logger(__name__)
# src/super_gradients/modules/base_modules.py
from abc import abstractmethod, ABC
from typing import Union, List

from torch import nn

__all__ = ["BaseDetectionModule"]


class BaseDetectionModule(nn.Module, ABC):
    """
    An interface for a module that is easy to integrate into a model with complex connections
    """

    def __init__(self, in_channels: Union[List[int], int], **kwargs):
        """
        :param in_channels: defines channels of tensor(s) that will be accepted by a module in forward
        """
        super().__init__()
        self.in_channels = in_channels

    @property
    @abstractmethod
    def out_channels(self) -> Union[List[int], int]:
        """
        :return: channels of tensor(s) that will be returned by a module in forward
        """
        raise NotImplementedError()
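
To illustrate the contract, here is a minimal hypothetical concrete module: it inherits the in_channels handling and only has to implement the out_channels property (plus a forward):

from typing import List, Union

from super_gradients.modules.base_modules import BaseDetectionModule


class IdentityNeck(BaseDetectionModule):
    """Toy neck that passes its inputs through unchanged (illustration only)."""

    @property
    def out_channels(self) -> Union[List[int], int]:
        # Nothing is transformed, so output channels mirror input channels.
        return self.in_channels

    def forward(self, inputs):
        return inputs


neck = IdentityNeck(in_channels=[96, 192, 384])
print(neck.out_channels)  # [96, 192, 384]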
@@ -1,37 +1,30 @@
+from abc import ABC, abstractmethod
 from typing import Union, List
-from abc import abstractmethod, ABC
 
 import torch
-from torch import nn
-from omegaconf.listconfig import ListConfig
 from omegaconf import DictConfig
-
+from omegaconf.listconfig import ListConfig
 from super_gradients.common.registry.registry import register_detection_module
+from super_gradients.modules.base_modules import BaseDetectionModule
+from super_gradients.modules.multi_output_modules import MultiOutputModule
+from super_gradients.training.models import MobileNet, MobileNetV2
 from super_gradients.training.models.classification_models.mobilenetv2 import InvertedResidual
 from super_gradients.training.utils.utils import HpmStruct
-from super_gradients.training.models import MobileNet, MobileNetV2
-from super_gradients.modules.multi_output_modules import MultiOutputModule
-
-
-class BaseDetectionModule(nn.Module, ABC):
-    """
-    An interface for a module that is easy to integrate into a model with complex connections
-    """
-
-    def __init__(self, in_channels: Union[List[int], int], **kwargs):
-        """
-        :param in_channels: defines channels of tensor(s) that will be accepted by a module in forward
-        """
-        super().__init__()
-        self.in_channels = in_channels
+from torch import nn
 
-    @property
-    @abstractmethod
-    def out_channels(self) -> Union[List[int], int]:
-        """
-        :return: channels of tensor(s) that will be returned by a module  in forward
-        """
-        raise NotImplementedError()
+__all__ = [
+    "PANNeck",
+    "NHeads",
+    "MultiOutputBackbone",
+    "NStageBackbone",
+    "MobileNetV1Backbone",
+    "MobileNetV2Backbone",
+    "SSDNeck",
+    "SSDInvertedResidualNeck",
+    "SSDBottleneckNeck",
+    "SSDHead",
+    "BaseDetectionModule",
+]
 
 
 @register_detection_module()
# src/super_gradients/modules/head_replacement_utils.py
from typing import Union

import torch
from torch import nn

__all__ = ["replace_num_classes_with_random_weights"]


def replace_num_classes_with_random_weights(module: Union[nn.Conv2d, nn.Linear, nn.Module], num_classes: int) -> nn.Module:
    """
    Replace the number of classes in the module with random weights.
    This is useful for replacing the output layer of a detection/classification head.
    This implementation supports Conv2d and Linear layers.
    The returned module will have the same device and dtype as the original module.
    Random weights are initialized with the same mean and std as the original weights.

    :param module: (nn.Module) Module to replace the number of classes in.
    :param num_classes: New number of classes.
    :return: nn.Module
    """
    if isinstance(module, nn.Conv2d):
        new_module = nn.Conv2d(
            module.in_channels,
            num_classes,
            kernel_size=module.kernel_size,
            stride=module.stride,
            padding=module.padding,
            dilation=module.dilation,
            groups=module.groups,
            bias=module.bias is not None,
            device=module.weight.device,
            dtype=module.weight.dtype,
        )
        torch.nn.init.normal_(new_module.weight, mean=module.weight.mean().item(), std=module.weight.std(dim=(0, 1, 2, 3)).item())
        if module.bias is not None:
            torch.nn.init.normal_(new_module.bias, mean=module.bias.mean().item(), std=module.bias.std(dim=0).item())
        return new_module
    elif isinstance(module, nn.Linear):
        new_module = nn.Linear(module.in_features, num_classes, device=module.weight.device, dtype=module.weight.dtype, bias=module.bias is not None)
        torch.nn.init.normal_(new_module.weight, mean=module.weight.mean().item(), std=module.weight.std(dim=(0, 1)).item())
        if module.bias is not None:
            torch.nn.init.normal_(new_module.bias, mean=module.bias.mean().item(), std=module.bias.std(dim=0).item())
        return new_module
    else:
        raise ValueError(f"Module {module} does not support replacing the number of classes")
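
As a quick sanity check of the helper above, we can swap the class dimension of a detection-head convolution (values below are illustrative):

import torch
from torch import nn

from super_gradients.modules.head_replacement_utils import replace_num_classes_with_random_weights

# An 80-class COCO head remapped to a 4-class downstream task
old_head = nn.Conv2d(in_channels=128, out_channels=80, kernel_size=1)
new_head = replace_num_classes_with_random_weights(old_head, num_classes=4)

assert new_head.out_channels == 4
assert new_head.weight.dtype == old_head.weight.dtype
print(new_head(torch.randn(1, 128, 20, 20)).shape)  # torch.Size([1, 4, 20, 20])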
@@ -5,7 +5,7 @@ from super_gradients.common.decorators.factory_decorator import resolve_param
 from super_gradients.common.factories.activations_type_factory import ActivationsTypeFactory
 from torch import nn, Tensor
 
-from super_gradients.modules.detection_modules import BaseDetectionModule
+from super_gradients.modules.base_modules import BaseDetectionModule
 from super_gradients.common.registry.registry import register_detection_module
 
 
# src/super_gradients/recipes/arch_params/yolo_nas_l_arch_params.yaml
backbone:
  NStageBackbone:
    stem:
      YoloNASStem:
        out_channels: 48
    stages:
      - YoloNASStage:
          out_channels: 96
          num_blocks: 2
          activation_type: relu
          hidden_channels: 96
          concat_intermediates: True
      - YoloNASStage:
          out_channels: 192
          num_blocks: 3
          activation_type: relu
          hidden_channels: 128
          concat_intermediates: True
      - YoloNASStage:
          out_channels: 384
          num_blocks: 5
          activation_type: relu
          hidden_channels: 256
          concat_intermediates: True
      - YoloNASStage:
          out_channels: 768
          num_blocks: 2
          activation_type: relu
          hidden_channels: 512
          concat_intermediates: True
    context_module:
      SPP:
        output_channels: 768
        activation_type: relu
        k: [5,9,13]
    out_layers: [stage1, stage2, stage3, context_module]

neck:
  YoloNASPANNeckWithC2:
    neck1:
      YoloNASUpStage:
        out_channels: 192
        num_blocks: 4
        hidden_channels: 128
        width_mult: 1
        depth_mult: 1
        activation_type: relu
        reduce_channels: True
    neck2:
      YoloNASUpStage:
        out_channels: 96
        num_blocks: 4
        hidden_channels: 128
        width_mult: 1
        depth_mult: 1
        activation_type: relu
        reduce_channels: True
    neck3:
      YoloNASDownStage:
        out_channels: 192
        num_blocks: 4
        hidden_channels: 128
        activation_type: relu
        width_mult: 1
        depth_mult: 1
    neck4:
      YoloNASDownStage:
        out_channels: 384
        num_blocks: 4
        hidden_channels: 256
        activation_type: relu
        width_mult: 1
        depth_mult: 1

heads:
  NDFLHeads:
    num_classes: 80
    reg_max: 16
    heads_list:
      - YoloNASDFLHead:
          inter_channels: 128
          width_mult: 1
          first_conv_group_size: 0
          stride: 8
      - YoloNASDFLHead:
          inter_channels: 256
          width_mult: 1
          first_conv_group_size: 0
          stride: 16
      - YoloNASDFLHead:
          inter_channels: 512
          width_mult: 1
          first_conv_group_size: 0
          stride: 32

bn_eps: 1e-3
bn_momentum: 0.03
inplace_act: True

_convert_: all
# src/super_gradients/recipes/arch_params/yolo_nas_m_arch_params.yaml
backbone:
  NStageBackbone:
    stem:
      YoloNASStem:
        out_channels: 48
    stages:
      - YoloNASStage:
          out_channels: 96
          num_blocks: 2
          activation_type: relu
          hidden_channels: 64
          concat_intermediates: True
      - YoloNASStage:
          out_channels: 192
          num_blocks: 3
          activation_type: relu
          hidden_channels: 128
          concat_intermediates: True
      - YoloNASStage:
          out_channels: 384
          num_blocks: 5
          activation_type: relu
          hidden_channels: 256
          concat_intermediates: True
      - YoloNASStage:
          out_channels: 768
          num_blocks: 2
          activation_type: relu
          hidden_channels: 384
          concat_intermediates: False
    context_module:
      SPP:
        output_channels: 768
        activation_type: relu
        k: [5,9,13]
    out_layers: [stage1, stage2, stage3, context_module]

neck:
  YoloNASPANNeckWithC2:
    neck1:
      YoloNASUpStage:
        out_channels: 192
        num_blocks: 2
        hidden_channels: 192
        width_mult: 1
        depth_mult: 1
        activation_type: relu
        reduce_channels: True
    neck2:
      YoloNASUpStage:
        out_channels: 96
        num_blocks: 3
        hidden_channels: 64
        width_mult: 1
        depth_mult: 1
        activation_type: relu
        reduce_channels: True
    neck3:
      YoloNASDownStage:
        out_channels: 192
        num_blocks: 2
        hidden_channels: 192
        activation_type: relu
        width_mult: 1
        depth_mult: 1
    neck4:
      YoloNASDownStage:
        out_channels: 384
        num_blocks: 3
        hidden_channels: 256
        activation_type: relu
        width_mult: 1
        depth_mult: 1

heads:
  NDFLHeads:
    num_classes: 80
    reg_max: 16
    heads_list:
      - YoloNASDFLHead:
          inter_channels: 128
          width_mult: 0.75
          first_conv_group_size: 0
          stride: 8
      - YoloNASDFLHead:
          inter_channels: 256
          width_mult: 0.75
          first_conv_group_size: 0
          stride: 16
      - YoloNASDFLHead:
          inter_channels: 512
          width_mult: 0.75
          first_conv_group_size: 0
          stride: 32

bn_eps: 1e-3
bn_momentum: 0.03
inplace_act: True

_convert_: all
backbone:
  NStageBackbone:
    stem:
      YoloNASStem:
        out_channels: 48

    stages:
      - YoloNASStage:
          out_channels: 96
          num_blocks: 2
          activation_type: relu
          hidden_channels: 32
          concat_intermediates: False
      - YoloNASStage:
          out_channels: 192
          num_blocks: 3
          activation_type: relu
          hidden_channels: 64
          concat_intermediates: False
      - YoloNASStage:
          out_channels: 384
          num_blocks: 5
          activation_type: relu
          hidden_channels: 96
          concat_intermediates: False
      - YoloNASStage:
          out_channels: 768
          num_blocks: 2
          activation_type: relu
          hidden_channels: 192
          concat_intermediates: False

    context_module:
      SPP:
        output_channels: 768
        activation_type: relu
        k: [5,9,13]

    out_layers: [stage1, stage2, stage3, context_module]

neck:
  YoloNASPANNeckWithC2:
    neck1:
      YoloNASUpStage:
        out_channels: 192
        num_blocks: 2
        hidden_channels: 64
        width_mult: 1
        depth_mult: 1
        activation_type: relu
        reduce_channels: True

    neck2:
      YoloNASUpStage:
        out_channels: 96
        num_blocks: 2
        hidden_channels: 48
        width_mult: 1
        depth_mult: 1
        activation_type: relu
        reduce_channels: True

    neck3:
      YoloNASDownStage:
        out_channels: 192
        num_blocks: 2
        hidden_channels: 64
        activation_type: relu
        width_mult: 1
        depth_mult: 1

    neck4:
      YoloNASDownStage:
        out_channels: 384
        num_blocks: 2
        hidden_channels: 64
        activation_type: relu
        width_mult: 1
        depth_mult: 1

heads:
  NDFLHeads:
    num_classes: 80
    reg_max: 16

    heads_list:
      - YoloNASDFLHead:
          inter_channels: 128
          width_mult: 0.5
          first_conv_group_size: 0
          stride: 8
      - YoloNASDFLHead:
          inter_channels: 256
          width_mult: 0.5
          first_conv_group_size: 0
          stride: 16
      - YoloNASDFLHead:
          inter_channels: 512
          width_mult: 0.5
          first_conv_group_size: 0
          stride: 32

bn_eps: 1e-3
bn_momentum: 0.03
inplace_act: True

_convert_: all
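The three arch-params files differ only in depth (num_blocks), width (hidden_channels and the heads' width_mult) and whether stages concatenate intermediate outputs; the graph layout is identical. A minimal sketch of how one variant is built from its YAML defaults (the num_classes override here is an arbitrary example, not part of this PR):

from super_gradients.training import models
from super_gradients.common.object_names import Models

# Resolves yolo_nas_s_arch_params.yaml via get_arch_params and overrides num_classes (YAML default: 80).
model = models.get(Models.YOLO_NAS_S, num_classes=20)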
# YoloNAS-S Detection training on COCO2017 Dataset:
# This training recipe is for demonstration purposes only. Pretrained models were trained using a different recipe.
# So it will not be possible to reproduce the results of the pretrained models using this recipe.

# Instructions:
#   0. Make sure that the data is stored in dataset_params.dataset_dir or add "dataset_params.data_dir=<PATH-TO-DATASET>" at the end of the command below (feel free to check ReadMe)
#   1. Move to the project root (where you will find the ReadMe and src folder)
#   2. Run the command you want:
#       yolo_nas_s: python src/super_gradients/examples/train_from_recipe_example/train_from_recipe.py --config-name=coco2017_yolo_nas_s
#

defaults:
  - training_hyperparams: coco2017_yolo_nas_train_params
  - dataset_params: coco_detection_yolo_nas_dataset_params
  - arch_params: yolo_nas_s_arch_params
  - checkpoint_params: default_checkpoint_params
  - _self_
  - variable_setup

train_dataloader: coco2017_train_yolo_nas
val_dataloader: coco2017_val_yolo_nas

load_checkpoint: False
resume: False

dataset_params:
  train_dataloader_params:
    batch_size: 32

arch_params:
  num_classes: 80

training_hyperparams:
  resume: ${resume}
  mixed_precision: True

architecture: yolo_nas_s

multi_gpu: DDP
num_gpus: 8

experiment_suffix: ""
experiment_name: coco2017_${architecture}${experiment_suffix}
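For reference, a rough Python-API equivalent of what this recipe wires together. This is a minimal sketch, not part of the PR: the training_params dict spells out only a representative subset of coco2017_yolo_nas_train_params (shown in full later in this diff), with omitted keys falling back to SuperGradients' defaults.

from super_gradients import Trainer
from super_gradients.training import dataloaders, models

trainer = Trainer(experiment_name="coco2017_yolo_nas_s")
model = models.get("yolo_nas_s", num_classes=80)

# Dataloader names registered in this PR; COCO2017 must already be available locally (see instruction 0 above).
train_loader = dataloaders.get("coco2017_train_yolo_nas", dataloader_params={"batch_size": 32})
valid_loader = dataloaders.get("coco2017_val_yolo_nas")

training_params = {
    "max_epochs": 300,
    "lr_mode": "cosine",
    "initial_lr": 2e-4,
    "cosine_final_lr_ratio": 0.1,
    "optimizer": "AdamW",
    "optimizer_params": {"weight_decay": 0.00001},
    "ema": True,
    "loss": "ppyoloe_loss",
    "criterion_params": {"use_static_assigner": False, "num_classes": 80, "reg_max": 16},
}

trainer.train(model=model, training_params=training_params, train_loader=train_loader, valid_loader=valid_loader)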
@@ -30,15 +30,15 @@ train_dataset_params:
         mixup_scale: [ 0.5, 1.5 ]         # random rescale range for the additional sample in mixup
         prob: 0.5                       # probability to apply per-sample mixup
         flip_prob: 0.5                  # probability to apply horizontal flip
-    - DetectionStandardizeImage:
-        max_value: 255.
     - DetectionPaddedRescale:
         input_dim: [640, 640]
         max_targets: 120
         pad_value: 114
+    - DetectionStandardize:
+        max_value: 255.
     - DetectionTargetsFormatTransform:
         max_targets: 256
-        output_format: LABEL_NORMALIZED_CXCYWH
+        output_format: LABEL_CXCYWH
 
   tight_box_rotation: False
   class_inclusion_list:
@@ -67,13 +67,13 @@ val_dataset_params:
     - DetectionPadToSize:
         output_size: [640, 640]
         pad_value: 114
-    - DetectionStandardizeImage:
+    - DetectionStandardize:
         max_value: 255.
     - DetectionImagePermute
     - DetectionTargetsFormatTransform:
         max_targets: 50
         input_dim: [640, 640]
-        output_format: LABEL_NORMALIZED_CXCYWH
+        output_format: LABEL_CXCYWH
   tight_box_rotation: False
   class_inclusion_list:
   max_num_samples:
@@ -83,6 +83,7 @@ val_dataloader_params:
   batch_size: 25
   num_workers: 8
   drop_last: False
+  shuffle: False
   pin_memory: True
   collate_fn:
     _target_: super_gradients.training.utils.detection_utils.CrowdDetectionCollateFN
@@ -9,18 +9,27 @@ train_dataset_params:
   input_dim: [640, 640]
   cache_dir:
   cache: False
+  ignore_empty_annotations: False
   transforms:
+    - DetectionMosaic:
+        input_dim: ${dataset_params.train_dataset_params.input_dim}
+        prob: 1.
     - DetectionRandomAffine:
         degrees: 0.                  # rotation degrees, randomly sampled from [-degrees, degrees]
         translate: 0.1                # image translation fraction
         scales: [ 0.5, 1.5 ]              # random rescale range (keeps size by padding/cropping) after mosaic transform.
         shear: 0.0                    # shear degrees, randomly sampled from [-degrees, degrees]
         target_size: ${dataset_params.train_dataset_params.input_dim}
-        filter_box_candidates: True   # whether to filter out transformed bboxes by edge size, area ratio, and aspect ratio.
+        filter_box_candidates: False  # whether to filter out transformed bboxes by edge size, area ratio, and aspect ratio.
         wh_thr: 2                     # edge size threshold when filter_box_candidates = True (pixels)
         area_thr: 0.1                 # threshold for area ratio between original image and the transformed one, when filter_box_candidates = True
         ar_thr: 20                    # aspect ratio threshold when filter_box_candidates = True
         border_value: 128
+#    - DetectionMixup:
+#        input_dim: ${dataset_params.train_dataset_params.input_dim}
+#        mixup_scale: [ 0.5, 1.5 ]         # random rescale range for the additional sample in mixup
+#        prob: 1.0                       # probability to apply per-sample mixup
+#        flip_prob: 0.5                  # probability to apply horizontal flip
     - DetectionHSV:
         prob: 1.0                       # probability to apply HSV transform
         hgain: 5                        # HSV transform hue gain (randomly sampled from [-hgain, hgain])
@@ -30,8 +39,11 @@ train_dataset_params:
         prob: 0.5                       # probability to apply horizontal flip
     - DetectionPaddedRescale:
         input_dim: ${dataset_params.train_dataset_params.input_dim}
-        max_targets: 120
+        max_targets: 300
+    - DetectionStandardize:
+        max_value: 255.
     - DetectionTargetsFormatTransform:
+        max_targets: 300
         input_dim: ${dataset_params.train_dataset_params.input_dim}
         output_format: LABEL_CXCYWH
   tight_box_rotation: False
@@ -43,8 +55,8 @@ train_dataset_params:
 train_dataloader_params:
   shuffle: True
   batch_size: 16
-  num_workers: 0
-  sampler:
+  min_samples: 512
+  num_workers: 4
   drop_last: False
   pin_memory: True
   worker_init_fn:
@@ -60,11 +72,16 @@ val_dataset_params:
   input_dim: [640, 640]
   cache_dir:
   cache: False
+  ignore_empty_annotations: False
   transforms:
   - DetectionPaddedRescale:
       input_dim: ${dataset_params.val_dataset_params.input_dim}
+      max_targets: 300
+      pad_value: 114
+  - DetectionStandardize:
+      max_value: 255.
   - DetectionTargetsFormatTransform:
-      max_targets: 50
+      max_targets: 300
       input_dim: ${dataset_params.val_dataset_params.input_dim}
       output_format: LABEL_CXCYWH
   tight_box_rotation: False
@@ -74,10 +91,10 @@ val_dataset_params:
   verbose: 0
 
 val_dataloader_params:
-  batch_size: 64
-  num_workers: 0
-  sampler:
+  batch_size: 32
+  num_workers: 4
   drop_last: False
+  shuffle: False
   pin_memory: True
   collate_fn: # collate function for valset
     _target_: super_gradients.training.utils.detection_utils.CrowdDetectionCollateFN
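Both pipelines above now insert DetectionStandardize after DetectionPaddedRescale. Assuming the transform simply divides the image by max_value (as its parameters suggest), its effect is:

import numpy as np

# Hypothetical stand-in for DetectionStandardize(max_value=255.): pixel intensities are
# scaled into [0, 1]; bounding-box targets are left untouched.
image = np.random.randint(0, 256, size=(640, 640, 3)).astype(np.float32)
standardized = image / 255.0
assert 0.0 <= standardized.min() and standardized.max() <= 1.0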
# A recipe to fine-tune YoloNAS on Roboflow datasets.
# Check out the datasets at https://universe.roboflow.com/roboflow-100?ref=blog.roboflow.com
#
# `dataset_name` refers to the official name of the dataset.
# You can find it in the url of the dataset: https://universe.roboflow.com/roboflow-100/digits-t2eg6 -> digits-t2eg6
#
# Example: python -m super_gradients.train_from_recipe --config-name=roboflow_yolo_nas_m dataset_name=digits-t2eg6

defaults:
  - training_hyperparams: coco2017_yolo_nas_train_params
  - dataset_params: roboflow_detection_dataset_params
  - checkpoint_params: default_checkpoint_params
  - arch_params: yolo_nas_m_arch_params
  - _self_
  - variable_setup

train_dataloader: roboflow_train_yolox
val_dataloader: roboflow_val_yolox

dataset_name: ??? # Placeholder for the name of the dataset you want to use (e.g. "digits-t2eg6")

dataset_params:
  dataset_name: ${dataset_name}
  train_dataloader_params:
    batch_size: 12
  val_dataloader_params:
    batch_size: 16

num_classes: ${roboflow_dataset_num_classes:${dataset_name}}

architecture: yolo_nas_m

arch_params:
  num_classes: ${num_classes}

load_checkpoint: False
checkpoint_params:
  pretrained_weights: coco

result_path: # By default, results are saved in the checkpoints directory.

resume: False
training_hyperparams:
  resume: ${resume}
  zero_weight_decay_on_bias_and_bn: True
  lr_warmup_epochs: 3
  warmup_mode: linear_epoch_step
  initial_lr: 4e-4
  cosine_final_lr_ratio: 0.1
  optimizer_params:
    weight_decay: 0.0001

  ema: True
  ema_params:
    decay: 0.9

  max_epochs: 100
  mixed_precision: True

  criterion_params:
    num_classes: ${num_classes}

  phase_callbacks: []

  loss:
    ppyoloe_loss:
      num_classes: ${num_classes}
      reg_max: 16

  valid_metrics_list:
    - DetectionMetrics_050:
        score_thres: 0.1
        top_k_predictions: 300
        num_cls: ${num_classes}
        normalize_targets: True
        post_prediction_callback:
          _target_: super_gradients.training.models.detection_models.pp_yolo_e.PPYoloEPostPredictionCallback
          score_threshold: 0.01
          nms_top_k: 1000
          max_predictions: 300
          nms_threshold: 0.7

  metric_to_watch: 'mAP@0.50'

multi_gpu: Off
num_gpus: 1

experiment_suffix: ""
experiment_name: ${architecture}_roboflow_${dataset_name}${experiment_suffix}
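Since pretrained_weights: coco is combined with a dataset-specific num_classes, fine-tuning goes through the head-replacement path added in this PR (CustomizableDetector.replace_head delegating to NDFLHeads.replace_num_classes). A hedged sketch with an arbitrary class count:

from super_gradients.training import models

# Loads COCO weights, then rebuilds the 80-class heads for 7 classes with randomly
# initialized weights (see replace_num_classes_with_random_weights elsewhere in this PR).
model = models.get("yolo_nas_m", num_classes=7, pretrained_weights="coco")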
# A recipe to fine-tune YoloNAS on Roboflow datasets.
# Check out the datasets at https://universe.roboflow.com/roboflow-100?ref=blog.roboflow.com
#
# `dataset_name` refers to the official name of the dataset.
# You can find it in the url of the dataset: https://universe.roboflow.com/roboflow-100/digits-t2eg6 -> digits-t2eg6
#
# Example: python -m super_gradients.train_from_recipe --config-name=roboflow_yolo_nas_s dataset_name=digits-t2eg6

defaults:
  - training_hyperparams: coco2017_yolo_nas_train_params
  - dataset_params: roboflow_detection_dataset_params
  - checkpoint_params: default_checkpoint_params
  - arch_params: yolo_nas_s_arch_params
  - _self_
  - variable_setup

train_dataloader: roboflow_train_yolox
val_dataloader: roboflow_val_yolox

dataset_name: ??? # Placeholder for the name of the dataset you want to use (e.g. "digits-t2eg6")

dataset_params:
  dataset_name: ${dataset_name}
  train_dataloader_params:
    batch_size: 16
  val_dataloader_params:
    batch_size: 16

num_classes: ${roboflow_dataset_num_classes:${dataset_name}}

architecture: yolo_nas_s

arch_params:
  num_classes: ${num_classes}

load_checkpoint: False
checkpoint_params:
  pretrained_weights: coco

result_path: # By default, results are saved in the checkpoints directory.

resume: False
training_hyperparams:
  resume: ${resume}
  zero_weight_decay_on_bias_and_bn: True
  lr_warmup_epochs: 3
  warmup_mode: linear_epoch_step
  initial_lr: 5e-4
  cosine_final_lr_ratio: 0.1
  optimizer_params:
    weight_decay: 0.0001

  ema: True
  ema_params:
    decay: 0.9

  max_epochs: 100
  mixed_precision: True

  criterion_params:
    num_classes: ${num_classes}

  phase_callbacks: []

  loss:
    ppyoloe_loss:
      num_classes: ${num_classes}
      reg_max: 16

  valid_metrics_list:
    - DetectionMetrics_050:
        score_thres: 0.1
        top_k_predictions: 300
        num_cls: ${num_classes}
        normalize_targets: True
        post_prediction_callback:
          _target_: super_gradients.training.models.detection_models.pp_yolo_e.PPYoloEPostPredictionCallback
          score_threshold: 0.01
          nms_top_k: 1000
          max_predictions: 300
          nms_threshold: 0.7

  metric_to_watch: 'mAP@0.50'

multi_gpu: Off
num_gpus: 1

experiment_suffix: ""
experiment_name: ${architecture}_roboflow_${dataset_name}${experiment_suffix}
defaults:
  - roboflow_yolo_nas_s
  - quantization_params: default_quantization_params
  - _self_

checkpoint_params:
  checkpoint_path: ???
  strict_load: no_key_matching

pre_launch_callbacks_list:
  - QATRecipeModificationCallback:
      batch_size_divisor: 2
      max_epochs_divisor: 10
      lr_decay_factor: 0.01
      warmup_epochs_divisor: 10
      cosine_final_lr_ratio: 0.01
      disable_phase_callbacks: True
      disable_augmentations: False
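The divisors above rescale the inherited roboflow_yolo_nas_s recipe rather than restating it. Applied to that recipe's values, the effective QAT schedule works out as follows (a sketch of the arithmetic, not code from the callback itself):

# Base values from roboflow_yolo_nas_s above; divisors/factors from this file.
batch_size = 16 // 2         # batch_size_divisor: 2     -> 8
max_epochs = 100 // 10       # max_epochs_divisor: 10    -> 10
initial_lr = 5e-4 * 0.01     # lr_decay_factor: 0.01     -> 5e-6
lr_warmup_epochs = 3 // 10   # warmup_epochs_divisor: 10 -> 0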
defaults:
  - default_train_params

max_epochs: 300

warmup_mode: "linear_batch_step"
warmup_initial_lr: 1e-6
lr_warmup_steps: 1000
lr_warmup_epochs: 0

initial_lr: 2e-4
lr_mode: cosine
cosine_final_lr_ratio: 0.1

zero_weight_decay_on_bias_and_bn: True
batch_accumulate: 1

save_ckpt_epoch_list: [100, 200, 250]

loss:
  ppyoloe_loss:
    use_static_assigner: False
    num_classes: ${arch_params.num_classes}
    reg_max: 16

optimizer: AdamW
optimizer_params:
  weight_decay: 0.00001

ema: True
ema_params:
  decay: 0.9997
  decay_type: threshold

mixed_precision: False
sync_bn: True

valid_metrics_list:
  - DetectionMetrics:
      score_thres: 0.1
      top_k_predictions: 300
      num_cls: ${arch_params.num_classes}
      normalize_targets: True
      post_prediction_callback:
        _target_: super_gradients.training.models.detection_models.pp_yolo_e.PPYoloEPostPredictionCallback
        score_threshold: 0.01
        nms_top_k: 1000
        max_predictions: 300
        nms_threshold: 0.7

pre_prediction_callback:

metric_to_watch: 'mAP@0.50:0.95'
greater_metric_to_watch_is_better: True

_convert_: all
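The valid_metrics_list entry above maps one-to-one onto the Python API. A sketch of the same metric constructed directly, with num_cls hard-coded to 80 here purely for illustration:

from super_gradients.training.metrics import DetectionMetrics
from super_gradients.training.models.detection_models.pp_yolo_e import PPYoloEPostPredictionCallback

metric = DetectionMetrics(
    num_cls=80,
    score_thres=0.1,
    top_k_predictions=300,
    normalize_targets=True,
    post_prediction_callback=PPYoloEPostPredictionCallback(score_threshold=0.01, nms_top_k=1000, max_predictions=300, nms_threshold=0.7),
)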
@@ -9,8 +9,8 @@ from .dataloaders import (
     coco2017_val_ppyoloe,
     coco2017_pose_train,
     coco2017_pose_val,
-    coco2017_train_deci_yolo,
-    coco2017_val_deci_yolo,
+    coco2017_train_yolo_nas,
+    coco2017_val_yolo_nas,
     imagenet_train,
     imagenet_val,
     imagenet_efficientnet_train,
@@ -68,8 +68,8 @@ __all__ = [
     "coco2017_val_ppyoloe",
     "coco2017_pose_train",
     "coco2017_pose_val",
-    "coco2017_train_deci_yolo",
-    "coco2017_val_deci_yolo",
+    "coco2017_train_yolo_nas",
+    "coco2017_val_yolo_nas",
     "imagenet_train",
     "imagenet_val",
     "imagenet_efficientnet_train",
Discard
@@ -172,10 +172,10 @@ def coco2017_val(dataset_params: Dict = None, dataloader_params: Dict = None) ->
     )
 
 
-@register_dataloader(Dataloaders.COCO2017_TRAIN_DECIYOLO)
-def coco2017_train_deci_yolo(dataset_params: Dict = None, dataloader_params: Dict = None) -> DataLoader:
+@register_dataloader(Dataloaders.COCO2017_TRAIN_YOLO_NAS)
+def coco2017_train_yolo_nas(dataset_params: Dict = None, dataloader_params: Dict = None) -> DataLoader:
     return get_data_loader(
-        config_name="coco_detection_deci_yolo_dataset_params",
+        config_name="coco_detection_yolo_nas_dataset_params",
         dataset_cls=COCODetectionDataset,
         train=True,
         dataset_params=dataset_params,
@@ -183,10 +183,10 @@ def coco2017_train_deci_yolo(dataset_params: Dict = None, dataloader_params: Dic
     )
 
 
-@register_dataloader(Dataloaders.COCO2017_VAL_DECIYOLO)
-def coco2017_val_deci_yolo(dataset_params: Dict = None, dataloader_params: Dict = None) -> DataLoader:
+@register_dataloader(Dataloaders.COCO2017_VAL_YOLO_NAS)
+def coco2017_val_yolo_nas(dataset_params: Dict = None, dataloader_params: Dict = None) -> DataLoader:
     return get_data_loader(
-        config_name="coco_detection_deci_yolo_dataset_params",
+        config_name="coco_detection_yolo_nas_dataset_params",
         dataset_cls=COCODetectionDataset,
         train=False,
         dataset_params=dataset_params,
@@ -33,7 +33,7 @@ DATASETS_METADATA = {
     "underwater-objects-5v7p8": {"category": "underwater", "train": 5320, "test": 760, "valid": 1520, "size": 7600, "num_classes": 5, "num_classes_found": 5},
     "coral-lwptl": {"category": "underwater", "train": 427, "test": 74, "valid": 93, "size": 594, "num_classes": 14, "num_classes_found": 14},
     "tweeter-posts": {"category": "documents", "train": 87, "test": 9, "valid": 21, "size": 117, "num_classes": 2, "num_classes_found": 2},
-    "tweeter-profile": {"category": "documents", "train": 425, "test": 61, "valid": 121, "size": 607, "num_classes": 1, "num_classes_found": 0},
+    "tweeter-profile": {"category": "documents", "train": 425, "test": 61, "valid": 121, "size": 607, "num_classes": 1, "num_classes_found": 1},
     "document-parts": {"category": "documents", "train": 906, "test": 150, "valid": 318, "size": 1374, "num_classes": 2, "num_classes_found": 2},
     "activity-diagrams-qdobr": {"category": "documents", "train": 259, "test": 45, "valid": 74, "size": 378, "num_classes": 19, "num_classes_found": 19},
     "signatures-xc8up": {"category": "documents", "train": 257, "test": 37, "valid": 74, "size": 368, "num_classes": 1, "num_classes_found": 1},
@@ -148,7 +148,7 @@ _NUM_CLASSES_FOUND = {
     "underwater-objects-5v7p8": 5,
     "coral-lwptl": 14,
     "tweeter-posts": 2,
-    "tweeter-profile": 0,
+    "tweeter-profile": 1,
     "document-parts": 2,
     "activity-diagrams-qdobr": 19,
     "signatures-xc8up": 1,
@@ -62,13 +62,26 @@ from super_gradients.training.models.classification_models.vgg import VGG
 from super_gradients.training.models.classification_models.vit import ViT, ViTBase, ViTLarge, ViTHuge
 
 # Detection models
-from super_gradients.training.models.detection_models.csp_darknet53 import CSPDarknet53
-from super_gradients.training.models.detection_models.pp_yolo_e.pp_yolo_e import PPYoloE, PPYoloE_S, PPYoloE_M, PPYoloE_L, PPYoloE_X
+from super_gradients.training.models.detection_models.csp_darknet53 import CSPDarknet53, SPP
+from super_gradients.training.models.detection_models.pp_yolo_e import PPYoloE, PPYoloE_S, PPYoloE_M, PPYoloE_L, PPYoloE_X
 from super_gradients.training.models.detection_models.darknet53 import Darknet53, Darknet53Base
 from super_gradients.training.models.detection_models.ssd import SSDMobileNetV1, SSDLiteMobileNetV2
 from super_gradients.training.models.detection_models.yolo_base import YoloBase, YoloPostPredictionCallback
 from super_gradients.training.models.detection_models.yolox import YoloX_N, YoloX_T, YoloX_S, YoloX_M, YoloX_L, YoloX_X, CustomYoloX
 from super_gradients.training.models.detection_models.customizable_detector import CustomizableDetector
+from super_gradients.training.models.detection_models.yolo_nas import (
+    YoloNASStage,
+    YoloNASStem,
+    YoloNASDownStage,
+    YoloNASUpStage,
+    YoloNASBottleneck,
+    YoloNASDFLHead,
+    NDFLHeads,
+    YoloNASPANNeckWithC2,
+    YoloNAS_S,
+    YoloNAS_M,
+    YoloNAS_L,
+)
 
 # Segmentation models
 from super_gradients.training.models.segmentation_models.shelfnet import (
@@ -96,7 +109,6 @@ from super_gradients.training.models.segmentation_models.stdc import (
     STDCSegmentationBase,
     CustomSTDCSegmentation,
 )
-from super_gradients.training.models.segmentation_models.segformer import SegFormerB0, SegFormerB1, SegFormerB2, SegFormerB3, SegFormerB4, SegFormerB5
 
 # Pose estimation
 from super_gradients.training.models.pose_estimation_models.pose_ppyolo import PosePPYoloL
@@ -116,6 +128,18 @@ from super_gradients.common.object_names import Models
 from super_gradients.common.registry.registry import ARCHITECTURES
 
 __all__ = [
+    "SPP",
+    "YoloNAS_S",
+    "YoloNAS_M",
+    "YoloNAS_L",
+    "YoloNASStage",
+    "YoloNASUpStage",
+    "YoloNASStem",
+    "YoloNASDownStage",
+    "YoloNASDFLHead",
+    "YoloNASBottleneck",
+    "NDFLHeads",
+    "YoloNASPANNeckWithC2",
     "SgModule",
     "Beit",
     "BeitLargePatch16_224",
@@ -259,10 +283,4 @@ __all__ = [
     "ARCHITECTURES",
     "Models",
     "user_models",
-    "SegFormerB0",
-    "SegFormerB1",
-    "SegFormerB2",
-    "SegFormerB3",
-    "SegFormerB4",
-    "SegFormerB5",
 ]
@@ -7,9 +7,11 @@ from typing import Tuple, Type
 import torch
 import torch.nn as nn
 
+from super_gradients.common.decorators.factory_decorator import resolve_param
+from super_gradients.common.factories.activations_type_factory import ActivationsTypeFactory
 from super_gradients.common.object_names import Models
-from super_gradients.common.registry.registry import register_model
-from super_gradients.modules import Residual, Conv
+from super_gradients.common.registry.registry import register_model, register_detection_module
+from super_gradients.modules import Residual, Conv, BaseDetectionModule
 from super_gradients.modules.utils import width_multiplier
 from super_gradients.training.models.sg_module import SgModule
 from super_gradients.training.utils.utils import get_param, HpmStruct
@@ -127,13 +129,16 @@ class BottleneckCSP(nn.Module):
         return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1))))
 
 
-class SPP(nn.Module):
+@register_detection_module()
+class SPP(BaseDetectionModule):
     # SPATIAL PYRAMID POOLING LAYER
-    def __init__(self, input_channels, output_channels, k: Tuple, activation_type: Type[nn.Module]):
-        super().__init__()
+    @resolve_param("activation_type", ActivationsTypeFactory())
+    def __init__(self, in_channels, output_channels, k: Tuple, activation_type: Type[nn.Module]):
+        super().__init__(in_channels)
+        self._output_channels = output_channels
 
-        hidden_channels = input_channels // 2
-        self.cv1 = Conv(input_channels, hidden_channels, 1, 1, activation_type)
+        hidden_channels = in_channels // 2
+        self.cv1 = Conv(in_channels, hidden_channels, 1, 1, activation_type)
         self.cv2 = Conv(hidden_channels * (len(k) + 1), output_channels, 1, 1, activation_type)
         self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])
 
@@ -141,6 +146,13 @@ class SPP(nn.Module):
         x = self.cv1(x)
         return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1))
 
+    @property
+    def out_channels(self):
+        """
+        :return: channels of tensor(s) that will be returned by a module  in forward
+        """
+        return self._output_channels
+
 
 class ViewModule(nn.Module):
     """
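For reference, the channel bookkeeping inside the refactored SPP, plugging in the values the YoloNAS arch params use (in_channels = output_channels = 768, k = (5, 9, 13)):

# Mirrors SPP.__init__ above: cv1 halves the channels, the concat multiplies them by len(k) + 1.
in_channels, output_channels, k = 768, 768, (5, 9, 13)
hidden_channels = in_channels // 2                # 384 channels after cv1
concat_channels = hidden_channels * (len(k) + 1)  # 1536: cv1 output + three max-pool branches
# cv2 projects concat_channels back to output_channels (768), which out_channels now reports.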
@@ -12,6 +12,8 @@ from omegaconf import DictConfig
 
 from super_gradients.common.decorators.factory_decorator import resolve_param
 from super_gradients.common.factories.processing_factory import ProcessingFactory
+from super_gradients.module_interfaces import SupportsReplaceNumClasses
+from super_gradients.modules.head_replacement_utils import replace_num_classes_with_random_weights
 from super_gradients.training.utils.utils import HpmStruct
 from super_gradients.training.models.sg_module import SgModule
 import super_gradients.common.factories.detection_modules_factory as det_factory
@@ -102,6 +104,8 @@ class CustomizableDetector(SgModule):
             raise ValueError("At least one of new_num_classes, new_head must be given to replace output layer.")
         if new_head is not None:
             self.heads = new_head
+        elif isinstance(self.heads, SupportsReplaceNumClasses):
+            self.heads.replace_num_classes(new_num_classes, replace_num_classes_with_random_weights)
         else:
             factory = det_factory.DetectionModulesFactory()
             self.heads_params = factory.insert_module_param(self.heads_params, "num_classes", new_num_classes)
@@ -1,4 +1,4 @@
-from .pp_yolo_e import PPYoloE
+from .pp_yolo_e import PPYoloE, PPYoloE_S, PPYoloE_M, PPYoloE_L, PPYoloE_X
 from .post_prediction_callback import PPYoloEPostPredictionCallback
 
-__all__ = ["PPYoloE", "PPYoloEPostPredictionCallback"]
+__all__ = ["PPYoloE", "PPYoloEPostPredictionCallback", "PPYoloE_L", "PPYoloE_M", "PPYoloE_S", "PPYoloE_X"]
@@ -10,10 +10,10 @@ from super_gradients.common.factories.activations_type_factory import Activation
 from super_gradients.training.models.detection_models.csp_resnet import CSPResNetBasicBlock
 from super_gradients.modules import ConvBNAct
 
-__all__ = ["CustomCSPPAN"]
+__all__ = ["PPYoloECSPPAN"]
 
 
-class SPP(nn.Module):
+class PPYoloESPP(nn.Module):
     def __init__(
         self,
         in_channels: int,
@@ -52,7 +52,7 @@ class CSPStage(nn.Module):
         for i in range(n):
             convs.append((str(i), CSPResNetBasicBlock(next_ch_in, ch_mid, activation_type=activation_type, use_residual_connection=False)))
             if i == (n - 1) // 2 and spp:
-                convs.append(("spp", SPP(ch_mid, ch_mid, 1, (5, 9, 13), activation_type=activation_type)))
+                convs.append(("spp", PPYoloESPP(ch_mid, ch_mid, 1, (5, 9, 13), activation_type=activation_type)))
             next_ch_in = ch_mid
 
         self.convs = nn.Sequential(collections.OrderedDict(convs))
@@ -68,7 +68,7 @@ class CSPStage(nn.Module):
 
 
 @register_detection_module()
-class CustomCSPPAN(nn.Module):
+class PPYoloECSPPAN(nn.Module):
     @resolve_param("activation", ActivationsTypeFactory())
     def __init__(
         self,
@@ -1,6 +1,7 @@
 from typing import Union, Optional, List
 
 from torch import Tensor
+
 from super_gradients.common.decorators.factory_decorator import resolve_param
 from super_gradients.common.factories.processing_factory import ProcessingFactory
 from super_gradients.common.registry.registry import register_model
@@ -8,7 +9,7 @@ from super_gradients.common.object_names import Models
 from super_gradients.modules import RepVGGBlock
 from super_gradients.training.models.sg_module import SgModule
 from super_gradients.training.models.detection_models.csp_resnet import CSPResNetBackbone
-from super_gradients.training.models.detection_models.pp_yolo_e.pan import CustomCSPPAN
+from super_gradients.training.models.detection_models.pp_yolo_e.pan import PPYoloECSPPAN
 from super_gradients.training.models.detection_models.pp_yolo_e.pp_yolo_head import PPYOLOEHead
 from super_gradients.training.utils import HpmStruct
 from super_gradients.training.models.arch_params_factory import get_arch_params
@@ -26,7 +27,7 @@ class PPYoloE(SgModule):
             arch_params = arch_params.to_dict()
 
         self.backbone = CSPResNetBackbone(**arch_params["backbone"], depth_mult=arch_params["depth_mult"], width_mult=arch_params["width_mult"])
-        self.neck = CustomCSPPAN(**arch_params["neck"], depth_mult=arch_params["depth_mult"], width_mult=arch_params["width_mult"])
+        self.neck = PPYoloECSPPAN(**arch_params["neck"], depth_mult=arch_params["depth_mult"], width_mult=arch_params["width_mult"])
         self.head = PPYOLOEHead(**arch_params["head"], width_mult=arch_params["width_mult"], num_classes=arch_params["num_classes"])
 
         self._class_names: Optional[List[str]] = None
@@ -175,11 +175,12 @@ class PPYOLOEHead(nn.Module):
     @torch.jit.ignore
     def replace_num_classes(self, num_classes: int):
         bias_cls = bias_init_with_prob(0.01)
+        device = self.pred_cls[0].weight.device
         self.pred_cls = nn.ModuleList()
         self.num_classes = num_classes
 
         for in_c in self.in_channels:
-            predict_layer = nn.Conv2d(in_c, num_classes, 3, padding=1)
+            predict_layer = nn.Conv2d(in_c, num_classes, 3, padding=1, device=device)
             torch.nn.init.constant_(predict_layer.weight, 0.0)
             torch.nn.init.constant_(predict_layer.bias, bias_cls)
             self.pred_cls.append(predict_layer)
from super_gradients.training.models.detection_models.yolo_nas.dfl_heads import YoloNASDFLHead, NDFLHeads
from super_gradients.training.models.detection_models.yolo_nas.panneck import YoloNASPANNeckWithC2
from super_gradients.training.models.detection_models.yolo_nas.yolo_stages import (
    YoloNASStage,
    YoloNASStem,
    YoloNASDownStage,
    YoloNASUpStage,
    YoloNASBottleneck,
)
from super_gradients.training.models.detection_models.yolo_nas.yolo_nas_variants import YoloNAS_S, YoloNAS_M, YoloNAS_L

__all__ = [
    "YoloNASBottleneck",
    "YoloNASUpStage",
    "YoloNASDownStage",
    "YoloNASStem",
    "YoloNASStage",
    "NDFLHeads",
    "YoloNASDFLHead",
    "YoloNASPANNeckWithC2",
    "YoloNAS_S",
    "YoloNAS_M",
    "YoloNAS_L",
]
import math
from typing import Tuple, Union, List, Callable, Optional

import torch
from omegaconf import DictConfig
from torch import nn, Tensor

import super_gradients.common.factories.detection_modules_factory as det_factory
from super_gradients.common.registry import register_detection_module
from super_gradients.modules import ConvBNReLU
from super_gradients.modules.base_modules import BaseDetectionModule
from super_gradients.module_interfaces import SupportsReplaceNumClasses
from super_gradients.modules.utils import width_multiplier
from super_gradients.training.models.detection_models.pp_yolo_e.pp_yolo_head import generate_anchors_for_grid_cell
from super_gradients.training.utils import HpmStruct, torch_version_is_greater_or_equal
from super_gradients.training.utils.bbox_utils import batch_distance2bbox


@register_detection_module()
class YoloNASDFLHead(BaseDetectionModule, SupportsReplaceNumClasses):
    def __init__(self, in_channels: int, inter_channels: int, width_mult: float, first_conv_group_size: int, num_classes: int, stride: int, reg_max: int):
        """
        Initialize the YoloNASDFLHead
        :param in_channels: Input channels
        :param inter_channels: Intermediate number of channels
        :param width_mult: Width multiplier
        :param first_conv_group_size: Group size
        :param num_classes: Number of detection classes
        :param stride: Output stride for this head
        :param reg_max: Number of bins in the regression head
        """
        super().__init__(in_channels)

        inter_channels = width_multiplier(inter_channels, width_mult, 8)
        if first_conv_group_size == 0:
            groups = 0
        elif first_conv_group_size == -1:
            groups = 1
        else:
            groups = inter_channels // first_conv_group_size

        self.num_classes = num_classes
        self.stem = ConvBNReLU(in_channels, inter_channels, kernel_size=1, stride=1, padding=0, bias=False)

        first_cls_conv = [ConvBNReLU(inter_channels, inter_channels, kernel_size=3, stride=1, padding=1, groups=groups, bias=False)] if groups else []
        self.cls_convs = nn.Sequential(*first_cls_conv, ConvBNReLU(inter_channels, inter_channels, kernel_size=3, stride=1, padding=1, bias=False))

        first_reg_conv = [ConvBNReLU(inter_channels, inter_channels, kernel_size=3, stride=1, padding=1, groups=groups, bias=False)] if groups else []
        self.reg_convs = nn.Sequential(*first_reg_conv, ConvBNReLU(inter_channels, inter_channels, kernel_size=3, stride=1, padding=1, bias=False))

        self.cls_pred = nn.Conv2d(inter_channels, self.num_classes, 1, 1, 0)
        self.reg_pred = nn.Conv2d(inter_channels, 4 * (reg_max + 1), 1, 1, 0)

        self.grid = torch.zeros(1)
        self.stride = stride

        self.prior_prob = 1e-2
        self._initialize_biases()

    def replace_num_classes(self, num_classes: int, compute_new_weights_fn: Callable[[nn.Module, int], nn.Module]):
        self.cls_pred = compute_new_weights_fn(self.cls_pred, num_classes)
        self.num_classes = num_classes

    @property
    def out_channels(self):
        return None

    def forward(self, x):
        x = self.stem(x)

        cls_feat = self.cls_convs(x)
        cls_output = self.cls_pred(cls_feat)

        reg_feat = self.reg_convs(x)
        reg_output = self.reg_pred(reg_feat)

        return reg_output, cls_output

    def _initialize_biases(self):
        prior_bias = -math.log((1 - self.prior_prob) / self.prior_prob)
        torch.nn.init.constant_(self.cls_pred.bias, prior_bias)

    @staticmethod
    def _make_grid(nx=20, ny=20):
        if torch_version_is_greater_or_equal(1, 10):
            # https://github.com/pytorch/pytorch/issues/50276
            yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)], indexing="ij")
        else:
            yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)])
        return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float()


@register_detection_module()
class NDFLHeads(BaseDetectionModule, SupportsReplaceNumClasses):
    def __init__(
        self,
        num_classes: int,
        in_channels: Tuple[int, int, int],
        heads_list: Union[str, HpmStruct, DictConfig],
        grid_cell_scale: float = 5.0,
        grid_cell_offset: float = 0.5,
        reg_max: int = 16,
        eval_size: Optional[Tuple[int, int]] = None,
        width_mult: float = 1.0,
    ):
        """
        Initializes the NDFLHeads module.

        :param num_classes: Number of detection classes
        :param in_channels: Number of channels for each feature map (See width_mult)
        :param grid_cell_scale:
        :param grid_cell_offset:
        :param reg_max: Number of bins in the regression head
        :param eval_size: (rows, cols) Size of the image for evaluation. Setting this value can be beneficial for inference speed,
               since anchors will not be regenerated for each forward call.
        :param width_mult: A scaling factor applied to in_channels.
        """
        super(NDFLHeads, self).__init__(in_channels)
        in_channels = [max(round(c * width_mult), 1) for c in in_channels]

        self.in_channels = tuple(in_channels)
        self.num_classes = num_classes
        self.grid_cell_scale = grid_cell_scale
        self.grid_cell_offset = grid_cell_offset
        self.reg_max = reg_max
        self.eval_size = eval_size

        # Do not apply quantization to this tensor
        proj = torch.linspace(0, self.reg_max, self.reg_max + 1).reshape([1, self.reg_max + 1, 1, 1])
        self.register_buffer("proj_conv", proj, persistent=False)

        self._init_weights()

        factory = det_factory.DetectionModulesFactory()
        heads_list = self._pass_args(heads_list, factory, num_classes, reg_max)

        self.num_heads = len(heads_list)
        fpn_strides: List[int] = []
        for i in range(self.num_heads):
            new_head = factory.get(factory.insert_module_param(heads_list[i], "in_channels", in_channels[i]))
            fpn_strides.append(new_head.stride)
            setattr(self, f"head{i + 1}", new_head)

        self.fpn_strides = tuple(fpn_strides)

    def replace_num_classes(self, num_classes: int, compute_new_weights_fn: Callable[[nn.Module, int], nn.Module]):
        for i in range(self.num_heads):
            head = getattr(self, f"head{i + 1}")
            head.replace_num_classes(num_classes, compute_new_weights_fn)

        self.num_classes = num_classes

    @staticmethod
    def _pass_args(heads_list, factory, num_classes, reg_max):
        for i in range(len(heads_list)):
            heads_list[i] = factory.insert_module_param(heads_list[i], "num_classes", num_classes)
            heads_list[i] = factory.insert_module_param(heads_list[i], "reg_max", reg_max)
        return heads_list

    @torch.jit.ignore
    def cache_anchors(self, input_size: Tuple[int, int]):
        self.eval_size = input_size
        anchor_points, stride_tensor = self._generate_anchors()
        self.anchor_points = anchor_points
        self.stride_tensor = stride_tensor

    @torch.jit.ignore
    def _init_weights(self):
        if self.eval_size:
            anchor_points, stride_tensor = self._generate_anchors()
            self.anchor_points = anchor_points
            self.stride_tensor = stride_tensor

    @torch.jit.ignore
    def forward_train(self, feats: Tuple[Tensor, ...]):
        anchors, anchor_points, num_anchors_list, stride_tensor = generate_anchors_for_grid_cell(
            feats, self.fpn_strides, self.grid_cell_scale, self.grid_cell_offset
        )

        cls_score_list, reg_distri_list = [], []
        for i, feat in enumerate(feats):
            reg_distri, cls_logit = getattr(self, f"head{i + 1}")(feat)
            # cls and reg
            # Note we don't apply sigmoid on class predictions to ensure good numerical stability at loss computation
            cls_score_list.append(torch.permute(cls_logit.flatten(2), [0, 2, 1]))
            reg_distri_list.append(torch.permute(reg_distri.flatten(2), [0, 2, 1]))

        cls_score_list = torch.cat(cls_score_list, dim=1)
        reg_distri_list = torch.cat(reg_distri_list, dim=1)

        return cls_score_list, reg_distri_list, anchors, anchor_points, num_anchors_list, stride_tensor

    def forward_eval(self, feats: Tuple[Tensor, ...]) -> Tuple[Tuple[Tensor, Tensor], Tuple[Tensor, ...]]:
        cls_score_list, reg_distri_list, reg_dist_reduced_list = [], [], []

        for i, feat in enumerate(feats):
            b, _, h, w = feat.shape
            height_mul_width = h * w
            reg_distri, cls_logit = getattr(self, f"head{i + 1}")(feat)
            reg_distri_list.append(torch.permute(reg_distri.flatten(2), [0, 2, 1]))

            reg_dist_reduced = torch.permute(reg_distri.reshape([-1, 4, self.reg_max + 1, height_mul_width]), [0, 2, 3, 1])
            reg_dist_reduced = torch.nn.functional.conv2d(torch.nn.functional.softmax(reg_dist_reduced, dim=1), weight=self.proj_conv).squeeze(1)

            # cls and reg
            cls_score_list.append(cls_logit.reshape([b, self.num_classes, height_mul_width]))
            reg_dist_reduced_list.append(reg_dist_reduced)

        cls_score_list = torch.cat(cls_score_list, dim=-1)  # [B, C, Anchors]
        cls_score_list = torch.permute(cls_score_list, [0, 2, 1])  # [B, Anchors, C]

        reg_distri_list = torch.cat(reg_distri_list, dim=1)  # [B, Anchors, 4 * (self.reg_max + 1)]
        reg_dist_reduced_list = torch.cat(reg_dist_reduced_list, dim=1)  # [B, Anchors, 4]

        # Decode bboxes
        # Note in eval mode, anchor_points_inference is different from anchor_points computed on train
        if self.eval_size:
            anchor_points_inference, stride_tensor = self.anchor_points, self.stride_tensor
        else:
            anchor_points_inference, stride_tensor = self._generate_anchors(feats)

        pred_scores = cls_score_list.sigmoid()
        pred_bboxes = batch_distance2bbox(anchor_points_inference, reg_dist_reduced_list) * stride_tensor  # [B, Anchors, 4]

        decoded_predictions = pred_bboxes, pred_scores

        if torch.jit.is_tracing():
            return decoded_predictions

        anchors, anchor_points, num_anchors_list, _ = generate_anchors_for_grid_cell(feats, self.fpn_strides, self.grid_cell_scale, self.grid_cell_offset)

        raw_predictions = cls_score_list, reg_distri_list, anchors, anchor_points, num_anchors_list, stride_tensor
        return decoded_predictions, raw_predictions

    @property
    def out_channels(self):
        return None

    def forward(self, feats: Tuple[Tensor]):
        if self.training:
            return self.forward_train(feats)
        else:
            return self.forward_eval(feats)

    def _generate_anchors(self, feats=None, dtype=torch.float):
        # just use in eval time
        anchor_points = []
        stride_tensor = []
        for i, stride in enumerate(self.fpn_strides):
            if feats is not None:
                _, _, h, w = feats[i].shape
            else:
                h = int(self.eval_size[0] / stride)
                w = int(self.eval_size[1] / stride)
            shift_x = torch.arange(end=w) + self.grid_cell_offset
            shift_y = torch.arange(end=h) + self.grid_cell_offset
            if torch_version_is_greater_or_equal(1, 10):
                shift_y, shift_x = torch.meshgrid(shift_y, shift_x, indexing="ij")
            else:
                shift_y, shift_x = torch.meshgrid(shift_y, shift_x)

            anchor_point = torch.stack([shift_x, shift_y], dim=-1).to(dtype=dtype)
            anchor_points.append(anchor_point.reshape([-1, 2]))
            stride_tensor.append(torch.full([h * w, 1], stride, dtype=dtype))
        anchor_points = torch.cat(anchor_points)
        stride_tensor = torch.cat(stride_tensor)
        if feats is not None:
            anchor_points = anchor_points.to(feats[0].device)
            stride_tensor = stride_tensor.to(feats[0].device)
        return anchor_points, stride_tensor
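The distribution-focal decoding in forward_eval reduces each box side's (reg_max + 1)-bin distribution to its expected value: a softmax over the bins followed by a 1x1 convolution with the linspace projection buffer. A standalone sketch with random inputs:

import torch

reg_max = 16
proj = torch.linspace(0, reg_max, reg_max + 1).reshape(1, reg_max + 1, 1, 1)

# Fake regression logits for one head: [B, bins, H*W, 4 sides], shaped like reg_dist_reduced above.
reg_dist = torch.randn(2, reg_max + 1, 100, 4)
expected = torch.nn.functional.conv2d(torch.nn.functional.softmax(reg_dist, dim=1), weight=proj).squeeze(1)
print(expected.shape)  # torch.Size([2, 100, 4]) -- per-anchor distances in stride units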
from typing import Union, List, Tuple

from omegaconf import DictConfig
from torch import Tensor

from super_gradients.common.registry import register_detection_module
from super_gradients.modules.detection_modules import BaseDetectionModule
from super_gradients.training.utils.utils import HpmStruct
import super_gradients.common.factories.detection_modules_factory as det_factory


@register_detection_module("YoloNASPANNeckWithC2")
class YoloNASPANNeckWithC2(BaseDetectionModule):
    """
    A PAN (path aggregation network) neck with 4 stages (2 up-sampling and 2 down-sampling stages)
    where the up-sampling stages include a higher resolution skip
    Returns outputs of neck stage 2, stage 3, stage 4
    """

    def __init__(
        self,
        in_channels: List[int],
        neck1: Union[str, HpmStruct, DictConfig],
        neck2: Union[str, HpmStruct, DictConfig],
        neck3: Union[str, HpmStruct, DictConfig],
        neck4: Union[str, HpmStruct, DictConfig],
    ):
        """
        Initialize the PAN neck

        :param in_channels: Input channels of the 4 feature maps from the backbone
        :param neck1: First neck stage config
        :param neck2: Second neck stage config
        :param neck3: Third neck stage config
        :param neck4: Fourth neck stage config
        """
        super().__init__(in_channels)
        c2_out_channels, c3_out_channels, c4_out_channels, c5_out_channels = in_channels

        factory = det_factory.DetectionModulesFactory()
        self.neck1 = factory.get(factory.insert_module_param(neck1, "in_channels", [c5_out_channels, c4_out_channels, c3_out_channels]))
        self.neck2 = factory.get(factory.insert_module_param(neck2, "in_channels", [self.neck1.out_channels[1], c3_out_channels, c2_out_channels]))
        self.neck3 = factory.get(factory.insert_module_param(neck3, "in_channels", [self.neck2.out_channels[1], self.neck2.out_channels[0]]))
        self.neck4 = factory.get(factory.insert_module_param(neck4, "in_channels", [self.neck3.out_channels, self.neck1.out_channels[0]]))

        self._out_channels = [
            self.neck2.out_channels[1],
            self.neck3.out_channels,
            self.neck4.out_channels,
        ]

    @property
    def out_channels(self):
        return self._out_channels

    def forward(self, inputs: Tuple[Tensor, Tensor, Tensor, Tensor]) -> Tuple[Tensor, Tensor, Tensor]:
        c2, c3, c4, c5 = inputs

        x_n1_inter, x = self.neck1([c5, c4, c3])
        x_n2_inter, p3 = self.neck2([x, c3, c2])
        p4 = self.neck3([p3, x_n2_inter])
        p5 = self.neck4([p4, x_n1_inter])

        return p3, p4, p5
import copy
from typing import Union

from omegaconf import DictConfig

from super_gradients.common.object_names import Models
from super_gradients.common.registry import register_model
from super_gradients.training.models.arch_params_factory import get_arch_params
from super_gradients.training.models.detection_models.customizable_detector import CustomizableDetector
from super_gradients.training.utils import HpmStruct, get_param
from super_gradients.training.models.detection_models.pp_yolo_e import PPYoloEPostPredictionCallback


@register_model(Models.YOLO_NAS_S)
class YoloNAS_S(CustomizableDetector):
    def __init__(self, arch_params: Union[HpmStruct, DictConfig], in_channels: int = 3):
        default_arch_params = get_arch_params("yolo_nas_s_arch_params")
        merged_arch_params = HpmStruct(**copy.deepcopy(default_arch_params))
        merged_arch_params.override(**arch_params.to_dict())
        super().__init__(
            backbone=merged_arch_params.backbone,
            neck=merged_arch_params.neck,
            heads=merged_arch_params.heads,
            num_classes=get_param(merged_arch_params, "num_classes", None),
            in_channels=in_channels,
            bn_momentum=get_param(merged_arch_params, "bn_momentum", None),
            bn_eps=get_param(merged_arch_params, "bn_eps", None),
            inplace_act=get_param(merged_arch_params, "inplace_act", None),
        )

    @staticmethod
    def get_post_prediction_callback(conf: float, iou: float) -> PPYoloEPostPredictionCallback:
        return PPYoloEPostPredictionCallback(score_threshold=conf, nms_threshold=iou, nms_top_k=1000, max_predictions=300)

    @property
    def num_classes(self):
        return self.heads.num_classes


@register_model(Models.YOLO_NAS_M)
class YoloNAS_M(CustomizableDetector):
    def __init__(self, arch_params: Union[HpmStruct, DictConfig], in_channels: int = 3):
        default_arch_params = get_arch_params("yolo_nas_m_arch_params")
        merged_arch_params = HpmStruct(**copy.deepcopy(default_arch_params))
        merged_arch_params.override(**arch_params.to_dict())
        super().__init__(
            backbone=merged_arch_params.backbone,
            neck=merged_arch_params.neck,
            heads=merged_arch_params.heads,
            num_classes=get_param(merged_arch_params, "num_classes", None),
            in_channels=in_channels,
            bn_momentum=get_param(merged_arch_params, "bn_momentum", None),
            bn_eps=get_param(merged_arch_params, "bn_eps", None),
            inplace_act=get_param(merged_arch_params, "inplace_act", None),
        )

    @staticmethod
    def get_post_prediction_callback(conf: float, iou: float) -> PPYoloEPostPredictionCallback:
        return PPYoloEPostPredictionCallback(score_threshold=conf, nms_threshold=iou, nms_top_k=1000, max_predictions=300)

    @property
    def num_classes(self):
        return self.heads.num_classes


@register_model(Models.YOLO_NAS_L)
class YoloNAS_L(CustomizableDetector):
    def __init__(self, arch_params: Union[HpmStruct, DictConfig], in_channels: int = 3):
        default_arch_params = get_arch_params("yolo_nas_l_arch_params")
        merged_arch_params = HpmStruct(**copy.deepcopy(default_arch_params))
        merged_arch_params.override(**arch_params.to_dict())
        super().__init__(
            backbone=merged_arch_params.backbone,
            neck=merged_arch_params.neck,
            heads=merged_arch_params.heads,
            num_classes=get_param(merged_arch_params, "num_classes", None),
            in_channels=in_channels,
            bn_momentum=get_param(merged_arch_params, "bn_momentum", None),
            bn_eps=get_param(merged_arch_params, "bn_eps", None),
            inplace_act=get_param(merged_arch_params, "inplace_act", None),
        )

    @staticmethod
    def get_post_prediction_callback(conf: float, iou: float) -> PPYoloEPostPredictionCallback:
        return PPYoloEPostPredictionCallback(score_threshold=conf, nms_threshold=iou, nms_top_k=1000, max_predictions=300)

    @property
    def num_classes(self):
        return self.heads.num_classes
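A short usage sketch for the registered variants; the confidence and IoU thresholds here are arbitrary examples:

from super_gradients.training import models
from super_gradients.common.object_names import Models

model = models.get(Models.YOLO_NAS_L, pretrained_weights="coco")
callback = model.get_post_prediction_callback(conf=0.25, iou=0.7)  # a PPYoloEPostPredictionCallback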
from functools import partial
from typing import Type, List

import torch
from torch import nn, Tensor

from super_gradients.common.registry import register_detection_module
from super_gradients.modules import Residual, BaseDetectionModule
from super_gradients.common.decorators.factory_decorator import resolve_param
from super_gradients.common.factories.activations_type_factory import ActivationsTypeFactory
from super_gradients.modules import QARepVGGBlock, Conv
from super_gradients.modules.utils import width_multiplier

__all__ = ["YoloNASStage", "YoloNASUpStage", "YoloNASStem", "YoloNASDownStage", "YoloNASBottleneck"]


class YoloNASBottleneck(nn.Module):
    """
    A bottleneck block for YoloNAS. Consists of two consecutive blocks and optional residual connection.
    """

    def __init__(
        self, input_channels: int, output_channels: int, block_type: Type[nn.Module], activation_type: Type[nn.Module], shortcut: bool, use_alpha: bool
    ):
        """
        Initialize the YoloNASBottleneck block

        :param input_channels: Number of input channels
        :param output_channels: Number of output channels
        :param block_type: Type of the convolutional block
        :param activation_type: Activation type for the convolutional block
        :param shortcut: If True, adds the residual connection from input to output.
        :param use_alpha: If True, adds the learnable alpha parameter (multiplier for the residual connection).
        """
        super().__init__()

        self.cv1 = block_type(input_channels, output_channels, activation_type=activation_type)
        self.cv2 = block_type(output_channels, output_channels, activation_type=activation_type)
        self.add = shortcut and input_channels == output_channels
        self.shortcut = Residual() if self.add else None
        if use_alpha:
            self.alpha = torch.nn.Parameter(torch.tensor([1.0]), requires_grad=True)
        else:
            self.alpha = 1.0

    def forward(self, x):
        return self.alpha * self.shortcut(x) + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))


class SequentialWithIntermediates(nn.Sequential):
    """
    A Sequential module that can return all intermediate values as a list of Tensors
    """

    def __init__(self, output_intermediates: bool, *args):
        super(SequentialWithIntermediates, self).__init__(*args)
        self.output_intermediates = output_intermediates

    def forward(self, input: Tensor) -> List[Tensor]:
        if self.output_intermediates:
            output = [input]
            for module in self:
                output.append(module(output[-1]))
            return output
        # For uniformity, we return a list even if we don't output intermediates
        return [super(SequentialWithIntermediates, self).forward(input)]


class YoloNASCSPLayer(nn.Module):
    """
    Cross-stage layer module for YoloNAS.
    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        num_bottlenecks: int,
        block_type: Type[nn.Module],
        activation_type: Type[nn.Module],
        shortcut: bool = True,
        use_alpha: bool = True,
        expansion: float = 0.5,
        hidden_channels: int = None,
        concat_intermediates: bool = False,
    ):
        """
        :param in_channels: Number of input channels.
        :param out_channels: Number of output channels.
        :param num_bottlenecks: Number of bottleneck blocks.
        :param block_type: Bottleneck block type.
        :param activation_type: Activation type for all blocks.
        :param shortcut: If True, adds the residual connection from input to output.
        :param use_alpha: If True, adds the learnable alpha parameter (multiplier for the residual connection).
        :param expansion: If hidden_channels is None, hidden_channels is set to in_channels * expansion.
        :param hidden_channels: If not None, sets the number of hidden channels used inside the bottleneck blocks.
        :param concat_intermediates:
        """
        super(YoloNASCSPLayer, self).__init__()
        if hidden_channels is None:
            hidden_channels = int(out_channels * expansion)
        self.conv1 = Conv(in_channels, hidden_channels, 1, stride=1, activation_type=activation_type)
        self.conv2 = Conv(in_channels, hidden_channels, 1, stride=1, activation_type=activation_type)
        self.conv3 = Conv(hidden_channels * (2 + concat_intermediates * num_bottlenecks), out_channels, 1, stride=1, activation_type=activation_type)
        module_list = [YoloNASBottleneck(hidden_channels, hidden_channels, block_type, activation_type, shortcut, use_alpha) for _ in range(num_bottlenecks)]
        self.bottlenecks = SequentialWithIntermediates(concat_intermediates, *module_list)

    def forward(self, x: Tensor) -> Tensor:
        x_1 = self.conv1(x)
        x_1 = self.bottlenecks(x_1)
        x_2 = self.conv2(x)
        x = torch.cat((*x_1, x_2), dim=1)
        return self.conv3(x)


@register_detection_module()
class YoloNASStem(BaseDetectionModule):
    """
    Stem module for YoloNAS. Consists of a single QARepVGGBlock with stride of two.
    """

    def __init__(self, in_channels: int, out_channels: int):
        """
        Initialize the YoloNASStem module
        :param in_channels: Number of input channels
        :param out_channels: Number of output channels
        """
        super().__init__(in_channels)
        self._out_channels = out_channels
        self.conv = QARepVGGBlock(in_channels, out_channels, stride=2, use_residual_connection=False)

    @property
    def out_channels(self):
        return self._out_channels

    def forward(self, x: Tensor) -> Tensor:
        return self.conv(x)


@register_detection_module()
class YoloNASStage(BaseDetectionModule):
    """
    A single stage module for YoloNAS. It consists of a downsample block (QARepVGGBlock) followed by YoloNASCSPLayer.
    """

    @resolve_param("activation_type", ActivationsTypeFactory())
    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        num_blocks: int,
        activation_type: Type[nn.Module],
        hidden_channels: int = None,
        concat_intermediates: bool = False,
    ):
        """
        Initialize the YoloNASStage module
        :param in_channels: Number of input channels
        :param out_channels: Number of output channels
        :param num_blocks: Number of bottleneck blocks in the YoloNASCSPLayer
        :param activation_type: Activation type for all blocks
        :param hidden_channels: If not None, sets the number of hidden channels used inside the bottleneck blocks.
        :param concat_intermediates: If True, concatenates the intermediate values from the YoloNASCSPLayer.
        """
        super().__init__(in_channels)
        self._out_channels = out_channels
        self.downsample = QARepVGGBlock(in_channels, out_channels, stride=2, activation_type=activation_type, use_residual_connection=False)
        self.blocks = YoloNASCSPLayer(
            out_channels,
            out_channels,
  47. if self.output_intermediates:
  48. output = [input]
  49. for module in self:
  50. output.append(module(output[-1]))
  51. return output
  52. # For uniformity, we return a list even if we don't output intermediates
  53. return [super(SequentialWithIntermediates, self).forward(input)]
class YoloNASCSPLayer(nn.Module):
    """
    Cross-stage layer module for YoloNAS.
    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        num_bottlenecks: int,
        block_type: Type[nn.Module],
        activation_type: Type[nn.Module],
        shortcut: bool = True,
        use_alpha: bool = True,
        expansion: float = 0.5,
        hidden_channels: int = None,
        concat_intermediates: bool = False,
    ):
        """
        :param in_channels: Number of input channels.
        :param out_channels: Number of output channels.
        :param num_bottlenecks: Number of bottleneck blocks.
        :param block_type: Bottleneck block type.
        :param activation_type: Activation type for all blocks.
        :param shortcut: If True, adds the residual connection from input to output.
        :param use_alpha: If True, adds the learnable alpha parameter (multiplier for the residual connection).
        :param expansion: If hidden_channels is None, hidden_channels is set to out_channels * expansion.
        :param hidden_channels: If not None, sets the number of hidden channels used inside the bottleneck blocks.
        :param concat_intermediates: If True, concatenates the outputs of all bottleneck blocks (not only the last one) before the final convolution.
        """
        super(YoloNASCSPLayer, self).__init__()
        if hidden_channels is None:
            hidden_channels = int(out_channels * expansion)
        self.conv1 = Conv(in_channels, hidden_channels, 1, stride=1, activation_type=activation_type)
        self.conv2 = Conv(in_channels, hidden_channels, 1, stride=1, activation_type=activation_type)
        # With concat_intermediates the bottleneck branch contributes (1 + num_bottlenecks) tensors
        # instead of one, hence the channel count of the fusing 1x1 convolution below.
        self.conv3 = Conv(hidden_channels * (2 + concat_intermediates * num_bottlenecks), out_channels, 1, stride=1, activation_type=activation_type)
        module_list = [YoloNASBottleneck(hidden_channels, hidden_channels, block_type, activation_type, shortcut, use_alpha) for _ in range(num_bottlenecks)]
        self.bottlenecks = SequentialWithIntermediates(concat_intermediates, *module_list)

    def forward(self, x: Tensor) -> Tensor:
        x_1 = self.conv1(x)
        x_1 = self.bottlenecks(x_1)
        x_2 = self.conv2(x)
        x = torch.cat((*x_1, x_2), dim=1)
        return self.conv3(x)
@register_detection_module()
class YoloNASStem(BaseDetectionModule):
    """
    Stem module for YoloNAS. Consists of a single QARepVGGBlock with stride of two.
    """

    def __init__(self, in_channels: int, out_channels: int):
        """
        Initialize the YoloNASStem module

        :param in_channels: Number of input channels
        :param out_channels: Number of output channels
        """
        super().__init__(in_channels)
        self._out_channels = out_channels
        self.conv = QARepVGGBlock(in_channels, out_channels, stride=2, use_residual_connection=False)

    @property
    def out_channels(self):
        return self._out_channels

    def forward(self, x: Tensor) -> Tensor:
        return self.conv(x)


@register_detection_module()
class YoloNASStage(BaseDetectionModule):
    """
    A single stage module for YoloNAS. It consists of a downsample block (QARepVGGBlock) followed by YoloNASCSPLayer.
    """

    @resolve_param("activation_type", ActivationsTypeFactory())
    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        num_blocks: int,
        activation_type: Type[nn.Module],
        hidden_channels: int = None,
        concat_intermediates: bool = False,
    ):
        """
        Initialize the YoloNASStage module

        :param in_channels: Number of input channels
        :param out_channels: Number of output channels
        :param num_blocks: Number of bottleneck blocks in the YoloNASCSPLayer
        :param activation_type: Activation type for all blocks
        :param hidden_channels: If not None, sets the number of hidden channels used inside the bottleneck blocks.
        :param concat_intermediates: If True, concatenates the intermediate values from the YoloNASCSPLayer.
        """
        super().__init__(in_channels)
        self._out_channels = out_channels
        self.downsample = QARepVGGBlock(in_channels, out_channels, stride=2, activation_type=activation_type, use_residual_connection=False)
        self.blocks = YoloNASCSPLayer(
            out_channels,
            out_channels,
            num_blocks,
            QARepVGGBlock,
            activation_type,
            True,
            hidden_channels=hidden_channels,
            concat_intermediates=concat_intermediates,
        )

    @property
    def out_channels(self):
        return self._out_channels

    def forward(self, x):
        return self.blocks(self.downsample(x))
@register_detection_module()
class YoloNASUpStage(BaseDetectionModule):
    """
    Upsampling stage for YoloNAS.
    """

    @resolve_param("activation_type", ActivationsTypeFactory())
    def __init__(
        self,
        in_channels: List[int],
        out_channels: int,
        width_mult: float,
        num_blocks: int,
        depth_mult: float,
        activation_type: Type[nn.Module],
        hidden_channels: int = None,
        concat_intermediates: bool = False,
        reduce_channels: bool = False,
    ):
        """
        Initialize the YoloNASUpStage module

        :param in_channels: Number of input channels for each input branch (two or three inputs are supported)
        :param out_channels: Number of output channels
        :param width_mult: Multiplier for the number of channels in the stage.
        :param num_blocks: Number of bottleneck blocks
        :param depth_mult: Multiplier for the number of blocks in the stage.
        :param activation_type: Activation type for all blocks
        :param hidden_channels: If not None, sets the number of hidden channels used inside the bottleneck blocks
        :param concat_intermediates: If True, concatenates the intermediate values from the YoloNASCSPLayer.
        :param reduce_channels: If True, reduces the skip connections and the concatenated features to out_channels with 1x1 convolutions.
        """
        super().__init__(in_channels)
        num_inputs = len(in_channels)
        if num_inputs == 2:
            in_channels, skip_in_channels = in_channels
        else:
            in_channels, skip_in_channels1, skip_in_channels2 = in_channels
            skip_in_channels = skip_in_channels1 + out_channels  # skip2 downsample results in out_channels channels
        out_channels = width_multiplier(out_channels, width_mult, 8)
        num_blocks = max(round(num_blocks * depth_mult), 1) if num_blocks > 1 else num_blocks

        if num_inputs == 2:
            self.reduce_skip = Conv(skip_in_channels, out_channels, 1, 1, activation_type) if reduce_channels else nn.Identity()
        else:
            self.reduce_skip1 = Conv(skip_in_channels1, out_channels, 1, 1, activation_type) if reduce_channels else nn.Identity()
            self.reduce_skip2 = Conv(skip_in_channels2, out_channels, 1, 1, activation_type) if reduce_channels else nn.Identity()

        self.conv = Conv(in_channels, out_channels, 1, 1, activation_type)
        self.upsample = nn.ConvTranspose2d(in_channels=out_channels, out_channels=out_channels, kernel_size=2, stride=2)
        if num_inputs == 3:
            self.downsample = Conv(out_channels if reduce_channels else skip_in_channels2, out_channels, kernel=3, stride=2, activation_type=activation_type)

        self.reduce_after_concat = Conv(num_inputs * out_channels, out_channels, 1, 1, activation_type) if reduce_channels else nn.Identity()

        after_concat_channels = out_channels if reduce_channels else out_channels + skip_in_channels

        self.blocks = YoloNASCSPLayer(
            after_concat_channels,
            out_channels,
            num_blocks,
            QARepVGGBlock,
            activation_type,
            hidden_channels=hidden_channels,
            concat_intermediates=concat_intermediates,
        )

        self._out_channels = [out_channels, out_channels]

    @property
    def out_channels(self):
        return self._out_channels

    def forward(self, inputs):
        if len(inputs) == 2:
            x, skip_x = inputs
            skip_x = [self.reduce_skip(skip_x)]
        else:
            x, skip_x1, skip_x2 = inputs
            skip_x1, skip_x2 = self.reduce_skip1(skip_x1), self.reduce_skip2(skip_x2)
            skip_x = [skip_x1, self.downsample(skip_x2)]
        x_inter = self.conv(x)
        x = self.upsample(x_inter)
        x = torch.cat([x, *skip_x], 1)
        x = self.reduce_after_concat(x)
        x = self.blocks(x)
        return x_inter, x
@register_detection_module()
class YoloNASDownStage(BaseDetectionModule):
    """
    Downsampling stage for YoloNAS: downsamples the first input, concatenates it with the skip input and applies a YoloNASCSPLayer.
    """

    @resolve_param("activation_type", ActivationsTypeFactory())
    def __init__(
        self,
        in_channels: List[int],
        out_channels: int,
        width_mult: float,
        num_blocks: int,
        depth_mult: float,
        activation_type: Type[nn.Module],
        hidden_channels: int = None,
        concat_intermediates: bool = False,
    ):
        """
        Initializes a YoloNASDownStage.

        :param in_channels: Number of input channels for the two inputs (the feature map to downsample and the skip connection).
        :param out_channels: Number of output channels.
        :param width_mult: Multiplier for the number of channels in the stage.
        :param num_blocks: Number of blocks in the stage.
        :param depth_mult: Multiplier for the number of blocks in the stage.
        :param activation_type: Type of activation to use inside the blocks.
        :param hidden_channels: If not None, sets the number of hidden channels used inside the bottleneck blocks.
        :param concat_intermediates: If True, concatenates the intermediate values from the YoloNASCSPLayer.
        """
        super().__init__(in_channels)

        in_channels, skip_in_channels = in_channels
        out_channels = width_multiplier(out_channels, width_mult, 8)
        num_blocks = max(round(num_blocks * depth_mult), 1) if num_blocks > 1 else num_blocks

        self.conv = Conv(in_channels, out_channels // 2, 3, 2, activation_type)
        after_concat_channels = out_channels // 2 + skip_in_channels
        self.blocks = YoloNASCSPLayer(
            in_channels=after_concat_channels,
            out_channels=out_channels,
            num_bottlenecks=num_blocks,
            block_type=partial(Conv, kernel=3, stride=1),
            activation_type=activation_type,
            hidden_channels=hidden_channels,
            concat_intermediates=concat_intermediates,
        )
        self._out_channels = out_channels

    @property
    def out_channels(self):
        return self._out_channels

    def forward(self, inputs):
        x, skip_x = inputs
        x = self.conv(x)
        x = torch.cat([x, skip_x], 1)
        x = self.blocks(x)
        return x
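For reference, a minimal smoke-test sketch (not part of the diff) showing how the stem and a stage compose. It assumes the classes above are in scope and that the activation factory accepts a type directly; the channel and stride values are arbitrary:

```python
import torch
from torch import nn

# Hypothetical sanity check for the modules above.
stem = YoloNASStem(in_channels=3, out_channels=48)
stage = YoloNASStage(in_channels=48, out_channels=96, num_blocks=2, activation_type=nn.ReLU)

x = torch.randn(1, 3, 640, 640)
y = stage(stem(x))

# Stem and stage each downsample by 2, so 640 -> 160 spatially.
print(y.shape)  # torch.Size([1, 96, 160, 160])
```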
@@ -52,7 +52,7 @@ class Pipeline(ABC):
     def __init__(self, model: SgModule, image_processor: Union[Processing, List[Processing]], class_names: List[str], device: Optional[str] = None):
         super().__init__()
         self.device = device or next(model.parameters()).device
-        self.model = model.to(device)
+        self.model = model.to(self.device)
         self.class_names = class_names
 
         if isinstance(image_processor, list):
@@ -265,7 +265,12 @@ class DetectionPipeline(Pipeline):
     def _combine_image_prediction_to_images(
         self, images_predictions: Iterable[ImageDetectionPrediction], n_images: Optional[int] = None
     ) -> ImagesDetectionPrediction:
-        images_predictions = [image_predictions for image_predictions in tqdm(images_predictions, total=n_images, desc="Predicting Images")]
+        if n_images is not None and n_images == 1:
+            # Do not show tqdm progress bar if there is only one image
+            pass
+        else:
+            images_predictions = [image_predictions for image_predictions in tqdm(images_predictions, total=n_images, desc="Predicting Images")]
+
         return ImagesDetectionPrediction(_images_prediction_lst=images_predictions)
 
     def _combine_image_prediction_to_video(
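Two small fixes above: the constructor now moves the model to the resolved `self.device` (which can differ from the raw `device` argument when `None` is passed), and the tqdm progress bar is skipped for single-image prediction. A standalone sketch of the device-resolution pattern (not the SG class itself):

```python
from torch import nn

class TinyPipeline:
    """Illustrates the device-resolution pattern from the hunk above (hypothetical class)."""

    def __init__(self, model: nn.Module, device: str = None):
        # Fall back to wherever the model's parameters already live.
        self.device = device or next(model.parameters()).device
        self.model = model.to(self.device)  # the fix: move to the *resolved* device

p = TinyPipeline(nn.Linear(4, 2))
print(p.device)  # cpu (unless the model was already on another device)
```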
@@ -59,6 +59,10 @@ MODEL_URLS = {
     "ppyoloe_m_coco": "https://deci-pretrained-models.s3.amazonaws.com/ppyolo_e/coco2017_ppyoloe_m.pth",
     "ppyoloe_l_coco": "https://deci-pretrained-models.s3.amazonaws.com/ppyolo_e/coco2017_pp_yoloe_l_best_model_21uffbb8.pth",  # 0.4948
     "ppyoloe_x_coco": "https://deci-pretrained-models.s3.amazonaws.com/ppyolo_e/coco2017_pp_yoloe_x_best_model_z03if91o.pth",  # 0.5115
+    #
+    "yolo_nas_s_coco": "https://deci-pretrained-models.s3.amazonaws.com/yolo_nas/yolo_nas_s_coco2017.pth",
+    "yolo_nas_m_coco": "https://deci-pretrained-models.s3.amazonaws.com/yolo_nas/yolo_nas_m_coco2017.pth",
+    "yolo_nas_l_coco": "https://deci-pretrained-models.s3.amazonaws.com/yolo_nas/yolo_nas_l_coco2017.pth",
 }
 
 PRETRAINED_NUM_CLASSES = {
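With these entries in place, COCO checkpoints resolve for all three YOLO-NAS sizes through the usual factory; the call below mirrors the integration tests added later in this PR:

```python
from super_gradients.training import models

# Downloads yolo_nas_s_coco2017.pth from the URL registered above.
model = models.get("yolo_nas_s", num_classes=80, pretrained_weights="coco")
```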
@@ -305,8 +305,8 @@ def default_ppyoloe_coco_processing_params() -> dict:
     return params
 
 
-def default_deciyolo_coco_processing_params() -> dict:
-    """Processing parameters commonly used for training DeciYolo on COCO dataset.
+def default_yolo_nas_coco_processing_params() -> dict:
+    """Processing parameters commonly used for training YoloNAS on COCO dataset.
     TODO: remove once we load it from the checkpoint
     """
 
@@ -322,8 +322,8 @@ def default_deciyolo_coco_processing_params() -> dict:
     params = dict(
         class_names=COCO_DETECTION_CLASSES_LIST,
         image_processor=image_processor,
-        iou=0.65,
-        conf=0.5,
+        iou=0.7,
+        conf=0.25,
     )
     return params
 
@@ -337,6 +337,6 @@ def get_pretrained_processing_params(model_name: str, pretrained_weights: str) -
             return default_yolox_coco_processing_params()
         elif "ppyoloe" in model_name:
             return default_ppyoloe_coco_processing_params()
-        elif "deciyolo" in model_name:
-            return default_deciyolo_coco_processing_params()
+        elif "yolo_nas" in model_name:
+            return default_yolo_nas_coco_processing_params()
     return dict()
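Besides the DeciYolo -> YoloNAS rename, the default post-processing is loosened (iou 0.65 -> 0.7, conf 0.5 -> 0.25). These are only defaults; a sketch of overriding them per call, assuming `predict` accepts `iou`/`conf` keyword overrides:

```python
from super_gradients.training import models

model = models.get("yolo_nas_s", pretrained_weights="coco")

# Picks up the new defaults (iou=0.7, conf=0.25); the image path is a placeholder.
predictions = model.predict("path/to/image.jpg")

# Hypothetical per-call override back to the previous, stricter values:
predictions = model.predict("path/to/image.jpg", iou=0.65, conf=0.5)
```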
@@ -291,11 +291,21 @@ def load_pretrained_weights(model: torch.nn.Module, architecture: str, pretraine
     :param pretrained_weights: name for the pretrained weights (e.g. imagenet)
     :return: None
     """
+    from super_gradients.common.object_names import Models
+
     model_url_key = architecture + "_" + str(pretrained_weights)
     if model_url_key not in MODEL_URLS.keys():
         raise MissingPretrainedWeightsException(model_url_key)
 
     url = MODEL_URLS[model_url_key]
+
+    if architecture in {Models.YOLO_NAS_S, Models.YOLO_NAS_M, Models.YOLO_NAS_L}:
+        logger.info(
+            "License Notification: YOLO-NAS pre-trained weights are subject to the specific license terms and conditions detailed in \n"
+            "https://github.com/Deci-AI/super-gradients/LICENSE.YOLONAS.md. \n"
+            "By downloading the pre-trained weight files you agree to comply with these terms."
+        )
+
     unique_filename = url.split("https://deci-pretrained-models.s3.amazonaws.com/")[1].replace("/", "_").replace(" ", "_")
     map_location = torch.device("cpu")
     pretrained_state_dict = load_state_dict_from_url(url=url, map_location=map_location, file_name=unique_filename)
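The cache file name is derived from the S3 URL; a worked example using one of the URLs registered above:

```python
url = "https://deci-pretrained-models.s3.amazonaws.com/yolo_nas/yolo_nas_s_coco2017.pth"
unique_filename = url.split("https://deci-pretrained-models.s3.amazonaws.com/")[1].replace("/", "_").replace(" ", "_")
print(unique_filename)  # yolo_nas_yolo_nas_s_coco2017.pth
```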
@@ -1,7 +1,7 @@
 import sys
 import unittest
 
-from tests.integration_tests import EMAIntegrationTest, LRTest, PoseEstimationDatasetIntegrationTest
+from tests.integration_tests import EMAIntegrationTest, LRTest, PoseEstimationDatasetIntegrationTest, YoloNASIntegrationTest
 
 
 class CoreIntegrationTestSuiteRunner:
@@ -19,6 +19,7 @@ class CoreIntegrationTestSuiteRunner:
         self.integration_tests_suite.addTest(self.test_loader.loadTestsFromModule(EMAIntegrationTest))
         self.integration_tests_suite.addTest(self.test_loader.loadTestsFromModule(LRTest))
         self.integration_tests_suite.addTest(self.test_loader.loadTestsFromModule(PoseEstimationDatasetIntegrationTest))
+        self.integration_tests_suite.addTest(self.test_loader.loadTestsFromModule(YoloNASIntegrationTest))
 
 
 if __name__ == "__main__":
@@ -34,6 +34,7 @@ from tests.unit_tests.pose_estimation_dataset_test import TestPoseEstimationData
 from tests.unit_tests.preprocessing_unit_test import PreprocessingUnitTest
 from tests.unit_tests.quantization_utility_tests import QuantizationUtilityTest
 from tests.unit_tests.random_erase_test import RandomEraseTest
+from tests.unit_tests.replace_head_test import ReplaceHeadUnitTest
 from tests.unit_tests.strictload_enum_test import StrictLoadEnumTest
 from tests.unit_tests.train_with_intialized_param_args_test import TrainWithInitializedObjectsTest
 from tests.unit_tests.pretrained_models_unit_test import PretrainedModelsUnitTest
@@ -132,6 +133,7 @@ class CoreUnitTestSuiteRunner:
         self.unit_tests_suite.addTest(self.test_loader.loadTestsFromModule(TestPoseEstimationMetrics))
         self.unit_tests_suite.addTest(self.test_loader.loadTestsFromModule(TestPoseEstimationDataset))
         self.unit_tests_suite.addTest(self.test_loader.loadTestsFromModule(LoadCheckpointTest))
+        self.unit_tests_suite.addTest(self.test_loader.loadTestsFromModule(ReplaceHeadUnitTest))
         self.unit_tests_suite.addTest(self.test_loader.loadTestsFromModule(PreprocessingUnitTest))
 
     def _add_modules_to_end_to_end_tests_suite(self):
@@ -3,5 +3,6 @@
 from tests.integration_tests.ema_train_integration_test import EMAIntegrationTest
 from tests.integration_tests.lr_test import LRTest
 from tests.integration_tests.pose_estimation_dataset_test import PoseEstimationDatasetIntegrationTest
+from tests.integration_tests.yolo_nas_integration_test import YoloNASIntegrationTest
 
-__all__ = ["EMAIntegrationTest", "LRTest", "PoseEstimationDatasetIntegrationTest"]
+__all__ = ["EMAIntegrationTest", "LRTest", "PoseEstimationDatasetIntegrationTest", "YoloNASIntegrationTest"]
import unittest

from super_gradients.training import models
from super_gradients.training.dataloaders import coco2017_val_yolo_nas
from super_gradients.training import Trainer
from super_gradients.training.metrics import DetectionMetrics
from super_gradients.training.models.detection_models.pp_yolo_e import PPYoloEPostPredictionCallback


class YoloNASIntegrationTest(unittest.TestCase):
    def test_yolo_nas_s_coco(self):
        trainer = Trainer("test_yolo_nas_s")
        model = models.get("yolo_nas_s", num_classes=80, pretrained_weights="coco")
        dl = coco2017_val_yolo_nas()
        metric = DetectionMetrics(
            normalize_targets=True,
            post_prediction_callback=PPYoloEPostPredictionCallback(score_threshold=0.03, nms_top_k=1000, max_predictions=300, nms_threshold=0.65),
            num_cls=80,
        )
        metric_values = trainer.test(model=model, test_loader=dl, test_metrics_list=[metric])
        self.assertAlmostEqual(metric_values[metric.map_str], 0.475, delta=0.001)

    def test_yolo_nas_m_coco(self):
        trainer = Trainer("test_yolo_nas_m")
        model = models.get("yolo_nas_m", num_classes=80, pretrained_weights="coco")
        dl = coco2017_val_yolo_nas()
        metric = DetectionMetrics(
            normalize_targets=True,
            post_prediction_callback=PPYoloEPostPredictionCallback(score_threshold=0.03, nms_top_k=1000, max_predictions=300, nms_threshold=0.65),
            num_cls=80,
        )
        metric_values = trainer.test(model=model, test_loader=dl, test_metrics_list=[metric])
        self.assertAlmostEqual(metric_values[metric.map_str], 0.5155, delta=0.001)

    def test_yolo_nas_l_coco(self):
        trainer = Trainer("test_yolo_nas_l")
        model = models.get("yolo_nas_l", num_classes=80, pretrained_weights="coco")
        dl = coco2017_val_yolo_nas()
        metric = DetectionMetrics(
            normalize_targets=True,
            post_prediction_callback=PPYoloEPostPredictionCallback(score_threshold=0.03, nms_top_k=1000, max_predictions=300, nms_threshold=0.65),
            num_cls=80,
        )
        metric_values = trainer.test(model=model, test_loader=dl, test_metrics_list=[metric])
        self.assertAlmostEqual(metric_values[metric.map_str], 0.5222, delta=0.001)


if __name__ == "__main__":
    unittest.main()
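The three tests differ only in model name and expected mAP; a parametrized helper (a sketch, not part of the diff) would collapse the duplication:

```python
def _coco_map(self, model_name: str) -> float:
    # Hypothetical helper: same body as the tests above, parametrized on the model name.
    trainer = Trainer(f"test_{model_name}")
    model = models.get(model_name, num_classes=80, pretrained_weights="coco")
    metric = DetectionMetrics(
        normalize_targets=True,
        post_prediction_callback=PPYoloEPostPredictionCallback(score_threshold=0.03, nms_top_k=1000, max_predictions=300, nms_threshold=0.65),
        num_cls=80,
    )
    metric_values = trainer.test(model=model, test_loader=coco2017_val_yolo_nas(), test_metrics_list=[metric])
    return metric_values[metric.map_str]
```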
import os
import shutil
import unittest

import torch

import super_gradients
from super_gradients.common.object_names import Models
from super_gradients.training import models


class ReplaceHeadUnitTest(unittest.TestCase):
    def setUp(self) -> None:
        self.device = "cuda" if torch.cuda.is_available() and torch.cuda.device_count() > 0 else "cpu"
        super_gradients.init_trainer()

    def test_ppyolo_replace_head(self):
        input = torch.randn(1, 3, 640, 640).to(self.device)
        for model in [Models.PP_YOLOE_S, Models.PP_YOLOE_M, Models.PP_YOLOE_L, Models.PP_YOLOE_X]:
            model = models.get(model, pretrained_weights="coco").to(self.device).eval()
            model.replace_head(new_num_classes=100)
            (_, pred_scores), _ = model.forward(input)
            self.assertEqual(pred_scores.size(2), 100)

    def test_yolo_nas_replace_head(self):
        input = torch.randn(1, 3, 640, 640).to(self.device)
        for model in [Models.YOLO_NAS_S, Models.YOLO_NAS_M, Models.YOLO_NAS_L]:
            model = models.get(model, pretrained_weights="coco").to(self.device).eval()
            model.replace_head(new_num_classes=100)
            (_, pred_scores), _ = model.forward(input)
            self.assertEqual(pred_scores.size(2), 100)

    def tearDown(self) -> None:
        # os.path.exists does not expand "~"; expand it explicitly or the cache is never removed.
        hub_cache = os.path.expanduser("~/.cache/torch/hub/")
        if os.path.exists(hub_cache):
            shutil.rmtree(hub_cache)


if __name__ == "__main__":
    unittest.main()
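The replace-head test above guards the typical transfer-learning flow; in user code it looks roughly like this (the class count of 20 is an arbitrary placeholder):

```python
from super_gradients.common.object_names import Models
from super_gradients.training import models

# Load COCO weights, then swap the head for a custom class count.
model = models.get(Models.YOLO_NAS_S, pretrained_weights="coco")
model.replace_head(new_num_classes=20)
# ... fine-tune with Trainer on the custom dataset ...
```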