# Backbone: a stem, four stages and an SPP context module.
backbone:
  NStageBackbone:

    # Stem producing 48 channels.
    stem:
      YoloNASStem:
        out_channels: 48

    # Stage out_channels go 96 -> 192 -> 384 -> 768; every stage uses relu
    # and has concat_intermediates enabled.
    stages:
      - YoloNASStage:
          out_channels: 96
          num_blocks: 2
          activation_type: relu
          hidden_channels: 96
          concat_intermediates: true

      - YoloNASStage:
          out_channels: 192
          num_blocks: 3
          activation_type: relu
          hidden_channels: 128
          concat_intermediates: true

      - YoloNASStage:
          out_channels: 384
          num_blocks: 5
          activation_type: relu
          hidden_channels: 256
          concat_intermediates: true

      - YoloNASStage:
          out_channels: 768
          num_blocks: 2
          activation_type: relu
          hidden_channels: 512
          concat_intermediates: true

    # SPP keeps the 768 channels of the last stage.
    context_module:
      SPP:
        output_channels: 768
        activation_type: relu
        k: [5, 9, 13]

    # Names listed as the backbone's out_layers.
    out_layers: [stage1, stage2, stage3, context_module]

# Neck: two up-stages (neck1, neck2) followed by two down-stages (neck3, neck4).
# All four use num_blocks: 4, relu, and width/depth multipliers of 1.
neck:
  YoloNASPANNeckWithC2:

    neck1:
      YoloNASUpStage:
        out_channels: 192
        num_blocks: 4
        hidden_channels: 128
        width_mult: 1
        depth_mult: 1
        activation_type: relu
        reduce_channels: true

    neck2:
      YoloNASUpStage:
        out_channels: 96
        num_blocks: 4
        hidden_channels: 128
        width_mult: 1
        depth_mult: 1
        activation_type: relu
        reduce_channels: true

    # Down-stages take the same keys as the up-stages minus reduce_channels.
    neck3:
      YoloNASDownStage:
        out_channels: 192
        num_blocks: 4
        hidden_channels: 128
        width_mult: 1
        depth_mult: 1
        activation_type: relu

    neck4:
      YoloNASDownStage:
        out_channels: 384
        num_blocks: 4
        hidden_channels: 256
        width_mult: 1
        depth_mult: 1
        activation_type: relu

# Heads: three YoloNASDFLHead entries at strides 8, 16 and 32.
# num_classes and reg_max are set once on NDFLHeads.
heads:
  NDFLHeads:
    num_classes: 80
    reg_max: 16
    # inter_channels doubles with each stride (128, 256, 512); width_mult is 1 for every head.
    heads_list:
      - YoloNASDFLHead:
          inter_channels: 128
          width_mult: 1
          first_conv_group_size: 0
          stride: 8
      - YoloNASDFLHead:
          inter_channels: 256
          width_mult: 1
          first_conv_group_size: 0
          stride: 16
      - YoloNASDFLHead:
          inter_channels: 512
          width_mult: 1
          first_conv_group_size: 0
          stride: 32

# Top-level scalar settings that sit next to the backbone/neck/heads sections.
# bn_eps is written with an explicit decimal point: a bare `1e-3` is resolved as a
# string (not a float) by loaders that follow the YAML 1.1 float pattern.
bn_eps: 1.0e-3
bn_momentum: 0.03
inplace_act: true

_convert_: all

          
 
            backbone:
  # Backbone: a stem, four stages and an SPP context module.
  NStageBackbone:

    # Stem producing 48 channels.
    stem:
      YoloNASStem:
        out_channels: 48

    # Stage out_channels go 96 -> 192 -> 384 -> 768, all with relu.
    # concat_intermediates is enabled for the first three stages only.
    stages:
      - YoloNASStage:
          out_channels: 96
          num_blocks: 2
          activation_type: relu
          hidden_channels: 64
          concat_intermediates: true

      - YoloNASStage:
          out_channels: 192
          num_blocks: 3
          activation_type: relu
          hidden_channels: 128
          concat_intermediates: true

      - YoloNASStage:
          out_channels: 384
          num_blocks: 5
          activation_type: relu
          hidden_channels: 256
          concat_intermediates: true

      - YoloNASStage:
          out_channels: 768
          num_blocks: 2
          activation_type: relu
          hidden_channels: 384
          concat_intermediates: false

    # SPP keeps the 768 channels of the last stage.
    context_module:
      SPP:
        output_channels: 768
        activation_type: relu
        k: [5, 9, 13]

    # Names listed as the backbone's out_layers.
    out_layers: [stage1, stage2, stage3, context_module]

# Neck: two up-stages (neck1, neck2) followed by two down-stages (neck3, neck4).
# All four use relu and width/depth multipliers of 1; num_blocks is 2 or 3.
neck:
  YoloNASPANNeckWithC2:

    neck1:
      YoloNASUpStage:
        out_channels: 192
        num_blocks: 2
        hidden_channels: 192
        width_mult: 1
        depth_mult: 1
        activation_type: relu
        reduce_channels: true

    neck2:
      YoloNASUpStage:
        out_channels: 96
        num_blocks: 3
        hidden_channels: 64
        width_mult: 1
        depth_mult: 1
        activation_type: relu
        reduce_channels: true

    # Down-stages take the same keys as the up-stages minus reduce_channels.
    neck3:
      YoloNASDownStage:
        out_channels: 192
        num_blocks: 2
        hidden_channels: 192
        width_mult: 1
        depth_mult: 1
        activation_type: relu

    neck4:
      YoloNASDownStage:
        out_channels: 384
        num_blocks: 3
        hidden_channels: 256
        width_mult: 1
        depth_mult: 1
        activation_type: relu

# Heads: three YoloNASDFLHead entries at strides 8, 16 and 32.
# num_classes and reg_max are set once on NDFLHeads.
heads:
  NDFLHeads:
    num_classes: 80
    reg_max: 16
    # inter_channels doubles with each stride (128, 256, 512); width_mult is 0.75 for every head.
    heads_list:
      - YoloNASDFLHead:
          inter_channels: 128
          width_mult: 0.75
          first_conv_group_size: 0
          stride: 8
      - YoloNASDFLHead:
          inter_channels: 256
          width_mult: 0.75
          first_conv_group_size: 0
          stride: 16
      - YoloNASDFLHead:
          inter_channels: 512
          width_mult: 0.75
          first_conv_group_size: 0
          stride: 32

# Top-level scalar settings that sit next to the backbone/neck/heads sections.
# bn_eps is written with an explicit decimal point: a bare `1e-3` is resolved as a
# string (not a float) by loaders that follow the YAML 1.1 float pattern.
bn_eps: 1.0e-3
bn_momentum: 0.03
inplace_act: true

_convert_: all

          
 
            backbone:
  # Backbone: a stem, four stages and an SPP context module.
  NStageBackbone:

    # Stem producing 48 channels.
    stem:
      YoloNASStem:
        out_channels: 48

    # Stage out_channels go 96 -> 192 -> 384 -> 768, all with relu.
    # concat_intermediates is disabled for every stage.
    stages:
      - YoloNASStage:
          out_channels: 96
          num_blocks: 2
          activation_type: relu
          hidden_channels: 32
          concat_intermediates: false

      - YoloNASStage:
          out_channels: 192
          num_blocks: 3
          activation_type: relu
          hidden_channels: 64
          concat_intermediates: false

      - YoloNASStage:
          out_channels: 384
          num_blocks: 5
          activation_type: relu
          hidden_channels: 96
          concat_intermediates: false

      - YoloNASStage:
          out_channels: 768
          num_blocks: 2
          activation_type: relu
          hidden_channels: 192
          concat_intermediates: false

    # SPP keeps the 768 channels of the last stage.
    context_module:
      SPP:
        output_channels: 768
        activation_type: relu
        k: [5, 9, 13]

    # Names listed as the backbone's out_layers.
    out_layers: [stage1, stage2, stage3, context_module]

# Neck: two up-stages (neck1, neck2) followed by two down-stages (neck3, neck4).
# All four use num_blocks: 2, relu, and width/depth multipliers of 1.
neck:
  YoloNASPANNeckWithC2:

    neck1:
      YoloNASUpStage:
        out_channels: 192
        num_blocks: 2
        hidden_channels: 64
        width_mult: 1
        depth_mult: 1
        activation_type: relu
        reduce_channels: true

    neck2:
      YoloNASUpStage:
        out_channels: 96
        num_blocks: 2
        hidden_channels: 48
        width_mult: 1
        depth_mult: 1
        activation_type: relu
        reduce_channels: true

    # Down-stages take the same keys as the up-stages minus reduce_channels.
    neck3:
      YoloNASDownStage:
        out_channels: 192
        num_blocks: 2
        hidden_channels: 64
        width_mult: 1
        depth_mult: 1
        activation_type: relu

    neck4:
      YoloNASDownStage:
        out_channels: 384
        num_blocks: 2
        hidden_channels: 64
        width_mult: 1
        depth_mult: 1
        activation_type: relu

# Heads: three YoloNASDFLHead entries at strides 8, 16 and 32.
# num_classes and reg_max are set once on NDFLHeads.
heads:
  NDFLHeads:
    num_classes: 80
    reg_max: 16
    # inter_channels doubles with each stride (128, 256, 512); width_mult is 0.5 for every head.
    heads_list:
      - YoloNASDFLHead:
          inter_channels: 128
          width_mult: 0.5
          first_conv_group_size: 0
          stride: 8
      - YoloNASDFLHead:
          inter_channels: 256
          width_mult: 0.5
          first_conv_group_size: 0
          stride: 16
      - YoloNASDFLHead:
          inter_channels: 512
          width_mult: 0.5
          first_conv_group_size: 0
          stride: 32

# Top-level scalar settings that sit next to the backbone/neck/heads sections.
# bn_eps is written with an explicit decimal point: a bare `1e-3` is resolved as a
# string (not a float) by loaders that follow the YAML 1.1 float pattern.
bn_eps: 1.0e-3
bn_momentum: 0.03
inplace_act: true

_convert_: all

          
 
            # YoloNAS-S Detection training on COCO2017 Dataset:
# This training recipe is for demonstration purposes only. Pretrained models were trained using a different recipe.
# So it will not be possible to reproduce the results of the pretrained models using this recipe.

# Instructions:
#   0. Make sure that the data is stored in dataset_params.dataset_dir or add "dataset_params.data_dir=<PATH-TO-DATASET>" at the end of the command below (feel free to check ReadMe)
#   1. Move to the project root (where you will find the ReadMe and src folder)
#   2. Run the command you want:
#         yolo_nas_s: python src/super_gradients/examples/train_from_recipe_example/train_from_recipe.py --config-name=coco2017_yolo_nas_s
#

# Config groups composed into this recipe. The order is kept exactly as given;
# `_self_` sits before `variable_setup`.
defaults:
  - training_hyperparams: coco2017_yolo_nas_train_params
  - dataset_params: coco_detection_yolo_nas_dataset_params
  - arch_params: yolo_nas_s_arch_params
  - checkpoint_params: default_checkpoint_params
  - _self_
  - variable_setup

# Dataloader names for training and validation.
train_dataloader: coco2017_train_yolo_nas
val_dataloader: coco2017_val_yolo_nas

load_checkpoint: false
resume: false

# Values set under the dataset_params / arch_params / training_hyperparams groups.
dataset_params:
  train_dataloader_params:
    batch_size: 32

arch_params:
  num_classes: 80

training_hyperparams:
  # Mirrors the top-level `resume` flag.
  resume: ${resume}
  mixed_precision: true

architecture: yolo_nas_s

multi_gpu: DDP
num_gpus: 8

# experiment_name is built from `architecture` and `experiment_suffix`.
experiment_suffix: ""
experiment_name: coco2017_${architecture}${experiment_suffix}

          
@@ -30,15 +30,15 @@ train_dataset_params:
 
                                     mixup_scale: [ 0.5, 1.5 ]         # random rescale range for the additional sample in mixup
                
 
                                     prob: 0.5                       # probability to apply per-sample mixup
                
 
                                     flip_prob: 0.5                  # probability to apply horizontal flip
                
 
                            -    - DetectionStandardizeImage:
                
 
                            -        max_value: 255.
                
 
                                 - DetectionPaddedRescale:
                
 
                                     input_dim: [640, 640]
                
 
                                     max_targets: 120
                
 
                                     pad_value: 114
                
 
                            +    - DetectionStandardize:
                
 
                            +        max_value: 255.
                
 
                                 - DetectionTargetsFormatTransform:
                
 
                                     max_targets: 256
                
 
                            -        output_format: LABEL_NORMALIZED_CXCYWH
                
 
                            +        output_format: LABEL_CXCYWH
                
 
                               tight_box_rotation: False
                
 
                               class_inclusion_list:
                
@@ -67,13 +67,13 @@ val_dataset_params:
 
                                 - DetectionPadToSize:
                
 
                                     output_size: [640, 640]
                
 
                                     pad_value: 114
                
 
                            -    - DetectionStandardizeImage:
                
 
                            +    - DetectionStandardize:
                
 
                                     max_value: 255.
                
 
                                 - DetectionImagePermute
                
 
                                 - DetectionTargetsFormatTransform:
                
 
                                     max_targets: 50
                
 
                                     input_dim: [640, 640]
                
 
                            -        output_format: LABEL_NORMALIZED_CXCYWH
                
 
                            +        output_format: LABEL_CXCYWH
                
 
                               tight_box_rotation: False
                
 
                               class_inclusion_list:
                
 
                               max_num_samples:
                
@@ -83,6 +83,7 @@ val_dataloader_params:
 
                               batch_size: 25
                
 
                               num_workers: 8
                
 
                               drop_last: False
                
 
                            +  shuffle: False
                
 
                               pin_memory: True
                
 
                               collate_fn:
                
 
                                 _target_: super_gradients.training.utils.detection_utils.CrowdDetectionCollateFN
                
@@ -9,18 +9,27 @@ train_dataset_params:
 
                               input_dim: [640, 640]
                
 
                               cache_dir:
                
 
                               cache: False
                
 
                            +  ignore_empty_annotations: False
                
 
                               transforms:
                
 
                            +    - DetectionMosaic:
                
 
                            +        input_dim: ${dataset_params.train_dataset_params.input_dim}
                
 
                            +        prob: 1.
                
 
                                 - DetectionRandomAffine:
                
 
                                     degrees: 0.                  # rotation degrees, randomly sampled from [-degrees, degrees]
                
 
                                     translate: 0.1                # image translation fraction
                
 
                                     scales: [ 0.5, 1.5 ]              # random rescale range (keeps size by padding/cropping) after mosaic transform.
                
 
                                     shear: 0.0                    # shear degrees, randomly sampled from [-degrees, degrees]
                
 
                                     target_size: ${dataset_params.train_dataset_params.input_dim}
                
 
                            -        filter_box_candidates: True   # whether to filter out transformed bboxes by edge size, area ratio, and aspect ratio.
                
 
                            +        filter_box_candidates: False  # whether to filter out transformed bboxes by edge size, area ratio, and aspect ratio.
                
 
                                     wh_thr: 2                     # edge size threshold when filter_box_candidates = True (pixels)
                
 
                                     area_thr: 0.1                 # threshold for area ratio between original image and the transformed one, when filter_box_candidates = True
                
 
                                     ar_thr: 20                    # aspect ratio threshold when filter_box_candidates = True
                
 
                                     border_value: 128
                
 
                            +#    - DetectionMixup:
                
 
                            +#        input_dim: ${dataset_params.train_dataset_params.input_dim}
                
 
                            +#        mixup_scale: [ 0.5, 1.5 ]         # random rescale range for the additional sample in mixup
                
 
                            +#        prob: 1.0                       # probability to apply per-sample mixup
                
 
                            +#        flip_prob: 0.5                  # probability to apply horizontal flip
                
 
                                 - DetectionHSV:
                
 
                                     prob: 1.0                       # probability to apply HSV transform
                
 
                                     hgain: 5                        # HSV transform hue gain (randomly sampled from [-hgain, hgain])
                
@@ -30,8 +39,11 @@ train_dataset_params:
 
                                     prob: 0.5                       # probability to apply horizontal flip
                
 
                                 - DetectionPaddedRescale:
                
 
                                     input_dim: ${dataset_params.train_dataset_params.input_dim}
                
 
                            -        max_targets: 120
                
 
                            +        max_targets: 300
                
 
                            +    - DetectionStandardize:
                
 
                            +        max_value: 255.
                
 
                                 - DetectionTargetsFormatTransform:
                
 
                            +        max_targets: 300
                
 
                                     input_dim: ${dataset_params.train_dataset_params.input_dim}
                
 
                                     output_format: LABEL_CXCYWH
                
 
                               tight_box_rotation: False
                
@@ -43,8 +55,8 @@ train_dataset_params:
 
                             train_dataloader_params:
                
 
                               shuffle: True
                
 
                               batch_size: 16
                
 
                            -  num_workers: 0
                
 
                            -  sampler:
                
 
                            +  min_samples: 512
                
 
                            +  num_workers: 4
                
 
                               drop_last: False
                
 
                               pin_memory: True
                
 
                               worker_init_fn:
                
@@ -60,11 +72,16 @@ val_dataset_params:
 
                               input_dim: [640, 640]
                
 
                               cache_dir:
                
 
                               cache: False
                
 
                            +  ignore_empty_annotations: False
                
 
                               transforms:
                
 
                               - DetectionPaddedRescale:
                
 
                                   input_dim: ${dataset_params.val_dataset_params.input_dim}
                
 
                            +      max_targets: 300
                
 
                            +      pad_value: 114
                
 
                            +  - DetectionStandardize:
                
 
                            +      max_value: 255.
                
 
                               - DetectionTargetsFormatTransform:
                
 
                            -      max_targets: 50
                
 
                            +      max_targets: 300
                
 
                                   input_dim: ${dataset_params.val_dataset_params.input_dim}
                
 
                                   output_format: LABEL_CXCYWH
                
 
                               tight_box_rotation: False
                
@@ -74,10 +91,10 @@ val_dataset_params:
 
                               verbose: 0
                
 
                             val_dataloader_params:
                
 
                            -  batch_size: 64
                
 
                            -  num_workers: 0
                
 
                            -  sampler:
                
 
                            +  batch_size: 32
                
 
                            +  num_workers: 4
                
 
                               drop_last: False
                
 
                            +  shuffle: False
                
 
                               pin_memory: True
                
 
                               collate_fn: # collate function for valset
                
 
                                 _target_: super_gradients.training.utils.detection_utils.CrowdDetectionCollateFN
                
 
            # A recipe to fine-tune YoloNAS on Roboflow datasets.
# Check out the datasets at https://universe.roboflow.com/roboflow-100?ref=blog.roboflow.com
#
# `dataset_name` refers to the official name of the dataset.
# You can find it in the url of the dataset: https://universe.roboflow.com/roboflow-100/digits-t2eg6 -> digits-t2eg6
#
# Example: python -m super_gradients.train_from_recipe --config-name=roboflow_yolo_nas_m dataset_name=digits-t2eg6

# Config groups composed into this recipe. The order is kept exactly as given;
# `_self_` sits before `variable_setup`.
defaults:
  - training_hyperparams: coco2017_yolo_nas_train_params
  - dataset_params: roboflow_detection_dataset_params
  - checkpoint_params: default_checkpoint_params
  - arch_params: yolo_nas_m_arch_params
  - _self_
  - variable_setup

# Dataloader names for training and validation.
train_dataloader: roboflow_train_yolox
val_dataloader: roboflow_val_yolox

dataset_name: ??? # Placeholder for the name of the dataset you want to use (e.g. "digits-t2eg6")
dataset_params:
  dataset_name: ${dataset_name}

  train_dataloader_params:
    batch_size: 12

  val_dataloader_params:
    batch_size: 16

# num_classes comes from the `roboflow_dataset_num_classes` resolver applied to
# dataset_name, and is reused by arch_params below.
num_classes: ${roboflow_dataset_num_classes:${dataset_name}}

architecture: yolo_nas_m
arch_params:
  num_classes: ${num_classes}

load_checkpoint: false
checkpoint_params:
  pretrained_weights: coco


# By default, results are saved in the checkpoints directory (result_path left null).
result_path: null
resume: false

# Training hyperparameters set by this recipe.
training_hyperparams:
  # Mirrors the top-level `resume` flag.
  resume: ${resume}
  zero_weight_decay_on_bias_and_bn: true

  lr_warmup_epochs: 3
  warmup_mode: linear_epoch_step

  # Written with an explicit decimal point: a bare `4e-4` is resolved as a string
  # (not a float) by loaders that follow the YAML 1.1 float pattern.
  initial_lr: 4.0e-4
  cosine_final_lr_ratio: 0.1

  optimizer_params:
    weight_decay: 0.0001

  ema: true
  ema_params:
    decay: 0.9

  max_epochs: 100
  mixed_precision: true
  criterion_params:
    num_classes: ${num_classes}

  phase_callbacks: []
  loss:
    ppyoloe_loss:
      num_classes: ${num_classes}
      reg_max: 16

  # A single DetectionMetrics_050 entry; metric_to_watch below names 'mAP@0.50'.
  valid_metrics_list:
    - DetectionMetrics_050:
        score_thres: 0.1
        top_k_predictions: 300
        num_cls: ${num_classes}
        normalize_targets: true
        post_prediction_callback:
          _target_: super_gradients.training.models.detection_models.pp_yolo_e.PPYoloEPostPredictionCallback
          score_threshold: 0.01
          nms_top_k: 1000
          max_predictions: 300
          nms_threshold: 0.7

  metric_to_watch: 'mAP@0.50'

# Run configuration and experiment naming.
# NOTE(review): unquoted `Off` is resolved as boolean false by YAML 1.1 loaders;
# confirm the consumer expects that rather than the string "Off".
multi_gpu: Off
num_gpus: 1

# experiment_name is built from `architecture`, `dataset_name` and `experiment_suffix`.
experiment_suffix: ""
experiment_name: ${architecture}_roboflow_${dataset_name}${experiment_suffix}

          
 
            # A recipe to fine-tune YoloNAS on Roboflow datasets.
# Check out the datasets at https://universe.roboflow.com/roboflow-100?ref=blog.roboflow.com
#
# `dataset_name` refers to the official name of the dataset.
# You can find it in the url of the dataset: https://universe.roboflow.com/roboflow-100/digits-t2eg6 -> digits-t2eg6
#
# Example: python -m super_gradients.train_from_recipe --config-name=roboflow_yolo_nas_s dataset_name=digits-t2eg6

# Config groups composed into this recipe. The order is kept exactly as given;
# `_self_` sits before `variable_setup`.
defaults:
  - training_hyperparams: coco2017_yolo_nas_train_params
  - dataset_params: roboflow_detection_dataset_params
  - checkpoint_params: default_checkpoint_params
  - arch_params: yolo_nas_s_arch_params
  - _self_
  - variable_setup

# Dataloader names for training and validation.
train_dataloader: roboflow_train_yolox
val_dataloader: roboflow_val_yolox

dataset_name: ??? # Placeholder for the name of the dataset you want to use (e.g. "digits-t2eg6")
dataset_params:
  dataset_name: ${dataset_name}

  train_dataloader_params:
    batch_size: 16

  val_dataloader_params:
    batch_size: 16

# num_classes comes from the `roboflow_dataset_num_classes` resolver applied to
# dataset_name, and is reused by arch_params below.
num_classes: ${roboflow_dataset_num_classes:${dataset_name}}

architecture: yolo_nas_s
arch_params:
  num_classes: ${num_classes}

load_checkpoint: false
checkpoint_params:
  pretrained_weights: coco


# By default, results are saved in the checkpoints directory (result_path left null).
result_path: null
resume: false

# Training hyperparameters set by this recipe.
training_hyperparams:
  # Mirrors the top-level `resume` flag.
  resume: ${resume}
  zero_weight_decay_on_bias_and_bn: true

  lr_warmup_epochs: 3
  warmup_mode: linear_epoch_step

  # Written with an explicit decimal point: a bare `5e-4` is resolved as a string
  # (not a float) by loaders that follow the YAML 1.1 float pattern.
  initial_lr: 5.0e-4
  cosine_final_lr_ratio: 0.1

  optimizer_params:
    weight_decay: 0.0001

  ema: true
  ema_params:
    decay: 0.9

  max_epochs: 100
  mixed_precision: true
  criterion_params:
    num_classes: ${num_classes}

  phase_callbacks: []
  loss:
    ppyoloe_loss:
      num_classes: ${num_classes}
      reg_max: 16

  # A single DetectionMetrics_050 entry; metric_to_watch below names 'mAP@0.50'.
  valid_metrics_list:
    - DetectionMetrics_050:
        score_thres: 0.1
        top_k_predictions: 300
        num_cls: ${num_classes}
        normalize_targets: true
        post_prediction_callback:
          _target_: super_gradients.training.models.detection_models.pp_yolo_e.PPYoloEPostPredictionCallback
          score_threshold: 0.01
          nms_top_k: 1000
          max_predictions: 300
          nms_threshold: 0.7

  metric_to_watch: 'mAP@0.50'

# Run configuration and experiment naming.
# NOTE(review): unquoted `Off` is resolved as boolean false by YAML 1.1 loaders;
# confirm the consumer expects that rather than the string "Off".
multi_gpu: Off
num_gpus: 1

# experiment_name is built from `architecture`, `dataset_name` and `experiment_suffix`.
experiment_suffix: ""
experiment_name: ${architecture}_roboflow_${dataset_name}${experiment_suffix}

          
 
            defaults:
  # Starts from the roboflow_yolo_nas_s recipe and adds the default quantization
  # params group; `_self_` is listed last.
  - roboflow_yolo_nas_s
  - quantization_params: default_quantization_params
  - _self_

# Checkpoint to start from. `???` leaves checkpoint_path unset here, so it has to
# be supplied by the caller (OmegaConf's missing-value marker).
checkpoint_params:
  checkpoint_path: ???
  strict_load: no_key_matching

# A single QATRecipeModificationCallback entry with its divisors, factors and flags.
pre_launch_callbacks_list:
  - QATRecipeModificationCallback:
      batch_size_divisor: 2
      max_epochs_divisor: 10
      lr_decay_factor: 0.01
      warmup_epochs_divisor: 10
      cosine_final_lr_ratio: 0.01
      disable_phase_callbacks: true
      disable_augmentations: false

          
 
            defaults:
  # Inherits from default_train_params.
  - default_train_params

# Epoch budget, warm-up and learning-rate schedule.
max_epochs: 300

# Warm-up is configured in steps (lr_warmup_steps: 1000); lr_warmup_epochs is 0.
# Learning rates carry an explicit decimal point: bare `1e-6` / `2e-4` are resolved
# as strings (not floats) by loaders that follow the YAML 1.1 float pattern.
warmup_mode: "linear_batch_step"
warmup_initial_lr: 1.0e-6
lr_warmup_steps: 1000
lr_warmup_epochs: 0

initial_lr: 2.0e-4
lr_mode: cosine
cosine_final_lr_ratio: 0.1

zero_weight_decay_on_bias_and_bn: true
batch_accumulate: 1

save_ckpt_epoch_list: [100, 200, 250]

# Loss: ppyoloe_loss; num_classes is taken from arch_params.
loss:
  ppyoloe_loss:
    use_static_assigner: false
    num_classes: ${arch_params.num_classes}
    reg_max: 16

# Optimizer and its parameters.
optimizer: AdamW
optimizer_params:
  weight_decay: 0.00001

# EMA is enabled, with a threshold decay type.
ema: true
ema_params:
  decay: 0.9997
  decay_type: threshold

mixed_precision: false
sync_bn: true

# A single DetectionMetrics entry; num_cls is taken from arch_params.
valid_metrics_list:
  - DetectionMetrics:
      score_thres: 0.1
      top_k_predictions: 300
      num_cls: ${arch_params.num_classes}
      normalize_targets: true
      post_prediction_callback:
        _target_: super_gradients.training.models.detection_models.pp_yolo_e.PPYoloEPostPredictionCallback
        score_threshold: 0.01
        nms_top_k: 1000
        max_predictions: 300
        nms_threshold: 0.7

# No pre-prediction callback is set.
pre_prediction_callback: null

# The watched metric; larger values are marked as better.
metric_to_watch: 'mAP@0.50:0.95'
greater_metric_to_watch_is_better: true

_convert_: all