#394 Add eval recipe

Merged
Ghost merged 1 commit into Deci-AI:master from deci-ai:feature/SG-246-add_eval_recipe
31 changed files with 396 additions and 25 deletions
  1. src/super_gradients/common/environment/env_helpers.py (+2 / -1)
  2. src/super_gradients/examples/evaluate_checkpoint_example/__init__.py (+0 / -0)
  3. src/super_gradients/examples/evaluate_checkpoint_example/evaluate_checkpoint.py (+40 / -0)
  4. src/super_gradients/examples/evaluate_from_recipe_example/__init__.py (+0 / -0)
  5. src/super_gradients/examples/evaluate_from_recipe_example/evaluate_from_recipe.py (+41 / -0)
  6. src/super_gradients/examples/resume_experiment_example/__init__.py (+0 / -0)
  7. src/super_gradients/examples/resume_experiment_example/resume_experiment.py (+18 / -0)
  8. src/super_gradients/recipes/cifar10_resnet.yaml (+8 / -1)
  9. src/super_gradients/recipes/cityscapes_ddrnet.yaml (+3 / -2)
 10. src/super_gradients/recipes/cityscapes_regseg48.yaml (+4 / -3)
 11. src/super_gradients/recipes/cityscapes_stdc_base.yaml (+6 / -0)
 12. src/super_gradients/recipes/cityscapes_stdc_seg50.yaml (+4 / -1)
 13. src/super_gradients/recipes/cityscapes_stdc_seg75.yaml (+3 / -0)
 14. src/super_gradients/recipes/coco2017_ssd_lite_mobilenet_v2.yaml (+7 / -0)
 15. src/super_gradients/recipes/coco2017_yolox.yaml (+5 / -0)
 16. src/super_gradients/recipes/coco_segmentation_shelfnet_lw.yaml (+4 / -1)
 17. src/super_gradients/recipes/imagenet_efficientnet.yaml (+7 / -0)
 18. src/super_gradients/recipes/imagenet_mobilenetv2.yaml (+7 / -0)
 19. src/super_gradients/recipes/imagenet_mobilenetv3_base.yaml (+7 / -0)
 20. src/super_gradients/recipes/imagenet_mobilenetv3_large.yaml (+7 / -0)
 21. src/super_gradients/recipes/imagenet_mobilenetv3_small.yaml (+7 / -0)
 22. src/super_gradients/recipes/imagenet_regnetY.yaml (+8 / -1)
 23. src/super_gradients/recipes/imagenet_repvgg.yaml (+7 / -0)
 24. src/super_gradients/recipes/imagenet_resnet50.yaml (+8 / -1)
 25. src/super_gradients/recipes/imagenet_resnet50_kd.yaml (+8 / -1)
 26. src/super_gradients/recipes/imagenet_vit_base.yaml (+8 / -1)
 27. src/super_gradients/recipes/imagenet_vit_large.yaml (+8 / -1)
 28. src/super_gradients/training/sg_trainer/sg_trainer.py (+91 / -11)
 29. src/super_gradients/training/utils/checkpoint_utils.py (+23 / -0)
 30. src/super_gradients/training/utils/hydra_utils.py (+53 / -0)
 31. src/super_gradients/training/utils/sg_trainer_utils.py (+2 / -0)
src/super_gradients/common/environment/env_helpers.py
@@ -4,6 +4,7 @@ import os
 import sys
 import socket
 from functools import wraps
+from typing import Any

 from omegaconf import OmegaConf

@@ -96,7 +97,7 @@ def register_hydra_resolvers():
     OmegaConf.register_new_resolver("class", lambda *args: get_cls(*args), replace=True)


-def pop_arg(arg_name: str, default_value: int = None) -> argparse.Namespace:
+def pop_arg(arg_name: str, default_value: Any = None) -> Any:
     """Get the specified args and remove them from argv"""

     parser = argparse.ArgumentParser()
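The `pop_arg` signature above is widened from an `int` default to `Any` so the new example scripts can pop string-valued arguments such as `ckpt_name`. The function body is not part of this diff; the following is only a hypothetical stand-in, under that assumption, to illustrate the intended behaviour:

# Hypothetical stand-in for pop_arg (the real body in env_helpers.py is not shown in this diff).
# It illustrates why the annotations are now Any: the popped value can be a str, int, None, ...
import argparse
import sys
from typing import Any


def pop_arg(arg_name: str, default_value: Any = None) -> Any:
    """Get the specified arg and remove it from argv."""
    parser = argparse.ArgumentParser()
    parser.add_argument(f"--{arg_name}", default=default_value)
    args, _ = parser.parse_known_args()

    # In this sketch only the --name=value form is stripped from sys.argv,
    # so that a later parser (e.g. Hydra) does not see the consumed argument.
    sys.argv = [arg for arg in sys.argv if not arg.startswith(f"--{arg_name}=")]
    return vars(args)[arg_name]


ckpt_name = pop_arg("ckpt_name", default_value="ckpt_latest.pth")  # a str default is now fine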
src/super_gradients/examples/evaluate_checkpoint_example/evaluate_checkpoint.py

"""
Evaluate a checkpoint resulting from an experiment that you ran previously.

Use this script if:
    - You want to evaluate a checkpoint resulting from one of your previous experiments,
      using the same parameters (dataset, valid_metrics, ...) as used during the training of the experiment.

Don't use this script if:
    - You want to train and evaluate a model (use examples/train_from_recipe_example)
    - You want to evaluate a pretrained model from the model zoo (use examples/evaluate_from_recipe_example)
    - You want to evaluate a checkpoint from one of your previous experiments, but with different validation parameters
      such as dataset params or metrics (use examples/evaluate_from_recipe_example)

Note:
    The parameters will be unchanged even if the recipe used for that experiment was changed since then.
    This is to ensure that validation of the experiment will remain exactly the same as during training.

Example: python evaluate_checkpoint.py --experiment_name=my_experiment_name --ckpt_name=average_model.pth
    -> Evaluate the checkpoint average_model from experiment my_experiment_name.
"""
from super_gradients import Trainer, init_trainer
from super_gradients.common.environment.env_helpers import pop_arg


def main() -> None:
    init_trainer()
    experiment_name = pop_arg("experiment_name")
    ckpt_name = pop_arg("ckpt_name", default_value="ckpt_latest.pth")
    ckpt_root_dir = pop_arg("ckpt_root_dir", default_value=None)
    Trainer.evaluate_checkpoint(experiment_name=experiment_name, ckpt_name=ckpt_name, ckpt_root_dir=ckpt_root_dir)


def run():
    init_trainer()
    main()


if __name__ == "__main__":
    run()
src/super_gradients/examples/evaluate_from_recipe_example/evaluate_from_recipe.py

"""
Evaluate a SuperGradients recipe.

Use this script if:
    - You want to evaluate a pretrained model from the model zoo
    - You want to evaluate a checkpoint from one of your previous experiments, but with different validation parameters
      such as dataset params or metrics

Don't use this script if:
    - You want to train and evaluate a model (use examples/train_from_recipe_example)
    - You want to evaluate a checkpoint from one of your previous experiments, using the same parameters as used during the
      training of the experiment (use examples/evaluate_checkpoint_example)

Note:
    This script does NOT run TRAINING, so make sure that the recipe loads a PRETRAINED MODEL,
    either from one of your checkpoints or from pretrained weights.

General use: python evaluate_from_recipe.py --config-name="DESIRED_RECIPE"
    -> Evaluate a model according to the parameters set in "DESIRED_RECIPE"

For a recipe's specific instructions and details, refer to its configuration file in the recipes directory.
"""
from omegaconf import DictConfig
import hydra
import pkg_resources

from super_gradients import Trainer, init_trainer


@hydra.main(config_path=pkg_resources.resource_filename("super_gradients.recipes", ""), version_base="1.2")
def main(cfg: DictConfig) -> None:
    Trainer.evaluate_from_recipe(cfg)


def run():
    init_trainer()
    main()


if __name__ == "__main__":
    run()
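For completeness, the same evaluation flow can in principle be driven from Python with Hydra's compose API instead of the `@hydra.main` CLI entry point. This is only a sketch: the recipe name is one of the recipes touched by this PR, the override path is a placeholder, and `version_base` handling may differ across Hydra versions.

# Sketch: compose a packaged recipe and evaluate it programmatically (override path is a placeholder).
import pkg_resources
from hydra import initialize_config_dir, compose

from super_gradients import Trainer, init_trainer

init_trainer()
recipes_dir = pkg_resources.resource_filename("super_gradients.recipes", "")
with initialize_config_dir(config_dir=recipes_dir):
    cfg = compose(config_name="cifar10_resnet", overrides=["ckpt_root_dir=/path/to/checkpoints"])
Trainer.evaluate_from_recipe(cfg)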
src/super_gradients/examples/resume_experiment_example/resume_experiment.py

"""
Example code for resuming SuperGradients recipes.

General use: python resume_experiment.py --experiment_name=<PREVIOUSLY-RUN-EXPERIMENT>
"""
from super_gradients import Trainer, init_trainer
from super_gradients.common.environment.env_helpers import pop_arg


def main() -> None:
    init_trainer()
    experiment_name = pop_arg("experiment_name")
    ckpt_root_dir = pop_arg("ckpt_root_dir")
    Trainer.resume_experiment(experiment_name=experiment_name, ckpt_root_dir=ckpt_root_dir)


if __name__ == "__main__":
    main()
src/super_gradients/recipes/cifar10_resnet.yaml
@@ -27,4 +27,11 @@ ckpt_root_dir:

 architecture: resnet18_cifar

-experiment_name: resnet18_cifar
+experiment_name: resnet18_cifar
+
+
+# THE FOLLOWING PARAMS ARE DIRECTLY USED BY HYDRA
+hydra:
+  run:
+    # Set the output directory (i.e. where .hydra folder that logs all the input params will be generated)
+    dir: ${hydra_output_dir:${ckpt_root_dir}, ${experiment_name}}
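The `dir` value relies on a custom `hydra_output_dir` resolver rather than a built-in Hydra interpolation. Its real registration is not shown in this PR (presumably it sits next to the `class` resolver in `register_hydra_resolvers`); the sketch below only illustrates, under that assumption, how the two interpolated arguments could be mapped to the checkpoint directory:

# Hypothetical registration of the hydra_output_dir resolver; the actual implementation may differ.
# It reuses get_checkpoints_dir_path, which this PR adds to checkpoint_utils.py.
from omegaconf import OmegaConf

from super_gradients.training.utils.checkpoint_utils import get_checkpoints_dir_path


def _hydra_output_dir_resolver(ckpt_root_dir, experiment_name):
    # ${hydra_output_dir:${ckpt_root_dir}, ${experiment_name}} passes ckpt_root_dir first.
    return get_checkpoints_dir_path(experiment_name=experiment_name, ckpt_root_dir=ckpt_root_dir or None)


OmegaConf.register_new_resolver("hydra_output_dir", _hydra_output_dir_resolver, replace=True)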
src/super_gradients/recipes/cityscapes_ddrnet.yaml
@@ -84,8 +84,9 @@ ckpt_root_dir:
 multi_gpu: DDP
 num_gpus: 4

+
+# THE FOLLOWING PARAMS ARE DIRECTLY USED BY HYDRA
 hydra:
   run:
+    # Set the output directory (i.e. where .hydra folder that logs all the input params will be generated)
     dir: ${hydra_output_dir:${ckpt_root_dir}, ${experiment_name}}
-
-
src/super_gradients/recipes/cityscapes_regseg48.yaml
@@ -90,8 +90,9 @@ experiment_name: ${architecture}_cityscapes
 multi_gpu: AUTO
 num_gpus: 4

+
+# THE FOLLOWING PARAMS ARE DIRECTLY USED BY HYDRA
 hydra:
-  searchpath:
-    - pkg://super_gradients.recipes
   run:
-    dir: ${hydra_output_dir:${ckpt_root_dir}, ${experiment_name}}
+    # Set the output directory (i.e. where .hydra folder that logs all the input params will be generated)
+    dir: ${hydra_output_dir:${ckpt_root_dir}, ${experiment_name}}
src/super_gradients/recipes/cityscapes_stdc_base.yaml
@@ -29,3 +29,9 @@ ckpt_root_dir:

 multi_gpu: DDP

+
+# THE FOLLOWING PARAMS ARE DIRECTLY USED BY HYDRA
+hydra:
+  run:
+    # Set the output directory (i.e. where .hydra folder that logs all the input params will be generated)
+    dir: ${hydra_output_dir:${ckpt_root_dir}, ${experiment_name}}
src/super_gradients/recipes/cityscapes_stdc_seg50.yaml
@@ -75,6 +75,9 @@ num_gpus: 2
 experiment_name: ${architecture}50_cityscapes
 ckpt_root_dir:

+
+# THE FOLLOWING PARAMS ARE DIRECTLY USED BY HYDRA
 hydra:
   run:
-    dir: ${hydra_output_dir:${ckpt_root_dir}, ${experiment_name}}
+    # Set the output directory (i.e. where .hydra folder that logs all the input params will be generated)
+    dir: ${hydra_output_dir:${ckpt_root_dir}, ${experiment_name}}
src/super_gradients/recipes/cityscapes_stdc_seg75.yaml
@@ -79,7 +79,10 @@ num_gpus: 4
 experiment_name: ${architecture}75_cityscapes
 ckpt_root_dir:

+
+# THE FOLLOWING PARAMS ARE DIRECTLY USED BY HYDRA
 hydra:
   run:
+    # Set the output directory (i.e. where .hydra folder that logs all the input params will be generated)
     dir: ${hydra_output_dir:${ckpt_root_dir}, ${experiment_name}}

src/super_gradients/recipes/coco2017_ssd_lite_mobilenet_v2.yaml
@@ -55,3 +55,10 @@ training_hyperparams:

 multi_gpu: DDP
 num_gpus: 8
+
+
+# THE FOLLOWING PARAMS ARE DIRECTLY USED BY HYDRA
+hydra:
+  run:
+    # Set the output directory (i.e. where .hydra folder that logs all the input params will be generated)
+    dir: ${hydra_output_dir:${ckpt_root_dir}, ${experiment_name}}
src/super_gradients/recipes/coco2017_yolox.yaml
@@ -58,4 +58,9 @@ experiment_name: ${architecture}_coco2017_${experiment_suffix}
 ckpt_root_dir:


+# THE FOLLOWING PARAMS ARE DIRECTLY USED BY HYDRA
+hydra:
+  run:
+    # Set the output directory (i.e. where .hydra folder that logs all the input params will be generated)
+    dir: ${hydra_output_dir:${ckpt_root_dir}, ${experiment_name}}

src/super_gradients/recipes/coco_segmentation_shelfnet_lw.yaml
@@ -42,6 +42,9 @@ ckpt_root_dir:

 architecture: shelfnet34_lw

+
+# THE FOLLOWING PARAMS ARE DIRECTLY USED BY HYDRA
 hydra:
   run:
-    dir: ${hydra_output_dir:${ckpt_root_dir}, ${experiment_name}}
+    # Set the output directory (i.e. where .hydra folder that logs all the input params will be generated)
+    dir: ${hydra_output_dir:${ckpt_root_dir}, ${experiment_name}}
src/super_gradients/recipes/imagenet_efficientnet.yaml
@@ -34,3 +34,10 @@ multi_gpu: DDP
 num_gpus: 4

 architecture: efficientnet_b0
+
+
+# THE FOLLOWING PARAMS ARE DIRECTLY USED BY HYDRA
+hydra:
+  run:
+    # Set the output directory (i.e. where .hydra folder that logs all the input params will be generated)
+    dir: ${hydra_output_dir:${ckpt_root_dir}, ${experiment_name}}
src/super_gradients/recipes/imagenet_mobilenetv2.yaml
@@ -39,3 +39,10 @@ multi_gpu: DDP
 num_gpus: 2

 architecture: mobilenet_v2
+
+
+# THE FOLLOWING PARAMS ARE DIRECTLY USED BY HYDRA
+hydra:
+  run:
+    # Set the output directory (i.e. where .hydra folder that logs all the input params will be generated)
+    dir: ${hydra_output_dir:${ckpt_root_dir}, ${experiment_name}}
src/super_gradients/recipes/imagenet_mobilenetv3_base.yaml
@@ -21,3 +21,10 @@ multi_gpu: DDP
 num_gpus: 2

 architecture: mobilenet_v3_large
+
+
+# THE FOLLOWING PARAMS ARE DIRECTLY USED BY HYDRA
+hydra:
+  run:
+    # Set the output directory (i.e. where .hydra folder that logs all the input params will be generated)
+    dir: ${hydra_output_dir:${ckpt_root_dir}, ${experiment_name}}
src/super_gradients/recipes/imagenet_mobilenetv3_large.yaml
@@ -19,3 +19,10 @@ arch_params:
 experiment_name: mobileNetv3_large_training

 architecture: mobilenet_v3_large
+
+
+# THE FOLLOWING PARAMS ARE DIRECTLY USED BY HYDRA
+hydra:
+  run:
+    # Set the output directory (i.e. where .hydra folder that logs all the input params will be generated)
+    dir: ${hydra_output_dir:${ckpt_root_dir}, ${experiment_name}}
src/super_gradients/recipes/imagenet_mobilenetv3_small.yaml
@@ -19,3 +19,10 @@ arch_params:
 experiment_name: mobileNetv3_small_training

 architecture: mobilenet_v3_small
+
+
+# THE FOLLOWING PARAMS ARE DIRECTLY USED BY HYDRA
+hydra:
+  run:
+    # Set the output directory (i.e. where .hydra folder that logs all the input params will be generated)
+    dir: ${hydra_output_dir:${ckpt_root_dir}, ${experiment_name}}
src/super_gradients/recipes/imagenet_regnetY.yaml
@@ -49,4 +49,11 @@ multi_gpu: Off


 architecture: regnetY800
-experiment_name: ${architecture}
+experiment_name: ${architecture}
+
+
+# THE FOLLOWING PARAMS ARE DIRECTLY USED BY HYDRA
+hydra:
+  run:
+    # Set the output directory (i.e. where .hydra folder that logs all the input params will be generated)
+    dir: ${hydra_output_dir:${ckpt_root_dir}, ${experiment_name}}
src/super_gradients/recipes/imagenet_repvgg.yaml
@@ -36,3 +36,10 @@ multi_gpu: DDP
 num_gpus: 4

 architecture: repvgg_a0
+
+
+# THE FOLLOWING PARAMS ARE DIRECTLY USED BY HYDRA
+hydra:
+  run:
+    # Set the output directory (i.e. where .hydra folder that logs all the input params will be generated)
+    dir: ${hydra_output_dir:${ckpt_root_dir}, ${experiment_name}}
src/super_gradients/recipes/imagenet_resnet50.yaml
@@ -36,4 +36,11 @@ ckpt_root_dir:
 multi_gpu: DDP
 num_gpus: 4

-architecture: resnet50
+architecture: resnet50
+
+
+# THE FOLLOWING PARAMS ARE DIRECTLY USED BY HYDRA
+hydra:
+  run:
+    # Set the output directory (i.e. where .hydra folder that logs all the input params will be generated)
+    dir: ${hydra_output_dir:${ckpt_root_dir}, ${experiment_name}}
src/super_gradients/recipes/imagenet_resnet50_kd.yaml
@@ -80,4 +80,11 @@ num_gpus: 8

 architecture: kd_module
 student_architecture: resnet50
-teacher_architecture: beit_base_patch16_224
+teacher_architecture: beit_base_patch16_224
+
+
+# THE FOLLOWING PARAMS ARE DIRECTLY USED BY HYDRA
+hydra:
+  run:
+    # Set the output directory (i.e. where .hydra folder that logs all the input params will be generated)
+    dir: ${hydra_output_dir:${ckpt_root_dir}, ${experiment_name}}
src/super_gradients/recipes/imagenet_vit_base.yaml
@@ -31,4 +31,11 @@ experiment_name: vit_base_imagenet1k

 architecture: vit_base
 multi_gpu: DDP
-num_gpus: 8
+num_gpus: 8
+
+
+# THE FOLLOWING PARAMS ARE DIRECTLY USED BY HYDRA
+hydra:
+  run:
+    # Set the output directory (i.e. where .hydra folder that logs all the input params will be generated)
+    dir: ${hydra_output_dir:${ckpt_root_dir}, ${experiment_name}}
src/super_gradients/recipes/imagenet_vit_large.yaml
@@ -27,4 +27,11 @@ architecture: vit_large

 experiment_name: vit_large_imagenet1k
 multi_gpu: DDP
-num_gpus: 8
+num_gpus: 8
+
+
+# THE FOLLOWING PARAMS ARE DIRECTLY USED BY HYDRA
+hydra:
+  run:
+    # Set the output directory (i.e. where .hydra folder that logs all the input params will be generated)
+    dir: ${hydra_output_dir:${ckpt_root_dir}, ${experiment_name}}
src/super_gradients/training/sg_trainer/sg_trainer.py
@@ -3,10 +3,11 @@ import os
 import sys
 from copy import deepcopy
 from typing import Union, Tuple, Mapping
+from pathlib import Path

-import hydra
 import numpy as np
 import torch
+import hydra
 from omegaconf import DictConfig
 from torch import nn
 from torch.utils.data import DataLoader
@@ -47,13 +48,14 @@ from super_gradients.training.utils.weight_averaging_utils import ModelWeightAve
 from super_gradients.training.metrics import Accuracy, Top5
 from super_gradients.training.utils import random_seed
 from super_gradients.training.utils.checkpoint_utils import get_ckpt_local_path, read_ckpt_state_dict, \
-    load_checkpoint_to_model, load_pretrained_weights
+    load_checkpoint_to_model, load_pretrained_weights, get_checkpoints_dir_path
 from super_gradients.training.datasets.datasets_utils import DatasetStatisticsTensorboardLogger
 from super_gradients.training.utils.callbacks import CallbackHandler, Phase, LR_SCHEDULERS_CLS_DICT, PhaseContext, \
     MetricsUpdateCallback, LR_WARMUP_CLS_DICT, ContextSgMethods, LRCallbackBase
 from super_gradients.common.environment import environment_config
 from super_gradients.training.utils import HpmStruct
 from super_gradients.training.datasets.samplers.infinite_sampler import InfiniteSampler
+from super_gradients.training.utils.hydra_utils import load_experiment_cfg, add_params_to_cfg

 logger = get_logger(__name__)

@@ -132,14 +134,7 @@ class Trainer:
         self.experiment_name = experiment_name
         self.ckpt_name = None

-        # CREATING THE LOGGING DIR BASED ON THE INPUT PARAMS TO PREVENT OVERWRITE OF LOCAL VERSION
-        if ckpt_root_dir:
-            self.checkpoints_dir_path = os.path.join(ckpt_root_dir, self.experiment_name)
-        elif os.path.exists(environment_config.PKG_CHECKPOINTS_DIR):
-            self.checkpoints_dir_path = os.path.join(environment_config.PKG_CHECKPOINTS_DIR, self.experiment_name)
-        else:
-            raise ValueError("Illegal checkpoints directory: pass ckpt_root_dir that exists, or add 'checkpoints' to"
-                             "resources.")
+        self.checkpoints_dir_path = get_checkpoints_dir_path(experiment_name, ckpt_root_dir)

         # INITIALIZE THE DEVICE FOR THE MODEL
         self._initialize_device(requested_device=device, requested_multi_gpu=multi_gpu)
@@ -203,6 +198,87 @@ class Trainer:
                       valid_loader=val_dataloader,
                       training_params=cfg.training_hyperparams)

+    @classmethod
+    def resume_experiment(cls, experiment_name: str, ckpt_root_dir: str = None) -> None:
+        """
+        Resume a training that was run using our recipes.
+
+        :param experiment_name:     Name of the experiment to resume
+        :param ckpt_root_dir:       Directory including the checkpoints
+        """
+        logger.info("Resume training using the checkpoint recipe, ignoring the current recipe")
+        cfg = load_experiment_cfg(experiment_name, ckpt_root_dir)
+        add_params_to_cfg(cfg, params=["training_hyperparams.resume=True"])
+        cls.train_from_config(cfg)
+
+    @classmethod
+    def evaluate_from_recipe(cls, cfg: DictConfig) -> None:
+        """
+        Evaluate according to a cfg recipe configuration.
+
+        Note:   This script does NOT run training, only validation.
+                Please make sure that the config refers to a PRETRAINED MODEL, either from one of your checkpoints or from pretrained weights from the model zoo.
+        :param cfg: The parsed DictConfig from yaml recipe files or a dictionary
+        """
+
+        # INSTANTIATE ALL OBJECTS IN CFG
+        cfg = hydra.utils.instantiate(cfg)
+
+        kwargs = parse_args(cfg, cls.__init__)
+
+        trainer = Trainer(**kwargs)
+
+        # INSTANTIATE DATA LOADERS
+        val_dataloader = dataloaders.get(name=cfg.val_dataloader,
+                                         dataset_params=cfg.dataset_params.val_dataset_params,
+                                         dataloader_params=cfg.dataset_params.val_dataloader_params)
+
+        checkpoints_dir = Path(get_checkpoints_dir_path(experiment_name=cfg.experiment_name, ckpt_root_dir=cfg.ckpt_root_dir))
+        ckpt_name = core_utils.get_param(cfg, 'ckpt_name', 'ckpt_latest.pth')
+        checkpoint_path = str(checkpoints_dir / ckpt_name)
+
+        # BUILD NETWORK
+        model = models.get(model_name=cfg.architecture,
+                           num_classes=cfg.arch_params.num_classes,
+                           arch_params=cfg.arch_params,
+                           pretrained_weights=cfg.checkpoint_params.pretrained_weights,
+                           checkpoint_path=checkpoint_path,
+                           load_backbone=cfg.checkpoint_params.load_backbone)
+
+        # TEST
+        val_results_tuple = trainer.test(model=model,
+                                         test_loader=val_dataloader,
+                                         test_metrics_list=cfg.training_hyperparams.valid_metrics_list)
+
+        valid_metrics_dict = get_metrics_dict(val_results_tuple, trainer.test_metrics,
+                                              trainer.loss_logging_items_names)
+
+        results = ["Validate Results"]
+        results += [f"   - {metric:10}: {value}" for metric, value in valid_metrics_dict.items()]
+        logger.info("\n".join(results))
+
+    @classmethod
+    def evaluate_checkpoint(cls, experiment_name: str, ckpt_name: str = "ckpt_latest.pth", ckpt_root_dir: str = None) -> None:
+        """
+        Evaluate a checkpoint resulting from one of your previous experiments, using the same parameters (dataset, valid_metrics, ...)
+        as used during the training of the experiment.
+
+        Note:
+            The parameters will be unchanged even if the recipe used for that experiment was changed since then.
+            This is to ensure that validation of the experiment will remain exactly the same as during training.
+
+        Example, evaluate the checkpoint "average_model.pth" from experiment "my_experiment_name":
+            >> evaluate_checkpoint(experiment_name="my_experiment_name", ckpt_name="average_model.pth")
+
+        :param experiment_name:     Name of the experiment to validate
+        :param ckpt_name:           Name of the checkpoint to test ("ckpt_latest.pth", "average_model.pth" or "ckpt_best.pth" for instance)
+        :param ckpt_root_dir:       Directory including the checkpoints
+        """
+        logger.info("Evaluate checkpoint")
+        cfg = load_experiment_cfg(experiment_name, ckpt_root_dir)
+        add_params_to_cfg(cfg, params=["training_hyperparams.resume=True", f"ckpt_name={ckpt_name}"])
+        cls.evaluate_from_recipe(cfg)
+
     def _set_dataset_params(self):
         self.dataset_params = {
             "train_dataset_params": self.train_loader.dataset.dataset_params if hasattr(self.train_loader.dataset,
@@ -1129,6 +1205,10 @@ class Trainer:
     def _set_valid_metrics(self, valid_metrics_list):
         self.valid_metrics = MetricCollection(valid_metrics_list)

+    @resolve_param('test_metrics_list', ListFactory(MetricsFactory()))
+    def _set_test_metrics(self, test_metrics_list):
+        self.test_metrics = MetricCollection(test_metrics_list)
+
     def _initialize_mixed_precision(self, mixed_precision_enabled: bool):
         # SCALER IS ALWAYS INITIALIZED BUT IS DISABLED IF MIXED PRECISION WAS NOT SET
         self.scaler = GradScaler(enabled=mixed_precision_enabled)
@@ -1391,7 +1471,7 @@ class Trainer:
             self.phase_callbacks = []

         if test_metrics_list:
-            self.test_metrics = MetricCollection(test_metrics_list)
+            self._set_test_metrics(test_metrics_list)
             self._add_metrics_update_callback(Phase.TEST_BATCH_END)
             self.phase_callback_handler = CallbackHandler(self.phase_callbacks)

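Taken together, the new classmethods give a small programmatic surface over past experiments. A usage sketch (experiment and checkpoint names are placeholders):

# Usage sketch for the Trainer classmethods added above; names are placeholders.
from super_gradients import Trainer, init_trainer

init_trainer()

# Re-validate a finished run with the exact config stored in its .hydra folder.
Trainer.evaluate_checkpoint(experiment_name="my_experiment_name", ckpt_name="average_model.pth")

# Continue training that run from its latest checkpoint, with the original recipe and overrides.
Trainer.resume_experiment(experiment_name="my_experiment_name")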
src/super_gradients/training/utils/checkpoint_utils.py
@@ -1,15 +1,38 @@
 import os
 import tempfile
 import pkg_resources
+
 import torch
+
+from super_gradients.common.abstractions.abstract_logger import get_logger
 from super_gradients.common import explicit_params_validation, ADNNModelRepositoryDataInterfaces
 from super_gradients.training.pretrained_models import MODEL_URLS
+from super_gradients.common.environment import environment_config
 try:
     from torch.hub import download_url_to_file, load_state_dict_from_url
 except (ModuleNotFoundError, ImportError, NameError):
     from torch.hub import _download_url_to_file as download_url_to_file


+logger = get_logger(__name__)
+
+
+def get_checkpoints_dir_path(experiment_name: str, ckpt_root_dir: str = None):
+    """Get the checkpoint directory of a given experiment.
+    :param experiment_name:     Name of the experiment.
+    :param ckpt_root_dir:       Local root directory path where all experiment logging directories will
+                                reside. When none is given, it is assumed that pkg_resources.resource_filename('checkpoints', "")
+                                exists and will be used.
+    :return:                    checkpoints_dir_path
+    """
+    if ckpt_root_dir:
+        return os.path.join(ckpt_root_dir, experiment_name)
+    elif os.path.exists(environment_config.PKG_CHECKPOINTS_DIR):
+        return os.path.join(environment_config.PKG_CHECKPOINTS_DIR, experiment_name)
+    else:
+        raise ValueError("Illegal checkpoints directory: pass ckpt_root_dir that exists, or add 'checkpoints' to resources.")
+
+
 def get_ckpt_local_path(source_ckpt_folder_name: str, experiment_name: str, ckpt_name: str, external_checkpoint_path: str):
     """
     Gets the local path to the checkpoint file, which will be:
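The factored-out helper makes the directory-resolution precedence explicit. A short illustration (the root path and experiment name are placeholders):

# Resolution order of get_checkpoints_dir_path, as added above; the path is a placeholder.
from super_gradients.training.utils.checkpoint_utils import get_checkpoints_dir_path

# 1) An explicit ckpt_root_dir always wins:
#    -> /data/checkpoints/my_experiment_name
print(get_checkpoints_dir_path("my_experiment_name", ckpt_root_dir="/data/checkpoints"))

# 2) Otherwise the package-level 'checkpoints' resource is used if it exists,
#    and a ValueError is raised if neither is available.
print(get_checkpoints_dir_path("my_experiment_name"))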
src/super_gradients/training/utils/hydra_utils.py

from pathlib import Path
from typing import List

from hydra import initialize_config_dir, compose
from hydra.core.global_hydra import GlobalHydra
from omegaconf import OmegaConf, open_dict, DictConfig

from super_gradients.training.utils.checkpoint_utils import get_checkpoints_dir_path


def load_experiment_cfg(experiment_name: str, ckpt_root_dir: str = None) -> DictConfig:
    """
    Load the hydra config associated to a specific experiment.

    Background Information: every time an experiment is launched based on a recipe, all the hydra config params are stored in a hidden folder ".hydra".
    This hidden folder is used here to recreate the exact same config as the one that was used to launch the experiment (also including hydra overrides).

    The motivation is to be able to resume or evaluate an experiment with the exact same config as the one that was used when the experiment was
    initially started, regardless of any change that might have been introduced to the recipe, and also while using the same overrides that were used
    for that experiment.

    :param experiment_name: Name of the experiment to resume
    :param ckpt_root_dir:   Directory including the checkpoints
    :return:                The config that was used for that experiment
    """
    if not experiment_name:
        raise ValueError(f"experiment_name should be non empty string but got :{experiment_name}")

    checkpoints_dir_path = Path(get_checkpoints_dir_path(experiment_name, ckpt_root_dir))
    if not checkpoints_dir_path.exists():
        raise FileNotFoundError(f"Impossible to find checkpoint dir ({checkpoints_dir_path})")

    resume_dir = Path(checkpoints_dir_path) / ".hydra"
    if not resume_dir.exists():
        raise FileNotFoundError(f"The checkpoint directory {checkpoints_dir_path} does not include .hydra artifacts to resume the experiment.")

    # Load overrides that were used in previous run
    overrides_cfg = list(OmegaConf.load(resume_dir / "overrides.yaml"))

    GlobalHydra.instance().clear()
    with initialize_config_dir(config_dir=str(resume_dir)):
        cfg = compose(config_name="config.yaml", overrides=overrides_cfg)

    return cfg


def add_params_to_cfg(cfg: DictConfig, params: List[str]):
    """Add parameters to an existing config

    :param cfg:     OmegaConf config
    :param params:  List of parameters to add, in dotlist format (i.e. ["training_hyperparams.resume=True"])
    """
    new_cfg = OmegaConf.from_dotlist(params)
    with open_dict(cfg):  # This is required to add new fields to existing config
        cfg.merge_with(new_cfg)
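These two helpers are what `Trainer.resume_experiment` and `Trainer.evaluate_checkpoint` compose internally; the same flow written out by hand looks roughly like this (the experiment name is a placeholder):

# Manual equivalent of Trainer.resume_experiment using the helpers above; the name is a placeholder.
from super_gradients import Trainer
from super_gradients.training.utils.hydra_utils import load_experiment_cfg, add_params_to_cfg

cfg = load_experiment_cfg("my_experiment_name")                       # reload the run's .hydra config
add_params_to_cfg(cfg, params=["training_hyperparams.resume=True"])   # dotlist override
Trainer.train_from_config(cfg)                                        # continue training with that config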
src/super_gradients/training/utils/sg_trainer_utils.py
@@ -14,6 +14,7 @@ from super_gradients.common.abstractions.abstract_logger import get_logger
 from treelib import Tree
 from termcolor import colored
 import torch
+
 from torch.utils.tensorboard import SummaryWriter

 from super_gradients.training.exceptions.dataset_exceptions import UnsupportedBatchItemsFormat
@@ -26,6 +27,7 @@ IS_GREATER_SYMBOLS = {True: "↗", False: "↘"}

 logger = get_logger(__name__)

+
 @dataclass
 class MonitoredValue:
     """Store a value and some indicators relative to its past iterations.