
#617 Feature/sg 000 break inner train loop

Merged
Ghost merged 1 commit into Deci-AI:master from deci-ai:feature/SG-000_break_inner_train_loop
@@ -99,6 +99,12 @@ qat_params:
  num_calib_batches: 2 # int, number of batches to collect the statistics from.
  percentile: 99.99 # float, percentile value to use when quant_modules_calib_method='percentile'. Discarded when other methods are used (Default=99.99).

+max_train_batches: None # For debugging: when not None, breaks out of the inner train loop
+# (i.e., iterating over train_loader) upon reaching this number of batches.
+
+max_valid_batches: None # For debugging: when not None, breaks out of the inner valid loop
+# (i.e., iterating over valid_loader) upon reaching this number of batches.
+
 sg_logger: base_sg_logger
 sg_logger_params:
   tb_files_user_prompt: False # Asks User for Tensorboard Deletion Prompt
@@ -68,6 +68,10 @@ DEFAULT_TRAINING_PARAMS = {
     "ckpt_name": "ckpt_latest.pth",
     "ckpt_name": "ckpt_latest.pth",
     "resume_strict_load": False,
     "resume_strict_load": False,
     "sync_bn": False,
     "sync_bn": False,
+    "max_train_batches": None,  # For debug- when not None- will break out of inner train loop
+    # (i.e iterating over train_loader) when reaching this number of batches.
+    "max_valid_batches": None,  # For debug- when not None- will break out of inner valid loop
+    # (i.e iterating over valid_loader) when reaching this number of batches.
 }

 DEFAULT_OPTIMIZER_PARAMS_SGD = {"weight_decay": 1e-4, "momentum": 0.9}
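
For a sense of how these new defaults are consumed, here is a minimal sketch of overriding the caps for a single debug run. It is modeled on the unit test at the bottom of this PR and reuses the same Trainer, LeNet, and classification_test_dataloader helpers; the experiment name is hypothetical and the rest of the required training params are elided:

    from super_gradients.training import Trainer
    from super_gradients.training.dataloaders.dataloaders import classification_test_dataloader
    from super_gradients.training.models import LeNet

    trainer = Trainer("debug_short_epochs")  # hypothetical experiment name
    trainer.train(
        model=LeNet(),
        training_params={
            # ... the usual required params (max_epochs, loss, optimizer, metrics, ...) ...
            "max_train_batches": 3,  # each training epoch stops after 3 batches
            "max_valid_batches": 3,  # each validation pass stops after 3 batches
        },
        train_loader=classification_test_dataloader(),
        valid_loader=classification_test_dataloader(),
    )
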
@@ -193,6 +193,8 @@ class Trainer:

         self.train_monitored_values = {}
         self.valid_monitored_values = {}
+        self.max_train_batches = None
+        self.max_valid_batches = None

     @property
     def device(self) -> str:
@@ -445,7 +447,9 @@ class Trainer:

             # TODO: ITERATE BY MAX ITERS
             # FOR INFINITE SAMPLERS WE MUST BREAK WHEN REACHING LEN ITERATIONS.
-            if self._infinite_train_loader and batch_idx == len(self.train_loader) - 1:
+            if (self._infinite_train_loader and batch_idx == len(self.train_loader) - 1) or (
+                self.max_train_batches is not None and self.max_train_batches - 1 <= batch_idx
+            ):
                 break

         if not self.ddp_silent_mode:
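
The added clause shares a single break with the existing infinite-sampler guard. In isolation, the capping logic looks like the following sketch (stand-in names, not the Trainer's actual loop body):

    train_loader = range(10)  # stand-in for a real DataLoader
    max_train_batches = 3

    for batch_idx, batch in enumerate(train_loader):
        # ... forward / backward / optimizer step would go here ...
        # batch_idx is zero-based, so breaking once batch_idx >= max_train_batches - 1
        # means exactly max_train_batches batches were processed.
        if max_train_batches is not None and batch_idx >= max_train_batches - 1:
            break

    assert batch_idx == 2  # three batches (0, 1, 2) were processed
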
@@ -965,6 +969,13 @@ class Trainer:
                         percentile: float, percentile value to use when quant_modules_calib_method='percentile'.
                          Discarded when other methods are used (Default=99.99).

+                -   `max_train_batches`: int, when not None, breaks out of the inner train loop (i.e., iterating over
+                      train_loader) upon reaching this number of batches. Useful for debugging (default=None).
+
+                -   `max_valid_batches`: int, when not None, breaks out of the inner valid loop (i.e., iterating over
+                      valid_loader) upon reaching this number of batches. Useful for debugging (default=None).
+

         :return:
         """
@@ -1143,6 +1154,21 @@ class Trainer:

         self.ckpt_best_name = self.training_params.ckpt_best_name

+        if self.training_params.max_train_batches is not None and (
+            self.training_params.max_train_batches > len(self.train_loader) or self.training_params.max_train_batches <= 0
+        ):
+            raise ValueError("max_train_batches must be positive and no greater than len(train_loader).")
+
+        self.max_train_batches = self.training_params.max_train_batches
+
+        if self.training_params.max_valid_batches is not None and (
+            self.training_params.max_valid_batches > len(self.valid_loader) or self.training_params.max_valid_batches <= 0
+        ):
+            raise ValueError("max_valid_batches must be positive and no greater than len(valid_loader).")
+
+        self.max_valid_batches = self.training_params.max_valid_batches
+
         # STATE ATTRIBUTE SET HERE FOR SUBSEQUENT TRAIN() CALLS
         self._first_backward = True

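
The same range check applies to both caps: the accepted values are 1 <= max_batches <= len(loader), with None disabling the cap. As a standalone illustration (a hypothetical helper, not part of this PR):

    def check_max_batches(max_batches, loader_len: int, name: str) -> None:
        # Mirrors the added validation: None disables the cap entirely.
        if max_batches is not None and not (0 < max_batches <= loader_len):
            raise ValueError(f"{name} must be positive and no greater than the loader length ({loader_len}).")

    check_max_batches(3, 4, "max_train_batches")    # ok: 1 <= 3 <= 4
    # check_max_batches(5, 4, "max_train_batches")  # would raise ValueError
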
@@ -1754,6 +1780,9 @@ class Trainer:

                     progress_bar_data_loader.set_postfix(**pbar_message_dict)

+                if evaluation_type == EvaluationType.VALIDATION and self.max_valid_batches is not None and self.max_valid_batches - 1 <= batch_idx:
+                    break
+
         # NEED TO COMPUTE METRICS FOR THE FIRST TIME IF PROGRESS VERBOSITY IS NOT SET
         if not metrics_progress_verbose:
             # COMPUTE THE RUNNING USER METRICS AND LOSS RUNNING ITEMS. RESULT TUPLE IS THEIR CONCATENATION.
@@ -26,6 +26,7 @@ from tests.unit_tests.detection_utils_test import TestDetectionUtils
 from tests.unit_tests.detection_dataset_test import DetectionDatasetTest
 from tests.unit_tests.export_onnx_test import TestModelsONNXExport
 from tests.unit_tests.local_ckpt_head_replacement_test import LocalCkptHeadReplacementTest
+from tests.unit_tests.max_batches_loop_break_test import MaxBatchesLoopBreakTest
 from tests.unit_tests.phase_delegates_test import ContextMethodsTest
 from tests.unit_tests.quantization_utility_tests import QuantizationUtilityTest
 from tests.unit_tests.random_erase_test import RandomEraseTest
@@ -117,6 +118,7 @@ class CoreUnitTestSuiteRunner:
         self.unit_tests_suite.addTest(self.test_loader.loadTestsFromModule(LocalCkptHeadReplacementTest))
         self.unit_tests_suite.addTest(self.test_loader.loadTestsFromModule(DetectionDatasetTest))
         self.unit_tests_suite.addTest(self.test_loader.loadTestsFromModule(TestModelsONNXExport))
+        self.unit_tests_suite.addTest(self.test_loader.loadTestsFromModule(MaxBatchesLoopBreakTest))

     def _add_modules_to_end_to_end_tests_suite(self):
         """
tests/unit_tests/max_batches_loop_break_test.py:

import unittest

from super_gradients.training import Trainer
from super_gradients.training.dataloaders.dataloaders import classification_test_dataloader
from super_gradients.training.metrics import Accuracy, Top5
from super_gradients.training.utils.callbacks import PhaseCallback, Phase, PhaseContext
from super_gradients.training.models import LeNet


class LastBatchIdxCollector(PhaseCallback):
    def __init__(self, train: bool = True):
        phase = Phase.TRAIN_BATCH_END if train else Phase.VALIDATION_BATCH_END
        super().__init__(phase=phase)
        self.last_batch_idx = 0

    def __call__(self, context: PhaseContext):
        self.last_batch_idx = context.batch_idx


class MaxBatchesLoopBreakTest(unittest.TestCase):
    def test_max_train_batches_loop_break(self):
        last_batch_collector = LastBatchIdxCollector()
        train_params = {
            "max_epochs": 2,
            "lr_updates": [1],
            "lr_decay_factor": 0.1,
            "lr_mode": "step",
            "lr_warmup_epochs": 0,
            "initial_lr": 0.1,
            "loss": "cross_entropy",
            "optimizer": "SGD",
            "criterion_params": {},
            "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9},
            "train_metrics_list": [Accuracy(), Top5()],
            "valid_metrics_list": [Accuracy(), Top5()],
            "metric_to_watch": "Accuracy",
            "greater_metric_to_watch_is_better": True,
            "phase_callbacks": [last_batch_collector],
            "max_train_batches": 3,
        }

        # Define Model
        net = LeNet()
        trainer = Trainer("test_max_batches_break_train")
        trainer.train(
            model=net,
            training_params=train_params,
            train_loader=classification_test_dataloader(dataset_size=16, batch_size=4),
            valid_loader=classification_test_dataloader(),
        )

        # ASSERT LAST BATCH IDX IS 2
        print(last_batch_collector.last_batch_idx)
        self.assertTrue(last_batch_collector.last_batch_idx == 2)

    def test_max_valid_batches_loop_break(self):
        last_batch_collector = LastBatchIdxCollector(train=False)
        train_params = {
            "max_epochs": 2,
            "lr_updates": [1],
            "lr_decay_factor": 0.1,
            "lr_mode": "step",
            "lr_warmup_epochs": 0,
            "initial_lr": 0.1,
            "loss": "cross_entropy",
            "optimizer": "SGD",
            "criterion_params": {},
            "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9},
            "train_metrics_list": [Accuracy(), Top5()],
            "valid_metrics_list": [Accuracy(), Top5()],
            "metric_to_watch": "Accuracy",
            "greater_metric_to_watch_is_better": True,
            "phase_callbacks": [last_batch_collector],
            "max_valid_batches": 3,
        }

        # Define Model
        net = LeNet()
        trainer = Trainer("test_max_batches_break_val")
        trainer.train(
            model=net,
            training_params=train_params,
            train_loader=classification_test_dataloader(),
            valid_loader=classification_test_dataloader(dataset_size=16, batch_size=4),
        )

        # ASSERT LAST BATCH IDX IS 2
        self.assertTrue(last_batch_collector.last_batch_idx == 2)


if __name__ == "__main__":
    unittest.main()
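
Why the tests expect last_batch_idx == 2: classification_test_dataloader(dataset_size=16, batch_size=4) yields 4 batches (indices 0-3), and with the cap set to 3 the loop breaks at index 2. A quick sanity check in plain arithmetic (no SuperGradients imports needed):

    dataset_size, batch_size, max_batches = 16, 4, 3
    num_batches = dataset_size // batch_size            # the capped loader yields batches 0, 1, 2, 3
    last_batch_idx = min(num_batches, max_batches) - 1
    assert last_batch_idx == 2                          # exactly 3 batches run; the callback records index 2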