#258 fix naming of private functions

Merged
Ofri Masad merged 1 commit into Deci-AI:master from deci-ai:feature/SG-185_rename_and_cleanup
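The change makes the internal SgModel/KDModel helpers private by prefixing them with an underscore (`_instantiate_net`, `_backward_step`, `_save_checkpoint`, `_get_hyper_param_config`, `_instantiate_ema_model`, `_reset_best_metric`, `_re_build_model`), removes `compute_model_runtime` and `update_architecture` along with their test, and turns the simple getters (`get_net`, `get_module`, `get_arch_params`, `get_structure`, `get_architecture`) into properties. A minimal sketch of the two patterns, using a hypothetical stand-in class rather than the real `SgModel`, to show how call sites change:

```python
# Illustrative only -- a hypothetical stand-in class, not the real SgModel from super_gradients.
class Trainer:
    def __init__(self, net: str):
        self.net = net

    def _save_checkpoint(self) -> None:
        # Was a public save_checkpoint(); the leading underscore marks it as internal.
        print(f"saving checkpoint for {self.net}")

    @property
    def get_net(self) -> str:
        # Was a plain getter method; as a property it is read without parentheses.
        return self.net


trainer = Trainer(net="resnet18")
trainer._save_checkpoint()  # internal name; called directly here only for illustration
print(trainer.get_net)      # property access: trainer.get_net, not trainer.get_net()
```

External code that called the old public names needs the equivalent rename, e.g. `model.save_checkpoint()` becoming `model._save_checkpoint()` as in the updated tests below.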
@@ -159,8 +159,8 @@ class KDModel(SgModel):
             else:
                 subnet_arch_params['num_classes'] = len(self.classes)

-    def instantiate_net(self, architecture: Union[KDModule, KDModule.__class__, str], arch_params: dict,
-                        checkpoint_params: dict, *args, **kwargs) -> tuple:
+    def _instantiate_net(self, architecture: Union[KDModule, KDModule.__class__, str], arch_params: dict,
+                         checkpoint_params: dict, *args, **kwargs) -> tuple:
         """
         Instantiates kd_module according to architecture and arch_params, handles pretrained weights for the student
          and teacher networks, and the required module manipulation (i.e head replacement) for the teacher network.
@@ -182,10 +182,10 @@ class KDModel(SgModel):
         student_pretrained_weights = get_param(checkpoint_params, 'student_pretrained_weights')
         teacher_pretrained_weights = get_param(checkpoint_params, 'teacher_pretrained_weights')

-        student = super().instantiate_net(student_architecture, student_arch_params,
-                                          {"pretrained_weights": student_pretrained_weights})
-        teacher = super().instantiate_net(teacher_architecture, teacher_arch_params,
-                                          {"pretrained_weights": teacher_pretrained_weights})
+        student = super()._instantiate_net(student_architecture, student_arch_params,
+                                           {"pretrained_weights": student_pretrained_weights})
+        teacher = super()._instantiate_net(teacher_architecture, teacher_arch_params,
+                                           {"pretrained_weights": teacher_pretrained_weights})

         run_teacher_on_eval = get_param(kwargs, "run_teacher_on_eval", default_val=False)

@@ -237,11 +237,11 @@ class KDModel(SgModel):
         """
         self.phase_callbacks.append(KDModelMetricsUpdateCallback(phase))

-    def get_hyper_param_config(self):
+    def _get_hyper_param_config(self):
         """
         Creates a training hyper param config for logging with additional KD related hyper params.
         """
-        hyper_param_config = super().get_hyper_param_config()
+        hyper_param_config = super()._get_hyper_param_config()
         hyper_param_config.update({"student_architecture": self.student_architecture,
                                    "teacher_architecture": self.teacher_architecture,
                                    "student_arch_params": self.student_arch_params,
@@ -249,7 +249,7 @@ class KDModel(SgModel):
                                    })
         return hyper_param_config

-    def instantiate_ema_model(self, decay: float = 0.9999, beta: float = 15, exp_activation: bool = True) -> KDModelEMA:
+    def _instantiate_ema_model(self, decay: float = 0.9999, beta: float = 15, exp_activation: bool = True) -> KDModelEMA:
         """Instantiate KD ema model for KDModule.

         If the model is of class KDModule, the instance will be adapted to work on knowledge distillation.
@@ -293,7 +293,7 @@ class SgModel:
         self.arch_params = core_utils.HpmStruct(**arch_params)
         self.checkpoint_params = core_utils.HpmStruct(**checkpoint_params)

-        self.net = self.instantiate_net(architecture, self.arch_params, checkpoint_params, *args, **kwargs)
+        self.net = self._instantiate_net(architecture, self.arch_params, checkpoint_params, *args, **kwargs)

         # SAVE THE ARCHITECTURE FOR NEURAL ARCHITECTURE SEARCH

@@ -404,7 +404,7 @@ class SgModel:
             if not self.ddp_silent_mode and batch_idx == 0:
                 self._write_lrs(epoch)

-            self.backward_step(loss, epoch, batch_idx, context)
+            self._backward_step(loss, epoch, batch_idx, context)

             # COMPUTE THE RUNNING USER METRICS AND LOSS RUNNING ITEMS. RESULT TUPLE IS THEIR CONCATENATION.
             logging_values = loss_avg_meter.average + get_metrics_results_tuple(self.train_metrics)
@@ -446,7 +446,7 @@ class SgModel:
         # RETURN AND THE LOSS LOGGING ITEMS COMPUTED DURING LOSS FORWARD PASS
         return loss, loss_logging_items

-    def backward_step(self, loss: torch.Tensor, epoch: int, batch_idx: int, context: PhaseContext, *args, **kwargs):
+    def _backward_step(self, loss: torch.Tensor, epoch: int, batch_idx: int, context: PhaseContext, *args, **kwargs):
         """
         Run backprop on the loss and perform a step
         :param loss: The value computed by the loss function
@@ -478,8 +478,8 @@ class SgModel:
             # RUN PHASE CALLBACKS
             self.phase_callback_handler(Phase.TRAIN_BATCH_STEP, context)

-    def save_checkpoint(self, optimizer=None, epoch: int = None, validation_results_tuple: tuple = None,
-                        context: PhaseContext = None):
+    def _save_checkpoint(self, optimizer=None, epoch: int = None, validation_results_tuple: tuple = None,
+                         context: PhaseContext = None):
         """
         Save the current state dict as latest (always), best (if metric was improved), epoch# (if determined in training
         params)
@@ -876,7 +876,7 @@ class SgModel:
         if self.ema:
             ema_params = self.training_params.ema_params
             logger.info(f'Using EMA with params {ema_params}')
-            self.ema_model = self.instantiate_ema_model(**ema_params)
+            self.ema_model = self._instantiate_ema_model(**ema_params)
             self.ema_model.updates = self.start_epoch * num_batches // self.batch_accumulate
             if self.load_checkpoint:
                 if 'ema_net' in self.checkpoint.keys():
@@ -951,7 +951,7 @@ class SgModel:
         if not self.load_checkpoint or self.load_weights_only:
             # WHEN STARTING TRAINING FROM SCRATCH, DO NOT LOAD OPTIMIZER PARAMS (EVEN IF LOADING BACKBONE)
             self.start_epoch = 0
-            self.reset_best_metric()
+            self._reset_best_metric()
             load_opt_params = False

         if isinstance(self.training_params.optimizer, str):
@@ -1092,7 +1092,7 @@ class SgModel:

                 self.sg_logger.close()

-    def reset_best_metric(self):
+    def _reset_best_metric(self):
         self.best_metric = -1 * np.inf if self.greater_metric_to_watch_is_better else np.inf

     @resolve_param('train_metrics_list', ListFactory(MetricsFactory()))
@@ -1263,101 +1263,22 @@ class SgModel:

         return outputs, acc, net_time, gross_time

-    def compute_model_runtime(self, input_dims: tuple = None,
-                              batch_sizes: Union[tuple, list, int] = (1, 8, 16, 32, 64),
-                              verbose: bool = True):
-        """
-        Compute the "atomic" inference time and throughput.
-        Atomic refers to calculating the forward pass independently, discarding effects such as data augmentation,
-        data upload to device, multi-gpu distribution etc.
-        :param input_dims: tuple
-            shape of a basic input to the network (without the first index) e.g. (3, 224, 224)
-            if None uses an input from the test loader
-        :param batch_sizes: int or list
-            Batch sizes for latency calculation
-        :param verbose: bool
-            Prints results to screen
-        :return: log: dict
-            Latency and throughput for each tested batch size
-        """
-        assert input_dims or self.test_loader is not None, 'Must get \'input_dims\' or connect a dataset interface'
-        assert self.multi_gpu not in (MultiGPUMode.DATA_PARALLEL, MultiGPUMode.DISTRIBUTED_DATA_PARALLEL), \
-            'The model is on multiple GPUs, move it to a single GPU is order to compute runtime'
-
-        # TRANSFER THE MODEL TO EVALUATION MODE BUT REMEMBER THE MODE TO RETURN TO
-        was_in_training_mode = True if self.net.training else False
-        self.net.eval()
-
-        # INITIALIZE LOGS AND PRINTS
-        timer = core_utils.Timer(self.device)
-        logs = {}
-        log_print = f"{'-' * 35}\n" \
-                    f"Batch   Time per Batch  Throughput\n" \
-                    f"size         (ms)        (im/s)\n" \
-                    f"{'-' * 35}\n"
-
-        # GET THE INPUT SHAPE FROM THE DATA LOADER IF NOT PROVIDED EXPLICITLY
-        input_dims = input_dims or next(iter(self.test_loader))[0].shape[1:]
-
-        # DEFINE NUMBER ACCORDING TO DEVICE
-        repetitions = 200 if self.device == 'cuda' else 20
-
-        # CREATE A LIST OF BATCH SIZES
-        batch_sizes = [batch_sizes] if type(batch_sizes) == int else batch_sizes
-
-        for batch_size in sorted(batch_sizes):
-            try:
-                # CREATE A RANDOM TENSOR AS INPUT
-                dummy_batch = torch.randn((batch_size, *input_dims), device=self.device)
-
-                # WARM UP
-                for _ in range(10):
-                    _ = self.net(dummy_batch)
-
-                # RUN & TIME
-                accumulated_time = 0
-                with torch.no_grad():
-                    for _ in range(repetitions):
-                        timer.start()
-                        _ = self.net(dummy_batch)
-                        accumulated_time += timer.stop()
-
-                # PERFORMANCE CALCULATION
-                time_per_batch = accumulated_time / repetitions
-                throughput = batch_size * 1000 / time_per_batch
-
-                logs[batch_size] = {'time_per_batch': time_per_batch, 'throughput': throughput}
-                log_print += f"{batch_size:4.0f} {time_per_batch:12.1f} {throughput:12.0f}\n"
-
-            except RuntimeError as e:
-                # ONLY FOR THE CASE OF CUDA OUT OF MEMORY WE CATCH THE EXCEPTION AND CONTINUE THE FUNCTION
-                if 'CUDA out of memory' in str(e):
-                    log_print += f"{batch_size:4d}\t{'CUDA out of memory':13s}\n"
-                else:
-                    raise
-
-        # PRINT RESULTS
-        if verbose:
-            logger.info(log_print)
-
-        # RETURN THE MODEL TO THE PREVIOUS MODE
-        self.net.train(was_in_training_mode)
-
-        return logs
-
+    @property
     def get_arch_params(self):
         return self.arch_params.to_dict()

+    @property
     def get_structure(self):
         return self.net.module.structure

+    @property
     def get_architecture(self):
         return self.architecture

     def set_experiment_name(self, experiment_name):
         self.experiment_name = experiment_name

-    def re_build_model(self, arch_params={}):
+    def _re_build_model(self, arch_params={}):
         """
         arch_params : dict
             Architecture H.P. e.g.: block, num_blocks, num_classes, etc.
@@ -1371,7 +1292,7 @@ class SgModel:

         self.arch_params = core_utils.HpmStruct(**arch_params)
         self.classes = self.arch_params.num_classes
-        self.net = self.instantiate_net(self.architecture, self.arch_params, self.checkpoint_params)
+        self.net = self._instantiate_net(self.architecture, self.arch_params, self.checkpoint_params)
         # save the architecture for neural architecture search
         if hasattr(self.net, 'structure'):
             self.architecture = self.net.structure
@@ -1384,24 +1305,7 @@ class SgModel:
                                          device_ids=self.device_ids) if self.multi_gpu else core_utils.WrappedModel(
             self.net)

-    def update_architecture(self, structure):
-        '''
-        architecture : str
-            Defines the network's architecture according to the options in models/all_architectures
-        load_checkpoint : bool
-            Loads a checkpoint according to experiment_name
-        arch_params : dict
-            Architecture H.P. e.g.: block, num_blocks, num_classes, etc.
-        :return:
-        '''
-        if hasattr(self.net.module, 'update_structure'):
-
-            self.net.module.update_structure(structure)
-            self.net.to(self.device)
-
-        else:
-            raise Exception("architecture is not valid for NAS")
-
+    @property
     def get_module(self):
         return self.net

@@ -1625,13 +1529,13 @@ class SgModel:

         # IN CASE SG_LOGGER UPDATED THE DIR PATH
         self.checkpoints_dir_path = self.sg_logger.local_dir()
-        hyper_param_config = self.get_hyper_param_config()
+        hyper_param_config = self._get_hyper_param_config()

         self.sg_logger.add_config("hyper_params", hyper_param_config)

         self.sg_logger.flush()

-    def get_hyper_param_config(self):
+    def _get_hyper_param_config(self):
         """
         Creates a training hyper param config for logging.
         """
@@ -1662,7 +1566,7 @@ class SgModel:

         # SAVE THE CHECKPOINT
         if self.training_params.save_model:
-            self.save_checkpoint(self.optimizer, epoch + 1, validation_results, context)
+            self._save_checkpoint(self.optimizer, epoch + 1, validation_results, context)

     def _write_lrs(self, epoch):
         lrs = [self.optimizer.param_groups[i]['lr'] for i in range(len(self.optimizer.param_groups))]
@@ -1843,8 +1747,8 @@ class SgModel:

         return logging_values

-    def instantiate_net(self, architecture: Union[torch.nn.Module, SgModule.__class__, str], arch_params: dict,
-                        checkpoint_params: dict, *args, **kwargs) -> tuple:
+    def _instantiate_net(self, architecture: Union[torch.nn.Module, SgModule.__class__, str], arch_params: dict,
+                         checkpoint_params: dict, *args, **kwargs) -> tuple:
         """
         Instantiates nn.Module according to architecture and arch_params, and handles pretrained weights and the required
             module manipulation (i.e head replacement).
@@ -1879,7 +1783,7 @@ class SgModel:

         return net

-    def instantiate_ema_model(self, decay: float = 0.9999, beta: float = 15, exp_activation: bool = True) -> ModelEMA:
+    def _instantiate_ema_model(self, decay: float = 0.9999, beta: float = 15, exp_activation: bool = True) -> ModelEMA:
         """Instantiate ema model for standard SgModule.
         :param decay: the maximum decay value. as the training process advances, the decay will climb towards this value
                       until the EMA_t+1 = EMA_t * decay + TRAINING_MODEL * (1- decay)
@@ -1888,6 +1792,7 @@ class SgModel:
         """
         return ModelEMA(self.net, decay, beta, exp_activation)

+    @property
     def get_net(self):
         """
         Getter for network.
@@ -1933,7 +1838,7 @@ class SgModel:
             context_methods = ContextSgMethods(get_net=self.get_net,
                                                set_net=self.set_net,
                                                set_ckpt_best_name=self.set_ckpt_best_name,
-                                               reset_best_metric=self.reset_best_metric,
+                                               reset_best_metric=self._reset_best_metric,
                                                build_model=self.build_model,
                                                validate_epoch=self._validate_epoch,
                                                set_ema=self.set_ema)
@@ -294,7 +294,7 @@ class QATCallback(PhaseCallback):
                 self._calibrate_model(context)

             # RESET THE BEST METRIC VALUE SO WE SAVE CHECKPOINTS AFTER THE EXPECTED QAT ACCURACY DEGRADATION
-            context.context_methods.reset_best_metric()
+            context.context_methods._reset_best_metric()

             # SET NEW FILENAME FOR THE BEST CHECKPOINT SO WE DON'T OVERRIDE THE PREVIOUS ONES
             context.context_methods.set_ckpt_best_name('qat_ckpt_best.pth')
@@ -90,29 +90,10 @@ class TestTrainer(unittest.TestCase):
             ckpt = torch.load(ckpt_path)
             self.assertListEqual(['net', 'acc', 'epoch', 'optimizer_state_dict', 'scaler_state_dict'],
                                  list(ckpt.keys()))
-        model.save_checkpoint()
+        model._save_checkpoint()
         weights_only = torch.load(os.path.join(model.checkpoints_dir_path, 'ckpt_latest_weights_only.pth'))
         self.assertListEqual(['net'], list(weights_only.keys()))

-    def test_compute_model_runtime(self):
-        model = self.get_classification_trainer(self.folder_names[6])
-        model.compute_model_runtime()
-        model.compute_model_runtime(batch_sizes=1, input_dims=(3, 224, 224), verbose=False)
-        model.compute_model_runtime(batch_sizes=[1, 2, 3], verbose=True)
-        # VERIFY MODEL RETURNS TO PREVIOUS TRAINING MODE
-        model.net.train()
-        model.compute_model_runtime(batch_sizes=1, verbose=False)
-        assert model.net.training, 'MODEL WAS SET TO eval DURING compute_model_runtime, BUT DIDN\'t RETURN TO PREVIOUS'
-        model.net.eval()
-        model.compute_model_runtime(batch_sizes=1, verbose=False)
-        assert not model.net.training, 'MODEL WAS SET TO eval DURING compute_model_runtime, BUT RETURNED TO TRAINING'
-
-        # THESE SHOULD HANDLE THE EXCEPTION OF CUDA OUT OF MEMORY
-        if torch.cuda.is_available():
-            model._switch_device('cuda')
-            model.compute_model_runtime(batch_sizes=10000, verbose=False, input_dims=(3, 224, 224))
-            model.compute_model_runtime(batch_sizes=[10000, 10, 50, 100, 1000, 5000], verbose=True)
-
     def test_predict(self):
         model = self.get_classification_trainer(self.folder_names[6])
         inputs = torch.randn((5, 3, 32, 32))
@@ -58,7 +58,7 @@ class StrictLoadEnumTest(unittest.TestCase):
         cls.sg_model.sg_logger = BaseSGLogger('project_name', 'load_checkpoint_test', 'local', resumed=False,
                                               training_params=HpmStruct(max_epochs=10),
                                               checkpoints_dir_path=cls.sg_model.checkpoints_dir_path)
-        cls.sg_model.save_checkpoint()
+        cls.sg_model._save_checkpoint()

     @classmethod
     def tearDownClass(cls):