Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

trainer_test.py 3.6 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
  1. import shutil
  2. import unittest
  3. from super_gradients.training import models
  4. import super_gradients
  5. import torch
  6. import os
  7. from super_gradients import Trainer, ClassificationTestDatasetInterface
  8. from super_gradients.training.metrics import Accuracy, Top5
  9. class TestTrainer(unittest.TestCase):
  10. @classmethod
  11. def setUp(cls):
  12. super_gradients.init_trainer()
  13. # NAMES FOR THE EXPERIMENTS TO LATER DELETE
  14. cls.folder_names = ['test_train', 'test_save_load', 'test_load_w', 'test_load_w2',
  15. 'test_load_w3', 'test_checkpoint_content', 'analyze']
  16. cls.training_params = {"max_epochs": 1,
  17. "silent_mode": True,
  18. "lr_decay_factor": 0.1,
  19. "initial_lr": 0.1,
  20. "lr_updates": [4],
  21. "lr_mode": "step",
  22. "loss": "cross_entropy", "train_metrics_list": [Accuracy(), Top5()],
  23. "valid_metrics_list": [Accuracy(), Top5()],
  24. "loss_logging_items_names": ["Loss"], "metric_to_watch": "Accuracy",
  25. "greater_metric_to_watch_is_better": True}
  26. @classmethod
  27. def tearDownClass(cls) -> None:
  28. # ERASE ALL THE FOLDERS THAT WERE CREATED DURING THIS TEST
  29. for folder in cls.folder_names:
  30. if os.path.isdir(os.path.join('checkpoints', folder)):
  31. shutil.rmtree(os.path.join('checkpoints', folder))
  32. @staticmethod
  33. def get_classification_trainer(name=''):
  34. trainer = Trainer(name, model_checkpoints_location='local')
  35. dataset_params = {"batch_size": 4}
  36. dataset = ClassificationTestDatasetInterface(dataset_params=dataset_params, image_size=224)
  37. trainer.connect_dataset_interface(dataset)
  38. model = models.get("resnet18", arch_params={"num_classes": 5})
  39. return trainer, model
  40. def test_train(self):
  41. trainer, model = self.get_classification_trainer(self.folder_names[0])
  42. trainer.train(model=model, training_params=self.training_params)
  43. def test_save_load(self):
  44. trainer, model = self.get_classification_trainer(self.folder_names[1])
  45. trainer.train(model=model, training_params=self.training_params)
  46. resume_training_params = self.training_params.copy()
  47. resume_training_params["resume"] = True
  48. resume_training_params["max_epochs"] = 2
  49. trainer, model = self.get_classification_trainer(self.folder_names[1])
  50. trainer.train(model=model, training_params=resume_training_params)
  51. def test_checkpoint_content(self):
  52. """VERIFY THAT ALL CHECKPOINTS ARE SAVED AND CONTAIN ALL THE EXPECTED KEYS"""
  53. trainer, model = self.get_classification_trainer(self.folder_names[5])
  54. params = self.training_params.copy()
  55. params["save_ckpt_epoch_list"] = [1]
  56. trainer.train(model=model, training_params=params)
  57. ckpt_filename = ['ckpt_best.pth', 'ckpt_latest.pth', 'ckpt_epoch_1.pth']
  58. ckpt_paths = [os.path.join(trainer.checkpoints_dir_path, suf) for suf in ckpt_filename]
  59. for ckpt_path in ckpt_paths:
  60. ckpt = torch.load(ckpt_path)
  61. self.assertListEqual(['net', 'acc', 'epoch', 'optimizer_state_dict', 'scaler_state_dict'],
  62. list(ckpt.keys()))
  63. trainer._save_checkpoint()
  64. weights_only = torch.load(os.path.join(trainer.checkpoints_dir_path, 'ckpt_latest_weights_only.pth'))
  65. self.assertListEqual(['net'], list(weights_only.keys()))
  # Run the whole suite when this file is executed directly as a script.
  if __name__ == "__main__":
      unittest.main()
Tip!

Press p to see the previous file, or n to see the next file.

Comments

Loading...