Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

test_model_weight_averaging.py 2.7 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
  1. import collections
  2. import tempfile
  3. import unittest
  4. import numpy as np
  5. import torch
  6. from torch import nn
  7. from super_gradients.training.utils.weight_averaging_utils import ModelWeightAveraging
  class TestModelWeightAveraging(unittest.TestCase):
      """Unit tests for ``ModelWeightAveraging.get_average_model``."""

      def test_model_weight_averaging_single_model(self):
          """With a single model fed in, the average must equal that model's weights."""
          with tempfile.TemporaryDirectory() as tmp_dir:
              avg = self._create_averager(tmp_dir)
              model = self._create_dummy_model()
              # Snapshot before averaging so the expectation does not alias live tensors.
              model_sd = self._snapshot_state_dict(model)

              avg_model_sd = avg.get_average_model(model, {"acc": 0.99})

              self.assertStateDictAlmostEqual(avg_model_sd, model_sd)

      def test_model_weight_averaging_with_nan_metric(self):
          """A model reported with a NaN/inf metric must leave the average unchanged.

          For each non-finite metric value, a healthy model is fed first, then a
          model whose ``fc1`` weights are all NaN together with the corrupted
          metric. The returned state dict is expected to still match the
          healthy model.
          """
          corrupted_metric_values = np.nan, +np.inf, -np.inf, torch.nan, torch.inf, -torch.inf
          for corrupted_metric_value in corrupted_metric_values:
              with self.subTest(corrupted_metric_value=corrupted_metric_value):
                  with tempfile.TemporaryDirectory() as tmp_dir:
                      avg = self._create_averager(tmp_dir)
                      model = self._create_dummy_model()
                      # Snapshot before averaging so the expectation does not alias live tensors.
                      model_sd = self._snapshot_state_dict(model)
                      avg.get_average_model(model, {"acc": 0.99})

                      corrupted_model = self._create_dummy_model()
                      # Multiplying by NaN turns every weight entry into NaN.
                      corrupted_model.fc1.weight.data = torch.randn(10, 10) * torch.nan
                      avg_model_sd = avg.get_average_model(corrupted_model, {"acc": corrupted_metric_value})

                      self.assertStateDictAlmostEqual(avg_model_sd, model_sd)

      def assertStateDictAlmostEqual(self, sd1, sd2, eps=1e-5):
          """Assert that two state dicts have the same keys and (almost) equal tensors.

          Floating-point entries are compared through their mean absolute
          difference, which must not exceed ``eps``; a NaN difference fails the
          comparison. All other entries must match element-wise.

          :param sd1: First state dict (mapping of name -> tensor).
          :param sd2: Second state dict (mapping of name -> tensor).
          :param eps: Maximum allowed mean absolute difference for float entries.
          """
          self.assertEqual(set(sd1.keys()), set(sd2.keys()))
          for key, v1 in sd1.items():
              v2 = sd2[key]
              mismatch_msg = f"{key}: {v1} vs {v2}"
              # Guard against silent broadcasting inside l1_loss / eq.
              self.assertEqual(tuple(v1.shape), tuple(v2.shape), msg=mismatch_msg)
              if torch.is_floating_point(v1) and torch.is_floating_point(v2):
                  difference = torch.nn.functional.l1_loss(v1, v2).item()
                  self.assertLessEqual(difference, eps, msg=mismatch_msg)
              else:
                  # Reduce to a single bool: a bare tensor comparison is ambiguous
                  # (raises) for tensors holding more than one element.
                  self.assertTrue(bool(torch.eq(v1, v2).all()), msg=mismatch_msg)

      def _create_dummy_model(self) -> nn.Module:
          """Build a tiny Linear(10, 10) + BatchNorm1d(10) network with random ``fc1`` weights."""
          net = nn.Sequential(collections.OrderedDict([("fc1", nn.Linear(10, 10)), ("bn", nn.BatchNorm1d(10))]))
          net.fc1.weight.data = torch.randn(10, 10)
          return net

      @staticmethod
      def _create_averager(ckpt_dir):
          """Create the ``ModelWeightAveraging`` instance shared by all tests, storing data in ``ckpt_dir``."""
          return ModelWeightAveraging(
              ckpt_dir=ckpt_dir,
              greater_is_better=True,
              metric_to_watch="acc",
              load_checkpoint=False,
              number_of_models_to_average=10,
          )

      @staticmethod
      def _snapshot_state_dict(model):
          """Return a copy of ``model.state_dict()`` whose tensors are cloned.

          ``state_dict()`` hands back references to the module's own tensors, so
          cloning keeps the expected values independent of later changes to the model.
          """
          return {key: value.detach().clone() for key, value in model.state_dict().items()}
  # Run the test suite when this module is executed directly as a script.
  if __name__ == "__main__":
      unittest.main()
Tip!

Press p to see the previous file, or n to see the next file.

Comments

Loading...