Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

crash_tips_test.py 5.0 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
  1. import sys
  2. import unittest
  3. import dataclasses
  4. from typing import Type
  5. import omegaconf
  6. from super_gradients.common.crash_handler.crash_tips import (
  7. get_relevant_crash_tip_message,
  8. CrashTip,
  9. TorchCudaMissingTip,
  10. RecipeFactoryFormatTip,
  11. DDPNotInitializedTip,
  12. WrongHydraVersionTip,
  13. InterpolationKeyErrorTip,
  14. SGLoggerIsNoneTip,
  15. )
  @dataclasses.dataclass
  class DocumentedException:
      """Pair a concrete exception instance with the crash tip class expected to handle it.

      Instances are consumed by ``CrashTipTest``, which raises ``exc_value`` and checks
      the outcome against ``expected_crash_tip``.
      """

      # The exception instance that the test raises for real.
      exc_value: Exception
      # The CrashTip subclass that should be selected for ``exc_value``.
      expected_crash_tip: Type[CrashTip]
      # Open question kept from the original: also record the author/person who faced this exception?
  class CrashTipTest(unittest.TestCase):
      """Check that each documented exception is picked up by the crash tip registered for it."""

      def setUp(self) -> None:
          """Populate ``self.documented_exceptions`` with (exception, expected tip) records."""
          # Add any exception that we want to support here to make sure that it will be handled by our crash tip handler
          exception_and_tip_pairs = [
              (
                  OSError(
                      "/home/tomer.keren/.conda/envs/tomer-dev-sg3/lib/python3.10/site-packages/torch/lib/../../nvidia/cublas/lib/libcublas.so.11: symbol "
                      "cublasLtHSHMatmulAlgoInit version libcublasLt.so.11 not defined in file libcublasLt.so.11 with link time reference"
                  ),
                  TorchCudaMissingTip,
              ),
              (
                  RuntimeError(
                      "Malformed object definition in configuration. Expecting either a string of object type or a single entry dictionary{type_name(str): "
                      "{parameters...}}.received: {'my_callback': None, 'lr_step': 2.4}"
                  ),
                  RecipeFactoryFormatTip,
              ),
              (
                  RuntimeError("Default process group has not been initialized, please make sure to call init_process_group."),
                  DDPNotInitializedTip,
              ),
              (
                  TypeError("__init__() got an unexpected keyword argument 'version_base'"),
                  WrongHydraVersionTip,
              ),
              (
                  omegaconf.errors.InterpolationKeyError("omegaconf.errors.InterpolationKeyError: Interpolation key 'x' not found"),
                  InterpolationKeyErrorTip,
              ),
              (
                  AttributeError("AttributeError: 'NoneType' object has no attribute 'add_scalar'"),
                  SGLoggerIsNoneTip,
              ),
          ]
          self.documented_exceptions = [
              DocumentedException(exc_value=error, expected_crash_tip=tip) for error, tip in exception_and_tip_pairs
          ]

      def test_found_exceptions(self):
          """Test all the exceptions that were documented, and make sure that they have an associated tip."""
          for case in self.documented_exceptions:
              tip_cls = case.expected_crash_tip
              tip_name = tip_cls.__name__

              try:
                  raise case.exc_value
              except type(case.exc_value):
                  # Actually raising the exception yields a real (type, value, traceback) triple,
                  # which is what the crash tip API is called with below.
                  raised_type, raised, tb = sys.exc_info()

                  # 1) The expected tip must recognise the exception.
                  with self.subTest(
                      msg="Making sure that the CrashTip is considered relevant for the exception...",
                      expected_tip=tip_name,
                      exception=raised,
                  ):
                      self.assertTrue(
                          tip_cls.is_relevant(raised_type, raised, tb),
                          msg=f"Crash tip '{tip_name}' should be relevant for exception '{raised_type.__name__}' but failed.",
                      )

                  # 2) The expected tip must be able to produce a message.
                  with self.subTest(
                      msg="Making sure that the CrashTip generates a message (None is returned if an error is raised internally, to avoid crashing atexit)...",
                      crash_tip=tip_name,
                  ):
                      tip_message = tip_cls.get_message(raised_type, raised, tb)
                      self.assertIsNotNone(
                          tip_message,
                          msg=f"The crash tip '{tip_name}' returned None, "
                          f"an exception was probably raised in '{tip_name}.get_message(...)'",
                      )

                  # 3) The generic lookup must return the same message as the expected tip.
                  with self.subTest(
                      msg="Making sure that we can find the relevant CrashTip and get it's summary for the exception...",
                      expected_tip=tip_name,
                      exception=raised,
                  ):
                      found_message = get_relevant_crash_tip_message(raised_type, raised, tb)
                      wanted_message = tip_cls.get_message(raised_type, raised, tb)
                      self.assertEqual(
                          found_message,
                          wanted_message,
                          msg=f"Crash tip message should be '{wanted_message}' but got '{found_message}' instead.",
                      )
Tip!

Press p to see the previous file, or n to see the next file

Comments

Loading...