- """
- Quantization utilities
- Methods are based on:
- https://github.com/NVIDIA/TensorRT/blob/51a4297753d3e12d0eed864be52400f429a6a94c/tools/pytorch-quantization/examples/torchvision/classification_flow.py#L385
- (Licensed under the Apache License, Version 2.0)
- """
- import torch
- from tqdm import tqdm
- from super_gradients.common.abstractions.abstract_logger import get_logger
- from super_gradients.training.utils.distributed_training_utils import get_local_rank, get_world_size
- from torch.distributed import all_gather
- logger = get_logger(__name__)
- try:
- from pytorch_quantization import nn as quant_nn
- from pytorch_quantization import calib
- _imported_pytorch_quantization_failure = None
- except (ImportError, NameError, ModuleNotFoundError) as import_err:
- logger.warning("Failed to import pytorch_quantization")
- _imported_pytorch_quantization_failure = import_err
- class QuantizationCalibrator:
- def __init__(self, torch_hist: bool = True, verbose: bool = True) -> None:
- if _imported_pytorch_quantization_failure is not None:
- raise _imported_pytorch_quantization_failure
- super().__init__()
- self.verbose = verbose
- self.torch_hist = torch_hist
- def calibrate_model(
- self,
- model: torch.nn.Module,
- calib_data_loader: torch.utils.data.DataLoader,
- method: str = "percentile",
- num_calib_batches: int = 2,
- percentile: float = 99.99,
- ):
- """
- Calibrates torch model with quantized modules.
- :param model: torch.nn.Module, model to perfrom the calibration on.
- :param calib_data_loader: torch.utils.data.DataLoader, data loader of the calibration dataset.
- Assumes that the first element of the tuple is the input image.
- :param method: str, One of [percentile, mse, entropy, max].
- Statistics method for amax computation of the quantized modules
- (Default=percentile).
- :param num_calib_batches: int, number of batches to collect the statistics from.
- :param percentile: float, percentile value to use when SgModel,quant_modules_calib_method='percentile'.
- Discarded when other methods are used (Default=99.99).
- """
        acceptable_methods = ["percentile", "mse", "entropy", "max"]
        if method in acceptable_methods:
            with torch.no_grad():
                self._collect_stats(model, calib_data_loader, num_batches=num_calib_batches)

                # FOR PERCENTILE WE MUST PASS THE PERCENTILE VALUE THROUGH KWARGS,
                # SO IT IS FORWARDED TO module.load_calib_amax(**kwargs); FOR OTHER METHODS WE MUST NOT PASS IT.
                if method == "percentile":
                    self._compute_amax(model, method="percentile", percentile=percentile)
                else:
                    self._compute_amax(model, method=method)
        else:
            raise ValueError(
                f"Unsupported quantization calibration method, "
                f"expected one of: {', '.join(acceptable_methods)}, however, received: {method}"
            )
    def _collect_stats(self, model, data_loader, num_batches):
        """Feed data to the network and collect statistics."""
        local_rank = get_local_rank()
        world_size = get_world_size()
        device = next(model.parameters()).device

        # Enable calibrators
        self._enable_calibrators(model)

        # Feed data to the network for collecting stats
        for i, (image, *_) in tqdm(enumerate(data_loader), total=num_batches, disable=local_rank > 0):
            if world_size > 1:
                all_batches = [torch.zeros_like(image, device=device) for _ in range(world_size)]
                all_gather(all_batches, image.to(device=device))
            else:
                all_batches = [image]

            for local_image in all_batches:
                model(local_image.to(device=device))
            if i >= num_batches:
                break

        # Disable calibrators
        self._disable_calibrators(model)
    def _disable_calibrators(self, model):
        for name, module in model.named_modules():
            if isinstance(module, quant_nn.TensorQuantizer):
                if module._calibrator is not None:
                    module.disable_calib()
                    module.enable_quant()
                else:
                    module.enable()

    def _enable_calibrators(self, model):
        for name, module in model.named_modules():
            if isinstance(module, quant_nn.TensorQuantizer):
                if module._calibrator is not None:
                    if isinstance(module._calibrator, calib.HistogramCalibrator):
                        module._calibrator._torch_hist = self.torch_hist  # TensorQuantizer does not expose it as API
                    module.disable_quant()
                    module.enable_calib()
                else:
                    module.disable()
    def _compute_amax(self, model, **kwargs):
        for name, module in model.named_modules():
            if isinstance(module, quant_nn.TensorQuantizer):
                if module._calibrator is not None:
                    if isinstance(module._calibrator, calib.MaxCalibrator):
                        module.load_calib_amax()
                    else:
                        module.load_calib_amax(**kwargs)
                    if hasattr(module, "clip"):
                        module.init_learn_amax()
                if self.verbose:
                    print(f"{name:40}: {module}")