# Yolo v5 Detection training on the CoCo2017 Dataset:
# Yolo v5s trained on 320x320, mAP@0.5-0.95 (confidence 0.001, test on 320x320 images) ~28.4
# Yolo v5s trained on 640x640, mAP@0.5-0.95 (confidence 0.001, test on 320x320 images) ~29.1
# Yolo v5 Detection training on the CoCo2014 Dataset:
# Yolo v5s trained on 320x320, mAP@0.5-0.95 (confidence 0.001, test on 320x320 images) ~28.77
# The batch size may need to change depending on the model size and the GPU (2080Ti, V100).
# The code is optimized for running with a mini-batch of 64 examples, so depending on the number of GPUs,
# set the "batch_accumulate" param in the training_params dict such that batch_size * gpu_num * batch_accumulate = 64.
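# Illustrative arithmetic for the rule above (example numbers only, assuming batch_size is the per-GPU batch):
#   4 GPUs with --batch 16 -> keep batch_accumulate = 1, since 16 * 4 * 1 = 64
#   1 GPU  with --batch 16 -> set  batch_accumulate = 4, since 16 * 1 * 4 = 64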
import argparse

import torch

import super_gradients
from super_gradients.common.aws_connection.aws_secrets_manager_connector import AWSSecretsManagerConnector
from super_gradients.training import SgModel, MultiGPUMode
from super_gradients.training.datasets import CoCoDetectionDatasetInterface, CoCo2014DetectionDatasetInterface
from super_gradients.training.datasets.datasets_utils import ComposedCollateFunction, MultiScaleCollateFunction
from super_gradients.training.metrics import DetectionMetrics
from super_gradients.training.models.yolov5 import YoloV5PostPredictionCallback
from super_gradients.training.utils.detection_utils import base_detection_collate_fn

super_gradients.init_trainer()
parser = argparse.ArgumentParser()

#################################
# Model Options
#################################
parser.add_argument("--model", type=str, required=True, choices=["s", "m", "l", "x", "c"],
                    help="one of s, m, l, x, c (small, medium, large, extra-large, custom)")
parser.add_argument("--depth", type=float, help="depth multiplier, only used when --model is c (not applicable to the default s/m/l/x models)")
parser.add_argument("--width", type=float, help="width multiplier, only used when --model is c (not applicable to the default s/m/l/x models)")
parser.add_argument("--reload", action="store_true")
parser.add_argument("--max_epochs", type=int, default=300)
parser.add_argument("--batch", type=int, default=64)
parser.add_argument("--test-img-size", type=int, default=320)
parser.add_argument("--train-img-size", type=int, default=640)
parser.add_argument("--multi-scale", action="store_true")
parser.add_argument("--coco2014", action="store_true")

args, _ = parser.parse_known_args()
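# Example invocations (the script file name below is illustrative, not a documented CLI entry point):
#   python yolo_v5_train.py --model s --batch 64 --train-img-size 640 --test-img-size 320
#   python yolo_v5_train.py --model c --depth 0.33 --width 0.5 --multi-scale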
models_dict = {"s": "yolo_v5s", "m": "yolo_v5m", "l": "yolo_v5l", "x": "yolo_v5x", "c": "custom_yolov5"}

if args.model == "c":
    assert args.depth is not None and args.width is not None, \
        "when setting model type to c (custom), depth and width flags must be set"
    assert 0 <= args.depth <= 1, "depth must be in the range [0,1]"
    assert 0 <= args.width <= 1, "width must be in the range [0,1]"
else:
    assert args.depth is None and args.width is None, "depth and width flags have no effect when the model is not c"

args.model = models_dict[args.model]

distributed = super_gradients.is_distributed()

if args.multi_scale:
    train_collate_fn = ComposedCollateFunction([base_detection_collate_fn,
                                                MultiScaleCollateFunction(target_size=args.train_img_size)])
else:
    train_collate_fn = base_detection_collate_fn
dataset_params = {"batch_size": args.batch,
                  "test_batch_size": args.batch,
                  "train_image_size": args.train_img_size,
                  "test_image_size": args.test_img_size,
                  "test_collate_fn": base_detection_collate_fn,
                  "train_collate_fn": train_collate_fn,
                  "test_sample_loading_method": "default",  # TODO: remove when fixing distributed_data_parallel
                  "dataset_hyper_param": {
                      "hsv_h": 0.015,    # IMAGE HSV-Hue AUGMENTATION (fraction)
                      "hsv_s": 0.7,      # IMAGE HSV-Saturation AUGMENTATION (fraction)
                      "hsv_v": 0.4,      # IMAGE HSV-Value AUGMENTATION (fraction)
                      "degrees": 0.0,    # IMAGE ROTATION (+/- deg)
                      "translate": 0.1,  # IMAGE TRANSLATION (+/- fraction)
                      "scale": 0.5,      # IMAGE SCALE (+/- gain)
                      "shear": 0.0}      # IMAGE SHEAR (+/- deg)
                  }

arch_params = {"depth_mult_factor": args.depth,
               "width_mult_factor": args.width
               }

dataset_string = 'coco2017' if not args.coco2014 else 'coco2014'

model_repo_bucket_name = AWSSecretsManagerConnector.get_secret_value_for_secret_key(aws_env='research',
                                                                                     secret_name='training_secrets',
                                                                                     secret_key='S3.MODEL_REPOSITORY_BUCKET_NAME')
model = SgModel(args.model + '____' + dataset_string,
                model_checkpoints_location="s3://" + model_repo_bucket_name,
                multi_gpu=MultiGPUMode.DISTRIBUTED_DATA_PARALLEL if distributed else MultiGPUMode.DATA_PARALLEL,
                post_prediction_callback=YoloV5PostPredictionCallback())

devices = torch.cuda.device_count() if not distributed else 1

dataset_interface_class = CoCoDetectionDatasetInterface if not args.coco2014 else CoCo2014DetectionDatasetInterface
dataset_interface = dataset_interface_class(dataset_params=dataset_params)
model.connect_dataset_interface(dataset_interface, data_loader_num_workers=20)
model.build_model(args.model, arch_params=arch_params, load_checkpoint=args.reload)

post_prediction_callback = YoloV5PostPredictionCallback()
training_params = {"max_epochs": args.max_epochs,
                   "lr_mode": "cosine",
                   "initial_lr": 0.01,
                   "cosine_final_lr_ratio": 0.2,
                   "lr_warmup_epochs": 3,
                   "batch_accumulate": 1,
                   "warmup_bias_lr": 0.1,
                   "loss": "yolo_v5_loss",
                   "criterion_params": {"model": model},
                   "optimizer": "SGD",
                   "warmup_momentum": 0.8,
                   "optimizer_params": {"momentum": 0.937,
                                        "weight_decay": 0.0005 * (args.batch / 64.0),
                                        "nesterov": True},
                   "mixed_precision": False,
                   "ema": True,
                   "train_metrics_list": [],
                   "valid_metrics_list": [DetectionMetrics(post_prediction_callback=post_prediction_callback,
                                                           num_cls=len(dataset_interface.coco_classes))],
                   "loss_logging_items_names": ["GIoU", "obj", "cls", "Loss"],
                   "metric_to_watch": "mAP@0.50:0.95",
                   "greater_metric_to_watch_is_better": True}
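# Note: weight_decay above scales linearly with the batch size (0.0005 at a batch of 64);
# e.g. with --batch 32 it becomes 0.0005 * (32 / 64.0) = 0.00025.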
print(f"Training Yolo v5 {args.model} on {dataset_string.upper()}:\n"
      f"width-mult={args.width}, depth-mult={args.depth}, "
      f"train-img-size={args.train_img_size}, test-img-size={args.test_img_size}")

model.train(training_params=training_params)