jinensetpal
/
vision
connected to https://github.com/jinensetpal/vision.git


  
1

	
2

	
3

	
4

	
5

	
6

	
7

	
8

	
9

	
10

	
11

	
12

	
13

	
14

	
15

	
16

	
17

	
18

	
19

	
20

	
21

	
22

	
23

	
24

	
25

	
26

	
27

	
28

	
29

	
30

	
31

	
32

	
33

	
34

	
35

	
36

	
37

	
38

	
39

	
40

	
41

	
42

	
43

	
44

	
45

	
46

	
47

	
48

	
49

	
50

	
51

	
52

	
53

	
54

	
55

	
56

	
57

	
58

	
59

	
60

	
61

	
62

	
63

	
64

	
65

	
66

	
67

	
68

	
69

	
70

	
71

	
72

	
73

	
74

	
75

	
76

	
77

	
78

	
79

	
80

	
81

	
82

	
83

	
84

	
85

	
86

	
87

	
88

	
89

	
90

	
91

	
92

	
93

	
94

	
95

	
96

	
97

	
98

	
99

	
import os
import platform
import statistics

import torch
import torch.utils.benchmark as benchmark
import torchvision


def print_machine_specs():
    """Print a short summary of the host CPU and, when present, the CUDA GPU.

    The processor string, platform string and logical CPU count are always
    printed. The GPU name and total memory are printed for the current CUDA
    device; both lines describe the same device index. When CUDA is not
    available, a placeholder line is printed instead of raising.
    """
    print("Processor:", platform.processor())
    print("Platform:", platform.platform())
    print("Logical CPUs:", os.cpu_count())

    if not torch.cuda.is_available():
        # The torch.cuda device queries below raise when no CUDA device is usable.
        print("\nCUDA device: not available")
        return

    # Query name and memory through one index so they cannot refer to different GPUs.
    device_index = torch.cuda.current_device()
    print(f"\nCUDA device: {torch.cuda.get_device_name(device_index)}")
    print(f"Total Memory: {torch.cuda.get_device_properties(device_index).total_memory / 1e9:.2f} GB")


def get_data():
    """Return the first batch of the Places365 validation split as a list.

    The dataset is rooted at ``./data`` and is downloaded only when no
    ``data`` entry exists in the current working directory. Images go through
    ``PILToTensor``; the loader is unshuffled with a batch size of 1000, and
    the collate function keeps only the first element of every sample, so the
    batch is a plain Python list rather than a stacked tensor.
    """

    def first_of_each(samples):
        # Keep element 0 of each sample and leave the items unstacked.
        return [sample[0] for sample in samples]

    data_dir = os.path.join(os.getcwd(), "data")
    to_tensor = torchvision.transforms.Compose([torchvision.transforms.PILToTensor()])
    val_split = torchvision.datasets.Places365(
        root="./data",
        split="val",
        transform=to_tensor,
        download=not os.path.exists(data_dir),
    )
    loader = torch.utils.data.DataLoader(
        val_split,
        batch_size=1000,
        shuffle=False,
        num_workers=1,
        collate_fn=first_of_each,
    )
    first_batch = next(iter(loader))
    return first_batch


def run_encoding_benchmark(decoded_images):
    """Time per-image versus single-call JPEG encoding on CPU and CUDA.

    Args:
        decoded_images: list of image tensors; each is moved to the device
            under test before timing.

    For every device ("cpu", "cuda"), batch size (1, 100, 1000) and thread
    count (1, 12, 24), two statements are timed with ``blocked_autorange``:
    an "unfused" list comprehension calling ``encode_jpeg`` per image, and a
    "fused" single ``encode_jpeg`` call on the whole list. All measurements
    are printed together as one comparison table.
    """
    strategies = (
        ("unfused", "[torchvision.io.encode_jpeg(img) for img in decoded_images_device_trunc]"),
        ("fused", "torchvision.io.encode_jpeg(decoded_images_device_trunc)"),
    )
    measurements = []
    for device in ("cpu", "cuda"):
        on_device = [image.to(device=device) for image in decoded_images]
        for size in (1, 100, 1000):
            # The timed statements only read this slice, so one copy per size is enough.
            batch = on_device[:size]
            for num_threads in (1, 12, 24):
                for strat, stmt in strategies:
                    timer = benchmark.Timer(
                        stmt=stmt,
                        setup="import torchvision",
                        globals={"decoded_images_device_trunc": batch},
                        label="Image Encoding",
                        sub_label=f"{device.upper()} ({strat}): {stmt}",
                        description=f"{size} images",
                        num_threads=num_threads,
                    )
                    measurements.append(timer.blocked_autorange())
    benchmark.Compare(measurements).print()


def run_decoding_benchmark(encoded_images):
    """Time per-image versus single-call JPEG decoding targeting CPU and CUDA.

    Args:
        encoded_images: list of encoded JPEG tensors used as decoder input.

    For every target device ("cpu", "cuda"), batch size (1, 100, 1000) and
    thread count (1, 12, 24), two statements are timed with
    ``blocked_autorange``: an "unfused" list comprehension calling
    ``decode_jpeg`` per image, and a "fused" single ``decode_jpeg`` call on
    the whole list. The target device is passed through the ``device=``
    argument embedded in the timed statement. All measurements are printed
    together as one comparison table.
    """
    measurements = []
    for device in ("cpu", "cuda"):
        # The statements embed the device name, so they are rebuilt per device.
        strategies = (
            ("unfused", f"[torchvision.io.decode_jpeg(img, device='{device}') for img in encoded_images_trunc]"),
            ("fused", f"torchvision.io.decode_jpeg(encoded_images_trunc, device='{device}')"),
        )
        for size in (1, 100, 1000):
            # The timed statements only read this slice, so one copy per size is enough.
            batch = encoded_images[:size]
            for num_threads in (1, 12, 24):
                for strat, stmt in strategies:
                    timer = benchmark.Timer(
                        stmt=stmt,
                        setup="import torchvision",
                        globals={"encoded_images_trunc": batch},
                        label="Image Decoding",
                        sub_label=f"{device.upper()} ({strat}): {stmt}",
                        description=f"{size} images",
                        num_threads=num_threads,
                    )
                    measurements.append(timer.blocked_autorange())
    benchmark.Compare(measurements).print()


if __name__ == "__main__":
    # Script entry point: report the machine, load one batch of images, then
    # run the encoding benchmark followed by the decoding benchmark.
    print_machine_specs()

    decoded_images = get_data()
    # Height and width are read from the last two dimensions of each tensor.
    mean_h = statistics.mean([image.shape[-2] for image in decoded_images])
    mean_w = statistics.mean([image.shape[-1] for image in decoded_images])
    print(f"\nMean image size: {int(mean_h)}x{int(mean_w)}")

    run_encoding_benchmark(decoded_images)

    # Build the decoder inputs: encode the batch on the GPU in one call, then
    # copy every encoded tensor back to host memory.
    encoded_images_cuda = torchvision.io.encode_jpeg([image.cuda() for image in decoded_images])
    encoded_images_cpu = [encoded.cpu() for encoded in encoded_images_cuda]
    run_decoding_benchmark(encoded_images_cpu)