Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

device.py 9.2 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
  1. import sys
  2. import ctypes
  3. import os
  4. import multiprocessing
  5. import json
  6. import time
  7. from pathlib import Path
  8. from core.interact import interact as io
  9. class Device(object):
  10. def __init__(self, index, tf_dev_type, name, total_mem, free_mem):
  11. self.index = index
  12. self.tf_dev_type = tf_dev_type
  13. self.name = name
  14. self.total_mem = total_mem
  15. self.total_mem_gb = total_mem / 1024**3
  16. self.free_mem = free_mem
  17. self.free_mem_gb = free_mem / 1024**3
  18. def __str__(self):
  19. return f"[{self.index}]:[{self.name}][{self.free_mem_gb:.3}/{self.total_mem_gb :.3}]"
  20. class Devices(object):
  21. all_devices = None
  22. def __init__(self, devices):
  23. self.devices = devices
  24. def __len__(self):
  25. return len(self.devices)
  26. def __getitem__(self, key):
  27. result = self.devices[key]
  28. if isinstance(key, slice):
  29. return Devices(result)
  30. return result
  31. def __iter__(self):
  32. for device in self.devices:
  33. yield device
  34. def get_best_device(self):
  35. result = None
  36. idx_mem = 0
  37. for device in self.devices:
  38. mem = device.total_mem
  39. if mem > idx_mem:
  40. result = device
  41. idx_mem = mem
  42. return result
  43. def get_worst_device(self):
  44. result = None
  45. idx_mem = sys.maxsize
  46. for device in self.devices:
  47. mem = device.total_mem
  48. if mem < idx_mem:
  49. result = device
  50. idx_mem = mem
  51. return result
  52. def get_device_by_index(self, idx):
  53. for device in self.devices:
  54. if device.index == idx:
  55. return device
  56. return None
  57. def get_devices_from_index_list(self, idx_list):
  58. result = []
  59. for device in self.devices:
  60. if device.index in idx_list:
  61. result += [device]
  62. return Devices(result)
  63. def get_equal_devices(self, device):
  64. device_name = device.name
  65. result = []
  66. for device in self.devices:
  67. if device.name == device_name:
  68. result.append (device)
  69. return Devices(result)
  70. def get_devices_at_least_mem(self, totalmemsize_gb):
  71. result = []
  72. for device in self.devices:
  73. if device.total_mem >= totalmemsize_gb*(1024**3):
  74. result.append (device)
  75. return Devices(result)
  76. @staticmethod
  77. def _get_tf_devices_proc(q : multiprocessing.Queue):
  78. if sys.platform[0:3] == 'win':
  79. compute_cache_path = Path(os.environ['APPDATA']) / 'NVIDIA' / ('ComputeCache_ALL')
  80. os.environ['CUDA_CACHE_PATH'] = str(compute_cache_path)
  81. if not compute_cache_path.exists():
  82. io.log_info("Caching GPU kernels...")
  83. compute_cache_path.mkdir(parents=True, exist_ok=True)
  84. import tensorflow
  85. tf_version = tensorflow.version.VERSION
  86. #if tf_version is None:
  87. # tf_version = tensorflow.version.GIT_VERSION
  88. if tf_version[0] == 'v':
  89. tf_version = tf_version[1:]
  90. if tf_version[0] == '2':
  91. tf = tensorflow.compat.v1
  92. else:
  93. tf = tensorflow
  94. import logging
  95. # Disable tensorflow warnings
  96. tf_logger = logging.getLogger('tensorflow')
  97. tf_logger.setLevel(logging.ERROR)
  98. from tensorflow.python.client import device_lib
  99. devices = []
  100. physical_devices = device_lib.list_local_devices()
  101. physical_devices_f = {}
  102. for dev in physical_devices:
  103. dev_type = dev.device_type
  104. dev_tf_name = dev.name
  105. dev_tf_name = dev_tf_name[ dev_tf_name.index(dev_type) : ]
  106. dev_idx = int(dev_tf_name.split(':')[-1])
  107. if dev_type in ['GPU','DML']:
  108. dev_name = dev_tf_name
  109. dev_desc = dev.physical_device_desc
  110. if len(dev_desc) != 0:
  111. if dev_desc[0] == '{':
  112. dev_desc_json = json.loads(dev_desc)
  113. dev_desc_json_name = dev_desc_json.get('name',None)
  114. if dev_desc_json_name is not None:
  115. dev_name = dev_desc_json_name
  116. else:
  117. for param, value in ( v.split(':') for v in dev_desc.split(',') ):
  118. param = param.strip()
  119. value = value.strip()
  120. if param == 'name':
  121. dev_name = value
  122. break
  123. physical_devices_f[dev_idx] = (dev_type, dev_name, dev.memory_limit)
  124. q.put(physical_devices_f)
  125. time.sleep(0.1)
  126. @staticmethod
  127. def initialize_main_env():
  128. if int(os.environ.get("NN_DEVICES_INITIALIZED", 0)) != 0:
  129. return
  130. if 'CUDA_VISIBLE_DEVICES' in os.environ.keys():
  131. os.environ.pop('CUDA_VISIBLE_DEVICES')
  132. os.environ['CUDA_​CACHE_​MAXSIZE'] = '2147483647'
  133. os.environ['TF_MIN_GPU_MULTIPROCESSOR_COUNT'] = '2'
  134. os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' # tf log errors only
  135. q = multiprocessing.Queue()
  136. p = multiprocessing.Process(target=Devices._get_tf_devices_proc, args=(q,), daemon=True)
  137. p.start()
  138. p.join()
  139. visible_devices = q.get()
  140. os.environ['NN_DEVICES_INITIALIZED'] = '1'
  141. os.environ['NN_DEVICES_COUNT'] = str(len(visible_devices))
  142. for i in visible_devices:
  143. dev_type, name, total_mem = visible_devices[i]
  144. os.environ[f'NN_DEVICE_{i}_TF_DEV_TYPE'] = dev_type
  145. os.environ[f'NN_DEVICE_{i}_NAME'] = name
  146. os.environ[f'NN_DEVICE_{i}_TOTAL_MEM'] = str(total_mem)
  147. os.environ[f'NN_DEVICE_{i}_FREE_MEM'] = str(total_mem)
  148. @staticmethod
  149. def getDevices():
  150. if Devices.all_devices is None:
  151. if int(os.environ.get("NN_DEVICES_INITIALIZED", 0)) != 1:
  152. raise Exception("nn devices are not initialized. Run initialize_main_env() in main process.")
  153. devices = []
  154. for i in range ( int(os.environ['NN_DEVICES_COUNT']) ):
  155. devices.append ( Device(index=i,
  156. tf_dev_type=os.environ[f'NN_DEVICE_{i}_TF_DEV_TYPE'],
  157. name=os.environ[f'NN_DEVICE_{i}_NAME'],
  158. total_mem=int(os.environ[f'NN_DEVICE_{i}_TOTAL_MEM']),
  159. free_mem=int(os.environ[f'NN_DEVICE_{i}_FREE_MEM']), )
  160. )
  161. Devices.all_devices = Devices(devices)
  162. return Devices.all_devices
# NOTE(review): dead code below — a legacy ctypes/libcuda device-enumeration
# path, disabled by being wrapped in a bare string literal. It queried GPUs
# directly through the CUDA driver API (cuInit/cuDeviceGet*/cuMemGetInfo)
# and published NN_DEVICE_* env vars, before the TF-based probe replaced it.
# Kept unchanged for reference; delete once the TF path is considered final.
"""
# {'name' : name.split(b'\0', 1)[0].decode(),
# 'total_mem' : totalMem.value
# }
return
min_cc = int(os.environ.get("TF_MIN_REQ_CAP", 35))
libnames = ('libcuda.so', 'libcuda.dylib', 'nvcuda.dll')
for libname in libnames:
try:
cuda = ctypes.CDLL(libname)
except:
continue
else:
break
else:
return Devices([])
nGpus = ctypes.c_int()
name = b' ' * 200
cc_major = ctypes.c_int()
cc_minor = ctypes.c_int()
freeMem = ctypes.c_size_t()
totalMem = ctypes.c_size_t()
result = ctypes.c_int()
device = ctypes.c_int()
context = ctypes.c_void_p()
error_str = ctypes.c_char_p()
devices = []
if cuda.cuInit(0) == 0 and \
cuda.cuDeviceGetCount(ctypes.byref(nGpus)) == 0:
for i in range(nGpus.value):
if cuda.cuDeviceGet(ctypes.byref(device), i) != 0 or \
cuda.cuDeviceGetName(ctypes.c_char_p(name), len(name), device) != 0 or \
cuda.cuDeviceComputeCapability(ctypes.byref(cc_major), ctypes.byref(cc_minor), device) != 0:
continue
if cuda.cuCtxCreate_v2(ctypes.byref(context), 0, device) == 0:
if cuda.cuMemGetInfo_v2(ctypes.byref(freeMem), ctypes.byref(totalMem)) == 0:
cc = cc_major.value * 10 + cc_minor.value
if cc >= min_cc:
devices.append ( {'name' : name.split(b'\0', 1)[0].decode(),
'total_mem' : totalMem.value,
'free_mem' : freeMem.value,
'cc' : cc
})
cuda.cuCtxDetach(context)
os.environ['NN_DEVICES_COUNT'] = str(len(devices))
for i, device in enumerate(devices):
os.environ[f'NN_DEVICE_{i}_NAME'] = device['name']
os.environ[f'NN_DEVICE_{i}_TOTAL_MEM'] = str(device['total_mem'])
os.environ[f'NN_DEVICE_{i}_FREE_MEM'] = str(device['free_mem'])
os.environ[f'NN_DEVICE_{i}_CC'] = str(device['cc'])
"""
Tip!

Press p to see the previous file, or n to see the next file

Comments

Loading...