Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

dataset_data_interface.py 1.9 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
  1. import os
  2. from super_gradients.common import S3Connector
  3. from super_gradients.common import explicit_params_validation
  4. import zipfile
  5. class DatasetDataInterface:
  6. def __init__(self, env: str, data_connection_source: str = 's3'):
  7. """
  8. :param env: str "development"/"production"
  9. :param data_connection_source: str "s3" for aws bny default
  10. """
  11. self.env = env
  12. self.s3_connector = None
  13. self.data_connection_source = data_connection_source
  14. @explicit_params_validation(validation_type='None')
  15. def load_remote_dataset_file(self, remote_file: str, local_dir: str, overwrite_local_dataset: bool = False) -> str:
  16. """
  17. :param remote_file: str - the name of s3 file
  18. :param local_dir: str - the directory to put the dataset in
  19. :param overwrite_local_dataset: Whether too delete the dataset dir before downloading
  20. :return:
  21. """
  22. dataset_full_path = local_dir
  23. bucket = remote_file.split("/")[2]
  24. file_path = "/".join(remote_file.split("/")[3:])
  25. if self.data_connection_source == 's3':
  26. self.s3_connector = S3Connector(self.env, bucket)
  27. # DELETE THE LOCAL VERSION ON THE MACHINE
  28. if os.path.exists(dataset_full_path):
  29. if overwrite_local_dataset:
  30. filelist = os.listdir(local_dir)
  31. for f in filelist:
  32. os.remove(os.path.join(local_dir, f))
  33. else:
  34. Warning("Overwrite local dataset set to False but dataset exists in the dir")
  35. if not os.path.exists(local_dir):
  36. os.mkdir(local_dir)
  37. local_file = self.s3_connector.download_file_by_path(file_path, local_dir)
  38. with zipfile.ZipFile(local_dir + "/" + local_file, 'r') as zip_ref:
  39. zip_ref.extractall(local_dir + "/")
  40. os.remove(local_dir + "/" + local_file)
  41. return local_dir
Tip!

Press p or ← to see the previous file, or n or → to see the next file

Comments

Loading...