Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

reduce_dataset.py 846 B

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
  1. #!/usr/bin/env python
  2. # coding: utf-8
  3. from omegaconf import OmegaConf
  4. from glob import glob
  5. import pandas as pd
  6. import os
  7. GENERAL_CONST_PATH = os.path.join('src', 'const.yaml')
  8. def subsample(data_dir, data_file, data_image_dir):
  9. df = pd.read_csv(os.path.join(data_dir, data_file))
  10. df = df.sample(n=df.shape[0] // 45)
  11. data_image_paths = {os.path.basename(x): x for x in glob(os.path.join(data_dir, *data_image_dir))}
  12. [data_image_paths.pop(img) for img in list(df['Image Index'])]
  13. [os.remove(img) for img in data_image_paths.values()]
  14. if __name__ == '__main__':
  15. general_const = OmegaConf.load(os.path.join(os.getcwd(), GENERAL_CONST_PATH))
  16. subsample(data_dir=os.path.join(os.getcwd(), *general_const.DATA_DIR),
  17. data_file=general_const.DATA_FILE,
  18. data_image_dir=general_const.IMAGE_PATH)
Tip!

Press p or ← to see the previous file, or n or → to see the next file

Comments

Loading...