Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

utils.py 1.1 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
  1. '''
  2. utility functions for loading
  3. '''
  4. import os
  5. import pickle
  6. import pandas as pd
  7. from lendingclub import config
  8. def cut_to_ids(df, ids):
  9. '''
  10. cuts a dataframe to a list of ids
  11. '''
  12. query = 'id in @ids'
  13. return df.query(query)
  14. def load_dataset(ds_type = 'train'):
  15. '''
  16. load in train or test datasets (base_loan_info, eval_loan_info, ids)
  17. '''
  18. with open(os.path.join(config.data_dir, 'train_test_ids.pkl'), 'rb') as file:
  19. train_test_ids_dict = pickle.load(file)
  20. base_loan_info = pd.read_feather(os.path.join(config.data_dir, 'base_loan_info.fth'))
  21. eval_loan_info = pd.read_feather(os.path.join(config.data_dir, 'eval_loan_info.fth'))
  22. if ds_type not in ['train', 'test', 'train_testable']:
  23. print('ds_type must be "train", "test", or "train_testable"')
  24. return None
  25. if ds_type == 'test':
  26. ids = train_test_ids_dict['test']
  27. elif ds_type == 'train':
  28. ids = train_test_ids_dict['train']
  29. else:
  30. ids = train_test_ids_dict['train_testable']
  31. return cut_to_ids(base_loan_info, ids), cut_to_ids(eval_loan_info, ids), ids
Tip!

Press p to see the previous file, or n to see the next file

Comments

Loading...