Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

lc_utils.py 1.7 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
  1. import pandas as pd
  2. # make a crude test set for now
  3. def get_split_date(df, date_column, quantile):
  4. """
  5. https://stackoverflow.com/questions/31018622/pandas-quantile-function-for-dates
  6. Get the date on which to split a dataframe for timeseries splitting
  7. Adjusted coerce param to errors since SO is old.
  8. """
  9. # 1. convert date_column to datetime (useful in case it is a string)
  10. # 2. convert into int (for sorting)
  11. # 3. get the quantile
  12. # 4. get the corresponding date
  13. # 5. return, pray that it works
  14. quantile_date = pd.to_datetime(df[date_column], errors = 'raise').astype('int64').quantile(q=quantile)#.astype('datetime64[ns]')
  15. return pd.to_datetime(quantile_date)
  16. def split_out_traintestable_loans(df, eval_df, oldness_thrsh=.9):
  17. '''Can train/test on loans that pass the oldness_thrsh or have status paid/defaulted/charged_off'''
  18. old_enough_ids = eval_df[(eval_df['maturity_time_stat_adj'] >= oldness_thrsh) |
  19. (eval_df['maturity_paid_stat_adj'] >= oldness_thrsh) |
  20. (eval_df['loan_status'].isin(['paid', 'defaulted', 'charged_off']))]['id'].unique()
  21. df = df[df['id'].isin(old_enough_ids)]
  22. eval_df = eval_df[eval_df['id'].isin(old_enough_ids)]
  23. return df, eval_df
  24. def add_custom_lc_features(df):
  25. # added features
  26. df['monthly_inc'] = df['annual_inc'] / 12
  27. df['dti_w_loan'] = (df['dti'] * df['monthly_inc'] +
  28. df['installment']) / df['monthly_inc']
  29. df['delinq_to_monthly_inc'] = df['delinq_amnt'] / \
  30. df['monthly_inc']
  31. df['tot_cur_bal_to_monthly_inc'] = df['tot_cur_bal'] / \
  32. df['monthly_inc']
  33. df['loan_to_inc'] = df['loan_amount'] / \
  34. df['monthly_inc']
Tip!

Press p to see the previous file, or n to see the next file

Comments

Loading...